From b1de0f01b0115575982cf24c88b35106449e9aa7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 13 Feb 2021 18:02:04 +0100
Subject: batman-adv: Use netif_rx_any_context().

The usage of in_interrupt() in non-core code is phased out. Ideally the
information of the calling context should be passed by the callers or the
functions be split as appropriate.

The attempt to consolidate the code by passing an arguemnt or by
distangling it failed due lack of knowledge about this driver and because
the call chains are hard to follow.

As a stop gap use netif_rx_any_context() which invokes the correct code path
depending on context and confines the in_interrupt() usage to core code.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 360bdbf44748..bcd543ce835b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -438,10 +438,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
 	batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
 			   skb->len + ETH_HLEN);
 
-	if (in_interrupt())
-		netif_rx(skb);
-	else
-		netif_rx_ni(skb);
+	netif_rx_any_context(skb);
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
-- 
cgit v1.2.3


From a56c14bb21b296fb6d395164ab62ef2e419e5069 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Sat, 13 Feb 2021 07:58:47 +0200
Subject: ath9k: fix ath_tx_process_buffer() potential null ptr dereference

ath_tx_process_buffer() references ieee80211_find_sta_by_ifaddr()
return pointer (sta) outside null check. Fix it by moving the code
block under the null check.

This problem was found while reviewing code to debug RCU warn from
ath10k_wmi_tlv_parse_peer_stats_info() and a subsequent manual audit
of other callers of ieee80211_find_sta_by_ifaddr() that don't hold
RCU read lock.

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/43ed9abb9e8d7112f3cc168c2f8c489e253635ba.1613090339.git.skhan@linuxfoundation.org
---
 drivers/net/wireless/ath/ath9k/xmit.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index e60d4737fc6e..828b96bf55a2 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -708,20 +708,24 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq,
 		ath_tx_count_airtime(sc, sta, bf, ts, tid->tidno);
 		if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY))
 			tid->clear_ps_filter = true;
-	}
 
-	if (!bf_isampdu(bf)) {
-		if (!flush) {
-			info = IEEE80211_SKB_CB(bf->bf_mpdu);
-			memcpy(info->control.rates, bf->rates,
-			       sizeof(info->control.rates));
-			ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
-			ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts,
-						sta);
+		if (!bf_isampdu(bf)) {
+			if (!flush) {
+				info = IEEE80211_SKB_CB(bf->bf_mpdu);
+				memcpy(info->control.rates, bf->rates,
+				       sizeof(info->control.rates));
+				ath_tx_rc_status(sc, bf, ts, 1,
+						 txok ? 0 : 1, txok);
+				ath_dynack_sample_tx_ts(sc->sc_ah,
+							bf->bf_mpdu, ts, sta);
+			}
+			ath_tx_complete_buf(sc, bf, txq, bf_head, sta,
+					    ts, txok);
+		} else {
+			ath_tx_complete_aggr(sc, txq, bf, bf_head, sta,
+					     tid, ts, txok);
 		}
-		ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok);
-	} else
-		ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok);
+	}
 
 	if (!flush)
 		ath_txq_schedule(sc, txq);
-- 
cgit v1.2.3


From 9c349dbd07523ed175b7f5ec42c4b4a1bc7ae1df Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 12 Feb 2021 11:36:27 +0000
Subject: ath11k: debugfs: Fix spelling mistake "Opportunies" ->
 "Opportunities"

There is a spelling mistake in some debug text, fix this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210212113627.212787-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
index e13684343ec3..ec93f14e6d2a 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs_htt_stats.c
@@ -3851,7 +3851,7 @@ htt_print_pdev_obss_pd_stats_tlv_v(const void *tag_buf,
 			   htt_stats_buf->num_non_srg_ppdu_tried);
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "Non-SRG success PPDU = %u\n",
 			   htt_stats_buf->num_non_srg_ppdu_success);
-	len += HTT_DBG_OUT(buf + len, buf_len - len, "SRG Opportunies = %u\n",
+	len += HTT_DBG_OUT(buf + len, buf_len - len, "SRG Opportunities = %u\n",
 			   htt_stats_buf->num_srg_opportunities);
 	len += HTT_DBG_OUT(buf + len, buf_len - len, "SRG tried PPDU = %u\n",
 			   htt_stats_buf->num_srg_ppdu_tried);
-- 
cgit v1.2.3


From 7a3aed0c3c36cc08a1b123d752f141797f6ba79a Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:15:35 +0200
Subject: ath11k: Refactor ath11k_msi_config

Move ath11k_msi_config to array of structures to add multiple
pci devices support. No functional changes.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-2-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/pci.c | 39 ++++++++++++++++++++---------------
 drivers/net/wireless/ath/ath11k/pci.h |  1 +
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index d14416816acc..046eac75eab8 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -50,14 +50,16 @@ static const struct ath11k_bus_params ath11k_pci_bus_params = {
 	.fixed_mem_region = false,
 };
 
-static const struct ath11k_msi_config msi_config = {
-	.total_vectors = 32,
-	.total_users = 4,
-	.users = (struct ath11k_msi_user[]) {
-		{ .name = "MHI", .num_vectors = 3, .base_vector = 0 },
-		{ .name = "CE", .num_vectors = 10, .base_vector = 3 },
-		{ .name = "WAKE", .num_vectors = 1, .base_vector = 13 },
-		{ .name = "DP", .num_vectors = 18, .base_vector = 14 },
+static const struct ath11k_msi_config ath11k_msi_config[] = {
+	{
+		.total_vectors = 32,
+		.total_users = 4,
+		.users = (struct ath11k_msi_user[]) {
+			{ .name = "MHI", .num_vectors = 3, .base_vector = 0 },
+			{ .name = "CE", .num_vectors = 10, .base_vector = 3 },
+			{ .name = "WAKE", .num_vectors = 1, .base_vector = 13 },
+			{ .name = "DP", .num_vectors = 18, .base_vector = 14 },
+		},
 	},
 };
 
@@ -406,14 +408,15 @@ int ath11k_pci_get_user_msi_assignment(struct ath11k_pci *ab_pci, char *user_nam
 				       u32 *base_vector)
 {
 	struct ath11k_base *ab = ab_pci->ab;
+	const struct ath11k_msi_config *msi_config = ab_pci->msi_config;
 	int idx;
 
-	for (idx = 0; idx < msi_config.total_users; idx++) {
-		if (strcmp(user_name, msi_config.users[idx].name) == 0) {
-			*num_vectors = msi_config.users[idx].num_vectors;
-			*user_base_data = msi_config.users[idx].base_vector
+	for (idx = 0; idx < msi_config->total_users; idx++) {
+		if (strcmp(user_name, msi_config->users[idx].name) == 0) {
+			*num_vectors = msi_config->users[idx].num_vectors;
+			*user_base_data = msi_config->users[idx].base_vector
 				+ ab_pci->msi_ep_base_data;
-			*base_vector = msi_config.users[idx].base_vector;
+			*base_vector = msi_config->users[idx].base_vector;
 
 			ath11k_dbg(ab, ATH11K_DBG_PCI, "Assign MSI to user: %s, num_vectors: %d, user_base_data: %u, base_vector: %u\n",
 				   user_name, *num_vectors, *user_base_data,
@@ -760,17 +763,18 @@ static void ath11k_pci_ce_irqs_enable(struct ath11k_base *ab)
 static int ath11k_pci_enable_msi(struct ath11k_pci *ab_pci)
 {
 	struct ath11k_base *ab = ab_pci->ab;
+	const struct ath11k_msi_config *msi_config = ab_pci->msi_config;
 	struct msi_desc *msi_desc;
 	int num_vectors;
 	int ret;
 
 	num_vectors = pci_alloc_irq_vectors(ab_pci->pdev,
-					    msi_config.total_vectors,
-					    msi_config.total_vectors,
+					    msi_config->total_vectors,
+					    msi_config->total_vectors,
 					    PCI_IRQ_MSI);
-	if (num_vectors != msi_config.total_vectors) {
+	if (num_vectors != msi_config->total_vectors) {
 		ath11k_err(ab, "failed to get %d MSI vectors, only %d available",
-			   msi_config.total_vectors, num_vectors);
+			   msi_config->total_vectors, num_vectors);
 
 		if (num_vectors >= 0)
 			return -EINVAL;
@@ -1138,6 +1142,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
 		goto err_pci_free_region;
 	}
 
+	ab_pci->msi_config = &ath11k_msi_config[0];
 	ret = ath11k_pci_enable_msi(ab_pci);
 	if (ret) {
 		ath11k_err(ab, "failed to enable msi: %d\n", ret);
diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h
index fe44d0dfce19..92eeb7c30546 100644
--- a/drivers/net/wireless/ath/ath11k/pci.h
+++ b/drivers/net/wireless/ath/ath11k/pci.h
@@ -73,6 +73,7 @@ struct ath11k_pci {
 	char amss_path[100];
 	u32 msi_ep_base_data;
 	struct mhi_controller *mhi_ctrl;
+	const struct ath11k_msi_config *msi_config;
 	unsigned long mhi_state;
 	u32 register_window;
 
-- 
cgit v1.2.3


From 16001e4b2e681b8fb5e7bc50db5522081d46347a Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:15:48 +0200
Subject: ath11k: Move qmi service_ins_id to hw_params

qmi service_ins_id is unique for QCA6390 and QCN9074,
this is needed for adding QCN9074 support. No functional
changes.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-3-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/ahb.c  | 2 +-
 drivers/net/wireless/ath/ath11k/core.c | 3 +++
 drivers/net/wireless/ath/ath11k/hw.h   | 1 +
 drivers/net/wireless/ath/ath11k/pci.c  | 2 +-
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index d4ef45cd0685..8c9c781afc3e 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -373,7 +373,7 @@ static void ath11k_ahb_init_qmi_ce_config(struct ath11k_base *ab)
 	cfg->tgt_ce = ab->hw_params.target_ce_config;
 	cfg->svc_to_ce_map_len = ab->hw_params.svc_to_ce_map_len;
 	cfg->svc_to_ce_map = ab->hw_params.svc_to_ce_map;
-	ab->qmi.service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ8074;
+	ab->qmi.service_ins_id = ab->hw_params.qmi_service_ins_id;
 }
 
 static void ath11k_ahb_free_ext_irq(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 350b7913622c..9ccc7231afa0 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -45,6 +45,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.ring_mask = &ath11k_hw_ring_mask_ipq8074,
 		.internal_sleep_clock = false,
 		.regs = &ipq8074_regs,
+		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ8074,
 		.host_ce_config = ath11k_host_ce_config_ipq8074,
 		.ce_count = 12,
 		.target_ce_config = ath11k_target_ce_config_wlan_ipq8074,
@@ -83,6 +84,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.ring_mask = &ath11k_hw_ring_mask_ipq8074,
 		.internal_sleep_clock = false,
 		.regs = &ipq8074_regs,
+		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ8074,
 		.host_ce_config = ath11k_host_ce_config_ipq8074,
 		.ce_count = 12,
 		.target_ce_config = ath11k_target_ce_config_wlan_ipq8074,
@@ -121,6 +123,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.ring_mask = &ath11k_hw_ring_mask_qca6390,
 		.internal_sleep_clock = true,
 		.regs = &qca6390_regs,
+		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390,
 		.host_ce_config = ath11k_host_ce_config_qca6390,
 		.ce_count = 9,
 		.target_ce_config = ath11k_target_ce_config_wlan_qca6390,
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 8af0034fdb05..94c36e6149cf 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -134,6 +134,7 @@ struct ath11k_hw_params {
 	bool internal_sleep_clock;
 
 	const struct ath11k_hw_regs *regs;
+	u32 qmi_service_ins_id;
 	const struct ce_attr *host_ce_config;
 	u32 ce_count;
 	const struct ce_pipe_config *target_ce_config;
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 046eac75eab8..ea7715484de0 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -743,7 +743,7 @@ static void ath11k_pci_init_qmi_ce_config(struct ath11k_base *ab)
 
 	cfg->svc_to_ce_map = ab->hw_params.svc_to_ce_map;
 	cfg->svc_to_ce_map_len = ab->hw_params.svc_to_ce_map_len;
-	ab->qmi.service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390;
+	ab->qmi.service_ins_id = ab->hw_params.qmi_service_ins_id;
 
 	ath11k_ce_get_shadow_config(ab, &cfg->shadow_reg_v2,
 				    &cfg->shadow_reg_v2_len);
-- 
cgit v1.2.3


From fa5f473d764398a09f7deea3a042a1130ee50e90 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:15:53 +0200
Subject: ath11k: qmi: increase the number of fw segments

QCN9074 firmware uses 20MB of HOST DDR memory, fw requests
the memory in segmnets of size 1MB/512KB/256KB.
Increase the number of fw memory segments to 52.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-4-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/qmi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h
index 7bad374cc23a..1797c3b59f48 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.h
+++ b/drivers/net/wireless/ath/ath11k/qmi.h
@@ -23,7 +23,7 @@
 #define ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ8074	0x02
 #define ATH11K_QMI_WLANFW_MAX_TIMESTAMP_LEN_V01	32
 #define ATH11K_QMI_RESP_LEN_MAX			8192
-#define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01	32
+#define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01	52
 #define ATH11K_QMI_CALDB_SIZE			0x480000
 #define ATH11K_QMI_BDF_EXT_STR_LENGTH		0x20
 
@@ -216,8 +216,8 @@ struct qmi_wlanfw_ind_register_resp_msg_v01 {
 	u64 fw_status;
 };
 
-#define QMI_WLANFW_REQUEST_MEM_IND_MSG_V01_MAX_LEN	1124
-#define QMI_WLANFW_RESPOND_MEM_REQ_MSG_V01_MAX_LEN	548
+#define QMI_WLANFW_REQUEST_MEM_IND_MSG_V01_MAX_LEN	1824
+#define QMI_WLANFW_RESPOND_MEM_REQ_MSG_V01_MAX_LEN	888
 #define QMI_WLANFW_RESPOND_MEM_RESP_MSG_V01_MAX_LEN	7
 #define QMI_WLANFW_REQUEST_MEM_IND_V01			0x0035
 #define QMI_WLANFW_RESPOND_MEM_REQ_V01			0x0036
-- 
cgit v1.2.3


From 5f67d306155e6a757f0b6b2b061e3ea13f44c536 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:03 +0200
Subject: ath11k: Update memory segment count for qcn9074

QCN9074 FW requests three types memory segments during the boot,
	qmi mem seg type 1 of size 15728640
	qmi mem seg type 4 of size 3735552
	qmi mem seg type 3 of size 1048576
Segment type 3 is for M3 coredump memory.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-5-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/qmi.c | 2 +-
 drivers/net/wireless/ath/ath11k/qmi.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index 1aca841cd147..eb1b987c9df2 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1765,7 +1765,7 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab)
 						  &chunk->paddr,
 						  GFP_KERNEL);
 		if (!chunk->vaddr) {
-			if (ab->qmi.mem_seg_count <= 2) {
+			if (ab->qmi.mem_seg_count <= ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT) {
 				ath11k_dbg(ab, ATH11K_DBG_QMI,
 					   "qmi dma allocation failed (%d B type %u), will try later with small size\n",
 					    chunk->size,
diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h
index 1797c3b59f48..a7d01c295e77 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.h
+++ b/drivers/net/wireless/ath/ath11k/qmi.h
@@ -26,6 +26,7 @@
 #define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01	52
 #define ATH11K_QMI_CALDB_SIZE			0x480000
 #define ATH11K_QMI_BDF_EXT_STR_LENGTH		0x20
+#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT	3
 
 #define QMI_WLFW_REQUEST_MEM_IND_V01		0x0035
 #define QMI_WLFW_FW_MEM_READY_IND_V01		0x0037
@@ -141,6 +142,7 @@ struct ath11k_qmi {
 #define QMI_IPQ8074_FW_MEM_MODE				0xFF
 #define HOST_DDR_REGION_TYPE				0x1
 #define BDF_MEM_REGION_TYPE				0x2
+#define M3_DUMP_REGION_TYPE				0x3
 #define CALDB_MEM_REGION_TYPE				0x4
 
 struct qmi_wlanfw_host_cap_req_msg_v01 {
-- 
cgit v1.2.3


From a233811ef60081192a2b13ce23253671114308d8 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:08 +0200
Subject: ath11k: Add qcn9074 mhi controller config

Add MHI config for QCN9074 also populate ath11k_hw_params for QCN9074.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-6-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c |  12 ++++
 drivers/net/wireless/ath/ath11k/core.h |   1 +
 drivers/net/wireless/ath/ath11k/mhi.c  | 116 ++++++++++++++++++++++++++++++---
 drivers/net/wireless/ath/ath11k/qmi.h  |   1 +
 4 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 9ccc7231afa0..fa452de49457 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -147,6 +147,18 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.cold_boot_calib = false,
 		.supports_suspend = true,
 	},
+	{
+		.name = "qcn9074 hw1.0",
+		.hw_rev = ATH11K_HW_QCN9074_HW10,
+		.fw = {
+			.dir = "QCN9074/hw1.0",
+			.board_size = 256 * 1024,
+			.cal_size = 256 * 1024,
+		},
+		.max_radios = 1,
+		.single_pdev_only = false,
+		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9074,
+	},
 };
 
 int ath11k_core_suspend(struct ath11k_base *ab)
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 8d29845774df..9a8fb23577b0 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -105,6 +105,7 @@ enum ath11k_hw_rev {
 	ATH11K_HW_IPQ8074,
 	ATH11K_HW_QCA6390_HW20,
 	ATH11K_HW_IPQ6018_HW10,
+	ATH11K_HW_QCN9074_HW10,
 };
 
 enum ath11k_firmware_mode {
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 09858e516903..626764da4d6f 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -7,10 +7,11 @@
 #include "core.h"
 #include "debug.h"
 #include "mhi.h"
+#include "pci.h"
 
 #define MHI_TIMEOUT_DEFAULT_MS	90000
 
-static struct mhi_channel_config ath11k_mhi_channels[] = {
+static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = {
 	{
 		.num = 0,
 		.name = "LOOPBACK",
@@ -69,7 +70,7 @@ static struct mhi_channel_config ath11k_mhi_channels[] = {
 	},
 };
 
-static struct mhi_event_config ath11k_mhi_events[] = {
+static struct mhi_event_config ath11k_mhi_events_qca6390[] = {
 	{
 		.num_elements = 32,
 		.irq_moderation_ms = 0,
@@ -92,15 +93,108 @@ static struct mhi_event_config ath11k_mhi_events[] = {
 	},
 };
 
-static struct mhi_controller_config ath11k_mhi_config = {
+static struct mhi_controller_config ath11k_mhi_config_qca6390 = {
 	.max_channels = 128,
 	.timeout_ms = 2000,
 	.use_bounce_buf = false,
 	.buf_len = 0,
-	.num_channels = ARRAY_SIZE(ath11k_mhi_channels),
-	.ch_cfg = ath11k_mhi_channels,
-	.num_events = ARRAY_SIZE(ath11k_mhi_events),
-	.event_cfg = ath11k_mhi_events,
+	.num_channels = ARRAY_SIZE(ath11k_mhi_channels_qca6390),
+	.ch_cfg = ath11k_mhi_channels_qca6390,
+	.num_events = ARRAY_SIZE(ath11k_mhi_events_qca6390),
+	.event_cfg = ath11k_mhi_events_qca6390,
+};
+
+static struct mhi_channel_config ath11k_mhi_channels_qcn9074[] = {
+	{
+		.num = 0,
+		.name = "LOOPBACK",
+		.num_elements = 32,
+		.event_ring = 1,
+		.dir = DMA_TO_DEVICE,
+		.ee_mask = 0x14,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+	},
+	{
+		.num = 1,
+		.name = "LOOPBACK",
+		.num_elements = 32,
+		.event_ring = 1,
+		.dir = DMA_FROM_DEVICE,
+		.ee_mask = 0x14,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+	},
+	{
+		.num = 20,
+		.name = "IPCR",
+		.num_elements = 32,
+		.event_ring = 1,
+		.dir = DMA_TO_DEVICE,
+		.ee_mask = 0x14,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+	},
+	{
+		.num = 21,
+		.name = "IPCR",
+		.num_elements = 32,
+		.event_ring = 1,
+		.dir = DMA_FROM_DEVICE,
+		.ee_mask = 0x14,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = true,
+	},
+};
+
+static struct mhi_event_config ath11k_mhi_events_qcn9074[] = {
+	{
+		.num_elements = 32,
+		.irq_moderation_ms = 0,
+		.irq = 1,
+		.data_type = MHI_ER_CTRL,
+		.mode = MHI_DB_BRST_DISABLE,
+		.hardware_event = false,
+		.client_managed = false,
+		.offload_channel = false,
+	},
+	{
+		.num_elements = 256,
+		.irq_moderation_ms = 1,
+		.irq = 2,
+		.mode = MHI_DB_BRST_DISABLE,
+		.priority = 1,
+		.hardware_event = false,
+		.client_managed = false,
+		.offload_channel = false,
+	},
+};
+
+static struct mhi_controller_config ath11k_mhi_config_qcn9074 = {
+	.max_channels = 30,
+	.timeout_ms = 10000,
+	.use_bounce_buf = false,
+	.buf_len = 0,
+	.num_channels = ARRAY_SIZE(ath11k_mhi_channels_qcn9074),
+	.ch_cfg = ath11k_mhi_channels_qcn9074,
+	.num_events = ARRAY_SIZE(ath11k_mhi_events_qcn9074),
+	.event_cfg = ath11k_mhi_events_qcn9074,
 };
 
 void ath11k_mhi_set_mhictrl_reset(struct ath11k_base *ab)
@@ -221,6 +315,7 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
 {
 	struct ath11k_base *ab = ab_pci->ab;
 	struct mhi_controller *mhi_ctrl;
+	struct mhi_controller_config *ath11k_mhi_config;
 	int ret;
 
 	mhi_ctrl = mhi_alloc_controller();
@@ -254,7 +349,12 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
 	mhi_ctrl->read_reg = ath11k_mhi_op_read_reg;
 	mhi_ctrl->write_reg = ath11k_mhi_op_write_reg;
 
-	ret = mhi_register_controller(mhi_ctrl, &ath11k_mhi_config);
+	if (ab->hw_rev == ATH11K_HW_QCA6390_HW20)
+		ath11k_mhi_config = &ath11k_mhi_config_qca6390;
+	else if (ab->hw_rev == ATH11K_HW_QCN9074_HW10)
+		ath11k_mhi_config = &ath11k_mhi_config_qcn9074;
+
+	ret = mhi_register_controller(mhi_ctrl, ath11k_mhi_config);
 	if (ret) {
 		ath11k_err(ab, "failed to register to mhi bus, err = %d\n", ret);
 		mhi_free_controller(mhi_ctrl);
diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h
index a7d01c295e77..3d5930330703 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.h
+++ b/drivers/net/wireless/ath/ath11k/qmi.h
@@ -21,6 +21,7 @@
 #define ATH11K_QMI_WLFW_SERVICE_INS_ID_V01	0x02
 #define ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390	0x01
 #define ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_IPQ8074	0x02
+#define ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9074	0x07
 #define ATH11K_QMI_WLANFW_MAX_TIMESTAMP_LEN_V01	32
 #define ATH11K_QMI_RESP_LEN_MAX			8192
 #define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01	52
-- 
cgit v1.2.3


From 480a73610c95511e42fb7d0359b523f66883e51a Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:17 +0200
Subject: ath11k: add static window support for register access

Three window slots can be configure. First window slot
dedicate for dynamic selection and remaining two slots
dedicate for static selection. To optimise the window
selection, frequent registers (UMAC, CE) are configure
in static window slot. so that we minimise the window
selection. Other registers are configure in dynamic window
slot. Get the window start address from the respective
offset and access the read/write register.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-7-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h |  1 +
 drivers/net/wireless/ath/ath11k/hal.h  |  4 ++
 drivers/net/wireless/ath/ath11k/pci.c  | 72 +++++++++++++++++++++++++++++-----
 3 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 9a8fb23577b0..912294f7657c 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -608,6 +608,7 @@ struct ath11k_bus_params {
 	bool m3_fw_support;
 	bool fixed_bdf_addr;
 	bool fixed_mem_region;
+	bool static_window_map;
 };
 
 /* IPQ8074 HW channel counters frequency value in hertz */
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 1f1b29cd0aa3..3f5687ebe1fc 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -39,6 +39,7 @@ struct ath11k_base;
 #define HAL_SHADOW_REG(x) (HAL_SHADOW_BASE_ADDR + (4 * (x)))
 
 /* WCSS Relative address */
+#define HAL_SEQ_WCSS_UMAC_OFFSET		0x00a00000
 #define HAL_SEQ_WCSS_UMAC_REO_REG		0x00a38000
 #define HAL_SEQ_WCSS_UMAC_TCL_REG		0x00a44000
 #define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG		0x00a00000
@@ -47,6 +48,9 @@ struct ath11k_base;
 #define HAL_SEQ_WCSS_UMAC_CE1_DST_REG		0x00a03000
 #define HAL_SEQ_WCSS_UMAC_WBM_REG		0x00a34000
 
+#define HAL_CE_WFSS_CE_REG_BASE			0x01b80000
+#define HAL_WLAON_REG_BASE			0x01f80000
+
 /* SW2TCL(x) R0 ring configuration address */
 #define HAL_TCL1_RING_CMN_CTRL_REG		0x00000014
 #define HAL_TCL1_RING_DSCP_TID_MAP		0x0000002c
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index ea7715484de0..88d0e5be17f5 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -133,9 +133,38 @@ static inline void ath11k_pci_select_window(struct ath11k_pci *ab_pci, u32 offse
 	}
 }
 
+static inline void ath11k_pci_select_static_window(struct ath11k_pci *ab_pci)
+{
+	u32 umac_window = FIELD_GET(WINDOW_VALUE_MASK, HAL_SEQ_WCSS_UMAC_OFFSET);
+	u32 ce_window = FIELD_GET(WINDOW_VALUE_MASK, HAL_CE_WFSS_CE_REG_BASE);
+	u32 window;
+
+	window = (umac_window << 12) | (ce_window << 6);
+
+	iowrite32(WINDOW_ENABLE_BIT | window, ab_pci->ab->mem + WINDOW_REG_ADDRESS);
+}
+
+static inline u32 ath11k_pci_get_window_start(struct ath11k_base *ab,
+					      u32 offset)
+{
+	u32 window_start;
+
+	/* If offset lies within DP register range, use 3rd window */
+	if ((offset ^ HAL_SEQ_WCSS_UMAC_OFFSET) < WINDOW_RANGE_MASK)
+		window_start = 3 * WINDOW_START;
+	/* If offset lies within CE register range, use 2nd window */
+	else if ((offset ^ HAL_CE_WFSS_CE_REG_BASE) < WINDOW_RANGE_MASK)
+		window_start = 2 * WINDOW_START;
+	else
+		window_start = WINDOW_START;
+
+	return window_start;
+}
+
 void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value)
 {
 	struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
+	u32 window_start;
 
 	/* for offset beyond BAR + 4K - 32, may
 	 * need to wakeup MHI to access.
@@ -147,10 +176,21 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value)
 	if (offset < WINDOW_START) {
 		iowrite32(value, ab->mem  + offset);
 	} else {
-		spin_lock_bh(&ab_pci->window_lock);
-		ath11k_pci_select_window(ab_pci, offset);
-		iowrite32(value, ab->mem + WINDOW_START + (offset & WINDOW_RANGE_MASK));
-		spin_unlock_bh(&ab_pci->window_lock);
+		if (ab->bus_params.static_window_map)
+			window_start = ath11k_pci_get_window_start(ab, offset);
+		else
+			window_start = WINDOW_START;
+
+		if (window_start == WINDOW_START) {
+			spin_lock_bh(&ab_pci->window_lock);
+			ath11k_pci_select_window(ab_pci, offset);
+			iowrite32(value, ab->mem + window_start +
+				  (offset & WINDOW_RANGE_MASK));
+			spin_unlock_bh(&ab_pci->window_lock);
+		} else {
+			iowrite32(value, ab->mem + window_start +
+				  (offset & WINDOW_RANGE_MASK));
+		}
 	}
 
 	if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
@@ -161,7 +201,7 @@ void ath11k_pci_write32(struct ath11k_base *ab, u32 offset, u32 value)
 u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset)
 {
 	struct ath11k_pci *ab_pci = ath11k_pci_priv(ab);
-	u32 val;
+	u32 val, window_start;
 
 	/* for offset beyond BAR + 4K - 32, may
 	 * need to wakeup MHI to access.
@@ -173,10 +213,21 @@ u32 ath11k_pci_read32(struct ath11k_base *ab, u32 offset)
 	if (offset < WINDOW_START) {
 		val = ioread32(ab->mem + offset);
 	} else {
-		spin_lock_bh(&ab_pci->window_lock);
-		ath11k_pci_select_window(ab_pci, offset);
-		val = ioread32(ab->mem + WINDOW_START + (offset & WINDOW_RANGE_MASK));
-		spin_unlock_bh(&ab_pci->window_lock);
+		if (ab->bus_params.static_window_map)
+			window_start = ath11k_pci_get_window_start(ab, offset);
+		else
+			window_start = WINDOW_START;
+
+		if (window_start == WINDOW_START) {
+			spin_lock_bh(&ab_pci->window_lock);
+			ath11k_pci_select_window(ab_pci, offset);
+			val = ioread32(ab->mem + window_start +
+				       (offset & WINDOW_RANGE_MASK));
+			spin_unlock_bh(&ab_pci->window_lock);
+		} else {
+			val = ioread32(ab->mem + window_start +
+				       (offset & WINDOW_RANGE_MASK));
+		}
 	}
 
 	if (test_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags) &&
@@ -936,6 +987,9 @@ static int ath11k_pci_power_up(struct ath11k_base *ab)
 		return ret;
 	}
 
+	if (ab->bus_params.static_window_map)
+		ath11k_pci_select_static_window(ab_pci);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6fe6f68fef7f7d5f6b5b62fde78de91cdc528c58 Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:22 +0200
Subject: ath11k: add hal support for QCN9074

Define the hal ring address and ring meta descriptor mask for
QCN9074. Move the platform specific address to the ath11k_hw_regs.
Define tx_mesh_enable ops in ath11k_hw_ops since its accessing
platform specific TCL descriptor.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-8-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c     |  17 ++++
 drivers/net/wireless/ath/ath11k/dp_tx.c    |   2 +-
 drivers/net/wireless/ath/ath11k/hal.c      |  96 +++++++++-----------
 drivers/net/wireless/ath/ath11k/hal.h      |  27 ++++--
 drivers/net/wireless/ath/ath11k/hal_desc.h |  13 +--
 drivers/net/wireless/ath/ath11k/hal_tx.c   |   3 +
 drivers/net/wireless/ath/ath11k/hal_tx.h   |   1 +
 drivers/net/wireless/ath/ath11k/hw.c       | 137 +++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h       |  22 +++++
 drivers/net/wireless/ath/ath11k/pci.c      |  20 ++---
 drivers/net/wireless/ath/ath11k/pci.h      |  20 +++--
 11 files changed, 269 insertions(+), 89 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index fa452de49457..2d9a00d96885 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -158,6 +158,23 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.max_radios = 1,
 		.single_pdev_only = false,
 		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9074,
+		.hw_ops = &qcn9074_ops,
+		.internal_sleep_clock = false,
+		.regs = &qcn9074_regs,
+		.rxdma1_enable = true,
+		.num_rxmda_per_pdev = 1,
+		.rx_mac_buf_ring = false,
+		.vdev_start_delay = false,
+		.htt_peer_map_v2 = true,
+		.tcl_0_only = false,
+		.interface_modes = BIT(NL80211_IFTYPE_STATION) |
+					BIT(NL80211_IFTYPE_AP) |
+					BIT(NL80211_IFTYPE_MESH_POINT),
+		.supports_monitor = true,
+		.supports_shadow_regs = false,
+		.idle_ps = false,
+		.cold_boot_calib = false,
+		.supports_suspend = false,
 	},
 };
 
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index 1a0b9be9ce6a..f5c277977e08 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -178,7 +178,7 @@ tcl_ring_sel:
 	}
 
 	if (ieee80211_vif_is_mesh(arvif->vif))
-		ti.flags1 |= FIELD_PREP(HAL_TCL_DATA_CMD_INFO2_MESH_ENABLE, 1);
+		ti.enable_mesh = true;
 
 	ti.flags1 |= FIELD_PREP(HAL_TCL_DATA_CMD_INFO2_TID_OVERWRITE, 1);
 
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index 9904c0eb7587..08e3c72d9237 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -89,17 +89,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_ce_srng_src_desc) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_SRC,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_CE0_SRC_REG +
-			 HAL_CE_DST_RING_BASE_LSB),
-			HAL_SEQ_WCSS_UMAC_CE0_SRC_REG + HAL_CE_DST_RING_HP,
-		},
-		.reg_size = {
-			(HAL_SEQ_WCSS_UMAC_CE1_SRC_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_SRC_REG),
-			(HAL_SEQ_WCSS_UMAC_CE1_SRC_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_SRC_REG),
-		},
 		.max_size = HAL_CE_SRC_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* CE_DST */
@@ -108,17 +97,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_ce_srng_dest_desc) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_SRC,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_CE0_DST_REG +
-			 HAL_CE_DST_RING_BASE_LSB),
-			HAL_SEQ_WCSS_UMAC_CE0_DST_REG + HAL_CE_DST_RING_HP,
-		},
-		.reg_size = {
-			(HAL_SEQ_WCSS_UMAC_CE1_DST_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_DST_REG),
-			(HAL_SEQ_WCSS_UMAC_CE1_DST_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_DST_REG),
-		},
 		.max_size = HAL_CE_DST_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* CE_DST_STATUS */
@@ -127,18 +105,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_ce_srng_dst_status_desc) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_DST,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_CE0_DST_REG +
-			 HAL_CE_DST_STATUS_RING_BASE_LSB),
-			(HAL_SEQ_WCSS_UMAC_CE0_DST_REG +
-			 HAL_CE_DST_STATUS_RING_HP),
-		},
-		.reg_size = {
-			(HAL_SEQ_WCSS_UMAC_CE1_DST_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_DST_REG),
-			(HAL_SEQ_WCSS_UMAC_CE1_DST_REG -
-			 HAL_SEQ_WCSS_UMAC_CE0_DST_REG),
-		},
 		.max_size = HAL_CE_DST_STATUS_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* WBM_IDLE_LINK */
@@ -147,11 +113,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_wbm_link_desc) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_SRC,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_WBM_REG +
-			 HAL_WBM_IDLE_LINK_RING_BASE_LSB),
-			(HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_HP),
-		},
 		.max_size = HAL_WBM_IDLE_LINK_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* SW2WBM_RELEASE */
@@ -160,11 +121,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_wbm_release_ring) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_SRC,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_WBM_REG +
-			 HAL_WBM_RELEASE_RING_BASE_LSB),
-			(HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_RELEASE_RING_HP),
-		},
 		.max_size = HAL_SW2WBM_RELEASE_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* WBM2SW_RELEASE */
@@ -173,16 +129,6 @@ static const struct hal_srng_config hw_srng_config_template[] = {
 		.entry_size = sizeof(struct hal_wbm_release_ring) >> 2,
 		.lmac_ring = false,
 		.ring_dir = HAL_SRNG_DIR_DST,
-		.reg_start = {
-			(HAL_SEQ_WCSS_UMAC_WBM_REG +
-			 HAL_WBM0_RELEASE_RING_BASE_LSB),
-			(HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM0_RELEASE_RING_HP),
-		},
-		.reg_size = {
-			(HAL_WBM1_RELEASE_RING_BASE_LSB -
-			 HAL_WBM0_RELEASE_RING_BASE_LSB),
-			(HAL_WBM1_RELEASE_RING_HP - HAL_WBM0_RELEASE_RING_HP),
-		},
 		.max_size = HAL_WBM2SW_RELEASE_RING_BASE_MSB_RING_SIZE,
 	},
 	{ /* RXDMA_BUF */
@@ -955,7 +901,7 @@ void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 	/* Enable the SRNG */
 	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
-			   HAL_WBM_IDLE_LINK_RING_MISC_ADDR, 0x40);
+			   HAL_WBM_IDLE_LINK_RING_MISC_ADDR(ab), 0x40);
 }
 
 int ath11k_hal_srng_setup(struct ath11k_base *ab, enum hal_ring_type type,
@@ -1234,6 +1180,46 @@ static int ath11k_hal_srng_create_config(struct ath11k_base *ab)
 	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL_STATUS_RING_BASE_LSB(ab);
 	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL_STATUS_RING_HP;
 
+	s = &hal->srng_config[HAL_CE_SRC];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_BASE_LSB;
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab) + HAL_CE_DST_RING_HP;
+	s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab);
+	s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab);
+
+	s = &hal->srng_config[HAL_CE_DST];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_BASE_LSB;
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_RING_HP;
+	s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab);
+	s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab);
+
+	s = &hal->srng_config[HAL_CE_DST_STATUS];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) +
+		HAL_CE_DST_STATUS_RING_BASE_LSB;
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab) + HAL_CE_DST_STATUS_RING_HP;
+	s->reg_size[0] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab);
+	s->reg_size[1] = HAL_SEQ_WCSS_UMAC_CE1_DST_REG(ab) -
+		HAL_SEQ_WCSS_UMAC_CE0_DST_REG(ab);
+
+	s = &hal->srng_config[HAL_WBM_IDLE_LINK];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_BASE_LSB(ab);
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_IDLE_LINK_RING_HP;
+
+	s = &hal->srng_config[HAL_SW2WBM_RELEASE];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_RELEASE_RING_BASE_LSB(ab);
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_RELEASE_RING_HP;
+
+	s = &hal->srng_config[HAL_WBM2SW_RELEASE];
+	s->reg_start[0] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM0_RELEASE_RING_BASE_LSB(ab);
+	s->reg_start[1] = HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM0_RELEASE_RING_HP;
+	s->reg_size[0] = HAL_WBM1_RELEASE_RING_BASE_LSB(ab) -
+		HAL_WBM0_RELEASE_RING_BASE_LSB(ab);
+	s->reg_size[1] = HAL_WBM1_RELEASE_RING_HP - HAL_WBM0_RELEASE_RING_HP;
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 3f5687ebe1fc..082d31f0c9ec 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -42,10 +42,14 @@ struct ath11k_base;
 #define HAL_SEQ_WCSS_UMAC_OFFSET		0x00a00000
 #define HAL_SEQ_WCSS_UMAC_REO_REG		0x00a38000
 #define HAL_SEQ_WCSS_UMAC_TCL_REG		0x00a44000
-#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG		0x00a00000
-#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG		0x00a01000
-#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG		0x00a02000
-#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG		0x00a03000
+#define HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(x) \
+		(ab->hw_params.regs->hal_seq_wcss_umac_ce0_src_reg)
+#define HAL_SEQ_WCSS_UMAC_CE0_DST_REG(x) \
+		(ab->hw_params.regs->hal_seq_wcss_umac_ce0_dst_reg)
+#define HAL_SEQ_WCSS_UMAC_CE1_SRC_REG(x) \
+		(ab->hw_params.regs->hal_seq_wcss_umac_ce1_src_reg)
+#define HAL_SEQ_WCSS_UMAC_CE1_DST_REG(x) \
+		(ab->hw_params.regs->hal_seq_wcss_umac_ce1_dst_reg)
 #define HAL_SEQ_WCSS_UMAC_WBM_REG		0x00a34000
 
 #define HAL_CE_WFSS_CE_REG_BASE			0x01b80000
@@ -201,8 +205,10 @@ struct ath11k_base;
 #define HAL_REO_STATUS_HP(ab)			ab->hw_params.regs->hal_reo_status_hp
 
 /* WBM Idle R0 address */
-#define HAL_WBM_IDLE_LINK_RING_BASE_LSB		0x00000860
-#define HAL_WBM_IDLE_LINK_RING_MISC_ADDR	0x00000870
+#define HAL_WBM_IDLE_LINK_RING_BASE_LSB(x) \
+		(ab->hw_params.regs->hal_wbm_idle_link_ring_base_lsb)
+#define HAL_WBM_IDLE_LINK_RING_MISC_ADDR(x) \
+		(ab->hw_params.regs->hal_wbm_idle_link_ring_misc)
 #define HAL_WBM_R0_IDLE_LIST_CONTROL_ADDR	0x00000048
 #define HAL_WBM_R0_IDLE_LIST_SIZE_ADDR		0x0000004c
 #define HAL_WBM_SCATTERED_RING_BASE_LSB		0x00000058
@@ -217,14 +223,17 @@ struct ath11k_base;
 #define HAL_WBM_IDLE_LINK_RING_HP		0x000030b0
 
 /* SW2WBM R0 release address */
-#define HAL_WBM_RELEASE_RING_BASE_LSB		0x000001d8
+#define HAL_WBM_RELEASE_RING_BASE_LSB(x) \
+		(ab->hw_params.regs->hal_wbm_release_ring_base_lsb)
 
 /* SW2WBM R2 release address */
 #define HAL_WBM_RELEASE_RING_HP			0x00003018
 
 /* WBM2SW R0 release address */
-#define HAL_WBM0_RELEASE_RING_BASE_LSB		0x00000910
-#define HAL_WBM1_RELEASE_RING_BASE_LSB		0x00000968
+#define HAL_WBM0_RELEASE_RING_BASE_LSB(x) \
+		(ab->hw_params.regs->hal_wbm0_release_ring_base_lsb)
+#define HAL_WBM1_RELEASE_RING_BASE_LSB(x) \
+		(ab->hw_params.regs->hal_wbm1_release_ring_base_lsb)
 
 /* WBM2SW R2 release address */
 #define HAL_WBM0_RELEASE_RING_HP		0x000030c0
diff --git a/drivers/net/wireless/ath/ath11k/hal_desc.h b/drivers/net/wireless/ath/ath11k/hal_desc.h
index 1b713cb13b90..d54ec6aa6281 100644
--- a/drivers/net/wireless/ath/ath11k/hal_desc.h
+++ b/drivers/net/wireless/ath/ath11k/hal_desc.h
@@ -949,16 +949,17 @@ struct hal_reo_flush_cache {
 #define HAL_TCL_DATA_CMD_INFO1_TO_FW		BIT(21)
 #define HAL_TCL_DATA_CMD_INFO1_PKT_OFFSET	GENMASK(31, 23)
 
-#define HAL_TCL_DATA_CMD_INFO2_BUF_TIMESTAMP	GENMASK(18, 0)
-#define HAL_TCL_DATA_CMD_INFO2_BUF_T_VALID	BIT(19)
-#define HAL_TCL_DATA_CMD_INFO2_MESH_ENABLE	BIT(20)
-#define HAL_TCL_DATA_CMD_INFO2_TID_OVERWRITE	BIT(21)
-#define HAL_TCL_DATA_CMD_INFO2_TID		GENMASK(25, 22)
-#define HAL_TCL_DATA_CMD_INFO2_LMAC_ID		GENMASK(27, 26)
+#define HAL_TCL_DATA_CMD_INFO2_BUF_TIMESTAMP		GENMASK(18, 0)
+#define HAL_TCL_DATA_CMD_INFO2_BUF_T_VALID		BIT(19)
+#define HAL_IPQ8074_TCL_DATA_CMD_INFO2_MESH_ENABLE	BIT(20)
+#define HAL_TCL_DATA_CMD_INFO2_TID_OVERWRITE		BIT(21)
+#define HAL_TCL_DATA_CMD_INFO2_TID			GENMASK(25, 22)
+#define HAL_TCL_DATA_CMD_INFO2_LMAC_ID			GENMASK(27, 26)
 
 #define HAL_TCL_DATA_CMD_INFO3_DSCP_TID_TABLE_IDX	GENMASK(5, 0)
 #define HAL_TCL_DATA_CMD_INFO3_SEARCH_INDEX		GENMASK(25, 6)
 #define HAL_TCL_DATA_CMD_INFO3_CACHE_SET_NUM		GENMASK(29, 26)
+#define HAL_QCN9074_TCL_DATA_CMD_INFO3_MESH_ENABLE	GENMASK(31, 30)
 
 #define HAL_TCL_DATA_CMD_INFO4_RING_ID			GENMASK(27, 20)
 #define HAL_TCL_DATA_CMD_INFO4_LOOPING_COUNT		GENMASK(31, 28)
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index 569e790d83a1..86ed450f8238 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -75,6 +75,9 @@ void ath11k_hal_tx_cmd_desc_setup(struct ath11k_base *ab, void *cmd,
 			 FIELD_PREP(HAL_TCL_DATA_CMD_INFO3_CACHE_SET_NUM,
 				    ti->bss_ast_hash);
 	tcl_cmd->info4 = 0;
+
+	if (ti->enable_mesh && ab->hw_params.hw_ops->tx_mesh_enable)
+		ab->hw_params.hw_ops->tx_mesh_enable(ab, tcl_cmd);
 }
 
 void ath11k_hal_tx_set_dscp_tid_map(struct ath11k_base *ab, int id)
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.h b/drivers/net/wireless/ath/ath11k/hal_tx.h
index c291e59c3ca6..36f4f6f6cbc2 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.h
@@ -34,6 +34,7 @@ struct hal_tx_info {
 	u8 search_type; /* %HAL_TX_ADDR_SEARCH_ */
 	u8 lmac_id;
 	u8 dscp_tid_tbl_idx;
+	bool enable_mesh;
 };
 
 /* TODO: Check if the actual desc macros can be used instead */
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 66331da35012..626739b54989 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -31,6 +31,20 @@ static u8 ath11k_hw_ipq6018_mac_from_pdev_id(int pdev_idx)
 	return pdev_idx;
 }
 
+static void ath11k_hw_ipq8074_tx_mesh_enable(struct ath11k_base *ab,
+					     struct hal_tcl_data_cmd *tcl_cmd)
+{
+	tcl_cmd->info2 |= FIELD_PREP(HAL_IPQ8074_TCL_DATA_CMD_INFO2_MESH_ENABLE,
+				     true);
+}
+
+static void ath11k_hw_qcn9074_tx_mesh_enable(struct ath11k_base *ab,
+					     struct hal_tcl_data_cmd *tcl_cmd)
+{
+	tcl_cmd->info3 |= FIELD_PREP(HAL_QCN9074_TCL_DATA_CMD_INFO3_MESH_ENABLE,
+				     true);
+}
+
 static void ath11k_init_wmi_config_qca6390(struct ath11k_base *ab,
 					   struct target_resource_config *config)
 {
@@ -160,6 +174,7 @@ const struct ath11k_hw_ops ipq8074_ops = {
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
+	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
 };
 
 const struct ath11k_hw_ops ipq6018_ops = {
@@ -167,6 +182,7 @@ const struct ath11k_hw_ops ipq6018_ops = {
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
+	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
 };
 
 const struct ath11k_hw_ops qca6390_ops = {
@@ -174,6 +190,15 @@ const struct ath11k_hw_ops qca6390_ops = {
 	.wmi_init_config = ath11k_init_wmi_config_qca6390,
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_qca6390,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_qca6390,
+	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
+};
+
+const struct ath11k_hw_ops qcn9074_ops = {
+	.get_hw_mac_from_pdev_id = ath11k_hw_ipq6018_mac_from_pdev_id,
+	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
+	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
+	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
+	.tx_mesh_enable = ath11k_hw_qcn9074_tx_mesh_enable,
 };
 
 #define ATH11K_TX_RING_MASK_0 0x1
@@ -841,6 +866,26 @@ const struct ath11k_hw_regs ipq8074_regs = {
 	.hal_reo_status_ring_base_lsb = 0x00000504,
 	.hal_reo_status_hp = 0x00003070,
 
+	/* WCSS relative address */
+	.hal_seq_wcss_umac_ce0_src_reg = 0x00a00000,
+	.hal_seq_wcss_umac_ce0_dst_reg = 0x00a01000,
+	.hal_seq_wcss_umac_ce1_src_reg = 0x00a02000,
+	.hal_seq_wcss_umac_ce1_dst_reg = 0x00a03000,
+
+	/* WBM Idle address */
+	.hal_wbm_idle_link_ring_base_lsb = 0x00000860,
+	.hal_wbm_idle_link_ring_misc = 0x00000870,
+
+	/* SW2WBM release address */
+	.hal_wbm_release_ring_base_lsb = 0x000001d8,
+
+	/* WBM2SW release address */
+	.hal_wbm0_release_ring_base_lsb = 0x00000910,
+	.hal_wbm1_release_ring_base_lsb = 0x00000968,
+
+	/* PCIe base address */
+	.pcie_qserdes_sysclk_en_sel = 0x0,
+	.pcie_pcs_osc_dtct_config_base = 0x0,
 };
 
 const struct ath11k_hw_regs qca6390_regs = {
@@ -891,4 +936,96 @@ const struct ath11k_hw_regs qca6390_regs = {
 	/* REO status address */
 	.hal_reo_status_ring_base_lsb = 0x000004ac,
 	.hal_reo_status_hp = 0x00003068,
+
+	/* WCSS relative address */
+	.hal_seq_wcss_umac_ce0_src_reg = 0x00a00000,
+	.hal_seq_wcss_umac_ce0_dst_reg = 0x00a01000,
+	.hal_seq_wcss_umac_ce1_src_reg = 0x00a02000,
+	.hal_seq_wcss_umac_ce1_dst_reg = 0x00a03000,
+
+	/* WBM Idle address */
+	.hal_wbm_idle_link_ring_base_lsb = 0x00000860,
+	.hal_wbm_idle_link_ring_misc = 0x00000870,
+
+	/* SW2WBM release address */
+	.hal_wbm_release_ring_base_lsb = 0x000001d8,
+
+	/* WBM2SW release address */
+	.hal_wbm0_release_ring_base_lsb = 0x00000910,
+	.hal_wbm1_release_ring_base_lsb = 0x00000968,
+
+	/* PCIe base address */
+	.pcie_qserdes_sysclk_en_sel = 0x01e0c0ac,
+	.pcie_pcs_osc_dtct_config_base = 0x01e0c628,
+};
+
+const struct ath11k_hw_regs qcn9074_regs = {
+	/* SW2TCL(x) R0 ring configuration address */
+	.hal_tcl1_ring_base_lsb = 0x000004f0,
+	.hal_tcl1_ring_base_msb = 0x000004f4,
+	.hal_tcl1_ring_id = 0x000004f8,
+	.hal_tcl1_ring_misc = 0x00000500,
+	.hal_tcl1_ring_tp_addr_lsb = 0x0000050c,
+	.hal_tcl1_ring_tp_addr_msb = 0x00000510,
+	.hal_tcl1_ring_consumer_int_setup_ix0 = 0x00000520,
+	.hal_tcl1_ring_consumer_int_setup_ix1 = 0x00000524,
+	.hal_tcl1_ring_msi1_base_lsb = 0x00000538,
+	.hal_tcl1_ring_msi1_base_msb = 0x0000053c,
+	.hal_tcl1_ring_msi1_data = 0x00000540,
+	.hal_tcl2_ring_base_lsb = 0x00000548,
+	.hal_tcl_ring_base_lsb = 0x000005f8,
+
+	/* TCL STATUS ring address */
+	.hal_tcl_status_ring_base_lsb = 0x00000700,
+
+	/* REO2SW(x) R0 ring configuration address */
+	.hal_reo1_ring_base_lsb = 0x0000029c,
+	.hal_reo1_ring_base_msb = 0x000002a0,
+	.hal_reo1_ring_id = 0x000002a4,
+	.hal_reo1_ring_misc = 0x000002ac,
+	.hal_reo1_ring_hp_addr_lsb = 0x000002b0,
+	.hal_reo1_ring_hp_addr_msb = 0x000002b4,
+	.hal_reo1_ring_producer_int_setup = 0x000002c0,
+	.hal_reo1_ring_msi1_base_lsb = 0x000002e4,
+	.hal_reo1_ring_msi1_base_msb = 0x000002e8,
+	.hal_reo1_ring_msi1_data = 0x000002ec,
+	.hal_reo2_ring_base_lsb = 0x000002f4,
+	.hal_reo1_aging_thresh_ix_0 = 0x00000564,
+	.hal_reo1_aging_thresh_ix_1 = 0x00000568,
+	.hal_reo1_aging_thresh_ix_2 = 0x0000056c,
+	.hal_reo1_aging_thresh_ix_3 = 0x00000570,
+
+	/* REO2SW(x) R2 ring pointers (head/tail) address */
+	.hal_reo1_ring_hp = 0x00003038,
+	.hal_reo1_ring_tp = 0x0000303c,
+	.hal_reo2_ring_hp = 0x00003040,
+
+	/* REO2TCL R0 ring configuration address */
+	.hal_reo_tcl_ring_base_lsb = 0x000003fc,
+	.hal_reo_tcl_ring_hp = 0x00003058,
+
+	/* REO status address */
+	.hal_reo_status_ring_base_lsb = 0x00000504,
+	.hal_reo_status_hp = 0x00003070,
+
+	/* WCSS relative address */
+	.hal_seq_wcss_umac_ce0_src_reg = 0x01b80000,
+	.hal_seq_wcss_umac_ce0_dst_reg = 0x01b81000,
+	.hal_seq_wcss_umac_ce1_src_reg = 0x01b82000,
+	.hal_seq_wcss_umac_ce1_dst_reg = 0x01b83000,
+
+	/* WBM Idle address */
+	.hal_wbm_idle_link_ring_base_lsb = 0x00000874,
+	.hal_wbm_idle_link_ring_misc = 0x00000884,
+
+	/* SW2WBM release address */
+	.hal_wbm_release_ring_base_lsb = 0x000001ec,
+
+	/* WBM2SW release address */
+	.hal_wbm0_release_ring_base_lsb = 0x00000924,
+	.hal_wbm1_release_ring_base_lsb = 0x0000097c,
+
+	/* PCIe base address */
+	.pcie_qserdes_sysclk_en_sel = 0x01e0e0a8,
+	.pcie_pcs_osc_dtct_config_base = 0x01e0f45c,
 };
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 94c36e6149cf..13f4f9c9814b 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -105,6 +105,8 @@ enum ath11k_bus {
 
 #define ATH11K_EXT_IRQ_GRP_NUM_MAX 11
 
+struct hal_tcl_data_cmd;
+
 struct ath11k_hw_ring_mask {
 	u8 tx[ATH11K_EXT_IRQ_GRP_NUM_MAX];
 	u8 rx_mon_status[ATH11K_EXT_IRQ_GRP_NUM_MAX];
@@ -166,11 +168,14 @@ struct ath11k_hw_ops {
 				struct target_resource_config *config);
 	int (*mac_id_to_pdev_id)(struct ath11k_hw_params *hw, int mac_id);
 	int (*mac_id_to_srng_id)(struct ath11k_hw_params *hw, int mac_id);
+	void (*tx_mesh_enable)(struct ath11k_base *ab,
+			       struct hal_tcl_data_cmd *tcl_cmd);
 };
 
 extern const struct ath11k_hw_ops ipq8074_ops;
 extern const struct ath11k_hw_ops ipq6018_ops;
 extern const struct ath11k_hw_ops qca6390_ops;
+extern const struct ath11k_hw_ops qcn9074_ops;
 
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_ipq8074;
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qca6390;
@@ -262,9 +267,26 @@ struct ath11k_hw_regs {
 
 	u32 hal_reo_status_ring_base_lsb;
 	u32 hal_reo_status_hp;
+
+	u32 hal_seq_wcss_umac_ce0_src_reg;
+	u32 hal_seq_wcss_umac_ce0_dst_reg;
+	u32 hal_seq_wcss_umac_ce1_src_reg;
+	u32 hal_seq_wcss_umac_ce1_dst_reg;
+
+	u32 hal_wbm_idle_link_ring_base_lsb;
+	u32 hal_wbm_idle_link_ring_misc;
+
+	u32 hal_wbm_release_ring_base_lsb;
+
+	u32 hal_wbm0_release_ring_base_lsb;
+	u32 hal_wbm1_release_ring_base_lsb;
+
+	u32 pcie_qserdes_sysclk_en_sel;
+	u32 pcie_pcs_osc_dtct_config_base;
 };
 
 extern const struct ath11k_hw_regs ipq8074_regs;
 extern const struct ath11k_hw_regs qca6390_regs;
+extern const struct ath11k_hw_regs qcn9074_regs;
 
 #endif
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 88d0e5be17f5..d919e5d36105 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -324,7 +324,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 	int ret;
 
 	ret = ath11k_pci_set_link_reg(ab,
-				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG,
+				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG(ab),
 				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL,
 				      PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK);
 	if (ret) {
@@ -333,27 +333,27 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab)
 	}
 
 	ret = ath11k_pci_set_link_reg(ab,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_REG,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_VAL,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
+				      PCIE_PCS_OSC_DTCT_CONFIG1_REG(ab),
+				      PCIE_PCS_OSC_DTCT_CONFIG1_VAL,
+				      PCIE_PCS_OSC_DTCT_CONFIG_MSK);
 	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config1 error: %d\n", ret);
 		return ret;
 	}
 
 	ret = ath11k_pci_set_link_reg(ab,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_REG,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_VAL,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
+				      PCIE_PCS_OSC_DTCT_CONFIG2_REG(ab),
+				      PCIE_PCS_OSC_DTCT_CONFIG2_VAL,
+				      PCIE_PCS_OSC_DTCT_CONFIG_MSK);
 	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config2: %d\n", ret);
 		return ret;
 	}
 
 	ret = ath11k_pci_set_link_reg(ab,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_REG,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_VAL,
-				      PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK);
+				      PCIE_PCS_OSC_DTCT_CONFIG4_REG(ab),
+				      PCIE_PCS_OSC_DTCT_CONFIG4_VAL,
+				      PCIE_PCS_OSC_DTCT_CONFIG_MSK);
 	if (ret) {
 		ath11k_warn(ab, "failed to set dtct config4: %d\n", ret);
 		return ret;
diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h
index 92eeb7c30546..f3e645891d19 100644
--- a/drivers/net/wireless/ath/ath11k/pci.h
+++ b/drivers/net/wireless/ath/ath11k/pci.h
@@ -34,16 +34,20 @@
 #define PCIE_SMLH_REQ_RST_LINK_DOWN		0x2
 #define PCIE_INT_CLEAR_ALL			0xffffffff
 
-#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG	0x01e0c0ac
+#define PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG(x) \
+		(ab->hw_params.regs->pcie_qserdes_sysclk_en_sel)
 #define PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL	0x10
 #define PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK	0xffffffff
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_REG	0x01e0c628
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_VAL	0x02
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_REG	0x01e0c62c
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_VAL	0x52
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_REG	0x01e0c634
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_VAL	0xff
-#define PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK	0x000000ff
+#define PCIE_PCS_OSC_DTCT_CONFIG1_REG(x) \
+		(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base)
+#define PCIE_PCS_OSC_DTCT_CONFIG1_VAL		0x02
+#define PCIE_PCS_OSC_DTCT_CONFIG2_REG(x) \
+		(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0x4)
+#define PCIE_PCS_OSC_DTCT_CONFIG2_VAL		0x52
+#define PCIE_PCS_OSC_DTCT_CONFIG4_REG(x) \
+		(ab->hw_params.regs->pcie_pcs_osc_dtct_config_base + 0xc)
+#define PCIE_PCS_OSC_DTCT_CONFIG4_VAL		0xff
+#define PCIE_PCS_OSC_DTCT_CONFIG_MSK		0x000000ff
 
 #define WLAON_QFPROM_PWR_CTRL_REG		0x01f8031c
 #define QFPROM_PWR_CTRL_VDD4BLOW_MASK		0x4
-- 
cgit v1.2.3


From e678fbd401b9bdca9d1bd64065abfcc87ae66b94 Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:23 +0200
Subject: ath11k: add data path support for QCN9074

hal rx descriptor is different for QCN9074 target type. since
rx_msdu_end, rx_msdu_start, rx_mpdu_start elements are in
different placement/alignment. In order to have generic data path,
introduce platform specific hal rx descriptor access ops in
ath11k_hw_ops.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-9-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c    |   4 +
 drivers/net/wireless/ath/ath11k/dp_rx.c   | 476 ++++++++++++++++--------------
 drivers/net/wireless/ath/ath11k/hal.h     |   2 -
 drivers/net/wireless/ath/ath11k/hal_tx.c  |   2 +-
 drivers/net/wireless/ath/ath11k/hw.c      | 424 ++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h      |  29 ++
 drivers/net/wireless/ath/ath11k/pci.c     |   1 +
 drivers/net/wireless/ath/ath11k/rx_desc.h | 212 ++++++++++++-
 8 files changed, 913 insertions(+), 237 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 2d9a00d96885..a17812270c07 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -69,6 +69,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.idle_ps = false,
 		.cold_boot_calib = true,
 		.supports_suspend = false,
+		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
 	},
 	{
 		.hw_rev = ATH11K_HW_IPQ6018_HW10,
@@ -108,6 +109,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.idle_ps = false,
 		.cold_boot_calib = true,
 		.supports_suspend = false,
+		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
 	},
 	{
 		.name = "qca6390 hw2.0",
@@ -146,6 +148,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.idle_ps = true,
 		.cold_boot_calib = false,
 		.supports_suspend = true,
+		.hal_desc_sz = sizeof(struct hal_rx_desc_ipq8074),
 	},
 	{
 		.name = "qcn9074 hw1.0",
@@ -175,6 +178,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.idle_ps = false,
 		.cold_boot_calib = false,
 		.supports_suspend = false,
+		.hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
 	},
 };
 
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 850ad38b888f..1d9aa1bb6b6e 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -20,95 +20,102 @@
 
 #define ATH11K_DP_RX_FRAGMENT_TIMEOUT_MS (2 * HZ)
 
-static u8 *ath11k_dp_rx_h_80211_hdr(struct hal_rx_desc *desc)
+static u8 *ath11k_dp_rx_h_80211_hdr(struct ath11k_base *ab, struct hal_rx_desc *desc)
 {
-	return desc->hdr_status;
+	return ab->hw_params.hw_ops->rx_desc_get_hdr_status(desc);
 }
 
-static enum hal_encrypt_type ath11k_dp_rx_h_mpdu_start_enctype(struct hal_rx_desc *desc)
+static enum hal_encrypt_type ath11k_dp_rx_h_mpdu_start_enctype(struct ath11k_base *ab,
+							       struct hal_rx_desc *desc)
 {
-	if (!(__le32_to_cpu(desc->mpdu_start.info1) &
-	    RX_MPDU_START_INFO1_ENCRYPT_INFO_VALID))
+	if (!ab->hw_params.hw_ops->rx_desc_encrypt_valid(desc))
 		return HAL_ENCRYPT_TYPE_OPEN;
 
-	return FIELD_GET(RX_MPDU_START_INFO2_ENC_TYPE,
-			 __le32_to_cpu(desc->mpdu_start.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_encrypt_type(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_decap_type(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_decap_type(struct ath11k_base *ab,
+					       struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO2_DECAP_FORMAT,
-			 __le32_to_cpu(desc->msdu_start.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_decap_type(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_mesh_ctl_present(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_mesh_ctl_present(struct ath11k_base *ab,
+						     struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO2_MESH_CTRL_PRESENT,
-			 __le32_to_cpu(desc->msdu_start.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_mesh_ctl(desc);
 }
 
-static bool ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(struct ath11k_base *ab,
+						     struct hal_rx_desc *desc)
 {
-	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_CTRL_VALID,
-			   __le32_to_cpu(desc->mpdu_start.info1));
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_seq_ctl_vld(desc);
 }
 
-static bool ath11k_dp_rx_h_mpdu_start_fc_valid(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_mpdu_start_fc_valid(struct ath11k_base *ab,
+					       struct hal_rx_desc *desc)
 {
-	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_FCTRL_VALID,
-			   __le32_to_cpu(desc->mpdu_start.info1));
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_fc_valid(desc);
 }
 
-static bool ath11k_dp_rx_h_mpdu_start_more_frags(struct sk_buff *skb)
+static bool ath11k_dp_rx_h_mpdu_start_more_frags(struct ath11k_base *ab,
+						 struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr;
 
-	hdr = (struct ieee80211_hdr *)(skb->data + HAL_RX_DESC_SIZE);
+	hdr = (struct ieee80211_hdr *)(skb->data + ab->hw_params.hal_desc_sz);
 	return ieee80211_has_morefrags(hdr->frame_control);
 }
 
-static u16 ath11k_dp_rx_h_mpdu_start_frag_no(struct sk_buff *skb)
+static u16 ath11k_dp_rx_h_mpdu_start_frag_no(struct ath11k_base *ab,
+					     struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr;
 
-	hdr = (struct ieee80211_hdr *)(skb->data + HAL_RX_DESC_SIZE);
+	hdr = (struct ieee80211_hdr *)(skb->data + ab->hw_params.hal_desc_sz);
 	return le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
 }
 
-static u16 ath11k_dp_rx_h_mpdu_start_seq_no(struct hal_rx_desc *desc)
+static u16 ath11k_dp_rx_h_mpdu_start_seq_no(struct ath11k_base *ab,
+					    struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_NUM,
-			 __le32_to_cpu(desc->mpdu_start.info1));
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_start_seq_no(desc);
 }
 
-static bool ath11k_dp_rx_h_attn_msdu_done(struct hal_rx_desc *desc)
+static void *ath11k_dp_rx_get_attention(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
+{
+	return ab->hw_params.hw_ops->rx_desc_get_attention(desc);
+}
+
+static bool ath11k_dp_rx_h_attn_msdu_done(struct rx_attention *attn)
 {
 	return !!FIELD_GET(RX_ATTENTION_INFO2_MSDU_DONE,
-			   __le32_to_cpu(desc->attention.info2));
+			   __le32_to_cpu(attn->info2));
 }
 
-static bool ath11k_dp_rx_h_attn_l4_cksum_fail(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_attn_l4_cksum_fail(struct rx_attention *attn)
 {
 	return !!FIELD_GET(RX_ATTENTION_INFO1_TCP_UDP_CKSUM_FAIL,
-			   __le32_to_cpu(desc->attention.info1));
+			   __le32_to_cpu(attn->info1));
 }
 
-static bool ath11k_dp_rx_h_attn_ip_cksum_fail(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_attn_ip_cksum_fail(struct rx_attention *attn)
 {
 	return !!FIELD_GET(RX_ATTENTION_INFO1_IP_CKSUM_FAIL,
-			   __le32_to_cpu(desc->attention.info1));
+			   __le32_to_cpu(attn->info1));
 }
 
-static bool ath11k_dp_rx_h_attn_is_decrypted(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_attn_is_decrypted(struct rx_attention *attn)
 {
 	return (FIELD_GET(RX_ATTENTION_INFO2_DCRYPT_STATUS_CODE,
-			  __le32_to_cpu(desc->attention.info2)) ==
+			  __le32_to_cpu(attn->info2)) ==
 		RX_DESC_DECRYPT_STATUS_CODE_OK);
 }
 
-static u32 ath11k_dp_rx_h_attn_mpdu_err(struct hal_rx_desc *desc)
+static u32 ath11k_dp_rx_h_attn_mpdu_err(struct rx_attention *attn)
 {
-	u32 info = __le32_to_cpu(desc->attention.info1);
+	u32 info = __le32_to_cpu(attn->info1);
 	u32 errmap = 0;
 
 	if (info & RX_ATTENTION_INFO1_FCS_ERR)
@@ -135,131 +142,122 @@ static u32 ath11k_dp_rx_h_attn_mpdu_err(struct hal_rx_desc *desc)
 	return errmap;
 }
 
-static u16 ath11k_dp_rx_h_msdu_start_msdu_len(struct hal_rx_desc *desc)
+static u16 ath11k_dp_rx_h_msdu_start_msdu_len(struct ath11k_base *ab,
+					      struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO1_MSDU_LENGTH,
-			 __le32_to_cpu(desc->msdu_start.info1));
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_len(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_sgi(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_sgi(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO3_SGI,
-			 __le32_to_cpu(desc->msdu_start.info3));
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_sgi(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_rate_mcs(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_rate_mcs(struct ath11k_base *ab,
+					     struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO3_RATE_MCS,
-			 __le32_to_cpu(desc->msdu_start.info3));
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_rate_mcs(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_rx_bw(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_rx_bw(struct ath11k_base *ab,
+					  struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO3_RECV_BW,
-			 __le32_to_cpu(desc->msdu_start.info3));
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_rx_bw(desc);
 }
 
-static u32 ath11k_dp_rx_h_msdu_start_freq(struct hal_rx_desc *desc)
+static u32 ath11k_dp_rx_h_msdu_start_freq(struct ath11k_base *ab,
+					  struct hal_rx_desc *desc)
 {
-	return __le32_to_cpu(desc->msdu_start.phy_meta_data);
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_freq(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_pkt_type(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_pkt_type(struct ath11k_base *ab,
+					     struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_START_INFO3_PKT_TYPE,
-			 __le32_to_cpu(desc->msdu_start.info3));
+	return ab->hw_params.hw_ops->rx_desc_get_msdu_pkt_type(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_start_nss(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_start_nss(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
 {
-	u8 mimo_ss_bitmap = FIELD_GET(RX_MSDU_START_INFO3_MIMO_SS_BITMAP,
-				      __le32_to_cpu(desc->msdu_start.info3));
-
-	return hweight8(mimo_ss_bitmap);
+	return hweight8(ab->hw_params.hw_ops->rx_desc_get_msdu_nss(desc));
 }
 
-static u8 ath11k_dp_rx_h_mpdu_start_tid(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_mpdu_start_tid(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MPDU_START_INFO2_TID,
-			 __le32_to_cpu(desc->mpdu_start.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_tid(desc);
 }
 
-static u16 ath11k_dp_rx_h_mpdu_start_peer_id(struct hal_rx_desc *desc)
+static u16 ath11k_dp_rx_h_mpdu_start_peer_id(struct ath11k_base *ab,
+					     struct hal_rx_desc *desc)
 {
-	return __le16_to_cpu(desc->mpdu_start.sw_peer_id);
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_peer_id(desc);
 }
 
-static u8 ath11k_dp_rx_h_msdu_end_l3pad(struct hal_rx_desc *desc)
+static u8 ath11k_dp_rx_h_msdu_end_l3pad(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
 {
-	return FIELD_GET(RX_MSDU_END_INFO2_L3_HDR_PADDING,
-			 __le32_to_cpu(desc->msdu_end.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_l3_pad_bytes(desc);
 }
 
-static bool ath11k_dp_rx_h_msdu_end_first_msdu(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_msdu_end_first_msdu(struct ath11k_base *ab,
+					       struct hal_rx_desc *desc)
 {
-	return !!FIELD_GET(RX_MSDU_END_INFO2_FIRST_MSDU,
-			   __le32_to_cpu(desc->msdu_end.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_first_msdu(desc);
 }
 
-static bool ath11k_dp_rx_h_msdu_end_last_msdu(struct hal_rx_desc *desc)
+static bool ath11k_dp_rx_h_msdu_end_last_msdu(struct ath11k_base *ab,
+					      struct hal_rx_desc *desc)
 {
-	return !!FIELD_GET(RX_MSDU_END_INFO2_LAST_MSDU,
-			   __le32_to_cpu(desc->msdu_end.info2));
+	return ab->hw_params.hw_ops->rx_desc_get_last_msdu(desc);
 }
 
-static void ath11k_dp_rx_desc_end_tlv_copy(struct hal_rx_desc *fdesc,
+static void ath11k_dp_rx_desc_end_tlv_copy(struct ath11k_base *ab,
+					   struct hal_rx_desc *fdesc,
 					   struct hal_rx_desc *ldesc)
 {
-	memcpy((u8 *)&fdesc->msdu_end, (u8 *)&ldesc->msdu_end,
-	       sizeof(struct rx_msdu_end));
-	memcpy((u8 *)&fdesc->attention, (u8 *)&ldesc->attention,
-	       sizeof(struct rx_attention));
-	memcpy((u8 *)&fdesc->mpdu_end, (u8 *)&ldesc->mpdu_end,
-	       sizeof(struct rx_mpdu_end));
+	ab->hw_params.hw_ops->rx_desc_copy_attn_end_tlv(fdesc, ldesc);
 }
 
-static u32 ath11k_dp_rxdesc_get_mpdulen_err(struct hal_rx_desc *rx_desc)
+static u32 ath11k_dp_rxdesc_get_mpdulen_err(struct rx_attention *attn)
 {
-	struct rx_attention *rx_attn;
-
-	rx_attn = &rx_desc->attention;
-
 	return FIELD_GET(RX_ATTENTION_INFO1_MPDU_LEN_ERR,
-			 __le32_to_cpu(rx_attn->info1));
+			 __le32_to_cpu(attn->info1));
 }
 
-static u32 ath11k_dp_rxdesc_get_decap_format(struct hal_rx_desc *rx_desc)
-{
-	struct rx_msdu_start *rx_msdu_start;
-
-	rx_msdu_start = &rx_desc->msdu_start;
-
-	return FIELD_GET(RX_MSDU_START_INFO2_DECAP_FORMAT,
-			 __le32_to_cpu(rx_msdu_start->info2));
-}
-
-static u8 *ath11k_dp_rxdesc_get_80211hdr(struct hal_rx_desc *rx_desc)
+static u8 *ath11k_dp_rxdesc_get_80211hdr(struct ath11k_base *ab,
+					 struct hal_rx_desc *rx_desc)
 {
 	u8 *rx_pkt_hdr;
 
-	rx_pkt_hdr = &rx_desc->msdu_payload[0];
+	rx_pkt_hdr = ab->hw_params.hw_ops->rx_desc_get_msdu_payload(rx_desc);
 
 	return rx_pkt_hdr;
 }
 
-static bool ath11k_dp_rxdesc_mpdu_valid(struct hal_rx_desc *rx_desc)
+static bool ath11k_dp_rxdesc_mpdu_valid(struct ath11k_base *ab,
+					struct hal_rx_desc *rx_desc)
 {
 	u32 tlv_tag;
 
-	tlv_tag = FIELD_GET(HAL_TLV_HDR_TAG,
-			    __le32_to_cpu(rx_desc->mpdu_start_tag));
+	tlv_tag = ab->hw_params.hw_ops->rx_desc_get_mpdu_start_tag(rx_desc);
 
 	return tlv_tag == HAL_RX_MPDU_START;
 }
 
-static u32 ath11k_dp_rxdesc_get_ppduid(struct hal_rx_desc *rx_desc)
+static u32 ath11k_dp_rxdesc_get_ppduid(struct ath11k_base *ab,
+				       struct hal_rx_desc *rx_desc)
+{
+	return ab->hw_params.hw_ops->rx_desc_get_mpdu_ppdu_id(rx_desc);
+}
+
+static void ath11k_dp_rxdesc_set_msdu_len(struct ath11k_base *ab,
+					  struct hal_rx_desc *desc,
+					  u16 len)
 {
-	return __le16_to_cpu(rx_desc->mpdu_start.phy_ppdu_id);
+	ab->hw_params.hw_ops->rx_desc_set_msdu_len(desc, len);
 }
 
 static void ath11k_dp_service_mon_ring(struct timer_list *t)
@@ -1722,19 +1720,19 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
 				      struct sk_buff *first, struct sk_buff *last,
 				      u8 l3pad_bytes, int msdu_len)
 {
+	struct ath11k_base *ab = ar->ab;
 	struct sk_buff *skb;
 	struct ath11k_skb_rxcb *rxcb = ATH11K_SKB_RXCB(first);
 	int buf_first_hdr_len, buf_first_len;
 	struct hal_rx_desc *ldesc;
-	int space_extra;
-	int rem_len;
-	int buf_len;
+	int space_extra, rem_len, buf_len;
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
 	/* As the msdu is spread across multiple rx buffers,
 	 * find the offset to the start of msdu for computing
 	 * the length of the msdu in the first buffer.
 	 */
-	buf_first_hdr_len = HAL_RX_DESC_SIZE + l3pad_bytes;
+	buf_first_hdr_len = hal_rx_desc_sz + l3pad_bytes;
 	buf_first_len = DP_RX_BUFFER_SIZE - buf_first_hdr_len;
 
 	if (WARN_ON_ONCE(msdu_len <= buf_first_len)) {
@@ -1744,8 +1742,8 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
 	}
 
 	ldesc = (struct hal_rx_desc *)last->data;
-	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(ldesc);
-	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(ldesc);
+	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(ab, ldesc);
+	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(ab, ldesc);
 
 	/* MSDU spans over multiple buffers because the length of the MSDU
 	 * exceeds DP_RX_BUFFER_SIZE - HAL_RX_DESC_SIZE. So assume the data
@@ -1757,7 +1755,7 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
 	/* When an MSDU spread over multiple buffers attention, MSDU_END and
 	 * MPDU_END tlvs are valid only in the last buffer. Copy those tlvs.
 	 */
-	ath11k_dp_rx_desc_end_tlv_copy(rxcb->rx_desc, ldesc);
+	ath11k_dp_rx_desc_end_tlv_copy(ab, rxcb->rx_desc, ldesc);
 
 	space_extra = msdu_len - (buf_first_len + skb_tailroom(first));
 	if (space_extra > 0 &&
@@ -1778,18 +1776,18 @@ static int ath11k_dp_rx_msdu_coalesce(struct ath11k *ar,
 	while ((skb = __skb_dequeue(msdu_list)) != NULL && rem_len > 0) {
 		rxcb = ATH11K_SKB_RXCB(skb);
 		if (rxcb->is_continuation)
-			buf_len = DP_RX_BUFFER_SIZE - HAL_RX_DESC_SIZE;
+			buf_len = DP_RX_BUFFER_SIZE - hal_rx_desc_sz;
 		else
 			buf_len = rem_len;
 
-		if (buf_len > (DP_RX_BUFFER_SIZE - HAL_RX_DESC_SIZE)) {
+		if (buf_len > (DP_RX_BUFFER_SIZE - hal_rx_desc_sz)) {
 			WARN_ON_ONCE(1);
 			dev_kfree_skb_any(skb);
 			return -EINVAL;
 		}
 
-		skb_put(skb, buf_len + HAL_RX_DESC_SIZE);
-		skb_pull(skb, HAL_RX_DESC_SIZE);
+		skb_put(skb, buf_len + hal_rx_desc_sz);
+		skb_pull(skb, hal_rx_desc_sz);
 		skb_copy_from_linear_data(skb, skb_put(first, buf_len),
 					  buf_len);
 		dev_kfree_skb_any(skb);
@@ -1820,13 +1818,15 @@ static struct sk_buff *ath11k_dp_rx_get_msdu_last_buf(struct sk_buff_head *msdu_
 	return NULL;
 }
 
-static void ath11k_dp_rx_h_csum_offload(struct sk_buff *msdu)
+static void ath11k_dp_rx_h_csum_offload(struct ath11k *ar, struct sk_buff *msdu)
 {
 	struct ath11k_skb_rxcb *rxcb = ATH11K_SKB_RXCB(msdu);
+	struct rx_attention *rx_attention;
 	bool ip_csum_fail, l4_csum_fail;
 
-	ip_csum_fail = ath11k_dp_rx_h_attn_ip_cksum_fail(rxcb->rx_desc);
-	l4_csum_fail = ath11k_dp_rx_h_attn_l4_cksum_fail(rxcb->rx_desc);
+	rx_attention = ath11k_dp_rx_get_attention(ar->ab, rxcb->rx_desc);
+	ip_csum_fail = ath11k_dp_rx_h_attn_ip_cksum_fail(rx_attention);
+	l4_csum_fail = ath11k_dp_rx_h_attn_l4_cksum_fail(rx_attention);
 
 	msdu->ip_summed = (ip_csum_fail || l4_csum_fail) ?
 			  CHECKSUM_NONE : CHECKSUM_UNNECESSARY;
@@ -1957,7 +1957,7 @@ static void ath11k_dp_rx_h_undecap_nwifi(struct ath11k *ar,
 
 		qos_ctl = rxcb->tid;
 
-		if (ath11k_dp_rx_h_msdu_start_mesh_ctl_present(rxcb->rx_desc))
+		if (ath11k_dp_rx_h_msdu_start_mesh_ctl_present(ar->ab, rxcb->rx_desc))
 			qos_ctl |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT;
 
 		/* TODO Add other QoS ctl fields when required */
@@ -2061,7 +2061,7 @@ static void *ath11k_dp_rx_h_find_rfc1042(struct ath11k *ar,
 	bool is_amsdu;
 
 	is_amsdu = !(rxcb->is_first_msdu && rxcb->is_last_msdu);
-	hdr = (struct ieee80211_hdr *)ath11k_dp_rx_h_80211_hdr(rxcb->rx_desc);
+	hdr = (struct ieee80211_hdr *)ath11k_dp_rx_h_80211_hdr(ar->ab, rxcb->rx_desc);
 	rfc1042 = hdr;
 
 	if (rxcb->is_first_msdu) {
@@ -2134,8 +2134,8 @@ static void ath11k_dp_rx_h_undecap(struct ath11k *ar, struct sk_buff *msdu,
 	u8 *first_hdr;
 	u8 decap;
 
-	first_hdr = ath11k_dp_rx_h_80211_hdr(rx_desc);
-	decap = ath11k_dp_rx_h_msdu_start_decap_type(rx_desc);
+	first_hdr = ath11k_dp_rx_h_80211_hdr(ar->ab, rx_desc);
+	decap = ath11k_dp_rx_h_msdu_start_decap_type(ar->ab, rx_desc);
 
 	switch (decap) {
 	case DP_RX_DECAP_TYPE_NATIVE_WIFI:
@@ -2167,6 +2167,7 @@ static void ath11k_dp_rx_h_mpdu(struct ath11k *ar,
 	bool is_decrypted = false;
 	struct ieee80211_hdr *hdr;
 	struct ath11k_peer *peer;
+	struct rx_attention *rx_attention;
 	u32 err_bitmap;
 
 	hdr = (struct ieee80211_hdr *)msdu->data;
@@ -2188,9 +2189,10 @@ static void ath11k_dp_rx_h_mpdu(struct ath11k *ar,
 	}
 	spin_unlock_bh(&ar->ab->base_lock);
 
-	err_bitmap = ath11k_dp_rx_h_attn_mpdu_err(rx_desc);
+	rx_attention = ath11k_dp_rx_get_attention(ar->ab, rx_desc);
+	err_bitmap = ath11k_dp_rx_h_attn_mpdu_err(rx_attention);
 	if (enctype != HAL_ENCRYPT_TYPE_OPEN && !err_bitmap)
-		is_decrypted = ath11k_dp_rx_h_attn_is_decrypted(rx_desc);
+		is_decrypted = ath11k_dp_rx_h_attn_is_decrypted(rx_attention);
 
 	/* Clear per-MPDU flags while leaving per-PPDU flags intact */
 	rx_status->flag &= ~(RX_FLAG_FAILED_FCS_CRC |
@@ -2215,7 +2217,7 @@ static void ath11k_dp_rx_h_mpdu(struct ath11k *ar,
 					   RX_FLAG_PN_VALIDATED;
 	}
 
-	ath11k_dp_rx_h_csum_offload(msdu);
+	ath11k_dp_rx_h_csum_offload(ar, msdu);
 	ath11k_dp_rx_h_undecap(ar, msdu, rx_desc,
 			       enctype, rx_status, is_decrypted);
 
@@ -2236,11 +2238,11 @@ static void ath11k_dp_rx_h_rate(struct ath11k *ar, struct hal_rx_desc *rx_desc,
 	u8 sgi;
 	bool is_cck;
 
-	pkt_type = ath11k_dp_rx_h_msdu_start_pkt_type(rx_desc);
-	bw = ath11k_dp_rx_h_msdu_start_rx_bw(rx_desc);
-	rate_mcs = ath11k_dp_rx_h_msdu_start_rate_mcs(rx_desc);
-	nss = ath11k_dp_rx_h_msdu_start_nss(rx_desc);
-	sgi = ath11k_dp_rx_h_msdu_start_sgi(rx_desc);
+	pkt_type = ath11k_dp_rx_h_msdu_start_pkt_type(ar->ab, rx_desc);
+	bw = ath11k_dp_rx_h_msdu_start_rx_bw(ar->ab, rx_desc);
+	rate_mcs = ath11k_dp_rx_h_msdu_start_rate_mcs(ar->ab, rx_desc);
+	nss = ath11k_dp_rx_h_msdu_start_nss(ar->ab, rx_desc);
+	sgi = ath11k_dp_rx_h_msdu_start_sgi(ar->ab, rx_desc);
 
 	switch (pkt_type) {
 	case RX_MSDU_START_PKT_TYPE_11A:
@@ -2297,7 +2299,7 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
 				struct ieee80211_rx_status *rx_status)
 {
 	u8 channel_num;
-	u32 center_freq;
+	u32 center_freq, meta_data;
 	struct ieee80211_channel *channel;
 
 	rx_status->freq = 0;
@@ -2308,8 +2310,9 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc,
 
 	rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL;
 
-	channel_num = ath11k_dp_rx_h_msdu_start_freq(rx_desc);
-	center_freq = ath11k_dp_rx_h_msdu_start_freq(rx_desc) >> 16;
+	meta_data = ath11k_dp_rx_h_msdu_start_freq(ar->ab, rx_desc);
+	channel_num = meta_data;
+	center_freq = meta_data >> 16;
 
 	if (center_freq >= 5935 && center_freq <= 7105) {
 		rx_status->band = NL80211_BAND_6GHZ;
@@ -2409,7 +2412,9 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 				     struct sk_buff *msdu,
 				     struct sk_buff_head *msdu_list)
 {
+	struct ath11k_base *ab = ar->ab;
 	struct hal_rx_desc *rx_desc, *lrx_desc;
+	struct rx_attention *rx_attention;
 	struct ieee80211_rx_status rx_status = {0};
 	struct ieee80211_rx_status *status;
 	struct ath11k_skb_rxcb *rxcb;
@@ -2419,10 +2424,11 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 	u8 *hdr_status;
 	u16 msdu_len;
 	int ret;
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
 	last_buf = ath11k_dp_rx_get_msdu_last_buf(msdu_list, msdu);
 	if (!last_buf) {
-		ath11k_warn(ar->ab,
+		ath11k_warn(ab,
 			    "No valid Rx buffer to access Atten/MSDU_END/MPDU_END tlvs\n");
 		ret = -EIO;
 		goto free_out;
@@ -2430,38 +2436,39 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
 	lrx_desc = (struct hal_rx_desc *)last_buf->data;
-	if (!ath11k_dp_rx_h_attn_msdu_done(lrx_desc)) {
-		ath11k_warn(ar->ab, "msdu_done bit in attention is not set\n");
+	rx_attention = ath11k_dp_rx_get_attention(ab, lrx_desc);
+	if (!ath11k_dp_rx_h_attn_msdu_done(rx_attention)) {
+		ath11k_warn(ab, "msdu_done bit in attention is not set\n");
 		ret = -EIO;
 		goto free_out;
 	}
 
 	rxcb = ATH11K_SKB_RXCB(msdu);
 	rxcb->rx_desc = rx_desc;
-	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(rx_desc);
-	l3_pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(lrx_desc);
+	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(ab, rx_desc);
+	l3_pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(ab, lrx_desc);
 
 	if (rxcb->is_frag) {
-		skb_pull(msdu, HAL_RX_DESC_SIZE);
+		skb_pull(msdu, hal_rx_desc_sz);
 	} else if (!rxcb->is_continuation) {
-		if ((msdu_len + HAL_RX_DESC_SIZE) > DP_RX_BUFFER_SIZE) {
-			hdr_status = ath11k_dp_rx_h_80211_hdr(rx_desc);
+		if ((msdu_len + hal_rx_desc_sz) > DP_RX_BUFFER_SIZE) {
+			hdr_status = ath11k_dp_rx_h_80211_hdr(ab, rx_desc);
 			ret = -EINVAL;
-			ath11k_warn(ar->ab, "invalid msdu len %u\n", msdu_len);
-			ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", hdr_status,
+			ath11k_warn(ab, "invalid msdu len %u\n", msdu_len);
+			ath11k_dbg_dump(ab, ATH11K_DBG_DATA, NULL, "", hdr_status,
 					sizeof(struct ieee80211_hdr));
-			ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", rx_desc,
+			ath11k_dbg_dump(ab, ATH11K_DBG_DATA, NULL, "", rx_desc,
 					sizeof(struct hal_rx_desc));
 			goto free_out;
 		}
-		skb_put(msdu, HAL_RX_DESC_SIZE + l3_pad_bytes + msdu_len);
-		skb_pull(msdu, HAL_RX_DESC_SIZE + l3_pad_bytes);
+		skb_put(msdu, hal_rx_desc_sz + l3_pad_bytes + msdu_len);
+		skb_pull(msdu, hal_rx_desc_sz + l3_pad_bytes);
 	} else {
 		ret = ath11k_dp_rx_msdu_coalesce(ar, msdu_list,
 						 msdu, last_buf,
 						 l3_pad_bytes, msdu_len);
 		if (ret) {
-			ath11k_warn(ar->ab,
+			ath11k_warn(ab,
 				    "failed to coalesce msdu rx buffer%d\n", ret);
 			goto free_out;
 		}
@@ -3090,16 +3097,17 @@ static int ath11k_dp_rx_h_verify_tkip_mic(struct ath11k *ar, struct ath11k_peer
 	u8 mic[IEEE80211_CCMP_MIC_LEN];
 	int head_len, tail_len, ret;
 	size_t data_len;
-	u32 hdr_len;
+	u32 hdr_len, hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 	u8 *key, *data;
 	u8 key_idx;
 
-	if (ath11k_dp_rx_h_mpdu_start_enctype(rx_desc) != HAL_ENCRYPT_TYPE_TKIP_MIC)
+	if (ath11k_dp_rx_h_mpdu_start_enctype(ar->ab, rx_desc) !=
+	    HAL_ENCRYPT_TYPE_TKIP_MIC)
 		return 0;
 
-	hdr = (struct ieee80211_hdr *)(msdu->data + HAL_RX_DESC_SIZE);
+	hdr = (struct ieee80211_hdr *)(msdu->data + hal_rx_desc_sz);
 	hdr_len = ieee80211_hdrlen(hdr->frame_control);
-	head_len = hdr_len + HAL_RX_DESC_SIZE + IEEE80211_TKIP_IV_LEN;
+	head_len = hdr_len + hal_rx_desc_sz + IEEE80211_TKIP_IV_LEN;
 	tail_len = IEEE80211_CCMP_MIC_LEN + IEEE80211_TKIP_ICV_LEN + FCS_LEN;
 
 	if (!is_multicast_ether_addr(hdr->addr1))
@@ -3125,7 +3133,7 @@ mic_fail:
 
 	rxs->flag |= RX_FLAG_MMIC_ERROR | RX_FLAG_MMIC_STRIPPED |
 		    RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED;
-	skb_pull(msdu, HAL_RX_DESC_SIZE);
+	skb_pull(msdu, hal_rx_desc_sz);
 
 	ath11k_dp_rx_h_ppdu(ar, rx_desc, rxs);
 	ath11k_dp_rx_h_undecap(ar, msdu, rx_desc,
@@ -3140,11 +3148,12 @@ static void ath11k_dp_rx_h_undecap_frag(struct ath11k *ar, struct sk_buff *msdu,
 	struct ieee80211_hdr *hdr;
 	size_t hdr_len;
 	size_t crypto_len;
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
 	if (!flags)
 		return;
 
-	hdr = (struct ieee80211_hdr *)(msdu->data + HAL_RX_DESC_SIZE);
+	hdr = (struct ieee80211_hdr *)(msdu->data + hal_rx_desc_sz);
 
 	if (flags & RX_FLAG_MIC_STRIPPED)
 		skb_trim(msdu, msdu->len -
@@ -3158,8 +3167,8 @@ static void ath11k_dp_rx_h_undecap_frag(struct ath11k *ar, struct sk_buff *msdu,
 		hdr_len = ieee80211_hdrlen(hdr->frame_control);
 		crypto_len = ath11k_dp_rx_crypto_param_len(ar, enctype);
 
-		memmove((void *)msdu->data + HAL_RX_DESC_SIZE + crypto_len,
-			(void *)msdu->data + HAL_RX_DESC_SIZE, hdr_len);
+		memmove((void *)msdu->data + hal_rx_desc_sz + crypto_len,
+			(void *)msdu->data + hal_rx_desc_sz, hdr_len);
 		skb_pull(msdu, crypto_len);
 	}
 }
@@ -3172,11 +3181,12 @@ static int ath11k_dp_rx_h_defrag(struct ath11k *ar,
 	struct hal_rx_desc *rx_desc;
 	struct sk_buff *skb, *first_frag, *last_frag;
 	struct ieee80211_hdr *hdr;
+	struct rx_attention *rx_attention;
 	enum hal_encrypt_type enctype;
 	bool is_decrypted = false;
 	int msdu_len = 0;
 	int extra_space;
-	u32 flags;
+	u32 flags, hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
 	first_frag = skb_peek(&rx_tid->rx_frags);
 	last_frag = skb_peek_tail(&rx_tid->rx_frags);
@@ -3184,11 +3194,13 @@ static int ath11k_dp_rx_h_defrag(struct ath11k *ar,
 	skb_queue_walk(&rx_tid->rx_frags, skb) {
 		flags = 0;
 		rx_desc = (struct hal_rx_desc *)skb->data;
-		hdr = (struct ieee80211_hdr *)(skb->data + HAL_RX_DESC_SIZE);
+		hdr = (struct ieee80211_hdr *)(skb->data + hal_rx_desc_sz);
 
-		enctype = ath11k_dp_rx_h_mpdu_start_enctype(rx_desc);
-		if (enctype != HAL_ENCRYPT_TYPE_OPEN)
-			is_decrypted = ath11k_dp_rx_h_attn_is_decrypted(rx_desc);
+		enctype = ath11k_dp_rx_h_mpdu_start_enctype(ar->ab, rx_desc);
+		if (enctype != HAL_ENCRYPT_TYPE_OPEN) {
+			rx_attention = ath11k_dp_rx_get_attention(ar->ab, rx_desc);
+			is_decrypted = ath11k_dp_rx_h_attn_is_decrypted(rx_attention);
+		}
 
 		if (is_decrypted) {
 			if (skb != first_frag)
@@ -3204,7 +3216,7 @@ static int ath11k_dp_rx_h_defrag(struct ath11k *ar,
 		ath11k_dp_rx_h_undecap_frag(ar, skb, enctype, flags);
 
 		if (skb != first_frag)
-			skb_pull(skb, HAL_RX_DESC_SIZE +
+			skb_pull(skb, hal_rx_desc_sz +
 				      ieee80211_hdrlen(hdr->frame_control));
 		msdu_len += skb->len;
 	}
@@ -3220,7 +3232,7 @@ static int ath11k_dp_rx_h_defrag(struct ath11k *ar,
 		dev_kfree_skb_any(skb);
 	}
 
-	hdr = (struct ieee80211_hdr *)(first_frag->data + HAL_RX_DESC_SIZE);
+	hdr = (struct ieee80211_hdr *)(first_frag->data + hal_rx_desc_sz);
 	hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
 	ATH11K_SKB_RXCB(first_frag)->is_frag = 1;
 
@@ -3246,10 +3258,10 @@ static int ath11k_dp_rx_h_defrag_reo_reinject(struct ath11k *ar, struct dp_rx_ti
 	struct hal_srng *srng;
 	dma_addr_t paddr;
 	u32 desc_bank, msdu_info, mpdu_info;
-	u32 dst_idx, cookie;
-	u32 *msdu_len_offset;
+	u32 dst_idx, cookie, hal_rx_desc_sz;
 	int ret, buf_id;
 
+	hal_rx_desc_sz = ab->hw_params.hal_desc_sz;
 	link_desc_banks = ab->dp.link_desc_banks;
 	reo_dest_ring = rx_tid->dst_ring_desc;
 
@@ -3264,16 +3276,14 @@ static int ath11k_dp_rx_h_defrag_reo_reinject(struct ath11k *ar, struct dp_rx_ti
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_LAST_MSDU_IN_MPDU, 1) |
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_MSDU_CONTINUATION, 0) |
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_MSDU_LENGTH,
-			       defrag_skb->len - HAL_RX_DESC_SIZE) |
+			       defrag_skb->len - hal_rx_desc_sz) |
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_REO_DEST_IND, dst_idx) |
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_VALID_SA, 1) |
 		    FIELD_PREP(RX_MSDU_DESC_INFO0_VALID_DA, 1);
 	msdu0->rx_msdu_info.info0 = msdu_info;
 
 	/* change msdu len in hal rx desc */
-	msdu_len_offset = (u32 *)&rx_desc->msdu_start;
-	*msdu_len_offset &= ~(RX_MSDU_START_INFO1_MSDU_LENGTH);
-	*msdu_len_offset |= defrag_skb->len - HAL_RX_DESC_SIZE;
+	ath11k_dp_rxdesc_set_msdu_len(ab, rx_desc, defrag_skb->len - hal_rx_desc_sz);
 
 	paddr = dma_map_single(ab->dev, defrag_skb->data,
 			       defrag_skb->len + skb_tailroom(defrag_skb),
@@ -3346,24 +3356,26 @@ err_unmap_dma:
 	return ret;
 }
 
-static int ath11k_dp_rx_h_cmp_frags(struct sk_buff *a, struct sk_buff *b)
+static int ath11k_dp_rx_h_cmp_frags(struct ath11k *ar,
+				    struct sk_buff *a, struct sk_buff *b)
 {
 	int frag1, frag2;
 
-	frag1 = ath11k_dp_rx_h_mpdu_start_frag_no(a);
-	frag2 = ath11k_dp_rx_h_mpdu_start_frag_no(b);
+	frag1 = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, a);
+	frag2 = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, b);
 
 	return frag1 - frag2;
 }
 
-static void ath11k_dp_rx_h_sort_frags(struct sk_buff_head *frag_list,
+static void ath11k_dp_rx_h_sort_frags(struct ath11k *ar,
+				      struct sk_buff_head *frag_list,
 				      struct sk_buff *cur_frag)
 {
 	struct sk_buff *skb;
 	int cmp;
 
 	skb_queue_walk(frag_list, skb) {
-		cmp = ath11k_dp_rx_h_cmp_frags(skb, cur_frag);
+		cmp = ath11k_dp_rx_h_cmp_frags(ar, skb, cur_frag);
 		if (cmp < 0)
 			continue;
 		__skb_queue_before(frag_list, skb, cur_frag);
@@ -3372,14 +3384,15 @@ static void ath11k_dp_rx_h_sort_frags(struct sk_buff_head *frag_list,
 	__skb_queue_tail(frag_list, cur_frag);
 }
 
-static u64 ath11k_dp_rx_h_get_pn(struct sk_buff *skb)
+static u64 ath11k_dp_rx_h_get_pn(struct ath11k *ar, struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr;
 	u64 pn = 0;
 	u8 *ehdr;
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
-	hdr = (struct ieee80211_hdr *)(skb->data + HAL_RX_DESC_SIZE);
-	ehdr = skb->data + HAL_RX_DESC_SIZE + ieee80211_hdrlen(hdr->frame_control);
+	hdr = (struct ieee80211_hdr *)(skb->data + hal_rx_desc_sz);
+	ehdr = skb->data + hal_rx_desc_sz + ieee80211_hdrlen(hdr->frame_control);
 
 	pn = ehdr[0];
 	pn |= (u64)ehdr[1] << 8;
@@ -3403,19 +3416,19 @@ ath11k_dp_rx_h_defrag_validate_incr_pn(struct ath11k *ar, struct dp_rx_tid *rx_t
 	first_frag = skb_peek(&rx_tid->rx_frags);
 	desc = (struct hal_rx_desc *)first_frag->data;
 
-	encrypt_type = ath11k_dp_rx_h_mpdu_start_enctype(desc);
+	encrypt_type = ath11k_dp_rx_h_mpdu_start_enctype(ar->ab, desc);
 	if (encrypt_type != HAL_ENCRYPT_TYPE_CCMP_128 &&
 	    encrypt_type != HAL_ENCRYPT_TYPE_CCMP_256 &&
 	    encrypt_type != HAL_ENCRYPT_TYPE_GCMP_128 &&
 	    encrypt_type != HAL_ENCRYPT_TYPE_AES_GCMP_256)
 		return true;
 
-	last_pn = ath11k_dp_rx_h_get_pn(first_frag);
+	last_pn = ath11k_dp_rx_h_get_pn(ar, first_frag);
 	skb_queue_walk(&rx_tid->rx_frags, skb) {
 		if (skb == first_frag)
 			continue;
 
-		cur_pn = ath11k_dp_rx_h_get_pn(skb);
+		cur_pn = ath11k_dp_rx_h_get_pn(ar, skb);
 		if (cur_pn != last_pn + 1)
 			return false;
 		last_pn = cur_pn;
@@ -3439,14 +3452,14 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	bool more_frags;
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
-	peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(rx_desc);
-	tid = ath11k_dp_rx_h_mpdu_start_tid(rx_desc);
-	seqno = ath11k_dp_rx_h_mpdu_start_seq_no(rx_desc);
-	frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(msdu);
-	more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(msdu);
-
-	if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(rx_desc) ||
-	    !ath11k_dp_rx_h_mpdu_start_fc_valid(rx_desc) ||
+	peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(ar->ab, rx_desc);
+	tid = ath11k_dp_rx_h_mpdu_start_tid(ar->ab, rx_desc);
+	seqno = ath11k_dp_rx_h_mpdu_start_seq_no(ar->ab, rx_desc);
+	frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, msdu);
+	more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(ar->ab, msdu);
+
+	if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(ar->ab, rx_desc) ||
+	    !ath11k_dp_rx_h_mpdu_start_fc_valid(ar->ab, rx_desc) ||
 	    tid > IEEE80211_NUM_TIDS)
 		return -EINVAL;
 
@@ -3484,7 +3497,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	if (frag_no > __fls(rx_tid->rx_frag_bitmap))
 		__skb_queue_tail(&rx_tid->rx_frags, msdu);
 	else
-		ath11k_dp_rx_h_sort_frags(&rx_tid->rx_frags, msdu);
+		ath11k_dp_rx_h_sort_frags(ar, &rx_tid->rx_frags, msdu);
 
 	rx_tid->rx_frag_bitmap |= BIT(frag_no);
 	if (!more_frags)
@@ -3551,6 +3564,7 @@ ath11k_dp_process_rx_err_buf(struct ath11k *ar, u32 *ring_desc, int buf_id, bool
 	struct hal_rx_desc *rx_desc;
 	u8 *hdr_status;
 	u16 msdu_len;
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
 	spin_lock_bh(&rx_ring->idr_lock);
 	msdu = idr_find(&rx_ring->bufs_idr, buf_id);
@@ -3586,9 +3600,9 @@ ath11k_dp_process_rx_err_buf(struct ath11k *ar, u32 *ring_desc, int buf_id, bool
 	}
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
-	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(rx_desc);
-	if ((msdu_len + HAL_RX_DESC_SIZE) > DP_RX_BUFFER_SIZE) {
-		hdr_status = ath11k_dp_rx_h_80211_hdr(rx_desc);
+	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(ar->ab, rx_desc);
+	if ((msdu_len + hal_rx_desc_sz) > DP_RX_BUFFER_SIZE) {
+		hdr_status = ath11k_dp_rx_h_80211_hdr(ar->ab, rx_desc);
 		ath11k_warn(ar->ab, "invalid msdu leng %u", msdu_len);
 		ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", hdr_status,
 				sizeof(struct ieee80211_hdr));
@@ -3598,7 +3612,7 @@ ath11k_dp_process_rx_err_buf(struct ath11k *ar, u32 *ring_desc, int buf_id, bool
 		goto exit;
 	}
 
-	skb_put(msdu, HAL_RX_DESC_SIZE + msdu_len);
+	skb_put(msdu, hal_rx_desc_sz + msdu_len);
 
 	if (ath11k_dp_rx_frag_h_mpdu(ar, msdu, ring_desc)) {
 		dev_kfree_skb_any(msdu);
@@ -3732,7 +3746,7 @@ static void ath11k_dp_rx_null_q_desc_sg_drop(struct ath11k *ar,
 	int n_buffs;
 
 	n_buffs = DIV_ROUND_UP(msdu_len,
-			       (DP_RX_BUFFER_SIZE - HAL_RX_DESC_SIZE));
+			       (DP_RX_BUFFER_SIZE - ar->ab->hw_params.hal_desc_sz));
 
 	skb_queue_walk_safe(msdu_list, skb, tmp) {
 		rxcb = ATH11K_SKB_RXCB(skb);
@@ -3753,19 +3767,22 @@ static int ath11k_dp_rx_h_null_q_desc(struct ath11k *ar, struct sk_buff *msdu,
 {
 	u16 msdu_len;
 	struct hal_rx_desc *desc = (struct hal_rx_desc *)msdu->data;
+	struct rx_attention *rx_attention;
 	u8 l3pad_bytes;
 	struct ath11k_skb_rxcb *rxcb = ATH11K_SKB_RXCB(msdu);
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
-	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(desc);
+	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(ar->ab, desc);
 
-	if (!rxcb->is_frag && ((msdu_len + HAL_RX_DESC_SIZE) > DP_RX_BUFFER_SIZE)) {
+	if (!rxcb->is_frag && ((msdu_len + hal_rx_desc_sz) > DP_RX_BUFFER_SIZE)) {
 		/* First buffer will be freed by the caller, so deduct it's length */
-		msdu_len = msdu_len - (DP_RX_BUFFER_SIZE - HAL_RX_DESC_SIZE);
+		msdu_len = msdu_len - (DP_RX_BUFFER_SIZE - hal_rx_desc_sz);
 		ath11k_dp_rx_null_q_desc_sg_drop(ar, msdu_len, msdu_list);
 		return -EINVAL;
 	}
 
-	if (!ath11k_dp_rx_h_attn_msdu_done(desc)) {
+	rx_attention = ath11k_dp_rx_get_attention(ar->ab, desc);
+	if (!ath11k_dp_rx_h_attn_msdu_done(rx_attention)) {
 		ath11k_warn(ar->ab,
 			    "msdu_done bit not set in null_q_des processing\n");
 		__skb_queue_purge(msdu_list);
@@ -3781,25 +3798,25 @@ static int ath11k_dp_rx_h_null_q_desc(struct ath11k *ar, struct sk_buff *msdu,
 	 * This error can show up both in a REO destination or WBM release ring.
 	 */
 
-	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(desc);
-	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(desc);
+	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(ar->ab, desc);
+	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(ar->ab, desc);
 
 	if (rxcb->is_frag) {
-		skb_pull(msdu, HAL_RX_DESC_SIZE);
+		skb_pull(msdu, hal_rx_desc_sz);
 	} else {
-		l3pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(desc);
+		l3pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(ar->ab, desc);
 
-		if ((HAL_RX_DESC_SIZE + l3pad_bytes + msdu_len) > DP_RX_BUFFER_SIZE)
+		if ((hal_rx_desc_sz + l3pad_bytes + msdu_len) > DP_RX_BUFFER_SIZE)
 			return -EINVAL;
 
-		skb_put(msdu, HAL_RX_DESC_SIZE + l3pad_bytes + msdu_len);
-		skb_pull(msdu, HAL_RX_DESC_SIZE + l3pad_bytes);
+		skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len);
+		skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes);
 	}
 	ath11k_dp_rx_h_ppdu(ar, desc, status);
 
 	ath11k_dp_rx_h_mpdu(ar, msdu, desc, status);
 
-	rxcb->tid = ath11k_dp_rx_h_mpdu_start_tid(desc);
+	rxcb->tid = ath11k_dp_rx_h_mpdu_start_tid(ar->ab, desc);
 
 	/* Please note that caller will having the access to msdu and completing
 	 * rx with mac80211. Need not worry about cleaning up amsdu_list.
@@ -3846,14 +3863,15 @@ static void ath11k_dp_rx_h_tkip_mic_err(struct ath11k *ar, struct sk_buff *msdu,
 	struct hal_rx_desc *desc = (struct hal_rx_desc *)msdu->data;
 	u8 l3pad_bytes;
 	struct ath11k_skb_rxcb *rxcb = ATH11K_SKB_RXCB(msdu);
+	u32 hal_rx_desc_sz = ar->ab->hw_params.hal_desc_sz;
 
-	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(desc);
-	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(desc);
+	rxcb->is_first_msdu = ath11k_dp_rx_h_msdu_end_first_msdu(ar->ab, desc);
+	rxcb->is_last_msdu = ath11k_dp_rx_h_msdu_end_last_msdu(ar->ab, desc);
 
-	l3pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(desc);
-	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(desc);
-	skb_put(msdu, HAL_RX_DESC_SIZE + l3pad_bytes + msdu_len);
-	skb_pull(msdu, HAL_RX_DESC_SIZE + l3pad_bytes);
+	l3pad_bytes = ath11k_dp_rx_h_msdu_end_l3pad(ar->ab, desc);
+	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(ar->ab, desc);
+	skb_put(msdu, hal_rx_desc_sz + l3pad_bytes + msdu_len);
+	skb_pull(msdu, hal_rx_desc_sz + l3pad_bytes);
 
 	ath11k_dp_rx_h_ppdu(ar, desc, status);
 
@@ -4595,10 +4613,10 @@ ath11k_dp_rx_mon_mpdu_pop(struct ath11k *ar, int mac_id,
 			rx_desc = (struct hal_rx_desc *)msdu->data;
 
 			rx_pkt_offset = sizeof(struct hal_rx_desc);
-			l2_hdr_offset = ath11k_dp_rx_h_msdu_end_l3pad(rx_desc);
+			l2_hdr_offset = ath11k_dp_rx_h_msdu_end_l3pad(ar->ab, rx_desc);
 
 			if (is_first_msdu) {
-				if (!ath11k_dp_rxdesc_mpdu_valid(rx_desc)) {
+				if (!ath11k_dp_rxdesc_mpdu_valid(ar->ab, rx_desc)) {
 					drop_mpdu = true;
 					dev_kfree_skb_any(msdu);
 					msdu = NULL;
@@ -4607,7 +4625,7 @@ ath11k_dp_rx_mon_mpdu_pop(struct ath11k *ar, int mac_id,
 				}
 
 				msdu_ppdu_id =
-					ath11k_dp_rxdesc_get_ppduid(rx_desc);
+					ath11k_dp_rxdesc_get_ppduid(ar->ab, rx_desc);
 
 				if (ath11k_dp_rx_mon_comp_ppduid(msdu_ppdu_id,
 								 ppdu_id,
@@ -4676,12 +4694,13 @@ next_msdu:
 	return rx_bufs_used;
 }
 
-static void ath11k_dp_rx_msdus_set_payload(struct sk_buff *msdu)
+static void ath11k_dp_rx_msdus_set_payload(struct ath11k *ar, struct sk_buff *msdu)
 {
 	u32 rx_pkt_offset, l2_hdr_offset;
 
-	rx_pkt_offset = sizeof(struct hal_rx_desc);
-	l2_hdr_offset = ath11k_dp_rx_h_msdu_end_l3pad((struct hal_rx_desc *)msdu->data);
+	rx_pkt_offset = ar->ab->hw_params.hal_desc_sz;
+	l2_hdr_offset = ath11k_dp_rx_h_msdu_end_l3pad(ar->ab,
+						      (struct hal_rx_desc *)msdu->data);
 	skb_pull(msdu, rx_pkt_offset + l2_hdr_offset);
 }
 
@@ -4691,12 +4710,14 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 			    struct sk_buff *last_msdu,
 			    struct ieee80211_rx_status *rxs)
 {
+	struct ath11k_base *ab = ar->ab;
 	struct sk_buff *msdu, *mpdu_buf, *prev_buf;
-	u32 decap_format, wifi_hdr_len;
+	u32 wifi_hdr_len;
 	struct hal_rx_desc *rx_desc;
 	char *hdr_desc;
-	u8 *dest;
+	u8 *dest, decap_format;
 	struct ieee80211_hdr_3addr *wh;
+	struct rx_attention *rx_attention;
 
 	mpdu_buf = NULL;
 
@@ -4704,22 +4725,23 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 		goto err_merge_fail;
 
 	rx_desc = (struct hal_rx_desc *)head_msdu->data;
+	rx_attention = ath11k_dp_rx_get_attention(ab, rx_desc);
 
-	if (ath11k_dp_rxdesc_get_mpdulen_err(rx_desc))
+	if (ath11k_dp_rxdesc_get_mpdulen_err(rx_attention))
 		return NULL;
 
-	decap_format = ath11k_dp_rxdesc_get_decap_format(rx_desc);
+	decap_format = ath11k_dp_rx_h_msdu_start_decap_type(ab, rx_desc);
 
 	ath11k_dp_rx_h_ppdu(ar, rx_desc, rxs);
 
 	if (decap_format == DP_RX_DECAP_TYPE_RAW) {
-		ath11k_dp_rx_msdus_set_payload(head_msdu);
+		ath11k_dp_rx_msdus_set_payload(ar, head_msdu);
 
 		prev_buf = head_msdu;
 		msdu = head_msdu->next;
 
 		while (msdu) {
-			ath11k_dp_rx_msdus_set_payload(msdu);
+			ath11k_dp_rx_msdus_set_payload(ar, msdu);
 
 			prev_buf = msdu;
 			msdu = msdu->next;
@@ -4733,7 +4755,7 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 		u8 qos_pkt = 0;
 
 		rx_desc = (struct hal_rx_desc *)head_msdu->data;
-		hdr_desc = ath11k_dp_rxdesc_get_80211hdr(rx_desc);
+		hdr_desc = ath11k_dp_rxdesc_get_80211hdr(ab, rx_desc);
 
 		/* Base size */
 		wifi_hdr_len = sizeof(struct ieee80211_hdr_3addr);
@@ -4750,7 +4772,7 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 
 		while (msdu) {
 			rx_desc = (struct hal_rx_desc *)msdu->data;
-			hdr_desc = ath11k_dp_rxdesc_get_80211hdr(rx_desc);
+			hdr_desc = ath11k_dp_rxdesc_get_80211hdr(ab, rx_desc);
 
 			if (qos_pkt) {
 				dest = skb_push(msdu, sizeof(__le16));
@@ -4760,7 +4782,7 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 				memcpy(dest + wifi_hdr_len,
 				       (u8 *)&qos_field, sizeof(__le16));
 			}
-			ath11k_dp_rx_msdus_set_payload(msdu);
+			ath11k_dp_rx_msdus_set_payload(ar, msdu);
 			prev_buf = msdu;
 			msdu = msdu->next;
 		}
@@ -4768,11 +4790,11 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 		if (!dest)
 			goto err_merge_fail;
 
-		ath11k_dbg(ar->ab, ATH11K_DBG_DATA,
+		ath11k_dbg(ab, ATH11K_DBG_DATA,
 			   "mpdu_buf %pK mpdu_buf->len %u",
 			   prev_buf, prev_buf->len);
 	} else {
-		ath11k_dbg(ar->ab, ATH11K_DBG_DATA,
+		ath11k_dbg(ab, ATH11K_DBG_DATA,
 			   "decap format %d is not supported!\n",
 			   decap_format);
 		goto err_merge_fail;
@@ -4782,7 +4804,7 @@ ath11k_dp_rx_mon_merg_msdus(struct ath11k *ar,
 
 err_merge_fail:
 	if (mpdu_buf && decap_format != DP_RX_DECAP_TYPE_RAW) {
-		ath11k_dbg(ar->ab, ATH11K_DBG_DATA,
+		ath11k_dbg(ab, ATH11K_DBG_DATA,
 			   "err_merge_fail mpdu_buf %pK", mpdu_buf);
 		/* Free the head buffer */
 		dev_kfree_skb_any(mpdu_buf);
diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 082d31f0c9ec..91d1428b8b94 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -316,8 +316,6 @@ struct ath11k_base;
 #define HAL_WBM2SW_RELEASE_RING_BASE_MSB_RING_SIZE	0x000fffff
 #define HAL_RXDMA_RING_MAX_SIZE				0x0000ffff
 
-#define HAL_RX_DESC_SIZE (sizeof(struct hal_rx_desc))
-
 /* Add any other errors here and return them in
  * ath11k_hal_rx_desc_get_err().
  */
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index 86ed450f8238..c8929de8ce6c 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -76,7 +76,7 @@ void ath11k_hal_tx_cmd_desc_setup(struct ath11k_base *ab, void *cmd,
 				    ti->bss_ast_hash);
 	tcl_cmd->info4 = 0;
 
-	if (ti->enable_mesh && ab->hw_params.hw_ops->tx_mesh_enable)
+	if (ti->enable_mesh)
 		ab->hw_params.hw_ops->tx_mesh_enable(ab, tcl_cmd);
 }
 
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 626739b54989..462d5e81bee8 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -169,12 +169,358 @@ static int ath11k_hw_mac_id_to_srng_id_qca6390(struct ath11k_hw_params *hw,
 	return mac_id;
 }
 
+static bool ath11k_hw_ipq8074_rx_desc_get_first_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO2_FIRST_MSDU,
+			   __le32_to_cpu(desc->u.ipq8074.msdu_end.info2));
+}
+
+static bool ath11k_hw_ipq8074_rx_desc_get_last_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO2_LAST_MSDU,
+			   __le32_to_cpu(desc->u.ipq8074.msdu_end.info2));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_l3_pad_bytes(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_END_INFO2_L3_HDR_PADDING,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_end.info2));
+}
+
+static u8 *ath11k_hw_ipq8074_rx_desc_get_hdr_status(struct hal_rx_desc *desc)
+{
+	return desc->u.ipq8074.hdr_status;
+}
+
+static bool ath11k_hw_ipq8074_rx_desc_encrypt_valid(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.ipq8074.mpdu_start.info1) &
+	       RX_MPDU_START_INFO1_ENCRYPT_INFO_VALID;
+}
+
+static u32 ath11k_hw_ipq8074_rx_desc_get_encrypt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO2_ENC_TYPE,
+			 __le32_to_cpu(desc->u.ipq8074.mpdu_start.info2));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_decap_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_DECAP_FORMAT,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info2));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_mesh_ctl(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_MESH_CTRL_PRESENT,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info2));
+}
+
+static bool ath11k_hw_ipq8074_rx_desc_get_mpdu_seq_ctl_vld(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_CTRL_VALID,
+			   __le32_to_cpu(desc->u.ipq8074.mpdu_start.info1));
+}
+
+static bool ath11k_hw_ipq8074_rx_desc_get_mpdu_fc_valid(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO1_MPDU_FCTRL_VALID,
+			   __le32_to_cpu(desc->u.ipq8074.mpdu_start.info1));
+}
+
+static u16 ath11k_hw_ipq8074_rx_desc_get_mpdu_start_seq_no(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO1_MPDU_SEQ_NUM,
+			 __le32_to_cpu(desc->u.ipq8074.mpdu_start.info1));
+}
+
+static u16 ath11k_hw_ipq8074_rx_desc_get_msdu_len(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO1_MSDU_LENGTH,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info1));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_msdu_sgi(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_SGI,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_msdu_rate_mcs(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RATE_MCS,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_msdu_rx_bw(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RECV_BW,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info3));
+}
+
+static u32 ath11k_hw_ipq8074_rx_desc_get_msdu_freq(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.ipq8074.msdu_start.phy_meta_data);
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_msdu_pkt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_PKT_TYPE,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_msdu_nss(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_MIMO_SS_BITMAP,
+			 __le32_to_cpu(desc->u.ipq8074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_ipq8074_rx_desc_get_mpdu_tid(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO2_TID,
+			 __le32_to_cpu(desc->u.ipq8074.mpdu_start.info2));
+}
+
+static u16 ath11k_hw_ipq8074_rx_desc_get_mpdu_peer_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.ipq8074.mpdu_start.sw_peer_id);
+}
+
+static void ath11k_hw_ipq8074_rx_desc_copy_attn_end(struct hal_rx_desc *fdesc,
+						    struct hal_rx_desc *ldesc)
+{
+	memcpy((u8 *)&fdesc->u.ipq8074.msdu_end, (u8 *)&ldesc->u.ipq8074.msdu_end,
+	       sizeof(struct rx_msdu_end_ipq8074));
+	memcpy((u8 *)&fdesc->u.ipq8074.attention, (u8 *)&ldesc->u.ipq8074.attention,
+	       sizeof(struct rx_attention));
+	memcpy((u8 *)&fdesc->u.ipq8074.mpdu_end, (u8 *)&ldesc->u.ipq8074.mpdu_end,
+	       sizeof(struct rx_mpdu_end));
+}
+
+static u32 ath11k_hw_ipq8074_rx_desc_get_mpdu_start_tag(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(HAL_TLV_HDR_TAG,
+			 __le32_to_cpu(desc->u.ipq8074.mpdu_start_tag));
+}
+
+static u32 ath11k_hw_ipq8074_rx_desc_get_mpdu_ppdu_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.ipq8074.mpdu_start.phy_ppdu_id);
+}
+
+static void ath11k_hw_ipq8074_rx_desc_set_msdu_len(struct hal_rx_desc *desc, u16 len)
+{
+	u32 info = __le32_to_cpu(desc->u.ipq8074.msdu_start.info1);
+
+	info &= ~RX_MSDU_START_INFO1_MSDU_LENGTH;
+	info |= FIELD_PREP(RX_MSDU_START_INFO1_MSDU_LENGTH, len);
+
+	desc->u.ipq8074.msdu_start.info1 = __cpu_to_le32(info);
+}
+
+static
+struct rx_attention *ath11k_hw_ipq8074_rx_desc_get_attention(struct hal_rx_desc *desc)
+{
+	return &desc->u.ipq8074.attention;
+}
+
+static u8 *ath11k_hw_ipq8074_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
+{
+	return &desc->u.ipq8074.msdu_payload[0];
+}
+
+static bool ath11k_hw_qcn9074_rx_desc_get_first_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO4_FIRST_MSDU,
+			   __le16_to_cpu(desc->u.qcn9074.msdu_end.info4));
+}
+
+static bool ath11k_hw_qcn9074_rx_desc_get_last_msdu(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MSDU_END_INFO4_LAST_MSDU,
+			   __le16_to_cpu(desc->u.qcn9074.msdu_end.info4));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_l3_pad_bytes(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_END_INFO4_L3_HDR_PADDING,
+			 __le16_to_cpu(desc->u.qcn9074.msdu_end.info4));
+}
+
+static u8 *ath11k_hw_qcn9074_rx_desc_get_hdr_status(struct hal_rx_desc *desc)
+{
+	return desc->u.qcn9074.hdr_status;
+}
+
+static bool ath11k_hw_qcn9074_rx_desc_encrypt_valid(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.qcn9074.mpdu_start.info11) &
+	       RX_MPDU_START_INFO11_ENCRYPT_INFO_VALID;
+}
+
+static u32 ath11k_hw_qcn9074_rx_desc_get_encrypt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO9_ENC_TYPE,
+			 __le32_to_cpu(desc->u.qcn9074.mpdu_start.info9));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_decap_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_DECAP_FORMAT,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info2));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_mesh_ctl(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO2_MESH_CTRL_PRESENT,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info2));
+}
+
+static bool ath11k_hw_qcn9074_rx_desc_get_mpdu_seq_ctl_vld(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO11_MPDU_SEQ_CTRL_VALID,
+			   __le32_to_cpu(desc->u.qcn9074.mpdu_start.info11));
+}
+
+static bool ath11k_hw_qcn9074_rx_desc_get_mpdu_fc_valid(struct hal_rx_desc *desc)
+{
+	return !!FIELD_GET(RX_MPDU_START_INFO11_MPDU_FCTRL_VALID,
+			   __le32_to_cpu(desc->u.qcn9074.mpdu_start.info11));
+}
+
+static u16 ath11k_hw_qcn9074_rx_desc_get_mpdu_start_seq_no(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO11_MPDU_SEQ_NUM,
+			 __le32_to_cpu(desc->u.qcn9074.mpdu_start.info11));
+}
+
+static u16 ath11k_hw_qcn9074_rx_desc_get_msdu_len(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO1_MSDU_LENGTH,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info1));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_msdu_sgi(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_SGI,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_msdu_rate_mcs(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RATE_MCS,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_msdu_rx_bw(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_RECV_BW,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info3));
+}
+
+static u32 ath11k_hw_qcn9074_rx_desc_get_msdu_freq(struct hal_rx_desc *desc)
+{
+	return __le32_to_cpu(desc->u.qcn9074.msdu_start.phy_meta_data);
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_msdu_pkt_type(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_PKT_TYPE,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_msdu_nss(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MSDU_START_INFO3_MIMO_SS_BITMAP,
+			 __le32_to_cpu(desc->u.qcn9074.msdu_start.info3));
+}
+
+static u8 ath11k_hw_qcn9074_rx_desc_get_mpdu_tid(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(RX_MPDU_START_INFO9_TID,
+			 __le32_to_cpu(desc->u.qcn9074.mpdu_start.info9));
+}
+
+static u16 ath11k_hw_qcn9074_rx_desc_get_mpdu_peer_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.qcn9074.mpdu_start.sw_peer_id);
+}
+
+static void ath11k_hw_qcn9074_rx_desc_copy_attn_end(struct hal_rx_desc *fdesc,
+						    struct hal_rx_desc *ldesc)
+{
+	memcpy((u8 *)&fdesc->u.qcn9074.msdu_end, (u8 *)&ldesc->u.qcn9074.msdu_end,
+	       sizeof(struct rx_msdu_end_qcn9074));
+	memcpy((u8 *)&fdesc->u.qcn9074.attention, (u8 *)&ldesc->u.qcn9074.attention,
+	       sizeof(struct rx_attention));
+	memcpy((u8 *)&fdesc->u.qcn9074.mpdu_end, (u8 *)&ldesc->u.qcn9074.mpdu_end,
+	       sizeof(struct rx_mpdu_end));
+}
+
+static u32 ath11k_hw_qcn9074_rx_desc_get_mpdu_start_tag(struct hal_rx_desc *desc)
+{
+	return FIELD_GET(HAL_TLV_HDR_TAG,
+			 __le32_to_cpu(desc->u.qcn9074.mpdu_start_tag));
+}
+
+static u32 ath11k_hw_qcn9074_rx_desc_get_mpdu_ppdu_id(struct hal_rx_desc *desc)
+{
+	return __le16_to_cpu(desc->u.qcn9074.mpdu_start.phy_ppdu_id);
+}
+
+static void ath11k_hw_qcn9074_rx_desc_set_msdu_len(struct hal_rx_desc *desc, u16 len)
+{
+	u32 info = __le32_to_cpu(desc->u.qcn9074.msdu_start.info1);
+
+	info &= ~RX_MSDU_START_INFO1_MSDU_LENGTH;
+	info |= FIELD_PREP(RX_MSDU_START_INFO1_MSDU_LENGTH, len);
+
+	desc->u.qcn9074.msdu_start.info1 = __cpu_to_le32(info);
+}
+
+static
+struct rx_attention *ath11k_hw_qcn9074_rx_desc_get_attention(struct hal_rx_desc *desc)
+{
+	return &desc->u.qcn9074.attention;
+}
+
+static u8 *ath11k_hw_qcn9074_rx_desc_get_msdu_payload(struct hal_rx_desc *desc)
+{
+	return &desc->u.qcn9074.msdu_payload[0];
+}
+
 const struct ath11k_hw_ops ipq8074_ops = {
 	.get_hw_mac_from_pdev_id = ath11k_hw_ipq8074_mac_from_pdev_id,
 	.wmi_init_config = ath11k_init_wmi_config_ipq8074,
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
 	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
+	.rx_desc_get_first_msdu = ath11k_hw_ipq8074_rx_desc_get_first_msdu,
+	.rx_desc_get_last_msdu = ath11k_hw_ipq8074_rx_desc_get_last_msdu,
+	.rx_desc_get_l3_pad_bytes = ath11k_hw_ipq8074_rx_desc_get_l3_pad_bytes,
+	.rx_desc_get_hdr_status = ath11k_hw_ipq8074_rx_desc_get_hdr_status,
+	.rx_desc_encrypt_valid = ath11k_hw_ipq8074_rx_desc_encrypt_valid,
+	.rx_desc_get_encrypt_type = ath11k_hw_ipq8074_rx_desc_get_encrypt_type,
+	.rx_desc_get_decap_type = ath11k_hw_ipq8074_rx_desc_get_decap_type,
+	.rx_desc_get_mesh_ctl = ath11k_hw_ipq8074_rx_desc_get_mesh_ctl,
+	.rx_desc_get_mpdu_seq_ctl_vld = ath11k_hw_ipq8074_rx_desc_get_mpdu_seq_ctl_vld,
+	.rx_desc_get_mpdu_fc_valid = ath11k_hw_ipq8074_rx_desc_get_mpdu_fc_valid,
+	.rx_desc_get_mpdu_start_seq_no = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_seq_no,
+	.rx_desc_get_msdu_len = ath11k_hw_ipq8074_rx_desc_get_msdu_len,
+	.rx_desc_get_msdu_sgi = ath11k_hw_ipq8074_rx_desc_get_msdu_sgi,
+	.rx_desc_get_msdu_rate_mcs = ath11k_hw_ipq8074_rx_desc_get_msdu_rate_mcs,
+	.rx_desc_get_msdu_rx_bw = ath11k_hw_ipq8074_rx_desc_get_msdu_rx_bw,
+	.rx_desc_get_msdu_freq = ath11k_hw_ipq8074_rx_desc_get_msdu_freq,
+	.rx_desc_get_msdu_pkt_type = ath11k_hw_ipq8074_rx_desc_get_msdu_pkt_type,
+	.rx_desc_get_msdu_nss = ath11k_hw_ipq8074_rx_desc_get_msdu_nss,
+	.rx_desc_get_mpdu_tid = ath11k_hw_ipq8074_rx_desc_get_mpdu_tid,
+	.rx_desc_get_mpdu_peer_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_peer_id,
+	.rx_desc_copy_attn_end_tlv = ath11k_hw_ipq8074_rx_desc_copy_attn_end,
+	.rx_desc_get_mpdu_start_tag = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_tag,
+	.rx_desc_get_mpdu_ppdu_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_ppdu_id,
+	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
+	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
+	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 };
 
 const struct ath11k_hw_ops ipq6018_ops = {
@@ -183,6 +529,32 @@ const struct ath11k_hw_ops ipq6018_ops = {
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
 	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
+	.rx_desc_get_first_msdu = ath11k_hw_ipq8074_rx_desc_get_first_msdu,
+	.rx_desc_get_last_msdu = ath11k_hw_ipq8074_rx_desc_get_last_msdu,
+	.rx_desc_get_l3_pad_bytes = ath11k_hw_ipq8074_rx_desc_get_l3_pad_bytes,
+	.rx_desc_get_hdr_status = ath11k_hw_ipq8074_rx_desc_get_hdr_status,
+	.rx_desc_encrypt_valid = ath11k_hw_ipq8074_rx_desc_encrypt_valid,
+	.rx_desc_get_encrypt_type = ath11k_hw_ipq8074_rx_desc_get_encrypt_type,
+	.rx_desc_get_decap_type = ath11k_hw_ipq8074_rx_desc_get_decap_type,
+	.rx_desc_get_mesh_ctl = ath11k_hw_ipq8074_rx_desc_get_mesh_ctl,
+	.rx_desc_get_mpdu_seq_ctl_vld = ath11k_hw_ipq8074_rx_desc_get_mpdu_seq_ctl_vld,
+	.rx_desc_get_mpdu_fc_valid = ath11k_hw_ipq8074_rx_desc_get_mpdu_fc_valid,
+	.rx_desc_get_mpdu_start_seq_no = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_seq_no,
+	.rx_desc_get_msdu_len = ath11k_hw_ipq8074_rx_desc_get_msdu_len,
+	.rx_desc_get_msdu_sgi = ath11k_hw_ipq8074_rx_desc_get_msdu_sgi,
+	.rx_desc_get_msdu_rate_mcs = ath11k_hw_ipq8074_rx_desc_get_msdu_rate_mcs,
+	.rx_desc_get_msdu_rx_bw = ath11k_hw_ipq8074_rx_desc_get_msdu_rx_bw,
+	.rx_desc_get_msdu_freq = ath11k_hw_ipq8074_rx_desc_get_msdu_freq,
+	.rx_desc_get_msdu_pkt_type = ath11k_hw_ipq8074_rx_desc_get_msdu_pkt_type,
+	.rx_desc_get_msdu_nss = ath11k_hw_ipq8074_rx_desc_get_msdu_nss,
+	.rx_desc_get_mpdu_tid = ath11k_hw_ipq8074_rx_desc_get_mpdu_tid,
+	.rx_desc_get_mpdu_peer_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_peer_id,
+	.rx_desc_copy_attn_end_tlv = ath11k_hw_ipq8074_rx_desc_copy_attn_end,
+	.rx_desc_get_mpdu_start_tag = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_tag,
+	.rx_desc_get_mpdu_ppdu_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_ppdu_id,
+	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
+	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
+	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 };
 
 const struct ath11k_hw_ops qca6390_ops = {
@@ -191,6 +563,32 @@ const struct ath11k_hw_ops qca6390_ops = {
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_qca6390,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_qca6390,
 	.tx_mesh_enable = ath11k_hw_ipq8074_tx_mesh_enable,
+	.rx_desc_get_first_msdu = ath11k_hw_ipq8074_rx_desc_get_first_msdu,
+	.rx_desc_get_last_msdu = ath11k_hw_ipq8074_rx_desc_get_last_msdu,
+	.rx_desc_get_l3_pad_bytes = ath11k_hw_ipq8074_rx_desc_get_l3_pad_bytes,
+	.rx_desc_get_hdr_status = ath11k_hw_ipq8074_rx_desc_get_hdr_status,
+	.rx_desc_encrypt_valid = ath11k_hw_ipq8074_rx_desc_encrypt_valid,
+	.rx_desc_get_encrypt_type = ath11k_hw_ipq8074_rx_desc_get_encrypt_type,
+	.rx_desc_get_decap_type = ath11k_hw_ipq8074_rx_desc_get_decap_type,
+	.rx_desc_get_mesh_ctl = ath11k_hw_ipq8074_rx_desc_get_mesh_ctl,
+	.rx_desc_get_mpdu_seq_ctl_vld = ath11k_hw_ipq8074_rx_desc_get_mpdu_seq_ctl_vld,
+	.rx_desc_get_mpdu_fc_valid = ath11k_hw_ipq8074_rx_desc_get_mpdu_fc_valid,
+	.rx_desc_get_mpdu_start_seq_no = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_seq_no,
+	.rx_desc_get_msdu_len = ath11k_hw_ipq8074_rx_desc_get_msdu_len,
+	.rx_desc_get_msdu_sgi = ath11k_hw_ipq8074_rx_desc_get_msdu_sgi,
+	.rx_desc_get_msdu_rate_mcs = ath11k_hw_ipq8074_rx_desc_get_msdu_rate_mcs,
+	.rx_desc_get_msdu_rx_bw = ath11k_hw_ipq8074_rx_desc_get_msdu_rx_bw,
+	.rx_desc_get_msdu_freq = ath11k_hw_ipq8074_rx_desc_get_msdu_freq,
+	.rx_desc_get_msdu_pkt_type = ath11k_hw_ipq8074_rx_desc_get_msdu_pkt_type,
+	.rx_desc_get_msdu_nss = ath11k_hw_ipq8074_rx_desc_get_msdu_nss,
+	.rx_desc_get_mpdu_tid = ath11k_hw_ipq8074_rx_desc_get_mpdu_tid,
+	.rx_desc_get_mpdu_peer_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_peer_id,
+	.rx_desc_copy_attn_end_tlv = ath11k_hw_ipq8074_rx_desc_copy_attn_end,
+	.rx_desc_get_mpdu_start_tag = ath11k_hw_ipq8074_rx_desc_get_mpdu_start_tag,
+	.rx_desc_get_mpdu_ppdu_id = ath11k_hw_ipq8074_rx_desc_get_mpdu_ppdu_id,
+	.rx_desc_set_msdu_len = ath11k_hw_ipq8074_rx_desc_set_msdu_len,
+	.rx_desc_get_attention = ath11k_hw_ipq8074_rx_desc_get_attention,
+	.rx_desc_get_msdu_payload = ath11k_hw_ipq8074_rx_desc_get_msdu_payload,
 };
 
 const struct ath11k_hw_ops qcn9074_ops = {
@@ -199,6 +597,32 @@ const struct ath11k_hw_ops qcn9074_ops = {
 	.mac_id_to_pdev_id = ath11k_hw_mac_id_to_pdev_id_ipq8074,
 	.mac_id_to_srng_id = ath11k_hw_mac_id_to_srng_id_ipq8074,
 	.tx_mesh_enable = ath11k_hw_qcn9074_tx_mesh_enable,
+	.rx_desc_get_first_msdu = ath11k_hw_qcn9074_rx_desc_get_first_msdu,
+	.rx_desc_get_last_msdu = ath11k_hw_qcn9074_rx_desc_get_last_msdu,
+	.rx_desc_get_l3_pad_bytes = ath11k_hw_qcn9074_rx_desc_get_l3_pad_bytes,
+	.rx_desc_get_hdr_status = ath11k_hw_qcn9074_rx_desc_get_hdr_status,
+	.rx_desc_encrypt_valid = ath11k_hw_qcn9074_rx_desc_encrypt_valid,
+	.rx_desc_get_encrypt_type = ath11k_hw_qcn9074_rx_desc_get_encrypt_type,
+	.rx_desc_get_decap_type = ath11k_hw_qcn9074_rx_desc_get_decap_type,
+	.rx_desc_get_mesh_ctl = ath11k_hw_qcn9074_rx_desc_get_mesh_ctl,
+	.rx_desc_get_mpdu_seq_ctl_vld = ath11k_hw_qcn9074_rx_desc_get_mpdu_seq_ctl_vld,
+	.rx_desc_get_mpdu_fc_valid = ath11k_hw_qcn9074_rx_desc_get_mpdu_fc_valid,
+	.rx_desc_get_mpdu_start_seq_no = ath11k_hw_qcn9074_rx_desc_get_mpdu_start_seq_no,
+	.rx_desc_get_msdu_len = ath11k_hw_qcn9074_rx_desc_get_msdu_len,
+	.rx_desc_get_msdu_sgi = ath11k_hw_qcn9074_rx_desc_get_msdu_sgi,
+	.rx_desc_get_msdu_rate_mcs = ath11k_hw_qcn9074_rx_desc_get_msdu_rate_mcs,
+	.rx_desc_get_msdu_rx_bw = ath11k_hw_qcn9074_rx_desc_get_msdu_rx_bw,
+	.rx_desc_get_msdu_freq = ath11k_hw_qcn9074_rx_desc_get_msdu_freq,
+	.rx_desc_get_msdu_pkt_type = ath11k_hw_qcn9074_rx_desc_get_msdu_pkt_type,
+	.rx_desc_get_msdu_nss = ath11k_hw_qcn9074_rx_desc_get_msdu_nss,
+	.rx_desc_get_mpdu_tid = ath11k_hw_qcn9074_rx_desc_get_mpdu_tid,
+	.rx_desc_get_mpdu_peer_id = ath11k_hw_qcn9074_rx_desc_get_mpdu_peer_id,
+	.rx_desc_copy_attn_end_tlv = ath11k_hw_qcn9074_rx_desc_copy_attn_end,
+	.rx_desc_get_mpdu_start_tag = ath11k_hw_qcn9074_rx_desc_get_mpdu_start_tag,
+	.rx_desc_get_mpdu_ppdu_id = ath11k_hw_qcn9074_rx_desc_get_mpdu_ppdu_id,
+	.rx_desc_set_msdu_len = ath11k_hw_qcn9074_rx_desc_set_msdu_len,
+	.rx_desc_get_attention = ath11k_hw_qcn9074_rx_desc_get_attention,
+	.rx_desc_get_msdu_payload = ath11k_hw_qcn9074_rx_desc_get_msdu_payload,
 };
 
 #define ATH11K_TX_RING_MASK_0 0x1
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 13f4f9c9814b..54b7edfc4f88 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -105,6 +105,7 @@ enum ath11k_bus {
 
 #define ATH11K_EXT_IRQ_GRP_NUM_MAX 11
 
+struct hal_rx_desc;
 struct hal_tcl_data_cmd;
 
 struct ath11k_hw_ring_mask {
@@ -160,6 +161,7 @@ struct ath11k_hw_params {
 	bool idle_ps;
 	bool cold_boot_calib;
 	bool supports_suspend;
+	u32 hal_desc_sz;
 };
 
 struct ath11k_hw_ops {
@@ -170,6 +172,33 @@ struct ath11k_hw_ops {
 	int (*mac_id_to_srng_id)(struct ath11k_hw_params *hw, int mac_id);
 	void (*tx_mesh_enable)(struct ath11k_base *ab,
 			       struct hal_tcl_data_cmd *tcl_cmd);
+	bool (*rx_desc_get_first_msdu)(struct hal_rx_desc *desc);
+	bool (*rx_desc_get_last_msdu)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_l3_pad_bytes)(struct hal_rx_desc *desc);
+	u8 *(*rx_desc_get_hdr_status)(struct hal_rx_desc *desc);
+	bool (*rx_desc_encrypt_valid)(struct hal_rx_desc *desc);
+	u32 (*rx_desc_get_encrypt_type)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_decap_type)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_mesh_ctl)(struct hal_rx_desc *desc);
+	bool (*rx_desc_get_mpdu_seq_ctl_vld)(struct hal_rx_desc *desc);
+	bool (*rx_desc_get_mpdu_fc_valid)(struct hal_rx_desc *desc);
+	u16 (*rx_desc_get_mpdu_start_seq_no)(struct hal_rx_desc *desc);
+	u16 (*rx_desc_get_msdu_len)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_msdu_sgi)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_msdu_rate_mcs)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_msdu_rx_bw)(struct hal_rx_desc *desc);
+	u32 (*rx_desc_get_msdu_freq)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_msdu_pkt_type)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_msdu_nss)(struct hal_rx_desc *desc);
+	u8 (*rx_desc_get_mpdu_tid)(struct hal_rx_desc *desc);
+	u16 (*rx_desc_get_mpdu_peer_id)(struct hal_rx_desc *desc);
+	void (*rx_desc_copy_attn_end_tlv)(struct hal_rx_desc *fdesc,
+					  struct hal_rx_desc *ldesc);
+	u32 (*rx_desc_get_mpdu_start_tag)(struct hal_rx_desc *desc);
+	u32 (*rx_desc_get_mpdu_ppdu_id)(struct hal_rx_desc *desc);
+	void (*rx_desc_set_msdu_len)(struct hal_rx_desc *desc, u16 len);
+	struct rx_attention *(*rx_desc_get_attention)(struct hal_rx_desc *desc);
+	u8 *(*rx_desc_get_msdu_payload)(struct hal_rx_desc *desc);
 };
 
 extern const struct ath11k_hw_ops ipq8074_ops;
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index d919e5d36105..e54f1ef65d74 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -775,6 +775,7 @@ static int ath11k_pci_config_irq(struct ath11k_base *ab)
 		}
 
 		ab->irq_num[irq_idx] = irq;
+
 		ath11k_pci_ce_irq_disable(ab, i);
 	}
 
diff --git a/drivers/net/wireless/ath/ath11k/rx_desc.h b/drivers/net/wireless/ath/ath11k/rx_desc.h
index 86494da1069a..0cdb4a1f816e 100644
--- a/drivers/net/wireless/ath/ath11k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath11k/rx_desc.h
@@ -414,7 +414,7 @@ struct rx_attention {
 
 #define RX_MPDU_START_RAW_MPDU			BIT(0)
 
-struct rx_mpdu_start {
+struct rx_mpdu_start_ipq8074 {
 	__le16 info0;
 	__le16 phy_ppdu_id;
 	__le16 ast_index;
@@ -440,6 +440,112 @@ struct rx_mpdu_start {
 	__le32 raw;
 } __packed;
 
+#define RX_MPDU_START_INFO7_REO_DEST_IND		GENMASK(4, 0)
+#define RX_MPDU_START_INFO7_LMAC_PEER_ID_MSB		GENMASK(6, 5)
+#define RX_MPDU_START_INFO7_FLOW_ID_TOEPLITZ		BIT(7)
+#define RX_MPDU_START_INFO7_PKT_SEL_FP_UCAST_DATA	BIT(8)
+#define RX_MPDU_START_INFO7_PKT_SEL_FP_MCAST_DATA	BIT(9)
+#define RX_MPDU_START_INFO7_PKT_SEL_FP_CTRL_BAR		BIT(10)
+#define RX_MPDU_START_INFO7_RXDMA0_SRC_RING_SEL		GENMASK(12, 11)
+#define RX_MPDU_START_INFO7_RXDMA0_DST_RING_SEL		GENMASK(14, 13)
+
+#define RX_MPDU_START_INFO8_REO_QUEUE_DESC_HI		GENMASK(7, 0)
+#define RX_MPDU_START_INFO8_RECV_QUEUE_NUM		GENMASK(23, 8)
+#define RX_MPDU_START_INFO8_PRE_DELIM_ERR_WARN		BIT(24)
+#define RX_MPDU_START_INFO8_FIRST_DELIM_ERR		BIT(25)
+
+#define RX_MPDU_START_INFO9_EPD_EN			BIT(0)
+#define RX_MPDU_START_INFO9_ALL_FRAME_ENCPD		BIT(1)
+#define RX_MPDU_START_INFO9_ENC_TYPE			GENMASK(5, 2)
+#define RX_MPDU_START_INFO9_VAR_WEP_KEY_WIDTH		GENMASK(7, 6)
+#define RX_MPDU_START_INFO9_MESH_STA			GENMASK(9, 8)
+#define RX_MPDU_START_INFO9_BSSID_HIT			BIT(10)
+#define RX_MPDU_START_INFO9_BSSID_NUM			GENMASK(14, 11)
+#define RX_MPDU_START_INFO9_TID				GENMASK(18, 15)
+
+#define RX_MPDU_START_INFO10_RXPCU_MPDU_FLTR		GENMASK(1, 0)
+#define RX_MPDU_START_INFO10_SW_FRAME_GRP_ID		GENMASK(8, 2)
+#define RX_MPDU_START_INFO10_NDP_FRAME			BIT(9)
+#define RX_MPDU_START_INFO10_PHY_ERR			BIT(10)
+#define RX_MPDU_START_INFO10_PHY_ERR_MPDU_HDR		BIT(11)
+#define RX_MPDU_START_INFO10_PROTO_VER_ERR		BIT(12)
+#define RX_MPDU_START_INFO10_AST_LOOKUP_VALID		BIT(13)
+
+#define RX_MPDU_START_INFO11_MPDU_FCTRL_VALID		BIT(0)
+#define RX_MPDU_START_INFO11_MPDU_DUR_VALID		BIT(1)
+#define RX_MPDU_START_INFO11_MAC_ADDR1_VALID		BIT(2)
+#define RX_MPDU_START_INFO11_MAC_ADDR2_VALID		BIT(3)
+#define RX_MPDU_START_INFO11_MAC_ADDR3_VALID		BIT(4)
+#define RX_MPDU_START_INFO11_MAC_ADDR4_VALID		BIT(5)
+#define RX_MPDU_START_INFO11_MPDU_SEQ_CTRL_VALID	BIT(6)
+#define RX_MPDU_START_INFO11_MPDU_QOS_CTRL_VALID	BIT(7)
+#define RX_MPDU_START_INFO11_MPDU_HT_CTRL_VALID		BIT(8)
+#define RX_MPDU_START_INFO11_ENCRYPT_INFO_VALID		BIT(9)
+#define RX_MPDU_START_INFO11_MPDU_FRAG_NUMBER		GENMASK(13, 10)
+#define RX_MPDU_START_INFO11_MORE_FRAG_FLAG		BIT(14)
+#define RX_MPDU_START_INFO11_FROM_DS			BIT(16)
+#define RX_MPDU_START_INFO11_TO_DS			BIT(17)
+#define RX_MPDU_START_INFO11_ENCRYPTED			BIT(18)
+#define RX_MPDU_START_INFO11_MPDU_RETRY			BIT(19)
+#define RX_MPDU_START_INFO11_MPDU_SEQ_NUM		GENMASK(31, 20)
+
+#define RX_MPDU_START_INFO12_KEY_ID			GENMASK(7, 0)
+#define RX_MPDU_START_INFO12_NEW_PEER_ENTRY		BIT(8)
+#define RX_MPDU_START_INFO12_DECRYPT_NEEDED		BIT(9)
+#define RX_MPDU_START_INFO12_DECAP_TYPE			GENMASK(11, 10)
+#define RX_MPDU_START_INFO12_VLAN_TAG_C_PADDING		BIT(12)
+#define RX_MPDU_START_INFO12_VLAN_TAG_S_PADDING		BIT(13)
+#define RX_MPDU_START_INFO12_STRIP_VLAN_TAG_C		BIT(14)
+#define RX_MPDU_START_INFO12_STRIP_VLAN_TAG_S		BIT(15)
+#define RX_MPDU_START_INFO12_PRE_DELIM_COUNT		GENMASK(27, 16)
+#define RX_MPDU_START_INFO12_AMPDU_FLAG			BIT(28)
+#define RX_MPDU_START_INFO12_BAR_FRAME			BIT(29)
+#define RX_MPDU_START_INFO12_RAW_MPDU			BIT(30)
+
+#define RX_MPDU_START_INFO13_MPDU_LEN			GENMASK(13, 0)
+#define RX_MPDU_START_INFO13_FIRST_MPDU			BIT(14)
+#define RX_MPDU_START_INFO13_MCAST_BCAST		BIT(15)
+#define RX_MPDU_START_INFO13_AST_IDX_NOT_FOUND		BIT(16)
+#define RX_MPDU_START_INFO13_AST_IDX_TIMEOUT		BIT(17)
+#define RX_MPDU_START_INFO13_POWER_MGMT			BIT(18)
+#define RX_MPDU_START_INFO13_NON_QOS			BIT(19)
+#define RX_MPDU_START_INFO13_NULL_DATA			BIT(20)
+#define RX_MPDU_START_INFO13_MGMT_TYPE			BIT(21)
+#define RX_MPDU_START_INFO13_CTRL_TYPE			BIT(22)
+#define RX_MPDU_START_INFO13_MORE_DATA			BIT(23)
+#define RX_MPDU_START_INFO13_EOSP			BIT(24)
+#define RX_MPDU_START_INFO13_FRAGMENT			BIT(25)
+#define RX_MPDU_START_INFO13_ORDER			BIT(26)
+#define RX_MPDU_START_INFO13_UAPSD_TRIGGER		BIT(27)
+#define RX_MPDU_START_INFO13_ENCRYPT_REQUIRED		BIT(28)
+#define RX_MPDU_START_INFO13_DIRECTED			BIT(29)
+#define RX_MPDU_START_INFO13_AMSDU_PRESENT		BIT(30)
+
+struct rx_mpdu_start_qcn9074 {
+	__le32 info7;
+	__le32 reo_queue_desc_lo;
+	__le32 info8;
+	__le32 pn[4];
+	__le32 info9;
+	__le32 peer_meta_data;
+	__le16 info10;
+	__le16 phy_ppdu_id;
+	__le16 ast_index;
+	__le16 sw_peer_id;
+	__le32 info11;
+	__le32 info12;
+	__le32 info13;
+	__le16 frame_ctrl;
+	__le16 duration;
+	u8 addr1[ETH_ALEN];
+	u8 addr2[ETH_ALEN];
+	u8 addr3[ETH_ALEN];
+	__le16 seq_ctrl;
+	u8 addr4[ETH_ALEN];
+	__le16 qos_ctrl;
+	__le32 ht_ctrl;
+} __packed;
+
 /* rx_mpdu_start
  *
  * rxpcu_mpdu_filter_in_category
@@ -672,7 +778,19 @@ enum rx_msdu_start_reception_type {
 #define RX_MSDU_START_INFO3_RECEPTION_TYPE	GENMASK(23, 21)
 #define RX_MSDU_START_INFO3_MIMO_SS_BITMAP	GENMASK(31, 24)
 
-struct rx_msdu_start {
+struct rx_msdu_start_ipq8074 {
+	__le16 info0;
+	__le16 phy_ppdu_id;
+	__le32 info1;
+	__le32 info2;
+	__le32 toeplitz_hash;
+	__le32 flow_id_toeplitz;
+	__le32 info3;
+	__le32 ppdu_start_timestamp;
+	__le32 phy_meta_data;
+} __packed;
+
+struct rx_msdu_start_qcn9074 {
 	__le16 info0;
 	__le16 phy_ppdu_id;
 	__le32 info1;
@@ -682,6 +800,8 @@ struct rx_msdu_start {
 	__le32 info3;
 	__le32 ppdu_start_timestamp;
 	__le32 phy_meta_data;
+	__le16 vlan_ctag_c1;
+	__le16 vlan_stag_c1;
 } __packed;
 
 /* rx_msdu_start
@@ -894,7 +1014,7 @@ struct rx_msdu_start {
 #define RX_MSDU_END_INFO5_REO_DEST_IND		GENMASK(5, 1)
 #define RX_MSDU_END_INFO5_FLOW_IDX		GENMASK(25, 6)
 
-struct rx_msdu_end {
+struct rx_msdu_end_ipq8074 {
 	__le16 info0;
 	__le16 phy_ppdu_id;
 	__le16 ip_hdr_cksum;
@@ -917,6 +1037,58 @@ struct rx_msdu_end {
 	__le16 sa_sw_peer_id;
 } __packed;
 
+#define RX_MSDU_END_MPDU_LENGTH_INFO		GENMASK(13, 0)
+
+#define RX_MSDU_END_INFO2_DA_OFFSET		GENMASK(5, 0)
+#define RX_MSDU_END_INFO2_SA_OFFSET		GENMASK(11, 6)
+#define RX_MSDU_END_INFO2_DA_OFFSET_VALID	BIT(12)
+#define RX_MSDU_END_INFO2_SA_OFFSET_VALID	BIT(13)
+#define RX_MSDU_END_INFO2_L3_TYPE		GENMASK(31, 16)
+
+#define RX_MSDU_END_INFO4_SA_IDX_TIMEOUT	BIT(0)
+#define RX_MSDU_END_INFO4_DA_IDX_TIMEOUT	BIT(1)
+#define RX_MSDU_END_INFO4_MSDU_LIMIT_ERR	BIT(2)
+#define RX_MSDU_END_INFO4_FLOW_IDX_TIMEOUT	BIT(3)
+#define RX_MSDU_END_INFO4_FLOW_IDX_INVALID	BIT(4)
+#define RX_MSDU_END_INFO4_WIFI_PARSER_ERR	BIT(5)
+#define RX_MSDU_END_INFO4_AMSDU_PARSER_ERR	BIT(6)
+#define RX_MSDU_END_INFO4_SA_IS_VALID		BIT(7)
+#define RX_MSDU_END_INFO4_DA_IS_VALID		BIT(8)
+#define RX_MSDU_END_INFO4_DA_IS_MCBC		BIT(9)
+#define RX_MSDU_END_INFO4_L3_HDR_PADDING	GENMASK(11, 10)
+#define RX_MSDU_END_INFO4_FIRST_MSDU		BIT(12)
+#define RX_MSDU_END_INFO4_LAST_MSDU		BIT(13)
+
+#define RX_MSDU_END_INFO6_AGGR_COUNT		GENMASK(7, 0)
+#define RX_MSDU_END_INFO6_FLOW_AGGR_CONTN	BIT(8)
+#define RX_MSDU_END_INFO6_FISA_TIMEOUT		BIT(9)
+
+struct rx_msdu_end_qcn9074 {
+	__le16 info0;
+	__le16 phy_ppdu_id;
+	__le16 ip_hdr_cksum;
+	__le16 mpdu_length_info;
+	__le32 info1;
+	__le32 rule_indication[2];
+	__le32 info2;
+	__le32 ipv6_options_crc;
+	__le32 tcp_seq_num;
+	__le32 tcp_ack_num;
+	__le16 info3;
+	__le16 window_size;
+	__le16 tcp_udp_cksum;
+	__le16 info4;
+	__le16 sa_idx;
+	__le16 da_idx;
+	__le32 info5;
+	__le32 fse_metadata;
+	__le16 cce_metadata;
+	__le16 sa_sw_peer_id;
+	__le32 info6;
+	__le16 cum_l4_cksum;
+	__le16 cum_ip_length;
+} __packed;
+
 /* rx_msdu_end
  *
  * rxpcu_mpdu_filter_in_category
@@ -1190,16 +1362,16 @@ struct rx_mpdu_end {
 
 #define HAL_RX_DESC_HDR_STATUS_LEN	120
 
-struct hal_rx_desc {
+struct hal_rx_desc_ipq8074 {
 	__le32 msdu_end_tag;
-	struct rx_msdu_end msdu_end;
+	struct rx_msdu_end_ipq8074 msdu_end;
 	__le32 rx_attn_tag;
 	struct rx_attention attention;
 	__le32 msdu_start_tag;
-	struct rx_msdu_start msdu_start;
+	struct rx_msdu_start_ipq8074 msdu_start;
 	u8 rx_padding0[HAL_RX_DESC_PADDING0_BYTES];
 	__le32 mpdu_start_tag;
-	struct rx_mpdu_start mpdu_start;
+	struct rx_mpdu_start_ipq8074 mpdu_start;
 	__le32 mpdu_end_tag;
 	struct rx_mpdu_end mpdu_end;
 	u8 rx_padding1[HAL_RX_DESC_PADDING1_BYTES];
@@ -1209,6 +1381,32 @@ struct hal_rx_desc {
 	u8 msdu_payload[0];
 } __packed;
 
+struct hal_rx_desc_qcn9074 {
+	__le32 msdu_end_tag;
+	struct rx_msdu_end_qcn9074 msdu_end;
+	__le32 rx_attn_tag;
+	struct rx_attention attention;
+	__le32 msdu_start_tag;
+	struct rx_msdu_start_qcn9074 msdu_start;
+	u8 rx_padding0[HAL_RX_DESC_PADDING0_BYTES];
+	__le32 mpdu_start_tag;
+	struct rx_mpdu_start_qcn9074 mpdu_start;
+	__le32 mpdu_end_tag;
+	struct rx_mpdu_end mpdu_end;
+	u8 rx_padding1[HAL_RX_DESC_PADDING1_BYTES];
+	__le32 hdr_status_tag;
+	__le32 phy_ppdu_id;
+	u8 hdr_status[HAL_RX_DESC_HDR_STATUS_LEN];
+	u8 msdu_payload[0];
+} __packed;
+
+struct hal_rx_desc {
+	union {
+		struct hal_rx_desc_ipq8074 ipq8074;
+		struct hal_rx_desc_qcn9074 qcn9074;
+	} u;
+} __packed;
+
 #define HAL_RX_RU_ALLOC_TYPE_MAX 6
 #define RU_26  1
 #define RU_52  2
-- 
cgit v1.2.3


From 6289ac2b7182d418ee68e5c0f3f83d383d7a72ed Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:23 +0200
Subject: ath11k: add CE interrupt support for QCN9074

Define host CE configuration for QCN9074 since the max CE count is six.
Available MSI interrupt is five so cannot able to map the ce_id directly
for the msi_data_idx. Added get_ce_msi_idx ops in ath11k_hif_ops to get
the CE MSI idx which is used to initialize the CE ring.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-10-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/ce.c   |  58 +++++++++-
 drivers/net/wireless/ath/ath11k/ce.h   |   1 +
 drivers/net/wireless/ath/ath11k/core.c |   6 +
 drivers/net/wireless/ath/ath11k/core.h |   2 +
 drivers/net/wireless/ath/ath11k/hif.h  |  10 ++
 drivers/net/wireless/ath/ath11k/hw.c   | 194 +++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/pci.c  |  31 +++++-
 7 files changed, 294 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/ce.c b/drivers/net/wireless/ath/ath11k/ce.c
index 987c65010272..de8b632b058c 100644
--- a/drivers/net/wireless/ath/ath11k/ce.c
+++ b/drivers/net/wireless/ath/ath11k/ce.c
@@ -187,6 +187,59 @@ const struct ce_attr ath11k_host_ce_config_qca6390[] = {
 
 };
 
+const struct ce_attr ath11k_host_ce_config_qcn9074[] = {
+	/* CE0: host->target HTC control and raw streams */
+	{
+		.flags = CE_ATTR_FLAGS,
+		.src_nentries = 16,
+		.src_sz_max = 2048,
+		.dest_nentries = 0,
+	},
+
+	/* CE1: target->host HTT + HTC control */
+	{
+		.flags = CE_ATTR_FLAGS,
+		.src_nentries = 0,
+		.src_sz_max = 2048,
+		.dest_nentries = 512,
+		.recv_cb = ath11k_htc_rx_completion_handler,
+	},
+
+	/* CE2: target->host WMI */
+	{
+		.flags = CE_ATTR_FLAGS,
+		.src_nentries = 0,
+		.src_sz_max = 2048,
+		.dest_nentries = 32,
+		.recv_cb = ath11k_htc_rx_completion_handler,
+	},
+
+	/* CE3: host->target WMI (mac0) */
+	{
+		.flags = CE_ATTR_FLAGS,
+		.src_nentries = 32,
+		.src_sz_max = 2048,
+		.dest_nentries = 0,
+	},
+
+	/* CE4: host->target HTT */
+	{
+		.flags = CE_ATTR_FLAGS | CE_ATTR_DIS_INTR,
+		.src_nentries = 2048,
+		.src_sz_max = 256,
+		.dest_nentries = 0,
+	},
+
+	/* CE5: target->host pktlog */
+	{
+		.flags = CE_ATTR_FLAGS,
+		.src_nentries = 0,
+		.src_sz_max = 2048,
+		.dest_nentries = 512,
+		.recv_cb = ath11k_dp_htt_htc_t2h_msg_handler,
+	},
+};
+
 static bool ath11k_ce_need_shadow_fix(int ce_id)
 {
 	/* only ce4 needs shadow workaroud*/
@@ -455,7 +508,7 @@ static void ath11k_ce_srng_msi_ring_params_setup(struct ath11k_base *ab, u32 ce_
 						 struct hal_srng_params *ring_params)
 {
 	u32 msi_data_start;
-	u32 msi_data_count;
+	u32 msi_data_count, msi_data_idx;
 	u32 msi_irq_start;
 	u32 addr_lo;
 	u32 addr_hi;
@@ -469,10 +522,11 @@ static void ath11k_ce_srng_msi_ring_params_setup(struct ath11k_base *ab, u32 ce_
 		return;
 
 	ath11k_get_msi_address(ab, &addr_lo, &addr_hi);
+	ath11k_get_ce_msi_idx(ab, ce_id, &msi_data_idx);
 
 	ring_params->msi_addr = addr_lo;
 	ring_params->msi_addr |= (dma_addr_t)(((uint64_t)addr_hi) << 32);
-	ring_params->msi_data = (ce_id % msi_data_count) + msi_data_start;
+	ring_params->msi_data = (msi_data_idx % msi_data_count) + msi_data_start;
 	ring_params->flags |= HAL_SRNG_FLAGS_MSI_INTR;
 }
 
diff --git a/drivers/net/wireless/ath/ath11k/ce.h b/drivers/net/wireless/ath/ath11k/ce.h
index d6eeef919349..713f766cac22 100644
--- a/drivers/net/wireless/ath/ath11k/ce.h
+++ b/drivers/net/wireless/ath/ath11k/ce.h
@@ -173,6 +173,7 @@ struct ath11k_ce {
 
 extern const struct ce_attr ath11k_host_ce_config_ipq8074[];
 extern const struct ce_attr ath11k_host_ce_config_qca6390[];
+extern const struct ce_attr ath11k_host_ce_config_qcn9074[];
 
 void ath11k_ce_cleanup_pipes(struct ath11k_base *ab);
 void ath11k_ce_rx_replenish_retry(struct timer_list *t);
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index a17812270c07..82b5e8c41f8f 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -164,6 +164,12 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.hw_ops = &qcn9074_ops,
 		.internal_sleep_clock = false,
 		.regs = &qcn9074_regs,
+		.host_ce_config = ath11k_host_ce_config_qcn9074,
+		.ce_count = 6,
+		.target_ce_config = ath11k_target_ce_config_wlan_qcn9074,
+		.target_ce_count = 9,
+		.svc_to_ce_map = ath11k_target_service_to_ce_map_wlan_qcn9074,
+		.svc_to_ce_map_len = 18,
 		.rxdma1_enable = true,
 		.num_rxmda_per_pdev = 1,
 		.rx_mac_buf_ring = false,
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 912294f7657c..9cdc4f0b0690 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -878,6 +878,8 @@ extern const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_ipq6018
 extern const struct ce_pipe_config ath11k_target_ce_config_wlan_qca6390[];
 extern const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_qca6390[];
 
+extern const struct ce_pipe_config ath11k_target_ce_config_wlan_qcn9074[];
+extern const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_qcn9074[];
 int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab);
 int ath11k_core_pre_init(struct ath11k_base *ab);
 int ath11k_core_init(struct ath11k_base *ath11k);
diff --git a/drivers/net/wireless/ath/ath11k/hif.h b/drivers/net/wireless/ath/ath11k/hif.h
index 6285c52afc44..e9366f786fbb 100644
--- a/drivers/net/wireless/ath/ath11k/hif.h
+++ b/drivers/net/wireless/ath/ath11k/hif.h
@@ -28,6 +28,7 @@ struct ath11k_hif_ops {
 				u32 *msi_addr_hi);
 	void (*ce_irq_enable)(struct ath11k_base *ab);
 	void (*ce_irq_disable)(struct ath11k_base *ab);
+	void (*get_ce_msi_idx)(struct ath11k_base *ab, u32 ce_id, u32 *msi_idx);
 };
 
 static inline void ath11k_hif_ce_irq_enable(struct ath11k_base *ab)
@@ -124,4 +125,13 @@ static inline void ath11k_get_msi_address(struct ath11k_base *ab, u32 *msi_addr_
 
 	ab->hif.ops->get_msi_address(ab, msi_addr_lo, msi_addr_hi);
 }
+
+static inline void ath11k_get_ce_msi_idx(struct ath11k_base *ab, u32 ce_id,
+					 u32 *msi_data_idx)
+{
+	if (ab->hif.ops->get_ce_msi_idx)
+		ab->hif.ops->get_ce_msi_idx(ab, ce_id, msi_data_idx);
+	else
+		*msi_data_idx = ce_id;
+}
 #endif /* _HIF_H_ */
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index 462d5e81bee8..c943e34c3721 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -1241,6 +1241,200 @@ const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_qca6390[] = {
 	},
 };
 
+/* Target firmware's Copy Engine configuration. */
+const struct ce_pipe_config ath11k_target_ce_config_wlan_qcn9074[] = {
+	/* CE0: host->target HTC control and raw streams */
+	{
+		.pipenum = __cpu_to_le32(0),
+		.pipedir = __cpu_to_le32(PIPEDIR_OUT),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(2048),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE1: target->host HTT + HTC control */
+	{
+		.pipenum = __cpu_to_le32(1),
+		.pipedir = __cpu_to_le32(PIPEDIR_IN),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(2048),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE2: target->host WMI */
+	{
+		.pipenum = __cpu_to_le32(2),
+		.pipedir = __cpu_to_le32(PIPEDIR_IN),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(2048),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE3: host->target WMI */
+	{
+		.pipenum = __cpu_to_le32(3),
+		.pipedir = __cpu_to_le32(PIPEDIR_OUT),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(2048),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE4: host->target HTT */
+	{
+		.pipenum = __cpu_to_le32(4),
+		.pipedir = __cpu_to_le32(PIPEDIR_OUT),
+		.nentries = __cpu_to_le32(256),
+		.nbytes_max = __cpu_to_le32(256),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS | CE_ATTR_DIS_INTR),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE5: target->host Pktlog */
+	{
+		.pipenum = __cpu_to_le32(5),
+		.pipedir = __cpu_to_le32(PIPEDIR_IN),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(2048),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE6: Reserved for target autonomous hif_memcpy */
+	{
+		.pipenum = __cpu_to_le32(6),
+		.pipedir = __cpu_to_le32(PIPEDIR_INOUT),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(16384),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE7 used only by Host */
+	{
+		.pipenum = __cpu_to_le32(7),
+		.pipedir = __cpu_to_le32(PIPEDIR_INOUT_H2H),
+		.nentries = __cpu_to_le32(0),
+		.nbytes_max = __cpu_to_le32(0),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS | CE_ATTR_DIS_INTR),
+		.reserved = __cpu_to_le32(0),
+	},
+
+	/* CE8 target->host used only by IPA */
+	{
+		.pipenum = __cpu_to_le32(8),
+		.pipedir = __cpu_to_le32(PIPEDIR_INOUT),
+		.nentries = __cpu_to_le32(32),
+		.nbytes_max = __cpu_to_le32(16384),
+		.flags = __cpu_to_le32(CE_ATTR_FLAGS),
+		.reserved = __cpu_to_le32(0),
+	},
+	/* CE 9, 10, 11 are used by MHI driver */
+};
+
+/* Map from service/endpoint to Copy Engine.
+ * This table is derived from the CE_PCI TABLE, above.
+ * It is passed to the Target at startup for use by firmware.
+ */
+const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_qcn9074[] = {
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_VO),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(3),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_VO),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(2),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_BK),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(3),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_BK),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(2),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_BE),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(3),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_BE),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(2),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_VI),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(3),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_DATA_VI),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(2),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_CONTROL),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(3),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_WMI_CONTROL),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(2),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_RSVD_CTRL),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(0),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_RSVD_CTRL),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(1),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_TEST_RAW_STREAMS),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(0),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_TEST_RAW_STREAMS),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(1),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_HTT_DATA_MSG),
+		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
+		__cpu_to_le32(4),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_HTT_DATA_MSG),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(1),
+	},
+	{
+		__cpu_to_le32(ATH11K_HTC_SVC_ID_PKT_LOG),
+		__cpu_to_le32(PIPEDIR_IN),	/* in = DL = target -> host */
+		__cpu_to_le32(5),
+	},
+
+	/* (Additions here) */
+
+	{ /* must be last */
+		__cpu_to_le32(0),
+		__cpu_to_le32(0),
+		__cpu_to_le32(0),
+	},
+};
+
 const struct ath11k_hw_regs ipq8074_regs = {
 	/* SW2TCL(x) R0 ring configuration address */
 	.hal_tcl1_ring_base_lsb = 0x00000510,
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index e54f1ef65d74..f58ee6762660 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -482,6 +482,23 @@ int ath11k_pci_get_user_msi_assignment(struct ath11k_pci *ab_pci, char *user_nam
 	return -EINVAL;
 }
 
+static void ath11k_pci_get_ce_msi_idx(struct ath11k_base *ab, u32 ce_id,
+				      u32 *msi_idx)
+{
+	u32 i, msi_data_idx;
+
+	for (i = 0, msi_data_idx = 0; i < ab->hw_params.ce_count; i++) {
+		if (ath11k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR)
+			continue;
+
+		if (ce_id == i)
+			break;
+
+		msi_data_idx++;
+	}
+	*msi_idx = msi_data_idx;
+}
+
 static int ath11k_get_user_msi_assignment(struct ath11k_base *ab, char *user_name,
 					  int *num_vectors, u32 *user_base_data,
 					  u32 *base_vector)
@@ -741,7 +758,7 @@ static int ath11k_pci_config_irq(struct ath11k_base *ab)
 {
 	struct ath11k_ce_pipe *ce_pipe;
 	u32 msi_data_start;
-	u32 msi_data_count;
+	u32 msi_data_count, msi_data_idx;
 	u32 msi_irq_start;
 	unsigned int msi_data;
 	int irq, i, ret, irq_idx;
@@ -753,14 +770,14 @@ static int ath11k_pci_config_irq(struct ath11k_base *ab)
 		return ret;
 
 	/* Configure CE irqs */
-	for (i = 0; i < ab->hw_params.ce_count; i++) {
-		msi_data = (i % msi_data_count) + msi_irq_start;
-		irq = ath11k_pci_get_msi_irq(ab->dev, msi_data);
-		ce_pipe = &ab->ce.ce_pipe[i];
-
+	for (i = 0, msi_data_idx = 0; i < ab->hw_params.ce_count; i++) {
 		if (ath11k_ce_get_attr_flags(ab, i) & CE_ATTR_DIS_INTR)
 			continue;
 
+		msi_data = (msi_data_idx % msi_data_count) + msi_irq_start;
+		irq = ath11k_pci_get_msi_irq(ab->dev, msi_data);
+		ce_pipe = &ab->ce.ce_pipe[i];
+
 		irq_idx = ATH11K_PCI_IRQ_CE0_OFFSET + i;
 
 		tasklet_setup(&ce_pipe->intr_tq, ath11k_pci_ce_tasklet);
@@ -775,6 +792,7 @@ static int ath11k_pci_config_irq(struct ath11k_base *ab)
 		}
 
 		ab->irq_num[irq_idx] = irq;
+		msi_data_idx++;
 
 		ath11k_pci_ce_irq_disable(ab, i);
 	}
@@ -1135,6 +1153,7 @@ static const struct ath11k_hif_ops ath11k_pci_hif_ops = {
 	.map_service_to_pipe = ath11k_pci_map_service_to_pipe,
 	.ce_irq_enable = ath11k_pci_hif_ce_irq_enable,
 	.ce_irq_disable = ath11k_pci_hif_ce_irq_disable,
+	.get_ce_msi_idx = ath11k_pci_get_ce_msi_idx,
 };
 
 static int ath11k_pci_probe(struct pci_dev *pdev,
-- 
cgit v1.2.3


From 7dc67af063e3f0237c864504bb2188ada753b804 Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:24 +0200
Subject: ath11k: add extended interrupt support for QCN9074

Update the specific hw ring mask for QCN9074. Update the timestamp
information while processing DP and CE interrupts.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-11-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c  |  1 +
 drivers/net/wireless/ath/ath11k/dp_tx.c |  4 ++--
 drivers/net/wireless/ath/ath11k/hw.c    | 41 +++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/hw.h    |  1 +
 drivers/net/wireless/ath/ath11k/pci.c   | 13 +++++++++--
 5 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 82b5e8c41f8f..ddedc331edef 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -162,6 +162,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.single_pdev_only = false,
 		.qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCN9074,
 		.hw_ops = &qcn9074_ops,
+		.ring_mask = &ath11k_hw_ring_mask_qcn9074,
 		.internal_sleep_clock = false,
 		.regs = &qcn9074_regs,
 		.host_ce_config = ath11k_host_ce_config_qcn9074,
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index f5c277977e08..8bba5234f81f 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -792,8 +792,8 @@ int ath11k_dp_tx_htt_srng_setup(struct ath11k_base *ab, u32 ring_id,
 	cmd->ring_tail_off32_remote_addr_hi = (u64)tp_addr >>
 					      HAL_ADDR_MSB_REG_SHIFT;
 
-	cmd->ring_msi_addr_lo = params.msi_addr & 0xffffffff;
-	cmd->ring_msi_addr_hi = ((uint64_t)(params.msi_addr) >> 32) & 0xffffffff;
+	cmd->ring_msi_addr_lo = lower_32_bits(params.msi_addr);
+	cmd->ring_msi_addr_hi = upper_32_bits(params.msi_addr);
 	cmd->msi_data = params.msi_data;
 
 	cmd->intr_info = FIELD_PREP(
diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c
index c943e34c3721..377ae8d5b58f 100644
--- a/drivers/net/wireless/ath/ath11k/hw.c
+++ b/drivers/net/wireless/ath/ath11k/hw.c
@@ -1435,6 +1435,47 @@ const struct service_to_pipe ath11k_target_service_to_ce_map_wlan_qcn9074[] = {
 	},
 };
 
+const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qcn9074 = {
+	.tx  = {
+		ATH11K_TX_RING_MASK_0,
+		ATH11K_TX_RING_MASK_1,
+		ATH11K_TX_RING_MASK_2,
+	},
+	.rx_mon_status = {
+		0, 0, 0,
+		ATH11K_RX_MON_STATUS_RING_MASK_0,
+		ATH11K_RX_MON_STATUS_RING_MASK_1,
+		ATH11K_RX_MON_STATUS_RING_MASK_2,
+	},
+	.rx = {
+		0, 0, 0, 0,
+		ATH11K_RX_RING_MASK_0,
+		ATH11K_RX_RING_MASK_1,
+		ATH11K_RX_RING_MASK_2,
+		ATH11K_RX_RING_MASK_3,
+	},
+	.rx_err = {
+		0, 0, 0,
+		ATH11K_RX_ERR_RING_MASK_0,
+	},
+	.rx_wbm_rel = {
+		0, 0, 0,
+		ATH11K_RX_WBM_REL_RING_MASK_0,
+	},
+	.reo_status = {
+		0, 0, 0,
+		ATH11K_REO_STATUS_RING_MASK_0,
+	},
+	.rxdma2host = {
+		0, 0, 0,
+		ATH11K_RXDMA2HOST_RING_MASK_0,
+	},
+	.host2rxdma = {
+		0, 0, 0,
+		ATH11K_HOST2RXDMA_RING_MASK_0,
+	},
+};
+
 const struct ath11k_hw_regs ipq8074_regs = {
 	/* SW2TCL(x) R0 ring configuration address */
 	.hal_tcl1_ring_base_lsb = 0x00000510,
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 54b7edfc4f88..c81a6328361d 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -208,6 +208,7 @@ extern const struct ath11k_hw_ops qcn9074_ops;
 
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_ipq8074;
 extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qca6390;
+extern const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qcn9074;
 
 static inline
 int ath11k_hw_get_mac_from_pdev_id(struct ath11k_hw_params *hw,
diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index f58ee6762660..74e490356251 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -592,6 +592,9 @@ static irqreturn_t ath11k_pci_ce_interrupt_handler(int irq, void *arg)
 {
 	struct ath11k_ce_pipe *ce_pipe = arg;
 
+	/* last interrupt received for this CE */
+	ce_pipe->timestamp = jiffies;
+
 	ath11k_pci_ce_irq_disable(ce_pipe->ab, ce_pipe->pipe_num);
 	tasklet_schedule(&ce_pipe->intr_tq);
 
@@ -686,6 +689,9 @@ static irqreturn_t ath11k_pci_ext_interrupt_handler(int irq, void *arg)
 
 	ath11k_dbg(irq_grp->ab, ATH11K_DBG_PCI, "ext irq:%d\n", irq);
 
+	/* last interrupt received for this group */
+	irq_grp->timestamp = jiffies;
+
 	ath11k_pci_ext_grp_disable(irq_grp);
 
 	napi_schedule(&irq_grp->napi);
@@ -696,8 +702,9 @@ static irqreturn_t ath11k_pci_ext_interrupt_handler(int irq, void *arg)
 static int ath11k_pci_ext_irq_config(struct ath11k_base *ab)
 {
 	int i, j, ret, num_vectors = 0;
-	u32 user_base_data = 0, base_vector = 0;
+	u32 user_base_data = 0, base_vector = 0, base_idx;
 
+	base_idx = ATH11K_PCI_IRQ_CE0_OFFSET + CE_COUNT_MAX;
 	ret = ath11k_pci_get_user_msi_assignment(ath11k_pci_priv(ab), "DP",
 						 &num_vectors,
 						 &user_base_data,
@@ -727,7 +734,7 @@ static int ath11k_pci_ext_irq_config(struct ath11k_base *ab)
 		}
 
 		irq_grp->num_irq = num_irq;
-		irq_grp->irqs[0] = base_vector + i;
+		irq_grp->irqs[0] = base_idx + i;
 
 		for (j = 0; j < irq_grp->num_irq; j++) {
 			int irq_idx = irq_grp->irqs[j];
@@ -738,6 +745,8 @@ static int ath11k_pci_ext_irq_config(struct ath11k_base *ab)
 
 			ath11k_dbg(ab, ATH11K_DBG_PCI,
 				   "irq:%d group:%d\n", irq, i);
+
+			irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);
 			ret = request_irq(irq, ath11k_pci_ext_interrupt_handler,
 					  IRQF_SHARED,
 					  "DP_EXT_IRQ", irq_grp);
-- 
cgit v1.2.3


From 4e80946197a83a6115e308334618449b77696d6a Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Tue, 16 Feb 2021 09:16:25 +0200
Subject: ath11k: add qcn9074 pci device support

QCN9074 is PCI based 11ax radio.
	- has 2G/5G/6G variants.
	- has NSS 2x2 and 4x4 variants.

QCN9074 uses 45MB of HOST DDR memory, target requests host memory in
segments, each segment is of 2MB size and is physcial contiguous and
use static window configuration.

Currently there are still two issues with QCN9074, see below. So we don't add
QCN9074 PCI id yet to make sure the driver is loaded. The id will be added only
after the issues are fixed.

Issue 1:
 ath11k_pci 0000:06:00.0: qmi failed memory request, err = -110
 ath11k_pci 0000:06:00.0: qmi failed to respond fw mem req:-110

Issue 2:
 ath11k_pci 0000:06:00.0: firmware crashed: MHI_CB_SYS_ERROR
 ath11k_pci 0000:06:00.0: qmi failed set mode request, mode: 0, err = -110
 ath11k_pci 0000:06:00.0: qmi failed to send wlan fw mode:-110

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1612946530-28504-12-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/pci.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c
index 74e490356251..0f31eb566fb6 100644
--- a/drivers/net/wireless/ath/ath11k/pci.c
+++ b/drivers/net/wireless/ath/ath11k/pci.c
@@ -35,9 +35,11 @@
 #define ACCESS_ALWAYS_OFF 0xFE0
 
 #define QCA6390_DEVICE_ID		0x1101
+#define QCN9074_DEVICE_ID		0x1104
 
 static const struct pci_device_id ath11k_pci_id_table[] = {
 	{ PCI_VDEVICE(QCOM, QCA6390_DEVICE_ID) },
+	/* TODO: add QCN9074_DEVICE_ID) once firmware issues are resolved */
 	{0}
 };
 
@@ -61,6 +63,15 @@ static const struct ath11k_msi_config ath11k_msi_config[] = {
 			{ .name = "DP", .num_vectors = 18, .base_vector = 14 },
 		},
 	},
+	{
+		.total_vectors = 16,
+		.total_users = 3,
+		.users = (struct ath11k_msi_user[]) {
+			{ .name = "MHI", .num_vectors = 3, .base_vector = 0 },
+			{ .name = "CE", .num_vectors = 5, .base_vector = 3 },
+			{ .name = "DP", .num_vectors = 8, .base_vector = 8 },
+		},
+	},
 };
 
 static const char *irq_name[ATH11K_IRQ_NUM_MAX] = {
@@ -1217,6 +1228,12 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
 			ret = -EOPNOTSUPP;
 			goto err_pci_free_region;
 		}
+		ab_pci->msi_config = &ath11k_msi_config[0];
+		break;
+	case QCN9074_DEVICE_ID:
+		ab_pci->msi_config = &ath11k_msi_config[1];
+		ab->bus_params.static_window_map = true;
+		ab->hw_rev = ATH11K_HW_QCN9074_HW10;
 		break;
 	default:
 		dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n",
@@ -1225,7 +1242,6 @@ static int ath11k_pci_probe(struct pci_dev *pdev,
 		goto err_pci_free_region;
 	}
 
-	ab_pci->msi_config = &ath11k_msi_config[0];
 	ret = ath11k_pci_enable_msi(ab_pci);
 	if (ret) {
 		ath11k_err(ab, "failed to enable msi: %d\n", ret);
-- 
cgit v1.2.3


From 096b625fab8f9d88d9b436288a64b70080219d4b Mon Sep 17 00:00:00 2001
From: Lavanya Suresh <lavaks@codeaurora.org>
Date: Wed, 17 Feb 2021 11:45:45 +0200
Subject: ath11k: Fix sounding dimension config in HE cap

Number of Sounding dimensions config received from firmware for
bandwidth above 80MHz is cleared, and proper value is not set again.
So not resetting it to accept the config from firmware.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01689-QCAHKSWPL_SILICONZ-1

Signed-off-by: Lavanya Suresh <lavaks@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613460136-7170-1-git-send-email-lavaks@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index b391169576e2..d685bea6da24 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3919,8 +3919,6 @@ static int ath11k_mac_copy_he_cap(struct ath11k *ar,
 
 		he_cap_elem->phy_cap_info[5] &=
 			~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK;
-		he_cap_elem->phy_cap_info[5] &=
-			~IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK;
 		he_cap_elem->phy_cap_info[5] |= ar->num_tx_chains - 1;
 
 		switch (i) {
-- 
cgit v1.2.3


From 788f805e8c0a10679fdfa7c6d0e21465e3b62de5 Mon Sep 17 00:00:00 2001
From: Lavanya Suresh <lavaks@codeaurora.org>
Date: Wed, 17 Feb 2021 11:45:45 +0200
Subject: ath11k: Enable radar detection for 160MHz secondary segment

WMI_CHAN_INFO_DFS_FREQ2 needs to be set in wmi vdev start command chan
info parameter, to enable radar detection for secondary segment in 160MHz.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01717-QCAHKSWPL_SILICONZ-1

Signed-off-by: Lavanya Suresh <lavaks@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613480547-28810-1-git-send-email-lavaks@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index d685bea6da24..335d49af7dd5 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -5094,13 +5094,15 @@ ath11k_mac_vdev_start_restart(struct ath11k_vif *arvif,
 		arg.channel.chan_radar =
 			!!(chandef->chan->flags & IEEE80211_CHAN_RADAR);
 
+		arg.channel.freq2_radar =
+			!!(chandef->chan->flags & IEEE80211_CHAN_RADAR);
+
 		arg.channel.passive = arg.channel.chan_radar;
 
 		spin_lock_bh(&ab->base_lock);
 		arg.regdomain = ar->ab->dfs_region;
 		spin_unlock_bh(&ab->base_lock);
 
-		/* TODO: Notify if secondary 80Mhz also needs radar detection */
 		if (he_support) {
 			ret = ath11k_set_he_mu_sounding_mode(ar, arvif);
 			if (ret) {
-- 
cgit v1.2.3


From 6b7abacb9cbe45894a385ea1766fd4febbd6e0df Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Wed, 17 Feb 2021 21:16:40 +0200
Subject: ath11k: print hardware name and version during initialisation

This way it's easy for the user to find what device is actually installed. This
also helps reporting bugs.

Screenshot:

[  459.988812] ath11k_pci 0000:06:00.0: BAR 0: assigned [mem 0xdb000000-0xdbffffff 64bit]
[  459.988867] ath11k_pci 0000:06:00.0: enabling device (0000 -> 0002)
[  459.997048] ath11k_pci 0000:06:00.0: qca6390 hw2.0
[  460.058093] mhi mhi0: Requested to power ON
[  460.059741] mhi mhi0: Power on setup success
[  460.476924] ath11k_pci 0000:06:00.0: chip_id 0x0 chip_family 0xb board_id 0xff soc_id 0xffffffff
[  460.477032] ath11k_pci 0000:06:00.0: fw_version 0x101c06cc fw_build_timestamp 2020-06-24 19:50 fw_build_id

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613589400-18891-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index ddedc331edef..77ce3347ab86 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -1017,7 +1017,7 @@ static int ath11k_init_hw_params(struct ath11k_base *ab)
 
 	ab->hw_params = *hw_params;
 
-	ath11k_dbg(ab, ATH11K_DBG_BOOT, "Hardware name %s\n", ab->hw_params.name);
+	ath11k_info(ab, "%s\n", ab->hw_params.name);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 14ebaeeff8d0f2d5d59b6df6b59f54345839f9d9 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Wed, 17 Feb 2021 14:18:01 -0700
Subject: Revert "ath9k: fix ath_tx_process_buffer() potential null ptr
 dereference"

This reverts commit a56c14bb21b296fb6d395164ab62ef2e419e5069.

ath_tx_process_buffer() doesn't dereference or check sta and passes it
to ath_tx_complete_aggr() and ath_tx_complete_buf().

ath_tx_complete_aggr() checks the pointer before use. No problem here.

ath_tx_complete_buf() doesn't check or dereference sta and passes it on
to ath_tx_complete(). ath_tx_complete() doesn't check or dereference sta,
but assigns it to tx_info->status.status_driver_data[0]

ath_tx_complete_buf() is called from ath_tx_complete_aggr() passing
null ieee80211_sta pointer.

There is a potential for dereference later on, if and when the
tx_info->status.status_driver_data[0]is referenced. In addition, the
rcu read lock might be released before referencing the contents.

ath_tx_complete_buf() should be fixed to check sta perhaps? Worth
looking into.

Reverting this patch because it doesn't solve the problem and introduces
memory leak by skipping buffer completion if the pointer (sta) is NULL.

Fixes: a56c14bb21b2 ("ath9k: fix ath_tx_process_buffer() potential null ptr dereference")
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210217211801.22540-1-skhan@linuxfoundation.org
---
 drivers/net/wireless/ath/ath9k/xmit.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 828b96bf55a2..e60d4737fc6e 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -708,24 +708,20 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq,
 		ath_tx_count_airtime(sc, sta, bf, ts, tid->tidno);
 		if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY))
 			tid->clear_ps_filter = true;
+	}
 
-		if (!bf_isampdu(bf)) {
-			if (!flush) {
-				info = IEEE80211_SKB_CB(bf->bf_mpdu);
-				memcpy(info->control.rates, bf->rates,
-				       sizeof(info->control.rates));
-				ath_tx_rc_status(sc, bf, ts, 1,
-						 txok ? 0 : 1, txok);
-				ath_dynack_sample_tx_ts(sc->sc_ah,
-							bf->bf_mpdu, ts, sta);
-			}
-			ath_tx_complete_buf(sc, bf, txq, bf_head, sta,
-					    ts, txok);
-		} else {
-			ath_tx_complete_aggr(sc, txq, bf, bf_head, sta,
-					     tid, ts, txok);
+	if (!bf_isampdu(bf)) {
+		if (!flush) {
+			info = IEEE80211_SKB_CB(bf->bf_mpdu);
+			memcpy(info->control.rates, bf->rates,
+			       sizeof(info->control.rates));
+			ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
+			ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts,
+						sta);
 		}
-	}
+		ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok);
+	} else
+		ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok);
 
 	if (!flush)
 		ath_txq_schedule(sc, txq);
-- 
cgit v1.2.3


From bf458d79cfc46f7fc7a77c1cfd91323050922e11 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Mon, 22 Feb 2021 09:35:49 +0200
Subject: ath11k: qmi: add more debug messages

To make it easier to follow the qmi event flow.

No functional changes, compile tested only.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613569421-14177-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/qmi.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index eb1b987c9df2..69eb67a94f47 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1556,6 +1556,8 @@ static int ath11k_qmi_host_cap_send(struct ath11k_base *ab)
 		req.nm_modem |= SLEEP_CLOCK_SELECT_INTERNAL_BIT;
 	}
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi host cap request\n");
+
 	ret = qmi_txn_init(&ab->qmi.handle, &txn,
 			   qmi_wlanfw_host_cap_resp_msg_v01_ei, &resp);
 	if (ret < 0)
@@ -1624,6 +1626,8 @@ static int ath11k_qmi_fw_ind_register_send(struct ath11k_base *ab)
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi indication register request\n");
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_IND_REGISTER_REQ_V01,
 			       QMI_WLANFW_IND_REGISTER_REQ_MSG_V01_MAX_LEN,
@@ -1699,6 +1703,9 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi respond memory request delayed %i\n",
+		   delayed);
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_RESPOND_MEM_REQ_V01,
 			       QMI_WLANFW_RESPOND_MEM_REQ_MSG_V01_MAX_LEN,
@@ -1843,6 +1850,8 @@ static int ath11k_qmi_request_target_cap(struct ath11k_base *ab)
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi target cap request\n");
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_CAP_REQ_V01,
 			       QMI_WLANFW_CAP_REQ_MSG_V01_MAX_LEN,
@@ -2000,6 +2009,9 @@ static int ath11k_qmi_load_bdf_fixed_addr(struct ath11k_base *ab)
 		if (ret < 0)
 			goto out_qmi_bdf;
 
+		ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi bdf download req fixed addr type %d\n",
+			   type);
+
 		ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 				       QMI_WLANFW_BDF_DOWNLOAD_REQ_V01,
 				       QMI_WLANFW_BDF_DOWNLOAD_REQ_MSG_V01_MAX_LEN,
@@ -2090,6 +2102,9 @@ static int ath11k_qmi_load_bdf_qmi(struct ath11k_base *ab)
 		if (ret < 0)
 			goto out_qmi_bdf;
 
+		ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi bdf download request remaining %i\n",
+			   remaining);
+
 		ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 				       QMI_WLANFW_BDF_DOWNLOAD_REQ_V01,
 				       QMI_WLANFW_BDF_DOWNLOAD_REQ_MSG_V01_MAX_LEN,
@@ -2200,6 +2215,8 @@ static int ath11k_qmi_wlanfw_m3_info_send(struct ath11k_base *ab)
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi m3 info req\n");
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_M3_INFO_REQ_V01,
 			       QMI_WLANFW_M3_INFO_REQ_MSG_V01_MAX_MSG_LEN,
@@ -2246,6 +2263,8 @@ static int ath11k_qmi_wlanfw_mode_send(struct ath11k_base *ab,
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi wlan mode req mode %d\n", mode);
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_WLAN_MODE_REQ_V01,
 			       QMI_WLANFW_WLAN_MODE_REQ_MSG_V01_MAX_LEN,
@@ -2338,6 +2357,8 @@ static int ath11k_qmi_wlanfw_wlan_cfg_send(struct ath11k_base *ab)
 	if (ret < 0)
 		goto out;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi wlan cfg req\n");
+
 	ret = qmi_send_request(&ab->qmi.handle, NULL, &txn,
 			       QMI_WLANFW_WLAN_CFG_REQ_V01,
 			       QMI_WLANFW_WLAN_CFG_REQ_MSG_V01_MAX_LEN,
@@ -2370,6 +2391,8 @@ void ath11k_qmi_firmware_stop(struct ath11k_base *ab)
 {
 	int ret;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi firmware stop\n");
+
 	ret = ath11k_qmi_wlanfw_mode_send(ab, ATH11K_FIRMWARE_MODE_OFF);
 	if (ret < 0) {
 		ath11k_warn(ab, "qmi failed to send wlan mode off\n");
@@ -2382,6 +2405,8 @@ int ath11k_qmi_firmware_start(struct ath11k_base *ab,
 {
 	int ret;
 
+	ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi firmware start\n");
+
 	ret = ath11k_qmi_wlanfw_wlan_cfg_send(ab);
 	if (ret < 0) {
 		ath11k_warn(ab, "qmi failed to send wlan cfg:%d\n", ret);
-- 
cgit v1.2.3


From 097e9f0714555e5da72c7ebc5377107fdf10e57d Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Mon, 22 Feb 2021 09:35:56 +0200
Subject: ath11k: qmi: cosmetic changes to error messages

Change the error messages to follow the style used in ath11k. Also include
error values in the messages which didn't have that.

No functional changes, compile tested only.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613569421-14177-2-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/qmi.c | 91 ++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 44 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index 69eb67a94f47..297a2b60716a 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -1568,7 +1568,7 @@ static int ath11k_qmi_host_cap_send(struct ath11k_base *ab)
 			       QMI_WLANFW_HOST_CAP_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_host_cap_req_msg_v01_ei, &req);
 	if (ret < 0) {
-		ath11k_warn(ab, "Failed to send host capability request,err = %d\n", ret);
+		ath11k_warn(ab, "failed to send host capability request: %d\n", ret);
 		goto out;
 	}
 
@@ -1577,7 +1577,7 @@ static int ath11k_qmi_host_cap_send(struct ath11k_base *ab)
 		goto out;
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "Host capability request failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "host capability request failed: %d %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -1633,19 +1633,19 @@ static int ath11k_qmi_fw_ind_register_send(struct ath11k_base *ab)
 			       QMI_WLANFW_IND_REGISTER_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_ind_register_req_msg_v01_ei, req);
 	if (ret < 0) {
-		ath11k_warn(ab, "Failed to send indication register request, err = %d\n",
+		ath11k_warn(ab, "failed to send indication register request: %d\n",
 			    ret);
 		goto out;
 	}
 
 	ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH11K_QMI_WLANFW_TIMEOUT_MS));
 	if (ret < 0) {
-		ath11k_warn(ab, "failed to register fw indication %d\n", ret);
+		ath11k_warn(ab, "failed to register fw indication: %d\n", ret);
 		goto out;
 	}
 
 	if (resp->resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "FW Ind register request failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "firmware indication register request failed: %d %d\n",
 			    resp->resp.result, resp->resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -1711,14 +1711,14 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 			       QMI_WLANFW_RESPOND_MEM_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_respond_mem_req_msg_v01_ei, req);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to respond memory request, err = %d\n",
+		ath11k_warn(ab, "failed to respond qmi memory request: %d\n",
 			    ret);
 		goto out;
 	}
 
 	ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH11K_QMI_WLANFW_TIMEOUT_MS));
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed memory request, err = %d\n", ret);
+		ath11k_warn(ab, "failed to wait qmi memory request: %d\n", ret);
 		goto out;
 	}
 
@@ -1729,7 +1729,7 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
 		if (delayed && resp.resp.error == 0)
 			goto out;
 
-		ath11k_warn(ab, "Respond mem req failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "qmi respond memory request failed: %d %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -1781,7 +1781,8 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab)
 				ab->qmi.target_mem_delayed = true;
 				return 0;
 			}
-			ath11k_err(ab, "failed to alloc memory, size: 0x%x, type: %u\n",
+
+			ath11k_err(ab, "failed to allocate dma memory for qmi (%d B type %u)\n",
 				   chunk->size,
 				   chunk->type);
 			return -EINVAL;
@@ -1857,19 +1858,19 @@ static int ath11k_qmi_request_target_cap(struct ath11k_base *ab)
 			       QMI_WLANFW_CAP_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_cap_req_msg_v01_ei, &req);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send target cap request, err = %d\n",
+		ath11k_warn(ab, "failed to send qmi cap request: %d\n",
 			    ret);
 		goto out;
 	}
 
 	ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH11K_QMI_WLANFW_TIMEOUT_MS));
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed target cap request %d\n", ret);
+		ath11k_warn(ab, "failed to wait qmi cap request: %d\n", ret);
 		goto out;
 	}
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "qmi targetcap req failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "qmi cap request failed: %d %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -1932,7 +1933,7 @@ ath11k_qmi_prepare_bdf_download(struct ath11k_base *ab, int type,
 
 		ret = ath11k_core_fetch_bdf(ab, &bd);
 		if (ret) {
-			ath11k_warn(ab, "qmi failed to load BDF\n");
+			ath11k_warn(ab, "failed to load board file: %d\n", ret);
 			return ret;
 		}
 
@@ -1980,7 +1981,7 @@ static int ath11k_qmi_load_bdf_fixed_addr(struct ath11k_base *ab)
 
 	bdf_addr = ioremap(ab->hw_params.bdf_addr, ATH11K_QMI_BDF_MAX_SIZE);
 	if (!bdf_addr) {
-		ath11k_warn(ab, "qmi ioremap error for BDF\n");
+		ath11k_warn(ab, "failed ioremap for board file\n");
 		ret = -EIO;
 		goto out;
 	}
@@ -2026,7 +2027,7 @@ static int ath11k_qmi_load_bdf_fixed_addr(struct ath11k_base *ab)
 			goto out_qmi_bdf;
 
 		if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-			ath11k_warn(ab, "qmi BDF download failed, result: %d, err: %d\n",
+			ath11k_warn(ab, "board file download request failed: %d %d\n",
 				    resp.resp.result, resp.resp.error);
 			ret = -EINVAL;
 			goto out_qmi_bdf;
@@ -2059,7 +2060,7 @@ static int ath11k_qmi_load_bdf_qmi(struct ath11k_base *ab)
 	memset(&bd, 0, sizeof(bd));
 	ret = ath11k_core_fetch_bdf(ab, &bd);
 	if (ret) {
-		ath11k_warn(ab, "qmi failed to load bdf:\n");
+		ath11k_warn(ab, "failed to fetch board file: %d\n", ret);
 		goto out;
 	}
 
@@ -2119,7 +2120,7 @@ static int ath11k_qmi_load_bdf_qmi(struct ath11k_base *ab)
 			goto out_qmi_bdf;
 
 		if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-			ath11k_warn(ab, "qmi BDF download failed, result: %d, err: %d\n",
+			ath11k_warn(ab, "bdf download request failed: %d %d\n",
 				    resp.resp.result, resp.resp.error);
 			ret = resp.resp.result;
 			goto out_qmi_bdf;
@@ -2222,19 +2223,19 @@ static int ath11k_qmi_wlanfw_m3_info_send(struct ath11k_base *ab)
 			       QMI_WLANFW_M3_INFO_REQ_MSG_V01_MAX_MSG_LEN,
 			       qmi_wlanfw_m3_info_req_msg_v01_ei, &req);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send M3 information request, err = %d\n",
+		ath11k_warn(ab, "failed to send m3 information request: %d\n",
 			    ret);
 		goto out;
 	}
 
 	ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH11K_QMI_WLANFW_TIMEOUT_MS));
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed M3 information request %d\n", ret);
+		ath11k_warn(ab, "failed to wait m3 information request: %d\n", ret);
 		goto out;
 	}
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "qmi M3 info request failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "m3 info request failed: %d %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -2270,7 +2271,7 @@ static int ath11k_qmi_wlanfw_mode_send(struct ath11k_base *ab,
 			       QMI_WLANFW_WLAN_MODE_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_wlan_mode_req_msg_v01_ei, &req);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send mode request, mode: %d, err = %d\n",
+		ath11k_warn(ab, "failed to send wlan mode request (mode %d): %d\n",
 			    mode, ret);
 		goto out;
 	}
@@ -2281,13 +2282,13 @@ static int ath11k_qmi_wlanfw_mode_send(struct ath11k_base *ab,
 			ath11k_warn(ab, "WLFW service is dis-connected\n");
 			return 0;
 		}
-		ath11k_warn(ab, "qmi failed set mode request, mode: %d, err = %d\n",
+		ath11k_warn(ab, "failed to wait wlan mode request (mode %d): %d\n",
 			    mode, ret);
 		goto out;
 	}
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "Mode request failed, mode: %d, result: %d err: %d\n",
+		ath11k_warn(ab, "wlan mode request failed (mode: %d): %d %d\n",
 			    mode, resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -2364,19 +2365,19 @@ static int ath11k_qmi_wlanfw_wlan_cfg_send(struct ath11k_base *ab)
 			       QMI_WLANFW_WLAN_CFG_REQ_MSG_V01_MAX_LEN,
 			       qmi_wlanfw_wlan_cfg_req_msg_v01_ei, req);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send wlan config request, err = %d\n",
+		ath11k_warn(ab, "failed to send wlan config request: %d\n",
 			    ret);
 		goto out;
 	}
 
 	ret = qmi_txn_wait(&txn, msecs_to_jiffies(ATH11K_QMI_WLANFW_TIMEOUT_MS));
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed wlan config request, err = %d\n", ret);
+		ath11k_warn(ab, "failed to wait wlan config request: %d\n", ret);
 		goto out;
 	}
 
 	if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
-		ath11k_warn(ab, "qmi wlan config request failed, result: %d, err: %d\n",
+		ath11k_warn(ab, "wlan config request failed: %d %d\n",
 			    resp.resp.result, resp.resp.error);
 		ret = -EINVAL;
 		goto out;
@@ -2395,7 +2396,7 @@ void ath11k_qmi_firmware_stop(struct ath11k_base *ab)
 
 	ret = ath11k_qmi_wlanfw_mode_send(ab, ATH11K_FIRMWARE_MODE_OFF);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send wlan mode off\n");
+		ath11k_warn(ab, "qmi failed to send wlan mode off: %d\n", ret);
 		return;
 	}
 }
@@ -2409,13 +2410,13 @@ int ath11k_qmi_firmware_start(struct ath11k_base *ab,
 
 	ret = ath11k_qmi_wlanfw_wlan_cfg_send(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send wlan cfg:%d\n", ret);
+		ath11k_warn(ab, "qmi failed to send wlan cfg: %d\n", ret);
 		return ret;
 	}
 
 	ret = ath11k_qmi_wlanfw_mode_send(ab, mode);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send wlan fw mode:%d\n", ret);
+		ath11k_warn(ab, "qmi failed to send wlan fw mode: %d\n", ret);
 		return ret;
 	}
 
@@ -2429,7 +2430,7 @@ static int ath11k_qmi_process_coldboot_calibration(struct ath11k_base *ab)
 
 	ret = ath11k_qmi_wlanfw_mode_send(ab, ATH11K_FIRMWARE_MODE_COLD_BOOT);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send wlan fw mode:%d\n", ret);
+		ath11k_warn(ab, "qmi failed to send wlan fw mode: %d\n", ret);
 		return ret;
 	}
 
@@ -2439,7 +2440,7 @@ static int ath11k_qmi_process_coldboot_calibration(struct ath11k_base *ab)
 				     (ab->qmi.cal_done  == 1),
 				     ATH11K_COLD_BOOT_FW_RESET_DELAY);
 	if (timeout <= 0) {
-		ath11k_warn(ab, "Coldboot Calibration failed - wait ended\n");
+		ath11k_warn(ab, "coldboot calibration timed out\n");
 		return 0;
 	}
 
@@ -2478,13 +2479,14 @@ static int ath11k_qmi_event_server_arrive(struct ath11k_qmi *qmi)
 
 	ret = ath11k_qmi_fw_ind_register_send(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send FW indication QMI:%d\n", ret);
+		ath11k_warn(ab, "failed to send qmi firmware indication: %d\n",
+			    ret);
 		return ret;
 	}
 
 	ret = ath11k_qmi_host_cap_send(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send host cap QMI:%d\n", ret);
+		ath11k_warn(ab, "failed to send qmi host cap: %d\n", ret);
 		return ret;
 	}
 
@@ -2498,7 +2500,7 @@ static int ath11k_qmi_event_mem_request(struct ath11k_qmi *qmi)
 
 	ret = ath11k_qmi_respond_fw_mem_request(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to respond fw mem req:%d\n", ret);
+		ath11k_warn(ab, "qmi failed to respond fw mem req: %d\n", ret);
 		return ret;
 	}
 
@@ -2512,7 +2514,8 @@ static int ath11k_qmi_event_load_bdf(struct ath11k_qmi *qmi)
 
 	ret = ath11k_qmi_request_target_cap(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to req target capabilities:%d\n", ret);
+		ath11k_warn(ab, "failed to requeqst qmi target capabilities: %d\n",
+			    ret);
 		return ret;
 	}
 
@@ -2521,13 +2524,13 @@ static int ath11k_qmi_event_load_bdf(struct ath11k_qmi *qmi)
 	else
 		ret = ath11k_qmi_load_bdf_qmi(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to load board data file:%d\n", ret);
+		ath11k_warn(ab, "failed to load board data file: %d\n", ret);
 		return ret;
 	}
 
 	ret = ath11k_qmi_wlanfw_m3_info_send(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "qmi failed to send m3 info req:%d\n", ret);
+		ath11k_warn(ab, "failed to send qmi m3 info req: %d\n", ret);
 		return ret;
 	}
 
@@ -2548,7 +2551,7 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl,
 
 	if (msg->mem_seg_len == 0 ||
 	    msg->mem_seg_len > ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01)
-		ath11k_warn(ab, "Invalid memory segment length: %u\n",
+		ath11k_warn(ab, "invalid memory segment length: %u\n",
 			    msg->mem_seg_len);
 
 	ab->qmi.mem_seg_count = msg->mem_seg_len;
@@ -2563,14 +2566,14 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl,
 	if (ab->bus_params.fixed_mem_region) {
 		ret = ath11k_qmi_assign_target_mem_chunk(ab);
 		if (ret) {
-			ath11k_warn(ab, "qmi failed to assign target memory: %d\n",
+			ath11k_warn(ab, "failed to assign qmi target memory: %d\n",
 				    ret);
 			return;
 		}
 	} else {
 		ret = ath11k_qmi_alloc_target_mem_chunk(ab);
 		if (ret) {
-			ath11k_warn(ab, "qmi failed to alloc target memory: %d\n",
+			ath11k_warn(ab, "failed to allocate qmi target memory: %d\n",
 				    ret);
 			return;
 		}
@@ -2664,7 +2667,7 @@ static int ath11k_qmi_ops_new_server(struct qmi_handle *qmi_hdl,
 	ret = kernel_connect(qmi_hdl->sock, (struct sockaddr *)sq,
 			     sizeof(*sq), 0);
 	if (ret) {
-		ath11k_warn(ab, "qmi failed to connect to remote service %d\n", ret);
+		ath11k_warn(ab, "failed to connect to qmi remote service: %d\n", ret);
 		return ret;
 	}
 
@@ -2750,7 +2753,7 @@ static void ath11k_qmi_driver_event_work(struct work_struct *work)
 		case ATH11K_QMI_EVENT_COLD_BOOT_CAL_DONE:
 			break;
 		default:
-			ath11k_warn(ab, "invalid event type: %d", event->type);
+			ath11k_warn(ab, "invalid qmi event type: %d", event->type);
 			break;
 		}
 		kfree(event);
@@ -2771,7 +2774,7 @@ int ath11k_qmi_init_service(struct ath11k_base *ab)
 	ret = qmi_handle_init(&ab->qmi.handle, ATH11K_QMI_RESP_LEN_MAX,
 			      &ath11k_qmi_ops, ath11k_qmi_msg_handlers);
 	if (ret < 0) {
-		ath11k_warn(ab, "failed to initialize qmi handle\n");
+		ath11k_warn(ab, "failed to initialize qmi handle: %d\n", ret);
 		return ret;
 	}
 
@@ -2790,7 +2793,7 @@ int ath11k_qmi_init_service(struct ath11k_base *ab)
 			     ATH11K_QMI_WLFW_SERVICE_VERS_V01,
 			     ab->qmi.service_ins_id);
 	if (ret < 0) {
-		ath11k_warn(ab, "failed to add qmi lookup\n");
+		ath11k_warn(ab, "failed to add qmi lookup: %d\n", ret);
 		destroy_workqueue(ab->qmi.event_wq);
 		return ret;
 	}
-- 
cgit v1.2.3


From 3808a18043a8d16ea1bc0ebe59c864f73413dbbf Mon Sep 17 00:00:00 2001
From: Miaoqing Pan <miaoqing@codeaurora.org>
Date: Thu, 18 Feb 2021 14:45:09 +0800
Subject: ath11k: fix potential wmi_mgmt_tx_queue race condition

There is a potential race condition between skb_queue_len()
and skb_queue_tail(), the former may get old value before
updated by the latter.

So use skb_queue_len_lockless() instead. And also use '>=',
in case we queue a few SKBs simultaneously.

Found while discussing a similar fix for ath10k:
https://patchwork.kernel.org/project/linux-wireless/patch/1608515579-1066-1-git-send-email-miaoqing@codeaurora.org/

No functional changes, compile tested only.

Signed-off-by: Miaoqing Pan <miaoqing@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613630709-704-1-git-send-email-miaoqing@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 335d49af7dd5..3c1f35a204ba 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -4211,7 +4211,7 @@ static int ath11k_mac_mgmt_tx(struct ath11k *ar, struct sk_buff *skb,
 		return -ENOSPC;
 	}
 
-	if (skb_queue_len(q) == ATH11K_TX_MGMT_NUM_PENDING_MAX) {
+	if (skb_queue_len_lockless(q) >= ATH11K_TX_MGMT_NUM_PENDING_MAX) {
 		ath11k_warn(ar->ab, "mgmt tx queue is full\n");
 		return -ENOSPC;
 	}
-- 
cgit v1.2.3


From e3de5bb7ac1a4cb262f8768924fd3ef6182b10bb Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Thu, 18 Feb 2021 10:27:08 -0800
Subject: ath11k: fix thermal temperature read

Fix dangling pointer in thermal temperature event which causes
incorrect temperature read.

Tested-on: IPQ8074 AHB WLAN.HK.2.4.0.1-00041-QCAHKSWPL_SILICONZ-1

Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210218182708.8844-1-pradeepc@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/wmi.c | 53 ++++++++++++++---------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index cccfd3bd4d27..ca5cda890d58 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -5417,31 +5417,6 @@ int ath11k_wmi_pull_fw_stats(struct ath11k_base *ab, struct sk_buff *skb,
 	return 0;
 }
 
-static int
-ath11k_pull_pdev_temp_ev(struct ath11k_base *ab, u8 *evt_buf,
-			 u32 len, const struct wmi_pdev_temperature_event *ev)
-{
-	const void **tb;
-	int ret;
-
-	tb = ath11k_wmi_tlv_parse_alloc(ab, evt_buf, len, GFP_ATOMIC);
-	if (IS_ERR(tb)) {
-		ret = PTR_ERR(tb);
-		ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
-		return ret;
-	}
-
-	ev = tb[WMI_TAG_PDEV_TEMPERATURE_EVENT];
-	if (!ev) {
-		ath11k_warn(ab, "failed to fetch pdev temp ev");
-		kfree(tb);
-		return -EPROTO;
-	}
-
-	kfree(tb);
-	return 0;
-}
-
 size_t ath11k_wmi_fw_stats_num_vdevs(struct list_head *head)
 {
 	struct ath11k_fw_stats_vdev *i;
@@ -6849,23 +6824,37 @@ ath11k_wmi_pdev_temperature_event(struct ath11k_base *ab,
 				  struct sk_buff *skb)
 {
 	struct ath11k *ar;
-	struct wmi_pdev_temperature_event ev = {0};
+	const void **tb;
+	const struct wmi_pdev_temperature_event *ev;
+	int ret;
+
+	tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+	if (IS_ERR(tb)) {
+		ret = PTR_ERR(tb);
+		ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
+		return;
+	}
 
-	if (ath11k_pull_pdev_temp_ev(ab, skb->data, skb->len, &ev) != 0) {
-		ath11k_warn(ab, "failed to extract pdev temperature event");
+	ev = tb[WMI_TAG_PDEV_TEMPERATURE_EVENT];
+	if (!ev) {
+		ath11k_warn(ab, "failed to fetch pdev temp ev");
+		kfree(tb);
 		return;
 	}
 
 	ath11k_dbg(ab, ATH11K_DBG_WMI,
-		   "pdev temperature ev temp %d pdev_id %d\n", ev.temp, ev.pdev_id);
+		   "pdev temperature ev temp %d pdev_id %d\n", ev->temp, ev->pdev_id);
 
-	ar = ath11k_mac_get_ar_by_pdev_id(ab, ev.pdev_id);
+	ar = ath11k_mac_get_ar_by_pdev_id(ab, ev->pdev_id);
 	if (!ar) {
-		ath11k_warn(ab, "invalid pdev id in pdev temperature ev %d", ev.pdev_id);
+		ath11k_warn(ab, "invalid pdev id in pdev temperature ev %d", ev->pdev_id);
+		kfree(tb);
 		return;
 	}
 
-	ath11k_thermal_event_temperature(ar, ev.temp);
+	ath11k_thermal_event_temperature(ar, ev->temp);
+
+	kfree(tb);
 }
 
 static void ath11k_fils_discovery_event(struct ath11k_base *ab,
-- 
cgit v1.2.3


From b9fc8b4a591811546fec2dbef7e9f809362100c9 Mon Sep 17 00:00:00 2001
From: Grant Seltzer <grantseltzer@gmail.com>
Date: Mon, 22 Feb 2021 19:58:46 +0000
Subject: bpf: Add kernel/modules BTF presence checks to bpftool feature
 command

This adds both the CONFIG_DEBUG_INFO_BTF and CONFIG_DEBUG_INFO_BTF_MODULES
kernel compile option to output of the bpftool feature command.

This is relevant for developers that want to account for data structure
definition differences between kernels.

Signed-off-by: Grant Seltzer <grantseltzer@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210222195846.155483-1-grantseltzer@gmail.com
---
 tools/bpf/bpftool/feature.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 359960a8f1de..40a88df275f9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
 		{ "CONFIG_BPF_JIT", },
 		/* Avoid compiling eBPF interpreter (use JIT only) */
 		{ "CONFIG_BPF_JIT_ALWAYS_ON", },
+		/* Kernel BTF debug information available */
+		{ "CONFIG_DEBUG_INFO_BTF", },
+		/* Kernel module BTF debug information available */
+		{ "CONFIG_DEBUG_INFO_BTF_MODULES", },
 
 		/* cgroups */
 		{ "CONFIG_CGROUPS", },
-- 
cgit v1.2.3


From 2463e073497385ef63c220571013a2b89e9b95cc Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Feb 2021 20:49:41 +0000
Subject: netdevice: Add missing IFF_PHONY_HEADROOM self-definition

This is harmless for now, but can be fatal for future refactors.

Fixes: 871b642adebe3 ("netdev: introduce ndo_set_rx_headroom")
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210218204908.5455-2-alobakin@pm.me
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ddf4cfc12615..3b6f82c2c271 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1577,6 +1577,7 @@ enum netdev_priv_flags {
 #define IFF_L3MDEV_SLAVE		IFF_L3MDEV_SLAVE
 #define IFF_TEAM			IFF_TEAM
 #define IFF_RXFH_CONFIGURED		IFF_RXFH_CONFIGURED
+#define IFF_PHONY_HEADROOM		IFF_PHONY_HEADROOM
 #define IFF_MACSEC			IFF_MACSEC
 #define IFF_NO_RX_HANDLER		IFF_NO_RX_HANDLER
 #define IFF_FAILOVER			IFF_FAILOVER
-- 
cgit v1.2.3


From c2ff53d8049f30098153cd2d1299a44d7b124c57 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 18 Feb 2021 20:50:02 +0000
Subject: net: Add priv_flags for allow tx skb without linear

In some cases, we hope to construct skb directly based on the existing
memory without copying data. In this case, the page will be placed
directly in the skb, and the linear space of skb is empty. But
unfortunately, many the network card does not support this operation.
For example Mellanox Technologies MT27710 Family [ConnectX-4 Lx] will
get the following error message:

    mlx5_core 0000:3b:00.1 eth1: Error cqe on cqn 0x817, ci 0x8,
    qn 0x1dbb, opcode 0xd, syndrome 0x1, vendor syndrome 0x68
    00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    00000030: 00 00 00 00 60 10 68 01 0a 00 1d bb 00 0f 9f d2
    WQE DUMP: WQ size 1024 WQ cur size 0, WQE index 0xf, len: 64
    00000000: 00 00 0f 0a 00 1d bb 03 00 00 00 08 00 00 00 00
    00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    00000020: 00 00 00 2b 00 08 00 00 00 00 00 05 9e e3 08 00
    00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
    mlx5_core 0000:3b:00.1 eth1: ERR CQE on SQ: 0x1dbb

So a priv_flag is added here to indicate whether the network card
supports this feature.

Suggested-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210218204908.5455-3-alobakin@pm.me
---
 include/linux/netdevice.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3b6f82c2c271..6cef47b76cc6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1518,6 +1518,8 @@ struct net_device_ops {
  * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device
  * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running
+ * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with
+ *	skb_headlen(skb) == 0 (data starts from frag0)
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1551,6 +1553,7 @@ enum netdev_priv_flags {
 	IFF_FAILOVER_SLAVE		= 1<<28,
 	IFF_L3MDEV_RX_HANDLER		= 1<<29,
 	IFF_LIVE_RENAME_OK		= 1<<30,
+	IFF_TX_SKB_NO_LINEAR		= 1<<31,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -1584,6 +1587,7 @@ enum netdev_priv_flags {
 #define IFF_FAILOVER_SLAVE		IFF_FAILOVER_SLAVE
 #define IFF_L3MDEV_RX_HANDLER		IFF_L3MDEV_RX_HANDLER
 #define IFF_LIVE_RENAME_OK		IFF_LIVE_RENAME_OK
+#define IFF_TX_SKB_NO_LINEAR		IFF_TX_SKB_NO_LINEAR
 
 /**
  *	struct net_device - The DEVICE structure.
-- 
cgit v1.2.3


From ab5bd583b9289666e918f9e5f672d33ccdfd49b2 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 18 Feb 2021 20:50:17 +0000
Subject: virtio-net: Support IFF_TX_SKB_NO_LINEAR flag

Virtio net supports the case where the skb linear space is empty, so
add priv_flags.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210218204908.5455-4-alobakin@pm.me
---
 drivers/net/virtio_net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ba8e63792549..f2ff6c3906c1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2972,7 +2972,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		return -ENOMEM;
 
 	/* Set up network device as normal. */
-	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
+			   IFF_TX_SKB_NO_LINEAR;
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
-- 
cgit v1.2.3


From 3914d88f7608e6c2e80e344474fa289370c32451 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Feb 2021 20:50:31 +0000
Subject: xsk: Respect device's headroom and tailroom on generic xmit path

xsk_generic_xmit() allocates a new skb and then queues it for
xmitting. The size of new skb's headroom is desc->len, so it comes
to the driver/device with no reserved headroom and/or tailroom.
Lots of drivers need some headroom (and sometimes tailroom) to
prepend (and/or append) some headers or data, e.g. CPU tags,
device-specific headers/descriptors (LSO, TLS etc.), and if case
of no available space skb_cow_head() will reallocate the skb.
Reallocations are unwanted on fast-path, especially when it comes
to XDP, so generic XSK xmit should reserve the spaces declared in
dev->needed_headroom and dev->needed tailroom to avoid them.

Note on max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom)):

Usually, output functions reserve LL_RESERVED_SPACE(dev), which
consists of dev->hard_header_len + dev->needed_headroom, aligned
by 16.

However, on XSK xmit hard header is already here in the chunk, so
hard_header_len is not needed. But it'd still be better to align
data up to cacheline, while reserving no less than driver requests
for headroom. NET_SKB_PAD here is to double-insure there will be
no reallocations even when the driver advertises no needed_headroom,
but in fact need it (not so rare case).

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210218204908.5455-5-alobakin@pm.me
---
 net/xdp/xsk.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 4faabd1ecfd1..143979ea4165 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -454,12 +454,16 @@ static int xsk_generic_xmit(struct sock *sk)
 	struct sk_buff *skb;
 	unsigned long flags;
 	int err = 0;
+	u32 hr, tr;
 
 	mutex_lock(&xs->mutex);
 
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
+	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+	tr = xs->dev->needed_tailroom;
+
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
 		char *buffer;
 		u64 addr;
@@ -471,11 +475,13 @@ static int xsk_generic_xmit(struct sock *sk)
 		}
 
 		len = desc.len;
-		skb = sock_alloc_send_skb(sk, len, 1, &err);
+		skb = sock_alloc_send_skb(sk, hr + len + tr, 1, &err);
 		if (unlikely(!skb))
 			goto out;
 
+		skb_reserve(skb, hr);
 		skb_put(skb, len);
+
 		addr = desc.addr;
 		buffer = xsk_buff_raw_get_data(xs->pool, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
-- 
cgit v1.2.3


From 9c8f21e6f8856a96634e542a58ef3abf27486801 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 18 Feb 2021 20:50:45 +0000
Subject: xsk: Build skb by page (aka generic zerocopy xmit)

This patch is used to construct skb based on page to save memory copy
overhead.

This function is implemented based on IFF_TX_SKB_NO_LINEAR. Only the
network card priv_flags supports IFF_TX_SKB_NO_LINEAR will use page to
directly construct skb. If this feature is not supported, it is still
necessary to copy data to construct skb.

  ---------------- Performance Testing ------------

  The test environment is Aliyun ECS server.

  Test cmd:
  ```
  xdpsock -i eth0 -t  -S -s <msg size>
  ```

  Test result data:

  size    64      512     1024    1500
  copy    1916747 1775988 1600203 1440054
  page    1974058 1953655 1945463 1904478
  percent 3.0%    10.0%   21.58%  32.3%

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210218204908.5455-6-alobakin@pm.me
---
 net/xdp/xsk.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 96 insertions(+), 24 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 143979ea4165..a71ed664da0a 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -445,6 +445,97 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 	sock_wfree(skb);
 }
 
+static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
+					      struct xdp_desc *desc)
+{
+	struct xsk_buff_pool *pool = xs->pool;
+	u32 hr, len, ts, offset, copy, copied;
+	struct sk_buff *skb;
+	struct page *page;
+	void *buffer;
+	int err, i;
+	u64 addr;
+
+	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
+
+	skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
+	if (unlikely(!skb))
+		return ERR_PTR(err);
+
+	skb_reserve(skb, hr);
+
+	addr = desc->addr;
+	len = desc->len;
+	ts = pool->unaligned ? len : pool->chunk_size;
+
+	buffer = xsk_buff_raw_get_data(pool, addr);
+	offset = offset_in_page(buffer);
+	addr = buffer - pool->addrs;
+
+	for (copied = 0, i = 0; copied < len; i++) {
+		page = pool->umem->pgs[addr >> PAGE_SHIFT];
+		get_page(page);
+
+		copy = min_t(u32, PAGE_SIZE - offset, len - copied);
+		skb_fill_page_desc(skb, i, page, offset, copy);
+
+		copied += copy;
+		addr += copy;
+		offset = 0;
+	}
+
+	skb->len += len;
+	skb->data_len += len;
+	skb->truesize += ts;
+
+	refcount_add(ts, &xs->sk.sk_wmem_alloc);
+
+	return skb;
+}
+
+static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
+				     struct xdp_desc *desc)
+{
+	struct net_device *dev = xs->dev;
+	struct sk_buff *skb;
+
+	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
+		skb = xsk_build_skb_zerocopy(xs, desc);
+		if (IS_ERR(skb))
+			return skb;
+	} else {
+		u32 hr, tr, len;
+		void *buffer;
+		int err;
+
+		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+		tr = dev->needed_tailroom;
+		len = desc->len;
+
+		skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
+		if (unlikely(!skb))
+			return ERR_PTR(err);
+
+		skb_reserve(skb, hr);
+		skb_put(skb, len);
+
+		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
+		err = skb_store_bits(skb, 0, buffer, len);
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return ERR_PTR(err);
+		}
+	}
+
+	skb->dev = dev;
+	skb->priority = xs->sk.sk_priority;
+	skb->mark = xs->sk.sk_mark;
+	skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+	skb->destructor = xsk_destruct_skb;
+
+	return skb;
+}
+
 static int xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
@@ -454,56 +545,37 @@ static int xsk_generic_xmit(struct sock *sk)
 	struct sk_buff *skb;
 	unsigned long flags;
 	int err = 0;
-	u32 hr, tr;
 
 	mutex_lock(&xs->mutex);
 
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
-	hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
-	tr = xs->dev->needed_tailroom;
-
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-		char *buffer;
-		u64 addr;
-		u32 len;
-
 		if (max_batch-- == 0) {
 			err = -EAGAIN;
 			goto out;
 		}
 
-		len = desc.len;
-		skb = sock_alloc_send_skb(sk, hr + len + tr, 1, &err);
-		if (unlikely(!skb))
+		skb = xsk_build_skb(xs, &desc);
+		if (IS_ERR(skb)) {
+			err = PTR_ERR(skb);
 			goto out;
+		}
 
-		skb_reserve(skb, hr);
-		skb_put(skb, len);
-
-		addr = desc.addr;
-		buffer = xsk_buff_raw_get_data(xs->pool, addr);
-		err = skb_store_bits(skb, 0, buffer, len);
 		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
 		 */
 		spin_lock_irqsave(&xs->pool->cq_lock, flags);
-		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+		if (xskq_prod_reserve(xs->pool->cq)) {
 			spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 			kfree_skb(skb);
 			goto out;
 		}
 		spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
 
-		skb->dev = xs->dev;
-		skb->priority = sk->sk_priority;
-		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
-		skb->destructor = xsk_destruct_skb;
-
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
-- 
cgit v1.2.3


From a10787e6d58c24b51e91c19c6d16c5da89fcaa4b Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:14 -0800
Subject: bpf: Enable task local storage for tracing programs

To access per-task data, BPF programs usually creates a hash table with
pid as the key. This is not ideal because:
 1. The user need to estimate the proper size of the hash table, which may
    be inaccurate;
 2. Big hash tables are slow;
 3. To clean up the data properly during task terminations, the user need
    to write extra logic.

Task local storage overcomes these issues and offers a better option for
these per-task data. Task local storage is only available to BPF_LSM. Now
enable it for tracing programs.

Unlike LSM programs, tracing programs can be called in IRQ contexts.
Helpers that access task local storage are updated to use
raw_spin_lock_irqsave() instead of raw_spin_lock_bh().

Tracing programs can attach to functions on the task free path, e.g.
exit_creds(). To avoid allocating task local storage after
bpf_task_storage_free(). bpf_task_storage_get() is updated to not allocate
new storage when the task is not refcounted (task->usage == 0).

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210225234319.336131-2-songliubraving@fb.com
---
 include/linux/bpf.h            |  7 +++++++
 include/linux/bpf_lsm.h        | 22 ---------------------
 include/linux/bpf_types.h      |  2 +-
 include/linux/sched.h          |  5 +++++
 kernel/bpf/Makefile            |  3 +--
 kernel/bpf/bpf_local_storage.c | 28 ++++++++++++++++-----------
 kernel/bpf/bpf_lsm.c           |  4 ----
 kernel/bpf/bpf_task_storage.c  | 43 +++++++++++-------------------------------
 kernel/fork.c                  |  5 +++++
 kernel/trace/bpf_trace.c       |  4 ++++
 10 files changed, 51 insertions(+), 72 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cccaef1088ea..e2cfc4809219 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1499,6 +1499,7 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+void bpf_task_storage_free(struct task_struct *task);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1684,6 +1685,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 {
 	return NULL;
 }
+
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -1886,6 +1891,8 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 extern const struct bpf_func_proto bpf_sock_from_file_proto;
 extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 0d1c33ace398..479c101546ad 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -38,21 +38,9 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	if (unlikely(!task->security))
-		return NULL;
-
-	return task->security + bpf_lsm_blob_sizes.lbs_task;
-}
-
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
-extern const struct bpf_func_proto bpf_task_storage_get_proto;
-extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
-void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -73,20 +61,10 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
-static inline struct bpf_storage_blob *bpf_task(
-	const struct task_struct *task)
-{
-	return NULL;
-}
-
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
-static inline void bpf_task_storage_free(struct task_struct *task)
-{
-}
-
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 99f7fd657d87..b9edee336d80 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,8 +109,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d568288abf9..e5fbf8e6952a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -42,6 +42,7 @@ struct audit_context;
 struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
+struct bpf_local_storage;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1348,6 +1349,10 @@ struct task_struct {
 	/* Used by LSM modules for access restriction: */
 	void				*security;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	/* Used by BPF task local storage */
+	struct bpf_local_storage __rcu	*bpf_storage;
+#endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index d1249340fd6b..7f33098ca63f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
-obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
 endif
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index dd5aedee99e7..9bd47ad2b26f 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage *local_storage;
 	bool free_local_storage = false;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_storage(selem)))
 		/* selem has already been unlinked from sk */
 		return;
 
 	local_storage = rcu_dereference(selem->local_storage);
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, true);
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
 	if (free_local_storage)
 		kfree_rcu(local_storage, rcu);
@@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map *smap;
 	struct bpf_local_storage_map_bucket *b;
+	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_map(selem)))
 		/* selem has already be unlinked from smap */
@@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
 
 	smap = rcu_dereference(SDATA(selem)->smap);
 	b = select_bucket(smap, selem);
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	if (likely(selem_linked_to_map(selem)))
 		hlist_del_init_rcu(&selem->map_node);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 			struct bpf_local_storage_elem *selem)
 {
 	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
+	unsigned long flags;
 
-	raw_spin_lock_bh(&b->lock);
+	raw_spin_lock_irqsave(&b->lock, flags);
 	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
 	hlist_add_head_rcu(&selem->map_node, &b->list);
-	raw_spin_unlock_bh(&b->lock);
+	raw_spin_unlock_irqrestore(&b->lock, flags);
 }
 
 void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 
 	sdata = SDATA(selem);
 	if (cacheit_lockit) {
+		unsigned long flags;
+
 		/* spinlock is needed to avoid racing with the
 		 * parallel delete.  Otherwise, publishing an already
 		 * deleted sdata to the cache will become a use-after-free
 		 * problem in the next bpf_local_storage_lookup().
 		 */
-		raw_spin_lock_bh(&local_storage->lock);
+		raw_spin_lock_irqsave(&local_storage->lock, flags);
 		if (selem_linked_to_storage(selem))
 			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
 					   sdata);
-		raw_spin_unlock_bh(&local_storage->lock);
+		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	}
 
 	return sdata;
@@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
+	unsigned long flags;
 	int err;
 
 	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		}
 	}
 
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
 	/* Recheck local_storage->list under local_storage->lock */
 	if (unlikely(hlist_empty(&local_storage->list))) {
@@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	}
 
 unlock:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return SDATA(selem);
 
 unlock_err:
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	return ERR_PTR(err);
 }
 
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 1622a44d1617..9829f381b51c 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
-	case BPF_FUNC_task_storage_get:
-		return &bpf_task_storage_get_proto;
-	case BPF_FUNC_task_storage_delete:
-		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
 	case BPF_FUNC_ima_inode_hash:
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index e0da0258b732..baf3566e2323 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -15,7 +15,6 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/filter.h>
 #include <uapi/linux/btf.h>
-#include <linux/bpf_lsm.h>
 #include <linux/btf_ids.h>
 #include <linux/fdtable.h>
 
@@ -24,12 +23,8 @@ DEFINE_BPF_STORAGE_CACHE(task_cache);
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
-	struct bpf_storage_blob *bsb;
 
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
-	return &bsb->storage;
+	return &task->bpf_storage;
 }
 
 static struct bpf_local_storage_data *
@@ -38,13 +33,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
 {
 	struct bpf_local_storage *task_storage;
 	struct bpf_local_storage_map *smap;
-	struct bpf_storage_blob *bsb;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return NULL;
 
-	task_storage = rcu_dereference(bsb->storage);
+	task_storage = rcu_dereference(task->bpf_storage);
 	if (!task_storage)
 		return NULL;
 
@@ -57,16 +47,12 @@ void bpf_task_storage_free(struct task_struct *task)
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage *local_storage;
 	bool free_task_storage = false;
-	struct bpf_storage_blob *bsb;
 	struct hlist_node *n;
-
-	bsb = bpf_task(task);
-	if (!bsb)
-		return;
+	unsigned long flags;
 
 	rcu_read_lock();
 
-	local_storage = rcu_dereference(bsb->storage);
+	local_storage = rcu_dereference(task->bpf_storage);
 	if (!local_storage) {
 		rcu_read_unlock();
 		return;
@@ -81,7 +67,7 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
-	raw_spin_lock_bh(&local_storage->lock);
+	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
 		 * local_storage.
@@ -90,7 +76,7 @@ void bpf_task_storage_free(struct task_struct *task)
 		free_task_storage = bpf_selem_unlink_storage_nolock(
 			local_storage, selem, false);
 	}
-	raw_spin_unlock_bh(&local_storage->lock);
+	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 	rcu_read_unlock();
 
 	/* free_task_storage should always be true as long as
@@ -150,7 +136,7 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	 */
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	task = pid_task(pid, PIDTYPE_PID);
-	if (!task || !task_storage_ptr(task)) {
+	if (!task) {
 		err = -ENOENT;
 		goto out;
 	}
@@ -213,23 +199,16 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
 		return (unsigned long)NULL;
 
-	/* explicitly check that the task_storage_ptr is not
-	 * NULL as task_storage_lookup returns NULL in this case and
-	 * bpf_local_storage_update expects the owner to have a
-	 * valid storage pointer.
-	 */
-	if (!task || !task_storage_ptr(task))
+	if (!task)
 		return (unsigned long)NULL;
 
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
 		return (unsigned long)sdata->data;
 
-	/* This helper must only be called from places where the lifetime of the task
-	 * is guaranteed. Either by being refcounted or by being protected
-	 * by an RCU read-side critical section.
-	 */
-	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+	/* only allocate new storage, when the task is refcounted */
+	if (refcount_read(&task->usage) &&
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..181604db2d65 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -96,6 +96,7 @@
 #include <linux/kasan.h>
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/bpf.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
 	security_task_free(tsk);
+	bpf_task_storage_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -2062,6 +2064,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
 #endif
+#ifdef CONFIG_BPF_SYSCALL
+	RCU_INIT_POINTER(p->bpf_storage, NULL);
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b0c45d923f0f..e9701744d8e4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1367,6 +1367,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From bc235cdb423a2daed6f337676006a66557429cd1 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:15 -0800
Subject: bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]

BPF helpers bpf_task_storage_[get|delete] could hold two locks:
bpf_local_storage_map_bucket->lock and bpf_local_storage->lock. Calling
these helpers from fentry/fexit programs on functions in bpf_*_storage.c
may cause deadlock on either locks.

Prevent such deadlock with a per cpu counter, bpf_task_storage_busy. We
need this counter to be global, because the two locks here belong to two
different objects: bpf_local_storage_map and bpf_local_storage. If we
pick one of them as the owner of the counter, it is still possible to
trigger deadlock on the other lock. For example, if bpf_local_storage_map
owns the counters, it cannot prevent deadlock on bpf_local_storage->lock
when two maps are used.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210225234319.336131-3-songliubraving@fb.com
---
 include/linux/bpf_local_storage.h |  3 +-
 kernel/bpf/bpf_inode_storage.c    |  2 +-
 kernel/bpf/bpf_local_storage.c    | 11 +++++++-
 kernel/bpf/bpf_task_storage.c     | 59 +++++++++++++++++++++++++++++++++------
 net/core/bpf_sk_storage.c         |  2 +-
 5 files changed, 65 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index b2c9463f36a1..b902c580c48d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -126,7 +126,8 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 			 struct bpf_local_storage_map *smap,
 			 bool cacheit_lockit);
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter);
 
 int bpf_local_storage_map_check_btf(const struct bpf_map *map,
 				    const struct btf *btf,
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 6639640523c0..da753721457c 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 
 static int inode_storage_map_btf_id;
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 9bd47ad2b26f..b305270b7a4b 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -474,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
 	spin_unlock(&cache->idx_lock);
 }
 
-void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
+				int __percpu *busy_counter)
 {
 	struct bpf_local_storage_elem *selem;
 	struct bpf_local_storage_map_bucket *b;
@@ -503,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 		while ((selem = hlist_entry_safe(
 				rcu_dereference_raw(hlist_first_rcu(&b->list)),
 				struct bpf_local_storage_elem, map_node))) {
+			if (busy_counter) {
+				migrate_disable();
+				__this_cpu_inc(*busy_counter);
+			}
 			bpf_selem_unlink(selem);
+			if (busy_counter) {
+				__this_cpu_dec(*busy_counter);
+				migrate_enable();
+			}
 			cond_resched_rcu();
 		}
 		rcu_read_unlock();
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index baf3566e2323..fd3c74ef608e 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,6 +20,31 @@
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
+DEFINE_PER_CPU(int, bpf_task_storage_busy);
+
+static void bpf_task_storage_lock(void)
+{
+	migrate_disable();
+	__this_cpu_inc(bpf_task_storage_busy);
+}
+
+static void bpf_task_storage_unlock(void)
+{
+	__this_cpu_dec(bpf_task_storage_busy);
+	migrate_enable();
+}
+
+static bool bpf_task_storage_trylock(void)
+{
+	migrate_disable();
+	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
+		__this_cpu_dec(bpf_task_storage_busy);
+		migrate_enable();
+		return false;
+	}
+	return true;
+}
+
 static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
 {
 	struct task_struct *task = owner;
@@ -67,6 +92,7 @@ void bpf_task_storage_free(struct task_struct *task)
 	 * when unlinking elem from the local_storage->list and
 	 * the map's bucket->list.
 	 */
+	bpf_task_storage_lock();
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
 		/* Always unlink from map before unlinking from
@@ -77,6 +103,7 @@ void bpf_task_storage_free(struct task_struct *task)
 			local_storage, selem, false);
 	}
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	bpf_task_storage_unlock();
 	rcu_read_unlock();
 
 	/* free_task_storage should always be true as long as
@@ -109,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = task_storage_lookup(task, map, true);
+	bpf_task_storage_unlock();
 	put_pid(pid);
 	return sdata ? sdata->data : NULL;
 out:
@@ -141,8 +170,10 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags);
+	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
 out:
@@ -185,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
 		goto out;
 	}
 
+	bpf_task_storage_lock();
 	err = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
 out:
 	put_pid(pid);
 	return err;
@@ -202,34 +235,44 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
 	if (!task)
 		return (unsigned long)NULL;
 
+	if (!bpf_task_storage_trylock())
+		return (unsigned long)NULL;
+
 	sdata = task_storage_lookup(task, map, true);
 	if (sdata)
-		return (unsigned long)sdata->data;
+		goto unlock;
 
 	/* only allocate new storage, when the task is refcounted */
 	if (refcount_read(&task->usage) &&
-	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
+	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
 			BPF_NOEXIST);
-		return IS_ERR(sdata) ? (unsigned long)NULL :
-					     (unsigned long)sdata->data;
-	}
 
-	return (unsigned long)NULL;
+unlock:
+	bpf_task_storage_unlock();
+	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
+		(unsigned long)sdata->data;
 }
 
 BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
 	   task)
 {
+	int ret;
+
 	if (!task)
 		return -EINVAL;
 
+	if (!bpf_task_storage_trylock())
+		return -EBUSY;
+
 	/* This helper must only be called from places where the lifetime of the task
 	 * is guaranteed. Either by being refcounted or by being protected
 	 * by an RCU read-side critical section.
 	 */
-	return task_storage_delete(task, map);
+	ret = task_storage_delete(task, map);
+	bpf_task_storage_unlock();
+	return ret;
 }
 
 static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -255,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 
 static int task_storage_map_btf_id;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 4edd033e899c..cc3712ad8716 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -89,7 +89,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_local_storage_map *)map;
 	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-	bpf_local_storage_map_free(smap);
+	bpf_local_storage_map_free(smap, NULL);
 }
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
-- 
cgit v1.2.3


From 1f87dcf116adedd6b9f47258d4fdf4fa9ce74002 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:16 -0800
Subject: selftests/bpf: Add non-BPF_LSM test for task local storage

Task local storage is enabled for tracing programs. Add two tests for
task local storage without CONFIG_BPF_LSM.

The first test stores a value in sys_enter and read it back in sys_exit.

The second test checks whether the kernel allows allocating task local
storage in exit_creds() (which it should not).

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210225234319.336131-4-songliubraving@fb.com
---
 .../selftests/bpf/prog_tests/task_local_storage.c  | 69 ++++++++++++++++++++++
 .../selftests/bpf/progs/task_local_storage.c       | 64 ++++++++++++++++++++
 .../bpf/progs/task_local_storage_exit_creds.c      | 32 ++++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/task_local_storage.c
 create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage.c
 create mode 100644 tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c

diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..dbb7525cdd56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <test_progs.h>
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+	struct task_local_storage *skel;
+	int err;
+
+	skel = task_local_storage__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	skel->bss->target_pid = syscall(SYS_gettid);
+
+	err = task_local_storage__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	syscall(SYS_gettid);
+	syscall(SYS_gettid);
+
+	/* 3x syscalls: 1x attach and 2x gettid */
+	ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+	ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+	ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+	task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+	struct task_local_storage_exit_creds *skel;
+	int err;
+
+	skel = task_local_storage_exit_creds__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = task_local_storage_exit_creds__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* trigger at least one exit_creds() */
+	if (CHECK_FAIL(system("ls > /dev/null")))
+		goto out;
+
+	/* sync rcu to make sure exit_creds() is called for "ls" */
+	kern_sync_rcu();
+	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+	task_local_storage_exit_creds__destroy(skel);
+}
+
+void test_task_local_storage(void)
+{
+	if (test__start_subtest("sys_enter_exit"))
+		test_sys_enter_exit();
+	if (test__start_subtest("exit_creds"))
+		test_exit_creds();
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	ptr = bpf_task_storage_get(&enter_id, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	__sync_fetch_and_add(&enter_cnt, 1);
+	*ptr = MAGIC_VALUE + enter_cnt;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	ptr = bpf_task_storage_get(&enter_id, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	__sync_fetch_and_add(&exit_cnt, 1);
+	if (*ptr != MAGIC_VALUE + exit_cnt)
+		__sync_fetch_and_add(&mismatch_cnt, 1);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..81758c0aef99
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, __u64);
+} task_storage SEC(".maps");
+
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+	__u64 *ptr;
+
+	ptr = bpf_task_storage_get(&task_storage, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		__sync_fetch_and_add(&valid_ptr_count, 1);
+	else
+		__sync_fetch_and_add(&null_ptr_count, 1);
+	return 0;
+}
-- 
cgit v1.2.3


From c540957a4d1d13934e93e53ce3056ac4108ffc29 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:17 -0800
Subject: selftests/bpf: Test deadlock from recursive
 bpf_task_storage_[get|delete]

Add a test with recursive bpf_task_storage_[get|delete] from fentry
programs on bpf_local_storage_lookup and bpf_local_storage_update. Without
proper deadlock prevent mechanism, this test would cause deadlock.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210225234319.336131-5-songliubraving@fb.com
---
 .../selftests/bpf/prog_tests/task_local_storage.c  | 23 +++++++
 .../selftests/bpf/progs/task_ls_recursion.c        | 70 ++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/task_ls_recursion.c

diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index dbb7525cdd56..035c263aab1b 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -8,6 +8,7 @@
 #include <test_progs.h>
 #include "task_local_storage.skel.h"
 #include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
 
 static void test_sys_enter_exit(void)
 {
@@ -60,10 +61,32 @@ out:
 	task_local_storage_exit_creds__destroy(skel);
 }
 
+static void test_recursion(void)
+{
+	struct task_ls_recursion *skel;
+	int err;
+
+	skel = task_ls_recursion__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = task_ls_recursion__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* trigger sys_enter, make sure it does not cause deadlock */
+	syscall(SYS_gettid);
+
+out:
+	task_ls_recursion__destroy(skel);
+}
+
 void test_task_local_storage(void)
 {
 	if (test__start_subtest("sys_enter_exit"))
 		test_sys_enter_exit();
 	if (test__start_subtest("exit_creds"))
 		test_exit_creds();
+	if (test__start_subtest("recursion"))
+		test_recursion();
 }
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..564583dca7c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_a SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_lookup")
+int BPF_PROG(on_lookup)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+
+	bpf_task_storage_delete(&map_a, task);
+	bpf_task_storage_delete(&map_b, task);
+	return 0;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	long *ptr;
+
+	ptr = bpf_task_storage_get(&map_a, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr += 1;
+
+	ptr = bpf_task_storage_get(&map_b, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr += 1;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *task;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	ptr = bpf_task_storage_get(&map_a, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr = 200;
+
+	ptr = bpf_task_storage_get(&map_b, task, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		*ptr = 100;
+	return 0;
+}
-- 
cgit v1.2.3


From 4b0d2d4156cfb9f27d8e52a33d3522a78002fca1 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:18 -0800
Subject: bpf: runqslower: Prefer using local vmlimux to generate vmlinux.h

Update the Makefile to prefer using $(O)/vmlinux, $(KBUILD_OUTPUT)/vmlinux
(for selftests) or ../../../vmlinux. These two files should have latest
definitions for vmlinux.h.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210225234319.336131-6-songliubraving@fb.com
---
 tools/bpf/runqslower/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 9d9fb6209be1..c96ba90c6f01 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -16,7 +16,10 @@ CFLAGS := -g -Wall
 
 # Try to detect best kernel BTF source
 KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)		\
+	$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+	../../../vmlinux /sys/kernel/btf/vmlinux	\
+	/boot/vmlinux-$(KERNEL_REL)
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 					  $(wildcard $(VMLINUX_BTF_PATHS))))
 
-- 
cgit v1.2.3


From ced47e30ab8b3ed986e28411f63e041b51c1fdf8 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 25 Feb 2021 15:43:19 -0800
Subject: bpf: runqslower: Use task local storage

Replace hashtab with task local storage in runqslower. This improves the
performance of these BPF programs. The following table summarizes average
runtime of these programs, in nanoseconds:

                          task-local   hash-prealloc   hash-no-prealloc
handle__sched_wakeup             125             340               3124
handle__sched_wakeup_new        2812            1510               2998
handle__sched_switch             151             208                991

Note that, task local storage gives better performance than hashtab for
handle__sched_wakeup and handle__sched_switch. On the other hand, for
handle__sched_wakeup_new, task local storage is slower than hashtab with
prealloc. This is because handle__sched_wakeup_new accesses the data for
the first time, so it has to allocate the data for task local storage.
Once the initial allocation is done, subsequent accesses, as those in
handle__sched_wakeup, are much faster with task local storage. If we
disable hashtab prealloc, task local storage is much faster for all 3
functions.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210225234319.336131-7-songliubraving@fb.com
---
 tools/bpf/runqslower/runqslower.bpf.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f044..645530ca7e98 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
 const volatile pid_t targ_pid = 0;
 
 struct {
-	__uint(type, BPF_MAP_TYPE_HASH);
-	__uint(max_entries, 10240);
-	__type(key, u32);
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
 	__type(value, u64);
 } start SEC(".maps");
 
@@ -25,15 +25,20 @@ struct {
 
 /* record enqueue timestamp */
 __always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
 {
-	u64 ts;
+	u32 pid = t->pid;
+	u64 *ptr;
 
 	if (!pid || (targ_pid && targ_pid != pid))
 		return 0;
 
-	ts = bpf_ktime_get_ns();
-	bpf_map_update_elem(&start, &pid, &ts, 0);
+	ptr = bpf_task_storage_get(&start, t, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+
+	*ptr = bpf_ktime_get_ns();
 	return 0;
 }
 
@@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
 
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 
 SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
 	/* TP_PROTO(struct task_struct *p) */
 	struct task_struct *p = (void *)ctx[0];
 
-	return trace_enqueue(p->tgid, p->pid);
+	return trace_enqueue(p);
 }
 
 SEC("tp_btf/sched_switch")
@@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
 
 	/* ivcsw: treat like an enqueue event and store timestamp */
 	if (prev->state == TASK_RUNNING)
-		trace_enqueue(prev->tgid, prev->pid);
+		trace_enqueue(prev);
 
 	pid = next->pid;
 
+	/* For pid mismatch, save a bpf_task_storage_get */
+	if (!pid || (targ_pid && targ_pid != pid))
+		return 0;
+
 	/* fetch timestamp and calculate delta */
-	tsp = bpf_map_lookup_elem(&start, &pid);
+	tsp = bpf_task_storage_get(&start, next, 0, 0);
 	if (!tsp)
 		return 0;   /* missed enqueue */
 
@@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
 	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 			      &event, sizeof(event));
 
-	bpf_map_delete_elem(&start, &pid);
+	bpf_task_storage_delete(&start, next);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 523a4cf491b3c9e2d546040d57250f1a0ca84f03 Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Fri, 26 Feb 2021 00:26:29 +0400
Subject: bpf: Use MAX_BPF_FUNC_REG_ARGS macro

Instead of using integer literal here and there use macro name for
better context.

Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210225202629.585485-1-me@ubique.spb.ru
---
 include/linux/bpf.h   |  5 +++++
 kernel/bpf/btf.c      | 25 ++++++++++++++-----------
 kernel/bpf/verifier.c |  2 +-
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e2cfc4809219..ae2c35641619 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -506,6 +506,11 @@ enum bpf_cgroup_storage_type {
  */
 #define MAX_BPF_FUNC_ARGS 12
 
+/* The maximum number of arguments passed through registers
+ * a single function may have.
+ */
+#define MAX_BPF_FUNC_REG_ARGS 5
+
 struct btf_func_model {
 	u8 ret_size;
 	u8 nr_args;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 2efeb5f4b343..16e8148a28e2 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4594,8 +4594,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	}
 	arg = off / 8;
 	args = (const struct btf_param *)(t + 1);
-	/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
-	nr_args = t ? btf_type_vlen(t) : 5;
+	/* if (t == NULL) Fall back to default BPF prog with
+	 * MAX_BPF_FUNC_REG_ARGS u64 arguments.
+	 */
+	nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
 	if (prog->aux->attach_btf_trace) {
 		/* skip first 'void *__data' argument in btf_trace_##name typedef */
 		args++;
@@ -4651,7 +4653,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		}
 	} else {
 		if (!t)
-			/* Default prog with 5 args */
+			/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
 			return true;
 		t = btf_type_by_id(btf, args[arg].type);
 	}
@@ -5102,12 +5104,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 
 	if (!func) {
 		/* BTF function prototype doesn't match the verifier types.
-		 * Fall back to 5 u64 args.
+		 * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
 		 */
-		for (i = 0; i < 5; i++)
+		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
 			m->arg_size[i] = 8;
 		m->ret_size = 8;
-		m->nr_args = 5;
+		m->nr_args = MAX_BPF_FUNC_REG_ARGS;
 		return 0;
 	}
 	args = (const struct btf_param *)(func + 1);
@@ -5330,8 +5332,9 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
 	}
 	args = (const struct btf_param *)(t + 1);
 	nargs = btf_type_vlen(t);
-	if (nargs > 5) {
-		bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
+	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+		bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
+			MAX_BPF_FUNC_REG_ARGS);
 		goto out;
 	}
 
@@ -5460,9 +5463,9 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 	}
 	args = (const struct btf_param *)(t + 1);
 	nargs = btf_type_vlen(t);
-	if (nargs > 5) {
-		bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
-			tname, nargs);
+	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
+		bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
+			tname, nargs, MAX_BPF_FUNC_REG_ARGS);
 		return -EINVAL;
 	}
 	/* check that function returns int */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1dda9d81f12c..9f7e35590fc6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5544,7 +5544,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 	meta.func_id = func_id;
 	/* check args */
-	for (i = 0; i < 5; i++) {
+	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 		err = check_func_arg(env, i, &meta, fn);
 		if (err)
 			return err;
-- 
cgit v1.2.3


From e6ac593372aadacc14e02b198e4a1acfef1db595 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Wed, 17 Feb 2021 10:45:09 +0000
Subject: bpf: Rename fixup_bpf_calls and add some comments

This function has become overloaded, it actually does lots of diverse
things in a single pass. Rename it to avoid confusion, and add some
concise commentary.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210217104509.2423183-1-jackmanb@google.com
---
 kernel/bpf/verifier.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9f7e35590fc6..9336bac39027 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5877,7 +5877,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
 	     aux->alu_limit != alu_limit))
 		return -EACCES;
 
-	/* Corresponding fixup done in fixup_bpf_calls(). */
+	/* Corresponding fixup done in do_misc_fixups(). */
 	aux->alu_state = alu_state;
 	aux->alu_limit = alu_limit;
 	return 0;
@@ -11535,12 +11535,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 	return err;
 }
 
-/* fixup insn->imm field of bpf_call instructions
- * and inline eligible helpers as explicit sequence of BPF instructions
- *
- * this function is called after eBPF program passed verification
+/* Do various post-verification rewrites in a single program pass.
+ * These rewrites simplify JIT and interpreter implementations.
  */
-static int fixup_bpf_calls(struct bpf_verifier_env *env)
+static int do_misc_fixups(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
@@ -11555,6 +11553,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	int i, ret, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
@@ -11595,6 +11594,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
 		if (BPF_CLASS(insn->code) == BPF_LD &&
 		    (BPF_MODE(insn->code) == BPF_ABS ||
 		     BPF_MODE(insn->code) == BPF_IND)) {
@@ -11614,6 +11614,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
@@ -11835,6 +11836,7 @@ patch_map_ops_generic:
 			goto patch_call_imm;
 		}
 
+		/* Implement bpf_jiffies64 inline. */
 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
 		    insn->imm == BPF_FUNC_jiffies64) {
 			struct bpf_insn ld_jiffies_addr[2] = {
@@ -12645,7 +12647,7 @@ skip_full_check:
 		ret = convert_ctx_accesses(env);
 
 	if (ret == 0)
-		ret = fixup_bpf_calls(env);
+		ret = do_misc_fixups(env);
 
 	/* do 32-bit optimization after insn patching has done so those patched
 	 * insns could be handled correctly.
-- 
cgit v1.2.3


From ecde60614d5ed60fde1c80b38b71582a3ea2e662 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 23 Feb 2021 16:23:01 +0000
Subject: selftest/bpf: Make xsk tests less verbose

Make the xsk tests less verbose by only printing the
essentials. Currently, it is hard to see if the tests passed or not
due to all the printouts. Move the extra printouts to a verbose
option, if further debugging is needed when a problem arises.

To run the xsk tests with verbose output:
./test_xsk.sh -v

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210223162304.7450-2-ciara.loftus@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh    | 27 ++++++++++++++++++++------
 tools/testing/selftests/bpf/xdpxceiver.c   | 31 +++++++++++++++++-------------
 tools/testing/selftests/bpf/xdpxceiver.h   | 13 ++++++++-----
 tools/testing/selftests/bpf/xsk_prereqs.sh |  9 +++------
 4 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 88a7483eaae4..f4cedf4c2718 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -71,13 +71,17 @@
 #
 # Run (full output without color-coding):
 #   sudo ./test_xsk.sh
+#
+# Run with verbose output:
+#   sudo ./test_xsk.sh -v
 
 . xsk_prereqs.sh
 
-while getopts c flag
+while getopts "cv" flag
 do
 	case "${flag}" in
 		c) colorconsole=1;;
+		v) verbose=1;;
 	esac
 done
 
@@ -95,13 +99,17 @@ NS1=af_xdp${VETH1_POSTFIX}
 MTU=1500
 
 setup_vethPairs() {
-	echo "setting up ${VETH0}: namespace: ${NS0}"
+	if [[ $verbose -eq 1 ]]; then
+	        echo "setting up ${VETH0}: namespace: ${NS0}"
+	fi
 	ip netns add ${NS1}
 	ip link add ${VETH0} type veth peer name ${VETH1}
 	if [ -f /proc/net/if_inet6 ]; then
 		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
 	fi
-	echo "setting up ${VETH1}: namespace: ${NS1}"
+	if [[ $verbose -eq 1 ]]; then
+	        echo "setting up ${VETH1}: namespace: ${NS1}"
+	fi
 	ip link set ${VETH1} netns ${NS1}
 	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
@@ -125,7 +133,10 @@ echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
 
 validate_veth_spec_file
 
-echo "Spec file created: ${SPECFILE}"
+if [[ $verbose -eq 1 ]]; then
+        echo "Spec file created: ${SPECFILE}"
+	VERBOSE_ARG="-v"
+fi
 
 test_status $retval "${TEST_NAME}"
 
@@ -136,12 +147,16 @@ statusList=()
 ### TEST 1
 TEST_NAME="XSK KSELFTEST FRAMEWORK"
 
-echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
+if [[ $verbose -eq 1 ]]; then
+        echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
+fi
 vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
 
 retval=$?
 if [ $retval -eq 0 ]; then
-	echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
+        if [[ $verbose -eq 1 ]]; then
+	        echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
+	fi
 	vethXDPnative ${VETH0} ${VETH1} ${NS1}
 fi
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f4a96d5ff524..8af746c9a6b6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -341,6 +341,7 @@ static struct option long_options[] = {
 	{"tear-down", no_argument, 0, 'T'},
 	{"bidi", optional_argument, 0, 'B'},
 	{"debug", optional_argument, 0, 'D'},
+	{"verbose", no_argument, 0, 'v'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
 };
@@ -359,6 +360,7 @@ static void usage(const char *prog)
 	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
 	    "  -B, --bidi           Bi-directional sockets test\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
+	    "  -v, --verbose        Verbose output\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
 }
@@ -392,7 +394,7 @@ static void *nsswitchthread(void *args)
 			ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
 					      __func__, ifdict[targs->idx]->ifname);
 		} else {
-			ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname);
+			print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname);
 			targs->retptr = true;
 		}
 	}
@@ -422,7 +424,7 @@ static int validate_interfaces(void)
 			pthread_join(ns_thread, NULL);
 
 			if (targs->retptr)
-				ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
+				print_verbose("NS switched: %s\n", ifdict[i]->nsname);
 
 			free(targs);
 		} else {
@@ -432,7 +434,7 @@ static int validate_interfaces(void)
 				    ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
 				ret = false;
 			} else {
-				ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
+				print_verbose("Interface found: %s\n", ifdict[i]->ifname);
 			}
 		}
 	}
@@ -446,7 +448,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcTBDC:v", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -497,6 +499,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'C':
 			opt_pkt_count = atoi(optarg);
 			break;
+		case 'v':
+			opt_verbose = 1;
+			break;
 		default:
 			usage(basename(argv[0]));
 			ksft_exit_xfail();
@@ -714,7 +719,7 @@ static void worker_pkt_dump(void)
 		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
 
 		if (payload == EOT) {
-			ksft_print_msg("End-of-transmission frame received\n");
+			print_verbose("End-of-transmission frame received\n");
 			fprintf(stdout, "---------------------------------------\n");
 			break;
 		}
@@ -746,7 +751,7 @@ static void worker_pkt_validate(void)
 			}
 
 			if (payloadseqnum == EOT) {
-				ksft_print_msg("End-of-transmission frame received: PASS\n");
+				print_verbose("End-of-transmission frame received: PASS\n");
 				sigvar = 1;
 				break;
 			}
@@ -836,7 +841,7 @@ static void *worker_testapp_validate(void *arg)
 			usleep(USLEEP_MAX);
 		}
 
-		ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname);
+		print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname);
 		for (int i = 0; i < num_frames; i++) {
 			/*send EOT frame */
 			if (i == (num_frames - 1))
@@ -850,7 +855,7 @@ static void *worker_testapp_validate(void *arg)
 			gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
 		}
 
-		ksft_print_msg("Sending %d packets on interface %s\n",
+		print_verbose("Sending %d packets on interface %s\n",
 			       (opt_pkt_count - 1), ifobject->ifname);
 		tx_only_all(ifobject);
 	} else if (ifobject->fv.vector == rx) {
@@ -860,7 +865,7 @@ static void *worker_testapp_validate(void *arg)
 		if (!bidi_pass)
 			thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
 
-		ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname);
+		print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname);
 		xsk_populate_fill_ring(ifobject->umem);
 
 		TAILQ_INIT(&head);
@@ -890,11 +895,11 @@ static void *worker_testapp_validate(void *arg)
 				break;
 		}
 
-		ksft_print_msg("Received %d packets on interface %s\n",
+		print_verbose("Received %d packets on interface %s\n",
 			       pkt_counter, ifobject->ifname);
 
 		if (opt_teardown)
-			ksft_print_msg("Destroying socket\n");
+			print_verbose("Destroying socket\n");
 	}
 
 	if (!opt_bidi || bidi_pass) {
@@ -914,7 +919,7 @@ static void testapp_validate(void)
 	if (opt_bidi && bidi_pass) {
 		pthread_init_mutex();
 		if (!switching_notify) {
-			ksft_print_msg("Switching Tx/Rx vectors\n");
+			print_verbose("Switching Tx/Rx vectors\n");
 			switching_notify++;
 		}
 	}
@@ -974,7 +979,7 @@ static void testapp_sockets(void)
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
-		ksft_print_msg("Creating socket\n");
+		print_verbose("Creating socket\n");
 		testapp_validate();
 		opt_bidi ? bidi_pass++ : bidi_pass;
 	}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0e9f9b7e61c2..f66f399dfb2d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -42,6 +42,8 @@
 #define POLL_TMOUT 1000
 #define NEED_WAKEUP true
 
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
 typedef __u32 u32;
 typedef __u16 u16;
 typedef __u8 u8;
@@ -51,11 +53,11 @@ enum TESTS {
 	ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
 };
 
-u8 uut;
-u8 debug_pkt_dump;
-u32 num_frames;
-u8 switching_notify;
-u8 bidi_pass;
+static u8 uut;
+static u8 debug_pkt_dump;
+static u32 num_frames;
+static u8 switching_notify;
+static u8 bidi_pass;
 
 static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
@@ -64,6 +66,7 @@ static int opt_poll;
 static int opt_teardown;
 static int opt_bidi;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 opt_verbose;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
 static u32 prev_pkt = -1;
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 9d54c4645127..ef8c5b31f4b6 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -82,24 +82,21 @@ clear_configs()
 {
 	if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
 		[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
-			{ echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
-		echo "removing ns $3"
+			{ ip netns exec $3 ip link del $2; }
 		ip netns del $3
 	fi
 	#Once we delete a veth pair node, the entire veth pair is removed,
 	#this is just to be cautious just incase the NS does not exist then
 	#veth node inside NS won't get removed so we explicitly remove it
 	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
-		{ echo "removing link $1"; ip link del $1; }
+		{ ip link del $1; }
 	if [ -f ${SPECFILE} ]; then
-		echo "removing spec file:" ${SPECFILE}
 		rm -f ${SPECFILE}
 	fi
 }
 
 cleanup_exit()
 {
-	echo "cleaning up..."
 	clear_configs $1 $2 $3
 }
 
@@ -131,5 +128,5 @@ execxdpxceiver()
 			copy[$index]=${!current}
 		done
 
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} ${VERBOSE_ARG}
 }
-- 
cgit v1.2.3


From d2b0dfd5d1f94fe74ed580b5a1d5fdb5bf11f2fb Mon Sep 17 00:00:00 2001
From: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 23 Feb 2021 16:23:02 +0000
Subject: selftests/bpf: Expose and rename debug argument

Launching xdpxceiver with -D enables what was formerly know as 'debug'
mode. Rename this mode to 'dump-pkts' as it better describes the
behavior enabled by the option. New usage:

./xdpxceiver .. -D
or
./xdpxceiver .. --dump-pkts

Also make it possible to pass this flag to the app via the test_xsk.sh
shell script like so:

./test_xsk.sh -D

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210223162304.7450-3-ciara.loftus@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh    | 10 +++++++++-
 tools/testing/selftests/bpf/xdpxceiver.c   |  6 +++---
 tools/testing/selftests/bpf/xsk_prereqs.sh |  3 ++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index f4cedf4c2718..dbb129a36606 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -74,14 +74,18 @@
 #
 # Run with verbose output:
 #   sudo ./test_xsk.sh -v
+#
+# Run and dump packet contents:
+#   sudo ./test_xsk.sh -D
 
 . xsk_prereqs.sh
 
-while getopts "cv" flag
+while getopts "cvD" flag
 do
 	case "${flag}" in
 		c) colorconsole=1;;
 		v) verbose=1;;
+		D) dump_pkts=1;;
 	esac
 done
 
@@ -138,6 +142,10 @@ if [[ $verbose -eq 1 ]]; then
 	VERBOSE_ARG="-v"
 fi
 
+if [[ $dump_pkts -eq 1 ]]; then
+	DUMP_PKTS_ARG="-D"
+fi
+
 test_status $retval "${TEST_NAME}"
 
 ## START TESTS
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 8af746c9a6b6..506423201197 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -58,7 +58,7 @@
  * - Rx thread verifies if all 10k packets were received and delivered in-order,
  *   and have the right content
  *
- * Enable/disable debug mode:
+ * Enable/disable packet dump mode:
  * --------------------------
  * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
  * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
@@ -340,7 +340,7 @@ static struct option long_options[] = {
 	{"copy", no_argument, 0, 'c'},
 	{"tear-down", no_argument, 0, 'T'},
 	{"bidi", optional_argument, 0, 'B'},
-	{"debug", optional_argument, 0, 'D'},
+	{"dump-pkts", optional_argument, 0, 'D'},
 	{"verbose", no_argument, 0, 'v'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
@@ -359,7 +359,7 @@ static void usage(const char *prog)
 	    "  -c, --copy           Force copy mode\n"
 	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
 	    "  -B, --bidi           Bi-directional sockets test\n"
-	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
+	    "  -D, --dump-pkts      Dump packets L2 - L5\n"
 	    "  -v, --verbose        Verbose output\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index ef8c5b31f4b6..da93575d757a 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -128,5 +128,6 @@ execxdpxceiver()
 			copy[$index]=${!current}
 		done
 
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} ${VERBOSE_ARG}
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} ${VERBOSE_ARG} \
+		${DUMP_PKTS_ARG}
 }
-- 
cgit v1.2.3


From d3e3bf5b4c673e79c5a11608f53d4e0059a7ec79 Mon Sep 17 00:00:00 2001
From: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 23 Feb 2021 16:23:03 +0000
Subject: selftests/bpf: Restructure xsk selftests

Prior to this commit individual xsk tests were launched from the
shell script 'test_xsk.sh'. When adding a new test type, two new test
configurations had to be added to this file - one for each of the
supported XDP 'modes' (skb or drv). Should zero copy support be added to
the xsk selftest framework in the future, three new test configurations
would need to be added for each new test type. Each new test type also
typically requires new CLI arguments for the xdpxceiver program.

This commit aims to reduce the overhead of adding new tests, by launching
the test configurations from within the xdpxceiver program itself, using
simple loops. Every test is run every time the C program is executed. Many
of the CLI arguments can be removed as a result.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210223162304.7450-4-ciara.loftus@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh    | 112 +--------------
 tools/testing/selftests/bpf/xdpxceiver.c   | 218 ++++++++++++++++++-----------
 tools/testing/selftests/bpf/xdpxceiver.h   |  33 +++--
 tools/testing/selftests/bpf/xsk_prereqs.sh |  24 +---
 4 files changed, 159 insertions(+), 228 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index dbb129a36606..56d4474e2c83 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -152,117 +152,9 @@ test_status $retval "${TEST_NAME}"
 
 statusList=()
 
-### TEST 1
-TEST_NAME="XSK KSELFTEST FRAMEWORK"
+TEST_NAME="XSK KSELFTESTS"
 
-if [[ $verbose -eq 1 ]]; then
-        echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
-fi
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-retval=$?
-if [ $retval -eq 0 ]; then
-        if [[ $verbose -eq 1 ]]; then
-	        echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
-	fi
-	vethXDPnative ${VETH0} ${VETH1} ${NS1}
-fi
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 2
-TEST_NAME="SKB NOPOLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 3
-TEST_NAME="SKB POLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 4
-TEST_NAME="DRV NOPOLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 5
-TEST_NAME="DRV POLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 6
-TEST_NAME="SKB SOCKET TEARDOWN"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-T")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 7
-TEST_NAME="DRV SOCKET TEARDOWN"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-T")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 8
-TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-B")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 9
-TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-B")
-execxdpxceiver params
+execxdpxceiver
 
 retval=$?
 test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 506423201197..e7913444518d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -18,12 +18,7 @@
  * These selftests test AF_XDP SKB and Native/DRV modes using veth
  * Virtual Ethernet interfaces.
  *
- * The following tests are run:
- *
- * 1. AF_XDP SKB mode
- *    Generic mode XDP is driver independent, used when the driver does
- *    not have support for XDP. Works on any netdevice using sockets and
- *    generic XDP path. XDP hook from netif_receive_skb().
+ * For each mode, the following tests are run:
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
  *    c. Socket Teardown
@@ -34,17 +29,6 @@
  *       completion rings on each socket, tx/rx in both directions. Only nopoll
  *       mode is used
  *
- * 2. AF_XDP DRV/Native mode
- *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
- *    packets before SKB allocation. Provides better performance than SKB. Driver
- *    hook available just after DMA of buffer descriptor.
- *    a. nopoll
- *    b. poll
- *    c. Socket Teardown
- *    d. Bi-directional sockets
- *    - Only copy mode is supported because veth does not currently support
- *      zero-copy mode
- *
  * Total tests: 8
  *
  * Flow:
@@ -98,17 +82,23 @@ typedef __u16 __sum16;
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
-	ksft_test_result_fail
-	    ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
-	ksft_exit_xfail();
+	if (configured_mode == TEST_MODE_UNCONFIGURED) {
+		ksft_exit_fail_msg
+		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+	} else {
+		ksft_test_result_fail
+		("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+		ksft_exit_xfail();
+	}
 }
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
-			       "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
-			       opt_bidi ? "Bi-directional Sockets" : ""))
+	(ksft_test_result_pass("PASS: %s %s %s%s\n", configured_mode ? "DRV" : "SKB",\
+			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
+			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
+			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -311,10 +301,10 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
-	cfg.xdp_flags = opt_xdp_flags;
-	cfg.bind_flags = opt_xdp_bind_flags;
+	cfg.xdp_flags = xdp_flags;
+	cfg.bind_flags = xdp_bind_flags;
 
-	if (!opt_bidi) {
+	if (test_type != TEST_TYPE_BIDI) {
 		rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
 		txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
 	} else {
@@ -334,12 +324,6 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"queue", optional_argument, 0, 'q'},
-	{"poll", no_argument, 0, 'p'},
-	{"xdp-skb", no_argument, 0, 'S'},
-	{"xdp-native", no_argument, 0, 'N'},
-	{"copy", no_argument, 0, 'c'},
-	{"tear-down", no_argument, 0, 'T'},
-	{"bidi", optional_argument, 0, 'B'},
 	{"dump-pkts", optional_argument, 0, 'D'},
 	{"verbose", no_argument, 0, 'v'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
@@ -353,12 +337,6 @@ static void usage(const char *prog)
 	    "  Options:\n"
 	    "  -i, --interface      Use interface\n"
 	    "  -q, --queue=n        Use queue n (default 0)\n"
-	    "  -p, --poll           Use poll syscall\n"
-	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
-	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
-	    "  -c, --copy           Force copy mode\n"
-	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
-	    "  -B, --bidi           Bi-directional sockets test\n"
 	    "  -D, --dump-pkts      Dump packets L2 - L5\n"
 	    "  -v, --verbose        Verbose output\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
@@ -448,7 +426,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcTBDC:v", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -471,28 +449,6 @@ static void parse_command_line(int argc, char **argv)
 		case 'q':
 			opt_queue = atoi(optarg);
 			break;
-		case 'p':
-			opt_poll = 1;
-			break;
-		case 'S':
-			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
-			opt_xdp_bind_flags |= XDP_COPY;
-			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
-			break;
-		case 'N':
-			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
-			opt_xdp_bind_flags |= XDP_COPY;
-			uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
-			break;
-		case 'c':
-			opt_xdp_bind_flags |= XDP_COPY;
-			break;
-		case 'T':
-			opt_teardown = 1;
-			break;
-		case 'B':
-			opt_bidi = 1;
-			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -508,6 +464,11 @@ static void parse_command_line(int argc, char **argv)
 		}
 	}
 
+	if (!opt_pkt_count) {
+		print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
+		opt_pkt_count = DEFAULT_PKT_CNT;
+	}
+
 	if (!validate_interfaces()) {
 		usage(basename(argv[0]));
 		ksft_exit_xfail();
@@ -659,7 +620,7 @@ static void tx_only_all(struct ifobject *ifobject)
 	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
 		int batch_size = get_batch_size(pkt_cnt);
 
-		if (opt_poll) {
+		if (test_type == TEST_TYPE_POLL) {
 			ret = poll(fds, 1, POLL_TMOUT);
 			if (ret <= 0)
 				continue;
@@ -883,7 +844,7 @@ static void *worker_testapp_validate(void *arg)
 		pthread_mutex_unlock(&sync_mutex);
 
 		while (1) {
-			if (opt_poll) {
+			if (test_type == TEST_TYPE_POLL) {
 				ret = poll(fds, 1, POLL_TMOUT);
 				if (ret <= 0)
 					continue;
@@ -898,11 +859,11 @@ static void *worker_testapp_validate(void *arg)
 		print_verbose("Received %d packets on interface %s\n",
 			       pkt_counter, ifobject->ifname);
 
-		if (opt_teardown)
+		if (test_type == TEST_TYPE_TEARDOWN)
 			print_verbose("Destroying socket\n");
 	}
 
-	if (!opt_bidi || bidi_pass) {
+	if ((test_type != TEST_TYPE_BIDI) || bidi_pass) {
 		xsk_socket__delete(ifobject->xsk->xsk);
 		(void)xsk_umem__delete(ifobject->umem->umem);
 	}
@@ -912,11 +873,12 @@ static void *worker_testapp_validate(void *arg)
 static void testapp_validate(void)
 {
 	struct timespec max_wait = { 0, 0 };
+	bool bidi = test_type == TEST_TYPE_BIDI;
 
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	if (opt_bidi && bidi_pass) {
+	if ((test_type == TEST_TYPE_BIDI) && bidi_pass) {
 		pthread_init_mutex();
 		if (!switching_notify) {
 			print_verbose("Switching Tx/Rx vectors\n");
@@ -927,10 +889,10 @@ static void testapp_validate(void)
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	if (!opt_bidi || !bidi_pass) {
+	if (!bidi || !bidi_pass) {
 		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
 			exit_with_error(errno);
-	} else if (opt_bidi && bidi_pass) {
+	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[0]->fv.vector = rx;
 		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
@@ -947,10 +909,10 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	if (!opt_bidi || !bidi_pass) {
+	if (!bidi || !bidi_pass) {
 		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
 			exit_with_error(errno);
-	} else if (opt_bidi && bidi_pass) {
+	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[1]->fv.vector = tx;
 		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
@@ -969,19 +931,20 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!opt_teardown && !opt_bidi)
+	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi)
 		print_ksft_result();
 }
 
 static void testapp_sockets(void)
 {
-	for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
+	for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER);
+	     i++) {
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
 		print_verbose("Creating socket\n");
 		testapp_validate();
-		opt_bidi ? bidi_pass++ : bidi_pass;
+		test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass;
 	}
 
 	print_ksft_result();
@@ -1008,6 +971,98 @@ static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
 	ifdict[1]->src_port = ifaceconfig->dst_port;
 }
 
+static void *nsdisablemodethread(void *args)
+{
+	struct targs *targs = args;
+
+	targs->retptr = false;
+
+	if (switch_namespace(targs->idx)) {
+		targs->retptr = bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags);
+	} else {
+		targs->retptr = errno;
+		print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname);
+	}
+
+	pthread_exit(NULL);
+}
+
+static void disable_xdp_mode(int mode)
+{
+	int err = 0;
+	__u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode;
+	char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv";
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		if (strcmp(ifdict[i]->nsname, "")) {
+			struct targs *targs;
+
+			targs = malloc(sizeof(*targs));
+			memset(targs, 0, sizeof(*targs));
+			if (!targs)
+				exit_with_error(errno);
+
+			targs->idx = i;
+			targs->flags = flags;
+			if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs))
+				exit_with_error(errno);
+
+			pthread_join(ns_thread, NULL);
+			err = targs->retptr;
+			free(targs);
+		} else {
+			err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags);
+		}
+
+		if (err) {
+			print_verbose("Failed to disable %s mode on interface %s\n",
+						mode_str, ifdict[i]->ifname);
+			exit_with_error(err);
+		}
+
+		print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname);
+		configured_mode = mode & XDP_FLAGS_SKB_MODE ? TEST_MODE_DRV : TEST_MODE_SKB;
+	}
+}
+
+static void run_pkt_test(int mode, int type)
+{
+	test_type = type;
+
+	/* reset defaults after potential previous test */
+	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+	pkt_counter = 0;
+	switching_notify = 0;
+	bidi_pass = 0;
+	prev_pkt = -1;
+	ifdict[0]->fv.vector = tx;
+	ifdict[1]->fv.vector = rx;
+
+	switch (mode) {
+	case (TEST_MODE_SKB):
+		if (configured_mode == TEST_MODE_DRV)
+			disable_xdp_mode(XDP_FLAGS_DRV_MODE);
+		xdp_flags |= XDP_FLAGS_SKB_MODE;
+		break;
+	case (TEST_MODE_DRV):
+		if (configured_mode == TEST_MODE_SKB)
+			disable_xdp_mode(XDP_FLAGS_SKB_MODE);
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+		break;
+	default:
+		break;
+	}
+
+	pthread_init_mutex();
+
+	if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
+		testapp_validate();
+	else
+		testapp_sockets();
+
+	pthread_destroy_mutex();
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
@@ -1021,6 +1076,7 @@ int main(int argc, char **argv)
 	const char *IP2 = "192.168.100.161";
 	u16 UDP_DST_PORT = 2020;
 	u16 UDP_SRC_PORT = 2121;
+	int i, j;
 
 	ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
 	memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
@@ -1046,24 +1102,18 @@ int main(int argc, char **argv)
 
 	init_iface_config(ifaceconfig);
 
-	pthread_init_mutex();
+	disable_xdp_mode(XDP_FLAGS_DRV_MODE);
 
-	ksft_set_plan(1);
+	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
-	if (!opt_teardown && !opt_bidi) {
-		testapp_validate();
-	} else if (opt_teardown && opt_bidi) {
-		ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
-		ksft_exit_xfail();
-	} else {
-		testapp_sockets();
+	for (i = 0; i < TEST_MODE_MAX; i++) {
+		for (j = 0; j < TEST_TYPE_MAX; j++)
+			run_pkt_test(i, j);
 	}
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
 
-	pthread_destroy_mutex();
-
 	ksft_exit_pass();
 
 	return 0;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index f66f399dfb2d..e05703f661f8 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -41,6 +41,7 @@
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
 #define NEED_WAKEUP true
+#define DEFAULT_PKT_CNT 10000
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
@@ -48,28 +49,37 @@ typedef __u32 u32;
 typedef __u16 u16;
 typedef __u8 u8;
 
-enum TESTS {
-	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
-	ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
+enum TEST_MODES {
+	TEST_MODE_UNCONFIGURED = -1,
+	TEST_MODE_SKB,
+	TEST_MODE_DRV,
+	TEST_MODE_MAX
 };
 
-static u8 uut;
+enum TEST_TYPES {
+	TEST_TYPE_NOPOLL,
+	TEST_TYPE_POLL,
+	TEST_TYPE_TEARDOWN,
+	TEST_TYPE_BIDI,
+	TEST_TYPE_MAX
+};
+
+static int configured_mode = TEST_MODE_UNCONFIGURED;
 static u8 debug_pkt_dump;
 static u32 num_frames;
 static u8 switching_notify;
 static u8 bidi_pass;
+static int test_type;
 
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
-static int opt_poll;
-static int opt_teardown;
-static int opt_bidi;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u8 opt_verbose;
+
+static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-static u32 prev_pkt = -1;
+static long prev_pkt = -1;
 static int sigvar;
 
 struct xsk_umem_info {
@@ -140,8 +150,9 @@ pthread_t t0, t1, ns_thread;
 pthread_attr_t attr;
 
 struct targs {
-	bool retptr;
+	u8 retptr;
 	int idx;
+	u32 flags;
 };
 
 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index da93575d757a..dac1c5f78752 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -105,29 +105,7 @@ validate_ip_utility()
 	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
 }
 
-vethXDPgeneric()
-{
-	ip link set dev $1 xdpdrv off
-	ip netns exec $3 ip link set dev $2 xdpdrv off
-}
-
-vethXDPnative()
-{
-	ip link set dev $1 xdpgeneric off
-	ip netns exec $3 ip link set dev $2 xdpgeneric off
-}
-
 execxdpxceiver()
 {
-	local -a 'paramkeys=("${!'"$1"'[@]}")' copy
-	paramkeysstr=${paramkeys[*]}
-
-	for index in $paramkeysstr;
-		do
-			current=$1"[$index]"
-			copy[$index]=${!current}
-		done
-
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS} ${VERBOSE_ARG} \
-		${DUMP_PKTS_ARG}
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
 }
-- 
cgit v1.2.3


From b267e5a458a719f3f5eaaaebe87c5f4a13584832 Mon Sep 17 00:00:00 2001
From: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 23 Feb 2021 16:23:04 +0000
Subject: selftests/bpf: Introduce xsk statistics tests

This commit introduces a range of tests to the xsk testsuite
for validating xsk statistics.

A new test type called 'stats' is added. Within it there are
four sub-tests. Each test configures a scenario which should
trigger the given error statistic. The test passes if the statistic
is successfully incremented.

The four statistics for which tests have been created are:
1. rx dropped
Increase the UMEM frame headroom to a value which results in
insufficient space in the rx buffer for both the packet and the headroom.
2. tx invalid
Set the 'len' field of tx descriptors to an invalid value (umem frame
size + 1).
3. rx ring full
Reduce the size of the RX ring to a fraction of the fill ring size.
4. fill queue empty
Do not populate the fill queue and then try to receive pkts.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210223162304.7450-5-ciara.loftus@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 137 +++++++++++++++++++++++++++----
 tools/testing/selftests/bpf/xdpxceiver.h |  13 +++
 2 files changed, 136 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index e7913444518d..8b0f7fdd9003 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -28,8 +28,21 @@
  *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
  *       completion rings on each socket, tx/rx in both directions. Only nopoll
  *       mode is used
+ *    e. Statistics
+ *       Trigger some error conditions and ensure that the appropriate statistics
+ *       are incremented. Within this test, the following statistics are tested:
+ *       i.   rx dropped
+ *            Increase the UMEM frame headroom to a value which results in
+ *            insufficient space in the rx buffer for both the packet and the headroom.
+ *       ii.  tx invalid
+ *            Set the 'len' field of tx descriptors to an invalid value (umem frame
+ *            size + 1).
+ *       iii. rx ring full
+ *            Reduce the size of the RX ring to a fraction of the fill ring size.
+ *       iv.  fill queue empty
+ *            Do not populate the fill queue and then try to receive pkts.
  *
- * Total tests: 8
+ * Total tests: 10
  *
  * Flow:
  * -----
@@ -95,10 +108,11 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s\n", configured_mode ? "DRV" : "SKB",\
+	(ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\
 			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
 			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
-			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : ""))
+			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
+			       test_type == TEST_TYPE_STATS ? "Stats" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -260,13 +274,20 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
 static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
 {
 	int ret;
+	struct xsk_umem_config cfg = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.frame_headroom = frame_headroom,
+		.flags = XSK_UMEM__DEFAULT_FLAGS
+	};
 
 	data->umem = calloc(1, sizeof(struct xsk_umem_info));
 	if (!data->umem)
 		exit_with_error(errno);
 
 	ret = xsk_umem__create(&data->umem->umem, buffer, size,
-			       &data->umem->fq, &data->umem->cq, NULL);
+			       &data->umem->fq, &data->umem->cq, &cfg);
 	if (ret)
 		exit_with_error(ret);
 
@@ -298,7 +319,7 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 		exit_with_error(errno);
 
 	ifobject->xsk->umem = ifobject->umem;
-	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg.rx_size = rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
 	cfg.xdp_flags = xdp_flags;
@@ -565,6 +586,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 {
 	u32 idx;
 	unsigned int i;
+	bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
+	u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
 		complete_tx_only(xsk, batch_size);
@@ -573,11 +596,16 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
 
 		tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
-		tx_desc->len = PKT_SIZE;
+		tx_desc->len = len;
 	}
 
 	xsk_ring_prod__submit(&xsk->tx, batch_size);
-	xsk->outstanding_tx += batch_size;
+	if (!tx_invalid_test) {
+		xsk->outstanding_tx += batch_size;
+	} else {
+		if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+			kick_tx(xsk);
+	}
 	*frameptr += batch_size;
 	*frameptr %= num_frames;
 	complete_tx_only(xsk, batch_size);
@@ -689,6 +717,48 @@ static void worker_pkt_dump(void)
 	}
 }
 
+static void worker_stats_validate(struct ifobject *ifobject)
+{
+	struct xdp_statistics stats;
+	socklen_t optlen;
+	int err;
+	struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
+							ifdict[!ifobject->ifdict_index]->xsk->xsk :
+							ifobject->xsk->xsk;
+	int fd = xsk_socket__fd(xsk);
+	unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
+
+	sigvar = 0;
+
+	optlen = sizeof(stats);
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+	if (err)
+		return;
+
+	if (optlen == sizeof(struct xdp_statistics)) {
+		switch (stat_test_type) {
+		case STAT_TEST_RX_DROPPED:
+			xsk_stat = stats.rx_dropped;
+			break;
+		case STAT_TEST_TX_INVALID:
+			xsk_stat = stats.tx_invalid_descs;
+			break;
+		case STAT_TEST_RX_FULL:
+			xsk_stat = stats.rx_ring_full;
+			expected_stat -= RX_FULL_RXQSIZE;
+			break;
+		case STAT_TEST_RX_FILL_EMPTY:
+			xsk_stat = stats.rx_fill_ring_empty_descs;
+			break;
+		default:
+			break;
+		}
+
+		if (xsk_stat == expected_stat)
+			sigvar = 1;
+	}
+}
+
 static void worker_pkt_validate(void)
 {
 	u32 payloadseqnum = -2;
@@ -827,7 +897,8 @@ static void *worker_testapp_validate(void *arg)
 			thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
 
 		print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname);
-		xsk_populate_fill_ring(ifobject->umem);
+		if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
+			xsk_populate_fill_ring(ifobject->umem);
 
 		TAILQ_INIT(&head);
 		if (debug_pkt_dump) {
@@ -849,15 +920,21 @@ static void *worker_testapp_validate(void *arg)
 				if (ret <= 0)
 					continue;
 			}
-			rx_pkt(ifobject->xsk, fds);
-			worker_pkt_validate();
+
+			if (test_type != TEST_TYPE_STATS) {
+				rx_pkt(ifobject->xsk, fds);
+				worker_pkt_validate();
+			} else {
+				worker_stats_validate(ifobject);
+			}
 
 			if (sigvar)
 				break;
 		}
 
-		print_verbose("Received %d packets on interface %s\n",
-			       pkt_counter, ifobject->ifname);
+		if (test_type != TEST_TYPE_STATS)
+			print_verbose("Received %d packets on interface %s\n",
+				pkt_counter, ifobject->ifname);
 
 		if (test_type == TEST_TYPE_TEARDOWN)
 			print_verbose("Destroying socket\n");
@@ -931,7 +1008,7 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi)
+	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS))
 		print_ksft_result();
 }
 
@@ -950,6 +1027,32 @@ static void testapp_sockets(void)
 	print_ksft_result();
 }
 
+static void testapp_stats(void)
+{
+	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+		stat_test_type = i;
+
+		/* reset defaults */
+		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+		switch (stat_test_type) {
+		case STAT_TEST_RX_DROPPED:
+			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+						XDP_PACKET_HEADROOM - 1;
+			break;
+		case STAT_TEST_RX_FULL:
+			rxqsize = RX_FULL_RXQSIZE;
+			break;
+		default:
+			break;
+		}
+		testapp_validate();
+	}
+
+	print_ksft_result();
+}
+
 static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
 {
 	/*Init interface0 */
@@ -1037,6 +1140,10 @@ static void run_pkt_test(int mode, int type)
 	prev_pkt = -1;
 	ifdict[0]->fv.vector = tx;
 	ifdict[1]->fv.vector = rx;
+	sigvar = 0;
+	stat_test_type = -1;
+	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
 
 	switch (mode) {
 	case (TEST_MODE_SKB):
@@ -1055,7 +1162,9 @@ static void run_pkt_test(int mode, int type)
 
 	pthread_init_mutex();
 
-	if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
+	if (test_type == TEST_TYPE_STATS)
+		testapp_stats();
+	else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
 		testapp_validate();
 	else
 		testapp_sockets();
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e05703f661f8..30314ef305c2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -42,6 +42,7 @@
 #define POLL_TMOUT 1000
 #define NEED_WAKEUP true
 #define DEFAULT_PKT_CNT 10000
+#define RX_FULL_RXQSIZE 32
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
 
@@ -61,9 +62,18 @@ enum TEST_TYPES {
 	TEST_TYPE_POLL,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
+	TEST_TYPE_STATS,
 	TEST_TYPE_MAX
 };
 
+enum STAT_TEST_TYPES {
+	STAT_TEST_RX_DROPPED,
+	STAT_TEST_TX_INVALID,
+	STAT_TEST_RX_FULL,
+	STAT_TEST_RX_FILL_EMPTY,
+	STAT_TEST_TYPE_MAX
+};
+
 static int configured_mode = TEST_MODE_UNCONFIGURED;
 static u8 debug_pkt_dump;
 static u32 num_frames;
@@ -81,6 +91,9 @@ static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
 static long prev_pkt = -1;
 static int sigvar;
+static int stat_test_type;
+static u32 rxqsize;
+static u32 frame_headroom;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
-- 
cgit v1.2.3


From 3edc5782fb64c97946f4f321141cb4f46c9da825 Mon Sep 17 00:00:00 2001
From: Rasmus Moorats <xx@nns.ee>
Date: Thu, 18 Feb 2021 13:11:24 +0200
Subject: Bluetooth: btusb: support 0cb5:c547 Realtek 8822CE device

Some Xiaomi RedmiBook laptop models use the 0cb5:c547 USB identifier
for their Bluetooth device, so load the appropriate firmware for
Realtek 8822CE.

-Device(0cb5:c547) from /sys/kernel/debug/usb/devices
T:  Bus=01 Lev=01 Prnt=01 Port=03 Cnt=02 Dev#=  3 Spd=12   MxCh= 0
D:  Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=0cb5 ProdID=c547 Rev= 0.00
S:  Manufacturer=Realtek
S:  Product=Bluetooth Radio
S:  SerialNumber=00e04c000001
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms

Signed-off-by: Rasmus Moorats <xx@nns.ee>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 52683fd22e05..d15e3c71b348 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -455,6 +455,8 @@ static const struct usb_device_id blacklist_table[] = {
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x0bda, 0xc123), .driver_info = BTUSB_REALTEK |
 						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x0cb5, 0xc547), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
 
 	/* Silicon Wave based devices */
 	{ USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
-- 
cgit v1.2.3


From 201cf3976c065fc47ab260302d06690f73587df9 Mon Sep 17 00:00:00 2001
From: "mark-yw.chen" <mark-yw.chen@mediatek.com>
Date: Tue, 23 Feb 2021 14:27:40 +0800
Subject: Bluetooth: btusb: Fix incorrect type in assignment and uninitialized
 symbol

Warnings: drivers/bluetooth/btusb.c:3775 btusb_mtk_setup() error:
uninitialized symbol 'fw_version'.
-> add initial value for fw_version.

Warnings: sparse: sparse: incorrect type in assignment (different base
types)
-> add le32_to_cpu to fix incorrect type in assignment.

Signed-off-by: mark-yw.chen <mark-yw.chen@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index d15e3c71b348..0c20226d3b63 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3497,7 +3497,7 @@ static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwnam
 	fw_ptr = fw->data;
 	fw_bin_ptr = fw_ptr;
 	globaldesc = (struct btmtk_global_desc *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE);
-	section_num = globaldesc->section_num;
+	section_num = le32_to_cpu(globaldesc->section_num);
 
 	for (i = 0; i < section_num; i++) {
 		first_block = 1;
@@ -3505,8 +3505,8 @@ static int btusb_mtk_setup_firmware_79xx(struct hci_dev *hdev, const char *fwnam
 		sectionmap = (struct btmtk_section_map *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE +
 			      MTK_FW_ROM_PATCH_GD_SIZE + MTK_FW_ROM_PATCH_SEC_MAP_SIZE * i);
 
-		section_offset = sectionmap->secoffset;
-		dl_size = sectionmap->bin_info_spec.dlsize;
+		section_offset = le32_to_cpu(sectionmap->secoffset);
+		dl_size = le32_to_cpu(sectionmap->bin_info_spec.dlsize);
 
 		if (dl_size > 0) {
 			retry = 20;
@@ -3742,7 +3742,7 @@ static int btusb_mtk_setup(struct hci_dev *hdev)
 	int err, status;
 	u32 dev_id;
 	char fw_bin_name[64];
-	u32 fw_version;
+	u32 fw_version = 0;
 	u8 param;
 
 	calltime = ktime_get();
-- 
cgit v1.2.3


From a83586a7ddba25065ec37323c05deb9019ce4fa9 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 23 Feb 2021 21:14:57 +0800
Subject: bpf: Remove blank line in bpf helper description comment

Commit 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") added an extra
blank line in bpf helper description. This will make bpf_helpers_doc.py stop
building bpf_helper_defs.h immediately after bpf_check_mtu(), which will
affect future added functions.

Fixes: 34b2021cc616 ("bpf: Add BPF-helper for MTU checking")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20210223131457.1378978-1-liuhangbin@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h       | 1 -
 tools/include/uapi/linux/bpf.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c24daa43bac..79c893310492 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3850,7 +3850,6 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
-
  *		Check ctx packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c24daa43bac..79c893310492 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3850,7 +3850,6 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
-
  *		Check ctx packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.
-- 
cgit v1.2.3


From 887596095ec2a9ea39ffcf98f27bf2e77c5eb512 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:26 -0800
Subject: bpf: Clean up sockmap related Kconfigs

As suggested by John, clean up sockmap related Kconfigs:

Reduce the scope of CONFIG_BPF_STREAM_PARSER down to TCP stream
parser, to reflect its name.

Make the rest sockmap code simply depend on CONFIG_BPF_SYSCALL
and CONFIG_INET, the latter is still needed at this point because
of TCP/UDP proto update. And leave CONFIG_NET_SOCK_MSG untouched,
as it is used by non-sockmap cases.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-2-xiyou.wangcong@gmail.com
---
 include/linux/bpf.h       |  26 ++++-----
 include/linux/bpf_types.h |   6 +-
 include/linux/skmsg.h     |  18 ++++++
 include/net/tcp.h         |  16 ++---
 include/net/udp.h         |   4 +-
 init/Kconfig              |   1 +
 net/Kconfig               |   6 +-
 net/core/Makefile         |   6 +-
 net/core/skmsg.c          | 145 +++++++++++++++++++++++++---------------------
 net/core/sock_map.c       |   2 +
 net/ipv4/Makefile         |   2 +-
 net/ipv4/tcp_bpf.c        |   4 +-
 12 files changed, 133 insertions(+), 103 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ae2c35641619..2be47ada5f2d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1778,7 +1778,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 }
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
-#if defined(CONFIG_BPF_STREAM_PARSER)
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 			 struct bpf_prog *old, u32 which);
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
@@ -1786,7 +1786,18 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
+
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags);
 #else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
 static inline int sock_map_prog_update(struct bpf_map *map,
 				       struct bpf_prog *prog,
 				       struct bpf_prog *old, u32 which)
@@ -1811,20 +1822,7 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
 	return -EOPNOTSUPP;
 }
-#endif /* CONFIG_BPF_STREAM_PARSER */
 
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
-				       void *value);
-int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
-				       void *value, u64 map_flags);
-#else
-static inline void bpf_sk_reuseport_detach(struct sock *sk)
-{
-}
-
-#ifdef CONFIG_BPF_SYSCALL
 static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
 						     void *key, void *value)
 {
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index b9edee336d80..f883f01a5061 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -103,10 +103,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
-#if defined(CONFIG_BPF_STREAM_PARSER)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
-#endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
 #endif
@@ -116,6 +112,8 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 #endif
 #ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 8edbbf5f2f93..db7a08be4725 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -71,7 +71,9 @@ struct sk_psock_link {
 };
 
 struct sk_psock_parser {
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	struct strparser		strp;
+#endif
 	bool				enabled;
 	void (*saved_data_ready)(struct sock *sk);
 };
@@ -305,9 +307,25 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
 
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+#else
+static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+
+static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+}
+#endif
+
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 963cd86d12dd..c00e125dcfb9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2222,25 +2222,27 @@ void tcp_update_ulp(struct sock *sk, struct proto *p,
 	__MODULE_INFO(alias, alias_userspace, name);		\
 	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+#ifdef CONFIG_NET_SOCK_MSG
 struct sk_msg;
 struct sk_psock;
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
-#else
-static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
-{
-}
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
 
-#ifdef CONFIG_NET_SOCK_MSG
 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 			  int flags);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		      struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
 
+#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif
+
 #ifdef CONFIG_CGROUP_BPF
 static inline void bpf_skops_init_skb(struct bpf_sock_ops_kern *skops,
 				      struct sk_buff *skb,
diff --git a/include/net/udp.h b/include/net/udp.h
index a132a02b2f2c..d4d064c59232 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -515,9 +515,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 	return segs;
 }
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-#endif /* BPF_STREAM_PARSER */
+#endif
 
 #endif	/* _UDP_H */
diff --git a/init/Kconfig b/init/Kconfig
index 096e1af5c586..66cef5eac275 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1702,6 +1702,7 @@ config BPF_SYSCALL
 	select BPF
 	select IRQ_WORK
 	select TASKS_TRACE_RCU
+	select NET_SOCK_MSG if INET
 	default n
 	help
 	  Enable the bpf() system call that allows to manipulate eBPF
diff --git a/net/Kconfig b/net/Kconfig
index 8cea808ad9e8..0ead7ec0d2bd 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -317,13 +317,9 @@ config BPF_STREAM_PARSER
 	select STREAM_PARSER
 	select NET_SOCK_MSG
 	help
-	  Enabling this allows a stream parser to be used with
+	  Enabling this allows a TCP stream parser to be used with
 	  BPF_MAP_TYPE_SOCKMAP.
 
-	  BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
-	  It can be used to enforce socket policy, implement socket redirects,
-	  etc.
-
 config NET_FLOW_LIMIT
 	bool
 	depends on RPS
diff --git a/net/core/Makefile b/net/core/Makefile
index 3e2c378e5f31..0c2233c826fd 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,7 +16,6 @@ obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
-obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -28,10 +27,13 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
-obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
+ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
+obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
+endif
 obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 1261512d6807..e017744111e1 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -645,15 +645,15 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
 	}
 }
 
+static void sk_psock_done_strp(struct sk_psock *psock);
+
 static void sk_psock_destroy_deferred(struct work_struct *gc)
 {
 	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 
 	/* No sk_callback_lock since already detached. */
 
-	/* Parser has been stopped */
-	if (psock->progs.skb_parser)
-		strp_done(&psock->parser.strp);
+	sk_psock_done_strp(psock);
 
 	cancel_work_sync(&psock->work);
 
@@ -750,14 +750,6 @@ static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
 	return bpf_prog_run_pin_on_cpu(prog, skb);
 }
 
-static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
-{
-	struct sk_psock_parser *parser;
-
-	parser = container_of(strp, struct sk_psock_parser, strp);
-	return container_of(parser, struct sk_psock, parser);
-}
-
 static void sk_psock_skb_redirect(struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
@@ -866,6 +858,24 @@ out_free:
 	}
 }
 
+static void sk_psock_write_space(struct sock *sk)
+{
+	struct sk_psock *psock;
+	void (*write_space)(struct sock *sk) = NULL;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock)) {
+		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+			schedule_work(&psock->work);
+		write_space = psock->saved_write_space;
+	}
+	rcu_read_unlock();
+	if (write_space)
+		write_space(sk);
+}
+
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 {
 	struct sk_psock *psock;
@@ -897,6 +907,14 @@ static int sk_psock_strp_read_done(struct strparser *strp, int err)
 	return err;
 }
 
+static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
+{
+	struct sk_psock_parser *parser;
+
+	parser = container_of(strp, struct sk_psock_parser, strp);
+	return container_of(parser, struct sk_psock, parser);
+}
+
 static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 {
 	struct sk_psock *psock = sk_psock_from_strp(strp);
@@ -933,6 +951,56 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	rcu_read_unlock();
 }
 
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	static const struct strp_callbacks cb = {
+		.rcv_msg	= sk_psock_strp_read,
+		.read_sock_done	= sk_psock_strp_read_done,
+		.parse_msg	= sk_psock_strp_parse,
+	};
+
+	psock->parser.enabled = false;
+	return strp_init(&psock->parser.strp, sk, &cb);
+}
+
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (parser->enabled)
+		return;
+
+	parser->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = sk_psock_strp_data_ready;
+	sk->sk_write_space = sk_psock_write_space;
+	parser->enabled = true;
+}
+
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (!parser->enabled)
+		return;
+
+	sk->sk_data_ready = parser->saved_data_ready;
+	parser->saved_data_ready = NULL;
+	strp_stop(&parser->strp);
+	parser->enabled = false;
+}
+
+static void sk_psock_done_strp(struct sk_psock *psock)
+{
+	/* Parser has been stopped */
+	if (psock->progs.skb_parser)
+		strp_done(&psock->parser.strp);
+}
+#else
+static void sk_psock_done_strp(struct sk_psock *psock)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
 static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 				 unsigned int offset, size_t orig_len)
 {
@@ -984,35 +1052,6 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
 	sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
 }
 
-static void sk_psock_write_space(struct sock *sk)
-{
-	struct sk_psock *psock;
-	void (*write_space)(struct sock *sk) = NULL;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (likely(psock)) {
-		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
-			schedule_work(&psock->work);
-		write_space = psock->saved_write_space;
-	}
-	rcu_read_unlock();
-	if (write_space)
-		write_space(sk);
-}
-
-int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
-{
-	static const struct strp_callbacks cb = {
-		.rcv_msg	= sk_psock_strp_read,
-		.read_sock_done	= sk_psock_strp_read_done,
-		.parse_msg	= sk_psock_strp_parse,
-	};
-
-	psock->parser.enabled = false;
-	return strp_init(&psock->parser.strp, sk, &cb);
-}
-
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
 {
 	struct sk_psock_parser *parser = &psock->parser;
@@ -1026,32 +1065,6 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
 	parser->enabled = true;
 }
 
-void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
-{
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (parser->enabled)
-		return;
-
-	parser->saved_data_ready = sk->sk_data_ready;
-	sk->sk_data_ready = sk_psock_strp_data_ready;
-	sk->sk_write_space = sk_psock_write_space;
-	parser->enabled = true;
-}
-
-void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
-{
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (!parser->enabled)
-		return;
-
-	sk->sk_data_ready = parser->saved_data_ready;
-	parser->saved_data_ready = NULL;
-	strp_stop(&parser->strp);
-	parser->enabled = false;
-}
-
 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
 {
 	struct sk_psock_parser *parser = &psock->parser;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d758fb83c884..ee3334dd3a38 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1461,9 +1461,11 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	case BPF_SK_MSG_VERDICT:
 		pprog = &progs->msg_parser;
 		break;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
 		pprog = &progs->skb_parser;
 		break;
+#endif
 	case BPF_SK_SKB_STREAM_VERDICT:
 		pprog = &progs->skb_verdict;
 		break;
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5b77a46885b9..bbdd9c44f14e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -62,7 +62,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
-obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
+obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index bc7d2a586e18..17c322b875fd 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -229,7 +229,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
 }
 EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
 
-#ifdef CONFIG_BPF_STREAM_PARSER
+#ifdef CONFIG_BPF_SYSCALL
 static bool tcp_bpf_stream_read(const struct sock *sk)
 {
 	struct sk_psock *psock;
@@ -629,4 +629,4 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 	if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
 		newsk->sk_prot = sk->sk_prot_creator;
 }
-#endif /* CONFIG_BPF_STREAM_PARSER */
+#endif /* CONFIG_BPF_SYSCALL */
-- 
cgit v1.2.3


From 5a685cd94b21a88efa6be77169eddef525368034 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:27 -0800
Subject: skmsg: Get rid of struct sk_psock_parser

struct sk_psock_parser is embedded in sk_psock, it is
unnecessary as skb verdict also uses ->saved_data_ready.
We can simply fold these fields into sk_psock, and get rid
of ->enabled.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-3-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 19 +++++++-----------
 net/core/skmsg.c      | 53 ++++++++++++++++-----------------------------------
 net/core/sock_map.c   |  8 ++++----
 3 files changed, 27 insertions(+), 53 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index db7a08be4725..22e26f82de33 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -70,14 +70,6 @@ struct sk_psock_link {
 	void				*link_raw;
 };
 
-struct sk_psock_parser {
-#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
-	struct strparser		strp;
-#endif
-	bool				enabled;
-	void (*saved_data_ready)(struct sock *sk);
-};
-
 struct sk_psock_work_state {
 	struct sk_buff			*skb;
 	u32				len;
@@ -92,7 +84,9 @@ struct sk_psock {
 	u32				eval;
 	struct sk_msg			*cork;
 	struct sk_psock_progs		progs;
-	struct sk_psock_parser		parser;
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
+	struct strparser		strp;
+#endif
 	struct sk_buff_head		ingress_skb;
 	struct list_head		ingress_msg;
 	unsigned long			state;
@@ -102,6 +96,7 @@ struct sk_psock {
 	void (*saved_unhash)(struct sock *sk);
 	void (*saved_close)(struct sock *sk, long timeout);
 	void (*saved_write_space)(struct sock *sk);
+	void (*saved_data_ready)(struct sock *sk);
 	struct proto			*sk_proto;
 	struct sk_psock_work_state	work_state;
 	struct work_struct		work;
@@ -418,8 +413,8 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
 
 static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
 {
-	if (psock->parser.enabled)
-		psock->parser.saved_data_ready(sk);
+	if (psock->saved_data_ready)
+		psock->saved_data_ready(sk);
 	else
 		sk->sk_data_ready(sk);
 }
@@ -458,6 +453,6 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 {
 	if (!psock)
 		return false;
-	return psock->parser.enabled;
+	return !!psock->saved_data_ready;
 }
 #endif /* _LINUX_SKMSG_H */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index e017744111e1..d00c9a4b47e7 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -907,17 +907,9 @@ static int sk_psock_strp_read_done(struct strparser *strp, int err)
 	return err;
 }
 
-static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
-{
-	struct sk_psock_parser *parser;
-
-	parser = container_of(strp, struct sk_psock_parser, strp);
-	return container_of(parser, struct sk_psock, parser);
-}
-
 static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 {
-	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
 	struct bpf_prog *prog;
 	int ret = skb->len;
 
@@ -941,10 +933,10 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	psock = sk_psock(sk);
 	if (likely(psock)) {
 		if (tls_sw_has_ctx_rx(sk)) {
-			psock->parser.saved_data_ready(sk);
+			psock->saved_data_ready(sk);
 		} else {
 			write_lock_bh(&sk->sk_callback_lock);
-			strp_data_ready(&psock->parser.strp);
+			strp_data_ready(&psock->strp);
 			write_unlock_bh(&sk->sk_callback_lock);
 		}
 	}
@@ -959,41 +951,34 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 		.parse_msg	= sk_psock_strp_parse,
 	};
 
-	psock->parser.enabled = false;
-	return strp_init(&psock->parser.strp, sk, &cb);
+	return strp_init(&psock->strp, sk, &cb);
 }
 
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (parser->enabled)
+	if (psock->saved_data_ready)
 		return;
 
-	parser->saved_data_ready = sk->sk_data_ready;
+	psock->saved_data_ready = sk->sk_data_ready;
 	sk->sk_data_ready = sk_psock_strp_data_ready;
 	sk->sk_write_space = sk_psock_write_space;
-	parser->enabled = true;
 }
 
 void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (!parser->enabled)
+	if (!psock->saved_data_ready)
 		return;
 
-	sk->sk_data_ready = parser->saved_data_ready;
-	parser->saved_data_ready = NULL;
-	strp_stop(&parser->strp);
-	parser->enabled = false;
+	sk->sk_data_ready = psock->saved_data_ready;
+	psock->saved_data_ready = NULL;
+	strp_stop(&psock->strp);
 }
 
 static void sk_psock_done_strp(struct sk_psock *psock)
 {
 	/* Parser has been stopped */
 	if (psock->progs.skb_parser)
-		strp_done(&psock->parser.strp);
+		strp_done(&psock->strp);
 }
 #else
 static void sk_psock_done_strp(struct sk_psock *psock)
@@ -1054,25 +1039,19 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
 
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (parser->enabled)
+	if (psock->saved_data_ready)
 		return;
 
-	parser->saved_data_ready = sk->sk_data_ready;
+	psock->saved_data_ready = sk->sk_data_ready;
 	sk->sk_data_ready = sk_psock_verdict_data_ready;
 	sk->sk_write_space = sk_psock_write_space;
-	parser->enabled = true;
 }
 
 void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
 {
-	struct sk_psock_parser *parser = &psock->parser;
-
-	if (!parser->enabled)
+	if (!psock->saved_data_ready)
 		return;
 
-	sk->sk_data_ready = parser->saved_data_ready;
-	parser->saved_data_ready = NULL;
-	parser->enabled = false;
+	sk->sk_data_ready = psock->saved_data_ready;
+	psock->saved_data_ready = NULL;
 }
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index ee3334dd3a38..1a28a5c2c61e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -148,9 +148,9 @@ static void sock_map_del_link(struct sock *sk,
 			struct bpf_map *map = link->map;
 			struct bpf_stab *stab = container_of(map, struct bpf_stab,
 							     map);
-			if (psock->parser.enabled && stab->progs.skb_parser)
+			if (psock->saved_data_ready && stab->progs.skb_parser)
 				strp_stop = true;
-			if (psock->parser.enabled && stab->progs.skb_verdict)
+			if (psock->saved_data_ready && stab->progs.skb_verdict)
 				verdict_stop = true;
 			list_del(&link->list);
 			sk_psock_free_link(link);
@@ -283,14 +283,14 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		goto out_drop;
 
 	write_lock_bh(&sk->sk_callback_lock);
-	if (skb_parser && skb_verdict && !psock->parser.enabled) {
+	if (skb_parser && skb_verdict && !psock->saved_data_ready) {
 		ret = sk_psock_init_strp(sk, psock);
 		if (ret)
 			goto out_unlock_drop;
 		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
 		psock_set_prog(&psock->progs.skb_parser, skb_parser);
 		sk_psock_start_strp(sk, psock);
-	} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
+	} else if (!skb_parser && skb_verdict && !psock->saved_data_ready) {
 		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
 		sk_psock_start_verdict(sk,psock);
 	}
-- 
cgit v1.2.3


From 16137b09a66f2b75090f1e56a9ba0e27ef845ebc Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:28 -0800
Subject: bpf: Compute data_end dynamically with JIT code

Currently, we compute ->data_end with a compile-time constant
offset of skb. But as Jakub pointed out, we can actually compute
it in eBPF JIT code at run-time, so that we can competely get
rid of ->data_end. This is similar to skb_shinfo(skb) computation
in bpf_convert_shinfo_access().

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-4-xiyou.wangcong@gmail.com
---
 include/net/tcp.h |  6 ------
 net/core/filter.c | 48 ++++++++++++++++++++++++++++--------------------
 net/core/skmsg.c  |  1 -
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c00e125dcfb9..947ef5da6867 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -886,18 +886,12 @@ struct tcp_skb_cb {
 		struct {
 			__u32 flags;
 			struct sock *sk_redir;
-			void *data_end;
 		} bpf;
 	};
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
-	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
 static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
 {
 	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
diff --git a/net/core/filter.c b/net/core/filter.c
index adfdad234674..13bcf248ee7b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1863,10 +1863,7 @@ static const struct bpf_func_proto bpf_sk_fullsock_proto = {
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
-	int err = __bpf_try_make_writable(skb, write_len);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return err;
+	return __bpf_try_make_writable(skb, write_len);
 }
 
 BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
@@ -3577,7 +3574,6 @@ BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 			return -ENOMEM;
 		__skb_pull(skb, len_diff_abs);
 	}
-	bpf_compute_data_end_sk_skb(skb);
 	if (tls_sw_has_ctx_rx(skb->sk)) {
 		struct strp_msg *rxm = strp_msg(skb);
 
@@ -3742,10 +3738,7 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_tail(skb, new_len, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_tail(skb, new_len, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_tail_proto = {
@@ -3808,10 +3801,7 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
 	   u64, flags)
 {
-	int ret = __bpf_skb_change_head(skb, head_room, flags);
-
-	bpf_compute_data_end_sk_skb(skb);
-	return ret;
+	return __bpf_skb_change_head(skb, head_room, flags);
 }
 
 static const struct bpf_func_proto sk_skb_change_head_proto = {
@@ -9655,22 +9645,40 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
+/* data_end = skb->data + skb_headlen() */
+static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
+						    struct bpf_insn *insn)
+{
+	/* si->dst_reg = skb->data */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
+			      si->dst_reg, si->src_reg,
+			      offsetof(struct sk_buff, data));
+	/* AX = skb->len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, len));
+	/* si->dst_reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* AX = skb->data_len */
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
+			      BPF_REG_AX, si->src_reg,
+			      offsetof(struct sk_buff, data_len));
+	/* si->dst_reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+	return insn;
+}
+
 static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     const struct bpf_insn *si,
 				     struct bpf_insn *insn_buf,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
-	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
-		off  = si->off;
-		off -= offsetof(struct __sk_buff, data_end);
-		off += offsetof(struct sk_buff, cb);
-		off += offsetof(struct tcp_skb_cb, bpf.data_end);
-		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
-				      si->src_reg, off);
+		insn = bpf_convert_data_end_access(si, insn);
 		break;
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index d00c9a4b47e7..8822001ab3dc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -746,7 +746,6 @@ EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
 			    struct sk_buff *skb)
 {
-	bpf_compute_data_end_sk_skb(skb);
 	return bpf_prog_run_pin_on_cpu(prog, skb);
 }
 
-- 
cgit v1.2.3


From e3526bb92a2084cdaec6cb2855bcec98b280426c Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:29 -0800
Subject: skmsg: Move sk_redir from TCP_SKB_CB to skb

Currently TCP_SKB_CB() is hard-coded in skmsg code, it certainly
does not work for any other non-TCP protocols. We can move them to
skb ext, but it introduces a memory allocation on fast path.

Fortunately, we only need to a word-size to store all the information,
because the flags actually only contains 1 bit so can be just packed
into the lowest bit of the "pointer", which is stored as unsigned
long.

Inside struct sk_buff, '_skb_refdst' can be reused because skb dst is
no longer needed after ->sk_data_ready() so we can just drop it.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-5-xiyou.wangcong@gmail.com
---
 include/linux/skbuff.h |  3 +++
 include/linux/skmsg.h  | 38 ++++++++++++++++++++++++++++++++++++++
 include/net/tcp.h      | 19 -------------------
 net/core/skmsg.c       | 31 +++++++++++++++++++------------
 net/core/sock_map.c    |  8 ++------
 5 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6d0a33d1c0db..bd84f799c952 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -755,6 +755,9 @@ struct sk_buff {
 			void		(*destructor)(struct sk_buff *skb);
 		};
 		struct list_head	tcp_tsorted_anchor;
+#ifdef CONFIG_NET_SOCK_MSG
+		unsigned long		_sk_redir;
+#endif
 	};
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 22e26f82de33..e0de45527bb6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -455,4 +455,42 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 		return false;
 	return !!psock->saved_data_ready;
 }
+
+#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
+
+/* We only have one bit so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+
+static inline bool skb_bpf_ingress(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return sk_redir & BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_ingress(struct sk_buff *skb)
+{
+	skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
+				     bool ingress)
+{
+	skb->_sk_redir = (unsigned long)sk_redir;
+	if (ingress)
+		skb->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
+}
+
+static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	skb->_sk_redir = 0;
+}
+#endif /* CONFIG_NET_SOCK_MSG */
 #endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 947ef5da6867..075de26f449d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -883,30 +883,11 @@ struct tcp_skb_cb {
 			struct inet6_skb_parm	h6;
 #endif
 		} header;	/* For incoming skbs */
-		struct {
-			__u32 flags;
-			struct sock *sk_redir;
-		} bpf;
 	};
 };
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
-static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
-{
-	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
-}
-
-static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
-{
-	return TCP_SKB_CB(skb)->bpf.sk_redir;
-}
-
-static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
-{
-	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-}
-
 extern const struct inet_connection_sock_af_ops ipv4_specific;
 
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 8822001ab3dc..409258367bea 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -525,7 +525,8 @@ static void sk_psock_backlog(struct work_struct *work)
 		len = skb->len;
 		off = 0;
 start:
-		ingress = tcp_skb_bpf_ingress(skb);
+		ingress = skb_bpf_ingress(skb);
+		skb_bpf_redirect_clear(skb);
 		do {
 			ret = -EIO;
 			if (likely(psock->sk->sk_socket))
@@ -631,7 +632,12 @@ void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 
 static void sk_psock_zap_ingress(struct sk_psock *psock)
 {
-	__skb_queue_purge(&psock->ingress_skb);
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
+		skb_bpf_redirect_clear(skb);
+		kfree_skb(skb);
+	}
 	__sk_psock_purge_ingress_msg(psock);
 }
 
@@ -754,7 +760,7 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
 
-	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	sk_other = skb_bpf_redirect_fetch(skb);
 	/* This error is a buggy BPF program, it returned a redirect
 	 * return code, but then didn't set a redirect interface.
 	 */
@@ -804,9 +810,10 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 		 * TLS context.
 		 */
 		skb->sk = psock->sk;
-		tcp_skb_bpf_redirect_clear(skb);
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
 		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
 	sk_psock_tls_verdict_apply(skb, psock->sk, ret);
@@ -818,7 +825,6 @@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
 static void sk_psock_verdict_apply(struct sk_psock *psock,
 				   struct sk_buff *skb, int verdict)
 {
-	struct tcp_skb_cb *tcp;
 	struct sock *sk_other;
 	int err = -EIO;
 
@@ -830,8 +836,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 			goto out_free;
 		}
 
-		tcp = TCP_SKB_CB(skb);
-		tcp->bpf.flags |= BPF_F_INGRESS;
+		skb_bpf_set_ingress(skb);
 
 		/* If the queue is empty then we can submit directly
 		 * into the msg queue. If its not empty we have to
@@ -892,9 +897,10 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 	skb_set_owner_r(skb, sk);
 	prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
-		tcp_skb_bpf_redirect_clear(skb);
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
 		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
@@ -1011,9 +1017,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	skb_set_owner_r(skb, sk);
 	prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
-		tcp_skb_bpf_redirect_clear(skb);
+		skb_dst_drop(skb);
+		skb_bpf_redirect_clear(skb);
 		ret = sk_psock_bpf_run(psock, prog, skb);
-		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
 out:
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1a28a5c2c61e..dbfcd7006338 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -657,7 +657,6 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
 BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	struct sock *sk;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -667,8 +666,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
 	if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
 		return SK_DROP;
 
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = sk;
+	skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
 	return SK_PASS;
 }
 
@@ -1250,7 +1248,6 @@ const struct bpf_func_proto bpf_sock_hash_update_proto = {
 BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
 	   struct bpf_map *, map, void *, key, u64, flags)
 {
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 	struct sock *sk;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
@@ -1260,8 +1257,7 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
 	if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
 		return SK_DROP;
 
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = sk;
+	skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
 	return SK_PASS;
 }
 
-- 
cgit v1.2.3


From ae8b8332fbb512f53bf50ff6a7586dd0f90ed18a Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:30 -0800
Subject: sock_map: Rename skb_parser and skb_verdict

These two eBPF programs are tied to BPF_SK_SKB_STREAM_PARSER
and BPF_SK_SKB_STREAM_VERDICT, rename them to reflect the fact
they are only used for TCP. And save the name 'skb_verdict' for
general use later.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-6-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h                              |  8 +--
 net/core/skmsg.c                                   | 14 ++---
 net/core/sock_map.c                                | 60 +++++++++++-----------
 .../selftests/bpf/prog_tests/sockmap_listen.c      |  8 +--
 .../selftests/bpf/progs/test_sockmap_listen.c      |  4 +-
 5 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e0de45527bb6..d9f6ec4a9cf2 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -56,8 +56,8 @@ struct sk_msg {
 
 struct sk_psock_progs {
 	struct bpf_prog			*msg_parser;
-	struct bpf_prog			*skb_parser;
-	struct bpf_prog			*skb_verdict;
+	struct bpf_prog			*stream_parser;
+	struct bpf_prog			*stream_verdict;
 };
 
 enum sk_psock_state_bits {
@@ -443,8 +443,8 @@ static inline int psock_replace_prog(struct bpf_prog **pprog,
 static inline void psock_progs_drop(struct sk_psock_progs *progs)
 {
 	psock_set_prog(&progs->msg_parser, NULL);
-	psock_set_prog(&progs->skb_parser, NULL);
-	psock_set_prog(&progs->skb_verdict, NULL);
+	psock_set_prog(&progs->stream_parser, NULL);
+	psock_set_prog(&progs->stream_verdict, NULL);
 }
 
 int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 409258367bea..35f9caa3b125 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -691,9 +691,9 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 	write_lock_bh(&sk->sk_callback_lock);
 	sk_psock_restore_proto(sk, psock);
 	rcu_assign_sk_user_data(sk, NULL);
-	if (psock->progs.skb_parser)
+	if (psock->progs.stream_parser)
 		sk_psock_stop_strp(sk, psock);
-	else if (psock->progs.skb_verdict)
+	else if (psock->progs.stream_verdict)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
@@ -803,7 +803,7 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 	int ret = __SK_PASS;
 
 	rcu_read_lock();
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
 		/* We skip full set_owner_r here because if we do a SK_PASS
 		 * or SK_DROP we can skip skb memory accounting and use the
@@ -895,7 +895,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		goto out;
 	}
 	skb_set_owner_r(skb, sk);
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
@@ -919,7 +919,7 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 	int ret = skb->len;
 
 	rcu_read_lock();
-	prog = READ_ONCE(psock->progs.skb_parser);
+	prog = READ_ONCE(psock->progs.stream_parser);
 	if (likely(prog)) {
 		skb->sk = psock->sk;
 		ret = sk_psock_bpf_run(psock, prog, skb);
@@ -982,7 +982,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 static void sk_psock_done_strp(struct sk_psock *psock)
 {
 	/* Parser has been stopped */
-	if (psock->progs.skb_parser)
+	if (psock->progs.stream_parser)
 		strp_done(&psock->strp);
 }
 #else
@@ -1015,7 +1015,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 		goto out;
 	}
 	skb_set_owner_r(skb, sk);
-	prog = READ_ONCE(psock->progs.skb_verdict);
+	prog = READ_ONCE(psock->progs.stream_verdict);
 	if (likely(prog)) {
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index dbfcd7006338..69785070f02d 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -148,9 +148,9 @@ static void sock_map_del_link(struct sock *sk,
 			struct bpf_map *map = link->map;
 			struct bpf_stab *stab = container_of(map, struct bpf_stab,
 							     map);
-			if (psock->saved_data_ready && stab->progs.skb_parser)
+			if (psock->saved_data_ready && stab->progs.stream_parser)
 				strp_stop = true;
-			if (psock->saved_data_ready && stab->progs.skb_verdict)
+			if (psock->saved_data_ready && stab->progs.stream_verdict)
 				verdict_stop = true;
 			list_del(&link->list);
 			sk_psock_free_link(link);
@@ -224,23 +224,23 @@ out:
 static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 			 struct sock *sk)
 {
-	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
+	struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
 	struct sk_psock *psock;
 	int ret;
 
-	skb_verdict = READ_ONCE(progs->skb_verdict);
-	if (skb_verdict) {
-		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
-		if (IS_ERR(skb_verdict))
-			return PTR_ERR(skb_verdict);
+	stream_verdict = READ_ONCE(progs->stream_verdict);
+	if (stream_verdict) {
+		stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
+		if (IS_ERR(stream_verdict))
+			return PTR_ERR(stream_verdict);
 	}
 
-	skb_parser = READ_ONCE(progs->skb_parser);
-	if (skb_parser) {
-		skb_parser = bpf_prog_inc_not_zero(skb_parser);
-		if (IS_ERR(skb_parser)) {
-			ret = PTR_ERR(skb_parser);
-			goto out_put_skb_verdict;
+	stream_parser = READ_ONCE(progs->stream_parser);
+	if (stream_parser) {
+		stream_parser = bpf_prog_inc_not_zero(stream_parser);
+		if (IS_ERR(stream_parser)) {
+			ret = PTR_ERR(stream_parser);
+			goto out_put_stream_verdict;
 		}
 	}
 
@@ -249,7 +249,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		msg_parser = bpf_prog_inc_not_zero(msg_parser);
 		if (IS_ERR(msg_parser)) {
 			ret = PTR_ERR(msg_parser);
-			goto out_put_skb_parser;
+			goto out_put_stream_parser;
 		}
 	}
 
@@ -261,8 +261,8 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 
 	if (psock) {
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
-		    (skb_parser  && READ_ONCE(psock->progs.skb_parser)) ||
-		    (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
+		    (stream_parser  && READ_ONCE(psock->progs.stream_parser)) ||
+		    (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
 			sk_psock_put(sk, psock);
 			ret = -EBUSY;
 			goto out_progs;
@@ -283,15 +283,15 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		goto out_drop;
 
 	write_lock_bh(&sk->sk_callback_lock);
-	if (skb_parser && skb_verdict && !psock->saved_data_ready) {
+	if (stream_parser && stream_verdict && !psock->saved_data_ready) {
 		ret = sk_psock_init_strp(sk, psock);
 		if (ret)
 			goto out_unlock_drop;
-		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
-		psock_set_prog(&psock->progs.skb_parser, skb_parser);
+		psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
+		psock_set_prog(&psock->progs.stream_parser, stream_parser);
 		sk_psock_start_strp(sk, psock);
-	} else if (!skb_parser && skb_verdict && !psock->saved_data_ready) {
-		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+	} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
+		psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
 		sk_psock_start_verdict(sk,psock);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
@@ -303,12 +303,12 @@ out_drop:
 out_progs:
 	if (msg_parser)
 		bpf_prog_put(msg_parser);
-out_put_skb_parser:
-	if (skb_parser)
-		bpf_prog_put(skb_parser);
-out_put_skb_verdict:
-	if (skb_verdict)
-		bpf_prog_put(skb_verdict);
+out_put_stream_parser:
+	if (stream_parser)
+		bpf_prog_put(stream_parser);
+out_put_stream_verdict:
+	if (stream_verdict)
+		bpf_prog_put(stream_verdict);
 	return ret;
 }
 
@@ -1459,11 +1459,11 @@ int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 		break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		pprog = &progs->skb_parser;
+		pprog = &progs->stream_parser;
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
-		pprog = &progs->skb_verdict;
+		pprog = &progs->stream_verdict;
 		break;
 	default:
 		return -EOPNOTSUPP;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..c26e6bf05e49 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
 					struct bpf_map *inner_map, int family,
 					int sotype)
 {
-	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
-	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	int sock_map = bpf_map__fd(inner_map);
 	int err;
@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
 					struct bpf_map *inner_map, int family,
 					int sotype)
 {
-	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
-	int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+	int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+	int parser = bpf_program__fd(skel->progs.prog_stream_parser);
 	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
 	int sock_map = bpf_map__fd(inner_map);
 	int err;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..fa221141e9c1 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -31,13 +31,13 @@ struct {
 static volatile bool test_sockmap; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
 {
 	return skb->len;
 }
 
 SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
 {
 	unsigned int *count;
 	__u32 zero = 0;
-- 
cgit v1.2.3


From 4675e234b9e15159894b90ead9340e1dc202b670 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:31 -0800
Subject: sock_map: Make sock_map_prog_update() static

It is only used within sock_map.c so can become static.

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-7-xiyou.wangcong@gmail.com
---
 include/linux/bpf.h | 9 ---------
 net/core/sock_map.c | 7 +++++--
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2be47ada5f2d..e1e4d2f60527 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1779,8 +1779,6 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-			 struct bpf_prog *old, u32 which);
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
@@ -1798,13 +1796,6 @@ static inline void bpf_sk_reuseport_detach(struct sock *sk)
 }
 
 #ifdef CONFIG_BPF_SYSCALL
-static inline int sock_map_prog_update(struct bpf_map *map,
-				       struct bpf_prog *prog,
-				       struct bpf_prog *old, u32 which)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int sock_map_get_from_fd(const union bpf_attr *attr,
 				       struct bpf_prog *prog)
 {
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 69785070f02d..dd53a7771d7e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -24,6 +24,9 @@ struct bpf_stab {
 #define SOCK_CREATE_FLAG_MASK				\
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which);
+
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_stab *stab;
@@ -1444,8 +1447,8 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-			 struct bpf_prog *old, u32 which)
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
 	struct bpf_prog **pprog;
-- 
cgit v1.2.3


From cd81cefb1abc52bd164f4d9760cd22eadc0e4468 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:32 -0800
Subject: skmsg: Make __sk_psock_purge_ingress_msg() static

It is only used within skmsg.c so can become static.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-8-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 2 --
 net/core/skmsg.c      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index d9f6ec4a9cf2..676d48e08159 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -340,8 +340,6 @@ static inline void sk_psock_free_link(struct sk_psock_link *link)
 
 struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
 
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
-
 static inline void sk_psock_cork_free(struct sk_psock *psock)
 {
 	if (psock->cork) {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 35f9caa3b125..46e29d2c0c48 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -619,7 +619,7 @@ struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
 	return link;
 }
 
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 {
 	struct sk_msg *msg, *tmp;
 
-- 
cgit v1.2.3


From 533342322276b06b4db260c413ce907238851e9b Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:33 -0800
Subject: skmsg: Get rid of sk_psock_bpf_run()

It is now nearly identical to bpf_prog_run_pin_on_cpu() and
it has an unused parameter 'psock', so we can just get rid
of it and call bpf_prog_run_pin_on_cpu() directly.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-9-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 46e29d2c0c48..07f54015238a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -749,12 +749,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
 
-static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
-			    struct sk_buff *skb)
-{
-	return bpf_prog_run_pin_on_cpu(prog, skb);
-}
-
 static void sk_psock_skb_redirect(struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
@@ -812,7 +806,7 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
 		skb->sk = psock->sk;
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
@@ -899,7 +893,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 	if (likely(prog)) {
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
@@ -922,7 +916,7 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
 	prog = READ_ONCE(psock->progs.stream_parser);
 	if (likely(prog)) {
 		skb->sk = psock->sk;
-		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		skb->sk = NULL;
 	}
 	rcu_read_unlock();
@@ -1019,7 +1013,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	if (likely(prog)) {
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
-		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 	}
 	sk_psock_verdict_apply(psock, skb, ret);
-- 
cgit v1.2.3


From ff9614b81be65d648ec4615b593c6e4b2dac6375 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 23 Feb 2021 10:49:34 -0800
Subject: skmsg: Remove unused sk_psock_stop() declaration

It is not defined or used anywhere.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210223184934.6054-10-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 676d48e08159..6c09d94be2e9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -400,7 +400,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
 	return psock;
 }
 
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
 
 static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
-- 
cgit v1.2.3


From 2854436612c4d2606c9246ce2976ab6634276337 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 25 Feb 2021 16:19:47 +0000
Subject: selftests/bpf: Propagate error code of the command to vmtest.sh

When vmtest.sh ran a command in a VM, it did not record or propagate the
error code of the command. This made the script less "script-able". The
script now saves the error code of the said command in a file in the VM,
copies the file back to the host and (when available) uses this error
code instead of its own.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210225161947.1778590-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/vmtest.sh | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 26ae8d0b6ce3..22554894db99 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -17,6 +17,9 @@ KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vm
 KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
 INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
 
 usage()
 {
@@ -146,7 +149,6 @@ update_init_script()
 	local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
 	local init_script="${init_script_dir}/S50-startup"
 	local command="$1"
-	local log_file="$2"
 
 	mount_image
 
@@ -163,11 +165,16 @@ EOF
 	sudo bash -c "cat >${init_script}" <<EOF
 #!/bin/bash
 
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
+
 {
 	cd /root/bpf
 	echo ${command}
 	stdbuf -oL -eL ${command}
-} 2>&1 | tee /root/${log_file}
+	echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
 poweroff -f
 EOF
 
@@ -221,10 +228,12 @@ EOF
 copy_logs()
 {
 	local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
-	local log_file="${mount_dir}/root/$1"
+	local log_file="${mount_dir}/root/${LOG_FILE}"
+	local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
 
 	mount_image
 	sudo cp ${log_file} "${OUTPUT_DIR}"
+	sudo cp ${exit_status_file} "${OUTPUT_DIR}"
 	sudo rm -f ${log_file}
 	unmount_image
 }
@@ -263,7 +272,6 @@ main()
 {
 	local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
 	local kernel_checkout=$(realpath "${script_dir}"/../../../../)
-	local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
 	# By default the script searches for the kernel in the checkout directory but
 	# it also obeys environment variables O= and KBUILD_OUTPUT=
 	local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
@@ -347,19 +355,23 @@ main()
 	fi
 
 	update_selftests "${kernel_checkout}" "${make_command}"
-	update_init_script "${command}" "${log_file}"
+	update_init_script "${command}"
 	run_vm "${kernel_bzimage}"
-	copy_logs "${log_file}"
-	echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
+	copy_logs
+	echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
 }
 
 catch()
 {
 	local exit_code=$1
+	local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
 	# This is just a cleanup and the directory may
 	# have already been unmounted. So, don't let this
 	# clobber the error code we intend to return.
 	unmount_image || true
+	if [[ -f "${exit_status_file}" ]]; then
+		exit_code="$(cat ${exit_status_file})"
+	fi
 	exit ${exit_code}
 }
 
-- 
cgit v1.2.3


From 86fd166575c38c17ecd3a6b8fb9c64fa19871486 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 24 Feb 2021 12:14:45 +0100
Subject: selftests/bpf: Copy extras in out-of-srctree builds

Building selftests in a separate directory like this:

    make O="$BUILD" -C tools/testing/selftests/bpf

and then running:

    cd "$BUILD" && ./test_progs -t btf

causes all the non-flavored btf_dump_test_case_*.c tests to fail,
because these files are not copied to where test_progs expects to find
them.

Fix by not skipping EXT-COPY when the original $(OUTPUT) is not empty
(lib.mk sets it to $(shell pwd) in that case) and using rsync instead
of cp: cp fails because e.g. urandom_read is being copied into itself,
and rsync simply skips such cases. rsync is already used by kselftests
and therefore is not a new dependency.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210224111445.102342-1-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 044bfdcf5b74..a81af15e4ded 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -382,11 +382,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 	$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
 
-# only copy extra resources if in flavored build
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
 	$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
-	$(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
+	$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
 endif
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
-- 
cgit v1.2.3


From efdb22de7dcd3b24b8154b3c3ba496f62afea00c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:20 -0800
Subject: bpf: Factor out visit_func_call_insn() in check_cfg()

During verifier check_cfg(), all instructions are
visited to ensure verifier can handle program control flows.
This patch factored out function visit_func_call_insn()
so it can be reused in later patch to visit callback function
calls. There is no functionality change for this patch.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204920.3884136-1-yhs@fb.com
---
 kernel/bpf/verifier.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9336bac39027..b14fab7a7a16 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8592,6 +8592,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
 	return DONE_EXPLORING;
 }
 
+static int visit_func_call_insn(int t, int insn_cnt,
+				struct bpf_insn *insns,
+				struct bpf_verifier_env *env,
+				bool visit_callee)
+{
+	int ret;
+
+	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+	if (ret)
+		return ret;
+
+	if (t + 1 < insn_cnt)
+		init_explored_state(env, t + 1);
+	if (visit_callee) {
+		init_explored_state(env, t);
+		ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
+				env, false);
+	}
+	return ret;
+}
+
 /* Visits the instruction at index t and returns one of the following:
  *  < 0 - an error occurred
  *  DONE_EXPLORING - the instruction was fully explored
@@ -8612,18 +8633,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
 		return DONE_EXPLORING;
 
 	case BPF_CALL:
-		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
-		if (ret)
-			return ret;
-
-		if (t + 1 < insn_cnt)
-			init_explored_state(env, t + 1);
-		if (insns[t].src_reg == BPF_PSEUDO_CALL) {
-			init_explored_state(env, t);
-			ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
-					env, false);
-		}
-		return ret;
+		return visit_func_call_insn(t, insn_cnt, insns, env,
+					    insns[t].src_reg == BPF_PSEUDO_CALL);
 
 	case BPF_JA:
 		if (BPF_SRC(insns[t].code) != BPF_K)
-- 
cgit v1.2.3


From bc2591d63fc91bd5a9aaff145148a224d892bdb8 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:22 -0800
Subject: bpf: Factor out verbose_invalid_scalar()

Factor out the function verbose_invalid_scalar() to verbose
print if a scalar is not in a tnum range. There is no
functionality change and the function will be used by
later patch which introduced bpf_for_each_map_elem().

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204922.3884375-1-yhs@fb.com
---
 kernel/bpf/verifier.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b14fab7a7a16..7194980c3ca4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -390,6 +390,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 	env->prev_linfo = linfo;
 }
 
+static void verbose_invalid_scalar(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *reg,
+				   struct tnum *range, const char *ctx,
+				   const char *reg_name)
+{
+	char tn_buf[48];
+
+	verbose(env, "At %s the register %s ", ctx, reg_name);
+	if (!tnum_is_unknown(reg->var_off)) {
+		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+		verbose(env, "has value %s", tn_buf);
+	} else {
+		verbose(env, "has unknown scalar value");
+	}
+	tnum_strn(tn_buf, sizeof(tn_buf), *range);
+	verbose(env, " should have been in %s\n", tn_buf);
+}
+
 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 {
 	return type == PTR_TO_PACKET ||
@@ -8455,17 +8473,7 @@ static int check_return_code(struct bpf_verifier_env *env)
 	}
 
 	if (!tnum_in(range, reg->var_off)) {
-		char tn_buf[48];
-
-		verbose(env, "At program exit the register R0 ");
-		if (!tnum_is_unknown(reg->var_off)) {
-			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
-			verbose(env, "has value %s", tn_buf);
-		} else {
-			verbose(env, "has unknown scalar value");
-		}
-		tnum_strn(tn_buf, sizeof(tn_buf), range);
-		verbose(env, " should have been in %s\n", tn_buf);
+		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 1435137573f9c75455903e8cd01f84d6e092ea16 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:23 -0800
Subject: bpf: Refactor check_func_call() to allow callback function

Later proposed bpf_for_each_map_elem() helper has callback
function as one of its arguments. This patch refactored
check_func_call() to permit callback function which sets
callee state. Different callback functions may have
different callee states.
There is no functionality change for this patch.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204923.3884627-1-yhs@fb.com
---
 kernel/bpf/verifier.c | 60 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 17 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7194980c3ca4..97e772f44cd7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5249,13 +5249,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 	}
 }
 
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-			   int *insn_idx)
+typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee,
+				   int insn_idx);
+
+static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			     int *insn_idx, int subprog,
+			     set_callee_state_fn set_callee_state_cb)
 {
 	struct bpf_verifier_state *state = env->cur_state;
 	struct bpf_func_info_aux *func_info_aux;
 	struct bpf_func_state *caller, *callee;
-	int i, err, subprog, target_insn;
+	int err;
 	bool is_global = false;
 
 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
@@ -5264,14 +5270,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -E2BIG;
 	}
 
-	target_insn = *insn_idx + insn->imm;
-	subprog = find_subprog(env, target_insn + 1);
-	if (subprog < 0) {
-		verbose(env, "verifier bug. No program starts at insn %d\n",
-			target_insn + 1);
-		return -EFAULT;
-	}
-
 	caller = state->frame[state->curframe];
 	if (state->frame[state->curframe + 1]) {
 		verbose(env, "verifier bug. Frame %d already allocated\n",
@@ -5326,11 +5324,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	if (err)
 		return err;
 
-	/* copy r1 - r5 args that callee can access.  The copy includes parent
-	 * pointers, which connects us up to the liveness chain
-	 */
-	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
-		callee->regs[i] = caller->regs[i];
+	err = set_callee_state_cb(env, caller, callee, *insn_idx);
+	if (err)
+		return err;
 
 	clear_caller_saved_regs(env, caller->regs);
 
@@ -5338,7 +5334,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	state->curframe++;
 
 	/* and go analyze first insn of the callee */
-	*insn_idx = target_insn;
+	*insn_idx = env->subprog_info[subprog].start - 1;
 
 	if (env->log.level & BPF_LOG_LEVEL) {
 		verbose(env, "caller:\n");
@@ -5349,6 +5345,36 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return 0;
 }
 
+static int set_callee_state(struct bpf_verifier_env *env,
+			    struct bpf_func_state *caller,
+			    struct bpf_func_state *callee, int insn_idx)
+{
+	int i;
+
+	/* copy r1 - r5 args that callee can access.  The copy includes parent
+	 * pointers, which connects us up to the liveness chain
+	 */
+	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+		callee->regs[i] = caller->regs[i];
+	return 0;
+}
+
+static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			   int *insn_idx)
+{
+	int subprog, target_insn;
+
+	target_insn = *insn_idx + insn->imm + 1;
+	subprog = find_subprog(env, target_insn);
+	if (subprog < 0) {
+		verbose(env, "verifier bug. No program starts at insn %d\n",
+			target_insn);
+		return -EFAULT;
+	}
+
+	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
-- 
cgit v1.2.3


From 282a0f46d6cda7cf843cd77c9b53b4d1d9e31302 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:24 -0800
Subject: bpf: Change return value of verifier function add_subprog()

Currently, verifier function add_subprog() returns 0 for success
and negative value for failure. Change the return value
to be the subprog number for success. This functionality will be
used in the next patch to save a call to find_subprog().

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204924.3884848-1-yhs@fb.com
---
 kernel/bpf/verifier.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 97e772f44cd7..dbdca49ac6cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1530,7 +1530,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	}
 	ret = find_subprog(env, off);
 	if (ret >= 0)
-		return 0;
+		return ret;
 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
 		verbose(env, "too many subprograms\n");
 		return -E2BIG;
@@ -1538,7 +1538,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	env->subprog_info[env->subprog_cnt++].start = off;
 	sort(env->subprog_info, env->subprog_cnt,
 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
-	return 0;
+	return env->subprog_cnt - 1;
 }
 
 static int check_subprogs(struct bpf_verifier_env *env)
-- 
cgit v1.2.3


From 69c087ba6225b574afb6e505b72cb75242a3d844 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:25 -0800
Subject: bpf: Add bpf_for_each_map_elem() helper

The bpf_for_each_map_elem() helper is introduced which
iterates all map elements with a callback function. The
helper signature looks like
  long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags)
and for each map element, the callback_fn will be called. For example,
like hashmap, the callback signature may look like
  long callback_fn(map, key, val, callback_ctx)

There are two known use cases for this. One is from upstream ([1]) where
a for_each_map_elem helper may help implement a timeout mechanism
in a more generic way. Another is from our internal discussion
for a firewall use case where a map contains all the rules. The packet
data can be compared to all these rules to decide allow or deny
the packet.

For array maps, users can already use a bounded loop to traverse
elements. Using this helper can avoid using bounded loop. For other
type of maps (e.g., hash maps) where bounded loop is hard or
impossible to use, this helper provides a convenient way to
operate on all elements.

For callback_fn, besides map and map element, a callback_ctx,
allocated on caller stack, is also passed to the callback
function. This callback_ctx argument can provide additional
input and allow to write to caller stack for output.

If the callback_fn returns 0, the helper will iterate through next
element if available. If the callback_fn returns 1, the helper
will stop iterating and returns to the bpf program. Other return
values are not used for now.

Currently, this helper is only available with jit. It is possible
to make it work with interpreter with so effort but I leave it
as the future work.

[1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204925.3884923-1-yhs@fb.com
---
 include/linux/bpf.h            |  13 +++
 include/linux/bpf_verifier.h   |   3 +
 include/uapi/linux/bpf.h       |  38 ++++++++
 kernel/bpf/bpf_iter.c          |  16 ++++
 kernel/bpf/helpers.c           |   2 +
 kernel/bpf/verifier.c          | 208 ++++++++++++++++++++++++++++++++++++++---
 kernel/trace/bpf_trace.c       |   2 +
 tools/include/uapi/linux/bpf.h |  38 ++++++++
 8 files changed, 307 insertions(+), 13 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e1e4d2f60527..aeb1b93a4d75 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_local_storage;
 struct bpf_local_storage_map;
 struct kobject;
 struct mem_cgroup;
+struct bpf_func_state;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -129,6 +130,13 @@ struct bpf_map_ops {
 	bool (*map_meta_equal)(const struct bpf_map *meta0,
 			       const struct bpf_map *meta1);
 
+
+	int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
+					      struct bpf_func_state *caller,
+					      struct bpf_func_state *callee);
+	int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
+				     void *callback_ctx, u64 flags);
+
 	/* BTF name and id of struct allocated by map_alloc */
 	const char * const map_btf_name;
 	int *map_btf_id;
@@ -295,6 +303,8 @@ enum bpf_arg_type {
 	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 	ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
+	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
+	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
 	__BPF_ARG_TYPE_MAX,
 };
 
@@ -411,6 +421,8 @@ enum bpf_reg_type {
 	PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
 	PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
+	PTR_TO_FUNC,		 /* reg points to a bpf program function */
+	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1887,6 +1899,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto;
 extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
 extern const struct bpf_func_proto bpf_task_storage_get_proto;
 extern const struct bpf_func_proto bpf_task_storage_delete_proto;
+extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 971b33aca13d..51c2ffa3d901 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -68,6 +68,8 @@ struct bpf_reg_state {
 			unsigned long raw1;
 			unsigned long raw2;
 		} raw;
+
+		u32 subprogno; /* for PTR_TO_FUNC */
 	};
 	/* For PTR_TO_PACKET, used to find other pointers with the same variable
 	 * offset, so they can share range knowledge.
@@ -204,6 +206,7 @@ struct bpf_func_state {
 	int acquired_refs;
 	struct bpf_reference_state *refs;
 	int allocated_stack;
+	bool in_callback_fn;
 	struct bpf_stack_state *stack;
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 79c893310492..b89af20cfa19 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -393,6 +393,15 @@ enum bpf_link_type {
  *                   is struct/union.
  */
 #define BPF_PSEUDO_BTF_ID	3
+/* insn[0].src_reg:  BPF_PSEUDO_FUNC
+ * insn[0].imm:      insn offset to the func
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the function
+ * verifier type:    PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC		4
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
@@ -3909,6 +3918,34 @@ union bpf_attr {
  *		* **BPF_MTU_CHK_RET_FRAG_NEEDED**
  *		* **BPF_MTU_CHK_RET_SEGS_TOOBIG**
  *
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For each element in **map**, call **callback_fn** function with
+ *		**map**, **callback_ctx** and other map-specific parameters.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0.
+ *
+ *		The following are a list of supported map types and their
+ *		respective expected callback signatures:
+ *
+ *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ *		For per_cpu maps, the map_value is the value on the cpu where the
+ *		bpf_prog is running.
+ *
+ *		If **callback_fn** return 0, the helper will continue to the next
+ *		element. If return value is 1, the helper will skip the rest of
+ *		elements and return. Other return values are not used now.
+ *
+ *	Return
+ *		The number of traversed map elements for success, **-EINVAL** for
+ *		invalid **flags**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4075,6 +4112,7 @@ union bpf_attr {
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
+	FN(for_each_map_elem),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index a0d9eade9c80..931870f9cf56 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 	 */
 	return ret == 0 ? 0 : -EAGAIN;
 }
+
+BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
+	   void *, callback_ctx, u64, flags)
+{
+	return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
+}
+
+const struct bpf_func_proto bpf_for_each_map_elem_proto = {
+	.func		= bpf_for_each_map_elem,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 308427fe03a3..074800226327 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_discard_proto;
 	case BPF_FUNC_ringbuf_query:
 		return &bpf_ringbuf_query_proto;
+	case BPF_FUNC_for_each_map_elem:
+		return &bpf_for_each_map_elem_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dbdca49ac6cc..53afe9461b03 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_CALL;
 }
 
+static bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
 	u32 btf_id;
 	struct btf *ret_btf;
 	u32 ret_btf_id;
+	u32 subprogno;
 };
 
 struct btf *btf_vmlinux;
@@ -427,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
 	return type == PTR_TO_SOCKET ||
 		type == PTR_TO_TCP_SOCK ||
 		type == PTR_TO_MAP_VALUE ||
+		type == PTR_TO_MAP_KEY ||
 		type == PTR_TO_SOCK_COMMON;
 }
 
@@ -469,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
 	       type == ARG_PTR_TO_MEM_OR_NULL ||
 	       type == ARG_PTR_TO_CTX_OR_NULL ||
 	       type == ARG_PTR_TO_SOCKET_OR_NULL ||
-	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
+	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
+	       type == ARG_PTR_TO_STACK_OR_NULL;
 }
 
 /* Determine whether the function releases some resources allocated by another
@@ -552,6 +561,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
 	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
+	[PTR_TO_FUNC]		= "func",
+	[PTR_TO_MAP_KEY]	= "map_key",
 };
 
 static char slot_type_char[] = {
@@ -623,6 +634,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			if (type_is_pkt_pointer(t))
 				verbose(env, ",r=%d", reg->range);
 			else if (t == CONST_PTR_TO_MAP ||
+				 t == PTR_TO_MAP_KEY ||
 				 t == PTR_TO_MAP_VALUE ||
 				 t == PTR_TO_MAP_VALUE_OR_NULL)
 				verbose(env, ",ks=%d,vs=%d",
@@ -1555,6 +1567,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
 
 	/* determine subprog starts. The end is one before the next starts */
 	for (i = 0; i < insn_cnt; i++) {
+		if (bpf_pseudo_func(insn + i)) {
+			if (!env->bpf_capable) {
+				verbose(env,
+					"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
+				return -EPERM;
+			}
+			ret = add_subprog(env, i + insn[i].imm + 1);
+			if (ret < 0)
+				return ret;
+			/* remember subprog */
+			insn[i + 1].imm = ret;
+			continue;
+		}
 		if (!bpf_pseudo_call(insn + i))
 			continue;
 		if (!env->bpf_capable) {
@@ -2286,6 +2311,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PERCPU_BTF_ID:
 	case PTR_TO_MEM:
 	case PTR_TO_MEM_OR_NULL:
+	case PTR_TO_FUNC:
+	case PTR_TO_MAP_KEY:
 		return true;
 	default:
 		return false;
@@ -2890,6 +2917,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
 
 	reg = &cur_regs(env)[regno];
 	switch (reg->type) {
+	case PTR_TO_MAP_KEY:
+		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
+			mem_size, off, size);
+		break;
 	case PTR_TO_MAP_VALUE:
 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
 			mem_size, off, size);
@@ -3295,6 +3326,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_FLOW_KEYS:
 		pointer_desc = "flow keys ";
 		break;
+	case PTR_TO_MAP_KEY:
+		pointer_desc = "key ";
+		break;
 	case PTR_TO_MAP_VALUE:
 		pointer_desc = "value ";
 		break;
@@ -3396,7 +3430,7 @@ process_func:
 continue_func:
 	subprog_end = subprog[idx + 1].start;
 	for (; i < subprog_end; i++) {
-		if (!bpf_pseudo_call(insn + i))
+		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
 			continue;
 		/* remember insn and function to return to */
 		ret_insn[frame] = i + 1;
@@ -3833,7 +3867,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 	/* for access checks, reg->off is just part of off */
 	off += reg->off;
 
-	if (reg->type == PTR_TO_MAP_VALUE) {
+	if (reg->type == PTR_TO_MAP_KEY) {
+		if (t == BPF_WRITE) {
+			verbose(env, "write to change key R%d not allowed\n", regno);
+			return -EACCES;
+		}
+
+		err = check_mem_region_access(env, regno, off, size,
+					      reg->map_ptr->key_size, false);
+		if (err)
+			return err;
+		if (value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_MAP_VALUE) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose(env, "R%d leaks addr into map\n", value_regno);
@@ -4249,6 +4295,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	case PTR_TO_PACKET_META:
 		return check_packet_access(env, regno, reg->off, access_size,
 					   zero_size_allowed);
+	case PTR_TO_MAP_KEY:
+		return check_mem_region_access(env, regno, reg->off, access_size,
+					       reg->map_ptr->key_size, false);
 	case PTR_TO_MAP_VALUE:
 		if (check_map_access_type(env, regno, reg->off, access_size,
 					  meta && meta->raw_mode ? BPF_WRITE :
@@ -4465,6 +4514,7 @@ static const struct bpf_reg_types map_key_value_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 	},
 };
@@ -4496,6 +4546,7 @@ static const struct bpf_reg_types mem_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 		PTR_TO_MEM,
 		PTR_TO_RDONLY_BUF,
@@ -4508,6 +4559,7 @@ static const struct bpf_reg_types int_ptr_types = {
 		PTR_TO_STACK,
 		PTR_TO_PACKET,
 		PTR_TO_PACKET_META,
+		PTR_TO_MAP_KEY,
 		PTR_TO_MAP_VALUE,
 	},
 };
@@ -4520,6 +4572,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
+static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
+static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4548,6 +4602,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_INT]		= &int_ptr_types,
 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
+	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
+	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4729,6 +4785,8 @@ skip_type_check:
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
 		}
+	} else if (arg_type == ARG_PTR_TO_FUNC) {
+		meta->subprogno = reg->subprogno;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		/* The access to this pointer is only checked when we hit the
 		 * next is_mem_size argument below.
@@ -5375,6 +5433,35 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
 }
 
+static int set_map_elem_callback_state(struct bpf_verifier_env *env,
+				       struct bpf_func_state *caller,
+				       struct bpf_func_state *callee,
+				       int insn_idx)
+{
+	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
+	struct bpf_map *map;
+	int err;
+
+	if (bpf_map_ptr_poisoned(insn_aux)) {
+		verbose(env, "tail_call abusing map_ptr\n");
+		return -EINVAL;
+	}
+
+	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
+	if (!map->ops->map_set_for_each_callback_args ||
+	    !map->ops->map_for_each_callback) {
+		verbose(env, "callback function not allowed for map\n");
+		return -ENOTSUPP;
+	}
+
+	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
+	if (err)
+		return err;
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -5397,8 +5484,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 
 	state->curframe--;
 	caller = state->frame[state->curframe];
-	/* return to the caller whatever r0 had in the callee */
-	caller->regs[BPF_REG_0] = *r0;
+	if (callee->in_callback_fn) {
+		/* enforce R0 return value range [0, 1]. */
+		struct tnum range = tnum_range(0, 1);
+
+		if (r0->type != SCALAR_VALUE) {
+			verbose(env, "R0 not a scalar value\n");
+			return -EACCES;
+		}
+		if (!tnum_in(range, r0->var_off)) {
+			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
+			return -EINVAL;
+		}
+	} else {
+		/* return to the caller whatever r0 had in the callee */
+		caller->regs[BPF_REG_0] = *r0;
+	}
 
 	/* Transfer references to the caller */
 	err = transfer_reference_state(caller, callee);
@@ -5453,7 +5554,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_delete_elem &&
 	    func_id != BPF_FUNC_map_push_elem &&
 	    func_id != BPF_FUNC_map_pop_elem &&
-	    func_id != BPF_FUNC_map_peek_elem)
+	    func_id != BPF_FUNC_map_peek_elem &&
+	    func_id != BPF_FUNC_for_each_map_elem)
 		return 0;
 
 	if (map == NULL) {
@@ -5534,15 +5636,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
 	return state->acquired_refs ? -EINVAL : 0;
 }
 
-static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
+static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+			     int *insn_idx_p)
 {
 	const struct bpf_func_proto *fn = NULL;
 	struct bpf_reg_state *regs;
 	struct bpf_call_arg_meta meta;
+	int insn_idx = *insn_idx_p;
 	bool changes_data;
-	int i, err;
+	int i, err, func_id;
 
 	/* find function prototype */
+	func_id = insn->imm;
 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
 			func_id);
@@ -5638,6 +5743,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+	if (func_id == BPF_FUNC_for_each_map_elem) {
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_map_elem_callback_state);
+		if (err < 0)
+			return -EINVAL;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
@@ -5891,6 +6003,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 		else
 			*ptr_limit = -off;
 		return 0;
+	case PTR_TO_MAP_KEY:
+		/* Currently, this code is not exercised as the only use
+		 * is bpf_for_each_map_elem() helper which requires
+		 * bpf_capble. The code has been tested manually for
+		 * future use.
+		 */
+		if (mask_to_left) {
+			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
+		} else {
+			off = ptr_reg->smin_value + ptr_reg->off;
+			*ptr_limit = ptr_reg->map_ptr->key_size - off;
+		}
+		return 0;
 	case PTR_TO_MAP_VALUE:
 		if (mask_to_left) {
 			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
@@ -6092,6 +6217,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
+	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
 			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
@@ -8271,6 +8397,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return 0;
 	}
 
+	if (insn->src_reg == BPF_PSEUDO_FUNC) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		u32 subprogno = insn[1].imm;
+
+		if (!aux->func_info) {
+			verbose(env, "missing btf func_info\n");
+			return -EINVAL;
+		}
+		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
+			verbose(env, "callback function not static\n");
+			return -EINVAL;
+		}
+
+		dst_reg->type = PTR_TO_FUNC;
+		dst_reg->subprogno = subprogno;
+		return 0;
+	}
+
 	map = env->used_maps[aux->map_index];
 	mark_reg_known_zero(env, regs, insn->dst_reg);
 	dst_reg->map_ptr = map;
@@ -8657,6 +8801,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
 	struct bpf_insn *insns = env->prog->insnsi;
 	int ret;
 
+	if (bpf_pseudo_func(insns + t))
+		return visit_func_call_insn(t, insn_cnt, insns, env, true);
+
 	/* All non-branch instructions have a single fall-through edge. */
 	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
 	    BPF_CLASS(insns[t].code) != BPF_JMP32)
@@ -9277,6 +9424,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 			 */
 			return false;
 		}
+	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
 		 * everything else matches, we are OK.
@@ -10123,10 +10271,9 @@ static int do_check(struct bpf_verifier_env *env)
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
 				else
-					err = check_helper_call(env, insn->imm, env->insn_idx);
+					err = check_helper_call(env, insn, &env->insn_idx);
 				if (err)
 					return err;
-
 			} else if (opcode == BPF_JA) {
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->imm != 0 ||
@@ -10555,6 +10702,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 				goto next_insn;
 			}
 
+			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
+				aux = &env->insn_aux_data[i];
+				aux->ptr_type = PTR_TO_FUNC;
+				goto next_insn;
+			}
+
 			/* In final convert_pseudo_ld_imm64() step, this is
 			 * converted into regular 64-bit imm load insn.
 			 */
@@ -10687,9 +10840,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 	int insn_cnt = env->prog->len;
 	int i;
 
-	for (i = 0; i < insn_cnt; i++, insn++)
-		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
-			insn->src_reg = 0;
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
+			continue;
+		if (insn->src_reg == BPF_PSEUDO_FUNC)
+			continue;
+		insn->src_reg = 0;
+	}
 }
 
 /* single env->prog->insni[off] instruction was replaced with the range
@@ -11330,6 +11487,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			env->insn_aux_data[i].call_imm = insn->imm;
+			/* subprog is encoded in insn[1].imm */
+			continue;
+		}
+
 		if (!bpf_pseudo_call(insn))
 			continue;
 		/* Upon error here we cannot fall back to interpreter but
@@ -11459,6 +11622,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	for (i = 0; i < env->subprog_cnt; i++) {
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
+			if (bpf_pseudo_func(insn)) {
+				subprog = insn[1].imm;
+				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
+				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
+				continue;
+			}
 			if (!bpf_pseudo_call(insn))
 				continue;
 			subprog = insn->off;
@@ -11504,6 +11673,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * later look the same as if they were interpreted only.
 	 */
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			insn[0].imm = env->insn_aux_data[i].call_imm;
+			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			continue;
+		}
 		if (!bpf_pseudo_call(insn))
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
@@ -11568,6 +11742,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 		return -EINVAL;
 	}
 	for (i = 0; i < prog->len; i++, insn++) {
+		if (bpf_pseudo_func(insn)) {
+			/* When JIT fails the progs with callback calls
+			 * have to be rejected, since interpreter doesn't support them yet.
+			 */
+			verbose(env, "callbacks are not allowed in non-JITed programs\n");
+			return -EINVAL;
+		}
+
 		if (!bpf_pseudo_call(insn))
 			continue;
 		depth = get_callee_stack_depth(env, insn, i);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e9701744d8e4..0d23755c2747 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1371,6 +1371,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_get_proto;
 	case BPF_FUNC_task_storage_delete:
 		return &bpf_task_storage_delete_proto;
+	case BPF_FUNC_for_each_map_elem:
+		return &bpf_for_each_map_elem_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 79c893310492..b89af20cfa19 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -393,6 +393,15 @@ enum bpf_link_type {
  *                   is struct/union.
  */
 #define BPF_PSEUDO_BTF_ID	3
+/* insn[0].src_reg:  BPF_PSEUDO_FUNC
+ * insn[0].imm:      insn offset to the func
+ * insn[1].imm:      0
+ * insn[0].off:      0
+ * insn[1].off:      0
+ * ldimm64 rewrite:  address of the function
+ * verifier type:    PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC		4
 
 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
  * offset to another bpf function
@@ -3909,6 +3918,34 @@ union bpf_attr {
  *		* **BPF_MTU_CHK_RET_FRAG_NEEDED**
  *		* **BPF_MTU_CHK_RET_SEGS_TOOBIG**
  *
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For each element in **map**, call **callback_fn** function with
+ *		**map**, **callback_ctx** and other map-specific parameters.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0.
+ *
+ *		The following are a list of supported map types and their
+ *		respective expected callback signatures:
+ *
+ *		BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ *		BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ *		BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ *		long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ *		For per_cpu maps, the map_value is the value on the cpu where the
+ *		bpf_prog is running.
+ *
+ *		If **callback_fn** return 0, the helper will continue to the next
+ *		element. If return value is 1, the helper will skip the rest of
+ *		elements and return. Other return values are not used now.
+ *
+ *	Return
+ *		The number of traversed map elements for success, **-EINVAL** for
+ *		invalid **flags**.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4075,6 +4112,7 @@ union bpf_attr {
 	FN(ima_inode_hash),		\
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
+	FN(for_each_map_elem),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 314ee05e2fc601a7bece14376547d2b7a04bab67 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:27 -0800
Subject: bpf: Add hashtab support for bpf_for_each_map_elem() helper

This patch added support for hashmap, percpu hashmap,
lru hashmap and percpu lru hashmap.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204927.3885020-1-yhs@fb.com
---
 include/linux/bpf.h   |  4 ++++
 kernel/bpf/hashtab.c  | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 27 +++++++++++++++++++++
 3 files changed, 96 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aeb1b93a4d75..4c730863fa77 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1397,6 +1397,10 @@ void bpf_iter_map_show_fdinfo(const struct bpf_iter_aux_info *aux,
 int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
 				struct bpf_link_info *info);
 
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d63912e73ad9..330d721dd2af 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_hash_map_info),
 };
 
+static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
+				  void *callback_ctx, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_head *head;
+	struct hlist_nulls_node *n;
+	struct htab_elem *elem;
+	u32 roundup_key_size;
+	int i, num_elems = 0;
+	void __percpu *pptr;
+	struct bucket *b;
+	void *key, *val;
+	bool is_percpu;
+	u64 ret = 0;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	is_percpu = htab_is_percpu(htab);
+
+	roundup_key_size = round_up(map->key_size, 8);
+	/* disable migration so percpu value prepared here will be the
+	 * same as the one seen by the bpf program with bpf_map_lookup_elem().
+	 */
+	if (is_percpu)
+		migrate_disable();
+	for (i = 0; i < htab->n_buckets; i++) {
+		b = &htab->buckets[i];
+		rcu_read_lock();
+		head = &b->head;
+		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
+			key = elem->key;
+			if (is_percpu) {
+				/* current cpu value for percpu map */
+				pptr = htab_elem_get_ptr(elem, map->key_size);
+				val = this_cpu_ptr(pptr);
+			} else {
+				val = elem->key + roundup_key_size;
+			}
+			num_elems++;
+			ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
+					(u64)(long)key, (u64)(long)val,
+					(u64)(long)callback_ctx, 0);
+			/* return value: 0 - continue, 1 - stop and return */
+			if (ret) {
+				rcu_read_unlock();
+				goto out;
+			}
+		}
+		rcu_read_unlock();
+	}
+out:
+	if (is_percpu)
+		migrate_enable();
+	return num_elems;
+}
+
 static int htab_map_btf_id;
 const struct bpf_map_ops htab_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
@@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_map_btf_id,
@@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_lru),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_lru_map_btf_id,
@@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_percpu),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_percpu_map_btf_id,
@@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_hash_elem,
 	BATCH_OPS(htab_lru_percpu),
 	.map_btf_name = "bpf_htab",
 	.map_btf_id = &htab_lru_percpu_map_btf_id,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 53afe9461b03..9fe90ce52a65 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5403,6 +5403,33 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	return 0;
 }
 
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee)
+{
+	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
+	 *      void *callback_ctx, u64 flags);
+	 * callback_fn(struct bpf_map *map, void *key, void *value,
+	 *      void *callback_ctx);
+	 */
+	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
+
+	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
+
+	/* pointer to stack or null */
+	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	return 0;
+}
+
 static int set_callee_state(struct bpf_verifier_env *env,
 			    struct bpf_func_state *caller,
 			    struct bpf_func_state *callee, int insn_idx)
-- 
cgit v1.2.3


From 06dcdcd4b9e84ee78d865c928b1d43bb71829251 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:28 -0800
Subject: bpf: Add arraymap support for bpf_for_each_map_elem() helper

This patch added support for arraymap and percpu arraymap.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204928.3885192-1-yhs@fb.com
---
 kernel/bpf/arraymap.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1f8453343bf2..463d25e1e67e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
 };
 
+static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
+				   void *callback_ctx, u64 flags)
+{
+	u32 i, key, num_elems = 0;
+	struct bpf_array *array;
+	bool is_percpu;
+	u64 ret = 0;
+	void *val;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+	array = container_of(map, struct bpf_array, map);
+	if (is_percpu)
+		migrate_disable();
+	for (i = 0; i < map->max_entries; i++) {
+		if (is_percpu)
+			val = this_cpu_ptr(array->pptrs[i]);
+		else
+			val = array->value + array->elem_size * i;
+		num_elems++;
+		key = i;
+		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
+					(u64)(long)&key, (u64)(long)val,
+					(u64)(long)callback_ctx, 0);
+		/* return value: 0 - continue, 1 - stop and return */
+		if (ret)
+			break;
+	}
+
+	if (is_percpu)
+		migrate_enable();
+	return num_elems;
+}
+
 static int array_map_btf_id;
 const struct bpf_map_ops array_map_ops = {
 	.map_meta_equal = array_map_meta_equal,
@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
 	.map_check_btf = array_map_check_btf,
 	.map_lookup_batch = generic_map_lookup_batch,
 	.map_update_batch = generic_map_update_batch,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
 	.map_btf_id = &array_map_btf_id,
 	.iter_seq_info = &iter_seq_info,
@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_delete_elem = array_map_delete_elem,
 	.map_seq_show_elem = percpu_array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
+	.map_set_for_each_callback_args = map_set_for_each_callback_args,
+	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
 	.map_btf_id = &percpu_array_map_btf_id,
 	.iter_seq_info = &iter_seq_info,
-- 
cgit v1.2.3


From b8f871fa32ad392759bc70090fa8c60d9f10625c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:29 -0800
Subject: libbpf: Move function is_ldimm64() earlier in libbpf.c

Move function is_ldimm64() close to the beginning of libbpf.c,
so it can be reused by later code and the next patch as well.
There is no functionality change.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204929.3885295-1-yhs@fb.com
---
 tools/lib/bpf/libbpf.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d43cc3f29dae..21a3eedf070d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -574,6 +574,11 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
 	       insn->off == 0;
 }
 
+static bool is_ldimm64(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 static int
 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		      const char *name, size_t sec_idx, const char *sec_name,
@@ -3395,7 +3400,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return 0;
 	}
 
-	if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+	if (!is_ldimm64(insn)) {
 		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 			prog->name, sym_name, insn_idx, insn->code);
 		return -LIBBPF_ERRNO__RELOC;
@@ -5566,11 +5571,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
 	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
 }
 
-static bool is_ldimm64(struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
 {
 	switch (BPF_SIZE(insn->code)) {
-- 
cgit v1.2.3


From 53eddb5e04ac5c53a4ccef9f1f900562a5a75246 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:30 -0800
Subject: libbpf: Support subprog address relocation

A new relocation RELO_SUBPROG_ADDR is added to capture
subprog addresses loaded with ld_imm64 insns. Such ld_imm64
insns are marked with BPF_PSEUDO_FUNC and will be passed to
kernel. For bpf_for_each_map_elem() case, kernel will
check that the to-be-used subprog address must be a static
function and replace it with proper actual jited func address.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204930.3885367-1-yhs@fb.com
---
 tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 21a3eedf070d..62d9ed56b081 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -188,6 +188,7 @@ enum reloc_type {
 	RELO_CALL,
 	RELO_DATA,
 	RELO_EXTERN,
+	RELO_SUBPROG_ADDR,
 };
 
 struct reloc_desc {
@@ -579,6 +580,11 @@ static bool is_ldimm64(struct bpf_insn *insn)
 	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
 }
 
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+	return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 static int
 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		      const char *name, size_t sec_idx, const char *sec_name,
@@ -2979,6 +2985,23 @@ static bool sym_is_extern(const GElf_Sym *sym)
 	       GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
 }
 
+static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
+{
+	int bind = GELF_ST_BIND(sym->st_info);
+	int type = GELF_ST_TYPE(sym->st_info);
+
+	/* in .text section */
+	if (sym->st_shndx != text_shndx)
+		return false;
+
+	/* local function */
+	if (bind == STB_LOCAL && type == STT_SECTION)
+		return true;
+
+	/* global function */
+	return bind == STB_GLOBAL && type == STT_FUNC;
+}
+
 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 {
 	const struct btf_type *t;
@@ -3435,6 +3458,23 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return -LIBBPF_ERRNO__RELOC;
 	}
 
+	/* loading subprog addresses */
+	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
+		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
+		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
+		 */
+		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
+			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		reloc_desc->type = RELO_SUBPROG_ADDR;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = sym->st_value;
+		return 0;
+	}
+
 	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 
@@ -6172,6 +6212,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			}
 			relo->processed = true;
 			break;
+		case RELO_SUBPROG_ADDR:
+			insn[0].src_reg = BPF_PSEUDO_FUNC;
+			/* will be handled as a follow up pass */
+			break;
 		case RELO_CALL:
 			/* will be handled as a follow up pass */
 			break;
@@ -6358,11 +6402,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 
 	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
 		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
-		if (!insn_is_subprog_call(insn))
+		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
 			continue;
 
 		relo = find_prog_insn_relo(prog, insn_idx);
-		if (relo && relo->type != RELO_CALL) {
+		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
 			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
 				prog->name, insn_idx, relo->type);
 			return -LIBBPF_ERRNO__RELOC;
@@ -6374,8 +6418,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 			 * call always has imm = -1, but for static functions
 			 * relocation is against STT_SECTION and insn->imm
 			 * points to a start of a static function
+			 *
+			 * for subprog addr relocation, the relo->sym_off + insn->imm is
+			 * the byte offset in the corresponding section.
 			 */
-			sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+			if (relo->type == RELO_CALL)
+				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+			else
+				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
+		} else if (insn_is_pseudo_func(insn)) {
+			/*
+			 * RELO_SUBPROG_ADDR relo is always emitted even if both
+			 * functions are in the same section, so it shouldn't reach here.
+			 */
+			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+				prog->name, insn_idx);
+			return -LIBBPF_ERRNO__RELOC;
 		} else {
 			/* if subprogram call is to a static function within
 			 * the same ELF section, there won't be any relocation
-- 
cgit v1.2.3


From f1f9f0d8d737b9a1c5d15635bf5696643626fd39 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:31 -0800
Subject: bpftool: Print subprog address properly

With later hashmap example, using bpftool xlated output may
look like:
  int dump_task(struct bpf_iter__task * ctx):
  ; struct task_struct *task = ctx->task;
     0: (79) r2 = *(u64 *)(r1 +8)
  ; if (task == (void *)0 || called > 0)
  ...
    19: (18) r2 = subprog[+17]
    30: (18) r2 = subprog[+25]
  ...
  36: (95) exit
  __u64 check_hash_elem(struct bpf_map * map, __u32 * key, __u64 * val,
                        struct callback_ctx * data):
  ; struct bpf_iter__task *ctx = data->ctx;
    37: (79) r5 = *(u64 *)(r4 +0)
  ...
    55: (95) exit
  __u64 check_percpu_elem(struct bpf_map * map, __u32 * key,
                          __u64 * val, void * unused):
  ; check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val, void *unused)
    56: (bf) r6 = r3
  ...
    83: (18) r2 = subprog[-47]

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204931.3885458-1-yhs@fb.com
---
 tools/bpf/bpftool/xlated_dumper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 8608cd68cdd0..6fc3e6f7f40c 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
 	else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+	else if (insn->src_reg == BPF_PSEUDO_FUNC)
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			 "subprog[%+d]", insn->imm);
 	else
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "0x%llx", (unsigned long long)full_imm);
-- 
cgit v1.2.3


From 9de7f0fdab326a37c9f741f0f6c0f1cbc320a5ab Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:33 -0800
Subject: selftests/bpf: Add hashmap test for bpf_for_each_map_elem() helper

A test case is added for hashmap and percpu hashmap. The test
also exercises nested bpf_for_each_map_elem() calls like
    bpf_prog:
      bpf_for_each_map_elem(func1)
    func1:
      bpf_for_each_map_elem(func2)
    func2:

  $ ./test_progs -n 45
  #45/1 hash_map:OK
  #45 for_each:OK
  Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204933.3885657-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/for_each.c  | 73 +++++++++++++++++
 .../selftests/bpf/progs/for_each_hash_map_elem.c   | 95 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           | 11 +++
 3 files changed, 179 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/for_each.c
 create mode 100644 tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c

diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..aa847cd9f71f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+	int i, err, hashmap_fd, max_entries, percpu_map_fd;
+	struct for_each_hash_map_elem *skel;
+	__u64 *percpu_valbuf = NULL;
+	__u32 key, num_cpus, retval;
+	__u64 val;
+
+	skel = for_each_hash_map_elem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+		return;
+
+	hashmap_fd = bpf_map__fd(skel->maps.hashmap);
+	max_entries = bpf_map__max_entries(skel->maps.hashmap);
+	for (i = 0; i < max_entries; i++) {
+		key = i;
+		val = i + 1;
+		err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "map_update"))
+			goto out;
+	}
+
+	num_cpus = bpf_num_possible_cpus();
+	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+		goto out;
+
+	key = 1;
+	for (i = 0; i < num_cpus; i++)
+		percpu_valbuf[i] = i + 1;
+	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	if (!ASSERT_OK(err, "percpu_map_update"))
+		goto out;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+				&retval, &duration);
+	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, retval))
+		goto out;
+
+	ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+	ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+	key = 1;
+	err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+	ASSERT_ERR(err, "hashmap_lookup");
+
+	ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+	ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+	ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+	ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+	ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+	ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+	free(percpu_valbuf);
+	for_each_hash_map_elem__destroy(skel);
+}
+
+void test_for_each(void)
+{
+	if (test__start_subtest("hash_map"))
+		test_hash_map();
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..913dd91aafff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+	struct __sk_buff *ctx;
+	int input;
+	int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		struct callback_ctx *data)
+{
+	struct __sk_buff *skb = data->ctx;
+	__u32 k;
+	__u64 v;
+
+	if (skb) {
+		k = *key;
+		v = *val;
+		if (skb->len == 10000 && k == 10 && v == 10)
+			data->output = 3; /* impossible path */
+		else
+			data->output = 4;
+	} else {
+		data->output = data->input;
+		bpf_map_delete_elem(map, key);
+	}
+
+	return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		  struct callback_ctx *unused)
+{
+	struct callback_ctx data;
+
+	percpu_called++;
+	cpu = bpf_get_smp_processor_id();
+	percpu_key = *key;
+	percpu_val = *val;
+
+	data.ctx = 0;
+	data.input = 100;
+	data.output = 0;
+	bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+	percpu_output = data.output;
+
+	return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+	struct callback_ctx data;
+
+	data.ctx = skb;
+	data.input = 10;
+	data.output = 0;
+	hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+	hashmap_output = data.output;
+
+	percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+						 (void *)0, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f7c2fd89d01a..e87c8546230e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -152,6 +152,17 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_LT(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act < ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld >= expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	const char *___act = actual;					\
-- 
cgit v1.2.3


From 6b9e3331347ee9e84fe5c71d3eba7ec204f9bb25 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 12:49:34 -0800
Subject: selftests/bpf: Add arraymap test for bpf_for_each_map_elem() helper

A test is added for arraymap and percpu arraymap. The test also
exercises the early return for the helper which does not
traverse all elements.
    $ ./test_progs -n 45
    #45/1 hash_map:OK
    #45/2 array_map:OK
    #45 for_each:OK
    Summary: 1/2 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226204934.3885756-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/for_each.c  | 57 ++++++++++++++++++++
 .../selftests/bpf/progs/for_each_array_map_elem.c  | 61 ++++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/for_each_array_map_elem.c

diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index aa847cd9f71f..68eb12a287d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -3,6 +3,7 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 #include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
 
 static unsigned int duration;
 
@@ -66,8 +67,64 @@ out:
 	for_each_hash_map_elem__destroy(skel);
 }
 
+static void test_array_map(void)
+{
+	__u32 key, num_cpus, max_entries, retval;
+	int i, arraymap_fd, percpu_map_fd, err;
+	struct for_each_array_map_elem *skel;
+	__u64 *percpu_valbuf = NULL;
+	__u64 val, expected_total;
+
+	skel = for_each_array_map_elem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+		return;
+
+	arraymap_fd = bpf_map__fd(skel->maps.arraymap);
+	expected_total = 0;
+	max_entries = bpf_map__max_entries(skel->maps.arraymap);
+	for (i = 0; i < max_entries; i++) {
+		key = i;
+		val = i + 1;
+		/* skip the last iteration for expected total */
+		if (i != max_entries - 1)
+			expected_total += val;
+		err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "map_update"))
+			goto out;
+	}
+
+	num_cpus = bpf_num_possible_cpus();
+	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+		goto out;
+
+	key = 0;
+	for (i = 0; i < num_cpus; i++)
+		percpu_valbuf[i] = i + 1;
+	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	if (!ASSERT_OK(err, "percpu_map_update"))
+		goto out;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+				1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+				&retval, &duration);
+	if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+		  err, errno, retval))
+		goto out;
+
+	ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+	ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+	free(percpu_valbuf);
+	for_each_array_map_elem__destroy(skel);
+}
+
 void test_for_each(void)
 {
 	if (test__start_subtest("hash_map"))
 		test_hash_map();
+	if (test__start_subtest("array_map"))
+		test_array_map();
 }
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..75e8e1069fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 3);
+	__type(key, __u32);
+	__type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+	int output;
+};
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		 struct callback_ctx *data)
+{
+	data->output += *val;
+	if (*key == 1)
+		return 1; /* stop the iteration */
+	return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		  struct callback_ctx *data)
+{
+	cpu = bpf_get_smp_processor_id();
+	percpu_val = *val;
+	return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+	struct callback_ctx data;
+
+	data.output = 0;
+	bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+	arraymap_output = data.output;
+
+	bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+	return 0;
+}
-- 
cgit v1.2.3


From 04883a079968e6250a4549f6fadb6427c534885e Mon Sep 17 00:00:00 2001
From: Ian Denhardt <ian@zenhack.net>
Date: Tue, 23 Feb 2021 21:15:31 -0500
Subject: tools, bpf_asm: Hard error on out of range jumps

Per discussion at [0] this was originally introduced as a warning due
to concerns about breaking existing code, but a hard error probably
makes more sense, especially given that concerns about breakage were
only speculation.

  [0] https://lore.kernel.org/bpf/c964892195a6b91d20a67691448567ef528ffa6d.camel@linux.ibm.com/T/#t

Signed-off-by: Ian Denhardt <ian@zenhack.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Link: https://lore.kernel.org/bpf/a6b6c7516f5d559049d669968e953b4a8d7adea3.1614201868.git.ian@zenhack.net
---
 tools/bpf/bpf_exp.y | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
index 8d48e896be50..8d03e5245da5 100644
--- a/tools/bpf/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
@@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
 {
 	int delta = off - i - 1;
 
-	if (delta < 0 || delta > 255)
-		fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
+	if (delta < 0 || delta > 255) {
+		fprintf(stderr, "error: insn #%d jumps to insn #%d, "
 				"which is out of range\n", i, off);
+		exit(1);
+	}
 	return (uint8_t) delta;
 }
 
-- 
cgit v1.2.3


From 85e142cb42a1e7b33971bf035dae432d8670c46b Mon Sep 17 00:00:00 2001
From: Ian Denhardt <ian@zenhack.net>
Date: Tue, 23 Feb 2021 21:24:00 -0500
Subject: tools, bpf_asm: Exit non-zero on errors

... so callers can correctly detect failure.

Signed-off-by: Ian Denhardt <ian@zenhack.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/b0bea780bc292f29e7b389dd062f20adc2a2d634.1614201868.git.ian@zenhack.net
---
 tools/bpf/bpf_exp.y | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
index 8d03e5245da5..dfb7254a24e8 100644
--- a/tools/bpf/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
@@ -185,13 +185,13 @@ ldx
 	| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
 		if ($2 != 4 || $9 != 0xf) {
 			fprintf(stderr, "ldxb offset not supported!\n");
-			exit(0);
+			exit(1);
 		} else {
 			bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
 	| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
 		if ($2 != 4 || $9 != 0xf) {
 			fprintf(stderr, "ldxb offset not supported!\n");
-			exit(0);
+			exit(1);
 		} else {
 			bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
 	;
@@ -472,7 +472,7 @@ static void bpf_assert_max(void)
 {
 	if (curr_instr >= BPF_MAXINSNS) {
 		fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
-		exit(0);
+		exit(1);
 	}
 }
 
@@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
 
 	if (ret == -ENOENT) {
 		fprintf(stderr, "no such label \'%s\'!\n", label);
-		exit(0);
+		exit(1);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 5cb08553f7f2536f2f5a9142a060af2a77c1d5dc Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Mon, 1 Mar 2021 12:06:04 -0800
Subject: Bluetooth: Notify suspend on le conn failed

When suspending, Bluetooth disconnects all connected peers devices. If
an LE connection is started but isn't completed, we will see an LE
Create Connection Cancel instead of an HCI disconnect. This just adds
a check to see if an LE cancel was the last disconnected device and wake
the suspend thread when that is the case.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6ffa89e3ba0a..468d31f3303d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -772,6 +772,16 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 
 	hci_conn_del(conn);
 
+	/* The suspend notifier is waiting for all devices to disconnect and an
+	 * LE connect cancel will result in an hci_le_conn_failed. Once the last
+	 * connection is deleted, we should also wake the suspend queue to
+	 * complete suspend operations.
+	 */
+	if (list_empty(&hdev->conn_hash.list) &&
+	    test_and_clear_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks)) {
+		wake_up(&hdev->suspend_wait_q);
+	}
+
 	/* Since we may have temporarily stopped the background scanning in
 	 * favor of connection establishment, we should restart it.
 	 */
-- 
cgit v1.2.3


From 303dcc25b5c782547eb13b9f29426de843dd6f34 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 2 Mar 2021 16:40:10 -0800
Subject: tools/runqslower: Allow substituting custom vmlinux.h for the build

Just like was done for bpftool and selftests in ec23eb705620 ("tools/bpftool:
Allow substituting custom vmlinux.h for the build") and ca4db6389d61
("selftests/bpf: Allow substituting custom vmlinux.h for selftests build"),
allow to provide pre-generated vmlinux.h for runqslower build.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210303004010.653954-1-andrii@kernel.org
---
 tools/bpf/runqslower/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index c96ba90c6f01..3818ec511fd2 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -69,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
 		echo "Couldn't find kernel BTF; set VMLINUX_BTF to"	       \
 			"specify its location." >&2;			       \
 		exit 1;\
 	fi
 	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+else
+	$(Q)cp "$(VMLINUX_H)" $@
+endif
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
-- 
cgit v1.2.3


From e5b0ad69c97a04f42834b24a6a0323ab15ccc9bb Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Wed, 3 Mar 2021 08:34:04 -0800
Subject: Bluetooth: Remove unneeded commands for suspend

During suspend, there are a few scan enable and set event filter
commands that don't need to be sent unless there are actual BR/EDR
devices capable of waking the system. Check the HCI_PSCAN bit before
writing scan enable and use a new dev flag, HCI_EVENT_FILTER_CONFIGURED
to control whether to clear the event filter.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/hci_event.c   | 27 +++++++++++++++++++++++++++
 net/bluetooth/hci_request.c | 44 +++++++++++++++++++++++++++-----------------
 3 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ba2f439bc04d..ea4ae551c426 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -320,6 +320,7 @@ enum {
 	HCI_BREDR_ENABLED,
 	HCI_LE_SCAN_INTERRUPTED,
 	HCI_WIDEBAND_SPEECH_ENABLED,
+	HCI_EVENT_FILTER_CONFIGURED,
 
 	HCI_DUT_MODE,
 	HCI_VENDOR_DIAG,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 67668be3461e..f4a734f8a9ac 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -395,6 +395,29 @@ done:
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_set_event_filter(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *)skb->data);
+	struct hci_cp_set_event_filter *cp;
+	void *sent;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_SET_EVENT_FLT);
+	if (!sent)
+		return;
+
+	cp = (struct hci_cp_set_event_filter *)sent;
+
+	if (cp->flt_type == HCI_FLT_CLEAR_ALL)
+		hci_dev_clear_flag(hdev, HCI_EVENT_FILTER_CONFIGURED);
+	else
+		hci_dev_set_flag(hdev, HCI_EVENT_FILTER_CONFIGURED);
+}
+
 static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_read_class_of_dev *rp = (void *) skb->data;
@@ -3328,6 +3351,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_write_scan_enable(hdev, skb);
 		break;
 
+	case HCI_OP_SET_EVENT_FLT:
+		hci_cc_set_event_filter(hdev, skb);
+		break;
+
 	case HCI_OP_READ_CLASS_OF_DEV:
 		hci_cc_read_class_of_dev(hdev, skb);
 		break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e55976db4403..75a42178c82d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1131,14 +1131,14 @@ static void hci_req_clear_event_filter(struct hci_request *req)
 {
 	struct hci_cp_set_event_filter f;
 
-	memset(&f, 0, sizeof(f));
-	f.flt_type = HCI_FLT_CLEAR_ALL;
-	hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f);
+	if (!hci_dev_test_flag(req->hdev, HCI_BREDR_ENABLED))
+		return;
 
-	/* Update page scan state (since we may have modified it when setting
-	 * the event filter).
-	 */
-	__hci_req_update_scan(req);
+	if (hci_dev_test_flag(req->hdev, HCI_EVENT_FILTER_CONFIGURED)) {
+		memset(&f, 0, sizeof(f));
+		f.flt_type = HCI_FLT_CLEAR_ALL;
+		hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &f);
+	}
 }
 
 static void hci_req_set_event_filter(struct hci_request *req)
@@ -1147,6 +1147,10 @@ static void hci_req_set_event_filter(struct hci_request *req)
 	struct hci_cp_set_event_filter f;
 	struct hci_dev *hdev = req->hdev;
 	u8 scan = SCAN_DISABLED;
+	bool scanning = test_bit(HCI_PSCAN, &hdev->flags);
+
+	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+		return;
 
 	/* Always clear event filter when starting */
 	hci_req_clear_event_filter(req);
@@ -1167,12 +1171,13 @@ static void hci_req_set_event_filter(struct hci_request *req)
 		scan = SCAN_PAGE;
 	}
 
-	if (scan)
+	if (scan && !scanning) {
 		set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks);
-	else
+		hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	} else if (!scan && scanning) {
 		set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
-
-	hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+		hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	}
 }
 
 static void cancel_adv_timeout(struct hci_dev *hdev)
@@ -1315,9 +1320,14 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
 
 		hdev->advertising_paused = true;
 		hdev->advertising_old_state = old_state;
-		/* Disable page scan */
-		page_scan = SCAN_DISABLED;
-		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &page_scan);
+
+		/* Disable page scan if enabled */
+		if (test_bit(HCI_PSCAN, &hdev->flags)) {
+			page_scan = SCAN_DISABLED;
+			hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1,
+				    &page_scan);
+			set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
+		}
 
 		/* Disable LE passive scan if enabled */
 		if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
@@ -1328,9 +1338,6 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
 		/* Disable advertisement filters */
 		hci_req_add_set_adv_filter_enable(&req, false);
 
-		/* Mark task needing completion */
-		set_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks);
-
 		/* Prevent disconnects from causing scanning to be re-enabled */
 		hdev->scanning_paused = true;
 
@@ -1364,7 +1371,10 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
 		hdev->suspended = false;
 		hdev->scanning_paused = false;
 
+		/* Clear any event filters and restore scan state */
 		hci_req_clear_event_filter(&req);
+		__hci_req_update_scan(&req);
+
 		/* Reset passive/background scanning to normal */
 		__hci_update_background_scan(&req);
 		/* Enable all of the advertisement filters */
-- 
cgit v1.2.3


From ff02db13e9bfa01e0d66c5fa53da29bd1f1b208a Mon Sep 17 00:00:00 2001
From: Daniel Winkler <danielwinkler@google.com>
Date: Wed, 3 Mar 2021 11:15:23 -0800
Subject: Bluetooth: Allow scannable adv with extended MGMT APIs

An issue was found, where if a bluetooth client requests a broadcast
advertisement with scan response data, it will not be properly
registered with the controller. This is because at the time that the
hci_cp_le_set_scan_param structure is created, the scan response will
not yet have been received since it comes in a second MGMT call. With
empty scan response, the request defaults to a non-scannable PDU type.
On some controllers, the subsequent scan response request will fail due
to incorrect PDU type, and others will succeed and not use the scan
response.

This fix allows the advertising parameters MGMT call to include a flag
to let the kernel know whether a scan response will be coming, so that
the correct PDU type is used in the first place. A bluetoothd change is
also incoming to take advantage of it.

To test this, I created a broadcast advertisement with scan response
data and registered it on the hatch chromebook. Without this change, the
request fails, and with it will succeed.

Reviewed-by: Alain Michaud <alainm@chromium.org>
Reviewed-by: Sonny Sasaka <sonnysasaka@chromium.org>
Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Daniel Winkler <danielwinkler@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h | 1 +
 net/bluetooth/hci_request.c  | 3 ++-
 net/bluetooth/mgmt.c         | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 839a2028009e..a7cffb069565 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -578,6 +578,7 @@ struct mgmt_rp_add_advertising {
 #define MGMT_ADV_PARAM_TIMEOUT		BIT(13)
 #define MGMT_ADV_PARAM_INTERVALS	BIT(14)
 #define MGMT_ADV_PARAM_TX_POWER		BIT(15)
+#define MGMT_ADV_PARAM_SCAN_RSP		BIT(16)
 
 #define MGMT_ADV_FLAG_SEC_MASK	(MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
 				 MGMT_ADV_FLAG_SEC_CODED)
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 75a42178c82d..d7ee11ef70d3 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2180,7 +2180,8 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 			cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND);
 		else
 			cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
-	} else if (adv_instance_is_scannable(hdev, instance)) {
+	} else if (adv_instance_is_scannable(hdev, instance) ||
+		   (flags & MGMT_ADV_PARAM_SCAN_RSP)) {
 		if (secondary_adv)
 			cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND);
 		else
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 74971b4bd457..90334ac4a135 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7432,6 +7432,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev)
 	flags |= MGMT_ADV_PARAM_TIMEOUT;
 	flags |= MGMT_ADV_PARAM_INTERVALS;
 	flags |= MGMT_ADV_PARAM_TX_POWER;
+	flags |= MGMT_ADV_PARAM_SCAN_RSP;
 
 	/* In extended adv TX_POWER returned from Set Adv Param
 	 * will be always valid.
-- 
cgit v1.2.3


From 6ed6e1c761f6c8391af654facbbbf1748ae9f386 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Mon, 1 Mar 2021 10:48:05 -0800
Subject: skmsg: Add function doc for skb->_sk_redir

This should fix the following warning:

include/linux/skbuff.h:932: warning: Function parameter or member
'_sk_redir' not described in 'sk_buff'

Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210301184805.8174-1-xiyou.wangcong@gmail.com
---
 include/linux/skbuff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bd84f799c952..0503c917d773 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -656,6 +656,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@protocol: Packet protocol from driver
  *	@destructor: Destruct function
  *	@tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
+ *	@_sk_redir: socket redirection information for skmsg
  *	@_nfct: Associated connection, if any (with nfctinfo bits)
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
-- 
cgit v1.2.3


From 86a35af628e539f7ae9796f1984761e8d3232ac0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 26 Feb 2021 14:38:10 -0800
Subject: selftests/bpf: Add a verifier scale test with unknown bounded loop

The original bcc pull request https://github.com/iovisor/bcc/pull/3270 exposed
a verifier failure with Clang 12/13 while Clang 4 works fine.

Further investigation exposed two issues:

  Issue 1: LLVM may generate code which uses less refined value. The issue is
           fixed in LLVM patch: https://reviews.llvm.org/D97479

  Issue 2: Spills with initial value 0 are marked as precise which makes later
           state pruning less effective. This is my rough initial analysis and
           further investigation is needed to find how to improve verifier
           pruning in such cases.

With the above LLVM patch, for the new loop6.c test, which has smaller loop
bound compared to original test, I got:

  $ test_progs -s -n 10/16
  ...
  stack depth 64
  processed 390735 insns (limit 1000000) max_states_per_insn 87
      total_states 8658 peak_states 964 mark_read 6
  #10/16 loop6.o:OK

Use the original loop bound, i.e., commenting out "#define WORKAROUND", I got:

  $ test_progs -s -n 10/16
  ...
  BPF program is too large. Processed 1000001 insn
  stack depth 64
  processed 1000001 insns (limit 1000000) max_states_per_insn 91
      total_states 23176 peak_states 5069 mark_read 6
  ...
  #10/16 loop6.o:FAIL

The purpose of this patch is to provide a regression test for the above LLVM fix
and also provide a test case for further analyzing the verifier pruning issue.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Zhenwei Pi <pizhenwei@bytedance.com>
Link: https://lore.kernel.org/bpf/20210226223810.236472-1-yhs@fb.com
---
 tools/testing/selftests/bpf/README.rst             | 39 +++++++++
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     |  1 +
 tools/testing/selftests/bpf/progs/loop6.c          | 99 ++++++++++++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/loop6.c

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..dbc8f6cc5c67 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
 
 __  https://reviews.llvm.org/D78466
 
+bpf_verif_scale/loop6.o test failure with Clang 12
+==================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+  * ``bpf_verif_scale/loop6.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+  R1 type=ctx expected=fp
+  The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+      15:       bc 51 00 00 00 00 00 00 w1 = w5
+      16:       04 01 00 00 ff ff ff ff w1 += -1
+      17:       67 05 00 00 20 00 00 00 r5 <<= 32
+      18:       77 05 00 00 20 00 00 00 r5 >>= 32
+      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+      20:       b7 05 00 00 06 00 00 00 r5 = 6
+  00000000000000a8 <LBB0_4>:
+      21:       b7 02 00 00 00 00 00 00 r2 = 0
+      22:       b7 01 00 00 00 00 00 00 r1 = 0
+  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__  https://reviews.llvm.org/D97479
+
 BPF CO-RE-based tests and Clang version
 =======================================
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..3d002c245d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
 		{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 		{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
 		{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
+		{ "loop6.o", BPF_PROG_TYPE_KPROBE },
 
 		/* partial unroll. 19k insn in a loop.
 		 * Total program size 20.8k insn.
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..2a7141ac1656
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS	6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX		10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX		128
+#endif
+
+#define SG_CHAIN	0x01UL
+#define SG_END		0x02UL
+
+struct scatterlist {
+	unsigned long   page_link;
+	unsigned int    offset;
+	unsigned int    length;
+};
+
+#define sg_is_chain(sg)		((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg)		((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg)	\
+	((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+	struct scatterlist sg;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_last(&sg))
+		return NULL;
+
+	sgp++;
+
+	bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+	if (sg_is_chain(&sg))
+		sgp = sg_chain_ptr(&sg);
+
+	return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+	struct scatterlist *sgp;
+
+	bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+	return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+	   unsigned int out_sgs, unsigned int in_sgs)
+{
+	struct scatterlist *sgp = NULL;
+	__u64 length1 = 0, length2 = 0;
+	unsigned int i, n, len;
+
+	if (config != 0)
+		return 0;
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length1 += len;
+			n++;
+		}
+	}
+
+	for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+		for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+		     sgp = __sg_next(sgp)) {
+			bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+			length2 += len;
+			n++;
+		}
+	}
+
+	config = 1;
+	result = length2 - length1;
+	return 0;
+}
-- 
cgit v1.2.3


From 8fd886911a6a99acf4a8facf619a2e7b5225be78 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:47 +0100
Subject: bpf: Add BTF_KIND_FLOAT to uapi

Add a new kind value and expand the kind bitfield.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-2-iii@linux.ibm.com
---
 include/uapi/linux/btf.h       | 5 +++--
 tools/include/uapi/linux/btf.h | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
 	};
 };
 
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
@@ -72,7 +72,8 @@ struct btf_type {
 #define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
 #define BTF_KIND_VAR		14	/* Variable	*/
 #define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_MAX		BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT		16	/* Floating point	*/
+#define BTF_KIND_MAX		BTF_KIND_FLOAT
 #define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
 	};
 };
 
-#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
@@ -72,7 +72,8 @@ struct btf_type {
 #define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
 #define BTF_KIND_VAR		14	/* Variable	*/
 #define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_MAX		BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT		16	/* Floating point	*/
+#define BTF_KIND_MAX		BTF_KIND_FLOAT
 #define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
-- 
cgit v1.2.3


From 1b1ce92b24331b569a444858fc487a1ca19dc778 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:48 +0100
Subject: libbpf: Fix whitespace in btf_add_composite() comment

Remove trailing space.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-3-iii@linux.ibm.com
---
 tools/lib/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d9c10830d749..0797ab714830 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1883,7 +1883,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
  *   - *byte_sz* - size of the struct, in bytes;
  *
  * Struct initially has no fields in it. Fields can be added by
- * btf__add_field() right after btf__add_struct() succeeds. 
+ * btf__add_field() right after btf__add_struct() succeeds.
  *
  * Returns:
  *   - >0, type ID of newly added BTF type;
-- 
cgit v1.2.3


From 22541a9eeb0d968c133aaebd95fa59da3208e705 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:49 +0100
Subject: libbpf: Add BTF_KIND_FLOAT support

The logic follows that of BTF_KIND_INT most of the time. Sanitization
replaces BTF_KIND_FLOATs with equally-sized empty BTF_KIND_STRUCTs on
older kernels, for example, the following:

    [4] FLOAT 'float' size=4

becomes the following:

    [4] STRUCT '(anon)' size=4 vlen=0

With dwarves patch [1] and this patch, the older kernels, which were
failing with the floating-point-related errors, will now start working
correctly.

[1] https://github.com/iii-i/dwarves/commit/btf-kind-float-v2

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226202256.116518-4-iii@linux.ibm.com
---
 tools/lib/bpf/btf.c             | 49 +++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h             |  6 +++++
 tools/lib/bpf/btf_dump.c        |  4 ++++
 tools/lib/bpf/libbpf.c          | 29 +++++++++++++++++++++++-
 tools/lib/bpf/libbpf.map        |  5 +++++
 tools/lib/bpf/libbpf_internal.h |  2 ++
 6 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 0797ab714830..3aa58f2ac183 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -291,6 +291,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return base_size;
 	case BTF_KIND_INT:
 		return base_size + sizeof(__u32);
@@ -338,6 +339,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return 0;
 	case BTF_KIND_INT:
 		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -578,6 +580,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_UNION:
 		case BTF_KIND_ENUM:
 		case BTF_KIND_DATASEC:
+		case BTF_KIND_FLOAT:
 			size = t->size;
 			goto done;
 		case BTF_KIND_PTR:
@@ -621,6 +624,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
 	switch (kind) {
 	case BTF_KIND_INT:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_FLOAT:
 		return min(btf_ptr_sz(btf), (size_t)t->size);
 	case BTF_KIND_PTR:
 		return btf_ptr_sz(btf);
@@ -1756,6 +1760,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 	return btf_commit_type(btf, sz);
 }
 
+/*
+ * Append new BTF_KIND_FLOAT type with:
+ *   - *name* - non-empty, non-NULL type name;
+ *   - *sz* - size of the type, in bytes;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
+{
+	struct btf_type *t;
+	int sz, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+
+	/* byte_sz must be one of the explicitly allowed values */
+	if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
+	    byte_sz != 16)
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
+	t->size = byte_sz;
+
+	return btf_commit_type(btf, sz);
+}
+
 /* it's completely legal to append BTF types with type IDs pointing forward to
  * types that haven't been appended yet, so we only make sure that id looks
  * sane, we can't guarantee that ID will always be valid
@@ -3626,6 +3671,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
 		case BTF_KIND_FWD:
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_FUNC:
+		case BTF_KIND_FLOAT:
 			h = btf_hash_common(t);
 			break;
 		case BTF_KIND_INT:
@@ -3722,6 +3768,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 		break;
 
 	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
 		h = btf_hash_common(t);
 		for_each_dedup_cand(d, hash_entry, h) {
 			cand_id = (__u32)(long)hash_entry->value;
@@ -3983,6 +4030,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 			return btf_compat_enum(cand_type, canon_type);
 
 	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
 		return btf_equal_common(cand_type, canon_type);
 
 	case BTF_KIND_CONST:
@@ -4479,6 +4527,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
 	switch (btf_kind(t)) {
 	case BTF_KIND_INT:
 	case BTF_KIND_ENUM:
+	case BTF_KIND_FLOAT:
 		break;
 
 	case BTF_KIND_FWD:
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1237bcd1dd17..029a9cfc8c2d 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -95,6 +95,7 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
 
 LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
 LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_array(struct btf *btf,
 			      int index_type_id, int elem_type_id, __u32 nr_elems);
@@ -294,6 +295,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_DATASEC;
 }
 
+static inline bool btf_is_float(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_FLOAT;
+}
+
 static inline __u8 btf_int_encoding(const struct btf_type *t)
 {
 	return BTF_INT_ENCODING(*(__u32 *)(t + 1));
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 2f9d685bd522..5e957fcceee6 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
 		case BTF_KIND_INT:
 		case BTF_KIND_ENUM:
 		case BTF_KIND_FWD:
+		case BTF_KIND_FLOAT:
 			break;
 
 		case BTF_KIND_VOLATILE:
@@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 
 	switch (btf_kind(t)) {
 	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
 		tstate->order_state = ORDERED;
 		return 0;
 
@@ -1133,6 +1135,7 @@ skip_mod:
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
 		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FLOAT:
 			goto done;
 		default:
 			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 
 		switch (kind) {
 		case BTF_KIND_INT:
+		case BTF_KIND_FLOAT:
 			btf_dump_emit_mods(d, decls);
 			name = btf_name_of(d, t->name_off);
 			btf_dump_printf(d, "%s", name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 62d9ed56b081..2f351d3ad3e7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -178,6 +178,8 @@ enum kern_feature_id {
 	FEAT_PROG_BIND_MAP,
 	/* Kernel support for module BTFs */
 	FEAT_MODULE_BTF,
+	/* BTF_KIND_FLOAT support */
+	FEAT_BTF_FLOAT,
 	__FEAT_CNT,
 };
 
@@ -1946,6 +1948,7 @@ static const char *btf_kind_str(const struct btf_type *t)
 	case BTF_KIND_FUNC_PROTO: return "func_proto";
 	case BTF_KIND_VAR: return "var";
 	case BTF_KIND_DATASEC: return "datasec";
+	case BTF_KIND_FLOAT: return "float";
 	default: return "unknown";
 	}
 }
@@ -2395,15 +2398,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
 {
 	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(FEAT_BTF_FUNC);
 
-	return !has_func || !has_datasec || !has_func_global;
+	return !has_func || !has_datasec || !has_func_global || !has_float;
 }
 
 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 {
 	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+	bool has_float = kernel_supports(FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(FEAT_BTF_FUNC);
 	struct btf_type *t;
 	int i, j, vlen;
@@ -2456,6 +2461,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 		} else if (!has_func_global && btf_is_func(t)) {
 			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
 			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+		} else if (!has_float && btf_is_float(t)) {
+			/* replace FLOAT with an equally-sized empty STRUCT;
+			 * since C compilers do not accept e.g. "float" as a
+			 * valid struct name, make it anonymous
+			 */
+			t->name_off = 0;
+			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
 		}
 	}
 }
@@ -3927,6 +3939,18 @@ static int probe_kern_btf_datasec(void)
 					     strs, sizeof(strs)));
 }
 
+static int probe_kern_btf_float(void)
+{
+	static const char strs[] = "\0float";
+	__u32 types[] = {
+		/* float */
+		BTF_TYPE_FLOAT_ENC(1, 4),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
 static int probe_kern_array_mmap(void)
 {
 	struct bpf_create_map_attr attr = {
@@ -4106,6 +4130,9 @@ static struct kern_feature_desc {
 	[FEAT_MODULE_BTF] = {
 		"module BTF support", probe_module_btf,
 	},
+	[FEAT_BTF_FLOAT] = {
+		"BTF_KIND_FLOAT support", probe_kern_btf_float,
+	},
 };
 
 static bool kernel_supports(enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1c0fd2dd233a..ec898f464ab9 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -350,3 +350,8 @@ LIBBPF_0.3.0 {
 		xsk_setup_xdp_prog;
 		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
+
+LIBBPF_0.4.0 {
+	global:
+		btf__add_float;
+} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 969d0ac592ba..343f6eb05637 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -31,6 +31,8 @@
 #define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
 #define BTF_PARAM_ENC(name, type) (name), (type)
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
 
 #ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)
-- 
cgit v1.2.3


From 737e0f919a8d2a313618d8ac67d50e8223bc5d74 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:50 +0100
Subject: tools/bpftool: Add BTF_KIND_FLOAT support

Only dumping support needs to be adjusted, the code structure follows
that of BTF_KIND_INT. Example plain and JSON outputs:

    [4] FLOAT 'float' size=4
    {"id":4,"kind":"FLOAT","name":"float","size":4}

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-5-iii@linux.ibm.com
---
 tools/bpf/bpftool/btf.c        | 8 ++++++++
 tools/bpf/bpftool/btf_dumper.c | 1 +
 2 files changed, 9 insertions(+)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fe9e7b3a4b50..985610c3f193 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 struct btf_attach_table {
@@ -327,6 +328,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
 			jsonw_end_array(w);
 		break;
 	}
+	case BTF_KIND_FLOAT: {
+		if (json_output)
+			jsonw_uint_field(w, "size", t->size);
+		else
+			printf(" size=%u", t->size);
+		break;
+	}
 	default:
 		break;
 	}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 0e9310727281..7ca54d046362 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
 	switch (BTF_INFO_KIND(t->info)) {
 	case BTF_KIND_INT:
 	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FLOAT:
 		BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
 		break;
 	case BTF_KIND_STRUCT:
-- 
cgit v1.2.3


From eea154a852e827c003215f7beed3e10f05471a86 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:51 +0100
Subject: selftests/bpf: Use the 25th bit in the "invalid BTF_INFO" test

The bit being checked by this test is no longer reserved after
introducing BTF_KIND_FLOAT, so use the next one instead.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-6-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 6a7ee7420701..c29406736138 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
 	.raw_types = {
 		/* int */				/* [1] */
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
-		BTF_TYPE_ENC(0, 0x10000000, 4),
+		BTF_TYPE_ENC(0, 0x20000000, 4),
 		BTF_END_RAW,
 	},
 	.str_sec = "",
-- 
cgit v1.2.3


From b1828f0b04828aa8cccadf00a702f459caefeed9 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:52 +0100
Subject: bpf: Add BTF_KIND_FLOAT support

On the kernel side, introduce a new btf_kind_operations. It is
similar to that of BTF_KIND_INT, however, it does not need to
handle encodings and bit offsets. Do not implement printing, since
the kernel does not know how to format floating-point values.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-7-iii@linux.ibm.com
---
 kernel/bpf/btf.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 16e8148a28e2..c914d68f3bf8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -173,7 +173,7 @@
 #define BITS_ROUNDUP_BYTES(bits) \
 	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
 
-#define BTF_INFO_MASK 0x8f00ffff
+#define BTF_INFO_MASK 0x9f00ffff
 #define BTF_INT_MASK 0x0fffffff
 #define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
 #define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
@@ -280,6 +280,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 static const char *btf_type_str(const struct btf_type *t)
@@ -574,6 +575,7 @@ static bool btf_type_has_size(const struct btf_type *t)
 	case BTF_KIND_UNION:
 	case BTF_KIND_ENUM:
 	case BTF_KIND_DATASEC:
+	case BTF_KIND_FLOAT:
 		return true;
 	}
 
@@ -1704,6 +1706,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 		case BTF_KIND_STRUCT:
 		case BTF_KIND_UNION:
 		case BTF_KIND_ENUM:
+		case BTF_KIND_FLOAT:
 			size = type->size;
 			goto resolved;
 
@@ -1849,7 +1852,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env,
 	return -EINVAL;
 }
 
-/* Used for ptr, array and struct/union type members.
+/* Used for ptr, array struct/union and float type members.
  * int, enum and modifier types have their specific callback functions.
  */
 static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
@@ -3675,6 +3678,81 @@ static const struct btf_kind_operations datasec_ops = {
 	.show			= btf_datasec_show,
 };
 
+static s32 btf_float_check_meta(struct btf_verifier_env *env,
+				const struct btf_type *t,
+				u32 meta_left)
+{
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	if (btf_type_kflag(t)) {
+		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+		return -EINVAL;
+	}
+
+	if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
+	    t->size != 16) {
+		btf_verifier_log_type(env, t, "Invalid type_size");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return 0;
+}
+
+static int btf_float_check_member(struct btf_verifier_env *env,
+				  const struct btf_type *struct_type,
+				  const struct btf_member *member,
+				  const struct btf_type *member_type)
+{
+	u64 start_offset_bytes;
+	u64 end_offset_bytes;
+	u64 misalign_bits;
+	u64 align_bytes;
+	u64 align_bits;
+
+	/* Different architectures have different alignment requirements, so
+	 * here we check only for the reasonable minimum. This way we ensure
+	 * that types after CO-RE can pass the kernel BTF verifier.
+	 */
+	align_bytes = min_t(u64, sizeof(void *), member_type->size);
+	align_bits = align_bytes * BITS_PER_BYTE;
+	div64_u64_rem(member->offset, align_bits, &misalign_bits);
+	if (misalign_bits) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member is not properly aligned");
+		return -EINVAL;
+	}
+
+	start_offset_bytes = member->offset / BITS_PER_BYTE;
+	end_offset_bytes = start_offset_bytes + member_type->size;
+	if (end_offset_bytes > struct_type->size) {
+		btf_verifier_log_member(env, struct_type, member,
+					"Member exceeds struct_size");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void btf_float_log(struct btf_verifier_env *env,
+			  const struct btf_type *t)
+{
+	btf_verifier_log(env, "size=%u", t->size);
+}
+
+static const struct btf_kind_operations float_ops = {
+	.check_meta = btf_float_check_meta,
+	.resolve = btf_df_resolve,
+	.check_member = btf_float_check_member,
+	.check_kflag_member = btf_generic_check_kflag_member,
+	.log_details = btf_float_log,
+	.show = btf_df_show,
+};
+
 static int btf_func_proto_check(struct btf_verifier_env *env,
 				const struct btf_type *t)
 {
@@ -3808,6 +3886,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
 	[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
 	[BTF_KIND_VAR] = &var_ops,
 	[BTF_KIND_DATASEC] = &datasec_ops,
+	[BTF_KIND_FLOAT] = &float_ops,
 };
 
 static s32 btf_check_meta(struct btf_verifier_env *env,
-- 
cgit v1.2.3


From 7e72aad3a15c06e40e3ccd2352e5010e978f1acf Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:53 +0100
Subject: selftest/bpf: Add BTF_KIND_FLOAT tests

Test the good variants as well as the potential malformed ones.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226202256.116518-8-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/btf_helpers.c    |   4 +
 tools/testing/selftests/bpf/prog_tests/btf.c | 131 +++++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_btf.h       |   3 +
 3 files changed, 138 insertions(+)

diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index 48f90490f922..b692e6ead9b5 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
+	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
 static const char *btf_kind_str(__u16 kind)
@@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
 		}
 		break;
 	}
+	case BTF_KIND_FLOAT:
+		fprintf(out, " size=%u", t->size);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index c29406736138..11d98d3cf949 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
 	.max_entries = 1,
 },
 
+{
+	.descr = "float test #1, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),		/* [2] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [3] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 8),		/* [4] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 12),		/* [5] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 16),		/* [6] */
+		BTF_STRUCT_ENC(NAME_TBD, 5, 48),		/* [7] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+		BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+		BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+		BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+		    "\0floats\0a\0b\0c\0d\0e"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 48,
+	.key_type_id = 1,
+	.value_type_id = 7,
+	.max_entries = 1,
+},
+{
+	.descr = "float test #2, invalid vlen",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+								/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "vlen != 0",
+},
+{
+	.descr = "float test #3, invalid kind_flag",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+								/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info kind_flag",
+},
+{
+	.descr = "float test #4, member does not fit",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [2] */
+		BTF_STRUCT_ENC(NAME_TBD, 1, 2),			/* [3] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float\0floats\0x"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+{
+	.descr = "float test #5, member is not properly aligned",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 4),		/* [2] */
+		BTF_STRUCT_ENC(NAME_TBD, 1, 8),			/* [3] */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float\0floats\0x"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Member is not properly aligned",
+},
+{
+	.descr = "float test #6, invalid size",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+								/* [1] */
+		BTF_TYPE_FLOAT_ENC(NAME_TBD, 6),		/* [2] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0int\0float"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "float_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 6,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type_size",
+},
+
 }; /* struct btf_raw_test raw_tests[] */
 
 static const char *get_next_str(const char *start, const char *end)
@@ -6630,6 +6760,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_PTR:
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
+	case BTF_KIND_FLOAT:
 		return base_size;
 	case BTF_KIND_INT:
 		return base_size + sizeof(__u32);
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..e2394eea4b7f 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -66,4 +66,7 @@
 #define BTF_FUNC_ENC(name, func_proto) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
 
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
 #endif /* _TEST_BTF_H */
-- 
cgit v1.2.3


From 7999cf7df899caf244236dcc11cce844347dab4a Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:54 +0100
Subject: selftests/bpf: Add BTF_KIND_FLOAT to the existing deduplication tests

Check that floats don't interfere with struct deduplication, that they
are not merged with another kinds and that floats of different sizes are
not merged with each other.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210226202256.116518-9-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 43 ++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 11d98d3cf949..0457ae32b270 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -6411,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
 			/* int[16] */
 			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
 			/* struct s { */
-			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [3] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [3] */
 				BTF_MEMBER_ENC(NAME_NTH(3), 4, 0),	/* struct s *next;	*/
 				BTF_MEMBER_ENC(NAME_NTH(4), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(5), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(6), 1, 640),	/* int c;		*/
+				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
@@ -6426,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
 			/* full copy of the above */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
 			BTF_TYPE_ARRAY_ENC(7, 7, 16),					/* [8] */
-			BTF_STRUCT_ENC(NAME_NTH(2), 4, 84),				/* [9] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [9] */
 				BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
 				BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
 				BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
 				BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
+				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
 			BTF_PTR_ENC(9),							/* [10] */
 			BTF_PTR_ENC(12),						/* [11] */
 			BTF_CONST_ENC(7),						/* [12] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [13] */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
 	},
 	.expect = {
 		.raw_types = {
 			/* int */
-			BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
 			/* int[16] */
 			BTF_TYPE_ARRAY_ENC(1, 1, 16),					/* [2] */
 			/* struct s { */
-			BTF_STRUCT_ENC(NAME_NTH(6), 4, 84),				/* [3] */
-				BTF_MEMBER_ENC(NAME_NTH(5), 4, 0),	/* struct s *next;	*/
+			BTF_STRUCT_ENC(NAME_NTH(8), 5, 88),				/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(7), 4, 0),	/* struct s *next;	*/
 				BTF_MEMBER_ENC(NAME_NTH(1), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(2), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(3), 1, 640),	/* int c;		*/
+				BTF_MEMBER_ENC(NAME_NTH(4), 7, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [7] */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
+		BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
@@ -6579,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6604,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
+			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
 	},
 },
 {
-	.descr = "dedup: no int duplicates",
+	.descr = "dedup: no int/float duplicates",
 	.input = {
 		.raw_types = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6628,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
 			/* different byte size */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			/* all allowed sizes */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0some other int"),
+		BTF_STR_SEC("\0int\0some other int\0float"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6646,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
 			/* different byte size */
 			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+			/* all allowed sizes */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0int\0some other int"),
+		BTF_STR_SEC("\0int\0some other int\0float"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
-- 
cgit v1.2.3


From 6be6a0baffc1357b6d2023155753f111624c4fec Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 26 Feb 2021 21:22:56 +0100
Subject: bpf: Document BTF_KIND_FLOAT in btf.rst

Also document the expansion of the kind bitfield.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210226202256.116518-11-iii@linux.ibm.com
---
 Documentation/bpf/btf.rst | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 44dc789de2b4..846354cd2d69 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -84,6 +84,7 @@ sequentially and type id is assigned to each recognized type starting from id
     #define BTF_KIND_FUNC_PROTO     13      /* Function Proto       */
     #define BTF_KIND_VAR            14      /* Variable     */
     #define BTF_KIND_DATASEC        15      /* Section      */
+    #define BTF_KIND_FLOAT          16      /* Floating point       */
 
 Note that the type section encodes debug info, not just pure types.
 ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -95,8 +96,8 @@ Each type contains the following common data::
         /* "info" bits arrangement
          * bits  0-15: vlen (e.g. # of struct's members)
          * bits 16-23: unused
-         * bits 24-27: kind (e.g. int, ptr, array...etc)
-         * bits 28-30: unused
+         * bits 24-28: kind (e.g. int, ptr, array...etc)
+         * bits 29-30: unused
          * bit     31: kind_flag, currently used by
          *             struct, union and fwd
          */
@@ -452,6 +453,18 @@ map definition.
   * ``offset``: the in-section offset of the variable
   * ``size``: the size of the variable in bytes
 
+2.2.16 BTF_KIND_FLOAT
+~~~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: any valid offset
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_FLOAT
+ * ``info.vlen``: 0
+ * ``size``: the size of the float type in bytes: 2, 4, 8, 12 or 16.
+
+No additional type data follow ``btf_type``.
+
 3. BTF Kernel API
 *****************
 
-- 
cgit v1.2.3


From 7799e4d9d84f6f8231dfd9dca4da5f4b2f0aa932 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:33 -0800
Subject: bpf: Import syscall arg documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These descriptions are present in the man-pages project from the
original submissions around 2015-2016. Import them so that they can be
kept up to date as developers extend the bpf syscall commands.

These descriptions follow the pattern used by scripts/bpf_helpers_doc.py
so that we can take advantage of the parser to generate more up-to-date
man page writing based upon these headers.

Some minor wording adjustments were made to make the descriptions
more consistent for the description / return format.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-2-joe@cilium.io

Co-authored-by: Alexei Starovoitov <ast@kernel.org>
Co-authored-by: Michael Kerrisk <mtk.manpages@gmail.com>
---
 include/uapi/linux/bpf.h | 122 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b89af20cfa19..fb16c590e6d9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -93,7 +93,127 @@ union bpf_iter_link_info {
 	} map;
 };
 
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ *	Description
+ *		Create a map and return a file descriptor that refers to the
+ *		map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ *		is automatically enabled for the new file descriptor.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ *	Description
+ *		Look up an element with a given *key* in the map referred to
+ *		by the file descriptor *map_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ *	Description
+ *		Create or update an element (key/value pair) in a specified map.
+ *
+ *		The *flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create a new element or update an existing element.
+ *		**BPF_NOEXIST**
+ *			Create a new element only if it did not exist.
+ *		**BPF_EXIST**
+ *			Update an existing element.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ *		**E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ *		**E2BIG**
+ *			The number of elements in the map reached the
+ *			*max_entries* limit specified at map creation time.
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ *	Description
+ *		Look up and delete an element by key in a specified map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ *	Description
+ *		Look up an element by key in a specified map and return the key
+ *		of the next element. Can be used to iterate over all elements
+ *		in the map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		The following cases can be used to iterate over all elements of
+ *		the map:
+ *
+ *		* If *key* is not found, the operation returns zero and sets
+ *		  the *next_key* pointer to the key of the first element.
+ *		* If *key* is found, the operation returns zero and sets the
+ *		  *next_key* pointer to the key of the next element.
+ *		* If *key* is the last element, returns -1 and *errno* is set
+ *		  to **ENOENT**.
+ *
+ *		May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ *		**EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ *	Description
+ *		Verify and load an eBPF program, returning a new file
+ *		descriptor associated with the program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ *		The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ *		automatically enabled for the new file descriptor.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * NOTES
+ *	eBPF objects (maps and programs) can be shared between processes.
+ *	For example, after **fork**\ (2), the child inherits file descriptors
+ *	referring to the same eBPF objects. In addition, file descriptors
+ *	referring to eBPF objects can be transferred over UNIX domain sockets.
+ *	File descriptors referring to eBPF objects can be duplicated in the
+ *	usual way, using **dup**\ (2) and similar calls. An eBPF object is
+ *	deallocated only after all file descriptors referring to the object
+ *	have been closed.
+ */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
 	BPF_MAP_LOOKUP_ELEM,
-- 
cgit v1.2.3


From f67c9cbf6c581468f6c7144d497565cfc7918c31 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:34 -0800
Subject: bpf: Add minimal bpf() command documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce high-level descriptions of the intent and return codes of the
bpf() syscall commands. Subsequent patches may further flesh out the
content to provide a more useful programming reference.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-3-joe@cilium.io
---
 include/uapi/linux/bpf.h | 368 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 368 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fb16c590e6d9..052bbfe65f77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -204,6 +204,374 @@ union bpf_iter_link_info {
  *		A new file descriptor (a nonnegative integer), or -1 if an
  *		error occurred (in which case, *errno* is set appropriately).
  *
+ * BPF_OBJ_PIN
+ *	Description
+ *		Pin an eBPF program or map referred by the specified *bpf_fd*
+ *		to the provided *pathname* on the filesystem.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_OBJ_GET
+ *	Description
+ *		Open a file descriptor for the eBPF object pinned to the
+ *		specified *pathname*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ *	Description
+ *		Detach the eBPF program associated with the *target_fd* at the
+ *		hook specified by *attach_type*. The program must have been
+ *		previously attached using **BPF_PROG_ATTACH**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ *	Description
+ *		Run an eBPF program a number of times against a provided
+ *		program context and return the modified program context and
+ *		duration of the test run.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF program currently loaded into the kernel.
+ *
+ *		Looks for the eBPF program with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF programs
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF map currently loaded into the kernel.
+ *
+ *		Looks for the eBPF map with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF maps
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF program corresponding to
+ *		*prog_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF map corresponding to
+ *		*map_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ *	Description
+ *		Obtain information about the eBPF object corresponding to
+ *		*bpf_fd*.
+ *
+ *		Populates up to *info_len* bytes of *info*, which will be in
+ *		one of the following formats depending on the eBPF object type
+ *		of *bpf_fd*:
+ *
+ *		* **struct bpf_prog_info**
+ *		* **struct bpf_map_info**
+ *		* **struct bpf_btf_info**
+ *		* **struct bpf_link_info**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		specified *attach_type* hook.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ *	Description
+ *		Attach an eBPF program to a tracepoint *name* to access kernel
+ *		internal arguments of the tracepoint in their raw form.
+ *
+ *		The *prog_fd* must be a valid file descriptor associated with
+ *		a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ *		No ABI guarantees are made about the content of tracepoint
+ *		arguments exposed to the corresponding eBPF program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ *	Description
+ *		Verify and load BPF Type Format (BTF) metadata into the kernel,
+ *		returning a new file descriptor associated with the metadata.
+ *		BTF is described in more detail at
+ *		https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ *		The *btf* parameter must point to valid memory providing
+ *		*btf_size* bytes of BTF binary metadata.
+ *
+ *		The returned file descriptor can be passed to other **bpf**\ ()
+ *		subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ *		associate the BTF with those objects.
+ *
+ *		Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ *		parameters to specify a *btf_log_buf*, *btf_log_size* and
+ *		*btf_log_level* which allow the kernel to return freeform log
+ *		output regarding the BTF verification process.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the BPF Type Format (BTF)
+ *		corresponding to *btf_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		target process identified by *pid* and *fd*.
+ *
+ *		If the *pid* and *fd* are associated with a tracepoint, kprobe
+ *		or uprobe perf event, then the *prog_id* and *fd_type* will
+ *		be populated with the eBPF program id and file descriptor type
+ *		of type **bpf_task_fd_type**. If associated with a kprobe or
+ *		uprobe, the  *probe_offset* and *probe_addr* will also be
+ *		populated. Optionally, if *buf* is provided, then up to
+ *		*buf_len* bytes of *buf* will be populated with the name of
+ *		the tracepoint, kprobe or uprobe.
+ *
+ *		The resulting *prog_id* may be introspected in deeper detail
+ *		using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ *	Description
+ *		Look up an element with the given *key* in the map referred to
+ *		by the file descriptor *fd*, and if found, delete the element.
+ *
+ *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ *		implement this command as a "pop" operation, deleting the top
+ *		element rather than one corresponding to *key*.
+ *		The *key* and *key_len* parameters should be zeroed when
+ *		issuing this operation for these map types.
+ *
+ *		This command is only valid for the following map types:
+ *		* **BPF_MAP_TYPE_QUEUE**
+ *		* **BPF_MAP_TYPE_STACK**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ *	Description
+ *		Freeze the permissions of the specified map.
+ *
+ *		Write permissions may be frozen by passing zero *flags*.
+ *		Upon success, no future syscall invocations may alter the
+ *		map state of *map_fd*. Write operations from eBPF programs
+ *		are still possible for a frozen map.
+ *
+ *		Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ *	Description
+ *		Fetch the next BPF Type Format (BTF) object currently loaded
+ *		into the kernel.
+ *
+ *		Looks for the BTF object with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other BTF objects
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ *	Description
+ *		Iterate and fetch multiple elements in a map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ *	Description
+ *		Iterate and delete multiple elements in a map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ *	Description
+ *		Iterate and update multiple elements in a map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_DELETE_BATCH
+ *	Description
+ *		Iterate and delete multiple elements in a map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook and return a file descriptor handle for
+ *		managing the link.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ *	Description
+ *		Update the eBPF program in the specified *link_fd* to
+ *		*new_prog_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF Link corresponding to
+ *		*link_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF link currently loaded into the kernel.
+ *
+ *		Looks for the eBPF link with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF links
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ *	Description
+ *		Enable eBPF runtime statistics gathering.
+ *
+ *		Runtime statistics gathering for the eBPF runtime is disabled
+ *		by default to minimize the corresponding performance overhead.
+ *		This command enables statistics globally.
+ *
+ *		Multiple programs may independently enable statistics.
+ *		After gathering the desired statistics, eBPF runtime statistics
+ *		may be disabled again by calling **close**\ (2) for the file
+ *		descriptor returned by this function. Statistics will only be
+ *		disabled system-wide when all outstanding file descriptors
+ *		returned by prior calls for this subcommand are closed.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ *	Description
+ *		Create an iterator on top of the specified *link_fd* (as
+ *		previously created using **BPF_LINK_CREATE**) and return a
+ *		file descriptor that can be used to trigger the iteration.
+ *
+ *		If the resulting file descriptor is pinned to the filesystem
+ *		using  **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ *		for that path will trigger the iterator to read kernel state
+ *		using the eBPF program attached to *link_fd*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ *	Description
+ *		Forcefully detach the specified *link_fd* from its
+ *		corresponding attachment point.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ *	Description
+ *		Bind a map to the lifetime of an eBPF program.
+ *
+ *		The map identified by *map_fd* is bound to the program
+ *		identified by *prog_fd* and only released when *prog_fd* is
+ *		released. This may be used in cases where metadata should be
+ *		associated with a program which otherwise does not contain any
+ *		references to the map (for example, embedded in the eBPF
+ *		program instructions).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
  * NOTES
  *	eBPF objects (maps and programs) can be shared between processes.
  *	For example, after **fork**\ (2), the child inherits file descriptors
-- 
cgit v1.2.3


From 6690523bccb3e44cfcc4b2c995767e6814046e34 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:35 -0800
Subject: bpf: Document BPF_F_LOCK in syscall commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the meaning of the BPF_F_LOCK flag for the map lookup/update
descriptions. Based on commit 96049f3afd50 ("bpf: introduce BPF_F_LOCK
flag").

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-4-joe@cilium.io
---
 include/uapi/linux/bpf.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 052bbfe65f77..eb9f059f0569 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -123,6 +123,14 @@ union bpf_iter_link_info {
  *		Look up an element with a given *key* in the map referred to
  *		by the file descriptor *map_fd*.
  *
+ *		The *flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
@@ -140,6 +148,8 @@ union bpf_iter_link_info {
  *			Create a new element only if it did not exist.
  *		**BPF_EXIST**
  *			Update an existing element.
+ *		**BPF_F_LOCK**
+ *			Update a spin_lock-ed map element.
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
-- 
cgit v1.2.3


From 8aacb3c8d1a32b23c82645051bba55f0ae6c103b Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:36 -0800
Subject: bpf: Document BPF_PROG_PIN syscall command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b2197755b263 ("bpf: add support for persistent maps/progs")
contains the original implementation and git logs, used as reference for
this documentation.

Also pull in the filename restriction as documented in commit 6d8cb045cde6
("bpf: comment why dots in filenames under BPF virtual FS are not allowed")

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-5-joe@cilium.io
---
 include/uapi/linux/bpf.h | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index eb9f059f0569..6946dde90c56 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -219,6 +219,22 @@ union bpf_iter_link_info {
  *		Pin an eBPF program or map referred by the specified *bpf_fd*
  *		to the provided *pathname* on the filesystem.
  *
+ *		The *pathname* argument must not contain a dot (".").
+ *
+ *		On success, *pathname* retains a reference to the eBPF object,
+ *		preventing deallocation of the object when the original
+ *		*bpf_fd* is closed. This allow the eBPF object to live beyond
+ *		**close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ *		process.
+ *
+ *		Applying **unlink**\ (2) or similar calls to the *pathname*
+ *		unpins the object from the filesystem, removing the reference.
+ *		If no other file descriptors or filesystem nodes refer to the
+ *		same object, it will be deallocated (see NOTES).
+ *
+ *		The filesystem type for the parent directory of *pathname* must
+ *		be **BPF_FS_MAGIC**.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
@@ -584,13 +600,19 @@ union bpf_iter_link_info {
  *
  * NOTES
  *	eBPF objects (maps and programs) can be shared between processes.
- *	For example, after **fork**\ (2), the child inherits file descriptors
- *	referring to the same eBPF objects. In addition, file descriptors
- *	referring to eBPF objects can be transferred over UNIX domain sockets.
- *	File descriptors referring to eBPF objects can be duplicated in the
- *	usual way, using **dup**\ (2) and similar calls. An eBPF object is
- *	deallocated only after all file descriptors referring to the object
- *	have been closed.
+ *
+ *	* After **fork**\ (2), the child inherits file descriptors
+ *	  referring to the same eBPF objects.
+ *	* File descriptors referring to eBPF objects can be transferred over
+ *	  **unix**\ (7) domain sockets.
+ *	* File descriptors referring to eBPF objects can be duplicated in the
+ *	  usual way, using **dup**\ (2) and similar calls.
+ *	* File descriptors referring to eBPF objects can be pinned to the
+ *	  filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ *	An eBPF object is deallocated only after all file descriptors referring
+ *	to the object have been closed and no references remain pinned to the
+ *	filesystem or attached (for example, bound to a program or device).
  */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
-- 
cgit v1.2.3


From 32e76b187a90de5809d68c2ef3e3964176dacaf0 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:37 -0800
Subject: bpf: Document BPF_PROG_ATTACH syscall command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the prog attach command in more detail, based on git commits:
* commit f4324551489e ("bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH
  commands")
* commit 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor
  socket TX/RX data")
* commit f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2")
* commit d58e468b1112 ("flow_dissector: implements flow dissector BPF
  hook")

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-6-joe@cilium.io
---
 include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6946dde90c56..a8f2964ec885 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -253,6 +253,43 @@ union bpf_iter_link_info {
  *		Attach an eBPF program to a *target_fd* at the specified
  *		*attach_type* hook.
  *
+ *		The *attach_type* specifies the eBPF attachment point to
+ *		attach the program to, and must be one of *bpf_attach_type*
+ *		(see below).
+ *
+ *		The *attach_bpf_fd* must be a valid file descriptor for a
+ *		loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ *		or sock_ops type corresponding to the specified *attach_type*.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_TYPE_SK_SKB**,
+ *		**BPF_PROG_TYPE_SK_MSG**
+ *
+ *			eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
-- 
cgit v1.2.3


From 2a3fdca4e3bc7a01316277ba26f4090c4b19bf7c Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:38 -0800
Subject: bpf: Document BPF_PROG_TEST_RUN syscall command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on a brief read of the corresponding source code.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-7-joe@cilium.io
---
 include/uapi/linux/bpf.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a8f2964ec885..a6cd6650e23d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -306,14 +306,22 @@ union bpf_iter_link_info {
  *
  * BPF_PROG_TEST_RUN
  *	Description
- *		Run an eBPF program a number of times against a provided
- *		program context and return the modified program context and
- *		duration of the test run.
+ *		Run the eBPF program associated with the *prog_fd* a *repeat*
+ *		number of times against a provided program context *ctx_in* and
+ *		data *data_in*, and return the modified program context
+ *		*ctx_out*, *data_out* (for example, packet data), result of the
+ *		execution *retval*, and *duration* of the test run.
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
  *
+ *		**ENOSPC**
+ *			Either *data_size_out* or *ctx_size_out* is too small.
+ *		**ENOTSUPP**
+ *			This command is not supported by the program type of
+ *			the program referred to by *prog_fd*.
+ *
  * BPF_PROG_GET_NEXT_ID
  *	Description
  *		Fetch the next eBPF program currently loaded into the kernel.
-- 
cgit v1.2.3


From 5d999994e05d62d4f53059540652014cf83cddfe Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:39 -0800
Subject: bpf: Document BPF_PROG_QUERY syscall command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 468e2f64d220 ("bpf: introduce BPF_PROG_QUERY command") originally
introduced this, but there have been several additions since then.
Unlike BPF_PROG_ATTACH, it appears that the sockmap progs are not able
to be queried so far.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-8-joe@cilium.io
---
 include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a6cd6650e23d..0cf92ef011f1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -389,6 +389,43 @@ union bpf_iter_link_info {
  *		Obtain information about eBPF programs associated with the
  *		specified *attach_type* hook.
  *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_QUERY** always fetches the number of programs
+ *		attached and the *attach_flags* which were used to attach those
+ *		programs. Additionally, if *prog_ids* is nonzero and the number
+ *		of attached programs is less than *prog_cnt*, populates
+ *		*prog_ids* with the eBPF program ids of the programs attached
+ *		at *target_fd*.
+ *
+ *		The following flags may alter the result:
+ *
+ *		**BPF_F_QUERY_EFFECTIVE**
+ *			Only return information regarding programs which are
+ *			currently effective at the specified *target_fd*.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
-- 
cgit v1.2.3


From 0cb804547927c05f6aa7e28c8d4a1e02fec1a6d4 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:40 -0800
Subject: bpf: Document BPF_MAP_*_BATCH syscall commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based roughly on the following commits:
* Commit cb4d03ab499d ("bpf: Add generic support for lookup batch op")
* Commit 057996380a42 ("bpf: Add batch ops to all htab bpf map")
* Commit aa2e93b8e58e ("bpf: Add generic support for update and delete
  batch ops")

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Brian Vazquez <brianvv@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-9-joe@cilium.io
---
 include/uapi/linux/bpf.h | 114 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 111 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0cf92ef011f1..c8b9d19fce22 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -553,13 +553,55 @@ union bpf_iter_link_info {
  *	Description
  *		Iterate and fetch multiple elements in a map.
  *
+ *		Two opaque values are used to manage batch operations,
+ *		*in_batch* and *out_batch*. Initially, *in_batch* must be set
+ *		to NULL to begin the batched operation. After each subsequent
+ *		**BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ *		*out_batch* as the *in_batch* for the next operation to
+ *		continue iteration from the current point.
+ *
+ *		The *keys* and *values* are output parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are copied into the
+ *		user buffer, with the keys copied into *keys* and the values
+ *		copied into the corresponding indices in *values*.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
  *
+ *		May set *errno* to **ENOSPC** to indicate that *keys* or
+ *		*values* is too small to dump an entire bucket during
+ *		iteration of a hash-based map type.
+ *
  * BPF_MAP_LOOKUP_AND_DELETE_BATCH
  *	Description
- *		Iterate and delete multiple elements in a map.
+ *		Iterate and delete all elements in a map.
+ *
+ *		This operation has the same behavior as
+ *		**BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ *		* Every element that is successfully returned is also deleted
+ *		  from the map. This is at least *count* elements. Note that
+ *		  *count* is both an input and an output parameter.
+ *		* Upon returning with *errno* set to **EFAULT**, up to
+ *		  *count* elements may be deleted without returning the keys
+ *		  and values of the deleted elements.
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
@@ -567,15 +609,81 @@ union bpf_iter_link_info {
  *
  * BPF_MAP_UPDATE_BATCH
  *	Description
- *		Iterate and update multiple elements in a map.
+ *		Update multiple elements in a map by *key*.
+ *
+ *		The *keys* and *values* are input parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially updated to the
+ *		value in the corresponding index in *values*. The *in_batch*
+ *		and *out_batch* parameters are ignored and should be zeroed.
+ *
+ *		The *elem_flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create new elements or update a existing elements.
+ *		**BPF_NOEXIST**
+ *			Create new elements only if they do not exist.
+ *		**BPF_EXIST**
+ *			Update existing elements.
+ *		**BPF_F_LOCK**
+ *			Update spin_lock-ed map elements. This must be
+ *			specified if the map value contains a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
  *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ *		**E2BIG**. **E2BIG** indicates that the number of elements in
+ *		the map reached the *max_entries* limit specified at map
+ *		creation time.
+ *
+ *		May set *errno* to one of the following error codes under
+ *		specific circumstances:
+ *
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
  * BPF_MAP_DELETE_BATCH
  *	Description
- *		Iterate and delete multiple elements in a map.
+ *		Delete multiple elements in a map by *key*.
+ *
+ *		The *keys* parameter is an input parameter which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially deleted. The
+ *		*in_batch*, *out_batch*, and *values* parameters are ignored
+ *		and should be zeroed.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements. If
+ *		*errno* is **EFAULT**, up to *count* elements may be been
+ *		deleted.
  *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
-- 
cgit v1.2.3


From 923a932c982fd71856f80dbeaaa3ca41a75e89e0 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:41 -0800
Subject: scripts/bpf: Abstract eBPF API target parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Abstract out the target parameter so that upcoming commits, more than
just the existing "helpers" target can be called to generate specific
portions of docs from the eBPF UAPI headers.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-10-joe@cilium.io
---
 MAINTAINERS                    |   1 +
 include/uapi/linux/bpf.h       |   2 +-
 scripts/bpf_doc.py             | 650 +++++++++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     | 615 --------------------------------------
 tools/bpf/Makefile.helpers     |   2 +-
 tools/include/uapi/linux/bpf.h |   2 +-
 tools/lib/bpf/Makefile         |   2 +-
 tools/perf/MANIFEST            |   2 +-
 8 files changed, 656 insertions(+), 620 deletions(-)
 create mode 100755 scripts/bpf_doc.py
 delete mode 100755 scripts/bpf_helpers_doc.py

diff --git a/MAINTAINERS b/MAINTAINERS
index a50a543e3c81..8d56c7044067 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3223,6 +3223,7 @@ F:	net/core/filter.c
 F:	net/sched/act_bpf.c
 F:	net/sched/cls_bpf.c
 F:	samples/bpf/
+F:	scripts/bpf_doc.py
 F:	tools/bpf/
 F:	tools/lib/bpf/
 F:	tools/testing/selftests/bpf/
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c8b9d19fce22..63a56ed6a785 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1439,7 +1439,7 @@ union bpf_attr {
  * parsed and used to produce a manual page. The workflow is the following,
  * and requires the rst2man utility:
  *
- *     $ ./scripts/bpf_helpers_doc.py \
+ *     $ ./scripts/bpf_doc.py \
  *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
  *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
  *     $ man /tmp/bpf-helpers.7
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
new file mode 100755
index 000000000000..5a4f68aab335
--- /dev/null
+++ b/scripts/bpf_doc.py
@@ -0,0 +1,650 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Copyright (C) 2018-2019 Netronome Systems, Inc.
+# Copyright (C) 2021 Isovalent, Inc.
+
+# In case user attempts to run with Python 2.
+from __future__ import print_function
+
+import argparse
+import re
+import sys, os
+
+class NoHelperFound(BaseException):
+    pass
+
+class ParsingError(BaseException):
+    def __init__(self, line='<line not provided>', reader=None):
+        if reader:
+            BaseException.__init__(self,
+                                   'Error at file offset %d, parsing line: %s' %
+                                   (reader.tell(), line))
+        else:
+            BaseException.__init__(self, 'Error parsing line: %s' % line)
+
+class Helper(object):
+    """
+    An object representing the description of an eBPF helper function.
+    @proto: function prototype of the helper function
+    @desc: textual description of the helper function
+    @ret: description of the return value of the helper function
+    """
+    def __init__(self, proto='', desc='', ret=''):
+        self.proto = proto
+        self.desc = desc
+        self.ret = ret
+
+    def proto_break_down(self):
+        """
+        Break down helper function protocol into smaller chunks: return type,
+        name, distincts arguments.
+        """
+        arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
+        res = {}
+        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+
+        capture = proto_re.match(self.proto)
+        res['ret_type'] = capture.group(1)
+        res['ret_star'] = capture.group(2)
+        res['name']     = capture.group(3)
+        res['args'] = []
+
+        args    = capture.group(4).split(', ')
+        for a in args:
+            capture = arg_re.match(a)
+            res['args'].append({
+                'type' : capture.group(1),
+                'star' : capture.group(5),
+                'name' : capture.group(6)
+            })
+
+        return res
+
+class HeaderParser(object):
+    """
+    An object used to parse a file in order to extract the documentation of a
+    list of eBPF helper functions. All the helpers that can be retrieved are
+    stored as Helper object, in the self.helpers() array.
+    @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
+               kernel tree
+    """
+    def __init__(self, filename):
+        self.reader = open(filename, 'r')
+        self.line = ''
+        self.helpers = []
+
+    def parse_helper(self):
+        proto    = self.parse_proto()
+        desc     = self.parse_desc()
+        ret      = self.parse_ret()
+        return Helper(proto=proto, desc=desc, ret=ret)
+
+    def parse_proto(self):
+        # Argument can be of shape:
+        #   - "void"
+        #   - "type  name"
+        #   - "type *name"
+        #   - Same as above, with "const" and/or "struct" in front of type
+        #   - "..." (undefined number of arguments, for bpf_trace_printk())
+        # There is at least one term ("void"), and at most five arguments.
+        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+        capture = p.match(self.line)
+        if not capture:
+            raise NoHelperFound
+        self.line = self.reader.readline()
+        return capture.group(1)
+
+    def parse_desc(self):
+        p = re.compile(' \* ?(?:\t| {5,8})Description$')
+        capture = p.match(self.line)
+        if not capture:
+            # Helper can have empty description and we might be parsing another
+            # attribute: return but do not consume.
+            return ''
+        # Description can be several lines, some of them possibly empty, and it
+        # stops when another subsection title is met.
+        desc = ''
+        while True:
+            self.line = self.reader.readline()
+            if self.line == ' *\n':
+                desc += '\n'
+            else:
+                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                capture = p.match(self.line)
+                if capture:
+                    desc += capture.group(1) + '\n'
+                else:
+                    break
+        return desc
+
+    def parse_ret(self):
+        p = re.compile(' \* ?(?:\t| {5,8})Return$')
+        capture = p.match(self.line)
+        if not capture:
+            # Helper can have empty retval and we might be parsing another
+            # attribute: return but do not consume.
+            return ''
+        # Return value description can be several lines, some of them possibly
+        # empty, and it stops when another subsection title is met.
+        ret = ''
+        while True:
+            self.line = self.reader.readline()
+            if self.line == ' *\n':
+                ret += '\n'
+            else:
+                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                capture = p.match(self.line)
+                if capture:
+                    ret += capture.group(1) + '\n'
+                else:
+                    break
+        return ret
+
+    def run(self):
+        # Advance to start of helper function descriptions.
+        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
+        if offset == -1:
+            raise Exception('Could not find start of eBPF helper descriptions list')
+        self.reader.seek(offset)
+        self.reader.readline()
+        self.reader.readline()
+        self.line = self.reader.readline()
+
+        while True:
+            try:
+                helper = self.parse_helper()
+                self.helpers.append(helper)
+            except NoHelperFound:
+                break
+
+        self.reader.close()
+
+###############################################################################
+
+class Printer(object):
+    """
+    A generic class for printers. Printers should be created with an array of
+    Helper objects, and implement a way to print them in the desired fashion.
+    @parser: A HeaderParser with objects to print to standard output
+    """
+    def __init__(self, parser):
+        self.parser = parser
+        self.elements = []
+
+    def print_header(self):
+        pass
+
+    def print_footer(self):
+        pass
+
+    def print_one(self, helper):
+        pass
+
+    def print_all(self):
+        self.print_header()
+        for elem in self.elements:
+            self.print_one(elem)
+        self.print_footer()
+
+
+class PrinterRST(Printer):
+    """
+    A generic class for printers that print ReStructured Text. Printers should
+    be created with a HeaderParser object, and implement a way to print API
+    elements in the desired fashion.
+    @parser: A HeaderParser with objects to print to standard output
+    """
+    def __init__(self, parser):
+        self.parser = parser
+
+    def print_license(self):
+        license = '''\
+.. Copyright (C) All BPF authors and contributors from 2014 to present.
+.. See git log include/uapi/linux/bpf.h in kernel tree for details.
+.. 
+.. %%%LICENSE_START(VERBATIM)
+.. Permission is granted to make and distribute verbatim copies of this
+.. manual provided the copyright notice and this permission notice are
+.. preserved on all copies.
+.. 
+.. Permission is granted to copy and distribute modified versions of this
+.. manual under the conditions for verbatim copying, provided that the
+.. entire resulting derived work is distributed under the terms of a
+.. permission notice identical to this one.
+.. 
+.. Since the Linux kernel and libraries are constantly changing, this
+.. manual page may be incorrect or out-of-date.  The author(s) assume no
+.. responsibility for errors or omissions, or for damages resulting from
+.. the use of the information contained herein.  The author(s) may not
+.. have taken the same level of care in the production of this manual,
+.. which is licensed free of charge, as they might when working
+.. professionally.
+.. 
+.. Formatted or processed versions of this manual, if unaccompanied by
+.. the source, must acknowledge the copyright and authors of this work.
+.. %%%LICENSE_END
+.. 
+.. Please do not edit this file. It was generated from the documentation
+.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
+.. (helpers description), and from scripts/bpf_doc.py in the same
+.. repository (header and footer).
+'''
+        print(license)
+
+    def print_elem(self, elem):
+        if (elem.desc):
+            print('\tDescription')
+            # Do not strip all newline characters: formatted code at the end of
+            # a section must be followed by a blank line.
+            for line in re.sub('\n$', '', elem.desc, count=1).split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        if (elem.ret):
+            print('\tReturn')
+            for line in elem.ret.rstrip().split('\n'):
+                print('{}{}'.format('\t\t' if line else '', line))
+
+        print('')
+
+
+class PrinterHelpersRST(PrinterRST):
+    """
+    A printer for dumping collected information about helpers as a ReStructured
+    Text page compatible with the rst2man program, which can be used to
+    generate a manual page for the helpers.
+    @parser: A HeaderParser with Helper objects to print to standard output
+    """
+    def __init__(self, parser):
+        self.elements = parser.helpers
+
+    def print_header(self):
+        header = '''\
+===========
+BPF-HELPERS
+===========
+-------------------------------------------------------------------------------
+list of eBPF helper functions
+-------------------------------------------------------------------------------
+
+:Manual section: 7
+
+DESCRIPTION
+===========
+
+The extended Berkeley Packet Filter (eBPF) subsystem consists in programs
+written in a pseudo-assembly language, then attached to one of the several
+kernel hooks and run in reaction of specific events. This framework differs
+from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
+the ability to call special functions (or "helpers") from within a program.
+These functions are restricted to a white-list of helpers defined in the
+kernel.
+
+These helpers are used by eBPF programs to interact with the system, or with
+the context in which they work. For instance, they can be used to print
+debugging messages, to get the time since the system was booted, to interact
+with eBPF maps, or to manipulate network packets. Since there are several eBPF
+program types, and that they do not run in the same context, each program type
+can only call a subset of those helpers.
+
+Due to eBPF conventions, a helper can not have more than five arguments.
+
+Internally, eBPF programs call directly into the compiled helper functions
+without requiring any foreign-function interface. As a result, calling helpers
+introduces no overhead, thus offering excellent performance.
+
+This document is an attempt to list and document the helpers available to eBPF
+developers. They are sorted by chronological order (the oldest helpers in the
+kernel at the top).
+
+HELPERS
+=======
+'''
+        PrinterRST.print_license(self)
+        print(header)
+
+    def print_footer(self):
+        footer = '''
+EXAMPLES
+========
+
+Example usage for most of the eBPF helpers listed in this manual page are
+available within the Linux kernel sources, at the following locations:
+
+* *samples/bpf/*
+* *tools/testing/selftests/bpf/*
+
+LICENSE
+=======
+
+eBPF programs can have an associated license, passed along with the bytecode
+instructions to the kernel when the programs are loaded. The format for that
+string is identical to the one in use for kernel modules (Dual licenses, such
+as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
+programs that are compatible with the GNU Privacy License (GPL).
+
+In order to use such helpers, the eBPF program must be loaded with the correct
+license string passed (via **attr**) to the **bpf**\ () system call, and this
+generally translates into the C source code of the program containing a line
+similar to the following:
+
+::
+
+	char ____license[] __attribute__((section("license"), used)) = "GPL";
+
+IMPLEMENTATION
+==============
+
+This manual page is an effort to document the existing eBPF helper functions.
+But as of this writing, the BPF sub-system is under heavy development. New eBPF
+program or map types are added, along with new helper functions. Some helpers
+are occasionally made available for additional program types. So in spite of
+the efforts of the community, this page might not be up-to-date. If you want to
+check by yourself what helper functions exist in your kernel, or what types of
+programs they can support, here are some files among the kernel tree that you
+may be interested in:
+
+* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
+  of all helper functions, as well as many other BPF definitions including most
+  of the flags, structs or constants used by the helpers.
+* *net/core/filter.c* contains the definition of most network-related helper
+  functions, and the list of program types from which they can be used.
+* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
+  helpers.
+* *kernel/bpf/verifier.c* contains the functions used to check that valid types
+  of eBPF maps are used with a given helper function.
+* *kernel/bpf/* directory contains other files in which additional helpers are
+  defined (for cgroups, sockmaps, etc.).
+* The bpftool utility can be used to probe the availability of helper functions
+  on the system (as well as supported program and map types, and a number of
+  other parameters). To do so, run **bpftool feature probe** (see
+  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+  list features available to unprivileged users.
+
+Compatibility between helper functions and program types can generally be found
+in the files where helper functions are defined. Look for the **struct
+bpf_func_proto** objects and for functions returning them: these functions
+contain a list of helpers that a given program type can call. Note that the
+**default:** label of the **switch ... case** used to filter helpers can call
+other functions, themselves allowing access to additional helpers. The
+requirement for GPL license is also in those **struct bpf_func_proto**.
+
+Compatibility between helper functions and map types can be found in the
+**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+
+Helper functions that invalidate the checks on **data** and **data_end**
+pointers for network processing are listed in function
+**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+
+SEE ALSO
+========
+
+**bpf**\ (2),
+**bpftool**\ (8),
+**cgroups**\ (7),
+**ip**\ (8),
+**perf_event_open**\ (2),
+**sendmsg**\ (2),
+**socket**\ (7),
+**tc-bpf**\ (8)'''
+        print(footer)
+
+    def print_proto(self, helper):
+        """
+        Format function protocol with bold and italics markers. This makes RST
+        file less readable, but gives nice results in the manual page.
+        """
+        proto = helper.proto_break_down()
+
+        print('**%s %s%s(' % (proto['ret_type'],
+                              proto['ret_star'].replace('*', '\\*'),
+                              proto['name']),
+              end='')
+
+        comma = ''
+        for a in proto['args']:
+            one_arg = '{}{}'.format(comma, a['type'])
+            if a['name']:
+                if a['star']:
+                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+                else:
+                    one_arg += '** '
+                one_arg += '*{}*\\ **'.format(a['name'])
+            comma = ', '
+            print(one_arg, end='')
+
+        print(')**')
+
+    def print_one(self, helper):
+        self.print_proto(helper)
+        self.print_elem(helper)
+
+
+
+
+class PrinterHelpers(Printer):
+    """
+    A printer for dumping collected information about helpers as C header to
+    be included from BPF program.
+    @parser: A HeaderParser with Helper objects to print to standard output
+    """
+    def __init__(self, parser):
+        self.elements = parser.helpers
+
+    type_fwds = [
+            'struct bpf_fib_lookup',
+            'struct bpf_sk_lookup',
+            'struct bpf_perf_event_data',
+            'struct bpf_perf_event_value',
+            'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
+            'struct bpf_sock',
+            'struct bpf_sock_addr',
+            'struct bpf_sock_ops',
+            'struct bpf_sock_tuple',
+            'struct bpf_spin_lock',
+            'struct bpf_sysctl',
+            'struct bpf_tcp_sock',
+            'struct bpf_tunnel_key',
+            'struct bpf_xfrm_state',
+            'struct linux_binprm',
+            'struct pt_regs',
+            'struct sk_reuseport_md',
+            'struct sockaddr',
+            'struct tcphdr',
+            'struct seq_file',
+            'struct tcp6_sock',
+            'struct tcp_sock',
+            'struct tcp_timewait_sock',
+            'struct tcp_request_sock',
+            'struct udp6_sock',
+            'struct task_struct',
+
+            'struct __sk_buff',
+            'struct sk_msg_md',
+            'struct xdp_md',
+            'struct path',
+            'struct btf_ptr',
+            'struct inode',
+            'struct socket',
+            'struct file',
+    ]
+    known_types = {
+            '...',
+            'void',
+            'const void',
+            'char',
+            'const char',
+            'int',
+            'long',
+            'unsigned long',
+
+            '__be16',
+            '__be32',
+            '__wsum',
+
+            'struct bpf_fib_lookup',
+            'struct bpf_perf_event_data',
+            'struct bpf_perf_event_value',
+            'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
+            'struct bpf_sk_lookup',
+            'struct bpf_sock',
+            'struct bpf_sock_addr',
+            'struct bpf_sock_ops',
+            'struct bpf_sock_tuple',
+            'struct bpf_spin_lock',
+            'struct bpf_sysctl',
+            'struct bpf_tcp_sock',
+            'struct bpf_tunnel_key',
+            'struct bpf_xfrm_state',
+            'struct linux_binprm',
+            'struct pt_regs',
+            'struct sk_reuseport_md',
+            'struct sockaddr',
+            'struct tcphdr',
+            'struct seq_file',
+            'struct tcp6_sock',
+            'struct tcp_sock',
+            'struct tcp_timewait_sock',
+            'struct tcp_request_sock',
+            'struct udp6_sock',
+            'struct task_struct',
+            'struct path',
+            'struct btf_ptr',
+            'struct inode',
+            'struct socket',
+            'struct file',
+    }
+    mapped_types = {
+            'u8': '__u8',
+            'u16': '__u16',
+            'u32': '__u32',
+            'u64': '__u64',
+            's8': '__s8',
+            's16': '__s16',
+            's32': '__s32',
+            's64': '__s64',
+            'size_t': 'unsigned long',
+            'struct bpf_map': 'void',
+            'struct sk_buff': 'struct __sk_buff',
+            'const struct sk_buff': 'const struct __sk_buff',
+            'struct sk_msg_buff': 'struct sk_msg_md',
+            'struct xdp_buff': 'struct xdp_md',
+    }
+    # Helpers overloaded for different context types.
+    overloaded_helpers = [
+        'bpf_get_socket_cookie',
+        'bpf_sk_assign',
+    ]
+
+    def print_header(self):
+        header = '''\
+/* This is auto-generated file. See bpf_doc.py for details. */
+
+/* Forward declarations of BPF structs */'''
+
+        print(header)
+        for fwd in self.type_fwds:
+            print('%s;' % fwd)
+        print('')
+
+    def print_footer(self):
+        footer = ''
+        print(footer)
+
+    def map_type(self, t):
+        if t in self.known_types:
+            return t
+        if t in self.mapped_types:
+            return self.mapped_types[t]
+        print("Unrecognized type '%s', please add it to known types!" % t,
+              file=sys.stderr)
+        sys.exit(1)
+
+    seen_helpers = set()
+
+    def print_one(self, helper):
+        proto = helper.proto_break_down()
+
+        if proto['name'] in self.seen_helpers:
+            return
+        self.seen_helpers.add(proto['name'])
+
+        print('/*')
+        print(" * %s" % proto['name'])
+        print(" *")
+        if (helper.desc):
+            # Do not strip all newline characters: formatted code at the end of
+            # a section must be followed by a blank line.
+            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
+                print(' *{}{}'.format(' \t' if line else '', line))
+
+        if (helper.ret):
+            print(' *')
+            print(' * Returns')
+            for line in helper.ret.rstrip().split('\n'):
+                print(' *{}{}'.format(' \t' if line else '', line))
+
+        print(' */')
+        print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']),
+                                      proto['ret_star'], proto['name']), end='')
+        comma = ''
+        for i, a in enumerate(proto['args']):
+            t = a['type']
+            n = a['name']
+            if proto['name'] in self.overloaded_helpers and i == 0:
+                    t = 'void'
+                    n = 'ctx'
+            one_arg = '{}{}'.format(comma, self.map_type(t))
+            if n:
+                if a['star']:
+                    one_arg += ' {}'.format(a['star'])
+                else:
+                    one_arg += ' '
+                one_arg += '{}'.format(n)
+            comma = ', '
+            print(one_arg, end='')
+
+        print(') = (void *) %d;' % len(self.seen_helpers))
+        print('')
+
+###############################################################################
+
+# If script is launched from scripts/ from kernel tree and can access
+# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
+# otherwise the --filename argument will be required from the command line.
+script = os.path.abspath(sys.argv[0])
+linuxRoot = os.path.dirname(os.path.dirname(script))
+bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
+
+printers = {
+        'helpers': PrinterHelpersRST,
+}
+
+argParser = argparse.ArgumentParser(description="""
+Parse eBPF header file and generate documentation for the eBPF API.
+The RST-formatted output produced can be turned into a manual page with the
+rst2man utility.
+""")
+argParser.add_argument('--header', action='store_true',
+                       help='generate C header file')
+if (os.path.isfile(bpfh)):
+    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
+                           default=bpfh)
+else:
+    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
+argParser.add_argument('target', nargs='?', default='helpers',
+                       choices=printers.keys(), help='eBPF API target')
+args = argParser.parse_args()
+
+# Parse file.
+headerParser = HeaderParser(args.filename)
+headerParser.run()
+
+# Print formatted output to standard output.
+if args.header:
+    printer = PrinterHelpers(headerParser)
+else:
+    printer = printers[args.target](headerParser)
+printer.print_all()
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
deleted file mode 100755
index 867ada23281c..000000000000
--- a/scripts/bpf_helpers_doc.py
+++ /dev/null
@@ -1,615 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Copyright (C) 2018-2019 Netronome Systems, Inc.
-
-# In case user attempts to run with Python 2.
-from __future__ import print_function
-
-import argparse
-import re
-import sys, os
-
-class NoHelperFound(BaseException):
-    pass
-
-class ParsingError(BaseException):
-    def __init__(self, line='<line not provided>', reader=None):
-        if reader:
-            BaseException.__init__(self,
-                                   'Error at file offset %d, parsing line: %s' %
-                                   (reader.tell(), line))
-        else:
-            BaseException.__init__(self, 'Error parsing line: %s' % line)
-
-class Helper(object):
-    """
-    An object representing the description of an eBPF helper function.
-    @proto: function prototype of the helper function
-    @desc: textual description of the helper function
-    @ret: description of the return value of the helper function
-    """
-    def __init__(self, proto='', desc='', ret=''):
-        self.proto = proto
-        self.desc = desc
-        self.ret = ret
-
-    def proto_break_down(self):
-        """
-        Break down helper function protocol into smaller chunks: return type,
-        name, distincts arguments.
-        """
-        arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
-        res = {}
-        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
-
-        capture = proto_re.match(self.proto)
-        res['ret_type'] = capture.group(1)
-        res['ret_star'] = capture.group(2)
-        res['name']     = capture.group(3)
-        res['args'] = []
-
-        args    = capture.group(4).split(', ')
-        for a in args:
-            capture = arg_re.match(a)
-            res['args'].append({
-                'type' : capture.group(1),
-                'star' : capture.group(5),
-                'name' : capture.group(6)
-            })
-
-        return res
-
-class HeaderParser(object):
-    """
-    An object used to parse a file in order to extract the documentation of a
-    list of eBPF helper functions. All the helpers that can be retrieved are
-    stored as Helper object, in the self.helpers() array.
-    @filename: name of file to parse, usually include/uapi/linux/bpf.h in the
-               kernel tree
-    """
-    def __init__(self, filename):
-        self.reader = open(filename, 'r')
-        self.line = ''
-        self.helpers = []
-
-    def parse_helper(self):
-        proto    = self.parse_proto()
-        desc     = self.parse_desc()
-        ret      = self.parse_ret()
-        return Helper(proto=proto, desc=desc, ret=ret)
-
-    def parse_proto(self):
-        # Argument can be of shape:
-        #   - "void"
-        #   - "type  name"
-        #   - "type *name"
-        #   - Same as above, with "const" and/or "struct" in front of type
-        #   - "..." (undefined number of arguments, for bpf_trace_printk())
-        # There is at least one term ("void"), and at most five arguments.
-        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
-        capture = p.match(self.line)
-        if not capture:
-            raise NoHelperFound
-        self.line = self.reader.readline()
-        return capture.group(1)
-
-    def parse_desc(self):
-        p = re.compile(' \* ?(?:\t| {5,8})Description$')
-        capture = p.match(self.line)
-        if not capture:
-            # Helper can have empty description and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
-        # Description can be several lines, some of them possibly empty, and it
-        # stops when another subsection title is met.
-        desc = ''
-        while True:
-            self.line = self.reader.readline()
-            if self.line == ' *\n':
-                desc += '\n'
-            else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
-                capture = p.match(self.line)
-                if capture:
-                    desc += capture.group(1) + '\n'
-                else:
-                    break
-        return desc
-
-    def parse_ret(self):
-        p = re.compile(' \* ?(?:\t| {5,8})Return$')
-        capture = p.match(self.line)
-        if not capture:
-            # Helper can have empty retval and we might be parsing another
-            # attribute: return but do not consume.
-            return ''
-        # Return value description can be several lines, some of them possibly
-        # empty, and it stops when another subsection title is met.
-        ret = ''
-        while True:
-            self.line = self.reader.readline()
-            if self.line == ' *\n':
-                ret += '\n'
-            else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
-                capture = p.match(self.line)
-                if capture:
-                    ret += capture.group(1) + '\n'
-                else:
-                    break
-        return ret
-
-    def run(self):
-        # Advance to start of helper function descriptions.
-        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
-        if offset == -1:
-            raise Exception('Could not find start of eBPF helper descriptions list')
-        self.reader.seek(offset)
-        self.reader.readline()
-        self.reader.readline()
-        self.line = self.reader.readline()
-
-        while True:
-            try:
-                helper = self.parse_helper()
-                self.helpers.append(helper)
-            except NoHelperFound:
-                break
-
-        self.reader.close()
-
-###############################################################################
-
-class Printer(object):
-    """
-    A generic class for printers. Printers should be created with an array of
-    Helper objects, and implement a way to print them in the desired fashion.
-    @helpers: array of Helper objects to print to standard output
-    """
-    def __init__(self, helpers):
-        self.helpers = helpers
-
-    def print_header(self):
-        pass
-
-    def print_footer(self):
-        pass
-
-    def print_one(self, helper):
-        pass
-
-    def print_all(self):
-        self.print_header()
-        for helper in self.helpers:
-            self.print_one(helper)
-        self.print_footer()
-
-class PrinterRST(Printer):
-    """
-    A printer for dumping collected information about helpers as a ReStructured
-    Text page compatible with the rst2man program, which can be used to
-    generate a manual page for the helpers.
-    @helpers: array of Helper objects to print to standard output
-    """
-    def print_header(self):
-        header = '''\
-.. Copyright (C) All BPF authors and contributors from 2014 to present.
-.. See git log include/uapi/linux/bpf.h in kernel tree for details.
-.. 
-.. %%%LICENSE_START(VERBATIM)
-.. Permission is granted to make and distribute verbatim copies of this
-.. manual provided the copyright notice and this permission notice are
-.. preserved on all copies.
-.. 
-.. Permission is granted to copy and distribute modified versions of this
-.. manual under the conditions for verbatim copying, provided that the
-.. entire resulting derived work is distributed under the terms of a
-.. permission notice identical to this one.
-.. 
-.. Since the Linux kernel and libraries are constantly changing, this
-.. manual page may be incorrect or out-of-date.  The author(s) assume no
-.. responsibility for errors or omissions, or for damages resulting from
-.. the use of the information contained herein.  The author(s) may not
-.. have taken the same level of care in the production of this manual,
-.. which is licensed free of charge, as they might when working
-.. professionally.
-.. 
-.. Formatted or processed versions of this manual, if unaccompanied by
-.. the source, must acknowledge the copyright and authors of this work.
-.. %%%LICENSE_END
-.. 
-.. Please do not edit this file. It was generated from the documentation
-.. located in file include/uapi/linux/bpf.h of the Linux kernel sources
-.. (helpers description), and from scripts/bpf_helpers_doc.py in the same
-.. repository (header and footer).
-
-===========
-BPF-HELPERS
-===========
--------------------------------------------------------------------------------
-list of eBPF helper functions
--------------------------------------------------------------------------------
-
-:Manual section: 7
-
-DESCRIPTION
-===========
-
-The extended Berkeley Packet Filter (eBPF) subsystem consists in programs
-written in a pseudo-assembly language, then attached to one of the several
-kernel hooks and run in reaction of specific events. This framework differs
-from the older, "classic" BPF (or "cBPF") in several aspects, one of them being
-the ability to call special functions (or "helpers") from within a program.
-These functions are restricted to a white-list of helpers defined in the
-kernel.
-
-These helpers are used by eBPF programs to interact with the system, or with
-the context in which they work. For instance, they can be used to print
-debugging messages, to get the time since the system was booted, to interact
-with eBPF maps, or to manipulate network packets. Since there are several eBPF
-program types, and that they do not run in the same context, each program type
-can only call a subset of those helpers.
-
-Due to eBPF conventions, a helper can not have more than five arguments.
-
-Internally, eBPF programs call directly into the compiled helper functions
-without requiring any foreign-function interface. As a result, calling helpers
-introduces no overhead, thus offering excellent performance.
-
-This document is an attempt to list and document the helpers available to eBPF
-developers. They are sorted by chronological order (the oldest helpers in the
-kernel at the top).
-
-HELPERS
-=======
-'''
-        print(header)
-
-    def print_footer(self):
-        footer = '''
-EXAMPLES
-========
-
-Example usage for most of the eBPF helpers listed in this manual page are
-available within the Linux kernel sources, at the following locations:
-
-* *samples/bpf/*
-* *tools/testing/selftests/bpf/*
-
-LICENSE
-=======
-
-eBPF programs can have an associated license, passed along with the bytecode
-instructions to the kernel when the programs are loaded. The format for that
-string is identical to the one in use for kernel modules (Dual licenses, such
-as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
-programs that are compatible with the GNU Privacy License (GPL).
-
-In order to use such helpers, the eBPF program must be loaded with the correct
-license string passed (via **attr**) to the **bpf**\ () system call, and this
-generally translates into the C source code of the program containing a line
-similar to the following:
-
-::
-
-	char ____license[] __attribute__((section("license"), used)) = "GPL";
-
-IMPLEMENTATION
-==============
-
-This manual page is an effort to document the existing eBPF helper functions.
-But as of this writing, the BPF sub-system is under heavy development. New eBPF
-program or map types are added, along with new helper functions. Some helpers
-are occasionally made available for additional program types. So in spite of
-the efforts of the community, this page might not be up-to-date. If you want to
-check by yourself what helper functions exist in your kernel, or what types of
-programs they can support, here are some files among the kernel tree that you
-may be interested in:
-
-* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list
-  of all helper functions, as well as many other BPF definitions including most
-  of the flags, structs or constants used by the helpers.
-* *net/core/filter.c* contains the definition of most network-related helper
-  functions, and the list of program types from which they can be used.
-* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related
-  helpers.
-* *kernel/bpf/verifier.c* contains the functions used to check that valid types
-  of eBPF maps are used with a given helper function.
-* *kernel/bpf/* directory contains other files in which additional helpers are
-  defined (for cgroups, sockmaps, etc.).
-* The bpftool utility can be used to probe the availability of helper functions
-  on the system (as well as supported program and map types, and a number of
-  other parameters). To do so, run **bpftool feature probe** (see
-  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
-  list features available to unprivileged users.
-
-Compatibility between helper functions and program types can generally be found
-in the files where helper functions are defined. Look for the **struct
-bpf_func_proto** objects and for functions returning them: these functions
-contain a list of helpers that a given program type can call. Note that the
-**default:** label of the **switch ... case** used to filter helpers can call
-other functions, themselves allowing access to additional helpers. The
-requirement for GPL license is also in those **struct bpf_func_proto**.
-
-Compatibility between helper functions and map types can be found in the
-**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
-
-Helper functions that invalidate the checks on **data** and **data_end**
-pointers for network processing are listed in function
-**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
-
-SEE ALSO
-========
-
-**bpf**\ (2),
-**bpftool**\ (8),
-**cgroups**\ (7),
-**ip**\ (8),
-**perf_event_open**\ (2),
-**sendmsg**\ (2),
-**socket**\ (7),
-**tc-bpf**\ (8)'''
-        print(footer)
-
-    def print_proto(self, helper):
-        """
-        Format function protocol with bold and italics markers. This makes RST
-        file less readable, but gives nice results in the manual page.
-        """
-        proto = helper.proto_break_down()
-
-        print('**%s %s%s(' % (proto['ret_type'],
-                              proto['ret_star'].replace('*', '\\*'),
-                              proto['name']),
-              end='')
-
-        comma = ''
-        for a in proto['args']:
-            one_arg = '{}{}'.format(comma, a['type'])
-            if a['name']:
-                if a['star']:
-                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
-                else:
-                    one_arg += '** '
-                one_arg += '*{}*\\ **'.format(a['name'])
-            comma = ', '
-            print(one_arg, end='')
-
-        print(')**')
-
-    def print_one(self, helper):
-        self.print_proto(helper)
-
-        if (helper.desc):
-            print('\tDescription')
-            # Do not strip all newline characters: formatted code at the end of
-            # a section must be followed by a blank line.
-            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
-                print('{}{}'.format('\t\t' if line else '', line))
-
-        if (helper.ret):
-            print('\tReturn')
-            for line in helper.ret.rstrip().split('\n'):
-                print('{}{}'.format('\t\t' if line else '', line))
-
-        print('')
-
-class PrinterHelpers(Printer):
-    """
-    A printer for dumping collected information about helpers as C header to
-    be included from BPF program.
-    @helpers: array of Helper objects to print to standard output
-    """
-
-    type_fwds = [
-            'struct bpf_fib_lookup',
-            'struct bpf_sk_lookup',
-            'struct bpf_perf_event_data',
-            'struct bpf_perf_event_value',
-            'struct bpf_pidns_info',
-            'struct bpf_redir_neigh',
-            'struct bpf_sock',
-            'struct bpf_sock_addr',
-            'struct bpf_sock_ops',
-            'struct bpf_sock_tuple',
-            'struct bpf_spin_lock',
-            'struct bpf_sysctl',
-            'struct bpf_tcp_sock',
-            'struct bpf_tunnel_key',
-            'struct bpf_xfrm_state',
-            'struct linux_binprm',
-            'struct pt_regs',
-            'struct sk_reuseport_md',
-            'struct sockaddr',
-            'struct tcphdr',
-            'struct seq_file',
-            'struct tcp6_sock',
-            'struct tcp_sock',
-            'struct tcp_timewait_sock',
-            'struct tcp_request_sock',
-            'struct udp6_sock',
-            'struct task_struct',
-
-            'struct __sk_buff',
-            'struct sk_msg_md',
-            'struct xdp_md',
-            'struct path',
-            'struct btf_ptr',
-            'struct inode',
-            'struct socket',
-            'struct file',
-    ]
-    known_types = {
-            '...',
-            'void',
-            'const void',
-            'char',
-            'const char',
-            'int',
-            'long',
-            'unsigned long',
-
-            '__be16',
-            '__be32',
-            '__wsum',
-
-            'struct bpf_fib_lookup',
-            'struct bpf_perf_event_data',
-            'struct bpf_perf_event_value',
-            'struct bpf_pidns_info',
-            'struct bpf_redir_neigh',
-            'struct bpf_sk_lookup',
-            'struct bpf_sock',
-            'struct bpf_sock_addr',
-            'struct bpf_sock_ops',
-            'struct bpf_sock_tuple',
-            'struct bpf_spin_lock',
-            'struct bpf_sysctl',
-            'struct bpf_tcp_sock',
-            'struct bpf_tunnel_key',
-            'struct bpf_xfrm_state',
-            'struct linux_binprm',
-            'struct pt_regs',
-            'struct sk_reuseport_md',
-            'struct sockaddr',
-            'struct tcphdr',
-            'struct seq_file',
-            'struct tcp6_sock',
-            'struct tcp_sock',
-            'struct tcp_timewait_sock',
-            'struct tcp_request_sock',
-            'struct udp6_sock',
-            'struct task_struct',
-            'struct path',
-            'struct btf_ptr',
-            'struct inode',
-            'struct socket',
-            'struct file',
-    }
-    mapped_types = {
-            'u8': '__u8',
-            'u16': '__u16',
-            'u32': '__u32',
-            'u64': '__u64',
-            's8': '__s8',
-            's16': '__s16',
-            's32': '__s32',
-            's64': '__s64',
-            'size_t': 'unsigned long',
-            'struct bpf_map': 'void',
-            'struct sk_buff': 'struct __sk_buff',
-            'const struct sk_buff': 'const struct __sk_buff',
-            'struct sk_msg_buff': 'struct sk_msg_md',
-            'struct xdp_buff': 'struct xdp_md',
-    }
-    # Helpers overloaded for different context types.
-    overloaded_helpers = [
-        'bpf_get_socket_cookie',
-        'bpf_sk_assign',
-    ]
-
-    def print_header(self):
-        header = '''\
-/* This is auto-generated file. See bpf_helpers_doc.py for details. */
-
-/* Forward declarations of BPF structs */'''
-
-        print(header)
-        for fwd in self.type_fwds:
-            print('%s;' % fwd)
-        print('')
-
-    def print_footer(self):
-        footer = ''
-        print(footer)
-
-    def map_type(self, t):
-        if t in self.known_types:
-            return t
-        if t in self.mapped_types:
-            return self.mapped_types[t]
-        print("Unrecognized type '%s', please add it to known types!" % t,
-              file=sys.stderr)
-        sys.exit(1)
-
-    seen_helpers = set()
-
-    def print_one(self, helper):
-        proto = helper.proto_break_down()
-
-        if proto['name'] in self.seen_helpers:
-            return
-        self.seen_helpers.add(proto['name'])
-
-        print('/*')
-        print(" * %s" % proto['name'])
-        print(" *")
-        if (helper.desc):
-            # Do not strip all newline characters: formatted code at the end of
-            # a section must be followed by a blank line.
-            for line in re.sub('\n$', '', helper.desc, count=1).split('\n'):
-                print(' *{}{}'.format(' \t' if line else '', line))
-
-        if (helper.ret):
-            print(' *')
-            print(' * Returns')
-            for line in helper.ret.rstrip().split('\n'):
-                print(' *{}{}'.format(' \t' if line else '', line))
-
-        print(' */')
-        print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']),
-                                      proto['ret_star'], proto['name']), end='')
-        comma = ''
-        for i, a in enumerate(proto['args']):
-            t = a['type']
-            n = a['name']
-            if proto['name'] in self.overloaded_helpers and i == 0:
-                    t = 'void'
-                    n = 'ctx'
-            one_arg = '{}{}'.format(comma, self.map_type(t))
-            if n:
-                if a['star']:
-                    one_arg += ' {}'.format(a['star'])
-                else:
-                    one_arg += ' '
-                one_arg += '{}'.format(n)
-            comma = ', '
-            print(one_arg, end='')
-
-        print(') = (void *) %d;' % len(self.seen_helpers))
-        print('')
-
-###############################################################################
-
-# If script is launched from scripts/ from kernel tree and can access
-# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse,
-# otherwise the --filename argument will be required from the command line.
-script = os.path.abspath(sys.argv[0])
-linuxRoot = os.path.dirname(os.path.dirname(script))
-bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
-
-argParser = argparse.ArgumentParser(description="""
-Parse eBPF header file and generate documentation for eBPF helper functions.
-The RST-formatted output produced can be turned into a manual page with the
-rst2man utility.
-""")
-argParser.add_argument('--header', action='store_true',
-                       help='generate C header file')
-if (os.path.isfile(bpfh)):
-    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
-                           default=bpfh)
-else:
-    argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
-args = argParser.parse_args()
-
-# Parse file.
-headerParser = HeaderParser(args.filename)
-headerParser.run()
-
-# Print formatted output to standard output.
-if args.header:
-    printer = PrinterHelpers(headerParser.helpers)
-else:
-    printer = PrinterRST(headerParser.helpers)
-printer.print_all()
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
index 854d084026dd..a26599022fd6 100644
--- a/tools/bpf/Makefile.helpers
+++ b/tools/bpf/Makefile.helpers
@@ -35,7 +35,7 @@ man7: $(DOC_MAN7)
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
 
 $(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
-	$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
+	$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_doc.py --filename $< > $@
 
 $(OUTPUT)%.7: $(OUTPUT)%.rst
 ifndef RST2MAN_DEP
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b89af20cfa19..b4c5c529ad17 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -729,7 +729,7 @@ union bpf_attr {
  * parsed and used to produce a manual page. The workflow is the following,
  * and requires the rst2man utility:
  *
- *     $ ./scripts/bpf_helpers_doc.py \
+ *     $ ./scripts/bpf_doc.py \
  *             --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
  *     $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
  *     $ man /tmp/bpf-helpers.7
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 887a494ad5fc..8170f88e8ea6 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
 $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
-	$(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+	$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
 		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5d7b947320fb..f05c4d48fd7e 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -20,4 +20,4 @@ tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
 tools/lib/zalloc.c
-scripts/bpf_helpers_doc.py
+scripts/bpf_doc.py
-- 
cgit v1.2.3


From a67882a221e348ab1c925b47efdfec8b11272d3f Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:42 -0800
Subject: scripts/bpf: Add syscall commands printer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a new target to bpf_doc.py to support generating the list of syscall
commands directly from the UAPI headers. Assuming that developer
submissions keep the main header up to date, this should allow the man
pages to be automatically generated based on the latest API changes
rather than requiring someone to separately go back through the API and
describe each command.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-11-joe@cilium.io
---
 scripts/bpf_doc.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 9 deletions(-)

diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 5a4f68aab335..2d94025b38e9 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -14,6 +14,9 @@ import sys, os
 class NoHelperFound(BaseException):
     pass
 
+class NoSyscallCommandFound(BaseException):
+    pass
+
 class ParsingError(BaseException):
     def __init__(self, line='<line not provided>', reader=None):
         if reader:
@@ -23,18 +26,27 @@ class ParsingError(BaseException):
         else:
             BaseException.__init__(self, 'Error parsing line: %s' % line)
 
-class Helper(object):
+
+class APIElement(object):
     """
-    An object representing the description of an eBPF helper function.
-    @proto: function prototype of the helper function
-    @desc: textual description of the helper function
-    @ret: description of the return value of the helper function
+    An object representing the description of an aspect of the eBPF API.
+    @proto: prototype of the API symbol
+    @desc: textual description of the symbol
+    @ret: (optional) description of any associated return value
     """
     def __init__(self, proto='', desc='', ret=''):
         self.proto = proto
         self.desc = desc
         self.ret = ret
 
+
+class Helper(APIElement):
+    """
+    An object representing the description of an eBPF helper function.
+    @proto: function prototype of the helper function
+    @desc: textual description of the helper function
+    @ret: description of the return value of the helper function
+    """
     def proto_break_down(self):
         """
         Break down helper function protocol into smaller chunks: return type,
@@ -61,6 +73,7 @@ class Helper(object):
 
         return res
 
+
 class HeaderParser(object):
     """
     An object used to parse a file in order to extract the documentation of a
@@ -73,6 +86,13 @@ class HeaderParser(object):
         self.reader = open(filename, 'r')
         self.line = ''
         self.helpers = []
+        self.commands = []
+
+    def parse_element(self):
+        proto    = self.parse_symbol()
+        desc     = self.parse_desc()
+        ret      = self.parse_ret()
+        return APIElement(proto=proto, desc=desc, ret=ret)
 
     def parse_helper(self):
         proto    = self.parse_proto()
@@ -80,6 +100,18 @@ class HeaderParser(object):
         ret      = self.parse_ret()
         return Helper(proto=proto, desc=desc, ret=ret)
 
+    def parse_symbol(self):
+        p = re.compile(' \* ?(.+)$')
+        capture = p.match(self.line)
+        if not capture:
+            raise NoSyscallCommandFound
+        end_re = re.compile(' \* ?NOTES$')
+        end = end_re.match(self.line)
+        if end:
+            raise NoSyscallCommandFound
+        self.line = self.reader.readline()
+        return capture.group(1)
+
     def parse_proto(self):
         # Argument can be of shape:
         #   - "void"
@@ -141,16 +173,29 @@ class HeaderParser(object):
                     break
         return ret
 
-    def run(self):
-        # Advance to start of helper function descriptions.
-        offset = self.reader.read().find('* Start of BPF helper function descriptions:')
+    def seek_to(self, target, help_message):
+        self.reader.seek(0)
+        offset = self.reader.read().find(target)
         if offset == -1:
-            raise Exception('Could not find start of eBPF helper descriptions list')
+            raise Exception(help_message)
         self.reader.seek(offset)
         self.reader.readline()
         self.reader.readline()
         self.line = self.reader.readline()
 
+    def parse_syscall(self):
+        self.seek_to('* DOC: eBPF Syscall Commands',
+                     'Could not find start of eBPF syscall descriptions list')
+        while True:
+            try:
+                command = self.parse_element()
+                self.commands.append(command)
+            except NoSyscallCommandFound:
+                break
+
+    def parse_helpers(self):
+        self.seek_to('* Start of BPF helper function descriptions:',
+                     'Could not find start of eBPF helper descriptions list')
         while True:
             try:
                 helper = self.parse_helper()
@@ -158,6 +203,9 @@ class HeaderParser(object):
             except NoHelperFound:
                 break
 
+    def run(self):
+        self.parse_syscall()
+        self.parse_helpers()
         self.reader.close()
 
 ###############################################################################
@@ -420,6 +468,37 @@ SEE ALSO
         self.print_elem(helper)
 
 
+class PrinterSyscallRST(PrinterRST):
+    """
+    A printer for dumping collected information about the syscall API as a
+    ReStructured Text page compatible with the rst2man program, which can be
+    used to generate a manual page for the syscall.
+    @parser: A HeaderParser with APIElement objects to print to standard
+             output
+    """
+    def __init__(self, parser):
+        self.elements = parser.commands
+
+    def print_header(self):
+        header = '''\
+===
+bpf
+===
+-------------------------------------------------------------------------------
+Perform a command on an extended BPF object
+-------------------------------------------------------------------------------
+
+:Manual section: 2
+
+COMMANDS
+========
+'''
+        PrinterRST.print_license(self)
+        print(header)
+
+    def print_one(self, command):
+        print('**%s**' % (command.proto))
+        self.print_elem(command)
 
 
 class PrinterHelpers(Printer):
@@ -620,6 +699,7 @@ bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
 
 printers = {
         'helpers': PrinterHelpersRST,
+        'syscall': PrinterSyscallRST,
 }
 
 argParser = argparse.ArgumentParser(description="""
@@ -644,6 +724,8 @@ headerParser.run()
 
 # Print formatted output to standard output.
 if args.header:
+    if args.target != 'helpers':
+        raise NotImplementedError('Only helpers header generation is supported')
     printer = PrinterHelpers(headerParser)
 else:
     printer = printers[args.target](headerParser)
-- 
cgit v1.2.3


From a01d935b2e0916d32cb567f90c32a0c4eb46993c Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:43 -0800
Subject: tools/bpf: Remove bpf-helpers from bpftool docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This logic is used for validating the manual pages from selftests, so
move the infra under tools/testing/selftests/bpf/ and rely on selftests
for validation rather than tying it into the bpftool build.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-12-joe@cilium.io
---
 tools/bpf/Makefile.helpers                        | 60 ----------------------
 tools/bpf/bpftool/.gitignore                      |  1 -
 tools/bpf/bpftool/Documentation/Makefile          | 11 ++--
 tools/testing/selftests/bpf/.gitignore            |  1 +
 tools/testing/selftests/bpf/Makefile              | 20 ++++++--
 tools/testing/selftests/bpf/Makefile.docs         | 62 +++++++++++++++++++++++
 tools/testing/selftests/bpf/test_bpftool_build.sh | 21 --------
 tools/testing/selftests/bpf/test_doc_build.sh     | 13 +++++
 8 files changed, 95 insertions(+), 94 deletions(-)
 delete mode 100644 tools/bpf/Makefile.helpers
 create mode 100644 tools/testing/selftests/bpf/Makefile.docs
 create mode 100755 tools/testing/selftests/bpf/test_doc_build.sh

diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
deleted file mode 100644
index a26599022fd6..000000000000
--- a/tools/bpf/Makefile.helpers
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ifndef allow-override
-  include ../scripts/Makefile.include
-  include ../scripts/utilities.mak
-else
-  # Assume Makefile.helpers is being run from bpftool/Documentation
-  # subdirectory. Go up two more directories to fetch bpf.h header and
-  # associated script.
-  UP2DIR := ../../
-endif
-
-INSTALL ?= install
-RM ?= rm -f
-RMDIR ?= rmdir --ignore-fail-on-non-empty
-
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
-prefix ?= /usr/local
-mandir ?= $(prefix)/man
-man7dir = $(mandir)/man7
-
-HELPERS_RST = bpf-helpers.rst
-MAN7_RST = $(HELPERS_RST)
-
-_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
-DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
-
-helpers: man7
-man7: $(DOC_MAN7)
-
-RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
-
-$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
-	$(QUIET_GEN)$(UP2DIR)../../scripts/bpf_doc.py --filename $< > $@
-
-$(OUTPUT)%.7: $(OUTPUT)%.rst
-ifndef RST2MAN_DEP
-	$(error "rst2man not found, but required to generate man pages")
-endif
-	$(QUIET_GEN)rst2man $< > $@
-
-helpers-clean:
-	$(call QUIET_CLEAN, eBPF_helpers-manpage)
-	$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
-
-helpers-install: helpers
-	$(call QUIET_INSTALL, eBPF_helpers-manpage)
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
-	$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
-
-helpers-uninstall:
-	$(call QUIET_UNINST, eBPF_helpers-manpage)
-	$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
-	$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
-
-.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 944cb4b7c95d..05ce4446b780 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,7 +3,6 @@
 /bootstrap/
 /bpftool
 bpftool*.8
-bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index f33cb02de95c..c49487905ceb 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -16,15 +16,12 @@ prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
 
-# Load targets for building eBPF helpers man page.
-include ../../Makefile.helpers
-
 MAN8_RST = $(wildcard bpftool*.rst)
 
 _DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
 DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
 
-man: man8 helpers
+man: man8
 man8: $(DOC_MAN8)
 
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
 endif
 	$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
 
-clean: helpers-clean
+clean:
 	$(call QUIET_CLEAN, Documentation)
 	$(Q)$(RM) $(DOC_MAN8)
 
-install: man helpers-install
+install: man
 	$(call QUIET_INSTALL, Documentation-man)
 	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
 	$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
 
-uninstall: helpers-uninstall
+uninstall:
 	$(call QUIET_UNINST, Documentation-man)
 	$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
 	$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c0c48fdb9ac1..a0d5ec3cfc24 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+bpf-helpers*
 test_verifier
 test_maps
 test_lru_map
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a81af15e4ded..b5827464c6b5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -68,6 +68,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
+	test_docs_build.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
@@ -103,6 +104,7 @@ override define CLEAN
 	$(call msg,CLEAN)
 	$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 	$(Q)$(MAKE) -C bpf_testmod clean
+	$(Q)$(MAKE) docs-clean
 endef
 
 include ../lib.mk
@@ -180,6 +182,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
 		    cp $(SCRATCH_DIR)/runqslower $@
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
+$(TEST_GEN_FILES): docs
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -200,11 +203,16 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    CC=$(HOSTCC) LD=$(HOSTLD)				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
-	$(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
-	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	       \
-		    -C $(BPFTOOLDIR)/Documentation			       \
-		    OUTPUT=$(BUILD_DIR)/bpftool/Documentation/		       \
-		    prefix= DESTDIR=$(SCRATCH_DIR)/ install
+
+docs:
+	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	\
+	            -f Makefile.docs					\
+	            prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+	$(Q)$(MAKE) $(submake_extras)					\
+	            -f Makefile.docs					\
+	            prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
 
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
@@ -477,3 +485,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
 	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+
+.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..546c4a763b46
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man7dir = $(mandir)/man7
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers
+
+docs: $(DOCTARGETS)
+helpers: man7
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+	$$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+		--filename $$< > $$@
+
+$(OUTPUT)%.7: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+	$(error "rst2man not found, but required to generate man pages")
+endif
+	$(QUIET_GEN)rst2man $< > $@
+
+docs-clean:
+	$(call QUIET_CLEAN, eBPF_helpers-manpage)
+	$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
+
+docs-install: helpers
+	$(call QUIET_INSTALL, eBPF_helpers-manpage)
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+	$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+docs-uninstall:
+	$(call QUIET_UNINST, eBPF_helpers-manpage)
+	$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
+	$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
+
+.PHONY: docs docs-clean docs-install docs-uninstall
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 2db3c60e1e61..ac349a5cea7e 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,23 +85,6 @@ make_with_tmpdir() {
 	echo
 }
 
-make_doc_and_clean() {
-	echo -e "\$PWD:    $PWD"
-	echo -e "command: make -s $* doc >/dev/null"
-	RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
-	if [ $? -ne 0 ] ; then
-		ERROR=1
-		printf "FAILURE: Errors or warnings when building documentation\n"
-	fi
-	(
-		if [ $# -ge 1 ] ; then
-			cd ${@: -1}
-		fi
-		make -s doc-clean
-	)
-	echo
-}
-
 echo "Trying to build bpftool"
 echo -e "... through kbuild\n"
 
@@ -162,7 +145,3 @@ make_and_clean
 make_with_tmpdir OUTPUT
 
 make_with_tmpdir O
-
-echo -e "Checking documentation build\n"
-# From tools/bpf/bpftool
-make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..7eb940a7b2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+for tgt in docs docs-clean; do
+	make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
-- 
cgit v1.2.3


From 62b379a233a79e6f4d2e8b14750ae8fa13b8caf8 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:44 -0800
Subject: selftests/bpf: Templatize man page generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, the Makefile here was only targeting a single manual page so
it just hardcoded a bunch of individual rules to specifically handle
build, clean, install, uninstall for that particular page.

Upcoming commits will generate manual pages for an additional section,
so this commit prepares the makefile first by converting the existing
targets into an evaluated set of targets based on the manual page name
and section.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-13-joe@cilium.io
---
 tools/testing/selftests/bpf/Makefile.docs | 40 +++++++++++++++++++------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index 546c4a763b46..f39ad19317c8 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -39,24 +39,34 @@ $(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
 	$$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
 		--filename $$< > $$@
 
-$(OUTPUT)%.7: $(OUTPUT)%.rst
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
 ifndef RST2MAN_DEP
-	$(error "rst2man not found, but required to generate man pages")
+	$$(error "rst2man not found, but required to generate man pages")
 endif
-	$(QUIET_GEN)rst2man $< > $@
+	$$(QUIET_GEN)rst2man $$< > $$@
 
-docs-clean:
-	$(call QUIET_CLEAN, eBPF_helpers-manpage)
-	$(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
+docs-clean-$1:
+	$$(call QUIET_CLEAN, eBPF_$1-manpage)
+	$(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
 
-docs-install: helpers
-	$(call QUIET_INSTALL, eBPF_helpers-manpage)
-	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
-	$(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+docs-install-$1: docs
+	$$(call QUIET_INSTALL, eBPF_$1-manpage)
+	$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+	$(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
 
-docs-uninstall:
-	$(call QUIET_UNINST, eBPF_helpers-manpage)
-	$(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
-	$(Q)$(RMDIR) $(DESTDIR)$(man7dir)
+docs-uninstall-$1:
+	$$(call QUIET_UNINST, eBPF_$1-manpage)
+	$(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+	$(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
 
-.PHONY: docs docs-clean docs-install docs-uninstall
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man7
-- 
cgit v1.2.3


From accbd33a9b0328777899a85d148040e4d8921d87 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:45 -0800
Subject: selftests/bpf: Test syscall command parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add building of the bpf(2) syscall commands documentation as part of the
docs building step in the build. This allows us to pick up on potential
parse errors from the docs generator script as part of selftests.

The generated manual pages here are not intended for distribution, they
are just a fragment that can be integrated into the other static text of
bpf(2) to form the full manual page.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-14-joe@cilium.io
---
 tools/testing/selftests/bpf/.gitignore    |  1 +
 tools/testing/selftests/bpf/Makefile.docs | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a0d5ec3cfc24..4866f6a21901 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 bpf-helpers*
+bpf-syscall*
 test_verifier
 test_maps
 test_lru_map
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index f39ad19317c8..ccf260021e83 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -15,18 +15,27 @@ endif
 
 prefix ?= /usr/local
 mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
 man7dir = $(mandir)/man7
 
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
 HELPERS_RST = bpf-helpers.rst
 MAN7_RST = $(HELPERS_RST)
 
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
 _DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
 DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
 
-DOCTARGETS := helpers
+DOCTARGETS := helpers syscall
 
 docs: $(DOCTARGETS)
+syscall: man2
 helpers: man7
+man2: $(DOC_MAN2)
 man7: $(DOC_MAN7)
 
 RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -64,9 +73,10 @@ endef
 
 # Create the make targets to generate manual pages by name and section
 $(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
 
 docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
 docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
 docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
 
-.PHONY: docs docs-clean docs-install docs-uninstall man7
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
-- 
cgit v1.2.3


From 6197e5b7b1b5acd1e9b04bdf3527c694d84a27e2 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:46 -0800
Subject: docs/bpf: Add bpf() syscall command reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generate the syscall command reference from the UAPI header file and
include it in the main bpf docs page.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-15-joe@cilium.io
---
 Documentation/bpf/index.rst                  |  9 ++++++---
 Documentation/userspace-api/ebpf/index.rst   | 17 +++++++++++++++++
 Documentation/userspace-api/ebpf/syscall.rst | 24 ++++++++++++++++++++++++
 Documentation/userspace-api/index.rst        |  1 +
 MAINTAINERS                                  |  1 +
 5 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/userspace-api/ebpf/index.rst
 create mode 100644 Documentation/userspace-api/ebpf/syscall.rst

diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index 4f2874b729c3..a702f67dd45f 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -12,9 +12,6 @@ BPF instruction-set.
 The Cilium project also maintains a `BPF and XDP Reference Guide`_
 that goes into great technical depth about the BPF Architecture.
 
-The primary info for the bpf syscall is available in the `man-pages`_
-for `bpf(2)`_.
-
 BPF Type Format (BTF)
 =====================
 
@@ -35,6 +32,12 @@ Two sets of Questions and Answers (Q&A) are maintained.
    bpf_design_QA
    bpf_devel_QA
 
+Syscall API
+===========
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_. For more information about the userspace API, see
+Documentation/userspace-api/ebpf/index.rst.
 
 Helper functions
 ================
diff --git a/Documentation/userspace-api/ebpf/index.rst b/Documentation/userspace-api/ebpf/index.rst
new file mode 100644
index 000000000000..473dfba78116
--- /dev/null
+++ b/Documentation/userspace-api/ebpf/index.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+eBPF Userspace API
+==================
+
+eBPF is a kernel mechanism to provide a sandboxed runtime environment in the
+Linux kernel for runtime extension and instrumentation without changing kernel
+source code or loading kernel modules. eBPF programs can be attached to various
+kernel subsystems, including networking, tracing and Linux security modules
+(LSM).
+
+For internal kernel documentation on eBPF, see Documentation/bpf/index.rst.
+
+.. toctree::
+   :maxdepth: 1
+
+   syscall
diff --git a/Documentation/userspace-api/ebpf/syscall.rst b/Documentation/userspace-api/ebpf/syscall.rst
new file mode 100644
index 000000000000..ea9918084221
--- /dev/null
+++ b/Documentation/userspace-api/ebpf/syscall.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+eBPF Syscall
+------------
+
+:Authors: - Alexei Starovoitov <ast@kernel.org>
+          - Joe Stringer <joe@wand.net.nz>
+          - Michael Kerrisk <mtk.manpages@gmail.com>
+
+The primary info for the bpf syscall is available in the `man-pages`_
+for `bpf(2)`_.
+
+bpf() subcommand reference
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/uapi/linux/bpf.h
+   :doc: eBPF Syscall Preamble
+
+.. kernel-doc:: include/uapi/linux/bpf.h
+   :doc: eBPF Syscall Commands
+
+.. Links:
+.. _man-pages: https://www.kernel.org/doc/man-pages/
+.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index d29b020e5622..1e2438b7afa0 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -21,6 +21,7 @@ place where this information is gathered.
    unshare
    spec_ctrl
    accelerators/ocxl
+   ebpf/index
    ioctl/index
    iommu
    media/index
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d56c7044067..4446d1455354 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3209,6 +3209,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 F:	Documentation/bpf/
 F:	Documentation/networking/filter.rst
+F:	Documentation/userspace-api/ebpf/
 F:	arch/*/net/*
 F:	include/linux/bpf*
 F:	include/linux/filter.h
-- 
cgit v1.2.3


From 242029f42691e05ac09b31b98221421bd564375e Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Tue, 2 Mar 2021 09:19:47 -0800
Subject: tools: Sync uapi bpf.h header with latest changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Synchronize the header after all of the recent changes.

Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210302171947.2268128-16-joe@cilium.io
---
 tools/include/uapi/linux/bpf.h | 712 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 711 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b4c5c529ad17..63a56ed6a785 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -93,7 +93,717 @@ union bpf_iter_link_info {
 	} map;
 };
 
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ *	Description
+ *		Create a map and return a file descriptor that refers to the
+ *		map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ *		is automatically enabled for the new file descriptor.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ *	Description
+ *		Look up an element with a given *key* in the map referred to
+ *		by the file descriptor *map_fd*.
+ *
+ *		The *flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ *	Description
+ *		Create or update an element (key/value pair) in a specified map.
+ *
+ *		The *flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create a new element or update an existing element.
+ *		**BPF_NOEXIST**
+ *			Create a new element only if it did not exist.
+ *		**BPF_EXIST**
+ *			Update an existing element.
+ *		**BPF_F_LOCK**
+ *			Update a spin_lock-ed map element.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ *		**E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ *		**E2BIG**
+ *			The number of elements in the map reached the
+ *			*max_entries* limit specified at map creation time.
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ *	Description
+ *		Look up and delete an element by key in a specified map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ *	Description
+ *		Look up an element by key in a specified map and return the key
+ *		of the next element. Can be used to iterate over all elements
+ *		in the map.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		The following cases can be used to iterate over all elements of
+ *		the map:
+ *
+ *		* If *key* is not found, the operation returns zero and sets
+ *		  the *next_key* pointer to the key of the first element.
+ *		* If *key* is found, the operation returns zero and sets the
+ *		  *next_key* pointer to the key of the next element.
+ *		* If *key* is the last element, returns -1 and *errno* is set
+ *		  to **ENOENT**.
+ *
+ *		May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ *		**EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ *	Description
+ *		Verify and load an eBPF program, returning a new file
+ *		descriptor associated with the program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ *		The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ *		automatically enabled for the new file descriptor.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ *	Description
+ *		Pin an eBPF program or map referred by the specified *bpf_fd*
+ *		to the provided *pathname* on the filesystem.
+ *
+ *		The *pathname* argument must not contain a dot (".").
+ *
+ *		On success, *pathname* retains a reference to the eBPF object,
+ *		preventing deallocation of the object when the original
+ *		*bpf_fd* is closed. This allow the eBPF object to live beyond
+ *		**close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ *		process.
+ *
+ *		Applying **unlink**\ (2) or similar calls to the *pathname*
+ *		unpins the object from the filesystem, removing the reference.
+ *		If no other file descriptors or filesystem nodes refer to the
+ *		same object, it will be deallocated (see NOTES).
+ *
+ *		The filesystem type for the parent directory of *pathname* must
+ *		be **BPF_FS_MAGIC**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_OBJ_GET
+ *	Description
+ *		Open a file descriptor for the eBPF object pinned to the
+ *		specified *pathname*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook.
+ *
+ *		The *attach_type* specifies the eBPF attachment point to
+ *		attach the program to, and must be one of *bpf_attach_type*
+ *		(see below).
+ *
+ *		The *attach_bpf_fd* must be a valid file descriptor for a
+ *		loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ *		or sock_ops type corresponding to the specified *attach_type*.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_TYPE_SK_SKB**,
+ *		**BPF_PROG_TYPE_SK_MSG**
+ *
+ *			eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ *	Description
+ *		Detach the eBPF program associated with the *target_fd* at the
+ *		hook specified by *attach_type*. The program must have been
+ *		previously attached using **BPF_PROG_ATTACH**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ *	Description
+ *		Run the eBPF program associated with the *prog_fd* a *repeat*
+ *		number of times against a provided program context *ctx_in* and
+ *		data *data_in*, and return the modified program context
+ *		*ctx_out*, *data_out* (for example, packet data), result of the
+ *		execution *retval*, and *duration* of the test run.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		**ENOSPC**
+ *			Either *data_size_out* or *ctx_size_out* is too small.
+ *		**ENOTSUPP**
+ *			This command is not supported by the program type of
+ *			the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF program currently loaded into the kernel.
+ *
+ *		Looks for the eBPF program with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF programs
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF map currently loaded into the kernel.
+ *
+ *		Looks for the eBPF map with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF maps
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF program corresponding to
+ *		*prog_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF map corresponding to
+ *		*map_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ *	Description
+ *		Obtain information about the eBPF object corresponding to
+ *		*bpf_fd*.
+ *
+ *		Populates up to *info_len* bytes of *info*, which will be in
+ *		one of the following formats depending on the eBPF object type
+ *		of *bpf_fd*:
+ *
+ *		* **struct bpf_prog_info**
+ *		* **struct bpf_map_info**
+ *		* **struct bpf_btf_info**
+ *		* **struct bpf_link_info**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		specified *attach_type* hook.
+ *
+ *		The *target_fd* must be a valid file descriptor for a kernel
+ *		object which depends on the attach type of *attach_bpf_fd*:
+ *
+ *		**BPF_PROG_TYPE_CGROUP_DEVICE**,
+ *		**BPF_PROG_TYPE_CGROUP_SKB**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ *		**BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ *		**BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ *		**BPF_PROG_TYPE_SOCK_OPS**
+ *
+ *			Control Group v2 hierarchy with the eBPF controller
+ *			enabled. Requires the kernel to be compiled with
+ *			**CONFIG_CGROUP_BPF**.
+ *
+ *		**BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ *			Network namespace (eg /proc/self/ns/net).
+ *
+ *		**BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ *			LIRC device path (eg /dev/lircN). Requires the kernel
+ *			to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ *		**BPF_PROG_QUERY** always fetches the number of programs
+ *		attached and the *attach_flags* which were used to attach those
+ *		programs. Additionally, if *prog_ids* is nonzero and the number
+ *		of attached programs is less than *prog_cnt*, populates
+ *		*prog_ids* with the eBPF program ids of the programs attached
+ *		at *target_fd*.
+ *
+ *		The following flags may alter the result:
+ *
+ *		**BPF_F_QUERY_EFFECTIVE**
+ *			Only return information regarding programs which are
+ *			currently effective at the specified *target_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ *	Description
+ *		Attach an eBPF program to a tracepoint *name* to access kernel
+ *		internal arguments of the tracepoint in their raw form.
+ *
+ *		The *prog_fd* must be a valid file descriptor associated with
+ *		a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ *		No ABI guarantees are made about the content of tracepoint
+ *		arguments exposed to the corresponding eBPF program.
+ *
+ *		Applying **close**\ (2) to the file descriptor returned by
+ *		**BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ *	Description
+ *		Verify and load BPF Type Format (BTF) metadata into the kernel,
+ *		returning a new file descriptor associated with the metadata.
+ *		BTF is described in more detail at
+ *		https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ *		The *btf* parameter must point to valid memory providing
+ *		*btf_size* bytes of BTF binary metadata.
+ *
+ *		The returned file descriptor can be passed to other **bpf**\ ()
+ *		subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ *		associate the BTF with those objects.
+ *
+ *		Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ *		parameters to specify a *btf_log_buf*, *btf_log_size* and
+ *		*btf_log_level* which allow the kernel to return freeform log
+ *		output regarding the BTF verification process.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the BPF Type Format (BTF)
+ *		corresponding to *btf_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ *	Description
+ *		Obtain information about eBPF programs associated with the
+ *		target process identified by *pid* and *fd*.
+ *
+ *		If the *pid* and *fd* are associated with a tracepoint, kprobe
+ *		or uprobe perf event, then the *prog_id* and *fd_type* will
+ *		be populated with the eBPF program id and file descriptor type
+ *		of type **bpf_task_fd_type**. If associated with a kprobe or
+ *		uprobe, the  *probe_offset* and *probe_addr* will also be
+ *		populated. Optionally, if *buf* is provided, then up to
+ *		*buf_len* bytes of *buf* will be populated with the name of
+ *		the tracepoint, kprobe or uprobe.
+ *
+ *		The resulting *prog_id* may be introspected in deeper detail
+ *		using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ *	Description
+ *		Look up an element with the given *key* in the map referred to
+ *		by the file descriptor *fd*, and if found, delete the element.
+ *
+ *		The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ *		implement this command as a "pop" operation, deleting the top
+ *		element rather than one corresponding to *key*.
+ *		The *key* and *key_len* parameters should be zeroed when
+ *		issuing this operation for these map types.
+ *
+ *		This command is only valid for the following map types:
+ *		* **BPF_MAP_TYPE_QUEUE**
+ *		* **BPF_MAP_TYPE_STACK**
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ *	Description
+ *		Freeze the permissions of the specified map.
+ *
+ *		Write permissions may be frozen by passing zero *flags*.
+ *		Upon success, no future syscall invocations may alter the
+ *		map state of *map_fd*. Write operations from eBPF programs
+ *		are still possible for a frozen map.
+ *
+ *		Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ *	Description
+ *		Fetch the next BPF Type Format (BTF) object currently loaded
+ *		into the kernel.
+ *
+ *		Looks for the BTF object with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other BTF objects
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ *	Description
+ *		Iterate and fetch multiple elements in a map.
+ *
+ *		Two opaque values are used to manage batch operations,
+ *		*in_batch* and *out_batch*. Initially, *in_batch* must be set
+ *		to NULL to begin the batched operation. After each subsequent
+ *		**BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ *		*out_batch* as the *in_batch* for the next operation to
+ *		continue iteration from the current point.
+ *
+ *		The *keys* and *values* are output parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are copied into the
+ *		user buffer, with the keys copied into *keys* and the values
+ *		copied into the corresponding indices in *values*.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **ENOSPC** to indicate that *keys* or
+ *		*values* is too small to dump an entire bucket during
+ *		iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ *	Description
+ *		Iterate and delete all elements in a map.
+ *
+ *		This operation has the same behavior as
+ *		**BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ *		* Every element that is successfully returned is also deleted
+ *		  from the map. This is at least *count* elements. Note that
+ *		  *count* is both an input and an output parameter.
+ *		* Upon returning with *errno* set to **EFAULT**, up to
+ *		  *count* elements may be deleted without returning the keys
+ *		  and values of the deleted elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ *	Description
+ *		Update multiple elements in a map by *key*.
+ *
+ *		The *keys* and *values* are input parameters which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		and value size of the map *map_fd*. The *keys* buffer must be
+ *		of *key_size* * *count*. The *values* buffer must be of
+ *		*value_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially updated to the
+ *		value in the corresponding index in *values*. The *in_batch*
+ *		and *out_batch* parameters are ignored and should be zeroed.
+ *
+ *		The *elem_flags* argument should be specified as one of the
+ *		following:
+ *
+ *		**BPF_ANY**
+ *			Create new elements or update a existing elements.
+ *		**BPF_NOEXIST**
+ *			Create new elements only if they do not exist.
+ *		**BPF_EXIST**
+ *			Update existing elements.
+ *		**BPF_F_LOCK**
+ *			Update spin_lock-ed map elements. This must be
+ *			specified if the map value contains a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ *		May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ *		**E2BIG**. **E2BIG** indicates that the number of elements in
+ *		the map reached the *max_entries* limit specified at map
+ *		creation time.
+ *
+ *		May set *errno* to one of the following error codes under
+ *		specific circumstances:
+ *
+ *		**EEXIST**
+ *			If *flags* specifies **BPF_NOEXIST** and the element
+ *			with *key* already exists in the map.
+ *		**ENOENT**
+ *			If *flags* specifies **BPF_EXIST** and the element with
+ *			*key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ *	Description
+ *		Delete multiple elements in a map by *key*.
+ *
+ *		The *keys* parameter is an input parameter which must point
+ *		to memory large enough to hold *count* items based on the key
+ *		size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ *		Each element specified in *keys* is sequentially deleted. The
+ *		*in_batch*, *out_batch*, and *values* parameters are ignored
+ *		and should be zeroed.
+ *
+ *		The *elem_flags* argument may be specified as one of the
+ *		following:
+ *
+ *		**BPF_F_LOCK**
+ *			Look up the value of a spin-locked map without
+ *			returning the lock. This must be specified if the
+ *			elements contain a spinlock.
+ *
+ *		On success, *count* elements from the map are updated.
+ *
+ *		If an error is returned and *errno* is not **EFAULT**, *count*
+ *		is set to the number of successfully processed elements. If
+ *		*errno* is **EFAULT**, up to *count* elements may be been
+ *		deleted.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ *	Description
+ *		Attach an eBPF program to a *target_fd* at the specified
+ *		*attach_type* hook and return a file descriptor handle for
+ *		managing the link.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ *	Description
+ *		Update the eBPF program in the specified *link_fd* to
+ *		*new_prog_fd*.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ *	Description
+ *		Open a file descriptor for the eBPF Link corresponding to
+ *		*link_id*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ *	Description
+ *		Fetch the next eBPF link currently loaded into the kernel.
+ *
+ *		Looks for the eBPF link with an id greater than *start_id*
+ *		and updates *next_id* on success. If no other eBPF links
+ *		remain with ids higher than *start_id*, returns -1 and sets
+ *		*errno* to **ENOENT**.
+ *
+ *	Return
+ *		Returns zero on success. On error, or when no id remains, -1
+ *		is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ *	Description
+ *		Enable eBPF runtime statistics gathering.
+ *
+ *		Runtime statistics gathering for the eBPF runtime is disabled
+ *		by default to minimize the corresponding performance overhead.
+ *		This command enables statistics globally.
+ *
+ *		Multiple programs may independently enable statistics.
+ *		After gathering the desired statistics, eBPF runtime statistics
+ *		may be disabled again by calling **close**\ (2) for the file
+ *		descriptor returned by this function. Statistics will only be
+ *		disabled system-wide when all outstanding file descriptors
+ *		returned by prior calls for this subcommand are closed.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ *	Description
+ *		Create an iterator on top of the specified *link_fd* (as
+ *		previously created using **BPF_LINK_CREATE**) and return a
+ *		file descriptor that can be used to trigger the iteration.
+ *
+ *		If the resulting file descriptor is pinned to the filesystem
+ *		using  **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ *		for that path will trigger the iterator to read kernel state
+ *		using the eBPF program attached to *link_fd*.
+ *
+ *	Return
+ *		A new file descriptor (a nonnegative integer), or -1 if an
+ *		error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ *	Description
+ *		Forcefully detach the specified *link_fd* from its
+ *		corresponding attachment point.
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ *	Description
+ *		Bind a map to the lifetime of an eBPF program.
+ *
+ *		The map identified by *map_fd* is bound to the program
+ *		identified by *prog_fd* and only released when *prog_fd* is
+ *		released. This may be used in cases where metadata should be
+ *		associated with a program which otherwise does not contain any
+ *		references to the map (for example, embedded in the eBPF
+ *		program instructions).
+ *
+ *	Return
+ *		Returns zero on success. On error, -1 is returned and *errno*
+ *		is set appropriately.
+ *
+ * NOTES
+ *	eBPF objects (maps and programs) can be shared between processes.
+ *
+ *	* After **fork**\ (2), the child inherits file descriptors
+ *	  referring to the same eBPF objects.
+ *	* File descriptors referring to eBPF objects can be transferred over
+ *	  **unix**\ (7) domain sockets.
+ *	* File descriptors referring to eBPF objects can be duplicated in the
+ *	  usual way, using **dup**\ (2) and similar calls.
+ *	* File descriptors referring to eBPF objects can be pinned to the
+ *	  filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ *	An eBPF object is deallocated only after all file descriptors referring
+ *	to the object have been closed and no references remain pinned to the
+ *	filesystem or attached (for example, bound to a program or device).
+ */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
 	BPF_MAP_LOOKUP_ELEM,
-- 
cgit v1.2.3


From 607b9cc92bd7208338d714a22b8082fe83bcb177 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 3 Mar 2021 10:18:12 +0000
Subject: bpf: Consolidate shared test timing code

Share the timing / signal interruption logic between different
implementations of PROG_TEST_RUN. There is a change in behaviour
as well. We check the loop exit condition before checking for
pending signals. This resolves an edge case where a signal
arrives during the last iteration. Instead of aborting with
EINTR we return the successful result to user space.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210303101816.36774-2-lmb@cloudflare.com
---
 net/bpf/test_run.c | 141 +++++++++++++++++++++++++++++------------------------
 1 file changed, 78 insertions(+), 63 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 58bcb8c849d5..eb3c78cd4d7c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -16,14 +16,78 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
 
+struct bpf_test_timer {
+	enum { NO_PREEMPT, NO_MIGRATE } mode;
+	u32 i;
+	u64 time_start, time_spent;
+};
+
+static void bpf_test_timer_enter(struct bpf_test_timer *t)
+	__acquires(rcu)
+{
+	rcu_read_lock();
+	if (t->mode == NO_PREEMPT)
+		preempt_disable();
+	else
+		migrate_disable();
+
+	t->time_start = ktime_get_ns();
+}
+
+static void bpf_test_timer_leave(struct bpf_test_timer *t)
+	__releases(rcu)
+{
+	t->time_start = 0;
+
+	if (t->mode == NO_PREEMPT)
+		preempt_enable();
+	else
+		migrate_enable();
+	rcu_read_unlock();
+}
+
+static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
+	__must_hold(rcu)
+{
+	t->i++;
+	if (t->i >= repeat) {
+		/* We're done. */
+		t->time_spent += ktime_get_ns() - t->time_start;
+		do_div(t->time_spent, t->i);
+		*duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
+		*err = 0;
+		goto reset;
+	}
+
+	if (signal_pending(current)) {
+		/* During iteration: we've been cancelled, abort. */
+		*err = -EINTR;
+		goto reset;
+	}
+
+	if (need_resched()) {
+		/* During iteration: we need to reschedule between runs. */
+		t->time_spent += ktime_get_ns() - t->time_start;
+		bpf_test_timer_leave(t);
+		cond_resched();
+		bpf_test_timer_enter(t);
+	}
+
+	/* Do another round. */
+	return true;
+
+reset:
+	t->i = 0;
+	return false;
+}
+
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time, bool xdp)
 {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+	struct bpf_test_timer t = { NO_MIGRATE };
 	enum bpf_cgroup_storage_type stype;
-	u64 time_start, time_spent = 0;
-	int ret = 0;
-	u32 i;
+	int ret;
 
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -38,40 +102,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 	if (!repeat)
 		repeat = 1;
 
-	rcu_read_lock();
-	migrate_disable();
-	time_start = ktime_get_ns();
-	for (i = 0; i < repeat; i++) {
+	bpf_test_timer_enter(&t);
+	do {
 		bpf_cgroup_storage_set(storage);
 
 		if (xdp)
 			*retval = bpf_prog_run_xdp(prog, ctx);
 		else
 			*retval = BPF_PROG_RUN(prog, ctx);
-
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			break;
-		}
-
-		if (need_resched()) {
-			time_spent += ktime_get_ns() - time_start;
-			migrate_enable();
-			rcu_read_unlock();
-
-			cond_resched();
-
-			rcu_read_lock();
-			migrate_disable();
-			time_start = ktime_get_ns();
-		}
-	}
-	time_spent += ktime_get_ns() - time_start;
-	migrate_enable();
-	rcu_read_unlock();
-
-	do_div(time_spent, repeat);
-	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
+	bpf_test_timer_leave(&t);
 
 	for_each_cgroup_storage_type(stype)
 		bpf_cgroup_storage_free(storage[stype]);
@@ -674,18 +714,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 				     const union bpf_attr *kattr,
 				     union bpf_attr __user *uattr)
 {
+	struct bpf_test_timer t = { NO_PREEMPT };
 	u32 size = kattr->test.data_size_in;
 	struct bpf_flow_dissector ctx = {};
 	u32 repeat = kattr->test.repeat;
 	struct bpf_flow_keys *user_ctx;
 	struct bpf_flow_keys flow_keys;
-	u64 time_start, time_spent = 0;
 	const struct ethhdr *eth;
 	unsigned int flags = 0;
 	u32 retval, duration;
 	void *data;
 	int ret;
-	u32 i;
 
 	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 		return -EINVAL;
@@ -721,39 +760,15 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	ctx.data = data;
 	ctx.data_end = (__u8 *)data + size;
 
-	rcu_read_lock();
-	preempt_disable();
-	time_start = ktime_get_ns();
-	for (i = 0; i < repeat; i++) {
+	bpf_test_timer_enter(&t);
+	do {
 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
 					  size, flags);
+	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+	bpf_test_timer_leave(&t);
 
-		if (signal_pending(current)) {
-			preempt_enable();
-			rcu_read_unlock();
-
-			ret = -EINTR;
-			goto out;
-		}
-
-		if (need_resched()) {
-			time_spent += ktime_get_ns() - time_start;
-			preempt_enable();
-			rcu_read_unlock();
-
-			cond_resched();
-
-			rcu_read_lock();
-			preempt_disable();
-			time_start = ktime_get_ns();
-		}
-	}
-	time_spent += ktime_get_ns() - time_start;
-	preempt_enable();
-	rcu_read_unlock();
-
-	do_div(time_spent, repeat);
-	duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+	if (ret < 0)
+		goto out;
 
 	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
 			      retval, duration);
-- 
cgit v1.2.3


From 7c32e8f8bc33a5f4b113a630857e46634e3e143b Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 3 Mar 2021 10:18:13 +0000
Subject: bpf: Add PROG_TEST_RUN support for sk_lookup programs

Allow to pass sk_lookup programs to PROG_TEST_RUN. User space
provides the full bpf_sk_lookup struct as context. Since the
context includes a socket pointer that can't be exposed
to user space we define that PROG_TEST_RUN returns the cookie
of the selected socket or zero in place of the socket pointer.

We don't support testing programs that select a reuseport socket,
since this would mean running another (unrelated) BPF program
from the sk_lookup test handler.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210303101816.36774-3-lmb@cloudflare.com
---
 include/linux/bpf.h            |  10 ++++
 include/uapi/linux/bpf.h       |   5 +-
 net/bpf/test_run.c             | 105 +++++++++++++++++++++++++++++++++++++++++
 net/core/filter.c              |   1 +
 tools/include/uapi/linux/bpf.h |   5 +-
 5 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4c730863fa77..c931bc97019d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1491,6 +1491,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 			     const union bpf_attr *kattr,
 			     union bpf_attr __user *uattr);
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+				const union bpf_attr *kattr,
+				union bpf_attr __user *uattr);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -1692,6 +1695,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
+					      const union bpf_attr *kattr,
+					      union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 63a56ed6a785..7f530e349aff 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5953,7 +5953,10 @@ struct bpf_pidns_info {
 
 /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
 struct bpf_sk_lookup {
-	__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+	union {
+		__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+		__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+	};
 
 	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index eb3c78cd4d7c..0abdd67f44b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,8 +10,10 @@
 #include <net/bpf_sk_storage.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/net_namespace.h>
 #include <linux/error-injection.h>
 #include <linux/smp.h>
+#include <linux/sock_diag.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
@@ -781,3 +783,106 @@ out:
 	kfree(data);
 	return ret;
 }
+
+int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
+				union bpf_attr __user *uattr)
+{
+	struct bpf_test_timer t = { NO_PREEMPT };
+	struct bpf_prog_array *progs = NULL;
+	struct bpf_sk_lookup_kern ctx = {};
+	u32 repeat = kattr->test.repeat;
+	struct bpf_sk_lookup *user_ctx;
+	u32 retval, duration;
+	int ret = -EINVAL;
+
+	if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
+		return -EINVAL;
+
+	if (kattr->test.flags || kattr->test.cpu)
+		return -EINVAL;
+
+	if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
+	    kattr->test.data_size_out)
+		return -EINVAL;
+
+	if (!repeat)
+		repeat = 1;
+
+	user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
+	if (IS_ERR(user_ctx))
+		return PTR_ERR(user_ctx);
+
+	if (!user_ctx)
+		return -EINVAL;
+
+	if (user_ctx->sk)
+		goto out;
+
+	if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
+		goto out;
+
+	if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
+		ret = -ERANGE;
+		goto out;
+	}
+
+	ctx.family = (u16)user_ctx->family;
+	ctx.protocol = (u16)user_ctx->protocol;
+	ctx.dport = (u16)user_ctx->local_port;
+	ctx.sport = (__force __be16)user_ctx->remote_port;
+
+	switch (ctx.family) {
+	case AF_INET:
+		ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
+		ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
+		break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
+		ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
+		break;
+#endif
+
+	default:
+		ret = -EAFNOSUPPORT;
+		goto out;
+	}
+
+	progs = bpf_prog_array_alloc(1, GFP_KERNEL);
+	if (!progs) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	progs->items[0].prog = prog;
+
+	bpf_test_timer_enter(&t);
+	do {
+		ctx.selected_sk = NULL;
+		retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+	} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
+	bpf_test_timer_leave(&t);
+
+	if (ret < 0)
+		goto out;
+
+	user_ctx->cookie = 0;
+	if (ctx.selected_sk) {
+		if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
+			ret = -EOPNOTSUPP;
+			goto out;
+		}
+
+		user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
+	}
+
+	ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+	if (!ret)
+		ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
+
+out:
+	bpf_prog_array_free(progs);
+	kfree(user_ctx);
+	return ret;
+}
diff --git a/net/core/filter.c b/net/core/filter.c
index 13bcf248ee7b..a526db494c62 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10457,6 +10457,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
 }
 
 const struct bpf_prog_ops sk_lookup_prog_ops = {
+	.test_run = bpf_prog_test_run_sk_lookup,
 };
 
 const struct bpf_verifier_ops sk_lookup_verifier_ops = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 63a56ed6a785..7f530e349aff 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5953,7 +5953,10 @@ struct bpf_pidns_info {
 
 /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
 struct bpf_sk_lookup {
-	__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+	union {
+		__bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+		__u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+	};
 
 	__u32 family;		/* Protocol family (AF_INET, AF_INET6) */
 	__u32 protocol;		/* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
-- 
cgit v1.2.3


From 509b2937bce90089fd2785db9f27951a3d850c34 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 3 Mar 2021 10:18:14 +0000
Subject: selftests: bpf: Convert sk_lookup ctx access tests to PROG_TEST_RUN

Convert the selftests for sk_lookup narrow context access to use
PROG_TEST_RUN instead of creating actual sockets. This ensures that
ctx is populated correctly when using PROG_TEST_RUN.

Assert concrete values since we now control remote_ip and remote_port.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210303101816.36774-4-lmb@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/sk_lookup.c | 83 ++++++++++++++++++----
 tools/testing/selftests/bpf/progs/test_sk_lookup.c | 62 ++++++++++------
 2 files changed, 109 insertions(+), 36 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9ff0412e1fd3..45c82db3c58c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -241,6 +241,48 @@ fail:
 	return -1;
 }
 
+static __u64 socket_cookie(int fd)
+{
+	__u64 cookie;
+	socklen_t cookie_len = sizeof(cookie);
+
+	if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+		  "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+		return 0;
+	return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+			      const char *remote_ip, __u16 remote_port)
+{
+	void *local, *remote;
+	int err;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->local_port = local_port;
+	ctx->remote_port = htons(remote_port);
+
+	if (is_ipv6(local_ip)) {
+		ctx->family = AF_INET6;
+		local = &ctx->local_ip6[0];
+		remote = &ctx->remote_ip6[0];
+	} else {
+		ctx->family = AF_INET;
+		local = &ctx->local_ip4;
+		remote = &ctx->remote_ip4;
+	}
+
+	err = inet_pton(ctx->family, local_ip, local);
+	if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+		return 1;
+
+	err = inet_pton(ctx->family, remote_ip, remote);
+	if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+		return 1;
+
+	return 0;
+}
+
 static int send_byte(int fd)
 {
 	ssize_t n;
@@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
 
 static void run_sk_assign(struct test_sk_lookup *skel,
 			  struct bpf_program *lookup_prog,
-			  const char *listen_ip, const char *connect_ip)
+			  const char *remote_ip, const char *local_ip)
 {
-	int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
-	struct bpf_link *lookup_link;
+	int server_fds[MAX_SERVERS] = { -1 };
+	struct bpf_sk_lookup ctx;
+	__u64 server_cookie;
 	int i, err;
 
-	lookup_link = attach_lookup_prog(lookup_prog);
-	if (!lookup_link)
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.ctx_in = &ctx,
+		.ctx_size_in = sizeof(ctx),
+		.ctx_out = &ctx,
+		.ctx_size_out = sizeof(ctx),
+	);
+
+	if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
 		return;
 
+	ctx.protocol = IPPROTO_TCP;
+
 	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
-		server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+		server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
 		if (server_fds[i] < 0)
 			goto close_servers;
 
@@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
 			goto close_servers;
 	}
 
-	client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
-	if (client_fd < 0)
+	server_cookie = socket_cookie(server_fds[SERVER_B]);
+	if (!server_cookie)
+		return;
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+	if (CHECK(err, "test_run", "failed with error %d\n", errno))
+		goto close_servers;
+
+	if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
 		goto close_servers;
 
-	peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
-	if (CHECK(peer_fd < 0, "accept", "failed\n"))
-		goto close_client;
+	CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+	      "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
 
-	close(peer_fd);
-close_client:
-	close(client_fd);
 close_servers:
 	for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
 		if (server_fds[i] != -1)
 			close(server_fds[i]);
 	}
-	bpf_link__destroy(lookup_link);
 }
 
 static void run_sk_assign_v4(struct test_sk_lookup *skel,
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 1032b292af5b..ac6f7f205e25 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
 static const __u32 KEY_SERVER_A = SERVER_A;
 static const __u32 KEY_SERVER_B = SERVER_B;
 
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
 static const __u16 DST_PORT = 7007; /* Host byte order */
 static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
 static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
@@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 	if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
 		return SK_DROP;
 
-	/* Narrow loads from remote_port field. Expect non-0 value. */
-	if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
-	    LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+	/* Narrow loads from remote_port field. Expect SRC_PORT. */
+	if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+	    LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
+	    LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
 		return SK_DROP;
-	if (LSW(ctx->remote_port, 0) == 0)
+	if (LSW(ctx->remote_port, 0) != SRC_PORT)
 		return SK_DROP;
 
 	/* Narrow loads from local_port field. Expect DST_PORT. */
@@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 
 	/* Narrow loads from IPv4 fields */
 	if (v4) {
-		/* Expect non-0.0.0.0 in remote_ip4 */
-		if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
-		    LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+		/* Expect SRC_IP4 in remote_ip4 */
+		if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
 			return SK_DROP;
-		if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+		if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
 			return SK_DROP;
 
 		/* Expect DST_IP4 in local_ip4 */
@@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 
 	/* Narrow loads from IPv6 fields */
 	if (!v4) {
-		/* Expect non-:: IP in remote_ip6 */
-		if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
-		    LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
-		    LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
-		    LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
-		    LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
-		    LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
-		    LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
-		    LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+		/* Expect SRC_IP6 in remote_ip6 */
+		if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+		    LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
 			return SK_DROP;
-		if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
-		    LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
-		    LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
-		    LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+		if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+		    LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+		    LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
 			return SK_DROP;
 		/* Expect DST_IP6 in local_ip6 */
 		if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
-- 
cgit v1.2.3


From abab306ff04b570e817be3f026068fe9a5d99fbb Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 3 Mar 2021 10:18:15 +0000
Subject: selftests: bpf: Check that PROG_TEST_RUN repeats as requested

Extend a simple prog_run test to check that PROG_TEST_RUN adheres
to the requested repetitions. Convert it to use BPF skeleton.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210303101816.36774-5-lmb@cloudflare.com
---
 .../selftests/bpf/prog_tests/prog_run_xattr.c      | 51 ++++++++++++++++++----
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 935a294f049a..131d7f7eeb42 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -2,12 +2,31 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_prog_run_xattr(void)
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
 {
-	const char *file = "./test_pkt_access.o";
-	struct bpf_object *obj;
-	char buf[10];
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int err;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		return;
+
+	CHECK(run_cnt != info.run_cnt, "run_cnt",
+	      "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_xattr(void)
+{
+	struct test_pkt_access *skel;
+	int err, stats_fd = -1;
+	char buf[10] = {};
+	__u64 run_cnt = 0;
+
 	struct bpf_prog_test_run_attr tattr = {
 		.repeat = 1,
 		.data_in = &pkt_v4,
@@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
 		.data_size_out = 5,
 	};
 
-	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &tattr.prog_fd);
-	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
 		return;
 
-	memset(buf, 0, sizeof(buf));
+	skel = test_pkt_access__open_and_load();
+	if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
+		goto cleanup;
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
 
 	err = bpf_prog_test_run_xattr(&tattr);
 	CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
@@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
 	CHECK_ATTR(buf[5] != 0, "overflow",
 	      "BPF_PROG_TEST_RUN ignored size hint\n");
 
+	run_cnt += tattr.repeat;
+	check_run_cnt(tattr.prog_fd, run_cnt);
+
 	tattr.data_out = NULL;
 	tattr.data_size_out = 0;
+	tattr.repeat = 2;
 	errno = 0;
 
 	err = bpf_prog_test_run_xattr(&tattr);
@@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
 	err = bpf_prog_test_run_xattr(&tattr);
 	CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
 
-	bpf_object__close(obj);
+	run_cnt += tattr.repeat;
+	check_run_cnt(tattr.prog_fd, run_cnt);
+
+cleanup:
+	if (skel)
+		test_pkt_access__destroy(skel);
+	if (stats_fd != -1)
+		close(stats_fd);
 }
-- 
cgit v1.2.3


From b4f894633fa14d7d46ba7676f950b90a401504bb Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Wed, 3 Mar 2021 10:18:16 +0000
Subject: selftests: bpf: Don't run sk_lookup in verifier tests

sk_lookup doesn't allow setting data_in for bpf_prog_run. This doesn't
play well with the verifier tests, since they always set a 64 byte
input buffer. Allow not running verifier tests by setting
bpf_test.runs to a negative value and don't run the ctx access case
for sk_lookup. We have dedicated ctx access tests so skipping here
doesn't reduce coverage.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210303101816.36774-6-lmb@cloudflare.com
---
 tools/testing/selftests/bpf/test_verifier.c          | 4 ++--
 tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 58b5a349d3ba..1512092e1e68 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,7 +105,7 @@ struct bpf_test {
 	enum bpf_prog_type prog_type;
 	uint8_t flags;
 	void (*fill_helper)(struct bpf_test *self);
-	uint8_t runs;
+	int runs;
 #define bpf_testdata_struct_t					\
 	struct {						\
 		uint32_t retval, retval_unpriv;			\
@@ -1165,7 +1165,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	run_errs = 0;
 	run_successes = 0;
-	if (!alignment_prevented_execution && fd_prog >= 0) {
+	if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
 		uint32_t expected_val;
 		int i;
 
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index fb13ca2d5606..d78627be060f 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -239,6 +239,7 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.runs = -1,
 },
 /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
 {
-- 
cgit v1.2.3


From 46ac034f769fcd50d3d554041a3879a0cdf2ee57 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 3 Mar 2021 15:20:35 +0800
Subject: bpf: Simplify the calculation of variables

Fix the following coccicheck warnings:

./tools/bpf/bpf_dbg.c:1201:55-57: WARNING !A || A && B is equivalent to
!A || B.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/1614756035-111280-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 tools/bpf/bpf_dbg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index a07dfc479270..00e560a17baf 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
 		else
 			return CMD_OK;
 		bpf_reset();
-	} while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+	} while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
 
 	rl_printf("bpf passes:%u fails:%u\n", pass, fail);
 
-- 
cgit v1.2.3


From bce8623135fbe54bd86797df72cb85bfe4118b6e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 3 Mar 2021 15:52:10 +0800
Subject: selftests/bpf: Simplify the calculation of variables

Fix the following coccicheck warnings:

./tools/testing/selftests/bpf/test_sockmap.c:735:35-37: WARNING !A || A
&& B is equivalent to !A || B.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/1614757930-17197-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 tools/testing/selftests/bpf/test_sockmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 427ca00a3217..eefd445b96fc 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -732,7 +732,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 		 * socket is not a valid test. So in this case lets not
 		 * enable kTLS but still run the test.
 		 */
-		if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+		if (!txmsg_redir || txmsg_ingress) {
 			err = sockmap_init_ktls(opt->verbose, rx_fd);
 			if (err)
 				return err;
-- 
cgit v1.2.3


From d01b59c9ae94560fbcceaafeef39784d72765033 Mon Sep 17 00:00:00 2001
From: Xuesen Huang <huangxuesen@kuaishou.com>
Date: Thu, 4 Mar 2021 14:40:46 +0800
Subject: bpf: Add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH

bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets
encapsulation. But that is not appropriate when pushing Ethernet header.

Add an option to further specify encap L2 type and set the inner_protocol
as ETH_P_TEB.

Suggested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Xuesen Huang <huangxuesen@kuaishou.com>
Signed-off-by: Zhiyong Cheng <chengzhiyong@kuaishou.com>
Signed-off-by: Li Wang <wangli09@kuaishou.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210304064046.6232-1-hxseverything@gmail.com
---
 include/uapi/linux/bpf.h       |  5 +++++
 net/core/filter.c              | 11 ++++++++++-
 tools/include/uapi/linux/bpf.h |  5 +++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7f530e349aff..2d3036e292a9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2484,6 +2484,10 @@ union bpf_attr {
  *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
  *		  type; *len* is the length of the inner MAC header.
  *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *		  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *		  L2 type as Ethernet.
+ *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
  * 		previously done by the verifier are invalidated and must be
@@ -4916,6 +4920,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
 	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
+	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index a526db494c62..588b19ba0da8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3409,6 +3409,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
 					 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
 					 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+					 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
 					 BPF_F_ADJ_ROOM_ENCAP_L2( \
 					  BPF_ADJ_ROOM_ENCAP_L2_MASK))
 
@@ -3445,6 +3446,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 		    flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
 			return -EINVAL;
 
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
+		    inner_mac_len < ETH_HLEN)
+			return -EINVAL;
+
 		if (skb->encapsulation)
 			return -EALREADY;
 
@@ -3463,7 +3468,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 		skb->inner_mac_header = inner_net - inner_mac_len;
 		skb->inner_network_header = inner_net;
 		skb->inner_transport_header = inner_trans;
-		skb_set_inner_protocol(skb, skb->protocol);
+
+		if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
+			skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+		else
+			skb_set_inner_protocol(skb, skb->protocol);
 
 		skb->encapsulation = 1;
 		skb_set_network_header(skb, mac_len);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7f530e349aff..2d3036e292a9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2484,6 +2484,10 @@ union bpf_attr {
  *		  Use with ENCAP_L3/L4 flags to further specify the tunnel
  *		  type; *len* is the length of the inner MAC header.
  *
+ *		* **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ *		  Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ *		  L2 type as Ethernet.
+ *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
  * 		previously done by the verifier are invalidated and must be
@@ -4916,6 +4920,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
 	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
+	BPF_F_ADJ_ROOM_ENCAP_L2_ETH	= (1ULL << 6),
 };
 
 enum {
-- 
cgit v1.2.3


From 256becd450172eec74566f1aa7819ce80181d7e1 Mon Sep 17 00:00:00 2001
From: Xuesen Huang <huangxuesen@kuaishou.com>
Date: Fri, 5 Mar 2021 20:33:47 +0800
Subject: selftests, bpf: Extend test_tc_tunnel test with vxlan

Add BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag to the existing tests which
encapsulates the ethernet as the inner l2 header.

Update a vxlan encapsulation test case.

Signed-off-by: Xuesen Huang <huangxuesen@kuaishou.com>
Signed-off-by: Li Wang <wangli09@kuaishou.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/bpf/20210305123347.15311-1-hxseverything@gmail.com
---
 tools/testing/selftests/bpf/progs/test_tc_tunnel.c | 113 ++++++++++++++++++---
 tools/testing/selftests/bpf/test_tc_tunnel.sh      |  15 ++-
 2 files changed, 111 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..84cd63259554 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
 
 static const int cfg_udp_src = 20000;
 
+#define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
+
 #define	UDP_PORT		5555
 #define	MPLS_OVER_UDP_PORT	6635
 #define	ETH_OVER_UDP_PORT	7777
+#define	VXLAN_UDP_PORT		8472
+
+#define	EXTPROTO_VXLAN	0x1
+
+#define	VXLAN_N_VID     (1u << 24)
+#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define	VXLAN_FLAGS     0x8
+#define	VXLAN_VNI       1
 
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 						     MPLS_LS_S_MASK | 0xff);
 
+struct vxlanhdr {
+	__be32 vx_flags;
+	__be32 vx_vni;
+} __attribute__((packed));
+
 struct gre_hdr {
 	__be16 flags;
 	__be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
 struct v4hdr {
 	struct iphdr ip;
 	union l4hdr l4hdr;
-	__u8 pad[16];			/* enough space for L2 header */
+	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 struct v6hdr {
 	struct ipv6hdr ip;
 	union l4hdr l4hdr;
-	__u8 pad[16];			/* enough space for L2 header */
+	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
 } __attribute__((packed));
 
 static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 	iph->check = ~((csum & 0xffff) + (csum >> 16));
 }
 
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
-				      __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+					__u16 l2_proto, __u16 ext_proto)
 {
 	__u16 udp_dst = UDP_PORT;
 	struct iphdr iph_inner;
 	struct v4hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
+	__u8 *l2_hdr = NULL;
 	int tcp_off;
 	__u64 flags;
 
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 		break;
 	case ETH_P_TEB:
 		l2_len = ETH_HLEN;
-		udp_dst = ETH_OVER_UDP_PORT;
+		if (ext_proto & EXTPROTO_VXLAN) {
+			udp_dst = VXLAN_UDP_PORT;
+			l2_len += sizeof(struct vxlanhdr);
+		} else
+			udp_dst = ETH_OVER_UDP_PORT;
 		break;
 	}
 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 	}
 
 	/* add L2 encap (if specified) */
+	l2_hdr = (__u8 *)&h_outer + olen;
 	switch (l2_proto) {
 	case ETH_P_MPLS_UC:
-		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		*(__u32 *)l2_hdr = mpls_label;
 		break;
 	case ETH_P_TEB:
-		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-				       ETH_HLEN))
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+		if (ext_proto & EXTPROTO_VXLAN) {
+			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+			vxlan_hdr->vx_flags = VXLAN_FLAGS;
+			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+			l2_hdr += sizeof(struct vxlanhdr);
+		}
+
+		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
 			return TC_ACT_SHOT;
+
 		break;
 	}
 	olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 				      __u16 l2_proto)
+{
+	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+					__u16 l2_proto, __u16 ext_proto)
 {
 	__u16 udp_dst = UDP_PORT;
 	struct ipv6hdr iph_inner;
 	struct v6hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
+	__u8 *l2_hdr = NULL;
 	__u16 tot_len;
 	__u64 flags;
 
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 		break;
 	case ETH_P_TEB:
 		l2_len = ETH_HLEN;
-		udp_dst = ETH_OVER_UDP_PORT;
+		if (ext_proto & EXTPROTO_VXLAN) {
+			udp_dst = VXLAN_UDP_PORT;
+			l2_len += sizeof(struct vxlanhdr);
+		} else
+			udp_dst = ETH_OVER_UDP_PORT;
 		break;
 	}
 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
-			  sizeof(h_outer.l4hdr.udp);
+			  sizeof(h_outer.l4hdr.udp) + l2_len;
 		h_outer.l4hdr.udp.check = 0;
 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
 		break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	}
 
 	/* add L2 encap (if specified) */
+	l2_hdr = (__u8 *)&h_outer + olen;
 	switch (l2_proto) {
 	case ETH_P_MPLS_UC:
-		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+		*(__u32 *)l2_hdr = mpls_label;
 		break;
 	case ETH_P_TEB:
-		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
-				       ETH_HLEN))
+		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+		if (ext_proto & EXTPROTO_VXLAN) {
+			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+			vxlan_hdr->vx_flags = VXLAN_FLAGS;
+			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+			l2_hdr += sizeof(struct vxlanhdr);
+		}
+
+		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
 			return TC_ACT_SHOT;
 		break;
 	}
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	return TC_ACT_OK;
 }
 
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+				      __u16 l2_proto)
+{
+	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
 SEC("encap_ipip_none")
 int __encap_ipip_none(struct __sk_buff *skb)
 {
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+		return __encap_ipv4(skb, IPPROTO_UDP,
+				    ETH_P_TEB,
+				    EXTPROTO_VXLAN);
+	else
+		return TC_ACT_OK;
+}
+
 SEC("encap_sit_none")
 int __encap_sit_none(struct __sk_buff *skb)
 {
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+		return __encap_ipv6(skb, IPPROTO_UDP,
+				    ETH_P_TEB,
+				    EXTPROTO_VXLAN);
+	else
+		return TC_ACT_OK;
+}
+
 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 {
 	char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
 		case ETH_OVER_UDP_PORT:
 			olen += ETH_HLEN;
 			break;
+		case VXLAN_UDP_PORT:
+			olen += ETH_HLEN + sizeof(struct vxlanhdr);
+			break;
 		}
 		break;
 	default:
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7c76b841b17b..c9dde9b9d987 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -44,8 +44,8 @@ setup() {
 	# clamp route to reserve room for tunnel headers
 	ip -netns "${ns1}" -4 route flush table main
 	ip -netns "${ns1}" -6 route flush table main
-	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
-	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
 
 	sleep 1
 
@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
 	echo "sit"
 	$0 ipv6 sit none 100
 
+	echo "ip4 vxlan"
+	$0 ipv4 vxlan eth 2000
+
+	echo "ip6 vxlan"
+	$0 ipv6 ip6vxlan eth 2000
+
 	for mac in none mpls eth ; do
 		echo "ip gre $mac"
 		$0 ipv4 gre $mac 100
@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
 	targs="encap fou encap-sport auto encap-dport $dport"
 elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
 	ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+	ttype="vxlan"
+	targs="id 1 dstport 8472 udp6zerocsumrx"
 else
 	ttype=$tuntype
 	targs=""
@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
 elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
 	# No support for TEB fou tunnel; expect failure.
 	expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
 	# Share ethernet address between tunnel/veth2 so L2 decap works.
 	ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
 		  awk '/ether/ { print $2 }')
-- 
cgit v1.2.3


From 7820ee1c4757d888c2255b8eb7f74b8d1e5ac555 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 5 Mar 2021 19:37:35 +0100
Subject: Bluetooth: btbcm: Rewrite bindings in YAML and add reset

This rewrites the Broadcom bluetooth bindings in YAML and
adds a GPIO handle for the BT_RST_N line as used on some
platforms.

The Ingenic UART binding was using this binding in its
example DTS fragment, however mistakenly using "vcc-supply"
for what is called "vbat-supply". The proper DTS files
and the code in the kernel all use "vbat-supply" so
fix up the example in this patch so we ge a clean
check.

Cc: devicetree@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../devicetree/bindings/net/broadcom-bluetooth.txt |  56 ----------
 .../bindings/net/broadcom-bluetooth.yaml           | 117 +++++++++++++++++++++
 .../devicetree/bindings/serial/ingenic,uart.yaml   |   2 +-
 3 files changed, 118 insertions(+), 57 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
 create mode 100644 Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml

diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
deleted file mode 100644
index a7d57ba5f2ac..000000000000
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Broadcom Bluetooth Chips
----------------------
-
-This documents the binding structure and common properties for serial
-attached Broadcom devices.
-
-Serial attached Broadcom devices shall be a child node of the host UART
-device the slave device is attached to.
-
-Required properties:
-
- - compatible: should contain one of the following:
-   * "brcm,bcm20702a1"
-   * "brcm,bcm4329-bt"
-   * "brcm,bcm4330-bt"
-   * "brcm,bcm43438-bt"
-   * "brcm,bcm4345c5"
-   * "brcm,bcm43540-bt"
-   * "brcm,bcm4335a0"
-
-Optional properties:
-
- - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
- - shutdown-gpios: GPIO specifier, used to enable the BT module
- - device-wakeup-gpios: GPIO specifier, used to wakeup the controller
- - host-wakeup-gpios: GPIO specifier, used to wakeup the host processor.
-                      deprecated, replaced by interrupts and
-                      "host-wakeup" interrupt-names
- - clocks: 1 or 2 clocks as defined in clock-names below, in that order
- - clock-names: names for clock inputs, matching the clocks given
-   - "extclk": deprecated, replaced by "txco"
-   - "txco": external reference clock (not a standalone crystal)
-   - "lpo": external low power 32.768 kHz clock
- - vbat-supply: phandle to regulator supply for VBAT
- - vddio-supply: phandle to regulator supply for VDDIO
- - brcm,bt-pcm-int-params: configure PCM parameters via a 5-byte array
-    - sco-routing: 0 = PCM, 1 = Transport, 2 = Codec, 3 = I2S
-    - pcm-interface-rate: 128KBps, 256KBps, 512KBps, 1024KBps, 2048KBps
-    - pcm-frame-type: short, long
-    - pcm-sync-mode: slave, master
-    - pcm-clock-mode: slave, master
- - interrupts: must be one, used to wakeup the host processor
- - interrupt-names: must be "host-wakeup"
-
-Example:
-
-&uart2 {
-       pinctrl-names = "default";
-       pinctrl-0 = <&uart2_pins>;
-
-       bluetooth {
-               compatible = "brcm,bcm43438-bt";
-               max-speed = <921600>;
-               brcm,bt-pcm-int-params = [01 02 00 01 01];
-       };
-};
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
new file mode 100644
index 000000000000..bdd6ca617e23
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/broadcom-bluetooth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Bluetooth Chips
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description:
+  This binding describes Broadcom UART-attached bluetooth chips.
+
+properties:
+  compatible:
+    enum:
+      - brcm,bcm20702a1
+      - brcm,bcm4329-bt
+      - brcm,bcm4330-bt
+      - brcm,bcm43438-bt
+      - brcm,bcm4345c5
+      - brcm,bcm43540-bt
+      - brcm,bcm4335a0
+
+  shutdown-gpios:
+    maxItems: 1
+    description: GPIO specifier for the line BT_REG_ON used to
+      power on the BT module
+
+  reset-gpios:
+    maxItems: 1
+    description: GPIO specifier for the line BT_RST_N used to
+      reset the BT module. This should be marked as
+      GPIO_ACTIVE_LOW.
+
+  device-wakeup-gpios:
+    maxItems: 1
+    description: GPIO specifier for the line BT_WAKE used to
+      wakeup the controller. This is using the BT_GPIO_0
+      pin on the chip when in use.
+
+  host-wakeup-gpios:
+    maxItems: 1
+    deprecated: true
+    description: GPIO specifier for the line HOST_WAKE used
+      to wakeup the host processor. This is using he BT_GPIO_1
+      pin on the chip when in use. This is deprecated and replaced
+      by interrupts and "host-wakeup" interrupt-names
+
+  clocks:
+    maxItems: 2
+    description: 1 or 2 clocks as defined in clock-names below,
+      in that order
+
+  clock-names:
+    description: Names of the 1 to 2 supplied clocks
+    items:
+      - const: txco
+      - const: lpo
+      - const: extclk
+
+  vbat-supply:
+    description: phandle to regulator supply for VBAT
+
+  vddio-supply:
+    description: phandle to regulator supply for VDDIO
+
+  brcm,bt-pcm-int-params:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    minItems: 5
+    maxItems: 5
+    description: |-
+      configure PCM parameters via a 5-byte array:
+       sco-routing: 0 = PCM, 1 = Transport, 2 = Codec, 3 = I2S
+       pcm-interface-rate: 128KBps, 256KBps, 512KBps, 1024KBps, 2048KBps
+       pcm-frame-type: short, long
+       pcm-sync-mode: slave, master
+       pcm-clock-mode: slave, master
+
+  interrupts:
+    items:
+      - description: Handle to the line HOST_WAKE used to wake
+          up the host processor. This uses the BT_GPIO_1 pin on
+          the chip when in use.
+
+  interrupt-names:
+    items:
+      - const: host-wakeup
+
+  max-speed: true
+  current-speed: true
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    uart {
+        uart-has-rtscts;
+
+        bluetooth {
+            compatible = "brcm,bcm4330-bt";
+            max-speed = <921600>;
+            brcm,bt-pcm-int-params = [01 02 00 01 01];
+            shutdown-gpios = <&gpio 30 GPIO_ACTIVE_HIGH>;
+            device-wakeup-gpios = <&gpio 7 GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio 9 GPIO_ACTIVE_LOW>;
+            interrupt-parent = <&gpio>;
+            interrupts = <8 IRQ_TYPE_EDGE_FALLING>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
index 559213899d73..7748d8c3bab8 100644
--- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
+++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
@@ -91,7 +91,7 @@ examples:
       bluetooth {
         compatible = "brcm,bcm4330-bt";
         reset-gpios = <&gpf 8 GPIO_ACTIVE_HIGH>;
-        vcc-supply = <&wlan0_power>;
+        vbat-supply = <&wlan0_power>;
         device-wakeup-gpios = <&gpf 5 GPIO_ACTIVE_HIGH>;
         host-wakeup-gpios = <&gpf 6 GPIO_ACTIVE_HIGH>;
         shutdown-gpios = <&gpf 4 GPIO_ACTIVE_LOW>;
-- 
cgit v1.2.3


From 2f40796671f03c0eb193f44c7d4a6855ab9b30f6 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 5 Mar 2021 19:37:36 +0100
Subject: Bluetooth: btbcm: Obtain and handle reset GPIO

Obtain and drive the optional reset GPIO line if this is
not hardwired in the platform. This is needed on the
Samsung GT-I9070 mobile phone.

The invers of power is used, this should be OK to apply
simultaneously as the power signal according to
figures 47-51 on pages 159-161 in the BCM4330 datasheet.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 3764ceb6fa0d..93a4c03a8787 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -68,6 +68,8 @@ struct bcm_device_data {
  *	deassert = Bluetooth device may sleep when sleep criteria are met
  * @shutdown: BT_REG_ON pin,
  *	power up or power down Bluetooth device internal regulators
+ * @reset: BT_RST_N pin,
+ *	active low resets the Bluetooth logic core
  * @set_device_wakeup: callback to toggle BT_WAKE pin
  *	either by accessing @device_wakeup or by calling @btlp
  * @set_shutdown: callback to toggle BT_REG_ON pin
@@ -101,6 +103,7 @@ struct bcm_device {
 	const char		*name;
 	struct gpio_desc	*device_wakeup;
 	struct gpio_desc	*shutdown;
+	struct gpio_desc	*reset;
 	int			(*set_device_wakeup)(struct bcm_device *, bool);
 	int			(*set_shutdown)(struct bcm_device *, bool);
 #ifdef CONFIG_ACPI
@@ -985,6 +988,15 @@ static int bcm_gpio_set_device_wakeup(struct bcm_device *dev, bool awake)
 static int bcm_gpio_set_shutdown(struct bcm_device *dev, bool powered)
 {
 	gpiod_set_value_cansleep(dev->shutdown, powered);
+	if (dev->reset)
+		/*
+		 * The reset line is asserted on powerdown and deasserted
+		 * on poweron so the inverse of powered is used. Notice
+		 * that the GPIO line BT_RST_N needs to be specified as
+		 * active low in the device tree or similar system
+		 * description.
+		 */
+		gpiod_set_value_cansleep(dev->reset, !powered);
 	return 0;
 }
 
@@ -1050,6 +1062,11 @@ static int bcm_get_resources(struct bcm_device *dev)
 	if (IS_ERR(dev->shutdown))
 		return PTR_ERR(dev->shutdown);
 
+	dev->reset = devm_gpiod_get_optional(dev->dev, "reset",
+					     GPIOD_OUT_LOW);
+	if (IS_ERR(dev->reset))
+		return PTR_ERR(dev->reset);
+
 	dev->set_device_wakeup = bcm_gpio_set_device_wakeup;
 	dev->set_shutdown = bcm_gpio_set_shutdown;
 
-- 
cgit v1.2.3


From 62acbbb661a1a7eb3ee155fe029a4901f4d5cc45 Mon Sep 17 00:00:00 2001
From: Kiran K <kiran.k@intel.com>
Date: Mon, 8 Mar 2021 16:44:54 +0530
Subject: Bluetooth: btusb: print firmware file name on error loading firmware

print the firmware file name on error loading from fie system

Signed-off-by: Kiran K <kiran.k@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 0c20226d3b63..0d355bb45e08 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2540,7 +2540,8 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
 	err = request_firmware(&fw, fwname, &hdev->dev);
 	if (err < 0) {
-		bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err);
+		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+			   fwname, err);
 		return err;
 	}
 
@@ -2734,7 +2735,8 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 
 	err = request_firmware(&fw, fwname, &hdev->dev);
 	if (err < 0) {
-		bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err);
+		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
+			   fwname, err);
 		return err;
 	}
 
-- 
cgit v1.2.3


From d00745da644d42c2f97293eb3fe19cfd5c0b073c Mon Sep 17 00:00:00 2001
From: Kiran K <kiran.k@intel.com>
Date: Mon, 8 Mar 2021 18:46:01 +0530
Subject: Bluetooth: btintel: Fix offset calculation boot address parameter

Boot address parameter was not getting updated properly
due to wrong offset

Signed-off-by: Kiran K <kiran.k@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 88ce5f0ffc4b..fa97324454ea 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -863,7 +863,8 @@ static int btintel_download_firmware_payload(struct hci_dev *hdev,
 			/* The boot parameter is the first 32-bit value
 			 * and rest of 3 octets are reserved.
 			 */
-			*boot_param = get_unaligned_le32(fw_ptr + sizeof(*cmd));
+			*boot_param = get_unaligned_le32(fw_ptr + frag_len +
+							 sizeof(*cmd));
 
 			bt_dev_dbg(hdev, "boot_param=0x%x", *boot_param);
 		}
-- 
cgit v1.2.3


From 299194a91451263020c73dd2a3b7e0218c88dbd0 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 5 Mar 2021 14:40:50 +0100
Subject: selftests/bpf: Fix test_attach_probe for powerpc uprobes

When testing uprobes we the test gets GEP (Global Entry Point)
address from kallsyms, but then the function is called locally
so the uprobe is not triggered.

Fixing this by adjusting the address to LEP (Local Entry Point)
for powerpc arch plus instruction check stolen from ppc_function_entry
function pointed out and explained by Michael and Naveen.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Link: https://lore.kernel.org/bpf/20210305134050.139840-1-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/attach_probe.c        | 40 +++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..9dc4e3dfbcf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,6 +2,44 @@
 #include <test_progs.h>
 #include "test_attach_probe.skel.h"
 
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK   0xffff0000UL
+#define LIS_R2          0x3c400000UL
+#define ADDIS_R2_R12    0x3c4c0000UL
+#define ADDI_R2_R2      0x38420000UL
+
+static ssize_t get_offset(ssize_t addr, ssize_t base)
+{
+	u32 *insn = (u32 *) addr;
+
+	/*
+	 * A PPC64 ABIv2 function may have a local and a global entry
+	 * point. We need to use the local entry point when patching
+	 * functions, so identify and step over the global entry point
+	 * sequence.
+	 *
+	 * The global entry point sequence is always of the form:
+	 *
+	 * addis r2,r12,XXXX
+	 * addi  r2,r2,XXXX
+	 *
+	 * A linker optimisation may convert the addis to lis:
+	 *
+	 * lis   r2,XXXX
+	 * addi  r2,r2,XXXX
+	 */
+	if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+	     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+	    ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+		return (ssize_t)(insn + 2) - base;
+	else
+		return addr - base;
+}
+#else
+#define get_offset(addr, base) (addr - base)
+#endif
+
 ssize_t get_base_addr() {
 	size_t start, offset;
 	char buf[256];
@@ -36,7 +74,7 @@ void test_attach_probe(void)
 	if (CHECK(base_addr < 0, "get_base_addr",
 		  "failed to find base addr: %zd", base_addr))
 		return;
-	uprobe_offset = (size_t)&get_base_addr - base_addr;
+	uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
 
 	skel = test_attach_probe__open_and_load();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
-- 
cgit v1.2.3


From a23b3f5697e6cf8affa7adf3e967e5ab569ea757 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Fri, 5 Mar 2021 10:41:12 +0100
Subject: xsk: Update rings for load-acquire/store-release barriers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the AF_XDP rings uses general smp_{r,w,}mb() barriers on
the kernel-side. On most modern architectures
load-acquire/store-release barriers perform better, and results in
simpler code for circular ring buffers.

This change updates the XDP socket rings to use
load-acquire/store-release barriers.

It is important to note that changing from the old smp_{r,w,}mb()
barriers, to load-acquire/store-release barriers does not break
compatibility. The old semantics work with the new one, and vice
versa.

As pointed out by "Documentation/memory-barriers.txt" in the "SMP
BARRIER PAIRING" section:

  "General barriers pair with each other, though they also pair with
  most other types of barriers, albeit without multicopy atomicity.
  An acquire barrier pairs with a release barrier, but both may also
  pair with other barriers, including of course general barriers."

How different barriers behaves and pairs is outlined in
"tools/memory-model/Documentation/cheatsheet.txt".

In order to make sure that compatibility is not broken, LKMM herd7
based litmus tests can be constructed and verified.

We generalize the XDP socket ring to a one entry ring, and create two
scenarios; One where the ring is full, where only the consumer can
proceed, followed by the producer. One where the ring is empty, where
only the producer can proceed, followed by the consumer. Each scenario
is then expanded to four different tests: general producer/general
consumer, general producer/acqrel consumer, acqrel producer/general
consumer, acqrel producer/acqrel consumer. In total eight tests.

The empty ring test:
  C spsc-rb+empty

  // Simple one entry ring:
  // prod cons     allowed action       prod cons
  //    0    0 =>       prod          =>   1    0
  //    0    1 =>       cons          =>   0    0
  //    1    0 =>       cons          =>   1    1
  //    1    1 =>       prod          =>   0    1

  {}

  // We start at prod==0, cons==0, data==0, i.e. nothing has been
  // written to the ring. From here only the producer can start, and
  // should write 1. Afterwards, consumer can continue and read 1 to
  // data. Can we enter state prod==1, cons==1, but consumer observed
  // the incorrect value of 0?

  P0(int *prod, int *cons, int *data)
  {
     ... producer
  }

  P1(int *prod, int *cons, int *data)
  {
     ... consumer
  }

  exists( 1:d=0 /\ prod=1 /\ cons=1 );

The full ring test:
  C spsc-rb+full

  // Simple one entry ring:
  // prod cons     allowed action       prod cons
  //    0    0 =>       prod          =>   1    0
  //    0    1 =>       cons          =>   0    0
  //    1    0 =>       cons          =>   1    1
  //    1    1 =>       prod          =>   0    1

  { prod = 1; }

  // We start at prod==1, cons==0, data==1, i.e. producer has
  // written 0, so from here only the consumer can start, and should
  // consume 0. Afterwards, producer can continue and write 1 to
  // data. Can we enter state prod==0, cons==1, but consumer observed
  // the write of 1?

  P0(int *prod, int *cons, int *data)
  {
    ... producer
  }

  P1(int *prod, int *cons, int *data)
  {
    ... consumer
  }

  exists( 1:d=1 /\ prod=0 /\ cons=1 );

where P0 and P1 are:

  P0(int *prod, int *cons, int *data)
  {
  	int p;

  	p = READ_ONCE(*prod);
  	if (READ_ONCE(*cons) == p) {
  		WRITE_ONCE(*data, 1);
  		smp_wmb();
  		WRITE_ONCE(*prod, p ^ 1);
  	}
  }

  P0(int *prod, int *cons, int *data)
  {
  	int p;

  	p = READ_ONCE(*prod);
  	if (READ_ONCE(*cons) == p) {
  		WRITE_ONCE(*data, 1);
  		smp_store_release(prod, p ^ 1);
  	}
  }

  P1(int *prod, int *cons, int *data)
  {
  	int c;
  	int d = -1;

  	c = READ_ONCE(*cons);
  	if (READ_ONCE(*prod) != c) {
  		smp_rmb();
  		d = READ_ONCE(*data);
  		smp_mb();
  		WRITE_ONCE(*cons, c ^ 1);
  	}
  }

  P1(int *prod, int *cons, int *data)
  {
  	int c;
  	int d = -1;

  	c = READ_ONCE(*cons);
  	if (smp_load_acquire(prod) != c) {
  		d = READ_ONCE(*data);
  		smp_store_release(cons, c ^ 1);
  	}
  }

The full LKMM litmus tests are found at [1].

On x86-64 systems the l2fwd AF_XDP xdpsock sample performance
increases by 1%. This is mostly due to that the smp_mb() is removed,
which is a relatively expensive operation on these
platforms. Weakly-ordered platforms, such as ARM64 might benefit even
more.

[1] https://github.com/bjoto/litmus-xsk

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210305094113.413544-2-bjorn.topel@gmail.com
---
 net/xdp/xsk_queue.h | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 2823b7c3302d..2ac3802c2cd7 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -47,19 +47,18 @@ struct xsk_queue {
 	u64 queue_empty_descs;
 };
 
-/* The structure of the shared state of the rings are the same as the
- * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
- * ring, the kernel is the producer and user space is the consumer. For
- * the Tx and fill rings, the kernel is the consumer and user space is
- * the producer.
+/* The structure of the shared state of the rings are a simple
+ * circular buffer, as outlined in
+ * Documentation/core-api/circular-buffers.rst. For the Rx and
+ * completion ring, the kernel is the producer and user space is the
+ * consumer. For the Tx and fill rings, the kernel is the consumer and
+ * user space is the producer.
  *
  * producer                         consumer
  *
- * if (LOAD ->consumer) {           LOAD ->producer
- *                    (A)           smp_rmb()       (C)
+ * if (LOAD ->consumer) {  (A)      LOAD.acq ->producer  (C)
  *    STORE $data                   LOAD $data
- *    smp_wmb()       (B)           smp_mb()        (D)
- *    STORE ->producer              STORE ->consumer
+ *    STORE.rel ->producer (B)      STORE.rel ->consumer (D)
  * }
  *
  * (A) pairs with (D), and (B) pairs with (C).
@@ -78,7 +77,8 @@ struct xsk_queue {
  *
  * (A) is a control dependency that separates the load of ->consumer
  * from the stores of $data. In case ->consumer indicates there is no
- * room in the buffer to store $data we do not. So no barrier is needed.
+ * room in the buffer to store $data we do not. The dependency will
+ * order both of the stores after the loads. So no barrier is needed.
  *
  * (D) protects the load of the data to be observed to happen after the
  * store of the consumer pointer. If we did not have this memory
@@ -227,15 +227,13 @@ static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q,
 
 static inline void __xskq_cons_release(struct xsk_queue *q)
 {
-	smp_mb(); /* D, matches A */
-	WRITE_ONCE(q->ring->consumer, q->cached_cons);
+	smp_store_release(&q->ring->consumer, q->cached_cons); /* D, matchees A */
 }
 
 static inline void __xskq_cons_peek(struct xsk_queue *q)
 {
 	/* Refresh the local pointer */
-	q->cached_prod = READ_ONCE(q->ring->producer);
-	smp_rmb(); /* C, matches B */
+	q->cached_prod = smp_load_acquire(&q->ring->producer);  /* C, matches B */
 }
 
 static inline void xskq_cons_get_entries(struct xsk_queue *q)
@@ -397,9 +395,7 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 
 static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
 {
-	smp_wmb(); /* B, matches C */
-
-	WRITE_ONCE(q->ring->producer, idx);
+	smp_store_release(&q->ring->producer, idx); /* B, matches C */
 }
 
 static inline void xskq_prod_submit(struct xsk_queue *q)
-- 
cgit v1.2.3


From 291471dd1559528a4c2ad5026eff94ed1030562b Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Fri, 5 Mar 2021 10:41:13 +0100
Subject: libbpf, xsk: Add libbpf_smp_store_release libbpf_smp_load_acquire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the AF_XDP rings have load-acquire/store-release semantics,
move libbpf to that as well.

The library-internal libbpf_smp_{load_acquire,store_release} are only
valid for 32-bit words on ARM64.

Also, remove the barriers that are no longer in use.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210305094113.413544-3-bjorn.topel@gmail.com
---
 tools/lib/bpf/libbpf_util.h | 72 +++++++++++++++++++++++++++++++--------------
 tools/lib/bpf/xsk.h         | 17 ++++-------
 2 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 59c779c5790c..94a0d7bb6f3c 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -5,6 +5,7 @@
 #define __LIBBPF_LIBBPF_UTIL_H
 
 #include <stdbool.h>
+#include <linux/compiler.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -15,29 +16,56 @@ extern "C" {
  * application that uses libbpf.
  */
 #if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
-	asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile("" : : : "memory");			\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile("" : : : "memory");			\
+		___p1;							\
+	})
 #elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
+# define libbpf_smp_store_release(p, v)					\
+		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1;					\
+		asm volatile ("ldar %w0, %1"				\
+			      : "=r" (___p1) : "Q" (*p) : "memory");	\
+		__p1;							\
+	})
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile ("fence rw,w" : : : "memory");		\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		asm volatile ("fence r,rw" : : : "memory");		\
+		___p1;							\
+	})
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v)					\
+	do {								\
+		__sync_synchronize();					\
+		WRITE_ONCE(*p, v);					\
+	} while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		__sync_synchronize();					\
+		___p1;							\
+	})
 #endif
 
 #ifdef __cplusplus
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..a9fdea87b5cd 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -96,7 +96,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 	 * this function. Without this optimization it whould have been
 	 * free_entries = r->cached_prod - r->cached_cons + r->size.
 	 */
-	r->cached_cons = *r->consumer + r->size;
+	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+	r->cached_cons += r->size;
 
 	return r->cached_cons - r->cached_prod;
 }
@@ -106,7 +107,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	__u32 entries = r->cached_prod - r->cached_cons;
 
 	if (entries == 0) {
-		r->cached_prod = *r->producer;
+		r->cached_prod = libbpf_smp_load_acquire(r->producer);
 		entries = r->cached_prod - r->cached_cons;
 	}
 
@@ -129,9 +130,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	libbpf_smp_wmb();
-
-	*prod->producer += nb;
+	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
 }
 
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +138,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
 	__u32 entries = xsk_cons_nb_avail(cons, nb);
 
 	if (entries > 0) {
-		/* Make sure we do not speculatively read the data before
-		 * we have received the packet buffers from the ring.
-		 */
-		libbpf_smp_rmb();
-
 		*idx = cons->cached_cons;
 		cons->cached_cons += entries;
 	}
@@ -161,9 +155,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_rwmb();
+	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
 
-	*cons->consumer += nb;
 }
 
 static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
-- 
cgit v1.2.3


From a6aac408c56112f73d28ea8567c29b2a7fe8fccc Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 8 Mar 2021 19:25:22 +0100
Subject: libbpf: Fix arm64 build

The macro for libbpf_smp_store_release() doesn't build on arm64, fix it.

Fixes: 291471dd1559 ("libbpf, xsk: Add libbpf_smp_store_release libbpf_smp_load_acquire")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210308182521.155536-1-jean-philippe@linaro.org
---
 tools/lib/bpf/libbpf_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index 94a0d7bb6f3c..cfbcfc063c81 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -35,7 +35,7 @@ extern "C" {
 		typeof(*p) ___p1;					\
 		asm volatile ("ldar %w0, %1"				\
 			      : "=r" (___p1) : "Q" (*p) : "memory");	\
-		__p1;							\
+		___p1;							\
 	})
 #elif defined(__riscv)
 # define libbpf_smp_store_release(p, v)					\
-- 
cgit v1.2.3


From a0d73acc1e4bc1c542701e37b2e0e233fe6a271d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 8 Mar 2021 19:28:31 +0100
Subject: selftests/bpf: Fix typo in Makefile

The selftest build fails when trying to install the scripts:

rsync: [sender] link_stat "tools/testing/selftests/bpf/test_docs_build.sh" failed: No such file or directory (2)

Fix the filename.

Fixes: a01d935b2e09 ("tools/bpf: Remove bpf-helpers from bpftool docs")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210308182830.155784-1-jean-philippe@linaro.org
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b5827464c6b5..c3999587bc23 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -68,7 +68,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
-	test_docs_build.sh \
+	test_doc_build.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
-- 
cgit v1.2.3


From 018e3fa8e7ff087629e5285cc14d1a6ca9bebc8b Mon Sep 17 00:00:00 2001
From: Youghandhar Chintala <youghand@codeaurora.org>
Date: Wed, 24 Feb 2021 09:00:10 +0200
Subject: ath10k: skip the wait for completion to recovery in shutdown path

Currently in the shutdown callback we wait for recovery to complete
before freeing up the resources. This results in additional two seconds
delay during the shutdown and thereby increase the shutdown time.

As an attempt to take less time during shutdown, remove the wait for
recovery completion in the shutdown callback and added an API to freeing
the reosurces in which they were common for shutdown and removing
the module.

Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1

Signed-off-by: Youghandhar Chintala <youghand@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210223142908.23374-1-youghand@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/snoc.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index d66593f0950f..ea00fbb15601 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -1759,17 +1759,11 @@ err_core_destroy:
 	return ret;
 }
 
-static int ath10k_snoc_remove(struct platform_device *pdev)
+static int ath10k_snoc_free_resources(struct ath10k *ar)
 {
-	struct ath10k *ar = platform_get_drvdata(pdev);
 	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
 
-	ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc remove\n");
-
-	reinit_completion(&ar->driver_recovery);
-
-	if (test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags))
-		wait_for_completion_timeout(&ar->driver_recovery, 3 * HZ);
+	ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc free resources\n");
 
 	set_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags);
 
@@ -1783,12 +1777,29 @@ static int ath10k_snoc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int ath10k_snoc_remove(struct platform_device *pdev)
+{
+	struct ath10k *ar = platform_get_drvdata(pdev);
+	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+
+	ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc remove\n");
+
+	reinit_completion(&ar->driver_recovery);
+
+	if (test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags))
+		wait_for_completion_timeout(&ar->driver_recovery, 3 * HZ);
+
+	ath10k_snoc_free_resources(ar);
+
+	return 0;
+}
+
 static void ath10k_snoc_shutdown(struct platform_device *pdev)
 {
 	struct ath10k *ar = platform_get_drvdata(pdev);
 
 	ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc shutdown\n");
-	ath10k_snoc_remove(pdev);
+	ath10k_snoc_free_resources(ar);
 }
 
 static struct platform_driver ath10k_snoc_driver = {
-- 
cgit v1.2.3


From f277eb0500b4ee1cbe9db8615761f19b5a5520c9 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Wed, 24 Feb 2021 14:32:41 +0530
Subject: ath11k: Update signal filled flag during sta_statistics drv op

Currently, though the peer rssi information is updated to station dump
from driver sta_statistics mac op, the info doesn't get updated
since the NL80211_STA_INFO_SIGNAL filled flag is not set in station
info. Hence update this flag while filling the rssi info.

Tested on: IPQ8074 WLAN.HK.2.1.0.1-01213-QCAHKSWPL_SILICONZ-1

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210224090241.3098-1-srirrama@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 3c1f35a204ba..32c7687d9ac2 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -6082,6 +6082,7 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw,
 
 	/* TODO: Use real NF instead of default one. */
 	sinfo->signal = arsta->rssi_comb + ATH11K_DEFAULT_NOISE_FLOOR;
+	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
 }
 
 static const struct ieee80211_ops ath11k_ops = {
-- 
cgit v1.2.3


From 26f6979237293e93d3f165a0f3af9d967596b2c4 Mon Sep 17 00:00:00 2001
From: Lavanya Suresh <lavaks@codeaurora.org>
Date: Fri, 26 Feb 2021 17:30:59 +0530
Subject: ath11k: Add support for STA to handle beacon miss

When AP goes down without any indication to STA, firmware detects
missing beacon, and sends wmi roam event with reason BEACON_MISS
to the host.

Added support for STA mode to trigger disassociation from AP,
on receiving this event from firmware.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01717-QCAHKSWPL_SILICONZ-1
Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Lavanya Suresh <lavaks@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1614340859-28867-1-git-send-email-lavaks@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h |  2 +
 drivers/net/wireless/ath/ath11k/mac.c  | 79 +++++++++++++++++++++++++++++++---
 drivers/net/wireless/ath/ath11k/mac.h  |  2 +
 drivers/net/wireless/ath/ath11k/wmi.c  | 11 ++---
 4 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 9cdc4f0b0690..55af982deca7 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -34,6 +34,7 @@
 #define ATH11K_PRB_RSP_DROP_THRESHOLD ((ATH11K_TX_MGMT_TARGET_MAX_SUPPORT_WMI * 3) / 4)
 
 #define ATH11K_INVALID_HW_MAC_ID	0xFF
+#define ATH11K_CONNECTION_LOSS_HZ	(3 * HZ)
 
 extern unsigned int ath11k_frame_mode;
 
@@ -235,6 +236,7 @@ struct ath11k_vif {
 	u32 aid;
 	u8 bssid[ETH_ALEN];
 	struct cfg80211_bitrate_mask bitrate_mask;
+	struct delayed_work connection_loss_work;
 	int num_legacy_stations;
 	int rtscts_prot_mode;
 	int txpower;
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 32c7687d9ac2..bf6796973535 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -829,6 +829,75 @@ static void ath11k_control_beaconing(struct ath11k_vif *arvif,
 	ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "mac vdev %d up\n", arvif->vdev_id);
 }
 
+static void ath11k_mac_handle_beacon_iter(void *data, u8 *mac,
+					  struct ieee80211_vif *vif)
+{
+	struct sk_buff *skb = data;
+	struct ieee80211_mgmt *mgmt = (void *)skb->data;
+	struct ath11k_vif *arvif = (void *)vif->drv_priv;
+
+	if (vif->type != NL80211_IFTYPE_STATION)
+		return;
+
+	if (!ether_addr_equal(mgmt->bssid, vif->bss_conf.bssid))
+		return;
+
+	cancel_delayed_work(&arvif->connection_loss_work);
+}
+
+void ath11k_mac_handle_beacon(struct ath11k *ar, struct sk_buff *skb)
+{
+	ieee80211_iterate_active_interfaces_atomic(ar->hw,
+						   IEEE80211_IFACE_ITER_NORMAL,
+						   ath11k_mac_handle_beacon_iter,
+						   skb);
+}
+
+static void ath11k_mac_handle_beacon_miss_iter(void *data, u8 *mac,
+					       struct ieee80211_vif *vif)
+{
+	u32 *vdev_id = data;
+	struct ath11k_vif *arvif = (void *)vif->drv_priv;
+	struct ath11k *ar = arvif->ar;
+	struct ieee80211_hw *hw = ar->hw;
+
+	if (arvif->vdev_id != *vdev_id)
+		return;
+
+	if (!arvif->is_up)
+		return;
+
+	ieee80211_beacon_loss(vif);
+
+	/* Firmware doesn't report beacon loss events repeatedly. If AP probe
+	 * (done by mac80211) succeeds but beacons do not resume then it
+	 * doesn't make sense to continue operation. Queue connection loss work
+	 * which can be cancelled when beacon is received.
+	 */
+	ieee80211_queue_delayed_work(hw, &arvif->connection_loss_work,
+				     ATH11K_CONNECTION_LOSS_HZ);
+}
+
+void ath11k_mac_handle_beacon_miss(struct ath11k *ar, u32 vdev_id)
+{
+	ieee80211_iterate_active_interfaces_atomic(ar->hw,
+						   IEEE80211_IFACE_ITER_NORMAL,
+						   ath11k_mac_handle_beacon_miss_iter,
+						   &vdev_id);
+}
+
+static void ath11k_mac_vif_sta_connection_loss_work(struct work_struct *work)
+{
+	struct ath11k_vif *arvif = container_of(work, struct ath11k_vif,
+						connection_loss_work.work);
+	struct ieee80211_vif *vif = arvif->vif;
+
+	if (!arvif->is_up)
+		return;
+
+	ieee80211_connection_loss(vif);
+}
+
 static void ath11k_peer_assoc_h_basic(struct ath11k *ar,
 				      struct ieee80211_vif *vif,
 				      struct ieee80211_sta *sta,
@@ -1760,7 +1829,7 @@ static void ath11k_bss_disassoc(struct ieee80211_hw *hw,
 
 	arvif->is_up = false;
 
-	/* TODO: cancel connection_loss_work */
+	cancel_delayed_work_sync(&arvif->connection_loss_work);
 }
 
 static u32 ath11k_mac_get_rate_hw_value(int bitrate)
@@ -4615,10 +4684,8 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 	arvif->vif = vif;
 
 	INIT_LIST_HEAD(&arvif->list);
-
-	/* Should we initialize any worker to handle connection loss indication
-	 * from firmware in sta mode?
-	 */
+	INIT_DELAYED_WORK(&arvif->connection_loss_work,
+			  ath11k_mac_vif_sta_connection_loss_work);
 
 	for (i = 0; i < ARRAY_SIZE(arvif->bitrate_mask.control); i++) {
 		arvif->bitrate_mask.control[i].legacy = 0xffffffff;
@@ -4827,6 +4894,8 @@ static void ath11k_mac_op_remove_interface(struct ieee80211_hw *hw,
 	int ret;
 	int i;
 
+	cancel_delayed_work_sync(&arvif->connection_loss_work);
+
 	mutex_lock(&ar->conf_mutex);
 
 	ath11k_dbg(ab, ATH11K_DBG_MAC, "mac remove interface (vdev %d)\n",
diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h
index 455577905505..4bc59bdaf244 100644
--- a/drivers/net/wireless/ath/ath11k/mac.h
+++ b/drivers/net/wireless/ath/ath11k/mac.h
@@ -150,4 +150,6 @@ int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx);
 u8 ath11k_mac_bw_to_mac80211_bw(u8 bw);
 enum ath11k_supported_bw ath11k_mac_mac80211_bw_to_ath11k_bw(enum rate_info_bw bw);
 enum hal_encrypt_type ath11k_dp_tx_get_encrypt_type(u32 cipher);
+void ath11k_mac_handle_beacon(struct ath11k *ar, struct sk_buff *skb);
+void ath11k_mac_handle_beacon_miss(struct ath11k *ar, u32 vdev_id);
 #endif
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index ca5cda890d58..5ca2d80679b6 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -6171,10 +6171,8 @@ static void ath11k_mgmt_rx_event(struct ath11k_base *ab, struct sk_buff *skb)
 		}
 	}
 
-	/* TODO: Pending handle beacon implementation
-	 *if (ieee80211_is_beacon(hdr->frame_control))
-	 *	ath11k_mac_handle_beacon(ar, skb);
-	 */
+	if (ieee80211_is_beacon(hdr->frame_control))
+		ath11k_mac_handle_beacon(ar, skb);
 
 	ath11k_dbg(ab, ATH11K_DBG_MGMT,
 		   "event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
@@ -6393,10 +6391,7 @@ static void ath11k_roam_event(struct ath11k_base *ab, struct sk_buff *skb)
 
 	switch (roam_ev.reason) {
 	case WMI_ROAM_REASON_BEACON_MISS:
-		/* TODO: Pending beacon miss and connection_loss_work
-		 * implementation
-		 * ath11k_mac_handle_beacon_miss(ar, vdev_id);
-		 */
+		ath11k_mac_handle_beacon_miss(ar, roam_ev.vdev_id);
 		break;
 	case WMI_ROAM_REASON_BETTER_AP:
 	case WMI_ROAM_REASON_LOW_RSSI:
-- 
cgit v1.2.3


From 3fcd50d6f9a963a21e142d71be135eff6a374d2d Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 9 Mar 2021 01:56:48 +0100
Subject: selftests/bpf: Add BTF_KIND_FLOAT to test_core_reloc_size

Verify that bpf_core_field_size() is working correctly with floats.
Also document the required clang version.

Suggested-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210309005649.162480-2-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/README.rst                   | 9 +++++++++
 tools/testing/selftests/bpf/prog_tests/core_reloc.c      | 1 +
 tools/testing/selftests/bpf/progs/core_reloc_types.h     | 5 +++++
 tools/testing/selftests/bpf/progs/test_core_reloc_size.c | 3 +++
 4 files changed, 18 insertions(+)

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index dbc8f6cc5c67..3464161c8eea 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -170,3 +170,12 @@ failures:
 .. _2: https://reviews.llvm.org/D85174
 .. _3: https://reviews.llvm.org/D83878
 .. _4: https://reviews.llvm.org/D83242
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__  https://reviews.llvm.org/D83289
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 06eb956ff7bb..d94dcead72e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -266,6 +266,7 @@ static int duration = 0;
 		.arr_elem_sz = sizeof(((type *)0)->arr_field[0]),	\
 		.ptr_sz = 8, /* always 8-byte pointer for BPF */	\
 		.enum_sz = sizeof(((type *)0)->enum_field),		\
+		.float_sz = sizeof(((type *)0)->float_field),		\
 	}
 
 #define SIZE_CASE(name) {						\
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9a2850850121..9982eb969048 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -807,6 +807,7 @@ struct core_reloc_size_output {
 	int arr_elem_sz;
 	int ptr_sz;
 	int enum_sz;
+	int float_sz;
 };
 
 struct core_reloc_size {
@@ -816,6 +817,7 @@ struct core_reloc_size {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE = 123 } enum_field;
+	float float_field;
 };
 
 struct core_reloc_size___diff_sz {
@@ -825,6 +827,7 @@ struct core_reloc_size___diff_sz {
 	char arr_field[10];
 	void *ptr_field;
 	enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+	double float_field;
 };
 
 /* Error case of two candidates with the fields (int_field) at the same
@@ -839,6 +842,7 @@ struct core_reloc_size___err_ambiguous1 {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE___1 = 123 } enum_field;
+	float float_field;
 };
 
 struct core_reloc_size___err_ambiguous2 {
@@ -850,6 +854,7 @@ struct core_reloc_size___err_ambiguous2 {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE___2 = 123 } enum_field;
+	float float_field;
 };
 
 /*
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..7b2d576aeea1 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -21,6 +21,7 @@ struct core_reloc_size_output {
 	int arr_elem_sz;
 	int ptr_sz;
 	int enum_sz;
+	int float_sz;
 };
 
 struct core_reloc_size {
@@ -30,6 +31,7 @@ struct core_reloc_size {
 	int arr_field[4];
 	void *ptr_field;
 	enum { VALUE = 123 } enum_field;
+	float float_field;
 };
 
 SEC("raw_tracepoint/sys_enter")
@@ -45,6 +47,7 @@ int test_core_size(void *ctx)
 	out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
 	out->ptr_sz = bpf_core_field_size(in->ptr_field);
 	out->enum_sz = bpf_core_field_size(in->enum_field);
+	out->float_sz = bpf_core_field_size(in->float_field);
 
 	return 0;
 }
-- 
cgit v1.2.3


From ccb0e23ca27445e2408f52944660f9e5e5d1c4b1 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 9 Mar 2021 01:56:49 +0100
Subject: selftests/bpf: Add BTF_KIND_FLOAT to btf_dump_test_case_syntax

Check that dumping various floating-point types produces a valid C
code.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210309005649.162480-3-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 31975c96e2c9..12b40dc81e14 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -205,6 +205,12 @@ struct struct_with_embedded_stuff {
 	int t[11];
 };
 
+struct float_struct {
+	float f;
+	const double *d;
+	volatile long double *ld;
+};
+
 struct root_struct {
 	enum e1 _1;
 	enum e2 _2;
@@ -219,6 +225,7 @@ struct root_struct {
 	union_fwd_t *_12;
 	union_fwd_ptr_t _13;
 	struct struct_with_embedded_stuff _14;
+	struct float_struct _15;
 };
 
 /* ------ END-EXPECTED-OUTPUT ------ */
-- 
cgit v1.2.3


From 11d39cfeecfc9d92a5faa2a55c228e796478e0cb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 8 Mar 2021 20:43:22 -0800
Subject: selftests/bpf: Fix compiler warning in BPF_KPROBE definition in
 loop6.c

Add missing return type to BPF_KPROBE definition. Without it, compiler
generates the following warning:

progs/loop6.c:68:12: warning: type specifier missing, defaults to 'int' [-Wimplicit-int]
BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
           ^
1 warning generated.

Fixes: 86a35af628e5 ("selftests/bpf: Add a verifier scale test with unknown bounded loop")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210309044322.3487636-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/loop6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index 2a7141ac1656..38de0331e6b4 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -65,8 +65,8 @@ int config = 0;
 int result = 0;
 
 SEC("kprobe/virtqueue_add_sgs")
-BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
-	   unsigned int out_sgs, unsigned int in_sgs)
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+	       unsigned int out_sgs, unsigned int in_sgs)
 {
 	struct scatterlist *sgp = NULL;
 	__u64 length1 = 0, length2 = 0;
-- 
cgit v1.2.3


From e6a4750ffe9d701c4d55212b14b615e63571d235 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 8 Mar 2021 12:29:06 +0100
Subject: bpf, xdp: Make bpf_redirect_map() a map operation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the bpf_redirect_map() implementation dispatches to the
correct map-lookup function via a switch-statement. To avoid the
dispatching, this change adds bpf_redirect_map() as a map
operation. Each map provides its bpf_redirect_map() version, and
correct function is automatically selected by the BPF verifier.

A nice side-effect of the code movement is that the map lookup
functions are now local to the map implementation files, which removes
one additional function call.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210308112907.559576-2-bjorn.topel@gmail.com
---
 include/linux/bpf.h    | 26 ++++++--------------------
 include/linux/filter.h | 27 +++++++++++++++++++++++++++
 include/net/xdp_sock.h | 19 -------------------
 kernel/bpf/cpumap.c    |  8 +++++++-
 kernel/bpf/devmap.c    | 16 ++++++++++++++--
 kernel/bpf/verifier.c  | 13 +++++++++++--
 net/core/filter.c      | 39 +--------------------------------------
 net/xdp/xskmap.c       | 16 ++++++++++++++++
 8 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c931bc97019d..a25730eaa148 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -118,6 +118,9 @@ struct bpf_map_ops {
 					   void *owner, u32 size);
 	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
 
+	/* Misc helpers.*/
+	int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
+
 	/* map_meta_equal must be implemented for maps that can be
 	 * used as an inner map.  It is a runtime check to ensure
 	 * an inner map can be inserted to an outer map.
@@ -1450,9 +1453,9 @@ struct btf *bpf_get_btf_vmlinux(void);
 /* Map specifics */
 struct xdp_buff;
 struct sk_buff;
+struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_flush(void);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1462,7 +1465,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -1593,17 +1595,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 	return -EOPNOTSUPP;
 }
 
-static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
-						       u32 key)
-{
-	return NULL;
-}
-
-static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map,
-							     u32 key)
-{
-	return NULL;
-}
 static inline bool dev_map_can_have_prog(struct bpf_map *map)
 {
 	return false;
@@ -1615,6 +1606,7 @@ static inline void __dev_flush(void)
 
 struct xdp_buff;
 struct bpf_dtab_netdev;
+struct bpf_cpu_map_entry;
 
 static inline
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@ -1639,12 +1631,6 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 	return 0;
 }
 
-static inline
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	return NULL;
-}
-
 static inline void __cpu_map_flush(void)
 {
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3b00fc906ccd..008691fd3b58 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1472,4 +1472,31 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+						  void *lookup_elem(struct bpf_map *map, u32 key))
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	/* Lower bits of the flags are used as return code on lookup failure */
+	if (unlikely(flags > XDP_TX))
+		return XDP_ABORTED;
+
+	ri->tgt_value = lookup_elem(map, ifindex);
+	if (unlikely(!ri->tgt_value)) {
+		/* If the lookup fails we want to clear out the state in the
+		 * redirect_info struct completely, so that if an eBPF program
+		 * performs multiple lookups, the last one always takes
+		 * precedence.
+		 */
+		WRITE_ONCE(ri->map, NULL);
+		return flags;
+	}
+
+	ri->flags = flags;
+	ri->tgt_index = ifindex;
+	WRITE_ONCE(ri->map, map);
+
+	return XDP_REDIRECT;
+}
+
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index cc17bc957548..9c0722c6d7ac 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -80,19 +80,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs;
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	xs = READ_ONCE(m->xsk_map[key]);
-	return xs;
-}
-
 #else
 
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -109,12 +96,6 @@ static inline void __xsk_map_flush(void)
 {
 }
 
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	return NULL;
-}
-
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 5d1469de6921..7352d4160b7f 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -563,7 +563,7 @@ static void cpu_map_free(struct bpf_map *map)
 	kfree(cmap);
 }
 
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
 	struct bpf_cpu_map_entry *rcpu;
@@ -600,6 +600,11 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+}
+
 static int cpu_map_btf_id;
 const struct bpf_map_ops cpu_map_ops = {
 	.map_meta_equal		= bpf_map_meta_equal,
@@ -612,6 +617,7 @@ const struct bpf_map_ops cpu_map_ops = {
 	.map_check_btf		= map_check_no_btf,
 	.map_btf_name		= "bpf_cpu_map",
 	.map_btf_id		= &cpu_map_btf_id,
+	.map_redirect		= cpu_map_redirect,
 };
 
 static void bq_flush_to_queue(struct xdp_bulk_queue *bq)
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 85d9d1b72a33..f7f42448259f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -258,7 +258,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
-struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct hlist_head *head = dev_map_index_hash(dtab, key);
@@ -392,7 +392,7 @@ void __dev_flush(void)
  * update happens in parallel here a dev_put wont happen until after reading the
  * ifindex.
  */
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
+static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *obj;
@@ -735,6 +735,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
 					 map, key, value, map_flags);
 }
 
+static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+}
+
+static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+}
+
 static int dev_map_btf_id;
 const struct bpf_map_ops dev_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
@@ -747,6 +757,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "bpf_dtab",
 	.map_btf_id = &dev_map_btf_id,
+	.map_redirect = dev_map_redirect,
 };
 
 static int dev_map_hash_map_btf_id;
@@ -761,6 +772,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "bpf_dtab",
 	.map_btf_id = &dev_map_hash_map_btf_id,
+	.map_redirect = dev_hash_map_redirect,
 };
 
 static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9fe90ce52a65..97eb0b2435b8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5582,7 +5582,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_push_elem &&
 	    func_id != BPF_FUNC_map_pop_elem &&
 	    func_id != BPF_FUNC_map_peek_elem &&
-	    func_id != BPF_FUNC_for_each_map_elem)
+	    func_id != BPF_FUNC_for_each_map_elem &&
+	    func_id != BPF_FUNC_redirect_map)
 		return 0;
 
 	if (map == NULL) {
@@ -12017,7 +12018,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 		     insn->imm == BPF_FUNC_map_delete_elem ||
 		     insn->imm == BPF_FUNC_map_push_elem   ||
 		     insn->imm == BPF_FUNC_map_pop_elem    ||
-		     insn->imm == BPF_FUNC_map_peek_elem)) {
+		     insn->imm == BPF_FUNC_map_peek_elem   ||
+		     insn->imm == BPF_FUNC_redirect_map)) {
 			aux = &env->insn_aux_data[i + delta];
 			if (bpf_map_ptr_poisoned(aux))
 				goto patch_call_imm;
@@ -12059,6 +12061,9 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 				     (int (*)(struct bpf_map *map, void *value))NULL));
 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
 				     (int (*)(struct bpf_map *map, void *value))NULL));
+			BUILD_BUG_ON(!__same_type(ops->map_redirect,
+				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
+
 patch_map_ops_generic:
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
@@ -12085,6 +12090,10 @@ patch_map_ops_generic:
 				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
 					    __bpf_call_base;
 				continue;
+			case BPF_FUNC_redirect_map:
+				insn->imm = BPF_CAST_CALL(ops->map_redirect) -
+					    __bpf_call_base;
+				continue;
 			}
 
 			goto patch_call_imm;
diff --git a/net/core/filter.c b/net/core/filter.c
index 588b19ba0da8..183b0aa6b027 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3943,22 +3943,6 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
-static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-		return __dev_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return __dev_map_hash_lookup_elem(map, index);
-	case BPF_MAP_TYPE_CPUMAP:
-		return __cpu_map_lookup_elem(map, index);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_lookup_elem(map, index);
-	default:
-		return NULL;
-	}
-}
-
 void bpf_clear_redirect_map(struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri;
@@ -4112,28 +4096,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
 	   u64, flags)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-
-	/* Lower bits of the flags are used as return code on lookup failure */
-	if (unlikely(flags > XDP_TX))
-		return XDP_ABORTED;
-
-	ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
-	if (unlikely(!ri->tgt_value)) {
-		/* If the lookup fails we want to clear out the state in the
-		 * redirect_info struct completely, so that if an eBPF program
-		 * performs multiple lookups, the last one always takes
-		 * precedence.
-		 */
-		WRITE_ONCE(ri->map, NULL);
-		return flags;
-	}
-
-	ri->flags = flags;
-	ri->tgt_index = ifindex;
-	WRITE_ONCE(ri->map, map);
-
-	return XDP_REDIRECT;
+	return map->ops->map_redirect(map, ifindex, flags);
 }
 
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 113fd9017203..fbeb4870f798 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -125,6 +125,16 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 	return insn - insn_buf;
 }
 
+static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+
+	if (key >= map->max_entries)
+		return NULL;
+
+	return READ_ONCE(m->xsk_map[key]);
+}
+
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -215,6 +225,11 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
+{
+	return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+}
+
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry)
 {
@@ -247,4 +262,5 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_check_btf = map_check_no_btf,
 	.map_btf_name = "xsk_map",
 	.map_btf_id = &xsk_map_btf_id,
+	.map_redirect = xsk_map_redirect,
 };
-- 
cgit v1.2.3


From ee75aef23afe6e88497151c127c13ed69f41aaa2 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 8 Mar 2021 12:29:07 +0100
Subject: bpf, xdp: Restructure redirect actions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The XDP_REDIRECT implementations for maps and non-maps are fairly
similar, but obviously need to take different code paths depending on
if the target is using a map or not. Today, the redirect targets for
XDP either uses a map, or is based on ifindex.

Here, the map type and id are added to bpf_redirect_info, instead of
the actual map. Map type, map item/ifindex, and the map_id (if any) is
passed to xdp_do_redirect().

For ifindex-based redirect, used by the bpf_redirect() XDP BFP helper,
a special map type/id are used. Map type of UNSPEC together with map id
equal to INT_MAX has the special meaning of an ifindex based
redirect. Note that valid map ids are 1 inclusive, INT_MAX exclusive
([1,INT_MAX[).

In addition to making the code easier to follow, using explicit type
and id in bpf_redirect_info has a slight positive performance impact
by avoiding a pointer indirection for the map type lookup, and instead
use the cacheline for bpf_redirect_info.

Since the actual map is not passed via bpf_redirect_info anymore, the
map lookup is only done in the BPF helper. This means that the
bpf_clear_redirect_map() function can be removed. The actual map item
is RCU protected.

The bpf_redirect_info flags member is not used by XDP, and not
read/written any more. The map member is only written to when
required/used, and not unconditionally.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210308112907.559576-3-bjorn.topel@gmail.com
---
 include/linux/filter.h     |  10 +--
 include/trace/events/xdp.h |  62 ++++++++++-------
 kernel/bpf/cpumap.c        |   1 -
 kernel/bpf/devmap.c        |   1 -
 net/core/filter.c          | 170 ++++++++++++++++++++-------------------------
 net/xdp/xskmap.c           |   1 -
 6 files changed, 116 insertions(+), 129 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 008691fd3b58..b2b85b2cad8e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
-	struct bpf_map *map;
+	u32 map_id;
+	enum bpf_map_type map_type;
 	u32 kern_flags;
 	struct bpf_nh_params nh;
 };
@@ -1488,13 +1489,14 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
 		 * performs multiple lookups, the last one always takes
 		 * precedence.
 		 */
-		WRITE_ONCE(ri->map, NULL);
+		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
+		ri->map_type = BPF_MAP_TYPE_UNSPEC;
 		return flags;
 	}
 
-	ri->flags = flags;
 	ri->tgt_index = ifindex;
-	WRITE_ONCE(ri->map, map);
+	ri->map_id = map->id;
+	ri->map_type = map->map_type;
 
 	return XDP_REDIRECT;
 }
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 76a97176ab81..fcad3645a70b 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
 };
 #endif /* __DEVMAP_OBJ_TYPE */
 
-#define devmap_ifindex(tgt, map)				\
-	(((map->map_type == BPF_MAP_TYPE_DEVMAP ||	\
-		  map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
-	  ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
-
 DECLARE_EVENT_CLASS(xdp_redirect_template,
 
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
 
-	TP_ARGS(dev, xdp, tgt, err, map, index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
 
 	TP_STRUCT__entry(
 		__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
 	),
 
 	TP_fast_assign(
+		u32 ifindex = 0, map_index = index;
+
+		if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+			ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+		} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+			ifindex = index;
+			map_index = 0;
+		}
+
 		__entry->prog_id	= xdp->aux->id;
 		__entry->act		= XDP_REDIRECT;
 		__entry->ifindex	= dev->ifindex;
 		__entry->err		= err;
-		__entry->to_ifindex	= map ? devmap_ifindex(tgt, map) :
-						index;
-		__entry->map_id		= map ? map->id : 0;
-		__entry->map_index	= map ? index : 0;
+		__entry->to_ifindex	= ifindex;
+		__entry->map_id		= map_id;
+		__entry->map_index	= map_index;
 	),
 
 	TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
-#define _trace_xdp_redirect(dev, xdp, to)				\
-	 trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
+#define _trace_xdp_redirect(dev, xdp, to)						\
+	 trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
 
-#define _trace_xdp_redirect_err(dev, xdp, to, err)			\
-	 trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
+#define _trace_xdp_redirect_err(dev, xdp, to, err)					\
+	 trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
 
-#define _trace_xdp_redirect_map(dev, xdp, to, map, index)		\
-	 trace_xdp_redirect(dev, xdp, to, 0, map, index)
+#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
+	 trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
 
-#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err)	\
-	 trace_xdp_redirect_err(dev, xdp, to, err, map, index)
+#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
+	 trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
 
 /* not used anymore, but kept around so as not to break old programs */
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
 	TP_PROTO(const struct net_device *dev,
 		 const struct bpf_prog *xdp,
 		 const void *tgt, int err,
-		 const struct bpf_map *map, u32 index),
-	TP_ARGS(dev, xdp, tgt, err, map, index)
+		 enum bpf_map_type map_type,
+		 u32 map_id, u32 index),
+	TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
 );
 
 TRACE_EVENT(xdp_cpumap_kthread,
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 7352d4160b7f..0cf2791d5099 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
 	 * complete.
 	 */
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* For cpu_map the remote CPUs can still be using the entries
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index f7f42448259f..7a5ad7331c3b 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* Make sure prior __dev_map_entry_free() have completed. */
diff --git a/net/core/filter.c b/net/core/filter.c
index 183b0aa6b027..b6732000d8a2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3918,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
-			    struct bpf_map *map, struct xdp_buff *xdp)
-{
-	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP:
-	case BPF_MAP_TYPE_DEVMAP_HASH:
-		return dev_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_CPUMAP:
-		return cpu_map_enqueue(fwd, xdp, dev_rx);
-	case BPF_MAP_TYPE_XSKMAP:
-		return __xsk_map_redirect(fwd, xdp);
-	default:
-		return -EBADRQC;
-	}
-	return 0;
-}
-
 void xdp_do_flush(void)
 {
 	__dev_flush();
@@ -3943,55 +3926,52 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
-void bpf_clear_redirect_map(struct bpf_map *map)
-{
-	struct bpf_redirect_info *ri;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
-		/* Avoid polluting remote cacheline due to writes if
-		 * not needed. Once we pass this test, we need the
-		 * cmpxchg() to make sure it hasn't been changed in
-		 * the meantime by remote CPU.
-		 */
-		if (unlikely(READ_ONCE(ri->map) == map))
-			cmpxchg(&ri->map, map, NULL);
-	}
-}
-
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
+	enum bpf_map_type map_type = ri->map_type;
 	void *fwd = ri->tgt_value;
+	u32 map_id = ri->map_id;
 	int err;
 
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
-	if (unlikely(!map)) {
-		fwd = dev_get_by_index_rcu(dev_net(dev), index);
-		if (unlikely(!fwd)) {
-			err = -EINVAL;
-			goto err;
+	switch (map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		fallthrough;
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		err = dev_map_enqueue(fwd, xdp, dev);
+		break;
+	case BPF_MAP_TYPE_CPUMAP:
+		err = cpu_map_enqueue(fwd, xdp, dev);
+		break;
+	case BPF_MAP_TYPE_XSKMAP:
+		err = __xsk_map_redirect(fwd, xdp);
+		break;
+	case BPF_MAP_TYPE_UNSPEC:
+		if (map_id == INT_MAX) {
+			fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
+			if (unlikely(!fwd)) {
+				err = -EINVAL;
+				break;
+			}
+			err = dev_xdp_enqueue(fwd, xdp, dev);
+			break;
 		}
-
-		err = dev_xdp_enqueue(fwd, xdp, dev);
-	} else {
-		err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+		fallthrough;
+	default:
+		err = -EBADRQC;
 	}
 
 	if (unlikely(err))
 		goto err;
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4000,41 +3980,36 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
 				       struct bpf_prog *xdp_prog,
-				       struct bpf_map *map)
+				       void *fwd,
+				       enum bpf_map_type map_type, u32 map_id)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	u32 index = ri->tgt_index;
-	void *fwd = ri->tgt_value;
-	int err = 0;
-
-	ri->tgt_index = 0;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
-
-	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
-	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-		struct bpf_dtab_netdev *dst = fwd;
+	int err;
 
-		err = dev_map_generic_redirect(dst, skb, xdp_prog);
+	switch (map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		fallthrough;
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		err = dev_map_generic_redirect(fwd, skb, xdp_prog);
 		if (unlikely(err))
 			goto err;
-	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
-		struct xdp_sock *xs = fwd;
-
-		err = xsk_generic_rcv(xs, xdp);
+		break;
+	case BPF_MAP_TYPE_XSKMAP:
+		err = xsk_generic_rcv(fwd, xdp);
 		if (err)
 			goto err;
 		consume_skb(skb);
-	} else {
+		break;
+	default:
 		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
 		err = -EBADRQC;
 		goto err;
 	}
 
-	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
 	return 0;
 err:
-	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
 	return err;
 }
 
@@ -4042,31 +4017,34 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = READ_ONCE(ri->map);
-	u32 index = ri->tgt_index;
-	struct net_device *fwd;
-	int err = 0;
-
-	if (map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
-						   map);
-	ri->tgt_index = 0;
-	fwd = dev_get_by_index_rcu(dev_net(dev), index);
-	if (unlikely(!fwd)) {
-		err = -EINVAL;
-		goto err;
-	}
+	enum bpf_map_type map_type = ri->map_type;
+	void *fwd = ri->tgt_value;
+	u32 map_id = ri->map_id;
+	int err;
 
-	err = xdp_ok_fwd_dev(fwd, skb->len);
-	if (unlikely(err))
-		goto err;
+	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
-	skb->dev = fwd;
-	_trace_xdp_redirect(dev, xdp_prog, index);
-	generic_xdp_tx(skb, xdp_prog);
-	return 0;
+	if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
+		fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
+		if (unlikely(!fwd)) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		err = xdp_ok_fwd_dev(fwd, skb->len);
+		if (unlikely(err))
+			goto err;
+
+		skb->dev = fwd;
+		_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
+		generic_xdp_tx(skb, xdp_prog);
+		return 0;
+	}
+
+	return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
 err:
-	_trace_xdp_redirect_err(dev, xdp_prog, index, err);
+	_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
 	return err;
 }
 
@@ -4077,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 	if (unlikely(flags))
 		return XDP_ABORTED;
 
-	ri->flags = flags;
+	/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
+	 * by map_idr) is used for ifindex based XDP redirect.
+	 */
 	ri->tgt_index = ifindex;
-	ri->tgt_value = NULL;
-	WRITE_ONCE(ri->map, NULL);
+	ri->map_id = INT_MAX;
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
 	return XDP_REDIRECT;
 }
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index fbeb4870f798..67b4ce504852 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -87,7 +87,6 @@ static void xsk_map_free(struct bpf_map *map)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 
-	bpf_clear_redirect_map(map);
 	synchronize_net();
 	bpf_map_area_free(m);
 }
-- 
cgit v1.2.3


From 0bb3262c0248d44aea3be31076f44beb82a7b120 Mon Sep 17 00:00:00 2001
From: Menglong Dong <dong.menglong@zte.com.cn>
Date: Tue, 9 Mar 2021 17:51:35 -0800
Subject: net: socket: use BIT() for MSG_*

The bit mask for MSG_* seems a little confused here. Replace it
with BIT() to make it clear to understand.

Signed-off-by: Menglong Dong <dong.menglong@zte.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 71 ++++++++++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 34 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 385894b4a8bb..e88859f38cd0 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -283,42 +283,45 @@ struct ucred {
    Added those for 1003.1g not all are supported yet
  */
 
-#define MSG_OOB		1
-#define MSG_PEEK	2
-#define MSG_DONTROUTE	4
-#define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
-#define MSG_CTRUNC	8
-#define MSG_PROBE	0x10	/* Do not send. Only probe path f.e. for MTU */
-#define MSG_TRUNC	0x20
-#define MSG_DONTWAIT	0x40	/* Nonblocking io		 */
-#define MSG_EOR         0x80	/* End of record */
-#define MSG_WAITALL	0x100	/* Wait for a full request */
-#define MSG_FIN         0x200
-#define MSG_SYN		0x400
-#define MSG_CONFIRM	0x800	/* Confirm path validity */
-#define MSG_RST		0x1000
-#define MSG_ERRQUEUE	0x2000	/* Fetch message from error queue */
-#define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
-#define MSG_MORE	0x8000	/* Sender will send more */
-#define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
-#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
-#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
-#define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
-#define MSG_EOF         MSG_FIN
-#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
-#define MSG_SENDPAGE_DECRYPTED	0x100000 /* sendpage() internal : page may carry
-					  * plain text and require encryption
-					  */
-
-#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
-#define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
-#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
-					   descriptor received through
-					   SCM_RIGHTS */
+#define MSG_OOB		BIT(0)
+#define MSG_PEEK	BIT(1)
+#define MSG_DONTROUTE	BIT(2)
+#define MSG_TRYHARD	BIT(2)	/* Synonym for MSG_DONTROUTE for DECnet		*/
+#define MSG_CTRUNC	BIT(3)
+#define MSG_PROBE	BIT(4)	/* Do not send. Only probe path f.e. for MTU	*/
+#define MSG_TRUNC	BIT(5)
+#define MSG_DONTWAIT	BIT(6)	/* Nonblocking io		*/
+#define MSG_EOR		BIT(7)	/* End of record		*/
+#define MSG_WAITALL	BIT(8)	/* Wait for a full request	*/
+#define MSG_FIN		BIT(9)
+#define MSG_SYN		BIT(10)
+#define MSG_CONFIRM	BIT(11)	/* Confirm path validity	*/
+#define MSG_RST		BIT(12)
+#define MSG_ERRQUEUE	BIT(13)	/* Fetch message from error queue */
+#define MSG_NOSIGNAL	BIT(14)	/* Do not generate SIGPIPE	*/
+#define MSG_MORE	BIT(15)	/* Sender will send more	*/
+#define MSG_WAITFORONE	BIT(16)	/* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY	BIT(16)	/* sendpage() internal : do no apply policy */
+#define MSG_SENDPAGE_NOTLAST	BIT(17)	/* sendpage() internal : not the last page  */
+#define MSG_BATCH	BIT(18)		/* sendmmsg(): more messages coming */
+#define MSG_EOF		MSG_FIN
+#define MSG_NO_SHARED_FRAGS	BIT(19)	/* sendpage() internal : page frags
+					 * are not shared
+					 */
+#define MSG_SENDPAGE_DECRYPTED	BIT(20)	/* sendpage() internal : page may carry
+					 * plain text and require encryption
+					 */
+
+#define MSG_ZEROCOPY	BIT(26)		/* Use user data in kernel path */
+#define MSG_FASTOPEN	BIT(29)		/* Send data in TCP SYN */
+#define MSG_CMSG_CLOEXEC	BIT(30)	/* Set close_on_exec for file
+					 * descriptor received through
+					 * SCM_RIGHTS
+					 */
 #if defined(CONFIG_COMPAT)
-#define MSG_CMSG_COMPAT	0x80000000	/* This message needs 32 bit fixups */
+#define MSG_CMSG_COMPAT	BIT(31)	/* This message needs 32 bit fixups */
 #else
-#define MSG_CMSG_COMPAT	0		/* We never have 32 bit fixups */
+#define MSG_CMSG_COMPAT	0	/* We never have 32 bit fixups */
 #endif
 
 
-- 
cgit v1.2.3


From 97c2c69e1926260c78c7f1c0b2c987934f1dc7a1 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 10 Mar 2021 10:24:45 +0800
Subject: virtio-net: support XDP when not more queues

The number of queues implemented by many virtio backends is limited,
especially some machines have a large number of CPUs. In this case, it
is often impossible to allocate a separate queue for
XDP_TX/XDP_REDIRECT, then xdp cannot be loaded to work, even xdp does
not use the XDP_TX/XDP_REDIRECT.

This patch allows XDP_TX/XDP_REDIRECT to run by reuse the existing SQ
with __netif_tx_lock() hold when there are not enough queues.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Dust Li <dust.li@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 62 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 708a8b2ca25b..e97288dd6e5a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -195,6 +195,9 @@ struct virtnet_info {
 	/* # of XDP queue pairs currently used by the driver */
 	u16 xdp_queue_pairs;
 
+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
+	bool xdp_enabled;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -481,12 +484,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	return 0;
 }
 
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
-	unsigned int qp;
-
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	return &vi->sq[qp];
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use marco instead of inline functions because we have to deal with
+ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
+ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
+ * functions to perfectly solve these three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({                                       \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+	unsigned int qp;                                                \
+									\
+	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
+		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
+		qp += smp_processor_id();                               \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_acquire(txq);                                \
+	} else {                                                        \
+		qp = smp_processor_id() % v->curr_queue_pairs;          \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_lock(txq, raw_smp_processor_id());           \
+	}                                                               \
+	v->sq + qp;                                                     \
+})
+
+#define virtnet_xdp_put_sq(vi, q) {                                     \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+									\
+	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
+	if (v->curr_queue_pairs > nr_cpu_ids)                           \
+		__netif_tx_release(txq);                                \
+	else                                                            \
+		__netif_tx_unlock(txq);                                 \
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -512,7 +544,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	if (!xdp_prog)
 		return -ENXIO;
 
-	sq = virtnet_xdp_sq(vi);
+	sq = virtnet_xdp_get_sq(vi);
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
@@ -560,12 +592,13 @@ out:
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);
 
+	virtnet_xdp_put_sq(vi, sq);
 	return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
-	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
 }
 
 /* We copy the packet for XDP in the following cases:
@@ -1458,12 +1491,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 		xdp_do_flush();
 
 	if (xdp_xmit & VIRTIO_XDP_TX) {
-		sq = virtnet_xdp_sq(vi);
+		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			sq->stats.kicks++;
 			u64_stats_update_end(&sq->stats.syncp);
 		}
+		virtnet_xdp_put_sq(vi, sq);
 	}
 
 	return received;
@@ -2418,10 +2452,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
 	/* XDP requires extra queues for XDP_TX */
 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
-		netdev_warn(dev, "request %i queues but max is %i\n",
+		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
 			    curr_qp + xdp_qp, vi->max_queue_pairs);
-		return -ENOMEM;
+		xdp_qp = 0;
 	}
 
 	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@ -2455,11 +2488,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	vi->xdp_queue_pairs = xdp_qp;
 
 	if (prog) {
+		vi->xdp_enabled = true;
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
 		}
+	} else {
+		vi->xdp_enabled = false;
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2527,7 +2563,7 @@ static int virtnet_set_features(struct net_device *dev,
 	int err;
 
 	if ((dev->features ^ features) & NETIF_F_LRO) {
-		if (vi->xdp_queue_pairs)
+		if (vi->xdp_enabled)
 			return -EBUSY;
 
 		if (features & NETIF_F_LRO)
-- 
cgit v1.2.3


From 67a580aad1797938a1492f41f3f1447e751969e8 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Wed, 10 Mar 2021 11:06:03 +0800
Subject: drivers: isdn: mISDN: fix spelling typo of 'wheter'

wheter -> whether

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/l1oip_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index b57dcb834594..facbd886ee1c 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -200,7 +200,7 @@
 
  The complete socket opening and closing is done by a thread.
  When the thread opened a socket, the hc->socket descriptor is set. Whenever a
- packet shall be sent to the socket, the hc->socket must be checked wheter not
+ packet shall be sent to the socket, the hc->socket must be checked whether not
  NULL. To prevent change in socket descriptor, the hc->socket_lock must be used.
  To change the socket, a recall of l1oip_socket_open() will safely kill the
  socket process and create a new one.
-- 
cgit v1.2.3


From 4b18d5d1b2bab6d6ee20854c570bf29b560e9734 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Wed, 10 Mar 2021 11:06:46 +0800
Subject: net: ethernet: chelsiofix: spelling typo of 'rewriteing'

rewriteing -> rewriting

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 83b46440408b..b1cae5a19839 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -979,7 +979,7 @@ void clear_filter(struct adapter *adap, struct filter_entry *f)
 {
 	struct port_info *pi = netdev_priv(f->dev);
 
-	/* If the new or old filter have loopback rewriteing rules then we'll
+	/* If the new or old filter have loopback rewriting rules then we'll
 	 * need to free any existing L2T, SMT, CLIP entries of filter
 	 * rule.
 	 */
-- 
cgit v1.2.3


From 3e6f20e09a455d95e61c60d03a9994848b0f853c Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Wed, 10 Mar 2021 11:23:43 +0800
Subject: net/rds: Drop duplicate sin and sin6 assignments

There is no need to assign the msg->msg_name to sin or sin6,
because there is DECLARE_SOCKADDR statement.

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/recv.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/rds/recv.c b/net/rds/recv.c
index aba4afe4dfed..4db109fb6ec2 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -722,8 +722,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 		if (msg->msg_name) {
 			if (ipv6_addr_v4mapped(&inc->i_saddr)) {
-				sin = (struct sockaddr_in *)msg->msg_name;
-
 				sin->sin_family = AF_INET;
 				sin->sin_port = inc->i_hdr.h_sport;
 				sin->sin_addr.s_addr =
@@ -731,8 +729,6 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 				msg->msg_namelen = sizeof(*sin);
 			} else {
-				sin6 = (struct sockaddr_in6 *)msg->msg_name;
-
 				sin6->sin6_family = AF_INET6;
 				sin6->sin6_port = inc->i_hdr.h_sport;
 				sin6->sin6_addr = inc->i_saddr;
-- 
cgit v1.2.3


From 537a0c5c4218329990dc8973068f3bfe5c882623 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:30:20 -0600
Subject: net: fddi: skfp: smt: Replace one-element array with flexible-array
 member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having
a dynamically sized set of trailing elements in a structure. Kernel code
should always use “flexible array members”[1] for these cases. The older
style of one-element or zero-length arrays should no longer be used[2].

Refactor the code according to the use of flexible-array members in
smt_sif_operation structure, instead of one-element arrays. Also, make
use of the struct_size() helper instead of the open-coded version
to calculate the size of the struct-with-flex-array. Additionally, make
use of the typeof operator to properly determine the object type to be
passed to macro smtod().

Also, this helps the ongoing efforts to enable -Warray-bounds by fixing
the following warnings:

  CC [M]  drivers/net/fddi/skfp/smt.o
drivers/net/fddi/skfp/smt.c: In function ‘smt_send_sif_operation’:
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */
      |                   ^~~
drivers/net/fddi/skfp/smt.c:1084:30: warning: array subscript 1 is above array bounds of ‘struct smt_p_lem[1]’ [-Warray-bounds]
 1084 |    smt_fill_lem(smc,&sif->lem[i],i) ;
      |                      ~~~~~~~~^~~
In file included from drivers/net/fddi/skfp/h/smc.h:42,
                 from drivers/net/fddi/skfp/smt.c:15:
drivers/net/fddi/skfp/h/smt.h:767:19: note: while referencing ‘lem’
  767 |  struct smt_p_lem lem[1] ; /* phy lem status */

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.10/process/deprecated.html#zero-length-and-one-element-arrays

Link: https://github.com/KSPP/linux/issues/79
Link: https://github.com/KSPP/linux/issues/109
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/skfp/h/smt.h | 4 +---
 drivers/net/fddi/skfp/smt.c   | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/fddi/skfp/h/smt.h b/drivers/net/fddi/skfp/h/smt.h
index a0dbc0f57a55..751b5bb87a9d 100644
--- a/drivers/net/fddi/skfp/h/smt.h
+++ b/drivers/net/fddi/skfp/h/smt.h
@@ -764,10 +764,8 @@ struct smt_sif_operation {
 	struct smt_p_setcount	setcount ;	 /* Set Count mandatory */
 #endif
 	/* must be last */
-	struct smt_p_lem	lem[1] ;	/* phy lem status */
+	struct smt_p_lem	lem[];		/* phy lem status */
 } ;
-#define SIZEOF_SMT_SIF_OPERATION	(sizeof(struct smt_sif_operation)- \
-					 sizeof(struct smt_p_lem))
 
 /*
  * ECF : echo frame
diff --git a/drivers/net/fddi/skfp/smt.c b/drivers/net/fddi/skfp/smt.c
index 774a6e3b0a67..6b68a53f1b38 100644
--- a/drivers/net/fddi/skfp/smt.c
+++ b/drivers/net/fddi/skfp/smt.c
@@ -1063,9 +1063,9 @@ static void smt_send_sif_operation(struct s_smc *smc, struct fddi_addr *dest,
 #endif
 
 	if (!(mb = smt_build_frame(smc,SMT_SIF_OPER,SMT_REPLY,
-		SIZEOF_SMT_SIF_OPERATION+ports*sizeof(struct smt_p_lem))))
+				   struct_size(sif, lem, ports))))
 		return ;
-	sif = smtod(mb, struct smt_sif_operation *) ;
+	sif = smtod(mb, typeof(sif));
 	smt_fill_timestamp(smc,&sif->ts) ;	/* set time stamp */
 	smt_fill_mac_status(smc,&sif->status) ; /* set mac status */
 	smt_fill_mac_counter(smc,&sif->mc) ; /* set mac counter field */
-- 
cgit v1.2.3


From fdeadd6e49b32115c233e5b090dac419b9922a1b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:33:16 -0600
Subject: net: mscc: ocelot: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_vcap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c
index 37a232911395..7945393a0655 100644
--- a/drivers/net/ethernet/mscc/ocelot_vcap.c
+++ b/drivers/net/ethernet/mscc/ocelot_vcap.c
@@ -761,6 +761,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix,
 			vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_ETYPE,
 					   etype.value, etype.mask);
 		}
+		break;
 	}
 	default:
 		break;
-- 
cgit v1.2.3


From 097c8ff042e3430a148d624b3c5bf0677dce3007 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:34:53 -0600
Subject: net: 3c509: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/3com/3c509.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 53e1f7e07959..96cc5fc36eb5 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -1051,6 +1051,7 @@ el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd)
 		break;
 	case 3:
 		cmd->base.port = PORT_BNC;
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 2a86b4a7f7ff8413490134104d7bdad7a693deaf Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:37:02 -0600
Subject: net: cassini: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/cassini.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 9ff894ba8d3e..54f45d8c79a7 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -1599,6 +1599,7 @@ static inline int cas_mdio_link_not_up(struct cas *cp)
 			cas_phy_write(cp, MII_BMCR, val);
 			break;
 		}
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 4cdbe58b4b3c98153790e0a2dcb1b0ec97d03c9a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:38:11 -0600
Subject: decnet: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2193ae529e75..37c90c1fdc93 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1407,7 +1407,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 			flags |= RTCF_DOREDIRECT;
 
 		local_src = DN_FIB_RES_PREFSRC(res);
-
+		break;
 	case RTN_BLACKHOLE:
 	case RTN_UNREACHABLE:
 		break;
-- 
cgit v1.2.3


From 5646fba6eaff6b8da23f6e29b88218b4de4dac78 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:39:41 -0600
Subject: net: ax25: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 269ee89d2c2b..2631efc6e359 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -850,6 +850,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
 		case AX25_P_ROSE:
 			if (ax25_protocol_is_registered(AX25_P_ROSE))
 				return -ESOCKTNOSUPPORT;
+			break;
 #endif
 		default:
 			break;
-- 
cgit v1.2.3


From ecd1c6a51fcc0389d0dd3e3ea70edab3520f487d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:41:15 -0600
Subject: net: bridge: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 222285d9dae2..8875e953ac53 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -144,6 +144,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 		break;
 	case BR_PKT_UNICAST:
 		dst = br_fdb_find_rcu(br, eth_hdr(skb)->h_dest, vid);
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From b1866bfff9223e4d15727e05a865b744a163eff2 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:42:43 -0600
Subject: net: core: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 6c5967e80132..2bfdd528c7c3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5265,6 +5265,7 @@ skip_classify:
 			goto another_round;
 		case RX_HANDLER_EXACT:
 			deliver_exact = true;
+			break;
 		case RX_HANDLER_PASS:
 			break;
 		default:
-- 
cgit v1.2.3


From 90d181ca488f466904ea59dd5c836f766b69c71b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:43:45 -0600
Subject: net: rose: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rose/rose_route.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 6e35703ff353..c0e04c261a15 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -347,6 +347,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
 				case 1:
 					rose_node->neighbour[1] =
 						rose_node->neighbour[2];
+					break;
 				case 2:
 					break;
 				}
@@ -508,6 +509,7 @@ void rose_rt_device_down(struct net_device *dev)
 					fallthrough;
 				case 1:
 					t->neighbour[1] = t->neighbour[2];
+					break;
 				case 2:
 					break;
 				}
-- 
cgit v1.2.3


From 71ae2cb30531725dbc53329e2aac42294692a008 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 9 Mar 2021 23:45:22 -0600
Subject: net: plip: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/plip/plip.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index 4406b353123e..e26cf91bdec2 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -516,6 +516,7 @@ plip_receive(unsigned short nibble_timeout, struct net_device *dev,
 		*data_p |= (c0 << 1) & 0xf0;
 		write_data (dev, 0x00); /* send ACK */
 		*ns_p = PLIP_NB_BEGIN;
+		break;
 	case PLIP_NB_2:
 		break;
 	}
@@ -808,6 +809,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl,
 				return HS_TIMEOUT;
 			}
 		}
+		break;
 
 	case PLIP_PK_LENGTH_LSB:
 		if (plip_send(nibble_timeout, dev,
-- 
cgit v1.2.3


From 134639e9cc0f921c2c3ad44a5bea00219f8b0ada Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 10 Mar 2021 00:07:01 -0600
Subject: qed: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding a couple of break statements instead of
just letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Reviewed-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c    | 1 +
 drivers/net/ethernet/qlogic/qed/qed_sriov.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 07824bf9d68d..dfaf10edfabf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -396,6 +396,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 		tpa_param->tpa_ipv6_en_flg = 1;
 		tpa_param->tpa_pkt_split_flg = 1;
 		tpa_param->tpa_gro_consistent_flg = 1;
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index b8dc5c4591ef..ed2b6fe5a78d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4734,6 +4734,7 @@ void qed_inform_vf_link_state(struct qed_hwfn *hwfn)
 			 */
 			link.speed = (hwfn->cdev->num_hwfns > 1) ?
 				     100000 : 40000;
+			break;
 		default:
 			/* In auto mode pass PF link image to VF */
 			break;
-- 
cgit v1.2.3


From 1ddc3229ad3c40840c24a699ada5cfeb4319b578 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 10 Mar 2021 16:28:58 +0800
Subject: skbuff: remove some unnecessary operation in skb_segment_list()

gro list uses skb_shinfo(skb)->frag_list to link two skb together,
and NAPI_GRO_CB(p)->last->next is used when there are more skb,
see skb_gro_receive_list(). gso expects that each segmented skb is
linked together using skb->next, so only the first skb->next need
to set to skb_shinfo(skb)-> frag_list when doing gso list segment.

It is the same reason that nskb->next does not need to be set to
list_skb before goto the error handling, because nskb->next already
pointers to list_skb.

And nskb is also the last skb at the end of loop, so remove tail
variable and use nskb instead.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c421c8f80925..e8320b5d651a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3732,13 +3732,13 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int delta_truesize = 0;
 	unsigned int delta_len = 0;
-	struct sk_buff *tail = NULL;
 	struct sk_buff *nskb, *tmp;
 	int err;
 
 	skb_push(skb, -skb_network_offset(skb) + offset);
 
 	skb_shinfo(skb)->frag_list = NULL;
+	skb->next = list_skb;
 
 	do {
 		nskb = list_skb;
@@ -3756,17 +3756,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 			}
 		}
 
-		if (!tail)
-			skb->next = nskb;
-		else
-			tail->next = nskb;
-
-		if (unlikely(err)) {
-			nskb->next = list_skb;
+		if (unlikely(err))
 			goto err_linearize;
-		}
-
-		tail = nskb;
 
 		delta_len += nskb->len;
 		delta_truesize += nskb->truesize;
@@ -3793,7 +3784,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 
 	skb_gso_reset(skb);
 
-	skb->prev = tail;
+	skb->prev = nskb;
 
 	if (skb_needs_linearize(skb, features) &&
 	    __skb_linearize(skb))
-- 
cgit v1.2.3


From 3559c1ea4336636c886002996d50805365d3055c Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Wed, 10 Mar 2021 09:48:13 +0100
Subject: net: broadcom: bcm4908_enet: read MAC from OF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM4908 devices have MAC address accessible using NVMEM so it's needed
to use OF helper for reading it.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcm4908_enet.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 98cf82dea3e4..199da299cbd0 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_net.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -620,6 +621,7 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct net_device *netdev;
 	struct bcm4908_enet *enet;
+	const u8 *mac;
 	int err;
 
 	netdev = devm_alloc_etherdev(dev, sizeof(*enet));
@@ -647,7 +649,11 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 		return err;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	eth_hw_addr_random(netdev);
+	mac = of_get_mac_address(dev->of_node);
+	if (!IS_ERR(mac))
+		ether_addr_copy(netdev->dev_addr, mac);
+	else
+		eth_hw_addr_random(netdev);
 	netdev->netdev_ops = &bcm4908_enet_netdev_ops;
 	netdev->min_mtu = ETH_ZLEN;
 	netdev->mtu = ETH_DATA_LEN;
-- 
cgit v1.2.3


From 762c1adb1c15655307b079393f14f098d135f051 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 10 Mar 2021 16:53:04 +0800
Subject: isdn: mISDN: remove unneeded variable 'ret'

Fix the following coccicheck warning:
./drivers/isdn/mISDN/dsp_core.c:956:6-9: Unneeded variable: "err".
Return "0" on line 1001

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/dsp_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 038e72a84b33..4946ea14bf74 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -953,7 +953,6 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 {
 	struct dsp		*dsp = container_of(ch, struct dsp, ch);
 	u_long		flags;
-	int		err = 0;
 
 	if (debug & DEBUG_DSP_CTRL)
 		printk(KERN_DEBUG "%s:(%x)\n", __func__, cmd);
@@ -998,7 +997,7 @@ dsp_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 		module_put(THIS_MODULE);
 		break;
 	}
-	return err;
+	return 0;
 }
 
 static void
-- 
cgit v1.2.3


From 58c04397f74b7e949751663ac1dbfb4e964c389d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 10 Mar 2021 12:33:20 +0200
Subject: sched: act_sample: Implement stats_update callback

Implement this callback in order to get the offloaded stats added to the
kernel stats.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_sample.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3ebf9ede3cf1..db8ee9e5c8c2 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -194,6 +194,16 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	return retval;
 }
 
+static void tcf_sample_stats_update(struct tc_action *a, u64 bytes, u64 packets,
+				    u64 drops, u64 lastuse, bool hw)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct tcf_t *tm = &s->tcf_tm;
+
+	tcf_action_update_stats(a, bytes, packets, drops, hw);
+	tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
 static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 			   int bind, int ref)
 {
@@ -280,6 +290,7 @@ static struct tc_action_ops act_sample_ops = {
 	.id	  = TCA_ID_SAMPLE,
 	.owner	  = THIS_MODULE,
 	.act	  = tcf_sample_act,
+	.stats_update = tcf_sample_stats_update,
 	.dump	  = tcf_sample_dump,
 	.init	  = tcf_sample_init,
 	.cleanup  = tcf_sample_cleanup,
-- 
cgit v1.2.3


From e56763ee50a3f28d2f70355ca43fb78d8539a183 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:02:42 +0100
Subject: FDDI: if_fddi.h: Update my e-mail address

Following the recent update to MAINTAINERS update my e-mail address.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_fddi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h
index 7239aa9c0766..8df2d9934bcd 100644
--- a/include/uapi/linux/if_fddi.h
+++ b/include/uapi/linux/if_fddi.h
@@ -9,7 +9,7 @@
  * Version:	@(#)if_fddi.h	1.0.3	Oct  6 2018
  *
  * Author:	Lawrence V. Stefani, <stefani@yahoo.com>
- * Maintainer:	Maciej W. Rozycki, <macro@linux-mips.org>
+ * Maintainer:	Maciej W. Rozycki, <macro@orcam.me.uk>
  *
  *		if_fddi.h is based on previous if_ether.h and if_tr.h work by
  *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
-- 
cgit v1.2.3


From 4d248c0deea53af92c170cdfb1a127c07ccc190d Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:02:49 +0100
Subject: FDDI: defxx: Update my e-mail address

Following the recent update to MAINTAINERS update my e-mail address.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defxx.c | 2 +-
 drivers/net/fddi/defxx.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 077c68498f04..98302356dcb4 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -21,7 +21,7 @@
  *   LVS	Lawrence V. Stefani <lstefani@yahoo.com>
  *
  * Maintainers:
- *   macro	Maciej W. Rozycki <macro@linux-mips.org>
+ *   macro	Maciej W. Rozycki <macro@orcam.me.uk>
  *
  * Credits:
  *   I'd like to thank Patricia Cross for helping me get started with
diff --git a/drivers/net/fddi/defxx.h b/drivers/net/fddi/defxx.h
index 9d30fde2ef3c..4c31d31e0a3f 100644
--- a/drivers/net/fddi/defxx.h
+++ b/drivers/net/fddi/defxx.h
@@ -16,7 +16,7 @@
  *   LVS	Lawrence V. Stefani <lstefani@yahoo.com>
  *
  * Maintainers:
- *   macro	Maciej W. Rozycki <macro@linux-mips.org>
+ *   macro	Maciej W. Rozycki <macro@orcam.me.uk>
  *
  * Modification History:
  *		Date		Name	Description
-- 
cgit v1.2.3


From aa27b8f7a048f1cf888b1cb67160e26e069a263c Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:02:54 +0100
Subject: FDDI: defza: Update my e-mail address

Following the recent update to MAINTAINERS update my e-mail address.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/fddi/defza.rst | 2 +-
 drivers/net/fddi/defza.c                               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/device_drivers/fddi/defza.rst b/Documentation/networking/device_drivers/fddi/defza.rst
index 73c2f793ea26..7393f33ea705 100644
--- a/Documentation/networking/device_drivers/fddi/defza.rst
+++ b/Documentation/networking/device_drivers/fddi/defza.rst
@@ -60,4 +60,4 @@ To do:
 
 Both success and failure reports are welcome.
 
-Maciej W. Rozycki  <macro@linux-mips.org>
+Maciej W. Rozycki  <macro@orcam.me.uk>
diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index eaf85db53a5e..14f07050b6b1 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -60,7 +60,7 @@
 static const char version[] =
 	DRV_NAME ": " DRV_VERSION "  " DRV_RELDATE "  Maciej W. Rozycki\n";
 
-MODULE_AUTHOR("Maciej W. Rozycki <macro@linux-mips.org>");
+MODULE_AUTHOR("Maciej W. Rozycki <macro@orcam.me.uk>");
 MODULE_DESCRIPTION("DEC FDDIcontroller 700 (DEFZA-xx) driver");
 MODULE_LICENSE("GPL");
 
-- 
cgit v1.2.3


From 847cbfc014adafeac401e19e349b0fd524f201c3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 16:50:44 +0200
Subject: net: add a helper to avoid issues with HW TX timestamping and
 SO_TXTIME

As explained in commit 29d98f54a4fe ("net: enetc: allow hardware
timestamping on TX queues with tc-etf enabled"), hardware TX
timestamping requires an skb with skb->tstamp = 0. When a packet is sent
with SO_TXTIME, the skb->skb_mstamp_ns corrupts the value of skb->tstamp,
so the drivers need to explicitly reset skb->tstamp to zero after
consuming the TX time.

Create a helper named skb_txtime_consumed() which does just that. All
drivers which offload TC_SETUP_QDISC_ETF should implement it, and it
would make it easier to assess during review whether they do the right
thing in order to be compatible with hardware timestamping or not.

Suggested-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++------
 drivers/net/ethernet/intel/igb/igb_main.c    | 2 +-
 drivers/net/ethernet/intel/igc/igc_main.c    | 2 +-
 include/net/pkt_sched.h                      | 9 +++++++++
 4 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 09471329f3a3..89d2cb348271 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -5,6 +5,7 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/vmalloc.h>
+#include <net/pkt_sched.h>
 
 /* ENETC overhead: optional extension BD + 1 BD gap */
 #define ENETC_TXBDS_NEEDED(val)	((val) + 2)
@@ -344,12 +345,7 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
 	if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
 		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 		shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
-		/* Ensure skb_mstamp_ns, which might have been populated with
-		 * the txtime, is not mistaken for a software timestamp,
-		 * because this will prevent the dispatch of our hardware
-		 * timestamp to the socket.
-		 */
-		skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(skb);
 		skb_tstamp_tx(skb, &shhwtstamps);
 	}
 }
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 878b31d534ec..369533feb4f2 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5856,7 +5856,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
 	 */
 	if (tx_ring->launchtime_enable) {
 		ts = ktime_to_timespec64(first->skb->tstamp);
-		first->skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(first->skb);
 		context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
 	} else {
 		context_desc->seqnum_seed = 0;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7ac9597ddb84..059ffcfb0bda 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -941,7 +941,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
 		ktime_t txtime = first->skb->tstamp;
 
-		first->skb->tstamp = ktime_set(0, 0);
+		skb_txtime_consumed(first->skb);
 		context_desc->launch_time = igc_tx_launchtime(adapter,
 							      txtime);
 	} else {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15b1b30f454e..f5c1bee0cd6a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -188,4 +188,13 @@ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
 						  *offload);
 void taprio_offload_free(struct tc_taprio_qopt_offload *offload);
 
+/* Ensure skb_mstamp_ns, which might have been populated with the txtime, is
+ * not mistaken for a software timestamp, because this will otherwise prevent
+ * the dispatch of hardware timestamps to the socket.
+ */
+static inline void skb_txtime_consumed(struct sk_buff *skb)
+{
+	skb->tstamp = ktime_set(0, 0);
+}
+
 #endif
-- 
cgit v1.2.3


From 13fdb9403d9ec41ab237d039db6c263788a2b587 Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Thu, 11 Mar 2021 02:03:14 +0530
Subject: net: ipv6: route.c:fix indentation

The series of space has been replaced by tab space
wherever required.

Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1536f4948e86..60058f3dcc48 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2378,7 +2378,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
 
 			memset(&hash_keys, 0, sizeof(hash_keys));
 
-                        if (!flkeys) {
+			if (!flkeys) {
 				skb_flow_dissect_flow_keys(skb, &keys, flag);
 				flkeys = &keys;
 			}
@@ -2518,20 +2518,20 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
 					 struct flowi6 *fl6,
 					 int flags)
 {
-        struct dst_entry *dst;
-        struct rt6_info *rt6;
+	struct dst_entry *dst;
+	struct rt6_info *rt6;
 
-        rcu_read_lock();
-        dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
-        rt6 = (struct rt6_info *)dst;
-        /* For dst cached in uncached_list, refcnt is already taken. */
-        if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
-                dst = &net->ipv6.ip6_null_entry->dst;
-                dst_hold(dst);
-        }
-        rcu_read_unlock();
+	rcu_read_lock();
+	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+	rt6 = (struct rt6_info *)dst;
+	/* For dst cached in uncached_list, refcnt is already taken. */
+	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
+	}
+	rcu_read_unlock();
 
-        return dst;
+	return dst;
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
-- 
cgit v1.2.3


From 825e8885779d62c34eb78769fe32caab92b08a98 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:15 +0200
Subject: mlxsw: spectrum: Reword an error message for Q-in-Q veto

'Uppers' is not clear enough for all users when referring to upper
devices.

Reword the error message so it will be clearer.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 1650d9852b5b..5d2b965f934f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4283,7 +4283,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev,
 		if (br_vlan_enabled(br_dev)) {
 			br_vlan_get_proto(br_dev, &proto);
 			if (proto == ETH_P_8021AD) {
-				NL_SET_ERR_MSG_MOD(extack, "Uppers are not supported on top of an 802.1ad bridge");
+				NL_SET_ERR_MSG_MOD(extack, "Upper devices are not supported on top of an 802.1ad bridge");
 				return -EOPNOTSUPP;
 			}
 		}
-- 
cgit v1.2.3


From 675e5a1e1afa4cb20337f83c9e885cae977c87cc Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:16 +0200
Subject: mlxsw: reg: Fix comment about slot_index field in PMAOS register

The comment did not include the register name.
Add `pmaos` to align the comment with other comments.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index c4adc7f740d3..afd42907092f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5637,7 +5637,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port)
 
 MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN);
 
-/* reg_slot_index
+/* reg_pmaos_slot_index
  * Slot index.
  * Access: Index
  */
-- 
cgit v1.2.3


From 2ab781c2ccf477012489e30ad0f1c1aa42398437 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:17 +0200
Subject: mlxsw: spectrum: Bump minimum FW version to xx.2008.2406

The indicated version fixes the following two issues:

- MIRROR_SAMPLER_ACTION.mirror_probability_rate inverted. This has
  implication for per-flow sampling.

- When adjacency is replaced-if-inactive (RATR.opcode=3), bad parameter
  was reported when replacing an active entry. This breaks offload of
  resilient next-hop groups.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5d2b965f934f..dbf95e52b341 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -45,7 +45,7 @@
 
 #define MLXSW_SP1_FWREV_MAJOR 13
 #define MLXSW_SP1_FWREV_MINOR 2008
-#define MLXSW_SP1_FWREV_SUBMINOR 2018
+#define MLXSW_SP1_FWREV_SUBMINOR 2406
 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
 
 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -62,7 +62,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
 
 #define MLXSW_SP2_FWREV_MAJOR 29
 #define MLXSW_SP2_FWREV_MINOR 2008
-#define MLXSW_SP2_FWREV_SUBMINOR 2018
+#define MLXSW_SP2_FWREV_SUBMINOR 2406
 
 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
 	.major = MLXSW_SP2_FWREV_MAJOR,
@@ -77,7 +77,7 @@ static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
 
 #define MLXSW_SP3_FWREV_MAJOR 30
 #define MLXSW_SP3_FWREV_MINOR 2008
-#define MLXSW_SP3_FWREV_SUBMINOR 2018
+#define MLXSW_SP3_FWREV_SUBMINOR 2406
 
 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = {
 	.major = MLXSW_SP3_FWREV_MAJOR,
-- 
cgit v1.2.3


From ff12ba3ad78db47f8c824841259c7cc188f37f19 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:18 +0200
Subject: mlxsw: reg: Extend MFDE register with new log_ip field

Extend MFDE (Monitoring FW Debug) register with new field specifying the
instruction pointer that triggered the CR space timeout.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index afd42907092f..a042ff79d306 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -10979,6 +10979,13 @@ MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32);
  */
 MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4);
 
+/* reg_mfde_log_ip
+ * IP (instruction pointer) that triggered the timeout.
+ * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, mfde, log_ip, 0x18, 0, 64);
+
 /* reg_mfde_pipes_mask
  * Bit per kvh pipe.
  * Access: RO
-- 
cgit v1.2.3


From 315afd2068a861650510f31cfed11c138062172a Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:19 +0200
Subject: mlxsw: core: Expose MFDE.log_ip to devlink health

Add the MFDE.log_ip field to devlink health reporter in order to ease
firmware debug. This field encodes the instruction pointer that triggered
the CR space timeout.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 52fdc34251ba..c53461ac4e10 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1806,6 +1806,10 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
 		err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val);
 		if (err)
 			return err;
+		val = mlxsw_reg_mfde_log_ip_get(mfde_pl);
+		err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val);
+		if (err)
+			return err;
 	} else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) {
 		val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl);
 		err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
-- 
cgit v1.2.3


From 4734a750f4674631ab9896189810b57700597aa7 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Wed, 10 Mar 2021 13:02:20 +0200
Subject: mlxsw: Adjust some MFDE fields shift and size to fw implementation

MFDE.irisc_id and MFDE.event_id were adjusted according to what is
actually implemented in firmware.

Adjust the shift and size of these fields in mlxsw as well.

Note that the displacement of the first field is not a regression.
It was always incorrect and therefore reported "0".

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/reg.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index c53461ac4e10..7e9a7cb31720 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1728,7 +1728,7 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
 		return err;
 
 	event_id = mlxsw_reg_mfde_event_id_get(mfde_pl);
-	err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id);
+	err = devlink_fmsg_u32_pair_put(fmsg, "id", event_id);
 	if (err)
 		return err;
 	switch (event_id) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index a042ff79d306..2f7f691f85ff 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -10919,7 +10919,7 @@ MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN);
  * Which irisc triggered the event
  * Access: RO
  */
-MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 8, 4);
+MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 24, 8);
 
 enum mlxsw_reg_mfde_event_id {
 	MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1,
@@ -10930,7 +10930,7 @@ enum mlxsw_reg_mfde_event_id {
 /* reg_mfde_event_id
  * Access: RO
  */
-MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 8);
+MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 16);
 
 enum mlxsw_reg_mfde_method {
 	MLXSW_REG_MFDE_METHOD_QUERY,
-- 
cgit v1.2.3


From f626ca682912fab55dff15469ce893ae16b65c7e Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:03:09 +0100
Subject: FDDI: defxx: Bail out gracefully with unassigned PCI resource for CSR

Recent versions of the PCI Express specification have deprecated support
for I/O transactions and actually some PCIe host bridges, such as Power
Systems Host Bridge 4 (PHB4), do not implement them.

For those systems the PCI BARs that request a mapping in the I/O space
have the length recorded in the corresponding PCI resource set to zero,
which makes it unassigned:

# lspci -s 0031:02:04.0 -v
0031:02:04.0 FDDI network controller: Digital Equipment Corporation PCI-to-PDQ Interface Chip [PFI] FDDI (DEFPA) (rev 02)
	Subsystem: Digital Equipment Corporation FDDIcontroller/PCI (DEFPA)
	Flags: bus master, medium devsel, latency 136, IRQ 57, NUMA node 8
	Memory at 620c080020000 (32-bit, non-prefetchable) [size=128]
	I/O ports at <unassigned> [disabled]
	Memory at 620c080030000 (32-bit, non-prefetchable) [size=64K]
	Capabilities: [50] Power Management version 2
	Kernel driver in use: defxx
	Kernel modules: defxx

#

Regardless the driver goes ahead and requests it (here observed with a
Raptor Talos II POWER9 system), resulting in an odd /proc/ioport entry:

# cat /proc/ioports
00000000-ffffffffffffffff : 0031:02:04.0
#

Furthermore, the system gets confused as the driver actually continues
and pokes at those locations, causing a flood of messages being output
to the system console by the underlying system firmware, like:

defxx: v1.11 2014/07/01  Lawrence V. Stefani and others
defxx 0031:02:04.0: enabling device (0140 -> 0142)
LPC[000]: Got SYNC no-response error. Error address reg: 0xd0010000
IPMI: dropping non severe PEL event
LPC[000]: Got SYNC no-response error. Error address reg: 0xd0010014
IPMI: dropping non severe PEL event
LPC[000]: Got SYNC no-response error. Error address reg: 0xd0010014
IPMI: dropping non severe PEL event

and so on and so on (possibly intermixed actually, as there's no locking
between the kernel and the firmware in console port access with this
particular system, but cleaned up above for clarity), and once some 10k
of such pairs of the latter two messages have been produced an interace
eventually shows up in a useless state:

0031:02:04.0: DEFPA at I/O addr = 0x0, IRQ = 57, Hardware addr = 00-00-00-00-00-00

This was not expected to happen as resource handling was added to the
driver a while ago, because it was not known at that time that a PCI
system would be possible that cannot assign port I/O resources, and
oddly enough `request_region' does not fail, which would have caught it.

Correct the problem then by checking for the length of zero for the CSR
resource and bail out gracefully refusing to register an interface if
that turns out to be the case, producing messages like:

defxx: v1.11 2014/07/01  Lawrence V. Stefani and others
0031:02:04.0: Cannot use I/O, no address set, aborting
0031:02:04.0: Recompile driver with "CONFIG_DEFXX_MMIO=y"

Keep the original check for the EISA MMIO resource as implemented,
because in that case the length is hardwired to 0x400 as a consequence
of how the compare/mask address decoding works in the ESIC chip and it
is only the base address that is set to zero if MMIO has been disabled
for the adapter in EISA configuration, which in turn could be a valid
bus address in a legacy-free system implementing PCI, especially for
port I/O.

Where the EISA MMIO resource has been disabled for the adapter in EISA
configuration this arrangement keeps producing messages like:

eisa 00:05: EISA: slot 5: DEC3002 detected
defxx: v1.11 2014/07/01  Lawrence V. Stefani and others
00:05: Cannot use MMIO, no address set, aborting
00:05: Recompile driver with "CONFIG_DEFXX_MMIO=n"
00:05: Or run ECU and set adapter's MMIO location

with the last two lines now swapped for easier handling in the driver.

There is no need to check for and catch the case of a port I/O resource
not having been assigned for EISA as the adapter uses the slot-specific
I/O space, which gets assigned by how EISA has been specified and maps
directly to the particular slot an option card has been placed in.  And
the EISA variant of the adapter has additional registers that are only
accessible via the port I/O space anyway.

While at it factor out the error message calls into helpers and fix an
argument order bug with the `pr_err' call now in `dfx_register_res_err'.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 4d0438e56a8f ("defxx: Clean up DEFEA resource management")
Cc: stable@vger.kernel.org # v3.19+
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defxx.c | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 98302356dcb4..c62d4b539e24 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -495,6 +495,25 @@ static const struct net_device_ops dfx_netdev_ops = {
 	.ndo_set_mac_address	= dfx_ctl_set_mac_address,
 };
 
+static void dfx_register_res_alloc_err(const char *print_name, bool mmio,
+				       bool eisa)
+{
+	pr_err("%s: Cannot use %s, no address set, aborting\n",
+	       print_name, mmio ? "MMIO" : "I/O");
+	pr_err("%s: Recompile driver with \"CONFIG_DEFXX_MMIO=%c\"\n",
+	       print_name, mmio ? 'n' : 'y');
+	if (eisa && mmio)
+		pr_err("%s: Or run ECU and set adapter's MMIO location\n",
+		       print_name);
+}
+
+static void dfx_register_res_err(const char *print_name, bool mmio,
+				 unsigned long start, unsigned long len)
+{
+	pr_err("%s: Cannot reserve %s resource 0x%lx @ 0x%lx, aborting\n",
+	       print_name, mmio ? "MMIO" : "I/O", len, start);
+}
+
 /*
  * ================
  * = dfx_register =
@@ -568,15 +587,12 @@ static int dfx_register(struct device *bdev)
 	dev_set_drvdata(bdev, dev);
 
 	dfx_get_bars(bdev, bar_start, bar_len);
-	if (dfx_bus_eisa && dfx_use_mmio && bar_start[0] == 0) {
-		pr_err("%s: Cannot use MMIO, no address set, aborting\n",
-		       print_name);
-		pr_err("%s: Run ECU and set adapter's MMIO location\n",
-		       print_name);
-		pr_err("%s: Or recompile driver with \"CONFIG_DEFXX_MMIO=n\""
-		       "\n", print_name);
+	if (bar_len[0] == 0 ||
+	    (dfx_bus_eisa && dfx_use_mmio && bar_start[0] == 0)) {
+		dfx_register_res_alloc_err(print_name, dfx_use_mmio,
+					   dfx_bus_eisa);
 		err = -ENXIO;
-		goto err_out;
+		goto err_out_disable;
 	}
 
 	if (dfx_use_mmio)
@@ -585,18 +601,16 @@ static int dfx_register(struct device *bdev)
 	else
 		region = request_region(bar_start[0], bar_len[0], print_name);
 	if (!region) {
-		pr_err("%s: Cannot reserve %s resource 0x%lx @ 0x%lx, "
-		       "aborting\n", dfx_use_mmio ? "MMIO" : "I/O", print_name,
-		       (long)bar_len[0], (long)bar_start[0]);
+		dfx_register_res_err(print_name, dfx_use_mmio,
+				     bar_start[0], bar_len[0]);
 		err = -EBUSY;
 		goto err_out_disable;
 	}
 	if (bar_start[1] != 0) {
 		region = request_region(bar_start[1], bar_len[1], print_name);
 		if (!region) {
-			pr_err("%s: Cannot reserve I/O resource "
-			       "0x%lx @ 0x%lx, aborting\n", print_name,
-			       (long)bar_len[1], (long)bar_start[1]);
+			dfx_register_res_err(print_name, 0,
+					     bar_start[1], bar_len[1]);
 			err = -EBUSY;
 			goto err_out_csr_region;
 		}
@@ -604,9 +618,8 @@ static int dfx_register(struct device *bdev)
 	if (bar_start[2] != 0) {
 		region = request_region(bar_start[2], bar_len[2], print_name);
 		if (!region) {
-			pr_err("%s: Cannot reserve I/O resource "
-			       "0x%lx @ 0x%lx, aborting\n", print_name,
-			       (long)bar_len[2], (long)bar_start[2]);
+			dfx_register_res_err(print_name, 0,
+					     bar_start[2], bar_len[2]);
 			err = -EBUSY;
 			goto err_out_bh_region;
 		}
-- 
cgit v1.2.3


From 193ced4a79599352d63cb8c9e2f0c6043106eb6a Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:03:14 +0100
Subject: FDDI: defxx: Make MMIO the configuration default except for EISA

Recent versions of the PCI Express specification have deprecated support
for I/O transactions and actually some PCIe host bridges, such as Power
Systems Host Bridge 4 (PHB4), do not implement them.

The default kernel configuration choice for the defxx driver is the use
of I/O ports rather than MMIO for PCI and EISA systems.  It may have
made sense as a conservative backwards compatible choice back when MMIO
operation support was added to the driver as a part of TURBOchannel bus
support.  However nowadays this configuration choice makes the driver
unusable with systems that do not implement I/O transactions for PCIe.

Make DEFXX_MMIO the configuration default then, except where configured
for EISA.  This exception is because an EISA adapter can have its MMIO
decoding disabled with ECU (EISA Configuration Utility) and therefore
not available with the resource allocation infrastructure we implement,
while port I/O is always readily available as it uses slot-specific
addressing, directly mapped to the slot an option card has been placed
in and handled with our EISA bus support core.  Conversely a kernel that
supports modern systems which may not have I/O transactions implemented
for PCIe will usually not be expected to handle legacy EISA systems.

The change of the default will make it easier for people, including but
not limited to distribution packagers, to make a working choice for the
driver.

Update the option description accordingly and while at it replace the
potentially ambiguous PIO acronym with IOP for "port I/O" vs "I/O ports"
according to our nomenclature used elsewhere.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: e89a2cfb7d7b ("[TC] defxx: TURBOchannel support")
Cc: stable@vger.kernel.org # v2.6.21+
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/Kconfig | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/fddi/Kconfig b/drivers/net/fddi/Kconfig
index f722079dfb6a..f99c1048c97e 100644
--- a/drivers/net/fddi/Kconfig
+++ b/drivers/net/fddi/Kconfig
@@ -40,17 +40,20 @@ config DEFXX
 
 config DEFXX_MMIO
 	bool
-	prompt "Use MMIO instead of PIO" if PCI || EISA
+	prompt "Use MMIO instead of IOP" if PCI || EISA
 	depends on DEFXX
-	default n if PCI || EISA
+	default n if EISA
 	default y
 	help
 	  This instructs the driver to use EISA or PCI memory-mapped I/O
-	  (MMIO) as appropriate instead of programmed I/O ports (PIO).
+	  (MMIO) as appropriate instead of programmed I/O ports (IOP).
 	  Enabling this gives an improvement in processing time in parts
-	  of the driver, but it may cause problems with EISA (DEFEA)
-	  adapters.  TURBOchannel does not have the concept of I/O ports,
-	  so MMIO is always used for these (DEFTA) adapters.
+	  of the driver, but it requires a memory window to be configured
+	  for EISA (DEFEA) adapters that may not always be available.
+	  Conversely some PCIe host bridges do not support IOP, so MMIO
+	  may be required to access PCI (DEFPA) adapters on downstream PCI
+	  buses with some systems.  TURBOchannel does not have the concept
+	  of I/O ports, so MMIO is always used for these (DEFTA) adapters.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3


From 795e272e54746e97fde54454873d384d5012cc9d Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:03:19 +0100
Subject: FDDI: defxx: Implement dynamic CSR I/O address space selection

Recent versions of the PCI Express specification have deprecated support
for I/O transactions and actually some PCIe host bridges, such as Power
Systems Host Bridge 4 (PHB4), do not implement them.  Conversely a DEFEA
adapter can have its MMIO decoding disabled with ECU (EISA Configuration
Utility) and therefore not available for us with the resource allocation
infrastructure we implement.

However either I/O address space will always be available for use with
the DEFEA (EISA) and DEFPA (PCI) adapters and both have double address
decoding implemented in hardware for Control and Status Register access.
The two kinds of adapters can be present both at once in a single mixed
PCI/EISA system.  For the DEFTA (TURBOchannel) variant there is no issue
as there has been no port I/O address space defined for that bus.

To make people's life easier and the driver more robust remove the
DEFXX_MMIO configuration option so as to rather than making the choice
for the I/O address space to use at build time for all the adapters
installed in the system let the driver choose the most suitable address
space dynamically on a case-by-case basis at run time.  Make MMIO the
default and resort to port I/O should the default fail for some reason.

This way multiple adapters installed in one system can use different I/O
address spaces each, in particular in the presence of DEFEA adapters in
a pure-EISA or a mixed EISA/PCI system (it is expected that DEFPA boards
will use MMIO in normal circumstances).

The choice of the I/O address space to use continues being reported by
the driver on startup, e.g.:

eisa 00:05: EISA: slot 5: DEC3002 detected
defxx: v1.12 2021/03/10  Lawrence V. Stefani and others
00:05: DEFEA at I/O addr = 0x5000, IRQ = 10, Hardware addr = 00-00-f8-c8-b3-b6
00:05: registered as fddi0

and:

defxx: v1.12 2021/03/10  Lawrence V. Stefani and others
0031:02:04.0: DEFPA at MMIO addr = 0x620c080020000, IRQ = 57, Hardware addr = 00-60-6d-93-91-98
0031:02:04.0: registered as fddi0

and:

defxx: v1.12 2021/03/10  Lawrence V. Stefani and others
tc2: DEFTA at MMIO addr = 0x1f100000, IRQ = 21, Hardware addr = 08-00-2b-b0-8b-1e
tc2: registered as fddi0

so there is no need to add further information.

The change is supposed to cause a negligible performance hit as I/O
accessors will now have code executed conditionally at run time.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/Kconfig | 19 ---------------
 drivers/net/fddi/defxx.c | 60 ++++++++++++++++++------------------------------
 drivers/net/fddi/defxx.h |  3 +++
 3 files changed, 25 insertions(+), 57 deletions(-)

diff --git a/drivers/net/fddi/Kconfig b/drivers/net/fddi/Kconfig
index f99c1048c97e..846bf41c2717 100644
--- a/drivers/net/fddi/Kconfig
+++ b/drivers/net/fddi/Kconfig
@@ -38,25 +38,6 @@ config DEFXX
 	  To compile this driver as a module, choose M here: the module
 	  will be called defxx.  If unsure, say N.
 
-config DEFXX_MMIO
-	bool
-	prompt "Use MMIO instead of IOP" if PCI || EISA
-	depends on DEFXX
-	default n if EISA
-	default y
-	help
-	  This instructs the driver to use EISA or PCI memory-mapped I/O
-	  (MMIO) as appropriate instead of programmed I/O ports (IOP).
-	  Enabling this gives an improvement in processing time in parts
-	  of the driver, but it requires a memory window to be configured
-	  for EISA (DEFEA) adapters that may not always be available.
-	  Conversely some PCIe host bridges do not support IOP, so MMIO
-	  may be required to access PCI (DEFPA) adapters on downstream PCI
-	  buses with some systems.  TURBOchannel does not have the concept
-	  of I/O ports, so MMIO is always used for these (DEFTA) adapters.
-
-	  If unsure, say N.
-
 config SKFP
 	tristate "SysKonnect FDDI PCI support"
 	depends on FDDI && PCI
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index c62d4b539e24..7d733a6204e2 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -197,6 +197,7 @@
  *		23 Oct 2006	macro		Big-endian host support.
  *		14 Dec 2006	macro		TURBOchannel support.
  *		01 Jul 2014	macro		Fixes for DMA on 64-bit hosts.
+ *		10 Mar 2021	macro		Dynamic MMIO vs port I/O.
  */
 
 /* Include files */
@@ -225,8 +226,8 @@
 
 /* Version information string should be updated prior to each new release!  */
 #define DRV_NAME "defxx"
-#define DRV_VERSION "v1.11"
-#define DRV_RELDATE "2014/07/01"
+#define DRV_VERSION "v1.12"
+#define DRV_RELDATE "2021/03/10"
 
 static const char version[] =
 	DRV_NAME ": " DRV_VERSION " " DRV_RELDATE
@@ -253,10 +254,10 @@ static const char version[] =
 #define DFX_BUS_TC(dev) 0
 #endif
 
-#ifdef CONFIG_DEFXX_MMIO
-#define DFX_MMIO 1
+#if defined(CONFIG_EISA) || defined(CONFIG_PCI)
+#define dfx_use_mmio bp->mmio
 #else
-#define DFX_MMIO 0
+#define dfx_use_mmio true
 #endif
 
 /* Define module-wide (static) routines */
@@ -374,8 +375,6 @@ static inline void dfx_outl(DFX_board_t *bp, int offset, u32 data)
 static void dfx_port_write_long(DFX_board_t *bp, int offset, u32 data)
 {
 	struct device __maybe_unused *bdev = bp->bus_dev;
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_use_mmio)
 		dfx_writel(bp, offset, data);
@@ -398,8 +397,6 @@ static inline void dfx_inl(DFX_board_t *bp, int offset, u32 *data)
 static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
 {
 	struct device __maybe_unused *bdev = bp->bus_dev;
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_use_mmio)
 		dfx_readl(bp, offset, data);
@@ -421,7 +418,7 @@ static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
  *   None
  *
  * Arguments:
- *   bdev	- pointer to device information
+ *   bp		- pointer to board information
  *   bar_start	- pointer to store the start addresses
  *   bar_len	- pointer to store the lengths of the areas
  *
@@ -431,13 +428,13 @@ static void dfx_port_read_long(DFX_board_t *bp, int offset, u32 *data)
  * Side Effects:
  *   None
  */
-static void dfx_get_bars(struct device *bdev,
+static void dfx_get_bars(DFX_board_t *bp,
 			 resource_size_t *bar_start, resource_size_t *bar_len)
 {
+	struct device *bdev = bp->bus_dev;
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 
 	if (dfx_bus_pci) {
 		int num = dfx_use_mmio ? 0 : 1;
@@ -495,18 +492,6 @@ static const struct net_device_ops dfx_netdev_ops = {
 	.ndo_set_mac_address	= dfx_ctl_set_mac_address,
 };
 
-static void dfx_register_res_alloc_err(const char *print_name, bool mmio,
-				       bool eisa)
-{
-	pr_err("%s: Cannot use %s, no address set, aborting\n",
-	       print_name, mmio ? "MMIO" : "I/O");
-	pr_err("%s: Recompile driver with \"CONFIG_DEFXX_MMIO=%c\"\n",
-	       print_name, mmio ? 'n' : 'y');
-	if (eisa && mmio)
-		pr_err("%s: Or run ECU and set adapter's MMIO location\n",
-		       print_name);
-}
-
 static void dfx_register_res_err(const char *print_name, bool mmio,
 				 unsigned long start, unsigned long len)
 {
@@ -547,8 +532,6 @@ static int dfx_register(struct device *bdev)
 	static int version_disp;
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	const char *print_name = dev_name(bdev);
 	struct net_device *dev;
 	DFX_board_t	  *bp;			/* board pointer */
@@ -586,19 +569,24 @@ static int dfx_register(struct device *bdev)
 	bp->bus_dev = bdev;
 	dev_set_drvdata(bdev, dev);
 
-	dfx_get_bars(bdev, bar_start, bar_len);
+	bp->mmio = true;
+
+	dfx_get_bars(bp, bar_start, bar_len);
 	if (bar_len[0] == 0 ||
 	    (dfx_bus_eisa && dfx_use_mmio && bar_start[0] == 0)) {
-		dfx_register_res_alloc_err(print_name, dfx_use_mmio,
-					   dfx_bus_eisa);
-		err = -ENXIO;
-		goto err_out_disable;
+		bp->mmio = false;
+		dfx_get_bars(bp, bar_start, bar_len);
 	}
 
-	if (dfx_use_mmio)
+	if (dfx_use_mmio) {
 		region = request_mem_region(bar_start[0], bar_len[0],
 					    print_name);
-	else
+		if (!region && (dfx_bus_eisa || dfx_bus_pci)) {
+			bp->mmio = false;
+			dfx_get_bars(bp, bar_start, bar_len);
+		}
+	}
+	if (!dfx_use_mmio)
 		region = request_region(bar_start[0], bar_len[0], print_name);
 	if (!region) {
 		dfx_register_res_err(print_name, dfx_use_mmio,
@@ -734,7 +722,6 @@ static void dfx_bus_init(struct net_device *dev)
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	u8 val;
 
 	DBG_printk("In dfx_bus_init...\n");
@@ -1054,7 +1041,6 @@ static int dfx_driver_init(struct net_device *dev, const char *print_name,
 	int dfx_bus_pci = dev_is_pci(bdev);
 	int dfx_bus_eisa = DFX_BUS_EISA(bdev);
 	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	int alloc_size;			/* total buffer size needed */
 	char *top_v, *curr_v;		/* virtual addrs into memory block */
 	dma_addr_t top_p, curr_p;	/* physical addrs into memory block */
@@ -3708,8 +3694,6 @@ static void dfx_unregister(struct device *bdev)
 	struct net_device *dev = dev_get_drvdata(bdev);
 	DFX_board_t *bp = netdev_priv(dev);
 	int dfx_bus_pci = dev_is_pci(bdev);
-	int dfx_bus_tc = DFX_BUS_TC(bdev);
-	int dfx_use_mmio = DFX_MMIO || dfx_bus_tc;
 	resource_size_t bar_start[3] = {0};	/* pointers to ports */
 	resource_size_t bar_len[3] = {0};	/* resource lengths */
 	int		alloc_size;		/* total buffer size used */
@@ -3729,7 +3713,7 @@ static void dfx_unregister(struct device *bdev)
 
 	dfx_bus_uninit(dev);
 
-	dfx_get_bars(bdev, bar_start, bar_len);
+	dfx_get_bars(bp, bar_start, bar_len);
 	if (bar_start[2] != 0)
 		release_region(bar_start[2], bar_len[2]);
 	if (bar_start[1] != 0)
diff --git a/drivers/net/fddi/defxx.h b/drivers/net/fddi/defxx.h
index 4c31d31e0a3f..8193713e12db 100644
--- a/drivers/net/fddi/defxx.h
+++ b/drivers/net/fddi/defxx.h
@@ -27,6 +27,7 @@
  *		04 Aug 2003	macro		Converted to the DMA API.
  *		23 Oct 2006	macro		Big-endian host support.
  *		14 Dec 2006	macro		TURBOchannel support.
+ *		10 Mar 2021	macro		Dynamic MMIO vs port I/O.
  */
 
 #ifndef _DEFXX_H_
@@ -1776,6 +1777,8 @@ typedef struct DFX_board_tag
 		int port;
 	} base;										/* base address */
 	struct device			*bus_dev;
+	/* Whether to use MMIO or port I/O.  */
+	bool				mmio;
 	u32				full_duplex_enb;				/* FDDI Full Duplex enable (1 == on, 2 == off) */
 	u32				req_ttrt;					/* requested TTRT value (in 80ns units) */
 	u32				burst_size;					/* adapter burst size (enumerated) */
-- 
cgit v1.2.3


From 4e052626b1a86988f92e60afd4a05f088b75c7bb Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Wed, 10 Mar 2021 13:03:24 +0100
Subject: FDDI: defxx: Use driver's name with resource requests

Replace repeated "defxx" strings with a reference to the DRV_NAME macro
and then use the driver's name rather that the bus address with resource
requests so as to have contents of /proc/iomem and /proc/ioports more
meaningful to the user, in line with what drivers usually do.

So rather than say:

5000-50ff : DEC FDDIcontroller/EISA Adapter
  5000-503f : 00:05
  5040-5043 : 00:05
5400-54ff : DEC FDDIcontroller/EISA Adapter
5800-58ff : DEC FDDIcontroller/EISA Adapter
5c00-5cff : DEC FDDIcontroller/EISA Adapter
  5c80-5cbf : 00:05

or:

620c080020000-620c08002007f : 0031:02:04.0
  620c080020000-620c08002007f : 0031:02:04.0
620c080030000-620c08003ffff : 0031:02:04.0

or:

1f100000-1f10003f : tc2

we report:

5000-50ff : DEC FDDIcontroller/EISA Adapter
  5000-503f : defxx
  5040-5043 : defxx
5400-54ff : DEC FDDIcontroller/EISA Adapter
5800-58ff : DEC FDDIcontroller/EISA Adapter
5c00-5cff : DEC FDDIcontroller/EISA Adapter
  5c80-5cbf : defxx

and:

620c080020000-620c08002007f : 0031:02:04.0
  620c080020000-620c08002007f : defxx
620c080030000-620c08003ffff : 0031:02:04.0

and:

1f100000-1f10003f : defxx

respectively for the DEFEA (EISA), DEFPA (PCI), and DEFTA (TURBOchannel)
adapters.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defxx.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 7d733a6204e2..6d1e3f49a3d3 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -580,14 +580,15 @@ static int dfx_register(struct device *bdev)
 
 	if (dfx_use_mmio) {
 		region = request_mem_region(bar_start[0], bar_len[0],
-					    print_name);
+					    bdev->driver->name);
 		if (!region && (dfx_bus_eisa || dfx_bus_pci)) {
 			bp->mmio = false;
 			dfx_get_bars(bp, bar_start, bar_len);
 		}
 	}
 	if (!dfx_use_mmio)
-		region = request_region(bar_start[0], bar_len[0], print_name);
+		region = request_region(bar_start[0], bar_len[0],
+					bdev->driver->name);
 	if (!region) {
 		dfx_register_res_err(print_name, dfx_use_mmio,
 				     bar_start[0], bar_len[0]);
@@ -595,7 +596,8 @@ static int dfx_register(struct device *bdev)
 		goto err_out_disable;
 	}
 	if (bar_start[1] != 0) {
-		region = request_region(bar_start[1], bar_len[1], print_name);
+		region = request_region(bar_start[1], bar_len[1],
+					bdev->driver->name);
 		if (!region) {
 			dfx_register_res_err(print_name, 0,
 					     bar_start[1], bar_len[1]);
@@ -604,7 +606,8 @@ static int dfx_register(struct device *bdev)
 		}
 	}
 	if (bar_start[2] != 0) {
-		region = request_region(bar_start[2], bar_len[2], print_name);
+		region = request_region(bar_start[2], bar_len[2],
+					bdev->driver->name);
 		if (!region) {
 			dfx_register_res_err(print_name, 0,
 					     bar_start[2], bar_len[2]);
@@ -3745,7 +3748,7 @@ static const struct pci_device_id dfx_pci_table[] = {
 MODULE_DEVICE_TABLE(pci, dfx_pci_table);
 
 static struct pci_driver dfx_pci_driver = {
-	.name		= "defxx",
+	.name		= DRV_NAME,
 	.id_table	= dfx_pci_table,
 	.probe		= dfx_pci_register,
 	.remove		= dfx_pci_unregister,
@@ -3776,7 +3779,7 @@ MODULE_DEVICE_TABLE(eisa, dfx_eisa_table);
 static struct eisa_driver dfx_eisa_driver = {
 	.id_table	= dfx_eisa_table,
 	.driver		= {
-		.name	= "defxx",
+		.name	= DRV_NAME,
 		.bus	= &eisa_bus_type,
 		.probe	= dfx_dev_register,
 		.remove	= dfx_dev_unregister,
@@ -3797,7 +3800,7 @@ MODULE_DEVICE_TABLE(tc, dfx_tc_table);
 static struct tc_driver dfx_tc_driver = {
 	.id_table	= dfx_tc_table,
 	.driver		= {
-		.name	= "defxx",
+		.name	= DRV_NAME,
 		.bus	= &tc_bus_type,
 		.probe	= dfx_dev_register,
 		.remove	= dfx_dev_unregister,
-- 
cgit v1.2.3


From 176769d10f96aa695786b000c9cacb13e48189f9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:40 +0200
Subject: net: enetc: move the CBDR API to enetc_cbdr.c

Since there is a dedicated file in this driver for interacting with
control BD rings, it makes sense to move these functions there.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      | 54 -----------------------
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 54 +++++++++++++++++++++++
 2 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 89d2cb348271..09f7fdbf93a7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -991,60 +991,6 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv)
 		enetc_free_tx_ring(priv->tx_ring[i]);
 }
 
-int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
-{
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
-
-	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
-					   GFP_KERNEL);
-	if (!cbdr->bd_base)
-		return -ENOMEM;
-
-	/* h/w requires 128B alignment */
-	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
-		dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
-		return -EINVAL;
-	}
-
-	cbdr->next_to_clean = 0;
-	cbdr->next_to_use = 0;
-
-	return 0;
-}
-
-void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
-{
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
-
-	dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
-	cbdr->bd_base = NULL;
-}
-
-void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
-{
-	/* set CBDR cache attributes */
-	enetc_wr(hw, ENETC_SICAR2,
-		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
-
-	enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base));
-	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
-	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
-
-	enetc_wr(hw, ENETC_SICBDRPIR, 0);
-	enetc_wr(hw, ENETC_SICBDRCIR, 0);
-
-	/* enable ring */
-	enetc_wr(hw, ENETC_SICBDRMR, BIT(31));
-
-	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
-	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
-}
-
-void enetc_clear_cbdr(struct enetc_hw *hw)
-{
-	enetc_wr(hw, ENETC_SICBDRMR, 0);
-}
-
 static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
 {
 	int *rss_table;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 201cbc362e33..ad6aecda6b47 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -3,6 +3,60 @@
 
 #include "enetc.h"
 
+int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
+					   GFP_KERNEL);
+	if (!cbdr->bd_base)
+		return -ENOMEM;
+
+	/* h/w requires 128B alignment */
+	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
+		dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+		return -EINVAL;
+	}
+
+	cbdr->next_to_clean = 0;
+	cbdr->next_to_use = 0;
+
+	return 0;
+}
+
+void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+	cbdr->bd_base = NULL;
+}
+
+void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
+{
+	/* set CBDR cache attributes */
+	enetc_wr(hw, ENETC_SICAR2,
+		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
+
+	enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
+	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
+
+	enetc_wr(hw, ENETC_SICBDRPIR, 0);
+	enetc_wr(hw, ENETC_SICBDRCIR, 0);
+
+	/* enable ring */
+	enetc_wr(hw, ENETC_SICBDRMR, BIT(31));
+
+	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
+	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+}
+
+void enetc_clear_cbdr(struct enetc_hw *hw)
+{
+	enetc_wr(hw, ENETC_SICBDRMR, 0);
+}
+
 static void enetc_clean_cbdr(struct enetc_si *si)
 {
 	struct enetc_cbdr *ring = &si->cbd_ring;
-- 
cgit v1.2.3


From 01121ab73924d355091d9c8a3b593a361ea4a5ae Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:41 +0200
Subject: net: enetc: save the DMA device for enetc_free_cbdr

We shouldn't need to pass the struct device *dev to enetc CBDR APIs over
and over again, so save this inside struct enetc_cbdr::dma_dev and avoid
calling it from the enetc_free_cbdr functions.

This breaks the dependency of the cbdr API from struct enetc_si (the
station interface).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      |  4 +--
 drivers/net/ethernet/freescale/enetc/enetc.h      |  3 +-
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 37 +++++++++++++----------
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   |  2 +-
 4 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 09f7fdbf93a7..2d60af5fcd15 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1077,7 +1077,7 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 
 err_alloc_cls:
 	enetc_clear_cbdr(&si->hw);
-	enetc_free_cbdr(priv->dev, &si->cbd_ring);
+	enetc_free_cbdr(&si->cbd_ring);
 
 	return err;
 }
@@ -1087,7 +1087,7 @@ void enetc_free_si_resources(struct enetc_ndev_priv *priv)
 	struct enetc_si *si = priv->si;
 
 	enetc_clear_cbdr(&si->hw);
-	enetc_free_cbdr(priv->dev, &si->cbd_ring);
+	enetc_free_cbdr(&si->cbd_ring);
 
 	kfree(priv->cls_rules);
 }
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 8b380fc13314..b343d1002bb7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -104,6 +104,7 @@ struct enetc_cbdr {
 	int next_to_clean;
 
 	dma_addr_t bd_dma_base;
+	struct device *dma_dev;
 };
 
 #define ENETC_TXBD(BDR, i) (&(((union enetc_tx_bd *)((BDR).bd_base))[i]))
@@ -311,7 +312,7 @@ void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
 int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
-void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+void enetc_free_cbdr(struct enetc_cbdr *cbdr);
 void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr);
 void enetc_clear_cbdr(struct enetc_hw *hw);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index ad6aecda6b47..7e84eb665ecd 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -14,22 +14,26 @@ int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 
 	/* h/w requires 128B alignment */
 	if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) {
-		dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+		dma_free_coherent(dev, size, cbdr->bd_base,
+				  cbdr->bd_dma_base);
 		return -EINVAL;
 	}
 
 	cbdr->next_to_clean = 0;
 	cbdr->next_to_use = 0;
+	cbdr->dma_dev = dev;
 
 	return 0;
 }
 
-void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+void enetc_free_cbdr(struct enetc_cbdr *cbdr)
 {
 	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
-	dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base);
+	dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base,
+			  cbdr->bd_dma_base);
 	cbdr->bd_base = NULL;
+	cbdr->dma_dev = NULL;
 }
 
 void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
@@ -57,9 +61,8 @@ void enetc_clear_cbdr(struct enetc_hw *hw)
 	enetc_wr(hw, ENETC_SICBDRMR, 0);
 }
 
-static void enetc_clean_cbdr(struct enetc_si *si)
+static void enetc_clean_cbdr(struct enetc_cbdr *ring)
 {
-	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd *dest_cbd;
 	int i, status;
 
@@ -69,7 +72,7 @@ static void enetc_clean_cbdr(struct enetc_si *si)
 		dest_cbd = ENETC_CBD(*ring, i);
 		status = dest_cbd->status_flags & ENETC_CBD_STATUS_MASK;
 		if (status)
-			dev_warn(&si->pdev->dev, "CMD err %04x for cmd %04x\n",
+			dev_warn(ring->dma_dev, "CMD err %04x for cmd %04x\n",
 				 status, dest_cbd->cmd);
 
 		memset(dest_cbd, 0, sizeof(*dest_cbd));
@@ -97,7 +100,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd)
 		return -EIO;
 
 	if (unlikely(!enetc_cbd_unused(ring)))
-		enetc_clean_cbdr(si);
+		enetc_clean_cbdr(ring);
 
 	i = ring->next_to_use;
 	dest_cbd = ENETC_CBD(*ring, i);
@@ -123,7 +126,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd)
 	/* CBD may writeback data, feedback up level */
 	*cbd = *dest_cbd;
 
-	enetc_clean_cbdr(si);
+	enetc_clean_cbdr(ring);
 
 	return 0;
 }
@@ -171,6 +174,7 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 		       int index)
 {
+	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd cbd = {.cmd = 0};
 	dma_addr_t dma, dma_align;
 	void *tmp, *tmp_align;
@@ -183,10 +187,10 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 	cbd.length = cpu_to_le16(sizeof(*rfse));
 	cbd.opt[3] = cpu_to_le32(0); /* SI */
 
-	tmp = dma_alloc_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+	tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
 				 &dma, GFP_KERNEL);
 	if (!tmp) {
-		dev_err(&si->pdev->dev, "DMA mapping of RFS entry failed!\n");
+		dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n");
 		return -ENOMEM;
 	}
 
@@ -199,9 +203,9 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 
 	err = enetc_send_cmd(si, &cbd);
 	if (err)
-		dev_err(&si->pdev->dev, "FS entry add failed (%d)!", err);
+		dev_err(ring->dma_dev, "FS entry add failed (%d)!", err);
 
-	dma_free_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN,
+	dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN,
 			  tmp, dma);
 
 	return err;
@@ -211,6 +215,7 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse,
 static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 			       bool read)
 {
+	struct enetc_cbdr *ring = &si->cbd_ring;
 	struct enetc_cbd cbd = {.cmd = 0};
 	dma_addr_t dma, dma_align;
 	u8 *tmp, *tmp_align;
@@ -220,10 +225,10 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 		/* HW only takes in a full 64 entry table */
 		return -EINVAL;
 
-	tmp = dma_alloc_coherent(&si->pdev->dev, count + RSSE_ALIGN,
+	tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN,
 				 &dma, GFP_KERNEL);
 	if (!tmp) {
-		dev_err(&si->pdev->dev, "DMA mapping of RSS table failed!\n");
+		dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n");
 		return -ENOMEM;
 	}
 	dma_align = ALIGN(dma, RSSE_ALIGN);
@@ -243,13 +248,13 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count,
 
 	err = enetc_send_cmd(si, &cbd);
 	if (err)
-		dev_err(&si->pdev->dev, "RSS cmd failed (%d)!", err);
+		dev_err(ring->dma_dev, "RSS cmd failed (%d)!", err);
 
 	if (read)
 		for (i = 0; i < count; i++)
 			table[i] = tmp_align[i];
 
-	dma_free_coherent(&si->pdev->dev, count + RSSE_ALIGN, tmp, dma);
+	dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 224fc37a6757..a7195ec736f3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1098,7 +1098,7 @@ static void enetc_init_unused_port(struct enetc_si *si)
 	enetc_init_port_rss_memory(si);
 
 	enetc_clear_cbdr(hw);
-	enetc_free_cbdr(dev, &si->cbd_ring);
+	enetc_free_cbdr(&si->cbd_ring);
 }
 
 static int enetc_pf_probe(struct pci_dev *pdev,
-- 
cgit v1.2.3


From 24be14e3260ad8f5b926da8ad0bc0067435e7340 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:42 +0200
Subject: net: enetc: squash enetc_alloc_cbdr and enetc_setup_cbdr

enetc_alloc_cbdr and enetc_setup_cbdr are always called one after
another, so we can simplify the callers and make enetc_setup_cbdr do
everything that's needed.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      |  4 +--
 drivers/net/ethernet/freescale/enetc/enetc.h      |  4 +--
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 30 +++++++++++------------
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   |  4 +--
 4 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 2d60af5fcd15..718ce01daffe 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1060,12 +1060,10 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 	struct enetc_si *si = priv->si;
 	int err;
 
-	err = enetc_alloc_cbdr(priv->dev, &si->cbd_ring);
+	err = enetc_setup_cbdr(priv->dev, &si->hw, &si->cbd_ring);
 	if (err)
 		return err;
 
-	enetc_setup_cbdr(&si->hw, &si->cbd_ring);
-
 	priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
 				  GFP_KERNEL);
 	if (!priv->cls_rules) {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index b343d1002bb7..1f2e9bec1b30 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -311,9 +311,9 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
-int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
+		     struct enetc_cbdr *cbdr);
 void enetc_free_cbdr(struct enetc_cbdr *cbdr);
-void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr);
 void enetc_clear_cbdr(struct enetc_hw *hw);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 			    char *mac_addr, int si_map);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 7e84eb665ecd..4de31b283319 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -3,7 +3,8 @@
 
 #include "enetc.h"
 
-int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
+		     struct enetc_cbdr *cbdr)
 {
 	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
@@ -23,21 +24,6 @@ int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 	cbdr->next_to_use = 0;
 	cbdr->dma_dev = dev;
 
-	return 0;
-}
-
-void enetc_free_cbdr(struct enetc_cbdr *cbdr)
-{
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
-
-	dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base,
-			  cbdr->bd_dma_base);
-	cbdr->bd_base = NULL;
-	cbdr->dma_dev = NULL;
-}
-
-void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
-{
 	/* set CBDR cache attributes */
 	enetc_wr(hw, ENETC_SICAR2,
 		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
@@ -54,6 +40,18 @@ void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
 
 	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
 	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+
+	return 0;
+}
+
+void enetc_free_cbdr(struct enetc_cbdr *cbdr)
+{
+	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+
+	dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base,
+			  cbdr->bd_dma_base);
+	cbdr->bd_base = NULL;
+	cbdr->dma_dev = NULL;
 }
 
 void enetc_clear_cbdr(struct enetc_hw *hw)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index a7195ec736f3..31d229e0912a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1088,12 +1088,10 @@ static void enetc_init_unused_port(struct enetc_si *si)
 	int err;
 
 	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
-	err = enetc_alloc_cbdr(dev, &si->cbd_ring);
+	err = enetc_setup_cbdr(dev, hw, &si->cbd_ring);
 	if (err)
 		return;
 
-	enetc_setup_cbdr(hw, &si->cbd_ring);
-
 	enetc_init_port_rfs_memory(si);
 	enetc_init_port_rss_memory(si);
 
-- 
cgit v1.2.3


From 27f9025d49416e35bd7f1cedc9edd0388612501f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:43 +0200
Subject: net: enetc: save the mode register address inside struct enetc_cbdr

enetc_clear_cbdr depends on struct enetc_hw because it must disable the
ring through a register write. We'd like to remove that dependency, so
let's do what's already done with the producer and consumer indices,
which is to save the iomem address in a variable kept in struct enetc_cbdr.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      |  4 ++--
 drivers/net/ethernet/freescale/enetc/enetc.h      |  3 ++-
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 19 ++++++++++---------
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   |  2 +-
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 718ce01daffe..80f80a1c7dd2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1074,7 +1074,7 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 	return 0;
 
 err_alloc_cls:
-	enetc_clear_cbdr(&si->hw);
+	enetc_clear_cbdr(&si->cbd_ring);
 	enetc_free_cbdr(&si->cbd_ring);
 
 	return err;
@@ -1084,7 +1084,7 @@ void enetc_free_si_resources(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
 
-	enetc_clear_cbdr(&si->hw);
+	enetc_clear_cbdr(&si->cbd_ring);
 	enetc_free_cbdr(&si->cbd_ring);
 
 	kfree(priv->cls_rules);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 1f2e9bec1b30..9d4dbeef61ac 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -98,6 +98,7 @@ struct enetc_cbdr {
 	void *bd_base; /* points to Rx or Tx BD ring */
 	void __iomem *pir;
 	void __iomem *cir;
+	void __iomem *mr; /* mode register */
 
 	int bd_count; /* # of BDs */
 	int next_to_use;
@@ -314,7 +315,7 @@ void enetc_set_ethtool_ops(struct net_device *ndev);
 int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 		     struct enetc_cbdr *cbdr);
 void enetc_free_cbdr(struct enetc_cbdr *cbdr);
-void enetc_clear_cbdr(struct enetc_hw *hw);
+void enetc_clear_cbdr(struct enetc_cbdr *cbdr);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 			    char *mac_addr, int si_map);
 int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index 4de31b283319..bb20a58e8830 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -24,6 +24,10 @@ int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 	cbdr->next_to_use = 0;
 	cbdr->dma_dev = dev;
 
+	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
+	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+	cbdr->mr = hw->reg + ENETC_SICBDRMR;
+
 	/* set CBDR cache attributes */
 	enetc_wr(hw, ENETC_SICAR2,
 		 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
@@ -32,14 +36,10 @@ int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 	enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base));
 	enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count));
 
-	enetc_wr(hw, ENETC_SICBDRPIR, 0);
-	enetc_wr(hw, ENETC_SICBDRCIR, 0);
-
+	enetc_wr_reg(cbdr->pir, cbdr->next_to_clean);
+	enetc_wr_reg(cbdr->cir, cbdr->next_to_use);
 	/* enable ring */
-	enetc_wr(hw, ENETC_SICBDRMR, BIT(31));
-
-	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
-	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
+	enetc_wr_reg(cbdr->mr, BIT(31));
 
 	return 0;
 }
@@ -54,9 +54,10 @@ void enetc_free_cbdr(struct enetc_cbdr *cbdr)
 	cbdr->dma_dev = NULL;
 }
 
-void enetc_clear_cbdr(struct enetc_hw *hw)
+void enetc_clear_cbdr(struct enetc_cbdr *cbdr)
 {
-	enetc_wr(hw, ENETC_SICBDRMR, 0);
+	/* disable ring */
+	enetc_wr_reg(cbdr->mr, 0);
 }
 
 static void enetc_clean_cbdr(struct enetc_cbdr *ring)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 31d229e0912a..f083d49d7772 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1095,7 +1095,7 @@ static void enetc_init_unused_port(struct enetc_si *si)
 	enetc_init_port_rfs_memory(si);
 	enetc_init_port_rss_memory(si);
 
-	enetc_clear_cbdr(hw);
+	enetc_clear_cbdr(&si->cbd_ring);
 	enetc_free_cbdr(&si->cbd_ring);
 }
 
-- 
cgit v1.2.3


From 0bfde022b345763df70fdc73bcb5ab8943589487 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:44 +0200
Subject: net: enetc: squash clear_cbdr and free_cbdr into teardown_cbdr

All call sites call enetc_clear_cbdr and enetc_free_cbdr one after
another, so let's combine the two functions into a single method named
enetc_teardown_cbdr which does both, and in the same order.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      |  6 ++----
 drivers/net/ethernet/freescale/enetc/enetc.h      |  3 +--
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 11 ++++-------
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   |  3 +--
 4 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 80f80a1c7dd2..ff507b4ab478 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1074,8 +1074,7 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 	return 0;
 
 err_alloc_cls:
-	enetc_clear_cbdr(&si->cbd_ring);
-	enetc_free_cbdr(&si->cbd_ring);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	return err;
 }
@@ -1084,8 +1083,7 @@ void enetc_free_si_resources(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
 
-	enetc_clear_cbdr(&si->cbd_ring);
-	enetc_free_cbdr(&si->cbd_ring);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	kfree(priv->cls_rules);
 }
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 9d4dbeef61ac..19772be63a2c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -314,8 +314,7 @@ void enetc_set_ethtool_ops(struct net_device *ndev);
 /* control buffer descriptor ring (CBDR) */
 int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 		     struct enetc_cbdr *cbdr);
-void enetc_free_cbdr(struct enetc_cbdr *cbdr);
-void enetc_clear_cbdr(struct enetc_cbdr *cbdr);
+void enetc_teardown_cbdr(struct enetc_cbdr *cbdr);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
 			    char *mac_addr, int si_map);
 int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index bb20a58e8830..bee453be2240 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -44,22 +44,19 @@ int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 	return 0;
 }
 
-void enetc_free_cbdr(struct enetc_cbdr *cbdr)
+void enetc_teardown_cbdr(struct enetc_cbdr *cbdr)
 {
 	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
+	/* disable ring */
+	enetc_wr_reg(cbdr->mr, 0);
+
 	dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base,
 			  cbdr->bd_dma_base);
 	cbdr->bd_base = NULL;
 	cbdr->dma_dev = NULL;
 }
 
-void enetc_clear_cbdr(struct enetc_cbdr *cbdr)
-{
-	/* disable ring */
-	enetc_wr_reg(cbdr->mr, 0);
-}
-
 static void enetc_clean_cbdr(struct enetc_cbdr *ring)
 {
 	struct enetc_cbd *dest_cbd;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index f083d49d7772..4dd7199d5007 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1095,8 +1095,7 @@ static void enetc_init_unused_port(struct enetc_si *si)
 	enetc_init_port_rfs_memory(si);
 	enetc_init_port_rss_memory(si);
 
-	enetc_clear_cbdr(&si->cbd_ring);
-	enetc_free_cbdr(&si->cbd_ring);
+	enetc_teardown_cbdr(&si->cbd_ring);
 }
 
 static int enetc_pf_probe(struct pci_dev *pdev,
-- 
cgit v1.2.3


From 5b4daa7f1256669fe4eb215c77a88c55ad24f26e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:45 +0200
Subject: net: enetc: pass bd_count as an argument to enetc_setup_cbdr

It makes no sense from an API perspective to first initialize some
portion of struct enetc_cbdr outside enetc_setup_cbdr, then leave that
function to initialize the rest. enetc_setup_cbdr should be able to
perform all initialization given a zero-initialized struct enetc_cbdr.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c      | 6 ++----
 drivers/net/ethernet/freescale/enetc/enetc.h      | 2 +-
 drivers/net/ethernet/freescale/enetc/enetc_cbdr.c | 5 +++--
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   | 3 +--
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index ff507b4ab478..dda3306678c3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1050,9 +1050,6 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
 	priv->bdr_int_num = cpus;
 	priv->ic_mode = ENETC_IC_RX_ADAPTIVE | ENETC_IC_TX_MANUAL;
 	priv->tx_ictt = ENETC_TXIC_TIMETHR;
-
-	/* SI specific */
-	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
 }
 
 int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
@@ -1060,7 +1057,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 	struct enetc_si *si = priv->si;
 	int err;
 
-	err = enetc_setup_cbdr(priv->dev, &si->hw, &si->cbd_ring);
+	err = enetc_setup_cbdr(priv->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+			       &si->cbd_ring);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 19772be63a2c..af8b8be114bd 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -312,7 +312,7 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
-int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count,
 		     struct enetc_cbdr *cbdr);
 void enetc_teardown_cbdr(struct enetc_cbdr *cbdr);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
index bee453be2240..073e56dcca4e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c
@@ -3,10 +3,10 @@
 
 #include "enetc.h"
 
-int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
+int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count,
 		     struct enetc_cbdr *cbdr)
 {
-	int size = cbdr->bd_count * sizeof(struct enetc_cbd);
+	int size = bd_count * sizeof(struct enetc_cbd);
 
 	cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base,
 					   GFP_KERNEL);
@@ -23,6 +23,7 @@ int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw,
 	cbdr->next_to_clean = 0;
 	cbdr->next_to_use = 0;
 	cbdr->dma_dev = dev;
+	cbdr->bd_count = bd_count;
 
 	cbdr->pir = hw->reg + ENETC_SICBDRPIR;
 	cbdr->cir = hw->reg + ENETC_SICBDRCIR;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 4dd7199d5007..bf70b8f9943f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1087,8 +1087,7 @@ static void enetc_init_unused_port(struct enetc_si *si)
 	struct enetc_hw *hw = &si->hw;
 	int err;
 
-	si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
-	err = enetc_setup_cbdr(dev, hw, &si->cbd_ring);
+	err = enetc_setup_cbdr(dev, hw, ENETC_CBDR_DEFAULT_SIZE, &si->cbd_ring);
 	if (err)
 		return;
 
-- 
cgit v1.2.3


From 4b47c0b81ffd9b395b5afc3ef41c69cceb6a8576 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:46 +0200
Subject: net: enetc: don't initialize unused ports from a separate code path

Since commit 3222b5b613db ("net: enetc: initialize RFS/RSS memories for
unused ports too") there is a requirement to initialize the memories of
unused PFs too, which has left the probe path in a bit of a rough shape,
because we basically have a minimal initialization path for unused PFs
which is separate from the main initialization path.

Now that initializing a control BD ring is as simple as calling
enetc_setup_cbdr, let's move that outside of enetc_alloc_si_resources
(unused PFs don't need classification rules, so no point in allocating
them just to free them later).

But enetc_alloc_si_resources is called both for PFs and for VFs, so now
that enetc_setup_cbdr is no longer called from this common function, it
means that the VF probe path needs to explicitly call enetc_setup_cbdr
too.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c    | 21 +---------
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 52 ++++++++++---------------
 drivers/net/ethernet/freescale/enetc/enetc_vf.c |  7 ++++
 3 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index dda3306678c3..53c8a698c72c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1055,34 +1055,17 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv)
 int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
-	int err;
-
-	err = enetc_setup_cbdr(priv->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
-			       &si->cbd_ring);
-	if (err)
-		return err;
 
 	priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
 				  GFP_KERNEL);
-	if (!priv->cls_rules) {
-		err = -ENOMEM;
-		goto err_alloc_cls;
-	}
+	if (!priv->cls_rules)
+		return -ENOMEM;
 
 	return 0;
-
-err_alloc_cls:
-	enetc_teardown_cbdr(&si->cbd_ring);
-
-	return err;
 }
 
 void enetc_free_si_resources(struct enetc_ndev_priv *priv)
 {
-	struct enetc_si *si = priv->si;
-
-	enetc_teardown_cbdr(&si->cbd_ring);
-
 	kfree(priv->cls_rules);
 }
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index bf70b8f9943f..c8b6110448d4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1081,22 +1081,6 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 	return err;
 }
 
-static void enetc_init_unused_port(struct enetc_si *si)
-{
-	struct device *dev = &si->pdev->dev;
-	struct enetc_hw *hw = &si->hw;
-	int err;
-
-	err = enetc_setup_cbdr(dev, hw, ENETC_CBDR_DEFAULT_SIZE, &si->cbd_ring);
-	if (err)
-		return;
-
-	enetc_init_port_rfs_memory(si);
-	enetc_init_port_rss_memory(si);
-
-	enetc_teardown_cbdr(&si->cbd_ring);
-}
-
 static int enetc_pf_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent)
 {
@@ -1120,8 +1104,24 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_map_pf_space;
 	}
 
+	err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+			       &si->cbd_ring);
+	if (err)
+		goto err_setup_cbdr;
+
+	err = enetc_init_port_rfs_memory(si);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
+		goto err_init_port_rfs;
+	}
+
+	err = enetc_init_port_rss_memory(si);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
+		goto err_init_port_rss;
+	}
+
 	if (node && !of_device_is_available(node)) {
-		enetc_init_unused_port(si);
 		dev_info(&pdev->dev, "device is disabled, skipping\n");
 		err = -ENODEV;
 		goto err_device_disabled;
@@ -1154,18 +1154,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 		goto err_alloc_si_res;
 	}
 
-	err = enetc_init_port_rfs_memory(si);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
-		goto err_init_port_rfs;
-	}
-
-	err = enetc_init_port_rss_memory(si);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
-		goto err_init_port_rss;
-	}
-
 	err = enetc_configure_si(priv);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to configure SI\n");
@@ -1201,15 +1189,17 @@ err_phylink_create:
 err_mdiobus_create:
 	enetc_free_msix(priv);
 err_config_si:
-err_init_port_rss:
-err_init_port_rfs:
 err_alloc_msix:
 	enetc_free_si_resources(priv);
 err_alloc_si_res:
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
+err_init_port_rss:
+err_init_port_rfs:
 err_device_disabled:
+	enetc_teardown_cbdr(&si->cbd_ring);
+err_setup_cbdr:
 err_map_pf_space:
 	enetc_pci_remove(pdev);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 9b755a84c2d6..371a34d3c6b4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -165,6 +165,11 @@ static int enetc_vf_probe(struct pci_dev *pdev,
 
 	enetc_init_si_rings_params(priv);
 
+	err = enetc_setup_cbdr(priv->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
+			       &si->cbd_ring);
+	if (err)
+		goto err_setup_cbdr;
+
 	err = enetc_alloc_si_resources(priv);
 	if (err) {
 		dev_err(&pdev->dev, "SI resource alloc failed\n");
@@ -197,6 +202,8 @@ err_config_si:
 err_alloc_msix:
 	enetc_free_si_resources(priv);
 err_alloc_si_res:
+	enetc_teardown_cbdr(&si->cbd_ring);
+err_setup_cbdr:
 	si->ndev = NULL;
 	free_netdev(ndev);
 err_alloc_netdev:
-- 
cgit v1.2.3


From c027aa9201eb9af8067282d4b8a7ce7523cd3c8d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:47 +0200
Subject: net: enetc: simplify callers of enetc_rxbd_next

When we iterate through the BDs in the RX ring, the software producer
index (which is already passed by value to enetc_rxbd_next) lags behind,
and we end up with this funny looking "++i == rx_ring->bd_count" check
so that we drag it after us.

Let's pass the software producer index "i" by reference, so that
enetc_rxbd_next can increment it by itself (mod rx_ring->bd_count),
especially since enetc_rxbd_next has to increment the index anyway.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 21 +++++----------------
 drivers/net/ethernet/freescale/enetc/enetc.h | 23 +++++++++++++++--------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 53c8a698c72c..7bf56dd8b580 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -479,13 +479,8 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 		/* clear 'R" as well */
 		rxbd->r.lstatus = 0;
 
-		rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-		rx_swbd++;
-		i++;
-		if (unlikely(i == rx_ring->bd_count)) {
-			i = 0;
-			rx_swbd = rx_ring->rx_swbd;
-		}
+		enetc_rxbd_next(rx_ring, &rxbd, &i);
+		rx_swbd = &rx_ring->rx_swbd[i];
 	}
 
 	if (likely(j)) {
@@ -700,9 +695,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		cleaned_cnt++;
 
-		rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-		if (unlikely(++i == rx_ring->bd_count))
-			i = 0;
+		enetc_rxbd_next(rx_ring, &rxbd, &i);
 
 		if (unlikely(bd_status &
 			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
@@ -711,9 +704,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 				dma_rmb();
 				bd_status = le32_to_cpu(rxbd->r.lstatus);
 
-				rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-				if (unlikely(++i == rx_ring->bd_count))
-					i = 0;
+				enetc_rxbd_next(rx_ring, &rxbd, &i);
 			}
 
 			rx_ring->ndev->stats.rx_dropped++;
@@ -736,9 +727,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 			cleaned_cnt++;
 
-			rxbd = enetc_rxbd_next(rx_ring, rxbd, i);
-			if (unlikely(++i == rx_ring->bd_count))
-				i = 0;
+			enetc_rxbd_next(rx_ring, &rxbd, &i);
 		}
 
 		rx_byte_cnt += skb->len;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index af8b8be114bd..30b9ad550d7b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -121,19 +121,26 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i)
 	return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]);
 }
 
-static inline union enetc_rx_bd *enetc_rxbd_next(struct enetc_bdr *rx_ring,
-						 union enetc_rx_bd *rxbd,
-						 int i)
+static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring,
+				   union enetc_rx_bd **old_rxbd, int *old_index)
 {
-	rxbd++;
+	union enetc_rx_bd *new_rxbd = *old_rxbd;
+	int new_index = *old_index;
+
+	new_rxbd++;
+
 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
 	if (rx_ring->ext_en)
-		rxbd++;
+		new_rxbd++;
 #endif
-	if (unlikely(++i == rx_ring->bd_count))
-		rxbd = rx_ring->bd_base;
 
-	return rxbd;
+	if (unlikely(++new_index == rx_ring->bd_count)) {
+		new_rxbd = rx_ring->bd_base;
+		new_index = 0;
+	}
+
+	*old_rxbd = new_rxbd;
+	*old_index = new_index;
 }
 
 static inline union enetc_rx_bd *enetc_rxbd_ext(union enetc_rx_bd *rxbd)
-- 
cgit v1.2.3


From 7f071a450b08801a14c4f6eecdb6881492dd8860 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:48 +0200
Subject: net: enetc: use enum enetc_active_offloads

The active_offloads variable of enetc_ndev_priv has an enum type, use it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 30b9ad550d7b..773e412b9f4e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -261,7 +261,7 @@ struct enetc_ndev_priv {
 	u16 rx_bd_count, tx_bd_count;
 
 	u16 msg_enable;
-	int active_offloads;
+	enum enetc_active_offloads active_offloads;
 
 	u32 speed; /* store speed for compare update pspeed */
 
-- 
cgit v1.2.3


From 8580b3c3d786e739e6073438d1da0218ca436939 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:49 +0200
Subject: net: enetc: remove forward-declarations of enetc_clean_{rx,tx}_ring

This patch moves the NAPI enetc_poll after enetc_clean_rx_ring such that
we can delete the forward declarations.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 92 +++++++++++++---------------
 1 file changed, 44 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 7bf56dd8b580..81c750b2d08f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -242,10 +242,6 @@ static irqreturn_t enetc_msix(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget);
-static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
-			       struct napi_struct *napi, int work_limit);
-
 static void enetc_rx_dim_work(struct work_struct *w)
 {
 	struct dim *dim = container_of(w, struct dim, work);
@@ -274,50 +270,6 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v)
 	net_dim(&v->rx_dim, dim_sample);
 }
 
-static int enetc_poll(struct napi_struct *napi, int budget)
-{
-	struct enetc_int_vector
-		*v = container_of(napi, struct enetc_int_vector, napi);
-	bool complete = true;
-	int work_done;
-	int i;
-
-	enetc_lock_mdio();
-
-	for (i = 0; i < v->count_tx_rings; i++)
-		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
-			complete = false;
-
-	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
-	if (work_done == budget)
-		complete = false;
-	if (work_done)
-		v->rx_napi_work = true;
-
-	if (!complete) {
-		enetc_unlock_mdio();
-		return budget;
-	}
-
-	napi_complete_done(napi, work_done);
-
-	if (likely(v->rx_dim_en))
-		enetc_rx_net_dim(v);
-
-	v->rx_napi_work = false;
-
-	/* enable interrupts */
-	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
-
-	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
-				 ENETC_TBIER_TXTIE);
-
-	enetc_unlock_mdio();
-
-	return work_done;
-}
-
 static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 {
 	int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
@@ -747,6 +699,50 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 	return rx_frm_cnt;
 }
 
+static int enetc_poll(struct napi_struct *napi, int budget)
+{
+	struct enetc_int_vector
+		*v = container_of(napi, struct enetc_int_vector, napi);
+	bool complete = true;
+	int work_done;
+	int i;
+
+	enetc_lock_mdio();
+
+	for (i = 0; i < v->count_tx_rings; i++)
+		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
+			complete = false;
+
+	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
+	if (work_done == budget)
+		complete = false;
+	if (work_done)
+		v->rx_napi_work = true;
+
+	if (!complete) {
+		enetc_unlock_mdio();
+		return budget;
+	}
+
+	napi_complete_done(napi, work_done);
+
+	if (likely(v->rx_dim_en))
+		enetc_rx_net_dim(v);
+
+	v->rx_napi_work = false;
+
+	/* enable interrupts */
+	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
+
+	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
+		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
+				 ENETC_TBIER_TXTIE);
+
+	enetc_unlock_mdio();
+
+	return work_done;
+}
+
 /* Probing and Init */
 #define ENETC_MAX_RFS_SIZE 64
 void enetc_get_si_caps(struct enetc_si *si)
-- 
cgit v1.2.3


From 0486185ee2442ce8fa4d4345509b439ca6b4a717 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:50 +0200
Subject: net: enetc: remove forward declaration for enetc_map_tx_buffs

There is no other reason why this forward declaration exists rather than
poor ordering of the functions.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 73 +++++++++++++---------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 81c750b2d08f..3c504ef75746 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -13,44 +13,6 @@
 #define ENETC_MAX_SKB_FRAGS	13
 #define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
 
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
-			      int active_offloads);
-
-netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct enetc_ndev_priv *priv = netdev_priv(ndev);
-	struct enetc_bdr *tx_ring;
-	int count;
-
-	tx_ring = priv->tx_ring[skb->queue_mapping];
-
-	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
-		if (unlikely(skb_linearize(skb)))
-			goto drop_packet_err;
-
-	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
-	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
-		netif_stop_subqueue(ndev, tx_ring->index);
-		return NETDEV_TX_BUSY;
-	}
-
-	enetc_lock_mdio();
-	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
-	enetc_unlock_mdio();
-
-	if (unlikely(!count))
-		goto drop_packet_err;
-
-	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED)
-		netif_stop_subqueue(ndev, tx_ring->index);
-
-	return NETDEV_TX_OK;
-
-drop_packet_err:
-	dev_kfree_skb_any(skb);
-	return NETDEV_TX_OK;
-}
-
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
 				struct enetc_tx_swbd *tx_swbd)
 {
@@ -221,6 +183,41 @@ dma_err:
 	return 0;
 }
 
+netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_bdr *tx_ring;
+	int count;
+
+	tx_ring = priv->tx_ring[skb->queue_mapping];
+
+	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
+		if (unlikely(skb_linearize(skb)))
+			goto drop_packet_err;
+
+	count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) {
+		netif_stop_subqueue(ndev, tx_ring->index);
+		return NETDEV_TX_BUSY;
+	}
+
+	enetc_lock_mdio();
+	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+	enetc_unlock_mdio();
+
+	if (unlikely(!count))
+		goto drop_packet_err;
+
+	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED)
+		netif_stop_subqueue(ndev, tx_ring->index);
+
+	return NETDEV_TX_OK;
+
+drop_packet_err:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
 static irqreturn_t enetc_msix(int irq, void *data)
 {
 	struct enetc_int_vector	*v = data;
-- 
cgit v1.2.3


From 7a5222cb7a56fc186141b816a4c68adf42ce523f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 10 Mar 2021 14:03:51 +0200
Subject: net: enetc: make enetc_refill_rx_ring update the consumer index

Since commit fd5736bf9f23 ("enetc: Workaround for MDIO register access
issue"), enetc_refill_rx_ring no longer updates the RX BD ring's
consumer index, that is left to be done by the caller. This has led to
bugs such as the ones found in 96a5223b918c ("net: enetc: remove bogus
write to SIRXIDR from enetc_setup_rxbdr") and 3a5d12c9be6f ("net: enetc:
keep RX ring consumer index in sync with hardware"), so it is desirable
that we move back the update of the consumer index into enetc_refill_rx_ring.

The trouble with that is the different MDIO locking context for the two
callers of enetc_refill_rx_ring:

- enetc_clean_rx_ring runs under enetc_lock_mdio()
- enetc_setup_rxbdr runs outside enetc_lock_mdio()

Simplify the callers of enetc_refill_rx_ring by making enetc_setup_rxbdr
explicitly take enetc_lock_mdio() around the call. It will be the only
place in need of ensuring the hot accessors can be used.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 3c504ef75746..5a54976e6a28 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -435,6 +435,9 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 	if (likely(j)) {
 		rx_ring->next_to_alloc = i; /* keep track from page reuse */
 		rx_ring->next_to_use = i;
+
+		/* update ENETC's consumer index */
+		enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
 	}
 
 	return j;
@@ -620,13 +623,9 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 		u32 bd_status;
 		u16 size;
 
-		if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
-			int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
-
-			/* update ENETC's consumer index */
-			enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
-			cleaned_cnt -= count;
-		}
+		if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
+			cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
+							    cleaned_cnt);
 
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
@@ -1118,9 +1117,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	rx_ring->rcir = hw->reg + ENETC_BDR(RX, idx, ENETC_RBCIR);
 	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
+	enetc_lock_mdio();
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
-	/* update ENETC's consumer index */
-	enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use);
+	enetc_unlock_mdio();
 
 	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
-- 
cgit v1.2.3


From 93a4d0ab1e444f83fa79b85981e6c4818bb580dc Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:38 +0200
Subject: staging: dpaa2-switch: remove broken learning and flooding support

This patch is removing the current configuration of learning and
flooding states per switch port because they are essentially broken in
terms of integration with the switchdev APIs and the bridge
understanding of these states.

First of all, the learning state is a per switch port configuration
while the dpaa2-switch driver was using it to configure the entire
bridging domain. This is broken since the software learning state could
be out of sync with the hardware state when ports from the same bridging
domain are configured by the user with different learning parameters.

The BR_FLOOD flag has been misinterpreted as well. Instead of denoting
whether unicast traffic for which there is no FDB entry will be flooded
towards a given port, the dpaa2-switch used the flag to configure
whether or not a frame with an unknown destination received on a given
port should be flooded or not. In summary, it was used as ingress
setting instead of a egress one.

Also, remove the unnecessary call to dpsw_if_set_broadcast() and the API
definition. The HW default is to let all switch ports to be able to
flood broadcast traffic thus there is no need to call the API again.

Instead of trying to patch things up, just remove the support for the
moment so that we'll add it back cleanly once the driver is out of
staging.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h |  24 ------
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     |  93 ----------------------
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     |  18 -----
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 123 -----------------------------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    |   1 -
 5 files changed, 259 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index 450841cc6ca8..2a921ed9594d 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -48,8 +48,6 @@
 #define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH  DPSW_CMD_ID(0x044)
 
 #define DPSW_CMDID_IF_GET_LINK_STATE        DPSW_CMD_ID(0x046)
-#define DPSW_CMDID_IF_SET_FLOODING          DPSW_CMD_ID(0x047)
-#define DPSW_CMDID_IF_SET_BROADCAST         DPSW_CMD_ID(0x048)
 
 #define DPSW_CMDID_IF_GET_TCI               DPSW_CMD_ID(0x04A)
 
@@ -68,7 +66,6 @@
 #define DPSW_CMDID_FDB_REMOVE_UNICAST       DPSW_CMD_ID(0x085)
 #define DPSW_CMDID_FDB_ADD_MULTICAST        DPSW_CMD_ID(0x086)
 #define DPSW_CMDID_FDB_REMOVE_MULTICAST     DPSW_CMD_ID(0x087)
-#define DPSW_CMDID_FDB_SET_LEARNING_MODE    DPSW_CMD_ID(0x088)
 #define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
 
 #define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
@@ -191,18 +188,6 @@ struct dpsw_rsp_get_attr {
 	__le64 options;
 };
 
-struct dpsw_cmd_if_set_flooding {
-	__le16 if_id;
-	/* from LSB: enable:1 */
-	u8 enable;
-};
-
-struct dpsw_cmd_if_set_broadcast {
-	__le16 if_id;
-	/* from LSB: enable:1 */
-	u8 enable;
-};
-
 #define DPSW_VLAN_ID_SHIFT	0
 #define DPSW_VLAN_ID_SIZE	12
 #define DPSW_DEI_SHIFT		12
@@ -350,15 +335,6 @@ struct dpsw_cmd_fdb_multicast_op {
 	__le64 if_id[4];
 };
 
-#define DPSW_LEARNING_MODE_SHIFT	0
-#define DPSW_LEARNING_MODE_SIZE		4
-
-struct dpsw_cmd_fdb_set_learning_mode {
-	__le16 fdb_id;
-	/* only the first 4 bits from LSB */
-	u8 mode;
-};
-
 struct dpsw_cmd_fdb_dump {
 	__le16 fdb_id;
 	__le16 pad0;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index f8bfe779bd30..f7013d71dc84 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -431,68 +431,6 @@ int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
 	return 0;
 }
 
-/**
- * dpsw_if_set_flooding() - Enable Disable flooding for particular interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @en:		1 - enable, 0 - disable
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_flooding(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 u8 en)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_flooding *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_FLOODING,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_flooding *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(cmd_params->enable, ENABLE, en);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_set_broadcast() - Enable/disable broadcast for particular interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @en:		1 - enable, 0 - disable
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u16 if_id,
-			  u8 en)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_broadcast *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_BROADCAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_broadcast *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(cmd_params->enable, ENABLE, en);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
 /**
  * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI)
  * @mc_io:	Pointer to MC portal's I/O object
@@ -1151,37 +1089,6 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
 	return mc_send_command(mc_io, &cmd);
 }
 
-/**
- * dpsw_fdb_set_learning_mode() - Define FDB learning mode
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @mode:	Learning mode
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io,
-			       u32 cmd_flags,
-			       u16 token,
-			       u16 fdb_id,
-			       enum dpsw_fdb_learning_mode mode)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_set_learning_mode *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_SET_LEARNING_MODE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_set_learning_mode *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	dpsw_set_field(cmd_params->mode, LEARNING_MODE, mode);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
 /**
  * dpsw_get_api_version() - Get Data Path Switch API version
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index 9cfd8a8e0197..bc6bcfb6893d 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -235,18 +235,6 @@ int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
 			   u16 if_id,
 			   struct dpsw_link_state *state);
 
-int dpsw_if_set_flooding(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 u8 en);
-
-int dpsw_if_set_broadcast(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u16 if_id,
-			  u8 en);
-
 /**
  * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
  * @pcp: Priority Code Point (PCP): a 3-bit field which refers
@@ -555,12 +543,6 @@ enum dpsw_fdb_learning_mode {
 	DPSW_FDB_LEARNING_MODE_SECURE = 3
 };
 
-int dpsw_fdb_set_learning_mode(struct fsl_mc_io *mc_io,
-			       u32 cmd_flags,
-			       u16 token,
-			       u16 fdb_id,
-			       enum dpsw_fdb_learning_mode mode);
-
 /**
  * struct dpsw_fdb_attr - FDB Attributes
  * @max_fdb_entries: Number of FDB entries
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 703055e063ff..edcaf99c24fc 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -161,44 +161,6 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
 	return 0;
 }
 
-static int dpaa2_switch_set_learning(struct ethsw_core *ethsw, bool enable)
-{
-	enum dpsw_fdb_learning_mode learn_mode;
-	int err;
-
-	if (enable)
-		learn_mode = DPSW_FDB_LEARNING_MODE_HW;
-	else
-		learn_mode = DPSW_FDB_LEARNING_MODE_DIS;
-
-	err = dpsw_fdb_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
-					 learn_mode);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_fdb_set_learning_mode err %d\n", err);
-		return err;
-	}
-	ethsw->learning = enable;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_set_flood(struct ethsw_port_priv *port_priv, bool enable)
-{
-	int err;
-
-	err = dpsw_if_set_flooding(port_priv->ethsw_data->mc_io, 0,
-				   port_priv->ethsw_data->dpsw_handle,
-				   port_priv->idx, enable);
-	if (err) {
-		netdev_err(port_priv->netdev,
-			   "dpsw_if_set_flooding err %d\n", err);
-		return err;
-	}
-	port_priv->flood = enable;
-
-	return 0;
-}
-
 static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
 {
 	struct dpsw_stp_cfg stp_cfg = {
@@ -908,41 +870,6 @@ static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
 	return dpaa2_switch_port_set_stp_state(port_priv, state);
 }
 
-static int
-dpaa2_switch_port_attr_br_flags_pre_set(struct net_device *netdev,
-					struct switchdev_brport_flags flags)
-{
-	if (flags.mask & ~(BR_LEARNING | BR_FLOOD))
-		return -EINVAL;
-
-	return 0;
-}
-
-static int
-dpaa2_switch_port_attr_br_flags_set(struct net_device *netdev,
-				    struct switchdev_brport_flags flags)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err = 0;
-
-	if (flags.mask & BR_LEARNING) {
-		/* Learning is enabled per switch */
-		err = dpaa2_switch_set_learning(port_priv->ethsw_data,
-						!!(flags.val & BR_LEARNING));
-		if (err)
-			return err;
-	}
-
-	if (flags.mask & BR_FLOOD) {
-		err = dpaa2_switch_port_set_flood(port_priv,
-						  !!(flags.val & BR_FLOOD));
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 				      const struct switchdev_attr *attr)
 {
@@ -953,14 +880,6 @@ static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 		err = dpaa2_switch_port_attr_stp_state_set(netdev,
 							   attr->u.stp_state);
 		break;
-	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
-		err = dpaa2_switch_port_attr_br_flags_pre_set(netdev,
-							      attr->u.brport_flags);
-		break;
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
-		err = dpaa2_switch_port_attr_br_flags_set(netdev,
-							  attr->u.brport_flags);
-		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
 		/* VLANs are supported by default  */
 		break;
@@ -1232,24 +1151,6 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 		}
 	}
 
-	/* Enable flooding */
-	err = dpaa2_switch_port_set_flood(port_priv, 1);
-	if (!err)
-		port_priv->bridge_dev = upper_dev;
-
-	return err;
-}
-
-static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	/* Disable flooding */
-	err = dpaa2_switch_port_set_flood(port_priv, 0);
-	if (!err)
-		port_priv->bridge_dev = NULL;
-
 	return err;
 }
 
@@ -1270,8 +1171,6 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 		if (netif_is_bridge_master(upper_dev)) {
 			if (info->linking)
 				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
-			else
-				err = dpaa2_switch_port_bridge_leave(netdev);
 		}
 	}
 
@@ -1513,13 +1412,6 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 		goto err_close;
 	}
 
-	err = dpsw_fdb_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
-					 DPSW_FDB_LEARNING_MODE_HW);
-	if (err) {
-		dev_err(dev, "dpsw_fdb_set_learning_mode err %d\n", err);
-		goto err_close;
-	}
-
 	stp_cfg.vlan_id = DEFAULT_VLAN_ID;
 	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
 
@@ -1531,15 +1423,6 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 				err, i);
 			goto err_close;
 		}
-
-		err = dpsw_if_set_broadcast(ethsw->mc_io, 0,
-					    ethsw->dpsw_handle, i, 1);
-		if (err) {
-			dev_err(dev,
-				"dpsw_if_set_broadcast err %d for port %d\n",
-				err, i);
-			goto err_close;
-		}
 	}
 
 	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
@@ -1689,9 +1572,6 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	port_priv->idx = port_idx;
 	port_priv->stp_state = BR_STATE_FORWARDING;
 
-	/* Flooding is implicitly enabled */
-	port_priv->flood = true;
-
 	SET_NETDEV_DEV(port_netdev, dev);
 	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
 	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
@@ -1756,9 +1636,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 	/* DEFAULT_VLAN_ID is implicitly configured on the switch */
 	ethsw->vlans[DEFAULT_VLAN_ID] = ETHSW_VLAN_MEMBER;
 
-	/* Learning is implicitly enabled */
-	ethsw->learning = true;
-
 	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
 			       GFP_KERNEL);
 	if (!(ethsw->ports)) {
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 5f9211ccb1ef..448f60755eea 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -69,7 +69,6 @@ struct ethsw_core {
 	struct ethsw_port_priv		**ports;
 
 	u8				vlans[VLAN_VID_MASK + 1];
-	bool				learning;
 
 	struct notifier_block		port_nb;
 	struct notifier_block		port_switchdev_nb;
-- 
cgit v1.2.3


From 282d47de29c766621b4d340aff89f6c239fc47e2 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:39 +0200
Subject: staging: dpaa2-switch: fix up initial forwarding configuration done
 by firmware

By default, the DPSW object is configured with VLAN ID 1 in the VLAN
table, which all ports are member of. This entry in the VLAN table
selects the same FDB ID for all ports, meaning that forwarding between
ports is permitted. This is unlike the switchdev model, where each port
should operate as standalone by default.

To make the switch operate in standalone ports mode, we need the VLAN
table to select a unique FDB ID for each port. In order to do that, we
need to simply delete the VLAN 1 created automatically by firmware, and
let dpaa2_switch_port_init take over, by readding VLAN ID 1, but
pointing towards a unique FDB ID.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 80 +++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index edcaf99c24fc..fa0ec54b49fa 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -3,7 +3,7 @@
  * DPAA2 Ethernet Switch driver
  *
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -1365,6 +1365,8 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 {
 	struct device *dev = &sw_dev->dev;
 	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	struct dpsw_tci_cfg tci_cfg = {0};
 	struct dpsw_stp_cfg stp_cfg;
 	int err;
 	u16 i;
@@ -1416,6 +1418,12 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
 
 	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
+		if (err) {
+			dev_err(dev, "dpsw_if_disable err %d\n", err);
+			goto err_close;
+		}
+
 		err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
 				      &stp_cfg);
 		if (err) {
@@ -1423,6 +1431,39 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 				err, i);
 			goto err_close;
 		}
+
+		/* Switch starts with all ports configured to VLAN 1. Need to
+		 * remove this setting to allow configuration at bridge join
+		 */
+		vcfg.num_ifs = 1;
+		vcfg.if_id[0] = i;
+		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
+						   DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if_untagged err %d\n",
+				err);
+			goto err_close;
+		}
+
+		tci_cfg.vlan_id = 4095;
+		err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, i, &tci_cfg);
+		if (err) {
+			dev_err(dev, "dpsw_if_set_tci err %d\n", err);
+			goto err_close;
+		}
+
+		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if err %d\n", err);
+			goto err_close;
+		}
+	}
+
+	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, DEFAULT_VLAN_ID);
+	if (err) {
+		dev_err(dev, "dpsw_vlan_remove err %d\n", err);
+		goto err_close;
 	}
 
 	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
@@ -1449,34 +1490,22 @@ err_close:
 
 static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 {
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = DEFAULT_VLAN_ID,
+		.flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID,
+	};
 	struct net_device *netdev = port_priv->netdev;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_vlan_if_cfg vcfg;
 	int err;
 
-	/* Switch starts with all ports configured to VLAN 1. Need to
-	 * remove this setting to allow configuration at bridge join
+	/* We need to add VLAN 1 as the PVID on this port until it is under a
+	 * bridge since the DPAA2 switch is not able to handle the traffic in a
+	 * VLAN unaware fashion
 	 */
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-
-	err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					   DEFAULT_VLAN_ID, &vcfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_vlan_remove_if_untagged err %d\n",
-			   err);
-		return err;
-	}
-
-	err = dpaa2_switch_port_set_pvid(port_priv, 0);
+	err = dpaa2_switch_port_vlans_add(netdev, &vlan);
 	if (err)
 		return err;
 
-	err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DEFAULT_VLAN_ID, &vcfg);
-	if (err)
-		netdev_err(netdev, "dpsw_vlan_remove_if err %d\n", err);
-
 	return err;
 }
 
@@ -1633,9 +1662,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 	if (err)
 		goto err_free_cmdport;
 
-	/* DEFAULT_VLAN_ID is implicitly configured on the switch */
-	ethsw->vlans[DEFAULT_VLAN_ID] = ETHSW_VLAN_MEMBER;
-
 	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
 			       GFP_KERNEL);
 	if (!(ethsw->ports)) {
@@ -1655,10 +1681,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 		goto err_free_ports;
 	}
 
-	/* Make sure the switch ports are disabled at probe time */
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-		dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
-
 	/* Setup IRQs */
 	err = dpaa2_switch_setup_irqs(sw_dev);
 	if (err)
-- 
cgit v1.2.3


From 5dda9a7921c7574ba02939eb3552145aa02c44d9 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:40 +0200
Subject: staging: dpaa2-switch: remove obsolete .ndo_fdb_{add|del} callbacks

Since the dpaa2-switch already listens for SWITCHDEV_FDB_ADD_TO_DEVICE /
SWITCHDEV_FDB_DEL_TO_DEVICE events emitted by the bridge, we don't need
the bridge bypass operations, and now is a good time to delete them. All
'bridge fdb' commands need the 'master' flag specified now.

In fact, having the obsolete .ndo_fdb_{add|del} callbacks would even
complicate the bridge leave/join procedures without any real benefit.
Every FDB entry is installed in an FDB ID as far as the hardware is
concerned, and the dpaa2-switch ports change their FDB ID when they join
or leave a bridge. So we would need to manually delete these FDB entries
when the FDB ID changes. That's because, unlike FDB entries added
through switchdev, where the bridge automatically deletes those on
leave, there isn't anybody who will remove the static FDB entries
installed via the bridge bypass operations upon a change in the upper
device.

Note that we still need .ndo_fdb_dump though. The dpaa2-switch does not
emit any interrupts when a new address is learnt, so we cannot keep the
bridge FDB in sync with the hardware FDB. Therefore, we need this
callback to get a chance to print the FDB entries that were dynamically
learnt by our hardware.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index fa0ec54b49fa..3067289a15a1 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -295,31 +295,6 @@ static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
 	return err;
 }
 
-static int dpaa2_switch_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
-				     struct net_device *dev, const unsigned char *addr,
-				     u16 vid, u16 flags,
-				     struct netlink_ext_ack *extack)
-{
-	if (is_unicast_ether_addr(addr))
-		return dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
-						    addr);
-	else
-		return dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
-						    addr);
-}
-
-static int dpaa2_switch_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
-				     struct net_device *dev,
-				     const unsigned char *addr, u16 vid)
-{
-	if (is_unicast_ether_addr(addr))
-		return dpaa2_switch_port_fdb_del_uc(netdev_priv(dev),
-						    addr);
-	else
-		return dpaa2_switch_port_fdb_del_mc(netdev_priv(dev),
-						    addr);
-}
-
 static void dpaa2_switch_port_get_stats(struct net_device *netdev,
 					struct rtnl_link_stats64 *stats)
 {
@@ -726,8 +701,6 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_change_mtu		= dpaa2_switch_port_change_mtu,
 	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
 	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
-	.ndo_fdb_add		= dpaa2_switch_port_fdb_add,
-	.ndo_fdb_del		= dpaa2_switch_port_fdb_del,
 	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
 
 	.ndo_start_xmit		= dpaa2_switch_port_dropframe,
-- 
cgit v1.2.3


From 26d419f36a23acfaf38cd5e95a539701b2bb4559 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:41 +0200
Subject: staging: dpaa2-switch: get control interface attributes

Introduce a new structure to hold all necessary info related to an RX
queue for the control interface and populate the FQ IDs.
We only have one Rx queue and one Tx confirmation queue on the control
interface, both shared by all the switch ports.

Also, increase the minimum version of the object supported by the driver
since for a basic switch driver support we'll be in need for some ABIs
added in the latest version of firmware.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h | 13 +++++--
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     | 33 +++++++++++++++++-
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     | 23 +++++++++++-
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 56 ++++++++++++++++++++++++++----
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    | 22 +++++++++++-
 5 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index 2a921ed9594d..fc1ba45f8a3f 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2020 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -10,7 +10,7 @@
 
 /* DPSW Version */
 #define DPSW_VER_MAJOR		8
-#define DPSW_VER_MINOR		5
+#define DPSW_VER_MINOR		9
 
 #define DPSW_CMD_BASE_VERSION	1
 #define DPSW_CMD_VERSION_2	2
@@ -72,6 +72,8 @@
 #define DPSW_CMDID_IF_GET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A8)
 #define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
 
+#define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
+
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
 	GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
@@ -347,6 +349,13 @@ struct dpsw_rsp_fdb_dump {
 	__le16 num_entries;
 };
 
+struct dpsw_rsp_ctrl_if_get_attr {
+	__le64 pad;
+	__le32 rx_fqid;
+	__le32 rx_err_fqid;
+	__le32 tx_err_conf_fqid;
+};
+
 struct dpsw_rsp_get_api_version {
 	__le16 version_major;
 	__le16 version_minor;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index f7013d71dc84..a7794f012e78 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -1089,6 +1089,37 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
 	return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpsw_ctrl_if_get_attributes() - Obtain control interface attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @attr:	Returned control interface attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr)
+{
+	struct dpsw_rsp_ctrl_if_get_attr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR,
+					  cmd_flags, token);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_ctrl_if_get_attr *)cmd.params;
+	attr->rx_fqid = le32_to_cpu(rsp_params->rx_fqid);
+	attr->rx_err_fqid = le32_to_cpu(rsp_params->rx_err_fqid);
+	attr->tx_err_conf_fqid = le32_to_cpu(rsp_params->tx_err_conf_fqid);
+
+	return 0;
+}
+
 /**
  * dpsw_get_api_version() - Get Data Path Switch API version
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index bc6bcfb6893d..8c8cc9600e8d 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -175,6 +175,27 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
 			u16 token,
 			struct dpsw_attr *attr);
 
+/**
+ * struct dpsw_ctrl_if_attr - Control interface attributes
+ * @rx_fqid:		Receive FQID
+ * @rx_err_fqid:	Receive error FQID
+ * @tx_err_conf_fqid:	Transmit error and confirmation FQID
+ */
+struct dpsw_ctrl_if_attr {
+	u32 rx_fqid;
+	u32 rx_err_fqid;
+	u32 tx_err_conf_fqid;
+};
+
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr);
+
+enum dpsw_queue_type {
+	DPSW_QUEUE_RX,
+	DPSW_QUEUE_TX_ERR_CONF,
+	DPSW_QUEUE_RX_ERR,
+};
+
 /**
  * enum dpsw_action - Action selection for special/control frames
  * @DPSW_ACTION_DROP: Drop frame
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 3067289a15a1..b03211fb6fc9 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -20,7 +20,7 @@
 
 /* Minimal supported DPSW version */
 #define DPSW_MIN_VER_MAJOR		8
-#define DPSW_MIN_VER_MINOR		1
+#define DPSW_MIN_VER_MINOR		9
 
 #define DEFAULT_VLAN_ID			1
 
@@ -1334,6 +1334,43 @@ static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
 		ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
 }
 
+static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_attr ctrl_if_attr;
+	struct device *dev = ethsw->dev;
+	int i = 0;
+	int err;
+
+	err = dpsw_ctrl_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  &ctrl_if_attr);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_get_attributes() = %d\n", err);
+		return err;
+	}
+
+	ethsw->fq[i].fqid = ctrl_if_attr.rx_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_RX;
+
+	ethsw->fq[i].fqid = ctrl_if_attr.tx_err_conf_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_TX_ERR_CONF;
+
+	return 0;
+}
+
+static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
+{
+	int err;
+
+	/* setup FQs for Rx and Tx Conf */
+	err = dpaa2_switch_setup_fqs(ethsw);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 {
 	struct device *dev = &sw_dev->dev;
@@ -1371,11 +1408,14 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
 	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
 	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
-		dev_err(dev, "DPSW version %d:%d not supported. Use %d.%d or greater.\n",
-			ethsw->major,
-			ethsw->minor,
-			DPSW_MIN_VER_MAJOR, DPSW_MIN_VER_MINOR);
-		err = -ENOTSUPP;
+		dev_err(dev, "DPSW version %d:%d not supported. Use firmware 10.28.0 or greater.\n",
+			ethsw->major, ethsw->minor);
+		err = -EOPNOTSUPP;
+		goto err_close;
+	}
+
+	if (!dpaa2_switch_supports_cpu_traffic(ethsw)) {
+		err = -EOPNOTSUPP;
 		goto err_close;
 	}
 
@@ -1447,6 +1487,10 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 		goto err_close;
 	}
 
+	err = dpaa2_switch_ctrl_if_setup(ethsw);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
 	err = dpaa2_switch_register_notifier(dev);
 	if (err)
 		goto err_destroy_ordered_workqueue;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 448f60755eea..24a203c42f5f 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -3,7 +3,7 @@
  * DPAA2 Ethernet Switch declarations
  *
  * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2021 NXP
  *
  */
 
@@ -39,10 +39,19 @@
 
 #define ETHSW_FEATURE_MAC_ADDR	BIT(0)
 
+/* Number of receive queues (one RX and one TX_CONF) */
+#define DPAA2_SWITCH_RX_NUM_FQS	2
+
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
 struct ethsw_core;
 
+struct dpaa2_switch_fq {
+	struct ethsw_core *ethsw;
+	enum dpsw_queue_type type;
+	u32 fqid;
+};
+
 /* Per port private data */
 struct ethsw_port_priv {
 	struct net_device	*netdev;
@@ -74,6 +83,17 @@ struct ethsw_core {
 	struct notifier_block		port_switchdev_nb;
 	struct notifier_block		port_switchdevb_nb;
 	struct workqueue_struct		*workqueue;
+
+	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
 };
 
+static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
+{
+	if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) {
+		dev_err(ethsw->dev, "Control Interface is disabled, cannot probe\n");
+		return false;
+	}
+
+	return true;
+}
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 2877e4f7e189bd8e89b85bfda04fbde319057f50 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:42 +0200
Subject: staging: dpaa2-switch: setup buffer pool and RX path rings

Allocate and setup a buffer pool, needed on the Rx path of the control
interface. Also, define the Rx buffer size seen by the WRIOP from the
PAGE_SIZE buffers seeded.

Also, create the needed Rx rings for both frame queues used on the
control interface.  On the Rx path, when a pull-dequeue operation is
performed on a software portal, available frame descriptors are put in a
ring - a DMA memory storage - for further usage.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h |  12 +++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     |  31 +++++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     |  26 ++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 126 +++++++++++++++++++++++++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    |  14 ++++
 5 files changed, 209 insertions(+)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index fc1ba45f8a3f..d76f80d1bd8b 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -8,6 +8,8 @@
 #ifndef __FSL_DPSW_CMD_H
 #define __FSL_DPSW_CMD_H
 
+#include "dpsw.h"
+
 /* DPSW Version */
 #define DPSW_VER_MAJOR		8
 #define DPSW_VER_MINOR		9
@@ -73,6 +75,7 @@
 #define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
 
 #define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
+#define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
@@ -356,6 +359,15 @@ struct dpsw_rsp_ctrl_if_get_attr {
 	__le32 tx_err_conf_fqid;
 };
 
+#define DPSW_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))
+struct dpsw_cmd_ctrl_if_set_pools {
+	u8 num_dpbp;
+	u8 backup_pool_mask;
+	__le16 pad;
+	__le32 dpbp_id[DPSW_MAX_DPBP];
+	__le16 buffer_size[DPSW_MAX_DPBP];
+};
+
 struct dpsw_rsp_get_api_version {
 	__le16 version_major;
 	__le16 version_minor;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index a7794f012e78..a560270b2466 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -1120,6 +1120,37 @@ int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
 	return 0;
 }
 
+/**
+ * dpsw_ctrl_if_set_pools() - Set control interface buffer pools
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Buffer pools configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_pools *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int i;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS,
+					  cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_pools *)cmd.params;
+	cmd_params->num_dpbp = cfg->num_dpbp;
+	for (i = 0; i < DPSW_MAX_DPBP; i++) {
+		cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id);
+		cmd_params->buffer_size[i] =
+			cpu_to_le16(cfg->pools[i].buffer_size);
+		cmd_params->backup_pool_mask |=
+			DPSW_BACKUP_POOL(cfg->pools[i].backup_pool, i);
+	}
+
+	return mc_send_command(mc_io, &cmd);
+}
+
 /**
  * dpsw_get_api_version() - Get Data Path Switch API version
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index 8c8cc9600e8d..b14ec3e10b85 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -196,6 +196,32 @@ enum dpsw_queue_type {
 	DPSW_QUEUE_RX_ERR,
 };
 
+/**
+ * Maximum number of DPBP
+ */
+#define DPSW_MAX_DPBP     8
+
+/**
+ * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration
+ * @num_dpbp: Number of DPBPs
+ * @pools: Array of buffer pools parameters; The number of valid entries
+ *	must match 'num_dpbp' value
+ * @pools.dpbp_id: DPBP object ID
+ * @pools.buffer_size: Buffer size
+ * @pools.backup_pool: Backup pool
+ */
+struct dpsw_ctrl_if_pools_cfg {
+	u8 num_dpbp;
+	struct {
+		int dpbp_id;
+		u16 buffer_size;
+		int backup_pool;
+	} pools[DPSW_MAX_DPBP];
+};
+
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg);
+
 /**
  * enum dpsw_action - Action selection for special/control frames
  * @DPSW_ACTION_DROP: Drop frame
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index b03211fb6fc9..06998578355c 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -1359,6 +1359,110 @@ static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
 	return 0;
 }
 
+static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
+	struct device *dev = ethsw->dev;
+	struct fsl_mc_device *dpbp_dev;
+	struct dpbp_attr dpbp_attrs;
+	int err;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
+				     &dpbp_dev);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "DPBP device allocation failed\n");
+		return err;
+	}
+	ethsw->dpbp_dev = dpbp_dev;
+
+	err = dpbp_open(ethsw->mc_io, 0, dpbp_dev->obj_desc.id,
+			&dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpbp_reset(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_reset() failed\n");
+		goto err_reset;
+	}
+
+	err = dpbp_enable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_enable() failed\n");
+		goto err_enable;
+	}
+
+	err = dpbp_get_attributes(ethsw->mc_io, 0, dpbp_dev->mc_handle,
+				  &dpbp_attrs);
+	if (err) {
+		dev_err(dev, "dpbp_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+
+	dpsw_ctrl_if_pools_cfg.num_dpbp = 1;
+	dpsw_ctrl_if_pools_cfg.pools[0].dpbp_id = dpbp_attrs.id;
+	dpsw_ctrl_if_pools_cfg.pools[0].buffer_size = DPAA2_SWITCH_RX_BUF_SIZE;
+	dpsw_ctrl_if_pools_cfg.pools[0].backup_pool = 0;
+
+	err = dpsw_ctrl_if_set_pools(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     &dpsw_ctrl_if_pools_cfg);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
+		goto err_get_attr;
+	}
+	ethsw->bpid = dpbp_attrs.id;
+
+	return 0;
+
+err_get_attr:
+	dpbp_disable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_enable:
+err_reset:
+	dpbp_close(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_open:
+	fsl_mc_object_free(dpbp_dev);
+	return err;
+}
+
+static void dpaa2_switch_free_dpbp(struct ethsw_core *ethsw)
+{
+	dpbp_disable(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	dpbp_close(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	fsl_mc_object_free(ethsw->dpbp_dev);
+}
+
+static int dpaa2_switch_alloc_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		ethsw->fq[i].store =
+			dpaa2_io_store_create(DPAA2_SWITCH_STORE_SIZE,
+					      ethsw->dev);
+		if (!ethsw->fq[i].store) {
+			dev_err(ethsw->dev, "dpaa2_io_store_create failed\n");
+			while (--i >= 0)
+				dpaa2_io_store_destroy(ethsw->fq[i].store);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_store_destroy(ethsw->fq[i].store);
+}
+
 static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 {
 	int err;
@@ -1368,7 +1472,21 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 	if (err)
 		return err;
 
+	/* setup the buffer pool needed on the Rx path */
+	err = dpaa2_switch_setup_dpbp(ethsw);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_alloc_rings(ethsw);
+	if (err)
+		goto err_free_dpbp;
+
 	return 0;
+
+err_free_dpbp:
+	dpaa2_switch_free_dpbp(ethsw);
+
+	return err;
 }
 
 static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
@@ -1563,6 +1681,12 @@ static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
 		dev_warn(dev, "dpsw_close err %d\n", err);
 }
 
+static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
+{
+	dpaa2_switch_destroy_rings(ethsw);
+	dpaa2_switch_free_dpbp(ethsw);
+}
+
 static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
 {
 	struct ethsw_port_priv *port_priv;
@@ -1573,6 +1697,8 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
 	dev = &sw_dev->dev;
 	ethsw = dev_get_drvdata(dev);
 
+	dpaa2_switch_ctrl_if_teardown(ethsw);
+
 	dpaa2_switch_teardown_irqs(sw_dev);
 
 	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 24a203c42f5f..f5bd2cd3d140 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -17,6 +17,8 @@
 #include <uapi/linux/if_bridge.h>
 #include <net/switchdev.h>
 #include <linux/if_bridge.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
 
 #include "dpsw.h"
 
@@ -42,6 +44,15 @@
 /* Number of receive queues (one RX and one TX_CONF) */
 #define DPAA2_SWITCH_RX_NUM_FQS	2
 
+/* Hardware requires alignment for ingress/egress buffer addresses */
+#define DPAA2_SWITCH_RX_BUF_RAW_SIZE	PAGE_SIZE
+#define DPAA2_SWITCH_RX_BUF_TAILROOM \
+	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define DPAA2_SWITCH_RX_BUF_SIZE \
+	(DPAA2_SWITCH_RX_BUF_RAW_SIZE - DPAA2_SWITCH_RX_BUF_TAILROOM)
+
+#define DPAA2_SWITCH_STORE_SIZE 16
+
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
 struct ethsw_core;
@@ -49,6 +60,7 @@ struct ethsw_core;
 struct dpaa2_switch_fq {
 	struct ethsw_core *ethsw;
 	enum dpsw_queue_type type;
+	struct dpaa2_io_store *store;
 	u32 fqid;
 };
 
@@ -85,6 +97,8 @@ struct ethsw_core {
 	struct workqueue_struct		*workqueue;
 
 	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
+	struct fsl_mc_device		*dpbp_dev;
+	u16				bpid;
 };
 
 static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
-- 
cgit v1.2.3


From 04abc97d3ef789b018a7328e5069bcc6f44ff54a Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:43 +0200
Subject: staging: dpaa2-switch: setup dpio

Setup interrupts on the control interface queues. We do not force an
exact affinity between the interrupts received from a specific queue and
a cpu.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h | 15 +++++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     | 33 +++++++++++++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     | 23 +++++++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 65 ++++++++++++++++++++++++++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    |  1 +
 5 files changed, 137 insertions(+)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index d76f80d1bd8b..d451e1e7f0a3 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -76,6 +76,7 @@
 
 #define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
 #define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
+#define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
@@ -368,6 +369,20 @@ struct dpsw_cmd_ctrl_if_set_pools {
 	__le16 buffer_size[DPSW_MAX_DPBP];
 };
 
+#define DPSW_DEST_TYPE_SHIFT	0
+#define DPSW_DEST_TYPE_SIZE	4
+
+struct dpsw_cmd_ctrl_if_set_queue {
+	__le32 dest_id;
+	u8 dest_priority;
+	u8 pad;
+	/* from LSB: dest_type:4 */
+	u8 dest_type;
+	u8 qtype;
+	__le64 user_ctx;
+	__le32 options;
+};
+
 struct dpsw_rsp_get_api_version {
 	__le16 version_major;
 	__le16 version_minor;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index a560270b2466..4bc5791c0fa1 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -1151,6 +1151,39 @@ int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 	return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpsw_ctrl_if_set_queue() - Set Rx queue configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of dpsw object
+ * @qtype:	dpsw_queue_type of the targeted queue
+ * @cfg:	Rx queue configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_queue *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->dest_priority = cfg->dest_cfg.priority;
+	cmd_params->qtype = qtype;
+	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+	cmd_params->options = cpu_to_le32(cfg->options);
+	dpsw_set_field(cmd_params->dest_type,
+		       DEST_TYPE,
+		       cfg->dest_cfg.dest_type);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
 /**
  * dpsw_get_api_version() - Get Data Path Switch API version
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index b14ec3e10b85..f888778b70a1 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -222,6 +222,29 @@ struct dpsw_ctrl_if_pools_cfg {
 int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			   const struct dpsw_ctrl_if_pools_cfg *cfg);
 
+#define DPSW_CTRL_IF_QUEUE_OPT_USER_CTX		0x00000001
+#define DPSW_CTRL_IF_QUEUE_OPT_DEST		0x00000002
+
+enum dpsw_ctrl_if_dest {
+	DPSW_CTRL_IF_DEST_NONE = 0,
+	DPSW_CTRL_IF_DEST_DPIO = 1,
+};
+
+struct dpsw_ctrl_if_dest_cfg {
+	enum dpsw_ctrl_if_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+struct dpsw_ctrl_if_queue_cfg {
+	u32 options;
+	u64 user_ctx;
+	struct dpsw_ctrl_if_dest_cfg dest_cfg;
+};
+
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg);
 /**
  * enum dpsw_action - Action selection for special/control frames
  * @DPSW_ACTION_DROP: Drop frame
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 06998578355c..5e2d441cbc38 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -1463,6 +1463,64 @@ static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
 		dpaa2_io_store_destroy(ethsw->fq[i].store);
 }
 
+static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_queue_cfg queue_cfg;
+	struct dpaa2_io_notification_ctx *nctx;
+	int err, i, j;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		nctx = &ethsw->fq[i].nctx;
+
+		/* Register a new software context for the FQID.
+		 * By using NULL as the first parameter, we specify that we do
+		 * not care on which cpu are interrupts received for this queue
+		 */
+		nctx->is_cdan = 0;
+		nctx->id = ethsw->fq[i].fqid;
+		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
+		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
+		if (err) {
+			err = -EPROBE_DEFER;
+			goto err_register;
+		}
+
+		queue_cfg.options = DPSW_CTRL_IF_QUEUE_OPT_DEST |
+				    DPSW_CTRL_IF_QUEUE_OPT_USER_CTX;
+		queue_cfg.dest_cfg.dest_type = DPSW_CTRL_IF_DEST_DPIO;
+		queue_cfg.dest_cfg.dest_id = nctx->dpio_id;
+		queue_cfg.dest_cfg.priority = 0;
+		queue_cfg.user_ctx = nctx->qman64;
+
+		err = dpsw_ctrl_if_set_queue(ethsw->mc_io, 0,
+					     ethsw->dpsw_handle,
+					     ethsw->fq[i].type,
+					     &queue_cfg);
+		if (err)
+			goto err_set_queue;
+	}
+
+	return 0;
+
+err_set_queue:
+	dpaa2_io_service_deregister(NULL, nctx, ethsw->dev);
+err_register:
+	for (j = 0; j < i; j++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[j].nctx,
+					    ethsw->dev);
+
+	return err;
+}
+
+static void dpaa2_switch_free_dpio(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[i].nctx,
+					    ethsw->dev);
+}
+
 static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 {
 	int err;
@@ -1481,8 +1539,14 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 	if (err)
 		goto err_free_dpbp;
 
+	err = dpaa2_switch_setup_dpio(ethsw);
+	if (err)
+		goto err_destroy_rings;
+
 	return 0;
 
+err_destroy_rings:
+	dpaa2_switch_destroy_rings(ethsw);
 err_free_dpbp:
 	dpaa2_switch_free_dpbp(ethsw);
 
@@ -1683,6 +1747,7 @@ static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
 
 static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
 {
+	dpaa2_switch_free_dpio(ethsw);
 	dpaa2_switch_destroy_rings(ethsw);
 	dpaa2_switch_free_dpbp(ethsw);
 }
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index f5bd2cd3d140..50dd529e2a79 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -61,6 +61,7 @@ struct dpaa2_switch_fq {
 	struct ethsw_core *ethsw;
 	enum dpsw_queue_type type;
 	struct dpaa2_io_store *store;
+	struct dpaa2_io_notification_ctx nctx;
 	u32 fqid;
 };
 
-- 
cgit v1.2.3


From 0b1b71370458860579831e77485883fcf2e8fbbe Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:44 +0200
Subject: staging: dpaa2-switch: handle Rx path on control interface

The dpaa2-ethsw supports only one Rx queue that is shared by all switch
ports. This means that information about which port was the ingress port
for a specific frame needs to be passed in metadata. In our case, the
Flow Context (FLC) field from the frame descriptor holds this
information. Besides the interface ID of the ingress port we also
receive the virtual QDID of the port. Below is a visual description of
the 64 bits of FLC.

63           47           31           15           0
+---------------------------------------------------+
|            |            |            |            |
|  RESERVED  |    IF_ID   |  RESERVED  |  IF QDID   |
|            |            |            |            |
+---------------------------------------------------+

Because all switch ports share the same Rx and Tx conf queues, NAPI
management takes into consideration when there is at least one switch
interface open to enable the NAPI instance.

The Rx path is common, for the most part, for both Rx and Tx conf with
the mention that each of them has its own consume function of a frame
descriptor. Dequeueing from a FQ, consuming dequeued store and also the
NAPI poll function is common between both queues.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 455 +++++++++++++++++++++++++++++++-
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h |  17 ++
 2 files changed, 460 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 5e2d441cbc38..54cf87a66ab0 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -13,6 +13,7 @@
 #include <linux/msi.h>
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
+#include <linux/iommu.h>
 
 #include <linux/fsl/mc.h>
 
@@ -24,6 +25,16 @@
 
 #define DEFAULT_VLAN_ID			1
 
+static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
+				dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
 static int dpaa2_switch_add_vlan(struct ethsw_core *ethsw, u16 vid)
 {
 	int err;
@@ -433,9 +444,51 @@ static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
 	return 0;
 }
 
+/* Manage all NAPI instances for the control interface.
+ *
+ * We only have one RX queue and one Tx Conf queue for all
+ * switch ports. Therefore, we only need to enable the NAPI instance once, the
+ * first time one of the switch ports runs .dev_open().
+ */
+
+static void dpaa2_switch_enable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* a new interface is using the NAPI instance */
+	ethsw->napi_users++;
+
+	/* if there is already a user of the instance, return */
+	if (ethsw->napi_users > 1)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_enable(&ethsw->fq[i].napi);
+}
+
+static void dpaa2_switch_disable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* If we are not the last interface using the NAPI, return */
+	ethsw->napi_users--;
+	if (ethsw->napi_users)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_disable(&ethsw->fq[i].napi);
+}
+
 static int dpaa2_switch_port_open(struct net_device *netdev)
 {
 	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	int err;
 
 	/* No need to allow Tx as control interface is disabled */
@@ -464,6 +517,8 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
 		goto err_carrier_sync;
 	}
 
+	dpaa2_switch_enable_ctrl_if_napi(ethsw);
+
 	return 0;
 
 err_carrier_sync:
@@ -476,6 +531,7 @@ err_carrier_sync:
 static int dpaa2_switch_port_stop(struct net_device *netdev)
 {
 	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	int err;
 
 	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
@@ -486,6 +542,8 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
 		return err;
 	}
 
+	dpaa2_switch_disable_ctrl_if_napi(ethsw);
+
 	return 0;
 }
 
@@ -692,6 +750,28 @@ static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
 	return 0;
 }
 
+static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw,
+				 const struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	unsigned char *buffer_start;
+	struct sk_buff **skbh, *skb;
+	dma_addr_t fd_addr;
+
+	fd_addr = dpaa2_fd_get_addr(fd);
+	skbh = dpaa2_iova_to_virt(ethsw->iommu_domain, fd_addr);
+
+	skb = *skbh;
+	buffer_start = (unsigned char *)skbh;
+
+	dma_unmap_single(dev, fd_addr,
+			 skb_tail_pointer(skb) - buffer_start,
+			 DMA_TO_DEVICE);
+
+	/* Move on with skb release */
+	dev_kfree_skb(skb);
+}
+
 static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_open		= dpaa2_switch_port_open,
 	.ndo_stop		= dpaa2_switch_port_stop,
@@ -1326,6 +1406,98 @@ err_switchdev_nb:
 	return err;
 }
 
+/* Build a linear skb based on a single-buffer frame descriptor */
+static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
+						     const struct dpaa2_fd *fd)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	struct device *dev = ethsw->dev;
+	struct sk_buff *skb = NULL;
+	void *fd_vaddr;
+
+	fd_vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
+	dma_unmap_page(dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
+		       DMA_FROM_DEVICE);
+
+	skb = build_skb(fd_vaddr, DPAA2_SWITCH_RX_BUF_SIZE +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (unlikely(!skb)) {
+		dev_err(dev, "build_skb() failed\n");
+		return NULL;
+	}
+
+	skb_reserve(skb, fd_offset);
+	skb_put(skb, fd_length);
+
+	ethsw->buf_count--;
+
+	return skb;
+}
+
+static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
+			    const struct dpaa2_fd *fd)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	struct ethsw_port_priv *port_priv;
+	struct net_device *netdev;
+	struct vlan_ethhdr *hdr;
+	struct sk_buff *skb;
+	u16 vlan_tci, vid;
+	int if_id, err;
+
+	/* get switch ingress interface ID */
+	if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+
+	if (if_id >= ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "Frame received from unknown interface!\n");
+		goto err_free_fd;
+	}
+	port_priv = ethsw->ports[if_id];
+	netdev = port_priv->netdev;
+
+	/* build the SKB based on the FD received */
+	if (dpaa2_fd_get_format(fd) != dpaa2_fd_single) {
+		if (net_ratelimit()) {
+			netdev_err(netdev, "Received invalid frame format\n");
+			goto err_free_fd;
+		}
+	}
+
+	skb = dpaa2_switch_build_linear_skb(ethsw, fd);
+	if (unlikely(!skb))
+		goto err_free_fd;
+
+	skb_reset_mac_header(skb);
+
+	/* Remove the VLAN header if the packet that we just received has a vid
+	 * equal to the port PVIDs. Since the dpaa2-switch can operate only in
+	 * VLAN-aware mode and no alterations are made on the packet when it's
+	 * redirected/mirrored to the control interface, we are sure that there
+	 * will always be a VLAN header present.
+	 */
+	hdr = vlan_eth_hdr(skb);
+	vid = ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK;
+	if (vid == port_priv->pvid) {
+		err = __skb_vlan_pop(skb, &vlan_tci);
+		if (err) {
+			dev_info(ethsw->dev, "__skb_vlan_pop() returned %d", err);
+			goto err_free_fd;
+		}
+	}
+
+	skb->dev = netdev;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	netif_receive_skb(skb);
+
+	return;
+
+err_free_fd:
+	dpaa2_switch_free_fd(ethsw, fd);
+}
+
 static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
 {
 	ethsw->features = 0;
@@ -1359,6 +1531,146 @@ static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
 	return 0;
 }
 
+/* Free buffers acquired from the buffer pool or which were meant to
+ * be released in the pool
+ */
+static void dpaa2_switch_free_bufs(struct ethsw_core *ethsw, u64 *buf_array, int count)
+{
+	struct device *dev = ethsw->dev;
+	void *vaddr;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, buf_array[i]);
+		dma_unmap_page(dev, buf_array[i], DPAA2_SWITCH_RX_BUF_SIZE,
+			       DMA_FROM_DEVICE);
+		free_pages((unsigned long)vaddr, 0);
+	}
+}
+
+/* Perform a single release command to add buffers
+ * to the specified buffer pool
+ */
+static int dpaa2_switch_add_bufs(struct ethsw_core *ethsw, u16 bpid)
+{
+	struct device *dev = ethsw->dev;
+	u64 buf_array[BUFS_PER_CMD];
+	struct page *page;
+	int retries = 0;
+	dma_addr_t addr;
+	int err;
+	int i;
+
+	for (i = 0; i < BUFS_PER_CMD; i++) {
+		/* Allocate one page for each Rx buffer. WRIOP sees
+		 * the entire page except for a tailroom reserved for
+		 * skb shared info
+		 */
+		page = dev_alloc_pages(0);
+		if (!page) {
+			dev_err(dev, "buffer allocation failed\n");
+			goto err_alloc;
+		}
+
+		addr = dma_map_page(dev, page, 0, DPAA2_SWITCH_RX_BUF_SIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, addr)) {
+			dev_err(dev, "dma_map_single() failed\n");
+			goto err_map;
+		}
+		buf_array[i] = addr;
+	}
+
+release_bufs:
+	/* In case the portal is busy, retry until successful or
+	 * max retries hit.
+	 */
+	while ((err = dpaa2_io_service_release(NULL, bpid,
+					       buf_array, i)) == -EBUSY) {
+		if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES)
+			break;
+
+		cpu_relax();
+	}
+
+	/* If release command failed, clean up and bail out. */
+	if (err) {
+		dpaa2_switch_free_bufs(ethsw, buf_array, i);
+		return 0;
+	}
+
+	return i;
+
+err_map:
+	__free_pages(page, 0);
+err_alloc:
+	/* If we managed to allocate at least some buffers,
+	 * release them to hardware
+	 */
+	if (i)
+		goto release_bufs;
+
+	return 0;
+}
+
+static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw)
+{
+	int *count = &ethsw->buf_count;
+	int new_count;
+	int err = 0;
+
+	if (unlikely(*count < DPAA2_ETHSW_REFILL_THRESH)) {
+		do {
+			new_count = dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+			if (unlikely(!new_count)) {
+				/* Out of memory; abort for now, we'll
+				 * try later on
+				 */
+				break;
+			}
+			*count += new_count;
+		} while (*count < DPAA2_ETHSW_NUM_BUFS);
+
+		if (unlikely(*count < DPAA2_ETHSW_NUM_BUFS))
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw)
+{
+	int *count, i;
+
+	for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) {
+		count = &ethsw->buf_count;
+		*count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+
+		if (unlikely(*count < BUFS_PER_CMD))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_drain_bp(struct ethsw_core *ethsw)
+{
+	u64 buf_array[BUFS_PER_CMD];
+	int ret;
+
+	do {
+		ret = dpaa2_io_service_acquire(NULL, ethsw->bpid,
+					       buf_array, BUFS_PER_CMD);
+		if (ret < 0) {
+			dev_err(ethsw->dev,
+				"dpaa2_io_service_acquire() = %d\n", ret);
+			return;
+		}
+		dpaa2_switch_free_bufs(ethsw, buf_array, ret);
+
+	} while (ret);
+}
+
 static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
 {
 	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
@@ -1463,6 +1775,103 @@ static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
 		dpaa2_io_store_destroy(ethsw->fq[i].store);
 }
 
+static int dpaa2_switch_pull_fq(struct dpaa2_switch_fq *fq)
+{
+	int err, retries = 0;
+
+	/* Try to pull from the FQ while the portal is busy and we didn't hit
+	 * the maximum number fo retries
+	 */
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, fq->fqid, fq->store);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	if (unlikely(err))
+		dev_err(fq->ethsw->dev, "dpaa2_io_service_pull err %d", err);
+
+	return err;
+}
+
+/* Consume all frames pull-dequeued into the store */
+static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	int cleaned = 0, is_last;
+	struct dpaa2_dq *dq;
+	int retries = 0;
+
+	do {
+		/* Get the next available FD from the store */
+		dq = dpaa2_io_store_next(fq->store, &is_last);
+		if (unlikely(!dq)) {
+			if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) {
+				dev_err_once(ethsw->dev,
+					     "No valid dequeue response\n");
+				return -ETIMEDOUT;
+			}
+			continue;
+		}
+
+		dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		cleaned++;
+
+	} while (!is_last);
+
+	return cleaned;
+}
+
+/* NAPI poll routine */
+static int dpaa2_switch_poll(struct napi_struct *napi, int budget)
+{
+	int err, cleaned = 0, store_cleaned, work_done;
+	struct dpaa2_switch_fq *fq;
+	int retries = 0;
+
+	fq = container_of(napi, struct dpaa2_switch_fq, napi);
+
+	do {
+		err = dpaa2_switch_pull_fq(fq);
+		if (unlikely(err))
+			break;
+
+		/* Refill pool if appropriate */
+		dpaa2_switch_refill_bp(fq->ethsw);
+
+		store_cleaned = dpaa2_switch_store_consume(fq);
+		cleaned += store_cleaned;
+
+		if (cleaned >= budget) {
+			work_done = budget;
+			goto out;
+		}
+
+	} while (store_cleaned);
+
+	/* We didn't consume the entire budget, so finish napi and re-enable
+	 * data availability notifications
+	 */
+	napi_complete_done(napi, cleaned);
+	do {
+		err = dpaa2_io_service_rearm(NULL, &fq->nctx);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	work_done = max(cleaned, 1);
+out:
+
+	return work_done;
+}
+
+static void dpaa2_switch_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
+{
+	struct dpaa2_switch_fq *fq;
+
+	fq = container_of(nctx, struct dpaa2_switch_fq, nctx);
+
+	napi_schedule(&fq->napi);
+}
+
 static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
 {
 	struct dpsw_ctrl_if_queue_cfg queue_cfg;
@@ -1479,6 +1888,7 @@ static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
 		nctx->is_cdan = 0;
 		nctx->id = ethsw->fq[i].fqid;
 		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
+		nctx->cb = dpaa2_switch_fqdan_cb;
 		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
 		if (err) {
 			err = -EPROBE_DEFER;
@@ -1535,10 +1945,14 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 	if (err)
 		return err;
 
-	err = dpaa2_switch_alloc_rings(ethsw);
+	err = dpaa2_switch_seed_bp(ethsw);
 	if (err)
 		goto err_free_dpbp;
 
+	err = dpaa2_switch_alloc_rings(ethsw);
+	if (err)
+		goto err_drain_dpbp;
+
 	err = dpaa2_switch_setup_dpio(ethsw);
 	if (err)
 		goto err_destroy_rings;
@@ -1547,6 +1961,8 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 
 err_destroy_rings:
 	dpaa2_switch_destroy_rings(ethsw);
+err_drain_dpbp:
+	dpaa2_switch_drain_bp(ethsw);
 err_free_dpbp:
 	dpaa2_switch_free_dpbp(ethsw);
 
@@ -1749,6 +2165,7 @@ static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
 {
 	dpaa2_switch_free_dpio(ethsw);
 	dpaa2_switch_destroy_rings(ethsw);
+	dpaa2_switch_drain_bp(ethsw);
 	dpaa2_switch_free_dpbp(ethsw);
 }
 
@@ -1825,12 +2242,6 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	if (err)
 		goto err_port_probe;
 
-	err = register_netdev(port_netdev);
-	if (err < 0) {
-		dev_err(dev, "register_netdev error %d\n", err);
-		goto err_port_probe;
-	}
-
 	ethsw->ports[port_idx] = port_priv;
 
 	return 0;
@@ -1854,6 +2265,7 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 		return -ENOMEM;
 
 	ethsw->dev = dev;
+	ethsw->iommu_domain = iommu_get_domain_for_dev(dev);
 	dev_set_drvdata(dev, ethsw);
 
 	err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
@@ -1883,6 +2295,15 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 			goto err_free_ports;
 	}
 
+	/* Add a NAPI instance for each of the Rx queues. The first port's
+	 * net_device will be associated with the instances since we do not have
+	 * different queues for each switch ports.
+	 */
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		netif_napi_add(ethsw->ports[0]->netdev,
+			       &ethsw->fq[i].napi, dpaa2_switch_poll,
+			       NAPI_POLL_WEIGHT);
+
 	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
 	if (err) {
 		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
@@ -1894,18 +2315,28 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 	if (err)
 		goto err_stop;
 
-	dev_info(dev, "probed %d port switch\n", ethsw->sw_attr.num_ifs);
+	/* Register the netdev only when the entire setup is done and the
+	 * switch port interfaces are ready to receive traffic
+	 */
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = register_netdev(ethsw->ports[i]->netdev);
+		if (err < 0) {
+			dev_err(dev, "register_netdev error %d\n", err);
+			goto err_unregister_ports;
+		}
+	}
+
 	return 0;
 
+err_unregister_ports:
+	for (i--; i >= 0; i--)
+		unregister_netdev(ethsw->ports[i]->netdev);
 err_stop:
 	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
 
 err_free_ports:
-	/* Cleanup registered ports only */
-	for (i--; i >= 0; i--) {
-		unregister_netdev(ethsw->ports[i]->netdev);
+	for (i--; i >= 0; i--)
 		free_netdev(ethsw->ports[i]->netdev);
-	}
 	kfree(ethsw->ports);
 
 err_takedown:
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 50dd529e2a79..238f16561979 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -53,6 +53,19 @@
 
 #define DPAA2_SWITCH_STORE_SIZE 16
 
+/* Buffer management */
+#define BUFS_PER_CMD			7
+#define DPAA2_ETHSW_NUM_BUFS		(1024 * BUFS_PER_CMD)
+#define DPAA2_ETHSW_REFILL_THRESH	(DPAA2_ETHSW_NUM_BUFS * 5 / 6)
+
+/* Number of times to retry DPIO portal operations while waiting
+ * for portal to finish executing current command and become
+ * available. We want to avoid being stuck in a while loop in case
+ * hardware becomes unresponsive, but not give up too easily if
+ * the portal really is busy for valid reasons
+ */
+#define DPAA2_SWITCH_SWP_BUSY_RETRIES		1000
+
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
 struct ethsw_core;
@@ -62,6 +75,7 @@ struct dpaa2_switch_fq {
 	enum dpsw_queue_type type;
 	struct dpaa2_io_store *store;
 	struct dpaa2_io_notification_ctx nctx;
+	struct napi_struct napi;
 	u32 fqid;
 };
 
@@ -89,6 +103,7 @@ struct ethsw_core {
 	unsigned long			features;
 	int				dev_id;
 	struct ethsw_port_priv		**ports;
+	struct iommu_domain		*iommu_domain;
 
 	u8				vlans[VLAN_VID_MASK + 1];
 
@@ -99,7 +114,9 @@ struct ethsw_core {
 
 	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
 	struct fsl_mc_device		*dpbp_dev;
+	int				buf_count;
 	u16				bpid;
+	int				napi_users;
 };
 
 static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
-- 
cgit v1.2.3


From 7fd94d86b7f4a7f71223bdc1348b897411f2224b Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:45 +0200
Subject: staging: dpaa2-switch: add .ndo_start_xmit() callback

Implement the .ndo_start_xmit() callback for the switch port interfaces.
For each of the switch ports, gather the corresponding queue
destination ID (QDID) necessary for Tx enqueueing.

We'll reserve 64 bytes for software annotations, where we keep a skb
backpointer used on the Tx confirmation side for releasing the allocated
memory. At the moment, we only support linear skbs.

Also, add support for the Tx confirmation path which for the most part
shares the code path with the normal Rx queue.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h |  24 +++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     |  41 ++++++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     |  28 ++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 154 ++++++++++++++++++++++++++---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    |  14 +++
 5 files changed, 245 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index d451e1e7f0a3..4aa83d41d762 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -47,6 +47,8 @@
 #define DPSW_CMDID_IF_ENABLE                DPSW_CMD_ID(0x03D)
 #define DPSW_CMDID_IF_DISABLE               DPSW_CMD_ID(0x03E)
 
+#define DPSW_CMDID_IF_GET_ATTR              DPSW_CMD_ID(0x042)
+
 #define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH  DPSW_CMD_ID(0x044)
 
 #define DPSW_CMDID_IF_GET_LINK_STATE        DPSW_CMD_ID(0x046)
@@ -246,6 +248,28 @@ struct dpsw_cmd_if {
 	__le16 if_id;
 };
 
+#define DPSW_ADMIT_UNTAGGED_SHIFT	0
+#define DPSW_ADMIT_UNTAGGED_SIZE	4
+#define DPSW_ENABLED_SHIFT		5
+#define DPSW_ENABLED_SIZE		1
+#define DPSW_ACCEPT_ALL_VLAN_SHIFT	6
+#define DPSW_ACCEPT_ALL_VLAN_SIZE	1
+
+struct dpsw_rsp_if_get_attr {
+	/* cmd word 0 */
+	/* from LSB: admit_untagged:4 enabled:1 accept_all_vlan:1 */
+	u8 conf;
+	u8 pad1;
+	u8 num_tcs;
+	u8 pad2;
+	__le16 qdid;
+	/* cmd word 1 */
+	__le32 options;
+	__le32 pad3;
+	/* cmd word 2 */
+	__le32 rate;
+};
+
 struct dpsw_cmd_if_set_max_frame_length {
 	__le16 if_id;
 	__le16 frame_length;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index 4bc5791c0fa1..bd482a9b1930 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -642,6 +642,47 @@ int dpsw_if_disable(struct fsl_mc_io *mc_io,
 	return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpsw_if_get_attributes() - Function obtains attributes of interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @attr:	Returned interface attributes
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr)
+{
+	struct dpsw_rsp_if_get_attr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_if_get_attr *)cmd.params;
+	attr->num_tcs = rsp_params->num_tcs;
+	attr->rate = le32_to_cpu(rsp_params->rate);
+	attr->options = le32_to_cpu(rsp_params->options);
+	attr->qdid = le16_to_cpu(rsp_params->qdid);
+	attr->enabled = dpsw_get_field(rsp_params->conf, ENABLED);
+	attr->accept_all_vlan = dpsw_get_field(rsp_params->conf,
+					       ACCEPT_ALL_VLAN);
+	attr->admit_untagged = dpsw_get_field(rsp_params->conf,
+					      ADMIT_UNTAGGED);
+
+	return 0;
+}
+
 /**
  * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length.
  * @mc_io:		Pointer to MC portal's I/O object
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index f888778b70a1..e741e91e485a 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -430,6 +430,34 @@ int dpsw_if_disable(struct fsl_mc_io *mc_io,
 		    u16 token,
 		    u16 if_id);
 
+/**
+ * struct dpsw_if_attr - Structure representing DPSW interface attributes
+ * @num_tcs: Number of traffic classes
+ * @rate: Transmit rate in bits per second
+ * @options: Interface configuration options (bitmap)
+ * @enabled: Indicates if interface is enabled
+ * @accept_all_vlan: The device discards/accepts incoming frames
+ *		for VLANs that do not include this interface
+ * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device
+ *		discards untagged frames or priority-tagged frames received on
+ *		this interface;
+ *		When set to 'DPSW_ADMIT_ALL', untagged frames or priority-
+ *		tagged frames received on this interface are accepted
+ * @qdid: control frames transmit qdid
+ */
+struct dpsw_if_attr {
+	u8 num_tcs;
+	u32 rate;
+	u32 options;
+	int enabled;
+	int accept_all_vlan;
+	enum dpsw_accepted_frames admit_untagged;
+	u16 qdid;
+};
+
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr);
+
 int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
 				 u32 cmd_flags,
 				 u16 token,
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 54cf87a66ab0..102221263062 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -434,10 +434,13 @@ static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
 	WARN_ONCE(state.up > 1, "Garbage read into link_state");
 
 	if (state.up != port_priv->link_state) {
-		if (state.up)
+		if (state.up) {
 			netif_carrier_on(netdev);
-		else
+			netif_tx_start_all_queues(netdev);
+		} else {
 			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
+		}
 		port_priv->link_state = state.up;
 	}
 
@@ -491,9 +494,6 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	int err;
 
-	/* No need to allow Tx as control interface is disabled */
-	netif_tx_stop_all_queues(netdev);
-
 	/* Explicitly set carrier off, otherwise
 	 * netif_carrier_ok() will return true and cause 'ip link show'
 	 * to report the LOWER_UP flag, even though the link
@@ -547,15 +547,6 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
 	return 0;
 }
 
-static netdev_tx_t dpaa2_switch_port_dropframe(struct sk_buff *skb,
-					       struct net_device *netdev)
-{
-	/* we don't support I/O for now, drop the frame */
-	dev_kfree_skb_any(skb);
-
-	return NETDEV_TX_OK;
-}
-
 static int dpaa2_switch_port_parent_id(struct net_device *dev,
 				       struct netdev_phys_item_id *ppid)
 {
@@ -772,6 +763,115 @@ static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw,
 	dev_kfree_skb(skb);
 }
 
+static int dpaa2_switch_build_single_fd(struct ethsw_core *ethsw,
+					struct sk_buff *skb,
+					struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	struct sk_buff **skbh;
+	dma_addr_t addr;
+	u8 *buff_start;
+	void *hwa;
+
+	buff_start = PTR_ALIGN(skb->data - DPAA2_SWITCH_TX_DATA_OFFSET -
+			       DPAA2_SWITCH_TX_BUF_ALIGN,
+			       DPAA2_SWITCH_TX_BUF_ALIGN);
+
+	/* Clear FAS to have consistent values for TX confirmation. It is
+	 * located in the first 8 bytes of the buffer's hardware annotation
+	 * area
+	 */
+	hwa = buff_start + DPAA2_SWITCH_SWA_SIZE;
+	memset(hwa, 0, 8);
+
+	/* Store a backpointer to the skb at the beginning of the buffer
+	 * (in the private data area) such that we can release it
+	 * on Tx confirm
+	 */
+	skbh = (struct sk_buff **)buff_start;
+	*skbh = skb;
+
+	addr = dma_map_single(dev, buff_start,
+			      skb_tail_pointer(skb) - buff_start,
+			      DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, addr)))
+		return -ENOMEM;
+
+	/* Setup the FD fields */
+	memset(fd, 0, sizeof(*fd));
+
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, (u16)(skb->data - buff_start));
+	dpaa2_fd_set_len(fd, skb->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+
+	return 0;
+}
+
+static netdev_tx_t dpaa2_switch_port_tx(struct sk_buff *skb,
+					struct net_device *net_dev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int retries = DPAA2_SWITCH_SWP_BUSY_RETRIES;
+	struct dpaa2_fd fd;
+	int err;
+
+	if (unlikely(skb_headroom(skb) < DPAA2_SWITCH_NEEDED_HEADROOM)) {
+		struct sk_buff *ns;
+
+		ns = skb_realloc_headroom(skb, DPAA2_SWITCH_NEEDED_HEADROOM);
+		if (unlikely(!ns)) {
+			net_err_ratelimited("%s: Error reallocating skb headroom\n", net_dev->name);
+			goto err_free_skb;
+		}
+		dev_consume_skb_any(skb);
+		skb = ns;
+	}
+
+	/* We'll be holding a back-reference to the skb until Tx confirmation */
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		/* skb_unshare() has already freed the skb */
+		net_err_ratelimited("%s: Error copying the socket buffer\n", net_dev->name);
+		goto err_exit;
+	}
+
+	/* At this stage, we do not support non-linear skbs so just try to
+	 * linearize the skb and if that's not working, just drop the packet.
+	 */
+	err = skb_linearize(skb);
+	if (err) {
+		net_err_ratelimited("%s: skb_linearize error (%d)!\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	err = dpaa2_switch_build_single_fd(ethsw, skb, &fd);
+	if (unlikely(err)) {
+		net_err_ratelimited("%s: ethsw_build_*_fd() %d\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	do {
+		err = dpaa2_io_service_enqueue_qd(NULL,
+						  port_priv->tx_qdid,
+						  8, 0, &fd);
+		retries--;
+	} while (err == -EBUSY && retries);
+
+	if (unlikely(err < 0)) {
+		dpaa2_switch_free_fd(ethsw, &fd);
+		goto err_exit;
+	}
+
+	return NETDEV_TX_OK;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+err_exit:
+	return NETDEV_TX_OK;
+}
+
 static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_open		= dpaa2_switch_port_open,
 	.ndo_stop		= dpaa2_switch_port_stop,
@@ -783,7 +883,7 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
 	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
 
-	.ndo_start_xmit		= dpaa2_switch_port_dropframe,
+	.ndo_start_xmit		= dpaa2_switch_port_tx,
 	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
 	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
 };
@@ -1436,6 +1536,12 @@ static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
 	return skb;
 }
 
+static void dpaa2_switch_tx_conf(struct dpaa2_switch_fq *fq,
+				 const struct dpaa2_fd *fd)
+{
+	dpaa2_switch_free_fd(fq->ethsw, fd);
+}
+
 static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
 			    const struct dpaa2_fd *fd)
 {
@@ -1813,7 +1919,10 @@ static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
 			continue;
 		}
 
-		dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		if (fq->type == DPSW_QUEUE_RX)
+			dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		else
+			dpaa2_switch_tx_conf(fq, dpaa2_dq_fd(dq));
 		cleaned++;
 
 	} while (!is_last);
@@ -2111,8 +2220,19 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 		.flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID,
 	};
 	struct net_device *netdev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_if_attr dpsw_if_attr;
 	int err;
 
+	/* Get the Tx queue for this specific port */
+	err = dpsw_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     port_priv->idx, &dpsw_if_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_attributes err %d\n", err);
+		return err;
+	}
+	port_priv->tx_qdid = dpsw_if_attr.qdid;
+
 	/* We need to add VLAN 1 as the PVID on this port until it is under a
 	 * bridge since the DPAA2 switch is not able to handle the traffic in a
 	 * VLAN unaware fashion
@@ -2230,6 +2350,8 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
 	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
 
+	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
+
 	/* Set MTU limits */
 	port_netdev->min_mtu = ETH_MIN_MTU;
 	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 238f16561979..ab3b75a62f01 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -66,6 +66,19 @@
  */
 #define DPAA2_SWITCH_SWP_BUSY_RETRIES		1000
 
+/* Hardware annotation buffer size */
+#define DPAA2_SWITCH_HWA_SIZE			64
+/* Software annotation buffer size */
+#define DPAA2_SWITCH_SWA_SIZE			64
+
+#define DPAA2_SWITCH_TX_BUF_ALIGN		64
+
+#define DPAA2_SWITCH_TX_DATA_OFFSET \
+	(DPAA2_SWITCH_HWA_SIZE + DPAA2_SWITCH_SWA_SIZE)
+
+#define DPAA2_SWITCH_NEEDED_HEADROOM \
+	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
+
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
 struct ethsw_core;
@@ -91,6 +104,7 @@ struct ethsw_port_priv {
 	u8			vlans[VLAN_VID_MASK + 1];
 	u16			pvid;
 	struct net_device	*bridge_dev;
+	u16			tx_qdid;
 };
 
 /* Switch data */
-- 
cgit v1.2.3


From 613c0a5810b79610db8535c3816b0c149675f8ee Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:46 +0200
Subject: staging: dpaa2-switch: enable the control interface

Enable the CTRL_IF of the switch object, now that all the pieces are in
place (buffer and queue management, interrupts, NAPI instances etc).

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h |  2 ++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     | 37 ++++++++++++++++++++++++++++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     |  5 ++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    |  9 ++++++++
 4 files changed, 53 insertions(+)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index 4aa83d41d762..6f1b9d16a09f 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -78,6 +78,8 @@
 
 #define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
 #define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
+#define DPSW_CMDID_CTRL_IF_ENABLE           DPSW_CMD_ID(0x0A2)
+#define DPSW_CMDID_CTRL_IF_DISABLE          DPSW_CMD_ID(0x0A3)
 #define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
 
 /* Macros for accessing command fields smaller than 1byte */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index bd482a9b1930..edc7559b7b1e 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -1363,3 +1363,40 @@ int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_ctrl_if_enable() - Enable control interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE, cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_disable() - Function disables control interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index e741e91e485a..954aa4401cd9 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -245,6 +245,11 @@ struct dpsw_ctrl_if_queue_cfg {
 int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			   enum dpsw_queue_type qtype,
 			   const struct dpsw_ctrl_if_queue_cfg *cfg);
+
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
 /**
  * enum dpsw_action - Action selection for special/control frames
  * @DPSW_ACTION_DROP: Drop frame
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 102221263062..41860187cba5 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -2066,8 +2066,16 @@ static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
 	if (err)
 		goto err_destroy_rings;
 
+	err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
+		goto err_deregister_dpio;
+	}
+
 	return 0;
 
+err_deregister_dpio:
+	dpaa2_switch_free_dpio(ethsw);
 err_destroy_rings:
 	dpaa2_switch_destroy_rings(ethsw);
 err_drain_dpbp:
@@ -2283,6 +2291,7 @@ static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
 
 static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
 {
+	dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
 	dpaa2_switch_free_dpio(ethsw);
 	dpaa2_switch_destroy_rings(ethsw);
 	dpaa2_switch_drain_bp(ethsw);
-- 
cgit v1.2.3


From 539dda3c5d190c5088b5e57944b1b482fcb464de Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:47 +0200
Subject: staging: dpaa2-switch: properly setup switching domains

Until now, the DPAA2 switch was not capable to properly setup its
switching domains depending on the existence, or lack thereof, of a
upper bridge device. This meant that all switch ports of a DPSW object
were switching by default even though they were not under the same
bridge device.

Another issue was the inability to actually add the CPU in the flooding
domains (broadcast, unknown unicast etc) of a particular switch port.
This meant that a simple ping on a switch interface was not possible
since no broadcast ARP frame would actually reach the CPU queues.

This patch tries to fix exactly these problems by:

* Creating and managing a FDB table for each flooding domain. This means
  that when a switch interface is not bridged it will use its own FDB
  table. While in bridged mode all DPAA2 switch interfaces under the
  same upper will use the same FDB table, thus leverage the same FDB
  entries.

* Adding a new MC firmware command - dpsw_set_egress_flood() - through
  which the driver can setup the flooding domains as needed. For
  example, when the switch interface is standalone, thus not in a
  bridge with any other DPAA2 switch port, it will setup its broadcast
  and unknown unicast flooding domains to only include the control
  interface (the queues that reach the CPU and the driver can dequeue
  from). This flooding domain changes when the interface joins a bridge
  and is configured to include, beside the control interface, all other
  DPAA2 switch interfaces.

We impose a minimum limit of FDB tables available equal to the number of
switch interfaces so that we guarantee that, in the maximal
configuration - all interfaces are standalone, each switch port will
have a private FDB table. At the same time, we only probe DPSW objects
that have the flooding and broadcast replicators configured to be per
FDB (DPSW_*_PER_FDB). Without this, the dpaa2-switch driver would not
be able to configure multiple switching domains.

At probe time, a FDB table will be allocated for each port. At a bridge
join event, the switch port will either continue to use the current FDB
table (if it's the first dpaa2-switch port to join that bridge) or will
switch to use the FDB table associated with the port that it's already
under the bridge. If a FDB switch is necessary, the private FDB table
which was previously used will be returned to the pool of unused FDBs.

Upon a bridge leave, the switch port needs a private FDB table thus it
will search and get the first unused FDB table. This way, all the other
ports remaining under the bridge will continue to use the same FDB
table.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h |  38 +++-
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c     |  90 +++++++-
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h     |  76 ++++++-
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c    | 321 ++++++++++++++++++++++++++---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h    |  30 ++-
 5 files changed, 514 insertions(+), 41 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index 6f1b9d16a09f..eb620e832412 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -29,7 +29,7 @@
 
 #define DPSW_CMDID_ENABLE                   DPSW_CMD_ID(0x002)
 #define DPSW_CMDID_DISABLE                  DPSW_CMD_ID(0x003)
-#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_ID(0x004)
+#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_V2(0x004)
 #define DPSW_CMDID_RESET                    DPSW_CMD_ID(0x005)
 
 #define DPSW_CMDID_SET_IRQ_ENABLE           DPSW_CMD_ID(0x012)
@@ -58,7 +58,7 @@
 #define DPSW_CMDID_IF_SET_LINK_CFG          DPSW_CMD_ID(0x04C)
 
 #define DPSW_CMDID_VLAN_ADD                 DPSW_CMD_ID(0x060)
-#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_ID(0x061)
+#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_V2(0x061)
 #define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED     DPSW_CMD_ID(0x062)
 
 #define DPSW_CMDID_VLAN_REMOVE_IF           DPSW_CMD_ID(0x064)
@@ -66,6 +66,8 @@
 #define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING  DPSW_CMD_ID(0x066)
 #define DPSW_CMDID_VLAN_REMOVE              DPSW_CMD_ID(0x067)
 
+#define DPSW_CMDID_FDB_ADD                  DPSW_CMD_ID(0x082)
+#define DPSW_CMDID_FDB_REMOVE               DPSW_CMD_ID(0x083)
 #define DPSW_CMDID_FDB_ADD_UNICAST          DPSW_CMD_ID(0x084)
 #define DPSW_CMDID_FDB_REMOVE_UNICAST       DPSW_CMD_ID(0x085)
 #define DPSW_CMDID_FDB_ADD_MULTICAST        DPSW_CMD_ID(0x086)
@@ -82,6 +84,8 @@
 #define DPSW_CMDID_CTRL_IF_DISABLE          DPSW_CMD_ID(0x0A3)
 #define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
 
+#define DPSW_CMDID_SET_EGRESS_FLOOD         DPSW_CMD_ID(0x0AC)
+
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
 	GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
@@ -176,6 +180,12 @@ struct dpsw_cmd_clear_irq_status {
 #define DPSW_COMPONENT_TYPE_SHIFT	0
 #define DPSW_COMPONENT_TYPE_SIZE	4
 
+#define DPSW_FLOODING_CFG_SHIFT		0
+#define DPSW_FLOODING_CFG_SIZE		4
+
+#define DPSW_BROADCAST_CFG_SHIFT	4
+#define DPSW_BROADCAST_CFG_SIZE		4
+
 struct dpsw_rsp_get_attr {
 	/* cmd word 0 */
 	__le16 num_ifs;
@@ -193,7 +203,11 @@ struct dpsw_rsp_get_attr {
 	u8 max_meters_per_if;
 	/* from LSB only the first 4 bits */
 	u8 component_type;
-	__le16 pad;
+	/* [0:3] - flooding configuration
+	 * [4:7] - broadcast configuration
+	 */
+	u8 repl_cfg;
+	u8 pad;
 	/* cmd word 3 */
 	__le64 options;
 };
@@ -312,6 +326,16 @@ struct dpsw_vlan_add {
 	__le16 vlan_id;
 };
 
+struct dpsw_cmd_vlan_add_if {
+	/* cmd word 0 */
+	__le16 options;
+	__le16 vlan_id;
+	__le16 fdb_id;
+	__le16 pad0;
+	/* cmd word 1-4 */
+	__le64 if_id;
+};
+
 struct dpsw_cmd_vlan_manage_if {
 	/* cmd word 0 */
 	__le16 pad0;
@@ -328,7 +352,7 @@ struct dpsw_cmd_vlan_remove {
 
 struct dpsw_cmd_fdb_add {
 	__le32 pad;
-	__le16 fdb_aging_time;
+	__le16 fdb_ageing_time;
 	__le16 num_fdb_entries;
 };
 
@@ -424,5 +448,11 @@ struct dpsw_cmd_if_set_mac_addr {
 	u8 mac_addr[6];
 };
 
+struct dpsw_cmd_set_egress_flood {
+	__le16 fdb_id;
+	u8 flood_type;
+	u8 pad[5];
+	__le64 if_id;
+};
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index edc7559b7b1e..5189f156100e 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -351,9 +351,9 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
 	attr->max_fdb_mc_groups = le16_to_cpu(rsp_params->max_fdb_mc_groups);
 	attr->max_meters_per_if = rsp_params->max_meters_per_if;
 	attr->options = le64_to_cpu(rsp_params->options);
-	attr->component_type = dpsw_get_field(rsp_params->component_type,
-					      COMPONENT_TYPE);
-
+	attr->component_type = dpsw_get_field(rsp_params->component_type, COMPONENT_TYPE);
+	attr->flooding_cfg = dpsw_get_field(rsp_params->repl_cfg, FLOODING_CFG);
+	attr->broadcast_cfg = dpsw_get_field(rsp_params->repl_cfg, BROADCAST_CFG);
 	return 0;
 }
 
@@ -924,6 +924,66 @@ int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
 	return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpsw_fdb_add() - Add FDB to switch and Returns handle to FDB table for
+ *		the reference
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Returned Forwarding Database Identifier
+ * @cfg:	FDB Configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg)
+{
+	struct dpsw_cmd_fdb_add *cmd_params;
+	struct dpsw_rsp_fdb_add *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_add *)cmd.params;
+	cmd_params->fdb_ageing_time = cpu_to_le16(cfg->fdb_ageing_time);
+	cmd_params->num_fdb_entries = cpu_to_le16(cfg->num_fdb_entries);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_fdb_add *)cmd.params;
+	*fdb_id = le16_to_cpu(rsp_params->fdb_id);
+
+	return 0;
+}
+
+/**
+ * dpsw_fdb_remove() - Remove FDB from switch
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id)
+{
+	struct dpsw_cmd_fdb_remove *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_remove *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
 /**
  * dpsw_fdb_add_unicast() - Function adds an unicast entry into MAC lookup table
  * @mc_io:	Pointer to MC portal's I/O object
@@ -1400,3 +1460,27 @@ int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_set_egress_flood() - Set egress parameters associated with an FDB ID
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Egress flooding configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg)
+{
+	struct dpsw_cmd_set_egress_flood *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_EGRESS_FLOOD, cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_set_egress_flood *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	cmd_params->flood_type = cfg->flood_type;
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index 954aa4401cd9..9e04350f3277 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -75,6 +75,35 @@ enum dpsw_component_type {
 	DPSW_COMPONENT_TYPE_S_VLAN
 };
 
+/**
+ *  enum dpsw_flooding_cfg - flooding configuration requested
+ * @DPSW_FLOODING_PER_VLAN: Flooding replicators are allocated per VLAN and
+ * interfaces present in each of them can be configured using
+ * dpsw_vlan_add_if_flooding()/dpsw_vlan_remove_if_flooding().
+ * This is the default configuration.
+ *
+ * @DPSW_FLOODING_PER_FDB: Flooding replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_flooding_cfg {
+	DPSW_FLOODING_PER_VLAN = 0,
+	DPSW_FLOODING_PER_FDB,
+};
+
+/**
+ * enum dpsw_broadcast_cfg - broadcast configuration requested
+ * @DPSW_BROADCAST_PER_OBJECT: There is only one broadcast replicator per DPSW
+ * object. This is the default configuration.
+ * @DPSW_BROADCAST_PER_FDB: Broadcast replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_broadcast_cfg {
+	DPSW_BROADCAST_PER_OBJECT = 0,
+	DPSW_BROADCAST_PER_FDB,
+};
+
 int dpsw_enable(struct fsl_mc_io *mc_io,
 		u32 cmd_flags,
 		u16 token);
@@ -153,6 +182,8 @@ int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
  * @num_vlans: Current number of VLANs
  * @num_fdbs: Current number of FDBs
  * @component_type: Component type of this bridge
+ * @flooding_cfg: Flooding configuration (PER_VLAN - default, PER_FDB)
+ * @broadcast_cfg: Broadcast configuration (PER_OBJECT - default, PER_FDB)
  */
 struct dpsw_attr {
 	int id;
@@ -168,6 +199,8 @@ struct dpsw_attr {
 	u16 num_vlans;
 	u8 num_fdbs;
 	enum dpsw_component_type component_type;
+	enum dpsw_flooding_cfg flooding_cfg;
+	enum dpsw_broadcast_cfg broadcast_cfg;
 };
 
 int dpsw_get_attributes(struct fsl_mc_io *mc_io,
@@ -483,6 +516,8 @@ int dpsw_vlan_add(struct fsl_mc_io *mc_io,
 		  u16 vlan_id,
 		  const struct dpsw_vlan_cfg *cfg);
 
+#define DPSW_VLAN_ADD_IF_OPT_FDB_ID            0x0001
+
 /**
  * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces
  * @num_ifs: The number of interfaces that are assigned to the egress
@@ -492,7 +527,9 @@ int dpsw_vlan_add(struct fsl_mc_io *mc_io,
  */
 struct dpsw_vlan_if_cfg {
 	u16 num_ifs;
+	u16 options;
 	u16 if_id[DPSW_MAX_IF];
+	u16 fdb_id;
 };
 
 int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
@@ -649,14 +686,14 @@ enum dpsw_fdb_learning_mode {
 /**
  * struct dpsw_fdb_attr - FDB Attributes
  * @max_fdb_entries: Number of FDB entries
- * @fdb_aging_time: Aging time in seconds
+ * @fdb_ageing_time: Ageing time in seconds
  * @learning_mode: Learning mode
  * @num_fdb_mc_groups: Current number of multicast groups
  * @max_fdb_mc_groups: Maximum number of multicast groups
  */
 struct dpsw_fdb_attr {
 	u16 max_fdb_entries;
-	u16 fdb_aging_time;
+	u16 fdb_ageing_time;
 	enum dpsw_fdb_learning_mode learning_mode;
 	u16 num_fdb_mc_groups;
 	u16 max_fdb_mc_groups;
@@ -676,4 +713,39 @@ int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
 int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
 				 u16 token, u16 if_id, u8 mac_addr[6]);
 
+/**
+ * struct dpsw_fdb_cfg  - FDB Configuration
+ * @num_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ */
+struct dpsw_fdb_cfg {
+	u16 num_fdb_entries;
+	u16 fdb_ageing_time;
+};
+
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg);
+
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id);
+
+/**
+ * enum dpsw_flood_type - Define the flood type of a DPSW object
+ * @DPSW_BROADCAST: Broadcast flooding
+ * @DPSW_FLOODING: Unknown flooding
+ */
+enum dpsw_flood_type {
+	DPSW_BROADCAST = 0,
+	DPSW_FLOODING,
+};
+
+struct dpsw_egress_flood_cfg {
+	u16 fdb_id;
+	enum dpsw_flood_type flood_type;
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg);
+
 #endif /* __FSL_DPSW_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 41860187cba5..5be07181399d 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -25,6 +25,91 @@
 
 #define DEFAULT_VLAN_ID			1
 
+static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
+{
+	return port_priv->fdb->fdb_id;
+}
+
+static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->fdbs[i].in_use)
+			return &ethsw->fdbs[i];
+	return NULL;
+}
+
+static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
+				     struct net_device *bridge_dev)
+{
+	struct ethsw_port_priv *other_port_priv = NULL;
+	struct dpaa2_switch_fdb *fdb;
+	struct net_device *other_dev;
+	struct list_head *iter;
+
+	/* If we leave a bridge (bridge_dev is NULL), find an unused
+	 * FDB and use that.
+	 */
+	if (!bridge_dev) {
+		fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
+
+		/* If there is no unused FDB, we must be the last port that
+		 * leaves the last bridge, all the others are standalone. We
+		 * can just keep the FDB that we already have.
+		 */
+
+		if (!fdb) {
+			port_priv->fdb->bridge_dev = NULL;
+			return 0;
+		}
+
+		port_priv->fdb = fdb;
+		port_priv->fdb->in_use = true;
+		port_priv->fdb->bridge_dev = NULL;
+		return 0;
+	}
+
+	/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
+	 * being held. Assert on it so that it's easier to catch new code
+	 * paths that reach this point without the RTNL lock.
+	 */
+	ASSERT_RTNL();
+
+	/* If part of a bridge, use the FDB of the first dpaa2 switch interface
+	 * to be present in that bridge
+	 */
+	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev, NULL))
+			continue;
+
+		if (other_dev == port_priv->netdev)
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		break;
+	}
+
+	/* The current port is about to change its FDB to the one used by the
+	 * first port that joined the bridge.
+	 */
+	if (other_port_priv) {
+		/* The previous FDB is about to become unused, since the
+		 * interface is no longer standalone.
+		 */
+		port_priv->fdb->in_use = false;
+		port_priv->fdb->bridge_dev = NULL;
+
+		/* Get a reference to the new FDB */
+		port_priv->fdb = other_port_priv->fdb;
+	}
+
+	/* Keep track of the new upper bridge device */
+	port_priv->fdb->bridge_dev = bridge_dev;
+
+	return 0;
+}
+
 static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
 				dma_addr_t iova_addr)
 {
@@ -35,14 +120,13 @@ static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
 	return phys_to_virt(phys_addr);
 }
 
-static int dpaa2_switch_add_vlan(struct ethsw_core *ethsw, u16 vid)
+static int dpaa2_switch_add_vlan(struct ethsw_port_priv *port_priv, u16 vid)
 {
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_vlan_cfg vcfg = {0};
 	int err;
 
-	struct dpsw_vlan_cfg	vcfg = {
-		.fdb_id = 0,
-	};
-
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
 	err = dpsw_vlan_add(ethsw->mc_io, 0,
 			    ethsw->dpsw_handle, vid, &vcfg);
 	if (err) {
@@ -133,7 +217,7 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
 {
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_vlan_if_cfg vcfg;
+	struct dpsw_vlan_if_cfg vcfg = {0};
 	int err;
 
 	if (port_priv->vlans[vid]) {
@@ -141,8 +225,13 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
 		return -EEXIST;
 	}
 
+	/* If hit, this VLAN rule will lead the packet into the FDB table
+	 * specified in the vlan configuration below
+	 */
 	vcfg.num_ifs = 1;
 	vcfg.if_id[0] = port_priv->idx;
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	vcfg.options |= DPSW_VLAN_ADD_IF_OPT_FDB_ID;
 	err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg);
 	if (err) {
 		netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err);
@@ -229,15 +318,17 @@ static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv,
 					const unsigned char *addr)
 {
 	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
 	int err;
 
 	entry.if_egress = port_priv->idx;
 	entry.type = DPSW_FDB_ENTRY_STATIC;
 	ether_addr_copy(entry.mac_addr, addr);
 
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
 	err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0,
 				   port_priv->ethsw_data->dpsw_handle,
-				   0, &entry);
+				   fdb_id, &entry);
 	if (err)
 		netdev_err(port_priv->netdev,
 			   "dpsw_fdb_add_unicast err %d\n", err);
@@ -248,15 +339,17 @@ static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv,
 					const unsigned char *addr)
 {
 	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
 	int err;
 
 	entry.if_egress = port_priv->idx;
 	entry.type = DPSW_FDB_ENTRY_STATIC;
 	ether_addr_copy(entry.mac_addr, addr);
 
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
 	err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0,
 				      port_priv->ethsw_data->dpsw_handle,
-				      0, &entry);
+				      fdb_id, &entry);
 	/* Silently discard error for calling multiple times the del command */
 	if (err && err != -ENXIO)
 		netdev_err(port_priv->netdev,
@@ -268,6 +361,7 @@ static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
 					const unsigned char *addr)
 {
 	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
 	int err;
 
 	ether_addr_copy(entry.mac_addr, addr);
@@ -275,9 +369,10 @@ static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
 	entry.num_ifs = 1;
 	entry.if_id[0] = port_priv->idx;
 
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
 	err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0,
 				     port_priv->ethsw_data->dpsw_handle,
-				     0, &entry);
+				     fdb_id, &entry);
 	/* Silently discard error for calling multiple times the add command */
 	if (err && err != -ENXIO)
 		netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n",
@@ -289,6 +384,7 @@ static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
 					const unsigned char *addr)
 {
 	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
 	int err;
 
 	ether_addr_copy(entry.mac_addr, addr);
@@ -296,9 +392,10 @@ static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
 	entry.num_ifs = 1;
 	entry.if_id[0] = port_priv->idx;
 
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
 	err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0,
 					port_priv->ethsw_data->dpsw_handle,
-					0, &entry);
+					fdb_id, &entry);
 	/* Silently discard error for calling multiple times the del command */
 	if (err && err != -ENAVAIL)
 		netdev_err(port_priv->netdev,
@@ -652,6 +749,7 @@ static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callba
 	u32 fdb_dump_size;
 	int err = 0, i;
 	u8 *dma_mem;
+	u16 fdb_id;
 
 	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
 	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
@@ -666,7 +764,8 @@ static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callba
 		goto err_map;
 	}
 
-	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, 0,
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, fdb_id,
 			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
 	if (err) {
 		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
@@ -888,8 +987,8 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
 };
 
-static bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
-					struct notifier_block *nb)
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
+				 struct notifier_block *nb)
 {
 	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
 
@@ -1080,7 +1179,7 @@ static int dpaa2_switch_port_vlans_add(struct net_device *netdev,
 
 	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
 		/* this is a new VLAN */
-		err = dpaa2_switch_add_vlan(port_priv->ethsw_data, vlan->vid);
+		err = dpaa2_switch_add_vlan(port_priv, vlan->vid);
 		if (err)
 			return err;
 
@@ -1163,7 +1262,11 @@ static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid
 		return -ENOENT;
 
 	if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) {
-		err = dpaa2_switch_port_set_pvid(port_priv, 0);
+		/* If we are deleting the PVID of a port, use VLAN 4095 instead
+		 * as we are sure that neither the bridge nor the 8021q module
+		 * will use it
+		 */
+		err = dpaa2_switch_port_set_pvid(port_priv, 4095);
 		if (err)
 			return err;
 	}
@@ -1273,7 +1376,48 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
 	return notifier_from_errno(err);
 }
 
-/* For the moment, only flood setting needs to be updated */
+static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+{
+	struct dpsw_egress_flood_cfg flood_cfg;
+	int i = 0, j;
+	int err;
+
+	/* Add all the DPAA2 switch ports found in the same bridging domain to
+	 * the egress flooding domain
+	 */
+	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
+		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
+			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
+
+	/* Add the CTRL interface to the egress flooding domain */
+	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
+
+	/* Use the FDB of the first dpaa2 switch port added to the bridge */
+	flood_cfg.fdb_id = fdb_id;
+
+	/* Setup broadcast flooding domain */
+	flood_cfg.flood_type = DPSW_BROADCAST;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	/* Setup unknown flooding domain */
+	flood_cfg.flood_type = DPSW_FLOODING;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
 static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 					 struct net_device *upper_dev)
 {
@@ -1282,15 +1426,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 	struct ethsw_port_priv *other_port_priv;
 	struct net_device *other_dev;
 	struct list_head *iter;
-	int i, err;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-		if (ethsw->ports[i]->bridge_dev &&
-		    (ethsw->ports[i]->bridge_dev != upper_dev)) {
-			netdev_err(netdev,
-				   "Only one bridge supported per DPSW object!\n");
-			return -EINVAL;
-		}
+	int err;
 
 	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
 		if (!dpaa2_switch_port_dev_check(other_dev, NULL))
@@ -1304,9 +1440,87 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 		}
 	}
 
+	/* Delete the previously manually installed VLAN 1 */
+	err = dpaa2_switch_port_del_vlan(port_priv, 1);
+	if (err)
+		return err;
+
+	dpaa2_switch_port_set_fdb(port_priv, upper_dev);
+
+	/* Setup the egress flood policy (broadcast, unknown unicast) */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		goto err_egress_flood;
+
+	return 0;
+
+err_egress_flood:
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
 	return err;
 }
 
+static int dpaa2_switch_port_clear_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_kill(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* Clear all RX VLANs installed through vlan_vid_add() either as VLAN
+	 * upper devices or otherwise from the FDB table that we are about to
+	 * leave
+	 */
+	err = vlan_for_each(netdev, dpaa2_switch_port_clear_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err);
+
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
+
+	/* Restore all RX VLANs into the new FDB table that we just joined */
+	err = vlan_for_each(netdev, dpaa2_switch_port_restore_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err);
+
+	/* Setup the egress flood policy (broadcast, unknown unicast).
+	 * When the port is not under a bridge, only the CTRL interface is part
+	 * of the flooding domain besides the actual port
+	 */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Recreate the egress flood domain of the FDB that we just left */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
+	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
+	 */
+	return dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+					  BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
+}
+
 static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 					     unsigned long event, void *ptr)
 {
@@ -1324,6 +1538,8 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 		if (netif_is_bridge_master(upper_dev)) {
 			if (info->linking)
 				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
+			else
+				err = dpaa2_switch_port_bridge_leave(netdev);
 		}
 	}
 
@@ -2202,6 +2418,10 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 		goto err_close;
 	}
 
+	err = dpsw_fdb_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, 0);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
 	err = dpaa2_switch_ctrl_if_setup(ethsw);
 	if (err)
 		goto err_destroy_ordered_workqueue;
@@ -2229,7 +2449,10 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	};
 	struct net_device *netdev = port_priv->netdev;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_fdb_cfg fdb_cfg = {0};
+	struct dpaa2_switch_fdb *fdb;
 	struct dpsw_if_attr dpsw_if_attr;
+	u16 fdb_id;
 	int err;
 
 	/* Get the Tx queue for this specific port */
@@ -2241,6 +2464,22 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	}
 	port_priv->tx_qdid = dpsw_if_attr.qdid;
 
+	/* Create a FDB table for this particular switch port */
+	fdb_cfg.num_fdb_entries = ethsw->sw_attr.max_fdb_entries / ethsw->sw_attr.num_ifs;
+	err = dpsw_fdb_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &fdb_id, &fdb_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_fdb_add err %d\n", err);
+		return err;
+	}
+
+	/* Find an unused dpaa2_switch_fdb structure and use it */
+	fdb = dpaa2_switch_fdb_get_unused(ethsw);
+	fdb->fdb_id = fdb_id;
+	fdb->in_use = true;
+	fdb->bridge_dev = NULL;
+	port_priv->fdb = fdb;
+
 	/* We need to add VLAN 1 as the PVID on this port until it is under a
 	 * bridge since the DPAA2 switch is not able to handle the traffic in a
 	 * VLAN unaware fashion
@@ -2249,6 +2488,11 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	if (err)
 		return err;
 
+	/* Setup the egress flooding domains (broadcast, unknown unicast */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
 	return err;
 }
 
@@ -2319,6 +2563,8 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
 		unregister_netdev(port_priv->netdev);
 		free_netdev(port_priv->netdev);
 	}
+
+	kfree(ethsw->fdbs);
 	kfree(ethsw->ports);
 
 	dpaa2_switch_takedown(sw_dev);
@@ -2365,6 +2611,11 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	port_netdev->min_mtu = ETH_MIN_MTU;
 	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
 
+	/* Populate the private port structure so that later calls to
+	 * dpaa2_switch_port_init() can use it.
+	 */
+	ethsw->ports[port_idx] = port_priv;
+
 	err = dpaa2_switch_port_init(port_priv, port_idx);
 	if (err)
 		goto err_port_probe;
@@ -2373,12 +2624,11 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	if (err)
 		goto err_port_probe;
 
-	ethsw->ports[port_idx] = port_priv;
-
 	return 0;
 
 err_port_probe:
 	free_netdev(port_netdev);
+	ethsw->ports[port_idx] = NULL;
 
 	return err;
 }
@@ -2420,10 +2670,17 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 		goto err_takedown;
 	}
 
+	ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs),
+			      GFP_KERNEL);
+	if (!ethsw->fdbs) {
+		err = -ENOMEM;
+		goto err_free_ports;
+	}
+
 	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
 		err = dpaa2_switch_probe_port(ethsw, i);
 		if (err)
-			goto err_free_ports;
+			goto err_free_netdev;
 	}
 
 	/* Add a NAPI instance for each of the Rx queues. The first port's
@@ -2438,7 +2695,7 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
 	if (err) {
 		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-		goto err_free_ports;
+		goto err_free_netdev;
 	}
 
 	/* Setup IRQs */
@@ -2462,12 +2719,14 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 err_unregister_ports:
 	for (i--; i >= 0; i--)
 		unregister_netdev(ethsw->ports[i]->netdev);
+	dpaa2_switch_teardown_irqs(sw_dev);
 err_stop:
 	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-
-err_free_ports:
+err_free_netdev:
 	for (i--; i >= 0; i--)
 		free_netdev(ethsw->ports[i]->netdev);
+	kfree(ethsw->fdbs);
+err_free_ports:
 	kfree(ethsw->ports);
 
 err_takedown:
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index ab3b75a62f01..d83a9f17f672 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -92,6 +92,12 @@ struct dpaa2_switch_fq {
 	u32 fqid;
 };
 
+struct dpaa2_switch_fdb {
+	struct net_device	*bridge_dev;
+	u16			fdb_id;
+	bool			in_use;
+};
+
 /* Per port private data */
 struct ethsw_port_priv {
 	struct net_device	*netdev;
@@ -103,8 +109,9 @@ struct ethsw_port_priv {
 
 	u8			vlans[VLAN_VID_MASK + 1];
 	u16			pvid;
-	struct net_device	*bridge_dev;
 	u16			tx_qdid;
+
+	struct dpaa2_switch_fdb	*fdb;
 };
 
 /* Switch data */
@@ -131,6 +138,8 @@ struct ethsw_core {
 	int				buf_count;
 	u16				bpid;
 	int				napi_users;
+
+	struct dpaa2_switch_fdb		*fdbs;
 };
 
 static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
@@ -140,6 +149,25 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
 		return false;
 	}
 
+	if (ethsw->sw_attr.flooding_cfg != DPSW_FLOODING_PER_FDB) {
+		dev_err(ethsw->dev, "Flooding domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.broadcast_cfg != DPSW_BROADCAST_PER_FDB) {
+		dev_err(ethsw->dev, "Broadcast domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.max_fdbs < ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "The number of FDBs is lower than the number of ports, cannot probe\n");
+		return false;
+	}
+
 	return true;
 }
+
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
+				 struct notifier_block *nb);
+
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 16abb6ad6abc5ffe45d1e03e9263530b263de2a1 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:48 +0200
Subject: staging: dpaa2-switch: move the notifier register to module_init()

Move the notifier blocks register into the module_init() step, instead of
object probe, so that all DPSW devices probed by the dpaa2-switch driver
can use the same notifiers.

This will enable us to have a more straightforward approach in
determining if an event is intended for an object managed by this driver
or not. Previously, the dpaa2_switch_port_dev_check() function was
forced to also check the notifier block beside the net_device_ops
structure to determine if the event is for us or not.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 176 +++++++++++++++++---------------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h |   6 +-
 2 files changed, 95 insertions(+), 87 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 5be07181399d..1f8976898291 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -80,7 +80,7 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
 	 * to be present in that bridge
 	 */
 	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
-		if (!dpaa2_switch_port_dev_check(other_dev, NULL))
+		if (!dpaa2_switch_port_dev_check(other_dev))
 			continue;
 
 		if (other_dev == port_priv->netdev)
@@ -987,18 +987,9 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
 };
 
-bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
-				 struct notifier_block *nb)
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
 {
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	if (netdev->netdev_ops == &dpaa2_switch_port_ops &&
-	    (!nb || &port_priv->ethsw_data->port_nb == nb ||
-	     &port_priv->ethsw_data->port_switchdev_nb == nb ||
-	     &port_priv->ethsw_data->port_switchdevb_nb == nb))
-		return true;
-
-	return false;
+	return netdev->netdev_ops == &dpaa2_switch_port_ops;
 }
 
 static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
@@ -1429,7 +1420,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 	int err;
 
 	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
-		if (!dpaa2_switch_port_dev_check(other_dev, NULL))
+		if (!dpaa2_switch_port_dev_check(other_dev))
 			continue;
 
 		other_port_priv = netdev_priv(other_dev);
@@ -1529,7 +1520,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 	struct net_device *upper_dev;
 	int err = 0;
 
-	if (!dpaa2_switch_port_dev_check(netdev, nb))
+	if (!dpaa2_switch_port_dev_check(netdev))
 		return NOTIFY_DONE;
 
 	/* Handle just upper dev link/unlink for the moment */
@@ -1606,7 +1597,7 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
 	struct switchdev_notifier_fdb_info *fdb_info = ptr;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 
-	if (!dpaa2_switch_port_dev_check(dev, nb))
+	if (!dpaa2_switch_port_dev_check(dev))
 		return NOTIFY_DONE;
 
 	if (event == SWITCHDEV_PORT_ATTR_SET)
@@ -1673,7 +1664,7 @@ static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
 {
 	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 
-	if (!dpaa2_switch_port_dev_check(dev, nb))
+	if (!dpaa2_switch_port_dev_check(dev))
 		return NOTIFY_DONE;
 
 	switch (event) {
@@ -1687,41 +1678,6 @@ static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
-static int dpaa2_switch_register_notifier(struct device *dev)
-{
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	ethsw->port_nb.notifier_call = dpaa2_switch_port_netdevice_event;
-	err = register_netdevice_notifier(&ethsw->port_nb);
-	if (err) {
-		dev_err(dev, "Failed to register netdev notifier\n");
-		return err;
-	}
-
-	ethsw->port_switchdev_nb.notifier_call = dpaa2_switch_port_event;
-	err = register_switchdev_notifier(&ethsw->port_switchdev_nb);
-	if (err) {
-		dev_err(dev, "Failed to register switchdev notifier\n");
-		goto err_switchdev_nb;
-	}
-
-	ethsw->port_switchdevb_nb.notifier_call = dpaa2_switch_port_blocking_event;
-	err = register_switchdev_blocking_notifier(&ethsw->port_switchdevb_nb);
-	if (err) {
-		dev_err(dev, "Failed to register switchdev blocking notifier\n");
-		goto err_switchdev_blocking_nb;
-	}
-
-	return 0;
-
-err_switchdev_blocking_nb:
-	unregister_switchdev_notifier(&ethsw->port_switchdev_nb);
-err_switchdev_nb:
-	unregister_netdevice_notifier(&ethsw->port_nb);
-	return err;
-}
-
 /* Build a linear skb based on a single-buffer frame descriptor */
 static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
 						     const struct dpaa2_fd *fd)
@@ -2426,10 +2382,6 @@ static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
 	if (err)
 		goto err_destroy_ordered_workqueue;
 
-	err = dpaa2_switch_register_notifier(dev);
-	if (err)
-		goto err_destroy_ordered_workqueue;
-
 	return 0;
 
 err_destroy_ordered_workqueue:
@@ -2496,38 +2448,12 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	return err;
 }
 
-static void dpaa2_switch_unregister_notifier(struct device *dev)
-{
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	struct notifier_block *nb;
-	int err;
-
-	nb = &ethsw->port_switchdevb_nb;
-	err = unregister_switchdev_blocking_notifier(nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister switchdev blocking notifier (%d)\n",
-			err);
-
-	err = unregister_switchdev_notifier(&ethsw->port_switchdev_nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister switchdev notifier (%d)\n", err);
-
-	err = unregister_netdevice_notifier(&ethsw->port_nb);
-	if (err)
-		dev_err(dev,
-			"Failed to unregister netdev notifier (%d)\n", err);
-}
-
 static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
 {
 	struct device *dev = &sw_dev->dev;
 	struct ethsw_core *ethsw = dev_get_drvdata(dev);
 	int err;
 
-	dpaa2_switch_unregister_notifier(dev);
-
 	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
 	if (err)
 		dev_warn(dev, "dpsw_close err %d\n", err);
@@ -2761,7 +2687,93 @@ static struct fsl_mc_driver dpaa2_switch_drv = {
 	.match_id_table = dpaa2_switch_match_id_table
 };
 
-module_fsl_mc_driver(dpaa2_switch_drv);
+static struct notifier_block dpaa2_switch_port_nb __read_mostly = {
+	.notifier_call = dpaa2_switch_port_netdevice_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_nb = {
+	.notifier_call = dpaa2_switch_port_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb = {
+	.notifier_call = dpaa2_switch_port_blocking_event,
+};
+
+static int dpaa2_switch_register_notifiers(void)
+{
+	int err;
+
+	err = register_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register net_device notifier (%d)\n", err);
+		return err;
+	}
+
+	err = register_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev notifier (%d)\n", err);
+		goto err_switchdev_nb;
+	}
+
+	err = register_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev blocking notifier (%d)\n", err);
+		goto err_switchdev_blocking_nb;
+	}
+
+	return 0;
+
+err_switchdev_blocking_nb:
+	unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+err_switchdev_nb:
+	unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+
+	return err;
+}
+
+static void dpaa2_switch_unregister_notifiers(void)
+{
+	int err;
+
+	err = unregister_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev blocking notifier (%d)\n",
+		       err);
+
+	err = unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev notifier (%d)\n", err);
+
+	err = unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister net_device notifier (%d)\n", err);
+}
+
+static int __init dpaa2_switch_driver_init(void)
+{
+	int err;
+
+	err = fsl_mc_driver_register(&dpaa2_switch_drv);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_register_notifiers();
+	if (err) {
+		fsl_mc_driver_unregister(&dpaa2_switch_drv);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit dpaa2_switch_driver_exit(void)
+{
+	dpaa2_switch_unregister_notifiers();
+	fsl_mc_driver_unregister(&dpaa2_switch_drv);
+}
+
+module_init(dpaa2_switch_driver_init);
+module_exit(dpaa2_switch_driver_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver");
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index d83a9f17f672..0c228509fcd4 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -128,9 +128,6 @@ struct ethsw_core {
 
 	u8				vlans[VLAN_VID_MASK + 1];
 
-	struct notifier_block		port_nb;
-	struct notifier_block		port_switchdev_nb;
-	struct notifier_block		port_switchdevb_nb;
 	struct workqueue_struct		*workqueue;
 
 	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
@@ -167,7 +164,6 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
 	return true;
 }
 
-bool dpaa2_switch_port_dev_check(const struct net_device *netdev,
-				 struct notifier_block *nb);
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
 
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From d671407fccbb022ab6004bd8a8d5f1e970f2ba2e Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:49 +0200
Subject: staging: dpaa2-switch: accept only vlan-aware upper devices

The DPAA2 Switch is not capable to handle traffic in a VLAN unaware
fashion, thus the previous handling of both the accepted upper devices
and the SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING flag was wrong.

Fix this by checking if the bridge that we are joining is indeed VLAN
aware, if not return an error. Also, the RX VLAN filtering feature is
defined as 'on [fixed]' and the .ndo_vlan_rx_add_vid() and
.ndo_vlan_rx_kill_vid() callbacks are implemented just by recreating a
switchdev_obj_port_vlan object and then calling the same functions used
on the switchdev notifier path.
In addition, changing the vlan_filtering flag to 0 on a bridge under
which a DPAA2 switch interface is present is not supported, thus
rejected when SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING is received with
such a request.

This patch is also adding the use of the switchdev_handle_port_attr_set
function so that we can iterate through all the lower devices of the
bridge that the notification was received on and actually catch if the
user is trying to change the vlan_filtering state. Since on a VLAN
filtering change the net_device is the bridge, we also move the
dpaa2_switch_port_dev_check call so that we do not return NOTIFY_DONE
right away.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/Kconfig       |  1 +
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 88 ++++++++++++++++++++++++++-------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h |  6 +++
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig
index 244237bb068a..7cb005b6e7ab 100644
--- a/drivers/staging/fsl-dpaa2/Kconfig
+++ b/drivers/staging/fsl-dpaa2/Kconfig
@@ -12,6 +12,7 @@ config FSL_DPAA2
 
 config FSL_DPAA2_ETHSW
 	tristate "Freescale DPAA2 Ethernet Switch"
+	depends on BRIDGE || BRIDGE=n
 	depends on FSL_DPAA2
 	depends on NET_SWITCHDEV
 	help
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 1f8976898291..8058bc3ed467 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -800,6 +800,34 @@ err_map:
 	return err;
 }
 
+static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto,
+				      u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_add(netdev, &vlan);
+}
+
+static int dpaa2_switch_port_vlan_kill(struct net_device *netdev, __be16 proto,
+				       u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_del(netdev, &vlan);
+}
+
 static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
 {
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
@@ -981,6 +1009,8 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
 	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
 	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
+	.ndo_vlan_rx_add_vid	= dpaa2_switch_port_vlan_add,
+	.ndo_vlan_rx_kill_vid	= dpaa2_switch_port_vlan_kill,
 
 	.ndo_start_xmit		= dpaa2_switch_port_tx,
 	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
@@ -1114,7 +1144,8 @@ static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
 }
 
 static int dpaa2_switch_port_attr_set(struct net_device *netdev,
-				      const struct switchdev_attr *attr)
+				      const struct switchdev_attr *attr,
+				      struct netlink_ext_ack *extack)
 {
 	int err = 0;
 
@@ -1124,7 +1155,11 @@ static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 							   attr->u.stp_state);
 		break;
 	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-		/* VLANs are supported by default  */
+		if (!attr->u.vlan_filtering) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "The DPAA2 switch does not support VLAN-unaware operation");
+			return -EOPNOTSUPP;
+		}
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -1134,8 +1169,8 @@ static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 	return err;
 }
 
-static int dpaa2_switch_port_vlans_add(struct net_device *netdev,
-				       const struct switchdev_obj_port_vlan *vlan)
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
 {
 	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
@@ -1303,8 +1338,8 @@ static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid
 	return 0;
 }
 
-static int dpaa2_switch_port_vlans_del(struct net_device *netdev,
-				       const struct switchdev_obj_port_vlan *vlan)
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
 {
 	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
 
@@ -1356,14 +1391,13 @@ static int dpaa2_switch_port_obj_del(struct net_device *netdev,
 }
 
 static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
-					    struct switchdev_notifier_port_attr_info
-					    *port_attr_info)
+					    struct switchdev_notifier_port_attr_info *ptr)
 {
 	int err;
 
-	err = dpaa2_switch_port_attr_set(netdev, port_attr_info->attr);
-
-	port_attr_info->handled = true;
+	err = switchdev_handle_port_attr_set(netdev, ptr,
+					     dpaa2_switch_port_dev_check,
+					     dpaa2_switch_port_attr_set);
 	return notifier_from_errno(err);
 }
 
@@ -1517,14 +1551,24 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 {
 	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
 	struct net_device *upper_dev;
 	int err = 0;
 
 	if (!dpaa2_switch_port_dev_check(netdev))
 		return NOTIFY_DONE;
 
-	/* Handle just upper dev link/unlink for the moment */
-	if (event == NETDEV_CHANGEUPPER) {
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (netif_is_bridge_master(upper_dev) && !br_vlan_enabled(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
+			err = -EOPNOTSUPP;
+		}
+		break;
+	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
 		if (netif_is_bridge_master(upper_dev)) {
 			if (info->linking)
@@ -1532,6 +1576,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 			else
 				err = dpaa2_switch_port_bridge_leave(netdev);
 		}
+		break;
 	}
 
 	return notifier_from_errno(err);
@@ -1597,12 +1642,12 @@ static int dpaa2_switch_port_event(struct notifier_block *nb,
 	struct switchdev_notifier_fdb_info *fdb_info = ptr;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 
-	if (!dpaa2_switch_port_dev_check(dev))
-		return NOTIFY_DONE;
-
 	if (event == SWITCHDEV_PORT_ATTR_SET)
 		return dpaa2_switch_port_attr_set_event(dev, ptr);
 
+	if (!dpaa2_switch_port_dev_check(dev))
+		return NOTIFY_DONE;
+
 	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
 	if (!switchdev_work)
 		return NOTIFY_BAD;
@@ -1646,6 +1691,9 @@ static int dpaa2_switch_port_obj_event(unsigned long event,
 {
 	int err = -EOPNOTSUPP;
 
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
 	switch (event) {
 	case SWITCHDEV_PORT_OBJ_ADD:
 		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
@@ -1664,9 +1712,6 @@ static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
 {
 	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 
-	if (!dpaa2_switch_port_dev_check(dev))
-		return NOTIFY_DONE;
-
 	switch (event) {
 	case SWITCHDEV_PORT_OBJ_ADD:
 	case SWITCHDEV_PORT_OBJ_DEL:
@@ -2542,6 +2587,11 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	 */
 	ethsw->ports[port_idx] = port_priv;
 
+	/* The DPAA2 switch's ingress path depends on the VLAN table,
+	 * thus we are not able to disable VLAN filtering.
+	 */
+	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER;
+
 	err = dpaa2_switch_port_init(port_priv, port_idx);
 	if (err)
 		goto err_port_probe;
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 0c228509fcd4..ac9335c83357 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -166,4 +166,10 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
 
 bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
 
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 685b480145c15b81c411ad8b9a6b74a472274793 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:50 +0200
Subject: staging: dpaa2-switch: add fast-ageing on bridge leave

Upon leaving a bridge, any MAC addresses learnt on the switch port prior
to this point have to be removed so that we preserve the bridging domain
configuration.

Restructure the dpaa2_switch_port_fdb_dump() function in order to have a
common dpaa2_switch_fdb_iterate() function between the FDB dump callback
and the fast age procedure. To accomplish this, add a new callback -
dpaa2_switch_fdb_cb_t - which will be called on each MAC addr and,
depending on the situation, will either dump the FDB entry into a
netlink message or will delete the address from the FDB table, in case
of the fast-age.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 76 ++++++++++++++++++++++++++-------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h |  3 ++
 2 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 8058bc3ed467..5fa7e41f6866 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -729,21 +729,14 @@ static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
 	return valid;
 }
 
-static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-				      struct net_device *net_dev,
-				      struct net_device *filter_dev, int *idx)
+static int dpaa2_switch_fdb_iterate(struct ethsw_port_priv *port_priv,
+				    dpaa2_switch_fdb_cb_t cb, void *data)
 {
-	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct net_device *net_dev = port_priv->netdev;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	struct device *dev = net_dev->dev.parent;
 	struct fdb_dump_entry *fdb_entries;
 	struct fdb_dump_entry fdb_entry;
-	struct ethsw_dump_ctx dump = {
-		.dev = net_dev,
-		.skb = skb,
-		.cb = cb,
-		.idx = *idx,
-	};
 	dma_addr_t fdb_dump_iova;
 	u16 num_fdb_entries;
 	u32 fdb_dump_size;
@@ -778,17 +771,12 @@ static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callba
 	for (i = 0; i < num_fdb_entries; i++) {
 		fdb_entry = fdb_entries[i];
 
-		if (!dpaa2_switch_port_fdb_valid_entry(&fdb_entry, port_priv))
-			continue;
-
-		err = dpaa2_switch_fdb_dump_nl(&fdb_entry, &dump);
+		err = cb(port_priv, &fdb_entry, data);
 		if (err)
 			goto end;
 	}
 
 end:
-	*idx = dump.idx;
-
 	kfree(dma_mem);
 
 	return 0;
@@ -800,6 +788,59 @@ err_map:
 	return err;
 }
 
+static int dpaa2_switch_fdb_entry_dump(struct ethsw_port_priv *port_priv,
+				       struct fdb_dump_entry *fdb_entry,
+				       void *data)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	return dpaa2_switch_fdb_dump_nl(fdb_entry, data);
+}
+
+static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+				      struct net_device *net_dev,
+				      struct net_device *filter_dev, int *idx)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_dump_ctx dump = {
+		.dev = net_dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	int err;
+
+	err = dpaa2_switch_fdb_iterate(port_priv, dpaa2_switch_fdb_entry_dump, &dump);
+	*idx = dump.idx;
+
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_fast_age(struct ethsw_port_priv *port_priv,
+					   struct fdb_dump_entry *fdb_entry,
+					   void *data __always_unused)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	if (!(fdb_entry->type & DPSW_FDB_ENTRY_TYPE_DYNAMIC))
+		return 0;
+
+	if (fdb_entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		dpaa2_switch_port_fdb_del_uc(port_priv, fdb_entry->mac_addr);
+	else
+		dpaa2_switch_port_fdb_del_mc(port_priv, fdb_entry->mac_addr);
+
+	return 0;
+}
+
+static void dpaa2_switch_port_fast_age(struct ethsw_port_priv *port_priv)
+{
+	dpaa2_switch_fdb_iterate(port_priv,
+				 dpaa2_switch_fdb_entry_fast_age, NULL);
+}
+
 static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto,
 				      u16 vid)
 {
@@ -1511,6 +1552,9 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	int err;
 
+	/* First of all, fast age any learn FDB addresses on this switch port */
+	dpaa2_switch_port_fast_age(port_priv);
+
 	/* Clear all RX VLANs installed through vlan_vid_add() either as VLAN
 	 * upper devices or otherwise from the FDB table that we are about to
 	 * leave
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index ac9335c83357..933563064015 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -172,4 +172,7 @@ int dpaa2_switch_port_vlans_add(struct net_device *netdev,
 int dpaa2_switch_port_vlans_del(struct net_device *netdev,
 				const struct switchdev_obj_port_vlan *vlan);
 
+typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
+				  struct fdb_dump_entry *fdb_entry,
+				  void *data);
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 1c4928fc2929a502f01d4b7001a37db51309ceb9 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:51 +0200
Subject: staging: dpaa2-switch: prevent joining a bridge while VLAN uppers are
 present

Each time a switch port joins a bridge, it will start to use a FDB table
common with all the other switch ports that are under the same bridge.
This means that any VLAN added prior to a bridge join, will retain its
previous FDB table destination. With this patch, I choose to restrict
when a switch port can change it's upper device (either join or leave)
so that the driver does not have to delete all the previously installed
VLANs from the previous FDB and add them into the new one.

Thus, in the PRECHANGEUPPER  notification we check if there are any VLAN
type upper devices and if that's true, deny the CHANGEUPPER.

This way, the user is not restricted in the topology but rather in the
order in which the setup is done: it must first create the bridging
domain layout and after that add the necessary VLAN devices if
necessary. The teardown is similar, the VLAN devices will need to be
destroyed prior to a change in the bridging layout.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 5fa7e41f6866..97292cf570c1 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -1590,6 +1590,21 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 					  BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
 }
 
+static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev)
+{
+	struct net_device *upper_dev;
+	struct list_head *iter;
+
+	/* RCU read lock not necessary because we have write-side protection
+	 * (rtnl_mutex), however a non-rcu iterator does not exist.
+	 */
+	netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter)
+		if (is_vlan_dev(upper_dev))
+			return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 					     unsigned long event, void *ptr)
 {
@@ -1607,10 +1622,22 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 	switch (event) {
 	case NETDEV_PRECHANGEUPPER:
 		upper_dev = info->upper_dev;
-		if (netif_is_bridge_master(upper_dev) && !br_vlan_enabled(upper_dev)) {
+		if (!netif_is_bridge_master(upper_dev))
+			break;
+
+		if (!br_vlan_enabled(upper_dev)) {
 			NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
 			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot join a bridge while VLAN uppers are present");
+			goto out;
 		}
+
 		break;
 	case NETDEV_CHANGEUPPER:
 		upper_dev = info->upper_dev;
@@ -1623,6 +1650,7 @@ static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
 		break;
 	}
 
+out:
 	return notifier_from_errno(err);
 }
 
-- 
cgit v1.2.3


From f48298d3fbfaadedd7e7bd1cdcbb3f1291a8d42d Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 10 Mar 2021 14:14:52 +0200
Subject: staging: dpaa2-switch: move the driver out of staging

Now that the dpaa2-switch driver has basic I/O capabilities on the
switch port net_devices and multiple bridging domains are supported,
move the driver out of staging.

The dpaa2-switch driver is placed right next to the dpaa2-eth driver
since, in the near future, they will be sharing most of the data path.
I didn't implement code reuse in this patch series because I wanted to
keep it as small as possible.

Also, the README is removed from staging with the intention to add
proper rst documentation afterwards to actually match was is supported
by the driver.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                                        |    6 +-
 drivers/net/ethernet/freescale/dpaa2/Kconfig       |    8 +
 drivers/net/ethernet/freescale/dpaa2/Makefile      |    2 +
 .../freescale/dpaa2/dpaa2-switch-ethtool.c         |  189 ++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 2901 ++++++++++++++++++++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  178 ++
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h    |  458 +++
 drivers/net/ethernet/freescale/dpaa2/dpsw.c        | 1486 ++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.h        |  751 +++++
 drivers/staging/Kconfig                            |    2 -
 drivers/staging/Makefile                           |    1 -
 drivers/staging/fsl-dpaa2/Kconfig                  |   20 -
 drivers/staging/fsl-dpaa2/Makefile                 |    6 -
 drivers/staging/fsl-dpaa2/ethsw/Makefile           |   10 -
 drivers/staging/fsl-dpaa2/ethsw/README             |  106 -
 drivers/staging/fsl-dpaa2/ethsw/TODO               |   13 -
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h         |  458 ---
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c             | 1486 ----------
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h             |  751 -----
 drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c    |  189 --
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c            | 2901 --------------------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h            |  178 --
 22 files changed, 5976 insertions(+), 6124 deletions(-)
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpsw.c
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpsw.h
 delete mode 100644 drivers/staging/fsl-dpaa2/Kconfig
 delete mode 100644 drivers/staging/fsl-dpaa2/Makefile
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/Makefile
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/README
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/TODO
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/dpsw.c
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/dpsw.h
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/ethsw.c
 delete mode 100644 drivers/staging/fsl-dpaa2/ethsw/ethsw.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ed9fd1fb6932..e1fa5ad9bb30 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5473,11 +5473,11 @@ F:	drivers/net/ethernet/freescale/dpaa2/dpmac*
 F:	drivers/net/ethernet/freescale/dpaa2/dpni*
 
 DPAA2 ETHERNET SWITCH DRIVER
-M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
 M:	Ioana Ciornei <ioana.ciornei@nxp.com>
-L:	linux-kernel@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Maintained
-F:	drivers/staging/fsl-dpaa2/ethsw
+F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
+F:	drivers/net/ethernet/freescale/dpaa2/dpsw*
 
 DPT_I2O SCSI RAID DRIVER
 M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
index ee7a906e30b3..d029b69c3f18 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -29,3 +29,11 @@ config FSL_DPAA2_PTP_CLOCK
 	help
 	  This driver adds support for using the DPAA2 1588 timer module
 	  as a PTP clock.
+
+config FSL_DPAA2_SWITCH
+	tristate "Freescale DPAA2 Ethernet Switch"
+	depends on BRIDGE || BRIDGE=n
+	depends on NET_SWITCHDEV
+	help
+	  Driver for Freescale DPAA2 Ethernet Switch. This driver manages
+	  switch objects discovered on the Freeescale MC bus.
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index 146cb3540e61..644ef9ae02a3 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -5,11 +5,13 @@
 
 obj-$(CONFIG_FSL_DPAA2_ETH)		+= fsl-dpaa2-eth.o
 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)	+= fsl-dpaa2-ptp.o
+obj-$(CONFIG_FSL_DPAA2_SWITCH)		+= fsl-dpaa2-switch.o
 
 fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
+fsl-dpaa2-switch-objs	:= dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
new file mode 100644
index 000000000000..70e04321c420
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DPAA2 Ethernet Switch ethtool support
+ *
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2018 NXP
+ *
+ */
+
+#include <linux/ethtool.h>
+
+#include "dpaa2-switch.h"
+
+static struct {
+	enum dpsw_counter id;
+	char name[ETH_GSTRING_LEN];
+} dpaa2_switch_ethtool_counters[] =  {
+	{DPSW_CNT_ING_FRAME,		"rx frames"},
+	{DPSW_CNT_ING_BYTE,		"rx bytes"},
+	{DPSW_CNT_ING_FLTR_FRAME,	"rx filtered frames"},
+	{DPSW_CNT_ING_FRAME_DISCARD,	"rx discarded frames"},
+	{DPSW_CNT_ING_BCAST_FRAME,	"rx b-cast frames"},
+	{DPSW_CNT_ING_BCAST_BYTES,	"rx b-cast bytes"},
+	{DPSW_CNT_ING_MCAST_FRAME,	"rx m-cast frames"},
+	{DPSW_CNT_ING_MCAST_BYTE,	"rx m-cast bytes"},
+	{DPSW_CNT_EGR_FRAME,		"tx frames"},
+	{DPSW_CNT_EGR_BYTE,		"tx bytes"},
+	{DPSW_CNT_EGR_FRAME_DISCARD,	"tx discarded frames"},
+	{DPSW_CNT_ING_NO_BUFF_DISCARD,	"rx discarded no buffer frames"},
+};
+
+#define DPAA2_SWITCH_NUM_COUNTERS	ARRAY_SIZE(dpaa2_switch_ethtool_counters)
+
+static void dpaa2_switch_get_drvinfo(struct net_device *netdev,
+				     struct ethtool_drvinfo *drvinfo)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	u16 version_major, version_minor;
+	int err;
+
+	strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+	err = dpsw_get_api_version(port_priv->ethsw_data->mc_io, 0,
+				   &version_major,
+				   &version_minor);
+	if (err)
+		strscpy(drvinfo->fw_version, "N/A",
+			sizeof(drvinfo->fw_version));
+	else
+		snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+			 "%u.%u", version_major, version_minor);
+
+	strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent->parent),
+		sizeof(drvinfo->bus_info));
+}
+
+static int
+dpaa2_switch_get_link_ksettings(struct net_device *netdev,
+				struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpsw_link_state state = {0};
+	int err = 0;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx,
+				     &state);
+	if (err) {
+		netdev_err(netdev, "ERROR %d getting link state\n", err);
+		goto out;
+	}
+
+	/* At the moment, we have no way of interrogating the DPMAC
+	 * from the DPSW side or there may not exist a DPMAC at all.
+	 * Report only autoneg state, duplexity and speed.
+	 */
+	if (state.options & DPSW_LINK_OPT_AUTONEG)
+		link_ksettings->base.autoneg = AUTONEG_ENABLE;
+	if (!(state.options & DPSW_LINK_OPT_HALF_DUPLEX))
+		link_ksettings->base.duplex = DUPLEX_FULL;
+	link_ksettings->base.speed = state.rate;
+
+out:
+	return err;
+}
+
+static int
+dpaa2_switch_set_link_ksettings(struct net_device *netdev,
+				const struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_link_cfg cfg = {0};
+	bool if_running;
+	int err = 0, ret;
+
+	/* Interface needs to be down to change link settings */
+	if_running = netif_running(netdev);
+	if (if_running) {
+		err = dpsw_if_disable(ethsw->mc_io, 0,
+				      ethsw->dpsw_handle,
+				      port_priv->idx);
+		if (err) {
+			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+			return err;
+		}
+	}
+
+	cfg.rate = link_ksettings->base.speed;
+	if (link_ksettings->base.autoneg == AUTONEG_ENABLE)
+		cfg.options |= DPSW_LINK_OPT_AUTONEG;
+	else
+		cfg.options &= ~DPSW_LINK_OPT_AUTONEG;
+	if (link_ksettings->base.duplex  == DUPLEX_HALF)
+		cfg.options |= DPSW_LINK_OPT_HALF_DUPLEX;
+	else
+		cfg.options &= ~DPSW_LINK_OPT_HALF_DUPLEX;
+
+	err = dpsw_if_set_link_cfg(port_priv->ethsw_data->mc_io, 0,
+				   port_priv->ethsw_data->dpsw_handle,
+				   port_priv->idx,
+				   &cfg);
+
+	if (if_running) {
+		ret = dpsw_if_enable(ethsw->mc_io, 0,
+				     ethsw->dpsw_handle,
+				     port_priv->idx);
+		if (ret) {
+			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
+			return ret;
+		}
+	}
+	return err;
+}
+
+static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return DPAA2_SWITCH_NUM_COUNTERS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
+					     u32 stringset, u8 *data)
+{
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       dpaa2_switch_ethtool_counters[i].name,
+			       ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
+					   struct ethtool_stats *stats,
+					   u64 *data)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int i, err;
+
+	for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+		err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+					  port_priv->ethsw_data->dpsw_handle,
+					  port_priv->idx,
+					  dpaa2_switch_ethtool_counters[i].id,
+					  &data[i]);
+		if (err)
+			netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
+				   dpaa2_switch_ethtool_counters[i].name, err);
+	}
+}
+
+const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
+	.get_drvinfo		= dpaa2_switch_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= dpaa2_switch_get_link_ksettings,
+	.set_link_ksettings	= dpaa2_switch_set_link_ksettings,
+	.get_strings		= dpaa2_switch_ethtool_get_strings,
+	.get_ethtool_stats	= dpaa2_switch_ethtool_get_stats,
+	.get_sset_count		= dpaa2_switch_ethtool_get_sset_count,
+};
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
new file mode 100644
index 000000000000..2fd05dd18d46
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -0,0 +1,2901 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DPAA2 Ethernet Switch driver
+ *
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/iommu.h>
+
+#include <linux/fsl/mc.h>
+
+#include "dpaa2-switch.h"
+
+/* Minimal supported DPSW version */
+#define DPSW_MIN_VER_MAJOR		8
+#define DPSW_MIN_VER_MINOR		9
+
+#define DEFAULT_VLAN_ID			1
+
+static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
+{
+	return port_priv->fdb->fdb_id;
+}
+
+static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->fdbs[i].in_use)
+			return &ethsw->fdbs[i];
+	return NULL;
+}
+
+static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
+				     struct net_device *bridge_dev)
+{
+	struct ethsw_port_priv *other_port_priv = NULL;
+	struct dpaa2_switch_fdb *fdb;
+	struct net_device *other_dev;
+	struct list_head *iter;
+
+	/* If we leave a bridge (bridge_dev is NULL), find an unused
+	 * FDB and use that.
+	 */
+	if (!bridge_dev) {
+		fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
+
+		/* If there is no unused FDB, we must be the last port that
+		 * leaves the last bridge, all the others are standalone. We
+		 * can just keep the FDB that we already have.
+		 */
+
+		if (!fdb) {
+			port_priv->fdb->bridge_dev = NULL;
+			return 0;
+		}
+
+		port_priv->fdb = fdb;
+		port_priv->fdb->in_use = true;
+		port_priv->fdb->bridge_dev = NULL;
+		return 0;
+	}
+
+	/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
+	 * being held. Assert on it so that it's easier to catch new code
+	 * paths that reach this point without the RTNL lock.
+	 */
+	ASSERT_RTNL();
+
+	/* If part of a bridge, use the FDB of the first dpaa2 switch interface
+	 * to be present in that bridge
+	 */
+	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev))
+			continue;
+
+		if (other_dev == port_priv->netdev)
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		break;
+	}
+
+	/* The current port is about to change its FDB to the one used by the
+	 * first port that joined the bridge.
+	 */
+	if (other_port_priv) {
+		/* The previous FDB is about to become unused, since the
+		 * interface is no longer standalone.
+		 */
+		port_priv->fdb->in_use = false;
+		port_priv->fdb->bridge_dev = NULL;
+
+		/* Get a reference to the new FDB */
+		port_priv->fdb = other_port_priv->fdb;
+	}
+
+	/* Keep track of the new upper bridge device */
+	port_priv->fdb->bridge_dev = bridge_dev;
+
+	return 0;
+}
+
+static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
+				dma_addr_t iova_addr)
+{
+	phys_addr_t phys_addr;
+
+	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
+
+	return phys_to_virt(phys_addr);
+}
+
+static int dpaa2_switch_add_vlan(struct ethsw_port_priv *port_priv, u16 vid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_vlan_cfg vcfg = {0};
+	int err;
+
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_vlan_add(ethsw->mc_io, 0,
+			    ethsw->dpsw_handle, vid, &vcfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err);
+		return err;
+	}
+	ethsw->vlans[vid] = ETHSW_VLAN_MEMBER;
+
+	return 0;
+}
+
+static bool dpaa2_switch_port_is_up(struct ethsw_port_priv *port_priv)
+{
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_link_state state;
+	int err;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx, &state);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
+		return true;
+	}
+
+	WARN_ONCE(state.up > 1, "Garbage read into link_state");
+
+	return state.up ? true : false;
+}
+
+static int dpaa2_switch_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_tci_cfg tci_cfg = { 0 };
+	bool up;
+	int err, ret;
+
+	err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      port_priv->idx, &tci_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_tci err %d\n", err);
+		return err;
+	}
+
+	tci_cfg.vlan_id = pvid;
+
+	/* Interface needs to be down to change PVID */
+	up = dpaa2_switch_port_is_up(port_priv);
+	if (up) {
+		err = dpsw_if_disable(ethsw->mc_io, 0,
+				      ethsw->dpsw_handle,
+				      port_priv->idx);
+		if (err) {
+			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+			return err;
+		}
+	}
+
+	err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      port_priv->idx, &tci_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_set_tci err %d\n", err);
+		goto set_tci_error;
+	}
+
+	/* Delete previous PVID info and mark the new one */
+	port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID;
+	port_priv->vlans[pvid] |= ETHSW_VLAN_PVID;
+	port_priv->pvid = pvid;
+
+set_tci_error:
+	if (up) {
+		ret = dpsw_if_enable(ethsw->mc_io, 0,
+				     ethsw->dpsw_handle,
+				     port_priv->idx);
+		if (ret) {
+			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
+			return ret;
+		}
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
+				      u16 vid, u16 flags)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	int err;
+
+	if (port_priv->vlans[vid]) {
+		netdev_warn(netdev, "VLAN %d already configured\n", vid);
+		return -EEXIST;
+	}
+
+	/* If hit, this VLAN rule will lead the packet into the FDB table
+	 * specified in the vlan configuration below
+	 */
+	vcfg.num_ifs = 1;
+	vcfg.if_id[0] = port_priv->idx;
+	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	vcfg.options |= DPSW_VLAN_ADD_IF_OPT_FDB_ID;
+	err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err);
+		return err;
+	}
+
+	port_priv->vlans[vid] = ETHSW_VLAN_MEMBER;
+
+	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
+		err = dpsw_vlan_add_if_untagged(ethsw->mc_io, 0,
+						ethsw->dpsw_handle,
+						vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_add_if_untagged err %d\n", err);
+			return err;
+		}
+		port_priv->vlans[vid] |= ETHSW_VLAN_UNTAGGED;
+	}
+
+	if (flags & BRIDGE_VLAN_INFO_PVID) {
+		err = dpaa2_switch_port_set_pvid(port_priv, vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
+{
+	struct dpsw_stp_cfg stp_cfg = {
+		.state = state,
+	};
+	int err;
+	u16 vid;
+
+	if (!netif_running(port_priv->netdev) || state == port_priv->stp_state)
+		return 0;	/* Nothing to do */
+
+	for (vid = 0; vid <= VLAN_VID_MASK; vid++) {
+		if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
+			stp_cfg.vlan_id = vid;
+			err = dpsw_if_set_stp(port_priv->ethsw_data->mc_io, 0,
+					      port_priv->ethsw_data->dpsw_handle,
+					      port_priv->idx, &stp_cfg);
+			if (err) {
+				netdev_err(port_priv->netdev,
+					   "dpsw_if_set_stp err %d\n", err);
+				return err;
+			}
+		}
+	}
+
+	port_priv->stp_state = state;
+
+	return 0;
+}
+
+static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid)
+{
+	struct ethsw_port_priv *ppriv_local = NULL;
+	int i, err;
+
+	if (!ethsw->vlans[vid])
+		return -ENOENT;
+
+	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, vid);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_vlan_remove err %d\n", err);
+		return err;
+	}
+	ethsw->vlans[vid] = 0;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		ppriv_local = ethsw->ports[i];
+		ppriv_local->vlans[vid] = 0;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	entry.if_egress = port_priv->idx;
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	ether_addr_copy(entry.mac_addr, addr);
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0,
+				   port_priv->ethsw_data->dpsw_handle,
+				   fdb_id, &entry);
+	if (err)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_add_unicast err %d\n", err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_unicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	entry.if_egress = port_priv->idx;
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	ether_addr_copy(entry.mac_addr, addr);
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0,
+				      port_priv->ethsw_data->dpsw_handle,
+				      fdb_id, &entry);
+	/* Silently discard error for calling multiple times the del command */
+	if (err && err != -ENXIO)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_remove_unicast err %d\n", err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	ether_addr_copy(entry.mac_addr, addr);
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	entry.num_ifs = 1;
+	entry.if_id[0] = port_priv->idx;
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     fdb_id, &entry);
+	/* Silently discard error for calling multiple times the add command */
+	if (err && err != -ENXIO)
+		netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n",
+			   err);
+	return err;
+}
+
+static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
+					const unsigned char *addr)
+{
+	struct dpsw_fdb_multicast_cfg entry = {0};
+	u16 fdb_id;
+	int err;
+
+	ether_addr_copy(entry.mac_addr, addr);
+	entry.type = DPSW_FDB_ENTRY_STATIC;
+	entry.num_ifs = 1;
+	entry.if_id[0] = port_priv->idx;
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0,
+					port_priv->ethsw_data->dpsw_handle,
+					fdb_id, &entry);
+	/* Silently discard error for calling multiple times the del command */
+	if (err && err != -ENAVAIL)
+		netdev_err(port_priv->netdev,
+			   "dpsw_fdb_remove_multicast err %d\n", err);
+	return err;
+}
+
+static void dpaa2_switch_port_get_stats(struct net_device *netdev,
+					struct rtnl_link_stats64 *stats)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	u64 tmp;
+	int err;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FRAME, &stats->rx_packets);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_FRAME, &stats->tx_packets);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_BYTE, &stats->rx_bytes);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_BYTE, &stats->tx_bytes);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FRAME_DISCARD,
+				  &stats->rx_dropped);
+	if (err)
+		goto error;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_ING_FLTR_FRAME,
+				  &tmp);
+	if (err)
+		goto error;
+	stats->rx_dropped += tmp;
+
+	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+				  port_priv->ethsw_data->dpsw_handle,
+				  port_priv->idx,
+				  DPSW_CNT_EGR_FRAME_DISCARD,
+				  &stats->tx_dropped);
+	if (err)
+		goto error;
+
+	return;
+
+error:
+	netdev_err(netdev, "dpsw_if_get_counter err %d\n", err);
+}
+
+static bool dpaa2_switch_port_has_offload_stats(const struct net_device *netdev,
+						int attr_id)
+{
+	return (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT);
+}
+
+static int dpaa2_switch_port_get_offload_stats(int attr_id,
+					       const struct net_device *netdev,
+					       void *sp)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		dpaa2_switch_port_get_stats((struct net_device *)netdev, sp);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = dpsw_if_set_max_frame_length(port_priv->ethsw_data->mc_io,
+					   0,
+					   port_priv->ethsw_data->dpsw_handle,
+					   port_priv->idx,
+					   (u16)ETHSW_L2_MAX_FRM(mtu));
+	if (err) {
+		netdev_err(netdev,
+			   "dpsw_if_set_max_frame_length() err %d\n", err);
+		return err;
+	}
+
+	netdev->mtu = mtu;
+	return 0;
+}
+
+static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpsw_link_state state;
+	int err;
+
+	/* Interrupts are received even though no one issued an 'ifconfig up'
+	 * on the switch interface. Ignore these link state update interrupts
+	 */
+	if (!netif_running(netdev))
+		return 0;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx, &state);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
+		return err;
+	}
+
+	WARN_ONCE(state.up > 1, "Garbage read into link_state");
+
+	if (state.up != port_priv->link_state) {
+		if (state.up) {
+			netif_carrier_on(netdev);
+			netif_tx_start_all_queues(netdev);
+		} else {
+			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
+		}
+		port_priv->link_state = state.up;
+	}
+
+	return 0;
+}
+
+/* Manage all NAPI instances for the control interface.
+ *
+ * We only have one RX queue and one Tx Conf queue for all
+ * switch ports. Therefore, we only need to enable the NAPI instance once, the
+ * first time one of the switch ports runs .dev_open().
+ */
+
+static void dpaa2_switch_enable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* a new interface is using the NAPI instance */
+	ethsw->napi_users++;
+
+	/* if there is already a user of the instance, return */
+	if (ethsw->napi_users > 1)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_enable(&ethsw->fq[i].napi);
+}
+
+static void dpaa2_switch_disable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* If we are not the last interface using the NAPI, return */
+	ethsw->napi_users--;
+	if (ethsw->napi_users)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_disable(&ethsw->fq[i].napi);
+}
+
+static int dpaa2_switch_port_open(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* Explicitly set carrier off, otherwise
+	 * netif_carrier_ok() will return true and cause 'ip link show'
+	 * to report the LOWER_UP flag, even though the link
+	 * notification wasn't even received.
+	 */
+	netif_carrier_off(netdev);
+
+	err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
+			     port_priv->ethsw_data->dpsw_handle,
+			     port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_enable err %d\n", err);
+		return err;
+	}
+
+	/* sync carrier state */
+	err = dpaa2_switch_port_carrier_state_sync(netdev);
+	if (err) {
+		netdev_err(netdev,
+			   "dpaa2_switch_port_carrier_state_sync err %d\n", err);
+		goto err_carrier_sync;
+	}
+
+	dpaa2_switch_enable_ctrl_if_napi(ethsw);
+
+	return 0;
+
+err_carrier_sync:
+	dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			port_priv->ethsw_data->dpsw_handle,
+			port_priv->idx);
+	return err;
+}
+
+static int dpaa2_switch_port_stop(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			      port_priv->ethsw_data->dpsw_handle,
+			      port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+		return err;
+	}
+
+	dpaa2_switch_disable_ctrl_if_napi(ethsw);
+
+	return 0;
+}
+
+static int dpaa2_switch_port_parent_id(struct net_device *dev,
+				       struct netdev_phys_item_id *ppid)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(dev);
+
+	ppid->id_len = 1;
+	ppid->id[0] = port_priv->ethsw_data->dev_id;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_get_phys_name(struct net_device *netdev, char *name,
+					   size_t len)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = snprintf(name, len, "p%d", port_priv->idx);
+	if (err >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
+struct ethsw_dump_ctx {
+	struct net_device *dev;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx;
+};
+
+static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
+				    struct ethsw_dump_ctx *dump)
+{
+	int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
+	u32 portid = NETLINK_CB(dump->cb->skb).portid;
+	u32 seq = dump->cb->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	if (dump->idx < dump->cb->args[2])
+		goto skip;
+
+	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+			sizeof(*ndm), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family  = AF_BRIDGE;
+	ndm->ndm_pad1    = 0;
+	ndm->ndm_pad2    = 0;
+	ndm->ndm_flags   = NTF_SELF;
+	ndm->ndm_type    = 0;
+	ndm->ndm_ifindex = dump->dev->ifindex;
+	ndm->ndm_state   = is_dynamic ? NUD_REACHABLE : NUD_NOARP;
+
+	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac_addr))
+		goto nla_put_failure;
+
+	nlmsg_end(dump->skb, nlh);
+
+skip:
+	dump->idx++;
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(dump->skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
+					     struct ethsw_port_priv *port_priv)
+{
+	int idx = port_priv->idx;
+	int valid;
+
+	if (entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		valid = entry->if_info == port_priv->idx;
+	else
+		valid = entry->if_mask[idx / 8] & BIT(idx % 8);
+
+	return valid;
+}
+
+static int dpaa2_switch_fdb_iterate(struct ethsw_port_priv *port_priv,
+				    dpaa2_switch_fdb_cb_t cb, void *data)
+{
+	struct net_device *net_dev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct device *dev = net_dev->dev.parent;
+	struct fdb_dump_entry *fdb_entries;
+	struct fdb_dump_entry fdb_entry;
+	dma_addr_t fdb_dump_iova;
+	u16 num_fdb_entries;
+	u32 fdb_dump_size;
+	int err = 0, i;
+	u8 *dma_mem;
+	u16 fdb_id;
+
+	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
+	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
+	if (!dma_mem)
+		return -ENOMEM;
+
+	fdb_dump_iova = dma_map_single(dev, dma_mem, fdb_dump_size,
+				       DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, fdb_dump_iova)) {
+		netdev_err(net_dev, "dma_map_single() failed\n");
+		err = -ENOMEM;
+		goto err_map;
+	}
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, fdb_id,
+			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
+	if (err) {
+		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
+		goto err_dump;
+	}
+
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
+
+	fdb_entries = (struct fdb_dump_entry *)dma_mem;
+	for (i = 0; i < num_fdb_entries; i++) {
+		fdb_entry = fdb_entries[i];
+
+		err = cb(port_priv, &fdb_entry, data);
+		if (err)
+			goto end;
+	}
+
+end:
+	kfree(dma_mem);
+
+	return 0;
+
+err_dump:
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_TO_DEVICE);
+err_map:
+	kfree(dma_mem);
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_dump(struct ethsw_port_priv *port_priv,
+				       struct fdb_dump_entry *fdb_entry,
+				       void *data)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	return dpaa2_switch_fdb_dump_nl(fdb_entry, data);
+}
+
+static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+				      struct net_device *net_dev,
+				      struct net_device *filter_dev, int *idx)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_dump_ctx dump = {
+		.dev = net_dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	int err;
+
+	err = dpaa2_switch_fdb_iterate(port_priv, dpaa2_switch_fdb_entry_dump, &dump);
+	*idx = dump.idx;
+
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_fast_age(struct ethsw_port_priv *port_priv,
+					   struct fdb_dump_entry *fdb_entry,
+					   void *data __always_unused)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	if (!(fdb_entry->type & DPSW_FDB_ENTRY_TYPE_DYNAMIC))
+		return 0;
+
+	if (fdb_entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		dpaa2_switch_port_fdb_del_uc(port_priv, fdb_entry->mac_addr);
+	else
+		dpaa2_switch_port_fdb_del_mc(port_priv, fdb_entry->mac_addr);
+
+	return 0;
+}
+
+static void dpaa2_switch_port_fast_age(struct ethsw_port_priv *port_priv)
+{
+	dpaa2_switch_fdb_iterate(port_priv,
+				 dpaa2_switch_fdb_entry_fast_age, NULL);
+}
+
+static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto,
+				      u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_add(netdev, &vlan);
+}
+
+static int dpaa2_switch_port_vlan_kill(struct net_device *netdev, __be16 proto,
+				       u16 vid)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = vid,
+		.obj.orig_dev = netdev,
+		/* This API only allows programming tagged, non-PVID VIDs */
+		.flags = 0,
+	};
+
+	return dpaa2_switch_port_vlans_del(netdev, &vlan);
+}
+
+static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *net_dev = port_priv->netdev;
+	struct device *dev = net_dev->dev.parent;
+	u8 mac_addr[ETH_ALEN];
+	int err;
+
+	if (!(ethsw->features & ETHSW_FEATURE_MAC_ADDR))
+		return 0;
+
+	/* Get firmware address, if any */
+	err = dpsw_if_get_port_mac_addr(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					port_priv->idx, mac_addr);
+	if (err) {
+		dev_err(dev, "dpsw_if_get_port_mac_addr() failed\n");
+		return err;
+	}
+
+	/* First check if firmware has any address configured by bootloader */
+	if (!is_zero_ether_addr(mac_addr)) {
+		memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
+	} else {
+		/* No MAC address configured, fill in net_dev->dev_addr
+		 * with a random one
+		 */
+		eth_hw_addr_random(net_dev);
+		dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n");
+
+		/* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all
+		 * practical purposes, this will be our "permanent" mac address,
+		 * at least until the next reboot. This move will also permit
+		 * register_netdevice() to properly fill up net_dev->perm_addr.
+		 */
+		net_dev->addr_assign_type = NET_ADDR_PERM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw,
+				 const struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	unsigned char *buffer_start;
+	struct sk_buff **skbh, *skb;
+	dma_addr_t fd_addr;
+
+	fd_addr = dpaa2_fd_get_addr(fd);
+	skbh = dpaa2_iova_to_virt(ethsw->iommu_domain, fd_addr);
+
+	skb = *skbh;
+	buffer_start = (unsigned char *)skbh;
+
+	dma_unmap_single(dev, fd_addr,
+			 skb_tail_pointer(skb) - buffer_start,
+			 DMA_TO_DEVICE);
+
+	/* Move on with skb release */
+	dev_kfree_skb(skb);
+}
+
+static int dpaa2_switch_build_single_fd(struct ethsw_core *ethsw,
+					struct sk_buff *skb,
+					struct dpaa2_fd *fd)
+{
+	struct device *dev = ethsw->dev;
+	struct sk_buff **skbh;
+	dma_addr_t addr;
+	u8 *buff_start;
+	void *hwa;
+
+	buff_start = PTR_ALIGN(skb->data - DPAA2_SWITCH_TX_DATA_OFFSET -
+			       DPAA2_SWITCH_TX_BUF_ALIGN,
+			       DPAA2_SWITCH_TX_BUF_ALIGN);
+
+	/* Clear FAS to have consistent values for TX confirmation. It is
+	 * located in the first 8 bytes of the buffer's hardware annotation
+	 * area
+	 */
+	hwa = buff_start + DPAA2_SWITCH_SWA_SIZE;
+	memset(hwa, 0, 8);
+
+	/* Store a backpointer to the skb at the beginning of the buffer
+	 * (in the private data area) such that we can release it
+	 * on Tx confirm
+	 */
+	skbh = (struct sk_buff **)buff_start;
+	*skbh = skb;
+
+	addr = dma_map_single(dev, buff_start,
+			      skb_tail_pointer(skb) - buff_start,
+			      DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, addr)))
+		return -ENOMEM;
+
+	/* Setup the FD fields */
+	memset(fd, 0, sizeof(*fd));
+
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, (u16)(skb->data - buff_start));
+	dpaa2_fd_set_len(fd, skb->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+
+	return 0;
+}
+
+static netdev_tx_t dpaa2_switch_port_tx(struct sk_buff *skb,
+					struct net_device *net_dev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int retries = DPAA2_SWITCH_SWP_BUSY_RETRIES;
+	struct dpaa2_fd fd;
+	int err;
+
+	if (unlikely(skb_headroom(skb) < DPAA2_SWITCH_NEEDED_HEADROOM)) {
+		struct sk_buff *ns;
+
+		ns = skb_realloc_headroom(skb, DPAA2_SWITCH_NEEDED_HEADROOM);
+		if (unlikely(!ns)) {
+			net_err_ratelimited("%s: Error reallocating skb headroom\n", net_dev->name);
+			goto err_free_skb;
+		}
+		dev_consume_skb_any(skb);
+		skb = ns;
+	}
+
+	/* We'll be holding a back-reference to the skb until Tx confirmation */
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		/* skb_unshare() has already freed the skb */
+		net_err_ratelimited("%s: Error copying the socket buffer\n", net_dev->name);
+		goto err_exit;
+	}
+
+	/* At this stage, we do not support non-linear skbs so just try to
+	 * linearize the skb and if that's not working, just drop the packet.
+	 */
+	err = skb_linearize(skb);
+	if (err) {
+		net_err_ratelimited("%s: skb_linearize error (%d)!\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	err = dpaa2_switch_build_single_fd(ethsw, skb, &fd);
+	if (unlikely(err)) {
+		net_err_ratelimited("%s: ethsw_build_*_fd() %d\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	do {
+		err = dpaa2_io_service_enqueue_qd(NULL,
+						  port_priv->tx_qdid,
+						  8, 0, &fd);
+		retries--;
+	} while (err == -EBUSY && retries);
+
+	if (unlikely(err < 0)) {
+		dpaa2_switch_free_fd(ethsw, &fd);
+		goto err_exit;
+	}
+
+	return NETDEV_TX_OK;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+err_exit:
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops dpaa2_switch_port_ops = {
+	.ndo_open		= dpaa2_switch_port_open,
+	.ndo_stop		= dpaa2_switch_port_stop,
+
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_get_stats64	= dpaa2_switch_port_get_stats,
+	.ndo_change_mtu		= dpaa2_switch_port_change_mtu,
+	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
+	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
+	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
+	.ndo_vlan_rx_add_vid	= dpaa2_switch_port_vlan_add,
+	.ndo_vlan_rx_kill_vid	= dpaa2_switch_port_vlan_kill,
+
+	.ndo_start_xmit		= dpaa2_switch_port_tx,
+	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
+	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
+};
+
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
+{
+	return netdev->netdev_ops == &dpaa2_switch_port_ops;
+}
+
+static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
+		dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+	}
+}
+
+static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
+{
+	struct device *dev = (struct device *)arg;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+
+	/* Mask the events and the if_id reserved bits to be cleared on read */
+	u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
+	int err;
+
+	err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, &status);
+	if (err) {
+		dev_err(dev, "Can't get irq status (err %d)\n", err);
+
+		err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					    DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
+		if (err)
+			dev_err(dev, "Can't clear irq status (err %d)\n", err);
+		goto out;
+	}
+
+	if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
+		dpaa2_switch_links_state_update(ethsw);
+
+out:
+	return IRQ_HANDLED;
+}
+
+static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED;
+	struct fsl_mc_device_irq *irq;
+	int err;
+
+	err = fsl_mc_allocate_irqs(sw_dev);
+	if (err) {
+		dev_err(dev, "MC irqs allocation failed\n");
+		return err;
+	}
+
+	if (WARN_ON(sw_dev->obj_desc.irq_count != DPSW_IRQ_NUM)) {
+		err = -EINVAL;
+		goto free_irq;
+	}
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 0);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
+		goto free_irq;
+	}
+
+	irq = sw_dev->irqs[DPSW_IRQ_INDEX_IF];
+
+	err = devm_request_threaded_irq(dev, irq->msi_desc->irq,
+					NULL,
+					dpaa2_switch_irq0_handler_thread,
+					IRQF_NO_SUSPEND | IRQF_ONESHOT,
+					dev_name(dev), dev);
+	if (err) {
+		dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
+		goto free_irq;
+	}
+
+	err = dpsw_set_irq_mask(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				DPSW_IRQ_INDEX_IF, mask);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_mask(): %d\n", err);
+		goto free_devm_irq;
+	}
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 1);
+	if (err) {
+		dev_err(dev, "dpsw_set_irq_enable(): %d\n", err);
+		goto free_devm_irq;
+	}
+
+	return 0;
+
+free_devm_irq:
+	devm_free_irq(dev, irq->msi_desc->irq, dev);
+free_irq:
+	fsl_mc_free_irqs(sw_dev);
+	return err;
+}
+
+static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	int err;
+
+	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  DPSW_IRQ_INDEX_IF, 0);
+	if (err)
+		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
+
+	fsl_mc_free_irqs(sw_dev);
+}
+
+static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
+						u8 state)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+
+	return dpaa2_switch_port_set_stp_state(port_priv, state);
+}
+
+static int dpaa2_switch_port_attr_set(struct net_device *netdev,
+				      const struct switchdev_attr *attr,
+				      struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+		err = dpaa2_switch_port_attr_stp_state_set(netdev,
+							   attr->u.stp_state);
+		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		if (!attr->u.vlan_filtering) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "The DPAA2 switch does not support VLAN-unaware operation");
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_attr *attr = &ethsw->sw_attr;
+	int err = 0;
+
+	/* Make sure that the VLAN is not already configured
+	 * on the switch port
+	 */
+	if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER)
+		return -EEXIST;
+
+	/* Check if there is space for a new VLAN */
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
+		return err;
+	}
+	if (attr->max_vlans - attr->num_vlans < 1)
+		return -ENOSPC;
+
+	/* Check if there is space for a new VLAN */
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
+		return err;
+	}
+	if (attr->max_vlans - attr->num_vlans < 1)
+		return -ENOSPC;
+
+	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
+		/* this is a new VLAN */
+		err = dpaa2_switch_add_vlan(port_priv, vlan->vid);
+		if (err)
+			return err;
+
+		port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL;
+	}
+
+	return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags);
+}
+
+static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc,
+					    const unsigned char *addr)
+{
+	struct netdev_hw_addr_list *list = (is_uc) ? &netdev->uc : &netdev->mc;
+	struct netdev_hw_addr *ha;
+
+	netif_addr_lock_bh(netdev);
+	list_for_each_entry(ha, &list->list, list) {
+		if (ether_addr_equal(ha->addr, addr)) {
+			netif_addr_unlock_bh(netdev);
+			return 1;
+		}
+	}
+	netif_addr_unlock_bh(netdev);
+	return 0;
+}
+
+static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
+				     const struct switchdev_obj_port_mdb *mdb)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	/* Check if address is already set on this port */
+	if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
+		return -EEXIST;
+
+	err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
+	if (err)
+		return err;
+
+	err = dev_mc_add(netdev, mdb->addr);
+	if (err) {
+		netdev_err(netdev, "dev_mc_add err %d\n", err);
+		dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_obj_add(struct net_device *netdev,
+				     const struct switchdev_obj *obj)
+{
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dpaa2_switch_port_vlans_add(netdev,
+						  SWITCHDEV_OBJ_PORT_VLAN(obj));
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dpaa2_switch_port_mdb_add(netdev,
+						SWITCHDEV_OBJ_PORT_MDB(obj));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_vlan_if_cfg vcfg;
+	int i, err;
+
+	if (!port_priv->vlans[vid])
+		return -ENOENT;
+
+	if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) {
+		/* If we are deleting the PVID of a port, use VLAN 4095 instead
+		 * as we are sure that neither the bridge nor the 8021q module
+		 * will use it
+		 */
+		err = dpaa2_switch_port_set_pvid(port_priv, 4095);
+		if (err)
+			return err;
+	}
+
+	vcfg.num_ifs = 1;
+	vcfg.if_id[0] = port_priv->idx;
+	if (port_priv->vlans[vid] & ETHSW_VLAN_UNTAGGED) {
+		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0,
+						   ethsw->dpsw_handle,
+						   vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_remove_if_untagged err %d\n",
+				   err);
+		}
+		port_priv->vlans[vid] &= ~ETHSW_VLAN_UNTAGGED;
+	}
+
+	if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
+		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  vid, &vcfg);
+		if (err) {
+			netdev_err(netdev,
+				   "dpsw_vlan_remove_if err %d\n", err);
+			return err;
+		}
+		port_priv->vlans[vid] &= ~ETHSW_VLAN_MEMBER;
+
+		/* Delete VLAN from switch if it is no longer configured on
+		 * any port
+		 */
+		for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+			if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
+				return 0; /* Found a port member in VID */
+
+		ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL;
+
+		err = dpaa2_switch_dellink(ethsw, vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+
+	if (netif_is_bridge_master(vlan->obj.orig_dev))
+		return -EOPNOTSUPP;
+
+	return dpaa2_switch_port_del_vlan(port_priv, vlan->vid);
+}
+
+static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
+				     const struct switchdev_obj_port_mdb *mdb)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
+		return -ENOENT;
+
+	err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
+	if (err)
+		return err;
+
+	err = dev_mc_del(netdev, mdb->addr);
+	if (err) {
+		netdev_err(netdev, "dev_mc_del err %d\n", err);
+		return err;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_port_obj_del(struct net_device *netdev,
+				     const struct switchdev_obj *obj)
+{
+	int err;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
+		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dpaa2_switch_port_mdb_del(netdev, SWITCHDEV_OBJ_PORT_MDB(obj));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+	return err;
+}
+
+static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
+					    struct switchdev_notifier_port_attr_info *ptr)
+{
+	int err;
+
+	err = switchdev_handle_port_attr_set(netdev, ptr,
+					     dpaa2_switch_port_dev_check,
+					     dpaa2_switch_port_attr_set);
+	return notifier_from_errno(err);
+}
+
+static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+{
+	struct dpsw_egress_flood_cfg flood_cfg;
+	int i = 0, j;
+	int err;
+
+	/* Add all the DPAA2 switch ports found in the same bridging domain to
+	 * the egress flooding domain
+	 */
+	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
+		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
+			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
+
+	/* Add the CTRL interface to the egress flooding domain */
+	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
+
+	/* Use the FDB of the first dpaa2 switch port added to the bridge */
+	flood_cfg.fdb_id = fdb_id;
+
+	/* Setup broadcast flooding domain */
+	flood_cfg.flood_type = DPSW_BROADCAST;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	/* Setup unknown flooding domain */
+	flood_cfg.flood_type = DPSW_FLOODING;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
+					 struct net_device *upper_dev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct ethsw_port_priv *other_port_priv;
+	struct net_device *other_dev;
+	struct list_head *iter;
+	int err;
+
+	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev))
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
+			netdev_err(netdev,
+				   "Interface from a different DPSW is in the bridge already!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Delete the previously manually installed VLAN 1 */
+	err = dpaa2_switch_port_del_vlan(port_priv, 1);
+	if (err)
+		return err;
+
+	dpaa2_switch_port_set_fdb(port_priv, upper_dev);
+
+	/* Setup the egress flood policy (broadcast, unknown unicast) */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		goto err_egress_flood;
+
+	return 0;
+
+err_egress_flood:
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
+	return err;
+}
+
+static int dpaa2_switch_port_clear_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_kill(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, void *arg)
+{
+	__be16 vlan_proto = htons(ETH_P_8021Q);
+
+	if (vdev)
+		vlan_proto = vlan_dev_vlan_proto(vdev);
+
+	return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
+}
+
+static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* First of all, fast age any learn FDB addresses on this switch port */
+	dpaa2_switch_port_fast_age(port_priv);
+
+	/* Clear all RX VLANs installed through vlan_vid_add() either as VLAN
+	 * upper devices or otherwise from the FDB table that we are about to
+	 * leave
+	 */
+	err = vlan_for_each(netdev, dpaa2_switch_port_clear_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err);
+
+	dpaa2_switch_port_set_fdb(port_priv, NULL);
+
+	/* Restore all RX VLANs into the new FDB table that we just joined */
+	err = vlan_for_each(netdev, dpaa2_switch_port_restore_rxvlan, netdev);
+	if (err)
+		netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err);
+
+	/* Setup the egress flood policy (broadcast, unknown unicast).
+	 * When the port is not under a bridge, only the CTRL interface is part
+	 * of the flooding domain besides the actual port
+	 */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Recreate the egress flood domain of the FDB that we just left */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
+	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
+	 */
+	return dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
+					  BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
+}
+
+static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev)
+{
+	struct net_device *upper_dev;
+	struct list_head *iter;
+
+	/* RCU read lock not necessary because we have write-side protection
+	 * (rtnl_mutex), however a non-rcu iterator does not exist.
+	 */
+	netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter)
+		if (is_vlan_dev(upper_dev))
+			return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
+					     unsigned long event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper_dev;
+	int err = 0;
+
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	switch (event) {
+	case NETDEV_PRECHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (!netif_is_bridge_master(upper_dev))
+			break;
+
+		if (!br_vlan_enabled(upper_dev)) {
+			NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
+			err = -EOPNOTSUPP;
+			goto out;
+		}
+
+		err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot join a bridge while VLAN uppers are present");
+			goto out;
+		}
+
+		break;
+	case NETDEV_CHANGEUPPER:
+		upper_dev = info->upper_dev;
+		if (netif_is_bridge_master(upper_dev)) {
+			if (info->linking)
+				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
+			else
+				err = dpaa2_switch_port_bridge_leave(netdev);
+		}
+		break;
+	}
+
+out:
+	return notifier_from_errno(err);
+}
+
+struct ethsw_switchdev_event_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+static void dpaa2_switch_event_work(struct work_struct *work)
+{
+	struct ethsw_switchdev_event_work *switchdev_work =
+		container_of(work, struct ethsw_switchdev_event_work, work);
+	struct net_device *dev = switchdev_work->dev;
+	struct switchdev_notifier_fdb_info *fdb_info;
+	int err;
+
+	rtnl_lock();
+	fdb_info = &switchdev_work->fdb_info;
+
+	switch (switchdev_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		if (is_unicast_ether_addr(fdb_info->addr))
+			err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
+							   fdb_info->addr);
+		else
+			err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
+							   fdb_info->addr);
+		if (err)
+			break;
+		fdb_info->offloaded = true;
+		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
+					 &fdb_info->info, NULL);
+		break;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		if (!fdb_info->added_by_user)
+			break;
+		if (is_unicast_ether_addr(fdb_info->addr))
+			dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
+		else
+			dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
+		break;
+	}
+
+	rtnl_unlock();
+	kfree(switchdev_work->fdb_info.addr);
+	kfree(switchdev_work);
+	dev_put(dev);
+}
+
+/* Called under rcu_read_lock() */
+static int dpaa2_switch_port_event(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct ethsw_port_priv *port_priv = netdev_priv(dev);
+	struct ethsw_switchdev_event_work *switchdev_work;
+	struct switchdev_notifier_fdb_info *fdb_info = ptr;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+
+	if (event == SWITCHDEV_PORT_ATTR_SET)
+		return dpaa2_switch_port_attr_set_event(dev, ptr);
+
+	if (!dpaa2_switch_port_dev_check(dev))
+		return NOTIFY_DONE;
+
+	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+	if (!switchdev_work)
+		return NOTIFY_BAD;
+
+	INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
+	switchdev_work->dev = dev;
+	switchdev_work->event = event;
+
+	switch (event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		memcpy(&switchdev_work->fdb_info, ptr,
+		       sizeof(switchdev_work->fdb_info));
+		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
+		if (!switchdev_work->fdb_info.addr)
+			goto err_addr_alloc;
+
+		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
+				fdb_info->addr);
+
+		/* Take a reference on the device to avoid being freed. */
+		dev_hold(dev);
+		break;
+	default:
+		kfree(switchdev_work);
+		return NOTIFY_DONE;
+	}
+
+	queue_work(ethsw->workqueue, &switchdev_work->work);
+
+	return NOTIFY_DONE;
+
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
+}
+
+static int dpaa2_switch_port_obj_event(unsigned long event,
+				       struct net_device *netdev,
+				       struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+	int err = -EOPNOTSUPP;
+
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
+		break;
+	}
+
+	port_obj_info->handled = true;
+	return notifier_from_errno(err);
+}
+
+static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
+					    unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+	case SWITCHDEV_PORT_OBJ_DEL:
+		return dpaa2_switch_port_obj_event(event, dev, ptr);
+	case SWITCHDEV_PORT_ATTR_SET:
+		return dpaa2_switch_port_attr_set_event(dev, ptr);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Build a linear skb based on a single-buffer frame descriptor */
+static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
+						     const struct dpaa2_fd *fd)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	struct device *dev = ethsw->dev;
+	struct sk_buff *skb = NULL;
+	void *fd_vaddr;
+
+	fd_vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
+	dma_unmap_page(dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
+		       DMA_FROM_DEVICE);
+
+	skb = build_skb(fd_vaddr, DPAA2_SWITCH_RX_BUF_SIZE +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (unlikely(!skb)) {
+		dev_err(dev, "build_skb() failed\n");
+		return NULL;
+	}
+
+	skb_reserve(skb, fd_offset);
+	skb_put(skb, fd_length);
+
+	ethsw->buf_count--;
+
+	return skb;
+}
+
+static void dpaa2_switch_tx_conf(struct dpaa2_switch_fq *fq,
+				 const struct dpaa2_fd *fd)
+{
+	dpaa2_switch_free_fd(fq->ethsw, fd);
+}
+
+static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
+			    const struct dpaa2_fd *fd)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	struct ethsw_port_priv *port_priv;
+	struct net_device *netdev;
+	struct vlan_ethhdr *hdr;
+	struct sk_buff *skb;
+	u16 vlan_tci, vid;
+	int if_id, err;
+
+	/* get switch ingress interface ID */
+	if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+
+	if (if_id >= ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "Frame received from unknown interface!\n");
+		goto err_free_fd;
+	}
+	port_priv = ethsw->ports[if_id];
+	netdev = port_priv->netdev;
+
+	/* build the SKB based on the FD received */
+	if (dpaa2_fd_get_format(fd) != dpaa2_fd_single) {
+		if (net_ratelimit()) {
+			netdev_err(netdev, "Received invalid frame format\n");
+			goto err_free_fd;
+		}
+	}
+
+	skb = dpaa2_switch_build_linear_skb(ethsw, fd);
+	if (unlikely(!skb))
+		goto err_free_fd;
+
+	skb_reset_mac_header(skb);
+
+	/* Remove the VLAN header if the packet that we just received has a vid
+	 * equal to the port PVIDs. Since the dpaa2-switch can operate only in
+	 * VLAN-aware mode and no alterations are made on the packet when it's
+	 * redirected/mirrored to the control interface, we are sure that there
+	 * will always be a VLAN header present.
+	 */
+	hdr = vlan_eth_hdr(skb);
+	vid = ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK;
+	if (vid == port_priv->pvid) {
+		err = __skb_vlan_pop(skb, &vlan_tci);
+		if (err) {
+			dev_info(ethsw->dev, "__skb_vlan_pop() returned %d", err);
+			goto err_free_fd;
+		}
+	}
+
+	skb->dev = netdev;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	netif_receive_skb(skb);
+
+	return;
+
+err_free_fd:
+	dpaa2_switch_free_fd(ethsw, fd);
+}
+
+static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
+{
+	ethsw->features = 0;
+
+	if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
+		ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
+}
+
+static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_attr ctrl_if_attr;
+	struct device *dev = ethsw->dev;
+	int i = 0;
+	int err;
+
+	err = dpsw_ctrl_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  &ctrl_if_attr);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_get_attributes() = %d\n", err);
+		return err;
+	}
+
+	ethsw->fq[i].fqid = ctrl_if_attr.rx_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_RX;
+
+	ethsw->fq[i].fqid = ctrl_if_attr.tx_err_conf_fqid;
+	ethsw->fq[i].ethsw = ethsw;
+	ethsw->fq[i++].type = DPSW_QUEUE_TX_ERR_CONF;
+
+	return 0;
+}
+
+/* Free buffers acquired from the buffer pool or which were meant to
+ * be released in the pool
+ */
+static void dpaa2_switch_free_bufs(struct ethsw_core *ethsw, u64 *buf_array, int count)
+{
+	struct device *dev = ethsw->dev;
+	void *vaddr;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, buf_array[i]);
+		dma_unmap_page(dev, buf_array[i], DPAA2_SWITCH_RX_BUF_SIZE,
+			       DMA_FROM_DEVICE);
+		free_pages((unsigned long)vaddr, 0);
+	}
+}
+
+/* Perform a single release command to add buffers
+ * to the specified buffer pool
+ */
+static int dpaa2_switch_add_bufs(struct ethsw_core *ethsw, u16 bpid)
+{
+	struct device *dev = ethsw->dev;
+	u64 buf_array[BUFS_PER_CMD];
+	struct page *page;
+	int retries = 0;
+	dma_addr_t addr;
+	int err;
+	int i;
+
+	for (i = 0; i < BUFS_PER_CMD; i++) {
+		/* Allocate one page for each Rx buffer. WRIOP sees
+		 * the entire page except for a tailroom reserved for
+		 * skb shared info
+		 */
+		page = dev_alloc_pages(0);
+		if (!page) {
+			dev_err(dev, "buffer allocation failed\n");
+			goto err_alloc;
+		}
+
+		addr = dma_map_page(dev, page, 0, DPAA2_SWITCH_RX_BUF_SIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, addr)) {
+			dev_err(dev, "dma_map_single() failed\n");
+			goto err_map;
+		}
+		buf_array[i] = addr;
+	}
+
+release_bufs:
+	/* In case the portal is busy, retry until successful or
+	 * max retries hit.
+	 */
+	while ((err = dpaa2_io_service_release(NULL, bpid,
+					       buf_array, i)) == -EBUSY) {
+		if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES)
+			break;
+
+		cpu_relax();
+	}
+
+	/* If release command failed, clean up and bail out. */
+	if (err) {
+		dpaa2_switch_free_bufs(ethsw, buf_array, i);
+		return 0;
+	}
+
+	return i;
+
+err_map:
+	__free_pages(page, 0);
+err_alloc:
+	/* If we managed to allocate at least some buffers,
+	 * release them to hardware
+	 */
+	if (i)
+		goto release_bufs;
+
+	return 0;
+}
+
+static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw)
+{
+	int *count = &ethsw->buf_count;
+	int new_count;
+	int err = 0;
+
+	if (unlikely(*count < DPAA2_ETHSW_REFILL_THRESH)) {
+		do {
+			new_count = dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+			if (unlikely(!new_count)) {
+				/* Out of memory; abort for now, we'll
+				 * try later on
+				 */
+				break;
+			}
+			*count += new_count;
+		} while (*count < DPAA2_ETHSW_NUM_BUFS);
+
+		if (unlikely(*count < DPAA2_ETHSW_NUM_BUFS))
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw)
+{
+	int *count, i;
+
+	for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) {
+		count = &ethsw->buf_count;
+		*count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+
+		if (unlikely(*count < BUFS_PER_CMD))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_drain_bp(struct ethsw_core *ethsw)
+{
+	u64 buf_array[BUFS_PER_CMD];
+	int ret;
+
+	do {
+		ret = dpaa2_io_service_acquire(NULL, ethsw->bpid,
+					       buf_array, BUFS_PER_CMD);
+		if (ret < 0) {
+			dev_err(ethsw->dev,
+				"dpaa2_io_service_acquire() = %d\n", ret);
+			return;
+		}
+		dpaa2_switch_free_bufs(ethsw, buf_array, ret);
+
+	} while (ret);
+}
+
+static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
+	struct device *dev = ethsw->dev;
+	struct fsl_mc_device *dpbp_dev;
+	struct dpbp_attr dpbp_attrs;
+	int err;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
+				     &dpbp_dev);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "DPBP device allocation failed\n");
+		return err;
+	}
+	ethsw->dpbp_dev = dpbp_dev;
+
+	err = dpbp_open(ethsw->mc_io, 0, dpbp_dev->obj_desc.id,
+			&dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpbp_reset(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_reset() failed\n");
+		goto err_reset;
+	}
+
+	err = dpbp_enable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_enable() failed\n");
+		goto err_enable;
+	}
+
+	err = dpbp_get_attributes(ethsw->mc_io, 0, dpbp_dev->mc_handle,
+				  &dpbp_attrs);
+	if (err) {
+		dev_err(dev, "dpbp_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+
+	dpsw_ctrl_if_pools_cfg.num_dpbp = 1;
+	dpsw_ctrl_if_pools_cfg.pools[0].dpbp_id = dpbp_attrs.id;
+	dpsw_ctrl_if_pools_cfg.pools[0].buffer_size = DPAA2_SWITCH_RX_BUF_SIZE;
+	dpsw_ctrl_if_pools_cfg.pools[0].backup_pool = 0;
+
+	err = dpsw_ctrl_if_set_pools(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     &dpsw_ctrl_if_pools_cfg);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
+		goto err_get_attr;
+	}
+	ethsw->bpid = dpbp_attrs.id;
+
+	return 0;
+
+err_get_attr:
+	dpbp_disable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_enable:
+err_reset:
+	dpbp_close(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_open:
+	fsl_mc_object_free(dpbp_dev);
+	return err;
+}
+
+static void dpaa2_switch_free_dpbp(struct ethsw_core *ethsw)
+{
+	dpbp_disable(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	dpbp_close(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	fsl_mc_object_free(ethsw->dpbp_dev);
+}
+
+static int dpaa2_switch_alloc_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		ethsw->fq[i].store =
+			dpaa2_io_store_create(DPAA2_SWITCH_STORE_SIZE,
+					      ethsw->dev);
+		if (!ethsw->fq[i].store) {
+			dev_err(ethsw->dev, "dpaa2_io_store_create failed\n");
+			while (--i >= 0)
+				dpaa2_io_store_destroy(ethsw->fq[i].store);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_store_destroy(ethsw->fq[i].store);
+}
+
+static int dpaa2_switch_pull_fq(struct dpaa2_switch_fq *fq)
+{
+	int err, retries = 0;
+
+	/* Try to pull from the FQ while the portal is busy and we didn't hit
+	 * the maximum number fo retries
+	 */
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, fq->fqid, fq->store);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	if (unlikely(err))
+		dev_err(fq->ethsw->dev, "dpaa2_io_service_pull err %d", err);
+
+	return err;
+}
+
+/* Consume all frames pull-dequeued into the store */
+static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	int cleaned = 0, is_last;
+	struct dpaa2_dq *dq;
+	int retries = 0;
+
+	do {
+		/* Get the next available FD from the store */
+		dq = dpaa2_io_store_next(fq->store, &is_last);
+		if (unlikely(!dq)) {
+			if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) {
+				dev_err_once(ethsw->dev,
+					     "No valid dequeue response\n");
+				return -ETIMEDOUT;
+			}
+			continue;
+		}
+
+		if (fq->type == DPSW_QUEUE_RX)
+			dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		else
+			dpaa2_switch_tx_conf(fq, dpaa2_dq_fd(dq));
+		cleaned++;
+
+	} while (!is_last);
+
+	return cleaned;
+}
+
+/* NAPI poll routine */
+static int dpaa2_switch_poll(struct napi_struct *napi, int budget)
+{
+	int err, cleaned = 0, store_cleaned, work_done;
+	struct dpaa2_switch_fq *fq;
+	int retries = 0;
+
+	fq = container_of(napi, struct dpaa2_switch_fq, napi);
+
+	do {
+		err = dpaa2_switch_pull_fq(fq);
+		if (unlikely(err))
+			break;
+
+		/* Refill pool if appropriate */
+		dpaa2_switch_refill_bp(fq->ethsw);
+
+		store_cleaned = dpaa2_switch_store_consume(fq);
+		cleaned += store_cleaned;
+
+		if (cleaned >= budget) {
+			work_done = budget;
+			goto out;
+		}
+
+	} while (store_cleaned);
+
+	/* We didn't consume the entire budget, so finish napi and re-enable
+	 * data availability notifications
+	 */
+	napi_complete_done(napi, cleaned);
+	do {
+		err = dpaa2_io_service_rearm(NULL, &fq->nctx);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	work_done = max(cleaned, 1);
+out:
+
+	return work_done;
+}
+
+static void dpaa2_switch_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
+{
+	struct dpaa2_switch_fq *fq;
+
+	fq = container_of(nctx, struct dpaa2_switch_fq, nctx);
+
+	napi_schedule(&fq->napi);
+}
+
+static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_queue_cfg queue_cfg;
+	struct dpaa2_io_notification_ctx *nctx;
+	int err, i, j;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		nctx = &ethsw->fq[i].nctx;
+
+		/* Register a new software context for the FQID.
+		 * By using NULL as the first parameter, we specify that we do
+		 * not care on which cpu are interrupts received for this queue
+		 */
+		nctx->is_cdan = 0;
+		nctx->id = ethsw->fq[i].fqid;
+		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
+		nctx->cb = dpaa2_switch_fqdan_cb;
+		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
+		if (err) {
+			err = -EPROBE_DEFER;
+			goto err_register;
+		}
+
+		queue_cfg.options = DPSW_CTRL_IF_QUEUE_OPT_DEST |
+				    DPSW_CTRL_IF_QUEUE_OPT_USER_CTX;
+		queue_cfg.dest_cfg.dest_type = DPSW_CTRL_IF_DEST_DPIO;
+		queue_cfg.dest_cfg.dest_id = nctx->dpio_id;
+		queue_cfg.dest_cfg.priority = 0;
+		queue_cfg.user_ctx = nctx->qman64;
+
+		err = dpsw_ctrl_if_set_queue(ethsw->mc_io, 0,
+					     ethsw->dpsw_handle,
+					     ethsw->fq[i].type,
+					     &queue_cfg);
+		if (err)
+			goto err_set_queue;
+	}
+
+	return 0;
+
+err_set_queue:
+	dpaa2_io_service_deregister(NULL, nctx, ethsw->dev);
+err_register:
+	for (j = 0; j < i; j++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[j].nctx,
+					    ethsw->dev);
+
+	return err;
+}
+
+static void dpaa2_switch_free_dpio(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[i].nctx,
+					    ethsw->dev);
+}
+
+static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
+{
+	int err;
+
+	/* setup FQs for Rx and Tx Conf */
+	err = dpaa2_switch_setup_fqs(ethsw);
+	if (err)
+		return err;
+
+	/* setup the buffer pool needed on the Rx path */
+	err = dpaa2_switch_setup_dpbp(ethsw);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_seed_bp(ethsw);
+	if (err)
+		goto err_free_dpbp;
+
+	err = dpaa2_switch_alloc_rings(ethsw);
+	if (err)
+		goto err_drain_dpbp;
+
+	err = dpaa2_switch_setup_dpio(ethsw);
+	if (err)
+		goto err_destroy_rings;
+
+	err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
+		goto err_deregister_dpio;
+	}
+
+	return 0;
+
+err_deregister_dpio:
+	dpaa2_switch_free_dpio(ethsw);
+err_destroy_rings:
+	dpaa2_switch_destroy_rings(ethsw);
+err_drain_dpbp:
+	dpaa2_switch_drain_bp(ethsw);
+err_free_dpbp:
+	dpaa2_switch_free_dpbp(ethsw);
+
+	return err;
+}
+
+static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	struct dpsw_tci_cfg tci_cfg = {0};
+	struct dpsw_stp_cfg stp_cfg;
+	int err;
+	u16 i;
+
+	ethsw->dev_id = sw_dev->obj_desc.id;
+
+	err = dpsw_open(ethsw->mc_io, 0, ethsw->dev_id, &ethsw->dpsw_handle);
+	if (err) {
+		dev_err(dev, "dpsw_open err %d\n", err);
+		return err;
+	}
+
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		dev_err(dev, "dpsw_get_attributes err %d\n", err);
+		goto err_close;
+	}
+
+	err = dpsw_get_api_version(ethsw->mc_io, 0,
+				   &ethsw->major,
+				   &ethsw->minor);
+	if (err) {
+		dev_err(dev, "dpsw_get_api_version err %d\n", err);
+		goto err_close;
+	}
+
+	/* Minimum supported DPSW version check */
+	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
+	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
+	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
+		dev_err(dev, "DPSW version %d:%d not supported. Use firmware 10.28.0 or greater.\n",
+			ethsw->major, ethsw->minor);
+		err = -EOPNOTSUPP;
+		goto err_close;
+	}
+
+	if (!dpaa2_switch_supports_cpu_traffic(ethsw)) {
+		err = -EOPNOTSUPP;
+		goto err_close;
+	}
+
+	dpaa2_switch_detect_features(ethsw);
+
+	err = dpsw_reset(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(dev, "dpsw_reset err %d\n", err);
+		goto err_close;
+	}
+
+	stp_cfg.vlan_id = DEFAULT_VLAN_ID;
+	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
+		if (err) {
+			dev_err(dev, "dpsw_if_disable err %d\n", err);
+			goto err_close;
+		}
+
+		err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
+				      &stp_cfg);
+		if (err) {
+			dev_err(dev, "dpsw_if_set_stp err %d for port %d\n",
+				err, i);
+			goto err_close;
+		}
+
+		/* Switch starts with all ports configured to VLAN 1. Need to
+		 * remove this setting to allow configuration at bridge join
+		 */
+		vcfg.num_ifs = 1;
+		vcfg.if_id[0] = i;
+		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
+						   DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if_untagged err %d\n",
+				err);
+			goto err_close;
+		}
+
+		tci_cfg.vlan_id = 4095;
+		err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, i, &tci_cfg);
+		if (err) {
+			dev_err(dev, "dpsw_if_set_tci err %d\n", err);
+			goto err_close;
+		}
+
+		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					  DEFAULT_VLAN_ID, &vcfg);
+		if (err) {
+			dev_err(dev, "dpsw_vlan_remove_if err %d\n", err);
+			goto err_close;
+		}
+	}
+
+	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, DEFAULT_VLAN_ID);
+	if (err) {
+		dev_err(dev, "dpsw_vlan_remove err %d\n", err);
+		goto err_close;
+	}
+
+	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
+						   WQ_MEM_RECLAIM, "ethsw",
+						   ethsw->sw_attr.id);
+	if (!ethsw->workqueue) {
+		err = -ENOMEM;
+		goto err_close;
+	}
+
+	err = dpsw_fdb_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, 0);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
+	err = dpaa2_switch_ctrl_if_setup(ethsw);
+	if (err)
+		goto err_destroy_ordered_workqueue;
+
+	return 0;
+
+err_destroy_ordered_workqueue:
+	destroy_workqueue(ethsw->workqueue);
+
+err_close:
+	dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	return err;
+}
+
+static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
+{
+	struct switchdev_obj_port_vlan vlan = {
+		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+		.vid = DEFAULT_VLAN_ID,
+		.flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID,
+	};
+	struct net_device *netdev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_fdb_cfg fdb_cfg = {0};
+	struct dpaa2_switch_fdb *fdb;
+	struct dpsw_if_attr dpsw_if_attr;
+	u16 fdb_id;
+	int err;
+
+	/* Get the Tx queue for this specific port */
+	err = dpsw_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     port_priv->idx, &dpsw_if_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_attributes err %d\n", err);
+		return err;
+	}
+	port_priv->tx_qdid = dpsw_if_attr.qdid;
+
+	/* Create a FDB table for this particular switch port */
+	fdb_cfg.num_fdb_entries = ethsw->sw_attr.max_fdb_entries / ethsw->sw_attr.num_ifs;
+	err = dpsw_fdb_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &fdb_id, &fdb_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_fdb_add err %d\n", err);
+		return err;
+	}
+
+	/* Find an unused dpaa2_switch_fdb structure and use it */
+	fdb = dpaa2_switch_fdb_get_unused(ethsw);
+	fdb->fdb_id = fdb_id;
+	fdb->in_use = true;
+	fdb->bridge_dev = NULL;
+	port_priv->fdb = fdb;
+
+	/* We need to add VLAN 1 as the PVID on this port until it is under a
+	 * bridge since the DPAA2 switch is not able to handle the traffic in a
+	 * VLAN unaware fashion
+	 */
+	err = dpaa2_switch_port_vlans_add(netdev, &vlan);
+	if (err)
+		return err;
+
+	/* Setup the egress flooding domains (broadcast, unknown unicast */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	return err;
+}
+
+static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	int err;
+
+	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err)
+		dev_warn(dev, "dpsw_close err %d\n", err);
+}
+
+static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
+{
+	dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	dpaa2_switch_free_dpio(ethsw);
+	dpaa2_switch_destroy_rings(ethsw);
+	dpaa2_switch_drain_bp(ethsw);
+	dpaa2_switch_free_dpbp(ethsw);
+}
+
+static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
+{
+	struct ethsw_port_priv *port_priv;
+	struct ethsw_core *ethsw;
+	struct device *dev;
+	int i;
+
+	dev = &sw_dev->dev;
+	ethsw = dev_get_drvdata(dev);
+
+	dpaa2_switch_ctrl_if_teardown(ethsw);
+
+	dpaa2_switch_teardown_irqs(sw_dev);
+
+	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		port_priv = ethsw->ports[i];
+		unregister_netdev(port_priv->netdev);
+		free_netdev(port_priv->netdev);
+	}
+
+	kfree(ethsw->fdbs);
+	kfree(ethsw->ports);
+
+	dpaa2_switch_takedown(sw_dev);
+
+	destroy_workqueue(ethsw->workqueue);
+
+	fsl_mc_portal_free(ethsw->mc_io);
+
+	kfree(ethsw);
+
+	dev_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
+				   u16 port_idx)
+{
+	struct ethsw_port_priv *port_priv;
+	struct device *dev = ethsw->dev;
+	struct net_device *port_netdev;
+	int err;
+
+	port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
+	if (!port_netdev) {
+		dev_err(dev, "alloc_etherdev error\n");
+		return -ENOMEM;
+	}
+
+	port_priv = netdev_priv(port_netdev);
+	port_priv->netdev = port_netdev;
+	port_priv->ethsw_data = ethsw;
+
+	port_priv->idx = port_idx;
+	port_priv->stp_state = BR_STATE_FORWARDING;
+
+	SET_NETDEV_DEV(port_netdev, dev);
+	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
+	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
+
+	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
+
+	/* Set MTU limits */
+	port_netdev->min_mtu = ETH_MIN_MTU;
+	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
+
+	/* Populate the private port structure so that later calls to
+	 * dpaa2_switch_port_init() can use it.
+	 */
+	ethsw->ports[port_idx] = port_priv;
+
+	/* The DPAA2 switch's ingress path depends on the VLAN table,
+	 * thus we are not able to disable VLAN filtering.
+	 */
+	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER;
+
+	err = dpaa2_switch_port_init(port_priv, port_idx);
+	if (err)
+		goto err_port_probe;
+
+	err = dpaa2_switch_port_set_mac_addr(port_priv);
+	if (err)
+		goto err_port_probe;
+
+	return 0;
+
+err_port_probe:
+	free_netdev(port_netdev);
+	ethsw->ports[port_idx] = NULL;
+
+	return err;
+}
+
+static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw;
+	int i, err;
+
+	/* Allocate switch core*/
+	ethsw = kzalloc(sizeof(*ethsw), GFP_KERNEL);
+
+	if (!ethsw)
+		return -ENOMEM;
+
+	ethsw->dev = dev;
+	ethsw->iommu_domain = iommu_get_domain_for_dev(dev);
+	dev_set_drvdata(dev, ethsw);
+
+	err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
+				     &ethsw->mc_io);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "fsl_mc_portal_allocate err %d\n", err);
+		goto err_free_drvdata;
+	}
+
+	err = dpaa2_switch_init(sw_dev);
+	if (err)
+		goto err_free_cmdport;
+
+	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
+			       GFP_KERNEL);
+	if (!(ethsw->ports)) {
+		err = -ENOMEM;
+		goto err_takedown;
+	}
+
+	ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs),
+			      GFP_KERNEL);
+	if (!ethsw->fdbs) {
+		err = -ENOMEM;
+		goto err_free_ports;
+	}
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = dpaa2_switch_probe_port(ethsw, i);
+		if (err)
+			goto err_free_netdev;
+	}
+
+	/* Add a NAPI instance for each of the Rx queues. The first port's
+	 * net_device will be associated with the instances since we do not have
+	 * different queues for each switch ports.
+	 */
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		netif_napi_add(ethsw->ports[0]->netdev,
+			       &ethsw->fq[i].napi, dpaa2_switch_poll,
+			       NAPI_POLL_WEIGHT);
+
+	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
+		goto err_free_netdev;
+	}
+
+	/* Setup IRQs */
+	err = dpaa2_switch_setup_irqs(sw_dev);
+	if (err)
+		goto err_stop;
+
+	/* Register the netdev only when the entire setup is done and the
+	 * switch port interfaces are ready to receive traffic
+	 */
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		err = register_netdev(ethsw->ports[i]->netdev);
+		if (err < 0) {
+			dev_err(dev, "register_netdev error %d\n", err);
+			goto err_unregister_ports;
+		}
+	}
+
+	return 0;
+
+err_unregister_ports:
+	for (i--; i >= 0; i--)
+		unregister_netdev(ethsw->ports[i]->netdev);
+	dpaa2_switch_teardown_irqs(sw_dev);
+err_stop:
+	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+err_free_netdev:
+	for (i--; i >= 0; i--)
+		free_netdev(ethsw->ports[i]->netdev);
+	kfree(ethsw->fdbs);
+err_free_ports:
+	kfree(ethsw->ports);
+
+err_takedown:
+	dpaa2_switch_takedown(sw_dev);
+
+err_free_cmdport:
+	fsl_mc_portal_free(ethsw->mc_io);
+
+err_free_drvdata:
+	kfree(ethsw);
+	dev_set_drvdata(dev, NULL);
+
+	return err;
+}
+
+static const struct fsl_mc_device_id dpaa2_switch_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpsw",
+	},
+	{ .vendor = 0x0 }
+};
+MODULE_DEVICE_TABLE(fslmc, dpaa2_switch_match_id_table);
+
+static struct fsl_mc_driver dpaa2_switch_drv = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+		.owner = THIS_MODULE,
+	},
+	.probe = dpaa2_switch_probe,
+	.remove = dpaa2_switch_remove,
+	.match_id_table = dpaa2_switch_match_id_table
+};
+
+static struct notifier_block dpaa2_switch_port_nb __read_mostly = {
+	.notifier_call = dpaa2_switch_port_netdevice_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_nb = {
+	.notifier_call = dpaa2_switch_port_event,
+};
+
+static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb = {
+	.notifier_call = dpaa2_switch_port_blocking_event,
+};
+
+static int dpaa2_switch_register_notifiers(void)
+{
+	int err;
+
+	err = register_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register net_device notifier (%d)\n", err);
+		return err;
+	}
+
+	err = register_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev notifier (%d)\n", err);
+		goto err_switchdev_nb;
+	}
+
+	err = register_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err) {
+		pr_err("dpaa2-switch: failed to register switchdev blocking notifier (%d)\n", err);
+		goto err_switchdev_blocking_nb;
+	}
+
+	return 0;
+
+err_switchdev_blocking_nb:
+	unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+err_switchdev_nb:
+	unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+
+	return err;
+}
+
+static void dpaa2_switch_unregister_notifiers(void)
+{
+	int err;
+
+	err = unregister_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev blocking notifier (%d)\n",
+		       err);
+
+	err = unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister switchdev notifier (%d)\n", err);
+
+	err = unregister_netdevice_notifier(&dpaa2_switch_port_nb);
+	if (err)
+		pr_err("dpaa2-switch: failed to unregister net_device notifier (%d)\n", err);
+}
+
+static int __init dpaa2_switch_driver_init(void)
+{
+	int err;
+
+	err = fsl_mc_driver_register(&dpaa2_switch_drv);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_register_notifiers();
+	if (err) {
+		fsl_mc_driver_unregister(&dpaa2_switch_drv);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit dpaa2_switch_driver_exit(void)
+{
+	dpaa2_switch_unregister_notifiers();
+	fsl_mc_driver_unregister(&dpaa2_switch_drv);
+}
+
+module_init(dpaa2_switch_driver_init);
+module_exit(dpaa2_switch_driver_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver");
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
new file mode 100644
index 000000000000..933563064015
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DPAA2 Ethernet Switch declarations
+ *
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#ifndef __ETHSW_H
+#define __ETHSW_H
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <uapi/linux/if_bridge.h>
+#include <net/switchdev.h>
+#include <linux/if_bridge.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+
+#include "dpsw.h"
+
+/* Number of IRQs supported */
+#define DPSW_IRQ_NUM	2
+
+/* Port is member of VLAN */
+#define ETHSW_VLAN_MEMBER	1
+/* VLAN to be treated as untagged on egress */
+#define ETHSW_VLAN_UNTAGGED	2
+/* Untagged frames will be assigned to this VLAN */
+#define ETHSW_VLAN_PVID		4
+/* VLAN configured on the switch */
+#define ETHSW_VLAN_GLOBAL	8
+
+/* Maximum Frame Length supported by HW (currently 10k) */
+#define DPAA2_MFL		(10 * 1024)
+#define ETHSW_MAX_FRAME_LENGTH	(DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define ETHSW_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
+
+#define ETHSW_FEATURE_MAC_ADDR	BIT(0)
+
+/* Number of receive queues (one RX and one TX_CONF) */
+#define DPAA2_SWITCH_RX_NUM_FQS	2
+
+/* Hardware requires alignment for ingress/egress buffer addresses */
+#define DPAA2_SWITCH_RX_BUF_RAW_SIZE	PAGE_SIZE
+#define DPAA2_SWITCH_RX_BUF_TAILROOM \
+	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+#define DPAA2_SWITCH_RX_BUF_SIZE \
+	(DPAA2_SWITCH_RX_BUF_RAW_SIZE - DPAA2_SWITCH_RX_BUF_TAILROOM)
+
+#define DPAA2_SWITCH_STORE_SIZE 16
+
+/* Buffer management */
+#define BUFS_PER_CMD			7
+#define DPAA2_ETHSW_NUM_BUFS		(1024 * BUFS_PER_CMD)
+#define DPAA2_ETHSW_REFILL_THRESH	(DPAA2_ETHSW_NUM_BUFS * 5 / 6)
+
+/* Number of times to retry DPIO portal operations while waiting
+ * for portal to finish executing current command and become
+ * available. We want to avoid being stuck in a while loop in case
+ * hardware becomes unresponsive, but not give up too easily if
+ * the portal really is busy for valid reasons
+ */
+#define DPAA2_SWITCH_SWP_BUSY_RETRIES		1000
+
+/* Hardware annotation buffer size */
+#define DPAA2_SWITCH_HWA_SIZE			64
+/* Software annotation buffer size */
+#define DPAA2_SWITCH_SWA_SIZE			64
+
+#define DPAA2_SWITCH_TX_BUF_ALIGN		64
+
+#define DPAA2_SWITCH_TX_DATA_OFFSET \
+	(DPAA2_SWITCH_HWA_SIZE + DPAA2_SWITCH_SWA_SIZE)
+
+#define DPAA2_SWITCH_NEEDED_HEADROOM \
+	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
+
+extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
+
+struct ethsw_core;
+
+struct dpaa2_switch_fq {
+	struct ethsw_core *ethsw;
+	enum dpsw_queue_type type;
+	struct dpaa2_io_store *store;
+	struct dpaa2_io_notification_ctx nctx;
+	struct napi_struct napi;
+	u32 fqid;
+};
+
+struct dpaa2_switch_fdb {
+	struct net_device	*bridge_dev;
+	u16			fdb_id;
+	bool			in_use;
+};
+
+/* Per port private data */
+struct ethsw_port_priv {
+	struct net_device	*netdev;
+	u16			idx;
+	struct ethsw_core	*ethsw_data;
+	u8			link_state;
+	u8			stp_state;
+	bool			flood;
+
+	u8			vlans[VLAN_VID_MASK + 1];
+	u16			pvid;
+	u16			tx_qdid;
+
+	struct dpaa2_switch_fdb	*fdb;
+};
+
+/* Switch data */
+struct ethsw_core {
+	struct device			*dev;
+	struct fsl_mc_io		*mc_io;
+	u16				dpsw_handle;
+	struct dpsw_attr		sw_attr;
+	u16				major, minor;
+	unsigned long			features;
+	int				dev_id;
+	struct ethsw_port_priv		**ports;
+	struct iommu_domain		*iommu_domain;
+
+	u8				vlans[VLAN_VID_MASK + 1];
+
+	struct workqueue_struct		*workqueue;
+
+	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
+	struct fsl_mc_device		*dpbp_dev;
+	int				buf_count;
+	u16				bpid;
+	int				napi_users;
+
+	struct dpaa2_switch_fdb		*fdbs;
+};
+
+static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
+{
+	if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) {
+		dev_err(ethsw->dev, "Control Interface is disabled, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.flooding_cfg != DPSW_FLOODING_PER_FDB) {
+		dev_err(ethsw->dev, "Flooding domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.broadcast_cfg != DPSW_BROADCAST_PER_FDB) {
+		dev_err(ethsw->dev, "Broadcast domain is not per FDB, cannot probe\n");
+		return false;
+	}
+
+	if (ethsw->sw_attr.max_fdbs < ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "The number of FDBs is lower than the number of ports, cannot probe\n");
+		return false;
+	}
+
+	return true;
+}
+
+bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
+
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
+int dpaa2_switch_port_vlans_del(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan);
+
+typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
+				  struct fdb_dump_entry *fdb_entry,
+				  void *data);
+#endif	/* __ETHSW_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
new file mode 100644
index 000000000000..eb620e832412
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#ifndef __FSL_DPSW_CMD_H
+#define __FSL_DPSW_CMD_H
+
+#include "dpsw.h"
+
+/* DPSW Version */
+#define DPSW_VER_MAJOR		8
+#define DPSW_VER_MINOR		9
+
+#define DPSW_CMD_BASE_VERSION	1
+#define DPSW_CMD_VERSION_2	2
+#define DPSW_CMD_ID_OFFSET	4
+
+#define DPSW_CMD_ID(id)	(((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_BASE_VERSION)
+#define DPSW_CMD_V2(id) (((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_VERSION_2)
+
+/* Command IDs */
+#define DPSW_CMDID_CLOSE                    DPSW_CMD_ID(0x800)
+#define DPSW_CMDID_OPEN                     DPSW_CMD_ID(0x802)
+
+#define DPSW_CMDID_GET_API_VERSION          DPSW_CMD_ID(0xa02)
+
+#define DPSW_CMDID_ENABLE                   DPSW_CMD_ID(0x002)
+#define DPSW_CMDID_DISABLE                  DPSW_CMD_ID(0x003)
+#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_V2(0x004)
+#define DPSW_CMDID_RESET                    DPSW_CMD_ID(0x005)
+
+#define DPSW_CMDID_SET_IRQ_ENABLE           DPSW_CMD_ID(0x012)
+
+#define DPSW_CMDID_SET_IRQ_MASK             DPSW_CMD_ID(0x014)
+
+#define DPSW_CMDID_GET_IRQ_STATUS           DPSW_CMD_ID(0x016)
+#define DPSW_CMDID_CLEAR_IRQ_STATUS         DPSW_CMD_ID(0x017)
+
+#define DPSW_CMDID_IF_SET_TCI               DPSW_CMD_ID(0x030)
+#define DPSW_CMDID_IF_SET_STP               DPSW_CMD_ID(0x031)
+
+#define DPSW_CMDID_IF_GET_COUNTER           DPSW_CMD_V2(0x034)
+
+#define DPSW_CMDID_IF_ENABLE                DPSW_CMD_ID(0x03D)
+#define DPSW_CMDID_IF_DISABLE               DPSW_CMD_ID(0x03E)
+
+#define DPSW_CMDID_IF_GET_ATTR              DPSW_CMD_ID(0x042)
+
+#define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH  DPSW_CMD_ID(0x044)
+
+#define DPSW_CMDID_IF_GET_LINK_STATE        DPSW_CMD_ID(0x046)
+
+#define DPSW_CMDID_IF_GET_TCI               DPSW_CMD_ID(0x04A)
+
+#define DPSW_CMDID_IF_SET_LINK_CFG          DPSW_CMD_ID(0x04C)
+
+#define DPSW_CMDID_VLAN_ADD                 DPSW_CMD_ID(0x060)
+#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_V2(0x061)
+#define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED     DPSW_CMD_ID(0x062)
+
+#define DPSW_CMDID_VLAN_REMOVE_IF           DPSW_CMD_ID(0x064)
+#define DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED  DPSW_CMD_ID(0x065)
+#define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING  DPSW_CMD_ID(0x066)
+#define DPSW_CMDID_VLAN_REMOVE              DPSW_CMD_ID(0x067)
+
+#define DPSW_CMDID_FDB_ADD                  DPSW_CMD_ID(0x082)
+#define DPSW_CMDID_FDB_REMOVE               DPSW_CMD_ID(0x083)
+#define DPSW_CMDID_FDB_ADD_UNICAST          DPSW_CMD_ID(0x084)
+#define DPSW_CMDID_FDB_REMOVE_UNICAST       DPSW_CMD_ID(0x085)
+#define DPSW_CMDID_FDB_ADD_MULTICAST        DPSW_CMD_ID(0x086)
+#define DPSW_CMDID_FDB_REMOVE_MULTICAST     DPSW_CMD_ID(0x087)
+#define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
+
+#define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
+#define DPSW_CMDID_IF_GET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A8)
+#define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
+
+#define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
+#define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
+#define DPSW_CMDID_CTRL_IF_ENABLE           DPSW_CMD_ID(0x0A2)
+#define DPSW_CMDID_CTRL_IF_DISABLE          DPSW_CMD_ID(0x0A3)
+#define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
+
+#define DPSW_CMDID_SET_EGRESS_FLOOD         DPSW_CMD_ID(0x0AC)
+
+/* Macros for accessing command fields smaller than 1byte */
+#define DPSW_MASK(field)        \
+	GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
+		DPSW_##field##_SHIFT)
+#define dpsw_set_field(var, field, val) \
+	((var) |= (((val) << DPSW_##field##_SHIFT) & DPSW_MASK(field)))
+#define dpsw_get_field(var, field)      \
+	(((var) & DPSW_MASK(field)) >> DPSW_##field##_SHIFT)
+#define dpsw_get_bit(var, bit) \
+	(((var)  >> (bit)) & GENMASK(0, 0))
+
+#pragma pack(push, 1)
+struct dpsw_cmd_open {
+	__le32 dpsw_id;
+};
+
+#define DPSW_COMPONENT_TYPE_SHIFT	0
+#define DPSW_COMPONENT_TYPE_SIZE	4
+
+struct dpsw_cmd_create {
+	/* cmd word 0 */
+	__le16 num_ifs;
+	u8 max_fdbs;
+	u8 max_meters_per_if;
+	/* from LSB: only the first 4 bits */
+	u8 component_type;
+	u8 pad[3];
+	/* cmd word 1 */
+	__le16 max_vlans;
+	__le16 max_fdb_entries;
+	__le16 fdb_aging_time;
+	__le16 max_fdb_mc_groups;
+	/* cmd word 2 */
+	__le64 options;
+};
+
+struct dpsw_cmd_destroy {
+	__le32 dpsw_id;
+};
+
+#define DPSW_ENABLE_SHIFT 0
+#define DPSW_ENABLE_SIZE  1
+
+struct dpsw_rsp_is_enabled {
+	/* from LSB: enable:1 */
+	u8 enabled;
+};
+
+struct dpsw_cmd_set_irq_enable {
+	u8 enable_state;
+	u8 pad[3];
+	u8 irq_index;
+};
+
+struct dpsw_cmd_get_irq_enable {
+	__le32 pad;
+	u8 irq_index;
+};
+
+struct dpsw_rsp_get_irq_enable {
+	u8 enable_state;
+};
+
+struct dpsw_cmd_set_irq_mask {
+	__le32 mask;
+	u8 irq_index;
+};
+
+struct dpsw_cmd_get_irq_mask {
+	__le32 pad;
+	u8 irq_index;
+};
+
+struct dpsw_rsp_get_irq_mask {
+	__le32 mask;
+};
+
+struct dpsw_cmd_get_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dpsw_rsp_get_irq_status {
+	__le32 status;
+};
+
+struct dpsw_cmd_clear_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+#define DPSW_COMPONENT_TYPE_SHIFT	0
+#define DPSW_COMPONENT_TYPE_SIZE	4
+
+#define DPSW_FLOODING_CFG_SHIFT		0
+#define DPSW_FLOODING_CFG_SIZE		4
+
+#define DPSW_BROADCAST_CFG_SHIFT	4
+#define DPSW_BROADCAST_CFG_SIZE		4
+
+struct dpsw_rsp_get_attr {
+	/* cmd word 0 */
+	__le16 num_ifs;
+	u8 max_fdbs;
+	u8 num_fdbs;
+	__le16 max_vlans;
+	__le16 num_vlans;
+	/* cmd word 1 */
+	__le16 max_fdb_entries;
+	__le16 fdb_aging_time;
+	__le32 dpsw_id;
+	/* cmd word 2 */
+	__le16 mem_size;
+	__le16 max_fdb_mc_groups;
+	u8 max_meters_per_if;
+	/* from LSB only the first 4 bits */
+	u8 component_type;
+	/* [0:3] - flooding configuration
+	 * [4:7] - broadcast configuration
+	 */
+	u8 repl_cfg;
+	u8 pad;
+	/* cmd word 3 */
+	__le64 options;
+};
+
+#define DPSW_VLAN_ID_SHIFT	0
+#define DPSW_VLAN_ID_SIZE	12
+#define DPSW_DEI_SHIFT		12
+#define DPSW_DEI_SIZE		1
+#define DPSW_PCP_SHIFT		13
+#define DPSW_PCP_SIZE		3
+
+struct dpsw_cmd_if_set_tci {
+	__le16 if_id;
+	/* from LSB: VLAN_ID:12 DEI:1 PCP:3 */
+	__le16 conf;
+};
+
+struct dpsw_cmd_if_get_tci {
+	__le16 if_id;
+};
+
+struct dpsw_rsp_if_get_tci {
+	__le16 pad;
+	__le16 vlan_id;
+	u8 dei;
+	u8 pcp;
+};
+
+#define DPSW_STATE_SHIFT	0
+#define DPSW_STATE_SIZE		4
+
+struct dpsw_cmd_if_set_stp {
+	__le16 if_id;
+	__le16 vlan_id;
+	/* only the first LSB 4 bits */
+	u8 state;
+};
+
+#define DPSW_COUNTER_TYPE_SHIFT		0
+#define DPSW_COUNTER_TYPE_SIZE		5
+
+struct dpsw_cmd_if_get_counter {
+	__le16 if_id;
+	/* from LSB: type:5 */
+	u8 type;
+};
+
+struct dpsw_rsp_if_get_counter {
+	__le64 pad;
+	__le64 counter;
+};
+
+struct dpsw_cmd_if {
+	__le16 if_id;
+};
+
+#define DPSW_ADMIT_UNTAGGED_SHIFT	0
+#define DPSW_ADMIT_UNTAGGED_SIZE	4
+#define DPSW_ENABLED_SHIFT		5
+#define DPSW_ENABLED_SIZE		1
+#define DPSW_ACCEPT_ALL_VLAN_SHIFT	6
+#define DPSW_ACCEPT_ALL_VLAN_SIZE	1
+
+struct dpsw_rsp_if_get_attr {
+	/* cmd word 0 */
+	/* from LSB: admit_untagged:4 enabled:1 accept_all_vlan:1 */
+	u8 conf;
+	u8 pad1;
+	u8 num_tcs;
+	u8 pad2;
+	__le16 qdid;
+	/* cmd word 1 */
+	__le32 options;
+	__le32 pad3;
+	/* cmd word 2 */
+	__le32 rate;
+};
+
+struct dpsw_cmd_if_set_max_frame_length {
+	__le16 if_id;
+	__le16 frame_length;
+};
+
+struct dpsw_cmd_if_set_link_cfg {
+	/* cmd word 0 */
+	__le16 if_id;
+	u8 pad[6];
+	/* cmd word 1 */
+	__le32 rate;
+	__le32 pad1;
+	/* cmd word 2 */
+	__le64 options;
+};
+
+struct dpsw_cmd_if_get_link_state {
+	__le16 if_id;
+};
+
+#define DPSW_UP_SHIFT	0
+#define DPSW_UP_SIZE	1
+
+struct dpsw_rsp_if_get_link_state {
+	/* cmd word 0 */
+	__le32 pad0;
+	u8 up;
+	u8 pad1[3];
+	/* cmd word 1 */
+	__le32 rate;
+	__le32 pad2;
+	/* cmd word 2 */
+	__le64 options;
+};
+
+struct dpsw_vlan_add {
+	__le16 fdb_id;
+	__le16 vlan_id;
+};
+
+struct dpsw_cmd_vlan_add_if {
+	/* cmd word 0 */
+	__le16 options;
+	__le16 vlan_id;
+	__le16 fdb_id;
+	__le16 pad0;
+	/* cmd word 1-4 */
+	__le64 if_id;
+};
+
+struct dpsw_cmd_vlan_manage_if {
+	/* cmd word 0 */
+	__le16 pad0;
+	__le16 vlan_id;
+	__le32 pad1;
+	/* cmd word 1-4 */
+	__le64 if_id[4];
+};
+
+struct dpsw_cmd_vlan_remove {
+	__le16 pad;
+	__le16 vlan_id;
+};
+
+struct dpsw_cmd_fdb_add {
+	__le32 pad;
+	__le16 fdb_ageing_time;
+	__le16 num_fdb_entries;
+};
+
+struct dpsw_rsp_fdb_add {
+	__le16 fdb_id;
+};
+
+struct dpsw_cmd_fdb_remove {
+	__le16 fdb_id;
+};
+
+#define DPSW_ENTRY_TYPE_SHIFT	0
+#define DPSW_ENTRY_TYPE_SIZE	4
+
+struct dpsw_cmd_fdb_unicast_op {
+	/* cmd word 0 */
+	__le16 fdb_id;
+	u8 mac_addr[6];
+	/* cmd word 1 */
+	__le16 if_egress;
+	/* only the first 4 bits from LSB */
+	u8 type;
+};
+
+struct dpsw_cmd_fdb_multicast_op {
+	/* cmd word 0 */
+	__le16 fdb_id;
+	__le16 num_ifs;
+	/* only the first 4 bits from LSB */
+	u8 type;
+	u8 pad[3];
+	/* cmd word 1 */
+	u8 mac_addr[6];
+	__le16 pad2;
+	/* cmd word 2-5 */
+	__le64 if_id[4];
+};
+
+struct dpsw_cmd_fdb_dump {
+	__le16 fdb_id;
+	__le16 pad0;
+	__le32 pad1;
+	__le64 iova_addr;
+	__le32 iova_size;
+};
+
+struct dpsw_rsp_fdb_dump {
+	__le16 num_entries;
+};
+
+struct dpsw_rsp_ctrl_if_get_attr {
+	__le64 pad;
+	__le32 rx_fqid;
+	__le32 rx_err_fqid;
+	__le32 tx_err_conf_fqid;
+};
+
+#define DPSW_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))
+struct dpsw_cmd_ctrl_if_set_pools {
+	u8 num_dpbp;
+	u8 backup_pool_mask;
+	__le16 pad;
+	__le32 dpbp_id[DPSW_MAX_DPBP];
+	__le16 buffer_size[DPSW_MAX_DPBP];
+};
+
+#define DPSW_DEST_TYPE_SHIFT	0
+#define DPSW_DEST_TYPE_SIZE	4
+
+struct dpsw_cmd_ctrl_if_set_queue {
+	__le32 dest_id;
+	u8 dest_priority;
+	u8 pad;
+	/* from LSB: dest_type:4 */
+	u8 dest_type;
+	u8 qtype;
+	__le64 user_ctx;
+	__le32 options;
+};
+
+struct dpsw_rsp_get_api_version {
+	__le16 version_major;
+	__le16 version_minor;
+};
+
+struct dpsw_rsp_if_get_mac_addr {
+	__le16 pad;
+	u8 mac_addr[6];
+};
+
+struct dpsw_cmd_if_set_mac_addr {
+	__le16 if_id;
+	u8 mac_addr[6];
+};
+
+struct dpsw_cmd_set_egress_flood {
+	__le16 fdb_id;
+	u8 flood_type;
+	u8 pad[5];
+	__le64 if_id;
+};
+#pragma pack(pop)
+#endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
new file mode 100644
index 000000000000..5189f156100e
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -0,0 +1,1486 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#include <linux/fsl/mc.h>
+#include "dpsw.h"
+#include "dpsw-cmd.h"
+
+static void build_if_id_bitmap(__le64 *bmap,
+			       const u16 *id,
+			       const u16 num_ifs)
+{
+	int i;
+
+	for (i = 0; (i < num_ifs) && (i < DPSW_MAX_IF); i++) {
+		if (id[i] < DPSW_MAX_IF)
+			bmap[id[i] / 64] |= cpu_to_le64(BIT_MASK(id[i] % 64));
+	}
+}
+
+/**
+ * dpsw_open() - Open a control session for the specified object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpsw_id:	DPSW unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpsw_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_open(struct fsl_mc_io *mc_io,
+	      u32 cmd_flags,
+	      int dpsw_id,
+	      u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_open *cmd_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	cmd_params = (struct dpsw_cmd_open *)cmd.params;
+	cmd_params->dpsw_id = cpu_to_le32(dpsw_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
+/**
+ * dpsw_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * After this function is called, no further operations are
+ * allowed on the object without opening a new control session.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_close(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLOSE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_enable() - Enable DPSW functionality
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_enable(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ENABLE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_disable() - Disable DPSW functionality
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_disable(struct fsl_mc_io *mc_io,
+		 u32 cmd_flags,
+		 u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_DISABLE,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_reset() - Reset the DPSW, returns the object to initial state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_reset(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_RESET,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_set_irq_enable() - Set overall interrupt state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPCI object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Interrupt state - enable = 1, disable = 0
+ *
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes.  The enable/disable control's the
+ * overall interrupt state. if the interrupt is disabled no causes will cause
+ * an interrupt
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u8 en)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_set_irq_enable *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_set_irq_enable *)cmd.params;
+	dpsw_set_field(cmd_params->enable_state, ENABLE, en);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_set_irq_mask() - Set interrupt mask.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPCI object
+ * @irq_index:	The interrupt index to configure
+ * @mask:	Event mask to trigger interrupt;
+ *		each bit:
+ *			0 = ignore event
+ *			1 = consider event for asserting IRQ
+ *
+ * Every interrupt can have up to 32 causes and the interrupt model supports
+ * masking/unmasking each cause independently
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 irq_index,
+		      u32 mask)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_set_irq_mask *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_MASK,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_set_irq_mask *)cmd.params;
+	cmd_params->mask = cpu_to_le32(mask);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_get_irq_status() - Get the current status of any pending interrupts
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @irq_index:	The interrupt index to configure
+ * @status:	Returned interrupts status - one bit per cause:
+ *			0 = no interrupt pending
+ *			1 = interrupt pending
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u32 *status)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_get_irq_status *cmd_params;
+	struct dpsw_rsp_get_irq_status *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_get_irq_status *)cmd.params;
+	cmd_params->status = cpu_to_le32(*status);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_get_irq_status *)cmd.params;
+	*status = le32_to_cpu(rsp_params->status);
+
+	return 0;
+}
+
+/**
+ * dpsw_clear_irq_status() - Clear a pending interrupt's status
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPCI object
+ * @irq_index:	The interrupt index to configure
+ * @status:	bits to clear (W1C) - one bit per cause:
+ *			0 = don't change
+ *			1 = clear status bit
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  u8 irq_index,
+			  u32 status)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_clear_irq_status *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLEAR_IRQ_STATUS,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_clear_irq_status *)cmd.params;
+	cmd_params->status = cpu_to_le32(status);
+	cmd_params->irq_index = irq_index;
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_get_attributes() - Retrieve DPSW attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @attr:	Returned DPSW attributes
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_get_attributes(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			struct dpsw_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_rsp_get_attr *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_ATTR,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_get_attr *)cmd.params;
+	attr->num_ifs = le16_to_cpu(rsp_params->num_ifs);
+	attr->max_fdbs = rsp_params->max_fdbs;
+	attr->num_fdbs = rsp_params->num_fdbs;
+	attr->max_vlans = le16_to_cpu(rsp_params->max_vlans);
+	attr->num_vlans = le16_to_cpu(rsp_params->num_vlans);
+	attr->max_fdb_entries = le16_to_cpu(rsp_params->max_fdb_entries);
+	attr->fdb_aging_time = le16_to_cpu(rsp_params->fdb_aging_time);
+	attr->id = le32_to_cpu(rsp_params->dpsw_id);
+	attr->mem_size = le16_to_cpu(rsp_params->mem_size);
+	attr->max_fdb_mc_groups = le16_to_cpu(rsp_params->max_fdb_mc_groups);
+	attr->max_meters_per_if = rsp_params->max_meters_per_if;
+	attr->options = le64_to_cpu(rsp_params->options);
+	attr->component_type = dpsw_get_field(rsp_params->component_type, COMPONENT_TYPE);
+	attr->flooding_cfg = dpsw_get_field(rsp_params->repl_cfg, FLOODING_CFG);
+	attr->broadcast_cfg = dpsw_get_field(rsp_params->repl_cfg, BROADCAST_CFG);
+	return 0;
+}
+
+/**
+ * dpsw_if_set_link_cfg() - Set the link configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface id
+ * @cfg:	Link configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 if_id,
+			 struct dpsw_link_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_set_link_cfg *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LINK_CFG,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_link_cfg *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	cmd_params->rate = cpu_to_le32(cfg->rate);
+	cmd_params->options = cpu_to_le64(cfg->options);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_get_link_state - Return the link state
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface id
+ * @state:	Link state	1 - linkup, 0 - link down or disconnected
+ *
+ * @Return	'0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u16 if_id,
+			   struct dpsw_link_state *state)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_get_link_state *cmd_params;
+	struct dpsw_rsp_if_get_link_state *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_LINK_STATE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_get_link_state *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_if_get_link_state *)cmd.params;
+	state->rate = le32_to_cpu(rsp_params->rate);
+	state->options = le64_to_cpu(rsp_params->options);
+	state->up = dpsw_get_field(rsp_params->up, UP);
+
+	return 0;
+}
+
+/**
+ * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @cfg:	Tag Control Information Configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    const struct dpsw_tci_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_set_tci *cmd_params;
+	u16 tmp_conf = 0;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_TCI,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_tci *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	dpsw_set_field(tmp_conf, VLAN_ID, cfg->vlan_id);
+	dpsw_set_field(tmp_conf, DEI, cfg->dei);
+	dpsw_set_field(tmp_conf, PCP, cfg->pcp);
+	cmd_params->conf = cpu_to_le16(tmp_conf);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_get_tci() - Get default VLAN Tag Control Information (TCI)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @cfg:	Tag Control Information Configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    struct dpsw_tci_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_get_tci *cmd_params;
+	struct dpsw_rsp_if_get_tci *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_TCI,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_get_tci *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_if_get_tci *)cmd.params;
+	cfg->pcp = rsp_params->pcp;
+	cfg->dei = rsp_params->dei;
+	cfg->vlan_id = le16_to_cpu(rsp_params->vlan_id);
+
+	return 0;
+}
+
+/**
+ * dpsw_if_set_stp() - Function sets Spanning Tree Protocol (STP) state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @cfg:	STP State configuration parameters
+ *
+ * The following STP states are supported -
+ * blocking, listening, learning, forwarding and disabled.
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    const struct dpsw_stp_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_set_stp *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_STP,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_stp *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
+	dpsw_set_field(cmd_params->state, STATE, cfg->state);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_get_counter() - Get specific counter of particular interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @type:	Counter type
+ * @counter:	return value
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u16 if_id,
+			enum dpsw_counter type,
+			u64 *counter)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_get_counter *cmd_params;
+	struct dpsw_rsp_if_get_counter *rsp_params;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_COUNTER,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_get_counter *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	dpsw_set_field(cmd_params->type, COUNTER_TYPE, type);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_if_get_counter *)cmd.params;
+	*counter = le64_to_cpu(rsp_params->counter);
+
+	return 0;
+}
+
+/**
+ * dpsw_if_enable() - Enable Interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_enable(struct fsl_mc_io *mc_io,
+		   u32 cmd_flags,
+		   u16 token,
+		   u16 if_id)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ENABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_disable() - Disable Interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_disable(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_DISABLE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_if_get_attributes() - Function obtains attributes of interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @attr:	Returned interface attributes
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr)
+{
+	struct dpsw_rsp_if_get_attr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_if_get_attr *)cmd.params;
+	attr->num_tcs = rsp_params->num_tcs;
+	attr->rate = le32_to_cpu(rsp_params->rate);
+	attr->options = le32_to_cpu(rsp_params->options);
+	attr->qdid = le16_to_cpu(rsp_params->qdid);
+	attr->enabled = dpsw_get_field(rsp_params->conf, ENABLED);
+	attr->accept_all_vlan = dpsw_get_field(rsp_params->conf,
+					       ACCEPT_ALL_VLAN);
+	attr->admit_untagged = dpsw_get_field(rsp_params->conf,
+					      ADMIT_UNTAGGED);
+
+	return 0;
+}
+
+/**
+ * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length.
+ * @mc_io:		Pointer to MC portal's I/O object
+ * @cmd_flags:		Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:		Token of DPSW object
+ * @if_id:		Interface Identifier
+ * @frame_length:	Maximum Frame Length
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
+				 u32 cmd_flags,
+				 u16 token,
+				 u16 if_id,
+				 u16 frame_length)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if_set_max_frame_length *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_max_frame_length *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	cmd_params->frame_length = cpu_to_le16(frame_length);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_add() - Adding new VLAN to DPSW.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ * @cfg:	VLAN configuration
+ *
+ * Only VLAN ID and FDB ID are required parameters here.
+ * 12 bit VLAN ID is defined in IEEE802.1Q.
+ * Adding a duplicate VLAN ID is not allowed.
+ * FDB ID can be shared across multiple VLANs. Shared learning
+ * is obtained by calling dpsw_vlan_add for multiple VLAN IDs
+ * with same fdb_id
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_add(struct fsl_mc_io *mc_io,
+		  u32 cmd_flags,
+		  u16 token,
+		  u16 vlan_id,
+		  const struct dpsw_vlan_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_vlan_add *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_vlan_add *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_add_if() - Adding a set of interfaces to an existing VLAN.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ * @cfg:	Set of interfaces to add
+ *
+ * It adds only interfaces not belonging to this VLAN yet,
+ * otherwise an error is generated and an entire command is
+ * ignored. This function can be called numerous times always
+ * providing required interfaces delta.
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     u16 vlan_id,
+		     const struct dpsw_vlan_if_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_vlan_manage_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_add_if_untagged() - Defining a set of interfaces that should be
+ *				transmitted as untagged.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ * @cfg:	Set of interfaces that should be transmitted as untagged
+ *
+ * These interfaces should already belong to this VLAN.
+ * By default all interfaces are transmitted as tagged.
+ * Providing un-existing interface or untagged interface that is
+ * configured untagged already generates an error and the entire
+ * command is ignored.
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 vlan_id,
+			      const struct dpsw_vlan_if_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_vlan_manage_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF_UNTAGGED,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_remove_if() - Remove interfaces from an existing VLAN.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ * @cfg:	Set of interfaces that should be removed
+ *
+ * Interfaces must belong to this VLAN, otherwise an error
+ * is returned and an the command is ignored
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u16 vlan_id,
+			const struct dpsw_vlan_if_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_vlan_manage_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_remove_if_untagged() - Define a set of interfaces that should be
+ *		converted from transmitted as untagged to transmit as tagged.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ * @cfg:	Set of interfaces that should be removed
+ *
+ * Interfaces provided by API have to belong to this VLAN and
+ * configured untagged, otherwise an error is returned and the
+ * command is ignored
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
+				 u32 cmd_flags,
+				 u16 token,
+				 u16 vlan_id,
+				 const struct dpsw_vlan_if_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_vlan_manage_if *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_vlan_remove() - Remove an entire VLAN
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @vlan_id:	VLAN Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     u16 vlan_id)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_vlan_remove *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_vlan_remove *)cmd.params;
+	cmd_params->vlan_id = cpu_to_le16(vlan_id);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_fdb_add() - Add FDB to switch and Returns handle to FDB table for
+ *		the reference
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Returned Forwarding Database Identifier
+ * @cfg:	FDB Configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg)
+{
+	struct dpsw_cmd_fdb_add *cmd_params;
+	struct dpsw_rsp_fdb_add *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_add *)cmd.params;
+	cmd_params->fdb_ageing_time = cpu_to_le16(cfg->fdb_ageing_time);
+	cmd_params->num_fdb_entries = cpu_to_le16(cfg->num_fdb_entries);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_fdb_add *)cmd.params;
+	*fdb_id = le16_to_cpu(rsp_params->fdb_id);
+
+	return 0;
+}
+
+/**
+ * dpsw_fdb_remove() - Remove FDB from switch
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id)
+{
+	struct dpsw_cmd_fdb_remove *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_remove *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_fdb_add_unicast() - Function adds an unicast entry into MAC lookup table
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ * @cfg:	Unicast entry configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 fdb_id,
+			 const struct dpsw_fdb_unicast_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_fdb_unicast_op *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_UNICAST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+	cmd_params->if_egress = cpu_to_le16(cfg->if_egress);
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
+	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_fdb_dump() - Dump the content of FDB table into memory.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ * @iova_addr:	Data will be stored here as an array of struct fdb_dump_entry
+ * @iova_size:	Memory size allocated at iova_addr
+ * @num_entries:Number of entries written at iova_addr
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ *
+ * The memory allocated at iova_addr must be initialized with zero before
+ * command execution. If the FDB table does not fit into memory MC will stop
+ * after the memory is filled up.
+ * The struct fdb_dump_entry array must be parsed until the end of memory
+ * area or until an entry with mac_addr set to zero is found.
+ */
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
+		  u32 cmd_flags,
+		  u16 token,
+		  u16 fdb_id,
+		  u64 iova_addr,
+		  u32 iova_size,
+		  u16 *num_entries)
+{
+	struct dpsw_cmd_fdb_dump *cmd_params;
+	struct dpsw_rsp_fdb_dump *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_DUMP,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_dump *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+	cmd_params->iova_addr = cpu_to_le64(iova_addr);
+	cmd_params->iova_size = cpu_to_le32(iova_size);
+
+	/* send command to mc */
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_fdb_dump *)cmd.params;
+	*num_entries = le16_to_cpu(rsp_params->num_entries);
+
+	return 0;
+}
+
+/**
+ * dpsw_fdb_remove_unicast() - removes an entry from MAC lookup table
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ * @cfg:	Unicast entry configuration
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
+			    u32 cmd_flags,
+			    u16 token,
+			    u16 fdb_id,
+			    const struct dpsw_fdb_unicast_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_fdb_unicast_op *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_UNICAST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
+	cmd_params->if_egress = cpu_to_le16(cfg->if_egress);
+	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_fdb_add_multicast() - Add a set of egress interfaces to multi-cast group
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ * @cfg:	Multicast entry configuration
+ *
+ * If group doesn't exist, it will be created.
+ * It adds only interfaces not belonging to this multicast group
+ * yet, otherwise error will be generated and the command is
+ * ignored.
+ * This function may be called numerous times always providing
+ * required interfaces delta.
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u16 fdb_id,
+			   const struct dpsw_fdb_multicast_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_fdb_multicast_op *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_MULTICAST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
+	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_fdb_remove_multicast() - Removing interfaces from an existing multicast
+ *				group.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @fdb_id:	Forwarding Database Identifier
+ * @cfg:	Multicast entry configuration
+ *
+ * Interfaces provided by this API have to exist in the group,
+ * otherwise an error will be returned and an entire command
+ * ignored. If there is no interface left in the group,
+ * an entire group is deleted
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 fdb_id,
+			      const struct dpsw_fdb_multicast_cfg *cfg)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_fdb_multicast_op *cmd_params;
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_MULTICAST,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(fdb_id);
+	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
+	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
+	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_get_attributes() - Obtain control interface attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @attr:	Returned control interface attributes
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr)
+{
+	struct dpsw_rsp_ctrl_if_get_attr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR,
+					  cmd_flags, token);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_ctrl_if_get_attr *)cmd.params;
+	attr->rx_fqid = le32_to_cpu(rsp_params->rx_fqid);
+	attr->rx_err_fqid = le32_to_cpu(rsp_params->rx_err_fqid);
+	attr->tx_err_conf_fqid = le32_to_cpu(rsp_params->tx_err_conf_fqid);
+
+	return 0;
+}
+
+/**
+ * dpsw_ctrl_if_set_pools() - Set control interface buffer pools
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Buffer pools configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_pools *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int i;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS,
+					  cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_pools *)cmd.params;
+	cmd_params->num_dpbp = cfg->num_dpbp;
+	for (i = 0; i < DPSW_MAX_DPBP; i++) {
+		cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id);
+		cmd_params->buffer_size[i] =
+			cpu_to_le16(cfg->pools[i].buffer_size);
+		cmd_params->backup_pool_mask |=
+			DPSW_BACKUP_POOL(cfg->pools[i].backup_pool, i);
+	}
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_set_queue() - Set Rx queue configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of dpsw object
+ * @qtype:	dpsw_queue_type of the targeted queue
+ * @cfg:	Rx queue configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg)
+{
+	struct dpsw_cmd_ctrl_if_set_queue *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_QUEUE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_ctrl_if_set_queue *)cmd.params;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->dest_priority = cfg->dest_cfg.priority;
+	cmd_params->qtype = qtype;
+	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
+	cmd_params->options = cpu_to_le32(cfg->options);
+	dpsw_set_field(cmd_params->dest_type,
+		       DEST_TYPE,
+		       cfg->dest_cfg.dest_type);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_get_api_version() - Get Data Path Switch API version
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @major_ver:	Major version of data path switch API
+ * @minor_ver:	Minor version of data path switch API
+ *
+ * Return:  '0' on Success; Error code otherwise.
+ */
+int dpsw_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_rsp_get_api_version *rsp_params;
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_API_VERSION,
+					  cmd_flags,
+					  0);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_get_api_version *)cmd.params;
+	*major_ver = le16_to_cpu(rsp_params->version_major);
+	*minor_ver = le16_to_cpu(rsp_params->version_minor);
+
+	return 0;
+}
+
+/**
+ * dpsw_if_get_port_mac_addr()
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @mac_addr:	MAC address of the physical port, if any, otherwise 0
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, u8 mac_addr[6])
+{
+	struct dpsw_rsp_if_get_mac_addr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+	int err, i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_PORT_MAC_ADDR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		mac_addr[5 - i] = rsp_params->mac_addr[i];
+
+	return 0;
+}
+
+/**
+ * dpsw_if_get_primary_mac_addr()
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @mac_addr:	MAC address of the physical port, if any, otherwise 0
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				 u16 token, u16 if_id, u8 mac_addr[6])
+{
+	struct dpsw_rsp_if_get_mac_addr *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	struct dpsw_cmd_if *cmd_params;
+	int err, i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
+	for (i = 0; i < 6; i++)
+		mac_addr[5 - i] = rsp_params->mac_addr[i];
+
+	return 0;
+}
+
+/**
+ * dpsw_if_set_primary_mac_addr()
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface Identifier
+ * @mac_addr:	MAC address of the physical port, if any, otherwise 0
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				 u16 token, u16 if_id, u8 mac_addr[6])
+{
+	struct dpsw_cmd_if_set_mac_addr *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+	int i;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_mac_addr *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	for (i = 0; i < 6; i++)
+		cmd_params->mac_addr[i] = mac_addr[5 - i];
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_enable() - Enable control interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE, cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_ctrl_if_disable() - Function disables control interface
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_set_egress_flood() - Set egress parameters associated with an FDB ID
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @cfg:	Egress flooding configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg)
+{
+	struct dpsw_cmd_set_egress_flood *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_EGRESS_FLOOD, cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_set_egress_flood *)cmd.params;
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	cmd_params->flood_type = cfg->flood_type;
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
new file mode 100644
index 000000000000..9e04350f3277
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -0,0 +1,751 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#ifndef __FSL_DPSW_H
+#define __FSL_DPSW_H
+
+/* Data Path L2-Switch API
+ * Contains API for handling DPSW topology and functionality
+ */
+
+struct fsl_mc_io;
+
+/**
+ * DPSW general definitions
+ */
+
+/**
+ * Maximum number of traffic class priorities
+ */
+#define DPSW_MAX_PRIORITIES	8
+/**
+ * Maximum number of interfaces
+ */
+#define DPSW_MAX_IF		64
+
+int dpsw_open(struct fsl_mc_io *mc_io,
+	      u32 cmd_flags,
+	      int dpsw_id,
+	      u16 *token);
+
+int dpsw_close(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token);
+
+/**
+ * DPSW options
+ */
+
+/**
+ * Disable flooding
+ */
+#define DPSW_OPT_FLOODING_DIS		0x0000000000000001ULL
+/**
+ * Disable Multicast
+ */
+#define DPSW_OPT_MULTICAST_DIS		0x0000000000000004ULL
+/**
+ * Support control interface
+ */
+#define DPSW_OPT_CTRL_IF_DIS		0x0000000000000010ULL
+/**
+ * Disable flooding metering
+ */
+#define DPSW_OPT_FLOODING_METERING_DIS  0x0000000000000020ULL
+/**
+ * Enable metering
+ */
+#define DPSW_OPT_METERING_EN            0x0000000000000040ULL
+
+/**
+ * enum dpsw_component_type - component type of a bridge
+ * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an
+ *   enterprise VLAN bridge or of a Provider Bridge used
+ *   to process C-tagged frames
+ * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a
+ *   Provider Bridge
+ *
+ */
+enum dpsw_component_type {
+	DPSW_COMPONENT_TYPE_C_VLAN = 0,
+	DPSW_COMPONENT_TYPE_S_VLAN
+};
+
+/**
+ *  enum dpsw_flooding_cfg - flooding configuration requested
+ * @DPSW_FLOODING_PER_VLAN: Flooding replicators are allocated per VLAN and
+ * interfaces present in each of them can be configured using
+ * dpsw_vlan_add_if_flooding()/dpsw_vlan_remove_if_flooding().
+ * This is the default configuration.
+ *
+ * @DPSW_FLOODING_PER_FDB: Flooding replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_flooding_cfg {
+	DPSW_FLOODING_PER_VLAN = 0,
+	DPSW_FLOODING_PER_FDB,
+};
+
+/**
+ * enum dpsw_broadcast_cfg - broadcast configuration requested
+ * @DPSW_BROADCAST_PER_OBJECT: There is only one broadcast replicator per DPSW
+ * object. This is the default configuration.
+ * @DPSW_BROADCAST_PER_FDB: Broadcast replicators are allocated per FDB and
+ * interfaces present in each of them can be configured using
+ * dpsw_set_egress_flood().
+ */
+enum dpsw_broadcast_cfg {
+	DPSW_BROADCAST_PER_OBJECT = 0,
+	DPSW_BROADCAST_PER_FDB,
+};
+
+int dpsw_enable(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token);
+
+int dpsw_disable(struct fsl_mc_io *mc_io,
+		 u32 cmd_flags,
+		 u16 token);
+
+int dpsw_reset(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token);
+
+/**
+ * DPSW IRQ Index and Events
+ */
+
+#define DPSW_IRQ_INDEX_IF		0x0000
+#define DPSW_IRQ_INDEX_L2SW		0x0001
+
+/**
+ * IRQ event - Indicates that the link state changed
+ */
+#define DPSW_IRQ_EVENT_LINK_CHANGED	0x0001
+
+/**
+ * struct dpsw_irq_cfg - IRQ configuration
+ * @addr:	Address that must be written to signal a message-based interrupt
+ * @val:	Value to write into irq_addr address
+ * @irq_num: A user defined number associated with this IRQ
+ */
+struct dpsw_irq_cfg {
+	u64 addr;
+	u32 val;
+	int irq_num;
+};
+
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u8 en);
+
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
+		      u32 cmd_flags,
+		      u16 token,
+		      u8 irq_index,
+		      u32 mask);
+
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u8 irq_index,
+			u32 *status);
+
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  u8 irq_index,
+			  u32 status);
+
+/**
+ * struct dpsw_attr - Structure representing DPSW attributes
+ * @id: DPSW object ID
+ * @options: Enable/Disable DPSW features
+ * @max_vlans: Maximum Number of VLANs
+ * @max_meters_per_if:  Number of meters per interface
+ * @max_fdbs: Maximum Number of FDBs
+ * @max_fdb_entries: Number of FDB entries for default FDB table;
+ *			0 - indicates default 1024 entries.
+ * @fdb_aging_time: Default FDB aging time for default FDB table;
+ *			0 - indicates default 300 seconds
+ * @max_fdb_mc_groups: Number of multicast groups in each FDB table;
+ *			0 - indicates default 32
+ * @mem_size: DPSW frame storage memory size
+ * @num_ifs: Number of interfaces
+ * @num_vlans: Current number of VLANs
+ * @num_fdbs: Current number of FDBs
+ * @component_type: Component type of this bridge
+ * @flooding_cfg: Flooding configuration (PER_VLAN - default, PER_FDB)
+ * @broadcast_cfg: Broadcast configuration (PER_OBJECT - default, PER_FDB)
+ */
+struct dpsw_attr {
+	int id;
+	u64 options;
+	u16 max_vlans;
+	u8 max_meters_per_if;
+	u8 max_fdbs;
+	u16 max_fdb_entries;
+	u16 fdb_aging_time;
+	u16 max_fdb_mc_groups;
+	u16 num_ifs;
+	u16 mem_size;
+	u16 num_vlans;
+	u8 num_fdbs;
+	enum dpsw_component_type component_type;
+	enum dpsw_flooding_cfg flooding_cfg;
+	enum dpsw_broadcast_cfg broadcast_cfg;
+};
+
+int dpsw_get_attributes(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			struct dpsw_attr *attr);
+
+/**
+ * struct dpsw_ctrl_if_attr - Control interface attributes
+ * @rx_fqid:		Receive FQID
+ * @rx_err_fqid:	Receive error FQID
+ * @tx_err_conf_fqid:	Transmit error and confirmation FQID
+ */
+struct dpsw_ctrl_if_attr {
+	u32 rx_fqid;
+	u32 rx_err_fqid;
+	u32 tx_err_conf_fqid;
+};
+
+int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				u16 token, struct dpsw_ctrl_if_attr *attr);
+
+enum dpsw_queue_type {
+	DPSW_QUEUE_RX,
+	DPSW_QUEUE_TX_ERR_CONF,
+	DPSW_QUEUE_RX_ERR,
+};
+
+/**
+ * Maximum number of DPBP
+ */
+#define DPSW_MAX_DPBP     8
+
+/**
+ * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration
+ * @num_dpbp: Number of DPBPs
+ * @pools: Array of buffer pools parameters; The number of valid entries
+ *	must match 'num_dpbp' value
+ * @pools.dpbp_id: DPBP object ID
+ * @pools.buffer_size: Buffer size
+ * @pools.backup_pool: Backup pool
+ */
+struct dpsw_ctrl_if_pools_cfg {
+	u8 num_dpbp;
+	struct {
+		int dpbp_id;
+		u16 buffer_size;
+		int backup_pool;
+	} pools[DPSW_MAX_DPBP];
+};
+
+int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   const struct dpsw_ctrl_if_pools_cfg *cfg);
+
+#define DPSW_CTRL_IF_QUEUE_OPT_USER_CTX		0x00000001
+#define DPSW_CTRL_IF_QUEUE_OPT_DEST		0x00000002
+
+enum dpsw_ctrl_if_dest {
+	DPSW_CTRL_IF_DEST_NONE = 0,
+	DPSW_CTRL_IF_DEST_DPIO = 1,
+};
+
+struct dpsw_ctrl_if_dest_cfg {
+	enum dpsw_ctrl_if_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+struct dpsw_ctrl_if_queue_cfg {
+	u32 options;
+	u64 user_ctx;
+	struct dpsw_ctrl_if_dest_cfg dest_cfg;
+};
+
+int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   enum dpsw_queue_type qtype,
+			   const struct dpsw_ctrl_if_queue_cfg *cfg);
+
+int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
+
+/**
+ * enum dpsw_action - Action selection for special/control frames
+ * @DPSW_ACTION_DROP: Drop frame
+ * @DPSW_ACTION_REDIRECT: Redirect frame to control port
+ */
+enum dpsw_action {
+	DPSW_ACTION_DROP = 0,
+	DPSW_ACTION_REDIRECT = 1
+};
+
+/**
+ * Enable auto-negotiation
+ */
+#define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
+/**
+ * Enable half-duplex mode
+ */
+#define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
+/**
+ * Enable pause frames
+ */
+#define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
+/**
+ * Enable a-symmetric pause frames
+ */
+#define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
+
+/**
+ * struct dpsw_link_cfg - Structure representing DPSW link configuration
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
+ */
+struct dpsw_link_cfg {
+	u32 rate;
+	u64 options;
+};
+
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 if_id,
+			 struct dpsw_link_cfg *cfg);
+/**
+ * struct dpsw_link_state - Structure representing DPSW link state
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
+ * @up: 0 - covers two cases: down and disconnected, 1 - up
+ */
+struct dpsw_link_state {
+	u32 rate;
+	u64 options;
+	u8 up;
+};
+
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u16 if_id,
+			   struct dpsw_link_state *state);
+
+/**
+ * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
+ * @pcp: Priority Code Point (PCP): a 3-bit field which refers
+ *		 to the IEEE 802.1p priority
+ * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used
+ *		 separately or in conjunction with PCP to indicate frames
+ *		 eligible to be dropped in the presence of congestion
+ * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN
+ *			to which the frame belongs. The hexadecimal values
+ *			of 0x000 and 0xFFF are reserved;
+ *			all other values may be used as VLAN identifiers,
+ *			allowing up to 4,094 VLANs
+ */
+struct dpsw_tci_cfg {
+	u8 pcp;
+	u8 dei;
+	u16 vlan_id;
+};
+
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    const struct dpsw_tci_cfg *cfg);
+
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    struct dpsw_tci_cfg *cfg);
+
+/**
+ * enum dpsw_stp_state - Spanning Tree Protocol (STP) states
+ * @DPSW_STP_STATE_BLOCKING: Blocking state
+ * @DPSW_STP_STATE_LISTENING: Listening state
+ * @DPSW_STP_STATE_LEARNING: Learning state
+ * @DPSW_STP_STATE_FORWARDING: Forwarding state
+ *
+ */
+enum dpsw_stp_state {
+	DPSW_STP_STATE_DISABLED = 0,
+	DPSW_STP_STATE_LISTENING = 1,
+	DPSW_STP_STATE_LEARNING = 2,
+	DPSW_STP_STATE_FORWARDING = 3,
+	DPSW_STP_STATE_BLOCKING = 0
+};
+
+/**
+ * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration
+ * @vlan_id: VLAN ID STP state
+ * @state: STP state
+ */
+struct dpsw_stp_cfg {
+	u16 vlan_id;
+	enum dpsw_stp_state state;
+};
+
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id,
+		    const struct dpsw_stp_cfg *cfg);
+
+/**
+ * enum dpsw_accepted_frames - Types of frames to accept
+ * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and
+ *			priority tagged frames
+ * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or
+ *			Priority-Tagged frames received on this interface.
+ *
+ */
+enum dpsw_accepted_frames {
+	DPSW_ADMIT_ALL = 1,
+	DPSW_ADMIT_ONLY_VLAN_TAGGED = 3
+};
+
+/**
+ * enum dpsw_counter  - Counters types
+ * @DPSW_CNT_ING_FRAME: Counts ingress frames
+ * @DPSW_CNT_ING_BYTE: Counts ingress bytes
+ * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames
+ * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frame
+ * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames
+ * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes
+ * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames
+ * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes
+ * @DPSW_CNT_EGR_FRAME: Counts egress frames
+ * @DPSW_CNT_EGR_BYTE: Counts egress bytes
+ * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames
+ * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames
+ * @DPSW_CNT_ING_NO_BUFF_DISCARD: Counts ingress no buffer discarded frames
+ */
+enum dpsw_counter {
+	DPSW_CNT_ING_FRAME = 0x0,
+	DPSW_CNT_ING_BYTE = 0x1,
+	DPSW_CNT_ING_FLTR_FRAME = 0x2,
+	DPSW_CNT_ING_FRAME_DISCARD = 0x3,
+	DPSW_CNT_ING_MCAST_FRAME = 0x4,
+	DPSW_CNT_ING_MCAST_BYTE = 0x5,
+	DPSW_CNT_ING_BCAST_FRAME = 0x6,
+	DPSW_CNT_ING_BCAST_BYTES = 0x7,
+	DPSW_CNT_EGR_FRAME = 0x8,
+	DPSW_CNT_EGR_BYTE = 0x9,
+	DPSW_CNT_EGR_FRAME_DISCARD = 0xa,
+	DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb,
+	DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc,
+};
+
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u16 if_id,
+			enum dpsw_counter type,
+			u64 *counter);
+
+int dpsw_if_enable(struct fsl_mc_io *mc_io,
+		   u32 cmd_flags,
+		   u16 token,
+		   u16 if_id);
+
+int dpsw_if_disable(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    u16 token,
+		    u16 if_id);
+
+/**
+ * struct dpsw_if_attr - Structure representing DPSW interface attributes
+ * @num_tcs: Number of traffic classes
+ * @rate: Transmit rate in bits per second
+ * @options: Interface configuration options (bitmap)
+ * @enabled: Indicates if interface is enabled
+ * @accept_all_vlan: The device discards/accepts incoming frames
+ *		for VLANs that do not include this interface
+ * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device
+ *		discards untagged frames or priority-tagged frames received on
+ *		this interface;
+ *		When set to 'DPSW_ADMIT_ALL', untagged frames or priority-
+ *		tagged frames received on this interface are accepted
+ * @qdid: control frames transmit qdid
+ */
+struct dpsw_if_attr {
+	u8 num_tcs;
+	u32 rate;
+	u32 options;
+	int enabled;
+	int accept_all_vlan;
+	enum dpsw_accepted_frames admit_untagged;
+	u16 qdid;
+};
+
+int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_if_attr *attr);
+
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
+				 u32 cmd_flags,
+				 u16 token,
+				 u16 if_id,
+				 u16 frame_length);
+
+/**
+ * struct dpsw_vlan_cfg - VLAN Configuration
+ * @fdb_id: Forwarding Data Base
+ */
+struct dpsw_vlan_cfg {
+	u16 fdb_id;
+};
+
+int dpsw_vlan_add(struct fsl_mc_io *mc_io,
+		  u32 cmd_flags,
+		  u16 token,
+		  u16 vlan_id,
+		  const struct dpsw_vlan_cfg *cfg);
+
+#define DPSW_VLAN_ADD_IF_OPT_FDB_ID            0x0001
+
+/**
+ * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces
+ * @num_ifs: The number of interfaces that are assigned to the egress
+ *		list for this VLAN
+ * @if_id: The set of interfaces that are
+ *		assigned to the egress list for this VLAN
+ */
+struct dpsw_vlan_if_cfg {
+	u16 num_ifs;
+	u16 options;
+	u16 if_id[DPSW_MAX_IF];
+	u16 fdb_id;
+};
+
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     u16 vlan_id,
+		     const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 vlan_id,
+			      const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			u16 vlan_id,
+			const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
+				 u32 cmd_flags,
+				 u16 token,
+				 u16 vlan_id,
+				 const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token,
+		     u16 vlan_id);
+
+/**
+ * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic
+ * @DPSW_FDB_ENTRY_STATIC: Static entry
+ * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry
+ */
+enum dpsw_fdb_entry_type {
+	DPSW_FDB_ENTRY_STATIC = 0,
+	DPSW_FDB_ENTRY_DINAMIC = 1
+};
+
+/**
+ * struct dpsw_fdb_unicast_cfg - Unicast entry configuration
+ * @type: Select static or dynamic entry
+ * @mac_addr: MAC address
+ * @if_egress: Egress interface ID
+ */
+struct dpsw_fdb_unicast_cfg {
+	enum dpsw_fdb_entry_type type;
+	u8 mac_addr[6];
+	u16 if_egress;
+};
+
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 fdb_id,
+			 const struct dpsw_fdb_unicast_cfg *cfg);
+
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
+			    u32 cmd_flags,
+			    u16 token,
+			    u16 fdb_id,
+			    const struct dpsw_fdb_unicast_cfg *cfg);
+
+#define DPSW_FDB_ENTRY_TYPE_DYNAMIC  BIT(0)
+#define DPSW_FDB_ENTRY_TYPE_UNICAST  BIT(1)
+
+/**
+ * struct fdb_dump_entry - fdb snapshot entry
+ * @mac_addr: MAC address
+ * @type: bit0 - DINAMIC(1)/STATIC(0), bit1 - UNICAST(1)/MULTICAST(0)
+ * @if_info: unicast - egress interface, multicast - number of egress interfaces
+ * @if_mask: multicast - egress interface mask
+ */
+struct fdb_dump_entry {
+	u8 mac_addr[6];
+	u8 type;
+	u8 if_info;
+	u8 if_mask[8];
+};
+
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
+		  u32 cmd_flags,
+		  u16 token,
+		  u16 fdb_id,
+		  u64 iova_addr,
+		  u32 iova_size,
+		  u16 *num_entries);
+
+/**
+ * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration
+ * @type: Select static or dynamic entry
+ * @mac_addr: MAC address
+ * @num_ifs: Number of external and internal interfaces
+ * @if_id: Egress interface IDs
+ */
+struct dpsw_fdb_multicast_cfg {
+	enum dpsw_fdb_entry_type type;
+	u8 mac_addr[6];
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u16 fdb_id,
+			   const struct dpsw_fdb_multicast_cfg *cfg);
+
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
+			      u32 cmd_flags,
+			      u16 token,
+			      u16 fdb_id,
+			      const struct dpsw_fdb_multicast_cfg *cfg);
+
+/**
+ * enum dpsw_fdb_learning_mode - Auto-learning modes
+ * @DPSW_FDB_LEARNING_MODE_DIS: Disable Auto-learning
+ * @DPSW_FDB_LEARNING_MODE_HW: Enable HW auto-Learning
+ * @DPSW_FDB_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
+ * @DPSW_FDB_LEARNING_MODE_SECURE: Enable secure learning by CPU
+ *
+ *	NONE - SECURE LEARNING
+ *	SMAC found	DMAC found	CTLU Action
+ *	v		v	Forward frame to
+ *						1.  DMAC destination
+ *	-		v	Forward frame to
+ *						1.  DMAC destination
+ *						2.  Control interface
+ *	v		-	Forward frame to
+ *						1.  Flooding list of interfaces
+ *	-		-	Forward frame to
+ *						1.  Flooding list of interfaces
+ *						2.  Control interface
+ *	SECURE LEARING
+ *	SMAC found	DMAC found	CTLU Action
+ *	v		v		Forward frame to
+ *						1.  DMAC destination
+ *	-		v		Forward frame to
+ *						1.  Control interface
+ *	v		-		Forward frame to
+ *						1.  Flooding list of interfaces
+ *	-		-		Forward frame to
+ *						1.  Control interface
+ */
+enum dpsw_fdb_learning_mode {
+	DPSW_FDB_LEARNING_MODE_DIS = 0,
+	DPSW_FDB_LEARNING_MODE_HW = 1,
+	DPSW_FDB_LEARNING_MODE_NON_SECURE = 2,
+	DPSW_FDB_LEARNING_MODE_SECURE = 3
+};
+
+/**
+ * struct dpsw_fdb_attr - FDB Attributes
+ * @max_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ * @learning_mode: Learning mode
+ * @num_fdb_mc_groups: Current number of multicast groups
+ * @max_fdb_mc_groups: Maximum number of multicast groups
+ */
+struct dpsw_fdb_attr {
+	u16 max_fdb_entries;
+	u16 fdb_ageing_time;
+	enum dpsw_fdb_learning_mode learning_mode;
+	u16 num_fdb_mc_groups;
+	u16 max_fdb_mc_groups;
+};
+
+int dpsw_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver);
+
+int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, u8 mac_addr[6]);
+
+int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				 u16 token, u16 if_id, u8 mac_addr[6]);
+
+int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
+				 u16 token, u16 if_id, u8 mac_addr[6]);
+
+/**
+ * struct dpsw_fdb_cfg  - FDB Configuration
+ * @num_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ */
+struct dpsw_fdb_cfg {
+	u16 num_fdb_entries;
+	u16 fdb_ageing_time;
+};
+
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg);
+
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id);
+
+/**
+ * enum dpsw_flood_type - Define the flood type of a DPSW object
+ * @DPSW_BROADCAST: Broadcast flooding
+ * @DPSW_FLOODING: Unknown flooding
+ */
+enum dpsw_flood_type {
+	DPSW_BROADCAST = 0,
+	DPSW_FLOODING,
+};
+
+struct dpsw_egress_flood_cfg {
+	u16 fdb_id;
+	enum dpsw_flood_type flood_type;
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg);
+
+#endif /* __FSL_DPSW_H */
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index b22f73d7bfc4..6e798229fe25 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -78,8 +78,6 @@ source "drivers/staging/clocking-wizard/Kconfig"
 
 source "drivers/staging/fbtft/Kconfig"
 
-source "drivers/staging/fsl-dpaa2/Kconfig"
-
 source "drivers/staging/most/Kconfig"
 
 source "drivers/staging/ks7010/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 2245059e69c7..8d4d9812ecdf 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_GS_FPGABOOT)	+= gs_fpgaboot/
 obj-$(CONFIG_UNISYSSPAR)	+= unisys/
 obj-$(CONFIG_COMMON_CLK_XLNX_CLKWZRD)	+= clocking-wizard/
 obj-$(CONFIG_FB_TFT)		+= fbtft/
-obj-$(CONFIG_FSL_DPAA2)		+= fsl-dpaa2/
 obj-$(CONFIG_MOST)		+= most/
 obj-$(CONFIG_KS7010)		+= ks7010/
 obj-$(CONFIG_GREYBUS)		+= greybus/
diff --git a/drivers/staging/fsl-dpaa2/Kconfig b/drivers/staging/fsl-dpaa2/Kconfig
deleted file mode 100644
index 7cb005b6e7ab..000000000000
--- a/drivers/staging/fsl-dpaa2/Kconfig
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Freescale DataPath Acceleration Architecture Gen2 (DPAA2) drivers
-#
-
-config FSL_DPAA2
-	bool "Freescale DPAA2 devices"
-	depends on FSL_MC_BUS
-	help
-	  Build drivers for Freescale DataPath Acceleration
-	  Architecture (DPAA2) family of SoCs.
-
-config FSL_DPAA2_ETHSW
-	tristate "Freescale DPAA2 Ethernet Switch"
-	depends on BRIDGE || BRIDGE=n
-	depends on FSL_DPAA2
-	depends on NET_SWITCHDEV
-	help
-	  Driver for Freescale DPAA2 Ethernet Switch. Select
-	  BRIDGE to have support for bridge tools.
diff --git a/drivers/staging/fsl-dpaa2/Makefile b/drivers/staging/fsl-dpaa2/Makefile
deleted file mode 100644
index 9645db7689c9..000000000000
--- a/drivers/staging/fsl-dpaa2/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Freescale DataPath Acceleration Architecture Gen2 (DPAA2) drivers
-#
-
-obj-$(CONFIG_FSL_DPAA2_ETHSW)		+= ethsw/
diff --git a/drivers/staging/fsl-dpaa2/ethsw/Makefile b/drivers/staging/fsl-dpaa2/ethsw/Makefile
deleted file mode 100644
index f6f2cf798faf..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Freescale DPAA2 Ethernet Switch
-#
-# Copyright 2014-2017 Freescale Semiconductor Inc.
-# Copyright 2017-2018 NXP
-
-obj-$(CONFIG_FSL_DPAA2_ETHSW) += dpaa2-ethsw.o
-
-dpaa2-ethsw-objs := ethsw.o ethsw-ethtool.o dpsw.o
diff --git a/drivers/staging/fsl-dpaa2/ethsw/README b/drivers/staging/fsl-dpaa2/ethsw/README
deleted file mode 100644
index b48dcbf7c5fb..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/README
+++ /dev/null
@@ -1,106 +0,0 @@
-DPAA2 Ethernet Switch driver
-============================
-
-This file provides documentation for the DPAA2 Ethernet Switch driver
-
-
-Contents
-========
-	Supported Platforms
-	Architecture Overview
-	Creating an Ethernet Switch
-	Features
-
-
-	Supported Platforms
-===================
-This driver provides networking support for Freescale LS2085A, LS2088A
-DPAA2 SoCs.
-
-
-Architecture Overview
-=====================
-The Ethernet Switch in the DPAA2 architecture consists of several hardware
-resources that provide the functionality. These are allocated and
-configured via the Management Complex (MC) portals. MC abstracts most of
-these resources as DPAA2 objects and exposes ABIs through which they can
-be configured and controlled.
-
-For a more detailed description of the DPAA2 architecture and its object
-abstractions see:
-	drivers/staging/fsl-mc/README.txt
-
-The Ethernet Switch is built on top of a Datapath Switch (DPSW) object.
-
-Configuration interface:
-
-          ---------------------
-         | DPAA2 Switch driver |
-          ---------------------
-                   .
-                   .
-              ----------
-             | DPSW API |
-              ----------
-                   .           software
- ================= . ==============
-                   .           hardware
-          ---------------------
-         | MC hardware portals |
-          ---------------------
-                   .
-                   .
-                 ------
-                | DPSW |
-                 ------
-
-Driver uses the switch device driver model and exposes each switch port as
-a network interface, which can be included in a bridge. Traffic switched
-between ports is offloaded into the hardware. Exposed network interfaces
-are not used for I/O, they are used just for configuration. This
-limitation is going to be addressed in the future.
-
-The DPSW can have ports connected to DPNIs or to PHYs via DPMACs.
-
-
- [ethA]     [ethB]     [ethC]     [ethD]     [ethE]     [ethF]
-    :          :          :          :          :          :
-    :          :          :          :          :          :
-[eth drv]  [eth drv]  [                ethsw drv              ]
-    :          :          :          :          :          :        kernel
-========================================================================
-    :          :          :          :          :          :        hardware
- [DPNI]      [DPNI]     [============= DPSW =================]
-    |          |          |          |          |          |
-    |           ----------           |       [DPMAC]    [DPMAC]
-     -------------------------------            |          |
-                                                |          |
-                                              [PHY]      [PHY]
-
-For a more detailed description of the Ethernet switch device driver model
-see:
-	Documentation/networking/switchdev.rst
-
-Creating an Ethernet Switch
-===========================
-A device is created for the switch objects probed on the MC bus. Each DPSW
-has a number of properties which determine the configuration options and
-associated hardware resources.
-
-A DPSW object (and the other DPAA2 objects needed for a DPAA2 switch) can
-be added to a container on the MC bus in one of two ways: statically,
-through a Datapath Layout Binary file (DPL) that is parsed by MC at boot
-time; or created dynamically at runtime, via the DPAA2 objects APIs.
-
-Features
-========
-Driver configures DPSW to perform hardware switching offload of
-unicast/multicast/broadcast (VLAN tagged or untagged) traffic between its
-ports.
-
-It allows configuration of hardware learning, flooding, multicast groups,
-port VLAN configuration and STP state.
-
-Static entries can be added/removed from the FDB.
-
-Hardware statistics for each port are provided through ethtool -S option.
diff --git a/drivers/staging/fsl-dpaa2/ethsw/TODO b/drivers/staging/fsl-dpaa2/ethsw/TODO
deleted file mode 100644
index 4d46857b0b2b..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/TODO
+++ /dev/null
@@ -1,13 +0,0 @@
-* Add I/O capabilities on switch port netdevices. This will allow control
-traffic to reach the CPU.
-* Add ACL to redirect control traffic to CPU.
-* Add support for multiple FDBs and switch port partitioning
-* MC firmware uprev; the DPAA2 objects used by the Ethernet Switch driver
-need to be kept in sync with binary interface changes in MC
-* refine README file
-* cleanup
-
-NOTE: At least first three of the above are required before getting the
-DPAA2 Ethernet Switch driver out of staging. Another requirement is that
-dpio driver is moved to drivers/soc (this is required for I/O).
-
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
deleted file mode 100644
index eb620e832412..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ /dev/null
@@ -1,458 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2021 NXP
- *
- */
-
-#ifndef __FSL_DPSW_CMD_H
-#define __FSL_DPSW_CMD_H
-
-#include "dpsw.h"
-
-/* DPSW Version */
-#define DPSW_VER_MAJOR		8
-#define DPSW_VER_MINOR		9
-
-#define DPSW_CMD_BASE_VERSION	1
-#define DPSW_CMD_VERSION_2	2
-#define DPSW_CMD_ID_OFFSET	4
-
-#define DPSW_CMD_ID(id)	(((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_BASE_VERSION)
-#define DPSW_CMD_V2(id) (((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_VERSION_2)
-
-/* Command IDs */
-#define DPSW_CMDID_CLOSE                    DPSW_CMD_ID(0x800)
-#define DPSW_CMDID_OPEN                     DPSW_CMD_ID(0x802)
-
-#define DPSW_CMDID_GET_API_VERSION          DPSW_CMD_ID(0xa02)
-
-#define DPSW_CMDID_ENABLE                   DPSW_CMD_ID(0x002)
-#define DPSW_CMDID_DISABLE                  DPSW_CMD_ID(0x003)
-#define DPSW_CMDID_GET_ATTR                 DPSW_CMD_V2(0x004)
-#define DPSW_CMDID_RESET                    DPSW_CMD_ID(0x005)
-
-#define DPSW_CMDID_SET_IRQ_ENABLE           DPSW_CMD_ID(0x012)
-
-#define DPSW_CMDID_SET_IRQ_MASK             DPSW_CMD_ID(0x014)
-
-#define DPSW_CMDID_GET_IRQ_STATUS           DPSW_CMD_ID(0x016)
-#define DPSW_CMDID_CLEAR_IRQ_STATUS         DPSW_CMD_ID(0x017)
-
-#define DPSW_CMDID_IF_SET_TCI               DPSW_CMD_ID(0x030)
-#define DPSW_CMDID_IF_SET_STP               DPSW_CMD_ID(0x031)
-
-#define DPSW_CMDID_IF_GET_COUNTER           DPSW_CMD_V2(0x034)
-
-#define DPSW_CMDID_IF_ENABLE                DPSW_CMD_ID(0x03D)
-#define DPSW_CMDID_IF_DISABLE               DPSW_CMD_ID(0x03E)
-
-#define DPSW_CMDID_IF_GET_ATTR              DPSW_CMD_ID(0x042)
-
-#define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH  DPSW_CMD_ID(0x044)
-
-#define DPSW_CMDID_IF_GET_LINK_STATE        DPSW_CMD_ID(0x046)
-
-#define DPSW_CMDID_IF_GET_TCI               DPSW_CMD_ID(0x04A)
-
-#define DPSW_CMDID_IF_SET_LINK_CFG          DPSW_CMD_ID(0x04C)
-
-#define DPSW_CMDID_VLAN_ADD                 DPSW_CMD_ID(0x060)
-#define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_V2(0x061)
-#define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED     DPSW_CMD_ID(0x062)
-
-#define DPSW_CMDID_VLAN_REMOVE_IF           DPSW_CMD_ID(0x064)
-#define DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED  DPSW_CMD_ID(0x065)
-#define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING  DPSW_CMD_ID(0x066)
-#define DPSW_CMDID_VLAN_REMOVE              DPSW_CMD_ID(0x067)
-
-#define DPSW_CMDID_FDB_ADD                  DPSW_CMD_ID(0x082)
-#define DPSW_CMDID_FDB_REMOVE               DPSW_CMD_ID(0x083)
-#define DPSW_CMDID_FDB_ADD_UNICAST          DPSW_CMD_ID(0x084)
-#define DPSW_CMDID_FDB_REMOVE_UNICAST       DPSW_CMD_ID(0x085)
-#define DPSW_CMDID_FDB_ADD_MULTICAST        DPSW_CMD_ID(0x086)
-#define DPSW_CMDID_FDB_REMOVE_MULTICAST     DPSW_CMD_ID(0x087)
-#define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
-
-#define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
-#define DPSW_CMDID_IF_GET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A8)
-#define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
-
-#define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
-#define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
-#define DPSW_CMDID_CTRL_IF_ENABLE           DPSW_CMD_ID(0x0A2)
-#define DPSW_CMDID_CTRL_IF_DISABLE          DPSW_CMD_ID(0x0A3)
-#define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
-
-#define DPSW_CMDID_SET_EGRESS_FLOOD         DPSW_CMD_ID(0x0AC)
-
-/* Macros for accessing command fields smaller than 1byte */
-#define DPSW_MASK(field)        \
-	GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \
-		DPSW_##field##_SHIFT)
-#define dpsw_set_field(var, field, val) \
-	((var) |= (((val) << DPSW_##field##_SHIFT) & DPSW_MASK(field)))
-#define dpsw_get_field(var, field)      \
-	(((var) & DPSW_MASK(field)) >> DPSW_##field##_SHIFT)
-#define dpsw_get_bit(var, bit) \
-	(((var)  >> (bit)) & GENMASK(0, 0))
-
-#pragma pack(push, 1)
-struct dpsw_cmd_open {
-	__le32 dpsw_id;
-};
-
-#define DPSW_COMPONENT_TYPE_SHIFT	0
-#define DPSW_COMPONENT_TYPE_SIZE	4
-
-struct dpsw_cmd_create {
-	/* cmd word 0 */
-	__le16 num_ifs;
-	u8 max_fdbs;
-	u8 max_meters_per_if;
-	/* from LSB: only the first 4 bits */
-	u8 component_type;
-	u8 pad[3];
-	/* cmd word 1 */
-	__le16 max_vlans;
-	__le16 max_fdb_entries;
-	__le16 fdb_aging_time;
-	__le16 max_fdb_mc_groups;
-	/* cmd word 2 */
-	__le64 options;
-};
-
-struct dpsw_cmd_destroy {
-	__le32 dpsw_id;
-};
-
-#define DPSW_ENABLE_SHIFT 0
-#define DPSW_ENABLE_SIZE  1
-
-struct dpsw_rsp_is_enabled {
-	/* from LSB: enable:1 */
-	u8 enabled;
-};
-
-struct dpsw_cmd_set_irq_enable {
-	u8 enable_state;
-	u8 pad[3];
-	u8 irq_index;
-};
-
-struct dpsw_cmd_get_irq_enable {
-	__le32 pad;
-	u8 irq_index;
-};
-
-struct dpsw_rsp_get_irq_enable {
-	u8 enable_state;
-};
-
-struct dpsw_cmd_set_irq_mask {
-	__le32 mask;
-	u8 irq_index;
-};
-
-struct dpsw_cmd_get_irq_mask {
-	__le32 pad;
-	u8 irq_index;
-};
-
-struct dpsw_rsp_get_irq_mask {
-	__le32 mask;
-};
-
-struct dpsw_cmd_get_irq_status {
-	__le32 status;
-	u8 irq_index;
-};
-
-struct dpsw_rsp_get_irq_status {
-	__le32 status;
-};
-
-struct dpsw_cmd_clear_irq_status {
-	__le32 status;
-	u8 irq_index;
-};
-
-#define DPSW_COMPONENT_TYPE_SHIFT	0
-#define DPSW_COMPONENT_TYPE_SIZE	4
-
-#define DPSW_FLOODING_CFG_SHIFT		0
-#define DPSW_FLOODING_CFG_SIZE		4
-
-#define DPSW_BROADCAST_CFG_SHIFT	4
-#define DPSW_BROADCAST_CFG_SIZE		4
-
-struct dpsw_rsp_get_attr {
-	/* cmd word 0 */
-	__le16 num_ifs;
-	u8 max_fdbs;
-	u8 num_fdbs;
-	__le16 max_vlans;
-	__le16 num_vlans;
-	/* cmd word 1 */
-	__le16 max_fdb_entries;
-	__le16 fdb_aging_time;
-	__le32 dpsw_id;
-	/* cmd word 2 */
-	__le16 mem_size;
-	__le16 max_fdb_mc_groups;
-	u8 max_meters_per_if;
-	/* from LSB only the first 4 bits */
-	u8 component_type;
-	/* [0:3] - flooding configuration
-	 * [4:7] - broadcast configuration
-	 */
-	u8 repl_cfg;
-	u8 pad;
-	/* cmd word 3 */
-	__le64 options;
-};
-
-#define DPSW_VLAN_ID_SHIFT	0
-#define DPSW_VLAN_ID_SIZE	12
-#define DPSW_DEI_SHIFT		12
-#define DPSW_DEI_SIZE		1
-#define DPSW_PCP_SHIFT		13
-#define DPSW_PCP_SIZE		3
-
-struct dpsw_cmd_if_set_tci {
-	__le16 if_id;
-	/* from LSB: VLAN_ID:12 DEI:1 PCP:3 */
-	__le16 conf;
-};
-
-struct dpsw_cmd_if_get_tci {
-	__le16 if_id;
-};
-
-struct dpsw_rsp_if_get_tci {
-	__le16 pad;
-	__le16 vlan_id;
-	u8 dei;
-	u8 pcp;
-};
-
-#define DPSW_STATE_SHIFT	0
-#define DPSW_STATE_SIZE		4
-
-struct dpsw_cmd_if_set_stp {
-	__le16 if_id;
-	__le16 vlan_id;
-	/* only the first LSB 4 bits */
-	u8 state;
-};
-
-#define DPSW_COUNTER_TYPE_SHIFT		0
-#define DPSW_COUNTER_TYPE_SIZE		5
-
-struct dpsw_cmd_if_get_counter {
-	__le16 if_id;
-	/* from LSB: type:5 */
-	u8 type;
-};
-
-struct dpsw_rsp_if_get_counter {
-	__le64 pad;
-	__le64 counter;
-};
-
-struct dpsw_cmd_if {
-	__le16 if_id;
-};
-
-#define DPSW_ADMIT_UNTAGGED_SHIFT	0
-#define DPSW_ADMIT_UNTAGGED_SIZE	4
-#define DPSW_ENABLED_SHIFT		5
-#define DPSW_ENABLED_SIZE		1
-#define DPSW_ACCEPT_ALL_VLAN_SHIFT	6
-#define DPSW_ACCEPT_ALL_VLAN_SIZE	1
-
-struct dpsw_rsp_if_get_attr {
-	/* cmd word 0 */
-	/* from LSB: admit_untagged:4 enabled:1 accept_all_vlan:1 */
-	u8 conf;
-	u8 pad1;
-	u8 num_tcs;
-	u8 pad2;
-	__le16 qdid;
-	/* cmd word 1 */
-	__le32 options;
-	__le32 pad3;
-	/* cmd word 2 */
-	__le32 rate;
-};
-
-struct dpsw_cmd_if_set_max_frame_length {
-	__le16 if_id;
-	__le16 frame_length;
-};
-
-struct dpsw_cmd_if_set_link_cfg {
-	/* cmd word 0 */
-	__le16 if_id;
-	u8 pad[6];
-	/* cmd word 1 */
-	__le32 rate;
-	__le32 pad1;
-	/* cmd word 2 */
-	__le64 options;
-};
-
-struct dpsw_cmd_if_get_link_state {
-	__le16 if_id;
-};
-
-#define DPSW_UP_SHIFT	0
-#define DPSW_UP_SIZE	1
-
-struct dpsw_rsp_if_get_link_state {
-	/* cmd word 0 */
-	__le32 pad0;
-	u8 up;
-	u8 pad1[3];
-	/* cmd word 1 */
-	__le32 rate;
-	__le32 pad2;
-	/* cmd word 2 */
-	__le64 options;
-};
-
-struct dpsw_vlan_add {
-	__le16 fdb_id;
-	__le16 vlan_id;
-};
-
-struct dpsw_cmd_vlan_add_if {
-	/* cmd word 0 */
-	__le16 options;
-	__le16 vlan_id;
-	__le16 fdb_id;
-	__le16 pad0;
-	/* cmd word 1-4 */
-	__le64 if_id;
-};
-
-struct dpsw_cmd_vlan_manage_if {
-	/* cmd word 0 */
-	__le16 pad0;
-	__le16 vlan_id;
-	__le32 pad1;
-	/* cmd word 1-4 */
-	__le64 if_id[4];
-};
-
-struct dpsw_cmd_vlan_remove {
-	__le16 pad;
-	__le16 vlan_id;
-};
-
-struct dpsw_cmd_fdb_add {
-	__le32 pad;
-	__le16 fdb_ageing_time;
-	__le16 num_fdb_entries;
-};
-
-struct dpsw_rsp_fdb_add {
-	__le16 fdb_id;
-};
-
-struct dpsw_cmd_fdb_remove {
-	__le16 fdb_id;
-};
-
-#define DPSW_ENTRY_TYPE_SHIFT	0
-#define DPSW_ENTRY_TYPE_SIZE	4
-
-struct dpsw_cmd_fdb_unicast_op {
-	/* cmd word 0 */
-	__le16 fdb_id;
-	u8 mac_addr[6];
-	/* cmd word 1 */
-	__le16 if_egress;
-	/* only the first 4 bits from LSB */
-	u8 type;
-};
-
-struct dpsw_cmd_fdb_multicast_op {
-	/* cmd word 0 */
-	__le16 fdb_id;
-	__le16 num_ifs;
-	/* only the first 4 bits from LSB */
-	u8 type;
-	u8 pad[3];
-	/* cmd word 1 */
-	u8 mac_addr[6];
-	__le16 pad2;
-	/* cmd word 2-5 */
-	__le64 if_id[4];
-};
-
-struct dpsw_cmd_fdb_dump {
-	__le16 fdb_id;
-	__le16 pad0;
-	__le32 pad1;
-	__le64 iova_addr;
-	__le32 iova_size;
-};
-
-struct dpsw_rsp_fdb_dump {
-	__le16 num_entries;
-};
-
-struct dpsw_rsp_ctrl_if_get_attr {
-	__le64 pad;
-	__le32 rx_fqid;
-	__le32 rx_err_fqid;
-	__le32 tx_err_conf_fqid;
-};
-
-#define DPSW_BACKUP_POOL(val, order)	(((val) & 0x1) << (order))
-struct dpsw_cmd_ctrl_if_set_pools {
-	u8 num_dpbp;
-	u8 backup_pool_mask;
-	__le16 pad;
-	__le32 dpbp_id[DPSW_MAX_DPBP];
-	__le16 buffer_size[DPSW_MAX_DPBP];
-};
-
-#define DPSW_DEST_TYPE_SHIFT	0
-#define DPSW_DEST_TYPE_SIZE	4
-
-struct dpsw_cmd_ctrl_if_set_queue {
-	__le32 dest_id;
-	u8 dest_priority;
-	u8 pad;
-	/* from LSB: dest_type:4 */
-	u8 dest_type;
-	u8 qtype;
-	__le64 user_ctx;
-	__le32 options;
-};
-
-struct dpsw_rsp_get_api_version {
-	__le16 version_major;
-	__le16 version_minor;
-};
-
-struct dpsw_rsp_if_get_mac_addr {
-	__le16 pad;
-	u8 mac_addr[6];
-};
-
-struct dpsw_cmd_if_set_mac_addr {
-	__le16 if_id;
-	u8 mac_addr[6];
-};
-
-struct dpsw_cmd_set_egress_flood {
-	__le16 fdb_id;
-	u8 flood_type;
-	u8 pad[5];
-	__le64 if_id;
-};
-#pragma pack(pop)
-#endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
deleted file mode 100644
index 5189f156100e..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ /dev/null
@@ -1,1486 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2021 NXP
- *
- */
-
-#include <linux/fsl/mc.h>
-#include "dpsw.h"
-#include "dpsw-cmd.h"
-
-static void build_if_id_bitmap(__le64 *bmap,
-			       const u16 *id,
-			       const u16 num_ifs)
-{
-	int i;
-
-	for (i = 0; (i < num_ifs) && (i < DPSW_MAX_IF); i++) {
-		if (id[i] < DPSW_MAX_IF)
-			bmap[id[i] / 64] |= cpu_to_le64(BIT_MASK(id[i] % 64));
-	}
-}
-
-/**
- * dpsw_open() - Open a control session for the specified object
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @dpsw_id:	DPSW unique ID
- * @token:	Returned token; use in subsequent API calls
- *
- * This function can be used to open a control session for an
- * already created object; an object may have been declared in
- * the DPL or by calling the dpsw_create() function.
- * This function returns a unique authentication token,
- * associated with the specific object ID and the specific MC
- * portal; this token must be used in all subsequent commands for
- * this specific object
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_open *cmd_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_OPEN,
-					  cmd_flags,
-					  0);
-	cmd_params = (struct dpsw_cmd_open *)cmd.params;
-	cmd_params->dpsw_id = cpu_to_le32(dpsw_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	*token = mc_cmd_hdr_read_token(&cmd);
-
-	return 0;
-}
-
-/**
- * dpsw_close() - Close the control session of the object
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * After this function is called, no further operations are
- * allowed on the object without opening a new control session.
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLOSE,
-					  cmd_flags,
-					  token);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_enable() - Enable DPSW functionality
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ENABLE,
-					  cmd_flags,
-					  token);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_disable() - Disable DPSW functionality
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_DISABLE,
-					  cmd_flags,
-					  token);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_reset() - Reset the DPSW, returns the object to initial state.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_RESET,
-					  cmd_flags,
-					  token);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_set_irq_enable() - Set overall interrupt state.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPCI object
- * @irq_index:	The interrupt index to configure
- * @en:		Interrupt state - enable = 1, disable = 0
- *
- * Allows GPP software to control when interrupts are generated.
- * Each interrupt can have up to 32 causes.  The enable/disable control's the
- * overall interrupt state. if the interrupt is disabled no causes will cause
- * an interrupt
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_set_irq_enable *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_ENABLE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_set_irq_enable *)cmd.params;
-	dpsw_set_field(cmd_params->enable_state, ENABLE, en);
-	cmd_params->irq_index = irq_index;
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_set_irq_mask() - Set interrupt mask.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPCI object
- * @irq_index:	The interrupt index to configure
- * @mask:	Event mask to trigger interrupt;
- *		each bit:
- *			0 = ignore event
- *			1 = consider event for asserting IRQ
- *
- * Every interrupt can have up to 32 causes and the interrupt model supports
- * masking/unmasking each cause independently
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_set_irq_mask *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_MASK,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_set_irq_mask *)cmd.params;
-	cmd_params->mask = cpu_to_le32(mask);
-	cmd_params->irq_index = irq_index;
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_get_irq_status() - Get the current status of any pending interrupts
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @irq_index:	The interrupt index to configure
- * @status:	Returned interrupts status - one bit per cause:
- *			0 = no interrupt pending
- *			1 = interrupt pending
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_get_irq_status *cmd_params;
-	struct dpsw_rsp_get_irq_status *rsp_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_STATUS,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_get_irq_status *)cmd.params;
-	cmd_params->status = cpu_to_le32(*status);
-	cmd_params->irq_index = irq_index;
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_get_irq_status *)cmd.params;
-	*status = le32_to_cpu(rsp_params->status);
-
-	return 0;
-}
-
-/**
- * dpsw_clear_irq_status() - Clear a pending interrupt's status
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPCI object
- * @irq_index:	The interrupt index to configure
- * @status:	bits to clear (W1C) - one bit per cause:
- *			0 = don't change
- *			1 = clear status bit
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_clear_irq_status *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLEAR_IRQ_STATUS,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_clear_irq_status *)cmd.params;
-	cmd_params->status = cpu_to_le32(status);
-	cmd_params->irq_index = irq_index;
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_get_attributes() - Retrieve DPSW attributes
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @attr:	Returned DPSW attributes
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			struct dpsw_attr *attr)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_rsp_get_attr *rsp_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_ATTR,
-					  cmd_flags,
-					  token);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_get_attr *)cmd.params;
-	attr->num_ifs = le16_to_cpu(rsp_params->num_ifs);
-	attr->max_fdbs = rsp_params->max_fdbs;
-	attr->num_fdbs = rsp_params->num_fdbs;
-	attr->max_vlans = le16_to_cpu(rsp_params->max_vlans);
-	attr->num_vlans = le16_to_cpu(rsp_params->num_vlans);
-	attr->max_fdb_entries = le16_to_cpu(rsp_params->max_fdb_entries);
-	attr->fdb_aging_time = le16_to_cpu(rsp_params->fdb_aging_time);
-	attr->id = le32_to_cpu(rsp_params->dpsw_id);
-	attr->mem_size = le16_to_cpu(rsp_params->mem_size);
-	attr->max_fdb_mc_groups = le16_to_cpu(rsp_params->max_fdb_mc_groups);
-	attr->max_meters_per_if = rsp_params->max_meters_per_if;
-	attr->options = le64_to_cpu(rsp_params->options);
-	attr->component_type = dpsw_get_field(rsp_params->component_type, COMPONENT_TYPE);
-	attr->flooding_cfg = dpsw_get_field(rsp_params->repl_cfg, FLOODING_CFG);
-	attr->broadcast_cfg = dpsw_get_field(rsp_params->repl_cfg, BROADCAST_CFG);
-	return 0;
-}
-
-/**
- * dpsw_if_set_link_cfg() - Set the link configuration.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface id
- * @cfg:	Link configuration
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 struct dpsw_link_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_link_cfg *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LINK_CFG,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_link_cfg *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	cmd_params->rate = cpu_to_le32(cfg->rate);
-	cmd_params->options = cpu_to_le64(cfg->options);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_get_link_state - Return the link state
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface id
- * @state:	Link state	1 - linkup, 0 - link down or disconnected
- *
- * @Return	'0' on Success; Error code otherwise.
- */
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_get_link_state *cmd_params;
-	struct dpsw_rsp_if_get_link_state *rsp_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_LINK_STATE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_get_link_state *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_link_state *)cmd.params;
-	state->rate = le32_to_cpu(rsp_params->rate);
-	state->options = le64_to_cpu(rsp_params->options);
-	state->up = dpsw_get_field(rsp_params->up, UP);
-
-	return 0;
-}
-
-/**
- * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI)
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @cfg:	Tag Control Information Configuration
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_tci_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_tci *cmd_params;
-	u16 tmp_conf = 0;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_TCI,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_tci *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(tmp_conf, VLAN_ID, cfg->vlan_id);
-	dpsw_set_field(tmp_conf, DEI, cfg->dei);
-	dpsw_set_field(tmp_conf, PCP, cfg->pcp);
-	cmd_params->conf = cpu_to_le16(tmp_conf);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_get_tci() - Get default VLAN Tag Control Information (TCI)
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @cfg:	Tag Control Information Configuration
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    struct dpsw_tci_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_get_tci *cmd_params;
-	struct dpsw_rsp_if_get_tci *rsp_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_TCI,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_get_tci *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_tci *)cmd.params;
-	cfg->pcp = rsp_params->pcp;
-	cfg->dei = rsp_params->dei;
-	cfg->vlan_id = le16_to_cpu(rsp_params->vlan_id);
-
-	return 0;
-}
-
-/**
- * dpsw_if_set_stp() - Function sets Spanning Tree Protocol (STP) state.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @cfg:	STP State configuration parameters
- *
- * The following STP states are supported -
- * blocking, listening, learning, forwarding and disabled.
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_stp_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_stp *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_STP,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_stp *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id);
-	dpsw_set_field(cmd_params->state, STATE, cfg->state);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_get_counter() - Get specific counter of particular interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @type:	Counter type
- * @counter:	return value
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_get_counter *cmd_params;
-	struct dpsw_rsp_if_get_counter *rsp_params;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_COUNTER,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_get_counter *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	dpsw_set_field(cmd_params->type, COUNTER_TYPE, type);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_counter *)cmd.params;
-	*counter = le64_to_cpu(rsp_params->counter);
-
-	return 0;
-}
-
-/**
- * dpsw_if_enable() - Enable Interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ENABLE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_disable() - Disable Interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_DISABLE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_if_get_attributes() - Function obtains attributes of interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @attr:	Returned interface attributes
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   u16 if_id, struct dpsw_if_attr *attr)
-{
-	struct dpsw_rsp_if_get_attr *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-	int err;
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	rsp_params = (struct dpsw_rsp_if_get_attr *)cmd.params;
-	attr->num_tcs = rsp_params->num_tcs;
-	attr->rate = le32_to_cpu(rsp_params->rate);
-	attr->options = le32_to_cpu(rsp_params->options);
-	attr->qdid = le16_to_cpu(rsp_params->qdid);
-	attr->enabled = dpsw_get_field(rsp_params->conf, ENABLED);
-	attr->accept_all_vlan = dpsw_get_field(rsp_params->conf,
-					       ACCEPT_ALL_VLAN);
-	attr->admit_untagged = dpsw_get_field(rsp_params->conf,
-					      ADMIT_UNTAGGED);
-
-	return 0;
-}
-
-/**
- * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length.
- * @mc_io:		Pointer to MC portal's I/O object
- * @cmd_flags:		Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:		Token of DPSW object
- * @if_id:		Interface Identifier
- * @frame_length:	Maximum Frame Length
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if_set_max_frame_length *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_max_frame_length *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	cmd_params->frame_length = cpu_to_le16(frame_length);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_add() - Adding new VLAN to DPSW.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- * @cfg:	VLAN configuration
- *
- * Only VLAN ID and FDB ID are required parameters here.
- * 12 bit VLAN ID is defined in IEEE802.1Q.
- * Adding a duplicate VLAN ID is not allowed.
- * FDB ID can be shared across multiple VLANs. Shared learning
- * is obtained by calling dpsw_vlan_add for multiple VLAN IDs
- * with same fdb_id
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_vlan_add *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_vlan_add *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_add_if() - Adding a set of interfaces to an existing VLAN.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- * @cfg:	Set of interfaces to add
- *
- * It adds only interfaces not belonging to this VLAN yet,
- * otherwise an error is generated and an entire command is
- * ignored. This function can be called numerous times always
- * providing required interfaces delta.
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_add_if_untagged() - Defining a set of interfaces that should be
- *				transmitted as untagged.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- * @cfg:	Set of interfaces that should be transmitted as untagged
- *
- * These interfaces should already belong to this VLAN.
- * By default all interfaces are transmitted as tagged.
- * Providing un-existing interface or untagged interface that is
- * configured untagged already generates an error and the entire
- * command is ignored.
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF_UNTAGGED,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_remove_if() - Remove interfaces from an existing VLAN.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- * @cfg:	Set of interfaces that should be removed
- *
- * Interfaces must belong to this VLAN, otherwise an error
- * is returned and an the command is ignored
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_remove_if_untagged() - Define a set of interfaces that should be
- *		converted from transmitted as untagged to transmit as tagged.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- * @cfg:	Set of interfaces that should be removed
- *
- * Interfaces provided by API have to belong to this VLAN and
- * configured untagged, otherwise an error is returned and the
- * command is ignored
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_vlan_remove() - Remove an entire VLAN
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @vlan_id:	VLAN Identifier
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_remove *cmd_params;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_vlan_remove *)cmd.params;
-	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_fdb_add() - Add FDB to switch and Returns handle to FDB table for
- *		the reference
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Returned Forwarding Database Identifier
- * @cfg:	FDB Configuration
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
-		 const struct dpsw_fdb_cfg *cfg)
-{
-	struct dpsw_cmd_fdb_add *cmd_params;
-	struct dpsw_rsp_fdb_add *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	int err;
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_add *)cmd.params;
-	cmd_params->fdb_ageing_time = cpu_to_le16(cfg->fdb_ageing_time);
-	cmd_params->num_fdb_entries = cpu_to_le16(cfg->num_fdb_entries);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	rsp_params = (struct dpsw_rsp_fdb_add *)cmd.params;
-	*fdb_id = le16_to_cpu(rsp_params->fdb_id);
-
-	return 0;
-}
-
-/**
- * dpsw_fdb_remove() - Remove FDB from switch
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id)
-{
-	struct dpsw_cmd_fdb_remove *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_remove *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_fdb_add_unicast() - Function adds an unicast entry into MAC lookup table
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @cfg:	Unicast entry configuration
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_unicast_op *cmd_params;
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_UNICAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	cmd_params->if_egress = cpu_to_le16(cfg->if_egress);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
-	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_fdb_dump() - Dump the content of FDB table into memory.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @iova_addr:	Data will be stored here as an array of struct fdb_dump_entry
- * @iova_size:	Memory size allocated at iova_addr
- * @num_entries:Number of entries written at iova_addr
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- *
- * The memory allocated at iova_addr must be initialized with zero before
- * command execution. If the FDB table does not fit into memory MC will stop
- * after the memory is filled up.
- * The struct fdb_dump_entry array must be parsed until the end of memory
- * area or until an entry with mac_addr set to zero is found.
- */
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries)
-{
-	struct dpsw_cmd_fdb_dump *cmd_params;
-	struct dpsw_rsp_fdb_dump *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_DUMP,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_dump *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	cmd_params->iova_addr = cpu_to_le64(iova_addr);
-	cmd_params->iova_size = cpu_to_le32(iova_size);
-
-	/* send command to mc */
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	rsp_params = (struct dpsw_rsp_fdb_dump *)cmd.params;
-	*num_entries = le16_to_cpu(rsp_params->num_entries);
-
-	return 0;
-}
-
-/**
- * dpsw_fdb_remove_unicast() - removes an entry from MAC lookup table
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @cfg:	Unicast entry configuration
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_unicast_op *cmd_params;
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_UNICAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
-	cmd_params->if_egress = cpu_to_le16(cfg->if_egress);
-	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_fdb_add_multicast() - Add a set of egress interfaces to multi-cast group
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @cfg:	Multicast entry configuration
- *
- * If group doesn't exist, it will be created.
- * It adds only interfaces not belonging to this multicast group
- * yet, otherwise error will be generated and the command is
- * ignored.
- * This function may be called numerous times always providing
- * required interfaces delta.
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_multicast_op *cmd_params;
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_MULTICAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
-	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_fdb_remove_multicast() - Removing interfaces from an existing multicast
- *				group.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @fdb_id:	Forwarding Database Identifier
- * @cfg:	Multicast entry configuration
- *
- * Interfaces provided by this API have to exist in the group,
- * otherwise an error will be returned and an entire command
- * ignored. If there is no interface left in the group,
- * an entire group is deleted
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_fdb_multicast_op *cmd_params;
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_MULTICAST,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(fdb_id);
-	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
-	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_ctrl_if_get_attributes() - Obtain control interface attributes
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @attr:	Returned control interface attributes
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				u16 token, struct dpsw_ctrl_if_attr *attr)
-{
-	struct dpsw_rsp_ctrl_if_get_attr *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	int err;
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR,
-					  cmd_flags, token);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	rsp_params = (struct dpsw_rsp_ctrl_if_get_attr *)cmd.params;
-	attr->rx_fqid = le32_to_cpu(rsp_params->rx_fqid);
-	attr->rx_err_fqid = le32_to_cpu(rsp_params->rx_err_fqid);
-	attr->tx_err_conf_fqid = le32_to_cpu(rsp_params->tx_err_conf_fqid);
-
-	return 0;
-}
-
-/**
- * dpsw_ctrl_if_set_pools() - Set control interface buffer pools
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @cfg:	Buffer pools configuration
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   const struct dpsw_ctrl_if_pools_cfg *cfg)
-{
-	struct dpsw_cmd_ctrl_if_set_pools *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-	int i;
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS,
-					  cmd_flags, token);
-	cmd_params = (struct dpsw_cmd_ctrl_if_set_pools *)cmd.params;
-	cmd_params->num_dpbp = cfg->num_dpbp;
-	for (i = 0; i < DPSW_MAX_DPBP; i++) {
-		cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id);
-		cmd_params->buffer_size[i] =
-			cpu_to_le16(cfg->pools[i].buffer_size);
-		cmd_params->backup_pool_mask |=
-			DPSW_BACKUP_POOL(cfg->pools[i].backup_pool, i);
-	}
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_ctrl_if_set_queue() - Set Rx queue configuration
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of dpsw object
- * @qtype:	dpsw_queue_type of the targeted queue
- * @cfg:	Rx queue configuration
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   enum dpsw_queue_type qtype,
-			   const struct dpsw_ctrl_if_queue_cfg *cfg)
-{
-	struct dpsw_cmd_ctrl_if_set_queue *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_QUEUE,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_ctrl_if_set_queue *)cmd.params;
-	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
-	cmd_params->dest_priority = cfg->dest_cfg.priority;
-	cmd_params->qtype = qtype;
-	cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
-	cmd_params->options = cpu_to_le32(cfg->options);
-	dpsw_set_field(cmd_params->dest_type,
-		       DEST_TYPE,
-		       cfg->dest_cfg.dest_type);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_get_api_version() - Get Data Path Switch API version
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @major_ver:	Major version of data path switch API
- * @minor_ver:	Minor version of data path switch API
- *
- * Return:  '0' on Success; Error code otherwise.
- */
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_rsp_get_api_version *rsp_params;
-	int err;
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_API_VERSION,
-					  cmd_flags,
-					  0);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	rsp_params = (struct dpsw_rsp_get_api_version *)cmd.params;
-	*major_ver = le16_to_cpu(rsp_params->version_major);
-	*minor_ver = le16_to_cpu(rsp_params->version_minor);
-
-	return 0;
-}
-
-/**
- * dpsw_if_get_port_mac_addr()
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			      u16 if_id, u8 mac_addr[6])
-{
-	struct dpsw_rsp_if_get_mac_addr *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-	int err, i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_PORT_MAC_ADDR,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
-	for (i = 0; i < 6; i++)
-		mac_addr[5 - i] = rsp_params->mac_addr[i];
-
-	return 0;
-}
-
-/**
- * dpsw_if_get_primary_mac_addr()
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
-{
-	struct dpsw_rsp_if_get_mac_addr *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-	int err, i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
-	for (i = 0; i < 6; i++)
-		mac_addr[5 - i] = rsp_params->mac_addr[i];
-
-	return 0;
-}
-
-/**
- * dpsw_if_set_primary_mac_addr()
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
-{
-	struct dpsw_cmd_if_set_mac_addr *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_mac_addr *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = mac_addr[5 - i];
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_ctrl_if_enable() - Enable control interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE, cmd_flags,
-					  token);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_ctrl_if_disable() - Function disables control interface
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE,
-					  cmd_flags,
-					  token);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpsw_set_egress_flood() - Set egress parameters associated with an FDB ID
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @cfg:	Egress flooding configuration
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			  const struct dpsw_egress_flood_cfg *cfg)
-{
-	struct dpsw_cmd_set_egress_flood *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_EGRESS_FLOOD, cmd_flags, token);
-	cmd_params = (struct dpsw_cmd_set_egress_flood *)cmd.params;
-	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
-	cmd_params->flood_type = cfg->flood_type;
-	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
-
-	return mc_send_command(mc_io, &cmd);
-}
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
deleted file mode 100644
index 9e04350f3277..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ /dev/null
@@ -1,751 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2021 NXP
- *
- */
-
-#ifndef __FSL_DPSW_H
-#define __FSL_DPSW_H
-
-/* Data Path L2-Switch API
- * Contains API for handling DPSW topology and functionality
- */
-
-struct fsl_mc_io;
-
-/**
- * DPSW general definitions
- */
-
-/**
- * Maximum number of traffic class priorities
- */
-#define DPSW_MAX_PRIORITIES	8
-/**
- * Maximum number of interfaces
- */
-#define DPSW_MAX_IF		64
-
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token);
-
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
-
-/**
- * DPSW options
- */
-
-/**
- * Disable flooding
- */
-#define DPSW_OPT_FLOODING_DIS		0x0000000000000001ULL
-/**
- * Disable Multicast
- */
-#define DPSW_OPT_MULTICAST_DIS		0x0000000000000004ULL
-/**
- * Support control interface
- */
-#define DPSW_OPT_CTRL_IF_DIS		0x0000000000000010ULL
-/**
- * Disable flooding metering
- */
-#define DPSW_OPT_FLOODING_METERING_DIS  0x0000000000000020ULL
-/**
- * Enable metering
- */
-#define DPSW_OPT_METERING_EN            0x0000000000000040ULL
-
-/**
- * enum dpsw_component_type - component type of a bridge
- * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an
- *   enterprise VLAN bridge or of a Provider Bridge used
- *   to process C-tagged frames
- * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a
- *   Provider Bridge
- *
- */
-enum dpsw_component_type {
-	DPSW_COMPONENT_TYPE_C_VLAN = 0,
-	DPSW_COMPONENT_TYPE_S_VLAN
-};
-
-/**
- *  enum dpsw_flooding_cfg - flooding configuration requested
- * @DPSW_FLOODING_PER_VLAN: Flooding replicators are allocated per VLAN and
- * interfaces present in each of them can be configured using
- * dpsw_vlan_add_if_flooding()/dpsw_vlan_remove_if_flooding().
- * This is the default configuration.
- *
- * @DPSW_FLOODING_PER_FDB: Flooding replicators are allocated per FDB and
- * interfaces present in each of them can be configured using
- * dpsw_set_egress_flood().
- */
-enum dpsw_flooding_cfg {
-	DPSW_FLOODING_PER_VLAN = 0,
-	DPSW_FLOODING_PER_FDB,
-};
-
-/**
- * enum dpsw_broadcast_cfg - broadcast configuration requested
- * @DPSW_BROADCAST_PER_OBJECT: There is only one broadcast replicator per DPSW
- * object. This is the default configuration.
- * @DPSW_BROADCAST_PER_FDB: Broadcast replicators are allocated per FDB and
- * interfaces present in each of them can be configured using
- * dpsw_set_egress_flood().
- */
-enum dpsw_broadcast_cfg {
-	DPSW_BROADCAST_PER_OBJECT = 0,
-	DPSW_BROADCAST_PER_FDB,
-};
-
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token);
-
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token);
-
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
-
-/**
- * DPSW IRQ Index and Events
- */
-
-#define DPSW_IRQ_INDEX_IF		0x0000
-#define DPSW_IRQ_INDEX_L2SW		0x0001
-
-/**
- * IRQ event - Indicates that the link state changed
- */
-#define DPSW_IRQ_EVENT_LINK_CHANGED	0x0001
-
-/**
- * struct dpsw_irq_cfg - IRQ configuration
- * @addr:	Address that must be written to signal a message-based interrupt
- * @val:	Value to write into irq_addr address
- * @irq_num: A user defined number associated with this IRQ
- */
-struct dpsw_irq_cfg {
-	u64 addr;
-	u32 val;
-	int irq_num;
-};
-
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en);
-
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask);
-
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status);
-
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status);
-
-/**
- * struct dpsw_attr - Structure representing DPSW attributes
- * @id: DPSW object ID
- * @options: Enable/Disable DPSW features
- * @max_vlans: Maximum Number of VLANs
- * @max_meters_per_if:  Number of meters per interface
- * @max_fdbs: Maximum Number of FDBs
- * @max_fdb_entries: Number of FDB entries for default FDB table;
- *			0 - indicates default 1024 entries.
- * @fdb_aging_time: Default FDB aging time for default FDB table;
- *			0 - indicates default 300 seconds
- * @max_fdb_mc_groups: Number of multicast groups in each FDB table;
- *			0 - indicates default 32
- * @mem_size: DPSW frame storage memory size
- * @num_ifs: Number of interfaces
- * @num_vlans: Current number of VLANs
- * @num_fdbs: Current number of FDBs
- * @component_type: Component type of this bridge
- * @flooding_cfg: Flooding configuration (PER_VLAN - default, PER_FDB)
- * @broadcast_cfg: Broadcast configuration (PER_OBJECT - default, PER_FDB)
- */
-struct dpsw_attr {
-	int id;
-	u64 options;
-	u16 max_vlans;
-	u8 max_meters_per_if;
-	u8 max_fdbs;
-	u16 max_fdb_entries;
-	u16 fdb_aging_time;
-	u16 max_fdb_mc_groups;
-	u16 num_ifs;
-	u16 mem_size;
-	u16 num_vlans;
-	u8 num_fdbs;
-	enum dpsw_component_type component_type;
-	enum dpsw_flooding_cfg flooding_cfg;
-	enum dpsw_broadcast_cfg broadcast_cfg;
-};
-
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			struct dpsw_attr *attr);
-
-/**
- * struct dpsw_ctrl_if_attr - Control interface attributes
- * @rx_fqid:		Receive FQID
- * @rx_err_fqid:	Receive error FQID
- * @tx_err_conf_fqid:	Transmit error and confirmation FQID
- */
-struct dpsw_ctrl_if_attr {
-	u32 rx_fqid;
-	u32 rx_err_fqid;
-	u32 tx_err_conf_fqid;
-};
-
-int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				u16 token, struct dpsw_ctrl_if_attr *attr);
-
-enum dpsw_queue_type {
-	DPSW_QUEUE_RX,
-	DPSW_QUEUE_TX_ERR_CONF,
-	DPSW_QUEUE_RX_ERR,
-};
-
-/**
- * Maximum number of DPBP
- */
-#define DPSW_MAX_DPBP     8
-
-/**
- * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration
- * @num_dpbp: Number of DPBPs
- * @pools: Array of buffer pools parameters; The number of valid entries
- *	must match 'num_dpbp' value
- * @pools.dpbp_id: DPBP object ID
- * @pools.buffer_size: Buffer size
- * @pools.backup_pool: Backup pool
- */
-struct dpsw_ctrl_if_pools_cfg {
-	u8 num_dpbp;
-	struct {
-		int dpbp_id;
-		u16 buffer_size;
-		int backup_pool;
-	} pools[DPSW_MAX_DPBP];
-};
-
-int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   const struct dpsw_ctrl_if_pools_cfg *cfg);
-
-#define DPSW_CTRL_IF_QUEUE_OPT_USER_CTX		0x00000001
-#define DPSW_CTRL_IF_QUEUE_OPT_DEST		0x00000002
-
-enum dpsw_ctrl_if_dest {
-	DPSW_CTRL_IF_DEST_NONE = 0,
-	DPSW_CTRL_IF_DEST_DPIO = 1,
-};
-
-struct dpsw_ctrl_if_dest_cfg {
-	enum dpsw_ctrl_if_dest dest_type;
-	int dest_id;
-	u8 priority;
-};
-
-struct dpsw_ctrl_if_queue_cfg {
-	u32 options;
-	u64 user_ctx;
-	struct dpsw_ctrl_if_dest_cfg dest_cfg;
-};
-
-int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   enum dpsw_queue_type qtype,
-			   const struct dpsw_ctrl_if_queue_cfg *cfg);
-
-int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
-
-int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
-
-/**
- * enum dpsw_action - Action selection for special/control frames
- * @DPSW_ACTION_DROP: Drop frame
- * @DPSW_ACTION_REDIRECT: Redirect frame to control port
- */
-enum dpsw_action {
-	DPSW_ACTION_DROP = 0,
-	DPSW_ACTION_REDIRECT = 1
-};
-
-/**
- * Enable auto-negotiation
- */
-#define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
-/**
- * Enable half-duplex mode
- */
-#define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
-/**
- * Enable pause frames
- */
-#define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
-/**
- * Enable a-symmetric pause frames
- */
-#define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
-
-/**
- * struct dpsw_link_cfg - Structure representing DPSW link configuration
- * @rate: Rate
- * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
- */
-struct dpsw_link_cfg {
-	u32 rate;
-	u64 options;
-};
-
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
-			 struct dpsw_link_cfg *cfg);
-/**
- * struct dpsw_link_state - Structure representing DPSW link state
- * @rate: Rate
- * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
- * @up: 0 - covers two cases: down and disconnected, 1 - up
- */
-struct dpsw_link_state {
-	u32 rate;
-	u64 options;
-	u8 up;
-};
-
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state);
-
-/**
- * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
- * @pcp: Priority Code Point (PCP): a 3-bit field which refers
- *		 to the IEEE 802.1p priority
- * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used
- *		 separately or in conjunction with PCP to indicate frames
- *		 eligible to be dropped in the presence of congestion
- * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN
- *			to which the frame belongs. The hexadecimal values
- *			of 0x000 and 0xFFF are reserved;
- *			all other values may be used as VLAN identifiers,
- *			allowing up to 4,094 VLANs
- */
-struct dpsw_tci_cfg {
-	u8 pcp;
-	u8 dei;
-	u16 vlan_id;
-};
-
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_tci_cfg *cfg);
-
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    struct dpsw_tci_cfg *cfg);
-
-/**
- * enum dpsw_stp_state - Spanning Tree Protocol (STP) states
- * @DPSW_STP_STATE_BLOCKING: Blocking state
- * @DPSW_STP_STATE_LISTENING: Listening state
- * @DPSW_STP_STATE_LEARNING: Learning state
- * @DPSW_STP_STATE_FORWARDING: Forwarding state
- *
- */
-enum dpsw_stp_state {
-	DPSW_STP_STATE_DISABLED = 0,
-	DPSW_STP_STATE_LISTENING = 1,
-	DPSW_STP_STATE_LEARNING = 2,
-	DPSW_STP_STATE_FORWARDING = 3,
-	DPSW_STP_STATE_BLOCKING = 0
-};
-
-/**
- * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration
- * @vlan_id: VLAN ID STP state
- * @state: STP state
- */
-struct dpsw_stp_cfg {
-	u16 vlan_id;
-	enum dpsw_stp_state state;
-};
-
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
-		    const struct dpsw_stp_cfg *cfg);
-
-/**
- * enum dpsw_accepted_frames - Types of frames to accept
- * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and
- *			priority tagged frames
- * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or
- *			Priority-Tagged frames received on this interface.
- *
- */
-enum dpsw_accepted_frames {
-	DPSW_ADMIT_ALL = 1,
-	DPSW_ADMIT_ONLY_VLAN_TAGGED = 3
-};
-
-/**
- * enum dpsw_counter  - Counters types
- * @DPSW_CNT_ING_FRAME: Counts ingress frames
- * @DPSW_CNT_ING_BYTE: Counts ingress bytes
- * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames
- * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frame
- * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames
- * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes
- * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames
- * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes
- * @DPSW_CNT_EGR_FRAME: Counts egress frames
- * @DPSW_CNT_EGR_BYTE: Counts egress bytes
- * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames
- * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames
- * @DPSW_CNT_ING_NO_BUFF_DISCARD: Counts ingress no buffer discarded frames
- */
-enum dpsw_counter {
-	DPSW_CNT_ING_FRAME = 0x0,
-	DPSW_CNT_ING_BYTE = 0x1,
-	DPSW_CNT_ING_FLTR_FRAME = 0x2,
-	DPSW_CNT_ING_FRAME_DISCARD = 0x3,
-	DPSW_CNT_ING_MCAST_FRAME = 0x4,
-	DPSW_CNT_ING_MCAST_BYTE = 0x5,
-	DPSW_CNT_ING_BCAST_FRAME = 0x6,
-	DPSW_CNT_ING_BCAST_BYTES = 0x7,
-	DPSW_CNT_EGR_FRAME = 0x8,
-	DPSW_CNT_EGR_BYTE = 0x9,
-	DPSW_CNT_EGR_FRAME_DISCARD = 0xa,
-	DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb,
-	DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc,
-};
-
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter);
-
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id);
-
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id);
-
-/**
- * struct dpsw_if_attr - Structure representing DPSW interface attributes
- * @num_tcs: Number of traffic classes
- * @rate: Transmit rate in bits per second
- * @options: Interface configuration options (bitmap)
- * @enabled: Indicates if interface is enabled
- * @accept_all_vlan: The device discards/accepts incoming frames
- *		for VLANs that do not include this interface
- * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device
- *		discards untagged frames or priority-tagged frames received on
- *		this interface;
- *		When set to 'DPSW_ADMIT_ALL', untagged frames or priority-
- *		tagged frames received on this interface are accepted
- * @qdid: control frames transmit qdid
- */
-struct dpsw_if_attr {
-	u8 num_tcs;
-	u32 rate;
-	u32 options;
-	int enabled;
-	int accept_all_vlan;
-	enum dpsw_accepted_frames admit_untagged;
-	u16 qdid;
-};
-
-int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			   u16 if_id, struct dpsw_if_attr *attr);
-
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length);
-
-/**
- * struct dpsw_vlan_cfg - VLAN Configuration
- * @fdb_id: Forwarding Data Base
- */
-struct dpsw_vlan_cfg {
-	u16 fdb_id;
-};
-
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg);
-
-#define DPSW_VLAN_ADD_IF_OPT_FDB_ID            0x0001
-
-/**
- * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces
- * @num_ifs: The number of interfaces that are assigned to the egress
- *		list for this VLAN
- * @if_id: The set of interfaces that are
- *		assigned to the egress list for this VLAN
- */
-struct dpsw_vlan_if_cfg {
-	u16 num_ifs;
-	u16 options;
-	u16 if_id[DPSW_MAX_IF];
-	u16 fdb_id;
-};
-
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id);
-
-/**
- * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic
- * @DPSW_FDB_ENTRY_STATIC: Static entry
- * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry
- */
-enum dpsw_fdb_entry_type {
-	DPSW_FDB_ENTRY_STATIC = 0,
-	DPSW_FDB_ENTRY_DINAMIC = 1
-};
-
-/**
- * struct dpsw_fdb_unicast_cfg - Unicast entry configuration
- * @type: Select static or dynamic entry
- * @mac_addr: MAC address
- * @if_egress: Egress interface ID
- */
-struct dpsw_fdb_unicast_cfg {
-	enum dpsw_fdb_entry_type type;
-	u8 mac_addr[6];
-	u16 if_egress;
-};
-
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg);
-
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg);
-
-#define DPSW_FDB_ENTRY_TYPE_DYNAMIC  BIT(0)
-#define DPSW_FDB_ENTRY_TYPE_UNICAST  BIT(1)
-
-/**
- * struct fdb_dump_entry - fdb snapshot entry
- * @mac_addr: MAC address
- * @type: bit0 - DINAMIC(1)/STATIC(0), bit1 - UNICAST(1)/MULTICAST(0)
- * @if_info: unicast - egress interface, multicast - number of egress interfaces
- * @if_mask: multicast - egress interface mask
- */
-struct fdb_dump_entry {
-	u8 mac_addr[6];
-	u8 type;
-	u8 if_info;
-	u8 if_mask[8];
-};
-
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries);
-
-/**
- * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration
- * @type: Select static or dynamic entry
- * @mac_addr: MAC address
- * @num_ifs: Number of external and internal interfaces
- * @if_id: Egress interface IDs
- */
-struct dpsw_fdb_multicast_cfg {
-	enum dpsw_fdb_entry_type type;
-	u8 mac_addr[6];
-	u16 num_ifs;
-	u16 if_id[DPSW_MAX_IF];
-};
-
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg);
-
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg);
-
-/**
- * enum dpsw_fdb_learning_mode - Auto-learning modes
- * @DPSW_FDB_LEARNING_MODE_DIS: Disable Auto-learning
- * @DPSW_FDB_LEARNING_MODE_HW: Enable HW auto-Learning
- * @DPSW_FDB_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
- * @DPSW_FDB_LEARNING_MODE_SECURE: Enable secure learning by CPU
- *
- *	NONE - SECURE LEARNING
- *	SMAC found	DMAC found	CTLU Action
- *	v		v	Forward frame to
- *						1.  DMAC destination
- *	-		v	Forward frame to
- *						1.  DMAC destination
- *						2.  Control interface
- *	v		-	Forward frame to
- *						1.  Flooding list of interfaces
- *	-		-	Forward frame to
- *						1.  Flooding list of interfaces
- *						2.  Control interface
- *	SECURE LEARING
- *	SMAC found	DMAC found	CTLU Action
- *	v		v		Forward frame to
- *						1.  DMAC destination
- *	-		v		Forward frame to
- *						1.  Control interface
- *	v		-		Forward frame to
- *						1.  Flooding list of interfaces
- *	-		-		Forward frame to
- *						1.  Control interface
- */
-enum dpsw_fdb_learning_mode {
-	DPSW_FDB_LEARNING_MODE_DIS = 0,
-	DPSW_FDB_LEARNING_MODE_HW = 1,
-	DPSW_FDB_LEARNING_MODE_NON_SECURE = 2,
-	DPSW_FDB_LEARNING_MODE_SECURE = 3
-};
-
-/**
- * struct dpsw_fdb_attr - FDB Attributes
- * @max_fdb_entries: Number of FDB entries
- * @fdb_ageing_time: Ageing time in seconds
- * @learning_mode: Learning mode
- * @num_fdb_mc_groups: Current number of multicast groups
- * @max_fdb_mc_groups: Maximum number of multicast groups
- */
-struct dpsw_fdb_attr {
-	u16 max_fdb_entries;
-	u16 fdb_ageing_time;
-	enum dpsw_fdb_learning_mode learning_mode;
-	u16 num_fdb_mc_groups;
-	u16 max_fdb_mc_groups;
-};
-
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver);
-
-int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			      u16 if_id, u8 mac_addr[6]);
-
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
-/**
- * struct dpsw_fdb_cfg  - FDB Configuration
- * @num_fdb_entries: Number of FDB entries
- * @fdb_ageing_time: Ageing time in seconds
- */
-struct dpsw_fdb_cfg {
-	u16 num_fdb_entries;
-	u16 fdb_ageing_time;
-};
-
-int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
-		 const struct dpsw_fdb_cfg *cfg);
-
-int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id);
-
-/**
- * enum dpsw_flood_type - Define the flood type of a DPSW object
- * @DPSW_BROADCAST: Broadcast flooding
- * @DPSW_FLOODING: Unknown flooding
- */
-enum dpsw_flood_type {
-	DPSW_BROADCAST = 0,
-	DPSW_FLOODING,
-};
-
-struct dpsw_egress_flood_cfg {
-	u16 fdb_id;
-	enum dpsw_flood_type flood_type;
-	u16 num_ifs;
-	u16 if_id[DPSW_MAX_IF];
-};
-
-int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
-			  const struct dpsw_egress_flood_cfg *cfg);
-
-#endif /* __FSL_DPSW_H */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
deleted file mode 100644
index 0af2e9914ec4..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
+++ /dev/null
@@ -1,189 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DPAA2 Ethernet Switch ethtool support
- *
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2018 NXP
- *
- */
-
-#include <linux/ethtool.h>
-
-#include "ethsw.h"
-
-static struct {
-	enum dpsw_counter id;
-	char name[ETH_GSTRING_LEN];
-} dpaa2_switch_ethtool_counters[] =  {
-	{DPSW_CNT_ING_FRAME,		"rx frames"},
-	{DPSW_CNT_ING_BYTE,		"rx bytes"},
-	{DPSW_CNT_ING_FLTR_FRAME,	"rx filtered frames"},
-	{DPSW_CNT_ING_FRAME_DISCARD,	"rx discarded frames"},
-	{DPSW_CNT_ING_BCAST_FRAME,	"rx b-cast frames"},
-	{DPSW_CNT_ING_BCAST_BYTES,	"rx b-cast bytes"},
-	{DPSW_CNT_ING_MCAST_FRAME,	"rx m-cast frames"},
-	{DPSW_CNT_ING_MCAST_BYTE,	"rx m-cast bytes"},
-	{DPSW_CNT_EGR_FRAME,		"tx frames"},
-	{DPSW_CNT_EGR_BYTE,		"tx bytes"},
-	{DPSW_CNT_EGR_FRAME_DISCARD,	"tx discarded frames"},
-	{DPSW_CNT_ING_NO_BUFF_DISCARD,	"rx discarded no buffer frames"},
-};
-
-#define DPAA2_SWITCH_NUM_COUNTERS	ARRAY_SIZE(dpaa2_switch_ethtool_counters)
-
-static void dpaa2_switch_get_drvinfo(struct net_device *netdev,
-				     struct ethtool_drvinfo *drvinfo)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	u16 version_major, version_minor;
-	int err;
-
-	strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
-
-	err = dpsw_get_api_version(port_priv->ethsw_data->mc_io, 0,
-				   &version_major,
-				   &version_minor);
-	if (err)
-		strscpy(drvinfo->fw_version, "N/A",
-			sizeof(drvinfo->fw_version));
-	else
-		snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-			 "%u.%u", version_major, version_minor);
-
-	strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent->parent),
-		sizeof(drvinfo->bus_info));
-}
-
-static int
-dpaa2_switch_get_link_ksettings(struct net_device *netdev,
-				struct ethtool_link_ksettings *link_ksettings)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct dpsw_link_state state = {0};
-	int err = 0;
-
-	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     port_priv->idx,
-				     &state);
-	if (err) {
-		netdev_err(netdev, "ERROR %d getting link state\n", err);
-		goto out;
-	}
-
-	/* At the moment, we have no way of interrogating the DPMAC
-	 * from the DPSW side or there may not exist a DPMAC at all.
-	 * Report only autoneg state, duplexity and speed.
-	 */
-	if (state.options & DPSW_LINK_OPT_AUTONEG)
-		link_ksettings->base.autoneg = AUTONEG_ENABLE;
-	if (!(state.options & DPSW_LINK_OPT_HALF_DUPLEX))
-		link_ksettings->base.duplex = DUPLEX_FULL;
-	link_ksettings->base.speed = state.rate;
-
-out:
-	return err;
-}
-
-static int
-dpaa2_switch_set_link_ksettings(struct net_device *netdev,
-				const struct ethtool_link_ksettings *link_ksettings)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_link_cfg cfg = {0};
-	bool if_running;
-	int err = 0, ret;
-
-	/* Interface needs to be down to change link settings */
-	if_running = netif_running(netdev);
-	if (if_running) {
-		err = dpsw_if_disable(ethsw->mc_io, 0,
-				      ethsw->dpsw_handle,
-				      port_priv->idx);
-		if (err) {
-			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
-			return err;
-		}
-	}
-
-	cfg.rate = link_ksettings->base.speed;
-	if (link_ksettings->base.autoneg == AUTONEG_ENABLE)
-		cfg.options |= DPSW_LINK_OPT_AUTONEG;
-	else
-		cfg.options &= ~DPSW_LINK_OPT_AUTONEG;
-	if (link_ksettings->base.duplex  == DUPLEX_HALF)
-		cfg.options |= DPSW_LINK_OPT_HALF_DUPLEX;
-	else
-		cfg.options &= ~DPSW_LINK_OPT_HALF_DUPLEX;
-
-	err = dpsw_if_set_link_cfg(port_priv->ethsw_data->mc_io, 0,
-				   port_priv->ethsw_data->dpsw_handle,
-				   port_priv->idx,
-				   &cfg);
-
-	if (if_running) {
-		ret = dpsw_if_enable(ethsw->mc_io, 0,
-				     ethsw->dpsw_handle,
-				     port_priv->idx);
-		if (ret) {
-			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
-			return ret;
-		}
-	}
-	return err;
-}
-
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_STATS:
-		return DPAA2_SWITCH_NUM_COUNTERS;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
-					     u32 stringset, u8 *data)
-{
-	int i;
-
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
-			memcpy(data + i * ETH_GSTRING_LEN,
-			       dpaa2_switch_ethtool_counters[i].name,
-			       ETH_GSTRING_LEN);
-		break;
-	}
-}
-
-static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
-					   struct ethtool_stats *stats,
-					   u64 *data)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int i, err;
-
-	for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
-		err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-					  port_priv->ethsw_data->dpsw_handle,
-					  port_priv->idx,
-					  dpaa2_switch_ethtool_counters[i].id,
-					  &data[i]);
-		if (err)
-			netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
-				   dpaa2_switch_ethtool_counters[i].name, err);
-	}
-}
-
-const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
-	.get_drvinfo		= dpaa2_switch_get_drvinfo,
-	.get_link		= ethtool_op_get_link,
-	.get_link_ksettings	= dpaa2_switch_get_link_ksettings,
-	.set_link_ksettings	= dpaa2_switch_set_link_ksettings,
-	.get_strings		= dpaa2_switch_ethtool_get_strings,
-	.get_ethtool_stats	= dpaa2_switch_ethtool_get_stats,
-	.get_sset_count		= dpaa2_switch_ethtool_get_sset_count,
-};
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
deleted file mode 100644
index 97292cf570c1..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ /dev/null
@@ -1,2901 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DPAA2 Ethernet Switch driver
- *
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2021 NXP
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/interrupt.h>
-#include <linux/msi.h>
-#include <linux/kthread.h>
-#include <linux/workqueue.h>
-#include <linux/iommu.h>
-
-#include <linux/fsl/mc.h>
-
-#include "ethsw.h"
-
-/* Minimal supported DPSW version */
-#define DPSW_MIN_VER_MAJOR		8
-#define DPSW_MIN_VER_MINOR		9
-
-#define DEFAULT_VLAN_ID			1
-
-static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
-{
-	return port_priv->fdb->fdb_id;
-}
-
-static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-		if (!ethsw->fdbs[i].in_use)
-			return &ethsw->fdbs[i];
-	return NULL;
-}
-
-static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
-				     struct net_device *bridge_dev)
-{
-	struct ethsw_port_priv *other_port_priv = NULL;
-	struct dpaa2_switch_fdb *fdb;
-	struct net_device *other_dev;
-	struct list_head *iter;
-
-	/* If we leave a bridge (bridge_dev is NULL), find an unused
-	 * FDB and use that.
-	 */
-	if (!bridge_dev) {
-		fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
-
-		/* If there is no unused FDB, we must be the last port that
-		 * leaves the last bridge, all the others are standalone. We
-		 * can just keep the FDB that we already have.
-		 */
-
-		if (!fdb) {
-			port_priv->fdb->bridge_dev = NULL;
-			return 0;
-		}
-
-		port_priv->fdb = fdb;
-		port_priv->fdb->in_use = true;
-		port_priv->fdb->bridge_dev = NULL;
-		return 0;
-	}
-
-	/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
-	 * being held. Assert on it so that it's easier to catch new code
-	 * paths that reach this point without the RTNL lock.
-	 */
-	ASSERT_RTNL();
-
-	/* If part of a bridge, use the FDB of the first dpaa2 switch interface
-	 * to be present in that bridge
-	 */
-	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
-		if (!dpaa2_switch_port_dev_check(other_dev))
-			continue;
-
-		if (other_dev == port_priv->netdev)
-			continue;
-
-		other_port_priv = netdev_priv(other_dev);
-		break;
-	}
-
-	/* The current port is about to change its FDB to the one used by the
-	 * first port that joined the bridge.
-	 */
-	if (other_port_priv) {
-		/* The previous FDB is about to become unused, since the
-		 * interface is no longer standalone.
-		 */
-		port_priv->fdb->in_use = false;
-		port_priv->fdb->bridge_dev = NULL;
-
-		/* Get a reference to the new FDB */
-		port_priv->fdb = other_port_priv->fdb;
-	}
-
-	/* Keep track of the new upper bridge device */
-	port_priv->fdb->bridge_dev = bridge_dev;
-
-	return 0;
-}
-
-static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
-				dma_addr_t iova_addr)
-{
-	phys_addr_t phys_addr;
-
-	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;
-
-	return phys_to_virt(phys_addr);
-}
-
-static int dpaa2_switch_add_vlan(struct ethsw_port_priv *port_priv, u16 vid)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_vlan_cfg vcfg = {0};
-	int err;
-
-	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_vlan_add(ethsw->mc_io, 0,
-			    ethsw->dpsw_handle, vid, &vcfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err);
-		return err;
-	}
-	ethsw->vlans[vid] = ETHSW_VLAN_MEMBER;
-
-	return 0;
-}
-
-static bool dpaa2_switch_port_is_up(struct ethsw_port_priv *port_priv)
-{
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_link_state state;
-	int err;
-
-	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     port_priv->idx, &state);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
-		return true;
-	}
-
-	WARN_ONCE(state.up > 1, "Garbage read into link_state");
-
-	return state.up ? true : false;
-}
-
-static int dpaa2_switch_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_tci_cfg tci_cfg = { 0 };
-	bool up;
-	int err, ret;
-
-	err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      port_priv->idx, &tci_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_tci err %d\n", err);
-		return err;
-	}
-
-	tci_cfg.vlan_id = pvid;
-
-	/* Interface needs to be down to change PVID */
-	up = dpaa2_switch_port_is_up(port_priv);
-	if (up) {
-		err = dpsw_if_disable(ethsw->mc_io, 0,
-				      ethsw->dpsw_handle,
-				      port_priv->idx);
-		if (err) {
-			netdev_err(netdev, "dpsw_if_disable err %d\n", err);
-			return err;
-		}
-	}
-
-	err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      port_priv->idx, &tci_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_set_tci err %d\n", err);
-		goto set_tci_error;
-	}
-
-	/* Delete previous PVID info and mark the new one */
-	port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID;
-	port_priv->vlans[pvid] |= ETHSW_VLAN_PVID;
-	port_priv->pvid = pvid;
-
-set_tci_error:
-	if (up) {
-		ret = dpsw_if_enable(ethsw->mc_io, 0,
-				     ethsw->dpsw_handle,
-				     port_priv->idx);
-		if (ret) {
-			netdev_err(netdev, "dpsw_if_enable err %d\n", ret);
-			return ret;
-		}
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
-				      u16 vid, u16 flags)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_vlan_if_cfg vcfg = {0};
-	int err;
-
-	if (port_priv->vlans[vid]) {
-		netdev_warn(netdev, "VLAN %d already configured\n", vid);
-		return -EEXIST;
-	}
-
-	/* If hit, this VLAN rule will lead the packet into the FDB table
-	 * specified in the vlan configuration below
-	 */
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-	vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	vcfg.options |= DPSW_VLAN_ADD_IF_OPT_FDB_ID;
-	err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err);
-		return err;
-	}
-
-	port_priv->vlans[vid] = ETHSW_VLAN_MEMBER;
-
-	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
-		err = dpsw_vlan_add_if_untagged(ethsw->mc_io, 0,
-						ethsw->dpsw_handle,
-						vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_add_if_untagged err %d\n", err);
-			return err;
-		}
-		port_priv->vlans[vid] |= ETHSW_VLAN_UNTAGGED;
-	}
-
-	if (flags & BRIDGE_VLAN_INFO_PVID) {
-		err = dpaa2_switch_port_set_pvid(port_priv, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
-{
-	struct dpsw_stp_cfg stp_cfg = {
-		.state = state,
-	};
-	int err;
-	u16 vid;
-
-	if (!netif_running(port_priv->netdev) || state == port_priv->stp_state)
-		return 0;	/* Nothing to do */
-
-	for (vid = 0; vid <= VLAN_VID_MASK; vid++) {
-		if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
-			stp_cfg.vlan_id = vid;
-			err = dpsw_if_set_stp(port_priv->ethsw_data->mc_io, 0,
-					      port_priv->ethsw_data->dpsw_handle,
-					      port_priv->idx, &stp_cfg);
-			if (err) {
-				netdev_err(port_priv->netdev,
-					   "dpsw_if_set_stp err %d\n", err);
-				return err;
-			}
-		}
-	}
-
-	port_priv->stp_state = state;
-
-	return 0;
-}
-
-static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid)
-{
-	struct ethsw_port_priv *ppriv_local = NULL;
-	int i, err;
-
-	if (!ethsw->vlans[vid])
-		return -ENOENT;
-
-	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, vid);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_vlan_remove err %d\n", err);
-		return err;
-	}
-	ethsw->vlans[vid] = 0;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		ppriv_local = ethsw->ports[i];
-		ppriv_local->vlans[vid] = 0;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_unicast_cfg entry = {0};
-	u16 fdb_id;
-	int err;
-
-	entry.if_egress = port_priv->idx;
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	ether_addr_copy(entry.mac_addr, addr);
-
-	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0,
-				   port_priv->ethsw_data->dpsw_handle,
-				   fdb_id, &entry);
-	if (err)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_add_unicast err %d\n", err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_unicast_cfg entry = {0};
-	u16 fdb_id;
-	int err;
-
-	entry.if_egress = port_priv->idx;
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	ether_addr_copy(entry.mac_addr, addr);
-
-	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0,
-				      port_priv->ethsw_data->dpsw_handle,
-				      fdb_id, &entry);
-	/* Silently discard error for calling multiple times the del command */
-	if (err && err != -ENXIO)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_remove_unicast err %d\n", err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_multicast_cfg entry = {0};
-	u16 fdb_id;
-	int err;
-
-	ether_addr_copy(entry.mac_addr, addr);
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	entry.num_ifs = 1;
-	entry.if_id[0] = port_priv->idx;
-
-	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     fdb_id, &entry);
-	/* Silently discard error for calling multiple times the add command */
-	if (err && err != -ENXIO)
-		netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n",
-			   err);
-	return err;
-}
-
-static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv,
-					const unsigned char *addr)
-{
-	struct dpsw_fdb_multicast_cfg entry = {0};
-	u16 fdb_id;
-	int err;
-
-	ether_addr_copy(entry.mac_addr, addr);
-	entry.type = DPSW_FDB_ENTRY_STATIC;
-	entry.num_ifs = 1;
-	entry.if_id[0] = port_priv->idx;
-
-	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0,
-					port_priv->ethsw_data->dpsw_handle,
-					fdb_id, &entry);
-	/* Silently discard error for calling multiple times the del command */
-	if (err && err != -ENAVAIL)
-		netdev_err(port_priv->netdev,
-			   "dpsw_fdb_remove_multicast err %d\n", err);
-	return err;
-}
-
-static void dpaa2_switch_port_get_stats(struct net_device *netdev,
-					struct rtnl_link_stats64 *stats)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	u64 tmp;
-	int err;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FRAME, &stats->rx_packets);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_FRAME, &stats->tx_packets);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_BYTE, &stats->rx_bytes);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_BYTE, &stats->tx_bytes);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FRAME_DISCARD,
-				  &stats->rx_dropped);
-	if (err)
-		goto error;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_ING_FLTR_FRAME,
-				  &tmp);
-	if (err)
-		goto error;
-	stats->rx_dropped += tmp;
-
-	err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
-				  port_priv->ethsw_data->dpsw_handle,
-				  port_priv->idx,
-				  DPSW_CNT_EGR_FRAME_DISCARD,
-				  &stats->tx_dropped);
-	if (err)
-		goto error;
-
-	return;
-
-error:
-	netdev_err(netdev, "dpsw_if_get_counter err %d\n", err);
-}
-
-static bool dpaa2_switch_port_has_offload_stats(const struct net_device *netdev,
-						int attr_id)
-{
-	return (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT);
-}
-
-static int dpaa2_switch_port_get_offload_stats(int attr_id,
-					       const struct net_device *netdev,
-					       void *sp)
-{
-	switch (attr_id) {
-	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
-		dpaa2_switch_port_get_stats((struct net_device *)netdev, sp);
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	err = dpsw_if_set_max_frame_length(port_priv->ethsw_data->mc_io,
-					   0,
-					   port_priv->ethsw_data->dpsw_handle,
-					   port_priv->idx,
-					   (u16)ETHSW_L2_MAX_FRM(mtu));
-	if (err) {
-		netdev_err(netdev,
-			   "dpsw_if_set_max_frame_length() err %d\n", err);
-		return err;
-	}
-
-	netdev->mtu = mtu;
-	return 0;
-}
-
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct dpsw_link_state state;
-	int err;
-
-	/* Interrupts are received even though no one issued an 'ifconfig up'
-	 * on the switch interface. Ignore these link state update interrupts
-	 */
-	if (!netif_running(netdev))
-		return 0;
-
-	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
-				     port_priv->ethsw_data->dpsw_handle,
-				     port_priv->idx, &state);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err);
-		return err;
-	}
-
-	WARN_ONCE(state.up > 1, "Garbage read into link_state");
-
-	if (state.up != port_priv->link_state) {
-		if (state.up) {
-			netif_carrier_on(netdev);
-			netif_tx_start_all_queues(netdev);
-		} else {
-			netif_carrier_off(netdev);
-			netif_tx_stop_all_queues(netdev);
-		}
-		port_priv->link_state = state.up;
-	}
-
-	return 0;
-}
-
-/* Manage all NAPI instances for the control interface.
- *
- * We only have one RX queue and one Tx Conf queue for all
- * switch ports. Therefore, we only need to enable the NAPI instance once, the
- * first time one of the switch ports runs .dev_open().
- */
-
-static void dpaa2_switch_enable_ctrl_if_napi(struct ethsw_core *ethsw)
-{
-	int i;
-
-	/* Access to the ethsw->napi_users relies on the RTNL lock */
-	ASSERT_RTNL();
-
-	/* a new interface is using the NAPI instance */
-	ethsw->napi_users++;
-
-	/* if there is already a user of the instance, return */
-	if (ethsw->napi_users > 1)
-		return;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
-		napi_enable(&ethsw->fq[i].napi);
-}
-
-static void dpaa2_switch_disable_ctrl_if_napi(struct ethsw_core *ethsw)
-{
-	int i;
-
-	/* Access to the ethsw->napi_users relies on the RTNL lock */
-	ASSERT_RTNL();
-
-	/* If we are not the last interface using the NAPI, return */
-	ethsw->napi_users--;
-	if (ethsw->napi_users)
-		return;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
-		napi_disable(&ethsw->fq[i].napi);
-}
-
-static int dpaa2_switch_port_open(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	int err;
-
-	/* Explicitly set carrier off, otherwise
-	 * netif_carrier_ok() will return true and cause 'ip link show'
-	 * to report the LOWER_UP flag, even though the link
-	 * notification wasn't even received.
-	 */
-	netif_carrier_off(netdev);
-
-	err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
-			     port_priv->ethsw_data->dpsw_handle,
-			     port_priv->idx);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_enable err %d\n", err);
-		return err;
-	}
-
-	/* sync carrier state */
-	err = dpaa2_switch_port_carrier_state_sync(netdev);
-	if (err) {
-		netdev_err(netdev,
-			   "dpaa2_switch_port_carrier_state_sync err %d\n", err);
-		goto err_carrier_sync;
-	}
-
-	dpaa2_switch_enable_ctrl_if_napi(ethsw);
-
-	return 0;
-
-err_carrier_sync:
-	dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-			port_priv->ethsw_data->dpsw_handle,
-			port_priv->idx);
-	return err;
-}
-
-static int dpaa2_switch_port_stop(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	int err;
-
-	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-			      port_priv->ethsw_data->dpsw_handle,
-			      port_priv->idx);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_disable err %d\n", err);
-		return err;
-	}
-
-	dpaa2_switch_disable_ctrl_if_napi(ethsw);
-
-	return 0;
-}
-
-static int dpaa2_switch_port_parent_id(struct net_device *dev,
-				       struct netdev_phys_item_id *ppid)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(dev);
-
-	ppid->id_len = 1;
-	ppid->id[0] = port_priv->ethsw_data->dev_id;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_get_phys_name(struct net_device *netdev, char *name,
-					   size_t len)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	err = snprintf(name, len, "p%d", port_priv->idx);
-	if (err >= len)
-		return -EINVAL;
-
-	return 0;
-}
-
-struct ethsw_dump_ctx {
-	struct net_device *dev;
-	struct sk_buff *skb;
-	struct netlink_callback *cb;
-	int idx;
-};
-
-static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
-				    struct ethsw_dump_ctx *dump)
-{
-	int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
-	u32 portid = NETLINK_CB(dump->cb->skb).portid;
-	u32 seq = dump->cb->nlh->nlmsg_seq;
-	struct nlmsghdr *nlh;
-	struct ndmsg *ndm;
-
-	if (dump->idx < dump->cb->args[2])
-		goto skip;
-
-	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
-			sizeof(*ndm), NLM_F_MULTI);
-	if (!nlh)
-		return -EMSGSIZE;
-
-	ndm = nlmsg_data(nlh);
-	ndm->ndm_family  = AF_BRIDGE;
-	ndm->ndm_pad1    = 0;
-	ndm->ndm_pad2    = 0;
-	ndm->ndm_flags   = NTF_SELF;
-	ndm->ndm_type    = 0;
-	ndm->ndm_ifindex = dump->dev->ifindex;
-	ndm->ndm_state   = is_dynamic ? NUD_REACHABLE : NUD_NOARP;
-
-	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac_addr))
-		goto nla_put_failure;
-
-	nlmsg_end(dump->skb, nlh);
-
-skip:
-	dump->idx++;
-	return 0;
-
-nla_put_failure:
-	nlmsg_cancel(dump->skb, nlh);
-	return -EMSGSIZE;
-}
-
-static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
-					     struct ethsw_port_priv *port_priv)
-{
-	int idx = port_priv->idx;
-	int valid;
-
-	if (entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
-		valid = entry->if_info == port_priv->idx;
-	else
-		valid = entry->if_mask[idx / 8] & BIT(idx % 8);
-
-	return valid;
-}
-
-static int dpaa2_switch_fdb_iterate(struct ethsw_port_priv *port_priv,
-				    dpaa2_switch_fdb_cb_t cb, void *data)
-{
-	struct net_device *net_dev = port_priv->netdev;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct device *dev = net_dev->dev.parent;
-	struct fdb_dump_entry *fdb_entries;
-	struct fdb_dump_entry fdb_entry;
-	dma_addr_t fdb_dump_iova;
-	u16 num_fdb_entries;
-	u32 fdb_dump_size;
-	int err = 0, i;
-	u8 *dma_mem;
-	u16 fdb_id;
-
-	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
-	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
-	if (!dma_mem)
-		return -ENOMEM;
-
-	fdb_dump_iova = dma_map_single(dev, dma_mem, fdb_dump_size,
-				       DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, fdb_dump_iova)) {
-		netdev_err(net_dev, "dma_map_single() failed\n");
-		err = -ENOMEM;
-		goto err_map;
-	}
-
-	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
-	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, fdb_id,
-			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
-	if (err) {
-		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
-		goto err_dump;
-	}
-
-	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
-
-	fdb_entries = (struct fdb_dump_entry *)dma_mem;
-	for (i = 0; i < num_fdb_entries; i++) {
-		fdb_entry = fdb_entries[i];
-
-		err = cb(port_priv, &fdb_entry, data);
-		if (err)
-			goto end;
-	}
-
-end:
-	kfree(dma_mem);
-
-	return 0;
-
-err_dump:
-	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_TO_DEVICE);
-err_map:
-	kfree(dma_mem);
-	return err;
-}
-
-static int dpaa2_switch_fdb_entry_dump(struct ethsw_port_priv *port_priv,
-				       struct fdb_dump_entry *fdb_entry,
-				       void *data)
-{
-	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
-		return 0;
-
-	return dpaa2_switch_fdb_dump_nl(fdb_entry, data);
-}
-
-static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-				      struct net_device *net_dev,
-				      struct net_device *filter_dev, int *idx)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
-	struct ethsw_dump_ctx dump = {
-		.dev = net_dev,
-		.skb = skb,
-		.cb = cb,
-		.idx = *idx,
-	};
-	int err;
-
-	err = dpaa2_switch_fdb_iterate(port_priv, dpaa2_switch_fdb_entry_dump, &dump);
-	*idx = dump.idx;
-
-	return err;
-}
-
-static int dpaa2_switch_fdb_entry_fast_age(struct ethsw_port_priv *port_priv,
-					   struct fdb_dump_entry *fdb_entry,
-					   void *data __always_unused)
-{
-	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
-		return 0;
-
-	if (!(fdb_entry->type & DPSW_FDB_ENTRY_TYPE_DYNAMIC))
-		return 0;
-
-	if (fdb_entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
-		dpaa2_switch_port_fdb_del_uc(port_priv, fdb_entry->mac_addr);
-	else
-		dpaa2_switch_port_fdb_del_mc(port_priv, fdb_entry->mac_addr);
-
-	return 0;
-}
-
-static void dpaa2_switch_port_fast_age(struct ethsw_port_priv *port_priv)
-{
-	dpaa2_switch_fdb_iterate(port_priv,
-				 dpaa2_switch_fdb_entry_fast_age, NULL);
-}
-
-static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto,
-				      u16 vid)
-{
-	struct switchdev_obj_port_vlan vlan = {
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-		.vid = vid,
-		.obj.orig_dev = netdev,
-		/* This API only allows programming tagged, non-PVID VIDs */
-		.flags = 0,
-	};
-
-	return dpaa2_switch_port_vlans_add(netdev, &vlan);
-}
-
-static int dpaa2_switch_port_vlan_kill(struct net_device *netdev, __be16 proto,
-				       u16 vid)
-{
-	struct switchdev_obj_port_vlan vlan = {
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-		.vid = vid,
-		.obj.orig_dev = netdev,
-		/* This API only allows programming tagged, non-PVID VIDs */
-		.flags = 0,
-	};
-
-	return dpaa2_switch_port_vlans_del(netdev, &vlan);
-}
-
-static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *net_dev = port_priv->netdev;
-	struct device *dev = net_dev->dev.parent;
-	u8 mac_addr[ETH_ALEN];
-	int err;
-
-	if (!(ethsw->features & ETHSW_FEATURE_MAC_ADDR))
-		return 0;
-
-	/* Get firmware address, if any */
-	err = dpsw_if_get_port_mac_addr(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					port_priv->idx, mac_addr);
-	if (err) {
-		dev_err(dev, "dpsw_if_get_port_mac_addr() failed\n");
-		return err;
-	}
-
-	/* First check if firmware has any address configured by bootloader */
-	if (!is_zero_ether_addr(mac_addr)) {
-		memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
-	} else {
-		/* No MAC address configured, fill in net_dev->dev_addr
-		 * with a random one
-		 */
-		eth_hw_addr_random(net_dev);
-		dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n");
-
-		/* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all
-		 * practical purposes, this will be our "permanent" mac address,
-		 * at least until the next reboot. This move will also permit
-		 * register_netdevice() to properly fill up net_dev->perm_addr.
-		 */
-		net_dev->addr_assign_type = NET_ADDR_PERM;
-	}
-
-	return 0;
-}
-
-static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw,
-				 const struct dpaa2_fd *fd)
-{
-	struct device *dev = ethsw->dev;
-	unsigned char *buffer_start;
-	struct sk_buff **skbh, *skb;
-	dma_addr_t fd_addr;
-
-	fd_addr = dpaa2_fd_get_addr(fd);
-	skbh = dpaa2_iova_to_virt(ethsw->iommu_domain, fd_addr);
-
-	skb = *skbh;
-	buffer_start = (unsigned char *)skbh;
-
-	dma_unmap_single(dev, fd_addr,
-			 skb_tail_pointer(skb) - buffer_start,
-			 DMA_TO_DEVICE);
-
-	/* Move on with skb release */
-	dev_kfree_skb(skb);
-}
-
-static int dpaa2_switch_build_single_fd(struct ethsw_core *ethsw,
-					struct sk_buff *skb,
-					struct dpaa2_fd *fd)
-{
-	struct device *dev = ethsw->dev;
-	struct sk_buff **skbh;
-	dma_addr_t addr;
-	u8 *buff_start;
-	void *hwa;
-
-	buff_start = PTR_ALIGN(skb->data - DPAA2_SWITCH_TX_DATA_OFFSET -
-			       DPAA2_SWITCH_TX_BUF_ALIGN,
-			       DPAA2_SWITCH_TX_BUF_ALIGN);
-
-	/* Clear FAS to have consistent values for TX confirmation. It is
-	 * located in the first 8 bytes of the buffer's hardware annotation
-	 * area
-	 */
-	hwa = buff_start + DPAA2_SWITCH_SWA_SIZE;
-	memset(hwa, 0, 8);
-
-	/* Store a backpointer to the skb at the beginning of the buffer
-	 * (in the private data area) such that we can release it
-	 * on Tx confirm
-	 */
-	skbh = (struct sk_buff **)buff_start;
-	*skbh = skb;
-
-	addr = dma_map_single(dev, buff_start,
-			      skb_tail_pointer(skb) - buff_start,
-			      DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(dev, addr)))
-		return -ENOMEM;
-
-	/* Setup the FD fields */
-	memset(fd, 0, sizeof(*fd));
-
-	dpaa2_fd_set_addr(fd, addr);
-	dpaa2_fd_set_offset(fd, (u16)(skb->data - buff_start));
-	dpaa2_fd_set_len(fd, skb->len);
-	dpaa2_fd_set_format(fd, dpaa2_fd_single);
-
-	return 0;
-}
-
-static netdev_tx_t dpaa2_switch_port_tx(struct sk_buff *skb,
-					struct net_device *net_dev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	int retries = DPAA2_SWITCH_SWP_BUSY_RETRIES;
-	struct dpaa2_fd fd;
-	int err;
-
-	if (unlikely(skb_headroom(skb) < DPAA2_SWITCH_NEEDED_HEADROOM)) {
-		struct sk_buff *ns;
-
-		ns = skb_realloc_headroom(skb, DPAA2_SWITCH_NEEDED_HEADROOM);
-		if (unlikely(!ns)) {
-			net_err_ratelimited("%s: Error reallocating skb headroom\n", net_dev->name);
-			goto err_free_skb;
-		}
-		dev_consume_skb_any(skb);
-		skb = ns;
-	}
-
-	/* We'll be holding a back-reference to the skb until Tx confirmation */
-	skb = skb_unshare(skb, GFP_ATOMIC);
-	if (unlikely(!skb)) {
-		/* skb_unshare() has already freed the skb */
-		net_err_ratelimited("%s: Error copying the socket buffer\n", net_dev->name);
-		goto err_exit;
-	}
-
-	/* At this stage, we do not support non-linear skbs so just try to
-	 * linearize the skb and if that's not working, just drop the packet.
-	 */
-	err = skb_linearize(skb);
-	if (err) {
-		net_err_ratelimited("%s: skb_linearize error (%d)!\n", net_dev->name, err);
-		goto err_free_skb;
-	}
-
-	err = dpaa2_switch_build_single_fd(ethsw, skb, &fd);
-	if (unlikely(err)) {
-		net_err_ratelimited("%s: ethsw_build_*_fd() %d\n", net_dev->name, err);
-		goto err_free_skb;
-	}
-
-	do {
-		err = dpaa2_io_service_enqueue_qd(NULL,
-						  port_priv->tx_qdid,
-						  8, 0, &fd);
-		retries--;
-	} while (err == -EBUSY && retries);
-
-	if (unlikely(err < 0)) {
-		dpaa2_switch_free_fd(ethsw, &fd);
-		goto err_exit;
-	}
-
-	return NETDEV_TX_OK;
-
-err_free_skb:
-	dev_kfree_skb(skb);
-err_exit:
-	return NETDEV_TX_OK;
-}
-
-static const struct net_device_ops dpaa2_switch_port_ops = {
-	.ndo_open		= dpaa2_switch_port_open,
-	.ndo_stop		= dpaa2_switch_port_stop,
-
-	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_get_stats64	= dpaa2_switch_port_get_stats,
-	.ndo_change_mtu		= dpaa2_switch_port_change_mtu,
-	.ndo_has_offload_stats	= dpaa2_switch_port_has_offload_stats,
-	.ndo_get_offload_stats	= dpaa2_switch_port_get_offload_stats,
-	.ndo_fdb_dump		= dpaa2_switch_port_fdb_dump,
-	.ndo_vlan_rx_add_vid	= dpaa2_switch_port_vlan_add,
-	.ndo_vlan_rx_kill_vid	= dpaa2_switch_port_vlan_kill,
-
-	.ndo_start_xmit		= dpaa2_switch_port_tx,
-	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
-	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
-};
-
-bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
-{
-	return netdev->netdev_ops == &dpaa2_switch_port_ops;
-}
-
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
-		dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
-	}
-}
-
-static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
-{
-	struct device *dev = (struct device *)arg;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
-	/* Mask the events and the if_id reserved bits to be cleared on read */
-	u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
-	int err;
-
-	err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, &status);
-	if (err) {
-		dev_err(dev, "Can't get irq status (err %d)\n", err);
-
-		err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					    DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
-		if (err)
-			dev_err(dev, "Can't clear irq status (err %d)\n", err);
-		goto out;
-	}
-
-	if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
-		dpaa2_switch_links_state_update(ethsw);
-
-out:
-	return IRQ_HANDLED;
-}
-
-static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED;
-	struct fsl_mc_device_irq *irq;
-	int err;
-
-	err = fsl_mc_allocate_irqs(sw_dev);
-	if (err) {
-		dev_err(dev, "MC irqs allocation failed\n");
-		return err;
-	}
-
-	if (WARN_ON(sw_dev->obj_desc.irq_count != DPSW_IRQ_NUM)) {
-		err = -EINVAL;
-		goto free_irq;
-	}
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 0);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
-		goto free_irq;
-	}
-
-	irq = sw_dev->irqs[DPSW_IRQ_INDEX_IF];
-
-	err = devm_request_threaded_irq(dev, irq->msi_desc->irq,
-					NULL,
-					dpaa2_switch_irq0_handler_thread,
-					IRQF_NO_SUSPEND | IRQF_ONESHOT,
-					dev_name(dev), dev);
-	if (err) {
-		dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
-		goto free_irq;
-	}
-
-	err = dpsw_set_irq_mask(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				DPSW_IRQ_INDEX_IF, mask);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_mask(): %d\n", err);
-		goto free_devm_irq;
-	}
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 1);
-	if (err) {
-		dev_err(dev, "dpsw_set_irq_enable(): %d\n", err);
-		goto free_devm_irq;
-	}
-
-	return 0;
-
-free_devm_irq:
-	devm_free_irq(dev, irq->msi_desc->irq, dev);
-free_irq:
-	fsl_mc_free_irqs(sw_dev);
-	return err;
-}
-
-static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  DPSW_IRQ_INDEX_IF, 0);
-	if (err)
-		dev_err(dev, "dpsw_set_irq_enable err %d\n", err);
-
-	fsl_mc_free_irqs(sw_dev);
-}
-
-static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
-						u8 state)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	return dpaa2_switch_port_set_stp_state(port_priv, state);
-}
-
-static int dpaa2_switch_port_attr_set(struct net_device *netdev,
-				      const struct switchdev_attr *attr,
-				      struct netlink_ext_ack *extack)
-{
-	int err = 0;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
-		err = dpaa2_switch_port_attr_stp_state_set(netdev,
-							   attr->u.stp_state);
-		break;
-	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
-		if (!attr->u.vlan_filtering) {
-			NL_SET_ERR_MSG_MOD(extack,
-					   "The DPAA2 switch does not support VLAN-unaware operation");
-			return -EOPNOTSUPP;
-		}
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
-int dpaa2_switch_port_vlans_add(struct net_device *netdev,
-				const struct switchdev_obj_port_vlan *vlan)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_attr *attr = &ethsw->sw_attr;
-	int err = 0;
-
-	/* Make sure that the VLAN is not already configured
-	 * on the switch port
-	 */
-	if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER)
-		return -EEXIST;
-
-	/* Check if there is space for a new VLAN */
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
-		return err;
-	}
-	if (attr->max_vlans - attr->num_vlans < 1)
-		return -ENOSPC;
-
-	/* Check if there is space for a new VLAN */
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
-		return err;
-	}
-	if (attr->max_vlans - attr->num_vlans < 1)
-		return -ENOSPC;
-
-	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
-		/* this is a new VLAN */
-		err = dpaa2_switch_add_vlan(port_priv, vlan->vid);
-		if (err)
-			return err;
-
-		port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL;
-	}
-
-	return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags);
-}
-
-static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc,
-					    const unsigned char *addr)
-{
-	struct netdev_hw_addr_list *list = (is_uc) ? &netdev->uc : &netdev->mc;
-	struct netdev_hw_addr *ha;
-
-	netif_addr_lock_bh(netdev);
-	list_for_each_entry(ha, &list->list, list) {
-		if (ether_addr_equal(ha->addr, addr)) {
-			netif_addr_unlock_bh(netdev);
-			return 1;
-		}
-	}
-	netif_addr_unlock_bh(netdev);
-	return 0;
-}
-
-static int dpaa2_switch_port_mdb_add(struct net_device *netdev,
-				     const struct switchdev_obj_port_mdb *mdb)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	/* Check if address is already set on this port */
-	if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
-		return -EEXIST;
-
-	err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr);
-	if (err)
-		return err;
-
-	err = dev_mc_add(netdev, mdb->addr);
-	if (err) {
-		netdev_err(netdev, "dev_mc_add err %d\n", err);
-		dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_obj_add(struct net_device *netdev,
-				     const struct switchdev_obj *obj)
-{
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dpaa2_switch_port_vlans_add(netdev,
-						  SWITCHDEV_OBJ_PORT_VLAN(obj));
-		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dpaa2_switch_port_mdb_add(netdev,
-						SWITCHDEV_OBJ_PORT_MDB(obj));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid)
-{
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_vlan_if_cfg vcfg;
-	int i, err;
-
-	if (!port_priv->vlans[vid])
-		return -ENOENT;
-
-	if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) {
-		/* If we are deleting the PVID of a port, use VLAN 4095 instead
-		 * as we are sure that neither the bridge nor the 8021q module
-		 * will use it
-		 */
-		err = dpaa2_switch_port_set_pvid(port_priv, 4095);
-		if (err)
-			return err;
-	}
-
-	vcfg.num_ifs = 1;
-	vcfg.if_id[0] = port_priv->idx;
-	if (port_priv->vlans[vid] & ETHSW_VLAN_UNTAGGED) {
-		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0,
-						   ethsw->dpsw_handle,
-						   vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_remove_if_untagged err %d\n",
-				   err);
-		}
-		port_priv->vlans[vid] &= ~ETHSW_VLAN_UNTAGGED;
-	}
-
-	if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
-		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					  vid, &vcfg);
-		if (err) {
-			netdev_err(netdev,
-				   "dpsw_vlan_remove_if err %d\n", err);
-			return err;
-		}
-		port_priv->vlans[vid] &= ~ETHSW_VLAN_MEMBER;
-
-		/* Delete VLAN from switch if it is no longer configured on
-		 * any port
-		 */
-		for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
-			if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER)
-				return 0; /* Found a port member in VID */
-
-		ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL;
-
-		err = dpaa2_switch_dellink(ethsw, vid);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-int dpaa2_switch_port_vlans_del(struct net_device *netdev,
-				const struct switchdev_obj_port_vlan *vlan)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	if (netif_is_bridge_master(vlan->obj.orig_dev))
-		return -EOPNOTSUPP;
-
-	return dpaa2_switch_port_del_vlan(port_priv, vlan->vid);
-}
-
-static int dpaa2_switch_port_mdb_del(struct net_device *netdev,
-				     const struct switchdev_obj_port_mdb *mdb)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	int err;
-
-	if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr))
-		return -ENOENT;
-
-	err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr);
-	if (err)
-		return err;
-
-	err = dev_mc_del(netdev, mdb->addr);
-	if (err) {
-		netdev_err(netdev, "dev_mc_del err %d\n", err);
-		return err;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_port_obj_del(struct net_device *netdev,
-				     const struct switchdev_obj *obj)
-{
-	int err;
-
-	switch (obj->id) {
-	case SWITCHDEV_OBJ_ID_PORT_VLAN:
-		err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj));
-		break;
-	case SWITCHDEV_OBJ_ID_PORT_MDB:
-		err = dpaa2_switch_port_mdb_del(netdev, SWITCHDEV_OBJ_PORT_MDB(obj));
-		break;
-	default:
-		err = -EOPNOTSUPP;
-		break;
-	}
-	return err;
-}
-
-static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
-					    struct switchdev_notifier_port_attr_info *ptr)
-{
-	int err;
-
-	err = switchdev_handle_port_attr_set(netdev, ptr,
-					     dpaa2_switch_port_dev_check,
-					     dpaa2_switch_port_attr_set);
-	return notifier_from_errno(err);
-}
-
-static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
-{
-	struct dpsw_egress_flood_cfg flood_cfg;
-	int i = 0, j;
-	int err;
-
-	/* Add all the DPAA2 switch ports found in the same bridging domain to
-	 * the egress flooding domain
-	 */
-	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
-		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
-			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
-
-	/* Add the CTRL interface to the egress flooding domain */
-	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
-
-	/* Use the FDB of the first dpaa2 switch port added to the bridge */
-	flood_cfg.fdb_id = fdb_id;
-
-	/* Setup broadcast flooding domain */
-	flood_cfg.flood_type = DPSW_BROADCAST;
-	flood_cfg.num_ifs = i;
-	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				    &flood_cfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
-		return err;
-	}
-
-	/* Setup unknown flooding domain */
-	flood_cfg.flood_type = DPSW_FLOODING;
-	flood_cfg.num_ifs = i;
-	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				    &flood_cfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
-					 struct net_device *upper_dev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct ethsw_port_priv *other_port_priv;
-	struct net_device *other_dev;
-	struct list_head *iter;
-	int err;
-
-	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
-		if (!dpaa2_switch_port_dev_check(other_dev))
-			continue;
-
-		other_port_priv = netdev_priv(other_dev);
-		if (other_port_priv->ethsw_data != port_priv->ethsw_data) {
-			netdev_err(netdev,
-				   "Interface from a different DPSW is in the bridge already!\n");
-			return -EINVAL;
-		}
-	}
-
-	/* Delete the previously manually installed VLAN 1 */
-	err = dpaa2_switch_port_del_vlan(port_priv, 1);
-	if (err)
-		return err;
-
-	dpaa2_switch_port_set_fdb(port_priv, upper_dev);
-
-	/* Setup the egress flood policy (broadcast, unknown unicast) */
-	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
-	if (err)
-		goto err_egress_flood;
-
-	return 0;
-
-err_egress_flood:
-	dpaa2_switch_port_set_fdb(port_priv, NULL);
-	return err;
-}
-
-static int dpaa2_switch_port_clear_rxvlan(struct net_device *vdev, int vid, void *arg)
-{
-	__be16 vlan_proto = htons(ETH_P_8021Q);
-
-	if (vdev)
-		vlan_proto = vlan_dev_vlan_proto(vdev);
-
-	return dpaa2_switch_port_vlan_kill(arg, vlan_proto, vid);
-}
-
-static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, void *arg)
-{
-	__be16 vlan_proto = htons(ETH_P_8021Q);
-
-	if (vdev)
-		vlan_proto = vlan_dev_vlan_proto(vdev);
-
-	return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid);
-}
-
-static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-	struct dpaa2_switch_fdb *old_fdb = port_priv->fdb;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	int err;
-
-	/* First of all, fast age any learn FDB addresses on this switch port */
-	dpaa2_switch_port_fast_age(port_priv);
-
-	/* Clear all RX VLANs installed through vlan_vid_add() either as VLAN
-	 * upper devices or otherwise from the FDB table that we are about to
-	 * leave
-	 */
-	err = vlan_for_each(netdev, dpaa2_switch_port_clear_rxvlan, netdev);
-	if (err)
-		netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err);
-
-	dpaa2_switch_port_set_fdb(port_priv, NULL);
-
-	/* Restore all RX VLANs into the new FDB table that we just joined */
-	err = vlan_for_each(netdev, dpaa2_switch_port_restore_rxvlan, netdev);
-	if (err)
-		netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err);
-
-	/* Setup the egress flood policy (broadcast, unknown unicast).
-	 * When the port is not under a bridge, only the CTRL interface is part
-	 * of the flooding domain besides the actual port
-	 */
-	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
-	if (err)
-		return err;
-
-	/* Recreate the egress flood domain of the FDB that we just left */
-	err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id);
-	if (err)
-		return err;
-
-	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
-	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
-	 */
-	return dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID,
-					  BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID);
-}
-
-static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev)
-{
-	struct net_device *upper_dev;
-	struct list_head *iter;
-
-	/* RCU read lock not necessary because we have write-side protection
-	 * (rtnl_mutex), however a non-rcu iterator does not exist.
-	 */
-	netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter)
-		if (is_vlan_dev(upper_dev))
-			return -EOPNOTSUPP;
-
-	return 0;
-}
-
-static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb,
-					     unsigned long event, void *ptr)
-{
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-	struct netdev_notifier_changeupper_info *info = ptr;
-	struct netlink_ext_ack *extack;
-	struct net_device *upper_dev;
-	int err = 0;
-
-	if (!dpaa2_switch_port_dev_check(netdev))
-		return NOTIFY_DONE;
-
-	extack = netdev_notifier_info_to_extack(&info->info);
-
-	switch (event) {
-	case NETDEV_PRECHANGEUPPER:
-		upper_dev = info->upper_dev;
-		if (!netif_is_bridge_master(upper_dev))
-			break;
-
-		if (!br_vlan_enabled(upper_dev)) {
-			NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge");
-			err = -EOPNOTSUPP;
-			goto out;
-		}
-
-		err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev);
-		if (err) {
-			NL_SET_ERR_MSG_MOD(extack,
-					   "Cannot join a bridge while VLAN uppers are present");
-			goto out;
-		}
-
-		break;
-	case NETDEV_CHANGEUPPER:
-		upper_dev = info->upper_dev;
-		if (netif_is_bridge_master(upper_dev)) {
-			if (info->linking)
-				err = dpaa2_switch_port_bridge_join(netdev, upper_dev);
-			else
-				err = dpaa2_switch_port_bridge_leave(netdev);
-		}
-		break;
-	}
-
-out:
-	return notifier_from_errno(err);
-}
-
-struct ethsw_switchdev_event_work {
-	struct work_struct work;
-	struct switchdev_notifier_fdb_info fdb_info;
-	struct net_device *dev;
-	unsigned long event;
-};
-
-static void dpaa2_switch_event_work(struct work_struct *work)
-{
-	struct ethsw_switchdev_event_work *switchdev_work =
-		container_of(work, struct ethsw_switchdev_event_work, work);
-	struct net_device *dev = switchdev_work->dev;
-	struct switchdev_notifier_fdb_info *fdb_info;
-	int err;
-
-	rtnl_lock();
-	fdb_info = &switchdev_work->fdb_info;
-
-	switch (switchdev_work->event) {
-	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-		if (!fdb_info->added_by_user)
-			break;
-		if (is_unicast_ether_addr(fdb_info->addr))
-			err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
-							   fdb_info->addr);
-		else
-			err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev),
-							   fdb_info->addr);
-		if (err)
-			break;
-		fdb_info->offloaded = true;
-		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
-					 &fdb_info->info, NULL);
-		break;
-	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		if (!fdb_info->added_by_user)
-			break;
-		if (is_unicast_ether_addr(fdb_info->addr))
-			dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
-		else
-			dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr);
-		break;
-	}
-
-	rtnl_unlock();
-	kfree(switchdev_work->fdb_info.addr);
-	kfree(switchdev_work);
-	dev_put(dev);
-}
-
-/* Called under rcu_read_lock() */
-static int dpaa2_switch_port_event(struct notifier_block *nb,
-				   unsigned long event, void *ptr)
-{
-	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
-	struct ethsw_port_priv *port_priv = netdev_priv(dev);
-	struct ethsw_switchdev_event_work *switchdev_work;
-	struct switchdev_notifier_fdb_info *fdb_info = ptr;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-
-	if (event == SWITCHDEV_PORT_ATTR_SET)
-		return dpaa2_switch_port_attr_set_event(dev, ptr);
-
-	if (!dpaa2_switch_port_dev_check(dev))
-		return NOTIFY_DONE;
-
-	switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
-	if (!switchdev_work)
-		return NOTIFY_BAD;
-
-	INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work);
-	switchdev_work->dev = dev;
-	switchdev_work->event = event;
-
-	switch (event) {
-	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		memcpy(&switchdev_work->fdb_info, ptr,
-		       sizeof(switchdev_work->fdb_info));
-		switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
-		if (!switchdev_work->fdb_info.addr)
-			goto err_addr_alloc;
-
-		ether_addr_copy((u8 *)switchdev_work->fdb_info.addr,
-				fdb_info->addr);
-
-		/* Take a reference on the device to avoid being freed. */
-		dev_hold(dev);
-		break;
-	default:
-		kfree(switchdev_work);
-		return NOTIFY_DONE;
-	}
-
-	queue_work(ethsw->workqueue, &switchdev_work->work);
-
-	return NOTIFY_DONE;
-
-err_addr_alloc:
-	kfree(switchdev_work);
-	return NOTIFY_BAD;
-}
-
-static int dpaa2_switch_port_obj_event(unsigned long event,
-				       struct net_device *netdev,
-				       struct switchdev_notifier_port_obj_info *port_obj_info)
-{
-	int err = -EOPNOTSUPP;
-
-	if (!dpaa2_switch_port_dev_check(netdev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case SWITCHDEV_PORT_OBJ_ADD:
-		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
-		break;
-	case SWITCHDEV_PORT_OBJ_DEL:
-		err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
-		break;
-	}
-
-	port_obj_info->handled = true;
-	return notifier_from_errno(err);
-}
-
-static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
-					    unsigned long event, void *ptr)
-{
-	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
-
-	switch (event) {
-	case SWITCHDEV_PORT_OBJ_ADD:
-	case SWITCHDEV_PORT_OBJ_DEL:
-		return dpaa2_switch_port_obj_event(event, dev, ptr);
-	case SWITCHDEV_PORT_ATTR_SET:
-		return dpaa2_switch_port_attr_set_event(dev, ptr);
-	}
-
-	return NOTIFY_DONE;
-}
-
-/* Build a linear skb based on a single-buffer frame descriptor */
-static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
-						     const struct dpaa2_fd *fd)
-{
-	u16 fd_offset = dpaa2_fd_get_offset(fd);
-	dma_addr_t addr = dpaa2_fd_get_addr(fd);
-	u32 fd_length = dpaa2_fd_get_len(fd);
-	struct device *dev = ethsw->dev;
-	struct sk_buff *skb = NULL;
-	void *fd_vaddr;
-
-	fd_vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
-	dma_unmap_page(dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
-		       DMA_FROM_DEVICE);
-
-	skb = build_skb(fd_vaddr, DPAA2_SWITCH_RX_BUF_SIZE +
-			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
-	if (unlikely(!skb)) {
-		dev_err(dev, "build_skb() failed\n");
-		return NULL;
-	}
-
-	skb_reserve(skb, fd_offset);
-	skb_put(skb, fd_length);
-
-	ethsw->buf_count--;
-
-	return skb;
-}
-
-static void dpaa2_switch_tx_conf(struct dpaa2_switch_fq *fq,
-				 const struct dpaa2_fd *fd)
-{
-	dpaa2_switch_free_fd(fq->ethsw, fd);
-}
-
-static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
-			    const struct dpaa2_fd *fd)
-{
-	struct ethsw_core *ethsw = fq->ethsw;
-	struct ethsw_port_priv *port_priv;
-	struct net_device *netdev;
-	struct vlan_ethhdr *hdr;
-	struct sk_buff *skb;
-	u16 vlan_tci, vid;
-	int if_id, err;
-
-	/* get switch ingress interface ID */
-	if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
-
-	if (if_id >= ethsw->sw_attr.num_ifs) {
-		dev_err(ethsw->dev, "Frame received from unknown interface!\n");
-		goto err_free_fd;
-	}
-	port_priv = ethsw->ports[if_id];
-	netdev = port_priv->netdev;
-
-	/* build the SKB based on the FD received */
-	if (dpaa2_fd_get_format(fd) != dpaa2_fd_single) {
-		if (net_ratelimit()) {
-			netdev_err(netdev, "Received invalid frame format\n");
-			goto err_free_fd;
-		}
-	}
-
-	skb = dpaa2_switch_build_linear_skb(ethsw, fd);
-	if (unlikely(!skb))
-		goto err_free_fd;
-
-	skb_reset_mac_header(skb);
-
-	/* Remove the VLAN header if the packet that we just received has a vid
-	 * equal to the port PVIDs. Since the dpaa2-switch can operate only in
-	 * VLAN-aware mode and no alterations are made on the packet when it's
-	 * redirected/mirrored to the control interface, we are sure that there
-	 * will always be a VLAN header present.
-	 */
-	hdr = vlan_eth_hdr(skb);
-	vid = ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK;
-	if (vid == port_priv->pvid) {
-		err = __skb_vlan_pop(skb, &vlan_tci);
-		if (err) {
-			dev_info(ethsw->dev, "__skb_vlan_pop() returned %d", err);
-			goto err_free_fd;
-		}
-	}
-
-	skb->dev = netdev;
-	skb->protocol = eth_type_trans(skb, skb->dev);
-
-	netif_receive_skb(skb);
-
-	return;
-
-err_free_fd:
-	dpaa2_switch_free_fd(ethsw, fd);
-}
-
-static void dpaa2_switch_detect_features(struct ethsw_core *ethsw)
-{
-	ethsw->features = 0;
-
-	if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6))
-		ethsw->features |= ETHSW_FEATURE_MAC_ADDR;
-}
-
-static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw)
-{
-	struct dpsw_ctrl_if_attr ctrl_if_attr;
-	struct device *dev = ethsw->dev;
-	int i = 0;
-	int err;
-
-	err = dpsw_ctrl_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					  &ctrl_if_attr);
-	if (err) {
-		dev_err(dev, "dpsw_ctrl_if_get_attributes() = %d\n", err);
-		return err;
-	}
-
-	ethsw->fq[i].fqid = ctrl_if_attr.rx_fqid;
-	ethsw->fq[i].ethsw = ethsw;
-	ethsw->fq[i++].type = DPSW_QUEUE_RX;
-
-	ethsw->fq[i].fqid = ctrl_if_attr.tx_err_conf_fqid;
-	ethsw->fq[i].ethsw = ethsw;
-	ethsw->fq[i++].type = DPSW_QUEUE_TX_ERR_CONF;
-
-	return 0;
-}
-
-/* Free buffers acquired from the buffer pool or which were meant to
- * be released in the pool
- */
-static void dpaa2_switch_free_bufs(struct ethsw_core *ethsw, u64 *buf_array, int count)
-{
-	struct device *dev = ethsw->dev;
-	void *vaddr;
-	int i;
-
-	for (i = 0; i < count; i++) {
-		vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, buf_array[i]);
-		dma_unmap_page(dev, buf_array[i], DPAA2_SWITCH_RX_BUF_SIZE,
-			       DMA_FROM_DEVICE);
-		free_pages((unsigned long)vaddr, 0);
-	}
-}
-
-/* Perform a single release command to add buffers
- * to the specified buffer pool
- */
-static int dpaa2_switch_add_bufs(struct ethsw_core *ethsw, u16 bpid)
-{
-	struct device *dev = ethsw->dev;
-	u64 buf_array[BUFS_PER_CMD];
-	struct page *page;
-	int retries = 0;
-	dma_addr_t addr;
-	int err;
-	int i;
-
-	for (i = 0; i < BUFS_PER_CMD; i++) {
-		/* Allocate one page for each Rx buffer. WRIOP sees
-		 * the entire page except for a tailroom reserved for
-		 * skb shared info
-		 */
-		page = dev_alloc_pages(0);
-		if (!page) {
-			dev_err(dev, "buffer allocation failed\n");
-			goto err_alloc;
-		}
-
-		addr = dma_map_page(dev, page, 0, DPAA2_SWITCH_RX_BUF_SIZE,
-				    DMA_FROM_DEVICE);
-		if (dma_mapping_error(dev, addr)) {
-			dev_err(dev, "dma_map_single() failed\n");
-			goto err_map;
-		}
-		buf_array[i] = addr;
-	}
-
-release_bufs:
-	/* In case the portal is busy, retry until successful or
-	 * max retries hit.
-	 */
-	while ((err = dpaa2_io_service_release(NULL, bpid,
-					       buf_array, i)) == -EBUSY) {
-		if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES)
-			break;
-
-		cpu_relax();
-	}
-
-	/* If release command failed, clean up and bail out. */
-	if (err) {
-		dpaa2_switch_free_bufs(ethsw, buf_array, i);
-		return 0;
-	}
-
-	return i;
-
-err_map:
-	__free_pages(page, 0);
-err_alloc:
-	/* If we managed to allocate at least some buffers,
-	 * release them to hardware
-	 */
-	if (i)
-		goto release_bufs;
-
-	return 0;
-}
-
-static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw)
-{
-	int *count = &ethsw->buf_count;
-	int new_count;
-	int err = 0;
-
-	if (unlikely(*count < DPAA2_ETHSW_REFILL_THRESH)) {
-		do {
-			new_count = dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
-			if (unlikely(!new_count)) {
-				/* Out of memory; abort for now, we'll
-				 * try later on
-				 */
-				break;
-			}
-			*count += new_count;
-		} while (*count < DPAA2_ETHSW_NUM_BUFS);
-
-		if (unlikely(*count < DPAA2_ETHSW_NUM_BUFS))
-			err = -ENOMEM;
-	}
-
-	return err;
-}
-
-static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw)
-{
-	int *count, i;
-
-	for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) {
-		count = &ethsw->buf_count;
-		*count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
-
-		if (unlikely(*count < BUFS_PER_CMD))
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void dpaa2_switch_drain_bp(struct ethsw_core *ethsw)
-{
-	u64 buf_array[BUFS_PER_CMD];
-	int ret;
-
-	do {
-		ret = dpaa2_io_service_acquire(NULL, ethsw->bpid,
-					       buf_array, BUFS_PER_CMD);
-		if (ret < 0) {
-			dev_err(ethsw->dev,
-				"dpaa2_io_service_acquire() = %d\n", ret);
-			return;
-		}
-		dpaa2_switch_free_bufs(ethsw, buf_array, ret);
-
-	} while (ret);
-}
-
-static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
-{
-	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
-	struct device *dev = ethsw->dev;
-	struct fsl_mc_device *dpbp_dev;
-	struct dpbp_attr dpbp_attrs;
-	int err;
-
-	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
-				     &dpbp_dev);
-	if (err) {
-		if (err == -ENXIO)
-			err = -EPROBE_DEFER;
-		else
-			dev_err(dev, "DPBP device allocation failed\n");
-		return err;
-	}
-	ethsw->dpbp_dev = dpbp_dev;
-
-	err = dpbp_open(ethsw->mc_io, 0, dpbp_dev->obj_desc.id,
-			&dpbp_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpbp_open() failed\n");
-		goto err_open;
-	}
-
-	err = dpbp_reset(ethsw->mc_io, 0, dpbp_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpbp_reset() failed\n");
-		goto err_reset;
-	}
-
-	err = dpbp_enable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpbp_enable() failed\n");
-		goto err_enable;
-	}
-
-	err = dpbp_get_attributes(ethsw->mc_io, 0, dpbp_dev->mc_handle,
-				  &dpbp_attrs);
-	if (err) {
-		dev_err(dev, "dpbp_get_attributes() failed\n");
-		goto err_get_attr;
-	}
-
-	dpsw_ctrl_if_pools_cfg.num_dpbp = 1;
-	dpsw_ctrl_if_pools_cfg.pools[0].dpbp_id = dpbp_attrs.id;
-	dpsw_ctrl_if_pools_cfg.pools[0].buffer_size = DPAA2_SWITCH_RX_BUF_SIZE;
-	dpsw_ctrl_if_pools_cfg.pools[0].backup_pool = 0;
-
-	err = dpsw_ctrl_if_set_pools(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				     &dpsw_ctrl_if_pools_cfg);
-	if (err) {
-		dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
-		goto err_get_attr;
-	}
-	ethsw->bpid = dpbp_attrs.id;
-
-	return 0;
-
-err_get_attr:
-	dpbp_disable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
-err_enable:
-err_reset:
-	dpbp_close(ethsw->mc_io, 0, dpbp_dev->mc_handle);
-err_open:
-	fsl_mc_object_free(dpbp_dev);
-	return err;
-}
-
-static void dpaa2_switch_free_dpbp(struct ethsw_core *ethsw)
-{
-	dpbp_disable(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
-	dpbp_close(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
-	fsl_mc_object_free(ethsw->dpbp_dev);
-}
-
-static int dpaa2_switch_alloc_rings(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
-		ethsw->fq[i].store =
-			dpaa2_io_store_create(DPAA2_SWITCH_STORE_SIZE,
-					      ethsw->dev);
-		if (!ethsw->fq[i].store) {
-			dev_err(ethsw->dev, "dpaa2_io_store_create failed\n");
-			while (--i >= 0)
-				dpaa2_io_store_destroy(ethsw->fq[i].store);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
-		dpaa2_io_store_destroy(ethsw->fq[i].store);
-}
-
-static int dpaa2_switch_pull_fq(struct dpaa2_switch_fq *fq)
-{
-	int err, retries = 0;
-
-	/* Try to pull from the FQ while the portal is busy and we didn't hit
-	 * the maximum number fo retries
-	 */
-	do {
-		err = dpaa2_io_service_pull_fq(NULL, fq->fqid, fq->store);
-		cpu_relax();
-	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
-
-	if (unlikely(err))
-		dev_err(fq->ethsw->dev, "dpaa2_io_service_pull err %d", err);
-
-	return err;
-}
-
-/* Consume all frames pull-dequeued into the store */
-static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
-{
-	struct ethsw_core *ethsw = fq->ethsw;
-	int cleaned = 0, is_last;
-	struct dpaa2_dq *dq;
-	int retries = 0;
-
-	do {
-		/* Get the next available FD from the store */
-		dq = dpaa2_io_store_next(fq->store, &is_last);
-		if (unlikely(!dq)) {
-			if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) {
-				dev_err_once(ethsw->dev,
-					     "No valid dequeue response\n");
-				return -ETIMEDOUT;
-			}
-			continue;
-		}
-
-		if (fq->type == DPSW_QUEUE_RX)
-			dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
-		else
-			dpaa2_switch_tx_conf(fq, dpaa2_dq_fd(dq));
-		cleaned++;
-
-	} while (!is_last);
-
-	return cleaned;
-}
-
-/* NAPI poll routine */
-static int dpaa2_switch_poll(struct napi_struct *napi, int budget)
-{
-	int err, cleaned = 0, store_cleaned, work_done;
-	struct dpaa2_switch_fq *fq;
-	int retries = 0;
-
-	fq = container_of(napi, struct dpaa2_switch_fq, napi);
-
-	do {
-		err = dpaa2_switch_pull_fq(fq);
-		if (unlikely(err))
-			break;
-
-		/* Refill pool if appropriate */
-		dpaa2_switch_refill_bp(fq->ethsw);
-
-		store_cleaned = dpaa2_switch_store_consume(fq);
-		cleaned += store_cleaned;
-
-		if (cleaned >= budget) {
-			work_done = budget;
-			goto out;
-		}
-
-	} while (store_cleaned);
-
-	/* We didn't consume the entire budget, so finish napi and re-enable
-	 * data availability notifications
-	 */
-	napi_complete_done(napi, cleaned);
-	do {
-		err = dpaa2_io_service_rearm(NULL, &fq->nctx);
-		cpu_relax();
-	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
-
-	work_done = max(cleaned, 1);
-out:
-
-	return work_done;
-}
-
-static void dpaa2_switch_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
-{
-	struct dpaa2_switch_fq *fq;
-
-	fq = container_of(nctx, struct dpaa2_switch_fq, nctx);
-
-	napi_schedule(&fq->napi);
-}
-
-static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
-{
-	struct dpsw_ctrl_if_queue_cfg queue_cfg;
-	struct dpaa2_io_notification_ctx *nctx;
-	int err, i, j;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
-		nctx = &ethsw->fq[i].nctx;
-
-		/* Register a new software context for the FQID.
-		 * By using NULL as the first parameter, we specify that we do
-		 * not care on which cpu are interrupts received for this queue
-		 */
-		nctx->is_cdan = 0;
-		nctx->id = ethsw->fq[i].fqid;
-		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
-		nctx->cb = dpaa2_switch_fqdan_cb;
-		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
-		if (err) {
-			err = -EPROBE_DEFER;
-			goto err_register;
-		}
-
-		queue_cfg.options = DPSW_CTRL_IF_QUEUE_OPT_DEST |
-				    DPSW_CTRL_IF_QUEUE_OPT_USER_CTX;
-		queue_cfg.dest_cfg.dest_type = DPSW_CTRL_IF_DEST_DPIO;
-		queue_cfg.dest_cfg.dest_id = nctx->dpio_id;
-		queue_cfg.dest_cfg.priority = 0;
-		queue_cfg.user_ctx = nctx->qman64;
-
-		err = dpsw_ctrl_if_set_queue(ethsw->mc_io, 0,
-					     ethsw->dpsw_handle,
-					     ethsw->fq[i].type,
-					     &queue_cfg);
-		if (err)
-			goto err_set_queue;
-	}
-
-	return 0;
-
-err_set_queue:
-	dpaa2_io_service_deregister(NULL, nctx, ethsw->dev);
-err_register:
-	for (j = 0; j < i; j++)
-		dpaa2_io_service_deregister(NULL, &ethsw->fq[j].nctx,
-					    ethsw->dev);
-
-	return err;
-}
-
-static void dpaa2_switch_free_dpio(struct ethsw_core *ethsw)
-{
-	int i;
-
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
-		dpaa2_io_service_deregister(NULL, &ethsw->fq[i].nctx,
-					    ethsw->dev);
-}
-
-static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
-{
-	int err;
-
-	/* setup FQs for Rx and Tx Conf */
-	err = dpaa2_switch_setup_fqs(ethsw);
-	if (err)
-		return err;
-
-	/* setup the buffer pool needed on the Rx path */
-	err = dpaa2_switch_setup_dpbp(ethsw);
-	if (err)
-		return err;
-
-	err = dpaa2_switch_seed_bp(ethsw);
-	if (err)
-		goto err_free_dpbp;
-
-	err = dpaa2_switch_alloc_rings(ethsw);
-	if (err)
-		goto err_drain_dpbp;
-
-	err = dpaa2_switch_setup_dpio(ethsw);
-	if (err)
-		goto err_destroy_rings;
-
-	err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
-		goto err_deregister_dpio;
-	}
-
-	return 0;
-
-err_deregister_dpio:
-	dpaa2_switch_free_dpio(ethsw);
-err_destroy_rings:
-	dpaa2_switch_destroy_rings(ethsw);
-err_drain_dpbp:
-	dpaa2_switch_drain_bp(ethsw);
-err_free_dpbp:
-	dpaa2_switch_free_dpbp(ethsw);
-
-	return err;
-}
-
-static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	struct dpsw_vlan_if_cfg vcfg = {0};
-	struct dpsw_tci_cfg tci_cfg = {0};
-	struct dpsw_stp_cfg stp_cfg;
-	int err;
-	u16 i;
-
-	ethsw->dev_id = sw_dev->obj_desc.id;
-
-	err = dpsw_open(ethsw->mc_io, 0, ethsw->dev_id, &ethsw->dpsw_handle);
-	if (err) {
-		dev_err(dev, "dpsw_open err %d\n", err);
-		return err;
-	}
-
-	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				  &ethsw->sw_attr);
-	if (err) {
-		dev_err(dev, "dpsw_get_attributes err %d\n", err);
-		goto err_close;
-	}
-
-	err = dpsw_get_api_version(ethsw->mc_io, 0,
-				   &ethsw->major,
-				   &ethsw->minor);
-	if (err) {
-		dev_err(dev, "dpsw_get_api_version err %d\n", err);
-		goto err_close;
-	}
-
-	/* Minimum supported DPSW version check */
-	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
-	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
-	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
-		dev_err(dev, "DPSW version %d:%d not supported. Use firmware 10.28.0 or greater.\n",
-			ethsw->major, ethsw->minor);
-		err = -EOPNOTSUPP;
-		goto err_close;
-	}
-
-	if (!dpaa2_switch_supports_cpu_traffic(ethsw)) {
-		err = -EOPNOTSUPP;
-		goto err_close;
-	}
-
-	dpaa2_switch_detect_features(ethsw);
-
-	err = dpsw_reset(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err) {
-		dev_err(dev, "dpsw_reset err %d\n", err);
-		goto err_close;
-	}
-
-	stp_cfg.vlan_id = DEFAULT_VLAN_ID;
-	stp_cfg.state = DPSW_STP_STATE_FORWARDING;
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		err = dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i);
-		if (err) {
-			dev_err(dev, "dpsw_if_disable err %d\n", err);
-			goto err_close;
-		}
-
-		err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i,
-				      &stp_cfg);
-		if (err) {
-			dev_err(dev, "dpsw_if_set_stp err %d for port %d\n",
-				err, i);
-			goto err_close;
-		}
-
-		/* Switch starts with all ports configured to VLAN 1. Need to
-		 * remove this setting to allow configuration at bridge join
-		 */
-		vcfg.num_ifs = 1;
-		vcfg.if_id[0] = i;
-		err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle,
-						   DEFAULT_VLAN_ID, &vcfg);
-		if (err) {
-			dev_err(dev, "dpsw_vlan_remove_if_untagged err %d\n",
-				err);
-			goto err_close;
-		}
-
-		tci_cfg.vlan_id = 4095;
-		err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, i, &tci_cfg);
-		if (err) {
-			dev_err(dev, "dpsw_if_set_tci err %d\n", err);
-			goto err_close;
-		}
-
-		err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-					  DEFAULT_VLAN_ID, &vcfg);
-		if (err) {
-			dev_err(dev, "dpsw_vlan_remove_if err %d\n", err);
-			goto err_close;
-		}
-	}
-
-	err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, DEFAULT_VLAN_ID);
-	if (err) {
-		dev_err(dev, "dpsw_vlan_remove err %d\n", err);
-		goto err_close;
-	}
-
-	ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered",
-						   WQ_MEM_RECLAIM, "ethsw",
-						   ethsw->sw_attr.id);
-	if (!ethsw->workqueue) {
-		err = -ENOMEM;
-		goto err_close;
-	}
-
-	err = dpsw_fdb_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, 0);
-	if (err)
-		goto err_destroy_ordered_workqueue;
-
-	err = dpaa2_switch_ctrl_if_setup(ethsw);
-	if (err)
-		goto err_destroy_ordered_workqueue;
-
-	return 0;
-
-err_destroy_ordered_workqueue:
-	destroy_workqueue(ethsw->workqueue);
-
-err_close:
-	dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	return err;
-}
-
-static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
-{
-	struct switchdev_obj_port_vlan vlan = {
-		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
-		.vid = DEFAULT_VLAN_ID,
-		.flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID,
-	};
-	struct net_device *netdev = port_priv->netdev;
-	struct ethsw_core *ethsw = port_priv->ethsw_data;
-	struct dpsw_fdb_cfg fdb_cfg = {0};
-	struct dpaa2_switch_fdb *fdb;
-	struct dpsw_if_attr dpsw_if_attr;
-	u16 fdb_id;
-	int err;
-
-	/* Get the Tx queue for this specific port */
-	err = dpsw_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				     port_priv->idx, &dpsw_if_attr);
-	if (err) {
-		netdev_err(netdev, "dpsw_if_get_attributes err %d\n", err);
-		return err;
-	}
-	port_priv->tx_qdid = dpsw_if_attr.qdid;
-
-	/* Create a FDB table for this particular switch port */
-	fdb_cfg.num_fdb_entries = ethsw->sw_attr.max_fdb_entries / ethsw->sw_attr.num_ifs;
-	err = dpsw_fdb_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			   &fdb_id, &fdb_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_fdb_add err %d\n", err);
-		return err;
-	}
-
-	/* Find an unused dpaa2_switch_fdb structure and use it */
-	fdb = dpaa2_switch_fdb_get_unused(ethsw);
-	fdb->fdb_id = fdb_id;
-	fdb->in_use = true;
-	fdb->bridge_dev = NULL;
-	port_priv->fdb = fdb;
-
-	/* We need to add VLAN 1 as the PVID on this port until it is under a
-	 * bridge since the DPAA2 switch is not able to handle the traffic in a
-	 * VLAN unaware fashion
-	 */
-	err = dpaa2_switch_port_vlans_add(netdev, &vlan);
-	if (err)
-		return err;
-
-	/* Setup the egress flooding domains (broadcast, unknown unicast */
-	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
-	if (err)
-		return err;
-
-	return err;
-}
-
-static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw = dev_get_drvdata(dev);
-	int err;
-
-	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err)
-		dev_warn(dev, "dpsw_close err %d\n", err);
-}
-
-static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
-{
-	dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	dpaa2_switch_free_dpio(ethsw);
-	dpaa2_switch_destroy_rings(ethsw);
-	dpaa2_switch_drain_bp(ethsw);
-	dpaa2_switch_free_dpbp(ethsw);
-}
-
-static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
-{
-	struct ethsw_port_priv *port_priv;
-	struct ethsw_core *ethsw;
-	struct device *dev;
-	int i;
-
-	dev = &sw_dev->dev;
-	ethsw = dev_get_drvdata(dev);
-
-	dpaa2_switch_ctrl_if_teardown(ethsw);
-
-	dpaa2_switch_teardown_irqs(sw_dev);
-
-	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		port_priv = ethsw->ports[i];
-		unregister_netdev(port_priv->netdev);
-		free_netdev(port_priv->netdev);
-	}
-
-	kfree(ethsw->fdbs);
-	kfree(ethsw->ports);
-
-	dpaa2_switch_takedown(sw_dev);
-
-	destroy_workqueue(ethsw->workqueue);
-
-	fsl_mc_portal_free(ethsw->mc_io);
-
-	kfree(ethsw);
-
-	dev_set_drvdata(dev, NULL);
-
-	return 0;
-}
-
-static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
-				   u16 port_idx)
-{
-	struct ethsw_port_priv *port_priv;
-	struct device *dev = ethsw->dev;
-	struct net_device *port_netdev;
-	int err;
-
-	port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
-	if (!port_netdev) {
-		dev_err(dev, "alloc_etherdev error\n");
-		return -ENOMEM;
-	}
-
-	port_priv = netdev_priv(port_netdev);
-	port_priv->netdev = port_netdev;
-	port_priv->ethsw_data = ethsw;
-
-	port_priv->idx = port_idx;
-	port_priv->stp_state = BR_STATE_FORWARDING;
-
-	SET_NETDEV_DEV(port_netdev, dev);
-	port_netdev->netdev_ops = &dpaa2_switch_port_ops;
-	port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops;
-
-	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
-
-	/* Set MTU limits */
-	port_netdev->min_mtu = ETH_MIN_MTU;
-	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
-
-	/* Populate the private port structure so that later calls to
-	 * dpaa2_switch_port_init() can use it.
-	 */
-	ethsw->ports[port_idx] = port_priv;
-
-	/* The DPAA2 switch's ingress path depends on the VLAN table,
-	 * thus we are not able to disable VLAN filtering.
-	 */
-	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER;
-
-	err = dpaa2_switch_port_init(port_priv, port_idx);
-	if (err)
-		goto err_port_probe;
-
-	err = dpaa2_switch_port_set_mac_addr(port_priv);
-	if (err)
-		goto err_port_probe;
-
-	return 0;
-
-err_port_probe:
-	free_netdev(port_netdev);
-	ethsw->ports[port_idx] = NULL;
-
-	return err;
-}
-
-static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
-{
-	struct device *dev = &sw_dev->dev;
-	struct ethsw_core *ethsw;
-	int i, err;
-
-	/* Allocate switch core*/
-	ethsw = kzalloc(sizeof(*ethsw), GFP_KERNEL);
-
-	if (!ethsw)
-		return -ENOMEM;
-
-	ethsw->dev = dev;
-	ethsw->iommu_domain = iommu_get_domain_for_dev(dev);
-	dev_set_drvdata(dev, ethsw);
-
-	err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
-				     &ethsw->mc_io);
-	if (err) {
-		if (err == -ENXIO)
-			err = -EPROBE_DEFER;
-		else
-			dev_err(dev, "fsl_mc_portal_allocate err %d\n", err);
-		goto err_free_drvdata;
-	}
-
-	err = dpaa2_switch_init(sw_dev);
-	if (err)
-		goto err_free_cmdport;
-
-	ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports),
-			       GFP_KERNEL);
-	if (!(ethsw->ports)) {
-		err = -ENOMEM;
-		goto err_takedown;
-	}
-
-	ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs),
-			      GFP_KERNEL);
-	if (!ethsw->fdbs) {
-		err = -ENOMEM;
-		goto err_free_ports;
-	}
-
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		err = dpaa2_switch_probe_port(ethsw, i);
-		if (err)
-			goto err_free_netdev;
-	}
-
-	/* Add a NAPI instance for each of the Rx queues. The first port's
-	 * net_device will be associated with the instances since we do not have
-	 * different queues for each switch ports.
-	 */
-	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
-		netif_napi_add(ethsw->ports[0]->netdev,
-			       &ethsw->fq[i].napi, dpaa2_switch_poll,
-			       NAPI_POLL_WEIGHT);
-
-	err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-		goto err_free_netdev;
-	}
-
-	/* Setup IRQs */
-	err = dpaa2_switch_setup_irqs(sw_dev);
-	if (err)
-		goto err_stop;
-
-	/* Register the netdev only when the entire setup is done and the
-	 * switch port interfaces are ready to receive traffic
-	 */
-	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-		err = register_netdev(ethsw->ports[i]->netdev);
-		if (err < 0) {
-			dev_err(dev, "register_netdev error %d\n", err);
-			goto err_unregister_ports;
-		}
-	}
-
-	return 0;
-
-err_unregister_ports:
-	for (i--; i >= 0; i--)
-		unregister_netdev(ethsw->ports[i]->netdev);
-	dpaa2_switch_teardown_irqs(sw_dev);
-err_stop:
-	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-err_free_netdev:
-	for (i--; i >= 0; i--)
-		free_netdev(ethsw->ports[i]->netdev);
-	kfree(ethsw->fdbs);
-err_free_ports:
-	kfree(ethsw->ports);
-
-err_takedown:
-	dpaa2_switch_takedown(sw_dev);
-
-err_free_cmdport:
-	fsl_mc_portal_free(ethsw->mc_io);
-
-err_free_drvdata:
-	kfree(ethsw);
-	dev_set_drvdata(dev, NULL);
-
-	return err;
-}
-
-static const struct fsl_mc_device_id dpaa2_switch_match_id_table[] = {
-	{
-		.vendor = FSL_MC_VENDOR_FREESCALE,
-		.obj_type = "dpsw",
-	},
-	{ .vendor = 0x0 }
-};
-MODULE_DEVICE_TABLE(fslmc, dpaa2_switch_match_id_table);
-
-static struct fsl_mc_driver dpaa2_switch_drv = {
-	.driver = {
-		.name = KBUILD_MODNAME,
-		.owner = THIS_MODULE,
-	},
-	.probe = dpaa2_switch_probe,
-	.remove = dpaa2_switch_remove,
-	.match_id_table = dpaa2_switch_match_id_table
-};
-
-static struct notifier_block dpaa2_switch_port_nb __read_mostly = {
-	.notifier_call = dpaa2_switch_port_netdevice_event,
-};
-
-static struct notifier_block dpaa2_switch_port_switchdev_nb = {
-	.notifier_call = dpaa2_switch_port_event,
-};
-
-static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb = {
-	.notifier_call = dpaa2_switch_port_blocking_event,
-};
-
-static int dpaa2_switch_register_notifiers(void)
-{
-	int err;
-
-	err = register_netdevice_notifier(&dpaa2_switch_port_nb);
-	if (err) {
-		pr_err("dpaa2-switch: failed to register net_device notifier (%d)\n", err);
-		return err;
-	}
-
-	err = register_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
-	if (err) {
-		pr_err("dpaa2-switch: failed to register switchdev notifier (%d)\n", err);
-		goto err_switchdev_nb;
-	}
-
-	err = register_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
-	if (err) {
-		pr_err("dpaa2-switch: failed to register switchdev blocking notifier (%d)\n", err);
-		goto err_switchdev_blocking_nb;
-	}
-
-	return 0;
-
-err_switchdev_blocking_nb:
-	unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
-err_switchdev_nb:
-	unregister_netdevice_notifier(&dpaa2_switch_port_nb);
-
-	return err;
-}
-
-static void dpaa2_switch_unregister_notifiers(void)
-{
-	int err;
-
-	err = unregister_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb);
-	if (err)
-		pr_err("dpaa2-switch: failed to unregister switchdev blocking notifier (%d)\n",
-		       err);
-
-	err = unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb);
-	if (err)
-		pr_err("dpaa2-switch: failed to unregister switchdev notifier (%d)\n", err);
-
-	err = unregister_netdevice_notifier(&dpaa2_switch_port_nb);
-	if (err)
-		pr_err("dpaa2-switch: failed to unregister net_device notifier (%d)\n", err);
-}
-
-static int __init dpaa2_switch_driver_init(void)
-{
-	int err;
-
-	err = fsl_mc_driver_register(&dpaa2_switch_drv);
-	if (err)
-		return err;
-
-	err = dpaa2_switch_register_notifiers();
-	if (err) {
-		fsl_mc_driver_unregister(&dpaa2_switch_drv);
-		return err;
-	}
-
-	return 0;
-}
-
-static void __exit dpaa2_switch_driver_exit(void)
-{
-	dpaa2_switch_unregister_notifiers();
-	fsl_mc_driver_unregister(&dpaa2_switch_drv);
-}
-
-module_init(dpaa2_switch_driver_init);
-module_exit(dpaa2_switch_driver_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver");
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
deleted file mode 100644
index 933563064015..000000000000
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * DPAA2 Ethernet Switch declarations
- *
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2017-2021 NXP
- *
- */
-
-#ifndef __ETHSW_H
-#define __ETHSW_H
-
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/rtnetlink.h>
-#include <linux/if_vlan.h>
-#include <uapi/linux/if_bridge.h>
-#include <net/switchdev.h>
-#include <linux/if_bridge.h>
-#include <linux/fsl/mc.h>
-#include <soc/fsl/dpaa2-io.h>
-
-#include "dpsw.h"
-
-/* Number of IRQs supported */
-#define DPSW_IRQ_NUM	2
-
-/* Port is member of VLAN */
-#define ETHSW_VLAN_MEMBER	1
-/* VLAN to be treated as untagged on egress */
-#define ETHSW_VLAN_UNTAGGED	2
-/* Untagged frames will be assigned to this VLAN */
-#define ETHSW_VLAN_PVID		4
-/* VLAN configured on the switch */
-#define ETHSW_VLAN_GLOBAL	8
-
-/* Maximum Frame Length supported by HW (currently 10k) */
-#define DPAA2_MFL		(10 * 1024)
-#define ETHSW_MAX_FRAME_LENGTH	(DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
-#define ETHSW_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
-
-#define ETHSW_FEATURE_MAC_ADDR	BIT(0)
-
-/* Number of receive queues (one RX and one TX_CONF) */
-#define DPAA2_SWITCH_RX_NUM_FQS	2
-
-/* Hardware requires alignment for ingress/egress buffer addresses */
-#define DPAA2_SWITCH_RX_BUF_RAW_SIZE	PAGE_SIZE
-#define DPAA2_SWITCH_RX_BUF_TAILROOM \
-	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
-#define DPAA2_SWITCH_RX_BUF_SIZE \
-	(DPAA2_SWITCH_RX_BUF_RAW_SIZE - DPAA2_SWITCH_RX_BUF_TAILROOM)
-
-#define DPAA2_SWITCH_STORE_SIZE 16
-
-/* Buffer management */
-#define BUFS_PER_CMD			7
-#define DPAA2_ETHSW_NUM_BUFS		(1024 * BUFS_PER_CMD)
-#define DPAA2_ETHSW_REFILL_THRESH	(DPAA2_ETHSW_NUM_BUFS * 5 / 6)
-
-/* Number of times to retry DPIO portal operations while waiting
- * for portal to finish executing current command and become
- * available. We want to avoid being stuck in a while loop in case
- * hardware becomes unresponsive, but not give up too easily if
- * the portal really is busy for valid reasons
- */
-#define DPAA2_SWITCH_SWP_BUSY_RETRIES		1000
-
-/* Hardware annotation buffer size */
-#define DPAA2_SWITCH_HWA_SIZE			64
-/* Software annotation buffer size */
-#define DPAA2_SWITCH_SWA_SIZE			64
-
-#define DPAA2_SWITCH_TX_BUF_ALIGN		64
-
-#define DPAA2_SWITCH_TX_DATA_OFFSET \
-	(DPAA2_SWITCH_HWA_SIZE + DPAA2_SWITCH_SWA_SIZE)
-
-#define DPAA2_SWITCH_NEEDED_HEADROOM \
-	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
-
-extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
-
-struct ethsw_core;
-
-struct dpaa2_switch_fq {
-	struct ethsw_core *ethsw;
-	enum dpsw_queue_type type;
-	struct dpaa2_io_store *store;
-	struct dpaa2_io_notification_ctx nctx;
-	struct napi_struct napi;
-	u32 fqid;
-};
-
-struct dpaa2_switch_fdb {
-	struct net_device	*bridge_dev;
-	u16			fdb_id;
-	bool			in_use;
-};
-
-/* Per port private data */
-struct ethsw_port_priv {
-	struct net_device	*netdev;
-	u16			idx;
-	struct ethsw_core	*ethsw_data;
-	u8			link_state;
-	u8			stp_state;
-	bool			flood;
-
-	u8			vlans[VLAN_VID_MASK + 1];
-	u16			pvid;
-	u16			tx_qdid;
-
-	struct dpaa2_switch_fdb	*fdb;
-};
-
-/* Switch data */
-struct ethsw_core {
-	struct device			*dev;
-	struct fsl_mc_io		*mc_io;
-	u16				dpsw_handle;
-	struct dpsw_attr		sw_attr;
-	u16				major, minor;
-	unsigned long			features;
-	int				dev_id;
-	struct ethsw_port_priv		**ports;
-	struct iommu_domain		*iommu_domain;
-
-	u8				vlans[VLAN_VID_MASK + 1];
-
-	struct workqueue_struct		*workqueue;
-
-	struct dpaa2_switch_fq		fq[DPAA2_SWITCH_RX_NUM_FQS];
-	struct fsl_mc_device		*dpbp_dev;
-	int				buf_count;
-	u16				bpid;
-	int				napi_users;
-
-	struct dpaa2_switch_fdb		*fdbs;
-};
-
-static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
-{
-	if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) {
-		dev_err(ethsw->dev, "Control Interface is disabled, cannot probe\n");
-		return false;
-	}
-
-	if (ethsw->sw_attr.flooding_cfg != DPSW_FLOODING_PER_FDB) {
-		dev_err(ethsw->dev, "Flooding domain is not per FDB, cannot probe\n");
-		return false;
-	}
-
-	if (ethsw->sw_attr.broadcast_cfg != DPSW_BROADCAST_PER_FDB) {
-		dev_err(ethsw->dev, "Broadcast domain is not per FDB, cannot probe\n");
-		return false;
-	}
-
-	if (ethsw->sw_attr.max_fdbs < ethsw->sw_attr.num_ifs) {
-		dev_err(ethsw->dev, "The number of FDBs is lower than the number of ports, cannot probe\n");
-		return false;
-	}
-
-	return true;
-}
-
-bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
-
-int dpaa2_switch_port_vlans_add(struct net_device *netdev,
-				const struct switchdev_obj_port_vlan *vlan);
-
-int dpaa2_switch_port_vlans_del(struct net_device *netdev,
-				const struct switchdev_obj_port_vlan *vlan);
-
-typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
-				  struct fdb_dump_entry *fdb_entry,
-				  void *data);
-#endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 04ea63e34a2ee85cfd38578b3fc97b2d4c9dd573 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 10 Mar 2021 14:22:46 +0800
Subject: selftests/bpf: Fix warning comparing pointer to 0

Fix the following coccicheck warning:

./tools/testing/selftests/bpf/progs/test_global_func10.c:17:12-13:
WARNING comparing pointer to 0.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1615357366-97612-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 tools/testing/selftests/bpf/progs/test_global_func10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 61c2ae92ce41..97b7031d0e22 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -14,7 +14,7 @@ struct Big {
 
 __noinline int foo(const struct Big *big)
 {
-	if (big == 0)
+	if (!big)
 		return 0;
 
 	return bpf_get_prandom_u32() < big->y;
-- 
cgit v1.2.3


From a9c80b03e586fd3819089fbd33c38fb65ad5e00c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 10 Mar 2021 15:18:34 +0800
Subject: bpf: Fix warning comparing pointer to 0

Fix the following coccicheck warning:

./tools/testing/selftests/bpf/progs/fentry_test.c:67:12-13: WARNING
comparing pointer to 0.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1615360714-30381-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 tools/testing/selftests/bpf/progs/fentry_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 5f645fdaba6f..52a550d281d9 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -64,7 +64,7 @@ __u64 test7_result = 0;
 SEC("fentry/bpf_fentry_test7")
 int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
 {
-	if (arg == 0)
+	if (!arg)
 		test7_result = 1;
 	return 0;
 }
-- 
cgit v1.2.3


From 2882c48bf8f2fb9ec31cbb68be6b979da48f880d Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 10 Mar 2021 09:09:28 +0100
Subject: libbpf: xsk: Remove linux/compiler.h header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit 291471dd1559 ("libbpf, xsk: Add libbpf_smp_store_release
libbpf_smp_load_acquire") linux/compiler.h was added as a dependency
to xsk.h, which is the user-facing API. This makes it harder for
userspace application to consume the library. Here the header
inclusion is removed, and instead {READ,WRITE}_ONCE() is added
explicitly.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210310080929.641212-2-bjorn.topel@gmail.com
---
 tools/lib/bpf/libbpf_util.h | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
index cfbcfc063c81..954da9b34a34 100644
--- a/tools/lib/bpf/libbpf_util.h
+++ b/tools/lib/bpf/libbpf_util.h
@@ -5,25 +5,30 @@
 #define __LIBBPF_LIBBPF_UTIL_H
 
 #include <stdbool.h>
-#include <linux/compiler.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-/* Use these barrier functions instead of smp_[rw]mb() when they are
- * used in a libbpf header file. That way they can be built into the
- * application that uses libbpf.
+/* Load-Acquire Store-Release barriers used by the XDP socket
+ * library. The following macros should *NOT* be considered part of
+ * the xsk.h API, and is subject to change anytime.
+ *
+ * LIBRARY INTERNAL
  */
+
+#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
 #if defined(__i386__) || defined(__x86_64__)
 # define libbpf_smp_store_release(p, v)					\
 	do {								\
 		asm volatile("" : : : "memory");			\
-		WRITE_ONCE(*p, v);					\
+		__XSK_WRITE_ONCE(*p, v);				\
 	} while (0)
 # define libbpf_smp_load_acquire(p)					\
 	({								\
-		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
 		asm volatile("" : : : "memory");			\
 		___p1;							\
 	})
@@ -41,11 +46,11 @@ extern "C" {
 # define libbpf_smp_store_release(p, v)					\
 	do {								\
 		asm volatile ("fence rw,w" : : : "memory");		\
-		WRITE_ONCE(*p, v);					\
+		__XSK_WRITE_ONCE(*p, v);				\
 	} while (0)
 # define libbpf_smp_load_acquire(p)					\
 	({								\
-		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
 		asm volatile ("fence r,rw" : : : "memory");		\
 		___p1;							\
 	})
@@ -55,19 +60,21 @@ extern "C" {
 #define libbpf_smp_store_release(p, v)					\
 	do {								\
 		__sync_synchronize();					\
-		WRITE_ONCE(*p, v);					\
+		__XSK_WRITE_ONCE(*p, v);				\
 	} while (0)
 #endif
 
 #ifndef libbpf_smp_load_acquire
 #define libbpf_smp_load_acquire(p)					\
 	({								\
-		typeof(*p) ___p1 = READ_ONCE(*p);			\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
 		__sync_synchronize();					\
 		___p1;							\
 	})
 #endif
 
+/* LIBRARY INTERNAL -- END */
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
cgit v1.2.3


From 7e8bbe24cb8b20e2719ec58bf6937ab2e484add2 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 10 Mar 2021 09:09:29 +0100
Subject: libbpf: xsk: Move barriers from libbpf_util.h to xsk.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only user of libbpf_util.h is xsk.h. Move the barriers to xsk.h,
and remove libbpf_util.h. The barriers are used as an implementation
detail, and should not be considered part of the stable API.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210310080929.641212-3-bjorn.topel@gmail.com
---
 tools/lib/bpf/Makefile      |  1 -
 tools/lib/bpf/libbpf_util.h | 82 ---------------------------------------------
 tools/lib/bpf/xsk.h         | 70 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 68 insertions(+), 85 deletions(-)
 delete mode 100644 tools/lib/bpf/libbpf_util.h

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8170f88e8ea6..f45bacbaa3d5 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -228,7 +228,6 @@ install_headers: $(BPF_HELPER_DEFS)
 		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
 		$(call do_install,btf.h,$(prefix)/include/bpf,644); \
-		$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
 		$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
 		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
deleted file mode 100644
index 954da9b34a34..000000000000
--- a/tools/lib/bpf/libbpf_util.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-/* Copyright (c) 2019 Facebook */
-
-#ifndef __LIBBPF_LIBBPF_UTIL_H
-#define __LIBBPF_LIBBPF_UTIL_H
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Load-Acquire Store-Release barriers used by the XDP socket
- * library. The following macros should *NOT* be considered part of
- * the xsk.h API, and is subject to change anytime.
- *
- * LIBRARY INTERNAL
- */
-
-#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
-#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
-
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile("" : : : "memory");			\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile("" : : : "memory");			\
-		___p1;							\
-	})
-#elif defined(__aarch64__)
-# define libbpf_smp_store_release(p, v)					\
-		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1;					\
-		asm volatile ("ldar %w0, %1"				\
-			      : "=r" (___p1) : "Q" (*p) : "memory");	\
-		___p1;							\
-	})
-#elif defined(__riscv)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile ("fence rw,w" : : : "memory");		\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile ("fence r,rw" : : : "memory");		\
-		___p1;							\
-	})
-#endif
-
-#ifndef libbpf_smp_store_release
-#define libbpf_smp_store_release(p, v)					\
-	do {								\
-		__sync_synchronize();					\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-#endif
-
-#ifndef libbpf_smp_load_acquire
-#define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		__sync_synchronize();					\
-		___p1;							\
-	})
-#endif
-
-/* LIBRARY INTERNAL -- END */
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index a9fdea87b5cd..01c12dca9c10 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -3,7 +3,8 @@
 /*
  * AF_XDP user-space access library.
  *
- * Copyright(c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
  *
  * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
  */
@@ -13,15 +14,80 @@
 
 #include <stdio.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <linux/if_xdp.h>
 
 #include "libbpf.h"
-#include "libbpf_util.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+/* Load-Acquire Store-Release barriers used by the XDP socket
+ * library. The following macros should *NOT* be considered part of
+ * the xsk.h API, and is subject to change anytime.
+ *
+ * LIBRARY INTERNAL
+ */
+
+#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile("" : : : "memory");			\
+		__XSK_WRITE_ONCE(*p, v);				\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
+		asm volatile("" : : : "memory");			\
+		___p1;							\
+	})
+#elif defined(__aarch64__)
+# define libbpf_smp_store_release(p, v)					\
+		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1;					\
+		asm volatile ("ldar %w0, %1"				\
+			      : "=r" (___p1) : "Q" (*p) : "memory");	\
+		___p1;							\
+	})
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v)					\
+	do {								\
+		asm volatile ("fence rw,w" : : : "memory");		\
+		__XSK_WRITE_ONCE(*p, v);				\
+	} while (0)
+# define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
+		asm volatile ("fence r,rw" : : : "memory");		\
+		___p1;							\
+	})
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v)					\
+	do {								\
+		__sync_synchronize();					\
+		__XSK_WRITE_ONCE(*p, v);				\
+	} while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p)					\
+	({								\
+		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
+		__sync_synchronize();					\
+		___p1;							\
+	})
+#endif
+
+/* LIBRARY INTERNAL -- END */
+
 /* Do not access these members directly. Use the functions below. */
 #define DEFINE_XSK_RING(name) \
 struct name { \
-- 
cgit v1.2.3


From 2b5720f26908571ff98bb279a6cb15f3b2660bb0 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:26 -0800
Subject: ionic: move rx_page_alloc and free

Move ionic_rx_page_alloc() and ionic_rx_page_free() to earlier
in the file to make the next patch easier to review.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 140 +++++++++++------------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 162a1ff1e9d2..70b997f302ac 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -66,6 +66,76 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	return skb;
 }
 
+static int ionic_rx_page_alloc(struct ionic_queue *q,
+			       struct ionic_page_info *page_info)
+{
+	struct ionic_lif *lif = q->lif;
+	struct ionic_rx_stats *stats;
+	struct net_device *netdev;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+	stats = q_to_rx_stats(q);
+
+	if (unlikely(!page_info)) {
+		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
+				    netdev->name, q->name);
+		return -EINVAL;
+	}
+
+	page_info->page = dev_alloc_page();
+	if (unlikely(!page_info->page)) {
+		net_err_ratelimited("%s: %s page alloc failed\n",
+				    netdev->name, q->name);
+		stats->alloc_err++;
+		return -ENOMEM;
+	}
+
+	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
+					   DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
+		put_page(page_info->page);
+		page_info->dma_addr = 0;
+		page_info->page = NULL;
+		net_err_ratelimited("%s: %s dma map failed\n",
+				    netdev->name, q->name);
+		stats->dma_map_err++;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void ionic_rx_page_free(struct ionic_queue *q,
+			       struct ionic_page_info *page_info)
+{
+	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+
+	if (unlikely(!page_info)) {
+		net_err_ratelimited("%s: %s invalid page_info in free\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	if (unlikely(!page_info->page)) {
+		net_err_ratelimited("%s: %s invalid page in free\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	put_page(page_info->page);
+	page_info->dma_addr = 0;
+	page_info->page = NULL;
+}
+
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_desc_info *desc_info,
 				      struct ionic_cq_info *cq_info)
@@ -253,76 +323,6 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	return true;
 }
 
-static int ionic_rx_page_alloc(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-	stats = q_to_rx_stats(q);
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
-				    netdev->name, q->name);
-		return -EINVAL;
-	}
-
-	page_info->page = dev_alloc_page();
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s page alloc failed\n",
-				    netdev->name, q->name);
-		stats->alloc_err++;
-		return -ENOMEM;
-	}
-
-	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
-					   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
-		put_page(page_info->page);
-		page_info->dma_addr = 0;
-		page_info->page = NULL;
-		net_err_ratelimited("%s: %s dma map failed\n",
-				    netdev->name, q->name);
-		stats->dma_map_err++;
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void ionic_rx_page_free(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s invalid page in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
-
-	put_page(page_info->page);
-	page_info->dma_addr = 0;
-	page_info->page = NULL;
-}
-
 void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
-- 
cgit v1.2.3


From 4b0a7539a3728f0fd7a11087d64371e8c28b4723 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:27 -0800
Subject: ionic: implement Rx page reuse

Rework the Rx buffer allocations to use pages twice when using
normal MTU in order to cut down on buffer allocation and mapping
overhead.

Instead of tracking individual pages, in which we may have
wasted half the space when using standard 1500 MTU, we track
buffers which use half pages, so we can use the second half
of the page rather than allocate and map a new page once the
first buffer has been used.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |  12 +-
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 215 ++++++++++++++---------
 2 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 690768ff0143..0f877c86eba6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -170,9 +170,15 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
-struct ionic_page_info {
+#define IONIC_PAGE_SIZE				PAGE_SIZE
+#define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 2)
+#define IONIC_PAGE_GFP_MASK			(GFP_ATOMIC | __GFP_NOWARN |\
+						 __GFP_COMP | __GFP_MEMALLOC)
+
+struct ionic_buf_info {
 	struct page *page;
 	dma_addr_t dma_addr;
+	u32 page_offset;
 };
 
 struct ionic_desc_info {
@@ -187,8 +193,8 @@ struct ionic_desc_info {
 		struct ionic_txq_sg_desc *txq_sg_desc;
 		struct ionic_rxq_sg_desc *rxq_sgl_desc;
 	};
-	unsigned int npages;
-	struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
+	unsigned int nbufs;
+	struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 70b997f302ac..3e13cfee9ecd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -54,7 +54,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	if (frags)
 		skb = napi_get_frags(&q_to_qcq(q)->napi);
 	else
-		skb = netdev_alloc_skb_ip_align(netdev, len);
+		skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
 
 	if (unlikely(!skb)) {
 		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
@@ -66,8 +66,15 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	return skb;
 }
 
+static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
+{
+	buf_info->page = NULL;
+	buf_info->page_offset = 0;
+	buf_info->dma_addr = 0;
+}
+
 static int ionic_rx_page_alloc(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
+			       struct ionic_buf_info *buf_info)
 {
 	struct ionic_lif *lif = q->lif;
 	struct ionic_rx_stats *stats;
@@ -78,26 +85,26 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 	dev = lif->ionic->dev;
 	stats = q_to_rx_stats(q);
 
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
 				    netdev->name, q->name);
 		return -EINVAL;
 	}
 
-	page_info->page = dev_alloc_page();
-	if (unlikely(!page_info->page)) {
+	buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+	if (unlikely(!buf_info->page)) {
 		net_err_ratelimited("%s: %s page alloc failed\n",
 				    netdev->name, q->name);
 		stats->alloc_err++;
 		return -ENOMEM;
 	}
+	buf_info->page_offset = 0;
 
-	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
-					   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
-		put_page(page_info->page);
-		page_info->dma_addr = 0;
-		page_info->page = NULL;
+	buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
+					  IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
+		__free_pages(buf_info->page, 0);
+		ionic_rx_buf_reset(buf_info);
 		net_err_ratelimited("%s: %s dma map failed\n",
 				    netdev->name, q->name);
 		stats->dma_map_err++;
@@ -108,32 +115,46 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 }
 
 static void ionic_rx_page_free(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
+			       struct ionic_buf_info *buf_info)
 {
-	struct ionic_lif *lif = q->lif;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
+	struct net_device *netdev = q->lif->netdev;
+	struct device *dev = q->lif->ionic->dev;
 
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in free\n",
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in free\n",
 				    netdev->name, q->name);
 		return;
 	}
 
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s invalid page in free\n",
-				    netdev->name, q->name);
+	if (!buf_info->page)
 		return;
-	}
 
-	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	__free_pages(buf_info->page, 0);
+	ionic_rx_buf_reset(buf_info);
+}
+
+static bool ionic_rx_buf_recycle(struct ionic_queue *q,
+				 struct ionic_buf_info *buf_info, u32 used)
+{
+	u32 size;
+
+	/* don't re-use pages allocated in low-mem condition */
+	if (page_is_pfmemalloc(buf_info->page))
+		return false;
+
+	/* don't re-use buffers from non-local numa nodes */
+	if (page_to_nid(buf_info->page) != numa_mem_id())
+		return false;
+
+	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
+	buf_info->page_offset += size;
+	if (buf_info->page_offset >= IONIC_PAGE_SIZE)
+		return false;
+
+	get_page(buf_info->page);
 
-	put_page(page_info->page);
-	page_info->dma_addr = 0;
-	page_info->page = NULL;
+	return true;
 }
 
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
@@ -142,16 +163,16 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	prefetch(page_address(page_info->page) + NET_IP_ALIGN);
+	prefetch(buf_info->page);
 
 	skb = ionic_rx_skb_alloc(q, len, true);
 	if (unlikely(!skb))
@@ -159,7 +180,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 
 	i = comp->num_sg_elems + 1;
 	do {
-		if (unlikely(!page_info->page)) {
+		if (unlikely(!buf_info->page)) {
 			struct napi_struct *napi = &q_to_qcq(q)->napi;
 
 			napi->skb = NULL;
@@ -167,15 +188,25 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 			return NULL;
 		}
 
-		frag_len = min(len, (u16)PAGE_SIZE);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
 		len -= frag_len;
 
-		dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_sync_single_for_cpu(dev,
+					buf_info->dma_addr + buf_info->page_offset,
+					frag_len, DMA_FROM_DEVICE);
+
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				page_info->page, 0, frag_len, PAGE_SIZE);
-		page_info->page = NULL;
-		page_info++;
+				buf_info->page, buf_info->page_offset, frag_len,
+				IONIC_PAGE_SIZE);
+
+		if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
+			dma_unmap_page(dev, buf_info->dma_addr,
+				       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+			ionic_rx_buf_reset(buf_info);
+		}
+
+		buf_info++;
+
 		i--;
 	} while (i > 0);
 
@@ -188,26 +219,26 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	struct sk_buff *skb;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
 	skb = ionic_rx_skb_alloc(q, len, false);
 	if (unlikely(!skb))
 		return NULL;
 
-	if (unlikely(!page_info->page)) {
+	if (unlikely(!buf_info->page)) {
 		dev_kfree_skb(skb);
 		return NULL;
 	}
 
-	dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
+	dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
 				len, DMA_FROM_DEVICE);
-	skb_copy_to_linear_data(skb, page_address(page_info->page), len);
-	dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
+	skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
+	dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
 				   len, DMA_FROM_DEVICE);
 
 	skb_put(skb, len);
@@ -327,64 +358,73 @@ void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
 	struct ionic_rxq_sg_desc *sg_desc;
 	struct ionic_rxq_sg_elem *sg_elem;
+	struct ionic_buf_info *buf_info;
 	struct ionic_rxq_desc *desc;
+	unsigned int max_sg_elems;
 	unsigned int remain_len;
-	unsigned int seg_len;
+	unsigned int frag_len;
 	unsigned int nfrags;
 	unsigned int i, j;
 	unsigned int len;
 
 	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
-	nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
+	max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
+		nfrags = 0;
 		remain_len = len;
 		desc_info = &q->info[q->head_idx];
 		desc = desc_info->desc;
-		sg_desc = desc_info->sg_desc;
-		page_info = &desc_info->pages[0];
+		buf_info = &desc_info->bufs[0];
 
-		if (page_info->page) { /* recycle the buffer */
-			ionic_rxq_post(q, false, ionic_rx_clean, NULL);
-			continue;
-		}
-
-		/* fill main descriptor - pages[0] */
-		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
-					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
-		desc_info->npages = nfrags;
-		if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-			desc->addr = 0;
-			desc->len = 0;
-			return;
+		if (!buf_info->page) { /* alloc a new buffer? */
+			if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+				desc->addr = 0;
+				desc->len = 0;
+				return;
+			}
 		}
-		desc->addr = cpu_to_le64(page_info->dma_addr);
-		seg_len = min_t(unsigned int, PAGE_SIZE, len);
-		desc->len = cpu_to_le16(seg_len);
-		remain_len -= seg_len;
-		page_info++;
 
-		/* fill sg descriptors - pages[1..n] */
-		for (j = 0; j < nfrags - 1; j++) {
-			if (page_info->page) /* recycle the sg buffer */
-				continue;
+		/* fill main descriptor - buf[0] */
+		desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		desc->len = cpu_to_le16(frag_len);
+		remain_len -= frag_len;
+		buf_info++;
+		nfrags++;
 
+		/* fill sg descriptors - buf[1..n] */
+		sg_desc = desc_info->sg_desc;
+		for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
 			sg_elem = &sg_desc->elems[j];
-			if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-				sg_elem->addr = 0;
-				sg_elem->len = 0;
-				return;
+			if (!buf_info->page) { /* alloc a new sg buffer? */
+				if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+					sg_elem->addr = 0;
+					sg_elem->len = 0;
+					return;
+				}
 			}
-			sg_elem->addr = cpu_to_le64(page_info->dma_addr);
-			seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
-			sg_elem->len = cpu_to_le16(seg_len);
-			remain_len -= seg_len;
-			page_info++;
+
+			sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+			frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
+			sg_elem->len = cpu_to_le16(frag_len);
+			remain_len -= frag_len;
+			buf_info++;
+			nfrags++;
 		}
 
+		/* clear end sg element as a sentinel */
+		if (j < max_sg_elems) {
+			sg_elem = &sg_desc->elems[j];
+			memset(sg_elem, 0, sizeof(*sg_elem));
+		}
+
+		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
+					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
+		desc_info->nbufs = nfrags;
+
 		ionic_rxq_post(q, false, ionic_rx_clean, NULL);
 	}
 
@@ -395,21 +435,24 @@ void ionic_rx_fill(struct ionic_queue *q)
 void ionic_rx_empty(struct ionic_queue *q)
 {
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	unsigned int i, j;
 
 	for (i = 0; i < q->num_descs; i++) {
 		desc_info = &q->info[i];
 		for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
-			page_info = &desc_info->pages[j];
-			if (page_info->page)
-				ionic_rx_page_free(q, page_info);
+			buf_info = &desc_info->bufs[j];
+			if (buf_info->page)
+				ionic_rx_page_free(q, buf_info);
 		}
 
-		desc_info->npages = 0;
+		desc_info->nbufs = 0;
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	}
+
+	q->head_idx = 0;
+	q->tail_idx = 0;
 }
 
 static void ionic_dim_update(struct ionic_qcq *qcq)
-- 
cgit v1.2.3


From f37bc3462e80d2dac9f0b2868720782a94dfb523 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:28 -0800
Subject: ionic: optimize fastpath struct usage

Clean up a couple of struct uses to make for better fast path
access.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.c  |  4 +--
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |  7 ++---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  |  3 ++-
 drivers/net/ethernet/pensando/ionic/ionic_lif.h  | 22 +++++++---------
 drivers/net/ethernet/pensando/ionic/ionic_main.c |  4 +--
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 33 +++++++++++-------------
 6 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index fb2b5bf179d7..b951bf5bbdc4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -585,9 +585,9 @@ void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
 void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
 		  void *cb_arg)
 {
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_desc_info *desc_info;
 	struct ionic_lif *lif = q->lif;
+	struct device *dev = q->dev;
 
 	desc_info = &q->info[q->head_idx];
 	desc_info->cb = cb;
@@ -629,7 +629,7 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
 
 	/* stop index must be for a descriptor that is not yet completed */
 	if (unlikely(!ionic_q_is_posted(q, stop_index)))
-		dev_err(q->lif->ionic->dev,
+		dev_err(q->dev,
 			"ionic stop is not posted %s stop %u tail %u head %u\n",
 			q->name, stop_index, q->tail_idx, q->head_idx);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 0f877c86eba6..339824cfd618 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -205,10 +205,12 @@ struct ionic_queue {
 	struct device *dev;
 	struct ionic_lif *lif;
 	struct ionic_desc_info *info;
+	u64 dbval;
 	u16 head_idx;
 	u16 tail_idx;
 	unsigned int index;
 	unsigned int num_descs;
+	unsigned int max_sg_elems;
 	u64 dbell_count;
 	u64 stop;
 	u64 wake;
@@ -217,7 +219,6 @@ struct ionic_queue {
 	unsigned int type;
 	unsigned int hw_index;
 	unsigned int hw_type;
-	u64 dbval;
 	union {
 		void *base;
 		struct ionic_txq_desc *txq;
@@ -235,7 +236,7 @@ struct ionic_queue {
 	unsigned int sg_desc_size;
 	unsigned int pid;
 	char name[IONIC_QUEUE_NAME_MAX_SZ];
-};
+} ____cacheline_aligned_in_smp;
 
 #define IONIC_INTR_INDEX_NOT_ASSIGNED	-1
 #define IONIC_INTR_NAME_MAX_SZ		32
@@ -262,7 +263,7 @@ struct ionic_cq {
 	u64 compl_count;
 	void *base;
 	dma_addr_t base_pa;
-};
+} ____cacheline_aligned_in_smp;
 
 struct ionic;
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 11140915c2da..6f8a5daaadfa 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -495,6 +495,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 		goto err_out;
 	}
 
+	new->q.dev = dev;
 	new->flags = flags;
 
 	new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info),
@@ -506,6 +507,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 	}
 
 	new->q.type = type;
+	new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems;
 
 	err = ionic_q_init(lif, idev, &new->q, index, name, num_descs,
 			   desc_size, sg_desc_size, pid);
@@ -2450,7 +2452,6 @@ int ionic_lif_alloc(struct ionic *ionic)
 	lif->index = 0;
 	lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
 	lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
-	lif->tx_budget = IONIC_TX_BUDGET_DEFAULT;
 
 	/* Convert the default coalesce value to actual hw resolution */
 	lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 563dba384a53..8ffda32a0a7d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -159,16 +159,11 @@ struct ionic_qtype_info {
 
 #define IONIC_LIF_NAME_MAX_SZ		32
 struct ionic_lif {
-	char name[IONIC_LIF_NAME_MAX_SZ];
-	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(state, IONIC_LIF_F_STATE_SIZE);
 	struct ionic *ionic;
-	bool registered;
 	unsigned int index;
 	unsigned int hw_index;
-	unsigned int kern_pid;
-	u64 __iomem *kern_dbpage;
 	struct mutex queue_lock;	/* lock for queue structures */
 	spinlock_t adminq_lock;		/* lock for AdminQ operations */
 	struct ionic_qcq *adminqcq;
@@ -177,20 +172,25 @@ struct ionic_lif {
 	struct ionic_tx_stats *txqstats;
 	struct ionic_qcq **rxqcqs;
 	struct ionic_rx_stats *rxqstats;
+	struct ionic_deferred deferred;
+	struct work_struct tx_timeout_work;
 	u64 last_eid;
+	unsigned int kern_pid;
+	u64 __iomem *kern_dbpage;
 	unsigned int neqs;
 	unsigned int nxqs;
 	unsigned int ntxq_descs;
 	unsigned int nrxq_descs;
 	u32 rx_copybreak;
-	u32 tx_budget;
 	unsigned int rx_mode;
 	u64 hw_features;
+	bool registered;
 	bool mc_overflow;
-	unsigned int nmcast;
 	bool uc_overflow;
 	u16 lif_type;
+	unsigned int nmcast;
 	unsigned int nucast;
+	char name[IONIC_LIF_NAME_MAX_SZ];
 
 	union ionic_lif_identity *identity;
 	struct ionic_lif_info *info;
@@ -205,16 +205,14 @@ struct ionic_lif {
 	u32 rss_ind_tbl_sz;
 
 	struct ionic_rx_filters rx_filters;
-	struct ionic_deferred deferred;
-	unsigned long *dbid_inuse;
-	unsigned int dbid_count;
-	struct dentry *dentry;
 	u32 rx_coalesce_usecs;		/* what the user asked for */
 	u32 rx_coalesce_hw;		/* what the hw is using */
 	u32 tx_coalesce_usecs;		/* what the user asked for */
 	u32 tx_coalesce_hw;		/* what the hw is using */
+	unsigned long *dbid_inuse;
+	unsigned int dbid_count;
 
-	struct work_struct tx_timeout_work;
+	struct dentry *dentry;
 };
 
 struct ionic_queue_params {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index fbc57de6683e..14ece909a451 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -234,17 +234,15 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 {
 	struct ionic_admin_ctx *ctx = cb_arg;
 	struct ionic_admin_comp *comp;
-	struct device *dev;
 
 	if (!ctx)
 		return;
 
 	comp = cq_info->cq_desc;
-	dev = &q->lif->netdev->dev;
 
 	memcpy(&ctx->comp, comp, sizeof(*comp));
 
-	dev_dbg(dev, "comp admin queue command:\n");
+	dev_dbg(q->dev, "comp admin queue command:\n");
 	dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
 			 &ctx->comp, sizeof(ctx->comp), true);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 3e13cfee9ecd..c472c14b3a80 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -82,7 +82,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 	struct device *dev;
 
 	netdev = lif->netdev;
-	dev = lif->ionic->dev;
+	dev = q->dev;
 	stats = q_to_rx_stats(q);
 
 	if (unlikely(!buf_info)) {
@@ -118,7 +118,7 @@ static void ionic_rx_page_free(struct ionic_queue *q,
 			       struct ionic_buf_info *buf_info)
 {
 	struct net_device *netdev = q->lif->netdev;
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 
 	if (unlikely(!buf_info)) {
 		net_err_ratelimited("%s: %s invalid buf_info in free\n",
@@ -162,8 +162,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_buf_info *buf_info;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
@@ -218,8 +218,8 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_buf_info *buf_info;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	u16 len;
 
@@ -362,7 +362,6 @@ void ionic_rx_fill(struct ionic_queue *q)
 	struct ionic_rxq_sg_elem *sg_elem;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rxq_desc *desc;
-	unsigned int max_sg_elems;
 	unsigned int remain_len;
 	unsigned int frag_len;
 	unsigned int nfrags;
@@ -370,7 +369,6 @@ void ionic_rx_fill(struct ionic_queue *q)
 	unsigned int len;
 
 	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
-	max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
 		nfrags = 0;
@@ -397,7 +395,7 @@ void ionic_rx_fill(struct ionic_queue *q)
 
 		/* fill sg descriptors - buf[1..n] */
 		sg_desc = desc_info->sg_desc;
-		for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
+		for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) {
 			sg_elem = &sg_desc->elems[j];
 			if (!buf_info->page) { /* alloc a new sg buffer? */
 				if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
@@ -416,7 +414,7 @@ void ionic_rx_fill(struct ionic_queue *q)
 		}
 
 		/* clear end sg element as a sentinel */
-		if (j < max_sg_elems) {
+		if (j < q->max_sg_elems) {
 			sg_elem = &sg_desc->elems[j];
 			memset(sg_elem, 0, sizeof(*sg_elem));
 		}
@@ -568,7 +566,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
 	idev = &lif->ionic->idev;
 	txcq = &lif->txqcqs[qi]->cq;
 
-	tx_work_done = ionic_cq_service(txcq, lif->tx_budget,
+	tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
 					ionic_tx_service, NULL, NULL);
 
 	rx_work_done = ionic_cq_service(rxcq, budget,
@@ -601,7 +599,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
 				      void *data, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
@@ -619,7 +617,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 				    size_t offset, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
@@ -640,7 +638,7 @@ static void ionic_tx_clean(struct ionic_queue *q,
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct ionic_txq_desc *desc = desc_info->desc;
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	u8 opcode, flags, nsge;
 	u16 queue_index;
 	unsigned int i;
@@ -822,8 +820,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct ionic_desc_info *rewind_desc_info;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_txq_sg_elem *elem;
+	struct device *dev = q->dev;
 	struct ionic_txq_desc *desc;
 	unsigned int frag_left = 0;
 	unsigned int offset = 0;
@@ -994,7 +992,7 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
@@ -1034,7 +1032,7 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
@@ -1071,7 +1069,7 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
 	unsigned int len_left = skb->len - skb_headlen(skb);
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	skb_frag_t *frag;
 	u16 len;
@@ -1120,7 +1118,6 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 
 static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
-	int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int err;
 
@@ -1129,7 +1126,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 		return (skb->len / skb_shinfo(skb)->gso_size) + 1;
 
 	/* If non-TSO, just need 1 desc and nr_frags sg elems */
-	if (skb_shinfo(skb)->nr_frags <= sg_elems)
+	if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems)
 		return 1;
 
 	/* Too many frags, so linearize */
-- 
cgit v1.2.3


From 89e572e7369fd9ad9c6b07b023bc6e5e9abcd4de Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:29 -0800
Subject: ionic: simplify rx skb alloc

Remove an unnecessary layer over rx skb allocation.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 63 +++++++++---------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index c472c14b3a80..cd2540ff9251 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -10,12 +10,6 @@
 #include "ionic_lif.h"
 #include "ionic_txrx.h"
 
-static void ionic_rx_clean(struct ionic_queue *q,
-			   struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info,
-			   void *cb_arg);
-
-static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
@@ -40,32 +34,6 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
 	return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
-					  unsigned int len, bool frags)
-{
-	struct ionic_lif *lif = q->lif;
-	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct sk_buff *skb;
-
-	netdev = lif->netdev;
-	stats = &q->lif->rxqstats[q->index];
-
-	if (frags)
-		skb = napi_get_frags(&q_to_qcq(q)->napi);
-	else
-		skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
-
-	if (unlikely(!skb)) {
-		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-				     netdev->name, q->name);
-		stats->alloc_err++;
-		return NULL;
-	}
-
-	return skb;
-}
-
 static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
 {
 	buf_info->page = NULL;
@@ -76,12 +44,10 @@ static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
 static int ionic_rx_page_alloc(struct ionic_queue *q,
 			       struct ionic_buf_info *buf_info)
 {
-	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
 	struct device *dev;
 
-	netdev = lif->netdev;
 	dev = q->dev;
 	stats = q_to_rx_stats(q);
 
@@ -162,21 +128,29 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
 	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
 	u16 len;
 
+	stats = q_to_rx_stats(q);
+
 	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
 	prefetch(buf_info->page);
 
-	skb = ionic_rx_skb_alloc(q, len, true);
-	if (unlikely(!skb))
+	skb = napi_get_frags(&q_to_qcq(q)->napi);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
 	i = comp->num_sg_elems + 1;
 	do {
@@ -218,17 +192,25 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
 	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	u16 len;
 
+	stats = q_to_rx_stats(q);
+
 	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	skb = ionic_rx_skb_alloc(q, len, false);
-	if (unlikely(!skb))
+	skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
 	if (unlikely(!buf_info->page)) {
 		dev_kfree_skb(skb);
@@ -253,13 +235,12 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   void *cb_arg)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
 	struct sk_buff *skb;
 
 	stats = q_to_rx_stats(q);
-	netdev = q->lif->netdev;
 
 	if (comp->status) {
 		stats->dropped++;
-- 
cgit v1.2.3


From 55eda6bbe0c87c577376472f8bf381ef1c023303 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:30 -0800
Subject: ionic: rebuild debugfs on qcq swap

With a reconfigure of each queue is needed a rebuild of
the matching debugfs information.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 6f8a5daaadfa..48d3c7685b6c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2204,6 +2204,9 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	swap(a->cq_base,      b->cq_base);
 	swap(a->cq_base_pa,   b->cq_base_pa);
 	swap(a->cq_size,      b->cq_size);
+
+	ionic_debugfs_del_qcq(a);
+	ionic_debugfs_add_qcq(a->q.lif, a);
 }
 
 int ionic_reconfigure_queues(struct ionic_lif *lif,
-- 
cgit v1.2.3


From a25edab93b2877f3e20673464286a589678b87c2 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 10 Mar 2021 11:26:31 -0800
Subject: ionic: simplify use of completion types

Make better use of our struct types and type checking by passing
the actual Rx or Tx completion type rather than a generic void
pointer type.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index cd2540ff9251..c63e6e7aa47b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -125,9 +125,8 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q,
 
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_desc_info *desc_info,
-				      struct ionic_cq_info *cq_info)
+				      struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rx_stats *stats;
@@ -155,9 +154,6 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 	i = comp->num_sg_elems + 1;
 	do {
 		if (unlikely(!buf_info->page)) {
-			struct napi_struct *napi = &q_to_qcq(q)->napi;
-
-			napi->skb = NULL;
 			dev_kfree_skb(skb);
 			return NULL;
 		}
@@ -189,9 +185,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 
 static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_desc_info *desc_info,
-					  struct ionic_cq_info *cq_info)
+					  struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rx_stats *stats;
@@ -234,7 +229,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
@@ -251,9 +246,9 @@ static void ionic_rx_clean(struct ionic_queue *q,
 	stats->bytes += le16_to_cpu(comp->len);
 
 	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
-		skb = ionic_rx_copybreak(q, desc_info, cq_info);
+		skb = ionic_rx_copybreak(q, desc_info, comp);
 	else
-		skb = ionic_rx_frags(q, desc_info, cq_info);
+		skb = ionic_rx_frags(q, desc_info, comp);
 
 	if (unlikely(!skb)) {
 		stats->dropped++;
@@ -309,7 +304,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
 
@@ -661,7 +656,7 @@ static void ionic_tx_clean(struct ionic_queue *q,
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_txq_comp *comp = cq_info->cq_desc;
+	struct ionic_txq_comp *comp = cq_info->txcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
 	u16 index;
-- 
cgit v1.2.3


From 6b9c8f46af9da611c1fb2f281c927b11f08e568d Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Thu, 11 Mar 2021 02:43:43 +0530
Subject: net: ipv4: route.c: fix space before tab

The extra space before tab space has been removed.

Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02d81d79deeb..55f2813a000d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -21,7 +21,7 @@
  *		Alan Cox	:	Added BSD route gw semantics
  *		Alan Cox	:	Super /proc >4K
  *		Alan Cox	:	MTU in route table
- *		Alan Cox	: 	MSS actually. Also added the window
+ *		Alan Cox	:	MSS actually. Also added the window
  *					clamper.
  *		Sam Lantinga	:	Fixed route matching in rt_del()
  *		Alan Cox	:	Routing cache support.
@@ -41,7 +41,7 @@
  *		Olaf Erb	:	irtt wasn't being copied right.
  *		Bjorn Ekwall	:	Kerneld route support.
  *		Alan Cox	:	Multicast fixed (I hope)
- * 		Pavel Krauz	:	Limited broadcast fixed
+ *		Pavel Krauz	:	Limited broadcast fixed
  *		Mike McLagan	:	Routing by source
  *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
  *					route.c and rewritten from scratch.
@@ -54,8 +54,8 @@
  *	Robert Olsson		:	Added rt_cache statistics
  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
  *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
- * 	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
- * 	Ilia Sotnikov		:	Removed TOS from hash calculations
+ *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
+ *	Ilia Sotnikov		:	Removed TOS from hash calculations
  */
 
 #define pr_fmt(fmt) "IPv4: " fmt
@@ -2246,7 +2246,7 @@ local_input:
 	if (res->type == RTN_UNREACHABLE) {
 		rth->dst.input= ip_error;
 		rth->dst.error= -err;
-		rth->rt_flags 	&= ~RTCF_LOCAL;
+		rth->rt_flags	&= ~RTCF_LOCAL;
 	}
 
 	if (do_cache) {
-- 
cgit v1.2.3


From 34bb975126419e86bc3b95e200dc41de6c6ca69c Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Thu, 11 Mar 2021 04:21:23 +0530
Subject: net: fddi: skfp: Mundane typo fixes throughout the file smt.h

Few spelling fixes throughout the file.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/skfp/h/smt.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/fddi/skfp/h/smt.h b/drivers/net/fddi/skfp/h/smt.h
index 751b5bb87a9d..b19c7a99d32a 100644
--- a/drivers/net/fddi/skfp/h/smt.h
+++ b/drivers/net/fddi/skfp/h/smt.h
@@ -411,7 +411,7 @@ struct smt_p_reason {
 #define SMT_RDF_ILLEGAL 0x00000005	/* read only (PMF) */
 #define SMT_RDF_NOPARAM	0x6		/* parameter not supported (PMF) */
 #define SMT_RDF_RANGE	0x8		/* out of range */
-#define SMT_RDF_AUTHOR	0x9		/* not autohorized */
+#define SMT_RDF_AUTHOR	0x9		/* not authorized */
 #define SMT_RDF_LENGTH	0x0a		/* length error */
 #define SMT_RDF_TOOLONG	0x0b		/* length error */
 #define SMT_RDF_SBA	0x0d		/* SBA denied */
@@ -450,7 +450,7 @@ struct smt_p_version {
 
 struct smt_p_0015 {
 	struct smt_para	para ;		/* generic parameter header */
-	u_int		res_type ;	/* recsource type */
+	u_int		res_type ;	/* resource type */
 } ;
 
 #define	SYNC_BW		0x00000001L	/* Synchronous Bandwidth */
@@ -489,7 +489,7 @@ struct smt_p_0017 {
 struct smt_p_0018 {
 	struct smt_para	para ;		/* generic parameter header */
 	int		sba_ov_req ;	/* total sync bandwidth req for overhead*/
-} ;					/* measuered in bytes per T_Neg */
+} ;					/* measured in bytes per T_Neg */
 
 /*
  * P19 : SBA Allocation Address
@@ -562,7 +562,7 @@ struct smt_p_fsc {
 #define FSC_TYPE2	2		/* Special A/C indicator forwarding */
 
 /*
- * P00 21 : user defined authoriziation (see pmf.c)
+ * P00 21 : user defined authorization (see pmf.c)
  */
 #define SMT_P_AUTHOR	0x0021
 
-- 
cgit v1.2.3


From 6d19628f539fccf899298ff02ee4c73e4bf6df3f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Wed, 10 Mar 2021 14:13:08 -0800
Subject: Bluetooth: SMP: Fail if remote and local public keys are identical

This fails the pairing procedure when both remote and local non-debug
public keys are identical.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index b0c1ee110eff..e03cc284161c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2732,6 +2732,15 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	if (skb->len < sizeof(*key))
 		return SMP_INVALID_PARAMS;
 
+	/* Check if remote and local public keys are the same and debug key is
+	 * not in use.
+	 */
+	if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) &&
+	    !crypto_memneq(key, smp->local_pk, 64)) {
+		bt_dev_err(hdev, "Remote and local public keys are identical");
+		return SMP_UNSPECIFIED;
+	}
+
 	memcpy(smp->remote_pk, key, 64);
 
 	if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) {
-- 
cgit v1.2.3


From c1a74160eaf1ac218733b371158432b52601beff Mon Sep 17 00:00:00 2001
From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Date: Thu, 11 Mar 2021 12:33:40 +0530
Subject: Bluetooth: hci_qca: Add device_may_wakeup support

Based on device may wakeup status, Bluez stack will enable/disable
passive scanning with whitelist in BT controller while suspending.
As interrupt from BT SoC is handled by UART driver,we need to use
device handle of UART driver to get the status of device may wakeup

Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index de36af63e182..73af901c2032 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1571,6 +1571,20 @@ static void qca_cmd_timeout(struct hci_dev *hdev)
 	mutex_unlock(&qca->hci_memdump_lock);
 }
 
+static bool qca_prevent_wake(struct hci_dev *hdev)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	bool wakeup;
+
+	/* UART driver handles the interrupt from BT SoC.So we need to use
+	 * device handle of UART driver to get the status of device may wakeup.
+	 */
+	wakeup = device_may_wakeup(hu->serdev->ctrl->dev.parent);
+	bt_dev_dbg(hu->hdev, "wakeup status : %d", wakeup);
+
+	return !wakeup;
+}
+
 static int qca_wcn3990_init(struct hci_uart *hu)
 {
 	struct qca_serdev *qcadev;
@@ -1721,6 +1735,7 @@ retry:
 		qca_debugfs_init(hdev);
 		hu->hdev->hw_error = qca_hw_error;
 		hu->hdev->cmd_timeout = qca_cmd_timeout;
+		hu->hdev->prevent_wake = qca_prevent_wake;
 	} else if (ret == -ENOENT) {
 		/* No patch/nvm-config found, run with original fw/config */
 		set_bit(QCA_ROM_FW, &qca->flags);
-- 
cgit v1.2.3


From ee47ed08d75e8f16b3cf882061ee19c2ea19dd6c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 10 Mar 2021 10:52:26 -0800
Subject: net: dsa: b53: Add debug prints in b53_vlan_enable()

Having dynamic debug prints in b53_vlan_enable() has been helpful to
uncover a recent but update the function to indicate the port being
configured (or -1 for initial setup) and include the global VLAN enabled
and VLAN filtering enable status.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index a162499bcafc..9bd51c2a51d2 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -349,7 +349,7 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
 	b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt);
 }
 
-static void b53_enable_vlan(struct b53_device *dev, bool enable,
+static void b53_enable_vlan(struct b53_device *dev, int port, bool enable,
 			    bool enable_filtering)
 {
 	u8 mgmt, vc0, vc1, vc4 = 0, vc5;
@@ -431,6 +431,9 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable,
 	b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
 
 	dev->vlan_enabled = enable;
+
+	dev_dbg(dev->dev, "Port %d VLAN enabled: %d, filtering: %d\n",
+		port, enable, enable_filtering);
 }
 
 static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100)
@@ -743,7 +746,7 @@ int b53_configure_vlan(struct dsa_switch *ds)
 		b53_do_vlan_op(dev, VTA_CMD_CLEAR);
 	}
 
-	b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering);
+	b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering);
 
 	b53_for_each_port(dev, i)
 		b53_write16(dev, B53_VLAN_PAGE,
@@ -1429,7 +1432,7 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering,
 {
 	struct b53_device *dev = ds->priv;
 
-	b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering);
+	b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering);
 
 	return 0;
 }
@@ -1454,7 +1457,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port,
 	if (vlan->vid >= dev->num_vlans)
 		return -ERANGE;
 
-	b53_enable_vlan(dev, true, ds->vlan_filtering);
+	b53_enable_vlan(dev, port, true, ds->vlan_filtering);
 
 	return 0;
 }
-- 
cgit v1.2.3


From b0bade515d360800fc701e1a965cf41adcc4ec1b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 10 Mar 2021 14:12:43 -0800
Subject: net: phy: Expose phydev::dev_flags through sysfs

phydev::dev_flags contains a bitmask of configuration bits requested by
the consumer of a PHY device (Ethernet MAC or switch) towards the PHY
driver. Since these flags are often used for requesting LED or other
type of configuration being able to quickly audit them without
instrumenting the kernel is useful.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-phydev | 12 ++++++++++++
 drivers/net/phy/phy_device.c                     | 11 +++++++++++
 2 files changed, 23 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 40ced0ea4316..ac722dd5e694 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -51,3 +51,15 @@ Description:
 		Boolean value indicating whether the PHY device is used in
 		standalone mode, without a net_device associated, by PHYLINK.
 		Attribute created only when this is the case.
+
+What:		/sys/class/mdio_bus/<bus>/<device>/phy_dev_flags
+Date:		March 2021
+KernelVersion:	5.13
+Contact:	netdev@vger.kernel.org
+Description:
+		32-bit hexadecimal number representing a bit mask of the
+		configuration bits passed from the consumer of the PHY
+		(Ethernet MAC, switch, etc.) to the PHY driver. The flags are
+		only used internally by the kernel and their placement are
+		not meant to be stable across kernel versions. This is intended
+		for facilitating the debugging of PHY drivers.
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index cc38e326405a..a009d1769b08 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -512,10 +512,21 @@ phy_has_fixups_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(phy_has_fixups);
 
+static ssize_t phy_dev_flags_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct phy_device *phydev = to_phy_device(dev);
+
+	return sprintf(buf, "0x%08x\n", phydev->dev_flags);
+}
+static DEVICE_ATTR_RO(phy_dev_flags);
+
 static struct attribute *phy_dev_attrs[] = {
 	&dev_attr_phy_id.attr,
 	&dev_attr_phy_interface.attr,
 	&dev_attr_phy_has_fixups.attr,
+	&dev_attr_phy_dev_flags.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(phy_dev);
-- 
cgit v1.2.3


From c53d21af674adf9c4ba7b65d29fa4a37a9f19e8f Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 11 Mar 2021 15:11:01 +0800
Subject: netdevsim: fib: Remove redundant code

Fix the following coccicheck warnings:

./drivers/net/netdevsim/fib.c:874:5-8: Unneeded variable: "err". Return
"0" on line 889.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 46fb414f7ca6..3ca0f54d0c3b 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -869,10 +869,8 @@ err_rt_offload_failed_flag_set:
 	return err;
 }
 
-static int nsim_fib_event(struct nsim_fib_event *fib_event)
+static void nsim_fib_event(struct nsim_fib_event *fib_event)
 {
-	int err = 0;
-
 	switch (fib_event->family) {
 	case AF_INET:
 		nsim_fib4_event(fib_event->data, &fib_event->fen_info,
@@ -885,8 +883,6 @@ static int nsim_fib_event(struct nsim_fib_event *fib_event)
 		nsim_fib6_event_fini(&fib_event->fib6_event);
 		break;
 	}
-
-	return err;
 }
 
 static int nsim_fib4_prepare_event(struct fib_notifier_info *info,
-- 
cgit v1.2.3


From 7bef147a6ab6e686f4e3c6eadbda56d4ad9833ba Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Thu, 3 Dec 2020 11:55:16 +0200
Subject: net/mlx5: Don't skip vport check

Users of mlx5_eswitch_get_vport() are required to check return value
prior to passing mlx5_vport further. Fix all the places to do not skip
that check.

Reviewed-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c          | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index cb1e181f4c6a..7bfc84238b3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -120,7 +120,7 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
-	return vport->dl_port;
+	return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port;
 }
 
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index aba17835465b..fe1e06d95a12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1141,6 +1141,8 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
 
 	if (!vport->qos.enabled)
 		return -EOPNOTSUPP;
@@ -1279,6 +1281,8 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
 	int ret;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
 
 	mutex_lock(&esw->state_lock);
 	WARN_ON(vport->enabled);
@@ -1326,6 +1330,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return;
 
 	mutex_lock(&esw->state_lock);
 	if (!vport->enabled)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 94cb0217b4f3..703753ac2e02 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2554,6 +2554,9 @@ static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
 	return esw_vport_create_offloads_acl_tables(esw, vport);
 }
 
@@ -2562,6 +2565,9 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+	if (IS_ERR(vport))
+		return;
+
 	esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
-- 
cgit v1.2.3


From d89edb36070572e1b8889009fc3c143e7c290e16 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 2 Nov 2020 16:02:14 +0200
Subject: net/mlx5: Remove impossible checks of interface state

The interface state is constant at this stage and checked
before calling to the register/unregister reserved GIDs.

There is no need to double check it.

Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index a68738c8f4bc..97324d9d4f2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -55,10 +55,6 @@ void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev)
 
 int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 {
-	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
-		mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n");
-		return -EPERM;
-	}
 	if (dev->roce.reserved_gids.start < count) {
 		mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n",
 			       count);
@@ -79,7 +75,6 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 
 void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 {
-	WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up");
 	WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved",
 	     count, dev->roce.reserved_gids.count);
 
-- 
cgit v1.2.3


From 6dea2f7eff9659049f90922283756830364e6278 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 2 Nov 2020 16:54:43 +0200
Subject: net/mlx5: Separate probe vs. reload flows

The mix between probe/unprobe and reload flows causes to have an extra
mutex lock intf_state_mutex that generates LOCKDEP warning between it
and devlink_mutex. As a preparation for the future removal, separate
those flows.

Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c  |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 136 ++++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |   6 +-
 .../ethernet/mellanox/mlx5/core/sf/dev/driver.c    |  14 ++-
 5 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index d7d8a68ef23d..6729720e1ab7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -148,7 +148,7 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		mlx5_unload_one(dev, false);
+		mlx5_unload_one(dev);
 		return 0;
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
@@ -170,13 +170,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
 	*actions_performed = BIT(action);
 	switch (action) {
 	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		return mlx5_load_one(dev, false);
+		return mlx5_load_one(dev);
 	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
 		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
 			break;
 		/* On fw_activate action, also driver is reloaded and reinit performed */
 		*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-		return mlx5_load_one(dev, false);
+		return mlx5_load_one(dev);
 	default:
 		/* Unsupported action should not get to this function */
 		WARN_ON(1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index f9042e147c7f..255bd0059da1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -104,7 +104,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev)
 	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
 		complete(&fw_reset->done);
 	} else {
-		mlx5_load_one(dev, false);
+		mlx5_load_one(dev);
 		devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
 							BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
 							BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
@@ -119,7 +119,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work)
 	int err;
 
 	mlx5_enter_error_state(dev, true);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 	err = mlx5_health_wait_pci_up(dev);
 	if (err)
 		mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n");
@@ -342,7 +342,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
 	}
 
 	mlx5_enter_error_state(dev, true);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 done:
 	fw_reset->ret = err;
 	mlx5_fw_reset_complete_reload(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c568896cfb23..363bc3e917c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1235,7 +1235,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_put_uars_page(dev, dev->priv.uar);
 }
 
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
+int mlx5_init_one(struct mlx5_core_dev *dev)
 {
 	int err = 0;
 
@@ -1247,16 +1247,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 	/* remove any previous indication of internal error */
 	dev->state = MLX5_DEVICE_STATE_UP;
 
-	err = mlx5_function_setup(dev, boot);
+	err = mlx5_function_setup(dev, true);
 	if (err)
 		goto err_function;
 
-	if (boot) {
-		err = mlx5_init_once(dev);
-		if (err) {
-			mlx5_core_err(dev, "sw objs init failed\n");
-			goto function_teardown;
-		}
+	err = mlx5_init_once(dev);
+	if (err) {
+		mlx5_core_err(dev, "sw objs init failed\n");
+		goto function_teardown;
 	}
 
 	err = mlx5_load(dev);
@@ -1265,16 +1263,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-	if (boot) {
-		err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
-		if (err)
-			goto err_devlink_reg;
-
-		err = mlx5_register_device(dev);
-	} else {
-		err = mlx5_attach_device(dev);
-	}
+	err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+	if (err)
+		goto err_devlink_reg;
 
+	err = mlx5_register_device(dev);
 	if (err)
 		goto err_register;
 
@@ -1282,16 +1275,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 	return 0;
 
 err_register:
-	if (boot)
-		mlx5_devlink_unregister(priv_to_devlink(dev));
+	mlx5_devlink_unregister(priv_to_devlink(dev));
 err_devlink_reg:
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
 err_load:
-	if (boot)
-		mlx5_cleanup_once(dev);
+	mlx5_cleanup_once(dev);
 function_teardown:
-	mlx5_function_teardown(dev, boot);
+	mlx5_function_teardown(dev, true);
 err_function:
 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
 out:
@@ -1299,33 +1290,84 @@ out:
 	return err;
 }
 
-void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
+void mlx5_uninit_one(struct mlx5_core_dev *dev)
 {
 	mutex_lock(&dev->intf_state_mutex);
 
-	if (cleanup) {
-		mlx5_unregister_device(dev);
-		mlx5_devlink_unregister(priv_to_devlink(dev));
-	} else {
-		mlx5_detach_device(dev);
-	}
+	mlx5_unregister_device(dev);
+	mlx5_devlink_unregister(priv_to_devlink(dev));
 
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
 			       __func__);
-		if (cleanup)
-			mlx5_cleanup_once(dev);
+		mlx5_cleanup_once(dev);
 		goto out;
 	}
 
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+	mlx5_unload(dev);
+	mlx5_cleanup_once(dev);
+	mlx5_function_teardown(dev, true);
+out:
+	mutex_unlock(&dev->intf_state_mutex);
+}
+
+int mlx5_load_one(struct mlx5_core_dev *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->intf_state_mutex);
+	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_warn(dev, "interface is up, NOP\n");
+		goto out;
+	}
+	/* remove any previous indication of internal error */
+	dev->state = MLX5_DEVICE_STATE_UP;
+
+	err = mlx5_function_setup(dev, false);
+	if (err)
+		goto err_function;
+
+	err = mlx5_load(dev);
+	if (err)
+		goto err_load;
+
+	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
+	err = mlx5_attach_device(dev);
+	if (err)
+		goto err_attach;
+
+	mutex_unlock(&dev->intf_state_mutex);
+	return 0;
+
+err_attach:
+	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mlx5_unload(dev);
+err_load:
+	mlx5_function_teardown(dev, false);
+err_function:
+	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+out:
+	mutex_unlock(&dev->intf_state_mutex);
+	return err;
+}
 
-	if (cleanup)
-		mlx5_cleanup_once(dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev)
+{
+	mutex_lock(&dev->intf_state_mutex);
+
+	mlx5_detach_device(dev);
+
+	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+			       __func__);
+		goto out;
+	}
 
-	mlx5_function_teardown(dev, cleanup);
+	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
+	mlx5_unload(dev);
+	mlx5_function_teardown(dev, false);
 out:
 	mutex_unlock(&dev->intf_state_mutex);
 }
@@ -1397,7 +1439,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 	mutex_destroy(&dev->intf_state_mutex);
 }
 
-static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
+static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx5_core_dev *dev;
 	struct devlink *devlink;
@@ -1433,11 +1475,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto pci_init_err;
 	}
 
-	err = mlx5_load_one(dev, true);
+	err = mlx5_init_one(dev);
 	if (err) {
-		mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
+		mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
 			      err);
-		goto err_load_one;
+		goto err_init_one;
 	}
 
 	err = mlx5_crdump_enable(dev);
@@ -1449,7 +1491,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		devlink_reload_enable(devlink);
 	return 0;
 
-err_load_one:
+err_init_one:
 	mlx5_pci_close(dev);
 pci_init_err:
 	mlx5_mdev_uninit(dev);
@@ -1469,7 +1511,7 @@ static void remove_one(struct pci_dev *pdev)
 	devlink_reload_disable(devlink);
 	mlx5_crdump_disable(dev);
 	mlx5_drain_health_wq(dev);
-	mlx5_unload_one(dev, true);
+	mlx5_uninit_one(dev);
 	mlx5_pci_close(dev);
 	mlx5_mdev_uninit(dev);
 	mlx5_adev_idx_free(dev->priv.adev_idx);
@@ -1485,7 +1527,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
 	mlx5_enter_error_state(dev, false);
 	mlx5_error_sw_reset(dev);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 	mlx5_drain_health_wq(dev);
 	mlx5_pci_disable_device(dev);
 
@@ -1555,7 +1597,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev)
 
 	mlx5_core_info(dev, "%s was called\n", __func__);
 
-	err = mlx5_load_one(dev, false);
+	err = mlx5_load_one(dev);
 	if (err)
 		mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
 			      __func__, err);
@@ -1627,7 +1669,7 @@ static void shutdown(struct pci_dev *pdev)
 	mlx5_core_info(dev, "Shutdown was called\n");
 	err = mlx5_try_fast_unload(dev);
 	if (err)
-		mlx5_unload_one(dev, false);
+		mlx5_unload_one(dev);
 	mlx5_pci_disable_device(dev);
 }
 
@@ -1635,7 +1677,7 @@ static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 
 	return 0;
 }
@@ -1644,7 +1686,7 @@ static int mlx5_resume(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
 
-	return mlx5_load_one(dev, false);
+	return mlx5_load_one(dev);
 }
 
 static const struct pci_device_id mlx5_core_pci_table[] = {
@@ -1676,7 +1718,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
 void mlx5_disable_device(struct mlx5_core_dev *dev)
 {
 	mlx5_error_sw_reset(dev);
-	mlx5_unload_one(dev, false);
+	mlx5_unload_one(dev);
 }
 
 void mlx5_recover_device(struct mlx5_core_dev *dev)
@@ -1689,7 +1731,7 @@ void mlx5_recover_device(struct mlx5_core_dev *dev)
 static struct pci_driver mlx5_core_driver = {
 	.name           = KBUILD_MODNAME,
 	.id_table       = mlx5_core_pci_table,
-	.probe          = init_one,
+	.probe          = probe_one,
 	.remove         = remove_one,
 	.suspend        = mlx5_suspend,
 	.resume         = mlx5_resume,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index efe403c7e354..02993a51b114 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -267,8 +267,10 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
 
 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
 void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
-void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup);
-int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
+int mlx5_init_one(struct mlx5_core_dev *dev);
+void mlx5_uninit_one(struct mlx5_core_dev *dev);
+void mlx5_unload_one(struct mlx5_core_dev *dev);
+int mlx5_load_one(struct mlx5_core_dev *dev);
 
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
index c4bf555c25ea..42c8ee03fe3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
@@ -41,14 +41,15 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
 		goto remap_err;
 	}
 
-	err = mlx5_load_one(mdev, true);
+	err = mlx5_init_one(mdev);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_load_one err=%d\n", err);
-		goto load_one_err;
+		mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err);
+		goto init_one_err;
 	}
+	devlink_reload_enable(devlink);
 	return 0;
 
-load_one_err:
+init_one_err:
 	iounmap(mdev->iseg);
 remap_err:
 	mlx5_mdev_uninit(mdev);
@@ -63,7 +64,8 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
 	struct devlink *devlink;
 
 	devlink = priv_to_devlink(sf_dev->mdev);
-	mlx5_unload_one(sf_dev->mdev, true);
+	devlink_reload_disable(devlink);
+	mlx5_uninit_one(sf_dev->mdev);
 	iounmap(sf_dev->mdev->iseg);
 	mlx5_mdev_uninit(sf_dev->mdev);
 	mlx5_devlink_free(devlink);
@@ -73,7 +75,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
 {
 	struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev);
 
-	mlx5_unload_one(sf_dev->mdev, false);
+	mlx5_unload_one(sf_dev->mdev);
 }
 
 static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = {
-- 
cgit v1.2.3


From 7e615b9978021a034124166d4fa3dc4fc0ea4b16 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 3 Nov 2020 14:42:45 +0200
Subject: net/mlx5: Remove second FW tracer check

The FW tracer check is called twice, so delete one of them.

Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c       | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 2eb022ad7fd0..49e106719392 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -1100,7 +1100,7 @@ int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
 	int err;
 
 	if (IS_ERR_OR_NULL(tracer))
-		return -EINVAL;
+		return 0;
 
 	dev = tracer->dev;
 	mlx5_fw_tracer_cleanup(tracer);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 255bd0059da1..d5d57630015f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -199,16 +199,11 @@ static void mlx5_fw_live_patch_event(struct work_struct *work)
 	struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
 						      fw_live_patch_work);
 	struct mlx5_core_dev *dev = fw_reset->dev;
-	struct mlx5_fw_tracer *tracer;
 
 	mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev),
 		       fw_rev_min(dev), fw_rev_sub(dev));
 
-	tracer = dev->tracer;
-	if (IS_ERR_OR_NULL(tracer))
-		return;
-
-	if (mlx5_fw_tracer_reload(tracer))
+	if (mlx5_fw_tracer_reload(dev->tracer))
 		mlx5_core_err(dev, "Failed to reload FW tracer\n");
 }
 
-- 
cgit v1.2.3


From 7ad67a20f28fd20e63aeb1e095a9bd86bc5b1495 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 3 Nov 2020 15:17:10 +0200
Subject: net/mlx5: Don't rely on interface state bit

The check of MLX5_INTERFACE_STATE_UP is completely useless, because
the FW tracer cleanup is called on every change of the interface
and it ensures that notifier is disabled together with canceling
all the pending works.

Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 49e106719392..01a1d02dcf15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -1126,8 +1126,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void
 
 	switch (eqe->sub_type) {
 	case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
-		if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
-			queue_work(tracer->work_queue, &tracer->ownership_change_work);
+		queue_work(tracer->work_queue, &tracer->ownership_change_work);
 		break;
 	case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
 		if (likely(tracer->str_db.loaded))
-- 
cgit v1.2.3


From fe06992b04a90767cee921b22fb2cb09c93447a8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 3 Nov 2020 18:46:31 +0200
Subject: net/mlx5: Check returned value from health recover sequence

MLX5_INTERFACE_STATE_UP is far from being reliable check for success to
recover, because it can be changed any time and health logic doesn't
have any locks to protect from it.

The locks are not needed here because health recover is good to have,
but not must to success, so rely on the returned value from the
mlx5_recover_device() as a marker for success/failure.

Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c    | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/main.c      | 7 +++++--
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 0c32c485eb58..a0a851640804 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -335,12 +335,12 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
 		return -EIO;
 	}
 	mlx5_core_err(dev, "starting health recovery flow\n");
-	mlx5_recover_device(dev);
-	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) ||
-	    mlx5_health_check_fatal_sensors(dev)) {
+	if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
 		mlx5_core_err(dev, "health recovery failed\n");
 		return -EIO;
 	}
+
+	mlx5_core_info(dev, "health revovery succeded\n");
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 363bc3e917c2..e3a417d17707 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1721,11 +1721,14 @@ void mlx5_disable_device(struct mlx5_core_dev *dev)
 	mlx5_unload_one(dev);
 }
 
-void mlx5_recover_device(struct mlx5_core_dev *dev)
+int mlx5_recover_device(struct mlx5_core_dev *dev)
 {
+	int ret = -EIO;
+
 	mlx5_pci_disable_device(dev);
 	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
-		mlx5_pci_resume(dev->pdev);
+		ret = mlx5_load_one(dev);
+	return ret;
 }
 
 static struct pci_driver mlx5_core_driver = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 02993a51b114..37c8ec7d2217 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -134,7 +134,7 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev);
 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev);
 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
-void mlx5_recover_device(struct mlx5_core_dev *dev);
+int mlx5_recover_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 76e68d950a170fb5cf70165bb442ba9636a19232 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 19 Nov 2020 14:20:44 +0200
Subject: net/mlx5e: CT, Avoid false lock dependency warning

To avoid false lock dependency warning set the ct_entries_ht lock
class different than the lock class of the ht being used when deleting
last flow from a group and then deleting a group, we get into del_sw_flow_group()
which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
it's different than the ht->mutex here.

======================================================
WARNING: possible circular locking dependency detected
5.10.0-rc2+ #8 Tainted: G           O
------------------------------------------------------
revalidator23/24009 is trying to acquire lock:
ffff888128d83828 (&node->lock){++++}-{3:3}, at: mlx5_del_flow_rules+0x83/0x7a0 [mlx5_core]

but task is already holding lock:
ffff8881081ef518 (&ht->mutex){+.+.}-{3:3}, at: rhashtable_free_and_destroy+0x37/0x720

which lock already depends on the new lock.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index f3f6eb081948..f81da2dc6af1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -1539,6 +1539,14 @@ mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
 }
 
+/* To avoid false lock dependency warning set the ct_entries_ht lock
+ * class different than the lock class of the ht being used when deleting
+ * last flow from a group and then deleting a group, we get into del_sw_flow_group()
+ * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
+ * it's different than the ht->mutex here.
+ */
+static struct lock_class_key ct_entries_ht_lock_key;
+
 static struct mlx5_ct_ft *
 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 		     struct nf_flowtable *nf_ft)
@@ -1573,6 +1581,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 	if (err)
 		goto err_init;
 
+	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
+
 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
 				     zone_params);
 	if (err)
-- 
cgit v1.2.3


From 87f77a6797971579a1da497ac01b692bafbc6cca Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 25 Feb 2021 13:54:54 +0100
Subject: net/mlx5e: fix mlx5e_tc_tun_update_header_ipv6 dummy definition

The alternative implementation of this function in a header file
is declared as a global symbol, and gets added to every .c file
that includes it, which leads to a link error:

arm-linux-gnueabi-ld: drivers/net/ethernet/mellanox/mlx5/core/en_rx.o: in function `mlx5e_tc_tun_update_header_ipv6':
en_rx.c:(.text+0x0): multiple definition of `mlx5e_tc_tun_update_header_ipv6'; drivers/net/ethernet/mellanox/mlx5/core/en_main.o:en_main.c:(.text+0x0): first defined here

Mark it 'static inline' like the other functions here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 67de2bf36861..89d5ca91566e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -76,10 +76,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 static inline int
 mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 				struct net_device *mirred_dev,
-				struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
-int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
-				    struct net_device *mirred_dev,
-				    struct mlx5e_encap_entry *e)
+				struct mlx5e_encap_entry *e)
+{ return -EOPNOTSUPP; }
+static inline int
+mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
+				struct net_device *mirred_dev,
+				struct mlx5e_encap_entry *e)
 { return -EOPNOTSUPP; }
 #endif
 int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
-- 
cgit v1.2.3


From 5632817b144fa9de3ae14f89c43312338cf269f1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Mon, 1 Mar 2021 11:28:15 +0200
Subject: net/mlx5e: Add missing include

When CONFIG_IPV6 is disabled the header nexthop.h is not included by
fib_notifier.h which causes tc_tun_encap.c to fail to compile:

   In file included from drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:5:
   In file included from drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h:7:
   In file included from drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h:7:
   In file included from drivers/net/ethernet/mellanox/mlx5/core/en_tc.h:40:
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h:78:5: warning: no previous prototype for function 'mlx5e_tc_tun_update_header_ipv6' [-Wmissing-prototypes]
   int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
       ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h:78:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
   ^
   static
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1510:12: error: implicit declaration of function 'fib_info_nh' [-Werror,-Wimplicit-function-declaration]
           fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
                     ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1510:12: note: did you mean 'fib_info_put'?
   include/net/ip_fib.h:528:20: note: 'fib_info_put' declared here
   static inline void fib_info_put(struct fib_info *fi)
                      ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1510:42: error: member reference type 'int' is not a pointer
           fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~  ^
   include/net/ip_fib.h:113:21: note: expanded from macro 'fib_nh_dev'
   #define fib_nh_dev              nh_common.nhc_dev
                                   ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1552:13: error: incomplete definition of type 'struct fib6_entry_notifier_info'
           fen_info = container_of(info, struct fib6_entry_notifier_info, info);
                      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/kernel.h:694:51: note: expanded from macro 'container_of'
           BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&   \
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:256:74: note: expanded from macro '__same_type'
   #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
                                                                            ^
   include/linux/build_bug.h:39:58: note: expanded from macro 'BUILD_BUG_ON_MSG'
   #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                       ~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~
   include/linux/compiler_types.h:320:22: note: expanded from macro 'compiletime_assert'
           _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
           ~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:308:23: note: expanded from macro '_compiletime_assert'
           __compiletime_assert(condition, msg, prefix, suffix)
           ~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/compiler_types.h:300:9: note: expanded from macro '__compiletime_assert'
                   if (!(condition))                                       \
                         ^~~~~~~~~
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1552:13: error: offsetof of incomplete type 'struct fib6_entry_notifier_info'
           fen_info = container_of(info, struct fib6_entry_notifier_info, info);
                      ^                  ~~~~~~
   include/linux/kernel.h:697:21: note: expanded from macro 'container_of'
           ((type *)(__mptr - offsetof(type, member))); })
                              ^        ~~~~
   include/linux/stddef.h:17:32: note: expanded from macro 'offsetof'
   #define offsetof(TYPE, MEMBER)  __compiler_offsetof(TYPE, MEMBER)
                                   ^                   ~~~~
   include/linux/compiler_types.h:140:35: note: expanded from macro '__compiler_offsetof'
   #define __compiler_offsetof(a, b)       __builtin_offsetof(a, b)
                                           ^                  ~
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1552:11: error: assigning to 'struct fib6_entry_notifier_info *' from incompatible type 'void'
           fen_info = container_of(info, struct fib6_entry_notifier_info, info);
                    ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1553:12: error: implicit declaration of function 'fib6_info_nh_dev' [-Werror,-Wimplicit-function-declaration]
           fib_dev = fib6_info_nh_dev(fen_info->rt);
                     ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1553:37: error: incomplete definition of type 'struct fib6_entry_notifier_info'
           fib_dev = fib6_info_nh_dev(fen_info->rt);
                                      ~~~~~~~~^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1555:14: error: incomplete definition of type 'struct fib6_entry_notifier_info'
               fen_info->rt->fib6_dst.plen != 128)
               ~~~~~~~~^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1562:39: error: incomplete definition of type 'struct fib6_entry_notifier_info'
           memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
                                        ~~~~~~~~^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1563:24: error: incomplete definition of type 'struct fib6_entry_notifier_info'
                  sizeof(fen_info->rt->fib6_dst.addr));
                         ~~~~~~~~^
   drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c:1546:9: note: forward declaration of 'struct fib6_entry_notifier_info'
           struct fib6_entry_notifier_info *fen_info;
                  ^
   1 warning and 10 errors generated.

Manually include net/nexthop.h in tc_tun_encap.c.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 6a116335bb21..32d06fe94acc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021 Mellanox Technologies. */
 
 #include <net/fib_notifier.h>
+#include <net/nexthop.h>
 #include "tc_tun_encap.h"
 #include "en_tc.h"
 #include "tc_tun.h"
-- 
cgit v1.2.3


From fbeab6be054c60d935578ad8c35885c29a958a04 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Mon, 1 Mar 2021 11:43:58 +0200
Subject: net/mlx5: Fix indir stable stubs

Some of the stubs for CONFIG_MLX5_CLS_ACT==disabled are missing "static
inline" in their definition which causes the following compilation
warnings:

   In file included from drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c:41:
>> drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:34:1: warning: no previous prototype for function 'mlx5_esw_indir_table_init' [-Wmissing-prototypes]
   mlx5_esw_indir_table_init(void)
   ^
   drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:33:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   struct mlx5_esw_indir_table *
   ^
   static
>> drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:40:1: warning: no previous prototype for function 'mlx5_esw_indir_table_destroy' [-Wmissing-prototypes]
   mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
   ^
   drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:39:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void
   ^
   static
>> drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:61:1: warning: no previous prototype for function 'mlx5_esw_indir_table_needed' [-Wmissing-prototypes]
   mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
   ^
   drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h:60:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   bool
   ^
   static
   3 warnings generated.

Add "static inline" prefix to signatures of stubs that were missing it.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
index cb9eafd1b4ee..21d56b49d14b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h
@@ -30,13 +30,13 @@ mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr);
 
 #else
 /* indir API stubs */
-struct mlx5_esw_indir_table *
+static inline struct mlx5_esw_indir_table *
 mlx5_esw_indir_table_init(void)
 {
 	return NULL;
 }
 
-void
+static inline void
 mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir)
 {
 }
@@ -57,7 +57,7 @@ mlx5_esw_indir_table_put(struct mlx5_eswitch *esw,
 {
 }
 
-bool
+static inline bool
 mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw,
 			    struct mlx5_flow_attr *attr,
 			    u16 vport_num,
-- 
cgit v1.2.3


From 51ada5a52379e4dbf69cba70be6a348413f7a01f Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Tue, 16 Feb 2021 13:42:22 -0800
Subject: net/mlx5e: mlx5_tc_ct_init does not fail

mlx5_tc_ct_init() either returns a valid pointer or a NULL, either way
the caller can continue, remove IS_ERR check from callers as it has no
effect.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0da69b98f38f..dc126389291d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4646,10 +4646,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
 				 MLX5_FLOW_NAMESPACE_KERNEL);
-	if (IS_ERR(tc->ct)) {
-		err = PTR_ERR(tc->ct);
-		goto err_ct;
-	}
 
 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
 	err = register_netdevice_notifier_dev_net(priv->netdev,
@@ -4665,7 +4661,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 err_reg:
 	mlx5_tc_ct_clean(tc->ct);
-err_ct:
 	mlx5_chains_destroy(tc->chains);
 err_chains:
 	rhashtable_destroy(&tc->ht);
@@ -4724,8 +4719,6 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 					       esw_chains(esw),
 					       &esw->offloads.mod_hdr,
 					       MLX5_FLOW_NAMESPACE_FDB);
-	if (IS_ERR(uplink_priv->ct_priv))
-		goto err_ct;
 
 	mapping = mapping_create(sizeof(struct tunnel_match_key),
 				 TUNNEL_INFO_BITS_MASK, true);
@@ -4765,7 +4758,6 @@ err_enc_opts_mapping:
 	mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
-err_ct:
 	netdev_warn(priv->netdev,
 		    "Failed to initialize tc (eswitch), err: %d", err);
 	return err;
-- 
cgit v1.2.3


From 3094552bcd726682ded98e4f4aa97a7c6646f638 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 3 Mar 2021 10:36:02 +0200
Subject: net/mlx5: SF, Fix return type

Fix the following coccicheck warnings:

drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h:50:8-9: WARNING:
return of 0/1 in function 'mlx5_sf_dev_allocated' with return type bool

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
index 4de02902aef1..149fd9e698cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h
@@ -47,7 +47,7 @@ static inline void mlx5_sf_driver_unregister(void)
 
 static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev)
 {
-	return 0;
+	return false;
 }
 
 #endif
-- 
cgit v1.2.3


From 03e219c4cf842d65c9e6f3d72f9c02d5cb3f578f Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Mon, 14 Dec 2020 14:07:19 -0800
Subject: net/mlx5e: rep: Improve reg_cX conditions

There is no point of calculating reg_c1 or overriding reg_c0 if we are
going to abort the function.

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 065126370acd..fcae3c0a4e9f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -621,11 +621,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	int err;
 
 	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
-	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
-		reg_c0 = 0;
-	reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
-	if (!reg_c0)
+	if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
 		return true;
 
 	/* If reg_c0 is not equal to the default flow tag then skb->mark
@@ -633,6 +629,8 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	 */
 	skb->mark = 0;
 
+	reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
 
-- 
cgit v1.2.3


From 61e9508f1e5e5f7a32f51904cbde66f8fdcb452e Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 8 Feb 2021 11:07:53 +0200
Subject: net/mlx5: Avoid unnecessary operation

fs_get_obj retrieves the container of fs_parent_node just to pass the
same value as &fs_ns->node. Just pass fs_parent_node to
init_root_tree_recursive() to get exactly the same effect.

Signed-off-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 66ad599bd488..f5517ea2f6be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2395,14 +2395,12 @@ static int init_root_tree(struct mlx5_flow_steering *steering,
 			  struct init_tree_node *init_node,
 			  struct fs_node *fs_parent_node)
 {
-	int i;
-	struct mlx5_flow_namespace *fs_ns;
 	int err;
+	int i;
 
-	fs_get_obj(fs_ns, fs_parent_node);
 	for (i = 0; i < init_node->ar_size; i++) {
 		err = init_root_tree_recursive(steering, &init_node->children[i],
-					       &fs_ns->node,
+					       fs_parent_node,
 					       init_node, i);
 		if (err)
 			return err;
-- 
cgit v1.2.3


From 9f4d9283388d9069dcf4d68ae9a212d7f0e6a985 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 9 Mar 2021 18:16:52 +0200
Subject: net/mlx5e: Alloc flow spec using kvzalloc instead of kzalloc

flow spec is not small and we do allocate it using kvzalloc
in most places of the driver. fix rest of the places
to use kvzalloc to avoid failure in allocation when
memory is too fragmented.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c         | 14 +++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c    |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index f81da2dc6af1..3a6095c912f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -695,7 +695,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 
 	zone_rule->nat = nat;
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return -ENOMEM;
 
@@ -737,7 +737,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 
 	zone_rule->attr = attr;
 
-	kfree(spec);
+	kvfree(spec);
 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
 
 	return 0;
@@ -749,7 +749,7 @@ err_rule:
 err_mod_hdr:
 	kfree(attr);
 err_attr:
-	kfree(spec);
+	kvfree(spec);
 	return err;
 }
 
@@ -1684,10 +1684,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 	struct mlx5_ct_ft *ft;
 	u32 fte_id = 1;
 
-	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
+	post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
 	if (!post_ct_spec || !ct_flow) {
-		kfree(post_ct_spec);
+		kvfree(post_ct_spec);
 		kfree(ct_flow);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1822,7 +1822,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 
 	attr->ct_attr.ct_flow = ct_flow;
 	dealloc_mod_hdr_actions(&pre_mod_acts);
-	kfree(post_ct_spec);
+	kvfree(post_ct_spec);
 
 	return rule;
 
@@ -1843,7 +1843,7 @@ err_alloc_pre:
 err_idr:
 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
 err_ft:
-	kfree(post_ct_spec);
+	kvfree(post_ct_spec);
 	kfree(ct_flow);
 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
 	return ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 381a9c8c9da9..34119ce92031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -60,7 +60,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return -ENOMEM;
 
@@ -101,7 +101,7 @@ out:
 	if (err)
 		mlx5_modify_header_dealloc(mdev, modify_hdr);
 out_spec:
-	kfree(spec);
+	kvfree(spec);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 703753ac2e02..a215ccee3e61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1446,7 +1446,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
 		return ERR_PTR(-ENOMEM);
 
@@ -1469,7 +1469,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	dest.ft = esw->offloads.ft_offloads;
 
 	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
-	kfree(spec);
+	kvfree(spec);
 
 	if (IS_ERR(flow_rule))
 		esw_warn(esw->dev,
-- 
cgit v1.2.3


From 433ccce83504a44d8e07ac2c5282ed24095af602 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 11 Mar 2021 10:14:11 +0800
Subject: net: hns3: use FEC capability queried from firmware

For maintainability and compatibility, add support to use FEC
capability queried from firmware.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c  |  6 +++++-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  |  1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++----
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 1bd0ddfaec4d..02419bb461e1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -353,7 +353,9 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
 
 	set_bit(HNAE3_DEV_SUPPORT_FD_B, ae_dev->caps);
 	set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps);
-	set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+	if (hdev->ae_dev->dev_version == HNAE3_DEVICE_VERSION_V2) {
+		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+	}
 }
 
 static void hclge_parse_capability(struct hclge_dev *hdev,
@@ -378,6 +380,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_FD_FORWARD_TC_B))
 		set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B))
+		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 057dda735492..e8480ff680f4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -384,6 +384,7 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_HW_PAD_B,
 	HCLGE_CAP_STASH_B,
 	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
+	HCLGE_CAP_FEC_B = 13,
 };
 
 enum HCLGE_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e3f81c7e0ce7..0e0a16c6fb9e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2889,10 +2889,12 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 	clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state);
 }
 
-static void hclge_update_port_capability(struct hclge_mac *mac)
+static void hclge_update_port_capability(struct hclge_dev *hdev,
+					 struct hclge_mac *mac)
 {
-	/* update fec ability by speed */
-	hclge_convert_setting_fec(mac);
+	if (hnae3_dev_fec_supported(hdev))
+		/* update fec ability by speed */
+		hclge_convert_setting_fec(mac);
 
 	/* firmware can not identify back plane type, the media type
 	 * read from configuration can help deal it
@@ -3012,7 +3014,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
 		if (mac->speed_type == QUERY_ACTIVE_SPEED) {
-			hclge_update_port_capability(mac);
+			hclge_update_port_capability(hdev, mac);
 			return 0;
 		}
 		return hclge_cfg_mac_speed_dup(hdev, mac->speed,
-- 
cgit v1.2.3


From e8194f3262055bc2e6e2b7c02f9de2c3d2ef7fc9 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Thu, 11 Mar 2021 10:14:12 +0800
Subject: net: hns3: use pause capability queried from firmware

For maintainability and compatibility, add support to use pause
capability queried from firmware, and add debugfs support to dump
this capability.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  4 ++++
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c      |  3 +++
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c      |  8 ++++++++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c  |  3 +++
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  |  1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 16 ++++++++++++----
 6 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e9e60a935f40..9e9076f9ba04 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -90,6 +90,7 @@ enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_HW_PAD_B,
 	HNAE3_DEV_SUPPORT_STASH_B,
 	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
+	HNAE3_DEV_SUPPORT_PAUSE_B,
 };
 
 #define hnae3_dev_fd_supported(hdev) \
@@ -134,6 +135,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_dev_stash_supported(hdev) \
 	test_bit(HNAE3_DEV_SUPPORT_STASH_B, (hdev)->ae_dev->caps)
 
+#define hnae3_dev_pause_supported(hdev) \
+	test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, (hdev)->ae_dev->caps)
+
 #define hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev) \
 	test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (ae_dev)->caps)
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index dd11c57027bb..ded92ea50971 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -362,6 +362,9 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "support UDP tunnel csum: %s\n",
 		 test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, caps) ?
 		 "yes" : "no");
+	dev_info(&h->pdev->dev, "support PAUSE: %s\n",
+		 test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps) ?
+		 "yes" : "no");
 }
 
 static void hns3_dbg_dev_specs(struct hnae3_handle *h)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index adcec4ea7cb9..28afb278508c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -642,6 +642,10 @@ static void hns3_get_pauseparam(struct net_device *netdev,
 				struct ethtool_pauseparam *param)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps))
+		return;
 
 	if (h->ae_algo->ops->get_pauseparam)
 		h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
@@ -652,6 +656,10 @@ static int hns3_set_pauseparam(struct net_device *netdev,
 			       struct ethtool_pauseparam *param)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps))
+		return -EOPNOTSUPP;
 
 	netif_dbg(h, drv, netdev,
 		  "set pauseparam: autoneg=%u, rx:%u, tx:%u\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 02419bb461e1..1c90c6b8a9a3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -355,6 +355,7 @@ static void hclge_set_default_capability(struct hclge_dev *hdev)
 	set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps);
 	if (hdev->ae_dev->dev_version == HNAE3_DEVICE_VERSION_V2) {
 		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
 	}
 }
 
@@ -382,6 +383,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B))
 		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B))
+		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index e8480ff680f4..774a9ad32752 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -385,6 +385,7 @@ enum HCLGE_CAP_BITS {
 	HCLGE_CAP_STASH_B,
 	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
 	HCLGE_CAP_FEC_B = 13,
+	HCLGE_CAP_PAUSE_B = 14,
 };
 
 enum HCLGE_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 0e0a16c6fb9e..26269d66e210 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1150,8 +1150,10 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev,
 	if (hnae3_dev_fec_supported(hdev))
 		hclge_convert_setting_fec(mac);
 
+	if (hnae3_dev_pause_supported(hdev))
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
 }
 
@@ -1163,8 +1165,11 @@ static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev,
 	hclge_convert_setting_kr(mac, speed_ability);
 	if (hnae3_dev_fec_supported(hdev))
 		hclge_convert_setting_fec(mac);
+
+	if (hnae3_dev_pause_supported(hdev))
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
 }
 
@@ -1193,10 +1198,13 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev,
 		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
 	}
 
+	if (hnae3_dev_pause_supported(hdev)) {
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+	}
+
 	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
 	linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
 }
 
 static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability)
-- 
cgit v1.2.3


From ee90c6ba341f7f72858196f15912c8b6b7d032e8 Mon Sep 17 00:00:00 2001
From: Julien Massonneau <julien.massonneau@6wind.com>
Date: Thu, 11 Mar 2021 16:53:18 +0100
Subject: seg6: add support for IPv4 decapsulation in ipv6_srh_rcv()

As specified in IETF RFC 8754, section 4.3.1.2, if the upper layer
header is IPv4 or IPv6, perform IPv6 decapsulation and resubmit the
decapsulated packet to the IPv4 or IPv6 module.
Only IPv6 decapsulation was implemented. This patch adds support for IPv4
decapsulation.

Link: https://tools.ietf.org/html/rfc8754#section-4.3.1.2
Signed-off-by: Julien Massonneau <julien.massonneau@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 +
 net/ipv6/exthdrs.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index bd1f396cc9c7..448bf2b34759 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -30,6 +30,7 @@
  */
 
 #define NEXTHDR_HOP		0	/* Hop-by-hop option header. */
+#define NEXTHDR_IPV4		4	/* IPv4 in IPv6 */
 #define NEXTHDR_TCP		6	/* TCP segment. */
 #define NEXTHDR_UDP		17	/* UDP message. */
 #define NEXTHDR_IPV6		41	/* IPv6 in IPv6 */
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 6126f8bf94b3..56e479d158b7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -381,7 +381,7 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
 
 looped_back:
 	if (hdr->segments_left == 0) {
-		if (hdr->nexthdr == NEXTHDR_IPV6) {
+		if (hdr->nexthdr == NEXTHDR_IPV6 || hdr->nexthdr == NEXTHDR_IPV4) {
 			int offset = (hdr->hdrlen + 1) << 3;
 
 			skb_postpull_rcsum(skb, skb_network_header(skb),
@@ -397,7 +397,8 @@ looped_back:
 			skb_reset_network_header(skb);
 			skb_reset_transport_header(skb);
 			skb->encapsulation = 0;
-
+			if (hdr->nexthdr == NEXTHDR_IPV4)
+				skb->protocol = htons(ETH_P_IP);
 			__skb_tunnel_rx(skb, skb->dev, net);
 
 			netif_rx(skb);
-- 
cgit v1.2.3


From fbbc5bc2ab8c525f4aba4346249e3adc52d8e2c0 Mon Sep 17 00:00:00 2001
From: Julien Massonneau <julien.massonneau@6wind.com>
Date: Thu, 11 Mar 2021 16:53:19 +0100
Subject: seg6: ignore routing header with segments left equal to 0

When there are 2 segments routing header, after an End.B6 action
for example, the second SRH will never be handled by an action, packet will
be dropped when the first SRH has segments left equal to 0.
For actions that doesn't perform decapsulation (currently: End, End.X,
End.T, End.B6, End.B6.Encaps), this patch adds the IP6_FH_F_SKIP_RH flag
in arguments for ipv6_find_hdr().

Signed-off-by: Julien Massonneau <julien.massonneau@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_local.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c2a0c78e84d4..8936f48570fc 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -119,12 +119,12 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
 	return (struct seg6_local_lwt *)lwt->data;
 }
 
-static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
+static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags)
 {
 	struct ipv6_sr_hdr *srh;
 	int len, srhoff = 0;
 
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0)
 		return NULL;
 
 	if (!pskb_may_pull(skb, srhoff + sizeof(*srh)))
@@ -152,13 +152,10 @@ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
 {
 	struct ipv6_sr_hdr *srh;
 
-	srh = get_srh(skb);
+	srh = get_srh(skb, IP6_FH_F_SKIP_RH);
 	if (!srh)
 		return NULL;
 
-	if (srh->segments_left == 0)
-		return NULL;
-
 #ifdef CONFIG_IPV6_SEG6_HMAC
 	if (!seg6_hmac_validate_skb(skb))
 		return NULL;
@@ -172,7 +169,7 @@ static bool decap_and_validate(struct sk_buff *skb, int proto)
 	struct ipv6_sr_hdr *srh;
 	unsigned int off = 0;
 
-	srh = get_srh(skb);
+	srh = get_srh(skb, 0);
 	if (srh && srh->segments_left > 0)
 		return false;
 
-- 
cgit v1.2.3


From 597f48e46b6e9142d30eaa5881c1b98338f092db Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:12 +0100
Subject: nexthop: Pass nh_config to replace_nexthop()

Currently, replace assumes that the new group that is given is a
fully-formed object. But mpath groups really only have one attribute, and
that is the constituent next hop configuration. This may not be universally
true. From the usability perspective, it is desirable to allow the replace
operation to adjust just the constituent next hop configuration and leave
the group attributes as such intact.

But the object that keeps track of whether an attribute was or was not
given is the nh_config object, not the next hop or next-hop group. To allow
(selective) attribute updates during NH group replacement, propagate `cfg'
to replace_nexthop() and further to replace_nexthop_grp().

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 743777bce179..f723dc97dcd3 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1107,7 +1107,7 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
 }
 
 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
-			       struct nexthop *new,
+			       struct nexthop *new, const struct nh_config *cfg,
 			       struct netlink_ext_ack *extack)
 {
 	struct nh_group *oldg, *newg;
@@ -1276,7 +1276,8 @@ static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
 }
 
 static int replace_nexthop(struct net *net, struct nexthop *old,
-			   struct nexthop *new, struct netlink_ext_ack *extack)
+			   struct nexthop *new, const struct nh_config *cfg,
+			   struct netlink_ext_ack *extack)
 {
 	bool new_is_reject = false;
 	struct nh_grp_entry *nhge;
@@ -1319,7 +1320,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
 	}
 
 	if (old->is_group)
-		err = replace_nexthop_grp(net, old, new, extack);
+		err = replace_nexthop_grp(net, old, new, cfg, extack);
 	else
 		err = replace_nexthop_single(net, old, new, extack);
 
@@ -1361,7 +1362,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 		} else if (new_id > nh->id) {
 			pp = &next->rb_right;
 		} else if (replace) {
-			rc = replace_nexthop(net, nh, new_nh, extack);
+			rc = replace_nexthop(net, nh, new_nh, cfg, extack);
 			if (!rc) {
 				new_nh = nh; /* send notification with old nh */
 				replace_notify = 1;
-- 
cgit v1.2.3


From 96a856256a43b88202f2ac8933092940146102ed Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:13 +0100
Subject: nexthop: __nh_notifier_single_info_init(): Make nh_info an argument

The cited function currently uses rtnl_dereference() to get nh_info from a
handed-in nexthop. However, under the resilient hashing scheme, this
function will not always be called under RTNL, sometimes the mutual
exclusion will be achieved differently. Therefore move the nh_info
extraction from the function to its callers to make it possible to use a
different synchronization guarantee.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f723dc97dcd3..69c8b50a936e 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -52,10 +52,8 @@ static bool nexthop_notifiers_is_empty(struct net *net)
 
 static void
 __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
-			       const struct nexthop *nh)
+			       const struct nh_info *nhi)
 {
-	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
-
 	nh_info->dev = nhi->fib_nhc.nhc_dev;
 	nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
 	if (nh_info->gw_family == AF_INET)
@@ -71,12 +69,14 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
 static int nh_notifier_single_info_init(struct nh_notifier_info *info,
 					const struct nexthop *nh)
 {
+	struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
 	info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
 	info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
 	if (!info->nh)
 		return -ENOMEM;
 
-	__nh_notifier_single_info_init(info->nh, nh);
+	__nh_notifier_single_info_init(info->nh, nhi);
 
 	return 0;
 }
@@ -103,11 +103,13 @@ static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
 
 	for (i = 0; i < num_nh; i++) {
 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		struct nh_info *nhi;
 
+		nhi = rtnl_dereference(nhge->nh->nh_info);
 		info->nh_grp->nh_entries[i].id = nhge->nh->id;
 		info->nh_grp->nh_entries[i].weight = nhge->weight;
 		__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
-					       nhge->nh);
+					       nhi);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 90e1a9e21326887fe0aef6c25ad36464953a961e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:14 +0100
Subject: nexthop: Add a dedicated flag for multipath next-hop groups

With the introduction of resilient nexthop groups, there will be two types
of multipath groups: the current hash-threshold "mpath" ones, and resilient
groups. Both are multipath, but to determine the fact, the system needs to
consider two flags. This might prove costly in the datapath. Therefore,
introduce a new flag, that should be set for next-hop groups that have more
than one nexthop, and should be considered multipath.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h | 7 ++++---
 net/ipv4/nexthop.c    | 5 ++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 7bc057aee40b..5062c2c08e2b 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -80,6 +80,7 @@ struct nh_grp_entry {
 struct nh_group {
 	struct nh_group		*spare; /* spare group for removals */
 	u16			num_nh;
+	bool			is_multipath;
 	bool			mpath;
 	bool			fdb_nh;
 	bool			has_v4;
@@ -212,7 +213,7 @@ static inline bool nexthop_is_multipath(const struct nexthop *nh)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		return nh_grp->mpath;
+		return nh_grp->is_multipath;
 	}
 	return false;
 }
@@ -227,7 +228,7 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		if (nh_grp->mpath)
+		if (nh_grp->is_multipath)
 			rc = nh_grp->num_nh;
 	}
 
@@ -308,7 +309,7 @@ struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
 		struct nh_group *nh_grp;
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
-		if (nh_grp->mpath) {
+		if (nh_grp->is_multipath) {
 			nh = nexthop_mpath_select(nh_grp, nhsel);
 			if (!nh)
 				return NULL;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 69c8b50a936e..56c54d0fbacc 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -967,6 +967,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 	}
 
 	newg->has_v4 = false;
+	newg->is_multipath = nhg->is_multipath;
 	newg->mpath = nhg->mpath;
 	newg->fdb_nh = nhg->fdb_nh;
 	newg->num_nh = nhg->num_nh;
@@ -1488,8 +1489,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		nhg->nh_entries[i].nh_parent = nh;
 	}
 
-	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
+	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
 		nhg->mpath = 1;
+		nhg->is_multipath = true;
+	}
 
 	WARN_ON_ONCE(nhg->mpath != 1);
 
-- 
cgit v1.2.3


From 710ec5622306de8c071637ee41ddf4c9bd17e75a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:15 +0100
Subject: nexthop: Add netlink defines and enumerators for resilient NH groups

- RTM_NEWNEXTHOP et.al. that handle resilient groups will have a new nested
  attribute, NHA_RES_GROUP, whose elements are attributes NHA_RES_GROUP_*.

- RTM_NEWNEXTHOPBUCKET et.al. is a suite of new messages that will
  currently serve only for dumping of individual buckets of resilient next
  hop groups. For nexthop group buckets, these messages will carry a nested
  attribute NHA_RES_BUCKET, whose elements are attributes NHA_RES_BUCKET_*.

  There are several reasons why a new suite of messages is created for
  nexthop buckets instead of overloading the information on the existing
  RTM_{NEW,DEL,GET}NEXTHOP messages.

  First, a nexthop group can contain a large number of nexthop buckets (4k
  is not unheard of). This imposes limits on the amount of information that
  can be encoded for each nexthop bucket given a netlink message is limited
  to 64k bytes.

  Second, while RTM_NEWNEXTHOPBUCKET is only used for notifications at
  this point, in the future it can be extended to provide user space with
  control over nexthop buckets configuration.

- The new group type is NEXTHOP_GRP_TYPE_RES. Note that nexthop code is
  adjusted to bounce groups with that type for now.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/nexthop.h   | 47 +++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/rtnetlink.h |  7 +++++++
 net/ipv4/nexthop.c             |  2 ++
 security/selinux/nlmsgtab.c    |  5 ++++-
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index 2d4a1e784cf0..d8ffa8c9ca78 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -21,7 +21,10 @@ struct nexthop_grp {
 };
 
 enum {
-	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */
+	NEXTHOP_GRP_TYPE_MPATH,  /* hash-threshold nexthop group
+				  * default type if not specified
+				  */
+	NEXTHOP_GRP_TYPE_RES,    /* resilient nexthop group */
 	__NEXTHOP_GRP_TYPE_MAX,
 };
 
@@ -52,8 +55,50 @@ enum {
 	NHA_FDB,	/* flag; nexthop belongs to a bridge fdb */
 	/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
 
+	/* nested; resilient nexthop group attributes */
+	NHA_RES_GROUP,
+	/* nested; nexthop bucket attributes */
+	NHA_RES_BUCKET,
+
 	__NHA_MAX,
 };
 
 #define NHA_MAX	(__NHA_MAX - 1)
+
+enum {
+	NHA_RES_GROUP_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_GROUP_PAD = NHA_RES_GROUP_UNSPEC,
+
+	/* u16; number of nexthop buckets in a resilient nexthop group */
+	NHA_RES_GROUP_BUCKETS,
+	/* clock_t as u32; nexthop bucket idle timer (per-group) */
+	NHA_RES_GROUP_IDLE_TIMER,
+	/* clock_t as u32; nexthop unbalanced timer */
+	NHA_RES_GROUP_UNBALANCED_TIMER,
+	/* clock_t as u64; nexthop unbalanced time */
+	NHA_RES_GROUP_UNBALANCED_TIME,
+
+	__NHA_RES_GROUP_MAX,
+};
+
+#define NHA_RES_GROUP_MAX	(__NHA_RES_GROUP_MAX - 1)
+
+enum {
+	NHA_RES_BUCKET_UNSPEC,
+	/* Pad attribute for 64-bit alignment. */
+	NHA_RES_BUCKET_PAD = NHA_RES_BUCKET_UNSPEC,
+
+	/* u16; nexthop bucket index */
+	NHA_RES_BUCKET_INDEX,
+	/* clock_t as u64; nexthop bucket idle time */
+	NHA_RES_BUCKET_IDLE_TIME,
+	/* u32; nexthop id assigned to the nexthop bucket */
+	NHA_RES_BUCKET_NH_ID,
+
+	__NHA_RES_BUCKET_MAX,
+};
+
+#define NHA_RES_BUCKET_MAX	(__NHA_RES_BUCKET_MAX - 1)
+
 #endif
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 91e4ca064d61..d35953bc7d53 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -178,6 +178,13 @@ enum {
 	RTM_GETVLAN,
 #define RTM_GETVLAN	RTM_GETVLAN
 
+	RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET	RTM_NEWNEXTHOPBUCKET
+	RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET	RTM_DELNEXTHOPBUCKET
+	RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET	RTM_GETNEXTHOPBUCKET
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 56c54d0fbacc..7a94591da856 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1492,6 +1492,8 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
 		nhg->mpath = 1;
 		nhg->is_multipath = true;
+	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
+		goto out_no_nh;
 	}
 
 	WARN_ON_ONCE(nhg->mpath != 1);
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index b69231918686..d59276f48d4f 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -88,6 +88,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_NEWVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_DELVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 	{ RTM_GETVLAN,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
+	{ RTM_NEWNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETNEXTHOPBUCKET,	NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -171,7 +174,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 		 * structures at the top of this file with the new mappings
 		 * before updating the BUILD_BUG_ON() macro!
 		 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWVLAN + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;
-- 
cgit v1.2.3


From 283a72a5599e80750699d2021830a294ed9ab3f3 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:16 +0100
Subject: nexthop: Add implementation of resilient next-hop groups

At this moment, there is only one type of next-hop group: an mpath group,
which implements the hash-threshold algorithm.

To select a next hop, hash-threshold algorithm first assigns a range of
hashes to each next hop in the group, and then selects the next hop by
comparing the SKB hash with the individual ranges. When a next hop is
removed from the group, the ranges are recomputed, which leads to
reassignment of parts of hash space from one next hop to another. While
there will usually be some overlap between the previous and the new
distribution, some traffic flows change the next hop that they resolve to.
That causes problems e.g. as established TCP connections are reset, because
the traffic is forwarded to a server that is not familiar with the
connection.

Resilient hashing is a technique to address the above problem. Resilient
next-hop group has another layer of indirection between the group itself
and its constituent next hops: a hash table. The selection algorithm uses a
straightforward modulo operation to choose a hash bucket, and then reads
the next hop that this bucket contains, and forwards traffic there.

This indirection brings an important feature. In the hash-threshold
algorithm, the range of hashes associated with a next hop must be
continuous. With a hash table, mapping between the hash table buckets and
the individual next hops is arbitrary. Therefore when a next hop is deleted
the buckets that held it are simply reassigned to other next hops. When
weights of next hops in a group are altered, it may be possible to choose a
subset of buckets that are currently not used for forwarding traffic, and
use those to satisfy the new next-hop distribution demands, keeping the
"busy" buckets intact. This way, established flows are ideally kept being
forwarded to the same endpoints through the same paths as before the
next-hop group change.

In a nutshell, the algorithm works as follows. Each next hop has a number
of buckets that it wants to have, according to its weight and the number of
buckets in the hash table. In case of an event that might cause bucket
allocation change, the numbers for individual next hops are updated,
similarly to how ranges are updated for mpath group next hops. Following
that, a new "upkeep" algorithm runs, and for idle buckets that belong to a
next hop that is currently occupying more buckets than it wants (it is
"overweight"), it migrates the buckets to one of the next hops that has
fewer buckets than it wants (it is "underweight"). If, after this, there
are still underweight next hops, another upkeep run is scheduled to a
future time.

Chances are there are not enough "idle" buckets to satisfy the new demands.
The algorithm has knobs to select both what it means for a bucket to be
idle, and for whether and when to forcefully migrate buckets if there keeps
being an insufficient number of idle buckets.

There are three users of the resilient data structures.

- The forwarding code accesses them under RCU, and does not modify them
  except for updating the time a selected bucket was last used.

- Netlink code, running under RTNL, which may modify the data.

- The delayed upkeep code, which may modify the data. This runs unlocked,
  and mutual exclusion between the RTNL code and the delayed upkeep is
  maintained by canceling the delayed work synchronously before the RTNL
  code touches anything. Later it restarts the delayed work if necessary.

The RTNL code has to implement next-hop group replacement, next hop
removal, etc. For removal, the mpath code uses a neat trick of having a
backup next hop group structure, doing the necessary changes offline, and
then RCU-swapping them in. However, the hash tables for resilient hashing
are about an order of magnitude larger than the groups themselves (the size
might be e.g. 4K entries), and it was felt that keeping two of them is an
overkill. Both the primary next-hop group and the spare therefore use the
same resilient table, and writers are careful to keep all references valid
for the forwarding code. The hash table references next-hop group entries
from the next-hop group that is currently in the primary role (i.e. not
spare). During the transition from primary to spare, the table references a
mix of both the primary group and the spare. When a next hop is deleted,
the corresponding buckets are not set to NULL, but instead marked as empty,
so that the pointer is valid and can be used by the forwarding code. The
buckets are then migrated to a new next-hop group entry during upkeep. The
only times that the hash table is invalid is the very beginning and very
end of its lifetime. Between those points, it is always kept valid.

This patch introduces the core support code itself. It does not handle
notifications towards drivers, which are kept as if the group were an mpath
one. It does not handle netlink either. The only bit currently exposed to
user space is the new next-hop group type, and that is currently bounced.
There is therefore no way to actually access this code.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h |  42 ++++
 net/ipv4/nexthop.c    | 517 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 546 insertions(+), 13 deletions(-)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 5062c2c08e2b..b78505c9031e 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -40,6 +40,12 @@ struct nh_config {
 
 	struct nlattr	*nh_grp;
 	u16		nh_grp_type;
+	u16		nh_grp_res_num_buckets;
+	unsigned long	nh_grp_res_idle_timer;
+	unsigned long	nh_grp_res_unbalanced_timer;
+	bool		nh_grp_res_has_num_buckets;
+	bool		nh_grp_res_has_idle_timer;
+	bool		nh_grp_res_has_unbalanced_timer;
 
 	struct nlattr	*nh_encap;
 	u16		nh_encap_type;
@@ -63,6 +69,32 @@ struct nh_info {
 	};
 };
 
+struct nh_res_bucket {
+	struct nh_grp_entry __rcu *nh_entry;
+	atomic_long_t		used_time;
+	unsigned long		migrated_time;
+	bool			occupied;
+	u8			nh_flags;
+};
+
+struct nh_res_table {
+	struct net		*net;
+	u32			nhg_id;
+	struct delayed_work	upkeep_dw;
+
+	/* List of NHGEs that have too few buckets ("uw" for underweight).
+	 * Reclaimed buckets will be given to entries in this list.
+	 */
+	struct list_head	uw_nh_entries;
+	unsigned long		unbalanced_since;
+
+	u32			idle_timer;
+	u32			unbalanced_timer;
+
+	u16			num_nh_buckets;
+	struct nh_res_bucket	nh_buckets[];
+};
+
 struct nh_grp_entry {
 	struct nexthop	*nh;
 	u8		weight;
@@ -71,6 +103,13 @@ struct nh_grp_entry {
 		struct {
 			atomic_t	upper_bound;
 		} mpath;
+		struct {
+			/* Member on uw_nh_entries. */
+			struct list_head	uw_nh_entry;
+
+			u16			count_buckets;
+			u16			wants_buckets;
+		} res;
 	};
 
 	struct list_head nh_list;
@@ -82,8 +121,11 @@ struct nh_group {
 	u16			num_nh;
 	bool			is_multipath;
 	bool			mpath;
+	bool			resilient;
 	bool			fdb_nh;
 	bool			has_v4;
+
+	struct nh_res_table __rcu *res_table;
 	struct nh_grp_entry	nh_entries[];
 };
 
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 7a94591da856..0e2ff72e10c0 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -183,6 +183,30 @@ static int call_nexthop_notifiers(struct net *net,
 	return notifier_to_errno(err);
 }
 
+/* There are three users of RES_TABLE, and NHs etc. referenced from there:
+ *
+ * 1) a collection of callbacks for NH maintenance. This operates under
+ *    RTNL,
+ * 2) the delayed work that gradually balances the resilient table,
+ * 3) and nexthop_select_path(), operating under RCU.
+ *
+ * Both the delayed work and the RTNL block are writers, and need to
+ * maintain mutual exclusion. Since there are only two and well-known
+ * writers for each table, the RTNL code can make sure it has exclusive
+ * access thus:
+ *
+ * - Have the DW operate without locking;
+ * - synchronously cancel the DW;
+ * - do the writing;
+ * - if the write was not actually a delete, call upkeep, which schedules
+ *   DW again if necessary.
+ *
+ * The functions that are always called from the RTNL context use
+ * rtnl_dereference(). The functions that can also be called from the DW do
+ * a raw dereference and rely on the above mutual exclusion scheme.
+ */
+#define nh_res_dereference(p) (rcu_dereference_raw(p))
+
 static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
 				 enum nexthop_event_type event_type,
 				 struct nexthop *nh,
@@ -241,6 +265,9 @@ static void nexthop_free_group(struct nexthop *nh)
 
 	WARN_ON(nhg->spare == nhg);
 
+	if (nhg->resilient)
+		vfree(rcu_dereference_raw(nhg->res_table));
+
 	kfree(nhg->spare);
 	kfree(nhg);
 }
@@ -299,6 +326,30 @@ static struct nh_group *nexthop_grp_alloc(u16 num_nh)
 	return nhg;
 }
 
+static void nh_res_table_upkeep_dw(struct work_struct *work);
+
+static struct nh_res_table *
+nexthop_res_table_alloc(struct net *net, u32 nhg_id, struct nh_config *cfg)
+{
+	const u16 num_nh_buckets = cfg->nh_grp_res_num_buckets;
+	struct nh_res_table *res_table;
+	unsigned long size;
+
+	size = struct_size(res_table, nh_buckets, num_nh_buckets);
+	res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN);
+	if (!res_table)
+		return NULL;
+
+	res_table->net = net;
+	res_table->nhg_id = nhg_id;
+	INIT_DELAYED_WORK(&res_table->upkeep_dw, &nh_res_table_upkeep_dw);
+	INIT_LIST_HEAD(&res_table->uw_nh_entries);
+	res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+	res_table->unbalanced_timer = cfg->nh_grp_res_unbalanced_timer;
+	res_table->num_nh_buckets = num_nh_buckets;
+	return res_table;
+}
+
 static void nh_base_seq_inc(struct net *net)
 {
 	while (++net->nexthop.seq == 0)
@@ -347,6 +398,13 @@ static u32 nh_find_unused_id(struct net *net)
 	return 0;
 }
 
+static void nh_res_time_set_deadline(unsigned long next_time,
+				     unsigned long *deadline)
+{
+	if (time_before(next_time, *deadline))
+		*deadline = next_time;
+}
+
 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 {
 	struct nexthop_grp *p;
@@ -540,20 +598,62 @@ errout:
 		rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
 }
 
+static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
+{
+	return (unsigned long)atomic_long_read(&bucket->used_time);
+}
+
+static unsigned long
+nh_res_bucket_idle_point(const struct nh_res_table *res_table,
+			 const struct nh_res_bucket *bucket,
+			 unsigned long now)
+{
+	unsigned long time = nh_res_bucket_used_time(bucket);
+
+	/* Bucket was not used since it was migrated. The idle time is now. */
+	if (time == bucket->migrated_time)
+		return now;
+
+	return time + res_table->idle_timer;
+}
+
+static unsigned long
+nh_res_table_unb_point(const struct nh_res_table *res_table)
+{
+	return res_table->unbalanced_since + res_table->unbalanced_timer;
+}
+
+static void nh_res_bucket_set_idle(const struct nh_res_table *res_table,
+				   struct nh_res_bucket *bucket)
+{
+	unsigned long now = jiffies;
+
+	atomic_long_set(&bucket->used_time, (long)now);
+	bucket->migrated_time = now;
+}
+
+static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
+{
+	atomic_long_set(&bucket->used_time, (long)jiffies);
+}
+
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
 	if (nh->is_group) {
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
-		/* nested multipath (group within a group) is not
-		 * supported
-		 */
+		/* Nesting groups within groups is not supported. */
 		if (nhg->mpath) {
 			NL_SET_ERR_MSG(extack,
 				       "Multipath group can not be a nexthop within a group");
 			return false;
 		}
+		if (nhg->resilient) {
+			NL_SET_ERR_MSG(extack,
+				       "Resilient group can not be a nexthop within a group");
+			return false;
+		}
 		*is_fdb = nhg->fdb_nh;
 	} else {
 		struct nh_info *nhi = rtnl_dereference(nh->nh_info);
@@ -734,6 +834,22 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
 	return rc;
 }
 
+static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
+{
+	struct nh_res_table *res_table = rcu_dereference(nhg->res_table);
+	u16 bucket_index = hash % res_table->num_nh_buckets;
+	struct nh_res_bucket *bucket;
+	struct nh_grp_entry *nhge;
+
+	/* nexthop_select_path() is expected to return a non-NULL value, so
+	 * skip protocol validation and just hand out whatever there is.
+	 */
+	bucket = &res_table->nh_buckets[bucket_index];
+	nh_res_bucket_set_busy(bucket);
+	nhge = rcu_dereference(bucket->nh_entry);
+	return nhge->nh;
+}
+
 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 {
 	struct nh_group *nhg;
@@ -744,6 +860,8 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 	nhg = rcu_dereference(nh->nh_grp);
 	if (nhg->mpath)
 		return nexthop_select_path_mp(nhg, hash);
+	else if (nhg->resilient)
+		return nexthop_select_path_res(nhg, hash);
 
 	/* Unreachable. */
 	return NULL;
@@ -926,7 +1044,289 @@ static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
 	return 0;
 }
 
-static void nh_group_rebalance(struct nh_group *nhg)
+static bool nh_res_nhge_is_balanced(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets == nhge->res.wants_buckets;
+}
+
+static bool nh_res_nhge_is_ow(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets > nhge->res.wants_buckets;
+}
+
+static bool nh_res_nhge_is_uw(const struct nh_grp_entry *nhge)
+{
+	return nhge->res.count_buckets < nhge->res.wants_buckets;
+}
+
+static bool nh_res_table_is_balanced(const struct nh_res_table *res_table)
+{
+	return list_empty(&res_table->uw_nh_entries);
+}
+
+static void nh_res_bucket_unset_nh(struct nh_res_bucket *bucket)
+{
+	struct nh_grp_entry *nhge;
+
+	if (bucket->occupied) {
+		nhge = nh_res_dereference(bucket->nh_entry);
+		nhge->res.count_buckets--;
+		bucket->occupied = false;
+	}
+}
+
+static void nh_res_bucket_set_nh(struct nh_res_bucket *bucket,
+				 struct nh_grp_entry *nhge)
+{
+	nh_res_bucket_unset_nh(bucket);
+
+	bucket->occupied = true;
+	rcu_assign_pointer(bucket->nh_entry, nhge);
+	nhge->res.count_buckets++;
+}
+
+static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
+					 struct nh_res_bucket *bucket,
+					 unsigned long *deadline, bool *force)
+{
+	unsigned long now = jiffies;
+	struct nh_grp_entry *nhge;
+	unsigned long idle_point;
+
+	if (!bucket->occupied) {
+		/* The bucket is not occupied, its NHGE pointer is either
+		 * NULL or obsolete. We _have to_ migrate: set force.
+		 */
+		*force = true;
+		return true;
+	}
+
+	nhge = nh_res_dereference(bucket->nh_entry);
+
+	/* If the bucket is populated by an underweight or balanced
+	 * nexthop, do not migrate.
+	 */
+	if (!nh_res_nhge_is_ow(nhge))
+		return false;
+
+	/* At this point we know that the bucket is populated with an
+	 * overweight nexthop. It needs to be migrated to a new nexthop if
+	 * the idle timer of unbalanced timer expired.
+	 */
+
+	idle_point = nh_res_bucket_idle_point(res_table, bucket, now);
+	if (time_after_eq(now, idle_point)) {
+		/* The bucket is idle. We _can_ migrate: unset force. */
+		*force = false;
+		return true;
+	}
+
+	/* Unbalanced timer of 0 means "never force". */
+	if (res_table->unbalanced_timer) {
+		unsigned long unb_point;
+
+		unb_point = nh_res_table_unb_point(res_table);
+		if (time_after(now, unb_point)) {
+			/* The bucket is not idle, but the unbalanced timer
+			 * expired. We _can_ migrate, but set force anyway,
+			 * so that drivers know to ignore activity reports
+			 * from the HW.
+			 */
+			*force = true;
+			return true;
+		}
+
+		nh_res_time_set_deadline(unb_point, deadline);
+	}
+
+	nh_res_time_set_deadline(idle_point, deadline);
+	return false;
+}
+
+static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
+				  u16 bucket_index, bool force)
+{
+	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+	struct nh_grp_entry *new_nhge;
+
+	new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
+					    struct nh_grp_entry,
+					    res.uw_nh_entry);
+	if (WARN_ON_ONCE(!new_nhge))
+		/* If this function is called, "bucket" is either not
+		 * occupied, or it belongs to a next hop that is
+		 * overweight. In either case, there ought to be a
+		 * corresponding underweight next hop.
+		 */
+		return false;
+
+	nh_res_bucket_set_nh(bucket, new_nhge);
+	nh_res_bucket_set_idle(res_table, bucket);
+
+	if (nh_res_nhge_is_balanced(new_nhge))
+		list_del(&new_nhge->res.uw_nh_entry);
+	return true;
+}
+
+#define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
+
+static void nh_res_table_upkeep(struct nh_res_table *res_table)
+{
+	unsigned long now = jiffies;
+	unsigned long deadline;
+	u16 i;
+
+	/* Deadline is the next time that upkeep should be run. It is the
+	 * earliest time at which one of the buckets might be migrated.
+	 * Start at the most pessimistic estimate: either unbalanced_timer
+	 * from now, or if there is none, idle_timer from now. For each
+	 * encountered time point, call nh_res_time_set_deadline() to
+	 * refine the estimate.
+	 */
+	if (res_table->unbalanced_timer)
+		deadline = now + res_table->unbalanced_timer;
+	else
+		deadline = now + res_table->idle_timer;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		bool force;
+
+		if (nh_res_bucket_should_migrate(res_table, bucket,
+						 &deadline, &force)) {
+			if (!nh_res_bucket_migrate(res_table, i, force)) {
+				unsigned long idle_point;
+
+				/* A driver can override the migration
+				 * decision if the HW reports that the
+				 * bucket is actually not idle. Therefore
+				 * remark the bucket as busy again and
+				 * update the deadline.
+				 */
+				nh_res_bucket_set_busy(bucket);
+				idle_point = nh_res_bucket_idle_point(res_table,
+								      bucket,
+								      now);
+				nh_res_time_set_deadline(idle_point, &deadline);
+			}
+		}
+	}
+
+	/* If the group is still unbalanced, schedule the next upkeep to
+	 * either the deadline computed above, or the minimum deadline,
+	 * whichever comes later.
+	 */
+	if (!nh_res_table_is_balanced(res_table)) {
+		unsigned long now = jiffies;
+		unsigned long min_deadline;
+
+		min_deadline = now + NH_RES_UPKEEP_DW_MINIMUM_INTERVAL;
+		if (time_before(deadline, min_deadline))
+			deadline = min_deadline;
+
+		queue_delayed_work(system_power_efficient_wq,
+				   &res_table->upkeep_dw, deadline - now);
+	}
+}
+
+static void nh_res_table_upkeep_dw(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct nh_res_table *res_table;
+
+	res_table = container_of(dw, struct nh_res_table, upkeep_dw);
+	nh_res_table_upkeep(res_table);
+}
+
+static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
+{
+	cancel_delayed_work_sync(&res_table->upkeep_dw);
+}
+
+static void nh_res_group_rebalance(struct nh_group *nhg,
+				   struct nh_res_table *res_table)
+{
+	int prev_upper_bound = 0;
+	int total = 0;
+	int w = 0;
+	int i;
+
+	INIT_LIST_HEAD(&res_table->uw_nh_entries);
+
+	for (i = 0; i < nhg->num_nh; ++i)
+		total += nhg->nh_entries[i].weight;
+
+	for (i = 0; i < nhg->num_nh; ++i) {
+		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+		int upper_bound;
+
+		w += nhge->weight;
+		upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
+						total);
+		nhge->res.wants_buckets = upper_bound - prev_upper_bound;
+		prev_upper_bound = upper_bound;
+
+		if (nh_res_nhge_is_uw(nhge)) {
+			if (list_empty(&res_table->uw_nh_entries))
+				res_table->unbalanced_since = jiffies;
+			list_add(&nhge->res.uw_nh_entry,
+				 &res_table->uw_nh_entries);
+		}
+	}
+}
+
+/* Migrate buckets in res_table so that they reference NHGE's from NHG with
+ * the right NH ID. Set those buckets that do not have a corresponding NHGE
+ * entry in NHG as not occupied.
+ */
+static void nh_res_table_migrate_buckets(struct nh_res_table *res_table,
+					 struct nh_group *nhg)
+{
+	u16 i;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		u32 id = rtnl_dereference(bucket->nh_entry)->nh->id;
+		bool found = false;
+		int j;
+
+		for (j = 0; j < nhg->num_nh; j++) {
+			struct nh_grp_entry *nhge = &nhg->nh_entries[j];
+
+			if (nhge->nh->id == id) {
+				nh_res_bucket_set_nh(bucket, nhge);
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			nh_res_bucket_unset_nh(bucket);
+	}
+}
+
+static void replace_nexthop_grp_res(struct nh_group *oldg,
+				    struct nh_group *newg)
+{
+	/* For NH group replacement, the new NHG might only have a stub
+	 * hash table with 0 buckets, because the number of buckets was not
+	 * specified. For NH removal, oldg and newg both reference the same
+	 * res_table. So in any case, in the following, we want to work
+	 * with oldg->res_table.
+	 */
+	struct nh_res_table *old_res_table = rtnl_dereference(oldg->res_table);
+	unsigned long prev_unbalanced_since = old_res_table->unbalanced_since;
+	bool prev_has_uw = !list_empty(&old_res_table->uw_nh_entries);
+
+	nh_res_table_cancel_upkeep(old_res_table);
+	nh_res_table_migrate_buckets(old_res_table, newg);
+	nh_res_group_rebalance(newg, old_res_table);
+	if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
+		old_res_table->unbalanced_since = prev_unbalanced_since;
+	nh_res_table_upkeep(old_res_table);
+}
+
+static void nh_mp_group_rebalance(struct nh_group *nhg)
 {
 	int total = 0;
 	int w = 0;
@@ -969,6 +1369,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 	newg->has_v4 = false;
 	newg->is_multipath = nhg->is_multipath;
 	newg->mpath = nhg->mpath;
+	newg->resilient = nhg->resilient;
 	newg->fdb_nh = nhg->fdb_nh;
 	newg->num_nh = nhg->num_nh;
 
@@ -996,7 +1397,11 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 		j++;
 	}
 
-	nh_group_rebalance(newg);
+	if (newg->mpath)
+		nh_mp_group_rebalance(newg);
+	else if (newg->resilient)
+		replace_nexthop_grp_res(nhg, newg);
+
 	rcu_assign_pointer(nhp->nh_grp, newg);
 
 	list_del(&nhge->nh_list);
@@ -1025,6 +1430,7 @@ static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
 {
 	struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+	struct nh_res_table *res_table;
 	int i, num_nh = nhg->num_nh;
 
 	for (i = 0; i < num_nh; ++i) {
@@ -1035,6 +1441,11 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
 
 		list_del_init(&nhge->nh_list);
 	}
+
+	if (nhg->resilient) {
+		res_table = rtnl_dereference(nhg->res_table);
+		nh_res_table_cancel_upkeep(res_table);
+	}
 }
 
 /* not called for nexthop replace */
@@ -1113,6 +1524,9 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 			       struct nexthop *new, const struct nh_config *cfg,
 			       struct netlink_ext_ack *extack)
 {
+	struct nh_res_table *tmp_table = NULL;
+	struct nh_res_table *new_res_table;
+	struct nh_res_table *old_res_table;
 	struct nh_group *oldg, *newg;
 	int i, err;
 
@@ -1121,19 +1535,57 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 		return -EINVAL;
 	}
 
-	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
-	if (err)
-		return err;
-
 	oldg = rtnl_dereference(old->nh_grp);
 	newg = rtnl_dereference(new->nh_grp);
 
+	if (newg->mpath != oldg->mpath) {
+		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
+		return -EINVAL;
+	}
+
+	if (newg->mpath) {
+		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
+					     extack);
+		if (err)
+			return err;
+	} else if (newg->resilient) {
+		new_res_table = rtnl_dereference(newg->res_table);
+		old_res_table = rtnl_dereference(oldg->res_table);
+
+		/* Accept if num_nh_buckets was not given, but if it was
+		 * given, demand that the value be correct.
+		 */
+		if (cfg->nh_grp_res_has_num_buckets &&
+		    cfg->nh_grp_res_num_buckets !=
+		    old_res_table->num_nh_buckets) {
+			NL_SET_ERR_MSG(extack, "Can not change number of buckets of a resilient nexthop group.");
+			return -EINVAL;
+		}
+
+		if (cfg->nh_grp_res_has_idle_timer)
+			old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
+		if (cfg->nh_grp_res_has_unbalanced_timer)
+			old_res_table->unbalanced_timer =
+				cfg->nh_grp_res_unbalanced_timer;
+
+		replace_nexthop_grp_res(oldg, newg);
+
+		tmp_table = new_res_table;
+		rcu_assign_pointer(newg->res_table, old_res_table);
+		rcu_assign_pointer(newg->spare->res_table, old_res_table);
+	}
+
 	/* update parents - used by nexthop code for cleanup */
 	for (i = 0; i < newg->num_nh; i++)
 		newg->nh_entries[i].nh_parent = old;
 
 	rcu_assign_pointer(old->nh_grp, newg);
 
+	if (newg->resilient) {
+		rcu_assign_pointer(oldg->res_table, tmp_table);
+		rcu_assign_pointer(oldg->spare->res_table, tmp_table);
+	}
+
 	for (i = 0; i < oldg->num_nh; i++)
 		oldg->nh_entries[i].nh_parent = new;
 
@@ -1383,6 +1835,27 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 		goto out;
 	}
 
+	if (new_nh->is_group) {
+		struct nh_group *nhg = rtnl_dereference(new_nh->nh_grp);
+		struct nh_res_table *res_table;
+
+		if (nhg->resilient) {
+			res_table = rtnl_dereference(nhg->res_table);
+
+			/* Not passing the number of buckets is OK when
+			 * replacing, but not when creating a new group.
+			 */
+			if (!cfg->nh_grp_res_has_num_buckets) {
+				NL_SET_ERR_MSG(extack, "Number of buckets not specified for nexthop group insertion");
+				rc = -EINVAL;
+				goto out;
+			}
+
+			nh_res_group_rebalance(nhg, res_table);
+			nh_res_table_upkeep(res_table);
+		}
+	}
+
 	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
 	rb_insert_color(&new_nh->rb_node, root);
 
@@ -1445,6 +1918,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
 	struct nh_group *nhg;
 	struct nexthop *nh;
+	int err;
 	int i;
 
 	if (WARN_ON(!num_nh))
@@ -1476,8 +1950,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		struct nh_info *nhi;
 
 		nhe = nexthop_find_by_id(net, entry[i].id);
-		if (!nexthop_get(nhe))
+		if (!nexthop_get(nhe)) {
+			err = -ENOENT;
 			goto out_no_nh;
+		}
 
 		nhi = rtnl_dereference(nhe->nh_info);
 		if (nhi->family == AF_INET)
@@ -1493,13 +1969,28 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		nhg->mpath = 1;
 		nhg->is_multipath = true;
 	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
+		struct nh_res_table *res_table;
+
+		/* Bounce resilient groups for now. */
+		err = -EINVAL;
 		goto out_no_nh;
+
+		res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
+		if (!res_table) {
+			err = -ENOMEM;
+			goto out_no_nh;
+		}
+
+		rcu_assign_pointer(nhg->spare->res_table, res_table);
+		rcu_assign_pointer(nhg->res_table, res_table);
+		nhg->resilient = true;
+		nhg->is_multipath = true;
 	}
 
-	WARN_ON_ONCE(nhg->mpath != 1);
+	WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1);
 
 	if (nhg->mpath)
-		nh_group_rebalance(nhg);
+		nh_mp_group_rebalance(nhg);
 
 	if (cfg->nh_fdb)
 		nhg->fdb_nh = 1;
@@ -1518,7 +2009,7 @@ out_no_nh:
 	kfree(nhg);
 	kfree(nh);
 
-	return ERR_PTR(-ENOENT);
+	return ERR_PTR(err);
 }
 
 static int nh_create_ipv4(struct net *net, struct nexthop *nh,
-- 
cgit v1.2.3


From b8f090d0beb185007e5305f7c8aaf3f38fba3dda Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:17 +0100
Subject: nexthop: Add data structures for resilient group notifications

Add data structures that will be used for in-kernel notifications about
addition / deletion of a resilient nexthop group and about changes to a
hash bucket within a resilient group.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index b78505c9031e..fd3c0debe8bf 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -155,11 +155,15 @@ struct nexthop {
 enum nexthop_event_type {
 	NEXTHOP_EVENT_DEL,
 	NEXTHOP_EVENT_REPLACE,
+	NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+	NEXTHOP_EVENT_BUCKET_REPLACE,
 };
 
 enum nh_notifier_info_type {
 	NH_NOTIFIER_INFO_TYPE_SINGLE,
 	NH_NOTIFIER_INFO_TYPE_GRP,
+	NH_NOTIFIER_INFO_TYPE_RES_TABLE,
+	NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
 };
 
 struct nh_notifier_single_info {
@@ -186,6 +190,19 @@ struct nh_notifier_grp_info {
 	struct nh_notifier_grp_entry_info nh_entries[];
 };
 
+struct nh_notifier_res_bucket_info {
+	u16 bucket_index;
+	unsigned int idle_timer_ms;
+	bool force;
+	struct nh_notifier_single_info old_nh;
+	struct nh_notifier_single_info new_nh;
+};
+
+struct nh_notifier_res_table_info {
+	u16 num_nh_buckets;
+	struct nh_notifier_single_info nhs[];
+};
+
 struct nh_notifier_info {
 	struct net *net;
 	struct netlink_ext_ack *extack;
@@ -194,6 +211,8 @@ struct nh_notifier_info {
 	union {
 		struct nh_notifier_single_info *nh;
 		struct nh_notifier_grp_info *nh_grp;
+		struct nh_notifier_res_table_info *nh_res_table;
+		struct nh_notifier_res_bucket_info *nh_res_bucket;
 	};
 };
 
-- 
cgit v1.2.3


From 7c37c7e00411b3d1e0c5292368317aca69d1f324 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:18 +0100
Subject: nexthop: Implement notifiers for resilient nexthop groups

Implement the following notifications towards drivers:

- NEXTHOP_EVENT_REPLACE, when a resilient nexthop group is created.

- NEXTHOP_EVENT_BUCKET_REPLACE any time there is a change in assignment of
  next hops to hash table buckets. That includes replacements, deletions,
  and delayed upkeep cycles. Some bucket notifications can be vetoed by the
  driver, to make it possible to propagate bucket busy-ness flags from the
  HW back to the algorithm. Some are however forced, e.g. if a next hop is
  deleted, all buckets that use this next hop simply must be migrated,
  whether the HW wishes so or not.

- NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, before a resilient nexthop group is
  replaced. Usually the driver will get the bucket notifications as well,
  and could veto those. But in some cases, a bucket may not be migrated
  immediately, but during delayed upkeep, and that is too late to roll the
  transaction back. This notification allows the driver to take a look and
  veto the new proposed group up front, before anything is committed.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 320 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 308 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 0e2ff72e10c0..8b06aafc2e9e 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -115,6 +115,37 @@ static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
 	return 0;
 }
 
+static int nh_notifier_res_table_info_init(struct nh_notifier_info *info,
+					   struct nh_group *nhg)
+{
+	struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+	u16 num_nh_buckets = res_table->num_nh_buckets;
+	unsigned long size;
+	u16 i;
+
+	info->type = NH_NOTIFIER_INFO_TYPE_RES_TABLE;
+	size = struct_size(info->nh_res_table, nhs, num_nh_buckets);
+	info->nh_res_table = __vmalloc(size, GFP_KERNEL | __GFP_ZERO |
+				       __GFP_NOWARN);
+	if (!info->nh_res_table)
+		return -ENOMEM;
+
+	info->nh_res_table->num_nh_buckets = num_nh_buckets;
+
+	for (i = 0; i < num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+		struct nh_info *nhi;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		nhi = rtnl_dereference(nhge->nh->nh_info);
+		__nh_notifier_single_info_init(&info->nh_res_table->nhs[i],
+					       nhi);
+	}
+
+	return 0;
+}
+
 static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
 				     const struct nexthop *nh)
 {
@@ -122,6 +153,8 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
 
 	if (nhg->mpath)
 		return nh_notifier_mp_info_init(info, nhg);
+	else if (nhg->resilient)
+		return nh_notifier_res_table_info_init(info, nhg);
 	return -EINVAL;
 }
 
@@ -132,6 +165,8 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
 
 	if (nhg->mpath)
 		kfree(info->nh_grp);
+	else if (nhg->resilient)
+		vfree(info->nh_res_table);
 }
 
 static int nh_notifier_info_init(struct nh_notifier_info *info,
@@ -183,6 +218,107 @@ static int call_nexthop_notifiers(struct net *net,
 	return notifier_to_errno(err);
 }
 
+static int
+nh_notifier_res_bucket_idle_timer_get(const struct nh_notifier_info *info,
+				      bool force, unsigned int *p_idle_timer_ms)
+{
+	struct nh_res_table *res_table;
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	int err = 0;
+
+	/* When 'force' is false, nexthop bucket replacement is performed
+	 * because the bucket was deemed to be idle. In this case, capable
+	 * listeners can choose to perform an atomic replacement: The bucket is
+	 * only replaced if it is inactive. However, if the idle timer interval
+	 * is smaller than the interval in which a listener is querying
+	 * buckets' activity from the device, then atomic replacement should
+	 * not be tried. Pass the idle timer value to listeners, so that they
+	 * could determine which type of replacement to perform.
+	 */
+	if (force) {
+		*p_idle_timer_ms = 0;
+		return 0;
+	}
+
+	rcu_read_lock();
+
+	nh = nexthop_find_by_id(info->net, info->id);
+	if (!nh) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	nhg = rcu_dereference(nh->nh_grp);
+	res_table = rcu_dereference(nhg->res_table);
+	*p_idle_timer_ms = jiffies_to_msecs(res_table->idle_timer);
+
+out:
+	rcu_read_unlock();
+
+	return err;
+}
+
+static int nh_notifier_res_bucket_info_init(struct nh_notifier_info *info,
+					    u16 bucket_index, bool force,
+					    struct nh_info *oldi,
+					    struct nh_info *newi)
+{
+	unsigned int idle_timer_ms;
+	int err;
+
+	err = nh_notifier_res_bucket_idle_timer_get(info, force,
+						    &idle_timer_ms);
+	if (err)
+		return err;
+
+	info->type = NH_NOTIFIER_INFO_TYPE_RES_BUCKET;
+	info->nh_res_bucket = kzalloc(sizeof(*info->nh_res_bucket),
+				      GFP_KERNEL);
+	if (!info->nh_res_bucket)
+		return -ENOMEM;
+
+	info->nh_res_bucket->bucket_index = bucket_index;
+	info->nh_res_bucket->idle_timer_ms = idle_timer_ms;
+	info->nh_res_bucket->force = force;
+	__nh_notifier_single_info_init(&info->nh_res_bucket->old_nh, oldi);
+	__nh_notifier_single_info_init(&info->nh_res_bucket->new_nh, newi);
+	return 0;
+}
+
+static void nh_notifier_res_bucket_info_fini(struct nh_notifier_info *info)
+{
+	kfree(info->nh_res_bucket);
+}
+
+static int __call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+					       u16 bucket_index, bool force,
+					       struct nh_info *oldi,
+					       struct nh_info *newi,
+					       struct netlink_ext_ack *extack)
+{
+	struct nh_notifier_info info = {
+		.net = net,
+		.extack = extack,
+		.id = nhg_id,
+	};
+	int err;
+
+	if (nexthop_notifiers_is_empty(net))
+		return 0;
+
+	err = nh_notifier_res_bucket_info_init(&info, bucket_index, force,
+					       oldi, newi);
+	if (err)
+		return err;
+
+	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+					   NEXTHOP_EVENT_BUCKET_REPLACE, &info);
+	nh_notifier_res_bucket_info_fini(&info);
+
+	return notifier_to_errno(err);
+}
+
 /* There are three users of RES_TABLE, and NHs etc. referenced from there:
  *
  * 1) a collection of callbacks for NH maintenance. This operates under
@@ -207,6 +343,53 @@ static int call_nexthop_notifiers(struct net *net,
  */
 #define nh_res_dereference(p) (rcu_dereference_raw(p))
 
+static int call_nexthop_res_bucket_notifiers(struct net *net, u32 nhg_id,
+					     u16 bucket_index, bool force,
+					     struct nexthop *old_nh,
+					     struct nexthop *new_nh,
+					     struct netlink_ext_ack *extack)
+{
+	struct nh_info *oldi = nh_res_dereference(old_nh->nh_info);
+	struct nh_info *newi = nh_res_dereference(new_nh->nh_info);
+
+	return __call_nexthop_res_bucket_notifiers(net, nhg_id, bucket_index,
+						   force, oldi, newi, extack);
+}
+
+static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
+					    struct netlink_ext_ack *extack)
+{
+	struct nh_notifier_info info = {
+		.net = net,
+		.extack = extack,
+	};
+	struct nh_group *nhg;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (nexthop_notifiers_is_empty(net))
+		return 0;
+
+	/* At this point, the nexthop buckets are still not populated. Only
+	 * emit a notification with the logical nexthops, so that a listener
+	 * could potentially veto it in case of unsupported configuration.
+	 */
+	nhg = rtnl_dereference(nh->nh_grp);
+	err = nh_notifier_mp_info_init(&info, nhg);
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
+		return err;
+	}
+
+	err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
+					   NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
+					   &info);
+	kfree(info.nh_grp);
+
+	return notifier_to_errno(err);
+}
+
 static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
 				 enum nexthop_event_type event_type,
 				 struct nexthop *nh,
@@ -1144,10 +1327,12 @@ static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
 }
 
 static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
-				  u16 bucket_index, bool force)
+				  u16 bucket_index, bool notify, bool force)
 {
 	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
 	struct nh_grp_entry *new_nhge;
+	struct netlink_ext_ack extack;
+	int err;
 
 	new_nhge = list_first_entry_or_null(&res_table->uw_nh_entries,
 					    struct nh_grp_entry,
@@ -1160,6 +1345,28 @@ static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
 		 */
 		return false;
 
+	if (notify) {
+		struct nh_grp_entry *old_nhge;
+
+		old_nhge = nh_res_dereference(bucket->nh_entry);
+		err = call_nexthop_res_bucket_notifiers(res_table->net,
+							res_table->nhg_id,
+							bucket_index, force,
+							old_nhge->nh,
+							new_nhge->nh, &extack);
+		if (err) {
+			pr_err_ratelimited("%s\n", extack._msg);
+			if (!force)
+				return false;
+			/* It is not possible to veto a forced replacement, so
+			 * just clear the hardware flags from the nexthop
+			 * bucket to indicate to user space that this bucket is
+			 * not correctly populated in hardware.
+			 */
+			bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+		}
+	}
+
 	nh_res_bucket_set_nh(bucket, new_nhge);
 	nh_res_bucket_set_idle(res_table, bucket);
 
@@ -1170,7 +1377,7 @@ static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
 
 #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
 
-static void nh_res_table_upkeep(struct nh_res_table *res_table)
+static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify)
 {
 	unsigned long now = jiffies;
 	unsigned long deadline;
@@ -1194,7 +1401,8 @@ static void nh_res_table_upkeep(struct nh_res_table *res_table)
 
 		if (nh_res_bucket_should_migrate(res_table, bucket,
 						 &deadline, &force)) {
-			if (!nh_res_bucket_migrate(res_table, i, force)) {
+			if (!nh_res_bucket_migrate(res_table, i, notify,
+						   force)) {
 				unsigned long idle_point;
 
 				/* A driver can override the migration
@@ -1235,7 +1443,7 @@ static void nh_res_table_upkeep_dw(struct work_struct *work)
 	struct nh_res_table *res_table;
 
 	res_table = container_of(dw, struct nh_res_table, upkeep_dw);
-	nh_res_table_upkeep(res_table);
+	nh_res_table_upkeep(res_table, true);
 }
 
 static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
@@ -1323,7 +1531,7 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
 	nh_res_group_rebalance(newg, old_res_table);
 	if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
 		old_res_table->unbalanced_since = prev_unbalanced_since;
-	nh_res_table_upkeep(old_res_table);
+	nh_res_table_upkeep(old_res_table, true);
 }
 
 static void nh_mp_group_rebalance(struct nh_group *nhg)
@@ -1407,9 +1615,15 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 	list_del(&nhge->nh_list);
 	nexthop_put(nhge->nh);
 
-	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
-	if (err)
-		pr_err("%s\n", extack._msg);
+	/* Removal of a NH from a resilient group is notified through
+	 * bucket notifications.
+	 */
+	if (newg->mpath) {
+		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
+					     &extack);
+		if (err)
+			pr_err("%s\n", extack._msg);
+	}
 
 	if (nlinfo)
 		nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
@@ -1562,6 +1776,16 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 			return -EINVAL;
 		}
 
+		/* Emit a pre-replace notification so that listeners could veto
+		 * a potentially unsupported configuration. Otherwise,
+		 * individual bucket replacement notifications would need to be
+		 * vetoed, which is something that should only happen if the
+		 * bucket is currently active.
+		 */
+		err = call_nexthop_res_table_notifiers(net, new, extack);
+		if (err)
+			return err;
+
 		if (cfg->nh_grp_res_has_idle_timer)
 			old_res_table->idle_timer = cfg->nh_grp_res_idle_timer;
 		if (cfg->nh_grp_res_has_unbalanced_timer)
@@ -1611,6 +1835,71 @@ static void nh_group_v4_update(struct nh_group *nhg)
 	nhg->has_v4 = has_v4;
 }
 
+static int replace_nexthop_single_notify_res(struct net *net,
+					     struct nh_res_table *res_table,
+					     struct nexthop *old,
+					     struct nh_info *oldi,
+					     struct nh_info *newi,
+					     struct netlink_ext_ack *extack)
+{
+	u32 nhg_id = res_table->nhg_id;
+	int err;
+	u16 i;
+
+	for (i = 0; i < res_table->num_nh_buckets; i++) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nhge->nh == old) {
+			err = __call_nexthop_res_bucket_notifiers(net, nhg_id,
+								  i, true,
+								  oldi, newi,
+								  extack);
+			if (err)
+				goto err_notify;
+		}
+	}
+
+	return 0;
+
+err_notify:
+	while (i-- > 0) {
+		struct nh_res_bucket *bucket = &res_table->nh_buckets[i];
+		struct nh_grp_entry *nhge;
+
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nhge->nh == old)
+			__call_nexthop_res_bucket_notifiers(net, nhg_id, i,
+							    true, newi, oldi,
+							    extack);
+	}
+	return err;
+}
+
+static int replace_nexthop_single_notify(struct net *net,
+					 struct nexthop *group_nh,
+					 struct nexthop *old,
+					 struct nh_info *oldi,
+					 struct nh_info *newi,
+					 struct netlink_ext_ack *extack)
+{
+	struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
+	struct nh_res_table *res_table;
+
+	if (nhg->mpath) {
+		return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
+					      group_nh, extack);
+	} else if (nhg->resilient) {
+		res_table = rtnl_dereference(nhg->res_table);
+		return replace_nexthop_single_notify_res(net, res_table,
+							 old, oldi, newi,
+							 extack);
+	}
+
+	return -EINVAL;
+}
+
 static int replace_nexthop_single(struct net *net, struct nexthop *old,
 				  struct nexthop *new,
 				  struct netlink_ext_ack *extack)
@@ -1653,8 +1942,8 @@ static int replace_nexthop_single(struct net *net, struct nexthop *old,
 	list_for_each_entry(nhge, &old->grp_list, nh_list) {
 		struct nexthop *nhp = nhge->nh_parent;
 
-		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
-					     extack);
+		err = replace_nexthop_single_notify(net, nhp, old, oldi, newi,
+						    extack);
 		if (err)
 			goto err_notify;
 	}
@@ -1684,7 +1973,7 @@ err_notify:
 	list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
 		struct nexthop *nhp = nhge->nh_parent;
 
-		call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
+		replace_nexthop_single_notify(net, nhp, old, newi, oldi, NULL);
 	}
 	call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
 	return err;
@@ -1852,13 +2141,20 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 			}
 
 			nh_res_group_rebalance(nhg, res_table);
-			nh_res_table_upkeep(res_table);
+
+			/* Do not send bucket notifications, we do full
+			 * notification below.
+			 */
+			nh_res_table_upkeep(res_table, false);
 		}
 	}
 
 	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
 	rb_insert_color(&new_nh->rb_node, root);
 
+	/* The initial insertion is a full notification for mpath as well
+	 * as resilient groups.
+	 */
 	rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
 	if (rc)
 		rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
-- 
cgit v1.2.3


From 56ad5ba344dea9c914331da8754f5ba7cede9941 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:19 +0100
Subject: nexthop: Allow setting "offload" and "trap" indication of nexthop
 buckets

Add a function that can be called by device drivers to set "offload" or
"trap" indication on nexthop buckets following nexthop notifications and
other changes such as a neighbour becoming invalid.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h |  2 ++
 net/ipv4/nexthop.c    | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index fd3c0debe8bf..685f208d26b5 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -220,6 +220,8 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
 			      struct netlink_ext_ack *extack);
 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+				 bool offload, bool trap);
 
 /* caller is holding rcu or rtnl; no reference taken to nexthop */
 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 8b06aafc2e9e..1fce4ff39390 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3072,6 +3072,40 @@ out:
 }
 EXPORT_SYMBOL(nexthop_set_hw_flags);
 
+void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
+				 bool offload, bool trap)
+{
+	struct nh_res_table *res_table;
+	struct nh_res_bucket *bucket;
+	struct nexthop *nexthop;
+	struct nh_group *nhg;
+
+	rcu_read_lock();
+
+	nexthop = nexthop_find_by_id(net, id);
+	if (!nexthop || !nexthop->is_group)
+		goto out;
+
+	nhg = rcu_dereference(nexthop->nh_grp);
+	if (!nhg->resilient)
+		goto out;
+
+	if (bucket_index >= nhg->res_table->num_nh_buckets)
+		goto out;
+
+	res_table = rcu_dereference(nhg->res_table);
+	bucket = &res_table->nh_buckets[bucket_index];
+	bucket->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
+	if (offload)
+		bucket->nh_flags |= RTNH_F_OFFLOAD;
+	if (trap)
+		bucket->nh_flags |= RTNH_F_TRAP;
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_bucket_set_hw_flags);
+
 static void __net_exit nexthop_net_exit(struct net *net)
 {
 	rtnl_lock();
-- 
cgit v1.2.3


From cfc15c1dbb0b7574498eef453b8ddb534e408551 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:20 +0100
Subject: nexthop: Allow reporting activity of nexthop buckets

The kernel periodically checks the idle time of nexthop buckets to
determine if they are idle and can be re-populated with a new nexthop.

When the resilient nexthop group is offloaded to hardware, the kernel
will not see activity on nexthop buckets unless it is reported from
hardware.

Add a function that can be periodically called by device drivers to
report activity on nexthop buckets after querying it from the underlying
device.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h |  2 ++
 net/ipv4/nexthop.c    | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 685f208d26b5..ba94868a21d5 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -222,6 +222,8 @@ int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
 void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
 				 bool offload, bool trap);
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+				     unsigned long *activity);
 
 /* caller is holding rcu or rtnl; no reference taken to nexthop */
 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 1fce4ff39390..495b5e69ffcd 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3106,6 +3106,41 @@ out:
 }
 EXPORT_SYMBOL(nexthop_bucket_set_hw_flags);
 
+void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
+				     unsigned long *activity)
+{
+	struct nh_res_table *res_table;
+	struct nexthop *nexthop;
+	struct nh_group *nhg;
+	u16 i;
+
+	rcu_read_lock();
+
+	nexthop = nexthop_find_by_id(net, id);
+	if (!nexthop || !nexthop->is_group)
+		goto out;
+
+	nhg = rcu_dereference(nexthop->nh_grp);
+	if (!nhg->resilient)
+		goto out;
+
+	/* Instead of silently ignoring some buckets, demand that the sizes
+	 * be the same.
+	 */
+	res_table = rcu_dereference(nhg->res_table);
+	if (num_buckets != res_table->num_nh_buckets)
+		goto out;
+
+	for (i = 0; i < num_buckets; i++) {
+		if (test_bit(i, activity))
+			nh_res_bucket_set_busy(&res_table->nh_buckets[i]);
+	}
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nexthop_res_grp_activity_update);
+
 static void __net_exit nexthop_net_exit(struct net *net)
 {
 	rtnl_lock();
-- 
cgit v1.2.3


From a2601e2b1e7ecd3831f2fbb1ad43bbdb2918e2e6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:21 +0100
Subject: nexthop: Add netlink handlers for resilient nexthop groups

Implement the netlink messages that allow creation and dumping of resilient
nexthop groups.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 145 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 495b5e69ffcd..439bf3b7ced5 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -16,6 +16,9 @@
 #include <net/route.h>
 #include <net/sock.h>
 
+#define NH_RES_DEFAULT_IDLE_TIMER	(120 * HZ)
+#define NH_RES_DEFAULT_UNBALANCED_TIMER	0	/* No forced rebalancing. */
+
 static void remove_nexthop(struct net *net, struct nexthop *nh,
 			   struct nl_info *nlinfo);
 
@@ -32,6 +35,7 @@ static const struct nla_policy rtm_nh_policy_new[] = {
 	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
 	[NHA_ENCAP]		= { .type = NLA_NESTED },
 	[NHA_FDB]		= { .type = NLA_FLAG },
+	[NHA_RES_GROUP]		= { .type = NLA_NESTED },
 };
 
 static const struct nla_policy rtm_nh_policy_get[] = {
@@ -45,6 +49,12 @@ static const struct nla_policy rtm_nh_policy_dump[] = {
 	[NHA_FDB]		= { .type = NLA_FLAG },
 };
 
+static const struct nla_policy rtm_nh_res_policy_new[] = {
+	[NHA_RES_GROUP_BUCKETS]			= { .type = NLA_U16 },
+	[NHA_RES_GROUP_IDLE_TIMER]		= { .type = NLA_U32 },
+	[NHA_RES_GROUP_UNBALANCED_TIMER]	= { .type = NLA_U32 },
+};
+
 static bool nexthop_notifiers_is_empty(struct net *net)
 {
 	return !net->nexthop.notifier_chain.head;
@@ -588,6 +598,41 @@ static void nh_res_time_set_deadline(unsigned long next_time,
 		*deadline = next_time;
 }
 
+static clock_t nh_res_table_unbalanced_time(struct nh_res_table *res_table)
+{
+	if (list_empty(&res_table->uw_nh_entries))
+		return 0;
+	return jiffies_delta_to_clock_t(jiffies - res_table->unbalanced_since);
+}
+
+static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg)
+{
+	struct nh_res_table *res_table = rtnl_dereference(nhg->res_table);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NHA_RES_GROUP);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(skb, NHA_RES_GROUP_BUCKETS,
+			res_table->num_nh_buckets) ||
+	    nla_put_u32(skb, NHA_RES_GROUP_IDLE_TIMER,
+			jiffies_to_clock_t(res_table->idle_timer)) ||
+	    nla_put_u32(skb, NHA_RES_GROUP_UNBALANCED_TIMER,
+			jiffies_to_clock_t(res_table->unbalanced_timer)) ||
+	    nla_put_u64_64bit(skb, NHA_RES_GROUP_UNBALANCED_TIME,
+			      nh_res_table_unbalanced_time(res_table),
+			      NHA_RES_GROUP_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 {
 	struct nexthop_grp *p;
@@ -598,6 +643,8 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 
 	if (nhg->mpath)
 		group_type = NEXTHOP_GRP_TYPE_MPATH;
+	else if (nhg->resilient)
+		group_type = NEXTHOP_GRP_TYPE_RES;
 
 	if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
 		goto nla_put_failure;
@@ -613,6 +660,9 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 		p += 1;
 	}
 
+	if (nhg->resilient && nla_put_nh_group_res(skb, nhg))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -700,13 +750,26 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static size_t nh_nlmsg_size_grp_res(struct nh_group *nhg)
+{
+	return nla_total_size(0) +	/* NHA_RES_GROUP */
+		nla_total_size(2) +	/* NHA_RES_GROUP_BUCKETS */
+		nla_total_size(4) +	/* NHA_RES_GROUP_IDLE_TIMER */
+		nla_total_size(4) +	/* NHA_RES_GROUP_UNBALANCED_TIMER */
+		nla_total_size_64bit(8);/* NHA_RES_GROUP_UNBALANCED_TIME */
+}
+
 static size_t nh_nlmsg_size_grp(struct nexthop *nh)
 {
 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 	size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+	size_t tot = nla_total_size(sz) +
+		nla_total_size(2); /* NHA_GROUP_TYPE */
+
+	if (nhg->resilient)
+		tot += nh_nlmsg_size_grp_res(nhg);
 
-	return nla_total_size(sz) +
-	       nla_total_size(2);  /* NHA_GROUP_TYPE */
+	return tot;
 }
 
 static size_t nh_nlmsg_size_single(struct nexthop *nh)
@@ -876,7 +939,7 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
 
 static int nh_check_attr_group(struct net *net,
 			       struct nlattr *tb[], size_t tb_size,
-			       struct netlink_ext_ack *extack)
+			       u16 nh_grp_type, struct netlink_ext_ack *extack)
 {
 	unsigned int len = nla_len(tb[NHA_GROUP]);
 	u8 nh_family = AF_UNSPEC;
@@ -937,8 +1000,14 @@ static int nh_check_attr_group(struct net *net,
 	for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
 		if (!tb[i])
 			continue;
-		if (i == NHA_FDB)
+		switch (i) {
+		case NHA_FDB:
 			continue;
+		case NHA_RES_GROUP:
+			if (nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+				continue;
+			break;
+		}
 		NL_SET_ERR_MSG(extack,
 			       "No other attributes can be set in nexthop groups");
 		return -EINVAL;
@@ -2475,6 +2544,70 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
 	return nh;
 }
 
+static int rtm_nh_get_timer(struct nlattr *attr, unsigned long fallback,
+			    unsigned long *timer_p, bool *has_p,
+			    struct netlink_ext_ack *extack)
+{
+	unsigned long timer;
+	u32 value;
+
+	if (!attr) {
+		*timer_p = fallback;
+		*has_p = false;
+		return 0;
+	}
+
+	value = nla_get_u32(attr);
+	timer = clock_t_to_jiffies(value);
+	if (timer == ~0UL) {
+		NL_SET_ERR_MSG(extack, "Timer value too large");
+		return -EINVAL;
+	}
+
+	*timer_p = timer;
+	*has_p = true;
+	return 0;
+}
+
+static int rtm_to_nh_config_grp_res(struct nlattr *res, struct nh_config *cfg,
+				    struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_policy_new)] = {};
+	int err;
+
+	if (res) {
+		err = nla_parse_nested(tb,
+				       ARRAY_SIZE(rtm_nh_res_policy_new) - 1,
+				       res, rtm_nh_res_policy_new, extack);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NHA_RES_GROUP_BUCKETS]) {
+		cfg->nh_grp_res_num_buckets =
+			nla_get_u16(tb[NHA_RES_GROUP_BUCKETS]);
+		cfg->nh_grp_res_has_num_buckets = true;
+		if (!cfg->nh_grp_res_num_buckets) {
+			NL_SET_ERR_MSG(extack, "Number of buckets needs to be non-0");
+			return -EINVAL;
+		}
+	}
+
+	err = rtm_nh_get_timer(tb[NHA_RES_GROUP_IDLE_TIMER],
+			       NH_RES_DEFAULT_IDLE_TIMER,
+			       &cfg->nh_grp_res_idle_timer,
+			       &cfg->nh_grp_res_has_idle_timer,
+			       extack);
+	if (err)
+		return err;
+
+	return rtm_nh_get_timer(tb[NHA_RES_GROUP_UNBALANCED_TIMER],
+				NH_RES_DEFAULT_UNBALANCED_TIMER,
+				&cfg->nh_grp_res_unbalanced_timer,
+				&cfg->nh_grp_res_has_unbalanced_timer,
+				extack);
+}
+
 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 			    struct nlmsghdr *nlh, struct nh_config *cfg,
 			    struct netlink_ext_ack *extack)
@@ -2553,7 +2686,14 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 			NL_SET_ERR_MSG(extack, "Invalid group type");
 			goto out;
 		}
-		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
+		err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb),
+					  cfg->nh_grp_type, extack);
+		if (err)
+			goto out;
+
+		if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES)
+			err = rtm_to_nh_config_grp_res(tb[NHA_RES_GROUP],
+						       cfg, extack);
 
 		/* no other attributes should be set */
 		goto out;
-- 
cgit v1.2.3


From 8a1bbabb034d5cf6a4788acb4fc6ce0a1dfd6177 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:22 +0100
Subject: nexthop: Add netlink handlers for bucket dump

Add a dump handler for resilient next hop buckets. When next-hop group ID
is given, it walks buckets of that group, otherwise it walks buckets of all
groups. It then dumps the buckets whose next hops match the given filtering
criteria.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 283 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 283 insertions(+)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 439bf3b7ced5..ed2745708f9d 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -55,6 +55,17 @@ static const struct nla_policy rtm_nh_res_policy_new[] = {
 	[NHA_RES_GROUP_UNBALANCED_TIMER]	= { .type = NLA_U32 },
 };
 
+static const struct nla_policy rtm_nh_policy_dump_bucket[] = {
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_OIF]		= { .type = NLA_U32 },
+	[NHA_MASTER]		= { .type = NLA_U32 },
+	[NHA_RES_BUCKET]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
+	[NHA_RES_BUCKET_NH_ID]	= { .type = NLA_U32 },
+};
+
 static bool nexthop_notifiers_is_empty(struct net *net)
 {
 	return !net->nexthop.notifier_chain.head;
@@ -883,6 +894,60 @@ static void nh_res_bucket_set_busy(struct nh_res_bucket *bucket)
 	atomic_long_set(&bucket->used_time, (long)jiffies);
 }
 
+static clock_t nh_res_bucket_idle_time(const struct nh_res_bucket *bucket)
+{
+	unsigned long used_time = nh_res_bucket_used_time(bucket);
+
+	return jiffies_delta_to_clock_t(jiffies - used_time);
+}
+
+static int nh_fill_res_bucket(struct sk_buff *skb, struct nexthop *nh,
+			      struct nh_res_bucket *bucket, u16 bucket_index,
+			      int event, u32 portid, u32 seq,
+			      unsigned int nlflags,
+			      struct netlink_ext_ack *extack)
+{
+	struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	struct nhmsg *nhm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	nhm = nlmsg_data(nlh);
+	nhm->nh_family = AF_UNSPEC;
+	nhm->nh_flags = bucket->nh_flags;
+	nhm->nh_protocol = nh->protocol;
+	nhm->nh_scope = 0;
+	nhm->resvd = 0;
+
+	if (nla_put_u32(skb, NHA_ID, nh->id))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(skb, NHA_RES_BUCKET);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, NHA_RES_BUCKET_INDEX, bucket_index) ||
+	    nla_put_u32(skb, NHA_RES_BUCKET_NH_ID, nhge->nh->id) ||
+	    nla_put_u64_64bit(skb, NHA_RES_BUCKET_IDLE_TIME,
+			      nh_res_bucket_idle_time(bucket),
+			      NHA_RES_BUCKET_PAD))
+		goto nla_put_failure_nest;
+
+	nla_nest_end(skb, nest);
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure_nest:
+	nla_nest_cancel(skb, nest);
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
@@ -2918,10 +2983,12 @@ errout_free:
 }
 
 struct nh_dump_filter {
+	u32 nh_id;
 	int dev_idx;
 	int master_idx;
 	bool group_filter;
 	bool fdb_filter;
+	u32 res_bucket_nh_id;
 };
 
 static bool nh_dump_filtered(struct nexthop *nh,
@@ -3101,6 +3168,219 @@ out_err:
 	return err;
 }
 
+static struct nexthop *
+nexthop_find_group_resilient(struct net *net, u32 id,
+			     struct netlink_ext_ack *extack)
+{
+	struct nh_group *nhg;
+	struct nexthop *nh;
+
+	nh = nexthop_find_by_id(net, id);
+	if (!nh)
+		return ERR_PTR(-ENOENT);
+
+	if (!nh->is_group) {
+		NL_SET_ERR_MSG(extack, "Not a nexthop group");
+		return ERR_PTR(-EINVAL);
+	}
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient) {
+		NL_SET_ERR_MSG(extack, "Nexthop group not of type resilient");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return nh;
+}
+
+static int nh_valid_dump_nhid(struct nlattr *attr, u32 *nh_id_p,
+			      struct netlink_ext_ack *extack)
+{
+	u32 idx;
+
+	if (attr) {
+		idx = nla_get_u32(attr);
+		if (!idx) {
+			NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+			return -EINVAL;
+		}
+		*nh_id_p = idx;
+	} else {
+		*nh_id_p = 0;
+	}
+
+	return 0;
+}
+
+static int nh_valid_dump_bucket_req(const struct nlmsghdr *nlh,
+				    struct nh_dump_filter *filter,
+				    struct netlink_callback *cb)
+{
+	struct nlattr *res_tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_dump)];
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump_bucket)];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+			  ARRAY_SIZE(rtm_nh_policy_dump_bucket) - 1,
+			  rtm_nh_policy_dump_bucket, NULL);
+	if (err < 0)
+		return err;
+
+	err = nh_valid_dump_nhid(tb[NHA_ID], &filter->nh_id, cb->extack);
+	if (err)
+		return err;
+
+	if (tb[NHA_RES_BUCKET]) {
+		size_t max = ARRAY_SIZE(rtm_nh_res_bucket_policy_dump) - 1;
+
+		err = nla_parse_nested(res_tb, max,
+				       tb[NHA_RES_BUCKET],
+				       rtm_nh_res_bucket_policy_dump,
+				       cb->extack);
+		if (err < 0)
+			return err;
+
+		err = nh_valid_dump_nhid(res_tb[NHA_RES_BUCKET_NH_ID],
+					 &filter->res_bucket_nh_id,
+					 cb->extack);
+		if (err)
+			return err;
+	}
+
+	return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
+}
+
+struct rtm_dump_res_bucket_ctx {
+	struct rtm_dump_nh_ctx nh;
+	u16 bucket_index;
+	u32 done_nh_idx; /* 1 + the index of the last fully processed NH. */
+};
+
+static struct rtm_dump_res_bucket_ctx *
+rtm_dump_res_bucket_ctx(struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = (void *)cb->ctx;
+
+	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+	return ctx;
+}
+
+struct rtm_dump_nexthop_bucket_data {
+	struct rtm_dump_res_bucket_ctx *ctx;
+	struct nh_dump_filter filter;
+};
+
+static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh,
+				      struct rtm_dump_nexthop_bucket_data *dd)
+{
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct nhmsg *nhm = nlmsg_data(cb->nlh);
+	struct nh_res_table *res_table;
+	struct nh_group *nhg;
+	u16 bucket_index;
+	int err;
+
+	if (dd->ctx->nh.idx < dd->ctx->done_nh_idx)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	res_table = rtnl_dereference(nhg->res_table);
+	for (bucket_index = dd->ctx->bucket_index;
+	     bucket_index < res_table->num_nh_buckets;
+	     bucket_index++) {
+		struct nh_res_bucket *bucket;
+		struct nh_grp_entry *nhge;
+
+		bucket = &res_table->nh_buckets[bucket_index];
+		nhge = rtnl_dereference(bucket->nh_entry);
+		if (nh_dump_filtered(nhge->nh, &dd->filter, nhm->nh_family))
+			continue;
+
+		if (dd->filter.res_bucket_nh_id &&
+		    dd->filter.res_bucket_nh_id != nhge->nh->id)
+			continue;
+
+		err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+					 RTM_NEWNEXTHOPBUCKET, portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 cb->extack);
+		if (err < 0) {
+			if (likely(skb->len))
+				goto out;
+			goto out_err;
+		}
+	}
+
+	dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
+	bucket_index = 0;
+
+out:
+	err = skb->len;
+out_err:
+	dd->ctx->bucket_index = bucket_index;
+	return err;
+}
+
+static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
+				      struct netlink_callback *cb,
+				      struct nexthop *nh, void *data)
+{
+	struct rtm_dump_nexthop_bucket_data *dd = data;
+	struct nh_group *nhg;
+
+	if (!nh->is_group)
+		return 0;
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	if (!nhg->resilient)
+		return 0;
+
+	return rtm_dump_nexthop_bucket_nh(skb, cb, nh, dd);
+}
+
+/* rtnl */
+static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct rtm_dump_res_bucket_ctx *ctx = rtm_dump_res_bucket_ctx(cb);
+	struct rtm_dump_nexthop_bucket_data dd = { .ctx = ctx };
+	struct net *net = sock_net(skb->sk);
+	struct nexthop *nh;
+	int err;
+
+	err = nh_valid_dump_bucket_req(cb->nlh, &dd.filter, cb);
+	if (err)
+		return err;
+
+	if (dd.filter.nh_id) {
+		nh = nexthop_find_group_resilient(net, dd.filter.nh_id,
+						  cb->extack);
+		if (IS_ERR(nh))
+			return PTR_ERR(nh);
+		err = rtm_dump_nexthop_bucket_nh(skb, cb, nh, &dd);
+	} else {
+		struct rb_root *root = &net->nexthop.rb_root;
+
+		err = rtm_dump_walk_nexthops(skb, cb, root, &ctx->nh,
+					     &rtm_dump_nexthop_bucket_cb, &dd);
+	}
+
+	if (err < 0) {
+		if (likely(skb->len))
+			goto out;
+		goto out_err;
+	}
+
+out:
+	err = skb->len;
+out_err:
+	cb->seq = net->nexthop.seq;
+	nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+	return err;
+}
+
 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
@@ -3324,6 +3604,9 @@ static int __init nexthop_init(void)
 	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
 	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
 
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, NULL,
+		      rtm_dump_nexthop_bucket, 0);
+
 	return 0;
 }
 subsys_initcall(nexthop_init);
-- 
cgit v1.2.3


From 187d4c6b97967b8d023ccb981742f8605a1b1cb7 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:23 +0100
Subject: nexthop: Add netlink handlers for bucket get

Allow getting (but not setting) individual buckets to inspect the next hop
mapped therein, idle time, and flags.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index ed2745708f9d..3d602ef6f2c1 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -66,6 +66,15 @@ static const struct nla_policy rtm_nh_res_bucket_policy_dump[] = {
 	[NHA_RES_BUCKET_NH_ID]	= { .type = NLA_U32 },
 };
 
+static const struct nla_policy rtm_nh_policy_get_bucket[] = {
+	[NHA_ID]		= { .type = NLA_U32 },
+	[NHA_RES_BUCKET]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy rtm_nh_res_bucket_policy_get[] = {
+	[NHA_RES_BUCKET_INDEX]	= { .type = NLA_U16 },
+};
+
 static bool nexthop_notifiers_is_empty(struct net *net)
 {
 	return !net->nexthop.notifier_chain.head;
@@ -3381,6 +3390,105 @@ out_err:
 	return err;
 }
 
+static int nh_valid_get_bucket_req_res_bucket(struct nlattr *res,
+					      u16 *bucket_index,
+					      struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_res_bucket_policy_get)];
+	int err;
+
+	err = nla_parse_nested(tb, ARRAY_SIZE(rtm_nh_res_bucket_policy_get) - 1,
+			       res, rtm_nh_res_bucket_policy_get, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[NHA_RES_BUCKET_INDEX]) {
+		NL_SET_ERR_MSG(extack, "Bucket index is missing");
+		return -EINVAL;
+	}
+
+	*bucket_index = nla_get_u16(tb[NHA_RES_BUCKET_INDEX]);
+	return 0;
+}
+
+static int nh_valid_get_bucket_req(const struct nlmsghdr *nlh,
+				   u32 *id, u16 *bucket_index,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get_bucket)];
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
+			  ARRAY_SIZE(rtm_nh_policy_get_bucket) - 1,
+			  rtm_nh_policy_get_bucket, extack);
+	if (err < 0)
+		return err;
+
+	err = __nh_valid_get_del_req(nlh, tb, id, extack);
+	if (err)
+		return err;
+
+	if (!tb[NHA_RES_BUCKET]) {
+		NL_SET_ERR_MSG(extack, "Bucket information is missing");
+		return -EINVAL;
+	}
+
+	err = nh_valid_get_bucket_req_res_bucket(tb[NHA_RES_BUCKET],
+						 bucket_index, extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* rtnl */
+static int rtm_get_nexthop_bucket(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+				  struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct nh_res_table *res_table;
+	struct sk_buff *skb = NULL;
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	u16 bucket_index;
+	int err;
+	u32 id;
+
+	err = nh_valid_get_bucket_req(nlh, &id, &bucket_index, extack);
+	if (err)
+		return err;
+
+	nh = nexthop_find_group_resilient(net, id, extack);
+	if (IS_ERR(nh))
+		return PTR_ERR(nh);
+
+	nhg = rtnl_dereference(nh->nh_grp);
+	res_table = rtnl_dereference(nhg->res_table);
+	if (bucket_index >= res_table->num_nh_buckets) {
+		NL_SET_ERR_MSG(extack, "Bucket index out of bounds");
+		return -ENOENT;
+	}
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	err = nh_fill_res_bucket(skb, nh, &res_table->nh_buckets[bucket_index],
+				 bucket_index, RTM_NEWNEXTHOPBUCKET,
+				 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
+				 0, extack);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		goto errout_free;
+	}
+
+	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+
+errout_free:
+	kfree_skb(skb);
+	return err;
+}
+
 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
 	unsigned int hash = nh_dev_hashfn(dev->ifindex);
@@ -3604,7 +3712,7 @@ static int __init nexthop_init(void)
 	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
 	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
 
-	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, NULL,
+	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOPBUCKET, rtm_get_nexthop_bucket,
 		      rtm_dump_nexthop_bucket, 0);
 
 	return 0;
-- 
cgit v1.2.3


From 0b4818aabcd6508a2545acb809f4acea034cea9c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:24 +0100
Subject: nexthop: Notify userspace about bucket migrations

Nexthop replacements et.al. are notified through netlink, but if a delayed
work migrates buckets on the background, userspace will stay oblivious.
Notify these as RTM_NEWNEXTHOPBUCKET events.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 45 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 3d602ef6f2c1..015a47e8163a 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -957,6 +957,34 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static void nexthop_bucket_notify(struct nh_res_table *res_table,
+				  u16 bucket_index)
+{
+	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
+	struct nh_grp_entry *nhge = nh_res_dereference(bucket->nh_entry);
+	struct nexthop *nh = nhge->nh_parent;
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto errout;
+
+	err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
+				 RTM_NEWNEXTHOPBUCKET, 0, 0, NLM_F_REPLACE,
+				 NULL);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+}
+
 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 			   bool *is_fdb, struct netlink_ext_ack *extack)
 {
@@ -1470,7 +1498,8 @@ static bool nh_res_bucket_should_migrate(struct nh_res_table *res_table,
 }
 
 static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
-				  u16 bucket_index, bool notify, bool force)
+				  u16 bucket_index, bool notify,
+				  bool notify_nl, bool force)
 {
 	struct nh_res_bucket *bucket = &res_table->nh_buckets[bucket_index];
 	struct nh_grp_entry *new_nhge;
@@ -1513,6 +1542,9 @@ static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
 	nh_res_bucket_set_nh(bucket, new_nhge);
 	nh_res_bucket_set_idle(res_table, bucket);
 
+	if (notify_nl)
+		nexthop_bucket_notify(res_table, bucket_index);
+
 	if (nh_res_nhge_is_balanced(new_nhge))
 		list_del(&new_nhge->res.uw_nh_entry);
 	return true;
@@ -1520,7 +1552,8 @@ static bool nh_res_bucket_migrate(struct nh_res_table *res_table,
 
 #define NH_RES_UPKEEP_DW_MINIMUM_INTERVAL (HZ / 2)
 
-static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify)
+static void nh_res_table_upkeep(struct nh_res_table *res_table,
+				bool notify, bool notify_nl)
 {
 	unsigned long now = jiffies;
 	unsigned long deadline;
@@ -1545,7 +1578,7 @@ static void nh_res_table_upkeep(struct nh_res_table *res_table, bool notify)
 		if (nh_res_bucket_should_migrate(res_table, bucket,
 						 &deadline, &force)) {
 			if (!nh_res_bucket_migrate(res_table, i, notify,
-						   force)) {
+						   notify_nl, force)) {
 				unsigned long idle_point;
 
 				/* A driver can override the migration
@@ -1586,7 +1619,7 @@ static void nh_res_table_upkeep_dw(struct work_struct *work)
 	struct nh_res_table *res_table;
 
 	res_table = container_of(dw, struct nh_res_table, upkeep_dw);
-	nh_res_table_upkeep(res_table, true);
+	nh_res_table_upkeep(res_table, true, true);
 }
 
 static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
@@ -1674,7 +1707,7 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
 	nh_res_group_rebalance(newg, old_res_table);
 	if (prev_has_uw && !list_empty(&old_res_table->uw_nh_entries))
 		old_res_table->unbalanced_since = prev_unbalanced_since;
-	nh_res_table_upkeep(old_res_table, true);
+	nh_res_table_upkeep(old_res_table, true, false);
 }
 
 static void nh_mp_group_rebalance(struct nh_group *nhg)
@@ -2288,7 +2321,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 			/* Do not send bucket notifications, we do full
 			 * notification below.
 			 */
-			nh_res_table_upkeep(res_table, false);
+			nh_res_table_upkeep(res_table, false, false);
 		}
 	}
 
-- 
cgit v1.2.3


From 15e1dd570306680269a4738d8e6c721f0924aa03 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Thu, 11 Mar 2021 19:03:25 +0100
Subject: nexthop: Enable resilient next-hop groups

Now that all the code is in place, stop rejecting requests to create
resilient next-hop groups.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 015a47e8163a..f09fe3a5608f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2443,10 +2443,6 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
 		struct nh_res_table *res_table;
 
-		/* Bounce resilient groups for now. */
-		err = -EINVAL;
-		goto out_no_nh;
-
 		res_table = nexthop_res_table_alloc(net, cfg->nh_id, cfg);
 		if (!res_table) {
 			err = -ENOMEM;
-- 
cgit v1.2.3


From 5c7659eba873df8929f4bffd352906f625d4cfec Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:11 +0200
Subject: mlxsw: spectrum_span: Add SPAN session identifier support

When packets are mirrored to the CPU, the trap identifier with which the
packets are trapped is determined according to the session identifier of
the SPAN agent performing the mirroring. Packets that are trapped for
the same logical reason (e.g., buffer drops) should use the same session
identifier.

Currently, a single session is implicitly supported (identifier 0) and
is used for packets that are mirrored to the CPU due to buffer drops
(e.g., early drop).

Subsequent patches are going to mirror packets to the CPU due to
sampling, which will require a different session identifier.

Prepare for that by making the session identifier an attribute of the
SPAN agent.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c |  4 +++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c  |  6 +++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h  | 14 ++++++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index fd672c6c9133..9d16823320ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -1404,7 +1404,9 @@ static int mlxsw_sp_qevent_trap_configure(struct mlxsw_sp *mlxsw_sp,
 					  struct mlxsw_sp_mall_entry *mall_entry,
 					  struct mlxsw_sp_qevent_binding *qevent_binding)
 {
-	struct mlxsw_sp_span_agent_parms agent_parms = {};
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_BUFFER,
+	};
 	int err;
 
 	err = mlxsw_sp_trap_group_policer_hw_id_get(mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 1892cea05ee7..3287211819df 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -186,6 +186,7 @@ mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry,
 	/* Create a new port analayzer entry for local_port. */
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true,
 			    MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH);
+	mlxsw_reg_mpat_session_id_set(mpat_pl, sparms.session_id);
 	mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable);
 	mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id);
 
@@ -203,6 +204,7 @@ mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry,
 	int pa_id = span_entry->id;
 
 	mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type);
+	mlxsw_reg_mpat_session_id_set(mpat_pl, span_entry->parms.session_id);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl);
 }
 
@@ -938,7 +940,8 @@ mlxsw_sp_span_entry_find_by_parms(struct mlxsw_sp *mlxsw_sp,
 
 		if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev &&
 		    curr->parms.policer_enable == sparms->policer_enable &&
-		    curr->parms.policer_id == sparms->policer_id)
+		    curr->parms.policer_id == sparms->policer_id &&
+		    curr->parms.session_id == sparms->session_id)
 			return curr;
 	}
 	return NULL;
@@ -1085,6 +1088,7 @@ int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id,
 
 	sparms.policer_id = parms->policer_id;
 	sparms.policer_enable = parms->policer_enable;
+	sparms.session_id = parms->session_id;
 	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
 	if (!span_entry)
 		return -ENOBUFS;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index aa1cd409c0e2..6e84cc049428 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -13,6 +13,18 @@
 struct mlxsw_sp;
 struct mlxsw_sp_port;
 
+/* SPAN session identifiers that correspond to MLXSW_TRAP_ID_MIRROR_SESSION<i>
+ * trap identifiers. The session identifier is an attribute of the SPAN agent,
+ * which determines the trap identifier of packets that are mirrored to the
+ * CPU. Packets that are trapped to the CPU for the same logical reason (e.g.,
+ * buffer drops) should use the same session identifier.
+ */
+enum mlxsw_sp_span_session_id {
+	MLXSW_SP_SPAN_SESSION_ID_BUFFER,
+
+	__MLXSW_SP_SPAN_SESSION_ID_MAX = 8,
+};
+
 struct mlxsw_sp_span_parms {
 	struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
 	unsigned int ttl;
@@ -23,6 +35,7 @@ struct mlxsw_sp_span_parms {
 	u16 vid;
 	u16 policer_id;
 	bool policer_enable;
+	enum mlxsw_sp_span_session_id session_id;
 };
 
 enum mlxsw_sp_span_trigger {
@@ -41,6 +54,7 @@ struct mlxsw_sp_span_agent_parms {
 	const struct net_device *to_dev;
 	u16 policer_id;
 	bool policer_enable;
+	enum mlxsw_sp_span_session_id session_id;
 };
 
 struct mlxsw_sp_span_entry_ops;
-- 
cgit v1.2.3


From fa3faeb7aedbfbd7e30eb25a25058a2cce1010fb Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:12 +0200
Subject: mlxsw: reg: Extend mirroring registers with probability rate field

The MPAR and MPAGR registers are used to configure the binding between
the mirroring trigger (e.g., received packet) and the SPAN agent. Add
probability rate field, which will allow us to support sampling by
mirroring to the CPU.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h           | 17 ++++++++++++++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c |  2 +-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 2f7f691f85ff..44f836246e33 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9925,15 +9925,28 @@ MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1);
  */
 MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4);
 
+#define MLXSW_REG_MPAR_RATE_MAX 3500000000UL
+
+/* reg_mpar_probability_rate
+ * Sampling rate.
+ * Valid values are: 1 to 3.5*10^9
+ * Value of 1 means "sample all". Default is 1.
+ * Reserved when Spectrum-1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mpar, probability_rate, 0x08, 0, 32);
+
 static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
 				       enum mlxsw_reg_mpar_i_e i_e,
-				       bool enable, u8 pa_id)
+				       bool enable, u8 pa_id,
+				       u32 probability_rate)
 {
 	MLXSW_REG_ZERO(mpar, payload);
 	mlxsw_reg_mpar_local_port_set(payload, local_port);
 	mlxsw_reg_mpar_enable_set(payload, enable);
 	mlxsw_reg_mpar_i_e_set(payload, i_e);
 	mlxsw_reg_mpar_pa_id_set(payload, pa_id);
+	mlxsw_reg_mpar_probability_rate_set(payload, probability_rate);
 }
 
 /* MGIR - Management General Information Register
@@ -10577,6 +10590,8 @@ MLXSW_ITEM32(reg, mpagr, trigger, 0x00, 0, 4);
  */
 MLXSW_ITEM32(reg, mpagr, pa_id, 0x04, 0, 4);
 
+#define MLXSW_REG_MPAGR_RATE_MAX 3500000000UL
+
 /* reg_mpagr_probability_rate
  * Sampling rate.
  * Valid values are: 1 to 3.5*10^9
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 3287211819df..7711ace07ec8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1232,7 +1232,7 @@ __mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span *span,
 	}
 
 	mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable,
-			    trigger_entry->parms.span_id);
+			    trigger_entry->parms.span_id, 1);
 	return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
 }
 
-- 
cgit v1.2.3


From 2dcbd9207b3380c0efd89b2805dfc4e867470eb9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:13 +0200
Subject: mlxsw: spectrum_span: Add SPAN probability rate support

Currently, every packet that matches a mirroring trigger (e.g., received
packets, buffer dropped packets) is mirrored. Spectrum-2 and later ASICs
support mirroring with probability, where every 1 in N matched packets
is mirrored.

Extend the API that creates the binding between the trigger and the SPAN
agent with a probability rate parameter, which is an attribute of the
trigger. Set it to '1' to maintain existing behavior.

Subsequent patches will use it to perform more sophisticated sampling,
by mirroring packets to the CPU with probability.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c |  1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c    |  1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c     | 15 ++++++++++++---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h     |  1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index f30599ad6019..2b7652b4b0bb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -51,6 +51,7 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
 					MLXSW_SP_SPAN_TRIGGER_EGRESS;
 	parms.span_id = mall_entry->mirror.span_id;
+	parms.probability_rate = 1;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port,
 				       &parms);
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 9d16823320ae..baf17c0b2702 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -1341,6 +1341,7 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp,
 		goto err_analyzed_port_get;
 
 	trigger_parms.span_id = span_id;
+	trigger_parms.probability_rate = 1;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port,
 				       &trigger_parms);
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 7711ace07ec8..3398cc01e5ec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1231,8 +1231,12 @@ __mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span *span,
 		return -EINVAL;
 	}
 
+	if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAR_RATE_MAX)
+		return -EINVAL;
+
 	mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable,
-			    trigger_entry->parms.span_id, 1);
+			    trigger_entry->parms.span_id,
+			    trigger_entry->parms.probability_rate);
 	return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
 }
 
@@ -1366,8 +1370,11 @@ mlxsw_sp2_span_trigger_global_bind(struct mlxsw_sp_span_trigger_entry *
 		return -EINVAL;
 	}
 
+	if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAGR_RATE_MAX)
+		return -EINVAL;
+
 	mlxsw_reg_mpagr_pack(mpagr_pl, trigger, trigger_entry->parms.span_id,
-			     1);
+			     trigger_entry->parms.probability_rate);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpagr), mpagr_pl);
 }
 
@@ -1565,7 +1572,9 @@ int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp,
 							 trigger,
 							 mlxsw_sp_port);
 	if (trigger_entry) {
-		if (trigger_entry->parms.span_id != parms->span_id)
+		if (trigger_entry->parms.span_id != parms->span_id ||
+		    trigger_entry->parms.probability_rate !=
+		    parms->probability_rate)
 			return -EINVAL;
 		refcount_inc(&trigger_entry->ref_count);
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 6e84cc049428..dea1c0d31310 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -48,6 +48,7 @@ enum mlxsw_sp_span_trigger {
 
 struct mlxsw_sp_span_trigger_parms {
 	int span_id;
+	u32 probability_rate;
 };
 
 struct mlxsw_sp_span_agent_parms {
-- 
cgit v1.2.3


From 20afb9bc480d0bfe3d1abcb934d79b2cdc19acbf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:14 +0200
Subject: mlxsw: spectrum_matchall: Split sampling support between ASICs

Sampling of ingress packets is supported using a dedicated sampling
mechanism on all Spectrum ASICs. However, Spectrum-2 and later ASICs
support more sophisticated sampling by mirroring packets to the CPU.

As a preparation for more advanced sampling configurations, split the
sampling operations between Spectrum-1 and later ASICs.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  3 ++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     | 11 ++++++++
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 32 ++++++++++++++++++++--
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dbf95e52b341..93b15b8c007e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2804,6 +2804,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp1_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp1_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp1_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp1_mall_ops;
 	mlxsw_sp->listeners = mlxsw_sp1_listener;
 	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
@@ -2833,6 +2834,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -2860,6 +2862,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->span_ops = &mlxsw_sp3_span_ops;
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
+	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d9d9e1f488f9..1ec9a3c48a42 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -179,6 +179,7 @@ struct mlxsw_sp {
 	const struct mlxsw_sp_span_ops *span_ops;
 	const struct mlxsw_sp_policer_core_ops *policer_core_ops;
 	const struct mlxsw_sp_trap_ops *trap_ops;
+	const struct mlxsw_sp_mall_ops *mall_ops;
 	const struct mlxsw_listener *listeners;
 	size_t listeners_count;
 	u32 lowest_shaper_bs;
@@ -1033,6 +1034,16 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_matchall.c */
+struct mlxsw_sp_mall_ops {
+	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_port *mlxsw_sp_port, u32 rate);
+	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
+			   struct mlxsw_sp_port *mlxsw_sp_port);
+};
+
+extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops;
+extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops;
+
 enum mlxsw_sp_mall_action_type {
 	MLXSW_SP_MALL_ACTION_TYPE_MIRROR,
 	MLXSW_SP_MALL_ACTION_TYPE_SAMPLE,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 2b7652b4b0bb..ad1209df81ea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -96,6 +96,7 @@ static int
 mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_mall_entry *mall_entry)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
 	if (rtnl_dereference(mlxsw_sp_port->sample)) {
@@ -104,8 +105,8 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
 
-	err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true,
-					    mall_entry->sample.rate);
+	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
+					     mall_entry->sample.rate);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
@@ -118,10 +119,12 @@ err_port_sample_set:
 static void
 mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
 	if (!mlxsw_sp_port->sample)
 		return;
 
-	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
+	mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port);
 	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
 }
 
@@ -356,3 +359,26 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
 	*p_max_prio = block->mall.max_prio;
 	return 0;
 }
+
+static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_port *mlxsw_sp_port,
+				     u32 rate)
+{
+	return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate);
+}
+
+static void mlxsw_sp1_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
+}
+
+const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
+	.sample_add = mlxsw_sp1_mall_sample_add,
+	.sample_del = mlxsw_sp1_mall_sample_del,
+};
+
+const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = {
+	.sample_add = mlxsw_sp1_mall_sample_add,
+	.sample_del = mlxsw_sp1_mall_sample_del,
+};
-- 
cgit v1.2.3


From 34a277212c67a395b47b715aa89c4b4e2c957d42 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:15 +0200
Subject: mlxsw: spectrum_trap: Split sampling traps between ASICs

Sampling of ingress packets is supported using a dedicated sampling
mechanism on all Spectrum ASICs. However, Spectrum-2 and later ASICs
support more sophisticated sampling by mirroring packets to the CPU.

As a preparation for more advanced sampling configurations, split the trap
configuration used for sampled packets between Spectrum-1 and later ASICs.

This is needed since packets that are mirrored to the CPU are trapped
via a different trap identifier compared to packets that are sampled
using the dedicated sampling mechanism.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 39 ++++++++++++++--------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 4ef12e3e021a..6ecc77fde095 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -463,11 +463,6 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
 		.priority = 2,
 	},
-	{
-		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
-		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
-		.priority = 0,
-	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
@@ -992,14 +987,6 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 			MLXSW_SP_RXL_NO_MARK(PTP1, PTP1, TRAP_TO_CPU, false),
 		},
 	},
-	{
-		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
-					      MIRROR),
-		.listeners_arr = {
-			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
-				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
-		},
-	},
 	{
 		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP),
 		.listeners_arr = {
@@ -1709,10 +1696,23 @@ int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id,
 
 static const struct mlxsw_sp_trap_group_item
 mlxsw_sp1_trap_group_items_arr[] = {
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+		.priority = 0,
+	},
 };
 
 static const struct mlxsw_sp_trap_item
 mlxsw_sp1_trap_items_arr[] = {
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
+				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+		},
+	},
 };
 
 static int
@@ -1749,6 +1749,11 @@ mlxsw_sp2_trap_group_items_arr[] = {
 		.priority = 0,
 		.fixed_policer = true,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+		.priority = 0,
+	},
 };
 
 static const struct mlxsw_sp_trap_item
@@ -1760,6 +1765,14 @@ mlxsw_sp2_trap_items_arr[] = {
 		},
 		.is_source = true,
 	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
+				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+		},
+	},
 };
 
 static int
-- 
cgit v1.2.3


From cf31190ae0b788159a9874f0b28bbfde994741cd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 11 Mar 2021 14:24:16 +0200
Subject: mlxsw: spectrum_matchall: Implement sampling using mirroring

Spectrum-2 and later ASICs support sampling of packets by mirroring to
the CPU with probability. There are several advantages compared to the
legacy dedicated sampling mechanism:

* Extra metadata per-packet: Egress port, egress traffic class, traffic
  class occupancy and end-to-end latency
* Ability to sample packets on egress / per-flow

Convert Spectrum-2 and later ASICs to perform sampling by mirroring to
the CPU with probability.

Subsequent patches will add support for egress / per-flow sampling and
expose the extra metadata.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  1 +
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 57 +++++++++++++++++++++-
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |  1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    |  8 ++-
 4 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 1ec9a3c48a42..bc7006de7873 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -238,6 +238,7 @@ struct mlxsw_sp_port_sample {
 	u32 trunc_size;
 	u32 rate;
 	bool truncate;
+	int span_id;	/* Relevant for Spectrum-2 onwards. */
 };
 
 struct mlxsw_sp_bridge_port;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index ad1209df81ea..841a2de37f36 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -378,7 +378,60 @@ const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
 	.sample_del = mlxsw_sp1_mall_sample_del,
 };
 
+static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
+				     struct mlxsw_sp_port *mlxsw_sp_port,
+				     u32 rate)
+{
+	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.to_dev = NULL,	/* Mirror to CPU. */
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
+	};
+	struct mlxsw_sp_port_sample *sample;
+	int err;
+
+	sample = rtnl_dereference(mlxsw_sp_port->sample);
+
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, &sample->span_id, &agent_parms);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, true);
+	if (err)
+		goto err_analyzed_port_get;
+
+	trigger_parms.span_id = sample->span_id;
+	trigger_parms.probability_rate = rate;
+	err = mlxsw_sp_span_agent_bind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
+				       mlxsw_sp_port, &trigger_parms);
+	if (err)
+		goto err_agent_bind;
+
+	return 0;
+
+err_agent_bind:
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, sample->span_id);
+	return err;
+}
+
+static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
+	struct mlxsw_sp_port_sample *sample;
+
+	sample = rtnl_dereference(mlxsw_sp_port->sample);
+
+	trigger_parms.span_id = sample->span_id;
+	mlxsw_sp_span_agent_unbind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
+				   mlxsw_sp_port, &trigger_parms);
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
+	mlxsw_sp_span_agent_put(mlxsw_sp, sample->span_id);
+}
+
 const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = {
-	.sample_add = mlxsw_sp1_mall_sample_add,
-	.sample_del = mlxsw_sp1_mall_sample_del,
+	.sample_add = mlxsw_sp2_mall_sample_add,
+	.sample_del = mlxsw_sp2_mall_sample_del,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index dea1c0d31310..efaefd1ae863 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -21,6 +21,7 @@ struct mlxsw_sp_port;
  */
 enum mlxsw_sp_span_session_id {
 	MLXSW_SP_SPAN_SESSION_ID_BUFFER,
+	MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
 
 	__MLXSW_SP_SPAN_SESSION_ID_MAX = 8,
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 6ecc77fde095..e0e6ee58d31a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -49,6 +49,8 @@ enum {
 #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT
 
 enum {
+	/* Packet was mirrored from ingress. */
+	MLXSW_SP_MIRROR_REASON_INGRESS = 1,
 	/* Packet was early dropped. */
 	MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9,
 };
@@ -1753,6 +1755,7 @@ mlxsw_sp2_trap_group_items_arr[] = {
 		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
 		.priority = 0,
+		.fixed_policer = true,
 	},
 };
 
@@ -1769,8 +1772,9 @@ mlxsw_sp2_trap_items_arr[] = {
 		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
 					      MIRROR),
 		.listeners_arr = {
-			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
-				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_INGRESS),
 		},
 	},
 };
-- 
cgit v1.2.3


From 1980d37565061ab44bdc2f9e4da477d3b9752e81 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Thu, 11 Mar 2021 10:33:22 +0700
Subject: tipc: convert dest node's address to network order

(struct tipc_link_info)->dest is in network order (__be32), so we must
convert the value to network order before assigning. The problem detected
by sparse:

net/tipc/netlink_compat.c:699:24: warning: incorrect type in assignment (different base types)
net/tipc/netlink_compat.c:699:24:    expected restricted __be32 [usertype] dest
net/tipc/netlink_compat.c:699:24:    got int

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/netlink_compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 5a1ce64039f7..0749df80454d 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -696,7 +696,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
 	if (err)
 		return err;
 
-	link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
+	link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST]));
 	link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
 	nla_strscpy(link_info.str, link[TIPC_NLA_LINK_NAME],
 		    TIPC_MAX_LINK_NAME);
-- 
cgit v1.2.3


From 97bc84bbd4de3c060b68aea4d546c7f21c4d6814 Mon Sep 17 00:00:00 2001
From: Hoang Huu Le <hoang.h.le@dektech.com.au>
Date: Thu, 11 Mar 2021 10:33:23 +0700
Subject: tipc: clean up warnings detected by sparse

This patch fixes the following warning from sparse:

net/tipc/monitor.c:263:35: warning: incorrect type in assignment (different base types)
net/tipc/monitor.c:263:35:    expected unsigned int
net/tipc/monitor.c:263:35:    got restricted __be32 [usertype]
[...]
net/tipc/node.c:374:13: warning: context imbalance in 'tipc_node_read_lock' - wrong count at exit
net/tipc/node.c:379:13: warning: context imbalance in 'tipc_node_read_unlock' - unexpected unlock
net/tipc/node.c:384:13: warning: context imbalance in 'tipc_node_write_lock' - wrong count at exit
net/tipc/node.c:389:13: warning: context imbalance in 'tipc_node_write_unlock_fast' - unexpected unlock
net/tipc/node.c:404:17: warning: context imbalance in 'tipc_node_write_unlock' - unexpected unlock
[...]
net/tipc/crypto.c:1201:9: warning: incorrect type in initializer (different address spaces)
net/tipc/crypto.c:1201:9:    expected struct tipc_aead [noderef] __rcu *__tmp
net/tipc/crypto.c:1201:9:    got struct tipc_aead *
[...]

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Huu Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/crypto.c  | 12 +++++------
 net/tipc/monitor.c | 63 ++++++++++++++++++++++++++++++++++++++++--------------
 net/tipc/node.c    |  5 +++++
 3 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index f4fca8f7f63f..6f64acef73dc 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -317,7 +317,7 @@ static int tipc_aead_key_generate(struct tipc_aead_key *skey);
 
 #define tipc_aead_rcu_replace(rcu_ptr, ptr, lock)			\
 do {									\
-	typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr),	\
+	struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr),	\
 						lockdep_is_held(lock));	\
 	rcu_assign_pointer((rcu_ptr), (ptr));				\
 	tipc_aead_put(__tmp);						\
@@ -798,7 +798,7 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
 	ehdr = (struct tipc_ehdr *)skb->data;
 	salt = aead->salt;
 	if (aead->mode == CLUSTER_KEY)
-		salt ^= ehdr->addr; /* __be32 */
+		salt ^= __be32_to_cpu(ehdr->addr);
 	else if (__dnode)
 		salt ^= tipc_node_get_addr(__dnode);
 	memcpy(iv, &salt, 4);
@@ -929,7 +929,7 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
 	ehdr = (struct tipc_ehdr *)skb->data;
 	salt = aead->salt;
 	if (aead->mode == CLUSTER_KEY)
-		salt ^= ehdr->addr; /* __be32 */
+		salt ^= __be32_to_cpu(ehdr->addr);
 	else if (ehdr->destined)
 		salt ^= tipc_own_addr(net);
 	memcpy(iv, &salt, 4);
@@ -1946,16 +1946,16 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
 			goto rcv;
 		}
 		tipc_aead_put(aead);
-		aead = tipc_aead_get(tmp);
+		aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp);
 	}
 
 	if (unlikely(err)) {
-		tipc_aead_users_dec(aead, INT_MIN);
+		tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN);
 		goto free_skb;
 	}
 
 	/* Set the RX key's user */
-	tipc_aead_users_set(aead, 1);
+	tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
 
 	/* Mark this point, RX works */
 	rx->timer1 = jiffies;
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 48fac3b17e40..407619697292 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -104,6 +104,36 @@ static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
 
 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);
 
+static inline u16 mon_cpu_to_le16(u16 val)
+{
+	return (__force __u16)htons(val);
+}
+
+static inline u32 mon_cpu_to_le32(u32 val)
+{
+	return (__force __u32)htonl(val);
+}
+
+static inline u64 mon_cpu_to_le64(u64 val)
+{
+	return (__force __u64)cpu_to_be64(val);
+}
+
+static inline u16 mon_le16_to_cpu(u16 val)
+{
+	return ntohs((__force __be16)val);
+}
+
+static inline u32 mon_le32_to_cpu(u32 val)
+{
+	return ntohl((__force __be32)val);
+}
+
+static inline u64 mon_le64_to_cpu(u64 val)
+{
+	return be64_to_cpu((__force __be64)val);
+}
+
 /* dom_rec_len(): actual length of domain record for transport
  */
 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
@@ -260,16 +290,16 @@ static void mon_update_local_domain(struct tipc_monitor *mon)
 		diff |= dom->members[i] != peer->addr;
 		dom->members[i] = peer->addr;
 		map_set(&dom->up_map, i, peer->is_up);
-		cache->members[i] = htonl(peer->addr);
+		cache->members[i] = mon_cpu_to_le32(peer->addr);
 	}
 	diff |= dom->up_map != prev_up_map;
 	if (!diff)
 		return;
 	dom->gen = ++mon->dom_gen;
-	cache->len = htons(dom->len);
-	cache->gen = htons(dom->gen);
-	cache->member_cnt = htons(member_cnt);
-	cache->up_map = cpu_to_be64(dom->up_map);
+	cache->len = mon_cpu_to_le16(dom->len);
+	cache->gen = mon_cpu_to_le16(dom->gen);
+	cache->member_cnt = mon_cpu_to_le16(member_cnt);
+	cache->up_map = mon_cpu_to_le64(dom->up_map);
 	mon_apply_domain(mon, self);
 }
 
@@ -455,10 +485,11 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 	struct tipc_mon_domain dom_bef;
 	struct tipc_mon_domain *dom;
 	struct tipc_peer *peer;
-	u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
+	u16 new_member_cnt = mon_le16_to_cpu(arrv_dom->member_cnt);
 	int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
-	u16 new_gen = ntohs(arrv_dom->gen);
-	u16 acked_gen = ntohs(arrv_dom->ack_gen);
+	u16 new_gen = mon_le16_to_cpu(arrv_dom->gen);
+	u16 acked_gen = mon_le16_to_cpu(arrv_dom->ack_gen);
+	u16 arrv_dlen = mon_le16_to_cpu(arrv_dom->len);
 	bool probing = state->probing;
 	int i, applied_bef;
 
@@ -469,7 +500,7 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 		return;
 	if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
 		return;
-	if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
+	if (dlen < new_dlen || arrv_dlen != new_dlen)
 		return;
 
 	/* Synch generation numbers with peer if link just came up */
@@ -517,9 +548,9 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
 	dom->len = new_dlen;
 	dom->gen = new_gen;
 	dom->member_cnt = new_member_cnt;
-	dom->up_map = be64_to_cpu(arrv_dom->up_map);
+	dom->up_map = mon_le64_to_cpu(arrv_dom->up_map);
 	for (i = 0; i < new_member_cnt; i++)
-		dom->members[i] = ntohl(arrv_dom->members[i]);
+		dom->members[i] = mon_le32_to_cpu(arrv_dom->members[i]);
 
 	/* Update peers affected by this domain record */
 	applied_bef = peer->applied;
@@ -548,19 +579,19 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
 	if (likely(state->acked_gen == gen)) {
 		len = dom_rec_len(dom, 0);
 		*dlen = len;
-		dom->len = htons(len);
-		dom->gen = htons(gen);
-		dom->ack_gen = htons(state->peer_gen);
+		dom->len = mon_cpu_to_le16(len);
+		dom->gen = mon_cpu_to_le16(gen);
+		dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
 		dom->member_cnt = 0;
 		return;
 	}
 	/* Send the full record */
 	read_lock_bh(&mon->lock);
-	len = ntohs(mon->cache.len);
+	len = mon_le16_to_cpu(mon->cache.len);
 	*dlen = len;
 	memcpy(data, &mon->cache, len);
 	read_unlock_bh(&mon->lock);
-	dom->ack_gen = htons(state->peer_gen);
+	dom->ack_gen = mon_cpu_to_le16(state->peer_gen);
 }
 
 void tipc_mon_get_state(struct net *net, u32 addr,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 008670d1f43e..9c95ef4b6326 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -372,26 +372,31 @@ static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
 }
 
 static void tipc_node_read_lock(struct tipc_node *n)
+	__acquires(n->lock)
 {
 	read_lock_bh(&n->lock);
 }
 
 static void tipc_node_read_unlock(struct tipc_node *n)
+	__releases(n->lock)
 {
 	read_unlock_bh(&n->lock);
 }
 
 static void tipc_node_write_lock(struct tipc_node *n)
+	__acquires(n->lock)
 {
 	write_lock_bh(&n->lock);
 }
 
 static void tipc_node_write_unlock_fast(struct tipc_node *n)
+	__releases(n->lock)
 {
 	write_unlock_bh(&n->lock);
 }
 
 static void tipc_node_write_unlock(struct tipc_node *n)
+	__releases(n->lock)
 {
 	struct net *net = n->net;
 	u32 addr = 0;
-- 
cgit v1.2.3


From 8176f8c0f095c9df44994a33f19b55e7c847df7f Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Wed, 10 Mar 2021 23:27:55 -0500
Subject: isdn: remove extra spaces in the header file

fix some coding style issues in the isdn header

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/iohelper.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/iohelper.h b/drivers/isdn/hardware/mISDN/iohelper.h
index b2b2bde8edba..c81f7aba4b57 100644
--- a/drivers/isdn/hardware/mISDN/iohelper.h
+++ b/drivers/isdn/hardware/mISDN/iohelper.h
@@ -13,14 +13,14 @@
 #ifndef _IOHELPER_H
 #define _IOHELPER_H
 
-typedef	u8	(read_reg_func)(void *hwp, u8 offset);
-			       typedef	void	(write_reg_func)(void *hwp, u8 offset, u8 value);
-			       typedef	void	(fifo_func)(void *hwp, u8 offset, u8 *datap, int size);
+typedef u8 (read_reg_func)(void *hwp, u8 offset);
+typedef void (write_reg_func)(void *hwp, u8 offset, u8 value);
+typedef void (fifo_func)(void *hwp, u8 offset, u8 *datap, int size);
 
-			       struct _ioport {
-				       u32	port;
-				       u32	ale;
-			       };
+struct _ioport {
+	u32 port;
+	u32 ale;
+};
 
 #define IOFUNC_IO(name, hws, ap)					\
 	static u8 Read##name##_IO(void *p, u8 off) {			\
-- 
cgit v1.2.3


From f4dae54e486d528d4dd98df116e7a522bbf12667 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 11 Mar 2021 12:35:04 -0800
Subject: tcp: plug skb_still_in_host_queue() to TSQ

Jakub and Neil reported an increase of RTO timers whenever
TX completions are delayed a bit more (by increasing
NIC TX coalescing parameters)

Main issue is that TCP stack has a logic preventing a packet
being retransmit if the prior clone has not yet been
orphaned or freed.

This logic came with commit 1f3279ae0c13 ("tcp: avoid
retransmits of TCP packets hanging in host queues")

Thankfully, in the case skb_still_in_host_queue() detects
the initial clone is still in flight, it can use TSQ logic
that will eventually retry later, at the moment the clone
is freed or orphaned.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Neil Spring <ntspring@fb.com>
Reported-by: Jakub Kicinski <kuba@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/ipv4/tcp_output.c  | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0503c917d773..483e89348f78 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1140,7 +1140,7 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 
 	return skb->fclone == SKB_FCLONE_ORIG &&
 	       refcount_read(&fclones->fclone_ref) > 1 &&
-	       fclones->skb2.sk == sk;
+	       READ_ONCE(fclones->skb2.sk) == sk;
 }
 
 /**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fbf140a770d8..0dbf208a4f2f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2775,13 +2775,17 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
  * a packet is still in a qdisc or driver queue.
  * In this case, there is very little point doing a retransmit !
  */
-static bool skb_still_in_host_queue(const struct sock *sk,
+static bool skb_still_in_host_queue(struct sock *sk,
 				    const struct sk_buff *skb)
 {
 	if (unlikely(skb_fclone_busy(sk, skb))) {
-		NET_INC_STATS(sock_net(sk),
-			      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
-		return true;
+		set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+		smp_mb__after_atomic();
+		if (skb_fclone_busy(sk, skb)) {
+			NET_INC_STATS(sock_net(sk),
+				      LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+			return true;
+		}
 	}
 	return false;
 }
-- 
cgit v1.2.3


From a7abf3cd76e1e190a2c22afe5ffd0f695c7641b0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 11 Mar 2021 12:35:05 -0800
Subject: tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()

Jakub reported Data included in a Fastopen SYN that had to be
retransmit would have to wait for an RTO if TX completions are slow,
even with prior fix.

This is because tcp_rcv_fastopen_synack() does not use standard
rtx logic, meaning TSQ handler exits early in tcp_tsq_write()
because tp->lost_out == tp->retrans_out

Lets make tcp_rcv_fastopen_synack() use standard rtx logic,
by using tcp_mark_skb_lost() on the skb thats needs to be
sent again.

Not this raised a warning in tcp_fastretrans_alert() during my tests
since we consider the data not being aknowledged
by the receiver does not mean packet was lost on the network.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jakub Kicinski <kuba@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69a545db80d2..4cf4dd532d1c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2914,7 +2914,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 	/* D. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		WARN_ON(tp->retrans_out != 0);
+		WARN_ON(tp->retrans_out != 0 && !tp->syn_data);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
@@ -5994,11 +5994,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 			tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED;
 		else
 			tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
-		skb_rbtree_walk_from(data) {
-			if (__tcp_retransmit_skb(sk, data, 1))
-				break;
-		}
-		tcp_rearm_rto(sk);
+		skb_rbtree_walk_from(data)
+			 tcp_mark_skb_lost(sk, data);
+		tcp_xmit_retransmit_queue(sk);
 		NET_INC_STATS(sock_net(sk),
 				LINUX_MIB_TCPFASTOPENACTIVEFAIL);
 		return true;
-- 
cgit v1.2.3


From ac3959fd0dcc0e49298ea5cadfe257db0e58ef8b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 11 Mar 2021 12:35:06 -0800
Subject: tcp: remove obsolete check in __tcp_retransmit_skb()

TSQ provides a nice way to avoid bufferbloat on individual socket,
including retransmit packets. We can get rid of the old
heuristic:

	/* Do not sent more than we queued. 1/4 is reserved for possible
	 * copying overhead: fragmentation, tunneling, mangling etc.
	 */
	if (refcount_read(&sk->sk_wmem_alloc) >
	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
		  sk->sk_sndbuf))
		return -EAGAIN;

This heuristic was giving false positives according to Jakub,
whenever TX completions are delayed above RTT. (Ack packets
are processed by TCP stack before clones are orphaned/freed)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jakub Kicinski <kuba@kernel.org>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dbf208a4f2f..bde781f46b41 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3151,14 +3151,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if (icsk->icsk_mtup.probe_size)
 		icsk->icsk_mtup.probe_size = 0;
 
-	/* Do not sent more than we queued. 1/4 is reserved for possible
-	 * copying overhead: fragmentation, tunneling, mangling etc.
-	 */
-	if (refcount_read(&sk->sk_wmem_alloc) >
-	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
-		  sk->sk_sndbuf))
-		return -EAGAIN;
-
 	if (skb_still_in_host_queue(sk, skb))
 		return -EBUSY;
 
-- 
cgit v1.2.3


From 1d9bfacd52ece918b7861e205202912231ca6166 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 1 Mar 2021 18:46:02 +0800
Subject: esp4: Simplify the calculation of variables

Fix the following coccicheck warnings:

./net/ipv4/esp4.c:757:16-18: WARNING !A || A && B is equivalent to !A || B.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a3271ec3e162..804a7698df5b 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -754,7 +754,7 @@ int esp_input_done2(struct sk_buff *skb, int err)
 	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
 	int ihl;
 
-	if (!xo || (xo && !(xo->flags & CRYPTO_DONE)))
+	if (!xo || !(xo->flags & CRYPTO_DONE))
 		kfree(ESP_SKB_CB(skb)->tmp);
 
 	if (unlikely(err))
-- 
cgit v1.2.3


From f076835a8bf2aa6ea48f718e4506587c815ab99f Mon Sep 17 00:00:00 2001
From: Junlin Yang <yangjunlin@yulong.com>
Date: Thu, 11 Mar 2021 10:07:56 +0800
Subject: esp6: remove a duplicative condition

Fixes coccicheck warnings:
./net/ipv6/esp6_offload.c:319:32-34:
WARNING !A || A && B is equivalent to !A || B

Signed-off-by: Junlin Yang <yangjunlin@yulong.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/esp6_offload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 1ca516fb30e1..631c168774f5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -316,7 +316,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	esp.plen = esp.clen - skb->len - esp.tfclen;
 	esp.tailen = esp.tfclen + esp.plen + alen;
 
-	if (!hw_offload || (hw_offload && !skb_is_gso(skb))) {
+	if (!hw_offload || !skb_is_gso(skb)) {
 		esp.nfrags = esp6_output_head(x, skb, &esp);
 		if (esp.nfrags < 0)
 			return esp.nfrags;
-- 
cgit v1.2.3


From bfdfe7fc1bf9e8c16e4254236c3c731bfea6bdc5 Mon Sep 17 00:00:00 2001
From: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Date: Thu, 11 Mar 2021 18:22:34 +0100
Subject: docs: networking: phy: Improve placement of parenthesis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"either" is outside the parentheses, so the matching "or" should be too.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/phy.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index 06adfc2afcf0..3f05d50ecd6e 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -80,8 +80,8 @@ values of phy_interface_t must be understood from the perspective of the PHY
 device itself, leading to the following:
 
 * PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
-  internal delay by itself, it assumes that either the Ethernet MAC (if capable
-  or the PCB traces) insert the correct 1.5-2ns delay
+  internal delay by itself, it assumes that either the Ethernet MAC (if capable)
+  or the PCB traces insert the correct 1.5-2ns delay
 
 * PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
   for the transmit data lines (TXD[3:0]) processed by the PHY device
-- 
cgit v1.2.3


From 540ec76d31473c04a195a8501f6ee50b6295626b Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:07 -0800
Subject: net/mlx5: Cleanup prototype warning

Cleanup W=1 warning:

drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c:49:
   warning: expecting prototype for Set lag port affinity().
   Prototype was for mlx5_lag_set_port_affinity() instead

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 88e58ac902de..2c41a6920264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -35,7 +35,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
 }
 
 /**
- * Set lag port affinity
+ * mlx5_lag_set_port_affinity
  *
  * @ldev: lag device
  * @port:
-- 
cgit v1.2.3


From 027d7166e17869cb46c9224a4b47b00b61408924 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 10 Mar 2021 23:09:08 -0800
Subject: net/mlx5: simplify the return expression of mlx5_esw_offloads_pair()

Simplify the return expression.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 94cb0217b4f3..107b1f208b72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2288,13 +2288,8 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
 static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
 				  struct mlx5_eswitch *peer_esw)
 {
-	int err;
 
-	err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
-	if (err)
-		return err;
-
-	return 0;
+	return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
 }
 
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
-- 
cgit v1.2.3


From 59079438a664559bb1f6f5fe85e306962ef9286e Mon Sep 17 00:00:00 2001
From: Mikhael Goikhman <migo@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:09 -0800
Subject: net/mlx5: Remove unused mlx5_core_health member recover_work

The code related to health->recover_work was removed in
commit 63cbc552eebf ("net/mlx5: Handle SW reset of FW in error flow")

Fix struct mlx5_core_health accordingly.

Signed-off-by: Mikhael Goikhman <migo@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/driver.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 53b89631a1d9..8fe51b4a781e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -438,7 +438,6 @@ struct mlx5_core_health {
 	unsigned long			flags;
 	struct work_struct		fatal_report_work;
 	struct work_struct		report_work;
-	struct delayed_work		recover_work;
 	struct devlink_health_reporter *fw_reporter;
 	struct devlink_health_reporter *fw_fatal_reporter;
 };
-- 
cgit v1.2.3


From 7d97822a8ceccf9b4b3297465f0b2c1cfd4161f9 Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:10 -0800
Subject: net/mlx5: E-Switch, Add match on vhca id to default send rules

Match on the vhca id of the E-Switch owner when creating the send-to-vport
representor rules.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 107b1f208b72..fd5f8b830584 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1055,10 +1055,16 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
 	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
 	/* source vport is the esw manager */
 	MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
+	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+			 MLX5_CAP_GEN(esw->dev, vhca_id));
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+				 source_eswitch_owner_vhca_id);
 
 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -1702,6 +1708,12 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
 
 	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
 	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+				 misc_parameters.source_eswitch_owner_vhca_id);
+		MLX5_SET(create_flow_group_in, flow_group_in,
+			 source_eswitch_owner_vhca_id_valid, 1);
+	}
 
 	ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-- 
cgit v1.2.3


From 59c904c8fffd903c1dae5fc6a402b88fa6dfc874 Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:11 -0800
Subject: net/mlx5: E-Switch, Add eswitch pointer to each representor

Store the managing E-Switch of each representor. This will be used
when a representor is created on eswitch manager 0 but the vport
belongs to eswitch manager 1.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 +
 include/linux/mlx5/eswitch.h                               | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index fd5f8b830584..f6c0e7e05ad5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3153,6 +3153,7 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
 	esw->offloads.rep_ops[rep_type] = ops;
 	mlx5_esw_for_all_reps(esw, i, rep) {
 		if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
+			rep->esw = esw;
 			rep_data = &rep->rep_data[rep_type];
 			atomic_set(&rep_data->state, REP_REGISTERED);
 		}
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 994c2c8cb4fd..72d480df2a03 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -48,6 +48,7 @@ struct mlx5_eswitch_rep {
 	/* Only IB rep is using vport_index */
 	u16		       vport_index;
 	u32		       vlan_refcount;
+	struct                 mlx5_eswitch *esw;
 };
 
 void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
-- 
cgit v1.2.3


From 658cfceb6267974056cb50adde8a93a15967c665 Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:12 -0800
Subject: RDMA/mlx5: Use representor E-Switch when getting netdev and metadata

Now that a pointer to the managing E-Switch is stored in the representor
use it.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c     | 2 +-
 drivers/infiniband/hw/mlx5/ib_rep.c | 2 +-
 drivers/infiniband/hw/mlx5/main.c   | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 25da0b05b4e2..01370d9a871a 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -879,7 +879,7 @@ static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
 				    misc_parameters_2);
 
 		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
-			 mlx5_eswitch_get_vport_metadata_for_match(esw,
+			 mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
 								   rep->vport));
 		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				    misc_parameters_2);
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 9164cc069ad4..4eae7131b0ce 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -20,7 +20,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	rep->rep_data[REP_IB].priv = ibdev;
 	write_lock(&ibdev->port[vport_index].roce.netdev_lock);
 	ibdev->port[vport_index].roce.netdev =
-		mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
+		mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
 	write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0d69a697d75f..7a7f6ccd02a5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -126,7 +126,6 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
 					   struct net_device *ndev,
 					   u8 *port_num)
 {
-	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
 	struct net_device *rep_ndev;
 	struct mlx5_ib_port *port;
 	int i;
@@ -137,7 +136,7 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
 			continue;
 
 		read_lock(&port->roce.netdev_lock);
-		rep_ndev = mlx5_ib_get_rep_netdev(esw,
+		rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
 						  port->rep->vport);
 		if (rep_ndev == ndev) {
 			read_unlock(&port->roce.netdev_lock);
-- 
cgit v1.2.3


From 3a46f4fb55ffd46e475e3fc53b1252f722cf647e Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:13 -0800
Subject: net/mlx5: E-Switch, Refactor send to vport to be more generic

Now that each representor stores a pointer to the managing E-Switch
use that information when creating the send-to-vport rules.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c                  |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c     |  3 +--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c   | 20 ++++++++++++--------
 include/linux/mlx5/eswitch.h                         |  4 ++--
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 4eae7131b0ce..db5de720bb12 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -123,8 +123,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 
 	rep = dev->port[port - 1].rep;
 
-	return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
-						   sq->base.mqp.qpn);
+	return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
 }
 
 static int mlx5r_rep_probe(struct auxiliary_device *adev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a132fff7a980..3d6c2bce67d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -411,8 +411,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 		}
 
 		/* Add re-inject rule to the PF/representor sqs */
-		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
-								rep->vport,
+		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
 								sqns_array[i]);
 		if (IS_ERR(flow_rule)) {
 			err = PTR_ERR(flow_rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f6c0e7e05ad5..6090b2609089 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1036,7 +1036,8 @@ out:
 }
 
 struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+				    struct mlx5_eswitch_rep *rep,
 				    u32 sqn)
 {
 	struct mlx5_flow_act flow_act = {0};
@@ -1054,27 +1055,30 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
 	/* source vport is the esw manager */
-	MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+	MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
 		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-			 MLX5_CAP_GEN(esw->dev, vhca_id));
+			 MLX5_CAP_GEN(rep->esw->dev, vhca_id));
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-	if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
 		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 				 source_eswitch_owner_vhca_id);
 
 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-	dest.vport.num = vport;
+	dest.vport.num = rep->vport;
+	dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
+	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+	flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
 					spec, &flow_act, &dest, 1);
 	if (IS_ERR(flow_rule))
-		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
+		esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
+			 PTR_ERR(flow_rule));
 out:
 	kvfree(spec);
 	return flow_rule;
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 72d480df2a03..2ec0527991c8 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -62,8 +62,8 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 						u16 vport_num);
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 struct mlx5_flow_handle *
-mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
-				    u16 vport_num, u32 sqn);
+mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+				    struct mlx5_eswitch_rep *rep, u32 sqn);
 
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
 
-- 
cgit v1.2.3


From c3e666f1ada9cbfbe5465f122f9a2d63ddfd25ed Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:14 -0800
Subject: net/mlx5: Add IFC bits needed for single FDB mode

Currently we operate in a mode where each eswitch manager has a separate
FDB. In order to combine these multiple FDBs we expose new caps to allow
this:

- Set root flow table which isn't native.
- Set FDB a different selection mode when in LAG mode.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index df5d91c8b2d4..3ee7a86f39e4 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -806,9 +806,11 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_at_5[0x3];
+	u8         reserved_at_5[0x2];
+	u8         esw_shared_ingress_acl[0x1];
 	u8         esw_uplink_ingress_acl[0x1];
-	u8         reserved_at_9[0x10];
+	u8         root_ft_on_other_esw[0x1];
+	u8         reserved_at_a[0xf];
 	u8         esw_functions_changed[0x1];
 	u8         reserved_at_1a[0x1];
 	u8         ecpf_vport_exists[0x1];
@@ -1502,7 +1504,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_270[0x6];
 	u8         lag_dct[0x2];
 	u8         lag_tx_port_affinity[0x1];
-	u8         reserved_at_279[0x2];
+	u8         lag_native_fdb_selection[0x1];
+	u8         reserved_at_27a[0x1];
 	u8         lag_master[0x1];
 	u8         num_lag_ports[0x4];
 
@@ -10036,14 +10039,19 @@ struct mlx5_ifc_set_flow_table_root_in_bits {
 	u8         reserved_at_60[0x20];
 
 	u8         table_type[0x8];
-	u8         reserved_at_88[0x18];
+	u8         reserved_at_88[0x7];
+	u8         table_of_other_vport[0x1];
+	u8         table_vport_number[0x10];
 
 	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
 
 	u8         reserved_at_c0[0x8];
 	u8         underlay_qpn[0x18];
-	u8         reserved_at_e0[0x120];
+	u8         table_eswitch_owner_vhca_id_valid[0x1];
+	u8         reserved_at_e1[0xf];
+	u8         table_eswitch_owner_vhca_id[0x10];
+	u8         reserved_at_100[0x100];
 };
 
 enum {
@@ -10273,7 +10281,8 @@ struct mlx5_ifc_dcbx_param_bits {
 };
 
 struct mlx5_ifc_lagc_bits {
-	u8         reserved_at_0[0x1d];
+	u8         fdb_selection_mode[0x1];
+	u8         reserved_at_1[0x1c];
 	u8         lag_state[0x3];
 
 	u8         reserved_at_20[0x14];
-- 
cgit v1.2.3


From 26bf30902c10473ba38f220d3401a61c56d8db3b Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 10 Mar 2021 23:09:15 -0800
Subject: net/mlx5: Use order-0 allocations for EQs

Currently we are allocating high-order page for EQs. In case of
fragmented system, VF hot remove/add in VMs for example, there isn't
enough contiguous memory for EQs allocation, which results in crashing
of the VM.
Therefore, use order-0 fragments for the EQ allocations instead.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz
Performance tests show no sensible degradation.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/health.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       | 27 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h   | 15 ++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 ----
 include/linux/mlx5/driver.h                        |  5 ++++
 5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 84e501e057b4..6f4e6c34b2a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -128,7 +128,7 @@ int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg
 	if (err)
 		return err;
 
-	err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent);
+	err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 174dfbc996c6..4e8381030d77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -271,7 +271,7 @@ static void init_eq_buf(struct mlx5_eq *eq)
 	struct mlx5_eqe *eqe;
 	int i;
 
-	for (i = 0; i < eq->nent; i++) {
+	for (i = 0; i < eq_get_size(eq); i++) {
 		eqe = get_eqe(eq, i);
 		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
 	}
@@ -281,8 +281,10 @@ static int
 create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	      struct mlx5_eq_param *param)
 {
+	u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
 	struct mlx5_cq_table *cq_table = &eq->cq_table;
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
+	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
 	struct mlx5_priv *priv = &dev->priv;
 	u8 vecidx = param->irq_index;
 	__be64 *pas;
@@ -297,16 +299,18 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	spin_lock_init(&cq_table->lock);
 	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
 
-	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
 	eq->cons_index = 0;
-	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
+
+	err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
+				       &eq->frag_buf, dev->priv.numa_node);
 	if (err)
 		return err;
 
+	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
 	init_eq_buf(eq);
 
 	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
-		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
 
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in) {
@@ -315,7 +319,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	}
 
 	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
-	mlx5_fill_page_array(&eq->buf, pas);
+	mlx5_fill_page_frag_array(&eq->frag_buf, pas);
 
 	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
 	if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
@@ -326,11 +330,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 				 param->mask[i]);
 
 	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
-	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+	MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
 	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
 	MLX5_SET(eqc, eqc, intr, vecidx);
 	MLX5_SET(eqc, eqc, log_page_size,
-		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+		 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err)
@@ -356,7 +360,7 @@ err_in:
 	kvfree(in);
 
 err_buf:
-	mlx5_buf_free(dev, &eq->buf);
+	mlx5_frag_buf_free(dev, &eq->frag_buf);
 	return err;
 }
 
@@ -413,7 +417,7 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 			       eq->eqn);
 	synchronize_irq(eq->irqn);
 
-	mlx5_buf_free(dev, &eq->buf);
+	mlx5_frag_buf_free(dev, &eq->frag_buf);
 
 	return err;
 }
@@ -764,10 +768,11 @@ EXPORT_SYMBOL(mlx5_eq_destroy_generic);
 struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
 {
 	u32 ci = eq->cons_index + cc;
+	u32 nent = eq_get_size(eq);
 	struct mlx5_eqe *eqe;
 
-	eqe = get_eqe(eq, ci & (eq->nent - 1));
-	eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
+	eqe = get_eqe(eq, ci & (nent - 1));
+	eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
 	/* Make sure we read EQ entry contents after we've
 	 * checked the ownership bit.
 	 */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 81f2cc4ca1da..f607a3858ef5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -22,15 +22,15 @@ struct mlx5_cq_table {
 };
 
 struct mlx5_eq {
+	struct mlx5_frag_buf_ctrl fbc;
+	struct mlx5_frag_buf    frag_buf;
 	struct mlx5_core_dev    *dev;
 	struct mlx5_cq_table    cq_table;
 	__be32 __iomem	        *doorbell;
 	u32                     cons_index;
-	struct mlx5_frag_buf    buf;
 	unsigned int            vecidx;
 	unsigned int            irqn;
 	u8                      eqn;
-	int                     nent;
 	struct mlx5_rsc_debug   *dbg;
 };
 
@@ -47,16 +47,21 @@ struct mlx5_eq_comp {
 	struct list_head        list;
 };
 
+static inline u32 eq_get_size(struct mlx5_eq *eq)
+{
+	return eq->fbc.sz_m1 + 1;
+}
+
 static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
 {
-	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
+	return mlx5_frag_buf_get_wqe(&eq->fbc, entry);
 }
 
 static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
 {
-	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
+	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1);
 
-	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
+	return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe;
 }
 
 static inline void eq_update_ci(struct mlx5_eq *eq, int arm)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 01f075fac276..3091dd014650 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -34,11 +34,6 @@
 #include "wq.h"
 #include "mlx5_core.h"
 
-static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
-{
-	return ((u32)1 << log_sz) << log_stride;
-}
-
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8fe51b4a781e..5c0422930b01 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -873,6 +873,11 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
+static inline u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
+{
+	return ((u32)1 << log_sz) << log_stride;
+}
+
 static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
 					u8 log_stride, u8 log_sz,
 					u16 strides_offset,
-- 
cgit v1.2.3


From 0142f09764752de8df211a42f0733531f96f4626 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sat, 6 Feb 2021 15:20:41 +0200
Subject: net/mlx5: DR, Fixed typo in STE v0

"reforamt" -> "reformat"

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 9ec079247c4b..c5f62d2a058f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -331,7 +331,7 @@ static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr,
 	MLX5_SET(ste_sx_transmit, hw_ste_p, action_type,
 		 DR_STE_ACTION_TYPE_PUSH_VLAN);
 	MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr);
-	/* Due to HW limitation we need to set this bit, otherwise reforamt +
+	/* Due to HW limitation we need to set this bit, otherwise reformat +
 	 * push vlan will not work.
 	 */
 	if (go_back)
-- 
cgit v1.2.3


From 1412477882470bf72763e105e735bbec232b126f Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sat, 6 Feb 2021 15:39:25 +0200
Subject: net/mlx5: DR, Remove unneeded rx_decap_l3 function for STEv1

Remove the dr_ste_v1_set_rx_decap_l3 function that was
replaced by another function - fixing a rebase error.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c   | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 4088d6e51508..6f368ccc428e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -437,21 +437,6 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action)
 	dr_ste_v1_set_reparse(hw_ste_p);
 }
 
-static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p,
-				      u8 *s_action,
-				      u16 decap_actions,
-				      u32 decap_index)
-{
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id,
-		 DR_STE_V1_ACTION_ID_MODIFY_LIST);
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions,
-		 decap_actions);
-	MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr,
-		 decap_index);
-
-	dr_ste_v1_set_reparse(hw_ste_p);
-}
-
 static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p,
 					  u8 *s_action,
 					  u16 num_of_actions,
@@ -571,9 +556,6 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn,
 	bool allow_ctr = true;
 
 	if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) {
-		dr_ste_v1_set_rx_decap_l3(last_ste, action,
-					  attr->decap_actions,
-					  attr->decap_index);
 		dr_ste_v1_set_rewrite_actions(last_ste, action,
 					      attr->decap_actions,
 					      attr->decap_index);
-- 
cgit v1.2.3


From cc82a2e6c8af956d894fa58a040dc0d532dd9978 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sat, 6 Feb 2021 22:41:41 +0200
Subject: net/mlx5: DR, Add missing vhca_id consume from STEv1

The field source_eswitch_owner_vhca_id was not consumed
in the same way as in STEv0. Added the missing set.

Fixes: 10b694186410 ("net/mlx5: DR, Add HW STEv1 match logic")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 6f368ccc428e..815951617e7c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1514,6 +1514,7 @@ static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *val
 
 	DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port);
 	DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn);
+	misc_mask->source_eswitch_owner_vhca_id = 0;
 }
 
 static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
-- 
cgit v1.2.3


From ad2c99ca758126bac9f4c198e8e231e9e7d375dd Mon Sep 17 00:00:00 2001
From: Junlin Yang <yangjunlin@yulong.com>
Date: Wed, 3 Mar 2021 10:40:19 +0800
Subject: net/mlx5: use kvfree() for memory allocated with kvzalloc()

It is allocated with kvzalloc(), the corresponding release function
should not be kfree(), use kvfree() instead.

Generated by: scripts/coccinelle/api/kfree_mismatch.cocci

Signed-off-by: Junlin Yang <yangjunlin@yulong.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 6f6772bf61a2..3da7becc1069 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -248,7 +248,7 @@ err_mod_hdr_regc0:
 err_ethertype:
 	kfree(rule);
 out:
-	kfree(rule_spec);
+	kvfree(rule_spec);
 	return err;
 }
 
@@ -328,7 +328,7 @@ static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw,
 	e->recirc_cnt = 0;
 
 out:
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 
@@ -347,7 +347,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec) {
-		kfree(in);
+		kvfree(in);
 		return -ENOMEM;
 	}
 
@@ -371,8 +371,8 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw,
 	}
 
 err_out:
-	kfree(spec);
-	kfree(in);
+	kvfree(spec);
+	kvfree(in);
 	return err;
 }
 
-- 
cgit v1.2.3


From 7976092241645be4d7ee46ebc894b29a74351daa Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 22 Feb 2021 17:56:59 +0800
Subject: net/mlx5: remove unneeded semicolon

Fix the following coccicheck warnings:

./drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c:495:2-3: Unneeded
semicolon.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index c2ba41bb7a70..60a6328a9ca0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -492,7 +492,7 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
 		break;
 	default:
 		break;
-	};
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 093bd764693711667e7f0dfd95dfa1b938efcec9 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Tue, 2 Feb 2021 13:28:29 +0200
Subject: net/mlx5: Read congestion counters from all ports when lag is active

Read congestion counters from all ports in any lag mode rather than
only in RoCE lag mode (e.g., VF lag).

Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 83a05371e2aa..127bb92da150 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -768,7 +768,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 
 	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
-	if (ldev && __mlx5_lag_is_roce(ldev)) {
+	if (ldev && __mlx5_lag_is_active(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
 		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
 		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
-- 
cgit v1.2.3


From bca08a9145015d14e285dc4c24614dd5212ffb8d Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 9 Mar 2021 18:25:59 +0200
Subject: net/mlx5e: Remove redundant newline in NL_SET_ERR_MSG_MOD

Fix the following coccicheck warnings:

drivers/net/ethernet/mellanox/mlx5/core/devlink.c:145:29-66: WARNING
avoid newline at end of message in NL_SET_ERR_MSG_MOD
drivers/net/ethernet/mellanox/mlx5/core/devlink.c:140:29-77: WARNING
avoid newline at end of message in NL_SET_ERR_MSG_MOD

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 6729720e1ab7..38c7c44fe883 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -137,12 +137,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
 		 * unregistering devlink instance while holding devlink_mutext.
 		 * Hence, do not support reload.
 		 */
-		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated\n");
+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated");
 		return -EOPNOTSUPP;
 	}
 
 	if (mlx5_lag_is_active(dev)) {
-		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n");
+		NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode");
 		return -EOPNOTSUPP;
 	}
 
-- 
cgit v1.2.3


From 991b2654605b455a94dac73e14b23480e7e20991 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Fri, 29 Jan 2021 18:43:31 +0200
Subject: net/mlx5e: Use net_prefetchw instead of prefetchw in MPWQE TX
 datapath

Commit e20f0dbf204f ("net/mlx5e: RX, Add a prefetch command for small
L1_CACHE_BYTES") switched to using net_prefetchw at all places in mlx5e.
In the same time frame, commit 5af75c747e2a ("net/mlx5e: Enhanced TX
MPWQE for SKBs") added one more usage of prefetchw. When these two
changes were merged, this new occurrence of prefetchw wasn't replaced
with net_prefetchw.

This commit fixes this last occurrence of prefetchw in
mlx5e_tx_mpwqe_session_start, making the same change that was done in
mlx5e_xdp_mpwqe_session_start.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index bdbffe484fce..d2efe2455955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -576,7 +576,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
 
 	pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
-	prefetchw(wqe->data);
+	net_prefetchw(wqe->data);
 
 	*session = (struct mlx5e_tx_mpwqe) {
 		.wqe = wqe,
-- 
cgit v1.2.3


From e16cf9d754b93b0cb715ebb981e57cae200c19c9 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 17 Feb 2021 11:43:28 +0200
Subject: net/mlx5e: Dump ICOSQ WQE descriptor on CQE with error events

Dump the ICOSQ's WQE descriptor when a completion with error is received.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1b6ad94ebb10..1f15c6183dc1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -669,6 +669,7 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 						 get_cqe_opcode(cqe));
 				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
 						     (struct mlx5_err_cqe *)cqe);
+				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
 					queue_work(cq->priv->wq, &sq->recover_work);
 				break;
-- 
cgit v1.2.3


From 2119bda642c49c732409574ce699e4845e95df48 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 8 Mar 2021 16:32:57 +0100
Subject: net/mlx5e: allocate 'indirection_rqt' buffer dynamically

Increasing the size of the indirection_rqt array from 128 to 256 bytes
pushed the stack usage of the mlx5e_hairpin_fill_rqt_rqns() function
over the warning limit when building with clang and CONFIG_KASAN:

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:970:1: error: stack frame size of 1180 bytes in function 'mlx5e_tc_add_nic_flow' [-Werror,-Wframe-larger-than=]

Using dynamic allocation here is safe because the caller does the
same, and it reduces the stack usage of the function to just a few
bytes.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dc126389291d..c72725c3f53b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -445,12 +445,16 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
 	mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
 }
 
-static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
+static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
 {
-	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
+	u32 *indirection_rqt, rqn;
 	struct mlx5e_priv *priv = hp->func_priv;
 	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
 
+	indirection_rqt = kzalloc(sz, GFP_KERNEL);
+	if (!indirection_rqt)
+		return -ENOMEM;
+
 	mlx5e_build_default_indir_rqt(indirection_rqt, sz,
 				      hp->num_channels);
 
@@ -462,6 +466,9 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
 		rqn = hp->pair->rqn[ix];
 		MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
 	}
+
+	kfree(indirection_rqt);
+	return 0;
 }
 
 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
@@ -482,12 +489,15 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 
-	mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+	err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);
+	if (err)
+		goto out;
 
 	err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
 	if (!err)
 		hp->indir_rqt.enabled = true;
 
+out:
 	kvfree(in);
 	return err;
 }
-- 
cgit v1.2.3


From 287e0df021e877c2aa5f4750b2a0575070471ddb Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@nvidia.com>
Date: Thu, 31 Dec 2020 16:24:51 +0200
Subject: net/mlx5: Display the command index in command mailbox dump

Multiple commands can be printed at the same time which can
lead to wrong order of their lines in dmesg output.
As a result, it's hard to match data dumps to the correct command
or which command was fully dumped at some point.

Fix this by displaying the corresponding command index, and also
indicate when a command was fully dumped.

Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 32 +++++++++++++++------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index e8cecd50558d..9d79c5ec31e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -263,15 +263,15 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
 	return 0;
 }
 
-static void dump_buf(void *buf, int size, int data_only, int offset)
+static void dump_buf(void *buf, int size, int data_only, int offset, int idx)
 {
 	__be32 *p = buf;
 	int i;
 
 	for (i = 0; i < size; i += 16) {
-		pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
-			 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
-			 be32_to_cpu(p[3]));
+		pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset,
+			 be32_to_cpu(p[0]), be32_to_cpu(p[1]),
+			 be32_to_cpu(p[2]), be32_to_cpu(p[3]));
 		p += 4;
 		offset += 16;
 	}
@@ -802,39 +802,41 @@ static void dump_command(struct mlx5_core_dev *dev,
 	int dump_len;
 	int i;
 
+	mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx);
 	data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
 
 	if (data_only)
 		mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
-				   "dump command data %s(0x%x) %s\n",
-				   mlx5_command_str(op), op,
+				   "cmd[%d]: dump command data %s(0x%x) %s\n",
+				   ent->idx, mlx5_command_str(op), op,
 				   input ? "INPUT" : "OUTPUT");
 	else
-		mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
-			      mlx5_command_str(op), op,
+		mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n",
+			      ent->idx, mlx5_command_str(op), op,
 			      input ? "INPUT" : "OUTPUT");
 
 	if (data_only) {
 		if (input) {
-			dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
+			dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx);
 			offset += sizeof(ent->lay->in);
 		} else {
-			dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
+			dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx);
 			offset += sizeof(ent->lay->out);
 		}
 	} else {
-		dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
+		dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx);
 		offset += sizeof(*ent->lay);
 	}
 
 	for (i = 0; i < n && next; i++)  {
 		if (data_only) {
 			dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
-			dump_buf(next->buf, dump_len, 1, offset);
+			dump_buf(next->buf, dump_len, 1, offset, ent->idx);
 			offset += MLX5_CMD_DATA_BLOCK_SIZE;
 		} else {
-			mlx5_core_dbg(dev, "command block:\n");
-			dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
+			mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx);
+			dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset,
+				 ent->idx);
 			offset += sizeof(struct mlx5_cmd_prot_block);
 		}
 		next = next->next;
@@ -842,6 +844,8 @@ static void dump_command(struct mlx5_core_dev *dev,
 
 	if (data_only)
 		pr_debug("\n");
+
+	mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx);
 }
 
 static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
-- 
cgit v1.2.3


From 69e2916ebce4410c0f6ba6c59c4f6e9eb228e5ec Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Mon, 21 Sep 2020 11:49:26 +0300
Subject: net/mlx5: CT: Add support for mirroring

Add support for mirroring before the CT action by spliting the pre ct rule.
Mirror outputs are done first on the tc chain,prio table rule (the fwd
rule), which will then forward to a per port fwd table.
On this fwd table, we insert the original pre ct rule that forwards to
ct/ct nat table.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c |  4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 19 ++++++++++---------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 3a6095c912f1..5e3d31b888ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -1797,6 +1797,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
 	ct_flow->post_ct_attr->prio = 0;
 	ct_flow->post_ct_attr->ft = ct_priv->post_ct;
 
+	/* Splits were handled before CT */
+	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+		ct_flow->post_ct_attr->esw_attr->split_count = 0;
+
 	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
 	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
 	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index c72725c3f53b..121f0a744e55 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1087,19 +1087,23 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 	if (flow_flag_test(flow, CT)) {
 		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
 
-		return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
+		rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
 					       flow, spec, attr,
 					       mod_hdr_acts);
+	} else {
+		rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 	}
 
-	rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 	if (IS_ERR(rule))
 		return rule;
 
 	if (attr->esw_attr->split_count) {
 		flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
 		if (IS_ERR(flow->rule[1])) {
-			mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+			if (flow_flag_test(flow, CT))
+				mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
+			else
+				mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
 			return flow->rule[1];
 		}
 	}
@@ -2989,7 +2993,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
 	actions = flow->attr->action;
 
 	if (mlx5e_is_eswitch_flow(flow)) {
-		if (flow->attr->esw_attr->split_count && ct_flow) {
+		if (flow->attr->esw_attr->split_count && ct_flow &&
+		    !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) {
 			/* All registers used by ct are cleared when using
 			 * split rules.
 			 */
@@ -3789,6 +3794,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				return err;
 
 			flow_flag_set(flow, CT);
+			esw_attr->split_count = esw_attr->out_count;
 			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
@@ -3851,11 +3857,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			return -EOPNOTSUPP;
 		}
 
-		if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-			NL_SET_ERR_MSG_MOD(extack,
-					   "Mirroring goto chain rules isn't supported");
-			return -EOPNOTSUPP;
-		}
 		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
-- 
cgit v1.2.3


From a3222a2da0a2d6c7682252d4bfdff05721a82b95 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Sun, 24 Jan 2021 15:56:36 +0200
Subject: net/mlx5e: Allow to match on ICMP parameters

Support matching on ICMPv4/6 type and code parameters using misc3
section of match parameters.

Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 47 +++++++++++++++++++++++++
 include/linux/mlx5/device.h                     |  2 ++
 2 files changed, 49 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 121f0a744e55..54ea0dae7ded 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1961,6 +1961,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    misc_parameters);
+	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters_3);
+	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters_3);
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 	struct flow_dissector *dissector = rule->match.dissector;
 	u16 addr_type = 0;
@@ -1990,6 +1994,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 	      BIT(FLOW_DISSECTOR_KEY_CT) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
 	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
 		netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
@@ -2309,7 +2314,49 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		if (match.mask->flags)
 			*match_level = MLX5_MATCH_L4;
 	}
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
 
+		flow_rule_match_icmp(rule, &match);
+		switch (ip_proto) {
+		case IPPROTO_ICMP:
+			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+			      MLX5_FLEX_PROTO_ICMP))
+				return -EOPNOTSUPP;
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
+				 match.mask->type);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
+				 match.key->type);
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
+				 match.mask->code);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
+				 match.key->code);
+			break;
+		case IPPROTO_ICMPV6:
+			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+			      MLX5_FLEX_PROTO_ICMPV6))
+				return -EOPNOTSUPP;
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
+				 match.mask->type);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
+				 match.key->type);
+			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
+				 match.mask->code);
+			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
+				 match.key->code);
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Code and type matching only with ICMP and ICMPv6");
+			netdev_err(priv->netdev,
+				   "Code and type matching only with ICMP and ICMPv6\n");
+			return -EINVAL;
+		}
+		if (match.mask->code || match.mask->type) {
+			*match_level = MLX5_MATCH_L4;
+			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+		}
+	}
 	return 0;
 }
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index dc3d2508f5c6..92a029a800a0 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1142,6 +1142,8 @@ enum mlx5_flex_parser_protos {
 	MLX5_FLEX_PROTO_GENEVE	      = 1 << 3,
 	MLX5_FLEX_PROTO_CW_MPLS_GRE   = 1 << 4,
 	MLX5_FLEX_PROTO_CW_MPLS_UDP   = 1 << 5,
+	MLX5_FLEX_PROTO_ICMP	      = 1 << 8,
+	MLX5_FLEX_PROTO_ICMPV6	      = 1 << 9,
 };
 
 /* MLX5 DEV CAPs */
-- 
cgit v1.2.3


From 8fab174b78f74f10e4d900fe6da6c9047b0b35b5 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Thu, 11 Mar 2021 14:18:12 -0600
Subject: net: macb: poll for fixed link state in SGMII mode

When using a fixed-link configuration with GEM in SGMII mode, such as
for a chip-to-chip interconnect, the link state was always showing as
established regardless of the actual connectivity state. We can monitor
the pcs_link_state bit in the Network Status register to determine
whether the PCS link state is actually up.

Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 15362d016a87..ca72a16c8da3 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -847,6 +847,15 @@ static int macb_phylink_connect(struct macb *bp)
 	return 0;
 }
 
+static void macb_get_pcs_fixed_state(struct phylink_config *config,
+				     struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct macb *bp = netdev_priv(ndev);
+
+	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
+}
+
 /* based on au1000_eth. c*/
 static int macb_mii_probe(struct net_device *dev)
 {
@@ -855,6 +864,11 @@ static int macb_mii_probe(struct net_device *dev)
 	bp->phylink_config.dev = &dev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+		bp->phylink_config.poll_fixed_state = true;
+		bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state;
+	}
+
 	bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
 				     bp->phy_interface, &macb_phylink_ops);
 	if (IS_ERR(bp->phylink)) {
-- 
cgit v1.2.3


From e276e5e40e92582db3814d8b2b28150862c7a50f Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Thu, 11 Mar 2021 14:18:13 -0600
Subject: net: macb: Disable PCS auto-negotiation for SGMII fixed-link mode

When using a fixed-link configuration in SGMII mode, it's not really
sensible to have auto-negotiation enabled since the link settings are
fixed by definition. In other configurations, such as an SGMII
connection to a PHY, it should generally be enabled.

Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb.h      | 14 ++++++++++++++
 drivers/net/ethernet/cadence/macb_main.c | 16 ++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index d8c68906525a..d8d87213697c 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -159,6 +159,16 @@
 #define GEM_PEFTN		0x01f4 /* PTP Peer Event Frame Tx Ns */
 #define GEM_PEFRSL		0x01f8 /* PTP Peer Event Frame Rx Sec Low */
 #define GEM_PEFRN		0x01fc /* PTP Peer Event Frame Rx Ns */
+#define GEM_PCSCNTRL		0x0200 /* PCS Control */
+#define GEM_PCSSTS		0x0204 /* PCS Status */
+#define GEM_PCSPHYTOPID		0x0208 /* PCS PHY Top ID */
+#define GEM_PCSPHYBOTID		0x020c /* PCS PHY Bottom ID */
+#define GEM_PCSANADV		0x0210 /* PCS AN Advertisement */
+#define GEM_PCSANLPBASE		0x0214 /* PCS AN Link Partner Base */
+#define GEM_PCSANEXP		0x0218 /* PCS AN Expansion */
+#define GEM_PCSANNPTX		0x021c /* PCS AN Next Page TX */
+#define GEM_PCSANNPLP		0x0220 /* PCS AN Next Page LP */
+#define GEM_PCSANEXTSTS		0x023c /* PCS AN Extended Status */
 #define GEM_DCFG1		0x0280 /* Design Config 1 */
 #define GEM_DCFG2		0x0284 /* Design Config 2 */
 #define GEM_DCFG3		0x0288 /* Design Config 3 */
@@ -478,6 +488,10 @@
 #define GEM_HS_MAC_SPEED_OFFSET			0
 #define GEM_HS_MAC_SPEED_SIZE			3
 
+/* Bitfields in PCSCNTRL */
+#define GEM_PCSAUTONEG_OFFSET			12
+#define GEM_PCSAUTONEG_SIZE			1
+
 /* Bitfields in DCFG1. */
 #define GEM_IRQCOR_OFFSET			23
 #define GEM_IRQCOR_SIZE				1
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ca72a16c8da3..e7c123aadf56 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -694,6 +694,22 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode,
 	if (old_ncr ^ ncr)
 		macb_or_gem_writel(bp, NCR, ncr);
 
+	/* Disable AN for SGMII fixed link configuration, enable otherwise.
+	 * Must be written after PCSSEL is set in NCFGR,
+	 * otherwise writes will not take effect.
+	 */
+	if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) {
+		u32 pcsctrl, old_pcsctrl;
+
+		old_pcsctrl = gem_readl(bp, PCSCNTRL);
+		if (mode == MLO_AN_FIXED)
+			pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG);
+		else
+			pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG);
+		if (old_pcsctrl != pcsctrl)
+			gem_writel(bp, PCSCNTRL, pcsctrl);
+	}
+
 	spin_unlock_irqrestore(&bp->lock, flags);
 }
 
-- 
cgit v1.2.3


From ab4dda7a8cb7e55ea3d92fd5e249cf6f5396028c Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 11 Mar 2021 13:35:20 +0100
Subject: dt-bindings: net: bcm4908-enet: add optional TX interrupt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I discovered that hardware actually supports two interrupts, one per DMA
channel (RX and TX).

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/brcm,bcm4908-enet.yaml      | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
index 79c38ea14237..2a3be0f9a1a1 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
@@ -22,10 +22,18 @@ properties:
     maxItems: 1
 
   interrupts:
-    description: RX interrupt
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: RX interrupt
+      - description: TX interrupt
 
   interrupt-names:
-    const: rx
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: rx
+      - const: tx
 
 required:
   - reg
@@ -43,6 +51,7 @@ examples:
         compatible = "brcm,bcm4908-enet";
         reg = <0x80002000 0x1000>;
 
-        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
-        interrupt-names = "rx";
+        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "rx", "tx";
     };
-- 
cgit v1.2.3


From 12bb508bfe5a564c36864b12253db23cac83bfa1 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 11 Mar 2021 13:35:21 +0100
Subject: net: broadcom: bcm4908_enet: support TX interrupt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It appears that each DMA channel has its own interrupt and both rings
can be configured (the same way) to handle interrupts.

1. Make ring interrupts code generic (make it operate on given ring)
2. Move napi to ring (so each has its own)
3. Make IRQ handler generic (match ring against received IRQ number)
4. Add (optional) support for TX interrupt

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcm4908_enet.c | 138 ++++++++++++++++++++-------
 1 file changed, 103 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index 199da299cbd0..cbfed1d1477b 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -54,6 +54,7 @@ struct bcm4908_enet_dma_ring {
 	int length;
 	u16 cfg_block;
 	u16 st_ram_block;
+	struct napi_struct napi;
 
 	union {
 		void *cpu_addr;
@@ -67,8 +68,8 @@ struct bcm4908_enet_dma_ring {
 struct bcm4908_enet {
 	struct device *dev;
 	struct net_device *netdev;
-	struct napi_struct napi;
 	void __iomem *base;
+	int irq_tx;
 
 	struct bcm4908_enet_dma_ring tx_ring;
 	struct bcm4908_enet_dma_ring rx_ring;
@@ -123,24 +124,31 @@ static void enet_umac_set(struct bcm4908_enet *enet, u16 offset, u32 set)
  * Helpers
  */
 
-static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet)
+static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
+	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
 }
 
-static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet)
+/***
+ * DMA ring ops
+ */
+
+static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet,
+					   struct bcm4908_enet_dma_ring *ring)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
 }
 
-static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet)
+static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet,
+					    struct bcm4908_enet_dma_ring *ring)
 {
-	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0);
 }
 
-static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
+static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet,
+					    struct bcm4908_enet_dma_ring *ring)
 {
-	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
+	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
 }
 
 /***
@@ -414,11 +422,14 @@ static void bcm4908_enet_gmac_init(struct bcm4908_enet *enet)
 static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id)
 {
 	struct bcm4908_enet *enet = dev_id;
+	struct bcm4908_enet_dma_ring *ring;
 
-	bcm4908_enet_intrs_off(enet);
-	bcm4908_enet_intrs_ack(enet);
+	ring = (irq == enet->irq_tx) ? &enet->tx_ring : &enet->rx_ring;
 
-	napi_schedule(&enet->napi);
+	bcm4908_enet_dma_ring_intrs_off(enet, ring);
+	bcm4908_enet_dma_ring_intrs_ack(enet, ring);
+
+	napi_schedule(&ring->napi);
 
 	return IRQ_HANDLED;
 }
@@ -426,6 +437,8 @@ static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id)
 static int bcm4908_enet_open(struct net_device *netdev)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
+	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
+	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
 	struct device *dev = enet->dev;
 	int err;
 
@@ -435,6 +448,17 @@ static int bcm4908_enet_open(struct net_device *netdev)
 		return err;
 	}
 
+	if (enet->irq_tx > 0) {
+		err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0,
+				  "tx", enet);
+		if (err) {
+			dev_err(dev, "Failed to request IRQ %d: %d\n",
+				enet->irq_tx, err);
+			free_irq(netdev->irq, enet);
+			return err;
+		}
+	}
+
 	bcm4908_enet_gmac_init(enet);
 	bcm4908_enet_dma_reset(enet);
 	bcm4908_enet_dma_init(enet);
@@ -443,14 +467,19 @@ static int bcm4908_enet_open(struct net_device *netdev)
 
 	enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN);
 	enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0);
-	bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
 
-	napi_enable(&enet->napi);
+	if (enet->irq_tx > 0) {
+		napi_enable(&tx_ring->napi);
+		bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring);
+		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
+	}
+
+	bcm4908_enet_dma_rx_ring_enable(enet, rx_ring);
+	napi_enable(&rx_ring->napi);
 	netif_carrier_on(netdev);
 	netif_start_queue(netdev);
-
-	bcm4908_enet_intrs_ack(enet);
-	bcm4908_enet_intrs_on(enet);
+	bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring);
+	bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
 
 	return 0;
 }
@@ -458,16 +487,20 @@ static int bcm4908_enet_open(struct net_device *netdev)
 static int bcm4908_enet_stop(struct net_device *netdev)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
+	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
+	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
 
 	netif_stop_queue(netdev);
 	netif_carrier_off(netdev);
-	napi_disable(&enet->napi);
+	napi_disable(&rx_ring->napi);
+	napi_disable(&tx_ring->napi);
 
 	bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring);
 	bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring);
 
 	bcm4908_enet_dma_uninit(enet);
 
+	free_irq(enet->irq_tx, enet);
 	free_irq(enet->netdev->irq, enet);
 
 	return 0;
@@ -484,25 +517,19 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde
 	u32 tmp;
 
 	/* Free transmitted skbs */
-	while (ring->read_idx != ring->write_idx) {
-		buf_desc = &ring->buf_desc[ring->read_idx];
-		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
-			break;
-		slot = &ring->slots[ring->read_idx];
-
-		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
-		dev_kfree_skb(slot->skb);
-		if (++ring->read_idx == ring->length)
-			ring->read_idx = 0;
-	}
+	if (enet->irq_tx < 0 &&
+	    !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN))
+		napi_schedule(&enet->tx_ring.napi);
 
 	/* Don't use the last empty buf descriptor */
 	if (ring->read_idx <= ring->write_idx)
 		free_buf_descs = ring->read_idx - ring->write_idx + ring->length;
 	else
 		free_buf_descs = ring->read_idx - ring->write_idx;
-	if (free_buf_descs < 2)
+	if (free_buf_descs < 2) {
+		netif_stop_queue(netdev);
 		return NETDEV_TX_BUSY;
+	}
 
 	/* Hardware removes OWN bit after sending data */
 	buf_desc = &ring->buf_desc[ring->write_idx];
@@ -539,9 +566,10 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde
 	return NETDEV_TX_OK;
 }
 
-static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
+static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight)
 {
-	struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi);
+	struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
+	struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring);
 	struct device *dev = enet->dev;
 	int handled = 0;
 
@@ -590,7 +618,7 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 
 	if (handled < weight) {
 		napi_complete_done(napi, handled);
-		bcm4908_enet_intrs_on(enet);
+		bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
 	}
 
 	/* Hardware could disable ring if it run out of descriptors */
@@ -599,6 +627,42 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 	return handled;
 }
 
+static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight)
+{
+	struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
+	struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring);
+	struct bcm4908_enet_dma_ring_bd *buf_desc;
+	struct bcm4908_enet_dma_ring_slot *slot;
+	struct device *dev = enet->dev;
+	unsigned int bytes = 0;
+	int handled = 0;
+
+	while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) {
+		buf_desc = &tx_ring->buf_desc[tx_ring->read_idx];
+		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
+			break;
+		slot = &tx_ring->slots[tx_ring->read_idx];
+
+		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
+		dev_kfree_skb(slot->skb);
+		bytes += slot->len;
+		if (++tx_ring->read_idx == tx_ring->length)
+			tx_ring->read_idx = 0;
+
+		handled++;
+	}
+
+	if (handled < weight) {
+		napi_complete_done(napi, handled);
+		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
+	}
+
+	if (netif_queue_stopped(enet->netdev))
+		netif_wake_queue(enet->netdev);
+
+	return handled;
+}
+
 static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct bcm4908_enet *enet = netdev_priv(netdev);
@@ -642,6 +706,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	if (netdev->irq < 0)
 		return netdev->irq;
 
+	enet->irq_tx = platform_get_irq_byname(pdev, "tx");
+
 	dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
 
 	err = bcm4908_enet_dma_alloc(enet);
@@ -658,7 +724,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	netdev->min_mtu = ETH_ZLEN;
 	netdev->mtu = ETH_DATA_LEN;
 	netdev->max_mtu = ENET_MTU_MAX;
-	netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64);
+	netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT);
+	netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT);
 
 	err = register_netdev(netdev);
 	if (err) {
@@ -676,7 +743,8 @@ static int bcm4908_enet_remove(struct platform_device *pdev)
 	struct bcm4908_enet *enet = platform_get_drvdata(pdev);
 
 	unregister_netdev(enet->netdev);
-	netif_napi_del(&enet->napi);
+	netif_napi_del(&enet->rx_ring.napi);
+	netif_napi_del(&enet->tx_ring.napi);
 	bcm4908_enet_dma_free(enet);
 
 	return 0;
-- 
cgit v1.2.3


From 6ad086009f87f65043c2a2c0efa27e3915c50e6e Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Fri, 12 Mar 2021 13:00:05 +0530
Subject: net: ipv4: route.c: Fix indentation of multi line comment.

All comment lines inside the comment block have been aligned.
Every line of comment starts with a * (uniformity in code).

Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 97 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 55f2813a000d..350bf500bb6d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -722,6 +722,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
 
 		for_each_possible_cpu(i) {
 			struct rtable __rcu **prt;
+
 			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
 			rt = rcu_dereference(*prt);
 			if (rt)
@@ -1258,12 +1259,12 @@ static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 
 /*
-   We do not cache source address of outgoing interface,
-   because it is used only by IP RR, TS and SRR options,
-   so that it out of fast path.
-
-   BTW remember: "addr" is allowed to be not aligned
-   in IP options!
+ * We do not cache source address of outgoing interface,
+ * because it is used only by IP RR, TS and SRR options,
+ * so that it out of fast path.
+ *
+ * BTW remember: "addr" is allowed to be not aligned
+ * in IP options!
  */
 
 void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
@@ -2108,7 +2109,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto out;
 
 	/* Check for the most weird martians, which can be not detected
-	   by fib_lookup.
+	 * by fib_lookup.
 	 */
 
 	tun_info = skb_tunnel_info(skb);
@@ -2317,15 +2318,15 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		       u8 tos, struct net_device *dev, struct fib_result *res)
 {
 	/* Multicast recognition logic is moved from route cache to here.
-	   The problem was that too many Ethernet cards have broken/missing
-	   hardware multicast filters :-( As result the host on multicasting
-	   network acquires a lot of useless route cache entries, sort of
-	   SDR messages from all the world. Now we try to get rid of them.
-	   Really, provided software IP multicast filter is organized
-	   reasonably (at least, hashed), it does not result in a slowdown
-	   comparing with route cache reject entries.
-	   Note, that multicast routers are not affected, because
-	   route cache entry is created eventually.
+	 * The problem was that too many Ethernet cards have broken/missing
+	 * hardware multicast filters :-( As result the host on multicasting
+	 * network acquires a lot of useless route cache entries, sort of
+	 * SDR messages from all the world. Now we try to get rid of them.
+	 * Really, provided software IP multicast filter is organized
+	 * reasonably (at least, hashed), it does not result in a slowdown
+	 * comparing with route cache reject entries.
+	 * Note, that multicast routers are not affected, because
+	 * route cache entry is created eventually.
 	 */
 	if (ipv4_is_multicast(daddr)) {
 		struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -2537,11 +2538,11 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		rth = ERR_PTR(-ENETUNREACH);
 
 		/* I removed check for oif == dev_out->oif here.
-		   It was wrong for two reasons:
-		   1. ip_dev_find(net, saddr) can return wrong iface, if saddr
-		      is assigned to multiple interfaces.
-		   2. Moreover, we are allowed to send packets with saddr
-		      of another iface. --ANK
+		 * It was wrong for two reasons:
+		 * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
+		 *    is assigned to multiple interfaces.
+		 * 2. Moreover, we are allowed to send packets with saddr
+		 *    of another iface. --ANK
 		 */
 
 		if (fl4->flowi4_oif == 0 &&
@@ -2553,18 +2554,18 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 				goto out;
 
 			/* Special hack: user can direct multicasts
-			   and limited broadcast via necessary interface
-			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
-			   This hack is not just for fun, it allows
-			   vic,vat and friends to work.
-			   They bind socket to loopback, set ttl to zero
-			   and expect that it will work.
-			   From the viewpoint of routing cache they are broken,
-			   because we are not allowed to build multicast path
-			   with loopback source addr (look, routing cache
-			   cannot know, that ttl is zero, so that packet
-			   will not leave this host and route is valid).
-			   Luckily, this hack is good workaround.
+			 * and limited broadcast via necessary interface
+			 * without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
+			 * This hack is not just for fun, it allows
+			 * vic,vat and friends to work.
+			 * They bind socket to loopback, set ttl to zero
+			 * and expect that it will work.
+			 * From the viewpoint of routing cache they are broken,
+			 * because we are not allowed to build multicast path
+			 * with loopback source addr (look, routing cache
+			 * cannot know, that ttl is zero, so that packet
+			 * will not leave this host and route is valid).
+			 * Luckily, this hack is good workaround.
 			 */
 
 			fl4->flowi4_oif = dev_out->ifindex;
@@ -2627,21 +2628,21 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
 		    (ipv4_is_multicast(fl4->daddr) ||
 		    !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
 			/* Apparently, routing tables are wrong. Assume,
-			   that the destination is on link.
-
-			   WHY? DW.
-			   Because we are allowed to send to iface
-			   even if it has NO routes and NO assigned
-			   addresses. When oif is specified, routing
-			   tables are looked up with only one purpose:
-			   to catch if destination is gatewayed, rather than
-			   direct. Moreover, if MSG_DONTROUTE is set,
-			   we send packet, ignoring both routing tables
-			   and ifaddr state. --ANK
-
-
-			   We could make it even if oif is unknown,
-			   likely IPv6, but we do not.
+			 * that the destination is on link.
+			 *
+			 * WHY? DW.
+			 * Because we are allowed to send to iface
+			 * even if it has NO routes and NO assigned
+			 * addresses. When oif is specified, routing
+			 * tables are looked up with only one purpose:
+			 * to catch if destination is gatewayed, rather than
+			 * direct. Moreover, if MSG_DONTROUTE is set,
+			 * we send packet, ignoring both routing tables
+			 * and ifaddr state. --ANK
+			 *
+			 *
+			 * We could make it even if oif is unknown,
+			 * likely IPv6, but we do not.
 			 */
 
 			if (fl4->saddr == 0)
-- 
cgit v1.2.3


From 01488a0ccd9abe15565bed50a45afcddbb0fe199 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Fri, 12 Mar 2021 11:41:07 +0100
Subject: net: dsa: bcm_sf2: store PHY interface/mode in port structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's needed later for proper switch / crossbar setup.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 16 ++++++++++++----
 drivers/net/dsa/bcm_sf2.h |  1 +
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f277df922fcd..416f8deffb20 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -443,10 +443,11 @@ static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
 				   struct device_node *dn)
 {
+	struct device *dev = priv->dev->ds->dev;
+	struct bcm_sf2_port_status *port_st;
 	struct device_node *port;
 	unsigned int port_num;
 	struct property *prop;
-	phy_interface_t mode;
 	int err;
 
 	priv->moca_port = -1;
@@ -455,19 +456,26 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
 		if (of_property_read_u32(port, "reg", &port_num))
 			continue;
 
+		if (port_num >= DSA_MAX_PORTS) {
+			dev_err(dev, "Invalid port number %d\n", port_num);
+			continue;
+		}
+
+		port_st = &priv->port_sts[port_num];
+
 		/* Internal PHYs get assigned a specific 'phy-mode' property
 		 * value: "internal" to help flag them before MDIO probing
 		 * has completed, since they might be turned off at that
 		 * time
 		 */
-		err = of_get_phy_mode(port, &mode);
+		err = of_get_phy_mode(port, &port_st->mode);
 		if (err)
 			continue;
 
-		if (mode == PHY_INTERFACE_MODE_INTERNAL)
+		if (port_st->mode == PHY_INTERFACE_MODE_INTERNAL)
 			priv->int_phy_mask |= 1 << port_num;
 
-		if (mode == PHY_INTERFACE_MODE_MOCA)
+		if (port_st->mode == PHY_INTERFACE_MODE_MOCA)
 			priv->moca_port = port_num;
 
 		if (of_property_read_bool(port, "brcm,use-bcm-hdr"))
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 1ed901a68536..a0fe0bf46d9f 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -44,6 +44,7 @@ struct bcm_sf2_hw_params {
 #define BCM_SF2_REGS_NUM	6
 
 struct bcm_sf2_port_status {
+	phy_interface_t mode;
 	unsigned int link;
 	bool enabled;
 };
-- 
cgit v1.2.3


From a9349f08ec6c1251d41ef167d27a15cc39bc5b97 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Fri, 12 Mar 2021 11:41:08 +0100
Subject: net: dsa: bcm_sf2: setup BCM4908 internal crossbar
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On some SoCs (e.g. BCM4908, BCM631[345]8) SF2 has an integrated
crossbar. It allows connecting its selected external ports to internal
ports. It's used by vendors to handle custom Ethernet setups.

BCM4908 has following 3x2 crossbar. On Asus GT-AC5300 rgmii is used for
connecting external BCM53134S switch. GPHY4 is usually used for WAN
port. More fancy devices use SerDes for 2.5 Gbps Ethernet.

              ┌──────────┐
SerDes ─── 0 ─┤          │
              │   3x2    ├─ 0 ─── switch port 7
 GPHY4 ─── 1 ─┤          │
              │ crossbar ├─ 1 ─── runner (accelerator)
 rgmii ─── 2 ─┤          │
              └──────────┘

Use setup data based on DT info to configure BCM4908's switch port 7.
Right now only GPHY and rgmii variants are supported. Handling SerDes
can be implemented later.

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c      | 45 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/bcm_sf2.h      |  1 +
 drivers/net/dsa/bcm_sf2_regs.h |  7 +++++++
 3 files changed, 53 insertions(+)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 416f8deffb20..3a3050e85af7 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -432,6 +432,44 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
 	return 0;
 }
 
+static void bcm_sf2_crossbar_setup(struct bcm_sf2_priv *priv)
+{
+	struct device *dev = priv->dev->ds->dev;
+	int shift;
+	u32 mask;
+	u32 reg;
+	int i;
+
+	mask = BIT(priv->num_crossbar_int_ports) - 1;
+
+	reg = reg_readl(priv, REG_CROSSBAR);
+	switch (priv->type) {
+	case BCM4908_DEVICE_ID:
+		shift = CROSSBAR_BCM4908_INT_P7 * priv->num_crossbar_int_ports;
+		reg &= ~(mask << shift);
+		if (0) /* FIXME */
+			reg |= CROSSBAR_BCM4908_EXT_SERDES << shift;
+		else if (priv->int_phy_mask & BIT(7))
+			reg |= CROSSBAR_BCM4908_EXT_GPHY4 << shift;
+		else if (phy_interface_mode_is_rgmii(priv->port_sts[7].mode))
+			reg |= CROSSBAR_BCM4908_EXT_RGMII << shift;
+		else if (WARN(1, "Invalid port mode\n"))
+			return;
+		break;
+	default:
+		return;
+	}
+	reg_writel(priv, reg, REG_CROSSBAR);
+
+	reg = reg_readl(priv, REG_CROSSBAR);
+	for (i = 0; i < priv->num_crossbar_int_ports; i++) {
+		shift = i * priv->num_crossbar_int_ports;
+
+		dev_dbg(dev, "crossbar int port #%d - ext port #%d\n", i,
+			(reg >> shift) & mask);
+	}
+}
+
 static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 {
 	intrl2_0_mask_set(priv, 0xffffffff);
@@ -864,6 +902,8 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 		return ret;
 	}
 
+	bcm_sf2_crossbar_setup(priv);
+
 	ret = bcm_sf2_cfp_resume(ds);
 	if (ret)
 		return ret;
@@ -1136,6 +1176,7 @@ struct bcm_sf2_of_data {
 	const u16 *reg_offsets;
 	unsigned int core_reg_align;
 	unsigned int num_cfp_rules;
+	unsigned int num_crossbar_int_ports;
 };
 
 static const u16 bcm_sf2_4908_reg_offsets[] = {
@@ -1160,6 +1201,7 @@ static const struct bcm_sf2_of_data bcm_sf2_4908_data = {
 	.core_reg_align	= 0,
 	.reg_offsets	= bcm_sf2_4908_reg_offsets,
 	.num_cfp_rules	= 0, /* FIXME */
+	.num_crossbar_int_ports = 2,
 };
 
 /* Register offsets for the SWITCH_REG_* block */
@@ -1270,6 +1312,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 	priv->reg_offsets = data->reg_offsets;
 	priv->core_reg_align = data->core_reg_align;
 	priv->num_cfp_rules = data->num_cfp_rules;
+	priv->num_crossbar_int_ports = data->num_crossbar_int_ports;
 
 	priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev,
 								"switch");
@@ -1343,6 +1386,8 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 		goto out_clk_mdiv;
 	}
 
+	bcm_sf2_crossbar_setup(priv);
+
 	bcm_sf2_gphy_enable_set(priv->dev->ds, true);
 
 	ret = bcm_sf2_mdio_register(ds);
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index a0fe0bf46d9f..0d48402068d3 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -74,6 +74,7 @@ struct bcm_sf2_priv {
 	const u16			*reg_offsets;
 	unsigned int			core_reg_align;
 	unsigned int			num_cfp_rules;
+	unsigned int			num_crossbar_int_ports;
 
 	/* spinlock protecting access to the indirect registers */
 	spinlock_t			indir_lock;
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 1d2d55c9f8aa..e297b09411f3 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -48,6 +48,13 @@ enum bcm_sf2_reg_offs {
 #define  PHY_PHYAD_SHIFT		8
 #define  PHY_PHYAD_MASK			0x1F
 
+/* Relative to REG_CROSSBAR */
+#define CROSSBAR_BCM4908_INT_P7		0
+#define CROSSBAR_BCM4908_INT_RUNNER	1
+#define CROSSBAR_BCM4908_EXT_SERDES	0
+#define CROSSBAR_BCM4908_EXT_GPHY4	1
+#define CROSSBAR_BCM4908_EXT_RGMII	2
+
 #define REG_RGMII_CNTRL_P(x)		(REG_RGMII_0_CNTRL + (x))
 
 /* Relative to REG_RGMII_CNTRL */
-- 
cgit v1.2.3


From 257382c54e8cf33042f184f24d5c9d739e1f48cb Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 12 Mar 2021 11:09:07 +0000
Subject: ptp_pch: Remove unused function 'pch_ch_control_read()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ptp/ptp_pch.c:182:5: warning: no previous prototype for ‘pch_ch_control_read’ [-Wmissing-prototypes]

Cc: Richard Cochran <richardcochran@gmail.com> (maintainer:PTP HARDWARE CLOCK SUPPORT)
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Flavio Suligoi <f.suligoi@asem.it>
Cc: netdev@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h |  1 -
 drivers/ptp/ptp_pch.c                           | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 55cef5b16aa5..3ce4899a0417 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -612,7 +612,6 @@ void pch_gbe_free_tx_resources(struct pch_gbe_adapter *adapter,
 void pch_gbe_free_rx_resources(struct pch_gbe_adapter *adapter,
 			       struct pch_gbe_rx_ring *rx_ring);
 void pch_gbe_update_stats(struct pch_gbe_adapter *adapter);
-u32 pch_ch_control_read(struct pci_dev *pdev);
 void pch_ch_control_write(struct pci_dev *pdev, u32 val);
 u32 pch_ch_event_read(struct pci_dev *pdev);
 void pch_ch_event_write(struct pci_dev *pdev, u32 val);
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index ce10ecd41ba0..f7ff7230623e 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -179,17 +179,6 @@ static inline void pch_block_reset(struct pch_dev *chip)
 	iowrite32(val, (&chip->regs->control));
 }
 
-u32 pch_ch_control_read(struct pci_dev *pdev)
-{
-	struct pch_dev *chip = pci_get_drvdata(pdev);
-	u32 val;
-
-	val = ioread32(&chip->regs->ch_control);
-
-	return val;
-}
-EXPORT_SYMBOL(pch_ch_control_read);
-
 void pch_ch_control_write(struct pci_dev *pdev, u32 val)
 {
 	struct pch_dev *chip = pci_get_drvdata(pdev);
-- 
cgit v1.2.3


From f90fc37f289cd0886ef3a12b2ea33b93b8d9d360 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 12 Mar 2021 11:09:08 +0000
Subject: ptp_pch: Move 'pch_*()' prototypes to shared header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ptp/ptp_pch.c:193:6: warning: no previous prototype for ‘pch_ch_control_write’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:201:5: warning: no previous prototype for ‘pch_ch_event_read’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:212:6: warning: no previous prototype for ‘pch_ch_event_write’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:220:5: warning: no previous prototype for ‘pch_src_uuid_lo_read’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:231:5: warning: no previous prototype for ‘pch_src_uuid_hi_read’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:242:5: warning: no previous prototype for ‘pch_rx_snap_read’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:259:5: warning: no previous prototype for ‘pch_tx_snap_read’ [-Wmissing-prototypes]
 drivers/ptp/ptp_pch.c:300:5: warning: no previous prototype for ‘pch_set_station_address’ [-Wmissing-prototypes]

Cc: Richard Cochran <richardcochran@gmail.com> (maintainer:PTP HARDWARE CLOCK SUPPORT)
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Flavio Suligoi <f.suligoi@asem.it>
Cc: netdev@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h    |  8 --------
 .../net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c   |  1 +
 drivers/ptp/ptp_pch.c                              |  1 +
 include/linux/ptp_pch.h                            | 22 ++++++++++++++++++++++
 4 files changed, 24 insertions(+), 8 deletions(-)
 create mode 100644 include/linux/ptp_pch.h

diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 3ce4899a0417..a6823c4d355d 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -612,14 +612,6 @@ void pch_gbe_free_tx_resources(struct pch_gbe_adapter *adapter,
 void pch_gbe_free_rx_resources(struct pch_gbe_adapter *adapter,
 			       struct pch_gbe_rx_ring *rx_ring);
 void pch_gbe_update_stats(struct pch_gbe_adapter *adapter);
-void pch_ch_control_write(struct pci_dev *pdev, u32 val);
-u32 pch_ch_event_read(struct pci_dev *pdev);
-void pch_ch_event_write(struct pci_dev *pdev, u32 val);
-u32 pch_src_uuid_lo_read(struct pci_dev *pdev);
-u32 pch_src_uuid_hi_read(struct pci_dev *pdev);
-u64 pch_rx_snap_read(struct pci_dev *pdev);
-u64 pch_tx_snap_read(struct pci_dev *pdev);
-int pch_set_station_address(u8 *addr, struct pci_dev *pdev);
 
 /* pch_gbe_param.c */
 void pch_gbe_check_options(struct pch_gbe_adapter *adapter);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 140cee7c459d..334af49e5add 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_classify.h>
+#include <linux/ptp_pch.h>
 #include <linux/gpio.h>
 
 #define DRV_VERSION     "1.01"
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index f7ff7230623e..fa4417ad02e0 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/ptp_pch.h>
 #include <linux/slab.h>
 
 #define STATION_ADDR_LEN	20
diff --git a/include/linux/ptp_pch.h b/include/linux/ptp_pch.h
new file mode 100644
index 000000000000..51818198c292
--- /dev/null
+++ b/include/linux/ptp_pch.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PTP PCH
+ *
+ * Copyright 2019 Linaro Ltd.
+ *
+ * Author Lee Jones <lee.jones@linaro.org>
+ */
+
+#ifndef _PTP_PCH_H_
+#define _PTP_PCH_H_
+
+void pch_ch_control_write(struct pci_dev *pdev, u32 val);
+u32  pch_ch_event_read(struct pci_dev *pdev);
+void pch_ch_event_write(struct pci_dev *pdev, u32 val);
+u32  pch_src_uuid_lo_read(struct pci_dev *pdev);
+u32  pch_src_uuid_hi_read(struct pci_dev *pdev);
+u64  pch_rx_snap_read(struct pci_dev *pdev);
+u64  pch_tx_snap_read(struct pci_dev *pdev);
+int  pch_set_station_address(u8 *addr, struct pci_dev *pdev);
+
+#endif /* _PTP_PCH_H_ */
-- 
cgit v1.2.3


From 9ec04c71ab206dbc95c79abdfc647df965a2cb91 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 12 Mar 2021 11:09:09 +0000
Subject: ptp: ptp_clockmatrix: Demote non-kernel-doc header to standard
 comment

Fixes the following W=1 kernel build warning(s):

 drivers/ptp/ptp_clockmatrix.c:1408: warning: Cannot understand  * @brief Maximum absolute value for write phase offset in picoseconds
 drivers/ptp/ptp_clockmatrix.c:1408: warning: Cannot understand  * @brief Maximum absolute value for write phase offset in picoseconds
 drivers/ptp/ptp_clockmatrix.c:1408: warning: Cannot understand  * @brief Maximum absolute value for write phase offset in picoseconds
 drivers/ptp/ptp_clockmatrix.c:1408: warning: Cannot understand  * @brief Maximum absolute value for write phase offset in picoseconds
 drivers/ptp/ptp_clockmatrix.c:1408: warning: Cannot understand  * @brief Maximum absolute value for write phase offset in picoseconds

Cc: Richard Cochran <richardcochran@gmail.com>
Cc: IDT-support-1588@lm.renesas.com
Cc: netdev@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clockmatrix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index 75463c2e2b86..fa636951169e 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -1404,8 +1404,8 @@ static int idtcm_set_pll_mode(struct idtcm_channel *channel,
 
 /* PTP Hardware Clock interface */
 
-/**
- * @brief Maximum absolute value for write phase offset in picoseconds
+/*
+ * Maximum absolute value for write phase offset in picoseconds
  *
  * Destination signed register is 32-bit register in resolution of 50ps
  *
-- 
cgit v1.2.3


From 287f93ded67f48fd7126ee37e98103c6cf52eb0f Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 12 Mar 2021 11:09:10 +0000
Subject: ptp: ptp_p: Demote non-conformant kernel-doc headers and supply a
 param description

Fixes the following W=1 kernel build warning(s):

 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'control' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'event' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'addend' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'accum' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'test' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'ts_compare' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'rsystime_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'rsystime_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'systime_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'systime_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'trgt_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'trgt_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'asms_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'asms_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'amms_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'amms_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'ch_control' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'ch_event' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'tx_snap_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'tx_snap_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'rx_snap_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'rx_snap_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'src_uuid_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'src_uuid_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'can_status' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'can_snap_lo' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'can_snap_hi' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'ts_sel' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'ts_st' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'reserve1' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'stl_max_set_en' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'stl_max_set' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'reserve2' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:78: warning: Function parameter or member 'srst' not described in 'pch_ts_regs'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'regs' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'ptp_clock' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'caps' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'exts0_enabled' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'exts1_enabled' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'mem_base' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'mem_size' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'irq' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'pdev' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:121: warning: Function parameter or member 'register_lock' not described in 'pch_dev'
 drivers/ptp/ptp_pch.c:128: warning: Function parameter or member 'station' not described in 'pch_params'
 drivers/ptp/ptp_pch.c:291: warning: Function parameter or member 'pdev' not described in 'pch_set_station_address'

Cc: Richard Cochran <richardcochran@gmail.com>
Cc: LAPIS SEMICONDUCTOR <tshimizu818@gmail.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_pch.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index fa4417ad02e0..a17e8cc642c5 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -37,7 +37,8 @@ enum pch_status {
 	PCH_FAILED,
 	PCH_UNSUPPORTED,
 };
-/**
+
+/*
  * struct pch_ts_regs - IEEE 1588 registers
  */
 struct pch_ts_regs {
@@ -103,7 +104,8 @@ struct pch_ts_regs {
 
 #define PCH_IEEE1588_ETH	(1 << 0)
 #define PCH_IEEE1588_CAN	(1 << 1)
-/**
+
+/*
  * struct pch_dev - Driver private data
  */
 struct pch_dev {
@@ -120,7 +122,7 @@ struct pch_dev {
 	spinlock_t register_lock;
 };
 
-/**
+/*
  * struct pch_params - 1588 module parameter
  */
 struct pch_params {
@@ -286,6 +288,7 @@ static void pch_reset(struct pch_dev *chip)
  *				    IEEE 1588 hardware when looking at PTP
  *				    traffic on the  ethernet interface
  * @addr:	dress which contain the column separated address to be used.
+ * @pdev:	PCI device.
  */
 int pch_set_station_address(u8 *addr, struct pci_dev *pdev)
 {
-- 
cgit v1.2.3


From 86927c9c4d4e59b75a680f1e7922dca717cba24d Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:17 +0100
Subject: netdevsim: fib: Introduce a lock to guard nexthop hashtable

Currently netdevsim relies on RTNL to maintain exclusivity in accessing the
nexthop hash table. However, bucket notification may be called without RTNL
having been held. Instead, introduce a custom lock to guard the table.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 3ca0f54d0c3b..ba577e20b1a1 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -47,13 +47,14 @@ struct nsim_fib_data {
 	struct nsim_fib_entry nexthops;
 	struct rhashtable fib_rt_ht;
 	struct list_head fib_rt_list;
-	struct mutex fib_lock; /* Protects hashtable and list */
+	struct mutex fib_lock; /* Protects FIB HT and list */
 	struct notifier_block nexthop_nb;
 	struct rhashtable nexthop_ht;
 	struct devlink *devlink;
 	struct work_struct fib_event_work;
 	struct list_head fib_event_queue;
 	spinlock_t fib_event_queue_lock; /* Protects fib event queue list */
+	struct mutex nh_lock; /* Protects NH HT */
 	struct dentry *ddir;
 	bool fail_route_offload;
 };
@@ -1262,8 +1263,7 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 	struct nh_notifier_info *info = ptr;
 	int err = 0;
 
-	ASSERT_RTNL();
-
+	mutex_lock(&data->nh_lock);
 	switch (event) {
 	case NEXTHOP_EVENT_REPLACE:
 		err = nsim_nexthop_insert(data, info);
@@ -1275,6 +1275,7 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 		break;
 	}
 
+	mutex_unlock(&data->nh_lock);
 	return notifier_from_errno(err);
 }
 
@@ -1404,6 +1405,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink,
 	if (err)
 		goto err_data_free;
 
+	mutex_init(&data->nh_lock);
 	err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params);
 	if (err)
 		goto err_debugfs_exit;
@@ -1469,6 +1471,7 @@ err_rhashtable_nexthop_destroy:
 				    data);
 	mutex_destroy(&data->fib_lock);
 err_debugfs_exit:
+	mutex_destroy(&data->nh_lock);
 	nsim_fib_debugfs_exit(data);
 err_data_free:
 	kfree(data);
@@ -1497,6 +1500,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data)
 	WARN_ON_ONCE(!list_empty(&data->fib_event_queue));
 	WARN_ON_ONCE(!list_empty(&data->fib_rt_list));
 	mutex_destroy(&data->fib_lock);
+	mutex_destroy(&data->nh_lock);
 	nsim_fib_debugfs_exit(data);
 	kfree(data);
 }
-- 
cgit v1.2.3


From 40ff83711f76d30265140827b46066dd3db2db79 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:18 +0100
Subject: netdevsim: Create a helper for setting nexthop hardware flags

Instead of calling nexthop_set_hw_flags(), call a helper. It will be
used to also set nexthop bucket flags in a subsequent patch.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index ba577e20b1a1..62cbd716383c 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -1157,6 +1157,13 @@ err_num_decrease:
 
 }
 
+static void nsim_nexthop_hw_flags_set(struct net *net,
+				      const struct nsim_nexthop *nexthop,
+				      bool trap)
+{
+	nexthop_set_hw_flags(net, nexthop->id, false, trap);
+}
+
 static int nsim_nexthop_add(struct nsim_fib_data *data,
 			    struct nsim_nexthop *nexthop,
 			    struct netlink_ext_ack *extack)
@@ -1175,7 +1182,7 @@ static int nsim_nexthop_add(struct nsim_fib_data *data,
 		goto err_nexthop_dismiss;
 	}
 
-	nexthop_set_hw_flags(net, nexthop->id, false, true);
+	nsim_nexthop_hw_flags_set(net, nexthop, true);
 
 	return 0;
 
@@ -1204,7 +1211,7 @@ static int nsim_nexthop_replace(struct nsim_fib_data *data,
 		goto err_nexthop_dismiss;
 	}
 
-	nexthop_set_hw_flags(net, nexthop->id, false, true);
+	nsim_nexthop_hw_flags_set(net, nexthop, true);
 	nsim_nexthop_account(data, nexthop_old->occ, false, extack);
 	nsim_nexthop_destroy(nexthop_old);
 
@@ -1286,7 +1293,7 @@ static void nsim_nexthop_free(void *ptr, void *arg)
 	struct net *net;
 
 	net = devlink_net(data->devlink);
-	nexthop_set_hw_flags(net, nexthop->id, false, false);
+	nsim_nexthop_hw_flags_set(net, nexthop, false);
 	nsim_nexthop_account(data, nexthop->occ, false, NULL);
 	nsim_nexthop_destroy(nexthop);
 }
-- 
cgit v1.2.3


From d8eaa4facacbb13425d4097bd066e28958a5716f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:19 +0100
Subject: netdevsim: Add support for resilient nexthop groups

Allow resilient nexthop groups to be programmed and account their
occupancy according to their number of buckets. The nexthop group itself
as well as its buckets are marked with hardware flags (i.e.,
'RTNH_F_TRAP').

Replacement of a single nexthop bucket can fail using the following
debugfs knob:

 # cat /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_nexthop_bucket_replace
 N
 # echo 1 > /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_nexthop_bucket_replace
 # cat /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_nexthop_bucket_replace
 Y

Replacement of a resilient nexthop group can fail using the following
debugfs knob:

 # cat /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_res_nexthop_group_replace
 N
 # echo 1 > /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_res_nexthop_group_replace
 # cat /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_res_nexthop_group_replace
 Y

This enables testing of various error paths.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 62cbd716383c..e41f3b98295c 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -57,6 +57,8 @@ struct nsim_fib_data {
 	struct mutex nh_lock; /* Protects NH HT */
 	struct dentry *ddir;
 	bool fail_route_offload;
+	bool fail_res_nexthop_group_replace;
+	bool fail_nexthop_bucket_replace;
 };
 
 struct nsim_fib_rt_key {
@@ -117,6 +119,7 @@ struct nsim_nexthop {
 	struct rhash_head ht_node;
 	u64 occ;
 	u32 id;
+	bool is_resilient;
 };
 
 static const struct rhashtable_params nsim_nexthop_ht_params = {
@@ -1115,6 +1118,10 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data,
 		for (i = 0; i < info->nh_grp->num_nh; i++)
 			occ += info->nh_grp->nh_entries[i].weight;
 		break;
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		occ = info->nh_res_table->num_nh_buckets;
+		nexthop->is_resilient = true;
+		break;
 	default:
 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
 		kfree(nexthop);
@@ -1161,7 +1168,15 @@ static void nsim_nexthop_hw_flags_set(struct net *net,
 				      const struct nsim_nexthop *nexthop,
 				      bool trap)
 {
+	int i;
+
 	nexthop_set_hw_flags(net, nexthop->id, false, trap);
+
+	if (!nexthop->is_resilient)
+		return;
+
+	for (i = 0; i < nexthop->occ; i++)
+		nexthop_bucket_set_hw_flags(net, nexthop->id, i, false, trap);
 }
 
 static int nsim_nexthop_add(struct nsim_fib_data *data,
@@ -1262,6 +1277,32 @@ static void nsim_nexthop_remove(struct nsim_fib_data *data,
 	nsim_nexthop_destroy(nexthop);
 }
 
+static int nsim_nexthop_res_table_pre_replace(struct nsim_fib_data *data,
+					      struct nh_notifier_info *info)
+{
+	if (data->fail_res_nexthop_group_replace) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace a resilient nexthop group");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nsim_nexthop_bucket_replace(struct nsim_fib_data *data,
+				       struct nh_notifier_info *info)
+{
+	if (data->fail_nexthop_bucket_replace) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace nexthop bucket");
+		return -EINVAL;
+	}
+
+	nexthop_bucket_set_hw_flags(info->net, info->id,
+				    info->nh_res_bucket->bucket_index,
+				    false, true);
+
+	return 0;
+}
+
 static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 				 void *ptr)
 {
@@ -1278,6 +1319,12 @@ static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event,
 	case NEXTHOP_EVENT_DEL:
 		nsim_nexthop_remove(data, info);
 		break;
+	case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE:
+		err = nsim_nexthop_res_table_pre_replace(data, info);
+		break;
+	case NEXTHOP_EVENT_BUCKET_REPLACE:
+		err = nsim_nexthop_bucket_replace(data, info);
+		break;
 	default:
 		break;
 	}
@@ -1387,6 +1434,14 @@ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
 	data->fail_route_offload = false;
 	debugfs_create_bool("fail_route_offload", 0600, data->ddir,
 			    &data->fail_route_offload);
+
+	data->fail_res_nexthop_group_replace = false;
+	debugfs_create_bool("fail_res_nexthop_group_replace", 0600, data->ddir,
+			    &data->fail_res_nexthop_group_replace);
+
+	data->fail_nexthop_bucket_replace = false;
+	debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir,
+			    &data->fail_nexthop_bucket_replace);
 	return 0;
 }
 
-- 
cgit v1.2.3


From c6385c0b67c527b298111775bc89a7407ba1581e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:20 +0100
Subject: netdevsim: Allow reporting activity on nexthop buckets

A key component of the resilient hashing algorithm is the hash buckets'
activity. If a bucket is active, it will not be populated with a new
nexthop in order not to break existing flows. Therefore, in order to
easily and thoroughly test the algorithm, we need to be in full control
over the reported activity.

Add a debugfs interface that allows user space to have netdevsim report
a nexthop bucket within a resilient nexthop group as active. For
example:

 # echo 10 23 > /sys/kernel/debug/netdevsim/netdevsim10/fib/nexthop_bucket_activity

Will mark bucket 23 in nexthop group 10 as active.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index e41f3b98295c..fda6f37e7055 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -14,6 +14,7 @@
  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
  */
 
+#include <linux/bitmap.h>
 #include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -1345,6 +1346,63 @@ static void nsim_nexthop_free(void *ptr, void *arg)
 	nsim_nexthop_destroy(nexthop);
 }
 
+static ssize_t nsim_nexthop_bucket_activity_write(struct file *file,
+						  const char __user *user_buf,
+						  size_t size, loff_t *ppos)
+{
+	struct nsim_fib_data *data = file->private_data;
+	struct net *net = devlink_net(data->devlink);
+	struct nsim_nexthop *nexthop;
+	unsigned long *activity;
+	loff_t pos = *ppos;
+	u16 bucket_index;
+	char buf[128];
+	int err = 0;
+	u32 nhid;
+
+	if (pos != 0)
+		return -EINVAL;
+	if (size > sizeof(buf))
+		return -EINVAL;
+	if (copy_from_user(buf, user_buf, size))
+		return -EFAULT;
+	if (sscanf(buf, "%u %hu", &nhid, &bucket_index) != 2)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &nhid,
+					 nsim_nexthop_ht_params);
+	if (!nexthop || !nexthop->is_resilient ||
+	    bucket_index >= nexthop->occ) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	activity = bitmap_zalloc(nexthop->occ, GFP_KERNEL);
+	if (!activity) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bitmap_set(activity, bucket_index, 1);
+	nexthop_res_grp_activity_update(net, nhid, nexthop->occ, activity);
+	bitmap_free(activity);
+
+out:
+	rtnl_unlock();
+
+	*ppos = size;
+	return err ?: size;
+}
+
+static const struct file_operations nsim_nexthop_bucket_activity_fops = {
+	.open = simple_open,
+	.write = nsim_nexthop_bucket_activity_write,
+	.llseek = no_llseek,
+	.owner = THIS_MODULE,
+};
+
 static u64 nsim_fib_ipv4_resource_occ_get(void *priv)
 {
 	struct nsim_fib_data *data = priv;
@@ -1442,6 +1500,9 @@ nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev)
 	data->fail_nexthop_bucket_replace = false;
 	debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir,
 			    &data->fail_nexthop_bucket_replace);
+
+	debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir,
+			    data, &nsim_nexthop_bucket_activity_fops);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8e815284a5f9351973a5860447941823acf1182d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:21 +0100
Subject: selftests: fib_nexthops: Declutter test output

Before:

 # ./fib_nexthops.sh -t ipv4_torture

IPv4 runtime torture
--------------------
TEST: IPv4 torture test                                             [ OK ]
./fib_nexthops.sh: line 213: 19376 Killed                  ipv4_del_add_loop1
./fib_nexthops.sh: line 213: 19377 Killed                  ipv4_grp_replace_loop
./fib_nexthops.sh: line 213: 19378 Killed                  ip netns exec me ping -f 172.16.101.1 > /dev/null 2>&1
./fib_nexthops.sh: line 213: 19380 Killed                  ip netns exec me ping -f 172.16.101.2 > /dev/null 2>&1
./fib_nexthops.sh: line 213: 19381 Killed                  ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" > /dev/null 2>&1

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv6_torture

IPv6 runtime torture
--------------------
TEST: IPv6 torture test                                             [ OK ]
./fib_nexthops.sh: line 213: 24453 Killed                  ipv6_del_add_loop1
./fib_nexthops.sh: line 213: 24454 Killed                  ipv6_grp_replace_loop
./fib_nexthops.sh: line 213: 24456 Killed                  ip netns exec me ping -f 2001:db8:101::1 > /dev/null 2>&1
./fib_nexthops.sh: line 213: 24457 Killed                  ip netns exec me ping -f 2001:db8:101::2 > /dev/null 2>&1
./fib_nexthops.sh: line 213: 24458 Killed                  ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" > /dev/null 2>&1

Tests passed:   1
Tests failed:   0

After:

 # ./fib_nexthops.sh -t ipv4_torture

IPv4 runtime torture
--------------------
TEST: IPv4 torture test                                             [ OK ]

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv6_torture

IPv6 runtime torture
--------------------
TEST: IPv6 torture test                                             [ OK ]

Tests passed:   1
Tests failed:   0

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d98fb85e201c..91226ac50112 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -874,6 +874,7 @@ ipv6_torture()
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv6 torture test"
@@ -1476,6 +1477,7 @@ ipv4_torture()
 
 	sleep 300
 	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
 
 	# if we did not crash, success
 	log_test 0 0 "IPv4 torture test"
-- 
cgit v1.2.3


From a8f9952d218d816ff1a13c9385edd821a8da527d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:22 +0100
Subject: selftests: fib_nexthops: List each test case in a different line

The lines with the IPv4 and IPv6 test cases are already very long and
more test cases will be added in subsequent patches.

List each test case in a different line to make it easier to extend the
test with more test cases.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 30 +++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 91226ac50112..c840aa88ff18 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,10 +19,32 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+	ipv4_fcnal
+	ipv4_grp_fcnal
+	ipv4_withv6_fcnal
+	ipv4_fcnal_runtime
+	ipv4_large_grp
+	ipv4_compat_mode
+	ipv4_fdb_grp_fcnal
+	ipv4_torture
+"
+
+IPV6_TESTS="
+	ipv6_fcnal
+	ipv6_grp_fcnal
+	ipv6_fcnal_runtime
+	ipv6_large_grp
+	ipv6_compat_mode
+	ipv6_fdb_grp_fcnal
+	ipv6_torture
+"
+
+ALL_TESTS="
+	basic
+	${IPV4_TESTS}
+	${IPV6_TESTS}
+"
 TESTS="${ALL_TESTS}"
 VERBOSE=0
 PAUSE_ON_FAIL=no
-- 
cgit v1.2.3


From 557205f47dc47afeaf80707845cda8ec79ea4cb0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:23 +0100
Subject: selftests: fib_nexthops: Test resilient nexthop groups

Add test cases for resilient nexthop groups. Exhaustive forwarding tests
are added separately under net/forwarding/.

Examples:

 # ./fib_nexthops.sh -t basic_res

Basic resilient nexthop group functional tests
----------------------------------------------
TEST: Add a nexthop group with default parameters                   [ OK ]
TEST: Get a nexthop group with default parameters                   [ OK ]
TEST: Get a nexthop group with non-default parameters               [ OK ]
TEST: Add a nexthop group with 0 buckets                            [ OK ]
TEST: Replace nexthop group parameters                              [ OK ]
TEST: Get a nexthop group after replacing parameters                [ OK ]
TEST: Replace idle timer                                            [ OK ]
TEST: Get a nexthop group after replacing idle timer                [ OK ]
TEST: Replace unbalanced timer                                      [ OK ]
TEST: Get a nexthop group after replacing unbalanced timer          [ OK ]
TEST: Replace with no parameters                                    [ OK ]
TEST: Get a nexthop group after replacing no parameters             [ OK ]
TEST: Replace nexthop group type - implicit                         [ OK ]
TEST: Replace nexthop group type - explicit                         [ OK ]
TEST: Replace number of nexthop buckets                             [ OK ]
TEST: Get a nexthop group after replacing with invalid parameters   [ OK ]
TEST: Dump all nexthop buckets                                      [ OK ]
TEST: Dump all nexthop buckets in a group                           [ OK ]
TEST: Dump all nexthop buckets with a specific nexthop device       [ OK ]
TEST: Dump all nexthop buckets with a specific nexthop identifier   [ OK ]
TEST: Dump all nexthop buckets in a non-existent group              [ OK ]
TEST: Dump all nexthop buckets in a non-resilient group             [ OK ]
TEST: Dump all nexthop buckets using a non-existent device          [ OK ]
TEST: Dump all nexthop buckets with invalid 'groups' keyword        [ OK ]
TEST: Dump all nexthop buckets with invalid 'fdb' keyword           [ OK ]
TEST: Get a valid nexthop bucket                                    [ OK ]
TEST: Get a nexthop bucket with valid group, but invalid index      [ OK ]
TEST: Get a nexthop bucket from a non-resilient group               [ OK ]
TEST: Get a nexthop bucket from a non-existent group                [ OK ]

Tests passed:  29
Tests failed:   0

 # ./fib_nexthops.sh -t ipv4_large_res_grp

IPv4 large resilient group (128k buckets)
-----------------------------------------
TEST: Dump large (x131072) nexthop buckets                          [ OK ]

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv6_large_res_grp

IPv6 large resilient group (128k buckets)
-----------------------------------------
TEST: Dump large (x131072) nexthop buckets                          [ OK ]

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv4_res_torture

IPv4 runtime resilient nexthop group torture
--------------------------------------------
TEST: IPv4 resilient nexthop group torture test                     [ OK ]

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv6_res_torture

IPv6 runtime resilient nexthop group torture
--------------------------------------------
TEST: IPv6 resilient nexthop group torture test                     [ OK ]

Tests passed:   1
Tests failed:   0

 # ./fib_nexthops.sh -t ipv4_res_grp_fcnal

IPv4 resilient groups functional
--------------------------------
TEST: Nexthop group updated when entry is deleted                   [ OK ]
TEST: Nexthop buckets updated when entry is deleted                 [ OK ]
TEST: Nexthop group updated after replace                           [ OK ]
TEST: Nexthop buckets updated after replace                         [ OK ]
TEST: Nexthop group updated when entry is deleted - nECMP           [ OK ]
TEST: Nexthop buckets updated when entry is deleted - nECMP         [ OK ]
TEST: Nexthop group updated after replace - nECMP                   [ OK ]
TEST: Nexthop buckets updated after replace - nECMP                 [ OK ]

Tests passed:   8
Tests failed:   0

 # ./fib_nexthops.sh -t ipv6_res_grp_fcnal

IPv6 resilient groups functional
--------------------------------
TEST: Nexthop group updated when entry is deleted                   [ OK ]
TEST: Nexthop buckets updated when entry is deleted                 [ OK ]
TEST: Nexthop group updated after replace                           [ OK ]
TEST: Nexthop buckets updated after replace                         [ OK ]
TEST: Nexthop group updated when entry is deleted - nECMP           [ OK ]
TEST: Nexthop buckets updated when entry is deleted - nECMP         [ OK ]
TEST: Nexthop group updated after replace - nECMP                   [ OK ]
TEST: Nexthop buckets updated after replace - nECMP                 [ OK ]

Tests passed:   8
Tests failed:   0

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Co-developed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 517 ++++++++++++++++++++++++++++
 1 file changed, 517 insertions(+)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index c840aa88ff18..56dd0c6f2e96 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -22,26 +22,33 @@ ksft_skip=4
 IPV4_TESTS="
 	ipv4_fcnal
 	ipv4_grp_fcnal
+	ipv4_res_grp_fcnal
 	ipv4_withv6_fcnal
 	ipv4_fcnal_runtime
 	ipv4_large_grp
+	ipv4_large_res_grp
 	ipv4_compat_mode
 	ipv4_fdb_grp_fcnal
 	ipv4_torture
+	ipv4_res_torture
 "
 
 IPV6_TESTS="
 	ipv6_fcnal
 	ipv6_grp_fcnal
+	ipv6_res_grp_fcnal
 	ipv6_fcnal_runtime
 	ipv6_large_grp
+	ipv6_large_res_grp
 	ipv6_compat_mode
 	ipv6_fdb_grp_fcnal
 	ipv6_torture
+	ipv6_res_torture
 "
 
 ALL_TESTS="
 	basic
+	basic_res
 	${IPV4_TESTS}
 	${IPV6_TESTS}
 "
@@ -254,6 +261,19 @@ check_nexthop()
 	check_output "${out}" "${expected}"
 }
 
+check_nexthop_bucket()
+{
+	local nharg="$1"
+	local expected="$2"
+	local out
+
+	# remove the idle time since we cannot match it
+	out=$($IP nexthop bucket ${nharg} \
+		| sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
 check_route()
 {
 	local pfx="$1"
@@ -330,6 +350,25 @@ check_large_grp()
 	log_test $? 0 "Dump large (x$ecmp) ecmp groups"
 }
 
+check_large_res_grp()
+{
+	local ipv=$1
+	local buckets=$2
+	local ipstr=""
+
+	if [ $ipv -eq 4 ]; then
+		ipstr="172.16.1.2"
+	else
+		ipstr="2001:db8:91::2"
+	fi
+
+	# create a resilient group with $buckets buckets and dump them
+	run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+	run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+	run_cmd "$IP nexthop bucket list"
+	log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1
@@ -366,6 +405,15 @@ check_nexthop_fdb_support()
 	fi
 }
 
+check_nexthop_res_support()
+{
+	$IP nexthop help 2>&1 | grep -q resilient
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+		return $ksft_skip
+	fi
+}
+
 ipv6_fdb_grp_fcnal()
 {
 	local rc
@@ -688,6 +736,70 @@ ipv6_grp_fcnal()
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
 }
 
+ipv6_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" \
+		"id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
 ipv6_fcnal_runtime()
 {
 	local rc
@@ -846,6 +958,22 @@ ipv6_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv6_large_res_grp()
+{
+	echo
+	echo "IPv6 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 6 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ipv6_del_add_loop1()
 {
 	while :; do
@@ -902,6 +1030,61 @@ ipv6_torture()
 	log_test 0 0 "IPv6 torture test"
 }
 
+ipv6_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv6 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+	run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+	ipv6_del_add_loop1 &
+	pid1=$!
+	ipv6_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn -6 veth1 \
+			    -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+			    -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
 
 ipv4_fcnal()
 {
@@ -1061,6 +1244,70 @@ ipv4_grp_fcnal()
 	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
 }
 
+ipv4_res_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 resilient groups functional"
+	echo "--------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	#
+	# migration of nexthop buckets - equal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# migration of nexthop buckets - unequal weights
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" \
+		"id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+	check_nexthop "id 102" \
+		"id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Nexthop group updated after replace - nECMP"
+	check_nexthop_bucket "list id 102" \
+		"id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
 ipv4_withv6_fcnal()
 {
 	local lladdr
@@ -1282,6 +1529,22 @@ ipv4_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv4_large_res_grp()
+{
+	echo
+	echo "IPv4 large resilient group (128k buckets)"
+	echo "-----------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	check_large_res_grp 4 $((128 * 1024))
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 sysctl_nexthop_compat_mode_check()
 {
 	local sysctlname="net.ipv4.nexthop_compat_mode"
@@ -1505,6 +1768,62 @@ ipv4_torture()
 	log_test 0 0 "IPv4 torture test"
 }
 
+ipv4_res_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101 type resilient
+	done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv4 runtime resilient nexthop group torture"
+	echo "--------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+	run_cmd "$IP route add 172.16.101.1 nhid 102"
+	run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+	ipv4_del_add_loop1 &
+	pid1=$!
+	ipv4_res_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 \
+				-B 172.16.101.2 -A 172.16.1.1 -c 0 \
+				-t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+	wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+	# if we did not crash, success
+	log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
 basic()
 {
 	echo
@@ -1616,6 +1935,204 @@ basic()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+check_nexthop_buckets_balance()
+{
+	local nharg=$1; shift
+	local ret
+
+	while (($# > 0)); do
+		local selector=$1; shift
+		local condition=$1; shift
+		local count
+
+		count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+		(( $count $condition ))
+		ret=$?
+		if ((ret != 0)); then
+			return $ret
+		fi
+	done
+
+	return 0
+}
+
+basic_res()
+{
+	echo
+	echo "Basic resilient nexthop group functional tests"
+	echo "----------------------------------------------"
+
+	check_nexthop_res_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+
+	#
+	# resilient nexthop group addition
+	#
+
+	run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+	log_test $? 0 "Add a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop get id 101"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with default parameters"
+
+	run_cmd "$IP nexthop add id 102 group 1 type resilient
+			buckets 4 idle_timer 100 unbalanced_timer 5"
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" \
+		"id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group with non-default parameters"
+
+	run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+	log_test $? 2 "Add a nexthop group with 0 buckets"
+
+	#
+	# resilient nexthop group replacement
+	#
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient
+			buckets 8 idle_timer 240 unbalanced_timer 80"
+	log_test $? 0 "Replace nexthop group parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+	log_test $? 0 "Replace idle timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+	log_test $? 0 "Replace unbalanced timer"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+	log_test $? 0 "Replace with no parameters"
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+	run_cmd "$IP nexthop replace id 101 group 1"
+	log_test $? 2 "Replace nexthop group type - implicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+	log_test $? 2 "Replace nexthop group type - explicit"
+
+	run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+	log_test $? 2 "Replace number of nexthop buckets"
+
+	check_nexthop "id 101" \
+		"id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+	log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+	#
+	# resilient nexthop buckets dump
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+	run_cmd "$IP nexthop add id 201 group 1/2"
+
+	check_nexthop_bucket "" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets"
+
+	check_nexthop_bucket "list id 101" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets in a group"
+
+	(( $($IP -j nexthop bucket list id 101 |
+	     jq '[.[] | select(.bucket.idle_time > 0 and
+	                       .bucket.idle_time < 2)] | length') == 4 ))
+	log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+	check_nexthop_bucket "list dev veth1" \
+		"id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+	check_nexthop_bucket "list nhid 2" \
+		"id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+	log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+	run_cmd "$IP nexthop bucket list id 111"
+	log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+	run_cmd "$IP nexthop bucket list id 201"
+	log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+	run_cmd "$IP nexthop bucket list dev bla"
+	log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+	run_cmd "$IP nexthop bucket list groups"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+	run_cmd "$IP nexthop bucket list fdb"
+	log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+	#
+	# resilient nexthop buckets get requests
+	#
+
+	check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+	log_test $? 0 "Get a valid nexthop bucket"
+
+	run_cmd "$IP nexthop bucket get id 101 index 999"
+	log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+	run_cmd "$IP nexthop bucket get id 201 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+	run_cmd "$IP nexthop bucket get id 999 index 0"
+	log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+	#
+	# tests for bucket migration
+	#
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	run_cmd "$IP nexthop add id 2 dev veth3"
+	run_cmd "$IP nexthop add id 101
+			group 1/2 type resilient buckets 10
+			idle_timer 1 unbalanced_timer 20"
+
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Initial bucket allocation"
+
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 4" \
+				      "nhid 2" "== 6"
+	log_test $? 0 "Bucket allocation after replace"
+
+	# Check that increase in idle timer does not make buckets appear busy.
+	run_cmd "$IP nexthop replace id 101
+			group 1,2/2,3 type resilient
+			idle_timer 10"
+	run_cmd "$IP nexthop replace id 101
+			group 1/2 type resilient"
+	check_nexthop_buckets_balance "list id 101" \
+				      "nhid 1" "== 5" \
+				      "nhid 2" "== 5"
+	log_test $? 0 "Buckets migrated after idle timer change"
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ################################################################################
 # usage
 
-- 
cgit v1.2.3


From 386e3792b52a4d815aefb30d8c049484dce7bdd2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:24 +0100
Subject: selftests: forwarding: Add resilient hashing test

Verify that IPv4 and IPv6 multipath forwarding works correctly with
resilient nexthop groups and with different weights.

Test that when the idle timer is not zero, the resilient groups are not
rebalanced - because the nexthop buckets are considered active - and the
initial weights (1:1) are used.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/router_mpath_nh_res.sh          | 400 +++++++++++++++++++++
 1 file changed, 400 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4898dd4118f1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip nexthop del id 103
+	ip nexthop del id 101
+	ip nexthop del id 102
+	ip nexthop del id 106
+	ip nexthop del id 104
+	ip nexthop del id 105
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip nexthop del id 201
+	ip nexthop del id 202
+	ip nexthop del id 204
+	ip nexthop del id 205
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+	ip nexthop add id 101 via 169.254.2.22 dev $rp12
+	ip nexthop add id 102 via 169.254.3.23 dev $rp13
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+	ip nexthop add id 104 via fe80:2::22 dev $rp12
+	ip nexthop add id 105 via fe80:3::23 dev $rp13
+	ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+	ip nexthop add id 201 via 169.254.2.12 dev $rp22
+	ip nexthop add id 202 via 169.254.3.13 dev $rp23
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+	ip nexthop add id 204 via fe80:2::12 dev $rp22
+	ip nexthop add id 205 via fe80:3::13 dev $rp23
+	ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+		idle_timer 0
+	ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	# Restore settings.
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the provided weights.
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+	# Without an idle timer, weight replacement should happen immediately.
+	log_info "Running multipath tests without an idle timer"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With an idle timer, weight replacement should not happen, so the
+	# expected ratio should always be the initial one (1:1).
+	log_info "Running multipath tests with an idle timer of 120 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+	log_info "Running IPv4 multipath tests"
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 1 1
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 1 1
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+	# With a short idle timer and enough idle time, weight replacement
+	# should happen.
+	log_info "Running multipath tests with an idle timer of 5 seconds"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+	log_info "Running IPv4 multipath tests"
+	sleep 10
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+	multipath4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 103 group 101,2/102,1 type resilient
+	multipath4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 103 group 101,11/102,45 type resilient
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	sleep 10
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+	multipath6_l4_test "ECMP" 1 1
+	sleep 10
+	ip nexthop replace id 106 group 104,2/105,1 type resilient
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	sleep 10
+	ip nexthop replace id 106 group 104,11/105,45 type resilient
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Nexthop objects not supported; skipping tests"
+	exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 902280cacc0367dcaa8be8261e02ead932a1488d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:25 +0100
Subject: selftests: forwarding: Add resilient multipath tunneling nexthop test

Add a resilient nexthop objects version of gre_multipath_nh.sh. Test
that both IPv4 and IPv6 overlays work with resilient nexthop groups
where the nexthops are two GRE tunnels.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/gre_multipath_nh_res.sh         | 361 +++++++++++++++++++++
 1 file changed, 361 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh

diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..088b65e64d66
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+	multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+		idle_timer 0
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 0
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for ((i=0; i < 16384; ++i)); do
+		ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+	done
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+		type resilient
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102 type resilient
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From b8a07c4cea04c14008f34880424852d38ae03758 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 12 Mar 2021 17:50:26 +0100
Subject: selftests: netdevsim: Add test for resilient nexthop groups offload
 API

Test various aspects of the resilient nexthop group offload API on top
of the netdevsim implementation. Both good and bad flows are tested.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Co-developed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/netdevsim/nexthop.sh     | 620 +++++++++++++++++++++
 1 file changed, 620 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..ba75c81cda91 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,14 +11,33 @@ ALL_TESTS="
 	nexthop_single_add_err_test
 	nexthop_group_add_test
 	nexthop_group_add_err_test
+	nexthop_res_group_add_test
+	nexthop_res_group_add_err_test
 	nexthop_group_replace_test
 	nexthop_group_replace_err_test
+	nexthop_res_group_replace_test
+	nexthop_res_group_replace_err_test
+	nexthop_res_group_idle_timer_test
+	nexthop_res_group_idle_timer_del_test
+	nexthop_res_group_increase_idle_timer_test
+	nexthop_res_group_decrease_idle_timer_test
+	nexthop_res_group_unbalanced_timer_test
+	nexthop_res_group_unbalanced_timer_del_test
+	nexthop_res_group_no_unbalanced_timer_test
+	nexthop_res_group_short_unbalanced_timer_test
+	nexthop_res_group_increase_unbalanced_timer_test
+	nexthop_res_group_decrease_unbalanced_timer_test
+	nexthop_res_group_force_migrate_busy_test
 	nexthop_single_replace_test
 	nexthop_single_replace_err_test
 	nexthop_single_in_group_replace_test
 	nexthop_single_in_group_replace_err_test
+	nexthop_single_in_res_group_replace_test
+	nexthop_single_in_res_group_replace_err_test
 	nexthop_single_in_group_delete_test
 	nexthop_single_in_group_delete_err_test
+	nexthop_single_in_res_group_delete_test
+	nexthop_single_in_res_group_delete_err_test
 	nexthop_replay_test
 	nexthop_replay_err_test
 "
@@ -27,6 +46,7 @@ DEV_ADDR=1337
 DEV=netdevsim${DEV_ADDR}
 DEVLINK_DEV=netdevsim/${DEV}
 SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
 NUM_NETIFS=0
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
@@ -44,6 +64,28 @@ nexthop_check()
 	return 0
 }
 
+nexthop_bucket_nhid_count_check()
+{
+	local group_id=$1; shift
+	local expected
+	local count
+	local nhid
+	local ret
+
+	while (($# > 0)); do
+		nhid=$1; shift
+		expected=$1; shift
+
+		count=$($IP nexthop bucket show id $group_id nhid $nhid |
+			grep "trap" | wc -l)
+		if ((expected != count)); then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
 nexthop_resource_check()
 {
 	local expected_occ=$1; shift
@@ -159,6 +201,71 @@ nexthop_group_add_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_res_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+	nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 7
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Resilient nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
 nexthop_group_replace_test()
 {
 	RET=0
@@ -206,6 +313,411 @@ nexthop_group_replace_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	$IP nexthop replace id 10 group 1/2/3 type resilient
+	nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 1 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 2
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Resilient nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+	$IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10 1 3
+	check_err $? "Wrong nexthop buckets count"
+	nexthop_bucket_nhid_count_check 10 2 3
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 9
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Resilient nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+	local count=$1; shift
+	local index
+
+	for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+		       jq '.[].bucket.index' | head -n ${count:--0})
+	do
+		echo $group_id $index \
+			> $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+	done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+	local group_id=$1; shift
+	local nhid=$1; shift
+
+	$IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 6
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 \
+	    type resilient buckets 8 idle_timer 6
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4  3 0
+	check_err $? "Group expected to be unbalanced"
+
+	sleep 4
+
+	# Deletion prompts group replacement. Check that the bucket timers
+	# are kept.
+	$IP nexthop delete id 3
+
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "Group expected to still be unbalanced"
+
+	sleep 4
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after idle timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+	sleep 4
+
+	# 6 seconds, past the new timer, before the old timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer decrease"
+
+	$IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+	local timer=$1; shift
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,100/2,100/3,1 \
+	    type resilient buckets 8 $timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 2
+	$IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+	sleep 4
+
+	# 6 seconds, past the original timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group still expected to be unbalanced"
+
+	sleep 4
+
+	# 10 seconds, past the new timer.
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after $timer increase"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+	__nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in 1 2; do
+		sleep 4
+		nexthop_bucket_nhid_count_check 10  1 4  2 4
+		check_err $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+	    buckets 8 idle_timer 6 unbalanced_timer 10
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+	# Check that NH delete does not reset unbalanced time.
+	sleep 4
+	$IP nexthop delete id 3
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "1: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 4  2 4
+	check_err $? "2: Group expected to be unbalanced"
+	nexthop_res_mark_buckets_busy 10 1
+
+	# 3 x sleep 4 > unbalanced timer 10
+	sleep 4
+	nexthop_bucket_nhid_count_check 10  1 2  2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced timer (with delete)"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+	local i
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient buckets 8
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	for i in $(seq 3); do
+		sleep 60
+		nexthop_bucket_nhid_count_check 10 2 6
+		check_fail $? "$i: Group expected to be unbalanced"
+		nexthop_res_mark_buckets_busy 10 1
+	done
+
+	log_test "Buckets never force-migrated without unbalanced timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120 unbalanced_timer 4
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	sleep 5
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_err $? "Group expected to be balanced"
+
+	log_test "Bucket migration after unbalanced < idle timer"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+	__nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+	__nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	RET=0
+
+	$IP nexthop add id 10 group 1/2 type resilient \
+	    buckets 8 idle_timer 120
+	nexthop_res_mark_buckets_busy 10 1
+	$IP nexthop replace id 10 group 1/2,3 type resilient
+
+	nexthop_bucket_nhid_count_check 10 2 6
+	check_fail $? "Group expected to be unbalanced"
+
+	$IP nexthop replace id 10 group 2 type resilient
+	nexthop_bucket_nhid_count_check 10 2 8
+	check_err $? "All buckets expected to have migrated"
+
+	log_test "Busy buckets force-migrated when NH removed"
+
+	$IP nexthop flush &> /dev/null
+}
+
 nexthop_single_replace_test()
 {
 	RET=0
@@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_single_in_res_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_bucket_nhid_count_check 10  1 2  2 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
 nexthop_single_in_group_delete_test()
 {
 	RET=0
@@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test()
 	nexthop_resource_set 9999
 }
 
+nexthop_single_in_res_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_bucket_nhid_count_check 10 2 4
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+	ip netns exec testns1 \
+		echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+	$IP nexthop del id 1
+
+	# We failed to replace the two nexthop buckets that were originally
+	# assigned to nhid 1.
+	nexthop_bucket_nhid_count_check 10  2 2  3 2
+	check_err $? "Wrong nexthop buckets count"
+
+	nexthop_resource_check 8
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in resilient group failure"
+
+	$IP nexthop flush &> /dev/null
+	ip netns exec testns1 \
+		echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
 nexthop_replay_test()
 {
 	RET=0
-- 
cgit v1.2.3


From 6445e17af7c58b8a9be8ebf400b04c65202f6497 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:11 -0800
Subject: mptcp: add rm_list in mptcp_out_options

This patch defined a new struct mptcp_rm_list, the ids field was an
array of the removing address ids, the nr field was the valid number of
removing address ids in the array. The array size was definced as a new
macro MPTCP_RM_IDS_MAX. Changed the member rm_id of struct
mptcp_out_options to rm_list.

In mptcp_established_options_rm_addr, invoked mptcp_pm_rm_addr_signal to
get the rm_list. According the number of addresses in it, calculated
the padded RM_ADDR suboption length. And saved the ids array in struct
mptcp_out_options's rm_list member.

In mptcp_write_options, iterated each address id from struct
mptcp_out_options's rm_list member, set the invalid ones as TCPOPT_NOP,
then filled them into the RM_ADDR suboption.

Changed TCPOLEN_MPTCP_RM_ADDR_BASE from 4 to 3.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  |  9 ++++++++-
 net/mptcp/options.c  | 35 +++++++++++++++++++++++++++--------
 net/mptcp/pm.c       |  5 +++--
 net/mptcp/protocol.h | 12 ++++++++++--
 4 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 5694370be3d4..cea69c801595 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -34,6 +34,13 @@ struct mptcp_ext {
 	/* one byte hole */
 };
 
+#define MPTCP_RM_IDS_MAX	8
+
+struct mptcp_rm_list {
+	u8 ids[MPTCP_RM_IDS_MAX];
+	u8 nr;
+};
+
 struct mptcp_out_options {
 #if IS_ENABLED(CONFIG_MPTCP)
 	u16 suboptions;
@@ -48,7 +55,7 @@ struct mptcp_out_options {
 	u8 addr_id;
 	u16 port;
 	u64 ahmac;
-	u8 rm_id;
+	struct mptcp_rm_list rm_list;
 	u8 join_id;
 	u8 backup;
 	u32 nonce;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 444a38681e93..e74d0513187f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -674,20 +674,25 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-	u8 rm_id;
+	struct mptcp_rm_list rm_list;
+	int i, len;
 
 	if (!mptcp_pm_should_rm_signal(msk) ||
-	    !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id)))
+	    !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list)))
 		return false;
 
-	if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+	len = mptcp_rm_addr_len(&rm_list);
+	if (len < 0)
+		return false;
+	if (remaining < len)
 		return false;
 
-	*size = TCPOLEN_MPTCP_RM_ADDR_BASE;
+	*size = len;
 	opts->suboptions |= OPTION_MPTCP_RM_ADDR;
-	opts->rm_id = rm_id;
+	opts->rm_list = rm_list;
 
-	pr_debug("rm_id=%d", opts->rm_id);
+	for (i = 0; i < opts->rm_list.nr; i++)
+		pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
 
 	return true;
 }
@@ -1217,9 +1222,23 @@ mp_capable_done:
 	}
 
 	if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
+		u8 i = 1;
+
 		*ptr++ = mptcp_option(MPTCPOPT_RM_ADDR,
-				      TCPOLEN_MPTCP_RM_ADDR_BASE,
-				      0, opts->rm_id);
+				      TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr,
+				      0, opts->rm_list.ids[0]);
+
+		while (i < opts->rm_list.nr) {
+			u8 id1, id2, id3, id4;
+
+			id1 = opts->rm_list.ids[i];
+			id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP;
+			id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP;
+			id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP;
+			put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr);
+			ptr += 1;
+			i += 4;
+		}
 	}
 
 	if (OPTION_MPTCP_PRIO & opts->suboptions) {
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 6fd4b2c1b076..0654c86cd5ff 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -258,7 +258,7 @@ out_unlock:
 }
 
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-			     u8 *rm_id)
+			     struct mptcp_rm_list *rm_list)
 {
 	int ret = false;
 
@@ -271,7 +271,8 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 	if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
 		goto out_unlock;
 
-	*rm_id = msk->pm.rm_id;
+	rm_list->ids[0] = msk->pm.rm_id;
+	rm_list->nr = 1;
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	ret = true;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e21a5bc36cf0..c896bcf3e70f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -61,7 +61,7 @@
 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT	22
 #define TCPOLEN_MPTCP_PORT_LEN		2
 #define TCPOLEN_MPTCP_PORT_ALIGN	2
-#define TCPOLEN_MPTCP_RM_ADDR_BASE	4
+#define TCPOLEN_MPTCP_RM_ADDR_BASE	3
 #define TCPOLEN_MPTCP_PRIO		3
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
@@ -709,10 +709,18 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
 	return len;
 }
 
+static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
+{
+	if (rm_list->nr == 0 || rm_list->nr > MPTCP_RM_IDS_MAX)
+		return -EINVAL;
+
+	return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
+}
+
 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 			      struct mptcp_addr_info *saddr, bool *echo, bool *port);
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
-			     u8 *rm_id);
+			     struct mptcp_rm_list *rm_list);
 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 
 void __init mptcp_pm_nl_init(void);
-- 
cgit v1.2.3


From cbde2787189632160a2fc09786d17648a8b191c5 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:12 -0800
Subject: mptcp: add rm_list_tx in mptcp_pm_data

This patch added a new member rm_list_tx for struct mptcp_pm_data as the
removing address list on the outgoing direction. Initialize its nr field
to zero in mptcp_pm_data_init.

In mptcp_pm_remove_anno_addr, put the single address id into an removing
list, and passed it to mptcp_pm_remove_addr.

In mptcp_pm_remove_addr, save the input rm_list to rm_list_tx in struct
mptcp_pm_data.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c         | 20 ++++++++++++--------
 net/mptcp/pm_netlink.c |  5 ++++-
 net/mptcp/protocol.h   |  3 ++-
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 0654c86cd5ff..9a91605885bb 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -39,18 +39,18 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 	return 0;
 }
 
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
 {
 	u8 rm_addr = READ_ONCE(msk->pm.addr_signal);
 
-	pr_debug("msk=%p, local_id=%d", msk, local_id);
+	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
 
 	if (rm_addr) {
 		pr_warn("addr_signal error, rm_addr=%d", rm_addr);
 		return -EINVAL;
 	}
 
-	msk->pm.rm_id = local_id;
+	msk->pm.rm_list_tx = *rm_list;
 	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
 	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
 	return 0;
@@ -260,7 +260,7 @@ out_unlock:
 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 			     struct mptcp_rm_list *rm_list)
 {
-	int ret = false;
+	int ret = false, len;
 
 	spin_lock_bh(&msk->pm.lock);
 
@@ -268,11 +268,15 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
 	if (!mptcp_pm_should_rm_signal(msk))
 		goto out_unlock;
 
-	if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE)
+	len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
+	if (len < 0) {
+		WRITE_ONCE(msk->pm.addr_signal, 0);
+		goto out_unlock;
+	}
+	if (remaining < len)
 		goto out_unlock;
 
-	rm_list->ids[0] = msk->pm.rm_id;
-	rm_list->nr = 1;
+	*rm_list = msk->pm.rm_list_tx;
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	ret = true;
 
@@ -292,7 +296,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
 	msk->pm.add_addr_accepted = 0;
 	msk->pm.local_addr_used = 0;
 	msk->pm.subflows = 0;
-	msk->pm.rm_id = 0;
+	msk->pm.rm_list_tx.nr = 0;
 	WRITE_ONCE(msk->pm.work_pending, false);
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	WRITE_ONCE(msk->pm.accept_addr, false);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 8e8e35fa4002..1eb9d0139267 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1071,12 +1071,15 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk,
 				      struct mptcp_addr_info *addr,
 				      bool force)
 {
+	struct mptcp_rm_list list = { .nr = 0 };
 	bool ret;
 
+	list.ids[list.nr++] = addr->id;
+
 	ret = remove_anno_list_by_saddr(msk, addr);
 	if (ret || force) {
 		spin_lock_bh(&msk->pm.lock);
-		mptcp_pm_remove_addr(msk, addr->id);
+		mptcp_pm_remove_addr(msk, &list);
 		spin_unlock_bh(&msk->pm.lock);
 	}
 	return ret;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c896bcf3e70f..ac15be7cf06b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -207,6 +207,7 @@ struct mptcp_pm_data {
 	u8		local_addr_used;
 	u8		subflows;
 	u8		status;
+	struct mptcp_rm_list rm_list_tx;
 	u8		rm_id;
 };
 
@@ -661,7 +662,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
 			   bool echo, bool port);
-int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
+int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
 
 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
-- 
cgit v1.2.3


From 5c4a824dcb589f3da2574dd99e761063e7d5529d Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:13 -0800
Subject: mptcp: add rm_list in mptcp_options_received

This patch changed the member rm_id in struct mptcp_options_received as a
list of the removing address ids, and renamed it to rm_list.

In mptcp_parse_option, parsed the RM_ADDR suboption and filled them into
the rm_list in struct mptcp_options_received.

In mptcp_incoming_options, passed this rm_list to the function
mptcp_pm_rm_addr_received.

It also changed the parameter type of mptcp_pm_rm_addr_received.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 12 ++++++++----
 net/mptcp/pm.c       | 11 +++++++----
 net/mptcp/protocol.h |  5 +++--
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e74d0513187f..5fabf3e9a38d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -26,6 +26,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 	int expected_opsize;
 	u8 version;
 	u8 flags;
+	u8 i;
 
 	switch (subtype) {
 	case MPTCPOPT_MP_CAPABLE:
@@ -272,14 +273,17 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		break;
 
 	case MPTCPOPT_RM_ADDR:
-		if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
+		if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 ||
+		    opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX)
 			break;
 
 		ptr++;
 
 		mp_opt->rm_addr = 1;
-		mp_opt->rm_id = *ptr++;
-		pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
+		mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
+		for (i = 0; i < mp_opt->rm_list.nr; i++)
+			mp_opt->rm_list.ids[i] = *ptr++;
+		pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr);
 		break;
 
 	case MPTCPOPT_MP_PRIO:
@@ -1043,7 +1047,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (mp_opt.rm_addr) {
-		mptcp_pm_rm_addr_received(msk, mp_opt.rm_id);
+		mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
 		mp_opt.rm_addr = 0;
 	}
 
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9a91605885bb..7553f82076ca 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -205,17 +205,20 @@ void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
 	mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
 }
 
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+			       const struct mptcp_rm_list *rm_list)
 {
 	struct mptcp_pm_data *pm = &msk->pm;
+	u8 i;
 
-	pr_debug("msk=%p remote_id=%d", msk, rm_id);
+	pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr);
 
-	mptcp_event_addr_removed(msk, rm_id);
+	for (i = 0; i < rm_list->nr; i++)
+		mptcp_event_addr_removed(msk, rm_list->ids[i]);
 
 	spin_lock_bh(&pm->lock);
 	mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
-	pm->rm_id = rm_id;
+	pm->rm_id = rm_list->ids[0];
 	spin_unlock_bh(&pm->lock);
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ac15be7cf06b..d7daf7e0d5d2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -142,7 +142,7 @@ struct mptcp_options_received {
 		mpc_map:1,
 		__unused:2;
 	u8	addr_id;
-	u8	rm_id;
+	struct mptcp_rm_list rm_list;
 	union {
 		struct in_addr	addr;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -648,7 +648,8 @@ void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
 void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 				const struct mptcp_addr_info *addr);
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
+			       const struct mptcp_rm_list *rm_list);
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 				 struct mptcp_addr_info *addr,
-- 
cgit v1.2.3


From b5c55f334c7f75b59b2d14be0abb7c827a016059 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:14 -0800
Subject: mptcp: add rm_list_rx in mptcp_pm_data

This patch added a new member rm_list_rx for struct mptcp_pm_data as an
list of the removing address ids on the incoming direction. Initialized
its nr field to zero in mptcp_pm_data_init.

In mptcp_pm_rm_addr_received, set it as the input rm_list.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c       | 3 ++-
 net/mptcp/protocol.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 7553f82076ca..a47436205d88 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -218,7 +218,7 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 
 	spin_lock_bh(&pm->lock);
 	mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
-	pm->rm_id = rm_list->ids[0];
+	pm->rm_list_rx = *rm_list;
 	spin_unlock_bh(&pm->lock);
 }
 
@@ -300,6 +300,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
 	msk->pm.local_addr_used = 0;
 	msk->pm.subflows = 0;
 	msk->pm.rm_list_tx.nr = 0;
+	msk->pm.rm_list_rx.nr = 0;
 	WRITE_ONCE(msk->pm.work_pending, false);
 	WRITE_ONCE(msk->pm.addr_signal, 0);
 	WRITE_ONCE(msk->pm.accept_addr, false);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d7daf7e0d5d2..82a63abf2c7e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -208,6 +208,7 @@ struct mptcp_pm_data {
 	u8		subflows;
 	u8		status;
 	struct mptcp_rm_list rm_list_tx;
+	struct mptcp_rm_list rm_list_rx;
 	u8		rm_id;
 };
 
-- 
cgit v1.2.3


From d0b698ca9a27e90b641804fc2fb49ae3719c0904 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:15 -0800
Subject: mptcp: remove multi addresses in PM

This patch dropped the member rm_id of struct mptcp_pm_data. Use
rm_list_rx in mptcp_pm_nl_rm_addr_received instead of using rm_id.

In mptcp_pm_nl_rm_addr_received, iterated each address id from
pm.rm_list_rx, then shut down and closed each address id's subsocket.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 36 ++++++++++++++++++++----------------
 net/mptcp/protocol.h   |  1 -
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1eb9d0139267..e8135702af39 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -575,36 +575,40 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
+	u8 i;
 
-	pr_debug("address rm_id %d", msk->pm.rm_id);
+	pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr);
 
 	msk_owned_by_me(msk);
 
-	if (!msk->pm.rm_id)
+	if (!msk->pm.rm_list_rx.nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+	for (i = 0; i < msk->pm.rm_list_rx.nr; i++) {
+		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
 
-		if (msk->pm.rm_id != subflow->remote_id)
-			continue;
+			if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id)
+				continue;
 
-		spin_unlock_bh(&msk->pm.lock);
-		mptcp_subflow_shutdown(sk, ssk, how);
-		mptcp_close_ssk(sk, ssk, subflow);
-		spin_lock_bh(&msk->pm.lock);
+			pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]);
+			spin_unlock_bh(&msk->pm.lock);
+			mptcp_subflow_shutdown(sk, ssk, how);
+			mptcp_close_ssk(sk, ssk, subflow);
+			spin_lock_bh(&msk->pm.lock);
 
-		msk->pm.add_addr_accepted--;
-		msk->pm.subflows--;
-		WRITE_ONCE(msk->pm.accept_addr, true);
+			msk->pm.add_addr_accepted--;
+			msk->pm.subflows--;
+			WRITE_ONCE(msk->pm.accept_addr, true);
 
-		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
+			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
 
-		break;
+			break;
+		}
 	}
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 82a63abf2c7e..5324fbe40528 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -209,7 +209,6 @@ struct mptcp_pm_data {
 	u8		status;
 	struct mptcp_rm_list rm_list_tx;
 	struct mptcp_rm_list rm_list_rx;
-	u8		rm_id;
 };
 
 struct mptcp_data_frag {
-- 
cgit v1.2.3


From ddd14bb85dd8d26e10a2ce4f9606879b94e81888 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:16 -0800
Subject: mptcp: remove multi subflows in PM

This patch dealt with removing multi subflows in PM:

In mptcp_pm_remove_subflow, changed the input parameter local_id as an
list of removing address ids, and passed the list to
mptcp_pm_nl_rm_subflow_received.

In mptcp_pm_nl_rm_subflow_received, iterated each address id from the
received ids list. Then shut down and closed each address id's subsocket.

In mptcp_nl_remove_subflow_and_signal_addr, put the single address id into
an ids list, and passed it to mptcp_pm_remove_subflow.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c         |  6 +++---
 net/mptcp/pm_netlink.c | 42 +++++++++++++++++++++++++-----------------
 net/mptcp/protocol.h   |  5 +++--
 3 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index a47436205d88..4cfd80f90003 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -56,12 +56,12 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
 	return 0;
 }
 
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
 {
-	pr_debug("msk=%p, local_id=%d", msk, local_id);
+	pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
 
 	spin_lock_bh(&msk->pm.lock);
-	mptcp_pm_nl_rm_subflow_received(msk, local_id);
+	mptcp_pm_nl_rm_subflow_received(msk, rm_list);
 	spin_unlock_bh(&msk->pm.lock);
 	return 0;
 }
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index e8135702af39..769a05d836da 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -645,39 +645,44 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 }
 
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
+	u8 i;
 
-	pr_debug("subflow rm_id %d", rm_id);
+	pr_debug("subflow rm_list_nr %d", rm_list->nr);
 
 	msk_owned_by_me(msk);
 
-	if (!rm_id)
+	if (!rm_list->nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+	for (i = 0; i < rm_list->nr; i++) {
+		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
+			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
 
-		if (rm_id != subflow->local_id)
-			continue;
+			if (rm_list->ids[i] != subflow->local_id)
+				continue;
 
-		spin_unlock_bh(&msk->pm.lock);
-		mptcp_subflow_shutdown(sk, ssk, how);
-		mptcp_close_ssk(sk, ssk, subflow);
-		spin_lock_bh(&msk->pm.lock);
+			pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]);
+			spin_unlock_bh(&msk->pm.lock);
+			mptcp_subflow_shutdown(sk, ssk, how);
+			mptcp_close_ssk(sk, ssk, subflow);
+			spin_lock_bh(&msk->pm.lock);
 
-		msk->pm.local_addr_used--;
-		msk->pm.subflows--;
+			msk->pm.local_addr_used--;
+			msk->pm.subflows--;
 
-		__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
+			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
 
-		break;
+			break;
+		}
 	}
 }
 
@@ -1094,9 +1099,12 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 {
 	struct mptcp_sock *msk;
 	long s_slot = 0, s_num = 0;
+	struct mptcp_rm_list list = { .nr = 0 };
 
 	pr_debug("remove_id=%d", addr->id);
 
+	list.ids[list.nr++] = addr->id;
+
 	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
 		struct sock *sk = (struct sock *)msk;
 		bool remove_subflow;
@@ -1110,7 +1118,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
 		remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr);
 		mptcp_pm_remove_anno_addr(msk, addr, remove_subflow);
 		if (remove_subflow)
-			mptcp_pm_remove_subflow(msk, addr->id);
+			mptcp_pm_remove_subflow(msk, &list);
 		release_sock(sk);
 
 next:
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5324fbe40528..1111a99b024f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -664,7 +664,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
 			   bool echo, bool port);
 int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
-int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
+int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 
 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
 		 const struct sock *ssk, gfp_t gfp);
@@ -728,7 +728,8 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 void __init mptcp_pm_nl_init(void);
 void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
 void mptcp_pm_nl_work(struct mptcp_sock *msk);
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list);
 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
 unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
-- 
cgit v1.2.3


From 06faa22710342bca5e9c249634199c650799fce6 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:17 -0800
Subject: mptcp: remove multi addresses and subflows in PM

This patch implemented the function to remove a list of addresses and
subflows, named mptcp_nl_remove_addrs_list, which had a input parameter
rm_list as the removing addresses list.

In mptcp_nl_remove_addrs_list, traverse all the existing msk sockets to
invoke mptcp_pm_remove_addrs_and_subflows to remove a list of addresses
for each msk socket.

In mptcp_pm_remove_addrs_and_subflows, traverse all the addresses in the
removing addresses list, to find whether this address is in the conn_list
or anno_list. If it is, put the address ID into the removing address list
or the removing subflow list, and pass the two lists to
mptcp_pm_remove_addr and mptcp_pm_remove_subflow.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 769a05d836da..a5f6ab96a1b4 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1200,6 +1200,54 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
+static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+					       struct list_head *rm_list)
+{
+	struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
+	struct mptcp_pm_addr_entry *entry;
+
+	list_for_each_entry(entry, rm_list, list) {
+		if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) &&
+		    alist.nr < MPTCP_RM_IDS_MAX &&
+		    slist.nr < MPTCP_RM_IDS_MAX) {
+			alist.ids[alist.nr++] = entry->addr.id;
+			slist.ids[slist.nr++] = entry->addr.id;
+		} else if (remove_anno_list_by_saddr(msk, &entry->addr) &&
+			 alist.nr < MPTCP_RM_IDS_MAX) {
+			alist.ids[alist.nr++] = entry->addr.id;
+		}
+	}
+
+	if (alist.nr) {
+		spin_lock_bh(&msk->pm.lock);
+		mptcp_pm_remove_addr(msk, &alist);
+		spin_unlock_bh(&msk->pm.lock);
+	}
+	if (slist.nr)
+		mptcp_pm_remove_subflow(msk, &slist);
+}
+
+static void mptcp_nl_remove_addrs_list(struct net *net,
+				       struct list_head *rm_list)
+{
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
+
+	if (list_empty(rm_list))
+		return;
+
+	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+
+		lock_sock(sk);
+		mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+		release_sock(sk);
+
+		sock_put(sk);
+		cond_resched();
+	}
+}
+
 static void __flush_addrs(struct net *net, struct list_head *list)
 {
 	while (!list_empty(list)) {
-- 
cgit v1.2.3


From 0e4a3e68862b7844c87126628082ae97c066b9da Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:18 -0800
Subject: mptcp: remove a list of addrs when flushing

This patch invoked mptcp_nl_remove_addrs_list to remove a list of addresses
when the netlink flushes addresses, instead of using
mptcp_nl_remove_subflow_and_signal_addr to remove them one by one.

And dropped the unused parameter net in __flush_addrs too.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index a5f6ab96a1b4..5857b82c88bf 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1248,14 +1248,13 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
 	}
 }
 
-static void __flush_addrs(struct net *net, struct list_head *list)
+static void __flush_addrs(struct list_head *list)
 {
 	while (!list_empty(list)) {
 		struct mptcp_pm_addr_entry *cur;
 
 		cur = list_entry(list->next,
 				 struct mptcp_pm_addr_entry, list);
-		mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr);
 		list_del_rcu(&cur->list);
 		mptcp_pm_free_addr_entry(cur);
 	}
@@ -1280,7 +1279,8 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
 	pernet->next_id = 1;
 	bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
 	spin_unlock_bh(&pernet->lock);
-	__flush_addrs(sock_net(skb->sk), &free_list);
+	mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+	__flush_addrs(&free_list);
 	return 0;
 }
 
@@ -1877,7 +1877,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
 		/* net is removed from namespace list, can't race with
 		 * other modifiers
 		 */
-		__flush_addrs(net, &pernet->local_addr_list);
+		__flush_addrs(&pernet->local_addr_list);
 	}
 }
 
-- 
cgit v1.2.3


From 7028ba8ac9683b7e4f4db4db63a306d7497e7150 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:19 -0800
Subject: selftests: mptcp: add invert argument for chk_rm_nr

Some of the removing testcases used two zeros as arguments for chk_rm_nr
like this: chk_rm_nr 0 0. This doesn't mean that no RM_ADDR has been sent.
It only means that RM_ADDR had been sent in the opposite direction that
chk_rm_nr is checking.

This patch added a new argument invert for chk_rm_nr to allow it can
check the RM_ADDR from the opposite direction.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 964db9ed544f..15b71ddee615 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -610,11 +610,22 @@ chk_rm_nr()
 {
 	local rm_addr_nr=$1
 	local rm_subflow_nr=$2
+	local invert=${3:-""}
 	local count
 	local dump_stats
+	local addr_ns
+	local subflow_ns
+
+	if [ -z $invert ]; then
+		addr_ns=$ns1
+		subflow_ns=$ns2
+	elif [ $invert = "invert" ]; then
+		addr_ns=$ns2
+		subflow_ns=$ns1
+	fi
 
 	printf "%-39s %s" " " "rm "
-	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+	count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_addr_nr" ]; then
 		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
@@ -625,7 +636,7 @@ chk_rm_nr()
 	fi
 
 	echo -n " - sf    "
-	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+	count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$rm_subflow_nr" ]; then
 		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
@@ -833,7 +844,7 @@ remove_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address" 1 1 1
 	chk_add_nr 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal, remove
 	reset
@@ -945,7 +956,7 @@ ipv6_tests()
 	run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
 	chk_join_nr "remove single address IPv6" 1 1 1
 	chk_add_nr 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal IPv6, remove
 	reset
@@ -1088,7 +1099,7 @@ add_addr_ports_tests()
 	run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
 	chk_join_nr "remove single address with port" 1 1 1
 	chk_add_nr 1 1 1
-	chk_rm_nr 0 0
+	chk_rm_nr 1 1 invert
 
 	# subflow and signal with port, remove
 	reset
-- 
cgit v1.2.3


From f87744ad42446c8510296f11fdc73f6e6f1376cc Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:20 -0800
Subject: selftests: mptcp: set addr id for removing testcases

The removing testcases can only delete the addresses from id 1, this
patch added the support for deleting the addresses from any id that user
set.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 38 ++++++++++++++++---------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 15b71ddee615..6782a891b3e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -284,14 +284,19 @@ do_transfer()
 		let rm_nr_ns1=-addr_nr_ns1
 		if [ $rm_nr_ns1 -lt 8 ]; then
 			counter=1
-			sleep 1
-
-			while [ $counter -le $rm_nr_ns1 ]
-			do
-				ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+			dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
+			if [ ${#dump[@]} -gt 0 ]; then
+				id=${dump[1]}
 				sleep 1
-				let counter+=1
-			done
+
+				while [ $counter -le $rm_nr_ns1 ]
+				do
+					ip netns exec ${listener_ns} ./pm_nl_ctl del $id
+					sleep 1
+					let counter+=1
+					let id+=1
+				done
+			fi
 		else
 			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
@@ -318,14 +323,19 @@ do_transfer()
 		let rm_nr_ns2=-addr_nr_ns2
 		if [ $rm_nr_ns2 -lt 8 ]; then
 			counter=1
-			sleep 1
-
-			while [ $counter -le $rm_nr_ns2 ]
-			do
-				ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+			dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
+			if [ ${#dump[@]} -gt 0 ]; then
+				id=${dump[1]}
 				sleep 1
-				let counter+=1
-			done
+
+				while [ $counter -le $rm_nr_ns2 ]
+				do
+					ip netns exec ${connector_ns} ./pm_nl_ctl del $id
+					sleep 1
+					let counter+=1
+					let id+=1
+				done
+			fi
 		else
 			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
-- 
cgit v1.2.3


From d2c4333a801c73a8bc2e4bde75b573e2d1014436 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 12 Mar 2021 17:16:21 -0800
Subject: selftests: mptcp: add testcases for removing addrs

This patch added the testcases for removing a list of addresses. Used
the netlink to flush the addresses in the testcases.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 6782a891b3e7..191303b652a6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -890,6 +890,29 @@ remove_tests()
 	chk_join_nr "flush subflows and signal" 3 3 3
 	chk_add_nr 1 1
 	chk_rm_nr 2 2
+
+	# subflows flush
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+	chk_join_nr "flush subflows" 3 3 3
+	chk_rm_nr 3 3
+
+	# addresses flush
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+	chk_join_nr "flush addresses" 3 3 3
+	chk_add_nr 3 3
+	chk_rm_nr 3 3 invert
 }
 
 add_tests()
-- 
cgit v1.2.3


From bc9d992ca4d240657dba8fe722ce327bd743b35f Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Fri, 12 Mar 2021 23:43:46 +0300
Subject: sh_eth: rename TRSCER bits

In all the SoC manuals the TRSCER register bits match the corresponding
EESR registers's bits, but only on the R-Car gen2 SoC those are named
RINT<n> and TINT<n>.  Follow the suit and rename the *enum* tag/entries
from DESC_I_* to TRSCER_*.

Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c |  8 ++++----
 drivers/net/ethernet/renesas/sh_eth.h | 22 ++++++++++++++--------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index f029c7c03804..8023f48e68b6 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -560,7 +560,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 			  EESR_TDE,
 	.fdr_value	= 0x0000070f,
 
-	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+	.trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE,
 
 	.no_psr		= 1,
 	.apr		= 1,
@@ -701,7 +701,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = {
 			  EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
 	.fdr_value	= 0x00000f0f,
 
-	.trscer_err_mask = DESC_I_RINT8,
+	.trscer_err_mask = TRSCER_RMAFCE,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -782,7 +782,7 @@ static struct sh_eth_cpu_data r7s9210_data = {
 
 	.fdr_value	= 0x0000070f,
 
-	.trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+	.trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE,
 
 	.apr		= 1,
 	.mpr		= 1,
@@ -1094,7 +1094,7 @@ static struct sh_eth_cpu_data sh771x_data = {
 			  EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
 			  EESIPR_PREIP | EESIPR_CERFIP,
 
-	.trscer_err_mask = DESC_I_RINT8,
+	.trscer_err_mask = TRSCER_RMAFCE,
 
 	.tsu		= 1,
 	.dual_port	= 1,
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index c1b3751b12c4..9a4bfdbc37bc 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -380,14 +380,20 @@ enum MPR_BIT {
 };
 
 /* TRSCER */
-enum DESC_I_BIT {
-	DESC_I_TINT4 = 0x0800, DESC_I_TINT3 = 0x0400, DESC_I_TINT2 = 0x0200,
-	DESC_I_TINT1 = 0x0100, DESC_I_RINT8 = 0x0080, DESC_I_RINT5 = 0x0010,
-	DESC_I_RINT4 = 0x0008, DESC_I_RINT3 = 0x0004, DESC_I_RINT2 = 0x0002,
-	DESC_I_RINT1 = 0x0001,
-};
-
-#define DEFAULT_TRSCER_ERR_MASK (DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2)
+enum TRSCER_BIT {
+	TRSCER_CNDCE	= 0x00000800,
+	TRSCER_DLCCE	= 0x00000400,
+	TRSCER_CDCE	= 0x00000200,
+	TRSCER_TROCE	= 0x00000100,
+	TRSCER_RMAFCE	= 0x00000080,
+	TRSCER_RRFCE	= 0x00000010,
+	TRSCER_RTLFCE	= 0x00000008,
+	TRSCER_RTSFCE	= 0x00000004,
+	TRSCER_PRECE	= 0x00000002,
+	TRSCER_CERFCE	= 0x00000001,
+};
+
+#define DEFAULT_TRSCER_ERR_MASK (TRSCER_RMAFCE | TRSCER_RRFCE | TRSCER_CDCE)
 
 /* RPADIR */
 enum RPADIR_BIT {
-- 
cgit v1.2.3


From 4585b72d97cc9f50b29a3c9774222c09fed92240 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Fri, 12 Mar 2021 23:44:53 +0300
Subject: sh_eth: rename PSR bits

In all the SoC manuals (except R-Car gen2) the PHY status register's name
is abbreviated to  PSR with the only valid bit 0 named LMON.  Follow the
suit and rename the corresponding *enum* tag/entry.

Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 +-
 drivers/net/ethernet/renesas/sh_eth.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 8023f48e68b6..ebedb1a11132 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1749,7 +1749,7 @@ static void sh_eth_emac_interrupt(struct net_device *ndev)
 		link_stat = sh_eth_read(ndev, PSR);
 		if (mdp->ether_link_active_low)
 			link_stat = ~link_stat;
-		if (!(link_stat & PHY_ST_LINK)) {
+		if (!(link_stat & PSR_LMON)) {
 			sh_eth_rcv_snd_disable(ndev);
 		} else {
 			/* Link Up */
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 9a4bfdbc37bc..73de916e0928 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -208,7 +208,7 @@ enum PIR_BIT {
 };
 
 /* PSR */
-enum PHY_STATUS_BIT { PHY_ST_LINK = 0x01, };
+enum PSR_BIT { PSR_LMON = 0x01, };
 
 /* EESR */
 enum EESR_BIT {
-- 
cgit v1.2.3


From e2dccaf194dd81f4cb7fcc2b8effc10a83302a48 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Fri, 12 Mar 2021 23:45:48 +0300
Subject: sh_eth: rename *enum*s still not matching register names

Finally, rename the rest of the *enum* tags still not (exactly) matching
the abbreviated register names from the manuals...

Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 73de916e0928..27bc003f28b5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -171,7 +171,7 @@ enum GECMR_BIT {
 };
 
 /* EDMR */
-enum DMAC_M_BIT {
+enum EDMR_BIT {
 	EDMR_NBST = 0x80,
 	EDMR_EL = 0x40, /* Litte endian */
 	EDMR_DL1 = 0x20, EDMR_DL0 = 0x10,
@@ -180,13 +180,13 @@ enum DMAC_M_BIT {
 };
 
 /* EDTRR */
-enum DMAC_T_BIT {
+enum EDTRR_BIT {
 	EDTRR_TRNS_GETHER = 0x03,
 	EDTRR_TRNS_ETHER = 0x01,
 };
 
 /* EDRRR */
-enum EDRRR_R_BIT {
+enum EDRRR_BIT {
 	EDRRR_R = 0x01,
 };
 
@@ -339,7 +339,7 @@ enum RMCR_BIT {
 };
 
 /* ECMR */
-enum FELIC_MODE_BIT {
+enum ECMR_BIT {
 	ECMR_TRCCM = 0x04000000, ECMR_RCSC = 0x00800000,
 	ECMR_DPAD = 0x00200000, ECMR_RZPF = 0x00100000,
 	ECMR_ZPF = 0x00080000, ECMR_PFR = 0x00040000, ECMR_RXF = 0x00020000,
@@ -350,7 +350,7 @@ enum FELIC_MODE_BIT {
 };
 
 /* ECSR */
-enum ECSR_STATUS_BIT {
+enum ECSR_BIT {
 	ECSR_BRCRX = 0x20, ECSR_PSRTO = 0x10,
 	ECSR_LCHNG = 0x04,
 	ECSR_MPD = 0x02, ECSR_ICD = 0x01,
@@ -360,7 +360,7 @@ enum ECSR_STATUS_BIT {
 				 ECSR_ICD | ECSIPR_MPDIP)
 
 /* ECSIPR */
-enum ECSIPR_STATUS_MASK_BIT {
+enum ECSIPR_BIT {
 	ECSIPR_BRCRXIP = 0x20, ECSIPR_PSRTOIP = 0x10,
 	ECSIPR_LCHNGIP = 0x04,
 	ECSIPR_MPDIP = 0x02, ECSIPR_ICDIP = 0x01,
-- 
cgit v1.2.3


From 0deaeabf271262f0b13a23b49fb1c7e9b9bcbd50 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Fri, 12 Mar 2021 23:47:02 +0300
Subject: sh_eth: place RX/TX descriptor *enum*s after their *struct*s

Place the RX/TX descriptor bit *enum*s where they belong -- after the
corresponding RX/TX descriptor *struct*s and, while at it, switch to
declaring one *enum* entry per line...

Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.h | 82 ++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index 27bc003f28b5..a5c07c6ff44a 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -288,27 +288,6 @@ enum EESIPR_BIT {
 	EESIPR_CERFIP	= 0x00000001,
 };
 
-/* Receive descriptor 0 bits */
-enum RD_STS_BIT {
-	RD_RACT = 0x80000000, RD_RDLE = 0x40000000,
-	RD_RFP1 = 0x20000000, RD_RFP0 = 0x10000000,
-	RD_RFE = 0x08000000, RD_RFS10 = 0x00000200,
-	RD_RFS9 = 0x00000100, RD_RFS8 = 0x00000080,
-	RD_RFS7 = 0x00000040, RD_RFS6 = 0x00000020,
-	RD_RFS5 = 0x00000010, RD_RFS4 = 0x00000008,
-	RD_RFS3 = 0x00000004, RD_RFS2 = 0x00000002,
-	RD_RFS1 = 0x00000001,
-};
-#define RDF1ST	RD_RFP1
-#define RDFEND	RD_RFP0
-#define RD_RFP	(RD_RFP1|RD_RFP0)
-
-/* Receive descriptor 1 bits */
-enum RD_LEN_BIT {
-	RD_RFL	= 0x0000ffff,	/* receive frame  length */
-	RD_RBL	= 0xffff0000,	/* receive buffer length */
-};
-
 /* FCFTR */
 enum FCFTR_BIT {
 	FCFTR_RFF2 = 0x00040000, FCFTR_RFF1 = 0x00020000,
@@ -318,21 +297,6 @@ enum FCFTR_BIT {
 #define DEFAULT_FIFO_F_D_RFF	(FCFTR_RFF2 | FCFTR_RFF1 | FCFTR_RFF0)
 #define DEFAULT_FIFO_F_D_RFD	(FCFTR_RFD2 | FCFTR_RFD1 | FCFTR_RFD0)
 
-/* Transmit descriptor 0 bits */
-enum TD_STS_BIT {
-	TD_TACT = 0x80000000, TD_TDLE = 0x40000000,
-	TD_TFP1 = 0x20000000, TD_TFP0 = 0x10000000,
-	TD_TFE  = 0x08000000, TD_TWBI = 0x04000000,
-};
-#define TDF1ST	TD_TFP1
-#define TDFEND	TD_TFP0
-#define TD_TFP	(TD_TFP1|TD_TFP0)
-
-/* Transmit descriptor 1 bits */
-enum TD_LEN_BIT {
-	TD_TBL	= 0xffff0000,	/* transmit buffer length */
-};
-
 /* RMCR */
 enum RMCR_BIT {
 	RMCR_RNC = 0x00000001,
@@ -451,6 +415,24 @@ struct sh_eth_txdesc {
 	u32 pad0;		/* padding data */
 } __aligned(2) __packed;
 
+/* Transmit descriptor 0 bits */
+enum TD_STS_BIT {
+	TD_TACT	= 0x80000000,
+	TD_TDLE	= 0x40000000,
+	TD_TFP1	= 0x20000000,
+	TD_TFP0	= 0x10000000,
+	TD_TFE	= 0x08000000,
+	TD_TWBI	= 0x04000000,
+};
+#define TDF1ST	TD_TFP1
+#define TDFEND	TD_TFP0
+#define TD_TFP	(TD_TFP1 | TD_TFP0)
+
+/* Transmit descriptor 1 bits */
+enum TD_LEN_BIT {
+	TD_TBL	= 0xffff0000,	/* transmit buffer length */
+};
+
 /* The sh ether Rx buffer descriptors.
  * This structure should be 20 bytes.
  */
@@ -461,6 +443,34 @@ struct sh_eth_rxdesc {
 	u32 pad0;		/* padding data */
 } __aligned(2) __packed;
 
+/* Receive descriptor 0 bits */
+enum RD_STS_BIT {
+	RD_RACT	= 0x80000000,
+	RD_RDLE	= 0x40000000,
+	RD_RFP1	= 0x20000000,
+	RD_RFP0	= 0x10000000,
+	RD_RFE	= 0x08000000,
+	RD_RFS10 = 0x00000200,
+	RD_RFS9	= 0x00000100,
+	RD_RFS8	= 0x00000080,
+	RD_RFS7	= 0x00000040,
+	RD_RFS6	= 0x00000020,
+	RD_RFS5	= 0x00000010,
+	RD_RFS4	= 0x00000008,
+	RD_RFS3	= 0x00000004,
+	RD_RFS2	= 0x00000002,
+	RD_RFS1	= 0x00000001,
+};
+#define RDF1ST	RD_RFP1
+#define RDFEND	RD_RFP0
+#define RD_RFP	(RD_RFP1 | RD_RFP0)
+
+/* Receive descriptor 1 bits */
+enum RD_LEN_BIT {
+	RD_RFL	= 0x0000ffff,	/* receive frame  length */
+	RD_RBL	= 0xffff0000,	/* receive buffer length */
+};
+
 /* This structure is used by each CPU dependency handling. */
 struct sh_eth_cpu_data {
 	/* mandatory functions */
-- 
cgit v1.2.3


From f5f2b3e4dcc0e944dc33b522df84576679fbd8eb Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Fri, 12 Mar 2021 16:50:13 +0800
Subject: net: hns3: add support for imp-controlled PHYs

IMP(Intelligent Management Processor) firmware add a new feature
to take control of PHYs for some new devices, PF driver adds
support for this feature.

Driver queries device's capability to check whether IMP supports
this feature, it will tell IMP to enable this feature by firmware
compatible command if it is supported.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   4 +
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |   2 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   9 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c |   4 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  27 ++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 149 ++++++++++++++++++++-
 6 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 9e9076f9ba04..3a6bf1ab84e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -653,6 +653,10 @@ struct hnae3_ae_ops {
 	int (*del_cls_flower)(struct hnae3_handle *handle,
 			      struct flow_cls_offload *cls_flower);
 	bool (*cls_flower_active)(struct hnae3_handle *handle);
+	int (*get_phy_link_ksettings)(struct hnae3_handle *handle,
+				      struct ethtool_link_ksettings *cmd);
+	int (*set_phy_link_ksettings)(struct hnae3_handle *handle,
+				      const struct ethtool_link_ksettings *cmd);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index ded92ea50971..9d702bd0c7c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -365,6 +365,8 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "support PAUSE: %s\n",
 		 test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps) ?
 		 "yes" : "no");
+	dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n",
+		 test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no");
 }
 
 static void hns3_dbg_dev_specs(struct hnae3_handle *h)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 28afb278508c..a1d69c56d119 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -700,6 +700,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 				   struct ethtool_link_ksettings *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
 	const struct hnae3_ae_ops *ops;
 	u8 module_type;
 	u8 media_type;
@@ -730,7 +731,10 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
 		break;
 	case HNAE3_MEDIA_TYPE_COPPER:
 		cmd->base.port = PORT_TP;
-		if (!netdev->phydev)
+		if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+		    ops->get_phy_link_ksettings)
+			ops->get_phy_link_ksettings(h, cmd);
+		else if (!netdev->phydev)
 			hns3_get_ksettings(h, cmd);
 		else
 			phy_ethtool_ksettings_get(netdev->phydev, cmd);
@@ -823,6 +827,9 @@ static int hns3_set_link_ksettings(struct net_device *netdev,
 			return -EINVAL;
 
 		return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+	} else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+		   ops->set_phy_link_ksettings) {
+		return ops->set_phy_link_ksettings(handle, cmd);
 	}
 
 	if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 1c90c6b8a9a3..3284a2cb52e6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -385,6 +385,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B))
 		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
+		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
 }
 
 static __le32 hclge_build_api_caps(void)
@@ -474,6 +476,8 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev)
 
 	hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1);
 	hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1);
+	if (hnae3_dev_phy_imp_supported(hdev))
+		hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1);
 	req->compat = cpu_to_le32(compat);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 774a9ad32752..f45ceaa8126f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -303,6 +303,9 @@ enum hclge_opcode_type {
 	HCLGE_PPP_CMD1_INT_CMD		= 0x2101,
 	HCLGE_MAC_ETHERTYPE_IDX_RD      = 0x2105,
 	HCLGE_NCSI_INT_EN		= 0x2401,
+
+	/* PHY command */
+	HCLGE_OPC_PHY_LINK_KSETTING	= 0x7025,
 };
 
 #define HCLGE_TQP_REG_OFFSET		0x80000
@@ -1098,6 +1101,7 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd {
 
 #define HCLGE_LINK_EVENT_REPORT_EN_B	0
 #define HCLGE_NCSI_ERROR_REPORT_EN_B	1
+#define HCLGE_PHY_IMP_EN_B		2
 struct hclge_firmware_compat_cmd {
 	__le32 compat;
 	u8 rsv[20];
@@ -1139,6 +1143,29 @@ struct hclge_dev_specs_1_cmd {
 	u8 rsv1[18];
 };
 
+#define HCLGE_PHY_LINK_SETTING_BD_NUM		2
+
+struct hclge_phy_link_ksetting_0_cmd {
+	__le32 speed;
+	u8 duplex;
+	u8 autoneg;
+	u8 eth_tp_mdix;
+	u8 eth_tp_mdix_ctrl;
+	u8 port;
+	u8 transceiver;
+	u8 phy_address;
+	u8 rsv;
+	__le32 supported;
+	__le32 advertising;
+	__le32 lp_advertising;
+};
+
+struct hclge_phy_link_ksetting_1_cmd {
+	u8 master_slave_cfg;
+	u8 master_slave_state;
+	u8 rsv[22];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 26269d66e210..a39afcde87b3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2994,6 +2994,141 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
 	return 0;
 }
 
+static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle,
+					struct ethtool_link_ksettings *cmd)
+{
+	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_phy_link_ksetting_0_cmd *req0;
+	struct hclge_phy_link_ksetting_1_cmd *req1;
+	u32 supported, advertising, lp_advertising;
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
+				   true);
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get phy link ksetting, ret = %d.\n", ret);
+		return ret;
+	}
+
+	req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
+	cmd->base.autoneg = req0->autoneg;
+	cmd->base.speed = le32_to_cpu(req0->speed);
+	cmd->base.duplex = req0->duplex;
+	cmd->base.port = req0->port;
+	cmd->base.transceiver = req0->transceiver;
+	cmd->base.phy_address = req0->phy_address;
+	cmd->base.eth_tp_mdix = req0->eth_tp_mdix;
+	cmd->base.eth_tp_mdix_ctrl = req0->eth_tp_mdix_ctrl;
+	supported = le32_to_cpu(req0->supported);
+	advertising = le32_to_cpu(req0->advertising);
+	lp_advertising = le32_to_cpu(req0->lp_advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+						lp_advertising);
+
+	req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
+	cmd->base.master_slave_cfg = req1->master_slave_cfg;
+	cmd->base.master_slave_state = req1->master_slave_state;
+
+	return 0;
+}
+
+static int
+hclge_set_phy_link_ksettings(struct hnae3_handle *handle,
+			     const struct ethtool_link_ksettings *cmd)
+{
+	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_phy_link_ksetting_0_cmd *req0;
+	struct hclge_phy_link_ksetting_1_cmd *req1;
+	struct hclge_dev *hdev = vport->back;
+	u32 advertising;
+	int ret;
+
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	    ((cmd->base.speed != SPEED_100 && cmd->base.speed != SPEED_10) ||
+	     (cmd->base.duplex != DUPLEX_HALF &&
+	      cmd->base.duplex != DUPLEX_FULL)))
+		return -EINVAL;
+
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
+				   false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
+				   false);
+
+	req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
+	req0->autoneg = cmd->base.autoneg;
+	req0->speed = cpu_to_le32(cmd->base.speed);
+	req0->duplex = cmd->base.duplex;
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+	req0->advertising = cpu_to_le32(advertising);
+	req0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
+
+	req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
+	req1->master_slave_cfg = cmd->base.master_slave_cfg;
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to set phy link ksettings, ret = %d.\n", ret);
+		return ret;
+	}
+
+	hdev->hw.mac.autoneg = cmd->base.autoneg;
+	hdev->hw.mac.speed = cmd->base.speed;
+	hdev->hw.mac.duplex = cmd->base.duplex;
+	linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising);
+
+	return 0;
+}
+
+static int hclge_update_tp_port_info(struct hclge_dev *hdev)
+{
+	struct ethtool_link_ksettings cmd;
+	int ret;
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return 0;
+
+	ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &cmd);
+	if (ret)
+		return ret;
+
+	hdev->hw.mac.autoneg = cmd.base.autoneg;
+	hdev->hw.mac.speed = cmd.base.speed;
+	hdev->hw.mac.duplex = cmd.base.duplex;
+
+	return 0;
+}
+
+static int hclge_tp_port_init(struct hclge_dev *hdev)
+{
+	struct ethtool_link_ksettings cmd;
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return 0;
+
+	cmd.base.autoneg = hdev->hw.mac.autoneg;
+	cmd.base.speed = hdev->hw.mac.speed;
+	cmd.base.duplex = hdev->hw.mac.duplex;
+	linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising);
+
+	return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd);
+}
+
 static int hclge_update_port_info(struct hclge_dev *hdev)
 {
 	struct hclge_mac *mac = &hdev->hw.mac;
@@ -3002,7 +3137,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev)
 
 	/* get the port info from SFP cmd if not copper port */
 	if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
-		return 0;
+		return hclge_update_tp_port_info(hdev);
 
 	/* if IMP does not support get SFP/qSFP info, return directly */
 	if (!hdev->support_sfp_query)
@@ -10648,7 +10783,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (ret)
 		goto err_msi_irq_uninit;
 
-	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER &&
+	    !hnae3_dev_phy_imp_supported(hdev)) {
 		ret = hclge_mac_mdio_config(hdev);
 		if (ret)
 			goto err_msi_irq_uninit;
@@ -11041,6 +11177,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
+	ret = hclge_tp_port_init(hdev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to init tp port, ret = %d\n",
+			ret);
+		return ret;
+	}
+
 	ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
 	if (ret) {
 		dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@ -11982,6 +12125,8 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.add_cls_flower = hclge_add_cls_flower,
 	.del_cls_flower = hclge_del_cls_flower,
 	.cls_flower_active = hclge_is_cls_flower_active,
+	.get_phy_link_ksettings = hclge_get_phy_link_ksettings,
+	.set_phy_link_ksettings = hclge_set_phy_link_ksettings,
 };
 
 static struct hnae3_ae_algo ae_algo = {
-- 
cgit v1.2.3


From 57a8f46b1bd3f5f43b06f48aab7c1f7ca0936be3 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Fri, 12 Mar 2021 16:50:14 +0800
Subject: net: hns3: add get/set pause parameters support for imp-controlled
 PHYs

When the imp-controlled PHYs feature is enabled, phydev is NULL.
In this case, the autoneg is always off when user uses ethtool -a
command to get pause parameters because  hclge_get_pauseparam()
uses phydev to check whether device is TP port. To fit this new
feature, use media type to check whether device is TP port.

And when user set pause parameters, these parameters need to
always set to mac, no matter whether autoneg is off.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a39afcde87b3..dbca489445df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10158,9 +10158,10 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	struct phy_device *phydev = hdev->hw.mac.phydev;
+	u8 media_type = hdev->hw.mac.media_type;
 
-	*auto_neg = phydev ? hclge_get_autoneg(handle) : 0;
+	*auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ?
+		    hclge_get_autoneg(handle) : 0;
 
 	if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
 		*rx_en = 0;
@@ -10206,7 +10207,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	u32 fc_autoneg;
 
-	if (phydev) {
+	if (phydev || hnae3_dev_phy_imp_supported(hdev)) {
 		fc_autoneg = hclge_get_autoneg(handle);
 		if (auto_neg != fc_autoneg) {
 			dev_info(&hdev->pdev->dev,
@@ -10225,7 +10226,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
 
 	hclge_record_user_pauseparam(hdev, rx_en, tx_en);
 
-	if (!auto_neg)
+	if (!auto_neg || hnae3_dev_phy_imp_supported(hdev))
 		return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
 
 	if (phydev)
-- 
cgit v1.2.3


From 024712f51e5711d69ced729fb3398819ed6e8b53 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Fri, 12 Mar 2021 16:50:15 +0800
Subject: net: hns3: add ioctl support for imp-controlled PHYs

When the imp-controlled PHYs feature is enabled, driver will not
register mdio bus. In order to support ioctl ops for phy tool to
read or write phy register in this case, the firmware implement
a new command for driver and driver implement ioctl by using this
new command.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  8 +++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 25 +++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c    | 39 ++++++++++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h    |  2 ++
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index f45ceaa8126f..abeacc990a9b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -306,6 +306,7 @@ enum hclge_opcode_type {
 
 	/* PHY command */
 	HCLGE_OPC_PHY_LINK_KSETTING	= 0x7025,
+	HCLGE_OPC_PHY_REG		= 0x7026,
 };
 
 #define HCLGE_TQP_REG_OFFSET		0x80000
@@ -1166,6 +1167,13 @@ struct hclge_phy_link_ksetting_1_cmd {
 	u8 rsv[22];
 };
 
+struct hclge_phy_reg_cmd {
+	__le16 reg_addr;
+	u8 rsv0[2];
+	__le16 reg_val;
+	u8 rsv1[18];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dbca489445df..adc2ec7265e6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -8904,6 +8904,29 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
 	return 0;
 }
 
+static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd)
+{
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	if (!hnae3_dev_phy_imp_supported(hdev))
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = hdev->hw.mac.phy_addr;
+		/* this command reads phy id and register at the same time */
+		fallthrough;
+	case SIOCGMIIREG:
+		data->val_out = hclge_read_phy_reg(hdev, data->reg_num);
+		return 0;
+
+	case SIOCSMIIREG:
+		return hclge_write_phy_reg(hdev, data->reg_num, data->val_in);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 			  int cmd)
 {
@@ -8911,7 +8934,7 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
 	struct hclge_dev *hdev = vport->back;
 
 	if (!hdev->hw.mac.phydev)
-		return -EOPNOTSUPP;
+		return hclge_mii_ioctl(hdev, ifr, cmd);
 
 	return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index e89820702540..08e88d9422cd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -268,3 +268,42 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev)
 
 	phy_stop(phydev);
 }
+
+u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr)
+{
+	struct hclge_phy_reg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, true);
+
+	req = (struct hclge_phy_reg_cmd *)desc.data;
+	req->reg_addr = cpu_to_le16(reg_addr);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to read phy reg, ret = %d.\n", ret);
+
+	return le16_to_cpu(req->reg_val);
+}
+
+int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val)
+{
+	struct hclge_phy_reg_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, false);
+
+	req = (struct hclge_phy_reg_cmd *)desc.data;
+	req->reg_addr = cpu_to_le16(reg_addr);
+	req->reg_val = cpu_to_le16(val);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to write phy reg, ret = %d.\n", ret);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
index dd9a1218a7b0..fd0e20190b90 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -9,5 +9,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle);
 void hclge_mac_disconnect_phy(struct hnae3_handle *handle);
 void hclge_mac_start_phy(struct hclge_dev *hdev);
 void hclge_mac_stop_phy(struct hclge_dev *hdev);
+u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr);
+int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val);
 
 #endif
-- 
cgit v1.2.3


From b47cfe1f402dbf10279b8f12131388fdff9d2259 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Fri, 12 Mar 2021 16:50:16 +0800
Subject: net: hns3: add phy loopback support for imp-controlled PHYs

If the imp-controlled PHYs feature is enabled, driver can not
call phy driver interface to set loopback anymore and needs
to send command to firmware to start phy loopback.

Driver reuses the existing firmware command 0x0315 to start
phy loopback, just add a setting bit in this command. As this
command is not only for serdes loopback anymore, rename this
command to "xxx_COMMON_LOOPBACK", and modify function name,
macro name and logs related to it.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  9 ++--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 20 +++++---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 58 ++++++++++++----------
 3 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index abeacc990a9b..804f4c8360cf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -127,7 +127,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_QUERY_MAC_TNL_INT	= 0x0310,
 	HCLGE_OPC_MAC_TNL_INT_EN	= 0x0311,
 	HCLGE_OPC_CLEAR_MAC_TNL_INT	= 0x0312,
-	HCLGE_OPC_SERDES_LOOPBACK       = 0x0315,
+	HCLGE_OPC_COMMON_LOOPBACK       = 0x0315,
 	HCLGE_OPC_CONFIG_FEC_MODE	= 0x031A,
 
 	/* PFC/Pause commands */
@@ -964,9 +964,10 @@ struct hclge_pf_rst_done_cmd {
 
 #define HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B	BIT(0)
 #define HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B	BIT(2)
-#define HCLGE_CMD_SERDES_DONE_B			BIT(0)
-#define HCLGE_CMD_SERDES_SUCCESS_B		BIT(1)
-struct hclge_serdes_lb_cmd {
+#define HCLGE_CMD_GE_PHY_INNER_LOOP_B		BIT(3)
+#define HCLGE_CMD_COMMON_LB_DONE_B		BIT(0)
+#define HCLGE_CMD_COMMON_LB_SUCCESS_B		BIT(1)
+struct hclge_common_lb_cmd {
 	u8 mask;
 	u8 enable;
 	u8 result;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 6b1d197df881..1c699131e8df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1546,13 +1546,13 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev,
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	struct hclge_config_mac_mode_cmd *req_app;
-	struct hclge_serdes_lb_cmd *req_serdes;
+	struct hclge_common_lb_cmd *req_common;
 	struct hclge_desc desc;
 	u8 loopback_en;
 	int ret;
 
 	req_app = (struct hclge_config_mac_mode_cmd *)desc.data;
-	req_serdes = (struct hclge_serdes_lb_cmd *)desc.data;
+	req_common = (struct hclge_common_lb_cmd *)desc.data;
 
 	dev_info(&hdev->pdev->dev, "mac id: %u\n", hdev->hw.mac.mac_id);
 
@@ -1569,27 +1569,33 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev,
 	dev_info(&hdev->pdev->dev, "app loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, true);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true);
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"failed to dump serdes loopback status, ret = %d\n",
+			"failed to dump common loopback status, ret = %d\n",
 			ret);
 		return;
 	}
 
-	loopback_en = req_serdes->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
+	loopback_en = req_common->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B;
 	dev_info(&hdev->pdev->dev, "serdes serial loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	loopback_en = req_serdes->enable &
+	loopback_en = req_common->enable &
 			HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
 	dev_info(&hdev->pdev->dev, "serdes parallel loopback: %s\n",
 		 loopback_en ? "on" : "off");
 
-	if (phydev)
+	if (phydev) {
 		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
 			 phydev->loopback_enabled ? "on" : "off");
+	} else if (hnae3_dev_phy_imp_supported(hdev)) {
+		loopback_en = req_common->enable &
+			      HCLGE_CMD_GE_PHY_INNER_LOOP_B;
+		dev_info(&hdev->pdev->dev, "phy loopback: %s\n",
+			 loopback_en ? "on" : "off");
+	}
 }
 
 /* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index adc2ec7265e6..a664383271c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -751,8 +751,9 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 		handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
 		handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
 
-		if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
-		    hdev->hw.mac.phydev->drv->set_loopback) {
+		if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
+		     hdev->hw.mac.phydev->drv->set_loopback) ||
+		    hnae3_dev_phy_imp_supported(hdev)) {
 			count += 1;
 			handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
 		}
@@ -7270,19 +7271,19 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
 	return ret;
 }
 
-static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
+static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en,
 				     enum hnae3_loop loop_mode)
 {
-#define HCLGE_SERDES_RETRY_MS	10
-#define HCLGE_SERDES_RETRY_NUM	100
+#define HCLGE_COMMON_LB_RETRY_MS	10
+#define HCLGE_COMMON_LB_RETRY_NUM	100
 
-	struct hclge_serdes_lb_cmd *req;
+	struct hclge_common_lb_cmd *req;
 	struct hclge_desc desc;
 	int ret, i = 0;
 	u8 loop_mode_b;
 
-	req = (struct hclge_serdes_lb_cmd *)desc.data;
-	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false);
+	req = (struct hclge_common_lb_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false);
 
 	switch (loop_mode) {
 	case HNAE3_LOOP_SERIAL_SERDES:
@@ -7291,9 +7292,12 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
 	case HNAE3_LOOP_PARALLEL_SERDES:
 		loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
 		break;
+	case HNAE3_LOOP_PHY:
+		loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B;
+		break;
 	default:
 		dev_err(&hdev->pdev->dev,
-			"unsupported serdes loopback mode %d\n", loop_mode);
+			"unsupported common loopback mode %d\n", loop_mode);
 		return -ENOTSUPP;
 	}
 
@@ -7307,39 +7311,39 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"serdes loopback set fail, ret = %d\n", ret);
+			"common loopback set fail, ret = %d\n", ret);
 		return ret;
 	}
 
 	do {
-		msleep(HCLGE_SERDES_RETRY_MS);
-		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK,
+		msleep(HCLGE_COMMON_LB_RETRY_MS);
+		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK,
 					   true);
 		ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 		if (ret) {
 			dev_err(&hdev->pdev->dev,
-				"serdes loopback get, ret = %d\n", ret);
+				"common loopback get, ret = %d\n", ret);
 			return ret;
 		}
-	} while (++i < HCLGE_SERDES_RETRY_NUM &&
-		 !(req->result & HCLGE_CMD_SERDES_DONE_B));
+	} while (++i < HCLGE_COMMON_LB_RETRY_NUM &&
+		 !(req->result & HCLGE_CMD_COMMON_LB_DONE_B));
 
-	if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) {
-		dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n");
+	if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) {
+		dev_err(&hdev->pdev->dev, "common loopback set timeout\n");
 		return -EBUSY;
-	} else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) {
-		dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n");
+	} else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) {
+		dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n");
 		return -EIO;
 	}
 	return ret;
 }
 
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
+static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en,
 				     enum hnae3_loop loop_mode)
 {
 	int ret;
 
-	ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode);
+	ret = hclge_cfg_common_loopback(hdev, en, loop_mode);
 	if (ret)
 		return ret;
 
@@ -7388,8 +7392,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en)
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	int ret;
 
-	if (!phydev)
+	if (!phydev) {
+		if (hnae3_dev_phy_imp_supported(hdev))
+			return hclge_set_common_loopback(hdev, en,
+							 HNAE3_LOOP_PHY);
 		return -ENOTSUPP;
+	}
 
 	if (en)
 		ret = hclge_enable_phy_loopback(hdev, phydev);
@@ -7460,7 +7468,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
 		break;
 	case HNAE3_LOOP_SERIAL_SERDES:
 	case HNAE3_LOOP_PARALLEL_SERDES:
-		ret = hclge_set_serdes_loopback(hdev, en, loop_mode);
+		ret = hclge_set_common_loopback(hdev, en, loop_mode);
 		break;
 	case HNAE3_LOOP_PHY:
 		ret = hclge_set_phy_loopback(hdev, en);
@@ -7493,11 +7501,11 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
+	ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
 	if (ret)
 		return ret;
 
-	return hclge_cfg_serdes_loopback(hdev, false,
+	return hclge_cfg_common_loopback(hdev, false,
 					 HNAE3_LOOP_PARALLEL_SERDES);
 }
 
-- 
cgit v1.2.3


From 25660156f4cc4cf0cb55deda69f999dab554b750 Mon Sep 17 00:00:00 2001
From: Xingfeng Hu <xingfeng.hu@corigine.com>
Date: Fri, 12 Mar 2021 15:08:29 +0100
Subject: flow_offload: add support for packet-per-second policing

Allow flow_offload API to configure packet-per-second policing using rate
and burst parameters.

Dummy implementations of tcf_police_rate_pkt_ps() and
tcf_police_burst_pkt() are supplied which return 0, the unconfigured state.
This is to facilitate splitting the offload, driver, and TC code portion of
this feature into separate patches with the aim of providing a logical flow
for review. And the implementation of these helpers will be filled out by a
follow-up patch.

Signed-off-by: Xingfeng Hu <xingfeng.hu@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h     |  2 ++
 include/net/tc_act/tc_police.h | 12 ++++++++++++
 net/sched/cls_api.c            |  3 +++
 3 files changed, 17 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e6bd8ebf9ac3..fde025c57b4f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -234,6 +234,8 @@ struct flow_action_entry {
 			u32			index;
 			u32			burst;
 			u64			rate_bytes_ps;
+			u64			burst_pkt;
+			u64			rate_pkt_ps;
 			u32			mtu;
 		} police;
 		struct {				/* FLOW_ACTION_CT */
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 6d1e26b709b5..ae117f7937d5 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -97,6 +97,18 @@ static inline u32 tcf_police_burst(const struct tc_action *act)
 	return burst;
 }
 
+static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
+{
+	/* Not implemented */
+	return 0;
+}
+
 static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
 {
 	struct tcf_police *police = to_police(act);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e37556cc37ab..ca8e177bf31b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3661,6 +3661,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->police.burst = tcf_police_burst(act);
 			entry->police.rate_bytes_ps =
 				tcf_police_rate_bytes_ps(act);
+			entry->police.burst_pkt = tcf_police_burst_pkt(act);
+			entry->police.rate_pkt_ps =
+				tcf_police_rate_pkt_ps(act);
 			entry->police.mtu = tcf_police_tcfp_mtu(act);
 			entry->police.index = act->tcfa_index;
 		} else if (is_tcf_ct(act)) {
-- 
cgit v1.2.3


From 6a56e19902af01da447cd3104d5a6e8d01792ee1 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 12 Mar 2021 15:08:30 +0100
Subject: flow_offload: reject configuration of packet-per-second policing in
 offload drivers

A follow-up patch will allow users to configures packet-per-second policing
in the software datapath. In preparation for this, teach all drivers that
support offload of the policer action to reject such configuration as
currently none of them support it.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_flower.c               |  6 ++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c | 11 ++++++++++-
 drivers/net/ethernet/freescale/enetc/enetc_qos.c       |  5 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c        |  4 ++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c  |  5 +++++
 drivers/net/ethernet/mscc/ocelot_flower.c              |  5 +++++
 drivers/net/ethernet/mscc/ocelot_net.c                 |  6 ++++++
 drivers/net/ethernet/netronome/nfp/flower/qos_conf.c   |  5 +++++
 8 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 12e76020bea3..f78b767f86ee 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -322,6 +322,12 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	flow_action_for_each(i, act, &rule->action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				goto out;
+			}
+
 			rc = sja1105_flower_policer(priv, port, extack, cookie,
 						    &key,
 						    act->police.rate_bytes_ps,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
index 2e309f6673f7..28fd2de9e4cf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
@@ -48,6 +48,11 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev,
 	flow_action_for_each(i, entry, actions) {
 		switch (entry->id) {
 		case FLOW_ACTION_POLICE:
+			if (entry->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			/* Convert bytes per second to bits per second */
 			if (entry->police.rate_bytes_ps * 8 > max_link_rate) {
 				NL_SET_ERR_MSG_MOD(extack,
@@ -145,7 +150,11 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev,
 	flow_action_for_each(i, entry, &cls->rule->action)
 		if (entry->id == FLOW_ACTION_POLICE)
 			break;
-
+	if (entry->police.rate_pkt_ps) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "QoS offload not support packets per second");
+		return -EOPNOTSUPP;
+	}
 	/* Convert from bytes per second to Kbps */
 	p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000);
 	p.u.params.channel = pi->tx_chan;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index a9aee219fb58..cb7fa4bceaf2 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -1221,6 +1221,11 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
 
 	/* Flow meter and max frame size */
 	if (entryp) {
+		if (entryp->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+			err = -EOPNOTSUPP;
+			goto free_sfi;
+		}
 		if (entryp->police.burst) {
 			fmi = kzalloc(sizeof(*fmi), GFP_KERNEL);
 			if (!fmi) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 54ea0dae7ded..78fc27fc4e37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4538,6 +4538,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 	flow_action_for_each(i, act, flow_action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
 			if (err)
 				return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 41855e58564b..ea637fa552f5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -190,6 +190,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return -EOPNOTSUPP;
 			}
 
+			if (act->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
+
 			/* The kernel might adjust the requested burst size so
 			 * that it is not exactly a power of two. Re-adjust it
 			 * here since the hardware only supports burst sizes
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index a41b458b1b3e..8b843d3c9189 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -220,6 +220,11 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port,
 						   "Last action must be GOTO");
 				return -EOPNOTSUPP;
 			}
+			if (a->police.rate_pkt_ps) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "QoS offload not support packets per second");
+				return -EOPNOTSUPP;
+			}
 			filter->action.police_ena = true;
 			rate = a->police.rate_bytes_ps;
 			filter->action.pol.rate = div_u64(rate, 1000) * 8;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 12cb6867a2d0..c08164cd88f4 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -251,6 +251,12 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv,
 			return -EEXIST;
 		}
 
+		if (action->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "QoS offload not support packets per second");
+			return -EOPNOTSUPP;
+		}
+
 		pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8;
 		pol.burst = action->police.burst;
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
index d4ce8f9ef3cc..88bea6ad59bc 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -104,6 +104,11 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
+	if (action->police.rate_pkt_ps) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not support packets per second");
+		return -EOPNOTSUPP;
+	}
+
 	rate = action->police.rate_bytes_ps;
 	burst = action->police.burst;
 	netdev_port_id = nfp_repr_get_port_id(netdev);
-- 
cgit v1.2.3


From 2ffe0395288aa237ff7e0143366bd1cd57bfc5b7 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 12 Mar 2021 15:08:31 +0100
Subject: net/sched: act_police: add support for packet-per-second policing

Allow a policer action to enforce a rate-limit based on packets-per-second,
configurable using a packet-per-second rate and burst parameters.

e.g.
tc filter add dev tap1 parent ffff: u32 match \
        u32 0 0 police pkts_rate 3000 pkts_burst 1000

Testing was unable to uncover a performance impact of this change on
existing features.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h      | 14 ++++++++
 include/net/tc_act/tc_police.h | 48 ++++++++++++++++++++++++---
 include/uapi/linux/pkt_cls.h   |  2 ++
 net/sched/act_police.c         | 59 +++++++++++++++++++++++++++++----
 net/sched/sch_generic.c        | 75 ++++++++++++++++++++++++++++--------------
 5 files changed, 162 insertions(+), 36 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 2d6eb60c58c8..f7a6e14491fb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1242,6 +1242,20 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
 	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
+struct psched_pktrate {
+	u64	rate_pkts_ps; /* packets per second */
+	u32	mult;
+	u8	shift;
+};
+
+static inline u64 psched_pkt2t_ns(const struct psched_pktrate *r,
+				  unsigned int pkt_num)
+{
+	return ((u64)pkt_num * r->mult) >> r->shift;
+}
+
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+
 /* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
  * The fast path only needs to access filter list and to update stats
  */
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index ae117f7937d5..72649512dcdd 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -10,10 +10,13 @@ struct tcf_police_params {
 	s64			tcfp_burst;
 	u32			tcfp_mtu;
 	s64			tcfp_mtu_ptoks;
+	s64			tcfp_pkt_burst;
 	struct psched_ratecfg	rate;
 	bool			rate_present;
 	struct psched_ratecfg	peak;
 	bool			peak_present;
+	struct psched_pktrate	ppsrate;
+	bool			pps_present;
 	struct rcu_head rcu;
 };
 
@@ -24,6 +27,7 @@ struct tcf_police {
 	spinlock_t		tcfp_lock ____cacheline_aligned_in_smp;
 	s64			tcfp_toks;
 	s64			tcfp_ptoks;
+	s64			tcfp_pkttoks;
 	s64			tcfp_t_c;
 };
 
@@ -99,14 +103,50 @@ static inline u32 tcf_police_burst(const struct tc_action *act)
 
 static inline u64 tcf_police_rate_pkt_ps(const struct tc_action *act)
 {
-	/* Not implemented */
-	return 0;
+	struct tcf_police *police = to_police(act);
+	struct tcf_police_params *params;
+
+	params = rcu_dereference_protected(police->params,
+					   lockdep_is_held(&police->tcf_lock));
+	return params->ppsrate.rate_pkts_ps;
 }
 
 static inline u32 tcf_police_burst_pkt(const struct tc_action *act)
 {
-	/* Not implemented */
-	return 0;
+	struct tcf_police *police = to_police(act);
+	struct tcf_police_params *params;
+	u32 burst;
+
+	params = rcu_dereference_protected(police->params,
+					   lockdep_is_held(&police->tcf_lock));
+
+	/*
+	 *  "rate" pkts     "burst" nanoseconds
+	 *  ------------ *  -------------------
+	 *    1 second          2^6 ticks
+	 *
+	 * ------------------------------------
+	 *        NSEC_PER_SEC nanoseconds
+	 *        ------------------------
+	 *              2^6 ticks
+	 *
+	 *    "rate" pkts    "burst" nanoseconds            2^6 ticks
+	 *  = ------------ * ------------------- * ------------------------
+	 *      1 second          2^6 ticks        NSEC_PER_SEC nanoseconds
+	 *
+	 *   "rate" * "burst"
+	 * = ---------------- pkts/nanosecond
+	 *    NSEC_PER_SEC^2
+	 *
+	 *
+	 *   "rate" * "burst"
+	 * = ---------------- pkts/second
+	 *     NSEC_PER_SEC
+	 */
+	burst = div_u64(params->tcfp_pkt_burst * params->ppsrate.rate_pkts_ps,
+			NSEC_PER_SEC);
+
+	return burst;
 }
 
 static inline u32 tcf_police_tcfp_mtu(const struct tc_action *act)
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7ea59cfe1fa7..025c40fef93d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -190,6 +190,8 @@ enum {
 	TCA_POLICE_PAD,
 	TCA_POLICE_RATE64,
 	TCA_POLICE_PEAKRATE64,
+	TCA_POLICE_PKTRATE64,
+	TCA_POLICE_PKTBURST64,
 	__TCA_POLICE_MAX
 #define TCA_POLICE_RESULT TCA_POLICE_RESULT
 };
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8d8452b1cdd4..0fab8de176d2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -42,6 +42,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
 	[TCA_POLICE_RATE64]     = { .type = NLA_U64 },
 	[TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
+	[TCA_POLICE_PKTRATE64]  = { .type = NLA_U64, .min = 1 },
+	[TCA_POLICE_PKTBURST64] = { .type = NLA_U64, .min = 1 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -61,6 +63,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	bool exists = false;
 	u32 index;
 	u64 rate64, prate64;
+	u64 pps, ppsburst;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -142,6 +145,21 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
+	if ((tb[TCA_POLICE_PKTRATE64] && !tb[TCA_POLICE_PKTBURST64]) ||
+	    (!tb[TCA_POLICE_PKTRATE64] && tb[TCA_POLICE_PKTBURST64])) {
+		NL_SET_ERR_MSG(extack,
+			       "Both or neither packet-per-second burst and rate must be provided");
+		err = -EINVAL;
+		goto failure;
+	}
+
+	if (tb[TCA_POLICE_PKTRATE64] && R_tab) {
+		NL_SET_ERR_MSG(extack,
+			       "packet-per-second and byte-per-second rate limits not allowed in same action");
+		err = -EINVAL;
+		goto failure;
+	}
+
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (unlikely(!new)) {
 		err = -ENOMEM;
@@ -183,6 +201,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_POLICE_AVRATE])
 		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
+	if (tb[TCA_POLICE_PKTRATE64]) {
+		pps = nla_get_u64(tb[TCA_POLICE_PKTRATE64]);
+		ppsburst = nla_get_u64(tb[TCA_POLICE_PKTBURST64]);
+		new->pps_present = true;
+		new->tcfp_pkt_burst = PSCHED_TICKS2NS(ppsburst);
+		psched_ppscfg_precompute(&new->ppsrate, pps);
+	}
+
 	spin_lock_bh(&police->tcf_lock);
 	spin_lock_bh(&police->tcfp_lock);
 	police->tcfp_t_c = ktime_get_ns();
@@ -217,8 +243,8 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			  struct tcf_result *res)
 {
 	struct tcf_police *police = to_police(a);
+	s64 now, toks, ppstoks = 0, ptoks = 0;
 	struct tcf_police_params *p;
-	s64 now, toks, ptoks = 0;
 	int ret;
 
 	tcf_lastuse_update(&police->tcf_tm);
@@ -236,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
-		if (!p->rate_present) {
+		if (!p->rate_present && !p->pps_present) {
 			ret = p->tcfp_result;
 			goto end;
 		}
@@ -251,14 +277,23 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 			ptoks -= (s64)psched_l2t_ns(&p->peak,
 						    qdisc_pkt_len(skb));
 		}
-		toks += police->tcfp_toks;
-		if (toks > p->tcfp_burst)
-			toks = p->tcfp_burst;
-		toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
-		if ((toks|ptoks) >= 0) {
+		if (p->rate_present) {
+			toks += police->tcfp_toks;
+			if (toks > p->tcfp_burst)
+				toks = p->tcfp_burst;
+			toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
+		} else if (p->pps_present) {
+			ppstoks = min_t(s64, now - police->tcfp_t_c, p->tcfp_pkt_burst);
+			ppstoks += police->tcfp_pkttoks;
+			if (ppstoks > p->tcfp_pkt_burst)
+				ppstoks = p->tcfp_pkt_burst;
+			ppstoks -= (s64)psched_pkt2t_ns(&p->ppsrate, 1);
+		}
+		if ((toks | ptoks | ppstoks) >= 0) {
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
 			police->tcfp_ptoks = ptoks;
+			police->tcfp_pkttoks = ppstoks;
 			spin_unlock_bh(&police->tcfp_lock);
 			ret = p->tcfp_result;
 			goto inc_drops;
@@ -331,6 +366,16 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
 				      TCA_POLICE_PAD))
 			goto nla_put_failure;
 	}
+	if (p->pps_present) {
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64,
+				      police->params->ppsrate.rate_pkts_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+		if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64,
+				      PSCHED_NS2TICKS(p->tcfp_pkt_burst),
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if (p->tcfp_result &&
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 49eae93d1489..44991ea726fc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1325,6 +1325,48 @@ void dev_shutdown(struct net_device *dev)
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
 
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate:   Rate to compute reciprocal division values of
+ * @mult:   Multiplier for reciprocal division
+ * @shift:  Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ *  time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen.
+ *
+ * reciprocal_value() is not used here it doesn't handle 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+	u64 factor = NSEC_PER_SEC;
+
+	*mult = 1;
+	*shift = 0;
+
+	if (rate <= 0)
+		return;
+
+	for (;;) {
+		*mult = div64_u64(factor, rate);
+		if (*mult & (1U << 31) || factor & (1ULL << 63))
+			break;
+		factor <<= 1;
+		(*shift)++;
+	}
+}
+
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
 			       const struct tc_ratespec *conf,
 			       u64 rate64)
@@ -1333,34 +1375,17 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
 	r->overhead = conf->overhead;
 	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
-	r->mult = 1;
-	/*
-	 * The deal here is to replace a divide by a reciprocal one
-	 * in fast path (a reciprocal divide is a multiply and a shift)
-	 *
-	 * Normal formula would be :
-	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
-	 *
-	 * We compute mult/shift to use instead :
-	 *  time_in_ns = (len * mult) >> shift;
-	 *
-	 * We try to get the highest possible mult value for accuracy,
-	 * but have to make sure no overflows will ever happen.
-	 */
-	if (r->rate_bytes_ps > 0) {
-		u64 factor = NSEC_PER_SEC;
-
-		for (;;) {
-			r->mult = div64_u64(factor, r->rate_bytes_ps);
-			if (r->mult & (1U << 31) || factor & (1ULL << 63))
-				break;
-			factor <<= 1;
-			r->shift++;
-		}
-	}
+	psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
 
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
+{
+	r->rate_pkts_ps = pktrate64;
+	psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
+}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
+
 static void mini_qdisc_rcu_func(struct rcu_head *head)
 {
 }
-- 
cgit v1.2.3


From ba2d1c28886ceacd7da96466529f7929eaf3a498 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Sat, 13 Mar 2021 10:39:36 +0100
Subject: net: dsa: hellcreek: Add devlink VLAN region

Allow to dump the VLAN table via devlink. This especially useful, because the
driver internally leverages VLANs for the port separation. These are not visible
via the bridge utility.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 74 ++++++++++++++++++++++++++++++++++
 drivers/net/dsa/hirschmann/hellcreek.h |  6 +++
 2 files changed, 80 insertions(+)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 463137c39db2..4a14760e2680 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1082,6 +1082,67 @@ out:
 	return err;
 }
 
+static int hellcreek_devlink_region_vlan_snapshot(struct devlink *dl,
+						  const struct devlink_region_ops *ops,
+						  struct netlink_ext_ack *extack,
+						  u8 **data)
+{
+	struct hellcreek_devlink_vlan_entry *table, *entry;
+	struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+	struct hellcreek *hellcreek = ds->priv;
+	int i;
+
+	table = kcalloc(VLAN_N_VID, sizeof(*entry), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	entry = table;
+
+	mutex_lock(&hellcreek->reg_lock);
+	for (i = 0; i < VLAN_N_VID; ++i, ++entry) {
+		entry->member = hellcreek->vidmbrcfg[i];
+		entry->vid    = i;
+	}
+	mutex_unlock(&hellcreek->reg_lock);
+
+	*data = (u8 *)table;
+
+	return 0;
+}
+
+static struct devlink_region_ops hellcreek_region_vlan_ops = {
+	.name	    = "vlan",
+	.snapshot   = hellcreek_devlink_region_vlan_snapshot,
+	.destructor = kfree,
+};
+
+static int hellcreek_setup_devlink_regions(struct dsa_switch *ds)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	struct devlink_region_ops *ops;
+	struct devlink_region *region;
+	u64 size;
+
+	/* VLAN table */
+	size = VLAN_N_VID * sizeof(struct hellcreek_devlink_vlan_entry);
+	ops  = &hellcreek_region_vlan_ops;
+
+	region = dsa_devlink_region_create(ds, ops, 1, size);
+	if (IS_ERR(region))
+		return PTR_ERR(region);
+
+	hellcreek->vlan_region = region;
+
+	return 0;
+}
+
+static void hellcreek_teardown_devlink_regions(struct dsa_switch *ds)
+{
+	struct hellcreek *hellcreek = ds->priv;
+
+	dsa_devlink_region_destroy(hellcreek->vlan_region);
+}
+
 static int hellcreek_setup(struct dsa_switch *ds)
 {
 	struct hellcreek *hellcreek = ds->priv;
@@ -1143,11 +1204,24 @@ static int hellcreek_setup(struct dsa_switch *ds)
 		return ret;
 	}
 
+	ret = hellcreek_setup_devlink_regions(ds);
+	if (ret) {
+		dev_err(hellcreek->dev,
+			"Failed to setup devlink regions!\n");
+		goto err_regions;
+	}
+
 	return 0;
+
+err_regions:
+	dsa_devlink_resources_unregister(ds);
+
+	return ret;
 }
 
 static void hellcreek_teardown(struct dsa_switch *ds)
 {
+	hellcreek_teardown_devlink_regions(ds);
 	dsa_devlink_resources_unregister(ds);
 }
 
diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h
index 305e76dab34d..42339f9359d9 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.h
+++ b/drivers/net/dsa/hirschmann/hellcreek.h
@@ -278,6 +278,7 @@ struct hellcreek {
 	struct mutex reg_lock;	/* Switch IP register lock */
 	struct mutex vlan_lock;	/* VLAN bitmaps lock */
 	struct mutex ptp_lock;	/* PTP IP register lock */
+	struct devlink_region *vlan_region;
 	void __iomem *base;
 	void __iomem *ptp_base;
 	u16 swcfg;		/* swcfg shadow */
@@ -304,4 +305,9 @@ enum hellcreek_devlink_resource_id {
 	HELLCREEK_DEVLINK_PARAM_ID_FDB_TABLE,
 };
 
+struct hellcreek_devlink_vlan_entry {
+	u16 vid;
+	u16 member;
+};
+
 #endif /* _HELLCREEK_H_ */
-- 
cgit v1.2.3


From e81813fb56350641d8d3ba6bb6811ecaab934f10 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Sat, 13 Mar 2021 10:39:37 +0100
Subject: net: dsa: hellcreek: Use boolean value

hellcreek_select_vlan() takes a boolean instead of an integer.
So, use false accordingly.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 4a14760e2680..edac39462a07 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -433,7 +433,7 @@ static void hellcreek_unapply_vlan(struct hellcreek *hellcreek, int port,
 
 	mutex_lock(&hellcreek->reg_lock);
 
-	hellcreek_select_vlan(hellcreek, vid, 0);
+	hellcreek_select_vlan(hellcreek, vid, false);
 
 	/* Setup port vlan membership */
 	hellcreek_select_vlan_params(hellcreek, port, &shift, &mask);
-- 
cgit v1.2.3


From eb5f3d3141805fd22b2fb49a23536cc3f30dd752 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Sat, 13 Mar 2021 10:39:38 +0100
Subject: net: dsa: hellcreek: Move common code to helper

There are two functions which need to populate fdb entries. Move that to a
helper function.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 85 +++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 42 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index edac39462a07..38ff0f12e8a4 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -670,6 +670,40 @@ static int __hellcreek_fdb_del(struct hellcreek *hellcreek,
 	return hellcreek_wait_fdb_ready(hellcreek);
 }
 
+static void hellcreek_populate_fdb_entry(struct hellcreek *hellcreek,
+					 struct hellcreek_fdb_entry *entry,
+					 size_t idx)
+{
+	unsigned char addr[ETH_ALEN];
+	u16 meta, mac;
+
+	/* Read values */
+	meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
+	mac	= hellcreek_read(hellcreek, HR_FDBRDL);
+	addr[5] = mac & 0xff;
+	addr[4] = (mac & 0xff00) >> 8;
+	mac	= hellcreek_read(hellcreek, HR_FDBRDM);
+	addr[3] = mac & 0xff;
+	addr[2] = (mac & 0xff00) >> 8;
+	mac	= hellcreek_read(hellcreek, HR_FDBRDH);
+	addr[1] = mac & 0xff;
+	addr[0] = (mac & 0xff00) >> 8;
+
+	/* Populate @entry */
+	memcpy(entry->mac, addr, sizeof(addr));
+	entry->idx	    = idx;
+	entry->portmask	    = (meta & HR_FDBMDRD_PORTMASK_MASK) >>
+		HR_FDBMDRD_PORTMASK_SHIFT;
+	entry->age	    = (meta & HR_FDBMDRD_AGE_MASK) >>
+		HR_FDBMDRD_AGE_SHIFT;
+	entry->is_obt	    = !!(meta & HR_FDBMDRD_OBT);
+	entry->pass_blocked = !!(meta & HR_FDBMDRD_PASS_BLOCKED);
+	entry->is_static    = !!(meta & HR_FDBMDRD_STATIC);
+	entry->reprio_tc    = (meta & HR_FDBMDRD_REPRIO_TC_MASK) >>
+		HR_FDBMDRD_REPRIO_TC_SHIFT;
+	entry->reprio_en    = !!(meta & HR_FDBMDRD_REPRIO_EN);
+}
+
 /* Retrieve the index of a FDB entry by mac address. Currently we search through
  * the complete table in hardware. If that's too slow, we might have to cache
  * the complete FDB table in software.
@@ -691,39 +725,19 @@ static int hellcreek_fdb_get(struct hellcreek *hellcreek,
 	 * enter new entries anywhere.
 	 */
 	for (i = 0; i < hellcreek->fdb_entries; ++i) {
-		unsigned char addr[ETH_ALEN];
-		u16 meta, mac;
-
-		meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
-		mac	= hellcreek_read(hellcreek, HR_FDBRDL);
-		addr[5] = mac & 0xff;
-		addr[4] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDM);
-		addr[3] = mac & 0xff;
-		addr[2] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDH);
-		addr[1] = mac & 0xff;
-		addr[0] = (mac & 0xff00) >> 8;
+		struct hellcreek_fdb_entry tmp = { 0 };
+
+		/* Read entry */
+		hellcreek_populate_fdb_entry(hellcreek, &tmp, i);
 
 		/* Force next entry */
 		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
 
-		if (memcmp(addr, dest, ETH_ALEN))
+		if (memcmp(tmp.mac, dest, ETH_ALEN))
 			continue;
 
 		/* Match found */
-		entry->idx	    = i;
-		entry->portmask	    = (meta & HR_FDBMDRD_PORTMASK_MASK) >>
-			HR_FDBMDRD_PORTMASK_SHIFT;
-		entry->age	    = (meta & HR_FDBMDRD_AGE_MASK) >>
-			HR_FDBMDRD_AGE_SHIFT;
-		entry->is_obt	    = !!(meta & HR_FDBMDRD_OBT);
-		entry->pass_blocked = !!(meta & HR_FDBMDRD_PASS_BLOCKED);
-		entry->is_static    = !!(meta & HR_FDBMDRD_STATIC);
-		entry->reprio_tc    = (meta & HR_FDBMDRD_REPRIO_TC_MASK) >>
-			HR_FDBMDRD_REPRIO_TC_SHIFT;
-		entry->reprio_en    = !!(meta & HR_FDBMDRD_REPRIO_EN);
-		memcpy(entry->mac, addr, sizeof(addr));
+		memcpy(entry, &tmp, sizeof(*entry));
 
 		return 0;
 	}
@@ -838,18 +852,9 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 	for (i = 0; i < hellcreek->fdb_entries; ++i) {
 		unsigned char null_addr[ETH_ALEN] = { 0 };
 		struct hellcreek_fdb_entry entry = { 0 };
-		u16 meta, mac;
-
-		meta	= hellcreek_read(hellcreek, HR_FDBMDRD);
-		mac	= hellcreek_read(hellcreek, HR_FDBRDL);
-		entry.mac[5] = mac & 0xff;
-		entry.mac[4] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDM);
-		entry.mac[3] = mac & 0xff;
-		entry.mac[2] = (mac & 0xff00) >> 8;
-		mac	= hellcreek_read(hellcreek, HR_FDBRDH);
-		entry.mac[1] = mac & 0xff;
-		entry.mac[0] = (mac & 0xff00) >> 8;
+
+		/* Read entry */
+		hellcreek_populate_fdb_entry(hellcreek, &entry, i);
 
 		/* Force next entry */
 		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
@@ -858,10 +863,6 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port,
 		if (!memcmp(entry.mac, null_addr, ETH_ALEN))
 			continue;
 
-		entry.portmask	= (meta & HR_FDBMDRD_PORTMASK_MASK) >>
-			HR_FDBMDRD_PORTMASK_SHIFT;
-		entry.is_static	= !!(meta & HR_FDBMDRD_STATIC);
-
 		/* Check port mask */
 		if (!(entry.portmask & BIT(port)))
 			continue;
-- 
cgit v1.2.3


From 292cd449fee3a67541fab2626efb8af6a72b4c69 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Sat, 13 Mar 2021 10:39:39 +0100
Subject: net: dsa: hellcreek: Add devlink FDB region

Allow to dump the FDB table via devlink. This is a useful debugging feature.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 62 ++++++++++++++++++++++++++++++++++
 drivers/net/dsa/hirschmann/hellcreek.h |  1 +
 2 files changed, 63 insertions(+)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 38ff0f12e8a4..02d8bcb37f31 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1111,18 +1111,62 @@ static int hellcreek_devlink_region_vlan_snapshot(struct devlink *dl,
 	return 0;
 }
 
+static int hellcreek_devlink_region_fdb_snapshot(struct devlink *dl,
+						 const struct devlink_region_ops *ops,
+						 struct netlink_ext_ack *extack,
+						 u8 **data)
+{
+	struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+	struct hellcreek_fdb_entry *table, *entry;
+	struct hellcreek *hellcreek = ds->priv;
+	size_t i;
+
+	table = kcalloc(hellcreek->fdb_entries, sizeof(*entry), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	entry = table;
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	/* Start table read */
+	hellcreek_read(hellcreek, HR_FDBMAX);
+	hellcreek_write(hellcreek, 0x00, HR_FDBMAX);
+
+	for (i = 0; i < hellcreek->fdb_entries; ++i, ++entry) {
+		/* Read current entry */
+		hellcreek_populate_fdb_entry(hellcreek, entry, i);
+
+		/* Advance read pointer */
+		hellcreek_write(hellcreek, 0x00, HR_FDBRDH);
+	}
+
+	mutex_unlock(&hellcreek->reg_lock);
+
+	*data = (u8 *)table;
+
+	return 0;
+}
+
 static struct devlink_region_ops hellcreek_region_vlan_ops = {
 	.name	    = "vlan",
 	.snapshot   = hellcreek_devlink_region_vlan_snapshot,
 	.destructor = kfree,
 };
 
+static struct devlink_region_ops hellcreek_region_fdb_ops = {
+	.name	    = "fdb",
+	.snapshot   = hellcreek_devlink_region_fdb_snapshot,
+	.destructor = kfree,
+};
+
 static int hellcreek_setup_devlink_regions(struct dsa_switch *ds)
 {
 	struct hellcreek *hellcreek = ds->priv;
 	struct devlink_region_ops *ops;
 	struct devlink_region *region;
 	u64 size;
+	int ret;
 
 	/* VLAN table */
 	size = VLAN_N_VID * sizeof(struct hellcreek_devlink_vlan_entry);
@@ -1134,13 +1178,31 @@ static int hellcreek_setup_devlink_regions(struct dsa_switch *ds)
 
 	hellcreek->vlan_region = region;
 
+	/* FDB table */
+	size = hellcreek->fdb_entries * sizeof(struct hellcreek_fdb_entry);
+	ops  = &hellcreek_region_fdb_ops;
+
+	region = dsa_devlink_region_create(ds, ops, 1, size);
+	if (IS_ERR(region)) {
+		ret = PTR_ERR(region);
+		goto err_fdb;
+	}
+
+	hellcreek->fdb_region = region;
+
 	return 0;
+
+err_fdb:
+	dsa_devlink_region_destroy(hellcreek->vlan_region);
+
+	return ret;
 }
 
 static void hellcreek_teardown_devlink_regions(struct dsa_switch *ds)
 {
 	struct hellcreek *hellcreek = ds->priv;
 
+	dsa_devlink_region_destroy(hellcreek->fdb_region);
 	dsa_devlink_region_destroy(hellcreek->vlan_region);
 }
 
diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h
index 42339f9359d9..9e303b8ab13c 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.h
+++ b/drivers/net/dsa/hirschmann/hellcreek.h
@@ -279,6 +279,7 @@ struct hellcreek {
 	struct mutex vlan_lock;	/* VLAN bitmaps lock */
 	struct mutex ptp_lock;	/* PTP IP register lock */
 	struct devlink_region *vlan_region;
+	struct devlink_region *fdb_region;
 	void __iomem *base;
 	void __iomem *ptp_base;
 	u16 swcfg;		/* swcfg shadow */
-- 
cgit v1.2.3


From 65c7bc1b7a66f2cb0bafcefd9e15e1de44ac7ea7 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 13 Mar 2021 11:15:36 +0530
Subject: net: ethernet: marvell: Fixed typo in the file sky2.c

s/calclation/calculation/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/sky2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index dbec8e187a68..2a752fb6b758 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4135,7 +4135,7 @@ static int sky2_set_coalesce(struct net_device *dev,
 /*
  * Hardware is limited to min of 128 and max of 2048 for ring size
  * and  rounded up to next power of two
- * to avoid division in modulus calclation
+ * to avoid division in modulus calculation
  */
 static unsigned long roundup_ring_size(unsigned long pending)
 {
-- 
cgit v1.2.3


From 6fadbdd6dd3260120bde3f2e471125d4fbce168c Mon Sep 17 00:00:00 2001
From: Sanjana Srinidhi <sanjanasrinidhi1810@gmail.com>
Date: Sat, 13 Mar 2021 14:06:49 +0530
Subject: drivers: net: vxlan.c: Fix declaration issue

Added a blank line after structure declaration.
This is done to maintain code uniformity.

Signed-off-by: Sanjana Srinidhi <sanjanasrinidhi1810@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 666dd201c3d5..7665817f3cb6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3703,6 +3703,7 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
 #if IS_ENABLED(CONFIG_IPV6)
 		if (use_ipv6) {
 			struct inet6_dev *idev = __in6_dev_get(lowerdev);
+
 			if (idev && idev->cnf.disable_ipv6) {
 				NL_SET_ERR_MSG(extack,
 					       "IPv6 support disabled by administrator");
-- 
cgit v1.2.3


From e127906b68b49ddb3ecba39ffa36a329c48197d3 Mon Sep 17 00:00:00 2001
From: Jonathan McDowell <noodles@earth.li>
Date: Sat, 13 Mar 2021 13:18:26 +0000
Subject: net: stmmac: Set FIFO sizes for ipq806x

Commit eaf4fac47807 ("net: stmmac: Do not accept invalid MTU values")
started using the TX FIFO size to verify what counts as a valid MTU
request for the stmmac driver.  This is unset for the ipq806x variant.
Looking at older patches for this it seems the RX + TXs buffers can be
up to 8k, so set appropriately.

(I sent this as an RFC patch in June last year, but received no replies.
I've been running with this on my hardware (a MikroTik RB3011) since
then with larger MTUs to support both the internal qca8k switch and
VLANs with no problems. Without the patch it's impossible to set the
larger MTU required to support this.)

Signed-off-by: Jonathan McDowell <noodles@earth.li>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index bf3250e0e59c..749585fe6fc9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -352,6 +352,8 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	plat_dat->bsp_priv = gmac;
 	plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed;
 	plat_dat->multicast_filter_bins = 0;
+	plat_dat->tx_fifo_size = 8192;
+	plat_dat->rx_fifo_size = 8192;
 
 	err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (err)
-- 
cgit v1.2.3


From 3cc9b29ac0e1739091ccfe9ada2ffdd230312f2e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 12 Mar 2021 16:30:25 -0800
Subject: docs: net: tweak devlink health documentation

Minor tweaks and improvement of wording about the diagnose callback.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-health.rst | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
index 0c99b11f05f9..d52c9a605188 100644
--- a/Documentation/networking/devlink/devlink-health.rst
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -24,7 +24,7 @@ attributes of the health reporting and recovery procedures.
 
 The ``devlink`` health reporter:
 Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
+Error/Health type can be a known/generic (e.g. PCI error, fw error, rx/tx error)
 or unknown (driver specific).
 For each registered health reporter a driver can issue error/health reports
 asynchronously. All health reports handling is done by ``devlink``.
@@ -48,6 +48,7 @@ Once an error is reported, devlink health will perform the following actions:
   * Object dump is being taken and saved at the reporter instance (as long as
     there is no other dump which is already stored)
   * Auto recovery attempt is being done. Depends on:
+
     - Auto-recovery configuration
     - Grace period vs. time passed since last recover
 
@@ -72,14 +73,14 @@ via ``devlink``, e.g per error type (per health reporter):
    * - ``DEVLINK_CMD_HEALTH_REPORTER_SET``
      - Allows reporter-related configuration setting.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_RECOVER``
-     - Triggers a reporter's recovery procedure.
+     - Triggers reporter's recovery procedure.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE``
-     - Retrieves diagnostics data from a reporter on a device.
+     - Retrieves current device state related to the reporter.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET``
      - Retrieves the last stored dump. Devlink health
-       saves a single dump. If an dump is not already stored by the devlink
+       saves a single dump. If an dump is not already stored by devlink
        for this reporter, devlink generates a new dump.
-       dump output is defined by the reporter.
+       Dump output is defined by the reporter.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR``
      - Clears the last saved dump file for the specified reporter.
 
@@ -93,7 +94,7 @@ The following diagram provides a general overview of ``devlink-health``::
                                           +--------------------------+
                                                        |request for ops
                                                        |(diagnose,
-     mlx5_core                             devlink     |recover,
+      driver                               devlink     |recover,
                                                        |dump)
     +--------+                            +--------------------------+
     |        |                            |    reporter|             |
-- 
cgit v1.2.3


From 6f1629093399303bf19d6fcd5144061d1e25ec23 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 12 Mar 2021 16:30:26 -0800
Subject: docs: net: add missing devlink health cmd - trigger

Documentation is missing and it's not very clear what
this callback is for - presumably testing the recovery?

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-health.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
index d52c9a605188..e37f77734b5b 100644
--- a/Documentation/networking/devlink/devlink-health.rst
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -74,6 +74,10 @@ via ``devlink``, e.g per error type (per health reporter):
      - Allows reporter-related configuration setting.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_RECOVER``
      - Triggers reporter's recovery procedure.
+   * - ``DEVLINK_CMD_HEALTH_REPORTER_TEST``
+     - Triggers a fake health event on the reporter. The effects of the test
+       event in terms of recovery flow should follow closely that of a real
+       event.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE``
      - Retrieves current device state related to the reporter.
    * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET``
-- 
cgit v1.2.3


From ef700f2ea27e54f640c3957374469132e8bf46f5 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 11 Mar 2021 11:32:52 +0100
Subject: samples: pktgen: allow to specify delay parameter via new opt

DELAY may now be explicitly specified via common parameter -w

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/pktgen/README.rst                                      |  1 +
 samples/pktgen/parameters.sh                                   | 10 +++++++++-
 samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh         |  3 ---
 samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh            |  3 ---
 samples/pktgen/pktgen_sample01_simple.sh                       |  3 ---
 samples/pktgen/pktgen_sample02_multiqueue.sh                   |  1 -
 samples/pktgen/pktgen_sample03_burst_single_flow.sh            |  3 ---
 samples/pktgen/pktgen_sample04_many_flows.sh                   |  3 ---
 samples/pktgen/pktgen_sample05_flow_per_thread.sh              |  3 ---
 .../pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh   |  1 -
 10 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index f9c53ca5cf93..f4b38180e407 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -28,6 +28,7 @@ across the sample scripts.  Usage example is printed on errors::
   -b : ($BURST)     HW level bursting of SKBs
   -v : ($VERBOSE)   verbose
   -x : ($DEBUG)     debug
+  -w : ($DELAY)     Tx Delay value (ns)
 
 The global variable being set is also listed.  E.g. the required
 interface/device parameter "-i" sets variable $DEV.
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index ff0ed474fee9..dd6ef1d2e498 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -19,12 +19,13 @@ function usage() {
     echo "  -v : (\$VERBOSE)   verbose"
     echo "  -x : (\$DEBUG)     debug"
     echo "  -6 : (\$IP6)       IPv6"
+    echo "  -w : (\$DELAY)     Tx Delay value (ns)"
     echo ""
 }
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -66,6 +67,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:vxh6" option; do
 	  export BURST=$OPTARG
 	  info "SKB bursting: BURST=$BURST"
           ;;
+        w)
+	  export DELAY=$OPTARG
+	  info "DELAY=$DELAY"
+          ;;
         v)
           export VERBOSE=yes
           info "Verbose mode: VERBOSE=$VERBOSE"
@@ -100,6 +105,9 @@ if [ -z "$THREADS" ]; then
     export THREADS=1
 fi
 
+# default DELAY
+[ -z "$DELAY" ] && export DELAY=0 # Zero means max speed
+
 export L_THREAD=$(( THREADS + F_THREAD - 1 ))
 
 if [ -z "$DEV" ]; then
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
index 1b6204125d2d..30a610b541ad 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
@@ -50,9 +50,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
index e607cb369b20..a6195bd77532 100755
--- a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
+++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh
@@ -33,9 +33,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index a4e250b45dce..c2ad1fa32d3f 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -31,9 +31,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"        # Zero means max speed
-
 # Flow variation random source port between min and max
 UDP_SRC_MIN=9
 UDP_SRC_MAX=109
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index cb2495fcdc60..49e1e81a2945 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -17,7 +17,6 @@ source ${basedir}/parameters.sh
 [ -z "$COUNT" ] && COUNT="100000" # Zero means indefinitely
 
 # Base Config
-DELAY="0"        # Zero means max speed
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
 # Flow variation random source port between min and max
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index fff50765a5aa..f9b67affb567 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -42,9 +42,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index 2cd6b701400d..ac2d037a6160 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -34,9 +34,6 @@ fi
 [ -z "$FLOWS" ]     && FLOWS="8000"
 [ -z "$FLOWLEN" ]   && FLOWLEN="10"
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 if [[ -n "$BURST" ]]; then
     err 1 "Bursting not supported for this mode"
 fi
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 4cb6252ade39..85256484c86f 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -31,9 +31,6 @@ if [ -n "$DST_PORT" ]; then
     validate_ports $UDP_DST_MIN $UDP_DST_MAX
 fi
 
-# Base Config
-DELAY="0"  # Zero means max speed
-
 # General cleanup everything since last run
 pg_ctrl "reset"
 
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 728106060a02..7c73ab8fbe3c 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -15,7 +15,6 @@ root_check_run_with_sudo "$@"
 source ${basedir}/parameters.sh
 
 # Base Config
-DELAY="0"        # Zero means max speed
 [ -z "$COUNT" ]     && COUNT="20000000"   # Zero means indefinitely
 [ -z "$CLONE_SKB" ] && CLONE_SKB="0"
 
-- 
cgit v1.2.3


From c8fd4852022ca8ae85ac4d30d05950eaf506b828 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 11 Mar 2021 11:32:53 +0100
Subject: samples: pktgen: new append mode

To configure various complex flows we for sure can create custom
pktgen init scripts, but sometimes thats not that easy.

New "-a" (append) option in all the existing sample scripts allows
to append more "devices" into pktgen threads.

The most straightforward usecases for that are:
- using multiple devices. We have to generate full linerate on
all physical functions (ports) of our multiport device.
- pushing multiple flows (with different packet options)

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 samples/pktgen/README.rst                          | 17 ++++++++++++
 samples/pktgen/functions.sh                        |  7 ++++-
 samples/pktgen/parameters.sh                       |  7 ++++-
 samples/pktgen/pktgen_sample01_simple.sh           | 22 +++++++++-------
 samples/pktgen/pktgen_sample02_multiqueue.sh       | 28 +++++++++++---------
 .../pktgen/pktgen_sample03_burst_single_flow.sh    | 12 ++++++---
 samples/pktgen/pktgen_sample04_many_flows.sh       | 14 ++++++----
 samples/pktgen/pktgen_sample05_flow_per_thread.sh  | 14 ++++++----
 ...tgen_sample06_numa_awared_queue_irq_affinity.sh | 30 ++++++++++++----------
 9 files changed, 101 insertions(+), 50 deletions(-)

diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index f4b38180e407..f4adeed5f5f0 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -28,11 +28,28 @@ across the sample scripts.  Usage example is printed on errors::
   -b : ($BURST)     HW level bursting of SKBs
   -v : ($VERBOSE)   verbose
   -x : ($DEBUG)     debug
+  -6 : ($IP6)       IPv6
   -w : ($DELAY)     Tx Delay value (ns)
+  -a : ($APPEND)    Script will not reset generator's state, but will append its config
 
 The global variable being set is also listed.  E.g. the required
 interface/device parameter "-i" sets variable $DEV.
 
+"-a" parameter may be used to create different flows simultaneously.
+In this mode script will keep the existing config, will append its settings.
+In this mode you'll have to manually run traffic with "pg_ctrl start".
+
+For example you may use:
+
+    source ./samples/pktgen/functions.sh
+    pg_ctrl reset
+    # add first device
+    ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f0 -m 34:80:0d:a3:fc:c9 -t 8
+    # add second device
+    ./pktgen_sample06_numa_awared_queue_irq_affinity.sh -a -i ens1f1 -m 34:80:0d:a3:fc:c9 -t 8
+    # run joint traffic on two devs
+    pg_ctrl start
+
 Common functions
 ----------------
 The functions.sh file provides; Three different shell functions for
diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh
index dae06d5b38fa..a335393157eb 100644
--- a/samples/pktgen/functions.sh
+++ b/samples/pktgen/functions.sh
@@ -108,7 +108,12 @@ function pgset() {
     fi
 }
 
-[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT
+if [[ -z "$APPEND" ]]; then
+	if [[ $EUID -eq 0 ]]; then
+		# Cleanup pktgen setup on exit if thats not "append mode"
+		trap 'pg_ctrl "reset"' EXIT
+	fi
+fi
 
 ## -- General shell tricks --
 
diff --git a/samples/pktgen/parameters.sh b/samples/pktgen/parameters.sh
index dd6ef1d2e498..b4c1b371e4b8 100644
--- a/samples/pktgen/parameters.sh
+++ b/samples/pktgen/parameters.sh
@@ -20,12 +20,13 @@ function usage() {
     echo "  -x : (\$DEBUG)     debug"
     echo "  -6 : (\$IP6)       IPv6"
     echo "  -w : (\$DELAY)     Tx Delay value (ns)"
+    echo "  -a : (\$APPEND)    Script will not reset generator's state, but will append its config"
     echo ""
 }
 
 ##  --- Parse command line arguments / parameters ---
 ## echo "Commandline options:"
-while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6" option; do
+while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6a" option; do
     case $option in
         i) # interface
           export DEV=$OPTARG
@@ -83,6 +84,10 @@ while getopts "s:i:d:m:p:f:t:c:n:b:w:vxh6" option; do
 	  export IP6=6
 	  info "IP6: IP6=$IP6"
 	  ;;
+        a)
+          export APPEND=yes
+          info "Append mode: APPEND=$APPEND"
+          ;;
         h|?|*)
           usage;
           err 2 "[ERROR] Unknown parameters!!!"
diff --git a/samples/pktgen/pktgen_sample01_simple.sh b/samples/pktgen/pktgen_sample01_simple.sh
index c2ad1fa32d3f..a09f3422fbcc 100755
--- a/samples/pktgen/pktgen_sample01_simple.sh
+++ b/samples/pktgen/pktgen_sample01_simple.sh
@@ -37,11 +37,11 @@ UDP_SRC_MAX=109
 
 # General cleanup everything since last run
 # (especially important if other threads were configured by other scripts)
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Add remove all other devices and add_device $DEV to thread 0
 thread=0
-pg_thread $thread "rem_device_all"
+[ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
 pg_thread $thread "add_device" $DEV
 
 # How many packets to send (zero means indefinitely)
@@ -77,11 +77,15 @@ pg_set $DEV "flag UDPSRC_RND"
 pg_set $DEV "udp_src_min $UDP_SRC_MIN"
 pg_set $DEV "udp_src_max $UDP_SRC_MAX"
 
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+if [ -z "$APPEND" ]; then
+    # start_run
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
 
-# Print results
-echo "Result device: $DEV"
-cat /proc/net/pktgen/$DEV
+    # Print results
+    echo "Result device: $DEV"
+    cat /proc/net/pktgen/$DEV
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
\ No newline at end of file
diff --git a/samples/pktgen/pktgen_sample02_multiqueue.sh b/samples/pktgen/pktgen_sample02_multiqueue.sh
index 49e1e81a2945..acae8ede0d6c 100755
--- a/samples/pktgen/pktgen_sample02_multiqueue.sh
+++ b/samples/pktgen/pktgen_sample02_multiqueue.sh
@@ -38,7 +38,7 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
@@ -47,7 +47,7 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Notice config queue to map to cpu (mirrors smp_processor_id())
@@ -81,14 +81,18 @@ for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     pg_set $dev "udp_src_max $UDP_SRC_MAX"
 done
 
-# start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
+if [ -z "$APPEND" ]; then
+    # start_run
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
 
-# Print results
-for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+    # Print results
+    for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample03_burst_single_flow.sh b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
index f9b67affb567..5adcf954de73 100755
--- a/samples/pktgen/pktgen_sample03_burst_single_flow.sh
+++ b/samples/pktgen/pktgen_sample03_burst_single_flow.sh
@@ -43,14 +43,14 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -94,5 +94,9 @@ function control_c() {
 # trap keyboard interrupt (Ctrl-C)
 trap control_c SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample04_many_flows.sh b/samples/pktgen/pktgen_sample04_many_flows.sh
index ac2d037a6160..ddce876635aa 100755
--- a/samples/pktgen/pktgen_sample04_many_flows.sh
+++ b/samples/pktgen/pktgen_sample04_many_flows.sh
@@ -42,14 +42,14 @@ fi
 read -r SRC_MIN SRC_MAX <<< $(parse_addr 198.18.0.0/15)
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -104,7 +104,11 @@ function print_result() {
 # trap keyboard interrupt (Ctrl-C)
 trap true SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
 
-print_result
+    print_result
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample05_flow_per_thread.sh b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
index 85256484c86f..4a65fe2fcee9 100755
--- a/samples/pktgen/pktgen_sample05_flow_per_thread.sh
+++ b/samples/pktgen/pktgen_sample05_flow_per_thread.sh
@@ -32,14 +32,14 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((thread = $F_THREAD; thread <= $L_THREAD; thread++)); do
     dev=${DEV}@${thread}
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # Base config
@@ -88,7 +88,11 @@ function print_result() {
 # trap keyboard interrupt (Ctrl-C)
 trap true SIGINT
 
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
 
-print_result
+    print_result
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
diff --git a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
index 7c73ab8fbe3c..10f1da571f40 100755
--- a/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
+++ b/samples/pktgen/pktgen_sample06_numa_awared_queue_irq_affinity.sh
@@ -44,7 +44,7 @@ if [ -n "$DST_PORT" ]; then
 fi
 
 # General cleanup everything since last run
-pg_ctrl "reset"
+[ -z "$APPEND" ] && pg_ctrl "reset"
 
 # Threads are specified with parameter -t value in $THREADS
 for ((i = 0; i < $THREADS; i++)); do
@@ -58,7 +58,7 @@ for ((i = 0; i < $THREADS; i++)); do
     info "irq ${irq_array[$i]} is set affinity to `cat /proc/irq/${irq_array[$i]}/smp_affinity_list`"
 
     # Add remove all other devices and add_device $dev to thread
-    pg_thread $thread "rem_device_all"
+    [ -z "$APPEND" ] && pg_thread $thread "rem_device_all"
     pg_thread $thread "add_device" $dev
 
     # select queue and bind the queue and $dev in 1:1 relationship
@@ -99,14 +99,18 @@ for ((i = 0; i < $THREADS; i++)); do
 done
 
 # start_run
-echo "Running... ctrl^C to stop" >&2
-pg_ctrl "start"
-echo "Done" >&2
-
-# Print results
-for ((i = 0; i < $THREADS; i++)); do
-    thread=${cpu_array[$((i+F_THREAD))]}
-    dev=${DEV}@${thread}
-    echo "Device: $dev"
-    cat /proc/net/pktgen/$dev | grep -A2 "Result:"
-done
+if [ -z "$APPEND" ]; then
+    echo "Running... ctrl^C to stop" >&2
+    pg_ctrl "start"
+    echo "Done" >&2
+
+    # Print results
+    for ((i = 0; i < $THREADS; i++)); do
+        thread=${cpu_array[$((i+F_THREAD))]}
+        dev=${DEV}@${thread}
+        echo "Device: $dev"
+        cat /proc/net/pktgen/$dev | grep -A2 "Result:"
+    done
+else
+    echo "Append mode: config done. Do more or use 'pg_ctrl start' to run"
+fi
-- 
cgit v1.2.3


From bd49fea7586b9d39a38846e9ef4ac056e4eb6e59 Mon Sep 17 00:00:00 2001
From: Shachar Raindel <shacharr@microsoft.com>
Date: Fri, 12 Mar 2021 15:45:27 -0800
Subject: hv_netvsc: Add a comment clarifying batching logic

The batching logic in netvsc_send is non-trivial, due to
a combination of the Linux API and the underlying hypervisor
interface. Add a comment explaining why the code is written this
way.

Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/microsoft/netvsc.rst     | 14 +++++++++-----
 drivers/net/hyperv/netvsc.c                          | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst b/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
index c3f51c672a68..fc5acd427a5d 100644
--- a/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
+++ b/Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
@@ -87,11 +87,15 @@ Receive Buffer
   contain one or more packets. The number of receive sections may be changed
   via ethtool Rx ring parameters.
 
-  There is a similar send buffer which is used to aggregate packets for sending.
-  The send area is broken into chunks of 6144 bytes, each of section may
-  contain one or more packets. The send buffer is an optimization, the driver
-  will use slower method to handle very large packets or if the send buffer
-  area is exhausted.
+  There is a similar send buffer which is used to aggregate packets
+  for sending.  The send area is broken into chunks, typically of 6144
+  bytes, each of section may contain one or more packets. Small
+  packets are usually transmitted via copy to the send buffer. However,
+  if the buffer is temporarily exhausted, or the packet to be transmitted is
+  an LSO packet, the driver will provide the host with pointers to the data
+  from the SKB. This attempts to achieve a balance between the overhead of
+  data copy and the impact of remapping VM memory to be accessible by the
+  host.
 
 XDP support
 -----------
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index c64cc7639c39..5bce24731502 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1017,6 +1017,26 @@ static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
 }
 
 /* RCU already held by caller */
+/* Batching/bouncing logic is designed to attempt to optimize
+ * performance.
+ *
+ * For small, non-LSO packets we copy the packet to a send buffer
+ * which is pre-registered with the Hyper-V side. This enables the
+ * hypervisor to avoid remapping the aperture to access the packet
+ * descriptor and data.
+ *
+ * If we already started using a buffer and the netdev is transmitting
+ * a burst of packets, keep on copying into the buffer until it is
+ * full or we are done collecting a burst. If there is an existing
+ * buffer with space for the RNDIS descriptor but not the packet, copy
+ * the RNDIS descriptor to the buffer, keeping the packet in place.
+ *
+ * If we do batching and send more than one packet using a single
+ * NetVSC message, free the SKBs of the packets copied, except for the
+ * last packet. This is done to streamline the handling of the case
+ * where the last packet only had the RNDIS descriptor copied to the
+ * send buffer, with the data pointers included in the NetVSC message.
+ */
 int netvsc_send(struct net_device *ndev,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
-- 
cgit v1.2.3


From f4e6d7cdbfae502788bc468295b232dec76ee57e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 12 Mar 2021 13:11:01 -0800
Subject: net: dsa: bcm_sf2: Fill in BCM4908 CFP entries

The BCM4908 switch has 256 CFP entrie, update that setting so CFP can be
used.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 3a3050e85af7..d8e6dd371468 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1200,7 +1200,7 @@ static const struct bcm_sf2_of_data bcm_sf2_4908_data = {
 	.type		= BCM4908_DEVICE_ID,
 	.core_reg_align	= 0,
 	.reg_offsets	= bcm_sf2_4908_reg_offsets,
-	.num_cfp_rules	= 0, /* FIXME */
+	.num_cfp_rules	= 256,
 	.num_crossbar_int_ports = 2,
 };
 
-- 
cgit v1.2.3


From 0ccf4d50d14f360dfae5b25b8ffcb27f98e591f0 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sat, 13 Mar 2021 20:30:05 +0000
Subject: gro: simplify gro_list_prepare()

gro_list_prepare() always returns &napi->gro_hash[bucket].list,
without any variations. Moreover, it uses 'napi' argument only to
have access to this list, and calculates the bucket index for the
second time (firstly it happens at the beginning of
dev_gro_receive()) to do that.
Given that dev_gro_receive() already has an index to the needed
list, just pass it as the first argument to eliminate redundant
calculations, and make gro_list_prepare() return void.
Also, both arguments of gro_list_prepare() can be constified since
this function can only modify the skbs from the bucket list.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 2bfdd528c7c3..1317e6b6758a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5858,15 +5858,13 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-static struct list_head *gro_list_prepare(struct napi_struct *napi,
-					  struct sk_buff *skb)
+static void gro_list_prepare(const struct list_head *head,
+			     const struct sk_buff *skb)
 {
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
-	struct list_head *head;
 	struct sk_buff *p;
 
-	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
 	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
@@ -5892,8 +5890,6 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
-
-	return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -5957,10 +5953,10 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	struct list_head *gro_head = &napi->gro_hash[hash].list;
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
 	enum gro_result ret;
 	int same_flow;
@@ -5969,7 +5965,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_head = gro_list_prepare(napi, skb);
+	gro_list_prepare(gro_head, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
-- 
cgit v1.2.3


From 9dc2c313378528afe1bddf12cad88dbfe0998820 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sat, 13 Mar 2021 20:30:10 +0000
Subject: gro: consistentify napi->gro_hash[x] access in dev_gro_receive()

GRO bucket index doesn't change through the entire function.
Store a pointer to the corresponding bucket instead of its member
and use it consistently through the function.
It is performance-safe since &gro_list->list == gro_list.

Misc: remove superfluous braces around single-line branches.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1317e6b6758a..b635467087f3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5953,7 +5953,7 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	struct list_head *gro_head = &napi->gro_hash[hash].list;
+	struct gro_list *gro_list = &napi->gro_hash[hash];
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
@@ -5965,7 +5965,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_list_prepare(gro_head, skb);
+	gro_list_prepare(&gro_list->list, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -6001,7 +6001,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 		pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
 					ipv6_gro_receive, inet_gro_receive,
-					gro_head, skb);
+					&gro_list->list, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -6020,7 +6020,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (pp) {
 		skb_list_del_init(pp);
 		napi_gro_complete(napi, pp);
-		napi->gro_hash[hash].count--;
+		gro_list->count--;
 	}
 
 	if (same_flow)
@@ -6029,16 +6029,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
-		gro_flush_oldest(napi, gro_head);
-	} else {
-		napi->gro_hash[hash].count++;
-	}
+	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
+		gro_flush_oldest(napi, &gro_list->list);
+	else
+		gro_list->count++;
+
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	list_add(&skb->list, gro_head);
+	list_add(&skb->list, &gro_list->list);
 	ret = GRO_HELD;
 
 pull:
@@ -6046,7 +6046,7 @@ pull:
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
-	if (napi->gro_hash[hash].count) {
+	if (gro_list->count) {
 		if (!test_bit(hash, &napi->gro_bitmask))
 			__set_bit(hash, &napi->gro_bitmask);
 	} else if (test_bit(hash, &napi->gro_bitmask)) {
-- 
cgit v1.2.3


From d0eed5c325149002c364a1439ae1afe1992beae4 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sat, 13 Mar 2021 20:30:14 +0000
Subject: gro: give 'hash' variable in dev_gro_receive() a less confusing name

'hash' stores not the flow hash, but the index of the GRO bucket
corresponding to it.
Change its name to 'bucket' to avoid confusion while reading lines
like '__set_bit(hash, &napi->gro_bitmask)'.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index b635467087f3..5a2847a19cf2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5952,8 +5952,8 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
-	struct gro_list *gro_list = &napi->gro_hash[hash];
+	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	struct gro_list *gro_list = &napi->gro_hash[bucket];
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
@@ -6047,10 +6047,10 @@ pull:
 		gro_pull_from_frag0(skb, grow);
 ok:
 	if (gro_list->count) {
-		if (!test_bit(hash, &napi->gro_bitmask))
-			__set_bit(hash, &napi->gro_bitmask);
-	} else if (test_bit(hash, &napi->gro_bitmask)) {
-		__clear_bit(hash, &napi->gro_bitmask);
+		if (!test_bit(bucket, &napi->gro_bitmask))
+			__set_bit(bucket, &napi->gro_bitmask);
+	} else if (test_bit(bucket, &napi->gro_bitmask)) {
+		__clear_bit(bucket, &napi->gro_bitmask);
 	}
 
 	return ret;
-- 
cgit v1.2.3


From dac06b32c705dc8824479b03eee826b4f6615ab2 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:00 +0000
Subject: flow_dissector: constify bpf_flow_dissector's data pointers

BPF Flow dissection programs are read-only and don't touch input
buffers.
Mark 'data' and 'data_end' in struct bpf_flow_dissector as const
in preparation for global input constifying.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index cc10b10dc3a1..bf00e71816ed 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -368,8 +368,8 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec
 struct bpf_flow_dissector {
 	struct bpf_flow_keys	*flow_keys;
 	const struct sk_buff	*skb;
-	void			*data;
-	void			*data_end;
+	const void		*data;
+	const void		*data_end;
 };
 
 static inline void
-- 
cgit v1.2.3


From e3305138da47f0ae2241e5daa18af276e1e54457 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:14 +0000
Subject: skbuff: make __skb_header_pointer()'s data argument const

The function never modifies the input buffer, so 'data' argument
can be marked as const.
This implies one harmless cast-away.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 483e89348f78..d6ea3dc3eddb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3678,11 +3678,11 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
 		    __wsum csum);
 
 static inline void * __must_check
-__skb_header_pointer(const struct sk_buff *skb, int offset,
-		     int len, void *data, int hlen, void *buffer)
+__skb_header_pointer(const struct sk_buff *skb, int offset, int len,
+		     const void *data, int hlen, void *buffer)
 {
 	if (hlen - offset >= len)
-		return data + offset;
+		return (void *)data + offset;
 
 	if (!skb ||
 	    skb_copy_bits(skb, offset, buffer, len) < 0)
-- 
cgit v1.2.3


From f96533cded173b3b019001a505a746c3cd8fc323 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:23 +0000
Subject: flow_dissector: constify raw input data argument

Flow Dissector code never modifies the input buffer, neither skb nor
raw data.
Make 'data' argument const for all of the Flow dissector's functions.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h       | 15 +++++++--------
 include/net/flow_dissector.h |  2 +-
 net/core/flow_dissector.c    | 41 ++++++++++++++++++++++-------------------
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6ea3dc3eddb..46c61e127e9f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1292,10 +1292,10 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 void __skb_get_hash(struct sk_buff *skb);
 u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
 		   const struct flow_keys_basic *keys, int hlen);
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen_proto);
+			    const void *data, int hlen_proto);
 
 static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
 					int thoff, u8 ip_proto)
@@ -1314,9 +1314,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 bool __skb_flow_dissect(const struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container,
-			void *data, __be16 proto, int nhoff, int hlen,
-			unsigned int flags);
+			void *target_container, const void *data,
+			__be16 proto, int nhoff, int hlen, unsigned int flags);
 
 static inline bool skb_flow_dissect(const struct sk_buff *skb,
 				    struct flow_dissector *flow_dissector,
@@ -1338,9 +1337,9 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb,
 static inline bool
 skb_flow_dissect_flow_keys_basic(const struct net *net,
 				 const struct sk_buff *skb,
-				 struct flow_keys_basic *flow, void *data,
-				 __be16 proto, int nhoff, int hlen,
-				 unsigned int flags)
+				 struct flow_keys_basic *flow,
+				 const void *data, __be16 proto,
+				 int nhoff, int hlen, unsigned int flags)
 {
 	memset(flow, 0, sizeof(*flow));
 	return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow,
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index bf00e71816ed..ffd386ea0dbb 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -350,7 +350,7 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 u32 flow_hash_from_keys(struct flow_keys *keys);
 void skb_flow_get_icmp_tci(const struct sk_buff *skb,
 			   struct flow_dissector_key_icmp *key_icmp,
-			   void *data, int thoff, int hlen);
+			   const void *data, int thoff, int hlen);
 
 static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
 				      enum flow_dissector_key_id key_id)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2ef2224b3bff..2ed380d096ce 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -114,7 +114,7 @@ int flow_dissector_bpf_prog_attach_check(struct net *net,
  * is the protocol port offset returned from proto_ports_offset
  */
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
-			    void *data, int hlen)
+			    const void *data, int hlen)
 {
 	int poff = proto_ports_offset(ip_proto);
 
@@ -161,7 +161,7 @@ static bool icmp_has_id(u8 type)
  */
 void skb_flow_get_icmp_tci(const struct sk_buff *skb,
 			   struct flow_dissector_key_icmp *key_icmp,
-			   void *data, int thoff, int hlen)
+			   const void *data, int thoff, int hlen)
 {
 	struct icmphdr *ih, _ih;
 
@@ -187,8 +187,8 @@ EXPORT_SYMBOL(skb_flow_get_icmp_tci);
  */
 static void __skb_flow_dissect_icmp(const struct sk_buff *skb,
 				    struct flow_dissector *flow_dissector,
-				    void *target_container,
-				    void *data, int thoff, int hlen)
+				    void *target_container, const void *data,
+				    int thoff, int hlen)
 {
 	struct flow_dissector_key_icmp *key_icmp;
 
@@ -409,8 +409,8 @@ EXPORT_SYMBOL(skb_flow_dissect_hash);
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, int nhoff, int hlen,
-			int lse_index, bool *entropy_label)
+			void *target_container, const void *data, int nhoff,
+			int hlen, int lse_index, bool *entropy_label)
 {
 	struct mpls_label *hdr, _hdr;
 	u32 entry, label, bos;
@@ -467,7 +467,8 @@ __skb_flow_dissect_mpls(const struct sk_buff *skb,
 static enum flow_dissect_ret
 __skb_flow_dissect_arp(const struct sk_buff *skb,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data, int nhoff, int hlen)
+		       void *target_container, const void *data,
+		       int nhoff, int hlen)
 {
 	struct flow_dissector_key_arp *key_arp;
 	struct {
@@ -523,7 +524,7 @@ static enum flow_dissect_ret
 __skb_flow_dissect_gre(const struct sk_buff *skb,
 		       struct flow_dissector_key_control *key_control,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data,
+		       void *target_container, const void *data,
 		       __be16 *p_proto, int *p_nhoff, int *p_hlen,
 		       unsigned int flags)
 {
@@ -663,8 +664,8 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
 static enum flow_dissect_ret
 __skb_flow_dissect_batadv(const struct sk_buff *skb,
 			  struct flow_dissector_key_control *key_control,
-			  void *data, __be16 *p_proto, int *p_nhoff, int hlen,
-			  unsigned int flags)
+			  const void *data, __be16 *p_proto, int *p_nhoff,
+			  int hlen, unsigned int flags)
 {
 	struct {
 		struct batadv_unicast_packet batadv_unicast;
@@ -695,7 +696,8 @@ __skb_flow_dissect_batadv(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_tcp(const struct sk_buff *skb,
 		       struct flow_dissector *flow_dissector,
-		       void *target_container, void *data, int thoff, int hlen)
+		       void *target_container, const void *data,
+		       int thoff, int hlen)
 {
 	struct flow_dissector_key_tcp *key_tcp;
 	struct tcphdr *th, _th;
@@ -719,8 +721,8 @@ __skb_flow_dissect_tcp(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ports(const struct sk_buff *skb,
 			 struct flow_dissector *flow_dissector,
-			 void *target_container, void *data, int nhoff,
-			 u8 ip_proto, int hlen)
+			 void *target_container, const void *data,
+			 int nhoff, u8 ip_proto, int hlen)
 {
 	enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
 	struct flow_dissector_key_ports *key_ports;
@@ -744,7 +746,8 @@ __skb_flow_dissect_ports(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ipv4(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, const struct iphdr *iph)
+			void *target_container, const void *data,
+			const struct iphdr *iph)
 {
 	struct flow_dissector_key_ip *key_ip;
 
@@ -761,7 +764,8 @@ __skb_flow_dissect_ipv4(const struct sk_buff *skb,
 static void
 __skb_flow_dissect_ipv6(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, const struct ipv6hdr *iph)
+			void *target_container, const void *data,
+			const struct ipv6hdr *iph)
 {
 	struct flow_dissector_key_ip *key_ip;
 
@@ -908,9 +912,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 bool __skb_flow_dissect(const struct net *net,
 			const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container,
-			void *data, __be16 proto, int nhoff, int hlen,
-			unsigned int flags)
+			void *target_container, const void *data,
+			__be16 proto, int nhoff, int hlen, unsigned int flags)
 {
 	struct flow_dissector_key_control *key_control;
 	struct flow_dissector_key_basic *key_basic;
@@ -1642,7 +1645,7 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb,
 }
 EXPORT_SYMBOL(skb_get_hash_perturb);
 
-u32 __skb_get_poff(const struct sk_buff *skb, void *data,
+u32 __skb_get_poff(const struct sk_buff *skb, const void *data,
 		   const struct flow_keys_basic *keys, int hlen)
 {
 	u32 poff = keys->control.thoff;
-- 
cgit v1.2.3


From 805a25f3a1bdf4aafd0af412ce1e47d0cb6c7628 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:32 +0000
Subject: linux/etherdevice.h: misc trailing whitespace cleanup

Caught by the text editor. Fix it separately from the actual changes.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2e5debc0373c..bcb2f81baafb 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -11,7 +11,7 @@
  * Authors:	Ross Biro
  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *
- *		Relocated to include/linux where it belongs by Alan Cox 
+ *		Relocated to include/linux where it belongs by Alan Cox
  *							<gw4pts@gw4pts.ampr.org>
  */
 #ifndef _LINUX_ETHERDEVICE_H
-- 
cgit v1.2.3


From 59753ce8b196de60211a989c75ece8aeb0d9d57c Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:41 +0000
Subject: ethernet: constify eth_get_headlen()'s data argument

It's used only for flow dissection, which now takes constant data
pointers.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 2 +-
 net/ethernet/eth.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index bcb2f81baafb..330345b1be54 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -29,7 +29,7 @@ struct device;
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len);
+u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4106373180c6..e01cf766d2c5 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -122,7 +122,7 @@ EXPORT_SYMBOL(eth_header);
  * Make a best effort attempt to pull the length for all of the headers for
  * a given frame in a linear buffer.
  */
-u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len)
+u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len)
 {
 	const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
 	const struct ethhdr *eth = (const struct ethhdr *)data;
-- 
cgit v1.2.3


From d206121faf8bb2239cd970af0bd32f5203780427 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Sun, 14 Mar 2021 11:11:50 +0000
Subject: skbuff: micro-optimize {,__}skb_header_pointer()

{,__}skb_header_pointer() helpers exist mainly for preventing
accesses-beyond-end of the linear data.
In the vast majorify of cases, they bail out on the first condition.
All code going after is mostly a fallback.
Mark the most common branch as 'likely' one to move it in-line.
Also, skb_copy_bits() can return negative values only when the input
arguments are invalid, e.g. offset is greater than skb->len. It can
be safely marked as 'unlikely' branch, assuming that hotpath code
provides sane input to not fail here.

These two bump the throughput with a single Flow Dissector pass on
every packet (e.g. with RPS or driver that uses eth_get_headlen())
on 20 Mbps per flow/core.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 46c61e127e9f..ecc029674ae4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3680,11 +3680,10 @@ static inline void * __must_check
 __skb_header_pointer(const struct sk_buff *skb, int offset, int len,
 		     const void *data, int hlen, void *buffer)
 {
-	if (hlen - offset >= len)
+	if (likely(hlen - offset >= len))
 		return (void *)data + offset;
 
-	if (!skb ||
-	    skb_copy_bits(skb, offset, buffer, len) < 0)
+	if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0))
 		return NULL;
 
 	return buffer;
-- 
cgit v1.2.3


From a03e99d39f1943ec88f6fd3b0b9f34c20663d401 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:30 +0200
Subject: psample: Encapsulate packet metadata in a struct

Currently, callers of psample_sample_packet() pass three metadata
attributes: Ingress port, egress port and truncated size. Subsequent
patches are going to add more attributes (e.g., egress queue occupancy),
which also need an indication whether they are valid or not.

Encapsulate packet metadata in a struct in order to keep the number of
arguments reasonable.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  8 ++++----
 include/net/psample.h                          | 14 +++++++++-----
 net/psample/psample.c                          |  6 ++++--
 net/sched/act_sample.c                         | 16 ++++++----------
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 93b15b8c007e..3b15f8d728a3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2217,7 +2217,7 @@ void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	struct mlxsw_sp_port_sample *sample;
-	u32 size;
+	struct psample_metadata md = {};
 
 	if (unlikely(!mlxsw_sp_port)) {
 		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n",
@@ -2229,9 +2229,9 @@ void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 	sample = rcu_dereference(mlxsw_sp_port->sample);
 	if (!sample)
 		goto out_unlock;
-	size = sample->truncate ? sample->trunc_size : skb->len;
-	psample_sample_packet(sample->psample_group, skb, size,
-			      mlxsw_sp_port->dev->ifindex, 0, sample->rate);
+	md.trunc_size = sample->truncate ? sample->trunc_size : skb->len;
+	md.in_ifindex = mlxsw_sp_port->dev->ifindex;
+	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
 out_unlock:
 	rcu_read_unlock();
 out:
diff --git a/include/net/psample.h b/include/net/psample.h
index 68ae16bb0a4a..ac6dbfb3870d 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -14,6 +14,12 @@ struct psample_group {
 	struct rcu_head rcu;
 };
 
+struct psample_metadata {
+	u32 trunc_size;
+	int in_ifindex;
+	int out_ifindex;
+};
+
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
 void psample_group_take(struct psample_group *group);
 void psample_group_put(struct psample_group *group);
@@ -21,15 +27,13 @@ void psample_group_put(struct psample_group *group);
 #if IS_ENABLED(CONFIG_PSAMPLE)
 
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
-			   u32 trunc_size, int in_ifindex, int out_ifindex,
-			   u32 sample_rate);
+			   u32 sample_rate, const struct psample_metadata *md);
 
 #else
 
 static inline void psample_sample_packet(struct psample_group *group,
-					 struct sk_buff *skb, u32 trunc_size,
-					 int in_ifindex, int out_ifindex,
-					 u32 sample_rate)
+					 struct sk_buff *skb, u32 sample_rate,
+					 const struct psample_metadata *md)
 {
 }
 
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 482c07f2766b..065bc887d239 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -356,9 +356,11 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
 #endif
 
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
-			   u32 trunc_size, int in_ifindex, int out_ifindex,
-			   u32 sample_rate)
+			   u32 sample_rate, const struct psample_metadata *md)
 {
+	int out_ifindex = md->out_ifindex;
+	int in_ifindex = md->in_ifindex;
+	u32 trunc_size = md->trunc_size;
 #ifdef CONFIG_INET
 	struct ip_tunnel_info *tun_info;
 #endif
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index db8ee9e5c8c2..6a0c16e4351d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -158,10 +158,8 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 {
 	struct tcf_sample *s = to_sample(a);
 	struct psample_group *psample_group;
+	struct psample_metadata md = {};
 	int retval;
-	int size;
-	int iif;
-	int oif;
 
 	tcf_lastuse_update(&s->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
@@ -172,20 +170,18 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	/* randomly sample packets according to rate */
 	if (psample_group && (prandom_u32() % s->rate == 0)) {
 		if (!skb_at_tc_ingress(skb)) {
-			iif = skb->skb_iif;
-			oif = skb->dev->ifindex;
+			md.in_ifindex = skb->skb_iif;
+			md.out_ifindex = skb->dev->ifindex;
 		} else {
-			iif = skb->dev->ifindex;
-			oif = 0;
+			md.in_ifindex = skb->dev->ifindex;
 		}
 
 		/* on ingress, the mac header gets popped, so push it back */
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_push(skb, skb->mac_len);
 
-		size = s->truncate ? s->trunc_size : skb->len;
-		psample_sample_packet(psample_group, skb, size, iif, oif,
-				      s->rate);
+		md.trunc_size = s->truncate ? s->trunc_size : skb->len;
+		psample_sample_packet(psample_group, skb, s->rate, &md);
 
 		if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
 			skb_pull(skb, skb->mac_len);
-- 
cgit v1.2.3


From 07e1a5809b595df6e125504dff6245cb2c8ed3de Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:31 +0200
Subject: psample: Add additional metadata attributes

Extend psample to report the following attributes when available:

* Output traffic class as a 16-bit value
* Output traffic class occupancy in bytes as a 64-bit value
* End-to-end latency of the packet in nanoseconds resolution
* Software timestamp in nanoseconds resolution (always available)
* Packet's protocol. Needed for packet dissection in user space (always
  available)

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/psample.h        |  7 +++++++
 include/uapi/linux/psample.h |  7 +++++++
 net/psample/psample.c        | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/include/net/psample.h b/include/net/psample.h
index ac6dbfb3870d..e328c5127757 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -18,6 +18,13 @@ struct psample_metadata {
 	u32 trunc_size;
 	int in_ifindex;
 	int out_ifindex;
+	u16 out_tc;
+	u64 out_tc_occ;	/* bytes */
+	u64 latency;	/* nanoseconds */
+	u8 out_tc_valid:1,
+	   out_tc_occ_valid:1,
+	   latency_valid:1,
+	   unused:5;
 };
 
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index aea26ab1431c..c6329f71b939 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -16,6 +16,13 @@ enum {
 	/* commands attributes */
 	PSAMPLE_ATTR_GROUP_REFCOUNT,
 
+	PSAMPLE_ATTR_PAD,
+	PSAMPLE_ATTR_OUT_TC,		/* u16 */
+	PSAMPLE_ATTR_OUT_TC_OCC,	/* u64, bytes */
+	PSAMPLE_ATTR_LATENCY,		/* u64, nanoseconds */
+	PSAMPLE_ATTR_TIMESTAMP,		/* u64, nanoseconds */
+	PSAMPLE_ATTR_PROTO,		/* u16 */
+
 	__PSAMPLE_ATTR_MAX
 };
 
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 065bc887d239..118d5d2a81a0 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/module.h>
+#include <linux/timekeeping.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -358,6 +359,7 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 			   u32 sample_rate, const struct psample_metadata *md)
 {
+	ktime_t tstamp = ktime_get_real();
 	int out_ifindex = md->out_ifindex;
 	int in_ifindex = md->in_ifindex;
 	u32 trunc_size = md->trunc_size;
@@ -372,10 +374,15 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 
 	meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
 		   (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
+		   (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
+		   (md->out_tc_occ_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
+		   (md->latency_valid ? nla_total_size_64bit(sizeof(u64)) : 0) +
 		   nla_total_size(sizeof(u32)) +	/* sample_rate */
 		   nla_total_size(sizeof(u32)) +	/* orig_size */
 		   nla_total_size(sizeof(u32)) +	/* group_num */
-		   nla_total_size(sizeof(u32));		/* seq */
+		   nla_total_size(sizeof(u32)) +	/* seq */
+		   nla_total_size_64bit(sizeof(u64)) +	/* timestamp */
+		   nla_total_size(sizeof(u16));		/* protocol */
 
 #ifdef CONFIG_INET
 	tun_info = skb_tunnel_info(skb);
@@ -425,6 +432,36 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 	if (unlikely(ret < 0))
 		goto error;
 
+	if (md->out_tc_valid) {
+		ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_OUT_TC, md->out_tc);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	if (md->out_tc_occ_valid) {
+		ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_OUT_TC_OCC,
+					md->out_tc_occ, PSAMPLE_ATTR_PAD);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	if (md->latency_valid) {
+		ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_LATENCY,
+					md->latency, PSAMPLE_ATTR_PAD);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	ret = nla_put_u64_64bit(nl_skb, PSAMPLE_ATTR_TIMESTAMP,
+				ktime_to_ns(tstamp), PSAMPLE_ATTR_PAD);
+	if (unlikely(ret < 0))
+		goto error;
+
+	ret = nla_put_u16(nl_skb, PSAMPLE_ATTR_PROTO,
+			  be16_to_cpu(skb->protocol));
+	if (unlikely(ret < 0))
+		goto error;
+
 	if (data_len) {
 		int nla_len = nla_total_size(data_len);
 		struct nlattr *nla;
-- 
cgit v1.2.3


From a8700c3dd0a48f379d269ac6ddcf8bd857042771 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:32 +0200
Subject: netdevsim: Add dummy psample implementation

Allow netdevsim to report "sampled" packets to the psample module by
periodically generating packets from a work queue. The behavior can be
enabled / disabled (default) and the various meta data attributes can be
controlled via debugfs knobs.

This implementation enables both testing of the psample module with all
the optional attributes as well as development of user space
applications on top of psample such as hsflowd and a Wireshark dissector
for psample generic netlink packets.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig               |   1 +
 drivers/net/netdevsim/Makefile    |   4 +
 drivers/net/netdevsim/dev.c       |  17 ++-
 drivers/net/netdevsim/netdevsim.h |  15 +++
 drivers/net/netdevsim/psample.c   | 264 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 299 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/netdevsim/psample.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index bcd31f458d1a..5895905b6aa1 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -579,6 +579,7 @@ config NETDEVSIM
 	depends on DEBUG_FS
 	depends on INET
 	depends on IPV6 || IPV6=n
+	depends on PSAMPLE || PSAMPLE=n
 	select NET_DEVLINK
 	help
 	  This driver is a developer testing tool and software model that can
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index ade086eed955..a1cbfa44a1e1 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -13,3 +13,7 @@ endif
 ifneq ($(CONFIG_XFRM_OFFLOAD),)
 netdevsim-objs += ipsec.o
 endif
+
+ifneq ($(CONFIG_PSAMPLE),)
+netdevsim-objs += psample.o
+endif
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index dbeb29fa16e8..6189a4c0d39e 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -1032,10 +1032,14 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 	if (err)
 		goto err_fib_destroy;
 
-	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	err = nsim_dev_psample_init(nsim_dev);
 	if (err)
 		goto err_health_exit;
 
+	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	if (err)
+		goto err_psample_exit;
+
 	nsim_dev->take_snapshot = debugfs_create_file("take_snapshot",
 						      0200,
 						      nsim_dev->ddir,
@@ -1043,6 +1047,8 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev,
 						&nsim_dev_take_snapshot_fops);
 	return 0;
 
+err_psample_exit:
+	nsim_dev_psample_exit(nsim_dev);
 err_health_exit:
 	nsim_dev_health_exit(nsim_dev);
 err_fib_destroy:
@@ -1118,14 +1124,20 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
 	if (err)
 		goto err_health_exit;
 
-	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	err = nsim_dev_psample_init(nsim_dev);
 	if (err)
 		goto err_bpf_dev_exit;
 
+	err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count);
+	if (err)
+		goto err_psample_exit;
+
 	devlink_params_publish(devlink);
 	devlink_reload_enable(devlink);
 	return 0;
 
+err_psample_exit:
+	nsim_dev_psample_exit(nsim_dev);
 err_bpf_dev_exit:
 	nsim_bpf_dev_exit(nsim_dev);
 err_health_exit:
@@ -1158,6 +1170,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev)
 		return;
 	debugfs_remove(nsim_dev->take_snapshot);
 	nsim_dev_port_del_all(nsim_dev);
+	nsim_dev_psample_exit(nsim_dev);
 	nsim_dev_health_exit(nsim_dev);
 	nsim_fib_destroy(devlink, nsim_dev->fib_data);
 	nsim_dev_traps_exit(devlink);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 48163c5f2ec9..d735c21def4b 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -180,6 +180,20 @@ struct nsim_dev_health {
 int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink);
 void nsim_dev_health_exit(struct nsim_dev *nsim_dev);
 
+#if IS_ENABLED(CONFIG_PSAMPLE)
+int nsim_dev_psample_init(struct nsim_dev *nsim_dev);
+void nsim_dev_psample_exit(struct nsim_dev *nsim_dev);
+#else
+static inline int nsim_dev_psample_init(struct nsim_dev *nsim_dev)
+{
+	return 0;
+}
+
+static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev)
+{
+}
+#endif
+
 struct nsim_dev_port {
 	struct list_head list;
 	struct devlink_port devlink_port;
@@ -229,6 +243,7 @@ struct nsim_dev {
 		bool static_iana_vxlan;
 		u32 sleep;
 	} udp_ports;
+	struct nsim_dev_psample *psample;
 };
 
 static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev)
diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c
new file mode 100644
index 000000000000..5ec3bd7f891b
--- /dev/null
+++ b/drivers/net/netdevsim/psample.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Mellanox Technologies. All rights reserved */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/etherdevice.h>
+#include <linux/inet.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <net/devlink.h>
+#include <net/ip.h>
+#include <net/psample.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/udp.h>
+
+#include "netdevsim.h"
+
+#define NSIM_PSAMPLE_REPORT_INTERVAL_MS	100
+#define NSIM_PSAMPLE_INVALID_TC		0xFFFF
+#define NSIM_PSAMPLE_L4_DATA_LEN	100
+
+struct nsim_dev_psample {
+	struct delayed_work psample_dw;
+	struct dentry *ddir;
+	struct psample_group *group;
+	u32 rate;
+	u32 group_num;
+	u32 trunc_size;
+	int in_ifindex;
+	int out_ifindex;
+	u16 out_tc;
+	u64 out_tc_occ_max;
+	u64 latency_max;
+	bool is_active;
+};
+
+static struct sk_buff *nsim_dev_psample_skb_build(void)
+{
+	int tot_len, data_len = NSIM_PSAMPLE_L4_DATA_LEN;
+	struct sk_buff *skb;
+	struct udphdr *udph;
+	struct ethhdr *eth;
+	struct iphdr *iph;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+	tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
+
+	skb_reset_mac_header(skb);
+	eth = skb_put(skb, sizeof(struct ethhdr));
+	eth_random_addr(eth->h_dest);
+	eth_random_addr(eth->h_source);
+	eth->h_proto = htons(ETH_P_IP);
+	skb->protocol = htons(ETH_P_IP);
+
+	skb_set_network_header(skb, skb->len);
+	iph = skb_put(skb, sizeof(struct iphdr));
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = in_aton("192.0.2.1");
+	iph->daddr = in_aton("198.51.100.1");
+	iph->version = 0x4;
+	iph->frag_off = 0;
+	iph->ihl = 0x5;
+	iph->tot_len = htons(tot_len);
+	iph->id = 0;
+	iph->ttl = 100;
+	iph->check = 0;
+	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+	skb_set_transport_header(skb, skb->len);
+	udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
+	get_random_bytes(&udph->source, sizeof(u16));
+	get_random_bytes(&udph->dest, sizeof(u16));
+	udph->len = htons(sizeof(struct udphdr) + data_len);
+
+	return skb;
+}
+
+static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample,
+					struct psample_metadata *md)
+{
+	md->trunc_size = psample->trunc_size;
+	md->in_ifindex = psample->in_ifindex;
+	md->out_ifindex = psample->out_ifindex;
+
+	if (psample->out_tc != NSIM_PSAMPLE_INVALID_TC) {
+		md->out_tc = psample->out_tc;
+		md->out_tc_valid = 1;
+	}
+
+	if (psample->out_tc_occ_max) {
+		u64 out_tc_occ;
+
+		get_random_bytes(&out_tc_occ, sizeof(u64));
+		md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1);
+		md->out_tc_occ_valid = 1;
+	}
+
+	if (psample->latency_max) {
+		u64 latency;
+
+		get_random_bytes(&latency, sizeof(u64));
+		md->latency = latency & (psample->latency_max - 1);
+		md->latency_valid = 1;
+	}
+}
+
+static void nsim_dev_psample_report_work(struct work_struct *work)
+{
+	struct nsim_dev_psample *psample;
+	struct psample_metadata md = {};
+	struct sk_buff *skb;
+	unsigned long delay;
+
+	psample = container_of(work, struct nsim_dev_psample, psample_dw.work);
+
+	skb = nsim_dev_psample_skb_build();
+	if (!skb)
+		goto out;
+
+	nsim_dev_psample_md_prepare(psample, &md);
+	psample_sample_packet(psample->group, skb, psample->rate, &md);
+	consume_skb(skb);
+
+out:
+	delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS);
+	schedule_delayed_work(&psample->psample_dw, delay);
+}
+
+static int nsim_dev_psample_enable(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample = nsim_dev->psample;
+	struct devlink *devlink;
+	unsigned long delay;
+
+	if (psample->is_active)
+		return -EBUSY;
+
+	devlink = priv_to_devlink(nsim_dev);
+	psample->group = psample_group_get(devlink_net(devlink),
+					   psample->group_num);
+	if (!psample->group)
+		return -EINVAL;
+
+	delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS);
+	schedule_delayed_work(&psample->psample_dw, delay);
+
+	psample->is_active = true;
+
+	return 0;
+}
+
+static int nsim_dev_psample_disable(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample = nsim_dev->psample;
+
+	if (!psample->is_active)
+		return -EINVAL;
+
+	psample->is_active = false;
+
+	cancel_delayed_work_sync(&psample->psample_dw);
+	psample_group_put(psample->group);
+
+	return 0;
+}
+
+static ssize_t nsim_dev_psample_enable_write(struct file *file,
+					     const char __user *data,
+					     size_t count, loff_t *ppos)
+{
+	struct nsim_dev *nsim_dev = file->private_data;
+	bool enable;
+	int err;
+
+	err = kstrtobool_from_user(data, count, &enable);
+	if (err)
+		return err;
+
+	if (enable)
+		err = nsim_dev_psample_enable(nsim_dev);
+	else
+		err = nsim_dev_psample_disable(nsim_dev);
+
+	return err ? err : count;
+}
+
+static const struct file_operations nsim_psample_enable_fops = {
+	.open = simple_open,
+	.write = nsim_dev_psample_enable_write,
+	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
+};
+
+int nsim_dev_psample_init(struct nsim_dev *nsim_dev)
+{
+	struct nsim_dev_psample *psample;
+	int err;
+
+	psample = kzalloc(sizeof(*psample), GFP_KERNEL);
+	if (!psample)
+		return -ENOMEM;
+	nsim_dev->psample = psample;
+
+	INIT_DELAYED_WORK(&psample->psample_dw, nsim_dev_psample_report_work);
+
+	psample->ddir = debugfs_create_dir("psample", nsim_dev->ddir);
+	if (IS_ERR(psample->ddir)) {
+		err = PTR_ERR(psample->ddir);
+		goto err_psample_free;
+	}
+
+	/* Populate sampling parameters with sane defaults. */
+	psample->rate = 100;
+	debugfs_create_u32("rate", 0600, psample->ddir, &psample->rate);
+
+	psample->group_num = 10;
+	debugfs_create_u32("group_num", 0600, psample->ddir,
+			   &psample->group_num);
+
+	psample->trunc_size = 0;
+	debugfs_create_u32("trunc_size", 0600, psample->ddir,
+			   &psample->trunc_size);
+
+	psample->in_ifindex = 1;
+	debugfs_create_u32("in_ifindex", 0600, psample->ddir,
+			   &psample->in_ifindex);
+
+	psample->out_ifindex = 2;
+	debugfs_create_u32("out_ifindex", 0600, psample->ddir,
+			   &psample->out_ifindex);
+
+	psample->out_tc = 0;
+	debugfs_create_u16("out_tc", 0600, psample->ddir, &psample->out_tc);
+
+	psample->out_tc_occ_max = 10000;
+	debugfs_create_u64("out_tc_occ_max", 0600, psample->ddir,
+			   &psample->out_tc_occ_max);
+
+	psample->latency_max = 50;
+	debugfs_create_u64("latency_max", 0600, psample->ddir,
+			   &psample->latency_max);
+
+	debugfs_create_file("enable", 0200, psample->ddir, nsim_dev,
+			    &nsim_psample_enable_fops);
+
+	return 0;
+
+err_psample_free:
+	kfree(nsim_dev->psample);
+	return err;
+}
+
+void nsim_dev_psample_exit(struct nsim_dev *nsim_dev)
+{
+	debugfs_remove_recursive(nsim_dev->psample->ddir);
+	if (nsim_dev->psample->is_active) {
+		cancel_delayed_work_sync(&nsim_dev->psample->psample_dw);
+		psample_group_put(nsim_dev->psample->group);
+	}
+	kfree(nsim_dev->psample);
+}
-- 
cgit v1.2.3


From f26b30918dac568425811f71fd21a53ed2307f44 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:33 +0200
Subject: selftests: netdevsim: Test psample functionality

Test various aspects of psample functionality over netdevsim and in
particular test that the psample module correctly reports the provided
metadata.

Example:

 # ./psample.sh
 TEST: psample enable / disable                                      [ OK ]
 TEST: psample group number                                          [ OK ]
 TEST: psample metadata                                              [ OK ]

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/netdevsim/psample.sh     | 181 +++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/psample.sh

diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..ee10b1a8933c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	psample_enable_test
+	psample_group_num_test
+	psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+	rm -f $CAPTURE_FILE
+
+	timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to enable sampling when should not"
+
+	echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling enablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+		check_err 1 "Failed to capture sampled packets"
+	fi
+
+	echo 0 > $PSAMPLE_DIR/enable
+	check_err $? "Failed to disable sampling when should not"
+
+	echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+	check_fail $? "Sampling disablement succeeded when should fail"
+
+	psample_capture
+	if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+		check_err 1 "Captured sampled packets when should not"
+	fi
+
+	log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+	RET=0
+
+	echo 1234 > $PSAMPLE_DIR/group_num
+	echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong group number"
+
+	# New group number should only be used after disable / enable.
+	echo 4321 > $PSAMPLE_DIR/group_num
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_fail $? "Group number changed while sampling is active"
+
+	echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+	psample_capture
+	grep -q -e "group 4321" $CAPTURE_FILE
+	check_err $? "Group number did not change after restarting sampling"
+
+	log_test "psample group number"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+	RET=0
+
+	echo 1 > $PSAMPLE_DIR/enable
+
+	echo 1234 > $PSAMPLE_DIR/in_ifindex
+	echo 4321 > $PSAMPLE_DIR/out_ifindex
+	psample_capture
+
+	grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong in-ifindex"
+
+	grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-ifindex"
+
+	echo 5 > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc 5" $CAPTURE_FILE
+	check_err $? "Sampled packets reported with wrong out-tc"
+
+	echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+	psample_capture
+
+	grep -q -e "out-tc " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc when should not"
+
+	echo 1 > $PSAMPLE_DIR/out_tc
+	echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+	echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+	psample_capture
+
+	grep -q -e "out-tc-occ " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+	echo 10000 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets not reported with latency when should"
+
+	echo 0 > $PSAMPLE_DIR/latency_max
+	psample_capture
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_fail $? "Sampled packets reported with latency when should not"
+
+	log_test "psample metadata"
+
+	echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	rm -f $CAPTURE_FILE
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From e0eeede3d23337382e328e0fea4f7794e5a959d2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:34 +0200
Subject: mlxsw: pci: Add more metadata fields to CQEv2

The Completion Queue Element version 2 (CQEv2) includes various metadata
fields for packets that are mirrored / sampled to the CPU.

Add these fields so that they could be used by a later patch.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 71 ++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index a2c1fbd3e0d1..7b531228d6c0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -173,6 +173,15 @@ MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
  */
 MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
 
+#define MLXSW_PCI_CQE2_MIRROR_CONG_INVALID	0xFFFF
+
+/* pci_cqe_mirror_cong_high
+ * Congestion level in units of 8KB of the egress traffic class of the original
+ * packet that does mirroring to the CPU. Value of 0xFFFF means that the
+ * congestion level is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_cong_high, 0x08, 16, 4);
+
 /* pci_cqe_trap_id
  * Trap ID that captured the packet.
  */
@@ -208,6 +217,59 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5);
 MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6);
 mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12);
 
+#define MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID	0x1F
+
+/* pci_cqe_mirror_tclass
+ * The egress traffic class of the original packet that does mirroring to the
+ * CPU. Value of 0x1F means that the traffic class is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_tclass, 0x10, 27, 5);
+
+/* pci_cqe_tx_lag
+ * The Tx port of a packet that is mirrored / sampled to the CPU is a LAG.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag, 0x10, 24, 1);
+
+/* pci_cqe_tx_lag_subport
+ * The port index within the LAG of a packet that is mirrored / sampled to the
+ * CPU. Reserved when tx_lag is 0.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag_subport, 0x10, 16, 8);
+
+#define MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT	0xFFFE
+#define MLXSW_PCI_CQE2_TX_PORT_INVALID		0xFFFF
+
+/* pci_cqe_tx_lag_id
+ * The Tx LAG ID of the original packet that is mirrored / sampled to the CPU.
+ * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx LAG ID
+ * is invalid. Reserved when tx_lag is 0.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_lag_id, 0x10, 0, 16);
+
+/* pci_cqe_tx_system_port
+ * The Tx port of the original packet that is mirrored / sampled to the CPU.
+ * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx port is
+ * invalid. Reserved when tx_lag is 1.
+ */
+MLXSW_ITEM32(pci, cqe2, tx_system_port, 0x10, 0, 16);
+
+/* pci_cqe_mirror_cong_low
+ * Congestion level in units of 8KB of the egress traffic class of the original
+ * packet that does mirroring to the CPU. Value of 0xFFFF means that the
+ * congestion level is invalid.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_cong_low, 0x14, 20, 12);
+
+#define MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT	13	/* Units of 8KB. */
+
+static inline u16 mlxsw_pci_cqe2_mirror_cong_get(const char *cqe)
+{
+	u16 cong_high = mlxsw_pci_cqe2_mirror_cong_high_get(cqe);
+	u16 cong_low = mlxsw_pci_cqe2_mirror_cong_low_get(cqe);
+
+	return cong_high << 12 | cong_low;
+}
+
 /* pci_cqe_user_def_val_orig_pkt_len
  * When trap_id is an ACL: User defined value from policy engine action.
  */
@@ -218,6 +280,15 @@ MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20);
  */
 MLXSW_ITEM32(pci, cqe2, mirror_reason, 0x18, 24, 8);
 
+#define MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID	0xFFFFFF
+
+/* pci_cqe_mirror_latency
+ * End-to-end latency of the original packet that does mirroring to the CPU.
+ * Value of 0xFFFFFF means that the latency is invalid. Units are according to
+ * MOGCR.mirror_latency_units.
+ */
+MLXSW_ITEM32(pci, cqe2, mirror_latency, 0x1C, 8, 24);
+
 /* pci_cqe_owner
  * Ownership bit.
  */
-- 
cgit v1.2.3


From d4cabaadeaad96ebaa415b6a7ca00390645a89ec Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:35 +0200
Subject: mlxsw: Create dedicated field for Rx metadata in skb control block

Next patch will need to encode more Rx metadata in the skb control
block, so create a dedicated field for it and move the cookie index
there.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h          | 6 +++++-
 drivers/net/ethernet/mellanox/mlxsw/pci.c           | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 8af7d9d03475..86adc17c8901 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -58,6 +58,10 @@ struct mlxsw_tx_info {
 	bool is_emad;
 };
 
+struct mlxsw_rx_md_info {
+	u32 cookie_index;
+};
+
 bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
 				  const struct mlxsw_tx_info *tx_info);
 int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
@@ -515,7 +519,7 @@ enum mlxsw_devlink_param_id {
 struct mlxsw_skb_cb {
 	union {
 		struct mlxsw_tx_info tx_info;
-		u32 cookie_index; /* Only used during receive */
+		struct mlxsw_rx_md_info rx_md_info;
 	};
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index d0052537e627..8eee8b3c675e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -581,7 +581,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 
 		if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2)
 			cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe);
-		mlxsw_skb_cb(skb)->cookie_index = cookie_index;
+		mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index;
 	} else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 &&
 		   rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 &&
 		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index e0e6ee58d31a..5d4ae4886f76 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -108,7 +108,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
 static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port,
 					  void *trap_ctx)
 {
-	u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index;
+	u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index;
 	const struct flow_action_cookie *fa_cookie;
 	struct devlink_port *in_devlink_port;
 	struct mlxsw_sp_port *mlxsw_sp_port;
-- 
cgit v1.2.3


From 5ab6dc9fa2720cbbbdf2686c06a90c9a6c86b6fd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:36 +0200
Subject: mlxsw: pci: Set extra metadata in skb control block

Packets that are mirrored / sampled to the CPU have extra metadata
encoded in their corresponding Completion Queue Element (CQE). Retrieve
this metadata from the CQE and set it in the skb control block so that
it could be accessed by the switch driver (i.e., 'mlxsw_spectrum').

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h | 15 +++++++++
 drivers/net/ethernet/mellanox/mlxsw/pci.c  | 53 ++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 86adc17c8901..80712dc803d0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -60,6 +60,21 @@ struct mlxsw_tx_info {
 
 struct mlxsw_rx_md_info {
 	u32 cookie_index;
+	u32 latency;
+	u32 tx_congestion;
+	union {
+		/* Valid when 'tx_port_valid' is set. */
+		u16 tx_sys_port;
+		u16 tx_lag_id;
+	};
+	u8 tx_lag_port_index; /* Valid when 'tx_port_is_lag' is set. */
+	u8 tx_tc;
+	u8 latency_valid:1,
+	   tx_congestion_valid:1,
+	   tx_tc_valid:1,
+	   tx_port_valid:1,
+	   tx_port_is_lag:1,
+	   unused:3;
 };
 
 bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 8eee8b3c675e..8e8456811384 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -540,6 +540,55 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
 	spin_unlock(&q->lock);
 }
 
+static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb,
+					      const char *cqe)
+{
+	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
+
+	if (mlxsw_pci_cqe2_tx_lag_get(cqe)) {
+		cb->rx_md_info.tx_port_is_lag = true;
+		cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe);
+		cb->rx_md_info.tx_lag_port_index =
+			mlxsw_pci_cqe2_tx_lag_subport_get(cqe);
+	} else {
+		cb->rx_md_info.tx_port_is_lag = false;
+		cb->rx_md_info.tx_sys_port =
+			mlxsw_pci_cqe2_tx_system_port_get(cqe);
+	}
+
+	if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT &&
+	    cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID)
+		cb->rx_md_info.tx_port_valid = 1;
+	else
+		cb->rx_md_info.tx_port_valid = 0;
+}
+
+static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe)
+{
+	struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb);
+
+	cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe);
+	if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID)
+		cb->rx_md_info.tx_congestion_valid = 1;
+	else
+		cb->rx_md_info.tx_congestion_valid = 0;
+	cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT;
+
+	cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe);
+	if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID)
+		cb->rx_md_info.latency_valid = 1;
+	else
+		cb->rx_md_info.latency_valid = 0;
+
+	cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe);
+	if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID)
+		cb->rx_md_info.tx_tc_valid = 1;
+	else
+		cb->rx_md_info.tx_tc_valid = 0;
+
+	mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
+}
+
 static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 				     struct mlxsw_pci_queue *q,
 				     u16 consumer_counter_limit,
@@ -586,6 +635,10 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
 		   rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 &&
 		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
 		rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe);
+		mlxsw_pci_cqe_rdq_md_init(skb, cqe);
+	} else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE &&
+		   mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) {
+		mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe);
 	}
 
 	byte_count = mlxsw_pci_cqe_byte_count_get(cqe);
-- 
cgit v1.2.3


From e1f78ecdfd59d560c42bc04b9bfb746ef8a9dfb1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:37 +0200
Subject: mlxsw: spectrum: Remove unnecessary RCU read-side critical section

Since commit 7d8e8f3433dc ("mlxsw: core: Increase scope of RCU read-side
critical section"), all Rx handlers are called from an RCU read-side
critical section.

Remove the unnecessary rcu_read_lock() / rcu_read_unlock().

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3b15f8d728a3..3d8e8d8dfff5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2225,15 +2225,12 @@ void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 		goto out;
 	}
 
-	rcu_read_lock();
 	sample = rcu_dereference(mlxsw_sp_port->sample);
 	if (!sample)
-		goto out_unlock;
+		goto out;
 	md.trunc_size = sample->truncate ? sample->trunc_size : skb->len;
 	md.in_ifindex = mlxsw_sp_port->dev->ifindex;
 	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
-out_unlock:
-	rcu_read_unlock();
 out:
 	consume_skb(skb);
 }
-- 
cgit v1.2.3


From 48990bef1e6880613b142cea4f3962dd51458bd6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:38 +0200
Subject: mlxsw: spectrum: Remove mlxsw_sp_sample_receive()

The function resolves the psample sampling group from the Rx port
because this is the only form of sampling the driver currently supports.
Subsequent patches are going to add support for Tx-based and
policy-based sampling, in which case the sampling group would not be
resolved from the Rx port.

Therefore, move this code to the Rx-specific sampling listener.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 23 ----------------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  2 --
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 19 ++++++++++++++++--
 3 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3d8e8d8dfff5..6054147fd51c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2212,29 +2212,6 @@ void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
 }
 
-void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
-			     u8 local_port)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
-	struct mlxsw_sp_port_sample *sample;
-	struct psample_metadata md = {};
-
-	if (unlikely(!mlxsw_sp_port)) {
-		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n",
-				     local_port);
-		goto out;
-	}
-
-	sample = rcu_dereference(mlxsw_sp_port->sample);
-	if (!sample)
-		goto out;
-	md.trunc_size = sample->truncate ? sample->trunc_size : skb->len;
-	md.in_ifindex = mlxsw_sp_port->dev->ifindex;
-	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
-out:
-	consume_skb(skb);
-}
-
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
 	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
 		  _is_ctrl, SP_##_trap_group, DISCARD)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index bc7006de7873..0082f70daff3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -570,8 +570,6 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
 				       u8 local_port, void *priv);
 void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 			  u8 local_port);
-void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
-			     u8 local_port);
 int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 5d4ae4886f76..ea01047f8f8f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -208,17 +208,32 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 					void *trap_ctx)
 {
 	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp_port_sample *sample;
+	struct psample_metadata md = {};
 	int err;
 
 	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
 	if (err)
 		return;
 
-	/* The sample handler expects skb->data to point to the start of the
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	sample = rcu_dereference(mlxsw_sp_port->sample);
+	if (!sample)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
 	 * Ethernet header.
 	 */
 	skb_push(skb, ETH_HLEN);
-	mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port);
+	md.trunc_size = sample->truncate ? sample->trunc_size : skb->len;
+	md.in_ifindex = mlxsw_sp_port->dev->ifindex;
+	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
+out:
+	consume_skb(skb);
 }
 
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
-- 
cgit v1.2.3


From 2073c600444349b18a80b0b38dc20df92fd74155 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:39 +0200
Subject: mlxsw: spectrum: Report extra metadata to psample module

Make use of the previously added metadata and report it to the psample
module. The metadata is read from the skb's control block, which was
initialized by the bus driver (i.e., 'mlxsw_pci') after decoding the
packet's Completion Queue Element (CQE).

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 54 +++++++++++++++++++++-
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index ea01047f8f8f..056201029ce5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -204,6 +204,55 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port);
 }
 
+static struct mlxsw_sp_port *
+mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp,
+			    const struct mlxsw_rx_md_info *rx_md_info)
+{
+	u8 local_port;
+
+	if (!rx_md_info->tx_port_valid)
+		return NULL;
+
+	if (rx_md_info->tx_port_is_lag)
+		local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core,
+							rx_md_info->tx_lag_id,
+							rx_md_info->tx_lag_port_index);
+	else
+		local_port = rx_md_info->tx_sys_port;
+
+	if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core))
+		return NULL;
+
+	return mlxsw_sp->ports[local_port];
+}
+
+/* The latency units are determined according to MOGCR.mirror_latency_units. It
+ * defaults to 64 nanoseconds.
+ */
+#define MLXSW_SP_MIRROR_LATENCY_SHIFT	6
+
+static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp,
+				     struct psample_metadata *md,
+				     struct sk_buff *skb, int in_ifindex,
+				     bool truncate, u32 trunc_size)
+{
+	struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	md->trunc_size = truncate ? trunc_size : skb->len;
+	md->in_ifindex = in_ifindex;
+	mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+	md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ?
+			  mlxsw_sp_port->dev->ifindex : 0;
+	md->out_tc_valid = rx_md_info->tx_tc_valid;
+	md->out_tc = rx_md_info->tx_tc;
+	md->out_tc_occ_valid = rx_md_info->tx_congestion_valid;
+	md->out_tc_occ = rx_md_info->tx_congestion;
+	md->latency_valid = rx_md_info->latency_valid;
+	md->latency = rx_md_info->latency;
+	md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT;
+}
+
 static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 					void *trap_ctx)
 {
@@ -229,8 +278,9 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 	 * Ethernet header.
 	 */
 	skb_push(skb, ETH_HLEN);
-	md.trunc_size = sample->truncate ? sample->trunc_size : skb->len;
-	md.in_ifindex = mlxsw_sp_port->dev->ifindex;
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, sample->truncate,
+				 sample->trunc_size);
 	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
 out:
 	consume_skb(skb);
-- 
cgit v1.2.3


From bb24d592e66eb059e086595510e0f0b064e4114d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 14 Mar 2021 14:19:40 +0200
Subject: selftests: mlxsw: Add tc sample tests

Test that packets are sampled when tc-sample is used and that reported
metadata is correct. Two sets of hosts (with and without LAG) are used,
since metadata extraction in mlxsw is a bit different when LAG is
involved.

 # ./tc_sample.sh
 TEST: tc sample rate (forward)                                      [ OK ]
 TEST: tc sample rate (local receive)                                [ OK ]
 TEST: tc sample maximum rate                                        [ OK ]
 TEST: tc sample group conflict test                                 [ OK ]
 TEST: tc sample iif                                                 [ OK ]
 TEST: tc sample lag iif                                             [ OK ]
 TEST: tc sample oif                                                 [ OK ]
 TEST: tc sample lag oif                                             [ OK ]
 TEST: tc sample out-tc                                              [ OK ]
 TEST: tc sample out-tc-occ                                          [ OK ]

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/tc_sample.sh       | 492 +++++++++++++++++++++
 1 file changed, 492 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..75d00104f291
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,492 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+       +---------------------------------+
+# | H1 (vrf)                        |       | H3 (vrf)                        |
+# |    + $h1                        |       |    + $h3_lag                    |
+# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
+# |    |                            |       |    |                            |
+# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
+# +----|----------------------------+       +----|----------------------------+
+#      |                                         |
+# +----|-----------------------------------------|----------------------------+
+# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
+# |    + $rp1                                    + $rp3_lag                   |
+# |                                                                           |
+# |    + $rp2                                    + $rp4_lag                   |
+# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
+# +----|-----------------------------------------|----------------------------+
+#      |                                         |
+# +----|----------------------------+       +----|----------------------------+
+# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
+# |    |                            |       |    |                            |
+# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
+# |    + $h2                        |       |    + $h4_lag                    |
+# | H2 (vrf)                        |       | H4 (vrf)                        |
+# +---------------------------------+       +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_sample_rate_test
+	tc_sample_max_rate_test
+	tc_sample_group_conflict_test
+	tc_sample_md_iif_test
+	tc_sample_md_lag_iif_test
+	tc_sample_md_oif_test
+	tc_sample_md_lag_oif_test
+	tc_sample_md_out_tc_test
+	tc_sample_md_out_tc_occ_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/28
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+	ip link set dev $h3 down
+	ip link add name ${h3}_bond type bond mode 802.3ad
+	ip link set dev $h3 master ${h3}_bond
+
+	simple_if_init ${h3}_bond 192.0.2.17/28
+
+	ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+	ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+	simple_if_fini ${h3}_bond 192.0.2.17/28
+
+	ip link set dev $h3 nomaster
+	ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+	ip link set dev $h4 down
+	ip link add name ${h4}_bond type bond mode 802.3ad
+	ip link set dev $h4 master ${h4}_bond
+
+	simple_if_init ${h4}_bond 198.51.100.17/28
+
+	ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+	ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+	simple_if_fini ${h4}_bond 198.51.100.17/28
+
+	ip link set dev $h4 nomaster
+	ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	__addr_add_del $rp1 add 192.0.2.2/28
+	tc qdisc add dev $rp1 clsact
+
+	ip link set dev $rp2 up
+	__addr_add_del $rp2 add 198.51.100.2/28
+	tc qdisc add dev $rp2 clsact
+
+	ip link add name ${rp3}_bond type bond mode 802.3ad
+	ip link set dev $rp3 master ${rp3}_bond
+	__addr_add_del ${rp3}_bond add 192.0.2.18/28
+	tc qdisc add dev $rp3 clsact
+	ip link set dev ${rp3}_bond up
+
+	ip link add name ${rp4}_bond type bond mode 802.3ad
+	ip link set dev $rp4 master ${rp4}_bond
+	__addr_add_del ${rp4}_bond add 198.51.100.18/28
+	tc qdisc add dev $rp4 clsact
+	ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+	ip link set dev ${rp4}_bond down
+	tc qdisc del dev $rp4 clsact
+	__addr_add_del ${rp4}_bond del 198.51.100.18/28
+	ip link set dev $rp4 nomaster
+	ip link del dev ${rp4}_bond
+
+	ip link set dev ${rp3}_bond down
+	tc qdisc del dev $rp3 clsact
+	__addr_add_del ${rp3}_bond del 192.0.2.18/28
+	ip link set dev $rp3 nomaster
+	ip link del dev ${rp3}_bond
+
+	tc qdisc del dev $rp2 clsact
+	__addr_add_del $rp2 del 198.51.100.2/28
+	ip link set dev $rp2 down
+
+	tc qdisc del dev $rp1 clsact
+	__addr_add_del $rp1 del 192.0.2.2/28
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+	h3=${NETIFS[p5]}
+	rp3=${NETIFS[p6]}
+	h4=${NETIFS[p7]}
+	rp4=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	h4_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	rm -f $CAPTURE_FILE
+
+	router_destroy
+	h4_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+psample_capture_start()
+{
+	rm -f $CAPTURE_FILE
+
+	psample &> $CAPTURE_FILE &
+
+	sleep 1
+}
+
+psample_capture_stop()
+{
+	{ kill %% && wait %%; } 2>/dev/null
+}
+
+__tc_sample_rate_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local pkts pct
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B $dip -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 100) / 100))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	log_test "tc sample rate ($desc)"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+	__tc_sample_rate_test "forward" 198.51.100.1
+	__tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample maximum rate"
+}
+
+tc_sample_group_conflict_test()
+{
+	RET=0
+
+	# Test that two sampling rules cannot be configured on the same port
+	# with different groups.
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample group conflict test"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+	local rp1_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample iif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+	local rp3_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+	grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected in-ifindex"
+
+	log_test "tc sample lag iif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+	local rp2_ifindex
+
+	RET=0
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample oif"
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+	local rp4_ifindex
+
+	RET=0
+
+	tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+	grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-ifindex"
+
+	log_test "tc sample lag oif"
+
+	tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+	RET=0
+
+	# Output traffic class is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# By default, all the packets should go to the same traffic class (0).
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 0 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (0)"
+
+	# Map all priorities to highest traffic class (7) and check reported
+	# out-tc.
+	tc qdisc replace dev $rp2 root handle 1: \
+		prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "out-tc 7 " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have expected out-tc (7)"
+
+	log_test "tc sample out-tc"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+	local backlog pct occ
+
+	RET=0
+
+	# Output traffic class occupancy is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	# Configure a shaper on egress to create congestion.
+	tc qdisc replace dev $rp2 root handle 1: \
+		tbf rate 1Mbit burst 256k limit 1M
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+	# Allow congestion to reach steady state.
+	sleep 10
+
+	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+	# Kill mausezahn.
+	{ kill %% && wait %%; } 2>/dev/null
+
+	psample_capture_stop
+
+	# Record last congestion sample.
+	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+		cut -d ' ' -f 16)
+
+	pct=$((100 * (occ - backlog) / backlog))
+	(( -1 <= pct && pct <= 1))
+	check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+	log_test "tc sample out-tc-occ"
+
+	tc qdisc del dev $rp2 root handle 1:
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 5b492c7dbddaba4a1c1d4e72ec974ee459b4afe7 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 19 Feb 2021 13:41:40 +0800
Subject: rtw88: coex: add power off setting

Clear WL/BT on/off bit in scoreboard register that is used to exchange
WL/BT status. Since the status is preserved after warm reboot, we must
clear it when WL is going to down.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210219054140.7835-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.c | 5 +++++
 drivers/net/wireless/realtek/rtw88/coex.h | 1 +
 drivers/net/wireless/realtek/rtw88/main.c | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index ea2be1e25065..7eee2c5ecb11 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -2646,6 +2646,11 @@ void rtw_coex_power_on_setting(struct rtw_dev *rtwdev)
 	rtw_coex_set_gnt_debug(rtwdev);
 }
 
+void rtw_coex_power_off_setting(struct rtw_dev *rtwdev)
+{
+	rtw_write16(rtwdev, REG_WIFI_BT_INFO, BIT_BT_INT_EN);
+}
+
 void rtw_coex_init_hw_config(struct rtw_dev *rtwdev, bool wifi_only)
 {
 	__rtw_coex_init_hw_config(rtwdev, wifi_only);
diff --git a/drivers/net/wireless/realtek/rtw88/coex.h b/drivers/net/wireless/realtek/rtw88/coex.h
index 8ab9852ec9ed..57018700ac39 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.h
+++ b/drivers/net/wireless/realtek/rtw88/coex.h
@@ -393,6 +393,7 @@ void rtw_coex_bt_multi_link_remain_work(struct work_struct *work);
 void rtw_coex_wl_ccklock_work(struct work_struct *work);
 
 void rtw_coex_power_on_setting(struct rtw_dev *rtwdev);
+void rtw_coex_power_off_setting(struct rtw_dev *rtwdev);
 void rtw_coex_init_hw_config(struct rtw_dev *rtwdev, bool wifi_only);
 void rtw_coex_ips_notify(struct rtw_dev *rtwdev, u8 type);
 void rtw_coex_lps_notify(struct rtw_dev *rtwdev, u8 type);
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index e6989c0525cc..d20aa069d62a 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1138,6 +1138,7 @@ int rtw_core_start(struct rtw_dev *rtwdev)
 static void rtw_power_off(struct rtw_dev *rtwdev)
 {
 	rtw_hci_stop(rtwdev);
+	rtw_coex_power_off_setting(rtwdev);
 	rtw_mac_power_off(rtwdev);
 }
 
-- 
cgit v1.2.3


From 4a7ea94377c928355f778bd2f5cac73757935425 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Tue, 23 Feb 2021 15:54:38 +0800
Subject: rtw88: remove unnecessary variable

The variable ret is defined at the beginning and initialized
to 0 until the function returns ret, and the variable ret is
not reassigned.Therefore, we do not need to define the variable
ret, just return 0 directly at the end of the function.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210223075438.13676-1-samirweng1979@163.com
---
 drivers/net/wireless/realtek/rtw88/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index d20aa069d62a..ff8893861ccf 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1394,7 +1394,6 @@ static int rtw_chip_parameter_setup(struct rtw_dev *rtwdev)
 	struct rtw_chip_info *chip = rtwdev->chip;
 	struct rtw_hal *hal = &rtwdev->hal;
 	struct rtw_efuse *efuse = &rtwdev->efuse;
-	int ret = 0;
 
 	switch (rtw_hci_type(rtwdev)) {
 	case RTW_HCI_TYPE_PCIE:
@@ -1432,7 +1431,7 @@ static int rtw_chip_parameter_setup(struct rtw_dev *rtwdev)
 
 	hal->bfee_sts_cap = 3;
 
-	return ret;
+	return 0;
 }
 
 static int rtw_chip_efuse_enable(struct rtw_dev *rtwdev)
-- 
cgit v1.2.3


From f135a1571a0579bebed625e3af0b1fcae6dff7c5 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Wed, 24 Feb 2021 03:33:23 +0000
Subject: wilc1000: Support chip sleep over SPI

chip_allow_sleep() only supported wakeup via SDIO, which made the
driver unusable over SPI.  This code is a straight forward port from
the driver in the linux-at91 repository.

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Acked-by: Ajay Singh <ajay.kathat@microchip.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210224033317.1507603-1-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/wlan.c | 56 ++++++++++++++++++++++++--
 drivers/net/wireless/microchip/wilc1000/wlan.h |  6 +++
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 31d51385ba93..d4a90c490084 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -552,12 +552,60 @@ static struct rxq_entry_t *wilc_wlan_rxq_remove(struct wilc *wilc)
 void chip_allow_sleep(struct wilc *wilc)
 {
 	u32 reg = 0;
+	const struct wilc_hif_func *hif_func = wilc->hif_func;
+	u32 wakeup_reg, wakeup_bit;
+	u32 to_host_from_fw_reg, to_host_from_fw_bit;
+	u32 from_host_to_fw_reg, from_host_to_fw_bit;
+	u32 trials = 100;
+	int ret;
+
+	if (wilc->io_type == WILC_HIF_SDIO) {
+		wakeup_reg = WILC_SDIO_WAKEUP_REG;
+		wakeup_bit = WILC_SDIO_WAKEUP_BIT;
+		from_host_to_fw_reg = WILC_SDIO_HOST_TO_FW_REG;
+		from_host_to_fw_bit = WILC_SDIO_HOST_TO_FW_BIT;
+		to_host_from_fw_reg = WILC_SDIO_FW_TO_HOST_REG;
+		to_host_from_fw_bit = WILC_SDIO_FW_TO_HOST_BIT;
+	} else {
+		wakeup_reg = WILC_SPI_WAKEUP_REG;
+		wakeup_bit = WILC_SPI_WAKEUP_BIT;
+		from_host_to_fw_reg = WILC_SPI_HOST_TO_FW_REG;
+		from_host_to_fw_bit = WILC_SPI_HOST_TO_FW_BIT;
+		to_host_from_fw_reg = WILC_SPI_FW_TO_HOST_REG;
+		to_host_from_fw_bit = WILC_SPI_FW_TO_HOST_BIT;
+	}
+
+	while (trials--) {
+		ret = hif_func->hif_read_reg(wilc, to_host_from_fw_reg, &reg);
+		if (ret)
+			return;
+		if ((reg & to_host_from_fw_bit) == 0)
+			break;
+	}
+	if (!trials)
+		pr_warn("FW not responding\n");
 
-	wilc->hif_func->hif_read_reg(wilc, WILC_SDIO_WAKEUP_REG, &reg);
+	/* Clear bit 1 */
+	ret = hif_func->hif_read_reg(wilc, wakeup_reg, &reg);
+	if (ret)
+		return;
+	if (reg & wakeup_bit) {
+		reg &= ~wakeup_bit;
+		ret = hif_func->hif_write_reg(wilc, wakeup_reg, reg);
+		if (ret)
+			return;
+	}
 
-	wilc->hif_func->hif_write_reg(wilc, WILC_SDIO_WAKEUP_REG,
-				      reg & ~WILC_SDIO_WAKEUP_BIT);
-	wilc->hif_func->hif_write_reg(wilc, WILC_SDIO_HOST_TO_FW_REG, 0);
+	ret = hif_func->hif_read_reg(wilc, from_host_to_fw_reg, &reg);
+	if (ret)
+		return;
+	if (reg & from_host_to_fw_bit) {
+		reg &= ~from_host_to_fw_bit;
+		ret = hif_func->hif_write_reg(wilc, from_host_to_fw_reg, reg);
+		if (ret)
+			return;
+
+	}
 }
 EXPORT_SYMBOL_GPL(chip_allow_sleep);
 
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.h b/drivers/net/wireless/microchip/wilc1000/wlan.h
index d55eb6b3a12a..6479acc12b95 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.h
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.h
@@ -97,6 +97,12 @@
 #define WILC_SPI_WAKEUP_REG		0x1
 #define WILC_SPI_WAKEUP_BIT		BIT(1)
 
+#define WILC_SPI_HOST_TO_FW_REG		0x0b
+#define WILC_SPI_HOST_TO_FW_BIT		BIT(0)
+
+#define WILC_SPI_FW_TO_HOST_REG		0x10
+#define WILC_SPI_FW_TO_HOST_BIT		BIT(0)
+
 #define WILC_SPI_PROTOCOL_OFFSET	(WILC_SPI_PROTOCOL_CONFIG - \
 					 WILC_SPI_REG_BASE)
 
-- 
cgit v1.2.3


From e21b6e5a54628cd3935f200049d4430c25c54e03 Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Wed, 24 Feb 2021 17:37:06 +0100
Subject: wilc1000: write value to WILC_INTR2_ENABLE register

Write the value instead of reading it twice.

Fixes: c5c77ba18ea6 ("staging: wilc1000: Add SDIO/SPI 802.11 driver")
Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210224163706.519658-1-marcus.folkesson@gmail.com
---
 drivers/net/wireless/microchip/wilc1000/sdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c
index 351ff909ab1c..e14b9fc2c67a 100644
--- a/drivers/net/wireless/microchip/wilc1000/sdio.c
+++ b/drivers/net/wireless/microchip/wilc1000/sdio.c
@@ -947,7 +947,7 @@ static int wilc_sdio_sync_ext(struct wilc *wilc, int nint)
 			for (i = 0; (i < 3) && (nint > 0); i++, nint--)
 				reg |= BIT(i);
 
-			ret = wilc_sdio_read_reg(wilc, WILC_INTR2_ENABLE, &reg);
+			ret = wilc_sdio_write_reg(wilc, WILC_INTR2_ENABLE, reg);
 			if (ret) {
 				dev_err(&func->dev,
 					"Failed write reg (%08x)...\n",
-- 
cgit v1.2.3


From 50773696301bdd083af57bad82bb99779719f0c7 Mon Sep 17 00:00:00 2001
From: Ajay Singh <ajay.kathat@microchip.com>
Date: Thu, 25 Feb 2021 04:23:10 +0000
Subject: wilc1000: use wilc handler as cookie in request_threaded_irq()

Use same cookie for request_threaded_irq() & free_irq() to properly free
IRQ during module unload. free_irq() already uses *wilc* handler so the
changes are required for request_threaded_irq().

Signed-off-by: Ajay Singh <ajay.kathat@microchip.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210225042302.17048-1-ajay.kathat@microchip.com
---
 drivers/net/wireless/microchip/wilc1000/netdev.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
index 1b205e7d97a8..9928e91c8ffa 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -24,12 +24,10 @@
 
 static irqreturn_t isr_uh_routine(int irq, void *user_data)
 {
-	struct net_device *dev = user_data;
-	struct wilc_vif *vif = netdev_priv(dev);
-	struct wilc *wilc = vif->wilc;
+	struct wilc *wilc = user_data;
 
 	if (wilc->close) {
-		netdev_err(dev, "Can't handle UH interrupt\n");
+		pr_err("Can't handle UH interrupt");
 		return IRQ_HANDLED;
 	}
 	return IRQ_WAKE_THREAD;
@@ -37,12 +35,10 @@ static irqreturn_t isr_uh_routine(int irq, void *user_data)
 
 static irqreturn_t isr_bh_routine(int irq, void *userdata)
 {
-	struct net_device *dev = userdata;
-	struct wilc_vif *vif = netdev_priv(userdata);
-	struct wilc *wilc = vif->wilc;
+	struct wilc *wilc = userdata;
 
 	if (wilc->close) {
-		netdev_err(dev, "Can't handle BH interrupt\n");
+		pr_err("Can't handle BH interrupt\n");
 		return IRQ_HANDLED;
 	}
 
@@ -60,7 +56,7 @@ static int init_irq(struct net_device *dev)
 	ret = request_threaded_irq(wl->dev_irq_num, isr_uh_routine,
 				   isr_bh_routine,
 				   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-				   "WILC_IRQ", dev);
+				   "WILC_IRQ", wl);
 	if (ret) {
 		netdev_err(dev, "Failed to request IRQ [%d]\n", ret);
 		return ret;
-- 
cgit v1.2.3


From 13ce240a932fe9c809ec6e79ffc5a4d4ecf534ee Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Tue, 9 Mar 2021 14:01:21 +0800
Subject: rtw88: 8822c: support FW crash dump when FW crash

Although FW crash logs are already supported for dumping in driver, the
logs may not be sufficient to analyze field issues. To improve this part,
we add a support to dump FW memory.

When driver receives FW crash notifications, driver uses DDMA, which is a
HW ability, to copy specified FW memory to FW fifo. Driver can then dump
these information from FW fifo. With this support, not only FW crash log
but also specified FW memory will be dumped while FW crash. Besides,
specified registers are also dumped.

This feature is implemeted on 8822C first due to difference of FW layouts
between ICs. In addition, we add a debugfs to trigger FW crash. It can
simulate the process of crash, dump and reset. Through it, we can also
check if a reset is ongoing.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210309060121.9099-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/debug.c    |  41 +++++++++++
 drivers/net/wireless/realtek/rtw88/mac.c      |  19 +++++
 drivers/net/wireless/realtek/rtw88/mac.h      |   4 ++
 drivers/net/wireless/realtek/rtw88/main.c     | 100 ++++++++++++++++++++++----
 drivers/net/wireless/realtek/rtw88/main.h     |  11 +++
 drivers/net/wireless/realtek/rtw88/reg.h      |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c |  10 +++
 7 files changed, 172 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index 948cb79050ea..4539b673f6fd 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -10,6 +10,7 @@
 #include "fw.h"
 #include "debug.h"
 #include "phy.h"
+#include "reg.h"
 
 #ifdef CONFIG_RTW88_DEBUGFS
 
@@ -818,6 +819,40 @@ static int rtw_debugfs_get_coex_enable(struct seq_file *m, void *v)
 	return 0;
 }
 
+static ssize_t rtw_debugfs_set_fw_crash(struct file *filp,
+					const char __user *buffer,
+					size_t count, loff_t *loff)
+{
+	struct seq_file *seqpriv = (struct seq_file *)filp->private_data;
+	struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
+	struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
+	char tmp[32 + 1];
+	bool input;
+	int ret;
+
+	rtw_debugfs_copy_from_user(tmp, sizeof(tmp), buffer, count, 1);
+
+	ret = kstrtobool(tmp, &input);
+	if (ret)
+		return -EINVAL;
+
+	if (!input)
+		return -EINVAL;
+
+	rtw_write8(rtwdev, REG_HRCV_MSG, 1);
+
+	return count;
+}
+
+static int rtw_debugfs_get_fw_crash(struct seq_file *m, void *v)
+{
+	struct rtw_debugfs_priv *debugfs_priv = m->private;
+	struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
+
+	seq_printf(m, "%d\n", test_bit(RTW_FLAG_RESTARTING, rtwdev->flags));
+	return 0;
+}
+
 #define rtw_debug_impl_mac(page, addr)				\
 static struct rtw_debugfs_priv rtw_debug_priv_mac_ ##page = {	\
 	.cb_read = rtw_debug_get_mac_page,			\
@@ -921,6 +956,11 @@ static struct rtw_debugfs_priv rtw_debug_priv_coex_info = {
 	.cb_read = rtw_debugfs_get_coex_info,
 };
 
+static struct rtw_debugfs_priv rtw_debug_priv_fw_crash = {
+	.cb_write = rtw_debugfs_set_fw_crash,
+	.cb_read = rtw_debugfs_get_fw_crash,
+};
+
 #define rtw_debugfs_add_core(name, mode, fopname, parent)		\
 	do {								\
 		rtw_debug_priv_ ##name.rtwdev = rtwdev;			\
@@ -994,6 +1034,7 @@ void rtw_debugfs_init(struct rtw_dev *rtwdev)
 	}
 	rtw_debugfs_add_r(rf_dump);
 	rtw_debugfs_add_r(tx_pwr_tbl);
+	rtw_debugfs_add_rw(fw_crash);
 }
 
 #endif /* CONFIG_RTW88_DEBUGFS */
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 59028b121b00..d1678aed9d9c 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -530,6 +530,25 @@ static int iddma_download_firmware(struct rtw_dev *rtwdev, u32 src, u32 dst,
 	return 0;
 }
 
+int rtw_ddma_to_fw_fifo(struct rtw_dev *rtwdev, u32 ocp_src, u32 size)
+{
+	u32 ch0_ctrl = BIT_DDMACH0_OWN | BIT_DDMACH0_DDMA_MODE;
+
+	if (!check_hw_ready(rtwdev, REG_DDMA_CH0CTRL, BIT_DDMACH0_OWN, 0)) {
+		rtw_dbg(rtwdev, RTW_DBG_FW, "busy to start ddma\n");
+		return -EBUSY;
+	}
+
+	ch0_ctrl |= size & BIT_MASK_DDMACH0_DLEN;
+
+	if (iddma_enable(rtwdev, ocp_src, OCPBASE_RXBUF_FW_88XX, ch0_ctrl)) {
+		rtw_dbg(rtwdev, RTW_DBG_FW, "busy to complete ddma\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 static bool
 check_fw_checksum(struct rtw_dev *rtwdev, u32 addr)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/mac.h b/drivers/net/wireless/realtek/rtw88/mac.h
index ce64cdf7a565..3172aa5ac4de 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.h
+++ b/drivers/net/wireless/realtek/rtw88/mac.h
@@ -15,7 +15,10 @@
 #define ILLEGAL_KEY_GROUP	0xFAAAAA00
 
 /* HW memory address */
+#define OCPBASE_RXBUF_FW_88XX		0x18680000
 #define OCPBASE_TXBUF_88XX		0x18780000
+#define OCPBASE_ROM_88XX		0x00000000
+#define OCPBASE_IMEM_88XX		0x00030000
 #define OCPBASE_DMEM_88XX		0x00200000
 #define OCPBASE_EMEM_88XX		0x00100000
 
@@ -33,6 +36,7 @@ void rtw_mac_power_off(struct rtw_dev *rtwdev);
 int rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw);
 int rtw_mac_init(struct rtw_dev *rtwdev);
 void rtw_mac_flush_queues(struct rtw_dev *rtwdev, u32 queues, bool drop);
+int rtw_ddma_to_fw_fifo(struct rtw_dev *rtwdev, u32 ocp_src, u32 size);
 
 static inline void rtw_mac_flush_all_queues(struct rtw_dev *rtwdev, bool drop)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index ff8893861ccf..f3a3a86fa9b5 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -345,15 +345,9 @@ static bool rtw_fw_dump_crash_log(struct rtw_dev *rtwdev)
 			"fw crash dump's seq is wrong: %d\n", seq);
 		goto free_buf;
 	}
-	if (seq == 0 &&
-	    (GET_FW_DUMP_TLV_TYPE(buf) != FW_CD_TYPE ||
-	     GET_FW_DUMP_TLV_LEN(buf) != FW_CD_LEN ||
-	     GET_FW_DUMP_TLV_VAL(buf) != FW_CD_VAL)) {
-		rtw_dbg(rtwdev, RTW_DBG_FW, "fw crash dump's tlv is wrong\n");
-		goto free_buf;
-	}
 
-	print_hex_dump_bytes("rtw88 fw dump: ", DUMP_PREFIX_OFFSET, buf, size);
+	print_hex_dump(KERN_ERR, "rtw88 fw dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+		       buf, size, true);
 
 	if (GET_FW_DUMP_MORE(buf) == 1) {
 		rtwdev->fw.prev_dump_seq = seq;
@@ -368,6 +362,78 @@ exit:
 	return ret;
 }
 
+int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
+		const char *prefix_str)
+{
+	u32 rxff = rtwdev->chip->fw_rxff_size;
+	u32 dump_size, done_size = 0;
+	u8 *buf;
+	int ret;
+
+	buf = vzalloc(size);
+	if (!buf)
+		return -ENOMEM;
+
+	while (size) {
+		dump_size = size > rxff ? rxff : size;
+
+		ret = rtw_ddma_to_fw_fifo(rtwdev, ocp_src + done_size,
+					  dump_size);
+		if (ret) {
+			rtw_err(rtwdev,
+				"ddma fw 0x%x [+0x%x] to fw fifo fail\n",
+				ocp_src, done_size);
+			goto exit;
+		}
+
+		ret = rtw_fw_dump_fifo(rtwdev, RTW_FW_FIFO_SEL_RXBUF_FW, 0,
+				       dump_size, (u32 *)(buf + done_size));
+		if (ret) {
+			rtw_err(rtwdev,
+				"dump fw 0x%x [+0x%x] from fw fifo fail\n",
+				ocp_src, done_size);
+			goto exit;
+		}
+
+		size -= dump_size;
+		done_size += dump_size;
+	}
+
+	print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 1,
+		       buf, done_size, true);
+
+exit:
+	vfree(buf);
+	return ret;
+}
+EXPORT_SYMBOL(rtw_dump_fw);
+
+int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,
+		 const char *prefix_str)
+{
+	u8 *buf;
+	u32 i;
+
+	if (addr & 0x3) {
+		WARN(1, "should be 4-byte aligned, addr = 0x%08x\n", addr);
+		return -EINVAL;
+	}
+
+	buf = vzalloc(size);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < size; i += 4)
+		*(u32 *)(buf + i) = rtw_read32(rtwdev, addr + i);
+
+	print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf,
+		       size, true);
+
+	vfree(buf);
+	return 0;
+}
+EXPORT_SYMBOL(rtw_dump_reg);
+
 void rtw_vif_assoc_changed(struct rtw_vif *rtwvif,
 			   struct ieee80211_bss_conf *conf)
 {
@@ -419,10 +485,8 @@ void rtw_fw_recovery(struct rtw_dev *rtwdev)
 		ieee80211_queue_work(rtwdev->hw, &rtwdev->fw_recovery_work);
 }
 
-static void rtw_fw_recovery_work(struct work_struct *work)
+static void __fw_recovery_work(struct rtw_dev *rtwdev)
 {
-	struct rtw_dev *rtwdev = container_of(work, struct rtw_dev,
-					      fw_recovery_work);
 
 	/* rtw_fw_dump_crash_log() returns false indicates that there are
 	 * still more log to dump. Driver set 0x1cf[7:0] = 0x1 to tell firmware
@@ -435,18 +499,26 @@ static void rtw_fw_recovery_work(struct work_struct *work)
 	}
 	rtwdev->fw.prev_dump_seq = 0;
 
-	WARN(1, "firmware crash, start reset and recover\n");
+	set_bit(RTW_FLAG_RESTARTING, rtwdev->flags);
+	rtw_chip_dump_fw_crash(rtwdev);
 
-	mutex_lock(&rtwdev->mutex);
+	WARN(1, "firmware crash, start reset and recover\n");
 
-	set_bit(RTW_FLAG_RESTARTING, rtwdev->flags);
 	rcu_read_lock();
 	rtw_iterate_keys_rcu(rtwdev, NULL, rtw_reset_key_iter, rtwdev);
 	rcu_read_unlock();
 	rtw_iterate_stas_atomic(rtwdev, rtw_reset_sta_iter, rtwdev);
 	rtw_iterate_vifs_atomic(rtwdev, rtw_reset_vif_iter, rtwdev);
 	rtw_enter_ips(rtwdev);
+}
 
+static void rtw_fw_recovery_work(struct work_struct *work)
+{
+	struct rtw_dev *rtwdev = container_of(work, struct rtw_dev,
+					      fw_recovery_work);
+
+	mutex_lock(&rtwdev->mutex);
+	__fw_recovery_work(rtwdev);
 	mutex_unlock(&rtwdev->mutex);
 
 	ieee80211_restart_hw(rtwdev->hw);
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 35afea91fd29..d185209ee3cc 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -805,6 +805,7 @@ struct rtw_regulatory {
 
 struct rtw_chip_ops {
 	int (*mac_init)(struct rtw_dev *rtwdev);
+	void (*dump_fw_crash)(struct rtw_dev *rtwdev);
 	void (*shutdown)(struct rtw_dev *rtwdev);
 	int (*read_efuse)(struct rtw_dev *rtwdev, u8 *map);
 	void (*phy_set_param)(struct rtw_dev *rtwdev);
@@ -1876,6 +1877,12 @@ static inline void rtw_release_macid(struct rtw_dev *rtwdev, u8 mac_id)
 	clear_bit(mac_id, rtwdev->mac_id_map);
 }
 
+static inline void rtw_chip_dump_fw_crash(struct rtw_dev *rtwdev)
+{
+	if (rtwdev->chip->ops->dump_fw_crash)
+		rtwdev->chip->ops->dump_fw_crash(rtwdev);
+}
+
 void rtw_get_channel_params(struct cfg80211_chan_def *chandef,
 			    struct rtw_channel_params *ch_param);
 bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target);
@@ -1905,5 +1912,9 @@ int rtw_sta_add(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 void rtw_sta_remove(struct rtw_dev *rtwdev, struct ieee80211_sta *sta,
 		    bool fw_exist);
 void rtw_fw_recovery(struct rtw_dev *rtwdev);
+int rtw_dump_fw(struct rtw_dev *rtwdev, const u32 ocp_src, u32 size,
+		const char *prefix_str);
+int rtw_dump_reg(struct rtw_dev *rtwdev, const u32 addr, const u32 size,
+		 const char *prefix_str);
 
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index ea518aa78552..e3a981ad01a6 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -534,6 +534,7 @@
 #define BIT_DDMACH0_OWN		BIT(31)
 #define BIT_DDMACH0_CHKSUM_EN	BIT(29)
 #define BIT_DDMACH0_CHKSUM_STS	BIT(27)
+#define BIT_DDMACH0_DDMA_MODE	BIT(26)
 #define BIT_DDMACH0_RESET_CHKSUM_STS BIT(25)
 #define BIT_DDMACH0_CHKSUM_CONT	BIT(24)
 #define BIT_MASK_DDMACH0_DLEN	0x3ffff
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index dd560c28abb2..a1f1da3db029 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1396,6 +1396,15 @@ static int rtw8822c_mac_init(struct rtw_dev *rtwdev)
 	return 0;
 }
 
+static void rtw8822c_dump_fw_crash(struct rtw_dev *rtwdev)
+{
+	rtw_dump_reg(rtwdev, 0x0, 0x2000, "rtw8822c reg_");
+	rtw_dump_fw(rtwdev, OCPBASE_DMEM_88XX, 0x10000, "rtw8822c DMEM_");
+	rtw_dump_fw(rtwdev, OCPBASE_IMEM_88XX, 0x10000, "rtw8822c IMEM_");
+	rtw_dump_fw(rtwdev, OCPBASE_EMEM_88XX, 0x20000, "rtw8822c EMEM_");
+	rtw_dump_fw(rtwdev, OCPBASE_ROM_88XX, 0x10000, "rtw8822c ROM_");
+}
+
 static void rtw8822c_rstb_3wire(struct rtw_dev *rtwdev, bool enable)
 {
 	if (enable) {
@@ -3971,6 +3980,7 @@ static struct rtw_chip_ops rtw8822c_ops = {
 	.query_rx_desc		= rtw8822c_query_rx_desc,
 	.set_channel		= rtw8822c_set_channel,
 	.mac_init		= rtw8822c_mac_init,
+	.dump_fw_crash		= rtw8822c_dump_fw_crash,
 	.read_rf		= rtw_phy_read_rf,
 	.write_rf		= rtw_phy_write_rf_reg_mix,
 	.set_tx_power_index	= rtw8822c_set_tx_power_index,
-- 
cgit v1.2.3


From 6503b9f29a47cdb4ebd6c36d8bbb018418415c2a Mon Sep 17 00:00:00 2001
From: Manu Bretelle <chantra@fb.com>
Date: Wed, 10 Mar 2021 10:23:05 -0800
Subject: bpf: Add getter and setter for SO_REUSEPORT through
 bpf_{g,s}etsockopt

Augment the current set of options that are accessible via
bpf_{g,s}etsockopt to also support SO_REUSEPORT.

Signed-off-by: Manu Bretelle <chantra@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210310182305.1910312-1-chantra@fb.com
---
 net/core/filter.c                              |  6 ++++++
 tools/testing/selftests/bpf/progs/bind4_prog.c | 25 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bind6_prog.c | 25 +++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index b6732000d8a2..10dac9dd5086 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4729,6 +4729,9 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 				sk->sk_prot->keepalive(sk, valbool);
 			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
 			break;
+		case SO_REUSEPORT:
+			sk->sk_reuseport = valbool;
+			break;
 		default:
 			ret = -EINVAL;
 		}
@@ -4898,6 +4901,9 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
 		case SO_BINDTOIFINDEX:
 			*((int *)optval) = sk->sk_bound_dev_if;
 			break;
+		case SO_REUSEPORT:
+			*((int *)optval) = sk->sk_reuseport;
+			break;
 		default:
 			goto err_clear;
 		}
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 115a3b0ad984..474c6a62078a 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -57,6 +57,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+	int val = 1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)))
+		return 1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)) || !val)
+		return 1;
+	val = 0;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)))
+		return 1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)) || val)
+		return 1;
+
+	return 0;
+}
+
 static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
 {
 	int old, tmp, new = 0xeb9f;
@@ -127,6 +148,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
 	if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
 		return 0;
 
+	/* Set reuseport and unset */
+	if (bind_reuseport(ctx))
+		return 0;
+
 	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
 	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
 
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 4c0d348034b9..c19cfa869f30 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -63,6 +63,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+	int val = 1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)))
+		return 1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)) || !val)
+		return 1;
+	val = 0;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)))
+		return 1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+			   &val, sizeof(val)) || val)
+		return 1;
+
+	return 0;
+}
+
 static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
 {
 	int old, tmp, new = 0xeb9f;
@@ -141,6 +162,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
 	if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
 		return 0;
 
+	/* Set reuseport and unset */
+	if (bind_reuseport(ctx))
+		return 0;
+
 	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
 	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
 	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
-- 
cgit v1.2.3


From 346497c78d15cdd5bdc3b642a895009359e5457f Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 2 Dec 2020 16:07:22 +0100
Subject: i40e: optimize for XDP_REDIRECT in xsk path

Optimize i40e_run_xdp_zc() for the XDP program verdict being
XDP_REDIRECT in the xsk zero-copy path. This path is only used when
having AF_XDP zero-copy on and in that case most packets will be
directed to user space. This provides a little over 100k extra packets
in throughput on my server when running l2fwd in xdpsock.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index fc32c5019b0f..edddaf034638 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -160,6 +160,13 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -167,10 +174,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
-- 
cgit v1.2.3


From 7d52fe2eaddfa3d7255d43c3e89ebf2748b7ea7a Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 2 Dec 2020 16:07:23 +0100
Subject: ixgbe: optimize for XDP_REDIRECT in xsk path

Optimize ixgbe_run_xdp_zc() for the XDP program verdict being
XDP_REDIRECT in the xsk zero-copy path. This path is only used when
having AF_XDP zero-copy on and in that case most packets will be
directed to user space. This provides a little under 100k extra
packets in throughput on my server when running l2fwd in xdpsock.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Vishakha Jambekar <vishakha.jambekar@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 3771857cf887..91ad5b902673 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -104,6 +104,13 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -115,10 +122,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 		}
 		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
-- 
cgit v1.2.3


From bb52073645a618ab4d93c8d932fb8faf114c55bc Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 2 Dec 2020 16:07:24 +0100
Subject: ice: optimize for XDP_REDIRECT in xsk path

Optimize ice_run_xdp_zc() for the XDP program verdict being
XDP_REDIRECT in the xsk zero-copy path. This path is only used when
having AF_XDP zero-copy on and in that case most packets will be
directed to user space. This provides a little over 100k extra packets
in throughput on my server when running l2fwd in xdpsock.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_xsk.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 83f3c9574ed1..727f277e9d75 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -473,6 +473,14 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		rcu_read_unlock();
+		return result;
+	}
+
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -480,10 +488,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
 		result = ice_xmit_xdp_buff(xdp, xdp_ring);
 		break;
-	case XDP_REDIRECT:
-		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
-		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
-- 
cgit v1.2.3


From db7284a6ccc4a6d7714645141f7dcee0fcb4e57d Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Sun, 14 Mar 2021 13:52:08 +0100
Subject: net: dsa: hellcreek: Offload bridge port flags

The switch implements unicast and multicast filtering per port.
Add support for it. By default filtering is disabled.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 129 ++++++++++++++++++++++++++-------
 1 file changed, 104 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 02d8bcb37f31..64a73dd045c0 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -596,6 +596,83 @@ static void hellcreek_setup_vlan_membership(struct dsa_switch *ds, int port,
 		hellcreek_unapply_vlan(hellcreek, upstream, vid);
 }
 
+static void hellcreek_port_set_ucast_flood(struct hellcreek *hellcreek,
+					   int port, bool enable)
+{
+	struct hellcreek_port *hellcreek_port;
+	u16 val;
+
+	hellcreek_port = &hellcreek->ports[port];
+
+	dev_dbg(hellcreek->dev, "%s unicast flooding on port %d\n",
+		enable ? "Enable" : "Disable", port);
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	hellcreek_select_port(hellcreek, port);
+	val = hellcreek_port->ptcfg;
+	if (enable)
+		val &= ~HR_PTCFG_UUC_FLT;
+	else
+		val |= HR_PTCFG_UUC_FLT;
+	hellcreek_write(hellcreek, val, HR_PTCFG);
+	hellcreek_port->ptcfg = val;
+
+	mutex_unlock(&hellcreek->reg_lock);
+}
+
+static void hellcreek_port_set_mcast_flood(struct hellcreek *hellcreek,
+					   int port, bool enable)
+{
+	struct hellcreek_port *hellcreek_port;
+	u16 val;
+
+	hellcreek_port = &hellcreek->ports[port];
+
+	dev_dbg(hellcreek->dev, "%s multicast flooding on port %d\n",
+		enable ? "Enable" : "Disable", port);
+
+	mutex_lock(&hellcreek->reg_lock);
+
+	hellcreek_select_port(hellcreek, port);
+	val = hellcreek_port->ptcfg;
+	if (enable)
+		val &= ~HR_PTCFG_UMC_FLT;
+	else
+		val |= HR_PTCFG_UMC_FLT;
+	hellcreek_write(hellcreek, val, HR_PTCFG);
+	hellcreek_port->ptcfg = val;
+
+	mutex_unlock(&hellcreek->reg_lock);
+}
+
+static int hellcreek_pre_bridge_flags(struct dsa_switch *ds, int port,
+				      struct switchdev_brport_flags flags,
+				      struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int hellcreek_bridge_flags(struct dsa_switch *ds, int port,
+				  struct switchdev_brport_flags flags,
+				  struct netlink_ext_ack *extack)
+{
+	struct hellcreek *hellcreek = ds->priv;
+
+	if (flags.mask & BR_FLOOD)
+		hellcreek_port_set_ucast_flood(hellcreek, port,
+					       !!(flags.val & BR_FLOOD));
+
+	if (flags.mask & BR_MCAST_FLOOD)
+		hellcreek_port_set_mcast_flood(hellcreek, port,
+					       !!(flags.val & BR_MCAST_FLOOD));
+
+	return 0;
+}
+
 static int hellcreek_port_bridge_join(struct dsa_switch *ds, int port,
 				      struct net_device *br)
 {
@@ -1655,31 +1732,33 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
 }
 
 static const struct dsa_switch_ops hellcreek_ds_ops = {
-	.get_ethtool_stats   = hellcreek_get_ethtool_stats,
-	.get_sset_count	     = hellcreek_get_sset_count,
-	.get_strings	     = hellcreek_get_strings,
-	.get_tag_protocol    = hellcreek_get_tag_protocol,
-	.get_ts_info	     = hellcreek_get_ts_info,
-	.phylink_validate    = hellcreek_phylink_validate,
-	.port_bridge_join    = hellcreek_port_bridge_join,
-	.port_bridge_leave   = hellcreek_port_bridge_leave,
-	.port_disable	     = hellcreek_port_disable,
-	.port_enable	     = hellcreek_port_enable,
-	.port_fdb_add	     = hellcreek_fdb_add,
-	.port_fdb_del	     = hellcreek_fdb_del,
-	.port_fdb_dump	     = hellcreek_fdb_dump,
-	.port_hwtstamp_set   = hellcreek_port_hwtstamp_set,
-	.port_hwtstamp_get   = hellcreek_port_hwtstamp_get,
-	.port_prechangeupper = hellcreek_port_prechangeupper,
-	.port_rxtstamp	     = hellcreek_port_rxtstamp,
-	.port_setup_tc	     = hellcreek_port_setup_tc,
-	.port_stp_state_set  = hellcreek_port_stp_state_set,
-	.port_txtstamp	     = hellcreek_port_txtstamp,
-	.port_vlan_add	     = hellcreek_vlan_add,
-	.port_vlan_del	     = hellcreek_vlan_del,
-	.port_vlan_filtering = hellcreek_vlan_filtering,
-	.setup		     = hellcreek_setup,
-	.teardown	     = hellcreek_teardown,
+	.get_ethtool_stats     = hellcreek_get_ethtool_stats,
+	.get_sset_count	       = hellcreek_get_sset_count,
+	.get_strings	       = hellcreek_get_strings,
+	.get_tag_protocol      = hellcreek_get_tag_protocol,
+	.get_ts_info	       = hellcreek_get_ts_info,
+	.phylink_validate      = hellcreek_phylink_validate,
+	.port_bridge_flags     = hellcreek_bridge_flags,
+	.port_bridge_join      = hellcreek_port_bridge_join,
+	.port_bridge_leave     = hellcreek_port_bridge_leave,
+	.port_disable	       = hellcreek_port_disable,
+	.port_enable	       = hellcreek_port_enable,
+	.port_fdb_add	       = hellcreek_fdb_add,
+	.port_fdb_del	       = hellcreek_fdb_del,
+	.port_fdb_dump	       = hellcreek_fdb_dump,
+	.port_hwtstamp_set     = hellcreek_port_hwtstamp_set,
+	.port_hwtstamp_get     = hellcreek_port_hwtstamp_get,
+	.port_pre_bridge_flags = hellcreek_pre_bridge_flags,
+	.port_prechangeupper   = hellcreek_port_prechangeupper,
+	.port_rxtstamp	       = hellcreek_port_rxtstamp,
+	.port_setup_tc	       = hellcreek_port_setup_tc,
+	.port_stp_state_set    = hellcreek_port_stp_state_set,
+	.port_txtstamp	       = hellcreek_port_txtstamp,
+	.port_vlan_add	       = hellcreek_vlan_add,
+	.port_vlan_del	       = hellcreek_vlan_del,
+	.port_vlan_filtering   = hellcreek_vlan_filtering,
+	.setup		       = hellcreek_setup,
+	.teardown	       = hellcreek_teardown,
 };
 
 static int hellcreek_probe(struct platform_device *pdev)
-- 
cgit v1.2.3


From 8f64860f8b567cc4f8ac854a65cbf6337404c520 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 14 Mar 2021 15:49:19 +0100
Subject: net: export dev_set_threaded symbol

For wireless devices (e.g. mt76 driver) multiple net_devices belongs to
the same wireless phy and the napi object is registered in a dummy
netdevice related to the wireless phy.
Export dev_set_threaded in order to be reused in device drivers enabling
threaded NAPI.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 5a2847a19cf2..6bc20eabd2b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6766,6 +6766,7 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
 
 	return err;
 }
+EXPORT_SYMBOL(dev_set_threaded);
 
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
-- 
cgit v1.2.3


From acdff0df54264d187b648b80bc18265f0698f1ad Mon Sep 17 00:00:00 2001
From: Jianlin Lv <Jianlin.Lv@arm.com>
Date: Sun, 14 Mar 2021 22:56:29 +0800
Subject: bonding: Added -ENODEV interpret for slaves option

When the incorrect interface name is stored in the slaves/active_slave
option of the bonding sysfs, the kernel does not record the log that
interface does not exist.

This patch adds a log for -ENODEV error, which will facilitate users to
figure out such issue.

Signed-off-by: Jianlin Lv <Jianlin.Lv@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_options.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 77d7c38bd435..c9d3604ae129 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -640,6 +640,15 @@ static void bond_opt_error_interpret(struct bonding *bond,
 		netdev_err(bond->dev, "option %s: unable to set because the bond device is up\n",
 			   opt->name);
 		break;
+	case -ENODEV:
+		if (val && val->string) {
+			p = strchr(val->string, '\n');
+			if (p)
+				*p = '\0';
+			netdev_err(bond->dev, "option %s: interface %s does not exist!\n",
+				   opt->name, val->string);
+		}
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 9cb24ea051857f2a7ab85c42842c5baa40497e53 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 14 Mar 2021 18:24:02 +0300
Subject: atm: delete include/linux/atm_suni.h

This file has been effectively empty since 2.3.99-pre3 !

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c   |  1 -
 drivers/atm/suni.c       |  1 -
 include/linux/atm_suni.h | 12 ------------
 3 files changed, 14 deletions(-)
 delete mode 100644 include/linux/atm_suni.h

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 9a70bee84125..0b9c99c3d218 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/atmdev.h>
 #include <linux/sonet.h>
-#include <linux/atm_suni.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
 #include <linux/firmware.h>
diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c
index c920a8c52925..21e5acc766b8 100644
--- a/drivers/atm/suni.c
+++ b/drivers/atm/suni.c
@@ -21,7 +21,6 @@
 #include <linux/timer.h>
 #include <linux/init.h>
 #include <linux/capability.h>
-#include <linux/atm_suni.h>
 #include <linux/slab.h>
 #include <asm/param.h>
 #include <linux/uaccess.h>
diff --git a/include/linux/atm_suni.h b/include/linux/atm_suni.h
deleted file mode 100644
index 84f3aab54468..000000000000
--- a/include/linux/atm_suni.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* atm_suni.h - Driver-specific declarations of the SUNI driver (for use by
-		driver-specific utilities) */
-
-/* Written 1998,2000 by Werner Almesberger, EPFL ICA */
-
-
-#ifndef LINUX_ATM_SUNI_H
-#define LINUX_ATM_SUNI_H
-
-/* everything obsoleted */
-
-#endif
-- 
cgit v1.2.3


From a7dde236b3173c54195900dd46ffc5730de1a216 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 15 Mar 2021 03:52:21 +0530
Subject: ethernet: amazon: ena: A typo fix in the file ena_com.h

Mundane typo fix.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 343caf41e709..73b03ce59412 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -124,7 +124,7 @@ struct ena_com_io_cq {
 
 	/* holds the number of cdesc of the current packet */
 	u16 cur_rx_pkt_cdesc_count;
-	/* save the firt cdesc idx of the current packet */
+	/* save the first cdesc idx of the current packet */
 	u16 cur_rx_pkt_cdesc_start_idx;
 
 	u16 q_depth;
-- 
cgit v1.2.3


From 6f05a1224174a67a71a196e9e776b1ce72aa4a3d Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 15 Mar 2021 07:18:47 +0530
Subject: net: ethernet: intel: igb: Typo fix in the file igb_main.c

s/structue/structure/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 369533feb4f2..f929eaa836c8 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3115,7 +3115,7 @@ static s32 igb_init_i2c(struct igb_adapter *adapter)
 		return 0;
 
 	/* Initialize the i2c bus which is controlled by the registers.
-	 * This bus will use the i2c_algo_bit structue that implements
+	 * This bus will use the i2c_algo_bit structure that implements
 	 * the protocol through toggling of the 4 bits in the register.
 	 */
 	adapter->i2c_adap.owner = THIS_MODULE;
-- 
cgit v1.2.3


From 29c35da103471d7fc39839192bead60fbf3ba1f9 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 15 Mar 2021 07:23:22 +0530
Subject: net: ethernet: neterion: Fix a typo in the file s2io.c

s/structue/structure/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/s2io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 8f2f091bce89..9cfcd5500462 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6657,7 +6657,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
 
 /**
  * s2io_set_link - Set the LInk status
- * @work: work struct containing a pointer to device private structue
+ * @work: work struct containing a pointer to device private structure
  * Description: Sets the link status for the adapter
  */
 
-- 
cgit v1.2.3


From 07a4bc51fc732b3618fd46dc51609948933064a4 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:06 +0800
Subject: net: pcs: rearrange C73 functions to prepare for C37 support later

The current implementation for XPCS is validated for C73, so we rename them
to have _c73 suffix and introduce a set of functions to use an_mode flag
to switch between C73 and C37 AN later.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c   | 94 +++++++++++++++++++++++++++++++-------------
 include/linux/pcs/pcs-xpcs.h |  4 ++
 2 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 1aa9903d602e..10def2d98696 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -125,22 +125,26 @@ static struct xpcs_id {
 	u32 mask;
 	const int *supported;
 	const phy_interface_t *interface;
+	int an_mode;
 } xpcs_id_list[] = {
 	{
 		.id = SYNOPSYS_XPCS_USXGMII_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_usxgmii_features,
 		.interface = xpcs_usxgmii_interfaces,
+		.an_mode = DW_AN_C73,
 	}, {
 		.id = SYNOPSYS_XPCS_10GKR_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_10gkr_features,
 		.interface = xpcs_10gkr_interfaces,
+		.an_mode = DW_AN_C73,
 	}, {
 		.id = SYNOPSYS_XPCS_XLGMII_ID,
 		.mask = SYNOPSYS_XPCS_MASK,
 		.supported = xpcs_xlgmii_features,
 		.interface = xpcs_xlgmii_interfaces,
+		.an_mode = DW_AN_C73,
 	},
 };
 
@@ -195,9 +199,17 @@ static int xpcs_poll_reset(struct mdio_xpcs_args *xpcs, int dev)
 	return (ret & MDIO_CTRL1_RESET) ? -ETIMEDOUT : 0;
 }
 
-static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs, int dev)
+static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs)
 {
-	int ret;
+	int ret, dev;
+
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		dev = MDIO_MMD_PCS;
+		break;
+	default:
+		return -1;
+	}
 
 	ret = xpcs_write(xpcs, dev, MDIO_CTRL1, MDIO_CTRL1_RESET);
 	if (ret < 0)
@@ -212,8 +224,8 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs, int dev)
 		dev_warn(&(__xpcs)->bus->dev, ##__args); \
 })
 
-static int xpcs_read_fault(struct mdio_xpcs_args *xpcs,
-			   struct phylink_link_state *state)
+static int xpcs_read_fault_c73(struct mdio_xpcs_args *xpcs,
+			       struct phylink_link_state *state)
 {
 	int ret;
 
@@ -263,7 +275,7 @@ static int xpcs_read_fault(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_link(struct mdio_xpcs_args *xpcs, bool an)
+static int xpcs_read_link_c73(struct mdio_xpcs_args *xpcs, bool an)
 {
 	bool link = true;
 	int ret;
@@ -357,7 +369,7 @@ static int xpcs_config_usxgmii(struct mdio_xpcs_args *xpcs, int speed)
 	return xpcs_write_vpcs(xpcs, MDIO_CTRL1, ret | DW_USXGMII_RST);
 }
 
-static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
+static int _xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 {
 	int ret, adv;
 
@@ -401,11 +413,11 @@ static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_AN, DW_SR_AN_ADV1, adv);
 }
 
-static int xpcs_config_aneg(struct mdio_xpcs_args *xpcs)
+static int xpcs_config_aneg_c73(struct mdio_xpcs_args *xpcs)
 {
 	int ret;
 
-	ret = xpcs_config_aneg_c73(xpcs);
+	ret = _xpcs_config_aneg_c73(xpcs);
 	if (ret < 0)
 		return ret;
 
@@ -418,8 +430,8 @@ static int xpcs_config_aneg(struct mdio_xpcs_args *xpcs)
 	return xpcs_write(xpcs, MDIO_MMD_AN, MDIO_CTRL1, ret);
 }
 
-static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
-			  struct phylink_link_state *state)
+static int xpcs_aneg_done_c73(struct mdio_xpcs_args *xpcs,
+			      struct phylink_link_state *state)
 {
 	int ret;
 
@@ -434,7 +446,7 @@ static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
 
 		/* Check if Aneg outcome is valid */
 		if (!(ret & DW_C73_AN_ADV_SF)) {
-			xpcs_config_aneg(xpcs);
+			xpcs_config_aneg_c73(xpcs);
 			return 0;
 		}
 
@@ -444,8 +456,8 @@ static int xpcs_aneg_done(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static int xpcs_read_lpa(struct mdio_xpcs_args *xpcs,
-			 struct phylink_link_state *state)
+static int xpcs_read_lpa_c73(struct mdio_xpcs_args *xpcs,
+			     struct phylink_link_state *state)
 {
 	int ret;
 
@@ -493,8 +505,8 @@ static int xpcs_read_lpa(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
-static void xpcs_resolve_lpa(struct mdio_xpcs_args *xpcs,
-			     struct phylink_link_state *state)
+static void xpcs_resolve_lpa_c73(struct mdio_xpcs_args *xpcs,
+				 struct phylink_link_state *state)
 {
 	int max_speed = xpcs_get_max_usxgmii_speed(state->lp_advertising);
 
@@ -590,27 +602,33 @@ static int xpcs_config(struct mdio_xpcs_args *xpcs,
 {
 	int ret;
 
-	if (state->an_enabled) {
-		ret = xpcs_config_aneg(xpcs);
-		if (ret)
-			return ret;
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		if (state->an_enabled) {
+			ret = xpcs_config_aneg_c73(xpcs);
+			if (ret)
+				return ret;
+		}
+		break;
+	default:
+		return -1;
 	}
 
 	return 0;
 }
 
-static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
-			  struct phylink_link_state *state)
+static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
+			      struct phylink_link_state *state)
 {
 	int ret;
 
 	/* Link needs to be read first ... */
-	state->link = xpcs_read_link(xpcs, state->an_enabled) > 0 ? 1 : 0;
+	state->link = xpcs_read_link_c73(xpcs, state->an_enabled) > 0 ? 1 : 0;
 
 	/* ... and then we check the faults. */
-	ret = xpcs_read_fault(xpcs, state);
+	ret = xpcs_read_fault_c73(xpcs, state);
 	if (ret) {
-		ret = xpcs_soft_reset(xpcs, MDIO_MMD_PCS);
+		ret = xpcs_soft_reset(xpcs);
 		if (ret)
 			return ret;
 
@@ -619,10 +637,10 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 		return xpcs_config(xpcs, state);
 	}
 
-	if (state->an_enabled && xpcs_aneg_done(xpcs, state)) {
+	if (state->an_enabled && xpcs_aneg_done_c73(xpcs, state)) {
 		state->an_complete = true;
-		xpcs_read_lpa(xpcs, state);
-		xpcs_resolve_lpa(xpcs, state);
+		xpcs_read_lpa_c73(xpcs, state);
+		xpcs_resolve_lpa_c73(xpcs, state);
 	} else if (state->an_enabled) {
 		state->link = 0;
 	} else if (state->link) {
@@ -632,6 +650,24 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
+			  struct phylink_link_state *state)
+{
+	int ret;
+
+	switch (xpcs->an_mode) {
+	case DW_AN_C73:
+		ret = xpcs_get_state_c73(xpcs, state);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
 static int xpcs_link_up(struct mdio_xpcs_args *xpcs, int speed,
 			phy_interface_t interface)
 {
@@ -676,6 +712,8 @@ static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
 	for (i = 0; match->supported[i] != __ETHTOOL_LINK_MODE_MASK_NBITS; i++)
 		set_bit(match->supported[i], xpcs->supported);
 
+	xpcs->an_mode = match->an_mode;
+
 	return true;
 }
 
@@ -692,7 +730,7 @@ static int xpcs_probe(struct mdio_xpcs_args *xpcs, phy_interface_t interface)
 			match = entry;
 
 			if (xpcs_check_features(xpcs, match, interface))
-				return xpcs_soft_reset(xpcs, MDIO_MMD_PCS);
+				return xpcs_soft_reset(xpcs);
 		}
 	}
 
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index 351c1c9aedc5..a04e57c25fea 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -10,10 +10,14 @@
 #include <linux/phy.h>
 #include <linux/phylink.h>
 
+/* AN mode */
+#define DW_AN_C73			1
+
 struct mdio_xpcs_args {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 	struct mii_bus *bus;
 	int addr;
+	int an_mode;
 };
 
 struct mdio_xpcs_ops {
-- 
cgit v1.2.3


From b97b5331b8ab7f60fb880e0c31c9b09b73d2fa4e Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:07 +0800
Subject: net: pcs: add C37 SGMII AN support for intel mGbE controller

XPCS IP supports C37 SGMII AN process and it is used in intel multi-GbE
controller as MAC-side SGMII.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pcs/pcs-xpcs.c   | 167 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/pcs/pcs-xpcs.h |   1 +
 2 files changed, 167 insertions(+), 1 deletion(-)

diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 10def2d98696..944ba105cac1 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -15,6 +15,7 @@
 #define SYNOPSYS_XPCS_USXGMII_ID	0x7996ced0
 #define SYNOPSYS_XPCS_10GKR_ID		0x7996ced0
 #define SYNOPSYS_XPCS_XLGMII_ID		0x7996ced0
+#define SYNOPSYS_XPCS_SGMII_ID		0x7996ced0
 #define SYNOPSYS_XPCS_MASK		0xffffffff
 
 /* Vendor regs access */
@@ -57,6 +58,34 @@
 #define DW_C73_2500KX			BIT(0)
 #define DW_C73_5000KR			BIT(1)
 
+/* Clause 37 Defines */
+/* VR MII MMD registers offsets */
+#define DW_VR_MII_DIG_CTRL1		0x8000
+#define DW_VR_MII_AN_CTRL		0x8001
+#define DW_VR_MII_AN_INTR_STS		0x8002
+
+/* VR_MII_DIG_CTRL1 */
+#define DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW		BIT(9)
+
+/* VR_MII_AN_CTRL */
+#define DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT	3
+#define DW_VR_MII_TX_CONFIG_MASK		BIT(3)
+#define DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII	0x1
+#define DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII	0x0
+#define DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT	1
+#define DW_VR_MII_PCS_MODE_MASK			GENMASK(2, 1)
+#define DW_VR_MII_PCS_MODE_C37_1000BASEX	0x0
+#define DW_VR_MII_PCS_MODE_C37_SGMII		0x2
+
+/* VR_MII_AN_INTR_STS */
+#define DW_VR_MII_AN_STS_C37_ANSGM_FD		BIT(1)
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT	2
+#define DW_VR_MII_AN_STS_C37_ANSGM_SP		GENMASK(3, 2)
+#define DW_VR_MII_C37_ANSGM_SP_10		0x0
+#define DW_VR_MII_C37_ANSGM_SP_100		0x1
+#define DW_VR_MII_C37_ANSGM_SP_1000		0x2
+#define DW_VR_MII_C37_ANSGM_SP_LNKSTS		BIT(4)
+
 static const int xpcs_usxgmii_features[] = {
 	ETHTOOL_LINK_MODE_Pause_BIT,
 	ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -105,6 +134,16 @@ static const int xpcs_xlgmii_features[] = {
 	__ETHTOOL_LINK_MODE_MASK_NBITS,
 };
 
+static const int xpcs_sgmii_features[] = {
+	ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+	__ETHTOOL_LINK_MODE_MASK_NBITS,
+};
+
 static const phy_interface_t xpcs_usxgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_USXGMII,
 	PHY_INTERFACE_MODE_MAX,
@@ -120,6 +159,11 @@ static const phy_interface_t xpcs_xlgmii_interfaces[] = {
 	PHY_INTERFACE_MODE_MAX,
 };
 
+static const phy_interface_t xpcs_sgmii_interfaces[] = {
+	PHY_INTERFACE_MODE_SGMII,
+	PHY_INTERFACE_MODE_MAX,
+};
+
 static struct xpcs_id {
 	u32 id;
 	u32 mask;
@@ -145,6 +189,12 @@ static struct xpcs_id {
 		.supported = xpcs_xlgmii_features,
 		.interface = xpcs_xlgmii_interfaces,
 		.an_mode = DW_AN_C73,
+	}, {
+		.id = SYNOPSYS_XPCS_SGMII_ID,
+		.mask = SYNOPSYS_XPCS_MASK,
+		.supported = xpcs_sgmii_features,
+		.interface = xpcs_sgmii_interfaces,
+		.an_mode = DW_AN_C37_SGMII,
 	},
 };
 
@@ -207,6 +257,9 @@ static int xpcs_soft_reset(struct mdio_xpcs_args *xpcs)
 	case DW_AN_C73:
 		dev = MDIO_MMD_PCS;
 		break;
+	case DW_AN_C37_SGMII:
+		dev = MDIO_MMD_VEND2;
+		break;
 	default:
 		return -1;
 	}
@@ -597,6 +650,47 @@ static int xpcs_validate(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_config_aneg_c37_sgmii(struct mdio_xpcs_args *xpcs)
+{
+	int ret;
+
+	/* For AN for C37 SGMII mode, the settings are :-
+	 * 1) VR_MII_AN_CTRL Bit(2:1)[PCS_MODE] = 10b (SGMII AN)
+	 * 2) VR_MII_AN_CTRL Bit(3) [TX_CONFIG] = 0b (MAC side SGMII)
+	 *    DW xPCS used with DW EQoS MAC is always MAC side SGMII.
+	 * 3) VR_MII_DIG_CTRL1 Bit(9) [MAC_AUTO_SW] = 1b (Automatic
+	 *    speed/duplex mode change by HW after SGMII AN complete)
+	 *
+	 * Note: Since it is MAC side SGMII, there is no need to set
+	 *	 SR_MII_AN_ADV. MAC side SGMII receives AN Tx Config from
+	 *	 PHY about the link state change after C28 AN is completed
+	 *	 between PHY and Link Partner. There is also no need to
+	 *	 trigger AN restart for MAC-side SGMII.
+	 */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL);
+	if (ret < 0)
+		return ret;
+
+	ret &= ~(DW_VR_MII_PCS_MODE_MASK | DW_VR_MII_TX_CONFIG_MASK);
+	ret |= (DW_VR_MII_PCS_MODE_C37_SGMII <<
+		DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT &
+		DW_VR_MII_PCS_MODE_MASK);
+	ret |= (DW_VR_MII_TX_CONFIG_MAC_SIDE_SGMII <<
+		DW_VR_MII_AN_CTRL_TX_CONFIG_SHIFT &
+		DW_VR_MII_TX_CONFIG_MASK);
+	ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_CTRL, ret);
+	if (ret < 0)
+		return ret;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1);
+	if (ret < 0)
+		return ret;
+
+	ret |= DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW;
+
+	return xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret);
+}
+
 static int xpcs_config(struct mdio_xpcs_args *xpcs,
 		       const struct phylink_link_state *state)
 {
@@ -610,6 +704,11 @@ static int xpcs_config(struct mdio_xpcs_args *xpcs,
 				return ret;
 		}
 		break;
+	case DW_AN_C37_SGMII:
+		ret = xpcs_config_aneg_c37_sgmii(xpcs);
+		if (ret)
+			return ret;
+		break;
 	default:
 		return -1;
 	}
@@ -650,6 +749,47 @@ static int xpcs_get_state_c73(struct mdio_xpcs_args *xpcs,
 	return 0;
 }
 
+static int xpcs_get_state_c37_sgmii(struct mdio_xpcs_args *xpcs,
+				    struct phylink_link_state *state)
+{
+	int ret;
+
+	/* Reset link_state */
+	state->link = false;
+	state->speed = SPEED_UNKNOWN;
+	state->duplex = DUPLEX_UNKNOWN;
+	state->pause = 0;
+
+	/* For C37 SGMII mode, we check DW_VR_MII_AN_INTR_STS for link
+	 * status, speed and duplex.
+	 */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+	if (ret < 0)
+		return false;
+
+	if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+		int speed_value;
+
+		state->link = true;
+
+		speed_value = (ret & DW_VR_MII_AN_STS_C37_ANSGM_SP) >>
+			      DW_VR_MII_AN_STS_C37_ANSGM_SP_SHIFT;
+		if (speed_value == DW_VR_MII_C37_ANSGM_SP_1000)
+			state->speed = SPEED_1000;
+		else if (speed_value == DW_VR_MII_C37_ANSGM_SP_100)
+			state->speed = SPEED_100;
+		else
+			state->speed = SPEED_10;
+
+		if (ret & DW_VR_MII_AN_STS_C37_ANSGM_FD)
+			state->duplex = DUPLEX_FULL;
+		else
+			state->duplex = DUPLEX_HALF;
+	}
+
+	return 0;
+}
+
 static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 			  struct phylink_link_state *state)
 {
@@ -661,6 +801,11 @@ static int xpcs_get_state(struct mdio_xpcs_args *xpcs,
 		if (ret)
 			return ret;
 		break;
+	case DW_AN_C37_SGMII:
+		ret = xpcs_get_state_c37_sgmii(xpcs, state);
+		if (ret)
+			return ret;
+		break;
 	default:
 		return -1;
 	}
@@ -682,6 +827,7 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 	int ret;
 	u32 id;
 
+	/* First, search C73 PCS using PCS MMD */
 	ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID1);
 	if (ret < 0)
 		return 0xffffffff;
@@ -692,7 +838,26 @@ static u32 xpcs_get_id(struct mdio_xpcs_args *xpcs)
 	if (ret < 0)
 		return 0xffffffff;
 
-	return id | ret;
+	/* If Device IDs are not all zeros, we found C73 AN-type device */
+	if (id | ret)
+		return id | ret;
+
+	/* Next, search C37 PCS using Vendor-Specific MII MMD */
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1);
+	if (ret < 0)
+		return 0xffffffff;
+
+	id = ret << 16;
+
+	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2);
+	if (ret < 0)
+		return 0xffffffff;
+
+	/* If Device IDs are not all zeros, we found C37 AN-type device */
+	if (id | ret)
+		return id | ret;
+
+	return 0xffffffff;
 }
 
 static bool xpcs_check_features(struct mdio_xpcs_args *xpcs,
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index a04e57c25fea..2cb5188a7ef1 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -12,6 +12,7 @@
 
 /* AN mode */
 #define DW_AN_C73			1
+#define DW_AN_C37_SGMII			2
 
 struct mdio_xpcs_args {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
-- 
cgit v1.2.3


From ab39385021d1e0b4cd6cc521dc35c2fe659bbddf Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:08 +0800
Subject: net: phylink: make phylink_parse_mode() support non-DT platform

Certain platform does not support DT, so we make phylink_parse_mode() to
allow non-DT platform to use it to setup in-band AN advertising.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 5 +++--
 include/linux/phylink.h   | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 053c92e02cd8..12a047d47dec 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -271,8 +271,9 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
 		pl->cfg_link_an_mode = MLO_AN_FIXED;
 	fwnode_handle_put(dn);
 
-	if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
-	    strcmp(managed, "in-band-status") == 0) {
+	if ((fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
+	     strcmp(managed, "in-band-status") == 0) ||
+	    pl->config->ovr_an_inband) {
 		if (pl->cfg_link_an_mode == MLO_AN_FIXED) {
 			phylink_err(pl,
 				    "can't use both fixed-link and in-band-status\n");
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index d81a714cfbbd..fd2acfd9b597 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -64,6 +64,7 @@ enum phylink_op_type {
  * @pcs_poll: MAC PCS cannot provide link change interrupt
  * @poll_fixed_state: if true, starts link_poll,
  *		      if MAC link is at %MLO_AN_FIXED mode.
+ * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
  * @get_fixed_state: callback to execute to determine the fixed link state,
  *		     if MAC link is at %MLO_AN_FIXED mode.
  */
@@ -72,6 +73,7 @@ struct phylink_config {
 	enum phylink_op_type type;
 	bool pcs_poll;
 	bool poll_fixed_state;
+	bool ovr_an_inband;
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
 };
-- 
cgit v1.2.3


From e5e5b771f684c22b25c67df85d2deb43901f7b95 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:09 +0800
Subject: net: stmmac: make in-band AN mode parsing is supported for non-DT

Not all platform uses DT, so phylink_parse_mode() will skip in-band setup
of pl->supported and pl->link_config.advertising entirely. So, we add the
setting of ovr_an_inband flag to make it works for non-DT platform.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
 include/linux/stmmac.h                            | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 208cae344ffa..b64ee029d41f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1117,6 +1117,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	priv->phylink_config.dev = &priv->dev->dev;
 	priv->phylink_config.type = PHYLINK_NETDEV;
 	priv->phylink_config.pcs_poll = true;
+	priv->phylink_config.ovr_an_inband =
+		priv->plat->mdio_bus_data->xpcs_an_inband;
 
 	if (!fwnode)
 		fwnode = dev_fwnode(priv->device);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index a302982de2d7..722dc167b5c9 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -81,6 +81,7 @@
 struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
 	unsigned int has_xpcs;
+	unsigned int xpcs_an_inband;
 	int *irqs;
 	int probed_phy_irq;
 	bool needs_reset;
-- 
cgit v1.2.3


From c62808e8105f6a0872ac716c0030e97af81e4cf0 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:10 +0800
Subject: net: stmmac: ensure phydev is attached to phylink for C37 AN

As the support for MAC-side SGMII C37 AN is added to pcs-xpcs, phydev
should be attached to phylink during driver's open(). So, we change the
condition to "Not C73 AN" instead.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index b64ee029d41f..e58ff652e95f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2898,7 +2898,7 @@ static int stmmac_open(struct net_device *dev)
 
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
-	    priv->hw->xpcs == NULL) {
+	    priv->hw->xpcs_args.an_mode != DW_AN_C73) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			netdev_err(priv->dev,
-- 
cgit v1.2.3


From 7310fe538ea5c966a773cbc39272ef145f8d05ae Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Mon, 15 Mar 2021 13:27:11 +0800
Subject: stmmac: intel: add pcs-xpcs for Intel mGbE controller

Intel mGbE controller such as those in EHL & TGL uses pcs-xpcs driver for
SGMII interface. To ensure mdio bus scanning does not assign phy_device
to MDIO-addressable entities like intel serdes and pcs-xpcs, we set up
to phy_mask to skip them.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 0b64f7710d17..c49646773871 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -9,6 +9,9 @@
 #include "dwmac4.h"
 #include "stmmac.h"
 
+#define INTEL_MGBE_ADHOC_ADDR	0x15
+#define INTEL_MGBE_XPCS_ADDR	0x16
+
 struct intel_priv_data {
 	int mdio_adhoc_addr;	/* mdio address for serdes & etc */
 };
@@ -333,6 +336,16 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	/* Use the last Rx queue */
 	plat->vlan_fail_q = plat->rx_queues_to_use - 1;
 
+	/* Intel mgbe SGMII interface uses pcs-xcps */
+	if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII) {
+		plat->mdio_bus_data->has_xpcs = true;
+		plat->mdio_bus_data->xpcs_an_inband = true;
+	}
+
+	/* Ensure mdio bus scan skips intel serdes and pcs-xpcs */
+	plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR;
+	plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;
+
 	return 0;
 }
 
@@ -664,7 +677,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	pci_set_master(pdev);
 
 	plat->bsp_priv = intel_priv;
-	intel_priv->mdio_adhoc_addr = 0x15;
+	intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR;
 
 	ret = info->setup(pdev, plat);
 	if (ret)
-- 
cgit v1.2.3


From c06632a4ec3a7b823f0e9c04cd4836135bba44bf Mon Sep 17 00:00:00 2001
From: Sonny Sasaka <sonnysasaka@chromium.org>
Date: Mon, 15 Mar 2021 10:30:59 -0700
Subject: Bluetooth: Cancel le_scan_restart work when stopping discovery

Not cancelling it has caused a bug where passive background scanning is
disabled out of the blue, preventing BLE keyboards/mice to reconnect.
Here is how it happens:
After hci_req_stop_discovery, there is still le_scan_restart_work
scheduled. Invocation of le_scan_restart_work causes a harmful
le_scan_disable_work to be scheduled. This le_scan_disable_work will
eventually disable passive scanning when the timer fires.

Sample btmon trace:

< HCI Command: LE Set Scan Parameters (0x08|0x000b) plen 7
        Type: Passive (0x00)
        Interval: 367.500 msec (0x024c)
        Window: 37.500 msec (0x003c)
        Own address type: Public (0x00)
        Filter policy: Accept all advertisement (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Scan Parameters (0x08|0x000b) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Scan Enable (0x08|0x000c) plen 2
        Scanning: Enabled (0x01)
        Filter duplicates: Disabled (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Scan Enable (0x08|0x000c) ncmd 2
        Status: Success (0x00)
...
< HCI Command: LE Set Scan Enable (0x08|0x000c) plen 2
        Scanning: Disabled (0x00)
        Filter duplicates: Disabled (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Scan Enable (0x08|0x000c) ncmd 2
        Status: Success (0x00)
// Background scanning is not working here onwards.

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index d7ee11ef70d3..8ace5d34b01e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -3252,6 +3252,7 @@ bool hci_req_stop_discovery(struct hci_request *req)
 
 		if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
 			cancel_delayed_work(&hdev->le_scan_disable);
+			cancel_delayed_work(&hdev->le_scan_restart);
 			hci_req_add_le_scan_disable(req, false);
 		}
 
-- 
cgit v1.2.3


From 7cf3b1dd6aa603fd80969e9e7160becf1455a0eb Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 15 Mar 2021 13:04:37 -0700
Subject: Bluetooth: L2CAP: Fix not checking for maximum number of DCID

When receiving L2CAP_CREDIT_BASED_CONNECTION_REQ the remote may request
more channels than allowed by the spec (10 octecs = 5 CIDs) so this
checks if the number of channels is bigger than the maximum allowed and
respond with an error.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  1 +
 net/bluetooth/l2cap_core.c    | 12 +++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 61800a7b6192..3c4f550e5a8b 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -494,6 +494,7 @@ struct l2cap_le_credits {
 
 #define L2CAP_ECRED_MIN_MTU		64
 #define L2CAP_ECRED_MIN_MPS		64
+#define L2CAP_ECRED_MAX_CID		5
 
 struct l2cap_ecred_conn_req {
 	__le16 psm;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 72c2f5226d67..374cc32d7138 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5921,7 +5921,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
 	struct l2cap_ecred_conn_req *req = (void *) data;
 	struct {
 		struct l2cap_ecred_conn_rsp rsp;
-		__le16 dcid[5];
+		__le16 dcid[L2CAP_ECRED_MAX_CID];
 	} __packed pdu;
 	struct l2cap_chan *chan, *pchan;
 	u16 mtu, mps;
@@ -5938,6 +5938,14 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
 		goto response;
 	}
 
+	cmd_len -= sizeof(*req);
+	num_scid = cmd_len / sizeof(u16);
+
+	if (num_scid > ARRAY_SIZE(pdu.dcid)) {
+		result = L2CAP_CR_LE_INVALID_PARAMS;
+		goto response;
+	}
+
 	mtu  = __le16_to_cpu(req->mtu);
 	mps  = __le16_to_cpu(req->mps);
 
@@ -5970,8 +5978,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
 	}
 
 	result = L2CAP_CR_LE_SUCCESS;
-	cmd_len -= sizeof(*req);
-	num_scid = cmd_len / sizeof(u16);
 
 	for (i = 0; i < num_scid; i++) {
 		u16 scid = __le16_to_cpu(req->scid[i]);
-- 
cgit v1.2.3


From 2e1614f7d61e407f1a8e7935a2903a6fa3cb0b11 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 15 Mar 2021 14:39:29 -0700
Subject: Bluetooth: SMP: Convert BT_ERR/BT_DBG to bt_dev_err/bt_dev_dbg

This converts instances of BT_ERR and BT_DBG to bt_dev_err and
bt_dev_dbg which can be enabled at runtime when BT_FEATURE_DEBUG is
enabled.

Note: Not all instances could be converted as some are exercised by
selftest.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 98 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 46 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index e03cc284161c..efc19f98b959 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -595,7 +595,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 	if (!chan)
 		return;
 
-	BT_DBG("code 0x%2.2x", code);
+	bt_dev_dbg(conn->hcon->hdev, "code 0x%2.2x", code);
 
 	iv[0].iov_base = &code;
 	iv[0].iov_len = 1;
@@ -859,7 +859,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	memset(smp->tk, 0, sizeof(smp->tk));
 	clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
 
-	BT_DBG("tk_request: auth:%d lcl:%d rem:%d", auth, local_io, remote_io);
+	bt_dev_dbg(hcon->hdev, "auth:%d lcl:%d rem:%d", auth, local_io,
+		   remote_io);
 
 	/* If neither side wants MITM, either "just" confirm an incoming
 	 * request or use just-works for outgoing ones. The JUST_CFM
@@ -924,7 +925,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 		get_random_bytes(&passkey, sizeof(passkey));
 		passkey %= 1000000;
 		put_unaligned_le32(passkey, smp->tk);
-		BT_DBG("PassKey: %d", passkey);
+		bt_dev_dbg(hcon->hdev, "PassKey: %d", passkey);
 		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
 	}
 
@@ -949,7 +950,7 @@ static u8 smp_confirm(struct smp_chan *smp)
 	struct smp_cmd_pairing_confirm cp;
 	int ret;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
 
 	ret = smp_c1(smp->tk, smp->prnd, smp->preq, smp->prsp,
 		     conn->hcon->init_addr_type, &conn->hcon->init_addr,
@@ -977,7 +978,8 @@ static u8 smp_random(struct smp_chan *smp)
 	u8 confirm[16];
 	int ret;
 
-	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
+	bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn,
+		   conn->hcon->out ? "master" : "slave");
 
 	ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
 		     hcon->init_addr_type, &hcon->init_addr,
@@ -1236,7 +1238,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
 	struct hci_dev *hdev = hcon->hdev;
 	__u8 *keydist;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	rsp = (void *) &smp->prsp[1];
 
@@ -1266,7 +1268,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
 		*keydist &= ~SMP_SC_NO_DIST;
 	}
 
-	BT_DBG("keydist 0x%x", *keydist);
+	bt_dev_dbg(hdev, "keydist 0x%x", *keydist);
 
 	if (*keydist & SMP_DIST_ENC_KEY) {
 		struct smp_cmd_encrypt_info enc;
@@ -1366,13 +1368,14 @@ static void smp_timeout(struct work_struct *work)
 					    security_timer.work);
 	struct l2cap_conn *conn = smp->conn;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
 
 	hci_disconnect(conn->hcon, HCI_ERROR_REMOTE_USER_TERM);
 }
 
 static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 {
+	struct hci_conn *hcon = conn->hcon;
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp;
 
@@ -1382,13 +1385,13 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
-		BT_ERR("Unable to create CMAC crypto context");
+		bt_dev_err(hcon->hdev, "Unable to create CMAC crypto context");
 		goto zfree_smp;
 	}
 
 	smp->tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0);
 	if (IS_ERR(smp->tfm_ecdh)) {
-		BT_ERR("Unable to create ECDH crypto context");
+		bt_dev_err(hcon->hdev, "Unable to create ECDH crypto context");
 		goto free_shash;
 	}
 
@@ -1399,7 +1402,7 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
 
 	INIT_DELAYED_WORK(&smp->security_timer, smp_timeout);
 
-	hci_conn_hold(conn->hcon);
+	hci_conn_hold(hcon);
 
 	return smp;
 
@@ -1564,8 +1567,8 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
 		if (!hcon->out)
 			return 0;
 
-		BT_DBG("%s Starting passkey round %u", hdev->name,
-		       smp->passkey_round + 1);
+		bt_dev_dbg(hdev, "Starting passkey round %u",
+			   smp->passkey_round + 1);
 
 		SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM);
 
@@ -1625,7 +1628,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
 	u32 value;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(conn->hcon->hdev, "");
 
 	if (!conn)
 		return -ENOTCONN;
@@ -1651,7 +1654,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
 	case MGMT_OP_USER_PASSKEY_REPLY:
 		value = le32_to_cpu(passkey);
 		memset(smp->tk, 0, sizeof(smp->tk));
-		BT_DBG("PassKey: %d", value);
+		bt_dev_dbg(conn->hcon->hdev, "PassKey: %d", value);
 		put_unaligned_le32(value, smp->tk);
 		fallthrough;
 	case MGMT_OP_USER_CONFIRM_REPLY:
@@ -1733,7 +1736,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	u8 key_size, auth, sec_level;
 	int ret;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*req))
 		return SMP_INVALID_PARAMS;
@@ -1887,7 +1890,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 	}
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
-		BT_DBG("Using debug keys");
+		bt_dev_dbg(hdev, "Using debug keys");
 		if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
 			return SMP_UNSPECIFIED;
 		memcpy(smp->local_pk, debug_pk, 64);
@@ -1924,7 +1927,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb)
 	u8 key_size, auth;
 	int ret;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*rsp))
 		return SMP_INVALID_PARAMS;
@@ -2019,7 +2022,7 @@ static u8 sc_check_confirm(struct smp_chan *smp)
 {
 	struct l2cap_conn *conn = smp->conn;
 
-	BT_DBG("");
+	bt_dev_dbg(conn->hcon->hdev, "");
 
 	if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY)
 		return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM);
@@ -2078,8 +2081,10 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
 {
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
+	struct hci_conn *hcon = conn->hcon;
+	struct hci_dev *hdev = hcon->hdev;
 
-	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
+	bt_dev_dbg(hdev, "conn %p %s", conn, hcon->out ? "master" : "slave");
 
 	if (skb->len < sizeof(smp->pcnf))
 		return SMP_INVALID_PARAMS;
@@ -2094,7 +2099,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
 		if (test_bit(SMP_FLAG_REMOTE_PK, &smp->flags))
 			return sc_check_confirm(smp);
 
-		BT_ERR("Unexpected SMP Pairing Confirm");
+		bt_dev_err(hdev, "Unexpected SMP Pairing Confirm");
 
 		ret = fixup_sc_false_positive(smp);
 		if (ret)
@@ -2125,7 +2130,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
 	u32 passkey;
 	int err;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hcon->hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(smp->rrnd))
 		return SMP_INVALID_PARAMS;
@@ -2284,7 +2289,7 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct smp_chan *smp;
 	u8 sec_level, auth;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
 		return SMP_INVALID_PARAMS;
@@ -2347,7 +2352,8 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 	__u8 authreq;
 	int ret;
 
-	BT_DBG("conn %p hcon %p level 0x%2.2x", conn, hcon, sec_level);
+	bt_dev_dbg(hcon->hdev, "conn %p hcon %p level 0x%2.2x", conn, hcon,
+		   sec_level);
 
 	/* This may be NULL if there's an unexpected disconnection */
 	if (!conn)
@@ -2483,7 +2489,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
 		return SMP_INVALID_PARAMS;
@@ -2516,7 +2522,7 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct smp_ltk *ltk;
 	u8 authenticated;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
 		return SMP_INVALID_PARAMS;
@@ -2548,7 +2554,7 @@ static int smp_cmd_ident_info(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 
-	BT_DBG("");
+	bt_dev_dbg(conn->hcon->hdev, "");
 
 	if (skb->len < sizeof(*info))
 		return SMP_INVALID_PARAMS;
@@ -2580,7 +2586,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
 	struct hci_conn *hcon = conn->hcon;
 	bdaddr_t rpa;
 
-	BT_DBG("");
+	bt_dev_dbg(hcon->hdev, "");
 
 	if (skb->len < sizeof(*info))
 		return SMP_INVALID_PARAMS;
@@ -2647,7 +2653,7 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct smp_chan *smp = chan->data;
 	struct smp_csrk *csrk;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(conn->hcon->hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*rp))
 		return SMP_INVALID_PARAMS;
@@ -2727,7 +2733,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 	struct smp_cmd_pairing_confirm cfm;
 	int err;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*key))
 		return SMP_INVALID_PARAMS;
@@ -2791,7 +2797,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
 
 	smp->method = sc_select_method(smp);
 
-	BT_DBG("%s selected method 0x%02x", hdev->name, smp->method);
+	bt_dev_dbg(hdev, "selected method 0x%02x", smp->method);
 
 	/* JUST_WORKS and JUST_CFM result in an unauthenticated key */
 	if (smp->method == JUST_WORKS || smp->method == JUST_CFM)
@@ -2866,7 +2872,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
 	u8 io_cap[3], r[16], e[16];
 	int err;
 
-	BT_DBG("conn %p", conn);
+	bt_dev_dbg(hcon->hdev, "conn %p", conn);
 
 	if (skb->len < sizeof(*check))
 		return SMP_INVALID_PARAMS;
@@ -2926,7 +2932,7 @@ static int smp_cmd_keypress_notify(struct l2cap_conn *conn,
 {
 	struct smp_cmd_keypress_notify *kp = (void *) skb->data;
 
-	BT_DBG("value 0x%02x", kp->value);
+	bt_dev_dbg(conn->hcon->hdev, "value 0x%02x", kp->value);
 
 	return 0;
 }
@@ -3023,7 +3029,7 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
 		break;
 
 	default:
-		BT_DBG("Unknown command code 0x%2.2x", code);
+		bt_dev_dbg(hcon->hdev, "Unknown command code 0x%2.2x", code);
 		reason = SMP_CMD_NOTSUPP;
 		goto done;
 	}
@@ -3048,7 +3054,7 @@ static void smp_teardown_cb(struct l2cap_chan *chan, int err)
 {
 	struct l2cap_conn *conn = chan->conn;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(conn->hcon->hdev, "chan %p", chan);
 
 	if (chan->data)
 		smp_chan_destroy(conn);
@@ -3065,7 +3071,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 	struct smp_cmd_pairing req;
 	struct smp_chan *smp;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(hdev, "chan %p", chan);
 
 	/* Only new pairings are interesting */
 	if (!test_bit(HCI_CONN_NEW_LINK_KEY, &hcon->flags))
@@ -3112,7 +3118,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
 
 	set_bit(SMP_FLAG_SC, &smp->flags);
 
-	BT_DBG("%s starting SMP over BR/EDR", hdev->name);
+	bt_dev_dbg(hdev, "starting SMP over BR/EDR");
 
 	/* Prepare and send the BR/EDR SMP Pairing Request */
 	build_bredr_pairing_cmd(smp, &req, NULL);
@@ -3130,7 +3136,7 @@ static void smp_resume_cb(struct l2cap_chan *chan)
 	struct l2cap_conn *conn = chan->conn;
 	struct hci_conn *hcon = conn->hcon;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(hcon->hdev, "chan %p", chan);
 
 	if (hcon->type == ACL_LINK) {
 		bredr_pairing(chan);
@@ -3153,7 +3159,7 @@ static void smp_ready_cb(struct l2cap_chan *chan)
 	struct l2cap_conn *conn = chan->conn;
 	struct hci_conn *hcon = conn->hcon;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(hcon->hdev, "chan %p", chan);
 
 	/* No need to call l2cap_chan_hold() here since we already own
 	 * the reference taken in smp_new_conn_cb(). This is just the
@@ -3171,7 +3177,7 @@ static int smp_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 {
 	int err;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
 
 	err = smp_sig_channel(chan, skb);
 	if (err) {
@@ -3223,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 {
 	struct l2cap_chan *chan;
 
-	BT_DBG("pchan %p", pchan);
+	bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan);
 
 	chan = l2cap_chan_create();
 	if (!chan)
@@ -3244,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 	 */
 	atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
 
-	BT_DBG("created chan %p", chan);
+	bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan);
 
 	return chan;
 }
@@ -3285,14 +3291,14 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
 
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
-		BT_ERR("Unable to create CMAC crypto context");
+		bt_dev_err(hdev, "Unable to create CMAC crypto context");
 		kfree_sensitive(smp);
 		return ERR_CAST(tfm_cmac);
 	}
 
 	tfm_ecdh = crypto_alloc_kpp("ecdh", 0, 0);
 	if (IS_ERR(tfm_ecdh)) {
-		BT_ERR("Unable to create ECDH crypto context");
+		bt_dev_err(hdev, "Unable to create ECDH crypto context");
 		crypto_free_shash(tfm_cmac);
 		kfree_sensitive(smp);
 		return ERR_CAST(tfm_ecdh);
@@ -3348,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 {
 	struct smp_dev *smp;
 
-	BT_DBG("chan %p", chan);
+	bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
 
 	smp = chan->data;
 	if (smp) {
@@ -3391,7 +3397,7 @@ int smp_register(struct hci_dev *hdev)
 {
 	struct l2cap_chan *chan;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "");
 
 	/* If the controller does not support Low Energy operation, then
 	 * there is also no need to register any SMP channel.
-- 
cgit v1.2.3


From 5ec55823438e850c91c6b92aec93fb04ebde29e2 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 15 Mar 2021 20:16:46 +0800
Subject: net: stmmac: add clocks management for gmac driver

This patch intends to add clocks management for stmmac driver:

If CONFIG_PM enabled:
1. Keep clocks disabled after driver probed.
2. Enable clocks when up the net device, and disable clocks when down
the net device.

If CONFIG_PM disabled:
Keep clocks always enabled after driver probed.

Note:
1. It is fine for ethtool, since the way of implementing ethtool_ops::begin
in stmmac is only can be accessed when interface is enabled, so the clocks
are ticked.
2. The MDIO bus has a different life cycle to the MAC, need ensure
clocks are enabled when _mdio_read/write() need clocks, because these
functions can be called while the interface it not opened.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  75 ++++++++++++--
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c  | 111 ++++++++++++++++-----
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |  24 ++++-
 4 files changed, 174 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e553b9a1f785..10e8ae8e2d58 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -272,6 +272,7 @@ void stmmac_disable_eee_mode(struct stmmac_priv *priv);
 bool stmmac_eee_init(struct stmmac_priv *priv);
 int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt);
 int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
+int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled);
 
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e58ff652e95f..3c50846f59cd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
 #include <linux/pinctrl/consumer.h>
 #ifdef CONFIG_DEBUG_FS
@@ -113,6 +114,28 @@ static void stmmac_exit_fs(struct net_device *dev);
 
 #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC))
 
+int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled)
+{
+	int ret = 0;
+
+	if (enabled) {
+		ret = clk_prepare_enable(priv->plat->stmmac_clk);
+		if (ret)
+			return ret;
+		ret = clk_prepare_enable(priv->plat->pclk);
+		if (ret) {
+			clk_disable_unprepare(priv->plat->stmmac_clk);
+			return ret;
+		}
+	} else {
+		clk_disable_unprepare(priv->plat->stmmac_clk);
+		clk_disable_unprepare(priv->plat->pclk);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(stmmac_bus_clks_config);
+
 /**
  * stmmac_verify_args - verify the driver parameters.
  * Description: it checks the driver parameters and set a default in case of
@@ -2896,6 +2919,12 @@ static int stmmac_open(struct net_device *dev)
 	u32 chan;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI &&
 	    priv->hw->xpcs_args.an_mode != DW_AN_C73) {
@@ -2904,7 +2933,7 @@ static int stmmac_open(struct net_device *dev)
 			netdev_err(priv->dev,
 				   "%s: Cannot attach to PHY (error: %d)\n",
 				   __func__, ret);
-			return ret;
+			goto init_phy_error;
 		}
 	}
 
@@ -3020,6 +3049,8 @@ init_error:
 	free_dma_desc_resources(priv);
 dma_desc_error:
 	phylink_disconnect_phy(priv->phylink);
+init_phy_error:
+	pm_runtime_put(priv->device);
 	return ret;
 }
 
@@ -3070,6 +3101,8 @@ static int stmmac_release(struct net_device *dev)
 
 	stmmac_release_ptp(priv);
 
+	pm_runtime_put(priv->device);
+
 	return 0;
 }
 
@@ -4706,6 +4739,12 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
 	bool is_double = false;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -4739,10 +4778,15 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
 	if (priv->hw->num_vlan) {
 		ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid);
 		if (ret)
-			return ret;
+			goto del_vlan_error;
 	}
 
-	return stmmac_vlan_update(priv, is_double);
+	ret = stmmac_vlan_update(priv, is_double);
+
+del_vlan_error:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 static const struct net_device_ops stmmac_netdev_ops = {
@@ -5181,6 +5225,10 @@ int stmmac_dvr_probe(struct device *device,
 
 	stmmac_check_pcs_mode(priv);
 
+	pm_runtime_get_noresume(device);
+	pm_runtime_set_active(device);
+	pm_runtime_enable(device);
+
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI) {
 		/* MDIO bus Registration */
@@ -5218,6 +5266,11 @@ int stmmac_dvr_probe(struct device *device,
 	stmmac_init_fs(ndev);
 #endif
 
+	/* Let pm_runtime_put() disable the clocks.
+	 * If CONFIG_PM is not enabled, the clocks will stay powered.
+	 */
+	pm_runtime_put(device);
+
 	return ret;
 
 error_serdes_powerup:
@@ -5232,6 +5285,7 @@ error_mdio_register:
 	stmmac_napi_del(ndev);
 error_hw_init:
 	destroy_workqueue(priv->wq);
+	stmmac_bus_clks_config(priv, false);
 
 	return ret;
 }
@@ -5267,8 +5321,8 @@ int stmmac_dvr_remove(struct device *dev)
 	phylink_destroy(priv->phylink);
 	if (priv->plat->stmmac_rst)
 		reset_control_assert(priv->plat->stmmac_rst);
-	clk_disable_unprepare(priv->plat->pclk);
-	clk_disable_unprepare(priv->plat->stmmac_clk);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
 	if (priv->hw->pcs != STMMAC_PCS_TBI &&
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
@@ -5291,6 +5345,7 @@ int stmmac_suspend(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	u32 chan;
+	int ret;
 
 	if (!ndev || !netif_running(ndev))
 		return 0;
@@ -5334,8 +5389,9 @@ int stmmac_suspend(struct device *dev)
 		pinctrl_pm_select_sleep_state(priv->device);
 		/* Disable clock in case of PWM is off */
 		clk_disable_unprepare(priv->plat->clk_ptp_ref);
-		clk_disable_unprepare(priv->plat->pclk);
-		clk_disable_unprepare(priv->plat->stmmac_clk);
+		ret = pm_runtime_force_suspend(dev);
+		if (ret)
+			return ret;
 	}
 	mutex_unlock(&priv->lock);
 
@@ -5401,8 +5457,9 @@ int stmmac_resume(struct device *dev)
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
 		/* enable the clk previously disabled */
-		clk_prepare_enable(priv->plat->stmmac_clk);
-		clk_prepare_enable(priv->plat->pclk);
+		ret = pm_runtime_force_resume(dev);
+		if (ret)
+			return ret;
 		if (priv->plat->clk_ptp_ref)
 			clk_prepare_enable(priv->plat->clk_ptp_ref);
 		/* reset the phy so that it's ready */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index d64116e0543e..b750074f8f9c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -15,6 +15,7 @@
 #include <linux/iopoll.h>
 #include <linux/mii.h>
 #include <linux/of_mdio.h>
+#include <linux/pm_runtime.h>
 #include <linux/phy.h>
 #include <linux/property.h>
 #include <linux/slab.h>
@@ -87,21 +88,29 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	u32 tmp, addr, value = MII_XGMAC_BUSY;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	if (phyreg & MII_ADDR_C45) {
 		phyreg &= ~MII_ADDR_C45;
 
 		ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 	} else {
 		ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 
 		value |= MII_XGMAC_SADDR;
 	}
@@ -112,8 +121,10 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to read */
 	writel(addr, priv->ioaddr + mii_address);
@@ -121,11 +132,18 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Read the data from the MII data register */
-	return readl(priv->ioaddr + mii_data) & GENMASK(15, 0);
+	ret = (int)readl(priv->ioaddr + mii_data) & GENMASK(15, 0);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
@@ -138,21 +156,29 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
 	u32 addr, tmp, value = MII_XGMAC_BUSY;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	if (phyreg & MII_ADDR_C45) {
 		phyreg &= ~MII_ADDR_C45;
 
 		ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 	} else {
 		ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr);
 		if (ret)
-			return ret;
+			goto err_disable_clks;
 
 		value |= MII_XGMAC_SADDR;
 	}
@@ -164,16 +190,23 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr,
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-			       !(tmp & MII_XGMAC_BUSY), 100, 10000))
-		return -EBUSY;
+			       !(tmp & MII_XGMAC_BUSY), 100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to write */
 	writel(addr, priv->ioaddr + mii_address);
 	writel(value, priv->ioaddr + mii_data);
 
 	/* Wait until any existing MII operation is complete */
-	return readl_poll_timeout(priv->ioaddr + mii_data, tmp,
-				  !(tmp & MII_XGMAC_BUSY), 100, 10000);
+	ret = readl_poll_timeout(priv->ioaddr + mii_data, tmp,
+				 !(tmp & MII_XGMAC_BUSY), 100, 10000);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 /**
@@ -196,6 +229,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	int data = 0;
 	u32 v;
 
+	data = pm_runtime_get_sync(priv->device);
+	if (data < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return data;
+	}
+
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
@@ -216,19 +255,26 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	}
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		data = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		data = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Read the data from the MII data register */
 	data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK;
 
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
 	return data;
 }
 
@@ -247,10 +293,16 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
 	unsigned int mii_data = priv->hw->mii.data;
+	int ret, data = phydata;
 	u32 value = MII_BUSY;
-	int data = phydata;
 	u32 v;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
@@ -275,16 +327,23 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 
 	/* Wait until any existing MII operation is complete */
 	if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-			       100, 10000))
-		return -EBUSY;
+			       100, 10000)) {
+		ret = -EBUSY;
+		goto err_disable_clks;
+	}
 
 	/* Set the MII address register to write */
 	writel(data, priv->ioaddr + mii_data);
 	writel(value, priv->ioaddr + mii_address);
 
 	/* Wait until any existing MII operation is complete */
-	return readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
-				  100, 10000);
+	ret = readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY),
+				 100, 10000);
+
+err_disable_clks:
+	pm_runtime_put(priv->device);
+
+	return ret;
 }
 
 /**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 6dc9f10414e4..a70b0bc84b2a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -744,10 +744,30 @@ static int stmmac_pltfr_resume(struct device *dev)
 
 	return stmmac_resume(dev);
 }
+
+static int stmmac_runtime_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	stmmac_bus_clks_config(priv, false);
+
+	return 0;
+}
+
+static int stmmac_runtime_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	return stmmac_bus_clks_config(priv, true);
+}
 #endif /* CONFIG_PM_SLEEP */
 
-SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend,
-				       stmmac_pltfr_resume);
+const struct dev_pm_ops stmmac_pltfr_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume)
+	SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL)
+};
 EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops);
 
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support");
-- 
cgit v1.2.3


From b4d45aee6635197d257f3469413837cd94fc11f4 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 15 Mar 2021 20:16:47 +0800
Subject: net: stmmac: add platform level clocks management

This patch intends to add platform level clocks management. Some
platforms may have their own special clocks, they also need to be
managed dynamically. If you want to manage such clocks, please implement
clks_config callback.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++++++
 include/linux/stmmac.h                            |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3c50846f59cd..a10704d8e3c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -127,9 +127,19 @@ int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled)
 			clk_disable_unprepare(priv->plat->stmmac_clk);
 			return ret;
 		}
+		if (priv->plat->clks_config) {
+			ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled);
+			if (ret) {
+				clk_disable_unprepare(priv->plat->stmmac_clk);
+				clk_disable_unprepare(priv->plat->pclk);
+				return ret;
+			}
+		}
 	} else {
 		clk_disable_unprepare(priv->plat->stmmac_clk);
 		clk_disable_unprepare(priv->plat->pclk);
+		if (priv->plat->clks_config)
+			priv->plat->clks_config(priv->plat->bsp_priv, enabled);
 	}
 
 	return ret;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 722dc167b5c9..51004ebd0540 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -184,6 +184,7 @@ struct plat_stmmacenet_data {
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	struct mac_device_info *(*setup)(void *priv);
+	int (*clks_config)(void *priv, bool enabled);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
-- 
cgit v1.2.3


From 8f2f83765eb0c402fdf77bc8a81d6435b5aa98b7 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 15 Mar 2021 20:16:48 +0800
Subject: net: stmmac: dwmac-imx: add platform level clocks management for i.MX

Split clocks settings from init callback into clks_config callback,
which could support platform level clocks management.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 60 +++++++++++++++----------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 223f69da7e95..c1a361305a5a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -90,6 +90,32 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
 	return ret;
 }
 
+static int imx_dwmac_clks_config(void *priv, bool enabled)
+{
+	struct imx_priv_data *dwmac = priv;
+	int ret = 0;
+
+	if (enabled) {
+		ret = clk_prepare_enable(dwmac->clk_mem);
+		if (ret) {
+			dev_err(dwmac->dev, "mem clock enable failed\n");
+			return ret;
+		}
+
+		ret = clk_prepare_enable(dwmac->clk_tx);
+		if (ret) {
+			dev_err(dwmac->dev, "tx clock enable failed\n");
+			clk_disable_unprepare(dwmac->clk_mem);
+			return ret;
+		}
+	} else {
+		clk_disable_unprepare(dwmac->clk_tx);
+		clk_disable_unprepare(dwmac->clk_mem);
+	}
+
+	return ret;
+}
+
 static int imx_dwmac_init(struct platform_device *pdev, void *priv)
 {
 	struct plat_stmmacenet_data *plat_dat;
@@ -98,39 +124,18 @@ static int imx_dwmac_init(struct platform_device *pdev, void *priv)
 
 	plat_dat = dwmac->plat_dat;
 
-	ret = clk_prepare_enable(dwmac->clk_mem);
-	if (ret) {
-		dev_err(&pdev->dev, "mem clock enable failed\n");
-		return ret;
-	}
-
-	ret = clk_prepare_enable(dwmac->clk_tx);
-	if (ret) {
-		dev_err(&pdev->dev, "tx clock enable failed\n");
-		goto clk_tx_en_failed;
-	}
-
 	if (dwmac->ops->set_intf_mode) {
 		ret = dwmac->ops->set_intf_mode(plat_dat);
 		if (ret)
-			goto intf_mode_failed;
+			return ret;
 	}
 
 	return 0;
-
-intf_mode_failed:
-	clk_disable_unprepare(dwmac->clk_tx);
-clk_tx_en_failed:
-	clk_disable_unprepare(dwmac->clk_mem);
-	return ret;
 }
 
 static void imx_dwmac_exit(struct platform_device *pdev, void *priv)
 {
-	struct imx_priv_data *dwmac = priv;
-
-	clk_disable_unprepare(dwmac->clk_tx);
-	clk_disable_unprepare(dwmac->clk_mem);
+	/* nothing to do now */
 }
 
 static void imx_dwmac_fix_speed(void *priv, unsigned int speed)
@@ -249,10 +254,15 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 	plat_dat->addr64 = dwmac->ops->addr_width;
 	plat_dat->init = imx_dwmac_init;
 	plat_dat->exit = imx_dwmac_exit;
+	plat_dat->clks_config = imx_dwmac_clks_config;
 	plat_dat->fix_mac_speed = imx_dwmac_fix_speed;
 	plat_dat->bsp_priv = dwmac;
 	dwmac->plat_dat = plat_dat;
 
+	ret = imx_dwmac_clks_config(dwmac, true);
+	if (ret)
+		goto err_clks_config;
+
 	ret = imx_dwmac_init(pdev, dwmac);
 	if (ret)
 		goto err_dwmac_init;
@@ -263,9 +273,11 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 
 	return 0;
 
-err_dwmac_init:
 err_drv_probe:
 	imx_dwmac_exit(pdev, plat_dat->bsp_priv);
+err_dwmac_init:
+	imx_dwmac_clks_config(dwmac, false);
+err_clks_config:
 err_parse_dt:
 err_match_data:
 	stmmac_remove_config_dt(pdev, plat_dat);
-- 
cgit v1.2.3


From 6e3bac3eba448a438840ab8152cb8bbfcb8787b8 Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Mon, 15 Mar 2021 17:19:26 +0300
Subject: net: phy: add Marvell 88X2222 transceiver support

Add basic support for the Marvell 88X2222 multi-speed ethernet
transceiver.

This PHY provides data transmission over fiber-optic as well as Twinax
copper links. The 88X2222 supports 2 ports of 10GBase-R and 1000Base-X
on the line-side interface. The host-side interface supports 4 ports of
10GBase-R, RXAUI, 1000Base-X and 2 ports of XAUI.

This driver, however, supports only XAUI on the host-side and
1000Base-X/10GBase-R on the line-side, for now. The SGMII is also
supported over 1000Base-X. Interrupts are not supported.

Internal registers access compliant with the Clause 45 specification.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig           |   6 +
 drivers/net/phy/Makefile          |   1 +
 drivers/net/phy/marvell-88x2222.c | 519 ++++++++++++++++++++++++++++++++++++++
 include/linux/marvell_phy.h       |   1 +
 4 files changed, 527 insertions(+)
 create mode 100644 drivers/net/phy/marvell-88x2222.c

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 698bea312adc..a615b3660b05 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -201,6 +201,12 @@ config MARVELL_10G_PHY
 	help
 	  Support for the Marvell Alaska MV88X3310 and compatible PHYs.
 
+config MARVELL_88X2222_PHY
+	tristate "Marvell 88X2222 PHY"
+	help
+	  Support for the Marvell 88X2222 Dual-port Multi-speed Ethernet
+	  Transceiver.
+
 config MICREL_PHY
 	tristate "Micrel PHYs"
 	help
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index a13e402074cf..de683e3abe63 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
 obj-$(CONFIG_LXT_PHY)		+= lxt.o
 obj-$(CONFIG_MARVELL_10G_PHY)	+= marvell10g.o
 obj-$(CONFIG_MARVELL_PHY)	+= marvell.o
+obj-$(CONFIG_MARVELL_88X2222_PHY)	+= marvell-88x2222.o
 obj-$(CONFIG_MESON_GXL_PHY)	+= meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)	+= spi_ks8995.o
 obj-$(CONFIG_MICREL_PHY)	+= micrel.o
diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
new file mode 100644
index 000000000000..eca8c2f20684
--- /dev/null
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Marvell 88x2222 dual-port multi-speed ethernet transceiver.
+ *
+ * Supports:
+ *	XAUI on the host side.
+ *	1000Base-X or 10GBase-R on the line side.
+ *	SGMII over 1000Base-X.
+ */
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mdio.h>
+#include <linux/marvell_phy.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/sfp.h>
+#include <linux/netdevice.h>
+
+/* Port PCS Configuration */
+#define	MV_PCS_CONFIG		0xF002
+#define	MV_PCS_HOST_XAUI	0x73
+#define	MV_PCS_LINE_10GBR	(0x71 << 8)
+#define	MV_PCS_LINE_1GBX_AN	(0x7B << 8)
+#define	MV_PCS_LINE_SGMII_AN	(0x7F << 8)
+
+/* Port Reset and Power Down */
+#define	MV_PORT_RST	0xF003
+#define	MV_LINE_RST_SW	BIT(15)
+#define	MV_HOST_RST_SW	BIT(7)
+#define	MV_PORT_RST_SW	(MV_LINE_RST_SW | MV_HOST_RST_SW)
+
+/* 1000Base-X/SGMII Control Register */
+#define	MV_1GBX_CTRL		(0x2000 + MII_BMCR)
+
+/* 1000BASE-X/SGMII Status Register */
+#define	MV_1GBX_STAT		(0x2000 + MII_BMSR)
+
+/* 1000Base-X Auto-Negotiation Advertisement Register */
+#define	MV_1GBX_ADVERTISE	(0x2000 + MII_ADVERTISE)
+
+/* 1000Base-X PHY Specific Status Register */
+#define	MV_1GBX_PHY_STAT		0xA003
+#define	MV_1GBX_PHY_STAT_AN_RESOLVED	BIT(11)
+#define	MV_1GBX_PHY_STAT_DUPLEX		BIT(13)
+#define	MV_1GBX_PHY_STAT_SPEED100	BIT(14)
+#define	MV_1GBX_PHY_STAT_SPEED1000	BIT(15)
+
+struct mv2222_data {
+	phy_interface_t line_interface;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+};
+
+/* SFI PMA transmit enable */
+static int mv2222_tx_enable(struct phy_device *phydev)
+{
+	return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+				  MDIO_PMD_TXDIS_GLOBAL);
+}
+
+/* SFI PMA transmit disable */
+static int mv2222_tx_disable(struct phy_device *phydev)
+{
+	return phy_set_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+				MDIO_PMD_TXDIS_GLOBAL);
+}
+
+static int mv2222_soft_reset(struct phy_device *phydev)
+{
+	int val, ret;
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PORT_RST,
+			    MV_PORT_RST_SW);
+	if (ret < 0)
+		return ret;
+
+	return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND2, MV_PORT_RST,
+					 val, !(val & MV_PORT_RST_SW),
+					 5000, 1000000, true);
+}
+
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_10g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if (val < 0)
+		return val;
+
+	if (val & MDIO_STAT1_LSTATUS) {
+		link = 1;
+
+		/* 10GBASE-R do not support auto-negotiation */
+		phydev->autoneg = AUTONEG_DISABLE;
+		phydev->speed = SPEED_10000;
+		phydev->duplex = DUPLEX_FULL;
+	}
+
+	return link;
+}
+
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_1g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
+	if (val < 0)
+		return val;
+
+	if (!(val & BMSR_LSTATUS) ||
+	    (phydev->autoneg == AUTONEG_ENABLE &&
+	     !(val & BMSR_ANEGCOMPLETE)))
+		return 0;
+
+	link = 1;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_PHY_STAT);
+	if (val < 0)
+		return val;
+
+	if (val & MV_1GBX_PHY_STAT_AN_RESOLVED) {
+		if (val & MV_1GBX_PHY_STAT_DUPLEX)
+			phydev->duplex = DUPLEX_FULL;
+		else
+			phydev->duplex = DUPLEX_HALF;
+
+		if (val & MV_1GBX_PHY_STAT_SPEED1000)
+			phydev->speed = SPEED_1000;
+		else if (val & MV_1GBX_PHY_STAT_SPEED100)
+			phydev->speed = SPEED_100;
+		else
+			phydev->speed = SPEED_10;
+	}
+
+	return link;
+}
+
+static int mv2222_read_status(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int link;
+
+	phydev->link = 0;
+	phydev->speed = SPEED_UNKNOWN;
+	phydev->duplex = DUPLEX_UNKNOWN;
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
+		link = mv2222_read_status_10g(phydev);
+	else
+		link = mv2222_read_status_1g(phydev);
+
+	if (link < 0)
+		return link;
+
+	phydev->link = link;
+
+	return 0;
+}
+
+static int mv2222_disable_aneg(struct phy_device *phydev)
+{
+	int ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_CTRL,
+				     BMCR_ANENABLE | BMCR_ANRESTART);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_soft_reset(phydev);
+}
+
+static int mv2222_enable_aneg(struct phy_device *phydev)
+{
+	int ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_CTRL,
+				   BMCR_ANENABLE | BMCR_RESET);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_soft_reset(phydev);
+}
+
+static int mv2222_set_sgmii_speed(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	switch (phydev->speed) {
+	default:
+	case SPEED_1000:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED1000);
+
+		fallthrough;
+	case SPEED_100:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED100);
+		fallthrough;
+	case SPEED_10:
+		if ((linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				       priv->supported) ||
+		     linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				       priv->supported)))
+			return phy_modify_mmd(phydev, MDIO_MMD_PCS,
+					      MV_1GBX_CTRL,
+					      BMCR_SPEED1000 | BMCR_SPEED100,
+					      BMCR_SPEED10);
+
+		return -EINVAL;
+	}
+}
+
+static bool mv2222_is_10g_capable(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	return (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+				  priv->supported));
+}
+
+static bool mv2222_is_1gbx_capable(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	return linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
+				 priv->supported);
+}
+
+static int mv2222_config_line(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	switch (priv->line_interface) {
+	case PHY_INTERFACE_MODE_10GBASER:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_10GBR);
+	case PHY_INTERFACE_MODE_1000BASEX:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_1GBX_AN);
+	case PHY_INTERFACE_MODE_SGMII:
+		return phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_PCS_CONFIG,
+				     MV_PCS_HOST_XAUI | MV_PCS_LINE_SGMII_AN);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int mv2222_setup_forced(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	bool changed = false;
+	int ret;
+
+	switch (priv->line_interface) {
+	case PHY_INTERFACE_MODE_10GBASER:
+		if (phydev->speed == SPEED_1000 &&
+		    mv2222_is_1gbx_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
+			changed = true;
+		}
+
+		break;
+	case PHY_INTERFACE_MODE_1000BASEX:
+		if (phydev->speed == SPEED_10000 &&
+		    mv2222_is_10g_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_10GBASER;
+			changed = true;
+		}
+
+		break;
+	case PHY_INTERFACE_MODE_SGMII:
+		ret = mv2222_set_sgmii_speed(phydev);
+		if (ret < 0)
+			return ret;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (changed) {
+		ret = mv2222_config_line(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	return mv2222_disable_aneg(phydev);
+}
+
+static int mv2222_config_aneg(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int ret, adv;
+
+	/* SFP is not present, do nothing */
+	if (priv->line_interface == PHY_INTERFACE_MODE_NA)
+		return 0;
+
+	if (phydev->autoneg == AUTONEG_DISABLE ||
+	    phydev->speed == SPEED_10000)
+		return mv2222_setup_forced(phydev);
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER &&
+	    mv2222_is_1gbx_capable(phydev)) {
+		priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
+		ret = mv2222_config_line(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	adv = linkmode_adv_to_mii_adv_x(priv->supported,
+					ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
+
+	ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_ADVERTISE,
+			     ADVERTISE_1000XFULL |
+			     ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM,
+			     adv);
+	if (ret < 0)
+		return ret;
+
+	return mv2222_enable_aneg(phydev);
+}
+
+static int mv2222_aneg_done(struct phy_device *phydev)
+{
+	int ret;
+
+	if (mv2222_is_10g_capable(phydev)) {
+		ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+		if (ret < 0)
+			return ret;
+
+		if (ret & MDIO_STAT1_LSTATUS)
+			return 1;
+	}
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
+	if (ret < 0)
+		return ret;
+
+	return (ret & BMSR_ANEGCOMPLETE);
+}
+
+static int mv2222_resume(struct phy_device *phydev)
+{
+	return mv2222_tx_enable(phydev);
+}
+
+static int mv2222_suspend(struct phy_device *phydev)
+{
+	return mv2222_tx_disable(phydev);
+}
+
+static int mv2222_get_features(struct phy_device *phydev)
+{
+	/* All supported linkmodes are set at probe */
+
+	return 0;
+}
+
+static int mv2222_config_init(struct phy_device *phydev)
+{
+	if (phydev->interface != PHY_INTERFACE_MODE_XAUI)
+		return -EINVAL;
+
+	phydev->autoneg = AUTONEG_DISABLE;
+
+	return 0;
+}
+
+static int mv2222_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
+{
+	struct phy_device *phydev = upstream;
+	phy_interface_t sfp_interface;
+	struct mv2222_data *priv;
+	struct device *dev;
+	int ret;
+
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_supported) = { 0, };
+
+	priv = (struct mv2222_data *)phydev->priv;
+	dev = &phydev->mdio.dev;
+
+	sfp_parse_support(phydev->sfp_bus, id, sfp_supported);
+	sfp_interface = sfp_select_interface(phydev->sfp_bus, sfp_supported);
+
+	dev_info(dev, "%s SFP module inserted\n", phy_modes(sfp_interface));
+
+	if (sfp_interface != PHY_INTERFACE_MODE_10GBASER &&
+	    sfp_interface != PHY_INTERFACE_MODE_1000BASEX &&
+	    sfp_interface != PHY_INTERFACE_MODE_SGMII) {
+		dev_err(dev, "Incompatible SFP module inserted\n");
+
+		return -EINVAL;
+	}
+
+	priv->line_interface = sfp_interface;
+	linkmode_and(priv->supported, phydev->supported, sfp_supported);
+
+	ret = mv2222_config_line(phydev);
+	if (ret < 0)
+		return ret;
+
+	if (mutex_trylock(&phydev->lock)) {
+		if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
+			ret = mv2222_setup_forced(phydev);
+		else
+			ret = mv2222_config_aneg(phydev);
+
+		mutex_unlock(&phydev->lock);
+	}
+
+	return ret;
+}
+
+static void mv2222_sfp_remove(void *upstream)
+{
+	struct phy_device *phydev = upstream;
+	struct mv2222_data *priv;
+
+	priv = (struct mv2222_data *)phydev->priv;
+
+	priv->line_interface = PHY_INTERFACE_MODE_NA;
+	linkmode_zero(priv->supported);
+}
+
+static const struct sfp_upstream_ops sfp_phy_ops = {
+	.module_insert = mv2222_sfp_insert,
+	.module_remove = mv2222_sfp_remove,
+	.attach = phy_sfp_attach,
+	.detach = phy_sfp_detach,
+};
+
+static int mv2222_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct mv2222_data *priv = NULL;
+
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
+
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT, supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported);
+
+	linkmode_copy(phydev->supported, supported);
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->line_interface = PHY_INTERFACE_MODE_NA;
+	phydev->priv = priv;
+
+	return phy_sfp_probe(phydev, &sfp_phy_ops);
+}
+
+static struct phy_driver mv2222_drivers[] = {
+	{
+		.phy_id = MARVELL_PHY_ID_88X2222,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88X2222",
+		.get_features = mv2222_get_features,
+		.soft_reset = mv2222_soft_reset,
+		.config_init = mv2222_config_init,
+		.config_aneg = mv2222_config_aneg,
+		.aneg_done = mv2222_aneg_done,
+		.probe = mv2222_probe,
+		.suspend = mv2222_suspend,
+		.resume = mv2222_resume,
+		.read_status = mv2222_read_status,
+	},
+};
+module_phy_driver(mv2222_drivers);
+
+static struct mdio_device_id __maybe_unused mv2222_tbl[] = {
+	{ MARVELL_PHY_ID_88X2222, MARVELL_PHY_ID_MASK },
+	{ }
+};
+MODULE_DEVICE_TABLE(mdio, mv2222_tbl);
+
+MODULE_DESCRIPTION("Marvell 88x2222 ethernet transceiver driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 52b1610eae68..274abd5fbac3 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -24,6 +24,7 @@
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 #define MARVELL_PHY_ID_88X3310		0x002b09a0
 #define MARVELL_PHY_ID_88E2110		0x002b09b0
+#define MARVELL_PHY_ID_88X2222		0x01410f10
 
 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
 #define MARVELL_PHY_ID_88E1111_FINISAR	0x01ff0cc0
-- 
cgit v1.2.3


From 0f13b5e6bf283a086b6d5b47be1ecf05a341eb9b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 10:01:18 -0500
Subject: net: ipa: make ipa_table_hash_support() inline

In review, Alexander Duyck suggested that ipa_table_hash_support()
was trivial enough that it could be implemented as a static inline
function in the header file.  But the patch had already been
accepted.  Implement his suggestion.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 5 -----
 drivers/net/ipa/ipa_table.h | 5 ++++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index baaab3dd0e63..7450e27068f1 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -239,11 +239,6 @@ static void ipa_table_validate_build(void)
 
 #endif /* !IPA_VALIDATE */
 
-bool ipa_table_hash_support(struct ipa *ipa)
-{
-	return ipa->version != IPA_VERSION_4_2;
-}
-
 /* Zero entry count means no table, so just return a 0 address */
 static dma_addr_t ipa_table_addr(struct ipa *ipa, bool filter_mask, u16 count)
 {
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index 1a68d20f19d6..889c2e93b122 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -55,7 +55,10 @@ static inline bool ipa_filter_map_valid(struct ipa *ipa, u32 filter_mask)
  * ipa_table_hash_support() - Return true if hashed tables are supported
  * @ipa:	IPA pointer
  */
-bool ipa_table_hash_support(struct ipa *ipa);
+static inline bool ipa_table_hash_support(struct ipa *ipa)
+{
+	return ipa->version != IPA_VERSION_4_2;
+}
 
 /**
  * ipa_table_reset() - Reset filter and route tables entries to "none"
-- 
cgit v1.2.3


From ccf8b940e5fdd331231a7442cd80f0e83336cfd3 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Tue, 1 Dec 2020 09:21:50 +0800
Subject: e1000e: Leverage direct_complete to speed up s2ram

The NIC is put in runtime suspend status when there is no cable connected.
As a result, it is safe to keep non-wakeup NIC in runtime suspended during
s2ram because the system does not rely on the NIC plug event nor WoL to wake
up the system. Besides that, unlike the s2idle, s2ram does not need to
manipulate S0ix settings during suspend.

This patch introduces the .prepare() for e1000e so that if the NIC is runtime
suspended the subsequent suspend/resume hooks will be skipped so as to speed
up the s2ram. The pm core will check whether the NIC is a wake up device so
there's no need to check it again in .prepare(). DPM_FLAG_SMART_PREPARE flag
should be set during probe to ask the pci subsystem to honor the driver's
prepare() result. Besides, the NIC remains runtime suspended after resumed
from s2ram as there is no need to resume it.

Tested on i7-2600K with 82579V NIC
Before the patch:
e1000e 0000:00:19.0: pci_pm_suspend+0x0/0x160 returned 0 after 225146 usecs
e1000e 0000:00:19.0: pci_pm_resume+0x0/0x90 returned 0 after 140588 usecs

After the patch:
echo disabled > //sys/devices/pci0000\:00/0000\:00\:19.0/power/wakeup
becomes 0 usecs because the hooks will be skipped.

Suggested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e9b82c209c2d..3eaae9a5a44e 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -25,6 +25,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/aer.h>
 #include <linux/prefetch.h>
+#include <linux/suspend.h>
 
 #include "e1000.h"
 
@@ -6918,6 +6919,12 @@ static int __e1000_resume(struct pci_dev *pdev)
 	return 0;
 }
 
+static int e1000e_pm_prepare(struct device *dev)
+{
+	return pm_runtime_suspended(dev) &&
+		pm_suspend_via_firmware();
+}
+
 static __maybe_unused int e1000e_pm_suspend(struct device *dev)
 {
 	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
@@ -7626,7 +7633,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	e1000_print_device_info(adapter);
 
-	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
 
 	if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp)
 		pm_runtime_put_noidle(&pdev->dev);
@@ -7851,6 +7858,7 @@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
 
 static const struct dev_pm_ops e1000_pm_ops = {
 #ifdef CONFIG_PM_SLEEP
+	.prepare	= e1000e_pm_prepare,
 	.suspend	= e1000e_pm_suspend,
 	.resume		= e1000e_pm_resume,
 	.freeze		= e1000e_pm_freeze,
-- 
cgit v1.2.3


From 3335369bad99c40b3513da2d165e6ae83f3f3c8b Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Tue, 1 Dec 2020 09:22:09 +0800
Subject: e1000e: Remove the runtime suspend restriction on CNP+

Although there is platform issue of runtime suspend support
on CNP, it would be more flexible to let the user decide whether
to disable runtime or not because:
1. This can be done in userspace via
   echo on > /sys/devices/pci0000\:00/0000\:00\:1f.d/power/control
2. More and more NICs would support runtime suspend, disabling the
   runtime suspend on them by default would impact the validation.

Only disable runtime suspend on CNP in case of any user space regression.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3eaae9a5a44e..480f6712a3b6 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7635,7 +7635,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
 
-	if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp)
+	if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp)
 		pm_runtime_put_noidle(&pdev->dev);
 
 	return 0;
-- 
cgit v1.2.3


From 45f3a13c816656c9d3d311880d90286341644d9b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:46 -0500
Subject: net: qualcomm: rmnet: mark trailer field endianness

The fields in the checksum trailer structure used for QMAP protocol
RX packets are all big-endian format, so define them that way.

It turns out these fields are never actually used by the RMNet code.
The start offset is always assumed to be zero, and the length is
taken from the other packet headers.  So making these fields
explicitly big endian has no effect on the behavior of the code.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_rmnet.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 9661416a9bb4..8c7845baf383 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -32,8 +32,8 @@ struct rmnet_map_dl_csum_trailer {
 #else
 #error	"Please fix <asm/byteorder.h>"
 #endif
-	u16 csum_start_offset;
-	u16 csum_length;
+	__be16 csum_start_offset;
+	__be16 csum_length;
 	__be16 csum_value;
 } __aligned(1);
 
-- 
cgit v1.2.3


From 50c62a111c4855510b0d8a2d8e50214172166b61 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:47 -0500
Subject: net: qualcomm: rmnet: simplify some byte order logic

In rmnet_map_ipv4_ul_csum_header() and rmnet_map_ipv6_ul_csum_header()
the offset within a packet at which checksumming should commence is
calculated.  This calculation involves byte swapping and a forced type
conversion that makes it hard to understand.

Simplify this by computing the offset in host byte order, then
converting the result when assigning it into the header field.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 21d38167f961..0bfe69698b27 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -197,12 +197,10 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct iphdr *ip4h = (struct iphdr *)iphdr;
-	__be16 *hdr = (__be16 *)ul_header, offset;
+	__be16 *hdr = (__be16 *)ul_header;
+	struct iphdr *ip4h = iphdr;
 
-	offset = htons((__force u16)(skb_transport_header(skb) -
-				     (unsigned char *)iphdr));
-	ul_header->csum_start_offset = offset;
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
 	if (ip4h->protocol == IPPROTO_UDP)
@@ -239,12 +237,10 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
-	__be16 *hdr = (__be16 *)ul_header, offset;
+	__be16 *hdr = (__be16 *)ul_header;
+	struct ipv6hdr *ip6h = ip6hdr;
 
-	offset = htons((__force u16)(skb_transport_header(skb) -
-				     (unsigned char *)ip6hdr));
-	ul_header->csum_start_offset = offset;
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
 
-- 
cgit v1.2.3


From 9d131d044f89a808f947d49f1a1865815c610158 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:48 -0500
Subject: net: qualcomm: rmnet: kill RMNET_MAP_GET_*() accessor macros

The following macros, defined in "rmnet_map.h", assume a socket
buffer is provided as an argument without any real indication this
is the case.
    RMNET_MAP_GET_MUX_ID()
    RMNET_MAP_GET_CD_BIT()
    RMNET_MAP_GET_PAD()
    RMNET_MAP_GET_CMD_START()
    RMNET_MAP_GET_LENGTH()
What they hide is pretty trivial accessing of fields in a structure,
and it's much clearer to see this if we do these accesses directly.

So rather than using these accessor macros, assign a local
variable of the map header pointer type to the socket buffer data
pointer, and derereference that pointer variable.

In "rmnet_map_data.c", use sizeof(object) rather than sizeof(type)
in one spot.  Also, there's no need to byte swap 0; it's all zeros
irrespective of endianness.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c    | 10 ++++++----
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h         | 12 ------------
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c | 11 ++++++++---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c    |  4 ++--
 4 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 3d00b3232308..2a6b2a609884 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -56,20 +56,22 @@ static void
 __rmnet_map_ingress_handler(struct sk_buff *skb,
 			    struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_endpoint *ep;
 	u16 len, pad;
 	u8 mux_id;
 
-	if (RMNET_MAP_GET_CD_BIT(skb)) {
+	if (map_header->cd_bit) {
+		/* Packet contains a MAP command (not data) */
 		if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
 
 		goto free_skb;
 	}
 
-	mux_id = RMNET_MAP_GET_MUX_ID(skb);
-	pad = RMNET_MAP_GET_PAD(skb);
-	len = RMNET_MAP_GET_LENGTH(skb) - pad;
+	mux_id = map_header->mux_id;
+	pad = map_header->pad_len;
+	len = ntohs(map_header->pkt_len) - pad;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP)
 		goto free_skb;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 576501db2a0b..2aea153f4247 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -32,18 +32,6 @@ enum rmnet_map_commands {
 	RMNET_MAP_COMMAND_ENUM_LENGTH
 };
 
-#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
-				 (Y)->data)->mux_id)
-#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
-				(Y)->data)->cd_bit)
-#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \
-				(Y)->data)->pad_len)
-#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \
-				    ((Y)->data + \
-				      sizeof(struct rmnet_map_header)))
-#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \
-					(Y)->data)->pkt_len))
-
 #define RMNET_MAP_COMMAND_REQUEST     0
 #define RMNET_MAP_COMMAND_ACK         1
 #define RMNET_MAP_COMMAND_UNSUPPORTED 2
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index beaee4962128..add0f5ade2e6 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -12,12 +12,13 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
 				    struct rmnet_port *port,
 				    int enable)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_endpoint *ep;
 	struct net_device *vnd;
 	u8 mux_id;
 	int r;
 
-	mux_id = RMNET_MAP_GET_MUX_ID(skb);
+	mux_id = map_header->mux_id;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP) {
 		kfree_skb(skb);
@@ -49,6 +50,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 			       unsigned char type,
 			       struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_map_control_command *cmd;
 	struct net_device *dev = skb->dev;
 
@@ -58,7 +60,8 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
 
 	skb->protocol = htons(ETH_P_MAP);
 
-	cmd = RMNET_MAP_GET_CMD_START(skb);
+	/* Command data immediately follows the MAP header */
+	cmd = (struct rmnet_map_control_command *)(map_header + 1);
 	cmd->cmd_type = type & 0x03;
 
 	netif_tx_lock(dev);
@@ -71,11 +74,13 @@ static void rmnet_map_send_ack(struct sk_buff *skb,
  */
 void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port)
 {
+	struct rmnet_map_header *map_header = (void *)skb->data;
 	struct rmnet_map_control_command *cmd;
 	unsigned char command_name;
 	unsigned char rc = 0;
 
-	cmd = RMNET_MAP_GET_CMD_START(skb);
+	/* Command data immediately follows the MAP header */
+	cmd = (struct rmnet_map_control_command *)(map_header + 1);
 	command_name = cmd->command_name;
 
 	switch (command_name) {
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 0bfe69698b27..3af68368fc31 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -315,7 +315,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 		return NULL;
 
 	maph = (struct rmnet_map_header *)skb->data;
-	packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
+	packet_len = ntohs(maph->pkt_len) + sizeof(*maph);
 
 	if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4)
 		packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
@@ -324,7 +324,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
 		return NULL;
 
 	/* Some hardware can send us empty frames. Catch them */
-	if (ntohs(maph->pkt_len) == 0)
+	if (!maph->pkt_len)
 		return NULL;
 
 	skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC);
-- 
cgit v1.2.3


From 16653c16d282e768763b2e8cc78f75df8fd53992 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:49 -0500
Subject: net: qualcomm: rmnet: use masks instead of C bit-fields

The actual layout of bits defined in C bit-fields (e.g. int foo : 3)
is implementation-defined.  Structures defined in <linux/if_rmnet.h>
address this by specifying all bit-fields twice, to cover two
possible layouts.

I think this pattern is repetitive and noisy, and I find the whole
notion of compiler "bitfield endianness" to be non-intuitive.

Stop using C bit-fields for the command/data flag and the pad length
fields in the rmnet_map structure, and define a single-byte flags
field instead.  Define a mask for the single-bit "command" flag,
and another mask for the encoded pad length.  The content of both
fields can be accessed using a simple bitwise AND operation.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   |  4 ++--
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   |  4 +++-
 include/linux/if_rmnet.h                           | 23 ++++++++++------------
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 2a6b2a609884..0be5ac7ab261 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -61,7 +61,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 	u16 len, pad;
 	u8 mux_id;
 
-	if (map_header->cd_bit) {
+	if (map_header->flags & MAP_CMD_FLAG) {
 		/* Packet contains a MAP command (not data) */
 		if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
@@ -70,7 +70,7 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
 	}
 
 	mux_id = map_header->mux_id;
-	pad = map_header->pad_len;
+	pad = map_header->flags & MAP_PAD_LEN_MASK;
 	len = ntohs(map_header->pkt_len) - pad;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 3af68368fc31..e7d0394cb297 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -280,6 +280,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 		return map_header;
 	}
 
+	BUILD_BUG_ON(MAP_PAD_LEN_MASK < 3);
 	padding = ALIGN(map_datalen, 4) - map_datalen;
 
 	if (padding == 0)
@@ -293,7 +294,8 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 
 done:
 	map_header->pkt_len = htons(map_datalen + padding);
-	map_header->pad_len = padding & 0x3F;
+	/* This is a data packet, so the CMD bit is 0 */
+	map_header->flags = padding & MAP_PAD_LEN_MASK;
 
 	return map_header;
 }
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 8c7845baf383..a02f0a3df1d9 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -6,21 +6,18 @@
 #define _LINUX_IF_RMNET_H_
 
 struct rmnet_map_header {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u8  pad_len:6;
-	u8  reserved_bit:1;
-	u8  cd_bit:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u8  cd_bit:1;
-	u8  reserved_bit:1;
-	u8  pad_len:6;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
-	u8  mux_id;
-	__be16 pkt_len;
+	u8 flags;			/* MAP_CMD_FLAG, MAP_PAD_LEN_MASK */
+	u8 mux_id;
+	__be16 pkt_len;			/* Length of packet, including pad */
 }  __aligned(1);
 
+/* rmnet_map_header flags field:
+ *  PAD_LEN:	number of pad bytes following packet data
+ *  CMD:	1 = packet contains a MAP command; 0 = packet contains data
+ */
+#define MAP_PAD_LEN_MASK		GENMASK(5, 0)
+#define MAP_CMD_FLAG			BIT(7)
+
 struct rmnet_map_dl_csum_trailer {
 	u8  reserved1;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-- 
cgit v1.2.3


From cc1b21ba6251c8dd8e4e86018c9fdba85df0d219 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:50 -0500
Subject: net: qualcomm: rmnet: don't use C bit-fields in rmnet checksum
 trailer

Replace the use of C bit-fields in the rmnet_map_dl_csum_trailer
structure with a single one-byte field, using constant field masks
to encode or get at embedded values.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c |  2 +-
 include/linux/if_rmnet.h                             | 17 +++++++----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index e7d0394cb297..c336c17e01fe 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -359,7 +359,7 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 
 	csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len);
 
-	if (!csum_trailer->valid) {
+	if (!(csum_trailer->flags & MAP_CSUM_DL_VALID_FLAG)) {
 		priv->stats.csum_valid_unset++;
 		return -EINVAL;
 	}
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index a02f0a3df1d9..941997df9e08 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -19,21 +19,18 @@ struct rmnet_map_header {
 #define MAP_CMD_FLAG			BIT(7)
 
 struct rmnet_map_dl_csum_trailer {
-	u8  reserved1;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u8  valid:1;
-	u8  reserved2:7;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u8  reserved2:7;
-	u8  valid:1;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
+	u8 reserved1;
+	u8 flags;			/* MAP_CSUM_DL_VALID_FLAG */
 	__be16 csum_start_offset;
 	__be16 csum_length;
 	__be16 csum_value;
 } __aligned(1);
 
+/* rmnet_map_dl_csum_trailer flags field:
+ *  VALID:	1 = checksum and length valid; 0 = ignore them
+ */
+#define MAP_CSUM_DL_VALID_FLAG		BIT(0)
+
 struct rmnet_map_ul_csum_header {
 	__be16 csum_start_offset;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-- 
cgit v1.2.3


From 86ca860e12ec0feab7d721d3b05e60fb86613540 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 16:51:51 -0500
Subject: net: qualcomm: rmnet: don't use C bit-fields in rmnet checksum header

Replace the use of C bit-fields in the rmnet_map_ul_csum_header
structure with a single two-byte (big endian) structure member,
and use masks to encode or get values within it.  The content of
these fields can be accessed using simple bitwise AND and OR
operations on the (host byte order) value of the new structure
member.

Previously rmnet_map_ipv4_ul_csum_header() would update C bit-field
values in host byte order, then forcibly fix their byte order using
a combination of byte swap operations and types.

Instead, just compute the value that needs to go into the new
structure member and save it with a simple byte-order conversion.

Make similar simplifications in rmnet_map_ipv6_ul_csum_header().

Finally, in rmnet_map_checksum_uplink_packet() a set of assignments
zeroes every field in the upload checksum header.  Replace that with
a single memset() operation.

Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 38 ++++++++--------------
 include/linux/if_rmnet.h                           | 21 ++++++------
 2 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index c336c17e01fe..0ac2ff828320 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -197,20 +197,16 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	__be16 *hdr = (__be16 *)ul_header;
 	struct iphdr *ip4h = iphdr;
+	u16 val;
 
-	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
-	ul_header->csum_insert_offset = skb->csum_offset;
-	ul_header->csum_enabled = 1;
+	val = MAP_CSUM_UL_ENABLED_FLAG;
 	if (ip4h->protocol == IPPROTO_UDP)
-		ul_header->udp_ind = 1;
-	else
-		ul_header->udp_ind = 0;
+		val |= MAP_CSUM_UL_UDP_FLAG;
+	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
-	/* Changing remaining fields to network order */
-	hdr++;
-	*hdr = htons((__force u16)*hdr);
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
+	ul_header->csum_info = htons(val);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -237,21 +233,16 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
-	__be16 *hdr = (__be16 *)ul_header;
 	struct ipv6hdr *ip6h = ip6hdr;
+	u16 val;
 
-	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
-	ul_header->csum_insert_offset = skb->csum_offset;
-	ul_header->csum_enabled = 1;
-
+	val = MAP_CSUM_UL_ENABLED_FLAG;
 	if (ip6h->nexthdr == IPPROTO_UDP)
-		ul_header->udp_ind = 1;
-	else
-		ul_header->udp_ind = 0;
+		val |= MAP_CSUM_UL_UDP_FLAG;
+	val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK;
 
-	/* Changing remaining fields to network order */
-	hdr++;
-	*hdr = htons((__force u16)*hdr);
+	ul_header->csum_start_offset = htons(skb_network_header_len(skb));
+	ul_header->csum_info = htons(val);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
@@ -419,10 +410,7 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
 	}
 
 sw_csum:
-	ul_header->csum_start_offset = 0;
-	ul_header->csum_insert_offset = 0;
-	ul_header->csum_enabled = 0;
-	ul_header->udp_ind = 0;
+	memset(ul_header, 0, sizeof(*ul_header));
 
 	priv->stats.csum_sw++;
 }
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index 941997df9e08..4efb537f57f3 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -33,17 +33,16 @@ struct rmnet_map_dl_csum_trailer {
 
 struct rmnet_map_ul_csum_header {
 	__be16 csum_start_offset;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	u16 csum_insert_offset:14;
-	u16 udp_ind:1;
-	u16 csum_enabled:1;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	u16 csum_enabled:1;
-	u16 udp_ind:1;
-	u16 csum_insert_offset:14;
-#else
-#error	"Please fix <asm/byteorder.h>"
-#endif
+	__be16 csum_info;		/* MAP_CSUM_UL_* */
 } __aligned(1);
 
+/* csum_info field:
+ *  OFFSET:	where (offset in bytes) to insert computed checksum
+ *  UDP:	1 = UDP checksum (zero checkum means no checksum)
+ *  ENABLED:	1 = checksum computation requested
+ */
+#define MAP_CSUM_UL_OFFSET_MASK		GENMASK(13, 0)
+#define MAP_CSUM_UL_UDP_FLAG		BIT(14)
+#define MAP_CSUM_UL_ENABLED_FLAG	BIT(15)
+
 #endif /* !(_LINUX_IF_RMNET_H_) */
-- 
cgit v1.2.3


From 5b039241fe3a31b67fbffb07082c6d9d972204f8 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 15 Mar 2021 19:31:33 -0700
Subject: ionic: simplify TSO descriptor mapping

One issue with the original TSO code was that it was working too
hard to deal with skb layouts that were never going to show up,
such as an skb->data that was longer than a single descriptor's
length.  The other issue was trying to arrange the fragment dma
mapping at the same time as figuring out the descriptors needed.
There was just too much going on at the same time.

Now we do the dma mapping first, which sets up the buffers with
skb->data in buf[0] and the remaining frags in buf[1..n-1].
Next we spread the bufs across the descriptors needed, where
each descriptor gets up to mss number of bytes.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |   5 +-
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 237 +++++++++++------------
 2 files changed, 117 insertions(+), 125 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 339824cfd618..d0c969a6d43e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -179,8 +179,11 @@ struct ionic_buf_info {
 	struct page *page;
 	dma_addr_t dma_addr;
 	u32 page_offset;
+	u32 len;
 };
 
+#define IONIC_MAX_FRAGS			(1 + IONIC_TX_MAX_SG_ELEMS_V1)
+
 struct ionic_desc_info {
 	union {
 		void *desc;
@@ -194,7 +197,7 @@ struct ionic_desc_info {
 		struct ionic_rxq_sg_desc *rxq_sgl_desc;
 	};
 	unsigned int nbufs;
-	struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
+	struct ionic_buf_info bufs[IONIC_MAX_FRAGS];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index c63e6e7aa47b..639000a2e495 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -605,12 +605,51 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 	return dma_addr;
 }
 
+static int ionic_tx_map_tso(struct ionic_queue *q, struct sk_buff *skb,
+			    struct ionic_buf_info *buf_info)
+{
+	struct device *dev = q->dev;
+	dma_addr_t dma_addr;
+	skb_frag_t *frag;
+	int frag_idx;
+
+	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
+	if (dma_mapping_error(dev, dma_addr))
+		return -EIO;
+	buf_info->dma_addr = dma_addr;
+	buf_info->len = skb_headlen(skb);
+	buf_info++;
+
+	for (frag_idx = 0; frag_idx < skb_shinfo(skb)->nr_frags; frag_idx++, buf_info++) {
+		frag = &skb_shinfo(skb)->frags[frag_idx];
+		dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
+		if (dma_mapping_error(dev, dma_addr))
+			goto dma_fail;
+		buf_info->dma_addr = dma_addr;
+		buf_info->len = skb_frag_size(frag);
+	}
+
+	return 0;
+
+dma_fail:
+	/* unwind the frag mappings and the head mapping */
+	while (frag_idx > 0) {
+		frag_idx--;
+		buf_info--;
+		dma_unmap_page(dev, buf_info->dma_addr,
+			       buf_info->len, DMA_TO_DEVICE);
+	}
+	dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE);
+	return -EIO;
+}
+
 static void ionic_tx_clean(struct ionic_queue *q,
 			   struct ionic_desc_info *desc_info,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
 	struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct ionic_txq_desc *desc = desc_info->desc;
@@ -623,20 +662,22 @@ static void ionic_tx_clean(struct ionic_queue *q,
 	decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
 			    &opcode, &flags, &nsge, &addr);
 
-	/* use unmap_single only if either this is not TSO,
-	 * or this is first descriptor of a TSO
-	 */
-	if (opcode != IONIC_TXQ_DESC_OPCODE_TSO ||
-	    flags & IONIC_TXQ_DESC_FLAG_TSO_SOT)
+	if (opcode != IONIC_TXQ_DESC_OPCODE_TSO) {
 		dma_unmap_single(dev, (dma_addr_t)addr,
 				 le16_to_cpu(desc->len), DMA_TO_DEVICE);
-	else
-		dma_unmap_page(dev, (dma_addr_t)addr,
-			       le16_to_cpu(desc->len), DMA_TO_DEVICE);
-
-	for (i = 0; i < nsge; i++, elem++)
-		dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
-			       le16_to_cpu(elem->len), DMA_TO_DEVICE);
+		for (i = 0; i < nsge; i++, elem++)
+			dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
+				       le16_to_cpu(elem->len), DMA_TO_DEVICE);
+	} else {
+		if (flags & IONIC_TXQ_DESC_FLAG_TSO_EOT) {
+			dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+					 buf_info->len, DMA_TO_DEVICE);
+			buf_info++;
+			for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+				dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+					       buf_info->len, DMA_TO_DEVICE);
+		}
+	}
 
 	if (cb_arg) {
 		struct sk_buff *skb = cb_arg;
@@ -794,29 +835,23 @@ static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
 
 static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 {
+	struct ionic_buf_info buf_info[IONIC_MAX_FRAGS] = {{0}};
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct ionic_desc_info *rewind_desc_info;
 	struct ionic_txq_sg_elem *elem;
-	struct device *dev = q->dev;
 	struct ionic_txq_desc *desc;
-	unsigned int frag_left = 0;
-	unsigned int offset = 0;
-	u16 abort = q->head_idx;
-	unsigned int len_left;
+	unsigned int chunk_len;
+	unsigned int frag_rem;
+	unsigned int frag_idx;
+	unsigned int tso_rem;
+	unsigned int seg_rem;
 	dma_addr_t desc_addr;
+	dma_addr_t frag_addr;
 	unsigned int hdrlen;
 	unsigned int nfrags;
-	unsigned int seglen;
-	u64 total_bytes = 0;
-	u64 total_pkts = 0;
-	u16 rewind = abort;
-	unsigned int left;
 	unsigned int len;
 	unsigned int mss;
-	skb_frag_t *frag;
 	bool start, done;
 	bool outer_csum;
-	dma_addr_t addr;
 	bool has_vlan;
 	u16 desc_len;
 	u8 desc_nsge;
@@ -824,9 +859,12 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	bool encap;
 	int err;
 
+	if (unlikely(ionic_tx_map_tso(q, skb, buf_info)))
+		return -EIO;
+
+	len = skb->len;
 	mss = skb_shinfo(skb)->gso_size;
 	nfrags = skb_shinfo(skb)->nr_frags;
-	len_left = skb->len - skb_headlen(skb);
 	outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) ||
 		     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
 	has_vlan = !!skb_vlan_tag_present(skb);
@@ -851,117 +889,68 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	else
 		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
-	seglen = hdrlen + mss;
-	left = skb_headlen(skb);
+	tso_rem = len;
+	seg_rem = min(tso_rem, hdrlen + mss);
 
-	desc = ionic_tx_tso_next(q, &elem);
-	start = true;
+	frag_idx = 0;
+	frag_addr = 0;
+	frag_rem = 0;
 
-	/* Chop skb->data up into desc segments */
+	start = true;
 
-	while (left > 0) {
-		len = min(seglen, left);
-		frag_left = seglen - len;
-		desc_addr = ionic_tx_map_single(q, skb->data + offset, len);
-		if (dma_mapping_error(dev, desc_addr))
-			goto err_out_abort;
-		desc_len = len;
+	while (tso_rem > 0) {
+		desc = NULL;
+		elem = NULL;
+		desc_addr = 0;
+		desc_len = 0;
 		desc_nsge = 0;
-		left -= len;
-		offset += len;
-		if (nfrags > 0 && frag_left > 0)
-			continue;
-		done = (nfrags == 0 && left == 0);
-		ionic_tx_tso_post(q, desc, skb,
-				  desc_addr, desc_nsge, desc_len,
-				  hdrlen, mss,
-				  outer_csum,
-				  vlan_tci, has_vlan,
-				  start, done);
-		total_pkts++;
-		total_bytes += start ? len : len + hdrlen;
-		desc = ionic_tx_tso_next(q, &elem);
-		start = false;
-		seglen = mss;
-	}
-
-	/* Chop skb frags into desc segments */
-
-	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
-		offset = 0;
-		left = skb_frag_size(frag);
-		len_left -= left;
-		nfrags--;
-		stats->frags++;
-
-		while (left > 0) {
-			if (frag_left > 0) {
-				len = min(frag_left, left);
-				frag_left -= len;
-				addr = ionic_tx_map_frag(q, frag, offset, len);
-				if (dma_mapping_error(dev, addr))
-					goto err_out_abort;
-				elem->addr = cpu_to_le64(addr);
-				elem->len = cpu_to_le16(len);
-				elem++;
-				desc_nsge++;
-				left -= len;
-				offset += len;
-				if (nfrags > 0 && frag_left > 0)
-					continue;
-				done = (nfrags == 0 && left == 0);
-				ionic_tx_tso_post(q, desc, skb, desc_addr,
-						  desc_nsge, desc_len,
-						  hdrlen, mss, outer_csum,
-						  vlan_tci, has_vlan,
-						  start, done);
-				total_pkts++;
-				total_bytes += start ? len : len + hdrlen;
+		/* loop until a full tcp segment can be created */
+		while (seg_rem > 0) {
+			/* if the fragment is exhausted get the next one */
+			if (frag_rem == 0) {
+				/* grab the next fragment */
+				frag_addr = buf_info[frag_idx].dma_addr;
+				frag_rem = buf_info[frag_idx].len;
+				frag_idx++;
+			}
+			chunk_len = min(frag_rem, seg_rem);
+			if (!desc) {
+				/* fill main descriptor */
 				desc = ionic_tx_tso_next(q, &elem);
-				start = false;
+				desc_addr = frag_addr;
+				desc_len = chunk_len;
 			} else {
-				len = min(mss, left);
-				frag_left = mss - len;
-				desc_addr = ionic_tx_map_frag(q, frag,
-							      offset, len);
-				if (dma_mapping_error(dev, desc_addr))
-					goto err_out_abort;
-				desc_len = len;
-				desc_nsge = 0;
-				left -= len;
-				offset += len;
-				if (nfrags > 0 && frag_left > 0)
-					continue;
-				done = (nfrags == 0 && left == 0);
-				ionic_tx_tso_post(q, desc, skb, desc_addr,
-						  desc_nsge, desc_len,
-						  hdrlen, mss, outer_csum,
-						  vlan_tci, has_vlan,
-						  start, done);
-				total_pkts++;
-				total_bytes += start ? len : len + hdrlen;
-				desc = ionic_tx_tso_next(q, &elem);
-				start = false;
+				/* fill sg descriptor */
+				elem->addr = cpu_to_le64(frag_addr);
+				elem->len = cpu_to_le16(chunk_len);
+				elem++;
+				desc_nsge++;
 			}
+			frag_addr += chunk_len;
+			frag_rem -= chunk_len;
+			tso_rem -= chunk_len;
+			seg_rem -= chunk_len;
+		}
+		seg_rem = min(tso_rem, mss);
+		done = (tso_rem == 0);
+		if (done) {
+			memcpy(&q->info[q->head_idx].bufs, buf_info, sizeof(buf_info));
+			q->info[q->head_idx].nbufs = nfrags + 1;
 		}
+		/* post descriptor */
+		ionic_tx_tso_post(q, desc, skb,
+				  desc_addr, desc_nsge, desc_len,
+				  hdrlen, mss, outer_csum, vlan_tci, has_vlan,
+				  start, done);
+		start = false;
 	}
 
-	stats->pkts += total_pkts;
-	stats->bytes += total_bytes;
+	stats->pkts += DIV_ROUND_UP(len - hdrlen, mss);
+	stats->bytes += len;
 	stats->tso++;
-	stats->tso_bytes += total_bytes;
+	stats->tso_bytes = len;
 
 	return 0;
-
-err_out_abort:
-	while (rewind != q->head_idx) {
-		rewind_desc_info = &q->info[rewind];
-		ionic_tx_clean(q, rewind_desc_info, NULL, NULL);
-		rewind = (rewind + 1) & (q->num_descs - 1);
-	}
-	q->head_idx = abort;
-
-	return -ENOMEM;
 }
 
 static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
-- 
cgit v1.2.3


From 2da479ca0814c604454616ad1de813ab662e23cd Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 15 Mar 2021 19:31:34 -0700
Subject: ionic: generic tx skb mapping

Make the new ionic_tx_map_tso() usable by the non-TSO paths,
and pull the call up a level into ionic_tx() before calling
the csum or no-csum routines.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 142 +++++++++++------------
 1 file changed, 68 insertions(+), 74 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 639000a2e495..1d27d6cad504 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -605,11 +605,13 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 	return dma_addr;
 }
 
-static int ionic_tx_map_tso(struct ionic_queue *q, struct sk_buff *skb,
-			    struct ionic_buf_info *buf_info)
+static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
+			    struct ionic_desc_info *desc_info)
 {
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
+	unsigned int nfrags;
 	skb_frag_t *frag;
 	int frag_idx;
 
@@ -620,15 +622,19 @@ static int ionic_tx_map_tso(struct ionic_queue *q, struct sk_buff *skb,
 	buf_info->len = skb_headlen(skb);
 	buf_info++;
 
-	for (frag_idx = 0; frag_idx < skb_shinfo(skb)->nr_frags; frag_idx++, buf_info++) {
-		frag = &skb_shinfo(skb)->frags[frag_idx];
+	frag = skb_shinfo(skb)->frags;
+	nfrags = skb_shinfo(skb)->nr_frags;
+	for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) {
 		dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
 		if (dma_mapping_error(dev, dma_addr))
 			goto dma_fail;
 		buf_info->dma_addr = dma_addr;
 		buf_info->len = skb_frag_size(frag);
+		buf_info++;
 	}
 
+	desc_info->nbufs = 1 + nfrags;
+
 	return 0;
 
 dma_fail:
@@ -814,40 +820,29 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc
 	desc->hdr_len = cpu_to_le16(hdrlen);
 	desc->mss = cpu_to_le16(mss);
 
-	if (done) {
+	if (start) {
 		skb_tx_timestamp(skb);
 		netdev_tx_sent_queue(q_to_ndq(q), skb->len);
-		ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+		ionic_txq_post(q, false, ionic_tx_clean, skb);
 	} else {
-		ionic_txq_post(q, false, ionic_tx_clean, NULL);
+		ionic_txq_post(q, done, NULL, NULL);
 	}
 }
 
-static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q,
-						struct ionic_txq_sg_elem **elem)
-{
-	struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
-
-	*elem = sg_desc->elems;
-	return desc;
-}
-
 static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 {
-	struct ionic_buf_info buf_info[IONIC_MAX_FRAGS] = {{0}};
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_desc_info *desc_info;
+	struct ionic_buf_info *buf_info;
 	struct ionic_txq_sg_elem *elem;
 	struct ionic_txq_desc *desc;
 	unsigned int chunk_len;
 	unsigned int frag_rem;
-	unsigned int frag_idx;
 	unsigned int tso_rem;
 	unsigned int seg_rem;
 	dma_addr_t desc_addr;
 	dma_addr_t frag_addr;
 	unsigned int hdrlen;
-	unsigned int nfrags;
 	unsigned int len;
 	unsigned int mss;
 	bool start, done;
@@ -859,12 +854,14 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	bool encap;
 	int err;
 
-	if (unlikely(ionic_tx_map_tso(q, skb, buf_info)))
+	desc_info = &q->info[q->head_idx];
+	buf_info = desc_info->bufs;
+
+	if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
 		return -EIO;
 
 	len = skb->len;
 	mss = skb_shinfo(skb)->gso_size;
-	nfrags = skb_shinfo(skb)->nr_frags;
 	outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) ||
 		     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
 	has_vlan = !!skb_vlan_tag_present(skb);
@@ -892,7 +889,6 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	tso_rem = len;
 	seg_rem = min(tso_rem, hdrlen + mss);
 
-	frag_idx = 0;
 	frag_addr = 0;
 	frag_rem = 0;
 
@@ -904,19 +900,20 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 		desc_addr = 0;
 		desc_len = 0;
 		desc_nsge = 0;
-		/* loop until a full tcp segment can be created */
+		/* use fragments until we have enough to post a single descriptor */
 		while (seg_rem > 0) {
-			/* if the fragment is exhausted get the next one */
+			/* if the fragment is exhausted then move to the next one */
 			if (frag_rem == 0) {
 				/* grab the next fragment */
-				frag_addr = buf_info[frag_idx].dma_addr;
-				frag_rem = buf_info[frag_idx].len;
-				frag_idx++;
+				frag_addr = buf_info->dma_addr;
+				frag_rem = buf_info->len;
+				buf_info++;
 			}
 			chunk_len = min(frag_rem, seg_rem);
 			if (!desc) {
 				/* fill main descriptor */
-				desc = ionic_tx_tso_next(q, &elem);
+				desc = desc_info->txq_desc;
+				elem = desc_info->txq_sg_desc->elems;
 				desc_addr = frag_addr;
 				desc_len = chunk_len;
 			} else {
@@ -933,16 +930,15 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 		}
 		seg_rem = min(tso_rem, mss);
 		done = (tso_rem == 0);
-		if (done) {
-			memcpy(&q->info[q->head_idx].bufs, buf_info, sizeof(buf_info));
-			q->info[q->head_idx].nbufs = nfrags + 1;
-		}
 		/* post descriptor */
 		ionic_tx_tso_post(q, desc, skb,
 				  desc_addr, desc_nsge, desc_len,
 				  hdrlen, mss, outer_csum, vlan_tci, has_vlan,
 				  start, done);
 		start = false;
+		/* Buffer information is stored with the first tso descriptor */
+		desc_info = &q->info[q->head_idx];
+		desc_info->nbufs = 0;
 	}
 
 	stats->pkts += DIV_ROUND_UP(len - hdrlen, mss);
@@ -953,12 +949,12 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	return 0;
 }
 
-static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb,
+			      struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
+	struct ionic_txq_desc *desc = desc_info->txq_desc;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->dev;
-	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
 	bool encap;
@@ -967,23 +963,22 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 	has_vlan = !!skb_vlan_tag_present(skb);
 	encap = skb->encapsulation;
 
-	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
-	if (dma_mapping_error(dev, dma_addr))
-		return -ENOMEM;
-
 	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
 	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
 
 	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL,
-				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+				  flags, skb_shinfo(skb)->nr_frags,
+				  buf_info->dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
-	desc->len = cpu_to_le16(skb_headlen(skb));
-	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
-	desc->csum_offset = cpu_to_le16(skb->csum_offset);
+	desc->len = cpu_to_le16(buf_info->len);
 	if (has_vlan) {
 		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
 		stats->vlan_inserted++;
+	} else {
+		desc->vlan_tci = 0;
 	}
+	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
+	desc->csum_offset = cpu_to_le16(skb->csum_offset);
 
 	if (skb_csum_is_sctp(skb))
 		stats->crc32_csum++;
@@ -993,12 +988,12 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 	return 0;
 }
 
-static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb,
+				 struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
+	struct ionic_txq_desc *desc = desc_info->txq_desc;
+	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->dev;
-	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
 	bool encap;
@@ -1007,67 +1002,66 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
 	has_vlan = !!skb_vlan_tag_present(skb);
 	encap = skb->encapsulation;
 
-	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
-	if (dma_mapping_error(dev, dma_addr))
-		return -ENOMEM;
-
 	flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0;
 	flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
 
 	cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE,
-				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
+				  flags, skb_shinfo(skb)->nr_frags,
+				  buf_info->dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
-	desc->len = cpu_to_le16(skb_headlen(skb));
+	desc->len = cpu_to_le16(buf_info->len);
 	if (has_vlan) {
 		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
 		stats->vlan_inserted++;
+	} else {
+		desc->vlan_tci = 0;
 	}
+	desc->csum_start = 0;
+	desc->csum_offset = 0;
 
 	stats->csum_none++;
 
 	return 0;
 }
 
-static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
+static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb,
+			      struct ionic_desc_info *desc_info)
 {
-	struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc;
-	unsigned int len_left = skb->len - skb_headlen(skb);
+	struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc;
+	struct ionic_buf_info *buf_info = &desc_info->bufs[1];
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->dev;
-	dma_addr_t dma_addr;
-	skb_frag_t *frag;
-	u16 len;
+	unsigned int i;
 
-	for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) {
-		len = skb_frag_size(frag);
-		elem->len = cpu_to_le16(len);
-		dma_addr = ionic_tx_map_frag(q, frag, 0, len);
-		if (dma_mapping_error(dev, dma_addr))
-			return -ENOMEM;
-		elem->addr = cpu_to_le64(dma_addr);
-		len_left -= len;
-		stats->frags++;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) {
+		elem->addr = cpu_to_le64(buf_info->dma_addr);
+		elem->len = cpu_to_le16(buf_info->len);
 	}
 
+	stats->frags += skb_shinfo(skb)->nr_frags;
+
 	return 0;
 }
 
 static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 {
+	struct ionic_desc_info *desc_info = &q->info[q->head_idx];
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int err;
 
+	if (unlikely(ionic_tx_map_skb(q, skb, desc_info)))
+		return -EIO;
+
 	/* set up the initial descriptor */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		err = ionic_tx_calc_csum(q, skb);
+		err = ionic_tx_calc_csum(q, skb, desc_info);
 	else
-		err = ionic_tx_calc_no_csum(q, skb);
+		err = ionic_tx_calc_no_csum(q, skb, desc_info);
 	if (err)
 		return err;
 
 	/* add frags */
-	err = ionic_tx_skb_frags(q, skb);
+	err = ionic_tx_skb_frags(q, skb, desc_info);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 19fef72cb4ba4c3c25bf89f4e73fdcefb9fd7bd2 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 15 Mar 2021 19:31:35 -0700
Subject: ionic: simplify tx clean

The descriptor mappings are set up the same way whether
or not it is a TSO, so we don't need separate logic for
the two cases.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 30 ++++++------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 1d27d6cad504..f841ccb5adfd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -654,35 +654,19 @@ static void ionic_tx_clean(struct ionic_queue *q,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
 	struct ionic_buf_info *buf_info = desc_info->bufs;
-	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct ionic_txq_desc *desc = desc_info->desc;
 	struct device *dev = q->dev;
-	u8 opcode, flags, nsge;
 	u16 queue_index;
 	unsigned int i;
-	u64 addr;
-
-	decode_txq_desc_cmd(le64_to_cpu(desc->cmd),
-			    &opcode, &flags, &nsge, &addr);
 
-	if (opcode != IONIC_TXQ_DESC_OPCODE_TSO) {
-		dma_unmap_single(dev, (dma_addr_t)addr,
-				 le16_to_cpu(desc->len), DMA_TO_DEVICE);
-		for (i = 0; i < nsge; i++, elem++)
-			dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr),
-				       le16_to_cpu(elem->len), DMA_TO_DEVICE);
-	} else {
-		if (flags & IONIC_TXQ_DESC_FLAG_TSO_EOT) {
-			dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
-					 buf_info->len, DMA_TO_DEVICE);
-			buf_info++;
-			for (i = 1; i < desc_info->nbufs; i++, buf_info++)
-				dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
-					       buf_info->len, DMA_TO_DEVICE);
-		}
+	if (desc_info->nbufs) {
+		dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
+				 buf_info->len, DMA_TO_DEVICE);
+		buf_info++;
+		for (i = 1; i < desc_info->nbufs; i++, buf_info++)
+			dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr,
+				       buf_info->len, DMA_TO_DEVICE);
 	}
 
 	if (cb_arg) {
-- 
cgit v1.2.3


From 633eddf120ac148c05db45fc8fd878af54f72eaa Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 15 Mar 2021 19:31:36 -0700
Subject: ionic: aggregate Tx byte counting calls

Gather the Tx packet and byte counts and call
netdev_tx_completed_queue() only once per clean cycle.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |  1 +
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 27 +++++++++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index d0c969a6d43e..ca7e55455165 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -196,6 +196,7 @@ struct ionic_desc_info {
 		struct ionic_txq_sg_desc *txq_sg_desc;
 		struct ionic_rxq_sg_desc *rxq_sgl_desc;
 	};
+	unsigned int bytes;
 	unsigned int nbufs;
 	struct ionic_buf_info bufs[IONIC_MAX_FRAGS];
 	ionic_desc_cb cb;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index f841ccb5adfd..03e00a6c413a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -671,7 +671,6 @@ static void ionic_tx_clean(struct ionic_queue *q,
 
 	if (cb_arg) {
 		struct sk_buff *skb = cb_arg;
-		u32 len = skb->len;
 
 		queue_index = skb_get_queue_mapping(skb);
 		if (unlikely(__netif_subqueue_stopped(q->lif->netdev,
@@ -679,9 +678,11 @@ static void ionic_tx_clean(struct ionic_queue *q,
 			netif_wake_subqueue(q->lif->netdev, queue_index);
 			q->wake++;
 		}
-		dev_kfree_skb_any(skb);
+
+		desc_info->bytes = skb->len;
 		stats->clean++;
-		netdev_tx_completed_queue(q_to_ndq(q), 1, len);
+
+		dev_consume_skb_any(skb);
 	}
 }
 
@@ -690,6 +691,8 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	struct ionic_txq_comp *comp = cq_info->txcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
+	int bytes = 0;
+	int pkts = 0;
 	u16 index;
 
 	if (!color_match(comp->color, cq->done_color))
@@ -700,13 +703,21 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	 */
 	do {
 		desc_info = &q->info[q->tail_idx];
+		desc_info->bytes = 0;
 		index = q->tail_idx;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 		ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg);
+		if (desc_info->cb_arg) {
+			pkts++;
+			bytes += desc_info->bytes;
+		}
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	} while (index != le16_to_cpu(comp->comp_index));
 
+	if (pkts && bytes)
+		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
+
 	return true;
 }
 
@@ -725,15 +736,25 @@ void ionic_tx_flush(struct ionic_cq *cq)
 void ionic_tx_empty(struct ionic_queue *q)
 {
 	struct ionic_desc_info *desc_info;
+	int bytes = 0;
+	int pkts = 0;
 
 	/* walk the not completed tx entries, if any */
 	while (q->head_idx != q->tail_idx) {
 		desc_info = &q->info[q->tail_idx];
+		desc_info->bytes = 0;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 		ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg);
+		if (desc_info->cb_arg) {
+			pkts++;
+			bytes += desc_info->bytes;
+		}
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	}
+
+	if (pkts && bytes)
+		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
 }
 
 static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb)
-- 
cgit v1.2.3


From 0205e9de42911404902728911b03fc1469242419 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Sun, 14 Mar 2021 18:38:38 +0100
Subject: libbpf: Avoid inline hint definition from 'linux/stddef.h'

Linux headers might pull 'linux/stddef.h' which defines
'__always_inline' as the following:

   #ifndef __always_inline
   #define __always_inline inline
   #endif

This becomes an issue if the program picks up the 'linux/stddef.h'
definition as the macro now just hints inline to clang.

This change now enforces the proper definition for BPF programs
regardless of the include order.

Signed-off-by: Pedro Tammela <pctammela@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210314173839.457768-1-pctammela@gmail.com
---
 tools/lib/bpf/bpf_helpers.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index ae6c975e0b87..53ff81c49dbd 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -29,9 +29,10 @@
  */
 #define SEC(NAME) __attribute__((section(NAME), used))
 
-#ifndef __always_inline
+/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
+#undef __always_inline
 #define __always_inline inline __attribute__((always_inline))
-#endif
+
 #ifndef __noinline
 #define __noinline __attribute__((noinline))
 #endif
-- 
cgit v1.2.3


From d94436a5d1a0ac8bcad29eb6735384940ae15603 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 15 Mar 2021 21:44:54 +0900
Subject: samples: bpf: Fix a spelling typo in do_hbm_test.sh

This patch fixes a spelling typo in do_hbm_test.sh

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210315124454.1744594-1-standby24x7@gmail.com
---
 samples/bpf/do_hbm_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
index 21790ea5c460..38e4599350db 100755
--- a/samples/bpf/do_hbm_test.sh
+++ b/samples/bpf/do_hbm_test.sh
@@ -10,7 +10,7 @@
 Usage() {
   echo "Script for testing HBM (Host Bandwidth Manager) framework."
   echo "It creates a cgroup to use for testing and load a BPF program to limit"
-  echo "egress or ingress bandwidht. It then uses iperf3 or netperf to create"
+  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
   echo "loads. The output is the goodput in Mbps (unless -D was used)."
   echo ""
   echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
-- 
cgit v1.2.3


From 23f50b5ac331c8c27c421a7116618355508e8427 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Mon, 15 Mar 2021 14:29:51 +0100
Subject: bpf: selftests: Remove unused 'nospace_err' in tests for batched ops
 in array maps

This seems to be a reminiscent from the hashmap tests.

Signed-off-by: Pedro Tammela <pctammela@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210315132954.603108-1-pctammela@gmail.com
---
 tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f0a64d8ac59a..e42ea1195d18 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -55,7 +55,6 @@ void test_array_map_batch_ops(void)
 	int map_fd, *keys, *values, *visited;
 	__u32 count, total, total_success;
 	const __u32 max_entries = 10;
-	bool nospace_err;
 	__u64 batch = 0;
 	int err, step;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
@@ -90,7 +89,6 @@ void test_array_map_batch_ops(void)
 		 * elements each.
 		 */
 		count = step;
-		nospace_err = false;
 		while (true) {
 			err = bpf_map_lookup_batch(map_fd,
 						total ? &batch : NULL, &batch,
@@ -107,9 +105,6 @@ void test_array_map_batch_ops(void)
 
 		}
 
-		if (nospace_err == true)
-			continue;
-
 		CHECK(total != max_entries, "lookup with steps",
 		      "total = %u, max_entries = %u\n", total, max_entries);
 
-- 
cgit v1.2.3


From 17486960d79b900c45e0bb8fbcac0262848582ba Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 16 Mar 2021 15:08:00 +0100
Subject: Bluetooth: avoid deadlock between hci_dev->lock and socket lock

Commit eab2404ba798 ("Bluetooth: Add BT_PHY socket option") added a
dependency between socket lock and hci_dev->lock that could lead to
deadlock.

It turns out that hci_conn_get_phy() is not in any way relying on hdev
being immutable during the runtime of this function, neither does it even
look at any of the members of hdev, and as such there is no need to hold
that lock.

This fixes the lockdep splat below:

 ======================================================
 WARNING: possible circular locking dependency detected
 5.12.0-rc1-00026-g73d464503354 #10 Not tainted
 ------------------------------------------------------
 bluetoothd/1118 is trying to acquire lock:
 ffff8f078383c078 (&hdev->lock){+.+.}-{3:3}, at: hci_conn_get_phy+0x1c/0x150 [bluetooth]

 but task is already holding lock:
 ffff8f07e831d920 (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}-{0:0}, at: l2cap_sock_getsockopt+0x8b/0x610

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #3 (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}-{0:0}:
        lock_sock_nested+0x72/0xa0
        l2cap_sock_ready_cb+0x18/0x70 [bluetooth]
        l2cap_config_rsp+0x27a/0x520 [bluetooth]
        l2cap_sig_channel+0x658/0x1330 [bluetooth]
        l2cap_recv_frame+0x1ba/0x310 [bluetooth]
        hci_rx_work+0x1cc/0x640 [bluetooth]
        process_one_work+0x244/0x5f0
        worker_thread+0x3c/0x380
        kthread+0x13e/0x160
        ret_from_fork+0x22/0x30

 -> #2 (&chan->lock#2/1){+.+.}-{3:3}:
        __mutex_lock+0xa3/0xa10
        l2cap_chan_connect+0x33a/0x940 [bluetooth]
        l2cap_sock_connect+0x141/0x2a0 [bluetooth]
        __sys_connect+0x9b/0xc0
        __x64_sys_connect+0x16/0x20
        do_syscall_64+0x33/0x80
        entry_SYSCALL_64_after_hwframe+0x44/0xae

 -> #1 (&conn->chan_lock){+.+.}-{3:3}:
        __mutex_lock+0xa3/0xa10
        l2cap_chan_connect+0x322/0x940 [bluetooth]
        l2cap_sock_connect+0x141/0x2a0 [bluetooth]
        __sys_connect+0x9b/0xc0
        __x64_sys_connect+0x16/0x20
        do_syscall_64+0x33/0x80
        entry_SYSCALL_64_after_hwframe+0x44/0xae

 -> #0 (&hdev->lock){+.+.}-{3:3}:
        __lock_acquire+0x147a/0x1a50
        lock_acquire+0x277/0x3d0
        __mutex_lock+0xa3/0xa10
        hci_conn_get_phy+0x1c/0x150 [bluetooth]
        l2cap_sock_getsockopt+0x5a9/0x610 [bluetooth]
        __sys_getsockopt+0xcc/0x200
        __x64_sys_getsockopt+0x20/0x30
        do_syscall_64+0x33/0x80
        entry_SYSCALL_64_after_hwframe+0x44/0xae

 other info that might help us debug this:

 Chain exists of:
   &hdev->lock --> &chan->lock#2/1 --> sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP);
                                lock(&chan->lock#2/1);
                                lock(sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP);
   lock(&hdev->lock);

  *** DEADLOCK ***

 1 lock held by bluetoothd/1118:
  #0: ffff8f07e831d920 (sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP){+.+.}-{0:0}, at: l2cap_sock_getsockopt+0x8b/0x610 [bluetooth]

 stack backtrace:
 CPU: 3 PID: 1118 Comm: bluetoothd Not tainted 5.12.0-rc1-00026-g73d464503354 #10
 Hardware name: LENOVO 20K5S22R00/20K5S22R00, BIOS R0IET38W (1.16 ) 05/31/2017
 Call Trace:
  dump_stack+0x7f/0xa1
  check_noncircular+0x105/0x120
  ? __lock_acquire+0x147a/0x1a50
  __lock_acquire+0x147a/0x1a50
  lock_acquire+0x277/0x3d0
  ? hci_conn_get_phy+0x1c/0x150 [bluetooth]
  ? __lock_acquire+0x2e1/0x1a50
  ? lock_is_held_type+0xb4/0x120
  ? hci_conn_get_phy+0x1c/0x150 [bluetooth]
  __mutex_lock+0xa3/0xa10
  ? hci_conn_get_phy+0x1c/0x150 [bluetooth]
  ? lock_acquire+0x277/0x3d0
  ? mark_held_locks+0x49/0x70
  ? mark_held_locks+0x49/0x70
  ? hci_conn_get_phy+0x1c/0x150 [bluetooth]
  hci_conn_get_phy+0x1c/0x150 [bluetooth]
  l2cap_sock_getsockopt+0x5a9/0x610 [bluetooth]
  __sys_getsockopt+0xcc/0x200
  __x64_sys_getsockopt+0x20/0x30
  do_syscall_64+0x33/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae
 RIP: 0033:0x7fb73df33eee
 Code: 48 8b 0d 85 0f 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 37 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 52 0f 0c 00 f7 d8 64 89 01 48
 RSP: 002b:00007fffcfbbbf08 EFLAGS: 00000203 ORIG_RAX: 0000000000000037
 RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00007fb73df33eee
 RDX: 000000000000000e RSI: 0000000000000112 RDI: 0000000000000018
 RBP: 0000000000000000 R08: 00007fffcfbbbf44 R09: 0000000000000000
 R10: 00007fffcfbbbf3c R11: 0000000000000203 R12: 0000000000000000
 R13: 0000000000000018 R14: 0000000000000000 R15: 0000556fcefc70d0

Fixes: eab2404ba798 ("Bluetooth: Add BT_PHY socket option")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 468d31f3303d..88ec08978ff4 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1840,8 +1840,6 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
 {
 	u32 phys = 0;
 
-	hci_dev_lock(conn->hdev);
-
 	/* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471:
 	 * Table 6.2: Packets defined for synchronous, asynchronous, and
 	 * CSB logical transport types.
@@ -1938,7 +1936,5 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
 		break;
 	}
 
-	hci_dev_unlock(conn->hdev);
-
 	return phys;
 }
-- 
cgit v1.2.3


From 1bf343665057312167750509b0c48e8299293ac5 Mon Sep 17 00:00:00 2001
From: Calvin Johnson <calvin.johnson@oss.nxp.com>
Date: Mon, 15 Mar 2021 16:19:05 +0530
Subject: net: mdio: Alphabetically sort header inclusion

Alphabetically sort header inclusion

Signed-off-by: Calvin Johnson <calvin.johnson@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-bcm-unimac.c      | 16 +++++++---------
 drivers/net/mdio/mdio-bitbang.c         |  4 ++--
 drivers/net/mdio/mdio-cavium.c          |  2 +-
 drivers/net/mdio/mdio-gpio.c            | 10 +++++-----
 drivers/net/mdio/mdio-ipq4019.c         |  4 ++--
 drivers/net/mdio/mdio-ipq8064.c         |  4 ++--
 drivers/net/mdio/mdio-mscc-miim.c       |  8 ++++----
 drivers/net/mdio/mdio-mux-bcm-iproc.c   | 10 +++++-----
 drivers/net/mdio/mdio-mux-gpio.c        |  8 ++++----
 drivers/net/mdio/mdio-mux-mmioreg.c     |  6 +++---
 drivers/net/mdio/mdio-mux-multiplexer.c |  2 +-
 drivers/net/mdio/mdio-mux.c             |  6 +++---
 drivers/net/mdio/mdio-octeon.c          |  8 ++++----
 drivers/net/mdio/mdio-thunder.c         | 10 +++++-----
 drivers/net/mdio/mdio-xgene.c           |  6 +++---
 drivers/net/mdio/of_mdio.c              | 10 +++++-----
 16 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/drivers/net/mdio/mdio-bcm-unimac.c b/drivers/net/mdio/mdio-bcm-unimac.c
index fbd36891ee64..5d171e7f118d 100644
--- a/drivers/net/mdio/mdio-bcm-unimac.c
+++ b/drivers/net/mdio/mdio-bcm-unimac.c
@@ -5,20 +5,18 @@
  * Copyright (C) 2014-2017 Broadcom
  */
 
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
-#include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_mdio.h>
-
+#include <linux/of_platform.h>
+#include <linux/phy.h>
 #include <linux/platform_data/mdio-bcm-unimac.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
 
 #define MDIO_CMD		0x00
 #define  MDIO_START_BUSY	(1 << 29)
diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index d3915f831854..0f457c436335 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -14,10 +14,10 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/mdio-bitbang.h>
+#include <linux/module.h>
 #include <linux/types.h>
-#include <linux/delay.h>
 
 #define MDIO_READ 2
 #define MDIO_WRITE 1
diff --git a/drivers/net/mdio/mdio-cavium.c b/drivers/net/mdio/mdio-cavium.c
index 1afd6fc1a351..95ce274c1be1 100644
--- a/drivers/net/mdio/mdio-cavium.c
+++ b/drivers/net/mdio/mdio-cavium.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/io.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 1b00235d7dc5..56c8f914f893 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -17,15 +17,15 @@
  * Vitaly Bordug <vbordug@ru.mvista.com>
  */
 
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/mdio-gpio.h>
 #include <linux/mdio-bitbang.h>
 #include <linux/mdio-gpio.h>
-#include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/platform_data/mdio-gpio.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 struct mdio_gpio_info {
 	struct mdiobb_ctrl ctrl;
diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c
index 25c25ea6da66..9cd71d896963 100644
--- a/drivers/net/mdio/mdio-ipq4019.c
+++ b/drivers/net/mdio/mdio-ipq4019.c
@@ -3,10 +3,10 @@
 /* Copyright (c) 2020 Sartura Ltd. */
 
 #include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
diff --git a/drivers/net/mdio/mdio-ipq8064.c b/drivers/net/mdio/mdio-ipq8064.c
index 1bd18857e1c5..8fe8f0119fc1 100644
--- a/drivers/net/mdio/mdio-ipq8064.c
+++ b/drivers/net/mdio/mdio-ipq8064.c
@@ -7,12 +7,12 @@
 
 #include <linux/delay.h>
 #include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/regmap.h>
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 /* MII address register definitions */
 #define MII_ADDR_REG_ADDR                       0x10
diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c
index 11f583fd4611..b36e5ea04ddf 100644
--- a/drivers/net/mdio/mdio-mscc-miim.c
+++ b/drivers/net/mdio/mdio-mscc-miim.c
@@ -6,14 +6,14 @@
  * Copyright (c) 2017 Microsemi Corporation
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/phy.h>
-#include <linux/platform_device.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define MSCC_MIIM_REG_STATUS		0x0
 #define		MSCC_MIIM_STATUS_STAT_PENDING	BIT(2)
diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 42fb5f166136..641cfa41f492 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -3,14 +3,14 @@
  * Copyright 2016 Broadcom
  */
 #include <linux/clk.h>
-#include <linux/platform_device.h>
+#include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/iopoll.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/delay.h>
-#include <linux/iopoll.h>
+#include <linux/platform_device.h>
 
 #define MDIO_RATE_ADJ_EXT_OFFSET	0x000
 #define MDIO_RATE_ADJ_INT_OFFSET	0x004
diff --git a/drivers/net/mdio/mdio-mux-gpio.c b/drivers/net/mdio/mdio-mux-gpio.c
index 10a758fdc9e6..3c7f16f06b45 100644
--- a/drivers/net/mdio/mdio-mux-gpio.c
+++ b/drivers/net/mdio/mdio-mux-gpio.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
-#include <linux/of_mdio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
-#include <linux/gpio/consumer.h>
+#include <linux/platform_device.h>
 
 #define DRV_VERSION "1.1"
 #define DRV_DESCRIPTION "GPIO controlled MDIO bus multiplexer driver"
diff --git a/drivers/net/mdio/mdio-mux-mmioreg.c b/drivers/net/mdio/mdio-mux-mmioreg.c
index d1a8780e24d8..c02fb2a067ee 100644
--- a/drivers/net/mdio/mdio-mux-mmioreg.c
+++ b/drivers/net/mdio/mdio-mux-mmioreg.c
@@ -7,13 +7,13 @@
  * Copyright 2012 Freescale Semiconductor, Inc.
  */
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
 #include <linux/phy.h>
-#include <linux/mdio-mux.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_mmioreg_state {
 	void *mux_handle;
diff --git a/drivers/net/mdio/mdio-mux-multiplexer.c b/drivers/net/mdio/mdio-mux-multiplexer.c
index d6564381aa3e..527acfc3c045 100644
--- a/drivers/net/mdio/mdio-mux-multiplexer.c
+++ b/drivers/net/mdio/mdio-mux-multiplexer.c
@@ -4,10 +4,10 @@
  * Copyright 2019 NXP
  */
 
-#include <linux/platform_device.h>
 #include <linux/mdio-mux.h>
 #include <linux/module.h>
 #include <linux/mux/consumer.h>
+#include <linux/platform_device.h>
 
 struct mdio_mux_multiplexer_state {
 	struct mux_control *muxc;
diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
index 6a1d3540210b..110e4ee85785 100644
--- a/drivers/net/mdio/mdio-mux.c
+++ b/drivers/net/mdio/mdio-mux.c
@@ -3,12 +3,12 @@
  * Copyright (C) 2011, 2012 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
-#include <linux/mdio-mux.h>
-#include <linux/of_mdio.h>
 #include <linux/device.h>
+#include <linux/mdio-mux.h>
 #include <linux/module.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
+#include <linux/platform_device.h>
 
 #define DRV_DESCRIPTION "MDIO bus multiplexer driver"
 
diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index d1e1009d51af..8ce99c4888e1 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -3,13 +3,13 @@
  * Copyright (C) 2009-2015 Cavium, Inc.
  */
 
-#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
-#include <linux/module.h>
-#include <linux/gfp.h>
 #include <linux/phy.h>
-#include <linux/io.h>
+#include <linux/platform_device.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
index 3d7eda99d34e..cb1761693b69 100644
--- a/drivers/net/mdio/mdio-thunder.c
+++ b/drivers/net/mdio/mdio-thunder.c
@@ -3,14 +3,14 @@
  * Copyright (C) 2009-2016 Cavium, Inc.
  */
 
-#include <linux/of_address.h>
-#include <linux/of_mdio.h>
-#include <linux/module.h>
+#include <linux/acpi.h>
 #include <linux/gfp.h>
-#include <linux/phy.h>
 #include <linux/io.h>
-#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
 #include <linux/pci.h>
+#include <linux/phy.h>
 
 #include "mdio-cavium.h"
 
diff --git a/drivers/net/mdio/mdio-xgene.c b/drivers/net/mdio/mdio-xgene.c
index 461207cdf5d6..7ab4e26db08c 100644
--- a/drivers/net/mdio/mdio-xgene.c
+++ b/drivers/net/mdio/mdio-xgene.c
@@ -13,11 +13,11 @@
 #include <linux/io.h>
 #include <linux/mdio/mdio-xgene.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
-#include <linux/of_net.h>
 #include <linux/of_mdio.h>
-#include <linux/prefetch.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
 #include <linux/phy.h>
+#include <linux/prefetch.h>
 #include <net/ip.h>
 
 static bool xgene_mdio_status;
diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c
index ea9d5855fb52..094494a68ddf 100644
--- a/drivers/net/mdio/of_mdio.c
+++ b/drivers/net/mdio/of_mdio.c
@@ -8,17 +8,17 @@
  * out of the OpenFirmware device tree and using it to populate an mii_bus.
  */
 
-#include <linux/kernel.h>
 #include <linux/device.h>
-#include <linux/netdevice.h>
 #include <linux/err.h>
-#include <linux/phy.h>
-#include <linux/phy_fixed.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/phy_fixed.h>
 
 #define DEFAULT_GPIO_RESET_DELAY	10	/* in microseconds */
 
-- 
cgit v1.2.3


From 6d16eadab6db0c1d61e59fee7ed1ecc2d10269be Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Mon, 15 Mar 2021 15:14:23 +0100
Subject: net: dsa: b53: spi: allow device tree probing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add missing of_match_table to allow device tree probing.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_spi.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 7abec8dab8ba..413158275db8 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -324,9 +324,22 @@ static int b53_spi_remove(struct spi_device *spi)
 	return 0;
 }
 
+static const struct of_device_id b53_spi_of_match[] = {
+	{ .compatible = "brcm,bcm5325" },
+	{ .compatible = "brcm,bcm5365" },
+	{ .compatible = "brcm,bcm5395" },
+	{ .compatible = "brcm,bcm5397" },
+	{ .compatible = "brcm,bcm5398" },
+	{ .compatible = "brcm,bcm53115" },
+	{ .compatible = "brcm,bcm53125" },
+	{ .compatible = "brcm,bcm53128" },
+	{ /* sentinel */ }
+};
+
 static struct spi_driver b53_spi_driver = {
 	.driver = {
 		.name	= "b53-switch",
+		.of_match_table = b53_spi_of_match,
 	},
 	.probe	= b53_spi_probe,
 	.remove	= b53_spi_remove,
-- 
cgit v1.2.3


From 6f0d32509a92d656d9394788436792d863dff5da Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 15 Mar 2021 14:43:23 +0000
Subject: net: dsa: sja1105: fix error return code in sja1105_cls_flower_add()

The return value 'rc' maybe overwrite to 0 in the flow_action_for_each
loop, the error code from the offload not support error handling will
not set. This commit fix it to return -EOPNOTSUPP.

Fixes: 6a56e19902af ("flow_offload: reject configuration of packet-per-second policing in offload drivers")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_flower.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index f78b767f86ee..973761132fc3 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -317,14 +317,13 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	if (rc)
 		return rc;
 
-	rc = -EOPNOTSUPP;
-
 	flow_action_for_each(i, act, &rule->action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
 			if (act->police.rate_pkt_ps) {
 				NL_SET_ERR_MSG_MOD(extack,
 						   "QoS offload not support packets per second");
+				rc = -EOPNOTSUPP;
 				goto out;
 			}
 
-- 
cgit v1.2.3


From 8aa683041682c479e321dbbcc34f4c8ee6fcfc5d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 10:21:10 -0500
Subject: net: ipa: fix a duplicated tlv_type value

In the ipa_indication_register_req_ei[] encoding array, the tlv_type
associated with the ipa_mhi_ready_ind field is wrong.  It duplicates
the value used for the data_usage_quota_reached field (0x11) and
should use value 0x12 instead.  Fix this bug.

Reported-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_qmi_msg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c
index 73413371e3d3..e00f829a783f 100644
--- a/drivers/net/ipa/ipa_qmi_msg.c
+++ b/drivers/net/ipa/ipa_qmi_msg.c
@@ -56,7 +56,7 @@ struct qmi_elem_info ipa_indication_register_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_indication_register_req,
 				     ipa_mhi_ready_ind_valid),
-		.tlv_type	= 0x11,
+		.tlv_type	= 0x12,
 		.offset		= offsetof(struct ipa_indication_register_req,
 					   ipa_mhi_ready_ind_valid),
 	},
@@ -66,7 +66,7 @@ struct qmi_elem_info ipa_indication_register_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_indication_register_req,
 				     ipa_mhi_ready_ind),
-		.tlv_type	= 0x11,
+		.tlv_type	= 0x12,
 		.offset		= offsetof(struct ipa_indication_register_req,
 					   ipa_mhi_ready_ind),
 	},
-- 
cgit v1.2.3


From 7ac629e390bd6859c882bb4feb94f56c10e8126b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 10:21:11 -0500
Subject: net: ipa: fix another QMI message definition

The ipa_init_modem_driver_req_ei[] encoding array for the
INIT_MODEM_DRIVER request message has some errors in it.

First, the tlv_type associated with the hw_stats_quota_size field is
wrong; it duplicates the valiue used for the hw_stats_quota_base_addr
field (0x1f) and should use 0x20 instead.  The tlv_type value for
the hw_stats_drop_size field also uses the same duplicate value; it
should use 0x22 instead.

Second, there is no definition for the hw_stats_drop_base_addr
field.  It is an optional 32-bit enumerated type value.

Finally, the hw_stats_quota_base_addr, hw_stats_quota_size, and
hw_stats_drop_size fields are defined as enumerated types; they
should be unsigned 4-byte values.

Reported-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_qmi_msg.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c
index e00f829a783f..e4a6efbe9bd0 100644
--- a/drivers/net/ipa/ipa_qmi_msg.c
+++ b/drivers/net/ipa/ipa_qmi_msg.c
@@ -530,7 +530,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
 					   hw_stats_quota_base_addr_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
@@ -545,37 +545,57 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = {
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_quota_size_valid),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x20,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_quota_size_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_quota_size),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x20,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_quota_size),
 	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_init_modem_driver_req,
+				     hw_stats_drop_base_addr_valid),
+		.tlv_type	= 0x21,
+		.offset		= offsetof(struct ipa_init_modem_driver_req,
+					   hw_stats_drop_base_addr_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_4_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_init_modem_driver_req,
+				     hw_stats_drop_base_addr),
+		.tlv_type	= 0x21,
+		.offset		= offsetof(struct ipa_init_modem_driver_req,
+					   hw_stats_drop_base_addr),
+	},
 	{
 		.data_type	= QMI_OPT_FLAG,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_drop_size_valid),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x22,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_drop_size_valid),
 	},
 	{
-		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.data_type	= QMI_UNSIGNED_4_BYTE,
 		.elem_len	= 1,
 		.elem_size	=
 			sizeof_field(struct ipa_init_modem_driver_req,
 				     hw_stats_drop_size),
-		.tlv_type	= 0x1f,
+		.tlv_type	= 0x22,
 		.offset		= offsetof(struct ipa_init_modem_driver_req,
 					   hw_stats_drop_size),
 	},
-- 
cgit v1.2.3


From 6ec7a9c2e8be0bc064e8c25df4f71ee63e978b5f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 15 Mar 2021 10:21:12 -0500
Subject: net: ipa: extend the INDICATION_REGISTER request

The specified format of the INDICATION_REGISTER QMI request message
has been extended to support two more optional fields:
  endpoint_desc_ind:
    sender wishes to receive endpoint descriptor information via
    an IPA ENDP_DESC indication QMI message
  bw_change_ind:
    sender wishes to receive bandwidth change information via
    an IPA BW_CHANGE indication QMI message

Add definitions that permit these fields to be formatted and parsed
by the QMI library code.

Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_qmi_msg.c | 40 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_qmi_msg.h |  6 +++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c
index e4a6efbe9bd0..6838e8065072 100644
--- a/drivers/net/ipa/ipa_qmi_msg.c
+++ b/drivers/net/ipa/ipa_qmi_msg.c
@@ -70,6 +70,46 @@ struct qmi_elem_info ipa_indication_register_req_ei[] = {
 		.offset		= offsetof(struct ipa_indication_register_req,
 					   ipa_mhi_ready_ind),
 	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     endpoint_desc_ind_valid),
+		.tlv_type	= 0x13,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   endpoint_desc_ind_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     endpoint_desc_ind),
+		.tlv_type	= 0x13,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   endpoint_desc_ind),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     bw_change_ind_valid),
+		.tlv_type	= 0x14,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   bw_change_ind_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 1,
+		.elem_size	=
+			sizeof_field(struct ipa_indication_register_req,
+				     bw_change_ind),
+		.tlv_type	= 0x14,
+		.offset		= offsetof(struct ipa_indication_register_req,
+					   bw_change_ind),
+	},
 	{
 		.data_type	= QMI_EOTI,
 	},
diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h
index 12b6621f4b0e..3233d145fd87 100644
--- a/drivers/net/ipa/ipa_qmi_msg.h
+++ b/drivers/net/ipa/ipa_qmi_msg.h
@@ -24,7 +24,7 @@
  * information for each field.  The qmi_send_*() interfaces require
  * the message size to be provided.
  */
-#define IPA_QMI_INDICATION_REGISTER_REQ_SZ	12	/* -> server handle */
+#define IPA_QMI_INDICATION_REGISTER_REQ_SZ	20	/* -> server handle */
 #define IPA_QMI_INDICATION_REGISTER_RSP_SZ	7	/* <- server handle */
 #define IPA_QMI_INIT_DRIVER_REQ_SZ		162	/* client handle -> */
 #define IPA_QMI_INIT_DRIVER_RSP_SZ		25	/* client handle <- */
@@ -44,6 +44,10 @@ struct ipa_indication_register_req {
 	u8 data_usage_quota_reached;
 	u8 ipa_mhi_ready_ind_valid;
 	u8 ipa_mhi_ready_ind;
+	u8 endpoint_desc_ind_valid;
+	u8 endpoint_desc_ind;
+	u8 bw_change_ind_valid;
+	u8 bw_change_ind;
 };
 
 /* The response to a IPA_QMI_INDICATION_REGISTER request consists only of
-- 
cgit v1.2.3


From 5acd0cfbfbb5a688da1bfb1a2152b0c855115a35 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Wed, 10 Mar 2021 23:23:09 -0800
Subject: net: lapbether: Prevent racing when checking whether the netif is
 running

There are two "netif_running" checks in this driver. One is in
"lapbeth_xmit" and the other is in "lapbeth_rcv". They serve to make
sure that the LAPB APIs called in these functions are called before
"lapb_unregister" is called by the "ndo_stop" function.

However, these "netif_running" checks are unreliable, because it's
possible that immediately after "netif_running" returns true, "ndo_stop"
is called (which causes "lapb_unregister" to be called).

This patch adds locking to make sure "lapbeth_xmit" and "lapbeth_rcv" can
reliably check and ensure the netif is running while doing their work.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Acked-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index c3372498f4f1..8fda0446ff71 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -51,6 +51,8 @@ struct lapbethdev {
 	struct list_head	node;
 	struct net_device	*ethdev;	/* link to ethernet device */
 	struct net_device	*axdev;		/* lapbeth device (lapb#) */
+	bool			up;
+	spinlock_t		up_lock;	/* Protects "up" */
 };
 
 static LIST_HEAD(lapbeth_devices);
@@ -101,8 +103,9 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	rcu_read_lock();
 	lapbeth = lapbeth_get_x25_dev(dev);
 	if (!lapbeth)
-		goto drop_unlock;
-	if (!netif_running(lapbeth->axdev))
+		goto drop_unlock_rcu;
+	spin_lock_bh(&lapbeth->up_lock);
+	if (!lapbeth->up)
 		goto drop_unlock;
 
 	len = skb->data[0] + skb->data[1] * 256;
@@ -117,11 +120,14 @@ static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		goto drop_unlock;
 	}
 out:
+	spin_unlock_bh(&lapbeth->up_lock);
 	rcu_read_unlock();
 	return 0;
 drop_unlock:
 	kfree_skb(skb);
 	goto out;
+drop_unlock_rcu:
+	rcu_read_unlock();
 drop:
 	kfree_skb(skb);
 	return 0;
@@ -151,13 +157,11 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
-	/*
-	 * Just to be *really* sure not to send anything if the interface
-	 * is down, the ethernet device may have gone.
-	 */
-	if (!netif_running(dev))
+	spin_lock_bh(&lapbeth->up_lock);
+	if (!lapbeth->up)
 		goto drop;
 
 	/* There should be a pseudo header of 1 byte added by upper layers.
@@ -194,6 +198,7 @@ static netdev_tx_t lapbeth_xmit(struct sk_buff *skb,
 		goto drop;
 	}
 out:
+	spin_unlock_bh(&lapbeth->up_lock);
 	return NETDEV_TX_OK;
 drop:
 	kfree_skb(skb);
@@ -285,6 +290,7 @@ static const struct lapb_register_struct lapbeth_callbacks = {
  */
 static int lapbeth_open(struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
 	if ((err = lapb_register(dev, &lapbeth_callbacks)) != LAPB_OK) {
@@ -292,13 +298,22 @@ static int lapbeth_open(struct net_device *dev)
 		return -ENODEV;
 	}
 
+	spin_lock_bh(&lapbeth->up_lock);
+	lapbeth->up = true;
+	spin_unlock_bh(&lapbeth->up_lock);
+
 	return 0;
 }
 
 static int lapbeth_close(struct net_device *dev)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
+	spin_lock_bh(&lapbeth->up_lock);
+	lapbeth->up = false;
+	spin_unlock_bh(&lapbeth->up_lock);
+
 	if ((err = lapb_unregister(dev)) != LAPB_OK)
 		pr_err("lapb_unregister error: %d\n", err);
 
@@ -356,6 +371,9 @@ static int lapbeth_new_device(struct net_device *dev)
 	dev_hold(dev);
 	lapbeth->ethdev = dev;
 
+	lapbeth->up = false;
+	spin_lock_init(&lapbeth->up_lock);
+
 	rc = -EIO;
 	if (register_netdevice(ndev))
 		goto fail;
-- 
cgit v1.2.3


From da6557edb9f3f4513b01d9a20a36c2fbc31810a1 Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Mon, 15 Mar 2021 16:45:27 +0100
Subject: dt-bindings: net: Add bcm6368-mdio-mux bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add documentations for bcm6368 mdio mux driver.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/brcm,bcm6368-mdio-mux.yaml        | 76 ++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml

diff --git a/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml b/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
new file mode 100644
index 000000000000..2f34fda55fd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,bcm6368-mdio-mux.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/brcm,bcm6368-mdio-mux.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom BCM6368 MDIO bus multiplexer
+
+maintainers:
+  - Álvaro Fernández Rojas <noltari@gmail.com>
+
+description:
+  This MDIO bus multiplexer defines buses that could be internal as well as
+  external to SoCs. When child bus is selected, one needs to select these two
+  properties as well to generate desired MDIO transaction on appropriate bus.
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+properties:
+  compatible:
+    const: brcm,bcm6368-mdio-mux
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+patternProperties:
+  '^mdio@[0-1]$':
+    type: object
+    properties:
+      reg:
+        maxItems: 1
+
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
+
+    required:
+      - reg
+      - "#address-cells"
+      - "#size-cells"
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    mdio0: mdio@10e000b0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      compatible = "brcm,bcm6368-mdio-mux";
+      reg = <0x10e000b0 0x6>;
+
+      mdio_int: mdio@0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <0>;
+      };
+
+      mdio_ext: mdio@1 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        reg = <1>;
+      };
+    };
-- 
cgit v1.2.3


From e239756717b5c866958823a1609e2ccf268435be Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Mon, 15 Mar 2021 16:45:28 +0100
Subject: net: mdio: Add BCM6368 MDIO mux bus controller
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This controller is present on BCM6318, BCM6328, BCM6362, BCM6368 and BCM63268
SoCs.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/Kconfig            |  11 +++
 drivers/net/mdio/Makefile           |   1 +
 drivers/net/mdio/mdio-mux-bcm6368.c | 184 ++++++++++++++++++++++++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100644 drivers/net/mdio/mdio-mux-bcm6368.c

diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index a10cc460d7cf..d06e06f5e31a 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -200,6 +200,17 @@ config MDIO_BUS_MUX_MESON_G12A
 	  the amlogic g12a SoC. The multiplexers connects either the external
 	  or the internal MDIO bus to the parent bus.
 
+config MDIO_BUS_MUX_BCM6368
+	tristate "Broadcom BCM6368 MDIO bus multiplexers"
+	depends on OF && OF_MDIO && (BMIPS_GENERIC || COMPILE_TEST)
+	select MDIO_BUS_MUX
+	default BMIPS_GENERIC
+	help
+	  This module provides a driver for MDIO bus multiplexers found in
+	  BCM6368 based Broadcom SoCs. This multiplexer connects one of several
+	  child MDIO bus to a parent bus. Buses could be internal as well as
+	  external and selection logic lies inside the same multiplexer.
+
 config MDIO_BUS_MUX_BCM_IPROC
 	tristate "Broadcom iProc based MDIO bus multiplexers"
 	depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index 5c498dde463f..c3ec0ef989df 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_MDIO_THUNDER)		+= mdio-thunder.o
 obj-$(CONFIG_MDIO_XGENE)		+= mdio-xgene.o
 
 obj-$(CONFIG_MDIO_BUS_MUX)		+= mdio-mux.o
+obj-$(CONFIG_MDIO_BUS_MUX_BCM6368)	+= mdio-mux-bcm6368.o
 obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)	+= mdio-mux-bcm-iproc.o
 obj-$(CONFIG_MDIO_BUS_MUX_GPIO)		+= mdio-mux-gpio.o
 obj-$(CONFIG_MDIO_BUS_MUX_MESON_G12A)	+= mdio-mux-meson-g12a.o
diff --git a/drivers/net/mdio/mdio-mux-bcm6368.c b/drivers/net/mdio/mdio-mux-bcm6368.c
new file mode 100644
index 000000000000..6dcbf987d61b
--- /dev/null
+++ b/drivers/net/mdio/mdio-mux-bcm6368.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Broadcom BCM6368 mdiomux bus controller driver
+ *
+ * Copyright (C) 2021 Álvaro Fernández Rojas <noltari@gmail.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mdio-mux.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+
+#define MDIOC_REG		0x0
+#define MDIOC_EXT_MASK		BIT(16)
+#define MDIOC_REG_SHIFT		20
+#define MDIOC_PHYID_SHIFT	25
+#define MDIOC_RD_MASK		BIT(30)
+#define MDIOC_WR_MASK		BIT(31)
+
+#define MDIOD_REG		0x4
+
+struct bcm6368_mdiomux_desc {
+	void *mux_handle;
+	void __iomem *base;
+	struct device *dev;
+	struct mii_bus *mii_bus;
+	int ext_phy;
+};
+
+static int bcm6368_mdiomux_read(struct mii_bus *bus, int phy_id, int loc)
+{
+	struct bcm6368_mdiomux_desc *md = bus->priv;
+	uint32_t reg;
+	int ret;
+
+	__raw_writel(0, md->base + MDIOC_REG);
+
+	reg = MDIOC_RD_MASK |
+	      (phy_id << MDIOC_PHYID_SHIFT) |
+	      (loc << MDIOC_REG_SHIFT);
+	if (md->ext_phy)
+		reg |= MDIOC_EXT_MASK;
+
+	__raw_writel(reg, md->base + MDIOC_REG);
+	udelay(50);
+	ret = __raw_readw(md->base + MDIOD_REG);
+
+	return ret;
+}
+
+static int bcm6368_mdiomux_write(struct mii_bus *bus, int phy_id, int loc,
+				 uint16_t val)
+{
+	struct bcm6368_mdiomux_desc *md = bus->priv;
+	uint32_t reg;
+
+	__raw_writel(0, md->base + MDIOC_REG);
+
+	reg = MDIOC_WR_MASK |
+	      (phy_id << MDIOC_PHYID_SHIFT) |
+	      (loc << MDIOC_REG_SHIFT);
+	if (md->ext_phy)
+		reg |= MDIOC_EXT_MASK;
+	reg |= val;
+
+	__raw_writel(reg, md->base + MDIOC_REG);
+	udelay(50);
+
+	return 0;
+}
+
+static int bcm6368_mdiomux_switch_fn(int current_child, int desired_child,
+				     void *data)
+{
+	struct bcm6368_mdiomux_desc *md = data;
+
+	md->ext_phy = desired_child;
+
+	return 0;
+}
+
+static int bcm6368_mdiomux_probe(struct platform_device *pdev)
+{
+	struct bcm6368_mdiomux_desc *md;
+	struct mii_bus *bus;
+	struct resource *res;
+	int rc;
+
+	md = devm_kzalloc(&pdev->dev, sizeof(*md), GFP_KERNEL);
+	if (!md)
+		return -ENOMEM;
+	md->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	/*
+	 * Just ioremap, as this MDIO block is usually integrated into an
+	 * Ethernet MAC controller register range
+	 */
+	md->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!md->base) {
+		dev_err(&pdev->dev, "failed to ioremap register\n");
+		return -ENOMEM;
+	}
+
+	md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
+	if (!md->mii_bus) {
+		dev_err(&pdev->dev, "mdiomux bus alloc failed\n");
+		return ENOMEM;
+	}
+
+	bus = md->mii_bus;
+	bus->priv = md;
+	bus->name = "BCM6368 MDIO mux bus";
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id);
+	bus->parent = &pdev->dev;
+	bus->read = bcm6368_mdiomux_read;
+	bus->write = bcm6368_mdiomux_write;
+	bus->phy_mask = 0x3f;
+	bus->dev.of_node = pdev->dev.of_node;
+
+	rc = mdiobus_register(bus);
+	if (rc) {
+		dev_err(&pdev->dev, "mdiomux registration failed\n");
+		return rc;
+	}
+
+	platform_set_drvdata(pdev, md);
+
+	rc = mdio_mux_init(md->dev, md->dev->of_node,
+			   bcm6368_mdiomux_switch_fn, &md->mux_handle, md,
+			   md->mii_bus);
+	if (rc) {
+		dev_info(md->dev, "mdiomux initialization failed\n");
+		goto out_register;
+	}
+
+	dev_info(&pdev->dev, "Broadcom BCM6368 MDIO mux bus\n");
+
+	return 0;
+
+out_register:
+	mdiobus_unregister(bus);
+	return rc;
+}
+
+static int bcm6368_mdiomux_remove(struct platform_device *pdev)
+{
+	struct bcm6368_mdiomux_desc *md = platform_get_drvdata(pdev);
+
+	mdio_mux_uninit(md->mux_handle);
+	mdiobus_unregister(md->mii_bus);
+
+	return 0;
+}
+
+static const struct of_device_id bcm6368_mdiomux_ids[] = {
+	{ .compatible = "brcm,bcm6368-mdio-mux", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, bcm6368_mdiomux_ids);
+
+static struct platform_driver bcm6368_mdiomux_driver = {
+	.driver = {
+		.name = "bcm6368-mdio-mux",
+		.of_match_table = bcm6368_mdiomux_ids,
+	},
+	.probe	= bcm6368_mdiomux_probe,
+	.remove	= bcm6368_mdiomux_remove,
+};
+module_platform_driver(bcm6368_mdiomux_driver);
+
+MODULE_AUTHOR("Álvaro Fernández Rojas <noltari@gmail.com>");
+MODULE_DESCRIPTION("BCM6368 mdiomux bus controller driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 5a30833b9a16f8d1aa15de06636f9317ca51f9df Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Tue, 16 Mar 2021 01:09:40 +0800
Subject: net: dsa: mt7530: support MDB and bridge flag operations

Support port MDB and bridge flag operations.

As the hardware can manage multicast forwarding itself, offload_fwd_mark
can be unconditionally set to true.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 124 +++++++++++++++++++++++++++++++++++++++++++++--
 drivers/net/dsa/mt7530.h |   1 +
 net/dsa/tag_mtk.c        |  14 +-----
 3 files changed, 122 insertions(+), 17 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index f06f5fa2f898..127856823a3b 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -999,8 +999,9 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
 	mt7530_write(priv, MT7530_PVC_P(port),
 		     PORT_SPEC_TAG);
 
-	/* Unknown multicast frame forwarding to the cpu port */
-	mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port)));
+	/* Disable flooding by default */
+	mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK,
+		   BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
 
 	/* Set CPU port number */
 	if (priv->id == ID_MT7621)
@@ -1137,6 +1138,56 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 	mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
 }
 
+static int
+mt7530_port_pre_bridge_flags(struct dsa_switch *ds, int port,
+			     struct switchdev_brport_flags flags,
+			     struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+			   BR_BCAST_FLOOD))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
+			 struct switchdev_brport_flags flags,
+			 struct netlink_ext_ack *extack)
+{
+	struct mt7530_priv *priv = ds->priv;
+
+	if (flags.mask & BR_LEARNING)
+		mt7530_rmw(priv, MT7530_PSC_P(port), SA_DIS,
+			   flags.val & BR_LEARNING ? 0 : SA_DIS);
+
+	if (flags.mask & BR_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, UNU_FFP(BIT(port)),
+			   flags.val & BR_FLOOD ? UNU_FFP(BIT(port)) : 0);
+
+	if (flags.mask & BR_MCAST_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
+			   flags.val & BR_MCAST_FLOOD ? UNM_FFP(BIT(port)) : 0);
+
+	if (flags.mask & BR_BCAST_FLOOD)
+		mt7530_rmw(priv, MT7530_MFC, BC_FFP(BIT(port)),
+			   flags.val & BR_BCAST_FLOOD ? BC_FFP(BIT(port)) : 0);
+
+	return 0;
+}
+
+static int
+mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
+			struct netlink_ext_ack *extack)
+{
+	struct mt7530_priv *priv = ds->priv;
+
+	mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
+		   mrouter ? UNM_FFP(BIT(port)) : 0);
+
+	return 0;
+}
+
 static int
 mt7530_port_bridge_join(struct dsa_switch *ds, int port,
 			struct net_device *bridge)
@@ -1348,6 +1399,59 @@ err:
 	return 0;
 }
 
+static int
+mt7530_port_mdb_add(struct dsa_switch *ds, int port,
+		    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct mt7530_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+	u8 port_mask = 0;
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+
+	mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
+	if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
+		port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
+			    & PORT_MAP_MASK;
+
+	port_mask |= BIT(port);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
+	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
+
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
+static int
+mt7530_port_mdb_del(struct dsa_switch *ds, int port,
+		    const struct switchdev_obj_port_mdb *mdb)
+{
+	struct mt7530_priv *priv = ds->priv;
+	const u8 *addr = mdb->addr;
+	u16 vid = mdb->vid;
+	u8 port_mask = 0;
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+
+	mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP);
+	if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL))
+		port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP)
+			    & PORT_MAP_MASK;
+
+	port_mask &= ~BIT(port);
+	mt7530_fdb_write(priv, vid, port_mask, addr, -1,
+			 port_mask ? STATIC_ENT : STATIC_EMP);
+	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL);
+
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
 static int
 mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid)
 {
@@ -1820,9 +1924,12 @@ mt7530_setup(struct dsa_switch *ds)
 			ret = mt753x_cpu_port_enable(ds, i);
 			if (ret)
 				return ret;
-		} else
+		} else {
 			mt7530_port_disable(ds, i);
 
+			/* Disable learning by default on all user ports */
+			mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+		}
 		/* Enable consistent egress tag */
 		mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
 			   PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
@@ -1984,9 +2091,13 @@ mt7531_setup(struct dsa_switch *ds)
 			ret = mt753x_cpu_port_enable(ds, i);
 			if (ret)
 				return ret;
-		} else
+		} else {
 			mt7530_port_disable(ds, i);
 
+			/* Disable learning by default on all user ports */
+			mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+		}
+
 		/* Enable consistent egress tag */
 		mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
 			   PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
@@ -2708,11 +2819,16 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
 	.port_change_mtu	= mt7530_port_change_mtu,
 	.port_max_mtu		= mt7530_port_max_mtu,
 	.port_stp_state_set	= mt7530_stp_state_set,
+	.port_pre_bridge_flags	= mt7530_port_pre_bridge_flags,
+	.port_bridge_flags	= mt7530_port_bridge_flags,
+	.port_set_mrouter	= mt7530_port_set_mrouter,
 	.port_bridge_join	= mt7530_port_bridge_join,
 	.port_bridge_leave	= mt7530_port_bridge_leave,
 	.port_fdb_add		= mt7530_port_fdb_add,
 	.port_fdb_del		= mt7530_port_fdb_del,
 	.port_fdb_dump		= mt7530_port_fdb_dump,
+	.port_mdb_add		= mt7530_port_mdb_add,
+	.port_mdb_del		= mt7530_port_mdb_del,
 	.port_vlan_filtering	= mt7530_port_vlan_filtering,
 	.port_vlan_add		= mt7530_port_vlan_add,
 	.port_vlan_del		= mt7530_port_vlan_del,
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 64a9bb377e15..ec36ea5dfd57 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -34,6 +34,7 @@ enum mt753x_id {
 /* Registers to mac forward control for unknown frames */
 #define MT7530_MFC			0x10
 #define  BC_FFP(x)			(((x) & 0xff) << 24)
+#define  BC_FFP_MASK			BC_FFP(~0)
 #define  UNM_FFP(x)			(((x) & 0xff) << 16)
 #define  UNM_FFP_MASK			UNM_FFP(~0)
 #define  UNU_FFP(x)			(((x) & 0xff) << 8)
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 59748487664f..f9b2966d1936 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -24,9 +24,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	u8 xmit_tpid;
 	u8 *mtk_tag;
-	unsigned char *dest = eth_hdr(skb)->h_dest;
-	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
-				!is_broadcast_ether_addr(dest);
 
 	/* Build the special tag after the MAC Source Address. If VLAN header
 	 * is present, it's required that VLAN header and special tag is
@@ -55,10 +52,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
 	mtk_tag[0] = xmit_tpid;
 	mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 
-	/* Disable SA learning for multicast frames */
-	if (unlikely(is_multicast_skb))
-		mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
-
 	/* Tag control information is kept for 802.1Q */
 	if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
 		mtk_tag[2] = 0;
@@ -74,9 +67,6 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	u16 hdr;
 	int port;
 	__be16 *phdr;
-	unsigned char *dest = eth_hdr(skb)->h_dest;
-	bool is_multicast_skb = is_multicast_ether_addr(dest) &&
-				!is_broadcast_ether_addr(dest);
 
 	if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
 		return NULL;
@@ -102,9 +92,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb->dev)
 		return NULL;
 
-	/* Only unicast or broadcast frames are offloaded */
-	if (likely(!is_multicast_skb))
-		skb->offload_fwd_mark = 1;
+	skb->offload_fwd_mark = 1;
 
 	return skb;
 }
-- 
cgit v1.2.3


From 6aa2c371c729a3e3f8c7d4e08e0ff10e706b81d3 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Mon, 15 Mar 2021 19:13:41 +0200
Subject: net: bridge: mcast: remove unreachable EHT code

In the initial EHT versions there were common functions which handled
allow/block messages for both INCLUDE and EXCLUDE modes, but later they
were separated. It seems I've left some common code which cannot be
reached because the filter mode is checked before calling the respective
functions, i.e. the host filter is always in EXCLUDE mode when using
__eht_allow_excl() and __eht_block_excl() thus we can drop the host_excl
checks inside and simplify the code a bit.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast_eht.c | 57 ++++++++++++-------------------------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index fea38b9a7268..982398e44658 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -522,31 +522,24 @@ static bool __eht_allow_excl(struct net_bridge_port_group *pg,
 			     u32 nsrcs,
 			     size_t addr_size)
 {
-	bool changed = false, host_excl = false;
 	union net_bridge_eht_addr eht_src_addr;
 	struct net_bridge_group_src *src_ent;
+	bool changed = false;
 	struct br_ip src_ip;
 	u32 src_idx;
 
-	host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
 	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
 	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
 		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (!host_excl) {
-			br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-							  MCAST_INCLUDE,
-							  false);
-		} else {
-			if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
-							    h_addr))
-				continue;
-			memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
-			src_ent = br_multicast_find_group_src(pg, &src_ip);
-			if (!src_ent)
-				continue;
-			br_multicast_del_group_src(src_ent, true);
-			changed = true;
-		}
+		if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
+						    h_addr))
+			continue;
+		memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
+		src_ent = br_multicast_find_group_src(pg, &src_ip);
+		if (!src_ent)
+			continue;
+		br_multicast_del_group_src(src_ent, true);
+		changed = true;
 	}
 
 	return changed;
@@ -602,42 +595,22 @@ static bool __eht_block_incl(struct net_bridge_port_group *pg,
 	return changed;
 }
 
-static bool __eht_block_excl(struct net_bridge_port_group *pg,
+static void __eht_block_excl(struct net_bridge_port_group *pg,
 			     union net_bridge_eht_addr *h_addr,
 			     void *srcs,
 			     u32 nsrcs,
 			     size_t addr_size)
 {
-	bool changed = false, host_excl = false;
 	union net_bridge_eht_addr eht_src_addr;
-	struct net_bridge_group_src *src_ent;
-	struct br_ip src_ip;
 	u32 src_idx;
 
-	host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE);
 	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
-	memset(&src_ip, 0, sizeof(src_ip));
-	src_ip.proto = pg->key.addr.proto;
 	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
 		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (host_excl) {
-			br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-							  MCAST_EXCLUDE,
-							  false);
-		} else {
-			if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
-							    h_addr))
-				continue;
-			memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
-			src_ent = br_multicast_find_group_src(pg, &src_ip);
-			if (!src_ent)
-				continue;
-			br_multicast_del_group_src(src_ent, true);
-			changed = true;
-		}
+		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
+						  MCAST_EXCLUDE,
+						  false);
 	}
-
-	return changed;
 }
 
 static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
@@ -653,7 +626,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
 		changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size);
 		break;
 	case MCAST_EXCLUDE:
-		changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
+		__eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
 		break;
 	}
 
-- 
cgit v1.2.3


From e09cf582059ef4c1e5c496d6494fe4e26482530f Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Mon, 15 Mar 2021 19:13:42 +0200
Subject: net: bridge: mcast: factor out common allow/block EHT handling

We hande EHT state change for ALLOW messages in INCLUDE mode and for
BLOCK messages in EXCLUDE mode similarly - create the new set entries
with the proper filter mode. We also handle EHT state change for ALLOW
messages in EXCLUDE mode and for BLOCK messages in INCLUDE mode in a
similar way - delete the common entries (current set and new set).
Factor out all the common code as follows:
 - ALLOW/INCLUDE, BLOCK/EXCLUDE: call __eht_create_set_entries()
 - ALLOW/EXCLUDE, BLOCK/INCLUDE: call __eht_del_common_set_entries()

The set entries creation can be reused in __eht_inc_exc() as well.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast_eht.c | 98 ++++++++++++-------------------------------
 1 file changed, 27 insertions(+), 71 deletions(-)

diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index 982398e44658..13290a749d09 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -498,11 +498,13 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
 					       &set_h->h_addr);
 }
 
-static void __eht_allow_incl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
+/* create new set entries from reports */
+static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+				     union net_bridge_eht_addr *h_addr,
+				     void *srcs,
+				     u32 nsrcs,
+				     size_t addr_size,
+				     int filter_mode)
 {
 	union net_bridge_eht_addr eht_src_addr;
 	u32 src_idx;
@@ -511,16 +513,17 @@ static void __eht_allow_incl(struct net_bridge_port_group *pg,
 	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
 		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
 		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-						  MCAST_INCLUDE,
+						  filter_mode,
 						  false);
 	}
 }
 
-static bool __eht_allow_excl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
+/* delete existing set entries and their (S,G) entries if they were the last */
+static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
+				  union net_bridge_eht_addr *h_addr,
+				  void *srcs,
+				  u32 nsrcs,
+				  size_t addr_size)
 {
 	union net_bridge_eht_addr eht_src_addr;
 	struct net_bridge_group_src *src_ent;
@@ -529,10 +532,11 @@ static bool __eht_allow_excl(struct net_bridge_port_group *pg,
 	u32 src_idx;
 
 	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
+	memset(&src_ip, 0, sizeof(src_ip));
+	src_ip.proto = pg->key.addr.proto;
 	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
 		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr,
-						    h_addr))
+		if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr))
 			continue;
 		memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
 		src_ent = br_multicast_find_group_src(pg, &src_ip);
@@ -555,64 +559,18 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
 
 	switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
 	case MCAST_INCLUDE:
-		__eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size);
+		__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+					 MCAST_INCLUDE);
 		break;
 	case MCAST_EXCLUDE:
-		changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size);
+		changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
+						addr_size);
 		break;
 	}
 
 	return changed;
 }
 
-static bool __eht_block_incl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
-{
-	union net_bridge_eht_addr eht_src_addr;
-	struct net_bridge_group_src *src_ent;
-	bool changed = false;
-	struct br_ip src_ip;
-	u32 src_idx;
-
-	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
-	memset(&src_ip, 0, sizeof(src_ip));
-	src_ip.proto = pg->key.addr.proto;
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr))
-			continue;
-		memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size);
-		src_ent = br_multicast_find_group_src(pg, &src_ip);
-		if (!src_ent)
-			continue;
-		br_multicast_del_group_src(src_ent, true);
-		changed = true;
-	}
-
-	return changed;
-}
-
-static void __eht_block_excl(struct net_bridge_port_group *pg,
-			     union net_bridge_eht_addr *h_addr,
-			     void *srcs,
-			     u32 nsrcs,
-			     size_t addr_size)
-{
-	union net_bridge_eht_addr eht_src_addr;
-	u32 src_idx;
-
-	memset(&eht_src_addr, 0, sizeof(eht_src_addr));
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-						  MCAST_EXCLUDE,
-						  false);
-	}
-}
-
 static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
 				   union net_bridge_eht_addr *h_addr,
 				   void *srcs,
@@ -623,10 +581,12 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
 
 	switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
 	case MCAST_INCLUDE:
-		changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size);
+		changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
+						addr_size);
 		break;
 	case MCAST_EXCLUDE:
-		__eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size);
+		__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+					 MCAST_EXCLUDE);
 		break;
 	}
 
@@ -644,7 +604,6 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
 {
 	bool changed = false, flush_entries = to_report;
 	union net_bridge_eht_addr eht_src_addr;
-	u32 src_idx;
 
 	if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode)
 		flush_entries = true;
@@ -653,11 +612,8 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
 	/* if we're changing mode del host and its entries */
 	if (flush_entries)
 		br_multicast_del_eht_host(pg, h_addr);
-	for (src_idx = 0; src_idx < nsrcs; src_idx++) {
-		memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
-		br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
-						  filter_mode, false);
-	}
+	__eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+				 filter_mode);
 	/* we can be missing sets only if we've deleted some entries */
 	if (flush_entries) {
 		struct net_bridge *br = pg->key.port->br;
-- 
cgit v1.2.3


From ba3b86b9cef0c72ae78173f2c4db8a08bf4d3770 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 5 Mar 2021 00:30:02 +0100
Subject: s390/bpf: Implement new atomic ops

Implement BPF_AND, BPF_OR and BPF_XOR as the existing BPF_ADD. Since
the corresponding machine instructions return the old value, BPF_FETCH
happens by itself, the only additional thing that is required is
zero-extension.

There is no single instruction that implements BPF_XCHG on s390, so use
a COMPARE AND SWAP loop.

BPF_CMPXCHG, on the other hand, can be implemented by a single COMPARE
AND SWAP. Zero-extension is automatically inserted by the verifier.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210304233002.149096-1-iii@linux.ibm.com
---
 arch/s390/net/bpf_jit_comp.c | 64 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 9 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index f973e2ead197..63cae0476bb4 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1209,21 +1209,67 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 	 */
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
-		if (insn->imm != BPF_ADD) {
+	{
+		bool is32 = BPF_SIZE(insn->code) == BPF_W;
+
+		switch (insn->imm) {
+/* {op32|op64} {%w0|%src},%src,off(%dst) */
+#define EMIT_ATOMIC(op32, op64) do {					\
+	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
+		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
+		      src_reg, dst_reg, off);				\
+	if (is32 && (insn->imm & BPF_FETCH))				\
+		EMIT_ZERO(src_reg);					\
+} while (0)
+		case BPF_ADD:
+		case BPF_ADD | BPF_FETCH:
+			/* {laal|laalg} */
+			EMIT_ATOMIC(0x00fa, 0x00ea);
+			break;
+		case BPF_AND:
+		case BPF_AND | BPF_FETCH:
+			/* {lan|lang} */
+			EMIT_ATOMIC(0x00f4, 0x00e4);
+			break;
+		case BPF_OR:
+		case BPF_OR | BPF_FETCH:
+			/* {lao|laog} */
+			EMIT_ATOMIC(0x00f6, 0x00e6);
+			break;
+		case BPF_XOR:
+		case BPF_XOR | BPF_FETCH:
+			/* {lax|laxg} */
+			EMIT_ATOMIC(0x00f7, 0x00e7);
+			break;
+#undef EMIT_ATOMIC
+		case BPF_XCHG:
+			/* {ly|lg} %w0,off(%dst) */
+			EMIT6_DISP_LH(0xe3000000,
+				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
+				      dst_reg, off);
+			/* 0: {csy|csg} %w0,%src,off(%dst) */
+			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
+				      REG_W0, src_reg, dst_reg, off);
+			/* brc 4,0b */
+			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
+			/* {llgfr|lgr} %src,%w0 */
+			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+			if (is32 && insn_is_zext(&insn[1]))
+				insn_count = 2;
+			break;
+		case BPF_CMPXCHG:
+			/* 0: {csy|csg} %b0,%src,off(%dst) */
+			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
+				      BPF_REG_0, src_reg, dst_reg, off);
+			break;
+		default:
 			pr_err("Unknown atomic operation %02x\n", insn->imm);
 			return -1;
 		}
 
-		/* *(u32/u64 *)(dst + off) += src
-		 *
-		 * BFW_W:  laal  %w0,%src,off(%dst)
-		 * BPF_DW: laalg %w0,%src,off(%dst)
-		 */
-		EMIT6_DISP_LH(0xeb000000,
-			      BPF_SIZE(insn->code) == BPF_W ? 0x00fa : 0x00ea,
-			      REG_W0, src_reg, dst_reg, off);
 		jit->seen |= SEEN_MEM;
 		break;
+	}
 	/*
 	 * BPF_LDX
 	 */
-- 
cgit v1.2.3


From 6bd45f2e78f31bde335f7720e570a07331031110 Mon Sep 17 00:00:00 2001
From: Liu xuzhi <liu.xuzhi@zte.com.cn>
Date: Thu, 11 Mar 2021 04:31:03 -0800
Subject: kernel/bpf/: Fix misspellings using codespell tool

A typo is found out by codespell tool in 34th lines of hashtab.c:

$ codespell ./kernel/bpf/
./hashtab.c:34 : differrent ==> different

Fix a typo found by codespell.

Signed-off-by: Liu xuzhi <liu.xuzhi@zte.com.cn>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210311123103.323589-1-liu.xuzhi@zte.com.cn
---
 kernel/bpf/hashtab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 330d721dd2af..d7ebb12ffffc 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -31,7 +31,7 @@
 /*
  * The bucket lock has two protection scopes:
  *
- * 1) Serializing concurrent operations from BPF programs on differrent
+ * 1) Serializing concurrent operations from BPF programs on different
  *    CPUs
  *
  * 2) Serializing concurrent operations from BPF programs and sys_bpf()
-- 
cgit v1.2.3


From 4d0b93896ff8d6795ad8d1b604e41aa850d5a635 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 11 Mar 2021 13:15:05 +0000
Subject: bpf: Make symbol 'bpf_task_storage_busy' static

The sparse tool complains as follows:

kernel/bpf/bpf_task_storage.c:23:1: warning:
 symbol '__pcpu_scope_bpf_task_storage_busy' was not declared. Should it be static?

This symbol is not used outside of bpf_task_storage.c, so this
commit marks it static.

Fixes: bc235cdb423a ("bpf: Prevent deadlock from recursive bpf_task_storage_[get|delete]")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210311131505.1901509-1-weiyongjun1@huawei.com
---
 kernel/bpf/bpf_task_storage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index fd3c74ef608e..3ce75758d394 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,7 +20,7 @@
 
 DEFINE_BPF_STORAGE_CACHE(task_cache);
 
-DEFINE_PER_CPU(int, bpf_task_storage_busy);
+static DEFINE_PER_CPU(int, bpf_task_storage_busy);
 
 static void bpf_task_storage_lock(void)
 {
-- 
cgit v1.2.3


From dde7b3f5f2f458297aeccfd4783e53ab8ca046db Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 13 Mar 2021 13:09:17 -0800
Subject: libbpf: Add explicit padding to bpf_xdp_set_link_opts

Adding such anonymous padding fixes the issue with uninitialized portions of
bpf_xdp_set_link_opts when using LIBBPF_DECLARE_OPTS macro with inline field
initialization:

DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);

When such code is compiled in debug mode, compiler is generating code that
leaves padding bytes uninitialized, which triggers error inside libbpf APIs
that do strict zero initialization checks for OPTS structs.

Adding anonymous padding field fixes the issue.

Fixes: bd5ca3ef93cd ("libbpf: Add function to set link XDP fd while specifying old program")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210313210920.1959628-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3c35eb401931..3d690d4e785c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -507,6 +507,7 @@ struct xdp_link_info {
 struct bpf_xdp_set_link_opts {
 	size_t sz;
 	int old_fd;
+	size_t :0;
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
-- 
cgit v1.2.3


From 4bbb3583687051ef99966ddaeb1730441b777d40 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 13 Mar 2021 13:09:18 -0800
Subject: bpftool: Fix maybe-uninitialized warnings

Somehow when bpftool is compiled in -Og mode, compiler produces new warnings
about possibly uninitialized variables. Fix all the reported problems.

Fixes: 2119f2189df1 ("bpftool: add C output format option to btf dump subcommand")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210313210920.1959628-3-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c  | 3 +++
 tools/bpf/bpftool/main.c | 3 +--
 tools/bpf/bpftool/map.c  | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 985610c3f193..62953bbf68b4 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -546,6 +546,7 @@ static int do_dump(int argc, char **argv)
 			NEXT_ARG();
 			if (argc < 1) {
 				p_err("expecting value for 'format' option\n");
+				err = -EINVAL;
 				goto done;
 			}
 			if (strcmp(*argv, "c") == 0) {
@@ -555,11 +556,13 @@ static int do_dump(int argc, char **argv)
 			} else {
 				p_err("unrecognized format specifier: '%s', possible values: raw, c",
 				      *argv);
+				err = -EINVAL;
 				goto done;
 			}
 			NEXT_ARG();
 		} else {
 			p_err("unrecognized option: '%s'", *argv);
+			err = -EINVAL;
 			goto done;
 		}
 	}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index b86f450e6fce..d9afb730136a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -276,7 +276,7 @@ static int do_batch(int argc, char **argv)
 	int n_argc;
 	FILE *fp;
 	char *cp;
-	int err;
+	int err = 0;
 	int i;
 
 	if (argc < 2) {
@@ -370,7 +370,6 @@ static int do_batch(int argc, char **argv)
 	} else {
 		if (!json_output)
 			printf("processed %d commands\n", lines);
-		err = 0;
 	}
 err_close:
 	if (fp != stdin)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b400364ee054..09ae0381205b 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -100,7 +100,7 @@ static int do_dump_btf(const struct btf_dumper *d,
 		       void *value)
 {
 	__u32 value_id;
-	int ret;
+	int ret = 0;
 
 	/* start of key-value pair */
 	jsonw_start_object(d->jw);
-- 
cgit v1.2.3


From 105b842ba4ef2ddc287645cb33f90b960c16075b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 13 Mar 2021 13:09:19 -0800
Subject: selftests/bpf: Fix maybe-uninitialized warning in xdpxceiver test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xsk_ring_prod__reserve() doesn't necessarily set idx in some conditions, so
from static analysis point of view compiler is right about the problems like:

In file included from xdpxceiver.c:92:
xdpxceiver.c: In function ‘xsk_populate_fill_ring’:
/data/users/andriin/linux/tools/testing/selftests/bpf/tools/include/bpf/xsk.h:119:20: warning: ‘idx’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  return &addrs[idx & fill->mask];
                ~~~~^~~~~~~~~~~~
xdpxceiver.c:300:6: note: ‘idx’ was declared here
  u32 idx;
      ^~~
xdpxceiver.c: In function ‘tx_only’:
xdpxceiver.c:596:30: warning: ‘idx’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
                              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fix two warnings reported by compiler by pre-initializing variable.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210313210920.1959628-4-andrii@kernel.org
---
 tools/testing/selftests/bpf/xdpxceiver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 8b0f7fdd9003..1e21a3172687 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -297,7 +297,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
 {
 	int ret, i;
-	u32 idx;
+	u32 idx = 0;
 
 	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
 	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
@@ -584,7 +584,7 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
 
 static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 {
-	u32 idx;
+	u32 idx = 0;
 	unsigned int i;
 	bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
 	u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
-- 
cgit v1.2.3


From 252e3cbf2b623422b359b965b94060e8d68597e1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Sat, 13 Mar 2021 13:09:20 -0800
Subject: selftests/bpf: Build everything in debug mode

Build selftests, bpftool, and libbpf in debug mode with DWARF data to
facilitate easier debugging.

In terms of impact on building and running selftests. Build is actually faster
now:

BEFORE: make -j60  380.21s user 37.87s system 1466% cpu 28.503 total
AFTER:  make -j60  345.47s user 37.37s system 1599% cpu 23.939 total

test_progs runtime seems to be the same:

BEFORE:
real    1m5.139s
user    0m1.600s
sys     0m43.977s

AFTER:
real    1m3.799s
user    0m1.721s
sys     0m42.420s

Huge difference is being able to debug issues throughout test_progs, bpftool,
and libbpf without constantly updating 3 Makefiles by hand (including GDB
seeing the source code without any extra incantations).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210313210920.1959628-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c3999587bc23..d0db2b673c6f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS)		\
+CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)		\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)			\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
@@ -201,6 +201,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			       \
 		    CC=$(HOSTCC) LD=$(HOSTLD)				       \
+		    EXTRA_CFLAGS='-g -Og'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
 
@@ -218,6 +219,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
 	   | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+		    EXTRA_CFLAGS='-g -Og'					       \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
 ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -225,7 +227,8 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)                \
 	   ../../../include/uapi/linux/bpf.h                                   \
 	   | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR)                             \
-		OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD)     \
+		    EXTRA_CFLAGS='-g -Og'					       \
+		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
 		    DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
 endif
 
-- 
cgit v1.2.3


From f105f26e456040ac5ea451ffed02fe4c48f36ac7 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Tue, 16 Mar 2021 10:57:36 +0800
Subject: net: ipv4: route.c: simplify procfs code

proc_creat_seq() that directly take a struct seq_operations,
and deal with network namespaces in ->open.

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 34 ++++------------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 350bf500bb6d..0470442ff61d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -234,19 +234,6 @@ static const struct seq_operations rt_cache_seq_ops = {
 	.show   = rt_cache_seq_show,
 };
 
-static int rt_cache_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rt_cache_seq_ops);
-}
-
-static const struct proc_ops rt_cache_proc_ops = {
-	.proc_open	= rt_cache_seq_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-};
-
-
 static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	int cpu;
@@ -324,19 +311,6 @@ static const struct seq_operations rt_cpu_seq_ops = {
 	.show   = rt_cpu_seq_show,
 };
 
-
-static int rt_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rt_cpu_seq_ops);
-}
-
-static const struct proc_ops rt_cpu_proc_ops = {
-	.proc_open	= rt_cpu_seq_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-};
-
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
@@ -367,13 +341,13 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
 
-	pde = proc_create("rt_cache", 0444, net->proc_net,
-			  &rt_cache_proc_ops);
+	pde = proc_create_seq("rt_cache", 0444, net->proc_net,
+			      &rt_cache_seq_ops);
 	if (!pde)
 		goto err1;
 
-	pde = proc_create("rt_cache", 0444,
-			  net->proc_net_stat, &rt_cpu_proc_ops);
+	pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat,
+			      &rt_cpu_seq_ops);
 	if (!pde)
 		goto err2;
 
-- 
cgit v1.2.3


From 0f455371054bd40a69199868af99b80d1f3c4796 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:08 +0200
Subject: Documentation: networking: update the graphical representation

While preparing some slides for a customer presentation, I found the
existing high-level view to be a bit confusing, so I modified it a
little bit.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 45 ++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index e9517af5fe02..e20fbad2241a 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -172,23 +172,34 @@ Graphical representation
 Summarized, this is basically how DSA looks like from a network device
 perspective::
 
-
-                |---------------------------
-                | CPU network device (eth0)|
-                ----------------------------
-                | <tag added by switch     |
-                |                          |
-                |                          |
-                |        tag added by CPU> |
-        |--------------------------------------------|
-        |            Switch driver                   |
-        |--------------------------------------------|
-                  ||        ||         ||
-              |-------|  |-------|  |-------|
-              | sw0p0 |  | sw0p1 |  | sw0p2 |
-              |-------|  |-------|  |-------|
-
-
+                Unaware application
+              opens and binds socket
+                       |  ^
+                       |  |
+           +-----------v--|--------------------+
+           |+------+ +------+ +------+ +------+|
+           || swp0 | | swp1 | | swp2 | | swp3 ||
+           |+------+-+------+-+------+-+------+|
+           |          DSA switch driver        |
+           +-----------------------------------+
+                         |        ^
+            Tag added by |        | Tag consumed by
+           switch driver |        | switch driver
+                         v        |
+           +-----------------------------------+
+           | Unmodified host interface driver  | Software
+   --------+-----------------------------------+------------
+           |       Host interface (eth0)       | Hardware
+           +-----------------------------------+
+                         |        ^
+         Tag consumed by |        | Tag added by
+         switch hardware |        | switch hardware
+                         v        |
+           +-----------------------------------+
+           |               Switch              |
+           |+------+ +------+ +------+ +------+|
+           || swp0 | | swp1 | | swp2 | | swp3 ||
+           ++------+-+------+-+------+-+------++
 
 Slave MDIO bus
 --------------
-- 
cgit v1.2.3


From 7714ee152cd41bb5d2f725662ab3080a0af1aa91 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:09 +0200
Subject: Documentation: networking: dsa: rewrite chapter about tagging
 protocol

The chapter about tagging protocols is out of date because it doesn't
mention all taggers that have been added since last documentation
update. But judging based on that, it will always tend to lag behind,
and there's no good reason why we would enumerate the supported
hardware. Instead we could do something more useful and explain what
there is to know about tagging protocols instead.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 148 +++++++++++++++++++++++++++++++++--
 1 file changed, 140 insertions(+), 8 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index e20fbad2241a..17d1422ac085 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -65,14 +65,8 @@ Note that DSA does not currently create network interfaces for the "cpu" and
 Switch tagging protocols
 ------------------------
 
-DSA currently supports 5 different tagging protocols, and a tag-less mode as
-well. The different protocols are implemented in:
-
-- ``net/dsa/tag_trailer.c``: Marvell's 4 trailer tag mode (legacy)
-- ``net/dsa/tag_dsa.c``: Marvell's original DSA tag
-- ``net/dsa/tag_edsa.c``: Marvell's enhanced DSA tag
-- ``net/dsa/tag_brcm.c``: Broadcom's 4 bytes tag
-- ``net/dsa/tag_qca.c``: Qualcomm's 2 bytes tag
+DSA supports many vendor-specific tagging protocols, one software-defined
+tagging protocol, and a tag-less mode as well (``DSA_TAG_PROTO_NONE``).
 
 The exact format of the tag protocol is vendor specific, but in general, they
 all contain something which:
@@ -80,6 +74,144 @@ all contain something which:
 - identifies which port the Ethernet frame came from/should be sent to
 - provides a reason why this frame was forwarded to the management interface
 
+All tagging protocols are in ``net/dsa/tag_*.c`` files and implement the
+methods of the ``struct dsa_device_ops`` structure, which are detailed below.
+
+Tagging protocols generally fall in one of three categories:
+
+1. The switch-specific frame header is located before the Ethernet header,
+   shifting to the right (from the perspective of the DSA master's frame
+   parser) the MAC DA, MAC SA, EtherType and the entire L2 payload.
+2. The switch-specific frame header is located before the EtherType, keeping
+   the MAC DA and MAC SA in place from the DSA master's perspective, but
+   shifting the 'real' EtherType and L2 payload to the right.
+3. The switch-specific frame header is located at the tail of the packet,
+   keeping all frame headers in place and not altering the view of the packet
+   that the DSA master's frame parser has.
+
+A tagging protocol may tag all packets with switch tags of the same length, or
+the tag length might vary (for example packets with PTP timestamps might
+require an extended switch tag, or there might be one tag length on TX and a
+different one on RX). Either way, the tagging protocol driver must populate the
+``struct dsa_device_ops::overhead`` with the length in octets of the longest
+switch frame header. The DSA framework will automatically adjust the MTU of the
+master interface to accomodate for this extra size in order for DSA user ports
+to support the standard MTU (L2 payload length) of 1500 octets. The ``overhead``
+is also used to request from the network stack, on a best-effort basis, the
+allocation of packets with a ``needed_headroom`` or ``needed_tailroom``
+sufficient such that the act of pushing the switch tag on transmission of a
+packet does not cause it to reallocate due to lack of memory.
+
+Even though applications are not expected to parse DSA-specific frame headers,
+the format on the wire of the tagging protocol represents an Application Binary
+Interface exposed by the kernel towards user space, for decoders such as
+``libpcap``. The tagging protocol driver must populate the ``proto`` member of
+``struct dsa_device_ops`` with a value that uniquely describes the
+characteristics of the interaction required between the switch hardware and the
+data path driver: the offset of each bit field within the frame header and any
+stateful processing required to deal with the frames (as may be required for
+PTP timestamping).
+
+From the perspective of the network stack, all switches within the same DSA
+switch tree use the same tagging protocol. In case of a packet transiting a
+fabric with more than one switch, the switch-specific frame header is inserted
+by the first switch in the fabric that the packet was received on. This header
+typically contains information regarding its type (whether it is a control
+frame that must be trapped to the CPU, or a data frame to be forwarded).
+Control frames should be decapsulated only by the software data path, whereas
+data frames might also be autonomously forwarded towards other user ports of
+other switches from the same fabric, and in this case, the outermost switch
+ports must decapsulate the packet.
+
+Note that in certain cases, it might be the case that the tagging format used
+by a leaf switch (not connected directly to the CPU) to not be the same as what
+the network stack sees. This can be seen with Marvell switch trees, where the
+CPU port can be configured to use either the DSA or the Ethertype DSA (EDSA)
+format, but the DSA links are configured to use the shorter (without Ethertype)
+DSA frame header, in order to reduce the autonomous packet forwarding overhead.
+It still remains the case that, if the DSA switch tree is configured for the
+EDSA tagging protocol, the operating system sees EDSA-tagged packets from the
+leaf switches that tagged them with the shorter DSA header. This can be done
+because the Marvell switch connected directly to the CPU is configured to
+perform tag translation between DSA and EDSA (which is simply the operation of
+adding or removing the ``ETH_P_EDSA`` EtherType and some padding octets).
+
+It is possible to construct cascaded setups of DSA switches even if their
+tagging protocols are not compatible with one another. In this case, there are
+no DSA links in this fabric, and each switch constitutes a disjoint DSA switch
+tree. The DSA links are viewed as simply a pair of a DSA master (the out-facing
+port of the upstream DSA switch) and a CPU port (the in-facing port of the
+downstream DSA switch).
+
+The tagging protocol of the attached DSA switch tree can be viewed through the
+``dsa/tagging`` sysfs attribute of the DSA master::
+
+    cat /sys/class/net/eth0/dsa/tagging
+
+If the hardware and driver are capable, the tagging protocol of the DSA switch
+tree can be changed at runtime. This is done by writing the new tagging
+protocol name to the same sysfs device attribute as above (the DSA master and
+all attached switch ports must be down while doing this).
+
+It is desirable that all tagging protocols are testable with the ``dsa_loop``
+mockup driver, which can be attached to any network interface. The goal is that
+any network interface should be capable of transmitting the same packet in the
+same way, and the tagger should decode the same received packet in the same way
+regardless of the driver used for the switch control path, and the driver used
+for the DSA master.
+
+The transmission of a packet goes through the tagger's ``xmit`` function.
+The passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
+``skb_mac_header(skb)``, i.e. at the destination MAC address, and the passed
+``struct net_device *dev`` represents the virtual DSA user network interface
+whose hardware counterpart the packet must be steered to (i.e. ``swp0``).
+The job of this method is to prepare the skb in a way that the switch will
+understand what egress port the packet is for (and not deliver it towards other
+ports). Typically this is fulfilled by pushing a frame header. Checking for
+insufficient size in the skb headroom or tailroom is unnecessary provided that
+the ``overhead`` and ``tail_tag`` properties were filled out properly, because
+DSA ensures there is enough space before calling this method.
+
+The reception of a packet goes through the tagger's ``rcv`` function. The
+passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
+``skb_mac_header(skb) + ETH_ALEN`` octets, i.e. to where the first octet after
+the EtherType would have been, were this frame not tagged. The role of this
+method is to consume the frame header, adjust ``skb->data`` to really point at
+the first octet after the EtherType, and to change ``skb->dev`` to point to the
+virtual DSA user network interface corresponding to the physical front-facing
+switch port that the packet was received on.
+
+Since tagging protocols in category 1 and 2 break software (and most often also
+hardware) packet dissection on the DSA master, features such as RPS (Receive
+Packet Steering) on the DSA master would be broken. The DSA framework deals
+with this by hooking into the flow dissector and shifting the offset at which
+the IP header is to be found in the tagged frame as seen by the DSA master.
+This behavior is automatic based on the ``overhead`` value of the tagging
+protocol. If not all packets are of equal size, the tagger can implement the
+``flow_dissect`` method of the ``struct dsa_device_ops`` and override this
+default behavior by specifying the correct offset incurred by each individual
+RX packet. Tail taggers do not cause issues to the flow dissector.
+
+Due to various reasons (most common being category 1 taggers being associated
+with DSA-unaware masters, mangling what the master perceives as MAC DA), the
+tagging protocol may require the DSA master to operate in promiscuous mode, to
+receive all frames regardless of the value of the MAC DA. This can be done by
+setting the ``promisc_on_master`` property of the ``struct dsa_device_ops``.
+Note that this assumes a DSA-unaware master driver, which is the norm.
+
+Hardware manufacturers are strongly discouraged to do this, but some tagging
+protocols might not provide source port information on RX for all packets, but
+e.g. only for control traffic (link-local PDUs). In this case, by implementing
+the ``filter`` method of ``struct dsa_device_ops``, the tagger might select
+which packets are to be redirected on RX towards the virtual DSA user network
+interfaces, and which are to be left in the DSA master's RX data path.
+
+It might also happen (although silicon vendors are strongly discouraged to
+produce hardware like this) that a tagging protocol splits the switch-specific
+information into a header portion and a tail portion, therefore not falling
+cleanly into any of the above 3 categories. DSA does not support this
+configuration.
+
 Master network devices
 ----------------------
 
-- 
cgit v1.2.3


From f23f1404ebd37b0b0b120180361db3867359cc0c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:10 +0200
Subject: Documentation: networking: dsa: remove static port count from
 limitations

After Vivien's series from 2019 containing commits 27d4d19d7c82 ("net:
dsa: remove limitation of switch index value") and ab8ccae122a4 ("net:
dsa: add ports list in the switch fabric"), this is basically no longer
true.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 17d1422ac085..3bca80a53a86 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -382,14 +382,6 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as
 Design limitations
 ==================
 
-Limits on the number of devices and ports
------------------------------------------
-
-DSA currently limits the number of maximum switches within a tree to 4
-(``DSA_MAX_SWITCHES``), and the number of ports per switch to 12 (``DSA_MAX_PORTS``).
-These limits could be extended to support larger configurations would this need
-arise.
-
 Lack of CPU/DSA network devices
 -------------------------------
 
@@ -719,7 +711,6 @@ two subsystems and get the best of both worlds.
 Other hanging fruits
 --------------------
 
-- making the number of ports fully dynamic and not dependent on ``DSA_MAX_PORTS``
 - allowing more than one CPU/management interface:
   http://comments.gmane.org/gmane.linux.network/365657
 - porting more drivers from other vendors:
-- 
cgit v1.2.3


From f88439918589332a2c5feef225b9644873cd0769 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:11 +0200
Subject: Documentation: networking: dsa: remove references to switchdev
 prepare/commit

After the recent series containing commit bae33f2b5afe ("net: switchdev:
remove the transaction structure from port attributes"), there aren't
prepare/commit transactional phases anymore in most of the switchdev
objects/attributes, and as a result, there aren't any in the DSA driver
API either. So remove this piece.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 3bca80a53a86..ced2eb6d647a 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -452,14 +452,8 @@ SWITCHDEV
 
 DSA directly utilizes SWITCHDEV when interfacing with the bridge layer, and
 more specifically with its VLAN filtering portion when configuring VLANs on top
-of per-port slave network devices. Since DSA primarily deals with
-MDIO-connected switches, although not exclusively, SWITCHDEV's
-prepare/abort/commit phases are often simplified into a prepare phase which
-checks whether the operation is supported by the DSA switch driver, and a commit
-phase which applies the changes.
-
-As of today, the only SWITCHDEV objects supported by DSA are the FDB and VLAN
-objects.
+of per-port slave network devices. As of today, the only SWITCHDEV objects
+supported by DSA are the FDB and VLAN objects.
 
 Device Tree
 -----------
@@ -638,14 +632,10 @@ Bridge VLAN filtering
   accept any 802.1Q frames irrespective of their VLAN ID, and untagged frames are
   allowed.
 
-- ``port_vlan_prepare``: bridge layer function invoked when the bridge prepares the
-  configuration of a VLAN on the given port. If the operation is not supported
-  by the hardware, this function should return ``-EOPNOTSUPP`` to inform the bridge
-  code to fallback to a software implementation. No hardware setup must be done
-  in this function. See port_vlan_add for this and details.
-
 - ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
-  (tagged or untagged) for the given switch port
+  (tagged or untagged) for the given switch port. If the operation is not
+  supported by the hardware, this function should return ``-EOPNOTSUPP`` to
+  inform the bridge code to fallback to a software implementation.
 
 - ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
   given switch port
@@ -673,14 +663,10 @@ Bridge VLAN filtering
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and FDB info.
 
-- ``port_mdb_prepare``: bridge layer function invoked when the bridge prepares the
-  installation of a multicast database entry. If the operation is not supported,
-  this function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback
-  to a software implementation. No hardware setup must be done in this function.
-  See ``port_fdb_add`` for this and details.
-
 - ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
-  a multicast database entry, the switch hardware should be programmed with the
+  a multicast database entry. If the operation is not supported, this function
+  should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
+  software implementation. The switch hardware should be programmed with the
   specified address in the specified VLAN ID in the forwarding database
   associated with this VLAN ID.
 
-- 
cgit v1.2.3


From f4b5c53a03ea76b2abfd71b45aa7363fd2ad0cc1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:12 +0200
Subject: Documentation: networking: dsa: remove TODO about porting more vendor
 drivers

On one hand, the link is dead and therefore useless.

On the other hand, there are always more drivers to port, but at this
stage, DSA does not need to affirm itself as the driver model to use for
Ethernet-connected switches (since we already have 15 tagging protocols
supported and probably more switch families from various vendors), so
there is nothing actionable to do.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index ced2eb6d647a..b90a852e5329 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -699,5 +699,3 @@ Other hanging fruits
 
 - allowing more than one CPU/management interface:
   http://comments.gmane.org/gmane.linux.network/365657
-- porting more drivers from other vendors:
-  http://comments.gmane.org/gmane.linux.network/365510
-- 
cgit v1.2.3


From 5a275f4c2989f6f1fab626c61d34001f28381a18 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:13 +0200
Subject: Documentation: networking: dsa: document the port_bridge_flags method

The documentation was already lagging behind by not mentioning the old
version of port_bridge_flags (port_set_egress_floods). So now we are
skipping one step and just explaining how a DSA driver should configure
address learning and flooding settings.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index b90a852e5329..9c287dfd3c45 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -619,6 +619,17 @@ Bridge layer
   computing a STP state change based on current and asked parameters and perform
   the relevant ageing based on the intersection results
 
+- ``port_bridge_flags``: bridge layer function invoked when a port must
+  configure its settings for e.g. flooding of unknown traffic or source address
+  learning. The switch driver is responsible for initial setup of the
+  standalone ports with address learning disabled and egress flooding of all
+  types of traffic, then the DSA core notifies of any change to the bridge port
+  flags when the port joins and leaves a bridge. DSA does not currently manage
+  the bridge port flags for the CPU port. The assumption is that address
+  learning should be statically enabled (if supported by the hardware) on the
+  CPU port, and flooding towards the CPU port should also be enabled, due to a
+  lack of an explicit address filtering mechanism in the DSA core.
+
 Bridge VLAN filtering
 ---------------------
 
-- 
cgit v1.2.3


From 8411abbcad8e73a3b3a27ff6c0d755c6036f4c77 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:14 +0200
Subject: Documentation: networking: dsa: mention integration with devlink

Add a short summary of the devlink features supported by the DSA core.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 9c287dfd3c45..af604fe976b3 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -416,6 +416,7 @@ DSA currently leverages the following subsystems:
 - MDIO/PHY library: ``drivers/net/phy/phy.c``, ``mdio_bus.c``
 - Switchdev:``net/switchdev/*``
 - Device Tree for various of_* functions
+- Devlink: ``net/core/devlink.c``
 
 MDIO/PHY library
 ----------------
@@ -455,6 +456,36 @@ more specifically with its VLAN filtering portion when configuring VLANs on top
 of per-port slave network devices. As of today, the only SWITCHDEV objects
 supported by DSA are the FDB and VLAN objects.
 
+Devlink
+-------
+
+DSA registers one devlink device per physical switch in the fabric.
+For each devlink device, every physical port (i.e. user ports, CPU ports, DSA
+links or unused ports) is exposed as a devlink port.
+
+DSA drivers can make use of the following devlink features:
+- Regions: debugging feature which allows user space to dump driver-defined
+  areas of hardware information in a low-level, binary format. Both global
+  regions as well as per-port regions are supported. It is possible to export
+  devlink regions even for pieces of data that are already exposed in some way
+  to the standard iproute2 user space programs (ip-link, bridge), like address
+  tables and VLAN tables. For example, this might be useful if the tables
+  contain additional hardware-specific details which are not visible through
+  the iproute2 abstraction, or it might be useful to inspect these tables on
+  the non-user ports too, which are invisible to iproute2 because no network
+  interface is registered for them.
+- Params: a feature which enables user to configure certain low-level tunable
+  knobs pertaining to the device. Drivers may implement applicable generic
+  devlink params, or may add new device-specific devlink params.
+- Resources: a monitoring feature which enables users to see the degree of
+  utilization of certain hardware tables in the device, such as FDB, VLAN, etc.
+- Shared buffers: a QoS feature for adjusting and partitioning memory and frame
+  reservations per port and per traffic class, in the ingress and egress
+  directions, such that low-priority bulk traffic does not impede the
+  processing of high-priority critical traffic.
+
+For more details, consult ``Documentation/networking/devlink/``.
+
 Device Tree
 -----------
 
-- 
cgit v1.2.3


From a9985444f2b5b6e60c7b1faeceebe58367a08915 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:15 +0200
Subject: Documentation: networking: dsa: add paragraph for the LAG offload

Add a short summary of the methods that a driver writer must implement
for offloading a link aggregation group, and what is still missing.

Cc: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index af604fe976b3..e8576e81735c 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -724,6 +724,39 @@ Bridge VLAN filtering
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and MDB info.
 
+Link aggregation
+----------------
+
+Link aggregation is implemented in the Linux networking stack by the bonding
+and team drivers, which are modeled as virtual, stackable network interfaces.
+DSA is capable of offloading a link aggregation group (LAG) to hardware that
+supports the feature, and supports bridging between physical ports and LAGs,
+as well as between LAGs. A bonding/team interface which holds multiple physical
+ports constitutes a logical port, although DSA has no explicit concept of a
+logical port at the moment. Due to this, events where a LAG joins/leaves a
+bridge are treated as if all individual physical ports that are members of that
+LAG join/leave the bridge. Switchdev port attributes (VLAN filtering, STP
+state, etc) and objects (VLANs, MDB entries) offloaded to a LAG as bridge port
+are treated similarly: DSA offloads the same switchdev object / port attribute
+on all members of the LAG. Static bridge FDB entries on a LAG are not yet
+supported, since the DSA driver API does not have the concept of a logical port
+ID.
+
+- ``port_lag_join``: function invoked when a given switch port is added to a
+  LAG. The driver may return ``-EOPNOTSUPP``, and in this case, DSA will fall
+  back to a software implementation where all traffic from this port is sent to
+  the CPU.
+- ``port_lag_leave``: function invoked when a given switch port leaves a LAG
+  and returns to operation as a standalone port.
+- ``port_lag_change``: function invoked when the link state of any member of
+  the LAG changes, and the hashing function needs rebalancing to only make use
+  of the subset of physical LAG member ports that are up.
+
+Drivers that benefit from having an ID associated with each offloaded LAG
+can optionally populate ``ds->num_lag_ids`` from the ``dsa_switch_ops::setup``
+method. The LAG ID associated with a bonding/team interface can then be
+retrieved by a DSA switch driver using the ``dsa_lag_id`` function.
+
 TODO
 ====
 
-- 
cgit v1.2.3


From f8f3c20af1ea27989e01dfc11a3136b380eb6120 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:16 +0200
Subject: Documentation: networking: dsa: add paragraph for the MRP offload

Add a short summary of the methods that a driver writer must implement
for getting an MRP instance to work on top of a DSA switch.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index e8576e81735c..0daafa2fb9eb 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -757,6 +757,36 @@ can optionally populate ``ds->num_lag_ids`` from the ``dsa_switch_ops::setup``
 method. The LAG ID associated with a bonding/team interface can then be
 retrieved by a DSA switch driver using the ``dsa_lag_id`` function.
 
+IEC 62439-2 (MRP)
+-----------------
+
+The Media Redundancy Protocol is a topology management protocol optimized for
+fast fault recovery time for ring networks, which has some components
+implemented as a function of the bridge driver. MRP uses management PDUs
+(Test, Topology, LinkDown/Up, Option) sent at a multicast destination MAC
+address range of 01:15:4e:00:00:0x and with an EtherType of 0x88e3.
+Depending on the node's role in the ring (MRM: Media Redundancy Manager,
+MRC: Media Redundancy Client, MRA: Media Redundancy Automanager), certain MRP
+PDUs might need to be terminated locally and others might need to be forwarded.
+An MRM might also benefit from offloading to hardware the creation and
+transmission of certain MRP PDUs (Test).
+
+Normally an MRP instance can be created on top of any network interface,
+however in the case of a device with an offloaded data path such as DSA, it is
+necessary for the hardware, even if it is not MRP-aware, to be able to extract
+the MRP PDUs from the fabric before the driver can proceed with the software
+implementation. DSA today has no driver which is MRP-aware, therefore it only
+listens for the bare minimum switchdev objects required for the software assist
+to work properly. The operations are detailed below.
+
+- ``port_mrp_add`` and ``port_mrp_del``: notifies driver when an MRP instance
+  with a certain ring ID, priority, primary port and secondary port is
+  created/deleted.
+- ``port_mrp_add_ring_role`` and ``port_mrp_del_ring_role``: function invoked
+  when an MRP instance changes ring roles between MRM or MRC. This affects
+  which MRP PDUs should be trapped to software and which should be autonomously
+  forwarded.
+
 TODO
 ====
 
-- 
cgit v1.2.3


From 6e9530f4c0429ac4d8f58743f047cb67a7e74c35 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:17 +0200
Subject: Documentation: networking: dsa: add paragraph for the HSR/PRP offload

Add a short summary of the methods that a driver writer must implement
for offloading a HSR/PRP network interface.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: George McCollister <george.mccollister@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 0daafa2fb9eb..69040e11ee5e 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -787,6 +787,38 @@ to work properly. The operations are detailed below.
   which MRP PDUs should be trapped to software and which should be autonomously
   forwarded.
 
+IEC 62439-3 (HSR/PRP)
+---------------------
+
+The Parallel Redundancy Protocol (PRP) is a network redundancy protocol which
+works by duplicating and sequence numbering packets through two independent L2
+networks (which are unaware of the PRP tail tags carried in the packets), and
+eliminating the duplicates at the receiver. The High-availability Seamless
+Redundancy (HSR) protocol is similar in concept, except all nodes that carry
+the redundant traffic are aware of the fact that it is HSR-tagged (because HSR
+uses a header with an EtherType of 0x892f) and are physically connected in a
+ring topology. Both HSR and PRP use supervision frames for monitoring the
+health of the network and for discovery of other nodes.
+
+In Linux, both HSR and PRP are implemented in the hsr driver, which
+instantiates a virtual, stackable network interface with two member ports.
+The driver only implements the basic roles of DANH (Doubly Attached Node
+implementing HSR) and DANP (Doubly Attached Node implementing PRP); the roles
+of RedBox and QuadBox are not implemented (therefore, bridging a hsr network
+interface with a physical switch port does not produce the expected result).
+
+A driver which is able of offloading certain functions of a DANP or DANH should
+declare the corresponding netdev features as indicated by the documentation at
+``Documentation/networking/netdev-features.rst``. Additionally, the following
+methods must be implemented:
+
+- ``port_hsr_join``: function invoked when a given switch port is added to a
+  DANP/DANH. The driver may return ``-EOPNOTSUPP`` and in this case, DSA will
+  fall back to a software implementation where all traffic from this port is
+  sent to the CPU.
+- ``port_hsr_leave``: function invoked when a given switch port leaves a
+  DANP/DANH and returns to normal operation as a standalone port.
+
 TODO
 ====
 
-- 
cgit v1.2.3


From 0f22ad45f47cca1f0fd564c7bf4a28f481b95f10 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 16 Mar 2021 13:24:18 +0200
Subject: Documentation: networking: switchdev: clarify device driver behavior

This patch provides details on the expected behavior of switchdev
enabled network devices when operating in a "stand alone" mode, as well
as when being bridge members. This clarifies a number of things that
recently came up during a bug fixing session on the b53 DSA switch
driver.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.rst | 152 +++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
index ddc3f35775dc..016531a3d471 100644
--- a/Documentation/networking/switchdev.rst
+++ b/Documentation/networking/switchdev.rst
@@ -385,3 +385,155 @@ The driver can monitor for updates to arp_tbl using the netevent notifier
 NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
 for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
 to know when arp_tbl neighbor entries are purged from the port.
+
+Device driver expected behavior
+-------------------------------
+
+Below is a set of defined behavior that switchdev enabled network devices must
+adhere to.
+
+Configuration-less state
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Upon driver bring up, the network devices must be fully operational, and the
+backing driver must configure the network device such that it is possible to
+send and receive traffic to this network device and it is properly separated
+from other network devices/ports (e.g.: as is frequent with a switch ASIC). How
+this is achieved is heavily hardware dependent, but a simple solution can be to
+use per-port VLAN identifiers unless a better mechanism is available
+(proprietary metadata for each network port for instance).
+
+The network device must be capable of running a full IP protocol stack
+including multicast, DHCP, IPv4/6, etc. If necessary, it should program the
+appropriate filters for VLAN, multicast, unicast etc. The underlying device
+driver must effectively be configured in a similar fashion to what it would do
+when IGMP snooping is enabled for IP multicast over these switchdev network
+devices and unsolicited multicast must be filtered as early as possible in
+the hardware.
+
+When configuring VLANs on top of the network device, all VLANs must be working,
+irrespective of the state of other network devices (e.g.: other ports being part
+of a VLAN-aware bridge doing ingress VID checking). See below for details.
+
+If the device implements e.g.: VLAN filtering, putting the interface in
+promiscuous mode should allow the reception of all VLAN tags (including those
+not present in the filter(s)).
+
+Bridged switch ports
+^^^^^^^^^^^^^^^^^^^^
+
+When a switchdev enabled network device is added as a bridge member, it should
+not disrupt any functionality of non-bridged network devices and they
+should continue to behave as normal network devices. Depending on the bridge
+configuration knobs below, the expected behavior is documented.
+
+Bridge VLAN filtering
+^^^^^^^^^^^^^^^^^^^^^
+
+The Linux bridge allows the configuration of a VLAN filtering mode (statically,
+at device creation time, and dynamically, during run time) which must be
+observed by the underlying switchdev network device/hardware:
+
+- with VLAN filtering turned off: the bridge is strictly VLAN unaware and its
+  data path will process all Ethernet frames as if they are VLAN-untagged.
+  The bridge VLAN database can still be modified, but the modifications should
+  have no effect while VLAN filtering is turned off. Frames ingressing the
+  device with a VID that is not programmed into the bridge/switch's VLAN table
+  must be forwarded and may be processed using a VLAN device (see below).
+
+- with VLAN filtering turned on: the bridge is VLAN-aware and frames ingressing
+  the device with a VID that is not programmed into the bridges/switch's VLAN
+  table must be dropped (strict VID checking).
+
+When there is a VLAN device (e.g: sw0p1.100) configured on top of a switchdev
+network device which is a bridge port member, the behavior of the software
+network stack must be preserved, or the configuration must be refused if that
+is not possible.
+
+- with VLAN filtering turned off, the bridge will process all ingress traffic
+  for the port, except for the traffic tagged with a VLAN ID destined for a
+  VLAN upper. The VLAN upper interface (which consumes the VLAN tag) can even
+  be added to a second bridge, which includes other switch ports or software
+  interfaces. Some approaches to ensure that the forwarding domain for traffic
+  belonging to the VLAN upper interfaces are managed properly:
+    * If forwarding destinations can be managed per VLAN, the hardware could be
+      configured to map all traffic, except the packets tagged with a VID
+      belonging to a VLAN upper interface, to an internal VID corresponding to
+      untagged packets. This internal VID spans all ports of the VLAN-unaware
+      bridge. The VID corresponding to the VLAN upper interface spans the
+      physical port of that VLAN interface, as well as the other ports that
+      might be bridged with it.
+    * Treat bridge ports with VLAN upper interfaces as standalone, and let
+      forwarding be handled in the software data path.
+
+- with VLAN filtering turned on, these VLAN devices can be created as long as
+  the bridge does not have an existing VLAN entry with the same VID on any
+  bridge port. These VLAN devices cannot be enslaved into the bridge since they
+  duplicate functionality/use case with the bridge's VLAN data path processing.
+
+Non-bridged network ports of the same switch fabric must not be disturbed in any
+way by the enabling of VLAN filtering on the bridge device(s). If the VLAN
+filtering setting is global to the entire chip, then the standalone ports
+should indicate to the network stack that VLAN filtering is required by setting
+'rx-vlan-filter: on [fixed]' in the ethtool features.
+
+Because VLAN filtering can be turned on/off at runtime, the switchdev driver
+must be able to reconfigure the underlying hardware on the fly to honor the
+toggling of that option and behave appropriately. If that is not possible, the
+switchdev driver can also refuse to support dynamic toggling of the VLAN
+filtering knob at runtime and require a destruction of the bridge device(s) and
+creation of new bridge device(s) with a different VLAN filtering value to
+ensure VLAN awareness is pushed down to the hardware.
+
+Even when VLAN filtering in the bridge is turned off, the underlying switch
+hardware and driver may still configure itself in a VLAN-aware mode provided
+that the behavior described above is observed.
+
+The VLAN protocol of the bridge plays a role in deciding whether a packet is
+treated as tagged or not: a bridge using the 802.1ad protocol must treat both
+VLAN-untagged packets, as well as packets tagged with 802.1Q headers, as
+untagged.
+
+The 802.1p (VID 0) tagged packets must be treated in the same way by the device
+as untagged packets, since the bridge device does not allow the manipulation of
+VID 0 in its database.
+
+When the bridge has VLAN filtering enabled and a PVID is not configured on the
+ingress port, untagged 802.1p tagged packets must be dropped. When the bridge
+has VLAN filtering enabled and a PVID exists on the ingress port, untagged and
+priority-tagged packets must be accepted and forwarded according to the
+bridge's port membership of the PVID VLAN. When the bridge has VLAN filtering
+disabled, the presence/lack of a PVID should not influence the packet
+forwarding decision.
+
+Bridge IGMP snooping
+^^^^^^^^^^^^^^^^^^^^
+
+The Linux bridge allows the configuration of IGMP snooping (statically, at
+interface creation time, or dynamically, during runtime) which must be observed
+by the underlying switchdev network device/hardware in the following way:
+
+- when IGMP snooping is turned off, multicast traffic must be flooded to all
+  ports within the same bridge that have mcast_flood=true. The CPU/management
+  port should ideally not be flooded (unless the ingress interface has
+  IFF_ALLMULTI or IFF_PROMISC) and continue to learn multicast traffic through
+  the network stack notifications. If the hardware is not capable of doing that
+  then the CPU/management port must also be flooded and multicast filtering
+  happens in software.
+
+- when IGMP snooping is turned on, multicast traffic must selectively flow
+  to the appropriate network ports (including CPU/management port). Flooding of
+  unknown multicast should be only towards the ports connected to a multicast
+  router (the local device may also act as a multicast router).
+
+The switch must adhere to RFC 4541 and flood multicast traffic accordingly
+since that is what the Linux bridge implementation does.
+
+Because IGMP snooping can be turned on/off at runtime, the switchdev driver
+must be able to reconfigure the underlying hardware on the fly to honor the
+toggling of that option and behave appropriately.
+
+A switchdev driver can also refuse to support dynamic toggling of the multicast
+snooping knob at runtime and require the destruction of the bridge device(s)
+and creation of a new bridge device(s) with a different multicast snooping
+value.
-- 
cgit v1.2.3


From 787a4109f46847975ffae7d528a55c6b768ef0aa Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 16 Mar 2021 13:24:19 +0200
Subject: Documentation: networking: switchdev: fix command for static FDB
 entries

The "bridge fdb add" command provided in the switchdev documentation is
junk now, not only because it is syntactically incorrect and rejected by
the iproute2 bridge program, but also because it was not updated in
light of Arkadi Sharshevsky's radical switchdev refactoring in commit
29ab586c3d83 ("net: switchdev: Remove bridge bypass support from
switchdev"). Try to explain what the intended usage pattern is with the
new kernel implementation.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.rst | 47 +++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
index 016531a3d471..1b56367d85ad 100644
--- a/Documentation/networking/switchdev.rst
+++ b/Documentation/networking/switchdev.rst
@@ -181,18 +181,41 @@ To offloading L2 bridging, the switchdev driver/device should support:
 Static FDB Entries
 ^^^^^^^^^^^^^^^^^^
 
-The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump
-to support static FDB entries installed to the device.  Static bridge FDB
-entries are installed, for example, using iproute2 bridge cmd::
-
-	bridge fdb add ADDR dev DEV [vlan VID] [self]
-
-The driver should use the helper switchdev_port_fdb_xxx ops for ndo_fdb_xxx
-ops, and handle add/delete/dump of SWITCHDEV_OBJ_ID_PORT_FDB object using
-switchdev_port_obj_xxx ops.
-
-XXX: what should be done if offloading this rule to hardware fails (for
-example, due to full capacity in hardware tables) ?
+A driver which implements the ``ndo_fdb_add``, ``ndo_fdb_del`` and
+``ndo_fdb_dump`` operations is able to support the command below, which adds a
+static bridge FDB entry::
+
+        bridge fdb add dev DEV ADDRESS [vlan VID] [self] static
+
+(the "static" keyword is non-optional: if not specified, the entry defaults to
+being "local", which means that it should not be forwarded)
+
+The "self" keyword (optional because it is implicit) has the role of
+instructing the kernel to fulfill the operation through the ``ndo_fdb_add``
+implementation of the ``DEV`` device itself. If ``DEV`` is a bridge port, this
+will bypass the bridge and therefore leave the software database out of sync
+with the hardware one.
+
+To avoid this, the "master" keyword can be used::
+
+        bridge fdb add dev DEV ADDRESS [vlan VID] master static
+
+The above command instructs the kernel to search for a master interface of
+``DEV`` and fulfill the operation through the ``ndo_fdb_add`` method of that.
+This time, the bridge generates a ``SWITCHDEV_FDB_ADD_TO_DEVICE`` notification
+which the port driver can handle and use it to program its hardware table. This
+way, the software and the hardware database will both contain this static FDB
+entry.
+
+Note: for new switchdev drivers that offload the Linux bridge, implementing the
+``ndo_fdb_add`` and ``ndo_fdb_del`` bridge bypass methods is strongly
+discouraged: all static FDB entries should be added on a bridge port using the
+"master" flag. The ``ndo_fdb_dump`` is an exception and can be implemented to
+visualize the hardware tables, if the device does not have an interrupt for
+notifying the operating system of newly learned/forgotten dynamic FDB
+addresses. In that case, the hardware FDB might end up having entries that the
+software FDB does not, and implementing ``ndo_fdb_dump`` is the only way to see
+them.
 
 Note: by default, the bridge does not filter on VLAN and only bridges untagged
 traffic.  To enable VLAN support, turn on VLAN filtering::
-- 
cgit v1.2.3


From 91306d1d131ee20afe6447668ba3a8f13151b9f4 Mon Sep 17 00:00:00 2001
From: Zihao Tang <tangzihao1@hisilicon.com>
Date: Tue, 16 Mar 2021 17:41:06 +0800
Subject: net: ipa: Remove useless error message

Fix the following coccicheck report:

drivers/net/ipa/gsi.c:1341:2-9:
line 1341 is redundant because platform_get_irq() already prints an error

Remove dev_err() messages after platform_get_irq_byname() failures.

Signed-off-by: Zihao Tang <tangzihao1@hisilicon.com>
Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Reviewed-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 390d3403386a..2119367b93ea 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -1337,10 +1337,9 @@ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
 	int ret;
 
 	ret = platform_get_irq_byname(pdev, "gsi");
-	if (ret <= 0) {
-		dev_err(dev, "DT error %d getting \"gsi\" IRQ property\n", ret);
+	if (ret <= 0)
 		return ret ? : -EINVAL;
-	}
+
 	irq = ret;
 
 	ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
-- 
cgit v1.2.3


From cba0445633bc91508d9231880a69c74cdad0bbb8 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 16 Mar 2021 16:55:08 +0200
Subject: dpaa2-switch: remove unused ABI functions

Cleanup the dpaa2-switch driver a bit by removing any unused MC firmware
ABI definitions.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h |  7 ---
 drivers/net/ethernet/freescale/dpaa2/dpsw.c     | 70 +------------------------
 drivers/net/ethernet/freescale/dpaa2/dpsw.h     |  6 ---
 3 files changed, 1 insertion(+), 82 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index eb620e832412..2371fd5c40e3 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -75,8 +75,6 @@
 #define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
 
 #define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
-#define DPSW_CMDID_IF_GET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A8)
-#define DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR  DPSW_CMD_ID(0x0A9)
 
 #define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
 #define DPSW_CMDID_CTRL_IF_SET_POOLS        DPSW_CMD_ID(0x0A1)
@@ -443,11 +441,6 @@ struct dpsw_rsp_if_get_mac_addr {
 	u8 mac_addr[6];
 };
 
-struct dpsw_cmd_if_set_mac_addr {
-	__le16 if_id;
-	u8 mac_addr[6];
-};
-
 struct dpsw_cmd_set_egress_flood {
 	__le16 fdb_id;
 	u8 flood_type;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index 5189f156100e..f94874381b69 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -397,7 +397,7 @@ int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
  * @if_id:	Interface id
  * @state:	Link state	1 - linkup, 0 - link down or disconnected
  *
- * @Return	'0' on Success; Error code otherwise.
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
 			   u32 cmd_flags,
@@ -1356,74 +1356,6 @@ int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 	return 0;
 }
 
-/**
- * dpsw_if_get_primary_mac_addr()
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
-{
-	struct dpsw_rsp_if_get_mac_addr *rsp_params;
-	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_if *cmd_params;
-	int err, i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-
-	/* send command to mc*/
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params;
-	for (i = 0; i < 6; i++)
-		mac_addr[5 - i] = rsp_params->mac_addr[i];
-
-	return 0;
-}
-
-/**
- * dpsw_if_set_primary_mac_addr()
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPSW object
- * @if_id:	Interface Identifier
- * @mac_addr:	MAC address of the physical port, if any, otherwise 0
- *
- * Return:	Completion status. '0' on Success; Error code otherwise.
- */
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6])
-{
-	struct dpsw_cmd_if_set_mac_addr *cmd_params;
-	struct fsl_mc_command cmd = { 0 };
-	int i;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_PRIMARY_MAC_ADDR,
-					  cmd_flags,
-					  token);
-	cmd_params = (struct dpsw_cmd_if_set_mac_addr *)cmd.params;
-	cmd_params->if_id = cpu_to_le16(if_id);
-	for (i = 0; i < 6; i++)
-		cmd_params->mac_addr[i] = mac_addr[5 - i];
-
-	/* send command to mc*/
-	return mc_send_command(mc_io, &cmd);
-}
-
 /**
  * dpsw_ctrl_if_enable() - Enable control interface
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 9e04350f3277..e5e9c35604a7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -707,12 +707,6 @@ int dpsw_get_api_version(struct fsl_mc_io *mc_io,
 int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			      u16 if_id, u8 mac_addr[6]);
 
-int dpsw_if_get_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
-int dpsw_if_set_primary_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags,
-				 u16 token, u16 if_id, u8 mac_addr[6]);
-
 /**
  * struct dpsw_fdb_cfg  - FDB Configuration
  * @num_fdb_entries: Number of FDB entries
-- 
cgit v1.2.3


From 05b363608b5bfb1d55675655bab36486c6df729b Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 16 Mar 2021 16:55:09 +0200
Subject: dpaa2-switch: fix kdoc warnings

Running kernel-doc over the dpaa2-switch driver generates a bunch of
warnings. Fix them up by removing code comments for macros which are
self-explanatory and adding a bit more context for the
dpsw_if_get_port_mac_addr() function and the fields of the
dpsw_vlan_if_cfg structure.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpsw.c |  2 +-
 drivers/net/ethernet/freescale/dpaa2/dpsw.h | 57 +++++++----------------------
 2 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index f94874381b69..ad7a4c03b130 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1319,7 +1319,7 @@ int dpsw_get_api_version(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dpsw_if_get_port_mac_addr()
+ * dpsw_if_get_port_mac_addr() - Retrieve MAC address associated to the physical port
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:	Token of DPSW object
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index e5e9c35604a7..6807b15f5807 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -14,17 +14,10 @@
 
 struct fsl_mc_io;
 
-/**
- * DPSW general definitions
- */
+/* DPSW general definitions */
 
-/**
- * Maximum number of traffic class priorities
- */
 #define DPSW_MAX_PRIORITIES	8
-/**
- * Maximum number of interfaces
- */
+
 #define DPSW_MAX_IF		64
 
 int dpsw_open(struct fsl_mc_io *mc_io,
@@ -36,30 +29,20 @@ int dpsw_close(struct fsl_mc_io *mc_io,
 	       u32 cmd_flags,
 	       u16 token);
 
-/**
- * DPSW options
- */
+/* DPSW options */
 
 /**
- * Disable flooding
+ * DPSW_OPT_FLOODING_DIS - Flooding was disabled at device create
  */
 #define DPSW_OPT_FLOODING_DIS		0x0000000000000001ULL
 /**
- * Disable Multicast
+ * DPSW_OPT_MULTICAST_DIS - Multicast was disabled at device create
  */
 #define DPSW_OPT_MULTICAST_DIS		0x0000000000000004ULL
 /**
- * Support control interface
+ * DPSW_OPT_CTRL_IF_DIS - Control interface support is disabled
  */
 #define DPSW_OPT_CTRL_IF_DIS		0x0000000000000010ULL
-/**
- * Disable flooding metering
- */
-#define DPSW_OPT_FLOODING_METERING_DIS  0x0000000000000020ULL
-/**
- * Enable metering
- */
-#define DPSW_OPT_METERING_EN            0x0000000000000040ULL
 
 /**
  * enum dpsw_component_type - component type of a bridge
@@ -116,15 +99,13 @@ int dpsw_reset(struct fsl_mc_io *mc_io,
 	       u32 cmd_flags,
 	       u16 token);
 
-/**
- * DPSW IRQ Index and Events
- */
+/* DPSW IRQ Index and Events */
 
 #define DPSW_IRQ_INDEX_IF		0x0000
 #define DPSW_IRQ_INDEX_L2SW		0x0001
 
 /**
- * IRQ event - Indicates that the link state changed
+ * DPSW_IRQ_EVENT_LINK_CHANGED - Indicates that the link state changed
  */
 #define DPSW_IRQ_EVENT_LINK_CHANGED	0x0001
 
@@ -229,9 +210,6 @@ enum dpsw_queue_type {
 	DPSW_QUEUE_RX_ERR,
 };
 
-/**
- * Maximum number of DPBP
- */
 #define DPSW_MAX_DPBP     8
 
 /**
@@ -293,21 +271,9 @@ enum dpsw_action {
 	DPSW_ACTION_REDIRECT = 1
 };
 
-/**
- * Enable auto-negotiation
- */
 #define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
-/**
- * Enable half-duplex mode
- */
 #define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
-/**
- * Enable pause frames
- */
 #define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
-/**
- * Enable a-symmetric pause frames
- */
 #define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
 
 /**
@@ -376,10 +342,11 @@ int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
 
 /**
  * enum dpsw_stp_state - Spanning Tree Protocol (STP) states
- * @DPSW_STP_STATE_BLOCKING: Blocking state
+ * @DPSW_STP_STATE_DISABLED: Disabled state
  * @DPSW_STP_STATE_LISTENING: Listening state
  * @DPSW_STP_STATE_LEARNING: Learning state
  * @DPSW_STP_STATE_FORWARDING: Forwarding state
+ * @DPSW_STP_STATE_BLOCKING: Blocking state
  *
  */
 enum dpsw_stp_state {
@@ -524,6 +491,10 @@ int dpsw_vlan_add(struct fsl_mc_io *mc_io,
  *		list for this VLAN
  * @if_id: The set of interfaces that are
  *		assigned to the egress list for this VLAN
+ * @options: Options map for this command (DPSW_VLAN_ADD_IF_OPT_FDB_ID)
+ * @fdb_id: FDB id to be used by this VLAN on these specific interfaces
+ *		(taken into account only if the DPSW_VLAN_ADD_IF_OPT_FDB_ID is
+ *		specified in the options field)
  */
 struct dpsw_vlan_if_cfg {
 	u16 num_ifs;
-- 
cgit v1.2.3


From 2b7e3f7d1b7e347c328a88f937acd53b2849534a Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 16 Mar 2021 16:55:10 +0200
Subject: dpaa2-switch: reduce the size of the if_id bitmap to 64 bits

The maximum number of DPAA2 switch interfaces, including the control
interface, is 64. Even though this restriction existed from the first
place, the command structures which use an interface id bitmap were
poorly described and even though a single uint64_t is enough, all of
them used an array of 4 uint64_t's.
Fix this by reducing the size of the interface id field to a single
uint64_t.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h |  4 ++--
 drivers/net/ethernet/freescale/dpaa2/dpsw.c     | 18 ++++++++++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 2371fd5c40e3..996a59dcd01d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -340,7 +340,7 @@ struct dpsw_cmd_vlan_manage_if {
 	__le16 vlan_id;
 	__le32 pad1;
 	/* cmd word 1-4 */
-	__le64 if_id[4];
+	__le64 if_id;
 };
 
 struct dpsw_cmd_vlan_remove {
@@ -386,7 +386,7 @@ struct dpsw_cmd_fdb_multicast_op {
 	u8 mac_addr[6];
 	__le16 pad2;
 	/* cmd word 2-5 */
-	__le64 if_id[4];
+	__le64 if_id;
 };
 
 struct dpsw_cmd_fdb_dump {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index ad7a4c03b130..ef0f90ae683f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -773,16 +773,18 @@ int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
 		     u16 vlan_id,
 		     const struct dpsw_vlan_if_cfg *cfg)
 {
+	struct dpsw_cmd_vlan_add_if *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
-	struct dpsw_cmd_vlan_manage_if *cmd_params;
 
 	/* prepare command */
 	cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
+	cmd_params = (struct dpsw_cmd_vlan_add_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	cmd_params->options = cpu_to_le16(cfg->options);
+	cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -820,7 +822,7 @@ int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -854,7 +856,7 @@ int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -890,7 +892,7 @@ int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
 					  token);
 	cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params;
 	cmd_params->vlan_id = cpu_to_le16(vlan_id);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
@@ -1140,7 +1142,7 @@ int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
 	cmd_params->fdb_id = cpu_to_le16(fdb_id);
 	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
 	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 	for (i = 0; i < 6; i++)
 		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
 
@@ -1182,7 +1184,7 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
 	cmd_params->fdb_id = cpu_to_le16(fdb_id);
 	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
 	dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type);
-	build_if_id_bitmap(cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
 	for (i = 0; i < 6; i++)
 		cmd_params->mac_addr[i] = cfg->mac_addr[5 - i];
 
-- 
cgit v1.2.3


From 5ac2d254382c095393de06f5b041aad2d91ba35e Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 16 Mar 2021 16:55:11 +0200
Subject: dpaa2-switch: fit the function declaration on the same line

Multiple ABI function declarations are split unnecessarry on multiple
lines. Fix this so that we have a consistent coding style.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpsw.c | 191 +++++++-------------------
 drivers/net/ethernet/freescale/dpaa2/dpsw.h | 202 ++++++++--------------------
 2 files changed, 107 insertions(+), 286 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index ef0f90ae683f..56f3c23fce07 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -9,9 +9,7 @@
 #include "dpsw.h"
 #include "dpsw-cmd.h"
 
-static void build_if_id_bitmap(__le64 *bmap,
-			       const u16 *id,
-			       const u16 num_ifs)
+static void build_if_id_bitmap(__le64 *bmap, const u16 *id, const u16 num_ifs)
 {
 	int i;
 
@@ -38,10 +36,7 @@ static void build_if_id_bitmap(__le64 *bmap,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token)
+int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_open *cmd_params;
@@ -76,9 +71,7 @@ int dpsw_open(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
+int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -99,9 +92,7 @@ int dpsw_close(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token)
+int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -122,9 +113,7 @@ int dpsw_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token)
+int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -145,9 +134,7 @@ int dpsw_disable(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
+int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token)
 {
 	struct fsl_mc_command cmd = { 0 };
 
@@ -175,11 +162,8 @@ int dpsw_reset(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en)
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u8 en)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_set_irq_enable *cmd_params;
@@ -212,11 +196,8 @@ int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask)
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      u8 irq_index, u32 mask)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_set_irq_mask *cmd_params;
@@ -245,11 +226,8 @@ int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status)
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u32 *status)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_get_irq_status *cmd_params;
@@ -288,11 +266,8 @@ int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status)
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u8 irq_index, u32 status)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_clear_irq_status *cmd_params;
@@ -318,9 +293,7 @@ int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
+int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			struct dpsw_attr *attr)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -367,10 +340,7 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 			 struct dpsw_link_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -399,11 +369,8 @@ int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
  *
  * Return:	'0' on Success; Error code otherwise.
  */
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state)
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_link_state *state)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_get_link_state *cmd_params;
@@ -441,10 +408,7 @@ int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_tci_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -476,10 +440,7 @@ int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    struct dpsw_tci_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -521,10 +482,7 @@ int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_stp_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -554,12 +512,8 @@ int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter)
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 if_id, enum dpsw_counter type, u64 *counter)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_get_counter *cmd_params;
@@ -595,10 +549,7 @@ int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id)
+int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if *cmd_params;
@@ -623,10 +574,7 @@ int dpsw_if_enable(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id)
+int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if *cmd_params;
@@ -693,11 +641,8 @@ int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length)
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 if_id, u16 frame_length)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_if_set_max_frame_length *cmd_params;
@@ -731,11 +676,8 @@ int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg)
+int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		  u16 vlan_id, const struct dpsw_vlan_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_vlan_add *cmd_params;
@@ -767,11 +709,8 @@ int dpsw_vlan_add(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		     u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct dpsw_cmd_vlan_add_if *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
@@ -807,11 +746,8 @@ int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -841,11 +777,8 @@ int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -877,11 +810,8 @@ int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg)
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_vlan_manage_if *cmd_params;
@@ -907,9 +837,7 @@ int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 		     u16 vlan_id)
 {
 	struct fsl_mc_command cmd = { 0 };
@@ -996,11 +924,8 @@ int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_i
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg)
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			 u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_unicast_op *cmd_params;
@@ -1039,13 +964,8 @@ int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
  * The struct fdb_dump_entry array must be parsed until the end of memory
  * area or until an entry with mac_addr set to zero is found.
  */
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries)
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id,
+		  u64 iova_addr, u32 iova_size, u16 *num_entries)
 {
 	struct dpsw_cmd_fdb_dump *cmd_params;
 	struct dpsw_rsp_fdb_dump *rsp_params;
@@ -1082,11 +1002,8 @@ int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg)
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			    u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_unicast_op *cmd_params;
@@ -1124,11 +1041,8 @@ int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg)
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_multicast_op *cmd_params;
@@ -1166,11 +1080,8 @@ int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
  *
  * Return:	Completion status. '0' on Success; Error code otherwise.
  */
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg)
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_cmd_fdb_multicast_op *cmd_params;
@@ -1296,10 +1207,8 @@ int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
  *
  * Return:  '0' on Success; Error code otherwise.
  */
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver)
+int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			 u16 *major_ver, u16 *minor_ver)
 {
 	struct fsl_mc_command cmd = { 0 };
 	struct dpsw_rsp_get_api_version *rsp_params;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 6807b15f5807..f108cc61bb27 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -20,14 +20,9 @@ struct fsl_mc_io;
 
 #define DPSW_MAX_IF		64
 
-int dpsw_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpsw_id,
-	      u16 *token);
+int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token);
 
-int dpsw_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
+int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
 /* DPSW options */
 
@@ -87,17 +82,11 @@ enum dpsw_broadcast_cfg {
 	DPSW_BROADCAST_PER_FDB,
 };
 
-int dpsw_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token);
+int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
-int dpsw_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token);
+int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
-int dpsw_reset(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token);
+int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
 
 /* DPSW IRQ Index and Events */
 
@@ -121,29 +110,17 @@ struct dpsw_irq_cfg {
 	int irq_num;
 };
 
-int dpsw_set_irq_enable(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u8 en);
-
-int dpsw_set_irq_mask(struct fsl_mc_io *mc_io,
-		      u32 cmd_flags,
-		      u16 token,
-		      u8 irq_index,
-		      u32 mask);
-
-int dpsw_get_irq_status(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u8 irq_index,
-			u32 *status);
-
-int dpsw_clear_irq_status(struct fsl_mc_io *mc_io,
-			  u32 cmd_flags,
-			  u16 token,
-			  u8 irq_index,
-			  u32 status);
+int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u8 en);
+
+int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		      u8 irq_index, u32 mask);
+
+int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u8 irq_index, u32 *status);
+
+int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u8 irq_index, u32 status);
 
 /**
  * struct dpsw_attr - Structure representing DPSW attributes
@@ -184,9 +161,7 @@ struct dpsw_attr {
 	enum dpsw_broadcast_cfg broadcast_cfg;
 };
 
-int dpsw_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
+int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			struct dpsw_attr *attr);
 
 /**
@@ -286,11 +261,9 @@ struct dpsw_link_cfg {
 	u64 options;
 };
 
-int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 if_id,
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 			 struct dpsw_link_cfg *cfg);
+
 /**
  * struct dpsw_link_state - Structure representing DPSW link state
  * @rate: Rate
@@ -303,11 +276,8 @@ struct dpsw_link_state {
 	u8 up;
 };
 
-int dpsw_if_get_link_state(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 if_id,
-			   struct dpsw_link_state *state);
+int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 if_id, struct dpsw_link_state *state);
 
 /**
  * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration
@@ -328,16 +298,10 @@ struct dpsw_tci_cfg {
 	u16 vlan_id;
 };
 
-int dpsw_if_set_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_tci_cfg *cfg);
 
-int dpsw_if_get_tci(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    struct dpsw_tci_cfg *cfg);
 
 /**
@@ -367,10 +331,7 @@ struct dpsw_stp_cfg {
 	enum dpsw_stp_state state;
 };
 
-int dpsw_if_set_stp(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id,
+int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id,
 		    const struct dpsw_stp_cfg *cfg);
 
 /**
@@ -418,22 +379,12 @@ enum dpsw_counter {
 	DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc,
 };
 
-int dpsw_if_get_counter(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 if_id,
-			enum dpsw_counter type,
-			u64 *counter);
+int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 if_id, enum dpsw_counter type, u64 *counter);
 
-int dpsw_if_enable(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   u16 if_id);
+int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id);
 
-int dpsw_if_disable(struct fsl_mc_io *mc_io,
-		    u32 cmd_flags,
-		    u16 token,
-		    u16 if_id);
+int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id);
 
 /**
  * struct dpsw_if_attr - Structure representing DPSW interface attributes
@@ -463,11 +414,8 @@ struct dpsw_if_attr {
 int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			   u16 if_id, struct dpsw_if_attr *attr);
 
-int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 if_id,
-				 u16 frame_length);
+int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 if_id, u16 frame_length);
 
 /**
  * struct dpsw_vlan_cfg - VLAN Configuration
@@ -477,11 +425,8 @@ struct dpsw_vlan_cfg {
 	u16 fdb_id;
 };
 
-int dpsw_vlan_add(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 vlan_id,
-		  const struct dpsw_vlan_cfg *cfg);
+int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		  u16 vlan_id, const struct dpsw_vlan_cfg *cfg);
 
 #define DPSW_VLAN_ADD_IF_OPT_FDB_ID            0x0001
 
@@ -503,33 +448,19 @@ struct dpsw_vlan_if_cfg {
 	u16 fdb_id;
 };
 
-int dpsw_vlan_add_if(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
-		     u16 vlan_id,
-		     const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 vlan_id,
-			      const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			u16 vlan_id,
-			const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io,
-				 u32 cmd_flags,
-				 u16 token,
-				 u16 vlan_id,
-				 const struct dpsw_vlan_if_cfg *cfg);
-
-int dpsw_vlan_remove(struct fsl_mc_io *mc_io,
-		     u32 cmd_flags,
-		     u16 token,
+int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		     u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+				 u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg);
+
+int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 		     u16 vlan_id);
 
 /**
@@ -554,17 +485,11 @@ struct dpsw_fdb_unicast_cfg {
 	u16 if_egress;
 };
 
-int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 token,
-			 u16 fdb_id,
-			 const struct dpsw_fdb_unicast_cfg *cfg);
+int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			 u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg);
 
-int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io,
-			    u32 cmd_flags,
-			    u16 token,
-			    u16 fdb_id,
-			    const struct dpsw_fdb_unicast_cfg *cfg);
+int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			    u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg);
 
 #define DPSW_FDB_ENTRY_TYPE_DYNAMIC  BIT(0)
 #define DPSW_FDB_ENTRY_TYPE_UNICAST  BIT(1)
@@ -583,13 +508,8 @@ struct fdb_dump_entry {
 	u8 if_mask[8];
 };
 
-int dpsw_fdb_dump(struct fsl_mc_io *mc_io,
-		  u32 cmd_flags,
-		  u16 token,
-		  u16 fdb_id,
-		  u64 iova_addr,
-		  u32 iova_size,
-		  u16 *num_entries);
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id,
+		  u64 iova_addr, u32 iova_size, u16 *num_entries);
 
 /**
  * struct dpsw_fdb_multicast_cfg - Multi-cast entry configuration
@@ -605,17 +525,11 @@ struct dpsw_fdb_multicast_cfg {
 	u16 if_id[DPSW_MAX_IF];
 };
 
-int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io,
-			   u32 cmd_flags,
-			   u16 token,
-			   u16 fdb_id,
-			   const struct dpsw_fdb_multicast_cfg *cfg);
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
 
-int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io,
-			      u32 cmd_flags,
-			      u16 token,
-			      u16 fdb_id,
-			      const struct dpsw_fdb_multicast_cfg *cfg);
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
 
 /**
  * enum dpsw_fdb_learning_mode - Auto-learning modes
@@ -670,10 +584,8 @@ struct dpsw_fdb_attr {
 	u16 max_fdb_mc_groups;
 };
 
-int dpsw_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver);
+int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			 u16 *major_ver, u16 *minor_ver);
 
 int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			      u16 if_id, u8 mac_addr[6]);
-- 
cgit v1.2.3


From 4fe72de61ec8168fa0e4922f957992d91621a428 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 16 Mar 2021 16:55:12 +0200
Subject: dpaa2-eth: fixup kdoc warnings

Running kernel-doc over the dpaa2-eth driver generates a bunch of
warnings. Fix them up by removing code comments for macros which are
self-explanatory, respecting the kdoc format for macro documentation and
other small changes like describing the expected return values of
functions.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpkg.h  |   5 +-
 drivers/net/ethernet/freescale/dpaa2/dpmac.h |  24 +---
 drivers/net/ethernet/freescale/dpaa2/dpni.c  |   6 +
 drivers/net/ethernet/freescale/dpaa2/dpni.h  | 162 ++++++++++++---------------
 drivers/net/ethernet/freescale/dpaa2/dprtc.h |   3 -
 5 files changed, 82 insertions(+), 118 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpkg.h b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
index 6de613b13e4d..6f596a5fbeeb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpkg.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpkg.h
@@ -13,11 +13,12 @@
 /** Key Generator properties */
 
 /**
- * Number of masks per key extraction
+ * DPKG_NUM_OF_MASKS - Number of masks per key extraction
  */
 #define DPKG_NUM_OF_MASKS		4
+
 /**
- * Number of extractions per key profile
+ * DPKG_MAX_NUM_OF_EXTRACTS - Number of extractions per key profile
  */
 #define DPKG_MAX_NUM_OF_EXTRACTS	10
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
index 135f143097a5..8f7ceb731282 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpmac.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h
@@ -83,39 +83,21 @@ int dpmac_get_attributes(struct fsl_mc_io *mc_io,
 			 u16 token,
 			 struct dpmac_attr *attr);
 
-/**
- * DPMAC link configuration/state options
- */
+/* DPMAC link configuration/state options */
 
-/**
- * Enable auto-negotiation
- */
 #define DPMAC_LINK_OPT_AUTONEG			BIT_ULL(0)
-/**
- * Enable half-duplex mode
- */
 #define DPMAC_LINK_OPT_HALF_DUPLEX		BIT_ULL(1)
-/**
- * Enable pause frames
- */
 #define DPMAC_LINK_OPT_PAUSE			BIT_ULL(2)
-/**
- * Enable a-symmetric pause frames
- */
 #define DPMAC_LINK_OPT_ASYM_PAUSE		BIT_ULL(3)
 
-/**
- * Advertised link speeds
- */
+/* Advertised link speeds */
 #define DPMAC_ADVERTISED_10BASET_FULL		BIT_ULL(0)
 #define DPMAC_ADVERTISED_100BASET_FULL		BIT_ULL(1)
 #define DPMAC_ADVERTISED_1000BASET_FULL		BIT_ULL(2)
 #define DPMAC_ADVERTISED_10000BASET_FULL	BIT_ULL(4)
 #define DPMAC_ADVERTISED_2500BASEX_FULL		BIT_ULL(5)
 
-/**
- * Advertise auto-negotiation enable
- */
+/* Advertise auto-negotiation enable */
 #define DPMAC_ADVERTISED_AUTONEG		BIT_ULL(3)
 
 /**
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index aa429c17c343..d6afada99fb6 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -17,6 +17,8 @@
  * This function has to be called before the following functions:
  *	- dpni_set_rx_tc_dist()
  *	- dpni_set_qos_table()
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, u8 *key_cfg_buf)
 {
@@ -1793,6 +1795,8 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
  * If cfg.enable is set to 0 the command will clear flow steering table.
  * The packets will be classified according to settings made in
  * dpni_set_rx_hash_dist()
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
 			u32 cmd_flags,
@@ -1826,6 +1830,8 @@ int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io,
  * If cfg.enable is set to 1 the packets will be classified using a hash
  * function based on the key received in cfg.key_cfg_iova parameter.
  * If cfg.enable is set to 0 the packets will be sent to the default queue
+ *
+ * Return:	'0' on Success; Error code otherwise.
  */
 int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io,
 			  u32 cmd_flags,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 4e96d9362dd2..7de0562bbf59 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -10,73 +10,76 @@
 
 struct fsl_mc_io;
 
-/**
- * Data Path Network Interface API
+/* Data Path Network Interface API
  * Contains initialization APIs and runtime control APIs for DPNI
  */
 
 /** General DPNI macros */
 
 /**
- * Maximum number of traffic classes
+ * DPNI_MAX_TC - Maximum number of traffic classes
  */
 #define DPNI_MAX_TC				8
 /**
- * Maximum number of buffer pools per DPNI
+ * DPNI_MAX_DPBP - Maximum number of buffer pools per DPNI
  */
 #define DPNI_MAX_DPBP				8
 
 /**
- * All traffic classes considered; see dpni_set_queue()
+ * DPNI_ALL_TCS - All traffic classes considered; see dpni_set_queue()
  */
 #define DPNI_ALL_TCS				(u8)(-1)
 /**
- * All flows within traffic class considered; see dpni_set_queue()
+ * DPNI_ALL_TC_FLOWS - All flows within traffic class considered; see
+ * dpni_set_queue()
  */
 #define DPNI_ALL_TC_FLOWS			(u16)(-1)
 /**
- * Generate new flow ID; see dpni_set_queue()
+ * DPNI_NEW_FLOW_ID - Generate new flow ID; see dpni_set_queue()
  */
 #define DPNI_NEW_FLOW_ID			(u16)(-1)
 
 /**
- * Tx traffic is always released to a buffer pool on transmit, there are no
- * resources allocated to have the frames confirmed back to the source after
- * transmission.
+ * DPNI_OPT_TX_FRM_RELEASE - Tx traffic is always released to a buffer pool on
+ * transmit, there are no resources allocated to have the frames confirmed back
+ * to the source after transmission.
  */
 #define DPNI_OPT_TX_FRM_RELEASE			0x000001
 /**
- * Disables support for MAC address filtering for addresses other than primary
- * MAC address. This affects both unicast and multicast. Promiscuous mode can
- * still be enabled/disabled for both unicast and multicast. If promiscuous mode
- * is disabled, only traffic matching the primary MAC address will be accepted.
+ * DPNI_OPT_NO_MAC_FILTER - Disables support for MAC address filtering for
+ * addresses other than primary MAC address. This affects both unicast and
+ * multicast. Promiscuous mode can still be enabled/disabled for both unicast
+ * and multicast. If promiscuous mode is disabled, only traffic matching the
+ * primary MAC address will be accepted.
  */
 #define DPNI_OPT_NO_MAC_FILTER			0x000002
 /**
- * Allocate policers for this DPNI. They can be used to rate-limit traffic per
- * traffic class (TC) basis.
+ * DPNI_OPT_HAS_POLICING - Allocate policers for this DPNI. They can be used to
+ * rate-limit traffic per traffic class (TC) basis.
  */
 #define DPNI_OPT_HAS_POLICING			0x000004
 /**
- * Congestion can be managed in several ways, allowing the buffer pool to
- * deplete on ingress, taildrop on each queue or use congestion groups for sets
- * of queues. If set, it configures a single congestion groups across all TCs.
- * If reset, a congestion group is allocated for each TC. Only relevant if the
- * DPNI has multiple traffic classes.
+ * DPNI_OPT_SHARED_CONGESTION - Congestion can be managed in several ways,
+ * allowing the buffer pool to deplete on ingress, taildrop on each queue or
+ * use congestion groups for sets of queues. If set, it configures a single
+ * congestion groups across all TCs.  If reset, a congestion group is allocated
+ * for each TC. Only relevant if the DPNI has multiple traffic classes.
  */
 #define DPNI_OPT_SHARED_CONGESTION		0x000008
 /**
- * Enables TCAM for Flow Steering and QoS look-ups. If not specified, all
- * look-ups are exact match. Note that TCAM is not available on LS1088 and its
- * variants. Setting this bit on these SoCs will trigger an error.
+ * DPNI_OPT_HAS_KEY_MASKING - Enables TCAM for Flow Steering and QoS look-ups.
+ * If not specified, all look-ups are exact match. Note that TCAM is not
+ * available on LS1088 and its variants. Setting this bit on these SoCs will
+ * trigger an error.
  */
 #define DPNI_OPT_HAS_KEY_MASKING		0x000010
 /**
- * Disables the flow steering table.
+ * DPNI_OPT_NO_FS - Disables the flow steering table.
  */
 #define DPNI_OPT_NO_FS				0x000020
 /**
- * Flow steering table is shared between all traffic classes
+ * DPNI_OPT_SHARED_FS - Flow steering table is shared between all traffic
+ * classes
  */
 #define DPNI_OPT_SHARED_FS			0x001000
 
@@ -129,20 +132,14 @@ int dpni_reset(struct fsl_mc_io	*mc_io,
 	       u32		cmd_flags,
 	       u16		token);
 
-/**
- * DPNI IRQ Index and Events
- */
+/* DPNI IRQ Index and Events */
 
-/**
- * IRQ index
- */
 #define DPNI_IRQ_INDEX				0
-/**
- * IRQ events:
- *       indicates a change in link state
- *       indicates a change in endpoint
- */
+
+/* DPNI_IRQ_EVENT_LINK_CHANGED - indicates a change in link state */
 #define DPNI_IRQ_EVENT_LINK_CHANGED		0x00000001
+
+/* DPNI_IRQ_EVENT_ENDPOINT_CHANGED - indicates a change in endpoint */
 #define DPNI_IRQ_EVENT_ENDPOINT_CHANGED		0x00000002
 
 int dpni_set_irq_enable(struct fsl_mc_io	*mc_io,
@@ -222,32 +219,30 @@ int dpni_get_attributes(struct fsl_mc_io	*mc_io,
 			u16			token,
 			struct dpni_attr	*attr);
 
-/**
- * DPNI errors
- */
+/* DPNI errors */
 
 /**
- * Extract out of frame header error
+ * DPNI_ERROR_EOFHE - Extract out of frame header error
  */
 #define DPNI_ERROR_EOFHE	0x00020000
 /**
- * Frame length error
+ * DPNI_ERROR_FLE - Frame length error
  */
 #define DPNI_ERROR_FLE		0x00002000
 /**
- * Frame physical error
+ * DPNI_ERROR_FPE - Frame physical error
  */
 #define DPNI_ERROR_FPE		0x00001000
 /**
- * Parsing header error
+ * DPNI_ERROR_PHE - Parsing header error
  */
 #define DPNI_ERROR_PHE		0x00000020
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L3CE - Parser L3 checksum error
  */
 #define DPNI_ERROR_L3CE		0x00000004
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L4CE - Parser L3 checksum error
  */
 #define DPNI_ERROR_L4CE		0x00000001
 
@@ -281,36 +276,35 @@ int dpni_set_errors_behavior(struct fsl_mc_io		*mc_io,
 			     u16			token,
 			     struct dpni_error_cfg	*cfg);
 
-/**
- * DPNI buffer layout modification options
- */
+/* DPNI buffer layout modification options */
 
 /**
- * Select to modify the time-stamp setting
+ * DPNI_BUF_LAYOUT_OPT_TIMESTAMP - Select to modify the time-stamp setting
  */
 #define DPNI_BUF_LAYOUT_OPT_TIMESTAMP		0x00000001
 /**
- * Select to modify the parser-result setting; not applicable for Tx
+ * DPNI_BUF_LAYOUT_OPT_PARSER_RESULT - Select to modify the parser-result
+ * setting; not applicable for Tx
  */
 #define DPNI_BUF_LAYOUT_OPT_PARSER_RESULT	0x00000002
 /**
- * Select to modify the frame-status setting
+ * DPNI_BUF_LAYOUT_OPT_FRAME_STATUS - Select to modify the frame-status setting
  */
 #define DPNI_BUF_LAYOUT_OPT_FRAME_STATUS	0x00000004
 /**
- * Select to modify the private-data-size setting
+ * DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE - Select to modify the private-data-size setting
  */
 #define DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE	0x00000008
 /**
- * Select to modify the data-alignment setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_ALIGN - Select to modify the data-alignment setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_ALIGN		0x00000010
 /**
- * Select to modify the data-head-room setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM - Select to modify the data-head-room setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM	0x00000020
 /**
- * Select to modify the data-tail-room setting
+ * DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM - Select to modify the data-tail-room setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM	0x00000040
 
@@ -343,7 +337,8 @@ struct dpni_buffer_layout {
  * @DPNI_QUEUE_TX: Tx queue
  * @DPNI_QUEUE_TX_CONFIRM: Tx confirmation queue
  * @DPNI_QUEUE_RX_ERR: Rx error queue
- */enum dpni_queue_type {
+ */
+enum dpni_queue_type {
 	DPNI_QUEUE_RX,
 	DPNI_QUEUE_TX,
 	DPNI_QUEUE_TX_CONFIRM,
@@ -424,7 +419,7 @@ int dpni_get_tx_data_offset(struct fsl_mc_io	*mc_io,
  *	lack of buffers
  * @page_2.egress_discarded_frames: Egress discarded frame count
  * @page_2.egress_confirmed_frames: Egress confirmed frame count
- * @page3: Page_3 statistics structure
+ * @page_3: Page_3 statistics structure
  * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes
  *	dequeued from egress FQs
  * @page_3.egress_dequeue_frames: Cumulative count of the number of frames
@@ -501,30 +496,14 @@ int dpni_get_statistics(struct fsl_mc_io	*mc_io,
 			u8			page,
 			union dpni_statistics	*stat);
 
-/**
- * Enable auto-negotiation
- */
 #define DPNI_LINK_OPT_AUTONEG		0x0000000000000001ULL
-/**
- * Enable half-duplex mode
- */
 #define DPNI_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
-/**
- * Enable pause frames
- */
 #define DPNI_LINK_OPT_PAUSE		0x0000000000000004ULL
-/**
- * Enable a-symmetric pause frames
- */
 #define DPNI_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
-
-/**
- * Enable priority flow control pause frames
- */
 #define DPNI_LINK_OPT_PFC_PAUSE		0x0000000000000010ULL
 
 /**
- * struct - Structure representing DPNI link configuration
+ * struct dpni_link_cfg - Structure representing DPNI link configuration
  * @rate: Rate
  * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
  */
@@ -687,8 +666,8 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io			*mc_io,
 			const struct dpni_rx_tc_dist_cfg	*cfg);
 
 /**
- * When used for fs_miss_flow_id in function dpni_set_rx_dist,
- * will signal to dpni to drop all unclassified frames
+ * DPNI_FS_MISS_DROP - When used for fs_miss_flow_id in function
+ * dpni_set_rx_dist, will signal to dpni to drop all unclassified frames
  */
 #define DPNI_FS_MISS_DROP		((uint16_t)-1)
 
@@ -766,7 +745,7 @@ enum dpni_dest {
 
 /**
  * struct dpni_queue - Queue structure
- * @destination - Destination structure
+ * @destination: - Destination structure
  * @destination.id: ID of the destination, only relevant if DEST_TYPE is > 0.
  *	Identifies either a DPIO or a DPCON object.
  *	Not relevant for Tx queues.
@@ -837,9 +816,7 @@ struct dpni_queue_id {
 	u16 qdbin;
 };
 
-/**
- * Set User Context
- */
+/* Set User Context */
 #define DPNI_QUEUE_OPT_USER_CTX		0x00000001
 #define DPNI_QUEUE_OPT_DEST		0x00000002
 #define DPNI_QUEUE_OPT_FLC		0x00000004
@@ -904,9 +881,9 @@ struct dpni_dest_cfg {
 /* DPNI congestion options */
 
 /**
- * This congestion will trigger flow control or priority flow control.
- * This will have effect only if flow control is enabled with
- * dpni_set_link_cfg().
+ * DPNI_CONG_OPT_FLOW_CONTROL - This congestion will trigger flow control or
+ * priority flow control.  This will have effect only if flow control is
+ * enabled with dpni_set_link_cfg().
  */
 #define DPNI_CONG_OPT_FLOW_CONTROL		0x00000040
 
@@ -990,23 +967,24 @@ struct dpni_rule_cfg {
 };
 
 /**
- * Discard matching traffic. If set, this takes precedence over any other
- * configuration and matching traffic is always discarded.
+ * DPNI_FS_OPT_DISCARD - Discard matching traffic. If set, this takes
+ * precedence over any other configuration and matching traffic is always
+ * discarded.
  */
  #define DPNI_FS_OPT_DISCARD            0x1
 
 /**
- * Set FLC value. If set, flc member of struct dpni_fs_action_cfg is used to
- * override the FLC value set per queue.
+ * DPNI_FS_OPT_SET_FLC - Set FLC value. If set, flc member of struct
+ * dpni_fs_action_cfg is used to override the FLC value set per queue.
  * For more details check the Frame Descriptor section in the hardware
  * documentation.
  */
 #define DPNI_FS_OPT_SET_FLC            0x2
 
 /**
- * Indicates whether the 6 lowest significant bits of FLC are used for stash
- * control. If set, the 6 least significant bits in value are interpreted as
- * follows:
+ * DPNI_FS_OPT_SET_STASH_CONTROL - Indicates whether the 6 lowest significant
+ * bits of FLC are used for stash control. If set, the 6 least significant bits
+ * in value are interpreted as follows:
  *     - bits 0-1: indicates the number of 64 byte units of context that are
  *     stashed. FLC value is interpreted as a memory address in this case,
  *     excluding the 6 LS bits.
@@ -1068,7 +1046,7 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io,
 			 u16 *major_ver,
 			 u16 *minor_ver);
 /**
- * struct dpni_tx_shaping - Structure representing DPNI tx shaping configuration
+ * struct dpni_tx_shaping_cfg - Structure representing DPNI tx shaping configuration
  * @rate_limit:		Rate in Mbps
  * @max_burst_size:	Burst size in bytes (up to 64KB)
  */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index 05c413719e55..01d77c685a5b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -13,9 +13,6 @@
 
 struct fsl_mc_io;
 
-/**
- * Number of irq's
- */
 #define DPRTC_MAX_IRQ_NUM	1
 #define DPRTC_IRQ_INDEX		0
 
-- 
cgit v1.2.3


From 6561df560833952fee3ff8dd11c3b6a2041b3b1a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:54 +0200
Subject: mlxsw: spectrum_matchall: Propagate extack further

Due to the differences between Spectrum-1 and later ASICs, some of the
checks currently performed at the common code (where extack is
available) will need to be pushed to the per-ASIC operations.

As a preparation, propagate extack further to maintain proper error
reporting.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  6 ++-
 .../net/ethernet/mellanox/mlxsw/spectrum_flow.c    |  2 +-
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 57 ++++++++++++++--------
 3 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 0082f70daff3..650294d64237 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -1035,7 +1035,8 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 /* spectrum_matchall.c */
 struct mlxsw_sp_mall_ops {
 	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_port *mlxsw_sp_port, u32 rate);
+			  struct mlxsw_sp_port *mlxsw_sp_port, u32 rate,
+			  struct netlink_ext_ack *extack);
 	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_port *mlxsw_sp_port);
 };
@@ -1078,7 +1079,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 			   struct tc_cls_matchall_offload *f);
 int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port);
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct netlink_ext_ack *extack);
 void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
 			       struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index 0456cda33808..9e50c823a354 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -71,7 +71,7 @@ static int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 
-	err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port);
+	err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port, extack);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 841a2de37f36..d44a4c4b57f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -24,7 +24,8 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie
 
 static int
 mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_entry *mall_entry)
+			      struct mlxsw_sp_mall_entry *mall_entry,
+			      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_span_agent_parms agent_parms = {};
@@ -33,20 +34,24 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	int err;
 
 	if (!mall_entry->mirror.to_dev) {
-		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
+		NL_SET_ERR_MSG(extack, "Could not find requested device");
 		return -EINVAL;
 	}
 
 	agent_parms.to_dev = mall_entry->mirror.to_dev;
 	err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->mirror.span_id,
 				      &agent_parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get SPAN agent");
 		return err;
+	}
 
 	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port,
 					      mall_entry->ingress);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get analyzed port");
 		goto err_analyzed_port_get;
+	}
 
 	trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
 					MLXSW_SP_SPAN_TRIGGER_EGRESS;
@@ -54,8 +59,10 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	parms.probability_rate = 1;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port,
 				       &parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent");
 		goto err_agent_bind;
+	}
 
 	return 0;
 
@@ -94,19 +101,20 @@ static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_entry *mall_entry)
+			      struct mlxsw_sp_mall_entry *mall_entry,
+			      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	int err;
 
 	if (rtnl_dereference(mlxsw_sp_port->sample)) {
-		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
+		NL_SET_ERR_MSG(extack, "Sampling already active on port");
 		return -EEXIST;
 	}
 	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
 
 	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
-					     mall_entry->sample.rate);
+					     mall_entry->sample.rate, extack);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
@@ -130,13 +138,16 @@ mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
 
 static int
 mlxsw_sp_mall_port_rule_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			    struct mlxsw_sp_mall_entry *mall_entry)
+			    struct mlxsw_sp_mall_entry *mall_entry,
+			    struct netlink_ext_ack *extack)
 {
 	switch (mall_entry->type) {
 	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
-		return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry);
+		return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry,
+						     extack);
 	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
-		return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry);
+		return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry,
+						     extack);
 	default:
 		WARN_ON(1);
 		return -EINVAL;
@@ -270,7 +281,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 
 	list_for_each_entry(binding, &block->binding_list, list) {
 		err = mlxsw_sp_mall_port_rule_add(binding->mlxsw_sp_port,
-						  mall_entry);
+						  mall_entry, f->common.extack);
 		if (err)
 			goto rollback;
 	}
@@ -318,13 +329,15 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 }
 
 int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port)
+			    struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_mall_entry *mall_entry;
 	int err;
 
 	list_for_each_entry(mall_entry, &block->mall.list, list) {
-		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
+		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry,
+						  extack);
 		if (err)
 			goto rollback;
 	}
@@ -362,7 +375,7 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
 
 static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     u32 rate)
+				     u32 rate, struct netlink_ext_ack *extack)
 {
 	return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate);
 }
@@ -380,7 +393,7 @@ const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
 
 static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     u32 rate)
+				     u32 rate, struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
 	struct mlxsw_sp_span_agent_parms agent_parms = {
@@ -393,19 +406,25 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 	sample = rtnl_dereference(mlxsw_sp_port->sample);
 
 	err = mlxsw_sp_span_agent_get(mlxsw_sp, &sample->span_id, &agent_parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get SPAN agent");
 		return err;
+	}
 
 	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, true);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to get analyzed port");
 		goto err_analyzed_port_get;
+	}
 
 	trigger_parms.span_id = sample->span_id;
 	trigger_parms.probability_rate = rate;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
 				       mlxsw_sp_port, &trigger_parms);
-	if (err)
+	if (err) {
+		NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent");
 		goto err_agent_bind;
+	}
 
 	return 0;
 
-- 
cgit v1.2.3


From 559313b2cbb762ee4efa4ab6cd6f800c39984c20 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:55 +0200
Subject: mlxsw: spectrum_matchall: Push sampling checks to per-ASIC operations

Push some sampling checks to the per-ASIC operations, as they are no
longer relevant for all ASICs.

The sampling rate validation against the MPSC maximum rate is only
relevant for Spectrum-1, as Spectrum-2 and later ASICs no longer use
MPSC register for sampling.

The ingress / egress validation is pushed down to the per-ASIC
operations since subsequent patches are going to remove it for
Spectrum-2 and later ASICs.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  4 +--
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 32 ++++++++++++++--------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 650294d64237..848ae949e521 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -1035,8 +1035,8 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 /* spectrum_matchall.c */
 struct mlxsw_sp_mall_ops {
 	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_port *mlxsw_sp_port, u32 rate,
-			  struct netlink_ext_ack *extack);
+			  struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
+			  u32 rate, struct netlink_ext_ack *extack);
 	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_port *mlxsw_sp_port);
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index d44a4c4b57f8..483b902c2dd7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -114,6 +114,7 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
 
 	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
+					     mall_entry->ingress,
 					     mall_entry->sample.rate, extack);
 	if (err)
 		goto err_port_sample_set;
@@ -253,22 +254,12 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		mall_entry->mirror.to_dev = act->dev;
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
-		if (!mall_entry->ingress) {
-			NL_SET_ERR_MSG(f->common.extack, "Sample is not supported on egress");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		if (flower_prio_valid &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
-		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-			NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
 		mall_entry->sample.psample_group = act->sample.psample_group;
 		mall_entry->sample.truncate = act->sample.truncate;
@@ -375,8 +366,19 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
 
 static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     u32 rate, struct netlink_ext_ack *extack)
+				     bool ingress, u32 rate,
+				     struct netlink_ext_ack *extack)
 {
+	if (!ingress) {
+		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
+		return -EOPNOTSUPP;
+	}
+
+	if (rate > MLXSW_REG_MPSC_RATE_MAX) {
+		NL_SET_ERR_MSG(extack, "Unsupported sampling rate");
+		return -EOPNOTSUPP;
+	}
+
 	return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate);
 }
 
@@ -393,7 +395,8 @@ const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
 
 static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     u32 rate, struct netlink_ext_ack *extack)
+				     bool ingress, u32 rate,
+				     struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
 	struct mlxsw_sp_span_agent_parms agent_parms = {
@@ -403,6 +406,11 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_port_sample *sample;
 	int err;
 
+	if (!ingress) {
+		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
+		return -EOPNOTSUPP;
+	}
+
 	sample = rtnl_dereference(mlxsw_sp_port->sample);
 
 	err = mlxsw_sp_span_agent_get(mlxsw_sp, &sample->span_id, &agent_parms);
-- 
cgit v1.2.3


From e09a59555a3028564f3cb0d10c24ab86f50cbb79 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:56 +0200
Subject: mlxsw: spectrum_matchall: Pass matchall entry to sampling operations

The entry will be required by the next patches, so pass it. No
functional changes intended.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  9 +++++---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 27 +++++++++++++---------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 848ae949e521..1d8afa6365d8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -133,6 +133,7 @@ struct mlxsw_sp_ptp_state;
 struct mlxsw_sp_ptp_ops;
 struct mlxsw_sp_span_ops;
 struct mlxsw_sp_qdisc_state;
+struct mlxsw_sp_mall_entry;
 
 struct mlxsw_sp_port_mapping {
 	u8 module;
@@ -1035,10 +1036,12 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 /* spectrum_matchall.c */
 struct mlxsw_sp_mall_ops {
 	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_port *mlxsw_sp_port, bool ingress,
-			  u32 rate, struct netlink_ext_ack *extack);
+			  struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct mlxsw_sp_mall_entry *mall_entry,
+			  struct netlink_ext_ack *extack);
 	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
-			   struct mlxsw_sp_port *mlxsw_sp_port);
+			   struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct mlxsw_sp_mall_entry *mall_entry);
 };
 
 extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 483b902c2dd7..b189e84987db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -114,8 +114,7 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
 
 	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
-					     mall_entry->ingress,
-					     mall_entry->sample.rate, extack);
+					     mall_entry, extack);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
@@ -126,14 +125,15 @@ err_port_sample_set:
 }
 
 static void
-mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
+mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 
 	if (!mlxsw_sp_port->sample)
 		return;
 
-	mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port);
+	mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port, mall_entry);
 	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
 }
 
@@ -164,7 +164,7 @@ mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port,
 		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry);
 		break;
 	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
-		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
+		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port, mall_entry);
 		break;
 	default:
 		WARN_ON(1);
@@ -366,10 +366,12 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
 
 static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     bool ingress, u32 rate,
+				     struct mlxsw_sp_mall_entry *mall_entry,
 				     struct netlink_ext_ack *extack)
 {
-	if (!ingress) {
+	u32 rate = mall_entry->sample.rate;
+
+	if (!mall_entry->ingress) {
 		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
 		return -EOPNOTSUPP;
 	}
@@ -383,7 +385,8 @@ static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 }
 
 static void mlxsw_sp1_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_port *mlxsw_sp_port)
+				      struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
 }
@@ -395,7 +398,7 @@ const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = {
 
 static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_port *mlxsw_sp_port,
-				     bool ingress, u32 rate,
+				     struct mlxsw_sp_mall_entry *mall_entry,
 				     struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
@@ -404,9 +407,10 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
 	};
 	struct mlxsw_sp_port_sample *sample;
+	u32 rate = mall_entry->sample.rate;
 	int err;
 
-	if (!ingress) {
+	if (!mall_entry->ingress) {
 		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
 		return -EOPNOTSUPP;
 	}
@@ -444,7 +448,8 @@ err_analyzed_port_get:
 }
 
 static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
-				      struct mlxsw_sp_port *mlxsw_sp_port)
+				      struct mlxsw_sp_port *mlxsw_sp_port,
+				      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
 	struct mlxsw_sp_port_sample *sample;
-- 
cgit v1.2.3


From 1b9fc42e46dfea0efa39165d906b0f6a05d6b558 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:57 +0200
Subject: mlxsw: spectrum: Track sampling triggers in a hash table

Currently, mlxsw supports a single sampling trigger type (i.e., received
packet). When sampling is configured on an ingress port, the sampling
parameters (e.g., pointer to the psample group) are stored as an
attribute of the port, so that they could be passed to
psample_sample_packet() when a sampled packet is trapped to the CPU.

Subsequent patches are going to add more types of sampling triggers,
making it difficult to maintain the current scheme.

Instead, store all the active sampling triggers with their associated
parameters in a hash table. That way, more trigger types can be easily
added.

The next patch will flip mlxsw to use the hash table instead of the
current scheme.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 148 +++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  30 +++++
 2 files changed, 178 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6054147fd51c..7e40d45b55fe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -23,6 +23,8 @@
 #include <linux/netlink.h>
 #include <linux/jhash.h>
 #include <linux/log2.h>
+#include <linux/refcount.h>
+#include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/netevent.h>
@@ -2550,6 +2552,142 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
 	.get_stats	= mlxsw_sp2_get_stats,
 };
 
+struct mlxsw_sp_sample_trigger_node {
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params params;
+	struct rhash_head ht_node;
+	struct rcu_head rcu;
+	refcount_t refcount;
+};
+
+static const struct rhashtable_params mlxsw_sp_sample_trigger_ht_params = {
+	.key_offset = offsetof(struct mlxsw_sp_sample_trigger_node, trigger),
+	.head_offset = offsetof(struct mlxsw_sp_sample_trigger_node, ht_node),
+	.key_len = sizeof(struct mlxsw_sp_sample_trigger),
+	.automatic_shrinking = true,
+};
+
+static void
+mlxsw_sp_sample_trigger_key_init(struct mlxsw_sp_sample_trigger *key,
+				 const struct mlxsw_sp_sample_trigger *trigger)
+{
+	memset(key, 0, sizeof(*key));
+	key->type = trigger->type;
+	key->local_port = trigger->local_port;
+}
+
+/* RCU read lock must be held */
+struct mlxsw_sp_sample_params *
+mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_sp_sample_trigger *trigger)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+	trigger_node = rhashtable_lookup(&mlxsw_sp->sample_trigger_ht, &key,
+					 mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return NULL;
+
+	return &trigger_node->params;
+}
+
+static int
+mlxsw_sp_sample_trigger_node_init(struct mlxsw_sp *mlxsw_sp,
+				  const struct mlxsw_sp_sample_trigger *trigger,
+				  const struct mlxsw_sp_sample_params *params)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	int err;
+
+	trigger_node = kzalloc(sizeof(*trigger_node), GFP_KERNEL);
+	if (!trigger_node)
+		return -ENOMEM;
+
+	trigger_node->trigger = *trigger;
+	trigger_node->params = *params;
+	refcount_set(&trigger_node->refcount, 1);
+
+	err = rhashtable_insert_fast(&mlxsw_sp->sample_trigger_ht,
+				     &trigger_node->ht_node,
+				     mlxsw_sp_sample_trigger_ht_params);
+	if (err)
+		goto err_rhashtable_insert;
+
+	return 0;
+
+err_rhashtable_insert:
+	kfree(trigger_node);
+	return err;
+}
+
+static void
+mlxsw_sp_sample_trigger_node_fini(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_sample_trigger_node *trigger_node)
+{
+	rhashtable_remove_fast(&mlxsw_sp->sample_trigger_ht,
+			       &trigger_node->ht_node,
+			       mlxsw_sp_sample_trigger_ht_params);
+	kfree_rcu(trigger_node, rcu);
+}
+
+int
+mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_sample_trigger *trigger,
+				   const struct mlxsw_sp_sample_params *params,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	ASSERT_RTNL();
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+
+	trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht,
+					      &key,
+					      mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key,
+							 params);
+
+	if (trigger_node->params.psample_group != params->psample_group ||
+	    trigger_node->params.truncate != params->truncate ||
+	    trigger_node->params.rate != params->rate ||
+	    trigger_node->params.trunc_size != params->trunc_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Sampling parameters do not match for an existing sampling trigger");
+		return -EINVAL;
+	}
+
+	refcount_inc(&trigger_node->refcount);
+
+	return 0;
+}
+
+void
+mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
+				     const struct mlxsw_sp_sample_trigger *trigger)
+{
+	struct mlxsw_sp_sample_trigger_node *trigger_node;
+	struct mlxsw_sp_sample_trigger key;
+
+	ASSERT_RTNL();
+
+	mlxsw_sp_sample_trigger_key_init(&key, trigger);
+
+	trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht,
+					      &key,
+					      mlxsw_sp_sample_trigger_ht_params);
+	if (!trigger_node)
+		return;
+
+	if (!refcount_dec_and_test(&trigger_node->refcount))
+		return;
+
+	mlxsw_sp_sample_trigger_node_fini(mlxsw_sp, trigger_node);
+}
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 				    unsigned long event, void *ptr);
 
@@ -2704,6 +2842,13 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		goto err_port_module_info_init;
 	}
 
+	err = rhashtable_init(&mlxsw_sp->sample_trigger_ht,
+			      &mlxsw_sp_sample_trigger_ht_params);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to init sampling trigger hashtable\n");
+		goto err_sample_trigger_init;
+	}
+
 	err = mlxsw_sp_ports_create(mlxsw_sp);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n");
@@ -2713,6 +2858,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 	return 0;
 
 err_ports_create:
+	rhashtable_destroy(&mlxsw_sp->sample_trigger_ht);
+err_sample_trigger_init:
 	mlxsw_sp_port_module_info_fini(mlxsw_sp);
 err_port_module_info_init:
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
@@ -2847,6 +2994,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
+	rhashtable_destroy(&mlxsw_sp->sample_trigger_ht);
 	mlxsw_sp_port_module_info_fini(mlxsw_sp);
 	mlxsw_sp_dpipe_fini(mlxsw_sp);
 	unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 1d8afa6365d8..877d17f42e4a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -16,6 +16,7 @@
 #include <linux/in6.h>
 #include <linux/notifier.h>
 #include <linux/net_namespace.h>
+#include <linux/spinlock.h>
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
@@ -149,6 +150,7 @@ struct mlxsw_sp {
 	const unsigned char *mac_mask;
 	struct mlxsw_sp_upper *lags;
 	struct mlxsw_sp_port_mapping **port_mapping;
+	struct rhashtable sample_trigger_ht;
 	struct mlxsw_sp_sb *sb;
 	struct mlxsw_sp_bridge *bridge;
 	struct mlxsw_sp_router *router;
@@ -234,6 +236,23 @@ struct mlxsw_sp_port_pcpu_stats {
 	u32			tx_dropped;
 };
 
+enum mlxsw_sp_sample_trigger_type {
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS,
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS,
+};
+
+struct mlxsw_sp_sample_trigger {
+	enum mlxsw_sp_sample_trigger_type type;
+	u8 local_port;
+};
+
+struct mlxsw_sp_sample_params {
+	struct psample_group *psample_group;
+	u32 trunc_size;
+	u32 rate;
+	bool truncate;
+};
+
 struct mlxsw_sp_port_sample {
 	struct psample_group *psample_group;
 	u32 trunc_size;
@@ -534,6 +553,17 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port,
 				      struct mlxsw_sp_hdroom *hdroom);
 int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port,
 			      const struct mlxsw_sp_hdroom *hdroom);
+struct mlxsw_sp_sample_params *
+mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp,
+				      const struct mlxsw_sp_sample_trigger *trigger);
+int
+mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
+				   const struct mlxsw_sp_sample_trigger *trigger,
+				   const struct mlxsw_sp_sample_params *params,
+				   struct netlink_ext_ack *extack);
+void
+mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
+				     const struct mlxsw_sp_sample_trigger *trigger);
 
 extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals;
 extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;
-- 
cgit v1.2.3


From 90f53c53ec4acaa86055f4d2e98767eeb735b42d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:58 +0200
Subject: mlxsw: spectrum: Start using sampling triggers hash table

Start using the previously introduced sampling triggers hash table to
store sampling parameters instead of storing them as attributes of the
sampled port.

This makes it easier to introduce new sampling triggers.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     | 16 +++---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 57 ++++++++++++----------
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 15 +++---
 3 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 877d17f42e4a..4b4d643abceb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -253,14 +253,6 @@ struct mlxsw_sp_sample_params {
 	bool truncate;
 };
 
-struct mlxsw_sp_port_sample {
-	struct psample_group *psample_group;
-	u32 trunc_size;
-	u32 rate;
-	bool truncate;
-	int span_id;	/* Relevant for Spectrum-2 onwards. */
-};
-
 struct mlxsw_sp_bridge_port;
 struct mlxsw_sp_fid;
 
@@ -324,7 +316,6 @@ struct mlxsw_sp_port {
 		struct mlxsw_sp_port_xstats xstats;
 		struct delayed_work update_dw;
 	} periodic_hw_stats;
-	struct mlxsw_sp_port_sample __rcu *sample;
 	struct list_head vlans_list;
 	struct mlxsw_sp_port_vlan *default_vlan;
 	struct mlxsw_sp_qdisc_state *qdisc;
@@ -1092,6 +1083,11 @@ struct mlxsw_sp_mall_trap_entry {
 	int span_id;
 };
 
+struct mlxsw_sp_mall_sample_entry {
+	struct mlxsw_sp_sample_params params;
+	int span_id;	/* Relevant for Spectrum-2 onwards. */
+};
+
 struct mlxsw_sp_mall_entry {
 	struct list_head list;
 	unsigned long cookie;
@@ -1101,7 +1097,7 @@ struct mlxsw_sp_mall_entry {
 	union {
 		struct mlxsw_sp_mall_mirror_entry mirror;
 		struct mlxsw_sp_mall_trap_entry trap;
-		struct mlxsw_sp_port_sample sample;
+		struct mlxsw_sp_mall_sample_entry sample;
 	};
 	struct rcu_head rcu;
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index b189e84987db..459c452b60ba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -105,13 +105,19 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_sample_trigger trigger;
 	int err;
 
-	if (rtnl_dereference(mlxsw_sp_port->sample)) {
-		NL_SET_ERR_MSG(extack, "Sampling already active on port");
-		return -EEXIST;
-	}
-	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
+	if (mall_entry->ingress)
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	else
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port->local_port;
+	err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger,
+						 &mall_entry->sample.params,
+						 extack);
+	if (err)
+		return err;
 
 	err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port,
 					     mall_entry, extack);
@@ -120,7 +126,7 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 
 err_port_sample_set:
-	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
 	return err;
 }
 
@@ -129,12 +135,16 @@ mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_sample_trigger trigger;
 
-	if (!mlxsw_sp_port->sample)
-		return;
+	if (mall_entry->ingress)
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	else
+		trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port->local_port;
 
 	mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port, mall_entry);
-	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
 }
 
 static int
@@ -261,10 +271,10 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 			goto errout;
 		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
-		mall_entry->sample.psample_group = act->sample.psample_group;
-		mall_entry->sample.truncate = act->sample.truncate;
-		mall_entry->sample.trunc_size = act->sample.trunc_size;
-		mall_entry->sample.rate = act->sample.rate;
+		mall_entry->sample.params.psample_group = act->sample.psample_group;
+		mall_entry->sample.params.truncate = act->sample.truncate;
+		mall_entry->sample.params.trunc_size = act->sample.trunc_size;
+		mall_entry->sample.params.rate = act->sample.rate;
 	} else {
 		err = -EOPNOTSUPP;
 		goto errout;
@@ -369,7 +379,7 @@ static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_mall_entry *mall_entry,
 				     struct netlink_ext_ack *extack)
 {
-	u32 rate = mall_entry->sample.rate;
+	u32 rate = mall_entry->sample.params.rate;
 
 	if (!mall_entry->ingress) {
 		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
@@ -406,8 +416,7 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		.to_dev = NULL,	/* Mirror to CPU. */
 		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
 	};
-	struct mlxsw_sp_port_sample *sample;
-	u32 rate = mall_entry->sample.rate;
+	u32 rate = mall_entry->sample.params.rate;
 	int err;
 
 	if (!mall_entry->ingress) {
@@ -415,9 +424,8 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 
-	sample = rtnl_dereference(mlxsw_sp_port->sample);
-
-	err = mlxsw_sp_span_agent_get(mlxsw_sp, &sample->span_id, &agent_parms);
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->sample.span_id,
+				      &agent_parms);
 	if (err) {
 		NL_SET_ERR_MSG(extack, "Failed to get SPAN agent");
 		return err;
@@ -429,7 +437,7 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		goto err_analyzed_port_get;
 	}
 
-	trigger_parms.span_id = sample->span_id;
+	trigger_parms.span_id = mall_entry->sample.span_id;
 	trigger_parms.probability_rate = rate;
 	err = mlxsw_sp_span_agent_bind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
 				       mlxsw_sp_port, &trigger_parms);
@@ -443,7 +451,7 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 err_agent_bind:
 	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
 err_analyzed_port_get:
-	mlxsw_sp_span_agent_put(mlxsw_sp, sample->span_id);
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
 	return err;
 }
 
@@ -452,15 +460,12 @@ static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
-	struct mlxsw_sp_port_sample *sample;
-
-	sample = rtnl_dereference(mlxsw_sp_port->sample);
 
-	trigger_parms.span_id = sample->span_id;
+	trigger_parms.span_id = mall_entry->sample.span_id;
 	mlxsw_sp_span_agent_unbind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
 				   mlxsw_sp_port, &trigger_parms);
 	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
-	mlxsw_sp_span_agent_put(mlxsw_sp, sample->span_id);
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
 }
 
 const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 056201029ce5..db3c561ac3ea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -257,8 +257,9 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 					void *trap_ctx)
 {
 	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params *params;
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	struct mlxsw_sp_port_sample *sample;
 	struct psample_metadata md = {};
 	int err;
 
@@ -270,8 +271,10 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 	if (!mlxsw_sp_port)
 		goto out;
 
-	sample = rcu_dereference(mlxsw_sp_port->sample);
-	if (!sample)
+	trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS;
+	trigger.local_port = local_port;
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
 		goto out;
 
 	/* The psample module expects skb->data to point to the start of the
@@ -279,9 +282,9 @@ static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
 	 */
 	skb_push(skb, ETH_HLEN);
 	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
-				 mlxsw_sp_port->dev->ifindex, sample->truncate,
-				 sample->trunc_size);
-	psample_sample_packet(sample->psample_group, skb, sample->rate, &md);
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
 out:
 	consume_skb(skb);
 }
-- 
cgit v1.2.3


From 54d0e963f683362418424f9ce61884a6e1cced38 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:02:59 +0200
Subject: mlxsw: spectrum_matchall: Add support for egress sampling

Allow user space to install a matchall classifier with sample action on
egress. This is only supported on Spectrum-2 onwards, so Spectrum-1 will
continue to return an error.

Programming the hardware to sample on egress is identical to ingress
sampling with the sole change of using a different sampling trigger.

Upon receiving a sampled packet, the sampling trigger (ingress vs.
egress) will be encoded in the mirroring reason in the Completion Queue
Element (CQE). The mirroring reason is used to lookup the sampling
parameters (e.g., psample group) which are passed to the psample module.

Note that locally generated packets that are sampled are simply
consumed. This is done for several reasons.

First, such packets do not have an ingress netdev given that their Rx
local port is the CPU port. This breaks several basic assumptions.

Second, sampling using the same interface (tc), but with flower
classifier will not result in locally generated packets being sampled
given that such packets are not subject to the policy engine.

Third, realistically, this is not a big deal given that the vast
majority of the packets being transmitted through the port are not
locally generated packets.

Fourth, if such packets do need to be sampled, they can be sampled with
a 'skip_hw' filter and reported to the same sampling group as the data
path packets. The software sampling rate can also be adjusted to fit the
rate of the locally generated packets which is much lower than the rate
of the data path traffic.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 26 +++++-----
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 55 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 459c452b60ba..ce58a795c6fc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -417,13 +417,9 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
 	};
 	u32 rate = mall_entry->sample.params.rate;
+	enum mlxsw_sp_span_trigger span_trigger;
 	int err;
 
-	if (!mall_entry->ingress) {
-		NL_SET_ERR_MSG(extack, "Sampling is not supported on egress");
-		return -EOPNOTSUPP;
-	}
-
 	err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->sample.span_id,
 				      &agent_parms);
 	if (err) {
@@ -431,16 +427,19 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 		return err;
 	}
 
-	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, true);
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port,
+					      mall_entry->ingress);
 	if (err) {
 		NL_SET_ERR_MSG(extack, "Failed to get analyzed port");
 		goto err_analyzed_port_get;
 	}
 
+	span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					     MLXSW_SP_SPAN_TRIGGER_EGRESS;
 	trigger_parms.span_id = mall_entry->sample.span_id;
 	trigger_parms.probability_rate = rate;
-	err = mlxsw_sp_span_agent_bind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
-				       mlxsw_sp_port, &trigger_parms);
+	err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port,
+				       &trigger_parms);
 	if (err) {
 		NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent");
 		goto err_agent_bind;
@@ -449,7 +448,7 @@ static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 
 err_agent_bind:
-	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
 err_analyzed_port_get:
 	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
 	return err;
@@ -460,11 +459,14 @@ static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	struct mlxsw_sp_span_trigger_parms trigger_parms = {};
+	enum mlxsw_sp_span_trigger span_trigger;
 
+	span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					     MLXSW_SP_SPAN_TRIGGER_EGRESS;
 	trigger_parms.span_id = mall_entry->sample.span_id;
-	mlxsw_sp_span_agent_unbind(mlxsw_sp, MLXSW_SP_SPAN_TRIGGER_INGRESS,
-				   mlxsw_sp_port, &trigger_parms);
-	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, true);
+	mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port,
+				   &trigger_parms);
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
 	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index db3c561ac3ea..3dbf5e53e9ff 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -53,6 +53,8 @@ enum {
 	MLXSW_SP_MIRROR_REASON_INGRESS = 1,
 	/* Packet was early dropped. */
 	MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9,
+	/* Packet was mirrored from egress. */
+	MLXSW_SP_MIRROR_REASON_EGRESS = 14,
 };
 
 static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
@@ -289,6 +291,56 @@ out:
 	consume_skb(skb);
 }
 
+static void mlxsw_sp_rx_sample_tx_listener(struct sk_buff *skb, u8 local_port,
+					   void *trap_ctx)
+{
+	struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info;
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_port *mlxsw_sp_port, *mlxsw_sp_port_tx;
+	struct mlxsw_sp_sample_trigger trigger;
+	struct mlxsw_sp_sample_params *params;
+	struct psample_metadata md = {};
+	int err;
+
+	/* Locally generated packets are not reported from the policy engine
+	 * trigger, so do not report them from the egress trigger as well.
+	 */
+	if (local_port == MLXSW_PORT_CPU_PORT)
+		goto out;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	/* Packet was sampled from Tx, so we need to retrieve the sample
+	 * parameters based on the Tx port and not the Rx port.
+	 */
+	mlxsw_sp_port_tx = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info);
+	if (!mlxsw_sp_port_tx)
+		goto out;
+
+	trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS;
+	trigger.local_port = mlxsw_sp_port_tx->local_port;
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+	consume_skb(skb);
+}
+
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
@@ -1843,6 +1895,9 @@ mlxsw_sp2_trap_items_arr[] = {
 			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1,
 					 SP_PKT_SAMPLE,
 					 MLXSW_SP_MIRROR_REASON_INGRESS),
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_EGRESS),
 		},
 	},
 };
-- 
cgit v1.2.3


From ca19ea63f739c7a4e5dad64951706fea789e1f1a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:03:00 +0200
Subject: mlxsw: core_acl_flex_actions: Add mirror sampler action

Add core functionality required to support mirror sampler action in the
policy engine. The switch driver (e.g., 'mlxsw_spectrum') is required to
implement the sampler_add() / sampler_del() callbacks that perform the
necessary configuration before the sampler action can be installed. The
next patch will implement it for Spectrum-{2,3}, while Spectrum-1 will
return an error, given it is not supported.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/core_acl_flex_actions.c         | 131 +++++++++++++++++++++
 .../mellanox/mlxsw/core_acl_flex_actions.h         |  11 ++
 2 files changed, 142 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 4d699fe98cb6..78d9c0196f2b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -2007,3 +2007,134 @@ int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport,
 	return 0;
 }
 EXPORT_SYMBOL(mlxsw_afa_block_append_l4port);
+
+/* Mirror Sampler Action
+ * ---------------------
+ * The SAMPLER_ACTION is used to mirror packets with a probability (sampling).
+ */
+
+#define MLXSW_AFA_SAMPLER_CODE 0x13
+#define MLXSW_AFA_SAMPLER_SIZE 1
+
+/* afa_sampler_mirror_agent
+ * Mirror (SPAN) agent.
+ */
+MLXSW_ITEM32(afa, sampler, mirror_agent, 0x04, 0, 3);
+
+#define MLXSW_AFA_SAMPLER_RATE_MAX (BIT(24) - 1)
+
+/* afa_sampler_mirror_probability_rate
+ * Mirroring probability.
+ * Valid values are 1 to 2^24 - 1
+ */
+MLXSW_ITEM32(afa, sampler, mirror_probability_rate, 0x08, 0, 24);
+
+static void mlxsw_afa_sampler_pack(char *payload, u8 mirror_agent, u32 rate)
+{
+	mlxsw_afa_sampler_mirror_agent_set(payload, mirror_agent);
+	mlxsw_afa_sampler_mirror_probability_rate_set(payload, rate);
+}
+
+struct mlxsw_afa_sampler {
+	struct mlxsw_afa_resource resource;
+	int span_id;
+	u8 local_port;
+	bool ingress;
+};
+
+static void mlxsw_afa_sampler_destroy(struct mlxsw_afa_block *block,
+				      struct mlxsw_afa_sampler *sampler)
+{
+	mlxsw_afa_resource_del(&sampler->resource);
+	block->afa->ops->sampler_del(block->afa->ops_priv, sampler->local_port,
+				     sampler->span_id, sampler->ingress);
+	kfree(sampler);
+}
+
+static void mlxsw_afa_sampler_destructor(struct mlxsw_afa_block *block,
+					 struct mlxsw_afa_resource *resource)
+{
+	struct mlxsw_afa_sampler *sampler;
+
+	sampler = container_of(resource, struct mlxsw_afa_sampler, resource);
+	mlxsw_afa_sampler_destroy(block, sampler);
+}
+
+static struct mlxsw_afa_sampler *
+mlxsw_afa_sampler_create(struct mlxsw_afa_block *block, u8 local_port,
+			 struct psample_group *psample_group, u32 rate,
+			 u32 trunc_size, bool truncate, bool ingress,
+			 struct netlink_ext_ack *extack)
+{
+	struct mlxsw_afa_sampler *sampler;
+	int err;
+
+	sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+	if (!sampler)
+		return ERR_PTR(-ENOMEM);
+
+	err = block->afa->ops->sampler_add(block->afa->ops_priv, local_port,
+					   psample_group, rate, trunc_size,
+					   truncate, ingress, &sampler->span_id,
+					   extack);
+	if (err)
+		goto err_sampler_add;
+
+	sampler->ingress = ingress;
+	sampler->local_port = local_port;
+	sampler->resource.destructor = mlxsw_afa_sampler_destructor;
+	mlxsw_afa_resource_add(block, &sampler->resource);
+	return sampler;
+
+err_sampler_add:
+	kfree(sampler);
+	return ERR_PTR(err);
+}
+
+static int
+mlxsw_afa_block_append_allocated_sampler(struct mlxsw_afa_block *block,
+					 u8 mirror_agent, u32 rate)
+{
+	char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_SAMPLER_CODE,
+						  MLXSW_AFA_SAMPLER_SIZE);
+
+	if (IS_ERR(act))
+		return PTR_ERR(act);
+	mlxsw_afa_sampler_pack(act, mirror_agent, rate);
+	return 0;
+}
+
+int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port,
+				   struct psample_group *psample_group,
+				   u32 rate, u32 trunc_size, bool truncate,
+				   bool ingress,
+				   struct netlink_ext_ack *extack)
+{
+	struct mlxsw_afa_sampler *sampler;
+	int err;
+
+	if (rate > MLXSW_AFA_SAMPLER_RATE_MAX) {
+		NL_SET_ERR_MSG_MOD(extack, "Sampling rate is too high");
+		return -EINVAL;
+	}
+
+	sampler = mlxsw_afa_sampler_create(block, local_port, psample_group,
+					   rate, trunc_size, truncate, ingress,
+					   extack);
+	if (IS_ERR(sampler))
+		return PTR_ERR(sampler);
+
+	err = mlxsw_afa_block_append_allocated_sampler(block, sampler->span_id,
+						       rate);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot append sampler action");
+		goto err_append_allocated_sampler;
+	}
+
+	return 0;
+
+err_append_allocated_sampler:
+	mlxsw_afa_sampler_destroy(block, sampler);
+	return err;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_sampler);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index b652497b1002..b65bf98eb5ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -30,6 +30,12 @@ struct mlxsw_afa_ops {
 			   u16 *p_policer_index,
 			   struct netlink_ext_ack *extack);
 	void (*policer_del)(void *priv, u16 policer_index);
+	int (*sampler_add)(void *priv, u8 local_port,
+			   struct psample_group *psample_group, u32 rate,
+			   u32 trunc_size, bool truncate, bool ingress,
+			   int *p_span_id, struct netlink_ext_ack *extack);
+	void (*sampler_del)(void *priv, u8 local_port, int span_id,
+			    bool ingress);
 	bool dummy_first_set;
 };
 
@@ -92,5 +98,10 @@ int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block,
 				  u32 fa_index, u64 rate_bytes_ps, u32 burst,
 				  u16 *p_policer_index,
 				  struct netlink_ext_ack *extack);
+int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port,
+				   struct psample_group *psample_group,
+				   u32 rate, u32 trunc_size, bool truncate,
+				   bool ingress,
+				   struct netlink_ext_ack *extack);
 
 #endif
-- 
cgit v1.2.3


From 45aad0b7043da88244622a65773dae24fd1dd4da Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:03:01 +0200
Subject: mlxsw: spectrum_acl: Offload FLOW_ACTION_SAMPLE

Implement support for action sample when used with a flower classifier
by implementing the required sampler_add() / sampler_del() callbacks and
registering an Rx listener for the sampled packets.

The sampler_add() callback returns an error for Spectrum-1 as the
functionality is not supported. In Spectrum-{2,3} the callback creates a
mirroring agent towards the CPU. The agent's identifier is used by the
policy engine code to mirror towards the CPU with probability.

The Rx listener for the sampled packet is registered with the 'policy
engine' mirroring reason and passes trapped packets to the psample
module after looking up their parameters (e.g., sampling group).

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  9 ++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 25 +++++++
 .../mellanox/mlxsw/spectrum_acl_flex_actions.c     | 83 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  | 18 +++++
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 41 +++++++++++
 5 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4b4d643abceb..63cc8fa3fa62 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -239,11 +239,12 @@ struct mlxsw_sp_port_pcpu_stats {
 enum mlxsw_sp_sample_trigger_type {
 	MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS,
 	MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS,
+	MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
 };
 
 struct mlxsw_sp_sample_trigger {
 	enum mlxsw_sp_sample_trigger_type type;
-	u8 local_port;
+	u8 local_port; /* Reserved when trigger type is not ingress / egress. */
 };
 
 struct mlxsw_sp_sample_params {
@@ -946,6 +947,12 @@ int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp,
 int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_acl_rule_info *rulei,
 				   u16 fid, struct netlink_ext_ack *extack);
+int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_rule_info *rulei,
+				  struct mlxsw_sp_flow_block *block,
+				  struct psample_group *psample_group, u32 rate,
+				  u32 trunc_size, bool truncate,
+				  struct netlink_ext_ack *extack);
 
 struct mlxsw_sp_acl_rule;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 8cfa03a75374..67cedfa76f78 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -688,6 +688,31 @@ int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
 	return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack);
 }
 
+int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_acl_rule_info *rulei,
+				  struct mlxsw_sp_flow_block *block,
+				  struct psample_group *psample_group, u32 rate,
+				  u32 trunc_size, bool truncate,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_flow_block_binding *binding;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+
+	if (!list_is_singular(&block->binding_list)) {
+		NL_SET_ERR_MSG_MOD(extack, "Only a single sampling source is allowed");
+		return -EOPNOTSUPP;
+	}
+	binding = list_first_entry(&block->binding_list,
+				   struct mlxsw_sp_flow_block_binding, list);
+	mlxsw_sp_port = binding->mlxsw_sp_port;
+
+	return mlxsw_afa_block_append_sampler(rulei->act_block,
+					      mlxsw_sp_port->local_port,
+					      psample_group, rate, trunc_size,
+					      truncate, binding->ingress,
+					      extack);
+}
+
 struct mlxsw_sp_acl_rule *
 mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp,
 			 struct mlxsw_sp_acl_ruleset *ruleset,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index 90372d1c28d4..c72aa38424dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -192,6 +192,22 @@ static void mlxsw_sp_act_policer_del(void *priv, u16 policer_index)
 			     policer_index);
 }
 
+static int mlxsw_sp1_act_sampler_add(void *priv, u8 local_port,
+				     struct psample_group *psample_group,
+				     u32 rate, u32 trunc_size, bool truncate,
+				     bool ingress, int *p_span_id,
+				     struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Sampling action is not supported on Spectrum-1");
+	return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp1_act_sampler_del(void *priv, u8 local_port, int span_id,
+				      bool ingress)
+{
+	WARN_ON_ONCE(1);
+}
+
 const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
 	.kvdl_set_add		= mlxsw_sp1_act_kvdl_set_add,
 	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
@@ -204,8 +220,73 @@ const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
 	.mirror_del		= mlxsw_sp_act_mirror_del,
 	.policer_add		= mlxsw_sp_act_policer_add,
 	.policer_del		= mlxsw_sp_act_policer_del,
+	.sampler_add		= mlxsw_sp1_act_sampler_add,
+	.sampler_del		= mlxsw_sp1_act_sampler_del,
 };
 
+static int mlxsw_sp2_act_sampler_add(void *priv, u8 local_port,
+				     struct psample_group *psample_group,
+				     u32 rate, u32 trunc_size, bool truncate,
+				     bool ingress, int *p_span_id,
+				     struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_span_agent_parms agent_parms = {
+		.session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING,
+	};
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_sample_params params;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp = priv;
+	int err;
+
+	params.psample_group = psample_group;
+	params.trunc_size = trunc_size;
+	params.rate = rate;
+	params.truncate = truncate;
+	err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, &params,
+						 extack);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, p_span_id, &agent_parms);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to get SPAN agent");
+		goto err_span_agent_get;
+	}
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to get analyzed port");
+		goto err_analyzed_port_get;
+	}
+
+	return 0;
+
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id);
+err_span_agent_get:
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
+	return err;
+}
+
+static void mlxsw_sp2_act_sampler_del(void *priv, u8 local_port, int span_id,
+				      bool ingress)
+{
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp = priv;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress);
+	mlxsw_sp_span_agent_put(mlxsw_sp, span_id);
+	mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger);
+}
+
 const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
 	.kvdl_set_add		= mlxsw_sp2_act_kvdl_set_add,
 	.kvdl_set_del		= mlxsw_sp_act_kvdl_set_del,
@@ -218,6 +299,8 @@ const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = {
 	.mirror_del		= mlxsw_sp_act_mirror_del,
 	.policer_add		= mlxsw_sp_act_policer_add,
 	.policer_del		= mlxsw_sp_act_policer_del,
+	.sampler_add		= mlxsw_sp2_act_sampler_add,
+	.sampler_del		= mlxsw_sp2_act_sampler_del,
 	.dummy_first_set	= true,
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index ea637fa552f5..be3791ca6069 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -24,6 +24,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 	const struct flow_action_entry *act;
 	int mirror_act_count = 0;
 	int police_act_count = 0;
+	int sample_act_count = 0;
 	int err, i;
 
 	if (!flow_action_has_entries(flow_action))
@@ -209,6 +210,23 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 				return err;
 			break;
 			}
+		case FLOW_ACTION_SAMPLE: {
+			if (sample_act_count++) {
+				NL_SET_ERR_MSG_MOD(extack, "Multiple sample actions per rule are not supported");
+				return -EOPNOTSUPP;
+			}
+
+			err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei,
+							    block,
+							    act->sample.psample_group,
+							    act->sample.rate,
+							    act->sample.trunc_size,
+							    act->sample.truncate,
+							    extack);
+			if (err)
+				return err;
+			break;
+			}
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
 			dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 3dbf5e53e9ff..26d01adbedad 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -51,6 +51,8 @@ enum {
 enum {
 	/* Packet was mirrored from ingress. */
 	MLXSW_SP_MIRROR_REASON_INGRESS = 1,
+	/* Packet was mirrored from policy engine. */
+	MLXSW_SP_MIRROR_REASON_POLICY_ENGINE = 2,
 	/* Packet was early dropped. */
 	MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9,
 	/* Packet was mirrored from egress. */
@@ -341,6 +343,42 @@ out:
 	consume_skb(skb);
 }
 
+static void mlxsw_sp_rx_sample_acl_listener(struct sk_buff *skb, u8 local_port,
+					    void *trap_ctx)
+{
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	struct mlxsw_sp_sample_trigger trigger = {
+		.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
+	};
+	struct mlxsw_sp_sample_params *params;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct psample_metadata md = {};
+	int err;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_port];
+	if (!mlxsw_sp_port)
+		goto out;
+
+	params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger);
+	if (!params)
+		goto out;
+
+	/* The psample module expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb,
+				 mlxsw_sp_port->dev->ifindex, params->truncate,
+				 params->trunc_size);
+	psample_sample_packet(params->psample_group, skb, params->rate, &md);
+out:
+	consume_skb(skb);
+}
+
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
@@ -1898,6 +1936,9 @@ mlxsw_sp2_trap_items_arr[] = {
 			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1,
 					 SP_PKT_SAMPLE,
 					 MLXSW_SP_MIRROR_REASON_EGRESS),
+			MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_acl_listener, 1,
+					 SP_PKT_SAMPLE,
+					 MLXSW_SP_MIRROR_REASON_POLICY_ENGINE),
 		},
 	},
 };
-- 
cgit v1.2.3


From f0b692c4ee2fdd0d4291fe3cbde156792896895e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:03:02 +0200
Subject: selftests: mlxsw: Add tc sample tests for new triggers

Test that packets are sampled when tc-sample is used with matchall
egress binding and flower classifier. Verify that when performing
sampling on egress the end-to-end latency is reported as metadata.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/tc_sample.sh       | 135 +++++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 75d00104f291..57b05f042787 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -41,6 +41,10 @@ ALL_TESTS="
 	tc_sample_md_lag_oif_test
 	tc_sample_md_out_tc_test
 	tc_sample_md_out_tc_occ_test
+	tc_sample_md_latency_test
+	tc_sample_acl_group_conflict_test
+	tc_sample_acl_rate_test
+	tc_sample_acl_max_rate_test
 "
 NUM_NETIFS=8
 CAPTURE_FILE=$(mktemp)
@@ -482,6 +486,137 @@ tc_sample_md_out_tc_occ_test()
 	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
 }
 
+tc_sample_md_latency_test()
+{
+	RET=0
+
+	# Egress sampling not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 5 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "latency " $CAPTURE_FILE
+	check_err $? "Sampled packets do not have latency attribute"
+
+	log_test "tc sample latency"
+
+	tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+	RET=0
+
+	# Test that two flower sampling rules cannot be configured on the same
+	# port with different groups.
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule with same group"
+
+	tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+		skip_sw action sample rate 1024 group 2 &> /dev/null
+	check_fail $? "Managed to configure sampling rule with conflicting group"
+
+	log_test "tc sample (w/ flower) group conflict test"
+
+	tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+	local bind=$1; shift
+	local port=$1; shift
+	local pkts pct
+
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+	pct=$((100 * (pkts - 100) / 100))
+	(( -25 <= pct && pct <= 25))
+	check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+	# Setup a filter that should not match any packet and make sure packets
+	# are not sampled.
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	psample_capture_start
+
+	ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	psample_capture_stop
+
+	grep -q -e "group 1 " $CAPTURE_FILE
+	check_fail $? "Sampled packets when should not"
+
+	log_test "tc sample (w/ flower) rate ($bind)"
+
+	tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+	__tc_sample_acl_rate_test ingress $rp1
+	__tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+	RET=0
+
+	# Policy-based sampling is not supported on Spectrum-1.
+	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24 - 1)) group 1
+	check_err $? "Failed to configure sampling rule with max rate"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw action sample rate $((2 ** 24)) \
+		group 1 &> /dev/null
+	check_fail $? "Managed to configure sampling rate above maximum"
+
+	log_test "tc sample (w/ flower) maximum rate"
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3


From 0f967d9e5a20aeee51bb004295e83c2c913cceda Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 16 Mar 2021 17:03:03 +0200
Subject: selftests: mlxsw: Test egress sampling limitation on Spectrum-1 only

Make sure egress sampling configuration only fails on Spectrum-1, given
that mlxsw now supports it on Spectrum-{2,3}.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 553cb9fad508..b4dbda706c4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -18,6 +18,7 @@ NUM_NETIFS=2
 
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
 
 switch_create()
 {
@@ -166,7 +167,8 @@ matchall_sample_egress_test()
 	RET=0
 
 	# It is forbidden in mlxsw driver to have matchall with sample action
-	# bound on egress
+	# bound on egress. Spectrum-1 specific restriction
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
 
 	tc qdisc add dev $swp1 clsact
 
-- 
cgit v1.2.3


From ebb1bb401303ffac0ee994ba8ed9dfd24bb2ac5f Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 16 Mar 2021 21:10:17 +0100
Subject: net: ocelot: Add PGID_BLACKHOLE

Add a new PGID that is used not to forward frames anywhere. It is used
by MRP to make sure that MRP Test frames will not reach CPU port.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 3 +++
 include/soc/mscc/ocelot.h          | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 46e5c9136bac..f74d7cf002a5 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -2051,6 +2051,9 @@ int ocelot_init(struct ocelot *ocelot)
 
 		ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i);
 	}
+
+	ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_BLACKHOLE);
+
 	/* Allow broadcast and unknown L2 multicast to the CPU. */
 	ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
 		       ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)),
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 425ff29d9389..4d10ccc8e7b5 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -51,6 +51,7 @@
  */
 
 /* Reserve some destination PGIDs at the end of the range:
+ * PGID_BLACKHOLE: used for not forwarding the frames
  * PGID_CPU: used for whitelisting certain MAC addresses, such as the addresses
  *           of the switch port net devices, towards the CPU port module.
  * PGID_UC: the flooding destinations for unknown unicast traffic.
@@ -59,6 +60,7 @@
  * PGID_MCIPV6: the flooding destinations for IPv6 multicast traffic.
  * PGID_BC: the flooding destinations for broadcast traffic.
  */
+#define PGID_BLACKHOLE			57
 #define PGID_CPU			58
 #define PGID_UC				59
 #define PGID_MC				60
@@ -73,7 +75,7 @@
 
 #define for_each_nonreserved_multicast_dest_pgid(ocelot, pgid)	\
 	for ((pgid) = (ocelot)->num_phys_ports + 1;		\
-	     (pgid) < PGID_CPU;					\
+	     (pgid) < PGID_BLACKHOLE;				\
 	     (pgid)++)
 
 #define for_each_aggr_pgid(ocelot, pgid)			\
-- 
cgit v1.2.3


From 7c588c3e96e9733a2a8a40caefd26c9189416821 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 16 Mar 2021 21:10:18 +0100
Subject: net: ocelot: Extend MRP

This patch extends MRP support for Ocelot. It allows to have multiple
rings and when the node has the MRC role it forwards MRP Test frames in
HW. For MRM there is no change.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c     |   6 -
 drivers/net/ethernet/mscc/ocelot_mrp.c | 233 ++++++++++++++++++++++-----------
 include/soc/mscc/ocelot.h              |   8 +-
 net/dsa/tag_ocelot.c                   |   6 -
 4 files changed, 160 insertions(+), 93 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index f74d7cf002a5..9cc9378157e4 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -772,12 +772,6 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 
 	skb->protocol = eth_type_trans(skb, dev);
 
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	if (skb->protocol == cpu_to_be16(ETH_P_MRP) &&
-	    cpuq & BIT(OCELOT_MRP_CPUQ))
-		skb->offload_fwd_mark = 0;
-#endif
-
 	*nskb = skb;
 
 	return 0;
diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index 683da320bfd8..439129a65b71 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -1,8 +1,5 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Microsemi Ocelot Switch driver
- *
- * This contains glue logic between the switchdev driver operations and the
- * mscc_ocelot_switch_lib.
  *
  * Copyright (c) 2017, 2019 Microsemi Corporation
  * Copyright 2020-2021 NXP Semiconductors
@@ -15,13 +12,34 @@
 #include "ocelot.h"
 #include "ocelot_vcap.h"
 
-static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port)
+static const u8 mrp_test_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x01 };
+static const u8 mrp_control_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x02 };
+
+static int ocelot_mrp_find_partner_port(struct ocelot *ocelot,
+					struct ocelot_port *p)
+{
+	int i;
+
+	for (i = 0; i < ocelot->num_phys_ports; ++i) {
+		struct ocelot_port *ocelot_port = ocelot->ports[i];
+
+		if (!ocelot_port || p == ocelot_port)
+			continue;
+
+		if (ocelot_port->mrp_ring_id == p->mrp_ring_id)
+			return i;
+	}
+
+	return -1;
+}
+
+static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int id)
 {
 	struct ocelot_vcap_block *block_vcap_is2;
 	struct ocelot_vcap_filter *filter;
 
 	block_vcap_is2 = &ocelot->block[VCAP_IS2];
-	filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, port,
+	filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, id,
 						     false);
 	if (!filter)
 		return 0;
@@ -29,6 +47,87 @@ static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port)
 	return ocelot_vcap_filter_del(ocelot, filter);
 }
 
+static int ocelot_mrp_redirect_add_vcap(struct ocelot *ocelot, int src_port,
+					int dst_port)
+{
+	const u8 mrp_test_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+	struct ocelot_vcap_filter *filter;
+	int err;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
+	filter->prio = 1;
+	filter->id.cookie = src_port;
+	filter->id.tc_offload = false;
+	filter->block_id = VCAP_IS2;
+	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	filter->ingress_port_mask = BIT(src_port);
+	ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac);
+	ether_addr_copy(filter->key.etype.dmac.mask, mrp_test_mask);
+	filter->action.mask_mode = OCELOT_MASK_MODE_REDIRECT;
+	filter->action.port_mask = BIT(dst_port);
+
+	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	if (err)
+		kfree(filter);
+
+	return err;
+}
+
+static int ocelot_mrp_copy_add_vcap(struct ocelot *ocelot, int port,
+				    int prio, unsigned long cookie)
+{
+	const u8 mrp_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
+	struct ocelot_vcap_filter *filter;
+	int err;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
+	filter->prio = prio;
+	filter->id.cookie = cookie;
+	filter->id.tc_offload = false;
+	filter->block_id = VCAP_IS2;
+	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
+	filter->ingress_port_mask = BIT(port);
+	/* Here is possible to use control or test dmac because the mask
+	 * doesn't cover the LSB
+	 */
+	ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac);
+	ether_addr_copy(filter->key.etype.dmac.mask, mrp_mask);
+	filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
+	filter->action.port_mask = 0x0;
+	filter->action.cpu_copy_ena = true;
+	filter->action.cpu_qu_num = OCELOT_MRP_CPUQ;
+
+	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	if (err)
+		kfree(filter);
+
+	return err;
+}
+
+static void ocelot_mrp_save_mac(struct ocelot *ocelot,
+				struct ocelot_port *port)
+{
+	ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac,
+			  port->pvid_vlan.vid, ENTRYTYPE_LOCKED);
+	ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac,
+			  port->pvid_vlan.vid, ENTRYTYPE_LOCKED);
+}
+
+static void ocelot_mrp_del_mac(struct ocelot *ocelot,
+			       struct ocelot_port *port)
+{
+	ocelot_mact_forget(ocelot, mrp_test_dmac, port->pvid_vlan.vid);
+	ocelot_mact_forget(ocelot, mrp_control_dmac, port->pvid_vlan.vid);
+}
+
 int ocelot_mrp_add(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp)
 {
@@ -45,18 +144,7 @@ int ocelot_mrp_add(struct ocelot *ocelot, int port,
 	if (mrp->p_port != dev && mrp->s_port != dev)
 		return 0;
 
-	if (ocelot->mrp_ring_id != 0 &&
-	    ocelot->mrp_s_port &&
-	    ocelot->mrp_p_port)
-		return -EINVAL;
-
-	if (mrp->p_port == dev)
-		ocelot->mrp_p_port = dev;
-
-	if (mrp->s_port == dev)
-		ocelot->mrp_s_port = dev;
-
-	ocelot->mrp_ring_id = mrp->ring_id;
+	ocelot_port->mrp_ring_id = mrp->ring_id;
 
 	return 0;
 }
@@ -66,34 +154,31 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
+	int i;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	if (ocelot->mrp_ring_id == 0 &&
-	    !ocelot->mrp_s_port &&
-	    !ocelot->mrp_p_port)
-		return -EINVAL;
+	ocelot_mrp_del_vcap(ocelot, port);
+	ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports);
 
-	if (ocelot_mrp_del_vcap(ocelot, priv->chip_port))
-		return -EINVAL;
+	ocelot_port->mrp_ring_id = 0;
 
-	if (ocelot->mrp_p_port == dev)
-		ocelot->mrp_p_port = NULL;
+	for (i = 0; i < ocelot->num_phys_ports; ++i) {
+		ocelot_port = ocelot->ports[i];
 
-	if (ocelot->mrp_s_port == dev)
-		ocelot->mrp_s_port = NULL;
+		if (!ocelot_port)
+			continue;
 
-	ocelot->mrp_ring_id = 0;
+		if (ocelot_port->mrp_ring_id != 0)
+			goto out;
+	}
 
+	ocelot_mrp_del_mac(ocelot, ocelot_port);
+out:
 	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_del);
@@ -102,49 +187,39 @@ int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port,
 			     const struct switchdev_obj_ring_role_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_vcap_filter *filter;
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
+	int dst_port;
 	int err;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_ring_id != mrp->ring_id)
-		return -EINVAL;
-
-	if (!mrp->sw_backup)
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup)
 		return -EOPNOTSUPP;
 
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
-	if (!filter)
-		return -ENOMEM;
+	ocelot_mrp_save_mac(ocelot, ocelot_port);
 
-	filter->key_type = OCELOT_VCAP_KEY_ETYPE;
-	filter->prio = 1;
-	filter->id.cookie = priv->chip_port;
-	filter->id.tc_offload = false;
-	filter->block_id = VCAP_IS2;
-	filter->type = OCELOT_VCAP_FILTER_OFFLOAD;
-	filter->ingress_port_mask = BIT(priv->chip_port);
-	*(__be16 *)filter->key.etype.etype.value = htons(ETH_P_MRP);
-	*(__be16 *)filter->key.etype.etype.mask = htons(0xffff);
-	filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
-	filter->action.port_mask = 0x0;
-	filter->action.cpu_copy_ena = true;
-	filter->action.cpu_qu_num = OCELOT_MRP_CPUQ;
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC)
+		return ocelot_mrp_copy_add_vcap(ocelot, port, 1, port);
 
-	err = ocelot_vcap_filter_add(ocelot, filter, NULL);
+	dst_port = ocelot_mrp_find_partner_port(ocelot, ocelot_port);
+	if (dst_port == -1)
+		return -EINVAL;
+
+	err = ocelot_mrp_redirect_add_vcap(ocelot, port, dst_port);
 	if (err)
-		kfree(filter);
+		return err;
 
-	return err;
+	err = ocelot_mrp_copy_add_vcap(ocelot, port, 2,
+				       port + ocelot->num_phys_ports);
+	if (err) {
+		ocelot_mrp_del_vcap(ocelot, port);
+		return err;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_add_ring_role);
 
@@ -152,24 +227,32 @@ int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port,
 			     const struct switchdev_obj_ring_role_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct ocelot_port_private *priv;
-	struct net_device *dev;
+	int i;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
 
-	priv = container_of(ocelot_port, struct ocelot_port_private, port);
-	dev = priv->dev;
-
-	if (ocelot->mrp_ring_id != mrp->ring_id)
-		return -EINVAL;
-
-	if (!mrp->sw_backup)
+	if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup)
 		return -EOPNOTSUPP;
 
-	if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev)
+	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	return ocelot_mrp_del_vcap(ocelot, priv->chip_port);
+	ocelot_mrp_del_vcap(ocelot, port);
+	ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports);
+
+	for (i = 0; i < ocelot->num_phys_ports; ++i) {
+		ocelot_port = ocelot->ports[i];
+
+		if (!ocelot_port)
+			continue;
+
+		if (ocelot_port->mrp_ring_id != 0)
+			goto out;
+	}
+
+	ocelot_mrp_del_mac(ocelot, ocelot_port);
+out:
+	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_del_ring_role);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 4d10ccc8e7b5..0a0751bf97dd 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -613,6 +613,8 @@ struct ocelot_port {
 
 	struct net_device		*bond;
 	bool				lag_tx_active;
+
+	u16				mrp_ring_id;
 };
 
 struct ocelot {
@@ -681,12 +683,6 @@ struct ocelot {
 	/* Protects the PTP clock */
 	spinlock_t			ptp_clock_lock;
 	struct ptp_pin_desc		ptp_pins[OCELOT_PTP_PINS_NUM];
-
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	u16				mrp_ring_id;
-	struct net_device		*mrp_p_port;
-	struct net_device		*mrp_s_port;
-#endif
 };
 
 struct ocelot_policer {
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 743809b5806b..157f95689d8d 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -128,12 +128,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	skb->offload_fwd_mark = 1;
 	skb->priority = qos_class;
 
-#if IS_ENABLED(CONFIG_BRIDGE_MRP)
-	if (eth_hdr(skb)->h_proto == cpu_to_be16(ETH_P_MRP) &&
-	    cpuq & BIT(OCELOT_MRP_CPUQ))
-		skb->offload_fwd_mark = 0;
-#endif
-
 	/* Ocelot switches copy frames unmodified to the CPU. However, it is
 	 * possible for the user to request a VLAN modification through
 	 * VCAP_IS1_ACT_VID_REPLACE_ENA. In this case, what will happen is that
-- 
cgit v1.2.3


From 2ed2c5f0391106406ead3a74bfa571575eafe8b6 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 16 Mar 2021 21:10:19 +0100
Subject: net: ocelot: Remove ocelot_xfh_get_cpuq

Now when extracting frames from CPU the cpuq is not used anymore so
remove it.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 3 +--
 include/linux/dsa/ocelot.h         | 5 -----
 net/dsa/tag_ocelot.c               | 2 --
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 9cc9378157e4..9f0c9bdd9f5d 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -687,7 +687,7 @@ static int ocelot_xtr_poll_xfh(struct ocelot *ocelot, int grp, u32 *xfh)
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 {
 	struct skb_shared_hwtstamps *shhwtstamps;
-	u64 tod_in_ns, full_ts_in_ns, cpuq;
+	u64 tod_in_ns, full_ts_in_ns;
 	u64 timestamp, src_port, len;
 	u32 xfh[OCELOT_TAG_LEN / 4];
 	struct net_device *dev;
@@ -704,7 +704,6 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 	ocelot_xfh_get_src_port(xfh, &src_port);
 	ocelot_xfh_get_len(xfh, &len);
 	ocelot_xfh_get_rew_val(xfh, &timestamp);
-	ocelot_xfh_get_cpuq(xfh, &cpuq);
 
 	if (WARN_ON(src_port >= ocelot->num_phys_ports))
 		return -EINVAL;
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 4265f328681a..c6bc45ae5e03 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -160,11 +160,6 @@ static inline void ocelot_xfh_get_src_port(void *extraction, u64 *src_port)
 	packing(extraction, src_port, 46, 43, OCELOT_TAG_LEN, UNPACK, 0);
 }
 
-static inline void ocelot_xfh_get_cpuq(void *extraction, u64 *cpuq)
-{
-	packing(extraction, cpuq, 28, 20, OCELOT_TAG_LEN, UNPACK, 0);
-}
-
 static inline void ocelot_xfh_get_qos_class(void *extraction, u64 *qos_class)
 {
 	packing(extraction, qos_class, 19, 17, OCELOT_TAG_LEN, UNPACK, 0);
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 157f95689d8d..f9df9cac81c5 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -83,7 +83,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	struct dsa_port *dp;
 	u8 *extraction;
 	u16 vlan_tpid;
-	u64 cpuq;
 
 	/* Revert skb->data by the amount consumed by the DSA master,
 	 * so it points to the beginning of the frame.
@@ -113,7 +112,6 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	ocelot_xfh_get_qos_class(extraction, &qos_class);
 	ocelot_xfh_get_tag_type(extraction, &tag_type);
 	ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
-	ocelot_xfh_get_cpuq(extraction, &cpuq);
 
 	skb->dev = dsa_master_find_slave(netdev, 0, src_port);
 	if (!skb->dev)
-- 
cgit v1.2.3


From 01035bcc0f9195a19a76c8a006b3c520428acb61 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 16 Mar 2021 15:52:15 -0700
Subject: Revert "net: socket: use BIT() for MSG_*"

This reverts commit 0bb3262c0248d44aea3be31076f44beb82a7b120.

Breaks things on mips64/qemu

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 71 ++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index e88859f38cd0..385894b4a8bb 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -283,45 +283,42 @@ struct ucred {
    Added those for 1003.1g not all are supported yet
  */
 
-#define MSG_OOB		BIT(0)
-#define MSG_PEEK	BIT(1)
-#define MSG_DONTROUTE	BIT(2)
-#define MSG_TRYHARD	BIT(2)	/* Synonym for MSG_DONTROUTE for DECnet		*/
-#define MSG_CTRUNC	BIT(3)
-#define MSG_PROBE	BIT(4)	/* Do not send. Only probe path f.e. for MTU	*/
-#define MSG_TRUNC	BIT(5)
-#define MSG_DONTWAIT	BIT(6)	/* Nonblocking io		*/
-#define MSG_EOR		BIT(7)	/* End of record		*/
-#define MSG_WAITALL	BIT(8)	/* Wait for a full request	*/
-#define MSG_FIN		BIT(9)
-#define MSG_SYN		BIT(10)
-#define MSG_CONFIRM	BIT(11)	/* Confirm path validity	*/
-#define MSG_RST		BIT(12)
-#define MSG_ERRQUEUE	BIT(13)	/* Fetch message from error queue */
-#define MSG_NOSIGNAL	BIT(14)	/* Do not generate SIGPIPE	*/
-#define MSG_MORE	BIT(15)	/* Sender will send more	*/
-#define MSG_WAITFORONE	BIT(16)	/* recvmmsg(): block until 1+ packets avail */
-#define MSG_SENDPAGE_NOPOLICY	BIT(16)	/* sendpage() internal : do no apply policy */
-#define MSG_SENDPAGE_NOTLAST	BIT(17)	/* sendpage() internal : not the last page  */
-#define MSG_BATCH	BIT(18)		/* sendmmsg(): more messages coming */
-#define MSG_EOF		MSG_FIN
-#define MSG_NO_SHARED_FRAGS	BIT(19)	/* sendpage() internal : page frags
-					 * are not shared
-					 */
-#define MSG_SENDPAGE_DECRYPTED	BIT(20)	/* sendpage() internal : page may carry
-					 * plain text and require encryption
-					 */
-
-#define MSG_ZEROCOPY	BIT(26)		/* Use user data in kernel path */
-#define MSG_FASTOPEN	BIT(29)		/* Send data in TCP SYN */
-#define MSG_CMSG_CLOEXEC	BIT(30)	/* Set close_on_exec for file
-					 * descriptor received through
-					 * SCM_RIGHTS
-					 */
+#define MSG_OOB		1
+#define MSG_PEEK	2
+#define MSG_DONTROUTE	4
+#define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC	8
+#define MSG_PROBE	0x10	/* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC	0x20
+#define MSG_DONTWAIT	0x40	/* Nonblocking io		 */
+#define MSG_EOR         0x80	/* End of record */
+#define MSG_WAITALL	0x100	/* Wait for a full request */
+#define MSG_FIN         0x200
+#define MSG_SYN		0x400
+#define MSG_CONFIRM	0x800	/* Confirm path validity */
+#define MSG_RST		0x1000
+#define MSG_ERRQUEUE	0x2000	/* Fetch message from error queue */
+#define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
+#define MSG_MORE	0x8000	/* Sender will send more */
+#define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
+#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
+#define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
+#define MSG_EOF         MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED	0x100000 /* sendpage() internal : page may carry
+					  * plain text and require encryption
+					  */
+
+#define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
+#define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
+#define MSG_CMSG_CLOEXEC 0x40000000	/* Set close_on_exec for file
+					   descriptor received through
+					   SCM_RIGHTS */
 #if defined(CONFIG_COMPAT)
-#define MSG_CMSG_COMPAT	BIT(31)	/* This message needs 32 bit fixups */
+#define MSG_CMSG_COMPAT	0x80000000	/* This message needs 32 bit fixups */
 #else
-#define MSG_CMSG_COMPAT	0	/* We never have 32 bit fixups */
+#define MSG_CMSG_COMPAT	0		/* We never have 32 bit fixups */
 #endif
 
 
-- 
cgit v1.2.3


From ebda107e5f222a086c83ddf6d1ab1da97dd15810 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 16 Mar 2021 15:59:37 +0800
Subject: selftests/bpf: Fix warning comparing pointer to 0

Fix the following coccicheck warnings:

  ./tools/testing/selftests/bpf/progs/fexit_test.c:77:15-16: WARNING
  comparing pointer to 0.

  ./tools/testing/selftests/bpf/progs/fexit_test.c:68:12-13: WARNING
  comparing pointer to 0.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1615881577-3493-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 tools/testing/selftests/bpf/progs/fexit_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 0952affb22a6..8f1ccb7302e1 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -65,7 +65,7 @@ __u64 test7_result = 0;
 SEC("fexit/bpf_fentry_test7")
 int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
 {
-	if (arg == 0)
+	if (!arg)
 		test7_result = 1;
 	return 0;
 }
@@ -74,7 +74,7 @@ __u64 test8_result = 0;
 SEC("fexit/bpf_fentry_test8")
 int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
 {
-	if (arg->a == 0)
+	if (!arg->a)
 		test8_result = 1;
 	return 0;
 }
-- 
cgit v1.2.3


From 56901d483bf14ca729a6c4e06d2b49590b9f1971 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Tue, 16 Mar 2021 21:00:48 +0530
Subject: selftests/bpf: Use nanosleep() syscall instead of sleep() in
 get_cgroup_id

Glibc's sleep() switched to clock_nanosleep() from nanosleep(), and thus
syscalls:sys_enter_nanosleep tracepoint is not hitting which is causing
testcase failure. Instead of depending on glibc sleep(), call nanosleep()
systemcall directly.

Before:

  # ./get_cgroup_id_user
  ...
  main:FAIL:compare_cgroup_id kern cgid 0 user cgid 483

After:

  # ./get_cgroup_id_user
  ...
  main:PASS:compare_cgroup_id

Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210316153048.136447-1-ravi.bangoria@linux.ibm.com
---
 tools/testing/selftests/bpf/get_cgroup_id_user.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index b8d6aef99db4..99628e1a1e58 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -57,6 +57,10 @@ int main(int argc, char **argv)
 	__u32 key = 0, pid;
 	int exit_code = 1;
 	char buf[256];
+	const struct timespec req = {
+		.tv_sec = 1,
+		.tv_nsec = 0,
+	};
 
 	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
 	if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
@@ -115,7 +119,7 @@ int main(int argc, char **argv)
 		goto close_pmu;
 
 	/* trigger some syscalls */
-	sleep(1);
+	syscall(__NR_nanosleep, &req, NULL);
 
 	err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
 	if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
-- 
cgit v1.2.3


From ebfbc46b35cb70b9fbd88f376d7a33b79f60adff Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@sysclose.org>
Date: Tue, 16 Mar 2021 17:14:27 -0300
Subject: openvswitch: Warn over-mtu packets only if iface is UP.

It is not unusual to have the bridge port down. Sometimes
it has the old MTU, which is fine since it's not being used.

However, the kernel spams the log with a warning message
when a packet is going to be sent over such port. Fix that
by warning only if the interface is UP.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 4ed7e52c7012..88deb5b41429 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -497,10 +497,12 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 
 	if (unlikely(packet_length(skb, vport->dev) > mtu &&
 		     !skb_is_gso(skb))) {
-		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
-				     vport->dev->name,
-				     packet_length(skb, vport->dev), mtu);
 		vport->dev->stats.tx_errors++;
+		if (vport->dev->flags & IFF_UP)
+			net_warn_ratelimited("%s: dropped over-mtu packet: "
+					     "%d > %d\n", vport->dev->name,
+					     packet_length(skb, vport->dev),
+					     mtu);
 		goto drop;
 	}
 
-- 
cgit v1.2.3


From 7a126a43a3dcf0fa6b9f7f2fe3ce82102517afe3 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:20 +0300
Subject: net: Change dev parameter to const in netif_device_present()

Not all ndos check the present bit before calling the ndo and the driver
may want to check it. Sometimes the dev parameter passed as const so we
pass it to netif_device_present() as const.
Since netif_device_present() doesn't modify dev parameter anyway, declare
it as const.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b379d08a12ed..97254c089eb2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4175,7 +4175,7 @@ static inline bool netif_oper_up(const struct net_device *dev)
  *
  * Check if device has not been removed from system.
  */
-static inline bool netif_device_present(struct net_device *dev)
+static inline bool netif_device_present(const struct net_device *dev)
 {
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
-- 
cgit v1.2.3


From f031dbd530eae5db3ff03510207772df68f5d9e6 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Sun, 22 Mar 2020 23:22:20 -0700
Subject: net/mlx5e: Same max num channels for both nic and uplink profiles

In downstream patches NIC netdev can change profile dynamically from
NIC mode to uplink mode and vise-versa. It is required that both profiles
must advertise the same max amount of tx/rx queues.

Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index a132fff7a980..9156978c900d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1198,7 +1198,8 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
 	.update_carrier	        = mlx5e_update_carrier,
 	.rx_handlers            = &mlx5e_rx_handlers_rep,
 	.max_tc			= MLX5E_MAX_NUM_TC,
-	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
+	/* XSK is needed so we can replace profile with NIC netdev */
+	.rq_groups		= MLX5E_NUM_RQ_GROUPS(XSK),
 	.stats_grps		= mlx5e_ul_rep_stats_grps,
 	.stats_grps_num		= mlx5e_ul_rep_stats_grps_num,
 };
-- 
cgit v1.2.3


From 1aa48ca6aa9fe7d59b853d2edd295e0486547700 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:10:48 +0300
Subject: net/mlx5e: Allow legacy vf ndos only if in legacy mode

We will re-use the native NIC port net device instance for the Uplink
representor. Several VF ndo ops are not relevant in switchdev mode.
Disallow them when eswitch mode is not legacy as a preparation.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   |  7 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 34 +++++++++++++++++++----
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 16ce7756ac43..cf1d3c9c88af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -36,6 +36,7 @@
 #include <linux/tcp.h>
 #include <linux/mlx5/fs.h>
 #include "en.h"
+#include "en_rep.h"
 #include "lib/mpfs.h"
 
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
@@ -435,6 +436,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
+	if (mlx5e_is_uplink_rep(priv))
+		return 0; /* no vlan table for uplink rep */
+
 	if (be16_to_cpu(proto) == ETH_P_8021Q)
 		return mlx5e_vlan_rx_add_cvid(priv, vid);
 	else if (be16_to_cpu(proto) == ETH_P_8021AD)
@@ -447,6 +451,9 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
+	if (mlx5e_is_uplink_rep(priv))
+		return 0; /* no vlan table for uplink rep */
+
 	if (be16_to_cpu(proto) == ETH_P_8021Q) {
 		clear_bit(vid, priv->fs.vlan.active_cvlans);
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ec2fcb2a2977..efd1a23ce7a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3857,11 +3857,19 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
 }
 
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
+{
+	if (mlx5e_is_uplink_rep(priv))
+		return; /* no rx mode for uplink rep */
+
+	queue_work(priv->wq, &priv->set_rx_mode_work);
+}
+
 static void mlx5e_set_rx_mode(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 }
 
 static int mlx5e_set_mac(struct net_device *netdev, void *addr)
@@ -3876,7 +3884,7 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
 	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
 	netif_addr_unlock_bh(netdev);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	return 0;
 }
@@ -4414,6 +4422,9 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	if (mlx5e_is_uplink_rep(priv))
+		return -EOPNOTSUPP;
+
 	return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
 					    mlx5_ifla_link2vport(link_state));
 }
@@ -5405,7 +5416,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 		return;
 	mlx5e_dcbnl_init_app(priv);
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	rtnl_lock();
 	if (netif_running(netdev))
@@ -5428,7 +5439,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 	netif_device_detach(priv->netdev);
 	rtnl_unlock();
 
-	queue_work(priv->wq, &priv->set_rx_mode_work);
+	mlx5e_nic_set_rx_mode(priv);
 
 	mlx5e_hv_vhca_stats_destroy(priv);
 	if (mlx5e_monitor_counter_supported(priv))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index fe1e06d95a12..9eb8e7a22dc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2040,6 +2040,10 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 		vport = 0;
 	}
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 
 	err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state);
 	if (err) {
@@ -2111,7 +2115,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				u16 vport, u16 vlan, u8 qos)
 {
 	u8 set_flags = 0;
-	int err;
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -2120,9 +2124,18 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		if (!vlan)
+			goto unlock; /* compatibility with libvirt */
+
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
 	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
-	mutex_unlock(&esw->state_lock);
 
+unlock:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
@@ -2139,6 +2152,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 	pschk = evport->info.spoofchk;
 	evport->info.spoofchk = spoofchk;
 	if (pschk && !is_valid_ether_addr(evport->info.mac))
@@ -2149,8 +2166,9 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 		err = esw_acl_ingress_lgcy_setup(esw, evport);
 	if (err)
 		evport->info.spoofchk = pschk;
-	mutex_unlock(&esw->state_lock);
 
+unlock:
+	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
@@ -2271,6 +2289,7 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 				 u16 vport, bool setting)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -2278,12 +2297,17 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 		return PTR_ERR(evport);
 
 	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
 	evport->info.trusted = setting;
 	if (evport->enabled)
 		esw_vport_change_handle_locked(evport);
-	mutex_unlock(&esw->state_lock);
 
-	return 0;
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
 }
 
 static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
-- 
cgit v1.2.3


From ec9457a6f64af41bb2b295efa34992903b0768a9 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:10:56 +0300
Subject: net/mlx5e: Distinguish nic and esw offload in tc setup block cb

We will re-use the native NIC port net device instance for the Uplink
representor, hence same ndos will be used.
Now we need to distinguish in the TC callback if the mode is legacy or
switchdev and set the proper flag.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 78fc27fc4e37..8a3b2d76cc82 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4892,9 +4892,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 			    void *cb_priv)
 {
-	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
+	unsigned long flags = MLX5_TC_FLAG(INGRESS);
 	struct mlx5e_priv *priv = cb_priv;
 
+	if (mlx5e_is_uplink_rep(priv))
+		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
+	else
+		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
+
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
-- 
cgit v1.2.3


From ee5260307c54f9b1b964a44863751ac220424844 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:01 +0300
Subject: net/mlx5e: Add offload stats ndos to nic netdev ops

We will re-use the native NIC port net device instance for the Uplink
representor, hence same ndos must be used.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 25 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h  |  9 ++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index efd1a23ce7a0..39de3156877c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4452,6 +4452,29 @@ int mlx5e_get_vf_stats(struct net_device *dev,
 	return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
 					    vf_stats);
 }
+
+static bool
+mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (!mlx5e_is_uplink_rep(priv))
+		return false;
+
+	return mlx5e_rep_has_offload_stats(dev, attr_id);
+}
+
+static int
+mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			void *sp)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (!mlx5e_is_uplink_rep(priv))
+		return -EOPNOTSUPP;
+
+	return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
+}
 #endif
 
 static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
@@ -4808,6 +4831,8 @@ const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
+	.ndo_has_offload_stats   = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats   = mlx5e_get_offload_stats,
 #endif
 	.ndo_get_devlink_port    = mlx5e_get_devlink_port,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 9156978c900d..b83652ed35cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -522,7 +522,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 	return (rep->vport == MLX5_VPORT_UPLINK);
 }
 
-static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -542,8 +542,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev,
 	return 0;
 }
 
-static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
-				       void *sp)
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+				void *sp)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index d1696404cca9..931fa619cb01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -220,6 +220,10 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 			    const struct net_device *lag_dev);
 int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
 
+bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id);
+int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
+				void *sp);
+
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
@@ -240,6 +244,11 @@ static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {}
 static inline int mlx5e_rep_init(void) { return 0; };
 static inline void mlx5e_rep_cleanup(void) {};
+static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev,
+					       int attr_id) { return false; }
+static inline int mlx5e_rep_get_offload_stats(int attr_id,
+					      const struct net_device *dev,
+					      void *sp) { return -EOPNOTSUPP; }
 #endif
 
 static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv)
-- 
cgit v1.2.3


From c97a2c06919ae8cf09773b8dfa24909ccfba9316 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:07 +0300
Subject: net/mlx5e: Use nic mode netdev ndos and ethtool ops for uplink
 representor

Remove dedicated uplink rep netdev ndos and ethtools ops.
We will re-use the native NIC port net device instance and ethtool ops for
the Uplink representor.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 133 +----------------------
 1 file changed, 4 insertions(+), 129 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index b83652ed35cc..9533085005c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -69,16 +69,6 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 		 fw_rev_sub(mdev), mdev->board_id);
 }
 
-static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
-					 struct ethtool_drvinfo *drvinfo)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-
-	mlx5e_rep_get_drvinfo(dev, drvinfo);
-	strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
-		sizeof(drvinfo->bus_info));
-}
-
 static const struct counter_desc sw_rep_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
@@ -285,46 +275,6 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
 	return mlx5e_ethtool_get_rxfh_indir_size(priv);
 }
 
-static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev,
-					     struct ethtool_pause_stats *stats)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	mlx5e_stats_pause_get(priv, stats);
-}
-
-static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
-					    struct ethtool_pauseparam *pauseparam)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	mlx5e_ethtool_get_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
-					   struct ethtool_pauseparam *pauseparam)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
-}
-
-static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
-					       struct ethtool_link_ksettings *link_ksettings)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
-}
-
-static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
-					       const struct ethtool_link_ksettings *link_ksettings)
-{
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
-}
-
 static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES |
@@ -344,34 +294,6 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
 	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
 };
 
-static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
-	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
-				     ETHTOOL_COALESCE_MAX_FRAMES |
-				     ETHTOOL_COALESCE_USE_ADAPTIVE,
-	.get_drvinfo	   = mlx5e_uplink_rep_get_drvinfo,
-	.get_link	   = ethtool_op_get_link,
-	.get_strings       = mlx5e_rep_get_strings,
-	.get_sset_count    = mlx5e_rep_get_sset_count,
-	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
-	.get_ringparam     = mlx5e_rep_get_ringparam,
-	.set_ringparam     = mlx5e_rep_set_ringparam,
-	.get_channels      = mlx5e_rep_get_channels,
-	.set_channels      = mlx5e_rep_set_channels,
-	.get_coalesce      = mlx5e_rep_get_coalesce,
-	.set_coalesce      = mlx5e_rep_set_coalesce,
-	.get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
-	.set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
-	.get_rxfh_key_size   = mlx5e_rep_get_rxfh_key_size,
-	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
-	.get_rxfh          = mlx5e_get_rxfh,
-	.set_rxfh          = mlx5e_set_rxfh,
-	.get_rxnfc         = mlx5e_get_rxnfc,
-	.set_rxnfc         = mlx5e_set_rxnfc,
-	.get_pause_stats   = mlx5e_uplink_rep_get_pause_stats,
-	.get_pauseparam    = mlx5e_uplink_rep_get_pauseparam,
-	.set_pauseparam    = mlx5e_uplink_rep_set_pauseparam,
-};
-
 static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep)
 {
@@ -568,34 +490,6 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu)
 	return mlx5e_change_mtu(netdev, new_mtu, NULL);
 }
 
-static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
-}
-
-static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
-{
-	struct sockaddr *saddr = addr;
-
-	if (!is_valid_ether_addr(saddr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
-	return 0;
-}
-
-static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
-					__be16 vlan_proto)
-{
-	netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
-
-	if (vlan != 0)
-		return -EOPNOTSUPP;
-
-	/* allow setting 0-vid for compatibility with libvirt */
-	return 0;
-}
-
 static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -641,29 +535,10 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_change_carrier      = mlx5e_rep_change_carrier,
 };
 
-static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
-	.ndo_open                = mlx5e_open,
-	.ndo_stop                = mlx5e_close,
-	.ndo_start_xmit          = mlx5e_xmit,
-	.ndo_set_mac_address     = mlx5e_uplink_rep_set_mac,
-	.ndo_setup_tc            = mlx5e_rep_setup_tc,
-	.ndo_get_devlink_port    = mlx5e_rep_get_devlink_port,
-	.ndo_get_stats64         = mlx5e_get_stats,
-	.ndo_has_offload_stats	 = mlx5e_rep_has_offload_stats,
-	.ndo_get_offload_stats	 = mlx5e_rep_get_offload_stats,
-	.ndo_change_mtu          = mlx5e_uplink_rep_change_mtu,
-	.ndo_features_check      = mlx5e_features_check,
-	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
-	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
-	.ndo_get_vf_config       = mlx5e_get_vf_config,
-	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
-	.ndo_set_vf_vlan         = mlx5e_uplink_rep_set_vf_vlan,
-	.ndo_set_features        = mlx5e_set_features,
-};
-
 bool mlx5e_eswitch_uplink_rep(struct net_device *netdev)
 {
-	return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep;
+	return netdev->netdev_ops == &mlx5e_netdev_ops &&
+	       mlx5e_is_uplink_rep(netdev_priv(netdev));
 }
 
 bool mlx5e_eswitch_vf_rep(struct net_device *netdev)
@@ -718,10 +593,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev,
 {
 	SET_NETDEV_DEV(netdev, mdev->device);
 	if (rep->vport == MLX5_VPORT_UPLINK) {
-		netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
+		netdev->netdev_ops = &mlx5e_netdev_ops;
 		/* we want a persistent mac for the uplink rep */
 		mlx5_query_mac_address(mdev, netdev->dev_addr);
-		netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
+		netdev->ethtool_ops = &mlx5e_ethtool_ops;
 		mlx5e_dcbnl_build_rep_netdev(netdev);
 	} else {
 		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
-- 
cgit v1.2.3


From 2ff349c5edfe3ea3c017dab28a1912f337a6500c Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:26 +0300
Subject: net/mlx5e: Verify dev is present in some ndos

We will re-use the native NIC port net device instance for the Uplink
representor. While changing profiles private resources are not
available but some ndos are not checking if the netdev is present.
So for those ndos check the netdev is present in the driver before
accessing the private resources.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c |  6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   | 17 +++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     |  3 +++
 3 files changed, 26 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index fcae3c0a4e9f..11a44d30adc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -169,6 +169,9 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
 	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
 	struct mlx5e_priv *priv = cb_priv;
 
+	if (!priv->netdev || !netif_device_present(priv->netdev))
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
@@ -321,6 +324,9 @@ mlx5e_rep_indr_offload(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
 	int err = 0;
 
+	if (!netif_device_present(indr_priv->rpriv->netdev))
+		return -EOPNOTSUPP;
+
 	switch (flower->command) {
 	case FLOW_CLS_REPLACE:
 		err = mlx5e_configure_flower(netdev, priv, flower, flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 39de3156877c..efe8af49b908 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3769,8 +3769,16 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	bool tc_unbind = false;
 	int err;
 
+	if (type == TC_SETUP_BLOCK &&
+	    ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
+		tc_unbind = true;
+
+	if (!netif_device_present(dev) && !tc_unbind)
+		return -ENODEV;
+
 	switch (type) {
 	case TC_SETUP_BLOCK: {
 		struct flow_block_offload *f = type_data;
@@ -3823,6 +3831,9 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
+	if (!netif_device_present(dev))
+		return;
+
 	/* In switchdev mode, monitor counters doesn't monitor
 	 * rx/tx stats of 802_3. The update stats mechanism
 	 * should keep the 802_3 layout counters updated
@@ -4436,6 +4447,9 @@ int mlx5e_get_vf_config(struct net_device *dev,
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
+	if (!netif_device_present(dev))
+		return -EOPNOTSUPP;
+
 	err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
 	if (err)
 		return err;
@@ -4458,6 +4472,9 @@ mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
+	if (!netif_device_present(dev))
+		return false;
+
 	if (!mlx5e_is_uplink_rep(priv))
 		return false;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8a3b2d76cc82..b3bf7bb7b97e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4895,6 +4895,9 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 	unsigned long flags = MLX5_TC_FLAG(INGRESS);
 	struct mlx5e_priv *priv = cb_priv;
 
+	if (!priv->netdev || !netif_device_present(priv->netdev))
+		return -EOPNOTSUPP;
+
 	if (mlx5e_is_uplink_rep(priv))
 		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
 	else
-- 
cgit v1.2.3


From 865d6d1c2df8e60260499efba2cd0537fdb8ca72 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 20 Oct 2020 12:26:51 +0300
Subject: net/mlx5e: Move devlink port register and unregister calls

We will re-use the native NIC port net device instance for the Uplink
representor. As such we also don't want to unregister/register the
devlink port as part of the profile.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c       | 17 +++++++++++------
 .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c  | 15 ++++++++++-----
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index efe8af49b908..3e8434dcc1df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5306,10 +5306,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	if (err)
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
-	err = mlx5e_devlink_port_register(priv);
-	if (err)
-		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
-
 	mlx5e_health_create_reporters(priv);
 
 	return 0;
@@ -5318,7 +5314,6 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
 	mlx5e_health_destroy_reporters(priv);
-	mlx5e_devlink_port_unregister(priv);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 }
@@ -5829,10 +5824,17 @@ static int mlx5e_probe(struct auxiliary_device *adev,
 
 	priv->profile = profile;
 	priv->ppriv = NULL;
+
+	err = mlx5e_devlink_port_register(priv);
+	if (err) {
+		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
+		goto err_destroy_netdev;
+	}
+
 	err = profile->init(mdev, netdev);
 	if (err) {
 		mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
-		goto err_destroy_netdev;
+		goto err_devlink_cleanup;
 	}
 
 	err = mlx5e_resume(adev);
@@ -5856,6 +5858,8 @@ err_resume:
 	mlx5e_suspend(adev, state);
 err_profile_cleanup:
 	profile->cleanup(priv);
+err_devlink_cleanup:
+	mlx5e_devlink_port_unregister(priv);
 err_destroy_netdev:
 	mlx5e_destroy_netdev(priv);
 	return err;
@@ -5870,6 +5874,7 @@ static void mlx5e_remove(struct auxiliary_device *adev)
 	unregister_netdev(priv->netdev);
 	mlx5e_suspend(adev, state);
 	priv->profile->cleanup(priv);
+	mlx5e_devlink_port_unregister(priv);
 	mlx5e_destroy_netdev(priv);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a215ccee3e61..e1e33e991123 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2259,9 +2259,11 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return 0;
 
-	err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
-	if (err)
-		return err;
+	if (vport_num != MLX5_VPORT_UPLINK) {
+		err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
+		if (err)
+			return err;
+	}
 
 	err = mlx5_esw_offloads_rep_load(esw, vport_num);
 	if (err)
@@ -2269,7 +2271,8 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num)
 	return err;
 
 load_err:
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+	if (vport_num != MLX5_VPORT_UPLINK)
+		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 	return err;
 }
 
@@ -2279,7 +2282,9 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
 		return;
 
 	mlx5_esw_offloads_rep_unload(esw, vport_num);
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
+
+	if (vport_num != MLX5_VPORT_UPLINK)
+		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 }
 
 #define ESW_OFFLOADS_DEVCOM_PAIR	(0)
-- 
cgit v1.2.3


From 5a65d85dc7f4532755750aab645c46a47219fd1a Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 21 Oct 2020 10:28:50 +0300
Subject: net/mlx5e: Register nic devlink port with switch id

We will re-use the native NIC port net device instance for the Uplink
representor. Since the netdev will be kept registered while we engage
switchdev mode also the devlink will be kept registered.
Register the nic devlink port with switch id so it will be available
when changing profiles.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/devlink.c   | 23 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  7 +++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index a69c62d72d16..054bc2fc0520 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -2,22 +2,43 @@
 /* Copyright (c) 2020, Mellanox Technologies inc.  All rights reserved. */
 
 #include "en/devlink.h"
+#include "eswitch.h"
+
+static void
+mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid)
+{
+	u64 parent_id;
+
+	parent_id = mlx5_query_nic_system_image_guid(dev);
+	ppid->id_len = sizeof(parent_id);
+	memcpy(ppid->id, &parent_id, sizeof(parent_id));
+}
 
 int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 {
 	struct devlink *devlink = priv_to_devlink(priv->mdev);
 	struct devlink_port_attrs attrs = {};
+	struct netdev_phys_item_id ppid = {};
+	unsigned int dl_port_index;
 
 	if (mlx5_core_is_pf(priv->mdev)) {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
 		attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn);
+		if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
+			mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
+			memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
+			attrs.switch_id.id_len = ppid.id_len;
+		}
+		dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev,
+								     MLX5_VPORT_UPLINK);
 	} else {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+		dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
 	}
 
 	devlink_port_attrs_set(&priv->dl_port, &attrs);
 
-	return devlink_port_register(devlink, &priv->dl_port, 1);
+	return devlink_port_register(devlink, &priv->dl_port, dl_port_index);
 }
 
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index fdf5c8c05c1b..d0b907a9ef28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -781,6 +781,13 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
+
+static inline unsigned int
+mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
+				     u16 vport_num)
+{
+	return vport_num;
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
-- 
cgit v1.2.3


From c276aae8c19d65e21a43c2690c7c7dafea0e97fa Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 26 Jan 2021 11:51:04 +0200
Subject: net/mlx5: Move mlx5e hw resources into a sub object

This is to separate between resources attributes and other
attributes we will want to use.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c  |  6 ++---
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c    | 27 ++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 22 +++++++++---------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/lib/crypto.c   |  2 +-
 include/linux/mlx5/driver.h                        | 10 ++++----
 8 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index d57b6f06382f..bb5d108f75d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -174,7 +174,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
 	sq->mdev      = mdev;
 	sq->ch_ix     = c->ix;
 	sq->txq_ix    = txq_ix;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats     = &c->priv->port_ptp_stats.sq[tc];
@@ -475,7 +475,7 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->ix       = 0;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->stats    = &priv->port_ptp_stats.ch;
 	c->lag_port = lag_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 37fc1d77ded7..41db93883fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -84,7 +84,7 @@ static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *r
 	if (err)
 		goto err_free_frags;
 
-	rq->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
 
 	mlx5e_rq_set_trap_handlers(rq, params);
 
@@ -213,7 +213,7 @@ static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct ml
 		return -ENOMEM;
 
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
 	MLX5_SET(tirc, tirc, inline_rqn, rqn);
@@ -266,7 +266,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
 	t->tstamp   = &priv->tstamp;
 	t->pdev     = mlx5_core_dma_dev(priv->mdev);
 	t->netdev   = priv->netdev;
-	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	t->stats    = &priv->trap_stats.ch;
 
 	netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index d06532d0baa4..f7c880edae37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -84,7 +84,7 @@ static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn
 
 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 
-	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
 	MLX5_SET(tirc, tirc, indirect_table, rqtn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index a6cf008057b5..8c166ee56d8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -38,15 +38,16 @@
 
 int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
 {
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 	int err;
 
 	err = mlx5_core_create_tir(mdev, in, &tir->tirn);
 	if (err)
 		return err;
 
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
-	list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_lock(&res->td.list_lock);
+	list_add(&tir->list, &res->td.tirs_list);
+	mutex_unlock(&res->td.list_lock);
 
 	return 0;
 }
@@ -54,10 +55,12 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir)
 {
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
+
+	mutex_lock(&res->td.list_lock);
 	mlx5_core_destroy_tir(mdev, tir->tirn);
 	list_del(&tir->list);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_unlock(&res->td.list_lock);
 }
 
 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
@@ -99,7 +102,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 {
-	struct mlx5e_resources *res = &mdev->mlx5e_res;
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 	int err;
 
 	err = mlx5_core_alloc_pd(mdev, &res->pdn);
@@ -126,8 +129,8 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 		goto err_destroy_mkey;
 	}
 
-	INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
-	mutex_init(&mdev->mlx5e_res.td.list_lock);
+	INIT_LIST_HEAD(&res->td.tirs_list);
+	mutex_init(&res->td.list_lock);
 
 	return 0;
 
@@ -142,7 +145,7 @@ err_dealloc_pd:
 
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 {
-	struct mlx5e_resources *res = &mdev->mlx5e_res;
+	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 
 	mlx5_free_bfreg(mdev, &res->bfreg);
 	mlx5_core_destroy_mkey(mdev, &res->mkey);
@@ -180,8 +183,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
-	mutex_lock(&mdev->mlx5e_res.td.list_lock);
-	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
+	mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock);
+	list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) {
 		tirn = tir->tirn;
 		err = mlx5_core_modify_tir(mdev, tirn, in);
 		if (err)
@@ -192,7 +195,7 @@ out:
 	kvfree(in);
 	if (err)
 		netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
-	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
+	mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3e8434dcc1df..2f961bd9e528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -302,7 +302,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
 	mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
-	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
 	MLX5_SET64(mkc, mkc, len, npages << page_shift);
 	MLX5_SET(mkc, mkc, translations_octword_size,
 		 MLX5_MTT_OCTW(npages));
@@ -1019,7 +1019,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 	sq->pdev      = c->pdev;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->xsk_pool  = xsk_pool;
@@ -1090,7 +1090,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
 	int err;
 
 	sq->channel   = c;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1174,7 +1174,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->priv      = c->priv;
 	sq->ch_ix     = c->ix;
 	sq->txq_ix    = txq_ix;
-	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@ -1257,7 +1257,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 	MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
-	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
+	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
 	MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
 					  MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
@@ -2032,7 +2032,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->cpu      = cpu;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	c->num_tc   = params->num_tc;
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
@@ -2217,7 +2217,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride,
 		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
-	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
+	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
 	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
@@ -2248,7 +2248,7 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
-	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
+	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.hw_objs.pdn);
 
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
 }
@@ -3421,10 +3421,10 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
 {
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
 
 	if (MLX5_GET(tisc, tisc, tls_en))
-		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
+		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
 
 	if (mlx5_lag_is_lacp_owner(mdev))
 		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
@@ -3494,7 +3494,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
 					     u32 rqtn, u32 *tirc)
 {
-	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
+	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn);
 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
 	MLX5_SET(tirc, tirc, indirect_table, rqtn);
 	MLX5_SET(tirc, tirc, tunneled_offload_en,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 1eeca45cfcdf..0fc055cdf221 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -708,7 +708,7 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
 
 static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
 {
-	return mdev->mlx5e_res.pdn != 0;
+	return mdev->mlx5e_res.hw_objs.pdn != 0;
 }
 
 static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
index 57eb91bcbca7..e995f8378df7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c
@@ -46,7 +46,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
 		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
 	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
 		 MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY);
-	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn);
 
 	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (!err)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 53b89631a1d9..9887181dea5f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -644,10 +644,12 @@ struct mlx5_td {
 };
 
 struct mlx5e_resources {
-	u32                        pdn;
-	struct mlx5_td             td;
-	struct mlx5_core_mkey      mkey;
-	struct mlx5_sq_bfreg       bfreg;
+	struct mlx5e_hw_objs {
+		u32                        pdn;
+		struct mlx5_td             td;
+		struct mlx5_core_mkey      mkey;
+		struct mlx5_sq_bfreg       bfreg;
+	} hw_objs;
 };
 
 enum mlx5_sw_icm_type {
-- 
cgit v1.2.3


From c27971d08abecc91f06214dacc66ce3ce2662a44 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 28 Oct 2020 11:21:26 +0200
Subject: net/mlx5: Move devlink port from mlx5e priv to mlx5e resources

We re-use the native NIC port net device instance for the Uplink
representor, and the devlink port.
When changing profiles we reset the mlx5e priv but we should still
use the devlink port so move it to mlx5e resources.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h            |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c    | 17 ++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h    |  6 ++++++
 .../net/ethernet/mellanox/mlx5/core/en/reporter_rx.c    |  4 +++-
 .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c    |  4 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c         |  5 ++++-
 include/linux/mlx5/driver.h                             |  1 +
 7 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7435fe6829b6..4d621d142f76 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -880,7 +880,6 @@ struct mlx5e_priv {
 #endif
 	struct devlink_health_reporter *tx_reporter;
 	struct devlink_health_reporter *rx_reporter;
-	struct devlink_port            dl_port;
 	struct mlx5e_xsk           xsk;
 #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
 	struct mlx5e_hv_vhca_stats_agent stats_agent;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index 054bc2fc0520..765f3064689d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -19,6 +19,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 	struct devlink *devlink = priv_to_devlink(priv->mdev);
 	struct devlink_port_attrs attrs = {};
 	struct netdev_phys_item_id ppid = {};
+	struct devlink_port *dl_port;
 	unsigned int dl_port_index;
 
 	if (mlx5_core_is_pf(priv->mdev)) {
@@ -36,24 +37,30 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 		dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0);
 	}
 
-	devlink_port_attrs_set(&priv->dl_port, &attrs);
+	dl_port = mlx5e_devlink_get_dl_port(priv);
+	memset(dl_port, 0, sizeof(*dl_port));
+	devlink_port_attrs_set(dl_port, &attrs);
 
-	return devlink_port_register(devlink, &priv->dl_port, dl_port_index);
+	return devlink_port_register(devlink, dl_port, dl_port_index);
 }
 
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
 {
-	devlink_port_type_eth_set(&priv->dl_port, priv->netdev);
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+	devlink_port_type_eth_set(dl_port, priv->netdev);
 }
 
 void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
 {
-	devlink_port_unregister(&priv->dl_port);
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+	devlink_port_unregister(dl_port);
 }
 
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
-	return &priv->dl_port;
+	return mlx5e_devlink_get_dl_port(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
index 83123a801adc..10b50feb9883 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h
@@ -12,4 +12,10 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv);
 void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv);
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev);
 
+static inline struct devlink_port *
+mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv)
+{
+	return &priv->mdev->mlx5e_res.dl_port;
+}
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index d80bbd17e5f8..f0a419fc4adf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -4,6 +4,7 @@
 #include "health.h"
 #include "params.h"
 #include "txrx.h"
+#include "devlink.h"
 
 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 {
@@ -615,9 +616,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
 
 void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
 {
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 	struct devlink_health_reporter *reporter;
 
-	reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_rx_reporter_ops,
+	reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops,
 						       MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
 	if (IS_ERR(reporter)) {
 		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index d7275c84313e..db64fa2620c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -3,6 +3,7 @@
 
 #include "health.h"
 #include "en/ptp.h"
+#include "en/devlink.h"
 
 static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
 {
@@ -572,9 +573,10 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
 
 void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
 {
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 	struct devlink_health_reporter *reporter;
 
-	reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops,
+	reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops,
 						       MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
 	if (IS_ERR(reporter)) {
 		netdev_warn(priv->netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 1f15c6183dc1..b0604b113530 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -52,6 +52,7 @@
 #include "en/health.h"
 #include "en/params.h"
 #include "devlink.h"
+#include "en/devlink.h"
 
 static struct sk_buff *
 mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
@@ -1823,6 +1824,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 	struct mlx5e_priv *priv = netdev_priv(rq->netdev);
 	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 	struct mlx5e_wqe_frag_info *wi;
+	struct devlink_port *dl_port;
 	struct sk_buff *skb;
 	u32 cqe_bcnt;
 	u16 trap_id;
@@ -1845,7 +1847,8 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 	skb_push(skb, ETH_HLEN);
 
-	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port);
+	dl_port = mlx5e_devlink_get_dl_port(priv);
+	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
 	dev_kfree_skb_any(skb);
 
 free_wqe:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9887181dea5f..f1d0340e46a7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -650,6 +650,7 @@ struct mlx5e_resources {
 		struct mlx5_core_mkey      mkey;
 		struct mlx5_sq_bfreg       bfreg;
 	} hw_objs;
+	struct devlink_port dl_port;
 };
 
 enum mlx5_sw_icm_type {
-- 
cgit v1.2.3


From fec2b4bb39d961e5a804c34b7184e2cbbb2d7c2c Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 14 Jan 2021 16:05:56 +0200
Subject: net/mlx5e: Unregister eth-reps devices first

When we clean all the interfaces, i.e. rescan or reload module,
we need to clean eth-reps devices first, before eth devices.

We will re-use the native NIC port net device instance for the Uplink
representor. Changing eswitch mode will skip destroying the eth device
so the net device won't be destroyed and only change the profile.

Creating uplink eth-rep will initialize the representor related resources.
In that sense when we destroy all devices we first need to destroy
eth-rep devices so uplink eth-rep will clean all representor related
resources and only then destroy the eth device which will destroy rest
of the resources and the net device.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2f961bd9e528..685cf071a9de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5900,18 +5900,18 @@ int mlx5e_init(void)
 
 	mlx5e_ipsec_build_inverse_table();
 	mlx5e_build_ptys2ethtool_map();
-	ret = mlx5e_rep_init();
+	ret = auxiliary_driver_register(&mlx5e_driver);
 	if (ret)
 		return ret;
 
-	ret = auxiliary_driver_register(&mlx5e_driver);
+	ret = mlx5e_rep_init();
 	if (ret)
-		mlx5e_rep_cleanup();
+		auxiliary_driver_unregister(&mlx5e_driver);
 	return ret;
 }
 
 void mlx5e_cleanup(void)
 {
-	auxiliary_driver_unregister(&mlx5e_driver);
 	mlx5e_rep_cleanup();
+	auxiliary_driver_unregister(&mlx5e_driver);
 }
-- 
cgit v1.2.3


From 7a9fb35e8c3a67145fca262c304de65cb2f83abf Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:33 +0300
Subject: net/mlx5e: Do not reload ethernet ports when changing eswitch mode

When switching modes between legacy and switchdev and back, do not
reload ethernet interfaces. just change the profile from nic profile
to uplink rep profile in switchdev mode.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c      |   3 -
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   1 +
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   |   1 +
 .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   6 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 148 ++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h |   9 ++
 include/linux/mlx5/driver.h                        |   1 +
 8 files changed, 116 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index b051417ede67..4def64d0e669 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -58,9 +58,6 @@ static bool is_eth_supported(struct mlx5_core_dev *dev)
 	if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
 		return false;
 
-	if (is_eth_rep_supported(dev))
-		return false;
-
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 		return false;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4d621d142f76..1f5bc4d91060 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1173,6 +1173,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv);
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
 int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 				const struct mlx5e_profile *new_profile, void *new_ppriv);
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
 void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index f0a419fc4adf..34b3b316b688 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -635,4 +635,5 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
 		return;
 
 	devlink_port_health_reporter_destroy(priv->rx_reporter);
+	priv->rx_reporter = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index db64fa2620c4..63ee3b9416de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -593,4 +593,5 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
 		return;
 
 	devlink_port_health_reporter_destroy(priv->tx_reporter);
+	priv->tx_reporter = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 685cf071a9de..9c08f0bd1fcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5742,6 +5742,11 @@ rollback:
 	return err;
 }
 
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
+{
+	mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
+}
+
 void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
 {
 	struct net_device *netdev = priv->netdev;
@@ -5852,6 +5857,7 @@ static int mlx5e_probe(struct auxiliary_device *adev,
 	mlx5e_devlink_port_type_eth_set(priv);
 
 	mlx5e_dcbnl_init_app(priv);
+	mlx5_uplink_netdev_set(mdev, netdev);
 	return 0;
 
 err_resume:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 9533085005c3..4cc902e0d71b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -44,6 +44,7 @@
 #include "en_tc.h"
 #include "en/rep/tc.h"
 #include "en/rep/neigh.h"
+#include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
 #define CREATE_TRACE_POINTS
@@ -588,26 +589,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 }
 
 static void mlx5e_build_rep_netdev(struct net_device *netdev,
-				   struct mlx5_core_dev *mdev,
-				   struct mlx5_eswitch_rep *rep)
+				   struct mlx5_core_dev *mdev)
 {
 	SET_NETDEV_DEV(netdev, mdev->device);
-	if (rep->vport == MLX5_VPORT_UPLINK) {
-		netdev->netdev_ops = &mlx5e_netdev_ops;
-		/* we want a persistent mac for the uplink rep */
-		mlx5_query_mac_address(mdev, netdev->dev_addr);
-		netdev->ethtool_ops = &mlx5e_ethtool_ops;
-		mlx5e_dcbnl_build_rep_netdev(netdev);
-	} else {
-		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
-		eth_hw_addr_random(netdev);
-		netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
-	}
+	netdev->netdev_ops = &mlx5e_netdev_ops_rep;
+	eth_hw_addr_random(netdev);
+	netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
 
 	netdev->watchdog_timeo    = 15 * HZ;
 
-	netdev->features       |= NETIF_F_NETNS_LOCAL;
-
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 	netdev->hw_features    |= NETIF_F_HW_TC;
 #endif
@@ -619,12 +609,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev,
 	netdev->hw_features    |= NETIF_F_TSO6;
 	netdev->hw_features    |= NETIF_F_RXCSUM;
 
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
-	else
-		netdev->features |= NETIF_F_VLAN_CHALLENGED;
-
 	netdev->features |= netdev->hw_features;
+	netdev->features |= NETIF_F_VLAN_CHALLENGED;
+	netdev->features |= NETIF_F_NETNS_LOCAL;
 }
 
 static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@ -990,6 +977,14 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	mlx5e_dcbnl_initialize(priv);
 	mlx5e_dcbnl_init_app(priv);
 	mlx5e_rep_neigh_init(rpriv);
+
+	netdev->wanted_features |= NETIF_F_HW_TC;
+
+	rtnl_lock();
+	if (netif_running(netdev))
+		mlx5e_open(netdev);
+	netif_device_attach(netdev);
+	rtnl_unlock();
 }
 
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
@@ -997,6 +992,12 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	rtnl_lock();
+	if (netif_running(priv->netdev))
+		mlx5e_close(priv->netdev);
+	netif_device_detach(priv->netdev);
+	rtnl_unlock();
+
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
@@ -1081,26 +1082,56 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
 
 /* e-Switch vport representors */
 static int
-mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
+	struct devlink_port *dl_port;
+	int err;
+
+	rpriv->netdev = priv->netdev;
+
+	err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
+					  rpriv);
+	if (err)
+		return err;
+
+	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+	if (dl_port)
+		devlink_port_type_eth_set(dl_port, rpriv->netdev);
+
+	return 0;
+}
+
+static void
+mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv)
+{
+	struct net_device *netdev = rpriv->netdev;
+	struct devlink_port *dl_port;
+	struct mlx5_core_dev *dev;
+	struct mlx5e_priv *priv;
+
+	priv = netdev_priv(netdev);
+	dev = priv->mdev;
+
+	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
+	if (dl_port)
+		devlink_port_type_clear(dl_port);
+	mlx5e_netdev_attach_nic_profile(priv);
+}
+
+static int
+mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
+	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
 	const struct mlx5e_profile *profile;
-	struct mlx5e_rep_priv *rpriv;
 	struct devlink_port *dl_port;
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
 	unsigned int txqs, rxqs;
 	int nch, err;
 
-	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
-	if (!rpriv)
-		return -ENOMEM;
-
-	/* rpriv->rep to be looked up when profile->init() is called */
-	rpriv->rep = rep;
-
-	profile = (rep->vport == MLX5_VPORT_UPLINK) ?
-		  &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
-
+	profile = &mlx5e_rep_profile;
 	nch = mlx5e_get_max_num_channels(dev);
 	txqs = nch * profile->max_tc;
 	rxqs = nch * profile->rq_groups;
@@ -1109,21 +1140,11 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 		mlx5_core_warn(dev,
 			       "Failed to create representor netdev for vport %d\n",
 			       rep->vport);
-		kfree(rpriv);
 		return -EINVAL;
 	}
 
-	mlx5e_build_rep_netdev(netdev, dev, rep);
-
+	mlx5e_build_rep_netdev(netdev, dev);
 	rpriv->netdev = netdev;
-	rep->rep_data[REP_ETH].priv = rpriv;
-	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
-
-	if (rep->vport == MLX5_VPORT_UPLINK) {
-		err = mlx5e_create_mdev_resources(dev);
-		if (err)
-			goto err_destroy_netdev;
-	}
 
 	priv = netdev_priv(netdev);
 	priv->profile = profile;
@@ -1131,7 +1152,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	err = profile->init(dev, netdev);
 	if (err) {
 		netdev_warn(netdev, "rep profile init failed, %d\n", err);
-		goto err_destroy_mdev_resources;
+		goto err_destroy_netdev;
 	}
 
 	err = mlx5e_attach_netdev(netdev_priv(netdev));
@@ -1161,13 +1182,34 @@ err_detach_netdev:
 err_cleanup_profile:
 	priv->profile->cleanup(priv);
 
-err_destroy_mdev_resources:
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		mlx5e_destroy_mdev_resources(dev);
-
 err_destroy_netdev:
 	mlx5e_destroy_netdev(netdev_priv(netdev));
-	kfree(rpriv);
+	return err;
+}
+
+static int
+mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+	struct mlx5e_rep_priv *rpriv;
+	int err;
+
+	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
+	if (!rpriv)
+		return -ENOMEM;
+
+	/* rpriv->rep to be looked up when profile->init() is called */
+	rpriv->rep = rep;
+	rep->rep_data[REP_ETH].priv = rpriv;
+	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
+
+	if (rep->vport == MLX5_VPORT_UPLINK)
+		err = mlx5e_vport_uplink_rep_load(dev, rep);
+	else
+		err = mlx5e_vport_vf_rep_load(dev, rep);
+
+	if (err)
+		kfree(rpriv);
+
 	return err;
 }
 
@@ -1181,15 +1223,19 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	struct devlink_port *dl_port;
 	void *ppriv = priv->ppriv;
 
+	if (rep->vport == MLX5_VPORT_UPLINK) {
+		mlx5e_vport_uplink_rep_unload(rpriv);
+		goto free_ppriv;
+	}
+
 	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
 	if (dl_port)
 		devlink_port_type_clear(dl_port);
 	unregister_netdev(netdev);
 	mlx5e_detach_netdev(priv);
 	priv->profile->cleanup(priv);
-	if (rep->vport == MLX5_VPORT_UPLINK)
-		mlx5e_destroy_mdev_resources(priv->mdev);
 	mlx5e_destroy_netdev(priv);
+free_ppriv:
 	kfree(ppriv); /* mlx5e_rep_priv */
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index d046db7bb047..2f536c5d30b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -95,4 +95,13 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
 	return devlink_net(priv_to_devlink(dev));
 }
 
+static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+	mdev->mlx5e_res.uplink_netdev = netdev;
+}
+
+static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev)
+{
+	return mdev->mlx5e_res.uplink_netdev;
+}
 #endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f1d0340e46a7..23bb01d7c9b9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -651,6 +651,7 @@ struct mlx5e_resources {
 		struct mlx5_sq_bfreg       bfreg;
 	} hw_objs;
 	struct devlink_port dl_port;
+	struct net_device *uplink_netdev;
 };
 
 enum mlx5_sw_icm_type {
-- 
cgit v1.2.3


From c55479d0cb6a28029844d0e90730704a0fb5efd3 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:42 +0300
Subject: net/mlx5: E-Switch, Change mode lock from mutex to rw semaphore

E-Switch mode change routine will take the write lock to prevent any
consumer to access the E-Switch resources while E-Switch is going
through a mode change.

In the next patch
E-Switch consumers (e.g vport representors) will take read_lock prior to
accessing E-Switch resources to prevent E-Switch mode changing in the
middle of the operation.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 11 +++++----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  2 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 26 +++++++++++-----------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9eb8e7a22dc2..ddee2aefe8b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1720,7 +1720,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 	if (!ESW_ALLOWED(esw))
 		return 0;
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	if (esw->mode == MLX5_ESWITCH_NONE) {
 		ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
 	} else {
@@ -1732,7 +1732,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 		if (!ret)
 			esw->esw_funcs.num_vfs = num_vfs;
 	}
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return ret;
 }
 
@@ -1780,10 +1780,10 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 	if (!ESW_ALLOWED(esw))
 		return;
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	mlx5_eswitch_disable_locked(esw, clear_vf);
 	esw->esw_funcs.num_vfs = 0;
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 }
 
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
@@ -1840,7 +1840,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	ida_init(&esw->offloads.vport_metadata_ida);
 	xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
 	mutex_init(&esw->state_lock);
-	mutex_init(&esw->mode_lock);
+	init_rwsem(&esw->mode_lock);
 
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
@@ -1876,7 +1876,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
-	mutex_destroy(&esw->mode_lock);
 	mutex_destroy(&esw->state_lock);
 	WARN_ON(!xa_empty(&esw->offloads.vhca_map));
 	xa_destroy(&esw->offloads.vhca_map);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index d0b907a9ef28..b149d1d2c150 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -271,7 +271,7 @@ struct mlx5_eswitch {
 	/* Protects eswitch mode change that occurs via one or more
 	 * user commands, i.e. sriov state change, devlink commands.
 	 */
-	struct mutex mode_lock;
+	struct rw_semaphore mode_lock;
 
 	struct {
 		bool            enabled;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e1e33e991123..5e2712521fec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2925,7 +2925,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	cur_mlx5_mode = esw->mode;
 	if (cur_mlx5_mode == mlx5_mode)
 		goto unlock;
@@ -2938,7 +2938,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 		err = -EINVAL;
 
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -2951,14 +2951,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	err = esw_mode_to_devlink(esw->mode, mode);
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -2974,7 +2974,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto out;
@@ -3013,7 +3013,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	}
 
 	esw->offloads.inline_mode = mlx5_mode;
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return 0;
 
 revert_inline_mode:
@@ -3023,7 +3023,7 @@ revert_inline_mode:
 						 vport,
 						 esw->offloads.inline_mode);
 out:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3036,14 +3036,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3059,7 +3059,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
@@ -3105,7 +3105,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
 	}
 
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return err;
 }
 
@@ -3120,14 +3120,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
 		return PTR_ERR(esw);
 
 
-	mutex_lock(&esw->mode_lock);
+	down_write(&esw->mode_lock);
 	err = eswitch_devlink_esw_mode_check(esw);
 	if (err)
 		goto unlock;
 
 	*encap = esw->offloads.encap;
 unlock:
-	mutex_unlock(&esw->mode_lock);
+	up_write(&esw->mode_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7dc84de98babc709910947b24e8cd1c2e01c7857 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 16 Sep 2020 10:11:47 +0300
Subject: net/mlx5: E-Switch, Protect changing mode while adding rules

We re-use the native NIC port net device instance for the Uplink
representor, a driver currently cannot unbind TC setup callback
actively, hence protect changing E-Switch mode while adding rules.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  9 +++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 93 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  9 +++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++-
 4 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b3bf7bb7b97e..730f33ada90a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4323,6 +4323,11 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 	int err = 0;
 
+	if (!mlx5_esw_hold(priv->mdev))
+		return -EAGAIN;
+
+	mlx5_esw_get(priv->mdev);
+
 	rcu_read_lock();
 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 	if (flow) {
@@ -4360,11 +4365,14 @@ rcu_unlock:
 	if (err)
 		goto err_free;
 
+	mlx5_esw_release(priv->mdev);
 	return 0;
 
 err_free:
 	mlx5e_flow_put(priv, flow);
 out:
+	mlx5_esw_put(priv->mdev);
+	mlx5_esw_release(priv->mdev);
 	return err;
 }
 
@@ -4404,6 +4412,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	trace_mlx5e_delete_flower(f);
 	mlx5e_flow_put(priv, flow);
 
+	mlx5_esw_put(priv->mdev);
 	return 0;
 
 errout:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ddee2aefe8b9..6b260dacf853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -435,6 +435,7 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
 	esw->fdb_table.legacy.addr_grp = NULL;
 	esw->fdb_table.legacy.allmulti_grp = NULL;
 	esw->fdb_table.legacy.promisc_grp = NULL;
+	atomic64_set(&esw->user_count, 0);
 }
 
 static int esw_create_legacy_table(struct mlx5_eswitch *esw)
@@ -442,6 +443,7 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw)
 	int err;
 
 	memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+	atomic64_set(&esw->user_count, 0);
 
 	err = esw_create_legacy_vepa_table(esw);
 	if (err)
@@ -2581,3 +2583,94 @@ void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifie
 {
 	blocking_notifier_chain_unregister(&esw->n_head, nb);
 }
+
+/**
+ * mlx5_esw_hold() - Try to take a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ *
+ * Should be called by esw resources callers.
+ *
+ * Return: true on success or false.
+ */
+bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	/* e.g. VF doesn't have eswitch so nothing to do */
+	if (!ESW_ALLOWED(esw))
+		return true;
+
+	if (down_read_trylock(&esw->mode_lock) != 0)
+		return true;
+
+	return false;
+}
+
+/**
+ * mlx5_esw_release() - Release a read lock on esw mode lock.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_release(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		up_read(&esw->mode_lock);
+}
+
+/**
+ * mlx5_esw_get() - Increase esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_get(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		atomic64_inc(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_put() - Decrease esw user count.
+ * @mdev: mlx5 core device.
+ */
+void mlx5_esw_put(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+	if (ESW_ALLOWED(esw))
+		atomic64_dec_if_positive(&esw->user_count);
+}
+
+/**
+ * mlx5_esw_try_lock() - Take a write lock on esw mode lock.
+ * @esw: eswitch device.
+ *
+ * Should be called by esw mode change routine.
+ *
+ * Return:
+ * * 0       - esw mode if successfully locked and refcount is 0.
+ * * -EBUSY  - refcount is not 0.
+ * * -EINVAL - In the middle of switching mode or lock is already held.
+ */
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
+{
+	if (down_write_trylock(&esw->mode_lock) == 0)
+		return -EINVAL;
+
+	if (atomic64_read(&esw->user_count) > 0) {
+		up_write(&esw->mode_lock);
+		return -EBUSY;
+	}
+
+	return esw->mode;
+}
+
+/**
+ * mlx5_esw_unlock() - Release write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_unlock(struct mlx5_eswitch *esw)
+{
+	up_write(&esw->mode_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b149d1d2c150..56d85cedb9bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -272,6 +272,7 @@ struct mlx5_eswitch {
 	 * user commands, i.e. sriov state change, devlink commands.
 	 */
 	struct rw_semaphore mode_lock;
+	atomic64_t user_count;
 
 	struct {
 		bool            enabled;
@@ -761,6 +762,14 @@ struct mlx5_esw_event_info {
 
 int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n);
 void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n);
+
+bool mlx5_esw_hold(struct mlx5_core_dev *dev);
+void mlx5_esw_release(struct mlx5_core_dev *dev);
+void mlx5_esw_get(struct mlx5_core_dev *dev);
+void mlx5_esw_put(struct mlx5_core_dev *dev);
+int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
+void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 5e2712521fec..8e7a702e23a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1854,6 +1854,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 	/* Holds true only as long as DMFS is the default */
 	mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
 				     MLX5_FLOW_STEERING_MODE_DMFS);
+	atomic64_set(&esw->user_count, 0);
 }
 
 static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@ -2584,6 +2585,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
 	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
 	mutex_init(&esw->fdb_table.offloads.vports.lock);
 	hash_init(esw->fdb_table.offloads.vports.table);
+	atomic64_set(&esw->user_count, 0);
 
 	indir = mlx5_esw_indir_table_init();
 	if (IS_ERR(indir)) {
@@ -2925,8 +2927,14 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
-	down_write(&esw->mode_lock);
-	cur_mlx5_mode = esw->mode;
+	err = mlx5_esw_try_lock(esw);
+	if (err < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
+		return err;
+	}
+	cur_mlx5_mode = err;
+	err = 0;
+
 	if (cur_mlx5_mode == mlx5_mode)
 		goto unlock;
 
@@ -2938,7 +2946,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 		err = -EINVAL;
 
 unlock:
-	up_write(&esw->mode_lock);
+	mlx5_esw_unlock(esw);
 	return err;
 }
 
-- 
cgit v1.2.3


From 87df8bcccd2cede62dfb97dc3d4ca1fe66cb4f83 Mon Sep 17 00:00:00 2001
From: Ayush Garg <ayush.garg@samsung.com>
Date: Wed, 17 Mar 2021 16:52:14 +0530
Subject: Bluetooth: Fix incorrect status handling in LE PHY UPDATE event

Skip updation of tx and rx PHYs values, when PHY Update
event's status is not successful.

Signed-off-by: Ayush Garg <ayush.garg@samsung.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f4a734f8a9ac..cf2f4a0abdbd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5938,7 +5938,7 @@ static void hci_le_phy_update_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
 
-	if (!ev->status)
+	if (ev->status)
 		return;
 
 	hci_dev_lock(hdev);
-- 
cgit v1.2.3


From 7888fe53b7066c284e172d98d98d1865d6a9e5a0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:30:36 -0700
Subject: ethtool: Add common function for filling out strings

Add a function to handle the common pattern of printing a string into the
ethtool strings interface and incrementing the string pointer by the
ETH_GSTRING_LEN. Most of the drivers end up doing this and several have
implemented their own versions of this function so it would make sense to
consolidate on one implementation.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  9 +++++++++
 net/ethtool/ioctl.c     | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index ec4cd3921c67..3583f7fc075c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -571,4 +571,13 @@ struct ethtool_phy_ops {
  */
 void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
 
+/**
+ * ethtool_sprintf - Write formatted string to ethtool string data
+ * @data: Pointer to start of string to update
+ * @fmt: Format of string to write
+ *
+ * Write formatted string to data. Update data to point at start of
+ * next string.
+ */
+extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 24783b71c584..0788cc3b3114 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1844,6 +1844,18 @@ out:
 	return ret;
 }
 
+__printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(*data, ETH_GSTRING_LEN, fmt, args);
+	va_end(args);
+
+	*data += ETH_GSTRING_LEN;
+}
+EXPORT_SYMBOL(ethtool_sprintf);
+
 static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_value id;
-- 
cgit v1.2.3


From c8d4725e985da50979918f57db137e03cb6c55e8 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:30:44 -0700
Subject: intel: Update drivers to use ethtool_sprintf

Update the Intel drivers to make use of ethtool_sprintf. The general idea
is to reduce code size and overhead by replacing the repeated pattern of
string printf statements and ETH_STRING_LEN counter increments.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c   | 16 +++----
 drivers/net/ethernet/intel/ice/ice_ethtool.c     | 55 ++++++++----------------
 drivers/net/ethernet/intel/igb/igb_ethtool.c     | 40 ++++++-----------
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 40 ++++++-----------
 4 files changed, 50 insertions(+), 101 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c70dec65a572..3c9054e13aa5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -2368,21 +2368,15 @@ static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data)
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
-	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
-			 i40e_gstrings_priv_flags[i].flag_string);
-		p += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string);
 	if (pf->hw.pf_id != 0)
 		return;
-	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
-		snprintf(p, ETH_GSTRING_LEN, "%s",
-			 i40e_gl_gstrings_priv_flags[i].flag_string);
-		p += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string);
 }
 
 static void i40e_get_strings(struct net_device *netdev, u32 stringset,
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 2dcfa685b763..4f738425fb44 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -871,68 +871,47 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_vsi_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_VSI_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_vsi_stats[i].stat_string);
 
 		ice_for_each_alloc_txq(vsi, i) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
 		}
 
 		if (vsi->type != ICE_VSI_PF)
 			return;
 
-		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_pf_stats[i].stat_string);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_PF_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_pf_stats[i].stat_string);
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_priority_%u_xon.nic", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "tx_priority_%u_xoff.nic", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i);
 		}
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_priority_%u_xon.nic", i);
-			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN,
-				 "rx_priority_%u_xoff.nic", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i);
 		}
 		break;
 	case ETH_SS_TEST:
 		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_PRIV_FLAGS:
-		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "%s",
-				 ice_gstrings_priv_flags[i].name);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++)
+			ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 28baf203459a..4ab9f468f08e 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2347,35 +2347,23 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 			IGB_TEST_LEN*ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, igb_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) {
-			memcpy(p, igb_gstrings_net_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_stats[i].stat_string);
+		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_net_stats[i].stat_string);
 		for (i = 0; i < adapter->num_tx_queues; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_restart", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_restart", i);
 		}
 		for (i = 0; i < adapter->num_rx_queues; i++) {
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_drops", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_csum_err", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_alloc_failed", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_drops", i);
+			ethtool_sprintf(&p, "rx_queue_%u_csum_err", i);
+			ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i);
 		}
 		/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index a280aa34ca1d..4ceaca0f6ce3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1368,45 +1368,33 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
 			      u8 *data)
 {
-	char *p = (char *)data;
 	unsigned int i;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_TEST:
-		for (i = 0; i < IXGBE_TEST_LEN; i++) {
-			memcpy(data, ixgbe_gstrings_test[i], ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IXGBE_TEST_LEN; i++)
+			ethtool_sprintf(&p, ixgbe_gstrings_test[i]);
 		break;
 	case ETH_SS_STATS:
-		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, ixgbe_gstrings_stats[i].stat_string,
-			       ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ixgbe_gstrings_stats[i].stat_string);
 		for (i = 0; i < netdev->num_tx_queues; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
 		}
 		for (i = 0; i < IXGBE_NUM_RX_QUEUES; i++) {
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
 		}
 		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
-			sprintf(p, "tx_pb_%u_pxon", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_pb_%u_pxoff", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "tx_pb_%u_pxoff", i);
 		}
 		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
-			sprintf(p, "rx_pb_%u_pxon", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_pb_%u_pxoff", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "rx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "rx_pb_%u_pxoff", i);
 		}
 		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
-- 
cgit v1.2.3


From 6a143a7cf94730f57544ea14a987dc025364dbb8 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:30:53 -0700
Subject: nfp: Replace nfp_pr_et with ethtool_sprintf

The nfp_pr_et function is nearly identical to ethtool_sprintf except for
the fact that it passes the pointer by value and as a return whereas
ethtool_sprintf passes it as a pointer.

Since they are so close just update nfp to make use of ethtool_sprintf

Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/abm/main.c      |  4 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 79 ++++++++++------------
 drivers/net/ethernet/netronome/nfp/nfp_port.h      |  2 -
 3 files changed, 36 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index bdbf0726145e..605a1617b195 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -419,8 +419,8 @@ nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port,
 		return data;
 	alink = repr->app_priv;
 	for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) {
-		data = nfp_pr_et(data, "q%u_no_wait", i);
-		data = nfp_pr_et(data, "q%u_delayed", i);
+		ethtool_sprintf(&data, "q%u_no_wait", i);
+		ethtool_sprintf(&data, "q%u_delayed", i);
 	}
 	return data;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 9c9ae33d84ce..1b482446536d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -429,17 +429,6 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
-__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	vsnprintf(data, ETH_GSTRING_LEN, fmt, args);
-	va_end(args);
-
-	return data + ETH_GSTRING_LEN;
-}
-
 static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
@@ -454,29 +443,29 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
 	int i;
 
 	for (i = 0; i < nn->max_r_vecs; i++) {
-		data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
-		data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
-		data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
+		ethtool_sprintf(&data, "rvec_%u_rx_pkts", i);
+		ethtool_sprintf(&data, "rvec_%u_tx_pkts", i);
+		ethtool_sprintf(&data, "rvec_%u_tx_busy", i);
 	}
 
-	data = nfp_pr_et(data, "hw_rx_csum_ok");
-	data = nfp_pr_et(data, "hw_rx_csum_inner_ok");
-	data = nfp_pr_et(data, "hw_rx_csum_complete");
-	data = nfp_pr_et(data, "hw_rx_csum_err");
-	data = nfp_pr_et(data, "rx_replace_buf_alloc_fail");
-	data = nfp_pr_et(data, "rx_tls_decrypted_packets");
-	data = nfp_pr_et(data, "hw_tx_csum");
-	data = nfp_pr_et(data, "hw_tx_inner_csum");
-	data = nfp_pr_et(data, "tx_gather");
-	data = nfp_pr_et(data, "tx_lso");
-	data = nfp_pr_et(data, "tx_tls_encrypted_packets");
-	data = nfp_pr_et(data, "tx_tls_ooo");
-	data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
-
-	data = nfp_pr_et(data, "hw_tls_no_space");
-	data = nfp_pr_et(data, "rx_tls_resync_req_ok");
-	data = nfp_pr_et(data, "rx_tls_resync_req_ign");
-	data = nfp_pr_et(data, "rx_tls_resync_sent");
+	ethtool_sprintf(&data, "hw_rx_csum_ok");
+	ethtool_sprintf(&data, "hw_rx_csum_inner_ok");
+	ethtool_sprintf(&data, "hw_rx_csum_complete");
+	ethtool_sprintf(&data, "hw_rx_csum_err");
+	ethtool_sprintf(&data, "rx_replace_buf_alloc_fail");
+	ethtool_sprintf(&data, "rx_tls_decrypted_packets");
+	ethtool_sprintf(&data, "hw_tx_csum");
+	ethtool_sprintf(&data, "hw_tx_inner_csum");
+	ethtool_sprintf(&data, "tx_gather");
+	ethtool_sprintf(&data, "tx_lso");
+	ethtool_sprintf(&data, "tx_tls_encrypted_packets");
+	ethtool_sprintf(&data, "tx_tls_ooo");
+	ethtool_sprintf(&data, "tx_tls_drop_no_sync_data");
+
+	ethtool_sprintf(&data, "hw_tls_no_space");
+	ethtool_sprintf(&data, "rx_tls_resync_req_ok");
+	ethtool_sprintf(&data, "rx_tls_resync_req_ign");
+	ethtool_sprintf(&data, "rx_tls_resync_sent");
 
 	return data;
 }
@@ -550,19 +539,19 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr)
 	swap_off = repr * NN_ET_SWITCH_STATS_LEN;
 
 	for (i = 0; i < NN_ET_SWITCH_STATS_LEN; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i + swap_off].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i + swap_off].name);
 
 	for (i = NN_ET_SWITCH_STATS_LEN; i < NN_ET_SWITCH_STATS_LEN * 2; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i - swap_off].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i - swap_off].name);
 
 	for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++)
-		data = nfp_pr_et(data, nfp_net_et_stats[i].name);
+		ethtool_sprintf(&data, nfp_net_et_stats[i].name);
 
 	for (i = 0; i < num_vecs; i++) {
-		data = nfp_pr_et(data, "rxq_%u_pkts", i);
-		data = nfp_pr_et(data, "rxq_%u_bytes", i);
-		data = nfp_pr_et(data, "txq_%u_pkts", i);
-		data = nfp_pr_et(data, "txq_%u_bytes", i);
+		ethtool_sprintf(&data, "rxq_%u_pkts", i);
+		ethtool_sprintf(&data, "rxq_%u_bytes", i);
+		ethtool_sprintf(&data, "txq_%u_pkts", i);
+		ethtool_sprintf(&data, "txq_%u_bytes", i);
 	}
 
 	return data;
@@ -610,15 +599,15 @@ static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data)
 			memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN);
 			data += ETH_GSTRING_LEN;
 		} else {
-			data = nfp_pr_et(data, "dev_unknown_stat%u", id);
+			ethtool_sprintf(&data, "dev_unknown_stat%u", id);
 		}
 	}
 
 	for (i = 0; i < nn->max_r_vecs; i++) {
-		data = nfp_pr_et(data, "rxq_%u_pkts", i);
-		data = nfp_pr_et(data, "rxq_%u_bytes", i);
-		data = nfp_pr_et(data, "txq_%u_pkts", i);
-		data = nfp_pr_et(data, "txq_%u_bytes", i);
+		ethtool_sprintf(&data, "rxq_%u_pkts", i);
+		ethtool_sprintf(&data, "rxq_%u_bytes", i);
+		ethtool_sprintf(&data, "txq_%u_pkts", i);
+		ethtool_sprintf(&data, "txq_%u_bytes", i);
 	}
 
 	return data;
@@ -666,7 +655,7 @@ static u8 *nfp_mac_get_stats_strings(struct net_device *netdev, u8 *data)
 		return data;
 
 	for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++)
-		data = nfp_pr_et(data, "mac.%s", nfp_mac_et_stats[i].name);
+		ethtool_sprintf(&data, "mac.%s", nfp_mac_et_stats[i].name);
 
 	return data;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index d7fd203bb180..ae4da189d955 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -92,8 +92,6 @@ struct nfp_port {
 
 extern const struct ethtool_ops nfp_port_ethtool_ops;
 
-__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...);
-
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 		      void *type_data);
 
-- 
cgit v1.2.3


From 83cd23974a73b2c8f91983b7130581211aa13522 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:03 -0700
Subject: hisilicon: Update drivers to use ethtool_sprintf

Update the hisilicon drivers to make use of ethtool_sprintf. The general
idea is to reduce code size and overhead by replacing the repeated pattern
of string printf statements and ETH_STRING_LEN counter increments.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c |   9 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c  |  41 +++-----
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c  |  91 +++++++-----------
 .../net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c    |   8 +-
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c   | 103 ++++++++-------------
 5 files changed, 90 insertions(+), 162 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 7fb7a419607d..04878b145626 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -687,17 +687,14 @@ static void hns_gmac_get_stats(void *mac_drv, u64 *data)
 
 static void hns_gmac_get_strings(u32 stringset, u8 *data)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 	u32 i;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) {
-		snprintf(buff, ETH_GSTRING_LEN, "%s",
-			 g_gmac_stats_string[i].desc);
-		buff = buff + ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++)
+		ethtool_sprintf(&buff, g_gmac_stats_string[i].desc);
 }
 
 static int hns_gmac_get_sset_count(int stringset)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index d0f8b1fff333..ff03cafccb66 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -462,33 +462,22 @@ int hns_ppe_get_regs_count(void)
  */
 void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data)
 {
-	char *buff = (char *)data;
 	int index = ppe_cb->index;
-
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_sw_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_ok", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_drop_pkt_no_bd", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_fail", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_wait", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_drop_no_buf", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_err_fifo_full", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_bd", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_ok", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_fifo_empty", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_csum_fail", index);
+	u8 *buff = data;
+
+	ethtool_sprintf(&buff, "ppe%d_rx_sw_pkt", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_ok", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_drop_pkt_no_bd", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_fail", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_wait", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_drop_no_buf", index);
+	ethtool_sprintf(&buff, "ppe%d_rx_pkt_err_fifo_full", index);
+
+	ethtool_sprintf(&buff, "ppe%d_tx_bd", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_ok", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_fifo_empty", index);
+	ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_csum_fail", index);
 }
 
 void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index b6c8910cf7ba..37c8effa421c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -929,69 +929,42 @@ int hns_rcb_get_ring_regs_count(void)
  */
 void hns_rcb_get_strings(int stringset, u8 *data, int index)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_rcb_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_tx_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_drop_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_fbd_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_bytes", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_err_cnt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_io_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_sw_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_seg_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_restart_queue", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_tx_busy", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_rcb_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_drop_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_fbd_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_pkt_num", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bytes", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_err_cnt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_io_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_sw_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_seg_pkt", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_reuse_pg", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_len_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_non_vld_desc_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bd_num_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l2_err", index);
-	buff = buff + ETH_GSTRING_LEN;
-	snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l3l4csum_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_rcb_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_ppe_tx_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_ppe_drop_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_fbd_num", index);
+
+	ethtool_sprintf(&buff, "tx_ring%d_pkt_num", index);
+	ethtool_sprintf(&buff, "tx_ring%d_bytes", index);
+	ethtool_sprintf(&buff, "tx_ring%d_err_cnt", index);
+	ethtool_sprintf(&buff, "tx_ring%d_io_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_sw_err", index);
+	ethtool_sprintf(&buff, "tx_ring%d_seg_pkt", index);
+	ethtool_sprintf(&buff, "tx_ring%d_restart_queue", index);
+	ethtool_sprintf(&buff, "tx_ring%d_tx_busy", index);
+
+	ethtool_sprintf(&buff, "rx_ring%d_rcb_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_ppe_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_ppe_drop_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_fbd_num", index);
+
+	ethtool_sprintf(&buff, "rx_ring%d_pkt_num", index);
+	ethtool_sprintf(&buff, "rx_ring%d_bytes", index);
+	ethtool_sprintf(&buff, "rx_ring%d_err_cnt", index);
+	ethtool_sprintf(&buff, "rx_ring%d_io_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_sw_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_seg_pkt", index);
+	ethtool_sprintf(&buff, "rx_ring%d_reuse_pg", index);
+	ethtool_sprintf(&buff, "rx_ring%d_len_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_non_vld_desc_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_bd_num_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_l2_err", index);
+	ethtool_sprintf(&buff, "rx_ring%d_l3l4csum_err", index);
 }
 
 void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 7e3609ce112a..f5145451ba6f 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -758,16 +758,14 @@ static void hns_xgmac_get_stats(void *mac_drv, u64 *data)
  */
 static void hns_xgmac_get_strings(u32 stringset, u8 *data)
 {
-	char *buff = (char *)data;
+	u8 *buff = data;
 	u32 i;
 
 	if (stringset != ETH_SS_STATS)
 		return;
 
-	for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++) {
-		snprintf(buff, ETH_GSTRING_LEN, g_xgmac_stats_string[i].desc);
-		buff = buff + ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++)
+		ethtool_sprintf(&buff, g_xgmac_stats_string[i].desc);
 }
 
 /**
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index a6e3f07caf99..1af664779d33 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -895,7 +895,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_handle *h = priv->ae_handle;
-	char *buff = (char *)data;
+	u8 *buff = data;
 
 	if (!h->dev->ops->get_strings) {
 		netdev_err(netdev, "h->dev->ops->get_strings is null!\n");
@@ -903,74 +903,45 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	}
 
 	if (stringset == ETH_SS_TEST) {
-		if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII) {
-			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_MAC],
-			       ETH_GSTRING_LEN);
-			buff += ETH_GSTRING_LEN;
-		}
-		memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES],
-		       ETH_GSTRING_LEN);
-		buff += ETH_GSTRING_LEN;
+		if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII)
+			ethtool_sprintf(&buff,
+					hns_nic_test_strs[MAC_INTERNALLOOP_MAC]);
+		ethtool_sprintf(&buff,
+				hns_nic_test_strs[MAC_INTERNALLOOP_SERDES]);
 		if ((netdev->phydev) && (!netdev->phydev->is_c45))
-			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY],
-			       ETH_GSTRING_LEN);
+			ethtool_sprintf(&buff,
+					hns_nic_test_strs[MAC_INTERNALLOOP_PHY]);
 
 	} else {
-		snprintf(buff, ETH_GSTRING_LEN, "rx_packets");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_packets");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_bytes");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_bytes");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "multicast");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "collisions");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_over_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_crc_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_frame_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_fifo_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_missed_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_aborted_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_carrier_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_fifo_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_heartbeat_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_length_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_window_errors");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "rx_compressed");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "tx_compressed");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_rx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_dropped");
-		buff = buff + ETH_GSTRING_LEN;
-
-		snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_timeout");
-		buff = buff + ETH_GSTRING_LEN;
-
-		h->dev->ops->get_strings(h, stringset, (u8 *)buff);
+		ethtool_sprintf(&buff, "rx_packets");
+		ethtool_sprintf(&buff, "tx_packets");
+		ethtool_sprintf(&buff, "rx_bytes");
+		ethtool_sprintf(&buff, "tx_bytes");
+		ethtool_sprintf(&buff, "rx_errors");
+		ethtool_sprintf(&buff, "tx_errors");
+		ethtool_sprintf(&buff, "rx_dropped");
+		ethtool_sprintf(&buff, "tx_dropped");
+		ethtool_sprintf(&buff, "multicast");
+		ethtool_sprintf(&buff, "collisions");
+		ethtool_sprintf(&buff, "rx_over_errors");
+		ethtool_sprintf(&buff, "rx_crc_errors");
+		ethtool_sprintf(&buff, "rx_frame_errors");
+		ethtool_sprintf(&buff, "rx_fifo_errors");
+		ethtool_sprintf(&buff, "rx_missed_errors");
+		ethtool_sprintf(&buff, "tx_aborted_errors");
+		ethtool_sprintf(&buff, "tx_carrier_errors");
+		ethtool_sprintf(&buff, "tx_fifo_errors");
+		ethtool_sprintf(&buff, "tx_heartbeat_errors");
+		ethtool_sprintf(&buff, "rx_length_errors");
+		ethtool_sprintf(&buff, "tx_window_errors");
+		ethtool_sprintf(&buff, "rx_compressed");
+		ethtool_sprintf(&buff, "tx_compressed");
+		ethtool_sprintf(&buff, "netdev_rx_dropped");
+		ethtool_sprintf(&buff, "netdev_tx_dropped");
+
+		ethtool_sprintf(&buff, "netdev_tx_timeout");
+
+		h->dev->ops->get_strings(h, stringset, buff);
 	}
 }
 
-- 
cgit v1.2.3


From efbbe4fb5976bfbba2ac3dbfe52505690e4993c3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:11 -0700
Subject: ena: Update driver to use ethtool_sprintf

Replace instances of snprintf or memcpy with a pointer update with
ethtool_sprintf.

Acked-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index d6cc7aa612b7..2fe7ccee55b2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -251,10 +251,10 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
 		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
 			ena_stats = &ena_stats_tx_strings[j];
 
-			snprintf(*data, ETH_GSTRING_LEN,
-				 "queue_%u_%s_%s", i,
-				 is_xdp ? "xdp_tx" : "tx", ena_stats->name);
-			(*data) += ETH_GSTRING_LEN;
+			ethtool_sprintf(data,
+					"queue_%u_%s_%s", i,
+					is_xdp ? "xdp_tx" : "tx",
+					ena_stats->name);
 		}
 
 		if (!is_xdp) {
@@ -264,9 +264,9 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
 			for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
 				ena_stats = &ena_stats_rx_strings[j];
 
-				snprintf(*data, ETH_GSTRING_LEN,
-					 "queue_%u_rx_%s", i, ena_stats->name);
-				(*data) += ETH_GSTRING_LEN;
+				ethtool_sprintf(data,
+						"queue_%u_rx_%s", i,
+						ena_stats->name);
 			}
 		}
 	}
@@ -280,9 +280,8 @@ static void ena_com_dev_strings(u8 **data)
 	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
 		ena_stats = &ena_stats_ena_com_strings[i];
 
-		snprintf(*data, ETH_GSTRING_LEN,
-			 "ena_admin_q_%s", ena_stats->name);
-		(*data) += ETH_GSTRING_LEN;
+		ethtool_sprintf(data,
+				"ena_admin_q_%s", ena_stats->name);
 	}
 }
 
@@ -295,15 +294,13 @@ static void ena_get_strings(struct ena_adapter *adapter,
 
 	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
 		ena_stats = &ena_stats_global_strings[i];
-		memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
-		data += ETH_GSTRING_LEN;
+		ethtool_sprintf(&data, ena_stats->name);
 	}
 
 	if (eni_stats_needed) {
 		for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) {
 			ena_stats = &ena_stats_eni_strings[i];
-			memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
+			ethtool_sprintf(&data, ena_stats->name);
 		}
 	}
 
-- 
cgit v1.2.3


From 3ae0ed376d1c2981715c540ba7ad3bf43fec244c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:20 -0700
Subject: netvsc: Update driver to use ethtool_sprintf

Replace instances of sprintf or memcpy with a pointer update with
ethtool_sprintf.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 15f262b70489..97b5c9b60503 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1612,34 +1612,23 @@ static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) {
-			memcpy(p, netvsc_stats[i].name, ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+			ethtool_sprintf(&p, netvsc_stats[i].name);
 
-		for (i = 0; i < ARRAY_SIZE(vf_stats); i++) {
-			memcpy(p, vf_stats[i].name, ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < ARRAY_SIZE(vf_stats); i++)
+			ethtool_sprintf(&p, vf_stats[i].name);
 
 		for (i = 0; i < nvdev->num_chn; i++) {
-			sprintf(p, "tx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "tx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_packets", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rx_queue_%u_xdp_drop", i);
-			p += ETH_GSTRING_LEN;
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_xdp_drop", i);
 		}
 
 		for_each_present_cpu(cpu) {
-			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++) {
-				sprintf(p, pcpu_stats[i].name, cpu);
-				p += ETH_GSTRING_LEN;
-			}
+			for (i = 0; i < ARRAY_SIZE(pcpu_stats); i++)
+				ethtool_sprintf(&p, pcpu_stats[i].name, cpu);
 		}
 
 		break;
-- 
cgit v1.2.3


From d7a9a01b4e21b9c49559c7e66e6c0dbc7417b7fe Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:29 -0700
Subject: virtio_net: Update driver to use ethtool_sprintf

Update the code to replace instances of snprintf and a pointer update with
just calling ethtool_sprintf.

Also replace the char pointer with a u8 pointer to avoid having to recast
the pointer type.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e97288dd6e5a..77ba8e2fc11c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2138,25 +2138,21 @@ static int virtnet_set_channels(struct net_device *dev,
 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	char *p = (char *)data;
 	unsigned int i, j;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
-					 i, virtnet_rq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
+						virtnet_rq_stats_desc[j].desc);
 		}
 
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
-					 i, virtnet_sq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
+						virtnet_sq_stats_desc[j].desc);
 		}
 		break;
 	}
-- 
cgit v1.2.3


From 3b78b3067f386e5943a30112f866cd65b3de2d8b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:37 -0700
Subject: vmxnet3: Update driver to use ethtool_sprintf

So this patch actually does 3 things.

First it removes a stray white space at the start of the variable
declaration in vmxnet3_get_strings.

Second it flips the logic for the string test so that we exit immediately
if we are not looking for the stats strings. Doing this we can avoid
unnecessary indentation and line wrapping.

Then finally it updates the code to use ethtool_sprintf rather than a
memcpy and pointer increment to write the ethtool strings.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 53 +++++++++++++----------------------
 1 file changed, 19 insertions(+), 34 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 7ec8652f2c26..c0bd9cbc43b1 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -218,43 +218,28 @@ vmxnet3_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 static void
 vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
 {
-	 struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-	if (stringset == ETH_SS_STATS) {
-		int i, j;
-		for (j = 0; j < adapter->num_tx_queues; j++) {
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++) {
-				memcpy(buf, vmxnet3_tq_dev_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats);
-			     i++) {
-				memcpy(buf, vmxnet3_tq_driver_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-		}
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	int i, j;
 
-		for (j = 0; j < adapter->num_rx_queues; j++) {
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++) {
-				memcpy(buf, vmxnet3_rq_dev_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats);
-			     i++) {
-				memcpy(buf, vmxnet3_rq_driver_stats[i].desc,
-				       ETH_GSTRING_LEN);
-				buf += ETH_GSTRING_LEN;
-			}
-		}
+	if (stringset != ETH_SS_STATS)
+		return;
 
-		for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++) {
-			memcpy(buf, vmxnet3_global_stats[i].desc,
-				ETH_GSTRING_LEN);
-			buf += ETH_GSTRING_LEN;
-		}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_tq_dev_stats[i].desc);
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_tq_driver_stats[i].desc);
+	}
+
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_rq_dev_stats[i].desc);
+		for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats); i++)
+			ethtool_sprintf(&buf, vmxnet3_rq_driver_stats[i].desc);
 	}
+
+	for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++)
+		ethtool_sprintf(&buf, vmxnet3_global_stats[i].desc);
 }
 
 netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
-- 
cgit v1.2.3


From b82e8118c540cc0d1e3f542bad2e492b600b4a7e Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:46 -0700
Subject: bna: Update driver to use ethtool_sprintf

Update the bnad_get_strings to make use of ethtool_sprintf and avoid
unnecessary line wrapping. To do this we invert the logic for the string
set test and instead exit immediately if we are not working with the stats
strings. In addition the function is broken up into subfunctions for each
area so that we can simply call ethtool_sprintf once for each string in a
given subsection.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bnad_ethtool.c | 266 ++++++++++--------------
 1 file changed, 105 insertions(+), 161 deletions(-)

diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 588c4804d10a..265c2fa6bbe0 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -524,6 +524,68 @@ bnad_set_pauseparam(struct net_device *netdev,
 	return 0;
 }
 
+static void bnad_get_txf_strings(u8 **string, int f_num)
+{
+	ethtool_sprintf(string, "txf%d_ucast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_ucast", f_num);
+	ethtool_sprintf(string, "txf%d_ucast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_mcast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_mcast", f_num);
+	ethtool_sprintf(string, "txf%d_mcast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_bcast_octets", f_num);
+	ethtool_sprintf(string, "txf%d_bcast", f_num);
+	ethtool_sprintf(string, "txf%d_bcast_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_errors", f_num);
+	ethtool_sprintf(string, "txf%d_filter_vlan", f_num);
+	ethtool_sprintf(string, "txf%d_filter_mac_sa", f_num);
+}
+
+static void bnad_get_rxf_strings(u8 **string, int f_num)
+{
+	ethtool_sprintf(string, "rxf%d_ucast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_ucast", f_num);
+	ethtool_sprintf(string, "rxf%d_ucast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast", f_num);
+	ethtool_sprintf(string, "rxf%d_mcast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast_octets", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast", f_num);
+	ethtool_sprintf(string, "rxf%d_bcast_vlan", f_num);
+	ethtool_sprintf(string, "rxf%d_frame_drops", f_num);
+}
+
+static void bnad_get_cq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "cq%d_producer_index", q_num);
+	ethtool_sprintf(string, "cq%d_consumer_index", q_num);
+	ethtool_sprintf(string, "cq%d_hw_producer_index", q_num);
+	ethtool_sprintf(string, "cq%d_intr", q_num);
+	ethtool_sprintf(string, "cq%d_poll", q_num);
+	ethtool_sprintf(string, "cq%d_schedule", q_num);
+	ethtool_sprintf(string, "cq%d_keep_poll", q_num);
+	ethtool_sprintf(string, "cq%d_complete", q_num);
+}
+
+static void bnad_get_rxq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "rxq%d_packets", q_num);
+	ethtool_sprintf(string, "rxq%d_bytes", q_num);
+	ethtool_sprintf(string, "rxq%d_packets_with_error", q_num);
+	ethtool_sprintf(string, "rxq%d_allocbuf_failed", q_num);
+	ethtool_sprintf(string, "rxq%d_mapbuf_failed", q_num);
+	ethtool_sprintf(string, "rxq%d_producer_index", q_num);
+	ethtool_sprintf(string, "rxq%d_consumer_index", q_num);
+}
+
+static void bnad_get_txq_strings(u8 **string, int q_num)
+{
+	ethtool_sprintf(string, "txq%d_packets", q_num);
+	ethtool_sprintf(string, "txq%d_bytes", q_num);
+	ethtool_sprintf(string, "txq%d_producer_index", q_num);
+	ethtool_sprintf(string, "txq%d_consumer_index", q_num);
+	ethtool_sprintf(string, "txq%d_hw_consumer_index", q_num);
+}
+
 static void
 bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
 {
@@ -531,175 +593,57 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string)
 	int i, j, q_num;
 	u32 bmap;
 
+	if (stringset != ETH_SS_STATS)
+		return;
+
 	mutex_lock(&bnad->conf_mutex);
 
-	switch (stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) {
-			BUG_ON(!(strlen(bnad_net_stats_strings[i]) <
-				   ETH_GSTRING_LEN));
-			strncpy(string, bnad_net_stats_strings[i],
-				ETH_GSTRING_LEN);
-			string += ETH_GSTRING_LEN;
-		}
-		bmap = bna_tx_rid_mask(&bnad->bna);
-		for (i = 0; bmap; i++) {
-			if (bmap & 1) {
-				sprintf(string, "txf%d_ucast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_ucast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_ucast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_mcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_bcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_errors", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_filter_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txf%d_filter_mac_sa", i);
-				string += ETH_GSTRING_LEN;
-			}
-			bmap >>= 1;
-		}
+	for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) {
+		BUG_ON(!(strlen(bnad_net_stats_strings[i]) < ETH_GSTRING_LEN));
+		ethtool_sprintf(&string, bnad_net_stats_strings[i]);
+	}
 
-		bmap = bna_rx_rid_mask(&bnad->bna);
-		for (i = 0; bmap; i++) {
-			if (bmap & 1) {
-				sprintf(string, "rxf%d_ucast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_ucast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_ucast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_mcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast_octets", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_bcast_vlan", i);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxf%d_frame_drops", i);
-				string += ETH_GSTRING_LEN;
-			}
-			bmap >>= 1;
-		}
+	bmap = bna_tx_rid_mask(&bnad->bna);
+	for (i = 0; bmap; i++) {
+		if (bmap & 1)
+			bnad_get_txf_strings(&string, i);
+		bmap >>= 1;
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_rx; i++) {
-			if (!bnad->rx_info[i].rx)
-				continue;
-			for (j = 0; j < bnad->num_rxp_per_rx; j++) {
-				sprintf(string, "cq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_hw_producer_index",
-					q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_intr", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_poll", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_schedule", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_keep_poll", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "cq%d_complete", q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-			}
-		}
+	bmap = bna_rx_rid_mask(&bnad->bna);
+	for (i = 0; bmap; i++, bmap >>= 1) {
+		if (bmap & 1)
+			bnad_get_rxf_strings(&string, i);
+		bmap >>= 1;
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_rx; i++) {
-			if (!bnad->rx_info[i].rx)
-				continue;
-			for (j = 0; j < bnad->num_rxp_per_rx; j++) {
-				sprintf(string, "rxq%d_packets", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_bytes", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_packets_with_error",
-								q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_allocbuf_failed", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_mapbuf_failed", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "rxq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-				if (bnad->rx_info[i].rx_ctrl[j].ccb &&
-					bnad->rx_info[i].rx_ctrl[j].ccb->
-					rcb[1] &&
-					bnad->rx_info[i].rx_ctrl[j].ccb->
-					rcb[1]->rxq) {
-					sprintf(string, "rxq%d_packets", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_bytes", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string,
-					"rxq%d_packets_with_error", q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_allocbuf_failed",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_mapbuf_failed",
-						q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_producer_index",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					sprintf(string, "rxq%d_consumer_index",
-								q_num);
-					string += ETH_GSTRING_LEN;
-					q_num++;
-				}
-			}
-		}
+	q_num = 0;
+	for (i = 0; i < bnad->num_rx; i++) {
+		if (!bnad->rx_info[i].rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++)
+			bnad_get_cq_strings(&string, q_num++);
+	}
 
-		q_num = 0;
-		for (i = 0; i < bnad->num_tx; i++) {
-			if (!bnad->tx_info[i].tx)
-				continue;
-			for (j = 0; j < bnad->num_txq_per_tx; j++) {
-				sprintf(string, "txq%d_packets", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_bytes", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_producer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_consumer_index", q_num);
-				string += ETH_GSTRING_LEN;
-				sprintf(string, "txq%d_hw_consumer_index",
-									q_num);
-				string += ETH_GSTRING_LEN;
-				q_num++;
-			}
+	q_num = 0;
+	for (i = 0; i < bnad->num_rx; i++) {
+		if (!bnad->rx_info[i].rx)
+			continue;
+		for (j = 0; j < bnad->num_rxp_per_rx; j++) {
+			bnad_get_rxq_strings(&string, q_num++);
+			if (bnad->rx_info[i].rx_ctrl[j].ccb &&
+			    bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] &&
+			    bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq)
+				bnad_get_rxq_strings(&string, q_num++);
 		}
+	}
 
-		break;
-
-	default:
-		break;
+	q_num = 0;
+	for (i = 0; i < bnad->num_tx; i++) {
+		if (!bnad->tx_info[i].tx)
+			continue;
+		for (j = 0; j < bnad->num_txq_per_tx; j++)
+			bnad_get_txq_strings(&string, q_num++);
 	}
 
 	mutex_unlock(&bnad->conf_mutex);
-- 
cgit v1.2.3


From acebe5b6107c73aade7f3beca441329ff04823a3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 16 Mar 2021 17:31:55 -0700
Subject: ionic: Update driver to use ethtool_sprintf

Update the ionic driver to make use of ethtool_sprintf. In addition add
separate functions for Tx/Rx stats strings in order to reduce the total
amount of indenting needed in the driver code.

Acked-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_stats.c | 145 +++++++++-------------
 1 file changed, 60 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 6ae75b771a15..308b4ac6c57b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -246,98 +246,73 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 	return total;
 }
 
+static void ionic_sw_stats_get_tx_strings(struct ionic_lif *lif, u8 **buf,
+					  int q_num)
+{
+	int i;
+
+	for (i = 0; i < IONIC_NUM_TX_STATS; i++)
+		ethtool_sprintf(buf, "tx_%d_%s", q_num,
+				ionic_tx_stats_desc[i].name);
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_%s", q_num,
+				ionic_txq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_cq_%s", q_num,
+				ionic_dbg_cq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++)
+		ethtool_sprintf(buf, "txq_%d_intr_%s", q_num,
+				ionic_dbg_intr_stats_desc[i].name);
+	for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++)
+		ethtool_sprintf(buf, "txq_%d_sg_cntr_%d", q_num, i);
+}
+
+static void ionic_sw_stats_get_rx_strings(struct ionic_lif *lif, u8 **buf,
+					  int q_num)
+{
+	int i;
+
+	for (i = 0; i < IONIC_NUM_RX_STATS; i++)
+		ethtool_sprintf(buf, "rx_%d_%s", q_num,
+				ionic_rx_stats_desc[i].name);
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_cq_%s", q_num,
+				ionic_dbg_cq_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_intr_%s", q_num,
+				ionic_dbg_intr_stats_desc[i].name);
+	for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++)
+		ethtool_sprintf(buf, "rxq_%d_napi_%s", q_num,
+				ionic_dbg_napi_stats_desc[i].name);
+	for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++)
+		ethtool_sprintf(buf, "rxq_%d_napi_work_done_%d", q_num, i);
+}
+
 static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 {
 	int i, q_num;
 
-	for (i = 0; i < IONIC_NUM_LIF_STATS; i++) {
-		snprintf(*buf, ETH_GSTRING_LEN, ionic_lif_stats_desc[i].name);
-		*buf += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < IONIC_NUM_LIF_STATS; i++)
+		ethtool_sprintf(buf, ionic_lif_stats_desc[i].name);
 
-	for (i = 0; i < IONIC_NUM_PORT_STATS; i++) {
-		snprintf(*buf, ETH_GSTRING_LEN,
-			 ionic_port_stats_desc[i].name);
-		*buf += ETH_GSTRING_LEN;
-	}
+	for (i = 0; i < IONIC_NUM_PORT_STATS; i++)
+		ethtool_sprintf(buf, ionic_port_stats_desc[i].name);
 
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
-			snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s",
-				 q_num, ionic_tx_stats_desc[i].name);
-			*buf += ETH_GSTRING_LEN;
-		}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_tx_strings(lif, buf, q_num);
 
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_%s",
-					 q_num,
-					 ionic_txq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_cq_%s",
-					 q_num,
-					 ionic_dbg_cq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_intr_%s",
-					 q_num,
-					 ionic_dbg_intr_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "txq_%d_sg_cntr_%d",
-					 q_num, i);
-				*buf += ETH_GSTRING_LEN;
-			}
-		}
-	}
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
-			snprintf(*buf, ETH_GSTRING_LEN,
-				 "rx_%d_%s",
-				 q_num, ionic_rx_stats_desc[i].name);
-			*buf += ETH_GSTRING_LEN;
-		}
-
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_cq_%s",
-					 q_num,
-					 ionic_dbg_cq_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_intr_%s",
-					 q_num,
-					 ionic_dbg_intr_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_napi_%s",
-					 q_num,
-					 ionic_dbg_napi_stats_desc[i].name);
-				*buf += ETH_GSTRING_LEN;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
-				snprintf(*buf, ETH_GSTRING_LEN,
-					 "rxq_%d_napi_work_done_%d",
-					 q_num, i);
-				*buf += ETH_GSTRING_LEN;
-			}
-		}
-	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_rx_strings(lif, buf, q_num);
 }
 
 static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
-- 
cgit v1.2.3


From 998d3907f419ed5c27728161d34f5ed14be53fc0 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:08 -0400
Subject: tipc: re-organize members of struct publication

In a future commit we will introduce more members to struct publication.
In order to keep this structure comprehensible we now group some of
its current fields into the sub-structures where they really belong,
- A struct tipc_service_range for the functional address the publication
  is representing.
- A struct tipc_socket_addr for the socket bound to that service range.

We also rename the stack variable 'publ' to just 'p' in a few places.
This is just as easy to understand in the given context, and keeps the
number of wrapped code lines to a minimum.

There are no functional changes in this commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 66 +++++++++++++++++++++++++--------------------------
 net/tipc/name_table.c | 66 +++++++++++++++++++++++++--------------------------
 net/tipc/name_table.h | 17 +++++--------
 net/tipc/socket.c     | 40 +++++++++++++++----------------
 4 files changed, 92 insertions(+), 97 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6cf57c3bfa27..1070b04d1126 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -1,8 +1,9 @@
 /*
  * net/tipc/name_distr.c: TIPC name distribution code
  *
- * Copyright (c) 2000-2006, 2014, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2019, Ericsson AB
  * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,10 +56,10 @@ struct distr_queue_item {
  */
 static void publ_to_item(struct distr_item *i, struct publication *p)
 {
-	i->type = htonl(p->type);
-	i->lower = htonl(p->lower);
-	i->upper = htonl(p->upper);
-	i->port = htonl(p->port);
+	i->type = htonl(p->sr.type);
+	i->lower = htonl(p->sr.lower);
+	i->upper = htonl(p->sr.upper);
+	i->port = htonl(p->sk.ref);
 	i->key = htonl(p->key);
 }
 
@@ -90,20 +91,20 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
 /**
  * tipc_named_publish - tell other nodes about a new publication by this node
  * @net: the associated network namespace
- * @publ: the new publication
+ * @p: the new publication
  */
-struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *p)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct distr_item *item;
 	struct sk_buff *skb;
 
-	if (publ->scope == TIPC_NODE_SCOPE) {
-		list_add_tail_rcu(&publ->binding_node, &nt->node_scope);
+	if (p->scope == TIPC_NODE_SCOPE) {
+		list_add_tail_rcu(&p->binding_node, &nt->node_scope);
 		return NULL;
 	}
 	write_lock_bh(&nt->cluster_scope_lock);
-	list_add_tail(&publ->binding_node, &nt->cluster_scope);
+	list_add_tail(&p->binding_node, &nt->cluster_scope);
 	write_unlock_bh(&nt->cluster_scope_lock);
 	skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0);
 	if (!skb) {
@@ -113,25 +114,25 @@ struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ)
 	msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
 	msg_set_non_legacy(buf_msg(skb));
 	item = (struct distr_item *)msg_data(buf_msg(skb));
-	publ_to_item(item, publ);
+	publ_to_item(item, p);
 	return skb;
 }
 
 /**
  * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node
  * @net: the associated network namespace
- * @publ: the withdrawn publication
+ * @p: the withdrawn publication
  */
-struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
+struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *p)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct distr_item *item;
 	struct sk_buff *skb;
 
 	write_lock_bh(&nt->cluster_scope_lock);
-	list_del(&publ->binding_node);
+	list_del(&p->binding_node);
 	write_unlock_bh(&nt->cluster_scope_lock);
-	if (publ->scope == TIPC_NODE_SCOPE)
+	if (p->scope == TIPC_NODE_SCOPE)
 		return NULL;
 
 	skb = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0);
@@ -142,7 +143,7 @@ struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ)
 	msg_set_named_seqno(buf_msg(skb), nt->snd_nxt++);
 	msg_set_non_legacy(buf_msg(skb));
 	item = (struct distr_item *)msg_data(buf_msg(skb));
-	publ_to_item(item, publ);
+	publ_to_item(item, p);
 	return skb;
 }
 
@@ -233,33 +234,32 @@ void tipc_named_node_up(struct net *net, u32 dnode, u16 capabilities)
 /**
  * tipc_publ_purge - remove publication associated with a failed node
  * @net: the associated network namespace
- * @publ: the publication to remove
+ * @p: the publication to remove
  * @addr: failed node's address
  *
  * Invoked for each publication issued by a newly failed node.
  * Removes publication structure from name table & deletes it.
  */
-static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
+static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
-	struct publication *p;
+	struct publication *_p;
 
 	spin_lock_bh(&tn->nametbl_lock);
-	p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper,
-				     publ->node, publ->key);
-	if (p)
-		tipc_node_unsubscribe(net, &p->binding_node, addr);
+	_p = tipc_nametbl_remove_publ(net, p->sr.type, p->sr.lower,
+				      p->sr.upper, p->sk.node, p->key);
+	if (_p)
+		tipc_node_unsubscribe(net, &_p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
 
-	if (p != publ) {
+	if (_p != p) {
 		pr_err("Unable to remove publication from failed node\n"
 		       " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
-		       publ->type, publ->lower, publ->node, publ->port,
-		       publ->key);
+		       p->sr.type, p->sr.lower, p->sk.node, p->sk.ref, p->key);
 	}
 
-	if (p)
-		kfree_rcu(p, rcu);
+	if (_p)
+		kfree_rcu(_p, rcu);
 }
 
 void tipc_publ_notify(struct net *net, struct list_head *nsub_list,
@@ -410,15 +410,15 @@ void tipc_named_reinit(struct net *net)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
-	struct publication *publ;
+	struct publication *p;
 	u32 self = tipc_own_addr(net);
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	list_for_each_entry_rcu(publ, &nt->node_scope, binding_node)
-		publ->node = self;
-	list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node)
-		publ->node = self;
+	list_for_each_entry_rcu(p, &nt->node_scope, binding_node)
+		p->sk.node = self;
+	list_for_each_entry_rcu(p, &nt->cluster_scope, binding_node)
+		p->sk.node = self;
 	nt->rc_dests = 0;
 	spin_unlock_bh(&tn->nametbl_lock);
 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ee5ac40ea2b6..c2410ba7be5c 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2014, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -234,24 +234,24 @@ static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
 					    u32 scope, u32 node, u32 port,
 					    u32 key)
 {
-	struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
+	struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
 
-	if (!publ)
+	if (!p)
 		return NULL;
 
-	publ->type = type;
-	publ->lower = lower;
-	publ->upper = upper;
-	publ->scope = scope;
-	publ->node = node;
-	publ->port = port;
-	publ->key = key;
-	INIT_LIST_HEAD(&publ->binding_sock);
-	INIT_LIST_HEAD(&publ->binding_node);
-	INIT_LIST_HEAD(&publ->local_publ);
-	INIT_LIST_HEAD(&publ->all_publ);
-	INIT_LIST_HEAD(&publ->list);
-	return publ;
+	p->sr.type = type;
+	p->sr.lower = lower;
+	p->sr.upper = upper;
+	p->scope = scope;
+	p->sk.node = node;
+	p->sk.ref = port;
+	p->key = key;
+	INIT_LIST_HEAD(&p->binding_sock);
+	INIT_LIST_HEAD(&p->binding_node);
+	INIT_LIST_HEAD(&p->local_publ);
+	INIT_LIST_HEAD(&p->all_publ);
+	INIT_LIST_HEAD(&p->list);
+	return p;
 }
 
 /**
@@ -347,7 +347,7 @@ static struct publication *tipc_service_insert_publ(struct net *net,
 
 	/* Return if the publication already exists */
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->key == key && (!p->node || p->node == node))
+		if (p->key == key && (!p->sk.node || p->sk.node == node))
 			return NULL;
 	}
 
@@ -363,8 +363,8 @@ static struct publication *tipc_service_insert_publ(struct net *net,
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED,
-					p->port, p->node, p->scope, first);
+		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper, TIPC_PUBLISHED,
+					p->sk.ref, p->sk.node, p->scope, first);
 	}
 	return p;
 err:
@@ -384,7 +384,7 @@ static struct publication *tipc_service_remove_publ(struct service_range *sr,
 	struct publication *p;
 
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->key != key || (node && node != p->node))
+		if (p->key != key || (node && node != p->sk.node))
 			continue;
 		list_del(&p->all_publ);
 		list_del(&p->local_publ);
@@ -452,8 +452,8 @@ static void tipc_service_subscribe(struct tipc_service *service,
 	/* Sort the publications before reporting */
 	list_sort(NULL, &publ_list, tipc_publ_sort);
 	list_for_each_entry_safe(p, tmp, &publ_list, list) {
-		tipc_sub_report_overlap(sub, p->lower, p->upper,
-					TIPC_PUBLISHED, p->port, p->node,
+		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
+					TIPC_PUBLISHED, p->sk.ref, p->sk.node,
 					p->scope, true);
 		list_del_init(&p->list);
 	}
@@ -525,7 +525,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 	last = list_empty(&sr->all_publ);
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
 		tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
-					p->port, node, p->scope, last);
+					p->sk.ref, node, p->scope, last);
 	}
 
 	/* Remove service range item if this was its last publication */
@@ -603,8 +603,8 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
 					     all_publ);
 			list_move_tail(&p->all_publ, &sr->all_publ);
 		}
-		port = p->port;
-		node = p->node;
+		port = p->sk.ref;
+		node = p->sk.node;
 		/* Todo: as for legacy, pick the first matching range only, a
 		 * "true" round-robin will be performed as needed.
 		 */
@@ -643,9 +643,9 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
 		if (p->scope != scope)
 			continue;
-		if (p->port == exclude && p->node == self)
+		if (p->sk.ref == exclude && p->sk.node == self)
 			continue;
-		tipc_dest_push(dsts, p->node, p->port);
+		tipc_dest_push(dsts, p->sk.node, p->sk.ref);
 		(*dstcnt)++;
 		if (all)
 			continue;
@@ -675,7 +675,7 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
 	service_range_foreach_match(sr, sc, lower, upper) {
 		list_for_each_entry(p, &sr->local_publ, local_publ) {
 			if (p->scope == scope || (!exact && p->scope < scope))
-				tipc_dest_push(dports, 0, p->port);
+				tipc_dest_push(dports, 0, p->sk.ref);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -702,7 +702,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 	spin_lock_bh(&sc->lock);
 	service_range_foreach_match(sr, sc, lower, upper) {
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
-			tipc_nlist_add(nodes, p->node);
+			tipc_nlist_add(nodes, p->sk.node);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -731,7 +731,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
 			if (p->scope != scope)
 				continue;
-			tipc_group_add_member(grp, p->node, p->port, p->lower);
+			tipc_group_add_member(grp, p->sk.node, p->sk.ref, p->sr.lower);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -909,7 +909,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
 	spin_lock_bh(&sc->lock);
 	rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
 		list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
-			tipc_service_remove_publ(sr, p->node, p->key);
+			tipc_service_remove_publ(sr, p->sk.node, p->key);
 			kfree_rcu(p, rcu);
 		}
 		rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
@@ -993,9 +993,9 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->sk.node))
 			goto publ_msg_full;
-		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port))
+		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->sk.ref))
 			goto publ_msg_full;
 		if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key))
 			goto publ_msg_full;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 5a82a01369d6..d9ad119f966b 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2014-2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -50,13 +51,10 @@ struct tipc_group;
 #define TIPC_NAMETBL_SIZE	1024	/* must be a power of 2 */
 
 /**
- * struct publication - info about a published (name or) name sequence
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
+ * struct publication - info about a published service address or range
+ * @sr: service range represented by this publication
+ * @sk: address of socket bound to this publication
  * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE
- * @node: network address of publishing socket's node
- * @port: publishing port
  * @key: publication key, unique across the cluster
  * @id: publication id
  * @binding_node: all publications from the same node which bound this one
@@ -74,12 +72,9 @@ struct tipc_group;
  * @rcu: RCU callback head used for deferred freeing
  */
 struct publication {
-	u32 type;
-	u32 lower;
-	u32 upper;
+	struct tipc_service_range sr;
+	struct tipc_socket_addr sk;
 	u32 scope;
-	u32 node;
-	u32 port;
 	u32 key;
 	u32 id;
 	struct list_head binding_node;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cebcc104dc70..fe522d49f747 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -2923,30 +2923,30 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 			    struct tipc_service_range const *seq)
 {
 	struct net *net = sock_net(&tsk->sk);
-	struct publication *publ;
+	struct publication *p;
 	struct publication *safe;
 	int rc = -EINVAL;
 
 	if (scope != TIPC_NODE_SCOPE)
 		scope = TIPC_CLUSTER_SCOPE;
 
-	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
+	list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) {
 		if (seq) {
-			if (publ->scope != scope)
+			if (p->scope != scope)
 				continue;
-			if (publ->type != seq->type)
+			if (p->sr.type != seq->type)
 				continue;
-			if (publ->lower != seq->lower)
+			if (p->sr.lower != seq->lower)
 				continue;
-			if (publ->upper != seq->upper)
+			if (p->sr.upper != seq->upper)
 				break;
-			tipc_nametbl_withdraw(net, publ->type, publ->lower,
-					      publ->upper, publ->key);
+			tipc_nametbl_withdraw(net, p->sr.type, p->sr.lower,
+					      p->sr.upper, p->key);
 			rc = 0;
 			break;
 		}
-		tipc_nametbl_withdraw(net, publ->type, publ->lower,
-				      publ->upper, publ->key);
+		tipc_nametbl_withdraw(net, p->sr.type, p->sr.lower,
+				      p->sr.upper, p->key);
 		rc = 0;
 	}
 	if (list_empty(&tsk->publications))
@@ -3711,11 +3711,11 @@ static int __tipc_nl_add_sk_publ(struct sk_buff *skb,
 
 	if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower))
 		goto attr_msg_cancel;
-	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
+	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper))
 		goto attr_msg_cancel;
 
 	nla_nest_end(skb, attrs);
@@ -3863,9 +3863,9 @@ bool tipc_sk_filtering(struct sock *sk)
 		p = list_first_entry_or_null(&tsk->publications,
 					     struct publication, binding_sock);
 		if (p) {
-			type = p->type;
-			lower = p->lower;
-			upper = p->upper;
+			type = p->sr.type;
+			lower = p->sr.lower;
+			upper = p->sr.upper;
 		}
 	}
 
@@ -3964,9 +3964,9 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
 	if (tsk->published) {
 		p = list_first_entry_or_null(&tsk->publications,
 					     struct publication, binding_sock);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
-		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0);
+		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0);
 	}
 	i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
 	i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
-- 
cgit v1.2.3


From b26b5aa9cebe336028421b0641ed762fef81d178 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:09 -0400
Subject: tipc: move creation of publication item one level up in call chain

We instantiate struct publication in tipc_nametbl_insert_publ()
instead of as currently in tipc_service_insert_publ(). This has the
advantage that we can pass a pointer to the publication struct to
the next call levels, instead of the numerous individual parameters
we pass on now. It also gives us a location to keep the contents of
the additional fields we will introduce in a later commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 65 +++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index c2410ba7be5c..c37cef09b54c 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -327,49 +327,48 @@ static struct service_range *tipc_service_create_range(struct tipc_service *sc,
 	return sr;
 }
 
-static struct publication *tipc_service_insert_publ(struct net *net,
-						    struct tipc_service *sc,
-						    u32 type, u32 lower,
-						    u32 upper, u32 scope,
-						    u32 node, u32 port,
-						    u32 key)
+static bool tipc_service_insert_publ(struct net *net,
+				     struct tipc_service *sc,
+				     struct publication *p)
 {
 	struct tipc_subscription *sub, *tmp;
 	struct service_range *sr;
-	struct publication *p;
+	struct publication *_p;
+	u32 node = p->sk.node;
 	bool first = false;
+	bool res = false;
 
-	sr = tipc_service_create_range(sc, lower, upper);
+	spin_lock_bh(&sc->lock);
+	sr = tipc_service_create_range(sc, p->sr.lower, p->sr.upper);
 	if (!sr)
-		goto  err;
+		goto  exit;
 
 	first = list_empty(&sr->all_publ);
 
 	/* Return if the publication already exists */
-	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->key == key && (!p->sk.node || p->sk.node == node))
-			return NULL;
+	list_for_each_entry(_p, &sr->all_publ, all_publ) {
+		if (_p->key == p->key && (!_p->sk.node || _p->sk.node == node))
+			goto exit;
 	}
 
-	/* Create and insert publication */
-	p = tipc_publ_create(type, lower, upper, scope, node, port, key);
-	if (!p)
-		goto err;
-	/* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
-	p->id = sc->publ_cnt++;
-	if (in_own_node(net, node))
+	if (in_own_node(net, p->sk.node))
 		list_add(&p->local_publ, &sr->local_publ);
 	list_add(&p->all_publ, &sr->all_publ);
+	p->id = sc->publ_cnt++;
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper, TIPC_PUBLISHED,
-					p->sk.ref, p->sk.node, p->scope, first);
+		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
+					TIPC_PUBLISHED, p->sk.ref, p->sk.node,
+					p->scope, first);
 	}
-	return p;
-err:
-	pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper);
-	return NULL;
+	res = true;
+exit:
+	if (!res)
+		pr_warn("Failed to bind to %u,%u,%u\n",
+			p->sr.type, p->sr.lower, p->sr.upper);
+	spin_unlock_bh(&sc->lock);
+	return res;
 }
 
 /**
@@ -482,6 +481,10 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 	struct tipc_service *sc;
 	struct publication *p;
 
+	p = tipc_publ_create(type, lower, upper, scope, node, port, key);
+	if (!p)
+		return NULL;
+
 	if (scope > TIPC_NODE_SCOPE || lower > upper) {
 		pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
 			 type, lower, upper, scope);
@@ -490,14 +493,10 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
 	sc = tipc_service_find(net, type);
 	if (!sc)
 		sc = tipc_service_create(type, &nt->services[hash(type)]);
-	if (!sc)
-		return NULL;
-
-	spin_lock_bh(&sc->lock);
-	p = tipc_service_insert_publ(net, sc, type, lower, upper,
-				     scope, node, port, key);
-	spin_unlock_bh(&sc->lock);
-	return p;
+	if (sc && tipc_service_insert_publ(net, sc, p))
+		return p;
+	kfree(p);
+	return NULL;
 }
 
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-- 
cgit v1.2.3


From 7823f04f34b89ae65de612adf40b314a61969bf0 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:10 -0400
Subject: tipc: introduce new unified address type for internal use

We introduce a simplified version of struct sockaddr_tipc, using
anonymous unions and structures. Apart from being nicer to work with,
this struct will come in handy when we in a later commit add another
address type.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c |  1 +
 net/tipc/addr.h | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index abe29d1aa23a..fd0796269eed 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, 2010-2011, Wind River Systems
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 1a11831bef62..0772cfadaa0d 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2006, 2018, Ericsson AB
  * Copyright (c) 2004-2005, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,50 @@
 #include <net/netns/generic.h>
 #include "core.h"
 
+/* Struct tipc_uaddr: internal version of struct sockaddr_tipc.
+ * Must be kept aligned both regarding field positions and size.
+ */
+struct tipc_uaddr {
+	unsigned short family;
+	unsigned char addrtype;
+	signed char scope;
+	union {
+		struct {
+			struct tipc_service_addr sa;
+			u32 lookup_node;
+		};
+		struct tipc_service_range sr;
+		struct tipc_socket_addr sk;
+	};
+};
+
+static inline void tipc_uaddr(struct tipc_uaddr *ua, u32 atype, u32 scope,
+			      u32 type, u32 lower, u32 upper)
+{
+	ua->family = AF_TIPC;
+	ua->addrtype = atype;
+	ua->scope = scope;
+	ua->sr.type = type;
+	ua->sr.lower = lower;
+	ua->sr.upper = upper;
+}
+
+static inline bool tipc_uaddr_valid(struct tipc_uaddr *ua, int len)
+{
+	u32 atype;
+
+	if (len < sizeof(struct sockaddr_tipc))
+		return false;
+	atype = ua->addrtype;
+	if (ua->family != AF_TIPC)
+		return false;
+	if (atype == TIPC_SERVICE_ADDR || atype == TIPC_SOCKET_ADDR)
+		return true;
+	if (atype == TIPC_SERVICE_RANGE)
+		return ua->sr.upper >= ua->sr.lower;
+	return false;
+}
+
 static inline u32 tipc_own_addr(struct net *net)
 {
 	return tipc_net(net)->node_addr;
-- 
cgit v1.2.3


From 50a3499ab853acd5ae5056231a576637af1bed8d Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:11 -0400
Subject: tipc: simplify signature of tipc_namtbl_publish()

Using the new address structure tipc_uaddr, we simplify the signature
of function tipc_sk_publish() and tipc_namtbl_publish() so that fewer
parameters need to be passed around.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 10 +++----
 net/tipc/name_table.h |  6 ++---
 net/tipc/net.c        |  8 ++++--
 net/tipc/node.c       | 29 ++++++++++----------
 net/tipc/socket.c     | 73 +++++++++++++++++++++++++++------------------------
 5 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index c37cef09b54c..7b309fdd0090 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -740,9 +740,8 @@ exit:
 
 /* tipc_nametbl_publish - add service binding to name table
  */
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port,
-					 u32 key)
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+					 struct tipc_socket_addr *sk, u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
@@ -757,8 +756,9 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 		goto exit;
 	}
 
-	p = tipc_nametbl_insert_publ(net, type, lower, upper, scope,
-				     tipc_own_addr(net), port, key);
+	p = tipc_nametbl_insert_publ(net, ua->sr.type, ua->sr.lower,
+				     ua->sr.upper, ua->scope,
+				     sk->node, sk->ref, key);
 	if (p) {
 		nt->local_publ_count++;
 		skb = tipc_named_publish(net, p);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index d9ad119f966b..47a8c266bcc8 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -42,6 +42,7 @@ struct tipc_subscription;
 struct tipc_plist;
 struct tipc_nlist;
 struct tipc_group;
+struct tipc_uaddr;
 
 /*
  * TIPC name types reserved for internal TIPC use (both current and planned)
@@ -120,9 +121,8 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 			 struct list_head *dsts, int *dstcnt, u32 exclude,
 			 bool all);
-struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
-					 u32 upper, u32 scope, u32 port,
-					 u32 key);
+struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
+					 struct tipc_socket_addr *sk, u32 key);
 int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
 			  u32 key);
 struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a129f661bee3..3f927949bb23 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -125,6 +125,11 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 static void tipc_net_finalize(struct net *net, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
+	struct tipc_socket_addr sk = {0, addr};
+	struct tipc_uaddr ua;
+
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+		   TIPC_NODE_STATE, addr, addr);
 
 	if (cmpxchg(&tn->node_addr, 0, addr))
 		return;
@@ -132,8 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
 	tipc_mon_reinit_self(net);
-	tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr,
-			     TIPC_CLUSTER_SCOPE, 0, addr);
+	tipc_nametbl_publish(net, &ua, &sk, addr);
 }
 
 void tipc_net_finalize_work(struct work_struct *work)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9c95ef4b6326..e24cdc13335d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -398,21 +398,23 @@ static void tipc_node_write_unlock_fast(struct tipc_node *n)
 static void tipc_node_write_unlock(struct tipc_node *n)
 	__releases(n->lock)
 {
+	struct tipc_socket_addr sk;
 	struct net *net = n->net;
-	u32 addr = 0;
 	u32 flags = n->action_flags;
-	u32 link_id = 0;
-	u32 bearer_id;
 	struct list_head *publ_list;
+	struct tipc_uaddr ua;
+	u32 bearer_id;
 
 	if (likely(!flags)) {
 		write_unlock_bh(&n->lock);
 		return;
 	}
 
-	addr = n->addr;
-	link_id = n->link_id;
-	bearer_id = link_id & 0xffff;
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+		   TIPC_LINK_STATE, n->addr, n->addr);
+	sk.ref = n->link_id;
+	sk.node = n->addr;
+	bearer_id = n->link_id & 0xffff;
 	publ_list = &n->publ_list;
 
 	n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@ -421,20 +423,19 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	write_unlock_bh(&n->lock);
 
 	if (flags & TIPC_NOTIFY_NODE_DOWN)
-		tipc_publ_notify(net, publ_list, addr, n->capabilities);
+		tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_NODE_UP)
-		tipc_named_node_up(net, addr, n->capabilities);
+		tipc_named_node_up(net, n->addr, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_LINK_UP) {
-		tipc_mon_peer_up(net, addr, bearer_id);
-		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
-				     TIPC_NODE_SCOPE, link_id, link_id);
+		tipc_mon_peer_up(net, n->addr, bearer_id);
+		tipc_nametbl_publish(net, &ua, &sk, n->link_id);
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
-		tipc_mon_peer_down(net, addr, bearer_id);
-		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
-				      addr, link_id);
+		tipc_mon_peer_down(net, n->addr, bearer_id);
+		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, n->addr,
+				      n->addr, n->link_id);
 	}
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index fe522d49f747..94847252a7b7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -111,7 +111,6 @@ struct tipc_sock {
 	struct sock sk;
 	u32 conn_type;
 	u32 conn_instance;
-	int published;
 	u32 max_pkt;
 	u32 maxnagle;
 	u32 portid;
@@ -141,6 +140,7 @@ struct tipc_sock {
 	bool expect_ack;
 	bool nodelay;
 	bool group_is_open;
+	bool published;
 };
 
 static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -151,8 +151,7 @@ static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		       bool kern);
 static void tipc_sk_timeout(struct timer_list *t);
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
-			   struct tipc_service_range const *seq);
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 			    struct tipc_service_range const *seq);
 static int tipc_sk_leave(struct tipc_sock *tsk);
@@ -677,22 +676,31 @@ static int tipc_release(struct socket *sock)
  */
 static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 {
-	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
 	struct tipc_sock *tsk = tipc_sk(sock->sk);
+	bool unbind = false;
 
 	if (unlikely(!alen))
 		return tipc_sk_withdraw(tsk, 0, NULL);
 
-	if (addr->addrtype == TIPC_SERVICE_ADDR)
-		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
+	if (ua->addrtype == TIPC_SERVICE_ADDR) {
+		ua->addrtype = TIPC_SERVICE_RANGE;
+		ua->sr.upper = ua->sr.lower;
+	}
+	if (ua->scope < 0) {
+		unbind = true;
+		ua->scope = -ua->scope;
+	}
+	/* Users may still use deprecated TIPC_ZONE_SCOPE */
+	if (ua->scope != TIPC_NODE_SCOPE)
+		ua->scope = TIPC_CLUSTER_SCOPE;
 
 	if (tsk->group)
 		return -EACCES;
 
-	if (addr->scope >= 0)
-		return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq);
-	else
-		return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
+	if (unbind)
+		return tipc_sk_withdraw(tsk, ua->scope, &ua->sr);
+	return tipc_sk_publish(tsk, ua);
 }
 
 int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
@@ -707,18 +715,17 @@ int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 
 static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 {
-	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
+	u32 atype = ua->addrtype;
 
 	if (alen) {
-		if (alen < sizeof(struct sockaddr_tipc))
+		if (!tipc_uaddr_valid(ua, alen))
 			return -EINVAL;
-		if (addr->family != AF_TIPC)
+		if (atype == TIPC_SOCKET_ADDR)
 			return -EAFNOSUPPORT;
-		if (addr->addrtype > TIPC_SERVICE_ADDR)
-			return -EAFNOSUPPORT;
-		if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) {
+		if (ua->sr.type < TIPC_RESERVED_TYPES) {
 			pr_warn_once("Can't bind to reserved service type %u\n",
-				     addr->addr.nameseq.type);
+				     ua->sr.type);
 			return -EACCES;
 		}
 	}
@@ -2891,31 +2898,28 @@ static void tipc_sk_timeout(struct timer_list *t)
 	sock_put(sk);
 }
 
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
-			   struct tipc_service_range const *seq)
+static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua)
 {
 	struct sock *sk = &tsk->sk;
 	struct net *net = sock_net(sk);
-	struct publication *publ;
+	struct tipc_socket_addr skaddr;
+	struct publication *p;
 	u32 key;
 
-	if (scope != TIPC_NODE_SCOPE)
-		scope = TIPC_CLUSTER_SCOPE;
-
 	if (tipc_sk_connected(sk))
 		return -EINVAL;
 	key = tsk->portid + tsk->pub_count + 1;
 	if (key == tsk->portid)
 		return -EADDRINUSE;
-
-	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
-				    scope, tsk->portid, key);
-	if (unlikely(!publ))
+	skaddr.ref = tsk->portid;
+	skaddr.node = tipc_own_addr(net);
+	p = tipc_nametbl_publish(net, ua, &skaddr, key);
+	if (unlikely(!p))
 		return -EINVAL;
 
-	list_add(&publ->binding_sock, &tsk->publications);
+	list_add(&p->binding_sock, &tsk->publications);
 	tsk->pub_count++;
-	tsk->published = 1;
+	tsk->published = true;
 	return 0;
 }
 
@@ -3067,13 +3071,15 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	struct net *net = sock_net(&tsk->sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
-	struct tipc_service_range seq;
+	struct tipc_uaddr ua;
 	int rc;
 
 	if (mreq->type < TIPC_RESERVED_TYPES)
 		return -EACCES;
 	if (mreq->scope > TIPC_NODE_SCOPE)
 		return -EINVAL;
+	if (mreq->scope != TIPC_NODE_SCOPE)
+		mreq->scope = TIPC_CLUSTER_SCOPE;
 	if (grp)
 		return -EACCES;
 	grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
@@ -3083,11 +3089,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	msg_set_lookup_scope(hdr, mreq->scope);
 	msg_set_nametype(hdr, mreq->type);
 	msg_set_dest_droppable(hdr, true);
-	seq.type = mreq->type;
-	seq.lower = mreq->instance;
-	seq.upper = seq.lower;
 	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
-	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
+		   mreq->type, mreq->instance, mreq->instance);
+	rc = tipc_sk_publish(tsk, &ua);
 	if (rc) {
 		tipc_group_delete(net, grp);
 		tsk->group = NULL;
-- 
cgit v1.2.3


From a45ffa68573e97fdced4b08aef19efda555f18bc Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:12 -0400
Subject: tipc: simplify call signatures for publication creation

We simplify the call signatures for tipc_nametbl_insert_publ() and
tipc_publ_create() so that fewer parameters are passed around.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 23 ++++++++++++-----------
 net/tipc/name_table.c | 42 +++++++++++++++++-------------------------
 net/tipc/name_table.h |  9 +++++----
 3 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 1070b04d1126..727f8c54df6b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -293,30 +293,31 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 				u32 node, u32 dtype)
 {
 	struct publication *p = NULL;
-	u32 lower = ntohl(i->lower);
-	u32 upper = ntohl(i->upper);
-	u32 type = ntohl(i->type);
-	u32 port = ntohl(i->port);
+	struct tipc_socket_addr sk;
+	struct tipc_uaddr ua;
 	u32 key = ntohl(i->key);
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
+		   ntohl(i->type), ntohl(i->lower), ntohl(i->upper));
+	sk.ref = ntohl(i->port);
+	sk.node = node;
+
 	if (dtype == PUBLICATION) {
-		p = tipc_nametbl_insert_publ(net, type, lower, upper,
-					     TIPC_CLUSTER_SCOPE, node,
-					     port, key);
+		p = tipc_nametbl_insert_publ(net, &ua, &sk, key);
 		if (p) {
 			tipc_node_subscribe(net, &p->binding_node, node);
 			return true;
 		}
 	} else if (dtype == WITHDRAWAL) {
-		p = tipc_nametbl_remove_publ(net, type, lower,
-					     upper, node, key);
+		p = tipc_nametbl_remove_publ(net, ua.sr.type, ua.sr.lower,
+					     ua.sr.upper, node, key);
 		if (p) {
 			tipc_node_unsubscribe(net, &p->binding_node, node);
 			kfree_rcu(p, rcu);
 			return true;
 		}
-		pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
-				    type, lower, node);
+		pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n",
+				    ua.sr.type, ua.sr.lower, node);
 	} else {
 		pr_warn("Unrecognized name table message received\n");
 	}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 7b309fdd0090..146c478143a6 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -222,16 +222,12 @@ static int hash(int x)
 
 /**
  * tipc_publ_create - create a publication structure
- * @type: name sequence type
- * @lower: name sequence lower bound
- * @upper: name sequence upper bound
- * @scope: publication scope
- * @node: network address of publishing socket
- * @port: publishing port
+ * @ua: the service range the user is binding to
+ * @sk: the address of the socket that is bound
  * @key: publication key
  */
-static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
-					    u32 scope, u32 node, u32 port,
+static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
+					    struct tipc_socket_addr *sk,
 					    u32 key)
 {
 	struct publication *p = kzalloc(sizeof(*p), GFP_ATOMIC);
@@ -239,12 +235,9 @@ static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
 	if (!p)
 		return NULL;
 
-	p->sr.type = type;
-	p->sr.lower = lower;
-	p->sr.upper = upper;
-	p->scope = scope;
-	p->sk.node = node;
-	p->sk.ref = port;
+	p->sr = ua->sr;
+	p->sk = *sk;
+	p->scope = ua->scope;
 	p->key = key;
 	INIT_LIST_HEAD(&p->binding_sock);
 	INIT_LIST_HEAD(&p->binding_node);
@@ -472,22 +465,23 @@ static struct tipc_service *tipc_service_find(struct net *net, u32 type)
 	return NULL;
 };
 
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 scope, u32 node,
-					     u32 port, u32 key)
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_service *sc;
 	struct publication *p;
+	u32 type = ua->sr.type;
 
-	p = tipc_publ_create(type, lower, upper, scope, node, port, key);
+	p = tipc_publ_create(ua, sk, key);
 	if (!p)
 		return NULL;
 
-	if (scope > TIPC_NODE_SCOPE || lower > upper) {
-		pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n",
-			 type, lower, upper, scope);
+	if (ua->sr.lower > ua->sr.upper) {
+		pr_debug("Failed to bind illegal {%u,%u,%u} from node %u\n",
+			 type, ua->sr.lower, ua->sr.upper, sk->node);
 		return NULL;
 	}
 	sc = tipc_service_find(net, type);
@@ -756,9 +750,7 @@ struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
 		goto exit;
 	}
 
-	p = tipc_nametbl_insert_publ(net, ua->sr.type, ua->sr.lower,
-				     ua->sr.upper, ua->scope,
-				     sk->node, sk->ref, key);
+	p = tipc_nametbl_insert_publ(net, ua, sk, key);
 	if (p) {
 		nt->local_publ_count++;
 		skb = tipc_named_publish(net, p);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 47a8c266bcc8..c8b026e56e81 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -75,7 +75,7 @@ struct tipc_uaddr;
 struct publication {
 	struct tipc_service_range sr;
 	struct tipc_socket_addr sk;
-	u32 scope;
+	u16 scope;
 	u32 key;
 	u32 id;
 	struct list_head binding_node;
@@ -125,9 +125,10 @@ struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
 					 struct tipc_socket_addr *sk, u32 key);
 int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
 			  u32 key);
-struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper, u32 scope,
-					     u32 node, u32 ref, u32 key);
+struct publication *tipc_nametbl_insert_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key);
 struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 					     u32 lower, u32 upper,
 					     u32 node, u32 key);
-- 
cgit v1.2.3


From 2c98da0790634d0ec08ff7856769ffb56ca7c469 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:13 -0400
Subject: tipc: simplify signature of tipc_nametbl_withdraw() functions

Following the principles of the preceding commits, we reduce
the number of parameters passed along in tipc_sk_withdraw(),
tipc_nametbl_withdraw() and associated functions.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c | 11 ++++-----
 net/tipc/name_table.c | 51 +++++++++++++++++++++---------------------
 net/tipc/name_table.h | 11 ++++-----
 net/tipc/node.c       |  3 +--
 net/tipc/socket.c     | 62 +++++++++++++++++++++++++--------------------------
 5 files changed, 70 insertions(+), 68 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 727f8c54df6b..9e2fab3569b5 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -244,17 +244,19 @@ static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct publication *_p;
+	struct tipc_uaddr ua;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, p->scope, p->sr.type,
+		   p->sr.lower, p->sr.upper);
 	spin_lock_bh(&tn->nametbl_lock);
-	_p = tipc_nametbl_remove_publ(net, p->sr.type, p->sr.lower,
-				      p->sr.upper, p->sk.node, p->key);
+	_p = tipc_nametbl_remove_publ(net, &ua, &p->sk, p->key);
 	if (_p)
 		tipc_node_unsubscribe(net, &_p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
 
 	if (_p != p) {
 		pr_err("Unable to remove publication from failed node\n"
-		       " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n",
+		       " (type=%u, lower=%u, node=%u, port=%u, key=%u)\n",
 		       p->sr.type, p->sr.lower, p->sk.node, p->sk.ref, p->key);
 	}
 
@@ -309,8 +311,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
 			return true;
 		}
 	} else if (dtype == WITHDRAWAL) {
-		p = tipc_nametbl_remove_publ(net, ua.sr.type, ua.sr.lower,
-					     ua.sr.upper, node, key);
+		p = tipc_nametbl_remove_publ(net, &ua, &sk, key);
 		if (p) {
 			tipc_node_unsubscribe(net, &p->binding_node, node);
 			kfree_rcu(p, rcu);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 146c478143a6..5676b8d4f08f 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -366,16 +366,18 @@ exit:
 
 /**
  * tipc_service_remove_publ - remove a publication from a service
- * @sr: service_range to remove publication from
- * @node: target node
+ * @r: service_range to remove publication from
+ * @sk: address publishing socket
  * @key: target publication key
  */
-static struct publication *tipc_service_remove_publ(struct service_range *sr,
-						    u32 node, u32 key)
+static struct publication *tipc_service_remove_publ(struct service_range *r,
+						    struct tipc_socket_addr *sk,
+						    u32 key)
 {
 	struct publication *p;
+	u32 node = sk->node;
 
-	list_for_each_entry(p, &sr->all_publ, all_publ) {
+	list_for_each_entry(p, &r->all_publ, all_publ) {
 		if (p->key != key || (node && node != p->sk.node))
 			continue;
 		list_del(&p->all_publ);
@@ -493,16 +495,20 @@ struct publication *tipc_nametbl_insert_publ(struct net *net,
 	return NULL;
 }
 
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 node, u32 key)
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key)
 {
-	struct tipc_service *sc = tipc_service_find(net, type);
 	struct tipc_subscription *sub, *tmp;
-	struct service_range *sr = NULL;
 	struct publication *p = NULL;
+	struct service_range *sr;
+	struct tipc_service *sc;
+	u32 upper = ua->sr.upper;
+	u32 lower = ua->sr.lower;
 	bool last;
 
+	sc = tipc_service_find(net, ua->sr.type);
 	if (!sc)
 		return NULL;
 
@@ -510,7 +516,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 	sr = tipc_service_find_range(sc, lower, upper);
 	if (!sr)
 		goto exit;
-	p = tipc_service_remove_publ(sr, node, key);
+	p = tipc_service_remove_publ(sr, sk, key);
 	if (!p)
 		goto exit;
 
@@ -518,7 +524,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
 	last = list_empty(&sr->all_publ);
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
 		tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
-					p->sk.ref, node, p->scope, last);
+					sk->ref, sk->node, ua->scope, last);
 	}
 
 	/* Remove service range item if this was its last publication */
@@ -768,24 +774,22 @@ exit:
 /**
  * tipc_nametbl_withdraw - withdraw a service binding
  * @net: network namespace
- * @type: service type
- * @lower: service range lower bound
- * @upper: service range upper bound
+ * @ua: service address/range being unbound
+ * @sk: address of the socket being unbound from
  * @key: target publication key
  */
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
-			  u32 upper, u32 key)
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+			   struct tipc_socket_addr *sk, u32 key)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct tipc_net *tn = tipc_net(net);
-	u32 self = tipc_own_addr(net);
 	struct sk_buff *skb = NULL;
 	struct publication *p;
 	u32 rc_dests;
 
 	spin_lock_bh(&tn->nametbl_lock);
 
-	p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key);
+	p = tipc_nametbl_remove_publ(net, ua, sk, key);
 	if (p) {
 		nt->local_publ_count--;
 		skb = tipc_named_withdraw(net, p);
@@ -793,16 +797,13 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
 		kfree_rcu(p, rcu);
 	} else {
 		pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
-		       type, lower, upper, key);
+		       ua->sr.type, ua->sr.lower, ua->sr.upper, key);
 	}
 	rc_dests = nt->rc_dests;
 	spin_unlock_bh(&tn->nametbl_lock);
 
-	if (skb) {
+	if (skb)
 		tipc_node_broadcast(net, skb, rc_dests);
-		return 1;
-	}
-	return 0;
 }
 
 /**
@@ -900,7 +901,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
 	spin_lock_bh(&sc->lock);
 	rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) {
 		list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) {
-			tipc_service_remove_publ(sr, p->sk.node, p->key);
+			tipc_service_remove_publ(sr, &p->sk, p->key);
 			kfree_rcu(p, rcu);
 		}
 		rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c8b026e56e81..5a7c83d22ef9 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -123,15 +123,16 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
 			 bool all);
 struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
 					 struct tipc_socket_addr *sk, u32 key);
-int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 upper,
-			  u32 key);
+void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
+			   struct tipc_socket_addr *sk, u32 key);
 struct publication *tipc_nametbl_insert_publ(struct net *net,
 					     struct tipc_uaddr *ua,
 					     struct tipc_socket_addr *sk,
 					     u32 key);
-struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
-					     u32 lower, u32 upper,
-					     u32 node, u32 key);
+struct publication *tipc_nametbl_remove_publ(struct net *net,
+					     struct tipc_uaddr *ua,
+					     struct tipc_socket_addr *sk,
+					     u32 key);
 bool tipc_nametbl_subscribe(struct tipc_subscription *s);
 void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e24cdc13335d..0daf3be11ed1 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -434,8 +434,7 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
 		tipc_mon_peer_down(net, n->addr, bearer_id);
-		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, n->addr,
-				      n->addr, n->link_id);
+		tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
 	}
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 94847252a7b7..7f5722d3b3d0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -152,8 +152,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		       bool kern);
 static void tipc_sk_timeout(struct timer_list *t);
 static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
-			    struct tipc_service_range const *seq);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
 static int tipc_sk_leave(struct tipc_sock *tsk);
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
 static int tipc_sk_insert(struct tipc_sock *tsk);
@@ -643,7 +642,7 @@ static int tipc_release(struct socket *sock)
 	__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	tipc_sk_leave(tsk);
-	tipc_sk_withdraw(tsk, 0, NULL);
+	tipc_sk_withdraw(tsk, NULL);
 	__skb_queue_purge(&tsk->mc_method.deferredq);
 	sk_stop_timer(sk, &sk->sk_timer);
 	tipc_sk_remove(tsk);
@@ -681,7 +680,7 @@ static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 	bool unbind = false;
 
 	if (unlikely(!alen))
-		return tipc_sk_withdraw(tsk, 0, NULL);
+		return tipc_sk_withdraw(tsk, NULL);
 
 	if (ua->addrtype == TIPC_SERVICE_ADDR) {
 		ua->addrtype = TIPC_SERVICE_RANGE;
@@ -699,7 +698,7 @@ static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
 		return -EACCES;
 
 	if (unbind)
-		return tipc_sk_withdraw(tsk, ua->scope, &ua->sr);
+		return tipc_sk_withdraw(tsk, ua);
 	return tipc_sk_publish(tsk, ua);
 }
 
@@ -2923,38 +2922,37 @@ static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua)
 	return 0;
 }
 
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
-			    struct tipc_service_range const *seq)
+static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua)
 {
 	struct net *net = sock_net(&tsk->sk);
-	struct publication *p;
-	struct publication *safe;
+	struct publication *safe, *p;
+	struct tipc_uaddr _ua;
 	int rc = -EINVAL;
 
-	if (scope != TIPC_NODE_SCOPE)
-		scope = TIPC_CLUSTER_SCOPE;
-
 	list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) {
-		if (seq) {
-			if (p->scope != scope)
-				continue;
-			if (p->sr.type != seq->type)
-				continue;
-			if (p->sr.lower != seq->lower)
-				continue;
-			if (p->sr.upper != seq->upper)
-				break;
-			tipc_nametbl_withdraw(net, p->sr.type, p->sr.lower,
-					      p->sr.upper, p->key);
-			rc = 0;
-			break;
+		if (!ua) {
+			tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope,
+				   p->sr.type, p->sr.lower, p->sr.upper);
+			tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key);
+			continue;
 		}
-		tipc_nametbl_withdraw(net, p->sr.type, p->sr.lower,
-				      p->sr.upper, p->key);
+		/* Unbind specific publication */
+		if (p->scope != ua->scope)
+			continue;
+		if (p->sr.type != ua->sr.type)
+			continue;
+		if (p->sr.lower != ua->sr.lower)
+			continue;
+		if (p->sr.upper != ua->sr.upper)
+			break;
+		tipc_nametbl_withdraw(net, ua, &p->sk, p->key);
 		rc = 0;
+		break;
 	}
-	if (list_empty(&tsk->publications))
+	if (list_empty(&tsk->publications)) {
 		tsk->published = 0;
+		rc = 0;
+	}
 	return rc;
 }
 
@@ -3109,15 +3107,17 @@ static int tipc_sk_leave(struct tipc_sock *tsk)
 {
 	struct net *net = sock_net(&tsk->sk);
 	struct tipc_group *grp = tsk->group;
-	struct tipc_service_range seq;
+	struct tipc_uaddr ua;
 	int scope;
 
 	if (!grp)
 		return -EINVAL;
-	tipc_group_self(grp, &seq, &scope);
+	ua.addrtype = TIPC_SERVICE_RANGE;
+	tipc_group_self(grp, &ua.sr, &scope);
+	ua.scope = scope;
 	tipc_group_delete(net, grp);
 	tsk->group = NULL;
-	tipc_sk_withdraw(tsk, scope, &seq);
+	tipc_sk_withdraw(tsk, &ua);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 66db239c4894be1016c3b1afae8e136c28c2da06 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:14 -0400
Subject: tipc: rename binding table lookup functions

The binding table provides four different lookup functions, which
purpose is not obvious neither by their names nor by the (lack of)
descriptions.

We now give these functions names that better match their purposes,
and improve the comments that describe what they are doing.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c        |  4 ++--
 net/tipc/name_table.c | 51 +++++++++++++++++++++++++++++++++------------------
 net/tipc/name_table.h | 19 +++++++++++--------
 net/tipc/socket.c     | 19 ++++++++++---------
 4 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index e9263280a2d4..25afb5949892 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -723,8 +723,8 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	if (msg_reroute_cnt(msg))
 		return false;
 	dnode = tipc_scope2node(net, msg_lookup_scope(msg));
-	dport = tipc_nametbl_translate(net, msg_nametype(msg),
-				       msg_nameinst(msg), &dnode);
+	dport = tipc_nametbl_lookup_anycast(net, msg_nametype(msg),
+					    msg_nameinst(msg), &dnode);
 	if (!dport)
 		return false;
 	msg_incr_reroute_cnt(msg);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 5676b8d4f08f..22616a943e70 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -544,24 +544,26 @@ exit:
 }
 
 /**
- * tipc_nametbl_translate - perform service instance to socket translation
+ * tipc_nametbl_lookup_anycast - perform service instance to socket translation
  * @net: network namespace
  * @type: message type
  * @instance: message instance
  * @dnode: the search domain used during translation
  *
+ * On entry, 'dnode' is the search domain used during the lookup
+ *
  * On exit:
- * - if translation is deferred to another node, leave 'dnode' unchanged and
- * return 0
- * - if translation is attempted and succeeds, set 'dnode' to the publishing
- * node and return the published (non-zero) port number
- * - if translation is attempted and fails, set 'dnode' to 0 and return 0
+ * - if lookup is deferred to another node, leave 'dnode' unchanged and return 0
+ * - if lookup is attempted and succeeds, set 'dnode' to the publishing node and
+ *   return the published (non-zero) port number
+ * - if lookup is attempted and fails, set 'dnode' to 0 and return 0
  *
  * Note that for legacy users (node configured with Z.C.N address format) the
  * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
  * we must look in the local binding list first
  */
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
+u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type,
+				u32 instance, u32 *dnode)
 {
 	struct tipc_net *tn = tipc_net(net);
 	bool legacy = tn->legacy_addr_format;
@@ -617,9 +619,15 @@ exit:
 	return port;
 }
 
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
-			 struct list_head *dsts, int *dstcnt, u32 exclude,
-			 bool all)
+/* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group
+ * Returns a list of one (== group anycast) or more (== group multicast)
+ * destination socket/node pairs matching the given address.
+ * The requester may or may not want to exclude himself from the list.
+ */
+bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
+			       u32 scope, struct list_head *dsts,
+			       int *dstcnt, u32 exclude,
+			       bool mcast)
 {
 	u32 self = tipc_own_addr(net);
 	struct service_range *sr;
@@ -646,7 +654,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
 			continue;
 		tipc_dest_push(dsts, p->sk.node, p->sk.ref);
 		(*dstcnt)++;
-		if (all)
+		if (mcast)
 			continue;
 		list_move_tail(&p->all_publ, &sr->all_publ);
 		break;
@@ -658,8 +666,14 @@ exit:
 	return !list_empty(dsts);
 }
 
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			    u32 scope, bool exact, struct list_head *dports)
+/* tipc_nametbl_lookup_mcast_sockets(): look up node local destinaton sockets
+ *                                      matching the given address
+ * Used on nodes which have received a multicast/broadcast message
+ * Returns a list of local sockets
+ */
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, u32 type, u32 lower,
+				       u32 upper, u32 scope, bool exact,
+				       struct list_head *dports)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
@@ -682,12 +696,13 @@ exit:
 	rcu_read_unlock();
 }
 
-/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes
- * - Creates list of nodes that overlap the given multicast address
- * - Determines if any node local destinations overlap
+/* tipc_nametbl_lookup_mcast_nodes(): look up all destination nodes matching
+ *                                    the given address. Used in sending node.
+ * Used on nodes which are sending out a multicast/broadcast message
+ * Returns a list of nodes, including own node if applicable
  */
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, struct tipc_nlist *nodes)
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, u32 type, u32 lower,
+				     u32 upper, struct tipc_nlist *nodes)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 5a7c83d22ef9..07a297f22135 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -111,16 +111,19 @@ struct name_table {
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
-u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
-void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
-			    u32 scope, bool exact, struct list_head *dports);
+u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type, u32 instance,
+				u32 *node);
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, u32 type, u32 lower,
+				       u32 upper, u32 scope, bool exact,
+				   struct list_head *dports);
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, u32 type, u32 lower,
+				     u32 upper, struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
+			       u32 domain, struct list_head *dsts,
+			       int *dstcnt, u32 exclude,
+			       bool all);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 			      u32 type, u32 domain);
-void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
-				   u32 upper, struct tipc_nlist *nodes);
-bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
-			 struct list_head *dsts, int *dstcnt, u32 exclude,
-			 bool all);
 struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
 					 struct tipc_socket_addr *sk, u32 key);
 void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 7f5722d3b3d0..b80c82d3ab87 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -863,8 +863,8 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 
 	/* Lookup destination nodes */
 	tipc_nlist_init(&dsts, tipc_own_addr(net));
-	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
-				      seq->upper, &dsts);
+	tipc_nametbl_lookup_mcast_nodes(net, seq->type, seq->lower,
+					seq->upper, &dsts);
 	if (!dsts.local && !dsts.remote)
 		return -EHOSTUNREACH;
 
@@ -1032,8 +1032,9 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 
 		/* Look for a non-congested destination member, if any */
 		while (1) {
-			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
-						 &dstcnt, exclude, false))
+			if (!tipc_nametbl_lookup_group(net, type, inst, scope,
+						       &dsts, &dstcnt, exclude,
+						       false))
 				return -EHOSTUNREACH;
 			tipc_dest_pop(&dsts, &node, &port);
 			cong = tipc_group_cong(tsk->group, node, port, blks,
@@ -1179,8 +1180,8 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
 	scope = msg_lookup_scope(hdr);
 	exclude = tipc_group_exclude(grp);
 
-	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
-				 &dstcnt, exclude, true))
+	if (!tipc_nametbl_lookup_group(net, type, inst, scope, &dsts,
+				       &dstcnt, exclude, true))
 		return -EHOSTUNREACH;
 
 	if (dstcnt == 1) {
@@ -1254,8 +1255,8 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		}
 
 		/* Create destination port list: */
-		tipc_nametbl_mc_lookup(net, type, lower, upper,
-				       scope, exact, &dports);
+		tipc_nametbl_lookup_mcast_sockets(net, type, lower, upper,
+						  scope, exact, &dports);
 
 		/* Clone message per destination */
 		while (tipc_dest_pop(&dports, NULL, &portid)) {
@@ -1485,7 +1486,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		type = dest->addr.name.name.type;
 		inst = dest->addr.name.name.instance;
 		dnode = dest->addr.name.domain;
-		dport = tipc_nametbl_translate(net, type, inst, &dnode);
+		dport = tipc_nametbl_lookup_anycast(net, type, inst, &dnode);
 		if (unlikely(!dport && !dnode))
 			return -EHOSTUNREACH;
 	} else if (dest->addrtype == TIPC_SOCKET_ADDR) {
-- 
cgit v1.2.3


From 908148bc5046e3503f2758d1d94c43766958d5be Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:15 -0400
Subject: tipc: refactor tipc_sendmsg() and tipc_lookup_anycast()

We simplify the signature if function tipc_nametbl_lookup_anycast(),
using address structures instead of discrete integers.

This also makes it possible to make some improvements to the functions
__tipc_sendmsg() in socket.c and tipc_msg_lookup_dest() in msg.c.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c        | 23 +++++++-------
 net/tipc/name_table.c | 75 ++++++++++++++++++++++------------------------
 net/tipc/name_table.h |  5 ++--
 net/tipc/socket.c     | 83 +++++++++++++++++++++++++--------------------------
 4 files changed, 91 insertions(+), 95 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 25afb5949892..3f0a25345a7c 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -707,8 +707,11 @@ bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy)
 bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 {
 	struct tipc_msg *msg = buf_msg(skb);
-	u32 dport, dnode;
-	u32 onode = tipc_own_addr(net);
+	u32 scope = msg_lookup_scope(msg);
+	u32 self = tipc_own_addr(net);
+	u32 inst = msg_nameinst(msg);
+	struct tipc_socket_addr sk;
+	struct tipc_uaddr ua;
 
 	if (!msg_isdata(msg))
 		return false;
@@ -722,16 +725,16 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	msg = buf_msg(skb);
 	if (msg_reroute_cnt(msg))
 		return false;
-	dnode = tipc_scope2node(net, msg_lookup_scope(msg));
-	dport = tipc_nametbl_lookup_anycast(net, msg_nametype(msg),
-					    msg_nameinst(msg), &dnode);
-	if (!dport)
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, scope,
+		   msg_nametype(msg), inst, inst);
+	sk.node = tipc_scope2node(net, scope);
+	if (!tipc_nametbl_lookup_anycast(net, &ua, &sk))
 		return false;
 	msg_incr_reroute_cnt(msg);
-	if (dnode != onode)
-		msg_set_prevnode(msg, onode);
-	msg_set_destnode(msg, dnode);
-	msg_set_destport(msg, dport);
+	if (sk.node != self)
+		msg_set_prevnode(msg, self);
+	msg_set_destnode(msg, sk.node);
+	msg_set_destport(msg, sk.ref);
 	*err = TIPC_OK;
 
 	return true;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 22616a943e70..20beb04b2db0 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -546,66 +546,64 @@ exit:
 /**
  * tipc_nametbl_lookup_anycast - perform service instance to socket translation
  * @net: network namespace
- * @type: message type
- * @instance: message instance
- * @dnode: the search domain used during translation
- *
- * On entry, 'dnode' is the search domain used during the lookup
+ * @ua: service address to look up
+ * @sk: address to socket we want to find
  *
+ * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be
+ * performed, which may not be this one.
  * On exit:
- * - if lookup is deferred to another node, leave 'dnode' unchanged and return 0
- * - if lookup is attempted and succeeds, set 'dnode' to the publishing node and
- *   return the published (non-zero) port number
- * - if lookup is attempted and fails, set 'dnode' to 0 and return 0
+ * - If lookup is deferred to another node, leave 'sk->node' unchanged and
+ *   return 'true'.
+ * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which
+ *   represent the bound socket and return 'true'.
+ * - If lookup fails, return 'false'
  *
  * Note that for legacy users (node configured with Z.C.N address format) the
- * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0
+ * 'closest-first' lookup algorithm must be maintained, i.e., if sk.node is 0
  * we must look in the local binding list first
  */
-u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type,
-				u32 instance, u32 *dnode)
+bool tipc_nametbl_lookup_anycast(struct net *net,
+				 struct tipc_uaddr *ua,
+				 struct tipc_socket_addr *sk)
 {
 	struct tipc_net *tn = tipc_net(net);
 	bool legacy = tn->legacy_addr_format;
 	u32 self = tipc_own_addr(net);
-	struct service_range *sr;
+	u32 inst = ua->sa.instance;
+	struct service_range *r;
 	struct tipc_service *sc;
-	struct list_head *list;
 	struct publication *p;
-	u32 port = 0;
-	u32 node = 0;
+	struct list_head *l;
+	bool res = false;
 
-	if (!tipc_in_scope(legacy, *dnode, self))
-		return 0;
+	if (!tipc_in_scope(legacy, sk->node, self))
+		return true;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua->sr.type);
 	if (unlikely(!sc))
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, instance, instance) {
+	service_range_foreach_match(r, sc, inst, inst) {
 		/* Select lookup algo: local, closest-first or round-robin */
-		if (*dnode == self) {
-			list = &sr->local_publ;
-			if (list_empty(list))
+		if (sk->node == self) {
+			l = &r->local_publ;
+			if (list_empty(l))
 				continue;
-			p = list_first_entry(list, struct publication,
-					     local_publ);
-			list_move_tail(&p->local_publ, &sr->local_publ);
-		} else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
-			list = &sr->local_publ;
-			p = list_first_entry(list, struct publication,
-					     local_publ);
-			list_move_tail(&p->local_publ, &sr->local_publ);
+			p = list_first_entry(l, struct publication, local_publ);
+			list_move_tail(&p->local_publ, &r->local_publ);
+		} else if (legacy && !sk->node && !list_empty(&r->local_publ)) {
+			l = &r->local_publ;
+			p = list_first_entry(l, struct publication, local_publ);
+			list_move_tail(&p->local_publ, &r->local_publ);
 		} else {
-			list = &sr->all_publ;
-			p = list_first_entry(list, struct publication,
-					     all_publ);
-			list_move_tail(&p->all_publ, &sr->all_publ);
+			l = &r->all_publ;
+			p = list_first_entry(l, struct publication, all_publ);
+			list_move_tail(&p->all_publ, &r->all_publ);
 		}
-		port = p->sk.ref;
-		node = p->sk.node;
+		*sk = p->sk;
+		res = true;
 		/* Todo: as for legacy, pick the first matching range only, a
 		 * "true" round-robin will be performed as needed.
 		 */
@@ -615,8 +613,7 @@ u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type,
 
 exit:
 	rcu_read_unlock();
-	*dnode = node;
-	return port;
+	return res;
 }
 
 /* tipc_nametbl_lookup_group(): lookup destinaton(s) in a communication group
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 07a297f22135..9896205a5d66 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -110,9 +110,8 @@ struct name_table {
 };
 
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
-u32 tipc_nametbl_lookup_anycast(struct net *net, u32 type, u32 instance,
-				u32 *node);
+bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
+				 struct tipc_socket_addr *sk);
 void tipc_nametbl_lookup_mcast_sockets(struct net *net, u32 type, u32 lower,
 				       u32 upper, u32 scope, bool exact,
 				   struct list_head *dports);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b80c82d3ab87..26e1ca6e4766 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1424,44 +1424,43 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 	struct list_head *clinks = &tsk->cong_links;
 	bool syn = !tipc_sk_type_connectionless(sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
-	struct tipc_service_range *seq;
+	struct tipc_socket_addr skaddr;
 	struct sk_buff_head pkts;
-	u32 dport = 0, dnode = 0;
-	u32 type = 0, inst = 0;
-	int mtu, rc;
+	int atype, mtu, rc;
 
 	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
 		return -EMSGSIZE;
 
-	if (likely(dest)) {
-		if (unlikely(m->msg_namelen < sizeof(*dest)))
-			return -EINVAL;
-		if (unlikely(dest->family != AF_TIPC))
+	if (ua) {
+		if (!tipc_uaddr_valid(ua, m->msg_namelen))
 			return -EINVAL;
+		 atype = ua->addrtype;
 	}
 
+	/* If socket belongs to a communication group follow other paths */
 	if (grp) {
-		if (!dest)
+		if (!ua)
 			return tipc_send_group_bcast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_SERVICE_ADDR)
+		if (atype == TIPC_SERVICE_ADDR)
 			return tipc_send_group_anycast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_SOCKET_ADDR)
+		if (atype == TIPC_SOCKET_ADDR)
 			return tipc_send_group_unicast(sock, m, dlen, timeout);
-		if (dest->addrtype == TIPC_ADDR_MCAST)
+		if (atype == TIPC_SERVICE_RANGE)
 			return tipc_send_group_mcast(sock, m, dlen, timeout);
 		return -EINVAL;
 	}
 
-	if (unlikely(!dest)) {
-		dest = &tsk->peer;
-		if (!syn && dest->family != AF_TIPC)
+	if (!ua) {
+		ua = (struct tipc_uaddr *)&tsk->peer;
+		if (!syn && ua->family != AF_TIPC)
 			return -EDESTADDRREQ;
+		 atype = ua->addrtype;
 	}
 
 	if (unlikely(syn)) {
@@ -1471,54 +1470,51 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 			return -EISCONN;
 		if (tsk->published)
 			return -EOPNOTSUPP;
-		if (dest->addrtype == TIPC_SERVICE_ADDR) {
-			tsk->conn_type = dest->addr.name.name.type;
-			tsk->conn_instance = dest->addr.name.name.instance;
+		if (atype == TIPC_SERVICE_ADDR) {
+			tsk->conn_type = ua->sa.type;
+			tsk->conn_instance = ua->sa.instance;
 		}
 		msg_set_syn(hdr, 1);
 	}
 
-	seq = &dest->addr.nameseq;
-	if (dest->addrtype == TIPC_ADDR_MCAST)
-		return tipc_sendmcast(sock, seq, m, dlen, timeout);
-
-	if (dest->addrtype == TIPC_SERVICE_ADDR) {
-		type = dest->addr.name.name.type;
-		inst = dest->addr.name.name.instance;
-		dnode = dest->addr.name.domain;
-		dport = tipc_nametbl_lookup_anycast(net, type, inst, &dnode);
-		if (unlikely(!dport && !dnode))
+	/* Determine destination */
+	if (atype == TIPC_SERVICE_RANGE) {
+		return tipc_sendmcast(sock, &ua->sr, m, dlen, timeout);
+	} else if (atype == TIPC_SERVICE_ADDR) {
+		skaddr.node = ua->lookup_node;
+		ua->scope = tipc_node2scope(skaddr.node);
+		if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr))
 			return -EHOSTUNREACH;
-	} else if (dest->addrtype == TIPC_SOCKET_ADDR) {
-		dnode = dest->addr.id.node;
+	} else if (atype == TIPC_SOCKET_ADDR) {
+		skaddr = ua->sk;
 	} else {
 		return -EINVAL;
 	}
 
 	/* Block or return if destination link is congested */
 	rc = tipc_wait_for_cond(sock, &timeout,
-				!tipc_dest_find(clinks, dnode, 0));
+				!tipc_dest_find(clinks, skaddr.node, 0));
 	if (unlikely(rc))
 		return rc;
 
-	if (dest->addrtype == TIPC_SERVICE_ADDR) {
+	/* Finally build message header */
+	msg_set_destnode(hdr, skaddr.node);
+	msg_set_destport(hdr, skaddr.ref);
+	if (atype == TIPC_SERVICE_ADDR) {
 		msg_set_type(hdr, TIPC_NAMED_MSG);
 		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
-		msg_set_nametype(hdr, type);
-		msg_set_nameinst(hdr, inst);
-		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
-		msg_set_destnode(hdr, dnode);
-		msg_set_destport(hdr, dport);
+		msg_set_nametype(hdr, ua->sa.type);
+		msg_set_nameinst(hdr, ua->sa.instance);
+		msg_set_lookup_scope(hdr, ua->scope);
 	} else { /* TIPC_SOCKET_ADDR */
 		msg_set_type(hdr, TIPC_DIRECT_MSG);
 		msg_set_lookup_scope(hdr, 0);
-		msg_set_destnode(hdr, dnode);
-		msg_set_destport(hdr, dest->addr.id.ref);
 		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
+	/* Add message body */
 	__skb_queue_head_init(&pkts);
-	mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
+	mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true);
 	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
 	if (unlikely(rc != dlen))
 		return rc;
@@ -1527,10 +1523,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		return -ENOMEM;
 	}
 
+	/* Send message */
 	trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
-	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+	rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid);
 	if (unlikely(rc == -ELINKCONG)) {
-		tipc_dest_push(clinks, dnode, 0);
+		tipc_dest_push(clinks, skaddr.node, 0);
 		tsk->cong_link_cnt++;
 		rc = 0;
 	}
-- 
cgit v1.2.3


From 45ceea2d403b811cb67cff59cfb395deeda733be Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:16 -0400
Subject: tipc: simplify signature of tipc_namtbl_lookup_mcast_sockets()

We reduce the signature of this function according to the same
principle as the preceding commits.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 10 +++++-----
 net/tipc/name_table.h |  5 ++---
 net/tipc/socket.c     | 22 +++++++++++-----------
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 20beb04b2db0..ad021d8c02e7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -668,21 +668,21 @@ exit:
  * Used on nodes which have received a multicast/broadcast message
  * Returns a list of local sockets
  */
-void tipc_nametbl_lookup_mcast_sockets(struct net *net, u32 type, u32 lower,
-				       u32 upper, u32 scope, bool exact,
-				       struct list_head *dports)
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+				       bool exact, struct list_head *dports)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
+	u32 scope = ua->scope;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua->sr.type);
 	if (!sc)
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		list_for_each_entry(p, &sr->local_publ, local_publ) {
 			if (p->scope == scope || (!exact && p->scope < scope))
 				tipc_dest_push(dports, 0, p->sk.ref);
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 9896205a5d66..26aa6acb4512 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -112,9 +112,8 @@ struct name_table {
 int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
 				 struct tipc_socket_addr *sk);
-void tipc_nametbl_lookup_mcast_sockets(struct net *net, u32 type, u32 lower,
-				       u32 upper, u32 scope, bool exact,
-				   struct list_head *dports);
+void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
+				       bool exact, struct list_head *dports);
 void tipc_nametbl_lookup_mcast_nodes(struct net *net, u32 type, u32 lower,
 				     u32 upper, struct tipc_nlist *nodes);
 bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 26e1ca6e4766..b952128537e1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1205,17 +1205,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		       struct sk_buff_head *inputq)
 {
 	u32 self = tipc_own_addr(net);
-	u32 type, lower, upper, scope;
 	struct sk_buff *skb, *_skb;
 	u32 portid, onode;
 	struct sk_buff_head tmpq;
 	struct list_head dports;
 	struct tipc_msg *hdr;
+	struct tipc_uaddr ua;
 	int user, mtyp, hlen;
 	bool exact;
 
 	__skb_queue_head_init(&tmpq);
 	INIT_LIST_HEAD(&dports);
+	ua.addrtype = TIPC_SERVICE_RANGE;
 
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
@@ -1224,7 +1225,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		mtyp = msg_type(hdr);
 		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
 		onode = msg_orignode(hdr);
-		type = msg_nametype(hdr);
+		ua.sr.type = msg_nametype(hdr);
 
 		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
 			spin_lock_bh(&inputq->lock);
@@ -1239,24 +1240,23 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 
 		/* Group messages require exact scope match */
 		if (msg_in_group(hdr)) {
-			lower = 0;
-			upper = ~0;
-			scope = msg_lookup_scope(hdr);
+			ua.sr.lower = 0;
+			ua.sr.upper = ~0;
+			ua.scope = msg_lookup_scope(hdr);
 			exact = true;
 		} else {
 			/* TIPC_NODE_SCOPE means "any scope" in this context */
 			if (onode == self)
-				scope = TIPC_NODE_SCOPE;
+				ua.scope = TIPC_NODE_SCOPE;
 			else
-				scope = TIPC_CLUSTER_SCOPE;
+				ua.scope = TIPC_CLUSTER_SCOPE;
 			exact = false;
-			lower = msg_namelower(hdr);
-			upper = msg_nameupper(hdr);
+			ua.sr.lower = msg_namelower(hdr);
+			ua.sr.upper = msg_nameupper(hdr);
 		}
 
 		/* Create destination port list: */
-		tipc_nametbl_lookup_mcast_sockets(net, type, lower, upper,
-						  scope, exact, &dports);
+		tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
 
 		/* Clone message per destination */
 		while (tipc_dest_pop(&dports, NULL, &portid)) {
-- 
cgit v1.2.3


From 833f867089e5fa8dc67c06d4abb51a256e53416c Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:17 -0400
Subject: tipc: simplify signature of tipc_nametbl_lookup_mcast_nodes()

We follow up the preceding commits by reducing the signature of
the function tipc_nametbl_lookup_mcast_nodes().

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c |  8 ++++----
 net/tipc/name_table.h |  4 ++--
 net/tipc/socket.c     | 18 ++++++++----------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index ad021d8c02e7..33b79a5da8c9 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -698,20 +698,20 @@ exit:
  * Used on nodes which are sending out a multicast/broadcast message
  * Returns a list of nodes, including own node if applicable
  */
-void tipc_nametbl_lookup_mcast_nodes(struct net *net, u32 type, u32 lower,
-				     u32 upper, struct tipc_nlist *nodes)
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+				     struct tipc_nlist *nodes)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua->sr.type);
 	if (!sc)
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
 			tipc_nlist_add(nodes, p->sk.node);
 		}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 26aa6acb4512..c5aa45abbdc3 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -114,8 +114,8 @@ bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
 				 struct tipc_socket_addr *sk);
 void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
 				       bool exact, struct list_head *dports);
-void tipc_nametbl_lookup_mcast_nodes(struct net *net, u32 type, u32 lower,
-				     u32 upper, struct tipc_nlist *nodes);
+void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
+				     struct tipc_nlist *nodes);
 bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
 			       u32 domain, struct list_head *dsts,
 			       int *dstcnt, u32 exclude,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b952128537e1..83d7c9c25c63 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -832,7 +832,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
 /**
  * tipc_sendmcast - send multicast message
  * @sock: socket structure
- * @seq: destination address
+ * @ua: destination address struct
  * @msg: message to send
  * @dlen: length of data to send
  * @timeout: timeout to wait for wakeup
@@ -840,7 +840,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
  * Called from function tipc_sendmsg(), which has done all sanity checks
  * Return: the number of bytes sent on success, or errno
  */
-static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
+static int tipc_sendmcast(struct  socket *sock, struct tipc_uaddr *ua,
 			  struct msghdr *msg, size_t dlen, long timeout)
 {
 	struct sock *sk = sock->sk;
@@ -848,7 +848,6 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
 	int mtu = tipc_bcast_get_mtu(net);
-	struct tipc_mc_method *method = &tsk->mc_method;
 	struct sk_buff_head pkts;
 	struct tipc_nlist dsts;
 	int rc;
@@ -863,8 +862,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 
 	/* Lookup destination nodes */
 	tipc_nlist_init(&dsts, tipc_own_addr(net));
-	tipc_nametbl_lookup_mcast_nodes(net, seq->type, seq->lower,
-					seq->upper, &dsts);
+	tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
 	if (!dsts.local && !dsts.remote)
 		return -EHOSTUNREACH;
 
@@ -874,9 +872,9 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
 	msg_set_destport(hdr, 0);
 	msg_set_destnode(hdr, 0);
-	msg_set_nametype(hdr, seq->type);
-	msg_set_namelower(hdr, seq->lower);
-	msg_set_nameupper(hdr, seq->upper);
+	msg_set_nametype(hdr, ua->sr.type);
+	msg_set_namelower(hdr, ua->sr.lower);
+	msg_set_nameupper(hdr, ua->sr.upper);
 
 	/* Build message as chain of buffers */
 	__skb_queue_head_init(&pkts);
@@ -886,7 +884,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 	if (unlikely(rc == dlen)) {
 		trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
 					TIPC_DUMP_SK_SNDQ, " ");
-		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
+		rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
 				     &tsk->cong_link_cnt);
 	}
 
@@ -1479,7 +1477,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 
 	/* Determine destination */
 	if (atype == TIPC_SERVICE_RANGE) {
-		return tipc_sendmcast(sock, &ua->sr, m, dlen, timeout);
+		return tipc_sendmcast(sock, ua, m, dlen, timeout);
 	} else if (atype == TIPC_SERVICE_ADDR) {
 		skaddr.node = ua->lookup_node;
 		ua->scope = tipc_node2scope(skaddr.node);
-- 
cgit v1.2.3


From 006ed14ef82b26605bdc8e2706235e2723d825d3 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:18 -0400
Subject: tipc: simplify signature of tipc_nametbl_lookup_group()

We reduce the signature of tipc_nametbl_lookup_group() by using a
struct tipc_uaddr pointer. This entails a couple of minor changes in the
functions tipc_send_group_mcast/anycast/unicast/bcast() in socket.c

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 14 +++++++-------
 net/tipc/name_table.h |  7 +++----
 net/tipc/socket.c     | 42 +++++++++++++++++-------------------------
 3 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 33b79a5da8c9..1ce65cbce672 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -621,31 +621,31 @@ exit:
  * destination socket/node pairs matching the given address.
  * The requester may or may not want to exclude himself from the list.
  */
-bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
-			       u32 scope, struct list_head *dsts,
-			       int *dstcnt, u32 exclude,
-			       bool mcast)
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+			       struct list_head *dsts, int *dstcnt,
+			       u32 exclude, bool mcast)
 {
 	u32 self = tipc_own_addr(net);
+	u32 inst = ua->sa.instance;
 	struct service_range *sr;
 	struct tipc_service *sc;
 	struct publication *p;
 
 	*dstcnt = 0;
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua->sa.type);
 	if (unlikely(!sc))
 		goto exit;
 
 	spin_lock_bh(&sc->lock);
 
 	/* Todo: a full search i.e. service_range_foreach_match() instead? */
-	sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
+	sr = service_range_match_first(sc->ranges.rb_node, inst, inst);
 	if (!sr)
 		goto no_match;
 
 	list_for_each_entry(p, &sr->all_publ, all_publ) {
-		if (p->scope != scope)
+		if (p->scope != ua->scope)
 			continue;
 		if (p->sk.ref == exclude && p->sk.node == self)
 			continue;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c5aa45abbdc3..b20b694c1284 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -116,10 +116,9 @@ void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
 				       bool exact, struct list_head *dports);
 void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
 				     struct tipc_nlist *nodes);
-bool tipc_nametbl_lookup_group(struct net *net, u32 type, u32 instance,
-			       u32 domain, struct list_head *dsts,
-			       int *dstcnt, u32 exclude,
-			       bool all);
+bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
+			       struct list_head *dsts, int *dstcnt,
+			       u32 exclude, bool mcast);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 			      u32 type, u32 domain);
 struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 83d7c9c25c63..a7f86f22c03a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -958,7 +958,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 				   int dlen, long timeout)
 {
 	struct sock *sk = sock->sk;
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
@@ -966,8 +966,8 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 	u32 node, port;
 	int rc;
 
-	node = dest->addr.id.node;
-	port = dest->addr.id.ref;
+	node = ua->sk.node;
+	port = ua->sk.ref;
 	if (!port && !node)
 		return -EHOSTUNREACH;
 
@@ -1001,7 +1001,7 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 				   int dlen, long timeout)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct list_head *cong_links = &tsk->cong_links;
@@ -1012,16 +1012,13 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 	struct net *net = sock_net(sk);
 	u32 node, port, exclude;
 	struct list_head dsts;
-	u32 type, inst, scope;
 	int lookups = 0;
 	int dstcnt, rc;
 	bool cong;
 
 	INIT_LIST_HEAD(&dsts);
-
-	type = msg_nametype(hdr);
-	inst = dest->addr.name.name.instance;
-	scope = msg_lookup_scope(hdr);
+	ua->sa.type = msg_nametype(hdr);
+	ua->scope = msg_lookup_scope(hdr);
 
 	while (++lookups < 4) {
 		exclude = tipc_group_exclude(tsk->group);
@@ -1030,9 +1027,8 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 
 		/* Look for a non-congested destination member, if any */
 		while (1) {
-			if (!tipc_nametbl_lookup_group(net, type, inst, scope,
-						       &dsts, &dstcnt, exclude,
-						       false))
+			if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
+						       exclude, false))
 				return -EHOSTUNREACH;
 			tipc_dest_pop(&dsts, &node, &port);
 			cong = tipc_group_cong(tsk->group, node, port, blks,
@@ -1087,7 +1083,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 				 int dlen, long timeout)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 	struct tipc_sock *tsk = tipc_sk(sk);
@@ -1112,9 +1108,9 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 		return -EHOSTUNREACH;
 
 	/* Complete message header */
-	if (dest) {
+	if (ua) {
 		msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
-		msg_set_nameinst(hdr, dest->addr.name.name.instance);
+		msg_set_nameinst(hdr, ua->sa.instance);
 	} else {
 		msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
 		msg_set_nameinst(hdr, 0);
@@ -1161,29 +1157,25 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
 static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
 				 int dlen, long timeout)
 {
+	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
 	struct sock *sk = sock->sk;
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_group *grp = tsk->group;
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
-	u32 type, inst, scope, exclude;
 	struct list_head dsts;
-	u32 dstcnt;
+	u32 dstcnt, exclude;
 
 	INIT_LIST_HEAD(&dsts);
-
-	type = msg_nametype(hdr);
-	inst = dest->addr.name.name.instance;
-	scope = msg_lookup_scope(hdr);
+	ua->sa.type = msg_nametype(hdr);
+	ua->scope = msg_lookup_scope(hdr);
 	exclude = tipc_group_exclude(grp);
 
-	if (!tipc_nametbl_lookup_group(net, type, inst, scope, &dsts,
-				       &dstcnt, exclude, true))
+	if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true))
 		return -EHOSTUNREACH;
 
 	if (dstcnt == 1) {
-		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+		tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref);
 		return tipc_send_group_unicast(sock, m, dlen, timeout);
 	}
 
-- 
cgit v1.2.3


From 13c9d23f6ac3fa05f1e4f485c1873748912c1d23 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:19 -0400
Subject: tipc: simplify signature of tipc_service_find_range()

We simplify the signatures of the functions tipc_service_create_range()
and tipc_service_find_range().

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 1ce65cbce672..f6a1b78a807e 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -275,13 +275,13 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
 /*  tipc_service_find_range - find service range matching publication parameters
  */
 static struct service_range *tipc_service_find_range(struct tipc_service *sc,
-						     u32 lower, u32 upper)
+						     struct tipc_uaddr *ua)
 {
 	struct service_range *sr;
 
-	service_range_foreach_match(sr, sc, lower, upper) {
+	service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
 		/* Look for exact match */
-		if (sr->lower == lower && sr->upper == upper)
+		if (sr->lower == ua->sr.lower && sr->upper == ua->sr.upper)
 			return sr;
 	}
 
@@ -289,10 +289,12 @@ static struct service_range *tipc_service_find_range(struct tipc_service *sc,
 }
 
 static struct service_range *tipc_service_create_range(struct tipc_service *sc,
-						       u32 lower, u32 upper)
+						       struct publication *p)
 {
 	struct rb_node **n, *parent = NULL;
 	struct service_range *sr;
+	u32 lower = p->sr.lower;
+	u32 upper = p->sr.upper;
 
 	n = &sc->ranges.rb_node;
 	while (*n) {
@@ -332,7 +334,7 @@ static bool tipc_service_insert_publ(struct net *net,
 	bool res = false;
 
 	spin_lock_bh(&sc->lock);
-	sr = tipc_service_create_range(sc, p->sr.lower, p->sr.upper);
+	sr = tipc_service_create_range(sc, p);
 	if (!sr)
 		goto  exit;
 
@@ -513,7 +515,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 		return NULL;
 
 	spin_lock_bh(&sc->lock);
-	sr = tipc_service_find_range(sc, lower, upper);
+	sr = tipc_service_find_range(sc, ua);
 	if (!sr)
 		goto exit;
 	p = tipc_service_remove_publ(sr, sk, key);
-- 
cgit v1.2.3


From 6e44867b01e6998ce4c997dc7921d0fb157c3661 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:20 -0400
Subject: tipc: simplify signature of tipc_find_service()

We reduce the signature of tipc_find_service() and
tipc_create_service(). The reason for doing this might not
be obvious, but we plan to let struct tipc_uaddr contain
information that is relevant for these functions in a later
commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 62 ++++++++++++++++++++++++++++++---------------------
 net/tipc/name_table.h |  2 +-
 net/tipc/socket.c     |  2 +-
 3 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index f6a1b78a807e..73d9f49662e4 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -249,25 +249,30 @@ static struct publication *tipc_publ_create(struct tipc_uaddr *ua,
 
 /**
  * tipc_service_create - create a service structure for the specified 'type'
- * @type: service type
- * @hd: name_table services list
+ * @net: network namespace
+ * @ua: address representing the service to be bound
  *
  * Allocates a single range structure and sets it to all 0's.
  */
-static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
+static struct tipc_service *tipc_service_create(struct net *net,
+						struct tipc_uaddr *ua)
 {
-	struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC);
+	struct name_table *nt = tipc_name_table(net);
+	struct tipc_service *service;
+	struct hlist_head *hd;
 
+	service = kzalloc(sizeof(*service), GFP_ATOMIC);
 	if (!service) {
 		pr_warn("Service creation failed, no memory\n");
 		return NULL;
 	}
 
 	spin_lock_init(&service->lock);
-	service->type = type;
+	service->type = ua->sr.type;
 	service->ranges = RB_ROOT;
 	INIT_HLIST_NODE(&service->service_list);
 	INIT_LIST_HEAD(&service->subscriptions);
+	hd = &nt->services[hash(ua->sr.type)];
 	hlist_add_head_rcu(&service->service_list, hd);
 	return service;
 }
@@ -455,15 +460,16 @@ static void tipc_service_subscribe(struct tipc_service *service,
 	}
 }
 
-static struct tipc_service *tipc_service_find(struct net *net, u32 type)
+static struct tipc_service *tipc_service_find(struct net *net,
+					      struct tipc_uaddr *ua)
 {
 	struct name_table *nt = tipc_name_table(net);
 	struct hlist_head *service_head;
 	struct tipc_service *service;
 
-	service_head = &nt->services[hash(type)];
+	service_head = &nt->services[hash(ua->sr.type)];
 	hlist_for_each_entry_rcu(service, service_head, service_list) {
-		if (service->type == type)
+		if (service->type == ua->sr.type)
 			return service;
 	}
 	return NULL;
@@ -474,7 +480,6 @@ struct publication *tipc_nametbl_insert_publ(struct net *net,
 					     struct tipc_socket_addr *sk,
 					     u32 key)
 {
-	struct name_table *nt = tipc_name_table(net);
 	struct tipc_service *sc;
 	struct publication *p;
 	u32 type = ua->sr.type;
@@ -488,9 +493,9 @@ struct publication *tipc_nametbl_insert_publ(struct net *net,
 			 type, ua->sr.lower, ua->sr.upper, sk->node);
 		return NULL;
 	}
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
-		sc = tipc_service_create(type, &nt->services[hash(type)]);
+		sc = tipc_service_create(net, ua);
 	if (sc && tipc_service_insert_publ(net, sc, p))
 		return p;
 	kfree(p);
@@ -510,7 +515,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 	u32 lower = ua->sr.lower;
 	bool last;
 
-	sc = tipc_service_find(net, ua->sr.type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		return NULL;
 
@@ -582,7 +587,7 @@ bool tipc_nametbl_lookup_anycast(struct net *net,
 		return true;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, ua->sr.type);
+	sc = tipc_service_find(net, ua);
 	if (unlikely(!sc))
 		goto exit;
 
@@ -635,7 +640,7 @@ bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
 
 	*dstcnt = 0;
 	rcu_read_lock();
-	sc = tipc_service_find(net, ua->sa.type);
+	sc = tipc_service_find(net, ua);
 	if (unlikely(!sc))
 		goto exit;
 
@@ -679,7 +684,7 @@ void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
 	u32 scope = ua->scope;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, ua->sr.type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
@@ -708,7 +713,7 @@ void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
 	struct publication *p;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, ua->sr.type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
@@ -726,7 +731,7 @@ exit:
 /* tipc_nametbl_build_group - build list of communication group members
  */
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
-			      u32 type, u32 scope)
+			      struct tipc_uaddr *ua)
 {
 	struct service_range *sr;
 	struct tipc_service *sc;
@@ -734,7 +739,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	struct rb_node *n;
 
 	rcu_read_lock();
-	sc = tipc_service_find(net, type);
+	sc = tipc_service_find(net, ua);
 	if (!sc)
 		goto exit;
 
@@ -742,9 +747,10 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
 		sr = container_of(n, struct service_range, tree_node);
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
-			if (p->scope != scope)
+			if (p->scope != ua->scope)
 				continue;
-			tipc_group_add_member(grp, p->sk.node, p->sk.ref, p->sr.lower);
+			tipc_group_add_member(grp, p->sk.node, p->sk.ref,
+					      p->sr.lower);
 		}
 	}
 	spin_unlock_bh(&sc->lock);
@@ -826,17 +832,18 @@ void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
  */
 bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
-	struct name_table *nt = tipc_name_table(sub->net);
 	struct tipc_net *tn = tipc_net(sub->net);
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 type = tipc_sub_read(s, seq.type);
 	struct tipc_service *sc;
+	struct tipc_uaddr ua;
 	bool res = true;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
 	spin_lock_bh(&tn->nametbl_lock);
-	sc = tipc_service_find(sub->net, type);
+	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
-		sc = tipc_service_create(type, &nt->services[hash(type)]);
+		sc = tipc_service_create(sub->net, &ua);
 	if (sc) {
 		spin_lock_bh(&sc->lock);
 		tipc_service_subscribe(sc, sub);
@@ -861,9 +868,11 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 type = tipc_sub_read(s, seq.type);
 	struct tipc_service *sc;
+	struct tipc_uaddr ua;
 
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
 	spin_lock_bh(&tn->nametbl_lock);
-	sc = tipc_service_find(sub->net, type);
+	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
 		goto exit;
 
@@ -1052,6 +1061,7 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_service *service = NULL;
 	struct hlist_head *head;
+	struct tipc_uaddr ua;
 	int err;
 	int i;
 
@@ -1065,7 +1075,9 @@ static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg,
 
 		if (*last_type ||
 		    (!i && *last_key && (*last_lower == *last_key))) {
-			service = tipc_service_find(net, *last_type);
+			tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+				   *last_type, *last_lower, *last_lower);
+			service = tipc_service_find(net, &ua);
 			if (!service)
 				return -EPIPE;
 		} else {
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index b20b694c1284..c7c9a3ddd420 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -120,7 +120,7 @@ bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
 			       struct list_head *dsts, int *dstcnt,
 			       u32 exclude, bool mcast);
 void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
-			      u32 type, u32 domain);
+			      struct tipc_uaddr *ua);
 struct publication *tipc_nametbl_publish(struct net *net, struct tipc_uaddr *ua,
 					 struct tipc_socket_addr *sk, u32 key);
 void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a7f86f22c03a..117a472a8e61 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3075,9 +3075,9 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
 	msg_set_lookup_scope(hdr, mreq->scope);
 	msg_set_nametype(hdr, mreq->type);
 	msg_set_dest_droppable(hdr, true);
-	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
 	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
 		   mreq->type, mreq->instance, mreq->instance);
+	tipc_nametbl_build_group(net, grp, &ua);
 	rc = tipc_sk_publish(tsk, &ua);
 	if (rc) {
 		tipc_group_delete(net, grp);
-- 
cgit v1.2.3


From 09f78b851ea332a67ebaf7b4463a80a4d0d3d747 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:21 -0400
Subject: tipc: simplify api between binding table and topology server

The function tipc_report_overlap() is called from the binding table
with numerous parameters taken from an instance of struct publication.
A closer look reveals that it always is safe to send along a pointer
to the instance itself, and hence reduce the call signature. We do
that in this commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 21 +++++++-----------
 net/tipc/subscr.c     | 59 +++++++++++++++++++++++++++------------------------
 net/tipc/subscr.h     | 11 ++++------
 3 files changed, 43 insertions(+), 48 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 73d9f49662e4..f648feae446f 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -358,9 +358,7 @@ static bool tipc_service_insert_publ(struct net *net,
 
 	/* Any subscriptions waiting for notification?  */
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
-					TIPC_PUBLISHED, p->sk.ref, p->sk.node,
-					p->scope, first);
+		tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, first);
 	}
 	res = true;
 exit:
@@ -453,9 +451,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
 	/* Sort the publications before reporting */
 	list_sort(NULL, &publ_list, tipc_publ_sort);
 	list_for_each_entry_safe(p, tmp, &publ_list, list) {
-		tipc_sub_report_overlap(sub, p->sr.lower, p->sr.upper,
-					TIPC_PUBLISHED, p->sk.ref, p->sk.node,
-					p->scope, true);
+		tipc_sub_report_overlap(sub, p, TIPC_PUBLISHED, true);
 		list_del_init(&p->list);
 	}
 }
@@ -511,8 +507,6 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 	struct publication *p = NULL;
 	struct service_range *sr;
 	struct tipc_service *sc;
-	u32 upper = ua->sr.upper;
-	u32 lower = ua->sr.lower;
 	bool last;
 
 	sc = tipc_service_find(net, ua);
@@ -530,8 +524,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 	/* Notify any waiting subscriptions */
 	last = list_empty(&sr->all_publ);
 	list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) {
-		tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN,
-					sk->ref, sk->node, ua->scope, last);
+		tipc_sub_report_overlap(sub, p, TIPC_WITHDRAWN, last);
 	}
 
 	/* Remove service range item if this was its last publication */
@@ -540,7 +533,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 		kfree(sr);
 	}
 
-	/* Delete service item if this no more publications and subscriptions */
+	/* Delete service item if no more publications and subscriptions */
 	if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) {
 		hlist_del_init_rcu(&sc->service_list);
 		kfree_rcu(sc, rcu);
@@ -839,7 +832,8 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 	struct tipc_uaddr ua;
 	bool res = true;
 
-	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
+		   tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
 	spin_lock_bh(&tn->nametbl_lock);
 	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
@@ -870,7 +864,8 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 	struct tipc_service *sc;
 	struct tipc_uaddr ua;
 
-	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type, 0, 0);
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
+		   tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
 	spin_lock_bh(&tn->nametbl_lock);
 	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index f6ad0005218c..5f8dc0e7488f 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2010-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,18 +40,26 @@
 #include "subscr.h"
 
 static void tipc_sub_send_event(struct tipc_subscription *sub,
-				u32 found_lower, u32 found_upper,
-				u32 event, u32 port, u32 node)
+				struct publication *p,
+				u32 event)
 {
+	struct tipc_subscr *s = &sub->evt.s;
 	struct tipc_event *evt = &sub->evt;
 
 	if (sub->inactive)
 		return;
 	tipc_evt_write(evt, event, event);
-	tipc_evt_write(evt, found_lower, found_lower);
-	tipc_evt_write(evt, found_upper, found_upper);
-	tipc_evt_write(evt, port.ref, port);
-	tipc_evt_write(evt, port.node, node);
+	if (p) {
+		tipc_evt_write(evt, found_lower, p->sr.lower);
+		tipc_evt_write(evt, found_upper, p->sr.upper);
+		tipc_evt_write(evt, port.ref, p->sk.ref);
+		tipc_evt_write(evt, port.node, p->sk.node);
+	} else {
+		tipc_evt_write(evt, found_lower, s->seq.lower);
+		tipc_evt_write(evt, found_upper, s->seq.upper);
+		tipc_evt_write(evt, port.ref, 0);
+		tipc_evt_write(evt, port.node, 0);
+	}
 	tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt);
 }
 
@@ -61,24 +69,23 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
  * @found_lower: lower value to test
  * @found_upper: upper value to test
  *
- * Return: 1 if there is overlap, otherwise 0.
+ * Returns true if there is overlap, otherwise false.
  */
-int tipc_sub_check_overlap(struct tipc_service_range *seq, u32 found_lower,
-			   u32 found_upper)
+bool tipc_sub_check_overlap(struct tipc_service_range *sr,
+			    u32 found_lower, u32 found_upper)
 {
-	if (found_lower < seq->lower)
-		found_lower = seq->lower;
-	if (found_upper > seq->upper)
-		found_upper = seq->upper;
+	if (found_lower < sr->lower)
+		found_lower = sr->lower;
+	if (found_upper > sr->upper)
+		found_upper = sr->upper;
 	if (found_lower > found_upper)
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
 void tipc_sub_report_overlap(struct tipc_subscription *sub,
-			     u32 found_lower, u32 found_upper,
-			     u32 event, u32 port, u32 node,
-			     u32 scope, int must)
+			     struct publication *p,
+			     u32 event, bool must)
 {
 	struct tipc_subscr *s = &sub->evt.s;
 	u32 filter = tipc_sub_read(s, filter);
@@ -88,29 +95,25 @@ void tipc_sub_report_overlap(struct tipc_subscription *sub,
 	seq.lower = tipc_sub_read(s, seq.lower);
 	seq.upper = tipc_sub_read(s, seq.upper);
 
-	if (!tipc_sub_check_overlap(&seq, found_lower, found_upper))
+	if (!tipc_sub_check_overlap(&seq, p->sr.lower, p->sr.upper))
 		return;
-
 	if (!must && !(filter & TIPC_SUB_PORTS))
 		return;
-	if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE)
+	if (filter & TIPC_SUB_CLUSTER_SCOPE && p->scope == TIPC_NODE_SCOPE)
 		return;
-	if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE)
+	if (filter & TIPC_SUB_NODE_SCOPE && p->scope != TIPC_NODE_SCOPE)
 		return;
 	spin_lock(&sub->lock);
-	tipc_sub_send_event(sub, found_lower, found_upper,
-			    event, port, node);
+	tipc_sub_send_event(sub, p, event);
 	spin_unlock(&sub->lock);
 }
 
 static void tipc_sub_timeout(struct timer_list *t)
 {
 	struct tipc_subscription *sub = from_timer(sub, t, timer);
-	struct tipc_subscr *s = &sub->evt.s;
 
 	spin_lock(&sub->lock);
-	tipc_sub_send_event(sub, s->seq.lower, s->seq.upper,
-			    TIPC_SUBSCR_TIMEOUT, 0, 0);
+	tipc_sub_send_event(sub, NULL, TIPC_SUBSCR_TIMEOUT);
 	sub->inactive = true;
 	spin_unlock(&sub->lock);
 }
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 3ded27391d54..56769ce46e4d 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2003-2017, Ericsson AB
  * Copyright (c) 2005-2007, 2012-2013, Wind River Systems
- * Copyright (c) 2020, Red Hat Inc
+ * Copyright (c) 2020-2021, Red Hat Inc
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,7 @@
 #define TIPC_MAX_SUBSCR         65535
 #define TIPC_MAX_PUBL           65535
 
+struct publication;
 struct tipc_subscription;
 struct tipc_conn;
 
@@ -74,13 +75,9 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid);
 void tipc_sub_unsubscribe(struct tipc_subscription *sub);
-
-int tipc_sub_check_overlap(struct tipc_service_range *seq,
-			   u32 found_lower, u32 found_upper);
 void tipc_sub_report_overlap(struct tipc_subscription *sub,
-			     u32 found_lower, u32 found_upper,
-			     u32 event, u32 port, u32 node,
-			     u32 scope, int must);
+			     struct publication *p,
+			     u32 event, bool must);
 
 int __net_init tipc_topsrv_init_net(struct net *net);
 void __net_exit tipc_topsrv_exit_net(struct net *net);
-- 
cgit v1.2.3


From 429189acac534378cee113b16fe3f18effac1697 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:22 -0400
Subject: tipc: add host-endian copy of user subscription to struct
 tipc_subscription

We reduce and localize the usage of the tipc_sub_xx() macros by adding a
corresponding member, with fields set in host-endian format, to struct
tipc_subscription.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 29 +++++++++++------------------
 net/tipc/subscr.c     | 45 ++++++++++++++++++++++++---------------------
 net/tipc/subscr.h     |  3 ++-
 3 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index f648feae446f..98b8874ad2f7 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -416,17 +416,14 @@ static int tipc_publ_sort(void *priv, struct list_head *a,
 static void tipc_service_subscribe(struct tipc_service *service,
 				   struct tipc_subscription *sub)
 {
-	struct tipc_subscr *sb = &sub->evt.s;
 	struct publication *p, *first, *tmp;
 	struct list_head publ_list;
 	struct service_range *sr;
-	struct tipc_service_range r;
-	u32 filter;
+	u32 filter, lower, upper;
 
-	r.type = tipc_sub_read(sb, seq.type);
-	r.lower = tipc_sub_read(sb, seq.lower);
-	r.upper = tipc_sub_read(sb, seq.upper);
-	filter = tipc_sub_read(sb, filter);
+	filter = sub->s.filter;
+	lower = sub->s.seq.lower;
+	upper = sub->s.seq.upper;
 
 	tipc_sub_get(sub);
 	list_add(&sub->service_list, &service->subscriptions);
@@ -435,7 +432,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
 		return;
 
 	INIT_LIST_HEAD(&publ_list);
-	service_range_foreach_match(sr, service, r.lower, r.upper) {
+	service_range_foreach_match(sr, service, lower, upper) {
 		first = NULL;
 		list_for_each_entry(p, &sr->all_publ, all_publ) {
 			if (filter & TIPC_SUB_PORTS)
@@ -826,14 +823,13 @@ void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
 bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 {
 	struct tipc_net *tn = tipc_net(sub->net);
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 type = tipc_sub_read(s, seq.type);
+	u32 type = sub->s.seq.type;
 	struct tipc_service *sc;
 	struct tipc_uaddr ua;
 	bool res = true;
 
 	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
-		   tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
+		   sub->s.seq.lower, sub->s.seq.upper);
 	spin_lock_bh(&tn->nametbl_lock);
 	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
@@ -843,9 +839,8 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 		tipc_service_subscribe(sc, sub);
 		spin_unlock_bh(&sc->lock);
 	} else {
-		pr_warn("Failed to subscribe for {%u,%u,%u}\n", type,
-			tipc_sub_read(s, seq.lower),
-			tipc_sub_read(s, seq.upper));
+		pr_warn("Failed to subscribe for {%u,%u,%u}\n",
+			type, sub->s.seq.lower, sub->s.seq.upper);
 		res = false;
 	}
 	spin_unlock_bh(&tn->nametbl_lock);
@@ -859,13 +854,11 @@ bool tipc_nametbl_subscribe(struct tipc_subscription *sub)
 void tipc_nametbl_unsubscribe(struct tipc_subscription *sub)
 {
 	struct tipc_net *tn = tipc_net(sub->net);
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 type = tipc_sub_read(s, seq.type);
 	struct tipc_service *sc;
 	struct tipc_uaddr ua;
 
-	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, type,
-		   tipc_sub_read(s, seq.lower), tipc_sub_read(s, seq.upper));
+	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
+		   sub->s.seq.type, sub->s.seq.lower, sub->s.seq.upper);
 	spin_lock_bh(&tn->nametbl_lock);
 	sc = tipc_service_find(sub->net, &ua);
 	if (!sc)
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 5f8dc0e7488f..8e00d739f03a 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -65,37 +65,32 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
 
 /**
  * tipc_sub_check_overlap - test for subscription overlap with the given values
- * @seq: tipc_name_seq to check
- * @found_lower: lower value to test
- * @found_upper: upper value to test
+ * @subscribed: the service range subscribed for
+ * @found: the service range we are checning for match
  *
  * Returns true if there is overlap, otherwise false.
  */
-bool tipc_sub_check_overlap(struct tipc_service_range *sr,
-			    u32 found_lower, u32 found_upper)
+static bool tipc_sub_check_overlap(struct tipc_service_range *subscribed,
+				   struct tipc_service_range *found)
 {
-	if (found_lower < sr->lower)
-		found_lower = sr->lower;
-	if (found_upper > sr->upper)
-		found_upper = sr->upper;
-	if (found_lower > found_upper)
-		return false;
-	return true;
+	u32 found_lower = found->lower;
+	u32 found_upper = found->upper;
+
+	if (found_lower < subscribed->lower)
+		found_lower = subscribed->lower;
+	if (found_upper > subscribed->upper)
+		found_upper = subscribed->upper;
+	return found_lower <= found_upper;
 }
 
 void tipc_sub_report_overlap(struct tipc_subscription *sub,
 			     struct publication *p,
 			     u32 event, bool must)
 {
-	struct tipc_subscr *s = &sub->evt.s;
-	u32 filter = tipc_sub_read(s, filter);
-	struct tipc_service_range seq;
-
-	seq.type = tipc_sub_read(s, seq.type);
-	seq.lower = tipc_sub_read(s, seq.lower);
-	seq.upper = tipc_sub_read(s, seq.upper);
+	struct tipc_service_range *sr = &sub->s.seq;
+	u32 filter = sub->s.filter;
 
-	if (!tipc_sub_check_overlap(&seq, p->sr.lower, p->sr.upper))
+	if (!tipc_sub_check_overlap(sr, &p->sr))
 		return;
 	if (!must && !(filter & TIPC_SUB_PORTS))
 		return;
@@ -137,12 +132,14 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 					     struct tipc_subscr *s,
 					     int conid)
 {
+	u32 lower = tipc_sub_read(s, seq.lower);
+	u32 upper = tipc_sub_read(s, seq.upper);
 	u32 filter = tipc_sub_read(s, filter);
 	struct tipc_subscription *sub;
 	u32 timeout;
 
 	if ((filter & TIPC_SUB_PORTS && filter & TIPC_SUB_SERVICE) ||
-	    (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) {
+	    lower > upper) {
 		pr_warn("Subscription rejected, illegal request\n");
 		return NULL;
 	}
@@ -157,6 +154,12 @@ struct tipc_subscription *tipc_sub_subscribe(struct net *net,
 	sub->conid = conid;
 	sub->inactive = false;
 	memcpy(&sub->evt.s, s, sizeof(*s));
+	sub->s.seq.type = tipc_sub_read(s, seq.type);
+	sub->s.seq.lower = lower;
+	sub->s.seq.upper = upper;
+	sub->s.filter = filter;
+	sub->s.timeout = tipc_sub_read(s, timeout);
+	memcpy(sub->s.usr_handle, s->usr_handle, 8);
 	spin_lock_init(&sub->lock);
 	kref_init(&sub->kref);
 	if (!tipc_nametbl_subscribe(sub)) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index 56769ce46e4d..ddea6554ec46 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -60,12 +60,13 @@ struct tipc_conn;
  * @lock: serialize up/down and timer events
  */
 struct tipc_subscription {
+	struct tipc_subscr s;
+	struct tipc_event evt;
 	struct kref kref;
 	struct net *net;
 	struct timer_list timer;
 	struct list_head service_list;
 	struct list_head sub_list;
-	struct tipc_event evt;
 	int conid;
 	bool inactive;
 	spinlock_t lock;
-- 
cgit v1.2.3


From 5c8349503d007f9fa46e90b58f48b60830d6e47d Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Tue, 16 Mar 2021 22:06:23 -0400
Subject: tipc: remove some unnecessary warnings

We move some warning printouts to more strategic locations to avoid
duplicates and yield more detailed information about the reported
problem.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Hoang Le <hoang.h.le@dektech.com.au>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Acked-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_distr.c |  7 -------
 net/tipc/name_table.c | 30 ++++++++++++++++--------------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 9e2fab3569b5..bda902caa814 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -253,13 +253,6 @@ static void tipc_publ_purge(struct net *net, struct publication *p, u32 addr)
 	if (_p)
 		tipc_node_unsubscribe(net, &_p->binding_node, addr);
 	spin_unlock_bh(&tn->nametbl_lock);
-
-	if (_p != p) {
-		pr_err("Unable to remove publication from failed node\n"
-		       " (type=%u, lower=%u, node=%u, port=%u, key=%u)\n",
-		       p->sr.type, p->sr.lower, p->sk.node, p->sk.ref, p->key);
-	}
-
 	if (_p)
 		kfree_rcu(_p, rcu);
 }
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 98b8874ad2f7..6db9f9e7c0ac 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -337,6 +337,7 @@ static bool tipc_service_insert_publ(struct net *net,
 	u32 node = p->sk.node;
 	bool first = false;
 	bool res = false;
+	u32 key = p->key;
 
 	spin_lock_bh(&sc->lock);
 	sr = tipc_service_create_range(sc, p);
@@ -347,8 +348,12 @@ static bool tipc_service_insert_publ(struct net *net,
 
 	/* Return if the publication already exists */
 	list_for_each_entry(_p, &sr->all_publ, all_publ) {
-		if (_p->key == p->key && (!_p->sk.node || _p->sk.node == node))
+		if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) {
+			pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n",
+				 p->sr.type, p->sr.lower, p->sr.upper,
+				 node, p->sk.ref, key);
 			goto exit;
+		}
 	}
 
 	if (in_own_node(net, p->sk.node))
@@ -475,17 +480,11 @@ struct publication *tipc_nametbl_insert_publ(struct net *net,
 {
 	struct tipc_service *sc;
 	struct publication *p;
-	u32 type = ua->sr.type;
 
 	p = tipc_publ_create(ua, sk, key);
 	if (!p)
 		return NULL;
 
-	if (ua->sr.lower > ua->sr.upper) {
-		pr_debug("Failed to bind illegal {%u,%u,%u} from node %u\n",
-			 type, ua->sr.lower, ua->sr.upper, sk->node);
-		return NULL;
-	}
 	sc = tipc_service_find(net, ua);
 	if (!sc)
 		sc = tipc_service_create(net, ua);
@@ -508,15 +507,15 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 
 	sc = tipc_service_find(net, ua);
 	if (!sc)
-		return NULL;
+		goto exit;
 
 	spin_lock_bh(&sc->lock);
 	sr = tipc_service_find_range(sc, ua);
 	if (!sr)
-		goto exit;
+		goto unlock;
 	p = tipc_service_remove_publ(sr, sk, key);
 	if (!p)
-		goto exit;
+		goto unlock;
 
 	/* Notify any waiting subscriptions */
 	last = list_empty(&sr->all_publ);
@@ -535,8 +534,14 @@ struct publication *tipc_nametbl_remove_publ(struct net *net,
 		hlist_del_init_rcu(&sc->service_list);
 		kfree_rcu(sc, rcu);
 	}
-exit:
+unlock:
 	spin_unlock_bh(&sc->lock);
+exit:
+	if (!p) {
+		pr_err("Failed to remove unknown binding: %u,%u,%u/%u:%u/%u\n",
+		       ua->sr.type, ua->sr.lower, ua->sr.upper,
+		       sk->node, sk->ref, key);
+	}
 	return p;
 }
 
@@ -805,9 +810,6 @@ void tipc_nametbl_withdraw(struct net *net, struct tipc_uaddr *ua,
 		skb = tipc_named_withdraw(net, p);
 		list_del_init(&p->binding_sock);
 		kfree_rcu(p, rcu);
-	} else {
-		pr_err("Failed to remove local publication {%u,%u,%u}/%u\n",
-		       ua->sr.type, ua->sr.lower, ua->sr.upper, key);
 	}
 	rc_dests = nt->rc_dests;
 	spin_unlock_bh(&tn->nametbl_lock);
-- 
cgit v1.2.3


From 3600be5f58c18bee490e17c76f51d777a6410f42 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Wed, 17 Mar 2021 12:09:04 +0800
Subject: net: stmmac: add timestamp correction to rid CDC sync error

According to Synopsis DesignWare EQoS Databook, the Clock Domain Cross
synchronization error is introduced tue to the clock(GMII Tx/Rx clock)
being different at the capture as compared to the PTP
clock(clk_ptp_ref_i) that is used to generate the time.

The CDC synchronization error is almost equal to 2 times the clock
period of the PTP clock(clk_ptp_ref_i).

On a Intel Tigerlake platform (with Marvell 88E2110 external PHY):

Before applying this patch (with CDC synchronization error):
ptp4l[64.044]: rms    8 max   13 freq +30877 +/-  11 delay   216 +/-   0
ptp4l[65.047]: rms   13 max   20 freq +30869 +/-  17 delay   213 +/-   0
ptp4l[66.050]: rms   12 max   20 freq +30857 +/-  11 delay   213 +/-   0
ptp4l[67.052]: rms   11 max   22 freq +30849 +/-  10 delay   215 +/-   0
ptp4l[68.055]: rms   10 max   16 freq +30853 +/-  13 delay   215 +/-   0
ptp4l[69.057]: rms    7 max   13 freq +30848 +/-   9 delay   216 +/-   0
ptp4l[70.060]: rms    8 max   13 freq +30846 +/-  10 delay   216 +/-   0
ptp4l[71.063]: rms    9 max   15 freq +30836 +/-   8 delay   218 +/-   0

After applying this patch (CDC syncrhonization error is taken care of):
ptp4l[61.516]: rms  773 max  824 freq +31526 +/- 158 delay   200 +/-   0
ptp4l[62.519]: rms  427 max  596 freq +31668 +/-  39 delay   198 +/-   0
ptp4l[63.522]: rms  113 max  206 freq +31482 +/-  57 delay   198 +/-   0
ptp4l[64.525]: rms   40 max   56 freq +31316 +/-  29 delay   200 +/-   0
ptp4l[65.528]: rms   47 max   56 freq +31255 +/-  17 delay   200 +/-   0
ptp4l[66.531]: rms   26 max   36 freq +31246 +/-   9 delay   200 +/-   0
ptp4l[67.534]: rms   12 max   18 freq +31254 +/-  12 delay   202 +/-   0
ptp4l[68.537]: rms    7 max   12 freq +31263 +/-  10 delay   202 +/-   0

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index a10704d8e3c6..ddf54b8ad75d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -466,6 +466,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 {
 	struct skb_shared_hwtstamps shhwtstamp;
 	bool found = false;
+	s64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_tx_en)
@@ -484,6 +485,13 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 	}
 
 	if (found) {
+		/* Correct the clk domain crossing(CDC) error */
+		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+			adjust += -(2 * (NSEC_PER_SEC /
+					 priv->plat->clk_ptp_rate));
+			ns += adjust;
+		}
+
 		memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
 		shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
@@ -507,6 +515,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 {
 	struct skb_shared_hwtstamps *shhwtstamp = NULL;
 	struct dma_desc *desc = p;
+	u64 adjust = 0;
 	u64 ns = 0;
 
 	if (!priv->hwts_rx_en)
@@ -518,6 +527,13 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 	/* Check if timestamp is available */
 	if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
 		stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
+
+		/* Correct the clk domain crossing(CDC) error */
+		if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
+			adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
+			ns -= adjust;
+		}
+
 		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
 		shhwtstamp = skb_hwtstamps(skb);
 		memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
-- 
cgit v1.2.3


From ad426d7d966b525b73ed5a1842dd830312bbba71 Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Wed, 17 Mar 2021 09:42:01 +0100
Subject: net: dsa: b53: relax is63xx() condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM63xx switches are present on bcm63xx and bmips devices.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_priv.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 8419bb7f4505..82700a5714c1 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -186,11 +186,7 @@ static inline int is531x5(struct b53_device *dev)
 
 static inline int is63xx(struct b53_device *dev)
 {
-#ifdef CONFIG_BCM63XX
 	return dev->chip_id == BCM63XX_DEVICE_ID;
-#else
-	return 0;
-#endif
 }
 
 static inline int is5301x(struct b53_device *dev)
-- 
cgit v1.2.3


From 73a2218cb268ff62f42420cf37acfb2a2d300959 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Wed, 17 Mar 2021 14:30:59 +0530
Subject: net: ppp: Mundane typo fixes in the file pppoe.c

s/procesing/processing/
s/comparations/comparisons/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index d7f50b835050..9dc7f4b93d51 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -25,7 +25,7 @@
  *		in pppoe_release.
  * 051000 :	Initialization cleanup.
  * 111100 :	Fix recvmsg.
- * 050101 :	Fix PADT procesing.
+ * 050101 :	Fix PADT processing.
  * 140501 :	Use pppoe_rcv_core to handle all backlog. (Alexey)
  * 170701 :	Do not lock_sock with rwlock held. (DaveM)
  *		Ignore discovery frames if user has socket
@@ -96,7 +96,7 @@ struct pppoe_net {
 	 * we could use _single_ hash table for all
 	 * nets by injecting net id into the hash but
 	 * it would increase hash chains and add
-	 * a few additional math comparations messy
+	 * a few additional math comparisons messy
 	 * as well, moreover in case of SMP less locking
 	 * controversy here
 	 */
-- 
cgit v1.2.3


From 964dbf186eaa84d409c359ddf09c827a3fbe8228 Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Wed, 17 Mar 2021 11:29:26 +0100
Subject: net: dsa: tag_brcm: add support for legacy tags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for legacy Broadcom tags, which are similar to DSA_TAG_PROTO_BRCM.
These tags are used on BCM5325, BCM5365 and BCM63xx switches.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |   2 +
 net/dsa/Kconfig    |   7 ++++
 net/dsa/tag_brcm.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 83a933e563fe..dac303edd33d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -49,10 +49,12 @@ struct phylink_link_state;
 #define DSA_TAG_PROTO_XRS700X_VALUE		19
 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE	20
 #define DSA_TAG_PROTO_SEVILLE_VALUE		21
+#define DSA_TAG_PROTO_BRCM_LEGACY_VALUE		22
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
 	DSA_TAG_PROTO_BRCM		= DSA_TAG_PROTO_BRCM_VALUE,
+	DSA_TAG_PROTO_BRCM_LEGACY	= DSA_TAG_PROTO_BRCM_LEGACY_VALUE,
 	DSA_TAG_PROTO_BRCM_PREPEND	= DSA_TAG_PROTO_BRCM_PREPEND_VALUE,
 	DSA_TAG_PROTO_DSA		= DSA_TAG_PROTO_DSA_VALUE,
 	DSA_TAG_PROTO_EDSA		= DSA_TAG_PROTO_EDSA_VALUE,
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 58b8fc82cd3c..aaf8a452fd5b 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -48,6 +48,13 @@ config NET_DSA_TAG_BRCM
 	  Say Y if you want to enable support for tagging frames for the
 	  Broadcom switches which place the tag after the MAC source address.
 
+config NET_DSA_TAG_BRCM_LEGACY
+	tristate "Tag driver for Broadcom legacy switches using in-frame headers"
+	select NET_DSA_TAG_BRCM_COMMON
+	help
+	  Say Y if you want to enable support for tagging frames for the
+	  Broadcom legacy switches which place the tag after the MAC source
+	  address.
 
 config NET_DSA_TAG_BRCM_PREPEND
 	tristate "Tag driver for Broadcom switches using prepended headers"
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index e2577a7dcbca..40e9f3098c8d 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,9 +12,26 @@
 
 #include "dsa_priv.h"
 
-/* This tag length is 4 bytes, older ones were 6 bytes, we do not
- * handle them
- */
+/* Legacy Broadcom tag (6 bytes) */
+#define BRCM_LEG_TAG_LEN	6
+
+/* Type fields */
+/* 1st byte in the tag */
+#define BRCM_LEG_TYPE_HI	0x88
+/* 2nd byte in the tag */
+#define BRCM_LEG_TYPE_LO	0x74
+
+/* Tag fields */
+/* 3rd byte in the tag */
+#define BRCM_LEG_UNICAST	(0 << 5)
+#define BRCM_LEG_MULTICAST	(1 << 5)
+#define BRCM_LEG_EGRESS		(2 << 5)
+#define BRCM_LEG_INGRESS	(3 << 5)
+
+/* 6th byte in the tag */
+#define BRCM_LEG_PORT_ID	(0xf)
+
+/* Newer Broadcom tag (4 bytes) */
 #define BRCM_TAG_LEN	4
 
 /* Tag is constructed and desconstructed using byte by byte access
@@ -195,6 +212,87 @@ DSA_TAG_DRIVER(brcm_netdev_ops);
 MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM);
 #endif
 
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
+static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	u8 *brcm_tag;
+
+	/* The Ethernet switch we are interfaced with needs packets to be at
+	 * least 64 bytes (including FCS) otherwise they will be discarded when
+	 * they enter the switch port logic. When Broadcom tags are enabled, we
+	 * need to make sure that packets are at least 70 bytes
+	 * (including FCS and tag) because the length verification is done after
+	 * the Broadcom tag is stripped off the ingress packet.
+	 *
+	 * Let dsa_slave_xmit() free the SKB
+	 */
+	if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false))
+		return NULL;
+
+	skb_push(skb, BRCM_LEG_TAG_LEN);
+
+	memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN);
+
+	brcm_tag = skb->data + 2 * ETH_ALEN;
+
+	/* Broadcom tag type */
+	brcm_tag[0] = BRCM_LEG_TYPE_HI;
+	brcm_tag[1] = BRCM_LEG_TYPE_LO;
+
+	/* Broadcom tag value */
+	brcm_tag[2] = BRCM_LEG_EGRESS;
+	brcm_tag[3] = 0;
+	brcm_tag[4] = 0;
+	brcm_tag[5] = dp->index & BRCM_LEG_PORT_ID;
+
+	return skb;
+}
+
+static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
+					struct net_device *dev,
+					struct packet_type *pt)
+{
+	int source_port;
+	u8 *brcm_tag;
+
+	if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+		return NULL;
+
+	brcm_tag = skb->data - 2;
+
+	source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
+
+	skb->dev = dsa_master_find_slave(dev, 0, source_port);
+	if (!skb->dev)
+		return NULL;
+
+	/* Remove Broadcom tag and update checksum */
+	skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
+
+	skb->offload_fwd_mark = 1;
+
+	/* Move the Ethernet DA and SA */
+	memmove(skb->data - ETH_HLEN,
+		skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN,
+		2 * ETH_ALEN);
+
+	return skb;
+}
+
+static const struct dsa_device_ops brcm_legacy_netdev_ops = {
+	.name = "brcm-legacy",
+	.proto = DSA_TAG_PROTO_BRCM_LEGACY,
+	.xmit = brcm_leg_tag_xmit,
+	.rcv = brcm_leg_tag_rcv,
+	.overhead = BRCM_LEG_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM_LEGACY);
+#endif /* CONFIG_NET_DSA_TAG_BRCM_LEGACY */
+
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
 static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 					     struct net_device *dev)
@@ -227,6 +325,9 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] =	{
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM)
 	&DSA_TAG_DRIVER_NAME(brcm_netdev_ops),
 #endif
+#if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_LEGACY)
+	&DSA_TAG_DRIVER_NAME(brcm_legacy_netdev_ops),
+#endif
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_BRCM_PREPEND)
 	&DSA_TAG_DRIVER_NAME(brcm_prepend_netdev_ops),
 #endif
-- 
cgit v1.2.3


From 46c5176c586c81306bf9e7024c13b95da775490f Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Wed, 17 Mar 2021 11:29:27 +0100
Subject: net: dsa: b53: support legacy tags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These tags are used on BCM5325, BCM5365 and BCM63xx switches.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/Kconfig      |  1 +
 drivers/net/dsa/b53/b53_common.c | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/b53/Kconfig b/drivers/net/dsa/b53/Kconfig
index f9891a81c808..90b525160b71 100644
--- a/drivers/net/dsa/b53/Kconfig
+++ b/drivers/net/dsa/b53/Kconfig
@@ -3,6 +3,7 @@ menuconfig B53
 	tristate "Broadcom BCM53xx managed switch support"
 	depends on NET_DSA
 	select NET_DSA_TAG_BRCM
+	select NET_DSA_TAG_BRCM_LEGACY
 	select NET_DSA_TAG_BRCM_PREPEND
 	help
 	  This driver adds support for Broadcom managed switch chips. It supports
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 9bd51c2a51d2..8d5a82dedce8 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2055,15 +2055,17 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port,
 {
 	struct b53_device *dev = ds->priv;
 
-	/* Older models (5325, 5365) support a different tag format that we do
-	 * not support in net/dsa/tag_brcm.c yet.
-	 */
-	if (is5325(dev) || is5365(dev) ||
-	    !b53_can_enable_brcm_tags(ds, port, mprot)) {
+	if (!b53_can_enable_brcm_tags(ds, port, mprot)) {
 		dev->tag_protocol = DSA_TAG_PROTO_NONE;
 		goto out;
 	}
 
+	/* Older models require a different 6 byte tag */
+	if (is5325(dev) || is5365(dev) || is63xx(dev)) {
+		dev->tag_protocol = DSA_TAG_PROTO_BRCM_LEGACY;
+		goto out;
+	}
+
 	/* Broadcom BCM58xx chips have a flow accelerator on Port 8
 	 * which requires us to use the prepended Broadcom tag type
 	 */
-- 
cgit v1.2.3


From 1b35293b7afc5bdd6731502c6bba11a04a71512a Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:23 +0200
Subject: mlxsw: reg: Add egr_et_set field to SPVID

SPVID.egr_et_set=1 means that when VLAN is pushed at ingress (for untagged
packets or for QinQ push mode) then the EtherType is decided at the egress
port.

The next patches will use this field for VxLAN devices (tunnel port) in
order to allow using dual VxLAN bridges (802.1d and 802.1ad at the same
time).

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 44f836246e33..626f5e5c8a93 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -842,6 +842,14 @@ MLXSW_ITEM32(reg, spvid, local_port, 0x00, 16, 8);
  */
 MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8);
 
+/* reg_spvid_egr_et_set
+ * When VLAN is pushed at ingress (for untagged packets or for
+ * QinQ push mode) then the EtherType is decided at the egress port.
+ * Reserved when Spectrum-1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spvid, egr_et_set, 0x04, 24, 1);
+
 /* reg_spvid_et_vlan
  * EtherType used for when VLAN is pushed at ingress (for untagged
  * packets or for QinQ push mode).
@@ -849,6 +857,7 @@ MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8);
  * 1: ether_type1
  * 2: ether_type2 - Reserved when Spectrum-1, supported by Spectrum-2
  * Ethertype IDs are configured by SVER.
+ * Reserved when egr_et_set = 1.
  * Access: RW
  */
 MLXSW_ITEM32(reg, spvid, et_vlan, 0x04, 16, 2);
-- 
cgit v1.2.3


From d8f4da73cea7438d95c6d6a54ec0a76dbccac02e Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:24 +0200
Subject: mlxsw: reg: Add Switch Port Egress VLAN EtherType Register

SPEVET configures which EtherType to push at egress for packets incoming
through a local port for which 'SPVID.egr_et_set' is set.

The next patches will use SPEVET to configure EtherType 0x88A8 and
0x8100 for local ports member in 802.1ad and 802.1q bridges,
respectively. This allows using dual VxLAN bridges (802.1d and 802.1ad at
the same time).

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 36 +++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 626f5e5c8a93..d33c79ad1810 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2088,6 +2088,41 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u8 local_port, bool et1,
 	mlxsw_reg_spvc_et0_set(payload, et0);
 }
 
+/* SPEVET - Switch Port Egress VLAN EtherType
+ * ------------------------------------------
+ * The switch port egress VLAN EtherType configures which EtherType to push at
+ * egress for packets incoming through a local port for which 'SPVID.egr_et_set'
+ * is set.
+ */
+#define MLXSW_REG_SPEVET_ID 0x202A
+#define MLXSW_REG_SPEVET_LEN 0x08
+
+MLXSW_REG_DEFINE(spevet, MLXSW_REG_SPEVET_ID, MLXSW_REG_SPEVET_LEN);
+
+/* reg_spevet_local_port
+ * Egress Local port number.
+ * Not supported to CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, spevet, local_port, 0x00, 16, 8);
+
+/* reg_spevet_et_vlan
+ * Egress EtherType VLAN to push when SPVID.egr_et_set field set for the packet:
+ * 0: ether_type0 - (default)
+ * 1: ether_type1
+ * 2: ether_type2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, spevet, et_vlan, 0x04, 16, 2);
+
+static inline void mlxsw_reg_spevet_pack(char *payload, u8 local_port,
+					 u8 et_vlan)
+{
+	MLXSW_REG_ZERO(spevet, payload);
+	mlxsw_reg_spevet_local_port_set(payload, local_port);
+	mlxsw_reg_spevet_et_vlan_set(payload, et_vlan);
+}
+
 /* CWTP - Congetion WRED ECN TClass Profile
  * ----------------------------------------
  * Configures the profiles for queues of egress port and traffic class
@@ -12026,6 +12061,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(sfmr),
 	MLXSW_REG(spvmlr),
 	MLXSW_REG(spvc),
+	MLXSW_REG(spevet),
 	MLXSW_REG(cwtp),
 	MLXSW_REG(cwtpm),
 	MLXSW_REG(pgcr),
-- 
cgit v1.2.3


From 114a465d890a4fa2e3c0a49001cd47d3e804bf54 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:25 +0200
Subject: mlxsw: spectrum: Add mlxsw_sp_port_egress_ethtype_set()

A subsequent patch will cause decapsulated packets to have their EtherType
determined by the egress port. Add mlxsw_sp_port_egress_ethtype_set() which
will be called when a port joins an 802.1ad bridge, so that it will set an
802.1ad EtherType on decapsulated packets transmitted through it, instead
of the default 802.1q EtherType.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 16 ++++++++++++++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 7e40d45b55fe..9aba4fe2c852 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -402,6 +402,22 @@ int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type)
 	return 0;
 }
 
+int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 ethtype)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char spevet_pl[MLXSW_REG_SPEVET_LEN];
+	u8 sver_type;
+	int err;
+
+	err = mlxsw_sp_ethtype_to_sver_type(ethtype, &sver_type);
+	if (err)
+		return err;
+
+	mlxsw_reg_spevet_pack(spevet_pl, mlxsw_sp_port->local_port, sver_type);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spevet), spevet_pl);
+}
+
 static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				    u16 vid, u16 ethtype)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 63cc8fa3fa62..180ffa41bf46 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -609,6 +609,8 @@ int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable);
 int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
 				   bool learn_enable);
 int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type);
+int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 ethtype);
 int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
 			   u16 ethtype);
 struct mlxsw_sp_port_vlan *
-- 
cgit v1.2.3


From 0f74fa5617305aa555db7cbc8c19b8eff4806efe Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:26 +0200
Subject: mlxsw: Add struct mlxsw_sp_switchdev_ops per ASIC

A subsequent patch will need to implement different set of operations
when a port joins / leaves an 802.1ad bridge, based on the ASIC type.

Prepare for this change by allowing to initialize the bridge module
based on the ASIC type via 'struct mlxsw_sp_switchdev_ops'.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  3 +++
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  6 ++++++
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 22 ++++++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9aba4fe2c852..df4ec063adcb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2925,6 +2925,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp1_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
@@ -2955,6 +2956,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
@@ -2983,6 +2985,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 
+	mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops;
 	mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops;
 	mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops;
 	mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 180ffa41bf46..03655e418edb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -92,6 +92,11 @@ struct mlxsw_sp_rif_ops;
 extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
 extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
 
+struct mlxsw_sp_switchdev_ops;
+
+extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops;
+extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops;
+
 enum mlxsw_sp_fid_type {
 	MLXSW_SP_FID_TYPE_8021Q,
 	MLXSW_SP_FID_TYPE_8021D,
@@ -167,6 +172,7 @@ struct mlxsw_sp {
 	struct mlxsw_sp_counter_pool *counter_pool;
 	struct mlxsw_sp_span *span;
 	struct mlxsw_sp_trap *trap;
+	const struct mlxsw_sp_switchdev_ops *switchdev_ops;
 	const struct mlxsw_sp_kvdl_ops *kvdl_ops;
 	const struct mlxsw_afa_ops *afa_ops;
 	const struct mlxsw_afk_ops *afk_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 23b7e8d6386b..51fdbdc8ac66 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -98,6 +98,10 @@ struct mlxsw_sp_bridge_ops {
 		       const struct mlxsw_sp_fid *fid);
 };
 
+struct mlxsw_sp_switchdev_ops {
+	void (*init)(struct mlxsw_sp *mlxsw_sp);
+};
+
 static int
 mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_bridge_port *bridge_port,
@@ -3535,6 +3539,22 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
 }
 
+static void mlxsw_sp1_switchdev_init(struct mlxsw_sp *mlxsw_sp)
+{
+}
+
+const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = {
+	.init	= mlxsw_sp1_switchdev_init,
+};
+
+static void mlxsw_sp2_switchdev_init(struct mlxsw_sp *mlxsw_sp)
+{
+}
+
+const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops = {
+	.init	= mlxsw_sp2_switchdev_init,
+};
+
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge;
@@ -3551,6 +3571,8 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 	bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
 	bridge->bridge_8021ad_ops = &mlxsw_sp_bridge_8021ad_ops;
 
+	mlxsw_sp->switchdev_ops->init(mlxsw_sp);
+
 	return mlxsw_sp_fdb_init(mlxsw_sp);
 }
 
-- 
cgit v1.2.3


From bf677bd25a9956bbeb9b4e13cb0c786c814d917e Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:27 +0200
Subject: mlxsw: Allow 802.1d and .1ad VxLAN bridges to coexist on Spectrum>=2

Currently only one EtherType can be configured for pushing in tunnels
because EtherType is configured using SPVID.et_vlan for tunnel port.

This behavior is forbidden by comparing mlxsw_sp_nve_config struct for
each new tunnel, the struct contains 'ethertype' field which means that
only one EtherType is legal at any given time. Remove 'ethertype' field to
allow creating VxLAN devices with different bridges.

To allow using several types of VxLAN bridges at the same time, the
EtherType should be determined at the egress port. This behavior is
achieved by setting SPVID to decide which EtherType to push at egress and
for each local_port which is member in 802.1ad bridge, set SPEVET.et_vlan
to ether_type1 (i.e., 0x88A8).

Use switchdev_ops->init() to set different mlxsw_sp_bridge_ops for
different ASICs in order to be able to split the behavior when port joins /
leaves an 802.1ad bridge in different ASICs.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h |  1 -
 .../ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c   | 15 ++-----
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 52 +++++++++++++++++++++-
 3 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
index 2796d3659979..d8104fc6c900 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -18,7 +18,6 @@ struct mlxsw_sp_nve_config {
 	u32 ul_tb_id;
 	enum mlxsw_sp_l3proto ul_proto;
 	union mlxsw_sp_l3addr ul_sip;
-	u16 ethertype;
 };
 
 struct mlxsw_sp_nve {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index 3e2bb22e9ca6..b84bb4b65098 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -113,7 +113,6 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
 	config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
 	config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
 	config->udp_dport = cfg->dst_port;
-	config->ethertype = params->ethertype;
 }
 
 static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
@@ -318,20 +317,14 @@ static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int
-mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp, u16 ethertype)
+mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp)
 {
 	char spvid_pl[MLXSW_REG_SPVID_LEN] = {};
-	u8 sver_type;
-	int err;
 
 	mlxsw_reg_spvid_tport_set(spvid_pl, true);
 	mlxsw_reg_spvid_local_port_set(spvid_pl,
 				       MLXSW_REG_TUNNEL_PORT_NVE);
-	err = mlxsw_sp_ethtype_to_sver_type(ethertype, &sver_type);
-	if (err)
-		return err;
-
-	mlxsw_reg_spvid_et_vlan_set(spvid_pl, sver_type);
+	mlxsw_reg_spvid_egr_et_set_set(spvid_pl, true);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
 }
 
@@ -367,7 +360,7 @@ mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_spvtr_write;
 
-	err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, config->ethertype);
+	err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp);
 	if (err)
 		goto err_decap_ethertype_set;
 
@@ -392,8 +385,6 @@ static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
 	char spvtr_pl[MLXSW_REG_SPVTR_LEN];
 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
 
-	/* Set default EtherType */
-	mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, ETH_P_8021Q);
 	mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE,
 			     MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 51fdbdc8ac66..c1f05c17557d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2300,7 +2300,7 @@ mlxsw_sp_bridge_8021ad_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
 						     vid, ETH_P_8021AD, extack);
 }
 
-static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = {
+static const struct mlxsw_sp_bridge_ops mlxsw_sp1_bridge_8021ad_ops = {
 	.port_join	= mlxsw_sp_bridge_8021ad_port_join,
 	.port_leave	= mlxsw_sp_bridge_8021ad_port_leave,
 	.vxlan_join	= mlxsw_sp_bridge_8021ad_vxlan_join,
@@ -2309,6 +2309,53 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = {
 	.fid_vid	= mlxsw_sp_bridge_8021q_fid_vid,
 };
 
+static int
+mlxsw_sp2_bridge_8021ad_port_join(struct mlxsw_sp_bridge_device *bridge_device,
+				  struct mlxsw_sp_bridge_port *bridge_port,
+				  struct mlxsw_sp_port *mlxsw_sp_port,
+				  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	/* The EtherType of decapsulated packets is determined at the egress
+	 * port to allow 802.1d and 802.1ad bridges with VXLAN devices to
+	 * co-exist.
+	 */
+	err = mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021AD);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_bridge_8021ad_port_join(bridge_device, bridge_port,
+					       mlxsw_sp_port, extack);
+	if (err)
+		goto err_bridge_8021ad_port_join;
+
+	return 0;
+
+err_bridge_8021ad_port_join:
+	mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q);
+	return err;
+}
+
+static void
+mlxsw_sp2_bridge_8021ad_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
+				   struct mlxsw_sp_bridge_port *bridge_port,
+				   struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	mlxsw_sp_bridge_8021ad_port_leave(bridge_device, bridge_port,
+					  mlxsw_sp_port);
+	mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q);
+}
+
+static const struct mlxsw_sp_bridge_ops mlxsw_sp2_bridge_8021ad_ops = {
+	.port_join	= mlxsw_sp2_bridge_8021ad_port_join,
+	.port_leave	= mlxsw_sp2_bridge_8021ad_port_leave,
+	.vxlan_join	= mlxsw_sp_bridge_8021ad_vxlan_join,
+	.fid_get	= mlxsw_sp_bridge_8021q_fid_get,
+	.fid_lookup	= mlxsw_sp_bridge_8021q_fid_lookup,
+	.fid_vid	= mlxsw_sp_bridge_8021q_fid_vid,
+};
+
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct net_device *brport_dev,
 			      struct net_device *br_dev,
@@ -3541,6 +3588,7 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 
 static void mlxsw_sp1_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
+	mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp1_bridge_8021ad_ops;
 }
 
 const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = {
@@ -3549,6 +3597,7 @@ const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = {
 
 static void mlxsw_sp2_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 {
+	mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp2_bridge_8021ad_ops;
 }
 
 const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops = {
@@ -3569,7 +3618,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
 
 	bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops;
 	bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
-	bridge->bridge_8021ad_ops = &mlxsw_sp_bridge_8021ad_ops;
 
 	mlxsw_sp->switchdev_ops->init(mlxsw_sp);
 
-- 
cgit v1.2.3


From 35f15ab378fa7e1eaa25798076a457523b5ace75 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:28 +0200
Subject: selftests: forwarding: Add test for dual VxLAN bridge

Configure VxLAN with an 802.1ad bridge and VxLAN with an 802.1d bridge
at the same time in same switch, verify that traffic passed as expected.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/dual_vxlan_bridge.sh  | 366 +++++++++++++++++++++
 1 file changed, 366 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh

diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..5148d97a5df8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+                               +----------------------+
+# | H1 (vrf)           |                               |             H2 (vrf) |
+# |    + h1.10         |                               |  + h2.20             |
+# |    | 192.0.2.1/28  |                               |  | 192.0.2.2/28      |
+# |    |               |                               |  |                   |
+# |    + $h1           |                               |  + $h2               |
+# |    |               |                               |  |                   |
+# +----|---------------+                               +--|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|--------------------+
+# | SW |                                                  |                    |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | |  + $swp1         BR1 (802.1ad)   | | BR2 (802.1d)   + $swp2            | |
+# | |    vid 100 pvid untagged         | |                |                  | |
+# | |                                  | |                + $swp2.20         | |
+# | |                                  | |                                   | |
+# | |  + vx100 (vxlan)                 | |  + vx200 (vxlan)                  | |
+# | |    local 192.0.2.17              | |    local 192.0.2.17               | |
+# | |    remote 192.0.2.34             | |    remote 192.0.2.50              | |
+# | |    id 1000 dstport $VXPORT       | |    id 2000 dstport $VXPORT        | |
+# | |    vid 100 pvid untagged         | |                                   | |
+# | +--------------------------------- + +-----------------------------------+ |
+# |                                                                            |
+# |  192.0.2.32/28 via 192.0.2.18                                              |
+# |  192.0.2.48/28 via 192.0.2.18                                              |
+# |                                                                            |
+# |    + $rp1                                                                  |
+# |    | 192.0.2.17/28                                                         |
+# +----|-----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR3 (802.1ad) | |   | |                  BR3 (802.1d) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx200 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 2000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |                               | |
+# | |                               | |   | |  + w1.20                      | |
+# | |                               | |   | |  |                            | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  |                            | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |                            | |   | |  |                            | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.10                      | |   | |  + w2.20                      | |
+# | |    192.0.2.3/28               | |   | |    192.0.2.4/28               | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	#### BR1 ####
+	ip link add name br1 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	#### BR2 ####
+	ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br2 address $(mac_get $swp2)
+	ip link set dev br2 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	#### VX100 ####
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	#### VX200 ####
+	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx200 up
+
+	ip link set dev vx200 master br2
+
+	ip link set dev $swp2 up
+	ip link add name $swp2.20 link $swp2 type vlan id 20
+	ip link set dev $swp2.20 master br2
+	ip link set dev $swp2.20 up
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	ip link set dev vx200 nomaster
+	ip link set dev vx200 down
+	ip link del dev vx200
+
+	ip link del dev $swp2.20
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br2 down
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local vxlan_name=$1; shift
+	local vxlan_id=$1; shift
+	local vlan_id=$1; shift
+	local host_addr=$1; shift
+	local nh_addr=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br3 type bridge vlan_filtering 0
+	ip link set dev br3 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br3
+	ip link set dev w1 up
+
+	ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev $vxlan_name up
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev $vxlan_name master br3
+	tc qdisc add dev $vxlan_name clsact
+
+	simple_if_init w2
+	vlan_create w2 $vlan_id vw2 $host_addr/28
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+	      192.0.2.33
+
+	in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+	      192.0.2.49
+
+	in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+	in_ns ns2 ip link set dev w1.20 master br3
+	in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+	ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 1724c97d2f9ddcfe2e372f9b02d6efde15b885b0 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Mar 2021 12:35:29 +0200
Subject: selftests: mlxsw: spectrum-2: Remove q_in_vni_veto test

q_in_vni_veto.sh is not needed anymore because VxLAN with an 802.1ad
bridge and VxLAN with an 802.1d bridge can coexist.

Remove the test.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh  | 77 ----------------------
 1 file changed, 77 deletions(-)
 delete mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
deleted file mode 100755
index 0231205a7147..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../../net/forwarding
-
-VXPORT=4789
-
-ALL_TESTS="
-	create_dot1d_and_dot1ad_vxlans
-"
-NUM_NETIFS=2
-source $lib_dir/lib.sh
-
-setup_prepare()
-{
-	swp1=${NETIFS[p1]}
-	swp2=${NETIFS[p2]}
-
-	ip link set dev $swp1 up
-	ip link set dev $swp2 up
-}
-
-cleanup()
-{
-	pre_cleanup
-
-	ip link set dev $swp2 down
-	ip link set dev $swp1 down
-}
-
-create_dot1d_and_dot1ad_vxlans()
-{
-	RET=0
-
-	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
-		vlan_default_pvid 0 mcast_snooping 0
-	ip link set dev br0 up
-
-	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
-		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
-	ip link set dev vx100 up
-
-	ip link set dev $swp1 master br0
-	ip link set dev vx100 master br0
-	bridge vlan add vid 100 dev vx100 pvid untagged
-
-	ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
-	ip link set dev br1 up
-
-	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
-		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
-	ip link set dev vx200 up
-
-	ip link set dev $swp2 master br1
-	ip link set dev vx200 master br1 2>/dev/null
-	check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
-
-	ip link set dev vx200 master br1 2>&1 >/dev/null \
-		| grep -q mlxsw_spectrum
-	check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
-
-	log_test "create 802.1d and 802.1ad VxLANs"
-
-	ip link del dev vx200
-	ip link del dev br1
-	ip link del dev vx100
-	ip link del dev br0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
-- 
cgit v1.2.3


From 63f925dc55b33e8c4edd0de5e83904cd179d3095 Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Wed, 17 Mar 2021 19:05:34 +0530
Subject: octeontx2-af: refactor function npc_install_flow for default entry

This patch refactors npc_install_flow function to install AF
installed default MCAM entries similar to other MCAM entries
installed by PF/VF. As a result the code would be more readable
and easy to maintain. Modified npc_verify_entry and npc_verify_channel
to properly check MCAM rules installed by AF.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/npc.h    |  5 +++
 drivers/net/ethernet/marvell/octeontx2/af/rvu.h    | 14 +++++--
 .../ethernet/marvell/octeontx2/af/rvu_debugfs.c    |  2 +-
 .../net/ethernet/marvell/octeontx2/af/rvu_npc.c    | 47 +++++++---------------
 .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 45 +++++++++------------
 5 files changed, 51 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 3c640f6aba92..0675c55dcc7a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -420,6 +420,11 @@ struct nix_tx_action {
 #define TX_VTAG1_LID_MASK		GENMASK_ULL(42, 40)
 #define TX_VTAG1_RELPTR_MASK		GENMASK_ULL(39, 32)
 
+/* NPC MCAM reserved entry index per nixlf */
+#define NIXLF_UCAST_ENTRY	0
+#define NIXLF_BCAST_ENTRY	1
+#define NIXLF_PROMISC_ENTRY	2
+
 struct npc_mcam_kex {
 	/* MKEX Profle Header */
 	u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index fa6e46e36ae4..e6517a697de7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -548,6 +548,12 @@ static inline int is_afvf(u16 pcifunc)
 	return !(pcifunc & ~RVU_PFVF_FUNC_MASK);
 }
 
+/* check if PF_FUNC is AF */
+static inline bool is_pffunc_af(u16 pcifunc)
+{
+	return !pcifunc;
+}
+
 static inline bool is_rvu_fwdata_valid(struct rvu *rvu)
 {
 	return (rvu->fwdata->header_magic == RVU_FWDATA_HEADER_MAGIC) &&
@@ -665,9 +671,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena);
 int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel);
 int npc_flow_steering_init(struct rvu *rvu, int blkaddr);
 const char *npc_get_field_name(u8 hdr);
-bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf,
-				u16 pcifunc, u8 intf, struct mcam_entry *entry,
-				int *entry_index);
 int npc_get_bank(struct npc_mcam *mcam, int index);
 void npc_mcam_enable_flows(struct rvu *rvu, u16 target);
 void npc_mcam_disable_flows(struct rvu *rvu, u16 target);
@@ -679,6 +682,11 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
 bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
 u32  rvu_cgx_get_fifolen(struct rvu *rvu);
 
+int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf,
+			     int type);
+bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr,
+			   int index);
+
 /* CPT APIs */
 int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index aa2ca8780b9c..c8836d48a614 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2145,7 +2145,7 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused)
 		seq_printf(s, "\tmcam entry: %d\n", iter->entry);
 
 		rvu_dbg_npc_mcam_show_flows(s, iter);
-		if (iter->intf == NIX_INTF_RX) {
+		if (is_npc_intf_rx(iter->intf)) {
 			target = iter->rx_action.pf_func;
 			pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK;
 			seq_printf(s, "\tForward to: PF%d ", pf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 04bb0803a5c5..80b90a48df4f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -22,10 +22,6 @@
 #define RSVD_MCAM_ENTRIES_PER_PF	2 /* Bcast & Promisc */
 #define RSVD_MCAM_ENTRIES_PER_NIXLF	1 /* Ucast for LFs */
 
-#define NIXLF_UCAST_ENTRY	0
-#define NIXLF_BCAST_ENTRY	1
-#define NIXLF_PROMISC_ENTRY	2
-
 #define NPC_PARSE_RESULT_DMAC_OFFSET	8
 #define NPC_HW_TSTAMP_OFFSET		8
 #define NPC_KEX_CHAN_MASK		0xFFFULL
@@ -96,6 +92,10 @@ int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel)
 	if (is_npc_intf_tx(intf))
 		return 0;
 
+	/* return in case of AF installed rules */
+	if (is_pffunc_af(pcifunc))
+		return 0;
+
 	if (is_afvf(pcifunc)) {
 		end = rvu_get_num_lbk_chans();
 		if (end < 0)
@@ -196,8 +196,8 @@ static int npc_get_ucast_mcam_index(struct npc_mcam *mcam, u16 pcifunc,
 	return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF;
 }
 
-static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
-				    u16 pcifunc, int nixlf, int type)
+int npc_get_nixlf_mcam_index(struct npc_mcam *mcam,
+			     u16 pcifunc, int nixlf, int type)
 {
 	int pf = rvu_get_pf(pcifunc);
 	int index;
@@ -230,8 +230,8 @@ int npc_get_bank(struct npc_mcam *mcam, int index)
 	return bank;
 }
 
-static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam,
-				  int blkaddr, int index)
+bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam,
+			   int blkaddr, int index)
 {
 	int bank = npc_get_bank(mcam, index);
 	u64 cfg;
@@ -1674,6 +1674,9 @@ void rvu_npc_get_mcam_counter_alloc_info(struct rvu *rvu, u16 pcifunc,
 static int npc_mcam_verify_entry(struct npc_mcam *mcam,
 				 u16 pcifunc, int entry)
 {
+	/* verify AF installed entries */
+	if (is_pffunc_af(pcifunc))
+		return 0;
 	/* Verify if entry is valid and if it is indeed
 	 * allocated to the requesting PFFUNC.
 	 */
@@ -2268,6 +2271,10 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu,
 		goto exit;
 	}
 
+	/* For AF installed rules, the nix_intf should be set to target NIX */
+	if (is_pffunc_af(req->hdr.pcifunc))
+		nix_intf = req->intf;
+
 	npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf,
 			      &req->entry_data, req->enable_entry);
 
@@ -2730,30 +2737,6 @@ int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req,
 	return 0;
 }
 
-bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf,
-				u16 pcifunc, u8 intf, struct mcam_entry *entry,
-				int *index)
-{
-	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
-	struct npc_mcam *mcam = &rvu->hw->mcam;
-	bool enable;
-	u8 nix_intf;
-
-	if (is_npc_intf_tx(intf))
-		nix_intf = pfvf->nix_tx_intf;
-	else
-		nix_intf = pfvf->nix_rx_intf;
-
-	*index = npc_get_nixlf_mcam_index(mcam, pcifunc,
-					  nixlf, NIXLF_UCAST_ENTRY);
-	/* dont force enable unicast entry  */
-	enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index);
-	npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf,
-			      entry, enable);
-
-	return enable;
-}
-
 int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu,
 					      struct msg_req *req,
 					      struct npc_mcam_read_base_rule_rsp *rsp)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 4ba9d54ce4e3..9e710a534796 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -998,33 +998,21 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target,
 	if (is_npc_intf_tx(req->intf))
 		goto find_rule;
 
-	if (def_ucast_rule)
+	if (req->default_rule) {
+		entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf,
+						       NIXLF_UCAST_ENTRY);
+		enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index);
+	}
+
+	/* update mcam entry with default unicast rule attributes */
+	if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) {
 		missing_features = (def_ucast_rule->features ^ features) &
 					def_ucast_rule->features;
-
-	if (req->default_rule && req->append) {
-		/* add to default rule */
 		if (missing_features)
 			npc_update_flow(rvu, entry, missing_features,
 					&def_ucast_rule->packet,
 					&def_ucast_rule->mask,
 					&dummy, req->intf);
-		enable = rvu_npc_write_default_rule(rvu, blkaddr,
-						    nixlf, target,
-						    pfvf->nix_rx_intf, entry,
-						    &entry_index);
-		installed_features = req->features | missing_features;
-	} else if (req->default_rule && !req->append) {
-		/* overwrite default rule */
-		enable = rvu_npc_write_default_rule(rvu, blkaddr,
-						    nixlf, target,
-						    pfvf->nix_rx_intf, entry,
-						    &entry_index);
-	} else if (msg_from_vf) {
-		/* normal rule - include default rule also to it for VF */
-		npc_update_flow(rvu, entry, missing_features,
-				&def_ucast_rule->packet, &def_ucast_rule->mask,
-				&dummy, req->intf);
 		installed_features = req->features | missing_features;
 	}
 
@@ -1036,12 +1024,9 @@ find_rule:
 			return -ENOMEM;
 		new = true;
 	}
-	/* no counter for default rule */
-	if (req->default_rule)
-		goto update_rule;
 
 	/* allocate new counter if rule has no counter */
-	if (req->set_cntr && !rule->has_cntr)
+	if (!req->default_rule && req->set_cntr && !rule->has_cntr)
 		rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp);
 
 	/* if user wants to delete an existing counter for a rule then
@@ -1051,7 +1036,14 @@ find_rule:
 		rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
 
 	write_req.hdr.pcifunc = owner;
-	write_req.entry = req->entry;
+
+	/* AF owns the default rules so change the owner just to relax
+	 * the checks in rvu_mbox_handler_npc_mcam_write_entry
+	 */
+	if (req->default_rule)
+		write_req.hdr.pcifunc = 0;
+
+	write_req.entry = entry_index;
 	write_req.intf = req->intf;
 	write_req.enable_entry = (u8)enable;
 	/* if counter is available then clear and use it */
@@ -1069,7 +1061,7 @@ find_rule:
 			kfree(rule);
 		return err;
 	}
-update_rule:
+	/* update rule */
 	memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
 	memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
 	rule->entry = entry_index;
@@ -1278,6 +1270,7 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr,
 
 	write_req.hdr.pcifunc = rule->owner;
 	write_req.entry = rule->entry;
+	write_req.intf = pfvf->nix_rx_intf;
 
 	mutex_unlock(&mcam->lock);
 	err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, &rsp);
-- 
cgit v1.2.3


From d450a23515e00b3c0701f4ae7f3e47dc71aa7bca Mon Sep 17 00:00:00 2001
From: "Nalla, Pradeep" <pnalla@marvell.com>
Date: Wed, 17 Mar 2021 19:05:35 +0530
Subject: octeontx2-af: Add support for multi channel in NIX promisc entry

This patch adds support for multi channel NIX promisc entry. Packets sent
on all those channels by the host should be received by the interface to
which those channels belong. Channel count, if greater than 1, should be
power of 2 as only one promisc entry is available for the interface. Key
mask is modified such that incoming packets from channel base to channel
count are directed to the same pci function.

Signed-off-by: Nalla, Pradeep <pnalla@marvell.com>
Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu.h     |  3 ++-
 drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c |  6 ++++--
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 14 +++++++++++++-
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index e6517a697de7..baaba01bd8c5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -646,7 +646,8 @@ int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en);
 void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 				 int nixlf, u64 chan, u8 *mac_addr);
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, bool allmulti);
+				   int nixlf, u64 chan, u8 chan_cnt,
+				   bool allmulti);
 void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index d3000194e2d3..f2a1c4235f74 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -273,7 +273,8 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
 		pfvf->rx_chan_cnt = 1;
 		pfvf->tx_chan_cnt = 1;
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
-					      pfvf->rx_chan_base, false);
+					      pfvf->rx_chan_base,
+					      pfvf->rx_chan_cnt, false);
 		break;
 	}
 
@@ -3088,7 +3089,8 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
 		rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf);
 	else
 		rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
-					      pfvf->rx_chan_base, allmulti);
+					      pfvf->rx_chan_base,
+					      pfvf->rx_chan_cnt, allmulti);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 80b90a48df4f..73b430ce52d0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -647,13 +647,15 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc,
 }
 
 void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
-				   int nixlf, u64 chan, bool allmulti)
+				   int nixlf, u64 chan, u8 chan_cnt,
+				   bool allmulti)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	int blkaddr, ucast_idx, index, kwi;
 	struct mcam_entry entry = { {0} };
 	struct nix_rx_action action = { };
+	u64 relaxed_mask;
 
 	/* Only PF or AF VF can add a promiscuous entry */
 	if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc))
@@ -669,6 +671,16 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 	entry.kw[0] = chan;
 	entry.kw_mask[0] = 0xFFFULL;
 
+	if (chan_cnt > 1) {
+		if (!is_power_of_2(chan_cnt)) {
+			dev_err(rvu->dev, "channel count more than 1, must be power of 2\n");
+			return;
+		}
+		relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1,
+					   ilog2(chan_cnt));
+		entry.kw_mask[0] &= relaxed_mask;
+	}
+
 	if (allmulti) {
 		kwi = NPC_KEXOF_DMAC / sizeof(u64);
 		entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */
-- 
cgit v1.2.3


From 56bcef528bd87d66ddf81f0fb1b8837cce1b2667 Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Wed, 17 Mar 2021 19:05:36 +0530
Subject: octeontx2-af: Use npc_install_flow API for promisc and broadcast
 entries

Use npc_install_flow mailbox API for installing the default promisc
and broadcast match entries. Earlier these entries were installed
using low level npc_config_mcam_entry API, which does not store these
rules and is not available when the rules are dumped using debugfs.
Added chan_mask field to npc_install_flow_req to calculate channel
mask when channel count is greater than 1 and configure the channel
mask in entry kw_mask.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h   |   1 +
 .../net/ethernet/marvell/octeontx2/af/rvu_npc.c    | 107 +++++++++++----------
 .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c |  10 +-
 3 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index ea456099b33c..1dca2ca115c7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1141,6 +1141,7 @@ struct npc_install_flow_req {
 	u64 features;
 	u16 entry;
 	u16 channel;
+	u16 chan_mask;
 	u8 intf;
 	u8 set_cntr; /* If counter is available set counter for this entry ? */
 	u8 default_rule;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 73b430ce52d0..4c3f6432b671 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -651,10 +651,12 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 				   bool allmulti)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct npc_install_flow_req req = { 0 };
+	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	int blkaddr, ucast_idx, index, kwi;
-	struct mcam_entry entry = { {0} };
-	struct nix_rx_action action = { };
+	int blkaddr, ucast_idx, index;
+	u8 mac_addr[ETH_ALEN] = { 0 };
+	struct nix_rx_action action;
 	u64 relaxed_mask;
 
 	/* Only PF or AF VF can add a promiscuous entry */
@@ -665,34 +667,15 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 	if (blkaddr < 0)
 		return;
 
+	*(u64 *)&action = 0x00;
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_PROMISC_ENTRY);
 
-	entry.kw[0] = chan;
-	entry.kw_mask[0] = 0xFFFULL;
-
-	if (chan_cnt > 1) {
-		if (!is_power_of_2(chan_cnt)) {
-			dev_err(rvu->dev, "channel count more than 1, must be power of 2\n");
-			return;
-		}
-		relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1,
-					   ilog2(chan_cnt));
-		entry.kw_mask[0] &= relaxed_mask;
-	}
-
-	if (allmulti) {
-		kwi = NPC_KEXOF_DMAC / sizeof(u64);
-		entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */
-		entry.kw_mask[kwi] = BIT_ULL(40);
-	}
-
-	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
-					     nixlf, NIXLF_UCAST_ENTRY);
-
 	/* If the corresponding PF's ucast action is RSS,
 	 * use the same action for promisc also
 	 */
+	ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc,
+					     nixlf, NIXLF_UCAST_ENTRY);
 	if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx))
 		*(u64 *)&action = npc_get_mcam_action(rvu, mcam,
 							blkaddr, ucast_idx);
@@ -703,9 +686,36 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc,
 		action.pf_func = pcifunc;
 	}
 
-	entry.action = *(u64 *)&action;
-	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
-			      pfvf->nix_rx_intf, &entry, true);
+	if (allmulti) {
+		mac_addr[0] = 0x01;	/* LSB bit of 1st byte in DMAC */
+		ether_addr_copy(req.packet.dmac, mac_addr);
+		ether_addr_copy(req.mask.dmac, mac_addr);
+		req.features = BIT_ULL(NPC_DMAC);
+	}
+
+	req.chan_mask = 0xFFFU;
+	if (chan_cnt > 1) {
+		if (!is_power_of_2(chan_cnt)) {
+			dev_err(rvu->dev,
+				"%s: channel count more than 1, must be power of 2\n", __func__);
+			return;
+		}
+		relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1,
+					   ilog2(chan_cnt));
+		req.chan_mask &= relaxed_mask;
+	}
+
+	req.channel = chan;
+	req.intf = pfvf->nix_rx_intf;
+	req.entry = index;
+	req.op = action.op;
+	req.hdr.pcifunc = 0; /* AF is requester */
+	req.vf = pcifunc;
+	req.index = action.index;
+	req.match_id = action.match_id;
+	req.flow_key_alg = action.flow_key_alg;
+
+	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
 static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc,
@@ -740,12 +750,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan)
 {
+	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct npc_install_flow_req req = { 0 };
+	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	struct mcam_entry entry = { {0} };
 	struct rvu_hwinfo *hw = rvu->hw;
-	struct nix_rx_action action;
-	struct rvu_pfvf *pfvf;
 	int blkaddr, index;
+	u32 req_index = 0;
+	u8 op;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
 	if (blkaddr < 0)
@@ -767,32 +779,29 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
 					 nixlf, NIXLF_BCAST_ENTRY);
 
-	/* Match ingress channel */
-	entry.kw[0] = chan;
-	entry.kw_mask[0] = 0xfffull;
-
-	/* Match broadcast MAC address.
-	 * DMAC is extracted at 0th bit of PARSE_KEX::KW1
-	 */
-	entry.kw[1] = 0xffffffffffffull;
-	entry.kw_mask[1] = 0xffffffffffffull;
-
-	*(u64 *)&action = 0x00;
 	if (!hw->cap.nix_rx_multicast) {
 		/* Early silicon doesn't support pkt replication,
 		 * so install entry with UCAST action, so that PF
 		 * receives all broadcast packets.
 		 */
-		action.op = NIX_RX_ACTIONOP_UCAST;
-		action.pf_func = pcifunc;
+		op = NIX_RX_ACTIONOP_UCAST;
 	} else {
-		action.index = pfvf->bcast_mce_idx;
-		action.op = NIX_RX_ACTIONOP_MCAST;
+		op = NIX_RX_ACTIONOP_MCAST;
+		req_index = pfvf->bcast_mce_idx;
 	}
 
-	entry.action = *(u64 *)&action;
-	npc_config_mcam_entry(rvu, mcam, blkaddr, index,
-			      pfvf->nix_rx_intf, &entry, true);
+	eth_broadcast_addr((u8 *)&req.packet.dmac);
+	eth_broadcast_addr((u8 *)&req.mask.dmac);
+	req.features = BIT_ULL(NPC_DMAC);
+	req.channel = chan;
+	req.intf = pfvf->nix_rx_intf;
+	req.entry = index;
+	req.op = op;
+	req.hdr.pcifunc = 0; /* AF is requester */
+	req.vf = pcifunc;
+	req.index = req_index;
+
+	rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp);
 }
 
 void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 9e710a534796..a31b46d65cc5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -903,9 +903,11 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf,
 				struct npc_install_flow_req *req, u16 target)
 {
 	struct nix_rx_action action;
+	u64 chan_mask;
 
-	npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0,
-			 ~0ULL, 0, NIX_INTF_RX);
+	chan_mask = req->chan_mask ? req->chan_mask : ~0ULL;
+	npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0,
+			 NIX_INTF_RX);
 
 	*(u64 *)&action = 0x00;
 	action.pf_func = target;
@@ -1137,6 +1139,10 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	else
 		target = req->hdr.pcifunc;
 
+	/* ignore chan_mask in case pf func is not AF, revisit later */
+	if (!is_pffunc_af(req->hdr.pcifunc))
+		req->chan_mask = 0xFFF;
+
 	if (npc_check_unsupported_flows(rvu, req->features, req->intf))
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From b6b0e3667e1b2c796cd282d5ec385d5864ccb1ce Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Wed, 17 Mar 2021 19:05:37 +0530
Subject: octeontx2-af: Avoid duplicate unicast rule in mcam_rules list

A mcam rule described by mcam_rule struct has all the info
such as the hardware MCAM entry number, match criteria and
corresponding action etc. All mcam rules are stored in a
linked list mcam->rules. When adding/updating a rule to the
mcam->rules it is checked if a rule already exists for the
mcam entry. If the rule already exists, the same rule is
updated instead of creating new rule. This way only one
mcam_rule exists for the only one default unicast entry
installed by AF. But a PF/VF can get different NIXLF
(or default unicast entry number) after a attach-detach-attach
sequence. When that happens mcam_rules list end up with two
default unicast rules. Fix the problem by deleting the default
unicast rule list node always when disabling mcam rules.

Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 +---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 9 ++++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index f2a1c4235f74..a87104121344 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -3637,9 +3637,7 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req,
 	if (err)
 		return err;
 
-	rvu_npc_disable_default_entries(rvu, pcifunc, nixlf);
-
-	npc_mcam_disable_flows(rvu, pcifunc);
+	rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
 
 	return rvu_cgx_start_stop_io(rvu, pcifunc, false);
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 4c3f6432b671..6cce7ecad007 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -988,7 +988,7 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 {
 	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-	struct rvu_npc_mcam_rule *rule;
+	struct rvu_npc_mcam_rule *rule, *tmp;
 	int blkaddr;
 
 	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
@@ -998,15 +998,18 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf)
 	mutex_lock(&mcam->lock);
 
 	/* Disable MCAM entries directing traffic to this 'pcifunc' */
-	list_for_each_entry(rule, &mcam->mcam_rules, list) {
+	list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) {
 		if (is_npc_intf_rx(rule->intf) &&
 		    rule->rx_action.pf_func == pcifunc) {
 			npc_enable_mcam_entry(rvu, mcam, blkaddr,
 					      rule->entry, false);
 			rule->enable = false;
 			/* Indicate that default rule is disabled */
-			if (rule->default_rule)
+			if (rule->default_rule) {
 				pfvf->def_ucast_rule = NULL;
+				list_del(&rule->list);
+				kfree(rule);
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 058fa3d915eac6e0babbe3c4776e8ca382415c2c Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Wed, 17 Mar 2021 19:05:38 +0530
Subject: octeontx2-af: Modify the return code for unsupported flow keys

The mbox handler npc_install_flow returns ENOTSUPP for unsupported
flow keys. This patch modifies the return value to AF driver defined
error code for debugging purpose.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h       | 1 +
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 1dca2ca115c7..46f919625464 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -605,6 +605,7 @@ enum nix_af_status {
 	NIX_AF_INVAL_SSO_PF_FUNC    = -420,
 	NIX_AF_ERR_TX_VTAG_NOSPC    = -421,
 	NIX_AF_ERR_RX_VTAG_INUSE    = -422,
+	NIX_AF_ERR_NPC_KEY_NOT_SUPP = -423,
 };
 
 /* For NIX RX vtag action  */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index a31b46d65cc5..d805a189a268 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -597,7 +597,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf)
 		dev_info(rvu->dev, "Unsupported flow(s):\n");
 		for_each_set_bit(bit, (unsigned long *)&unsupported, 64)
 			dev_info(rvu->dev, "%s ", npc_get_field_name(bit));
-		return -EOPNOTSUPP;
+		return NIX_AF_ERR_NPC_KEY_NOT_SUPP;
 	}
 
 	return 0;
@@ -1143,8 +1143,9 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu,
 	if (!is_pffunc_af(req->hdr.pcifunc))
 		req->chan_mask = 0xFFF;
 
-	if (npc_check_unsupported_flows(rvu, req->features, req->intf))
-		return -EOPNOTSUPP;
+	err = npc_check_unsupported_flows(rvu, req->features, req->intf);
+	if (err)
+		return err;
 
 	if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel))
 		return -EINVAL;
-- 
cgit v1.2.3


From cfeb961a2b5f1a7c6e0dc43a1673b28fab05baf8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 17 Mar 2021 19:44:54 +0200
Subject: Documentation: networking: switchdev: separate bulleted items with
 new line

It looks like "make htmldocs" produces this warning:
Documentation/networking/switchdev.rst:482: WARNING: Unexpected indentation.

Fixes: 0f22ad45f47c ("Documentation: networking: switchdev: clarify device driver behavior")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
index 1b56367d85ad..aa78102c9fab 100644
--- a/Documentation/networking/switchdev.rst
+++ b/Documentation/networking/switchdev.rst
@@ -479,6 +479,7 @@ is not possible.
   be added to a second bridge, which includes other switch ports or software
   interfaces. Some approaches to ensure that the forwarding domain for traffic
   belonging to the VLAN upper interfaces are managed properly:
+
     * If forwarding destinations can be managed per VLAN, the hardware could be
       configured to map all traffic, except the packets tagged with a VID
       belonging to a VLAN upper interface, to an internal VID corresponding to
-- 
cgit v1.2.3


From 6b38c57198366a86dd73fb250db5dfcbdc45763f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 17 Mar 2021 19:44:55 +0200
Subject: Documentation: networking: switchdev: add missing "and" word

Even though this is clear from the context, it is nice to actually be
grammatically correct.

Fixes: 0f22ad45f47c ("Documentation: networking: switchdev: clarify device driver behavior")
Reported-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
index aa78102c9fab..f1f4e6a85a29 100644
--- a/Documentation/networking/switchdev.rst
+++ b/Documentation/networking/switchdev.rst
@@ -523,7 +523,7 @@ as untagged packets, since the bridge device does not allow the manipulation of
 VID 0 in its database.
 
 When the bridge has VLAN filtering enabled and a PVID is not configured on the
-ingress port, untagged 802.1p tagged packets must be dropped. When the bridge
+ingress port, untagged and 802.1p tagged packets must be dropped. When the bridge
 has VLAN filtering enabled and a PVID exists on the ingress port, untagged and
 priority-tagged packets must be accepted and forwarded according to the
 bridge's port membership of the PVID VLAN. When the bridge has VLAN filtering
-- 
cgit v1.2.3


From 8794be45cd456089b0582b7e72c686baa9d6f9b7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 17 Mar 2021 19:44:56 +0200
Subject: Documentation: networking: dsa: add missing new line in devlink
 section

"make htmldocs" produces these warnings:
Documentation/networking/dsa/dsa.rst:468: WARNING: Unexpected indentation.
Documentation/networking/dsa/dsa.rst:477: WARNING: Block quote ends without a blank line; unexpected unindent.

Fixes: 8411abbcad8e ("Documentation: networking: dsa: mention integration with devlink")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/dsa.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst
index 69040e11ee5e..8688009514cc 100644
--- a/Documentation/networking/dsa/dsa.rst
+++ b/Documentation/networking/dsa/dsa.rst
@@ -464,6 +464,7 @@ For each devlink device, every physical port (i.e. user ports, CPU ports, DSA
 links or unused ports) is exposed as a devlink port.
 
 DSA drivers can make use of the following devlink features:
+
 - Regions: debugging feature which allows user space to dump driver-defined
   areas of hardware information in a low-level, binary format. Both global
   regions as well as per-port regions are supported. It is possible to export
-- 
cgit v1.2.3


From e322bacb914d5cb69783045f3c639521827b30e8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 17 Mar 2021 19:44:57 +0200
Subject: Documentation: networking: dsa: demote subsections to simple
 emphasized words

"make htmldocs" complains:
configuration.rst:165: WARNING: duplicate label networking/dsa/configuration:single port, other instance in (...)
configuration.rst:212: WARNING: duplicate label networking/dsa/configuration:bridge, other instance in (...)
configuration.rst:252: WARNING: duplicate label networking/dsa/configuration:gateway, other instance in (...)

And for good reason, because the "single port", "bridge" and "gateway"
use cases are replicated twice, once for normal taggers and twice for
DSA_TAG_PROTO_NONE. So when trying to reference these sections via a
hyperlink such as:

https://www.kernel.org/doc/html/latest/networking/dsa/configuration.html#single-port

it will always reference the first occurrence, and never the second one.

This change makes the "single port", "bridge" and "gateway"
configuration examples consistent with the formatting used in the
"Configuration showcases" subsection.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/configuration.rst | 295 ++++++++++++-------------
 1 file changed, 142 insertions(+), 153 deletions(-)

diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst
index 11bd5e6108c0..d20b908bd861 100644
--- a/Documentation/networking/dsa/configuration.rst
+++ b/Documentation/networking/dsa/configuration.rst
@@ -78,79 +78,73 @@ The tagging based configuration is desired and supported by the majority of
 DSA switches. These switches are capable to tag incoming and outgoing traffic
 without using a VLAN based configuration.
 
-single port
-~~~~~~~~~~~
-
-.. code-block:: sh
-
-  # configure each interface
-  ip addr add 192.0.2.1/30 dev lan1
-  ip addr add 192.0.2.5/30 dev lan2
-  ip addr add 192.0.2.9/30 dev lan3
-
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+*single port*
+  .. code-block:: sh
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # configure each interface
+    ip addr add 192.0.2.1/30 dev lan1
+    ip addr add 192.0.2.5/30 dev lan2
+    ip addr add 192.0.2.9/30 dev lan3
 
-bridge
-~~~~~~
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
 
-.. code-block:: sh
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+*bridge*
+  .. code-block:: sh
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
 
-  # create bridge
-  ip link add name br0 type bridge
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
+    # create bridge
+    ip link add name br0 type bridge
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
 
-  # bring up the bridge
-  ip link set dev br0 up
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-gateway
-~~~~~~~
+    # bring up the bridge
+    ip link set dev br0 up
 
-.. code-block:: sh
+*gateway*
+  .. code-block:: sh
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
 
-  # bring up the slave interfaces
-  ip link set wan up
-  ip link set lan1 up
-  ip link set lan2 up
+    # bring up the slave interfaces
+    ip link set wan up
+    ip link set lan1 up
+    ip link set lan2 up
 
-  # configure the upstream port
-  ip addr add 192.0.2.1/30 dev wan
+    # configure the upstream port
+    ip addr add 192.0.2.1/30 dev wan
 
-  # create bridge
-  ip link add name br0 type bridge
+    # create bridge
+    ip link add name br0 type bridge
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-  # bring up the bridge
-  ip link set dev br0 up
+    # bring up the bridge
+    ip link set dev br0 up
 
 .. _dsa-vlan-configuration:
 
@@ -161,132 +155,127 @@ A minority of switches are not capable to use a taging protocol
 (DSA_TAG_PROTO_NONE). These switches can be configured by a VLAN based
 configuration.
 
-single port
-~~~~~~~~~~~
-The configuration can only be set up via VLAN tagging and bridge setup.
-
-.. code-block:: sh
-
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
-  ip link add link eth0 name eth0.2 type vlan id 2
-  ip link add link eth0 name eth0.3 type vlan id 3
+*single port*
+  The configuration can only be set up via VLAN tagging and bridge setup.
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
-  ip link set eth0.2 up
-  ip link set eth0.3 up
+  .. code-block:: sh
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
+    ip link add link eth0 name eth0.2 type vlan id 2
+    ip link add link eth0 name eth0.3 type vlan id 3
 
-  # create bridge
-  ip link add name br0 type bridge
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
+    ip link set eth0.2 up
+    ip link set eth0.3 up
 
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # add ports to bridges
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
+    # create bridge
+    ip link add name br0 type bridge
 
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 2 pvid untagged
-  bridge vlan add dev lan3 vid 3 pvid untagged
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
 
-  # configure the VLANs
-  ip addr add 192.0.2.1/30 dev eth0.1
-  ip addr add 192.0.2.5/30 dev eth0.2
-  ip addr add 192.0.2.9/30 dev eth0.3
+    # add ports to bridges
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
 
-  # bring up the bridge devices
-  ip link set br0 up
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 2 pvid untagged
+    bridge vlan add dev lan3 vid 3 pvid untagged
 
+    # configure the VLANs
+    ip addr add 192.0.2.1/30 dev eth0.1
+    ip addr add 192.0.2.5/30 dev eth0.2
+    ip addr add 192.0.2.9/30 dev eth0.3
 
-bridge
-~~~~~~
+    # bring up the bridge devices
+    ip link set br0 up
 
-.. code-block:: sh
 
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
+*bridge*
+  .. code-block:: sh
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
 
-  # bring up the slave interfaces
-  ip link set lan1 up
-  ip link set lan2 up
-  ip link set lan3 up
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
 
-  # create bridge
-  ip link add name br0 type bridge
+    # bring up the slave interfaces
+    ip link set lan1 up
+    ip link set lan2 up
+    ip link set lan3 up
 
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
+    # create bridge
+    ip link add name br0 type bridge
 
-  # add ports to bridge
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
-  ip link set dev lan3 master br0
-  ip link set eth0.1 master br0
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
 
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 1 pvid untagged
-  bridge vlan add dev lan3 vid 1 pvid untagged
+    # add ports to bridge
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
+    ip link set dev lan3 master br0
+    ip link set eth0.1 master br0
 
-  # configure the bridge
-  ip addr add 192.0.2.129/25 dev br0
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 1 pvid untagged
+    bridge vlan add dev lan3 vid 1 pvid untagged
 
-  # bring up the bridge
-  ip link set dev br0 up
+    # configure the bridge
+    ip addr add 192.0.2.129/25 dev br0
 
-gateway
-~~~~~~~
+    # bring up the bridge
+    ip link set dev br0 up
 
-.. code-block:: sh
+*gateway*
+  .. code-block:: sh
 
-  # tag traffic on CPU port
-  ip link add link eth0 name eth0.1 type vlan id 1
-  ip link add link eth0 name eth0.2 type vlan id 2
+    # tag traffic on CPU port
+    ip link add link eth0 name eth0.1 type vlan id 1
+    ip link add link eth0 name eth0.2 type vlan id 2
 
-  # The master interface needs to be brought up before the slave ports.
-  ip link set eth0 up
-  ip link set eth0.1 up
-  ip link set eth0.2 up
+    # The master interface needs to be brought up before the slave ports.
+    ip link set eth0 up
+    ip link set eth0.1 up
+    ip link set eth0.2 up
 
-  # bring up the slave interfaces
-  ip link set wan up
-  ip link set lan1 up
-  ip link set lan2 up
+    # bring up the slave interfaces
+    ip link set wan up
+    ip link set lan1 up
+    ip link set lan2 up
 
-  # create bridge
-  ip link add name br0 type bridge
+    # create bridge
+    ip link add name br0 type bridge
 
-  # activate VLAN filtering
-  ip link set dev br0 type bridge vlan_filtering 1
+    # activate VLAN filtering
+    ip link set dev br0 type bridge vlan_filtering 1
 
-  # add ports to bridges
-  ip link set dev wan master br0
-  ip link set eth0.1 master br0
-  ip link set dev lan1 master br0
-  ip link set dev lan2 master br0
+    # add ports to bridges
+    ip link set dev wan master br0
+    ip link set eth0.1 master br0
+    ip link set dev lan1 master br0
+    ip link set dev lan2 master br0
 
-  # tag traffic on ports
-  bridge vlan add dev lan1 vid 1 pvid untagged
-  bridge vlan add dev lan2 vid 1 pvid untagged
-  bridge vlan add dev wan vid 2 pvid untagged
+    # tag traffic on ports
+    bridge vlan add dev lan1 vid 1 pvid untagged
+    bridge vlan add dev lan2 vid 1 pvid untagged
+    bridge vlan add dev wan vid 2 pvid untagged
 
-  # configure the VLANs
-  ip addr add 192.0.2.1/30 dev eth0.2
-  ip addr add 192.0.2.129/25 dev br0
+    # configure the VLANs
+    ip addr add 192.0.2.1/30 dev eth0.2
+    ip addr add 192.0.2.129/25 dev br0
 
-  # bring up the bridge devices
-  ip link set br0 up
+    # bring up the bridge devices
+    ip link set br0 up
-- 
cgit v1.2.3


From 0929ff71cf9272a2b47d6708fc7eff9c381db7b7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 17 Mar 2021 19:44:58 +0200
Subject: Documentation: networking: dsa: mention that the master is brought up
 automatically

Since commit 9d5ef190e561 ("net: dsa: automatically bring up DSA master
when opening user port"), DSA manages the administrative status of the
host port automatically. Update the configuration steps to reflect this.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/configuration.rst | 29 +++++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst
index d20b908bd861..774f0e76c746 100644
--- a/Documentation/networking/dsa/configuration.rst
+++ b/Documentation/networking/dsa/configuration.rst
@@ -34,8 +34,15 @@ interface. The CPU port is the switch port connected to an Ethernet MAC chip.
 The corresponding linux Ethernet interface is called the master interface.
 All other corresponding linux interfaces are called slave interfaces.
 
-The slave interfaces depend on the master interface. They can only brought up,
-when the master interface is up.
+The slave interfaces depend on the master interface being up in order for them
+to send or receive traffic. Prior to kernel v5.12, the state of the master
+interface had to be managed explicitly by the user. Starting with kernel v5.12,
+the behavior is as follows:
+
+- when a DSA slave interface is brought up, the master interface is
+  automatically brought up.
+- when the master interface is brought down, all DSA slave interfaces are
+  automatically brought down.
 
 In this documentation the following Ethernet interfaces are used:
 
@@ -86,7 +93,8 @@ without using a VLAN based configuration.
     ip addr add 192.0.2.5/30 dev lan2
     ip addr add 192.0.2.9/30 dev lan3
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
 
     # bring up the slave interfaces
@@ -97,7 +105,8 @@ without using a VLAN based configuration.
 *bridge*
   .. code-block:: sh
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
 
     # bring up the slave interfaces
@@ -122,7 +131,8 @@ without using a VLAN based configuration.
 *gateway*
   .. code-block:: sh
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
 
     # bring up the slave interfaces
@@ -165,7 +175,8 @@ configuration.
     ip link add link eth0 name eth0.2 type vlan id 2
     ip link add link eth0 name eth0.3 type vlan id 3
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
     ip link set eth0.1 up
     ip link set eth0.2 up
@@ -207,7 +218,8 @@ configuration.
     # tag traffic on CPU port
     ip link add link eth0 name eth0.1 type vlan id 1
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
     ip link set eth0.1 up
 
@@ -246,7 +258,8 @@ configuration.
     ip link add link eth0 name eth0.1 type vlan id 1
     ip link add link eth0 name eth0.2 type vlan id 2
 
-    # The master interface needs to be brought up before the slave ports.
+    # For kernels earlier than v5.12, the master interface needs to be
+    # brought up manually before the slave ports.
     ip link set eth0 up
     ip link set eth0.1 up
     ip link set eth0.2 up
-- 
cgit v1.2.3


From db2f2842e6f56027b1a29c7b16dc40482f41563b Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Wed, 17 Mar 2021 09:01:23 +0800
Subject: net: stmmac: add per-queue TX & RX coalesce ethtool support

Extending the driver to support per-queue RX and TX coalesce settings in
order to support below commands:

To show per-queue coalesce setting:-
 $ ethtool --per-queue <DEVNAME> queue_mask <MASK> --show-coalesce

To set per-queue coalesce setting:-
 $ ethtool --per-queue <DEVNAME> queue_mask <MASK> --coalesce \
     [rx-usecs N] [rx-frames M] [tx-usecs P] [tx-frames Q]

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c   |   7 +-
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c |   7 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |   2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   8 +-
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   | 118 ++++++++++++++++++---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  48 +++++----
 7 files changed, 143 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 2bac49b49f73..90383abafa66 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -255,7 +255,7 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr,
 }
 
 static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt,
-				  u32 number_chan)
+				  u32 queue)
 {
 	writel(riwt, ioaddr + DMA_RX_WATCHDOG);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 62aa0e95beb7..8958778d16b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -210,12 +210,9 @@ static void dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space)
 		_dwmac4_dump_dma_regs(ioaddr, i, reg_space);
 }
 
-static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
+static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
 {
-	u32 chan;
-
-	for (chan = 0; chan < number_chan; chan++)
-		writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(chan));
+	writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(queue));
 }
 
 static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 77308c5c5d29..f2cab5b76732 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -441,12 +441,9 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
 	dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3;
 }
 
-static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan)
+static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue)
 {
-	u32 i;
-
-	for (i = 0; i < nchan; i++)
-		writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(i));
+	writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(queue));
 }
 
 static void dwxgmac2_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 979ac9fca23c..da9996a985f6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -206,7 +206,7 @@ struct stmmac_dma_ops {
 	void (*get_hw_feature)(void __iomem *ioaddr,
 			       struct dma_features *dma_cap);
 	/* Program the HW RX Watchdog */
-	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan);
+	void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 queue);
 	void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
 	void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan);
 	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 10e8ae8e2d58..375c503d2df8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -147,9 +147,9 @@ struct stmmac_flow_entry {
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	u32 tx_coal_frames;
-	u32 tx_coal_timer;
-	u32 rx_coal_frames;
+	u32 tx_coal_frames[MTL_MAX_TX_QUEUES];
+	u32 tx_coal_timer[MTL_MAX_TX_QUEUES];
+	u32 rx_coal_frames[MTL_MAX_TX_QUEUES];
 
 	int tx_coalesce;
 	int hwts_tx_en;
@@ -160,7 +160,7 @@ struct stmmac_priv {
 
 	unsigned int dma_buf_sz;
 	unsigned int rx_copybreak;
-	u32 rx_riwt;
+	u32 rx_riwt[MTL_MAX_TX_QUEUES];
 	int hwts_rx_en;
 
 	void __iomem *ioaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5642985ef95..a78d5b0686bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -756,28 +756,75 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv)
 	return (riwt * 256) / (clk / 1000000);
 }
 
-static int stmmac_get_coalesce(struct net_device *dev,
-			       struct ethtool_coalesce *ec)
+static int __stmmac_get_coalesce(struct net_device *dev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 max_cnt;
+	u32 rx_cnt;
+	u32 tx_cnt;
 
-	ec->tx_coalesce_usecs = priv->tx_coal_timer;
-	ec->tx_max_coalesced_frames = priv->tx_coal_frames;
+	rx_cnt = priv->plat->rx_queues_to_use;
+	tx_cnt = priv->plat->tx_queues_to_use;
+	max_cnt = max(rx_cnt, tx_cnt);
 
-	if (priv->use_riwt) {
-		ec->rx_max_coalesced_frames = priv->rx_coal_frames;
-		ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv);
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= max_cnt)
+		return -EINVAL;
+
+	if (queue < tx_cnt) {
+		ec->tx_coalesce_usecs = priv->tx_coal_timer[queue];
+		ec->tx_max_coalesced_frames = priv->tx_coal_frames[queue];
+	} else {
+		ec->tx_coalesce_usecs = 0;
+		ec->tx_max_coalesced_frames = 0;
+	}
+
+	if (priv->use_riwt && queue < rx_cnt) {
+		ec->rx_max_coalesced_frames = priv->rx_coal_frames[queue];
+		ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt[queue],
+							 priv);
+	} else {
+		ec->rx_max_coalesced_frames = 0;
+		ec->rx_coalesce_usecs = 0;
 	}
 
 	return 0;
 }
 
-static int stmmac_set_coalesce(struct net_device *dev,
+static int stmmac_get_coalesce(struct net_device *dev,
 			       struct ethtool_coalesce *ec)
+{
+	return __stmmac_get_coalesce(dev, ec, -1);
+}
+
+static int stmmac_get_per_queue_coalesce(struct net_device *dev, u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __stmmac_get_coalesce(dev, ec, queue);
+}
+
+static int __stmmac_set_coalesce(struct net_device *dev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_cnt = priv->plat->rx_queues_to_use;
+	bool all_queues = false;
 	unsigned int rx_riwt;
+	u32 max_cnt;
+	u32 rx_cnt;
+	u32 tx_cnt;
+
+	rx_cnt = priv->plat->rx_queues_to_use;
+	tx_cnt = priv->plat->tx_queues_to_use;
+	max_cnt = max(rx_cnt, tx_cnt);
+
+	if (queue < 0)
+		all_queues = true;
+	else if (queue >= max_cnt)
+		return -EINVAL;
 
 	if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) {
 		rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv);
@@ -785,8 +832,23 @@ static int stmmac_set_coalesce(struct net_device *dev,
 		if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT))
 			return -EINVAL;
 
-		priv->rx_riwt = rx_riwt;
-		stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+		if (all_queues) {
+			int i;
+
+			for (i = 0; i < rx_cnt; i++) {
+				priv->rx_riwt[i] = rx_riwt;
+				stmmac_rx_watchdog(priv, priv->ioaddr,
+						   rx_riwt, i);
+				priv->rx_coal_frames[i] =
+					ec->rx_max_coalesced_frames;
+			}
+		} else if (queue < rx_cnt) {
+			priv->rx_riwt[queue] = rx_riwt;
+			stmmac_rx_watchdog(priv, priv->ioaddr,
+					   rx_riwt, queue);
+			priv->rx_coal_frames[queue] =
+				ec->rx_max_coalesced_frames;
+		}
 	}
 
 	if ((ec->tx_coalesce_usecs == 0) &&
@@ -797,13 +859,37 @@ static int stmmac_set_coalesce(struct net_device *dev,
 	    (ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES))
 		return -EINVAL;
 
-	/* Only copy relevant parameters, ignore all others. */
-	priv->tx_coal_frames = ec->tx_max_coalesced_frames;
-	priv->tx_coal_timer = ec->tx_coalesce_usecs;
-	priv->rx_coal_frames = ec->rx_max_coalesced_frames;
+	if (all_queues) {
+		int i;
+
+		for (i = 0; i < tx_cnt; i++) {
+			priv->tx_coal_frames[i] =
+				ec->tx_max_coalesced_frames;
+			priv->tx_coal_timer[i] =
+				ec->tx_coalesce_usecs;
+		}
+	} else if (queue < tx_cnt) {
+		priv->tx_coal_frames[queue] =
+			ec->tx_max_coalesced_frames;
+		priv->tx_coal_timer[queue] =
+			ec->tx_coalesce_usecs;
+	}
+
 	return 0;
 }
 
+static int stmmac_set_coalesce(struct net_device *dev,
+			       struct ethtool_coalesce *ec)
+{
+	return __stmmac_set_coalesce(dev, ec, -1);
+}
+
+static int stmmac_set_per_queue_coalesce(struct net_device *dev, u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __stmmac_set_coalesce(dev, ec, queue);
+}
+
 static int stmmac_get_rxnfc(struct net_device *dev,
 			    struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
 {
@@ -1001,6 +1087,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.get_ts_info = stmmac_get_ts_info,
 	.get_coalesce = stmmac_get_coalesce,
 	.set_coalesce = stmmac_set_coalesce,
+	.get_per_queue_coalesce = stmmac_get_per_queue_coalesce,
+	.set_per_queue_coalesce = stmmac_set_per_queue_coalesce,
 	.get_channels = stmmac_get_channels,
 	.set_channels = stmmac_set_channels,
 	.get_tunable = stmmac_get_tunable,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ddf54b8ad75d..1b34373a1918 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2234,7 +2234,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	/* We still have pending packets, let's call for a new scheduling */
 	if (tx_q->dirty_tx != tx_q->cur_tx)
-		hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+		hrtimer_start(&tx_q->txtimer,
+			      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
 			      HRTIMER_MODE_REL);
 
 	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
@@ -2519,7 +2520,8 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-	hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer),
+	hrtimer_start(&tx_q->txtimer,
+		      STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]),
 		      HRTIMER_MODE_REL);
 }
 
@@ -2560,18 +2562,21 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
 static void stmmac_init_coalesce(struct stmmac_priv *priv)
 {
 	u32 tx_channel_count = priv->plat->tx_queues_to_use;
+	u32 rx_channel_count = priv->plat->rx_queues_to_use;
 	u32 chan;
 
-	priv->tx_coal_frames = STMMAC_TX_FRAMES;
-	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	priv->rx_coal_frames = STMMAC_RX_FRAMES;
-
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 
+		priv->tx_coal_frames[chan] = STMMAC_TX_FRAMES;
+		priv->tx_coal_timer[chan] = STMMAC_COAL_TX_TIMER;
+
 		hrtimer_init(&tx_q->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		tx_q->txtimer.function = stmmac_tx_timer;
 	}
+
+	for (chan = 0; chan < rx_channel_count; chan++)
+		priv->rx_coal_frames[chan] = STMMAC_RX_FRAMES;
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2876,10 +2881,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 		priv->tx_lpi_timer = eee_timer * 1000;
 
 	if (priv->use_riwt) {
-		if (!priv->rx_riwt)
-			priv->rx_riwt = DEF_DMA_RIWT;
+		u32 queue;
+
+		for (queue = 0; queue < rx_cnt; queue++) {
+			if (!priv->rx_riwt[queue])
+				priv->rx_riwt[queue] = DEF_DMA_RIWT;
 
-		ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt);
+			stmmac_rx_watchdog(priv, priv->ioaddr,
+					   priv->rx_riwt[queue], queue);
+		}
 	}
 
 	if (priv->hw->pcs)
@@ -3378,11 +3388,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
 		set_ic = true;
-	else if (!priv->tx_coal_frames)
+	else if (!priv->tx_coal_frames[queue])
 		set_ic = false;
-	else if (tx_packets > priv->tx_coal_frames)
+	else if (tx_packets > priv->tx_coal_frames[queue])
 		set_ic = true;
-	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+	else if ((tx_q->tx_count_frames %
+		  priv->tx_coal_frames[queue]) < tx_packets)
 		set_ic = true;
 	else
 		set_ic = false;
@@ -3607,11 +3618,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en)
 		set_ic = true;
-	else if (!priv->tx_coal_frames)
+	else if (!priv->tx_coal_frames[queue])
 		set_ic = false;
-	else if (tx_packets > priv->tx_coal_frames)
+	else if (tx_packets > priv->tx_coal_frames[queue])
 		set_ic = true;
-	else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets)
+	else if ((tx_q->tx_count_frames %
+		  priv->tx_coal_frames[queue]) < tx_packets)
 		set_ic = true;
 	else
 		set_ic = false;
@@ -3810,11 +3822,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 		stmmac_refill_desc3(priv, rx_q, p);
 
 		rx_q->rx_count_frames++;
-		rx_q->rx_count_frames += priv->rx_coal_frames;
-		if (rx_q->rx_count_frames > priv->rx_coal_frames)
+		rx_q->rx_count_frames += priv->rx_coal_frames[queue];
+		if (rx_q->rx_count_frames > priv->rx_coal_frames[queue])
 			rx_q->rx_count_frames = 0;
 
-		use_rx_wd = !priv->rx_coal_frames;
+		use_rx_wd = !priv->rx_coal_frames[queue];
 		use_rx_wd |= rx_q->rx_count_frames > 0;
 		if (!priv->use_riwt)
 			use_rx_wd = false;
-- 
cgit v1.2.3


From f0744a84f36140a18d8f4c63114ad90a133a946c Mon Sep 17 00:00:00 2001
From: dingsenjie <dingsenjie@yulong.com>
Date: Wed, 17 Mar 2021 20:29:33 +0800
Subject: ethernet/broadcom:remove unneeded variable: "ret"

remove unneeded variable: "ret".

Signed-off-by: dingsenjie <dingsenjie@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index b652ed72a621..56801387591d 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1395,7 +1395,6 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt)
 	u32 op_gen_command = 0;
 	u32 comp_addr = BAR_CSTRORM_INTMEM +
 			CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func);
-	int ret = 0;
 
 	if (REG_RD(bp, comp_addr)) {
 		BNX2X_ERR("Cleanup complete was not 0 before sending\n");
@@ -1420,7 +1419,7 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt)
 	/* Zero completion for next FLR */
 	REG_WR(bp, comp_addr, 0);
 
-	return ret;
+	return 0;
 }
 
 u8 bnx2x_is_pcie_pending(struct pci_dev *dev)
-- 
cgit v1.2.3


From ac1bbf8a81d3d3b0a318c82b88742f4282fb91d8 Mon Sep 17 00:00:00 2001
From: dingsenjie <dingsenjie@yulong.com>
Date: Wed, 17 Mar 2021 20:30:30 +0800
Subject: ethernet/microchip:remove unneeded variable: "ret"

remove unneeded variable: "ret".

Signed-off-by: dingsenjie <dingsenjie@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 2c0dcd7acf3f..a66a179236fd 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -222,7 +222,6 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv)
 	unsigned long timeout = jiffies + msecs_to_jiffies(2000);
 	u16 phstat1;
 	u16 estat;
-	int ret = 0;
 
 	phstat1 = encx24j600_read_phy(priv, PHSTAT1);
 	while ((phstat1 & ANDONE) == 0) {
@@ -258,7 +257,7 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv)
 		encx24j600_write_reg(priv, MACLCON, 0x370f);
 	}
 
-	return ret;
+	return 0;
 }
 
 /* Access the PHY to determine link status */
-- 
cgit v1.2.3


From 193c5b2698e3915a66ae79702858396d6e6fc9f5 Mon Sep 17 00:00:00 2001
From: Pavana Sharma <pavana.sharma@digi.com>
Date: Wed, 17 Mar 2021 14:46:40 +0100
Subject: net: dsa: mv88e6xxx: change serdes lane parameter type from u8 type
 to int
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returning 0 is no more an error case with MV88E6393 family
which has serdes lane numbers 0, 9 or 10.
So with this change .serdes_get_lane will return lane number
or -errno (-ENODEV or -EOPNOTSUPP).

Signed-off-by: Pavana Sharma <pavana.sharma@digi.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c   | 28 ++++++-------
 drivers/net/dsa/mv88e6xxx/chip.h   | 16 ++++----
 drivers/net/dsa/mv88e6xxx/port.c   |  8 ++--
 drivers/net/dsa/mv88e6xxx/serdes.c | 82 +++++++++++++++++++-------------------
 drivers/net/dsa/mv88e6xxx/serdes.h | 64 ++++++++++++++---------------
 5 files changed, 99 insertions(+), 99 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 903d619e08ed..cd0c8d1ed905 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -485,12 +485,12 @@ static int mv88e6xxx_serdes_pcs_get_state(struct dsa_switch *ds, int port,
 					  struct phylink_link_state *state)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
-	u8 lane;
+	int lane;
 	int err;
 
 	mv88e6xxx_reg_lock(chip);
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane && chip->info->ops->serdes_pcs_get_state)
+	if (lane >= 0 && chip->info->ops->serdes_pcs_get_state)
 		err = chip->info->ops->serdes_pcs_get_state(chip, port, lane,
 							    state);
 	else
@@ -506,11 +506,11 @@ static int mv88e6xxx_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 				       const unsigned long *advertise)
 {
 	const struct mv88e6xxx_ops *ops = chip->info->ops;
-	u8 lane;
+	int lane;
 
 	if (ops->serdes_pcs_config) {
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			return ops->serdes_pcs_config(chip, port, lane, mode,
 						      interface, advertise);
 	}
@@ -523,14 +523,14 @@ static void mv88e6xxx_serdes_pcs_an_restart(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	const struct mv88e6xxx_ops *ops;
 	int err = 0;
-	u8 lane;
+	int lane;
 
 	ops = chip->info->ops;
 
 	if (ops->serdes_pcs_an_restart) {
 		mv88e6xxx_reg_lock(chip);
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			err = ops->serdes_pcs_an_restart(chip, port, lane);
 		mv88e6xxx_reg_unlock(chip);
 
@@ -544,11 +544,11 @@ static int mv88e6xxx_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 					int speed, int duplex)
 {
 	const struct mv88e6xxx_ops *ops = chip->info->ops;
-	u8 lane;
+	int lane;
 
 	if (!phylink_autoneg_inband(mode) && ops->serdes_pcs_link_up) {
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (lane)
+		if (lane >= 0)
 			return ops->serdes_pcs_link_up(chip, port, lane,
 						       speed, duplex);
 	}
@@ -2460,11 +2460,11 @@ static irqreturn_t mv88e6xxx_serdes_irq_thread_fn(int irq, void *dev_id)
 	struct mv88e6xxx_chip *chip = mvp->chip;
 	irqreturn_t ret = IRQ_NONE;
 	int port = mvp->port;
-	u8 lane;
+	int lane;
 
 	mv88e6xxx_reg_lock(chip);
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane)
+	if (lane >= 0)
 		ret = mv88e6xxx_serdes_irq_status(chip, port, lane);
 	mv88e6xxx_reg_unlock(chip);
 
@@ -2472,7 +2472,7 @@ static irqreturn_t mv88e6xxx_serdes_irq_thread_fn(int irq, void *dev_id)
 }
 
 static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	struct mv88e6xxx_port *dev_id = &chip->ports[port];
 	unsigned int irq;
@@ -2501,7 +2501,7 @@ static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6xxx_serdes_irq_free(struct mv88e6xxx_chip *chip, int port,
-				     u8 lane)
+				     int lane)
 {
 	struct mv88e6xxx_port *dev_id = &chip->ports[port];
 	unsigned int irq = dev_id->serdes_irq;
@@ -2526,11 +2526,11 @@ static int mv88e6xxx_serdes_irq_free(struct mv88e6xxx_chip *chip, int port,
 static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
 				  bool on)
 {
-	u8 lane;
+	int lane;
 	int err;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (!lane)
+	if (lane < 0)
 		return 0;
 
 	if (on) {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index a57c8886f3ac..022e382339e2 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -513,30 +513,30 @@ struct mv88e6xxx_ops {
 	int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip);
 
 	/* Power on/off a SERDES interface */
-	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, u8 lane,
+	int (*serdes_power)(struct mv88e6xxx_chip *chip, int port, int lane,
 			    bool up);
 
 	/* SERDES lane mapping */
-	u8 (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
+	int (*serdes_get_lane)(struct mv88e6xxx_chip *chip, int port);
 
 	int (*serdes_pcs_get_state)(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane, struct phylink_link_state *state);
+				    int lane, struct phylink_link_state *state);
 	int (*serdes_pcs_config)(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, unsigned int mode,
+				 int lane, unsigned int mode,
 				 phy_interface_t interface,
 				 const unsigned long *advertise);
 	int (*serdes_pcs_an_restart)(struct mv88e6xxx_chip *chip, int port,
-				     u8 lane);
+				     int lane);
 	int (*serdes_pcs_link_up)(struct mv88e6xxx_chip *chip, int port,
-				  u8 lane, int speed, int duplex);
+				  int lane, int speed, int duplex);
 
 	/* SERDES interrupt handling */
 	unsigned int (*serdes_irq_mapping)(struct mv88e6xxx_chip *chip,
 					   int port);
-	int (*serdes_irq_enable)(struct mv88e6xxx_chip *chip, int port, u8 lane,
+	int (*serdes_irq_enable)(struct mv88e6xxx_chip *chip, int port, int lane,
 				 bool enable);
 	irqreturn_t (*serdes_irq_status)(struct mv88e6xxx_chip *chip, int port,
-					 u8 lane);
+					 int lane);
 
 	/* Statistics from the SERDES interface */
 	int (*serdes_get_sset_count)(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 4561f289ab76..d8590025e75f 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -429,8 +429,8 @@ phy_interface_t mv88e6390x_port_max_speed_mode(int port)
 static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 				    phy_interface_t mode, bool force)
 {
-	u8 lane;
 	u16 cmode;
+	int lane;
 	u16 reg;
 	int err;
 
@@ -466,7 +466,7 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		return 0;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane) {
+	if (lane >= 0) {
 		if (chip->ports[port].serdes_irq) {
 			err = mv88e6xxx_serdes_irq_disable(chip, port, lane);
 			if (err)
@@ -495,8 +495,8 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 		chip->ports[port].cmode = cmode;
 
 		lane = mv88e6xxx_serdes_get_lane(chip, port);
-		if (!lane)
-			return -ENODEV;
+		if (lane < 0)
+			return lane;
 
 		err = mv88e6xxx_serdes_power_up(chip, port, lane);
 		if (err)
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 3195936dc5be..e48260c5c6ba 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -95,7 +95,7 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
 	return 0;
 }
 
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	u16 val, new_val;
@@ -117,7 +117,7 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise)
 {
@@ -166,7 +166,7 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	u16 lpa, status;
 	int err;
@@ -187,7 +187,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane)
+				    int lane)
 {
 	u16 bmcr;
 	int err;
@@ -200,7 +200,7 @@ int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex)
+				 int lane, int speed, int duplex)
 {
 	u16 val, bmcr;
 	int err;
@@ -230,10 +230,10 @@ int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6352_serdes_write(chip, MII_BMCR, bmcr);
 }
 
-u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	if ((cmode == MV88E6XXX_PORT_STS_CMODE_100BASEX) ||
 	    (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX) ||
@@ -245,7 +245,7 @@ u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 
 static bool mv88e6352_port_has_serdes(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6xxx_serdes_get_lane(chip, port))
+	if (mv88e6xxx_serdes_get_lane(chip, port) >= 0)
 		return true;
 
 	return false;
@@ -354,7 +354,7 @@ static void mv88e6352_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
 }
 
 irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	irqreturn_t ret = IRQ_NONE;
 	u16 status;
@@ -372,7 +372,7 @@ irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 	return ret;
 }
 
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u16 val = 0;
@@ -413,10 +413,10 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 	}
 }
 
-u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 5:
@@ -430,7 +430,7 @@ u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	/* The serdes power can't be controlled on this switch chip but we need
@@ -440,7 +440,7 @@ int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 	return 0;
 }
 
-u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	/* There are no configurable serdes lanes on this switch chip but we
 	 * need to return non-zero so that callers of
@@ -456,7 +456,7 @@ u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 }
 
 int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	int err;
 	u16 status;
@@ -492,7 +492,7 @@ int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 	return 0;
 }
 
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -525,7 +525,7 @@ static void mv88e6097_serdes_irq_link(struct mv88e6xxx_chip *chip, int port)
 }
 
 irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	u8 cmode = chip->ports[port].cmode;
 
@@ -539,10 +539,10 @@ irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 	return IRQ_NONE;
 }
 
-u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode = chip->ports[port].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 9:
@@ -562,12 +562,12 @@ u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
-u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	u8 cmode_port = chip->ports[port].cmode;
 	u8 cmode_port10 = chip->ports[10].cmode;
 	u8 cmode_port9 = chip->ports[9].cmode;
-	u8 lane = 0;
+	int lane = -ENODEV;
 
 	switch (port) {
 	case 2:
@@ -638,7 +638,7 @@ u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 }
 
 /* Set power up/down for 10GBASE-R and 10GBASE-X4/X2 */
-static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, u8 lane,
+static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, int lane,
 				      bool up)
 {
 	u16 val, new_val;
@@ -665,7 +665,7 @@ static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, u8 lane,
 }
 
 /* Set power up/down for SGMII and 1000Base-X */
-static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, u8 lane,
+static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, int lane,
 					bool up)
 {
 	u16 val, new_val;
@@ -701,7 +701,7 @@ static struct mv88e6390_serdes_hw_stat mv88e6390_serdes_hw_stats[] = {
 
 int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6390_serdes_get_lane(chip, port) == 0)
+	if (mv88e6390_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
@@ -713,7 +713,7 @@ int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip,
 	struct mv88e6390_serdes_hw_stat *stat;
 	int i;
 
-	if (mv88e6390_serdes_get_lane(chip, port) == 0)
+	if (mv88e6390_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
@@ -750,7 +750,7 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 	int i;
 
 	lane = mv88e6390_serdes_get_lane(chip, port);
-	if (lane == 0)
+	if (lane < 0)
 		return 0;
 
 	for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
@@ -761,7 +761,7 @@ int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
 	return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
 }
 
-static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, u8 lane)
+static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, int lane)
 {
 	u16 reg;
 	int err;
@@ -776,7 +776,7 @@ static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, u8 lane)
 				      MV88E6390_PG_CONTROL, reg);
 }
 
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -801,7 +801,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise)
 {
@@ -860,7 +860,7 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 }
 
 static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
-	int port, u8 lane, struct phylink_link_state *state)
+	int port, int lane, struct phylink_link_state *state)
 {
 	u16 lpa, status;
 	int err;
@@ -883,7 +883,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
 }
 
 static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
-	int port, u8 lane, struct phylink_link_state *state)
+	int port, int lane, struct phylink_link_state *state)
 {
 	u16 status;
 	int err;
@@ -903,7 +903,7 @@ static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
 }
 
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+				   int lane, struct phylink_link_state *state)
 {
 	switch (state->interface) {
 	case PHY_INTERFACE_MODE_SGMII:
@@ -922,7 +922,7 @@ int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane)
+				    int lane)
 {
 	u16 bmcr;
 	int err;
@@ -938,7 +938,7 @@ int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 }
 
 int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex)
+				 int lane, int speed, int duplex)
 {
 	u16 val, bmcr;
 	int err;
@@ -972,7 +972,7 @@ int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
 }
 
 static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
-					    int port, u8 lane)
+					    int port, int lane)
 {
 	u16 bmsr;
 	int err;
@@ -989,7 +989,7 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 }
 
 static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
-					     u8 lane, bool enable)
+					     int lane, bool enable)
 {
 	u16 val = 0;
 
@@ -1001,7 +1001,7 @@ static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
 				      MV88E6390_SGMII_INT_ENABLE, val);
 }
 
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable)
 {
 	u8 cmode = chip->ports[port].cmode;
@@ -1017,7 +1017,7 @@ int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
 }
 
 static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
-					     u8 lane, u16 *status)
+					     int lane, u16 *status)
 {
 	int err;
 
@@ -1028,7 +1028,7 @@ static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
 }
 
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane)
+					int lane)
 {
 	u8 cmode = chip->ports[port].cmode;
 	irqreturn_t ret = IRQ_NONE;
@@ -1087,7 +1087,7 @@ static const u16 mv88e6390_serdes_regs[] = {
 
 int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port)
 {
-	if (mv88e6xxx_serdes_get_lane(chip, port) == 0)
+	if (mv88e6xxx_serdes_get_lane(chip, port) < 0)
 		return 0;
 
 	return ARRAY_SIZE(mv88e6390_serdes_regs) * sizeof(u16);
@@ -1102,7 +1102,7 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 	int i;
 
 	lane = mv88e6xxx_serdes_get_lane(chip, port);
-	if (lane == 0)
+	if (lane < 0)
 		return;
 
 	for (i = 0 ; i < ARRAY_SIZE(mv88e6390_serdes_regs); i++) {
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 93822ef9bab8..ce4d0fef124d 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -73,55 +73,55 @@
 #define MV88E6390_PG_CONTROL		0xf010
 #define MV88E6390_PG_CONTROL_ENABLE_PC		BIT(0)
 
-u8 mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
-u8 mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise);
 int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
-				u8 lane, unsigned int mode,
+				int lane, unsigned int mode,
 				phy_interface_t interface,
 				const unsigned long *advertise);
 int mv88e6185_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
 int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state);
+				   int lane, struct phylink_link_state *state);
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane);
+				    int lane);
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
-				    u8 lane);
+				    int lane);
 int mv88e6352_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex);
+				 int lane, int speed, int duplex);
 int mv88e6390_serdes_pcs_link_up(struct mv88e6xxx_chip *chip, int port,
-				 u8 lane, int speed, int duplex);
+				 int lane, int speed, int duplex);
 unsigned int mv88e6352_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
 					  int port);
 unsigned int mv88e6390_serdes_irq_mapping(struct mv88e6xxx_chip *chip,
 					  int port);
-int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool up);
-int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
-int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
-int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
-int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
-int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, u8 lane,
+int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
 irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
 irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
-					u8 lane);
+					int lane);
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
 				 int port, uint8_t *data);
@@ -138,18 +138,18 @@ void mv88e6352_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 int mv88e6390_serdes_get_regs_len(struct mv88e6xxx_chip *chip, int port);
 void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p);
 
-/* Return the (first) SERDES lane address a port is using, 0 otherwise. */
-static inline u8 mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
-					   int port)
+/* Return the (first) SERDES lane address a port is using, -errno otherwise. */
+static inline int mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
+					    int port)
 {
 	if (!chip->info->ops->serdes_get_lane)
-		return 0;
+		return -EOPNOTSUPP;
 
 	return chip->info->ops->serdes_get_lane(chip, port);
 }
 
 static inline int mv88e6xxx_serdes_power_up(struct mv88e6xxx_chip *chip,
-					    int port, u8 lane)
+					    int port, int lane)
 {
 	if (!chip->info->ops->serdes_power)
 		return -EOPNOTSUPP;
@@ -158,7 +158,7 @@ static inline int mv88e6xxx_serdes_power_up(struct mv88e6xxx_chip *chip,
 }
 
 static inline int mv88e6xxx_serdes_power_down(struct mv88e6xxx_chip *chip,
-					      int port, u8 lane)
+					      int port, int lane)
 {
 	if (!chip->info->ops->serdes_power)
 		return -EOPNOTSUPP;
@@ -176,7 +176,7 @@ mv88e6xxx_serdes_irq_mapping(struct mv88e6xxx_chip *chip, int port)
 }
 
 static inline int mv88e6xxx_serdes_irq_enable(struct mv88e6xxx_chip *chip,
-					      int port, u8 lane)
+					      int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_enable)
 		return -EOPNOTSUPP;
@@ -185,7 +185,7 @@ static inline int mv88e6xxx_serdes_irq_enable(struct mv88e6xxx_chip *chip,
 }
 
 static inline int mv88e6xxx_serdes_irq_disable(struct mv88e6xxx_chip *chip,
-					       int port, u8 lane)
+					       int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_enable)
 		return -EOPNOTSUPP;
@@ -194,7 +194,7 @@ static inline int mv88e6xxx_serdes_irq_disable(struct mv88e6xxx_chip *chip,
 }
 
 static inline irqreturn_t
-mv88e6xxx_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, u8 lane)
+mv88e6xxx_serdes_irq_status(struct mv88e6xxx_chip *chip, int port, int lane)
 {
 	if (!chip->info->ops->serdes_irq_status)
 		return IRQ_NONE;
-- 
cgit v1.2.3


From 2fda45f019fd4feca5ed672dbb8323375adbdcfb Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 17 Mar 2021 14:46:41 +0100
Subject: net: dsa: mv88e6xxx: wrap .set_egress_port method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are two implementations of the .set_egress_port method, and both
of them, if successful, set chip->*gress_dest_port variable.

To avoid code repetition, wrap this method into
mv88e6xxx_set_egress_port.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Pavana Sharma <pavana.sharma@digi.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 49 +++++++++++++++++++++++--------------
 drivers/net/dsa/mv88e6xxx/global1.c | 19 ++------------
 2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index cd0c8d1ed905..fc9d3875f099 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2550,6 +2550,27 @@ static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
 	return err;
 }
 
+static int mv88e6xxx_set_egress_port(struct mv88e6xxx_chip *chip,
+				     enum mv88e6xxx_egress_direction direction,
+				     int port)
+{
+	int err;
+
+	if (!chip->info->ops->set_egress_port)
+		return -EOPNOTSUPP;
+
+	err = chip->info->ops->set_egress_port(chip, direction, port);
+	if (err)
+		return err;
+
+	if (direction == MV88E6XXX_EGRESS_DIR_INGRESS)
+		chip->ingress_dest_port = port;
+	else
+		chip->egress_dest_port = port;
+
+	return 0;
+}
+
 static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
 {
 	struct dsa_switch *ds = chip->ds;
@@ -2572,19 +2593,17 @@ static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
 				return err;
 		}
 
-		if (chip->info->ops->set_egress_port) {
-			err = chip->info->ops->set_egress_port(chip,
+		err = mv88e6xxx_set_egress_port(chip,
 						MV88E6XXX_EGRESS_DIR_INGRESS,
 						upstream_port);
-			if (err)
-				return err;
+		if (err && err != -EOPNOTSUPP)
+			return err;
 
-			err = chip->info->ops->set_egress_port(chip,
+		err = mv88e6xxx_set_egress_port(chip,
 						MV88E6XXX_EGRESS_DIR_EGRESS,
 						upstream_port);
-			if (err)
-				return err;
-		}
+		if (err && err != -EOPNOTSUPP)
+			return err;
 	}
 
 	return 0;
@@ -5338,9 +5357,6 @@ static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port,
 	int i;
 	int err;
 
-	if (!chip->info->ops->set_egress_port)
-		return -EOPNOTSUPP;
-
 	mutex_lock(&chip->reg_lock);
 	if ((ingress ? chip->ingress_dest_port : chip->egress_dest_port) !=
 	    mirror->to_local_port) {
@@ -5355,9 +5371,8 @@ static int mv88e6xxx_port_mirror_add(struct dsa_switch *ds, int port,
 			goto out;
 		}
 
-		err = chip->info->ops->set_egress_port(chip,
-						       direction,
-						       mirror->to_local_port);
+		err = mv88e6xxx_set_egress_port(chip, direction,
+						mirror->to_local_port);
 		if (err)
 			goto out;
 	}
@@ -5390,10 +5405,8 @@ static void mv88e6xxx_port_mirror_del(struct dsa_switch *ds, int port,
 
 	/* Reset egress port when no other mirror is active */
 	if (!other_mirrors) {
-		if (chip->info->ops->set_egress_port(chip,
-						     direction,
-						     dsa_upstream_port(ds,
-								       port)))
+		if (mv88e6xxx_set_egress_port(chip, direction,
+					      dsa_upstream_port(ds, port)))
 			dev_err(ds->dev, "failed to set egress port\n");
 	}
 
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 33d443a37efc..815b0f681d69 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -315,7 +315,6 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 				 enum mv88e6xxx_egress_direction direction,
 				 int port)
 {
-	int *dest_port_chip;
 	u16 reg;
 	int err;
 
@@ -325,13 +324,11 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 
 	switch (direction) {
 	case MV88E6XXX_EGRESS_DIR_INGRESS:
-		dest_port_chip = &chip->ingress_dest_port;
 		reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK;
 		reg |= port <<
 		       __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK);
 		break;
 	case MV88E6XXX_EGRESS_DIR_EGRESS:
-		dest_port_chip = &chip->egress_dest_port;
 		reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK;
 		reg |= port <<
 		       __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK);
@@ -340,11 +337,7 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 		return -EINVAL;
 	}
 
-	err = mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg);
-	if (!err)
-		*dest_port_chip = port;
-
-	return err;
+	return mv88e6xxx_g1_write(chip, MV88E6185_G1_MONITOR_CTL, reg);
 }
 
 /* Older generations also call this the ARP destination. It has been
@@ -380,28 +373,20 @@ int mv88e6390_g1_set_egress_port(struct mv88e6xxx_chip *chip,
 				 enum mv88e6xxx_egress_direction direction,
 				 int port)
 {
-	int *dest_port_chip;
 	u16 ptr;
-	int err;
 
 	switch (direction) {
 	case MV88E6XXX_EGRESS_DIR_INGRESS:
-		dest_port_chip = &chip->ingress_dest_port;
 		ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST;
 		break;
 	case MV88E6XXX_EGRESS_DIR_EGRESS:
-		dest_port_chip = &chip->egress_dest_port;
 		ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	err = mv88e6390_g1_monitor_write(chip, ptr, port);
-	if (!err)
-		*dest_port_chip = port;
-
-	return err;
+	return mv88e6390_g1_monitor_write(chip, ptr, port);
 }
 
 int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port)
-- 
cgit v1.2.3


From de776d0d316f7230d96ac1aa1df354d880476c1f Mon Sep 17 00:00:00 2001
From: Pavana Sharma <pavana.sharma@digi.com>
Date: Wed, 17 Mar 2021 14:46:42 +0100
Subject: net: dsa: mv88e6xxx: add support for mv88e6393x family
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Marvell 88E6393X device is a single-chip integration of a 11-port
Ethernet switch with eight integrated Gigabit Ethernet (GbE)
transceivers and three 10-Gigabit interfaces.

This patch adds functionalities specific to mv88e6393x family (88E6393X,
88E6193X and 88E6191X).

The main differences between previous devices and this one are:
- port 0 can be a SERDES port
- all SERDESes are one-lane, eg. no XAUI nor RXAUI
- on the other hand the SERDESes can do USXGMII, 10GBASER and 5GBASER
  (on 6191X only one SERDES is capable of more than 1g; USXGMII is not
  yet supported with this change)
- Port Policy CTL register is changed to Port Policy MGMT CTL register,
  via which several more registers can be accessed indirectly
- egress monitor port is configured differently
- ingress monitor/CPU/mirror ports are configured differently and can be
  configured per port (ie. each port can have different ingress monitor
  port, for example)
- port speed AltBit works differently than previously
- PHY registers can be also accessed via MDIO address 0x18 and 0x19
  (on previous devices they could be accessed only via Global 2 offsets
   0x18 and 0x19, which means two indirections; this feature is not yet
   leveraged with thiis commit)

Co-developed-by: Ashkan Boldaji <ashkan.boldaji@digi.com>
Signed-off-by: Ashkan Boldaji <ashkan.boldaji@digi.com>
Signed-off-by: Pavana Sharma <pavana.sharma@digi.com>
Co-developed-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 155 +++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/chip.h    |   4 +
 drivers/net/dsa/mv88e6xxx/global1.h |   2 +
 drivers/net/dsa/mv88e6xxx/global2.h |   8 ++
 drivers/net/dsa/mv88e6xxx/port.c    | 267 ++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/port.h    |  45 ++++++
 drivers/net/dsa/mv88e6xxx/serdes.c  | 256 ++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/serdes.h  |  34 +++++
 8 files changed, 771 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index fc9d3875f099..0ffeb73a4058 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -635,6 +635,29 @@ static void mv88e6390x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
 	mv88e6390_phylink_validate(chip, port, mask, state);
 }
 
+static void mv88e6393x_phylink_validate(struct mv88e6xxx_chip *chip, int port,
+					unsigned long *mask,
+					struct phylink_link_state *state)
+{
+	if (port == 0 || port == 9 || port == 10) {
+		phylink_set(mask, 10000baseT_Full);
+		phylink_set(mask, 10000baseKR_Full);
+		phylink_set(mask, 10000baseCR_Full);
+		phylink_set(mask, 10000baseSR_Full);
+		phylink_set(mask, 10000baseLR_Full);
+		phylink_set(mask, 10000baseLRM_Full);
+		phylink_set(mask, 10000baseER_Full);
+		phylink_set(mask, 5000baseT_Full);
+		phylink_set(mask, 2500baseX_Full);
+		phylink_set(mask, 2500baseT_Full);
+	}
+
+	phylink_set(mask, 1000baseT_Full);
+	phylink_set(mask, 1000baseX_Full);
+
+	mv88e6065_phylink_validate(chip, port, mask, state);
+}
+
 static void mv88e6xxx_validate(struct dsa_switch *ds, int port,
 			       unsigned long *supported,
 			       struct phylink_link_state *state)
@@ -4589,6 +4612,69 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = {
 	.phylink_validate = mv88e6390x_phylink_validate,
 };
 
+static const struct mv88e6xxx_ops mv88e6393x_ops = {
+	/* MV88E6XXX_FAMILY_6393 */
+	.setup_errata = mv88e6393x_serdes_setup_errata,
+	.irl_init_all = mv88e6390_g2_irl_init_all,
+	.get_eeprom = mv88e6xxx_g2_get_eeprom8,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom8,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+	.port_set_link = mv88e6xxx_port_set_link,
+	.port_sync_link = mv88e6xxx_port_sync_link,
+	.port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+	.port_set_speed_duplex = mv88e6393x_port_set_speed_duplex,
+	.port_max_speed_mode = mv88e6393x_port_max_speed_mode,
+	.port_tag_remap = mv88e6390_port_tag_remap,
+	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
+	.port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
+	.port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
+	.port_set_ether_type = mv88e6393x_port_set_ether_type,
+	.port_set_jumbo_size = mv88e6165_port_set_jumbo_size,
+	.port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting,
+	.port_pause_limit = mv88e6390_port_pause_limit,
+	.port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit,
+	.port_disable_pri_override = mv88e6xxx_port_disable_pri_override,
+	.port_get_cmode = mv88e6352_port_get_cmode,
+	.port_set_cmode = mv88e6393x_port_set_cmode,
+	.port_setup_message_port = mv88e6xxx_setup_message_port,
+	.port_set_upstream_port = mv88e6393x_port_set_upstream_port,
+	.stats_snapshot = mv88e6390_g1_stats_snapshot,
+	.stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+	.stats_get_sset_count = mv88e6320_stats_get_sset_count,
+	.stats_get_strings = mv88e6320_stats_get_strings,
+	.stats_get_stats = mv88e6390_stats_get_stats,
+	/* .set_cpu_port is missing because this family does not support a global
+	 * CPU port, only per port CPU port which is set via
+	 * .port_set_upstream_port method.
+	 */
+	.set_egress_port = mv88e6393x_set_egress_port,
+	.watchdog_ops = &mv88e6390_watchdog_ops,
+	.mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu,
+	.pot_clear = mv88e6xxx_g2_pot_clear,
+	.reset = mv88e6352_g1_reset,
+	.rmu_disable = mv88e6390_g1_rmu_disable,
+	.atu_get_hash = mv88e6165_g1_atu_get_hash,
+	.atu_set_hash = mv88e6165_g1_atu_set_hash,
+	.vtu_getnext = mv88e6390_g1_vtu_getnext,
+	.vtu_loadpurge = mv88e6390_g1_vtu_loadpurge,
+	.serdes_power = mv88e6393x_serdes_power,
+	.serdes_get_lane = mv88e6393x_serdes_get_lane,
+	.serdes_pcs_get_state = mv88e6393x_serdes_pcs_get_state,
+	.serdes_pcs_config = mv88e6390_serdes_pcs_config,
+	.serdes_pcs_an_restart = mv88e6390_serdes_pcs_an_restart,
+	.serdes_pcs_link_up = mv88e6390_serdes_pcs_link_up,
+	.serdes_irq_mapping = mv88e6390_serdes_irq_mapping,
+	.serdes_irq_enable = mv88e6393x_serdes_irq_enable,
+	.serdes_irq_status = mv88e6393x_serdes_irq_status,
+	/* TODO: serdes stats */
+	.gpio_ops = &mv88e6352_gpio_ops,
+	.avb_ops = &mv88e6390_avb_ops,
+	.ptp_ops = &mv88e6352_ptp_ops,
+	.phylink_validate = mv88e6393x_phylink_validate,
+};
+
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 	[MV88E6085] = {
 		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6085,
@@ -4960,6 +5046,52 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ops = &mv88e6191_ops,
 	},
 
+	[MV88E6191X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6191X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6191X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
+
+	[MV88E6193X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6193X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6193X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
+
 	[MV88E6220] = {
 		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6220,
 		.family = MV88E6XXX_FAMILY_6250,
@@ -5250,6 +5382,29 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.ptp_support = true,
 		.ops = &mv88e6390x_ops,
 	},
+
+	[MV88E6393X] = {
+		.prod_num = MV88E6XXX_PORT_SWITCH_ID_PROD_6393X,
+		.family = MV88E6XXX_FAMILY_6393,
+		.name = "Marvell 88E6393X",
+		.num_databases = 4096,
+		.num_ports = 11,	/* 10 + Z80 */
+		.num_internal_phys = 9,
+		.max_vid = 8191,
+		.port_base_addr = 0x0,
+		.phy_base_addr = 0x0,
+		.global1_addr = 0x1b,
+		.global2_addr = 0x1c,
+		.age_time_coeff = 3750,
+		.g1_irqs = 10,
+		.g2_irqs = 14,
+		.atu_move_port_mask = 0x1f,
+		.pvt = true,
+		.multi_chip = true,
+		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.ptp_support = true,
+		.ops = &mv88e6393x_ops,
+	},
 };
 
 static const struct mv88e6xxx_info *mv88e6xxx_lookup_info(unsigned int prod_num)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 022e382339e2..bce6e0dc8535 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -63,6 +63,8 @@ enum mv88e6xxx_model {
 	MV88E6190,
 	MV88E6190X,
 	MV88E6191,
+	MV88E6191X,
+	MV88E6193X,
 	MV88E6220,
 	MV88E6240,
 	MV88E6250,
@@ -75,6 +77,7 @@ enum mv88e6xxx_model {
 	MV88E6352,
 	MV88E6390,
 	MV88E6390X,
+	MV88E6393X,
 };
 
 enum mv88e6xxx_family {
@@ -90,6 +93,7 @@ enum mv88e6xxx_family {
 	MV88E6XXX_FAMILY_6351,	/* 6171 6175 6350 6351 */
 	MV88E6XXX_FAMILY_6352,	/* 6172 6176 6240 6352 */
 	MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
+	MV88E6XXX_FAMILY_6393,	/* 6191X 6193X 6393X */
 };
 
 struct mv88e6xxx_ops;
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 7c396964d0b2..4f3dbb015f77 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -22,6 +22,7 @@
 #define MV88E6185_G1_STS_PPU_STATE_DISABLED		0x8000
 #define MV88E6185_G1_STS_PPU_STATE_POLLING		0xc000
 #define MV88E6XXX_G1_STS_INIT_READY			0x0800
+#define MV88E6393X_G1_STS_IRQ_DEVICE_2			9
 #define MV88E6XXX_G1_STS_IRQ_AVB			8
 #define MV88E6XXX_G1_STS_IRQ_DEVICE			7
 #define MV88E6XXX_G1_STS_IRQ_STATS			6
@@ -59,6 +60,7 @@
 #define MV88E6185_G1_CTL1_SCHED_PRIO		0x0800
 #define MV88E6185_G1_CTL1_MAX_FRAME_1632	0x0400
 #define MV88E6185_G1_CTL1_RELOAD_EEPROM		0x0200
+#define MV88E6393X_G1_CTL1_DEVICE2_EN		0x0200
 #define MV88E6XXX_G1_CTL1_DEVICE_EN		0x0080
 #define MV88E6XXX_G1_CTL1_STATS_DONE_EN		0x0040
 #define MV88E6XXX_G1_CTL1_VTU_PROBLEM_EN	0x0020
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 4127f82275ad..c78769cdbb59 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -38,9 +38,15 @@
 /* Offset 0x02: MGMT Enable Register 2x */
 #define MV88E6XXX_G2_MGMT_EN_2X		0x02
 
+/* Offset 0x02: MAC LINK change IRQ Register for MV88E6393X */
+#define MV88E6393X_G2_MACLINK_INT_SRC		0x02
+
 /* Offset 0x03: MGMT Enable Register 0x */
 #define MV88E6XXX_G2_MGMT_EN_0X		0x03
 
+/* Offset 0x03: MAC LINK change IRQ Mask Register for MV88E6393X */
+#define MV88E6393X_G2_MACLINK_INT_MASK		0x03
+
 /* Offset 0x04: Flow Control Delay Register */
 #define MV88E6XXX_G2_FLOW_CTL	0x04
 
@@ -52,6 +58,8 @@
 #define MV88E6XXX_G2_SWITCH_MGMT_FORCE_FLOW_CTL_PRI	0x0080
 #define MV88E6XXX_G2_SWITCH_MGMT_RSVD2CPU		0x0008
 
+#define MV88E6393X_G2_EGRESS_MONITOR_DEST		0x05
+
 /* Offset 0x06: Device Mapping Table Register */
 #define MV88E6XXX_G2_DEVICE_MAPPING		0x06
 #define MV88E6XXX_G2_DEVICE_MAPPING_UPDATE	0x8000
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index d8590025e75f..154541d807df 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -14,6 +14,7 @@
 #include <linux/phylink.h>
 
 #include "chip.h"
+#include "global2.h"
 #include "port.h"
 #include "serdes.h"
 
@@ -25,6 +26,14 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 	return mv88e6xxx_read(chip, addr, reg, val);
 }
 
+int mv88e6xxx_port_wait_bit(struct mv88e6xxx_chip *chip, int port, int reg,
+			    int bit, int val)
+{
+	int addr = chip->info->port_base_addr + port;
+
+	return mv88e6xxx_wait_bit(chip, addr, reg, bit, val);
+}
+
 int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
 			 u16 val)
 {
@@ -426,6 +435,106 @@ phy_interface_t mv88e6390x_port_max_speed_mode(int port)
 	return PHY_INTERFACE_MODE_NA;
 }
 
+/* Support 10, 100, 200, 1000, 2500, 5000, 10000 Mbps (e.g. 88E6393X)
+ * Function mv88e6xxx_port_set_speed_duplex() can't be used as the register
+ * values for speeds 2500 & 5000 conflict.
+ */
+int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
+				     int speed, int duplex)
+{
+	u16 reg, ctrl;
+	int err;
+
+	if (speed == SPEED_MAX)
+		speed = (port > 0 && port < 9) ? 1000 : 10000;
+
+	if (speed == 200 && port != 0)
+		return -EOPNOTSUPP;
+
+	if (speed >= 2500 && port > 0 && port < 9)
+		return -EOPNOTSUPP;
+
+	switch (speed) {
+	case 10:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_10;
+		break;
+	case 100:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_100;
+		break;
+	case 200:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_100 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 1000:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000;
+		break;
+	case 2500:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 5000:
+		ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 |
+			MV88E6390_PORT_MAC_CTL_ALTSPEED;
+		break;
+	case 10000:
+	case SPEED_UNFORCED:
+		ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_UNFORCED;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (duplex) {
+	case DUPLEX_HALF:
+		ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX;
+		break;
+	case DUPLEX_FULL:
+		ctrl |= MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX |
+			MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL;
+		break;
+	case DUPLEX_UNFORCED:
+		/* normal duplex detection */
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, &reg);
+	if (err)
+		return err;
+
+	reg &= ~(MV88E6XXX_PORT_MAC_CTL_SPEED_MASK |
+		 MV88E6390_PORT_MAC_CTL_ALTSPEED |
+		 MV88E6390_PORT_MAC_CTL_FORCE_SPEED);
+
+	if (speed != SPEED_UNFORCED)
+		reg |= MV88E6390_PORT_MAC_CTL_FORCE_SPEED;
+
+	reg |= ctrl;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg);
+	if (err)
+		return err;
+
+	if (speed)
+		dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed);
+	else
+		dev_dbg(chip->dev, "p%d: Speed unforced\n", port);
+	dev_dbg(chip->dev, "p%d: %s %s duplex\n", port,
+		reg & MV88E6XXX_PORT_MAC_CTL_FORCE_DUPLEX ? "Force" : "Unforce",
+		reg & MV88E6XXX_PORT_MAC_CTL_DUPLEX_FULL ? "full" : "half");
+
+	return 0;
+}
+
+phy_interface_t mv88e6393x_port_max_speed_mode(int port)
+{
+	if (port == 0 || port == 9 || port == 10)
+		return PHY_INTERFACE_MODE_10GBASER;
+
+	return PHY_INTERFACE_MODE_NA;
+}
+
 static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 				    phy_interface_t mode, bool force)
 {
@@ -450,6 +559,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	case PHY_INTERFACE_MODE_2500BASEX:
 		cmode = MV88E6XXX_PORT_STS_CMODE_2500BASEX;
 		break;
+	case PHY_INTERFACE_MODE_5GBASER:
+		cmode = MV88E6393X_PORT_STS_CMODE_5GBASER;
+		break;
 	case PHY_INTERFACE_MODE_XGMII:
 	case PHY_INTERFACE_MODE_XAUI:
 		cmode = MV88E6XXX_PORT_STS_CMODE_XAUI;
@@ -457,6 +569,9 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	case PHY_INTERFACE_MODE_RXAUI:
 		cmode = MV88E6XXX_PORT_STS_CMODE_RXAUI;
 		break;
+	case PHY_INTERFACE_MODE_10GBASER:
+		cmode = MV88E6393X_PORT_STS_CMODE_10GBASER;
+		break;
 	default:
 		cmode = 0;
 	}
@@ -541,6 +656,29 @@ int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6xxx_port_set_cmode(chip, port, mode, false);
 }
 
+int mv88e6393x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
+			      phy_interface_t mode)
+{
+	int err;
+	u16 reg;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	/* mv88e6393x errata 4.5: EEE should be disabled on SERDES ports */
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_MAC_CTL, &reg);
+	if (err)
+		return err;
+
+	reg &= ~MV88E6XXX_PORT_MAC_CTL_EEE;
+	reg |= MV88E6XXX_PORT_MAC_CTL_FORCE_EEE;
+	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_MAC_CTL, reg);
+	if (err)
+		return err;
+
+	return mv88e6xxx_port_set_cmode(chip, port, mode, false);
+}
+
 static int mv88e6341_port_set_cmode_writable(struct mv88e6xxx_chip *chip,
 					     int port)
 {
@@ -1185,6 +1323,135 @@ int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port)
 	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_PRI_OVERRIDE, 0);
 }
 
+/* Offset 0x0E: Policy & MGMT Control Register for FAMILY 6191X 6193X 6393X */
+
+static int mv88e6393x_port_policy_write(struct mv88e6xxx_chip *chip, int port,
+					u16 pointer, u8 data)
+{
+	u16 reg;
+
+	reg = MV88E6393X_PORT_POLICY_MGMT_CTL_UPDATE | pointer | data;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				    reg);
+}
+
+static int mv88e6393x_port_policy_write_all(struct mv88e6xxx_chip *chip,
+					    u16 pointer, u8 data)
+{
+	int err, port;
+
+	for (port = 0; port < mv88e6xxx_num_ports(chip); port++) {
+		if (dsa_is_unused_port(chip->ds, port))
+			continue;
+
+		err = mv88e6393x_port_policy_write(chip, port, pointer, data);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int mv88e6393x_set_egress_port(struct mv88e6xxx_chip *chip,
+			       enum mv88e6xxx_egress_direction direction,
+			       int port)
+{
+	u16 ptr;
+	int err;
+
+	switch (direction) {
+	case MV88E6XXX_EGRESS_DIR_INGRESS:
+		ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_INGRESS_DEST;
+		err = mv88e6393x_port_policy_write_all(chip, ptr, port);
+		if (err)
+			return err;
+		break;
+	case MV88E6XXX_EGRESS_DIR_EGRESS:
+		ptr = MV88E6393X_G2_EGRESS_MONITOR_DEST;
+		err = mv88e6xxx_g2_write(chip, ptr, port);
+		if (err)
+			return err;
+		break;
+	}
+
+	return 0;
+}
+
+int mv88e6393x_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
+				      int upstream_port)
+{
+	u16 ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_CPU_DEST;
+	u8 data = MV88E6393X_PORT_POLICY_MGMT_CTL_CPU_DEST_MGMTPRI |
+		  upstream_port;
+
+	return mv88e6393x_port_policy_write(chip, port, ptr, data);
+}
+
+int mv88e6393x_port_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip)
+{
+	u16 ptr;
+	int err;
+
+	/* Consider the frames with reserved multicast destination
+	 * addresses matching 01:80:c2:00:00:00 and
+	 * 01:80:c2:00:00:02 as MGMT.
+	 */
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XLO;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XHI;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XLO;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	ptr = MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XHI;
+	err = mv88e6393x_port_policy_write_all(chip, ptr, 0xff);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/* Offset 0x10 & 0x11: EPC */
+
+static int mv88e6393x_port_epc_wait_ready(struct mv88e6xxx_chip *chip, int port)
+{
+	int bit = __bf_shf(MV88E6393X_PORT_EPC_CMD_BUSY);
+
+	return mv88e6xxx_port_wait_bit(chip, port, MV88E6393X_PORT_EPC_CMD, bit, 0);
+}
+
+/* Port Ether type for 6393X family */
+
+int mv88e6393x_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
+				   u16 etype)
+{
+	u16 val;
+	int err;
+
+	err = mv88e6393x_port_epc_wait_ready(chip, port);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_EPC_DATA, etype);
+	if (err)
+		return err;
+
+	val = MV88E6393X_PORT_EPC_CMD_BUSY |
+	      MV88E6393X_PORT_EPC_CMD_WRITE |
+	      MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_EPC_CMD, val);
+}
+
 /* Offset 0x0f: Port Ether type */
 
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index e6d0eaa6aa1d..948ba577a159 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -49,6 +49,9 @@
 #define MV88E6XXX_PORT_STS_CMODE_2500BASEX	0x000b
 #define MV88E6XXX_PORT_STS_CMODE_XAUI		0x000c
 #define MV88E6XXX_PORT_STS_CMODE_RXAUI		0x000d
+#define MV88E6393X_PORT_STS_CMODE_5GBASER	0x000c
+#define MV88E6393X_PORT_STS_CMODE_10GBASER	0x000d
+#define MV88E6393X_PORT_STS_CMODE_USXGMII	0x000e
 #define MV88E6185_PORT_STS_CDUPLEX		0x0008
 #define MV88E6185_PORT_STS_CMODE_MASK		0x0007
 #define MV88E6185_PORT_STS_CMODE_GMII_FD	0x0000
@@ -68,6 +71,8 @@
 #define MV88E6390_PORT_MAC_CTL_FORCE_SPEED		0x2000
 #define MV88E6390_PORT_MAC_CTL_ALTSPEED			0x1000
 #define MV88E6352_PORT_MAC_CTL_200BASE			0x1000
+#define MV88E6XXX_PORT_MAC_CTL_EEE			0x0200
+#define MV88E6XXX_PORT_MAC_CTL_FORCE_EEE		0x0100
 #define MV88E6185_PORT_MAC_CTL_AN_EN			0x0400
 #define MV88E6185_PORT_MAC_CTL_AN_RESTART		0x0200
 #define MV88E6185_PORT_MAC_CTL_AN_DONE			0x0100
@@ -117,6 +122,8 @@
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6176	0x1760
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6190	0x1900
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6191	0x1910
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6191X	0x1920
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6193X	0x1930
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6185	0x1a70
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6220	0x2200
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6240	0x2400
@@ -129,6 +136,7 @@
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6350	0x3710
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6351	0x3750
 #define MV88E6XXX_PORT_SWITCH_ID_PROD_6390	0x3900
+#define MV88E6XXX_PORT_SWITCH_ID_PROD_6393X	0x3930
 #define MV88E6XXX_PORT_SWITCH_ID_REV_MASK	0x000f
 
 /* Offset 0x04: Port Control Register */
@@ -236,6 +244,19 @@
 #define MV88E6XXX_PORT_POLICY_CTL_TRAP		0x0002
 #define MV88E6XXX_PORT_POLICY_CTL_DISCARD	0x0003
 
+/* Offset 0x0E: Policy & MGMT Control Register (FAMILY_6393X) */
+#define MV88E6393X_PORT_POLICY_MGMT_CTL				0x0e
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_UPDATE			0x8000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_MASK		0x3f00
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_DATA_MASK		0x00ff
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XLO	0x2000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000000XHI	0x2100
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XLO	0x2400
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_01C280000002XHI	0x2500
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_INGRESS_DEST	0x3000
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_PTR_CPU_DEST		0x3800
+#define MV88E6393X_PORT_POLICY_MGMT_CTL_CPU_DEST_MGMTPRI	0x00e0
+
 /* Offset 0x0F: Port Special Ether Type */
 #define MV88E6XXX_PORT_ETH_TYPE		0x0f
 #define MV88E6XXX_PORT_ETH_TYPE_DEFAULT	0x9100
@@ -243,6 +264,15 @@
 /* Offset 0x10: InDiscards Low Counter */
 #define MV88E6XXX_PORT_IN_DISCARD_LO	0x10
 
+/* Offset 0x10: Extended Port Control Command */
+#define MV88E6393X_PORT_EPC_CMD		0x10
+#define MV88E6393X_PORT_EPC_CMD_BUSY	0x8000
+#define MV88E6393X_PORT_EPC_CMD_WRITE	0x0300
+#define MV88E6393X_PORT_EPC_INDEX_PORT_ETYPE	0x02
+
+/* Offset 0x11: Extended Port Control Data */
+#define MV88E6393X_PORT_EPC_DATA	0x11
+
 /* Offset 0x11: InDiscards High Counter */
 #define MV88E6XXX_PORT_IN_DISCARD_HI	0x11
 
@@ -288,6 +318,8 @@ int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
 			u16 *val);
 int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
 			 u16 val);
+int mv88e6xxx_port_wait_bit(struct mv88e6xxx_chip *chip, int port, int reg,
+			    int bit, int val);
 
 int mv88e6185_port_set_pause(struct mv88e6xxx_chip *chip, int port,
 			     int pause);
@@ -315,10 +347,13 @@ int mv88e6390_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
 				    int speed, int duplex);
 int mv88e6390x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
 				     int speed, int duplex);
+int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port,
+				     int speed, int duplex);
 
 phy_interface_t mv88e6341_port_max_speed_mode(int port);
 phy_interface_t mv88e6390_port_max_speed_mode(int port);
 phy_interface_t mv88e6390x_port_max_speed_mode(int port);
+phy_interface_t mv88e6393x_port_max_speed_mode(int port);
 
 int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state);
 
@@ -353,6 +388,14 @@ int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 			      enum mv88e6xxx_policy_action action);
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
 				  u16 etype);
+int mv88e6393x_set_egress_port(struct mv88e6xxx_chip *chip,
+			       enum mv88e6xxx_egress_direction direction,
+			       int port);
+int mv88e6393x_port_set_upstream_port(struct mv88e6xxx_chip *chip, int port,
+				      int upstream_port);
+int mv88e6393x_port_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip);
+int mv88e6393x_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
+				   u16 etype);
 int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port,
 				    bool message_port);
 int mv88e6xxx_port_set_trunk(struct mv88e6xxx_chip *chip, int port,
@@ -371,6 +414,8 @@ int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			     phy_interface_t mode);
 int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
 			      phy_interface_t mode);
+int mv88e6393x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
+			      phy_interface_t mode);
 int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6352_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode);
 int mv88e6xxx_port_set_map_da(struct mv88e6xxx_chip *chip, int port);
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index e48260c5c6ba..470856bcd2f3 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -637,6 +637,27 @@ int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 	return lane;
 }
 
+/* Only Ports 0, 9 and 10 have SERDES lanes. Return the SERDES lane address
+ * a port is using else Returns -ENODEV.
+ */
+int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
+{
+	u8 cmode = chip->ports[port].cmode;
+	int lane = -ENODEV;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	if (cmode == MV88E6XXX_PORT_STS_CMODE_1000BASEX ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_SGMII ||
+	    cmode == MV88E6XXX_PORT_STS_CMODE_2500BASEX ||
+	    cmode == MV88E6393X_PORT_STS_CMODE_5GBASER ||
+	    cmode == MV88E6393X_PORT_STS_CMODE_10GBASER)
+		lane = port;
+
+	return lane;
+}
+
 /* Set power up/down for 10GBASE-R and 10GBASE-X4/X2 */
 static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, int lane,
 				      bool up)
@@ -902,6 +923,30 @@ static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
 	return 0;
 }
 
+static int mv88e6393x_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
+					       int port, int lane,
+					       struct phylink_link_state *state)
+{
+	u16 status;
+	int err;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6390_10G_STAT1, &status);
+	if (err)
+		return err;
+
+	state->link = !!(status & MDIO_STAT1_LSTATUS);
+	if (state->link) {
+		if (state->interface == PHY_INTERFACE_MODE_5GBASER)
+			state->speed = SPEED_5000;
+		else
+			state->speed = SPEED_10000;
+		state->duplex = DUPLEX_FULL;
+	}
+
+	return 0;
+}
+
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 				   int lane, struct phylink_link_state *state)
 {
@@ -921,6 +966,25 @@ int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 	}
 }
 
+int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+				    int lane, struct phylink_link_state *state)
+{
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		return mv88e6390_serdes_pcs_get_state_sgmii(chip, port, lane,
+							    state);
+	case PHY_INTERFACE_MODE_5GBASER:
+	case PHY_INTERFACE_MODE_10GBASER:
+		return mv88e6393x_serdes_pcs_get_state_10g(chip, port, lane,
+							   state);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 				    int lane)
 {
@@ -988,6 +1052,23 @@ static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
 	dsa_port_phylink_mac_change(chip->ds, port, !!(bmsr & BMSR_LSTATUS));
 }
 
+static void mv88e6393x_serdes_irq_link_10g(struct mv88e6xxx_chip *chip,
+					   int port, u8 lane)
+{
+	u16 status;
+	int err;
+
+	/* If the link has dropped, we want to know about it. */
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6390_10G_STAT1, &status);
+	if (err) {
+		dev_err(chip->dev, "can't read Serdes STAT1: %d\n", err);
+		return;
+	}
+
+	dsa_port_phylink_mac_change(chip->ds, port, !!(status & MDIO_STAT1_LSTATUS));
+}
+
 static int mv88e6390_serdes_irq_enable_sgmii(struct mv88e6xxx_chip *chip,
 					     int lane, bool enable)
 {
@@ -1027,6 +1108,83 @@ static int mv88e6390_serdes_irq_status_sgmii(struct mv88e6xxx_chip *chip,
 	return err;
 }
 
+static int mv88e6393x_serdes_irq_enable_10g(struct mv88e6xxx_chip *chip,
+					    u8 lane, bool enable)
+{
+	u16 val = 0;
+
+	if (enable)
+		val |= MV88E6393X_10G_INT_LINK_CHANGE;
+
+	return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+				      MV88E6393X_10G_INT_ENABLE, val);
+}
+
+int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
+				 int lane, bool enable)
+{
+	u8 cmode = chip->ports[port].cmode;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		return mv88e6390_serdes_irq_enable_sgmii(chip, lane, enable);
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		return mv88e6393x_serdes_irq_enable_10g(chip, lane, enable);
+	}
+
+	return 0;
+}
+
+static int mv88e6393x_serdes_irq_status_10g(struct mv88e6xxx_chip *chip,
+					    u8 lane, u16 *status)
+{
+	int err;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_10G_INT_STATUS, status);
+
+	return err;
+}
+
+irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
+					 int lane)
+{
+	u8 cmode = chip->ports[port].cmode;
+	irqreturn_t ret = IRQ_NONE;
+	u16 status;
+	int err;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		err = mv88e6390_serdes_irq_status_sgmii(chip, lane, &status);
+		if (err)
+			return ret;
+		if (status & (MV88E6390_SGMII_INT_LINK_DOWN |
+			      MV88E6390_SGMII_INT_LINK_UP)) {
+			ret = IRQ_HANDLED;
+			mv88e6390_serdes_irq_link_sgmii(chip, port, lane);
+		}
+		break;
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		err = mv88e6393x_serdes_irq_status_10g(chip, lane, &status);
+		if (err)
+			return err;
+		if (status & MV88E6393X_10G_INT_LINK_CHANGE) {
+			ret = IRQ_HANDLED;
+			mv88e6393x_serdes_irq_link_10g(chip, port, lane);
+		}
+		break;
+	}
+
+	return ret;
+}
+
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 					int lane)
 {
@@ -1112,3 +1270,101 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
 			p[i] = reg;
 	}
 }
+
+static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
+{
+	u16 reg, pcs;
+	int err;
+
+	/* mv88e6393x family errata 4.6:
+	 * Cannot clear PwrDn bit on SERDES on port 0 if device is configured
+	 * CPU_MGD mode or P0_mode is configured for [x]MII.
+	 * Workaround: Set Port0 SERDES register 4.F002 bit 5=0 and bit 15=1.
+	 *
+	 * It seems that after this workaround the SERDES is automatically
+	 * powered up (the bit is cleared), so power it down.
+	 */
+	if (lane == MV88E6393X_PORT0_LANE) {
+		err = mv88e6390_serdes_read(chip, MV88E6393X_PORT0_LANE,
+					    MDIO_MMD_PHYXS,
+					    MV88E6393X_SERDES_POC, &reg);
+		if (err)
+			return err;
+
+		reg &= ~MV88E6393X_SERDES_POC_PDOWN;
+		reg |= MV88E6393X_SERDES_POC_RESET;
+
+		err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+					     MV88E6393X_SERDES_POC, reg);
+		if (err)
+			return err;
+
+		err = mv88e6390_serdes_power_sgmii(chip, lane, false);
+		if (err)
+			return err;
+	}
+
+	/* mv88e6393x family errata 4.8:
+	 * When a SERDES port is operating in 1000BASE-X or SGMII mode link may
+	 * not come up after hardware reset or software reset of SERDES core.
+	 * Workaround is to write SERDES register 4.F074.14=1 for only those
+	 * modes and 0 in all other modes.
+	 */
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_SERDES_POC, &pcs);
+	if (err)
+		return err;
+
+	pcs &= MV88E6393X_SERDES_POC_PCS_MASK;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6393X_ERRATA_4_8_REG, &reg);
+	if (err)
+		return err;
+
+	if (pcs == MV88E6393X_SERDES_POC_PCS_1000BASEX ||
+	    pcs == MV88E6393X_SERDES_POC_PCS_SGMII_PHY ||
+	    pcs == MV88E6393X_SERDES_POC_PCS_SGMII_MAC)
+		reg |= MV88E6393X_ERRATA_4_8_BIT;
+	else
+		reg &= ~MV88E6393X_ERRATA_4_8_BIT;
+
+	return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+				      MV88E6393X_ERRATA_4_8_REG, reg);
+}
+
+int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
+{
+	int err;
+
+	err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE);
+	if (err)
+		return err;
+
+	err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE);
+	if (err)
+		return err;
+
+	return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE);
+}
+
+int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+			    bool on)
+{
+	u8 cmode = chip->ports[port].cmode;
+
+	if (port != 0 && port != 9 && port != 10)
+		return -EOPNOTSUPP;
+
+	switch (cmode) {
+	case MV88E6XXX_PORT_STS_CMODE_SGMII:
+	case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
+	case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
+		return mv88e6390_serdes_power_sgmii(chip, lane, on);
+	case MV88E6393X_PORT_STS_CMODE_5GBASER:
+	case MV88E6393X_PORT_STS_CMODE_10GBASER:
+		return mv88e6390_serdes_power_10g(chip, lane, on);
+	}
+
+	return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index ce4d0fef124d..cbb3ba30caea 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -42,6 +42,9 @@
 /* 10GBASE-R and 10GBASE-X4/X2 */
 #define MV88E6390_10G_CTRL1		(0x1000 + MDIO_CTRL1)
 #define MV88E6390_10G_STAT1		(0x1000 + MDIO_STAT1)
+#define MV88E6393X_10G_INT_ENABLE	0x9000
+#define MV88E6393X_10G_INT_LINK_CHANGE	BIT(2)
+#define MV88E6393X_10G_INT_STATUS	0x9001
 
 /* 1000BASE-X and SGMII */
 #define MV88E6390_SGMII_BMCR		(0x2000 + MII_BMCR)
@@ -73,11 +76,33 @@
 #define MV88E6390_PG_CONTROL		0xf010
 #define MV88E6390_PG_CONTROL_ENABLE_PC		BIT(0)
 
+#define MV88E6393X_PORT0_LANE			0x00
+#define MV88E6393X_PORT9_LANE			0x09
+#define MV88E6393X_PORT10_LANE			0x0a
+
+/* Port Operational Configuration */
+#define MV88E6393X_SERDES_POC			0xf002
+#define MV88E6393X_SERDES_POC_PCS_1000BASEX	0x0000
+#define MV88E6393X_SERDES_POC_PCS_2500BASEX	0x0001
+#define MV88E6393X_SERDES_POC_PCS_SGMII_PHY	0x0002
+#define MV88E6393X_SERDES_POC_PCS_SGMII_MAC	0x0003
+#define MV88E6393X_SERDES_POC_PCS_5GBASER	0x0004
+#define MV88E6393X_SERDES_POC_PCS_10GBASER	0x0005
+#define MV88E6393X_SERDES_POC_PCS_USXGMII_PHY	0x0006
+#define MV88E6393X_SERDES_POC_PCS_USXGMII_MAC	0x0007
+#define MV88E6393X_SERDES_POC_PCS_MASK		0x0007
+#define MV88E6393X_SERDES_POC_RESET		BIT(15)
+#define MV88E6393X_SERDES_POC_PDOWN		BIT(5)
+
+#define MV88E6393X_ERRATA_4_8_REG		0xF074
+#define MV88E6393X_ERRATA_4_8_BIT		BIT(14)
+
 int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6390x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
+int mv88e6393x_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 				int lane, unsigned int mode,
 				phy_interface_t interface,
@@ -92,6 +117,8 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 				   int lane, struct phylink_link_state *state);
 int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 				   int lane, struct phylink_link_state *state);
+int mv88e6393x_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+				    int lane, struct phylink_link_state *state);
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 				    int lane);
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
@@ -110,18 +137,25 @@ int mv88e6352_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
 int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 			   bool on);
+int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
+			    bool on);
+int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip);
 int mv88e6097_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
 int mv88e6352_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
 int mv88e6390_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port, int lane,
 				bool enable);
+int mv88e6393x_serdes_irq_enable(struct mv88e6xxx_chip *chip, int port,
+				 int lane, bool enable);
 irqreturn_t mv88e6097_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 					int lane);
 irqreturn_t mv88e6352_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 					int lane);
 irqreturn_t mv88e6390_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
 					int lane);
+irqreturn_t mv88e6393x_serdes_irq_status(struct mv88e6xxx_chip *chip, int port,
+					 int lane);
 int mv88e6352_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
 int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
 				 int port, uint8_t *data);
-- 
cgit v1.2.3


From 6584b26020fc5bb586d6e9f621eb8a7343a6ed33 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 17 Mar 2021 14:46:43 +0100
Subject: net: dsa: mv88e6xxx: implement .port_set_policy for Amethyst
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 16-bit Port Policy CTL register from older chips is on 6393x changed
to Port Policy MGMT CTL, which can access more data, but indirectly and
via 8-bit registers.

The original 16-bit value is divided into first two 8-bit register in
the Port Policy MGMT CTL.

We can therefore use the previous code to compute the mask and shift,
and then
- if 0 <= shift < 8, we access register 0 in Port Policy MGMT CTL
- if 8 <= shift < 16, we access register 1 in Port Policy MGMT CTL

There are in fact other possible policy settings for Amethyst which
could be added here, but this can be done in the future.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Pavana Sharma <pavana.sharma@digi.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c |   1 +
 drivers/net/dsa/mv88e6xxx/port.c | 122 ++++++++++++++++++++++++++++++---------
 drivers/net/dsa/mv88e6xxx/port.h |   3 +
 3 files changed, 99 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 0ffeb73a4058..f0a9423af85d 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -4627,6 +4627,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
 	.port_set_speed_duplex = mv88e6393x_port_set_speed_duplex,
 	.port_max_speed_mode = mv88e6393x_port_max_speed_mode,
 	.port_tag_remap = mv88e6390_port_tag_remap,
+	.port_set_policy = mv88e6393x_port_set_policy,
 	.port_set_frame_mode = mv88e6351_port_set_frame_mode,
 	.port_set_ucast_flood = mv88e6352_port_set_ucast_flood,
 	.port_set_mcast_flood = mv88e6352_port_set_mcast_flood,
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 154541d807df..6a9c45c2127a 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -1325,6 +1325,27 @@ int mv88e6xxx_port_disable_pri_override(struct mv88e6xxx_chip *chip, int port)
 
 /* Offset 0x0E: Policy & MGMT Control Register for FAMILY 6191X 6193X 6393X */
 
+static int mv88e6393x_port_policy_read(struct mv88e6xxx_chip *chip, int port,
+				       u16 pointer, u8 *data)
+{
+	u16 reg;
+	int err;
+
+	err = mv88e6xxx_port_write(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				   pointer);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6393X_PORT_POLICY_MGMT_CTL,
+				  &reg);
+	if (err)
+		return err;
+
+	*data = reg;
+
+	return 0;
+}
+
 static int mv88e6393x_port_policy_write(struct mv88e6xxx_chip *chip, int port,
 					u16 pointer, u8 data)
 {
@@ -1526,46 +1547,43 @@ int mv88e6390_port_tag_remap(struct mv88e6xxx_chip *chip, int port)
 
 /* Offset 0x0E: Policy Control Register */
 
-int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
-			      enum mv88e6xxx_policy_mapping mapping,
-			      enum mv88e6xxx_policy_action action)
+static int
+mv88e6xxx_port_policy_mapping_get_pos(enum mv88e6xxx_policy_mapping mapping,
+				      enum mv88e6xxx_policy_action action,
+				      u16 *mask, u16 *val, int *shift)
 {
-	u16 reg, mask, val;
-	int shift;
-	int err;
-
 	switch (mapping) {
 	case MV88E6XXX_POLICY_MAPPING_DA:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_DA_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_DA_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_DA_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_DA_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_SA:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_SA_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_SA_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_SA_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_SA_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_VTU:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VTU_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_VTU_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VTU_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_VTU_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_ETYPE:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_ETYPE_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_PPPOE:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_PPPOE_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_VBAS:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_VBAS_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_OPT82:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_OPT82_MASK;
 		break;
 	case MV88E6XXX_POLICY_MAPPING_UDP:
-		shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_UDP_MASK);
-		mask = MV88E6XXX_PORT_POLICY_CTL_UDP_MASK;
+		*shift = __bf_shf(MV88E6XXX_PORT_POLICY_CTL_UDP_MASK);
+		*mask = MV88E6XXX_PORT_POLICY_CTL_UDP_MASK;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -1573,21 +1591,37 @@ int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 
 	switch (action) {
 	case MV88E6XXX_POLICY_ACTION_NORMAL:
-		val = MV88E6XXX_PORT_POLICY_CTL_NORMAL;
+		*val = MV88E6XXX_PORT_POLICY_CTL_NORMAL;
 		break;
 	case MV88E6XXX_POLICY_ACTION_MIRROR:
-		val = MV88E6XXX_PORT_POLICY_CTL_MIRROR;
+		*val = MV88E6XXX_PORT_POLICY_CTL_MIRROR;
 		break;
 	case MV88E6XXX_POLICY_ACTION_TRAP:
-		val = MV88E6XXX_PORT_POLICY_CTL_TRAP;
+		*val = MV88E6XXX_PORT_POLICY_CTL_TRAP;
 		break;
 	case MV88E6XXX_POLICY_ACTION_DISCARD:
-		val = MV88E6XXX_PORT_POLICY_CTL_DISCARD;
+		*val = MV88E6XXX_PORT_POLICY_CTL_DISCARD;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	return 0;
+}
+
+int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			      enum mv88e6xxx_policy_mapping mapping,
+			      enum mv88e6xxx_policy_action action)
+{
+	u16 reg, mask, val;
+	int shift;
+	int err;
+
+	err = mv88e6xxx_port_policy_mapping_get_pos(mapping, action, &mask,
+						    &val, &shift);
+	if (err)
+		return err;
+
 	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_POLICY_CTL, &reg);
 	if (err)
 		return err;
@@ -1597,3 +1631,37 @@ int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 
 	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_POLICY_CTL, reg);
 }
+
+int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			       enum mv88e6xxx_policy_mapping mapping,
+			       enum mv88e6xxx_policy_action action)
+{
+	u16 mask, val;
+	int shift;
+	int err;
+	u16 ptr;
+	u8 reg;
+
+	err = mv88e6xxx_port_policy_mapping_get_pos(mapping, action, &mask,
+						    &val, &shift);
+	if (err)
+		return err;
+
+	/* The 16-bit Port Policy CTL register from older chips is on 6393x
+	 * changed to Port Policy MGMT CTL, which can access more data, but
+	 * indirectly. The original 16-bit value is divided into two 8-bit
+	 * registers.
+	 */
+	ptr = shift / 8;
+	shift %= 8;
+	mask >>= ptr * 8;
+
+	err = mv88e6393x_port_policy_read(chip, port, ptr, &reg);
+	if (err)
+		return err;
+
+	reg &= ~mask;
+	reg |= (val << shift) & mask;
+
+	return mv88e6393x_port_policy_write(chip, port, ptr, reg);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 948ba577a159..921d54969dad 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -386,6 +386,9 @@ int mv88e6352_port_set_mcast_flood(struct mv88e6xxx_chip *chip, int port,
 int mv88e6352_port_set_policy(struct mv88e6xxx_chip *chip, int port,
 			      enum mv88e6xxx_policy_mapping mapping,
 			      enum mv88e6xxx_policy_action action);
+int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+			       enum mv88e6xxx_policy_mapping mapping,
+			       enum mv88e6xxx_policy_action action);
 int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port,
 				  u16 etype);
 int mv88e6393x_set_egress_port(struct mv88e6xxx_chip *chip,
-- 
cgit v1.2.3


From a04be4b6b539cfce52181f6f8f15a823d99f520c Mon Sep 17 00:00:00 2001
From: Michael Tretter <m.tretter@pengutronix.de>
Date: Wed, 17 Mar 2021 17:16:09 +0100
Subject: net: macb: simplify clk_init with dev_err_probe

On some platforms, e.g., the ZynqMP, devm_clk_get can return
-EPROBE_DEFER if the clock controller, which is implemented in firmware,
has not been probed yet.

As clk_init is only called during probe, use dev_err_probe to simplify
the error message and hide it for -EPROBE_DEFER.

Signed-off-by: Michael Tretter <m.tretter@pengutronix.de>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index e7c123aadf56..f56f3dbbc015 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3758,17 +3758,15 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 		*hclk = devm_clk_get(&pdev->dev, "hclk");
 	}
 
-	if (IS_ERR_OR_NULL(*pclk)) {
-		err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV;
-		dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err);
-		return err;
-	}
-
-	if (IS_ERR_OR_NULL(*hclk)) {
-		err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV;
-		dev_err(&pdev->dev, "failed to get hclk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR_OR_NULL(*pclk))
+		return dev_err_probe(&pdev->dev,
+				     IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV,
+				     "failed to get pclk\n");
+
+	if (IS_ERR_OR_NULL(*hclk))
+		return dev_err_probe(&pdev->dev,
+				     IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV,
+				     "failed to get hclk\n");
 
 	*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
 	if (IS_ERR(*tx_clk))
-- 
cgit v1.2.3


From 2ed37183abb70233146dc82f19cb1cbceed2b505 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Wed, 3 Mar 2021 14:59:53 +0200
Subject: netfilter: flowtable: separate replace, destroy and stats to
 different workqueues

Currently the flow table offload replace, destroy and stats work items are
executed on a single workqueue. As such, DESTROY and STATS commands may
be backloged after a burst of REPLACE work items. This scenario can bloat
up memory and may cause active connections to age.

Instatiate add, del and stats workqueues to avoid backlogs of non-dependent
actions. Provide sysfs control over the workqueue attributes, allowing
userspace applications to control the workqueue cpumask.

Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_offload.c | 44 ++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2a6993fa40d7..1b979c8b3ba0 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -13,7 +13,9 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-static struct workqueue_struct *nf_flow_offload_wq;
+static struct workqueue_struct *nf_flow_offload_add_wq;
+static struct workqueue_struct *nf_flow_offload_del_wq;
+static struct workqueue_struct *nf_flow_offload_stats_wq;
 
 struct flow_offload_work {
 	struct list_head	list;
@@ -826,7 +828,12 @@ static void flow_offload_work_handler(struct work_struct *work)
 
 static void flow_offload_queue_work(struct flow_offload_work *offload)
 {
-	queue_work(nf_flow_offload_wq, &offload->work);
+	if (offload->cmd == FLOW_CLS_REPLACE)
+		queue_work(nf_flow_offload_add_wq, &offload->work);
+	else if (offload->cmd == FLOW_CLS_DESTROY)
+		queue_work(nf_flow_offload_del_wq, &offload->work);
+	else
+		queue_work(nf_flow_offload_stats_wq, &offload->work);
 }
 
 static struct flow_offload_work *
@@ -898,8 +905,11 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
 
 void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
 {
-	if (nf_flowtable_hw_offload(flowtable))
-		flush_workqueue(nf_flow_offload_wq);
+	if (nf_flowtable_hw_offload(flowtable)) {
+		flush_workqueue(nf_flow_offload_add_wq);
+		flush_workqueue(nf_flow_offload_del_wq);
+		flush_workqueue(nf_flow_offload_stats_wq);
+	}
 }
 
 static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
@@ -1011,15 +1021,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
 
 int nf_flow_table_offload_init(void)
 {
-	nf_flow_offload_wq  = alloc_workqueue("nf_flow_table_offload",
-					      WQ_UNBOUND, 0);
-	if (!nf_flow_offload_wq)
+	nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
+						  WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_add_wq)
 		return -ENOMEM;
 
+	nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
+						  WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_del_wq)
+		goto err_del_wq;
+
+	nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
+						    WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!nf_flow_offload_stats_wq)
+		goto err_stats_wq;
+
 	return 0;
+
+err_stats_wq:
+	destroy_workqueue(nf_flow_offload_del_wq);
+err_del_wq:
+	destroy_workqueue(nf_flow_offload_add_wq);
+	return -ENOMEM;
 }
 
 void nf_flow_table_offload_exit(void)
 {
-	destroy_workqueue(nf_flow_offload_wq);
+	destroy_workqueue(nf_flow_offload_add_wq);
+	destroy_workqueue(nf_flow_offload_del_wq);
+	destroy_workqueue(nf_flow_offload_stats_wq);
 }
-- 
cgit v1.2.3


From c2168e6bd7ec50cedb69b3be1ba6146e28893c69 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 02:42:09 -0600
Subject: netfilter: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of just
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_dccp.c | 1 +
 net/netfilter/nf_tables_api.c           | 1 +
 net/netfilter/nft_ct.c                  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index db7479db8512..4f33307fa3cf 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -397,6 +397,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 			msg = "not picking up existing connection ";
 			goto out_invalid;
 		}
+		break;
 	case CT_DCCP_REQUEST:
 		break;
 	case CT_DCCP_INVALID:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 224c8e537cb3..083c112bee0b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8557,6 +8557,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 							data->verdict.chain);
 				if (err < 0)
 					return err;
+				break;
 			default:
 				break;
 			}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 882fe8648653..0592a9456084 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -527,6 +527,7 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
 	case NFT_CT_ZONE:
 		if (--nft_ct_pcpu_template_refcnt == 0)
 			nft_ct_tmpl_put_pcpu();
+		break;
 #endif
 	default:
 		break;
-- 
cgit v1.2.3


From d4a96be65423296e42091b0b79973b8d446e7798 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 11 Mar 2021 13:55:59 +0800
Subject: netfilter: conntrack: Remove unused variable declaration

commit e97c3e278e95 ("tproxy: split off ipv6 defragmentation to a separate
module") left behind this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 7b3c873f8839..e95483192d1b 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -4,7 +4,4 @@
 
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 
-#include <linux/sysctl.h>
-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
-
 #endif /* _NF_CONNTRACK_IPV6_H*/
-- 
cgit v1.2.3


From 2fc11745c3ffa324643c1e6d8cf8f5273d9f9571 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:53:09 +0100
Subject: netfilter: flowtable: consolidate skb_try_make_writable() call

Fetch the layer 4 header size to be mangled by NAT when building the
tuple, then use it to make writable the network and the transport
headers. After this update, the NAT routines now assumes that the skbuff
area is writable. Do the pointer refetch only after the single
skb_try_make_writable() call.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_core.c | 12 -------
 net/netfilter/nf_flow_table_ip.c   | 74 ++++++++++++++++----------------------
 2 files changed, 31 insertions(+), 55 deletions(-)

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5fa657b8e03d..ff5d94b644ac 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -395,9 +395,6 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
 
@@ -409,9 +406,6 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace2(&udph->check, skb, port,
@@ -447,9 +441,6 @@ int nf_flow_snat_port(const struct flow_offload *flow,
 	struct flow_ports *hdr;
 	__be16 port, new_port;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
-		return -1;
-
 	hdr = (void *)(skb_network_header(skb) + thoff);
 
 	switch (dir) {
@@ -478,9 +469,6 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 	struct flow_ports *hdr;
 	__be16 port, new_port;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*hdr)))
-		return -1;
-
 	hdr = (void *)(skb_network_header(skb) + thoff);
 
 	switch (dir) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index a698dbe28ef5..2b8ee5dcef64 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -39,9 +39,6 @@ static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
 
@@ -53,9 +50,6 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace4(&udph->check, skb, addr,
@@ -136,19 +130,17 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 }
 
 static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			  unsigned int thoff, enum flow_offload_tuple_dir dir)
+			  unsigned int thoff, enum flow_offload_tuple_dir dir,
+			  struct iphdr *iph)
 {
-	struct iphdr *iph = ip_hdr(skb);
-
 	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
 	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
+	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
 		return -1;
 
-	iph = ip_hdr(skb);
 	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
 	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0))
+	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
 		return -1;
 
 	return 0;
@@ -160,10 +152,10 @@ static bool ip_has_options(unsigned int thoff)
 }
 
 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-			    struct flow_offload_tuple *tuple)
+			    struct flow_offload_tuple *tuple, u32 *hdrsize)
 {
-	unsigned int thoff, hdrsize;
 	struct flow_ports *ports;
+	unsigned int thoff;
 	struct iphdr *iph;
 
 	if (!pskb_may_pull(skb, sizeof(*iph)))
@@ -178,10 +170,10 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		hdrsize = sizeof(struct tcphdr);
+		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
-		hdrsize = sizeof(struct udphdr);
+		*hdrsize = sizeof(struct udphdr);
 		break;
 	default:
 		return -1;
@@ -191,7 +183,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 		return -1;
 
 	thoff = iph->ihl * 4;
-	if (!pskb_may_pull(skb, thoff + hdrsize))
+	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
 	iph = ip_hdr(skb);
@@ -252,11 +244,12 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
+	u32 hdrsize;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
+	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -271,11 +264,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
-	if (skb_try_make_writable(skb, sizeof(*iph)))
+	iph = ip_hdr(skb);
+	thoff = iph->ihl * 4;
+	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
-	thoff = ip_hdr(skb)->ihl * 4;
-	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
+	iph = ip_hdr(skb);
+	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
 	flow_offload_refresh(flow_table, flow);
@@ -285,10 +280,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+	if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
 		return NF_DROP;
 
-	iph = ip_hdr(skb);
 	ip_decrease_ttl(iph);
 	skb->tstamp = 0;
 
@@ -317,9 +311,6 @@ static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct tcphdr *tcph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*tcph)))
-		return -1;
-
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
 				  new_addr->s6_addr32, true);
@@ -333,9 +324,6 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
 {
 	struct udphdr *udph;
 
-	if (skb_try_make_writable(skb, thoff + sizeof(*udph)))
-		return -1;
-
 	udph = (void *)(skb_network_header(skb) + thoff);
 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
 		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
@@ -417,31 +405,30 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
 
 static int nf_flow_nat_ipv6(const struct flow_offload *flow,
 			    struct sk_buff *skb,
-			    enum flow_offload_tuple_dir dir)
+			    enum flow_offload_tuple_dir dir,
+			    struct ipv6hdr *ip6h)
 {
-	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	unsigned int thoff = sizeof(*ip6h);
 
 	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
 	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
 	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
 	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0))
+	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
 		return -1;
 
 	return 0;
 }
 
 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-			      struct flow_offload_tuple *tuple)
+			      struct flow_offload_tuple *tuple, u32 *hdrsize)
 {
-	unsigned int thoff, hdrsize;
 	struct flow_ports *ports;
 	struct ipv6hdr *ip6h;
+	unsigned int thoff;
 
 	if (!pskb_may_pull(skb, sizeof(*ip6h)))
 		return -1;
@@ -450,10 +437,10 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 
 	switch (ip6h->nexthdr) {
 	case IPPROTO_TCP:
-		hdrsize = sizeof(struct tcphdr);
+		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
-		hdrsize = sizeof(struct udphdr);
+		*hdrsize = sizeof(struct udphdr);
 		break;
 	default:
 		return -1;
@@ -463,7 +450,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 		return -1;
 
 	thoff = sizeof(*ip6h);
-	if (!pskb_may_pull(skb, thoff + hdrsize))
+	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
 	ip6h = ipv6_hdr(skb);
@@ -493,11 +480,12 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	struct net_device *outdev;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
+	u32 hdrsize;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -523,13 +511,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
-	if (skb_try_make_writable(skb, sizeof(*ip6h)))
+	if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
 		return NF_DROP;
 
-	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
+	ip6h = ipv6_hdr(skb);
+	if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
 		return NF_DROP;
 
-	ip6h = ipv6_hdr(skb);
 	ip6h->hop_limit--;
 	skb->tstamp = 0;
 
-- 
cgit v1.2.3


From 2babb46c8c825e5039bbf0c273d82df3210dd43b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:54:43 +0100
Subject: netfilter: flowtable: move skb_try_make_writable() before NAT in IPv4

For consistency with the IPv6 flowtable datapath and to make sure the
skbuff is writable right before the NAT header updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 2b8ee5dcef64..95adf74515ea 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -266,10 +266,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	iph = ip_hdr(skb);
 	thoff = iph->ihl * 4;
-	if (skb_try_make_writable(skb, thoff + hdrsize))
-		return NF_DROP;
-
-	iph = ip_hdr(skb);
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
@@ -280,6 +276,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return NF_ACCEPT;
 	}
 
+	if (skb_try_make_writable(skb, thoff + hdrsize))
+		return NF_DROP;
+
+	iph = ip_hdr(skb);
 	if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
 		return NF_DROP;
 
-- 
cgit v1.2.3


From 4f08f173d08cad4664e447e580dc0c5aa6332db3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:55:11 +0100
Subject: netfilter: flowtable: move FLOW_OFFLOAD_DIR_MAX away from enumeration

This allows to remove the default case which should not ever happen and
that was added to avoid gcc warnings on unhandled FLOW_OFFLOAD_DIR_MAX
enumeration case.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 2 +-
 net/netfilter/nf_flow_table_core.c    | 4 ----
 net/netfilter/nf_flow_table_ip.c      | 8 --------
 3 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 54c4d5c908a5..ce507251b3d8 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -86,8 +86,8 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
 enum flow_offload_tuple_dir {
 	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
 	FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
-	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
 };
+#define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
 
 struct flow_offload_tuple {
 	union {
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index ff5d94b644ac..3bdbd962a084 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -454,8 +454,6 @@ int nf_flow_snat_port(const struct flow_offload *flow,
 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
 		hdr->dest = new_port;
 		break;
-	default:
-		return -1;
 	}
 
 	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
@@ -482,8 +480,6 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
 		hdr->source = new_port;
 		break;
-	default:
-		return -1;
 	}
 
 	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 95adf74515ea..0579e15c4968 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -96,8 +96,6 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
 		iph->daddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
@@ -121,8 +119,6 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
 		iph->saddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
@@ -371,8 +367,6 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
 		ip6h->daddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 
 	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
@@ -396,8 +390,6 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
 		ip6h->saddr = new_addr;
 		break;
-	default:
-		return -1;
 	}
 
 	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
-- 
cgit v1.2.3


From f4401262b927b84d2f1861e347627fa0d77d4eb7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:55:25 +0100
Subject: netfilter: flowtable: fast NAT functions never fail

Simplify existing fast NAT routines by returning void. After the
skb_try_make_writable() call consolidation, these routines cannot ever
fail.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  12 +--
 net/netfilter/nf_flow_table_core.c    |  41 ++++------
 net/netfilter/nf_flow_table_ip.c      | 147 ++++++++++++++--------------------
 3 files changed, 84 insertions(+), 116 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ce507251b3d8..fb165697c8a1 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -229,12 +229,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table);
 
 void flow_offload_teardown(struct flow_offload *flow);
 
-int nf_flow_snat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir);
-int nf_flow_dnat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_snat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir);
+void nf_flow_dnat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir);
 
 struct flow_ports {
 	__be16 source, dest;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 3bdbd962a084..8ffd3f3c288c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -389,20 +389,17 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
 
-
-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
-				__be16 port, __be16 new_port)
+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+				 __be16 port, __be16 new_port)
 {
 	struct tcphdr *tcph;
 
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
-
-	return 0;
 }
 
-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
-				__be16 port, __be16 new_port)
+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+				 __be16 port, __be16 new_port)
 {
 	struct udphdr *udph;
 
@@ -413,30 +410,24 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
-			    u8 protocol, __be16 port, __be16 new_port)
+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+			     u8 protocol, __be16 port, __be16 new_port)
 {
 	switch (protocol) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
-			return NF_DROP;
+		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
-			return NF_DROP;
+		nf_flow_nat_port_udp(skb, thoff, port, new_port);
 		break;
 	}
-
-	return 0;
 }
 
-int nf_flow_snat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_snat_port(const struct flow_offload *flow,
+		       struct sk_buff *skb, unsigned int thoff,
+		       u8 protocol, enum flow_offload_tuple_dir dir)
 {
 	struct flow_ports *hdr;
 	__be16 port, new_port;
@@ -456,13 +447,13 @@ int nf_flow_snat_port(const struct flow_offload *flow,
 		break;
 	}
 
-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 }
 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
 
-int nf_flow_dnat_port(const struct flow_offload *flow,
-		      struct sk_buff *skb, unsigned int thoff,
-		      u8 protocol, enum flow_offload_tuple_dir dir)
+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
+		       unsigned int thoff, u8 protocol,
+		       enum flow_offload_tuple_dir dir)
 {
 	struct flow_ports *hdr;
 	__be16 port, new_port;
@@ -482,7 +473,7 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 		break;
 	}
 
-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
 }
 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 0579e15c4968..714dc083f093 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -34,19 +34,17 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
 	return 0;
 }
 
-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+			       __be32 addr, __be32 new_addr)
 {
 	struct tcphdr *tcph;
 
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
-
-	return 0;
 }
 
-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
-			      __be32 addr, __be32 new_addr)
+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+			       __be32 addr, __be32 new_addr)
 {
 	struct udphdr *udph;
 
@@ -57,31 +55,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
-				  unsigned int thoff, __be32 addr,
-				  __be32 new_addr)
+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+				   unsigned int thoff, __be32 addr,
+				   __be32 new_addr)
 {
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
 		break;
 	}
-
-	return 0;
 }
 
-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ip(const struct flow_offload *flow,
+			    struct sk_buff *skb, struct iphdr *iph,
+			    unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	__be32 addr, new_addr;
 
@@ -99,12 +91,12 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 }
 
-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			   struct iphdr *iph, unsigned int thoff,
-			   enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ip(const struct flow_offload *flow,
+			    struct sk_buff *skb, struct iphdr *iph,
+			    unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	__be32 addr, new_addr;
 
@@ -122,24 +114,21 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 	}
 	csum_replace4(&iph->check, addr, new_addr);
 
-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
 }
 
-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
 			  unsigned int thoff, enum flow_offload_tuple_dir dir,
 			  struct iphdr *iph)
 {
-	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
-	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
-		return -1;
-
-	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
-	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
-	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
-		return -1;
-
-	return 0;
+	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
+		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
+	}
+	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
+		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
+	}
 }
 
 static bool ip_has_options(unsigned int thoff)
@@ -276,8 +265,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return NF_DROP;
 
 	iph = ip_hdr(skb);
-	if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0)
-		return NF_DROP;
+	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
 
 	ip_decrease_ttl(iph);
 	skb->tstamp = 0;
@@ -301,22 +289,21 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+				 struct in6_addr *addr,
+				 struct in6_addr *new_addr,
+				 struct ipv6hdr *ip6h)
 {
 	struct tcphdr *tcph;
 
 	tcph = (void *)(skb_network_header(skb) + thoff);
 	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
 				  new_addr->s6_addr32, true);
-
-	return 0;
 }
 
-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
-				struct in6_addr *addr,
-				struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+				 struct in6_addr *addr,
+				 struct in6_addr *new_addr)
 {
 	struct udphdr *udph;
 
@@ -327,32 +314,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
 		if (!udph->check)
 			udph->check = CSUM_MANGLED_0;
 	}
-
-	return 0;
 }
 
-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
-				    unsigned int thoff, struct in6_addr *addr,
-				    struct in6_addr *new_addr)
+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+				     unsigned int thoff, struct in6_addr *addr,
+				     struct in6_addr *new_addr)
 {
 	switch (ip6h->nexthdr) {
 	case IPPROTO_TCP:
-		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
 		break;
 	case IPPROTO_UDP:
-		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
-			return NF_DROP;
+		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
 		break;
 	}
-
-	return 0;
 }
 
-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
+			      struct sk_buff *skb, struct ipv6hdr *ip6h,
+			      unsigned int thoff,
+			      enum flow_offload_tuple_dir dir)
 {
 	struct in6_addr addr, new_addr;
 
@@ -369,13 +350,13 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
 		break;
 	}
 
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 }
 
-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
-			     unsigned int thoff,
-			     enum flow_offload_tuple_dir dir)
+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
+			      struct sk_buff *skb, struct ipv6hdr *ip6h,
+			      unsigned int thoff,
+			      enum flow_offload_tuple_dir dir)
 {
 	struct in6_addr addr, new_addr;
 
@@ -392,27 +373,24 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
 		break;
 	}
 
-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
 }
 
-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
-			    struct sk_buff *skb,
-			    enum flow_offload_tuple_dir dir,
-			    struct ipv6hdr *ip6h)
+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
+			     struct sk_buff *skb,
+			     enum flow_offload_tuple_dir dir,
+			     struct ipv6hdr *ip6h)
 {
 	unsigned int thoff = sizeof(*ip6h);
 
-	if (test_bit(NF_FLOW_SNAT, &flow->flags) &&
-	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-		return -1;
-
-	if (test_bit(NF_FLOW_DNAT, &flow->flags) &&
-	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
-	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
-		return -1;
-
-	return 0;
+	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
+		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
+	}
+	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
+		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
+		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
+	}
 }
 
 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
@@ -507,8 +485,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		return NF_DROP;
 
 	ip6h = ipv6_hdr(skb);
-	if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0)
-		return NF_DROP;
+	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
 
 	ip6h->hop_limit--;
 	skb->tstamp = 0;
-- 
cgit v1.2.3


From e5075c0badaaac245a6fa0b4625b5cd714d8ade3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:55:28 +0100
Subject: netfilter: flowtable: call dst_check() to fall back to classic
 forwarding

In case the route is stale, pass up the packet to the classic forwarding
path for re-evaluation and schedule this flow entry for removal.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 714dc083f093..3a8423899def 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -197,14 +197,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static int nf_flow_offload_dst_check(struct dst_entry *dst)
-{
-	if (unlikely(dst_xfrm(dst)))
-		return dst_check(dst, 0) ? 0 : -1;
-
-	return 0;
-}
-
 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 				      const struct nf_hook_state *state,
 				      struct dst_entry *dst)
@@ -256,7 +248,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	flow_offload_refresh(flow_table, flow);
 
-	if (nf_flow_offload_dst_check(&rt->dst)) {
+	if (!dst_check(&rt->dst, 0)) {
 		flow_offload_teardown(flow);
 		return NF_ACCEPT;
 	}
@@ -476,7 +468,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	flow_offload_refresh(flow_table, flow);
 
-	if (nf_flow_offload_dst_check(&rt->dst)) {
+	if (!dst_check(&rt->dst, 0)) {
 		flow_offload_teardown(flow);
 		return NF_ACCEPT;
 	}
-- 
cgit v1.2.3


From 1b9cd7690a1ef68c8f3756cae1ab88bf86660f0b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Mar 2021 15:56:40 +0100
Subject: netfilter: flowtable: refresh timeout after dst and writable checks

Refresh the timeout (and retry hardware offload) once the skbuff dst
is confirmed to be current and after the skbuff is made writable.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_ip.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 3a8423899def..3be58b6d60af 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -246,8 +246,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	flow_offload_refresh(flow_table, flow);
-
 	if (!dst_check(&rt->dst, 0)) {
 		flow_offload_teardown(flow);
 		return NF_ACCEPT;
@@ -256,6 +254,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
+	flow_offload_refresh(flow_table, flow);
+
 	iph = ip_hdr(skb);
 	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
 
@@ -466,8 +466,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				sizeof(*ip6h)))
 		return NF_ACCEPT;
 
-	flow_offload_refresh(flow_table, flow);
-
 	if (!dst_check(&rt->dst, 0)) {
 		flow_offload_teardown(flow);
 		return NF_ACCEPT;
@@ -476,6 +474,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
 		return NF_DROP;
 
+	flow_offload_refresh(flow_table, flow);
+
 	ip6h = ipv6_hdr(skb);
 	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
 
-- 
cgit v1.2.3


From 0ce7cf4127f14078ca598ba9700d813178a59409 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 18 Mar 2021 01:25:05 +0100
Subject: netfilter: nftables: update table flags from the commit phase

Do not update table flags from the preparation phase. Store the flags
update into the transaction, then update the flags from the commit
phase.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  9 ++++++---
 net/netfilter/nf_tables_api.c     | 31 ++++++++++++++++---------------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index fdec57d862b7..67bc36f7f4fb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1498,13 +1498,16 @@ struct nft_trans_chain {
 
 struct nft_trans_table {
 	bool				update;
-	bool				enable;
+	u8				state;
+	u32				flags;
 };
 
 #define nft_trans_table_update(trans)	\
 	(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_enable(trans)	\
-	(((struct nft_trans_table *)trans->data)->enable)
+#define nft_trans_table_state(trans)	\
+	(((struct nft_trans_table *)trans->data)->state)
+#define nft_trans_table_flags(trans)	\
+	(((struct nft_trans_table *)trans->data)->flags)
 
 struct nft_trans_elem {
 	struct nft_set			*set;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 083c112bee0b..bd5e8122ea5e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -900,6 +900,12 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
 	nft_table_disable(net, table, 0);
 }
 
+enum {
+	NFT_TABLE_STATE_UNCHANGED	= 0,
+	NFT_TABLE_STATE_DORMANT,
+	NFT_TABLE_STATE_WAKEUP
+};
+
 static int nf_tables_updtable(struct nft_ctx *ctx)
 {
 	struct nft_trans *trans;
@@ -929,19 +935,17 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 
 	if ((flags & NFT_TABLE_F_DORMANT) &&
 	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
-		nft_trans_table_enable(trans) = false;
+		nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-		ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
 		ret = nf_tables_table_enable(ctx->net, ctx->table);
 		if (ret >= 0)
-			nft_trans_table_enable(trans) = true;
-		else
-			ctx->table->flags |= NFT_TABLE_F_DORMANT;
+			nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
 	}
 	if (ret < 0)
 		goto err;
 
+	nft_trans_table_flags(trans) = flags;
 	nft_trans_table_update(trans) = true;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
@@ -8068,11 +8072,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (!nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(net,
-								trans->ctx.table);
-					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
-				}
+				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+					nf_tables_table_disable(net, trans->ctx.table);
+
+				trans->ctx.table->flags = nft_trans_table_flags(trans);
 			} else {
 				nft_clear(net, trans->ctx.table);
 			}
@@ -8283,11 +8286,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (nft_trans_table_enable(trans)) {
-					nf_tables_table_disable(net,
-								trans->ctx.table);
-					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
-				}
+				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+					nf_tables_table_disable(net, trans->ctx.table);
+
 				nft_trans_destroy(trans);
 			} else {
 				list_del_rcu(&trans->ctx.table->list);
-- 
cgit v1.2.3


From 97a19caf1b1f6a9d4f620a9d51405a1973bd4641 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 17 Mar 2021 10:41:32 -0700
Subject: bpf: net: Emit anonymous enum with BPF_TCP_CLOSE value explicitly

The selftest failed to compile with clang-built bpf-next.
Adding LLVM=1 to your vmlinux and selftest build will use clang.
The error message is:
  progs/test_sk_storage_tracing.c:38:18: error: use of undeclared identifier 'BPF_TCP_CLOSE'
          if (newstate == BPF_TCP_CLOSE)
                          ^
  1 error generated.
  make: *** [Makefile:423: /bpf-next/tools/testing/selftests/bpf/test_sk_storage_tracing.o] Error 1

The reason for the failure is that BPF_TCP_CLOSE, a value of
an anonymous enum defined in uapi bpf.h, is not defined in
vmlinux.h. gcc does not have this problem. Since vmlinux.h
is derived from BTF which is derived from vmlinux DWARF,
that means gcc-produced vmlinux DWARF has BPF_TCP_CLOSE
while llvm-produced vmlinux DWARF does not have.

BPF_TCP_CLOSE is referenced in net/ipv4/tcp.c as
  BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
The following test mimics the above BUILD_BUG_ON, preprocessed
with clang compiler, and shows gcc DWARF contains BPF_TCP_CLOSE while
llvm DWARF does not.

  $ cat t.c
  enum {
    BPF_TCP_ESTABLISHED = 1,
    BPF_TCP_CLOSE = 7,
  };
  enum {
    TCP_ESTABLISHED = 1,
    TCP_CLOSE = 7,
  };

  int test() {
    do {
      extern void __compiletime_assert_767(void) ;
      if ((int)BPF_TCP_CLOSE != (int)TCP_CLOSE) __compiletime_assert_767();
    } while (0);
    return 0;
  }
  $ clang t.c -O2 -c -g && llvm-dwarfdump t.o | grep BPF_TCP_CLOSE
  $ gcc t.c -O2 -c -g && llvm-dwarfdump t.o | grep BPF_TCP_CLOSE
                    DW_AT_name    ("BPF_TCP_CLOSE")

Further checking clang code find clang actually tried to
evaluate condition at compile time. If it is definitely
true/false, it will perform optimization and the whole if condition
will be removed before generating IR/debuginfo.

This patch explicited add an expression after the
above mentioned BUILD_BUG_ON in net/ipv4/tcp.c like
  (void)BPF_TCP_ESTABLISHED
to enable generation of debuginfo for the anonymous
enum which also includes BPF_TCP_CLOSE.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210317174132.589276-1-yhs@fb.com
---
 include/linux/btf.h |  1 +
 net/ipv4/tcp.c      | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 7fabf1428093..9c1b52738bbe 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -9,6 +9,7 @@
 #include <uapi/linux/bpf.h>
 
 #define BTF_TYPE_EMIT(type) ((void)(type *)0)
+#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
 
 struct btf;
 struct btf_member;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index de7cc8445ac0..e14fd0c50c10 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -267,6 +267,7 @@
 #include <linux/slab.h>
 #include <linux/errqueue.h>
 #include <linux/static_key.h>
+#include <linux/btf.h>
 
 #include <net/icmp.h>
 #include <net/inet_common.h>
@@ -2587,6 +2588,17 @@ void tcp_set_state(struct sock *sk, int state)
 	BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
 	BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
 
+	/* bpf uapi header bpf.h defines an anonymous enum with values
+	 * BPF_TCP_* used by bpf programs. Currently gcc built vmlinux
+	 * is able to emit this enum in DWARF due to the above BUILD_BUG_ON.
+	 * But clang built vmlinux does not have this enum in DWARF
+	 * since clang removes the above code before generating IR/debuginfo.
+	 * Let us explicitly emit the type debuginfo to ensure the
+	 * above-mentioned anonymous enum in the vmlinux DWARF and hence BTF
+	 * regardless of which compiler is used.
+	 */
+	BTF_TYPE_EMIT_ENUM(BPF_TCP_ESTABLISHED);
+
 	if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
 		tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
 
-- 
cgit v1.2.3


From 9ae2c26e43248b722e79fe867be38062c9dd1e5f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 17 Mar 2021 13:05:09 -0700
Subject: libbpf: provide NULL and KERNEL_VERSION macros in bpf_helpers.h

Given that vmlinux.h is not compatible with headers like stddef.h, NULL poses
an annoying problem: it is defined as #define, so is not captured in BTF, so
is not emitted into vmlinux.h. This leads to users either sticking to explicit
0, or defining their own NULL (as progs/skb_pkt_end.c does).

But it's easy for bpf_helpers.h to provide (conditionally) NULL definition.
Similarly, KERNEL_VERSION is another commonly missed macro that came up
multiple times. So this patch adds both of them, along with offsetof(), that
also is typically defined in stddef.h, just like NULL.

This might cause compilation warning for existing BPF applications defining
their own NULL and/or KERNEL_VERSION already:

  progs/skb_pkt_end.c:7:9: warning: 'NULL' macro redefined [-Wmacro-redefined]
  #define NULL 0
          ^
  /tmp/linux/tools/testing/selftests/bpf/tools/include/vmlinux.h:4:9: note: previous definition is here
  #define NULL ((void *)0)
	  ^

It is trivial to fix, though, so long-term benefits outweight temporary
inconveniences.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20210317200510.1354627-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf_helpers.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 53ff81c49dbd..cc2e51c64a54 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,8 +40,22 @@
 #define __weak __attribute__((weak))
 #endif
 
+/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
+ * any system-level headers (such as stddef.h, linux/version.h, etc), and
+ * commonly-used macros like NULL and KERNEL_VERSION aren't available through
+ * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
+ * them on their own. So as a convenience, provide such definitions here.
+ */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef KERNEL_VERSION
+#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))
+#endif
+
 /*
- * Helper macro to manipulate data structures
+ * Helper macros to manipulate data structures
  */
 #ifndef offsetof
 #define offsetof(TYPE, MEMBER)	((unsigned long)&((TYPE *)0)->MEMBER)
-- 
cgit v1.2.3


From c53a3355eb2976fc4eb4d42862db0eea205045a1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 17 Mar 2021 13:05:10 -0700
Subject: selftests/bpf: drop custom NULL #define in skb_pkt_end selftest

Now that bpftool generates NULL definition as part of vmlinux.h, drop custom
NULL definition in skb_pkt_end.c.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20210317200510.1354627-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/skb_pkt_end.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index cf6823f42e80..7f2eaa2f89f8 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -4,7 +4,6 @@
 #include <bpf/bpf_core_read.h>
 #include <bpf/bpf_helpers.h>
 
-#define NULL 0
 #define INLINE __always_inline
 
 #define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
-- 
cgit v1.2.3


From 71793730ebfdbd1b15c6648a67e8d42b83eb131d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 18 Mar 2021 09:43:08 +0100
Subject: Bluetooth: btbcm: Add BCM4334 DT binding

BCM4334 was missing from the binding.

Cc: devicetree@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
index bdd6ca617e23..fbdc2083bec4 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
@@ -18,6 +18,7 @@ properties:
       - brcm,bcm20702a1
       - brcm,bcm4329-bt
       - brcm,bcm4330-bt
+      - brcm,bcm4334-bt
       - brcm,bcm43438-bt
       - brcm,bcm4345c5
       - brcm,bcm43540-bt
-- 
cgit v1.2.3


From 81534d4835de52758176909f3e446f1d49809241 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 18 Mar 2021 09:43:09 +0100
Subject: Bluetooth: btbcm: Add BCM4330 and BCM4334 compatibles

Add the missing brcm,bcm4330-bt and brcm,bcm4334-bt to the
match table so device trees can use this compatible as well
and not need to use the fallback brcm,bcm4329-bt.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 93a4c03a8787..3cd57fc56ade 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -1499,6 +1499,8 @@ static struct bcm_device_data bcm43438_device_data = {
 static const struct of_device_id bcm_bluetooth_of_match[] = {
 	{ .compatible = "brcm,bcm20702a1" },
 	{ .compatible = "brcm,bcm4329-bt" },
+	{ .compatible = "brcm,bcm4330-bt" },
+	{ .compatible = "brcm,bcm4334-bt" },
 	{ .compatible = "brcm,bcm4345c5" },
 	{ .compatible = "brcm,bcm4330-bt" },
 	{ .compatible = "brcm,bcm43438-bt", .data = &bcm43438_device_data },
-- 
cgit v1.2.3


From fdc13979f91e664717f47eb8c49094e4b7f202e3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 8 Mar 2021 12:06:58 +0100
Subject: bpf, devmap: Move drop error path to devmap for XDP_REDIRECT

We want to change the current ndo_xdp_xmit drop semantics because it will
allow us to implement better queue overflow handling. This is working
towards the larger goal of a XDP TX queue-hook. Move XDP_REDIRECT error
path handling from each XDP ethernet driver to devmap code. According to
the new APIs, the driver running the ndo_xdp_xmit pointer, will break tx
loop whenever the hw reports a tx error and it will just return to devmap
caller the number of successfully transmitted frames. It will be devmap
responsibility to free dropped frames.

Move each XDP ndo_xdp_xmit capable driver to the new APIs:

- veth
- virtio-net
- mvneta
- mvpp2
- socionext
- amazon ena
- bnxt
- freescale (dpaa2, dpaa)
- xen-frontend
- qede
- ice
- igb
- ixgbe
- i40e
- mlx5
- ti (cpsw, cpsw-new)
- tun
- sfc

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Reviewed-by: Camelia Groza <camelia.groza@nxp.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Shay Agroskin <shayagr@amazon.com>
Link: https://lore.kernel.org/bpf/ed670de24f951cfd77590decf0229a0ad7fd12f6.1615201152.git.lorenzo@kernel.org
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c     | 21 +++++++----------
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c    | 20 +++++++---------
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c   | 12 ++++------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c |  2 --
 drivers/net/ethernet/intel/i40e/i40e_txrx.c      | 15 ++++++------
 drivers/net/ethernet/intel/ice/ice_txrx.c        | 15 ++++++------
 drivers/net/ethernet/intel/igb/igb_main.c        | 11 ++++-----
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    | 11 ++++-----
 drivers/net/ethernet/marvell/mvneta.c            | 13 +++++-----
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c  | 13 +++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 15 +++++-------
 drivers/net/ethernet/qlogic/qede/qede_fp.c       | 19 ++++++---------
 drivers/net/ethernet/sfc/tx.c                    | 15 +-----------
 drivers/net/ethernet/socionext/netsec.c          | 16 ++++++-------
 drivers/net/ethernet/ti/cpsw.c                   | 14 +++++------
 drivers/net/ethernet/ti/cpsw_new.c               | 14 +++++------
 drivers/net/ethernet/ti/cpsw_priv.c              | 11 ++++-----
 drivers/net/tun.c                                | 15 +++++++-----
 drivers/net/veth.c                               | 28 ++++++++++++----------
 drivers/net/virtio_net.c                         | 25 ++++++++++----------
 drivers/net/xen-netfront.c                       | 18 +++++++-------
 kernel/bpf/devmap.c                              | 30 ++++++++++--------------
 22 files changed, 153 insertions(+), 200 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 102f2c91fdb8..5c062c51b4cb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -300,7 +300,7 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 
 	rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
 	if (unlikely(rc))
-		goto error_drop_packet;
+		return rc;
 
 	ena_tx_ctx.ena_bufs = tx_info->bufs;
 	ena_tx_ctx.push_header = push_hdr;
@@ -330,8 +330,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 error_unmap_dma:
 	ena_unmap_tx_buff(xdp_ring, tx_info);
 	tx_info->xdpf = NULL;
-error_drop_packet:
-	xdp_return_frame(xdpf);
 	return rc;
 }
 
@@ -339,8 +337,8 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
 			struct xdp_frame **frames, u32 flags)
 {
 	struct ena_adapter *adapter = netdev_priv(dev);
-	int qid, i, err, drops = 0;
 	struct ena_ring *xdp_ring;
+	int qid, i, nxmit = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -360,12 +358,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
 	spin_lock(&xdp_ring->xdp_tx_lock);
 
 	for (i = 0; i < n; i++) {
-		err = ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0);
-		/* The descriptor is freed by ena_xdp_xmit_frame in case
-		 * of an error.
-		 */
-		if (err)
-			drops++;
+		if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0))
+			break;
+		nxmit++;
 	}
 
 	/* Ring doorbell to make device aware of the packets */
@@ -378,7 +373,7 @@ static int ena_xdp_xmit(struct net_device *dev, int n,
 	spin_unlock(&xdp_ring->xdp_tx_lock);
 
 	/* Return number of packets sent */
-	return n - drops;
+	return nxmit;
 }
 
 static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
@@ -415,7 +410,9 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
 		spin_lock(&xdp_ring->xdp_tx_lock);
 
-		ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, XDP_XMIT_FLUSH);
+		if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf,
+				       XDP_XMIT_FLUSH))
+			xdp_return_frame(xdpf);
 
 		spin_unlock(&xdp_ring->xdp_tx_lock);
 		xdp_stat = &rx_ring->rx_stats.xdp_tx;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 641303894341..ec9564e584e0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -217,7 +217,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
 	struct pci_dev *pdev = bp->pdev;
 	struct bnxt_tx_ring_info *txr;
 	dma_addr_t mapping;
-	int drops = 0;
+	int nxmit = 0;
 	int ring;
 	int i;
 
@@ -233,21 +233,17 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
 		struct xdp_frame *xdp = frames[i];
 
 		if (!txr || !bnxt_tx_avail(bp, txr) ||
-		    !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) {
-			xdp_return_frame_rx_napi(xdp);
-			drops++;
-			continue;
-		}
+		    !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP))
+			break;
 
 		mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len,
 					 DMA_TO_DEVICE);
 
-		if (dma_mapping_error(&pdev->dev, mapping)) {
-			xdp_return_frame_rx_napi(xdp);
-			drops++;
-			continue;
-		}
+		if (dma_mapping_error(&pdev->dev, mapping))
+			break;
+
 		__bnxt_xmit_xdp_redirect(bp, txr, mapping, xdp->len, xdp);
+		nxmit++;
 	}
 
 	if (flags & XDP_XMIT_FLUSH) {
@@ -256,7 +252,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames,
 		bnxt_db_write(bp, &txr->tx_db, txr->tx_prod);
 	}
 
-	return num_frames - drops;
+	return nxmit;
 }
 
 /* Under rtnl_lock */
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 720dc99bd1fc..177c020bf34a 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -3081,7 +3081,7 @@ static int dpaa_xdp_xmit(struct net_device *net_dev, int n,
 			 struct xdp_frame **frames, u32 flags)
 {
 	struct xdp_frame *xdpf;
-	int i, err, drops = 0;
+	int i, nxmit = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -3091,14 +3091,12 @@ static int dpaa_xdp_xmit(struct net_device *net_dev, int n,
 
 	for (i = 0; i < n; i++) {
 		xdpf = frames[i];
-		err = dpaa_xdp_xmit_frame(net_dev, xdpf);
-		if (err) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (dpaa_xdp_xmit_frame(net_dev, xdpf))
+			break;
+		nxmit++;
 	}
 
-	return n - drops;
+	return nxmit;
 }
 
 static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 492943bb9c48..fc0eb82cdd6a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2431,8 +2431,6 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	percpu_stats->tx_packets += enqueued;
 	for (i = 0; i < enqueued; i++)
 		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
-	for (i = enqueued; i < n; i++)
-		xdp_return_frame_rx_napi(frames[i]);
 
 	return enqueued;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 627794b31e33..e2c5c6d83f25 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3847,8 +3847,8 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @frames: array of XDP buffer pointers
  * @flags: XDP extra info
  *
- * Returns number of frames successfully sent. Frames that fail are
- * free'ed via XDP return API.
+ * Returns number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
  *
  * For error cases, a negative errno code is returned and no-frames
  * are transmitted (caller must handle freeing frames).
@@ -3861,7 +3861,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_ring *xdp_ring;
-	int drops = 0;
+	int nxmit = 0;
 	int i;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
@@ -3881,14 +3881,13 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		int err;
 
 		err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
-		if (err != I40E_XDP_TX) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (err != I40E_XDP_TX)
+			break;
+		nxmit++;
 	}
 
 	if (unlikely(flags & XDP_XMIT_FLUSH))
 		i40e_xdp_ring_update_tail(xdp_ring);
 
-	return n - drops;
+	return nxmit;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index b7dc25da1202..d4bfa7905652 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -571,8 +571,8 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
  * @frames: XDP frames to be transmitted
  * @flags: transmit flags
  *
- * Returns number of frames successfully sent. Frames that fail are
- * free'ed via XDP return API.
+ * Returns number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
  * For error cases, a negative errno code is returned and no-frames
  * are transmitted (caller must handle freeing frames).
  */
@@ -584,7 +584,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	unsigned int queue_index = smp_processor_id();
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_ring *xdp_ring;
-	int drops = 0, i;
+	int nxmit = 0, i;
 
 	if (test_bit(__ICE_DOWN, vsi->state))
 		return -ENETDOWN;
@@ -601,16 +601,15 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		int err;
 
 		err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
-		if (err != ICE_XDP_TX) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (err != ICE_XDP_TX)
+			break;
+		nxmit++;
 	}
 
 	if (unlikely(flags & XDP_XMIT_FLUSH))
 		ice_xdp_ring_update_tail(xdp_ring);
 
-	return n - drops;
+	return nxmit;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 878b31d534ec..cb0d07ff2492 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2934,7 +2934,7 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
 	int cpu = smp_processor_id();
 	struct igb_ring *tx_ring;
 	struct netdev_queue *nq;
-	int drops = 0;
+	int nxmit = 0;
 	int i;
 
 	if (unlikely(test_bit(__IGB_DOWN, &adapter->state)))
@@ -2961,10 +2961,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
 		int err;
 
 		err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
-		if (err != IGB_XDP_TX) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (err != IGB_XDP_TX)
+			break;
+		nxmit++;
 	}
 
 	__netif_tx_unlock(nq);
@@ -2972,7 +2971,7 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
 	if (unlikely(flags & XDP_XMIT_FLUSH))
 		igb_xdp_ring_update_tail(tx_ring);
 
-	return n - drops;
+	return nxmit;
 }
 
 static const struct net_device_ops igb_netdev_ops = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9f3f12e2ccf2..730c28a1a204 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10188,7 +10188,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
-	int drops = 0;
+	int nxmit = 0;
 	int i;
 
 	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
@@ -10212,16 +10212,15 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
 		int err;
 
 		err = ixgbe_xmit_xdp_ring(adapter, xdpf);
-		if (err != IXGBE_XDP_TX) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (err != IXGBE_XDP_TX)
+			break;
+		nxmit++;
 	}
 
 	if (unlikely(flags & XDP_XMIT_FLUSH))
 		ixgbe_xdp_ring_update_tail(ring);
 
-	return n - drops;
+	return nxmit;
 }
 
 static const struct net_device_ops ixgbe_netdev_ops = {
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index a635cf84608a..20307eec8988 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2137,7 +2137,7 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
-	int i, nxmit_byte = 0, nxmit = num_frame;
+	int i, nxmit_byte = 0, nxmit = 0;
 	int cpu = smp_processor_id();
 	struct mvneta_tx_queue *txq;
 	struct netdev_queue *nq;
@@ -2155,12 +2155,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
 	__netif_tx_lock(nq, cpu);
 	for (i = 0; i < num_frame; i++) {
 		ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true);
-		if (ret == MVNETA_XDP_TX) {
-			nxmit_byte += frames[i]->len;
-		} else {
-			xdp_return_frame_rx_napi(frames[i]);
-			nxmit--;
-		}
+		if (ret != MVNETA_XDP_TX)
+			break;
+
+		nxmit_byte += frames[i]->len;
+		nxmit++;
 	}
 
 	if (unlikely(flags & XDP_XMIT_FLUSH))
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 1767c60056c5..ec706d614cac 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3744,7 +3744,7 @@ mvpp2_xdp_xmit(struct net_device *dev, int num_frame,
 	       struct xdp_frame **frames, u32 flags)
 {
 	struct mvpp2_port *port = netdev_priv(dev);
-	int i, nxmit_byte = 0, nxmit = num_frame;
+	int i, nxmit_byte = 0, nxmit = 0;
 	struct mvpp2_pcpu_stats *stats;
 	u16 txq_id;
 	u32 ret;
@@ -3762,12 +3762,11 @@ mvpp2_xdp_xmit(struct net_device *dev, int num_frame,
 
 	for (i = 0; i < num_frame; i++) {
 		ret = mvpp2_xdp_submit_frame(port, txq_id, frames[i], true);
-		if (ret == MVPP2_XDP_TX) {
-			nxmit_byte += frames[i]->len;
-		} else {
-			xdp_return_frame_rx_napi(frames[i]);
-			nxmit--;
-		}
+		if (ret != MVPP2_XDP_TX)
+			break;
+
+		nxmit_byte += frames[i]->len;
+		nxmit++;
 	}
 
 	if (likely(nxmit > 0))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 2e3e78b0f333..2f0df5cc1a2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -500,7 +500,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_xdpsq *sq;
-	int drops = 0;
+	int nxmit = 0;
 	int sq_num;
 	int i;
 
@@ -529,11 +529,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
 						 xdptxd.len, DMA_TO_DEVICE);
 
-		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-			continue;
-		}
+		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
+			break;
 
 		xdpi.mode           = MLX5E_XDP_XMIT_MODE_FRAME;
 		xdpi.frame.xdpf     = xdpf;
@@ -544,9 +541,9 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		if (unlikely(!ret)) {
 			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
 					 xdptxd.len, DMA_TO_DEVICE);
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
+			break;
 		}
+		nxmit++;
 	}
 
 	if (flags & XDP_XMIT_FLUSH) {
@@ -555,7 +552,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		mlx5e_xmit_xdp_doorbell(sq);
 	}
 
-	return n - drops;
+	return nxmit;
 }
 
 void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 8c47a9d2a965..102d0e0808d5 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -345,7 +345,7 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames,
 	struct qede_tx_queue *xdp_tx;
 	struct xdp_frame *xdpf;
 	dma_addr_t mapping;
-	int i, drops = 0;
+	int i, nxmit = 0;
 	u16 xdp_prod;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@ -364,18 +364,13 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames,
 
 		mapping = dma_map_single(dmadev, xdpf->data, xdpf->len,
 					 DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(dmadev, mapping))) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-
-			continue;
-		}
+		if (unlikely(dma_mapping_error(dmadev, mapping)))
+			break;
 
 		if (unlikely(qede_xdp_xmit(xdp_tx, mapping, 0, xdpf->len,
-					   NULL, xdpf))) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+					   NULL, xdpf)))
+			break;
+		nxmit++;
 	}
 
 	if (flags & XDP_XMIT_FLUSH) {
@@ -387,7 +382,7 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames,
 
 	spin_unlock(&xdp_tx->xdp_tx_lock);
 
-	return n_frames - drops;
+	return nxmit;
 }
 
 int qede_txq_has_work(struct qede_tx_queue *txq)
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 1665529a7271..0c6650d2e239 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -412,14 +412,6 @@ err:
 	return NETDEV_TX_OK;
 }
 
-static void efx_xdp_return_frames(int n,  struct xdp_frame **xdpfs)
-{
-	int i;
-
-	for (i = 0; i < n; i++)
-		xdp_return_frame_rx_napi(xdpfs[i]);
-}
-
 /* Transmit a packet from an XDP buffer
  *
  * Returns number of packets sent on success, error code otherwise.
@@ -492,12 +484,7 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
 	if (flush && i > 0)
 		efx_nic_push_buffers(tx_queue);
 
-	if (i == 0)
-		return -EIO;
-
-	efx_xdp_return_frames(n - i, xdpfs + i);
-
-	return i;
+	return i == 0 ? -EIO : i;
 }
 
 /* Initiate a packet transmission.  We use one channel per CPU
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 3c53051bdacf..b9449cf36e31 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1757,8 +1757,7 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n,
 {
 	struct netsec_priv *priv = netdev_priv(ndev);
 	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
-	int drops = 0;
-	int i;
+	int i, nxmit = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -1769,12 +1768,11 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n,
 		int err;
 
 		err = netsec_xdp_queue_one(priv, xdpf, true);
-		if (err != NETSEC_XDP_TX) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		} else {
-			tx_ring->xdp_xmit++;
-		}
+		if (err != NETSEC_XDP_TX)
+			break;
+
+		tx_ring->xdp_xmit++;
+		nxmit++;
 	}
 	spin_unlock(&tx_ring->lock);
 
@@ -1783,7 +1781,7 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n,
 		tx_ring->xdp_xmit = 0;
 	}
 
-	return n - drops;
+	return nxmit;
 }
 
 static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog,
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index fd966567464c..074702af3dc6 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1123,25 +1123,23 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpsw_common *cpsw = priv->cpsw;
 	struct xdp_frame *xdpf;
-	int i, drops = 0, port;
+	int i, nxmit = 0, port;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
 
 	for (i = 0; i < n; i++) {
 		xdpf = frames[i];
-		if (xdpf->len < CPSW_MIN_PACKET_SIZE) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-			continue;
-		}
+		if (xdpf->len < CPSW_MIN_PACKET_SIZE)
+			break;
 
 		port = priv->emac_port + cpsw->data.dual_emac;
 		if (cpsw_xdp_tx_frame(priv, xdpf, NULL, port))
-			drops++;
+			break;
+		nxmit++;
 	}
 
-	return n - drops;
+	return nxmit;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 58a64313ac00..0751f77de2c7 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1093,24 +1093,22 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n,
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct xdp_frame *xdpf;
-	int i, drops = 0;
+	int i, nxmit = 0;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
 
 	for (i = 0; i < n; i++) {
 		xdpf = frames[i];
-		if (xdpf->len < CPSW_MIN_PACKET_SIZE) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-			continue;
-		}
+		if (xdpf->len < CPSW_MIN_PACKET_SIZE)
+			break;
 
 		if (cpsw_xdp_tx_frame(priv, xdpf, NULL, priv->emac_port))
-			drops++;
+			break;
+		nxmit++;
 	}
 
-	return n - drops;
+	return nxmit;
 }
 
 static int cpsw_get_port_parent_id(struct net_device *ndev,
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index bb59e768915e..5862f0a4a975 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1305,19 +1305,15 @@ int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf,
 		ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf),
 					       dma, xdpf->len, port);
 	} else {
-		if (sizeof(*xmeta) > xdpf->headroom) {
-			xdp_return_frame_rx_napi(xdpf);
+		if (sizeof(*xmeta) > xdpf->headroom)
 			return -EINVAL;
-		}
 
 		ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf),
 					xdpf->data, xdpf->len, port);
 	}
 
-	if (ret) {
+	if (ret)
 		priv->ndev->stats.tx_dropped++;
-		xdp_return_frame_rx_napi(xdpf);
-	}
 
 	return ret;
 }
@@ -1353,7 +1349,8 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 		if (unlikely(!xdpf))
 			goto drop;
 
-		cpsw_xdp_tx_frame(priv, xdpf, page, port);
+		if (cpsw_xdp_tx_frame(priv, xdpf, page, port))
+			xdp_return_frame_rx_napi(xdpf);
 		break;
 	case XDP_REDIRECT:
 		if (xdp_do_redirect(ndev, xdp, prog))
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fc86da7f1628..6e55697315de 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1181,8 +1181,7 @@ static int tun_xdp_xmit(struct net_device *dev, int n,
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_file *tfile;
 	u32 numqueues;
-	int drops = 0;
-	int cnt = n;
+	int nxmit = 0;
 	int i;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@ -1212,9 +1211,9 @@ resample:
 
 		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
 			atomic_long_inc(&dev->tx_dropped);
-			xdp_return_frame_rx_napi(xdp);
-			drops++;
+			break;
 		}
+		nxmit++;
 	}
 	spin_unlock(&tfile->tx_ring.producer_lock);
 
@@ -1222,17 +1221,21 @@ resample:
 		__tun_xdp_flush_tfile(tfile);
 
 	rcu_read_unlock();
-	return cnt - drops;
+	return nxmit;
 }
 
 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 {
 	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
+	int nxmit;
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
+	nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
+	if (!nxmit)
+		xdp_return_frame_rx_napi(frame);
+	return nxmit;
 }
 
 static const struct net_device_ops tap_netdev_ops = {
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index aa1a66ad2ce5..05abe360603b 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -434,7 +434,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
 			 u32 flags, bool ndo_xmit)
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
-	int i, ret = -ENXIO, drops = 0;
+	int i, ret = -ENXIO, nxmit = 0;
 	struct net_device *rcv;
 	unsigned int max_len;
 	struct veth_rq *rq;
@@ -464,21 +464,20 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
 		void *ptr = veth_xdp_to_ptr(frame);
 
 		if (unlikely(frame->len > max_len ||
-			     __ptr_ring_produce(&rq->xdp_ring, ptr))) {
-			xdp_return_frame_rx_napi(frame);
-			drops++;
-		}
+			     __ptr_ring_produce(&rq->xdp_ring, ptr)))
+			break;
+		nxmit++;
 	}
 	spin_unlock(&rq->xdp_ring.producer_lock);
 
 	if (flags & XDP_XMIT_FLUSH)
 		__veth_xdp_flush(rq);
 
-	ret = n - drops;
+	ret = nxmit;
 	if (ndo_xmit) {
 		u64_stats_update_begin(&rq->stats.syncp);
-		rq->stats.vs.peer_tq_xdp_xmit += n - drops;
-		rq->stats.vs.peer_tq_xdp_xmit_err += drops;
+		rq->stats.vs.peer_tq_xdp_xmit += nxmit;
+		rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit;
 		u64_stats_update_end(&rq->stats.syncp);
 	}
 
@@ -505,20 +504,23 @@ static int veth_ndo_xdp_xmit(struct net_device *dev, int n,
 
 static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq)
 {
-	int sent, i, err = 0;
+	int sent, i, err = 0, drops;
 
 	sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false);
 	if (sent < 0) {
 		err = sent;
 		sent = 0;
-		for (i = 0; i < bq->count; i++)
-			xdp_return_frame(bq->q[i]);
 	}
-	trace_xdp_bulk_tx(rq->dev, sent, bq->count - sent, err);
+
+	for (i = sent; unlikely(i < bq->count); i++)
+		xdp_return_frame(bq->q[i]);
+
+	drops = bq->count - sent;
+	trace_xdp_bulk_tx(rq->dev, sent, drops, err);
 
 	u64_stats_update_begin(&rq->stats.syncp);
 	rq->stats.vs.xdp_tx += sent;
-	rq->stats.vs.xdp_tx_err += bq->count - sent;
+	rq->stats.vs.xdp_tx_err += drops;
 	u64_stats_update_end(&rq->stats.syncp);
 
 	bq->count = 0;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 708a8b2ca25b..254369a41540 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -499,10 +499,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	unsigned int len;
 	int packets = 0;
 	int bytes = 0;
-	int drops = 0;
+	int nxmit = 0;
 	int kicks = 0;
-	int ret, err;
 	void *ptr;
+	int ret;
 	int i;
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
@@ -516,7 +516,6 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
-		drops = n;
 		goto out;
 	}
 
@@ -539,13 +538,11 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 
-		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
-		if (err) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
+			break;
+		nxmit++;
 	}
-	ret = n - drops;
+	ret = nxmit;
 
 	if (flags & XDP_XMIT_FLUSH) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
@@ -556,7 +553,7 @@ out:
 	sq->stats.bytes += bytes;
 	sq->stats.packets += packets;
 	sq->stats.xdp_tx += n;
-	sq->stats.xdp_tx_drops += drops;
+	sq->stats.xdp_tx_drops += n - nxmit;
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);
 
@@ -709,7 +706,9 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
@@ -896,7 +895,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index cc19cd9203da..44275908d61a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -608,8 +608,8 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
 	struct netfront_info *np = netdev_priv(dev);
 	struct netfront_queue *queue = NULL;
 	unsigned long irq_flags;
-	int drops = 0;
-	int i, err;
+	int nxmit = 0;
+	int i;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -622,15 +622,13 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
 
 		if (!xdpf)
 			continue;
-		err = xennet_xdp_xmit_one(dev, queue, xdpf);
-		if (err) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (xennet_xdp_xmit_one(dev, queue, xdpf))
+			break;
+		nxmit++;
 	}
 	spin_unlock_irqrestore(&queue->tx_lock, irq_flags);
 
-	return n - drops;
+	return nxmit;
 }
 
 
@@ -875,7 +873,9 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
 		get_page(pdata);
 		xdpf = xdp_convert_buff_to_frame(xdp);
 		err = xennet_xdp_xmit(queue->info->netdev, 1, &xdpf, 0);
-		if (unlikely(err < 0))
+		if (unlikely(!err))
+			xdp_return_frame_rx_napi(xdpf);
+		else if (unlikely(err < 0))
 			trace_xdp_exception(queue->info->netdev, prog, act);
 		break;
 	case XDP_REDIRECT:
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 7a5ad7331c3b..aa516472ce46 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -329,7 +329,7 @@ bool dev_map_can_have_prog(struct bpf_map *map)
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
-	int sent = 0, drops = 0, err = 0;
+	int sent = 0, err = 0;
 	int i;
 
 	if (unlikely(!bq->count))
@@ -343,29 +343,23 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 
 	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
 	if (sent < 0) {
+		/* If ndo_xdp_xmit fails with an errno, no frames have
+		 * been xmit'ed.
+		 */
 		err = sent;
 		sent = 0;
-		goto error;
 	}
-	drops = bq->count - sent;
-out:
-	bq->count = 0;
 
-	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err);
-	bq->dev_rx = NULL;
-	__list_del_clearprev(&bq->flush_node);
-	return;
-error:
-	/* If ndo_xdp_xmit fails with an errno, no frames have been
-	 * xmit'ed and it's our responsibility to them free all.
+	/* If not all frames have been transmitted, it is our
+	 * responsibility to free them
 	 */
-	for (i = 0; i < bq->count; i++) {
-		struct xdp_frame *xdpf = bq->q[i];
+	for (i = sent; unlikely(i < bq->count); i++)
+		xdp_return_frame_rx_napi(bq->q[i]);
 
-		xdp_return_frame_rx_napi(xdpf);
-		drops++;
-	}
-	goto out;
+	trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, bq->count - sent, err);
+	bq->dev_rx = NULL;
+	bq->count = 0;
+	__list_del_clearprev(&bq->flush_node);
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled
-- 
cgit v1.2.3


From 2b9cef6679020938666a81f3b6e743cdfcb8b32d Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Thu, 18 Mar 2021 15:32:12 +0530
Subject: octeontx2-pf: Add ip tos and ip proto icmp/icmpv6 flow offload
 support

Add support for programming the HW MCAM match key with IP tos, IP(v6)
proto icmp/icmpv6, allowing flow offload rules to be installed using
those fields. The NPC HW extracts layer type, which will be used as a
matching criteria for different IP protocols.

The ethtool n-tuple filter logic has been updated to parse the IP tos
and l4proto for HW offloading. l4proto tcp/udp/sctp/ah/esp/icmp are
supported. See example usage below.

Ex: Redirect l4proto icmp to vf 0 queue 0
ethtool -U eth0 flow-type ip4 l4proto 1 action vf 0 queue 0

Ex: Redirect flow with ip tos 8 to vf 0 queue 0
ethtool -U eth0 flow-type ip4 tos 8 vf 0 queue 0

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/npc.h    |  2 ++
 .../net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 17 +++++++--
 .../ethernet/marvell/octeontx2/nic/otx2_flows.c    | 42 ++++++++++++++++++++--
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 0675c55dcc7a..1e012e787260 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -167,6 +167,8 @@ enum key_fields {
 	NPC_IPPROTO_SCTP,
 	NPC_IPPROTO_AH,
 	NPC_IPPROTO_ESP,
+	NPC_IPPROTO_ICMP,
+	NPC_IPPROTO_ICMP6,
 	NPC_SPORT_TCP,
 	NPC_DPORT_TCP,
 	NPC_SPORT_UDP,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index d805a189a268..7f35b62eea13 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -29,6 +29,8 @@ static const char * const npc_flow_names[] = {
 	[NPC_IPPROTO_TCP] = "ip proto tcp",
 	[NPC_IPPROTO_UDP] = "ip proto udp",
 	[NPC_IPPROTO_SCTP] = "ip proto sctp",
+	[NPC_IPPROTO_ICMP] = "ip proto icmp",
+	[NPC_IPPROTO_ICMP6] = "ip proto icmp6",
 	[NPC_IPPROTO_AH] = "ip proto AH",
 	[NPC_IPPROTO_ESP] = "ip proto ESP",
 	[NPC_SPORT_TCP]	= "tcp source port",
@@ -427,6 +429,7 @@ do {									       \
 	 * packet header fields below.
 	 * Example: Source IP is 4 bytes and starts at 12th byte of IP header
 	 */
+	NPC_SCAN_HDR(NPC_TOS, NPC_LID_LC, NPC_LT_LC_IP, 1, 1);
 	NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4);
 	NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4);
 	NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16);
@@ -477,9 +480,12 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
 				     BIT_ULL(NPC_IPPROTO_SCTP);
 	}
 
-	/* for AH, check if corresponding layer type is present in the key */
-	if (npc_check_field(rvu, blkaddr, NPC_LD, intf))
+	/* for AH/ICMP/ICMPv6/, check if corresponding layer type is present in the key */
+	if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) {
 		*features |= BIT_ULL(NPC_IPPROTO_AH);
+		*features |= BIT_ULL(NPC_IPPROTO_ICMP);
+		*features |= BIT_ULL(NPC_IPPROTO_ICMP6);
+	}
 
 	/* for ESP, check if corresponding layer type is present in the key */
 	if (npc_check_field(rvu, blkaddr, NPC_LE, intf))
@@ -769,6 +775,12 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry,
 	if (features & BIT_ULL(NPC_IPPROTO_SCTP))
 		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP,
 				 0, ~0ULL, 0, intf);
+	if (features & BIT_ULL(NPC_IPPROTO_ICMP))
+		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP,
+				 0, ~0ULL, 0, intf);
+	if (features & BIT_ULL(NPC_IPPROTO_ICMP6))
+		npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6,
+				 0, ~0ULL, 0, intf);
 
 	if (features & BIT_ULL(NPC_OUTER_VID))
 		npc_update_entry(rvu, NPC_LB, entry,
@@ -798,6 +810,7 @@ do {									      \
 	NPC_WRITE_FLOW(NPC_SMAC, smac, smac_val, 0, smac_mask, 0);
 	NPC_WRITE_FLOW(NPC_ETYPE, etype, ntohs(pkt->etype), 0,
 		       ntohs(mask->etype), 0);
+	NPC_WRITE_FLOW(NPC_TOS, tos, pkt->tos, 0, mask->tos, 0);
 	NPC_WRITE_FLOW(NPC_SIP_IPV4, ip4src, ntohl(pkt->ip4src), 0,
 		       ntohl(mask->ip4src), 0);
 	NPC_WRITE_FLOW(NPC_DIP_IPV4, ip4dst, ntohl(pkt->ip4dst), 0,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 0dbbf38e0597..3264cb793f02 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -301,6 +301,35 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ipv4_usr_mask->tos) {
+			pkt->tos = ipv4_usr_hdr->tos;
+			pmask->tos = ipv4_usr_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
+		if (ipv4_usr_mask->proto) {
+			switch (ipv4_usr_hdr->proto) {
+			case IPPROTO_ICMP:
+				req->features |= BIT_ULL(NPC_IPPROTO_ICMP);
+				break;
+			case IPPROTO_TCP:
+				req->features |= BIT_ULL(NPC_IPPROTO_TCP);
+				break;
+			case IPPROTO_UDP:
+				req->features |= BIT_ULL(NPC_IPPROTO_UDP);
+				break;
+			case IPPROTO_SCTP:
+				req->features |= BIT_ULL(NPC_IPPROTO_SCTP);
+				break;
+			case IPPROTO_AH:
+				req->features |= BIT_ULL(NPC_IPPROTO_AH);
+				break;
+			case IPPROTO_ESP:
+				req->features |= BIT_ULL(NPC_IPPROTO_ESP);
+				break;
+			default:
+				return -EOPNOTSUPP;
+			}
+		}
 		pkt->etype = cpu_to_be16(ETH_P_IP);
 		pmask->etype = cpu_to_be16(0xFFFF);
 		req->features |= BIT_ULL(NPC_ETYPE);
@@ -325,6 +354,11 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ipv4_l4_mask->tos) {
+			pkt->tos = ipv4_l4_hdr->tos;
+			pmask->tos = ipv4_l4_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
 		if (ipv4_l4_mask->psrc) {
 			memcpy(&pkt->sport, &ipv4_l4_hdr->psrc,
 			       sizeof(pkt->sport));
@@ -375,10 +409,14 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp,
 			       sizeof(pmask->ip4dst));
 			req->features |= BIT_ULL(NPC_DIP_IPV4);
 		}
+		if (ah_esp_mask->tos) {
+			pkt->tos = ah_esp_hdr->tos;
+			pmask->tos = ah_esp_mask->tos;
+			req->features |= BIT_ULL(NPC_TOS);
+		}
 
 		/* NPC profile doesn't extract AH/ESP header fields */
-		if ((ah_esp_mask->spi & ah_esp_hdr->spi) ||
-		    (ah_esp_mask->tos & ah_esp_mask->tos))
+		if (ah_esp_mask->spi & ah_esp_hdr->spi)
 			return -EOPNOTSUPP;
 
 		if (flow_type == AH_V4_FLOW)
-- 
cgit v1.2.3


From 1d4d9e42c2406bc7599f5a5b7a49b71dced34b1e Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Thu, 18 Mar 2021 15:32:13 +0530
Subject: octeontx2-pf: Add tc flower hardware offload on ingress traffic

This patch adds support for tc flower hardware offload on ingress
traffic. Since the tc-flower filter rules use the same set of MCAM
rules as the n-tuple filters, the n-tuple filters and tc flower
rules are mutually exclusive. When one of the feature is enabled
using ethtool, the other feature is disabled in the driver. By default
the driver enables n-tuple filters during initialization.

The following flow keys are supported.
    -> Ethernet: dst_mac
    -> L2 proto: all protocols
    -> VLAN (802.1q): vlan_id/vlan_prio
    -> IPv4: dst_ip/src_ip/ip_proto{tcp|udp|sctp|icmp}/ip_tos
    -> IPv6: ip_proto{icmpv6}
    -> L4(tcp/udp/sctp): dst_port/src_port

The following flow actions are supported.
    -> drop
    -> accept
    -> redirect
    -> vlan pop

The flow action supports multiple actions when vlan pop is specified
as the first action. The redirect action supports redirecting to the
PF/VF of same PCI device. Redirecting to other PCI NIX devices is not
supported.

Example #1: Add a tc filter rule to drop UDP traffic with dest port 80
    # ethtool -K eth0 hw-tc-offload on
    # tc qdisc add dev eth0 ingress
    # tc filter add dev eth0 protocol ip parent ffff: flower ip_proto \
          udp dst_port 80 action drop

Example #2: Add a tc filter rule to redirect ingress traffic on eth0
with vlan id 3 to eth6 (ex: eth0 vf0) after stripping the vlan hdr.
    # ethtool -K eth0 hw-tc-offload on
    # tc qdisc add dev eth0 ingress
    # tc filter add dev eth0 parent ffff: protocol 802.1Q flower \
          vlan_id 3 vlan_ethtype ipv4 action vlan pop action mirred \
          ingress redirect dev eth6

Example #3: List the ingress filter rules
    # tc -s filter show dev eth4 ingress

Example #4: Delete tc flower filter rule with handle 0x1
    # tc filter del dev eth0 ingress protocol ip pref 49152 \
      handle 1 flower

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/af/rvu_debugfs.c    |   2 +-
 .../net/ethernet/marvell/octeontx2/nic/Makefile    |   2 +-
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   |  19 +
 .../ethernet/marvell/octeontx2/nic/otx2_flows.c    |   5 +
 .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c   |  37 ++
 .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c   | 488 +++++++++++++++++++++
 6 files changed, 551 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index c8836d48a614..741da112fdf0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2002,7 +2002,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s,
 			seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype));
 			break;
 		case NPC_OUTER_VID:
-			seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci));
+			seq_printf(s, "0x%x ", ntohs(rule->packet.vlan_tci));
 			seq_printf(s, "mask 0x%x\n",
 				   ntohs(rule->mask.vlan_tci));
 			break;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
index 745aa8a19499..457c94793e63 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o
 obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o
 
 rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \
-		     otx2_ptp.o otx2_flows.o cn10k.o
+		     otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o
 rvu_nicvf-y := otx2_vf.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index a518c2283f18..992aa93178e1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -18,6 +18,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/soc/marvell/octeontx2/asm.h>
+#include <net/pkt_cls.h>
 
 #include <mbox.h>
 #include <npc.h>
@@ -264,6 +265,7 @@ struct otx2_flow_config {
 #define OTX2_MAX_NTUPLE_FLOWS	32
 #define OTX2_MAX_UNICAST_FLOWS	8
 #define OTX2_MAX_VLAN_FLOWS	1
+#define OTX2_MAX_TC_FLOWS	OTX2_MAX_NTUPLE_FLOWS
 #define OTX2_MCAM_COUNT		(OTX2_MAX_NTUPLE_FLOWS + \
 				 OTX2_MAX_UNICAST_FLOWS + \
 				 OTX2_MAX_VLAN_FLOWS)
@@ -274,10 +276,20 @@ struct otx2_flow_config {
 #define OTX2_PER_VF_VLAN_FLOWS	2 /* rx+tx per VF */
 #define OTX2_VF_VLAN_RX_INDEX	0
 #define OTX2_VF_VLAN_TX_INDEX	1
+	u32			tc_flower_offset;
 	u32                     ntuple_max_flows;
+	u32			tc_max_flows;
 	struct list_head	flow_list;
 };
 
+struct otx2_tc_info {
+	/* hash table to store TC offloaded flows */
+	struct rhashtable		flow_table;
+	struct rhashtable_params	flow_ht_params;
+	DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS);
+	unsigned long			num_entries;
+};
+
 struct dev_hw_ops {
 	int	(*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura);
 	void	(*sqe_flush)(void *dev, struct otx2_snd_queue *sq,
@@ -305,6 +317,7 @@ struct otx2_nic {
 #define OTX2_FLAG_PF_SHUTDOWN			BIT_ULL(8)
 #define OTX2_FLAG_RX_PAUSE_ENABLED		BIT_ULL(9)
 #define OTX2_FLAG_TX_PAUSE_ENABLED		BIT_ULL(10)
+#define OTX2_FLAG_TC_FLOWER_SUPPORT		BIT_ULL(11)
 	u64			flags;
 
 	struct otx2_qset	qset;
@@ -347,6 +360,7 @@ struct otx2_nic {
 	struct hwtstamp_config	tstamp;
 
 	struct otx2_flow_config	*flow_cfg;
+	struct otx2_tc_info	tc_info;
 };
 
 static inline bool is_otx2_lbkvf(struct pci_dev *pdev)
@@ -802,4 +816,9 @@ int otx2_add_macfilter(struct net_device *netdev, const u8 *mac);
 int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable);
 int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf);
 u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
+/* tc support */
+int otx2_init_tc(struct otx2_nic *nic);
+void otx2_shutdown_tc(struct otx2_nic *nic);
+int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		  void *type_data);
 #endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
index 3264cb793f02..fa7a46aa15ef 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
@@ -57,10 +57,13 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 		flow_cfg->ntuple_max_flows = rsp->count;
 		flow_cfg->ntuple_offset = 0;
 		pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT;
+		flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows;
+		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
 	} else {
 		flow_cfg->vf_vlan_offset = 0;
 		flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset +
 						vf_vlan_max_flows;
+		flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset;
 		flow_cfg->unicast_offset = flow_cfg->ntuple_offset +
 						OTX2_MAX_NTUPLE_FLOWS;
 		flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset +
@@ -69,6 +72,7 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf)
 		pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT;
 		pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT;
 		pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT;
+		pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT;
 	}
 
 	for (i = 0; i < rsp->count; i++)
@@ -93,6 +97,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf)
 	INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
 
 	pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS;
+	pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows;
 
 	err = otx2_alloc_mcam_entries(pf);
 	if (err)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 53ab1814d74b..772a29ba8503 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1760,6 +1760,24 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
+static netdev_features_t otx2_fix_features(struct net_device *dev,
+					   netdev_features_t features)
+{
+	/* check if n-tuple filters are ON */
+	if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) {
+		netdev_info(dev, "Disabling n-tuple filters\n");
+		features &= ~NETIF_F_NTUPLE;
+	}
+
+	/* check if tc hw offload is ON */
+	if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) {
+		netdev_info(dev, "Disabling TC hardware offload\n");
+		features &= ~NETIF_F_HW_TC;
+	}
+
+	return features;
+}
+
 static void otx2_set_rx_mode(struct net_device *netdev)
 {
 	struct otx2_nic *pf = netdev_priv(netdev);
@@ -1822,6 +1840,12 @@ static int otx2_set_features(struct net_device *netdev,
 	if ((changed & NETIF_F_NTUPLE) && !ntuple)
 		otx2_destroy_ntuple_flows(pf);
 
+	if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
+	    pf->tc_info.num_entries) {
+		netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n");
+		return -EBUSY;
+	}
+
 	return 0;
 }
 
@@ -2220,6 +2244,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_open		= otx2_open,
 	.ndo_stop		= otx2_stop,
 	.ndo_start_xmit		= otx2_xmit,
+	.ndo_fix_features	= otx2_fix_features,
 	.ndo_set_mac_address    = otx2_set_mac_address,
 	.ndo_change_mtu		= otx2_change_mtu,
 	.ndo_set_rx_mode	= otx2_set_rx_mode,
@@ -2230,6 +2255,7 @@ static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_set_vf_mac		= otx2_set_vf_mac,
 	.ndo_set_vf_vlan	= otx2_set_vf_vlan,
 	.ndo_get_vf_config	= otx2_get_vf_config,
+	.ndo_setup_tc		= otx2_setup_tc,
 };
 
 static int otx2_wq_init(struct otx2_nic *pf)
@@ -2449,6 +2475,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 				       NETIF_F_HW_VLAN_STAG_RX;
 	netdev->features |= netdev->hw_features;
 
+	/* HW supports tc offload but mutually exclusive with n-tuple filters */
+	if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	netdev->gso_max_segs = OTX2_MAX_GSO_SEGS;
 	netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
 
@@ -2470,6 +2500,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	otx2_set_ethtool_ops(netdev);
 
+	err = otx2_init_tc(pf);
+	if (err)
+		goto err_mcam_flow_del;
+
 	/* Enable link notifications */
 	otx2_cgx_config_linkevents(pf, true);
 
@@ -2479,6 +2513,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+err_mcam_flow_del:
+	otx2_mcam_flow_del(pf);
 err_unreg_netdev:
 	unregister_netdev(netdev);
 err_del_mcam_entries:
@@ -2646,6 +2682,7 @@ static void otx2_remove(struct pci_dev *pdev)
 
 	otx2_ptp_destroy(pf);
 	otx2_mcam_flow_del(pf);
+	otx2_shutdown_tc(pf);
 	otx2_detach_resources(&pf->mbox);
 	if (pf->hw.lmt_base)
 		iounmap(pf->hw.lmt_base);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
new file mode 100644
index 000000000000..a361eec568a2
--- /dev/null
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell OcteonTx2 RVU Physcial Function ethernet driver
+ *
+ * Copyright (C) 2021 Marvell.
+ */
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/rhashtable.h>
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/ipv6.h>
+
+#include "otx2_common.h"
+
+struct otx2_tc_flow {
+	struct rhash_head		node;
+	unsigned long			cookie;
+	u16				entry;
+	unsigned int			bitpos;
+	struct rcu_head			rcu;
+};
+
+static int otx2_tc_parse_actions(struct otx2_nic *nic,
+				 struct flow_action *flow_action,
+				 struct npc_install_flow_req *req)
+{
+	struct flow_action_entry *act;
+	struct net_device *target;
+	struct otx2_nic *priv;
+	int i;
+
+	if (!flow_action_has_entries(flow_action)) {
+		netdev_info(nic->netdev, "no tc actions specified");
+		return -EINVAL;
+	}
+
+	flow_action_for_each(i, act, flow_action) {
+		switch (act->id) {
+		case FLOW_ACTION_DROP:
+			req->op = NIX_RX_ACTIONOP_DROP;
+			return 0;
+		case FLOW_ACTION_ACCEPT:
+			req->op = NIX_RX_ACTION_DEFAULT;
+			return 0;
+		case FLOW_ACTION_REDIRECT_INGRESS:
+			target = act->dev;
+			priv = netdev_priv(target);
+			/* npc_install_flow_req doesn't support passing a target pcifunc */
+			if (rvu_get_pf(nic->pcifunc) != rvu_get_pf(priv->pcifunc)) {
+				netdev_info(nic->netdev,
+					    "can't redirect to other pf/vf\n");
+				return -EOPNOTSUPP;
+			}
+			req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK;
+			req->op = NIX_RX_ACTION_DEFAULT;
+			return 0;
+		case FLOW_ACTION_VLAN_POP:
+			req->vtag0_valid = true;
+			/* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */
+			req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE7;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int otx2_tc_prepare_flow(struct otx2_nic *nic,
+				struct flow_cls_offload *f,
+				struct npc_install_flow_req *req)
+{
+	struct flow_msg *flow_spec = &req->packet;
+	struct flow_msg *flow_mask = &req->mask;
+	struct flow_dissector *dissector;
+	struct flow_rule *rule;
+	u8 ip_proto = 0;
+
+	rule = flow_cls_offload_flow_rule(f);
+	dissector = rule->match.dissector;
+
+	if ((dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_IP))))  {
+		netdev_info(nic->netdev, "unsupported flow used key 0x%x",
+			    dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+
+		/* All EtherTypes can be matched, no hw limitation */
+		flow_spec->etype = match.key->n_proto;
+		flow_mask->etype = match.mask->n_proto;
+		req->features |= BIT_ULL(NPC_ETYPE);
+
+		if (match.mask->ip_proto &&
+		    (match.key->ip_proto != IPPROTO_TCP &&
+		     match.key->ip_proto != IPPROTO_UDP &&
+		     match.key->ip_proto != IPPROTO_SCTP &&
+		     match.key->ip_proto != IPPROTO_ICMP &&
+		     match.key->ip_proto != IPPROTO_ICMPV6)) {
+			netdev_info(nic->netdev,
+				    "ip_proto=0x%x not supported\n",
+				    match.key->ip_proto);
+			return -EOPNOTSUPP;
+		}
+		if (match.mask->ip_proto)
+			ip_proto = match.key->ip_proto;
+
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_IPPROTO_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_IPPROTO_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_IPPROTO_SCTP);
+		else if (ip_proto == IPPROTO_ICMP)
+			req->features |= BIT_ULL(NPC_IPPROTO_ICMP);
+		else if (ip_proto == IPPROTO_ICMPV6)
+			req->features |= BIT_ULL(NPC_IPPROTO_ICMP6);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+		if (!is_zero_ether_addr(match.mask->src)) {
+			netdev_err(nic->netdev, "src mac match not supported\n");
+			return -EOPNOTSUPP;
+		}
+
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			ether_addr_copy(flow_spec->dmac, (u8 *)&match.key->dst);
+			ether_addr_copy(flow_mask->dmac,
+					(u8 *)&match.mask->dst);
+			req->features |= BIT_ULL(NPC_DMAC);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_ip(rule, &match);
+		if ((ntohs(flow_spec->etype) != ETH_P_IP) &&
+		    match.mask->tos) {
+			netdev_err(nic->netdev, "tos not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (match.mask->ttl) {
+			netdev_err(nic->netdev, "ttl not supported\n");
+			return -EOPNOTSUPP;
+		}
+		flow_spec->tos = match.key->tos;
+		flow_mask->tos = match.mask->tos;
+		req->features |= BIT_ULL(NPC_TOS);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+		u16 vlan_tci, vlan_tci_mask;
+
+		flow_rule_match_vlan(rule, &match);
+
+		if (ntohs(match.key->vlan_tpid) != ETH_P_8021Q) {
+			netdev_err(nic->netdev, "vlan tpid 0x%x not supported\n",
+				   ntohs(match.key->vlan_tpid));
+			return -EOPNOTSUPP;
+		}
+
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_dei ||
+		    match.mask->vlan_priority) {
+			vlan_tci = match.key->vlan_id |
+				   match.key->vlan_dei << 12 |
+				   match.key->vlan_priority << 13;
+
+			vlan_tci_mask = match.mask->vlan_id |
+					match.key->vlan_dei << 12 |
+					match.key->vlan_priority << 13;
+
+			flow_spec->vlan_tci = htons(vlan_tci);
+			flow_mask->vlan_tci = htons(vlan_tci_mask);
+			req->features |= BIT_ULL(NPC_OUTER_VID);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+
+		flow_spec->ip4dst = match.key->dst;
+		flow_mask->ip4dst = match.mask->dst;
+		req->features |= BIT_ULL(NPC_DIP_IPV4);
+
+		flow_spec->ip4src = match.key->src;
+		flow_mask->ip4src = match.mask->src;
+		req->features |= BIT_ULL(NPC_SIP_IPV4);
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
+			netdev_err(nic->netdev,
+				   "Flow matching on IPv6 loopback addr is not supported\n");
+			return -EOPNOTSUPP;
+		}
+
+		if (!ipv6_addr_any(&match.mask->dst)) {
+			memcpy(&flow_spec->ip6dst,
+			       (struct in6_addr *)&match.key->dst,
+			       sizeof(flow_spec->ip6dst));
+			memcpy(&flow_mask->ip6dst,
+			       (struct in6_addr *)&match.mask->dst,
+			       sizeof(flow_spec->ip6dst));
+			req->features |= BIT_ULL(NPC_DIP_IPV6);
+		}
+
+		if (!ipv6_addr_any(&match.mask->src)) {
+			memcpy(&flow_spec->ip6src,
+			       (struct in6_addr *)&match.key->src,
+			       sizeof(flow_spec->ip6src));
+			memcpy(&flow_mask->ip6src,
+			       (struct in6_addr *)&match.mask->src,
+			       sizeof(flow_spec->ip6src));
+			req->features |= BIT_ULL(NPC_SIP_IPV6);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+
+		flow_spec->dport = match.key->dst;
+		flow_mask->dport = match.mask->dst;
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_DPORT_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_DPORT_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_DPORT_SCTP);
+
+		flow_spec->sport = match.key->src;
+		flow_mask->sport = match.mask->src;
+		if (ip_proto == IPPROTO_UDP)
+			req->features |= BIT_ULL(NPC_SPORT_UDP);
+		else if (ip_proto == IPPROTO_TCP)
+			req->features |= BIT_ULL(NPC_SPORT_TCP);
+		else if (ip_proto == IPPROTO_SCTP)
+			req->features |= BIT_ULL(NPC_SPORT_SCTP);
+	}
+
+	return otx2_tc_parse_actions(nic, &rule->action, req);
+}
+
+static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry)
+{
+	struct npc_delete_flow_req *req;
+	int err;
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_npc_delete_flow(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->entry = entry;
+
+	/* Send message to AF */
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	if (err) {
+		netdev_err(nic->netdev, "Failed to delete MCAM flow entry %d\n",
+			   entry);
+		mutex_unlock(&nic->mbox.lock);
+		return -EFAULT;
+	}
+	mutex_unlock(&nic->mbox.lock);
+
+	return 0;
+}
+
+static int otx2_tc_del_flow(struct otx2_nic *nic,
+			    struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct otx2_tc_flow *flow_node;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_err(nic->netdev, "tc flow not found for cookie 0x%lx\n",
+			   tc_flow_cmd->cookie);
+		return -EINVAL;
+	}
+
+	otx2_del_mcam_flow_entry(nic, flow_node->entry);
+
+	WARN_ON(rhashtable_remove_fast(&nic->tc_info.flow_table,
+				       &flow_node->node,
+				       nic->tc_info.flow_ht_params));
+	kfree_rcu(flow_node, rcu);
+
+	clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap);
+	tc_info->num_entries--;
+
+	return 0;
+}
+
+static int otx2_tc_add_flow(struct otx2_nic *nic,
+			    struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct otx2_tc_flow *new_node, *old_node;
+	struct npc_install_flow_req *req;
+	int rc;
+
+	if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT))
+		return -ENOMEM;
+
+	/* allocate memory for the new flow and it's node */
+	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+	new_node->cookie = tc_flow_cmd->cookie;
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	rc = otx2_tc_prepare_flow(nic, tc_flow_cmd, req);
+	if (rc) {
+		otx2_mbox_reset(&nic->mbox.mbox, 0);
+		mutex_unlock(&nic->mbox.lock);
+		return rc;
+	}
+
+	/* If a flow exists with the same cookie, delete it */
+	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					  &tc_flow_cmd->cookie,
+					  tc_info->flow_ht_params);
+	if (old_node)
+		otx2_tc_del_flow(nic, tc_flow_cmd);
+
+	if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) {
+		netdev_err(nic->netdev, "Not enough MCAM space to add the flow\n");
+		otx2_mbox_reset(&nic->mbox.mbox, 0);
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap,
+					       nic->flow_cfg->tc_max_flows);
+	req->channel = nic->hw.rx_chan_base;
+	req->entry = nic->flow_cfg->entry[nic->flow_cfg->tc_flower_offset +
+					  nic->flow_cfg->tc_max_flows - new_node->bitpos];
+	req->intf = NIX_INTF_RX;
+	req->set_cntr = 1;
+	new_node->entry = req->entry;
+
+	/* Send message to AF */
+	rc = otx2_sync_mbox_msg(&nic->mbox);
+	if (rc) {
+		netdev_err(nic->netdev, "Failed to install MCAM flow entry\n");
+		mutex_unlock(&nic->mbox.lock);
+		goto out;
+	}
+	mutex_unlock(&nic->mbox.lock);
+
+	/* add new flow to flow-table */
+	rc = rhashtable_insert_fast(&nic->tc_info.flow_table, &new_node->node,
+				    nic->tc_info.flow_ht_params);
+	if (rc) {
+		otx2_del_mcam_flow_entry(nic, req->entry);
+		kfree_rcu(new_node, rcu);
+		goto out;
+	}
+
+	set_bit(new_node->bitpos, tc_info->tc_entries_bitmap);
+	tc_info->num_entries++;
+out:
+	return rc;
+}
+
+static int otx2_setup_tc_cls_flower(struct otx2_nic *nic,
+				    struct flow_cls_offload *cls_flower)
+{
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return otx2_tc_add_flow(nic, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return otx2_tc_del_flow(nic, cls_flower);
+	case FLOW_CLS_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
+					  void *type_data, void *cb_priv)
+{
+	struct otx2_nic *nic = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return otx2_setup_tc_cls_flower(nic, type_data);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static LIST_HEAD(otx2_block_cb_list);
+
+static int otx2_setup_tc_block(struct net_device *netdev,
+			       struct flow_block_offload *f)
+{
+	struct otx2_nic *nic = netdev_priv(netdev);
+
+	if (f->block_shared)
+		return -EOPNOTSUPP;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	return flow_block_cb_setup_simple(f, &otx2_block_cb_list,
+					  otx2_setup_tc_block_ingress_cb,
+					  nic, nic, true);
+}
+
+int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		  void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return otx2_setup_tc_block(netdev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct rhashtable_params tc_flow_ht_params = {
+	.head_offset = offsetof(struct otx2_tc_flow, node),
+	.key_offset = offsetof(struct otx2_tc_flow, cookie),
+	.key_len = sizeof(((struct otx2_tc_flow *)0)->cookie),
+	.automatic_shrinking = true,
+};
+
+int otx2_init_tc(struct otx2_nic *nic)
+{
+	struct otx2_tc_info *tc = &nic->tc_info;
+
+	tc->flow_ht_params = tc_flow_ht_params;
+	return rhashtable_init(&tc->flow_table, &tc->flow_ht_params);
+}
+
+void otx2_shutdown_tc(struct otx2_nic *nic)
+{
+	struct otx2_tc_info *tc = &nic->tc_info;
+
+	rhashtable_destroy(&tc->flow_table);
+}
-- 
cgit v1.2.3


From d8ce30e0cf76afe2dd1acfd3f64f66a8afdfa539 Mon Sep 17 00:00:00 2001
From: Naveen Mamindlapalli <naveenm@marvell.com>
Date: Thu, 18 Mar 2021 15:32:14 +0530
Subject: octeontx2-pf: add tc flower stats handler for hw offloads

Add support to get the stats for tc flower flows that are
offloaded to hardware. To support this feature, added a
new AF mbox handler which returns the MCAM entry stats
for a flow that has hardware stat counter enabled.

Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h   | 14 +++++
 .../net/ethernet/marvell/octeontx2/af/rvu_npc.c    | 39 ++++++++++++
 .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c   | 70 +++++++++++++++++++++-
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 46f919625464..3af4d0ffcf7c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -216,6 +216,9 @@ M(NPC_MCAM_READ_ENTRY,	  0x600f, npc_mcam_read_entry,			\
 				  npc_mcam_read_entry_rsp)		\
 M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule,            \
 				   msg_req, npc_mcam_read_base_rule_rsp)  \
+M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats,                     \
+				   npc_mcam_get_stats_req,              \
+				   npc_mcam_get_stats_rsp)              \
 /* NIX mbox IDs (range 0x8000 - 0xFFFF) */				\
 M(NIX_LF_ALLOC,		0x8000, nix_lf_alloc,				\
 				 nix_lf_alloc_req, nix_lf_alloc_rsp)	\
@@ -1195,6 +1198,17 @@ struct npc_mcam_read_base_rule_rsp {
 	struct mcam_entry entry;
 };
 
+struct npc_mcam_get_stats_req {
+	struct mbox_msghdr hdr;
+	u16 entry; /* mcam entry */
+};
+
+struct npc_mcam_get_stats_rsp {
+	struct mbox_msghdr hdr;
+	u64 stat;  /* counter stats */
+	u8 stat_ena; /* enabled */
+};
+
 enum ptp_op {
 	PTP_OP_ADJFINE = 0,
 	PTP_OP_GET_CLOCK = 1,
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 6cce7ecad007..03248d280e47 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -2806,3 +2806,42 @@ read_entry:
 out:
 	return rc;
 }
+
+int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu,
+					  struct npc_mcam_get_stats_req *req,
+					  struct npc_mcam_get_stats_rsp *rsp)
+{
+	struct npc_mcam *mcam = &rvu->hw->mcam;
+	u16 index, cntr;
+	int blkaddr;
+	u64 regval;
+	u32 bank;
+
+	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0);
+	if (blkaddr < 0)
+		return NPC_MCAM_INVALID_REQ;
+
+	mutex_lock(&mcam->lock);
+
+	index = req->entry & (mcam->banksize - 1);
+	bank = npc_get_bank(mcam, req->entry);
+
+	/* read MCAM entry STAT_ACT register */
+	regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank));
+
+	if (!(regval & BIT_ULL(9))) {
+		rsp->stat_ena = 0;
+		mutex_unlock(&mcam->lock);
+		return 0;
+	}
+
+	cntr = regval & 0x1FF;
+
+	rsp->stat_ena = 1;
+	rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(cntr));
+	rsp->stat &= BIT_ULL(48) - 1;
+
+	mutex_unlock(&mcam->lock);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index a361eec568a2..43ef6303ed84 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -16,12 +16,20 @@
 
 #include "otx2_common.h"
 
+struct otx2_tc_flow_stats {
+	u64 bytes;
+	u64 pkts;
+	u64 used;
+};
+
 struct otx2_tc_flow {
 	struct rhash_head		node;
 	unsigned long			cookie;
 	u16				entry;
 	unsigned int			bitpos;
 	struct rcu_head			rcu;
+	struct otx2_tc_flow_stats	stats;
+	spinlock_t			lock; /* lock for stats */
 };
 
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
@@ -403,6 +411,66 @@ out:
 	return rc;
 }
 
+static int otx2_tc_get_flow_stats(struct otx2_nic *nic,
+				  struct flow_cls_offload *tc_flow_cmd)
+{
+	struct otx2_tc_info *tc_info = &nic->tc_info;
+	struct npc_mcam_get_stats_req *req;
+	struct npc_mcam_get_stats_rsp *rsp;
+	struct otx2_tc_flow_stats *stats;
+	struct otx2_tc_flow *flow_node;
+	int err;
+
+	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
+					   &tc_flow_cmd->cookie,
+					   tc_info->flow_ht_params);
+	if (!flow_node) {
+		netdev_info(nic->netdev, "tc flow not found for cookie %lx",
+			    tc_flow_cmd->cookie);
+		return -EINVAL;
+	}
+
+	mutex_lock(&nic->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_npc_mcam_entry_stats(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->entry = flow_node->entry;
+
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	if (err) {
+		netdev_err(nic->netdev, "Failed to get stats for MCAM flow entry %d\n",
+			   req->entry);
+		mutex_unlock(&nic->mbox.lock);
+		return -EFAULT;
+	}
+
+	rsp = (struct npc_mcam_get_stats_rsp *)otx2_mbox_get_rsp
+		(&nic->mbox.mbox, 0, &req->hdr);
+	if (IS_ERR(rsp)) {
+		mutex_unlock(&nic->mbox.lock);
+		return PTR_ERR(rsp);
+	}
+
+	mutex_unlock(&nic->mbox.lock);
+
+	if (!rsp->stat_ena)
+		return -EINVAL;
+
+	stats = &flow_node->stats;
+
+	spin_lock(&flow_node->lock);
+	flow_stats_update(&tc_flow_cmd->stats, 0x0, rsp->stat - stats->pkts, 0x0, 0x0,
+			  FLOW_ACTION_HW_STATS_IMMEDIATE);
+	stats->pkts = rsp->stat;
+	spin_unlock(&flow_node->lock);
+
+	return 0;
+}
+
 static int otx2_setup_tc_cls_flower(struct otx2_nic *nic,
 				    struct flow_cls_offload *cls_flower)
 {
@@ -412,7 +480,7 @@ static int otx2_setup_tc_cls_flower(struct otx2_nic *nic,
 	case FLOW_CLS_DESTROY:
 		return otx2_tc_del_flow(nic, cls_flower);
 	case FLOW_CLS_STATS:
-		return -EOPNOTSUPP;
+		return otx2_tc_get_flow_stats(nic, cls_flower);
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From e638a83f167ee9c4abd3faa3be460afe5a688490 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@marvell.com>
Date: Thu, 18 Mar 2021 15:32:15 +0530
Subject: octeontx2-pf: TC_MATCHALL egress ratelimiting offload

Add TC_MATCHALL egress ratelimiting offload support with POLICE
action for entire traffic going out of the interface.

Eg: To ratelimit egress traffic to 100Mbps

$ ethtool -K eth0 hw-tc-offload on
$ tc qdisc add dev eth0 clsact
$ tc filter add dev eth0 egress matchall skip_sw \
                action police rate 100Mbit burst 16Kbit

HW supports a max burst size of ~128KB.
Only one ratelimiting filter can be installed at a time.

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   |   1 +
 .../net/ethernet/marvell/octeontx2/nic/otx2_reg.h  |   1 +
 .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c   | 238 ++++++++++++++++++++-
 3 files changed, 236 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 992aa93178e1..45730d0d92f2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -318,6 +318,7 @@ struct otx2_nic {
 #define OTX2_FLAG_RX_PAUSE_ENABLED		BIT_ULL(9)
 #define OTX2_FLAG_TX_PAUSE_ENABLED		BIT_ULL(10)
 #define OTX2_FLAG_TC_FLOWER_SUPPORT		BIT_ULL(11)
+#define OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED	BIT_ULL(12)
 	u64			flags;
 
 	struct otx2_qset	qset;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
index 21b811c6ee0f..f4fd72ee9a25 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h
@@ -152,6 +152,7 @@
 #define NIX_AF_TL3X_SCHEDULE(a)		(0x1000 | (a) << 16)
 #define NIX_AF_TL4X_PARENT(a)		(0x1288 | (a) << 16)
 #define NIX_AF_TL4X_SCHEDULE(a)		(0x1200 | (a) << 16)
+#define NIX_AF_TL4X_PIR(a)		(0x1230 | (a) << 16)
 #define NIX_AF_MDQX_SCHEDULE(a)		(0x1400 | (a) << 16)
 #define NIX_AF_MDQX_PARENT(a)		(0x1480 | (a) << 16)
 #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b)	(0x1700 | (a) << 16 | (b) << 3)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 43ef6303ed84..2f75cfc5a23a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -7,6 +7,7 @@
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/rhashtable.h>
+#include <linux/bitfield.h>
 #include <net/flow_dissector.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
@@ -16,6 +17,21 @@
 
 #include "otx2_common.h"
 
+/* Egress rate limiting definitions */
+#define MAX_BURST_EXPONENT		0x0FULL
+#define MAX_BURST_MANTISSA		0xFFULL
+#define MAX_BURST_SIZE			130816ULL
+#define MAX_RATE_DIVIDER_EXPONENT	12ULL
+#define MAX_RATE_EXPONENT		0x0FULL
+#define MAX_RATE_MANTISSA		0xFFULL
+
+/* Bitfields in NIX_TLX_PIR register */
+#define TLX_RATE_MANTISSA		GENMASK_ULL(8, 1)
+#define TLX_RATE_EXPONENT		GENMASK_ULL(12, 9)
+#define TLX_RATE_DIVIDER_EXPONENT	GENMASK_ULL(16, 13)
+#define TLX_BURST_MANTISSA		GENMASK_ULL(36, 29)
+#define TLX_BURST_EXPONENT		GENMASK_ULL(40, 37)
+
 struct otx2_tc_flow_stats {
 	u64 bytes;
 	u64 pkts;
@@ -32,6 +48,178 @@ struct otx2_tc_flow {
 	spinlock_t			lock; /* lock for stats */
 };
 
+static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+				      u32 *burst_mantissa)
+{
+	unsigned int tmp;
+
+	/* Burst is calculated as
+	 * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+	 * Max supported burst size is 130,816 bytes.
+	 */
+	burst = min_t(u32, burst, MAX_BURST_SIZE);
+	if (burst) {
+		*burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+		tmp = burst - rounddown_pow_of_two(burst);
+		if (burst < MAX_BURST_MANTISSA)
+			*burst_mantissa = tmp * 2;
+		else
+			*burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+	} else {
+		*burst_exp = MAX_BURST_EXPONENT;
+		*burst_mantissa = MAX_BURST_MANTISSA;
+	}
+}
+
+static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+				     u32 *mantissa, u32 *div_exp)
+{
+	unsigned int tmp;
+
+	/* Rate calculation by hardware
+	 *
+	 * PIR_ADD = ((256 + mantissa) << exp) / 256
+	 * rate = (2 * PIR_ADD) / ( 1 << div_exp)
+	 * The resultant rate is in Mbps.
+	 */
+
+	/* 2Mbps to 100Gbps can be expressed with div_exp = 0.
+	 * Setting this to '0' will ease the calculation of
+	 * exponent and mantissa.
+	 */
+	*div_exp = 0;
+
+	if (maxrate) {
+		*exp = ilog2(maxrate) ? ilog2(maxrate) - 1 : 0;
+		tmp = maxrate - rounddown_pow_of_two(maxrate);
+		if (maxrate < MAX_RATE_MANTISSA)
+			*mantissa = tmp * 2;
+		else
+			*mantissa = tmp / (1ULL << (*exp - 7));
+	} else {
+		/* Instead of disabling rate limiting, set all values to max */
+		*exp = MAX_RATE_EXPONENT;
+		*mantissa = MAX_RATE_MANTISSA;
+	}
+}
+
+static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
+{
+	struct otx2_hw *hw = &nic->hw;
+	struct nix_txschq_config *req;
+	u32 burst_exp, burst_mantissa;
+	u32 exp, mantissa, div_exp;
+	int txschq, err;
+
+	/* All SQs share the same TL4, so pick the first scheduler */
+	txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+
+	/* Get exponent and mantissa values from the desired rate */
+	otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+	otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+
+	mutex_lock(&nic->mbox.lock);
+	req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+	if (!req) {
+		mutex_unlock(&nic->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->lvl = NIX_TXSCH_LVL_TL4;
+	req->num_regs = 1;
+	req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+	req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+			 FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+			 FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+			 FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+			 FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+
+	err = otx2_sync_mbox_msg(&nic->mbox);
+	mutex_unlock(&nic->mbox.lock);
+	return err;
+}
+
+static int otx2_tc_validate_flow(struct otx2_nic *nic,
+				 struct flow_action *actions,
+				 struct netlink_ext_ack *extack)
+{
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
+	if (!flow_action_has_entries(actions)) {
+		NL_SET_ERR_MSG_MOD(extack, "MATCHALL offload called with no action");
+		return -EINVAL;
+	}
+
+	if (!flow_offload_has_one_action(actions)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Egress MATCHALL offload supports only 1 policing action");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+					   struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct flow_action *actions = &cls->rule->action;
+	struct flow_action_entry *entry;
+	u32 rate;
+	int err;
+
+	err = otx2_tc_validate_flow(nic, actions, extack);
+	if (err)
+		return err;
+
+	if (nic->flags & OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only one Egress MATCHALL ratelimitter can be offloaded");
+		return -ENOMEM;
+	}
+
+	entry = &cls->rule->action.entries[0];
+	switch (entry->id) {
+	case FLOW_ACTION_POLICE:
+		if (entry->police.rate_pkt_ps) {
+			NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second");
+			return -EOPNOTSUPP;
+		}
+		/* Convert bytes per second to Mbps */
+		rate = entry->police.rate_bytes_ps * 8;
+		rate = max_t(u32, rate / 1000000, 1);
+		err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+		if (err)
+			return err;
+		nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only police action is supported with Egress MATCHALL offload");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic,
+					  struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	int err;
+
+	if (nic->flags & OTX2_FLAG_INTF_DOWN) {
+		NL_SET_ERR_MSG_MOD(extack, "Interface not initialized");
+		return -EINVAL;
+	}
+
+	err = otx2_set_matchall_egress_rate(nic, 0, 0);
+	nic->flags &= ~OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED;
+	return err;
+}
+
 static int otx2_tc_parse_actions(struct otx2_nic *nic,
 				 struct flow_action *flow_action,
 				 struct npc_install_flow_req *req)
@@ -504,22 +692,64 @@ static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type,
 	return -EOPNOTSUPP;
 }
 
+static int otx2_setup_tc_egress_matchall(struct otx2_nic *nic,
+					 struct tc_cls_matchall_offload *cls_matchall)
+{
+	switch (cls_matchall->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return otx2_tc_egress_matchall_install(nic, cls_matchall);
+	case TC_CLSMATCHALL_DESTROY:
+		return otx2_tc_egress_matchall_delete(nic, cls_matchall);
+	case TC_CLSMATCHALL_STATS:
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int otx2_setup_tc_block_egress_cb(enum tc_setup_type type,
+					 void *type_data, void *cb_priv)
+{
+	struct otx2_nic *nic = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return otx2_setup_tc_egress_matchall(nic, type_data);
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static LIST_HEAD(otx2_block_cb_list);
 
 static int otx2_setup_tc_block(struct net_device *netdev,
 			       struct flow_block_offload *f)
 {
 	struct otx2_nic *nic = netdev_priv(netdev);
+	flow_setup_cb_t *cb;
+	bool ingress;
 
 	if (f->block_shared)
 		return -EOPNOTSUPP;
 
-	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+		cb = otx2_setup_tc_block_ingress_cb;
+		ingress = true;
+	} else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+		cb = otx2_setup_tc_block_egress_cb;
+		ingress = false;
+	} else {
 		return -EOPNOTSUPP;
+	}
 
-	return flow_block_cb_setup_simple(f, &otx2_block_cb_list,
-					  otx2_setup_tc_block_ingress_cb,
-					  nic, nic, true);
+	return flow_block_cb_setup_simple(f, &otx2_block_cb_list, cb,
+					  nic, nic, ingress);
 }
 
 int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-- 
cgit v1.2.3


From bd0f670e793137fac1f8cc95f1feac99cc081cba Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 19 Mar 2021 01:22:03 +0800
Subject: net: stmmac: restructure tc implementation for RX VLAN Priority
 steering

The current tc_add_flow() and tc_del_flow() use hardware L3 & L4 filters
as offloading. The number of L3/L4 filters is read from L3L4FNUM field
from MAC_HW_Feature1 register and is used to alloc priv->tc_entries[].

For RX frame steering based on VLAN priority offloading, we use
MAC_RXQ_CTRL2 & MAC_RXQ_CTRL3 registers and all VLAN priority level
can be configured independent from L3 & L4 filters.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 44bb133c3000..f4d8d7980ec5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -598,6 +598,26 @@ static int tc_del_flow(struct stmmac_priv *priv,
 	return ret;
 }
 
+static int tc_add_flow_cls(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls)
+{
+	int ret;
+
+	ret = tc_add_flow(priv, cls);
+
+	return ret;
+}
+
+static int tc_del_flow_cls(struct stmmac_priv *priv,
+			   struct flow_cls_offload *cls)
+{
+	int ret;
+
+	ret = tc_del_flow(priv, cls);
+
+	return ret;
+}
+
 static int tc_setup_cls(struct stmmac_priv *priv,
 			struct flow_cls_offload *cls)
 {
@@ -609,10 +629,10 @@ static int tc_setup_cls(struct stmmac_priv *priv,
 
 	switch (cls->command) {
 	case FLOW_CLS_REPLACE:
-		ret = tc_add_flow(priv, cls);
+		ret = tc_add_flow_cls(priv, cls);
 		break;
 	case FLOW_CLS_DESTROY:
-		ret = tc_del_flow(priv, cls);
+		ret = tc_del_flow_cls(priv, cls);
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 0e039f5cf86ce2fcb62077a163e7ff3d7b7b7cf3 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 19 Mar 2021 01:22:04 +0800
Subject: net: stmmac: add RX frame steering based on VLAN priority in tc
 flower

We extend tc flower to support configuration of VLAN priority-based RX
frame steering hardware offloading.

To map VLAN <PCP> to Traffic Class <TC>:
  $ tc filter add dev <IFNAME> parent ffff: protocol 802.1Q flower \
       vlan_prio <PCP> hw_tc <TC>

  Note: <TC> < N whereby "tc qdisc ... num_tc N ..."

To delete all tc flower configurations:
  $ tc qdisc delete dev <IFNAME> ingress

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 65 ++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index f4d8d7980ec5..b80cb2985b39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -598,14 +598,73 @@ static int tc_del_flow(struct stmmac_priv *priv,
 	return ret;
 }
 
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int tc_add_vlan_flow(struct stmmac_priv *priv,
+			    struct flow_cls_offload *cls)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
+	struct flow_match_vlan match;
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+		return -EINVAL;
+
+	if (tc < 0) {
+		netdev_err(priv->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	flow_rule_match_vlan(rule, &match);
+
+	if (match.mask->vlan_priority) {
+		u32 prio;
+
+		if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+			netdev_err(priv->dev, "Only full mask is supported for VLAN priority");
+			return -EINVAL;
+		}
+
+		prio = BIT(match.key->vlan_priority);
+		stmmac_rx_queue_prio(priv, priv->hw, prio, tc);
+	}
+
+	return 0;
+}
+
+static int tc_del_vlan_flow(struct stmmac_priv *priv,
+			    struct flow_cls_offload *cls)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	int tc = tc_classid_to_hwtc(priv->dev, cls->classid);
+
+	/* Nothing to do here */
+	if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN))
+		return -EINVAL;
+
+	if (tc < 0) {
+		netdev_err(priv->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	stmmac_rx_queue_prio(priv, priv->hw, 0, tc);
+
+	return 0;
+}
+
 static int tc_add_flow_cls(struct stmmac_priv *priv,
 			   struct flow_cls_offload *cls)
 {
 	int ret;
 
 	ret = tc_add_flow(priv, cls);
+	if (!ret)
+		return ret;
 
-	return ret;
+	return tc_add_vlan_flow(priv, cls);
 }
 
 static int tc_del_flow_cls(struct stmmac_priv *priv,
@@ -614,8 +673,10 @@ static int tc_del_flow_cls(struct stmmac_priv *priv,
 	int ret;
 
 	ret = tc_del_flow(priv, cls);
+	if (!ret)
+		return ret;
 
-	return ret;
+	return tc_del_vlan_flow(priv, cls);
 }
 
 static int tc_setup_cls(struct stmmac_priv *priv,
-- 
cgit v1.2.3


From e49aa315cb01828e7aec0710f3b954e80ba70dd8 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Thu, 18 Mar 2021 08:50:52 +0800
Subject: net: stmmac: EST interrupts handling and error reporting

Enabled EST related interrupts as below:
1) Constant Gate Control Error (CGCE)
2) Head-of-Line Blocking due to Scheduling (HLBS)
3) Head-of-Line Blocking due to Frame Size (HLBF).
4) Base Time Register error (BTRE)
5) Switch to S/W owned list Complete (SWLC)

For HLBS, the user will get the info of all the queues that shows this
error. For HLBF, the user will get the info of all the queue with the
latest frame size which causes the error. Frame size 0 indicates no
error.

The ISR handling takes place when EST feature is enabled by user.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Co-developed-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c      | 75 +++++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h      | 32 ++++++++++
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |  4 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  3 +
 4 files changed, 114 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 8f7ac24545ef..809015f59ee2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -595,9 +595,84 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 		ctrl &= ~EEST;
 
 	writel(ctrl, ioaddr + MTL_EST_CONTROL);
+
+	/* Configure EST interrupt */
+	if (cfg->enable)
+		ctrl = (IECGCE | IEHS | IEHF | IEBE | IECC);
+	else
+		ctrl = 0;
+
+	writel(ctrl, ioaddr + MTL_EST_INT_EN);
+
 	return 0;
 }
 
+void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
+			  u32 txqcnt)
+{
+	u32 status, value, feqn, hbfq, hbfs, btrl;
+	u32 txqcnt_mask = (1 << txqcnt) - 1;
+
+	status = readl(ioaddr + MTL_EST_STATUS);
+
+	value = (CGCE | HLBS | HLBF | BTRE | SWLC);
+
+	/* Return if there is no error */
+	if (!(status & value))
+		return;
+
+	if (status & CGCE) {
+		/* Clear Interrupt */
+		writel(CGCE, ioaddr + MTL_EST_STATUS);
+	}
+
+	if (status & HLBS) {
+		value = readl(ioaddr + MTL_EST_SCH_ERR);
+		value &= txqcnt_mask;
+
+		/* Clear Interrupt */
+		writel(value, ioaddr + MTL_EST_SCH_ERR);
+
+		/* Collecting info to shows all the queues that has HLBS
+		 * issue. The only way to clear this is to clear the
+		 * statistic
+		 */
+		if (net_ratelimit())
+			netdev_err(dev, "EST: HLB(sched) Queue 0x%x\n", value);
+	}
+
+	if (status & HLBF) {
+		value = readl(ioaddr + MTL_EST_FRM_SZ_ERR);
+		feqn = value & txqcnt_mask;
+
+		value = readl(ioaddr + MTL_EST_FRM_SZ_CAP);
+		hbfq = (value & SZ_CAP_HBFQ_MASK(txqcnt)) >> SZ_CAP_HBFQ_SHIFT;
+		hbfs = value & SZ_CAP_HBFS_MASK;
+
+		/* Clear Interrupt */
+		writel(feqn, ioaddr + MTL_EST_FRM_SZ_ERR);
+
+		if (net_ratelimit())
+			netdev_err(dev, "EST: HLB(size) Queue %u Size %u\n",
+				   hbfq, hbfs);
+	}
+
+	if (status & BTRE) {
+		btrl = (status & BTRL) >> BTRL_SHIFT;
+
+		if (net_ratelimit())
+			netdev_info(dev, "EST: BTR Error Loop Count %u\n",
+				    btrl);
+
+		writel(BTRE, ioaddr + MTL_EST_STATUS);
+	}
+
+	if (status & SWLC) {
+		writel(SWLC, ioaddr + MTL_EST_STATUS);
+		netdev_info(dev, "EST: SWOL has been switched\n");
+	}
+}
+
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable)
 {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 56b0762c1276..7174f5e1501b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -38,6 +38,36 @@
 #define PTOV_SHIFT			24
 #define SSWL				BIT(1)
 #define EEST				BIT(0)
+
+#define MTL_EST_STATUS			0x00000c58
+#define BTRL				GENMASK(11, 8)
+#define BTRL_SHIFT			8
+#define BTRL_MAX			(0xF << BTRL_SHIFT)
+#define SWOL				BIT(7)
+#define SWOL_SHIFT			7
+#define CGCE				BIT(4)
+#define HLBS				BIT(3)
+#define HLBF				BIT(2)
+#define BTRE				BIT(1)
+#define SWLC				BIT(0)
+
+#define MTL_EST_SCH_ERR			0x00000c60
+#define MTL_EST_FRM_SZ_ERR		0x00000c64
+#define MTL_EST_FRM_SZ_CAP		0x00000c68
+#define SZ_CAP_HBFS_MASK		GENMASK(14, 0)
+#define SZ_CAP_HBFQ_SHIFT		16
+#define SZ_CAP_HBFQ_MASK(_val)		({ typeof(_val) (val) = (_val);	\
+					((val) > 4 ? GENMASK(18, 16) :	\
+					 (val) > 2 ? GENMASK(17, 16) :	\
+					 BIT(16)); })
+
+#define MTL_EST_INT_EN			0x00000c70
+#define IECGCE				CGCE
+#define IEHS				HLBS
+#define IEHF				HLBF
+#define IEBE				BTRE
+#define IECC				SWLC
+
 #define MTL_EST_GCL_CONTROL		0x00000c80
 #define BTR_LOW				0x0
 #define BTR_HIGH			0x1
@@ -111,6 +141,8 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
 			   u32 sub_second_inc, u32 systime_flags);
 int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 			 unsigned int ptp_rate);
+void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
+			   u32 txqcnt);
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index da9996a985f6..4bcdedf291a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -393,6 +393,8 @@ struct stmmac_ops {
 	void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
 	int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg,
 			     unsigned int ptp_rate);
+	void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev,
+			       u32 txqcnt);
 	void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			      bool enable);
 };
@@ -491,6 +493,8 @@ struct stmmac_ops {
 	stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
 #define stmmac_est_configure(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, est_configure, __args)
+#define stmmac_est_irq_status(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, est_irq_status, __args)
 #define stmmac_fpe_configure(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, fpe_configure, __args)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1b34373a1918..0d74d1102aba 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4314,6 +4314,9 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	if (stmmac_safety_feat_interrupt(priv))
 		return IRQ_HANDLED;
 
+	if (priv->dma_cap.estsel)
+		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev, tx_cnt);
+
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || xmac) {
 		int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
-- 
cgit v1.2.3


From 9f298959191b0a3a8451ad308a68a9d697ea6819 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 18 Mar 2021 08:50:53 +0800
Subject: net: stmmac: Add EST errors into ethtool statistic

Below EST errors are added into ethtool statistic:
1) Constant Gate Control Error (CGCE):
   The counter "mtl_est_cgce" increases everytime CGCE interrupt is
   triggered.

2) Head-of-Line Blocking due to Scheduling (HLBS):
   The counter "mtl_est_hlbs" increases everytime HLBS interrupt is
   triggered.

3) Head-of-Line Blocking due to Frame Size (HLBF):
   The counter "mtl_est_hlbf" increases everytime HLBF interrupt is
   triggered.

4) Base Time Register error (BTRE):
   The counter "mtl_est_btre" increases everytime BTRE interrupt is
   triggered but BTRL not reaches maximum value of 15.

5) Base Time Register Error Loop Count (BTRL) reaches maximum value:
   The counter "mtl_est_btrlm" increases everytime BTRE interrupt is
   triggered and BTRL value reaches maximum value of 15.

Please refer to MTL_EST_STATUS register in DesignWare Cores Ethernet
Quality-of-Service Databook for more detail explanation.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Co-developed-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h         |  6 ++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c         | 13 ++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h         |  2 +-
 drivers/net/ethernet/stmicro/stmmac/hwif.h           |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c |  6 ++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c    |  3 ++-
 6 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 6f271c46368d..1c0c60bdf854 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -182,6 +182,12 @@ struct stmmac_extra_stats {
 	/* TSO */
 	unsigned long tx_tso_frames;
 	unsigned long tx_tso_nfrags;
+	/* EST */
+	unsigned long mtl_est_cgce;
+	unsigned long mtl_est_hlbs;
+	unsigned long mtl_est_hlbf;
+	unsigned long mtl_est_btre;
+	unsigned long mtl_est_btrlm;
 };
 
 /* Safety Feature statistics exposed by ethtool */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 809015f59ee2..0ae85f8adf67 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -608,7 +608,7 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 }
 
 void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
-			  u32 txqcnt)
+			  struct stmmac_extra_stats *x, u32 txqcnt)
 {
 	u32 status, value, feqn, hbfq, hbfs, btrl;
 	u32 txqcnt_mask = (1 << txqcnt) - 1;
@@ -624,12 +624,16 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 	if (status & CGCE) {
 		/* Clear Interrupt */
 		writel(CGCE, ioaddr + MTL_EST_STATUS);
+
+		x->mtl_est_cgce++;
 	}
 
 	if (status & HLBS) {
 		value = readl(ioaddr + MTL_EST_SCH_ERR);
 		value &= txqcnt_mask;
 
+		x->mtl_est_hlbs++;
+
 		/* Clear Interrupt */
 		writel(value, ioaddr + MTL_EST_SCH_ERR);
 
@@ -649,6 +653,8 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 		hbfq = (value & SZ_CAP_HBFQ_MASK(txqcnt)) >> SZ_CAP_HBFQ_SHIFT;
 		hbfs = value & SZ_CAP_HBFS_MASK;
 
+		x->mtl_est_hlbf++;
+
 		/* Clear Interrupt */
 		writel(feqn, ioaddr + MTL_EST_FRM_SZ_ERR);
 
@@ -658,6 +664,11 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 	}
 
 	if (status & BTRE) {
+		if ((status & BTRL) == BTRL_MAX)
+			x->mtl_est_btrlm++;
+		else
+			x->mtl_est_btre++;
+
 		btrl = (status & BTRL) >> BTRL_SHIFT;
 
 		if (net_ratelimit())
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 7174f5e1501b..709bbfc9ae61 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -142,7 +142,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
 int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
 			 unsigned int ptp_rate);
 void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
-			   u32 txqcnt);
+			   struct stmmac_extra_stats *x, u32 txqcnt);
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 4bcdedf291a5..692541c7b419 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -394,7 +394,7 @@ struct stmmac_ops {
 	int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg,
 			     unsigned int ptp_rate);
 	void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev,
-			       u32 txqcnt);
+			       struct stmmac_extra_stats *x, u32 txqcnt);
 	void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			      bool enable);
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index a78d5b0686bf..61b11639ee0c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -158,6 +158,12 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	/* TSO */
 	STMMAC_STAT(tx_tso_frames),
 	STMMAC_STAT(tx_tso_nfrags),
+	/* EST */
+	STMMAC_STAT(mtl_est_cgce),
+	STMMAC_STAT(mtl_est_hlbs),
+	STMMAC_STAT(mtl_est_hlbf),
+	STMMAC_STAT(mtl_est_btre),
+	STMMAC_STAT(mtl_est_btrlm),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0d74d1102aba..8d7015d3a537 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4315,7 +4315,8 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		return IRQ_HANDLED;
 
 	if (priv->dma_cap.estsel)
-		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev, tx_cnt);
+		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev,
+				      &priv->xstats, tx_cnt);
 
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || xmac) {
-- 
cgit v1.2.3


From a5538a777b73b35750ed1ffff8c1ef539e861624 Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Wed, 17 Mar 2021 10:23:17 +0100
Subject: net: dsa: b53: mmap: Add device tree support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add device tree support to b53_mmap.c while keeping platform devices support.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_mmap.c | 55 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index c628d0980c0b..82680e083cc2 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -16,6 +16,7 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <linux/bits.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
@@ -228,11 +229,65 @@ static const struct b53_io_ops b53_mmap_ops = {
 	.write64 = b53_mmap_write64,
 };
 
+static int b53_mmap_probe_of(struct platform_device *pdev,
+			     struct b53_platform_data **ppdata)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *of_ports, *of_port;
+	struct device *dev = &pdev->dev;
+	struct b53_platform_data *pdata;
+	void __iomem *mem;
+
+	mem = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mem))
+		return PTR_ERR(mem);
+
+	pdata = devm_kzalloc(dev, sizeof(struct b53_platform_data),
+			     GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	pdata->regs = mem;
+	pdata->chip_id = BCM63XX_DEVICE_ID;
+	pdata->big_endian = of_property_read_bool(np, "big-endian");
+
+	of_ports = of_get_child_by_name(np, "ports");
+	if (!of_ports) {
+		dev_err(dev, "no ports child node found\n");
+		return -EINVAL;
+	}
+
+	for_each_available_child_of_node(of_ports, of_port) {
+		u32 reg;
+
+		if (of_property_read_u32(of_port, "reg", &reg))
+			continue;
+
+		if (reg < B53_CPU_PORT)
+			pdata->enabled_ports |= BIT(reg);
+	}
+
+	of_node_put(of_ports);
+	*ppdata = pdata;
+
+	return 0;
+}
+
 static int b53_mmap_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
 	struct b53_platform_data *pdata = pdev->dev.platform_data;
 	struct b53_mmap_priv *priv;
 	struct b53_device *dev;
+	int ret;
+
+	if (!pdata && np) {
+		ret = b53_mmap_probe_of(pdev, &pdata);
+		if (ret) {
+			dev_err(&pdev->dev, "OF probe error\n");
+			return ret;
+		}
+	}
 
 	if (!pdata)
 		return -EINVAL;
-- 
cgit v1.2.3


From 55cfeb396965c3906a84d09a9c487d065e37773b Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 18 Mar 2021 09:01:42 +0100
Subject: net: dsa: bcm_sf2: add function finding RGMII register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simple macro like REG_RGMII_CNTRL_P() is insufficient as:
1. It doesn't validate port argument
2. It doesn't support chipsets with non-lineral RGMII regs layout

Missing port validation could result in getting register offset from out
of array. Random memory -> random offset -> random reads/writes. It
affected e.g. BCM4908 for REG_RGMII_CNTRL_P(7).

Fixes: a78e86ed586d ("net: dsa: bcm_sf2: Prepare for different register layouts")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c      | 49 ++++++++++++++++++++++++++++++++++++------
 drivers/net/dsa/bcm_sf2_regs.h |  2 --
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index d8e6dd371468..044972b7d118 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -32,6 +32,31 @@
 #include "b53/b53_priv.h"
 #include "b53/b53_regs.h"
 
+static u16 bcm_sf2_reg_rgmii_cntrl(struct bcm_sf2_priv *priv, int port)
+{
+	switch (priv->type) {
+	case BCM4908_DEVICE_ID:
+		/* TODO */
+		break;
+	default:
+		switch (port) {
+		case 0:
+			return REG_RGMII_0_CNTRL;
+		case 1:
+			return REG_RGMII_1_CNTRL;
+		case 2:
+			return REG_RGMII_2_CNTRL;
+		default:
+			break;
+		}
+	}
+
+	WARN_ONCE(1, "Unsupported port %d\n", port);
+
+	/* RO fallback reg */
+	return REG_SWITCH_STATUS;
+}
+
 /* Return the number of active ports, not counting the IMP (CPU) port */
 static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds)
 {
@@ -688,6 +713,7 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 id_mode_dis = 0, port_mode;
+	u32 reg_rgmii_ctrl;
 	u32 reg;
 
 	if (port == core_readl(priv, CORE_IMP0_PRT_ID))
@@ -711,10 +737,12 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 		return;
 	}
 
+	reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 	/* Clear id_mode_dis bit, and the existing port mode, let
 	 * RGMII_MODE_EN bet set by mac_link_{up,down}
 	 */
-	reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+	reg = reg_readl(priv, reg_rgmii_ctrl);
 	reg &= ~ID_MODE_DIS;
 	reg &= ~(PORT_MODE_MASK << PORT_MODE_SHIFT);
 
@@ -722,13 +750,14 @@ static void bcm_sf2_sw_mac_config(struct dsa_switch *ds, int port,
 	if (id_mode_dis)
 		reg |= ID_MODE_DIS;
 
-	reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+	reg_writel(priv, reg, reg_rgmii_ctrl);
 }
 
 static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port,
 				    phy_interface_t interface, bool link)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+	u32 reg_rgmii_ctrl;
 	u32 reg;
 
 	if (!phy_interface_mode_is_rgmii(interface) &&
@@ -736,13 +765,15 @@ static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port,
 	    interface != PHY_INTERFACE_MODE_REVMII)
 		return;
 
+	reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 	/* If the link is down, just disable the interface to conserve power */
-	reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+	reg = reg_readl(priv, reg_rgmii_ctrl);
 	if (link)
 		reg |= RGMII_MODE_EN;
 	else
 		reg &= ~RGMII_MODE_EN;
-	reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+	reg_writel(priv, reg, reg_rgmii_ctrl);
 }
 
 static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port,
@@ -776,11 +807,15 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->dev->ports[port].eee;
-	u32 reg, offset;
 
 	bcm_sf2_sw_mac_link_set(ds, port, interface, true);
 
 	if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
+		u32 reg_rgmii_ctrl;
+		u32 reg, offset;
+
+		reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
+
 		if (priv->type == BCM4908_DEVICE_ID ||
 		    priv->type == BCM7445_DEVICE_ID)
 			offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
@@ -791,7 +826,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 		    interface == PHY_INTERFACE_MODE_RGMII_TXID ||
 		    interface == PHY_INTERFACE_MODE_MII ||
 		    interface == PHY_INTERFACE_MODE_REVMII) {
-			reg = reg_readl(priv, REG_RGMII_CNTRL_P(port));
+			reg = reg_readl(priv, reg_rgmii_ctrl);
 			reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
 
 			if (tx_pause)
@@ -799,7 +834,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 			if (rx_pause)
 				reg |= RX_PAUSE_EN;
 
-			reg_writel(priv, reg, REG_RGMII_CNTRL_P(port));
+			reg_writel(priv, reg, reg_rgmii_ctrl);
 		}
 
 		reg = SW_OVERRIDE | LINK_STS;
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index e297b09411f3..3d5515fe43cb 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -55,8 +55,6 @@ enum bcm_sf2_reg_offs {
 #define CROSSBAR_BCM4908_EXT_GPHY4	1
 #define CROSSBAR_BCM4908_EXT_RGMII	2
 
-#define REG_RGMII_CNTRL_P(x)		(REG_RGMII_0_CNTRL + (x))
-
 /* Relative to REG_RGMII_CNTRL */
 #define  RGMII_MODE_EN			(1 << 0)
 #define  ID_MODE_DIS			(1 << 1)
-- 
cgit v1.2.3


From 6859d91549341c2ad769d482de58129f080c0f04 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Thu, 18 Mar 2021 09:01:43 +0100
Subject: net: dsa: bcm_sf2: fix BCM4908 RGMII reg(s)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM4908 has only 1 RGMII reg for controlling port 7.

Fixes: 73b7a6047971 ("net: dsa: bcm_sf2: support BCM4908's integrated switch")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c      | 11 +++++++----
 drivers/net/dsa/bcm_sf2_regs.h |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 044972b7d118..7e0ca8012983 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -36,7 +36,12 @@ static u16 bcm_sf2_reg_rgmii_cntrl(struct bcm_sf2_priv *priv, int port)
 {
 	switch (priv->type) {
 	case BCM4908_DEVICE_ID:
-		/* TODO */
+		switch (port) {
+		case 7:
+			return REG_RGMII_11_CNTRL;
+		default:
+			break;
+		}
 		break;
 	default:
 		switch (port) {
@@ -1223,9 +1228,7 @@ static const u16 bcm_sf2_4908_reg_offsets[] = {
 	[REG_PHY_REVISION]	= 0x14,
 	[REG_SPHY_CNTRL]	= 0x24,
 	[REG_CROSSBAR]		= 0xc8,
-	[REG_RGMII_0_CNTRL]	= 0xe0,
-	[REG_RGMII_1_CNTRL]	= 0xec,
-	[REG_RGMII_2_CNTRL]	= 0xf8,
+	[REG_RGMII_11_CNTRL]	= 0x014c,
 	[REG_LED_0_CNTRL]	= 0x40,
 	[REG_LED_1_CNTRL]	= 0x4c,
 	[REG_LED_2_CNTRL]	= 0x58,
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 3d5515fe43cb..7bffc80f241f 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -21,6 +21,7 @@ enum bcm_sf2_reg_offs {
 	REG_RGMII_0_CNTRL,
 	REG_RGMII_1_CNTRL,
 	REG_RGMII_2_CNTRL,
+	REG_RGMII_11_CNTRL,
 	REG_LED_0_CNTRL,
 	REG_LED_1_CNTRL,
 	REG_LED_2_CNTRL,
-- 
cgit v1.2.3


From ea4fe7e842f6c7f972d795a8efc167c4bb33b62f Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:40 +0100
Subject: net-sysfs: convert xps_cpus_show to bitmap_zalloc

Use bitmap_zalloc instead of zalloc_cpumask_var in xps_cpus_show to
align with xps_rxqs_show. This will improve maintenance and allow us to
factorize the two functions. The function should behave the same.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 307628fdf380..3a083c0c9dd3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1367,8 +1367,7 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	int cpu, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
-	cpumask_var_t mask;
-	unsigned long index;
+	unsigned long *mask, index;
 
 	if (!netif_is_multiqueue(dev))
 		return -ENOENT;
@@ -1396,7 +1395,8 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		}
 	}
 
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+	mask = bitmap_zalloc(nr_cpu_ids, GFP_KERNEL);
+	if (!mask) {
 		ret = -ENOMEM;
 		goto err_rtnl_unlock;
 	}
@@ -1414,7 +1414,7 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 			for (i = map->len; i--;) {
 				if (map->queues[i] == index) {
-					cpumask_set_cpu(cpu, mask);
+					set_bit(cpu, mask);
 					break;
 				}
 			}
@@ -1424,8 +1424,8 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 	rtnl_unlock();
 
-	len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
-	free_cpumask_var(mask);
+	len = bitmap_print_to_pagebuf(false, buf, mask, nr_cpu_ids);
+	bitmap_free(mask);
 	return len < PAGE_SIZE ? len : -EINVAL;
 
 err_rtnl_unlock:
-- 
cgit v1.2.3


From d9a063d207f0e538b0f5aa8b04a6c14f88906a6d Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:41 +0100
Subject: net-sysfs: store the return of get_netdev_queue_index in an unsigned
 int

In net-sysfs, get_netdev_queue_index returns an unsigned int. Some of
its callers use an unsigned long to store the returned value. Update the
code to be consistent, this should only be cosmetic.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 3a083c0c9dd3..5dc4223f6b68 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1367,7 +1367,8 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	int cpu, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
-	unsigned long *mask, index;
+	unsigned long *mask;
+	unsigned int index;
 
 	if (!netif_is_multiqueue(dev))
 		return -ENOENT;
@@ -1437,7 +1438,7 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 			      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
-	unsigned long index;
+	unsigned int index;
 	cpumask_var_t mask;
 	int err;
 
@@ -1479,7 +1480,8 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
-	unsigned long *mask, index;
+	unsigned long *mask;
+	unsigned int index;
 
 	index = get_netdev_queue_index(queue);
 
@@ -1541,7 +1543,8 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 {
 	struct net_device *dev = queue->dev;
 	struct net *net = dev_net(dev);
-	unsigned long *mask, index;
+	unsigned long *mask;
+	unsigned int index;
 	int err;
 
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-- 
cgit v1.2.3


From 73f5e52b15e3aa4ef641264228cd9069b1948149 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:42 +0100
Subject: net-sysfs: make xps_cpus_show and xps_rxqs_show consistent

Make the implementations of xps_cpus_show and xps_rxqs_show to converge,
as the two share the same logic but diverted over time. This should not
modify their behaviour but will help future changes and improve
maintenance.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5dc4223f6b68..5f76183ad5bc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1364,7 +1364,7 @@ static const struct attribute_group dql_group = {
 static ssize_t xps_cpus_show(struct netdev_queue *queue,
 			     char *buf)
 {
-	int cpu, len, ret, num_tc = 1, tc = 0;
+	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
 	unsigned long *mask;
@@ -1404,23 +1404,26 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_cpus_map);
-	if (dev_maps) {
-		for_each_possible_cpu(cpu) {
-			int i, tci = cpu * num_tc + tc;
-			struct xps_map *map;
-
-			map = rcu_dereference(dev_maps->attr_map[tci]);
-			if (!map)
-				continue;
-
-			for (i = map->len; i--;) {
-				if (map->queues[i] == index) {
-					set_bit(cpu, mask);
-					break;
-				}
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for (j = -1; j = netif_attrmask_next(j, NULL, nr_cpu_ids),
+	     j < nr_cpu_ids;) {
+		int i, tci = j * num_tc + tc;
+		struct xps_map *map;
+
+		map = rcu_dereference(dev_maps->attr_map[tci]);
+		if (!map)
+			continue;
+
+		for (i = map->len; i--;) {
+			if (map->queues[i] == index) {
+				set_bit(j, mask);
+				break;
 			}
 		}
 	}
+out_no_maps:
 	rcu_read_unlock();
 
 	rtnl_unlock();
-- 
cgit v1.2.3


From 255c04a87f4381849fce9ed81e5efabf78a71a30 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:43 +0100
Subject: net: embed num_tc in the xps maps

The xps cpus/rxqs map is accessed using dev->num_tc, which is used when
allocating the map. But later updates of dev->num_tc can lead to having
a mismatch between the maps and how they're accessed. In such cases the
map values do not make any sense and out of bound accesses can occur
(that can be easily seen using KASAN).

This patch aims at fixing this by embedding num_tc into the maps, using
the value at the time the map is created. This brings two improvements:
- The maps can be accessed using the embedded num_tc, so we know for
  sure we won't have out of bound accesses.
- Checks can be made before accessing the maps so we know the values
  retrieved will make sense.

We also update __netif_set_xps_queue to conditionally copy old maps from
dev_maps in the new one only if the number of traffic classes from both
maps match.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +++++
 net/core/dev.c            | 63 ++++++++++++++++++++++++++++++-----------------
 net/core/net-sysfs.c      | 45 +++++++++++++--------------------
 3 files changed, 64 insertions(+), 50 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97254c089eb2..c38534c55ea1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -771,9 +771,15 @@ struct xps_map {
 
 /*
  * This structure holds all XPS maps for device.  Maps are indexed by CPU.
+ *
+ * We keep track of the number of traffic classes used when the struct is
+ * allocated, in num_tc. This will be used to navigate the maps, to ensure we're
+ * not crossing its upper bound, as the original dev->num_tc can be updated in
+ * the meantime.
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
+	s16 num_tc;
 	struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bc20eabd2b0..4e29d1994fdd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2491,7 +2491,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 				 struct xps_dev_maps *dev_maps,
 				 int cpu, u16 offset, u16 count)
 {
-	int num_tc = dev->num_tc ? : 1;
+	int num_tc = dev_maps->num_tc;
 	bool active = false;
 	int tci;
 
@@ -2634,10 +2634,10 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 {
 	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	bool active = false, copy = false;
 	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
-	bool active = false;
 	unsigned int nr_ids;
 
 	if (dev->num_tc) {
@@ -2672,19 +2672,29 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (maps_sz < L1_CACHE_BYTES)
 		maps_sz = L1_CACHE_BYTES;
 
+	/* The old dev_maps could be larger or smaller than the one we're
+	 * setting up now, as dev->num_tc could have been updated in between. We
+	 * could try to be smart, but let's be safe instead and only copy
+	 * foreign traffic classes if the two map sizes match.
+	 */
+	if (dev_maps && dev_maps->num_tc == num_tc)
+		copy = true;
+
 	/* allocate memory for queue storage */
 	for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
 	     j < nr_ids;) {
-		if (!new_dev_maps)
-			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
-			mutex_unlock(&xps_map_mutex);
-			return -ENOMEM;
+			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
+			if (!new_dev_maps) {
+				mutex_unlock(&xps_map_mutex);
+				return -ENOMEM;
+			}
+
+			new_dev_maps->num_tc = num_tc;
 		}
 
 		tci = j * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
-				 NULL;
+		map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
 
 		map = expand_xps_map(map, j, index, is_rxqs_map);
 		if (!map)
@@ -2706,7 +2716,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; copy && i--; tci++) {
 			/* fill in the new device map from the old device map */
 			map = xmap_dereference(dev_maps->attr_map[tci]);
 			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
@@ -2736,14 +2746,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 					numa_node_id = -1;
 			}
 #endif
-		} else if (dev_maps) {
+		} else if (copy) {
 			/* fill in the new device map from the old device map */
 			map = xmap_dereference(dev_maps->attr_map[tci]);
 			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
-		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
+		for (i = num_tc - tc, tci++; copy && --i; tci++) {
 			/* fill in the new device map from the old device map */
 			map = xmap_dereference(dev_maps->attr_map[tci]);
 			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
@@ -2761,11 +2771,18 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
-		for (i = num_tc, tci = j * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+		for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
 			map = xmap_dereference(dev_maps->attr_map[tci]);
-			if (map && map != new_map)
-				kfree_rcu(map, rcu);
+			if (!map)
+				continue;
+
+			if (copy) {
+				new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+				if (map == new_map)
+					continue;
+			}
+
+			kfree_rcu(map, rcu);
 		}
 	}
 
@@ -2789,12 +2806,12 @@ out_no_new_maps:
 	/* removes tx-queue from unused CPUs/rx-queues */
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
-		for (i = tc, tci = j * num_tc; i--; tci++)
+		for (i = tc, tci = j * dev_maps->num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
 		if (!netif_attr_test_mask(j, mask, nr_ids) ||
 		    !netif_attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
-		for (i = num_tc - tc, tci++; --i; tci++)
+		for (i = dev_maps->num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
 	}
 
@@ -2812,7 +2829,7 @@ error:
 	     j < nr_ids;) {
 		for (i = num_tc, tci = j * num_tc; i--; tci++) {
 			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
-			map = dev_maps ?
+			map = copy ?
 			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
@@ -3944,13 +3961,15 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 			       struct xps_dev_maps *dev_maps, unsigned int tci)
 {
+	int tc = netdev_get_prio_tc_map(dev, skb->priority);
 	struct xps_map *map;
 	int queue_index = -1;
 
-	if (dev->num_tc) {
-		tci *= dev->num_tc;
-		tci += netdev_get_prio_tc_map(dev, skb->priority);
-	}
+	if (tc >= dev_maps->num_tc)
+		return queue_index;
+
+	tci *= dev_maps->num_tc;
+	tci += tc;
 
 	map = rcu_dereference(dev_maps->attr_map[tci]);
 	if (map) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5f76183ad5bc..1364d0f39cb0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1364,9 +1364,9 @@ static const struct attribute_group dql_group = {
 static ssize_t xps_cpus_show(struct netdev_queue *queue,
 			     char *buf)
 {
-	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
+	int j, len, ret, tc = 0;
 	unsigned long *mask;
 	unsigned int index;
 
@@ -1378,22 +1378,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (dev->num_tc) {
-		/* Do not allow XPS on subordinate device directly */
-		num_tc = dev->num_tc;
-		if (num_tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
-
-		/* If queue belongs to subordinate dev use its map */
-		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+	/* If queue belongs to subordinate dev use its map */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
 
-		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
+	tc = netdev_txq_to_tc(dev, index);
+	if (tc < 0) {
+		ret = -EINVAL;
+		goto err_rtnl_unlock;
 	}
 
 	mask = bitmap_zalloc(nr_cpu_ids, GFP_KERNEL);
@@ -1404,12 +1395,12 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_cpus_map);
-	if (!dev_maps)
+	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
 	for (j = -1; j = netif_attrmask_next(j, NULL, nr_cpu_ids),
 	     j < nr_cpu_ids;) {
-		int i, tci = j * num_tc + tc;
+		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
 		map = rcu_dereference(dev_maps->attr_map[tci]);
@@ -1480,9 +1471,9 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 
 static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 {
-	int j, len, ret, num_tc = 1, tc = 0;
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
+	int j, len, ret, tc = 0;
 	unsigned long *mask;
 	unsigned int index;
 
@@ -1491,14 +1482,12 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	if (dev->num_tc) {
-		num_tc = dev->num_tc;
-		tc = netdev_txq_to_tc(dev, index);
-		if (tc < 0) {
-			ret = -EINVAL;
-			goto err_rtnl_unlock;
-		}
+	tc = netdev_txq_to_tc(dev, index);
+	if (tc < 0) {
+		ret = -EINVAL;
+		goto err_rtnl_unlock;
 	}
+
 	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
 	if (!mask) {
 		ret = -ENOMEM;
@@ -1507,12 +1496,12 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_rxqs_map);
-	if (!dev_maps)
+	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
 	for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
 	     j < dev->num_rx_queues;) {
-		int i, tci = j * num_tc + tc;
+		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
 		map = rcu_dereference(dev_maps->attr_map[tci]);
-- 
cgit v1.2.3


From 5478fcd0f48322e04ae6c173ad3a1959e066dc83 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:44 +0100
Subject: net: embed nr_ids in the xps maps

Embed nr_ids (the number of cpu for the xps cpus map, and the number of
rxqs for the xps cpus map) in dev_maps. That will help not accessing out
of bound memory if those values change after dev_maps was allocated.

Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++++
 net/core/dev.c            | 45 +++++++++++++++++++++------------------------
 net/core/net-sysfs.c      | 38 ++++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c38534c55ea1..09e73f5a8c78 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -772,6 +772,9 @@ struct xps_map {
 /*
  * This structure holds all XPS maps for device.  Maps are indexed by CPU.
  *
+ * We keep track of the number of cpus/rxqs used when the struct is allocated,
+ * in nr_ids. This will help not accessing out-of-bound memory.
+ *
  * We keep track of the number of traffic classes used when the struct is
  * allocated, in num_tc. This will be used to navigate the maps, to ensure we're
  * not crossing its upper bound, as the original dev->num_tc can be updated in
@@ -779,6 +782,7 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
+	unsigned int nr_ids;
 	s16 num_tc;
 	struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 4e29d1994fdd..7530c95970a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2524,14 +2524,14 @@ static void reset_xps_maps(struct net_device *dev,
 }
 
 static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
-			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
-			   u16 offset, u16 count, bool is_rxqs_map)
+			   struct xps_dev_maps *dev_maps, u16 offset, u16 count,
+			   bool is_rxqs_map)
 {
+	unsigned int nr_ids = dev_maps->nr_ids;
 	bool active = false;
 	int i, j;
 
-	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
-	     j < nr_ids;)
+	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), j < nr_ids;)
 		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
 					       count);
 	if (!active)
@@ -2551,7 +2551,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 {
 	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
-	unsigned int nr_ids;
 
 	if (!static_key_false(&xps_needed))
 		return;
@@ -2561,11 +2560,9 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 
 	if (static_key_false(&xps_rxqs_needed)) {
 		dev_maps = xmap_dereference(dev->xps_rxqs_map);
-		if (dev_maps) {
-			nr_ids = dev->num_rx_queues;
-			clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
-				       offset, count, true);
-		}
+		if (dev_maps)
+			clean_xps_maps(dev, possible_mask, dev_maps, offset,
+				       count, true);
 	}
 
 	dev_maps = xmap_dereference(dev->xps_cpus_map);
@@ -2574,9 +2571,7 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 
 	if (num_possible_cpus() > 1)
 		possible_mask = cpumask_bits(cpu_possible_mask);
-	nr_ids = nr_cpu_ids;
-	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
-		       false);
+	clean_xps_maps(dev, possible_mask, dev_maps, offset, count, false);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2673,11 +2668,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		maps_sz = L1_CACHE_BYTES;
 
 	/* The old dev_maps could be larger or smaller than the one we're
-	 * setting up now, as dev->num_tc could have been updated in between. We
-	 * could try to be smart, but let's be safe instead and only copy
-	 * foreign traffic classes if the two map sizes match.
+	 * setting up now, as dev->num_tc or nr_ids could have been updated in
+	 * between. We could try to be smart, but let's be safe instead and only
+	 * copy foreign traffic classes if the two map sizes match.
 	 */
-	if (dev_maps && dev_maps->num_tc == num_tc)
+	if (dev_maps &&
+	    dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
 		copy = true;
 
 	/* allocate memory for queue storage */
@@ -2690,6 +2686,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 				return -ENOMEM;
 			}
 
+			new_dev_maps->nr_ids = nr_ids;
 			new_dev_maps->num_tc = num_tc;
 		}
 
@@ -2770,7 +2767,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		goto out_no_old_maps;
 
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
+	     j < dev_maps->nr_ids;) {
 		for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
 			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (!map)
@@ -2804,12 +2801,12 @@ out_no_new_maps:
 		goto out_no_maps;
 
 	/* removes tx-queue from unused CPUs/rx-queues */
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, dev_maps->nr_ids),
+	     j < dev_maps->nr_ids;) {
 		for (i = tc, tci = j * dev_maps->num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!netif_attr_test_mask(j, mask, nr_ids) ||
-		    !netif_attr_test_online(j, online_mask, nr_ids))
+		if (!netif_attr_test_mask(j, mask, dev_maps->nr_ids) ||
+		    !netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = dev_maps->num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -3965,7 +3962,7 @@ static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 	struct xps_map *map;
 	int queue_index = -1;
 
-	if (tc >= dev_maps->num_tc)
+	if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
 		return queue_index;
 
 	tci *= dev_maps->num_tc;
@@ -4004,7 +4001,7 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
-		if (tci >= 0 && tci < dev->num_rx_queues)
+		if (tci >= 0)
 			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
 							  tci);
 	}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1364d0f39cb0..bb08bdc88fa9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1366,9 +1366,9 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 {
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
+	unsigned int index, nr_ids;
 	int j, len, ret, tc = 0;
 	unsigned long *mask;
-	unsigned int index;
 
 	if (!netif_is_multiqueue(dev))
 		return -ENOENT;
@@ -1387,19 +1387,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		goto err_rtnl_unlock;
 	}
 
-	mask = bitmap_zalloc(nr_cpu_ids, GFP_KERNEL);
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
+	nr_ids = dev_maps ? dev_maps->nr_ids : nr_cpu_ids;
+
+	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
 	if (!mask) {
 		ret = -ENOMEM;
-		goto err_rtnl_unlock;
+		goto err_rcu_unlock;
 	}
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, NULL, nr_cpu_ids),
-	     j < nr_cpu_ids;) {
+	for (j = -1; j = netif_attrmask_next(j, NULL, nr_ids), j < nr_ids;) {
 		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
@@ -1419,10 +1420,12 @@ out_no_maps:
 
 	rtnl_unlock();
 
-	len = bitmap_print_to_pagebuf(false, buf, mask, nr_cpu_ids);
+	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
 	bitmap_free(mask);
 	return len < PAGE_SIZE ? len : -EINVAL;
 
+err_rcu_unlock:
+	rcu_read_unlock();
 err_rtnl_unlock:
 	rtnl_unlock();
 	return ret;
@@ -1473,9 +1476,9 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 {
 	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
+	unsigned int index, nr_ids;
 	int j, len, ret, tc = 0;
 	unsigned long *mask;
-	unsigned int index;
 
 	index = get_netdev_queue_index(queue);
 
@@ -1488,19 +1491,20 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 		goto err_rtnl_unlock;
 	}
 
-	mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL);
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	nr_ids = dev_maps ? dev_maps->nr_ids : dev->num_rx_queues;
+
+	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
 	if (!mask) {
 		ret = -ENOMEM;
-		goto err_rtnl_unlock;
+		goto err_rcu_unlock;
 	}
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
 	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
-	     j < dev->num_rx_queues;) {
+	for (j = -1; j = netif_attrmask_next(j, NULL, nr_ids), j < nr_ids;) {
 		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
@@ -1520,11 +1524,13 @@ out_no_maps:
 
 	rtnl_unlock();
 
-	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
+	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
 	bitmap_free(mask);
 
 	return len < PAGE_SIZE ? len : -EINVAL;
 
+err_rcu_unlock:
+	rcu_read_unlock();
 err_rtnl_unlock:
 	rtnl_unlock();
 	return ret;
-- 
cgit v1.2.3


From 6f36158e058409ec5ceb4290541e77ae2648fc86 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:45 +0100
Subject: net: remove the xps possible_mask

Remove the xps possible_mask. It was an optimization but we can just
loop from 0 to nr_ids now that it is embedded in the xps dev_maps. That
simplifies the code a bit.

Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 40 +++++++++++++---------------------------
 net/core/net-sysfs.c |  4 ++--
 2 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7530c95970a0..3ed8cb3a4061 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2523,33 +2523,28 @@ static void reset_xps_maps(struct net_device *dev,
 	kfree_rcu(dev_maps, rcu);
 }
 
-static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+static void clean_xps_maps(struct net_device *dev,
 			   struct xps_dev_maps *dev_maps, u16 offset, u16 count,
 			   bool is_rxqs_map)
 {
-	unsigned int nr_ids = dev_maps->nr_ids;
 	bool active = false;
 	int i, j;
 
-	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), j < nr_ids;)
-		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
-					       count);
+	for (j = 0; j < dev_maps->nr_ids; j++)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
 	if (!active)
 		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 
 	if (!is_rxqs_map) {
-		for (i = offset + (count - 1); count--; i--) {
+		for (i = offset + (count - 1); count--; i--)
 			netdev_queue_numa_node_write(
-				netdev_get_tx_queue(dev, i),
-				NUMA_NO_NODE);
-		}
+				netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
 	}
 }
 
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
-	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
 
 	if (!static_key_false(&xps_needed))
@@ -2561,17 +2556,14 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 	if (static_key_false(&xps_rxqs_needed)) {
 		dev_maps = xmap_dereference(dev->xps_rxqs_map);
 		if (dev_maps)
-			clean_xps_maps(dev, possible_mask, dev_maps, offset,
-				       count, true);
+			clean_xps_maps(dev, dev_maps, offset, count, true);
 	}
 
 	dev_maps = xmap_dereference(dev->xps_cpus_map);
 	if (!dev_maps)
 		goto out_no_maps;
 
-	if (num_possible_cpus() > 1)
-		possible_mask = cpumask_bits(cpu_possible_mask);
-	clean_xps_maps(dev, possible_mask, dev_maps, offset, count, false);
+	clean_xps_maps(dev, dev_maps, offset, count, false);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2627,8 +2619,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			  u16 index, bool is_rxqs_map)
 {
-	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	const unsigned long *online_mask = NULL;
 	bool active = false, copy = false;
 	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
@@ -2656,10 +2648,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		nr_ids = dev->num_rx_queues;
 	} else {
 		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
-		if (num_possible_cpus() > 1) {
+		if (num_possible_cpus() > 1)
 			online_mask = cpumask_bits(cpu_online_mask);
-			possible_mask = cpumask_bits(cpu_possible_mask);
-		}
 		dev_maps = xmap_dereference(dev->xps_cpus_map);
 		nr_ids = nr_cpu_ids;
 	}
@@ -2710,8 +2700,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
 	}
 
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
+	for (j = 0; j < nr_ids; j++) {
 		/* copy maps belonging to foreign traffic classes */
 		for (i = tc, tci = j * num_tc; copy && i--; tci++) {
 			/* fill in the new device map from the old device map */
@@ -2766,8 +2755,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < dev_maps->nr_ids;) {
+	for (j = 0; j < dev_maps->nr_ids; j++) {
 		for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
 			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (!map)
@@ -2801,8 +2789,7 @@ out_no_new_maps:
 		goto out_no_maps;
 
 	/* removes tx-queue from unused CPUs/rx-queues */
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, dev_maps->nr_ids),
-	     j < dev_maps->nr_ids;) {
+	for (j = 0; j < dev_maps->nr_ids; j++) {
 		for (i = tc, tci = j * dev_maps->num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
 		if (!netif_attr_test_mask(j, mask, dev_maps->nr_ids) ||
@@ -2822,8 +2809,7 @@ out_no_maps:
 	return 0;
 error:
 	/* remove any maps that we added */
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
-	     j < nr_ids;) {
+	for (j = 0; j < nr_ids; j++) {
 		for (i = num_tc, tci = j * num_tc; i--; tci++) {
 			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = copy ?
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb08bdc88fa9..c762c435ff76 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1400,7 +1400,7 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, NULL, nr_ids), j < nr_ids;) {
+	for (j = 0; j < nr_ids; j++) {
 		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
@@ -1504,7 +1504,7 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 	if (!dev_maps || tc >= dev_maps->num_tc)
 		goto out_no_maps;
 
-	for (j = -1; j = netif_attrmask_next(j, NULL, nr_ids), j < nr_ids;) {
+	for (j = 0; j < nr_ids; j++) {
 		int i, tci = j * dev_maps->num_tc + tc;
 		struct xps_map *map;
 
-- 
cgit v1.2.3


From 044ab86d431b59b88966457dbb62679f274ec442 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:46 +0100
Subject: net: move the xps maps to an array

Move the xps maps (xps_cpus_map and xps_rxqs_map) to an array in
net_device. That will simplify a lot the code removing the need for lots
of if/else conditionals as the correct map will be available using its
offset in the array.

This should not modify the xps maps behaviour in any way.

Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c  |  2 +-
 include/linux/netdevice.h | 17 +++++++----
 net/core/dev.c            | 73 ++++++++++++++++++++---------------------------
 net/core/net-sysfs.c      |  6 ++--
 4 files changed, 46 insertions(+), 52 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 77ba8e2fc11c..584a9bd59dda 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2015,7 +2015,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 		}
 		virtqueue_set_affinity(vi->rq[i].vq, mask);
 		virtqueue_set_affinity(vi->sq[i].vq, mask);
-		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
 		cpumask_clear(mask);
 	}
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 09e73f5a8c78..4940509999be 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -754,6 +754,13 @@ struct rx_queue_attribute {
 			 const char *buf, size_t len);
 };
 
+/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
+enum xps_map_type {
+	XPS_CPUS = 0,
+	XPS_RXQS,
+	XPS_MAPS_MAX,
+};
+
 #ifdef CONFIG_XPS
 /*
  * This structure holds an XPS map which can be of variable length.  The
@@ -1773,8 +1780,7 @@ enum netdev_ml_priv_type {
  *	@tx_queue_len:		Max frames per queue allowed
  *	@tx_global_lock: 	XXX: need comments on this one
  *	@xdp_bulkq:		XDP device bulk queue
- *	@xps_cpus_map:		all CPUs map for XPS device
- *	@xps_rxqs_map:		all RXQs map for XPS device
+ *	@xps_maps:		all CPUs/RXQs maps for XPS device
  *
  *	@xps_maps:	XXX: need comments on this one
  *	@miniq_egress:		clsact qdisc specific data for
@@ -2070,8 +2076,7 @@ struct net_device {
 	struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_cpus_map;
-	struct xps_dev_maps __rcu *xps_rxqs_map;
+	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -3701,7 +3706,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map);
+			  u16 index, enum xps_map_type type);
 
 /**
  *	netif_attr_test_mask - Test a CPU or Rx queue set in a mask
@@ -3796,7 +3801,7 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 
 static inline int __netif_set_xps_queue(struct net_device *dev,
 					const unsigned long *mask,
-					u16 index, bool is_rxqs_map)
+					u16 index, enum xps_map_type type)
 {
 	return 0;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 3ed8cb3a4061..af57e32bb543 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2511,31 +2511,34 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 
 static void reset_xps_maps(struct net_device *dev,
 			   struct xps_dev_maps *dev_maps,
-			   bool is_rxqs_map)
+			   enum xps_map_type type)
 {
-	if (is_rxqs_map) {
-		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-		RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-	} else {
-		RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
-	}
 	static_key_slow_dec_cpuslocked(&xps_needed);
+	if (type == XPS_RXQS)
+		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+
+	RCU_INIT_POINTER(dev->xps_maps[type], NULL);
+
 	kfree_rcu(dev_maps, rcu);
 }
 
-static void clean_xps_maps(struct net_device *dev,
-			   struct xps_dev_maps *dev_maps, u16 offset, u16 count,
-			   bool is_rxqs_map)
+static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
+			   u16 offset, u16 count)
 {
+	struct xps_dev_maps *dev_maps;
 	bool active = false;
 	int i, j;
 
+	dev_maps = xmap_dereference(dev->xps_maps[type]);
+	if (!dev_maps)
+		return;
+
 	for (j = 0; j < dev_maps->nr_ids; j++)
 		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
 	if (!active)
-		reset_xps_maps(dev, dev_maps, is_rxqs_map);
+		reset_xps_maps(dev, dev_maps, type);
 
-	if (!is_rxqs_map) {
+	if (type == XPS_CPUS) {
 		for (i = offset + (count - 1); count--; i--)
 			netdev_queue_numa_node_write(
 				netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
@@ -2545,27 +2548,17 @@ static void clean_xps_maps(struct net_device *dev,
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
-	struct xps_dev_maps *dev_maps;
-
 	if (!static_key_false(&xps_needed))
 		return;
 
 	cpus_read_lock();
 	mutex_lock(&xps_map_mutex);
 
-	if (static_key_false(&xps_rxqs_needed)) {
-		dev_maps = xmap_dereference(dev->xps_rxqs_map);
-		if (dev_maps)
-			clean_xps_maps(dev, dev_maps, offset, count, true);
-	}
-
-	dev_maps = xmap_dereference(dev->xps_cpus_map);
-	if (!dev_maps)
-		goto out_no_maps;
+	if (static_key_false(&xps_rxqs_needed))
+		clean_xps_maps(dev, XPS_RXQS, offset, count);
 
-	clean_xps_maps(dev, dev_maps, offset, count, false);
+	clean_xps_maps(dev, XPS_CPUS, offset, count);
 
-out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 	cpus_read_unlock();
 }
@@ -2617,7 +2610,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
 
 /* Must be called under cpus_read_lock */
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
-			  u16 index, bool is_rxqs_map)
+			  u16 index, enum xps_map_type type)
 {
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
 	const unsigned long *online_mask = NULL;
@@ -2642,15 +2635,15 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 
 	mutex_lock(&xps_map_mutex);
-	if (is_rxqs_map) {
+
+	dev_maps = xmap_dereference(dev->xps_maps[type]);
+	if (type == XPS_RXQS) {
 		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
-		dev_maps = xmap_dereference(dev->xps_rxqs_map);
 		nr_ids = dev->num_rx_queues;
 	} else {
 		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
 		if (num_possible_cpus() > 1)
 			online_mask = cpumask_bits(cpu_online_mask);
-		dev_maps = xmap_dereference(dev->xps_cpus_map);
 		nr_ids = nr_cpu_ids;
 	}
 
@@ -2683,7 +2676,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		tci = j * num_tc + tc;
 		map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
 
-		map = expand_xps_map(map, j, index, is_rxqs_map);
+		map = expand_xps_map(map, j, index, type == XPS_RXQS);
 		if (!map)
 			goto error;
 
@@ -2696,7 +2689,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!dev_maps) {
 		/* Increment static keys at most once per type */
 		static_key_slow_inc_cpuslocked(&xps_needed);
-		if (is_rxqs_map)
+		if (type == XPS_RXQS)
 			static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
 	}
 
@@ -2725,7 +2718,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (!is_rxqs_map) {
+			if (type == XPS_CPUS) {
 				if (numa_node_id == -2)
 					numa_node_id = cpu_to_node(j);
 				else if (numa_node_id != cpu_to_node(j))
@@ -2746,10 +2739,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		}
 	}
 
-	if (is_rxqs_map)
-		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
-	else
-		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
+	rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
@@ -2778,12 +2768,11 @@ out_no_old_maps:
 	active = true;
 
 out_no_new_maps:
-	if (!is_rxqs_map) {
+	if (type == XPS_CPUS)
 		/* update Tx queue numa node */
 		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
 					     (numa_node_id >= 0) ?
 					     numa_node_id : NUMA_NO_NODE);
-	}
 
 	if (!dev_maps)
 		goto out_no_maps;
@@ -2801,7 +2790,7 @@ out_no_new_maps:
 
 	/* free map if not active */
 	if (!active)
-		reset_xps_maps(dev, dev_maps, is_rxqs_map);
+		reset_xps_maps(dev, dev_maps, type);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2833,7 +2822,7 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 	int ret;
 
 	cpus_read_lock();
-	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
 	cpus_read_unlock();
 
 	return ret;
@@ -3983,7 +3972,7 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 	if (!static_key_false(&xps_rxqs_needed))
 		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
@@ -3994,7 +3983,7 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
 
 get_cpus_map:
 	if (queue_index < 0) {
-		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+		dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
 		if (dev_maps) {
 			unsigned int tci = skb->sender_cpu - 1;
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c762c435ff76..ca1f3b63cfad 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1388,7 +1388,7 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	}
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
+	dev_maps = rcu_dereference(dev->xps_maps[XPS_CPUS]);
 	nr_ids = dev_maps ? dev_maps->nr_ids : nr_cpu_ids;
 
 	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
@@ -1492,7 +1492,7 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 	}
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(dev->xps_maps[XPS_RXQS]);
 	nr_ids = dev_maps ? dev_maps->nr_ids : dev->num_rx_queues;
 
 	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
@@ -1566,7 +1566,7 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
 	}
 
 	cpus_read_lock();
-	err = __netif_set_xps_queue(dev, mask, index, true);
+	err = __netif_set_xps_queue(dev, mask, index, XPS_RXQS);
 	cpus_read_unlock();
 
 	rtnl_unlock();
-- 
cgit v1.2.3


From 402fbb992e13fc57e917ac7c0a07a8a3e2385858 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:47 +0100
Subject: net: add an helper to copy xps maps to the new dev_maps

This patch adds an helper, xps_copy_dev_maps, to copy maps from dev_maps
to new_dev_maps at a given index. The logic should be the same, with an
improved code readability and maintenance.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index af57e32bb543..00f6b41e11d8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2608,6 +2608,25 @@ static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
 	return new_map;
 }
 
+/* Copy xps maps at a given index */
+static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
+			      struct xps_dev_maps *new_dev_maps, int index,
+			      int tc, bool skip_tc)
+{
+	int i, tci = index * dev_maps->num_tc;
+	struct xps_map *map;
+
+	/* copy maps belonging to foreign traffic classes */
+	for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+		if (i == tc && skip_tc)
+			continue;
+
+		/* fill in the new device map from the old device map */
+		map = xmap_dereference(dev_maps->attr_map[tci]);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
+	}
+}
+
 /* Must be called under cpus_read_lock */
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			  u16 index, enum xps_map_type type)
@@ -2694,23 +2713,16 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 
 	for (j = 0; j < nr_ids; j++) {
-		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = j * num_tc; copy && i--; tci++) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-		}
+		bool skip_tc = false;
 
-		/* We need to explicitly update tci as prevous loop
-		 * could break out early if dev_maps is NULL.
-		 */
 		tci = j * num_tc + tc;
-
 		if (netif_attr_test_mask(j, mask, nr_ids) &&
 		    netif_attr_test_online(j, online_mask, nr_ids)) {
 			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
+			skip_tc = true;
+
 			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
@@ -2725,18 +2737,11 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 					numa_node_id = -1;
 			}
 #endif
-		} else if (copy) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
-		/* copy maps belonging to foreign traffic classes */
-		for (i = num_tc - tc, tci++; copy && --i; tci++) {
-			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->attr_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
-		}
+		if (copy)
+			xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
+					  skip_tc);
 	}
 
 	rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
-- 
cgit v1.2.3


From 132f743b01b85b8fae7e1f298bfd81a66b9389a8 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:48 +0100
Subject: net: improve queue removal readability in __netif_set_xps_queue

Improve the readability of the loop removing tx-queue from unused
CPUs/rx-queues in __netif_set_xps_queue. The change should only be
cosmetic.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 00f6b41e11d8..c8ce2dfcc97d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2784,13 +2784,16 @@ out_no_new_maps:
 
 	/* removes tx-queue from unused CPUs/rx-queues */
 	for (j = 0; j < dev_maps->nr_ids; j++) {
-		for (i = tc, tci = j * dev_maps->num_tc; i--; tci++)
-			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!netif_attr_test_mask(j, mask, dev_maps->nr_ids) ||
-		    !netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
-			active |= remove_xps_queue(dev_maps, tci, index);
-		for (i = dev_maps->num_tc - tc, tci++; --i; tci++)
+		tci = j * dev_maps->num_tc;
+
+		for (i = 0; i < dev_maps->num_tc; i++, tci++) {
+			if (i == tc &&
+			    netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
+			    netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
+				continue;
+
 			active |= remove_xps_queue(dev_maps, tci, index);
+		}
 	}
 
 	/* free map if not active */
-- 
cgit v1.2.3


From d7be87a687cc261d663dcf97c01056f71398f9f9 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:49 +0100
Subject: net-sysfs: move the rtnl unlock up in the xps show helpers

Now that nr_ids and num_tc are stored in the xps dev_maps, which are RCU
protected, we do not have the need to protect the maps in the rtnl lock.
Move the rtnl unlock up so we reduce the rtnl locking section.

We also increase the reference count on the subordinate device if any,
as we don't want this device to be freed while we use it (now that the
rtnl lock isn't protecting it in the whole function).

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ca1f3b63cfad..094fea082649 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1383,10 +1383,14 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 
 	tc = netdev_txq_to_tc(dev, index);
 	if (tc < 0) {
-		ret = -EINVAL;
-		goto err_rtnl_unlock;
+		rtnl_unlock();
+		return -EINVAL;
 	}
 
+	/* Make sure the subordinate device can't be freed */
+	get_device(&dev->dev);
+	rtnl_unlock();
+
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps[XPS_CPUS]);
 	nr_ids = dev_maps ? dev_maps->nr_ids : nr_cpu_ids;
@@ -1417,8 +1421,7 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	}
 out_no_maps:
 	rcu_read_unlock();
-
-	rtnl_unlock();
+	put_device(&dev->dev);
 
 	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
 	bitmap_free(mask);
@@ -1426,8 +1429,7 @@ out_no_maps:
 
 err_rcu_unlock:
 	rcu_read_unlock();
-err_rtnl_unlock:
-	rtnl_unlock();
+	put_device(&dev->dev);
 	return ret;
 }
 
@@ -1486,10 +1488,9 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 		return restart_syscall();
 
 	tc = netdev_txq_to_tc(dev, index);
-	if (tc < 0) {
-		ret = -EINVAL;
-		goto err_rtnl_unlock;
-	}
+	rtnl_unlock();
+	if (tc < 0)
+		return -EINVAL;
 
 	rcu_read_lock();
 	dev_maps = rcu_dereference(dev->xps_maps[XPS_RXQS]);
@@ -1522,8 +1523,6 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 out_no_maps:
 	rcu_read_unlock();
 
-	rtnl_unlock();
-
 	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
 	bitmap_free(mask);
 
@@ -1531,8 +1530,6 @@ out_no_maps:
 
 err_rcu_unlock:
 	rcu_read_unlock();
-err_rtnl_unlock:
-	rtnl_unlock();
 	return ret;
 }
 
-- 
cgit v1.2.3


From 2db6cdaebac83c13acb165594b09282fa03cec89 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:50 +0100
Subject: net-sysfs: move the xps cpus/rxqs retrieval in a common function

Most of the xps_cpus_show and xps_rxqs_show functions share the same
logic. Having it in two different functions does not help maintenance.
This patch moves their common logic into a new function, xps_queue_show,
to improve this.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 125 ++++++++++++++++++++-------------------------------
 1 file changed, 48 insertions(+), 77 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 094fea082649..562a42fcd437 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1361,44 +1361,27 @@ static const struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static ssize_t xps_cpus_show(struct netdev_queue *queue,
-			     char *buf)
+static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
+			      int tc, char *buf, enum xps_map_type type)
 {
-	struct net_device *dev = queue->dev;
 	struct xps_dev_maps *dev_maps;
-	unsigned int index, nr_ids;
-	int j, len, ret, tc = 0;
 	unsigned long *mask;
-
-	if (!netif_is_multiqueue(dev))
-		return -ENOENT;
-
-	index = get_netdev_queue_index(queue);
-
-	if (!rtnl_trylock())
-		return restart_syscall();
-
-	/* If queue belongs to subordinate dev use its map */
-	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
-
-	tc = netdev_txq_to_tc(dev, index);
-	if (tc < 0) {
-		rtnl_unlock();
-		return -EINVAL;
-	}
-
-	/* Make sure the subordinate device can't be freed */
-	get_device(&dev->dev);
-	rtnl_unlock();
+	unsigned int nr_ids;
+	int j, len;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps[XPS_CPUS]);
-	nr_ids = dev_maps ? dev_maps->nr_ids : nr_cpu_ids;
+	dev_maps = rcu_dereference(dev->xps_maps[type]);
+
+	/* Default to nr_cpu_ids/dev->num_rx_queues and do not just return 0
+	 * when dev_maps hasn't been allocated yet, to be backward compatible.
+	 */
+	nr_ids = dev_maps ? dev_maps->nr_ids :
+		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);
 
 	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
 	if (!mask) {
-		ret = -ENOMEM;
-		goto err_rcu_unlock;
+		rcu_read_unlock();
+		return -ENOMEM;
 	}
 
 	if (!dev_maps || tc >= dev_maps->num_tc)
@@ -1421,16 +1404,44 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	}
 out_no_maps:
 	rcu_read_unlock();
-	put_device(&dev->dev);
 
 	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
 	bitmap_free(mask);
+
 	return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	unsigned int index;
+	int len, tc;
+
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
+	index = get_netdev_queue_index(queue);
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	/* If queue belongs to subordinate dev use its map */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
+	tc = netdev_txq_to_tc(dev, index);
+	if (tc < 0) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+	/* Make sure the subordinate device can't be freed */
+	get_device(&dev->dev);
+	rtnl_unlock();
+
+	len = xps_queue_show(dev, index, tc, buf, XPS_CPUS);
 
-err_rcu_unlock:
-	rcu_read_unlock();
 	put_device(&dev->dev);
-	return ret;
+	return len;
 }
 
 static ssize_t xps_cpus_store(struct netdev_queue *queue,
@@ -1477,10 +1488,8 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 {
 	struct net_device *dev = queue->dev;
-	struct xps_dev_maps *dev_maps;
-	unsigned int index, nr_ids;
-	int j, len, ret, tc = 0;
-	unsigned long *mask;
+	unsigned int index;
+	int tc;
 
 	index = get_netdev_queue_index(queue);
 
@@ -1492,45 +1501,7 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
 	if (tc < 0)
 		return -EINVAL;
 
-	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps[XPS_RXQS]);
-	nr_ids = dev_maps ? dev_maps->nr_ids : dev->num_rx_queues;
-
-	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
-	if (!mask) {
-		ret = -ENOMEM;
-		goto err_rcu_unlock;
-	}
-
-	if (!dev_maps || tc >= dev_maps->num_tc)
-		goto out_no_maps;
-
-	for (j = 0; j < nr_ids; j++) {
-		int i, tci = j * dev_maps->num_tc + tc;
-		struct xps_map *map;
-
-		map = rcu_dereference(dev_maps->attr_map[tci]);
-		if (!map)
-			continue;
-
-		for (i = map->len; i--;) {
-			if (map->queues[i] == index) {
-				set_bit(j, mask);
-				break;
-			}
-		}
-	}
-out_no_maps:
-	rcu_read_unlock();
-
-	len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids);
-	bitmap_free(mask);
-
-	return len < PAGE_SIZE ? len : -EINVAL;
-
-err_rcu_unlock:
-	rcu_read_unlock();
-	return ret;
+	return xps_queue_show(dev, index, tc, buf, XPS_RXQS);
 }
 
 static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
-- 
cgit v1.2.3


From 2d05bf015308275f7c67a780f70026077285cfc0 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:51 +0100
Subject: net: fix use after free in xps

When setting up an new dev_maps in __netif_set_xps_queue, we remove and
free maps from unused CPUs/rx-queues near the end of the function; by
calling remove_xps_queue. However it's possible those maps are also part
of the old not-freed-yet dev_maps, which might be used concurrently.
When that happens, a map can be freed while its corresponding entry in
the old dev_maps table isn't NULLed, leading to: "BUG: KASAN:
use-after-free" in different places.

This fixes the map freeing logic for unused CPUs/rx-queues, to also NULL
the map entries from the old dev_maps table.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index c8ce2dfcc97d..d5f6ba209f1e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2460,7 +2460,7 @@ static DEFINE_MUTEX(xps_map_mutex);
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
 static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
-			     int tci, u16 index)
+			     struct xps_dev_maps *old_maps, int tci, u16 index)
 {
 	struct xps_map *map = NULL;
 	int pos;
@@ -2479,6 +2479,8 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
+		if (old_maps)
+			RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
 		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
@@ -2499,7 +2501,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 		int i, j;
 
 		for (i = count, j = offset; i--; j++) {
-			if (!remove_xps_queue(dev_maps, tci, j))
+			if (!remove_xps_queue(dev_maps, NULL, tci, j))
 				break;
 		}
 
@@ -2631,7 +2633,7 @@ static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			  u16 index, enum xps_map_type type)
 {
-	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
 	const unsigned long *online_mask = NULL;
 	bool active = false, copy = false;
 	int i, j, tci, numa_node_id = -2;
@@ -2766,7 +2768,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		}
 	}
 
-	kfree_rcu(dev_maps, rcu);
+	old_dev_maps = dev_maps;
 
 out_no_old_maps:
 	dev_maps = new_dev_maps;
@@ -2792,10 +2794,15 @@ out_no_new_maps:
 			    netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
 				continue;
 
-			active |= remove_xps_queue(dev_maps, tci, index);
+			active |= remove_xps_queue(dev_maps,
+						   copy ? old_dev_maps : NULL,
+						   tci, index);
 		}
 	}
 
+	if (old_dev_maps)
+		kfree_rcu(old_dev_maps, rcu);
+
 	/* free map if not active */
 	if (!active)
 		reset_xps_maps(dev, dev_maps, type);
-- 
cgit v1.2.3


From 75b2758abc355c410dd335d45b2d40f920e27cde Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 18 Mar 2021 19:37:52 +0100
Subject: net: NULL the old xps map entries when freeing them

In __netif_set_xps_queue, old map entries from the old dev_maps are
freed but their corresponding entry in the old dev_maps aren't NULLed.
Fix this.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index d5f6ba209f1e..4961fc2e9b19 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2764,6 +2764,7 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 					continue;
 			}
 
+			RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 			kfree_rcu(map, rcu);
 		}
 	}
-- 
cgit v1.2.3


From e14ef4bf011192b61b48c4f3a35b3041140073ff Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:25 -0700
Subject: libbpf: Expose btf_type_by_id() internally

btf_type_by_id() is internal-only convenience API returning non-const pointer
to struct btf_type. Expose it outside of btf.c for re-use.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-2-andrii@kernel.org
---
 tools/lib/bpf/btf.c             | 2 +-
 tools/lib/bpf/libbpf_internal.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3aa58f2ac183..e0b0a78b04fe 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -435,7 +435,7 @@ const struct btf *btf__base_btf(const struct btf *btf)
 }
 
 /* internal helper returning non-const pointer to a type */
-static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 {
 	if (type_id == 0)
 		return &btf_void;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 343f6eb05637..d09860e435c8 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -107,6 +107,11 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
 	return realloc(ptr, total);
 }
 
+struct btf;
+struct btf_type;
+
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+
 void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
 		  size_t cur_cnt, size_t max_cnt, size_t add_cnt);
 int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
-- 
cgit v1.2.3


From f36e99a45dbe76949eb99bba413c67eda5cd2591 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:26 -0700
Subject: libbpf: Generalize BTF and BTF.ext type ID and strings iteration

Extract and generalize the logic to iterate BTF type ID and string offset
fields within BTF types and .BTF.ext data. Expose this internally in libbpf
for re-use by bpf_linker.

Additionally, complete strings deduplication handling for BTF.ext (e.g., CO-RE
access strings), which was previously missing. There previously was no
case of deduplicating .BTF.ext data, but bpf_linker is going to use it.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-3-andrii@kernel.org
---
 tools/lib/bpf/btf.c             | 393 ++++++++++++++++++++++------------------
 tools/lib/bpf/libbpf_internal.h |   7 +
 2 files changed, 228 insertions(+), 172 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index e0b0a78b04fe..e137781f9bc6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -3155,95 +3155,28 @@ done:
 	return d;
 }
 
-typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
-
 /*
  * Iterate over all possible places in .BTF and .BTF.ext that can reference
  * string and pass pointer to it to a provided callback `fn`.
  */
-static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx)
 {
-	void *line_data_cur, *line_data_end;
-	int i, j, r, rec_size;
-	struct btf_type *t;
+	int i, r;
 
 	for (i = 0; i < d->btf->nr_types; i++) {
-		t = btf_type_by_id(d->btf, d->btf->start_id + i);
-		r = fn(&t->name_off, ctx);
+		struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
+
+		r = btf_type_visit_str_offs(t, fn, ctx);
 		if (r)
 			return r;
-
-		switch (btf_kind(t)) {
-		case BTF_KIND_STRUCT:
-		case BTF_KIND_UNION: {
-			struct btf_member *m = btf_members(t);
-			__u16 vlen = btf_vlen(t);
-
-			for (j = 0; j < vlen; j++) {
-				r = fn(&m->name_off, ctx);
-				if (r)
-					return r;
-				m++;
-			}
-			break;
-		}
-		case BTF_KIND_ENUM: {
-			struct btf_enum *m = btf_enum(t);
-			__u16 vlen = btf_vlen(t);
-
-			for (j = 0; j < vlen; j++) {
-				r = fn(&m->name_off, ctx);
-				if (r)
-					return r;
-				m++;
-			}
-			break;
-		}
-		case BTF_KIND_FUNC_PROTO: {
-			struct btf_param *m = btf_params(t);
-			__u16 vlen = btf_vlen(t);
-
-			for (j = 0; j < vlen; j++) {
-				r = fn(&m->name_off, ctx);
-				if (r)
-					return r;
-				m++;
-			}
-			break;
-		}
-		default:
-			break;
-		}
 	}
 
 	if (!d->btf_ext)
 		return 0;
 
-	line_data_cur = d->btf_ext->line_info.info;
-	line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
-	rec_size = d->btf_ext->line_info.rec_size;
-
-	while (line_data_cur < line_data_end) {
-		struct btf_ext_info_sec *sec = line_data_cur;
-		struct bpf_line_info_min *line_info;
-		__u32 num_info = sec->num_info;
-
-		r = fn(&sec->sec_name_off, ctx);
-		if (r)
-			return r;
-
-		line_data_cur += sizeof(struct btf_ext_info_sec);
-		for (i = 0; i < num_info; i++) {
-			line_info = line_data_cur;
-			r = fn(&line_info->file_name_off, ctx);
-			if (r)
-				return r;
-			r = fn(&line_info->line_off, ctx);
-			if (r)
-				return r;
-			line_data_cur += rec_size;
-		}
-	}
+	r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx);
+	if (r)
+		return r;
 
 	return 0;
 }
@@ -4498,15 +4431,18 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
  * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
  * which is populated during compaction phase.
  */
-static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx)
 {
+	struct btf_dedup *d = ctx;
 	__u32 resolved_type_id, new_type_id;
 
-	resolved_type_id = resolve_type_id(d, type_id);
+	resolved_type_id = resolve_type_id(d, *type_id);
 	new_type_id = d->hypot_map[resolved_type_id];
 	if (new_type_id > BTF_MAX_NR_TYPES)
 		return -EINVAL;
-	return new_type_id;
+
+	*type_id = new_type_id;
+	return 0;
 }
 
 /*
@@ -4519,109 +4455,25 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
  * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
  * their final deduped type IDs.
  */
-static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_types(struct btf_dedup *d)
 {
-	struct btf_type *t = btf_type_by_id(d->btf, type_id);
 	int i, r;
 
-	switch (btf_kind(t)) {
-	case BTF_KIND_INT:
-	case BTF_KIND_ENUM:
-	case BTF_KIND_FLOAT:
-		break;
-
-	case BTF_KIND_FWD:
-	case BTF_KIND_CONST:
-	case BTF_KIND_VOLATILE:
-	case BTF_KIND_RESTRICT:
-	case BTF_KIND_PTR:
-	case BTF_KIND_TYPEDEF:
-	case BTF_KIND_FUNC:
-	case BTF_KIND_VAR:
-		r = btf_dedup_remap_type_id(d, t->type);
-		if (r < 0)
-			return r;
-		t->type = r;
-		break;
-
-	case BTF_KIND_ARRAY: {
-		struct btf_array *arr_info = btf_array(t);
-
-		r = btf_dedup_remap_type_id(d, arr_info->type);
-		if (r < 0)
-			return r;
-		arr_info->type = r;
-		r = btf_dedup_remap_type_id(d, arr_info->index_type);
-		if (r < 0)
-			return r;
-		arr_info->index_type = r;
-		break;
-	}
-
-	case BTF_KIND_STRUCT:
-	case BTF_KIND_UNION: {
-		struct btf_member *member = btf_members(t);
-		__u16 vlen = btf_vlen(t);
-
-		for (i = 0; i < vlen; i++) {
-			r = btf_dedup_remap_type_id(d, member->type);
-			if (r < 0)
-				return r;
-			member->type = r;
-			member++;
-		}
-		break;
-	}
-
-	case BTF_KIND_FUNC_PROTO: {
-		struct btf_param *param = btf_params(t);
-		__u16 vlen = btf_vlen(t);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
 
-		r = btf_dedup_remap_type_id(d, t->type);
-		if (r < 0)
+		r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d);
+		if (r)
 			return r;
-		t->type = r;
-
-		for (i = 0; i < vlen; i++) {
-			r = btf_dedup_remap_type_id(d, param->type);
-			if (r < 0)
-				return r;
-			param->type = r;
-			param++;
-		}
-		break;
-	}
-
-	case BTF_KIND_DATASEC: {
-		struct btf_var_secinfo *var = btf_var_secinfos(t);
-		__u16 vlen = btf_vlen(t);
-
-		for (i = 0; i < vlen; i++) {
-			r = btf_dedup_remap_type_id(d, var->type);
-			if (r < 0)
-				return r;
-			var->type = r;
-			var++;
-		}
-		break;
-	}
-
-	default:
-		return -EINVAL;
 	}
 
-	return 0;
-}
+	if (!d->btf_ext)
+		return 0;
 
-static int btf_dedup_remap_types(struct btf_dedup *d)
-{
-	int i, r;
+	r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d);
+	if (r)
+		return r;
 
-	for (i = 0; i < d->btf->nr_types; i++) {
-		r = btf_dedup_remap_type(d, d->btf->start_id + i);
-		if (r < 0)
-			return r;
-	}
 	return 0;
 }
 
@@ -4675,3 +4527,200 @@ struct btf *libbpf_find_kernel_btf(void)
 	pr_warn("failed to find valid kernel BTF\n");
 	return ERR_PTR(-ESRCH);
 }
+
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
+{
+	int i, n, err;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_ENUM:
+		return 0;
+
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+		return visit(&t->type, ctx);
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *a = btf_array(t);
+
+		err = visit(&a->type, ctx);
+		err = err ?: visit(&a->index_type, ctx);
+		return err;
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *m = btf_members(t);
+
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->type, ctx);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *m = btf_params(t);
+
+		err = visit(&t->type, ctx);
+		if (err)
+			return err;
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->type, ctx);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	case BTF_KIND_DATASEC: {
+		struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->type, ctx);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	default:
+		return -EINVAL;
+	}
+}
+
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx)
+{
+	int i, n, err;
+
+	err = visit(&t->name_off, ctx);
+	if (err)
+		return err;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *m = btf_members(t);
+
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->name_off, ctx);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		struct btf_enum *m = btf_enum(t);
+
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->name_off, ctx);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *m = btf_params(t);
+
+		for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+			err = visit(&m->name_off, ctx);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
+{
+	const struct btf_ext_info *seg;
+	struct btf_ext_info_sec *sec;
+	int i, err;
+
+	seg = &btf_ext->func_info;
+	for_each_btf_ext_sec(seg, sec) {
+		struct bpf_func_info_min *rec;
+
+		for_each_btf_ext_rec(seg, sec, i, rec) {
+			err = visit(&rec->type_id, ctx);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	seg = &btf_ext->core_relo_info;
+	for_each_btf_ext_sec(seg, sec) {
+		struct bpf_core_relo *rec;
+
+		for_each_btf_ext_rec(seg, sec, i, rec) {
+			err = visit(&rec->type_id, ctx);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx)
+{
+	const struct btf_ext_info *seg;
+	struct btf_ext_info_sec *sec;
+	int i, err;
+
+	seg = &btf_ext->func_info;
+	for_each_btf_ext_sec(seg, sec) {
+		err = visit(&sec->sec_name_off, ctx);
+		if (err)
+			return err;
+	}
+
+	seg = &btf_ext->line_info;
+	for_each_btf_ext_sec(seg, sec) {
+		struct bpf_line_info_min *rec;
+
+		err = visit(&sec->sec_name_off, ctx);
+		if (err)
+			return err;
+
+		for_each_btf_ext_rec(seg, sec, i, rec) {
+			err = visit(&rec->file_name_off, ctx);
+			if (err)
+				return err;
+			err = visit(&rec->line_off, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	seg = &btf_ext->core_relo_info;
+	for_each_btf_ext_sec(seg, sec) {
+		struct bpf_core_relo *rec;
+
+		err = visit(&sec->sec_name_off, ctx);
+		if (err)
+			return err;
+
+		for_each_btf_ext_rec(seg, sec, i, rec) {
+			err = visit(&rec->access_str_off, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index d09860e435c8..97b6b9cc9839 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -356,4 +356,11 @@ struct bpf_core_relo {
 	enum bpf_core_relo_kind kind;
 };
 
+typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
+typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx);
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx);
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
-- 
cgit v1.2.3


From 3b029e06f624efa90c9a4354e408acf134adb185 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:27 -0700
Subject: libbpf: Rename internal memory-management helpers

Rename btf_add_mem() and btf_ensure_mem() helpers that abstract away details
of dynamically resizable memory to use libbpf_ prefix, as they are not
BTF-specific. No functional changes.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-4-andrii@kernel.org
---
 tools/lib/bpf/btf.c             | 24 ++++++++++++------------
 tools/lib/bpf/btf_dump.c        |  8 ++++----
 tools/lib/bpf/libbpf.c          |  4 ++--
 tools/lib/bpf/libbpf_internal.h |  6 +++---
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index e137781f9bc6..c98d39710515 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -142,8 +142,8 @@ static inline __u64 ptr_to_u64(const void *ptr)
  * On success, memory pointer to the beginning of unused memory is returned.
  * On error, NULL is returned.
  */
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
-		  size_t cur_cnt, size_t max_cnt, size_t add_cnt)
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+		     size_t cur_cnt, size_t max_cnt, size_t add_cnt)
 {
 	size_t new_cnt;
 	void *new_data;
@@ -179,14 +179,14 @@ void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
 /* Ensure given dynamically allocated memory region has enough allocated space
  * to accommodate *need_cnt* elements of size *elem_sz* bytes each
  */
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
 {
 	void *p;
 
 	if (need_cnt <= *cap_cnt)
 		return 0;
 
-	p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+	p = libbpf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
 	if (!p)
 		return -ENOMEM;
 
@@ -197,8 +197,8 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
 {
 	__u32 *p;
 
-	p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
-			btf->nr_types, BTF_MAX_NR_TYPES, 1);
+	p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+			   btf->nr_types, BTF_MAX_NR_TYPES, 1);
 	if (!p)
 		return -ENOMEM;
 
@@ -1586,8 +1586,8 @@ err_out:
 
 static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
 {
-	return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
-			   btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+	return libbpf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
+			      btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
 }
 
 /* Find an offset in BTF string section that corresponds to a given string *s*.
@@ -1683,8 +1683,8 @@ int btf__add_str(struct btf *btf, const char *s)
 
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
 {
-	return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
-			   btf->hdr->type_len, UINT_MAX, add_sz);
+	return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+			      btf->hdr->type_len, UINT_MAX, add_sz);
 }
 
 static __u32 btf_type_info(int kind, int vlen, int kflag)
@@ -3208,7 +3208,7 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 	len = strlen(s) + 1;
 
 	new_off = d->strs_len;
-	p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
+	p = libbpf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
 	if (!p)
 		return -ENOMEM;
 
@@ -3264,7 +3264,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
 	}
 
 	if (!d->btf->base_btf) {
-		s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+		s = libbpf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
 		if (!s)
 			return -ENOMEM;
 		/* initial empty string */
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5e957fcceee6..b5dbd5adc0e8 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -166,11 +166,11 @@ static int btf_dump_resize(struct btf_dump *d)
 	if (last_id <= d->last_id)
 		return 0;
 
-	if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
-			   sizeof(*d->type_states), last_id + 1))
+	if (libbpf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+			      sizeof(*d->type_states), last_id + 1))
 		return -ENOMEM;
-	if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
-			   sizeof(*d->cached_names), last_id + 1))
+	if (libbpf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+			      sizeof(*d->cached_names), last_id + 1))
 		return -ENOMEM;
 
 	if (d->last_id == 0) {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2f351d3ad3e7..18ba37164e17 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4867,8 +4867,8 @@ static int load_module_btfs(struct bpf_object *obj)
 			goto err_out;
 		}
 
-		err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
-				     sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+				        sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 		if (err)
 			goto err_out;
 
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 97b6b9cc9839..e0787900eeca 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -112,9 +112,9 @@ struct btf_type;
 
 struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
 
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
-		  size_t cur_cnt, size_t max_cnt, size_t add_cnt);
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+		     size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
 
 static inline bool libbpf_validate_opts(const char *opts,
 					size_t opts_sz, size_t user_sz,
-- 
cgit v1.2.3


From 90d76d3ececc74bf43b2a97f178dadfa1e52be54 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:28 -0700
Subject: libbpf: Extract internal set-of-strings datastructure APIs

Extract BTF logic for maintaining a set of strings data structure, used for
BTF strings section construction in writable mode, into separate re-usable
API. This data structure is going to be used by bpf_linker to maintains ELF
STRTAB section, which has the same layout as BTF strings section.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-5-andrii@kernel.org
---
 tools/lib/bpf/Build    |   2 +-
 tools/lib/bpf/btf.c    | 255 ++++++++++++-------------------------------------
 tools/lib/bpf/strset.c | 176 ++++++++++++++++++++++++++++++++++
 tools/lib/bpf/strset.h |  21 ++++
 4 files changed, 259 insertions(+), 195 deletions(-)
 create mode 100644 tools/lib/bpf/strset.c
 create mode 100644 tools/lib/bpf/strset.h

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 190366d05588..8136186a453f 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-	    btf_dump.o ringbuf.o
+	    btf_dump.o ringbuf.o strset.o
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index c98d39710515..fe087592ad35 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -21,6 +21,7 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 #include "hashmap.h"
+#include "strset.h"
 
 #define BTF_MAX_NR_TYPES 0x7fffffffU
 #define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -67,7 +68,7 @@ struct btf {
 	 * |             |            |
 	 * hdr           |            |
 	 * types_data----+            |
-	 * strs_data------------------+
+	 * strset__data(strs_set)-----+
 	 *
 	 *               +----------+---------+-----------+
 	 *               |  Header  |  Types  |  Strings  |
@@ -105,20 +106,15 @@ struct btf {
 	 */
 	int start_str_off;
 
+	/* only one of strs_data or strs_set can be non-NULL, depending on
+	 * whether BTF is in a modifiable state (strs_set is used) or not
+	 * (strs_data points inside raw_data)
+	 */
 	void *strs_data;
-	size_t strs_data_cap; /* used size stored in hdr->str_len */
-
-	/* lookup index for each unique string in strings section */
-	struct hashmap *strs_hash;
+	/* a set of unique strings */
+	struct strset *strs_set;
 	/* whether strings are already deduplicated */
 	bool strs_deduped;
-	/* extra indirection layer to make strings hashmap work with stable
-	 * string offsets and ability to transparently choose between
-	 * btf->strs_data or btf_dedup->strs_data as a source of strings.
-	 * This is used for BTF strings dedup to transfer deduplicated strings
-	 * data back to struct btf without re-building strings index.
-	 */
-	void **strs_data_ptr;
 
 	/* BTF object FD, if loaded into kernel */
 	int fd;
@@ -738,7 +734,7 @@ void btf__free(struct btf *btf)
 		 */
 		free(btf->hdr);
 		free(btf->types_data);
-		free(btf->strs_data);
+		strset__free(btf->strs_set);
 	}
 	free(btf->raw_data);
 	free(btf->raw_data_swapped);
@@ -1246,6 +1242,11 @@ void btf__set_fd(struct btf *btf, int fd)
 	btf->fd = fd;
 }
 
+static const void *btf_strs_data(const struct btf *btf)
+{
+	return btf->strs_data ? btf->strs_data : strset__data(btf->strs_set);
+}
+
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
 {
 	struct btf_header *hdr = btf->hdr;
@@ -1286,7 +1287,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
 	}
 	p += hdr->type_len;
 
-	memcpy(p, btf->strs_data, hdr->str_len);
+	memcpy(p, btf_strs_data(btf), hdr->str_len);
 	p += hdr->str_len;
 
 	*size = data_sz;
@@ -1320,7 +1321,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 	if (offset < btf->start_str_off)
 		return btf__str_by_offset(btf->base_btf, offset);
 	else if (offset - btf->start_str_off < btf->hdr->str_len)
-		return btf->strs_data + (offset - btf->start_str_off);
+		return btf_strs_data(btf) + (offset - btf->start_str_off);
 	else
 		return NULL;
 }
@@ -1474,25 +1475,6 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 	return 0;
 }
 
-static size_t strs_hash_fn(const void *key, void *ctx)
-{
-	const struct btf *btf = ctx;
-	const char *strs = *btf->strs_data_ptr;
-	const char *str = strs + (long)key;
-
-	return str_hash(str);
-}
-
-static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
-{
-	const struct btf *btf = ctx;
-	const char *strs = *btf->strs_data_ptr;
-	const char *str1 = strs + (long)key1;
-	const char *str2 = strs + (long)key2;
-
-	return strcmp(str1, str2) == 0;
-}
-
 static void btf_invalidate_raw_data(struct btf *btf)
 {
 	if (btf->raw_data) {
@@ -1511,10 +1493,9 @@ static void btf_invalidate_raw_data(struct btf *btf)
  */
 static int btf_ensure_modifiable(struct btf *btf)
 {
-	void *hdr, *types, *strs, *strs_end, *s;
-	struct hashmap *hash = NULL;
-	long off;
-	int err;
+	void *hdr, *types;
+	struct strset *set = NULL;
+	int err = -ENOMEM;
 
 	if (btf_is_modifiable(btf)) {
 		/* any BTF modification invalidates raw_data */
@@ -1525,44 +1506,25 @@ static int btf_ensure_modifiable(struct btf *btf)
 	/* split raw data into three memory regions */
 	hdr = malloc(btf->hdr->hdr_len);
 	types = malloc(btf->hdr->type_len);
-	strs = malloc(btf->hdr->str_len);
-	if (!hdr || !types || !strs)
+	if (!hdr || !types)
 		goto err_out;
 
 	memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
 	memcpy(types, btf->types_data, btf->hdr->type_len);
-	memcpy(strs, btf->strs_data, btf->hdr->str_len);
-
-	/* make hashmap below use btf->strs_data as a source of strings */
-	btf->strs_data_ptr = &btf->strs_data;
 
 	/* build lookup index for all strings */
-	hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
-	if (IS_ERR(hash)) {
-		err = PTR_ERR(hash);
-		hash = NULL;
+	set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+	if (IS_ERR(set)) {
+		err = PTR_ERR(set);
 		goto err_out;
 	}
 
-	strs_end = strs + btf->hdr->str_len;
-	for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
-		/* hashmap__add() returns EEXIST if string with the same
-		 * content already is in the hash map
-		 */
-		err = hashmap__add(hash, (void *)off, (void *)off);
-		if (err == -EEXIST)
-			continue; /* duplicate */
-		if (err)
-			goto err_out;
-	}
-
 	/* only when everything was successful, update internal state */
 	btf->hdr = hdr;
 	btf->types_data = types;
 	btf->types_data_cap = btf->hdr->type_len;
-	btf->strs_data = strs;
-	btf->strs_data_cap = btf->hdr->str_len;
-	btf->strs_hash = hash;
+	btf->strs_data = NULL;
+	btf->strs_set = set;
 	/* if BTF was created from scratch, all strings are guaranteed to be
 	 * unique and deduplicated
 	 */
@@ -1577,17 +1539,10 @@ static int btf_ensure_modifiable(struct btf *btf)
 	return 0;
 
 err_out:
-	hashmap__free(hash);
+	strset__free(set);
 	free(hdr);
 	free(types);
-	free(strs);
-	return -ENOMEM;
-}
-
-static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
-{
-	return libbpf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
-			      btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+	return err;
 }
 
 /* Find an offset in BTF string section that corresponds to a given string *s*.
@@ -1598,34 +1553,23 @@ static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
  */
 int btf__find_str(struct btf *btf, const char *s)
 {
-	long old_off, new_off, len;
-	void *p;
+	int off;
 
 	if (btf->base_btf) {
-		int ret;
-
-		ret = btf__find_str(btf->base_btf, s);
-		if (ret != -ENOENT)
-			return ret;
+		off = btf__find_str(btf->base_btf, s);
+		if (off != -ENOENT)
+			return off;
 	}
 
 	/* BTF needs to be in a modifiable state to build string lookup index */
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
-	/* see btf__add_str() for why we do this */
-	len = strlen(s) + 1;
-	p = btf_add_str_mem(btf, len);
-	if (!p)
-		return -ENOMEM;
-
-	new_off = btf->hdr->str_len;
-	memcpy(p, s, len);
+	off = strset__find_str(btf->strs_set, s);
+	if (off < 0)
+		return off;
 
-	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
-		return btf->start_str_off + old_off;
-
-	return -ENOENT;
+	return btf->start_str_off + off;
 }
 
 /* Add a string s to the BTF string section.
@@ -1635,50 +1579,24 @@ int btf__find_str(struct btf *btf, const char *s)
  */
 int btf__add_str(struct btf *btf, const char *s)
 {
-	long old_off, new_off, len;
-	void *p;
-	int err;
+	int off;
 
 	if (btf->base_btf) {
-		int ret;
-
-		ret = btf__find_str(btf->base_btf, s);
-		if (ret != -ENOENT)
-			return ret;
+		off = btf__find_str(btf->base_btf, s);
+		if (off != -ENOENT)
+			return off;
 	}
 
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
-	/* Hashmap keys are always offsets within btf->strs_data, so to even
-	 * look up some string from the "outside", we need to first append it
-	 * at the end, so that it can be addressed with an offset. Luckily,
-	 * until btf->hdr->str_len is incremented, that string is just a piece
-	 * of garbage for the rest of BTF code, so no harm, no foul. On the
-	 * other hand, if the string is unique, it's already appended and
-	 * ready to be used, only a simple btf->hdr->str_len increment away.
-	 */
-	len = strlen(s) + 1;
-	p = btf_add_str_mem(btf, len);
-	if (!p)
-		return -ENOMEM;
+	off = strset__add_str(btf->strs_set, s);
+	if (off < 0)
+		return off;
 
-	new_off = btf->hdr->str_len;
-	memcpy(p, s, len);
+	btf->hdr->str_len = strset__data_size(btf->strs_set);
 
-	/* Now attempt to add the string, but only if the string with the same
-	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
-	 * string exists, we'll get its offset in old_off (that's old_key).
-	 */
-	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
-			      HASHMAP_ADD, (const void **)&old_off, NULL);
-	if (err == -EEXIST)
-		return btf->start_str_off + old_off; /* duplicated string, return existing offset */
-	if (err)
-		return err;
-
-	btf->hdr->str_len += len; /* new unique string, adjust data length */
-	return btf->start_str_off + new_off;
+	return btf->start_str_off + off;
 }
 
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
@@ -3016,10 +2934,7 @@ struct btf_dedup {
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
 	/* temporary strings deduplication state */
-	void *strs_data;
-	size_t strs_cap;
-	size_t strs_len;
-	struct hashmap* strs_hash;
+	struct strset *strs_set;
 };
 
 static long hash_combine(long h, long value)
@@ -3185,10 +3100,8 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
 	struct btf_dedup *d = ctx;
 	__u32 str_off = *str_off_ptr;
-	long old_off, new_off, len;
 	const char *s;
-	void *p;
-	int err;
+	int off, err;
 
 	/* don't touch empty string or string in main BTF */
 	if (str_off == 0 || str_off < d->btf->start_str_off)
@@ -3205,29 +3118,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 			return err;
 	}
 
-	len = strlen(s) + 1;
+	off = strset__add_str(d->strs_set, s);
+	if (off < 0)
+		return off;
 
-	new_off = d->strs_len;
-	p = libbpf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
-	if (!p)
-		return -ENOMEM;
-
-	memcpy(p, s, len);
-
-	/* Now attempt to add the string, but only if the string with the same
-	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
-	 * string exists, we'll get its offset in old_off (that's old_key).
-	 */
-	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
-			      HASHMAP_ADD, (const void **)&old_off, NULL);
-	if (err == -EEXIST) {
-		*str_off_ptr = d->btf->start_str_off + old_off;
-	} else if (err) {
-		return err;
-	} else {
-		*str_off_ptr = d->btf->start_str_off + new_off;
-		d->strs_len += len;
-	}
+	*str_off_ptr = d->btf->start_str_off + off;
 	return 0;
 }
 
@@ -3244,39 +3139,23 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
  */
 static int btf_dedup_strings(struct btf_dedup *d)
 {
-	char *s;
 	int err;
 
 	if (d->btf->strs_deduped)
 		return 0;
 
-	/* temporarily switch to use btf_dedup's strs_data for strings for hash
-	 * functions; later we'll just transfer hashmap to struct btf as is,
-	 * along the strs_data
-	 */
-	d->btf->strs_data_ptr = &d->strs_data;
-
-	d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
-	if (IS_ERR(d->strs_hash)) {
-		err = PTR_ERR(d->strs_hash);
-		d->strs_hash = NULL;
+	d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0);
+	if (IS_ERR(d->strs_set)) {
+		err = PTR_ERR(d->strs_set);
 		goto err_out;
 	}
 
 	if (!d->btf->base_btf) {
-		s = libbpf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
-		if (!s)
-			return -ENOMEM;
-		/* initial empty string */
-		s[0] = 0;
-		d->strs_len = 1;
-
 		/* insert empty string; we won't be looking it up during strings
 		 * dedup, but it's good to have it for generic BTF string lookups
 		 */
-		err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
-				      HASHMAP_ADD, NULL, NULL);
-		if (err)
+		err = strset__add_str(d->strs_set, "");
+		if (err < 0)
 			goto err_out;
 	}
 
@@ -3286,28 +3165,16 @@ static int btf_dedup_strings(struct btf_dedup *d)
 		goto err_out;
 
 	/* replace BTF string data and hash with deduped ones */
-	free(d->btf->strs_data);
-	hashmap__free(d->btf->strs_hash);
-	d->btf->strs_data = d->strs_data;
-	d->btf->strs_data_cap = d->strs_cap;
-	d->btf->hdr->str_len = d->strs_len;
-	d->btf->strs_hash = d->strs_hash;
-	/* now point strs_data_ptr back to btf->strs_data */
-	d->btf->strs_data_ptr = &d->btf->strs_data;
-
-	d->strs_data = d->strs_hash = NULL;
-	d->strs_len = d->strs_cap = 0;
+	strset__free(d->btf->strs_set);
+	d->btf->hdr->str_len = strset__data_size(d->strs_set);
+	d->btf->strs_set = d->strs_set;
+	d->strs_set = NULL;
 	d->btf->strs_deduped = true;
 	return 0;
 
 err_out:
-	free(d->strs_data);
-	hashmap__free(d->strs_hash);
-	d->strs_data = d->strs_hash = NULL;
-	d->strs_len = d->strs_cap = 0;
-
-	/* restore strings pointer for existing d->btf->strs_hash back */
-	d->btf->strs_data_ptr = &d->strs_data;
+	strset__free(d->strs_set);
+	d->strs_set = NULL;
 
 	return err;
 }
diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c
new file mode 100644
index 000000000000..1fb8b49de1d6
--- /dev/null
+++ b/tools/lib/bpf/strset.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct strset {
+	void *strs_data;
+	size_t strs_data_len;
+	size_t strs_data_cap;
+	size_t strs_data_max_len;
+
+	/* lookup index for each unique string in strings set */
+	struct hashmap *strs_hash;
+};
+
+static size_t strset_hash_fn(const void *key, void *ctx)
+{
+	const struct strset *s = ctx;
+	const char *str = s->strs_data + (long)key;
+
+	return str_hash(str);
+}
+
+static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+	const struct strset *s = ctx;
+	const char *str1 = s->strs_data + (long)key1;
+	const char *str2 = s->strs_data + (long)key2;
+
+	return strcmp(str1, str2) == 0;
+}
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz)
+{
+	struct strset *set = calloc(1, sizeof(*set));
+	struct hashmap *hash;
+	int err = -ENOMEM;
+
+	if (!set)
+		return ERR_PTR(-ENOMEM);
+
+	hash = hashmap__new(strset_hash_fn, strset_equal_fn, set);
+	if (IS_ERR(hash))
+		goto err_out;
+
+	set->strs_data_max_len = max_data_sz;
+	set->strs_hash = hash;
+
+	if (init_data) {
+		long off;
+
+		set->strs_data = malloc(init_data_sz);
+		if (!set->strs_data)
+			goto err_out;
+
+		memcpy(set->strs_data, init_data, init_data_sz);
+		set->strs_data_len = init_data_sz;
+		set->strs_data_cap = init_data_sz;
+
+		for (off = 0; off < set->strs_data_len; off += strlen(set->strs_data + off) + 1) {
+			/* hashmap__add() returns EEXIST if string with the same
+			 * content already is in the hash map
+			 */
+			err = hashmap__add(hash, (void *)off, (void *)off);
+			if (err == -EEXIST)
+				continue; /* duplicate */
+			if (err)
+				goto err_out;
+		}
+	}
+
+	return set;
+err_out:
+	strset__free(set);
+	return ERR_PTR(err);
+}
+
+void strset__free(struct strset *set)
+{
+	if (IS_ERR_OR_NULL(set))
+		return;
+
+	hashmap__free(set->strs_hash);
+	free(set->strs_data);
+}
+
+size_t strset__data_size(const struct strset *set)
+{
+	return set->strs_data_len;
+}
+
+const char *strset__data(const struct strset *set)
+{
+	return set->strs_data;
+}
+
+static void *strset_add_str_mem(struct strset *set, size_t add_sz)
+{
+	return libbpf_add_mem(&set->strs_data, &set->strs_data_cap, 1,
+			      set->strs_data_len, set->strs_data_max_len, add_sz);
+}
+
+/* Find string offset that corresponds to a given string *s*.
+ * Returns:
+ *   - >0 offset into string data, if string is found;
+ *   - -ENOENT, if string is not in the string data;
+ *   - <0, on any other error.
+ */
+int strset__find_str(struct strset *set, const char *s)
+{
+	long old_off, new_off, len;
+	void *p;
+
+	/* see strset__add_str() for why we do this */
+	len = strlen(s) + 1;
+	p = strset_add_str_mem(set, len);
+	if (!p)
+		return -ENOMEM;
+
+	new_off = set->strs_data_len;
+	memcpy(p, s, len);
+
+	if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
+		return old_off;
+
+	return -ENOENT;
+}
+
+/* Add a string s to the string data. If the string already exists, return its
+ * offset within string data.
+ * Returns:
+ *   - > 0 offset into string data, on success;
+ *   - < 0, on error.
+ */
+int strset__add_str(struct strset *set, const char *s)
+{
+	long old_off, new_off, len;
+	void *p;
+	int err;
+
+	/* Hashmap keys are always offsets within set->strs_data, so to even
+	 * look up some string from the "outside", we need to first append it
+	 * at the end, so that it can be addressed with an offset. Luckily,
+	 * until set->strs_data_len is incremented, that string is just a piece
+	 * of garbage for the rest of the code, so no harm, no foul. On the
+	 * other hand, if the string is unique, it's already appended and
+	 * ready to be used, only a simple set->strs_data_len increment away.
+	 */
+	len = strlen(s) + 1;
+	p = strset_add_str_mem(set, len);
+	if (!p)
+		return -ENOMEM;
+
+	new_off = set->strs_data_len;
+	memcpy(p, s, len);
+
+	/* Now attempt to add the string, but only if the string with the same
+	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
+	 * string exists, we'll get its offset in old_off (that's old_key).
+	 */
+	err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
+			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	if (err == -EEXIST)
+		return old_off; /* duplicated string, return existing offset */
+	if (err)
+		return err;
+
+	set->strs_data_len += len; /* new unique string, adjust data length */
+	return new_off;
+}
diff --git a/tools/lib/bpf/strset.h b/tools/lib/bpf/strset.h
new file mode 100644
index 000000000000..b6ddf77a83c2
--- /dev/null
+++ b/tools/lib/bpf/strset.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/* Copyright (c) 2021 Facebook */
+#ifndef __LIBBPF_STRSET_H
+#define __LIBBPF_STRSET_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct strset;
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz);
+void strset__free(struct strset *set);
+
+const char *strset__data(const struct strset *set);
+size_t strset__data_size(const struct strset *set);
+
+int strset__find_str(struct strset *set, const char *s);
+int strset__add_str(struct strset *set, const char *s);
+
+#endif /* __LIBBPF_STRSET_H */
-- 
cgit v1.2.3


From 9af44bc5d4d70b37c9ada24d8e0367b34b805bd3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:29 -0700
Subject: libbpf: Add generic BTF type shallow copy API

Add btf__add_type() API that performs shallow copy of a given BTF type from
the source BTF into the destination BTF. All the information and type IDs are
preserved, but all the strings encountered are added into the destination BTF
and corresponding offsets are rewritten. BTF type IDs are assumed to be
correct or such that will be (somehow) modified afterwards.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-6-andrii@kernel.org
---
 tools/lib/bpf/btf.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h      |  2 ++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 51 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index fe087592ad35..d30e67e7e1e5 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1629,6 +1629,54 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 	return btf->start_id + btf->nr_types - 1;
 }
 
+struct btf_pipe {
+	const struct btf *src;
+	struct btf *dst;
+};
+
+static int btf_rewrite_str(__u32 *str_off, void *ctx)
+{
+	struct btf_pipe *p = ctx;
+	int off;
+
+	if (!*str_off) /* nothing to do for empty strings */
+		return 0;
+
+	off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
+	if (off < 0)
+		return off;
+
+	*str_off = off;
+	return 0;
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+	struct btf_pipe p = { .src = src_btf, .dst = btf };
+	struct btf_type *t;
+	int sz, err;
+
+	sz = btf_type_size(src_type);
+	if (sz < 0)
+		return sz;
+
+	/* deconstruct BTF, if necessary, and invalidate raw_data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	memcpy(t, src_type, sz);
+
+	err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+	if (err)
+		return err;
+
+	return btf_commit_type(btf, sz);
+}
+
 /*
  * Append new BTF_KIND_INT type with:
  *   - *name* - non-empty, non-NULL type name;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 029a9cfc8c2d..3b0b17ba94a1 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -93,6 +93,8 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
+			     const struct btf_type *src_type);
 
 LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
 LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index ec898f464ab9..d31d8c968097 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -354,4 +354,5 @@ LIBBPF_0.3.0 {
 LIBBPF_0.4.0 {
 	global:
 		btf__add_float;
+		btf__add_type;
 } LIBBPF_0.3.0;
-- 
cgit v1.2.3


From faf6ed321cf61fafa17444fe01e7e336b8e89acc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:30 -0700
Subject: libbpf: Add BPF static linker APIs

Introduce BPF static linker APIs to libbpf. BPF static linker allows to
perform static linking of multiple BPF object files into a single combined
resulting object file, preserving all the BPF programs, maps, global
variables, etc.

Data sections (.bss, .data, .rodata, .maps, maps, etc) with the same name are
concatenated together. Similarly, code sections are also concatenated. All the
symbols and ELF relocations are also concatenated in their respective ELF
sections and are adjusted accordingly to the new object file layout.

Static variables and functions are handled correctly as well, adjusting BPF
instructions offsets to reflect new variable/function offset within the
combined ELF section. Such relocations are referencing STT_SECTION symbols and
that stays intact.

Data sections in different files can have different alignment requirements, so
that is taken care of as well, adjusting sizes and offsets as necessary to
satisfy both old and new alignment requirements.

DWARF data sections are stripped out, currently. As well as LLLVM_ADDRSIG
section, which is ignored by libbpf in bpf_object__open() anyways. So, in
a way, BPF static linker is an analogue to `llvm-strip -g`, which is a pretty
nice property, especially if resulting .o file is then used to generate BPF
skeleton.

Original string sections are ignored and instead we construct our own set of
unique strings using libbpf-internal `struct strset` API.

To reduce the size of the patch, all the .BTF and .BTF.ext processing was
moved into a separate patch.

The high-level API consists of just 4 functions:
  - bpf_linker__new() creates an instance of BPF static linker. It accepts
    output filename and (currently empty) options struct;
  - bpf_linker__add_file() takes input filename and appends it to the already
    processed ELF data; it can be called multiple times, one for each BPF
    ELF object file that needs to be linked in;
  - bpf_linker__finalize() needs to be called to dump final ELF contents into
    the output file, specified when bpf_linker was created; after
    bpf_linker__finalize() is called, no more bpf_linker__add_file() and
    bpf_linker__finalize() calls are allowed, they will return error;
  - regardless of whether bpf_linker__finalize() was called or not,
    bpf_linker__free() will free up all the used resources.

Currently, BPF static linker doesn't resolve cross-object file references
(extern variables and/or functions). This will be added in the follow up patch
set.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-7-andrii@kernel.org
---
 tools/lib/bpf/Build             |    2 +-
 tools/lib/bpf/libbpf.c          |   11 +-
 tools/lib/bpf/libbpf.h          |   13 +
 tools/lib/bpf/libbpf.map        |    4 +
 tools/lib/bpf/libbpf_internal.h |   20 +
 tools/lib/bpf/linker.c          | 1176 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 1215 insertions(+), 11 deletions(-)
 create mode 100644 tools/lib/bpf/linker.c

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 8136186a453f..9b057cc7650a 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-	    btf_dump.o ringbuf.o strset.o
+	    btf_dump.o ringbuf.o strset.o linker.o
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 18ba37164e17..058b643cbcb1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -55,10 +55,6 @@
 #include "libbpf_internal.h"
 #include "hashmap.h"
 
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
-
 #ifndef BPF_FS_MAGIC
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
@@ -1134,11 +1130,6 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 	obj->efile.obj_buf_sz = 0;
 }
 
-/* if libelf is old and doesn't support mmap(), fall back to read() */
-#ifndef ELF_C_READ_MMAP
-#define ELF_C_READ_MMAP ELF_C_READ
-#endif
-
 static int bpf_object__elf_init(struct bpf_object *obj)
 {
 	int err = 0;
@@ -2807,7 +2798,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
 		return true;
 
 	/* ignore .llvm_addrsig section as well */
-	if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+	if (hdr->sh_type == SHT_LLVM_ADDRSIG)
 		return true;
 
 	/* no subprograms will lead to an empty .text section, ignore it */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3d690d4e785c..a1a424b9b8ff 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -760,6 +760,19 @@ enum libbpf_tristate {
 	TRI_MODULE = 2,
 };
 
+struct bpf_linker_opts {
+	/* size of this struct, for forward/backward compatiblity */
+	size_t sz;
+};
+#define bpf_linker_opts__last_field sz
+
+struct bpf_linker;
+
+LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
+LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d31d8c968097..279ae861f568 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -355,4 +355,8 @@ LIBBPF_0.4.0 {
 	global:
 		btf__add_float;
 		btf__add_type;
+		bpf_linker__add_file;
+		bpf_linker__finalize;
+		bpf_linker__free;
+		bpf_linker__new;
 } LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index e0787900eeca..6017902c687e 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -20,6 +20,26 @@
 
 #include "libbpf.h"
 
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef R_BPF_64_64
+#define R_BPF_64_64 1
+#endif
+#ifndef R_BPF_64_32
+#define R_BPF_64_32 10
+#endif
+
+#ifndef SHT_LLVM_ADDRSIG
+#define SHT_LLVM_ADDRSIG 0x6FFF4C03
+#endif
+
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
new file mode 100644
index 000000000000..01b56939a835
--- /dev/null
+++ b/tools/lib/bpf/linker.c
@@ -0,0 +1,1176 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * BPF static linker
+ *
+ * Copyright (c) 2021 Facebook
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <elf.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include "libbpf.h"
+#include "btf.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct src_sec {
+	const char *sec_name;
+	/* positional (not necessarily ELF) index in an array of sections */
+	int id;
+	/* positional (not necessarily ELF) index of a matching section in a final object file */
+	int dst_id;
+	/* section data offset in a matching output section */
+	int dst_off;
+	/* whether section is omitted from the final ELF file */
+	bool skipped;
+
+	/* ELF info */
+	size_t sec_idx;
+	Elf_Scn *scn;
+	Elf64_Shdr *shdr;
+	Elf_Data *data;
+};
+
+struct src_obj {
+	const char *filename;
+	int fd;
+	Elf *elf;
+	/* Section header strings section index */
+	size_t shstrs_sec_idx;
+	/* SYMTAB section index */
+	size_t symtab_sec_idx;
+
+	/* List of sections. Slot zero is unused. */
+	struct src_sec *secs;
+	int sec_cnt;
+
+	/* mapping of symbol indices from src to dst ELF */
+	int *sym_map;
+};
+
+struct dst_sec {
+	char *sec_name;
+	/* positional (not necessarily ELF) index in an array of sections */
+	int id;
+
+	/* ELF info */
+	size_t sec_idx;
+	Elf_Scn *scn;
+	Elf64_Shdr *shdr;
+	Elf_Data *data;
+
+	/* final output section size */
+	int sec_sz;
+	/* final output contents of the section */
+	void *raw_data;
+
+	/* corresponding STT_SECTION symbol index in SYMTAB */
+	int sec_sym_idx;
+};
+
+struct bpf_linker {
+	char *filename;
+	int fd;
+	Elf *elf;
+	Elf64_Ehdr *elf_hdr;
+
+	/* Output sections metadata */
+	struct dst_sec *secs;
+	int sec_cnt;
+
+	struct strset *strtab_strs; /* STRTAB unique strings */
+	size_t strtab_sec_idx; /* STRTAB section index */
+	size_t symtab_sec_idx; /* SYMTAB section index */
+};
+
+#define pr_warn_elf(fmt, ...)									\
+do {												\
+	libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1));	\
+} while (0)
+
+static int init_output_elf(struct bpf_linker *linker, const char *file);
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
+
+void bpf_linker__free(struct bpf_linker *linker)
+{
+	int i;
+
+	if (!linker)
+		return;
+
+	free(linker->filename);
+
+	if (linker->elf)
+		elf_end(linker->elf);
+
+	if (linker->fd >= 0)
+		close(linker->fd);
+
+	strset__free(linker->strtab_strs);
+
+	for (i = 1; i < linker->sec_cnt; i++) {
+		struct dst_sec *sec = &linker->secs[i];
+
+		free(sec->sec_name);
+		free(sec->raw_data);
+	}
+	free(linker->secs);
+
+	free(linker);
+}
+
+struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts)
+{
+	struct bpf_linker *linker;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_linker_opts))
+		return NULL;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warn_elf("libelf initialization failed");
+		return NULL;
+	}
+
+	linker = calloc(1, sizeof(*linker));
+	if (!linker)
+		return NULL;
+
+	linker->fd = -1;
+
+	err = init_output_elf(linker, filename);
+	if (err)
+		goto err_out;
+
+	return linker;
+
+err_out:
+	bpf_linker__free(linker);
+	return NULL;
+}
+
+static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
+{
+	struct dst_sec *secs = linker->secs, *sec;
+	size_t new_cnt = linker->sec_cnt ? linker->sec_cnt + 1 : 2;
+
+	secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+	if (!secs)
+		return NULL;
+
+	/* zero out newly allocated memory */
+	memset(secs + linker->sec_cnt, 0, (new_cnt - linker->sec_cnt) * sizeof(*secs));
+
+	linker->secs = secs;
+	linker->sec_cnt = new_cnt;
+
+	sec = &linker->secs[new_cnt - 1];
+	sec->id = new_cnt - 1;
+	sec->sec_name = strdup(sec_name);
+	if (!sec->sec_name)
+		return NULL;
+
+	return sec;
+}
+
+static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx)
+{
+	struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+	Elf64_Sym *syms, *sym;
+	size_t sym_cnt = symtab->sec_sz / sizeof(*sym);
+
+	syms = libbpf_reallocarray(symtab->raw_data, sym_cnt + 1, sizeof(*sym));
+	if (!syms)
+		return NULL;
+
+	sym = &syms[sym_cnt];
+	memset(sym, 0, sizeof(*sym));
+
+	symtab->raw_data = syms;
+	symtab->sec_sz += sizeof(*sym);
+	symtab->shdr->sh_size += sizeof(*sym);
+	symtab->data->d_size += sizeof(*sym);
+
+	if (sym_idx)
+		*sym_idx = sym_cnt;
+
+	return sym;
+}
+
+static int init_output_elf(struct bpf_linker *linker, const char *file)
+{
+	int err, str_off;
+	Elf64_Sym *init_sym;
+	struct dst_sec *sec;
+
+	linker->filename = strdup(file);
+	if (!linker->filename)
+		return -ENOMEM;
+
+	linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	if (linker->fd < 0) {
+		err = -errno;
+		pr_warn("failed to create '%s': %d\n", file, err);
+		return err;
+	}
+
+	linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL);
+	if (!linker->elf) {
+		pr_warn_elf("failed to create ELF object");
+		return -EINVAL;
+	}
+
+	/* ELF header */
+	linker->elf_hdr = elf64_newehdr(linker->elf);
+	if (!linker->elf_hdr){
+		pr_warn_elf("failed to create ELF header");
+		return -EINVAL;
+	}
+
+	linker->elf_hdr->e_machine = EM_BPF;
+	linker->elf_hdr->e_type = ET_REL;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+
+	/* STRTAB */
+	/* initialize strset with an empty string to conform to ELF */
+	linker->strtab_strs = strset__new(INT_MAX, "", sizeof(""));
+	if (libbpf_get_error(linker->strtab_strs))
+		return libbpf_get_error(linker->strtab_strs);
+
+	sec = add_dst_sec(linker, ".strtab");
+	if (!sec)
+		return -ENOMEM;
+
+	sec->scn = elf_newscn(linker->elf);
+	if (!sec->scn) {
+		pr_warn_elf("failed to create STRTAB section");
+		return -EINVAL;
+	}
+
+	sec->shdr = elf64_getshdr(sec->scn);
+	if (!sec->shdr)
+		return -EINVAL;
+
+	sec->data = elf_newdata(sec->scn);
+	if (!sec->data) {
+		pr_warn_elf("failed to create STRTAB data");
+		return -EINVAL;
+	}
+
+	str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+	if (str_off < 0)
+		return str_off;
+
+	sec->sec_idx = elf_ndxscn(sec->scn);
+	linker->elf_hdr->e_shstrndx = sec->sec_idx;
+	linker->strtab_sec_idx = sec->sec_idx;
+
+	sec->shdr->sh_name = str_off;
+	sec->shdr->sh_type = SHT_STRTAB;
+	sec->shdr->sh_flags = SHF_STRINGS;
+	sec->shdr->sh_offset = 0;
+	sec->shdr->sh_link = 0;
+	sec->shdr->sh_info = 0;
+	sec->shdr->sh_addralign = 1;
+	sec->shdr->sh_size = sec->sec_sz = 0;
+	sec->shdr->sh_entsize = 0;
+
+	/* SYMTAB */
+	sec = add_dst_sec(linker, ".symtab");
+	if (!sec)
+		return -ENOMEM;
+
+	sec->scn = elf_newscn(linker->elf);
+	if (!sec->scn) {
+		pr_warn_elf("failed to create SYMTAB section");
+		return -EINVAL;
+	}
+
+	sec->shdr = elf64_getshdr(sec->scn);
+	if (!sec->shdr)
+		return -EINVAL;
+
+	sec->data = elf_newdata(sec->scn);
+	if (!sec->data) {
+		pr_warn_elf("failed to create SYMTAB data");
+		return -EINVAL;
+	}
+
+	str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+	if (str_off < 0)
+		return str_off;
+
+	sec->sec_idx = elf_ndxscn(sec->scn);
+	linker->symtab_sec_idx = sec->sec_idx;
+
+	sec->shdr->sh_name = str_off;
+	sec->shdr->sh_type = SHT_SYMTAB;
+	sec->shdr->sh_flags = 0;
+	sec->shdr->sh_offset = 0;
+	sec->shdr->sh_link = linker->strtab_sec_idx;
+	/* sh_info should be one greater than the index of the last local
+	 * symbol (i.e., binding is STB_LOCAL). But why and who cares?
+	 */
+	sec->shdr->sh_info = 0;
+	sec->shdr->sh_addralign = 8;
+	sec->shdr->sh_entsize = sizeof(Elf64_Sym);
+
+	/* add the special all-zero symbol */
+	init_sym = add_new_sym(linker, NULL);
+	if (!init_sym)
+		return -EINVAL;
+
+	init_sym->st_name = 0;
+	init_sym->st_info = 0;
+	init_sym->st_other = 0;
+	init_sym->st_shndx = SHN_UNDEF;
+	init_sym->st_value = 0;
+	init_sym->st_size = 0;
+
+	return 0;
+}
+
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+{
+	struct src_obj obj = {};
+	int err = 0;
+
+	if (!linker->elf)
+		return -EINVAL;
+
+	err = err ?: linker_load_obj_file(linker, filename, &obj);
+	err = err ?: linker_append_sec_data(linker, &obj);
+	err = err ?: linker_append_elf_syms(linker, &obj);
+	err = err ?: linker_append_elf_relos(linker, &obj);
+
+	/* free up src_obj resources */
+	free(obj.secs);
+	free(obj.sym_map);
+	if (obj.elf)
+		elf_end(obj.elf);
+	if (obj.fd >= 0)
+		close(obj.fd);
+
+	return err;
+}
+
+static bool is_dwarf_sec_name(const char *name)
+{
+	/* approximation, but the actual list is too long */
+	return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool is_ignored_sec(struct src_sec *sec)
+{
+	Elf64_Shdr *shdr = sec->shdr;
+	const char *name = sec->sec_name;
+
+	/* no special handling of .strtab */
+	if (shdr->sh_type == SHT_STRTAB)
+		return true;
+
+	/* ignore .llvm_addrsig section as well */
+	if (shdr->sh_type == SHT_LLVM_ADDRSIG)
+		return true;
+
+	/* no subprograms will lead to an empty .text section, ignore it */
+	if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size == 0 &&
+	    strcmp(sec->sec_name, ".text") == 0)
+		return true;
+
+	/* DWARF sections */
+	if (is_dwarf_sec_name(sec->sec_name))
+		return true;
+
+	if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+		name += sizeof(".rel") - 1;
+		/* DWARF section relocations */
+		if (is_dwarf_sec_name(name))
+			return true;
+
+		/* .BTF and .BTF.ext don't need relocations */
+		if (strcmp(name, BTF_ELF_SEC) == 0 ||
+		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
+{
+	struct src_sec *secs = obj->secs, *sec;
+	size_t new_cnt = obj->sec_cnt ? obj->sec_cnt + 1 : 2;
+
+	secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+	if (!secs)
+		return NULL;
+
+	/* zero out newly allocated memory */
+	memset(secs + obj->sec_cnt, 0, (new_cnt - obj->sec_cnt) * sizeof(*secs));
+
+	obj->secs = secs;
+	obj->sec_cnt = new_cnt;
+
+	sec = &obj->secs[new_cnt - 1];
+	sec->id = new_cnt - 1;
+	sec->sec_name = sec_name;
+
+	return sec;
+}
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	const int host_endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	const int host_endianness = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+	int err = 0;
+	Elf_Scn *scn;
+	Elf_Data *data;
+	Elf64_Ehdr *ehdr;
+	Elf64_Shdr *shdr;
+	struct src_sec *sec;
+
+	pr_debug("linker: adding object file '%s'...\n", filename);
+
+	obj->filename = filename;
+
+	obj->fd = open(filename, O_RDONLY);
+	if (obj->fd < 0) {
+		err = -errno;
+		pr_warn("failed to open file '%s': %d\n", filename, err);
+		return err;
+	}
+	obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL);
+	if (!obj->elf) {
+		err = -errno;
+		pr_warn_elf("failed to parse ELF file '%s'", filename);
+		return err;
+	}
+
+	/* Sanity check ELF file high-level properties */
+	ehdr = elf64_getehdr(obj->elf);
+	if (!ehdr) {
+		err = -errno;
+		pr_warn_elf("failed to get ELF header for %s", filename);
+		return err;
+	}
+	if (ehdr->e_ident[EI_DATA] != host_endianness) {
+		err = -EOPNOTSUPP;
+		pr_warn_elf("unsupported byte order of ELF file %s", filename);
+		return err;
+	}
+	if (ehdr->e_type != ET_REL
+	    || ehdr->e_machine != EM_BPF
+	    || ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+		err = -EOPNOTSUPP;
+		pr_warn_elf("unsupported kind of ELF file %s", filename);
+		return err;
+	}
+
+	if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) {
+		err = -errno;
+		pr_warn_elf("failed to get SHSTRTAB section index for %s", filename);
+		return err;
+	}
+
+	scn = NULL;
+	while ((scn = elf_nextscn(obj->elf, scn)) != NULL) {
+		size_t sec_idx = elf_ndxscn(scn);
+		const char *sec_name;
+
+		shdr = elf64_getshdr(scn);
+		if (!shdr) {
+			err = -errno;
+			pr_warn_elf("failed to get section #%zu header for %s",
+				    sec_idx, filename);
+			return err;
+		}
+
+		sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name);
+		if (!sec_name) {
+			err = -errno;
+			pr_warn_elf("failed to get section #%zu name for %s",
+				    sec_idx, filename);
+			return err;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			err = -errno;
+			pr_warn_elf("failed to get section #%zu (%s) data from %s",
+				    sec_idx, sec_name, filename);
+			return err;
+		}
+
+		sec = add_src_sec(obj, sec_name);
+		if (!sec)
+			return -ENOMEM;
+
+		sec->scn = scn;
+		sec->shdr = shdr;
+		sec->data = data;
+		sec->sec_idx = elf_ndxscn(scn);
+
+		if (is_ignored_sec(sec)) {
+			sec->skipped = true;
+			continue;
+		}
+
+		switch (shdr->sh_type) {
+		case SHT_SYMTAB:
+			if (obj->symtab_sec_idx) {
+				err = -EOPNOTSUPP;
+				pr_warn("multiple SYMTAB sections found, not supported\n");
+				return err;
+			}
+			obj->symtab_sec_idx = sec_idx;
+			break;
+		case SHT_STRTAB:
+			/* we'll construct our own string table */
+			break;
+		case SHT_PROGBITS:
+			if (strcmp(sec_name, BTF_ELF_SEC) == 0) {
+				sec->skipped = true;
+				continue;
+			}
+			if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) {
+				sec->skipped = true;
+				continue;
+			}
+
+			/* data & code */
+			break;
+		case SHT_NOBITS:
+			/* BSS */
+			break;
+		case SHT_REL:
+			/* relocations */
+			break;
+		default:
+			pr_warn("unrecognized section #%zu (%s) in %s\n",
+				sec_idx, sec_name, filename);
+			err = -EINVAL;
+			return err;
+		}
+	}
+
+	err = err ?: linker_sanity_check_elf(obj);
+
+	return err;
+}
+
+static bool is_pow_of_2(size_t x)
+{
+	return x && (x & (x - 1)) == 0;
+}
+
+static int linker_sanity_check_elf(struct src_obj *obj)
+{
+	struct src_sec *sec, *link_sec;
+	int i, j, n;
+
+	if (!obj->symtab_sec_idx) {
+		pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
+		return -EINVAL;
+	}
+	if (!obj->shstrs_sec_idx) {
+		pr_warn("ELF is missing section headers STRTAB section in %s\n", obj->filename);
+		return -EINVAL;
+	}
+
+	for (i = 1; i < obj->sec_cnt; i++) {
+		sec = &obj->secs[i];
+
+		if (sec->sec_name[0] == '\0') {
+			pr_warn("ELF section #%zu has empty name in %s\n", sec->sec_idx, obj->filename);
+			return -EINVAL;
+		}
+
+		if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign))
+			return -EINVAL;
+		if (sec->shdr->sh_addralign != sec->data->d_align)
+			return -EINVAL;
+
+		if (sec->shdr->sh_size != sec->data->d_size)
+			return -EINVAL;
+
+		switch (sec->shdr->sh_type) {
+		case SHT_SYMTAB: {
+			Elf64_Sym *sym;
+
+			if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+				return -EINVAL;
+			if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+				return -EINVAL;
+
+			if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+				pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+				return -EINVAL;
+			}
+			link_sec = &obj->secs[sec->shdr->sh_link];
+			if (link_sec->shdr->sh_type != SHT_STRTAB) {
+				pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+				return -EINVAL;
+			}
+
+			n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+			sym = sec->data->d_buf;
+			for (j = 0; j < n; j++, sym++) {
+				if (sym->st_shndx
+				    && sym->st_shndx < SHN_LORESERVE
+				    && sym->st_shndx >= obj->sec_cnt) {
+					pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+						j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+					return -EINVAL;
+				}
+				if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
+					if (sym->st_value != 0)
+						return -EINVAL;
+				}
+			}
+			break;
+		}
+		case SHT_STRTAB:
+			break;
+		case SHT_PROGBITS:
+			if (sec->shdr->sh_flags & SHF_EXECINSTR) {
+				if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0)
+					return -EINVAL;
+			}
+			break;
+		case SHT_NOBITS:
+			break;
+		case SHT_REL: {
+			Elf64_Rel *relo;
+			struct src_sec *sym_sec;
+
+			if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+				return -EINVAL;
+			if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+				return -EINVAL;
+
+			/* SHT_REL's sh_link should point to SYMTAB */
+			if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+				pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+				return -EINVAL;
+			}
+
+			/* SHT_REL's sh_info points to relocated section */
+			if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+				pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+					sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+				return -EINVAL;
+			}
+			link_sec = &obj->secs[sec->shdr->sh_info];
+
+			/* .rel<secname> -> <secname> pattern is followed */
+			if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+			    || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+				pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+					sec->sec_idx, obj->filename);
+				return -EINVAL;
+			}
+
+			/* don't further validate relocations for ignored sections */
+			if (link_sec->skipped)
+				break;
+
+			/* relocatable section is data or instructions */
+			if (link_sec->shdr->sh_type != SHT_PROGBITS
+			    && link_sec->shdr->sh_type != SHT_NOBITS) {
+				pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+					sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+				return -EINVAL;
+			}
+
+			/* check sanity of each relocation */
+			n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+			relo = sec->data->d_buf;
+			sym_sec = &obj->secs[obj->symtab_sec_idx];
+			for (j = 0; j < n; j++, relo++) {
+				size_t sym_idx = ELF64_R_SYM(relo->r_info);
+				size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+				if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+					pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+						j, sec->sec_idx, sym_type, obj->filename);
+					return -EINVAL;
+				}
+
+				if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+					pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+						j, sec->sec_idx, sym_idx, obj->filename);
+					return -EINVAL;
+				}
+
+				if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+					if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+						pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+							j, sec->sec_idx, sym_idx, obj->filename);
+						return -EINVAL;
+					}
+				}
+			}
+			break;
+		}
+		case SHT_LLVM_ADDRSIG:
+			break;
+		default:
+			pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+				sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+	Elf_Scn *scn;
+	Elf_Data *data;
+	Elf64_Shdr *shdr;
+	int name_off;
+
+	dst_sec->sec_sz = 0;
+	dst_sec->sec_idx = 0;
+
+	scn = elf_newscn(linker->elf);
+	if (!scn)
+		return -1;
+	data = elf_newdata(scn);
+	if (!data)
+		return -1;
+	shdr = elf64_getshdr(scn);
+	if (!shdr)
+		return -1;
+
+	dst_sec->scn = scn;
+	dst_sec->shdr = shdr;
+	dst_sec->data = data;
+	dst_sec->sec_idx = elf_ndxscn(scn);
+
+	name_off = strset__add_str(linker->strtab_strs, src_sec->sec_name);
+	if (name_off < 0)
+		return name_off;
+
+	shdr->sh_name = name_off;
+	shdr->sh_type = src_sec->shdr->sh_type;
+	shdr->sh_flags = src_sec->shdr->sh_flags;
+	shdr->sh_size = 0;
+	/* sh_link and sh_info have different meaning for different types of
+	 * sections, so we leave it up to the caller code to fill them in, if
+	 * necessary
+	 */
+	shdr->sh_link = 0;
+	shdr->sh_info = 0;
+	shdr->sh_addralign = src_sec->shdr->sh_addralign;
+	shdr->sh_entsize = src_sec->shdr->sh_entsize;
+
+	data->d_type = src_sec->data->d_type;
+	data->d_size = 0;
+	data->d_buf = NULL;
+	data->d_align = src_sec->data->d_align;
+	data->d_off = 0;
+
+	return 0;
+}
+
+static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const char *sec_name)
+{
+	struct dst_sec *sec;
+	int i;
+
+	for (i = 1; i < linker->sec_cnt; i++) {
+		sec = &linker->secs[i];
+
+		if (strcmp(sec->sec_name, sec_name) == 0)
+			return sec;
+	}
+
+	return NULL;
+}
+
+static bool secs_match(struct dst_sec *dst, struct src_sec *src)
+{
+	if (dst->shdr->sh_type != src->shdr->sh_type) {
+		pr_warn("sec %s types mismatch\n", dst->sec_name);
+		return false;
+	}
+	if (dst->shdr->sh_flags != src->shdr->sh_flags) {
+		pr_warn("sec %s flags mismatch\n", dst->sec_name);
+		return false;
+	}
+	if (dst->shdr->sh_entsize != src->shdr->sh_entsize) {
+		pr_warn("sec %s entsize mismatch\n", dst->sec_name);
+		return false;
+	}
+
+	return true;
+}
+
+static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+	if (dst_sec->sec_sz != src_sec->shdr->sh_size)
+		return false;
+	if (memcmp(dst_sec->raw_data, src_sec->data->d_buf, dst_sec->sec_sz) != 0)
+		return false;
+	return true;
+}
+
+static int extend_sec(struct dst_sec *dst, struct src_sec *src)
+{
+	void *tmp;
+	size_t dst_align = dst->shdr->sh_addralign;
+	size_t src_align = src->shdr->sh_addralign;
+	size_t dst_align_sz, dst_final_sz;
+
+	if (dst_align == 0)
+		dst_align = 1;
+	if (dst_align < src_align)
+		dst_align = src_align;
+
+	dst_align_sz = (dst->sec_sz + dst_align - 1) / dst_align * dst_align;
+
+	/* no need to re-align final size */
+	dst_final_sz = dst_align_sz + src->shdr->sh_size;
+
+	if (src->shdr->sh_type != SHT_NOBITS) {
+		tmp = realloc(dst->raw_data, dst_final_sz);
+		if (!tmp)
+			return -ENOMEM;
+		dst->raw_data = tmp;
+
+		/* pad dst section, if it's alignment forced size increase */
+		memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz);
+		/* now copy src data at a properly aligned offset */
+		memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size);
+	}
+
+	dst->sec_sz = dst_final_sz;
+	dst->shdr->sh_size = dst_final_sz;
+	dst->data->d_size = dst_final_sz;
+
+	dst->shdr->sh_addralign = dst_align;
+	dst->data->d_align = dst_align;
+
+	src->dst_off = dst_align_sz;
+
+	return 0;
+}
+
+static bool is_data_sec(struct src_sec *sec)
+{
+	if (!sec || sec->skipped)
+		return false;
+	return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS;
+}
+
+static bool is_relo_sec(struct src_sec *sec)
+{
+	if (!sec || sec->skipped)
+		return false;
+	return sec->shdr->sh_type == SHT_REL;
+}
+
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj)
+{
+	int i, err;
+
+	for (i = 1; i < obj->sec_cnt; i++) {
+		struct src_sec *src_sec;
+		struct dst_sec *dst_sec;
+
+		src_sec = &obj->secs[i];
+		if (!is_data_sec(src_sec))
+			continue;
+
+		dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+		if (!dst_sec) {
+			dst_sec = add_dst_sec(linker, src_sec->sec_name);
+			if (!dst_sec)
+				return -ENOMEM;
+			err = init_sec(linker, dst_sec, src_sec);
+			if (err) {
+				pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+				return err;
+			}
+		} else {
+			if (!secs_match(dst_sec, src_sec)) {
+				pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
+				return -1;
+			}
+
+			/* "license" and "version" sections are deduped */
+			if (strcmp(src_sec->sec_name, "license") == 0
+			    || strcmp(src_sec->sec_name, "version") == 0) {
+				if (!sec_content_is_same(dst_sec, src_sec)) {
+					pr_warn("non-identical contents of section '%s' are not supported\n", src_sec->sec_name);
+					return -EINVAL;
+				}
+				src_sec->skipped = true;
+				src_sec->dst_id = dst_sec->id;
+				continue;
+			}
+		}
+
+		/* record mapped section index */
+		src_sec->dst_id = dst_sec->id;
+
+		err = extend_sec(dst_sec, src_sec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
+{
+	struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+	Elf64_Sym *sym = symtab->data->d_buf, *dst_sym;
+	int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+	int str_sec_idx = symtab->shdr->sh_link;
+
+	obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
+	if (!obj->sym_map)
+		return -ENOMEM;
+
+	for (i = 0; i < n; i++, sym++) {
+		struct src_sec *src_sec = NULL;
+		struct dst_sec *dst_sec = NULL;
+		const char *sym_name;
+		size_t dst_sym_idx;
+		int name_off;
+
+		/* we already have all-zero initial symbol */
+		if (sym->st_name == 0 && sym->st_info == 0 &&
+		    sym->st_other == 0 && sym->st_shndx == SHN_UNDEF &&
+		    sym->st_value == 0 && sym->st_size ==0)
+			continue;
+
+		sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+		if (!sym_name) {
+			pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
+			return -1;
+		}
+
+		if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
+			src_sec = &obj->secs[sym->st_shndx];
+			if (src_sec->skipped)
+				continue;
+			dst_sec = &linker->secs[src_sec->dst_id];
+
+			/* allow only one STT_SECTION symbol per section */
+			if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) {
+				obj->sym_map[i] = dst_sec->sec_sym_idx;
+				continue;
+			}
+		}
+
+		name_off = strset__add_str(linker->strtab_strs, sym_name);
+		if (name_off < 0)
+			return name_off;
+
+		dst_sym = add_new_sym(linker, &dst_sym_idx);
+		if (!dst_sym)
+			return -ENOMEM;
+
+		dst_sym->st_name = name_off;
+		dst_sym->st_info = sym->st_info;
+		dst_sym->st_other = sym->st_other;
+		dst_sym->st_shndx = src_sec ? dst_sec->sec_idx : sym->st_shndx;
+		dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+		dst_sym->st_size = sym->st_size;
+
+		obj->sym_map[i] = dst_sym_idx;
+
+		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) {
+			dst_sec->sec_sym_idx = dst_sym_idx;
+			dst_sym->st_value = 0;
+		}
+
+	}
+
+	return 0;
+}
+
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
+{
+	struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
+	struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
+	int i, err;
+
+	for (i = 1; i < obj->sec_cnt; i++) {
+		struct src_sec *src_sec, *src_linked_sec;
+		struct dst_sec *dst_sec, *dst_linked_sec;
+		Elf64_Rel *src_rel, *dst_rel;
+		int j, n;
+
+		src_sec = &obj->secs[i];
+		if (!is_relo_sec(src_sec))
+			continue;
+
+		/* shdr->sh_info points to relocatable section */
+		src_linked_sec = &obj->secs[src_sec->shdr->sh_info];
+		if (src_linked_sec->skipped)
+			continue;
+
+		dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+		if (!dst_sec) {
+			dst_sec = add_dst_sec(linker, src_sec->sec_name);
+			if (!dst_sec)
+				return -ENOMEM;
+			err = init_sec(linker, dst_sec, src_sec);
+			if (err) {
+				pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+				return err;
+			}
+		} else if (!secs_match(dst_sec, src_sec)) {
+			pr_warn("Secs %s are not compatible\n", src_sec->sec_name);
+			return -1;
+		}
+
+		/* shdr->sh_link points to SYMTAB */
+		dst_sec->shdr->sh_link = linker->symtab_sec_idx;
+
+		/* shdr->sh_info points to relocated section */
+		dst_linked_sec = &linker->secs[src_linked_sec->dst_id];
+		dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
+
+		src_sec->dst_id = dst_sec->id;
+		err = extend_sec(dst_sec, src_sec);
+		if (err)
+			return err;
+
+		src_rel = src_sec->data->d_buf;
+		dst_rel = dst_sec->raw_data + src_sec->dst_off;
+		n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
+		for (j = 0; j < n; j++, src_rel++, dst_rel++) {
+			size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+			size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
+			Elf64_Sym *src_sym, *dst_sym;
+			size_t dst_sym_idx;
+
+			src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+			src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
+
+			dst_sym_idx = obj->sym_map[src_sym_idx];
+			dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
+			dst_rel->r_offset += src_linked_sec->dst_off;
+			sym_type = ELF64_R_TYPE(src_rel->r_info);
+			dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
+
+			if (ELF64_ST_TYPE(src_sym->st_info) == STT_SECTION) {
+				struct src_sec *sec = &obj->secs[src_sym->st_shndx];
+				struct bpf_insn *insn;
+
+				if (src_linked_sec->shdr->sh_flags & SHF_EXECINSTR) {
+					/* calls to the very first static function inside
+					 * .text section at offset 0 will
+					 * reference section symbol, not the
+					 * function symbol. Fix that up,
+					 * otherwise it won't be possible to
+					 * relocate calls to two different
+					 * static functions with the same name
+					 * (rom two different object files)
+					 */
+					insn = dst_linked_sec->raw_data + dst_rel->r_offset;
+					if (insn->code == (BPF_JMP | BPF_CALL))
+						insn->imm += sec->dst_off / sizeof(struct bpf_insn);
+					else
+						insn->imm += sec->dst_off;
+				} else {
+					pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n");
+					return -EINVAL;
+				}
+			}
+
+		}
+	}
+
+	return 0;
+}
+
+int bpf_linker__finalize(struct bpf_linker *linker)
+{
+	struct dst_sec *sec;
+	size_t strs_sz;
+	const void *strs;
+	int err, i;
+
+	if (!linker->elf)
+		return -EINVAL;
+
+	/* Finalize strings */
+	strs_sz = strset__data_size(linker->strtab_strs);
+	strs = strset__data(linker->strtab_strs);
+
+	sec = &linker->secs[linker->strtab_sec_idx];
+	sec->data->d_align = 1;
+	sec->data->d_off = 0LL;
+	sec->data->d_buf = (void *)strs;
+	sec->data->d_type = ELF_T_BYTE;
+	sec->data->d_size = strs_sz;
+	sec->shdr->sh_size = strs_sz;
+
+	for (i = 1; i < linker->sec_cnt; i++) {
+		sec = &linker->secs[i];
+
+		/* STRTAB is handled specially above */
+		if (sec->sec_idx == linker->strtab_sec_idx)
+			continue;
+
+		sec->data->d_buf = sec->raw_data;
+	}
+
+	/* Finalize ELF layout */
+	if (elf_update(linker->elf, ELF_C_NULL) < 0) {
+		err = -errno;
+		pr_warn_elf("failed to finalize ELF layout");
+		return err;
+	}
+
+	/* Write out final ELF contents */
+	if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
+		err = -errno;
+		pr_warn_elf("failed to write ELF contents");
+		return err;
+	}
+
+	elf_end(linker->elf);
+	close(linker->fd);
+
+	linker->elf = NULL;
+	linker->fd = -1;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 8fd27bf69b864b1c2a6e64cf5673603f3959a6ef Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:31 -0700
Subject: libbpf: Add BPF static linker BTF and BTF.ext support

Add .BTF and .BTF.ext static linking logic.

When multiple BPF object files are linked together, their respective .BTF and
.BTF.ext sections are merged together. BTF types are not just concatenated,
but also deduplicated. .BTF.ext data is grouped by type (func info, line info,
core_relos) and target section names, and then all the records are
concatenated together, preserving their relative order. All the BTF type ID
references and string offsets are updated as necessary, to take into account
possibly deduplicated strings and types.

BTF DATASEC types are handled specially. Their respective var_secinfos are
accumulated separately in special per-section data and then final DATASEC
types are emitted at the very end during bpf_linker__finalize() operation,
just before emitting final ELF output file.

BTF data can also provide "section annotations" for some extern variables.
Such concept is missing in ELF, but BTF will have DATASEC types for such
special extern datasections (e.g., .kconfig, .ksyms). Such sections are called
"ephemeral" internally. Internally linker will keep metadata for each such
section, collecting variables information, but those sections won't be emitted
into the final ELF file.

Also, given LLVM/Clang during compilation emits BTF DATASECS that are
incomplete, missing section size and variable offsets for static variables,
BPF static linker will initially fix up such DATASECs, using ELF symbols data.
The final DATASECs will preserve section sizes and all variable offsets. This
is handled correctly by libbpf already, so won't cause any new issues. On the
other hand, it's actually a nice property to have a complete BTF data without
runtime adjustments done during bpf_object__open() by libbpf. In that sense,
BPF static linker is also a BTF normalizer.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-8-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 769 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 767 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 01b56939a835..b4fff912dce2 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -32,12 +32,17 @@ struct src_sec {
 	int dst_off;
 	/* whether section is omitted from the final ELF file */
 	bool skipped;
+	/* whether section is an ephemeral section, not mapped to an ELF section */
+	bool ephemeral;
 
 	/* ELF info */
 	size_t sec_idx;
 	Elf_Scn *scn;
 	Elf64_Shdr *shdr;
 	Elf_Data *data;
+
+	/* corresponding BTF DATASEC type ID */
+	int sec_type_id;
 };
 
 struct src_obj {
@@ -49,12 +54,24 @@ struct src_obj {
 	/* SYMTAB section index */
 	size_t symtab_sec_idx;
 
-	/* List of sections. Slot zero is unused. */
+	struct btf *btf;
+	struct btf_ext *btf_ext;
+
+	/* List of sections (including ephemeral). Slot zero is unused. */
 	struct src_sec *secs;
 	int sec_cnt;
 
 	/* mapping of symbol indices from src to dst ELF */
 	int *sym_map;
+	/* mapping from the src BTF type IDs to dst ones */
+	int *btf_type_map;
+};
+
+/* single .BTF.ext data section */
+struct btf_ext_sec_data {
+	size_t rec_cnt;
+	__u32 rec_sz;
+	void *recs;
 };
 
 struct dst_sec {
@@ -75,6 +92,15 @@ struct dst_sec {
 
 	/* corresponding STT_SECTION symbol index in SYMTAB */
 	int sec_sym_idx;
+
+	/* section's DATASEC variable info, emitted on BTF finalization */
+	int sec_var_cnt;
+	struct btf_var_secinfo *sec_vars;
+
+	/* section's .BTF.ext data */
+	struct btf_ext_sec_data func_info;
+	struct btf_ext_sec_data line_info;
+	struct btf_ext_sec_data core_relo_info;
 };
 
 struct bpf_linker {
@@ -90,6 +116,9 @@ struct bpf_linker {
 	struct strset *strtab_strs; /* STRTAB unique strings */
 	size_t strtab_sec_idx; /* STRTAB section index */
 	size_t symtab_sec_idx; /* SYMTAB section index */
+
+	struct btf *btf;
+	struct btf_ext *btf_ext;
 };
 
 #define pr_warn_elf(fmt, ...)									\
@@ -101,9 +130,17 @@ static int init_output_elf(struct bpf_linker *linker, const char *file);
 
 static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
 static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_btf(struct src_obj *obj);
+static int linker_sanity_check_btf_ext(struct src_obj *obj);
+static int linker_fixup_btf(struct src_obj *obj);
 static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
 static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
 static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
+
+static int finalize_btf(struct bpf_linker *linker);
+static int finalize_btf_ext(struct bpf_linker *linker);
 
 void bpf_linker__free(struct bpf_linker *linker)
 {
@@ -122,11 +159,19 @@ void bpf_linker__free(struct bpf_linker *linker)
 
 	strset__free(linker->strtab_strs);
 
+	btf__free(linker->btf);
+	btf_ext__free(linker->btf_ext);
+
 	for (i = 1; i < linker->sec_cnt; i++) {
 		struct dst_sec *sec = &linker->secs[i];
 
 		free(sec->sec_name);
 		free(sec->raw_data);
+		free(sec->sec_vars);
+
+		free(sec->func_info.recs);
+		free(sec->line_info.recs);
+		free(sec->core_relo_info.recs);
 	}
 	free(linker->secs);
 
@@ -335,6 +380,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 	sec->shdr->sh_addralign = 8;
 	sec->shdr->sh_entsize = sizeof(Elf64_Sym);
 
+	/* .BTF */
+	linker->btf = btf__new_empty();
+	err = libbpf_get_error(linker->btf);
+	if (err)
+		return err;
+
 	/* add the special all-zero symbol */
 	init_sym = add_new_sym(linker, NULL);
 	if (!init_sym)
@@ -362,8 +413,13 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
 	err = err ?: linker_append_sec_data(linker, &obj);
 	err = err ?: linker_append_elf_syms(linker, &obj);
 	err = err ?: linker_append_elf_relos(linker, &obj);
+	err = err ?: linker_append_btf(linker, &obj);
+	err = err ?: linker_append_btf_ext(linker, &obj);
 
 	/* free up src_obj resources */
+	free(obj.btf_type_map);
+	btf__free(obj.btf);
+	btf_ext__free(obj.btf_ext);
 	free(obj.secs);
 	free(obj.sym_map);
 	if (obj.elf)
@@ -555,10 +611,22 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 			break;
 		case SHT_PROGBITS:
 			if (strcmp(sec_name, BTF_ELF_SEC) == 0) {
+				obj->btf = btf__new(data->d_buf, shdr->sh_size);
+				err = libbpf_get_error(obj->btf);
+				if (err) {
+					pr_warn("failed to parse .BTF from %s: %d\n", filename, err);
+					return err;
+				}
 				sec->skipped = true;
 				continue;
 			}
 			if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) {
+				obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size);
+				err = libbpf_get_error(obj->btf_ext);
+				if (err) {
+					pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err);
+					return err;
+				}
 				sec->skipped = true;
 				continue;
 			}
@@ -580,6 +648,9 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 	}
 
 	err = err ?: linker_sanity_check_elf(obj);
+	err = err ?: linker_sanity_check_btf(obj);
+	err = err ?: linker_sanity_check_btf_ext(obj);
+	err = err ?: linker_fixup_btf(obj);
 
 	return err;
 }
@@ -753,6 +824,69 @@ static int linker_sanity_check_elf(struct src_obj *obj)
 	return 0;
 }
 
+static int check_btf_type_id(__u32 *type_id, void *ctx)
+{
+	struct btf *btf = ctx;
+
+	if (*type_id > btf__get_nr_types(btf))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int check_btf_str_off(__u32 *str_off, void *ctx)
+{
+	struct btf *btf = ctx;
+	const char *s;
+
+	s = btf__str_by_offset(btf, *str_off);
+
+	if (!s)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int linker_sanity_check_btf(struct src_obj *obj)
+{
+	struct btf_type *t;
+	int i, n, err = 0;
+
+	if (!obj->btf)
+		return 0;
+
+	n = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= n; i++) {
+		t = btf_type_by_id(obj->btf, i);
+
+		err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
+		err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int linker_sanity_check_btf_ext(struct src_obj *obj)
+{
+	int err = 0;
+
+	if (!obj->btf_ext)
+		return 0;
+
+	/* can't use .BTF.ext without .BTF */
+	if (!obj->btf)
+		return -EINVAL;
+
+	err = err ?: btf_ext_visit_type_ids(obj->btf_ext, check_btf_type_id, obj->btf);
+	err = err ?: btf_ext_visit_str_offs(obj->btf_ext, check_btf_str_off, obj->btf);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec)
 {
 	Elf_Scn *scn;
@@ -763,6 +897,10 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
 	dst_sec->sec_sz = 0;
 	dst_sec->sec_idx = 0;
 
+	/* ephemeral sections are just thin section shells lacking most parts */
+	if (src_sec->ephemeral)
+		return 0;
+
 	scn = elf_newscn(linker->elf);
 	if (!scn)
 		return -1;
@@ -891,12 +1029,15 @@ static bool is_data_sec(struct src_sec *sec)
 {
 	if (!sec || sec->skipped)
 		return false;
+	/* ephemeral sections are data sections, e.g., .kconfig, .ksyms */
+	if (sec->ephemeral)
+		return true;
 	return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS;
 }
 
 static bool is_relo_sec(struct src_sec *sec)
 {
-	if (!sec || sec->skipped)
+	if (!sec || sec->skipped || sec->ephemeral)
 		return false;
 	return sec->shdr->sh_type == SHT_REL;
 }
@@ -945,6 +1086,9 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
 		/* record mapped section index */
 		src_sec->dst_id = dst_sec->id;
 
+		if (src_sec->ephemeral)
+			continue;
+
 		err = extend_sec(dst_sec, src_sec);
 		if (err)
 			return err;
@@ -1120,6 +1264,339 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 	return 0;
 }
 
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+	struct src_sec *sec;
+	int i;
+
+	for (i = 1; i < obj->sec_cnt; i++) {
+		sec = &obj->secs[i];
+
+		if (strcmp(sec->sec_name, sec_name) == 0)
+			return sec;
+	}
+
+	return NULL;
+}
+
+static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
+				   int sym_type, const char *sym_name)
+{
+	struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+	Elf64_Sym *sym = symtab->data->d_buf;
+	int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+	int str_sec_idx = symtab->shdr->sh_link;
+	const char *name;
+
+	for (i = 0; i < n; i++, sym++) {
+		if (sym->st_shndx != sec_idx)
+			continue;
+		if (ELF64_ST_TYPE(sym->st_info) != sym_type)
+			continue;
+
+		name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+		if (!name)
+			return NULL;
+
+		if (strcmp(sym_name, name) != 0)
+			continue;
+
+		return sym;
+	}
+
+	return NULL;
+}
+
+static int linker_fixup_btf(struct src_obj *obj)
+{
+	const char *sec_name;
+	struct src_sec *sec;
+	int i, j, n, m;
+
+	n = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= n; i++) {
+		struct btf_var_secinfo *vi;
+		struct btf_type *t;
+
+		t = btf_type_by_id(obj->btf, i);
+		if (btf_kind(t) != BTF_KIND_DATASEC)
+			continue;
+
+		sec_name = btf__str_by_offset(obj->btf, t->name_off);
+		sec = find_src_sec_by_name(obj, sec_name);
+		if (sec) {
+			/* record actual section size, unless ephemeral */
+			if (sec->shdr)
+				t->size = sec->shdr->sh_size;
+		} else {
+			/* BTF can have some sections that are not represented
+			 * in ELF, e.g., .kconfig and .ksyms, which are used
+			 * for special extern variables.  Here we'll
+			 * pre-create "section shells" for them to be able to
+			 * keep track of extra per-section metadata later
+			 * (e.g., BTF variables).
+			 */
+			sec = add_src_sec(obj, sec_name);
+			if (!sec)
+				return -ENOMEM;
+
+			sec->ephemeral = true;
+			sec->sec_idx = 0; /* will match UNDEF shndx in ELF */
+		}
+
+		/* remember ELF section and its BTF type ID match */
+		sec->sec_type_id = i;
+
+		/* fix up variable offsets */
+		vi = btf_var_secinfos(t);
+		for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+			const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type);
+			const char *var_name = btf__str_by_offset(obj->btf, vt->name_off);
+			int var_linkage = btf_var(vt)->linkage;
+			Elf64_Sym *sym;
+
+			/* no need to patch up static or extern vars */
+			if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED)
+				continue;
+
+			sym = find_sym_by_name(obj, sec->sec_idx, STT_OBJECT, var_name);
+			if (!sym) {
+				pr_warn("failed to find symbol for variable '%s' in section '%s'\n", var_name, sec_name);
+				return -ENOENT;
+			}
+
+			vi->offset = sym->st_value;
+		}
+	}
+
+	return 0;
+}
+
+static int remap_type_id(__u32 *type_id, void *ctx)
+{
+	int *id_map = ctx;
+
+	*type_id = id_map[*type_id];
+
+	return 0;
+}
+
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
+{
+	const struct btf_type *t;
+	int i, j, n, start_id, id;
+
+	if (!obj->btf)
+		return 0;
+
+	start_id = btf__get_nr_types(linker->btf) + 1;
+	n = btf__get_nr_types(obj->btf);
+
+	obj->btf_type_map = calloc(n + 1, sizeof(int));
+	if (!obj->btf_type_map)
+		return -ENOMEM;
+
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(obj->btf, i);
+
+		/* DATASECs are handled specially below */
+		if (btf_kind(t) == BTF_KIND_DATASEC)
+			continue;
+
+		id = btf__add_type(linker->btf, obj->btf, t);
+		if (id < 0) {
+			pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
+			return id;
+		}
+
+		obj->btf_type_map[i] = id;
+	}
+
+	/* remap all the types except DATASECs */
+	n = btf__get_nr_types(linker->btf);
+	for (i = start_id; i <= n; i++) {
+		struct btf_type *dst_t = btf_type_by_id(linker->btf, i);
+
+		if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
+			return -EINVAL;
+	}
+
+	/* append DATASEC info */
+	for (i = 1; i < obj->sec_cnt; i++) {
+		struct src_sec *src_sec;
+		struct dst_sec *dst_sec;
+		const struct btf_var_secinfo *src_var;
+		struct btf_var_secinfo *dst_var;
+
+		src_sec = &obj->secs[i];
+		if (!src_sec->sec_type_id || src_sec->skipped)
+			continue;
+		dst_sec = &linker->secs[src_sec->dst_id];
+
+		t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
+		src_var = btf_var_secinfos(t);
+		n = btf_vlen(t);
+		for (j = 0; j < n; j++, src_var++) {
+			void *sec_vars = dst_sec->sec_vars;
+
+			sec_vars = libbpf_reallocarray(sec_vars,
+						       dst_sec->sec_var_cnt + 1,
+						       sizeof(*dst_sec->sec_vars));
+			if (!sec_vars)
+				return -ENOMEM;
+
+			dst_sec->sec_vars = sec_vars;
+			dst_sec->sec_var_cnt++;
+
+			dst_var = &dst_sec->sec_vars[dst_sec->sec_var_cnt - 1];
+			dst_var->type = obj->btf_type_map[src_var->type];
+			dst_var->size = src_var->size;
+			dst_var->offset = src_sec->dst_off + src_var->offset;
+		}
+	}
+
+	return 0;
+}
+
+static void *add_btf_ext_rec(struct btf_ext_sec_data *ext_data, const void *src_rec)
+{
+	void *tmp;
+
+	tmp = libbpf_reallocarray(ext_data->recs, ext_data->rec_cnt + 1, ext_data->rec_sz);
+	if (!tmp)
+		return NULL;
+	ext_data->recs = tmp;
+
+	tmp += ext_data->rec_cnt * ext_data->rec_sz;
+	memcpy(tmp, src_rec, ext_data->rec_sz);
+
+	ext_data->rec_cnt++;
+
+	return tmp;
+}
+
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj)
+{
+	const struct btf_ext_info_sec *ext_sec;
+	const char *sec_name, *s;
+	struct src_sec *src_sec;
+	struct dst_sec *dst_sec;
+	int rec_sz, str_off, i;
+
+	if (!obj->btf_ext)
+		return 0;
+
+	rec_sz = obj->btf_ext->func_info.rec_size;
+	for_each_btf_ext_sec(&obj->btf_ext->func_info, ext_sec) {
+		struct bpf_func_info_min *src_rec, *dst_rec;
+
+		sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+		src_sec = find_src_sec_by_name(obj, sec_name);
+		if (!src_sec) {
+			pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+			return -EINVAL;
+		}
+		dst_sec = &linker->secs[src_sec->dst_id];
+
+		if (dst_sec->func_info.rec_sz == 0)
+			dst_sec->func_info.rec_sz = rec_sz;
+		if (dst_sec->func_info.rec_sz != rec_sz) {
+			pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+			return -EINVAL;
+		}
+
+		for_each_btf_ext_rec(&obj->btf_ext->func_info, ext_sec, i, src_rec) {
+			dst_rec = add_btf_ext_rec(&dst_sec->func_info, src_rec);
+			if (!dst_rec)
+				return -ENOMEM;
+
+			dst_rec->insn_off += src_sec->dst_off;
+			dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+		}
+	}
+
+	rec_sz = obj->btf_ext->line_info.rec_size;
+	for_each_btf_ext_sec(&obj->btf_ext->line_info, ext_sec) {
+		struct bpf_line_info_min *src_rec, *dst_rec;
+
+		sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+		src_sec = find_src_sec_by_name(obj, sec_name);
+		if (!src_sec) {
+			pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+			return -EINVAL;
+		}
+		dst_sec = &linker->secs[src_sec->dst_id];
+
+		if (dst_sec->line_info.rec_sz == 0)
+			dst_sec->line_info.rec_sz = rec_sz;
+		if (dst_sec->line_info.rec_sz != rec_sz) {
+			pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+			return -EINVAL;
+		}
+
+		for_each_btf_ext_rec(&obj->btf_ext->line_info, ext_sec, i, src_rec) {
+			dst_rec = add_btf_ext_rec(&dst_sec->line_info, src_rec);
+			if (!dst_rec)
+				return -ENOMEM;
+
+			dst_rec->insn_off += src_sec->dst_off;
+
+			s = btf__str_by_offset(obj->btf, src_rec->file_name_off);
+			str_off = btf__add_str(linker->btf, s);
+			if (str_off < 0)
+				return -ENOMEM;
+			dst_rec->file_name_off = str_off;
+
+			s = btf__str_by_offset(obj->btf, src_rec->line_off);
+			str_off = btf__add_str(linker->btf, s);
+			if (str_off < 0)
+				return -ENOMEM;
+			dst_rec->line_off = str_off;
+
+			/* dst_rec->line_col is fine */
+		}
+	}
+
+	rec_sz = obj->btf_ext->core_relo_info.rec_size;
+	for_each_btf_ext_sec(&obj->btf_ext->core_relo_info, ext_sec) {
+		struct bpf_core_relo *src_rec, *dst_rec;
+
+		sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+		src_sec = find_src_sec_by_name(obj, sec_name);
+		if (!src_sec) {
+			pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+			return -EINVAL;
+		}
+		dst_sec = &linker->secs[src_sec->dst_id];
+
+		if (dst_sec->core_relo_info.rec_sz == 0)
+			dst_sec->core_relo_info.rec_sz = rec_sz;
+		if (dst_sec->core_relo_info.rec_sz != rec_sz) {
+			pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+			return -EINVAL;
+		}
+
+		for_each_btf_ext_rec(&obj->btf_ext->core_relo_info, ext_sec, i, src_rec) {
+			dst_rec = add_btf_ext_rec(&dst_sec->core_relo_info, src_rec);
+			if (!dst_rec)
+				return -ENOMEM;
+
+			dst_rec->insn_off += src_sec->dst_off;
+			dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+
+			s = btf__str_by_offset(obj->btf, src_rec->access_str_off);
+			str_off = btf__add_str(linker->btf, s);
+			if (str_off < 0)
+				return -ENOMEM;
+			dst_rec->access_str_off = str_off;
+
+			/* dst_rec->kind is fine */
+		}
+	}
+
+	return 0;
+}
+
 int bpf_linker__finalize(struct bpf_linker *linker)
 {
 	struct dst_sec *sec;
@@ -1130,6 +1607,10 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 	if (!linker->elf)
 		return -EINVAL;
 
+	err = finalize_btf(linker);
+	if (err)
+		return err;
+
 	/* Finalize strings */
 	strs_sz = strset__data_size(linker->strtab_strs);
 	strs = strset__data(linker->strtab_strs);
@@ -1149,6 +1630,10 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 		if (sec->sec_idx == linker->strtab_sec_idx)
 			continue;
 
+		/* special ephemeral sections (.ksyms, .kconfig, etc) */
+		if (!sec->scn)
+			continue;
+
 		sec->data->d_buf = sec->raw_data;
 	}
 
@@ -1174,3 +1659,283 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 
 	return 0;
 }
+
+static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
+			     size_t align, const void *raw_data, size_t raw_sz)
+{
+	Elf_Scn *scn;
+	Elf_Data *data;
+	Elf64_Shdr *shdr;
+	int name_off;
+
+	name_off = strset__add_str(linker->strtab_strs, sec_name);
+	if (name_off < 0)
+		return name_off;
+
+	scn = elf_newscn(linker->elf);
+	if (!scn)
+		return -ENOMEM;
+	data = elf_newdata(scn);
+	if (!data)
+		return -ENOMEM;
+	shdr = elf64_getshdr(scn);
+	if (!shdr)
+		return -EINVAL;
+
+	shdr->sh_name = name_off;
+	shdr->sh_type = SHT_PROGBITS;
+	shdr->sh_flags = 0;
+	shdr->sh_size = raw_sz;
+	shdr->sh_link = 0;
+	shdr->sh_info = 0;
+	shdr->sh_addralign = align;
+	shdr->sh_entsize = 0;
+
+	data->d_type = ELF_T_BYTE;
+	data->d_size = raw_sz;
+	data->d_buf = (void *)raw_data;
+	data->d_align = align;
+	data->d_off = 0;
+
+	return 0;
+}
+
+static int finalize_btf(struct bpf_linker *linker)
+{
+	struct btf *btf = linker->btf;
+	const void *raw_data;
+	int i, j, id, err;
+	__u32 raw_sz;
+
+	/* bail out if no BTF data was produced */
+	if (btf__get_nr_types(linker->btf) == 0)
+		return 0;
+
+	for (i = 1; i < linker->sec_cnt; i++) {
+		struct dst_sec *sec = &linker->secs[i];
+
+		if (!sec->sec_var_cnt)
+			continue;
+
+		id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
+		if (id < 0) {
+			pr_warn("failed to add consolidated BTF type for datasec '%s': %d\n",
+				sec->sec_name, id);
+			return id;
+		}
+
+		for (j = 0; j < sec->sec_var_cnt; j++) {
+			struct btf_var_secinfo *vi = &sec->sec_vars[j];
+
+			if (btf__add_datasec_var_info(btf, vi->type, vi->offset, vi->size))
+				return -EINVAL;
+		}
+	}
+
+	err = finalize_btf_ext(linker);
+	if (err) {
+		pr_warn(".BTF.ext generation failed: %d\n", err);
+		return err;
+	}
+
+	err = btf__dedup(linker->btf, linker->btf_ext, NULL);
+	if (err) {
+		pr_warn("BTF dedup failed: %d\n", err);
+		return err;
+	}
+
+	/* Emit .BTF section */
+	raw_data = btf__get_raw_data(linker->btf, &raw_sz);
+	if (!raw_data)
+		return -ENOMEM;
+
+	err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz);
+	if (err) {
+		pr_warn("failed to write out .BTF ELF section: %d\n", err);
+		return err;
+	}
+
+	/* Emit .BTF.ext section */
+	if (linker->btf_ext) {
+		raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz);
+		if (!raw_data)
+			return -ENOMEM;
+
+		err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz);
+		if (err) {
+			pr_warn("failed to write out .BTF.ext ELF section: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int emit_btf_ext_data(struct bpf_linker *linker, void *output,
+			     const char *sec_name, struct btf_ext_sec_data *sec_data)
+{
+	struct btf_ext_info_sec *sec_info;
+	void *cur = output;
+	int str_off;
+	size_t sz;
+
+	if (!sec_data->rec_cnt)
+		return 0;
+
+	str_off = btf__add_str(linker->btf, sec_name);
+	if (str_off < 0)
+		return -ENOMEM;
+
+	sec_info = cur;
+	sec_info->sec_name_off = str_off;
+	sec_info->num_info = sec_data->rec_cnt;
+	cur += sizeof(struct btf_ext_info_sec);
+
+	sz = sec_data->rec_cnt * sec_data->rec_sz;
+	memcpy(cur, sec_data->recs, sz);
+	cur += sz;
+
+	return cur - output;
+}
+
+static int finalize_btf_ext(struct bpf_linker *linker)
+{
+	size_t funcs_sz = 0, lines_sz = 0, core_relos_sz = 0, total_sz = 0;
+	size_t func_rec_sz = 0, line_rec_sz = 0, core_relo_rec_sz = 0;
+	struct btf_ext_header *hdr;
+	void *data, *cur;
+	int i, err, sz;
+
+	/* validate that all sections have the same .BTF.ext record sizes
+	 * and calculate total data size for each type of data (func info,
+	 * line info, core relos)
+	 */
+	for (i = 1; i < linker->sec_cnt; i++) {
+		struct dst_sec *sec = &linker->secs[i];
+
+		if (sec->func_info.rec_cnt) {
+			if (func_rec_sz == 0)
+				func_rec_sz = sec->func_info.rec_sz;
+			if (func_rec_sz != sec->func_info.rec_sz) {
+				pr_warn("mismatch in func_info record size %zu != %u\n",
+					func_rec_sz, sec->func_info.rec_sz);
+				return -EINVAL;
+			}
+
+			funcs_sz += sizeof(struct btf_ext_info_sec) + func_rec_sz * sec->func_info.rec_cnt;
+		}
+		if (sec->line_info.rec_cnt) {
+			if (line_rec_sz == 0)
+				line_rec_sz = sec->line_info.rec_sz;
+			if (line_rec_sz != sec->line_info.rec_sz) {
+				pr_warn("mismatch in line_info record size %zu != %u\n",
+					line_rec_sz, sec->line_info.rec_sz);
+				return -EINVAL;
+			}
+
+			lines_sz += sizeof(struct btf_ext_info_sec) + line_rec_sz * sec->line_info.rec_cnt;
+		}
+		if (sec->core_relo_info.rec_cnt) {
+			if (core_relo_rec_sz == 0)
+				core_relo_rec_sz = sec->core_relo_info.rec_sz;
+			if (core_relo_rec_sz != sec->core_relo_info.rec_sz) {
+				pr_warn("mismatch in core_relo_info record size %zu != %u\n",
+					core_relo_rec_sz, sec->core_relo_info.rec_sz);
+				return -EINVAL;
+			}
+
+			core_relos_sz += sizeof(struct btf_ext_info_sec) + core_relo_rec_sz * sec->core_relo_info.rec_cnt;
+		}
+	}
+
+	if (!funcs_sz && !lines_sz && !core_relos_sz)
+		return 0;
+
+	total_sz += sizeof(struct btf_ext_header);
+	if (funcs_sz) {
+		funcs_sz += sizeof(__u32); /* record size prefix */
+		total_sz += funcs_sz;
+	}
+	if (lines_sz) {
+		lines_sz += sizeof(__u32); /* record size prefix */
+		total_sz += lines_sz;
+	}
+	if (core_relos_sz) {
+		core_relos_sz += sizeof(__u32); /* record size prefix */
+		total_sz += core_relos_sz;
+	}
+
+	cur = data = calloc(1, total_sz);
+	if (!data)
+		return -ENOMEM;
+
+	hdr = cur;
+	hdr->magic = BTF_MAGIC;
+	hdr->version = BTF_VERSION;
+	hdr->flags = 0;
+	hdr->hdr_len = sizeof(struct btf_ext_header);
+	cur += sizeof(struct btf_ext_header);
+
+	/* All offsets are in bytes relative to the end of this header */
+	hdr->func_info_off = 0;
+	hdr->func_info_len = funcs_sz;
+	hdr->line_info_off = funcs_sz;
+	hdr->line_info_len = lines_sz;
+	hdr->core_relo_off = funcs_sz + lines_sz;;
+	hdr->core_relo_len = core_relos_sz;
+
+	if (funcs_sz) {
+		*(__u32 *)cur = func_rec_sz;
+		cur += sizeof(__u32);
+
+		for (i = 1; i < linker->sec_cnt; i++) {
+			struct dst_sec *sec = &linker->secs[i];
+
+			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
+			if (sz < 0)
+				return sz;
+
+			cur += sz;
+		}
+	}
+
+	if (lines_sz) {
+		*(__u32 *)cur = line_rec_sz;
+		cur += sizeof(__u32);
+
+		for (i = 1; i < linker->sec_cnt; i++) {
+			struct dst_sec *sec = &linker->secs[i];
+
+			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
+			if (sz < 0)
+				return sz;
+
+			cur += sz;
+		}
+	}
+
+	if (core_relos_sz) {
+		*(__u32 *)cur = core_relo_rec_sz;
+		cur += sizeof(__u32);
+
+		for (i = 1; i < linker->sec_cnt; i++) {
+			struct dst_sec *sec = &linker->secs[i];
+
+			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
+			if (sz < 0)
+				return sz;
+
+			cur += sz;
+		}
+	}
+
+	linker->btf_ext = btf_ext__new(data, total_sz);
+	err = libbpf_get_error(linker->btf_ext);
+	if (err) {
+		linker->btf_ext = NULL;
+		pr_warn("failed to parse final .BTF.ext data: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3


From c41226654550b0a8aa75e91ce0a1cdb6ce2316ee Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:32 -0700
Subject: bpftool: Add ability to specify custom skeleton object name

Add optional name OBJECT_NAME parameter to `gen skeleton` command to override
default object name, normally derived from input file name. This allows much
more flexibility during build time.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-9-andrii@kernel.org
---
 tools/bpf/bpftool/Documentation/bpftool-gen.rst | 13 ++++++-----
 tools/bpf/bpftool/bash-completion/bpftool       | 11 +++++++++-
 tools/bpf/bpftool/gen.c                         | 29 ++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 84cf0639696f..d4e7338e22e7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -19,7 +19,7 @@ SYNOPSIS
 GEN COMMANDS
 =============
 
-|	**bpftool** **gen skeleton** *FILE*
+|	**bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*]
 |	**bpftool** **gen help**
 
 DESCRIPTION
@@ -75,10 +75,13 @@ DESCRIPTION
 		  specific maps, programs, etc.
 
 		  As part of skeleton, few custom functions are generated.
-		  Each of them is prefixed with object name, derived from
-		  object file name. I.e., if BPF object file name is
-		  **example.o**, BPF object name will be **example**. The
-		  following custom functions are provided in such case:
+		  Each of them is prefixed with object name. Object name can
+		  either be derived from object file name, i.e., if BPF object
+		  file name is **example.o**, BPF object name will be
+		  **example**. Object name can be also specified explicitly
+		  through **name** *OBJECT_NAME* parameter. The following
+		  custom functions are provided (assuming **example** as
+		  the object name):
 
 		  - **example__open** and **example__open_opts**.
 		    These functions are used to instantiate skeleton. It
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fdffbc64c65c..bf7b4bdbb23a 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -982,7 +982,16 @@ _bpftool()
         gen)
             case $command in
                 skeleton)
-                    _filedir
+                    case $prev in
+                        $command)
+                            _filedir
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'name'
+                            return 0
+                            ;;
+                    esac
                     ;;
                 *)
                     [[ $prev == $object ]] && \
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4033c46d83e7..9bff89a66835 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -273,7 +273,7 @@ static int do_skeleton(int argc, char **argv)
 	char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
 	size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
-	char obj_name[MAX_OBJ_NAME_LEN], *obj_data;
+	char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data;
 	struct bpf_object *obj = NULL;
 	const char *file, *ident;
 	struct bpf_program *prog;
@@ -288,6 +288,28 @@ static int do_skeleton(int argc, char **argv)
 	}
 	file = GET_ARG();
 
+	while (argc) {
+		if (!REQ_ARGS(2))
+			return -1;
+
+		if (is_prefix(*argv, "name")) {
+			NEXT_ARG();
+
+			if (obj_name[0] != '\0') {
+				p_err("object name already specified");
+				return -1;
+			}
+
+			strncpy(obj_name, *argv, MAX_OBJ_NAME_LEN - 1);
+			obj_name[MAX_OBJ_NAME_LEN - 1] = '\0';
+		} else {
+			p_err("unknown arg %s", *argv);
+			return -1;
+		}
+
+		NEXT_ARG();
+	}
+
 	if (argc) {
 		p_err("extra unknown arguments");
 		return -1;
@@ -310,7 +332,8 @@ static int do_skeleton(int argc, char **argv)
 		p_err("failed to mmap() %s: %s", file, strerror(errno));
 		goto out;
 	}
-	get_obj_name(obj_name, file);
+	if (obj_name[0] == '\0')
+		get_obj_name(obj_name, file);
 	opts.object_name = obj_name;
 	obj = bpf_object__open_mem(obj_data, file_sz, &opts);
 	if (IS_ERR(obj)) {
@@ -599,7 +622,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %1$s %2$s skeleton FILE\n"
+		"Usage: %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_OPTIONS "\n"
-- 
cgit v1.2.3


From d80b2fcbe0a023619e0fc73112f2a02c2662f6ab Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:33 -0700
Subject: bpftool: Add `gen object` command to perform BPF static linking

Add `bpftool gen object <output-file> <input_file>...` command to statically
link multiple BPF ELF object files into a single output BPF ELF object file.

This patch also updates bash completions and man page. Man page gets a short
section on `gen object` command, but also updates the skeleton example to show
off workflow for BPF application with two .bpf.c files, compiled individually
with Clang, then resulting object files are linked together with `gen object`,
and then final object file is used to generate usable BPF skeleton. This
should help new users understand realistic workflow w.r.t. compiling
mutli-file BPF application.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-10-andrii@kernel.org
---
 tools/bpf/bpftool/Documentation/bpftool-gen.rst | 65 +++++++++++++++++++------
 tools/bpf/bpftool/bash-completion/bpftool       |  6 ++-
 tools/bpf/bpftool/gen.c                         | 45 ++++++++++++++++-
 3 files changed, 100 insertions(+), 16 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index d4e7338e22e7..7cd6681137f3 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,16 +14,37 @@ SYNOPSIS
 
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
-	*COMMAND* := { **skeleton** | **help** }
+	*COMMAND* := { **object** | **skeleton** | **help** }
 
 GEN COMMANDS
 =============
 
+|	**bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
 |	**bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*]
 |	**bpftool** **gen help**
 
 DESCRIPTION
 ===========
+	**bpftool gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
+		  Statically link (combine) together one or more *INPUT_FILE*'s
+		  into a single resulting *OUTPUT_FILE*. All the files involved
+		  are BPF ELF object files.
+
+		  The rules of BPF static linking are mostly the same as for
+		  user-space object files, but in addition to combining data
+		  and instruction sections, .BTF and .BTF.ext (if present in
+		  any of the input files) data are combined together. .BTF
+		  data is deduplicated, so all the common types across
+		  *INPUT_FILE*'s will only be represented once in the resulting
+		  BTF information.
+
+		  BPF static linking allows to partition BPF source code into
+		  individually compiled files that are then linked into
+		  a single resulting BPF object file, which can be used to
+		  generated BPF skeleton (with **gen skeleton** command) or
+		  passed directly into **libbpf** (using **bpf_object__open()**
+		  family of APIs).
+
 	**bpftool gen skeleton** *FILE*
 		  Generate BPF skeleton C header file for a given *FILE*.
 
@@ -133,26 +154,19 @@ OPTIONS
 
 EXAMPLES
 ========
-**$ cat example.c**
+**$ cat example1.bpf.c**
 
 ::
 
   #include <stdbool.h>
   #include <linux/ptrace.h>
   #include <linux/bpf.h>
-  #include "bpf_helpers.h"
+  #include <bpf/bpf_helpers.h>
 
   const volatile int param1 = 42;
   bool global_flag = true;
   struct { int x; } data = {};
 
-  struct {
-  	__uint(type, BPF_MAP_TYPE_HASH);
-  	__uint(max_entries, 128);
-  	__type(key, int);
-  	__type(value, long);
-  } my_map SEC(".maps");
-
   SEC("raw_tp/sys_enter")
   int handle_sys_enter(struct pt_regs *ctx)
   {
@@ -164,6 +178,21 @@ EXAMPLES
   	return 0;
   }
 
+**$ cat example2.bpf.c**
+
+::
+
+  #include <linux/ptrace.h>
+  #include <linux/bpf.h>
+  #include <bpf/bpf_helpers.h>
+
+  struct {
+  	__uint(type, BPF_MAP_TYPE_HASH);
+  	__uint(max_entries, 128);
+  	__type(key, int);
+  	__type(value, long);
+  } my_map SEC(".maps");
+
   SEC("raw_tp/sys_exit")
   int handle_sys_exit(struct pt_regs *ctx)
   {
@@ -173,9 +202,17 @@ EXAMPLES
   }
 
 This is example BPF application with two BPF programs and a mix of BPF maps
-and global variables.
+and global variables. Source code is split across two source code files.
 
-**$ bpftool gen skeleton example.o**
+**$ clang -target bpf -g example1.bpf.c -o example1.bpf.o**
+**$ clang -target bpf -g example2.bpf.c -o example2.bpf.o**
+**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
+
+This set of commands compiles *example1.bpf.c* and *example2.bpf.c*
+individually and then statically links respective object files into the final
+BPF ELF object file *example.bpf.o*.
+
+**$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h**
 
 ::
 
@@ -230,7 +267,7 @@ and global variables.
 
   #endif /* __EXAMPLE_SKEL_H__ */
 
-**$ cat example_user.c**
+**$ cat example.c**
 
 ::
 
@@ -273,7 +310,7 @@ and global variables.
   	return err;
   }
 
-**# ./example_user**
+**# ./example**
 
 ::
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index bf7b4bdbb23a..d67518bcbd44 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -981,6 +981,10 @@ _bpftool()
             ;;
         gen)
             case $command in
+                object)
+                    _filedir
+                    return 0
+                    ;;
                 skeleton)
                     case $prev in
                         $command)
@@ -995,7 +999,7 @@ _bpftool()
                     ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'object skeleton help' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 9bff89a66835..31ade77f5ef8 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -614,6 +614,47 @@ out:
 	return err;
 }
 
+static int do_object(int argc, char **argv)
+{
+	struct bpf_linker *linker;
+	const char *output_file, *file;
+	int err = 0;
+
+	if (!REQ_ARGS(2)) {
+		usage();
+		return -1;
+	}
+
+	output_file = GET_ARG();
+
+	linker = bpf_linker__new(output_file, NULL);
+	if (!linker) {
+		p_err("failed to create BPF linker instance");
+		return -1;
+	}
+
+	while (argc) {
+		file = GET_ARG();
+
+		err = bpf_linker__add_file(linker, file);
+		if (err) {
+			p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
+			goto out;
+		}
+	}
+
+	err = bpf_linker__finalize(linker);
+	if (err) {
+		p_err("failed to finalize ELF file: %s (%d)", strerror(err), err);
+		goto out;
+	}
+
+	err = 0;
+out:
+	bpf_linker__free(linker);
+	return err;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -622,7 +663,8 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
+		"Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n"
+		"       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_OPTIONS "\n"
@@ -633,6 +675,7 @@ static int do_help(int argc, char **argv)
 }
 
 static const struct cmd cmds[] = {
+	{ "object",	do_object },
 	{ "skeleton",	do_skeleton },
 	{ "help",	do_help },
 	{ 0 }
-- 
cgit v1.2.3


From cab62c37be057379a2a17b1b2eacd9dcba1e14dc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:34 -0700
Subject: selftests/bpf: Re-generate vmlinux.h and BPF skeletons if bpftool
 changed

Trigger vmlinux.h and BPF skeletons re-generation if detected that bpftool was
re-compiled. Otherwise full `make clean` is required to get updated skeletons,
if bpftool is modified.

Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-11-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index d0db2b673c6f..dbca39f45382 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -232,7 +232,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)                \
 		    DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
 endif
 
-$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
 ifeq ($(VMLINUX_H),)
 	$(call msg,GEN,,$@)
 	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
@@ -357,7 +357,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 
 $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:			\
 		      $(TRUNNER_OUTPUT)/%.o				\
-		      | $(BPFTOOL) $(TRUNNER_OUTPUT)
+		      $(BPFTOOL)					\
+		      | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(BPFTOOL) gen skeleton $$< > $$@
 endif
-- 
cgit v1.2.3


From 14137f3c62186799b01eea8a338f90c9cbc57f00 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:35 -0700
Subject: selftests/bpf: Pass all BPF .o's through BPF static linker

Pass all individual BPF object files (generated from progs/*.c) through
`bpftool gen object` command to validate that BPF static linker doesn't
corrupt them.

As an additional sanity checks, validate that passing resulting object files
through linker again results in identical ELF files. Exact same ELF contents
can be guaranteed only after two passes, as after the first pass ELF sections
order changes, and thus .BTF.ext data sections order changes. That, in turn,
means that strings are added into the final BTF string sections in different
order, so .BTF strings data might not be exactly the same. But doing another
round of linking afterwards should result in the identical ELF file, which is
checked with additional `diff` command.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-12-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index dbca39f45382..3a7c02add70c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -355,12 +355,13 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS))
 
-$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:			\
-		      $(TRUNNER_OUTPUT)/%.o				\
-		      $(BPFTOOL)					\
-		      | $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
-	$(Q)$$(BPFTOOL) gen skeleton $$< > $$@
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked.o) $$<
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked.o)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked.o) name $$(notdir $$(<:.o=)) > $$@
 endif
 
 # ensure we set up tests.h header generation rule just once
-- 
cgit v1.2.3


From a0964f526df6facd4e12a4c416185013026eecf9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 18 Mar 2021 12:40:36 -0700
Subject: selftests/bpf: Add multi-file statically linked BPF object file test

Add Makefile infra to specify multi-file BPF object files (and derivative
skeletons). Add first selftest validating BPF static linker can merge together
successfully two independent BPF object files and resulting object and
skeleton are correct and usable.

Use the same F(F(F(X))) = F(F(X)) identity test on linked object files as for
the case of single BPF object files.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210318194036.3521577-13-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               | 21 ++++++++++--
 .../selftests/bpf/prog_tests/static_linked.c       | 40 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_static_linked1.c      | 30 ++++++++++++++++
 .../selftests/bpf/progs/test_static_linked2.c      | 31 +++++++++++++++++
 4 files changed, 119 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/static_linked.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_static_linked1.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_static_linked2.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3a7c02add70c..6448c626498f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -303,6 +303,10 @@ endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
+LINKED_SKELS := test_static_linked.skel.h
+
+test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+
 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
 # Parameters:
@@ -323,6 +327,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST),	\
 					       $$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
 TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
 # Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -357,11 +362,20 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 
 $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked.o) $$<
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked.o)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
 	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked.o) name $$(notdir $$(<:.o=)) > $$@
+	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
+$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
+	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
+	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
+	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
+	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
+	$(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
+	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+	$(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
 endif
 
 # ensure we set up tests.h header generation rule just once
@@ -383,6 +397,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:			\
 		      $(TRUNNER_EXTRA_HDRS)				\
 		      $(TRUNNER_BPF_OBJS)				\
 		      $(TRUNNER_BPF_SKELS)				\
+		      $(TRUNNER_BPF_SKELS_LINKED)			\
 		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
 	$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
new file mode 100644
index 000000000000..46556976dccc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_static_linked.skel.h"
+
+void test_static_linked(void)
+{
+	int err;
+	struct test_static_linked* skel;
+
+	skel = test_static_linked__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->rodata->rovar1 = 1;
+	skel->bss->static_var1 = 2;
+	skel->bss->static_var11 = 3;
+
+	skel->rodata->rovar2 = 4;
+	skel->bss->static_var2 = 5;
+	skel->bss->static_var22 = 6;
+
+	err = test_static_linked__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = test_static_linked__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger */
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
+	ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+
+cleanup:
+	test_static_linked__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
new file mode 100644
index 000000000000..ea1a6c4c7172
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 8-byte aligned .bss */
+static volatile long static_var1;
+static volatile int static_var11;
+int var1 = 0;
+/* 4-byte aligned .rodata */
+const volatile int rovar1;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+	/* but different formula */
+	return x * 2;
+}
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+	var1 = subprog(rovar1) + static_var1 + static_var11;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+int VERSION SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
new file mode 100644
index 000000000000..54d8d1ab577c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 4-byte aligned .bss */
+static volatile int static_var2;
+static volatile int static_var22;
+int var2 = 0;
+/* 8-byte aligned .rodata */
+const volatile long rovar2;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+	/* but different formula */
+	return x * 3;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+	var2 = subprog(rovar2) + static_var2 + static_var22;
+
+	return 0;
+}
+
+/* different name and/or type of the variable doesn't matter */
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
-- 
cgit v1.2.3


From e47ded97f9728da5ced038c627c6b607e9706a13 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 18 Mar 2021 19:54:54 +0100
Subject: s390/qeth: allocate initial TX Buffer structs with GFP_KERNEL

qeth_init_qdio_out_buf() is typically called during initialization, and
the GFP_ATOMIC is only needed for a very specific & rare case during TX
completion.

Allow callers to specify a gfp mask, so that the initialization path can
select GFP_KERNEL. While at it also clarify the function name.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index a814698387bc..abd1e49cf97a 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2590,11 +2590,12 @@ static int qeth_ulp_setup(struct qeth_card *card)
 	return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL);
 }
 
-static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
+static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx,
+			      gfp_t gfp)
 {
 	struct qeth_qdio_out_buffer *newbuf;
 
-	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
+	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, gfp);
 	if (!newbuf)
 		return -ENOMEM;
 
@@ -2629,7 +2630,7 @@ static struct qeth_qdio_out_q *qeth_alloc_output_queue(void)
 		goto err_qdio_bufs;
 
 	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) {
-		if (qeth_init_qdio_out_buf(q, i))
+		if (qeth_alloc_out_buf(q, i, GFP_KERNEL))
 			goto err_out_bufs;
 	}
 
@@ -6088,7 +6089,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
 
 				/* Prepare the queue slot for immediate re-use: */
 				qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements);
-				if (qeth_init_qdio_out_buf(queue, bidx)) {
+				if (qeth_alloc_out_buf(queue, bidx,
+						       GFP_ATOMIC)) {
 					QETH_CARD_TEXT(card, 2, "outofbuf");
 					qeth_schedule_recovery(card);
 				}
-- 
cgit v1.2.3


From ad4bbd7285ad0f04f2cfd8bada1755c3c5c75143 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 18 Mar 2021 19:54:55 +0100
Subject: s390/qeth: enable napi_consume_skb() for pending TX buffers

Pending TX buffers are completed from the same NAPI code as normal
TX buffers. Pass the NAPI budget to qeth_tx_complete_buf() so that
the freeing of the completed skbs can be deferred.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index abd1e49cf97a..6954d4e831a3 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1453,7 +1453,7 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
 
 static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 					  struct qeth_qdio_out_q *queue,
-					  bool drain)
+					  bool drain, int budget)
 {
 	struct qeth_qdio_out_buffer *buf, *tmp;
 
@@ -1465,7 +1465,7 @@ static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
 			if (drain)
 				qeth_notify_skbs(queue, buf,
 						 TX_NOTIFY_GENERALERROR);
-			qeth_tx_complete_buf(buf, drain, 0);
+			qeth_tx_complete_buf(buf, drain, budget);
 
 			list_del(&buf->list_entry);
 			kmem_cache_free(qeth_qdio_outbuf_cache, buf);
@@ -1477,7 +1477,7 @@ static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
 {
 	int j;
 
-	qeth_tx_complete_pending_bufs(q->card, q, true);
+	qeth_tx_complete_pending_bufs(q->card, q, true, 0);
 
 	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 		if (!q->bufs[j])
@@ -6152,7 +6152,7 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 		unsigned int bytes = 0;
 		int completed;
 
-		qeth_tx_complete_pending_bufs(card, queue, false);
+		qeth_tx_complete_pending_bufs(card, queue, false, budget);
 
 		if (qeth_out_queue_is_empty(queue)) {
 			napi_complete(napi);
-- 
cgit v1.2.3


From d96a8c693d0a09cd89efbb17373ded5b275b2960 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Thu, 18 Mar 2021 19:54:56 +0100
Subject: s390/qeth: remove RX VLAN filter stubs in L3 driver

The callbacks have been slimmed down to a level where they no longer do
any actual work. So stop pretending that we support the
NETIF_F_HW_VLAN_CTAG_FILTER feature.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index dd441eaec66e..35b42275a06c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1123,24 +1123,6 @@ out:
 	return 0;
 }
 
-static int qeth_l3_vlan_rx_add_vid(struct net_device *dev,
-				   __be16 proto, u16 vid)
-{
-	struct qeth_card *card = dev->ml_priv;
-
-	QETH_CARD_TEXT_(card, 4, "aid:%d", vid);
-	return 0;
-}
-
-static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
-				    __be16 proto, u16 vid)
-{
-	struct qeth_card *card = dev->ml_priv;
-
-	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
-	return 0;
-}
-
 static void qeth_l3_set_promisc_mode(struct qeth_card *card)
 {
 	bool enable = card->dev->flags & IFF_PROMISC;
@@ -1861,8 +1843,6 @@ static const struct net_device_ops qeth_l3_netdev_ops = {
 	.ndo_do_ioctl		= qeth_do_ioctl,
 	.ndo_fix_features	= qeth_fix_features,
 	.ndo_set_features	= qeth_set_features,
-	.ndo_vlan_rx_add_vid	= qeth_l3_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid   = qeth_l3_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= qeth_tx_timeout,
 };
 
@@ -1878,8 +1858,6 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_do_ioctl		= qeth_do_ioctl,
 	.ndo_fix_features	= qeth_fix_features,
 	.ndo_set_features	= qeth_set_features,
-	.ndo_vlan_rx_add_vid	= qeth_l3_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid   = qeth_l3_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= qeth_tx_timeout,
 	.ndo_neigh_setup	= qeth_l3_neigh_setup,
 };
@@ -1933,8 +1911,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 
 	card->dev->needed_headroom = headroom;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
-				NETIF_F_HW_VLAN_CTAG_RX |
-				NETIF_F_HW_VLAN_CTAG_FILTER;
+				NETIF_F_HW_VLAN_CTAG_RX;
 
 	netif_keep_dst(card->dev);
 	if (card->dev->hw_features & (NETIF_F_TSO | NETIF_F_TSO6))
-- 
cgit v1.2.3


From d2fd2311de909a7f4e99b4bd11a19e6b671d6a6b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 18 Mar 2021 13:59:27 -0500
Subject: net: ipa: fix assumptions about DMA address size

Some build time checks in ipa_table_validate_build() assume that a
DMA address is 64 bits wide.  That is more restrictive than it has
to be.  A route or filter table is 64 bits wide no matter what the
size of a DMA address is on the AP.  The code actually uses a
pointer to __le64 to access table entries, and a fixed constant
IPA_TABLE_ENTRY_SIZE to describe the size of those entries.

Loosen up two checks so they still verify some requirements, but
such that they do not assume the size of a DMA address is 64 bits.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 7450e27068f1..dd07fe9dd87a 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -126,13 +126,15 @@ static void ipa_table_validate_build(void)
 	 */
 	BUILD_BUG_ON(ARCH_DMA_MINALIGN % IPA_TABLE_ALIGN);
 
-	/* Filter and route tables contain DMA addresses that refer to
-	 * filter or route rules.  We use a fixed constant to represent
-	 * the size of either type of table entry.  Code in ipa_table_init()
-	 * uses a pointer to __le64 to initialize table entriews.
+	/* Filter and route tables contain DMA addresses that refer
+	 * to filter or route rules.  But the size of a table entry
+	 * is 64 bits regardless of what the size of an AP DMA address
+	 * is.  A fixed constant defines the size of an entry, and
+	 * code in ipa_table_init() uses a pointer to __le64 to
+	 * initialize tables.
 	 */
-	BUILD_BUG_ON(IPA_TABLE_ENTRY_SIZE != sizeof(dma_addr_t));
-	BUILD_BUG_ON(sizeof(dma_addr_t) != sizeof(__le64));
+	BUILD_BUG_ON(sizeof(dma_addr_t) > IPA_TABLE_ENTRY_SIZE);
+	BUILD_BUG_ON(sizeof(__le64) != IPA_TABLE_ENTRY_SIZE);
 
 	/* A "zero rule" is used to represent no filtering or no routing.
 	 * It is a 64-bit block of zeroed memory.  Code in ipa_table_init()
-- 
cgit v1.2.3


From 3c54b7be5d3677f37dfcd68afcc1d8e2b677a5b9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 18 Mar 2021 13:59:28 -0500
Subject: net: ipa: use upper_32_bits()

Use upper_32_bits() to extract the high-order 32 bits of a DMA
address.  This avoids doing a 32-position shift on a DMA address
if it happens not to be 64 bits wide.  Use lower_32_bits() to
extract the low-order 32 bits (because that's what it's for).

Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Alex Elder <elder@linaro.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 2119367b93ea..7f3e338ca7a7 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -351,7 +351,7 @@ void *gsi_ring_virt(struct gsi_ring *ring, u32 index)
 /* Return the 32-bit DMA address associated with a ring index */
 static u32 gsi_ring_addr(struct gsi_ring *ring, u32 index)
 {
-	return (ring->addr & GENMASK(31, 0)) + index * GSI_RING_ELEMENT_SIZE;
+	return lower_32_bits(ring->addr) + index * GSI_RING_ELEMENT_SIZE;
 }
 
 /* Return the ring index of a 32-bit ring offset */
@@ -708,10 +708,9 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id)
 	 * high-order 32 bits of the address of the event ring,
 	 * respectively.
 	 */
-	val = evt_ring->ring.addr & GENMASK(31, 0);
+	val = lower_32_bits(evt_ring->ring.addr);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_2_OFFSET(evt_ring_id));
-
-	val = evt_ring->ring.addr >> 32;
+	val = upper_32_bits(evt_ring->ring.addr);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_3_OFFSET(evt_ring_id));
 
 	/* Enable interrupt moderation by setting the moderation delay */
@@ -816,10 +815,9 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 	 * high-order 32 bits of the address of the channel ring,
 	 * respectively.
 	 */
-	val = channel->tre_ring.addr & GENMASK(31, 0);
+	val = lower_32_bits(channel->tre_ring.addr);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_2_OFFSET(channel_id));
-
-	val = channel->tre_ring.addr >> 32;
+	val = upper_32_bits(channel->tre_ring.addr);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_3_OFFSET(channel_id));
 
 	/* Command channel gets low weighted round-robin priority */
@@ -1365,7 +1363,7 @@ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
 	u32 tre_index;
 
 	/* Event xfer_ptr records the TRE it's associated with */
-	tre_offset = le64_to_cpu(event->xfer_ptr) & GENMASK(31, 0);
+	tre_offset = lower_32_bits(le64_to_cpu(event->xfer_ptr));
 	tre_index = gsi_ring_index(&channel->tre_ring, tre_offset);
 
 	return gsi_channel_trans_mapped(channel, tre_index);
-- 
cgit v1.2.3


From e5d4e96b44cf20330c970c3e30ea0a8c3a23feca Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 18 Mar 2021 13:59:29 -0500
Subject: net: ipa: fix table alignment requirement

We currently have a build-time check to ensure that the minimum DMA
allocation alignment satisfies the constraint that IPA filter and
route tables must point to rules that are 128-byte aligned.

But what's really important is that the actual allocated DMA memory
has that alignment, even if the minimum is smaller than that.

Remove the BUILD_BUG_ON() call checking against minimim DMA alignment
and instead verify at rutime that the allocated memory is properly
aligned.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_table.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index dd07fe9dd87a..988f2c2886b9 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -118,14 +118,6 @@
 /* Check things that can be validated at build time. */
 static void ipa_table_validate_build(void)
 {
-	/* IPA hardware accesses memory 128 bytes at a time.  Addresses
-	 * referred to by entries in filter and route tables must be
-	 * aligned on 128-byte byte boundaries.  The only rule address
-	 * ever use is the "zero rule", and it's aligned at the base
-	 * of a coherent DMA allocation.
-	 */
-	BUILD_BUG_ON(ARCH_DMA_MINALIGN % IPA_TABLE_ALIGN);
-
 	/* Filter and route tables contain DMA addresses that refer
 	 * to filter or route rules.  But the size of a table entry
 	 * is 64 bits regardless of what the size of an AP DMA address
@@ -665,6 +657,18 @@ int ipa_table_init(struct ipa *ipa)
 	if (!virt)
 		return -ENOMEM;
 
+	/* We put the "zero rule" at the base of our table area.  The IPA
+	 * hardware requires rules to be aligned on a 128-byte boundary.
+	 * Make sure the allocation satisfies this constraint.
+	 */
+	if (addr % IPA_TABLE_ALIGN) {
+		dev_err(dev, "table address %pad not %u-byte aligned\n",
+			&addr, IPA_TABLE_ALIGN);
+		dma_free_coherent(dev, size, virt, addr);
+
+		return -ERANGE;
+	}
+
 	ipa->table_virt = virt;
 	ipa->table_addr = addr;
 
-- 
cgit v1.2.3


From 99e75a37bd0af8eb8a0560f48091672b1b6d9218 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 18 Mar 2021 13:59:30 -0500
Subject: net: ipa: relax 64-bit build requirement

We currently assume the IPA driver is built only for a 64 bit kernel.

When this constraint was put in place it eliminated some do_div()
calls, replacing them with the "/" and "%" operators.  We now only
use these operations on u32 and size_t objects.  In a 32-bit kernel
build, size_t will be 32 bits wide, so there remains no reason to
use do_div() for divide and modulo.

A few recent commits also fix some code that assumes that DMA
addresses are 64 bits wide.

With that, we can get rid of the 64-bit build requirement.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Kconfig    |  2 +-
 drivers/net/ipa/ipa_main.c | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
index b68f1289b89e..90a90262e0d0 100644
--- a/drivers/net/ipa/Kconfig
+++ b/drivers/net/ipa/Kconfig
@@ -1,6 +1,6 @@
 config QCOM_IPA
 	tristate "Qualcomm IPA support"
-	depends on 64BIT && NET && QCOM_SMEM
+	depends on NET && QCOM_SMEM
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST)
 	select QCOM_MDT_LOADER if ARCH_QCOM
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 97c1b55405cb..d354e3e65ec5 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -735,8 +735,14 @@ MODULE_DEVICE_TABLE(of, ipa_match);
 static void ipa_validate_build(void)
 {
 #ifdef IPA_VALIDATE
-	/* We assume we're working on 64-bit hardware */
-	BUILD_BUG_ON(!IS_ENABLED(CONFIG_64BIT));
+	/* At one time we assumed a 64-bit build, allowing some do_div()
+	 * calls to be replaced by simple division or modulo operations.
+	 * We currently only perform divide and modulo operations on u32,
+	 * u16, or size_t objects, and of those only size_t has any chance
+	 * of being a 64-bit value.  (It should be guaranteed 32 bits wide
+	 * on a 32-bit build, but there is no harm in verifying that.)
+	 */
+	BUILD_BUG_ON(!IS_ENABLED(CONFIG_64BIT) && sizeof(size_t) != 4);
 
 	/* Code assumes the EE ID for the AP is 0 (zeroed structure field) */
 	BUILD_BUG_ON(GSI_EE_AP != 0);
-- 
cgit v1.2.3


From cc76ce9e8dc659561ee62876da2cffc03fb58cc5 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:33 +0100
Subject: net: dsa: Add helper to resolve bridge port from DSA port

In order for a driver to be able to query a bridge for information
about itself, e.g. reading out port flags, it has to use a netdev that
is known to the bridge. In the simple case, that is just the netdev
representing the port, e.g. swp0 or swp1 in this example:

   br0
   / \
swp0 swp1

But in the case of an offloaded lag, this will be the bond or team
interface, e.g. bond0 in this example:

     br0
     /
  bond0
   / \
swp0 swp1

Add a helper that hides some of this complexity from the
drivers. Then, redefine dsa_port_offloads_bridge_port using the helper
to avoid double accounting of the set of possible offloaded uppers.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  | 14 ++++++++++++++
 net/dsa/dsa_priv.h | 14 +-------------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index dac303edd33d..57b2c49f72f4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -493,6 +493,20 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp)
 		return dp->vlan_filtering;
 }
 
+static inline
+struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
+{
+	if (!dp->bridge_dev)
+		return NULL;
+
+	if (dp->lag_dev)
+		return dp->lag_dev;
+	else if (dp->hsr_dev)
+		return dp->hsr_dev;
+
+	return dp->slave;
+}
+
 typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
 			      bool is_static, void *data);
 struct dsa_switch_ops {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9d4b0e9b1aa1..4c43c5406834 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -233,19 +233,7 @@ extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
 						 struct net_device *dev)
 {
-	/* Switchdev offloading can be configured on: */
-
-	if (dev == dp->slave)
-		/* DSA ports directly connected to a bridge, and event
-		 * was emitted for the ports themselves.
-		 */
-		return true;
-
-	if (dp->lag_dev == dev)
-		/* DSA ports connected to a bridge via a LAG */
-		return true;
-
-	return false;
+	return dsa_port_to_bridge_port(dp) == dev;
 }
 
 static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
-- 
cgit v1.2.3


From ffcec3f257ccc2bf27642b9b1d97d2141f9cfcec Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:34 +0100
Subject: net: dsa: mv88e6xxx: Avoid useless attempts to fast-age LAGs

When a port is a part of a LAG, the ATU will create dynamic entries
belonging to the LAG ID when learning is enabled. So trying to
fast-age those out using the constituent port will have no
effect. Unfortunately the hardware does not support move operations on
LAGs so there is no obvious way to transform the request to target the
LAG instead.

Instead we document this known limitation and at least avoid wasting
any time on it.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index f0a9423af85d..ed38b4431d74 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1479,6 +1479,13 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (dsa_to_port(ds, port)->lag_dev)
+		/* Hardware is incapable of fast-aging a LAG through a
+		 * regular ATU move operation. Until we have something
+		 * more fancy in place this is a no-op.
+		 */
+		return;
+
 	mv88e6xxx_reg_lock(chip);
 	err = mv88e6xxx_g1_atu_remove(chip, 0, port, false);
 	mv88e6xxx_reg_unlock(chip);
-- 
cgit v1.2.3


From d89ef4b8b39cdb88675b2629b35dc9ffdf5ca347 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:35 +0100
Subject: net: dsa: mv88e6xxx: Provide generic VTU iterator

Move the intricacies of correctly iterating over the VTU to a common
implementation.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 100 +++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 36 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index ed38b4431d74..0a4e467179c9 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1511,6 +1511,37 @@ static int mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip,
 	return chip->info->ops->vtu_getnext(chip, entry);
 }
 
+static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
+			      int (*cb)(struct mv88e6xxx_chip *chip,
+					const struct mv88e6xxx_vtu_entry *entry,
+					void *priv),
+			      void *priv)
+{
+	struct mv88e6xxx_vtu_entry entry = {
+		.vid = mv88e6xxx_max_vid(chip),
+		.valid = false,
+	};
+	int err;
+
+	if (!chip->info->ops->vtu_getnext)
+		return -EOPNOTSUPP;
+
+	do {
+		err = chip->info->ops->vtu_getnext(chip, &entry);
+		if (err)
+			return err;
+
+		if (!entry.valid)
+			break;
+
+		err = cb(chip, &entry, priv);
+		if (err)
+			return err;
+	} while (entry.vid < mv88e6xxx_max_vid(chip));
+
+	return 0;
+}
+
 static int mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
 				   struct mv88e6xxx_vtu_entry *entry)
 {
@@ -1520,9 +1551,18 @@ static int mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
 	return chip->info->ops->vtu_loadpurge(chip, entry);
 }
 
+static int mv88e6xxx_fid_map_vlan(struct mv88e6xxx_chip *chip,
+				  const struct mv88e6xxx_vtu_entry *entry,
+				  void *_fid_bitmap)
+{
+	unsigned long *fid_bitmap = _fid_bitmap;
+
+	set_bit(entry->fid, fid_bitmap);
+	return 0;
+}
+
 int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
 {
-	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 	u16 fid;
 
@@ -1538,21 +1578,7 @@ int mv88e6xxx_fid_map(struct mv88e6xxx_chip *chip, unsigned long *fid_bitmap)
 	}
 
 	/* Set every FID bit used by the VLAN entries */
-	vlan.vid = mv88e6xxx_max_vid(chip);
-	vlan.valid = false;
-
-	do {
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
-		if (err)
-			return err;
-
-		if (!vlan.valid)
-			break;
-
-		set_bit(vlan.fid, fid_bitmap);
-	} while (vlan.vid < mv88e6xxx_max_vid(chip));
-
-	return 0;
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_fid_map_vlan, fid_bitmap);
 }
 
 static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid)
@@ -2198,10 +2224,30 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
 	return err;
 }
 
+struct mv88e6xxx_port_db_dump_vlan_ctx {
+	int port;
+	dsa_fdb_dump_cb_t *cb;
+	void *data;
+};
+
+static int mv88e6xxx_port_db_dump_vlan(struct mv88e6xxx_chip *chip,
+				       const struct mv88e6xxx_vtu_entry *entry,
+				       void *_data)
+{
+	struct mv88e6xxx_port_db_dump_vlan_ctx *ctx = _data;
+
+	return mv88e6xxx_port_db_dump_fid(chip, entry->fid, entry->vid,
+					  ctx->port, ctx->cb, ctx->data);
+}
+
 static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 				  dsa_fdb_dump_cb_t *cb, void *data)
 {
-	struct mv88e6xxx_vtu_entry vlan;
+	struct mv88e6xxx_port_db_dump_vlan_ctx ctx = {
+		.port = port,
+		.cb = cb,
+		.data = data,
+	};
 	u16 fid;
 	int err;
 
@@ -2214,25 +2260,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
 	if (err)
 		return err;
 
-	/* Dump VLANs' Filtering Information Databases */
-	vlan.vid = mv88e6xxx_max_vid(chip);
-	vlan.valid = false;
-
-	do {
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
-		if (err)
-			return err;
-
-		if (!vlan.valid)
-			break;
-
-		err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
-						 cb, data);
-		if (err)
-			return err;
-	} while (vlan.vid < mv88e6xxx_max_vid(chip));
-
-	return err;
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_port_db_dump_vlan, &ctx);
 }
 
 static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
-- 
cgit v1.2.3


From 34065c58306dab883deb323f2edf6074f2225c19 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:36 +0100
Subject: net: dsa: mv88e6xxx: Remove some bureaucracy around querying the VTU

The hardware has a somewhat quirky protocol for reading out the VTU
entry for a particular VID. But there is no reason why we cannot
create a better API for ourselves in the driver.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 45 ++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 0a4e467179c9..c18c55e1617e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1502,13 +1502,23 @@ static int mv88e6xxx_vtu_setup(struct mv88e6xxx_chip *chip)
 	return mv88e6xxx_g1_vtu_flush(chip);
 }
 
-static int mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip,
-				 struct mv88e6xxx_vtu_entry *entry)
+static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
+			     struct mv88e6xxx_vtu_entry *entry)
 {
+	int err;
+
 	if (!chip->info->ops->vtu_getnext)
 		return -EOPNOTSUPP;
 
-	return chip->info->ops->vtu_getnext(chip, entry);
+	entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip);
+	entry->valid = false;
+
+	err = chip->info->ops->vtu_getnext(chip, entry);
+
+	if (entry->vid != vid)
+		entry->valid = false;
+
+	return err;
 }
 
 static int mv88e6xxx_vtu_walk(struct mv88e6xxx_chip *chip,
@@ -1615,19 +1625,13 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
 	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
 		return 0;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
 	if (!vlan.valid)
 		return 0;
 
-	if (vlan.vid != vid)
-		return 0;
-
 	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
 			continue;
@@ -1709,15 +1713,12 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 		if (err)
 			return err;
 	} else {
-		vlan.vid = vid - 1;
-		vlan.valid = false;
-
-		err = mv88e6xxx_vtu_getnext(chip, &vlan);
+		err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 		if (err)
 			return err;
 
 		/* switchdev expects -EOPNOTSUPP to honor software VLANs */
-		if (vlan.vid != vid || !vlan.valid)
+		if (!vlan.valid)
 			return -EOPNOTSUPP;
 
 		fid = vlan.fid;
@@ -1994,14 +1995,11 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
 	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
-	if (vlan.vid != vid || !vlan.valid) {
+	if (!vlan.valid) {
 		memset(&vlan, 0, sizeof(vlan));
 
 		err = mv88e6xxx_atu_new(chip, &vlan.fid);
@@ -2097,17 +2095,14 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip,
 	if (!vid)
 		return -EOPNOTSUPP;
 
-	vlan.vid = vid - 1;
-	vlan.valid = false;
-
-	err = mv88e6xxx_vtu_getnext(chip, &vlan);
+	err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 	if (err)
 		return err;
 
 	/* If the VLAN doesn't exist in hardware or the port isn't a member,
 	 * tell switchdev that this VLAN is likely handled in software.
 	 */
-	if (vlan.vid != vid || !vlan.valid ||
+	if (!vlan.valid ||
 	    vlan.member[port] == MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER)
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From 0806dd4654145e70e4a4c5b06ddad4cd7a121fdf Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:37 +0100
Subject: net: dsa: mv88e6xxx: Use standard helper for broadcast address

Use the conventional declaration style of a MAC address in the
kernel (u8 addr[ETH_ALEN]) for the broadcast address, then set it
using the existing helper.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c18c55e1617e..17578f774683 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1968,8 +1968,10 @@ static int mv88e6xxx_set_rxnfc(struct dsa_switch *ds, int port,
 static int mv88e6xxx_port_add_broadcast(struct mv88e6xxx_chip *chip, int port,
 					u16 vid)
 {
-	const char broadcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u8 state = MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC;
+	u8 broadcast[ETH_ALEN];
+
+	eth_broadcast_addr(broadcast);
 
 	return mv88e6xxx_port_db_load_purge(chip, port, broadcast, vid, state);
 }
-- 
cgit v1.2.3


From 7b9f16fe401c98cd3e1cb92d02bb7184a6d9e4c1 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:38 +0100
Subject: net: dsa: mv88e6xxx: Flood all traffic classes on standalone ports

In accordance with the comment in dsa_port_bridge_leave, standalone
ports shall be configured to flood all types of traffic. This change
aligns the mv88e6xxx driver with that policy.

Previously a standalone port would initially not egress any unknown
traffic, but after joining and then leaving a bridge, it would.

This does not matter that much since we only ever send FROM_CPUs on
standalone ports, but it seems prudent to make sure that the initial
values match those that are applied after a bridging/unbridging cycle.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 17578f774683..587959b78c7f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2489,19 +2489,15 @@ static int mv88e6xxx_setup_message_port(struct mv88e6xxx_chip *chip, int port)
 
 static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
 {
-	struct dsa_switch *ds = chip->ds;
-	bool flood;
 	int err;
 
-	/* Upstream ports flood frames with unknown unicast or multicast DA */
-	flood = dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port);
 	if (chip->info->ops->port_set_ucast_flood) {
-		err = chip->info->ops->port_set_ucast_flood(chip, port, flood);
+		err = chip->info->ops->port_set_ucast_flood(chip, port, true);
 		if (err)
 			return err;
 	}
 	if (chip->info->ops->port_set_mcast_flood) {
-		err = chip->info->ops->port_set_mcast_flood(chip, port, flood);
+		err = chip->info->ops->port_set_mcast_flood(chip, port, true);
 		if (err)
 			return err;
 	}
-- 
cgit v1.2.3


From 041bd545e1249906b997645ee71f40df21417f17 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:39 +0100
Subject: net: dsa: mv88e6xxx: Offload bridge learning flag

Allow a user to control automatic learning per port.

Many chips have an explicit "LearningDisable"-bit that can be used for
this, but we opt for setting/clearing the PAV instead, as it works on
all devices at least as far back as 6083.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 37 +++++++++++++++++++++++++++++--------
 drivers/net/dsa/mv88e6xxx/port.c | 21 +++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/port.h |  2 ++
 3 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 587959b78c7f..7976fb699086 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2740,15 +2740,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 			return err;
 	}
 
-	/* Port Association Vector: when learning source addresses
-	 * of packets, add the address to the address database using
-	 * a port bitmap that has only the bit for this port set and
-	 * the other bits clear.
+	/* Port Association Vector: disable automatic address learning
+	 * on all user ports since they start out in standalone
+	 * mode. When joining a bridge, learning will be configured to
+	 * match the bridge port settings. Enable learning on all
+	 * DSA/CPU ports. NOTE: FROM_CPU frames always bypass the
+	 * learning process.
+	 *
+	 * Disable HoldAt1, IntOnAgeOut, LockedPort, IgnoreWrongData,
+	 * and RefreshLocked. I.e. setup standard automatic learning.
 	 */
-	reg = 1 << port;
-	/* Disable learning for CPU port */
-	if (dsa_is_cpu_port(ds, port))
+	if (dsa_is_user_port(ds, port))
 		reg = 0;
+	else
+		reg = 1 << port;
 
 	err = mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
 				   reg);
@@ -5604,7 +5609,7 @@ static int mv88e6xxx_port_pre_bridge_flags(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_chip *chip = ds->priv;
 	const struct mv88e6xxx_ops *ops;
 
-	if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD))
+	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
 		return -EINVAL;
 
 	ops = chip->info->ops;
@@ -5623,10 +5628,23 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 				       struct netlink_ext_ack *extack)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	bool do_fast_age = false;
 	int err = -EOPNOTSUPP;
 
 	mv88e6xxx_reg_lock(chip);
 
+	if (flags.mask & BR_LEARNING) {
+		bool learning = !!(flags.val & BR_LEARNING);
+		u16 pav = learning ? (1 << port) : 0;
+
+		err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
+		if (err)
+			goto out;
+
+		if (!learning)
+			do_fast_age = true;
+	}
+
 	if (flags.mask & BR_FLOOD) {
 		bool unicast = !!(flags.val & BR_FLOOD);
 
@@ -5648,6 +5666,9 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 out:
 	mv88e6xxx_reg_unlock(chip);
 
+	if (do_fast_age)
+		mv88e6xxx_port_fast_age(ds, port);
+
 	return err;
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 6a9c45c2127a..f77e2ee64a60 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -1309,6 +1309,27 @@ int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port)
 				    0x0001);
 }
 
+/* Offset 0x0B: Port Association Vector */
+
+int mv88e6xxx_port_set_assoc_vector(struct mv88e6xxx_chip *chip, int port,
+				    u16 pav)
+{
+	u16 reg, mask;
+	int err;
+
+	err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
+				  &reg);
+	if (err)
+		return err;
+
+	mask = mv88e6xxx_port_mask(chip);
+	reg &= ~mask;
+	reg |= pav & mask;
+
+	return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_ASSOC_VECTOR,
+				    reg);
+}
+
 /* Offset 0x0C: Port ATU Control */
 
 int mv88e6xxx_port_disable_learn_limit(struct mv88e6xxx_chip *chip, int port)
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
index 921d54969dad..b10e5aebacf6 100644
--- a/drivers/net/dsa/mv88e6xxx/port.h
+++ b/drivers/net/dsa/mv88e6xxx/port.h
@@ -407,6 +407,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port,
 				  size_t size);
 int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
 int mv88e6097_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port);
+int mv88e6xxx_port_set_assoc_vector(struct mv88e6xxx_chip *chip, int port,
+				    u16 pav);
 int mv88e6097_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,
 			       u8 out);
 int mv88e6390_port_pause_limit(struct mv88e6xxx_chip *chip, int port, u8 in,
-- 
cgit v1.2.3


From 8d1d8298eb00756cc525e12a133a5cc37cfdf992 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 18 Mar 2021 20:25:40 +0100
Subject: net: dsa: mv88e6xxx: Offload bridge broadcast flooding flag

These switches have two modes of classifying broadcast:

1. Broadcast is multicast.
2. Broadcast is its own unique thing that is always flooded
   everywhere.

This driver uses the first option, making sure to load the broadcast
address into all active databases. Because of this, we can support
per-port broadcast flooding by (1) making sure to only set the subset
of ports that have it enabled whenever joining a new bridge or VLAN,
and (2) by updating all active databases whenever the setting is
changed on a port.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 71 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 7976fb699086..95f07fcd4f85 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1982,6 +1982,19 @@ static int mv88e6xxx_broadcast_setup(struct mv88e6xxx_chip *chip, u16 vid)
 	int err;
 
 	for (port = 0; port < mv88e6xxx_num_ports(chip); port++) {
+		struct dsa_port *dp = dsa_to_port(chip->ds, port);
+		struct net_device *brport;
+
+		if (dsa_is_unused_port(chip->ds, port))
+			continue;
+
+		brport = dsa_port_to_bridge_port(dp);
+		if (brport && !br_port_flag_is_set(brport, BR_BCAST_FLOOD))
+			/* Skip bridged user ports where broadcast
+			 * flooding is disabled.
+			 */
+			continue;
+
 		err = mv88e6xxx_port_add_broadcast(chip, port, vid);
 		if (err)
 			return err;
@@ -1990,6 +2003,53 @@ static int mv88e6xxx_broadcast_setup(struct mv88e6xxx_chip *chip, u16 vid)
 	return 0;
 }
 
+struct mv88e6xxx_port_broadcast_sync_ctx {
+	int port;
+	bool flood;
+};
+
+static int
+mv88e6xxx_port_broadcast_sync_vlan(struct mv88e6xxx_chip *chip,
+				   const struct mv88e6xxx_vtu_entry *vlan,
+				   void *_ctx)
+{
+	struct mv88e6xxx_port_broadcast_sync_ctx *ctx = _ctx;
+	u8 broadcast[ETH_ALEN];
+	u8 state;
+
+	if (ctx->flood)
+		state = MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC;
+	else
+		state = MV88E6XXX_G1_ATU_DATA_STATE_MC_UNUSED;
+
+	eth_broadcast_addr(broadcast);
+
+	return mv88e6xxx_port_db_load_purge(chip, ctx->port, broadcast,
+					    vlan->vid, state);
+}
+
+static int mv88e6xxx_port_broadcast_sync(struct mv88e6xxx_chip *chip, int port,
+					 bool flood)
+{
+	struct mv88e6xxx_port_broadcast_sync_ctx ctx = {
+		.port = port,
+		.flood = flood,
+	};
+	struct mv88e6xxx_vtu_entry vid0 = {
+		.vid = 0,
+	};
+	int err;
+
+	/* Update the port's private database... */
+	err = mv88e6xxx_port_broadcast_sync_vlan(chip, &vid0, &ctx);
+	if (err)
+		return err;
+
+	/* ...and the database for all VLANs. */
+	return mv88e6xxx_vtu_walk(chip, mv88e6xxx_port_broadcast_sync_vlan,
+				  &ctx);
+}
+
 static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port,
 				    u16 vid, u8 member, bool warn)
 {
@@ -5609,7 +5669,8 @@ static int mv88e6xxx_port_pre_bridge_flags(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_chip *chip = ds->priv;
 	const struct mv88e6xxx_ops *ops;
 
-	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD))
+	if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+			   BR_BCAST_FLOOD))
 		return -EINVAL;
 
 	ops = chip->info->ops;
@@ -5663,6 +5724,14 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 			goto out;
 	}
 
+	if (flags.mask & BR_BCAST_FLOOD) {
+		bool broadcast = !!(flags.val & BR_BCAST_FLOOD);
+
+		err = mv88e6xxx_port_broadcast_sync(chip, port, broadcast);
+		if (err)
+			goto out;
+	}
+
 out:
 	mv88e6xxx_reg_unlock(chip);
 
-- 
cgit v1.2.3


From 76da35dc99afb460b9c335182ba6a3e7ff924186 Mon Sep 17 00:00:00 2001
From: "Wong, Vee Khee" <vee.khee.wong@intel.com>
Date: Wed, 17 Mar 2021 09:32:47 +0800
Subject: stmmac: intel: Add PSE and PCH PTP clock source selection

Intel mGbE variant implemented in EHL and TGL can be set to select
different clock frequency based on GPO bits in MAC_GPIO_STATUS register.

We introduce a new "void (*ptp_clk_freq_config)(void *priv)" in platform
data so that if a platform is required to configure the frequency of clock
source, in this case Intel mGBE does, the platform-specific configuration
of the PTP clock setting is done when stmmac_ptp_register() is called.

Signed-off-by: Wong, Vee Khee <vee.khee.wong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Co-developed-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 46 +++++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h      |  7 ++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c  |  3 ++
 include/linux/stmmac.h                            |  1 +
 4 files changed, 57 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index c49646773871..763b549e3c2d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -12,8 +12,18 @@
 #define INTEL_MGBE_ADHOC_ADDR	0x15
 #define INTEL_MGBE_XPCS_ADDR	0x16
 
+/* Selection for PTP Clock Freq belongs to PSE & PCH GbE */
+#define PSE_PTP_CLK_FREQ_MASK		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PSE_PTP_CLK_FREQ_200MHZ		(GMAC_GPO0 | GMAC_GPO3)
+#define PSE_PTP_CLK_FREQ_256MHZ		(0)
+#define PCH_PTP_CLK_FREQ_MASK		(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
+#define PCH_PTP_CLK_FREQ_200MHZ		(0)
+
 struct intel_priv_data {
 	int mdio_adhoc_addr;	/* mdio address for serdes & etc */
+	bool is_pse;
 };
 
 /* This struct is used to associate PCI Function of MAC controller on a board,
@@ -204,6 +214,32 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	}
 }
 
+/* Program PTP Clock Frequency for different variant of
+ * Intel mGBE that has slightly different GPO mapping
+ */
+static void intel_mgbe_ptp_clk_freq_config(void *npriv)
+{
+	struct stmmac_priv *priv = (struct stmmac_priv *)npriv;
+	struct intel_priv_data *intel_priv;
+	u32 gpio_value;
+
+	intel_priv = (struct intel_priv_data *)priv->plat->bsp_priv;
+
+	gpio_value = readl(priv->ioaddr + GMAC_GPIO_STATUS);
+
+	if (intel_priv->is_pse) {
+		/* For PSE GbE, use 200MHz */
+		gpio_value &= ~PSE_PTP_CLK_FREQ_MASK;
+		gpio_value |= PSE_PTP_CLK_FREQ_200MHZ;
+	} else {
+		/* For PCH GbE, use 200MHz */
+		gpio_value &= ~PCH_PTP_CLK_FREQ_MASK;
+		gpio_value |= PCH_PTP_CLK_FREQ_200MHZ;
+	}
+
+	writel(gpio_value, priv->ioaddr + GMAC_GPIO_STATUS);
+}
+
 static void common_default_data(struct plat_stmmacenet_data *plat)
 {
 	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
@@ -322,6 +358,8 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 		return ret;
 	}
 
+	plat->ptp_clk_freq_config = intel_mgbe_ptp_clk_freq_config;
+
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
 
@@ -391,8 +429,12 @@ static struct stmmac_pci_info ehl_rgmii1g_info = {
 static int ehl_pse0_common_data(struct pci_dev *pdev,
 				struct plat_stmmacenet_data *plat)
 {
+	struct intel_priv_data *intel_priv = plat->bsp_priv;
+
+	intel_priv->is_pse = true;
 	plat->bus_id = 2;
 	plat->addr64 = 32;
+
 	return ehl_common_data(pdev, plat);
 }
 
@@ -423,8 +465,12 @@ static struct stmmac_pci_info ehl_pse0_sgmii1g_info = {
 static int ehl_pse1_common_data(struct pci_dev *pdev,
 				struct plat_stmmacenet_data *plat)
 {
+	struct intel_priv_data *intel_priv = plat->bsp_priv;
+
+	intel_priv->is_pse = true;
 	plat->bus_id = 3;
 	plat->addr64 = 32;
+
 	return ehl_common_data(pdev, plat);
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 82df91c130f7..ef8502d2b6e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -42,6 +42,7 @@
 #define GMAC_HW_FEATURE3		0x00000128
 #define GMAC_MDIO_ADDR			0x00000200
 #define GMAC_MDIO_DATA			0x00000204
+#define GMAC_GPIO_STATUS		0x0000020C
 #define GMAC_ARP_ADDR			0x00000210
 #define GMAC_ADDR_HIGH(reg)		(0x300 + reg * 8)
 #define GMAC_ADDR_LOW(reg)		(0x304 + reg * 8)
@@ -278,6 +279,12 @@ enum power_event {
 #define GMAC_HW_FEAT_DVLAN		BIT(5)
 #define GMAC_HW_FEAT_NRVF		GENMASK(2, 0)
 
+/* GMAC GPIO Status reg */
+#define GMAC_GPO0			BIT(16)
+#define GMAC_GPO1			BIT(17)
+#define GMAC_GPO2			BIT(18)
+#define GMAC_GPO3			BIT(19)
+
 /* MAC HW ADDR regs */
 #define GMAC_HI_DCS			GENMASK(18, 16)
 #define GMAC_HI_DCS_SHIFT		16
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 0989e2bb6ee3..8b10fd10446f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -192,6 +192,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
 {
 	int i;
 
+	if (priv->plat->ptp_clk_freq_config)
+		priv->plat->ptp_clk_freq_config(priv);
+
 	for (i = 0; i < priv->dma_cap.pps_out_num; i++) {
 		if (i >= STMMAC_PPS_MAX)
 			break;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 51004ebd0540..10abc80b601e 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -181,6 +181,7 @@ struct plat_stmmacenet_data {
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	int (*serdes_powerup)(struct net_device *ndev, void *priv);
 	void (*serdes_powerdown)(struct net_device *ndev, void *priv);
+	void (*ptp_clk_freq_config)(void *priv);
 	int (*init)(struct platform_device *pdev, void *priv);
 	void (*exit)(struct platform_device *pdev, void *priv);
 	struct mac_device_info *(*setup)(void *priv);
-- 
cgit v1.2.3


From 21e0b8fc16087e5d3f280caaa3a02d360ff53dd3 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 18 Mar 2021 10:40:34 +0000
Subject: of: of_net: Provide function name and param description

Fixes the following W=1 kernel build warning(s):

 drivers/of/of_net.c:104: warning: Function parameter or member 'np' not described in 'of_get_mac_address'
 drivers/of/of_net.c:104: warning: expecting prototype for mac(). Prototype was for of_get_mac_address() instead

Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: netdev@vger.kernel.org
Cc: devicetree@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index 6e411821583e..bc0a27de69d4 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -79,6 +79,9 @@ static const void *of_get_mac_addr_nvmem(struct device_node *np)
 }
 
 /**
+ * of_get_mac_address()
+ * @np:		Caller's Device Node
+ *
  * Search the device tree for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
  * address. If that isn't set, then 'local-mac-address' is checked next,
-- 
cgit v1.2.3


From 7f1330c1b19d99aab0b0e9e09fae612871766964 Mon Sep 17 00:00:00 2001
From: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Date: Thu, 18 Mar 2021 04:39:41 -0700
Subject: /net/hsr: fix misspellings using codespell tool

A typo is found out by codespell tool in 111th line of hsr_debugfs.c:

$ codespell ./net/hsr/

net/hsr/hsr_debugfs.c:111: Debufs  ==> Debugfs

Fix typos found by codespell.

Signed-off-by: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 4cfd9e829c7b..99f3af1a9d4d 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -108,7 +108,7 @@ void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
 /* hsr_debugfs_term - Tear down debugfs intrastructure
  *
  * Description:
- * When Debufs is configured this routine removes debugfs file system
+ * When Debugfs is configured this routine removes debugfs file system
  * elements that are specific to hsr
  */
 void
-- 
cgit v1.2.3


From a835f9034efbb699f307575bead2607c2fbc93ac Mon Sep 17 00:00:00 2001
From: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Date: Thu, 18 Mar 2021 04:52:13 -0700
Subject: /net/core/: fix misspellings using codespell tool

A typo is found out by codespell tool in 1734th line of drop_monitor.c:

$ codespell ./net/core/
./net/core/drop_monitor.c:1734: guarnateed  ==> guaranteed

Fix a typo found by codespell.

Signed-off-by: Xiong Zhenwu <xiong.zhenwu@zte.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 571f191c06d9..1eb02c2236f2 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1731,7 +1731,7 @@ static void exit_net_drop_monitor(void)
 
 	/*
 	 * Because of the module_get/put we do in the trace state change path
-	 * we are guarnateed not to have any current users when we get here
+	 * we are guaranteed not to have any current users when we get here
 	 */
 
 	for_each_possible_cpu(cpu) {
-- 
cgit v1.2.3


From 92a310cdcf8120c2d007254f53b927c89c417fc6 Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Thu, 18 Mar 2021 21:36:40 +0800
Subject: nfc/fdp: Simplify the return expression of fdp_nci_open()

Simplify the return expression.

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/fdp/fdp.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 4dc7bd7e02b6..824f2da137a6 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -236,15 +236,12 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 
 static int fdp_nci_open(struct nci_dev *ndev)
 {
-	int r;
 	struct fdp_nci_info *info = nci_get_drvdata(ndev);
 	struct device *dev = &info->phy->i2c_dev->dev;
 
 	dev_dbg(dev, "%s\n", __func__);
 
-	r = info->phy_ops->enable(info->phy);
-
-	return r;
+	return info->phy_ops->enable(info->phy);
 }
 
 static int fdp_nci_close(struct nci_dev *ndev)
-- 
cgit v1.2.3


From 269aa0301224dc001676322c0305b0d02c93b7bb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 18 Mar 2021 17:01:42 +0100
Subject: net: cdc_ncm: drop redundant driver-data assignment

The driver data for the data interface has already been set by
usb_driver_claim_interface() so drop the subsequent redundant
assignment.

Note that this also avoids setting the driver data three times in case
of a combined interface.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 8acf30115428..8ae565a801b5 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -920,7 +920,6 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 		goto error2;
 	}
 
-	usb_set_intfdata(ctx->data, dev);
 	usb_set_intfdata(ctx->control, dev);
 
 	if (ctx->ether_desc) {
-- 
cgit v1.2.3


From 0f9651bb3ade97de3576b982513296c8783ad8bc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 18 Mar 2021 16:14:28 +0000
Subject: octeontx2-af: Remove redundant initialization of pointer pfvf

The pointer pfvf is being initialized with a value that is
never read and it is being updated later with a new value.  The
initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Fixes: 56bcef528bd8 ("octeontx2-af: Use npc_install_flow API for promisc and broadcast entries")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 03248d280e47..16d7797b7a14 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -750,7 +750,7 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf)
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan)
 {
-	struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc);
+	struct rvu_pfvf *pfvf;
 	struct npc_install_flow_req req = { 0 };
 	struct npc_install_flow_rsp rsp = { 0 };
 	struct npc_mcam *mcam = &rvu->hw->mcam;
-- 
cgit v1.2.3


From 536e1004d273cf55d0e6c6ab6bfe74dc60464cd2 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Thu, 18 Mar 2021 12:07:47 -0700
Subject: net: lapbether: Close the LAPB device before its underlying Ethernet
 device closes

When a virtual LAPB device's underlying Ethernet device closes, the LAPB
device is also closed.

However, currently the LAPB device is closed after the Ethernet device
closes. It would be better to close it before the Ethernet device closes.
This would allow the LAPB device to transmit a last frame to notify the
other side that it is disconnecting.

Signed-off-by: Xie He <xie.he.0141@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lapbether.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 8fda0446ff71..45d74285265a 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -421,8 +421,8 @@ static int lapbeth_device_event(struct notifier_block *this,
 		if (lapbeth_get_x25_dev(dev) == NULL)
 			lapbeth_new_device(dev);
 		break;
-	case NETDEV_DOWN:	
-		/* ethernet device closed -> close LAPB interface */
+	case NETDEV_GOING_DOWN:
+		/* ethernet device closes -> close LAPB interface */
 		lapbeth = lapbeth_get_x25_dev(dev);
 		if (lapbeth) 
 			dev_close(lapbeth->axdev);
-- 
cgit v1.2.3


From d25fde64d1c271277b801c57a954037f80babbd1 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Thu, 18 Mar 2021 20:29:38 +0100
Subject: net: ocelot: Fix deletetion of MRP entries from MAC table

When a MRP ring was deleted or disabled, the driver was iterating over
the ports to detect if any other MPR rings exists and in case it didn't
exist it would delete the MAC table entry. But the problem was that it
used the last iterated port to delete the MAC table entry and this could
be a NULL port.

The fix consists of using the port on which the function was called.

Fixes: 7c588c3e96e9733a ("net: ocelot: Extend MRP")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_mrp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index 439129a65b71..c3cbcaf64bb2 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -177,7 +177,7 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port,
 			goto out;
 	}
 
-	ocelot_mrp_del_mac(ocelot, ocelot_port);
+	ocelot_mrp_del_mac(ocelot, ocelot->ports[port]);
 out:
 	return 0;
 }
@@ -251,7 +251,7 @@ int ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port,
 			goto out;
 	}
 
-	ocelot_mrp_del_mac(ocelot, ocelot_port);
+	ocelot_mrp_del_mac(ocelot, ocelot->ports[port]);
 out:
 	return 0;
 }
-- 
cgit v1.2.3


From df291e54ccca0ef357f07a7b89263f7918d6ed7a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 19 Mar 2021 01:36:36 +0200
Subject: net: ocelot: support multiple bridges

The ocelot switches are a bit odd in that they do not have an STP state
to put the ports into. Instead, the forwarding configuration is delayed
from the typical port_bridge_join into stp_state_set, when the port enters
the BR_STATE_FORWARDING state.

I can only guess that the implementation of this quirk is the reason that
led to the simplification of the driver such that only one bridge could
be offloaded at a time.

We can simplify the data structures somewhat, and introduce a per-port
bridge device pointer and STP state, similar to how the LAG offload
works now (there we have a per-port bonding device pointer and TX
enabled state). This allows offloading multiple bridges with relative
ease, while still keeping in place the quirk to delay the programming of
the PGIDs.

We actually need this change now because we need to remove the bogus
restriction from ocelot_bridge_stp_state_set that ocelot->bridge_mask
needs to contain BIT(port), otherwise that function is a no-op.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 72 +++++++++++++++++++-------------------
 include/soc/mscc/ocelot.h          |  7 ++--
 2 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 9f0c9bdd9f5d..ce57929ba3d1 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -766,7 +766,7 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb)
 	/* Everything we see on an interface that is in the HW bridge
 	 * has already been forwarded.
 	 */
-	if (ocelot->bridge_mask & BIT(src_port))
+	if (ocelot->ports[src_port]->bridge)
 		skb->offload_fwd_mark = 1;
 
 	skb->protocol = eth_type_trans(skb, dev);
@@ -1183,6 +1183,26 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond,
 	return mask;
 }
 
+static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot,
+				      struct net_device *bridge)
+{
+	u32 mask = 0;
+	int port;
+
+	for (port = 0; port < ocelot->num_phys_ports; port++) {
+		struct ocelot_port *ocelot_port = ocelot->ports[port];
+
+		if (!ocelot_port)
+			continue;
+
+		if (ocelot_port->stp_state == BR_STATE_FORWARDING &&
+		    ocelot_port->bridge == bridge)
+			mask |= BIT(port);
+	}
+
+	return mask;
+}
+
 static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot)
 {
 	u32 mask = 0;
@@ -1232,10 +1252,12 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot)
 			 */
 			mask = GENMASK(ocelot->num_phys_ports - 1, 0);
 			mask &= ~cpu_fwd_mask;
-		} else if (ocelot->bridge_fwd_mask & BIT(port)) {
+		} else if (ocelot_port->bridge) {
+			struct net_device *bridge = ocelot_port->bridge;
 			struct net_device *bond = ocelot_port->bond;
 
-			mask = ocelot->bridge_fwd_mask & ~BIT(port);
+			mask = ocelot_get_bridge_fwd_mask(ocelot, bridge);
+			mask &= ~BIT(port);
 			if (bond) {
 				mask &= ~ocelot_get_bond_mask(ocelot, bond,
 							      false);
@@ -1256,29 +1278,16 @@ EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask);
 void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	u32 port_cfg;
-
-	if (!(BIT(port) & ocelot->bridge_mask))
-		return;
+	u32 learn_ena = 0;
 
-	port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port);
+	ocelot_port->stp_state = state;
 
-	switch (state) {
-	case BR_STATE_FORWARDING:
-		ocelot->bridge_fwd_mask |= BIT(port);
-		fallthrough;
-	case BR_STATE_LEARNING:
-		if (ocelot_port->learn_ena)
-			port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA;
-		break;
-
-	default:
-		port_cfg &= ~ANA_PORT_PORT_CFG_LEARN_ENA;
-		ocelot->bridge_fwd_mask &= ~BIT(port);
-		break;
-	}
+	if ((state == BR_STATE_LEARNING || state == BR_STATE_FORWARDING) &&
+	    ocelot_port->learn_ena)
+		learn_ena = ANA_PORT_PORT_CFG_LEARN_ENA;
 
-	ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port);
+	ocelot_rmw_gix(ocelot, learn_ena, ANA_PORT_PORT_CFG_LEARN_ENA,
+		       ANA_PORT_PORT_CFG, port);
 
 	ocelot_apply_bridge_fwd_mask(ocelot);
 }
@@ -1508,16 +1517,9 @@ EXPORT_SYMBOL(ocelot_port_mdb_del);
 int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
 			    struct net_device *bridge)
 {
-	if (!ocelot->bridge_mask) {
-		ocelot->hw_bridge_dev = bridge;
-	} else {
-		if (ocelot->hw_bridge_dev != bridge)
-			/* This is adding the port to a second bridge, this is
-			 * unsupported */
-			return -ENODEV;
-	}
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
 
-	ocelot->bridge_mask |= BIT(port);
+	ocelot_port->bridge = bridge;
 
 	return 0;
 }
@@ -1526,13 +1528,11 @@ EXPORT_SYMBOL(ocelot_port_bridge_join);
 int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
 			     struct net_device *bridge)
 {
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	struct ocelot_vlan pvid = {0}, native_vlan = {0};
 	int ret;
 
-	ocelot->bridge_mask &= ~BIT(port);
-
-	if (!ocelot->bridge_mask)
-		ocelot->hw_bridge_dev = NULL;
+	ocelot_port->bridge = NULL;
 
 	ret = ocelot_port_vlan_filtering(ocelot, port, false);
 	if (ret)
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 0a0751bf97dd..ce7e5c1bd90d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -615,6 +615,9 @@ struct ocelot_port {
 	bool				lag_tx_active;
 
 	u16				mrp_ring_id;
+
+	struct net_device		*bridge;
+	u8				stp_state;
 };
 
 struct ocelot {
@@ -634,10 +637,6 @@ struct ocelot {
 	int				num_frame_refs;
 	int				num_mact_rows;
 
-	struct net_device		*hw_bridge_dev;
-	u16				bridge_mask;
-	u16				bridge_fwd_mask;
-
 	struct ocelot_port		**ports;
 
 	u8				base_mac[ETH_ALEN];
-- 
cgit v1.2.3


From 25cc5a5fac15c8e140c17f7c13c6874736f3e883 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:04 -0700
Subject: ionic: code cleanup details

Catch a couple of missing macro name uses, fix a couple
of misspellings, etc.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_ethtool.c    |  8 ++-----
 drivers/net/ethernet/pensando/ionic/ionic_if.h     | 26 +++++++++++-----------
 drivers/net/ethernet/pensando/ionic/ionic_lif.c    | 23 +++++++++----------
 3 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 0832bedcb3b4..9df4b9df7a82 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -29,11 +29,9 @@ static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf)
 static void ionic_get_stats(struct net_device *netdev,
 			    struct ethtool_stats *stats, u64 *buf)
 {
-	struct ionic_lif *lif;
+	struct ionic_lif *lif = netdev_priv(netdev);
 	u32 i;
 
-	lif = netdev_priv(netdev);
-
 	memset(buf, 0, stats->n_stats * sizeof(*buf));
 	for (i = 0; i < ionic_num_stats_grps; i++)
 		ionic_stats_groups[i].get_values(lif, &buf);
@@ -264,12 +262,10 @@ static int ionic_set_link_ksettings(struct net_device *netdev,
 				    const struct ethtool_link_ksettings *ks)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_dev *idev = &lif->ionic->idev;
 	struct ionic *ionic = lif->ionic;
-	struct ionic_dev *idev;
 	int err = 0;
 
-	idev = &lif->ionic->idev;
-
 	/* set autoneg */
 	if (ks->base.autoneg != idev->port_info->config.an_enable) {
 		mutex_lock(&ionic->dev_cmd_lock);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 31ccfcdc2b0a..40bd72bb5148 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -320,7 +320,7 @@ struct ionic_lif_identify_comp {
 /**
  * enum ionic_lif_capability - LIF capabilities
  * @IONIC_LIF_CAP_ETH:     LIF supports Ethernet
- * @IONIC_LIF_CAP_RDMA:    LIF support RDMA
+ * @IONIC_LIF_CAP_RDMA:    LIF supports RDMA
  */
 enum ionic_lif_capability {
 	IONIC_LIF_CAP_ETH        = BIT(0),
@@ -404,7 +404,7 @@ union ionic_lif_config {
  *     @max_ucast_filters:  Number of perfect unicast addresses supported
  *     @max_mcast_filters:  Number of perfect multicast addresses supported
  *     @min_frame_size:     Minimum size of frames to be sent
- *     @max_frame_size:     Maximim size of frames to be sent
+ *     @max_frame_size:     Maximum size of frames to be sent
  *     @config:             LIF config struct with features, mtu, mac, q counts
  *
  * @rdma:                RDMA identify structure
@@ -692,7 +692,7 @@ enum ionic_txq_desc_opcode {
  *                      checksums are also updated.
  *
  *                   IONIC_TXQ_DESC_OPCODE_TSO:
- *                      Device preforms TCP segmentation offload
+ *                      Device performs TCP segmentation offload
  *                      (TSO).  @hdr_len is the number of bytes
  *                      to the end of TCP header (the offset to
  *                      the TCP payload).  @mss is the desired
@@ -982,13 +982,13 @@ struct ionic_rxq_comp {
 };
 
 enum ionic_pkt_type {
-	IONIC_PKT_TYPE_NON_IP     = 0x000,
-	IONIC_PKT_TYPE_IPV4       = 0x001,
-	IONIC_PKT_TYPE_IPV4_TCP   = 0x003,
-	IONIC_PKT_TYPE_IPV4_UDP   = 0x005,
-	IONIC_PKT_TYPE_IPV6       = 0x008,
-	IONIC_PKT_TYPE_IPV6_TCP   = 0x018,
-	IONIC_PKT_TYPE_IPV6_UDP   = 0x028,
+	IONIC_PKT_TYPE_NON_IP		= 0x00,
+	IONIC_PKT_TYPE_IPV4		= 0x01,
+	IONIC_PKT_TYPE_IPV4_TCP		= 0x03,
+	IONIC_PKT_TYPE_IPV4_UDP		= 0x05,
+	IONIC_PKT_TYPE_IPV6		= 0x08,
+	IONIC_PKT_TYPE_IPV6_TCP		= 0x18,
+	IONIC_PKT_TYPE_IPV6_UDP		= 0x28,
 	/* below types are only used if encap offloads are enabled on lif */
 	IONIC_PKT_TYPE_ENCAP_NON_IP	= 0x40,
 	IONIC_PKT_TYPE_ENCAP_IPV4	= 0x41,
@@ -1331,7 +1331,7 @@ enum ionic_stats_ctl_cmd {
  * @IONIC_PORT_ATTR_STATE:      Port state attribute
  * @IONIC_PORT_ATTR_SPEED:      Port speed attribute
  * @IONIC_PORT_ATTR_MTU:        Port MTU attribute
- * @IONIC_PORT_ATTR_AUTONEG:    Port autonegotation attribute
+ * @IONIC_PORT_ATTR_AUTONEG:    Port autonegotiation attribute
  * @IONIC_PORT_ATTR_FEC:        Port FEC attribute
  * @IONIC_PORT_ATTR_PAUSE:      Port pause attribute
  * @IONIC_PORT_ATTR_LOOPBACK:   Port loopback attribute
@@ -1951,8 +1951,8 @@ enum ionic_qos_sched_type {
  * @pfc_cos:		Priority-Flow Control class of service
  * @dwrr_weight:	QoS class scheduling weight
  * @strict_rlmt:	Rate limit for strict priority scheduling
- * @rw_dot1q_pcp:	Rewrite dot1q pcp to this value	(valid iff F_RW_DOT1Q_PCP)
- * @rw_ip_dscp:		Rewrite ip dscp to this value	(valid iff F_RW_IP_DSCP)
+ * @rw_dot1q_pcp:	Rewrite dot1q pcp to value (valid iff F_RW_DOT1Q_PCP)
+ * @rw_ip_dscp:		Rewrite ip dscp to value (valid iff F_RW_IP_DSCP)
  * @dot1q_pcp:		Dot1q pcp value
  * @ndscp:		Number of valid dscp values in the ip_dscp field
  * @ip_dscp:		IP dscp values
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 48d3c7685b6c..7ee6d2dbbb34 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -124,19 +124,16 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 	link_up = link_status == IONIC_PORT_OPER_STATUS_UP;
 
 	if (link_up) {
-		if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) {
+		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
 			ionic_start_queues(lif);
 			mutex_unlock(&lif->queue_lock);
 		}
 
 		if (!netif_carrier_ok(netdev)) {
-			u32 link_speed;
-
 			ionic_port_identify(lif->ionic);
-			link_speed = le32_to_cpu(lif->info->status.link_speed);
 			netdev_info(netdev, "Link up - %d Gbps\n",
-				    link_speed / 1000);
+				    le32_to_cpu(lif->info->status.link_speed) / 1000);
 			netif_carrier_on(netdev);
 		}
 	} else {
@@ -145,7 +142,7 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 			netif_carrier_off(netdev);
 		}
 
-		if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) {
+		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
 			ionic_stop_queues(lif);
 			mutex_unlock(&lif->queue_lock);
@@ -839,7 +836,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
 
 	switch (le16_to_cpu(comp->event.ecode)) {
 	case IONIC_EVENT_LINK_CHANGE:
-		ionic_link_status_check_request(lif, false);
+		ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 		break;
 	case IONIC_EVENT_RESET:
 		work = kzalloc(sizeof(*work), GFP_ATOMIC);
@@ -1443,7 +1440,7 @@ static int ionic_start_queues_reconfig(struct ionic_lif *lif)
 	 */
 	err = ionic_txrx_init(lif);
 	mutex_unlock(&lif->queue_lock);
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
 
 	return err;
@@ -1863,7 +1860,7 @@ static int ionic_open(struct net_device *netdev)
 
 	err = ionic_txrx_init(lif);
 	if (err)
-		goto err_out;
+		goto err_txrx_free;
 
 	err = netif_set_real_num_tx_queues(netdev, lif->nxqs);
 	if (err)
@@ -1884,7 +1881,7 @@ static int ionic_open(struct net_device *netdev)
 
 err_txrx_deinit:
 	ionic_txrx_deinit(lif);
-err_out:
+err_txrx_free:
 	ionic_txrx_free(lif);
 	return err;
 }
@@ -2356,7 +2353,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 	swap(lif->nxqs, qparam->nxqs);
 
 err_out_reinit_unlock:
-	/* re-init the queues, but don't loose an error code */
+	/* re-init the queues, but don't lose an error code */
 	if (err)
 		ionic_start_queues_reconfig(lif);
 	else
@@ -2605,7 +2602,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 	}
 
 	clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
 	dev_info(ionic->dev, "FW Up: LIFs restarted\n");
 
@@ -2976,7 +2973,7 @@ int ionic_lif_register(struct ionic_lif *lif)
 		return err;
 	}
 
-	ionic_link_status_check_request(lif, true);
+	ionic_link_status_check_request(lif, CAN_SLEEP);
 	lif->registered = true;
 	ionic_lif_set_netdev_info(lif);
 
-- 
cgit v1.2.3


From 2103ed2fab7de8df7ad035d8a3053e1ba3cb76e2 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:05 -0700
Subject: ionic: simplify the intr_index use in txq_init

The qcq->intr.index was set when the queue was allocated,
there is no need to reach around to find it.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 7ee6d2dbbb34..83ec3c664790 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -715,10 +715,8 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 	unsigned int intr_index;
 	int err;
 
-	if (qcq->flags & IONIC_QCQ_F_INTR)
-		intr_index = qcq->intr.index;
-	else
-		intr_index = lif->rxqcqs[q->index]->intr.index;
+	intr_index = qcq->intr.index;
+
 	ctx.cmd.q_init.intr_index = cpu_to_le16(intr_index);
 
 	dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid);
-- 
cgit v1.2.3


From 9b761574fefcead1c9c86b338a321e7ef392b833 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:06 -0700
Subject: ionic: fix unchecked reference

We can get to the counter without going through the pointer
that the robot complained about.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 83ec3c664790..18fcba4fc413 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -888,7 +888,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 	work_done = max(n_work, a_work);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		flags |= IONIC_INTR_CRED_UNMASK;
-		lif->adminqcq->cq.bound_intr->rearm_count++;
+		intr->rearm_count++;
 	}
 
 	if (work_done || flags) {
-- 
cgit v1.2.3


From acc606d3e4cd8d03a368fb207a0ae868d8cbe9d7 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:07 -0700
Subject: ionic: update ethtool support bits for BASET

Add support in get_link_ksettings for a couple of
new BASET connections.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 8 ++++++++
 drivers/net/ethernet/pensando/ionic/ionic_if.h      | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 9df4b9df7a82..b1e78b452fad 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -207,6 +207,14 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     10000baseER_Full);
 		break;
+	case IONIC_XCVR_PID_SFP_10GBASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		break;
+	case IONIC_XCVR_PID_SFP_1000BASE_T:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		break;
 	case IONIC_XCVR_PID_UNKNOWN:
 		/* This means there's no module plugged in */
 		break;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 40bd72bb5148..88210142395d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -1111,6 +1111,8 @@ enum ionic_xcvr_pid {
 	IONIC_XCVR_PID_QSFP_100G_CWDM4  = 69,
 	IONIC_XCVR_PID_QSFP_100G_PSM4   = 70,
 	IONIC_XCVR_PID_SFP_25GBASE_ACC  = 71,
+	IONIC_XCVR_PID_SFP_10GBASE_T    = 72,
+	IONIC_XCVR_PID_SFP_1000BASE_T   = 73,
 };
 
 /**
-- 
cgit v1.2.3


From 8c775344c76806c75c4bf94f8ba1e6ac3c069b62 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:08 -0700
Subject: ionic: block actions during fw reset

Block some actions while the FW is in a reset activity
and the queues are not configured.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 4 ++++
 drivers/net/ethernet/pensando/ionic/ionic_dev.c     | 8 +++++---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c     | 7 ++++++-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index b0d8499d373b..e4a5416adc80 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -184,6 +184,10 @@ static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	struct device *dev = ionic->dev;
 	int ret = 0;
 
+	if (ionic->lif &&
+	    test_bit(IONIC_LIF_F_FW_RESET, ionic->lif->state))
+		return -EBUSY;
+
 	if (num_vfs > 0) {
 		ret = pci_enable_sriov(pdev, num_vfs);
 		if (ret) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index b951bf5bbdc4..0532f7cf086d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -14,18 +14,20 @@
 static void ionic_watchdog_cb(struct timer_list *t)
 {
 	struct ionic *ionic = from_timer(ionic, t, watchdog_timer);
+	struct ionic_lif *lif = ionic->lif;
 	int hb;
 
 	mod_timer(&ionic->watchdog_timer,
 		  round_jiffies(jiffies + ionic->watchdog_period));
 
-	if (!ionic->lif)
+	if (!lif)
 		return;
 
 	hb = ionic_heartbeat_check(ionic);
 
-	if (hb >= 0)
-		ionic_link_status_check_request(ionic->lif, CAN_NOT_SLEEP);
+	if (hb >= 0 &&
+	    !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		ionic_link_status_check_request(lif, CAN_NOT_SLEEP);
 }
 
 void ionic_init_devinfo(struct ionic *ionic)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 18fcba4fc413..4f4ca183830b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1477,7 +1477,8 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 {
 	struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
 
-	netdev_info(lif->netdev, "Tx Timeout recovery\n");
+	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		return;
 
 	/* if we were stopped before this scheduled job was launched,
 	 * don't bother the queues as they are already stopped.
@@ -1493,6 +1494,7 @@ static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 
+	netdev_info(lif->netdev, "Tx Timeout triggered - txq %d\n", txqueue);
 	schedule_work(&lif->tx_timeout_work);
 }
 
@@ -1834,6 +1836,9 @@ static int ionic_start_queues(struct ionic_lif *lif)
 {
 	int err;
 
+	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+		return -EBUSY;
+
 	if (test_and_set_bit(IONIC_LIF_F_UP, lif->state))
 		return 0;
 
-- 
cgit v1.2.3


From 9e8eaf8427b6e07e8359a565f1f43c499fce6fa7 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:09 -0700
Subject: ionic: stop watchdog when in broken state

Up to now we've been ignoring any error return from the
queue starting in the link status check, so we fix that here.
If the driver had to reset and couldn't get things running
properly again, for example after a Tx Timeout and the FW is
not responding to commands, don't let the link watchdog try
to restart the queues.  At this point the user can try to DOWN
and UP the device to clear the errors.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 25 +++++++++++++++++++++++--
 drivers/net/ethernet/pensando/ionic/ionic_lif.h |  1 +
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 4f4ca183830b..9b3afedbc083 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -120,17 +120,31 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 	if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state))
 		return;
 
+	/* Don't put carrier back up if we're in a broken state */
+	if (test_bit(IONIC_LIF_F_BROKEN, lif->state)) {
+		clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state);
+		return;
+	}
+
 	link_status = le16_to_cpu(lif->info->status.link_status);
 	link_up = link_status == IONIC_PORT_OPER_STATUS_UP;
 
 	if (link_up) {
+		int err = 0;
+
 		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
-			ionic_start_queues(lif);
+			err = ionic_start_queues(lif);
+			if (err) {
+				netdev_err(lif->netdev,
+					   "Failed to start queues: %d\n", err);
+				set_bit(IONIC_LIF_F_BROKEN, lif->state);
+				netif_carrier_off(lif->netdev);
+			}
 			mutex_unlock(&lif->queue_lock);
 		}
 
-		if (!netif_carrier_ok(netdev)) {
+		if (!err && !netif_carrier_ok(netdev)) {
 			ionic_port_identify(lif->ionic);
 			netdev_info(netdev, "Link up - %d Gbps\n",
 				    le32_to_cpu(lif->info->status.link_speed) / 1000);
@@ -1836,6 +1850,9 @@ static int ionic_start_queues(struct ionic_lif *lif)
 {
 	int err;
 
+	if (test_bit(IONIC_LIF_F_BROKEN, lif->state))
+		return -EIO;
+
 	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
 		return -EBUSY;
 
@@ -1857,6 +1874,10 @@ static int ionic_open(struct net_device *netdev)
 	struct ionic_lif *lif = netdev_priv(netdev);
 	int err;
 
+	/* If recovering from a broken state, clear the bit and we'll try again */
+	if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
+		netdev_info(netdev, "clearing broken state\n");
+
 	err = ionic_txrx_alloc(lif);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 8ffda32a0a7d..be5cc89b2bd9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -139,6 +139,7 @@ enum ionic_lif_state_flags {
 	IONIC_LIF_F_LINK_CHECK_REQUESTED,
 	IONIC_LIF_F_FW_RESET,
 	IONIC_LIF_F_SPLIT_INTR,
+	IONIC_LIF_F_BROKEN,
 	IONIC_LIF_F_TX_DIM_INTR,
 	IONIC_LIF_F_RX_DIM_INTR,
 
-- 
cgit v1.2.3


From e768929de1e4521ff3b3d5e8a74d62e7eeb50cf9 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 18 Mar 2021 17:48:10 -0700
Subject: ionic: protect adminq from early destroy

Don't destroy the adminq while there is an outstanding request.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  | 14 ++++++++++++--
 drivers/net/ethernet/pensando/ionic/ionic_main.c | 22 ++++++++++++++++------
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 9b3afedbc083..889d234e2ffa 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -393,6 +393,8 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
 static void ionic_qcqs_free(struct ionic_lif *lif)
 {
 	struct device *dev = lif->ionic->dev;
+	struct ionic_qcq *adminqcq;
+	unsigned long irqflags;
 
 	if (lif->notifyqcq) {
 		ionic_qcq_free(lif, lif->notifyqcq);
@@ -401,9 +403,14 @@ static void ionic_qcqs_free(struct ionic_lif *lif)
 	}
 
 	if (lif->adminqcq) {
-		ionic_qcq_free(lif, lif->adminqcq);
-		devm_kfree(dev, lif->adminqcq);
+		spin_lock_irqsave(&lif->adminq_lock, irqflags);
+		adminqcq = READ_ONCE(lif->adminqcq);
 		lif->adminqcq = NULL;
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+		if (adminqcq) {
+			ionic_qcq_free(lif, adminqcq);
+			devm_kfree(dev, adminqcq);
+		}
 	}
 
 	if (lif->rxqcqs) {
@@ -886,6 +893,7 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 	struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr;
 	struct ionic_lif *lif = napi_to_cq(napi)->lif;
 	struct ionic_dev *idev = &lif->ionic->idev;
+	unsigned long irqflags;
 	unsigned int flags = 0;
 	int n_work = 0;
 	int a_work = 0;
@@ -895,9 +903,11 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 		n_work = ionic_cq_service(&lif->notifyqcq->cq, budget,
 					  ionic_notifyq_service, NULL, NULL);
 
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
 	if (lif->adminqcq && lif->adminqcq->flags & IONIC_QCQ_F_INITED)
 		a_work = ionic_cq_service(&lif->adminqcq->cq, budget,
 					  ionic_adminq_service, NULL, NULL);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 
 	work_done = max(n_work, a_work);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 14ece909a451..c4b2906a2ae6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -187,10 +187,17 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
 
 static void ionic_adminq_flush(struct ionic_lif *lif)
 {
-	struct ionic_queue *q = &lif->adminqcq->q;
 	struct ionic_desc_info *desc_info;
+	unsigned long irqflags;
+	struct ionic_queue *q;
+
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
+	if (!lif->adminqcq) {
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
+		return;
+	}
 
-	spin_lock(&lif->adminq_lock);
+	q = &lif->adminqcq->q;
 
 	while (q->tail_idx != q->head_idx) {
 		desc_info = &q->info[q->tail_idx];
@@ -199,7 +206,7 @@ static void ionic_adminq_flush(struct ionic_lif *lif)
 		desc_info->cb_arg = NULL;
 		q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1);
 	}
-	spin_unlock(&lif->adminq_lock);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 }
 
 static int ionic_adminq_check_err(struct ionic_lif *lif,
@@ -252,15 +259,18 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 {
 	struct ionic_desc_info *desc_info;
+	unsigned long irqflags;
 	struct ionic_queue *q;
 	int err = 0;
 
-	if (!lif->adminqcq)
+	spin_lock_irqsave(&lif->adminq_lock, irqflags);
+	if (!lif->adminqcq) {
+		spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 		return -EIO;
+	}
 
 	q = &lif->adminqcq->q;
 
-	spin_lock(&lif->adminq_lock);
 	if (!ionic_q_has_space(q, 1)) {
 		err = -ENOSPC;
 		goto err_out;
@@ -280,7 +290,7 @@ static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 	ionic_q_post(q, true, ionic_adminq_cb, ctx);
 
 err_out:
-	spin_unlock(&lif->adminq_lock);
+	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 
 	return err;
 }
-- 
cgit v1.2.3


From 1816bf1f53cbc3b92f61a03eb4ad8f07637e6438 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Fri, 19 Mar 2021 01:33:42 +0530
Subject: Fix a typo

s/serisouly/seriously/

...and the sentence construction.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/sungem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 58f142ee78a3..9790656cf970 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1674,8 +1674,8 @@ static void gem_init_phy(struct gem *gp)
 	if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE) {
 		int i;
 
-		/* Those delay sucks, the HW seem to love them though, I'll
-		 * serisouly consider breaking some locks here to be able
+		/* Those delays sucks, the HW seems to love them though, I'll
+		 * seriously consider breaking some locks here to be able
 		 * to schedule instead
 		 */
 		for (i = 0; i < 3; i++) {
-- 
cgit v1.2.3


From e75ec151c1088c1aa7a49ff16a2adcaddb24a861 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Mar 2021 18:42:23 +0000
Subject: gro: make net/gro.h self-contained
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If some source file includes <net/gro.h>, but doesn't include
<linux/indirect_call_wrapper.h>:

In file included from net/8021q/vlan_core.c:7:
./include/net/gro.h:6:1: warning: data definition has no type or storage class
    6 | INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
      | ^~~~~~~~~~~~~~~~~~~~~~~~~
./include/net/gro.h:6:1: error: type defaults to ‘int’ in declaration of ‘INDIRECT_CALLABLE_DECLARE’ [-Werror=implicit-int]

[...]

Include <linux/indirect_call_wrapper.h> directly. It's small and
won't pull lots of dependencies.
Also add some incomplete struct declarations to be fully stacked.

Fixes: 04f00ab2275f ("net/core: move gro function declarations to separate header ")
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gro.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/net/gro.h b/include/net/gro.h
index 8a6eb5303cc4..27c38b36df16 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -3,6 +3,11 @@
 #ifndef _NET_IPV6_GRO_H
 #define _NET_IPV6_GRO_H
 
+#include <linux/indirect_call_wrapper.h>
+
+struct list_head;
+struct sk_buff;
+
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
 							   struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
-- 
cgit v1.2.3


From 86af2c82c28499b4b509d6b15637a96bd829201b Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Mar 2021 18:42:30 +0000
Subject: gro: add combined call_gro_receive() + INDIRECT_CALL_INET() helper

call_gro_receive() is used to limit GRO recursion, but it works only
with callback pointers.
There's a combined version of call_gro_receive() + INDIRECT_CALL_2()
in <net/inet_common.h>, but it doesn't check for IPv6 modularity.
Add a similar new helper to cover both of these. It can and will be
used to avoid retpoline overhead when IP header lies behind another
offloaded proto.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gro.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/net/gro.h b/include/net/gro.h
index 27c38b36df16..01edaf3fdda0 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -14,4 +14,12 @@ INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
 							   struct sk_buff *));
 INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+
+#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb)	\
+({								\
+	unlikely(gro_recursion_inc_test(skb)) ?			\
+		NAPI_GRO_CB(skb)->flush |= 1, NULL :		\
+		INDIRECT_CALL_INET(cb, f2, f1, head, skb);	\
+})
+
 #endif /* _NET_IPV6_GRO_H */
-- 
cgit v1.2.3


From 4a6e7ec93a602b1e386704aeb0ce76bfc1ba8030 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Mar 2021 18:42:34 +0000
Subject: vlan/8021q: avoid retpoline overhead on GRO

The two most popular headers going after VLAN are IPv4 and IPv6.
Retpoline overhead for them is addressed only in dev_gro_receive(),
when they lie right after the outermost Ethernet header.
Use the indirect call wrappers in VLAN GRO receive code to reduce
the penalty on receiving tagged frames (when hardware stripping is
off or not available).

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 78ec2e1b14d1..59bc13b5f14f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,6 +4,7 @@
 #include <linux/if_vlan.h>
 #include <linux/netpoll.h>
 #include <linux/export.h>
+#include <net/gro.h>
 #include "vlan.h"
 
 bool vlan_do_receive(struct sk_buff **skbp)
@@ -495,7 +496,10 @@ static struct sk_buff *vlan_gro_receive(struct list_head *head,
 
 	skb_gro_pull(skb, sizeof(*vhdr));
 	skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+					    ipv6_gro_receive, inet_gro_receive,
+					    head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -515,7 +519,9 @@ static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
 	rcu_read_lock();
 	ptype = gro_find_complete_by_type(type);
 	if (ptype)
-		err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, nhoff + sizeof(*vhdr));
 
 	rcu_read_unlock();
 	return err;
-- 
cgit v1.2.3


From 5588796e89777318267ff13f2bf322b2c48843af Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Thu, 18 Mar 2021 18:42:39 +0000
Subject: ethernet: avoid retpoline overhead on TEB (GENEVE, NvGRE, VxLAN) GRO

The two most popular headers going after Ethernet are IPv4 and IPv6.
Retpoline overhead for them is addressed only in dev_gro_receive(),
when they lie right after the outermost Ethernet header.
Use the indirect call wrappers in TEB (Transparent Ethernet Bridging,
such as GENEVE, NvGRE, VxLAN etc.) GRO receive code to reduce the
penalty when processing the inner headers.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index e01cf766d2c5..933b427122be 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
 #include <net/ip.h>
 #include <net/dsa.h>
 #include <net/flow_dissector.h>
+#include <net/gro.h>
 #include <linux/uaccess.h>
 #include <net/pkt_sched.h>
 
@@ -449,7 +450,10 @@ struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	skb_gro_pull(skb, sizeof(*eh));
 	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
+					    ipv6_gro_receive, inet_gro_receive,
+					    head, skb);
 
 out_unlock:
 	rcu_read_unlock();
@@ -473,8 +477,9 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff)
 	rcu_read_lock();
 	ptype = gro_find_complete_by_type(type);
 	if (ptype != NULL)
-		err = ptype->callbacks.gro_complete(skb, nhoff +
-						    sizeof(struct ethhdr));
+		err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+					 ipv6_gro_complete, inet_gro_complete,
+					 skb, nhoff + sizeof(*eh));
 
 	rcu_read_unlock();
 	return err;
-- 
cgit v1.2.3


From 38cb57602369cf194556460a52bd18e53c76e13d Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Fri, 19 Mar 2021 04:59:45 +0530
Subject: selftests: net: forwarding: Fix a typo

s/verfied/verified/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/fib_offload_lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..e134a5f529c9 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
 	ip -n $ns link set dev dummy1 up
 
 	# Add two routes with the same key and different prefix length and
-	# make sure both are in hardware. It can be verfied that both are
+	# make sure both are in hardware. It can be verified that both are
 	# sharing the same leaf by checking the /proc/net/fib_trie
 	ip -n $ns route add 192.0.2.0/24 dev dummy1
 	ip -n $ns route add 192.0.2.0/25 dev dummy1
-- 
cgit v1.2.3


From 497cc00224cfaff89282ec8bfdfb8b797415f72a Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Thu, 18 Mar 2021 08:34:55 +0100
Subject: taprio: Handle short intervals and large packets

When using short intervals e.g. below one millisecond, large packets won't be
transmitted at all. The software implementations checks whether the packet can
be fit into the remaining interval. Therefore, it takes the packet length and
the transmission speed into account. That is correct.

However, for large packets it may be that the transmission time exceeds the
interval resulting in no packet transmission. The same situation works fine with
hardware offloading applied.

The problem has been observed with the following schedule and iperf3:

|tc qdisc replace dev lan1 parent root handle 100 taprio \
|   num_tc 8 \
|   map 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 \
|   queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
|   base-time $base \
|   sched-entry S 0x40 500000 \
|   sched-entry S 0xbf 500000 \
|   clockid CLOCK_TAI \
|   flags 0x00

[...]

|root@tsn:~# iperf3 -c 192.168.2.105
|Connecting to host 192.168.2.105, port 5201
|[  5] local 192.168.2.121 port 52610 connected to 192.168.2.105 port 5201
|[ ID] Interval           Transfer     Bitrate         Retr  Cwnd
|[  5]   0.00-1.00   sec  45.2 KBytes   370 Kbits/sec    0   1.41 KBytes
|[  5]   1.00-2.00   sec  0.00 Bytes  0.00 bits/sec    0   1.41 KBytes

After debugging, it seems that the packet length stored in the SKB is about
7000-8000 bytes. Using a 100 Mbit/s link the transmission time is about 600us
which larger than the interval of 500us.

Therefore, segment the SKB into smaller chunks if the packet is too big. This
yields similar results than the hardware offload:

|root@tsn:~# iperf3 -c 192.168.2.105
|Connecting to host 192.168.2.105, port 5201
|- - - - - - - - - - - - - - - - - - - - - - - - -
|[ ID] Interval           Transfer     Bitrate         Retr
|[  5]   0.00-10.00  sec  48.9 MBytes  41.0 Mbits/sec    0             sender
|[  5]   0.00-10.02  sec  48.7 MBytes  40.7 Mbits/sec                  receiver

Furthermore, the segmentation can be skipped for the full offload case, as the
driver or the hardware is expected to handle this.

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 64 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..922ed6b91abb 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -411,18 +411,10 @@ done:
 	return txtime;
 }
 
-static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-			  struct sk_buff **to_free)
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+			      struct Qdisc *child, struct sk_buff **to_free)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-	struct Qdisc *child;
-	int queue;
-
-	queue = skb_get_queue_mapping(skb);
-
-	child = q->qdiscs[queue];
-	if (unlikely(!child))
-		return qdisc_drop(skb, sch, to_free);
 
 	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
 		if (!is_valid_interval(skb, sch))
@@ -439,6 +431,58 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return qdisc_enqueue(skb, child, to_free);
 }
 
+static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	int queue;
+
+	queue = skb_get_queue_mapping(skb);
+
+	child = q->qdiscs[queue];
+	if (unlikely(!child))
+		return qdisc_drop(skb, sch, to_free);
+
+	/* Large packets might not be transmitted when the transmission duration
+	 * exceeds any configured interval. Therefore, segment the skb into
+	 * smaller chunks. Skip it for the full offload case, as the driver
+	 * and/or the hardware is expected to handle this.
+	 */
+	if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+		netdev_features_t features = netif_skb_features(skb);
+		struct sk_buff *segs, *nskb;
+		int ret;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		skb_list_walk_safe(segs, segs, nskb) {
+			skb_mark_not_on_list(segs);
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			slen += segs->len;
+
+			ret = taprio_enqueue_one(segs, sch, child, to_free);
+			if (ret != NET_XMIT_SUCCESS) {
+				if (net_xmit_drop_count(ret))
+					qdisc_qstats_drop(sch);
+			} else {
+				numsegs++;
+			}
+		}
+
+		if (numsegs > 1)
+			qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+		consume_skb(skb);
+
+		return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+	}
+
+	return taprio_enqueue_one(skb, sch, child, to_free);
+}
+
 static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
-- 
cgit v1.2.3


From 5b6b827413e8a86b1f83c3a199fe639c0e292295 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Thu, 18 Mar 2021 20:44:31 +0100
Subject: net: phy: at803x: remove at803x_aneg_done()

Here is what Vladimir says about it:

  at803x_aneg_done() keeps the aneg reporting as "not done" even when
  the copper-side link was reported as up, but the in-band autoneg has
  not finished.

  That was the _intended_ behavior when that code was introduced, and
  Heiner have said about it [1]:

  | That's not nice from the PHY:
  | It signals "link up", and if the system asks the PHY for link details,
  | then it sheepishly says "well, link is *almost* up".

  If the specification of phy_aneg_done behavior does not include
  in-band autoneg (and it doesn't), then this piece of code does not
  belong here.

  The fact that we can no longer trigger this code from phylib is yet
  another reason why it fails at its intended (and wrong) purpose and
  should be removed.

Removing the SGMII link check, would just keep the call to
genphy_aneg_done(), which is also the fallback. Thus we can just remove
at803x_aneg_done() altogether.

[1] https://lore.kernel.org/netdev/fdf0074a-2572-5914-6f3e-77202cbf96de@gmail.com/

Suggested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 31 -------------------------------
 1 file changed, 31 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c2aa4c92edde..d7799beb811c 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -751,36 +751,6 @@ static void at803x_link_change_notify(struct phy_device *phydev)
 	}
 }
 
-static int at803x_aneg_done(struct phy_device *phydev)
-{
-	int ccr;
-
-	int aneg_done = genphy_aneg_done(phydev);
-	if (aneg_done != BMSR_ANEGCOMPLETE)
-		return aneg_done;
-
-	/*
-	 * in SGMII mode, if copper side autoneg is successful,
-	 * also check SGMII side autoneg result
-	 */
-	ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
-	if ((ccr & AT803X_MODE_CFG_MASK) != AT803X_MODE_CFG_SGMII)
-		return aneg_done;
-
-	/* switch to SGMII/fiber page */
-	phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL);
-
-	/* check if the SGMII link is OK. */
-	if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) {
-		phydev_warn(phydev, "803x_aneg_done: SGMII link is not ok\n");
-		aneg_done = 0;
-	}
-	/* switch back to copper page */
-	phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL);
-
-	return aneg_done;
-}
-
 static int at803x_read_status(struct phy_device *phydev)
 {
 	int ss, err, old_link = phydev->link;
@@ -1198,7 +1168,6 @@ static struct phy_driver at803x_driver[] = {
 	.resume			= at803x_resume,
 	/* PHY_GBIT_FEATURES */
 	.read_status		= at803x_read_status,
-	.aneg_done		= at803x_aneg_done,
 	.config_intr		= &at803x_config_intr,
 	.handle_interrupt	= at803x_handle_interrupt,
 	.get_tunable		= at803x_get_tunable,
-- 
cgit v1.2.3


From e75a2e02ec998a808dcd19885275f5444b146d44 Mon Sep 17 00:00:00 2001
From: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Date: Fri, 19 Mar 2021 11:59:22 +0800
Subject: atl1c: switch to napi_gro_receive

Changing to napi_gro_receive() improves efficiency significantly. Tested
on Intel Core2-based motherboards and iperf3.

Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 3f65f2b370c5..3e440c2dc68a 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1851,7 +1851,7 @@ rrs_checked:
 			vlan = le16_to_cpu(vlan);
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
 		}
-		netif_receive_skb(skb);
+		napi_gro_receive(&adapter->napi, skb);
 
 		(*work_done)++;
 		count++;
-- 
cgit v1.2.3


From a9d6df642dc8301ee7c3f414ec08548d5c4078ff Mon Sep 17 00:00:00 2001
From: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Date: Fri, 19 Mar 2021 12:13:22 +0800
Subject: atl1c: use napi_alloc_skb

Using napi_alloc_skb in NAPI context avoids enable/disable IRQs, which
increases iperf3 result by a few Mbps. Since napi_alloc_skb() uses
NET_IP_ALIGN, convert other alloc methods to the same padding. Tested
on Intel Core2 and AMD K10 platforms.

Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 28 +++++++++++++++----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 3e440c2dc68a..d54375b255dc 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -47,7 +47,7 @@ static void atl1c_down(struct atl1c_adapter *adapter);
 static int atl1c_reset_mac(struct atl1c_hw *hw);
 static void atl1c_reset_dma_ring(struct atl1c_adapter *adapter);
 static int atl1c_configure(struct atl1c_adapter *adapter);
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter);
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode);
 
 
 static const u32 atl1c_default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
@@ -470,7 +470,7 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 	adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ?
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
 
-	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD) +
+	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) +
 		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 	adapter->rx_frag_size = roundup_pow_of_two(head_size);
 }
@@ -1434,7 +1434,7 @@ static int atl1c_configure(struct atl1c_adapter *adapter)
 	atl1c_set_multi(netdev);
 	atl1c_restore_vlan(adapter);
 
-	num = atl1c_alloc_rx_buffer(adapter);
+	num = atl1c_alloc_rx_buffer(adapter, false);
 	if (unlikely(num == 0))
 		return -ENOMEM;
 
@@ -1650,14 +1650,20 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter,
 	skb_checksum_none_assert(skb);
 }
 
-static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
+static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter,
+				       bool napi_mode)
 {
 	struct sk_buff *skb;
 	struct page *page;
 
-	if (adapter->rx_frag_size > PAGE_SIZE)
-		return netdev_alloc_skb(adapter->netdev,
-					adapter->rx_buffer_len);
+	if (adapter->rx_frag_size > PAGE_SIZE) {
+		if (likely(napi_mode))
+			return napi_alloc_skb(&adapter->napi,
+					      adapter->rx_buffer_len);
+		else
+			return netdev_alloc_skb_ip_align(adapter->netdev,
+							 adapter->rx_buffer_len);
+	}
 
 	page = adapter->rx_page;
 	if (!page) {
@@ -1670,7 +1676,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
 	skb = build_skb(page_address(page) + adapter->rx_page_offset,
 			adapter->rx_frag_size);
 	if (likely(skb)) {
-		skb_reserve(skb, NET_SKB_PAD);
+		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 		adapter->rx_page_offset += adapter->rx_frag_size;
 		if (adapter->rx_page_offset >= PAGE_SIZE)
 			adapter->rx_page = NULL;
@@ -1680,7 +1686,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
 	return skb;
 }
 
-static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
+static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode)
 {
 	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
 	struct pci_dev *pdev = adapter->pdev;
@@ -1701,7 +1707,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
 	while (next_info->flags & ATL1C_BUFFER_FREE) {
 		rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
 
-		skb = atl1c_alloc_skb(adapter);
+		skb = atl1c_alloc_skb(adapter, napi_mode);
 		if (unlikely(!skb)) {
 			if (netif_msg_rx_err(adapter))
 				dev_warn(&pdev->dev, "alloc rx buffer failed\n");
@@ -1857,7 +1863,7 @@ rrs_checked:
 		count++;
 	}
 	if (count)
-		atl1c_alloc_rx_buffer(adapter);
+		atl1c_alloc_rx_buffer(adapter, true);
 }
 
 /**
-- 
cgit v1.2.3


From c199fdb8abf558e330e3d13244b7c4593956329b Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Fri, 19 Mar 2021 14:36:22 +0800
Subject: net: hinic: Remove unnecessary 'out of memory' message

This patch removes unnecessary out of memory message in hinic driver,
fixes the following checkpatch.pl warning:
"WARNING: Possible unnecessary 'out of memory' message"

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c | 8 ++------
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c    | 5 +----
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c      | 1 -
 drivers/net/ethernet/huawei/hinic/hinic_rx.c         | 8 ++------
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
index 4e4029d5c8e1..06586173add7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c
@@ -629,10 +629,8 @@ static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain,
 
 	cmd_vaddr = dma_alloc_coherent(&pdev->dev, API_CMD_BUF_SIZE,
 				       &cmd_paddr, GFP_KERNEL);
-	if (!cmd_vaddr) {
-		dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n");
+	if (!cmd_vaddr)
 		return -ENOMEM;
-	}
 
 	cell_ctxt = &chain->cell_ctxt[cell_idx];
 
@@ -679,10 +677,8 @@ static int api_cmd_create_cell(struct hinic_api_cmd_chain *chain,
 
 	node = dma_alloc_coherent(&pdev->dev, chain->cell_size, &node_paddr,
 				  GFP_KERNEL);
-	if (!node) {
-		dev_err(&pdev->dev, "Failed to allocate dma API CMD cell\n");
+	if (!node)
 		return -ENOMEM;
-	}
 
 	node->read.hw_wb_resp_paddr = 0;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 819fa13034c0..6b9b94a8925d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -443,15 +443,12 @@ static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
 	struct pci_dev *pdev = pf_to_mgmt->hwif->pdev;
 
 	mgmt_work = kzalloc(sizeof(*mgmt_work), GFP_KERNEL);
-	if (!mgmt_work) {
-		dev_err(&pdev->dev, "Allocate mgmt work memory failed\n");
+	if (!mgmt_work)
 		return;
-	}
 
 	if (recv_msg->msg_len) {
 		mgmt_work->msg = kzalloc(recv_msg->msg_len, GFP_KERNEL);
 		if (!mgmt_work->msg) {
-			dev_err(&pdev->dev, "Allocate mgmt msg memory failed\n");
 			kfree(mgmt_work);
 			return;
 		}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index fcf7bfe4aa47..dcba4d009bad 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -414,7 +414,6 @@ int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif,
 	rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size,
 					      &rq->pi_dma_addr, GFP_KERNEL);
 	if (!rq->pi_virt_addr) {
-		dev_err(&pdev->dev, "Failed to allocate PI address\n");
 		err = -ENOMEM;
 		goto err_pi_virt;
 	}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 070a7cc6392e..cce08647b9b2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -137,10 +137,8 @@ static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq,
 	int err;
 
 	skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz);
-	if (!skb) {
-		netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n");
+	if (!skb)
 		return NULL;
-	}
 
 	addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz,
 			      DMA_FROM_DEVICE);
@@ -212,10 +210,8 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 
 	for (i = 0; i < free_wqebbs; i++) {
 		skb = rx_alloc_skb(rxq, &dma_addr);
-		if (!skb) {
-			netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
+		if (!skb)
 			goto skb_out;
-		}
 
 		hinic_set_sge(&sge, dma_addr, skb->len);
 
-- 
cgit v1.2.3


From 44401b677a52d6e425e6e948dd6e65c720dc66c6 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Fri, 19 Mar 2021 14:36:23 +0800
Subject: net: hinic: add a blank line after declarations

There should be a blank line after declarations, so just add it.

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_tx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 8da7d46363b2..710c4ff7bc0e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -377,6 +377,7 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
 		} else if (ip.v4->version == 6) {
 			unsigned char *exthdr;
 			__be16 frag_off;
+
 			l3_type = IPV6_PKT;
 			tunnel_type = TUNNEL_UDP_CSUM;
 			exthdr = ip.hdr + sizeof(*ip.v6);
-- 
cgit v1.2.3


From e2f84fd17557ab3b0e8a2fcc98bc5133ee89a495 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Fri, 19 Mar 2021 14:36:24 +0800
Subject: net: hinic: remove the repeat word "the" in comment.

There is a duplicate "the" in the comment, so delete it.

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index efbaed389440..cab38ff0713c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -334,7 +334,7 @@ static void set_dma_attr(struct hinic_hwif *hwif, u32 entry_idx,
 }
 
 /**
- * dma_attr_table_init - initialize the the default dma attributes
+ * dma_attr_table_init - initialize the default dma attributes
  * @hwif: the HW interface of a pci function device
  **/
 static void dma_attr_init(struct hinic_hwif *hwif)
-- 
cgit v1.2.3


From 79d65cab7f8535d7e19a16651aa45ea674cf4069 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Fri, 19 Mar 2021 14:36:25 +0800
Subject: net: hinic: convert strlcpy to strscpy

Usage of strlcpy in linux kernel has been recently
deprecated[1], so convert hinic driver to strscpy

[1] https://lore.kernel.org/lkml/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL
=V6A6G1oUZcprmknw@mail.gmail.com/

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index c340d9acba80..c7848c382a5e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -543,8 +543,8 @@ static void hinic_get_drvinfo(struct net_device *netdev,
 	struct hinic_hwif *hwif = hwdev->hwif;
 	int err;
 
-	strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
-	strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
+	strscpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver));
+	strscpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info));
 
 	err = hinic_get_mgmt_version(nic_dev, mgmt_ver);
 	if (err)
-- 
cgit v1.2.3


From fc72d4773c6b2ed860e69c646c9f3b9b9e38ad45 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 19 Mar 2021 12:02:54 -0700
Subject: hinic: Remove unused variable.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c: In function ‘mgmt_recv_msg_handler’:
drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c:443:18: warning: unused variable ‘pdev’ [-Wunused-variable]
  443 |  struct pci_dev *pdev = pf_to_mgmt->hwif->pdev;
      |                  ^~~~

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 6b9b94a8925d..817173f1fbb7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -440,7 +440,6 @@ static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
 				  struct hinic_recv_msg *recv_msg)
 {
 	struct hinic_mgmt_msg_handle_work *mgmt_work = NULL;
-	struct pci_dev *pdev = pf_to_mgmt->hwif->pdev;
 
 	mgmt_work = kzalloc(sizeof(*mgmt_work), GFP_KERNEL);
 	if (!mgmt_work)
-- 
cgit v1.2.3


From b498ee3f7613f5ed1deac40451486274fc93fe30 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 19 Mar 2021 09:58:36 +0100
Subject: r8169: use lower_32_bits/upper_32_bits macros

Use the lower_32_bits/upper_32_bits macros to simplify the code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 7aad0ba53372..8ea6ddc7da5c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1586,12 +1586,10 @@ DECLARE_RTL_COND(rtl_counters_cond)
 
 static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 {
-	dma_addr_t paddr = tp->counters_phys_addr;
-	u32 cmd;
+	u32 cmd = lower_32_bits(tp->counters_phys_addr);
 
-	RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32);
+	RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr));
 	rtl_pci_commit(tp);
-	cmd = (u64)paddr & DMA_BIT_MASK(32);
 	RTL_W32(tp, CounterAddrLow, cmd);
 	RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
-- 
cgit v1.2.3


From 140960564d6376fe6b702574a64434a7bc8f085c Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 19 Mar 2021 09:41:03 +0000
Subject: octeontx2-pf: Fix missing spin_lock_init() in otx2_tc_add_flow()

The driver allocates the spinlock but not initialize it.
Use spin_lock_init() on it to initialize it correctly.

Fixes: d8ce30e0cf76 ("octeontx2-pf: add tc flower stats handler for hw offloads")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index 2f75cfc5a23a..e919140d8965 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -536,6 +536,7 @@ static int otx2_tc_add_flow(struct otx2_nic *nic,
 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
 	if (!new_node)
 		return -ENOMEM;
+	spin_lock_init(&new_node->lock);
 	new_node->cookie = tc_flow_cmd->cookie;
 
 	mutex_lock(&nic->mbox.lock);
-- 
cgit v1.2.3


From 745740ac56b8e468bbfa83d6fa7015177cc21a91 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 19 Mar 2021 09:54:53 +0000
Subject: octeontx2-pf: Fix spelling mistake "ratelimitter" -> "ratelimiter"

There is a spelling mistake in an error message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index e919140d8965..51157b283f6f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -176,7 +176,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
 
 	if (nic->flags & OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Only one Egress MATCHALL ratelimitter can be offloaded");
+				   "Only one Egress MATCHALL ratelimiter can be offloaded");
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From c54f042dcc1b258ad15d2be0f0b3ba371f758329 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 19 Mar 2021 12:08:06 +0200
Subject: net: enetc: teardown CBDR during PF/VF unbind

Michael reports that after the blamed patch, unbinding a VF would cause
these transactions to remain pending, and trigger some warnings with the
DMA API debug:

$ echo 1 > /sys/bus/pci/devices/0000\:00\:00.0/sriov_numvfs
pci 0000:00:01.0: [1957:ef00] type 00 class 0x020001
fsl_enetc_vf 0000:00:01.0: Adding to iommu group 19
fsl_enetc_vf 0000:00:01.0: enabling device (0000 -> 0002)
fsl_enetc_vf 0000:00:01.0 eno0vf0: renamed from eth0

$ echo 0 > /sys/bus/pci/devices/0000\:00\:00.0/sriov_numvfs
DMA-API: pci 0000:00:01.0: device driver has pending DMA allocations while released from device [count=1]
One of leaked entries details: [size=2048 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as coherent]
WARNING: CPU: 0 PID: 2547 at kernel/dma/debug.c:853 dma_debug_device_change+0x174/0x1c8
(...)
Call trace:
 dma_debug_device_change+0x174/0x1c8
 blocking_notifier_call_chain+0x74/0xa8
 device_release_driver_internal+0x18c/0x1f0
 device_release_driver+0x20/0x30
 pci_stop_bus_device+0x8c/0xe8
 pci_stop_and_remove_bus_device+0x20/0x38
 pci_iov_remove_virtfn+0xb8/0x128
 sriov_disable+0x3c/0x110
 pci_disable_sriov+0x24/0x30
 enetc_sriov_configure+0x4c/0x108
 sriov_numvfs_store+0x11c/0x198
(...)
DMA-API: Mapped at:
 dma_entry_alloc+0xa4/0x130
 debug_dma_alloc_coherent+0xbc/0x138
 dma_alloc_attrs+0xa4/0x108
 enetc_setup_cbdr+0x4c/0x1d0
 enetc_vf_probe+0x11c/0x250
pci 0000:00:01.0: Removing from iommu group 19

This happens because stupid me moved enetc_teardown_cbdr outside of
enetc_free_si_resources, but did not bother to keep calling
enetc_teardown_cbdr from all the places where enetc_free_si_resources
was called. In particular, now it is no longer called from the main
unbind function, just from the probe error path.

Fixes: 4b47c0b81ffd ("net: enetc: don't initialize unused ports from a separate code path")
Reported-by: Michael Walle <michael@walle.cc>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Michael Walle <michael@walle.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 1 +
 drivers/net/ethernet/freescale/enetc/enetc_vf.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index c8b6110448d4..3a7a9102eccb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1225,6 +1225,7 @@ static void enetc_pf_remove(struct pci_dev *pdev)
 	enetc_free_msix(priv);
 
 	enetc_free_si_resources(priv);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	free_netdev(si->ndev);
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 371a34d3c6b4..03090ba7e226 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -223,6 +223,7 @@ static void enetc_vf_remove(struct pci_dev *pdev)
 	enetc_free_msix(priv);
 
 	enetc_free_si_resources(priv);
+	enetc_teardown_cbdr(&si->cbd_ring);
 
 	free_netdev(si->ndev);
 
-- 
cgit v1.2.3


From df4771783d6447b9c260bbb2692af15c4046c15b Mon Sep 17 00:00:00 2001
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Date: Fri, 19 Mar 2021 14:29:03 +0100
Subject: net: phy: mscc: Applying LCPLL reset to VSC8584

Introduced LCPLL reset in
commit d15e08d9fb82 ("net: phy: mscc: adding LCPLL reset to VSC8514").
Now applying this reset to the VSC8584 phy familiy.

Fixes: a5afc1678044a ("net: phy: mscc: add support for VSC8584 PHY.")
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 3a7705228ed5..2c2a3424dcc1 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1362,6 +1362,12 @@ static int vsc8584_config_pre_init(struct phy_device *phydev)
 	u16 crc, reg;
 	int ret;
 
+	ret = vsc8584_pll5g_reset(phydev);
+	if (ret < 0) {
+		dev_err(dev, "failed LCPLL reset, ret: %d\n", ret);
+		return ret;
+	}
+
 	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
 
 	/* all writes below are broadcasted to all PHYs in the same package */
-- 
cgit v1.2.3


From 23d12335752fc08877d64719a679723d7e0a3962 Mon Sep 17 00:00:00 2001
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Date: Fri, 19 Mar 2021 14:29:04 +0100
Subject: net: phy: mscc: improved serdes calibration applied to VSC8584

Introduced 'FOJI' serdes calibration in commit 85e97f0b984e
("net: phy: mscc: improved serdes calibration applied to VSC8514")
Now including the VSC8584 family.

Fixes: a5afc1678044a ("net: phy: mscc: add support for VSC8584 PHY.")
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_main.c | 210 ++++++++++++++++++++++++++++-----------
 1 file changed, 154 insertions(+), 56 deletions(-)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 2c2a3424dcc1..2f105dafb6cc 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1472,6 +1472,24 @@ static int vsc8584_config_pre_init(struct phy_device *phydev)
 	if (ret)
 		goto out;
 
+	/* Write patch vector 0, to skip IB cal polling  */
+	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED_GPIO);
+	reg = MSCC_ROM_TRAP_SERDES_6G_CFG; /* ROM address to trap, for patch vector 0 */
+	ret = phy_base_write(phydev, MSCC_TRAP_ROM_ADDR(1), reg);
+	if (ret)
+		goto out;
+
+	reg = MSCC_RAM_TRAP_SERDES_6G_CFG; /* RAM address to jump to, when patch vector 0 enabled */
+	ret = phy_base_write(phydev, MSCC_PATCH_RAM_ADDR(1), reg);
+	if (ret)
+		goto out;
+
+	reg = phy_base_read(phydev, MSCC_INT_MEM_CNTL);
+	reg |= PATCH_VEC_ZERO_EN; /* bit 8, enable patch vector 0 */
+	ret = phy_base_write(phydev, MSCC_INT_MEM_CNTL, reg);
+	if (ret)
+		goto out;
+
 	vsc8584_micro_deassert_reset(phydev, true);
 
 out:
@@ -1537,62 +1555,81 @@ static void vsc85xx_coma_mode_release(struct phy_device *phydev)
 	vsc85xx_phy_write_page(phydev, MSCC_PHY_PAGE_STANDARD);
 }
 
-static int vsc8584_config_init(struct phy_device *phydev)
+static int vsc8584_config_host_serdes(struct phy_device *phydev)
 {
 	struct vsc8531_private *vsc8531 = phydev->priv;
-	int ret, i;
+	int ret;
 	u16 val;
 
-	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+			     MSCC_PHY_PAGE_EXTENDED_GPIO);
+	if (ret)
+		return ret;
 
-	phy_lock_mdio_bus(phydev);
+	val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK);
+	val &= ~MAC_CFG_MASK;
+	if (phydev->interface == PHY_INTERFACE_MODE_QSGMII) {
+		val |= MAC_CFG_QSGMII;
+	} else if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+		val |= MAC_CFG_SGMII;
+	} else {
+		ret = -EINVAL;
+		return ret;
+	}
 
-	/* Some parts of the init sequence are identical for every PHY in the
-	 * package. Some parts are modifying the GPIO register bank which is a
-	 * set of registers that are affecting all PHYs, a few resetting the
-	 * microprocessor common to all PHYs. The CRC check responsible of the
-	 * checking the firmware within the 8051 microprocessor can only be
-	 * accessed via the PHY whose internal address in the package is 0.
-	 * All PHYs' interrupts mask register has to be zeroed before enabling
-	 * any PHY's interrupt in this register.
-	 * For all these reasons, we need to do the init sequence once and only
-	 * once whatever is the first PHY in the package that is initialized and
-	 * do the correct init sequence for all PHYs that are package-critical
-	 * in this pre-init function.
-	 */
-	if (phy_package_init_once(phydev)) {
-		/* The following switch statement assumes that the lowest
-		 * nibble of the phy_id_mask is always 0. This works because
-		 * the lowest nibble of the PHY_ID's below are also 0.
-		 */
-		WARN_ON(phydev->drv->phy_id_mask & 0xf);
+	ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val);
+	if (ret)
+		return ret;
 
-		switch (phydev->phy_id & phydev->drv->phy_id_mask) {
-		case PHY_ID_VSC8504:
-		case PHY_ID_VSC8552:
-		case PHY_ID_VSC8572:
-		case PHY_ID_VSC8574:
-			ret = vsc8574_config_pre_init(phydev);
-			break;
-		case PHY_ID_VSC856X:
-		case PHY_ID_VSC8575:
-		case PHY_ID_VSC8582:
-		case PHY_ID_VSC8584:
-			ret = vsc8584_config_pre_init(phydev);
-			break;
-		default:
-			ret = -EINVAL;
-			break;
-		}
+	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
+			     MSCC_PHY_PAGE_STANDARD);
+	if (ret)
+		return ret;
 
-		if (ret)
-			goto err;
-	}
+	val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT |
+		PROC_CMD_READ_MOD_WRITE_PORT;
+	if (phydev->interface == PHY_INTERFACE_MODE_QSGMII)
+		val |= PROC_CMD_QSGMII_MAC;
+	else
+		val |= PROC_CMD_SGMII_MAC;
+
+	ret = vsc8584_cmd(phydev, val);
+	if (ret)
+		return ret;
+
+	usleep_range(10000, 20000);
+
+	/* Disable SerDes for 100Base-FX */
+	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			  PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_READ_MOD_WRITE_PORT |
+			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_100BASE_FX);
+	if (ret)
+		return ret;
+
+	/* Disable SerDes for 1000Base-X */
+	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			  PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_READ_MOD_WRITE_PORT |
+			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
+	if (ret)
+		return ret;
+
+	return vsc85xx_sd6g_config_v2(phydev);
+}
+
+static int vsc8574_config_host_serdes(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	int ret;
+	u16 val;
 
 	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 			     MSCC_PHY_PAGE_EXTENDED_GPIO);
 	if (ret)
-		goto err;
+		return ret;
 
 	val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK);
 	val &= ~MAC_CFG_MASK;
@@ -1604,17 +1641,17 @@ static int vsc8584_config_init(struct phy_device *phydev)
 		val |= MAC_CFG_RGMII;
 	} else {
 		ret = -EINVAL;
-		goto err;
+		return ret;
 	}
 
 	ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 			     MSCC_PHY_PAGE_STANDARD);
 	if (ret)
-		goto err;
+		return ret;
 
 	if (!phy_interface_is_rgmii(phydev)) {
 		val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT |
@@ -1626,7 +1663,7 @@ static int vsc8584_config_init(struct phy_device *phydev)
 
 		ret = vsc8584_cmd(phydev, val);
 		if (ret)
-			goto err;
+			return ret;
 
 		usleep_range(10000, 20000);
 	}
@@ -1638,16 +1675,77 @@ static int vsc8584_config_init(struct phy_device *phydev)
 			  PROC_CMD_READ_MOD_WRITE_PORT |
 			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_100BASE_FX);
 	if (ret)
-		goto err;
+		return ret;
 
 	/* Disable SerDes for 1000Base-X */
-	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
-			  PROC_CMD_FIBER_PORT(vsc8531->addr) |
-			  PROC_CMD_FIBER_DISABLE |
-			  PROC_CMD_READ_MOD_WRITE_PORT |
-			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
-	if (ret)
-		goto err;
+	return vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
+			   PROC_CMD_FIBER_PORT(vsc8531->addr) |
+			   PROC_CMD_FIBER_DISABLE |
+			   PROC_CMD_READ_MOD_WRITE_PORT |
+			   PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
+}
+
+static int vsc8584_config_init(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	int ret, i;
+	u16 val;
+
+	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+	phy_lock_mdio_bus(phydev);
+
+	/* Some parts of the init sequence are identical for every PHY in the
+	 * package. Some parts are modifying the GPIO register bank which is a
+	 * set of registers that are affecting all PHYs, a few resetting the
+	 * microprocessor common to all PHYs. The CRC check responsible of the
+	 * checking the firmware within the 8051 microprocessor can only be
+	 * accessed via the PHY whose internal address in the package is 0.
+	 * All PHYs' interrupts mask register has to be zeroed before enabling
+	 * any PHY's interrupt in this register.
+	 * For all these reasons, we need to do the init sequence once and only
+	 * once whatever is the first PHY in the package that is initialized and
+	 * do the correct init sequence for all PHYs that are package-critical
+	 * in this pre-init function.
+	 */
+	if (phy_package_init_once(phydev)) {
+		/* The following switch statement assumes that the lowest
+		 * nibble of the phy_id_mask is always 0. This works because
+		 * the lowest nibble of the PHY_ID's below are also 0.
+		 */
+		WARN_ON(phydev->drv->phy_id_mask & 0xf);
+
+		switch (phydev->phy_id & phydev->drv->phy_id_mask) {
+		case PHY_ID_VSC8504:
+		case PHY_ID_VSC8552:
+		case PHY_ID_VSC8572:
+		case PHY_ID_VSC8574:
+			ret = vsc8574_config_pre_init(phydev);
+			if (ret)
+				goto err;
+			ret = vsc8574_config_host_serdes(phydev);
+			if (ret)
+				goto err;
+			break;
+		case PHY_ID_VSC856X:
+		case PHY_ID_VSC8575:
+		case PHY_ID_VSC8582:
+		case PHY_ID_VSC8584:
+			ret = vsc8584_config_pre_init(phydev);
+			if (ret)
+				goto err;
+			ret = vsc8584_config_host_serdes(phydev);
+			if (ret)
+				goto err;
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+
+		if (ret)
+			goto err;
+	}
 
 	phy_unlock_mdio_bus(phydev);
 
-- 
cgit v1.2.3


From 36d021d1049f252c2bc1e20123b4cce9e9dc60af Mon Sep 17 00:00:00 2001
From: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Date: Fri, 19 Mar 2021 14:29:05 +0100
Subject: net: phy: mscc: coma mode disabled for VSC8584

This patch releases coma mode for VSC8584 as done for VSC8514 in
commit ca0d7fd0a58d ("net: phy: mscc: coma mode disabled for VSC8514")

Fixes: a5afc1678044a ("net: phy: mscc: add support for VSC8584 PHY.")
Signed-off-by: Bjarni Jonasson <bjarni.jonasson@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 2f105dafb6cc..6e32da28e138 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1737,6 +1737,7 @@ static int vsc8584_config_init(struct phy_device *phydev)
 			ret = vsc8584_config_host_serdes(phydev);
 			if (ret)
 				goto err;
+			vsc85xx_coma_mode_release(phydev);
 			break;
 		default:
 			ret = -EINVAL;
-- 
cgit v1.2.3


From a4e39b999a584a3f6e5098eec06b7d37d492eeb1 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 11 Jan 2021 07:17:53 +0200
Subject: igc: Remove unused MII_CR_RESET

MII_CR_RESET mask not in use in i225 device and can be removed

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index b909f00a79e6..ca460bf1af2f 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -442,7 +442,6 @@
 #define MII_CR_POWER_DOWN	0x0800  /* Power down */
 #define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
 #define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET		0x8000  /* 0 = normal, 1 = PHY reset */
 #define MII_CR_SPEED_1000	0x0040
 #define MII_CR_SPEED_100	0x2000
 #define MII_CR_SPEED_10		0x0000
-- 
cgit v1.2.3


From e4a9f45b0be5904b7f21b2da29b9f596f80fe95b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 19 Mar 2021 10:24:18 -0500
Subject: net: ipa: make all configuration data constant

All of the platform configuration data should be constant, but
that isn't the case for the memory regions, interconnects, and
clocks.  Fix this.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 6 +++---
 drivers/net/ipa/ipa_data-sdm845.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 997b51ceb7d7..01681be40226 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -300,7 +300,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	},
 };
 
-static struct ipa_mem_data ipa_mem_data = {
+static const struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146a8000,
@@ -310,7 +310,7 @@ static struct ipa_mem_data ipa_mem_data = {
 };
 
 /* Interconnect bandwidths are in 1000 byte/second units */
-static struct ipa_interconnect_data ipa_interconnect_data[] = {
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 	{
 		.name			= "memory",
 		.peak_bandwidth		= 465000,	/* 465 MBps */
@@ -329,7 +329,7 @@ static struct ipa_interconnect_data ipa_interconnect_data[] = {
 	},
 };
 
-static struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_clock_data ipa_clock_data = {
 	.core_clock_rate	= 100 * 1000 * 1000,	/* Hz */
 	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
 	.interconnect_data	= ipa_interconnect_data,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 88c9c3562ab7..401861e3c0aa 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -320,7 +320,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	},
 };
 
-static struct ipa_mem_data ipa_mem_data = {
+static const struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146bd000,
@@ -330,7 +330,7 @@ static struct ipa_mem_data ipa_mem_data = {
 };
 
 /* Interconnect bandwidths are in 1000 byte/second units */
-static struct ipa_interconnect_data ipa_interconnect_data[] = {
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
 	{
 		.name			= "memory",
 		.peak_bandwidth		= 600000,	/* 600 MBps */
@@ -349,7 +349,7 @@ static struct ipa_interconnect_data ipa_interconnect_data[] = {
 	},
 };
 
-static struct ipa_clock_data ipa_clock_data = {
+static const struct ipa_clock_data ipa_clock_data = {
 	.core_clock_rate	= 75 * 1000 * 1000,	/* Hz */
 	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
 	.interconnect_data	= ipa_interconnect_data,
-- 
cgit v1.2.3


From 22e3b314302c004bb216ed742e912e866b6d31a6 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 19 Mar 2021 10:24:19 -0500
Subject: net: ipa: fix canary count for SC7180 UC_INFO region

There should be no canary values written before the beginning of the
UC_INFO memory region.  This was correct for SDM845, but somehow was
committed with the wrong value for SC7180.

This bug seems to cause no harm, so we'll just correct it without
back-porting.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 01681be40226..434869508a21 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -206,7 +206,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_INFO] = {
 		.offset		= 0x0080,
 		.size		= 0x0200,
-		.canary_count	= 2,
+		.canary_count	= 0,
 	},
 	[IPA_MEM_V4_FILTER_HASHED] = {
 		.offset		= 0x0288,
-- 
cgit v1.2.3


From 8f692169b13843532a9eae5ad7de692223c85b7f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 19 Mar 2021 10:24:20 -0500
Subject: net: ipa: don't define empty memory regions

The AP_HEADER memory region for both the SDM845 and SC7180 SoCs has
zero size, and has no canaries.  Defining an offset for such a
zero-length region is not meaningful, so it's better not to define
it at all.  The size of this region is used in the code, but its
value will still be zero because the memory regions are defined in
statically initialized memory.

For the SC7180, the STATS_DROP memory region has a zero size and no
canaries as well.

These regions are the only place where a zero-sized region is
defined despite having no canaries.  Remove them.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 10 ----------
 drivers/net/ipa/ipa_data-sdm845.c |  5 -----
 2 files changed, 15 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 434869508a21..0bdb60f6755c 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -253,11 +253,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
-		.offset		= 0x05e8,
-		.size		= 0x0000,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
 		.offset		= 0x05f0,
 		.size		= 0x0200,
@@ -283,11 +278,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 0,
 	},
-	[IPA_MEM_STATS_DROP] = {
-		.offset		= 0x0bf0,
-		.size		= 0,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM] = {
 		.offset		= 0x0bf0,
 		.size		= 0x140c,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 401861e3c0aa..8cae9325eb08 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -293,11 +293,6 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0140,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_AP_HEADER] = {
-		.offset		= 0x07c8,
-		.size		= 0x0000,
-		.canary_count	= 0,
-	},
 	[IPA_MEM_MODEM_PROC_CTX] = {
 		.offset		= 0x07d0,
 		.size		= 0x0200,
-- 
cgit v1.2.3


From 2ef88644e5d4a81c21d18532529195012e14566f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 19 Mar 2021 10:24:21 -0500
Subject: net: ipa: define some new memory regions

There are several memory regions that are defined starting with IPA
v4.0, but which were not used for the SC7180 SoC (IPA v4.2).  Even
though they're not used (yet), define them so they are ready to be
used for SoCs when they become supported.

There are two QUOTA statistics memory regions, one for the modem and
one for the AP.  Define distinct names for these regions, and get
rid of the definition of IPA_MEM_STATS_QUOTA.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  2 +-
 drivers/net/ipa/ipa_mem.h         | 10 +++++++++-
 drivers/net/ipa/ipa_qmi.c         |  2 +-
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 0bdb60f6755c..f65abc19ae9d 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -268,7 +268,7 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 		.size		= 0x0050,
 		.canary_count	= 2,
 	},
-	[IPA_MEM_STATS_QUOTA] = {
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
 		.offset		= 0x0a50,
 		.size		= 0x0060,
 		.canary_count	= 2,
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index f99180f84f0d..f82e8939622b 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -28,6 +28,7 @@ struct ipa_mem_data;
  * The set of memory regions is defined in configuration data.  They are
  * subject to these constraints:
  * - a zero offset and zero size represents and undefined region
+ * - a region's size does not include space for its "canary" values
  * - a region's offset is defined to be *past* all "canary" values
  * - offset must be large enough to account for all canaries
  * - a region's size may be zero, but may still have canaries
@@ -56,9 +57,16 @@ enum ipa_mem_id {
 	IPA_MEM_AP_HEADER,		/* 0 canaries */
 	IPA_MEM_MODEM_PROC_CTX,		/* 2 canaries */
 	IPA_MEM_AP_PROC_CTX,		/* 0 canaries */
+	IPA_MEM_NAT_TABLE,		/* 4 canaries (IPA v4.5 and above) */
 	IPA_MEM_PDN_CONFIG,		/* 2 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_QUOTA,		/* 2 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_QUOTA_MODEM,	/* 2 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_QUOTA_AP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_STATS_TETHERING,	/* 0 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_V4_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V6_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V4_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_V6_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
+	IPA_MEM_STATS_FILTER_ROUTE,	/* 0 canaries (IPA v4.5 and above) */
 	IPA_MEM_STATS_DROP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_MODEM,			/* 0 canaries */
 	IPA_MEM_UC_EVENT_RING,		/* 1 canary */
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index 2fc64483f275..af8666b89b37 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -378,7 +378,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 	/* None of the stats fields are valid (IPA v4.0 and above) */
 
 	if (ipa->version != IPA_VERSION_3_5_1) {
-		mem = &ipa->mem[IPA_MEM_STATS_QUOTA];
+		mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM];
 		if (mem->size) {
 			req.hw_stats_quota_base_addr_valid = 1;
 			req.hw_stats_quota_base_addr =
-- 
cgit v1.2.3


From 37537fa8e97379afa68959094242b1601407e205 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 19 Mar 2021 10:24:22 -0500
Subject: net: ipa: define QSB limits in configuration data

Define the maximum number of reads and writes to configure for the
QSB masters used for IPA in configuration data.

We don't use these values yet; the next commit takes care of that.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 10 ++++++++++
 drivers/net/ipa/ipa_data-sdm845.c | 14 ++++++++++++++
 drivers/net/ipa/ipa_data.h        | 24 ++++++++++++++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index f65abc19ae9d..216f790b22b6 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -9,6 +9,14 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* QSB configuration for the SC7180 SoC. */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 12,
+	},
+};
+
 /* Endpoint configuration for the SC7180 SoC. */
 static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	[IPA_ENDPOINT_AP_COMMAND_TX] = {
@@ -328,6 +336,8 @@ static const struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SC7180 SoC. */
 const struct ipa_data ipa_data_sc7180 = {
 	.version	= IPA_VERSION_4_2,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 8cae9325eb08..d9659fd22322 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -11,6 +11,18 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* QSB configuration for the SDM845 SoC. */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 8,
+	},
+	[IPA_QSB_MASTER_PCIE] = {
+		.max_writes	= 4,
+		.max_reads	= 12,
+	},
+};
+
 /* Endpoint configuration for the SDM845 SoC. */
 static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	[IPA_ENDPOINT_AP_COMMAND_TX] = {
@@ -353,6 +365,8 @@ static const struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SDM845 SoC. */
 const struct ipa_data ipa_data_sdm845 = {
 	.version	= IPA_VERSION_3_5_1,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index b476fc373f7f..d50cd5ae7714 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -49,6 +49,22 @@
 #define IPA_RESOURCE_GROUP_SRC_MAX	5
 #define IPA_RESOURCE_GROUP_DST_MAX	5
 
+/** enum ipa_qsb_master_id - array index for IPA QSB configuration data */
+enum ipa_qsb_master_id {
+	IPA_QSB_MASTER_DDR,
+	IPA_QSB_MASTER_PCIE,
+};
+
+/**
+ * struct ipa_qsb_data - Qualcomm System Bus configuration data
+ * @max_writes:	Maximum outstanding write requests for this master
+ * @max_reads:	Maximum outstanding read requests for this master
+ */
+struct ipa_qsb_data {
+	u8 max_writes;
+	u8 max_reads;
+};
+
 /**
  * struct gsi_channel_data - GSI channel configuration data
  * @tre_count:		number of TREs in the channel ring
@@ -285,14 +301,18 @@ struct ipa_clock_data {
 /**
  * struct ipa_data - combined IPA/GSI configuration data
  * @version:		IPA hardware version
- * @endpoint_count:	number of entries in endpoint_data array
+ * @qsb_count:		number of entries in the qsb_data array
+ * @qsb_data:		Qualcomm System Bus configuration data
+ * @endpoint_count:	number of entries in the endpoint_data array
  * @endpoint_data:	IPA endpoint/GSI channel data
  * @resource_data:	IPA resource configuration data
- * @mem_count:		number of entries in mem_data array
+ * @mem_count:		number of entries in the mem_data array
  * @mem_data:		IPA-local shared memory region data
  */
 struct ipa_data {
 	enum ipa_version version;
+	u32 qsb_count;		/* # entries in qsb_data[] */
+	const struct ipa_qsb_data *qsb_data;
 	u32 endpoint_count;	/* # entries in endpoint_data[] */
 	const struct ipa_gsi_endpoint_data *endpoint_data;
 	const struct ipa_resource_data *resource_data;
-- 
cgit v1.2.3


From 919067cc845f323a80b6fe987b64238bd82d309e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Mar 2021 10:39:33 -0700
Subject: net: add CONFIG_PCPU_DEV_REFCNT

I was working on a syzbot issue, claiming one device could not be
dismantled because its refcount was -1

unregister_netdevice: waiting for sit0 to become free. Usage count = -1

It would be nice if syzbot could trigger a warning at the time
this reference count became negative.

This patch adds CONFIG_PCPU_DEV_REFCNT options which defaults
to per cpu variables (as before this patch) on SMP builds.

v2: free_dev label in alloc_netdev_mqs() is moved to avoid
    a compiler warning (-Wunused-label), as reported
    by kernel test robot <lkp@intel.com>

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 13 +++++++++++++
 net/Kconfig               |  8 ++++++++
 net/core/dev.c            | 10 ++++++++++
 3 files changed, 31 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4940509999be..8f003955c485 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2092,7 +2092,12 @@ struct net_device {
 	u32                     proto_down_reason;
 
 	struct list_head	todo_list;
+
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	int __percpu		*pcpu_refcnt;
+#else
+	refcount_t		dev_refcnt;
+#endif
 
 	struct list_head	link_watch_list;
 
@@ -4044,7 +4049,11 @@ void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	this_cpu_dec(*dev->pcpu_refcnt);
+#else
+	refcount_dec(&dev->dev_refcnt);
+#endif
 }
 
 /**
@@ -4055,7 +4064,11 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	this_cpu_inc(*dev->pcpu_refcnt);
+#else
+	refcount_inc(&dev->dev_refcnt);
+#endif
 }
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
diff --git a/net/Kconfig b/net/Kconfig
index 0ead7ec0d2bd..9c456acc379e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -245,6 +245,14 @@ source "net/l3mdev/Kconfig"
 source "net/qrtr/Kconfig"
 source "net/ncsi/Kconfig"
 
+config PCPU_DEV_REFCNT
+	bool "Use percpu variables to maintain network device refcount"
+	depends on SMP
+	default y
+	help
+	  network device refcount are using per cpu variables if this option is set.
+	  This can be forced to N to detect underflows (with a performance drop).
+
 config RPS
 	bool
 	depends on SMP && SYSFS
diff --git a/net/core/dev.c b/net/core/dev.c
index 4961fc2e9b19..be941ed754ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10312,11 +10312,15 @@ EXPORT_SYMBOL(register_netdev);
 
 int netdev_refcnt_read(const struct net_device *dev)
 {
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	int i, refcnt = 0;
 
 	for_each_possible_cpu(i)
 		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
 	return refcnt;
+#else
+	return refcount_read(&dev->dev_refcnt);
+#endif
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
@@ -10674,9 +10678,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
 		goto free_dev;
+#endif
 
 	if (dev_addr_init(dev))
 		goto free_pcpu;
@@ -10740,8 +10746,10 @@ free_all:
 	return NULL;
 
 free_pcpu:
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 free_dev:
+#endif
 	netdev_freemem(dev);
 	return NULL;
 }
@@ -10783,8 +10791,10 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+#ifdef CONFIG_PCPU_DEV_REFCNT
 	free_percpu(dev->pcpu_refcnt);
 	dev->pcpu_refcnt = NULL;
+#endif
 	free_percpu(dev->xdp_bulkq);
 	dev->xdp_bulkq = NULL;
 
-- 
cgit v1.2.3


From 1fa81e259b49479fc54658a6e6c3ff629e35777a Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 17 Jan 2021 10:16:45 +0200
Subject: igc: Remove unused MII_CR_SPEED

Force PHY speed not supported for i225 devices.
MII_CR_SPEED masks not in use in i225 device and can be removed.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index ca460bf1af2f..59043c3c5657 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -442,9 +442,6 @@
 #define MII_CR_POWER_DOWN	0x0800  /* Power down */
 #define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
 #define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
-#define MII_CR_SPEED_1000	0x0040
-#define MII_CR_SPEED_100	0x2000
-#define MII_CR_SPEED_10		0x0000
 
 /* PHY Status Register */
 #define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
-- 
cgit v1.2.3


From a5d86bd969ea8501cb2e551d3ec8321867bce126 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 14 Feb 2021 09:17:49 +0200
Subject: igc: Remove unused MII_CR_LOOPBACK

MII_CR_LOOPBACK masks not in use in i225 device and can be removed.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 59043c3c5657..35ed997af075 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -441,7 +441,6 @@
 #define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
 #define MII_CR_POWER_DOWN	0x0800  /* Power down */
 #define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
-#define MII_CR_LOOPBACK		0x4000  /* 0 = normal, 1 = loopback */
 
 /* PHY Status Register */
 #define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
-- 
cgit v1.2.3


From 501f23092ddb2e071a3464db6744e92c22535c6e Mon Sep 17 00:00:00 2001
From: Jiapeng Zhong <abaci-bugfix@linux.alibaba.com>
Date: Wed, 20 Jan 2021 14:22:58 +0800
Subject: igc: Assign boolean values to a bool variable

Fix the following coccicheck warnings:

./drivers/net/ethernet/intel/igc/igc_main.c:4961:2-14: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4955:2-14: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4933:1-13: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4592:1-24: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4438:2-25: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4396:2-25: WARNING:
Assignment of 0/1 to bool variable.

./drivers/net/ethernet/intel/igc/igc_main.c:4018:2-25: WARNING:
Assignment of 0/1 to bool variable.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Zhong <abaci-bugfix@linux.alibaba.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 059ffcfb0bda..a476837eafca 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3580,7 +3580,7 @@ void igc_up(struct igc_adapter *adapter)
 	netif_tx_start_all_queues(adapter->netdev);
 
 	/* start the watchdog. */
-	hw->mac.get_link_status = 1;
+	hw->mac.get_link_status = true;
 	schedule_work(&adapter->watchdog_task);
 }
 
@@ -4000,7 +4000,7 @@ static irqreturn_t igc_msix_other(int irq, void *data)
 	}
 
 	if (icr & IGC_ICR_LSC) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4378,7 +4378,7 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
 	}
 
 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
@@ -4420,7 +4420,7 @@ static irqreturn_t igc_intr(int irq, void *data)
 	}
 
 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
-		hw->mac.get_link_status = 1;
+		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__IGC_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4574,7 +4574,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
 	netif_tx_start_all_queues(netdev);
 
 	/* start the watchdog. */
-	hw->mac.get_link_status = 1;
+	hw->mac.get_link_status = true;
 	schedule_work(&adapter->watchdog_task);
 
 	return IGC_SUCCESS;
@@ -4915,7 +4915,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 {
 	struct igc_mac_info *mac = &adapter->hw.mac;
 
-	mac->autoneg = 0;
+	mac->autoneg = false;
 
 	/* Make sure dplx is at most 1 bit and lsb of speed is not set
 	 * for the switch() below to work
@@ -4937,13 +4937,13 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 		mac->forced_speed_duplex = ADVERTISE_100_FULL;
 		break;
 	case SPEED_1000 + DUPLEX_FULL:
-		mac->autoneg = 1;
+		mac->autoneg = true;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 	case SPEED_1000 + DUPLEX_HALF: /* not supported */
 		goto err_inval;
 	case SPEED_2500 + DUPLEX_FULL:
-		mac->autoneg = 1;
+		mac->autoneg = true;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
 		break;
 	case SPEED_2500 + DUPLEX_HALF: /* not supported */
-- 
cgit v1.2.3


From f2d75b178532087c15b2a86ec79e443177611b7a Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 17 Mar 2021 14:52:34 +0000
Subject: e1000e: Mark e1000e_pm_prepare() as __maybe_unused

The function e1000e_pm_prepare() may have no callers depending
on configuration, so it must be marked __maybe_unused to avoid
harmless warning:

drivers/net/ethernet/intel/e1000e/netdev.c:6926:12:
 warning: 'e1000e_pm_prepare' defined but not used [-Wunused-function]
 6926 | static int e1000e_pm_prepare(struct device *dev)
      |            ^~~~~~~~~~~~~~~~~

Fixes: ccf8b940e5fd ("e1000e: Leverage direct_complete to speed up s2ram")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 480f6712a3b6..39ca02b7fdc5 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6919,7 +6919,7 @@ static int __e1000_resume(struct pci_dev *pdev)
 	return 0;
 }
 
-static int e1000e_pm_prepare(struct device *dev)
+static __maybe_unused int e1000e_pm_prepare(struct device *dev)
 {
 	return pm_runtime_suspended(dev) &&
 		pm_suspend_via_firmware();
-- 
cgit v1.2.3


From ea24b19562fe5f72c78319dbb347b701818956d9 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Fri, 19 Mar 2021 19:21:17 +0000
Subject: libbpf: Add explicit padding to btf_dump_emit_type_decl_opts

Similar to
https://lore.kernel.org/bpf/20210313210920.1959628-2-andrii@kernel.org/

When DECLARE_LIBBPF_OPTS is used with inline field initialization, e.g:

  DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
    .field_name = var_ident,
    .indent_level = 2,
    .strip_mods = strip_mods,
  );

and compiled in debug mode, the compiler generates code which
leaves the padding uninitialized and triggers errors within libbpf APIs
which require strict zero initialization of OPTS structs.

Adding anonymous padding field fixes the issue.

Fixes: 9f81654eebe8 ("libbpf: Expose BTF-to-C type declaration emitting API")
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210319192117.2310658-1-kpsingh@kernel.org
---
 tools/lib/bpf/btf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 3b0b17ba94a1..b54f1c3ebd57 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -176,6 +176,7 @@ struct btf_dump_emit_type_decl_opts {
 	int indent_level;
 	/* strip all the const/volatile/restrict mods */
 	bool strip_mods;
+	size_t :0;
 };
 #define btf_dump_emit_type_decl_opts__last_field strip_mods
 
-- 
cgit v1.2.3


From 9ef05281e5d01dffdf75a4ae5c1aee7cbd7aaa73 Mon Sep 17 00:00:00 2001
From: Jianlin Lv <Jianlin.Lv@arm.com>
Date: Thu, 18 Mar 2021 10:48:51 +0800
Subject: bpf: Remove insn_buf[] declaration in inner block

Two insn_buf[16] variables are declared in the function which acts on
function scope and block scope respectively. The statement in the inner
block is redundant, so remove it.

Signed-off-by: Jianlin Lv <Jianlin.Lv@arm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210318024851.49693-1-Jianlin.Lv@arm.com
---
 kernel/bpf/verifier.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f9096b049cd6..e26c5170c953 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11899,7 +11899,6 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
-			struct bpf_insn insn_buf[16];
 			struct bpf_insn *patch = &insn_buf[0];
 			bool issrc, isneg;
 			u32 off_reg;
-- 
cgit v1.2.3


From b29648ad5b2ade03edd3f4364b5616b07d74abe4 Mon Sep 17 00:00:00 2001
From: Sai Kalyaan Palla <saikalyaan63@gmail.com>
Date: Sat, 20 Mar 2021 11:45:12 +0530
Subject: net: decnet: Fixed multiple coding style issues

Made changes to coding style as suggested by checkpatch.pl
changes are of the type:
	open brace '{' following struct go on the same line
	do not use assignment in if condition

Signed-off-by: Sai Kalyaan Palla <saikalyaan63@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 37c90c1fdc93..2f1497797861 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -84,8 +84,7 @@
 #include <net/dn_neigh.h>
 #include <net/dn_fib.h>
 
-struct dn_rt_hash_bucket
-{
+struct dn_rt_hash_bucket {
 	struct dn_route __rcu *chain;
 	spinlock_t lock;
 };
@@ -359,7 +358,8 @@ static void dn_run_flush(struct timer_list *unused)
 	for (i = 0; i < dn_rt_hash_mask; i++) {
 		spin_lock_bh(&dn_rt_hash_table[i].lock);
 
-		if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
+		rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL);
+		if (!rt)
 			goto nothing_to_declare;
 
 		for(; rt; rt = next) {
@@ -425,7 +425,8 @@ static int dn_return_short(struct sk_buff *skb)
 	/* Add back headers */
 	skb_push(skb, skb->data - skb_network_header(skb));
 
-	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
 		return NET_RX_DROP;
 
 	cb = DN_SKB_CB(skb);
@@ -461,7 +462,8 @@ static int dn_return_long(struct sk_buff *skb)
 	/* Add back all headers */
 	skb_push(skb, skb->data - skb_network_header(skb));
 
-	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
 		return NET_RX_DROP;
 
 	cb = DN_SKB_CB(skb);
@@ -505,7 +507,8 @@ static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *
 	struct dn_skb_cb *cb;
 	int err;
 
-	if ((err = dn_route_input(skb)) == 0)
+	err = dn_route_input(skb);
+	if (err == 0)
 		return dst_input(skb);
 
 	cb = DN_SKB_CB(skb);
@@ -629,7 +632,8 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 	if (dn == NULL)
 		goto dump_it;
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
 		goto out;
 
 	if (!pskb_may_pull(skb, 3))
@@ -1324,7 +1328,8 @@ static int dn_route_input_slow(struct sk_buff *skb)
 
 	dev_hold(in_dev);
 
-	if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
+	dn_db = rcu_dereference(in_dev->dn_ptr);
+	if (!dn_db)
 		goto out;
 
 	/* Zero source addresses are not allowed */
-- 
cgit v1.2.3


From 8a81efac9417b7e4b2d3e19442f22dcf727944d5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 20 Mar 2021 10:57:03 -0500
Subject: net: ipa: use configuration data for QSB settings

Use the QSB configuration data in ipa_hardware_config_qsb(), rather
than determining in code what values to use based on IPA version.
Pass configuration data to ipa_hardware_config() so it can be passed
to ipa_hardware_config_qsb().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 73 ++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index d354e3e65ec5..1ce593b46b04 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -249,53 +249,35 @@ static void ipa_hardware_config_comp(struct ipa *ipa)
 	iowrite32(val, ipa->reg_virt + IPA_REG_COMP_CFG_OFFSET);
 }
 
-/* Configure DDR and PCIe max read/write QSB values */
-static void ipa_hardware_config_qsb(struct ipa *ipa)
+/* Configure DDR and (possibly) PCIe max read/write QSB values */
+static void
+ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
 {
-	enum ipa_version version = ipa->version;
-	u32 max0;
-	u32 max1;
+	const struct ipa_qsb_data *data0;
+	const struct ipa_qsb_data *data1;
 	u32 val;
 
-	/* QMB_0 represents DDR; QMB_1 represents PCIe */
-	val = u32_encode_bits(8, GEN_QMB_0_MAX_WRITES_FMASK);
-	switch (version) {
-	case IPA_VERSION_4_2:
-		max1 = 0;		/* PCIe not present */
-		break;
-	case IPA_VERSION_4_5:
-		max1 = 8;
-		break;
-	default:
-		max1 = 4;
-		break;
-	}
-	val |= u32_encode_bits(max1, GEN_QMB_1_MAX_WRITES_FMASK);
+	/* assert(data->qsb_count > 0); */
+	/* assert(data->qsb_count < 3); */
+
+	/* QMB 0 represents DDR; QMB 1 (if present) represents PCIe */
+	data0 = &data->qsb_data[IPA_QSB_MASTER_DDR];
+	if (data->qsb_count > 1)
+		data1 = &data->qsb_data[IPA_QSB_MASTER_PCIE];
+
+	/* Max outstanding write accesses for QSB masters */
+	val = u32_encode_bits(data0->max_writes, GEN_QMB_0_MAX_WRITES_FMASK);
+	if (data->qsb_count > 1)
+		val |= u32_encode_bits(data1->max_writes,
+				       GEN_QMB_1_MAX_WRITES_FMASK);
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_WRITES_OFFSET);
 
-	max1 = 12;
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-		max0 = 8;
-		break;
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		max0 = 12;
-		break;
-	case IPA_VERSION_4_2:
-		max0 = 12;
-		max1 = 0;		/* PCIe not present */
-		break;
-	case IPA_VERSION_4_5:
-		max0 = 0;		/* No limit (hardware maximum) */
-		break;
-	}
-	val = u32_encode_bits(max0, GEN_QMB_0_MAX_READS_FMASK);
-	val |= u32_encode_bits(max1, GEN_QMB_1_MAX_READS_FMASK);
-	if (version != IPA_VERSION_3_5_1) {
-		/* GEN_QMB_0_MAX_READS_BEATS is 0 */
-		/* GEN_QMB_1_MAX_READS_BEATS is 0 */
-	}
+	/* Max outstanding read accesses for QSB masters */
+	val = u32_encode_bits(data0->max_reads, GEN_QMB_0_MAX_READS_FMASK);
+	/* GEN_QMB_0_MAX_READS_BEATS is 0 (IPA v4.0 and above) */
+	if (data->qsb_count > 1)
+		val |= u32_encode_bits(data1->max_reads,
+				       GEN_QMB_1_MAX_READS_FMASK);
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_READS_OFFSET);
 }
 
@@ -385,8 +367,9 @@ static void ipa_hardware_dcd_deconfig(struct ipa *ipa)
 /**
  * ipa_hardware_config() - Primitive hardware initialization
  * @ipa:	IPA pointer
+ * @data:	IPA configuration data
  */
-static void ipa_hardware_config(struct ipa *ipa)
+static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data)
 {
 	enum ipa_version version = ipa->version;
 	u32 granularity;
@@ -414,7 +397,7 @@ static void ipa_hardware_config(struct ipa *ipa)
 	ipa_hardware_config_comp(ipa);
 
 	/* Configure system bus limits */
-	ipa_hardware_config_qsb(ipa);
+	ipa_hardware_config_qsb(ipa, data);
 
 	if (version < IPA_VERSION_4_5) {
 		/* Configure aggregation timer granularity */
@@ -610,7 +593,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 	 */
 	ipa_clock_get(ipa);
 
-	ipa_hardware_config(ipa);
+	ipa_hardware_config(ipa, data);
 
 	ret = ipa_endpoint_config(ipa);
 	if (ret)
-- 
cgit v1.2.3


From b9aa0805ed31ed95c720f1a0bb606de2988b3ef5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 20 Mar 2021 10:57:04 -0500
Subject: net: ipa: implement MAX_READS_BEATS QSB data

Starting with IPA v4.0, a limit is placed on the number of bytes
outstanding in a transaction, to reduce latency.  The limit is
imposed only if this value is non-zero.

We don't use a non-zero value for SC7180, but newer versions of IPA
do.  Prepare for that by allowing a programmed value to be specified
in the platform configuration data.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  1 +
 drivers/net/ipa/ipa_data.h        |  2 ++
 drivers/net/ipa/ipa_main.c        | 10 ++++++++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 216f790b22b6..8fa10d0d9a4e 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -14,6 +14,7 @@ static const struct ipa_qsb_data ipa_qsb_data[] = {
 	[IPA_QSB_MASTER_DDR] = {
 		.max_writes	= 8,
 		.max_reads	= 12,
+		/* no outstanding read byte (beat) limit */
 	},
 };
 
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index d50cd5ae7714..4162c4722c00 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -59,10 +59,12 @@ enum ipa_qsb_master_id {
  * struct ipa_qsb_data - Qualcomm System Bus configuration data
  * @max_writes:	Maximum outstanding write requests for this master
  * @max_reads:	Maximum outstanding read requests for this master
+ * @max_reads_beats: Max outstanding read bytes in 8-byte "beats" (if non-zero)
  */
 struct ipa_qsb_data {
 	u8 max_writes;
 	u8 max_reads;
+	u8 max_reads_beats;		/* Not present for IPA v3.5.1 */
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 1ce593b46b04..64b92dfdd3f5 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -274,10 +274,16 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
 
 	/* Max outstanding read accesses for QSB masters */
 	val = u32_encode_bits(data0->max_reads, GEN_QMB_0_MAX_READS_FMASK);
-	/* GEN_QMB_0_MAX_READS_BEATS is 0 (IPA v4.0 and above) */
-	if (data->qsb_count > 1)
+	if (ipa->version >= IPA_VERSION_4_0)
+		val |= u32_encode_bits(data0->max_reads_beats,
+				       GEN_QMB_0_MAX_READS_BEATS_FMASK);
+	if (data->qsb_count > 1) {
 		val |= u32_encode_bits(data1->max_reads,
 				       GEN_QMB_1_MAX_READS_FMASK);
+		if (ipa->version >= IPA_VERSION_4_0)
+			val |= u32_encode_bits(data1->max_reads_beats,
+					       GEN_QMB_1_MAX_READS_BEATS_FMASK);
+	}
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_READS_OFFSET);
 }
 
-- 
cgit v1.2.3


From 8ee5df6598ff3c04f3842c87fa326d7cdbec9dd2 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 20 Mar 2021 10:57:05 -0500
Subject: net: ipa: split sequencer type in two

An IPA endpoint has a sequencer that must be configured based on how
the endpoint is to be used.  Currently the IPA code programs the
sequencer type by splitting a value into four 4-bit nibbles.  Doing
that doesn't really add much value, and regardless, a better way of
splitting the sequencer type is into two halves--the lower byte
describing how normal packet processing is handled, and the next
byte describing information about processing replicas.

So split the sequencer type into two sub-parts:  the sequencer type
and the replication sequencer type.  Define the values supported for
the "main" sequencer type, and define the values supported for the
replication part separately.

In addition, the sequencer type names are quite verbose, encoding
what the type includes, but also what it *excludes*.  Rename the
sequencer types in a way that mainly describes the number of passes
that a packet takes through the IPA processing pipeline, and how
many of those passes end by supplying the processed packet to the
microprocessor.

The result expands the supported types beyond what is required for
now, but simplifies the way these are defined.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  8 ++++---
 drivers/net/ipa/ipa_data-sdm845.c |  7 +++---
 drivers/net/ipa/ipa_data.h        |  6 +++--
 drivers/net/ipa/ipa_endpoint.c    | 13 +++++------
 drivers/net/ipa/ipa_endpoint.h    |  1 +
 drivers/net/ipa/ipa_reg.h         | 48 +++++++++++++++++++++++++--------------
 6 files changed, 51 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 8fa10d0d9a4e..fd2265d032cc 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -31,7 +31,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA_ONLY,
+			.seq_type	= IPA_SEQ_DMA,
 			.config = {
 				.resource_group	= 0,
 				.dma_mode	= true,
@@ -51,6 +51,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.seq_type	= IPA_SEQ_INVALID,
+			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.aggregation	= true,
@@ -73,8 +74,8 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	=
-				IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP,
+			.seq_type	= IPA_SEQ_1_PASS_SKIP_LAST_UC,
+			.seq_rep_type	= IPA_SEQ_REP_DMA_PARSER,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
@@ -99,6 +100,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.seq_type	= IPA_SEQ_INVALID,
+			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index d9659fd22322..7f7625cd96b0 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -36,7 +36,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA_ONLY,
+			.seq_type	= IPA_SEQ_DMA,
 			.config = {
 				.resource_group	= 1,
 				.dma_mode	= true,
@@ -56,6 +56,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.seq_type	= IPA_SEQ_INVALID,
+			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.aggregation	= true,
@@ -78,8 +79,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	=
-				IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP,
+			.seq_type	= IPA_SEQ_2_PASS_SKIP_LAST_UC,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
@@ -104,6 +104,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.seq_type	= IPA_SEQ_INVALID,
+			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 4162c4722c00..8808941f44af 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -154,7 +154,8 @@ struct ipa_endpoint_config_data {
 /**
  * struct ipa_endpoint_data - IPA endpoint configuration data
  * @filter_support:	whether endpoint supports filtering
- * @seq_type:		hardware sequencer type used for endpoint
+ * @seq_type:		primary packet processing sequencer type
+ * @seq_rep_type:	sequencer type for replication processing
  * @config:		hardware configuration (see above)
  *
  * Not all endpoints support the IPA filtering capability.  A filter table
@@ -170,8 +171,9 @@ struct ipa_endpoint_config_data {
  */
 struct ipa_endpoint_data {
 	bool filter_support;
-	/* The next two are specified only for AP endpoints */
+	/* The next three are specified only for AP endpoints */
 	enum ipa_seq_type seq_type;
+	enum ipa_seq_rep_type seq_rep_type;
 	struct ipa_endpoint_config_data config;
 };
 
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 7209ee3c3124..aab66bc4f256 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -884,18 +884,16 @@ static void ipa_endpoint_init_rsrc_grp(struct ipa_endpoint *endpoint)
 static void ipa_endpoint_init_seq(struct ipa_endpoint *endpoint)
 {
 	u32 offset = IPA_REG_ENDP_INIT_SEQ_N_OFFSET(endpoint->endpoint_id);
-	u32 seq_type = endpoint->seq_type;
 	u32 val = 0;
 
 	if (!endpoint->toward_ipa)
 		return;		/* Register not valid for RX endpoints */
 
-	/* Sequencer type is made up of four nibbles */
-	val |= u32_encode_bits(seq_type & 0xf, HPS_SEQ_TYPE_FMASK);
-	val |= u32_encode_bits((seq_type >> 4) & 0xf, DPS_SEQ_TYPE_FMASK);
-	/* The second two apply to replicated packets */
-	val |= u32_encode_bits((seq_type >> 8) & 0xf, HPS_REP_SEQ_TYPE_FMASK);
-	val |= u32_encode_bits((seq_type >> 12) & 0xf, DPS_REP_SEQ_TYPE_FMASK);
+	/* Low-order byte configures primary packet processing */
+	val |= u32_encode_bits(endpoint->seq_type, SEQ_TYPE_FMASK);
+
+	/* Second byte configures replicated packet processing */
+	val |= u32_encode_bits(endpoint->seq_rep_type, SEQ_REP_TYPE_FMASK);
 
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
@@ -1767,6 +1765,7 @@ static void ipa_endpoint_init_one(struct ipa *ipa, enum ipa_endpoint_name name,
 	endpoint->ipa = ipa;
 	endpoint->ee_id = data->ee_id;
 	endpoint->seq_type = data->endpoint.seq_type;
+	endpoint->seq_rep_type = data->endpoint.seq_rep_type;
 	endpoint->channel_id = data->channel_id;
 	endpoint->endpoint_id = data->endpoint_id;
 	endpoint->toward_ipa = data->toward_ipa;
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 881ecc27bd6e..c48f5324f83c 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -47,6 +47,7 @@ enum ipa_endpoint_name {
 struct ipa_endpoint {
 	struct ipa *ipa;
 	enum ipa_seq_type seq_type;
+	enum ipa_seq_rep_type seq_rep_type;
 	enum gsi_ee_id ee_id;
 	u32 channel_id;
 	u32 endpoint_id;
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 732e691e9aa6..a7ea11a5d225 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -580,28 +580,42 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
 /* Valid only for TX (IPA consumer) endpoints */
 #define IPA_REG_ENDP_INIT_SEQ_N_OFFSET(txep) \
 					(0x0000083c + 0x0070 * (txep))
-#define HPS_SEQ_TYPE_FMASK			GENMASK(3, 0)
-#define DPS_SEQ_TYPE_FMASK			GENMASK(7, 4)
-#define HPS_REP_SEQ_TYPE_FMASK			GENMASK(11, 8)
-#define DPS_REP_SEQ_TYPE_FMASK			GENMASK(15, 12)
+#define SEQ_TYPE_FMASK				GENMASK(7, 0)
+#define SEQ_REP_TYPE_FMASK			GENMASK(15, 8)
 
 /**
- * enum ipa_seq_type - HPS and DPS sequencer type fields in ENDP_INIT_SEQ_N
- * @IPA_SEQ_DMA_ONLY:		only DMA is performed
- * @IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP:
- *	second packet processing pass + no decipher + microcontroller
- * @IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP:
- *	packet processing + no decipher + no uCP + HPS REP DMA parser
- * @IPA_SEQ_INVALID:		invalid sequencer type
+ * enum ipa_seq_type - HPS and DPS sequencer type
  *
- * The values defined here are broken into 4-bit nibbles that are written
- * into fields of the ENDP_INIT_SEQ registers.
+ * The low-order byte of the sequencer type register defines the number of
+ * passes a packet takes through the IPA pipeline.  The last pass through can
+ * optionally skip the microprocessor.  Deciphering is optional for all types;
+ * if enabled, an additional mask (two bits) is added to the type value.
+ *
+ * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are
+ * supported (or meaningful).
  */
+#define IPA_SEQ_DECIPHER			0x11
 enum ipa_seq_type {
-	IPA_SEQ_DMA_ONLY			= 0x0000,
-	IPA_SEQ_2ND_PKT_PROCESS_PASS_NO_DEC_UCP	= 0x0004,
-	IPA_SEQ_PKT_PROCESS_NO_DEC_NO_UCP_DMAP	= 0x0806,
-	IPA_SEQ_INVALID				= 0xffff,
+	IPA_SEQ_DMA				= 0x00,
+	IPA_SEQ_1_PASS				= 0x02,
+	IPA_SEQ_2_PASS_SKIP_LAST_UC		= 0x04,
+	IPA_SEQ_1_PASS_SKIP_LAST_UC		= 0x06,
+	IPA_SEQ_2_PASS				= 0x0a,
+	IPA_SEQ_3_PASS_SKIP_LAST_UC		= 0x0c,
+	IPA_SEQ_INVALID				= 0x0c,
+};
+
+/**
+ * enum ipa_seq_rep_type - replicated packet sequencer type
+ *
+ * This goes in the second byte of the endpoint sequencer type register.
+ *
+ * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are
+ * supported (or meaningful).
+ */
+enum ipa_seq_rep_type {
+	IPA_SEQ_REP_DMA_PARSER			= 0x08,
+	IPA_SEQ_REP_INVALID			= 0x0c,
 };
 
 #define IPA_REG_ENDP_STATUS_N_OFFSET(ep) \
-- 
cgit v1.2.3


From 1690d8a75d873bf3c45ddce319f4902956d84bb0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 20 Mar 2021 10:57:06 -0500
Subject: net: ipa: sequencer type is for TX endpoints only

We only program the sequencer type for TX endpoints.  So move the
definition of the sequencer type fields into the TX-specific portion
of the endpoint configuration data.  There's no need to maintain
this in the IPA structure; we can extract it from the configuration
data it points to in the one spot it's needed.

We previously specified the sequencer type for RX endpoints with
INVALID values.  These are no longer needed, so get rid of them.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 12 +++++-------
 drivers/net/ipa/ipa_data-sdm845.c | 10 ++++------
 drivers/net/ipa/ipa_data.h        | 13 +++++--------
 drivers/net/ipa/ipa_endpoint.c    |  7 +++----
 drivers/net/ipa/ipa_endpoint.h    |  2 --
 drivers/net/ipa/ipa_reg.h         |  2 --
 6 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index fd2265d032cc..621ad15c9e67 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -31,11 +31,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA,
 			.config = {
 				.resource_group	= 0,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
 			},
 		},
 	},
@@ -50,8 +52,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 6,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
-			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.aggregation	= true,
@@ -74,14 +74,14 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	= IPA_SEQ_1_PASS_SKIP_LAST_UC,
-			.seq_rep_type	= IPA_SEQ_REP_DMA_PARSER,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
 				.tx = {
+					.seq_type = IPA_SEQ_1_PASS_SKIP_LAST_UC,
+					.seq_rep_type = IPA_SEQ_REP_DMA_PARSER,
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
 				},
@@ -99,8 +99,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 6,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
-			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 0,
 				.checksum	= true,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 7f7625cd96b0..6b5173f47444 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -36,11 +36,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 20,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_DMA,
 			.config = {
 				.resource_group	= 1,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
 			},
 		},
 	},
@@ -55,8 +57,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 8,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
-			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.aggregation	= true,
@@ -79,13 +79,13 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.filter_support	= true,
-			.seq_type	= IPA_SEQ_2_PASS_SKIP_LAST_UC,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
 				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
 				},
@@ -103,8 +103,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.tlv_count	= 8,
 		},
 		.endpoint = {
-			.seq_type	= IPA_SEQ_INVALID,
-			.seq_rep_type	= IPA_SEQ_REP_INVALID,
 			.config = {
 				.resource_group	= 1,
 				.checksum	= true,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 8808941f44af..caf9b3f9577e 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -97,12 +97,16 @@ struct gsi_channel_data {
 
 /**
  * struct ipa_endpoint_tx_data - configuration data for TX endpoints
+ * @seq_type:		primary packet processing sequencer type
+ * @seq_rep_type:	sequencer type for replication processing
  * @status_endpoint:	endpoint to which status elements are sent
  *
  * The @status_endpoint is only valid if the endpoint's @status_enable
  * flag is set.
  */
 struct ipa_endpoint_tx_data {
+	enum ipa_seq_type seq_type;
+	enum ipa_seq_rep_type seq_rep_type;
 	enum ipa_endpoint_name status_endpoint;
 };
 
@@ -154,8 +158,6 @@ struct ipa_endpoint_config_data {
 /**
  * struct ipa_endpoint_data - IPA endpoint configuration data
  * @filter_support:	whether endpoint supports filtering
- * @seq_type:		primary packet processing sequencer type
- * @seq_rep_type:	sequencer type for replication processing
  * @config:		hardware configuration (see above)
  *
  * Not all endpoints support the IPA filtering capability.  A filter table
@@ -165,15 +167,10 @@ struct ipa_endpoint_config_data {
  * in the system, and indicate whether they support filtering.
  *
  * The remaining endpoint configuration data applies only to AP endpoints.
- * The IPA hardware is implemented by sequencers, and the AP must program
- * the type(s) of these sequencers at initialization time.  The remaining
- * endpoint configuration data is defined above.
  */
 struct ipa_endpoint_data {
 	bool filter_support;
-	/* The next three are specified only for AP endpoints */
-	enum ipa_seq_type seq_type;
-	enum ipa_seq_rep_type seq_rep_type;
+	/* Everything else is specified only for AP endpoints */
 	struct ipa_endpoint_config_data config;
 };
 
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index aab66bc4f256..88310d358557 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -890,10 +890,11 @@ static void ipa_endpoint_init_seq(struct ipa_endpoint *endpoint)
 		return;		/* Register not valid for RX endpoints */
 
 	/* Low-order byte configures primary packet processing */
-	val |= u32_encode_bits(endpoint->seq_type, SEQ_TYPE_FMASK);
+	val |= u32_encode_bits(endpoint->data->tx.seq_type, SEQ_TYPE_FMASK);
 
 	/* Second byte configures replicated packet processing */
-	val |= u32_encode_bits(endpoint->seq_rep_type, SEQ_REP_TYPE_FMASK);
+	val |= u32_encode_bits(endpoint->data->tx.seq_rep_type,
+			       SEQ_REP_TYPE_FMASK);
 
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
@@ -1764,8 +1765,6 @@ static void ipa_endpoint_init_one(struct ipa *ipa, enum ipa_endpoint_name name,
 
 	endpoint->ipa = ipa;
 	endpoint->ee_id = data->ee_id;
-	endpoint->seq_type = data->endpoint.seq_type;
-	endpoint->seq_rep_type = data->endpoint.seq_rep_type;
 	endpoint->channel_id = data->channel_id;
 	endpoint->endpoint_id = data->endpoint_id;
 	endpoint->toward_ipa = data->toward_ipa;
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index c48f5324f83c..c6c55ea35394 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -46,8 +46,6 @@ enum ipa_endpoint_name {
  */
 struct ipa_endpoint {
 	struct ipa *ipa;
-	enum ipa_seq_type seq_type;
-	enum ipa_seq_rep_type seq_rep_type;
 	enum gsi_ee_id ee_id;
 	u32 channel_id;
 	u32 endpoint_id;
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index a7ea11a5d225..36fe746575f6 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -602,7 +602,6 @@ enum ipa_seq_type {
 	IPA_SEQ_1_PASS_SKIP_LAST_UC		= 0x06,
 	IPA_SEQ_2_PASS				= 0x0a,
 	IPA_SEQ_3_PASS_SKIP_LAST_UC		= 0x0c,
-	IPA_SEQ_INVALID				= 0x0c,
 };
 
 /**
@@ -615,7 +614,6 @@ enum ipa_seq_type {
  */
 enum ipa_seq_rep_type {
 	IPA_SEQ_REP_DMA_PARSER			= 0x08,
-	IPA_SEQ_REP_INVALID			= 0x0c,
 };
 
 #define IPA_REG_ENDP_STATUS_N_OFFSET(ep) \
-- 
cgit v1.2.3


From b259cc2a036fc5cead5e838aa8e0c660299c4eef Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 20 Mar 2021 10:57:07 -0500
Subject: net: ipa: update some comments in "ipa_data.h"

Fix/expand some comments in "ipa_data.h".

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index caf9b3f9577e..7816583fc14a 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -18,8 +18,9 @@
  * Boot-time configuration data is used to define the configuration of the
  * IPA and GSI resources to use for a given platform.  This data is supplied
  * via the Device Tree match table, associated with a particular compatible
- * string.  The data defines information about resources, endpoints, and
- * channels.
+ * string.  The data defines information about how resources, endpoints and
+ * channels, memory, clocking and so on are allocated and used for the
+ * platform.
  *
  * Resources are data structures used internally by the IPA hardware.  The
  * configuration data defines the number (or limits of the number) of various
@@ -75,10 +76,10 @@ struct ipa_qsb_data {
  *
  * A GSI channel is a unidirectional means of transferring data to or
  * from (and through) the IPA.  A GSI channel has a ring buffer made
- * up of "transfer elements" (TREs) that specify individual data transfers
- * or IPA immediate commands.  TREs are filled by the AP, and control
- * is passed to IPA hardware by writing the last written element
- * into a doorbell register.
+ * up of "transfer ring elements" (TREs) that specify individual data
+ * transfers or IPA immediate commands.  TREs are filled by the AP,
+ * and control is passed to IPA hardware by writing the last written
+ * element into a doorbell register.
  *
  * When data transfer commands have completed the GSI generates an
  * event (a structure of data) and optionally signals the AP with
@@ -264,7 +265,7 @@ struct ipa_resource_data {
  * @imem_addr:		physical address of IPA region within IMEM
  * @imem_size:		size in bytes of IPA IMEM region
  * @smem_id:		item identifier for IPA region within SMEM memory
- * @imem_size:		size in bytes of the IPA SMEM region
+ * @smem_size:		size in bytes of the IPA SMEM region
  */
 struct ipa_mem_data {
 	u32 local_count;
@@ -307,14 +308,14 @@ struct ipa_clock_data {
  * @endpoint_count:	number of entries in the endpoint_data array
  * @endpoint_data:	IPA endpoint/GSI channel data
  * @resource_data:	IPA resource configuration data
- * @mem_count:		number of entries in the mem_data array
- * @mem_data:		IPA-local shared memory region data
+ * @mem_data:		IPA memory region data
+ * @clock_data:		IPA clock and interconnect data
  */
 struct ipa_data {
 	enum ipa_version version;
-	u32 qsb_count;		/* # entries in qsb_data[] */
+	u32 qsb_count;		/* number of entries in qsb_data[] */
 	const struct ipa_qsb_data *qsb_data;
-	u32 endpoint_count;	/* # entries in endpoint_data[] */
+	u32 endpoint_count;	/* number of entries in endpoint_data[] */
 	const struct ipa_gsi_endpoint_data *endpoint_data;
 	const struct ipa_resource_data *resource_data;
 	const struct ipa_mem_data *mem_data;
-- 
cgit v1.2.3


From dc4aa50b13f1d33fdd813b56bb414714268025ad Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 20 Mar 2021 23:14:28 +0100
Subject: r8169: add support for ethtool get_ringparam

Add support for the ethtool get_ringparam operation.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 8ea6ddc7da5c..66d10aa47c08 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1901,6 +1901,15 @@ static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data)
 	return ret;
 }
 
+static void rtl8169_get_ringparam(struct net_device *dev,
+				  struct ethtool_ringparam *data)
+{
+	data->rx_max_pending = NUM_RX_DESC;
+	data->rx_pending = NUM_RX_DESC;
+	data->tx_max_pending = NUM_TX_DESC;
+	data->tx_pending = NUM_TX_DESC;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES,
@@ -1921,6 +1930,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.set_eee		= rtl8169_set_eee,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_ringparam		= rtl8169_get_ringparam,
 };
 
 static void rtl_enable_eee(struct rtl8169_private *tp)
-- 
cgit v1.2.3


From a1e6f641e3075fa83403c699e64623ae272080e2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 21 Mar 2021 01:04:45 +0200
Subject: Revert "net: dsa: sja1105: Clear VLAN filtering offload netdev
 feature"

This reverts commit e9bf96943b408e6c99dd13fb01cb907335787c61.

The topic of the reverted patch is the support for switches with global
VLAN filtering, added by commit 061f6a505ac3 ("net: dsa: Add
ndo_vlan_rx_{add, kill}_vid implementation"). Be there a switch with 4
ports swp0 -> swp3, and the following setup:

ip link add br0 type bridge vlan_filtering 1
ip link set swp0 master br0
ip link set swp1 master br0

What would happen with VLAN-tagged traffic received on standalone ports
swp2 and swp3? Well, it would get dropped, were it not for the
.ndo_vlan_rx_add_vid and .ndo_vlan_rx_kill_vid implementations (called
from vlan_vid_add and vlan_vid_del respectively). Basically, for DSA
switches where VLAN filtering is a global attribute, we enforce the
standalone ports to have 'rx-vlan-filter: off [fixed]' in their ethtool
features, which lets the user know that all VLAN-tagged packets that are
not explicitly added in the RX filtering list are dropped.

As for the sja1105 driver, at the time of the reverted patch, it was
operating in a pretty handicapped mode when it had ports under a bridge
with vlan_filtering=1. Specifically, it was unable to terminate traffic
through the CPU port (for further explanation see "Traffic support" in
Documentation/networking/dsa/sja1105.rst).

However, since then, the sja1105 driver has made considerable progress,
and that limitation is no longer as severe now. Specifically, since
commit 2cafa72e516f ("net: dsa: sja1105: add a new
best_effort_vlan_filtering devlink parameter"), the driver is able to
perform CPU termination even when some ports are under bridges with
vlan_filtering=1. Then, since commit 8841f6e63f2c ("net: dsa: sja1105:
make devlink property best_effort_vlan_filtering true by default"), this
even became the default operating mode.

So we can now take advantage of the logic in the DSA core.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 51ea104c63bb..d9c198ca0197 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3049,21 +3049,6 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	}
 }
 
-static int sja1105_port_enable(struct dsa_switch *ds, int port,
-			       struct phy_device *phy)
-{
-	struct net_device *slave;
-
-	if (!dsa_is_user_port(ds, port))
-		return 0;
-
-	slave = dsa_to_port(ds, port)->slave;
-
-	slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
-
-	return 0;
-}
-
 static void sja1105_port_disable(struct dsa_switch *ds, int port)
 {
 	struct sja1105_private *priv = ds->priv;
@@ -3491,7 +3476,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.get_ethtool_stats	= sja1105_get_ethtool_stats,
 	.get_sset_count		= sja1105_get_sset_count,
 	.get_ts_info		= sja1105_get_ts_info,
-	.port_enable		= sja1105_port_enable,
 	.port_disable		= sja1105_port_disable,
 	.port_fdb_dump		= sja1105_fdb_dump,
 	.port_fdb_add		= sja1105_fdb_add,
-- 
cgit v1.2.3


From 3a9d54b1947ecea8eea9a902c0b7eb58a98add8a Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 22 Mar 2021 14:02:15 +0800
Subject: Bluetooth: Set CONF_NOT_COMPLETE as l2cap_chan default

Currently l2cap_chan_set_defaults() reset chan->conf_state to zero.
However, there is a flag CONF_NOT_COMPLETE which is set when
creating the l2cap_chan. It is suggested that the flag should be
cleared when l2cap_chan is ready, but when l2cap_chan_set_defaults()
is called, l2cap_chan is not yet ready. Therefore, we must set this
flag as the default.

Example crash call trace:
__dump_stack lib/dump_stack.c:15 [inline]
dump_stack+0xc4/0x118 lib/dump_stack.c:56
panic+0x1c6/0x38b kernel/panic.c:117
__warn+0x170/0x1b9 kernel/panic.c:471
warn_slowpath_fmt+0xc7/0xf8 kernel/panic.c:494
debug_print_object+0x175/0x193 lib/debugobjects.c:260
debug_object_assert_init+0x171/0x1bf lib/debugobjects.c:614
debug_timer_assert_init kernel/time/timer.c:629 [inline]
debug_assert_init kernel/time/timer.c:677 [inline]
del_timer+0x7c/0x179 kernel/time/timer.c:1034
try_to_grab_pending+0x81/0x2e5 kernel/workqueue.c:1230
cancel_delayed_work+0x7c/0x1c4 kernel/workqueue.c:2929
l2cap_clear_timer+0x1e/0x41 include/net/bluetooth/l2cap.h:834
l2cap_chan_del+0x2d8/0x37e net/bluetooth/l2cap_core.c:640
l2cap_chan_close+0x532/0x5d8 net/bluetooth/l2cap_core.c:756
l2cap_sock_shutdown+0x806/0x969 net/bluetooth/l2cap_sock.c:1174
l2cap_sock_release+0x64/0x14d net/bluetooth/l2cap_sock.c:1217
__sock_release+0xda/0x217 net/socket.c:580
sock_close+0x1b/0x1f net/socket.c:1039
__fput+0x322/0x55c fs/file_table.c:208
____fput+0x17/0x19 fs/file_table.c:244
task_work_run+0x19b/0x1d3 kernel/task_work.c:115
exit_task_work include/linux/task_work.h:21 [inline]
do_exit+0xe4c/0x204a kernel/exit.c:766
do_group_exit+0x291/0x291 kernel/exit.c:891
get_signal+0x749/0x1093 kernel/signal.c:2396
do_signal+0xa5/0xcdb arch/x86/kernel/signal.c:737
exit_to_usermode_loop arch/x86/entry/common.c:243 [inline]
prepare_exit_to_usermode+0xed/0x235 arch/x86/entry/common.c:277
syscall_return_slowpath+0x3a7/0x3b3 arch/x86/entry/common.c:348
int_ret_from_sys_call+0x25/0xa3

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reported-by: syzbot+338f014a98367a08a114@syzkaller.appspotmail.com
Reviewed-by: Alain Michaud <alainm@chromium.org>
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 374cc32d7138..59ab9689b37d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -516,7 +516,9 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
 	chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
 	chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
 	chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+
 	chan->conf_state = 0;
+	set_bit(CONF_NOT_COMPLETE, &chan->conf_state);
 
 	set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
 }
-- 
cgit v1.2.3


From 5c4c8c9544099bb9043a10a5318130a943e32fc3 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Mon, 22 Mar 2021 14:03:11 +0800
Subject: Bluetooth: verify AMP hci_chan before amp_destroy

hci_chan can be created in 2 places: hci_loglink_complete_evt() if
it is an AMP hci_chan, or l2cap_conn_add() otherwise. In theory,
Only AMP hci_chan should be removed by a call to
hci_disconn_loglink_complete_evt(). However, the controller might mess
up, call that function, and destroy an hci_chan which is not initiated
by hci_loglink_complete_evt().

This patch adds a verification that the destroyed hci_chan must have
been init'd by hci_loglink_complete_evt().

Example crash call trace:
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xe3/0x144 lib/dump_stack.c:118
 print_address_description+0x67/0x22a mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report mm/kasan/report.c:412 [inline]
 kasan_report+0x251/0x28f mm/kasan/report.c:396
 hci_send_acl+0x3b/0x56e net/bluetooth/hci_core.c:4072
 l2cap_send_cmd+0x5af/0x5c2 net/bluetooth/l2cap_core.c:877
 l2cap_send_move_chan_cfm_icid+0x8e/0xb1 net/bluetooth/l2cap_core.c:4661
 l2cap_move_fail net/bluetooth/l2cap_core.c:5146 [inline]
 l2cap_move_channel_rsp net/bluetooth/l2cap_core.c:5185 [inline]
 l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:5464 [inline]
 l2cap_sig_channel net/bluetooth/l2cap_core.c:5799 [inline]
 l2cap_recv_frame+0x1d12/0x51aa net/bluetooth/l2cap_core.c:7023
 l2cap_recv_acldata+0x2ea/0x693 net/bluetooth/l2cap_core.c:7596
 hci_acldata_packet net/bluetooth/hci_core.c:4606 [inline]
 hci_rx_work+0x2bd/0x45e net/bluetooth/hci_core.c:4796
 process_one_work+0x6f8/0xb50 kernel/workqueue.c:2175
 worker_thread+0x4fc/0x670 kernel/workqueue.c:2321
 kthread+0x2f0/0x304 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

Allocated by task 38:
 set_track mm/kasan/kasan.c:460 [inline]
 kasan_kmalloc+0x8d/0x9a mm/kasan/kasan.c:553
 kmem_cache_alloc_trace+0x102/0x129 mm/slub.c:2787
 kmalloc include/linux/slab.h:515 [inline]
 kzalloc include/linux/slab.h:709 [inline]
 hci_chan_create+0x86/0x26d net/bluetooth/hci_conn.c:1674
 l2cap_conn_add.part.0+0x1c/0x814 net/bluetooth/l2cap_core.c:7062
 l2cap_conn_add net/bluetooth/l2cap_core.c:7059 [inline]
 l2cap_connect_cfm+0x134/0x852 net/bluetooth/l2cap_core.c:7381
 hci_connect_cfm+0x9d/0x122 include/net/bluetooth/hci_core.h:1404
 hci_remote_ext_features_evt net/bluetooth/hci_event.c:4161 [inline]
 hci_event_packet+0x463f/0x72fa net/bluetooth/hci_event.c:5981
 hci_rx_work+0x197/0x45e net/bluetooth/hci_core.c:4791
 process_one_work+0x6f8/0xb50 kernel/workqueue.c:2175
 worker_thread+0x4fc/0x670 kernel/workqueue.c:2321
 kthread+0x2f0/0x304 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

Freed by task 1732:
 set_track mm/kasan/kasan.c:460 [inline]
 __kasan_slab_free mm/kasan/kasan.c:521 [inline]
 __kasan_slab_free+0x106/0x128 mm/kasan/kasan.c:493
 slab_free_hook mm/slub.c:1409 [inline]
 slab_free_freelist_hook+0xaa/0xf6 mm/slub.c:1436
 slab_free mm/slub.c:3009 [inline]
 kfree+0x182/0x21e mm/slub.c:3972
 hci_disconn_loglink_complete_evt net/bluetooth/hci_event.c:4891 [inline]
 hci_event_packet+0x6a1c/0x72fa net/bluetooth/hci_event.c:6050
 hci_rx_work+0x197/0x45e net/bluetooth/hci_core.c:4791
 process_one_work+0x6f8/0xb50 kernel/workqueue.c:2175
 worker_thread+0x4fc/0x670 kernel/workqueue.c:2321
 kthread+0x2f0/0x304 kernel/kthread.c:253
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

The buggy address belongs to the object at ffff8881d7af9180
 which belongs to the cache kmalloc-128 of size 128
The buggy address is located 24 bytes inside of
 128-byte region [ffff8881d7af9180, ffff8881d7af9200)
The buggy address belongs to the page:
page:ffffea00075ebe40 count:1 mapcount:0 mapping:ffff8881da403200 index:0x0
flags: 0x8000000000000200(slab)
raw: 8000000000000200 dead000000000100 dead000000000200 ffff8881da403200
raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8881d7af9080: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
 ffff8881d7af9100: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>ffff8881d7af9180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                            ^
 ffff8881d7af9200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff8881d7af9280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reported-by: syzbot+98228e7407314d2d4ba2@syzkaller.appspotmail.com
Reviewed-by: Alain Michaud <alainm@chromium.org>
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_event.c        | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ebdd4afe30d2..ca4ac6603b9a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -704,6 +704,7 @@ struct hci_chan {
 	struct sk_buff_head data_q;
 	unsigned int	sent;
 	__u8		state;
+	bool		amp;
 };
 
 struct hci_conn_params {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cf2f4a0abdbd..341c8ce93648 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5032,6 +5032,7 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 
 	hchan->handle = le16_to_cpu(ev->handle);
+	hchan->amp = true;
 
 	BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan);
 
@@ -5064,7 +5065,7 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
-	if (!hchan)
+	if (!hchan || !hchan->amp)
 		goto unlock;
 
 	amp_destroy_logical_link(hchan, ev->reason);
-- 
cgit v1.2.3


From 07528783c7da0b711385f0033a836453b5ec0c9c Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 06:30:51 +0530
Subject: Bluetooth: hci_qca: Mundane typo fix

s/packat/packet/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 73af901c2032..0a0056912d51 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1066,7 +1066,7 @@ static void qca_controller_memdump(struct work_struct *work)
 		 * packets in the buffer.
 		 */
 		/* For QCA6390, controller does not lost packets but
-		 * sequence number field of packat sometimes has error
+		 * sequence number field of packet sometimes has error
 		 * bits, so skip this checking for missing packet.
 		 */
 		while ((seq_no > qca_memdump->current_seq_no + 1) &&
-- 
cgit v1.2.3


From be8597239379f0f53c9710dd6ab551bbf535bec6 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 22 Mar 2021 07:52:07 +0900
Subject: Bluetooth: initialize skb_queue_head at l2cap_chan_create()

syzbot is hitting "INFO: trying to register non-static key." message [1],
for "struct l2cap_chan"->tx_q.lock spinlock is not yet initialized when
l2cap_chan_del() is called due to e.g. timeout.

Since "struct l2cap_chan"->lock mutex is initialized at l2cap_chan_create()
immediately after "struct l2cap_chan" is allocated using kzalloc(), let's
as well initialize "struct l2cap_chan"->{tx_q,srej_q}.lock spinlocks there.

[1] https://syzkaller.appspot.com/bug?extid=fadfba6a911f6bf71842

Reported-and-tested-by: syzbot <syzbot+fadfba6a911f6bf71842@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 59ab9689b37d..56e1975cdef1 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -451,6 +451,8 @@ struct l2cap_chan *l2cap_chan_create(void)
 	if (!chan)
 		return NULL;
 
+	skb_queue_head_init(&chan->tx_q);
+	skb_queue_head_init(&chan->srej_q);
 	mutex_init(&chan->lock);
 
 	/* Set default lock nesting level */
-- 
cgit v1.2.3


From 8ff0278d106753a553d6cb2cf49a8888425b8187 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 17:46:30 +0100
Subject: Bluetooth: fix set_ecdh_privkey() prototype
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc-11 points out that the declaration does not match the definition:

net/bluetooth/ecdh_helper.c:122:55: error: argument 2 of type ‘const u8[32]’ {aka ‘const unsigned char[32]’} with mismatched bound [-Werror=array-parameter=]
  122 | int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
      |                                              ~~~~~~~~~^~~~~~~~~~~~~~~
In file included from net/bluetooth/ecdh_helper.c:23:
net/bluetooth/ecdh_helper.h:28:56: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’}
   28 | int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
      |                                              ~~~~~~~~~~^~~~~~~~~~~

Change the declaration to contain the size of the array, rather than
just a pointer.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/ecdh_helper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index a6f8d03d4aaf..830723971cf8 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -25,6 +25,6 @@
 
 int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
 			u8 secret[32]);
-int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32]);
 int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
 int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
-- 
cgit v1.2.3


From 390bd141808d5019506f0f53a777c3b9cb7c5c62 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:07:58 +0800
Subject: ice: Add more basic protocol support for flow filter

Add more protocol and field support for flow filter include:
ETH, VLAN, ICMP, ARP and TCP flag.

Signed-off-by: Kevin Scott <kevin.c.scott@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c          |  20 +++
 drivers/net/ethernet/intel/ice/ice_fdir.h          |   1 +
 drivers/net/ethernet/intel/ice/ice_flow.c          | 179 ++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_flow.h          |  20 +++
 drivers/net/ethernet/intel/ice/ice_protocol_type.h |   6 +
 5 files changed, 220 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 59c0c6a0f8c5..1e77f43b3b3e 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -492,6 +492,16 @@ static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
 	memcpy(pkt + offset, &data, sizeof(data));
 }
 
+/**
+ * ice_pkt_insert_mac_addr - insert a MAC addr into a memory buffer.
+ * @pkt: packet buffer
+ * @addr: MAC address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_mac_addr(u8 *pkt, u8 *addr)
+{
+	ether_addr_copy(pkt, addr);
+}
+
 /**
  * ice_fdir_get_gen_prgm_pkt - generate a training packet
  * @hw: pointer to the hardware structure
@@ -580,6 +590,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		if (frag)
 			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
 		break;
@@ -592,6 +603,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		ice_pkt_insert_mac_addr(loc + ETH_ALEN,
+					input->ext_data.src_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
@@ -602,6 +616,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
@@ -609,6 +624,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -619,6 +635,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -629,6 +646,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
@@ -639,12 +657,14 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
 					 input->ip.v6.src_ip);
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
 					 input->ip.v6.dst_ip);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	default:
 		return ICE_ERR_PARAM;
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 1c587766daab..31623545ebae 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -103,6 +103,7 @@ struct ice_fdir_v6 {
 
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
+	u8 src_mac[ETH_ALEN];	/* src MAC address */
 	u32 usr_def[2];		/* user data */
 	__be16 vlan_type;	/* VLAN ethertype */
 	__be16 vlan_tag;	/* VLAN tag info */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 89a0cef20506..b2baa478edcc 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -20,6 +20,17 @@ struct ice_flow_field_info {
 /* Table containing properties of supported protocol header fields */
 static const
 struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
+	/* Ether */
+	/* ICE_FLOW_FIELD_IDX_ETH_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ETH_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, ETH_ALEN, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_S_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 12, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_C_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 14, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_ETH_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)),
 	/* IPv4 / IPv6 */
 	/* ICE_FLOW_FIELD_IDX_IPV4_SA */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
@@ -42,6 +53,24 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
 	/* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_TCP_FLAGS */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1),
+	/* ARP */
+	/* ICE_FLOW_FIELD_IDX_ARP_SIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_DIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 24, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_SHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 8, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_DHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 18, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_OP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 6, sizeof(__be16)),
+	/* ICMP */
+	/* ICE_FLOW_FIELD_IDX_ICMP_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 0, 1),
+	/* ICE_FLOW_FIELD_IDX_ICMP_CODE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 1, 1),
 	/* GRE */
 	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
@@ -50,8 +79,32 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 
 /* Bitmaps indicating relevant packet types for a particular protocol header
  *
- * Packet types for packets with an Outer/First/Single IPv4 header
+ * Packet types for packets with an Outer/First/Single MAC header
  */
+static const u32 ice_ptypes_mac_ofos[] = {
+	0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC VLAN header */
+static const u32 ice_ptypes_macvlan_il[] = {
+	0x00000000, 0xBC000000, 0x000001DF, 0xF0000000,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header */
 static const u32 ice_ptypes_ipv4_ofos[] = {
 	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -111,6 +164,18 @@ static const u32 ice_ipv4_ofos_no_l4[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Outermost/First ARP header */
+static const u32 ice_ptypes_arp_of[] = {
+	0x00000800, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
 static const u32 ice_ipv4_il_no_l4[] = {
 	0x60000000, 0x18043008, 0x80000002, 0x6010c021,
@@ -182,6 +247,29 @@ static const u32 ice_ptypes_sctp_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ICMP header */
+static const u32 ice_ptypes_icmp_of[] = {
+	0x10000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last ICMP header */
+static const u32 ice_ptypes_icmp_il[] = {
+	0x00000000, 0x02040408, 0x40000102, 0x08101020,
+	0x00000408, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x42108000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
@@ -197,6 +285,18 @@ static const u32 ice_ptypes_gre_of[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Innermost/Last MAC header */
+static const u32 ice_ptypes_mac_il[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Manage parameters and info. used during the creation of a flow profile */
 struct ice_flow_prof_params {
 	enum ice_block blk;
@@ -212,9 +312,10 @@ struct ice_flow_prof_params {
 };
 
 #define ICE_FLOW_SEG_HDRS_L3_MASK	\
-	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP)
 #define ICE_FLOW_SEG_HDRS_L4_MASK	\
-	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+	(ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \
+	 ICE_FLOW_SEG_HDR_SCTP)
 
 /**
  * ice_flow_val_hdrs - validates packet segments for valid protocol headers
@@ -243,8 +344,11 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
 
 /* Sizes of fixed known protocol headers without header options */
 #define ICE_FLOW_PROT_HDR_SZ_MAC	14
+#define ICE_FLOW_PROT_HDR_SZ_MAC_VLAN	(ICE_FLOW_PROT_HDR_SZ_MAC + 2)
 #define ICE_FLOW_PROT_HDR_SZ_IPV4	20
 #define ICE_FLOW_PROT_HDR_SZ_IPV6	40
+#define ICE_FLOW_PROT_HDR_SZ_ARP	28
+#define ICE_FLOW_PROT_HDR_SZ_ICMP	8
 #define ICE_FLOW_PROT_HDR_SZ_TCP	20
 #define ICE_FLOW_PROT_HDR_SZ_UDP	8
 #define ICE_FLOW_PROT_HDR_SZ_SCTP	12
@@ -256,16 +360,27 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
  */
 static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
 {
-	u16 sz = ICE_FLOW_PROT_HDR_SZ_MAC;
+	u16 sz;
+
+	/* L2 headers */
+	sz = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_VLAN) ?
+		ICE_FLOW_PROT_HDR_SZ_MAC_VLAN : ICE_FLOW_PROT_HDR_SZ_MAC;
 
 	/* L3 headers */
 	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
 		sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
 	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
 		sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ARP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ARP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)
+		/* An L3 header is required if L4 is specified */
+		return 0;
 
 	/* L4 headers */
-	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ICMP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ICMP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
 		sz += ICE_FLOW_PROT_HDR_SZ_TCP;
 	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
 		sz += ICE_FLOW_PROT_HDR_SZ_UDP;
@@ -298,6 +413,24 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 
 		hdrs = prof->segs[i].hdrs;
 
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH) {
+			src = !i ? (const unsigned long *)ice_ptypes_mac_ofos :
+				(const unsigned long *)ice_ptypes_mac_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (i && hdrs & ICE_FLOW_SEG_HDR_VLAN) {
+			src = (const unsigned long *)ice_ptypes_macvlan_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (!i && hdrs & ICE_FLOW_SEG_HDR_ARP) {
+			bitmap_and(params->ptypes, params->ptypes,
+				   (const unsigned long *)ice_ptypes_arp_of,
+				   ICE_FLOW_PTYPE_MAX);
+		}
 		if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
 		    !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
 			src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 :
@@ -334,6 +467,13 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			src = (const unsigned long *)ice_ptypes_sctp_il;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_ICMP) {
+			src = !i ? (const unsigned long *)ice_ptypes_icmp_of :
+				(const unsigned long *)ice_ptypes_icmp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
 			if (!i) {
 				src = (const unsigned long *)ice_ptypes_gre_of;
@@ -370,6 +510,15 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	flds = params->prof->segs[seg].fields;
 
 	switch (fld) {
+	case ICE_FLOW_FIELD_IDX_ETH_DA:
+	case ICE_FLOW_FIELD_IDX_ETH_SA:
+	case ICE_FLOW_FIELD_IDX_S_VLAN:
+	case ICE_FLOW_FIELD_IDX_C_VLAN:
+		prot_id = seg == 0 ? ICE_PROT_MAC_OF_OR_S : ICE_PROT_MAC_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_ETH_TYPE:
+		prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL;
+		break;
 	case ICE_FLOW_FIELD_IDX_IPV4_SA:
 	case ICE_FLOW_FIELD_IDX_IPV4_DA:
 		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
@@ -380,6 +529,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 		break;
 	case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT:
 	case ICE_FLOW_FIELD_IDX_TCP_DST_PORT:
+	case ICE_FLOW_FIELD_IDX_TCP_FLAGS:
 		prot_id = ICE_PROT_TCP_IL;
 		break;
 	case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT:
@@ -390,6 +540,19 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
 		prot_id = ICE_PROT_SCTP_IL;
 		break;
+	case ICE_FLOW_FIELD_IDX_ARP_SIP:
+	case ICE_FLOW_FIELD_IDX_ARP_DIP:
+	case ICE_FLOW_FIELD_IDX_ARP_SHA:
+	case ICE_FLOW_FIELD_IDX_ARP_DHA:
+	case ICE_FLOW_FIELD_IDX_ARP_OP:
+		prot_id = ICE_PROT_ARP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_ICMP_TYPE:
+	case ICE_FLOW_FIELD_IDX_ICMP_CODE:
+		/* ICMP type and code share the same extraction seq. entry */
+		prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ?
+				ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL;
+		break;
 	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
 		prot_id = ICE_PROT_GRE_OF;
 		break;
@@ -1158,6 +1321,9 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
 	seg->raws_cnt++;
 }
 
+#define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \
+	(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
+
 #define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
 	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
 
@@ -1165,7 +1331,8 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
 	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
 
 #define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \
-	(ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
+	(ICE_FLOW_RSS_SEG_HDR_L2_MASKS | \
+	 ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
 	 ICE_FLOW_RSS_SEG_HDR_L4_MASKS)
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 829f90b1e998..d92dcc791ef4 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -38,8 +38,12 @@
  */
 enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_NONE		= 0x00000000,
+	ICE_FLOW_SEG_HDR_ETH		= 0x00000001,
+	ICE_FLOW_SEG_HDR_VLAN		= 0x00000002,
 	ICE_FLOW_SEG_HDR_IPV4		= 0x00000004,
 	ICE_FLOW_SEG_HDR_IPV6		= 0x00000008,
+	ICE_FLOW_SEG_HDR_ARP		= 0x00000010,
+	ICE_FLOW_SEG_HDR_ICMP		= 0x00000020,
 	ICE_FLOW_SEG_HDR_TCP		= 0x00000040,
 	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
 	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
@@ -47,6 +51,12 @@ enum ice_flow_seg_hdr {
 };
 
 enum ice_flow_field {
+	/* L2 */
+	ICE_FLOW_FIELD_IDX_ETH_DA,
+	ICE_FLOW_FIELD_IDX_ETH_SA,
+	ICE_FLOW_FIELD_IDX_S_VLAN,
+	ICE_FLOW_FIELD_IDX_C_VLAN,
+	ICE_FLOW_FIELD_IDX_ETH_TYPE,
 	/* L3 */
 	ICE_FLOW_FIELD_IDX_IPV4_SA,
 	ICE_FLOW_FIELD_IDX_IPV4_DA,
@@ -59,6 +69,16 @@ enum ice_flow_field {
 	ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_TCP_FLAGS,
+	/* ARP */
+	ICE_FLOW_FIELD_IDX_ARP_SIP,
+	ICE_FLOW_FIELD_IDX_ARP_DIP,
+	ICE_FLOW_FIELD_IDX_ARP_SHA,
+	ICE_FLOW_FIELD_IDX_ARP_DHA,
+	ICE_FLOW_FIELD_IDX_ARP_OP,
+	/* ICMP */
+	ICE_FLOW_FIELD_IDX_ICMP_TYPE,
+	ICE_FLOW_FIELD_IDX_ICMP_CODE,
 	/* GRE */
 	ICE_FLOW_FIELD_IDX_GRE_KEYID,
 	/* The total number of enums must not exceed 64 */
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index 7f4c1ec1eff2..fac5f15a692c 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -13,6 +13,9 @@
 enum ice_prot_id {
 	ICE_PROT_ID_INVAL	= 0,
 	ICE_PROT_MAC_OF_OR_S	= 1,
+	ICE_PROT_MAC_IL		= 4,
+	ICE_PROT_ETYPE_OL	= 9,
+	ICE_PROT_ETYPE_IL	= 10,
 	ICE_PROT_IPV4_OF_OR_S	= 32,
 	ICE_PROT_IPV4_IL	= 33,
 	ICE_PROT_IPV6_OF_OR_S	= 40,
@@ -22,6 +25,9 @@ enum ice_prot_id {
 	ICE_PROT_UDP_IL_OR_S	= 53,
 	ICE_PROT_GRE_OF		= 64,
 	ICE_PROT_SCTP_IL	= 96,
+	ICE_PROT_ICMP_IL	= 98,
+	ICE_PROT_ICMPV6_IL	= 100,
+	ICE_PROT_ARP_OF		= 118,
 	ICE_PROT_META_ID	= 255, /* when offset == metadata */
 	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
 };
-- 
cgit v1.2.3


From b199dddbd399536d5470e10e6bfd7d0e1b5fb71a Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:07:59 +0800
Subject: ice: Support non word aligned input set field

To support FDIR input set with protocol field like DSCP, TTL,
PROT, etc. which is not word aligned, we need to enable field
vector masking.

Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c       |  62 +++-
 drivers/net/ethernet/intel/ice/ice_fdir.h       |   8 +
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c  | 422 +++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_flex_pipe.h  |   2 +-
 drivers/net/ethernet/intel/ice/ice_flex_type.h  |  17 +
 drivers/net/ethernet/intel/ice/ice_flow.c       | 126 +++++--
 drivers/net/ethernet/intel/ice/ice_flow.h       |   7 +
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  15 +
 8 files changed, 623 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 1e77f43b3b3e..0c2066c0ab1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -470,6 +470,39 @@ static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
 		       sizeof(*addr));
 }
 
+/**
+ * ice_pkt_insert_u8 - insert a u8 value into a memory buffer.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u8(u8 *pkt, int offset, u8 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u8_tc - insert a u8 value into a memory buffer for TC ipv6.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting Traffic Class (TC) for IPv6,
+ * since that TC is not aligned in number of bytes. Here we split it out
+ * into two part and fill each byte with data copy from pkt, then insert
+ * the two bytes data one by one.
+ */
+static void ice_pkt_insert_u8_tc(u8 *pkt, int offset, u8 data)
+{
+	u8 high, low;
+
+	high = (data >> 4) + (*(pkt + offset) & 0xF0);
+	memcpy(pkt + offset, &high, sizeof(high));
+
+	low = (*(pkt + offset + 1) & 0x0F) + ((data & 0x0F) << 4);
+	memcpy(pkt + offset + 1, &low, sizeof(low));
+}
+
 /**
  * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
  * @pkt: packet buffer
@@ -530,11 +563,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		case IPPROTO_SCTP:
 			flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
 			break;
-		case IPPROTO_IP:
+		default:
 			flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
 			break;
-		default:
-			return ICE_ERR_PARAM;
 		}
 	} else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
 		switch (input->ip.v6.proto) {
@@ -547,11 +578,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		case IPPROTO_SCTP:
 			flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
 			break;
-		case IPPROTO_IP:
+		default:
 			flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
 			break;
-		default:
-			return ICE_ERR_PARAM;
 		}
 	} else {
 		flow = input->flow_type;
@@ -590,6 +619,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		if (frag)
 			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
@@ -603,6 +634,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		ice_pkt_insert_mac_addr(loc + ETH_ALEN,
 					input->ext_data.src_mac);
@@ -616,6 +649,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
@@ -623,7 +658,10 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v4.src_ip);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
 				   input->ip.v4.dst_ip);
-		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_u8(loc, ICE_IPV4_PROTO_OFFSET,
+				  input->ip.v4.proto);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
@@ -635,6 +673,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
@@ -646,6 +686,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
@@ -657,6 +699,8 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				   input->ip.v6.src_port);
 		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
 				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
@@ -664,6 +708,10 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 					 input->ip.v6.src_ip);
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
 					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_u8(loc, ICE_IPV6_PROTO_OFFSET,
+				  input->ip.v6.proto);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 31623545ebae..84b40298a513 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -25,6 +25,12 @@
 #define ICE_IPV6_UDP_DST_PORT_OFFSET	56
 #define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
 #define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+#define ICE_IPV4_TOS_OFFSET		15
+#define ICE_IPV4_TTL_OFFSET		22
+#define ICE_IPV6_TC_OFFSET		14
+#define ICE_IPV6_HLIM_OFFSET		21
+#define ICE_IPV6_PROTO_OFFSET		20
+
 /* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF
  * requests that the packet not be fragmented. MF indicates that a packet has
  * been fragmented.
@@ -86,6 +92,7 @@ struct ice_fdir_v4 {
 	u8 tos;
 	u8 ip_ver;
 	u8 proto;
+	u8 ttl;
 };
 
 #define ICE_IPV6_ADDR_LEN_AS_U32		4
@@ -99,6 +106,7 @@ struct ice_fdir_v6 {
 	__be32 sec_parm_idx; /* security parameter index */
 	u8 tc;
 	u8 proto;
+	u8 hlim;
 };
 
 struct ice_fdir_extra {
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 5e1fd30c0a0f..fa2ebd548b1c 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2361,18 +2361,82 @@ ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
 }
 
 /**
- * ice_find_prof_id - find profile ID for a given field vector
+ * ice_prof_has_mask_idx - determine if profile index masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @idx: profile index to check
+ * @mask: mask to match
+ */
+static bool
+ice_prof_has_mask_idx(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 idx,
+		      u16 mask)
+{
+	bool expect_no_mask = false;
+	bool found = false;
+	bool match = false;
+	u16 i;
+
+	/* If mask is 0x0000 or 0xffff, then there is no masking */
+	if (mask == 0 || mask == 0xffff)
+		expect_no_mask = true;
+
+	/* Scan the enabled masks on this profile, for the specified idx */
+	for (i = hw->blk[blk].masks.first; i < hw->blk[blk].masks.first +
+	     hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].es.mask_ena[prof] & BIT(i))
+			if (hw->blk[blk].masks.masks[i].in_use &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found = true;
+				if (hw->blk[blk].masks.masks[i].mask == mask)
+					match = true;
+				break;
+			}
+
+	if (expect_no_mask) {
+		if (found)
+			return false;
+	} else {
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_prof_has_mask - determine if profile masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @masks: masks to match
+ */
+static bool
+ice_prof_has_mask(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 *masks)
+{
+	u16 i;
+
+	/* es->mask_ena[prof] will have the mask */
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (!ice_prof_has_mask_idx(hw, blk, prof, i, masks[i]))
+			return false;
+
+	return true;
+}
+
+/**
+ * ice_find_prof_id_with_mask - find profile ID for a given field vector
  * @hw: pointer to the hardware structure
  * @blk: HW block
  * @fv: field vector to search for
+ * @masks: masks for FV
  * @prof_id: receives the profile ID
  */
 static enum ice_status
-ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
-		 struct ice_fv_word *fv, u8 *prof_id)
+ice_find_prof_id_with_mask(struct ice_hw *hw, enum ice_block blk,
+			   struct ice_fv_word *fv, u16 *masks, u8 *prof_id)
 {
 	struct ice_es *es = &hw->blk[blk].es;
-	u16 off;
 	u8 i;
 
 	/* For FD, we don't want to re-use a existed profile with the same
@@ -2382,11 +2446,15 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
 		return ICE_ERR_DOES_NOT_EXIST;
 
 	for (i = 0; i < (u8)es->count; i++) {
-		off = i * es->fvw;
+		u16 off = i * es->fvw;
 
 		if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
 			continue;
 
+		/* check if masks settings are the same for this profile */
+		if (masks && !ice_prof_has_mask(hw, blk, i, masks))
+			continue;
+
 		*prof_id = i;
 		return 0;
 	}
@@ -2536,6 +2604,330 @@ ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
 	return 0;
 }
 
+/**
+ * ice_write_prof_mask_reg - write profile mask register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: mask index
+ * @idx: index of the FV which will use the mask
+ * @mask: the 16-bit mask
+ */
+static void
+ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx,
+			u16 idx, u16 mask)
+{
+	u32 offset;
+	u32 val;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK(mask_idx);
+		val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M;
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK(mask_idx);
+		val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, val);
+	ice_debug(hw, ICE_DBG_PKG, "write mask, blk %d (%d): %x = %x\n",
+		  blk, idx, offset, val);
+}
+
+/**
+ * ice_write_prof_mask_enable_res - write profile mask enable register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @enable_mask: enable mask
+ */
+static void
+ice_write_prof_mask_enable_res(struct ice_hw *hw, enum ice_block blk,
+			       u16 prof_id, u32 enable_mask)
+{
+	u32 offset;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK_SEL(prof_id);
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK_SEL(prof_id);
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, enable_mask);
+	ice_debug(hw, ICE_DBG_PKG, "write mask enable, blk %d (%d): %x = %x\n",
+		  blk, prof_id, offset, enable_mask);
+}
+
+/**
+ * ice_init_prof_masks - initial prof masks
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ */
+static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 per_pf;
+	u16 i;
+
+	mutex_init(&hw->blk[blk].masks.lock);
+
+	per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs;
+
+	hw->blk[blk].masks.count = per_pf;
+	hw->blk[blk].masks.first = hw->pf_id * per_pf;
+
+	memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks));
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+}
+
+/**
+ * ice_init_all_prof_masks - initialize all prof masks
+ * @hw: pointer to the HW struct
+ */
+static void ice_init_all_prof_masks(struct ice_hw *hw)
+{
+	ice_init_prof_masks(hw, ICE_BLK_RSS);
+	ice_init_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_alloc_prof_mask - allocate profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: index of FV which will use the mask
+ * @mask: the 16-bit mask
+ * @mask_idx: variable to receive the mask index
+ */
+static enum ice_status
+ice_alloc_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 idx, u16 mask,
+		    u16 *mask_idx)
+{
+	bool found_unused = false, found_copy = false;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	u16 unused_idx = 0, copy_idx = 0;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].masks.masks[i].in_use) {
+			/* if mask is in use and it exactly duplicates the
+			 * desired mask and index, then in can be reused
+			 */
+			if (hw->blk[blk].masks.masks[i].mask == mask &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found_copy = true;
+				copy_idx = i;
+				break;
+			}
+		} else {
+			/* save off unused index, but keep searching in case
+			 * there is an exact match later on
+			 */
+			if (!found_unused) {
+				found_unused = true;
+				unused_idx = i;
+			}
+		}
+
+	if (found_copy)
+		i = copy_idx;
+	else if (found_unused)
+		i = unused_idx;
+	else
+		goto err_ice_alloc_prof_mask;
+
+	/* update mask for a new entry */
+	if (found_unused) {
+		hw->blk[blk].masks.masks[i].in_use = true;
+		hw->blk[blk].masks.masks[i].mask = mask;
+		hw->blk[blk].masks.masks[i].idx = idx;
+		hw->blk[blk].masks.masks[i].ref = 0;
+		ice_write_prof_mask_reg(hw, blk, i, idx, mask);
+	}
+
+	hw->blk[blk].masks.masks[i].ref++;
+	*mask_idx = i;
+	status = 0;
+
+err_ice_alloc_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return status;
+}
+
+/**
+ * ice_free_prof_mask - free profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: index of mask
+ */
+static enum ice_status
+ice_free_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 mask_idx)
+{
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	if (!(mask_idx >= hw->blk[blk].masks.first &&
+	      mask_idx < hw->blk[blk].masks.first + hw->blk[blk].masks.count))
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	if (!hw->blk[blk].masks.masks[mask_idx].in_use)
+		goto exit_ice_free_prof_mask;
+
+	if (hw->blk[blk].masks.masks[mask_idx].ref > 1) {
+		hw->blk[blk].masks.masks[mask_idx].ref--;
+		goto exit_ice_free_prof_mask;
+	}
+
+	/* remove mask */
+	hw->blk[blk].masks.masks[mask_idx].in_use = false;
+	hw->blk[blk].masks.masks[mask_idx].mask = 0;
+	hw->blk[blk].masks.masks[mask_idx].idx = 0;
+
+	/* update mask as unused entry */
+	ice_debug(hw, ICE_DBG_PKG, "Free mask, blk %d, mask %d\n", blk,
+		  mask_idx);
+	ice_write_prof_mask_reg(hw, blk, mask_idx, 0, 0);
+
+exit_ice_free_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return 0;
+}
+
+/**
+ * ice_free_prof_masks - free all profile masks for a profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ */
+static enum ice_status
+ice_free_prof_masks(struct ice_hw *hw, enum ice_block blk, u16 prof_id)
+{
+	u32 mask_bm;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return ICE_ERR_PARAM;
+
+	mask_bm = hw->blk[blk].es.mask_ena[prof_id];
+	for (i = 0; i < BITS_PER_BYTE * sizeof(mask_bm); i++)
+		if (mask_bm & BIT(i))
+			ice_free_prof_mask(hw, blk, i);
+
+	return 0;
+}
+
+/**
+ * ice_shutdown_prof_masks - releases lock for masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) {
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+
+		hw->blk[blk].masks.masks[i].in_use = false;
+		hw->blk[blk].masks.masks[i].idx = 0;
+		hw->blk[blk].masks.masks[i].mask = 0;
+	}
+
+	mutex_unlock(&hw->blk[blk].masks.lock);
+	mutex_destroy(&hw->blk[blk].masks.lock);
+}
+
+/**
+ * ice_shutdown_all_prof_masks - releases all locks for masking
+ * @hw: pointer to the HW struct
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_all_prof_masks(struct ice_hw *hw)
+{
+	ice_shutdown_prof_masks(hw, ICE_BLK_RSS);
+	ice_shutdown_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_update_prof_masking - set registers according to masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @masks: masks
+ */
+static enum ice_status
+ice_update_prof_masking(struct ice_hw *hw, enum ice_block blk, u16 prof_id,
+			u16 *masks)
+{
+	bool err = false;
+	u32 ena_mask = 0;
+	u16 idx;
+	u16 i;
+
+	/* Only support FD and RSS masking, otherwise nothing to be done */
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return 0;
+
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (masks[i] && masks[i] != 0xFFFF) {
+			if (!ice_alloc_prof_mask(hw, blk, i, masks[i], &idx)) {
+				ena_mask |= BIT(idx);
+			} else {
+				/* not enough bitmaps */
+				err = true;
+				break;
+			}
+		}
+
+	if (err) {
+		/* free any bitmaps we have allocated */
+		for (i = 0; i < BITS_PER_BYTE * sizeof(ena_mask); i++)
+			if (ena_mask & BIT(i))
+				ice_free_prof_mask(hw, blk, i);
+
+		return ICE_ERR_OUT_OF_RANGE;
+	}
+
+	/* enable the masks for this profile */
+	ice_write_prof_mask_enable_res(hw, blk, prof_id, ena_mask);
+
+	/* store enabled masks with profile so that they can be freed later */
+	hw->blk[blk].es.mask_ena[prof_id] = ena_mask;
+
+	return 0;
+}
+
 /**
  * ice_write_es - write an extraction sequence to hardware
  * @hw: pointer to the HW struct
@@ -2575,6 +2967,7 @@ ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
 	if (hw->blk[blk].es.ref_count[prof_id] > 0) {
 		if (!--hw->blk[blk].es.ref_count[prof_id]) {
 			ice_write_es(hw, blk, prof_id, NULL);
+			ice_free_prof_masks(hw, blk, prof_id);
 			return ice_free_prof_id(hw, blk, prof_id);
 		}
 	}
@@ -2937,6 +3330,7 @@ void ice_free_hw_tbls(struct ice_hw *hw)
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
 		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.mask_ena);
 	}
 
 	list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) {
@@ -2944,6 +3338,7 @@ void ice_free_hw_tbls(struct ice_hw *hw)
 		devm_kfree(ice_hw_to_dev(hw), r);
 	}
 	mutex_destroy(&hw->rss_locks);
+	ice_shutdown_all_prof_masks(hw);
 	memset(hw->blk, 0, sizeof(hw->blk));
 }
 
@@ -2997,6 +3392,7 @@ void ice_clear_hw_tbls(struct ice_hw *hw)
 		memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw);
 		memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
 		memset(es->written, 0, es->count * sizeof(*es->written));
+		memset(es->mask_ena, 0, es->count * sizeof(*es->mask_ena));
 	}
 }
 
@@ -3010,6 +3406,7 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
 
 	mutex_init(&hw->rss_locks);
 	INIT_LIST_HEAD(&hw->rss_list_head);
+	ice_init_all_prof_masks(hw);
 	for (i = 0; i < ICE_BLK_COUNT; i++) {
 		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
 		struct ice_prof_tcam *prof = &hw->blk[i].prof;
@@ -3112,6 +3509,11 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw)
 					   sizeof(*es->written), GFP_KERNEL);
 		if (!es->written)
 			goto err;
+
+		es->mask_ena = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					    sizeof(*es->mask_ena), GFP_KERNEL);
+		if (!es->mask_ena)
+			goto err;
 	}
 	return 0;
 
@@ -3718,15 +4120,16 @@ ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
  * @id: profile tracking ID
  * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
  * @es: extraction sequence (length of array is determined by the block)
+ * @masks: mask for extraction sequence
  *
- * This function registers a profile, which matches a set of PTGs with a
+ * This function registers a profile, which matches a set of PTYPES with a
  * particular extraction sequence. While the hardware profile is allocated
  * it will not be written until the first call to ice_add_flow that specifies
  * the ID value used here.
  */
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
-	     struct ice_fv_word *es)
+	     struct ice_fv_word *es, u16 *masks)
 {
 	u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
 	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
@@ -3740,7 +4143,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 	mutex_lock(&hw->blk[blk].es.prof_map_lock);
 
 	/* search for existing profile */
-	status = ice_find_prof_id(hw, blk, es, &prof_id);
+	status = ice_find_prof_id_with_mask(hw, blk, es, masks, &prof_id);
 	if (status) {
 		/* allocate profile ID */
 		status = ice_alloc_prof_id(hw, blk, &prof_id);
@@ -3758,6 +4161,9 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 			if (status)
 				goto err_ice_add_prof;
 		}
+		status = ice_update_prof_masking(hw, blk, prof_id, masks);
+		if (status)
+			goto err_ice_add_prof;
 
 		/* and write new es */
 		ice_write_es(hw, blk, prof_id, es);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index 20deddb807c5..08c5b4386536 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -27,7 +27,7 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
 
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
-	     struct ice_fv_word *es);
+	     struct ice_fv_word *es, u16 *masks);
 enum ice_status
 ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index 24063c1351b2..eeb53c3917dd 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -335,6 +335,7 @@ struct ice_es {
 	u16 count;
 	u16 fvw;
 	u16 *ref_count;
+	u32 *mask_ena;
 	struct list_head prof_map;
 	struct ice_fv_word *t;
 	struct mutex prof_map_lock;	/* protect access to profiles list */
@@ -478,6 +479,21 @@ struct ice_prof_redir {
 	u16 count;
 };
 
+struct ice_mask {
+	u16 mask;	/* 16-bit mask */
+	u16 idx;	/* index */
+	u16 ref;	/* reference count */
+	u8 in_use;	/* non-zero if used */
+};
+
+struct ice_masks {
+	struct mutex lock; /* lock to protect this structure */
+	u16 first;	/* first mask owned by the PF */
+	u16 count;	/* number of masks owned by the PF */
+#define ICE_PROF_MASK_COUNT 32
+	struct ice_mask masks[ICE_PROF_MASK_COUNT];
+};
+
 /* Tables per block */
 struct ice_blk_info {
 	struct ice_xlt1 xlt1;
@@ -485,6 +501,7 @@ struct ice_blk_info {
 	struct ice_prof_tcam prof;
 	struct ice_prof_redir prof_redir;
 	struct ice_es es;
+	struct ice_masks masks;
 	u8 overwrite; /* set to true to allow overwrite of table entries */
 	u8 is_list_init;
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index b2baa478edcc..39744773a403 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -9,12 +9,21 @@ struct ice_flow_field_info {
 	enum ice_flow_seg_hdr hdr;
 	s16 off;	/* Offset from start of a protocol header, in bits */
 	u16 size;	/* Size of fields in bits */
+	u16 mask;	/* 16-bit mask for field */
 };
 
 #define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \
 	.hdr = _hdr, \
 	.off = (_offset_bytes) * BITS_PER_BYTE, \
 	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = 0, \
+}
+
+#define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \
+	.hdr = _hdr, \
+	.off = (_offset_bytes) * BITS_PER_BYTE, \
+	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = _mask, \
 }
 
 /* Table containing properties of supported protocol header fields */
@@ -32,6 +41,18 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 	/* ICE_FLOW_FIELD_IDX_ETH_TYPE */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)),
 	/* IPv4 / IPv6 */
+	/* ICE_FLOW_FIELD_IDX_IPV4_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV4, 0, 1, 0x00fc),
+	/* ICE_FLOW_FIELD_IDX_IPV6_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV6, 0, 1, 0x0ff0),
+	/* ICE_FLOW_FIELD_IDX_IPV4_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0xff00),
+	/* ICE_FLOW_FIELD_IDX_IPV4_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0xff00),
 	/* ICE_FLOW_FIELD_IDX_IPV4_SA */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
 	/* ICE_FLOW_FIELD_IDX_IPV4_DA */
@@ -308,6 +329,8 @@ struct ice_flow_prof_params {
 	 * This will give us the direction flags.
 	 */
 	struct ice_fv_word es[ICE_MAX_FV_WORDS];
+
+	u16 mask[ICE_MAX_FV_WORDS];
 	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
 };
 
@@ -492,6 +515,7 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
  * @params: information about the flow to be processed
  * @seg: packet segment index of the field to be extracted
  * @fld: ID of field to be extracted
+ * @match: bit field of all fields
  *
  * This function determines the protocol ID, offset, and size of the given
  * field. It then allocates one or more extraction sequence entries for the
@@ -499,12 +523,15 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
  */
 static enum ice_status
 ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
-		    u8 seg, enum ice_flow_field fld)
+		    u8 seg, enum ice_flow_field fld, u64 match)
 {
+	enum ice_flow_field sib = ICE_FLOW_FIELD_IDX_MAX;
 	enum ice_prot_id prot_id = ICE_PROT_ID_INVAL;
 	u8 fv_words = hw->blk[params->blk].es.fvw;
 	struct ice_flow_fld_info *flds;
 	u16 cnt, ese_bits, i;
+	u16 sib_mask = 0;
+	u16 mask;
 	u16 off;
 
 	flds = params->prof->segs[seg].fields;
@@ -519,6 +546,50 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	case ICE_FLOW_FIELD_IDX_ETH_TYPE:
 		prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL;
 		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV4_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV4_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV4_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV6_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV6_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV6_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
 	case ICE_FLOW_FIELD_IDX_IPV4_SA:
 	case ICE_FLOW_FIELD_IDX_IPV4_DA:
 		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
@@ -552,6 +623,9 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 		/* ICMP type and code share the same extraction seq. entry */
 		prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ?
 				ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL;
+		sib = fld == ICE_FLOW_FIELD_IDX_ICMP_TYPE ?
+			ICE_FLOW_FIELD_IDX_ICMP_CODE :
+			ICE_FLOW_FIELD_IDX_ICMP_TYPE;
 		break;
 	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
 		prot_id = ICE_PROT_GRE_OF;
@@ -570,6 +644,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 		ICE_FLOW_FV_EXTRACT_SZ;
 	flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits);
 	flds[fld].xtrct.idx = params->es_cnt;
+	flds[fld].xtrct.mask = ice_flds_info[fld].mask;
 
 	/* Adjust the next field-entry index after accommodating the number of
 	 * entries this field consumes
@@ -579,24 +654,34 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 
 	/* Fill in the extraction sequence entries needed for this field */
 	off = flds[fld].xtrct.off;
+	mask = flds[fld].xtrct.mask;
 	for (i = 0; i < cnt; i++) {
-		u8 idx;
-
-		/* Make sure the number of extraction sequence required
-		 * does not exceed the block's capability
+		/* Only consume an extraction sequence entry if there is no
+		 * sibling field associated with this field or the sibling entry
+		 * already extracts the word shared with this field.
 		 */
-		if (params->es_cnt >= fv_words)
-			return ICE_ERR_MAX_LIMIT;
+		if (sib == ICE_FLOW_FIELD_IDX_MAX ||
+		    flds[sib].xtrct.prot_id == ICE_PROT_ID_INVAL ||
+		    flds[sib].xtrct.off != off) {
+			u8 idx;
 
-		/* some blocks require a reversed field vector layout */
-		if (hw->blk[params->blk].es.reverse)
-			idx = fv_words - params->es_cnt - 1;
-		else
-			idx = params->es_cnt;
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= fv_words)
+				return ICE_ERR_MAX_LIMIT;
 
-		params->es[idx].prot_id = prot_id;
-		params->es[idx].off = off;
-		params->es_cnt++;
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = prot_id;
+			params->es[idx].off = off;
+			params->mask[idx] = mask | sib_mask;
+			params->es_cnt++;
+		}
 
 		off += ICE_FLOW_FV_EXTRACT_SZ;
 	}
@@ -696,14 +781,15 @@ ice_flow_create_xtrct_seq(struct ice_hw *hw,
 	u8 i;
 
 	for (i = 0; i < prof->segs_cnt; i++) {
-		u8 j;
+		u64 match = params->prof->segs[i].match;
+		enum ice_flow_field j;
 
-		for_each_set_bit(j, (unsigned long *)&prof->segs[i].match,
+		for_each_set_bit(j, (unsigned long *)&match,
 				 ICE_FLOW_FIELD_IDX_MAX) {
-			status = ice_flow_xtract_fld(hw, params, i,
-						     (enum ice_flow_field)j);
+			status = ice_flow_xtract_fld(hw, params, i, j, match);
 			if (status)
 				return status;
+			clear_bit(j, (unsigned long *)&match);
 		}
 
 		/* Process raw matching bytes */
@@ -914,7 +1000,7 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
 
 	/* Add a HW profile for this flow profile */
 	status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes,
-			      params->es);
+			      params->es, params->mask);
 	if (status) {
 		ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
 		goto out;
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index d92dcc791ef4..612ea3be21a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -58,6 +58,12 @@ enum ice_flow_field {
 	ICE_FLOW_FIELD_IDX_C_VLAN,
 	ICE_FLOW_FIELD_IDX_ETH_TYPE,
 	/* L3 */
+	ICE_FLOW_FIELD_IDX_IPV4_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV6_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV4_TTL,
+	ICE_FLOW_FIELD_IDX_IPV4_PROT,
+	ICE_FLOW_FIELD_IDX_IPV6_TTL,
+	ICE_FLOW_FIELD_IDX_IPV6_PROT,
 	ICE_FLOW_FIELD_IDX_IPV4_SA,
 	ICE_FLOW_FIELD_IDX_IPV4_DA,
 	ICE_FLOW_FIELD_IDX_IPV6_SA,
@@ -158,6 +164,7 @@ struct ice_flow_seg_xtrct {
 	u16 off;	/* Starting offset of the field in header in bytes */
 	u8 idx;		/* Index of FV entry used */
 	u8 disp;	/* Displacement of field in bits fr. FV entry's start */
+	u16 mask;	/* Mask for field */
 };
 
 enum ice_flow_fld_match_type {
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 093a1818a392..a778e373eff1 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -306,8 +306,23 @@
 #define GLQF_FD_SIZE_FD_BSIZE_S			16
 #define GLQF_FD_SIZE_FD_BSIZE_M			ICE_M(0x7FFF, 16)
 #define GLQF_FDINSET(_i, _j)			(0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK(_i)				(0x00410800 + ((_i) * 4))
+#define GLQF_FDMASK_MAX_INDEX			31
+#define GLQF_FDMASK_MSK_INDEX_S			0
+#define GLQF_FDMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_FDMASK_MASK_S			16
+#define GLQF_FDMASK_MASK_M			ICE_M(0xFFFF, 16)
 #define GLQF_FDMASK_SEL(_i)			(0x00410400 + ((_i) * 4))
 #define GLQF_FDSWAP(_i, _j)			(0x00413000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_HMASK(_i)				(0x0040FC00 + ((_i) * 4))
+#define GLQF_HMASK_MAX_INDEX			31
+#define GLQF_HMASK_MSK_INDEX_S			0
+#define GLQF_HMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_HMASK_MASK_S			16
+#define GLQF_HMASK_MASK_M			ICE_M(0xFFFF, 16)
+#define GLQF_HMASK_SEL(_i)			(0x00410000 + ((_i) * 4))
+#define GLQF_HMASK_SEL_MAX_INDEX		127
+#define GLQF_HMASK_SEL_MASK_SEL_S		0
 #define PFQF_FD_ENA				0x0043A000
 #define PFQF_FD_ENA_FD_ENA_M			BIT(0)
 #define PFQF_FD_SIZE				0x00460100
-- 
cgit v1.2.3


From 0577313e53887493232206f1afe2a6584fa5e585 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:00 +0800
Subject: ice: Add more advanced protocol support in flow filter

Add more protocol support in flow filter, these
include PPPoE, L2TPv3, GTP, PFCP, ESP and AH.

Signed-off-by: Ting Xu <ting.xu@intel.com>
Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flow.c          | 271 ++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_flow.h          | 128 +++++++++-
 drivers/net/ethernet/intel/ice/ice_protocol_type.h |   4 +
 3 files changed, 397 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 39744773a403..fddf5c24930b 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -96,6 +96,38 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
 			  sizeof_field(struct gre_full_hdr, key)),
+	/* GTP */
+	/* ICE_FLOW_FIELD_IDX_GTPC_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPC_TEID, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_IP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_IP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_EH, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_QFI */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16),
+			      0x3f00),
+	/* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)),
+	/* PPPoE */
+	/* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)),
+	/* PFCP */
+	/* ICE_FLOW_FIELD_IDX_PFCP_SEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PFCP_SESSION, 12, sizeof(__be64)),
+	/* L2TPv3 */
+	/* ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV3, 0, sizeof(__be32)),
+	/* ESP */
+	/* ICE_FLOW_FIELD_IDX_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ESP, 0, sizeof(__be32)),
+	/* AH */
+	/* ICE_FLOW_FIELD_IDX_AH_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)),
+	/* NAT_T_ESP */
+	/* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)),
 };
 
 /* Bitmaps indicating relevant packet types for a particular protocol header
@@ -128,6 +160,8 @@ static const u32 ice_ptypes_macvlan_il[] = {
 /* Packet types for packets with an Outer/First/Single IPv4 header */
 static const u32 ice_ptypes_ipv4_ofos[] = {
 	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -141,7 +175,7 @@ static const u32 ice_ptypes_ipv4_ofos[] = {
 static const u32 ice_ptypes_ipv4_il[] = {
 	0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B,
 	0x0000000E, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x001FF800, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -152,6 +186,8 @@ static const u32 ice_ptypes_ipv4_il[] = {
 /* Packet types for packets with an Outer/First/Single IPv6 header */
 static const u32 ice_ptypes_ipv6_ofos[] = {
 	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00000000, 0x03F00000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -165,7 +201,7 @@ static const u32 ice_ptypes_ipv6_ofos[] = {
 static const u32 ice_ptypes_ipv6_il[] = {
 	0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000,
 	0x00000770, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FE00000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -239,7 +275,7 @@ static const u32 ice_ipv6_il_no_l4[] = {
 static const u32 ice_ptypes_udp_il[] = {
 	0x81000000, 0x20204040, 0x04000010, 0x80810102,
 	0x00000040, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00410000, 0x90842000, 0x00000007,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -251,7 +287,7 @@ static const u32 ice_ptypes_udp_il[] = {
 static const u32 ice_ptypes_tcp_il[] = {
 	0x04000000, 0x80810102, 0x10000040, 0x02040408,
 	0x00000102, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00820000, 0x21084000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -263,6 +299,7 @@ static const u32 ice_ptypes_tcp_il[] = {
 static const u32 ice_ptypes_sctp_il[] = {
 	0x08000000, 0x01020204, 0x20000081, 0x04080810,
 	0x00000204, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x01040000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -318,6 +355,125 @@ static const u32 ice_ptypes_mac_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for GTPC */
+static const u32 ice_ptypes_gtpc[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000180, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC with TEID */
+static const u32 ice_ptypes_gtpc_tid[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000060, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static const u32 ice_ptypes_gtpu[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FFFFE00, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for PPPoE */
+static const u32 ice_ptypes_pppoe[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x03ffe000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP NODE header */
+static const u32 ice_ptypes_pfcp_node[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x80000000, 0x00000002,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP SESSION header */
+static const u32 ice_ptypes_pfcp_session[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000005,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for L2TPv3 */
+static const u32 ice_ptypes_l2tpv3[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000300,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for ESP */
+static const u32 ice_ptypes_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000003, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for AH */
+static const u32 ice_ptypes_ah[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x0000000C, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with NAT_T ESP header */
+static const u32 ice_ptypes_nat_t_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000030, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Manage parameters and info. used during the creation of a flow profile */
 struct ice_flow_prof_params {
 	enum ice_block blk;
@@ -334,6 +490,15 @@ struct ice_flow_prof_params {
 	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
 };
 
+#define ICE_FLOW_RSS_HDRS_INNER_MASK \
+	(ICE_FLOW_SEG_HDR_PPPOE | ICE_FLOW_SEG_HDR_GTPC | \
+	ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_GTPU | \
+	ICE_FLOW_SEG_HDR_PFCP_SESSION | ICE_FLOW_SEG_HDR_L2TPV3 | \
+	ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_AH | \
+	ICE_FLOW_SEG_HDR_NAT_T_ESP)
+
+#define ICE_FLOW_SEG_HDRS_L2_MASK	\
+	(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
 #define ICE_FLOW_SEG_HDRS_L3_MASK	\
 	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP)
 #define ICE_FLOW_SEG_HDRS_L4_MASK	\
@@ -478,6 +643,16 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   ICE_FLOW_PTYPE_MAX);
 		}
 
+		if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+		}
+
 		if (hdrs & ICE_FLOW_SEG_HDR_UDP) {
 			src = (const unsigned long *)ice_ptypes_udp_il;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -503,6 +678,64 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				bitmap_and(params->ptypes, params->ptypes,
 					   src, ICE_FLOW_PTYPE_MAX);
 			}
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC) {
+			src = (const unsigned long *)ice_ptypes_gtpc;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC_TEID) {
+			src = (const unsigned long *)ice_ptypes_gtpc_tid;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_L2TPV3) {
+			src = (const unsigned long *)ice_ptypes_l2tpv3;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_ESP) {
+			src = (const unsigned long *)ice_ptypes_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_AH) {
+			src = (const unsigned long *)ice_ptypes_ah;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_NAT_T_ESP) {
+			src = (const unsigned long *)ice_ptypes_nat_t_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_PFCP) {
+			if (hdrs & ICE_FLOW_SEG_HDR_PFCP_NODE)
+				src = (const unsigned long *)ice_ptypes_pfcp_node;
+			else
+				src = (const unsigned long *)ice_ptypes_pfcp_session;
+
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pfcp_node;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+
+			src = (const unsigned long *)ice_ptypes_pfcp_session;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
 		}
 	}
 
@@ -611,6 +844,33 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
 		prot_id = ICE_PROT_SCTP_IL;
 		break;
+	case ICE_FLOW_FIELD_IDX_GTPC_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_IP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_UP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_QFI:
+		/* GTP is accessed through UDP OF protocol */
+		prot_id = ICE_PROT_UDP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID:
+		prot_id = ICE_PROT_PPPOE;
+		break;
+	case ICE_FLOW_FIELD_IDX_PFCP_SEID:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID:
+		prot_id = ICE_PROT_L2TPV3;
+		break;
+	case ICE_FLOW_FIELD_IDX_ESP_SPI:
+		prot_id = ICE_PROT_ESP_F;
+		break;
+	case ICE_FLOW_FIELD_IDX_AH_SPI:
+		prot_id = ICE_PROT_ESP_2;
+		break;
+	case ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
 	case ICE_FLOW_FIELD_IDX_ARP_SIP:
 	case ICE_FLOW_FIELD_IDX_ARP_DIP:
 	case ICE_FLOW_FIELD_IDX_ARP_SHA:
@@ -1446,7 +1706,8 @@ ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
 
 	ICE_FLOW_SET_HDRS(segs, flow_hdr);
 
-	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS)
+	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS &
+	    ~ICE_FLOW_RSS_HDRS_INNER_MASK)
 		return ICE_ERR_PARAM;
 
 	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 612ea3be21a9..feda2ffd47ae 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -30,6 +30,80 @@
 #define ICE_HASH_UDP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT)
 #define ICE_HASH_UDP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT)
 
+#define ICE_FLOW_HASH_GTP_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID))
+
+#define ICE_FLOW_HASH_GTP_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID)
+#define ICE_FLOW_HASH_GTP_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_TEID)
+#define ICE_FLOW_HASH_GTP_U_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_EH_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_EH_QFI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_QFI))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_EH \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+#define ICE_FLOW_HASH_GTP_U_IPV6_EH \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID))
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID_ETH \
+	(ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_TCP_ID \
+	(ICE_FLOW_HASH_TCP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_UDP_ID \
+	(ICE_FLOW_HASH_UDP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+
+#define ICE_FLOW_HASH_PFCP_SEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID))
+#define ICE_FLOW_HASH_PFCP_IPV4_SEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_PFCP_SEID)
+#define ICE_FLOW_HASH_PFCP_IPV6_SEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_PFCP_SEID)
+
+#define ICE_FLOW_HASH_L2TPV3_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID))
+#define ICE_FLOW_HASH_L2TPV3_IPV4_SESS_ID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+#define ICE_FLOW_HASH_L2TPV3_IPV6_SESS_ID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+
+#define ICE_FLOW_HASH_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI))
+#define ICE_FLOW_HASH_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_ESP_SPI)
+#define ICE_FLOW_HASH_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_ESP_SPI)
+
+#define ICE_FLOW_HASH_AH_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI))
+#define ICE_FLOW_HASH_AH_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_AH_SPI)
+#define ICE_FLOW_HASH_AH_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_AH_SPI)
+
+#define ICE_FLOW_HASH_NAT_T_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI))
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+
 /* Protocol header fields within a packet segment. A segment consists of one or
  * more protocol headers that make up a logical group of protocol headers. Each
  * logical group of protocol headers encapsulates or is encapsulated using/by
@@ -48,8 +122,37 @@ enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
 	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
 	ICE_FLOW_SEG_HDR_GRE		= 0x00000200,
+	ICE_FLOW_SEG_HDR_GTPC		= 0x00000400,
+	ICE_FLOW_SEG_HDR_GTPC_TEID	= 0x00000800,
+	ICE_FLOW_SEG_HDR_GTPU_IP	= 0x00001000,
+	ICE_FLOW_SEG_HDR_GTPU_EH	= 0x00002000,
+	ICE_FLOW_SEG_HDR_GTPU_DWN	= 0x00004000,
+	ICE_FLOW_SEG_HDR_GTPU_UP	= 0x00008000,
+	ICE_FLOW_SEG_HDR_PPPOE		= 0x00010000,
+	ICE_FLOW_SEG_HDR_PFCP_NODE	= 0x00020000,
+	ICE_FLOW_SEG_HDR_PFCP_SESSION	= 0x00040000,
+	ICE_FLOW_SEG_HDR_L2TPV3		= 0x00080000,
+	ICE_FLOW_SEG_HDR_ESP		= 0x00100000,
+	ICE_FLOW_SEG_HDR_AH		= 0x00200000,
+	ICE_FLOW_SEG_HDR_NAT_T_ESP	= 0x00400000,
 };
 
+/* These segments all have the same PTYPES, but are otherwise distinguished by
+ * the value of the gtp_eh_pdu and gtp_eh_pdu_link flags:
+ *
+ *                                gtp_eh_pdu     gtp_eh_pdu_link
+ * ICE_FLOW_SEG_HDR_GTPU_IP           0              0
+ * ICE_FLOW_SEG_HDR_GTPU_EH           1              don't care
+ * ICE_FLOW_SEG_HDR_GTPU_DWN          1              0
+ * ICE_FLOW_SEG_HDR_GTPU_UP           1              1
+ */
+#define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \
+			       ICE_FLOW_SEG_HDR_GTPU_EH | \
+			       ICE_FLOW_SEG_HDR_GTPU_DWN | \
+			       ICE_FLOW_SEG_HDR_GTPU_UP)
+#define ICE_FLOW_SEG_HDR_PFCP (ICE_FLOW_SEG_HDR_PFCP_NODE | \
+			       ICE_FLOW_SEG_HDR_PFCP_SESSION)
+
 enum ice_flow_field {
 	/* L2 */
 	ICE_FLOW_FIELD_IDX_ETH_DA,
@@ -87,7 +190,30 @@ enum ice_flow_field {
 	ICE_FLOW_FIELD_IDX_ICMP_CODE,
 	/* GRE */
 	ICE_FLOW_FIELD_IDX_GRE_KEYID,
-	/* The total number of enums must not exceed 64 */
+	/* GTPC_TEID */
+	ICE_FLOW_FIELD_IDX_GTPC_TEID,
+	/* GTPU_IP */
+	ICE_FLOW_FIELD_IDX_GTPU_IP_TEID,
+	/* GTPU_EH */
+	ICE_FLOW_FIELD_IDX_GTPU_EH_TEID,
+	ICE_FLOW_FIELD_IDX_GTPU_EH_QFI,
+	/* GTPU_UP */
+	ICE_FLOW_FIELD_IDX_GTPU_UP_TEID,
+	/* GTPU_DWN */
+	ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID,
+	/* PPPoE */
+	ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID,
+	/* PFCP */
+	ICE_FLOW_FIELD_IDX_PFCP_SEID,
+	/* L2TPv3 */
+	ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID,
+	/* ESP */
+	ICE_FLOW_FIELD_IDX_ESP_SPI,
+	/* AH */
+	ICE_FLOW_FIELD_IDX_AH_SPI,
+	/* NAT_T ESP */
+	ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+	 /* The total number of enums must not exceed 64 */
 	ICE_FLOW_FIELD_IDX_MAX
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index fac5f15a692c..199aa5b71540 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -24,9 +24,13 @@ enum ice_prot_id {
 	ICE_PROT_UDP_OF		= 52,
 	ICE_PROT_UDP_IL_OR_S	= 53,
 	ICE_PROT_GRE_OF		= 64,
+	ICE_PROT_ESP_F		= 88,
+	ICE_PROT_ESP_2		= 89,
 	ICE_PROT_SCTP_IL	= 96,
 	ICE_PROT_ICMP_IL	= 98,
 	ICE_PROT_ICMPV6_IL	= 100,
+	ICE_PROT_PPPOE		= 103,
+	ICE_PROT_L2TPV3		= 104,
 	ICE_PROT_ARP_OF		= 118,
 	ICE_PROT_META_ID	= 255, /* when offset == metadata */
 	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
-- 
cgit v1.2.3


From cbad5db88aaf42ca3855c5c485fb5a900caaadc5 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:01 +0800
Subject: ice: Support to separate GTP-U uplink and downlink

To apply different input set for GTP-U packet with or without extend
header as well as GTP-U uplink and downlink, we need to add TCAM mask
matching capability. This allows comprehending different PTYPE
attributes by examining flags from the parser. Using this method,
different profiles can be used by examining flag values from the parser.

Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 106 +++++++++++++++++++++----
 drivers/net/ethernet/intel/ice/ice_flex_pipe.h |   1 +
 drivers/net/ethernet/intel/ice/ice_flex_type.h |  61 ++++++++++++++
 drivers/net/ethernet/intel/ice/ice_flow.c      |  88 +++++++++++++++++++-
 4 files changed, 241 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index fa2ebd548b1c..afe77f7a3199 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2506,20 +2506,22 @@ static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
  * ice_alloc_tcam_ent - allocate hardware TCAM entry
  * @hw: pointer to the HW struct
  * @blk: the block to allocate the TCAM for
+ * @btm: true to allocate from bottom of table, false to allocate from top
  * @tcam_idx: pointer to variable to receive the TCAM entry
  *
  * This function allocates a new entry in a Profile ID TCAM for a specific
  * block.
  */
 static enum ice_status
-ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx)
+ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, bool btm,
+		   u16 *tcam_idx)
 {
 	u16 res_type;
 
 	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
 		return ICE_ERR_PARAM;
 
-	return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx);
+	return ice_alloc_hw_res(hw, res_type, 1, btm, tcam_idx);
 }
 
 /**
@@ -4113,12 +4115,67 @@ ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
 	return 0;
 }
 
+/* The entries here needs to match the order of enum ice_ptype_attrib */
+static const struct ice_ptype_attrib_info ice_ptype_attributes[] = {
+	{ ICE_GTP_PDU_EH,	ICE_GTP_PDU_FLAG_MASK },
+	{ ICE_GTP_SESSION,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_DOWNLINK,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_UPLINK,	ICE_GTP_FLAGS_MASK },
+};
+
+/**
+ * ice_get_ptype_attrib_info - get PTYPE attribute information
+ * @type: attribute type
+ * @info: pointer to variable to the attribute information
+ */
+static void
+ice_get_ptype_attrib_info(enum ice_ptype_attrib_type type,
+			  struct ice_ptype_attrib_info *info)
+{
+	*info = ice_ptype_attributes[type];
+}
+
+/**
+ * ice_add_prof_attrib - add any PTG with attributes to profile
+ * @prof: pointer to the profile to which PTG entries will be added
+ * @ptg: PTG to be added
+ * @ptype: PTYPE that needs to be looked up
+ * @attr: array of attributes that will be considered
+ * @attr_cnt: number of elements in the attribute array
+ */
+static enum ice_status
+ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype,
+		    const struct ice_ptype_attributes *attr, u16 attr_cnt)
+{
+	bool found = false;
+	u16 i;
+
+	for (i = 0; i < attr_cnt; i++)
+		if (attr[i].ptype == ptype) {
+			found = true;
+
+			prof->ptg[prof->ptg_cnt] = ptg;
+			ice_get_ptype_attrib_info(attr[i].attrib,
+						  &prof->attr[prof->ptg_cnt]);
+
+			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+				return ICE_ERR_MAX_LIMIT;
+		}
+
+	if (!found)
+		return ICE_ERR_DOES_NOT_EXIST;
+
+	return 0;
+}
+
 /**
  * ice_add_prof - add profile
  * @hw: pointer to the HW struct
  * @blk: hardware block
  * @id: profile tracking ID
  * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
+ * @attr: array of attributes
+ * @attr_cnt: number of elements in attr array
  * @es: extraction sequence (length of array is determined by the block)
  * @masks: mask for extraction sequence
  *
@@ -4129,6 +4186,7 @@ ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
  */
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
 	     struct ice_fv_word *es, u16 *masks)
 {
 	u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
@@ -4198,7 +4256,6 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 				 BITS_PER_BYTE) {
 			u16 ptype;
 			u8 ptg;
-			u8 m;
 
 			ptype = byte * BITS_PER_BYTE + bit;
 
@@ -4213,15 +4270,25 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 				continue;
 
 			set_bit(ptg, ptgs_used);
-			prof->ptg[prof->ptg_cnt] = ptg;
-
-			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+			/* Check to see there are any attributes for
+			 * this PTYPE, and add them if found.
+			 */
+			status = ice_add_prof_attrib(prof, ptg, ptype,
+						     attr, attr_cnt);
+			if (status == ICE_ERR_MAX_LIMIT)
 				break;
+			if (status) {
+				/* This is simple a PTYPE/PTG with no
+				 * attribute
+				 */
+				prof->ptg[prof->ptg_cnt] = ptg;
+				prof->attr[prof->ptg_cnt].flags = 0;
+				prof->attr[prof->ptg_cnt].mask = 0;
 
-			/* nothing left in byte, then exit */
-			m = ~(u8)((1 << (bit + 1)) - 1);
-			if (!(ptypes[byte] & m))
-				break;
+				if (++prof->ptg_cnt >=
+				    ICE_MAX_PTG_PER_PROFILE)
+					break;
+			}
 		}
 
 		bytes--;
@@ -4732,7 +4799,12 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
 	}
 
 	/* for re-enabling, reallocate a TCAM */
-	status = ice_alloc_tcam_ent(hw, blk, &tcam->tcam_idx);
+	/* for entries with empty attribute masks, allocate entry from
+	 * the bottom of the TCAM table; otherwise, allocate from the
+	 * top of the table in order to give it higher priority
+	 */
+	status = ice_alloc_tcam_ent(hw, blk, tcam->attr.mask == 0,
+				    &tcam->tcam_idx);
 	if (status)
 		return status;
 
@@ -4742,8 +4814,8 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
 		return ICE_ERR_NO_MEMORY;
 
 	status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id,
-				      tcam->ptg, vsig, 0, 0, vl_msk, dc_msk,
-				      nm_msk);
+				      tcam->ptg, vsig, 0, tcam->attr.flags,
+				      vl_msk, dc_msk, nm_msk);
 	if (status)
 		goto err_ice_prof_tcam_ena_dis;
 
@@ -4891,7 +4963,12 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
 		}
 
 		/* allocate the TCAM entry index */
-		status = ice_alloc_tcam_ent(hw, blk, &tcam_idx);
+		/* for entries with empty attribute masks, allocate entry from
+		 * the bottom of the TCAM table; otherwise, allocate from the
+		 * top of the table in order to give it higher priority
+		 */
+		status = ice_alloc_tcam_ent(hw, blk, map->attr[i].mask == 0,
+					    &tcam_idx);
 		if (status) {
 			devm_kfree(ice_hw_to_dev(hw), p);
 			goto err_ice_add_prof_id_vsig;
@@ -4900,6 +4977,7 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
 		t->tcam[i].ptg = map->ptg[i];
 		t->tcam[i].prof_id = map->prof_id;
 		t->tcam[i].tcam_idx = tcam_idx;
+		t->tcam[i].attr = map->attr[i];
 		t->tcam[i].in_use = true;
 
 		p->type = ICE_TCAM_ADD;
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index 08c5b4386536..8a58e79729b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -27,6 +27,7 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
 
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
 	     struct ice_fv_word *es, u16 *masks);
 enum ice_status
 ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index eeb53c3917dd..abc156ce9d8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -190,6 +190,64 @@ enum ice_sect {
 	ICE_SECT_COUNT
 };
 
+#define ICE_MAC_IPV4_GTPU_IPV4_FRAG	331
+#define ICE_MAC_IPV4_GTPU_IPV4_PAY	332
+#define ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY	333
+#define ICE_MAC_IPV4_GTPU_IPV4_TCP	334
+#define ICE_MAC_IPV4_GTPU_IPV4_ICMP	335
+#define ICE_MAC_IPV6_GTPU_IPV4_FRAG	336
+#define ICE_MAC_IPV6_GTPU_IPV4_PAY	337
+#define ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY	338
+#define ICE_MAC_IPV6_GTPU_IPV4_TCP	339
+#define ICE_MAC_IPV6_GTPU_IPV4_ICMP	340
+#define ICE_MAC_IPV4_GTPU_IPV6_FRAG	341
+#define ICE_MAC_IPV4_GTPU_IPV6_PAY	342
+#define ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY	343
+#define ICE_MAC_IPV4_GTPU_IPV6_TCP	344
+#define ICE_MAC_IPV4_GTPU_IPV6_ICMPV6	345
+#define ICE_MAC_IPV6_GTPU_IPV6_FRAG	346
+#define ICE_MAC_IPV6_GTPU_IPV6_PAY	347
+#define ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY	348
+#define ICE_MAC_IPV6_GTPU_IPV6_TCP	349
+#define ICE_MAC_IPV6_GTPU_IPV6_ICMPV6	350
+
+/* Attributes that can modify PTYPE definitions.
+ *
+ * These values will represent special attributes for PTYPEs, which will
+ * resolve into metadata packet flags definitions that can be used in the TCAM
+ * for identifying a PTYPE with specific characteristics.
+ */
+enum ice_ptype_attrib_type {
+	/* GTP PTYPEs */
+	ICE_PTYPE_ATTR_GTP_PDU_EH,
+	ICE_PTYPE_ATTR_GTP_SESSION,
+	ICE_PTYPE_ATTR_GTP_DOWNLINK,
+	ICE_PTYPE_ATTR_GTP_UPLINK,
+};
+
+struct ice_ptype_attrib_info {
+	u16 flags;
+	u16 mask;
+};
+
+/* TCAM flag definitions */
+#define ICE_GTP_PDU			BIT(14)
+#define ICE_GTP_PDU_LINK		BIT(13)
+
+/* GTP attributes */
+#define ICE_GTP_PDU_FLAG_MASK		(ICE_GTP_PDU)
+#define ICE_GTP_PDU_EH			ICE_GTP_PDU
+
+#define ICE_GTP_FLAGS_MASK		(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+#define ICE_GTP_SESSION			0
+#define ICE_GTP_DOWNLINK		ICE_GTP_PDU
+#define ICE_GTP_UPLINK			(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+
+struct ice_ptype_attributes {
+	u16 ptype;
+	enum ice_ptype_attrib_type attrib;
+};
+
 /* package labels */
 struct ice_label {
 	__le16 value;
@@ -373,12 +431,14 @@ struct ice_prof_map {
 	u8 prof_id;
 	u8 ptg_cnt;
 	u8 ptg[ICE_MAX_PTG_PER_PROFILE];
+	struct ice_ptype_attrib_info attr[ICE_MAX_PTG_PER_PROFILE];
 };
 
 #define ICE_INVALID_TCAM	0xFFFF
 
 struct ice_tcam_inf {
 	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
 	u8 ptg;
 	u8 prof_id;
 	u8 in_use;
@@ -530,6 +590,7 @@ struct ice_chs_chg {
 	u16 vsig;
 	u16 orig_vsig;
 	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
 };
 
 #define ICE_FLOW_PTYPE_MAX		ICE_XLT1_CNT
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index fddf5c24930b..5e8e0808a9b2 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -379,6 +379,76 @@ static const u32 ice_ptypes_gtpc_tid[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for GTPU */
+static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_down[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_up[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+};
+
 static const u32 ice_ptypes_gtpu[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -485,6 +555,9 @@ struct ice_flow_prof_params {
 	 * This will give us the direction flags.
 	 */
 	struct ice_fv_word es[ICE_MAX_FV_WORDS];
+	/* attributes can be used to add attributes to a particular PTYPE */
+	const struct ice_ptype_attributes *attr;
+	u16 attr_cnt;
 
 	u16 mask[ICE_MAX_FV_WORDS];
 	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
@@ -690,14 +763,26 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			src = (const unsigned long *)ice_ptypes_gtpu;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with downlink */
+			params->attr = ice_attr_gtpu_down;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) {
 			src = (const unsigned long *)ice_ptypes_gtpu;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with uplink */
+			params->attr = ice_attr_gtpu_up;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) {
 			src = (const unsigned long *)ice_ptypes_gtpu;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with Extension Header */
+			params->attr = ice_attr_gtpu_eh;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) {
 			src = (const unsigned long *)ice_ptypes_gtpu;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -1260,7 +1345,8 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
 
 	/* Add a HW profile for this flow profile */
 	status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes,
-			      params->es, params->mask);
+			      params->attr, params->attr_cnt, params->es,
+			      params->mask);
 	if (status) {
 		ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
 		goto out;
-- 
cgit v1.2.3


From 7012dfd1afc335f5e972a1c38b43c01d4b8a4309 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:02 +0800
Subject: ice: Enhanced IPv4 and IPv6 flow filter

Separate IPv4 and IPv6 ptype bit mask table into 2 tables:
with or without L4 protocols.

When a flow filter without any l4 type is specified, the
ICE_FLOW_SEG_HDR_IPV_OTHER flag can be used to describe if user
want to create a IP rule target for all IP packet or just IP
packet without l4 header.

Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flow.c | 54 ++++++++++++++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_flow.h |  4 +++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 5e8e0808a9b2..ded70e443e7b 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -157,7 +157,9 @@ static const u32 ice_ptypes_macvlan_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
-/* Packet types for packets with an Outer/First/Single IPv4 header */
+/* Packet types for packets with an Outer/First/Single IPv4 header, does NOT
+ * include IPv4 other PTYPEs
+ */
 static const u32 ice_ptypes_ipv4_ofos[] = {
 	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
 	0x00000000, 0x00000155, 0x00000000, 0x00000000,
@@ -167,6 +169,18 @@ static const u32 ice_ptypes_ipv4_ofos[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, includes
+ * IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos_all[] = {
+	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x83E0F800, 0x00000101,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
@@ -183,7 +197,9 @@ static const u32 ice_ptypes_ipv4_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
-/* Packet types for packets with an Outer/First/Single IPv6 header */
+/* Packet types for packets with an Outer/First/Single IPv6 header, does NOT
+ * include IPv6 other PTYPEs
+ */
 static const u32 ice_ptypes_ipv6_ofos[] = {
 	0x00000000, 0x00000000, 0x77000000, 0x10002000,
 	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
@@ -193,6 +209,18 @@ static const u32 ice_ptypes_ipv6_ofos[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, includes
+ * IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos_all[] = {
+	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
@@ -577,6 +605,9 @@ struct ice_flow_prof_params {
 #define ICE_FLOW_SEG_HDRS_L4_MASK	\
 	(ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \
 	 ICE_FLOW_SEG_HDR_SCTP)
+/* mask for L4 protocols that are NOT part of IPv4/6 OTHER PTYPE groups */
+#define ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER	\
+	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
 
 /**
  * ice_flow_val_hdrs - validates packet segments for valid protocol headers
@@ -692,8 +723,21 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   (const unsigned long *)ice_ptypes_arp_of,
 				   ICE_FLOW_PTYPE_MAX);
 		}
+
 		if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
-		    !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+		    (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv4_il :
+				(const unsigned long *)ice_ptypes_ipv4_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+			   (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv6_il :
+				(const unsigned long *)ice_ptypes_ipv6_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
 			src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 :
 				(const unsigned long *)ice_ipv4_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -704,7 +748,7 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
 		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
-			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) {
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
 			src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 :
 				(const unsigned long *)ice_ipv6_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
@@ -1793,7 +1837,7 @@ ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
 	ICE_FLOW_SET_HDRS(segs, flow_hdr);
 
 	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS &
-	    ~ICE_FLOW_RSS_HDRS_INNER_MASK)
+	    ~ICE_FLOW_RSS_HDRS_INNER_MASK & ~ICE_FLOW_SEG_HDR_IPV_OTHER)
 		return ICE_ERR_PARAM;
 
 	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index feda2ffd47ae..80bcd6fd21fc 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -135,6 +135,10 @@ enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_ESP		= 0x00100000,
 	ICE_FLOW_SEG_HDR_AH		= 0x00200000,
 	ICE_FLOW_SEG_HDR_NAT_T_ESP	= 0x00400000,
+	/* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and
+	 * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs
+	 */
+	ICE_FLOW_SEG_HDR_IPV_OTHER      = 0x20000000,
 };
 
 /* These segments all have the same PTYPES, but are otherwise distinguished by
-- 
cgit v1.2.3


From da62c5ff9dcdac67204d6647f3cd43ad931a59f4 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:03 +0800
Subject: ice: Add support for per VF ctrl VSI enabling

We are going to enable FDIR configure for AVF through virtual channel.
The first step is to add helper functions to support control VSI setup.
A control VSI will be allocated for a VF when AVF creates its
first FDIR rule through ice_vf_ctrl_vsi_setup().
The patch will also allocate FDIR rule space for VF's control VSI.
If a VF asks for flow director rules, then those should come entirely
from the best effort pool and not from the guaranteed pool. The patch
allow a VF VSI to have only space in the best effort rules.

Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |  4 +-
 drivers/net/ethernet/intel/ice/ice_lib.c         | 64 +++++++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_main.c        |  9 +++-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 61 ++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h |  2 +
 5 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 357706444dd5..7b45f2ab4036 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -73,7 +73,7 @@
 #define ICE_MIN_LAN_TXRX_MSIX	1
 #define ICE_MIN_LAN_OICR_MSIX	1
 #define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
-#define ICE_FDIR_MSIX		1
+#define ICE_FDIR_MSIX		2
 #define ICE_NO_VSI		0xffff
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
@@ -84,6 +84,8 @@
 #define ICE_MAX_LG_RSS_QS	256
 #define ICE_RES_VALID_BIT	0x8000
 #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
+/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
+#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
 #define ICE_INVAL_Q_INDEX	0xffff
 #define ICE_INVAL_VFID		256
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 8d4e2ad4328d..c345432fac72 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -343,6 +343,9 @@ static int ice_vsi_clear(struct ice_vsi *vsi)
 	pf->vsi[vsi->idx] = NULL;
 	if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
 		pf->next_vsi = vsi->idx;
+	if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL &&
+	    vsi->vf_id != ICE_INVAL_VFID)
+		pf->next_vsi = vsi->idx;
 
 	ice_vsi_free_arrays(vsi);
 	mutex_unlock(&pf->sw_mutex);
@@ -454,8 +457,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		goto unlock_pf;
 	}
 
-	if (vsi->type == ICE_VSI_CTRL) {
-		/* Use the last VSI slot as the index for the control VSI */
+	if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) {
+		/* Use the last VSI slot as the index for PF control VSI */
 		vsi->idx = pf->num_alloc_vsi - 1;
 		pf->ctrl_vsi_idx = vsi->idx;
 		pf->vsi[vsi->idx] = vsi;
@@ -468,6 +471,9 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
 						 pf->next_vsi);
 	}
+
+	if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID)
+		pf->vf[vf_id].ctrl_vsi_idx = vsi->idx;
 	goto unlock_pf;
 
 err_rings:
@@ -506,7 +512,7 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi)
 	if (!b_val)
 		return -EPERM;
 
-	if (vsi->type != ICE_VSI_PF)
+	if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF))
 		return -EPERM;
 
 	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
@@ -517,6 +523,13 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi)
 	/* each VSI gets same "best_effort" quota */
 	vsi->num_bfltr = b_val;
 
+	if (vsi->type == ICE_VSI_VF) {
+		vsi->num_gfltr = 0;
+
+		/* each VSI gets same "best_effort" quota */
+		vsi->num_bfltr = b_val;
+	}
+
 	return 0;
 }
 
@@ -856,7 +869,8 @@ static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 	u8 dflt_q_group, dflt_q_prio;
 	u16 dflt_q, report_q, val;
 
-	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL)
+	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
+	    vsi->type != ICE_VSI_VF)
 		return;
 
 	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
@@ -1179,7 +1193,24 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 
 	num_q_vectors = vsi->num_q_vectors;
 	/* reserve slots from OS requested IRQs */
-	base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx);
+	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
+		struct ice_vf *vf;
+		int i;
+
+		ice_for_each_vf(pf, i) {
+			vf = &pf->vf[i];
+			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+				base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
+				break;
+			}
+		}
+		if (i == pf->num_alloc_vfs)
+			base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+					   ICE_RES_VF_CTRL_VEC_ID);
+	} else {
+		base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+				   vsi->idx);
+	}
 
 	if (base < 0) {
 		dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
@@ -2308,7 +2339,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	struct ice_vsi *vsi;
 	int ret, i;
 
-	if (vsi_type == ICE_VSI_VF)
+	if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
 		vsi = ice_vsi_alloc(pf, vsi_type, vf_id);
 	else
 		vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID);
@@ -2323,7 +2354,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	if (vsi->type == ICE_VSI_PF)
 		vsi->ethtype = ETH_P_PAUSE;
 
-	if (vsi->type == ICE_VSI_VF)
+	if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL)
 		vsi->vf_id = vf_id;
 
 	ice_alloc_fd_res(vsi);
@@ -2770,7 +2801,24 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	 * many interrupts each VF needs. SR-IOV MSIX resources are also
 	 * cleared in the same manner.
 	 */
-	if (vsi->type != ICE_VSI_VF) {
+	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
+		struct ice_vf *vf;
+		int i;
+
+		ice_for_each_vf(pf, i) {
+			vf = &pf->vf[i];
+			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI)
+				break;
+		}
+		if (i == pf->num_alloc_vfs) {
+			/* No other VFs left that have control VSI, reclaim SW
+			 * interrupts back to the common pool
+			 */
+			ice_free_res(pf->irq_tracker, vsi->base_vector,
+				     ICE_RES_VF_CTRL_VEC_ID);
+			pf->num_avail_sw_msix += vsi->num_q_vectors;
+		}
+	} else if (vsi->type != ICE_VSI_VF) {
 		/* reclaim SW interrupts back to the common pool */
 		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 		pf->num_avail_sw_msix += vsi->num_q_vectors;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2c23c8f468a5..9cb5e653b38d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2220,8 +2220,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
-				       q_vector->name, q_vector);
+		if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID)
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       IRQF_SHARED, q_vector->name,
+					       q_vector);
+		else
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       0, q_vector->name, q_vector);
 		if (err) {
 			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
 				   err);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 1f38a8d0c525..fa72b7e2e433 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -201,6 +201,25 @@ static void ice_vf_vsi_release(struct ice_vf *vf)
 	ice_vf_invalidate_vsi(vf);
 }
 
+/**
+ * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access
+ * @vf: VF that control VSI is being invalidated on
+ */
+static void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->ctrl_vsi_idx = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it
+ * @vf: VF that control VSI is being released on
+ */
+static void ice_vf_ctrl_vsi_release(struct ice_vf *vf)
+{
+	ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]);
+	ice_vf_ctrl_invalidate_vsi(vf);
+}
+
 /**
  * ice_free_vf_res - Free a VF's resources
  * @vf: pointer to the VF info
@@ -214,6 +233,9 @@ static void ice_free_vf_res(struct ice_vf *vf)
 	 * accessing the VF's VSI after it's freed or invalidated.
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+	/* free VF control VSI */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
 
 	/* free VSI and disconnect it from the parent uplink */
 	if (vf->lan_vsi_idx != ICE_NO_VSI) {
@@ -559,6 +581,28 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
 	return vsi;
 }
 
+/**
+ * ice_vf_ctrl_vsi_setup - Set up a VF control VSI
+ * @vf: VF to setup control VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf)
+{
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n");
+		ice_vf_ctrl_invalidate_vsi(vf);
+	}
+
+	return vsi;
+}
+
 /**
  * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
  * @pf: pointer to PF structure
@@ -1256,6 +1300,12 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v) {
 		vf = &pf->vf[v];
 
+		/* clean VF control VSI when resetting VFs since it should be
+		 * setup only when VF creates its first FDIR rule.
+		 */
+		if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+			ice_vf_ctrl_invalidate_vsi(vf);
+
 		ice_vf_pre_vsi_rebuild(vf);
 		ice_vf_rebuild_vsi(vf);
 		ice_vf_post_vsi_rebuild(vf);
@@ -1374,6 +1424,12 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
 
+	/* clean VF control VSI when resetting VF since it should be setup
+	 * only when VF creates its first FDIR rule.
+	 */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
+
 	ice_vf_pre_vsi_rebuild(vf);
 	ice_vf_rebuild_vsi_with_release(vf);
 	ice_vf_post_vsi_rebuild(vf);
@@ -1549,6 +1605,11 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
 		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
 		vf->spoofchk = true;
 		vf->num_vf_qs = pf->num_qps_per_vf;
+
+		/* ctrl_vsi_idx will be set to a valid value only when VF
+		 * creates its first fdir rule.
+		 */
+		ice_vf_ctrl_invalidate_vsi(vf);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0f519fba3770..faa879d744a1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -70,6 +70,7 @@ struct ice_vf {
 
 	u16 vf_id;			/* VF ID in the PF space */
 	u16 lan_vsi_idx;		/* index into PF struct */
+	u16 ctrl_vsi_idx;
 	/* first vector index of this VF in the PF space */
 	int first_vector_idx;
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
@@ -138,6 +139,7 @@ void
 ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
 void ice_print_vfs_mdd_events(struct ice_pf *pf);
 void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
-- 
cgit v1.2.3


From 1f7ea1cd6a3748427512ccc9582e18cd9efea966 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:04 +0800
Subject: ice: Enable FDIR Configure for AVF

The virtual channel is going to be extended to support FDIR and
RSS configure from AVF. New data structures and OP codes will be
added, the patch enable the FDIR part.

To support above advanced AVF feature, we need to figure out
what kind of data structure should be passed from VF to PF to describe
an FDIR rule or RSS config rule. The common part of the requirement is
we need a data structure to represent the input set selection of a rule's
hash key.

An input set selection is a group of fields be selected from one or more
network protocol layers that could be identified as a specific flow.
For example, select dst IP address from an IPv4 header combined with
dst port from the TCP header as the input set for an IPv4/TCP flow.

The patch adds a new data structure virtchnl_proto_hdrs to abstract
a network protocol headers group which is composed of layers of network
protocol header(virtchnl_proto_hdr).

A protocol header contains a 32 bits mask (field_selector) to describe
which fields are selected as input sets, as well as a header type
(enum virtchnl_proto_hdr_type). Each bit is mapped to a field in
enum virtchnl_proto_hdr_field guided by its header type.

+------------+-----------+------------------------------+
|            | Proto Hdr | Header Type A                |
|            |           +------------------------------+
|            |           | BIT 31 | ... | BIT 1 | BIT 0 |
|            |-----------+------------------------------+
|Proto Hdrs  | Proto Hdr | Header Type B                |
|            |           +------------------------------+
|            |           | BIT 31 | ... | BIT 1 | BIT 0 |
|            |-----------+------------------------------+
|            | Proto Hdr | Header Type C                |
|            |           +------------------------------+
|            |           | BIT 31 | ... | BIT 1 | BIT 0 |
|            |-----------+------------------------------+
|            |    ....                                  |
+-------------------------------------------------------+

All fields in enum virtchnl_proto_hdr_fields are grouped with header type
and the value of the first field of a header type is always 32 aligned.

enum proto_hdr_type {
        header_type_A = 0;
        header_type_B = 1;
        ....
}

enum proto_hdr_field {
        /* header type A */
        header_A_field_0 = 0,
        header_A_field_1 = 1,
        header_A_field_2 = 2,
        header_A_field_3 = 3,

        /* header type B */
        header_B_field_0 = 32, // = header_type_B << 5
        header_B_field_0 = 33,
        header_B_field_0 = 34
        header_B_field_0 = 35,
        ....
};

So we have:
proto_hdr_type = proto_hdr_field / 32
bit offset = proto_hdr_field % 32

To simply the protocol header's operations, couple help macros are added.
For example, to select src IP and dst port as input set for an IPv4/UDP
flow.

we have:
struct virtchnl_proto_hdr hdr[2];

VIRTCHNL_SET_PROTO_HDR_TYPE(&hdr[0], IPV4)
VIRTCHNL_ADD_PROTO_HDR_FIELD(&hdr[0], IPV4, SRC)

VIRTCHNL_SET_PROTO_HDR_TYPE(&hdr[1], UDP)
VIRTCHNL_ADD_PROTO_HDR_FIELD(&hdr[1], UDP, DST)

The byte array is used to store the protocol header of a training package.
The byte array must be network order.

The patch added virtual channel support for iAVF FDIR add/validate/delete
filter. iAVF FDIR is Flow Director for Intel Adaptive Virtual Function
which can direct Ethernet packets to the queues of the Network Interface
Card. Add/delete command is adding or deleting one rule for each virtual
channel message, while validate command is just verifying if this rule
is valid without any other operations.

To add or delete one rule, driver needs to config TCAM and Profile,
build training packets which contains the input set value, and send
the training packets through FDIR Tx queue. In addition, driver needs to
manage the software context to avoid adding duplicated rules, deleting
non-existent rule, input set conflicts and other invalid cases.

NOTE:
Supported pattern/actions and their parse functions are not be included in
this patch, they will be added in a separate one.

Signed-off-by: Jeff Guo <jia.guo@intel.com>
Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Simei Su <simei.su@intel.com>
Signed-off-by: Beilei Xing <beilei.xing@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile            |    2 +-
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c  |    4 +
 drivers/net/ethernet/intel/ice/ice_fdir.c          |    6 +-
 drivers/net/ethernet/intel/ice/ice_fdir.h          |    5 +
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h     |    2 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 1034 ++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h |   24 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c   |   17 +-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h   |    6 +
 include/linux/avf/virtchnl.h                       |  278 ++++++
 10 files changed, 1372 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 73da4f71f530..f391691e2c7e 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -26,7 +26,7 @@ ice-y := ice_main.o	\
 	 ice_fw_update.o \
 	 ice_lag.o	\
 	 ice_ethtool.o
-ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
+ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
 ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
 ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 192729546bbf..440964defa4a 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1679,6 +1679,10 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 		input->flex_offset = userdata.flex_offset;
 	}
 
+	input->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
 	/* input struct is added to the HW filter list */
 	ice_fdir_update_list_entry(pf, input, fsp->location);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 0c2066c0ab1f..8f3e61c6bfd6 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -378,7 +378,7 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
 		fdir_fltr_ctx.qindex = input->q_index;
 	}
-	fdir_fltr_ctx.cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fdir_fltr_ctx.cnt_ena = input->cnt_ena;
 	fdir_fltr_ctx.cnt_index = input->cnt_index;
 	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
 	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
@@ -387,8 +387,8 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
 	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
 	fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
-	fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
-	fdir_fltr_ctx.fdid_prio = 3;
+	fdir_fltr_ctx.comp_report = input->comp_report;
+	fdir_fltr_ctx.fdid_prio = input->fdid_prio;
 	fdir_fltr_ctx.desc_prof = 1;
 	fdir_fltr_ctx.desc_prof_prio = 3;
 	ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 84b40298a513..93f3f0d9d37b 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -31,6 +31,8 @@
 #define ICE_IPV6_HLIM_OFFSET		21
 #define ICE_IPV6_PROTO_OFFSET		20
 
+#define ICE_FDIR_MAX_FLTRS		16384
+
 /* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF
  * requests that the packet not be fragmented. MF indicates that a packet has
  * been fragmented.
@@ -138,9 +140,12 @@ struct ice_fdir_fltr {
 	u16 q_index;
 	u16 dest_vsi;
 	u8 dest_ctl;
+	u8 cnt_ena;
 	u8 fltr_status;
 	u16 cnt_index;
 	u32 fltr_id;
+	u8 fdid_prio;
+	u8 comp_report;
 };
 
 /* Dummy packet filter definition structure */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 4ec24c3e813f..b30c22358c0a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -55,6 +55,7 @@ struct ice_fltr_desc {
 #define ICE_FXD_FLTR_QW0_COMP_REPORT_M	\
 				(0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
 #define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL	0x1ULL
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW		0x2ULL
 
 #define ICE_FXD_FLTR_QW0_FD_SPACE_S	14
 #define ICE_FXD_FLTR_QW0_FD_SPACE_M	(0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
@@ -128,6 +129,7 @@ struct ice_fltr_desc {
 #define ICE_FXD_FLTR_QW1_FDID_PRI_S	25
 #define ICE_FXD_FLTR_QW1_FDID_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
 #define ICE_FXD_FLTR_QW1_FDID_PRI_ONE	0x1ULL
+#define ICE_FXD_FLTR_QW1_FDID_PRI_THREE	0x3ULL
 
 #define ICE_FXD_FLTR_QW1_FDID_MDID_S	28
 #define ICE_FXD_FLTR_QW1_FDID_MDID_M	(0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
new file mode 100644
index 000000000000..6e7e8531d6ec
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+
+#define to_fltr_conf_from_desc(p) \
+	container_of(p, struct virtchnl_fdir_fltr_conf, input)
+
+#define ICE_FLOW_PROF_TYPE_S	0
+#define ICE_FLOW_PROF_TYPE_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_TYPE_S)
+#define ICE_FLOW_PROF_VSI_S	32
+#define ICE_FLOW_PROF_VSI_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_VSI_S)
+
+/* Flow profile ID format:
+ * [0:31] - flow type, flow + tun_offs
+ * [32:63] - VSI index
+ */
+#define ICE_FLOW_PROF_FD(vsi, flow, tun_offs) \
+	((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \
+	      (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M)))
+
+struct virtchnl_fdir_fltr_conf {
+	struct ice_fdir_fltr input;
+};
+
+struct virtchnl_fdir_inset_map {
+	enum virtchnl_proto_hdr_field field;
+	enum ice_flow_field fld;
+};
+
+static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
+	{VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA},
+	{VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA},
+	{VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP},
+	{VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL},
+	{VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT},
+	{VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA},
+	{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA},
+	{VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP},
+	{VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL},
+	{VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT},
+	{VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT},
+	{VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT},
+	{VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT},
+	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT},
+	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT},
+	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT},
+};
+
+/**
+ * ice_vc_fdir_param_check
+ * @vf: pointer to the VF structure
+ * @vsi_id: VF relative VSI ID
+ *
+ * Check for the valid VSI ID, PF's state and VF's state
+ *
+ * Return: 0 on success, and -EINVAL on error.
+ */
+static int
+ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EINVAL;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		return -EINVAL;
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
+		return -EINVAL;
+
+	if (vsi_id != vf->lan_vsi_num)
+		return -EINVAL;
+
+	if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
+		return -EINVAL;
+
+	if (!pf->vsi[vf->lan_vsi_idx])
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_vf_start_ctrl_vsi
+ * @vf: pointer to the VF structure
+ *
+ * Allocate ctrl_vsi for the first time and open the ctrl_vsi port for VF
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vf_start_ctrl_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *ctrl_vsi;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		return -EEXIST;
+
+	ctrl_vsi = ice_vf_ctrl_vsi_setup(vf);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Could not setup control VSI for VF %d\n",
+			vf->vf_id);
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "Could not open control VSI for VF %d\n",
+			vf->vf_id);
+		goto err_vsi_open;
+	}
+
+	return 0;
+
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[vf->ctrl_vsi_idx] = NULL;
+		vf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
+/**
+ * ice_vc_fdir_alloc_prof - allocate profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_alloc_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof) {
+		fdir->fdir_prof = devm_kcalloc(ice_pf_to_dev(vf->pf),
+					       ICE_FLTR_PTYPE_MAX,
+					       sizeof(*fdir->fdir_prof),
+					       GFP_KERNEL);
+		if (!fdir->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!fdir->fdir_prof[flow]) {
+		fdir->fdir_prof[flow] = devm_kzalloc(ice_pf_to_dev(vf->pf),
+						     sizeof(**fdir->fdir_prof),
+						     GFP_KERNEL);
+		if (!fdir->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_free_prof - free profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ */
+static void
+ice_vc_fdir_free_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	if (!fdir->fdir_prof[flow])
+		return;
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof[flow]);
+	fdir->fdir_prof[flow] = NULL;
+}
+
+/**
+ * ice_vc_fdir_free_prof_all - free all the profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_free_prof_all(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	enum ice_fltr_ptype flow;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_vc_fdir_free_prof(vf, flow);
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof);
+	fdir->fdir_prof = NULL;
+}
+
+/**
+ * ice_vc_fdir_parse_flow_fld
+ * @proto_hdr: virtual channel protocol filter header
+ * @conf: FDIR configuration for each filter
+ * @fld: field type array
+ * @fld_cnt: field counter
+ *
+ * Parse the virtual channel filter header and store them into field type array
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_flow_fld(struct virtchnl_proto_hdr *proto_hdr,
+			   struct virtchnl_fdir_fltr_conf *conf,
+			   enum ice_flow_field *fld, int *fld_cnt)
+{
+	struct virtchnl_proto_hdr hdr;
+	u32 i;
+
+	memcpy(&hdr, proto_hdr, sizeof(hdr));
+
+	for (i = 0; (i < ARRAY_SIZE(fdir_inset_map)) &&
+	     VIRTCHNL_GET_PROTO_HDR_FIELD(&hdr); i++)
+		if (VIRTCHNL_TEST_PROTO_HDR(&hdr, fdir_inset_map[i].field)) {
+			fld[*fld_cnt] = fdir_inset_map[i].fld;
+			*fld_cnt += 1;
+			if (*fld_cnt >= ICE_FLOW_FIELD_IDX_MAX)
+				return -EINVAL;
+			VIRTCHNL_DEL_PROTO_HDR_FIELD(&hdr,
+						     fdir_inset_map[i].field);
+		}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_fld
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Parse the virtual channel add msg buffer's field vector and store them into
+ * flow's packet segment field
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_fld(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	struct virtchnl_fdir_rule *rule = &fltr->rule_cfg;
+	enum ice_flow_field fld[ICE_FLOW_FIELD_IDX_MAX];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct virtchnl_proto_hdrs *proto;
+	int fld_cnt = 0;
+	int i;
+
+	proto = &rule->proto_hdrs;
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		int ret;
+
+		ret = ice_vc_fdir_parse_flow_fld(hdr, conf, fld, &fld_cnt);
+		if (ret)
+			return ret;
+	}
+
+	if (fld_cnt == 0) {
+		dev_dbg(dev, "Empty input set for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < fld_cnt; i++)
+		ice_flow_set_fld(seg, fld[i],
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_hdr - config the flow's packet segment header
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	enum ice_fltr_ptype flow = conf->input.flow_type;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	default:
+		dev_dbg(dev, "Invalid flow type 0x%x for VF %d failed\n",
+			flow, vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof - remove profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ */
+static void
+ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_fd_hw_prof *vf_prof;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vf_vsi;
+	struct device *dev;
+	struct ice_hw *hw;
+	u64 prof_id;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	if (!fdir->fdir_prof || !fdir->fdir_prof[flow])
+		return;
+
+	vf_prof = fdir->fdir_prof[flow];
+
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vf_vsi) {
+		dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id);
+		return;
+	}
+
+	if (!fdir->prof_entry_cnt[flow][tun])
+		return;
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num,
+				   flow, tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	for (i = 0; i < fdir->prof_entry_cnt[flow][tun]; i++)
+		if (vf_prof->entry_h[i][tun]) {
+			u16 vsi_num = ice_get_hw_vsi_num(hw, vf_prof->vsi_h[i]);
+
+			ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, ICE_BLK_FD,
+					   vf_prof->entry_h[i][tun]);
+			vf_prof->entry_h[i][tun] = 0;
+		}
+
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	devm_kfree(dev, vf_prof->fdir_seg[tun]);
+	vf_prof->fdir_seg[tun] = NULL;
+
+	for (i = 0; i < vf_prof->cnt; i++)
+		vf_prof->vsi_h[i] = 0;
+
+	fdir->prof_entry_cnt[flow][tun] = 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof_all - remove profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+{
+	enum ice_fltr_ptype flow;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+	     flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		ice_vc_fdir_rem_prof(vf, flow, 0);
+		ice_vc_fdir_rem_prof(vf, flow, 1);
+	}
+}
+
+/**
+ * ice_vc_fdir_write_flow_prof
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @seg: array of one or more packet segments that describe the flow
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Write the flow's profile config and packet segment into the hardware
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow,
+			    struct ice_flow_seg_info *seg, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_vsi *vf_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *vf_prof;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	u64 prof_id;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vf_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	vf_prof = fdir->fdir_prof[flow];
+	old_seg = vf_prof->fdir_seg[tun];
+	if (old_seg) {
+		if (!memcmp(old_seg, seg, sizeof(*seg))) {
+			dev_dbg(dev, "Duplicated profile for VF %d!\n",
+				vf->vf_id);
+			return -EEXIST;
+		}
+
+		if (fdir->fdir_fltr_cnt[flow][tun]) {
+			ret = -EINVAL;
+			dev_dbg(dev, "Input set conflicts for VF %d\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		/* remove previously allocated profile */
+		ice_vc_fdir_rem_prof(vf, flow, tun);
+	}
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, flow,
+				   tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				   tun + 1, &prof);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Could not add VSI flow 0x%x for VF %d\n",
+			flow, vf->vf_id);
+		goto err_exit;
+	}
+
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				    vf_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry1_h);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Could not add flow 0x%x VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_prof;
+	}
+
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				    ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry2_h);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev,
+			"Could not add flow 0x%x Ctrl VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_entry_1;
+	}
+
+	vf_prof->fdir_seg[tun] = seg;
+	vf_prof->cnt = 0;
+	fdir->prof_entry_cnt[flow][tun] = 0;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry1_h;
+	vf_prof->vsi_h[vf_prof->cnt] = vf_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry2_h;
+	vf_prof->vsi_h[vf_prof->cnt] = ctrl_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	return 0;
+
+err_entry_1:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, vf_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+err_exit:
+	return ret;
+}
+
+/**
+ * ice_vc_fdir_config_input_set
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Config the input set type and value for virtual channel add msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			     struct virtchnl_fdir_fltr_conf *conf, int tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_flow_seg_info *seg;
+	enum ice_fltr_ptype flow;
+	int ret;
+
+	flow = input->flow_type;
+	ret = ice_vc_fdir_alloc_prof(vf, flow);
+	if (ret) {
+		dev_dbg(dev, "Alloc flow prof for VF %d failed\n", vf->vf_id);
+		return ret;
+	}
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	ret = ice_vc_fdir_set_flow_fld(vf, fltr, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow field for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_set_flow_hdr(vf, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow hdr for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_flow_prof(vf, flow, seg, tun);
+	if (ret == -EEXIST) {
+		devm_kfree(dev, seg);
+	} else if (ret) {
+		dev_dbg(dev, "Write flow profile for VF %d failed\n",
+			vf->vf_id);
+		goto err_exit;
+	}
+
+	return 0;
+
+err_exit:
+	devm_kfree(dev, seg);
+	return ret;
+}
+
+/**
+ * ice_vc_validate_fdir_fltr - validate the virtual channel filter
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	/* Todo: rule validation */
+	return -EINVAL;
+}
+
+/**
+ * ice_vc_fdir_comp_rules - compare if two filter rules have the same value
+ * @conf_a: FDIR configuration for filter a
+ * @conf_b: FDIR configuration for filter b
+ *
+ * Return: 0 on success, and other on error.
+ */
+static bool
+ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
+		       struct virtchnl_fdir_fltr_conf *conf_b)
+{
+	struct ice_fdir_fltr *a = &conf_a->input;
+	struct ice_fdir_fltr *b = &conf_b->input;
+
+	if (a->flow_type != b->flow_type)
+		return false;
+	if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
+		return false;
+	if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
+		return false;
+	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
+		return false;
+	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_fdir_is_dup_fltr
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if there is duplicated rule with same conf value
+ *
+ * Return: 0 true success, and false on error.
+ */
+static bool
+ice_vc_fdir_is_dup_fltr(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct ice_fdir_fltr *desc;
+	bool ret;
+
+	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+		struct virtchnl_fdir_fltr_conf *node =
+				to_fltr_conf_from_desc(desc);
+
+		ret = ice_vc_fdir_comp_rules(node, conf);
+		if (ret)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_fdir_insert_entry
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: pointer to ID value allocated by driver
+ *
+ * Insert FDIR conf entry into list and allocate ID for this filter
+ *
+ * Return: 0 true success, and other on error.
+ */
+static int
+ice_vc_fdir_insert_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 *id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	/* alloc ID corresponding with conf */
+	i = idr_alloc(&vf->fdir.fdir_rule_idr, conf, 0,
+		      ICE_FDIR_MAX_FLTRS, GFP_KERNEL);
+	if (i < 0)
+		return -EINVAL;
+	*id = i;
+
+	list_add(&input->fltr_node, &vf->fdir.fdir_rule_list);
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_remove_entry - remove FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: filter rule's ID
+ */
+static void
+ice_vc_fdir_remove_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+
+	idr_remove(&vf->fdir.fdir_rule_idr, id);
+	list_del(&input->fltr_node);
+}
+
+/**
+ * ice_vc_fdir_lookup_entry - lookup FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @id: filter rule's ID
+ *
+ * Return: NULL on error, and other on success.
+ */
+static struct virtchnl_fdir_fltr_conf *
+ice_vc_fdir_lookup_entry(struct ice_vf *vf, u32 id)
+{
+	return idr_find(&vf->fdir.fdir_rule_idr, id);
+}
+
+/**
+ * ice_vc_fdir_flush_entry - remove all FDIR conf entry
+ * @vf: pointer to the VF info
+ */
+static void ice_vc_fdir_flush_entry(struct ice_vf *vf)
+{
+	struct virtchnl_fdir_fltr_conf *conf;
+	struct ice_fdir_fltr *desc, *temp;
+
+	list_for_each_entry_safe(desc, temp,
+				 &vf->fdir.fdir_rule_list, fltr_node) {
+		conf = to_fltr_conf_from_desc(desc);
+		list_del(&desc->fltr_node);
+		devm_kfree(ice_pf_to_dev(vf->pf), conf);
+	}
+}
+
+/**
+ * ice_vc_fdir_write_fltr - write filter rule into hardware
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @add: true implies add rule, false implies del rules
+ * @is_tun: false implies non-tunnel type filter, true implies tunnel filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vc_fdir_write_fltr(struct ice_vf *vf,
+				  struct virtchnl_fdir_fltr_conf *conf,
+				  bool add, bool is_tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct ice_vsi *vsi, *ctrl_vsi;
+	struct ice_fltr_desc desc;
+	enum ice_status status;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int ret;
+	u8 *pkt;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	input->dest_vsi = vsi->idx;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Invalid ctrl_vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	status = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	ret = ice_status_to_errno(status);
+	if (ret) {
+		dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n",
+			vf->vf_id, input->flow_type);
+		goto err_free_pkt;
+	}
+
+	ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (ret)
+		goto err_free_pkt;
+
+	return 0;
+
+err_free_pkt:
+	devm_kfree(dev, pkt);
+	return ret;
+}
+
+/**
+ * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_add *fltr = (struct virtchnl_fdir_add *)msg;
+	struct virtchnl_fdir_add *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vf_start_ctrl_vsi(vf);
+	if (ret && (ret != -EEXIST)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_err(dev, "Init FDIR for VF %d failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	conf = devm_kzalloc(dev, sizeof(*conf), GFP_KERNEL);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc conf for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+	ret = ice_vc_validate_fdir_fltr(vf, fltr, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	if (fltr->validate_only) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_SUCCESS;
+		devm_kfree(dev, conf);
+		ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER,
+					    v_ret, (u8 *)stat, len);
+		goto exit;
+	}
+
+	ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT;
+		dev_err(dev, "VF %d: FDIR input set configure failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_is_dup_fltr(vf, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_EXIST;
+		dev_dbg(dev, "VF %d: duplicated FDIR rule detected\n",
+			vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_insert_entry(vf, conf, &stat->flow_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: insert FDIR list failed\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_rem_entry;
+	}
+
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	stat->status = VIRTCHNL_FDIR_SUCCESS;
+exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+
+err_rem_entry:
+	ice_vc_fdir_remove_entry(vf, conf, stat->flow_id);
+err_free_conf:
+	devm_kfree(dev, conf);
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg;
+	struct virtchnl_fdir_del *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+
+	conf = ice_vc_fdir_lookup_entry(vf, fltr->flow_id);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		dev_dbg(dev, "VF %d: FDIR invalid flow_id:0x%X\n",
+			vf->vf_id, fltr->flow_id);
+		goto err_exit;
+	}
+
+	/* Just return failure when ctrl_vsi idx is invalid */
+	if (vf->ctrl_vsi_idx == ICE_NO_VSI) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "Invalid FDIR ctrl_vsi for VF %d\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_exit;
+	}
+
+	ice_vc_fdir_remove_entry(vf, conf, fltr->flow_id);
+	devm_kfree(dev, conf);
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	stat->status = VIRTCHNL_FDIR_SUCCESS;
+
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vf_fdir_init - init FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_init(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	idr_init(&fdir->fdir_rule_idr);
+	INIT_LIST_HEAD(&fdir->fdir_rule_list);
+}
+
+/**
+ * ice_vf_fdir_exit - destroy FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_exit(struct ice_vf *vf)
+{
+	ice_vc_fdir_flush_entry(vf);
+	idr_destroy(&vf->fdir.fdir_rule_idr);
+	ice_vc_fdir_rem_prof_all(vf);
+	ice_vc_fdir_free_prof_all(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
new file mode 100644
index 000000000000..2a2e0e598559
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_FDIR_H_
+#define _ICE_VIRTCHNL_FDIR_H_
+
+struct ice_vf;
+
+/* VF FDIR information structure */
+struct ice_vf_fdir {
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	struct ice_fd_hw_prof **fdir_prof;
+
+	struct idr fdir_rule_idr;
+	struct list_head fdir_rule_list;
+};
+
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg);
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg);
+void ice_vf_fdir_init(struct ice_vf *vf);
+void ice_vf_fdir_exit(struct ice_vf *vf);
+
+#endif /* _ICE_VIRTCHNL_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index fa72b7e2e433..20343a0fe726 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -233,6 +233,7 @@ static void ice_free_vf_res(struct ice_vf *vf)
 	 * accessing the VF's VSI after it's freed or invalidated.
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+	ice_vf_fdir_exit(vf);
 	/* free VF control VSI */
 	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
 		ice_vf_ctrl_vsi_release(vf);
@@ -1300,6 +1301,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v) {
 		vf = &pf->vf[v];
 
+		ice_vf_fdir_exit(vf);
 		/* clean VF control VSI when resetting VFs since it should be
 		 * setup only when VF creates its first FDIR rule.
 		 */
@@ -1424,6 +1426,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
 
+	ice_vf_fdir_exit(vf);
 	/* clean VF control VSI when resetting VF since it should be setup
 	 * only when VF creates its first FDIR rule.
 	 */
@@ -1610,6 +1613,7 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
 		 * creates its first fdir rule.
 		 */
 		ice_vf_ctrl_invalidate_vsi(vf);
+		ice_vf_fdir_init(vf);
 	}
 }
 
@@ -1909,7 +1913,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
  *
  * send msg to VF
  */
-static int
+int
 ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
 {
@@ -2057,6 +2061,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
 	}
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF;
+
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
 
@@ -2145,7 +2152,7 @@ static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id)
  *
  * check for the valid VSI ID
  */
-static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
 {
 	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
@@ -3877,6 +3884,12 @@ error_handler:
 	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
 		err = ice_vc_dis_vlan_stripping(vf);
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER:
+		err = ice_vc_add_fdir_fltr(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER:
+		err = ice_vc_del_fdir_fltr(vf, msg);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index faa879d744a1..46abc5388fc7 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -4,6 +4,7 @@
 #ifndef _ICE_VIRTCHNL_PF_H_
 #define _ICE_VIRTCHNL_PF_H_
 #include "ice.h"
+#include "ice_virtchnl_fdir.h"
 
 /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
 #define ICE_MAX_VLAN_PER_VF		8
@@ -71,6 +72,7 @@ struct ice_vf {
 	u16 vf_id;			/* VF ID in the PF space */
 	u16 lan_vsi_idx;		/* index into PF struct */
 	u16 ctrl_vsi_idx;
+	struct ice_vf_fdir fdir;
 	/* first vector index of this VF in the PF space */
 	int first_vector_idx;
 	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
@@ -140,6 +142,10 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
 void ice_print_vfs_mdd_events(struct ice_pf *pf);
 void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
 struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf);
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 40bad71865ea..47482049f640 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,6 +136,9 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
+	/* opcode 34 - 46 are reserved */
+	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
+	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
 };
 
 /* These macros are used to generate compilation errors if a structure/union
@@ -247,6 +250,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
+#define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
 
 /* Define below the capability flags that are not offloads */
 #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
@@ -559,6 +563,11 @@ enum virtchnl_action {
 	/* action types */
 	VIRTCHNL_ACTION_DROP = 0,
 	VIRTCHNL_ACTION_TC_REDIRECT,
+	VIRTCHNL_ACTION_PASSTHRU,
+	VIRTCHNL_ACTION_QUEUE,
+	VIRTCHNL_ACTION_Q_REGION,
+	VIRTCHNL_ACTION_MARK,
+	VIRTCHNL_ACTION_COUNT,
 };
 
 enum virtchnl_flow_type {
@@ -668,6 +677,269 @@ enum virtchnl_vfr_states {
 	VIRTCHNL_VFR_VFACTIVE,
 };
 
+#define VIRTCHNL_MAX_NUM_PROTO_HDRS	32
+#define PROTO_HDR_SHIFT			5
+#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
+#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1)
+
+/* VF use these macros to configure each protocol header.
+ * Specify which protocol headers and protocol header fields base on
+ * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field.
+ * @param hdr: a struct of virtchnl_proto_hdr
+ * @param hdr_type: ETH/IPV4/TCP, etc
+ * @param field: SRC/DST/TEID/SPI, etc
+ */
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \
+	((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \
+	((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \
+	((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK))
+#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr)	((hdr)->field_selector)
+
+#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+	(VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \
+		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
+	(VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \
+		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
+
+#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \
+	((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type)
+#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \
+	(((hdr)->type) >> PROTO_HDR_SHIFT)
+#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \
+	((hdr)->type == ((val) >> PROTO_HDR_SHIFT))
+#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \
+	(VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \
+	 VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val)))
+
+/* Protocol header type within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization.
+ */
+enum virtchnl_proto_hdr_type {
+	VIRTCHNL_PROTO_HDR_NONE,
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_S_VLAN,
+	VIRTCHNL_PROTO_HDR_C_VLAN,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_GTPU_EH,
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+	VIRTCHNL_PROTO_HDR_PPPOE,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_PFCP,
+};
+
+/* Protocol header field within a protocol header. */
+enum virtchnl_proto_hdr_field {
+	/* ETHER */
+	VIRTCHNL_PROTO_HDR_ETH_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH),
+	VIRTCHNL_PROTO_HDR_ETH_DST,
+	VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE,
+	/* S-VLAN */
+	VIRTCHNL_PROTO_HDR_S_VLAN_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN),
+	/* C-VLAN */
+	VIRTCHNL_PROTO_HDR_C_VLAN_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN),
+	/* IPV4 */
+	VIRTCHNL_PROTO_HDR_IPV4_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4),
+	VIRTCHNL_PROTO_HDR_IPV4_DST,
+	VIRTCHNL_PROTO_HDR_IPV4_DSCP,
+	VIRTCHNL_PROTO_HDR_IPV4_TTL,
+	VIRTCHNL_PROTO_HDR_IPV4_PROT,
+	/* IPV6 */
+	VIRTCHNL_PROTO_HDR_IPV6_SRC =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6),
+	VIRTCHNL_PROTO_HDR_IPV6_DST,
+	VIRTCHNL_PROTO_HDR_IPV6_TC,
+	VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
+	VIRTCHNL_PROTO_HDR_IPV6_PROT,
+	/* TCP */
+	VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP),
+	VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
+	/* UDP */
+	VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP),
+	VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
+	/* SCTP */
+	VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP),
+	VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
+	/* GTPU_IP */
+	VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP),
+	/* GTPU_EH */
+	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH),
+	VIRTCHNL_PROTO_HDR_GTPU_EH_QFI,
+	/* PPPOE */
+	VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE),
+	/* L2TPV3 */
+	VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3),
+	/* ESP */
+	VIRTCHNL_PROTO_HDR_ESP_SPI =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP),
+	/* AH */
+	VIRTCHNL_PROTO_HDR_AH_SPI =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH),
+	/* PFCP */
+	VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
+		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP),
+	VIRTCHNL_PROTO_HDR_PFCP_SEID,
+};
+
+struct virtchnl_proto_hdr {
+	enum virtchnl_proto_hdr_type type;
+	u32 field_selector; /* a bit mask to select field for header type */
+	u8 buffer[64];
+	/**
+	 * binary buffer in network order for specific header type.
+	 * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4
+	 * header is expected to be copied into the buffer.
+	 */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
+
+struct virtchnl_proto_hdrs {
+	u8 tunnel_level;
+	/**
+	 * specify where protocol header start from.
+	 * 0 - from the outer layer
+	 * 1 - from the first inner layer
+	 * 2 - from the second inner layer
+	 * ....
+	 **/
+	int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
+	struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
+
+/* action configuration for FDIR */
+struct virtchnl_filter_action {
+	enum virtchnl_action type;
+	union {
+		/* used for queue and qgroup action */
+		struct {
+			u16 index;
+			u8 region;
+		} queue;
+		/* used for count action */
+		struct {
+			/* share counter ID with other flow rules */
+			u8 shared;
+			u32 id; /* counter ID */
+		} count;
+		/* used for mark action */
+		u32 mark_id;
+		u8 reserve[32];
+	} act_conf;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action);
+
+#define VIRTCHNL_MAX_NUM_ACTIONS  8
+
+struct virtchnl_filter_action_set {
+	/* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */
+	int count;
+	struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS];
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set);
+
+/* pattern and action for FDIR rule */
+struct virtchnl_fdir_rule {
+	struct virtchnl_proto_hdrs proto_hdrs;
+	struct virtchnl_filter_action_set action_set;
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule);
+
+/* Status returned to VF after VF requests FDIR commands
+ * VIRTCHNL_FDIR_SUCCESS
+ * VF FDIR related request is successfully done by PF
+ * The request can be OP_ADD/DEL.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE
+ * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_EXIST
+ * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT
+ * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST
+ * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_INVALID
+ * OP_ADD_FDIR_FILTER request is failed due to parameters validation
+ * or HW doesn't support.
+ *
+ * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT
+ * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out
+ * for programming.
+ */
+enum virtchnl_fdir_prgm_status {
+	VIRTCHNL_FDIR_SUCCESS = 0,
+	VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE,
+	VIRTCHNL_FDIR_FAILURE_RULE_EXIST,
+	VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT,
+	VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST,
+	VIRTCHNL_FDIR_FAILURE_RULE_INVALID,
+	VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT,
+};
+
+/* VIRTCHNL_OP_ADD_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id,
+ * validate_only and rule_cfg. PF will return flow_id
+ * if the request is successfully done and return add_status to VF.
+ */
+struct virtchnl_fdir_add {
+	u16 vsi_id;  /* INPUT */
+	/*
+	 * 1 for validating a fdir rule, 0 for creating a fdir rule.
+	 * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER.
+	 */
+	u16 validate_only; /* INPUT */
+	u32 flow_id;       /* OUTPUT */
+	struct virtchnl_fdir_rule rule_cfg; /* INPUT */
+	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add);
+
+/* VIRTCHNL_OP_DEL_FDIR_FILTER
+ * VF sends this request to PF by filling out vsi_id
+ * and flow_id. PF will return del_status to VF.
+ */
+struct virtchnl_fdir_del {
+	u16 vsi_id;  /* INPUT */
+	u16 pad;
+	u32 flow_id; /* INPUT */
+	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
+
 /**
  * virtchnl_vc_validate_vf_msg
  * @ver: Virtchnl version info
@@ -828,6 +1100,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
 		valid_len = sizeof(struct virtchnl_filter);
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER:
+		valid_len = sizeof(struct virtchnl_fdir_add);
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER:
+		valid_len = sizeof(struct virtchnl_fdir_del);
+		break;
 	/* These are always errors coming from the VF. */
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
-- 
cgit v1.2.3


From 0ce332fd62f625dd0e269d7d9aef65b019c95a77 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:05 +0800
Subject: ice: Add FDIR pattern action parser for VF

Add basic FDIR flow list and pattern / action parse functions for VF.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 347 ++++++++++++++++++++-
 1 file changed, 345 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 6e7e8531d6ec..6c7a9d89be67 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -26,6 +26,77 @@ struct virtchnl_fdir_fltr_conf {
 	struct ice_fdir_fltr input;
 };
 
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_tcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_udp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_sctp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_tcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_TCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_udp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_sctp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_SCTP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+struct virtchnl_fdir_pattern_match_item {
+	enum virtchnl_proto_hdr_type *list;
+	u64 input_set;
+	u64 *meta;
+};
+
+static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern[] = {
+	{vc_pattern_ipv4,                     0,         NULL},
+	{vc_pattern_ipv4_tcp,                 0,         NULL},
+	{vc_pattern_ipv4_udp,                 0,         NULL},
+	{vc_pattern_ipv4_sctp,                0,         NULL},
+	{vc_pattern_ipv6,                     0,         NULL},
+	{vc_pattern_ipv6_tcp,                 0,         NULL},
+	{vc_pattern_ipv6_udp,                 0,         NULL},
+	{vc_pattern_ipv6_sctp,                0,         NULL},
+};
+
 struct virtchnl_fdir_inset_map {
 	enum virtchnl_proto_hdr_field field;
 	enum ice_flow_field fld;
@@ -598,6 +669,269 @@ err_exit:
 	return ret;
 }
 
+/**
+ * ice_vc_fdir_match_pattern
+ * @fltr: virtual channel add cmd buffer
+ * @type: virtual channel protocol filter header type
+ *
+ * Matching the header type by comparing fltr and type's value.
+ *
+ * Return: true on success, and false on error.
+ */
+static bool
+ice_vc_fdir_match_pattern(struct virtchnl_fdir_add *fltr,
+			  enum virtchnl_proto_hdr_type *type)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	int i = 0;
+
+	while ((i < proto->count) &&
+	       (*type == proto->proto_hdr[i].type) &&
+	       (*type != VIRTCHNL_PROTO_HDR_NONE)) {
+		type++;
+		i++;
+	}
+
+	return ((i == proto->count) && (*type == VIRTCHNL_PROTO_HDR_NONE));
+}
+
+/**
+ * ice_vc_fdir_get_pattern - get while list pattern
+ * @vf: pointer to the VF info
+ * @len: filter list length
+ *
+ * Return: pointer to allowed filter list
+ */
+static const struct virtchnl_fdir_pattern_match_item *
+ice_vc_fdir_get_pattern(struct ice_vf *vf, int *len)
+{
+	const struct virtchnl_fdir_pattern_match_item *item;
+
+	item = vc_fdir_pattern;
+	*len = ARRAY_SIZE(vc_fdir_pattern);
+
+	return item;
+}
+
+/**
+ * ice_vc_fdir_search_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ *
+ * Search for matched pattern from supported pattern list
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_search_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr)
+{
+	const struct virtchnl_fdir_pattern_match_item *pattern;
+	int len, i;
+
+	pattern = ice_vc_fdir_get_pattern(vf, &len);
+
+	for (i = 0; i < len; i++)
+		if (ice_vc_fdir_match_pattern(fltr, pattern[i].list))
+			return 0;
+
+	return -EINVAL;
+}
+
+/**
+ * ice_vc_fdir_parse_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's pattern and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	if (proto->count > VIRTCHNL_MAX_NUM_PROTO_HDRS) {
+		dev_dbg(dev, "Invalid protocol count:0x%x for VF %d\n",
+			proto->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		struct sctphdr *sctph;
+		struct ipv6hdr *ip6h;
+		struct udphdr *udph;
+		struct tcphdr *tcph;
+		struct iphdr *iph;
+
+		switch (hdr->type) {
+		case VIRTCHNL_PROTO_HDR_ETH:
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV4:
+			iph = (struct iphdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV4;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+
+			if (hdr->field_selector) {
+				input->ip.v4.src_ip = iph->saddr;
+				input->ip.v4.dst_ip = iph->daddr;
+				input->ip.v4.tos = iph->tos;
+				input->ip.v4.proto = iph->protocol;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV6:
+			ip6h = (struct ipv6hdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV6;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+
+			if (hdr->field_selector) {
+				memcpy(input->ip.v6.src_ip,
+				       ip6h->saddr.in6_u.u6_addr8,
+				       sizeof(ip6h->saddr));
+				memcpy(input->ip.v6.dst_ip,
+				       ip6h->daddr.in6_u.u6_addr8,
+				       sizeof(ip6h->daddr));
+				input->ip.v6.tc = ((u8)(ip6h->priority) << 4) |
+						  (ip6h->flow_lbl[0] >> 4);
+				input->ip.v6.proto = ip6h->nexthdr;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_TCP:
+			tcph = (struct tcphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = tcph->source;
+					input->ip.v4.dst_port = tcph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = tcph->source;
+					input->ip.v6.dst_port = tcph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_UDP:
+			udph = (struct udphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = udph->source;
+					input->ip.v4.dst_port = udph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = udph->source;
+					input->ip.v6.dst_port = udph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_SCTP:
+			sctph = (struct sctphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = sctph->source;
+					input->ip.v4.dst_port = sctph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = sctph->source;
+					input->ip.v6.dst_port = sctph->dest;
+				}
+			}
+			break;
+		default:
+			dev_dbg(dev, "Invalid header type 0x:%x for VF %d\n",
+				hdr->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_parse_action
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's action and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_filter_action_set *as = &fltr->rule_cfg.action_set;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	u32 dest_num = 0;
+	u32 mark_num = 0;
+	int i;
+
+	if (as->count > VIRTCHNL_MAX_NUM_ACTIONS) {
+		dev_dbg(dev, "Invalid action numbers:0x%x for VF %d\n",
+			as->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < as->count; i++) {
+		struct virtchnl_filter_action *action = &as->actions[i];
+
+		switch (action->type) {
+		case VIRTCHNL_ACTION_DROP:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+			break;
+		case VIRTCHNL_ACTION_QUEUE:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+			input->q_index = action->act_conf.queue.index;
+			break;
+		case VIRTCHNL_ACTION_MARK:
+			mark_num++;
+			input->fltr_id = action->act_conf.mark_id;
+			input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+			break;
+		default:
+			dev_dbg(dev, "Invalid action type:0x%x for VF %d\n",
+				action->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	if (dest_num == 0 || dest_num >= 2) {
+		dev_dbg(dev, "Invalid destination action for VF %d\n",
+			vf->vf_id);
+		return -EINVAL;
+	}
+
+	if (mark_num >= 2) {
+		dev_dbg(dev, "Too many mark actions for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * ice_vc_validate_fdir_fltr - validate the virtual channel filter
  * @vf: pointer to the VF info
@@ -610,8 +944,17 @@ static int
 ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 			  struct virtchnl_fdir_fltr_conf *conf)
 {
-	/* Todo: rule validation */
-	return -EINVAL;
+	int ret;
+
+	ret = ice_vc_fdir_search_pattern(vf, fltr);
+	if (ret)
+		return ret;
+
+	ret = ice_vc_fdir_parse_pattern(vf, fltr, conf);
+	if (ret)
+		return ret;
+
+	return ice_vc_fdir_parse_action(vf, fltr, conf);
 }
 
 /**
-- 
cgit v1.2.3


From 346bf25043976fe106cd4f739fc67765ac292a3a Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:06 +0800
Subject: ice: Add new actions support for VF FDIR

Add two new actions support for VF FDIR:

A passthrough action does not specify the destination queue, but
just allow the packet go to next pipeline stage, a typical use
cases is combined with a software mark (FDID) action.

Allow specify a 2^n continuous queues as the destination of a FDIR rule.
Packet distribution is based on current RSS configure.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c          | 12 +++++++++++-
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  4 ++++
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 10 ++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 8f3e61c6bfd6..5f8d7a9ca068 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -374,7 +374,14 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
 	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
 		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
 		fdir_fltr_ctx.qindex = 0;
+	} else if (input->dest_ctl ==
+		   ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = 0;
 	} else {
+		if (input->dest_ctl ==
+		    ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP)
+			fdir_fltr_ctx.toq = input->q_region;
 		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
 		fdir_fltr_ctx.qindex = input->q_index;
 	}
@@ -382,7 +389,10 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
 	fdir_fltr_ctx.cnt_index = input->cnt_index;
 	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
 	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
-	fdir_fltr_ctx.toq_prio = 3;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER)
+		fdir_fltr_ctx.toq_prio = 0;
+	else
+		fdir_fltr_ctx.toq_prio = 3;
 	fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
 		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
 	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 93f3f0d9d37b..adf237925b3c 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -42,6 +42,8 @@
 enum ice_fltr_prgm_desc_dest {
 	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
 	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER,
 };
 
 enum ice_fltr_prgm_desc_fd_status {
@@ -133,6 +135,8 @@ struct ice_fdir_fltr {
 
 	/* flex byte filter data */
 	__be16 flex_word;
+	/* queue region size (=2^q_region) */
+	u8 q_region;
 	u16 flex_offset;
 	u16 flex_fltr;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 6c7a9d89be67..a1b6d3771e2c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -897,6 +897,10 @@ ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 		struct virtchnl_filter_action *action = &as->actions[i];
 
 		switch (action->type) {
+		case VIRTCHNL_ACTION_PASSTHRU:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER;
+			break;
 		case VIRTCHNL_ACTION_DROP:
 			dest_num++;
 			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
@@ -906,6 +910,12 @@ ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
 			input->q_index = action->act_conf.queue.index;
 			break;
+		case VIRTCHNL_ACTION_Q_REGION:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP;
+			input->q_index = action->act_conf.queue.index;
+			input->q_region = action->act_conf.queue.region;
+			break;
 		case VIRTCHNL_ACTION_MARK:
 			mark_num++;
 			input->fltr_id = action->act_conf.mark_id;
-- 
cgit v1.2.3


From 21606584f1bb4c76aeb5a113e0e8a72681a270e4 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:07 +0800
Subject: ice: Add non-IP Layer2 protocol FDIR filter for AVF

Add new filter type that allow forward non-IP Ethernet packets base on its
ethertype. The filter is only enabled when COMMS DDP package is loaded.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c          | 15 ++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  2 ++
 drivers/net/ethernet/intel/ice/ice_flow.c          | 17 ++++++++-
 drivers/net/ethernet/intel/ice/ice_flow.h          |  1 +
 drivers/net/ethernet/intel/ice/ice_type.h          |  1 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 41 ++++++++++++++++++++--
 6 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 5f8d7a9ca068..b9c65d0c9271 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -40,6 +40,12 @@ static const u8 ice_fdir_ipv4_pkt[] = {
 	0x00, 0x00
 };
 
+static const u8 ice_fdir_non_ip_l2_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 static const u8 ice_fdir_tcpv6_pkt[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
@@ -238,6 +244,11 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
 		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
 	},
+	{
+		ICE_FLTR_PTYPE_NON_IP_L2,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+	},
 	{
 		ICE_FLTR_PTYPE_NONF_IPV6_TCP,
 		sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
@@ -674,6 +685,10 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				  input->ip.v4.proto);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
+				   input->ext_data.ether_type);
+		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
 		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
 					 input->ip.v6.src_ip);
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index adf237925b3c..7db6bc3a4a56 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -25,6 +25,7 @@
 #define ICE_IPV6_UDP_DST_PORT_OFFSET	56
 #define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
 #define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+#define ICE_MAC_ETHTYPE_OFFSET		12
 #define ICE_IPV4_TOS_OFFSET		15
 #define ICE_IPV4_TTL_OFFSET		22
 #define ICE_IPV6_TC_OFFSET		14
@@ -116,6 +117,7 @@ struct ice_fdir_v6 {
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
 	u8 src_mac[ETH_ALEN];	/* src MAC address */
+	__be16 ether_type;	/* for NON_IP_L2 */
 	u32 usr_def[2];		/* user data */
 	__be16 vlan_type;	/* VLAN ethertype */
 	__be16 vlan_tag;	/* VLAN tag info */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index ded70e443e7b..8e8bfc6fa2b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -572,6 +572,17 @@ static const u32 ice_ptypes_nat_t_esp[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+static const u32 ice_ptypes_mac_non_ip_ofos[] = {
+	0x00000846, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Manage parameters and info. used during the creation of a flow profile */
 struct ice_flow_prof_params {
 	enum ice_block blk;
@@ -760,7 +771,11 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   ICE_FLOW_PTYPE_MAX);
 		}
 
-		if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH_NON_IP) {
+			src = (const unsigned long *)ice_ptypes_mac_non_ip_ofos;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
 			src = (const unsigned long *)ice_ptypes_pppoe;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 80bcd6fd21fc..eec9def8ffca 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -135,6 +135,7 @@ enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_ESP		= 0x00100000,
 	ICE_FLOW_SEG_HDR_AH		= 0x00200000,
 	ICE_FLOW_SEG_HDR_NAT_T_ESP	= 0x00400000,
+	ICE_FLOW_SEG_HDR_ETH_NON_IP	= 0x00800000,
 	/* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and
 	 * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs
 	 */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a6cb0c35748c..de4ddf272b31 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -192,6 +192,7 @@ enum ice_fltr_ptype {
 	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
 	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
 	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NON_IP_L2,
 	ICE_FLTR_PTYPE_FRAG_IPV4,
 	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
 	ICE_FLTR_PTYPE_NONF_IPV6_TCP,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index a1b6d3771e2c..f211c28b65fb 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -26,6 +26,11 @@ struct virtchnl_fdir_fltr_conf {
 	struct ice_fdir_fltr input;
 };
 
+static enum virtchnl_proto_hdr_type vc_pattern_ether[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
 static enum virtchnl_proto_hdr_type vc_pattern_ipv4[] = {
 	VIRTCHNL_PROTO_HDR_ETH,
 	VIRTCHNL_PROTO_HDR_IPV4,
@@ -86,7 +91,18 @@ struct virtchnl_fdir_pattern_match_item {
 	u64 *meta;
 };
 
-static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern[] = {
+static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_os[] = {
+	{vc_pattern_ipv4,                     0,         NULL},
+	{vc_pattern_ipv4_tcp,                 0,         NULL},
+	{vc_pattern_ipv4_udp,                 0,         NULL},
+	{vc_pattern_ipv4_sctp,                0,         NULL},
+	{vc_pattern_ipv6,                     0,         NULL},
+	{vc_pattern_ipv6_tcp,                 0,         NULL},
+	{vc_pattern_ipv6_udp,                 0,         NULL},
+	{vc_pattern_ipv6_sctp,                0,         NULL},
+};
+
+static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_comms[] = {
 	{vc_pattern_ipv4,                     0,         NULL},
 	{vc_pattern_ipv4_tcp,                 0,         NULL},
 	{vc_pattern_ipv4_udp,                 0,         NULL},
@@ -95,6 +111,7 @@ static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern[] = {
 	{vc_pattern_ipv6_tcp,                 0,         NULL},
 	{vc_pattern_ipv6_udp,                 0,         NULL},
 	{vc_pattern_ipv6_sctp,                0,         NULL},
+	{vc_pattern_ether,                    0,         NULL},
 };
 
 struct virtchnl_fdir_inset_map {
@@ -371,6 +388,9 @@ ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
 	struct device *dev = ice_pf_to_dev(vf->pf);
 
 	switch (flow) {
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP);
+		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
 				  ICE_FLOW_SEG_HDR_IPV_OTHER);
@@ -706,9 +726,18 @@ static const struct virtchnl_fdir_pattern_match_item *
 ice_vc_fdir_get_pattern(struct ice_vf *vf, int *len)
 {
 	const struct virtchnl_fdir_pattern_match_item *item;
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
 
-	item = vc_fdir_pattern;
-	*len = ARRAY_SIZE(vc_fdir_pattern);
+	hw = &pf->hw;
+	if (!strncmp(hw->active_pkg_name, "ICE COMMS Package",
+		     sizeof(hw->active_pkg_name))) {
+		item = vc_fdir_pattern_comms;
+		*len = ARRAY_SIZE(vc_fdir_pattern_comms);
+	} else {
+		item = vc_fdir_pattern_os;
+		*len = ARRAY_SIZE(vc_fdir_pattern_os);
+	}
 
 	return item;
 }
@@ -769,10 +798,16 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 		struct ipv6hdr *ip6h;
 		struct udphdr *udph;
 		struct tcphdr *tcph;
+		struct ethhdr *eth;
 		struct iphdr *iph;
 
 		switch (hdr->type) {
 		case VIRTCHNL_PROTO_HDR_ETH:
+			eth = (struct ethhdr *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NON_IP_L2;
+
+			if (hdr->field_selector)
+				input->ext_data.ether_type = eth->h_proto;
 			break;
 		case VIRTCHNL_PROTO_HDR_IPV4:
 			iph = (struct iphdr *)hdr->buffer;
-- 
cgit v1.2.3


From ef9e4cc589cafd5c775d4501815e1ebc3110cdb6 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:08 +0800
Subject: ice: Add GTPU FDIR filter for AVF

Add new FDIR filter type to forward GTPU packets by matching TEID or QFI.
The filter is only enabled when COMMS DDP package is downloaded.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c          | 117 +++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  23 ++++
 drivers/net/ethernet/intel/ice/ice_type.h          |   4 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c |  74 +++++++++++++
 4 files changed, 218 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index b9c65d0c9271..e0b1aa1c5f2c 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -40,6 +40,66 @@ static const u8 ice_fdir_ipv4_pkt[] = {
 	0x00, 0x00
 };
 
+static const u8 ice_fdir_udp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x58, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_icmp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x44, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
 static const u8 ice_fdir_non_ip_l2_pkt[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -244,6 +304,34 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
 		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
 	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+	},
 	{
 		ICE_FLTR_PTYPE_NON_IP_L2,
 		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
@@ -491,6 +579,22 @@ static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
 		       sizeof(*addr));
 }
 
+/**
+ * ice_pkt_insert_u6_qfi - insert a u6 value QFI into a memory buffer for GTPU
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting QFI (6 bits) for GTPU.
+ */
+static void ice_pkt_insert_u6_qfi(u8 *pkt, int offset, u8 data)
+{
+	u8 ret;
+
+	ret = (data & 0x3F) + (*(pkt + offset) & 0xC0);
+	memcpy(pkt + offset, &ret, sizeof(ret));
+}
+
 /**
  * ice_pkt_insert_u8 - insert a u8 value into a memory buffer.
  * @pkt: packet buffer
@@ -685,6 +789,19 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 				  input->ip.v4.proto);
 		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET,
+				   input->gtpu_data.teid);
+		ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET,
+				      input->gtpu_data.qfi);
+		break;
 	case ICE_FLTR_PTYPE_NON_IP_L2:
 		ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
 				   input->ext_data.ether_type);
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 7db6bc3a4a56..2945421d5b6c 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -31,6 +31,8 @@
 #define ICE_IPV6_TC_OFFSET		14
 #define ICE_IPV6_HLIM_OFFSET		21
 #define ICE_IPV6_PROTO_OFFSET		20
+#define ICE_IPV4_GTPU_TEID_OFFSET	46
+#define ICE_IPV4_GTPU_QFI_OFFSET	56
 
 #define ICE_FDIR_MAX_FLTRS		16384
 
@@ -114,6 +116,24 @@ struct ice_fdir_v6 {
 	u8 hlim;
 };
 
+struct ice_fdir_udp_gtp {
+	u8 flags;
+	u8 msg_type;
+	__be16 rsrvd_len;
+	__be32 teid;
+	__be16 rsrvd_seq_nbr;
+	u8 rsrvd_n_pdu_nbr;
+	u8 rsrvd_next_ext_type;
+	u8 rsvrd_ext_len;
+	u8	pdu_type:4,
+		spare:4;
+	u8	ppp:1,
+		rqi:1,
+		qfi:6;
+	u32 rsvrd;
+	u8 next_ext;
+};
+
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
 	u8 src_mac[ETH_ALEN];	/* src MAC address */
@@ -132,6 +152,9 @@ struct ice_fdir_fltr {
 		struct ice_fdir_v6 v6;
 	} ip, mask;
 
+	struct ice_fdir_udp_gtp gtpu_data;
+	struct ice_fdir_udp_gtp gtpu_mask;
+
 	struct ice_fdir_extra ext_data;
 	struct ice_fdir_extra ext_mask;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index de4ddf272b31..c55076d20bea 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -192,6 +192,10 @@ enum ice_fltr_ptype {
 	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
 	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
 	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
 	ICE_FLTR_PTYPE_NON_IP_L2,
 	ICE_FLTR_PTYPE_FRAG_IPV4,
 	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index f211c28b65fb..e0bcd5baab60 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -22,8 +22,19 @@
 	((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \
 	      (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M)))
 
+#define GTPU_TEID_OFFSET 4
+#define GTPU_EH_QFI_OFFSET 1
+#define GTPU_EH_QFI_MASK 0x3F
+
+enum ice_fdir_tunnel_type {
+	ICE_FDIR_TUNNEL_TYPE_NONE = 0,
+	ICE_FDIR_TUNNEL_TYPE_GTPU,
+	ICE_FDIR_TUNNEL_TYPE_GTPU_EH,
+};
+
 struct virtchnl_fdir_fltr_conf {
 	struct ice_fdir_fltr input;
+	enum ice_fdir_tunnel_type ttype;
 };
 
 static enum virtchnl_proto_hdr_type vc_pattern_ether[] = {
@@ -85,6 +96,23 @@ static enum virtchnl_proto_hdr_type vc_pattern_ipv6_sctp[] = {
 	VIRTCHNL_PROTO_HDR_NONE,
 };
 
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu_eh[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_GTPU_IP,
+	VIRTCHNL_PROTO_HDR_GTPU_EH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
 struct virtchnl_fdir_pattern_match_item {
 	enum virtchnl_proto_hdr_type *list;
 	u64 input_set;
@@ -112,6 +140,8 @@ static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_comms[] = {
 	{vc_pattern_ipv6_udp,                 0,         NULL},
 	{vc_pattern_ipv6_sctp,                0,         NULL},
 	{vc_pattern_ether,                    0,         NULL},
+	{vc_pattern_ipv4_gtpu,                0,         NULL},
+	{vc_pattern_ipv4_gtpu_eh,             0,         NULL},
 };
 
 struct virtchnl_fdir_inset_map {
@@ -136,6 +166,8 @@ static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
 	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT},
 	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT},
 	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI},
 };
 
 /**
@@ -385,6 +417,7 @@ ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
 			 struct ice_flow_seg_info *seg)
 {
 	enum ice_fltr_ptype flow = conf->input.flow_type;
+	enum ice_fdir_tunnel_type ttype = conf->ttype;
 	struct device *dev = ice_pf_to_dev(vf->pf);
 
 	switch (flow) {
@@ -405,6 +438,25 @@ ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
 				  ICE_FLOW_SEG_HDR_IPV4 |
 				  ICE_FLOW_SEG_HDR_IPV_OTHER);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH |
+					  ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else {
+			dev_dbg(dev, "Invalid tunnel type 0x%x for VF %d\n",
+				flow, vf->vf_id);
+			return -EINVAL;
+		}
+		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
 		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
 				  ICE_FLOW_SEG_HDR_IPV4 |
@@ -800,6 +852,7 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 		struct tcphdr *tcph;
 		struct ethhdr *eth;
 		struct iphdr *iph;
+		u8 *rawh;
 
 		switch (hdr->type) {
 		case VIRTCHNL_PROTO_HDR_ETH:
@@ -891,6 +944,21 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 				}
 			}
 			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_IP:
+			rawh = (u8 *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
+
+			if (hdr->field_selector)
+				input->gtpu_data.teid = *(__be32 *)(&rawh[GTPU_TEID_OFFSET]);
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU;
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_EH:
+			rawh = (u8 *)hdr->buffer;
+
+			if (hdr->field_selector)
+				input->gtpu_data.qfi = rawh[GTPU_EH_QFI_OFFSET] & GTPU_EH_QFI_MASK;
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU_EH;
+			break;
 		default:
 			dev_dbg(dev, "Invalid header type 0x:%x for VF %d\n",
 				hdr->type, vf->vf_id);
@@ -1016,12 +1084,18 @@ ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
 	struct ice_fdir_fltr *a = &conf_a->input;
 	struct ice_fdir_fltr *b = &conf_b->input;
 
+	if (conf_a->ttype != conf_b->ttype)
+		return false;
 	if (a->flow_type != b->flow_type)
 		return false;
 	if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
 		return false;
 	if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
 		return false;
+	if (memcmp(&a->gtpu_data, &b->gtpu_data, sizeof(a->gtpu_data)))
+		return false;
+	if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
+		return false;
 	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
 		return false;
 	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
-- 
cgit v1.2.3


From 213528fed2f609a0d67f59337145057f63c5bb0b Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:09 +0800
Subject: ice: Add more FDIR filter type for AVF

FDIR for AVF can forward
- L2TPV3 packets by matching session id.
- IPSEC ESP packets by matching security parameter index.
- IPSEC AH packets by matching security parameter index.
- NAT_T ESP packets by matching security parameter index.
- Any PFCP session packets(s field is 1).

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fdir.c          | 254 +++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  15 ++
 drivers/net/ethernet/intel/ice/ice_type.h          |  13 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 281 +++++++++++++++++++--
 4 files changed, 545 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index e0b1aa1c5f2c..e8155c7954a0 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -100,6 +100,138 @@ static const u8 ice_fdir_ipv4_gtpu4_pkt[] = {
 	0x00, 0x00,
 };
 
+static const u8 ice_fdir_ipv4_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x73,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x73, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x32,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x32, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x33,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x33, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x11, 0x94, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x11, 0x94, 0x00, 0x00, 0x00, 0x08,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x21, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 static const u8 ice_fdir_non_ip_l2_pkt[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -332,6 +464,78 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 		sizeof(ice_fdir_ipv4_gtpu4_pkt),
 		ice_fdir_ipv4_gtpu4_pkt,
 	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_AH,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_AH,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+	},
 	{
 		ICE_FLTR_PTYPE_NON_IP_L2,
 		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
@@ -802,6 +1006,56 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET,
 				      input->gtpu_data.qfi);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV4_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV6_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV6_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV4_AH_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV6_AH_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV6_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
 	case ICE_FLTR_PTYPE_NON_IP_L2:
 		ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
 				   input->ext_data.ether_type);
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 2945421d5b6c..d2d40e18ae8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -33,6 +33,14 @@
 #define ICE_IPV6_PROTO_OFFSET		20
 #define ICE_IPV4_GTPU_TEID_OFFSET	46
 #define ICE_IPV4_GTPU_QFI_OFFSET	56
+#define ICE_IPV4_L2TPV3_SESS_ID_OFFSET	34
+#define ICE_IPV6_L2TPV3_SESS_ID_OFFSET	54
+#define ICE_IPV4_ESP_SPI_OFFSET		34
+#define ICE_IPV6_ESP_SPI_OFFSET		54
+#define ICE_IPV4_AH_SPI_OFFSET		38
+#define ICE_IPV6_AH_SPI_OFFSET		58
+#define ICE_IPV4_NAT_T_ESP_SPI_OFFSET	42
+#define ICE_IPV6_NAT_T_ESP_SPI_OFFSET	62
 
 #define ICE_FDIR_MAX_FLTRS		16384
 
@@ -134,6 +142,10 @@ struct ice_fdir_udp_gtp {
 	u8 next_ext;
 };
 
+struct ice_fdir_l2tpv3 {
+	__be32 session_id;
+};
+
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
 	u8 src_mac[ETH_ALEN];	/* src MAC address */
@@ -155,6 +167,9 @@ struct ice_fdir_fltr {
 	struct ice_fdir_udp_gtp gtpu_data;
 	struct ice_fdir_udp_gtp gtpu_mask;
 
+	struct ice_fdir_l2tpv3 l2tpv3_data;
+	struct ice_fdir_l2tpv3 l2tpv3_mask;
+
 	struct ice_fdir_extra ext_data;
 	struct ice_fdir_extra ext_mask;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c55076d20bea..2893143d9e62 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -196,6 +196,19 @@ enum ice_fltr_ptype {
 	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
 	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
 	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_AH,
+	ICE_FLTR_PTYPE_NONF_IPV6_AH,
+	ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
 	ICE_FLTR_PTYPE_NON_IP_L2,
 	ICE_FLTR_PTYPE_FRAG_IPV4,
 	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index e0bcd5baab60..f49947d3df37 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -25,6 +25,14 @@
 #define GTPU_TEID_OFFSET 4
 #define GTPU_EH_QFI_OFFSET 1
 #define GTPU_EH_QFI_MASK 0x3F
+#define PFCP_S_OFFSET 0
+#define PFCP_S_MASK 0x1
+#define PFCP_PORT_NR 8805
+
+#define FDIR_INSET_FLAG_ESP_S 0
+#define FDIR_INSET_FLAG_ESP_M BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_UDP BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_IPSEC (0ULL << FDIR_INSET_FLAG_ESP_S)
 
 enum ice_fdir_tunnel_type {
 	ICE_FDIR_TUNNEL_TYPE_NONE = 0,
@@ -35,6 +43,7 @@ enum ice_fdir_tunnel_type {
 struct virtchnl_fdir_fltr_conf {
 	struct ice_fdir_fltr input;
 	enum ice_fdir_tunnel_type ttype;
+	u64 inset_flag;
 };
 
 static enum virtchnl_proto_hdr_type vc_pattern_ether[] = {
@@ -113,6 +122,80 @@ static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu_eh[] = {
 	VIRTCHNL_PROTO_HDR_NONE,
 };
 
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_l2tpv3[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_l2tpv3[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_L2TPV3,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_ah[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_ah[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_AH,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_nat_t_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_nat_t_esp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_ESP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv4_pfcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV4,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_PFCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
+static enum virtchnl_proto_hdr_type vc_pattern_ipv6_pfcp[] = {
+	VIRTCHNL_PROTO_HDR_ETH,
+	VIRTCHNL_PROTO_HDR_IPV6,
+	VIRTCHNL_PROTO_HDR_UDP,
+	VIRTCHNL_PROTO_HDR_PFCP,
+	VIRTCHNL_PROTO_HDR_NONE,
+};
+
 struct virtchnl_fdir_pattern_match_item {
 	enum virtchnl_proto_hdr_type *list;
 	u64 input_set;
@@ -142,32 +225,52 @@ static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_comms[] = {
 	{vc_pattern_ether,                    0,         NULL},
 	{vc_pattern_ipv4_gtpu,                0,         NULL},
 	{vc_pattern_ipv4_gtpu_eh,             0,         NULL},
+	{vc_pattern_ipv4_l2tpv3,              0,         NULL},
+	{vc_pattern_ipv6_l2tpv3,              0,         NULL},
+	{vc_pattern_ipv4_esp,                 0,         NULL},
+	{vc_pattern_ipv6_esp,                 0,         NULL},
+	{vc_pattern_ipv4_ah,                  0,         NULL},
+	{vc_pattern_ipv6_ah,                  0,         NULL},
+	{vc_pattern_ipv4_nat_t_esp,           0,         NULL},
+	{vc_pattern_ipv6_nat_t_esp,           0,         NULL},
+	{vc_pattern_ipv4_pfcp,                0,         NULL},
+	{vc_pattern_ipv6_pfcp,                0,         NULL},
 };
 
 struct virtchnl_fdir_inset_map {
 	enum virtchnl_proto_hdr_field field;
 	enum ice_flow_field fld;
+	u64 flag;
+	u64 mask;
 };
 
 static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
-	{VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA},
-	{VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA},
-	{VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP},
-	{VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL},
-	{VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT},
-	{VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA},
-	{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA},
-	{VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP},
-	{VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL},
-	{VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT},
-	{VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT},
-	{VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT},
-	{VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT},
-	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT},
-	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT},
-	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT},
-	{VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID},
-	{VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI},
+	{VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, ICE_FLOW_FIELD_IDX_ETH_TYPE, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_IPSEC, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_UDP, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_AH_SPI, ICE_FLOW_FIELD_IDX_AH_SPI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID, ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_PFCP_S_FIELD, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
 };
 
 /**
@@ -344,6 +447,11 @@ ice_vc_fdir_parse_flow_fld(struct virtchnl_proto_hdr *proto_hdr,
 	for (i = 0; (i < ARRAY_SIZE(fdir_inset_map)) &&
 	     VIRTCHNL_GET_PROTO_HDR_FIELD(&hdr); i++)
 		if (VIRTCHNL_TEST_PROTO_HDR(&hdr, fdir_inset_map[i].field)) {
+			if (fdir_inset_map[i].mask &&
+			    ((fdir_inset_map[i].mask & conf->inset_flag) !=
+			     fdir_inset_map[i].flag))
+				continue;
+
 			fld[*fld_cnt] = fdir_inset_map[i].fld;
 			*fld_cnt += 1;
 			if (*fld_cnt >= ICE_FLOW_FIELD_IDX_MAX)
@@ -424,6 +532,36 @@ ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
 	case ICE_FLTR_PTYPE_NON_IP_L2:
 		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
 				  ICE_FLOW_SEG_HDR_IPV_OTHER);
@@ -462,6 +600,36 @@ ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
 				  ICE_FLOW_SEG_HDR_IPV4 |
 				  ICE_FLOW_SEG_HDR_IPV_OTHER);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
 	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
 		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 |
 				  ICE_FLOW_SEG_HDR_IPV_OTHER);
@@ -834,6 +1002,7 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 {
 	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
 	enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE;
+	enum virtchnl_proto_hdr_type l4 = VIRTCHNL_PROTO_HDR_NONE;
 	struct device *dev = ice_pf_to_dev(vf->pf);
 	struct ice_fdir_fltr *input = &conf->input;
 	int i;
@@ -846,12 +1015,15 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 
 	for (i = 0; i < proto->count; i++) {
 		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		struct ip_esp_hdr *esph;
+		struct ip_auth_hdr *ah;
 		struct sctphdr *sctph;
 		struct ipv6hdr *ip6h;
 		struct udphdr *udph;
 		struct tcphdr *tcph;
 		struct ethhdr *eth;
 		struct iphdr *iph;
+		u8 s_field;
 		u8 *rawh;
 
 		switch (hdr->type) {
@@ -944,6 +1116,75 @@ ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
 				}
 			}
 			break;
+		case VIRTCHNL_PROTO_HDR_L2TPV3:
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3;
+
+			if (hdr->field_selector)
+				input->l2tpv3_data.session_id = *((__be32 *)hdr->buffer);
+			break;
+		case VIRTCHNL_PROTO_HDR_ESP:
+			esph = (struct ip_esp_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+			    l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_ESP;
+
+			if (l4 == VIRTCHNL_PROTO_HDR_UDP)
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_UDP;
+			else
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_IPSEC;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = esph->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = esph->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_AH:
+			ah = (struct ip_auth_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_AH;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_AH;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = ah->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = ah->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_PFCP:
+			rawh = (u8 *)hdr->buffer;
+			s_field = (rawh[0] >> PFCP_S_OFFSET) & PFCP_S_MASK;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.dst_port = cpu_to_be16(PFCP_PORT_NR);
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.dst_port = cpu_to_be16(PFCP_PORT_NR);
+			}
+			break;
 		case VIRTCHNL_PROTO_HDR_GTPU_IP:
 			rawh = (u8 *)hdr->buffer;
 			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
@@ -1096,6 +1337,10 @@ ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
 		return false;
 	if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
 		return false;
+	if (memcmp(&a->l2tpv3_data, &b->l2tpv3_data, sizeof(a->l2tpv3_data)))
+		return false;
+	if (memcmp(&a->l2tpv3_mask, &b->l2tpv3_mask, sizeof(a->l2tpv3_mask)))
+		return false;
 	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
 		return false;
 	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
-- 
cgit v1.2.3


From d6218317e2eff8b3762f437da582ea970cde576e Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 9 Mar 2021 11:08:10 +0800
Subject: ice: Check FDIR program status for AVF

Enable returning FDIR completion status by checking the
ctrl_vsi Rx queue descriptor value.

To enable returning FDIR completion status from ctrl_vsi Rx queue,
COMP_Queue and COMP_Report of FDIR filter programming descriptor
needs to be properly configured. After program request sent to ctrl_vsi
Tx queue, ctrl_vsi Rx queue interrupt will be triggered and
completion status will be returned.

Driver will first issue request in ice_vc_fdir_add_fltr(), then
pass FDIR context to the background task in interrupt service routine
ice_vc_fdir_irq_handler() and finally deal with them in
ice_flush_fdir_ctx(). ice_flush_fdir_ctx() will check the descriptor's
value, fdir context, and then send back virtual channel message to VF
by calling ice_vc_add_fdir_fltr_post(). An additional timer will be
setup in case of hardware interrupt timeout.

Signed-off-by: Yahui Cao <yahui.cao@intel.com>
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h               |   1 +
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h    |   3 +
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h     |  20 +
 drivers/net/ethernet/intel/ice/ice_main.c          |   2 +
 drivers/net/ethernet/intel/ice/ice_txrx.c          |   5 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 493 ++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h |  33 +-
 7 files changed, 541 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 7b45f2ab4036..9bf346133cbd 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -231,6 +231,7 @@ enum ice_state {
 	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
 	__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
 	__ICE_PHY_INIT_COMPLETE,
+	__ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
 	__ICE_STATE_NBITS		/* must be last */
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index a778e373eff1..67b5b9b9d009 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -384,6 +384,9 @@
 #define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
 #define VSIQF_FD_CNT_FD_GCNT_S			0
 #define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
+#define VSIQF_FD_CNT_FD_BCNT_S			16
+#define VSIQF_FD_CNT_FD_BCNT_M			ICE_M(0x3FFF, 16)
+#define VSIQF_FD_SIZE(_VSI)			(0x00462000 + ((_VSI) * 4))
 #define VSIQF_HKEY_MAX_INDEX			12
 #define VSIQF_HLUT_MAX_INDEX			15
 #define PFPM_APM				0x000B8080
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index b30c22358c0a..21329ed3087e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -140,6 +140,26 @@ struct ice_fltr_desc {
 			(0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
 #define ICE_FXD_FLTR_QW1_FDID_ZERO	0x0ULL
 
+/* definition for FD filter programming status descriptor WB format */
+#define ICE_FXD_FLTR_WB_QW1_DD_S	0
+#define ICE_FXD_FLTR_WB_QW1_DD_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_DD_S)
+#define ICE_FXD_FLTR_WB_QW1_DD_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_S	1
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_M	\
+				(0x3ULL << ICE_FXD_FLTR_WB_QW1_PROG_ID_S)
+#define ICE_FXD_FLTR_WB_QW1_PROG_ADD	0x0ULL
+#define ICE_FXD_FLTR_WB_QW1_PROG_DEL	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_S	4
+#define ICE_FXD_FLTR_WB_QW1_FAIL_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S	5
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M	\
+				(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES	0x1ULL
+
 struct ice_rx_ptype_decoded {
 	u32 ptype:10;
 	u32 known:1;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9cb5e653b38d..eb895e6db077 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2071,6 +2071,7 @@ static void ice_service_task(struct work_struct *work)
 	ice_process_vflr_event(pf);
 	ice_clean_mailboxq_subtask(pf);
 	ice_sync_arfs_fltrs(pf);
+	ice_flush_fdir_ctx(pf);
 	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
 	ice_service_task_complete(pf);
 
@@ -2082,6 +2083,7 @@ static void ice_service_task(struct work_struct *work)
 	    test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
 	    test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
 	    test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+	    test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) ||
 	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
 		mod_timer(&pf->serv_tmr, jiffies);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index b7dc25da1202..6d87dd9d456e 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1115,6 +1115,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		dma_rmb();
 
 		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+			struct ice_vsi *ctrl_vsi = rx_ring->vsi;
+
+			if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
+			    ctrl_vsi->vf_id != ICE_INVAL_VFID)
+				ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
 			ice_put_rx_buf(rx_ring, NULL, 0);
 			cleaned_count++;
 			continue;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index f49947d3df37..1f4ba38b1599 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -44,6 +44,7 @@ struct virtchnl_fdir_fltr_conf {
 	struct ice_fdir_fltr input;
 	enum ice_fdir_tunnel_type ttype;
 	u64 inset_flag;
+	u32 flow_id;
 };
 
 static enum virtchnl_proto_hdr_type vc_pattern_ether[] = {
@@ -1483,7 +1484,7 @@ static int ice_vc_fdir_write_fltr(struct ice_vf *vf,
 	}
 
 	input->dest_vsi = vsi->idx;
-	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW;
 
 	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
 	if (!ctrl_vsi) {
@@ -1515,6 +1516,454 @@ err_free_pkt:
 	return ret;
 }
 
+/**
+ * ice_vf_fdir_timer - FDIR program waiting timer interrupt handler
+ * @t: pointer to timer_list
+ */
+static void ice_vf_fdir_timer(struct timer_list *t)
+{
+	struct ice_vf_fdir_ctx *ctx_irq = from_timer(ctx_irq, t, rx_tmr);
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct ice_vf *vf;
+	struct ice_pf *pf;
+
+	fdir = container_of(ctx_irq, struct ice_vf_fdir, ctx_irq);
+	vf = container_of(fdir, struct ice_vf, fdir);
+	ctx_done = &fdir->ctx_done;
+	pf = vf->pf;
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_TIMEOUT;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vc_fdir_irq_handler - ctrl_vsi Rx queue interrupt handler
+ * @ctrl_vsi: pointer to a VF's CTRL VSI
+ * @rx_desc: pointer to FDIR Rx queue descriptor
+ */
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc)
+{
+	struct ice_pf *pf = ctrl_vsi->back;
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir_ctx *ctx_irq;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	vf = &pf->vf[ctrl_vsi->vf_id];
+
+	fdir = &vf->fdir;
+	ctx_done = &fdir->ctx_done;
+	ctx_irq = &fdir->ctx_irq;
+	dev = ice_pf_to_dev(pf);
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_IRQ;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc));
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	ret = del_timer(&ctx_irq->rx_tmr);
+	if (!ret)
+		dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id);
+
+	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vf_fdir_dump_info - dump FDIR information for diagnosis
+ * @vf: pointer to the VF info
+ */
+static void ice_vf_fdir_dump_info(struct ice_vf *vf)
+{
+	struct ice_vsi *vf_vsi;
+	u32 fd_size, fd_cnt;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u16 vsi_num;
+
+	pf = vf->pf;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx);
+
+	fd_size = rd32(hw, VSIQF_FD_SIZE(vsi_num));
+	fd_cnt = rd32(hw, VSIQF_FD_CNT(vsi_num));
+	dev_dbg(dev, "VF %d: space allocated: guar:0x%x, be:0x%x, space consumed: guar:0x%x, be:0x%x",
+		vf->vf_id,
+		(fd_size & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_size & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S);
+}
+
+/**
+ * ice_vf_verify_rx_desc - verify received FDIR programming status descriptor
+ * @vf: pointer to the VF info
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+		      enum virtchnl_fdir_prgm_status *status)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u32 stat_err, error, prog_id;
+	int ret;
+
+	stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0);
+	if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >>
+	    ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) {
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id);
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >>
+		ICE_FXD_FLTR_WB_QW1_PROG_ID_S;
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD &&
+	    ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show add, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_DEL &&
+	    ctx->v_opcode != VIRTCHNL_OP_DEL_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show del, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) {
+		if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) {
+			dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		} else {
+			dev_err(dev, "VF %d, Failed to remove FDIR rule, attempt to remove non-existent entry",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		}
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) {
+		dev_err(dev, "VF %d: Profile matching error", vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	*status = VIRTCHNL_FDIR_SUCCESS;
+
+	return 0;
+
+err_exit:
+	ice_vf_fdir_dump_info(vf);
+	return ret;
+}
+
+/**
+ * ice_vc_add_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director add command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_add *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	resp->flow_id = conf->flow_id;
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director del command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_del *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	devm_kfree(dev, conf);
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	if (success)
+		devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_flush_fdir_ctx
+ * @pf: pointer to the PF structure
+ *
+ * Flush all the pending event on ctx_done list and process them.
+ */
+void ice_flush_fdir_ctx(struct ice_pf *pf)
+{
+	int i;
+
+	if (!test_and_clear_bit(__ICE_FD_VF_FLUSH_CTX, pf->state))
+		return;
+
+	ice_for_each_vf(pf, i) {
+		struct device *dev = ice_pf_to_dev(pf);
+		enum virtchnl_fdir_prgm_status status;
+		struct ice_vf *vf = &pf->vf[i];
+		struct ice_vf_fdir_ctx *ctx;
+		unsigned long flags;
+		int ret;
+
+		if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		if (vf->ctrl_vsi_idx == ICE_NO_VSI)
+			continue;
+
+		ctx = &vf->fdir.ctx_done;
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		if (!(ctx->flags & ICE_VF_FDIR_CTX_VALID)) {
+			spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+		WARN_ON(ctx->stat == ICE_FDIR_CTX_READY);
+		if (ctx->stat == ICE_FDIR_CTX_TIMEOUT) {
+			status = VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT;
+			dev_err(dev, "VF %d: ctrl_vsi irq timeout\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		ret = ice_vf_verify_rx_desc(vf, ctx, &status);
+		if (ret)
+			goto err_exit;
+
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, true);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, true);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		continue;
+err_exit:
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, false);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, false);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+	}
+}
+
+/**
+ * ice_vc_fdir_set_irq_ctx - set FDIR context info for later IRQ handler
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @v_opcode: virtual channel operation code
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_irq_ctx(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf,
+			enum virtchnl_ops v_opcode)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vf_fdir_ctx *ctx;
+	unsigned long flags;
+
+	ctx = &vf->fdir.ctx_irq;
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	if ((vf->fdir.ctx_irq.flags & ICE_VF_FDIR_CTX_VALID) ||
+	    (vf->fdir.ctx_done.flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		dev_dbg(dev, "VF %d: Last request is still in progress\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	ctx->flags |= ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+	ctx->conf = conf;
+	ctx->v_opcode = v_opcode;
+	ctx->stat = ICE_FDIR_CTX_READY;
+	timer_setup(&ctx->rx_tmr, ice_vf_fdir_timer, 0);
+
+	mod_timer(&ctx->rx_tmr, round_jiffies(msecs_to_jiffies(10) + jiffies));
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_clear_irq_ctx - clear FDIR context info for IRQ handler
+ * @vf: pointer to the VF structure
+ *
+ * Return: 0 on success, and other on error.
+ */
+static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf)
+{
+	struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq;
+	unsigned long flags;
+
+	del_timer(&ctx->rx_tmr);
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+}
+
 /**
  * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer
  * @vf: pointer to the VF info
@@ -1601,7 +2050,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		goto err_free_conf;
 	}
 
-	ret = ice_vc_fdir_insert_entry(vf, conf, &stat->flow_id);
+	ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id);
 	if (ret) {
 		v_ret = VIRTCHNL_STATUS_SUCCESS;
 		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
@@ -1609,6 +2058,14 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		goto err_free_conf;
 	}
 
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
 	ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
 	if (ret) {
 		v_ret = VIRTCHNL_STATUS_SUCCESS;
@@ -1618,18 +2075,13 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		goto err_rem_entry;
 	}
 
-	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
-
-	v_ret = VIRTCHNL_STATUS_SUCCESS;
-	stat->status = VIRTCHNL_FDIR_SUCCESS;
 exit:
-	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
-				    (u8 *)stat, len);
 	kfree(stat);
 	return ret;
 
 err_rem_entry:
-	ice_vc_fdir_remove_entry(vf, conf, stat->flow_id);
+	ice_vc_fdir_clear_irq_ctx(vf);
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
 err_free_conf:
 	devm_kfree(dev, conf);
 err_exit:
@@ -1693,22 +2145,29 @@ int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		goto err_exit;
 	}
 
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_DEL_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
 	ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun);
 	if (ret) {
 		v_ret = VIRTCHNL_STATUS_SUCCESS;
 		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
 		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
 			vf->vf_id, ret);
-		goto err_exit;
+		goto err_del_tmr;
 	}
 
-	ice_vc_fdir_remove_entry(vf, conf, fltr->flow_id);
-	devm_kfree(dev, conf);
-	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+	kfree(stat);
 
-	v_ret = VIRTCHNL_STATUS_SUCCESS;
-	stat->status = VIRTCHNL_FDIR_SUCCESS;
+	return ret;
 
+err_del_tmr:
+	ice_vc_fdir_clear_irq_ctx(vf);
 err_exit:
 	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret,
 				    (u8 *)stat, len);
@@ -1726,6 +2185,10 @@ void ice_vf_fdir_init(struct ice_vf *vf)
 
 	idr_init(&fdir->fdir_rule_idr);
 	INIT_LIST_HEAD(&fdir->fdir_rule_list);
+
+	spin_lock_init(&fdir->ctx_lock);
+	fdir->ctx_irq.flags = 0;
+	fdir->ctx_done.flags = 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
index 2a2e0e598559..f4e629f4c09b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
@@ -5,6 +5,24 @@
 #define _ICE_VIRTCHNL_FDIR_H_
 
 struct ice_vf;
+struct ice_pf;
+
+enum ice_fdir_ctx_stat {
+	ICE_FDIR_CTX_READY,
+	ICE_FDIR_CTX_IRQ,
+	ICE_FDIR_CTX_TIMEOUT,
+};
+
+struct ice_vf_fdir_ctx {
+	struct timer_list rx_tmr;
+	enum virtchnl_ops v_opcode;
+	enum ice_fdir_ctx_stat stat;
+	union ice_32b_rx_flex_desc rx_desc;
+#define ICE_VF_FDIR_CTX_VALID		BIT(0)
+	u32 flags;
+
+	void *conf;
+};
 
 /* VF FDIR information structure */
 struct ice_vf_fdir {
@@ -14,11 +32,24 @@ struct ice_vf_fdir {
 
 	struct idr fdir_rule_idr;
 	struct list_head fdir_rule_list;
+
+	spinlock_t ctx_lock; /* protects FDIR context info */
+	struct ice_vf_fdir_ctx ctx_irq;
+	struct ice_vf_fdir_ctx ctx_done;
 };
 
+#ifdef CONFIG_PCI_IOV
 int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg);
 int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg);
 void ice_vf_fdir_init(struct ice_vf *vf);
 void ice_vf_fdir_exit(struct ice_vf *vf);
-
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc);
+void ice_flush_fdir_ctx(struct ice_pf *pf);
+#else
+static inline void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { }
+static inline void ice_flush_fdir_ctx(struct ice_pf *pf) { }
+#endif /* CONFIG_PCI_IOV */
 #endif /* _ICE_VIRTCHNL_FDIR_H_ */
-- 
cgit v1.2.3


From 0dbfbabb840d711d7ea1627d88afd0520f374a90 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 9 Mar 2021 11:08:11 +0800
Subject: iavf: Add framework to enable ethtool ntuple filters

Enable ethtool ntuple filter support on the VF driver using the virtchnl
interface to the PF driver and the Flow director functionality in the
hardware.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf.h          |  12 ++
 drivers/net/ethernet/intel/iavf/iavf_fdir.h     |  33 +++++
 drivers/net/ethernet/intel/iavf/iavf_main.c     |  29 +++-
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 184 ++++++++++++++++++++++++
 4 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/iavf/iavf_fdir.h

diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 8a65525a7c0d..bda2a900df8e 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -37,6 +37,7 @@
 #include "iavf_type.h"
 #include <linux/avf/virtchnl.h>
 #include "iavf_txrx.h"
+#include "iavf_fdir.h"
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 #define PFX "iavf: "
@@ -300,6 +301,8 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_DISABLE_CHANNELS		BIT(22)
 #define IAVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT(23)
 #define IAVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
+#define IAVF_FLAG_AQ_ADD_FDIR_FILTER		BIT(25)
+#define IAVF_FLAG_AQ_DEL_FDIR_FILTER		BIT(26)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -340,6 +343,8 @@ struct iavf_adapter {
 			  VIRTCHNL_VF_OFFLOAD_VLAN)
 #define ADV_LINK_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
 			      VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+#define FDIR_FLTR_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			       VIRTCHNL_VF_OFFLOAD_FDIR_PF)
 	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
 	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
 	struct virtchnl_version_info pf_version;
@@ -362,6 +367,11 @@ struct iavf_adapter {
 	/* lock to protect access to the cloud filter list */
 	spinlock_t cloud_filter_list_lock;
 	u16 num_cloud_filters;
+
+#define IAVF_MAX_FDIR_FILTERS 128	/* max allowed Flow Director filters */
+	u16 fdir_active_fltr;
+	struct list_head fdir_list_head;
+	spinlock_t fdir_fltr_lock;	/* protect the Flow Director filter list */
 };
 
 
@@ -432,6 +442,8 @@ void iavf_enable_channels(struct iavf_adapter *adapter);
 void iavf_disable_channels(struct iavf_adapter *adapter);
 void iavf_add_cloud_filter(struct iavf_adapter *adapter);
 void iavf_del_cloud_filter(struct iavf_adapter *adapter);
+void iavf_add_fdir_filter(struct iavf_adapter *adapter);
+void iavf_del_fdir_filter(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
 					const u8 *macaddr);
 #endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
new file mode 100644
index 000000000000..4e6494b14016
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _IAVF_FDIR_H_
+#define _IAVF_FDIR_H_
+
+struct iavf_adapter;
+
+/* State of Flow Director filter */
+enum iavf_fdir_fltr_state_t {
+	IAVF_FDIR_FLTR_ADD_REQUEST,	/* User requests to add filter */
+	IAVF_FDIR_FLTR_ADD_PENDING,	/* Filter pending add by the PF */
+	IAVF_FDIR_FLTR_DEL_REQUEST,	/* User requests to delete filter */
+	IAVF_FDIR_FLTR_DEL_PENDING,	/* Filter pending delete by the PF */
+	IAVF_FDIR_FLTR_ACTIVE,		/* Filter is active */
+};
+
+/* bookkeeping of Flow Director filters */
+struct iavf_fdir_fltr {
+	enum iavf_fdir_fltr_state_t state;
+	struct list_head list;
+
+	u32 flow_id;
+
+	struct virtchnl_fdir_add vc_add_msg;
+};
+
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc);
+#endif /* _IAVF_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index dc5b3c06d1e0..d0c16117484f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -959,8 +959,9 @@ void iavf_down(struct iavf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct iavf_vlan_filter *vlf;
-	struct iavf_mac_filter *f;
 	struct iavf_cloud_filter *cf;
+	struct iavf_fdir_fltr *fdir;
+	struct iavf_mac_filter *f;
 
 	if (adapter->state <= __IAVF_DOWN_PENDING)
 		return;
@@ -996,6 +997,13 @@ void iavf_down(struct iavf_adapter *adapter)
 	}
 	spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
+	/* remove all Flow Director filters */
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
 	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
 	    adapter->state != __IAVF_RESETTING) {
 		/* cancel any current operation */
@@ -1007,6 +1015,7 @@ void iavf_down(struct iavf_adapter *adapter)
 		adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
 	}
 
@@ -1629,6 +1638,14 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
 		iavf_add_cloud_filter(adapter);
 		return 0;
 	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_FDIR_FILTER) {
+		iavf_add_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_FDIR_FILTER) {
+		iavf_del_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
 	return -EAGAIN;
 }
 
@@ -3738,10 +3755,12 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	spin_lock_init(&adapter->mac_vlan_list_lock);
 	spin_lock_init(&adapter->cloud_filter_list_lock);
+	spin_lock_init(&adapter->fdir_fltr_lock);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
 	INIT_LIST_HEAD(&adapter->cloud_filter_list);
+	INIT_LIST_HEAD(&adapter->fdir_list_head);
 
 	INIT_WORK(&adapter->reset_task, iavf_reset_task);
 	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
@@ -3845,6 +3864,7 @@ static void iavf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_fdir_fltr *fdir, *fdirtmp;
 	struct iavf_vlan_filter *vlf, *vlftmp;
 	struct iavf_mac_filter *f, *ftmp;
 	struct iavf_cloud_filter *cf, *cftmp;
@@ -3926,6 +3946,13 @@ static void iavf_remove(struct pci_dev *pdev)
 	}
 	spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, list) {
+		list_del(&fdir->list);
+		kfree(fdir);
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 647e7fde11b4..9b8a5b700cff 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -140,6 +140,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
 	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 	       VIRTCHNL_VF_OFFLOAD_ADQ |
+	       VIRTCHNL_VF_OFFLOAD_FDIR_PF |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
@@ -1197,6 +1198,101 @@ void iavf_del_cloud_filter(struct iavf_adapter *adapter)
 	kfree(f);
 }
 
+/**
+ * iavf_add_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF add Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_add_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+	struct virtchnl_fdir_add *f;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_add);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
+			process_fltr = true;
+			fdir->state = IAVF_FDIR_FLTR_ADD_PENDING;
+			memcpy(f, &fdir->vc_add_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		/* prevent iavf_add_fdir_filter() from being called when there
+		 * are no filters to add
+		 */
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+		kfree(f);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_FDIR_FILTER, (u8 *)f, len);
+	kfree(f);
+}
+
+/**
+ * iavf_del_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF delete Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_del_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+	struct virtchnl_fdir_del f;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_del);
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) {
+			process_fltr = true;
+			memset(&f, 0, len);
+			f.vsi_id = fdir->vc_add_msg.vsi_id;
+			f.flow_id = fdir->flow_id;
+			fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		return;
+	}
+
+	adapter->current_op = VIRTCHNL_OP_DEL_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_FDIR_FILTER, (u8 *)&f, len);
+}
+
 /**
  * iavf_request_reset
  * @adapter: adapter structure
@@ -1357,6 +1453,48 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 			}
 			}
 			break;
+		case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry_safe(fdir, fdir_tmp,
+						 &adapter->fdir_list_head,
+						 list) {
+				if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					if (msglen)
+						dev_err(&adapter->pdev->dev,
+							"%s\n", msg);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry(fdir, &adapter->fdir_list_head,
+					    list) {
+				if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			break;
 		default:
 			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
 				v_retval, iavf_stat_str(&adapter->hw, v_retval),
@@ -1490,6 +1628,52 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		}
 		}
 		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+		struct virtchnl_fdir_add *add_fltr = (struct virtchnl_fdir_add *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp,
+					 &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+				if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					fdir->flow_id = add_fltr->flow_id;
+				} else {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter with status: %d\n",
+						 add_fltr->status);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+		struct virtchnl_fdir_del *del_fltr = (struct virtchnl_fdir_del *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+				if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				} else {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n",
+						 del_fltr->status);
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
-- 
cgit v1.2.3


From 527691bf0682d7ddcca77fc17dabd2fa090572ff Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 9 Mar 2021 11:08:12 +0800
Subject: iavf: Support IPv4 Flow Director filters

Support the addition and deletion of IPv4 filters.

Supported fields are: src-ip, dst-ip, src-port, dst-port and l4proto
Supported flow-types are: tcp4, udp4, sctp4, ip4, ah4, esp4

Example usage:
ethtool -N ens787f0v0 flow-type tcp4 src-ip 192.168.0.20 \
  dst-ip 192.168.0.21 tos 4 src-port 22 dst-port 23 action 8

L2TPv3 over IP with 'Session ID' 17:
ethtool -N ens787f0v0 flow-type ip4 l4proto 115 l4data 17 action 3

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/Makefile        |   2 +-
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c  | 404 ++++++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.c     | 428 ++++++++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.h     |  40 +++
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c |   8 +
 5 files changed, 881 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/iavf/iavf_fdir.c

diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
index c997063ed728..121e194ee734 100644
--- a/drivers/net/ethernet/intel/iavf/Makefile
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -11,5 +11,5 @@ subdir-ccflags-y += -I$(src)
 
 obj-$(CONFIG_IAVF) += iavf.o
 
-iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \
+iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \
 	     iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index c93567f4d0f7..edd864f3b717 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -827,6 +827,396 @@ static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
 	return __iavf_set_coalesce(netdev, ec, queue);
 }
 
+/**
+ * iavf_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow)
+{
+	switch (flow) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		return AH_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		return ESP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * iavf_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_SCTP;
+	case AH_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_AH;
+	case ESP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_ESP;
+	case IPV4_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_OTHER;
+	default:
+		return IAVF_FDIR_FLOW_NONE;
+	}
+}
+
+/**
+ * iavf_get_ethtool_fdir_entry - fill ethtool structure with Flow Director filter data
+ * @adapter: the VF adapter structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
+			    struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *rule = NULL;
+	int ret = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	rule = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (!rule) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = iavf_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fsp->h_u.ah_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.ah_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.ah_ip4_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.ah_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.ah_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.ah_ip4_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip_data.tos;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = rule->ip_data.proto;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->ip_mask.tos;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = rule->ip_mask.proto;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (rule->action == VIRTCHNL_ACTION_DROP)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * iavf_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @adapter: the VF adapter structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd,
+		       u32 *rule_locs)
+{
+	struct iavf_fdir_fltr *fltr;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	cmd->data = IAVF_MAX_FDIR_FILTERS;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	list_for_each_entry(fltr, &adapter->fdir_list_head, list) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = fltr->loc;
+		cnt++;
+	}
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+
+	return val;
+}
+
+/**
+ * iavf_add_fdir_fltr_info - Set the input set for Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: filter structure
+ */
+static int
+iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spec *fsp,
+			struct iavf_fdir_fltr *fltr)
+{
+	u32 flow_type, q_index = 0;
+	enum virtchnl_action act;
+
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		act = VIRTCHNL_ACTION_DROP;
+	} else {
+		q_index = fsp->ring_cookie;
+		if (q_index >= adapter->num_active_queues)
+			return -EINVAL;
+
+		act = VIRTCHNL_ACTION_QUEUE;
+	}
+
+	fltr->action = act;
+	fltr->loc = fsp->location;
+	fltr->q_index = q_index;
+
+	flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+	fltr->flow_type = iavf_ethtool_flow_to_fltr(flow_type);
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		fltr->ip_data.tos = fsp->h_u.tcp_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.ah_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.ah_ip4_spec.ip4dst;
+		fltr->ip_data.spi = fsp->h_u.ah_ip4_spec.spi;
+		fltr->ip_data.tos = fsp->h_u.ah_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.ah_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
+		fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
+		fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+		break;
+	case IPV4_USER_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_data.tos = fsp->h_u.usr_ip4_spec.tos;
+		fltr->ip_data.proto = fsp->h_u.usr_ip4_spec.proto;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	if (iavf_fdir_is_dup_fltr(adapter, fltr))
+		return -EEXIST;
+
+	return iavf_fill_fdir_add_msg(adapter, fltr);
+}
+
+/**
+ * iavf_add_fdir_ethtool - add Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to add Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct iavf_fdir_fltr *fltr;
+	int count = 50;
+	int err;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
+			IAVF_MAX_FDIR_FILTERS);
+		return -ENOSPC;
+	}
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
+		dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		return -EEXIST;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+	if (!fltr)
+		return -ENOMEM;
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		if (--count == 0) {
+			kfree(fltr);
+			return -EINVAL;
+		}
+		udelay(1);
+	}
+
+	err = iavf_add_fdir_fltr_info(adapter, fsp, fltr);
+	if (err)
+		goto ret;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	iavf_fdir_list_add_fltr(adapter, fltr);
+	adapter->fdir_active_fltr++;
+	fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+
+ret:
+	if (err && fltr)
+		kfree(fltr);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	return err;
+}
+
+/**
+ * iavf_del_fdir_ethtool - delete Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *fltr = NULL;
+	int err = 0;
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	fltr = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (fltr) {
+		if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) {
+			fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		} else {
+			err = -EBUSY;
+		}
+	} else if (adapter->fdir_active_fltr) {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST)
+		mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+
+	return err;
+}
+
+/**
+ * iavf_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = iavf_add_fdir_ethtool(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = iavf_del_fdir_ethtool(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 /**
  * iavf_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
@@ -846,6 +1236,19 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		cmd->data = adapter->num_active_queues;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		if (!FDIR_FLTR_SUPPORT(adapter))
+			break;
+		cmd->rule_cnt = adapter->fdir_active_fltr;
+		cmd->data = IAVF_MAX_FDIR_FILTERS;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = iavf_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = iavf_get_fdir_fltr_ids(adapter, cmd, (u32 *)rule_locs);
+		break;
 	case ETHTOOL_GRXFH:
 		netdev_info(netdev,
 			    "RSS hash info is not available to vf, use pf.\n");
@@ -1025,6 +1428,7 @@ static const struct ethtool_ops iavf_ethtool_ops = {
 	.set_coalesce		= iavf_set_coalesce,
 	.get_per_queue_coalesce = iavf_get_per_queue_coalesce,
 	.set_per_queue_coalesce = iavf_set_per_queue_coalesce,
+	.set_rxnfc		= iavf_set_rxnfc,
 	.get_rxnfc		= iavf_get_rxnfc,
 	.get_rxfh_indir_size	= iavf_get_rxfh_indir_size,
 	.get_rxfh		= iavf_get_rxfh,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
new file mode 100644
index 000000000000..69de6a45f5f0
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -0,0 +1,428 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+/* flow director ethtool support for iavf */
+
+#include "iavf.h"
+
+/**
+ * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct iphdr *iph = (struct iphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4);
+
+	if (fltr->ip_mask.tos == U8_MAX) {
+		iph->tos = fltr->ip_data.tos;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DSCP);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->protocol = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, PROT);
+	}
+
+	if (fltr->ip_mask.v4_addrs.src_ip == htonl(U32_MAX)) {
+		iph->saddr = fltr->ip_data.v4_addrs.src_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC);
+	}
+
+	if (fltr->ip_mask.v4_addrs.dst_ip == htonl(U32_MAX)) {
+		iph->daddr = fltr->ip_data.v4_addrs.dst_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_tcp_hdr - fill the TCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the TCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_tcp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct tcphdr *tcph = (struct tcphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		tcph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		tcph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_hdr - fill the UDP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct udphdr *udph = (struct udphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		udph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		udph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_sctp_hdr - fill the SCTP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the SCTP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_sctp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct sctphdr *sctph = (struct sctphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		sctph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		sctph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_ah_hdr - fill the AH protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the AH protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ah_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, AH);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		ah->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, AH, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_esp_hdr - fill the ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_esp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		esph->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_l4_hdr - fill the L4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the L4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_l4_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr;
+	__be32 *l4_4_data;
+
+	if (!fltr->ip_mask.proto) /* IPv4/IPv6 header only */
+		return 0;
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	l4_4_data = (__be32 *)hdr->buffer;
+
+	/* L2TPv3 over IP with 'Session ID' */
+	if (fltr->ip_data.proto == 115 && fltr->ip_mask.l4_header == htonl(U32_MAX)) {
+		VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, L2TPV3);
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, L2TPV3, SESS_ID);
+
+		*l4_4_data = fltr->ip_data.l4_header;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_eth_hdr - fill the Ethernet protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the Ethernet protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_eth_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ETH);
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_add_msg - fill the Flow Director filter into virtchnl message
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if the add Flow Director virtchnl message is filled successfully
+ */
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct virtchnl_fdir_add *vc_msg = &fltr->vc_add_msg;
+	struct virtchnl_proto_hdrs *proto_hdrs;
+	int err;
+
+	proto_hdrs = &vc_msg->rule_cfg.proto_hdrs;
+
+	err = iavf_fill_fdir_eth_hdr(fltr, proto_hdrs); /* L2 always exists */
+	if (err)
+		return err;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	if (err)
+		return err;
+
+	vc_msg->vsi_id = adapter->vsi.id;
+	vc_msg->rule_cfg.action_set.count = 1;
+	vc_msg->rule_cfg.action_set.actions[0].type = fltr->action;
+	vc_msg->rule_cfg.action_set.actions[0].act_conf.queue.index = fltr->q_index;
+
+	return 0;
+}
+
+/**
+ * iavf_fdir_flow_proto_name - get the flow protocol name
+ * @flow_type: Flow Director filter flow type
+ **/
+static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type)
+{
+	switch (flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		return "TCP";
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		return "UDP";
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		return "SCTP";
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		return "AH";
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		return "ESP";
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		return "Other";
+	default:
+		return NULL;
+	}
+}
+
+/**
+ * iavf_print_fdir_fltr
+ * @adapter: adapter structure
+ * @fltr: Flow Director filter to print
+ *
+ * Print the Flow Director filter
+ **/
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	const char *proto = iavf_fdir_flow_proto_name(fltr->flow_type);
+
+	if (!proto)
+		return;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_fdir_is_dup_fltr - test if filter is already in list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *tmp;
+	bool ret = false;
+
+	list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
+		if (tmp->flow_type != fltr->flow_type)
+			continue;
+
+		if (!memcmp(&tmp->ip_data, &fltr->ip_data,
+			    sizeof(fltr->ip_data))) {
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_find_fdir_fltr_by_loc - find filter with location
+ * @adapter: pointer to the VF adapter structure
+ * @loc: location to find.
+ *
+ * Returns pointer to Flow Director filter if found or null
+ */
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc)
+{
+	struct iavf_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list)
+		if (rule->loc == loc)
+			return rule;
+
+	return NULL;
+}
+
+/**
+ * iavf_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: filter node to add to structure
+ */
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list) {
+		if (rule->loc >= fltr->loc)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->list, &parent->list);
+	else
+		list_add(&fltr->list, &adapter->fdir_list_head);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index 4e6494b14016..bce913c2541d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -15,13 +15,53 @@ enum iavf_fdir_fltr_state_t {
 	IAVF_FDIR_FLTR_ACTIVE,		/* Filter is active */
 };
 
+enum iavf_fdir_flow_type {
+	/* NONE - used for undef/error */
+	IAVF_FDIR_FLOW_NONE = 0,
+	IAVF_FDIR_FLOW_IPV4_TCP,
+	IAVF_FDIR_FLOW_IPV4_UDP,
+	IAVF_FDIR_FLOW_IPV4_SCTP,
+	IAVF_FDIR_FLOW_IPV4_AH,
+	IAVF_FDIR_FLOW_IPV4_ESP,
+	IAVF_FDIR_FLOW_IPV4_OTHER,
+	/* MAX - this must be last and add anything new just above it */
+	IAVF_FDIR_FLOW_PTYPE_MAX,
+};
+
+struct iavf_ipv4_addrs {
+	__be32 src_ip;
+	__be32 dst_ip;
+};
+
+struct iavf_fdir_ip {
+	union {
+		struct iavf_ipv4_addrs v4_addrs;
+	};
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 l4_header;	/* first 4 bytes of the layer 4 header */
+	__be32 spi;		/* security parameter index for AH/ESP */
+	union {
+		u8 tos;
+	};
+	u8 proto;
+};
 /* bookkeeping of Flow Director filters */
 struct iavf_fdir_fltr {
 	enum iavf_fdir_fltr_state_t state;
 	struct list_head list;
 
+	enum iavf_fdir_flow_type flow_type;
+
+	struct iavf_fdir_ip ip_data;
+	struct iavf_fdir_ip ip_mask;
+
+	enum virtchnl_action action;
 	u32 flow_id;
 
+	u32 loc;	/* Rule location inside the flow table */
+	u32 q_index;
+
 	struct virtchnl_fdir_add vc_add_msg;
 };
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 9b8a5b700cff..43a4d4ef415f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1464,6 +1464,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter, error %s\n",
 						 iavf_stat_str(&adapter->hw,
 							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
 					if (msglen)
 						dev_err(&adapter->pdev->dev,
 							"%s\n", msg);
@@ -1486,6 +1487,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 					dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n",
 						 iavf_stat_str(&adapter->hw,
 							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
 				}
 			}
 			spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1638,11 +1640,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 					 list) {
 			if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
 				if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n",
+						 fdir->loc);
 					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
 					fdir->flow_id = add_fltr->flow_id;
 				} else {
 					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter with status: %d\n",
 						 add_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
 					list_del(&fdir->list);
 					kfree(fdir);
 					adapter->fdir_active_fltr--;
@@ -1661,6 +1666,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 					 list) {
 			if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
 				if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n",
+						 fdir->loc);
 					list_del(&fdir->list);
 					kfree(fdir);
 					adapter->fdir_active_fltr--;
@@ -1668,6 +1675,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
 					dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n",
 						 del_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
 				}
 			}
 		}
-- 
cgit v1.2.3


From e90cbc257a6f3f9cc2b257acab561b197c708bab Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 9 Mar 2021 11:08:13 +0800
Subject: iavf: Support IPv6 Flow Director filters

Support the addition and deletion of IPv6 filters.

Supported fields are: src-ip, dst-ip, src-port, dst-port and l4proto
Supported flow-types are: tcp6, udp6, sctp6, ip6, ah6, esp6

Example usage:
ethtool -N ens787f0v0 flow-type tcp6 src-ip 2001::2 \
  dst-ip CDCD:910A:2222:5498:8475:1111:3900:2020 \
  tclass 1 src-port 22 dst-port 23 action 7

L2TPv3 over IP with 'Session ID' 17:
ethtool -N ens787f0v0 flow-type ip6 l4proto 115 l4data 17 action 7

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 122 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.c    | 111 ++++++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.h    |  13 +++
 3 files changed, 246 insertions(+)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index edd864f3b717..8d856f5dc38e 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -849,6 +849,18 @@ static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow)
 		return ESP_V4_FLOW;
 	case IAVF_FDIR_FLOW_IPV4_OTHER:
 		return IPV4_USER_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		return AH_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		return ESP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		return IPV6_USER_FLOW;
 	default:
 		/* 0 is undefined ethtool flow */
 		return 0;
@@ -876,6 +888,18 @@ static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
 		return IAVF_FDIR_FLOW_IPV4_ESP;
 	case IPV4_USER_FLOW:
 		return IAVF_FDIR_FLOW_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_SCTP;
+	case AH_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_AH;
+	case ESP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_ESP;
+	case IPV6_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_OTHER;
 	default:
 		return IAVF_FDIR_FLOW_NONE;
 	}
@@ -952,6 +976,55 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
 		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
 		fsp->m_u.usr_ip4_spec.proto = rule->ip_mask.proto;
 		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(fsp->h_u.ah_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.ah_ip6_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.ah_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.ah_ip6_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip_data.tclass;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip_data.proto;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->ip_mask.tclass;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->ip_mask.proto;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1075,6 +1148,55 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
 		fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
 		fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
 		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		fltr->ip_data.tclass = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.spi = fsp->h_u.ah_ip6_spec.spi;
+		fltr->ip_data.tclass = fsp->h_u.ah_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
+		fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_data.tclass = fsp->h_u.usr_ip6_spec.tclass;
+		fltr->ip_data.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+		break;
 	default:
 		/* not doing un-parsed flow types */
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index 69de6a45f5f0..f84f2ab036d5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -5,6 +5,15 @@
 
 #include "iavf.h"
 
+static const struct in6_addr ipv6_addr_full_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
 /**
  * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header
  * @fltr: Flow Director filter data structure
@@ -44,6 +53,50 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
 	return 0;
 }
 
+/**
+ * iavf_fill_fdir_ip6_hdr - fill the IPv6 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv6 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ipv6hdr *iph = (struct ipv6hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6);
+
+	if (fltr->ip_mask.tclass == U8_MAX) {
+		iph->priority = (fltr->ip_data.tclass >> 4) & 0xF;
+		iph->flow_lbl[0] = (fltr->ip_data.tclass << 4) & 0xF0;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, TC);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->nexthdr = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, PROT);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->saddr, &fltr->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->daddr, &fltr->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+	}
+
+	return 0;
+}
+
 /**
  * iavf_fill_fdir_tcp_hdr - fill the TCP protocol header
  * @fltr: Flow Director filter data structure
@@ -274,6 +327,30 @@ int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
 		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
 		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
 		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
 	default:
 		err = -EINVAL;
 		break;
@@ -298,16 +375,22 @@ static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type)
 {
 	switch (flow_type) {
 	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV6_TCP:
 		return "TCP";
 	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
 		return "UDP";
 	case IAVF_FDIR_FLOW_IPV4_SCTP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
 		return "SCTP";
 	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV6_AH:
 		return "AH";
 	case IAVF_FDIR_FLOW_IPV4_ESP:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
 		return "ESP";
 	case IAVF_FDIR_FLOW_IPV4_OTHER:
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
 		return "Other";
 	default:
 		return NULL;
@@ -357,6 +440,34 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 			 fltr->ip_data.proto,
 			 ntohl(fltr->ip_data.l4_header));
 		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index bce913c2541d..f5b222b40952 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -24,6 +24,12 @@ enum iavf_fdir_flow_type {
 	IAVF_FDIR_FLOW_IPV4_AH,
 	IAVF_FDIR_FLOW_IPV4_ESP,
 	IAVF_FDIR_FLOW_IPV4_OTHER,
+	IAVF_FDIR_FLOW_IPV6_TCP,
+	IAVF_FDIR_FLOW_IPV6_UDP,
+	IAVF_FDIR_FLOW_IPV6_SCTP,
+	IAVF_FDIR_FLOW_IPV6_AH,
+	IAVF_FDIR_FLOW_IPV6_ESP,
+	IAVF_FDIR_FLOW_IPV6_OTHER,
 	/* MAX - this must be last and add anything new just above it */
 	IAVF_FDIR_FLOW_PTYPE_MAX,
 };
@@ -33,9 +39,15 @@ struct iavf_ipv4_addrs {
 	__be32 dst_ip;
 };
 
+struct iavf_ipv6_addrs {
+	struct in6_addr src_ip;
+	struct in6_addr dst_ip;
+};
+
 struct iavf_fdir_ip {
 	union {
 		struct iavf_ipv4_addrs v4_addrs;
+		struct iavf_ipv6_addrs v6_addrs;
 	};
 	__be16 src_port;
 	__be16 dst_port;
@@ -43,6 +55,7 @@ struct iavf_fdir_ip {
 	__be32 spi;		/* security parameter index for AH/ESP */
 	union {
 		u8 tos;
+		u8 tclass;
 	};
 	u8 proto;
 };
-- 
cgit v1.2.3


From a6ccffaa8da32b6077e37e7d254d519bc071433a Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 9 Mar 2021 11:08:14 +0800
Subject: iavf: Support Ethernet Type Flow Director filters

Support the addition and deletion of Ethernet filters.

Supported fields are: proto
Supported flow-types are: ether

Example usage:
ethtool -N ens787f0v0 flow-type ether proto 0x8863 action 6
ethtool -N ens787f0v0 flow-type ether proto 0x8864 action 7

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 12 ++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.c    | 23 ++++++++++++++++++++++-
 drivers/net/ethernet/intel/iavf/iavf_fdir.h    |  8 ++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 8d856f5dc38e..24ee6ddb8dcb 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -861,6 +861,8 @@ static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow)
 		return ESP_V6_FLOW;
 	case IAVF_FDIR_FLOW_IPV6_OTHER:
 		return IPV6_USER_FLOW;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return ETHER_FLOW;
 	default:
 		/* 0 is undefined ethtool flow */
 		return 0;
@@ -900,6 +902,8 @@ static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
 		return IAVF_FDIR_FLOW_IPV6_ESP;
 	case IPV6_USER_FLOW:
 		return IAVF_FDIR_FLOW_IPV6_OTHER;
+	case ETHER_FLOW:
+		return IAVF_FDIR_FLOW_NON_IP_L2;
 	default:
 		return IAVF_FDIR_FLOW_NONE;
 	}
@@ -1025,6 +1029,10 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
 		fsp->m_u.usr_ip6_spec.tclass = rule->ip_mask.tclass;
 		fsp->m_u.usr_ip6_spec.l4_proto = rule->ip_mask.proto;
 		break;
+	case ETHER_FLOW:
+		fsp->h_u.ether_spec.h_proto = rule->eth_data.etype;
+		fsp->m_u.ether_spec.h_proto = rule->eth_mask.etype;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1197,6 +1205,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
 		fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
 		fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
 		break;
+	case ETHER_FLOW:
+		fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
+		fltr->eth_mask.etype = fsp->m_u.ether_spec.h_proto;
+		break;
 	default:
 		/* not doing un-parsed flow types */
 		return -EINVAL;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index f84f2ab036d5..e1e1af136765 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -277,9 +277,19 @@ iavf_fill_fdir_eth_hdr(struct iavf_fdir_fltr *fltr,
 		       struct virtchnl_proto_hdrs *proto_hdrs)
 {
 	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ethhdr *ehdr = (struct ethhdr *)hdr->buffer;
 
 	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ETH);
 
+	if (fltr->eth_mask.etype == htons(U16_MAX)) {
+		if (fltr->eth_data.etype == htons(ETH_P_IP) ||
+		    fltr->eth_data.etype == htons(ETH_P_IPV6))
+			return -EOPNOTSUPP;
+
+		ehdr->h_proto = fltr->eth_data.etype;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ETH, ETHERTYPE);
+	}
+
 	return 0;
 }
 
@@ -351,6 +361,8 @@ int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
 		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
 		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
 		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		break;
 	default:
 		err = -EINVAL;
 		break;
@@ -392,6 +404,8 @@ static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type)
 	case IAVF_FDIR_FLOW_IPV4_OTHER:
 	case IAVF_FDIR_FLOW_IPV6_OTHER:
 		return "Other";
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return "Ethernet";
 	default:
 		return NULL;
 	}
@@ -468,6 +482,11 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 			 fltr->ip_data.proto,
 			 ntohl(fltr->ip_data.l4_header));
 		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u eth_type: 0x%x\n",
+			 fltr->loc,
+			 ntohs(fltr->eth_data.etype));
+		break;
 	default:
 		break;
 	}
@@ -489,7 +508,9 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
 		if (tmp->flow_type != fltr->flow_type)
 			continue;
 
-		if (!memcmp(&tmp->ip_data, &fltr->ip_data,
+		if (!memcmp(&tmp->eth_data, &fltr->eth_data,
+			    sizeof(fltr->eth_data)) &&
+		    !memcmp(&tmp->ip_data, &fltr->ip_data,
 			    sizeof(fltr->ip_data))) {
 			ret = true;
 			break;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index f5b222b40952..ad75b89cb207 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -30,6 +30,7 @@ enum iavf_fdir_flow_type {
 	IAVF_FDIR_FLOW_IPV6_AH,
 	IAVF_FDIR_FLOW_IPV6_ESP,
 	IAVF_FDIR_FLOW_IPV6_OTHER,
+	IAVF_FDIR_FLOW_NON_IP_L2,
 	/* MAX - this must be last and add anything new just above it */
 	IAVF_FDIR_FLOW_PTYPE_MAX,
 };
@@ -44,6 +45,10 @@ struct iavf_ipv6_addrs {
 	struct in6_addr dst_ip;
 };
 
+struct iavf_fdir_eth {
+	__be16 etype;
+};
+
 struct iavf_fdir_ip {
 	union {
 		struct iavf_ipv4_addrs v4_addrs;
@@ -66,6 +71,9 @@ struct iavf_fdir_fltr {
 
 	enum iavf_fdir_flow_type flow_type;
 
+	struct iavf_fdir_eth eth_data;
+	struct iavf_fdir_eth eth_mask;
+
 	struct iavf_fdir_ip ip_data;
 	struct iavf_fdir_ip ip_mask;
 
-- 
cgit v1.2.3


From a6379db818a850d1c1012cffe160cfc14d64cb40 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 9 Mar 2021 11:08:15 +0800
Subject: iavf: Enable flex-bytes support

Flex-bytes allows for packet matching based on an offset and value. This
is supported via the ethtool user-def option.

The user-def 0xAAAABBBBCCCCDDDD: BBBB is the 2 byte pattern while AAAA
corresponds to its offset in the packet. Similarly DDDD is the 2 byte
pattern with CCCC being the corresponding offset. The offset ranges from
0x0 to 0x1F7 (up to 504 bytes into the packet). The offset starts from
the beginning of the packet.

This feature can be used to allow customers to set flow director rules
for protocols headers that are beyond standard ones supported by ethtool
(e.g. PFCP or GTP-U).

Like for matching GTP-U's TEID value 0x10203040:
ethtool -N ens787f0v0 flow-type udp4 dst-port 2152 \
user-def 0x002e102000303040 action 13

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Chen Bo <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c |  93 +++++++++++
 drivers/net/ethernet/intel/iavf/iavf_fdir.c    | 217 ++++++++++++++++++++++++-
 drivers/net/ethernet/intel/iavf/iavf_fdir.h    |  19 +++
 3 files changed, 327 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 24ee6ddb8dcb..3ebfef737f5c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -909,6 +909,85 @@ static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
 	}
 }
 
+/**
+ * iavf_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * If the mask is fully set return true. If it is not valid for field return
+ * false.
+ */
+static bool iavf_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
+/**
+ * iavf_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter for userdef data storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			     struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_flex_word *flex;
+	int i, cnt = 0;
+
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	for (i = 0; i < 2; i++) {
+#define IAVF_USERDEF_FLEX_WORD_M	GENMASK(15, 0)
+#define IAVF_USERDEF_FLEX_OFFS_S	16
+#define IAVF_USERDEF_FLEX_OFFS_M	GENMASK(31, IAVF_USERDEF_FLEX_OFFS_S)
+#define IAVF_USERDEF_FLEX_FLTR_M	GENMASK(31, 0)
+		u32 value = be32_to_cpu(fsp->h_ext.data[i]);
+		u32 mask = be32_to_cpu(fsp->m_ext.data[i]);
+
+		if (!value || !mask)
+			continue;
+
+		if (!iavf_is_mask_valid(mask, IAVF_USERDEF_FLEX_FLTR_M))
+			return -EINVAL;
+
+		/* 504 is the maximum value for offsets, and offset is measured
+		 * from the start of the MAC address.
+		 */
+#define IAVF_USERDEF_FLEX_MAX_OFFS_VAL 504
+		flex = &fltr->flex_words[cnt++];
+		flex->word = value & IAVF_USERDEF_FLEX_WORD_M;
+		flex->offset = (value & IAVF_USERDEF_FLEX_OFFS_M) >>
+			     IAVF_USERDEF_FLEX_OFFS_S;
+		if (flex->offset > IAVF_USERDEF_FLEX_MAX_OFFS_VAL)
+			return -EINVAL;
+	}
+
+	fltr->flex_cnt = cnt;
+
+	return 0;
+}
+
+/**
+ * iavf_fill_rx_flow_ext_data - fill the additional data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter to get additional data
+ */
+static void
+iavf_fill_rx_flow_ext_data(struct ethtool_rx_flow_spec *fsp,
+			   struct iavf_fdir_fltr *fltr)
+{
+	if (!fltr->ext_mask.usr_def[0] && !fltr->ext_mask.usr_def[1])
+		return;
+
+	fsp->flow_type |= FLOW_EXT;
+
+	memcpy(fsp->h_ext.data, fltr->ext_data.usr_def, sizeof(fsp->h_ext.data));
+	memcpy(fsp->m_ext.data, fltr->ext_mask.usr_def, sizeof(fsp->m_ext.data));
+}
+
 /**
  * iavf_get_ethtool_fdir_entry - fill ethtool structure with Flow Director filter data
  * @adapter: the VF adapter structure that contains filter list
@@ -1038,6 +1117,8 @@ iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
 		break;
 	}
 
+	iavf_fill_rx_flow_ext_data(fsp, rule);
+
 	if (rule->action == VIRTCHNL_ACTION_DROP)
 		fsp->ring_cookie = RX_CLS_FLOW_DISC;
 	else
@@ -1100,6 +1181,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
 {
 	u32 flow_type, q_index = 0;
 	enum virtchnl_action act;
+	int err;
 
 	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
 		act = VIRTCHNL_ACTION_DROP;
@@ -1115,6 +1197,13 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
 	fltr->loc = fsp->location;
 	fltr->q_index = q_index;
 
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(fltr->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(fltr->ext_data.usr_def));
+		memcpy(fltr->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(fltr->ext_mask.usr_def));
+	}
+
 	flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
 	fltr->flow_type = iavf_ethtool_flow_to_fltr(flow_type);
 
@@ -1217,6 +1306,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
 	if (iavf_fdir_is_dup_fltr(adapter, fltr))
 		return -EEXIST;
 
+	err = iavf_parse_rx_flow_user_data(fsp, fltr);
+	if (err)
+		return err;
+
 	return iavf_fill_fdir_add_msg(adapter, fltr);
 }
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index e1e1af136765..3e687189d737 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -5,6 +5,10 @@
 
 #include "iavf.h"
 
+#define GTPU_PORT	2152
+#define NAT_T_ESP_PORT	4500
+#define PFCP_PORT	8805
+
 static const struct in6_addr ipv6_addr_full_mask = {
 	.in6_u = {
 		.u6_addr8 = {
@@ -14,6 +18,206 @@ static const struct in6_addr ipv6_addr_full_mask = {
 	}
 };
 
+/**
+ * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
+ * @fltr: Flow Director filter data structure
+ */
+static u16 iavf_pkt_udp_no_pay_len(struct iavf_fdir_fltr *fltr)
+{
+	return sizeof(struct ethhdr) +
+	       (fltr->ip_ver == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+	       sizeof(struct udphdr);
+}
+
+/**
+ * iavf_fill_fdir_gtpu_hdr - fill the GTP-U protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the GTP-U protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *ghdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct virtchnl_proto_hdr *ehdr = NULL; /* Extension Header if it exists */
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(ghdr, GTPU_IP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_GTPU_HDR_TEID_OFFS0	4
+#define IAVF_GTPU_HDR_TEID_OFFS1	6
+#define IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS	10
+#define IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS		13
+#define IAVF_GTPU_PSC_EXTHDR_TYPE	0x85 /* PDU Session Container Extension Header */
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_GTPU_HDR_TEID_OFFS0:
+		case IAVF_GTPU_HDR_TEID_OFFS1: {
+			__be16 *pay_word = (__be16 *)ghdr->buffer;
+
+			pay_word[hdr_offs >> 1] = htons(fltr->flex_words[i].word);
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ghdr, GTPU_IP, TEID);
+			}
+			break;
+		case IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS:
+			if ((fltr->flex_words[i].word & 0xff) != IAVF_GTPU_PSC_EXTHDR_TYPE)
+				return -EOPNOTSUPP;
+			if (!ehdr)
+				ehdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+			VIRTCHNL_SET_PROTO_HDR_TYPE(ehdr, GTPU_EH);
+			break;
+		case IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS:
+			if (!ehdr)
+				return -EINVAL;
+			ehdr->buffer[1] = fltr->flex_words[i].word & 0x3F;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ehdr, GTPU_EH, QFI);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_pfcp_hdr - fill the PFCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the PFCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_pfcp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, PFCP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS	0
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS:
+			hdr->buffer[0] = (fltr->flex_words[i].word >> 8) & 0xff;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, PFCP, S_FIELD);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_nat_t_esp_hdr - fill the NAT-T-ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the NAT-T-ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_nat_t_esp_hdr(struct iavf_fdir_fltr *fltr,
+			     struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	u32 spi = 0;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_NAT_T_ESP_SPI_OFFS0	0
+#define IAVF_NAT_T_ESP_SPI_OFFS1	2
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_NAT_T_ESP_SPI_OFFS0:
+			spi |= fltr->flex_words[i].word << 16;
+			break;
+		case IAVF_NAT_T_ESP_SPI_OFFS1:
+			spi |= fltr->flex_words[i].word;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (!spi)
+		return -EOPNOTSUPP; /* Not support IKE Header Format with SPI 0 */
+
+	*(__be32 *)hdr->buffer = htonl(spi);
+	VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_flex_pay_hdr - fill the UDP payload header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP payload defined protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_flex_pay_hdr(struct iavf_fdir_fltr *fltr,
+				struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	int err;
+
+	switch (ntohs(fltr->ip_data.dst_port)) {
+	case GTPU_PORT:
+		err = iavf_fill_fdir_gtpu_hdr(fltr, proto_hdrs);
+		break;
+	case NAT_T_ESP_PORT:
+		err = iavf_fill_fdir_nat_t_esp_hdr(fltr, proto_hdrs);
+		break;
+	case PFCP_PORT:
+		err = iavf_fill_fdir_pfcp_hdr(fltr, proto_hdrs);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
 /**
  * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header
  * @fltr: Flow Director filter data structure
@@ -50,6 +254,8 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
 		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
 	}
 
+	fltr->ip_ver = 4;
+
 	return 0;
 }
 
@@ -94,6 +300,8 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
 		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
 	}
 
+	fltr->ip_ver = 6;
+
 	return 0;
 }
 
@@ -152,7 +360,10 @@ iavf_fill_fdir_udp_hdr(struct iavf_fdir_fltr *fltr,
 		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
 	}
 
-	return 0;
+	if (!fltr->flex_cnt)
+		return 0;
+
+	return iavf_fill_fdir_udp_flex_pay_hdr(fltr, proto_hdrs);
 }
 
 /**
@@ -511,7 +722,9 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
 		if (!memcmp(&tmp->eth_data, &fltr->eth_data,
 			    sizeof(fltr->eth_data)) &&
 		    !memcmp(&tmp->ip_data, &fltr->ip_data,
-			    sizeof(fltr->ip_data))) {
+			    sizeof(fltr->ip_data)) &&
+		    !memcmp(&tmp->ext_data, &fltr->ext_data,
+			    sizeof(fltr->ext_data))) {
 			ret = true;
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index ad75b89cb207..2439c970b657 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -35,6 +35,11 @@ enum iavf_fdir_flow_type {
 	IAVF_FDIR_FLOW_PTYPE_MAX,
 };
 
+struct iavf_flex_word {
+	u16 offset;
+	u16 word;
+};
+
 struct iavf_ipv4_addrs {
 	__be32 src_ip;
 	__be32 dst_ip;
@@ -64,6 +69,11 @@ struct iavf_fdir_ip {
 	};
 	u8 proto;
 };
+
+struct iavf_fdir_extra {
+	u32 usr_def[2];
+};
+
 /* bookkeeping of Flow Director filters */
 struct iavf_fdir_fltr {
 	enum iavf_fdir_fltr_state_t state;
@@ -77,7 +87,16 @@ struct iavf_fdir_fltr {
 	struct iavf_fdir_ip ip_data;
 	struct iavf_fdir_ip ip_mask;
 
+	struct iavf_fdir_extra ext_data;
+	struct iavf_fdir_extra ext_mask;
+
 	enum virtchnl_action action;
+
+	/* flex byte filter data */
+	u8 ip_ver; /* used to adjust the flex offset, 4 : IPv4, 6 : IPv6 */
+	u8 flex_cnt;
+	struct iavf_flex_word flex_words[2];
+
 	u32 flow_id;
 
 	u32 loc;	/* Rule location inside the flow table */
-- 
cgit v1.2.3


From 227d72063fccb2d19b30fb4197fba478514f7d83 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Fri, 19 Mar 2021 15:46:30 +0000
Subject: dsa: simplify Kconfig symbols and dependencies

1. Remove CONFIG_HAVE_NET_DSA.

CONFIG_HAVE_NET_DSA is a legacy leftover from the times when drivers
should have selected CONFIG_NET_DSA manually.
Currently, all drivers has explicit 'depends on NET_DSA', so this is
no more needed.

2. CONFIG_HAVE_NET_DSA dependencies became CONFIG_NET_DSA's ones.

 - dropped !S390 dependency which was introduced to be sure NET_DSA
   can select CONFIG_PHYLIB. DSA migrated to Phylink almost 3 years
   ago and the PHY library itself doesn't depend on !S390 since
   commit 870a2b5e4fcd ("phylib: remove !S390 dependeny from Kconfig");
 - INET dependency is kept to be sure we can select NET_SWITCHDEV;
 - NETDEVICES dependency is kept to be sure we can select PHYLINK.

3. DSA drivers menu now depends on NET_DSA.

Instead on 'depends on NET_DSA' on every single driver, the entire
menu now depends on it. This eliminates a lot of duplicated lines
from Kconfig with no loss (when CONFIG_NET_DSA=m, drivers also can
be only m or n).
This also has a nice side effect that there's no more empty menu on
configurations without DSA.

4. Kbuild will now descend into 'drivers/net/dsa' only when
   CONFIG_NET_DSA is y or m.

This is safe since no objects inside this folder can be built without
DSA core, as well as when CONFIG_NET_DSA=m, no objects can be
built-in.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Makefile    |  2 +-
 drivers/net/dsa/Kconfig | 17 ++++-------------
 net/dsa/Kconfig         | 10 +++-------
 3 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index f4990ff32fa4..040e20b81317 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -45,7 +45,7 @@ obj-$(CONFIG_ARCNET) += arcnet/
 obj-$(CONFIG_DEV_APPLETALK) += appletalk/
 obj-$(CONFIG_CAIF) += caif/
 obj-$(CONFIG_CAN) += can/
-obj-y += dsa/
+obj-$(CONFIG_NET_DSA) += dsa/
 obj-$(CONFIG_ETHERNET) += ethernet/
 obj-$(CONFIG_FDDI) += fddi/
 obj-$(CONFIG_HIPPI) += hippi/
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 3af373e90806..a5f1aa911fe2 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "Distributed Switch Architecture drivers"
-	depends on HAVE_NET_DSA
+	depends on NET_DSA
 
 source "drivers/net/dsa/b53/Kconfig"
 
 config NET_DSA_BCM_SF2
 	tristate "Broadcom Starfighter 2 Ethernet switch support"
-	depends on HAS_IOMEM && NET_DSA
+	depends on HAS_IOMEM
 	select NET_DSA_TAG_BRCM
 	select FIXED_PHY
 	select BCM7XXX_PHY
@@ -18,7 +18,6 @@ config NET_DSA_BCM_SF2
 
 config NET_DSA_LOOP
 	tristate "DSA mock-up Ethernet switch chip support"
-	depends on NET_DSA
 	select FIXED_PHY
 	help
 	  This enables support for a fake mock-up switch chip which
@@ -28,7 +27,7 @@ source "drivers/net/dsa/hirschmann/Kconfig"
 
 config NET_DSA_LANTIQ_GSWIP
 	tristate "Lantiq / Intel GSWIP"
-	depends on HAS_IOMEM && NET_DSA
+	depends on HAS_IOMEM
 	select NET_DSA_TAG_GSWIP
 	help
 	  This enables support for the Lantiq / Intel GSWIP 2.1 found in
@@ -36,7 +35,6 @@ config NET_DSA_LANTIQ_GSWIP
 
 config NET_DSA_MT7530
 	tristate "MediaTek MT753x and MT7621 Ethernet switch support"
-	depends on NET_DSA
 	select NET_DSA_TAG_MTK
 	help
 	  This enables support for the MediaTek MT7530, MT7531, and MT7621
@@ -44,7 +42,6 @@ config NET_DSA_MT7530
 
 config NET_DSA_MV88E6060
 	tristate "Marvell 88E6060 ethernet switch chip support"
-	depends on NET_DSA
 	select NET_DSA_TAG_TRAILER
 	help
 	  This enables support for the Marvell 88E6060 ethernet switch
@@ -64,7 +61,6 @@ source "drivers/net/dsa/xrs700x/Kconfig"
 
 config NET_DSA_QCA8K
 	tristate "Qualcomm Atheros QCA8K Ethernet switch family support"
-	depends on NET_DSA
 	select NET_DSA_TAG_QCA
 	select REGMAP
 	help
@@ -73,7 +69,6 @@ config NET_DSA_QCA8K
 
 config NET_DSA_REALTEK_SMI
 	tristate "Realtek SMI Ethernet switch family support"
-	depends on NET_DSA
 	select NET_DSA_TAG_RTL4_A
 	select FIXED_PHY
 	select IRQ_DOMAIN
@@ -93,7 +88,7 @@ config NET_DSA_SMSC_LAN9303
 
 config NET_DSA_SMSC_LAN9303_I2C
 	tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in I2C managed mode"
-	depends on NET_DSA && I2C
+	depends on I2C
 	select NET_DSA_SMSC_LAN9303
 	select REGMAP_I2C
 	help
@@ -102,7 +97,6 @@ config NET_DSA_SMSC_LAN9303_I2C
 
 config NET_DSA_SMSC_LAN9303_MDIO
 	tristate "SMSC/Microchip LAN9303 3-ports 10/100 ethernet switch in MDIO managed mode"
-	depends on NET_DSA
 	select NET_DSA_SMSC_LAN9303
 	help
 	  Enable access functions if the SMSC/Microchip LAN9303 is configured
@@ -110,7 +104,6 @@ config NET_DSA_SMSC_LAN9303_MDIO
 
 config NET_DSA_VITESSE_VSC73XX
 	tristate
-	depends on NET_DSA
 	select FIXED_PHY
 	select VITESSE_PHY
 	select GPIOLIB
@@ -120,7 +113,6 @@ config NET_DSA_VITESSE_VSC73XX
 
 config NET_DSA_VITESSE_VSC73XX_SPI
 	tristate "Vitesse VSC7385/7388/7395/7398 SPI mode support"
-	depends on NET_DSA
 	depends on SPI
 	select NET_DSA_VITESSE_VSC73XX
 	help
@@ -129,7 +121,6 @@ config NET_DSA_VITESSE_VSC73XX_SPI
 
 config NET_DSA_VITESSE_VSC73XX_PLATFORM
 	tristate "Vitesse VSC7385/7388/7395/7398 Platform mode support"
-	depends on NET_DSA
 	depends on HAS_IOMEM
 	select NET_DSA_VITESSE_VSC73XX
 	help
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index aaf8a452fd5b..8746b07668ae 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,15 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config HAVE_NET_DSA
-	def_bool y
-	depends on INET && NETDEVICES && !S390
-
-# Drivers must select NET_DSA and the appropriate tagging format
 
 menuconfig NET_DSA
 	tristate "Distributed Switch Architecture"
-	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
 	depends on HSR || HSR=n
+	depends on INET && NETDEVICES
 	select GRO_CELLS
 	select NET_SWITCHDEV
 	select PHYLINK
@@ -20,7 +15,8 @@ menuconfig NET_DSA
 
 if NET_DSA
 
-# tagging formats
+# Drivers must select the appropriate tagging format(s)
+
 config NET_DSA_TAG_8021Q
 	tristate
 	select VLAN_8021Q
-- 
cgit v1.2.3


From f57bac3c33e761fdd78fef159fdc677056c706d0 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sun, 21 Mar 2021 22:48:49 +0900
Subject: netdev: add netdev_queue_set_dql_min_limit()

Add a function to set the dynamic queue limit minimum value.

Some specific drivers might have legitimate reasons to configure
dql.min_limit to a given value. Typically, this is the case when the
PDU of the protocol is smaller than the packet size to used to
carry those frames to the device.

Concrete example: a CAN (Control Area Network) device with an USB 2.0
interface.  The PDU of classical CAN protocol are roughly 16 bytes but
the USB packet size (which is used to carry the CAN frames to the
device) might be up to 512 bytes.  Wen small traffic burst occurs, BQL
algorithm is not able to immediately adjust and this would result in
having to send many small USB packets (i.e packet of 16 bytes for each
CAN frame). Filling up the USB packet with CAN frames is relatively
fast (small latency issue) but the gain of not having to send several
small USB packets is huge (big throughput increase). In this case,
forcing dql.min_limit to a given value that would allow to stuff the
USB packet is always a win.

This function is to be used by network drivers which are able to prove
through a rationale and through empirical tests on several environment
(with other applications, heavy context switching, virtualization...),
that they constantly reach better performances with a specific
predefined dql.min_limit value with no noticeable latency impact.

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8f003955c485..33b8ea08996e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3446,6 +3446,24 @@ netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue)
 	return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN;
 }
 
+/**
+ *	netdev_queue_set_dql_min_limit - set dql minimum limit
+ *	@dev_queue: pointer to transmit queue
+ *	@min_limit: dql minimum limit
+ *
+ * Forces xmit_more() to return true until the minimum threshold
+ * defined by @min_limit is reached (or until the tx queue is
+ * empty). Warning: to be use with care, misuse will impact the
+ * latency.
+ */
+static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue,
+						  unsigned int min_limit)
+{
+#ifdef CONFIG_BQL
+	dev_queue->dql.min_limit = min_limit;
+#endif
+}
+
 /**
  *	netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write
  *	@dev_queue: pointer to transmit queue
-- 
cgit v1.2.3


From 6215afcb9a7e35cef334dc0ae7f998cc72c8465f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 21 Mar 2021 23:05:48 +0200
Subject: net/sched: cls_flower: use ntohs for struct flow_dissector_key_ports

A make W=1 build complains that:

net/sched/cls_flower.c:214:20: warning: cast from restricted __be16
net/sched/cls_flower.c:214:20: warning: incorrect type in argument 1 (different base types)
net/sched/cls_flower.c:214:20:    expected unsigned short [usertype] val
net/sched/cls_flower.c:214:20:    got restricted __be16 [usertype] dst

This is because we use htons on struct flow_dissector_key_ports members
src and dst, which are defined as __be16, so they are already in network
byte order, not host. The byte swap function for the other direction
should have been used.

Because htons and ntohs do the same thing (either both swap, or none
does), this change has no functional effect except to silence the
warnings.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d097b5c15faa..832a0ece6dbf 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -209,16 +209,16 @@ static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.dst);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.dst);
-	min_val = htons(filter->key.tp_range.tp_min.dst);
-	max_val = htons(filter->key.tp_range.tp_max.dst);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst);
+	min_val = ntohs(filter->key.tp_range.tp_min.dst);
+	max_val = ntohs(filter->key.tp_range.tp_max.dst);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.dst) < min_val ||
-		    htons(key->tp_range.tp.dst) > max_val)
+		if (ntohs(key->tp_range.tp.dst) < min_val ||
+		    ntohs(key->tp_range.tp.dst) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -232,16 +232,16 @@ static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
 				  struct fl_flow_key *key,
 				  struct fl_flow_key *mkey)
 {
-	__be16 min_mask, max_mask, min_val, max_val;
+	u16 min_mask, max_mask, min_val, max_val;
 
-	min_mask = htons(filter->mask->key.tp_range.tp_min.src);
-	max_mask = htons(filter->mask->key.tp_range.tp_max.src);
-	min_val = htons(filter->key.tp_range.tp_min.src);
-	max_val = htons(filter->key.tp_range.tp_max.src);
+	min_mask = ntohs(filter->mask->key.tp_range.tp_min.src);
+	max_mask = ntohs(filter->mask->key.tp_range.tp_max.src);
+	min_val = ntohs(filter->key.tp_range.tp_min.src);
+	max_val = ntohs(filter->key.tp_range.tp_max.src);
 
 	if (min_mask && max_mask) {
-		if (htons(key->tp_range.tp.src) < min_val ||
-		    htons(key->tp_range.tp.src) > max_val)
+		if (ntohs(key->tp_range.tp.src) < min_val ||
+		    ntohs(key->tp_range.tp.src) > max_val)
 			return false;
 
 		/* skb does not have min and max values */
@@ -783,16 +783,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 		       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
 
 	if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
-	    htons(key->tp_range.tp_max.dst) <=
-	    htons(key->tp_range.tp_min.dst)) {
+	    ntohs(key->tp_range.tp_max.dst) <=
+	    ntohs(key->tp_range.tp_min.dst)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_DST_MIN],
 				    "Invalid destination port range (min must be strictly smaller than max)");
 		return -EINVAL;
 	}
 	if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src &&
-	    htons(key->tp_range.tp_max.src) <=
-	    htons(key->tp_range.tp_min.src)) {
+	    ntohs(key->tp_range.tp_max.src) <=
+	    ntohs(key->tp_range.tp_min.src)) {
 		NL_SET_ERR_MSG_ATTR(extack,
 				    tb[TCA_FLOWER_KEY_PORT_SRC_MIN],
 				    "Invalid source port range (min must be strictly smaller than max)");
-- 
cgit v1.2.3


From abee13f53e889024401de913ff46a6997a866b0c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 21 Mar 2021 23:05:49 +0200
Subject: net/sched: cls_flower: use nla_get_be32 for TCA_FLOWER_KEY_FLAGS

The existing code is functionally correct: iproute2 parses the ip_flags
argument for tc-flower and really packs it as big endian into the
TCA_FLOWER_KEY_FLAGS netlink attribute. But there is a problem in the
fact that W=1 builds complain:

net/sched/cls_flower.c:1047:15: warning: cast to restricted __be32

This is because we should use the dedicated helper for obtaining a
__be32 pointer to the netlink attribute, not a u32 one. This ensures
type correctness for be32_to_cpu.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 832a0ece6dbf..9736df97e04d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1044,8 +1044,8 @@ static int fl_set_key_flags(struct nlattr **tb, u32 *flags_key,
 		return -EINVAL;
 	}
 
-	key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
-	mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+	key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS]));
+	mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
 
 	*flags_key  = 0;
 	*flags_mask = 0;
-- 
cgit v1.2.3


From fd42327f31bbe72e445b400bb35df1e803ae4aaf Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Date: Mon, 22 Mar 2021 01:29:43 +0200
Subject: dt-bindings: net: Add Actions Semi Owl Ethernet MAC binding

Add devicetree binding for the Ethernet MAC present on the Actions
Semi Owl family of SoCs.

For the moment advertise only the support for the Actions Semi S500 SoC
variant.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/actions,owl-emac.yaml  | 92 ++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/actions,owl-emac.yaml

diff --git a/Documentation/devicetree/bindings/net/actions,owl-emac.yaml b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
new file mode 100644
index 000000000000..1626e0a821b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/actions,owl-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi Owl SoCs Ethernet MAC Controller
+
+maintainers:
+  - Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+
+description: |
+  This Ethernet MAC is used on the Owl family of SoCs from Actions Semi.
+  It provides the RMII and SMII interfaces and is compliant with the
+  IEEE 802.3 CSMA/CD standard, supporting both half-duplex and full-duplex
+  operation modes at 10/100 Mb/s data transfer rates.
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+properties:
+  compatible:
+    oneOf:
+      - const: actions,owl-emac
+      - items:
+          - enum:
+              - actions,s500-emac
+          - const: actions,owl-emac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 2
+
+  clock-names:
+    additionalItems: false
+    items:
+      - const: eth
+      - const: rmii
+
+  resets:
+    maxItems: 1
+
+  actions,ethcfg:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the device containing custom config.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - phy-mode
+  - phy-handle
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/actions,s500-cmu.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/reset/actions,s500-reset.h>
+
+    ethernet@b0310000 {
+        compatible = "actions,s500-emac", "actions,owl-emac";
+        reg = <0xb0310000 0x10000>;
+        interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&cmu 59 /*CLK_ETHERNET*/>, <&cmu CLK_RMII_REF>;
+        clock-names = "eth", "rmii";
+        resets = <&cmu RESET_ETHERNET>;
+        phy-mode = "rmii";
+        phy-handle = <&eth_phy>;
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            eth_phy: ethernet-phy@3 {
+                reg = <0x3>;
+                interrupt-parent = <&sirq>;
+                interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+            };
+        };
+    };
-- 
cgit v1.2.3


From de6e0b198239857943db395377dc1d2ddd6c05df Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Date: Mon, 22 Mar 2021 01:29:44 +0200
Subject: net: ethernet: actions: Add Actions Semi Owl Ethernet MAC driver

Add new driver for the Ethernet MAC used on the Actions Semi Owl
family of SoCs.

Currently this has been tested only on the Actions Semi S500 SoC
variant.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/Kconfig            |    1 +
 drivers/net/ethernet/Makefile           |    1 +
 drivers/net/ethernet/actions/Kconfig    |   26 +
 drivers/net/ethernet/actions/Makefile   |    6 +
 drivers/net/ethernet/actions/owl-emac.c | 1625 +++++++++++++++++++++++++++++++
 drivers/net/ethernet/actions/owl-emac.h |  280 ++++++
 6 files changed, 1939 insertions(+)
 create mode 100644 drivers/net/ethernet/actions/Kconfig
 create mode 100644 drivers/net/ethernet/actions/Makefile
 create mode 100644 drivers/net/ethernet/actions/owl-emac.c
 create mode 100644 drivers/net/ethernet/actions/owl-emac.h

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index ad04660b97b8..4b85f2b74872 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -19,6 +19,7 @@ config SUNGEM_PHY
 	tristate
 
 source "drivers/net/ethernet/3com/Kconfig"
+source "drivers/net/ethernet/actions/Kconfig"
 source "drivers/net/ethernet/adaptec/Kconfig"
 source "drivers/net/ethernet/aeroflex/Kconfig"
 source "drivers/net/ethernet/agere/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 1e7dc8a7762d..9394493e8187 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -5,6 +5,7 @@
 
 obj-$(CONFIG_NET_VENDOR_3COM) += 3com/
 obj-$(CONFIG_NET_VENDOR_8390) += 8390/
+obj-$(CONFIG_NET_VENDOR_ACTIONS) += actions/
 obj-$(CONFIG_NET_VENDOR_ADAPTEC) += adaptec/
 obj-$(CONFIG_GRETH) += aeroflex/
 obj-$(CONFIG_NET_VENDOR_AGERE) += agere/
diff --git a/drivers/net/ethernet/actions/Kconfig b/drivers/net/ethernet/actions/Kconfig
new file mode 100644
index 000000000000..ccad6a3f4d6f
--- /dev/null
+++ b/drivers/net/ethernet/actions/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config NET_VENDOR_ACTIONS
+	bool "Actions Semi devices"
+	default y
+	depends on ARCH_ACTIONS
+	help
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about Actions Semi devices.  If you say Y, you will be
+	  asked for your specific card in the following questions.
+
+if NET_VENDOR_ACTIONS
+
+config OWL_EMAC
+	tristate "Actions Semi Owl Ethernet MAC support"
+	select PHYLIB
+	help
+	  This driver supports the Actions Semi Ethernet Media Access
+	  Controller (EMAC) found on the S500 and S900 SoCs.  The controller
+	  is compliant with the IEEE 802.3 CSMA/CD standard and supports
+	  both half-duplex and full-duplex operation modes at 10/100 Mb/s.
+
+endif # NET_VENDOR_ACTIONS
diff --git a/drivers/net/ethernet/actions/Makefile b/drivers/net/ethernet/actions/Makefile
new file mode 100644
index 000000000000..fde8001d538a
--- /dev/null
+++ b/drivers/net/ethernet/actions/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Actions Semi Owl SoCs built-in ethernet macs
+#
+
+obj-$(CONFIG_OWL_EMAC) += owl-emac.o
diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
new file mode 100644
index 000000000000..b8e771c2bc40
--- /dev/null
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -0,0 +1,1625 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Actions Semi Owl SoCs Ethernet MAC driver
+ *
+ * Copyright (c) 2012 Actions Semi Inc.
+ * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/reset.h>
+
+#include "owl-emac.h"
+
+#define OWL_EMAC_DEFAULT_MSG_ENABLE	(NETIF_MSG_DRV | \
+					 NETIF_MSG_PROBE | \
+					 NETIF_MSG_LINK)
+
+static u32 owl_emac_reg_read(struct owl_emac_priv *priv, u32 reg)
+{
+	return readl(priv->base + reg);
+}
+
+static void owl_emac_reg_write(struct owl_emac_priv *priv, u32 reg, u32 data)
+{
+	writel(data, priv->base + reg);
+}
+
+static u32 owl_emac_reg_update(struct owl_emac_priv *priv,
+			       u32 reg, u32 mask, u32 val)
+{
+	u32 data, old_val;
+
+	data = owl_emac_reg_read(priv, reg);
+	old_val = data & mask;
+
+	data &= ~mask;
+	data |= val & mask;
+
+	owl_emac_reg_write(priv, reg, data);
+
+	return old_val;
+}
+
+static void owl_emac_reg_set(struct owl_emac_priv *priv, u32 reg, u32 bits)
+{
+	owl_emac_reg_update(priv, reg, bits, bits);
+}
+
+static void owl_emac_reg_clear(struct owl_emac_priv *priv, u32 reg, u32 bits)
+{
+	owl_emac_reg_update(priv, reg, bits, 0);
+}
+
+static struct device *owl_emac_get_dev(struct owl_emac_priv *priv)
+{
+	return priv->netdev->dev.parent;
+}
+
+static void owl_emac_irq_enable(struct owl_emac_priv *priv)
+{
+	/* Enable all interrupts except TU.
+	 *
+	 * Note the NIE and AIE bits shall also be set in order to actually
+	 * enable the selected interrupts.
+	 */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7,
+			   OWL_EMAC_BIT_MAC_CSR7_NIE |
+			   OWL_EMAC_BIT_MAC_CSR7_AIE |
+			   OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE);
+}
+
+static void owl_emac_irq_disable(struct owl_emac_priv *priv)
+{
+	/* Disable all interrupts.
+	 *
+	 * WARNING: Unset only the NIE and AIE bits in CSR7 to workaround an
+	 * unexpected side effect (MAC hardware bug?!) where some bits in the
+	 * status register (CSR5) are cleared automatically before being able
+	 * to read them via owl_emac_irq_clear().
+	 */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7,
+			   OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE);
+}
+
+static u32 owl_emac_irq_status(struct owl_emac_priv *priv)
+{
+	return owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR5);
+}
+
+static u32 owl_emac_irq_clear(struct owl_emac_priv *priv)
+{
+	u32 val = owl_emac_irq_status(priv);
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR5, val);
+
+	return val;
+}
+
+static dma_addr_t owl_emac_dma_map_rx(struct owl_emac_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	/* Buffer pointer for the RX DMA descriptor must be word aligned. */
+	return dma_map_single(dev, skb_tail_pointer(skb),
+			      skb_tailroom(skb), DMA_FROM_DEVICE);
+}
+
+static void owl_emac_dma_unmap_rx(struct owl_emac_priv *priv,
+				  struct sk_buff *skb, dma_addr_t dma_addr)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	dma_unmap_single(dev, dma_addr, skb_tailroom(skb), DMA_FROM_DEVICE);
+}
+
+static dma_addr_t owl_emac_dma_map_tx(struct owl_emac_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	return dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
+}
+
+static void owl_emac_dma_unmap_tx(struct owl_emac_priv *priv,
+				  struct sk_buff *skb, dma_addr_t dma_addr)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+
+	dma_unmap_single(dev, dma_addr, skb_headlen(skb), DMA_TO_DEVICE);
+}
+
+static unsigned int owl_emac_ring_num_unused(struct owl_emac_ring *ring)
+{
+	return CIRC_SPACE(ring->head, ring->tail, ring->size);
+}
+
+static unsigned int owl_emac_ring_get_next(struct owl_emac_ring *ring,
+					   unsigned int cur)
+{
+	return (cur + 1) & (ring->size - 1);
+}
+
+static void owl_emac_ring_push_head(struct owl_emac_ring *ring)
+{
+	ring->head = owl_emac_ring_get_next(ring, ring->head);
+}
+
+static void owl_emac_ring_pop_tail(struct owl_emac_ring *ring)
+{
+	ring->tail = owl_emac_ring_get_next(ring, ring->tail);
+}
+
+static struct sk_buff *owl_emac_alloc_skb(struct net_device *netdev)
+{
+	struct sk_buff *skb;
+	int offset;
+
+	skb = netdev_alloc_skb(netdev, OWL_EMAC_RX_FRAME_MAX_LEN +
+			       OWL_EMAC_SKB_RESERVE);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Ensure 4 bytes DMA alignment. */
+	offset = ((uintptr_t)skb->data) & (OWL_EMAC_SKB_ALIGN - 1);
+	if (unlikely(offset))
+		skb_reserve(skb, OWL_EMAC_SKB_ALIGN - offset);
+
+	return skb;
+}
+
+static int owl_emac_ring_prepare_rx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	struct device *dev = owl_emac_get_dev(priv);
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		skb = owl_emac_alloc_skb(netdev);
+		if (!skb)
+			return -ENOMEM;
+
+		dma_addr = owl_emac_dma_map_rx(priv, skb);
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_kfree_skb(skb);
+			return -ENOMEM;
+		}
+
+		desc = &ring->descs[i];
+		desc->status = OWL_EMAC_BIT_RDES0_OWN;
+		desc->control = skb_tailroom(skb) & OWL_EMAC_MSK_RDES1_RBS1;
+		desc->buf_addr = dma_addr;
+		desc->reserved = 0;
+
+		ring->skbs[i] = skb;
+		ring->skbs_dma[i] = dma_addr;
+	}
+
+	desc->control |= OWL_EMAC_BIT_RDES1_RER;
+
+	ring->head = 0;
+	ring->tail = 0;
+
+	return 0;
+}
+
+static void owl_emac_ring_prepare_tx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct owl_emac_ring_desc *desc;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		desc = &ring->descs[i];
+
+		desc->status = 0;
+		desc->control = OWL_EMAC_BIT_TDES1_IC;
+		desc->buf_addr = 0;
+		desc->reserved = 0;
+	}
+
+	desc->control |= OWL_EMAC_BIT_TDES1_TER;
+
+	memset(ring->skbs_dma, 0, sizeof(dma_addr_t) * ring->size);
+
+	ring->head = 0;
+	ring->tail = 0;
+}
+
+static void owl_emac_ring_unprepare_rx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		ring->descs[i].status = 0;
+
+		if (!ring->skbs_dma[i])
+			continue;
+
+		owl_emac_dma_unmap_rx(priv, ring->skbs[i], ring->skbs_dma[i]);
+		ring->skbs_dma[i] = 0;
+
+		dev_kfree_skb(ring->skbs[i]);
+		ring->skbs[i] = NULL;
+	}
+}
+
+static void owl_emac_ring_unprepare_tx(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		ring->descs[i].status = 0;
+
+		if (!ring->skbs_dma[i])
+			continue;
+
+		owl_emac_dma_unmap_tx(priv, ring->skbs[i], ring->skbs_dma[i]);
+		ring->skbs_dma[i] = 0;
+
+		dev_kfree_skb(ring->skbs[i]);
+		ring->skbs[i] = NULL;
+	}
+}
+
+static int owl_emac_ring_alloc(struct device *dev, struct owl_emac_ring *ring,
+			       unsigned int size)
+{
+	ring->descs = dmam_alloc_coherent(dev,
+					  sizeof(struct owl_emac_ring_desc) * size,
+					  &ring->descs_dma, GFP_KERNEL);
+	if (!ring->descs)
+		return -ENOMEM;
+
+	ring->skbs = devm_kcalloc(dev, size, sizeof(struct sk_buff *),
+				  GFP_KERNEL);
+	if (!ring->skbs)
+		return -ENOMEM;
+
+	ring->skbs_dma = devm_kcalloc(dev, size, sizeof(dma_addr_t),
+				      GFP_KERNEL);
+	if (!ring->skbs_dma)
+		return -ENOMEM;
+
+	ring->size = size;
+
+	return 0;
+}
+
+static void owl_emac_dma_cmd_resume_rx(struct owl_emac_priv *priv)
+{
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR2,
+			   OWL_EMAC_VAL_MAC_CSR2_RPD);
+}
+
+static void owl_emac_dma_cmd_resume_tx(struct owl_emac_priv *priv)
+{
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR1,
+			   OWL_EMAC_VAL_MAC_CSR1_TPD);
+}
+
+static u32 owl_emac_dma_cmd_set_tx(struct owl_emac_priv *priv, u32 status)
+{
+	return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+				   OWL_EMAC_BIT_MAC_CSR6_ST, status);
+}
+
+static u32 owl_emac_dma_cmd_start_tx(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set_tx(priv, ~0);
+}
+
+static u32 owl_emac_dma_cmd_set(struct owl_emac_priv *priv, u32 status)
+{
+	return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+				   OWL_EMAC_MSK_MAC_CSR6_STSR, status);
+}
+
+static u32 owl_emac_dma_cmd_start(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set(priv, ~0);
+}
+
+static u32 owl_emac_dma_cmd_stop(struct owl_emac_priv *priv)
+{
+	return owl_emac_dma_cmd_set(priv, 0);
+}
+
+static void owl_emac_set_hw_mac_addr(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	u8 *mac_addr = netdev->dev_addr;
+	u32 addr_high, addr_low;
+
+	addr_high = mac_addr[0] << 8 | mac_addr[1];
+	addr_low = mac_addr[2] << 24 | mac_addr[3] << 16 |
+		   mac_addr[4] << 8 | mac_addr[5];
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR17, addr_high);
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR16, addr_low);
+}
+
+static void owl_emac_update_link_state(struct owl_emac_priv *priv)
+{
+	u32 val, status;
+
+	if (priv->pause) {
+		val = OWL_EMAC_BIT_MAC_CSR20_FCE | OWL_EMAC_BIT_MAC_CSR20_TUE;
+		val |= OWL_EMAC_BIT_MAC_CSR20_TPE | OWL_EMAC_BIT_MAC_CSR20_RPE;
+		val |= OWL_EMAC_BIT_MAC_CSR20_BPE;
+	} else {
+		val = 0;
+	}
+
+	/* Update flow control. */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR20, val);
+
+	val = (priv->speed == SPEED_100) ? OWL_EMAC_VAL_MAC_CSR6_SPEED_100M :
+					   OWL_EMAC_VAL_MAC_CSR6_SPEED_10M;
+	val <<= OWL_EMAC_OFF_MAC_CSR6_SPEED;
+
+	if (priv->duplex == DUPLEX_FULL)
+		val |= OWL_EMAC_BIT_MAC_CSR6_FD;
+
+	spin_lock_bh(&priv->lock);
+
+	/* Temporarily stop DMA TX & RX. */
+	status = owl_emac_dma_cmd_stop(priv);
+
+	/* Update operation modes. */
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_MSK_MAC_CSR6_SPEED |
+			    OWL_EMAC_BIT_MAC_CSR6_FD, val);
+
+	/* Restore DMA TX & RX status. */
+	owl_emac_dma_cmd_set(priv, status);
+
+	spin_unlock_bh(&priv->lock);
+}
+
+static void owl_emac_adjust_link(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
+	bool state_changed = false;
+
+	if (phydev->link) {
+		if (!priv->link) {
+			priv->link = phydev->link;
+			state_changed = true;
+		}
+
+		if (priv->speed != phydev->speed) {
+			priv->speed = phydev->speed;
+			state_changed = true;
+		}
+
+		if (priv->duplex != phydev->duplex) {
+			priv->duplex = phydev->duplex;
+			state_changed = true;
+		}
+
+		if (priv->pause != phydev->pause) {
+			priv->pause = phydev->pause;
+			state_changed = true;
+		}
+	} else {
+		if (priv->link) {
+			priv->link = phydev->link;
+			state_changed = true;
+		}
+	}
+
+	if (state_changed) {
+		if (phydev->link)
+			owl_emac_update_link_state(priv);
+
+		if (netif_msg_link(priv))
+			phy_print_status(phydev);
+	}
+}
+
+static irqreturn_t owl_emac_handle_irq(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	if (netif_running(netdev)) {
+		owl_emac_irq_disable(priv);
+		napi_schedule(&priv->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void owl_emac_ether_addr_push(u8 **dst, const u8 *src)
+{
+	u32 *a = (u32 *)(*dst);
+	const u16 *b = (const u16 *)src;
+
+	a[0] = b[0];
+	a[1] = b[1];
+	a[2] = b[2];
+
+	*dst += 12;
+}
+
+static void
+owl_emac_setup_frame_prepare(struct owl_emac_priv *priv, struct sk_buff *skb)
+{
+	const u8 bcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	const u8 *mac_addr = priv->netdev->dev_addr;
+	u8 *frame;
+	int i;
+
+	skb_put(skb, OWL_EMAC_SETUP_FRAME_LEN);
+
+	frame = skb->data;
+	memset(frame, 0, skb->len);
+
+	owl_emac_ether_addr_push(&frame, mac_addr);
+	owl_emac_ether_addr_push(&frame, bcast_addr);
+
+	/* Fill multicast addresses. */
+	WARN_ON(priv->mcaddr_list.count >= OWL_EMAC_MAX_MULTICAST_ADDRS);
+	for (i = 0; i < priv->mcaddr_list.count; i++) {
+		mac_addr = priv->mcaddr_list.addrs[i];
+		owl_emac_ether_addr_push(&frame, mac_addr);
+	}
+}
+
+/* The setup frame is a special descriptor which is used to provide physical
+ * addresses (i.e. mac, broadcast and multicast) to the MAC hardware for
+ * filtering purposes. To be recognized as a setup frame, the TDES1_SET bit
+ * must be set in the TX descriptor control field.
+ */
+static int owl_emac_setup_frame_xmit(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	unsigned int tx_head;
+	u32 status, control;
+	dma_addr_t dma_addr;
+	int ret;
+
+	skb = owl_emac_alloc_skb(netdev);
+	if (!skb)
+		return -ENOMEM;
+
+	owl_emac_setup_frame_prepare(priv, skb);
+
+	dma_addr = owl_emac_dma_map_tx(priv, skb);
+	if (dma_mapping_error(owl_emac_get_dev(priv), dma_addr)) {
+		ret = -ENOMEM;
+		goto err_free_skb;
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	tx_head = ring->head;
+	desc = &ring->descs[tx_head];
+
+	status = READ_ONCE(desc->status);
+	control = READ_ONCE(desc->control);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (unlikely(status & OWL_EMAC_BIT_TDES0_OWN) ||
+	    !owl_emac_ring_num_unused(ring)) {
+		spin_unlock_bh(&priv->lock);
+		owl_emac_dma_unmap_tx(priv, skb, dma_addr);
+		ret = -EBUSY;
+		goto err_free_skb;
+	}
+
+	ring->skbs[tx_head] = skb;
+	ring->skbs_dma[tx_head] = dma_addr;
+
+	control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */
+	control |= OWL_EMAC_BIT_TDES1_SET;
+	control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len;
+
+	WRITE_ONCE(desc->control, control);
+	WRITE_ONCE(desc->buf_addr, dma_addr);
+	dma_wmb(); /* Flush descriptor before changing ownership. */
+	WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN);
+
+	owl_emac_ring_push_head(ring);
+
+	/* Temporarily enable DMA TX. */
+	status = owl_emac_dma_cmd_start_tx(priv);
+
+	/* Trigger setup frame processing. */
+	owl_emac_dma_cmd_resume_tx(priv);
+
+	/* Restore DMA TX status. */
+	owl_emac_dma_cmd_set_tx(priv, status);
+
+	/* Stop regular TX until setup frame is processed. */
+	netif_stop_queue(netdev);
+
+	spin_unlock_bh(&priv->lock);
+
+	return 0;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+	return ret;
+}
+
+static netdev_tx_t owl_emac_ndo_start_xmit(struct sk_buff *skb,
+					   struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct owl_emac_ring_desc *desc;
+	unsigned int tx_head;
+	u32 status, control;
+	dma_addr_t dma_addr;
+
+	dma_addr = owl_emac_dma_map_tx(priv, skb);
+	if (dma_mapping_error(dev, dma_addr)) {
+		dev_err_ratelimited(&netdev->dev, "TX DMA mapping failed\n");
+		dev_kfree_skb(skb);
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	tx_head = ring->head;
+	desc = &ring->descs[tx_head];
+
+	status = READ_ONCE(desc->status);
+	control = READ_ONCE(desc->control);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (!owl_emac_ring_num_unused(ring) ||
+	    unlikely(status & OWL_EMAC_BIT_TDES0_OWN)) {
+		netif_stop_queue(netdev);
+		spin_unlock_bh(&priv->lock);
+
+		dev_dbg_ratelimited(&netdev->dev, "TX buffer full, status=0x%08x\n",
+				    owl_emac_irq_status(priv));
+		owl_emac_dma_unmap_tx(priv, skb, dma_addr);
+		netdev->stats.tx_dropped++;
+		return NETDEV_TX_BUSY;
+	}
+
+	ring->skbs[tx_head] = skb;
+	ring->skbs_dma[tx_head] = dma_addr;
+
+	control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */
+	control |= OWL_EMAC_BIT_TDES1_FS | OWL_EMAC_BIT_TDES1_LS;
+	control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len;
+
+	WRITE_ONCE(desc->control, control);
+	WRITE_ONCE(desc->buf_addr, dma_addr);
+	dma_wmb(); /* Flush descriptor before changing ownership. */
+	WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN);
+
+	owl_emac_dma_cmd_resume_tx(priv);
+	owl_emac_ring_push_head(ring);
+
+	/* FIXME: The transmission is currently restricted to a single frame
+	 * at a time as a workaround for a MAC hardware bug that causes random
+	 * freeze of the TX queue processor.
+	 */
+	netif_stop_queue(netdev);
+
+	spin_unlock_bh(&priv->lock);
+
+	return NETDEV_TX_OK;
+}
+
+static bool owl_emac_tx_complete_tail(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *skb;
+	unsigned int tx_tail;
+	u32 status;
+
+	tx_tail = ring->tail;
+	desc = &ring->descs[tx_tail];
+
+	status = READ_ONCE(desc->status);
+	dma_rmb(); /* Ensure data has been read before used. */
+
+	if (status & OWL_EMAC_BIT_TDES0_OWN)
+		return false;
+
+	/* Check for errors. */
+	if (status & OWL_EMAC_BIT_TDES0_ES) {
+		dev_dbg_ratelimited(&netdev->dev,
+				    "TX complete error status: 0x%08x\n",
+				    status);
+
+		netdev->stats.tx_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_UF)
+			netdev->stats.tx_fifo_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_EC)
+			netdev->stats.tx_aborted_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_LC)
+			netdev->stats.tx_window_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_NC)
+			netdev->stats.tx_heartbeat_errors++;
+
+		if (status & OWL_EMAC_BIT_TDES0_LO)
+			netdev->stats.tx_carrier_errors++;
+	} else {
+		netdev->stats.tx_packets++;
+		netdev->stats.tx_bytes += ring->skbs[tx_tail]->len;
+	}
+
+	/* Some collisions occurred, but pkt has been transmitted. */
+	if (status & OWL_EMAC_BIT_TDES0_DE)
+		netdev->stats.collisions++;
+
+	skb = ring->skbs[tx_tail];
+	owl_emac_dma_unmap_tx(priv, skb, ring->skbs_dma[tx_tail]);
+	dev_kfree_skb(skb);
+
+	ring->skbs[tx_tail] = NULL;
+	ring->skbs_dma[tx_tail] = 0;
+
+	owl_emac_ring_pop_tail(ring);
+
+	if (unlikely(netif_queue_stopped(netdev)))
+		netif_wake_queue(netdev);
+
+	return true;
+}
+
+static void owl_emac_tx_complete(struct owl_emac_priv *priv)
+{
+	struct owl_emac_ring *ring = &priv->tx_ring;
+	struct net_device *netdev = priv->netdev;
+	unsigned int tx_next;
+	u32 status;
+
+	spin_lock(&priv->lock);
+
+	while (ring->tail != ring->head) {
+		if (!owl_emac_tx_complete_tail(priv))
+			break;
+	}
+
+	/* FIXME: This is a workaround for a MAC hardware bug not clearing
+	 * (sometimes) the OWN bit for a transmitted frame descriptor.
+	 *
+	 * At this point, when TX queue is full, the tail descriptor has the
+	 * OWN bit set, which normally means the frame has not been processed
+	 * or transmitted yet. But if there is at least one descriptor in the
+	 * queue having the OWN bit cleared, we can safely assume the tail
+	 * frame has been also processed by the MAC hardware.
+	 *
+	 * If that's the case, let's force the frame completion by manually
+	 * clearing the OWN bit.
+	 */
+	if (unlikely(!owl_emac_ring_num_unused(ring))) {
+		tx_next = ring->tail;
+
+		while ((tx_next = owl_emac_ring_get_next(ring, tx_next)) != ring->head) {
+			status = READ_ONCE(ring->descs[tx_next].status);
+			dma_rmb(); /* Ensure data has been read before used. */
+
+			if (status & OWL_EMAC_BIT_TDES0_OWN)
+				continue;
+
+			netdev_dbg(netdev, "Found uncleared TX desc OWN bit\n");
+
+			status = READ_ONCE(ring->descs[ring->tail].status);
+			dma_rmb(); /* Ensure data has been read before used. */
+			status &= ~OWL_EMAC_BIT_TDES0_OWN;
+			WRITE_ONCE(ring->descs[ring->tail].status, status);
+
+			owl_emac_tx_complete_tail(priv);
+			break;
+		}
+	}
+
+	spin_unlock(&priv->lock);
+}
+
+static int owl_emac_rx_process(struct owl_emac_priv *priv, int budget)
+{
+	struct owl_emac_ring *ring = &priv->rx_ring;
+	struct device *dev = owl_emac_get_dev(priv);
+	struct net_device *netdev = priv->netdev;
+	struct owl_emac_ring_desc *desc;
+	struct sk_buff *curr_skb, *new_skb;
+	dma_addr_t curr_dma, new_dma;
+	unsigned int rx_tail, len;
+	u32 status;
+	int recv = 0;
+
+	while (recv < budget) {
+		spin_lock(&priv->lock);
+
+		rx_tail = ring->tail;
+		desc = &ring->descs[rx_tail];
+
+		status = READ_ONCE(desc->status);
+		dma_rmb(); /* Ensure data has been read before used. */
+
+		if (status & OWL_EMAC_BIT_RDES0_OWN) {
+			spin_unlock(&priv->lock);
+			break;
+		}
+
+		curr_skb = ring->skbs[rx_tail];
+		curr_dma = ring->skbs_dma[rx_tail];
+		owl_emac_ring_pop_tail(ring);
+
+		spin_unlock(&priv->lock);
+
+		if (status & (OWL_EMAC_BIT_RDES0_DE | OWL_EMAC_BIT_RDES0_RF |
+		    OWL_EMAC_BIT_RDES0_TL | OWL_EMAC_BIT_RDES0_CS |
+		    OWL_EMAC_BIT_RDES0_DB | OWL_EMAC_BIT_RDES0_CE |
+		    OWL_EMAC_BIT_RDES0_ZERO)) {
+			dev_dbg_ratelimited(&netdev->dev,
+					    "RX desc error status: 0x%08x\n",
+					    status);
+
+			if (status & OWL_EMAC_BIT_RDES0_DE)
+				netdev->stats.rx_over_errors++;
+
+			if (status & (OWL_EMAC_BIT_RDES0_RF | OWL_EMAC_BIT_RDES0_DB))
+				netdev->stats.rx_frame_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_TL)
+				netdev->stats.rx_length_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_CS)
+				netdev->stats.collisions++;
+
+			if (status & OWL_EMAC_BIT_RDES0_CE)
+				netdev->stats.rx_crc_errors++;
+
+			if (status & OWL_EMAC_BIT_RDES0_ZERO)
+				netdev->stats.rx_fifo_errors++;
+
+			goto drop_skb;
+		}
+
+		len = (status & OWL_EMAC_MSK_RDES0_FL) >> OWL_EMAC_OFF_RDES0_FL;
+		if (unlikely(len > OWL_EMAC_RX_FRAME_MAX_LEN)) {
+			netdev->stats.rx_length_errors++;
+			netdev_err(netdev, "invalid RX frame len: %u\n", len);
+			goto drop_skb;
+		}
+
+		/* Prepare new skb before receiving the current one. */
+		new_skb = owl_emac_alloc_skb(netdev);
+		if (unlikely(!new_skb))
+			goto drop_skb;
+
+		new_dma = owl_emac_dma_map_rx(priv, new_skb);
+		if (dma_mapping_error(dev, new_dma)) {
+			dev_kfree_skb(new_skb);
+			netdev_err(netdev, "RX DMA mapping failed\n");
+			goto drop_skb;
+		}
+
+		owl_emac_dma_unmap_rx(priv, curr_skb, curr_dma);
+
+		skb_put(curr_skb, len - ETH_FCS_LEN);
+		curr_skb->ip_summed = CHECKSUM_NONE;
+		curr_skb->protocol = eth_type_trans(curr_skb, netdev);
+		curr_skb->dev = netdev;
+
+		netif_receive_skb(curr_skb);
+
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += len;
+		recv++;
+		goto push_skb;
+
+drop_skb:
+		netdev->stats.rx_dropped++;
+		netdev->stats.rx_errors++;
+		/* Reuse the current skb. */
+		new_skb = curr_skb;
+		new_dma = curr_dma;
+
+push_skb:
+		spin_lock(&priv->lock);
+
+		ring->skbs[ring->head] = new_skb;
+		ring->skbs_dma[ring->head] = new_dma;
+
+		WRITE_ONCE(desc->buf_addr, new_dma);
+		dma_wmb(); /* Flush descriptor before changing ownership. */
+		WRITE_ONCE(desc->status, OWL_EMAC_BIT_RDES0_OWN);
+
+		owl_emac_ring_push_head(ring);
+
+		spin_unlock(&priv->lock);
+	}
+
+	return recv;
+}
+
+static int owl_emac_poll(struct napi_struct *napi, int budget)
+{
+	int work_done = 0, ru_cnt = 0, recv;
+	static int tx_err_cnt, rx_err_cnt;
+	struct owl_emac_priv *priv;
+	u32 status, proc_status;
+
+	priv = container_of(napi, struct owl_emac_priv, napi);
+
+	while ((status = owl_emac_irq_clear(priv)) &
+	       (OWL_EMAC_BIT_MAC_CSR5_NIS | OWL_EMAC_BIT_MAC_CSR5_AIS)) {
+		recv = 0;
+
+		/* TX setup frame raises ETI instead of TI. */
+		if (status & (OWL_EMAC_BIT_MAC_CSR5_TI | OWL_EMAC_BIT_MAC_CSR5_ETI)) {
+			owl_emac_tx_complete(priv);
+			tx_err_cnt = 0;
+
+			/* Count MAC internal RX errors. */
+			proc_status = status & OWL_EMAC_MSK_MAC_CSR5_RS;
+			proc_status >>= OWL_EMAC_OFF_MAC_CSR5_RS;
+			if (proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_DATA ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_CDES ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_FDES)
+				rx_err_cnt++;
+		}
+
+		if (status & OWL_EMAC_BIT_MAC_CSR5_RI) {
+			recv = owl_emac_rx_process(priv, budget - work_done);
+			rx_err_cnt = 0;
+
+			/* Count MAC internal TX errors. */
+			proc_status = status & OWL_EMAC_MSK_MAC_CSR5_TS;
+			proc_status >>= OWL_EMAC_OFF_MAC_CSR5_TS;
+			if (proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_DATA ||
+			    proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_CDES)
+				tx_err_cnt++;
+		} else if (status & OWL_EMAC_BIT_MAC_CSR5_RU) {
+			/* MAC AHB is in suspended state, will return to RX
+			 * descriptor processing when the host changes ownership
+			 * of the descriptor and either an RX poll demand CMD is
+			 * issued or a new frame is recognized by the MAC AHB.
+			 */
+			if (++ru_cnt == 2)
+				owl_emac_dma_cmd_resume_rx(priv);
+
+			recv = owl_emac_rx_process(priv, budget - work_done);
+
+			/* Guard against too many RU interrupts. */
+			if (ru_cnt > 3)
+				break;
+		}
+
+		work_done += recv;
+		if (work_done >= budget)
+			break;
+	}
+
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		owl_emac_irq_enable(priv);
+	}
+
+	/* Reset MAC when getting too many internal TX or RX errors. */
+	if (tx_err_cnt > 10 || rx_err_cnt > 10) {
+		netdev_dbg(priv->netdev, "%s error status: 0x%08x\n",
+			   tx_err_cnt > 10 ? "TX" : "RX", status);
+		rx_err_cnt = 0;
+		tx_err_cnt = 0;
+		schedule_work(&priv->mac_reset_task);
+	}
+
+	return work_done;
+}
+
+static void owl_emac_mdio_clock_enable(struct owl_emac_priv *priv)
+{
+	u32 val;
+
+	/* Enable MDC clock generation by adjusting CLKDIV according to
+	 * the vendor implementation of the original driver.
+	 */
+	val = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10);
+	val &= OWL_EMAC_MSK_MAC_CSR10_CLKDIV;
+	val |= OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128 << OWL_EMAC_OFF_MAC_CSR10_CLKDIV;
+
+	val |= OWL_EMAC_BIT_MAC_CSR10_SB;
+	val |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, val);
+}
+
+static void owl_emac_core_hw_reset(struct owl_emac_priv *priv)
+{
+	/* Trigger hardware reset. */
+	reset_control_assert(priv->reset);
+	usleep_range(10, 20);
+	reset_control_deassert(priv->reset);
+	usleep_range(100, 200);
+}
+
+static int owl_emac_core_sw_reset(struct owl_emac_priv *priv)
+{
+	u32 val;
+	int ret;
+
+	/* Trigger software reset. */
+	owl_emac_reg_set(priv, OWL_EMAC_REG_MAC_CSR0, OWL_EMAC_BIT_MAC_CSR0_SWR);
+	ret = readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR0,
+				 val, !(val & OWL_EMAC_BIT_MAC_CSR0_SWR),
+				 OWL_EMAC_POLL_DELAY_USEC,
+				 OWL_EMAC_RESET_POLL_TIMEOUT_USEC);
+	if (ret)
+		return ret;
+
+	if (priv->phy_mode == PHY_INTERFACE_MODE_RMII) {
+		/* Enable RMII and use the 50MHz rmii clk as output to PHY. */
+		val = 0;
+	} else {
+		/* Enable SMII and use the 125MHz rmii clk as output to PHY.
+		 * Additionally set SMII SYNC delay to 4 half cycle.
+		 */
+		val = 0x04 << OWL_EMAC_OFF_MAC_CTRL_SSDC;
+		val |= OWL_EMAC_BIT_MAC_CTRL_RSIS;
+	}
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CTRL, val);
+
+	/* MDC is disabled after reset. */
+	owl_emac_mdio_clock_enable(priv);
+
+	/* Set FIFO pause & restart threshold levels. */
+	val = 0x40 << OWL_EMAC_OFF_MAC_CSR19_FPTL;
+	val |= 0x10 << OWL_EMAC_OFF_MAC_CSR19_FRTL;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR19, val);
+
+	/* Set flow control pause quanta time to ~100 ms. */
+	val = 0x4FFF << OWL_EMAC_OFF_MAC_CSR18_PQT;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR18, val);
+
+	/* Setup interrupt mitigation. */
+	val = 7 << OWL_EMAC_OFF_MAC_CSR11_NRP;
+	val |= 4 << OWL_EMAC_OFF_MAC_CSR11_RT;
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR11, val);
+
+	/* Set RX/TX rings base addresses. */
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR3,
+			   (u32)(priv->rx_ring.descs_dma));
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR4,
+			   (u32)(priv->tx_ring.descs_dma));
+
+	/* Setup initial operation mode. */
+	val = OWL_EMAC_VAL_MAC_CSR6_SPEED_100M << OWL_EMAC_OFF_MAC_CSR6_SPEED;
+	val |= OWL_EMAC_BIT_MAC_CSR6_FD;
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_MSK_MAC_CSR6_SPEED |
+			    OWL_EMAC_BIT_MAC_CSR6_FD, val);
+	owl_emac_reg_clear(priv, OWL_EMAC_REG_MAC_CSR6,
+			   OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM);
+
+	priv->link = 0;
+	priv->speed = SPEED_UNKNOWN;
+	priv->duplex = DUPLEX_UNKNOWN;
+	priv->pause = 0;
+	priv->mcaddr_list.count = 0;
+
+	return 0;
+}
+
+static int owl_emac_enable(struct net_device *netdev, bool start_phy)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	owl_emac_dma_cmd_stop(priv);
+	owl_emac_irq_disable(priv);
+	owl_emac_irq_clear(priv);
+
+	owl_emac_ring_prepare_tx(priv);
+	ret = owl_emac_ring_prepare_rx(priv);
+	if (ret)
+		goto err_unprep;
+
+	ret = owl_emac_core_sw_reset(priv);
+	if (ret) {
+		netdev_err(netdev, "failed to soft reset MAC core: %d\n", ret);
+		goto err_unprep;
+	}
+
+	owl_emac_set_hw_mac_addr(netdev);
+	owl_emac_setup_frame_xmit(priv);
+
+	netdev_reset_queue(netdev);
+	napi_enable(&priv->napi);
+
+	owl_emac_irq_enable(priv);
+	owl_emac_dma_cmd_start(priv);
+
+	if (start_phy)
+		phy_start(netdev->phydev);
+
+	netif_start_queue(netdev);
+
+	return 0;
+
+err_unprep:
+	owl_emac_ring_unprepare_rx(priv);
+	owl_emac_ring_unprepare_tx(priv);
+
+	return ret;
+}
+
+static void owl_emac_disable(struct net_device *netdev, bool stop_phy)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	owl_emac_dma_cmd_stop(priv);
+	owl_emac_irq_disable(priv);
+
+	netif_stop_queue(netdev);
+	napi_disable(&priv->napi);
+
+	if (stop_phy)
+		phy_stop(netdev->phydev);
+
+	owl_emac_ring_unprepare_rx(priv);
+	owl_emac_ring_unprepare_tx(priv);
+}
+
+static int owl_emac_ndo_open(struct net_device *netdev)
+{
+	return owl_emac_enable(netdev, true);
+}
+
+static int owl_emac_ndo_stop(struct net_device *netdev)
+{
+	owl_emac_disable(netdev, true);
+
+	return 0;
+}
+
+static void owl_emac_set_multicast(struct net_device *netdev, int count)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct netdev_hw_addr *ha;
+	int index = 0;
+
+	if (count <= 0) {
+		priv->mcaddr_list.count = 0;
+		return;
+	}
+
+	netdev_for_each_mc_addr(ha, netdev) {
+		if (!is_multicast_ether_addr(ha->addr))
+			continue;
+
+		WARN_ON(index >= OWL_EMAC_MAX_MULTICAST_ADDRS);
+		ether_addr_copy(priv->mcaddr_list.addrs[index++], ha->addr);
+	}
+
+	priv->mcaddr_list.count = index;
+
+	owl_emac_setup_frame_xmit(priv);
+}
+
+static void owl_emac_ndo_set_rx_mode(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	u32 status, val = 0;
+	int mcast_count = 0;
+
+	if (netdev->flags & IFF_PROMISC) {
+		val = OWL_EMAC_BIT_MAC_CSR6_PR;
+	} else if (netdev->flags & IFF_ALLMULTI) {
+		val = OWL_EMAC_BIT_MAC_CSR6_PM;
+	} else if (netdev->flags & IFF_MULTICAST) {
+		mcast_count = netdev_mc_count(netdev);
+
+		if (mcast_count > OWL_EMAC_MAX_MULTICAST_ADDRS) {
+			val = OWL_EMAC_BIT_MAC_CSR6_PM;
+			mcast_count = 0;
+		}
+	}
+
+	spin_lock_bh(&priv->lock);
+
+	/* Temporarily stop DMA TX & RX. */
+	status = owl_emac_dma_cmd_stop(priv);
+
+	/* Update operation modes. */
+	owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6,
+			    OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM,
+			    val);
+
+	/* Restore DMA TX & RX status. */
+	owl_emac_dma_cmd_set(priv, status);
+
+	spin_unlock_bh(&priv->lock);
+
+	/* Set/reset multicast addr list. */
+	owl_emac_set_multicast(netdev, mcast_count);
+}
+
+static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr)
+{
+	struct sockaddr *skaddr = addr;
+
+	if (!is_valid_ether_addr(skaddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (netif_running(netdev))
+		return -EBUSY;
+
+	memcpy(netdev->dev_addr, skaddr->sa_data, netdev->addr_len);
+	owl_emac_set_hw_mac_addr(netdev);
+
+	return owl_emac_setup_frame_xmit(netdev_priv(netdev));
+}
+
+static int owl_emac_ndo_do_ioctl(struct net_device *netdev,
+				 struct ifreq *req, int cmd)
+{
+	if (!netif_running(netdev))
+		return -EINVAL;
+
+	return phy_mii_ioctl(netdev->phydev, req, cmd);
+}
+
+static void owl_emac_ndo_tx_timeout(struct net_device *netdev,
+				    unsigned int txqueue)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	schedule_work(&priv->mac_reset_task);
+}
+
+static void owl_emac_reset_task(struct work_struct *work)
+{
+	struct owl_emac_priv *priv;
+
+	priv = container_of(work, struct owl_emac_priv, mac_reset_task);
+
+	netdev_dbg(priv->netdev, "resetting MAC\n");
+	owl_emac_disable(priv->netdev, false);
+	owl_emac_enable(priv->netdev, false);
+}
+
+static struct net_device_stats *
+owl_emac_ndo_get_stats(struct net_device *netdev)
+{
+	/* FIXME: If possible, try to get stats from MAC hardware registers
+	 * instead of tracking them manually in the driver.
+	 */
+
+	return &netdev->stats;
+}
+
+static const struct net_device_ops owl_emac_netdev_ops = {
+	.ndo_open		= owl_emac_ndo_open,
+	.ndo_stop		= owl_emac_ndo_stop,
+	.ndo_start_xmit		= owl_emac_ndo_start_xmit,
+	.ndo_set_rx_mode	= owl_emac_ndo_set_rx_mode,
+	.ndo_set_mac_address	= owl_emac_ndo_set_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_do_ioctl		= owl_emac_ndo_do_ioctl,
+	.ndo_tx_timeout         = owl_emac_ndo_tx_timeout,
+	.ndo_get_stats		= owl_emac_ndo_get_stats,
+};
+
+static void owl_emac_ethtool_get_drvinfo(struct net_device *dev,
+					 struct ethtool_drvinfo *info)
+{
+	strscpy(info->driver, OWL_EMAC_DRVNAME, sizeof(info->driver));
+}
+
+static u32 owl_emac_ethtool_get_msglevel(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	return priv->msg_enable;
+}
+
+static void owl_emac_ethtool_set_msglevel(struct net_device *ndev, u32 val)
+{
+	struct owl_emac_priv *priv = netdev_priv(ndev);
+
+	priv->msg_enable = val;
+}
+
+static const struct ethtool_ops owl_emac_ethtool_ops = {
+	.get_drvinfo		= owl_emac_ethtool_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.get_msglevel		= owl_emac_ethtool_get_msglevel,
+	.set_msglevel		= owl_emac_ethtool_set_msglevel,
+};
+
+static int owl_emac_mdio_wait(struct owl_emac_priv *priv)
+{
+	u32 val;
+
+	/* Wait while data transfer is in progress. */
+	return readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR10,
+				  val, !(val & OWL_EMAC_BIT_MAC_CSR10_SB),
+				  OWL_EMAC_POLL_DELAY_USEC,
+				  OWL_EMAC_MDIO_POLL_TIMEOUT_USEC);
+}
+
+static int owl_emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+	struct owl_emac_priv *priv = bus->priv;
+	u32 data, tmp;
+	int ret;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	data = OWL_EMAC_BIT_MAC_CSR10_SB;
+	data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+
+	tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD;
+
+	tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD;
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data);
+
+	ret = owl_emac_mdio_wait(priv);
+	if (ret)
+		return ret;
+
+	data = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10);
+	data &= OWL_EMAC_MSK_MAC_CSR10_DATA;
+
+	return data;
+}
+
+static int
+owl_emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+{
+	struct owl_emac_priv *priv = bus->priv;
+	u32 data, tmp;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	data = OWL_EMAC_BIT_MAC_CSR10_SB;
+	data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR << OWL_EMAC_OFF_MAC_CSR10_OPCODE;
+
+	tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD;
+
+	tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD;
+	data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD;
+
+	data |= val & OWL_EMAC_MSK_MAC_CSR10_DATA;
+
+	owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data);
+
+	return owl_emac_mdio_wait(priv);
+}
+
+static int owl_emac_mdio_init(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct device_node *mdio_node;
+	int ret;
+
+	mdio_node = of_get_child_by_name(dev->of_node, "mdio");
+	if (!mdio_node)
+		return -ENODEV;
+
+	if (!of_device_is_available(mdio_node)) {
+		ret = -ENODEV;
+		goto err_put_node;
+	}
+
+	priv->mii = devm_mdiobus_alloc(dev);
+	if (!priv->mii) {
+		ret = -ENOMEM;
+		goto err_put_node;
+	}
+
+	snprintf(priv->mii->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+	priv->mii->name = "owl-emac-mdio";
+	priv->mii->parent = dev;
+	priv->mii->read = owl_emac_mdio_read;
+	priv->mii->write = owl_emac_mdio_write;
+	priv->mii->phy_mask = ~0; /* Mask out all PHYs from auto probing. */
+	priv->mii->priv = priv;
+
+	ret = devm_of_mdiobus_register(dev, priv->mii, mdio_node);
+
+err_put_node:
+	of_node_put(mdio_node);
+	return ret;
+}
+
+static int owl_emac_phy_init(struct net_device *netdev)
+{
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	struct device *dev = owl_emac_get_dev(priv);
+	struct phy_device *phy;
+
+	phy = of_phy_get_and_connect(netdev, dev->of_node,
+				     owl_emac_adjust_link);
+	if (!phy)
+		return -ENODEV;
+
+	phy_set_sym_pause(phy, true, true, true);
+
+	if (netif_msg_link(priv))
+		phy_attached_info(phy);
+
+	return 0;
+}
+
+static void owl_emac_get_mac_addr(struct net_device *netdev)
+{
+	struct device *dev = netdev->dev.parent;
+	int ret;
+
+	ret = eth_platform_get_mac_address(dev, netdev->dev_addr);
+	if (!ret && is_valid_ether_addr(netdev->dev_addr))
+		return;
+
+	eth_hw_addr_random(netdev);
+	dev_warn(dev, "using random MAC address %pM\n", netdev->dev_addr);
+}
+
+static __maybe_unused int owl_emac_suspend(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+
+	disable_irq(netdev->irq);
+
+	if (netif_running(netdev)) {
+		owl_emac_disable(netdev, true);
+		netif_device_detach(netdev);
+	}
+
+	clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+
+	return 0;
+}
+
+static __maybe_unused int owl_emac_resume(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct owl_emac_priv *priv = netdev_priv(netdev);
+	int ret;
+
+	ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	if (netif_running(netdev)) {
+		owl_emac_core_hw_reset(priv);
+		owl_emac_core_sw_reset(priv);
+
+		ret = owl_emac_enable(netdev, true);
+		if (ret) {
+			clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+			return ret;
+		}
+
+		netif_device_attach(netdev);
+	}
+
+	enable_irq(netdev->irq);
+
+	return 0;
+}
+
+static void owl_emac_clk_disable_unprepare(void *data)
+{
+	struct owl_emac_priv *priv = data;
+
+	clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks);
+}
+
+static int owl_emac_clk_set_rate(struct owl_emac_priv *priv)
+{
+	struct device *dev = owl_emac_get_dev(priv);
+	unsigned long rate;
+	int ret;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_RMII:
+		rate = 50000000;
+		break;
+
+	case PHY_INTERFACE_MODE_SMII:
+		rate = 125000000;
+		break;
+
+	default:
+		dev_err(dev, "unsupported phy interface mode %d\n",
+			priv->phy_mode);
+		return -EOPNOTSUPP;
+	}
+
+	ret = clk_set_rate(priv->clks[OWL_EMAC_CLK_RMII].clk, rate);
+	if (ret)
+		dev_err(dev, "failed to set RMII clock rate: %d\n", ret);
+
+	return ret;
+}
+
+static int owl_emac_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct net_device *netdev;
+	struct owl_emac_priv *priv;
+	int ret, i;
+
+	netdev = devm_alloc_etherdev(dev, sizeof(*priv));
+	if (!netdev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, netdev);
+	SET_NETDEV_DEV(netdev, dev);
+
+	priv = netdev_priv(netdev);
+	priv->netdev = netdev;
+	priv->msg_enable = netif_msg_init(-1, OWL_EMAC_DEFAULT_MSG_ENABLE);
+
+	ret = of_get_phy_mode(dev->of_node, &priv->phy_mode);
+	if (ret) {
+		dev_err(dev, "failed to get phy mode: %d\n", ret);
+		return ret;
+	}
+
+	spin_lock_init(&priv->lock);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(dev, "unsupported DMA mask\n");
+		return ret;
+	}
+
+	ret = owl_emac_ring_alloc(dev, &priv->rx_ring, OWL_EMAC_RX_RING_SIZE);
+	if (ret)
+		return ret;
+
+	ret = owl_emac_ring_alloc(dev, &priv->tx_ring, OWL_EMAC_TX_RING_SIZE);
+	if (ret)
+		return ret;
+
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	netdev->irq = platform_get_irq(pdev, 0);
+	if (netdev->irq < 0)
+		return netdev->irq;
+
+	ret = devm_request_irq(dev, netdev->irq, owl_emac_handle_irq,
+			       IRQF_SHARED, netdev->name, netdev);
+	if (ret) {
+		dev_err(dev, "failed to request irq: %d\n", netdev->irq);
+		return ret;
+	}
+
+	for (i = 0; i < OWL_EMAC_NCLKS; i++)
+		priv->clks[i].id = owl_emac_clk_names[i];
+
+	ret = devm_clk_bulk_get(dev, OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev, owl_emac_clk_disable_unprepare, priv);
+	if (ret)
+		return ret;
+
+	ret = owl_emac_clk_set_rate(priv);
+	if (ret)
+		return ret;
+
+	priv->reset = devm_reset_control_get_exclusive(dev, NULL);
+	if (IS_ERR(priv->reset))
+		return dev_err_probe(dev, PTR_ERR(priv->reset),
+				     "failed to get reset control");
+
+	owl_emac_get_mac_addr(netdev);
+
+	owl_emac_core_hw_reset(priv);
+	owl_emac_mdio_clock_enable(priv);
+
+	ret = owl_emac_mdio_init(netdev);
+	if (ret) {
+		dev_err(dev, "failed to initialize MDIO bus\n");
+		return ret;
+	}
+
+	ret = owl_emac_phy_init(netdev);
+	if (ret) {
+		dev_err(dev, "failed to initialize PHY\n");
+		return ret;
+	}
+
+	INIT_WORK(&priv->mac_reset_task, owl_emac_reset_task);
+
+	netdev->min_mtu = OWL_EMAC_MTU_MIN;
+	netdev->max_mtu = OWL_EMAC_MTU_MAX;
+	netdev->watchdog_timeo = OWL_EMAC_TX_TIMEOUT;
+	netdev->netdev_ops = &owl_emac_netdev_ops;
+	netdev->ethtool_ops = &owl_emac_ethtool_ops;
+	netif_napi_add(netdev, &priv->napi, owl_emac_poll, NAPI_POLL_WEIGHT);
+
+	ret = devm_register_netdev(dev, netdev);
+	if (ret) {
+		netif_napi_del(&priv->napi);
+		phy_disconnect(netdev->phydev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int owl_emac_remove(struct platform_device *pdev)
+{
+	struct owl_emac_priv *priv = platform_get_drvdata(pdev);
+
+	netif_napi_del(&priv->napi);
+	phy_disconnect(priv->netdev->phydev);
+	cancel_work_sync(&priv->mac_reset_task);
+
+	return 0;
+}
+
+static const struct of_device_id owl_emac_of_match[] = {
+	{ .compatible = "actions,owl-emac", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, owl_emac_of_match);
+
+static SIMPLE_DEV_PM_OPS(owl_emac_pm_ops,
+			 owl_emac_suspend, owl_emac_resume);
+
+static struct platform_driver owl_emac_driver = {
+	.driver = {
+		.name = OWL_EMAC_DRVNAME,
+		.of_match_table = owl_emac_of_match,
+		.pm = &owl_emac_pm_ops,
+	},
+	.probe = owl_emac_probe,
+	.remove = owl_emac_remove,
+};
+module_platform_driver(owl_emac_driver);
+
+MODULE_DESCRIPTION("Actions Semi Owl SoCs Ethernet MAC Driver");
+MODULE_AUTHOR("Actions Semi Inc.");
+MODULE_AUTHOR("Cristian Ciocaltea <cristian.ciocaltea@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/actions/owl-emac.h b/drivers/net/ethernet/actions/owl-emac.h
new file mode 100644
index 000000000000..9eb0d1a30242
--- /dev/null
+++ b/drivers/net/ethernet/actions/owl-emac.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Actions Semi Owl SoCs Ethernet MAC driver
+ *
+ * Copyright (c) 2012 Actions Semi Inc.
+ * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
+ */
+
+#ifndef __OWL_EMAC_H__
+#define __OWL_EMAC_H__
+
+#define OWL_EMAC_DRVNAME			"owl-emac"
+
+#define OWL_EMAC_POLL_DELAY_USEC		5
+#define OWL_EMAC_MDIO_POLL_TIMEOUT_USEC		1000
+#define OWL_EMAC_RESET_POLL_TIMEOUT_USEC	2000
+#define OWL_EMAC_TX_TIMEOUT			(2 * HZ)
+
+#define OWL_EMAC_MTU_MIN			ETH_MIN_MTU
+#define OWL_EMAC_MTU_MAX			ETH_DATA_LEN
+#define OWL_EMAC_RX_FRAME_MAX_LEN		(ETH_FRAME_LEN + ETH_FCS_LEN)
+#define OWL_EMAC_SKB_ALIGN			4
+#define OWL_EMAC_SKB_RESERVE			18
+
+#define OWL_EMAC_MAX_MULTICAST_ADDRS		14
+#define OWL_EMAC_SETUP_FRAME_LEN		192
+
+#define OWL_EMAC_RX_RING_SIZE			64
+#define OWL_EMAC_TX_RING_SIZE			32
+
+/* Bus mode register */
+#define OWL_EMAC_REG_MAC_CSR0			0x0000
+#define OWL_EMAC_BIT_MAC_CSR0_SWR		BIT(0)	/* Software reset */
+
+/* Transmit/receive poll demand registers */
+#define OWL_EMAC_REG_MAC_CSR1			0x0008
+#define OWL_EMAC_VAL_MAC_CSR1_TPD		0x01
+#define OWL_EMAC_REG_MAC_CSR2			0x0010
+#define OWL_EMAC_VAL_MAC_CSR2_RPD		0x01
+
+/* Receive/transmit descriptor list base address registers */
+#define OWL_EMAC_REG_MAC_CSR3			0x0018
+#define OWL_EMAC_REG_MAC_CSR4			0x0020
+
+/* Status register */
+#define OWL_EMAC_REG_MAC_CSR5			0x0028
+#define OWL_EMAC_MSK_MAC_CSR5_TS		GENMASK(22, 20)	/* Transmit process state */
+#define OWL_EMAC_OFF_MAC_CSR5_TS		20
+#define OWL_EMAC_VAL_MAC_CSR5_TS_DATA		0x03	/* Transferring data HOST -> FIFO */
+#define OWL_EMAC_VAL_MAC_CSR5_TS_CDES		0x07	/* Closing transmit descriptor */
+#define OWL_EMAC_MSK_MAC_CSR5_RS		GENMASK(19, 17)	/* Receive process state */
+#define OWL_EMAC_OFF_MAC_CSR5_RS		17
+#define OWL_EMAC_VAL_MAC_CSR5_RS_FDES		0x01	/* Fetching receive descriptor */
+#define OWL_EMAC_VAL_MAC_CSR5_RS_CDES		0x05	/* Closing receive descriptor */
+#define OWL_EMAC_VAL_MAC_CSR5_RS_DATA		0x07	/* Transferring data FIFO -> HOST */
+#define OWL_EMAC_BIT_MAC_CSR5_NIS		BIT(16)	/* Normal interrupt summary */
+#define OWL_EMAC_BIT_MAC_CSR5_AIS		BIT(15)	/* Abnormal interrupt summary */
+#define OWL_EMAC_BIT_MAC_CSR5_ERI		BIT(14)	/* Early receive interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_GTE		BIT(11)	/* General-purpose timer expiration */
+#define OWL_EMAC_BIT_MAC_CSR5_ETI		BIT(10)	/* Early transmit interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_RPS		BIT(8)	/* Receive process stopped */
+#define OWL_EMAC_BIT_MAC_CSR5_RU		BIT(7)	/* Receive buffer unavailable */
+#define OWL_EMAC_BIT_MAC_CSR5_RI		BIT(6)	/* Receive interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_UNF		BIT(5)	/* Transmit underflow */
+#define OWL_EMAC_BIT_MAC_CSR5_LCIS		BIT(4)	/* Link change status */
+#define OWL_EMAC_BIT_MAC_CSR5_LCIQ		BIT(3)	/* Link change interrupt */
+#define OWL_EMAC_BIT_MAC_CSR5_TU		BIT(2)	/* Transmit buffer unavailable */
+#define OWL_EMAC_BIT_MAC_CSR5_TPS		BIT(1)	/* Transmit process stopped */
+#define OWL_EMAC_BIT_MAC_CSR5_TI		BIT(0)	/* Transmit interrupt */
+
+/* Operation mode register */
+#define OWL_EMAC_REG_MAC_CSR6			0x0030
+#define OWL_EMAC_BIT_MAC_CSR6_RA		BIT(30)	/* Receive all */
+#define OWL_EMAC_BIT_MAC_CSR6_TTM		BIT(22)	/* Transmit threshold mode */
+#define OWL_EMAC_BIT_MAC_CSR6_SF		BIT(21)	/* Store and forward */
+#define OWL_EMAC_MSK_MAC_CSR6_SPEED		GENMASK(17, 16)	/* Eth speed selection */
+#define OWL_EMAC_OFF_MAC_CSR6_SPEED		16
+#define OWL_EMAC_VAL_MAC_CSR6_SPEED_100M	0x00
+#define OWL_EMAC_VAL_MAC_CSR6_SPEED_10M		0x02
+#define OWL_EMAC_BIT_MAC_CSR6_ST		BIT(13)	/* Start/stop transmit command */
+#define OWL_EMAC_BIT_MAC_CSR6_LP		BIT(10)	/* Loopback mode */
+#define OWL_EMAC_BIT_MAC_CSR6_FD		BIT(9)	/* Full duplex mode */
+#define OWL_EMAC_BIT_MAC_CSR6_PM		BIT(7)	/* Pass all multicast */
+#define OWL_EMAC_BIT_MAC_CSR6_PR		BIT(6)	/* Promiscuous mode */
+#define OWL_EMAC_BIT_MAC_CSR6_IF		BIT(4)	/* Inverse filtering */
+#define OWL_EMAC_BIT_MAC_CSR6_PB		BIT(3)	/* Pass bad frames */
+#define OWL_EMAC_BIT_MAC_CSR6_HO		BIT(2)	/* Hash only filtering mode */
+#define OWL_EMAC_BIT_MAC_CSR6_SR		BIT(1)	/* Start/stop receive command */
+#define OWL_EMAC_BIT_MAC_CSR6_HP		BIT(0)	/* Hash/perfect receive filtering mode */
+#define OWL_EMAC_MSK_MAC_CSR6_STSR	       (OWL_EMAC_BIT_MAC_CSR6_ST | \
+						OWL_EMAC_BIT_MAC_CSR6_SR)
+
+/* Interrupt enable register */
+#define OWL_EMAC_REG_MAC_CSR7			0x0038
+#define OWL_EMAC_BIT_MAC_CSR7_NIE		BIT(16)	/* Normal interrupt summary enable */
+#define OWL_EMAC_BIT_MAC_CSR7_AIE		BIT(15)	/* Abnormal interrupt summary enable */
+#define OWL_EMAC_BIT_MAC_CSR7_ERE		BIT(14)	/* Early receive interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_GTE		BIT(11)	/* General-purpose timer overflow */
+#define OWL_EMAC_BIT_MAC_CSR7_ETE		BIT(10)	/* Early transmit interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RSE		BIT(8)	/* Receive stopped enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RUE		BIT(7)	/* Receive buffer unavailable enable */
+#define OWL_EMAC_BIT_MAC_CSR7_RIE		BIT(6)	/* Receive interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_UNE		BIT(5)	/* Underflow interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TUE		BIT(2)	/* Transmit buffer unavailable enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TSE		BIT(1)	/* Transmit stopped enable */
+#define OWL_EMAC_BIT_MAC_CSR7_TIE		BIT(0)	/* Transmit interrupt enable */
+#define OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE      (OWL_EMAC_BIT_MAC_CSR7_ERE | \
+						OWL_EMAC_BIT_MAC_CSR7_GTE | \
+						OWL_EMAC_BIT_MAC_CSR7_ETE | \
+						OWL_EMAC_BIT_MAC_CSR7_RSE | \
+						OWL_EMAC_BIT_MAC_CSR7_RUE | \
+						OWL_EMAC_BIT_MAC_CSR7_RIE | \
+						OWL_EMAC_BIT_MAC_CSR7_UNE | \
+						OWL_EMAC_BIT_MAC_CSR7_TSE | \
+						OWL_EMAC_BIT_MAC_CSR7_TIE)
+
+/* Missed frames and overflow counter register */
+#define OWL_EMAC_REG_MAC_CSR8			0x0040
+/* MII management and serial ROM register */
+#define OWL_EMAC_REG_MAC_CSR9			0x0048
+
+/* MII serial management register */
+#define OWL_EMAC_REG_MAC_CSR10			0x0050
+#define OWL_EMAC_BIT_MAC_CSR10_SB		BIT(31)	/* Start transfer or busy */
+#define OWL_EMAC_MSK_MAC_CSR10_CLKDIV		GENMASK(30, 28)	/* Clock divider */
+#define OWL_EMAC_OFF_MAC_CSR10_CLKDIV		28
+#define OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128	0x04
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR	0x01	/* Register write command */
+#define OWL_EMAC_OFF_MAC_CSR10_OPCODE		26	/* Operation mode */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_DCG	0x00	/* Disable clock generation */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR	0x01	/* Register write command */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD	0x02	/* Register read command */
+#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS	0x03	/* Clock divider set */
+#define OWL_EMAC_MSK_MAC_CSR10_PHYADD		GENMASK(25, 21)	/* Physical layer address */
+#define OWL_EMAC_OFF_MAC_CSR10_PHYADD		21
+#define OWL_EMAC_MSK_MAC_CSR10_REGADD		GENMASK(20, 16)	/* Register address */
+#define OWL_EMAC_OFF_MAC_CSR10_REGADD		16
+#define OWL_EMAC_MSK_MAC_CSR10_DATA		GENMASK(15, 0)	/* Register data */
+
+/* General-purpose timer and interrupt mitigation control register */
+#define OWL_EMAC_REG_MAC_CSR11			0x0058
+#define OWL_EMAC_OFF_MAC_CSR11_TT		27	/* Transmit timer */
+#define OWL_EMAC_OFF_MAC_CSR11_NTP		24	/* No. of transmit packets */
+#define OWL_EMAC_OFF_MAC_CSR11_RT		20	/* Receive timer */
+#define OWL_EMAC_OFF_MAC_CSR11_NRP		17	/* No. of receive packets */
+
+/* MAC address low/high registers */
+#define OWL_EMAC_REG_MAC_CSR16			0x0080
+#define OWL_EMAC_REG_MAC_CSR17			0x0088
+
+/* Pause time & cache thresholds register */
+#define OWL_EMAC_REG_MAC_CSR18			0x0090
+#define OWL_EMAC_OFF_MAC_CSR18_CPTL		24	/* Cache pause threshold level */
+#define OWL_EMAC_OFF_MAC_CSR18_CRTL		16	/* Cache restart threshold level */
+#define OWL_EMAC_OFF_MAC_CSR18_PQT		0	/* Flow control pause quanta time */
+
+/* FIFO pause & restart threshold register */
+#define OWL_EMAC_REG_MAC_CSR19			0x0098
+#define OWL_EMAC_OFF_MAC_CSR19_FPTL		16	/* FIFO pause threshold level */
+#define OWL_EMAC_OFF_MAC_CSR19_FRTL		0	/* FIFO restart threshold level */
+
+/* Flow control setup & status register */
+#define OWL_EMAC_REG_MAC_CSR20			0x00A0
+#define OWL_EMAC_BIT_MAC_CSR20_FCE		BIT(31)	/* Flow Control Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_TUE		BIT(30)	/* Transmit Un-pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_TPE		BIT(29)	/* Transmit Pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_RPE		BIT(28)	/* Receive Pause frames Enable */
+#define OWL_EMAC_BIT_MAC_CSR20_BPE		BIT(27)	/* Back pressure (half-duplex) Enable */
+
+/* MII control register */
+#define OWL_EMAC_REG_MAC_CTRL			0x00B0
+#define OWL_EMAC_BIT_MAC_CTRL_RRSB		BIT(8)	/* RMII_REFCLK select bit */
+#define OWL_EMAC_OFF_MAC_CTRL_SSDC		4	/* SMII SYNC delay cycle */
+#define OWL_EMAC_BIT_MAC_CTRL_RCPS		BIT(1)	/* REF_CLK phase select */
+#define OWL_EMAC_BIT_MAC_CTRL_RSIS		BIT(0)	/* RMII/SMII interface select */
+
+/* Receive descriptor status field */
+#define OWL_EMAC_BIT_RDES0_OWN			BIT(31)	/* Ownership bit */
+#define OWL_EMAC_BIT_RDES0_FF			BIT(30)	/* Filtering fail */
+#define OWL_EMAC_MSK_RDES0_FL			GENMASK(29, 16)	/* Frame length */
+#define OWL_EMAC_OFF_RDES0_FL			16
+#define OWL_EMAC_BIT_RDES0_ES			BIT(15)	/* Error summary */
+#define OWL_EMAC_BIT_RDES0_DE			BIT(14)	/* Descriptor error */
+#define OWL_EMAC_BIT_RDES0_RF			BIT(11)	/* Runt frame */
+#define OWL_EMAC_BIT_RDES0_MF			BIT(10)	/* Multicast frame */
+#define OWL_EMAC_BIT_RDES0_FS			BIT(9)	/* First descriptor */
+#define OWL_EMAC_BIT_RDES0_LS			BIT(8)	/* Last descriptor */
+#define OWL_EMAC_BIT_RDES0_TL			BIT(7)	/* Frame too long */
+#define OWL_EMAC_BIT_RDES0_CS			BIT(6)	/* Collision seen */
+#define OWL_EMAC_BIT_RDES0_FT			BIT(5)	/* Frame type */
+#define OWL_EMAC_BIT_RDES0_RE			BIT(3)	/* Report on MII error */
+#define OWL_EMAC_BIT_RDES0_DB			BIT(2)	/* Dribbling bit */
+#define OWL_EMAC_BIT_RDES0_CE			BIT(1)	/* CRC error */
+#define OWL_EMAC_BIT_RDES0_ZERO			BIT(0)	/* Legal frame length indicator */
+
+/* Receive descriptor control and count field */
+#define OWL_EMAC_BIT_RDES1_RER			BIT(25)	/* Receive end of ring */
+#define OWL_EMAC_MSK_RDES1_RBS1			GENMASK(10, 0) /* Buffer 1 size */
+
+/* Transmit descriptor status field */
+#define OWL_EMAC_BIT_TDES0_OWN			BIT(31)	/* Ownership bit */
+#define OWL_EMAC_BIT_TDES0_ES			BIT(15)	/* Error summary */
+#define OWL_EMAC_BIT_TDES0_LO			BIT(11)	/* Loss of carrier */
+#define OWL_EMAC_BIT_TDES0_NC			BIT(10)	/* No carrier */
+#define OWL_EMAC_BIT_TDES0_LC			BIT(9)	/* Late collision */
+#define OWL_EMAC_BIT_TDES0_EC			BIT(8)	/* Excessive collisions */
+#define OWL_EMAC_MSK_TDES0_CC			GENMASK(6, 3) /* Collision count */
+#define OWL_EMAC_BIT_TDES0_UF			BIT(1)	/* Underflow error */
+#define OWL_EMAC_BIT_TDES0_DE			BIT(0)	/* Deferred */
+
+/* Transmit descriptor control and count field */
+#define OWL_EMAC_BIT_TDES1_IC			BIT(31)	/* Interrupt on completion */
+#define OWL_EMAC_BIT_TDES1_LS			BIT(30)	/* Last descriptor */
+#define OWL_EMAC_BIT_TDES1_FS			BIT(29)	/* First descriptor */
+#define OWL_EMAC_BIT_TDES1_FT1			BIT(28)	/* Filtering type */
+#define OWL_EMAC_BIT_TDES1_SET			BIT(27)	/* Setup packet */
+#define OWL_EMAC_BIT_TDES1_AC			BIT(26)	/* Add CRC disable */
+#define OWL_EMAC_BIT_TDES1_TER			BIT(25)	/* Transmit end of ring */
+#define OWL_EMAC_BIT_TDES1_DPD			BIT(23)	/* Disabled padding */
+#define OWL_EMAC_BIT_TDES1_FT0			BIT(22)	/* Filtering type */
+#define OWL_EMAC_MSK_TDES1_TBS1			GENMASK(10, 0) /* Buffer 1 size */
+
+static const char *const owl_emac_clk_names[] = { "eth", "rmii" };
+#define OWL_EMAC_NCLKS ARRAY_SIZE(owl_emac_clk_names)
+
+enum owl_emac_clk_map {
+	OWL_EMAC_CLK_ETH = 0,
+	OWL_EMAC_CLK_RMII
+};
+
+struct owl_emac_addr_list {
+	u8 addrs[OWL_EMAC_MAX_MULTICAST_ADDRS][ETH_ALEN];
+	int count;
+};
+
+/* TX/RX descriptors */
+struct owl_emac_ring_desc {
+	u32 status;
+	u32 control;
+	u32 buf_addr;
+	u32 reserved;		/* 2nd buffer address is not used */
+};
+
+struct owl_emac_ring {
+	struct owl_emac_ring_desc *descs;
+	dma_addr_t descs_dma;
+	struct sk_buff **skbs;
+	dma_addr_t *skbs_dma;
+	unsigned int size;
+	unsigned int head;
+	unsigned int tail;
+};
+
+struct owl_emac_priv {
+	struct net_device *netdev;
+	void __iomem *base;
+
+	struct clk_bulk_data clks[OWL_EMAC_NCLKS];
+	struct reset_control *reset;
+
+	struct owl_emac_ring rx_ring;
+	struct owl_emac_ring tx_ring;
+
+	struct mii_bus *mii;
+	struct napi_struct napi;
+
+	phy_interface_t phy_mode;
+	unsigned int link;
+	int speed;
+	int duplex;
+	int pause;
+	struct owl_emac_addr_list mcaddr_list;
+
+	struct work_struct mac_reset_task;
+
+	u32 msg_enable;		/* Debug message level */
+	spinlock_t lock;	/* Sync concurrent ring access */
+};
+
+#endif /* __OWL_EMAC_H__ */
-- 
cgit v1.2.3


From b31f51832acfaf76f5cdfb1381802fbde3309c69 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Date: Mon, 22 Mar 2021 01:29:45 +0200
Subject: MAINTAINERS: Add entries for Actions Semi Owl Ethernet MAC

Add entries for Actions Semi Owl Ethernet MAC binding and driver.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e1fa5ad9bb30..ad214621655f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1530,6 +1530,7 @@ F:	Documentation/devicetree/bindings/dma/owl-dma.yaml
 F:	Documentation/devicetree/bindings/i2c/i2c-owl.yaml
 F:	Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml
 F:	Documentation/devicetree/bindings/mmc/owl-mmc.yaml
+F:	Documentation/devicetree/bindings/net/actions,owl-emac.yaml
 F:	Documentation/devicetree/bindings/pinctrl/actions,*
 F:	Documentation/devicetree/bindings/power/actions,owl-sps.txt
 F:	Documentation/devicetree/bindings/timer/actions,owl-timer.txt
@@ -1542,6 +1543,7 @@ F:	drivers/dma/owl-dma.c
 F:	drivers/i2c/busses/i2c-owl.c
 F:	drivers/irqchip/irq-owl-sirq.c
 F:	drivers/mmc/host/owl-mmc.c
+F:	drivers/net/ethernet/actions/
 F:	drivers/pinctrl/actions/*
 F:	drivers/soc/actions/
 F:	include/dt-bindings/power/owl-*
-- 
cgit v1.2.3


From 0853f5ab35e63ff0a75eadb67455ba11412c4374 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 06:24:30 +0530
Subject: NFC: Fix a typo

s/packaet/packet/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/fdp/fdp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index 824f2da137a6..ee2baa2b2fcf 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -176,7 +176,7 @@ static void fdp_nci_set_data_pkt_counter(struct nci_dev *ndev,
  *
  * The firmware will be analyzed and applied when we send NCI_OP_PROP_PATCH_CMD
  * command with NCI_PATCH_TYPE_EOT parameter. The device will send a
- * NFCC_PATCH_NTF packaet and a NCI_OP_CORE_RESET_NTF packet.
+ * NFCC_PATCH_NTF packet and a NCI_OP_CORE_RESET_NTF packet.
  */
 static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type)
 {
-- 
cgit v1.2.3


From f44773058ce2363ab4a48cc21e849116103bfae2 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 07:47:08 +0530
Subject: openvswitch: Fix a typo

s/subsytem/subsystem/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 1eb7495ac5b4..8a930ca6d6b1 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -20,7 +20,7 @@
 struct vport;
 struct vport_parms;
 
-/* The following definitions are for users of the vport subsytem: */
+/* The following definitions are for users of the vport subsystem: */
 
 int ovs_vport_init(void);
 void ovs_vport_exit(void);
-- 
cgit v1.2.3


From 405a129f59384c474343d6261a2e0a75650d29a8 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 08:25:16 +0530
Subject: linux/qed: Mundane spelling fixes throughout the file

s/unrequired/"not required"/
s/consme/consume/ .....two different places
s/accros/across/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_chain.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index e339b48de32d..f34dbd0db795 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -19,7 +19,7 @@ enum qed_chain_mode {
 	/* Each Page contains a next pointer at its end */
 	QED_CHAIN_MODE_NEXT_PTR,
 
-	/* Chain is a single page (next ptr) is unrequired */
+	/* Chain is a single page (next ptr) is not required */
 	QED_CHAIN_MODE_SINGLE,
 
 	/* Page pointers are located in a side list */
@@ -56,13 +56,13 @@ struct qed_chain_pbl_u32 {
 };
 
 struct qed_chain_u16 {
-	/* Cyclic index of next element to produce/consme */
+	/* Cyclic index of next element to produce/consume */
 	u16						prod_idx;
 	u16						cons_idx;
 };
 
 struct qed_chain_u32 {
-	/* Cyclic index of next element to produce/consme */
+	/* Cyclic index of next element to produce/consume */
 	u32						prod_idx;
 	u32						cons_idx;
 };
@@ -270,7 +270,7 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
 /**
  * @brief qed_chain_advance_page -
  *
- * Advance the next element accros pages for a linked chain
+ * Advance the next element across pages for a linked chain
  *
  * @param p_chain
  * @param p_next_elem
-- 
cgit v1.2.3


From 5f2b1238b33c38478ddc55536b65277b30f5d456 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:51:56 +0800
Subject: net: hns3: refactor out hclge_add_fd_entry()

The process of function hclge_add_fd_entry() is complex and
prolix. To make it more readable, extract the process of
fs->ring_cookie to a single function.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 67 +++++++++++++---------
 1 file changed, 40 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a664383271c8..4929220d344a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6126,6 +6126,42 @@ static bool hclge_is_cls_flower_active(struct hnae3_handle *handle)
 	return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE;
 }
 
+static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie,
+				      u16 *vport_id, u8 *action, u16 *queue_id)
+{
+	struct hclge_vport *vport = hdev->vport;
+
+	if (ring_cookie == RX_CLS_FLOW_DISC) {
+		*action = HCLGE_FD_ACTION_DROP_PACKET;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie);
+		u16 tqps;
+
+		if (vf > hdev->num_req_vfs) {
+			dev_err(&hdev->pdev->dev,
+				"Error: vf id (%u) > max vf num (%u)\n",
+				vf, hdev->num_req_vfs);
+			return -EINVAL;
+		}
+
+		*vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
+		tqps = hdev->vport[vf].nic.kinfo.num_tqps;
+
+		if (ring >= tqps) {
+			dev_err(&hdev->pdev->dev,
+				"Error: queue id (%u) > max tqp num (%u)\n",
+				ring, tqps - 1);
+			return -EINVAL;
+		}
+
+		*action = HCLGE_FD_ACTION_SELECT_QUEUE;
+		*queue_id = ring;
+	}
+
+	return 0;
+}
+
 static int hclge_add_fd_entry(struct hnae3_handle *handle,
 			      struct ethtool_rxnfc *cmd)
 {
@@ -6162,33 +6198,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	if (ret)
 		return ret;
 
-	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
-		action = HCLGE_FD_ACTION_DROP_PACKET;
-	} else {
-		u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
-		u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
-		u16 tqps;
-
-		if (vf > hdev->num_req_vfs) {
-			dev_err(&hdev->pdev->dev,
-				"Error: vf id (%u) > max vf num (%u)\n",
-				vf, hdev->num_req_vfs);
-			return -EINVAL;
-		}
-
-		dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
-		tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps;
-
-		if (ring >= tqps) {
-			dev_err(&hdev->pdev->dev,
-				"Error: queue id (%u) > max tqp num (%u)\n",
-				ring, tqps - 1);
-			return -EINVAL;
-		}
-
-		action = HCLGE_FD_ACTION_SELECT_QUEUE;
-		q_index = ring;
-	}
+	ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id,
+					 &action, &q_index);
+	if (ret)
+		return ret;
 
 	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
 	if (!rule)
-- 
cgit v1.2.3


From 74b755d1dbf1c4ff6f0cc4513e573eb15c0e7dfc Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:51:57 +0800
Subject: net: hns3: refactor out hclge_fd_get_tuple()

The process of function hclge_fd_get_tuple() is complex and
prolix. To make it more readable, extract the process of each
flow-type tuple to a single function.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 220 +++++++++++----------
 1 file changed, 117 insertions(+), 103 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4929220d344a..a17831f03d6e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5935,144 +5935,158 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 	return 0;
 }
 
-static int hclge_fd_get_tuple(struct hclge_dev *hdev,
-			      struct ethtool_rx_flow_spec *fs,
-			      struct hclge_fd_rule *rule)
+static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev,
+				      struct ethtool_rx_flow_spec *fs,
+				      struct hclge_fd_rule *rule, u8 ip_proto)
 {
-	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	rule->tuples.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
+	rule->tuples_mask.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
 
-	switch (flow_type) {
-	case SCTP_V4_FLOW:
-	case TCP_V4_FLOW:
-	case UDP_V4_FLOW:
-		rule->tuples.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
+	rule->tuples.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
+	rule->tuples_mask.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
 
-		rule->tuples.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
+	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
+	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
 
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
-		rule->tuples_mask.src_port =
-				be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
+	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
+	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
 
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
-		rule->tuples_mask.dst_port =
-				be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
+	rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
 
-		rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
-		rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
+	rule->tuples.ether_proto = ETH_P_IP;
+	rule->tuples_mask.ether_proto = 0xFFFF;
 
-		rule->tuples.ether_proto = ETH_P_IP;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ip_proto = ip_proto;
+	rule->tuples_mask.ip_proto = 0xFF;
+}
 
-		break;
-	case IP_USER_FLOW:
-		rule->tuples.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
+static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev,
+				   struct ethtool_rx_flow_spec *fs,
+				   struct hclge_fd_rule *rule)
+{
+	rule->tuples.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
+	rule->tuples_mask.src_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
 
-		rule->tuples.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[IPV4_INDEX] =
-				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
+	rule->tuples.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
+	rule->tuples_mask.dst_ip[IPV4_INDEX] =
+			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
 
-		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
-		rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
+	rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
+	rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
 
-		rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
-		rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
+	rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
+	rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
 
-		rule->tuples.ether_proto = ETH_P_IP;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ether_proto = ETH_P_IP;
+	rule->tuples_mask.ether_proto = 0xFFFF;
+}
 
-		break;
-	case SCTP_V6_FLOW:
-	case TCP_V6_FLOW:
-	case UDP_V6_FLOW:
-		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
+static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev,
+				      struct ethtool_rx_flow_spec *fs,
+				      struct hclge_fd_rule *rule, u8 ip_proto)
+{
+	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src,
+			  IPV6_SIZE);
 
-		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst,
+			  IPV6_SIZE);
 
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
-		rule->tuples_mask.src_port =
-				be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
+	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
+	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
 
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
-		rule->tuples_mask.dst_port =
-				be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
+	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
+	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
 
-		rule->tuples.ether_proto = ETH_P_IPV6;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ether_proto = ETH_P_IPV6;
+	rule->tuples_mask.ether_proto = 0xFFFF;
 
-		break;
-	case IPV6_USER_FLOW:
-		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
+	rule->tuples.ip_proto = ip_proto;
+	rule->tuples_mask.ip_proto = 0xFF;
+}
 
-		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
-		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
+static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev,
+				   struct ethtool_rx_flow_spec *fs,
+				   struct hclge_fd_rule *rule)
+{
+	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src,
+			  IPV6_SIZE);
 
-		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
-		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
+	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst,
+			  IPV6_SIZE);
+	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst,
+			  IPV6_SIZE);
 
-		rule->tuples.ether_proto = ETH_P_IPV6;
-		rule->tuples_mask.ether_proto = 0xFFFF;
+	rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
+	rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
 
-		break;
-	case ETHER_FLOW:
-		ether_addr_copy(rule->tuples.src_mac,
-				fs->h_u.ether_spec.h_source);
-		ether_addr_copy(rule->tuples_mask.src_mac,
-				fs->m_u.ether_spec.h_source);
+	rule->tuples.ether_proto = ETH_P_IPV6;
+	rule->tuples_mask.ether_proto = 0xFFFF;
+}
 
-		ether_addr_copy(rule->tuples.dst_mac,
-				fs->h_u.ether_spec.h_dest);
-		ether_addr_copy(rule->tuples_mask.dst_mac,
-				fs->m_u.ether_spec.h_dest);
+static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev,
+				     struct ethtool_rx_flow_spec *fs,
+				     struct hclge_fd_rule *rule)
+{
+	ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source);
+	ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
 
-		rule->tuples.ether_proto =
-				be16_to_cpu(fs->h_u.ether_spec.h_proto);
-		rule->tuples_mask.ether_proto =
-				be16_to_cpu(fs->m_u.ether_spec.h_proto);
+	ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest);
+	ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
 
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
+	rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto);
+	rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto);
+}
+
+static int hclge_fd_get_tuple(struct hclge_dev *hdev,
+			      struct ethtool_rx_flow_spec *fs,
+			      struct hclge_fd_rule *rule)
+{
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 
 	switch (flow_type) {
 	case SCTP_V4_FLOW:
-	case SCTP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_SCTP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP);
 		break;
 	case TCP_V4_FLOW:
-	case TCP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_TCP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP);
 		break;
 	case UDP_V4_FLOW:
+		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP);
+		break;
+	case IP_USER_FLOW:
+		hclge_fd_get_ip4_tuple(hdev, fs, rule);
+		break;
+	case SCTP_V6_FLOW:
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP);
+		break;
+	case TCP_V6_FLOW:
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP);
+		break;
 	case UDP_V6_FLOW:
-		rule->tuples.ip_proto = IPPROTO_UDP;
-		rule->tuples_mask.ip_proto = 0xFF;
+		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP);
 		break;
-	default:
+	case IPV6_USER_FLOW:
+		hclge_fd_get_ip6_tuple(hdev, fs, rule);
 		break;
+	case ETHER_FLOW:
+		hclge_fd_get_ether_tuple(hdev, fs, rule);
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
 	if (fs->flow_type & FLOW_EXT) {
-- 
cgit v1.2.3


From fb72699dfef8706abe203ec8c8fc69a023c161ce Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:51:58 +0800
Subject: net: hns3: refactor for function hclge_fd_convert_tuple

Currently, there are too many branches for hclge_fd_convert_tuple().
And it may be more when add new tuples. Refactor it by sorting the
tuples according to their length. So it only needs several KEY_OPT
now, and being flexible to add new tuples.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 189 +++++++++------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  12 ++
 2 files changed, 97 insertions(+), 104 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a17831f03d6e..3d601c9b6cea 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -384,36 +384,56 @@ static const struct key_info meta_data_key_info[] = {
 };
 
 static const struct key_info tuple_key_info[] = {
-	{ OUTER_DST_MAC, 48},
-	{ OUTER_SRC_MAC, 48},
-	{ OUTER_VLAN_TAG_FST, 16},
-	{ OUTER_VLAN_TAG_SEC, 16},
-	{ OUTER_ETH_TYPE, 16},
-	{ OUTER_L2_RSV, 16},
-	{ OUTER_IP_TOS, 8},
-	{ OUTER_IP_PROTO, 8},
-	{ OUTER_SRC_IP, 32},
-	{ OUTER_DST_IP, 32},
-	{ OUTER_L3_RSV, 16},
-	{ OUTER_SRC_PORT, 16},
-	{ OUTER_DST_PORT, 16},
-	{ OUTER_L4_RSV, 32},
-	{ OUTER_TUN_VNI, 24},
-	{ OUTER_TUN_FLOW_ID, 8},
-	{ INNER_DST_MAC, 48},
-	{ INNER_SRC_MAC, 48},
-	{ INNER_VLAN_TAG_FST, 16},
-	{ INNER_VLAN_TAG_SEC, 16},
-	{ INNER_ETH_TYPE, 16},
-	{ INNER_L2_RSV, 16},
-	{ INNER_IP_TOS, 8},
-	{ INNER_IP_PROTO, 8},
-	{ INNER_SRC_IP, 32},
-	{ INNER_DST_IP, 32},
-	{ INNER_L3_RSV, 16},
-	{ INNER_SRC_PORT, 16},
-	{ INNER_DST_PORT, 16},
-	{ INNER_L4_RSV, 32},
+	{ OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 },
+	{ OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 },
+	{ OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 },
+	{ OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 },
+	{ OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 },
+	{ OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 },
+	{ OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 },
+	{ OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 },
+	{ OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 },
+	{ OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 },
+	{ INNER_DST_MAC, 48, KEY_OPT_MAC,
+	  offsetof(struct hclge_fd_rule, tuples.dst_mac),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) },
+	{ INNER_SRC_MAC, 48, KEY_OPT_MAC,
+	  offsetof(struct hclge_fd_rule, tuples.src_mac),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_mac) },
+	{ INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.vlan_tag1),
+	  offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) },
+	{ INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_ETH_TYPE, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.ether_proto),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) },
+	{ INNER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_IP_TOS, 8, KEY_OPT_U8,
+	  offsetof(struct hclge_fd_rule, tuples.ip_tos),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) },
+	{ INNER_IP_PROTO, 8, KEY_OPT_U8,
+	  offsetof(struct hclge_fd_rule, tuples.ip_proto),
+	  offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) },
+	{ INNER_SRC_IP, 32, KEY_OPT_IP,
+	  offsetof(struct hclge_fd_rule, tuples.src_ip),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_ip) },
+	{ INNER_DST_IP, 32, KEY_OPT_IP,
+	  offsetof(struct hclge_fd_rule, tuples.dst_ip),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) },
+	{ INNER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_SRC_PORT, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.src_port),
+	  offsetof(struct hclge_fd_rule, tuples_mask.src_port) },
+	{ INNER_DST_PORT, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.dst_port),
+	  offsetof(struct hclge_fd_rule, tuples_mask.dst_port) },
+	{ INNER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 },
 };
 
 static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
@@ -5371,96 +5391,57 @@ static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc,
 static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
 				   struct hclge_fd_rule *rule)
 {
+	int offset, moffset, ip_offset;
+	enum HCLGE_FD_KEY_OPT key_opt;
 	u16 tmp_x_s, tmp_y_s;
 	u32 tmp_x_l, tmp_y_l;
+	u8 *p = (u8 *)rule;
 	int i;
 
-	if (rule->unused_tuple & tuple_bit)
+	if (rule->unused_tuple & BIT(tuple_bit))
 		return true;
 
-	switch (tuple_bit) {
-	case BIT(INNER_DST_MAC):
-		for (i = 0; i < ETH_ALEN; i++) {
-			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
-			       rule->tuples_mask.dst_mac[i]);
-			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
-			       rule->tuples_mask.dst_mac[i]);
-		}
+	key_opt = tuple_key_info[tuple_bit].key_opt;
+	offset = tuple_key_info[tuple_bit].offset;
+	moffset = tuple_key_info[tuple_bit].moffset;
 
-		return true;
-	case BIT(INNER_SRC_MAC):
-		for (i = 0; i < ETH_ALEN; i++) {
-			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples_mask.src_mac[i]);
-			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-			       rule->tuples_mask.src_mac[i]);
-		}
+	switch (key_opt) {
+	case KEY_OPT_U8:
+		calc_x(*key_x, p[offset], p[moffset]);
+		calc_y(*key_y, p[offset], p[moffset]);
 
 		return true;
-	case BIT(INNER_VLAN_TAG_FST):
-		calc_x(tmp_x_s, rule->tuples.vlan_tag1,
-		       rule->tuples_mask.vlan_tag1);
-		calc_y(tmp_y_s, rule->tuples.vlan_tag1,
-		       rule->tuples_mask.vlan_tag1);
+	case KEY_OPT_LE16:
+		calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
+		calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
 		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
 		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
 
 		return true;
-	case BIT(INNER_ETH_TYPE):
-		calc_x(tmp_x_s, rule->tuples.ether_proto,
-		       rule->tuples_mask.ether_proto);
-		calc_y(tmp_y_s, rule->tuples.ether_proto,
-		       rule->tuples_mask.ether_proto);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
-
-		return true;
-	case BIT(INNER_IP_TOS):
-		calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
-		calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
-
-		return true;
-	case BIT(INNER_IP_PROTO):
-		calc_x(*key_x, rule->tuples.ip_proto,
-		       rule->tuples_mask.ip_proto);
-		calc_y(*key_y, rule->tuples.ip_proto,
-		       rule->tuples_mask.ip_proto);
-
-		return true;
-	case BIT(INNER_SRC_IP):
-		calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX],
-		       rule->tuples_mask.src_ip[IPV4_INDEX]);
-		calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX],
-		       rule->tuples_mask.src_ip[IPV4_INDEX]);
+	case KEY_OPT_LE32:
+		calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
+		calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
 		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
 		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
 
 		return true;
-	case BIT(INNER_DST_IP):
-		calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX],
-		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
-		calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX],
-		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
-		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
-		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
-
-		return true;
-	case BIT(INNER_SRC_PORT):
-		calc_x(tmp_x_s, rule->tuples.src_port,
-		       rule->tuples_mask.src_port);
-		calc_y(tmp_y_s, rule->tuples.src_port,
-		       rule->tuples_mask.src_port);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+	case KEY_OPT_MAC:
+		for (i = 0; i < ETH_ALEN; i++) {
+			calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i],
+			       p[moffset + i]);
+			calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i],
+			       p[moffset + i]);
+		}
 
 		return true;
-	case BIT(INNER_DST_PORT):
-		calc_x(tmp_x_s, rule->tuples.dst_port,
-		       rule->tuples_mask.dst_port);
-		calc_y(tmp_y_s, rule->tuples.dst_port,
-		       rule->tuples_mask.dst_port);
-		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
-		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
+	case KEY_OPT_IP:
+		ip_offset = IPV4_INDEX * sizeof(u32);
+		calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]),
+		       *(u32 *)(&p[moffset + ip_offset]));
+		calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]),
+		       *(u32 *)(&p[moffset + ip_offset]));
+		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
+		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
 
 		return true;
 	default:
@@ -5548,12 +5529,12 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
 
 	for (i = 0 ; i < MAX_TUPLE; i++) {
 		bool tuple_valid;
-		u32 check_tuple;
 
 		tuple_size = tuple_key_info[i].key_length / 8;
-		check_tuple = key_cfg->tuple_active & BIT(i);
+		if (!(key_cfg->tuple_active & BIT(i)))
+			continue;
 
-		tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x,
+		tuple_valid = hclge_fd_convert_tuple(i, cur_key_x,
 						     cur_key_y, rule);
 		if (tuple_valid) {
 			cur_key_x += tuple_size;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 19d7f28773f3..6fe745538b30 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -548,9 +548,21 @@ enum HCLGE_FD_META_DATA {
 	MAX_META_DATA,
 };
 
+enum HCLGE_FD_KEY_OPT {
+	KEY_OPT_U8,
+	KEY_OPT_LE16,
+	KEY_OPT_LE32,
+	KEY_OPT_MAC,
+	KEY_OPT_IP,
+	KEY_OPT_VNI,
+};
+
 struct key_info {
 	u8 key_type;
 	u8 key_length; /* use bit as unit */
+	enum HCLGE_FD_KEY_OPT key_opt;
+	int offset;
+	int moffset;
 };
 
 #define MAX_KEY_LENGTH	400
-- 
cgit v1.2.3


From ae4811913f576d3a891e2ca8a3ad11746f644c69 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:51:59 +0800
Subject: net: hns3: add support for traffic class tuple support for flow
 director by ethtool

The hardware supports to parse and match the traffic class field
of IPv6 packet for flow director, uses the same tuple as ip tos.
So removes the limitation of configure 'tclass' by driver.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 27 ++++++++++++++++------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 3d601c9b6cea..258444410dd2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5665,8 +5665,7 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec,
 	if (!spec || !unused_tuple)
 		return -EINVAL;
 
-	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-		BIT(INNER_IP_TOS);
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
 
 	/* check whether src/dst ip address used */
 	if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@ -5681,8 +5680,8 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec,
 	if (!spec->pdst)
 		*unused_tuple |= BIT(INNER_DST_PORT);
 
-	if (spec->tclass)
-		return -EOPNOTSUPP;
+	if (!spec->tclass)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
 	return 0;
 }
@@ -5694,7 +5693,7 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec,
 		return -EINVAL;
 
 	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-		BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
 	/* check whether src/dst ip address used */
 	if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@ -5706,8 +5705,8 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec,
 	if (!spec->l4_proto)
 		*unused_tuple |= BIT(INNER_IP_PROTO);
 
-	if (spec->tclass)
-		return -EOPNOTSUPP;
+	if (!spec->tclass)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
 	if (spec->l4_4_bytes)
 		return -EOPNOTSUPP;
@@ -5993,6 +5992,9 @@ static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev,
 	rule->tuples.ether_proto = ETH_P_IPV6;
 	rule->tuples_mask.ether_proto = 0xFFFF;
 
+	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
+
 	rule->tuples.ip_proto = ip_proto;
 	rule->tuples_mask.ip_proto = 0xFF;
 }
@@ -6014,6 +6016,9 @@ static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev,
 	rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
 	rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
 
+	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
+	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
+
 	rule->tuples.ether_proto = ETH_P_IPV6;
 	rule->tuples_mask.ether_proto = 0xFFFF;
 }
@@ -6423,6 +6428,10 @@ static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule,
 		cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip,
 				  IPV6_SIZE);
 
+	spec->tclass = rule->tuples.ip_tos;
+	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+
 	spec->psrc = cpu_to_be16(rule->tuples.src_port);
 	spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ?
 			0 : cpu_to_be16(rule->tuples_mask.src_port);
@@ -6450,6 +6459,10 @@ static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule,
 		cpu_to_be32_array(spec_mask->ip6dst,
 				  rule->tuples_mask.dst_ip, IPV6_SIZE);
 
+	spec->tclass = rule->tuples.ip_tos;
+	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+
 	spec->l4_proto = rule->tuples.ip_proto;
 	spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ?
 			0 : rule->tuples_mask.ip_proto;
-- 
cgit v1.2.3


From fc4243b8de8b4e7170f07f2660dcab3f8ecda0e9 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:52:00 +0800
Subject: net: hns3: refactor flow director configuration

Currently, the flow director rule of aRFS is configured in
the IO path. It's time-consuming. So move out the configuration,
and configure it asynchronously. And keep ethtool and tc flower
rule using synchronous way, otherwise the application maybe
unable to know the rule is installed or pending.

Add a state member for each flow director rule to indicate the
rule state. There are 4 states:
TO_ADD: the rule is waiting to add to hardware
TO_DEL: the rule is waiting to remove from hardware
DELETED: the rule has been removed from hardware. It's a middle
        state, used to remove the rule node in the fd_rule_list.
ACTIVE: the rule is already added in hardware

For asynchronous way, when receive a new request to add or delete
flow director rule by aRFS, update the rule list, then request to
schedule the service task to finish the configuration.

For synchronous way, when receive a new request to add or delete
flow director rule by ethtool or tc flower, configure hardware
directly, then update the rule list if success.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 518 ++++++++++++---------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  10 +
 2 files changed, 319 insertions(+), 209 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 258444410dd2..a41bc12e6590 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -62,7 +62,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
 static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
 static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
-static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
+static int hclge_clear_arfs_rules(struct hclge_dev *hdev);
 static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 						   unsigned long *addr);
 static int hclge_set_default_loopback(struct hclge_dev *hdev);
@@ -70,6 +70,7 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev);
 static void hclge_sync_mac_table(struct hclge_dev *hdev);
 static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
+static void hclge_sync_fd_table(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -4261,6 +4262,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev)
 	hclge_update_link_status(hdev);
 	hclge_sync_mac_table(hdev);
 	hclge_sync_promisc_mode(hdev);
+	hclge_sync_fd_table(hdev);
 
 	if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
 		delta = jiffies - hdev->last_serv_processed;
@@ -5162,6 +5164,150 @@ static void hclge_request_update_promisc_mode(struct hnae3_handle *handle)
 	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
 }
 
+static void hclge_sync_fd_state(struct hclge_dev *hdev)
+{
+	if (hlist_empty(&hdev->fd_rule_list))
+		hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+}
+
+static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location)
+{
+	if (!test_bit(location, hdev->fd_bmap)) {
+		set_bit(location, hdev->fd_bmap);
+		hdev->hclge_fd_rule_num++;
+	}
+}
+
+static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location)
+{
+	if (test_bit(location, hdev->fd_bmap)) {
+		clear_bit(location, hdev->fd_bmap);
+		hdev->hclge_fd_rule_num--;
+	}
+}
+
+static void hclge_fd_free_node(struct hclge_dev *hdev,
+			       struct hclge_fd_rule *rule)
+{
+	hlist_del(&rule->rule_node);
+	kfree(rule);
+	hclge_sync_fd_state(hdev);
+}
+
+static void hclge_update_fd_rule_node(struct hclge_dev *hdev,
+				      struct hclge_fd_rule *old_rule,
+				      struct hclge_fd_rule *new_rule,
+				      enum HCLGE_FD_NODE_STATE state)
+{
+	switch (state) {
+	case HCLGE_FD_TO_ADD:
+	case HCLGE_FD_ACTIVE:
+		/* 1) if the new state is TO_ADD, just replace the old rule
+		 * with the same location, no matter its state, because the
+		 * new rule will be configured to the hardware.
+		 * 2) if the new state is ACTIVE, it means the new rule
+		 * has been configured to the hardware, so just replace
+		 * the old rule node with the same location.
+		 * 3) for it doesn't add a new node to the list, so it's
+		 * unnecessary to update the rule number and fd_bmap.
+		 */
+		new_rule->rule_node.next = old_rule->rule_node.next;
+		new_rule->rule_node.pprev = old_rule->rule_node.pprev;
+		memcpy(old_rule, new_rule, sizeof(*old_rule));
+		kfree(new_rule);
+		break;
+	case HCLGE_FD_DELETED:
+		hclge_fd_dec_rule_cnt(hdev, old_rule->location);
+		hclge_fd_free_node(hdev, old_rule);
+		break;
+	case HCLGE_FD_TO_DEL:
+		/* if new request is TO_DEL, and old rule is existent
+		 * 1) the state of old rule is TO_DEL, we need do nothing,
+		 * because we delete rule by location, other rule content
+		 * is unncessary.
+		 * 2) the state of old rule is ACTIVE, we need to change its
+		 * state to TO_DEL, so the rule will be deleted when periodic
+		 * task being scheduled.
+		 * 3) the state of old rule is TO_ADD, it means the rule hasn't
+		 * been added to hardware, so we just delete the rule node from
+		 * fd_rule_list directly.
+		 */
+		if (old_rule->state == HCLGE_FD_TO_ADD) {
+			hclge_fd_dec_rule_cnt(hdev, old_rule->location);
+			hclge_fd_free_node(hdev, old_rule);
+			return;
+		}
+		old_rule->state = HCLGE_FD_TO_DEL;
+		break;
+	}
+}
+
+static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist,
+						u16 location,
+						struct hclge_fd_rule **parent)
+{
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(rule, node, hlist, rule_node) {
+		if (rule->location == location)
+			return rule;
+		else if (rule->location > location)
+			return NULL;
+		/* record the parent node, use to keep the nodes in fd_rule_list
+		 * in ascend order.
+		 */
+		*parent = rule;
+	}
+
+	return NULL;
+}
+
+/* insert fd rule node in ascend order according to rule->location */
+static void hclge_fd_insert_rule_node(struct hlist_head *hlist,
+				      struct hclge_fd_rule *rule,
+				      struct hclge_fd_rule *parent)
+{
+	INIT_HLIST_NODE(&rule->rule_node);
+
+	if (parent)
+		hlist_add_behind(&rule->rule_node, &parent->rule_node);
+	else
+		hlist_add_head(&rule->rule_node, hlist);
+}
+
+static void hclge_update_fd_list(struct hclge_dev *hdev,
+				 enum HCLGE_FD_NODE_STATE state, u16 location,
+				 struct hclge_fd_rule *new_rule)
+{
+	struct hlist_head *hlist = &hdev->fd_rule_list;
+	struct hclge_fd_rule *fd_rule, *parent = NULL;
+
+	fd_rule = hclge_find_fd_rule(hlist, location, &parent);
+	if (fd_rule) {
+		hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state);
+		return;
+	}
+
+	/* it's unlikely to fail here, because we have checked the rule
+	 * exist before.
+	 */
+	if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) {
+		dev_warn(&hdev->pdev->dev,
+			 "failed to delete fd rule %u, it's inexistent\n",
+			 location);
+		return;
+	}
+
+	hclge_fd_insert_rule_node(hlist, new_rule, parent);
+	hclge_fd_inc_rule_cnt(hdev, new_rule->location);
+
+	if (state == HCLGE_FD_TO_ADD) {
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+		hclge_task_schedule(hdev, 0);
+	}
+}
+
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
 {
 	struct hclge_get_fd_mode_cmd *req;
@@ -5847,74 +5993,6 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 	return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple);
 }
 
-static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
-{
-	struct hclge_fd_rule *rule = NULL;
-	struct hlist_node *node2;
-
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
-		if (rule->location >= location)
-			break;
-	}
-
-	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	return  rule && rule->location == location;
-}
-
-/* make sure being called after lock up with fd_rule_lock */
-static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
-				     struct hclge_fd_rule *new_rule,
-				     u16 location,
-				     bool is_add)
-{
-	struct hclge_fd_rule *rule = NULL, *parent = NULL;
-	struct hlist_node *node2;
-
-	if (is_add && !new_rule)
-		return -EINVAL;
-
-	hlist_for_each_entry_safe(rule, node2,
-				  &hdev->fd_rule_list, rule_node) {
-		if (rule->location >= location)
-			break;
-		parent = rule;
-	}
-
-	if (rule && rule->location == location) {
-		hlist_del(&rule->rule_node);
-		kfree(rule);
-		hdev->hclge_fd_rule_num--;
-
-		if (!is_add) {
-			if (!hdev->hclge_fd_rule_num)
-				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
-			clear_bit(location, hdev->fd_bmap);
-
-			return 0;
-		}
-	} else if (!is_add) {
-		dev_err(&hdev->pdev->dev,
-			"delete fail, rule %u is inexistent\n",
-			location);
-		return -EINVAL;
-	}
-
-	INIT_HLIST_NODE(&new_rule->rule_node);
-
-	if (parent)
-		hlist_add_behind(&new_rule->rule_node, &parent->rule_node);
-	else
-		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
-
-	set_bit(location, hdev->fd_bmap);
-	hdev->hclge_fd_rule_num++;
-	hdev->fd_active_type = new_rule->rule_type;
-
-	return 0;
-}
-
 static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev,
 				      struct ethtool_rx_flow_spec *fs,
 				      struct hclge_fd_rule *rule, u8 ip_proto)
@@ -6088,33 +6166,48 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	return 0;
 }
 
-/* make sure being called after lock up with fd_rule_lock */
 static int hclge_fd_config_rule(struct hclge_dev *hdev,
 				struct hclge_fd_rule *rule)
 {
 	int ret;
 
-	if (!rule) {
+	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	if (ret)
+		return ret;
+
+	return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+}
+
+static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
+				     struct hclge_fd_rule *rule)
+{
+	int ret;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	if (hdev->fd_active_type != rule->rule_type &&
+	    (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
+	     hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) {
 		dev_err(&hdev->pdev->dev,
-			"The flow director rule is NULL\n");
+			"mode conflict(new type %d, active type %d), please delete existent rules first\n",
+			rule->rule_type, hdev->fd_active_type);
+		spin_unlock_bh(&hdev->fd_rule_lock);
 		return -EINVAL;
 	}
 
-	/* it will never fail here, so needn't to check return value */
-	hclge_fd_update_rule_list(hdev, rule, rule->location, true);
-
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+	ret = hclge_clear_arfs_rules(hdev);
 	if (ret)
-		goto clear_rule;
+		goto out;
 
-	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+	ret = hclge_fd_config_rule(hdev, rule);
 	if (ret)
-		goto clear_rule;
+		goto out;
 
-	return 0;
+	hclge_update_fd_list(hdev, HCLGE_FD_ACTIVE, rule->location, rule);
+	hdev->fd_active_type = rule->rule_type;
 
-clear_rule:
-	hclge_fd_update_rule_list(hdev, rule, rule->location, false);
+out:
+	spin_unlock_bh(&hdev->fd_rule_lock);
 	return ret;
 }
 
@@ -6186,12 +6279,6 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 		return -EOPNOTSUPP;
 	}
 
-	if (hclge_is_cls_flower_active(handle)) {
-		dev_err(&hdev->pdev->dev,
-			"please delete all exist cls flower rules first\n");
-		return -EINVAL;
-	}
-
 	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
 	ret = hclge_fd_check_spec(hdev, fs, &unused);
@@ -6221,15 +6308,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	rule->action = action;
 	rule->rule_type = HCLGE_FD_EP_ACTIVE;
 
-	/* to avoid rule conflict, when user configure rule by ethtool,
-	 * we need to clear all arfs rules
-	 */
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
-
-	ret = hclge_fd_config_rule(hdev, rule);
-
-	spin_unlock_bh(&hdev->fd_rule_lock);
+	ret = hclge_add_fd_entry_common(hdev, rule);
+	if (ret)
+		kfree(rule);
 
 	return ret;
 }
@@ -6250,32 +6331,30 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
 	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
 		return -EINVAL;
 
-	if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num ||
-	    !hclge_fd_rule_exist(hdev, fs->location)) {
+	spin_lock_bh(&hdev->fd_rule_lock);
+	if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
+	    !test_bit(fs->location, hdev->fd_bmap)) {
 		dev_err(&hdev->pdev->dev,
 			"Delete fail, rule %u is inexistent\n", fs->location);
+		spin_unlock_bh(&hdev->fd_rule_lock);
 		return -ENOENT;
 	}
 
 	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location,
 				   NULL, false);
 	if (ret)
-		return ret;
+		goto out;
 
-	spin_lock_bh(&hdev->fd_rule_lock);
-	ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
+	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL);
 
+out:
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
 	return ret;
 }
 
-/* make sure being called after lock up with fd_rule_lock */
-static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
-				     bool clear_list)
+static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev,
+					 bool clear_list)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
 	u16 location;
@@ -6283,6 +6362,8 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 	if (!hnae3_dev_fd_supported(hdev))
 		return;
 
+	spin_lock_bh(&hdev->fd_rule_lock);
+
 	for_each_set_bit(location, hdev->fd_bmap,
 			 hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
 		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
@@ -6299,6 +6380,17 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 		bitmap_zero(hdev->fd_bmap,
 			    hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
 	}
+
+	spin_unlock_bh(&hdev->fd_rule_lock);
+}
+
+static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
+				     bool clear_list)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	hclge_clear_fd_rules_in_list(hdev, clear_list);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -6307,7 +6399,6 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
-	int ret;
 
 	/* Return ok here, because reset error handling will check this
 	 * return value. If error is returned here, the reset process will
@@ -6322,25 +6413,11 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
 
 	spin_lock_bh(&hdev->fd_rule_lock);
 	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
-		ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
-		if (!ret)
-			ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
-
-		if (ret) {
-			dev_warn(&hdev->pdev->dev,
-				 "Restore rule %u failed, remove it\n",
-				 rule->location);
-			clear_bit(rule->location, hdev->fd_bmap);
-			hlist_del(&rule->rule_node);
-			kfree(rule);
-			hdev->hclge_fd_rule_num--;
-		}
+		if (rule->state == HCLGE_FD_ACTIVE)
+			rule->state = HCLGE_FD_TO_ADD;
 	}
-
-	if (hdev->hclge_fd_rule_num)
-		hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
-
 	spin_unlock_bh(&hdev->fd_rule_lock);
+	set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 
 	return 0;
 }
@@ -6609,6 +6686,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle,
 			return -EMSGSIZE;
 		}
 
+		if (rule->state == HCLGE_FD_TO_DEL)
+			continue;
+
 		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
@@ -6690,9 +6770,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 	struct hclge_fd_rule_tuples new_tuples = {};
 	struct hclge_dev *hdev = vport->back;
 	struct hclge_fd_rule *rule;
-	u16 tmp_queue_id;
 	u16 bit_id;
-	int ret;
 
 	if (!hnae3_dev_fd_supported(hdev))
 		return -EOPNOTSUPP;
@@ -6728,34 +6806,20 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 			return -ENOMEM;
 		}
 
-		set_bit(bit_id, hdev->fd_bmap);
 		rule->location = bit_id;
 		rule->arfs.flow_id = flow_id;
 		rule->queue_id = queue_id;
 		hclge_fd_build_arfs_rule(&new_tuples, rule);
-		ret = hclge_fd_config_rule(hdev, rule);
-
-		spin_unlock_bh(&hdev->fd_rule_lock);
-
-		if (ret)
-			return ret;
-
-		return rule->location;
+		hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, rule->location,
+				     rule);
+		hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE;
+	} else if (rule->queue_id != queue_id) {
+		rule->queue_id = queue_id;
+		rule->state = HCLGE_FD_TO_ADD;
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+		hclge_task_schedule(hdev, 0);
 	}
-
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	if (rule->queue_id == queue_id)
-		return rule->location;
-
-	tmp_queue_id = rule->queue_id;
-	rule->queue_id = queue_id;
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
-	if (ret) {
-		rule->queue_id = tmp_queue_id;
-		return ret;
-	}
-
 	return rule->location;
 }
 
@@ -6765,7 +6829,6 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
 	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct hclge_fd_rule *rule;
 	struct hlist_node *node;
-	HLIST_HEAD(del_list);
 
 	spin_lock_bh(&hdev->fd_rule_lock);
 	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
@@ -6773,33 +6836,50 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
 		return;
 	}
 	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		if (rule->state != HCLGE_FD_ACTIVE)
+			continue;
 		if (rps_may_expire_flow(handle->netdev, rule->queue_id,
 					rule->arfs.flow_id, rule->location)) {
-			hlist_del_init(&rule->rule_node);
-			hlist_add_head(&rule->rule_node, &del_list);
-			hdev->hclge_fd_rule_num--;
-			clear_bit(rule->location, hdev->fd_bmap);
+			rule->state = HCLGE_FD_TO_DEL;
+			set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 		}
 	}
 	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
-		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
-				     rule->location, NULL, false);
-		kfree(rule);
-	}
 #endif
 }
 
 /* make sure being called after lock up with fd_rule_lock */
-static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
+static int hclge_clear_arfs_rules(struct hclge_dev *hdev)
 {
 #ifdef CONFIG_RFS_ACCEL
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	int ret;
+
+	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE)
+		return 0;
+
+	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+		switch (rule->state) {
+		case HCLGE_FD_TO_DEL:
+		case HCLGE_FD_ACTIVE:
+			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+						   rule->location, NULL, false);
+			if (ret)
+				return ret;
+			fallthrough;
+		case HCLGE_FD_TO_ADD:
+			hclge_fd_dec_rule_cnt(hdev, rule->location);
+			hlist_del(&rule->rule_node);
+			kfree(rule);
+			break;
+		default:
+			break;
+		}
+	}
+	hclge_sync_fd_state(hdev);
 
-	if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
-		hclge_del_all_fd_entries(handle, true);
+	return 0;
 #endif
 }
 
@@ -6982,12 +7062,6 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 	struct hclge_fd_rule *rule;
 	int ret;
 
-	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
-		dev_err(&hdev->pdev->dev,
-			"please remove all exist fd rules via ethtool first\n");
-		return -EINVAL;
-	}
-
 	ret = hclge_check_cls_flower(hdev, cls_flower, tc);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
@@ -7000,8 +7074,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 		return -ENOMEM;
 
 	ret = hclge_parse_cls_flower(hdev, cls_flower, rule);
-	if (ret)
-		goto err;
+	if (ret) {
+		kfree(rule);
+		return ret;
+	}
 
 	rule->action = HCLGE_FD_ACTION_SELECT_TC;
 	rule->cls_flower.tc = tc;
@@ -7010,22 +7086,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle,
 	rule->cls_flower.cookie = cls_flower->cookie;
 	rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
 
-	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
-
-	ret = hclge_fd_config_rule(hdev, rule);
-
-	spin_unlock_bh(&hdev->fd_rule_lock);
-
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to add cls flower rule, ret = %d\n", ret);
-		goto err;
-	}
+	ret = hclge_add_fd_entry_common(hdev, rule);
+	if (ret)
+		kfree(rule);
 
-	return 0;
-err:
-	kfree(rule);
 	return ret;
 }
 
@@ -7062,25 +7126,64 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle,
 	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location,
 				   NULL, false);
 	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to delete cls flower rule %u, ret = %d\n",
-			rule->location, ret);
 		spin_unlock_bh(&hdev->fd_rule_lock);
 		return ret;
 	}
 
-	ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"failed to delete cls flower rule %u in list, ret = %d\n",
-			rule->location, ret);
-		spin_unlock_bh(&hdev->fd_rule_lock);
-		return ret;
+	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL);
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	return 0;
+}
+
+static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist)
+{
+	struct hclge_fd_rule *rule;
+	struct hlist_node *node;
+	int ret = 0;
+
+	if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state))
+		return;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+
+	hlist_for_each_entry_safe(rule, node, hlist, rule_node) {
+		switch (rule->state) {
+		case HCLGE_FD_TO_ADD:
+			ret = hclge_fd_config_rule(hdev, rule);
+			if (ret)
+				goto out;
+			rule->state = HCLGE_FD_ACTIVE;
+			break;
+		case HCLGE_FD_TO_DEL:
+			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+						   rule->location, NULL, false);
+			if (ret)
+				goto out;
+			hclge_fd_dec_rule_cnt(hdev, rule->location);
+			hclge_fd_free_node(hdev, rule);
+			break;
+		default:
+			break;
+		}
 	}
 
+out:
+	if (ret)
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
+
 	spin_unlock_bh(&hdev->fd_rule_lock);
+}
 
-	return 0;
+static void hclge_sync_fd_table(struct hclge_dev *hdev)
+{
+	if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) {
+		bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
+
+		hclge_clear_fd_rules_in_list(hdev, clear_list);
+	}
+
+	hclge_sync_fd_list(hdev, &hdev->fd_rule_list);
 }
 
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
@@ -7120,18 +7223,15 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	bool clear;
 
 	hdev->fd_en = enable;
-	clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
 
-	if (!enable) {
-		spin_lock_bh(&hdev->fd_rule_lock);
-		hclge_del_all_fd_entries(handle, clear);
-		spin_unlock_bh(&hdev->fd_rule_lock);
-	} else {
+	if (!enable)
+		set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state);
+	else
 		hclge_restore_fd_entries(handle);
-	}
+
+	hclge_task_schedule(hdev, 0);
 }
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
@@ -7602,7 +7702,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 	spin_lock_bh(&hdev->fd_rule_lock);
-	hclge_clear_arfs_rules(handle);
+	hclge_clear_arfs_rules(hdev);
 	spin_unlock_bh(&hdev->fd_rule_lock);
 
 	/* If it is not PF reset, the firmware will disable the MAC,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 6fe745538b30..ccc79d9b2e2b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -223,6 +223,8 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_LINK_UPDATING,
 	HCLGE_STATE_PROMISC_CHANGED,
 	HCLGE_STATE_RST_FAIL,
+	HCLGE_STATE_FD_TBL_CHANGED,
+	HCLGE_STATE_FD_CLEAR_ALL,
 	HCLGE_STATE_MAX
 };
 
@@ -592,6 +594,13 @@ enum HCLGE_FD_ACTION {
 	HCLGE_FD_ACTION_SELECT_TC,
 };
 
+enum HCLGE_FD_NODE_STATE {
+	HCLGE_FD_TO_ADD,
+	HCLGE_FD_TO_DEL,
+	HCLGE_FD_ACTIVE,
+	HCLGE_FD_DELETED,
+};
+
 struct hclge_fd_key_cfg {
 	u8 key_sel;
 	u8 inner_sipv6_word_en;
@@ -647,6 +656,7 @@ struct hclge_fd_rule {
 	u16 vf_id;
 	u16 location;
 	enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type;
+	enum HCLGE_FD_NODE_STATE state;
 	u8 action;
 };
 
-- 
cgit v1.2.3


From f07203b0180f62791371cb50fb1afacd826250fc Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:52:01 +0800
Subject: net: hns3: refine for hns3_del_all_fd_entries()

For only PF driver can configure flow director rule, it's
better to call hclge_del_all_fd_entries() directly in hclge
layer, rather than call hns3_del_all_fd_entries() in hns3
layer. Then the ae_algo->ops.del_all_fd_entries can be removed.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             |  2 --
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c         | 10 ----------
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 +++-------
 3 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 3a6bf1ab84e7..01d6bfc0917c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -612,8 +612,6 @@ struct hnae3_ae_ops {
 			    struct ethtool_rxnfc *cmd);
 	int (*del_fd_entry)(struct hnae3_handle *handle,
 			    struct ethtool_rxnfc *cmd);
-	void (*del_all_fd_entries)(struct hnae3_handle *handle,
-				   bool clear_list);
 	int (*get_fd_rule_cnt)(struct hnae3_handle *handle,
 			       struct ethtool_rxnfc *cmd);
 	int (*get_fd_rule_info)(struct hnae3_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index bf4302a5cf95..44b775efd5b9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4143,14 +4143,6 @@ static void hns3_uninit_phy(struct net_device *netdev)
 		h->ae_algo->ops->mac_disconnect_phy(h);
 }
 
-static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list)
-{
-	struct hnae3_handle *h = hns3_get_handle(netdev);
-
-	if (h->ae_algo->ops->del_all_fd_entries)
-		h->ae_algo->ops->del_all_fd_entries(h, clear_list);
-}
-
 static int hns3_client_start(struct hnae3_handle *handle)
 {
 	if (!handle->ae_algo->ops->client_start)
@@ -4337,8 +4329,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 
 	hns3_nic_uninit_irq(priv);
 
-	hns3_del_all_fd_rules(netdev, true);
-
 	hns3_clear_all_ring(handle, true);
 
 	hns3_nic_uninit_vector_data(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a41bc12e6590..6a24bda5da04 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6384,13 +6384,9 @@ static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev,
 	spin_unlock_bh(&hdev->fd_rule_lock);
 }
 
-static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
-				     bool clear_list)
+static void hclge_del_all_fd_entries(struct hclge_dev *hdev)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hclge_dev *hdev = vport->back;
-
-	hclge_clear_fd_rules_in_list(hdev, clear_list);
+	hclge_clear_fd_rules_in_list(hdev, true);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -11427,6 +11423,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
 	hclge_uninit_mac_table(hdev);
+	hclge_del_all_fd_entries(hdev);
 
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
@@ -12250,7 +12247,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_link_mode = hclge_get_link_mode,
 	.add_fd_entry = hclge_add_fd_entry,
 	.del_fd_entry = hclge_del_fd_entry,
-	.del_all_fd_entries = hclge_del_all_fd_entries,
 	.get_fd_rule_cnt = hclge_get_fd_rule_cnt,
 	.get_fd_rule_info = hclge_get_fd_rule_info,
 	.get_fd_all_rules = hclge_get_all_rules,
-- 
cgit v1.2.3


From 67b0e1428e2f592c0fc2c7f682a5a049158782b8 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 22 Mar 2021 11:52:02 +0800
Subject: net: hns3: add support for user-def data of flow director

For DEVICE_VERSION_V3, the hardware supports to match specified
data in the specified offset of packet payload. Each layer can
have one offset, and can't be masked when configure flow director
rule by ethtool command. The layer is selected based on the
flow-type, ether for L2, ip4/ipv6 for L3, and tcp4/tcp6/udp4/udp6
for L4. For example, tcp4/tcp6/udp4/udp6 rules share the same
user-def offset, but each rule can have its own user-def value.

For the user-def field of ethtool -N/U command is 64 bits long.
The bit 0~15 is used for user-def value, and bit 32~47 for user-def
offset in HNS3 driver.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  14 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 323 ++++++++++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  36 +++
 3 files changed, 359 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 804f4c8360cf..565c5aa54f88 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -243,6 +243,7 @@ enum hclge_opcode_type {
 	HCLGE_OPC_FD_KEY_CONFIG		= 0x1202,
 	HCLGE_OPC_FD_TCAM_OP		= 0x1203,
 	HCLGE_OPC_FD_AD_OP		= 0x1204,
+	HCLGE_OPC_FD_USER_DEF_OP	= 0x1207,
 
 	/* MDIO command */
 	HCLGE_OPC_MDIO_CONFIG		= 0x1900,
@@ -1082,6 +1083,19 @@ struct hclge_fd_ad_config_cmd {
 	u8 rsv2[8];
 };
 
+#define HCLGE_FD_USER_DEF_OFT_S		0
+#define HCLGE_FD_USER_DEF_OFT_M		GENMASK(14, 0)
+#define HCLGE_FD_USER_DEF_EN_B		15
+struct hclge_fd_user_def_cfg_cmd {
+	__le16 ol2_cfg;
+	__le16 l2_cfg;
+	__le16 ol3_cfg;
+	__le16 l3_cfg;
+	__le16 ol4_cfg;
+	__le16 l4_cfg;
+	u8 rsv[12];
+};
+
 struct hclge_get_m7_bd_cmd {
 	__le32 bd_num;
 	u8 rsv[20];
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6a24bda5da04..058317ce579c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -414,7 +414,9 @@ static const struct key_info tuple_key_info[] = {
 	{ INNER_ETH_TYPE, 16, KEY_OPT_LE16,
 	  offsetof(struct hclge_fd_rule, tuples.ether_proto),
 	  offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) },
-	{ INNER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_L2_RSV, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.l2_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) },
 	{ INNER_IP_TOS, 8, KEY_OPT_U8,
 	  offsetof(struct hclge_fd_rule, tuples.ip_tos),
 	  offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) },
@@ -427,14 +429,18 @@ static const struct key_info tuple_key_info[] = {
 	{ INNER_DST_IP, 32, KEY_OPT_IP,
 	  offsetof(struct hclge_fd_rule, tuples.dst_ip),
 	  offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) },
-	{ INNER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 },
+	{ INNER_L3_RSV, 16, KEY_OPT_LE16,
+	  offsetof(struct hclge_fd_rule, tuples.l3_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) },
 	{ INNER_SRC_PORT, 16, KEY_OPT_LE16,
 	  offsetof(struct hclge_fd_rule, tuples.src_port),
 	  offsetof(struct hclge_fd_rule, tuples_mask.src_port) },
 	{ INNER_DST_PORT, 16, KEY_OPT_LE16,
 	  offsetof(struct hclge_fd_rule, tuples.dst_port),
 	  offsetof(struct hclge_fd_rule, tuples_mask.dst_port) },
-	{ INNER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 },
+	{ INNER_L4_RSV, 32, KEY_OPT_LE32,
+	  offsetof(struct hclge_fd_rule, tuples.l4_user_def),
+	  offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) },
 };
 
 static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
@@ -5276,6 +5282,133 @@ static void hclge_fd_insert_rule_node(struct hlist_head *hlist,
 		hlist_add_head(&rule->rule_node, hlist);
 }
 
+static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev,
+				     struct hclge_fd_user_def_cfg *cfg)
+{
+	struct hclge_fd_user_def_cfg_cmd *req;
+	struct hclge_desc desc;
+	u16 data = 0;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false);
+
+	req = (struct hclge_fd_user_def_cfg_cmd *)desc.data;
+
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset);
+	req->ol2_cfg = cpu_to_le16(data);
+
+	data = 0;
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset);
+	req->ol3_cfg = cpu_to_le16(data);
+
+	data = 0;
+	hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0);
+	hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M,
+			HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset);
+	req->ol4_cfg = cpu_to_le16(data);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"failed to set fd user def data, ret= %d\n", ret);
+	return ret;
+}
+
+static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked)
+{
+	int ret;
+
+	if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state))
+		return;
+
+	if (!locked)
+		spin_lock_bh(&hdev->fd_rule_lock);
+
+	ret = hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg);
+	if (ret)
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+
+	if (!locked)
+		spin_unlock_bh(&hdev->fd_rule_lock);
+}
+
+static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev,
+					  struct hclge_fd_rule *rule)
+{
+	struct hlist_head *hlist = &hdev->fd_rule_list;
+	struct hclge_fd_rule *fd_rule, *parent = NULL;
+	struct hclge_fd_user_def_info *info, *old_info;
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return 0;
+
+	/* for valid layer is start from 1, so need minus 1 to get the cfg */
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	info = &rule->ep.user_def;
+
+	if (!cfg->ref_cnt || cfg->offset == info->offset)
+		return 0;
+
+	if (cfg->ref_cnt > 1)
+		goto error;
+
+	fd_rule = hclge_find_fd_rule(hlist, rule->location, &parent);
+	if (fd_rule) {
+		old_info = &fd_rule->ep.user_def;
+		if (info->layer == old_info->layer)
+			return 0;
+	}
+
+error:
+	dev_err(&hdev->pdev->dev,
+		"No available offset for layer%d fd rule, each layer only support one user def offset.\n",
+		info->layer + 1);
+	return -ENOSPC;
+}
+
+static void hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev,
+					 struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return;
+
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	if (!cfg->ref_cnt) {
+		cfg->offset = rule->ep.user_def.offset;
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+	}
+	cfg->ref_cnt++;
+}
+
+static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev,
+					 struct hclge_fd_rule *rule)
+{
+	struct hclge_fd_user_def_cfg *cfg;
+
+	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
+	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
+		return;
+
+	cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1];
+	if (!cfg->ref_cnt)
+		return;
+
+	cfg->ref_cnt--;
+	if (!cfg->ref_cnt) {
+		cfg->offset = 0;
+		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
+	}
+}
+
 static void hclge_update_fd_list(struct hclge_dev *hdev,
 				 enum HCLGE_FD_NODE_STATE state, u16 location,
 				 struct hclge_fd_rule *new_rule)
@@ -5285,6 +5418,11 @@ static void hclge_update_fd_list(struct hclge_dev *hdev,
 
 	fd_rule = hclge_find_fd_rule(hlist, location, &parent);
 	if (fd_rule) {
+		hclge_fd_dec_user_def_refcnt(hdev, fd_rule);
+		if (state == HCLGE_FD_ACTIVE)
+			hclge_fd_inc_user_def_refcnt(hdev, new_rule);
+		hclge_sync_fd_user_def_cfg(hdev, true);
+
 		hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state);
 		return;
 	}
@@ -5299,6 +5437,9 @@ static void hclge_update_fd_list(struct hclge_dev *hdev,
 		return;
 	}
 
+	hclge_fd_inc_user_def_refcnt(hdev, new_rule);
+	hclge_sync_fd_user_def_cfg(hdev, true);
+
 	hclge_fd_insert_rule_node(hlist, new_rule, parent);
 	hclge_fd_inc_rule_cnt(hdev, new_rule->location);
 
@@ -5386,6 +5527,17 @@ static int hclge_set_fd_key_config(struct hclge_dev *hdev,
 	return ret;
 }
 
+static void hclge_fd_disable_user_def(struct hclge_dev *hdev)
+{
+	struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg;
+
+	spin_lock_bh(&hdev->fd_rule_lock);
+	memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg));
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
+	hclge_fd_set_user_def_cmd(hdev, cfg);
+}
+
 static int hclge_init_fd_config(struct hclge_dev *hdev)
 {
 #define LOW_2_WORDS		0x03
@@ -5426,9 +5578,12 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
 				BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
 	/* If use max 400bit key, we can support tuples for ether type */
-	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
+	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) {
 		key_cfg->tuple_active |=
 				BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC);
+		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
+			key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
+	}
 
 	/* roce_type is used to filter roce frames
 	 * dst_vport is used to specify the rule
@@ -5922,9 +6077,98 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 	return 0;
 }
 
+static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple,
+				       struct hclge_fd_user_def_info *info)
+{
+	switch (flow_type) {
+	case ETHER_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L2;
+		*unused_tuple &= ~BIT(INNER_L2_RSV);
+		break;
+	case IP_USER_FLOW:
+	case IPV6_USER_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L3;
+		*unused_tuple &= ~BIT(INNER_L3_RSV);
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		info->layer = HCLGE_FD_USER_DEF_L4;
+		*unused_tuple &= ~BIT(INNER_L4_RSV);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs)
+{
+	return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0;
+}
+
+static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev,
+					 struct ethtool_rx_flow_spec *fs,
+					 u32 *unused_tuple,
+					 struct hclge_fd_user_def_info *info)
+{
+	u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active;
+	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	u16 data, offset, data_mask, offset_mask;
+	int ret;
+
+	info->layer = HCLGE_FD_USER_DEF_NONE;
+	*unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
+
+	if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs))
+		return 0;
+
+	/* user-def data from ethtool is 64 bit value, the bit0~15 is used
+	 * for data, and bit32~47 is used for offset.
+	 */
+	data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
+	data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
+	offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
+	offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
+
+	if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) {
+		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) {
+		dev_err(&hdev->pdev->dev,
+			"user-def offset[%u] should be no more than %u\n",
+			offset, HCLGE_FD_MAX_USER_DEF_OFFSET);
+		return -EINVAL;
+	}
+
+	if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) {
+		dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n");
+		return -EINVAL;
+	}
+
+	ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"unsupported flow type for user-def bytes, ret = %d\n",
+			ret);
+		return ret;
+	}
+
+	info->data = data;
+	info->data_mask = data_mask;
+	info->offset = offset;
+
+	return 0;
+}
+
 static int hclge_fd_check_spec(struct hclge_dev *hdev,
 			       struct ethtool_rx_flow_spec *fs,
-			       u32 *unused_tuple)
+			       u32 *unused_tuple,
+			       struct hclge_fd_user_def_info *info)
 {
 	u32 flow_type;
 	int ret;
@@ -5937,11 +6181,9 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 		return -EINVAL;
 	}
 
-	if ((fs->flow_type & FLOW_EXT) &&
-	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
-		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
-		return -EOPNOTSUPP;
-	}
+	ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info);
+	if (ret)
+		return ret;
 
 	flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 	switch (flow_type) {
@@ -6115,9 +6357,33 @@ static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev,
 	rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto);
 }
 
+static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info,
+					struct hclge_fd_rule *rule)
+{
+	switch (info->layer) {
+	case HCLGE_FD_USER_DEF_L2:
+		rule->tuples.l2_user_def = info->data;
+		rule->tuples_mask.l2_user_def = info->data_mask;
+		break;
+	case HCLGE_FD_USER_DEF_L3:
+		rule->tuples.l3_user_def = info->data;
+		rule->tuples_mask.l3_user_def = info->data_mask;
+		break;
+	case HCLGE_FD_USER_DEF_L4:
+		rule->tuples.l4_user_def = (u32)info->data << 16;
+		rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16;
+		break;
+	default:
+		break;
+	}
+
+	rule->ep.user_def = *info;
+}
+
 static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 			      struct ethtool_rx_flow_spec *fs,
-			      struct hclge_fd_rule *rule)
+			      struct hclge_fd_rule *rule,
+			      struct hclge_fd_user_def_info *info)
 {
 	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 
@@ -6156,6 +6422,7 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	if (fs->flow_type & FLOW_EXT) {
 		rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci);
 		rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci);
+		hclge_fd_get_user_def_tuple(info, rule);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
@@ -6195,6 +6462,10 @@ static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
 		return -EINVAL;
 	}
 
+	ret = hclge_fd_check_user_def_refcnt(hdev, rule);
+	if (ret)
+		goto out;
+
 	ret = hclge_clear_arfs_rules(hdev);
 	if (ret)
 		goto out;
@@ -6260,6 +6531,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	struct hclge_fd_user_def_info info;
 	u16 dst_vport_id = 0, q_index = 0;
 	struct ethtool_rx_flow_spec *fs;
 	struct hclge_fd_rule *rule;
@@ -6281,7 +6553,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 
 	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
-	ret = hclge_fd_check_spec(hdev, fs, &unused);
+	ret = hclge_fd_check_spec(hdev, fs, &unused, &info);
 	if (ret)
 		return ret;
 
@@ -6294,7 +6566,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	if (!rule)
 		return -ENOMEM;
 
-	ret = hclge_fd_get_tuple(hdev, fs, rule);
+	ret = hclge_fd_get_tuple(hdev, fs, rule, &info);
 	if (ret) {
 		kfree(rule);
 		return ret;
@@ -6387,6 +6659,7 @@ static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev,
 static void hclge_del_all_fd_entries(struct hclge_dev *hdev)
 {
 	hclge_clear_fd_rules_in_list(hdev, true);
+	hclge_fd_disable_user_def(hdev);
 }
 
 static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -6563,6 +6836,24 @@ static void hclge_fd_get_ether_info(struct hclge_fd_rule *rule,
 			0 : cpu_to_be16(rule->tuples_mask.ether_proto);
 }
 
+static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs,
+				       struct hclge_fd_rule *rule)
+{
+	if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) ==
+	    HCLGE_FD_TUPLE_USER_DEF_TUPLES) {
+		fs->h_ext.data[0] = 0;
+		fs->h_ext.data[1] = 0;
+		fs->m_ext.data[0] = 0;
+		fs->m_ext.data[1] = 0;
+	} else {
+		fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset);
+		fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data);
+		fs->m_ext.data[0] =
+				cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK);
+		fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask);
+	}
+}
+
 static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
 				  struct hclge_fd_rule *rule)
 {
@@ -6571,6 +6862,8 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
 		fs->m_ext.vlan_tci =
 				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
 				0 : cpu_to_be16(rule->tuples_mask.vlan_tag1);
+
+		hclge_fd_get_user_def_info(fs, rule);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
@@ -7179,6 +7472,8 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev)
 		hclge_clear_fd_rules_in_list(hdev, clear_list);
 	}
 
+	hclge_sync_fd_user_def_cfg(hdev, false);
+
 	hclge_sync_fd_list(hdev, &hdev->fd_rule_list);
 }
 
@@ -9734,7 +10029,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev)
 	hclge_restore_mac_table_common(vport);
 	hclge_restore_vport_vlan_table(vport);
 	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
-
+	set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 	hclge_restore_fd_entries(handle);
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index ccc79d9b2e2b..97e77e2f7539 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -225,6 +225,7 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_RST_FAIL,
 	HCLGE_STATE_FD_TBL_CHANGED,
 	HCLGE_STATE_FD_CLEAR_ALL,
+	HCLGE_STATE_FD_USER_DEF_CHANGED,
 	HCLGE_STATE_MAX
 };
 
@@ -538,6 +539,9 @@ enum HCLGE_FD_TUPLE {
 	MAX_TUPLE,
 };
 
+#define HCLGE_FD_TUPLE_USER_DEF_TUPLES \
+	(BIT(INNER_L2_RSV) | BIT(INNER_L3_RSV) | BIT(INNER_L4_RSV))
+
 enum HCLGE_FD_META_DATA {
 	PACKET_TYPE_ID,
 	IP_FRAGEMENT,
@@ -572,6 +576,11 @@ struct key_info {
 #define MAX_KEY_BYTES	(MAX_KEY_DWORDS * 4)
 #define MAX_META_DATA_LENGTH	32
 
+#define HCLGE_FD_MAX_USER_DEF_OFFSET	9000
+#define HCLGE_FD_USER_DEF_DATA		GENMASK(15, 0)
+#define HCLGE_FD_USER_DEF_OFFSET	GENMASK(15, 0)
+#define HCLGE_FD_USER_DEF_OFFSET_UNMASK	GENMASK(15, 0)
+
 /* assigned by firmware, the real filter number for each pf may be less */
 #define MAX_FD_FILTER_NUM	4096
 #define HCLGE_ARFS_EXPIRE_INTERVAL	5UL
@@ -601,6 +610,26 @@ enum HCLGE_FD_NODE_STATE {
 	HCLGE_FD_DELETED,
 };
 
+enum HCLGE_FD_USER_DEF_LAYER {
+	HCLGE_FD_USER_DEF_NONE,
+	HCLGE_FD_USER_DEF_L2,
+	HCLGE_FD_USER_DEF_L3,
+	HCLGE_FD_USER_DEF_L4,
+};
+
+#define HCLGE_FD_USER_DEF_LAYER_NUM 3
+struct hclge_fd_user_def_cfg {
+	u16 ref_cnt;
+	u16 offset;
+};
+
+struct hclge_fd_user_def_info {
+	enum HCLGE_FD_USER_DEF_LAYER layer;
+	u16 data;
+	u16 data_mask;
+	u16 offset;
+};
+
 struct hclge_fd_key_cfg {
 	u8 key_sel;
 	u8 inner_sipv6_word_en;
@@ -617,6 +646,7 @@ struct hclge_fd_cfg {
 	u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */
 	u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */
 	struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM];
+	struct hclge_fd_user_def_cfg user_def_cfg[HCLGE_FD_USER_DEF_LAYER_NUM];
 };
 
 #define IPV4_INDEX	3
@@ -633,6 +663,9 @@ struct hclge_fd_rule_tuples {
 	u16 dst_port;
 	u16 vlan_tag1;
 	u16 ether_proto;
+	u16 l2_user_def;
+	u16 l3_user_def;
+	u32 l4_user_def;
 	u8 ip_tos;
 	u8 ip_proto;
 };
@@ -651,6 +684,9 @@ struct hclge_fd_rule {
 		struct {
 			u16 flow_id; /* only used for arfs */
 		} arfs;
+		struct {
+			struct hclge_fd_user_def_info user_def;
+		} ep;
 	};
 	u16 queue_id;
 	u16 vf_id;
-- 
cgit v1.2.3


From 43a440c4007b28c473afba966e8410459db4975f Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:39 -0400
Subject: bnxt_en: Improve the status_reliable flag in bp->fw_health.

In order to read the firmware health status, we first need to determine
the register location and then the register may need to be mapped.
There are 2 code paths to do this.  The first one is done early as a
best effort attempt by the function bnxt_try_map_fw_health_reg().  The
second one is done later in the function bnxt_map_fw_health_regs()
after establishing communications with the firmware.  We currently
only set fw_health->status_reliable if we can successfully set up the
health register in the first code path.

Improve the scheme by setting the fw_health->status_reliable flag if
either (or both) code paths can successfully set up the health
register.  This flag is relied upon during run-time when we need to
check the health status.  So this will make it work better.

During ifdown, if the health register is mapped, we need to invalidate
the health register mapping because a potential fw reset will reset
the mapping.  Similarly, we need to do the same after firmware reset
during recovery.  We'll remap it during ifup.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b53a0d87371a..16cf18eb7b3d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7540,6 +7540,19 @@ static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg)
 					 BNXT_FW_HEALTH_WIN_MAP_OFF);
 }
 
+static void bnxt_inv_fw_health_reg(struct bnxt *bp)
+{
+	struct bnxt_fw_health *fw_health = bp->fw_health;
+	u32 reg_type;
+
+	if (!fw_health || !fw_health->status_reliable)
+		return;
+
+	reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]);
+	if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC)
+		fw_health->status_reliable = false;
+}
+
 static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 {
 	void __iomem *hs;
@@ -7547,6 +7560,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 	u32 reg_type;
 	u32 sig;
 
+	if (bp->fw_health)
+		bp->fw_health->status_reliable = false;
+
 	__bnxt_map_fw_health_reg(bp, HCOMM_STATUS_STRUCT_LOC);
 	hs = bp->bar0 + BNXT_FW_HEALTH_WIN_OFF(HCOMM_STATUS_STRUCT_LOC);
 
@@ -7558,11 +7574,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)
 					     BNXT_FW_HEALTH_WIN_BASE +
 					     BNXT_GRC_REG_CHIP_NUM);
 		}
-		if (!BNXT_CHIP_P5(bp)) {
-			if (bp->fw_health)
-				bp->fw_health->status_reliable = false;
+		if (!BNXT_CHIP_P5(bp))
 			return;
-		}
+
 		status_loc = BNXT_GRC_REG_STATUS_P5 |
 			     BNXT_FW_HEALTH_REG_TYPE_BAR0;
 	} else {
@@ -7592,6 +7606,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
 	u32 reg_base = 0xffffffff;
 	int i;
 
+	bp->fw_health->status_reliable = false;
 	/* Only pre-map the monitoring GRC registers using window 3 */
 	for (i = 0; i < 4; i++) {
 		u32 reg = fw_health->regs[i];
@@ -7604,6 +7619,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp)
 			return -ERANGE;
 		fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg);
 	}
+	bp->fw_health->status_reliable = true;
 	if (reg_base == 0xffffffff)
 		return 0;
 
@@ -9556,13 +9572,17 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 	if (rc)
 		return rc;
 
-	if (!up)
+	if (!up) {
+		bnxt_inv_fw_health_reg(bp);
 		return 0;
+	}
 
 	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
 		resc_reinit = true;
 	if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
 		fw_reset = true;
+	else if (bp->fw_health && !bp->fw_health->status_reliable)
+		bnxt_try_map_fw_health_reg(bp);
 
 	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
 		netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
@@ -11723,6 +11743,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
 		return;
 	case BNXT_FW_RESET_STATE_ENABLE_DEV:
+		bnxt_inv_fw_health_reg(bp);
 		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
 			u32 val;
 
-- 
cgit v1.2.3


From 80a9641f09f890a27a57e8ad30472553e0f769a6 Mon Sep 17 00:00:00 2001
From: Pavan Chebbi <pavan.chebbi@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:40 -0400
Subject: bnxt_en: Improve wait for firmware commands completion

In situations where FW has crashed, the bnxt_hwrm_do_send_msg() call
will have to wait until timeout for each firmware message.  This
generally takes about half a second for each firmware message.  If we
try to unload the driver n this state, the unload sequence will take
a long time to complete.

Improve this by checking the health register if it is available and
abort the wait for the firmware response if the register shows that
firmware is not healthy.  The very first message HWRM_VER_GET is
excluded from this check because that message is used to poll for
firmware to come out of reset during error recovery.

Signed-off-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 28 ++++++++++++++++++++++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  5 +++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 16cf18eb7b3d..deba552465f6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4500,12 +4500,15 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
 				return -EBUSY;
 			/* on first few passes, just barely sleep */
-			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
-			else
+			} else {
+				if (HWRM_WAIT_MUST_ABORT(bp, req))
+					break;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
+			}
 		}
 
 		if (bp->hwrm_intr_seq_id != (u16)~seq_id) {
@@ -4530,15 +4533,19 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 			if (len)
 				break;
 			/* on first few passes, just barely sleep */
-			if (i < HWRM_SHORT_TIMEOUT_COUNTER)
+			if (i < HWRM_SHORT_TIMEOUT_COUNTER) {
 				usleep_range(HWRM_SHORT_MIN_TIMEOUT,
 					     HWRM_SHORT_MAX_TIMEOUT);
-			else
+			} else {
+				if (HWRM_WAIT_MUST_ABORT(bp, req))
+					goto timeout_abort;
 				usleep_range(HWRM_MIN_TIMEOUT,
 					     HWRM_MAX_TIMEOUT);
+			}
 		}
 
 		if (i >= tmo_count) {
+timeout_abort:
 			if (!silent)
 				netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n",
 					   HWRM_TOTAL_TIMEOUT(i),
@@ -7540,6 +7547,19 @@ static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg)
 					 BNXT_FW_HEALTH_WIN_MAP_OFF);
 }
 
+bool bnxt_is_fw_healthy(struct bnxt *bp)
+{
+	if (bp->fw_health && bp->fw_health->status_reliable) {
+		u32 fw_status;
+
+		fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
+		if (fw_status && !BNXT_FW_IS_HEALTHY(fw_status))
+			return false;
+	}
+
+	return true;
+}
+
 static void bnxt_inv_fw_health_reg(struct bnxt *bp)
 {
 	struct bnxt_fw_health *fw_health = bp->fw_health;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1259e68cba2a..e77d60712954 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -671,6 +671,10 @@ struct nqe_cn {
 #define HWRM_MIN_TIMEOUT		25
 #define HWRM_MAX_TIMEOUT		40
 
+#define HWRM_WAIT_MUST_ABORT(bp, req)					\
+	(le16_to_cpu((req)->req_type) != HWRM_VER_GET &&		\
+	 !bnxt_is_fw_healthy(bp))
+
 #define HWRM_TOTAL_TIMEOUT(n)	(((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ?	\
 	((n) * HWRM_SHORT_MIN_TIMEOUT) :				\
 	(HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +		\
@@ -2228,6 +2232,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
+bool bnxt_is_fw_healthy(struct bnxt *bp);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_half_open_nic(struct bnxt *bp);
-- 
cgit v1.2.3


From a2f3835cc68a2222d0ab97862187ed98e65fe682 Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:41 -0400
Subject: bnxt_en: don't fake firmware response success when PCI is disabled

The original intent here is to allow commands during reset to succeed
without error when the device is disabled, to ensure that cleanup
completes normally during NIC close, where firmware is not necessarily
expected to respond.

The problem with faking success during reset's PCI disablement is that
unrelated ULP commands will also see inadvertent success during reset
when failure would otherwise be appropriate. It is better to return
a different error result such that reset related code can detect
this unique condition and ignore as appropriate.

Note, the pci_disable_device() when firmware is fatally wounded in
bnxt_fw_reset_close() does not need to be addressed, as subsequent
commands are already expected to fail due to the BNXT_NO_FW_ACCESS()
check in bnxt_hwrm_do_send_msg().

Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index deba552465f6..3624e79667b6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4470,7 +4470,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
 	writel(1, bp->bar0 + doorbell_offset);
 
 	if (!pci_is_enabled(bp->pdev))
-		return 0;
+		return -ENODEV;
 
 	if (!timeout)
 		timeout = DFLT_HWRM_CMD_TIMEOUT;
@@ -11680,7 +11680,7 @@ static void bnxt_reset_all(struct bnxt *bp)
 		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
 		req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
 		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-		if (rc)
+		if (rc != -ENODEV)
 			netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc);
 	}
 	bp->fw_reset_timestamp = jiffies;
-- 
cgit v1.2.3


From 15a7deb895497e4c9496b98367e4a0671add03f1 Mon Sep 17 00:00:00 2001
From: Scott Branden <scott.branden@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:42 -0400
Subject: bnxt_en: check return value of bnxt_hwrm_func_resc_qcaps

Check return value of call to bnxt_hwrm_func_resc_qcaps in
bnxt_hwrm_if_change and return failure on error.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3624e79667b6..7f40dd7d847d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9634,6 +9634,9 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 			struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
 			rc = bnxt_hwrm_func_resc_qcaps(bp, true);
+			if (rc)
+				netdev_err(bp->dev, "resc_qcaps failed\n");
+
 			hw_resc->resv_cp_rings = 0;
 			hw_resc->resv_stat_ctxs = 0;
 			hw_resc->resv_irqs = 0;
@@ -9647,7 +9650,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 			}
 		}
 	}
-	return 0;
+	return rc;
 }
 
 static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
-- 
cgit v1.2.3


From 2924ad95cb51673ed3544cf371cafc66e2c76cc8 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:43 -0400
Subject: bnxt_en: Set BNXT_STATE_FW_RESET_DET flag earlier for the RDMA
 driver.

During ifup, if the driver detects that firmware has gone through a
reset, it will go through a re-probe sequence.  If the RDMA driver is
loaded, the re-probe sequence includes calling the RDMA driver to stop.
We need to set the BNXT_STATE_FW_RESET_DET flag earlier so that it is
visible to the RDMA driver.  The RDMA driver's stop sequence is
different if firmware has gone through a reset.

Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: P B S Naresh Kumar <nareshkumar.pbs@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7f40dd7d847d..edbe5982cf41 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9611,6 +9611,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 	}
 	if (resc_reinit || fw_reset) {
 		if (fw_reset) {
+			set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 			if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 				bnxt_ulp_stop(bp);
 			bnxt_free_ctx_mem(bp);
@@ -9619,16 +9620,17 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 			bnxt_dcb_free(bp);
 			rc = bnxt_fw_init_one(bp);
 			if (rc) {
+				clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 				set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
 				return rc;
 			}
 			bnxt_clear_int_mode(bp);
 			rc = bnxt_init_int_mode(bp);
 			if (rc) {
+				clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 				netdev_err(bp->dev, "init int mode failed\n");
 				return rc;
 			}
-			set_bit(BNXT_STATE_FW_RESET_DET, &bp->state);
 		}
 		if (BNXT_NEW_RM(bp)) {
 			struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-- 
cgit v1.2.3


From bae8a00379f4c1327c8e38a768083460b5ad5b12 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:44 -0400
Subject: bnxt_en: Remove the read of BNXT_FW_RESET_INPROG_REG after firmware
 reset.

Once the chip goes through reset, the register mapping may be lost
and any read of the mapped health registers may return garbage value
until the registers are mapped again in the init path.

Reading BNXT_FW_RESET_INPROG_REG after firmware reset will likely
return garbage value due to the above reason.  Reading this register
is for information purpose only so remove it.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index edbe5982cf41..6db5e927a473 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11769,28 +11769,19 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 		return;
 	case BNXT_FW_RESET_STATE_ENABLE_DEV:
 		bnxt_inv_fw_health_reg(bp);
-		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
-			u32 val;
-
-			if (!bp->fw_reset_min_dsecs) {
-				u16 val;
-
-				pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID,
-						     &val);
-				if (val == 0xffff) {
-					if (bnxt_fw_reset_timeout(bp)) {
-						netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n");
-						goto fw_reset_abort;
-					}
-					bnxt_queue_fw_reset_work(bp, HZ / 1000);
-					return;
+		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
+		    !bp->fw_reset_min_dsecs) {
+			u16 val;
+
+			pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val);
+			if (val == 0xffff) {
+				if (bnxt_fw_reset_timeout(bp)) {
+					netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n");
+					goto fw_reset_abort;
 				}
+				bnxt_queue_fw_reset_work(bp, HZ / 1000);
+				return;
 			}
-			val = bnxt_fw_health_readl(bp,
-						   BNXT_FW_RESET_INPROG_REG);
-			if (val)
-				netdev_warn(bp->dev, "FW reset inprog %x after min wait time.\n",
-					    val);
 		}
 		clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state);
 		if (pci_enable_device(bp->pdev)) {
-- 
cgit v1.2.3


From 861aae786f2f7e1cab7926f7bb7783cb893e7edb Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 22 Mar 2021 03:08:45 -0400
Subject: bnxt_en: Enhance retry of the first message to the firmware.

Two enhancements:

1. Read the health status first before sending the first
HWRM_VER_GET message to firmware instead of the other way around.
This guarantees we got the accurate health status before we attempt
to send the message.

2. We currently only retry sending the first HWRM_VER_GET message to
the firmware if the firmware is in the process of booting.  If the
firmware is in error state and is doing core dump for example, the
driver should also retry if the health register has the RECOVERING
flag set.  This flag indicates the firmware will undergo recovery
soon.  Modify the retry logic to retry for this case as well.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6db5e927a473..6f13642121c4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9530,9 +9530,10 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
 
 		mutex_lock(&bp->hwrm_cmd_lock);
 		do {
-			rc = __bnxt_hwrm_ver_get(bp, true);
 			sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
-			if (!sts || !BNXT_FW_IS_BOOTING(sts))
+			rc = __bnxt_hwrm_ver_get(bp, true);
+			if (!sts || (!BNXT_FW_IS_BOOTING(sts) &&
+				     !BNXT_FW_IS_RECOVERING(sts)))
 				break;
 			retry++;
 		} while (rc == -EBUSY && retry < BNXT_FW_RETRY);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index e77d60712954..29061c577baa 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1564,6 +1564,7 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_STATUS_HEALTH_MSK	0xffff
 #define BNXT_FW_STATUS_HEALTHY		0x8000
 #define BNXT_FW_STATUS_SHUTDOWN		0x100000
+#define BNXT_FW_STATUS_RECOVERING	0x400000
 
 #define BNXT_FW_IS_HEALTHY(sts)		(((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\
 					 BNXT_FW_STATUS_HEALTHY)
@@ -1574,6 +1575,9 @@ struct bnxt_fw_reporter_ctx {
 #define BNXT_FW_IS_ERR(sts)		(((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \
 					 BNXT_FW_STATUS_HEALTHY)
 
+#define BNXT_FW_IS_RECOVERING(sts)	(BNXT_FW_IS_ERR(sts) &&		       \
+					 ((sts) & BNXT_FW_STATUS_RECOVERING))
+
 #define BNXT_FW_RETRY			5
 #define BNXT_FW_IF_RETRY		10
 
-- 
cgit v1.2.3


From 3de43dc98615b15516ec65591ebe28ceb7d8d921 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 22 Mar 2021 11:30:09 +0200
Subject: net: dsa: mv88e6xxx: fix up kerneldoc some more

Commit 0b5294483c35 ("net: dsa: mv88e6xxx: scratch: Fixup kerneldoc")
has addressed some but not all kerneldoc warnings for the Global 2
Scratch register accessors. Namely, we have some mismatches between
the function names in the kerneldoc and the ones in the actual code.
Let's adjust the comments so that they match the functions they're
sitting next to.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global2_scratch.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
index 7c2c67405322..eda710062933 100644
--- a/drivers/net/dsa/mv88e6xxx/global2_scratch.c
+++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c
@@ -42,7 +42,7 @@ static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg,
 }
 
 /**
- * mv88e6xxx_g2_scratch_gpio_get_bit - get a bit
+ * mv88e6xxx_g2_scratch_get_bit - get a bit
  * @chip: chip private data
  * @base_reg: base of scratch bits
  * @offset: index of bit within the register
@@ -67,7 +67,7 @@ static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip,
 }
 
 /**
- * mv88e6xxx_g2_scratch_gpio_set_bit - set (or clear) a bit
+ * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit
  * @chip: chip private data
  * @base_reg: base of scratch bits
  * @offset: index of bit within the register
@@ -240,7 +240,7 @@ const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {
 };
 
 /**
- * mv88e6xxx_g2_gpio_set_smi - set gpio muxing for external smi
+ * mv88e6xxx_g2_scratch_gpio_set_smi - set gpio muxing for external smi
  * @chip: chip private data
  * @external: set mux for external smi, or free for gpio usage
  *
-- 
cgit v1.2.3


From c3c3791ce31eb7b4fa140df20985285516ef99f2 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 22 Mar 2021 17:53:14 +0800
Subject: cxgb4: Remove redundant NULL check

Fix the following coccicheck warnings:

./drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c:3540:2-8: WARNING: NULL
check before some freeing functions is not needed.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 6c85a10f465c..d2ba40c19696 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -3536,8 +3536,7 @@ out:
 	}
 
 out_free:
-	if (data)
-		kvfree(data);
+	kvfree(data);
 
 #undef QDESC_GET_FLQ
 #undef QDESC_GET_RXQ
-- 
cgit v1.2.3


From f5fcca89f59c84a917e6d470258a12eb3244875e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 22 Mar 2021 12:38:19 +0200
Subject: net: bridge: declare br_vlan_tunnel_lookup argument tunnel_id as
 __be64

The only caller of br_vlan_tunnel_lookup, br_handle_ingress_vlan_tunnel,
extracts the tunnel_id from struct ip_tunnel_info::struct ip_tunnel_key::
tun_id which is a __be64 value.

The exact endianness does not seem to matter, because the tunnel id is
just used as a lookup key for the VLAN group's tunnel hash table, and
the value is not interpreted directly per se. Moreover,
rhashtable_lookup_fast treats the key argument as a const void *.

Therefore, there is no functional change associated with this patch,
just one to silence "make W=1" builds.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 169e005fbda2..0d3a8c01552e 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -35,7 +35,7 @@ static const struct rhashtable_params br_vlan_tunnel_rht_params = {
 };
 
 static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
-						     u64 tunnel_id)
+						     __be64 tunnel_id)
 {
 	return rhashtable_lookup_fast(tbl, &tunnel_id,
 				      br_vlan_tunnel_rht_params);
-- 
cgit v1.2.3


From 5da9ace3405f40d8d93c1b519696f47bc4402318 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 22 Mar 2021 13:30:19 +0200
Subject: net: make xps_needed and xps_rxqs_needed static

Since their introduction in commit 04157469b7b8 ("net: Use static_key
for XPS maps"), xps_needed and xps_rxqs_needed were never used outside
net/core/dev.c, so I don't really understand why they were exported as
symbols in the first place.

This is needed in order to silence a "make W=1" warning about these
static keys not being declared as static variables, but not having a
previous declaration in a header file nonetheless.

Cc: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index be941ed754ac..ffab3928eeeb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2451,10 +2451,8 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
-struct static_key xps_needed __read_mostly;
-EXPORT_SYMBOL(xps_needed);
-struct static_key xps_rxqs_needed __read_mostly;
-EXPORT_SYMBOL(xps_rxqs_needed);
+static struct static_key xps_needed __read_mostly;
+static struct static_key xps_rxqs_needed __read_mostly;
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
-- 
cgit v1.2.3


From 744b8376632208137fe4acc9967b93e2970732a3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 22 Mar 2021 13:31:48 +0200
Subject: net: move the ptype_all and ptype_base declarations to
 include/linux/netdevice.h

ptype_all and ptype_base are declared in net/core/dev.c as non-static,
because they are used by net-procfs.c too. However, a "make W=1" build
complains that there was no previous declaration of ptype_all and
ptype_base in a header file, so this way of declaring things constitutes
a violation of coding style.

Let's move the extern declarations of ptype_all and ptype_base to the
linux/netdevice.h file, which is included by net-procfs.c too.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/net-procfs.c     | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 33b8ea08996e..e4a503288d9b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5336,6 +5336,9 @@ do {								\
 #define PTYPE_HASH_SIZE	(16)
 #define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)
 
+extern struct list_head ptype_all __read_mostly;
+extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+
 extern struct net_device *blackhole_netdev;
 
 #endif	/* _LINUX_NETDEVICE_H */
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index c714e6a9dad4..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -10,9 +10,6 @@
 #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
 
-extern struct list_head ptype_all __read_mostly;
-extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
-
 static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
-- 
cgit v1.2.3


From 13e8c216d2ed0bf99b6e8ae4d1edd1f17d6b6448 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 13:14:47 +0100
Subject: misdn: avoid -Wempty-body warning

gcc warns about a pointless condition:

drivers/isdn/hardware/mISDN/hfcmulti.c: In function 'hfcmulti_interrupt':
drivers/isdn/hardware/mISDN/hfcmulti.c:2752:17: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body]
 2752 |                 ; /* external IRQ */

As the check has no effect, just remove it.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 7013a3f08429..14092152b786 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -2748,8 +2748,6 @@ hfcmulti_interrupt(int intno, void *dev_id)
 		if (hc->ctype != HFC_TYPE_E1)
 			ph_state_irq(hc, r_irq_statech);
 	}
-	if (status & V_EXT_IRQSTA)
-		; /* external IRQ */
 	if (status & V_LOST_STA) {
 		/* LOST IRQ */
 		HFC_outb(hc, R_INC_RES_FIFO, V_RES_LOST); /* clear irq! */
-- 
cgit v1.2.3


From aa785f93fcb4e8c66f5d3de88cd7626774f13c1d Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 17:51:55 +0530
Subject: net: l2tp: Fix a typo

s/verifed/verified/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 203890e378cb..2ee20743cb41 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -802,7 +802,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 	u16 version;
 	int length;
 
-	/* UDP has verifed checksum */
+	/* UDP has verified checksum */
 
 	/* UDP always verifies the packet length. */
 	__skb_pull(skb, sizeof(struct udphdr));
-- 
cgit v1.2.3


From 7ec05a6035480f3a5934b2b31222620b2e906163 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 22 Mar 2021 12:23:59 +0000
Subject: net: stmmac: platform: fix build error with !CONFIG_PM_SLEEP

Get rid of the CONFIG_PM_SLEEP ifdefery to fix the build error
and use __maybe_unused for the suspend()/resume() hooks to avoid
build warning:

drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c:769:21:
 error: 'stmmac_runtime_suspend' undeclared here (not in a function); did you mean 'stmmac_suspend'?
  769 |  SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL)
      |                     ^~~~~~~~~~~~~~~~~~~~~~
./include/linux/pm.h:342:21: note: in definition of macro 'SET_RUNTIME_PM_OPS'
  342 |  .runtime_suspend = suspend_fn, \
      |                     ^~~~~~~~~~
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c:769:45:
 error: 'stmmac_runtime_resume' undeclared here (not in a function)
  769 |  SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL)
      |                                             ^~~~~~~~~~~~~~~~~~~~~
./include/linux/pm.h:343:20: note: in definition of macro 'SET_RUNTIME_PM_OPS'
  343 |  .runtime_resume = resume_fn, \
      |                    ^~~~~~~~~

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index a70b0bc84b2a..5a1e018884e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -704,7 +704,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev)
 }
 EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
 
-#ifdef CONFIG_PM_SLEEP
 /**
  * stmmac_pltfr_suspend
  * @dev: device pointer
@@ -712,7 +711,7 @@ EXPORT_SYMBOL_GPL(stmmac_pltfr_remove);
  * call the main suspend function and then, if required, on some platform, it
  * can call an exit helper.
  */
-static int stmmac_pltfr_suspend(struct device *dev)
+static int __maybe_unused stmmac_pltfr_suspend(struct device *dev)
 {
 	int ret;
 	struct net_device *ndev = dev_get_drvdata(dev);
@@ -733,7 +732,7 @@ static int stmmac_pltfr_suspend(struct device *dev)
  * the main resume function, on some platforms, it can call own init helper
  * if required.
  */
-static int stmmac_pltfr_resume(struct device *dev)
+static int __maybe_unused stmmac_pltfr_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -745,7 +744,7 @@ static int stmmac_pltfr_resume(struct device *dev)
 	return stmmac_resume(dev);
 }
 
-static int stmmac_runtime_suspend(struct device *dev)
+static int __maybe_unused stmmac_runtime_suspend(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
@@ -755,14 +754,13 @@ static int stmmac_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-static int stmmac_runtime_resume(struct device *dev)
+static int __maybe_unused stmmac_runtime_resume(struct device *dev)
 {
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 
 	return stmmac_bus_clks_config(priv, true);
 }
-#endif /* CONFIG_PM_SLEEP */
 
 const struct dev_pm_ops stmmac_pltfr_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume)
-- 
cgit v1.2.3


From 7f08ec6e04269ce53b664761c9108b44ed2f54ab Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Mon, 22 Mar 2021 16:43:29 +0100
Subject: net-sysfs: remove possible sleep from an RCU read-side critical
 section

xps_queue_show is mostly made of an RCU read-side critical section and
calls bitmap_zalloc with GFP_KERNEL in the middle of it. That is not
allowed as this call may sleep and such behaviours aren't allowed in RCU
read-side critical sections. Fix this by using GFP_NOWAIT instead.

Fixes: 5478fcd0f483 ("net: embed nr_ids in the xps maps")
Reported-by: kernel test robot <oliver.sang@intel.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 562a42fcd437..f6197774048b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1378,7 +1378,7 @@ static ssize_t xps_queue_show(struct net_device *dev, unsigned int index,
 	nr_ids = dev_maps ? dev_maps->nr_ids :
 		 (type == XPS_CPUS ? nr_cpu_ids : dev->num_rx_queues);
 
-	mask = bitmap_zalloc(nr_ids, GFP_KERNEL);
+	mask = bitmap_zalloc(nr_ids, GFP_NOWAIT);
 	if (!mask) {
 		rcu_read_unlock();
 		return -ENOMEM;
-- 
cgit v1.2.3


From 0353b4a96b7a9f60fe20d1b3ebd4931a4085f91c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Mon, 22 Mar 2021 17:45:27 +0200
Subject: net: bridge: when suppression is enabled exclude RARP packets

Recently we had an interop issue where RARP packets got suppressed with
bridge neigh suppression enabled, but the check in the code was meant to
suppress GARP. Exclude RARP packets from it which would allow some VMWare
setups to work, to quote the report:
"Those RARP packets usually get generated by vMware to notify physical
switches when vMotion occurs. vMware may use random sip/tip or just use
sip=tip=0. So the RARP packet sometimes get properly flooded by the vtep
and other times get dropped by the logic"

Reported-by: Amer Abdalamer <amer@nvidia.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_arp_nd_proxy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index dfec65eca8a6..3db1def4437b 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -160,7 +160,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
 	if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
 		if (p && (p->flags & BR_NEIGH_SUPPRESS))
 			return;
-		if (ipv4_is_zeronet(sip) || sip == tip) {
+		if (parp->ar_op != htons(ARPOP_RREQUEST) &&
+		    parp->ar_op != htons(ARPOP_RREPLY) &&
+		    (ipv4_is_zeronet(sip) || sip == tip)) {
 			/* prevent flooding to neigh suppress ports */
 			BR_INPUT_SKB_CB(skb)->proxyarp_replied = 1;
 			return;
-- 
cgit v1.2.3


From 08c99b92d76c5bb0208cf89cafd502bdb3b2c98c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:42 +0200
Subject: mlxsw: spectrum_router: Remove RTNL assertion

Remove the RTNL assertion in the nexthop notifier block. The assertion
is not needed given RTNL is never assumed to be taken.

This is a preparation for future patches where mlxsw will start handling
nexthop events that are not always sent with RTNL held.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index eda99d82766a..0e0b40e97783 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4699,8 +4699,6 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 
 	mutex_lock(&router->lock);
 
-	ASSERT_RTNL();
-
 	switch (event) {
 	case NEXTHOP_EVENT_REPLACE:
 		err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info);
-- 
cgit v1.2.3


From 26df5acc275b8b5fb14de1301feed6697f2433cf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:43 +0200
Subject: mlxsw: spectrum_router: Consolidate nexthop helpers

The helper mlxsw_sp_nexthop_offload() is actually interested in finding
out if the nexthop is both written to the adjacency table and forwarding
packets (as opposed to discarding them).

Rename it to mlxsw_sp_nexthop_is_forward() and remove
mlxsw_sp_nexthop_is_discard().

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c  | 15 ++++++---------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c |  9 ++-------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h |  3 +--
 3 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index ed81d4fa48ac..936224d8c2ea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -912,9 +912,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
 	u64 size = 0;
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
-		if (mlxsw_sp_nexthop_offload(nh) &&
-		    !mlxsw_sp_nexthop_group_has_ipip(nh) &&
-		    !mlxsw_sp_nexthop_is_discard(nh))
+		if (mlxsw_sp_nexthop_is_forward(nh) &&
+		    !mlxsw_sp_nexthop_group_has_ipip(nh))
 			size++;
 	return size;
 }
@@ -1105,9 +1104,8 @@ start_again:
 	nh_skip = nh_count;
 	nh_count = 0;
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
-		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
-		    mlxsw_sp_nexthop_is_discard(nh))
+		if (!mlxsw_sp_nexthop_is_forward(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
 			continue;
 
 		if (nh_count < nh_skip)
@@ -1187,9 +1185,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 	u32 adj_size = 0;
 
 	mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
-		if (!mlxsw_sp_nexthop_offload(nh) ||
-		    mlxsw_sp_nexthop_group_has_ipip(nh) ||
-		    mlxsw_sp_nexthop_is_discard(nh))
+		if (!mlxsw_sp_nexthop_is_forward(nh) ||
+		    mlxsw_sp_nexthop_group_has_ipip(nh))
 			continue;
 
 		mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 0e0b40e97783..593a40aa9af8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2979,9 +2979,9 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
 	return list_next_entry(nh, router_list_node);
 }
 
-bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
+bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
 {
-	return nh->offloaded;
+	return nh->offloaded && !nh->discard;
 }
 
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
@@ -3036,11 +3036,6 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
 	return false;
 }
 
-bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh)
-{
-	return nh->discard;
-}
-
 static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = {
 	.key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key),
 	.head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 2875ee8ec537..8ecd090a5d8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -195,13 +195,12 @@ mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
 				     const struct mlxsw_sp_ipip_entry *except);
 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
 					       struct mlxsw_sp_nexthop *nh);
-bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh);
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
 			     u32 *p_adj_size, u32 *p_adj_hash_index);
 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
-bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh);
 #define mlxsw_sp_nexthop_for_each(nh, router)				\
 	for (nh = mlxsw_sp_nexthop_next(router, NULL); nh;		\
 	     nh = mlxsw_sp_nexthop_next(router, nh))
-- 
cgit v1.2.3


From c6a5011bec0962cfddcce48065e29c65e9a6ec18 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:44 +0200
Subject: mlxsw: spectrum_router: Only provide MAC address for valid nexthops

The helper returns the MAC address associated with the nexthop. It is
only valid when the nexthop forwards packets and when it is an Ethernet
nexthop. Reflect this in the checks the helper is performing.

This is not an issue because the sole caller of the function only
invokes it for such nexthops.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 593a40aa9af8..b7358cf611c1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2986,7 +2986,8 @@ bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
 
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
 {
-	if (!nh->offloaded)
+	if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH ||
+	    !mlxsw_sp_nexthop_is_forward(nh))
 		return NULL;
 	return nh->neigh_entry->ha;
 }
-- 
cgit v1.2.3


From 248136fa251abfbd862194175977afd57ab5e760 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:45 +0200
Subject: mlxsw: spectrum_router: Adjust comments on nexthop fields

The comments assume that nexthops are simple Ethernet nexthops
that are programmed to forward packets to the associated neighbour. This
is no longer the case, as both IPinIP and blackhole nexthops are now
supported.

Adjust the comments to reflect these changes.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index b7358cf611c1..bdf519b569b6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2862,14 +2862,14 @@ struct mlxsw_sp_nexthop {
 	int norm_nh_weight;
 	int num_adj_entries;
 	struct mlxsw_sp_rif *rif;
-	u8 should_offload:1, /* set indicates this neigh is connected and
-			      * should be put to KVD linear area of this group.
+	u8 should_offload:1, /* set indicates this nexthop should be written
+			      * to the adjacency table.
 			      */
-	   offloaded:1, /* set in case the neigh is actually put into
-			 * KVD linear area of this group.
+	   offloaded:1, /* set indicates this nexthop was written to the
+			 * adjacency table.
 			 */
-	   update:1, /* set indicates that MAC of this neigh should be
-		      * updated in HW
+	   update:1, /* set indicates this nexthop should be updated in the
+		      * adjacency table (f.e., its MAC changed).
 		      */
 	   discard:1; /* nexthop is programmed to discard packets */
 	enum mlxsw_sp_nexthop_type type;
-- 
cgit v1.2.3


From 031d5c16065606efc387aa6865690037c13cefc4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:46 +0200
Subject: mlxsw: spectrum_router: Introduce nexthop action field

Currently, the action associated with the nexthop is assumed to be
'forward' unless the 'discard' bit is set.

Instead, simplify this by introducing a dedicated field to represent the
action of the nexthop. This will allow us to more easily introduce more
actions, such as trap.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 25 +++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index bdf519b569b6..3ad1e1bd2197 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2842,6 +2842,13 @@ enum mlxsw_sp_nexthop_type {
 	MLXSW_SP_NEXTHOP_TYPE_IPIP,
 };
 
+enum mlxsw_sp_nexthop_action {
+	/* Nexthop forwards packets to an egress RIF */
+	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
+	/* Nexthop discards packets */
+	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
+};
+
 struct mlxsw_sp_nexthop_key {
 	struct fib_nh *fib_nh;
 };
@@ -2868,10 +2875,10 @@ struct mlxsw_sp_nexthop {
 	   offloaded:1, /* set indicates this nexthop was written to the
 			 * adjacency table.
 			 */
-	   update:1, /* set indicates this nexthop should be updated in the
+	   update:1; /* set indicates this nexthop should be updated in the
 		      * adjacency table (f.e., its MAC changed).
 		      */
-	   discard:1; /* nexthop is programmed to discard packets */
+	enum mlxsw_sp_nexthop_action action;
 	enum mlxsw_sp_nexthop_type type;
 	union {
 		struct mlxsw_sp_neigh_entry *neigh_entry;
@@ -2981,7 +2988,7 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
 
 bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh)
 {
-	return nh->offloaded && !nh->discard;
+	return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD;
 }
 
 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
@@ -3408,7 +3415,7 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
 			    adj_index, nh->rif->rif_index);
-	if (nh->discard)
+	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
 	else
@@ -3828,10 +3835,12 @@ set_trap:
 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 					    bool removing)
 {
-	if (!removing)
+	if (!removing) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
 		nh->should_offload = 1;
-	else
+	} else {
 		nh->should_offload = 0;
+	}
 	nh->update = 1;
 }
 
@@ -4342,7 +4351,7 @@ static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp,
 {
 	u16 lb_rif_index = mlxsw_sp->router->lb_rif_index;
 
-	nh->discard = 1;
+	nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD;
 	nh->should_offload = 1;
 	/* While nexthops that discard packets do not forward packets
 	 * via an egress RIF, they still need to be programmed using a
@@ -4405,7 +4414,7 @@ err_type_init:
 static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_nexthop *nh)
 {
-	if (nh->discard)
+	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
 		mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh);
 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 	list_del(&nh->router_list_node);
-- 
cgit v1.2.3


From 1be2361e3ca715cd9315c3c4ebede8cdcfcbf7d5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:47 +0200
Subject: mlxsw: spectrum_router: Prepare for nexthops with trap action

Nexthops that need to be programmed with a trap action might not have a
valid router interface (RIF) associated with them. Therefore, use the
loopback RIF created during initialization to program them to the
device.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 3ad1e1bd2197..d09a76866a5f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3411,10 +3411,13 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
 	char ratr_pl[MLXSW_REG_RATR_LEN];
+	u16 rif_index;
 
+	rif_index = nh->rif ? nh->rif->rif_index :
+			      mlxsw_sp->router->lb_rif_index;
 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
-			    adj_index, nh->rif->rif_index);
+			    adj_index, rif_index);
 	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
-- 
cgit v1.2.3


From fc199d7c08c837095083e4c77678d9de1546945c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:48 +0200
Subject: mlxsw: spectrum_router: Add nexthop trap action support

Currently, nexthops are programmed with either forward or discard action
(for blackhole nexthops). Nexthops that do not have a valid MAC address
(neighbour) or router interface (RIF) are simply not written to the
adjacency table.

In resilient nexthop groups, the size of the group must remain fixed and
the kernel is in complete control of the layout of the adjacency table.
A nexthop without a valid MAC or RIF will therefore be written with a
trap action, to trigger neighbour resolution.

Allow such nexthops to be programmed to the adjacency table to enable
above mentioned use case.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 42 +++++++++++++++-------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d09a76866a5f..50286c6d0a8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2847,6 +2847,8 @@ enum mlxsw_sp_nexthop_action {
 	MLXSW_SP_NEXTHOP_ACTION_FORWARD,
 	/* Nexthop discards packets */
 	MLXSW_SP_NEXTHOP_ACTION_DISCARD,
+	/* Nexthop traps packets */
+	MLXSW_SP_NEXTHOP_ACTION_TRAP,
 };
 
 struct mlxsw_sp_nexthop_key {
@@ -3418,11 +3420,23 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
 			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
 			    adj_index, rif_index);
-	if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD)
+	switch (nh->action) {
+	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
+		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+		break;
+	case MLXSW_SP_NEXTHOP_ACTION_DISCARD:
 		mlxsw_reg_ratr_trap_action_set(ratr_pl,
 					       MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS);
-	else
-		mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+		break;
+	case MLXSW_SP_NEXTHOP_ACTION_TRAP:
+		mlxsw_reg_ratr_trap_action_set(ratr_pl,
+					       MLXSW_REG_RATR_TRAP_ACTION_TRAP);
+		mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
 	if (nh->counter_valid)
 		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
 	else
@@ -3495,16 +3509,18 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 		if (nh->update || reallocate) {
 			int err = 0;
 
-			switch (nh->type) {
-			case MLXSW_SP_NEXTHOP_TYPE_ETH:
-				err = mlxsw_sp_nexthop_update
-					    (mlxsw_sp, adj_index, nh);
-				break;
-			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
-				err = mlxsw_sp_nexthop_ipip_update
-					    (mlxsw_sp, adj_index, nh);
-				break;
-			}
+			/* When action is discard or trap, the nexthop must be
+			 * programmed as an Ethernet nexthop.
+			 */
+			if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
+			    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
+			    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
+				err = mlxsw_sp_nexthop_update(mlxsw_sp,
+							      adj_index, nh);
+			else
+				err = mlxsw_sp_nexthop_ipip_update(mlxsw_sp,
+								   adj_index,
+								   nh);
 			if (err)
 				return err;
 			nh->update = 0;
-- 
cgit v1.2.3


From 424603ccdd5eb00725f9080d7e8c018039816d17 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:49 +0200
Subject: mlxsw: spectrum_router: Rename nexthop update function to reflect its
 type

mlxsw_sp_nexthop_update() is used to update the configuration of
Ethernet-type nexthops, as opposed to mlxsw_sp_nexthop_ipip_update(),
which is used to update IPinIP-type nexthops.

Rename the function to mlxsw_sp_nexthop_eth_update(), so that it is
consistent with mlxsw_sp_nexthop_ipip_update().

It will allow us to introduce mlxsw_sp_nexthop_update() in a follow-up
patch, which calls either of above mentioned function based on the
nexthop's type.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c  |  4 ++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 17 ++++++++++-------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h |  4 ++--
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 936224d8c2ea..af2093fc5025 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -1195,8 +1195,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 			mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
 		else
 			mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
-		mlxsw_sp_nexthop_update(mlxsw_sp,
-					adj_index + adj_hash_index, nh);
+		mlxsw_sp_nexthop_eth_update(mlxsw_sp,
+					    adj_index + adj_hash_index, nh);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 50286c6d0a8a..1f1f8af63ef7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3408,8 +3408,9 @@ err_mass_update_vr:
 	return err;
 }
 
-static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				     struct mlxsw_sp_nexthop *nh)
+static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
+					 u32 adj_index,
+					 struct mlxsw_sp_nexthop *nh)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
 	char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -3445,15 +3446,16 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
 }
 
-int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			    struct mlxsw_sp_nexthop *nh)
+int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				struct mlxsw_sp_nexthop *nh)
 {
 	int i;
 
 	for (i = 0; i < nh->num_adj_entries; i++) {
 		int err;
 
-		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
+		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
+						    nh);
 		if (err)
 			return err;
 	}
@@ -3515,8 +3517,9 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
 			    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
 			    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
-				err = mlxsw_sp_nexthop_update(mlxsw_sp,
-							      adj_index, nh);
+				err = mlxsw_sp_nexthop_eth_update(mlxsw_sp,
+								  adj_index,
+								  nh);
 			else
 				err = mlxsw_sp_nexthop_ipip_update(mlxsw_sp,
 								   adj_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 8ecd090a5d8a..3d90d4eaba05 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -206,8 +206,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
 	     nh = mlxsw_sp_nexthop_next(router, nh))
 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter);
-int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			    struct mlxsw_sp_nexthop *nh);
+int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				struct mlxsw_sp_nexthop *nh);
 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop *nh);
 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
-- 
cgit v1.2.3


From 29017c643476daf57495c2ccd1a5fdc8dc5186ea Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:50 +0200
Subject: mlxsw: spectrum_router: Encapsulate nexthop update in a function

Encapsulate this functionality in a separate function, so that it could
be invoked by follow-up patches, when replacing a nexthop bucket that is
part of a resilient nexthop group.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 28 ++++++++++++----------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1f1f8af63ef7..6be225ec1997 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3491,6 +3491,20 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+				   struct mlxsw_sp_nexthop *nh)
+{
+	/* When action is discard or trap, the nexthop must be
+	 * programmed as an Ethernet nexthop.
+	 */
+	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
+	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
+	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
+		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh);
+	else
+		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh);
+}
+
 static int
 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_nexthop_group_info *nhgi,
@@ -3511,19 +3525,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 		if (nh->update || reallocate) {
 			int err = 0;
 
-			/* When action is discard or trap, the nexthop must be
-			 * programmed as an Ethernet nexthop.
-			 */
-			if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
-			    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
-			    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
-				err = mlxsw_sp_nexthop_eth_update(mlxsw_sp,
-								  adj_index,
-								  nh);
-			else
-				err = mlxsw_sp_nexthop_ipip_update(mlxsw_sp,
-								   adj_index,
-								   nh);
+			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh);
 			if (err)
 				return err;
 			nh->update = 0;
-- 
cgit v1.2.3


From 40f5429fce693bfb79dd9ec78d60576f17f13c76 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:51 +0200
Subject: mlxsw: spectrum_router: Break nexthop group entry validation to a
 separate function

The validation of a nexthop group entry is also necessary for resilient
nexthop groups, so break the validation to a separate function to allow
for code reuse in subsequent patches.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 36 +++++++++++++++-------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 6be225ec1997..fa190e27323e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4296,6 +4296,29 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 	return err;
 }
 
+static int
+mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp,
+					  const struct nh_notifier_single_info *nh,
+					  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack);
+	if (err)
+		return err;
+
+	/* Device only nexthops with an IPIP device are programmed as
+	 * encapsulating adjacency entries.
+	 */
+	if (!nh->gw_family && !nh->is_reject &&
+	    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int
 mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 				    const struct nh_notifier_grp_info *nh_grp,
@@ -4313,19 +4336,10 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 		int err;
 
 		nh = &nh_grp->nh_entries[i].nh;
-		err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh,
-							   extack);
+		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								extack);
 		if (err)
 			return err;
-
-		/* Device only nexthops with an IPIP device are programmed as
-		 * encapsulating adjacency entries.
-		 */
-		if (!nh->gw_family && !nh->is_reject &&
-		    !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) {
-			NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway");
-			return -EINVAL;
-		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From c1efd50002c00cbe51014ddf357d55162cd7d6d8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:52 +0200
Subject: mlxsw: spectrum_router: Avoid unnecessary neighbour updates

Avoid updating neighbour and adjacency entries in hardware when the
neighbour is already connected and its MAC address did not change. This
can happen, for example, when neighbour transitions between valid states
such as 'NUD_REACHABLE' and 'NUD_DELAY'.

This is especially important for resilient hashing as these updates will
result in adjacency entries being marked as active.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index fa190e27323e..4114f3b7d3cd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2662,6 +2662,10 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
 			goto out;
 	}
 
+	if (neigh_entry->connected && entry_connected &&
+	    !memcmp(neigh_entry->ha, ha, ETH_ALEN))
+		goto out;
+
 	memcpy(neigh_entry->ha, ha, ETH_ALEN);
 	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
 	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
-- 
cgit v1.2.3


From d354fdd923e7a3dc2f5c34cfcfb0401bd8c56ea8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:53 +0200
Subject: mlxsw: spectrum_router: Create per-ASIC router operations

There are several differences in the router module between Spectrum-1
and Spectrum-{2,3}. Currently, this is only apparent in the router
interface (RIF) operations that are split between these ASICs.

A subsequent patch is going to introduce another difference between
these ASICs.

Create per-ASIC router operations that will encapsulate all these
differences. For now, these operations are only used to set the per-ASIC
RIF operations in 'mlxsw_sp->router->rif_ops_arr'. Note that this fields
was unused since commit 1f5b23033937 ("mlxsw: spectrum: Set RIF ops per
ASIC type").

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  6 ++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  8 ++---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 37 ++++++++++++++++++++--
 3 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index df4ec063adcb..efc7acb4842c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2934,7 +2934,6 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp1_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
@@ -2943,6 +2942,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->policer_core_ops = &mlxsw_sp1_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp1_trap_ops;
 	mlxsw_sp->mall_ops = &mlxsw_sp1_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp1_router_ops;
 	mlxsw_sp->listeners = mlxsw_sp1_listener;
 	mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
@@ -2965,7 +2965,6 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
@@ -2974,6 +2973,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
 	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
@@ -2994,7 +2994,6 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
 	mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 	mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask;
-	mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
 	mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals;
 	mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops;
 	mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
@@ -3003,6 +3002,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
 	mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops;
 	mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops;
 	mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops;
+	mlxsw_sp->router_ops = &mlxsw_sp2_router_ops;
 	mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
 
 	return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 03655e418edb..97d074d7b78d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -87,10 +87,10 @@ enum mlxsw_sp_rif_type {
 	MLXSW_SP_RIF_TYPE_MAX,
 };
 
-struct mlxsw_sp_rif_ops;
+struct mlxsw_sp_router_ops;
 
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
+extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops;
+extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops;
 
 struct mlxsw_sp_switchdev_ops;
 
@@ -180,7 +180,6 @@ struct mlxsw_sp {
 	const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops;
 	const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
 	const struct mlxsw_sp_nve_ops **nve_ops_arr;
-	const struct mlxsw_sp_rif_ops **rif_ops_arr;
 	const struct mlxsw_sp_sb_vals *sb_vals;
 	const struct mlxsw_sp_sb_ops *sb_ops;
 	const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
@@ -189,6 +188,7 @@ struct mlxsw_sp {
 	const struct mlxsw_sp_policer_core_ops *policer_core_ops;
 	const struct mlxsw_sp_trap_ops *trap_ops;
 	const struct mlxsw_sp_mall_ops *mall_ops;
+	const struct mlxsw_sp_router_ops *router_ops;
 	const struct mlxsw_listener *listeners;
 	size_t listeners_count;
 	u32 lowest_shaper_bs;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4114f3b7d3cd..dd3fb5ca5c32 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -113,6 +113,10 @@ struct mlxsw_sp_rif_ops {
 	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 };
 
+struct mlxsw_sp_router_ops {
+	int (*init)(struct mlxsw_sp *mlxsw_sp);
+};
+
 static struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
 			 const struct net_device *dev);
@@ -7712,7 +7716,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
 	int i, err;
 
 	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
-	ops = mlxsw_sp->rif_ops_arr[type];
+	ops = mlxsw_sp->router->rif_ops_arr[type];
 
 	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
 	if (IS_ERR(vr))
@@ -8910,7 +8914,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
 	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
 };
 
-const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
+static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
@@ -9095,7 +9099,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
 	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
 };
 
-const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
+static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
 	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
 	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
 	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
@@ -9347,6 +9351,28 @@ static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index);
 }
 
+static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
+
+	return 0;
+}
+
+const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
+	.init = mlxsw_sp1_router_init,
+};
+
+static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
+
+	return 0;
+}
+
+const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = {
+	.init = mlxsw_sp2_router_init,
+};
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 			 struct netlink_ext_ack *extack)
 {
@@ -9360,6 +9386,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp->router = router;
 	router->mlxsw_sp = mlxsw_sp;
 
+	err = mlxsw_sp->router_ops->init(mlxsw_sp);
+	if (err)
+		goto err_router_ops_init;
+
 	err = mlxsw_sp_router_xm_init(mlxsw_sp);
 	if (err)
 		goto err_xm_init;
@@ -9500,6 +9530,7 @@ err_router_init:
 err_ll_op_ctx_init:
 	mlxsw_sp_router_xm_fini(mlxsw_sp);
 err_xm_init:
+err_router_ops_init:
 	mutex_destroy(&mlxsw_sp->router->lock);
 	kfree(mlxsw_sp->router);
 	return err;
-- 
cgit v1.2.3


From 164fa130dd1671797c4249195bc7f5447a34a9c2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:54 +0200
Subject: mlxsw: spectrum_router: Encode adjacency group size ranges in an
 array

The device supports a fixed set of adjacency group sizes. Encode these
sizes in an array, so that the next patch will be able to split it
between Spectrum-1 and Spectrum-{2,3}, which support different size
ranges.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 65 +++++++++++++++-------
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index dd3fb5ca5c32..a142d6d3e77d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3559,34 +3559,57 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+struct mlxsw_sp_adj_grp_size_range {
+	u16 start; /* Inclusive */
+	u16 end; /* Inclusive */
+};
+
+/* Ordered by range start value */
+static const struct mlxsw_sp_adj_grp_size_range
+mlxsw_sp_adj_grp_size_ranges[] = {
+	{ .start = 1, .end = 64 },
+	{ .start = 512, .end = 512 },
+	{ .start = 1024, .end = 1024 },
+	{ .start = 2048, .end = 2048 },
+	{ .start = 4096, .end = 4096 },
+};
+
 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
 {
-	/* Valid sizes for an adjacency group are:
-	 * 1-64, 512, 1024, 2048 and 4096.
-	 */
-	if (*p_adj_grp_size <= 64)
-		return;
-	else if (*p_adj_grp_size <= 512)
-		*p_adj_grp_size = 512;
-	else if (*p_adj_grp_size <= 1024)
-		*p_adj_grp_size = 1024;
-	else if (*p_adj_grp_size <= 2048)
-		*p_adj_grp_size = 2048;
-	else
-		*p_adj_grp_size = 4096;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_adj_grp_size_ranges); i++) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp_adj_grp_size_ranges[i];
+
+		if (*p_adj_grp_size >= size_range->start &&
+		    *p_adj_grp_size <= size_range->end)
+			return;
+
+		if (*p_adj_grp_size <= size_range->end) {
+			*p_adj_grp_size = size_range->end;
+			return;
+		}
+	}
 }
 
 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
 					     unsigned int alloc_size)
 {
-	if (alloc_size >= 4096)
-		*p_adj_grp_size = 4096;
-	else if (alloc_size >= 2048)
-		*p_adj_grp_size = 2048;
-	else if (alloc_size >= 1024)
-		*p_adj_grp_size = 1024;
-	else if (alloc_size >= 512)
-		*p_adj_grp_size = 512;
+	size_t arr_size = ARRAY_SIZE(mlxsw_sp_adj_grp_size_ranges);
+	int i;
+
+	for (i = arr_size - 1; i >= 0; i--) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp_adj_grp_size_ranges[i];
+
+		if (alloc_size >= size_range->end) {
+			*p_adj_grp_size = size_range->end;
+			return;
+		}
+	}
 }
 
 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
-- 
cgit v1.2.3


From ea037b236a05822fba43b98ca68e91859e03bb64 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 22 Mar 2021 17:58:55 +0200
Subject: mlxsw: spectrum_router: Add Spectrum-{2, 3} adjacency group size
 ranges

Spectrum-{2,3} support different adjacency group size ranges compared to
Spectrum-1. Add an array describing these ranges and change the common
code to use the array which was set during the per-ASIC initialization.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 40 ++++++++++++++++------
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  2 ++
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a142d6d3e77d..75c9fc47cd69 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3566,7 +3566,7 @@ struct mlxsw_sp_adj_grp_size_range {
 
 /* Ordered by range start value */
 static const struct mlxsw_sp_adj_grp_size_range
-mlxsw_sp_adj_grp_size_ranges[] = {
+mlxsw_sp1_adj_grp_size_ranges[] = {
 	{ .start = 1, .end = 64 },
 	{ .start = 512, .end = 512 },
 	{ .start = 1024, .end = 1024 },
@@ -3574,14 +3574,26 @@ mlxsw_sp_adj_grp_size_ranges[] = {
 	{ .start = 4096, .end = 4096 },
 };
 
-static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
+/* Ordered by range start value */
+static const struct mlxsw_sp_adj_grp_size_range
+mlxsw_sp2_adj_grp_size_ranges[] = {
+	{ .start = 1, .end = 128 },
+	{ .start = 256, .end = 256 },
+	{ .start = 512, .end = 512 },
+	{ .start = 1024, .end = 1024 },
+	{ .start = 2048, .end = 2048 },
+	{ .start = 4096, .end = 4096 },
+};
+
+static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp,
+					   u16 *p_adj_grp_size)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_adj_grp_size_ranges); i++) {
+	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
 		const struct mlxsw_sp_adj_grp_size_range *size_range;
 
-		size_range = &mlxsw_sp_adj_grp_size_ranges[i];
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
 
 		if (*p_adj_grp_size >= size_range->start &&
 		    *p_adj_grp_size <= size_range->end)
@@ -3594,16 +3606,16 @@ static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
 	}
 }
 
-static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
+static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp,
+					     u16 *p_adj_grp_size,
 					     unsigned int alloc_size)
 {
-	size_t arr_size = ARRAY_SIZE(mlxsw_sp_adj_grp_size_ranges);
 	int i;
 
-	for (i = arr_size - 1; i >= 0; i--) {
+	for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) {
 		const struct mlxsw_sp_adj_grp_size_range *size_range;
 
-		size_range = &mlxsw_sp_adj_grp_size_ranges[i];
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
 
 		if (alloc_size >= size_range->end) {
 			*p_adj_grp_size = size_range->end;
@@ -3621,7 +3633,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	/* Round up the requested group size to the next size supported
 	 * by the device and make sure the request can be satisfied.
 	 */
-	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
+	mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size);
 	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
 					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
 					      *p_adj_grp_size, &alloc_size);
@@ -3631,7 +3643,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
 	 * entries than requested. Try to use as much of them as
 	 * possible.
 	 */
-	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
+	mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size);
 
 	return 0;
 }
@@ -9376,7 +9388,11 @@ static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp)
 
 static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp)
 {
+	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges);
+
 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr;
+	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges;
+	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
 
 	return 0;
 }
@@ -9387,7 +9403,11 @@ const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = {
 
 static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp)
 {
+	size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges);
+
 	mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr;
+	mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges;
+	mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 3d90d4eaba05..01fd9a3d5944 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -78,6 +78,8 @@ struct mlxsw_sp_router {
 	struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx;
 	u16 lb_rif_index;
 	struct mlxsw_sp_router_xm *xm;
+	const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges;
+	size_t adj_grp_size_ranges_count;
 };
 
 struct mlxsw_sp_fib_entry_priv {
-- 
cgit v1.2.3


From a09d042b086202735c4ed64573cdd79933020001 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Mon, 22 Mar 2021 21:37:15 +0100
Subject: net: dsa: lantiq: allow to use all GPHYs on xRX300 and xRX330

This patch allows to use all PHYs on GRX300 and GRX330. The ARX300
has 3 and the GRX330 has 4 integrated PHYs connected to different
ports compared to VRX200. Each integrated PHY can work as single
Gigabit Ethernet PHY (GMII) or as double Fast Ethernet PHY (MII).

Allowed port configurations:

xRX200:
GMAC0: RGMII, MII, REVMII or RMII port
GMAC1: RGMII, MII, REVMII or RMII port
GMAC2: GPHY0 (GMII)
GMAC3: GPHY0 (MII)
GMAC4: GPHY1 (GMII)
GMAC5: GPHY1 (MII) or RGMII port

xRX300:
GMAC0: RGMII port
GMAC1: GPHY2 (GMII)
GMAC2: GPHY0 (GMII)
GMAC3: GPHY0 (MII)
GMAC4: GPHY1 (GMII)
GMAC5: GPHY1 (MII) or RGMII port

xRX330:
GMAC0: RGMII, GMII or RMII port
GMAC1: GPHY2 (GMII)
GMAC2: GPHY0 (GMII)
GMAC3: GPHY0 (MII) or GPHY3 (GMII)
GMAC4: GPHY1 (GMII)
GMAC5: GPHY1 (MII), RGMII or RMII port

Tested on D-Link DWR966 (xRX330) with OpenWRT.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/lantiq_gswip.c | 142 ++++++++++++++++++++++++++++++++---------
 1 file changed, 113 insertions(+), 29 deletions(-)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 52e865a3912c..4cab5cd26928 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Lantiq / Intel GSWIP switch driver for VRX200 SoCs
+ * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs
  *
  * Copyright (C) 2010 Lantiq Deutschland
  * Copyright (C) 2012 John Crispin <john@phrozen.org>
@@ -100,6 +100,7 @@
 #define  GSWIP_MII_CFG_MODE_RMIIP	0x2
 #define  GSWIP_MII_CFG_MODE_RMIIM	0x3
 #define  GSWIP_MII_CFG_MODE_RGMII	0x4
+#define  GSWIP_MII_CFG_MODE_GMII	0x9
 #define  GSWIP_MII_CFG_MODE_MASK	0xf
 #define  GSWIP_MII_CFG_RATE_M2P5	0x00
 #define  GSWIP_MII_CFG_RATE_M25	0x10
@@ -220,6 +221,7 @@
 struct gswip_hw_info {
 	int max_ports;
 	int cpu_port;
+	const struct dsa_switch_ops *ops;
 };
 
 struct xway_gphy_match_data {
@@ -1384,12 +1386,42 @@ static int gswip_port_fdb_dump(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static void gswip_phylink_validate(struct dsa_switch *ds, int port,
-				   unsigned long *supported,
-				   struct phylink_link_state *state)
+static void gswip_phylink_set_capab(unsigned long *supported,
+				    struct phylink_link_state *state)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
+	/* Allow all the expected bits */
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+	phylink_set(mask, Pause);
+	phylink_set(mask, Asym_Pause);
+
+	/* With the exclusion of MII, Reverse MII and Reduced MII, we
+	 * support Gigabit, including Half duplex
+	 */
+	if (state->interface != PHY_INTERFACE_MODE_MII &&
+	    state->interface != PHY_INTERFACE_MODE_REVMII &&
+	    state->interface != PHY_INTERFACE_MODE_RMII) {
+		phylink_set(mask, 1000baseT_Full);
+		phylink_set(mask, 1000baseT_Half);
+	}
+
+	phylink_set(mask, 10baseT_Half);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Half);
+	phylink_set(mask, 100baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static void gswip_xrx200_phylink_validate(struct dsa_switch *ds, int port,
+					  unsigned long *supported,
+					  struct phylink_link_state *state)
+{
 	switch (port) {
 	case 0:
 	case 1:
@@ -1416,38 +1448,54 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port,
 		return;
 	}
 
-	/* Allow all the expected bits */
-	phylink_set(mask, Autoneg);
-	phylink_set_port_modes(mask);
-	phylink_set(mask, Pause);
-	phylink_set(mask, Asym_Pause);
+	gswip_phylink_set_capab(supported, state);
 
-	/* With the exclusion of MII, Reverse MII and Reduced MII, we
-	 * support Gigabit, including Half duplex
-	 */
-	if (state->interface != PHY_INTERFACE_MODE_MII &&
-	    state->interface != PHY_INTERFACE_MODE_REVMII &&
-	    state->interface != PHY_INTERFACE_MODE_RMII) {
-		phylink_set(mask, 1000baseT_Full);
-		phylink_set(mask, 1000baseT_Half);
+	return;
+
+unsupported:
+	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+	dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
+		phy_modes(state->interface), port);
+}
+
+static void gswip_xrx300_phylink_validate(struct dsa_switch *ds, int port,
+					  unsigned long *supported,
+					  struct phylink_link_state *state)
+{
+	switch (port) {
+	case 0:
+		if (!phy_interface_mode_is_rgmii(state->interface) &&
+		    state->interface != PHY_INTERFACE_MODE_GMII &&
+		    state->interface != PHY_INTERFACE_MODE_RMII)
+			goto unsupported;
+		break;
+	case 1:
+	case 2:
+	case 3:
+	case 4:
+		if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
+			goto unsupported;
+		break;
+	case 5:
+		if (!phy_interface_mode_is_rgmii(state->interface) &&
+		    state->interface != PHY_INTERFACE_MODE_INTERNAL &&
+		    state->interface != PHY_INTERFACE_MODE_RMII)
+			goto unsupported;
+		break;
+	default:
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		dev_err(ds->dev, "Unsupported port: %i\n", port);
+		return;
 	}
 
-	phylink_set(mask, 10baseT_Half);
-	phylink_set(mask, 10baseT_Full);
-	phylink_set(mask, 100baseT_Half);
-	phylink_set(mask, 100baseT_Full);
+	gswip_phylink_set_capab(supported, state);
 
-	bitmap_and(supported, supported, mask,
-		   __ETHTOOL_LINK_MODE_MASK_NBITS);
-	bitmap_and(state->advertising, state->advertising, mask,
-		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 	return;
 
 unsupported:
 	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 	dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
 		phy_modes(state->interface), port);
-	return;
 }
 
 static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
@@ -1476,6 +1524,9 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
 	case PHY_INTERFACE_MODE_RGMII_TXID:
 		miicfg |= GSWIP_MII_CFG_MODE_RGMII;
 		break;
+	case PHY_INTERFACE_MODE_GMII:
+		miicfg |= GSWIP_MII_CFG_MODE_GMII;
+		break;
 	default:
 		dev_err(ds->dev,
 			"Unsupported interface: %d\n", state->interface);
@@ -1588,7 +1639,7 @@ static int gswip_get_sset_count(struct dsa_switch *ds, int port, int sset)
 	return ARRAY_SIZE(gswip_rmon_cnt);
 }
 
-static const struct dsa_switch_ops gswip_switch_ops = {
+static const struct dsa_switch_ops gswip_xrx200_switch_ops = {
 	.get_tag_protocol	= gswip_get_tag_protocol,
 	.setup			= gswip_setup,
 	.port_enable		= gswip_port_enable,
@@ -1603,7 +1654,31 @@ static const struct dsa_switch_ops gswip_switch_ops = {
 	.port_fdb_add		= gswip_port_fdb_add,
 	.port_fdb_del		= gswip_port_fdb_del,
 	.port_fdb_dump		= gswip_port_fdb_dump,
-	.phylink_validate	= gswip_phylink_validate,
+	.phylink_validate	= gswip_xrx200_phylink_validate,
+	.phylink_mac_config	= gswip_phylink_mac_config,
+	.phylink_mac_link_down	= gswip_phylink_mac_link_down,
+	.phylink_mac_link_up	= gswip_phylink_mac_link_up,
+	.get_strings		= gswip_get_strings,
+	.get_ethtool_stats	= gswip_get_ethtool_stats,
+	.get_sset_count		= gswip_get_sset_count,
+};
+
+static const struct dsa_switch_ops gswip_xrx300_switch_ops = {
+	.get_tag_protocol	= gswip_get_tag_protocol,
+	.setup			= gswip_setup,
+	.port_enable		= gswip_port_enable,
+	.port_disable		= gswip_port_disable,
+	.port_bridge_join	= gswip_port_bridge_join,
+	.port_bridge_leave	= gswip_port_bridge_leave,
+	.port_fast_age		= gswip_port_fast_age,
+	.port_vlan_filtering	= gswip_port_vlan_filtering,
+	.port_vlan_add		= gswip_port_vlan_add,
+	.port_vlan_del		= gswip_port_vlan_del,
+	.port_stp_state_set	= gswip_port_stp_state_set,
+	.port_fdb_add		= gswip_port_fdb_add,
+	.port_fdb_del		= gswip_port_fdb_del,
+	.port_fdb_dump		= gswip_port_fdb_dump,
+	.phylink_validate	= gswip_xrx300_phylink_validate,
 	.phylink_mac_config	= gswip_phylink_mac_config,
 	.phylink_mac_link_down	= gswip_phylink_mac_link_down,
 	.phylink_mac_link_up	= gswip_phylink_mac_link_up,
@@ -1865,7 +1940,7 @@ static int gswip_probe(struct platform_device *pdev)
 	priv->ds->dev = dev;
 	priv->ds->num_ports = priv->hw_info->max_ports;
 	priv->ds->priv = priv;
-	priv->ds->ops = &gswip_switch_ops;
+	priv->ds->ops = priv->hw_info->ops;
 	priv->dev = dev;
 	version = gswip_switch_r(priv, GSWIP_VERSION);
 
@@ -1946,10 +2021,19 @@ static int gswip_remove(struct platform_device *pdev)
 static const struct gswip_hw_info gswip_xrx200 = {
 	.max_ports = 7,
 	.cpu_port = 6,
+	.ops = &gswip_xrx200_switch_ops,
+};
+
+static const struct gswip_hw_info gswip_xrx300 = {
+	.max_ports = 7,
+	.cpu_port = 6,
+	.ops = &gswip_xrx300_switch_ops,
 };
 
 static const struct of_device_id gswip_of_match[] = {
 	{ .compatible = "lantiq,xrx200-gswip", .data = &gswip_xrx200 },
+	{ .compatible = "lantiq,xrx300-gswip", .data = &gswip_xrx300 },
+	{ .compatible = "lantiq,xrx330-gswip", .data = &gswip_xrx300 },
 	{},
 };
 MODULE_DEVICE_TABLE(of, gswip_of_match);
-- 
cgit v1.2.3


From 204c7614738ee4b3fe6269950d367212e790498c Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Mon, 22 Mar 2021 21:37:16 +0100
Subject: net: dsa: lantiq: verify compatible strings against hardware

Verify compatible string against hardware.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/lantiq_gswip.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 4cab5cd26928..26d0e3bb5dea 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -1907,7 +1907,7 @@ remove_gphy:
 static int gswip_probe(struct platform_device *pdev)
 {
 	struct gswip_priv *priv;
-	struct device_node *mdio_np, *gphy_fw_np;
+	struct device_node *np, *mdio_np, *gphy_fw_np;
 	struct device *dev = &pdev->dev;
 	int err;
 	int i;
@@ -1944,6 +1944,24 @@ static int gswip_probe(struct platform_device *pdev)
 	priv->dev = dev;
 	version = gswip_switch_r(priv, GSWIP_VERSION);
 
+	np = dev->of_node;
+	switch (version) {
+	case GSWIP_VERSION_2_0:
+	case GSWIP_VERSION_2_1:
+		if (!of_device_is_compatible(np, "lantiq,xrx200-gswip"))
+			return -EINVAL;
+		break;
+	case GSWIP_VERSION_2_2:
+	case GSWIP_VERSION_2_2_ETC:
+		if (!of_device_is_compatible(np, "lantiq,xrx300-gswip") &&
+		    !of_device_is_compatible(np, "lantiq,xrx330-gswip"))
+			return -EINVAL;
+		break;
+	default:
+		dev_err(dev, "unknown GSWIP version: 0x%x", version);
+		return -ENOENT;
+	}
+
 	/* bring up the mdio bus */
 	gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw");
 	if (gphy_fw_np) {
-- 
cgit v1.2.3


From ee83d82407e490ece848b13a4d86600e1e3269a5 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Mon, 22 Mar 2021 21:37:17 +0100
Subject: dt-bindings: net: dsa: lantiq: add xRx300 and xRX330 switch bindings

Add compatible string for xRX300 and xRX330 SoCs.

Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt b/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
index 886cbe8ffb38..e3829d3e480e 100644
--- a/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
+++ b/Documentation/devicetree/bindings/net/dsa/lantiq-gswip.txt
@@ -5,6 +5,10 @@ Required properties for GSWIP core:
 
 - compatible	: "lantiq,xrx200-gswip" for the embedded GSWIP in the
 		  xRX200 SoC
+		  "lantiq,xrx300-gswip" for the embedded GSWIP in the
+		  xRX300 SoC
+		  "lantiq,xrx330-gswip" for the embedded GSWIP in the
+		  xRX330 SoC
 - reg		: memory range of the GSWIP core registers
 		: memory range of the GSWIP MDIO registers
 		: memory range of the GSWIP MII registers
-- 
cgit v1.2.3


From c7e856c85981722013cbdfa8f5324c2ad8c803f4 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:54 +0200
Subject: dpaa2-switch: move the dpaa2_switch_fdb_set_egress_flood function

In order to avoid a forward declaration in the next patches, move the
dpaa2_switch_fdb_set_egress_flood() function to the top of the file.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 84 +++++++++++-----------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 2fd05dd18d46..5254eae5c86a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -110,6 +110,48 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
 	return 0;
 }
 
+static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+{
+	struct dpsw_egress_flood_cfg flood_cfg;
+	int i = 0, j;
+	int err;
+
+	/* Add all the DPAA2 switch ports found in the same bridging domain to
+	 * the egress flooding domain
+	 */
+	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
+		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
+			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
+
+	/* Add the CTRL interface to the egress flooding domain */
+	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
+
+	/* Use the FDB of the first dpaa2 switch port added to the bridge */
+	flood_cfg.fdb_id = fdb_id;
+
+	/* Setup broadcast flooding domain */
+	flood_cfg.flood_type = DPSW_BROADCAST;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	/* Setup unknown flooding domain */
+	flood_cfg.flood_type = DPSW_FLOODING;
+	flood_cfg.num_ifs = i;
+	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    &flood_cfg);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
 static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
 				dma_addr_t iova_addr)
 {
@@ -1442,48 +1484,6 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev,
 	return notifier_from_errno(err);
 }
 
-static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
-{
-	struct dpsw_egress_flood_cfg flood_cfg;
-	int i = 0, j;
-	int err;
-
-	/* Add all the DPAA2 switch ports found in the same bridging domain to
-	 * the egress flooding domain
-	 */
-	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
-		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
-			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
-
-	/* Add the CTRL interface to the egress flooding domain */
-	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
-
-	/* Use the FDB of the first dpaa2 switch port added to the bridge */
-	flood_cfg.fdb_id = fdb_id;
-
-	/* Setup broadcast flooding domain */
-	flood_cfg.flood_type = DPSW_BROADCAST;
-	flood_cfg.num_ifs = i;
-	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				    &flood_cfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
-		return err;
-	}
-
-	/* Setup unknown flooding domain */
-	flood_cfg.flood_type = DPSW_FLOODING;
-	flood_cfg.num_ifs = i;
-	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				    &flood_cfg);
-	if (err) {
-		dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
 static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 					 struct net_device *upper_dev)
 {
-- 
cgit v1.2.3


From f054e3e217e40ed343a0cea8c68cc053d40f81bd Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:55 +0200
Subject: dpaa2-switch: refactor the egress flooding domain setup

Extract the code that determines the list of egress flood interfaces for
a specific flood type into a new function -
dpaa2_switch_fdb_get_flood_cfg().

This will help us to not duplicate code when the broadcast and
unknown ucast/mcast flooding domains will be individually configurable.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 38 ++++++++++++++--------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 5254eae5c86a..2db9cd78201d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -110,28 +110,41 @@ static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
 	return 0;
 }
 
-static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
+					   enum dpsw_flood_type type,
+					   struct dpsw_egress_flood_cfg *cfg)
 {
-	struct dpsw_egress_flood_cfg flood_cfg;
 	int i = 0, j;
-	int err;
+
+	memset(cfg, 0, sizeof(*cfg));
 
 	/* Add all the DPAA2 switch ports found in the same bridging domain to
 	 * the egress flooding domain
 	 */
-	for (j = 0; j < ethsw->sw_attr.num_ifs; j++)
-		if (ethsw->ports[j] && ethsw->ports[j]->fdb->fdb_id == fdb_id)
-			flood_cfg.if_id[i++] = ethsw->ports[j]->idx;
+	for (j = 0; j < ethsw->sw_attr.num_ifs; j++) {
+		if (!ethsw->ports[j])
+			continue;
+		if (ethsw->ports[j]->fdb->fdb_id != fdb_id)
+			continue;
+
+		cfg->if_id[i++] = ethsw->ports[j]->idx;
+	}
 
 	/* Add the CTRL interface to the egress flooding domain */
-	flood_cfg.if_id[i++] = ethsw->sw_attr.num_ifs;
+	cfg->if_id[i++] = ethsw->sw_attr.num_ifs;
+
+	cfg->fdb_id = fdb_id;
+	cfg->flood_type = type;
+	cfg->num_ifs = i;
+}
 
-	/* Use the FDB of the first dpaa2 switch port added to the bridge */
-	flood_cfg.fdb_id = fdb_id;
+static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id)
+{
+	struct dpsw_egress_flood_cfg flood_cfg;
+	int err;
 
 	/* Setup broadcast flooding domain */
-	flood_cfg.flood_type = DPSW_BROADCAST;
-	flood_cfg.num_ifs = i;
+	dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_BROADCAST, &flood_cfg);
 	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
 				    &flood_cfg);
 	if (err) {
@@ -140,8 +153,7 @@ static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_i
 	}
 
 	/* Setup unknown flooding domain */
-	flood_cfg.flood_type = DPSW_FLOODING;
-	flood_cfg.num_ifs = i;
+	dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_FLOODING, &flood_cfg);
 	err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle,
 				    &flood_cfg);
 	if (err) {
-- 
cgit v1.2.3


From 1e7cbabfdb12aa944ae0cb03871f8b55ede1341a Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:56 +0200
Subject: dpaa2-switch: add support for configuring learning state per port

Add support for configuring the learning state of a switch port.
When the user requests the HW learning to be disabled, a fast-age
procedure on that specific port is run so that previously learnt
addresses do not linger.

At device probe as well as on a bridge leave action, the ports are
configured with HW learning disabled since they are basically a
standalone port.

At the same time, at bridge join we inherit the bridge port BR_LEARNING
flag state and configure it on the switch port.

There were already some MC firmware ABI functions for changing the
learning state, but those were per FDB (bridging domain) and not per
port so we need to adjust those to use the new MC fw command which is
per port.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 70 ++++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h    | 10 ++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.c        | 27 +++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.h        | 25 ++++----
 4 files changed, 121 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 2db9cd78201d..2ea3a4dac49d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1238,6 +1238,56 @@ static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
 	return dpaa2_switch_port_set_stp_state(port_priv, state);
 }
 
+static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, bool enable)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	enum dpsw_learning_mode learn_mode;
+	int err;
+
+	if (enable)
+		learn_mode = DPSW_LEARNING_MODE_HW;
+	else
+		learn_mode = DPSW_LEARNING_MODE_DIS;
+
+	err = dpsw_if_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle,
+					port_priv->idx, learn_mode);
+	if (err)
+		netdev_err(port_priv->netdev, "dpsw_if_set_learning_mode err %d\n", err);
+
+	if (!enable)
+		dpaa2_switch_port_fast_age(port_priv);
+
+	return err;
+}
+
+static int dpaa2_switch_port_pre_bridge_flags(struct net_device *netdev,
+					      struct switchdev_brport_flags flags,
+					      struct netlink_ext_ack *extack)
+{
+	if (flags.mask & ~(BR_LEARNING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
+					  struct switchdev_brport_flags flags,
+					  struct netlink_ext_ack *extack)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	if (flags.mask & BR_LEARNING) {
+		bool learn_ena = !!(flags.val & BR_LEARNING);
+
+		err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 				      const struct switchdev_attr *attr,
 				      struct netlink_ext_ack *extack)
@@ -1256,6 +1306,12 @@ static int dpaa2_switch_port_attr_set(struct net_device *netdev,
 			return -EOPNOTSUPP;
 		}
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_pre_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -1504,6 +1560,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 	struct ethsw_port_priv *other_port_priv;
 	struct net_device *other_dev;
 	struct list_head *iter;
+	bool learn_ena;
 	int err;
 
 	netdev_for_each_lower_dev(upper_dev, other_dev, iter) {
@@ -1525,6 +1582,10 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 
 	dpaa2_switch_port_set_fdb(port_priv, upper_dev);
 
+	/* Inherit the initial bridge port learning state */
+	learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
+	err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
+
 	/* Setup the egress flood policy (broadcast, unknown unicast) */
 	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
 	if (err)
@@ -1595,6 +1656,11 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 	if (err)
 		return err;
 
+	/* No HW learning when not under a bridge */
+	err = dpaa2_switch_port_set_learning(port_priv, false);
+	if (err)
+		return err;
+
 	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
 	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
 	 */
@@ -2684,6 +2750,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	if (err)
 		goto err_port_probe;
 
+	err = dpaa2_switch_port_set_learning(port_priv, false);
+	if (err)
+		goto err_port_probe;
+
 	return 0;
 
 err_port_probe:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 996a59dcd01d..24b17d6e09af 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -83,6 +83,7 @@
 #define DPSW_CMDID_CTRL_IF_SET_QUEUE        DPSW_CMD_ID(0x0A6)
 
 #define DPSW_CMDID_SET_EGRESS_FLOOD         DPSW_CMD_ID(0x0AC)
+#define DPSW_CMDID_IF_SET_LEARNING_MODE     DPSW_CMD_ID(0x0AD)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPSW_MASK(field)        \
@@ -447,5 +448,14 @@ struct dpsw_cmd_set_egress_flood {
 	u8 pad[5];
 	__le64 if_id;
 };
+
+#define DPSW_LEARNING_MODE_SHIFT	0
+#define DPSW_LEARNING_MODE_SIZE		4
+
+struct dpsw_cmd_if_set_learning_mode {
+	__le16 if_id;
+	/* only the first 4 bits from LSB */
+	u8 mode;
+};
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index 56f3c23fce07..6c787d4b85f9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1327,3 +1327,30 @@ int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_if_set_learning_mode() - Configure the learning mode on an interface.
+ * If this API is used, it will take precedence over the FDB configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	InterfaceID
+ * @mode:	Learning mode
+ *
+ * Return:	Completion status. '0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, enum dpsw_learning_mode mode)
+{
+	struct dpsw_cmd_if_set_learning_mode *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LEARNING_MODE,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_learning_mode *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	dpsw_set_field(cmd_params->mode, LEARNING_MODE, mode);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index f108cc61bb27..96837b10cc94 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -532,11 +532,11 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
 
 /**
- * enum dpsw_fdb_learning_mode - Auto-learning modes
- * @DPSW_FDB_LEARNING_MODE_DIS: Disable Auto-learning
- * @DPSW_FDB_LEARNING_MODE_HW: Enable HW auto-Learning
- * @DPSW_FDB_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
- * @DPSW_FDB_LEARNING_MODE_SECURE: Enable secure learning by CPU
+ * enum dpsw_learning_mode - Auto-learning modes
+ * @DPSW_LEARNING_MODE_DIS: Disable Auto-learning
+ * @DPSW_LEARNING_MODE_HW: Enable HW auto-Learning
+ * @DPSW_LEARNING_MODE_NON_SECURE: Enable None secure learning by CPU
+ * @DPSW_LEARNING_MODE_SECURE: Enable secure learning by CPU
  *
  *	NONE - SECURE LEARNING
  *	SMAC found	DMAC found	CTLU Action
@@ -561,11 +561,11 @@ int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
  *	-		-		Forward frame to
  *						1.  Control interface
  */
-enum dpsw_fdb_learning_mode {
-	DPSW_FDB_LEARNING_MODE_DIS = 0,
-	DPSW_FDB_LEARNING_MODE_HW = 1,
-	DPSW_FDB_LEARNING_MODE_NON_SECURE = 2,
-	DPSW_FDB_LEARNING_MODE_SECURE = 3
+enum dpsw_learning_mode {
+	DPSW_LEARNING_MODE_DIS = 0,
+	DPSW_LEARNING_MODE_HW = 1,
+	DPSW_LEARNING_MODE_NON_SECURE = 2,
+	DPSW_LEARNING_MODE_SECURE = 3
 };
 
 /**
@@ -579,7 +579,7 @@ enum dpsw_fdb_learning_mode {
 struct dpsw_fdb_attr {
 	u16 max_fdb_entries;
 	u16 fdb_ageing_time;
-	enum dpsw_fdb_learning_mode learning_mode;
+	enum dpsw_learning_mode learning_mode;
 	u16 num_fdb_mc_groups;
 	u16 max_fdb_mc_groups;
 };
@@ -625,4 +625,7 @@ struct dpsw_egress_flood_cfg {
 int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			  const struct dpsw_egress_flood_cfg *cfg);
 
+int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, enum dpsw_learning_mode mode);
+
 #endif /* __FSL_DPSW_H */
-- 
cgit v1.2.3


From b54eb093f5ce784ca00170d4512c47cdc755397e Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:57 +0200
Subject: dpaa2-switch: add support for configuring per port broadcast flooding

The BR_BCAST_FLOOD bridge port flag is now accepted by the driver and a
change in its state will determine a reconfiguration of the broadcast
egress flooding list on the FDB associated with the port.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 32 ++++++++++++++++++++--
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  2 +-
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 2ea3a4dac49d..2ae4faf81b1f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -127,7 +127,10 @@ static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
 		if (ethsw->ports[j]->fdb->fdb_id != fdb_id)
 			continue;
 
-		cfg->if_id[i++] = ethsw->ports[j]->idx;
+		if (type == DPSW_BROADCAST && ethsw->ports[j]->bcast_flood)
+			cfg->if_id[i++] = ethsw->ports[j]->idx;
+		else if (type == DPSW_FLOODING)
+			cfg->if_id[i++] = ethsw->ports[j]->idx;
 	}
 
 	/* Add the CTRL interface to the egress flooding domain */
@@ -1260,11 +1263,22 @@ static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, boo
 	return err;
 }
 
+static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv,
+				   struct switchdev_brport_flags flags)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+
+	if (flags.mask & BR_BCAST_FLOOD)
+		port_priv->bcast_flood = !!(flags.val & BR_BCAST_FLOOD);
+
+	return dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+}
+
 static int dpaa2_switch_port_pre_bridge_flags(struct net_device *netdev,
 					      struct switchdev_brport_flags flags,
 					      struct netlink_ext_ack *extack)
 {
-	if (flags.mask & ~(BR_LEARNING))
+	if (flags.mask & ~(BR_LEARNING | BR_BCAST_FLOOD))
 		return -EINVAL;
 
 	return 0;
@@ -1285,6 +1299,12 @@ static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
 			return err;
 	}
 
+	if (flags.mask & BR_BCAST_FLOOD) {
+		err = dpaa2_switch_port_flood(port_priv, flags);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -1643,6 +1663,12 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 	if (err)
 		netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err);
 
+	/* Reset the flooding state to denote that this port can send any
+	 * packet in standalone mode. With this, we are also ensuring that any
+	 * later bridge join will have the flooding flag on.
+	 */
+	port_priv->bcast_flood = true;
+
 	/* Setup the egress flood policy (broadcast, unknown unicast).
 	 * When the port is not under a bridge, only the CTRL interface is part
 	 * of the flooding domain besides the actual port
@@ -2728,6 +2754,8 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 
 	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
 
+	port_priv->bcast_flood = true;
+
 	/* Set MTU limits */
 	port_netdev->min_mtu = ETH_MIN_MTU;
 	port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 933563064015..65ede6036870 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -105,13 +105,13 @@ struct ethsw_port_priv {
 	struct ethsw_core	*ethsw_data;
 	u8			link_state;
 	u8			stp_state;
-	bool			flood;
 
 	u8			vlans[VLAN_VID_MASK + 1];
 	u16			pvid;
 	u16			tx_qdid;
 
 	struct dpaa2_switch_fdb	*fdb;
+	bool			bcast_flood;
 };
 
 /* Switch data */
-- 
cgit v1.2.3


From 6253d5e39ce2bba13c601274768c481846526a80 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:58 +0200
Subject: dpaa2-switch: add support for configuring per port unknown flooding

Add support for configuring per port unknown flooding by accepting both
BR_FLOOD and BR_MCAST_FLOOD as offloadable bridge port flags.

The DPAA2 switch does not support at the moment configuration of unknown
multicast flooding independently of unknown unicast flooding, therefore
check that both BR_FLOOD and BR_MCAST_FLOOD have the same state.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 23 +++++++++++++++++++---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  1 +
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 2ae4faf81b1f..9f1a59219435 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -129,7 +129,7 @@ static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id,
 
 		if (type == DPSW_BROADCAST && ethsw->ports[j]->bcast_flood)
 			cfg->if_id[i++] = ethsw->ports[j]->idx;
-		else if (type == DPSW_FLOODING)
+		else if (type == DPSW_FLOODING && ethsw->ports[j]->ucast_flood)
 			cfg->if_id[i++] = ethsw->ports[j]->idx;
 	}
 
@@ -1271,6 +1271,9 @@ static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv,
 	if (flags.mask & BR_BCAST_FLOOD)
 		port_priv->bcast_flood = !!(flags.val & BR_BCAST_FLOOD);
 
+	if (flags.mask & BR_FLOOD)
+		port_priv->ucast_flood = !!(flags.val & BR_FLOOD);
+
 	return dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
 }
 
@@ -1278,9 +1281,21 @@ static int dpaa2_switch_port_pre_bridge_flags(struct net_device *netdev,
 					      struct switchdev_brport_flags flags,
 					      struct netlink_ext_ack *extack)
 {
-	if (flags.mask & ~(BR_LEARNING | BR_BCAST_FLOOD))
+	if (flags.mask & ~(BR_LEARNING | BR_BCAST_FLOOD | BR_FLOOD |
+			   BR_MCAST_FLOOD))
 		return -EINVAL;
 
+	if (flags.mask & (BR_FLOOD | BR_MCAST_FLOOD)) {
+		bool multicast = !!(flags.val & BR_MCAST_FLOOD);
+		bool unicast = !!(flags.val & BR_FLOOD);
+
+		if (unicast != multicast) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot configure multicast flooding independently of unicast");
+			return -EINVAL;
+		}
+	}
+
 	return 0;
 }
 
@@ -1299,7 +1314,7 @@ static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
 			return err;
 	}
 
-	if (flags.mask & BR_BCAST_FLOOD) {
+	if (flags.mask & (BR_BCAST_FLOOD | BR_FLOOD | BR_MCAST_FLOOD)) {
 		err = dpaa2_switch_port_flood(port_priv, flags);
 		if (err)
 			return err;
@@ -1668,6 +1683,7 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 	 * later bridge join will have the flooding flag on.
 	 */
 	port_priv->bcast_flood = true;
+	port_priv->ucast_flood = true;
 
 	/* Setup the egress flood policy (broadcast, unknown unicast).
 	 * When the port is not under a bridge, only the CTRL interface is part
@@ -2755,6 +2771,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM;
 
 	port_priv->bcast_flood = true;
+	port_priv->ucast_flood = true;
 
 	/* Set MTU limits */
 	port_netdev->min_mtu = ETH_MIN_MTU;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 65ede6036870..549218994243 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -112,6 +112,7 @@ struct ethsw_port_priv {
 
 	struct dpaa2_switch_fdb	*fdb;
 	bool			bcast_flood;
+	bool			ucast_flood;
 };
 
 /* Switch data */
-- 
cgit v1.2.3


From b175dfd7e691ba264d190f23197cc29d0ba8bc67 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 22 Mar 2021 22:58:59 +0200
Subject: dpaa2-switch: mark skbs with offload_fwd_mark

If a switch port is under a bridge, the offload_fwd_mark should be setup
before sending the skb towards the stack so that the bridge does not try
to flood the packet on the other switch ports.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 9f1a59219435..a9b30a72ddad 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2005,6 +2005,9 @@ static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
 	skb->dev = netdev;
 	skb->protocol = eth_type_trans(skb, skb->dev);
 
+	/* Setup the offload_fwd_mark only if the port is under a bridge */
+	skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev);
+
 	netif_receive_skb(skb);
 
 	return;
-- 
cgit v1.2.3


From add2d73631070c951b0de81a01d1463a15cfbd47 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Mar 2021 11:21:45 -0700
Subject: net: set initial device refcount to 1

When adding CONFIG_PCPU_DEV_REFCNT, I forgot that the
initial net device refcount was 0.

When CONFIG_PCPU_DEV_REFCNT is not set, this means
the first dev_hold() triggers an illegal refcount
operation (addition on 0)

refcount_t: addition on 0; use-after-free.
WARNING: CPU: 0 PID: 1 at lib/refcount.c:25 refcount_warn_saturate+0x128/0x1a4

Fix is to change initial (and final) refcount to be 1.

Also add a missing kerneldoc piece, as reported by
Stephen Rothwell.

Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Guenter Roeck <groeck@google.com>
Tested-by: Guenter Roeck <groeck@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e4a503288d9b..7005ad80e8d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1792,6 +1792,7 @@ enum netdev_ml_priv_type {
  *
  *	@proto_down_reason:	reason a netdev interface is held down
  *	@pcpu_refcnt:		Number of references to this device
+ *	@dev_refcnt:		Number of references to this device
  *	@todo_list:		Delayed register/unregister
  *	@link_watch_list:	XXX: need comments on this one
  *
diff --git a/net/core/dev.c b/net/core/dev.c
index ffab3928eeeb..c9a496f5e687 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10346,7 +10346,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 	rebroadcast_time = warning_time = jiffies;
 	refcnt = netdev_refcnt_read(dev);
 
-	while (refcnt != 0) {
+	while (refcnt != 1) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
@@ -10383,7 +10383,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
+		if (refcnt != 1 && time_after(jiffies, warning_time + 10 * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
 			warning_time = jiffies;
@@ -10459,7 +10459,7 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(netdev_refcnt_read(dev));
+		BUG_ON(netdev_refcnt_read(dev) != 1);
 		BUG_ON(!list_empty(&dev->ptype_all));
 		BUG_ON(!list_empty(&dev->ptype_specific));
 		WARN_ON(rcu_access_pointer(dev->ip_ptr));
@@ -10680,6 +10680,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->pcpu_refcnt = alloc_percpu(int);
 	if (!dev->pcpu_refcnt)
 		goto free_dev;
+	dev_hold(dev);
+#else
+	refcount_set(&dev->dev_refcnt, 1);
 #endif
 
 	if (dev_addr_init(dev))
-- 
cgit v1.2.3


From 1ab568e92bf8f6a359c977869dc546a23a6b5f13 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@kmk-computers.de>
Date: Mon, 22 Mar 2021 19:51:13 +0100
Subject: net: dsa: hellcreek: Report switch name and ID

Report the driver name, ASIC ID and the switch name via devlink. This is a
useful information for user space tooling.

Signed-off-by: Kurt Kanzenbach <kurt@kmk-computers.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c             | 18 ++++++++++++++++++
 include/linux/platform_data/hirschmann-hellcreek.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 64a73dd045c0..918be7eb626f 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1082,6 +1082,22 @@ out:
 	return ret;
 }
 
+static int hellcreek_devlink_info_get(struct dsa_switch *ds,
+				      struct devlink_info_req *req,
+				      struct netlink_ext_ack *extack)
+{
+	struct hellcreek *hellcreek = ds->priv;
+	int ret;
+
+	ret = devlink_info_driver_name_put(req, "hellcreek");
+	if (ret)
+		return ret;
+
+	return devlink_info_version_fixed_put(req,
+					      DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+					      hellcreek->pdata->name);
+}
+
 static u64 hellcreek_devlink_vlan_table_get(void *priv)
 {
 	struct hellcreek *hellcreek = priv;
@@ -1732,6 +1748,7 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port,
 }
 
 static const struct dsa_switch_ops hellcreek_ds_ops = {
+	.devlink_info_get      = hellcreek_devlink_info_get,
 	.get_ethtool_stats     = hellcreek_get_ethtool_stats,
 	.get_sset_count	       = hellcreek_get_sset_count,
 	.get_strings	       = hellcreek_get_strings,
@@ -1909,6 +1926,7 @@ static int hellcreek_remove(struct platform_device *pdev)
 }
 
 static const struct hellcreek_platform_data de1soc_r1_pdata = {
+	.name		 = "r4c30",
 	.num_ports	 = 4,
 	.is_100_mbits	 = 1,
 	.qbv_support	 = 1,
diff --git a/include/linux/platform_data/hirschmann-hellcreek.h b/include/linux/platform_data/hirschmann-hellcreek.h
index 388846766bb2..6a000df5541f 100644
--- a/include/linux/platform_data/hirschmann-hellcreek.h
+++ b/include/linux/platform_data/hirschmann-hellcreek.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 
 struct hellcreek_platform_data {
+	const char *name;	/* Switch name */
 	int num_ports;		/* Amount of switch ports */
 	int is_100_mbits;	/* Is it configured to 100 or 1000 mbit/s */
 	int qbv_support;	/* Qbv support on front TSN ports */
-- 
cgit v1.2.3


From 78b226d48106fc91628f941c66545f05273269df Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 19 Mar 2021 13:59:08 -0700
Subject: libbpf: Skip BTF fixup if object file has no BTF

Skip BTF fixup step when input object file is missing BTF altogether.

Fixes: 8fd27bf69b86 ("libbpf: Add BPF static linker BTF and BTF.ext support")
Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20210319205909.1748642-3-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b4fff912dce2..5e0aa2f2c0ca 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1313,6 +1313,9 @@ static int linker_fixup_btf(struct src_obj *obj)
 	struct src_sec *sec;
 	int i, j, n, m;
 
+	if (!obj->btf)
+		return 0;
+
 	n = btf__get_nr_types(obj->btf);
 	for (i = 1; i <= n; i++) {
 		struct btf_var_secinfo *vi;
-- 
cgit v1.2.3


From 3af70b39fa2d415dc86c370e5b24ddb9fdacbd6f Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Tue, 23 Mar 2021 16:32:20 +0800
Subject: Bluetooth: check for zapped sk before connecting

There is a possibility of receiving a zapped sock on
l2cap_sock_connect(). This could lead to interesting crashes, one
such case is tearing down an already tore l2cap_sock as is happened
with this call trace:

__dump_stack lib/dump_stack.c:15 [inline]
dump_stack+0xc4/0x118 lib/dump_stack.c:56
register_lock_class kernel/locking/lockdep.c:792 [inline]
register_lock_class+0x239/0x6f6 kernel/locking/lockdep.c:742
__lock_acquire+0x209/0x1e27 kernel/locking/lockdep.c:3105
lock_acquire+0x29c/0x2fb kernel/locking/lockdep.c:3599
__raw_spin_lock_bh include/linux/spinlock_api_smp.h:137 [inline]
_raw_spin_lock_bh+0x38/0x47 kernel/locking/spinlock.c:175
spin_lock_bh include/linux/spinlock.h:307 [inline]
lock_sock_nested+0x44/0xfa net/core/sock.c:2518
l2cap_sock_teardown_cb+0x88/0x2fb net/bluetooth/l2cap_sock.c:1345
l2cap_chan_del+0xa3/0x383 net/bluetooth/l2cap_core.c:598
l2cap_chan_close+0x537/0x5dd net/bluetooth/l2cap_core.c:756
l2cap_chan_timeout+0x104/0x17e net/bluetooth/l2cap_core.c:429
process_one_work+0x7e3/0xcb0 kernel/workqueue.c:2064
worker_thread+0x5a5/0x773 kernel/workqueue.c:2196
kthread+0x291/0x2a6 kernel/kthread.c:211
ret_from_fork+0x4e/0x80 arch/x86/entry/entry_64.S:604

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reported-by: syzbot+abfc0f5e668d4099af73@syzkaller.appspotmail.com
Reviewed-by: Alain Michaud <alainm@chromium.org>
Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_sock.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index f1b1edd0b697..c99d65ef13b1 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -179,9 +179,17 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
 	struct l2cap_chan *chan = l2cap_pi(sk)->chan;
 	struct sockaddr_l2 la;
 	int len, err = 0;
+	bool zapped;
 
 	BT_DBG("sk %p", sk);
 
+	lock_sock(sk);
+	zapped = sock_flag(sk, SOCK_ZAPPED);
+	release_sock(sk);
+
+	if (zapped)
+		return -EINVAL;
+
 	if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
 	    addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
-- 
cgit v1.2.3


From c29fb5f65072b784717ca16d6f136461d2ee04c4 Mon Sep 17 00:00:00 2001
From: Meng Yu <yumeng18@huawei.com>
Date: Tue, 23 Mar 2021 14:22:56 +0800
Subject: Bluetooth: Remove trailing semicolon in macros

remove trailing semicolon in macros and coding style fix.

Signed-off-by: Meng Yu <yumeng18@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index efc19f98b959..2def90668173 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -54,7 +54,7 @@
 #define SMP_ALLOW_CMD(smp, code)	set_bit(code, &smp->allow_cmd)
 
 /* Keys which are not distributed with Secure Connections */
-#define SMP_SC_NO_DIST (SMP_DIST_ENC_KEY | SMP_DIST_LINK_KEY);
+#define SMP_SC_NO_DIST (SMP_DIST_ENC_KEY | SMP_DIST_LINK_KEY)
 
 #define SMP_TIMEOUT	msecs_to_jiffies(30000)
 
@@ -398,7 +398,7 @@ static int smp_e(const u8 *k, u8 *r)
 
 	SMP_DBG("r %16phN", r);
 
-	memzero_explicit(&ctx, sizeof (ctx));
+	memzero_explicit(&ctx, sizeof(ctx));
 	return err;
 }
 
-- 
cgit v1.2.3


From ef860480ea18a8bf0b4d8144c9071e5454d380f2 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Tue, 2 Mar 2021 10:15:45 -0800
Subject: ice: Fix prototype warnings

Correct reported warnings for "warning: expecting prototype for ...
Prototype was for ... instead"

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c      | 2 +-
 drivers/net/ethernet/intel/ice/ice_dcb.c         | 2 +-
 drivers/net/ethernet/intel/ice/ice_fdir.c        | 2 +-
 drivers/net/ethernet/intel/ice/ice_main.c        | 4 ++--
 drivers/net/ethernet/intel/ice/ice_sched.c       | 2 +-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3d9475e222cd..1898325e62b5 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -4373,7 +4373,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
 }
 
 /**
- * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl
+ * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl
  * @hw: pointer to HW struct
  */
 bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index e42727941ef5..85c9eccfdae8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -834,7 +834,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
 }
 
 /**
- * ice_get_ieee_dcb_cfg
+ * ice_get_ieee_or_cee_dcb_cfg
  * @pi: port information structure
  * @dcbx_mode: mode of DCBX (IEEE or CEE)
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index e8155c7954a0..59ef68f072c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -1135,7 +1135,7 @@ bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
 }
 
 /**
- * ice_fdir_find_by_idx - find filter with idx
+ * ice_fdir_find_fltr_by_idx - find filter with idx
  * @hw: pointer to hardware structure
  * @fltr_idx: index to find.
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index eb895e6db077..f318d7f607e4 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1044,7 +1044,7 @@ struct ice_aq_task {
 };
 
 /**
- * ice_wait_for_aq_event - Wait for an AdminQ event from firmware
+ * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
  * @pf: pointer to the PF private structure
  * @opcode: the opcode to wait for
  * @timeout: how long to wait, in jiffies
@@ -4321,7 +4321,7 @@ static void ice_set_wake(struct ice_pf *pf)
 }
 
 /**
- * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet
+ * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
  * @pf: pointer to the PF struct
  *
  * Issue firmware command to enable multicast magic wake, making
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 2403cb38b93c..f890337cc24a 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1857,7 +1857,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
 }
 
 /**
- * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry
+ * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
  * @pi: port information structure
  * @vsi_handle: software VSI handle
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 20343a0fe726..78679ece2e08 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1591,7 +1591,7 @@ teardown:
 }
 
 /**
- * ice_set_dflt_settings - set VF defaults during initialization/creation
+ * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation
  * @pf: PF holding reference to all VFs for default configuration
  */
 static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
@@ -4182,7 +4182,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
 }
 
 /**
- * ice_print_vfs_mdd_event - print VFs malicious driver detect event
+ * ice_print_vfs_mdd_events - print VFs malicious driver detect event
  * @pf: pointer to the PF structure
  *
  * Called from ice_handle_mdd_event to rate limit and print VFs MDD events.
-- 
cgit v1.2.3


From c4cdb4efa20c8d4f6ceb3d2f57904048494911a4 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 7 Mar 2021 10:44:27 +0200
Subject: igc: Fix prototype warning

Correct report warnings in igc_i225.c

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_i225.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index 7ec04e48860c..cc83bb5c15e8 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -6,7 +6,7 @@
 #include "igc_hw.h"
 
 /**
- * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
  * @hw: pointer to the HW structure
  *
  * Acquire the necessary semaphores for exclusive access to the EEPROM.
-- 
cgit v1.2.3


From 39da2cac42d4a4b44c04575ad444ad128e1a2c6b Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 14 Mar 2021 15:59:20 +0200
Subject: e1000e: Fix prototype warning

Correct report warnings in ich8lan.c, netdev.c phy.c and ptp.c files

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 4 ++--
 drivers/net/ethernet/intel/e1000e/netdev.c  | 6 +++---
 drivers/net/ethernet/intel/e1000e/phy.c     | 2 +-
 drivers/net/ethernet/intel/e1000e/ptp.c     | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 0ac8d79a7987..590ad110d383 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -2745,7 +2745,7 @@ release:
 }
 
 /**
- *  e1000_k1_gig_workaround_lv - K1 Si workaround
+ *  e1000_k1_workaround_lv - K1 Si workaround
  *  @hw:   pointer to the HW structure
  *
  *  Workaround to set the K1 beacon duration for 82579 parts in 10Mbps
@@ -5220,7 +5220,7 @@ void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
 }
 
 /**
- *  e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3
+ *  e1000e_igp3_phy_powerdown_workaround_ich8lan - Power down workaround on D3
  *  @hw: pointer to the HW structure
  *
  *  Workaround for 82566 power-down on D3 entry:
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 39ca02b7fdc5..31b8726fd69b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5987,7 +5987,7 @@ static void e1000_reset_task(struct work_struct *work)
 }
 
 /**
- * e1000_get_stats64 - Get System Network Statistics
+ * e1000e_get_stats64 - Get System Network Statistics
  * @netdev: network interface device structure
  * @stats: rtnl_link_stats64 pointer
  *
@@ -6160,7 +6160,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
 }
 
 /**
- * e1000e_hwtstamp_ioctl - control hardware time stamping
+ * e1000e_hwtstamp_set - control hardware time stamping
  * @netdev: network interface device structure
  * @ifr: interface request
  *
@@ -6818,7 +6818,7 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
 }
 
 /**
- * e1000e_disable_aspm_locked   Disable ASPM states.
+ * e1000e_disable_aspm_locked - Disable ASPM states.
  * @pdev: pointer to PCI device struct
  * @state: bit-mask of ASPM states to disable
  *
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index bdd9dc163f15..1db35b2c7750 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -371,7 +371,7 @@ s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
 }
 
 /**
- *  e1000e_write_phy_reg_igp - Write igp PHY register
+ *  __e1000e_write_phy_reg_igp - Write igp PHY register
  *  @hw: pointer to the HW structure
  *  @offset: register offset to write to
  *  @data: data to write at register offset
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index f3f671311855..9e79d672f4f1 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -142,7 +142,7 @@ static int e1000e_phc_get_syncdevicetime(ktime_t *device,
 }
 
 /**
- * e1000e_phc_getsynctime - Reads the current system/device cross timestamp
+ * e1000e_phc_getcrosststamp - Reads the current system/device cross timestamp
  * @ptp: ptp clock structure
  * @xtstamp: structure containing timestamp
  *
-- 
cgit v1.2.3


From 262de08f64e34bf1731e0e6296a9fc1e493e8b54 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 18 Mar 2021 16:18:52 -0700
Subject: intel: clean up mismatched header comments

A bunch of header comments were showing warnings when compiling
with W=1. Fix them all at once. This changes only comments.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c   |  4 ++--
 drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c |  2 +-
 drivers/net/ethernet/intel/fm10k/fm10k_main.c    |  4 ++--
 drivers/net/ethernet/intel/fm10k/fm10k_mbx.c     |  2 +-
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c      |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_common.c    |  6 +++---
 drivers/net/ethernet/intel/i40e/i40e_dcb.c       |  4 ++--
 drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c    |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_ddp.c       |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_debugfs.c   |  4 ++--
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c   |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c   |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c      |  6 +++---
 drivers/net/ethernet/intel/i40e/i40e_nvm.c       |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_ptp.c       |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c      |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c       |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_main.c      |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_txrx.c      |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c  |  4 ++--
 drivers/net/ethernet/intel/igb/e1000_mbx.c       |  2 +-
 drivers/net/ethernet/intel/igb/igb_main.c        |  6 +++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c  | 15 +++++++--------
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  4 ++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c     |  5 +++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c    |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c    |  2 +-
 drivers/net/ethernet/intel/ixgbevf/vf.c          | 18 +++++++++++++-----
 29 files changed, 61 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
index c45315472245..86397c564dfc 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -105,7 +105,7 @@ static int fm10k_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
 }
 
 /**
- * fm10k_dcbnl_ieee_getdcbx - get the DCBX configuration for the device
+ * fm10k_dcbnl_getdcbx - get the DCBX configuration for the device
  * @dev: netdev interface for the device
  *
  * Returns that we support only IEEE DCB for this interface
@@ -116,7 +116,7 @@ static u8 fm10k_dcbnl_getdcbx(struct net_device __always_unused *dev)
 }
 
 /**
- * fm10k_dcbnl_ieee_setdcbx - get the DCBX configuration for the device
+ * fm10k_dcbnl_setdcbx - get the DCBX configuration for the device
  * @dev: netdev interface for the device
  * @mode: new mode for this device
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 1d27b2fb23af..5c77054d67c6 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -185,7 +185,7 @@ void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector)
 }
 
 /**
- * fm10k_dbg_free_q_vector_dir - setup debugfs for the q_vectors
+ * fm10k_dbg_q_vector_exit - setup debugfs for the q_vectors
  * @q_vector: q_vector to allocate directories for
  **/
 void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 247f44f4cb30..3362f26d7f99 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1774,7 +1774,7 @@ static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
 }
 
 /**
- * f10k_reset_msix_capability - reset MSI-X capability
+ * fm10k_reset_msix_capability - reset MSI-X capability
  * @interface: board private structure to initialize
  *
  * Reset the MSI-X capability back to its starting state
@@ -1787,7 +1787,7 @@ static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
 }
 
 /**
- * f10k_init_msix_capability - configure MSI-X capability
+ * fm10k_init_msix_capability - configure MSI-X capability
  * @interface: board private structure to initialize
  *
  * Attempt to configure the interrupts using the best available
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 8e2e92bf3cd4..a2642b9b3602 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -692,7 +692,7 @@ static bool fm10k_mbx_tx_complete(struct fm10k_mbx_info *mbx)
 }
 
 /**
- *  fm10k_mbx_deqeueue_rx - Dequeues the message from the head in the Rx FIFO
+ *  fm10k_mbx_dequeue_rx - Dequeues the message from the head in the Rx FIFO
  *  @hw: pointer to hardware structure
  *  @mbx: pointer to mailbox
  *
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index c0780c3624c8..af1b0cde3670 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1417,7 +1417,7 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results,
 }
 
 /**
- *  fm10k_update_stats_hw_pf - Updates hardware related statistics of PF
+ *  fm10k_update_hw_stats_pf - Updates hardware related statistics of PF
  *  @hw: pointer to hardware structure
  *  @stats: pointer to the stats structure to update
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index ec19e18305ec..41b813fe07a5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -2332,7 +2332,7 @@ i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
 }
 
 /**
- * i40e_get_vsi_params - get VSI configuration info
+ * i40e_aq_get_vsi_params - get VSI configuration info
  * @hw: pointer to the hw struct
  * @vsi_ctx: pointer to a vsi context struct
  * @cmd_details: pointer to command details structure or NULL
@@ -2586,7 +2586,7 @@ i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
 }
 
 /**
- * i40e_updatelink_status - update status of the HW network link
+ * i40e_update_link_info - update status of the HW network link
  * @hw: pointer to the hw struct
  **/
 noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw)
@@ -5059,7 +5059,7 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num)
 }
 
 /**
- * i40e_blink_phy_led
+ * i40e_blink_phy_link_led
  * @hw: pointer to the HW structure
  * @time: time how long led will blinks in secs
  * @interval: gap between LED on and off in msecs
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 243b0d2b7b72..673f341f4c0c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -234,7 +234,7 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv,
 }
 
 /**
- * i40e_parse_ieee_etsrec_tlv
+ * i40e_parse_ieee_tlv
  * @tlv: IEEE 802.1Qaz TLV
  * @dcbcfg: Local store to update ETS REC data
  *
@@ -1588,7 +1588,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
 }
 
 /**
- * i40e_dcb_hw_rx_ets_bw_config
+ * i40e_dcb_hw_rx_up2tc_config
  * @hw: pointer to the hw struct
  * @prio_tc: priority to tc assignment indexed by priority
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
index 0345132a0ef5..e32c61909b31 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -392,7 +392,7 @@ static void i40e_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
 }
 
 /**
- * i40e_dcbnl_set_pg_tc_cfg_tx - Set CEE PG Tx BW config
+ * i40e_dcbnl_set_pg_bwg_cfg_tx - Set CEE PG Tx BW config
  * @netdev: the corresponding netdev
  * @pgid: the corresponding traffic class
  * @bw_pct: the BW percentage for the specified traffic class
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
index 5e08f100c413..e1069ae658ad 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
@@ -77,7 +77,7 @@ static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new,
 }
 
 /**
- * i40e_ddp_does_profiles_ - checks if DDP overlaps with existing one.
+ * i40e_ddp_does_profile_overlap - checks if DDP overlaps with existing one.
  * @hw: HW data structure
  * @pinfo: DDP profile information structure
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index d7c13ca9be7d..e8230da29f05 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -651,7 +651,7 @@ static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf)
 }
 
 /**
- * i40e_dbg_dump_stats - handles dump stats write into command datum
+ * i40e_dbg_dump_eth_stats - handles dump stats write into command datum
  * @pf: the i40e_pf created in command write
  * @estats: the eth stats structure to be dumped
  **/
@@ -1638,7 +1638,7 @@ static const struct file_operations i40e_dbg_command_fops = {
 static char i40e_dbg_netdev_ops_buf[256] = "";
 
 /**
- * i40e_dbg_netdev_ops - read for netdev_ops datum
+ * i40e_dbg_netdev_ops_read - read for netdev_ops datum
  * @filp: the opened file
  * @buffer: where to write the data for the user to read
  * @count: the size of the user's buffer
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 3c9054e13aa5..c4c167650b6b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -212,7 +212,7 @@ static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
 }
 
 /**
- * 40e_add_stat_strings - copy stat strings into ethtool buffer
+ * i40e_add_stat_strings - copy stat strings into ethtool buffer
  * @p: ethtool supplied buffer
  * @stats: stat definitions array
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index a3da422ab05b..d6e92ecddfbd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -511,7 +511,7 @@ configure_lan_hmc_out:
 }
 
 /**
- * i40e_delete_hmc_object - remove hmc objects
+ * i40e_delete_lan_hmc_object - remove hmc objects
  * @hw: pointer to the HW structure
  * @info: pointer to i40e_hmc_delete_obj_info struct
  *
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 353deae139f9..14a1bad9af74 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2023,7 +2023,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
 }
 
 /**
- * i40e_next_entry - Get the next non-broadcast filter from a list
+ * i40e_next_filter - Get the next non-broadcast filter from a list
  * @next: pointer to filter in list
  *
  * Returns the next non-broadcast filter in the list. Required so that we
@@ -5191,7 +5191,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 }
 
 /**
- * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes
+ * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
  * @pf: PF being queried
  *
  * Return a bitmap for enabled traffic classes for this PF.
@@ -9454,7 +9454,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
 }
 
 /**
- * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed
+ * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
  * @pf: board private structure
  **/
 u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 7164f4ad8120..fe6dca846028 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -4,7 +4,7 @@
 #include "i40e_prototype.h"
 
 /**
- * i40e_init_nvm_ops - Initialize NVM function pointers
+ * i40e_init_nvm - Initialize NVM function pointers
  * @hw: pointer to the HW structure
  *
  * Setup the function pointers and the NVM info structure. Should be called
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index 7a879614ca55..f1f6fc3744e9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -216,7 +216,7 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
 }
 
 /**
- * i40e_ptp_update_latch_events - Read I40E_PRTTSYN_STAT_1 and latch events
+ * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events
  * @pf: the PF data structure
  *
  * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 627794b31e33..895f59a06fdb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -3345,7 +3345,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 }
 
 /**
- * i40e_create_tx_ctx Build the Tx context descriptor
+ * i40e_create_tx_ctx - Build the Tx context descriptor
  * @tx_ring:  ring to create the descriptor on
  * @cd_type_cmd_tso_mss: Quad Word 1
  * @cd_tunneling: Quad Word 0 - bits 0-31
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index edddaf034638..d89c22347d9d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -628,7 +628,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
 }
 
 /**
- * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
+ * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
  * @tx_ring: XDP Tx ring
  **/
 void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index d0c16117484f..a3268c894d85 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2546,7 +2546,7 @@ validate_bw:
 }
 
 /**
- * iavf_validate_channel_config - validate queue mapping info
+ * iavf_validate_ch_config - validate queue mapping info
  * @adapter: board private structure
  * @mqprio_qopt: queue parameters
  *
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index ffaf2742a2e0..d6cba53a3a21 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -2098,7 +2098,7 @@ static int iavf_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 }
 
 /**
- * iavf_create_tx_ctx Build the Tx context descriptor
+ * iavf_create_tx_ctx - Build the Tx context descriptor
  * @tx_ring:  ring to create the descriptor on
  * @cd_type_cmd_tso_mss: Quad Word 1
  * @cd_tunneling: Quad Word 0 - bits 0-31
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 43a4d4ef415f..3069092468b2 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1006,7 +1006,7 @@ iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter,
 }
 
 /**
- * iavf_enable_channel
+ * iavf_enable_channels
  * @adapter: adapter structure
  *
  * Request that the PF enable channels as specified by
@@ -1047,7 +1047,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter)
 }
 
 /**
- * iavf_disable_channel
+ * iavf_disable_channels
  * @adapter: adapter structure
  *
  * Request that the PF disable channels that are configured
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
index 33cceb77e960..29383112bc19 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mbx.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -441,7 +441,7 @@ out_no_read:
 }
 
 /**
- *  e1000_init_mbx_params_pf - set initial values for pf mailbox
+ *  igb_init_mbx_params_pf - set initial values for pf mailbox
  *  @hw: pointer to the HW structure
  *
  *  Initializes the hw->mbx struct to correct values for pf mailbox
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f929eaa836c8..854d19fbf4a4 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2037,7 +2037,7 @@ static void igb_power_down_link(struct igb_adapter *adapter)
 }
 
 /**
- * Detect and switch function for Media Auto Sense
+ * igb_check_swap_media -  Detect and switch function for Media Auto Sense
  * @adapter: address of the board private structure
  **/
 static void igb_check_swap_media(struct igb_adapter *adapter)
@@ -4020,7 +4020,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
 }
 
 /**
- *  igb_open - Called when a network interface is made active
+ *  __igb_open - Called when a network interface is made active
  *  @netdev: network interface device structure
  *  @resuming: indicates whether we are in a resume call
  *
@@ -4138,7 +4138,7 @@ int igb_open(struct net_device *netdev)
 }
 
 /**
- *  igb_close - Disables a network interface
+ *  __igb_close - Disables a network interface
  *  @netdev: network interface device structure
  *  @suspending: indicates we are in a suspend call
  *
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 8d3798a32f0e..f9a31f1ac7bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1351,7 +1351,7 @@ static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
 }
 
 /**
- *  ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter
+ *  ixgbe_fdir_add_signature_filter_82599 - Adds a signature hash filter
  *  @hw: pointer to hardware structure
  *  @input: unique input dword
  *  @common: compressed common input dword
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 62ddb452f862..9521d335ea07 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -2707,7 +2707,7 @@ s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_enable_rx_buff - Enables the receive data path
+ *  ixgbe_enable_rx_buff_generic - Enables the receive data path
  *  @hw: pointer to hardware structure
  *
  *  Enables the receive data path
@@ -3029,14 +3029,14 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
 }
 
 /**
+ *  ixgbe_set_vmdq_san_mac_generic - Associate VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @vmdq: VMDq pool index
+ *
  *  This function should only be involved in the IOV mode.
  *  In IOV mode, Default pool is next pool after the number of
  *  VFs advertized and not 0.
  *  MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
- *
- *  ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address
- *  @hw: pointer to hardware struct
- *  @vmdq: VMDq pool index
  **/
 s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
 {
@@ -3896,7 +3896,7 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
 }
 
 /**
- *  ixgbe_get_thermal_sensor_data - Gathers thermal sensor data
+ *  ixgbe_get_thermal_sensor_data_generic - Gathers thermal sensor data
  *  @hw: pointer to hardware structure
  *
  *  Returns the thermal sensor data structure
@@ -4054,8 +4054,7 @@ void ixgbe_get_orom_version(struct ixgbe_hw *hw,
 }
 
 /**
- *  ixgbe_get_oem_prod_version Etrack ID from EEPROM
- *
+ *  ixgbe_get_oem_prod_version - Etrack ID from EEPROM
  *  @hw: pointer to hardware structure
  *  @nvm_ver: pointer to output structure
  *
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9f3f12e2ccf2..4c90f83fd6ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -225,7 +225,7 @@ static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
 }
 
 /**
- * ixgbe_check_from_parent - Determine whether PCIe info should come from parent
+ * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent
  * @hw: hw specific details
  *
  * This function is used by probe to determine whether a device's PCI-Express
@@ -6156,7 +6156,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 }
 
 /**
- * ixgbe_eee_capable - helper function to determine EEE support on X550
+ * ixgbe_set_eee_capable - helper function to determine EEE support on X550
  * @adapter: board private structure
  */
 static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index fc389eecdd2b..73bc170d1ae9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -461,12 +461,13 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
 }
 
 /**
- *  ixgbe_read_phy_mdi - Reads a value from a specified PHY register without
- *  the SWFW lock
+ *  ixgbe_read_phy_reg_mdi - read PHY register
  *  @hw: pointer to hardware structure
  *  @reg_addr: 32 bit address of PHY register to read
  *  @device_type: 5 bit device type
  *  @phy_data: Pointer to read data from PHY register
+ *
+ *  Reads a value from a specified PHY register without the SWFW lock
  **/
 s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
 		       u16 *phy_data)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 4b93ba149ec5..d5cfb51ff648 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -701,7 +701,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
 }
 
 /**
- * ixgbe_release_nvm_semaphore - Release hardware semaphore
+ * ixgbe_release_swfw_sync_semaphore - Release hardware semaphore
  * @hw: pointer to hardware structure
  *
  * This function clears hardware semaphore bits.
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 5e339afa682a..9724ffb16518 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1248,7 +1248,7 @@ static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
 }
 
 /**
- * ixgbe_fw_recovery_mode - Check FW NVM recovery mode
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
  * @hw: pointer t hardware structure
  *
  * Returns true if in FW NVM recovery mode.
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index bfe6dfcec4ab..5fc347abab3c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -121,9 +121,11 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
 }
 
 /**
+ * ixgbevf_hv_reset_hw_vf - reset via Hyper-V
+ * @hw: pointer to private hardware struct
+ *
  * Hyper-V variant; the VF/PF communication is through the PCI
  * config space.
- * @hw: pointer to private hardware struct
  */
 static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
 {
@@ -513,9 +515,11 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_update_mc_addr_list_vf - stub
  * @hw: unused
  * @netdev: unused
+ *
+ * Hyper-V variant - just a stub.
  */
 static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 					     struct net_device *netdev)
@@ -564,9 +568,11 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_update_xcast_mode - stub
  * @hw: unused
  * @xcast_mode: unused
+ *
+ * Hyper-V variant - just a stub.
  */
 static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
 {
@@ -608,7 +614,7 @@ mbx_err:
 }
 
 /**
- * Hyper-V variant - just a stub.
+ * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub.
  * @hw: unused
  * @vlan: unused
  * @vind: unused
@@ -726,11 +732,13 @@ out:
 }
 
 /**
- * Hyper-V variant; there is no mailbox communication.
+ * ixgbevf_hv_check_mac_link_vf - check link
  * @hw: pointer to private hardware struct
  * @speed: pointer to link speed
  * @link_up: true is link is up, false otherwise
  * @autoneg_wait_to_complete: unused
+ *
+ * Hyper-V variant; there is no mailbox communication.
  */
 static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw,
 					ixgbe_link_speed *speed,
-- 
cgit v1.2.3


From 9ded647a5141e1d4152a3b263fe6eab047766567 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 02:52:57 -0600
Subject: ice: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 02b12736ea80..207f6ee3a7f6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -143,6 +143,7 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	case ICE_RX_PTYPE_INNER_PROT_UDP:
 	case ICE_RX_PTYPE_INNER_PROT_SCTP:
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From f83a0d0adac606ef3a6f5ecb6e40b7afeb227a75 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 02:57:33 -0600
Subject: fm10k: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding a couple of break statements instead of
just letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_mbx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index a2642b9b3602..30ca9ee1900b 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1039,6 +1039,7 @@ static s32 fm10k_mbx_create_reply(struct fm10k_hw *hw,
 	case FM10K_STATE_CLOSED:
 		/* generate new header based on data */
 		fm10k_mbx_create_disconnect_hdr(mbx);
+		break;
 	default:
 		break;
 	}
@@ -2017,6 +2018,7 @@ static s32 fm10k_sm_mbx_process_reset(struct fm10k_hw *hw,
 	case FM10K_STATE_CONNECT:
 		/* Update remote value to match local value */
 		mbx->remote = mbx->local;
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 27e40255e5aca08220893f3a7441741042af072d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 03:00:00 -0600
Subject: ixgbe: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of just
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c  | 2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c    | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c    | 1 +
 4 files changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index f9a31f1ac7bc..e324e42fab2d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1542,6 +1542,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
 	switch (input_mask->formatted.vm_pool & 0x7F) {
 	case 0x0:
 		fdirm |= IXGBE_FDIRM_POOL;
+		break;
 	case 0x7F:
 		break;
 	default:
@@ -1557,6 +1558,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
 			hw_dbg(hw, " Error on src/dst port mask\n");
 			return IXGBE_ERR_CONFIG;
 		}
+		break;
 	case IXGBE_ATR_L4TYPE_MASK:
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 9521d335ea07..03ccbe6b66d2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -93,6 +93,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index df389a11d3af..0218f6c9b925 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -132,6 +132,7 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
 			else
 				*tx = (tc + 4) << 4;	/* 96, 112 */
 		}
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 22a874eee2e8..23ddfd79fc8b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -999,6 +999,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
 	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
 		tsync_tx_ctl = 0;
+		break;
 	case HWTSTAMP_TX_ON:
 		break;
 	default:
-- 
cgit v1.2.3


From 52c406989a51bffcaf4f685dd44714835c3ca57b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 03:05:11 -0600
Subject: igb: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of just
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_phy.c   | 1 +
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 1 +
 drivers/net/ethernet/intel/igb/igb_ptp.c     | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 8c8eb82e6272..a018000f7db9 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -836,6 +836,7 @@ s32 igb_copper_link_setup_igp(struct e1000_hw *hw)
 			break;
 		case e1000_ms_auto:
 			data &= ~CR_1000T_MS_ENABLE;
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 4ab9f468f08e..7545da216d8b 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3010,6 +3010,7 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 		break;
 	case ETHTOOL_SRXCLSRLDEL:
 		ret = igb_del_ethtool_nfc_entry(adapter, cmd);
+		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 7cc5428c3b3d..f3ff565da0a1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1008,6 +1008,7 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
 	switch (config->tx_type) {
 	case HWTSTAMP_TX_OFF:
 		tsync_tx_ctl = 0;
+		break;
 	case HWTSTAMP_TX_ON:
 		break;
 	default:
-- 
cgit v1.2.3


From d8f0c306985eef4893106e24b9a68dbe8561e3b7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 03:46:42 -0600
Subject: ixgbevf: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 449d7d5b280d..ba2ed8a43d2d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2633,6 +2633,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 			adapter->num_rx_queues = rss;
 			adapter->num_tx_queues = rss;
 			adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From 67831a08a778818350cd4cd6d27dcd989b0321e7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 03:51:11 -0600
Subject: e1000: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_hw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 4c0c9433bd60..19cf36360933 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -1183,6 +1183,7 @@ static s32 e1000_copper_link_igp_setup(struct e1000_hw *hw)
 			break;
 		case e1000_ms_auto:
 			phy_data &= ~CR_1000T_MS_ENABLE;
+			break;
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From 549750babea10621143eaec2eb58a15537a0a434 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Tue, 23 Mar 2021 21:30:07 +0100
Subject: batman-adv: Fix order of kernel doc in batadv_priv
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the inlining process of kerneldoc in commit 8b84cc4fb556
("batman-adv: Use inline kernel-doc for enum/struct"), some comments were
placed at the wrong struct members. Fixing this by reordering the comments.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/types.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 7c0b475cc22a..2be5d4a712c5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1659,19 +1659,19 @@ struct batadv_priv {
 	/** @tp_list: list of tp sessions */
 	struct hlist_head tp_list;
 
-	/** @tp_num: number of currently active tp sessions */
+	/** @orig_hash: hash table containing mesh participants (orig nodes) */
 	struct batadv_hashtable *orig_hash;
 
-	/** @orig_hash: hash table containing mesh participants (orig nodes) */
+	/** @forw_bat_list_lock: lock protecting forw_bat_list */
 	spinlock_t forw_bat_list_lock;
 
-	/** @forw_bat_list_lock: lock protecting forw_bat_list */
+	/** @forw_bcast_list_lock: lock protecting forw_bcast_list */
 	spinlock_t forw_bcast_list_lock;
 
-	/** @forw_bcast_list_lock: lock protecting forw_bcast_list */
+	/** @tp_list_lock: spinlock protecting @tp_list */
 	spinlock_t tp_list_lock;
 
-	/** @tp_list_lock: spinlock protecting @tp_list */
+	/** @tp_num: number of currently active tp sessions */
 	atomic_t tp_num;
 
 	/** @orig_work: work queue callback item for orig node purging */
-- 
cgit v1.2.3


From 5fc087ff96fdb4447f9f7e1d8a90c05196cf5ad8 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 23 Mar 2021 21:52:28 +0100
Subject: batman-adv: Drop unused header preempt.h

The commit b1de0f01b011 ("batman-adv: Use netif_rx_any_context().") removed
the last user for a function declaration from linux/preempt.h. The include
should therefore be cleaned up.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index bcd543ce835b..7dc133cfc363 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -25,7 +25,6 @@
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
-#include <linux/preempt.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/skbuff.h>
-- 
cgit v1.2.3


From 65d2dbb300197839eafc4171cfeb57a14c452724 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Sun, 21 Mar 2021 02:39:35 -0700
Subject: net: lapb: Make "lapb_t1timer_running" able to detect an already
 running timer

Problem:

The "lapb_t1timer_running" function in "lapb_timer.c" is used in only
one place: in the "lapb_kick" function in "lapb_out.c". "lapb_kick" calls
"lapb_t1timer_running" to check if the timer is already pending, and if
it is not, schedule it to run.

However, if the timer has already fired and is running, and is waiting to
get the "lapb->lock" lock, "lapb_t1timer_running" will not detect this,
and "lapb_kick" will then schedule a new timer. The old timer will then
abort when it sees a new timer pending.

I think this is not right. The purpose of "lapb_kick" should be ensuring
that the actual work of the timer function is scheduled to be done.
If the timer function is already running but waiting for the lock,
"lapb_kick" should not abort and reschedule it.

Changes made:

I added a new field "t1timer_running" in "struct lapb_cb" for
"lapb_t1timer_running" to use. "t1timer_running" will accurately reflect
whether the actual work of the timer is pending. If the timer has fired
but is still waiting for the lock, "t1timer_running" will still correctly
reflect whether the actual work is waiting to be done.

The old "t1timer_stop" field, whose only responsibility is to ask a timer
(that is already running but waiting for the lock) to abort, is no longer
needed, because the new "t1timer_running" field can fully take over its
responsibility. Therefore "t1timer_stop" is deleted.

"t1timer_running" is not simply a negation of the old "t1timer_stop".
At the end of the timer function, if it does not reschedule itself,
"t1timer_running" is set to false to indicate that the timer is stopped.

For consistency of the code, I also added "t2timer_running" and deleted
"t2timer_stop".

Signed-off-by: Xie He <xie.he.0141@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/lapb.h    |  2 +-
 net/lapb/lapb_iface.c |  4 ++--
 net/lapb/lapb_timer.c | 19 ++++++++++++-------
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/net/lapb.h b/include/net/lapb.h
index eee73442a1ba..124ee122f2c8 100644
--- a/include/net/lapb.h
+++ b/include/net/lapb.h
@@ -92,7 +92,7 @@ struct lapb_cb {
 	unsigned short		n2, n2count;
 	unsigned short		t1, t2;
 	struct timer_list	t1timer, t2timer;
-	bool			t1timer_stop, t2timer_stop;
+	bool			t1timer_running, t2timer_running;
 
 	/* Internal control information */
 	struct sk_buff_head	write_queue;
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 0511bbe4af7b..1078e14f1acf 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -122,8 +122,8 @@ static struct lapb_cb *lapb_create_cb(void)
 
 	timer_setup(&lapb->t1timer, NULL, 0);
 	timer_setup(&lapb->t2timer, NULL, 0);
-	lapb->t1timer_stop = true;
-	lapb->t2timer_stop = true;
+	lapb->t1timer_running = false;
+	lapb->t2timer_running = false;
 
 	lapb->t1      = LAPB_DEFAULT_T1;
 	lapb->t2      = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 0230b272b7d1..5be68869064d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -40,7 +40,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb)
 	lapb->t1timer.function = lapb_t1timer_expiry;
 	lapb->t1timer.expires  = jiffies + lapb->t1;
 
-	lapb->t1timer_stop = false;
+	lapb->t1timer_running = true;
 	add_timer(&lapb->t1timer);
 }
 
@@ -51,25 +51,25 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
 	lapb->t2timer.function = lapb_t2timer_expiry;
 	lapb->t2timer.expires  = jiffies + lapb->t2;
 
-	lapb->t2timer_stop = false;
+	lapb->t2timer_running = true;
 	add_timer(&lapb->t2timer);
 }
 
 void lapb_stop_t1timer(struct lapb_cb *lapb)
 {
-	lapb->t1timer_stop = true;
+	lapb->t1timer_running = false;
 	del_timer(&lapb->t1timer);
 }
 
 void lapb_stop_t2timer(struct lapb_cb *lapb)
 {
-	lapb->t2timer_stop = true;
+	lapb->t2timer_running = false;
 	del_timer(&lapb->t2timer);
 }
 
 int lapb_t1timer_running(struct lapb_cb *lapb)
 {
-	return timer_pending(&lapb->t1timer);
+	return lapb->t1timer_running;
 }
 
 static void lapb_t2timer_expiry(struct timer_list *t)
@@ -79,13 +79,14 @@ static void lapb_t2timer_expiry(struct timer_list *t)
 	spin_lock_bh(&lapb->lock);
 	if (timer_pending(&lapb->t2timer)) /* A new timer has been set up */
 		goto out;
-	if (lapb->t2timer_stop) /* The timer has been stopped */
+	if (!lapb->t2timer_running) /* The timer has been stopped */
 		goto out;
 
 	if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
 		lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
 		lapb_timeout_response(lapb);
 	}
+	lapb->t2timer_running = false;
 
 out:
 	spin_unlock_bh(&lapb->lock);
@@ -98,7 +99,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 	spin_lock_bh(&lapb->lock);
 	if (timer_pending(&lapb->t1timer)) /* A new timer has been set up */
 		goto out;
-	if (lapb->t1timer_stop) /* The timer has been stopped */
+	if (!lapb->t1timer_running) /* The timer has been stopped */
 		goto out;
 
 	switch (lapb->state) {
@@ -127,6 +128,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S1 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -151,6 +153,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_confirmation(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S2 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -169,6 +172,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb_stop_t2timer(lapb);
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S3 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
@@ -186,6 +190,7 @@ static void lapb_t1timer_expiry(struct timer_list *t)
 				lapb->state = LAPB_STATE_0;
 				lapb_disconnect_indication(lapb, LAPB_TIMEDOUT);
 				lapb_dbg(0, "(%p) S4 -> S0\n", lapb->dev);
+				lapb->t1timer_running = false;
 				goto out;
 			} else {
 				lapb->n2count++;
-- 
cgit v1.2.3


From c0e715bbd50e57319f76d0b757dc282893f2d476 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:42 +0200
Subject: net: bridge: add helper for retrieving the current bridge port STP
 state

It may happen that we have the following topology with DSA or any other
switchdev driver with LAG offload:

ip link add br0 type bridge stp_state 1
ip link add bond0 type bond
ip link set bond0 master br0
ip link set swp0 master bond0
ip link set swp1 master bond0

STP decides that it should put bond0 into the BLOCKING state, and
that's that. The ports that are actively listening for the switchdev
port attributes emitted for the bond0 bridge port (because they are
offloading it) and have the honor of seeing that switchdev port
attribute can react to it, so we can program swp0 and swp1 into the
BLOCKING state.

But if then we do:

ip link set swp2 master bond0

then as far as the bridge is concerned, nothing has changed: it still
has one bridge port. But this new bridge port will not see any STP state
change notification and will remain FORWARDING, which is how the
standalone code leaves it in.

We need a function in the bridge driver which retrieves the current STP
state, such that drivers can synchronize to it when they may have missed
switchdev events.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  6 ++++++
 net/bridge/br_stp.c       | 14 ++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b979005ea39c..920d3a02cc68 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -136,6 +136,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 				    __u16 vid);
 void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
+u8 br_port_get_stp_state(const struct net_device *dev);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -154,6 +155,11 @@ br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
 {
 	return false;
 }
+
+static inline u8 br_port_get_stp_state(const struct net_device *dev)
+{
+	return BR_STATE_DISABLED;
+}
 #endif
 
 #endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 21c6781906aa..86b5e05d3f21 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -64,6 +64,20 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 	}
 }
 
+u8 br_port_get_stp_state(const struct net_device *dev)
+{
+	struct net_bridge_port *p;
+
+	ASSERT_RTNL();
+
+	p = br_port_get_rtnl(dev);
+	if (!p)
+		return BR_STATE_DISABLED;
+
+	return p->state;
+}
+EXPORT_SYMBOL_GPL(br_port_get_stp_state);
+
 /* called under bridge lock */
 struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no)
 {
-- 
cgit v1.2.3


From f1d42ea10056b9050d1c5b8e19995f66c30aeded Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:43 +0200
Subject: net: bridge: add helper to retrieve the current ageing time

The SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME attribute is only emitted from:

sysfs/ioctl/netlink
-> br_set_ageing_time
   -> __set_ageing_time

therefore not at bridge port creation time, so:
(a) switchdev drivers have to hardcode the initial value for the address
    ageing time, because they didn't get any notification
(b) that hardcoded value can be out of sync, if the user changes the
    ageing time before enslaving the port to the bridge

We need a helper in the bridge, such that switchdev drivers can query
the current value of the bridge ageing time when they start offloading
it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  6 ++++++
 net/bridge/br_stp.c       | 13 +++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 920d3a02cc68..ebd16495459c 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -137,6 +137,7 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
+clock_t br_get_ageing_time(struct net_device *br_dev);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -160,6 +161,11 @@ static inline u8 br_port_get_stp_state(const struct net_device *dev)
 {
 	return BR_STATE_DISABLED;
 }
+
+static inline clock_t br_get_ageing_time(struct net_device *br_dev)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 86b5e05d3f21..3dafb6143cff 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -639,6 +639,19 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
 	return 0;
 }
 
+clock_t br_get_ageing_time(struct net_device *br_dev)
+{
+	struct net_bridge *br;
+
+	if (!netif_is_bridge_master(br_dev))
+		return 0;
+
+	br = netdev_priv(br_dev);
+
+	return jiffies_to_clock_t(br->ageing_time);
+}
+EXPORT_SYMBOL_GPL(br_get_ageing_time);
+
 /* called under bridge lock */
 void __br_set_topology_change(struct net_bridge *br, unsigned char val)
 {
-- 
cgit v1.2.3


From 4f2673b3a2b6246729a1ff13b8945a040839dbd3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:44 +0200
Subject: net: bridge: add helper to replay port and host-joined mdb entries

I have a system with DSA ports, and udhcpcd is configured to bring
interfaces up as soon as they are created.

I create a bridge as follows:

ip link add br0 type bridge

As soon as I create the bridge and udhcpcd brings it up, I also have
avahi which automatically starts sending IPv6 packets to advertise some
local services, and because of that, the br0 bridge joins the following
IPv6 groups due to the code path detailed below:

33:33:ff:6d:c1:9c vid 0
33:33:00:00:00:6a vid 0
33:33:00:00:00:fb vid 0

br_dev_xmit
-> br_multicast_rcv
   -> br_ip6_multicast_add_group
      -> __br_multicast_add_group
         -> br_multicast_host_join
            -> br_mdb_notify

This is all fine, but inside br_mdb_notify we have br_mdb_switchdev_host
hooked up, and switchdev will attempt to offload the host joined groups
to an empty list of ports. Of course nobody offloads them.

Then when we add a port to br0:

ip link set swp0 master br0

the bridge doesn't replay the host-joined MDB entries from br_add_if,
and eventually the host joined addresses expire, and a switchdev
notification for deleting it is emitted, but surprise, the original
addition was already completely missed.

The strategy to address this problem is to replay the MDB entries (both
the port ones and the host joined ones) when the new port joins the
bridge, similar to what vxlan_fdb_replay does (in that case, its FDB can
be populated and only then attached to a bridge that you offload).
However there are 2 possibilities: the addresses can be 'pushed' by the
bridge into the port, or the port can 'pull' them from the bridge.

Considering that in the general case, the new port can be really late to
the party, and there may have been many other switchdev ports that
already received the initial notification, we would like to avoid
delivering duplicate events to them, since they might misbehave. And
currently, the bridge calls the entire switchdev notifier chain, whereas
for replaying it should just call the notifier block of the new guy.
But the bridge doesn't know what is the new guy's notifier block, it
just knows where the switchdev notifier chain is. So for simplification,
we make this a driver-initiated pull for now, and the notifier block is
passed as an argument.

To emulate the calling context for mdb objects (deferred and put on the
blocking notifier chain), we must iterate under RCU protection through
the bridge's mdb entries, queue them, and only call them once we're out
of the RCU read-side critical section.

There was some opportunity for reuse between br_mdb_switchdev_host_port,
br_mdb_notify and the newly added br_mdb_queue_one in how the switchdev
mdb object is created, so a helper was created.

Suggested-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |   9 +++
 include/net/switchdev.h   |   1 +
 net/bridge/br_mdb.c       | 148 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 141 insertions(+), 17 deletions(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index ebd16495459c..f6472969bb44 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -69,6 +69,8 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
 bool br_multicast_router(const struct net_device *dev);
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb, struct netlink_ext_ack *extack);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
 					     struct list_head *br_ip_list)
@@ -93,6 +95,13 @@ static inline bool br_multicast_router(const struct net_device *dev)
 {
 	return false;
 }
+static inline int br_mdb_replay(struct net_device *br_dev,
+				struct net_device *dev,
+				struct notifier_block *nb,
+				struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index b7fc7d0f54e2..8c3218177136 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -68,6 +68,7 @@ enum switchdev_obj_id {
 };
 
 struct switchdev_obj {
+	struct list_head list;
 	struct net_device *orig_dev;
 	enum switchdev_obj_id id;
 	u32 flags;
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 8846c5bcd075..95fa4af0e8dd 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -506,6 +506,134 @@ err:
 	kfree(priv);
 }
 
+static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
+				      const struct net_bridge_mdb_entry *mp)
+{
+	if (mp->addr.proto == htons(ETH_P_IP))
+		ip_eth_mc_map(mp->addr.dst.ip4, mdb->addr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (mp->addr.proto == htons(ETH_P_IPV6))
+		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb->addr);
+#endif
+	else
+		ether_addr_copy(mdb->addr, mp->addr.dst.mac_addr);
+
+	mdb->vid = mp->addr.vid;
+}
+
+static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
+			     struct switchdev_obj_port_mdb *mdb,
+			     struct netlink_ext_ack *extack)
+{
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+		},
+		.obj = &mdb->obj,
+	};
+	int err;
+
+	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	return notifier_to_errno(err);
+}
+
+static int br_mdb_queue_one(struct list_head *mdb_list,
+			    enum switchdev_obj_id id,
+			    const struct net_bridge_mdb_entry *mp,
+			    struct net_device *orig_dev)
+{
+	struct switchdev_obj_port_mdb *mdb;
+
+	mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
+	if (!mdb)
+		return -ENOMEM;
+
+	mdb->obj.id = id;
+	mdb->obj.orig_dev = orig_dev;
+	br_switchdev_mdb_populate(mdb, mp);
+	list_add_tail(&mdb->obj.list, mdb_list);
+
+	return 0;
+}
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb, struct netlink_ext_ack *extack)
+{
+	struct net_bridge_mdb_entry *mp;
+	struct switchdev_obj *obj, *tmp;
+	struct net_bridge *br;
+	LIST_HEAD(mdb_list);
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		return 0;
+
+	/* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
+	 * because the write-side protection is br->multicast_lock. But we
+	 * need to emulate the [ blocking ] calling context of a regular
+	 * switchdev event, so since both br->multicast_lock and RCU read side
+	 * critical sections are atomic, we have no choice but to pick the RCU
+	 * read side lock, queue up all our events, leave the critical section
+	 * and notify switchdev from blocking context.
+	 */
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+		struct net_bridge_port_group __rcu **pp;
+		struct net_bridge_port_group *p;
+
+		if (mp->host_joined) {
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_HOST_MDB,
+					       mp, br_dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+
+		for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+		     pp = &p->next) {
+			if (p->key.port->dev != dev)
+				continue;
+
+			err = br_mdb_queue_one(&mdb_list,
+					       SWITCHDEV_OBJ_ID_PORT_MDB,
+					       mp, dev);
+			if (err) {
+				rcu_read_unlock();
+				goto out_free_mdb;
+			}
+		}
+	}
+
+	rcu_read_unlock();
+
+	list_for_each_entry(obj, &mdb_list, list) {
+		err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+					extack);
+		if (err)
+			goto out_free_mdb;
+	}
+
+out_free_mdb:
+	list_for_each_entry_safe(obj, tmp, &mdb_list, list) {
+		list_del(&obj->list);
+		kfree(SWITCHDEV_OBJ_PORT_MDB(obj));
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_mdb_replay);
+
 static void br_mdb_switchdev_host_port(struct net_device *dev,
 				       struct net_device *lower_dev,
 				       struct net_bridge_mdb_entry *mp,
@@ -515,18 +643,12 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
 		.obj = {
 			.id = SWITCHDEV_OBJ_ID_HOST_MDB,
 			.flags = SWITCHDEV_F_DEFER,
+			.orig_dev = dev,
 		},
-		.vid = mp->addr.vid,
 	};
 
-	if (mp->addr.proto == htons(ETH_P_IP))
-		ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
-#if IS_ENABLED(CONFIG_IPV6)
-	else
-		ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
-#endif
+	br_switchdev_mdb_populate(&mdb, mp);
 
-	mdb.obj.orig_dev = dev;
 	switch (type) {
 	case RTM_NEWMDB:
 		switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
@@ -558,21 +680,13 @@ void br_mdb_notify(struct net_device *dev,
 			.id = SWITCHDEV_OBJ_ID_PORT_MDB,
 			.flags = SWITCHDEV_F_DEFER,
 		},
-		.vid = mp->addr.vid,
 	};
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
 	if (pg) {
-		if (mp->addr.proto == htons(ETH_P_IP))
-			ip_eth_mc_map(mp->addr.dst.ip4, mdb.addr);
-#if IS_ENABLED(CONFIG_IPV6)
-		else if (mp->addr.proto == htons(ETH_P_IPV6))
-			ipv6_eth_mc_map(&mp->addr.dst.ip6, mdb.addr);
-#endif
-		else
-			ether_addr_copy(mdb.addr, mp->addr.dst.mac_addr);
+		br_switchdev_mdb_populate(&mdb, mp);
 
 		mdb.obj.orig_dev = pg->key.port->dev;
 		switch (type) {
-- 
cgit v1.2.3


From 04846f903b53b32d29453e865646309db29f255a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:45 +0200
Subject: net: bridge: add helper to replay port and local fdb entries

When a switchdev port starts offloading a LAG that is already in a
bridge and has an FDB entry pointing to it:

ip link set bond0 master br0
bridge fdb add dev bond0 00:01:02:03:04:05 master static
ip link set swp0 master bond0

the switchdev driver will have no idea that this FDB entry is there,
because it missed the switchdev event emitted at its creation.

Ido Schimmel pointed this out during a discussion about challenges with
switchdev offloading of stacked interfaces between the physical port and
the bridge, and recommended to just catch that condition and deny the
CHANGEUPPER event:
https://lore.kernel.org/netdev/20210210105949.GB287766@shredder.lan/

But in fact, we might need to deal with the hard thing anyway, which is
to replay all FDB addresses relevant to this port, because it isn't just
static FDB entries, but also local addresses (ones that are not
forwarded but terminated by the bridge). There, we can't just say 'oh
yeah, there was an upper already so I'm not joining that'.

So, similar to the logic for replaying MDB entries, add a function that
must be called by individual switchdev drivers and replays local FDB
entries as well as ones pointing towards a bridge port. This time, we
use the atomic switchdev notifier block, since that's what FDB entries
expect for some reason.

Reported-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  9 +++++++++
 net/bridge/br_fdb.c       | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index f6472969bb44..b564c4486a45 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -147,6 +147,8 @@ void br_fdb_clear_offload(const struct net_device *dev, u16 vid);
 bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag);
 u8 br_port_get_stp_state(const struct net_device *dev);
 clock_t br_get_ageing_time(struct net_device *br_dev);
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb);
 #else
 static inline struct net_device *
 br_fdb_find_port(const struct net_device *br_dev,
@@ -175,6 +177,13 @@ static inline clock_t br_get_ageing_time(struct net_device *br_dev)
 {
 	return 0;
 }
+
+static inline int br_fdb_replay(struct net_device *br_dev,
+				struct net_device *dev,
+				struct notifier_block *nb)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b7490237f3fc..698b79747d32 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -726,6 +726,56 @@ static inline size_t fdb_nlmsg_size(void)
 		+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
 }
 
+static int br_fdb_replay_one(struct notifier_block *nb,
+			     struct net_bridge_fdb_entry *fdb,
+			     struct net_device *dev)
+{
+	struct switchdev_notifier_fdb_info item;
+	int err;
+
+	item.addr = fdb->key.addr.addr;
+	item.vid = fdb->key.vlan_id;
+	item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+	item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
+	item.info.dev = dev;
+
+	err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+	return notifier_to_errno(err);
+}
+
+int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+		  struct notifier_block *nb)
+{
+	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge *br;
+	int err = 0;
+
+	if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	br = netdev_priv(br_dev);
+
+	rcu_read_lock();
+
+	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
+		struct net_bridge_port *dst = READ_ONCE(fdb->dst);
+		struct net_device *dst_dev;
+
+		dst_dev = dst ? dst->dev : br->dev;
+		if (dst_dev != br_dev && dst_dev != dev)
+			continue;
+
+		err = br_fdb_replay_one(nb, fdb, dst_dev);
+		if (err)
+			break;
+	}
+
+	rcu_read_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_fdb_replay);
+
 static void fdb_notify(struct net_bridge *br,
 		       const struct net_bridge_fdb_entry *fdb, int type,
 		       bool swdev_notify)
-- 
cgit v1.2.3


From 22f67cdfae6aaa7e841ced17207391fb368c8e9e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:46 +0200
Subject: net: bridge: add helper to replay VLANs installed on port

Currently this simple setup with DSA:

ip link add br0 type bridge vlan_filtering 1
ip link add bond0 type bond
ip link set bond0 master br0
ip link set swp0 master bond0

will not work because the bridge has created the PVID in br_add_if ->
nbp_vlan_init, and it has notified switchdev of the existence of VLAN 1,
but that was too early, since swp0 was not yet a lower of bond0, so it
had no reason to act upon that notification.

We need a helper in the bridge to replay the switchdev VLAN objects that
were notified since the bridge port creation, because some of them may
have been missed.

As opposed to the br_mdb_replay function, the vg->vlan_list write side
protection is offered by the rtnl_mutex which is sleepable, so we don't
need to queue up the objects in atomic context, we can replay them right
away.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 10 +++++++
 net/bridge/br_vlan.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index b564c4486a45..2cc35038a8ca 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -111,6 +111,8 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid);
 int br_vlan_get_proto(const struct net_device *dev, u16 *p_proto);
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+		   struct notifier_block *nb, struct netlink_ext_ack *extack);
 #else
 static inline bool br_vlan_enabled(const struct net_device *dev)
 {
@@ -137,6 +139,14 @@ static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 {
 	return -EINVAL;
 }
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+				 struct net_device *dev,
+				 struct notifier_block *nb,
+				 struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8829f621b8ec..ca8daccff217 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1751,6 +1751,79 @@ out_kfree:
 	kfree_skb(skb);
 }
 
+static int br_vlan_replay_one(struct notifier_block *nb,
+			      struct net_device *dev,
+			      struct switchdev_obj_port_vlan *vlan,
+			      struct netlink_ext_ack *extack)
+{
+	struct switchdev_notifier_port_obj_info obj_info = {
+		.info = {
+			.dev = dev,
+			.extack = extack,
+		},
+		.obj = &vlan->obj,
+	};
+	int err;
+
+	err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+	return notifier_to_errno(err);
+}
+
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+		   struct notifier_block *nb, struct netlink_ext_ack *extack)
+{
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_vlan *v;
+	struct net_bridge_port *p;
+	struct net_bridge *br;
+	int err = 0;
+	u16 pvid;
+
+	ASSERT_RTNL();
+
+	if (!netif_is_bridge_master(br_dev))
+		return -EINVAL;
+
+	if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev))
+		return -EINVAL;
+
+	if (netif_is_bridge_master(dev)) {
+		br = netdev_priv(dev);
+		vg = br_vlan_group(br);
+		p = NULL;
+	} else {
+		p = br_port_get_rtnl(dev);
+		if (WARN_ON(!p))
+			return -EINVAL;
+		vg = nbp_vlan_group(p);
+		br = p->br;
+	}
+
+	if (!vg)
+		return 0;
+
+	pvid = br_get_pvid(vg);
+
+	list_for_each_entry(v, &vg->vlan_list, vlist) {
+		struct switchdev_obj_port_vlan vlan = {
+			.obj.orig_dev = dev,
+			.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+			.flags = br_vlan_flags(v, pvid),
+			.vid = v->vid,
+		};
+
+		if (!br_vlan_should_use(v))
+			continue;
+
+		br_vlan_replay_one(nb, dev, &vlan, extack);
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(br_vlan_replay);
+
 /* check if v_curr can enter a range ending in range_end */
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 			     const struct net_bridge_vlan *range_end)
-- 
cgit v1.2.3


From 185c9a760a61b034abb01a2dac8174b7da6e7ce4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:47 +0200
Subject: net: dsa: call dsa_port_bridge_join when joining a LAG that is
 already in a bridge

DSA can properly detect and offload this sequence of operations:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set swp0 master bond0
ip link set bond0 master br0

But not this one:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set bond0 master br0
ip link set swp0 master bond0

Actually the second one is more complicated, due to the elapsed time
between the enslavement of bond0 and the offloading of it via swp0, a
lot of things could have happened to the bond0 bridge port in terms of
switchdev objects (host MDBs, VLANs, altered STP state etc). So this is
a bit of a can of worms, and making sure that the DSA port's state is in
sync with this already existing bridge port is handled in the next
patches.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/port.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index c9c6d7ab3f47..d39262a9fe0e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -249,17 +249,31 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
 		.lag = lag,
 		.info = uinfo,
 	};
+	struct net_device *bridge_dev;
 	int err;
 
 	dsa_lag_map(dp->ds->dst, lag);
 	dp->lag_dev = lag;
 
 	err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info);
-	if (err) {
-		dp->lag_dev = NULL;
-		dsa_lag_unmap(dp->ds->dst, lag);
-	}
+	if (err)
+		goto err_lag_join;
 
+	bridge_dev = netdev_master_upper_dev_get(lag);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	err = dsa_port_bridge_join(dp, bridge_dev);
+	if (err)
+		goto err_bridge_join;
+
+	return 0;
+
+err_bridge_join:
+	dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
+err_lag_join:
+	dp->lag_dev = NULL;
+	dsa_lag_unmap(dp->ds->dst, lag);
 	return err;
 }
 
-- 
cgit v1.2.3


From 2afc526ab342899445fb929af610285e0bbc69a9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:48 +0200
Subject: net: dsa: pass extack to dsa_port_{bridge,lag}_join

This is a pretty noisy change that was broken out of the larger change
for replaying switchdev attributes and objects at bridge join time,
which is when these extack objects are actually used.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h | 6 ++++--
 net/dsa/port.c     | 8 +++++---
 net/dsa/slave.c    | 7 +++++--
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 4c43c5406834..b8778c5d8529 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -181,12 +181,14 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
 void dsa_port_disable(struct dsa_port *dp);
-int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
+			 struct netlink_ext_ack *extack);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_lag_change(struct dsa_port *dp,
 			struct netdev_lag_lower_state_info *linfo);
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
-		      struct netdev_lag_upper_info *uinfo);
+		      struct netdev_lag_upper_info *uinfo,
+		      struct netlink_ext_ack *extack);
 void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct netlink_ext_ack *extack);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index d39262a9fe0e..fcbe5b1545b8 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -144,7 +144,8 @@ static void dsa_port_change_brport_flags(struct dsa_port *dp,
 	}
 }
 
-int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
+			 struct netlink_ext_ack *extack)
 {
 	struct dsa_notifier_bridge_info info = {
 		.tree_index = dp->ds->dst->index,
@@ -241,7 +242,8 @@ int dsa_port_lag_change(struct dsa_port *dp,
 }
 
 int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
-		      struct netdev_lag_upper_info *uinfo)
+		      struct netdev_lag_upper_info *uinfo,
+		      struct netlink_ext_ack *extack)
 {
 	struct dsa_notifier_lag_info info = {
 		.sw_index = dp->ds->index,
@@ -263,7 +265,7 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag,
 	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
 		return 0;
 
-	err = dsa_port_bridge_join(dp, bridge_dev);
+	err = dsa_port_bridge_join(dp, bridge_dev, extack);
 	if (err)
 		goto err_bridge_join;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 992fcab4b552..1ff48be476bb 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1976,11 +1976,14 @@ static int dsa_slave_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct netlink_ext_ack *extack;
 	int err = NOTIFY_DONE;
 
+	extack = netdev_notifier_info_to_extack(&info->info);
+
 	if (netif_is_bridge_master(info->upper_dev)) {
 		if (info->linking) {
-			err = dsa_port_bridge_join(dp, info->upper_dev);
+			err = dsa_port_bridge_join(dp, info->upper_dev, extack);
 			if (!err)
 				dsa_bridge_mtu_normalization(dp);
 			err = notifier_from_errno(err);
@@ -1991,7 +1994,7 @@ static int dsa_slave_changeupper(struct net_device *dev,
 	} else if (netif_is_lag_master(info->upper_dev)) {
 		if (info->linking) {
 			err = dsa_port_lag_join(dp, info->upper_dev,
-						info->upper_info);
+						info->upper_info, extack);
 			if (err == -EOPNOTSUPP) {
 				NL_SET_ERR_MSG_MOD(info->info.extack,
 						   "Offloading not supported");
-- 
cgit v1.2.3


From 5961d6a12c13df834343e56e37672169fe88756b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:49 +0200
Subject: net: dsa: inherit the actual bridge port flags at join time

DSA currently assumes that the bridge port starts off with this
constellation of bridge port flags:

- learning on
- unicast flooding on
- multicast flooding on
- broadcast flooding on

just by virtue of code copy-pasta from the bridge layer (new_nbp).
This was a simple enough strategy thus far, because the 'bridge join'
moment always coincided with the 'bridge port creation' moment.

But with sandwiched interfaces, such as:

 br0
  |
bond0
  |
 swp0

it may happen that the user has had time to change the bridge port flags
of bond0 before enslaving swp0 to it. In that case, swp0 will falsely
assume that the bridge port flags are those determined by new_nbp, when
in fact this can happen:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set bond0 master br0
ip link set bond0 type bridge_slave learning off
ip link set swp0 master br0

Now swp0 has learning enabled, bond0 has learning disabled. Not nice.

Fix this by "dumpster diving" through the actual bridge port flags with
br_port_flag_is_set, at bridge join time.

We use this opportunity to split dsa_port_change_brport_flags into two
distinct functions called dsa_port_inherit_brport_flags and
dsa_port_clear_brport_flags, now that the implementation for the two
cases is no longer similar. This patch also creates two functions called
dsa_port_switchdev_sync and dsa_port_switchdev_unsync which collect what
we have so far, even if that's asymmetrical. More is going to be added
in the next patch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/port.c | 123 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 82 insertions(+), 41 deletions(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index fcbe5b1545b8..c712bf3da0a0 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -122,28 +122,84 @@ void dsa_port_disable(struct dsa_port *dp)
 	rtnl_unlock();
 }
 
-static void dsa_port_change_brport_flags(struct dsa_port *dp,
-					 bool bridge_offload)
+static int dsa_port_inherit_brport_flags(struct dsa_port *dp,
+					 struct netlink_ext_ack *extack)
 {
-	struct switchdev_brport_flags flags;
-	int flag;
+	const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+				   BR_BCAST_FLOOD;
+	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+	int flag, err;
 
-	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	if (bridge_offload)
-		flags.val = flags.mask;
-	else
-		flags.val = flags.mask & ~BR_LEARNING;
+	for_each_set_bit(flag, &mask, 32) {
+		struct switchdev_brport_flags flags = {0};
+
+		flags.mask = BIT(flag);
 
-	for_each_set_bit(flag, &flags.mask, 32) {
-		struct switchdev_brport_flags tmp;
+		if (br_port_flag_is_set(brport_dev, BIT(flag)))
+			flags.val = BIT(flag);
+
+		err = dsa_port_bridge_flags(dp, flags, extack);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
 
-		tmp.val = flags.val & BIT(flag);
-		tmp.mask = BIT(flag);
+	return 0;
+}
 
-		dsa_port_bridge_flags(dp, tmp, NULL);
+static void dsa_port_clear_brport_flags(struct dsa_port *dp)
+{
+	const unsigned long val = BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+	const unsigned long mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD |
+				   BR_BCAST_FLOOD;
+	int flag, err;
+
+	for_each_set_bit(flag, &mask, 32) {
+		struct switchdev_brport_flags flags = {0};
+
+		flags.mask = BIT(flag);
+		flags.val = val & BIT(flag);
+
+		err = dsa_port_bridge_flags(dp, flags, NULL);
+		if (err && err != -EOPNOTSUPP)
+			dev_err(dp->ds->dev,
+				"failed to clear bridge port flag %lu: %pe\n",
+				flags.val, ERR_PTR(err));
 	}
 }
 
+static int dsa_port_switchdev_sync(struct dsa_port *dp,
+				   struct netlink_ext_ack *extack)
+{
+	int err;
+
+	err = dsa_port_inherit_brport_flags(dp, extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void dsa_port_switchdev_unsync(struct dsa_port *dp)
+{
+	/* Configure the port for standalone mode (no address learning,
+	 * flood everything).
+	 * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
+	 * when the user requests it through netlink or sysfs, but not
+	 * automatically at port join or leave, so we need to handle resetting
+	 * the brport flags ourselves. But we even prefer it that way, because
+	 * otherwise, some setups might never get the notification they need,
+	 * for example, when a port leaves a LAG that offloads the bridge,
+	 * it becomes standalone, but as far as the bridge is concerned, no
+	 * port ever left.
+	 */
+	dsa_port_clear_brport_flags(dp);
+
+	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
+	 */
+	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+}
+
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
 			 struct netlink_ext_ack *extack)
 {
@@ -155,24 +211,25 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
 	};
 	int err;
 
-	/* Notify the port driver to set its configurable flags in a way that
-	 * matches the initial settings of a bridge port.
-	 */
-	dsa_port_change_brport_flags(dp, true);
-
 	/* Here the interface is already bridged. Reflect the current
 	 * configuration so that drivers can program their chips accordingly.
 	 */
 	dp->bridge_dev = br;
 
 	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
+	if (err)
+		goto out_rollback;
 
-	/* The bridging is rolled back on error */
-	if (err) {
-		dsa_port_change_brport_flags(dp, false);
-		dp->bridge_dev = NULL;
-	}
+	err = dsa_port_switchdev_sync(dp, extack);
+	if (err)
+		goto out_rollback_unbridge;
 
+	return 0;
+
+out_rollback_unbridge:
+	dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+out_rollback:
+	dp->bridge_dev = NULL;
 	return err;
 }
 
@@ -195,23 +252,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 	if (err)
 		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
-	/* Configure the port for standalone mode (no address learning,
-	 * flood everything).
-	 * The bridge only emits SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS events
-	 * when the user requests it through netlink or sysfs, but not
-	 * automatically at port join or leave, so we need to handle resetting
-	 * the brport flags ourselves. But we even prefer it that way, because
-	 * otherwise, some setups might never get the notification they need,
-	 * for example, when a port leaves a LAG that offloads the bridge,
-	 * it becomes standalone, but as far as the bridge is concerned, no
-	 * port ever left.
-	 */
-	dsa_port_change_brport_flags(dp, false);
-
-	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
-	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
-	 */
-	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+	dsa_port_switchdev_unsync(dp);
 }
 
 int dsa_port_lag_change(struct dsa_port *dp,
-- 
cgit v1.2.3


From 010e269f91be8fc1436c753bfbbd9ce561151e71 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:50 +0200
Subject: net: dsa: sync up switchdev objects and port attributes when joining
 the bridge

If we join an already-created bridge port, such as a bond master
interface, then we can miss the initial switchdev notifications emitted
by the bridge for this port, while it wasn't offloaded by anybody.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa_priv.h |  3 +++
 net/dsa/port.c     | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 net/dsa/slave.c    |  4 ++--
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b8778c5d8529..92282de54230 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -262,6 +262,9 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
 
 /* slave.c */
 extern const struct dsa_device_ops notag_netdev_ops;
+extern struct notifier_block dsa_slave_switchdev_notifier;
+extern struct notifier_block dsa_slave_switchdev_blocking_notifier;
+
 void dsa_slave_mii_bus_init(struct dsa_switch *ds);
 int dsa_slave_create(struct dsa_port *dp);
 void dsa_slave_destroy(struct net_device *slave_dev);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index c712bf3da0a0..01e30264b25b 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -170,12 +170,46 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
 static int dsa_port_switchdev_sync(struct dsa_port *dp,
 				   struct netlink_ext_ack *extack)
 {
+	struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+	struct net_device *br = dp->bridge_dev;
 	int err;
 
 	err = dsa_port_inherit_brport_flags(dp, extack);
 	if (err)
 		return err;
 
+	err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_vlan_filtering(dp, br_vlan_enabled(br), extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_mdb_replay(br, brport_dev,
+			    &dsa_slave_switchdev_blocking_notifier,
+			    extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_vlan_replay(br, brport_dev,
+			     &dsa_slave_switchdev_blocking_notifier,
+			     extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
 	return 0;
 }
 
@@ -198,6 +232,18 @@ static void dsa_port_switchdev_unsync(struct dsa_port *dp)
 	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
 	 */
 	dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+
+	/* VLAN filtering is handled by dsa_switch_bridge_leave */
+
+	/* Some drivers treat the notification for having a local multicast
+	 * router by allowing multicast to be flooded to the CPU, so we should
+	 * allow this in standalone mode too.
+	 */
+	dsa_port_mrouter(dp->cpu_dp, true, NULL);
+
+	/* Ageing time may be global to the switch chip, so don't change it
+	 * here because we have no good reason (or value) to change it to.
+	 */
 }
 
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1ff48be476bb..c51e52418a62 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2392,11 +2392,11 @@ static struct notifier_block dsa_slave_nb __read_mostly = {
 	.notifier_call  = dsa_slave_netdevice_event,
 };
 
-static struct notifier_block dsa_slave_switchdev_notifier = {
+struct notifier_block dsa_slave_switchdev_notifier = {
 	.notifier_call = dsa_slave_switchdev_event,
 };
 
-static struct notifier_block dsa_slave_switchdev_blocking_notifier = {
+struct notifier_block dsa_slave_switchdev_blocking_notifier = {
 	.notifier_call = dsa_slave_switchdev_blocking_event,
 };
 
-- 
cgit v1.2.3


From 81ef35e7619a04b902ca457d60594f956154b9f2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:51 +0200
Subject: net: ocelot: call ocelot_netdevice_bridge_join when joining a bridged
 LAG

Similar to the DSA situation, ocelot supports LAG offload but treats
this scenario improperly:

ip link add br0 type bridge
ip link add bond0 type bond
ip link set bond0 master br0
ip link set swp0 master bond0

We do the same thing as we do there, which is to simulate a 'bridge join'
on 'lag join', if we detect that the bonding upper has a bridge upper.

Again, same as DSA, ocelot supports software fallback for LAG, and in
that case, we should avoid calling ocelot_netdevice_changeupper.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_net.c | 111 +++++++++++++++++++++++++--------
 1 file changed, 86 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index c08164cd88f4..d1376f7b34fd 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1117,10 +1117,15 @@ static int ocelot_port_obj_del(struct net_device *dev,
 	return ret;
 }
 
-static int ocelot_netdevice_bridge_join(struct ocelot *ocelot, int port,
-					struct net_device *bridge)
+static int ocelot_netdevice_bridge_join(struct net_device *dev,
+					struct net_device *bridge,
+					struct netlink_ext_ack *extack)
 {
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
 	struct switchdev_brport_flags flags;
+	int port = priv->chip_port;
 	int err;
 
 	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
@@ -1135,10 +1140,14 @@ static int ocelot_netdevice_bridge_join(struct ocelot *ocelot, int port,
 	return 0;
 }
 
-static int ocelot_netdevice_bridge_leave(struct ocelot *ocelot, int port,
+static int ocelot_netdevice_bridge_leave(struct net_device *dev,
 					 struct net_device *bridge)
 {
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
 	struct switchdev_brport_flags flags;
+	int port = priv->chip_port;
 	int err;
 
 	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
@@ -1151,43 +1160,89 @@ static int ocelot_netdevice_bridge_leave(struct ocelot *ocelot, int port,
 	return err;
 }
 
-static int ocelot_netdevice_changeupper(struct net_device *dev,
-					struct netdev_notifier_changeupper_info *info)
+static int ocelot_netdevice_lag_join(struct net_device *dev,
+				     struct net_device *bond,
+				     struct netdev_lag_upper_info *info,
+				     struct netlink_ext_ack *extack)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct net_device *bridge_dev;
 	int port = priv->chip_port;
+	int err;
+
+	err = ocelot_port_lag_join(ocelot, port, bond, info);
+	if (err == -EOPNOTSUPP) {
+		NL_SET_ERR_MSG_MOD(extack, "Offloading not supported");
+		return 0;
+	}
+
+	bridge_dev = netdev_master_upper_dev_get(bond);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	err = ocelot_netdevice_bridge_join(dev, bridge_dev, extack);
+	if (err)
+		goto err_bridge_join;
+
+	return 0;
+
+err_bridge_join:
+	ocelot_port_lag_leave(ocelot, port, bond);
+	return err;
+}
+
+static int ocelot_netdevice_lag_leave(struct net_device *dev,
+				      struct net_device *bond)
+{
+	struct ocelot_port_private *priv = netdev_priv(dev);
+	struct ocelot_port *ocelot_port = &priv->port;
+	struct ocelot *ocelot = ocelot_port->ocelot;
+	struct net_device *bridge_dev;
+	int port = priv->chip_port;
+
+	ocelot_port_lag_leave(ocelot, port, bond);
+
+	bridge_dev = netdev_master_upper_dev_get(bond);
+	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
+		return 0;
+
+	return ocelot_netdevice_bridge_leave(dev, bridge_dev);
+}
+
+static int ocelot_netdevice_changeupper(struct net_device *dev,
+					struct netdev_notifier_changeupper_info *info)
+{
+	struct netlink_ext_ack *extack;
 	int err = 0;
 
+	extack = netdev_notifier_info_to_extack(&info->info);
+
 	if (netif_is_bridge_master(info->upper_dev)) {
-		if (info->linking) {
-			err = ocelot_netdevice_bridge_join(ocelot, port,
-							   info->upper_dev);
-		} else {
-			err = ocelot_netdevice_bridge_leave(ocelot, port,
-							    info->upper_dev);
-		}
+		if (info->linking)
+			err = ocelot_netdevice_bridge_join(dev, info->upper_dev,
+							   extack);
+		else
+			err = ocelot_netdevice_bridge_leave(dev, info->upper_dev);
 	}
 	if (netif_is_lag_master(info->upper_dev)) {
-		if (info->linking) {
-			err = ocelot_port_lag_join(ocelot, port,
-						   info->upper_dev,
-						   info->upper_info);
-			if (err == -EOPNOTSUPP) {
-				NL_SET_ERR_MSG_MOD(info->info.extack,
-						   "Offloading not supported");
-				err = 0;
-			}
-		} else {
-			ocelot_port_lag_leave(ocelot, port,
-					      info->upper_dev);
-		}
+		if (info->linking)
+			err = ocelot_netdevice_lag_join(dev, info->upper_dev,
+							info->upper_info, extack);
+		else
+			ocelot_netdevice_lag_leave(dev, info->upper_dev);
 	}
 
 	return notifier_from_errno(err);
 }
 
+/* Treat CHANGEUPPER events on an offloaded LAG as individual CHANGEUPPER
+ * events for the lower physical ports of the LAG.
+ * If the LAG upper isn't offloaded, ignore its CHANGEUPPER events.
+ * In case the LAG joined a bridge, notify that we are offloading it and can do
+ * forwarding in hardware towards it.
+ */
 static int
 ocelot_netdevice_lag_changeupper(struct net_device *dev,
 				 struct netdev_notifier_changeupper_info *info)
@@ -1197,6 +1252,12 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev,
 	int err = NOTIFY_DONE;
 
 	netdev_for_each_lower_dev(dev, lower, iter) {
+		struct ocelot_port_private *priv = netdev_priv(lower);
+		struct ocelot_port *ocelot_port = &priv->port;
+
+		if (ocelot_port->bond != dev)
+			return NOTIFY_OK;
+
 		err = ocelot_netdevice_changeupper(lower, info);
 		if (err)
 			return notifier_from_errno(err);
-- 
cgit v1.2.3


From e4bd44e89dcf37345e4851c5e775cb5abf38ab62 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 23 Mar 2021 01:51:52 +0200
Subject: net: ocelot: replay switchdev events when joining bridge

The premise of this change is that the switchdev port attributes and
objects offloaded by ocelot might have been missed when we are joining
an already existing bridge port, such as a bonding interface.

The patch pulls these switchdev attributes and objects from the bridge,
on behalf of the 'bridge port' net device which might be either the
ocelot switch interface, or the bonding upper interface.

The ocelot_net.c belongs strictly to the switchdev ocelot driver, while
ocelot.c is part of a library shared with the DSA felix driver.
The ocelot_port_bridge_leave function (part of the common library) used
to call ocelot_port_vlan_filtering(false), something which is not
necessary for DSA, since the framework deals with that already there.
So we move this function to ocelot_switchdev_unsync, which is specific
to the switchdev driver.

The code movement described above makes ocelot_port_bridge_leave no
longer return an error code, so we change its type from int to void.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c         |   4 +-
 drivers/net/ethernet/mscc/Kconfig      |   3 +-
 drivers/net/ethernet/mscc/ocelot.c     |  18 ++---
 drivers/net/ethernet/mscc/ocelot_net.c | 117 ++++++++++++++++++++++++++++-----
 include/soc/mscc/ocelot.h              |   6 +-
 5 files changed, 113 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 628afb47b579..6b5442be0230 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -719,7 +719,9 @@ static int felix_bridge_join(struct dsa_switch *ds, int port,
 {
 	struct ocelot *ocelot = ds->priv;
 
-	return ocelot_port_bridge_join(ocelot, port, br);
+	ocelot_port_bridge_join(ocelot, port, br);
+
+	return 0;
 }
 
 static void felix_bridge_leave(struct dsa_switch *ds, int port,
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index 05cb040c2677..2d3157e4d081 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -11,7 +11,7 @@ config NET_VENDOR_MICROSEMI
 
 if NET_VENDOR_MICROSEMI
 
-# Users should depend on NET_SWITCHDEV, HAS_IOMEM
+# Users should depend on NET_SWITCHDEV, HAS_IOMEM, BRIDGE
 config MSCC_OCELOT_SWITCH_LIB
 	select NET_DEVLINK
 	select REGMAP_MMIO
@@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB
 
 config MSCC_OCELOT_SWITCH
 	tristate "Ocelot switch driver"
+	depends on BRIDGE || BRIDGE=n
 	depends on NET_SWITCHDEV
 	depends on HAS_IOMEM
 	depends on OF_NET
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index ce57929ba3d1..1a36b416fd9b 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1514,34 +1514,28 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port,
 }
 EXPORT_SYMBOL(ocelot_port_mdb_del);
 
-int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
-			    struct net_device *bridge)
+void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
+			     struct net_device *bridge)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 
 	ocelot_port->bridge = bridge;
 
-	return 0;
+	ocelot_apply_bridge_fwd_mask(ocelot);
 }
 EXPORT_SYMBOL(ocelot_port_bridge_join);
 
-int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
-			     struct net_device *bridge)
+void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+			      struct net_device *bridge)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	struct ocelot_vlan pvid = {0}, native_vlan = {0};
-	int ret;
 
 	ocelot_port->bridge = NULL;
 
-	ret = ocelot_port_vlan_filtering(ocelot, port, false);
-	if (ret)
-		return ret;
-
 	ocelot_port_set_pvid(ocelot, port, pvid);
 	ocelot_port_set_native_vlan(ocelot, port, native_vlan);
-
-	return 0;
+	ocelot_apply_bridge_fwd_mask(ocelot);
 }
 EXPORT_SYMBOL(ocelot_port_bridge_leave);
 
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index d1376f7b34fd..36f32a4d9b0f 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -1117,47 +1117,126 @@ static int ocelot_port_obj_del(struct net_device *dev,
 	return ret;
 }
 
+static void ocelot_inherit_brport_flags(struct ocelot *ocelot, int port,
+					struct net_device *brport_dev)
+{
+	struct switchdev_brport_flags flags = {0};
+	int flag;
+
+	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+
+	for_each_set_bit(flag, &flags.mask, 32)
+		if (br_port_flag_is_set(brport_dev, BIT(flag)))
+			flags.val |= BIT(flag);
+
+	ocelot_port_bridge_flags(ocelot, port, flags);
+}
+
+static void ocelot_clear_brport_flags(struct ocelot *ocelot, int port)
+{
+	struct switchdev_brport_flags flags;
+
+	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
+	flags.val = flags.mask & ~BR_LEARNING;
+
+	ocelot_port_bridge_flags(ocelot, port, flags);
+}
+
+static int ocelot_switchdev_sync(struct ocelot *ocelot, int port,
+				 struct net_device *brport_dev,
+				 struct net_device *bridge_dev,
+				 struct netlink_ext_ack *extack)
+{
+	clock_t ageing_time;
+	u8 stp_state;
+	int err;
+
+	ocelot_inherit_brport_flags(ocelot, port, brport_dev);
+
+	stp_state = br_port_get_stp_state(brport_dev);
+	ocelot_bridge_stp_state_set(ocelot, port, stp_state);
+
+	err = ocelot_port_vlan_filtering(ocelot, port,
+					 br_vlan_enabled(bridge_dev));
+	if (err)
+		return err;
+
+	ageing_time = br_get_ageing_time(bridge_dev);
+	ocelot_port_attr_ageing_set(ocelot, port, ageing_time);
+
+	err = br_mdb_replay(bridge_dev, brport_dev,
+			    &ocelot_switchdev_blocking_nb, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	err = br_fdb_replay(bridge_dev, brport_dev, &ocelot_switchdev_nb);
+	if (err)
+		return err;
+
+	err = br_vlan_replay(bridge_dev, brport_dev,
+			     &ocelot_switchdev_blocking_nb, extack);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port)
+{
+	int err;
+
+	err = ocelot_port_vlan_filtering(ocelot, port, false);
+	if (err)
+		return err;
+
+	ocelot_clear_brport_flags(ocelot, port);
+
+	ocelot_bridge_stp_state_set(ocelot, port, BR_STATE_FORWARDING);
+
+	return 0;
+}
+
 static int ocelot_netdevice_bridge_join(struct net_device *dev,
+					struct net_device *brport_dev,
 					struct net_device *bridge,
 					struct netlink_ext_ack *extack)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
-	struct switchdev_brport_flags flags;
 	int port = priv->chip_port;
 	int err;
 
-	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	flags.val = flags.mask;
+	ocelot_port_bridge_join(ocelot, port, bridge);
 
-	err = ocelot_port_bridge_join(ocelot, port, bridge);
+	err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack);
 	if (err)
-		return err;
-
-	ocelot_port_bridge_flags(ocelot, port, flags);
+		goto err_switchdev_sync;
 
 	return 0;
+
+err_switchdev_sync:
+	ocelot_port_bridge_leave(ocelot, port, bridge);
+	return err;
 }
 
 static int ocelot_netdevice_bridge_leave(struct net_device *dev,
+					 struct net_device *brport_dev,
 					 struct net_device *bridge)
 {
 	struct ocelot_port_private *priv = netdev_priv(dev);
 	struct ocelot_port *ocelot_port = &priv->port;
 	struct ocelot *ocelot = ocelot_port->ocelot;
-	struct switchdev_brport_flags flags;
 	int port = priv->chip_port;
 	int err;
 
-	flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
-	flags.val = flags.mask & ~BR_LEARNING;
+	err = ocelot_switchdev_unsync(ocelot, port);
+	if (err)
+		return err;
 
-	err = ocelot_port_bridge_leave(ocelot, port, bridge);
+	ocelot_port_bridge_leave(ocelot, port, bridge);
 
-	ocelot_port_bridge_flags(ocelot, port, flags);
-
-	return err;
+	return 0;
 }
 
 static int ocelot_netdevice_lag_join(struct net_device *dev,
@@ -1182,7 +1261,7 @@ static int ocelot_netdevice_lag_join(struct net_device *dev,
 	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
 		return 0;
 
-	err = ocelot_netdevice_bridge_join(dev, bridge_dev, extack);
+	err = ocelot_netdevice_bridge_join(dev, bond, bridge_dev, extack);
 	if (err)
 		goto err_bridge_join;
 
@@ -1208,7 +1287,7 @@ static int ocelot_netdevice_lag_leave(struct net_device *dev,
 	if (!bridge_dev || !netif_is_bridge_master(bridge_dev))
 		return 0;
 
-	return ocelot_netdevice_bridge_leave(dev, bridge_dev);
+	return ocelot_netdevice_bridge_leave(dev, bond, bridge_dev);
 }
 
 static int ocelot_netdevice_changeupper(struct net_device *dev,
@@ -1221,10 +1300,12 @@ static int ocelot_netdevice_changeupper(struct net_device *dev,
 
 	if (netif_is_bridge_master(info->upper_dev)) {
 		if (info->linking)
-			err = ocelot_netdevice_bridge_join(dev, info->upper_dev,
+			err = ocelot_netdevice_bridge_join(dev, dev,
+							   info->upper_dev,
 							   extack);
 		else
-			err = ocelot_netdevice_bridge_leave(dev, info->upper_dev);
+			err = ocelot_netdevice_bridge_leave(dev, dev,
+							    info->upper_dev);
 	}
 	if (netif_is_lag_master(info->upper_dev)) {
 		if (info->linking)
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index ce7e5c1bd90d..68cdc7ceaf4d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -803,10 +803,10 @@ int ocelot_port_pre_bridge_flags(struct ocelot *ocelot, int port,
 				 struct switchdev_brport_flags val);
 void ocelot_port_bridge_flags(struct ocelot *ocelot, int port,
 			      struct switchdev_brport_flags val);
-int ocelot_port_bridge_join(struct ocelot *ocelot, int port,
-			    struct net_device *bridge);
-int ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+void ocelot_port_bridge_join(struct ocelot *ocelot, int port,
 			     struct net_device *bridge);
+void ocelot_port_bridge_leave(struct ocelot *ocelot, int port,
+			      struct net_device *bridge);
 int ocelot_fdb_dump(struct ocelot *ocelot, int port,
 		    dsa_fdb_dump_cb_t *cb, void *data);
 int ocelot_fdb_add(struct ocelot *ocelot, int port,
-- 
cgit v1.2.3


From 437c78f976f5b39fc4b2a1c65903a229f55912dd Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 22 Mar 2021 20:05:05 -0500
Subject: net: ipa: avoid 64-bit modulus

It is possible for a 32 bit x86 build to use a 64 bit DMA address.

There are two remaining spots where the IPA driver does a modulo
operation to check alignment of a DMA address, and under certain
conditions this can lead to a build error on i386 (at least).

The alignment checks we're doing are for power-of-2 values, and this
means the lower 32 bits of the DMA address can be used.  This ensures
both operands to the modulo operator are 32 bits wide.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Alex Elder <elder@linaro.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c       | 11 +++++++----
 drivers/net/ipa/ipa_table.c |  9 ++++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 7f3e338ca7a7..b6355827bf90 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -1436,15 +1436,18 @@ static void gsi_evt_ring_rx_update(struct gsi_evt_ring *evt_ring, u32 index)
 /* Initialize a ring, including allocating DMA memory for its entries */
 static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count)
 {
-	size_t size = count * GSI_RING_ELEMENT_SIZE;
+	u32 size = count * GSI_RING_ELEMENT_SIZE;
 	struct device *dev = gsi->dev;
 	dma_addr_t addr;
 
-	/* Hardware requires a 2^n ring size, with alignment equal to size */
+	/* Hardware requires a 2^n ring size, with alignment equal to size.
+	 * The size is a power of 2, so we can check alignment using just
+	 * the bottom 32 bits for a DMA address of any size.
+	 */
 	ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL);
-	if (ring->virt && addr % size) {
+	if (ring->virt && lower_32_bits(addr) % size) {
 		dma_free_coherent(dev, size, ring->virt, addr);
-		dev_err(dev, "unable to alloc 0x%zx-aligned ring buffer\n",
+		dev_err(dev, "unable to alloc 0x%x-aligned ring buffer\n",
 			size);
 		return -EINVAL;	/* Not a good error value, but distinct */
 	} else if (!ring->virt) {
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 988f2c2886b9..4236a50ff03a 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -658,10 +658,13 @@ int ipa_table_init(struct ipa *ipa)
 		return -ENOMEM;
 
 	/* We put the "zero rule" at the base of our table area.  The IPA
-	 * hardware requires rules to be aligned on a 128-byte boundary.
-	 * Make sure the allocation satisfies this constraint.
+	 * hardware requires route and filter table rules to be aligned
+	 * on a 128-byte boundary.  As long as the alignment constraint
+	 * is a power of 2, we can check alignment using just the bottom
+	 * 32 bits for a DMA address of any size.
 	 */
-	if (addr % IPA_TABLE_ALIGN) {
+	BUILD_BUG_ON(!is_power_of_2(IPA_TABLE_ALIGN));
+	if (lower_32_bits(addr) % IPA_TABLE_ALIGN) {
 		dev_err(dev, "table address %pad not %u-byte aligned\n",
 			&addr, IPA_TABLE_ALIGN);
 		dma_free_coherent(dev, size, virt, addr);
-- 
cgit v1.2.3


From ea6c8635d5d51da633bfc333011d4aaeb60d3142 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 23 Mar 2021 10:00:12 +0800
Subject: net: ethernet: indir_table.h is included twice

indir_table.h has been included at line 41, so remove
the duplicate one at line 43.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8e7a702e23a3..ab2694835246 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -40,7 +40,6 @@
 #include "eswitch.h"
 #include "esw/indir_table.h"
 #include "esw/acl/ofld.h"
-#include "esw/indir_table.h"
 #include "rdma.h"
 #include "en.h"
 #include "fs_core.h"
-- 
cgit v1.2.3


From 4c94fe88cde4bb5c8e1baa01106c4e6db1c75738 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 23 Mar 2021 10:05:48 +0800
Subject: net: ethernet: Remove duplicate include of vhca_event.h

vhca_event.h has been included at line 4, so remove the
duplicate one at line 8.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index 58b6be0b03d7..3c8a00dd573a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -5,7 +5,6 @@
 #include "priv.h"
 #include "sf.h"
 #include "mlx5_ifc_vhca_event.h"
-#include "vhca_event.h"
 #include "ecpf.h"
 
 struct mlx5_sf_hw {
-- 
cgit v1.2.3


From 5aa3afe107d9099fc0dea2acf82c3e3c8f0f20e2 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 23 Mar 2021 07:49:23 +0100
Subject: net: make unregister netdev warning timeout configurable

netdev_wait_allrefs() issues a warning if refcount does not drop to 0
after 10 seconds. While 10 second wait generally should not happen
under normal workload in normal environment, it seems to fire falsely
very often during fuzzing and/or in qemu emulation (~10x slower).
At least it's not possible to understand if it's really a false
positive or not. Automated testing generally bumps all timeouts
to very high values to avoid flake failures.
Add net.core.netdev_unregister_timeout_secs sysctl to make
the timeout configurable for automated testing systems.
Lowering the timeout may also be useful for e.g. manual bisection.
The default value matches the current behavior.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=211877
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/sysctl/net.rst | 11 +++++++++++
 include/linux/netdevice.h                |  1 +
 net/core/dev.c                           |  6 +++++-
 net/core/sysctl_net_core.c               | 10 ++++++++++
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index f2ab8a5b6a4b..2090bfc69aa5 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -311,6 +311,17 @@ permit to distribute the load on several cpus.
 If set to 1 (default), timestamps are sampled as soon as possible, before
 queueing.
 
+netdev_unregister_timeout_secs
+------------------------------
+
+Unregister network device timeout in seconds.
+This option controls the timeout (in seconds) used to issue a warning while
+waiting for a network device refcount to drop to 0 during device
+unregistration. A lower value may be useful during bisection to detect
+a leaked reference faster. A larger value may be useful to prevent false
+warnings on slow/loaded systems.
+Default value is 10, minimum 0, maximum 3600.
+
 optmem_max
 ----------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7005ad80e8d1..5fa66db0cb5d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4661,6 +4661,7 @@ void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
+extern int		netdev_unregister_timeout_secs;
 extern int		weight_p;
 extern int		dev_weight_rx_bias;
 extern int		dev_weight_tx_bias;
diff --git a/net/core/dev.c b/net/core/dev.c
index c9a496f5e687..515309573cb8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10322,6 +10322,8 @@ int netdev_refcnt_read(const struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_refcnt_read);
 
+int netdev_unregister_timeout_secs __read_mostly = 10;
+
 #define WAIT_REFS_MIN_MSECS 1
 #define WAIT_REFS_MAX_MSECS 250
 /**
@@ -10383,7 +10385,9 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (refcnt != 1 && time_after(jiffies, warning_time + 10 * HZ)) {
+		if (refcnt &&
+		    time_after(jiffies, warning_time +
+			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
 				 dev->name, refcnt);
 			warning_time = jiffies;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4567de519603..d84c8a1b280e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -24,6 +24,7 @@
 
 static int two = 2;
 static int three = 3;
+static int int_3600 = 3600;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
@@ -570,6 +571,15 @@ static struct ctl_table net_core_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "netdev_unregister_timeout_secs",
+		.data		= &netdev_unregister_timeout_secs,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &int_3600,
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From 0f90d320b4f191ad15604495e06a1636027c96ad Mon Sep 17 00:00:00 2001
From: Meng Yu <yumeng18@huawei.com>
Date: Wed, 24 Mar 2021 10:03:25 +0800
Subject: Bluetooth: Remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Signed-off-by: Meng Yu <yumeng18@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/sco.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 22a110f37abc..3bd41563f118 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -51,8 +51,8 @@ struct sco_conn {
 	unsigned int    mtu;
 };
 
-#define sco_conn_lock(c)	spin_lock(&c->lock);
-#define sco_conn_unlock(c)	spin_unlock(&c->lock);
+#define sco_conn_lock(c)	spin_lock(&c->lock)
+#define sco_conn_unlock(c)	spin_unlock(&c->lock)
 
 static void sco_sock_close(struct sock *sk);
 static void sco_sock_kill(struct sock *sk);
-- 
cgit v1.2.3


From e2c69f3a5b4edfbcade2c38862c1839fc371c5d5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 22:51:51 +0100
Subject: bpf: Avoid old-style declaration warnings

gcc -Wextra wants type modifiers in the normal order:

kernel/bpf/bpf_lsm.c:70:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration]
   70 | const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
      | ^~~~~
kernel/bpf/bpf_lsm.c:91:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration]
   91 | const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
      | ^~~~~

Fixes: 3f6719c7b62f ("bpf: Add bpf_bprm_opts_set helper")
Fixes: 27672f0d280a ("bpf: Add a BPF helper for getting the IMA hash of an inode")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210322215201.1097281-1-arnd@kernel.org
---
 kernel/bpf/bpf_lsm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 9829f381b51c..f211506218ea 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -67,7 +67,7 @@ BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
 
 BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)
 
-const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
+static const struct bpf_func_proto bpf_bprm_opts_set_proto = {
 	.func		= bpf_bprm_opts_set,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
@@ -88,7 +88,7 @@ static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
 
 BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
 
-const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
+static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
 	.func		= bpf_ima_inode_hash,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-- 
cgit v1.2.3


From e7eae3ad191e0239d837e9824bc327d613e60e64 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:02 +0800
Subject: net: hns: remove unused get_autoneg()

Since get_autoneg() in struct hnae_ae_ops is unused, so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hnae.h         |  3 ---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 12 ------------
 2 files changed, 15 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 6ab9458302e1..b1dd93336c23 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -416,8 +416,6 @@ enum hnae_media_type {
  *   get tx and rx of pause frame use
  * set_autoneg()
  *   set auto autonegotiation of pause frame use
- * get_autoneg()
- *   get auto autonegotiation of pause frame use
  * set_pauseparam()
  *   set tx and rx of pause frame use
  * get_coalesce_usecs()
@@ -488,7 +486,6 @@ struct hnae_ae_ops {
 	void (*get_pauseparam)(struct hnae_handle *handle,
 			       u32 *auto_neg, u32 *rx_en, u32 *tx_en);
 	int (*set_autoneg)(struct hnae_handle *handle, u8 enable);
-	int (*get_autoneg)(struct hnae_handle *handle);
 	int (*set_pauseparam)(struct hnae_handle *handle,
 			      u32 auto_neg, u32 rx_en, u32 tx_en);
 	void (*get_coalesce_usecs)(struct hnae_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index b98244f75ab9..72b2a5f1e05b 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -502,17 +502,6 @@ static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 	hns_mac_set_promisc(mac_cb, (u8)!!en);
 }
 
-static int hns_ae_get_autoneg(struct hnae_handle *handle)
-{
-	u32     auto_neg;
-
-	assert(handle);
-
-	hns_mac_get_autoneg(hns_get_mac_cb(handle), &auto_neg);
-
-	return auto_neg;
-}
-
 static int hns_ae_set_pauseparam(struct hnae_handle *handle,
 				 u32 autoneg, u32 rx_en, u32 tx_en)
 {
@@ -966,7 +955,6 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit,
 	.get_pauseparam = hns_ae_get_pauseparam,
 	.set_autoneg = hns_ae_set_autoneg,
-	.get_autoneg = hns_ae_get_autoneg,
 	.set_pauseparam = hns_ae_set_pauseparam,
 	.get_coalesce_usecs = hns_ae_get_coalesce_usecs,
 	.get_max_coalesced_frames = hns_ae_get_max_coalesced_frames,
-- 
cgit v1.2.3


From 72b06363f12479e11e547484a5220017c6124c31 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:03 +0800
Subject: net: hns: remove unused set_autoneg()

Since set_autoneg() in struct hnae_ae_ops is unused, so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hnae.h         | 3 ---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c | 8 --------
 2 files changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index b1dd93336c23..2b7db1c22321 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -414,8 +414,6 @@ enum hnae_media_type {
  *   get ring bd number limit
  * get_pauseparam()
  *   get tx and rx of pause frame use
- * set_autoneg()
- *   set auto autonegotiation of pause frame use
  * set_pauseparam()
  *   set tx and rx of pause frame use
  * get_coalesce_usecs()
@@ -485,7 +483,6 @@ struct hnae_ae_ops {
 				     u32 *uplimit);
 	void (*get_pauseparam)(struct hnae_handle *handle,
 			       u32 *auto_neg, u32 *rx_en, u32 *tx_en);
-	int (*set_autoneg)(struct hnae_handle *handle, u8 enable);
 	int (*set_pauseparam)(struct hnae_handle *handle,
 			      u32 auto_neg, u32 rx_en, u32 tx_en);
 	void (*get_coalesce_usecs)(struct hnae_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 72b2a5f1e05b..78217ff8ecd9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -487,13 +487,6 @@ static void hns_ae_get_pauseparam(struct hnae_handle *handle,
 		hns_dsaf_get_rx_mac_pause_en(dsaf_dev, mac_cb->mac_id, rx_en);
 }
 
-static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable)
-{
-	assert(handle);
-
-	return hns_mac_set_autoneg(hns_get_mac_cb(handle), enable);
-}
-
 static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 {
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
@@ -954,7 +947,6 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.set_loopback = hns_ae_config_loopback,
 	.get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit,
 	.get_pauseparam = hns_ae_get_pauseparam,
-	.set_autoneg = hns_ae_set_autoneg,
 	.set_pauseparam = hns_ae_set_pauseparam,
 	.get_coalesce_usecs = hns_ae_get_coalesce_usecs,
 	.get_max_coalesced_frames = hns_ae_get_max_coalesced_frames,
-- 
cgit v1.2.3


From 5bc72849240d9774039b8900f3fe9482fed536a0 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:04 +0800
Subject: net: hns: remove unused set_rx_ignore_pause_frames()

Since set_rx_ignore_pause_frames() in struct mac_driver
is unused, so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c  |  9 ---------
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h   |  2 --
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 15 ---------------
 3 files changed, 26 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 04878b145626..8907c0881c92 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -130,14 +130,6 @@ static void hns_gmac_get_tx_auto_pause_frames(void *mac_drv, u16 *newval)
 				     GMAC_FC_TX_TIMER_M, GMAC_FC_TX_TIMER_S);
 }
 
-static void hns_gmac_set_rx_auto_pause_frames(void *mac_drv, u32 newval)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, GMAC_PAUSE_EN_REG,
-			 GMAC_PAUSE_EN_RX_FDFC_B, !!newval);
-}
-
 static void hns_gmac_config_max_frame_length(void *mac_drv, u16 newval)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
@@ -739,7 +731,6 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->config_loopback = hns_gmac_config_loopback;
 	mac_drv->config_pad_and_crc = hns_gmac_config_pad_and_crc;
 	mac_drv->config_half_duplex = hns_gmac_set_duplex_type;
-	mac_drv->set_rx_ignore_pause_frames = hns_gmac_set_rx_auto_pause_frames;
 	mac_drv->get_info = hns_gmac_get_info;
 	mac_drv->autoneg_stat = hns_gmac_autoneg_stat;
 	mac_drv->get_pause_enable = hns_gmac_get_pausefrm_cfg;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 3278bf471ddf..9771ba8f5be8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -368,8 +368,6 @@ struct mac_driver {
 	void (*config_half_duplex)(void *mac_drv, u8 newval);
 	/*config tx pause time,if pause_time is zero,disable tx pause enable*/
 	void (*set_tx_auto_pause_frames)(void *mac_drv, u16 pause_time);
-	/*config rx pause enable*/
-	void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable);
 	/* config rx mode for promiscuous*/
 	void (*set_promiscuous)(void *mac_drv, u8 enable);
 	void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index f5145451ba6f..8efc966796c3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -266,19 +266,6 @@ static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, char *mac_addr)
 	dsaf_write_dev(drv, XGMAC_MAC_PAUSE_LOCAL_MAC_H_REG, high_val);
 }
 
-/**
- *hns_xgmac_set_rx_ignore_pause_frames - set rx pause param about xgmac
- *@mac_drv: mac driver
- *@enable:enable rx pause param
- */
-static void hns_xgmac_set_rx_ignore_pause_frames(void *mac_drv, u32 enable)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, XGMAC_MAC_PAUSE_CTRL_REG,
-			 XGMAC_PAUSE_CTL_RX_B, !!enable);
-}
-
 /**
  *hns_xgmac_set_tx_auto_pause_frames - set tx pause param about xgmac
  *@mac_drv: mac driver
@@ -813,8 +800,6 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->config_loopback = NULL;
 	mac_drv->config_pad_and_crc = hns_xgmac_config_pad_and_crc;
 	mac_drv->config_half_duplex = NULL;
-	mac_drv->set_rx_ignore_pause_frames =
-		hns_xgmac_set_rx_ignore_pause_frames;
 	mac_drv->mac_free = hns_xgmac_free;
 	mac_drv->adjust_link = NULL;
 	mac_drv->set_tx_auto_pause_frames = hns_xgmac_set_tx_auto_pause_frames;
-- 
cgit v1.2.3


From 484da1f4f7c3f7f53f1833dad2aa00110cf97e5b Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:05 +0800
Subject: net: hns: remove unused config_half_duplex()

Since config_half_duplex() in struct mac_driver is unused,
so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c  | 9 ---------
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h   | 2 --
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c | 1 -
 3 files changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 8907c0881c92..f387a859a201 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -171,14 +171,6 @@ static void hns_gmac_tx_loop_pkt_dis(void *mac_drv)
 	dsaf_write_dev(drv, GMAC_TX_LOOP_PKT_PRI_REG, tx_loop_pkt_pri);
 }
 
-static void hns_gmac_set_duplex_type(void *mac_drv, u8 newval)
-{
-	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-
-	dsaf_set_dev_bit(drv, GMAC_DUPLEX_TYPE_REG,
-			 GMAC_DUPLEX_TYPE_B, !!newval);
-}
-
 static void hns_gmac_get_duplex_type(void *mac_drv,
 				     enum hns_gmac_duplex_mdoe *duplex_mode)
 {
@@ -730,7 +722,6 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->set_an_mode = hns_gmac_config_an_mode;
 	mac_drv->config_loopback = hns_gmac_config_loopback;
 	mac_drv->config_pad_and_crc = hns_gmac_config_pad_and_crc;
-	mac_drv->config_half_duplex = hns_gmac_set_duplex_type;
 	mac_drv->get_info = hns_gmac_get_info;
 	mac_drv->autoneg_stat = hns_gmac_autoneg_stat;
 	mac_drv->get_pause_enable = hns_gmac_get_pausefrm_cfg;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 9771ba8f5be8..8943ffab4418 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -364,8 +364,6 @@ struct mac_driver {
 	void (*config_max_frame_length)(void *mac_drv, u16 newval);
 	/*config PAD and CRC enable */
 	void (*config_pad_and_crc)(void *mac_drv, u8 newval);
-	/* config duplex mode*/
-	void (*config_half_duplex)(void *mac_drv, u8 newval);
 	/*config tx pause time,if pause_time is zero,disable tx pause enable*/
 	void (*set_tx_auto_pause_frames)(void *mac_drv, u16 pause_time);
 	/* config rx mode for promiscuous*/
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 8efc966796c3..1c9159ad5963 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -799,7 +799,6 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->set_an_mode = NULL;
 	mac_drv->config_loopback = NULL;
 	mac_drv->config_pad_and_crc = hns_xgmac_config_pad_and_crc;
-	mac_drv->config_half_duplex = NULL;
 	mac_drv->mac_free = hns_xgmac_free;
 	mac_drv->adjust_link = NULL;
 	mac_drv->set_tx_auto_pause_frames = hns_xgmac_set_tx_auto_pause_frames;
-- 
cgit v1.2.3


From cf7fc356676847c0ef29b3bafd620d8cecfed345 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:06 +0800
Subject: net: hns: remove unused NIC_LB_TEST_RX_PKG_ERR

NIC_LB_TEST_RX_PKG_ERR is not used and can be removed.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 1af664779d33..177ce067fcec 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -480,7 +480,7 @@ static int __lb_run_test(struct net_device *ndev,
 #define NIC_LB_TEST_NO_MEM_ERR 1
 #define NIC_LB_TEST_TX_CNT_ERR 2
 #define NIC_LB_TEST_RX_CNT_ERR 3
-#define NIC_LB_TEST_RX_PKG_ERR 4
+
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct hnae_handle *h = priv->ae_handle;
 	int i, j, lc, good_cnt, ret_val = 0;
-- 
cgit v1.2.3


From dcc683b81fc466c82830369738cab649a37ef3c2 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:07 +0800
Subject: net: hns: remove unused HNS_LED_PC_REG

HNS_LED_PC_REG is not used and can be removed.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 177ce067fcec..da48c05435ea 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -17,7 +17,6 @@
 #define HNS_PHY_CSC_REG		16	/* Copper Specific Control Register */
 #define HNS_PHY_CSS_REG		17	/* Copper Specific Status Register */
 #define HNS_LED_FC_REG		16	/* LED Function Control Reg. */
-#define HNS_LED_PC_REG		17	/* LED Polarity Control Reg. */
 
 #define HNS_LED_FORCE_ON	9
 #define HNS_LED_FORCE_OFF	8
-- 
cgit v1.2.3


From 4a4ec57c0656ad4063775db11ace6d498171364a Mon Sep 17 00:00:00 2001
From: Yonglong Liu <liuyonglong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:08 +0800
Subject: net: hns: remove unnecessary !! operation in
 hns_mac_config_sds_loopback_acpi()

The !! operation of variable en in hns_mac_config_sds_loopback_acpi()
is redundant, so remove it.

Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 173d6966c1a3..325e81d30cfd 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -686,7 +686,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en)
 	obj_args[0].integer.type = ACPI_TYPE_INTEGER;
 	obj_args[0].integer.value = mac_cb->mac_id;
 	obj_args[1].integer.type = ACPI_TYPE_INTEGER;
-	obj_args[1].integer.value = !!en;
+	obj_args[1].integer.value = en;
 
 	argv4.type = ACPI_TYPE_PACKAGE;
 	argv4.package.count = 2;
-- 
cgit v1.2.3


From 7f8bcd915724f9485475b515c554b840278526bc Mon Sep 17 00:00:00 2001
From: Yonglong Liu <liuyonglong@huawei.com>
Date: Tue, 23 Mar 2021 15:41:09 +0800
Subject: net: hns: remove redundant variable initialization

There are some variables in HNS driver will not being referenced
before assigned, so there is no need to init them.

Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c   |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c   |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c  | 16 ++++++++--------
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c   |  4 ++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c       |  4 ++--
 drivers/net/ethernet/hisilicon/hns_mdio.c           |  4 ++--
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 78217ff8ecd9..c615fbf9094e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -630,7 +630,7 @@ static void hns_ae_update_stats(struct hnae_handle *handle,
 	struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
 	u64 tx_bytes = 0, rx_bytes = 0, tx_packets = 0, rx_packets = 0;
 	u64 rx_errors = 0, tx_errors = 0, tx_dropped = 0;
-	u64 rx_missed_errors = 0;
+	u64 rx_missed_errors;
 
 	dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
 	if (!dsaf_dev)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 4a448138b4ec..f4cf569a2599 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -1202,7 +1202,7 @@ void hns_mac_get_regs(struct hns_mac_cb *mac_cb, void *data)
 
 void hns_set_led_opt(struct hns_mac_cb *mac_cb)
 {
-	int nic_data = 0;
+	int nic_data;
 	int txpkts, rxpkts;
 
 	txpkts = mac_cb->txpkt_for_led - mac_cb->hw_stats.tx_good_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 87d3db4666df..c2a60612f503 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1613,7 +1613,7 @@ int hns_dsaf_set_mac_uc_entry(
 	struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_tbl_tcam_ucast_cfg mac_data;
 	struct dsaf_drv_priv *priv =
@@ -1679,7 +1679,7 @@ int hns_dsaf_rm_mac_addr(
 	struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_tbl_tcam_ucast_cfg mac_data;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 
@@ -1751,7 +1751,7 @@ static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
 int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 			     struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_tbl_tcam_key mask_key;
 	struct dsaf_tbl_tcam_data *pmask_key = NULL;
@@ -1861,7 +1861,7 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
 			   u8 in_port_num, u8 *addr)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_priv *priv =
 	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
@@ -1910,7 +1910,7 @@ int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id,
 int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 			     struct dsaf_drv_mac_single_dest_entry *mac_entry)
 {
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
@@ -2264,7 +2264,7 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num)
  */
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data)
 {
-	u32 i = 0;
+	u32 i;
 	u32 j;
 	u32 *p = data;
 	u32 reg_tmp;
@@ -2768,7 +2768,7 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port)
 	struct dsaf_drv_mac_single_dest_entry mask_entry;
 	struct dsaf_drv_tbl_tcam_key temp_key, mask_key;
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	struct hns_mac_cb *mac_cb;
 	u8 addr[ETH_ALEN] = {0};
@@ -2870,7 +2870,7 @@ static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port)
 	struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0};
 	struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0};
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
-	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+	u16 entry_index;
 	struct dsaf_drv_tbl_tcam_key mac_key;
 	u8 addr[ETH_ALEN] = {0};
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 37c8effa421c..5d5dc6942232 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -974,7 +974,7 @@ void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data)
 	bool is_dbg = HNS_DSAF_IS_DEBUG(rcb_com->dsaf_dev);
 	u32 reg_tmp;
 	u32 reg_num_tmp;
-	u32 i = 0;
+	u32 i;
 
 	/*rcb common registers */
 	regs[0] = dsaf_read_dev(rcb_com, RCB_COM_CFG_ENDIAN_REG);
@@ -1045,7 +1045,7 @@ void hns_rcb_get_ring_regs(struct hnae_queue *queue, void *data)
 	u32 *regs = data;
 	struct ring_pair_cb *ring_pair
 		= container_of(queue, struct ring_pair_cb, q);
-	u32 i = 0;
+	u32 i;
 
 	/*rcb ring registers */
 	regs[0] = dsaf_read_dev(queue, RCB_RING_RX_RING_BASEADDR_L_REG);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 1c9159ad5963..be52acd448f9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -482,7 +482,7 @@ static void hns_xgmac_get_link_status(void *mac_drv, u32 *link_stat)
  */
 static void hns_xgmac_get_regs(void *mac_drv, void *data)
 {
-	u32 i = 0;
+	u32 i;
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 	u32 *regs = data;
 	u64 qtmp;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index c66a7a51198e..7a1a0a90a1a3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -872,7 +872,7 @@ out:
 static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
 	struct hnae_ring *ring = ring_data->ring;
-	int num = 0;
+	int num;
 	bool rx_stopped;
 
 	hns_update_rx_rate(ring);
@@ -1881,7 +1881,7 @@ static void hns_nic_set_rx_mode(struct net_device *ndev)
 static void hns_nic_get_stats64(struct net_device *ndev,
 				struct rtnl_link_stats64 *stats)
 {
-	int idx = 0;
+	int idx;
 	u64 tx_bytes = 0;
 	u64 rx_bytes = 0;
 	u64 tx_pkts = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c
index 883d0d7c6858..3e54017a2a5b 100644
--- a/drivers/net/ethernet/hisilicon/hns_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns_mdio.c
@@ -279,7 +279,7 @@ static int hns_mdio_write(struct mii_bus *bus,
 static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
 	int ret;
-	u16 reg_val = 0;
+	u16 reg_val;
 	u8 devad = ((regnum >> 16) & 0x1f);
 	u8 is_c45 = !!(regnum & MII_ADDR_C45);
 	u16 reg = (u16)(regnum & 0xffff);
@@ -420,7 +420,7 @@ static int hns_mdio_probe(struct platform_device *pdev)
 {
 	struct hns_mdio_device *mdio_dev;
 	struct mii_bus *new_bus;
-	int ret = -ENODEV;
+	int ret;
 
 	if (!pdev) {
 		dev_err(NULL, "pdev is NULL!\r\n");
-- 
cgit v1.2.3


From b3cb91b97c04e29feee8888ad08426d8904be63f Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 23 Mar 2021 09:33:46 +0100
Subject: bridge: mrp: Disable roles before deleting the MRP instance

When an MRP instance was created, the driver was notified that the
instance is created and then in a different callback about role of the
instance. But when the instance was deleted the driver was notified only
that the MRP instance is deleted and not also that the role is disabled.

This patch make sure that the driver is notified that the role is
changed to disabled before the MRP instance is deleted to have similar
callbacks with the creating of the instance. In this way it would
simplify the logic in the drivers.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 12487f6fe9b4..cd2b1e424e54 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -411,6 +411,13 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
 	cancel_delayed_work_sync(&mrp->in_test_work);
 	br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0);
 
+	/* Disable the roles */
+	br_mrp_switchdev_set_ring_role(br, mrp, BR_MRP_RING_ROLE_DISABLED);
+	p = rtnl_dereference(mrp->i_port);
+	if (p)
+		br_mrp_switchdev_set_in_role(br, mrp, mrp->in_id, mrp->ring_id,
+					     BR_MRP_IN_ROLE_DISABLED);
+
 	br_mrp_switchdev_del(br, mrp);
 
 	/* Reset the ports */
-- 
cgit v1.2.3


From 5b7c0c32c90494f5aaf13f417cff5dc204575597 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 23 Mar 2021 09:33:47 +0100
Subject: net: ocelot: Simplify MRP deletion

Now that the driver will always be notified that the role is deleted
before the ring is deleted, then we don't need to duplicate the logic of
cleaning the resources also in the delete function.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_mrp.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index c3cbcaf64bb2..08b481a93460 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -154,7 +154,6 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port,
 		   const struct switchdev_obj_mrp *mrp)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	int i;
 
 	if (!ocelot_port)
 		return -EOPNOTSUPP;
@@ -162,23 +161,8 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port,
 	if (ocelot_port->mrp_ring_id != mrp->ring_id)
 		return 0;
 
-	ocelot_mrp_del_vcap(ocelot, port);
-	ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports);
-
 	ocelot_port->mrp_ring_id = 0;
 
-	for (i = 0; i < ocelot->num_phys_ports; ++i) {
-		ocelot_port = ocelot->ports[i];
-
-		if (!ocelot_port)
-			continue;
-
-		if (ocelot_port->mrp_ring_id != 0)
-			goto out;
-	}
-
-	ocelot_mrp_del_mac(ocelot, ocelot->ports[port]);
-out:
 	return 0;
 }
 EXPORT_SYMBOL(ocelot_mrp_del);
-- 
cgit v1.2.3


From ad248f7761eb9a3ff9ba2a8c93b548600185a938 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 24 Mar 2021 15:09:50 +0000
Subject: net: bridge: Fix missing return assignment from br_vlan_replay_one
 call

The call to br_vlan_replay_one is returning an error return value but
this is not being assigned to err and the following check on err is
currently always false because err was initialized to zero. Fix this
by assigning err.

Addresses-Coverity: ("'Constant' variable guards dead code")
Fixes: 22f67cdfae6a ("net: bridge: add helper to replay VLANs installed on port")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index ca8daccff217..6f961cb5f771 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1815,7 +1815,7 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
 		if (!br_vlan_should_use(v))
 			continue;
 
-		br_vlan_replay_one(nb, dev, &vlan, extack);
+		err = br_vlan_replay_one(nb, dev, &vlan, extack);
 		if (err)
 			return err;
 	}
-- 
cgit v1.2.3


From ddb94eafab8b597b05904c8277194ea2d6357fa9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:32 +0100
Subject: net: resolve forwarding path from virtual netdevice and HW
 destination address

This patch adds dev_fill_forward_path() which resolves the path to reach
the real netdevice from the IP forwarding side. This function takes as
input the netdevice and the destination hardware address and it walks
down the devices calling .ndo_fill_forward_path() for each device until
the real device is found.

For instance, assuming the following topology:

               IP forwarding
              /             \
           br0              eth0
           / \
       eth1  eth2
        .
        .
        .
       ethX
 ab:cd:ef:ab:cd:ef

where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
ethX is the interface in another box which is connected to the eth1
bridge port.

For packets going through IP forwarding to br0 whose destination MAC
address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
following path:

	br0 -> eth1

.ndo_fill_forward_path for br0 looks up at the FDB for the bridge port
from the destination MAC address to get the bridge port eth1.

This information allows to create a fast path that bypasses the classic
bridge and IP forwarding paths, so packets go directly from the bridge
port eth1 to eth0 (wan interface) and vice versa.

             fast path
      .------------------------.
     /                          \
    |           IP forwarding   |
    |          /             \  \/
    |       br0               eth0
    .       / \
     -> eth1  eth2
        .
        .
        .
       ethX
 ab:cd:ef:ab:cd:ef

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 27 +++++++++++++++++++++++++++
 net/core/dev.c            | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5fa66db0cb5d..03cff88c7292 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -848,6 +848,27 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct net_device *sb_dev);
 
+enum net_device_path_type {
+	DEV_PATH_ETHERNET = 0,
+};
+
+struct net_device_path {
+	enum net_device_path_type	type;
+	const struct net_device		*dev;
+};
+
+#define NET_DEVICE_PATH_STACK_MAX	5
+
+struct net_device_path_stack {
+	int			num_paths;
+	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
+};
+
+struct net_device_path_ctx {
+	const struct net_device *dev;
+	const u8		*daddr;
+};
+
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
 	TC_SETUP_CLSU32,
@@ -1282,6 +1303,8 @@ struct netdev_net_notifier {
  * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
  *	If a device is paired with a peer device, return the peer instance.
  *	The caller must be under RCU read context.
+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+ *     Get the forwarding path to reach the real device from the HW destination address
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1488,6 +1511,8 @@ struct net_device_ops {
 	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 						  struct ip_tunnel_parm *p, int cmd);
 	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
+	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+                                                         struct net_device_path *path);
 };
 
 /**
@@ -2870,6 +2895,8 @@ void dev_remove_offload(struct packet_offload *po);
 
 int dev_get_iflink(const struct net_device *dev);
 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack);
 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index 515309573cb8..4bb6dcdbed8b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -848,6 +848,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 
+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
+{
+	int k = stack->num_paths++;
+
+	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
+		return NULL;
+
+	return &stack->path[k];
+}
+
+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+			  struct net_device_path_stack *stack)
+{
+	const struct net_device *last_dev;
+	struct net_device_path_ctx ctx = {
+		.dev	= dev,
+		.daddr	= daddr,
+	};
+	struct net_device_path *path;
+	int ret = 0;
+
+	stack->num_paths = 0;
+	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
+		last_dev = ctx.dev;
+		path = dev_fwd_path(stack);
+		if (!path)
+			return -1;
+
+		memset(path, 0, sizeof(struct net_device_path));
+		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
+		if (ret < 0)
+			return -1;
+
+		if (WARN_ON_ONCE(last_dev == ctx.dev))
+			return -1;
+	}
+	path = dev_fwd_path(stack);
+	if (!path)
+		return -1;
+	path->type = DEV_PATH_ETHERNET;
+	path->dev = ctx.dev;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace
-- 
cgit v1.2.3


From e4417d6950b06fe6c520e937b337daff093220ff Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:33 +0100
Subject: net: 8021q: resolve forwarding path for vlan devices

Add .ndo_fill_forward_path for vlan devices.

For instance, assuming the following topology:

                   IP forwarding
                  /             \
            eth0.100             eth0
            |
            eth0
            .
            .
            .
           ethX
     ab:cd:ef:ab:cd:ef

For packets going through IP forwarding to eth0.100 whose destination
MAC address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
following path:

        eth0.100 -> eth0

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++++
 net/8021q/vlan_dev.c      | 15 +++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 03cff88c7292..8823a56744f1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -850,11 +850,18 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 
 enum net_device_path_type {
 	DEV_PATH_ETHERNET = 0,
+	DEV_PATH_VLAN,
 };
 
 struct net_device_path {
 	enum net_device_path_type	type;
 	const struct net_device		*dev;
+	union {
+		struct {
+			u16		id;
+			__be16		proto;
+		} encap;
+	};
 };
 
 #define NET_DEVICE_PATH_STACK_MAX	5
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index dc1a197792e6..1b1955a63f7f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -776,6 +776,20 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
 	return real_dev->ifindex;
 }
 
+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
+				      struct net_device_path *path)
+{
+	struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
+
+	path->type = DEV_PATH_VLAN;
+	path->encap.id = vlan->vlan_id;
+	path->encap.proto = vlan->vlan_proto;
+	path->dev = ctx->dev;
+	ctx->dev = vlan->real_dev;
+
+	return 0;
+}
+
 static const struct ethtool_ops vlan_ethtool_ops = {
 	.get_link_ksettings	= vlan_ethtool_get_link_ksettings,
 	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
@@ -814,6 +828,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 #endif
 	.ndo_fix_features	= vlan_dev_fix_features,
 	.ndo_get_iflink		= vlan_dev_get_iflink,
+	.ndo_fill_forward_path	= vlan_dev_fill_forward_path,
 };
 
 static void vlan_dev_free(struct net_device *dev)
-- 
cgit v1.2.3


From ec9d16bab615ceda8ac22a7b4d2c7601bbe172cb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:34 +0100
Subject: net: bridge: resolve forwarding path for bridge devices

Add .ndo_fill_forward_path for bridge devices.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/bridge/br_device.c    | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8823a56744f1..a24270b0d200 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -851,6 +851,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
 enum net_device_path_type {
 	DEV_PATH_ETHERNET = 0,
 	DEV_PATH_VLAN,
+	DEV_PATH_BRIDGE,
 };
 
 struct net_device_path {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3f2f06b4dd27..c241719013f4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -385,6 +385,32 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
 	return br_del_if(br, slave_dev);
 }
 
+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
+				struct net_device_path *path)
+{
+	struct net_bridge_fdb_entry *f;
+	struct net_bridge_port *dst;
+	struct net_bridge *br;
+
+	if (netif_is_bridge_port(ctx->dev))
+		return -1;
+
+	br = netdev_priv(ctx->dev);
+	f = br_fdb_find_rcu(br, ctx->daddr, 0);
+	if (!f || !f->dst)
+		return -1;
+
+	dst = READ_ONCE(f->dst);
+	if (!dst)
+		return -1;
+
+	path->type = DEV_PATH_BRIDGE;
+	path->dev = dst->br->dev;
+	ctx->dev = dst->dev;
+
+	return 0;
+}
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo		 = br_getinfo,
 	.get_link		 = ethtool_op_get_link,
@@ -419,6 +445,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_bridge_setlink	 = br_setlink,
 	.ndo_bridge_dellink	 = br_dellink,
 	.ndo_features_check	 = passthru_features_check,
+	.ndo_fill_forward_path	 = br_fill_forward_path,
 };
 
 static struct device_type br_type = {
-- 
cgit v1.2.3


From bcf2766b1377421b7c9259865b25c1b62a7fa686 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:35 +0100
Subject: net: bridge: resolve forwarding path for VLAN tag actions in bridge
 devices

Depending on the VLAN settings of the bridge and the port, the bridge can
either add or remove a tag. When vlan filtering is enabled, the fdb lookup
also needs to know the VLAN tag/proto for the destination address
To provide this, keep track of the stack of VLAN tags for the path in the
lookup context

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 ++++++++++++++
 net/8021q/vlan_dev.c      |  6 ++++++
 net/bridge/br_device.c    | 23 +++++++++++++++++++-
 net/bridge/br_private.h   | 20 ++++++++++++++++++
 net/bridge/br_vlan.c      | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a24270b0d200..344d9c0c9b22 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -862,10 +862,20 @@ struct net_device_path {
 			u16		id;
 			__be16		proto;
 		} encap;
+		struct {
+			enum {
+				DEV_PATH_BR_VLAN_KEEP,
+				DEV_PATH_BR_VLAN_TAG,
+				DEV_PATH_BR_VLAN_UNTAG,
+			}		vlan_mode;
+			u16		vlan_id;
+			__be16		vlan_proto;
+		} bridge;
 	};
 };
 
 #define NET_DEVICE_PATH_STACK_MAX	5
+#define NET_DEVICE_PATH_VLAN_MAX	2
 
 struct net_device_path_stack {
 	int			num_paths;
@@ -875,6 +885,12 @@ struct net_device_path_stack {
 struct net_device_path_ctx {
 	const struct net_device *dev;
 	const u8		*daddr;
+
+	int			num_vlans;
+	struct {
+		u16		id;
+		__be16		proto;
+	} vlan[NET_DEVICE_PATH_VLAN_MAX];
 };
 
 enum tc_setup_type {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1b1955a63f7f..4db3f0621959 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -786,6 +786,12 @@ static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
 	path->encap.proto = vlan->vlan_proto;
 	path->dev = ctx->dev;
 	ctx->dev = vlan->real_dev;
+	if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+		return -ENOSPC;
+
+	ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
+	ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
+	ctx->num_vlans++;
 
 	return 0;
 }
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index c241719013f4..0c72503e0d39 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -396,7 +396,10 @@ static int br_fill_forward_path(struct net_device_path_ctx *ctx,
 		return -1;
 
 	br = netdev_priv(ctx->dev);
-	f = br_fdb_find_rcu(br, ctx->daddr, 0);
+
+	br_vlan_fill_forward_path_pvid(br, ctx, path);
+
+	f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
 	if (!f || !f->dst)
 		return -1;
 
@@ -404,10 +407,28 @@ static int br_fill_forward_path(struct net_device_path_ctx *ctx,
 	if (!dst)
 		return -1;
 
+	if (br_vlan_fill_forward_path_mode(br, dst, path))
+		return -1;
+
 	path->type = DEV_PATH_BRIDGE;
 	path->dev = dst->br->dev;
 	ctx->dev = dst->dev;
 
+	switch (path->bridge.vlan_mode) {
+	case DEV_PATH_BR_VLAN_TAG:
+		if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
+			return -ENOSPC;
+		ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
+		ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
+		ctx->num_vlans++;
+		break;
+	case DEV_PATH_BR_VLAN_UNTAG:
+		ctx->num_vlans--;
+		break;
+	case DEV_PATH_BR_VLAN_KEEP:
+		break;
+	}
+
 	return 0;
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d7d167e10b70..50747990188e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1118,6 +1118,13 @@ void br_vlan_notify(const struct net_bridge *br,
 bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
 			     const struct net_bridge_vlan *range_end);
 
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+				    struct net_device_path_ctx *ctx,
+				    struct net_device_path *path);
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+				   struct net_bridge_port *dst,
+				   struct net_device_path *path);
+
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
 {
@@ -1277,6 +1284,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
 	return 0;
 }
 
+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+						  struct net_device_path_ctx *ctx,
+						  struct net_device_path *path)
+{
+}
+
+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+						 struct net_bridge_port *dst,
+						 struct net_device_path *path)
+{
+	return 0;
+}
+
 static inline struct net_bridge_vlan_group *br_vlan_group(
 					const struct net_bridge *br)
 {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 6f961cb5f771..c92240b21c4a 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1339,6 +1339,59 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
 }
 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
 
+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
+				    struct net_device_path_ctx *ctx,
+				    struct net_device_path *path)
+{
+	struct net_bridge_vlan_group *vg;
+	int idx = ctx->num_vlans - 1;
+	u16 vid;
+
+	path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+		return;
+
+	vg = br_vlan_group(br);
+
+	if (idx >= 0 &&
+	    ctx->vlan[idx].proto == br->vlan_proto) {
+		vid = ctx->vlan[idx].id;
+	} else {
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
+		vid = br_get_pvid(vg);
+	}
+
+	path->bridge.vlan_id = vid;
+	path->bridge.vlan_proto = br->vlan_proto;
+}
+
+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
+				   struct net_bridge_port *dst,
+				   struct net_device_path *path)
+{
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_vlan *v;
+
+	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
+		return 0;
+
+	vg = nbp_vlan_group_rcu(dst);
+	v = br_vlan_find(vg, path->bridge.vlan_id);
+	if (!v || !br_vlan_should_use(v))
+		return -EINVAL;
+
+	if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
+		return 0;
+
+	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+	else
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
+
+	return 0;
+}
+
 int br_vlan_get_info(const struct net_device *dev, u16 vid,
 		     struct bridge_vlan_info *p_vinfo)
 {
-- 
cgit v1.2.3


From f6efc675c9dd8d93f826b79ae7e33e03301db609 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:36 +0100
Subject: net: ppp: resolve forwarding path for bridge pppoe devices

Pass on the PPPoE session ID, destination hardware address and the real
device.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c | 22 ++++++++++++++++++++++
 drivers/net/ppp/pppoe.c       | 23 +++++++++++++++++++++++
 include/linux/netdevice.h     |  2 ++
 include/linux/ppp_channel.h   |  3 +++
 4 files changed, 50 insertions(+)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index d445ecb1d0c7..930e49ef15f6 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1560,12 +1560,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
 		ppp_destroy_interface(ppp);
 }
 
+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
+				 struct net_device_path *path)
+{
+	struct ppp *ppp = netdev_priv(ctx->dev);
+	struct ppp_channel *chan;
+	struct channel *pch;
+
+	if (ppp->flags & SC_MULTILINK)
+		return -EOPNOTSUPP;
+
+	if (list_empty(&ppp->channels))
+		return -ENODEV;
+
+	pch = list_first_entry(&ppp->channels, struct channel, clist);
+	chan = pch->chan;
+	if (!chan->ops->fill_forward_path)
+		return -EOPNOTSUPP;
+
+	return chan->ops->fill_forward_path(ctx, path, chan);
+}
+
 static const struct net_device_ops ppp_netdev_ops = {
 	.ndo_init	 = ppp_dev_init,
 	.ndo_uninit      = ppp_dev_uninit,
 	.ndo_start_xmit  = ppp_start_xmit,
 	.ndo_do_ioctl    = ppp_net_ioctl,
 	.ndo_get_stats64 = ppp_get_stats64,
+	.ndo_fill_forward_path = ppp_fill_forward_path,
 };
 
 static struct device_type ppp_type = {
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 9dc7f4b93d51..3619520340b7 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -972,8 +972,31 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	return __pppoe_xmit(sk, skb);
 }
 
+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
+				   struct net_device_path *path,
+				   const struct ppp_channel *chan)
+{
+	struct sock *sk = (struct sock *)chan->private;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct net_device *dev = po->pppoe_dev;
+
+	if (sock_flag(sk, SOCK_DEAD) ||
+	    !(sk->sk_state & PPPOX_CONNECTED) || !dev)
+		return -1;
+
+	path->type = DEV_PATH_PPPOE;
+	path->encap.proto = htons(ETH_P_PPP_SES);
+	path->encap.id = be16_to_cpu(po->num);
+	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
+	path->dev = ctx->dev;
+	ctx->dev = dev;
+
+	return 0;
+}
+
 static const struct ppp_channel_ops pppoe_chan_ops = {
 	.start_xmit = pppoe_xmit,
+	.fill_forward_path = pppoe_fill_forward_path,
 };
 
 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 344d9c0c9b22..dd54f7cc3f12 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -852,6 +852,7 @@ enum net_device_path_type {
 	DEV_PATH_ETHERNET = 0,
 	DEV_PATH_VLAN,
 	DEV_PATH_BRIDGE,
+	DEV_PATH_PPPOE,
 };
 
 struct net_device_path {
@@ -861,6 +862,7 @@ struct net_device_path {
 		struct {
 			u16		id;
 			__be16		proto;
+			u8		h_dest[ETH_ALEN];
 		} encap;
 		struct {
 			enum {
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 98966064ee68..91f9a928344e 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -28,6 +28,9 @@ struct ppp_channel_ops {
 	int	(*start_xmit)(struct ppp_channel *, struct sk_buff *);
 	/* Handle an ioctl call that has come in via /dev/ppp. */
 	int	(*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
+	int	(*fill_forward_path)(struct net_device_path_ctx *,
+				     struct net_device_path *,
+				     const struct ppp_channel *);
 };
 
 struct ppp_channel {
-- 
cgit v1.2.3


From 0994d492a1b78dff96671ccf6ad8294cc2bd909e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:37 +0100
Subject: net: dsa: resolve forwarding path for dsa slave ports

Add .ndo_fill_forward_path for dsa slave port devices

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  5 +++++
 net/dsa/slave.c           | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd54f7cc3f12..90db74132090 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -853,6 +853,7 @@ enum net_device_path_type {
 	DEV_PATH_VLAN,
 	DEV_PATH_BRIDGE,
 	DEV_PATH_PPPOE,
+	DEV_PATH_DSA,
 };
 
 struct net_device_path {
@@ -873,6 +874,10 @@ struct net_device_path {
 			u16		vlan_id;
 			__be16		vlan_proto;
 		} bridge;
+		struct {
+			int port;
+			u16 proto;
+		} dsa;
 	};
 };
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index c51e52418a62..7453ceca2c7e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1654,6 +1654,21 @@ static void dsa_slave_get_stats64(struct net_device *dev,
 		dev_get_tstats64(dev, s);
 }
 
+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
+				       struct net_device_path *path)
+{
+	struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
+	struct dsa_port *cpu_dp = dp->cpu_dp;
+
+	path->dev = ctx->dev;
+	path->type = DEV_PATH_DSA;
+	path->dsa.proto = cpu_dp->tag_ops->proto;
+	path->dsa.port = dp->index;
+	ctx->dev = cpu_dp->master;
+
+	return 0;
+}
+
 static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_open	 	= dsa_slave_open,
 	.ndo_stop		= dsa_slave_close,
@@ -1679,6 +1694,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
 	.ndo_get_devlink_port	= dsa_slave_get_devlink_port,
 	.ndo_change_mtu		= dsa_slave_change_mtu,
+	.ndo_fill_forward_path	= dsa_slave_fill_forward_path,
 };
 
 static struct device_type dsa_type = {
-- 
cgit v1.2.3


From 5139c0c007250c01c61337d584db4072c4786bf6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:38 +0100
Subject: netfilter: flowtable: add xmit path types

Add the xmit_type field that defines the two supported xmit paths in the
flowtable data plane, which are the neighbour and the xfrm xmit paths.
This patch prepares for new flowtable xmit path types to come.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h | 11 +++++++++--
 net/netfilter/nf_flow_table_core.c    |  1 +
 net/netfilter/nf_flow_table_ip.c      | 14 ++++++++------
 net/netfilter/nft_flow_offload.c      | 20 ++++++++++++++++++--
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index fb165697c8a1..828fcfbd5e6f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -89,6 +89,11 @@ enum flow_offload_tuple_dir {
 };
 #define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
 
+enum flow_offload_xmit_type {
+	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
+	FLOW_OFFLOAD_XMIT_XFRM,
+};
+
 struct flow_offload_tuple {
 	union {
 		struct in_addr		src_v4;
@@ -111,7 +116,8 @@ struct flow_offload_tuple {
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
-	u8				dir;
+	u8				dir:6,
+					xmit_type:2;
 
 	u16				mtu;
 
@@ -158,7 +164,8 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 
 struct nf_flow_route {
 	struct {
-		struct dst_entry	*dst;
+		struct dst_entry		*dst;
+		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8ffd3f3c288c..573be4d1efb5 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -95,6 +95,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	}
 
 	flow_tuple->iifidx = other_dst->dev->ifindex;
+	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
 	flow_tuple->dst_cache = dst;
 
 	return 0;
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 3be58b6d60af..e9bef38a356b 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -235,8 +235,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
-	outdev = rt->dst.dev;
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
@@ -265,13 +263,16 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	if (unlikely(dst_xfrm(&rt->dst))) {
+	rt = (struct rtable *)tuplehash->tuple.dst_cache;
+
+	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
 		IPCB(skb)->flags = IPSKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
+	outdev = rt->dst.dev;
 	skb->dev = outdev;
 	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
 	skb_dst_set_noref(skb, &rt->dst);
@@ -456,8 +457,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
-	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
-	outdev = rt->dst.dev;
 
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
@@ -485,13 +484,16 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	if (unlikely(dst_xfrm(&rt->dst))) {
+	rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+
+	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
 		IP6CB(skb)->flags = IP6SKB_FORWARDED;
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
+	outdev = rt->dst.dev;
 	skb->dev = outdev;
 	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
 	skb_dst_set_noref(skb, &rt->dst);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 3a6c84fb2c90..1da2bb24f6c0 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -19,6 +19,22 @@ struct nft_flow_offload {
 	struct nft_flowtable	*flowtable;
 };
 
+static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
+{
+	if (dst_xfrm(dst))
+		return FLOW_OFFLOAD_XMIT_XFRM;
+
+	return FLOW_OFFLOAD_XMIT_NEIGH;
+}
+
+static void nft_default_forward_path(struct nf_flow_route *route,
+				     struct dst_entry *dst_cache,
+				     enum ip_conntrack_dir dir)
+{
+	route->tuple[dir].dst		= dst_cache;
+	route->tuple[dir].xmit_type	= nft_xmit_type(dst_cache);
+}
+
 static int nft_flow_route(const struct nft_pktinfo *pkt,
 			  const struct nf_conn *ct,
 			  struct nf_flow_route *route,
@@ -44,8 +60,8 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
 	if (!other_dst)
 		return -ENOENT;
 
-	route->tuple[dir].dst		= this_dst;
-	route->tuple[!dir].dst		= other_dst;
+	nft_default_forward_path(route, this_dst, dir);
+	nft_default_forward_path(route, other_dst, !dir);
 
 	return 0;
 }
-- 
cgit v1.2.3


From c63a7cc4d795c004b70cb935e8ba77d9e764f0ba Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:39 +0100
Subject: netfilter: flowtable: use dev_fill_forward_path() to obtain ingress
 device

Obtain the ingress device in the tuple from the route in the reply
direction. Use dev_fill_forward_path() instead to get the real ingress
device for this flow.

Fall back to use the ingress device that the IP forwarding route
provides if:

- dev_fill_forward_path() finds no real ingress device.
- the ingress device that is obtained is not part of the flowtable
  devices.
- this route has a xfrm policy.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h |   3 +
 net/netfilter/nf_flow_table_core.c    |   3 +-
 net/netfilter/nft_flow_offload.c      | 102 +++++++++++++++++++++++++++++++++-
 3 files changed, 103 insertions(+), 5 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 828fcfbd5e6f..dca9fc66405f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -165,6 +165,9 @@ static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
 struct nf_flow_route {
 	struct {
 		struct dst_entry		*dst;
+		struct {
+			u32			ifindex;
+		} in;
 		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 573be4d1efb5..51e3e1b08e1c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -79,7 +79,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 				   enum flow_offload_tuple_dir dir)
 {
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
-	struct dst_entry *other_dst = route->tuple[!dir].dst;
 	struct dst_entry *dst = route->tuple[dir].dst;
 
 	if (!dst_hold_safe(route->tuple[dir].dst))
@@ -94,7 +93,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 		break;
 	}
 
-	flow_tuple->iifidx = other_dst->dev->ifindex;
+	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
 	flow_tuple->dst_cache = dst;
 
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 1da2bb24f6c0..15f90c31feb0 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -31,14 +31,104 @@ static void nft_default_forward_path(struct nf_flow_route *route,
 				     struct dst_entry *dst_cache,
 				     enum ip_conntrack_dir dir)
 {
+	route->tuple[!dir].in.ifindex	= dst_cache->dev->ifindex;
 	route->tuple[dir].dst		= dst_cache;
 	route->tuple[dir].xmit_type	= nft_xmit_type(dst_cache);
 }
 
+static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
+				     const struct dst_entry *dst_cache,
+				     const struct nf_conn *ct,
+				     enum ip_conntrack_dir dir,
+				     struct net_device_path_stack *stack)
+{
+	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
+	struct net_device *dev = dst_cache->dev;
+	unsigned char ha[ETH_ALEN];
+	struct neighbour *n;
+	u8 nud_state;
+
+	n = dst_neigh_lookup(dst_cache, daddr);
+	if (!n)
+		return -1;
+
+	read_lock_bh(&n->lock);
+	nud_state = n->nud_state;
+	ether_addr_copy(ha, n->ha);
+	read_unlock_bh(&n->lock);
+	neigh_release(n);
+
+	if (!(nud_state & NUD_VALID))
+		return -1;
+
+	return dev_fill_forward_path(dev, ha, stack);
+}
+
+struct nft_forward_info {
+	const struct net_device *indev;
+};
+
+static void nft_dev_path_info(const struct net_device_path_stack *stack,
+			      struct nft_forward_info *info)
+{
+	const struct net_device_path *path;
+	int i;
+
+	for (i = 0; i < stack->num_paths; i++) {
+		path = &stack->path[i];
+		switch (path->type) {
+		case DEV_PATH_ETHERNET:
+			info->indev = path->dev;
+			break;
+		case DEV_PATH_VLAN:
+		case DEV_PATH_BRIDGE:
+		default:
+			info->indev = NULL;
+			break;
+		}
+	}
+}
+
+static bool nft_flowtable_find_dev(const struct net_device *dev,
+				   struct nft_flowtable *ft)
+{
+	struct nft_hook *hook;
+	bool found = false;
+
+	list_for_each_entry_rcu(hook, &ft->hook_list, list) {
+		if (hook->ops.dev != dev)
+			continue;
+
+		found = true;
+		break;
+	}
+
+	return found;
+}
+
+static void nft_dev_forward_path(struct nf_flow_route *route,
+				 const struct nf_conn *ct,
+				 enum ip_conntrack_dir dir,
+				 struct nft_flowtable *ft)
+{
+	const struct dst_entry *dst = route->tuple[dir].dst;
+	struct net_device_path_stack stack;
+	struct nft_forward_info info = {};
+
+	if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
+		nft_dev_path_info(&stack, &info);
+
+	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
+		return;
+
+	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+}
+
 static int nft_flow_route(const struct nft_pktinfo *pkt,
 			  const struct nf_conn *ct,
 			  struct nf_flow_route *route,
-			  enum ip_conntrack_dir dir)
+			  enum ip_conntrack_dir dir,
+			  struct nft_flowtable *ft)
 {
 	struct dst_entry *this_dst = skb_dst(pkt->skb);
 	struct dst_entry *other_dst = NULL;
@@ -63,6 +153,12 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
 	nft_default_forward_path(route, this_dst, dir);
 	nft_default_forward_path(route, other_dst, !dir);
 
+	if (route->tuple[dir].xmit_type	== FLOW_OFFLOAD_XMIT_NEIGH &&
+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
+		nft_dev_forward_path(route, ct, dir, ft);
+		nft_dev_forward_path(route, ct, !dir, ft);
+	}
+
 	return 0;
 }
 
@@ -90,8 +186,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 	struct nft_flow_offload *priv = nft_expr_priv(expr);
 	struct nf_flowtable *flowtable = &priv->flowtable->data;
 	struct tcphdr _tcph, *tcph = NULL;
+	struct nf_flow_route route = {};
 	enum ip_conntrack_info ctinfo;
-	struct nf_flow_route route;
 	struct flow_offload *flow;
 	enum ip_conntrack_dir dir;
 	struct nf_conn *ct;
@@ -128,7 +224,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
 		goto out;
 
 	dir = CTINFO2DIR(ctinfo);
-	if (nft_flow_route(pkt, ct, &route, dir) < 0)
+	if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0)
 		goto err_flow_route;
 
 	flow = flow_offload_alloc(ct);
-- 
cgit v1.2.3


From 7a27f6ab41356ecba47ec2bec6d635704c169779 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:40 +0100
Subject: netfilter: flowtable: use dev_fill_forward_path() to obtain egress
 device

The egress device in the tuple is obtained from route. Use
dev_fill_forward_path() instead to provide the real egress device for
this flow whenever this is available.

The new FLOW_OFFLOAD_XMIT_DIRECT type uses dev_queue_xmit() to transmit
ethernet frames. Cache the source and destination hardware address to
use dev_queue_xmit() to transfer packets.

The FLOW_OFFLOAD_XMIT_DIRECT replaces FLOW_OFFLOAD_XMIT_NEIGH if
dev_fill_forward_path() finds a direct transmit path.

In case of topology updates, if peer is moved to different bridge port,
the connection will time out, reconnect will result in a new entry with
the correct path. Snooping fdb updates would allow for cleaning up stale
flowtable entries.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h | 16 ++++++-
 net/netfilter/nf_flow_table_core.c    | 35 +++++++++++---
 net/netfilter/nf_flow_table_ip.c      | 88 ++++++++++++++++++++++++++---------
 net/netfilter/nft_flow_offload.c      | 35 +++++++++++---
 4 files changed, 137 insertions(+), 37 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index dca9fc66405f..41c8436bc77e 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -92,6 +92,7 @@ enum flow_offload_tuple_dir {
 enum flow_offload_xmit_type {
 	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
 	FLOW_OFFLOAD_XMIT_XFRM,
+	FLOW_OFFLOAD_XMIT_DIRECT,
 };
 
 struct flow_offload_tuple {
@@ -120,8 +121,14 @@ struct flow_offload_tuple {
 					xmit_type:2;
 
 	u16				mtu;
-
-	struct dst_entry		*dst_cache;
+	union {
+		struct dst_entry	*dst_cache;
+		struct {
+			u32		ifidx;
+			u8		h_source[ETH_ALEN];
+			u8		h_dest[ETH_ALEN];
+		} out;
+	};
 };
 
 struct flow_offload_tuple_rhash {
@@ -168,6 +175,11 @@ struct nf_flow_route {
 		struct {
 			u32			ifindex;
 		} in;
+		struct {
+			u32			ifindex;
+			u8			h_source[ETH_ALEN];
+			u8			h_dest[ETH_ALEN];
+		} out;
 		enum flow_offload_xmit_type	xmit_type;
 	} tuple[FLOW_OFFLOAD_DIR_MAX];
 };
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 51e3e1b08e1c..a92acb3ed019 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -81,9 +81,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
 	struct dst_entry *dst = route->tuple[dir].dst;
 
-	if (!dst_hold_safe(route->tuple[dir].dst))
-		return -1;
-
 	switch (flow_tuple->l3proto) {
 	case NFPROTO_IPV4:
 		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
@@ -94,12 +91,36 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	}
 
 	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+
+	switch (route->tuple[dir].xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
+		       ETH_ALEN);
+		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
+		       ETH_ALEN);
+		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+		break;
+	case FLOW_OFFLOAD_XMIT_XFRM:
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		if (!dst_hold_safe(route->tuple[dir].dst))
+			return -1;
+
+		flow_tuple->dst_cache = dst;
+		break;
+	}
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
-	flow_tuple->dst_cache = dst;
 
 	return 0;
 }
 
+static void nft_flow_dst_release(struct flow_offload *flow,
+				 enum flow_offload_tuple_dir dir)
+{
+	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
+		dst_release(flow->tuplehash[dir].tuple.dst_cache);
+}
+
 int flow_offload_route_init(struct flow_offload *flow,
 			    const struct nf_flow_route *route)
 {
@@ -118,7 +139,7 @@ int flow_offload_route_init(struct flow_offload *flow,
 	return 0;
 
 err_route_reply:
-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
 
 	return err;
 }
@@ -169,8 +190,8 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
 
 static void flow_offload_route_release(struct flow_offload *flow)
 {
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
 }
 
 void flow_offload_free(struct flow_offload *flow)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e9bef38a356b..9e84767a7e9b 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -207,6 +207,24 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
+				       const struct flow_offload_tuple_rhash *tuplehash,
+				       unsigned short type)
+{
+	struct net_device *outdev;
+
+	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
+	if (!outdev)
+		return NF_DROP;
+
+	skb->dev = outdev;
+	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
+			tuplehash->tuple.out.h_source, skb->len);
+	dev_queue_xmit(skb);
+
+	return NF_STOLEN;
+}
+
 unsigned int
 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 			const struct nf_hook_state *state)
@@ -222,6 +240,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	struct iphdr *iph;
 	__be32 nexthop;
 	u32 hdrsize;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return NF_ACCEPT;
@@ -244,9 +263,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	if (!dst_check(&rt->dst, 0)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
 	if (skb_try_make_writable(skb, thoff + hdrsize))
@@ -263,8 +286,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rtable *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
@@ -272,13 +293,23 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
 
@@ -444,6 +475,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
 	u32 hdrsize;
+	int ret;
 
 	if (skb->protocol != htons(ETH_P_IPV6))
 		return NF_ACCEPT;
@@ -465,9 +497,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				sizeof(*ip6h)))
 		return NF_ACCEPT;
 
-	if (!dst_check(&rt->dst, 0)) {
-		flow_offload_teardown(flow);
-		return NF_ACCEPT;
+	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+		if (!dst_check(&rt->dst, 0)) {
+			flow_offload_teardown(flow);
+			return NF_ACCEPT;
+		}
 	}
 
 	if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
@@ -484,8 +520,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
-	rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
@@ -493,12 +527,22 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
 	}
 
-	outdev = rt->dst.dev;
-	skb->dev = outdev;
-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-	skb_dst_set_noref(skb, &rt->dst);
-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+	switch (tuplehash->tuple.xmit_type) {
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		outdev = rt->dst.dev;
+		skb->dev = outdev;
+		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+		skb_dst_set_noref(skb, &rt->dst);
+		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+		ret = NF_STOLEN;
+		break;
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
+		if (ret == NF_DROP)
+			flow_offload_teardown(flow);
+		break;
+	}
 
-	return NF_STOLEN;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 15f90c31feb0..a6595dca1b1f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -39,12 +39,11 @@ static void nft_default_forward_path(struct nf_flow_route *route,
 static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 				     const struct dst_entry *dst_cache,
 				     const struct nf_conn *ct,
-				     enum ip_conntrack_dir dir,
+				     enum ip_conntrack_dir dir, u8 *ha,
 				     struct net_device_path_stack *stack)
 {
 	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
 	struct net_device *dev = dst_cache->dev;
-	unsigned char ha[ETH_ALEN];
 	struct neighbour *n;
 	u8 nud_state;
 
@@ -66,27 +65,43 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 
 struct nft_forward_info {
 	const struct net_device *indev;
+	const struct net_device *outdev;
+	u8 h_source[ETH_ALEN];
+	u8 h_dest[ETH_ALEN];
+	enum flow_offload_xmit_type xmit_type;
 };
 
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
-			      struct nft_forward_info *info)
+			      struct nft_forward_info *info,
+			      unsigned char *ha)
 {
 	const struct net_device_path *path;
 	int i;
 
+	memcpy(info->h_dest, ha, ETH_ALEN);
+
 	for (i = 0; i < stack->num_paths; i++) {
 		path = &stack->path[i];
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
 			info->indev = path->dev;
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 			break;
-		case DEV_PATH_VLAN:
 		case DEV_PATH_BRIDGE:
+			if (is_zero_ether_addr(info->h_source))
+				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
+			break;
+		case DEV_PATH_VLAN:
 		default:
 			info->indev = NULL;
 			break;
 		}
 	}
+	if (!info->outdev)
+		info->outdev = info->indev;
 }
 
 static bool nft_flowtable_find_dev(const struct net_device *dev,
@@ -114,14 +129,22 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 	const struct dst_entry *dst = route->tuple[dir].dst;
 	struct net_device_path_stack stack;
 	struct nft_forward_info info = {};
+	unsigned char ha[ETH_ALEN];
 
-	if (nft_dev_fill_forward_path(route, dst, ct, dir, &stack) >= 0)
-		nft_dev_path_info(&stack, &info);
+	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
+		nft_dev_path_info(&stack, &info, ha);
 
 	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
 		return;
 
 	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+
+	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
+		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
+		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
+		route->tuple[dir].out.ifindex = info.outdev->ifindex;
+		route->tuple[dir].xmit_type = info.xmit_type;
+	}
 }
 
 static int nft_flow_route(const struct nft_pktinfo *pkt,
-- 
cgit v1.2.3


From 4cd91f7c290f64fe430867ddbae10bff34657b6a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:41 +0100
Subject: netfilter: flowtable: add vlan support

Add the vlan id and protocol to the flow tuple to uniquely identify
flows from the receive path. For the transmit path, dev_hard_header() on
the vlan device push the headers. This patch includes support for two
vlan headers (QinQ) from the ingress path.

Add a generic encap field to the flowtable entry which stores the
protocol and the tag id. This allows to reuse these fields in the PPPoE
support coming in a later patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h |  17 ++++-
 net/netfilter/nf_flow_table_core.c    |   7 ++
 net/netfilter/nf_flow_table_ip.c      | 121 +++++++++++++++++++++++++++-------
 net/netfilter/nft_flow_offload.c      |  26 +++++++-
 4 files changed, 142 insertions(+), 29 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 41c8436bc77e..e34fd3eb4bb5 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
 	FLOW_OFFLOAD_XMIT_DIRECT,
 };
 
+#define NF_FLOW_TABLE_ENCAP_MAX		2
+
 struct flow_offload_tuple {
 	union {
 		struct in_addr		src_v4;
@@ -113,13 +115,17 @@ struct flow_offload_tuple {
 
 	u8				l3proto;
 	u8				l4proto;
+	struct {
+		u16			id;
+		__be16			proto;
+	} encap[NF_FLOW_TABLE_ENCAP_MAX];
 
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
-	u8				dir:6,
-					xmit_type:2;
-
+	u8				dir:4,
+					xmit_type:2,
+					encap_num:2;
 	u16				mtu;
 	union {
 		struct dst_entry	*dst_cache;
@@ -174,6 +180,11 @@ struct nf_flow_route {
 		struct dst_entry		*dst;
 		struct {
 			u32			ifindex;
+			struct {
+				u16		id;
+				__be16		proto;
+			} encap[NF_FLOW_TABLE_ENCAP_MAX];
+			u8			num_encaps;
 		} in;
 		struct {
 			u32			ifindex;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index a92acb3ed019..595f4434b84d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -80,6 +80,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 {
 	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
 	struct dst_entry *dst = route->tuple[dir].dst;
+	int i, j = 0;
 
 	switch (flow_tuple->l3proto) {
 	case NFPROTO_IPV4:
@@ -91,6 +92,12 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	}
 
 	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
+	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
+		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
+		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+		j++;
+	}
+	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
 
 	switch (route->tuple[dir].xmit_type) {
 	case FLOW_OFFLOAD_XMIT_DIRECT:
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9e84767a7e9b..d90636295b0d 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -136,23 +136,44 @@ static bool ip_has_options(unsigned int thoff)
 	return thoff != sizeof(struct iphdr);
 }
 
+static void nf_flow_tuple_encap(struct sk_buff *skb,
+				struct flow_offload_tuple *tuple)
+{
+	int i = 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		tuple->encap[i].id = skb_vlan_tag_get(skb);
+		tuple->encap[i].proto = skb->vlan_proto;
+		i++;
+	}
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
+		tuple->encap[i].proto = skb->protocol;
+	}
+}
+
 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
-			    struct flow_offload_tuple *tuple, u32 *hdrsize)
+			    struct flow_offload_tuple *tuple, u32 *hdrsize,
+			    u32 offset)
 {
 	struct flow_ports *ports;
 	unsigned int thoff;
 	struct iphdr *iph;
 
-	if (!pskb_may_pull(skb, sizeof(*iph)))
+	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
 		return -1;
 
-	iph = ip_hdr(skb);
-	thoff = iph->ihl * 4;
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
+	thoff = (iph->ihl * 4);
 
 	if (ip_is_fragment(iph) ||
 	    unlikely(ip_has_options(thoff)))
 		return -1;
 
+	thoff += offset;
+
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
 		*hdrsize = sizeof(struct tcphdr);
@@ -167,11 +188,10 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	if (iph->ttl <= 1)
 		return -1;
 
-	thoff = iph->ihl * 4;
 	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
-	iph = ip_hdr(skb);
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v4.s_addr	= iph->saddr;
@@ -181,6 +201,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	tuple->l3proto		= AF_INET;
 	tuple->l4proto		= iph->protocol;
 	tuple->iifidx		= dev->ifindex;
+	nf_flow_tuple_encap(skb, tuple);
 
 	return 0;
 }
@@ -207,6 +228,43 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+				       u32 *offset)
+{
+	if (skb->protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veth;
+
+		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
+		if (veth->h_vlan_encapsulated_proto == proto) {
+			*offset += VLAN_HLEN;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static void nf_flow_encap_pop(struct sk_buff *skb,
+			      struct flow_offload_tuple_rhash *tuplehash)
+{
+	struct vlan_hdr *vlan_hdr;
+	int i;
+
+	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
+		if (skb_vlan_tag_present(skb)) {
+			__vlan_hwaccel_clear_tag(skb);
+			continue;
+		}
+		if (skb->protocol == htons(ETH_P_8021Q)) {
+			vlan_hdr = (struct vlan_hdr *)skb->data;
+			__skb_pull(skb, VLAN_HLEN);
+			vlan_set_encap_proto(skb, vlan_hdr);
+			skb_reset_network_header(skb);
+			break;
+		}
+	}
+}
+
 static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
 				       const struct flow_offload_tuple_rhash *tuplehash,
 				       unsigned short type)
@@ -235,17 +293,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	enum flow_offload_tuple_dir dir;
 	struct flow_offload *flow;
 	struct net_device *outdev;
+	u32 hdrsize, offset = 0;
+	unsigned int thoff, mtu;
 	struct rtable *rt;
-	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
-	u32 hdrsize;
 	int ret;
 
-	if (skb->protocol != htons(ETH_P_IP))
+	if (skb->protocol != htons(ETH_P_IP) &&
+	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0)
+	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -255,11 +314,12 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+	mtu = flow->tuplehash[dir].tuple.mtu + offset;
+	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return NF_ACCEPT;
 
-	iph = ip_hdr(skb);
-	thoff = iph->ihl * 4;
+	iph = (struct iphdr *)(skb_network_header(skb) + offset);
+	thoff = (iph->ihl * 4) + offset;
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
@@ -277,6 +337,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	flow_offload_refresh(flow_table, flow);
 
+	nf_flow_encap_pop(skb, tuplehash);
+	thoff -= offset;
+
 	iph = ip_hdr(skb);
 	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
 
@@ -418,16 +481,18 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow,
 }
 
 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
-			      struct flow_offload_tuple *tuple, u32 *hdrsize)
+			      struct flow_offload_tuple *tuple, u32 *hdrsize,
+			      u32 offset)
 {
 	struct flow_ports *ports;
 	struct ipv6hdr *ip6h;
 	unsigned int thoff;
 
-	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+	thoff = sizeof(*ip6h) + offset;
+	if (!pskb_may_pull(skb, thoff))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 
 	switch (ip6h->nexthdr) {
 	case IPPROTO_TCP:
@@ -443,11 +508,10 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	if (ip6h->hop_limit <= 1)
 		return -1;
 
-	thoff = sizeof(*ip6h);
 	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
-	ip6h = ipv6_hdr(skb);
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v6		= ip6h->saddr;
@@ -457,6 +521,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	tuple->l3proto		= AF_INET6;
 	tuple->l4proto		= ip6h->nexthdr;
 	tuple->iifidx		= dev->ifindex;
+	nf_flow_tuple_encap(skb, tuple);
 
 	return 0;
 }
@@ -472,15 +537,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	const struct in6_addr *nexthop;
 	struct flow_offload *flow;
 	struct net_device *outdev;
+	unsigned int thoff, mtu;
+	u32 hdrsize, offset = 0;
 	struct ipv6hdr *ip6h;
 	struct rt6_info *rt;
-	u32 hdrsize;
 	int ret;
 
-	if (skb->protocol != htons(ETH_P_IPV6))
+	if (skb->protocol != htons(ETH_P_IPV6) &&
+	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
 		return NF_ACCEPT;
 
-	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0)
+	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
 		return NF_ACCEPT;
 
 	tuplehash = flow_offload_lookup(flow_table, &tuple);
@@ -490,11 +557,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	dir = tuplehash->tuple.dir;
 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
 
-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+	mtu = flow->tuplehash[dir].tuple.mtu + offset;
+	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
 		return NF_ACCEPT;
 
-	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
-				sizeof(*ip6h)))
+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+	thoff = sizeof(*ip6h) + offset;
+	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
 		return NF_ACCEPT;
 
 	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
@@ -506,11 +575,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		}
 	}
 
-	if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize))
+	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
 	flow_offload_refresh(flow_table, flow);
 
+	nf_flow_encap_pop(skb, tuplehash);
+
 	ip6h = ipv6_hdr(skb);
 	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
 
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index a6595dca1b1f..8392b1a8108b 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 struct nft_forward_info {
 	const struct net_device *indev;
 	const struct net_device *outdev;
+	struct id {
+		__u16	id;
+		__be16	proto;
+	} encap[NF_FLOW_TABLE_ENCAP_MAX];
+	u8 num_encaps;
 	u8 h_source[ETH_ALEN];
 	u8 h_dest[ETH_ALEN];
 	enum flow_offload_xmit_type xmit_type;
@@ -84,9 +89,23 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 		path = &stack->path[i];
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
+		case DEV_PATH_VLAN:
 			info->indev = path->dev;
 			if (is_zero_ether_addr(info->h_source))
 				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
+
+			if (path->type == DEV_PATH_ETHERNET)
+				break;
+
+			/* DEV_PATH_VLAN */
+			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
+				info->indev = NULL;
+				break;
+			}
+			info->outdev = path->dev;
+			info->encap[info->num_encaps].id = path->encap.id;
+			info->encap[info->num_encaps].proto = path->encap.proto;
+			info->num_encaps++;
 			break;
 		case DEV_PATH_BRIDGE:
 			if (is_zero_ether_addr(info->h_source))
@@ -94,7 +113,6 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 
 			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 			break;
-		case DEV_PATH_VLAN:
 		default:
 			info->indev = NULL;
 			break;
@@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 	struct net_device_path_stack stack;
 	struct nft_forward_info info = {};
 	unsigned char ha[ETH_ALEN];
+	int i;
 
 	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
 		nft_dev_path_info(&stack, &info, ha);
@@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 		return;
 
 	route->tuple[!dir].in.ifindex = info.indev->ifindex;
+	for (i = 0; i < info.num_encaps; i++) {
+		route->tuple[!dir].in.encap[i].id = info.encap[i].id;
+		route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
+	}
+	route->tuple[!dir].in.num_encaps = info.num_encaps;
 
 	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
 		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
-- 
cgit v1.2.3


From e990cef6516daa4e1e236433579e333f74fd38cb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:42 +0100
Subject: netfilter: flowtable: add bridge vlan filtering support

Add the vlan tag based when PVID is set on.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nft_flow_offload.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 8392b1a8108b..651364d93efd 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -111,6 +111,18 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			if (is_zero_ether_addr(info->h_source))
 				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 
+			switch (path->bridge.vlan_mode) {
+			case DEV_PATH_BR_VLAN_TAG:
+				info->encap[info->num_encaps].id = path->bridge.vlan_id;
+				info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
+				info->num_encaps++;
+				break;
+			case DEV_PATH_BR_VLAN_UNTAG:
+				info->num_encaps--;
+				break;
+			case DEV_PATH_BR_VLAN_KEEP:
+				break;
+			}
 			info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 			break;
 		default:
-- 
cgit v1.2.3


From 72efd585f7144a047f7da63864284764596ccad9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:43 +0100
Subject: netfilter: flowtable: add pppoe support

Add the PPPoE protocol and session id to the flow tuple using the encap
fields to uniquely identify flows from the receive path. For the
transmit path, dev_hard_header() on the vlan device push the headers.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_flow_table_ip.c | 53 +++++++++++++++++++++++++++++++++++-----
 net/netfilter/nft_flow_offload.c |  5 +++-
 2 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index d90636295b0d..12cb0cc6958c 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -7,6 +7,9 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -139,6 +142,8 @@ static bool ip_has_options(unsigned int thoff)
 static void nf_flow_tuple_encap(struct sk_buff *skb,
 				struct flow_offload_tuple *tuple)
 {
+	struct vlan_ethhdr *veth;
+	struct pppoe_hdr *phdr;
 	int i = 0;
 
 	if (skb_vlan_tag_present(skb)) {
@@ -146,11 +151,17 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
 		tuple->encap[i].proto = skb->vlan_proto;
 		i++;
 	}
-	if (skb->protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
-
+	switch (skb->protocol) {
+	case htons(ETH_P_8021Q):
+		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
 		tuple->encap[i].proto = skb->protocol;
+		break;
+	case htons(ETH_P_PPP_SES):
+		phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+		tuple->encap[i].id = ntohs(phdr->sid);
+		tuple->encap[i].proto = skb->protocol;
+		break;
 	}
 }
 
@@ -228,17 +239,41 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+{
+	__be16 proto;
+
+	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			     sizeof(struct pppoe_hdr)));
+	switch (proto) {
+	case htons(PPP_IP):
+		return htons(ETH_P_IP);
+	case htons(PPP_IPV6):
+		return htons(ETH_P_IPV6);
+	}
+
+	return 0;
+}
+
 static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
 				       u32 *offset)
 {
-	if (skb->protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veth;
+	struct vlan_ethhdr *veth;
 
+	switch (skb->protocol) {
+	case htons(ETH_P_8021Q):
 		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
 		if (veth->h_vlan_encapsulated_proto == proto) {
 			*offset += VLAN_HLEN;
 			return true;
 		}
+		break;
+	case htons(ETH_P_PPP_SES):
+		if (nf_flow_pppoe_proto(skb) == proto) {
+			*offset += PPPOE_SES_HLEN;
+			return true;
+		}
+		break;
 	}
 
 	return false;
@@ -255,12 +290,18 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
 			__vlan_hwaccel_clear_tag(skb);
 			continue;
 		}
-		if (skb->protocol == htons(ETH_P_8021Q)) {
+		switch (skb->protocol) {
+		case htons(ETH_P_8021Q):
 			vlan_hdr = (struct vlan_hdr *)skb->data;
 			__skb_pull(skb, VLAN_HLEN);
 			vlan_set_encap_proto(skb, vlan_hdr);
 			skb_reset_network_header(skb);
 			break;
+		case htons(ETH_P_PPP_SES):
+			skb->protocol = nf_flow_pppoe_proto(skb);
+			skb_pull(skb, PPPOE_SES_HLEN);
+			skb_reset_network_header(skb);
+			break;
 		}
 	}
 }
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 651364d93efd..81a5e2b6c901 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -90,6 +90,7 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
 		case DEV_PATH_VLAN:
+		case DEV_PATH_PPPOE:
 			info->indev = path->dev;
 			if (is_zero_ether_addr(info->h_source))
 				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
@@ -97,7 +98,7 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			if (path->type == DEV_PATH_ETHERNET)
 				break;
 
-			/* DEV_PATH_VLAN */
+			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
 			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
 				info->indev = NULL;
 				break;
@@ -106,6 +107,8 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			info->encap[info->num_encaps].id = path->encap.id;
 			info->encap[info->num_encaps].proto = path->encap.proto;
 			info->num_encaps++;
+			if (path->type == DEV_PATH_PPPOE)
+				memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
 			break;
 		case DEV_PATH_BRIDGE:
 			if (is_zero_ether_addr(info->h_source))
-- 
cgit v1.2.3


From a11e7973cf918e3e212b7cb0ba8b3cccb9ed82b6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:44 +0100
Subject: netfilter: flowtable: add dsa support

Replace the master ethernet device by the dsa slave port. Packets coming
in from the software ingress path use the dsa slave port as input
device.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nft_flow_offload.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 81a5e2b6c901..143d049fd7f1 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -89,6 +89,7 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 		path = &stack->path[i];
 		switch (path->type) {
 		case DEV_PATH_ETHERNET:
+		case DEV_PATH_DSA:
 		case DEV_PATH_VLAN:
 		case DEV_PATH_PPPOE:
 			info->indev = path->dev;
@@ -97,6 +98,10 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 
 			if (path->type == DEV_PATH_ETHERNET)
 				break;
+			if (path->type == DEV_PATH_DSA) {
+				i = stack->num_paths;
+				break;
+			}
 
 			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
 			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
-- 
cgit v1.2.3


From 79d4071ea4c49260286cf75dc669a0ed354d1c4e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:45 +0100
Subject: selftests: netfilter: flowtable bridge and vlan support

This patch adds two new tests to cover bridge and vlan support:

- Add a bridge device to the Router1 (nsr1) container and attach the
  veth0 device to the bridge. Set the IP address to the bridge device
  to exercise the bridge forwarding path.

- Add vlan encapsulation between to the bridge device in the Router1 and
  one of the sender containers (ns1).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/netfilter/nft_flowtable.sh | 82 ++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 431296c0f91c..427d94816f2d 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -371,6 +371,88 @@ else
 	ip netns exec nsr1 nft list ruleset
 fi
 
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net nsr1 link add name br0 type bridge
+ip -net nsr1 addr flush dev veth0
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set veth0 master br0
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
+ip -net nsr1 addr add dead:1::1/64 dev br0
+ip -net nsr1 link set up dev br0
+
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" counter masquerade
+   }
+}
+EOF
+
+if test_tcp_forwarding_nat ns1 ns2; then
+	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
+else
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set down dev veth0
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set up dev veth0.10
+ip -net nsr1 link set veth0.10 master br0
+
+ip -net ns1 addr flush dev eth0
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net ns1 link set eth0 up
+ip -net ns1 link set eth0.10 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
+
+if test_tcp_forwarding_nat ns1 ns2; then
+	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
+else
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+	ip netns exec nsr1 nft list ruleset
+	ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set veth0 down
+ip -net nsr1 link set veth0.10 down
+ip -net nsr1 link delete veth0.10 type vlan
+ip -net nsr1 link delete br0 type bridge
+ip -net ns1 addr flush dev eth0.10
+ip -net ns1 link set eth0.10 down
+ip -net ns1 link set eth0 down
+ip -net ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net ns1 link set eth0 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0
+ip -net ns1 route add default via dead:1::1
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net nsr1 link set up dev veth0
+
 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
 KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
 SPI1=$RANDOM
-- 
cgit v1.2.3


From eeff3000f2401fde872f21b0ce2f298dcc89e5c5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:46 +0100
Subject: netfilter: flowtable: add offload support for xmit path types

When the flow tuple xmit_type is set to FLOW_OFFLOAD_XMIT_DIRECT, the
dst_cache pointer is not valid, and the h_source/h_dest/ifidx out fields
need to be used.

This patch also adds the FLOW_ACTION_VLAN_PUSH action to pass the VLAN
tag to the driver.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_flow_table_offload.c | 166 +++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 42 deletions(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 1b979c8b3ba0..3fbed447132a 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -177,28 +177,45 @@ static int flow_offload_eth_src(struct net *net,
 				enum flow_offload_tuple_dir dir,
 				struct nf_flow_rule *flow_rule)
 {
-	const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
 	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
 	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
-	struct net_device *dev;
+	const struct flow_offload_tuple *other_tuple, *this_tuple;
+	struct net_device *dev = NULL;
+	const unsigned char *addr;
 	u32 mask, val;
 	u16 val16;
 
-	dev = dev_get_by_index(net, tuple->iifidx);
-	if (!dev)
-		return -ENOENT;
+	this_tuple = &flow->tuplehash[dir].tuple;
+
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		addr = this_tuple->out.h_source;
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		dev = dev_get_by_index(net, other_tuple->iifidx);
+		if (!dev)
+			return -ENOENT;
+
+		addr = dev->dev_addr;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	mask = ~0xffff0000;
-	memcpy(&val16, dev->dev_addr, 2);
+	memcpy(&val16, addr, 2);
 	val = val16 << 16;
 	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 			    &val, &mask);
 
 	mask = ~0xffffffff;
-	memcpy(&val, dev->dev_addr + 2, 4);
+	memcpy(&val, addr + 2, 4);
 	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
 			    &val, &mask);
-	dev_put(dev);
+
+	if (dev)
+		dev_put(dev);
 
 	return 0;
 }
@@ -210,27 +227,40 @@ static int flow_offload_eth_dst(struct net *net,
 {
 	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
 	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
-	const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
+	const struct flow_offload_tuple *other_tuple, *this_tuple;
 	const struct dst_entry *dst_cache;
 	unsigned char ha[ETH_ALEN];
 	struct neighbour *n;
+	const void *daddr;
 	u32 mask, val;
 	u8 nud_state;
 	u16 val16;
 
-	dst_cache = flow->tuplehash[dir].tuple.dst_cache;
-	n = dst_neigh_lookup(dst_cache, daddr);
-	if (!n)
-		return -ENOENT;
+	this_tuple = &flow->tuplehash[dir].tuple;
 
-	read_lock_bh(&n->lock);
-	nud_state = n->nud_state;
-	ether_addr_copy(ha, n->ha);
-	read_unlock_bh(&n->lock);
-
-	if (!(nud_state & NUD_VALID)) {
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		ether_addr_copy(ha, this_tuple->out.h_dest);
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		daddr = &other_tuple->src_v4;
+		dst_cache = this_tuple->dst_cache;
+		n = dst_neigh_lookup(dst_cache, daddr);
+		if (!n)
+			return -ENOENT;
+
+		read_lock_bh(&n->lock);
+		nud_state = n->nud_state;
+		ether_addr_copy(ha, n->ha);
+		read_unlock_bh(&n->lock);
 		neigh_release(n);
-		return -ENOENT;
+
+		if (!(nud_state & NUD_VALID))
+			return -ENOENT;
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
 	mask = ~0xffffffff;
@@ -243,7 +273,6 @@ static int flow_offload_eth_dst(struct net *net,
 	val = val16;
 	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
 			    &val, &mask);
-	neigh_release(n);
 
 	return 0;
 }
@@ -465,27 +494,52 @@ static void flow_offload_ipv4_checksum(struct net *net,
 	}
 }
 
-static void flow_offload_redirect(const struct flow_offload *flow,
+static void flow_offload_redirect(struct net *net,
+				  const struct flow_offload *flow,
 				  enum flow_offload_tuple_dir dir,
 				  struct nf_flow_rule *flow_rule)
 {
-	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
-	struct rtable *rt;
+	const struct flow_offload_tuple *this_tuple, *other_tuple;
+	struct flow_action_entry *entry;
+	struct net_device *dev;
+	int ifindex;
 
-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+	this_tuple = &flow->tuplehash[dir].tuple;
+	switch (this_tuple->xmit_type) {
+	case FLOW_OFFLOAD_XMIT_DIRECT:
+		this_tuple = &flow->tuplehash[dir].tuple;
+		ifindex = this_tuple->out.ifidx;
+		break;
+	case FLOW_OFFLOAD_XMIT_NEIGH:
+		other_tuple = &flow->tuplehash[!dir].tuple;
+		ifindex = other_tuple->iifidx;
+		break;
+	default:
+		return;
+	}
+
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev)
+		return;
+
+	entry = flow_action_entry_next(flow_rule);
 	entry->id = FLOW_ACTION_REDIRECT;
-	entry->dev = rt->dst.dev;
-	dev_hold(rt->dst.dev);
+	entry->dev = dev;
 }
 
 static void flow_offload_encap_tunnel(const struct flow_offload *flow,
 				      enum flow_offload_tuple_dir dir,
 				      struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *this_tuple;
 	struct flow_action_entry *entry;
 	struct dst_entry *dst;
 
-	dst = flow->tuplehash[dir].tuple.dst_cache;
+	this_tuple = &flow->tuplehash[dir].tuple;
+	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return;
+
+	dst = this_tuple->dst_cache;
 	if (dst && dst->lwtstate) {
 		struct ip_tunnel_info *tun_info;
 
@@ -502,10 +556,15 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
 				      enum flow_offload_tuple_dir dir,
 				      struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *other_tuple;
 	struct flow_action_entry *entry;
 	struct dst_entry *dst;
 
-	dst = flow->tuplehash[!dir].tuple.dst_cache;
+	other_tuple = &flow->tuplehash[!dir].tuple;
+	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
+		return;
+
+	dst = other_tuple->dst_cache;
 	if (dst && dst->lwtstate) {
 		struct ip_tunnel_info *tun_info;
 
@@ -517,10 +576,14 @@ static void flow_offload_decap_tunnel(const struct flow_offload *flow,
 	}
 }
 
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
-			    enum flow_offload_tuple_dir dir,
-			    struct nf_flow_rule *flow_rule)
+static int
+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
+			  enum flow_offload_tuple_dir dir,
+			  struct nf_flow_rule *flow_rule)
 {
+	const struct flow_offload_tuple *other_tuple;
+	int i;
+
 	flow_offload_decap_tunnel(flow, dir, flow_rule);
 	flow_offload_encap_tunnel(flow, dir, flow_rule);
 
@@ -528,6 +591,26 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
 		return -1;
 
+	other_tuple = &flow->tuplehash[!dir].tuple;
+
+	for (i = 0; i < other_tuple->encap_num; i++) {
+		struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+
+		entry->id = FLOW_ACTION_VLAN_PUSH;
+		entry->vlan.vid = other_tuple->encap[i].id;
+		entry->vlan.proto = other_tuple->encap[i].proto;
+	}
+
+	return 0;
+}
+
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+			    enum flow_offload_tuple_dir dir,
+			    struct nf_flow_rule *flow_rule)
+{
+	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
+		return -1;
+
 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
 		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
 		flow_offload_port_snat(net, flow, dir, flow_rule);
@@ -540,7 +623,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
 	    test_bit(NF_FLOW_DNAT, &flow->flags))
 		flow_offload_ipv4_checksum(net, flow, flow_rule);
 
-	flow_offload_redirect(flow, dir, flow_rule);
+	flow_offload_redirect(net, flow, dir, flow_rule);
 
 	return 0;
 }
@@ -550,11 +633,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 			    enum flow_offload_tuple_dir dir,
 			    struct nf_flow_rule *flow_rule)
 {
-	flow_offload_decap_tunnel(flow, dir, flow_rule);
-	flow_offload_encap_tunnel(flow, dir, flow_rule);
-
-	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
-	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
 		return -1;
 
 	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
@@ -566,7 +645,7 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
 		flow_offload_port_dnat(net, flow, dir, flow_rule);
 	}
 
-	flow_offload_redirect(flow, dir, flow_rule);
+	flow_offload_redirect(net, flow, dir, flow_rule);
 
 	return 0;
 }
@@ -580,10 +659,10 @@ nf_flow_offload_rule_alloc(struct net *net,
 			   enum flow_offload_tuple_dir dir)
 {
 	const struct nf_flowtable *flowtable = offload->flowtable;
+	const struct flow_offload_tuple *tuple, *other_tuple;
 	const struct flow_offload *flow = offload->flow;
-	const struct flow_offload_tuple *tuple;
+	struct dst_entry *other_dst = NULL;
 	struct nf_flow_rule *flow_rule;
-	struct dst_entry *other_dst;
 	int err = -ENOMEM;
 
 	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
@@ -599,7 +678,10 @@ nf_flow_offload_rule_alloc(struct net *net,
 	flow_rule->rule->match.key = &flow_rule->match.key;
 
 	tuple = &flow->tuplehash[dir].tuple;
-	other_dst = flow->tuplehash[!dir].tuple.dst_cache;
+	other_tuple = &flow->tuplehash[!dir].tuple;
+	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
+		other_dst = other_tuple->dst_cache;
+
 	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
 	if (err < 0)
 		goto err_flow_match;
-- 
cgit v1.2.3


From 73f97025a972cd1506e8b1986264b2fb8833df7c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:47 +0100
Subject: netfilter: nft_flow_offload: use direct xmit if hardware offload is
 enabled

If there is a forward path to reach an ethernet device and hardware
offload is enabled, then use the direct xmit path.

Moreover, store the real device in the direct xmit path info since
software datapath uses dev_hard_header() to push the layer encapsulation
headers while hardware offload refers to the real device.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h |  2 ++
 net/netfilter/nf_flow_table_core.c    |  1 +
 net/netfilter/nf_flow_table_offload.c |  2 +-
 net/netfilter/nft_flow_offload.c      | 21 +++++++++++++++++++--
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index e34fd3eb4bb5..52afcee6e999 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -131,6 +131,7 @@ struct flow_offload_tuple {
 		struct dst_entry	*dst_cache;
 		struct {
 			u32		ifidx;
+			u32		hw_ifidx;
 			u8		h_source[ETH_ALEN];
 			u8		h_dest[ETH_ALEN];
 		} out;
@@ -188,6 +189,7 @@ struct nf_flow_route {
 		} in;
 		struct {
 			u32			ifindex;
+			u32			hw_ifindex;
 			u8			h_source[ETH_ALEN];
 			u8			h_dest[ETH_ALEN];
 		} out;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 595f4434b84d..f728c955b1dc 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -106,6 +106,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
 		       ETH_ALEN);
 		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
+		flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
 		break;
 	case FLOW_OFFLOAD_XMIT_XFRM:
 	case FLOW_OFFLOAD_XMIT_NEIGH:
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 3fbed447132a..e0d079601fcb 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -508,7 +508,7 @@ static void flow_offload_redirect(struct net *net,
 	switch (this_tuple->xmit_type) {
 	case FLOW_OFFLOAD_XMIT_DIRECT:
 		this_tuple = &flow->tuplehash[dir].tuple;
-		ifindex = this_tuple->out.ifidx;
+		ifindex = this_tuple->out.hw_ifidx;
 		break;
 	case FLOW_OFFLOAD_XMIT_NEIGH:
 		other_tuple = &flow->tuplehash[!dir].tuple;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 143d049fd7f1..d25b4b109e25 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -66,6 +66,7 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
 struct nft_forward_info {
 	const struct net_device *indev;
 	const struct net_device *outdev;
+	const struct net_device *hw_outdev;
 	struct id {
 		__u16	id;
 		__be16	proto;
@@ -76,9 +77,18 @@ struct nft_forward_info {
 	enum flow_offload_xmit_type xmit_type;
 };
 
+static bool nft_is_valid_ether_device(const struct net_device *dev)
+{
+	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
+	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
+		return false;
+
+	return true;
+}
+
 static void nft_dev_path_info(const struct net_device_path_stack *stack,
 			      struct nft_forward_info *info,
-			      unsigned char *ha)
+			      unsigned char *ha, struct nf_flowtable *flowtable)
 {
 	const struct net_device_path *path;
 	int i;
@@ -140,6 +150,12 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 	}
 	if (!info->outdev)
 		info->outdev = info->indev;
+
+	info->hw_outdev = info->indev;
+
+	if (nf_flowtable_hw_offload(flowtable) &&
+	    nft_is_valid_ether_device(info->indev))
+		info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
 }
 
 static bool nft_flowtable_find_dev(const struct net_device *dev,
@@ -171,7 +187,7 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 	int i;
 
 	if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
-		nft_dev_path_info(&stack, &info, ha);
+		nft_dev_path_info(&stack, &info, ha, &ft->data);
 
 	if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
 		return;
@@ -187,6 +203,7 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
 		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
 		route->tuple[dir].out.ifindex = info.outdev->ifindex;
+		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
 		route->tuple[dir].xmit_type = info.xmit_type;
 	}
 }
-- 
cgit v1.2.3


From 26267bf9bb57d504c785d8659adc8e02b6629c95 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:48 +0100
Subject: netfilter: flowtable: bridge vlan hardware offload and switchdev

The switch might have already added the VLAN tag through PVID hardware
offload. Keep this extra VLAN in the flowtable but skip it on egress.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h             | 1 +
 include/net/netfilter/nf_flow_table.h | 8 +++++---
 net/bridge/br_device.c                | 1 +
 net/bridge/br_vlan.c                  | 2 ++
 net/netfilter/nf_flow_table_core.c    | 2 ++
 net/netfilter/nf_flow_table_offload.c | 6 +++++-
 net/netfilter/nft_flow_offload.c      | 5 +++++
 7 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 90db74132090..02fa1da8cd22 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -870,6 +870,7 @@ struct net_device_path {
 				DEV_PATH_BR_VLAN_KEEP,
 				DEV_PATH_BR_VLAN_TAG,
 				DEV_PATH_BR_VLAN_UNTAG,
+				DEV_PATH_BR_VLAN_UNTAG_HW,
 			}		vlan_mode;
 			u16		vlan_id;
 			__be16		vlan_proto;
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 52afcee6e999..4d991c1e93ef 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -123,9 +123,10 @@ struct flow_offload_tuple {
 	/* All members above are keys for lookups, see flow_offload_hash(). */
 	struct { }			__hash;
 
-	u8				dir:4,
+	u8				dir:2,
 					xmit_type:2,
-					encap_num:2;
+					encap_num:2,
+					in_vlan_ingress:2;
 	u16				mtu;
 	union {
 		struct dst_entry	*dst_cache;
@@ -185,7 +186,8 @@ struct nf_flow_route {
 				u16		id;
 				__be16		proto;
 			} encap[NF_FLOW_TABLE_ENCAP_MAX];
-			u8			num_encaps;
+			u8			num_encaps:2,
+						ingress_vlans:2;
 		} in;
 		struct {
 			u32			ifindex;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0c72503e0d39..e8b626cc6bfd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -422,6 +422,7 @@ static int br_fill_forward_path(struct net_device_path_ctx *ctx,
 		ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
 		ctx->num_vlans++;
 		break;
+	case DEV_PATH_BR_VLAN_UNTAG_HW:
 	case DEV_PATH_BR_VLAN_UNTAG:
 		ctx->num_vlans--;
 		break;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index c92240b21c4a..da3256a3eed0 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1386,6 +1386,8 @@ int br_vlan_fill_forward_path_mode(struct net_bridge *br,
 
 	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
 		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
+	else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
+		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
 	else
 		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index f728c955b1dc..8fa7bf9d5f3f 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -95,6 +95,8 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
 		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
 		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
+		if (route->tuple[dir].in.ingress_vlans & BIT(i))
+			flow_tuple->in_vlan_ingress |= BIT(j);
 		j++;
 	}
 	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e0d079601fcb..9326ba74745e 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -594,8 +594,12 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
 	other_tuple = &flow->tuplehash[!dir].tuple;
 
 	for (i = 0; i < other_tuple->encap_num; i++) {
-		struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
+		struct flow_action_entry *entry;
 
+		if (other_tuple->in_vlan_ingress & BIT(i))
+			continue;
+
+		entry = flow_action_entry_next(flow_rule);
 		entry->id = FLOW_ACTION_VLAN_PUSH;
 		entry->vlan.vid = other_tuple->encap[i].id;
 		entry->vlan.proto = other_tuple->encap[i].proto;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index d25b4b109e25..4843dd2b410c 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -72,6 +72,7 @@ struct nft_forward_info {
 		__be16	proto;
 	} encap[NF_FLOW_TABLE_ENCAP_MAX];
 	u8 num_encaps;
+	u8 ingress_vlans;
 	u8 h_source[ETH_ALEN];
 	u8 h_dest[ETH_ALEN];
 	enum flow_offload_xmit_type xmit_type;
@@ -130,6 +131,9 @@ static void nft_dev_path_info(const struct net_device_path_stack *stack,
 				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
 
 			switch (path->bridge.vlan_mode) {
+			case DEV_PATH_BR_VLAN_UNTAG_HW:
+				info->ingress_vlans |= BIT(info->num_encaps - 1);
+				break;
 			case DEV_PATH_BR_VLAN_TAG:
 				info->encap[info->num_encaps].id = path->bridge.vlan_id;
 				info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
@@ -198,6 +202,7 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
 		route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
 	}
 	route->tuple[!dir].in.num_encaps = info.num_encaps;
+	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
 
 	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
 		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
-- 
cgit v1.2.3


From 563ae557dd4eebb11472a1c264d40bfc08470395 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:49 +0100
Subject: net: flow_offload: add FLOW_ACTION_PPPOE_PUSH

Add an action to represent the PPPoE hardware offload support that
includes the session ID.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index fde025c57b4f..dc5c1e69cd9f 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -147,6 +147,7 @@ enum flow_action_id {
 	FLOW_ACTION_MPLS_POP,
 	FLOW_ACTION_MPLS_MANGLE,
 	FLOW_ACTION_GATE,
+	FLOW_ACTION_PPPOE_PUSH,
 	NUM_FLOW_ACTIONS,
 };
 
@@ -274,6 +275,9 @@ struct flow_action_entry {
 			u32		num_entries;
 			struct action_gate_entry *entries;
 		} gate;
+		struct {				/* FLOW_ACTION_PPPOE_PUSH */
+			u16		sid;
+		} pppoe;
 	};
 	struct flow_action_cookie *cookie; /* user defined action cookie */
 };
-- 
cgit v1.2.3


From 17e52c0aaad7fa7867fa07a5e2666bc8b032ae80 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:50 +0100
Subject: netfilter: flowtable: support for FLOW_ACTION_PPPOE_PUSH

Add a PPPoE push action if layer 2 protocol is ETH_P_PPP_SES to add
PPPoE flowtable hardware offload support.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_flow_table_offload.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 9326ba74745e..7d0d128407be 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -600,9 +600,18 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
 			continue;
 
 		entry = flow_action_entry_next(flow_rule);
-		entry->id = FLOW_ACTION_VLAN_PUSH;
-		entry->vlan.vid = other_tuple->encap[i].id;
-		entry->vlan.proto = other_tuple->encap[i].proto;
+
+		switch (other_tuple->encap[i].proto) {
+		case htons(ETH_P_PPP_SES):
+			entry->id = FLOW_ACTION_PPPOE_PUSH;
+			entry->pppoe.sid = other_tuple->encap[i].id;
+			break;
+		case htons(ETH_P_8021Q):
+			entry->id = FLOW_ACTION_VLAN_PUSH;
+			entry->vlan.vid = other_tuple->encap[i].id;
+			entry->vlan.proto = other_tuple->encap[i].proto;
+			break;
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 3fb24a43c97573cc10194e5733098fe03b3ae825 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:51 +0100
Subject: dsa: slave: add support for TC_SETUP_FT

The dsa infrastructure provides a well-defined hierarchy of devices,
pass up the call to set up the flow block to the master device. From the
software dataplane, the netfilter infrastructure uses the dsa slave
devices to refer to the input and output device for the given skbuff.
Similarly, the flowtable definition in the ruleset refers to the dsa
slave port devices.

This patch adds the glue code to call ndo_setup_tc with TC_SETUP_FT
with the master device via the dsa slave devices.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7453ceca2c7e..995e0e16f295 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1278,14 +1278,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 	}
 }
 
+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
+				    void *type_data)
+{
+	struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
+	struct net_device *master = cpu_dp->master;
+
+	if (!master->netdev_ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
+}
+
 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 	struct dsa_switch *ds = dp->ds;
 
-	if (type == TC_SETUP_BLOCK)
+	switch (type) {
+	case TC_SETUP_BLOCK:
 		return dsa_slave_setup_tc_block(dev, type_data);
+	case TC_SETUP_FT:
+		return dsa_slave_setup_ft_block(ds, dp->index, type_data);
+	default:
+		break;
+	}
 
 	if (!ds->ops->port_setup_tc)
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From d5c53da2b4a57f0d0e51d8220c46e5f5935fe9f3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:52 +0100
Subject: net: ethernet: mtk_eth_soc: fix parsing packets in GDM

When using DSA, set the special tag in GDM ingress control to allow the MAC
to parse packets properly earlier. This affects rx DMA source port reporting.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 +++++++++------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |  2 ++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 01d3ee4b5829..cf65fbdf60e0 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/pinctrl/devinfo.h>
 #include <linux/phylink.h>
+#include <net/dsa.h>
 
 #include "mtk_eth_soc.h"
 
@@ -1264,13 +1265,12 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			break;
 
 		/* find out which mac the packet come from. values start at 1 */
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) ||
+		    (trxd.rxd4 & RX_DMA_SPECIAL_TAG))
 			mac = 0;
-		} else {
-			mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
-				RX_DMA_FPORT_MASK;
-			mac--;
-		}
+		else
+			mac = ((trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+			       RX_DMA_FPORT_MASK) - 1;
 
 		if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
 			     !eth->netdev[mac]))
@@ -2233,6 +2233,9 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config)
 
 		val |= config;
 
+		if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0]))
+			val |= MTK_GDMA_SPECIAL_TAG;
+
 		mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
 	}
 	/* Reset and enable PSE */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index fd3cec8f06ba..2d065d47dbfd 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -82,6 +82,7 @@
 
 /* GDM Exgress Control Register */
 #define MTK_GDMA_FWD_CFG(x)	(0x500 + (x * 0x1000))
+#define MTK_GDMA_SPECIAL_TAG	BIT(24)
 #define MTK_GDMA_ICS_EN		BIT(22)
 #define MTK_GDMA_TCS_EN		BIT(21)
 #define MTK_GDMA_UCS_EN		BIT(20)
@@ -305,6 +306,7 @@
 #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
 #define RX_DMA_FPORT_SHIFT	19
 #define RX_DMA_FPORT_MASK	0x7
+#define RX_DMA_SPECIAL_TAG	BIT(22)
 
 /* PHY Indirect Access Control registers */
 #define MTK_PHY_IAC		0x10004
-- 
cgit v1.2.3


From ba37b7caf1ed2395cc84d8f823ff933975f1f789 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:53 +0100
Subject: net: ethernet: mtk_eth_soc: add support for initializing the PPE

The PPE (packet processing engine) is used to offload NAT/routed or even
bridged flows. This patch brings up the PPE and uses it to get a packet
hash. It also contains some functionality that will be used to bring up
flow offloading.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Makefile          |   2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c     |  21 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h     |  11 +
 drivers/net/ethernet/mediatek/mtk_ppe.c         | 511 ++++++++++++++++++++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h         | 287 +++++++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c | 217 ++++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h    | 144 +++++++
 7 files changed, 1190 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h

diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 3a777b4a6cd3..871dc3e113e2 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,5 @@
 #
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o
 obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index cf65fbdf60e0..67605b9a3916 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2258,12 +2258,17 @@ static int mtk_open(struct net_device *dev)
 
 	/* we run 2 netdevs on the same dma ring so we only bring it up once */
 	if (!refcount_read(&eth->dma_refcnt)) {
-		int err = mtk_start_dma(eth);
+		u32 gdm_config = MTK_GDMA_TO_PDMA;
+		int err;
 
+		err = mtk_start_dma(eth);
 		if (err)
 			return err;
 
-		mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
+		if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+			gdm_config = MTK_GDMA_TO_PPE;
+
+		mtk_gdm_config(eth, gdm_config);
 
 		napi_enable(&eth->tx_napi);
 		napi_enable(&eth->rx_napi);
@@ -2330,6 +2335,9 @@ static int mtk_stop(struct net_device *dev)
 
 	mtk_dma_free(eth);
 
+	if (eth->soc->offload_version)
+		mtk_ppe_stop(&eth->ppe);
+
 	return 0;
 }
 
@@ -3091,6 +3099,13 @@ static int mtk_probe(struct platform_device *pdev)
 			goto err_free_dev;
 	}
 
+	if (eth->soc->offload_version) {
+		err = mtk_ppe_init(&eth->ppe, eth->dev,
+				   eth->base + MTK_ETH_PPE_BASE, 2);
+		if (err)
+			goto err_free_dev;
+	}
+
 	for (i = 0; i < MTK_MAX_DEVS; i++) {
 		if (!eth->netdev[i])
 			continue;
@@ -3165,6 +3180,7 @@ static const struct mtk_soc_data mt7621_data = {
 	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7621_CLKS_BITMAP,
 	.required_pctl = false,
+	.offload_version = 2,
 };
 
 static const struct mtk_soc_data mt7622_data = {
@@ -3173,6 +3189,7 @@ static const struct mtk_soc_data mt7622_data = {
 	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7622_CLKS_BITMAP,
 	.required_pctl = false,
+	.offload_version = 2,
 };
 
 static const struct mtk_soc_data mt7623_data = {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 2d065d47dbfd..66ed537bc133 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/phylink.h>
+#include "mtk_ppe.h"
 
 #define MTK_QDMA_PAGE_SIZE	2048
 #define MTK_MAX_RX_LENGTH	1536
@@ -87,6 +88,7 @@
 #define MTK_GDMA_TCS_EN		BIT(21)
 #define MTK_GDMA_UCS_EN		BIT(20)
 #define MTK_GDMA_TO_PDMA	0x0
+#define MTK_GDMA_TO_PPE		0x4444
 #define MTK_GDMA_DROP_ALL       0x7777
 
 /* Unicast Filter MAC Address Register - Low */
@@ -301,6 +303,12 @@
 /* QDMA descriptor rxd3 */
 #define RX_DMA_VID(_x)		((_x) & 0xfff)
 
+/* QDMA descriptor rxd4 */
+#define MTK_RXD4_FOE_ENTRY	GENMASK(13, 0)
+#define MTK_RXD4_PPE_CPU_REASON	GENMASK(18, 14)
+#define MTK_RXD4_SRC_PORT	GENMASK(21, 19)
+#define MTK_RXD4_ALG		GENMASK(31, 22)
+
 /* QDMA descriptor rxd4 */
 #define RX_DMA_L4_VALID		BIT(24)
 #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
@@ -804,6 +812,7 @@ struct mtk_soc_data {
 	u32		caps;
 	u32		required_clks;
 	bool		required_pctl;
+	u8		offload_version;
 	netdev_features_t hw_features;
 };
 
@@ -903,6 +912,8 @@ struct mtk_eth {
 	u32				tx_int_status_reg;
 	u32				rx_dma_l4_valid;
 	int				ip_align;
+
+	struct mtk_ppe			ppe;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
new file mode 100644
index 000000000000..a1a9959a2461
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include "mtk_ppe.h"
+#include "mtk_ppe_regs.h"
+
+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	writel(val, ppe->base + reg);
+}
+
+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
+{
+	return readl(ppe->base + reg);
+}
+
+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
+{
+	u32 val;
+
+	val = ppe_r32(ppe, reg);
+	val &= ~mask;
+	val |= set;
+	ppe_w32(ppe, reg, val);
+
+	return val;
+}
+
+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	return ppe_m32(ppe, reg, 0, val);
+}
+
+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+	return ppe_m32(ppe, reg, val, 0);
+}
+
+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+{
+	unsigned long timeout = jiffies + HZ;
+
+	while (time_is_before_jiffies(timeout)) {
+		if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
+			return 0;
+
+		usleep_range(10, 20);
+	}
+
+	dev_err(ppe->dev, "PPE table busy");
+
+	return -ETIMEDOUT;
+}
+
+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
+{
+	ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+	ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+}
+
+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
+{
+	mtk_ppe_cache_clear(ppe);
+
+	ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
+		enable * MTK_PPE_CACHE_CTL_EN);
+}
+
+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+{
+	u32 hv1, hv2, hv3;
+	u32 hash;
+
+	switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+		case MTK_PPE_PKT_TYPE_BRIDGE:
+			hv1 = e->bridge.src_mac_lo;
+			hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+			hv2 = e->bridge.src_mac_hi >> 16;
+			hv2 ^= e->bridge.dest_mac_lo;
+			hv3 = e->bridge.dest_mac_hi;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+			hv1 = e->ipv4.orig.ports;
+			hv2 = e->ipv4.orig.dest_ip;
+			hv3 = e->ipv4.orig.src_ip;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+			hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
+			hv1 ^= e->ipv6.ports;
+
+			hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
+			hv2 ^= e->ipv6.dest_ip[0];
+
+			hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
+			hv3 ^= e->ipv6.src_ip[0];
+			break;
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+		case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		default:
+			WARN_ON_ONCE(1);
+			return MTK_PPE_HASH_MASK;
+	}
+
+	hash = (hv1 & hv2) | ((~hv1) & hv3);
+	hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+	hash ^= hv1 ^ hv2 ^ hv3;
+	hash ^= hash >> 16;
+	hash <<= 1;
+	hash &= MTK_PPE_ENTRIES - 1;
+
+	return hash;
+}
+
+static inline struct mtk_foe_mac_info *
+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+		return &entry->ipv6.l2;
+
+	return &entry->ipv4.l2;
+}
+
+static inline u32 *
+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+		return &entry->ipv6.ib2;
+
+	return &entry->ipv4.ib2;
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+			  u8 pse_port, u8 *src_mac, u8 *dest_mac)
+{
+	struct mtk_foe_mac_info *l2;
+	u32 ports_pad, val;
+
+	memset(entry, 0, sizeof(*entry));
+
+	val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+	      FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+	      FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+	      MTK_FOE_IB1_BIND_TTL |
+	      MTK_FOE_IB1_BIND_CACHE;
+	entry->ib1 = val;
+
+	val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+	      FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
+	      FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+
+	if (is_multicast_ether_addr(dest_mac))
+		val |= MTK_FOE_IB2_MULTICAST;
+
+	ports_pad = 0xa5a5a500 | (l4proto & 0xff);
+	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+		entry->ipv4.orig.ports = ports_pad;
+	if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+		entry->ipv6.ports = ports_pad;
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+		entry->ipv6.ib2 = val;
+		l2 = &entry->ipv6.l2;
+	} else {
+		entry->ipv4.ib2 = val;
+		l2 = &entry->ipv4.l2;
+	}
+
+	l2->dest_mac_hi = get_unaligned_be32(dest_mac);
+	l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
+	l2->src_mac_hi = get_unaligned_be32(src_mac);
+	l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
+
+	if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+		l2->etype = ETH_P_IPV6;
+	else
+		l2->etype = ETH_P_IP;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
+{
+	u32 *ib2 = mtk_foe_entry_ib2(entry);
+	u32 val;
+
+	val = *ib2;
+	val &= ~MTK_FOE_IB2_DEST_PORT;
+	val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
+	*ib2 = val;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
+				 __be32 src_addr, __be16 src_port,
+				 __be32 dest_addr, __be16 dest_port)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+	struct mtk_ipv4_tuple *t;
+
+	switch (type) {
+	case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		if (egress) {
+			t = &entry->ipv4.new;
+			break;
+		}
+		fallthrough;
+	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+	case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+		t = &entry->ipv4.orig;
+		break;
+	case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
+		entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
+		return 0;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	t->src_ip = be32_to_cpu(src_addr);
+	t->dest_ip = be32_to_cpu(dest_addr);
+
+	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+		return 0;
+
+	t->src_port = be16_to_cpu(src_port);
+	t->dest_port = be16_to_cpu(dest_port);
+
+	return 0;
+}
+
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+				 __be32 *src_addr, __be16 src_port,
+				 __be32 *dest_addr, __be16 dest_port)
+{
+	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+	u32 *src, *dest;
+	int i;
+
+	switch (type) {
+	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+		src = entry->dslite.tunnel_src_ip;
+		dest = entry->dslite.tunnel_dest_ip;
+		break;
+	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+	case MTK_PPE_PKT_TYPE_IPV6_6RD:
+		entry->ipv6.src_port = be16_to_cpu(src_port);
+		entry->ipv6.dest_port = be16_to_cpu(dest_port);
+		fallthrough;
+	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		src = entry->ipv6.src_ip;
+		dest = entry->ipv6.dest_ip;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	};
+
+	for (i = 0; i < 4; i++)
+		src[i] = be32_to_cpu(src_addr[i]);
+	for (i = 0; i < 4; i++)
+		dest[i] = be32_to_cpu(dest_addr[i]);
+
+	return 0;
+}
+
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	l2->etype = BIT(port);
+
+	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
+		entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+	else
+		l2->etype |= BIT(8);
+
+	entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
+
+	return 0;
+}
+
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
+	case 0:
+		entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
+			      FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+		l2->vlan1 = vid;
+		return 0;
+	case 1:
+		if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
+			l2->vlan1 = vid;
+			l2->etype |= BIT(8);
+		} else {
+			l2->vlan2 = vid;
+			entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+		}
+		return 0;
+	default:
+		return -ENOSPC;
+	}
+}
+
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+{
+	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
+	    (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
+		l2->etype = ETH_P_PPP_SES;
+
+	entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
+	l2->pppoe_id = sid;
+
+	return 0;
+}
+
+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+{
+	return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+	       FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+			 u16 timestamp)
+{
+	struct mtk_foe_entry *hwe;
+	u32 hash;
+
+	timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+	entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+	entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+
+	hash = mtk_ppe_hash_entry(entry);
+	hwe = &ppe->foe_table[hash];
+	if (!mtk_foe_entry_usable(hwe)) {
+		hwe++;
+		hash++;
+
+		if (!mtk_foe_entry_usable(hwe))
+			return -ENOSPC;
+	}
+
+	memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+	wmb();
+	hwe->ib1 = entry->ib1;
+
+	dma_wmb();
+
+	mtk_ppe_cache_clear(ppe);
+
+	return hash;
+}
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+		 int version)
+{
+	struct mtk_foe_entry *foe;
+
+	/* need to allocate a separate device, since it PPE DMA access is
+	 * not coherent.
+	 */
+	ppe->base = base;
+	ppe->dev = dev;
+	ppe->version = version;
+
+	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+				  &ppe->foe_phys, GFP_KERNEL);
+	if (!foe)
+		return -ENOMEM;
+
+	ppe->foe_table = foe;
+
+	mtk_ppe_debugfs_init(ppe);
+
+	return 0;
+}
+
+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+{
+	static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+	int i, k;
+
+	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table));
+
+	if (!IS_ENABLED(CONFIG_SOC_MT7621))
+		return;
+
+	/* skip all entries that cross the 1024 byte boundary */
+	for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
+		for (k = 0; k < ARRAY_SIZE(skip); k++)
+			ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
+}
+
+int mtk_ppe_start(struct mtk_ppe *ppe)
+{
+	u32 val;
+
+	mtk_ppe_init_foe_table(ppe);
+	ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
+
+	val = MTK_PPE_TB_CFG_ENTRY_80B |
+	      MTK_PPE_TB_CFG_AGE_NON_L4 |
+	      MTK_PPE_TB_CFG_AGE_UNBIND |
+	      MTK_PPE_TB_CFG_AGE_TCP |
+	      MTK_PPE_TB_CFG_AGE_UDP |
+	      MTK_PPE_TB_CFG_AGE_TCP_FIN |
+	      FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+			 MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+			 MTK_PPE_KEEPALIVE_DISABLE) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
+			 MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
+	      FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
+			 MTK_PPE_ENTRIES_SHIFT);
+	ppe_w32(ppe, MTK_PPE_TB_CFG, val);
+
+	ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
+		MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
+
+	mtk_ppe_cache_enable(ppe, true);
+
+	val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
+	      MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+	      MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+	      MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+	      MTK_PPE_FLOW_CFG_IP6_6RD |
+	      MTK_PPE_FLOW_CFG_IP4_NAT |
+	      MTK_PPE_FLOW_CFG_IP4_NAPT |
+	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
+	      MTK_PPE_FLOW_CFG_L2_BRIDGE |
+	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+
+	val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+	      FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
+	ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
+	      FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
+	      FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
+	ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
+
+	val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
+	ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
+
+	val = MTK_PPE_BIND_LIMIT1_FULL |
+	      FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
+
+	val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
+	      FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
+	ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
+
+	/* enable PPE */
+	val = MTK_PPE_GLO_CFG_EN |
+	      MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
+	      MTK_PPE_GLO_CFG_IP4_CS_DROP |
+	      MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
+	ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
+
+	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+
+	return 0;
+}
+
+int mtk_ppe_stop(struct mtk_ppe *ppe)
+{
+	u32 val;
+	int i;
+
+	for (i = 0; i < MTK_PPE_ENTRIES; i++)
+		ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
+						   MTK_FOE_STATE_INVALID);
+
+	mtk_ppe_cache_enable(ppe, false);
+
+	/* disable offload engine */
+	ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
+	ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
+
+	/* disable aging */
+	val = MTK_PPE_TB_CFG_AGE_NON_L4 |
+	      MTK_PPE_TB_CFG_AGE_UNBIND |
+	      MTK_PPE_TB_CFG_AGE_TCP |
+	      MTK_PPE_TB_CFG_AGE_UDP |
+	      MTK_PPE_TB_CFG_AGE_TCP_FIN;
+	ppe_clear(ppe, MTK_PPE_TB_CFG, val);
+
+	return mtk_ppe_wait_busy(ppe);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
new file mode 100644
index 000000000000..51bd5e75bbbd
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_H
+#define __MTK_PPE_H
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+
+#define MTK_ETH_PPE_BASE		0xc00
+
+#define MTK_PPE_ENTRIES_SHIFT		3
+#define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
+#define MTK_PPE_HASH_MASK		(MTK_PPE_ENTRIES - 1)
+
+#define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
+#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
+#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(24)
+
+#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(14, 0)
+#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(15)
+#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(18, 16)
+#define MTK_FOE_IB1_BIND_PPPOE		BIT(19)
+#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(20)
+#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(21)
+#define MTK_FOE_IB1_BIND_CACHE		BIT(22)
+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(23)
+#define MTK_FOE_IB1_BIND_TTL		BIT(24)
+
+#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 25)
+#define MTK_FOE_IB1_STATE		GENMASK(29, 28)
+#define MTK_FOE_IB1_UDP			BIT(30)
+#define MTK_FOE_IB1_STATIC		BIT(31)
+
+enum {
+	MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
+	MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
+	MTK_PPE_PKT_TYPE_BRIDGE = 2,
+	MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
+	MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
+	MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
+	MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+};
+
+#define MTK_FOE_IB2_QID			GENMASK(3, 0)
+#define MTK_FOE_IB2_PSE_QOS		BIT(4)
+#define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
+#define MTK_FOE_IB2_MULTICAST		BIT(8)
+
+#define MTK_FOE_IB2_WHNAT_QID2		GENMASK(13, 12)
+#define MTK_FOE_IB2_WHNAT_DEVIDX	BIT(16)
+#define MTK_FOE_IB2_WHNAT_NAT		BIT(17)
+
+#define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
+
+#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 18)
+
+#define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
+
+#define MTK_FOE_VLAN2_WHNAT_BSS		GEMMASK(5, 0)
+#define MTK_FOE_VLAN2_WHNAT_WCID	GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WHNAT_RING	GENMASK(15, 14)
+
+enum {
+	MTK_FOE_STATE_INVALID,
+	MTK_FOE_STATE_UNBIND,
+	MTK_FOE_STATE_BIND,
+	MTK_FOE_STATE_FIN
+};
+
+struct mtk_foe_mac_info {
+	u16 vlan1;
+	u16 etype;
+
+	u32 dest_mac_hi;
+
+	u16 vlan2;
+	u16 dest_mac_lo;
+
+	u32 src_mac_hi;
+
+	u16 pppoe_id;
+	u16 src_mac_lo;
+};
+
+struct mtk_foe_bridge {
+	u32 dest_mac_hi;
+
+	u16 src_mac_lo;
+	u16 dest_mac_lo;
+
+	u32 src_mac_hi;
+
+	u32 ib2;
+
+	u32 _rsv[5];
+
+	u32 udf_tsid;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_ipv4_tuple {
+	u32 src_ip;
+	u32 dest_ip;
+	union {
+		struct {
+			u16 dest_port;
+			u16 src_port;
+		};
+		struct {
+			u8 protocol;
+			u8 _pad[3]; /* fill with 0xa5a5a5 */
+		};
+		u32 ports;
+	};
+};
+
+struct mtk_foe_ipv4 {
+	struct mtk_ipv4_tuple orig;
+
+	u32 ib2;
+
+	struct mtk_ipv4_tuple new;
+
+	u16 timestamp;
+	u16 _rsv0[3];
+
+	u32 udf_tsid;
+
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv4_dslite {
+	struct mtk_ipv4_tuple ip4;
+
+	u32 tunnel_src_ip[4];
+	u32 tunnel_dest_ip[4];
+
+	u8 flow_label[3];
+	u8 priority;
+
+	u32 udf_tsid;
+
+	u32 ib2;
+
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6 {
+	u32 src_ip[4];
+	u32 dest_ip[4];
+
+	union {
+		struct {
+			u8 protocol;
+			u8 _pad[3]; /* fill with 0xa5a5a5 */
+		}; /* 3-tuple */
+		struct {
+			u16 dest_port;
+			u16 src_port;
+		}; /* 5-tuple */
+		u32 ports;
+	};
+
+	u32 _rsv[3];
+
+	u32 udf;
+
+	u32 ib2;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6_6rd {
+	u32 src_ip[4];
+	u32 dest_ip[4];
+	u16 dest_port;
+	u16 src_port;
+
+	u32 tunnel_src_ip;
+	u32 tunnel_dest_ip;
+
+	u16 hdr_csum;
+	u8 dscp;
+	u8 ttl;
+
+	u8 flag;
+	u8 pad;
+	u8 per_flow_6rd_id;
+	u8 pad2;
+
+	u32 ib2;
+	struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_entry {
+	u32 ib1;
+
+	union {
+		struct mtk_foe_bridge bridge;
+		struct mtk_foe_ipv4 ipv4;
+		struct mtk_foe_ipv4_dslite dslite;
+		struct mtk_foe_ipv6 ipv6;
+		struct mtk_foe_ipv6_6rd ipv6_6rd;
+		u32 data[19];
+	};
+};
+
+enum {
+	MTK_PPE_CPU_REASON_TTL_EXCEEDED			= 0x02,
+	MTK_PPE_CPU_REASON_OPTION_HEADER		= 0x03,
+	MTK_PPE_CPU_REASON_NO_FLOW			= 0x07,
+	MTK_PPE_CPU_REASON_IPV4_FRAG			= 0x08,
+	MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG		= 0x09,
+	MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP	= 0x0a,
+	MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP		= 0x0b,
+	MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST		= 0x0c,
+	MTK_PPE_CPU_REASON_UN_HIT			= 0x0d,
+	MTK_PPE_CPU_REASON_HIT_UNBIND			= 0x0e,
+	MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED	= 0x0f,
+	MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN		= 0x10,
+	MTK_PPE_CPU_REASON_HIT_TTL_1			= 0x11,
+	MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION	= 0x12,
+	MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR		= 0x13,
+	MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR		= 0x14,
+	MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR	= 0x15,
+	MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU		= 0x16,
+	MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER		= 0x17,
+	MTK_PPE_CPU_REASON_MULTICAST_TO_CPU		= 0x18,
+	MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU	= 0x19,
+	MTK_PPE_CPU_REASON_HIT_PRE_BIND			= 0x1a,
+	MTK_PPE_CPU_REASON_PACKET_SAMPLING		= 0x1b,
+	MTK_PPE_CPU_REASON_EXCEED_MTU			= 0x1c,
+	MTK_PPE_CPU_REASON_PPE_BYPASS			= 0x1e,
+	MTK_PPE_CPU_REASON_INVALID			= 0x1f,
+};
+
+struct mtk_ppe {
+	struct device *dev;
+	void __iomem *base;
+	int version;
+
+	struct mtk_foe_entry *foe_table;
+	dma_addr_t foe_phys;
+
+	void *acct_table;
+};
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+		 int version);
+int mtk_ppe_start(struct mtk_ppe *ppe);
+int mtk_ppe_stop(struct mtk_ppe *ppe);
+
+static inline void
+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+{
+	ppe->foe_table[hash].ib1 = 0;
+	dma_wmb();
+}
+
+static inline int
+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
+{
+	u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
+
+	if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
+		return -1;
+
+	return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+			  u8 pse_port, u8 *src_mac, u8 *dest_mac);
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
+				 __be32 src_addr, __be16 src_port,
+				 __be32 dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+				 __be32 *src_addr, __be16 src_port,
+				 __be32 *dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+			 u16 timestamp);
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
new file mode 100644
index 000000000000..8ae9efab6d02
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_addr_info
+{
+	void *src, *dest;
+	u16 *src_port, *dest_port;
+	bool ipv6;
+};
+
+static const char *mtk_foe_entry_state_str(int state)
+{
+	static const char * const state_str[] = {
+		[MTK_FOE_STATE_INVALID] = "INV",
+		[MTK_FOE_STATE_UNBIND] = "UNB",
+		[MTK_FOE_STATE_BIND] = "BND",
+		[MTK_FOE_STATE_FIN] = "FIN",
+	};
+
+	if (state >= ARRAY_SIZE(state_str) || !state_str[state])
+		return "UNK";
+
+	return state_str[state];
+}
+
+static const char *mtk_foe_pkt_type_str(int type)
+{
+	static const char * const type_str[] = {
+		[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+		[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+		[MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+		[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+		[MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
+	};
+
+	if (type >= ARRAY_SIZE(type_str) || !type_str[type])
+		return "UNKNOWN";
+
+	return type_str[type];
+}
+
+static void
+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
+{
+	u32 n_addr[4];
+	int i;
+
+	if (!ipv6) {
+		seq_printf(m, "%pI4h", addr);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(n_addr); i++)
+		n_addr[i] = htonl(addr[i]);
+	seq_printf(m, "%pI6", n_addr);
+}
+
+static void
+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+{
+	mtk_print_addr(m, ai->src, ai->ipv6);
+	if (ai->src_port)
+		seq_printf(m, ":%d", *ai->src_port);
+	seq_printf(m, "->");
+	mtk_print_addr(m, ai->dest, ai->ipv6);
+	if (ai->dest_port)
+		seq_printf(m, ":%d", *ai->dest_port);
+}
+
+static int
+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+{
+	struct mtk_ppe *ppe = m->private;
+	int i, count;
+
+	for (i = 0, count = 0; i < MTK_PPE_ENTRIES; i++) {
+		struct mtk_foe_entry *entry = &ppe->foe_table[i];
+		struct mtk_foe_mac_info *l2;
+		struct mtk_flow_addr_info ai = {};
+		unsigned char h_source[ETH_ALEN];
+		unsigned char h_dest[ETH_ALEN];
+		int type, state;
+		u32 ib2;
+
+
+		state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
+		if (!state)
+			continue;
+
+		if (bind && state != MTK_FOE_STATE_BIND)
+			continue;
+
+		type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+		seq_printf(m, "%05x %s %7s", i,
+			   mtk_foe_entry_state_str(state),
+			   mtk_foe_pkt_type_str(type));
+
+		switch (type) {
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+			ai.src_port = &entry->ipv4.orig.src_port;
+			ai.dest_port = &entry->ipv4.orig.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+			ai.src = &entry->ipv4.orig.src_ip;
+			ai.dest = &entry->ipv4.orig.dest_ip;
+			break;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+			ai.src_port = &entry->ipv6.src_port;
+			ai.dest_port = &entry->ipv6.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+		case MTK_PPE_PKT_TYPE_IPV6_6RD:
+			ai.src = &entry->ipv6.src_ip;
+			ai.dest = &entry->ipv6.dest_ip;
+			ai.ipv6 = true;
+			break;
+		}
+
+		seq_printf(m, " orig=");
+		mtk_print_addr_info(m, &ai);
+
+		switch (type) {
+		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+			ai.src_port = &entry->ipv4.new.src_port;
+			ai.dest_port = &entry->ipv4.new.dest_port;
+			fallthrough;
+		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+			ai.src = &entry->ipv4.new.src_ip;
+			ai.dest = &entry->ipv4.new.dest_ip;
+			seq_printf(m, " new=");
+			mtk_print_addr_info(m, &ai);
+			break;
+		}
+
+		if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+			l2 = &entry->ipv6.l2;
+			ib2 = entry->ipv6.ib2;
+		} else {
+			l2 = &entry->ipv4.l2;
+			ib2 = entry->ipv4.ib2;
+		}
+
+		*((__be32 *)h_source) = htonl(l2->src_mac_hi);
+		*((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
+		*((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
+		*((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
+
+		seq_printf(m, " eth=%pM->%pM etype=%04x"
+			      " vlan=%d,%d ib1=%08x ib2=%08x\n",
+			   h_source, h_dest, ntohs(l2->etype),
+			   l2->vlan1, l2->vlan2, entry->ib1, ib2);
+	}
+
+	return 0;
+}
+
+static int
+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+{
+	return mtk_ppe_debugfs_foe_show(m, private, false);
+}
+
+static int
+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+{
+	return mtk_ppe_debugfs_foe_show(m, private, true);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtk_ppe_debugfs_foe_show_all,
+			   inode->i_private);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtk_ppe_debugfs_foe_show_bind,
+			   inode->i_private);
+}
+
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+{
+	static const struct file_operations fops_all = {
+		.open = mtk_ppe_debugfs_foe_open_all,
+		.read = seq_read,
+		.llseek = seq_lseek,
+		.release = single_release,
+	};
+
+	static const struct file_operations fops_bind = {
+		.open = mtk_ppe_debugfs_foe_open_bind,
+		.read = seq_read,
+		.llseek = seq_lseek,
+		.release = single_release,
+	};
+
+	struct dentry *root;
+
+	root = debugfs_create_dir("mtk_ppe", NULL);
+	if (!root)
+		return -ENOMEM;
+
+	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
new file mode 100644
index 000000000000..0c45ea0900f1
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_REGS_H
+#define __MTK_PPE_REGS_H
+
+#define MTK_PPE_GLO_CFG				0x200
+#define MTK_PPE_GLO_CFG_EN			BIT(0)
+#define MTK_PPE_GLO_CFG_TSID_EN			BIT(1)
+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP		BIT(2)
+#define MTK_PPE_GLO_CFG_IP4_CS_DROP		BIT(3)
+#define MTK_PPE_GLO_CFG_TTL0_DROP		BIT(4)
+#define MTK_PPE_GLO_CFG_PPE_BSWAP		BIT(5)
+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS		BIT(6)
+#define MTK_PPE_GLO_CFG_MCAST_TB_EN		BIT(7)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA		BIT(8)
+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE	BIT(9)
+#define MTK_PPE_GLO_CFG_UDP_LITE_EN		BIT(10)
+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP		BIT(11)
+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES		GNEMASK(13, 12)
+#define MTK_PPE_GLO_CFG_BUSY			BIT(31)
+
+#define MTK_PPE_FLOW_CFG			0x204
+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG		BIT(6)
+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG		BIT(7)
+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE		BIT(8)
+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE		BIT(9)
+#define MTK_PPE_FLOW_CFG_IP6_6RD		BIT(10)
+#define MTK_PPE_FLOW_CFG_IP4_NAT		BIT(12)
+#define MTK_PPE_FLOW_CFG_IP4_NAPT		BIT(13)
+#define MTK_PPE_FLOW_CFG_IP4_DSLITE		BIT(14)
+#define MTK_PPE_FLOW_CFG_L2_BRIDGE		BIT(15)
+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST	BIT(16)
+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG		BIT(17)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL	BIT(18)
+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY	BIT(19)
+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY	BIT(20)
+
+#define MTK_PPE_IP_PROTO_CHK			0x208
+#define MTK_PPE_IP_PROTO_CHK_IPV4		GENMASK(15, 0)
+#define MTK_PPE_IP_PROTO_CHK_IPV6		GENMASK(31, 16)
+
+#define MTK_PPE_TB_CFG				0x21c
+#define MTK_PPE_TB_CFG_ENTRY_NUM		GENMASK(2, 0)
+#define MTK_PPE_TB_CFG_ENTRY_80B		BIT(3)
+#define MTK_PPE_TB_CFG_SEARCH_MISS		GENMASK(5, 4)
+#define MTK_PPE_TB_CFG_AGE_PREBIND		BIT(6)
+#define MTK_PPE_TB_CFG_AGE_NON_L4		BIT(7)
+#define MTK_PPE_TB_CFG_AGE_UNBIND		BIT(8)
+#define MTK_PPE_TB_CFG_AGE_TCP			BIT(9)
+#define MTK_PPE_TB_CFG_AGE_UDP			BIT(10)
+#define MTK_PPE_TB_CFG_AGE_TCP_FIN		BIT(11)
+#define MTK_PPE_TB_CFG_KEEPALIVE		GENMASK(13, 12)
+#define MTK_PPE_TB_CFG_HASH_MODE		GENMASK(15, 14)
+#define MTK_PPE_TB_CFG_SCAN_MODE		GENMASK(17, 16)
+#define MTK_PPE_TB_CFG_HASH_DEBUG		GENMASK(19, 18)
+
+enum {
+	MTK_PPE_SCAN_MODE_DISABLED,
+	MTK_PPE_SCAN_MODE_CHECK_AGE,
+	MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
+};
+
+enum {
+	MTK_PPE_KEEPALIVE_DISABLE,
+	MTK_PPE_KEEPALIVE_UNICAST_CPU,
+	MTK_PPE_KEEPALIVE_DUP_CPU = 3,
+};
+
+enum {
+	MTK_PPE_SEARCH_MISS_ACTION_DROP,
+	MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
+	MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
+};
+
+#define MTK_PPE_TB_BASE				0x220
+
+#define MTK_PPE_TB_USED				0x224
+#define MTK_PPE_TB_USED_NUM			GENMASK(13, 0)
+
+#define MTK_PPE_BIND_RATE			0x228
+#define MTK_PPE_BIND_RATE_BIND			GENMASK(15, 0)
+#define MTK_PPE_BIND_RATE_PREBIND		GENMASK(31, 16)
+
+#define MTK_PPE_BIND_LIMIT0			0x22c
+#define MTK_PPE_BIND_LIMIT0_QUARTER		GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT0_HALF		GENMASK(29, 16)
+
+#define MTK_PPE_BIND_LIMIT1			0x230
+#define MTK_PPE_BIND_LIMIT1_FULL		GENMASK(13, 0)
+#define MTK_PPE_BIND_LIMIT1_NON_L4		GENMASK(23, 16)
+
+#define MTK_PPE_KEEPALIVE			0x234
+#define MTK_PPE_KEEPALIVE_TIME			GENMASK(15, 0)
+#define MTK_PPE_KEEPALIVE_TIME_TCP		GENMASK(23, 16)
+#define MTK_PPE_KEEPALIVE_TIME_UDP		GENMASK(31, 24)
+
+#define MTK_PPE_UNBIND_AGE			0x238
+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS		GENMASK(31, 16)
+#define MTK_PPE_UNBIND_AGE_DELTA		GENMASK(7, 0)
+
+#define MTK_PPE_BIND_AGE0			0x23c
+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4		GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE0_DELTA_UDP		GENMASK(14, 0)
+
+#define MTK_PPE_BIND_AGE1			0x240
+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN		GENMASK(30, 16)
+#define MTK_PPE_BIND_AGE1_DELTA_TCP		GENMASK(14, 0)
+
+#define MTK_PPE_HASH_SEED			0x244
+
+#define MTK_PPE_DEFAULT_CPU_PORT		0x248
+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
+
+#define MTK_PPE_MTU_DROP			0x308
+
+#define MTK_PPE_VLAN_MTU0			0x30c
+#define MTK_PPE_VLAN_MTU0_NONE			GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU0_1TAG			GENMASK(29, 16)
+
+#define MTK_PPE_VLAN_MTU1			0x310
+#define MTK_PPE_VLAN_MTU1_2TAG			GENMASK(13, 0)
+#define MTK_PPE_VLAN_MTU1_3TAG			GENMASK(29, 16)
+
+#define MTK_PPE_VPM_TPID			0x318
+
+#define MTK_PPE_CACHE_CTL			0x320
+#define MTK_PPE_CACHE_CTL_EN			BIT(0)
+#define MTK_PPE_CACHE_CTL_LOCK_CLR		BIT(4)
+#define MTK_PPE_CACHE_CTL_REQ			BIT(8)
+#define MTK_PPE_CACHE_CTL_CLEAR			BIT(9)
+#define MTK_PPE_CACHE_CTL_CMD			GENMASK(13, 12)
+
+#define MTK_PPE_MIB_CFG				0x334
+#define MTK_PPE_MIB_CFG_EN			BIT(0)
+#define MTK_PPE_MIB_CFG_RD_CLR			BIT(1)
+
+#define MTK_PPE_MIB_TB_BASE			0x338
+
+#define MTK_PPE_MIB_CACHE_CTL			0x350
+#define MTK_PPE_MIB_CACHE_CTL_EN		BIT(0)
+#define MTK_PPE_MIB_CACHE_CTL_FLUSH		BIT(2)
+
+#endif
-- 
cgit v1.2.3


From 502e84e2382d92654a2ecbc52cdbdb5a11cdcec7 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 24 Mar 2021 02:30:54 +0100
Subject: net: ethernet: mtk_eth_soc: add flow offloading support

This adds support for offloading IPv4 routed flows, including SNAT/DNAT,
one VLAN, PPPoE and DSA.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Makefile          |   2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c     |   5 +
 drivers/net/ethernet/mediatek/mtk_eth_soc.h     |  10 +-
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 485 ++++++++++++++++++++++++
 4 files changed, 500 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c

diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 871dc3e113e2..79d4cdbbcbf5 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,5 @@
 #
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
 obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 67605b9a3916..0396f0db855f 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2843,6 +2843,7 @@ static const struct net_device_ops mtk_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= mtk_poll_controller,
 #endif
+	.ndo_setup_tc		= mtk_eth_setup_tc,
 };
 
 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
@@ -3104,6 +3105,10 @@ static int mtk_probe(struct platform_device *pdev)
 				   eth->base + MTK_ETH_PPE_BASE, 2);
 		if (err)
 			goto err_free_dev;
+
+		err = mtk_eth_offload_init(eth);
+		if (err)
+			goto err_free_dev;
 	}
 
 	for (i = 0; i < MTK_MAX_DEVS; i++) {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 66ed537bc133..1a6750c08bb9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/phylink.h>
+#include <linux/rhashtable.h>
 #include "mtk_ppe.h"
 
 #define MTK_QDMA_PAGE_SIZE	2048
@@ -41,7 +42,8 @@
 				 NETIF_F_HW_VLAN_CTAG_RX | \
 				 NETIF_F_SG | NETIF_F_TSO | \
 				 NETIF_F_TSO6 | \
-				 NETIF_F_IPV6_CSUM)
+				 NETIF_F_IPV6_CSUM |\
+				 NETIF_F_HW_TC)
 #define MTK_HW_FEATURES_MT7628	(NETIF_F_SG | NETIF_F_RXCSUM)
 #define NEXT_DESP_IDX(X, Y)	(((X) + 1) & ((Y) - 1))
 
@@ -914,6 +916,7 @@ struct mtk_eth {
 	int				ip_align;
 
 	struct mtk_ppe			ppe;
+	struct rhashtable		flow_table;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
@@ -958,4 +961,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id);
 int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id);
 int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
 
+int mtk_eth_offload_init(struct mtk_eth *eth);
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data);
+
+
 #endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
new file mode 100644
index 000000000000..d0c46786571f
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <net/flow_offload.h>
+#include <net/pkt_cls.h>
+#include <net/dsa.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_data {
+	struct ethhdr eth;
+
+	union {
+		struct {
+			__be32 src_addr;
+			__be32 dst_addr;
+		} v4;
+	};
+
+	__be16 src_port;
+	__be16 dst_port;
+
+	struct {
+		u16 id;
+		__be16 proto;
+		u8 num;
+	} vlan;
+	struct {
+		u16 sid;
+		u8 num;
+	} pppoe;
+};
+
+struct mtk_flow_entry {
+	struct rhash_head node;
+	unsigned long cookie;
+	u16 hash;
+};
+
+static const struct rhashtable_params mtk_flow_ht_params = {
+	.head_offset = offsetof(struct mtk_flow_entry, node),
+	.head_offset = offsetof(struct mtk_flow_entry, cookie),
+	.key_len = sizeof(unsigned long),
+	.automatic_shrinking = true,
+};
+
+static u32
+mtk_eth_timestamp(struct mtk_eth *eth)
+{
+	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+}
+
+static int
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
+		       bool egress)
+{
+	return mtk_foe_entry_set_ipv4_tuple(foe, egress,
+					    data->v4.src_addr, data->src_port,
+					    data->v4.dst_addr, data->dst_port);
+}
+
+static void
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
+{
+	void *dest = eth + act->mangle.offset;
+	const void *src = &act->mangle.val;
+
+	if (act->mangle.offset > 8)
+		return;
+
+	if (act->mangle.mask == 0xffff) {
+		src += 2;
+		dest += 2;
+	}
+
+	memcpy(dest, src, act->mangle.mask ? 2 : 4);
+}
+
+
+static int
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
+		      struct mtk_flow_data *data)
+{
+	u32 val = ntohl(act->mangle.val);
+
+	switch (act->mangle.offset) {
+	case 0:
+		if (act->mangle.mask == ~htonl(0xffff))
+			data->dst_port = cpu_to_be16(val);
+		else
+			data->src_port = cpu_to_be16(val >> 16);
+		break;
+	case 2:
+		data->dst_port = cpu_to_be16(val);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
+		     struct mtk_flow_data *data)
+{
+	__be32 *dest;
+
+	switch (act->mangle.offset) {
+	case offsetof(struct iphdr, saddr):
+		dest = &data->v4.src_addr;
+		break;
+	case offsetof(struct iphdr, daddr):
+		dest = &data->v4.dst_addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	memcpy(dest, &act->mangle.val, sizeof(u32));
+
+	return 0;
+}
+
+static int
+mtk_flow_get_dsa_port(struct net_device **dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+	struct dsa_port *dp;
+
+	dp = dsa_port_from_netdev(*dev);
+	if (IS_ERR(dp))
+		return -ENODEV;
+
+	if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
+		return -ENODEV;
+
+	*dev = dp->cpu_dp->master;
+
+	return dp->index;
+#else
+	return -ENODEV;
+#endif
+}
+
+static int
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+			   struct net_device *dev)
+{
+	int pse_port, dsa_port;
+
+	dsa_port = mtk_flow_get_dsa_port(&dev);
+	if (dsa_port >= 0)
+		mtk_foe_entry_set_dsa(foe, dsa_port);
+
+	if (dev == eth->netdev[0])
+		pse_port = 1;
+	else if (dev == eth->netdev[1])
+		pse_port = 2;
+	else
+		return -EOPNOTSUPP;
+
+	mtk_foe_entry_set_pse_port(foe, pse_port);
+
+	return 0;
+}
+
+static int
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_action_entry *act;
+	struct mtk_flow_data data = {};
+	struct mtk_foe_entry foe;
+	struct net_device *odev = NULL;
+	struct mtk_flow_entry *entry;
+	int offload_type = 0;
+	u16 addr_type = 0;
+	u32 timestamp;
+	u8 l4proto = 0;
+	int err = 0;
+	int hash;
+	int i;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+		struct flow_match_meta match;
+
+		flow_rule_match_meta(rule, &match);
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		l4proto = match.key->ip_proto;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	flow_action_for_each(i, act, &rule->action) {
+		switch (act->id) {
+		case FLOW_ACTION_MANGLE:
+			if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+				mtk_flow_offload_mangle_eth(act, &data.eth);
+			break;
+		case FLOW_ACTION_REDIRECT:
+			odev = act->dev;
+			break;
+		case FLOW_ACTION_CSUM:
+			break;
+		case FLOW_ACTION_VLAN_PUSH:
+			if (data.vlan.num == 1 ||
+			    act->vlan.proto != htons(ETH_P_8021Q))
+				return -EOPNOTSUPP;
+
+			data.vlan.id = act->vlan.vid;
+			data.vlan.proto = act->vlan.proto;
+			data.vlan.num++;
+			break;
+		case FLOW_ACTION_PPPOE_PUSH:
+			if (data.pppoe.num == 1)
+				return -EOPNOTSUPP;
+
+			data.pppoe.sid = act->pppoe.sid;
+			data.pppoe.num++;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	switch (addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (!is_valid_ether_addr(data.eth.h_source) ||
+	    !is_valid_ether_addr(data.eth.h_dest))
+		return -EINVAL;
+
+	err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
+				    data.eth.h_source,
+				    data.eth.h_dest);
+	if (err)
+		return err;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports ports;
+
+		flow_rule_match_ports(rule, &ports);
+		data.src_port = ports.key->src;
+		data.dst_port = ports.key->dst;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs addrs;
+
+		flow_rule_match_ipv4_addrs(rule, &addrs);
+
+		data.v4.src_addr = addrs.key->src;
+		data.v4.dst_addr = addrs.key->dst;
+
+		mtk_flow_set_ipv4_addr(&foe, &data, false);
+	}
+
+	flow_action_for_each(i, act, &rule->action) {
+		if (act->id != FLOW_ACTION_MANGLE)
+			continue;
+
+		switch (act->mangle.htype) {
+		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+			err = mtk_flow_mangle_ports(act, &data);
+			break;
+		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+			err = mtk_flow_mangle_ipv4(act, &data);
+			break;
+		case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+			/* handled earlier */
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+
+		if (err)
+			return err;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		err = mtk_flow_set_ipv4_addr(&foe, &data, true);
+		if (err)
+			return err;
+	}
+
+	if (data.vlan.num == 1) {
+		if (data.vlan.proto != htons(ETH_P_8021Q))
+			return -EOPNOTSUPP;
+
+		mtk_foe_entry_set_vlan(&foe, data.vlan.id);
+	}
+	if (data.pppoe.num == 1)
+		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+
+	err = mtk_flow_set_output_device(eth, &foe, odev);
+	if (err)
+		return err;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->cookie = f->cookie;
+	timestamp = mtk_eth_timestamp(eth);
+	hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
+	if (hash < 0) {
+		err = hash;
+		goto free;
+	}
+
+	entry->hash = hash;
+	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
+				     mtk_flow_ht_params);
+	if (err < 0)
+		goto clear_flow;
+
+	return 0;
+clear_flow:
+	mtk_foe_entry_clear(&eth->ppe, hash);
+free:
+	kfree(entry);
+	return err;
+}
+
+static int
+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct mtk_flow_entry *entry;
+
+	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+				  mtk_flow_ht_params);
+	if (!entry)
+		return -ENOENT;
+
+	mtk_foe_entry_clear(&eth->ppe, entry->hash);
+	rhashtable_remove_fast(&eth->flow_table, &entry->node,
+			       mtk_flow_ht_params);
+	kfree(entry);
+
+	return 0;
+}
+
+static int
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+	struct mtk_flow_entry *entry;
+	int timestamp;
+	u32 idle;
+
+	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+				  mtk_flow_ht_params);
+	if (!entry)
+		return -ENOENT;
+
+	timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
+	if (timestamp < 0)
+		return -ETIMEDOUT;
+
+	idle = mtk_eth_timestamp(eth) - timestamp;
+	f->stats.lastused = jiffies - idle * HZ;
+
+	return 0;
+}
+
+static int
+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+	struct flow_cls_offload *cls = type_data;
+	struct net_device *dev = cb_priv;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+
+	if (!tc_can_offload(dev))
+		return -EOPNOTSUPP;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	switch (cls->command) {
+	case FLOW_CLS_REPLACE:
+		return mtk_flow_offload_replace(eth, cls);
+	case FLOW_CLS_DESTROY:
+		return mtk_flow_offload_destroy(eth, cls);
+	case FLOW_CLS_STATS:
+		return mtk_flow_offload_stats(eth, cls);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int
+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	static LIST_HEAD(block_cb_list);
+	struct flow_block_cb *block_cb;
+	flow_setup_cb_t *cb;
+
+	if (!eth->ppe.foe_table)
+		return -EOPNOTSUPP;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	cb = mtk_eth_setup_tc_block_cb;
+	f->driver_block_list = &block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
+		if (block_cb) {
+			flow_block_cb_incref(block_cb);
+			return 0;
+		}
+		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &block_cb_list);
+		return 0;
+	case FLOW_BLOCK_UNBIND:
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
+		if (!block_cb)
+			return -ENOENT;
+
+		if (flow_block_cb_decref(block_cb)) {
+			flow_block_cb_remove(block_cb, f);
+			list_del(&block_cb->driver_list);
+		}
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		     void *type_data)
+{
+	if (type == TC_SETUP_FT)
+		return mtk_eth_setup_tc_block(dev, type_data);
+
+	return -EOPNOTSUPP;
+}
+
+int mtk_eth_offload_init(struct mtk_eth *eth)
+{
+	if (!eth->ppe.foe_table)
+		return 0;
+
+	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
+}
-- 
cgit v1.2.3


From 143490cde5669e0151dff466a7c2cf70e2884fb7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 24 Mar 2021 02:30:55 +0100
Subject: docs: nf_flowtable: update documentation with enhancements

This patch updates the flowtable documentation to describe recent
enhancements:

- Offload action is available after the first packets go through the
  classic forwarding path.
- IPv4 and IPv6 are supported. Only TCP and UDP layer 4 are supported at
  this stage.
- Tuple has been augmented to track VLAN id and PPPoE session id.
- Bridge and IP forwarding integration, including bridge VLAN filtering
  support.
- Hardware offload support.
- Describe the [OFFLOAD] and [HW_OFFLOAD] tags in the conntrack table
  listing.
- Replace 'flow offload' by 'flow add' in example rulesets (preferred
  syntax).
- Describe existing cache limitations.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/nf_flowtable.rst | 170 +++++++++++++++++++++++++-----
 1 file changed, 143 insertions(+), 27 deletions(-)

diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst
index 6cdf9a1724b6..d87f253b9d39 100644
--- a/Documentation/networking/nf_flowtable.rst
+++ b/Documentation/networking/nf_flowtable.rst
@@ -4,35 +4,38 @@
 Netfilter's flowtable infrastructure
 ====================================
 
-This documentation describes the software flowtable infrastructure available in
-Netfilter since Linux kernel 4.16.
+This documentation describes the Netfilter flowtable infrastructure which allows
+you to define a fastpath through the flowtable datapath. This infrastructure
+also provides hardware offload support. The flowtable supports for the layer 3
+IPv4 and IPv6 and the layer 4 TCP and UDP protocols.
 
 Overview
 --------
 
-Initial packets follow the classic forwarding path, once the flow enters the
-established state according to the conntrack semantics (ie. we have seen traffic
-in both directions), then you can decide to offload the flow to the flowtable
-from the forward chain via the 'flow offload' action available in nftables.
+Once the first packet of the flow successfully goes through the IP forwarding
+path, from the second packet on, you might decide to offload the flow to the
+flowtable through your ruleset. The flowtable infrastructure provides a rule
+action that allows you to specify when to add a flow to the flowtable.
 
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
-path (the visible effect is that you do not see these packets from any of the
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
-follows the classic forward path.
+A packet that finds a matching entry in the flowtable (ie. flowtable hit) is
+transmitted to the output netdevice via neigh_xmit(), hence, packets bypass the
+classic IP forwarding path (the visible effect is that you do not see these
+packets from any of the Netfilter hooks coming after ingress). In case that
+there is no matching entry in the flowtable (ie. flowtable miss), the packet
+follows the classic IP forwarding path.
 
-The flowtable uses a resizable hashtable, lookups are based on the following
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
-and destination ports and the input interface (useful in case there are several
-conntrack zones in place).
+The flowtable uses a resizable hashtable. Lookups are based on the following
+n-tuple selectors: layer 2 protocol encapsulation (VLAN and PPPoE), layer 3
+source and destination, layer 4 source and destination ports and the input
+interface (useful in case there are several conntrack zones in place).
 
-Flowtables are populated via the 'flow offload' nftables action, so the user can
-selectively specify what flows are placed into the flow table. Hence, packets
-follow the classic forwarding path unless the user explicitly instruct packets
-to use this new alternative forwarding path via nftables policy.
+The 'flow add' action allows you to populate the flowtable, the user selectively
+specifies what flows are placed into the flowtable. Hence, packets follow the
+classic IP forwarding path unless the user explicitly instruct flows to use this
+new alternative forwarding path via policy.
 
-This is represented in Fig.1, which describes the classic forwarding path
-including the Netfilter hooks and the flowtable fastpath bypass.
+The flowtable datapath is represented in Fig.1, which describes the classic IP
+forwarding path including the Netfilter hooks and the flowtable fastpath bypass.
 
 ::
 
@@ -67,11 +70,13 @@ including the Netfilter hooks and the flowtable fastpath bypass.
 	       Fig.1 Netfilter hooks and flowtable interactions
 
 The flowtable entry also stores the NAT configuration, so all packets are
-mangled according to the NAT policy that matches the initial packets that went
-through the classic forwarding path. The TTL is decremented before calling
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
-path given that the transport selectors are missing, therefore flowtable lookup
-is not possible.
+mangled according to the NAT policy that is specified from the classic IP
+forwarding path. The TTL is decremented before calling neigh_xmit(). Fragmented
+traffic is passed up to follow the classic IP forwarding path given that the
+transport header is missing, in this case, flowtable lookups are not possible.
+TCP RST and FIN packets are also passed up to the classic IP forwarding path to
+release the flow gracefully. Packets that exceed the MTU are also passed up to
+the classic forwarding path to report packet-too-big ICMP errors to the sender.
 
 Example configuration
 ---------------------
@@ -85,7 +90,7 @@ flowtable and add one rule to your forward chain::
 		}
 		chain y {
 			type filter hook forward priority 0; policy accept;
-			ip protocol tcp flow offload @f
+			ip protocol tcp flow add @f
 			counter packets 0 bytes 0
 		}
 	}
@@ -103,6 +108,117 @@ flow is offloaded, you will observe that the counter rule in the example above
 does not get updated for the packets that are being forwarded through the
 forwarding bypass.
 
+You can identify offloaded flows through the [OFFLOAD] tag when listing your
+connection tracking table.
+
+::
+	# conntrack -L
+	tcp      6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
+
+
+Layer 2 encapsulation
+---------------------
+
+Since Linux kernel 5.13, the flowtable infrastructure discovers the real
+netdevice behind VLAN and PPPoE netdevices. The flowtable software datapath
+parses the VLAN and PPPoE layer 2 headers to extract the ethertype and the
+VLAN ID / PPPoE session ID which are used for the flowtable lookups. The
+flowtable datapath also deals with layer 2 decapsulation.
+
+You do not need to add the PPPoE and the VLAN devices to your flowtable,
+instead the real device is sufficient for the flowtable to track your flows.
+
+Bridge and IP forwarding
+------------------------
+
+Since Linux kernel 5.13, you can add bridge ports to the flowtable. The
+flowtable infrastructure discovers the topology behind the bridge device. This
+allows the flowtable to define a fastpath bypass between the bridge ports
+(represented as eth1 and eth2 in the example figure below) and the gateway
+device (represented as eth0) in your switch/router.
+
+::
+                      fastpath bypass
+               .-------------------------.
+              /                           \
+              |           IP forwarding   |
+              |          /             \ \/
+              |       br0               eth0 ..... eth0
+              .       / \                          *host B*
+               -> eth1  eth2
+                   .           *switch/router*
+                   .
+                   .
+                 eth0
+               *host A*
+
+The flowtable infrastructure also supports for bridge VLAN filtering actions
+such as PVID and untagged. You can also stack a classic VLAN device on top of
+your bridge port.
+
+If you would like that your flowtable defines a fastpath between your bridge
+ports and your IP forwarding path, you have to add your bridge ports (as
+represented by the real netdevice) to your flowtable definition.
+
+Counters
+--------
+
+The flowtable can synchronize packet and byte counters with the existing
+connection tracking entry by specifying the counter statement in your flowtable
+definition, e.g.
+
+::
+	table inet x {
+		flowtable f {
+			hook ingress priority 0; devices = { eth0, eth1 };
+			counter
+		}
+		...
+	}
+
+Counter support is available since Linux kernel 5.7.
+
+Hardware offload
+----------------
+
+If your network device provides hardware offload support, you can turn it on by
+means of the 'offload' flag in your flowtable definition, e.g.
+
+::
+	table inet x {
+		flowtable f {
+			hook ingress priority 0; devices = { eth0, eth1 };
+			flags offload;
+		}
+		...
+	}
+
+There is a workqueue that adds the flows to the hardware. Note that a few
+packets might still run over the flowtable software path until the workqueue has
+a chance to offload the flow to the network device.
+
+You can identify hardware offloaded flows through the [HW_OFFLOAD] tag when
+listing your connection tracking table. Please, note that the [OFFLOAD] tag
+refers to the software offload mode, so there is a distinction between [OFFLOAD]
+which refers to the software flowtable fastpath and [HW_OFFLOAD] which refers
+to the hardware offload datapath being used by the flow.
+
+The flowtable hardware offload infrastructure also supports for the DSA
+(Distributed Switch Architecture).
+
+Limitations
+-----------
+
+The flowtable behaves like a cache. The flowtable entries might get stale if
+either the destination MAC address or the egress netdevice that is used for
+transmission changes.
+
+This might be a problem if:
+
+- You run the flowtable in software mode and you combine bridge and IP
+  forwarding in your setup.
+- Hardware offload is enabled.
+
 More reading
 ------------
 
-- 
cgit v1.2.3


From b6c6680b8b3a08040822ef7227a723714d4451e9 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Tue, 23 Mar 2021 15:13:27 +0530
Subject: octeontx2-af: Few mundane typos fixed

s/preceeds/precedes/  .....two different places
s/rsponse/response/
s/cetain/certain/
s/precison/precision/

Fix a sentence construction as per suggestion.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 3af4d0ffcf7c..55629c66586e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -74,13 +74,13 @@ struct otx2_mbox {
 	struct otx2_mbox_dev *dev;
 };
 
-/* Header which preceeds all mbox messages */
+/* Header which precedes all mbox messages */
 struct mbox_hdr {
 	u64 msg_size;	/* Total msgs size embedded */
 	u16  num_msgs;   /* No of msgs embedded */
 };
 
-/* Header which preceeds every msg and is also part of it */
+/* Header which precedes every msg and is also part of it */
 struct mbox_msghdr {
 	u16 pcifunc;     /* Who's sending this msg */
 	u16 id;          /* Mbox message ID */
@@ -280,8 +280,8 @@ struct msg_req {
 	struct mbox_msghdr hdr;
 };
 
-/* Generic rsponse msg used a ack or response for those mbox
- * messages which doesn't have a specific rsp msg format.
+/* Generic response msg used an ack or response for those mbox
+ * messages which don't have a specific rsp msg format.
  */
 struct msg_rsp {
 	struct mbox_msghdr hdr;
@@ -302,7 +302,7 @@ struct ready_msg_rsp {
 
 /* Structure for requesting resource provisioning.
  * 'modify' flag to be used when either requesting more
- * or to detach partial of a cetain resource type.
+ * or to detach partial of a certain resource type.
  * Rest of the fields specify how many of what type to
  * be attached.
  * To request LFs from two blocks of same type this mailbox
@@ -492,7 +492,7 @@ struct cgx_set_link_mode_rsp {
 };
 
 #define RVU_LMAC_FEAT_FC		BIT_ULL(0) /* pause frames */
-#define RVU_LMAC_FEAT_PTP		BIT_ULL(1) /* precison time protocol */
+#define RVU_LMAC_FEAT_PTP		BIT_ULL(1) /* precision time protocol */
 #define RVU_MAC_VERSION			BIT_ULL(2)
 #define RVU_MAC_CGX			BIT_ULL(3)
 #define RVU_MAC_RPM			BIT_ULL(4)
-- 
cgit v1.2.3


From 536e11f96b03ab6e382e8d13e57506c8d2944fb3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Wed, 24 Mar 2021 13:16:37 +0530
Subject: net: sched: Mundane typo fixes

s/procdure/procedure/
s/maintanance/maintenance/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 320b3d31fa97..b79a7e27bb31 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -263,7 +263,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		/*
 		 * Step 3+n. If classifier selected a link sharing class,
 		 *	   apply agency specific classifier.
-		 *	   Repeat this procdure until we hit a leaf node.
+		 *	   Repeat this procedure until we hit a leaf node.
 		 */
 		head = cl;
 	}
@@ -859,7 +859,7 @@ cbq_dequeue(struct Qdisc *sch)
 	return NULL;
 }
 
-/* CBQ class maintanance routines */
+/* CBQ class maintenance routines */
 
 static void cbq_adjust_levels(struct cbq_class *this)
 {
-- 
cgit v1.2.3


From bef32aa8e412b2495503134a3c02139151b3ebfc Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Wed, 24 Mar 2021 13:22:04 +0530
Subject: sfc-falcon: Fix a typo

s/maintaning/maintaining/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/falcon/net_driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h
index a529ff395ead..a381cf9ec4f3 100644
--- a/drivers/net/ethernet/sfc/falcon/net_driver.h
+++ b/drivers/net/ethernet/sfc/falcon/net_driver.h
@@ -637,7 +637,7 @@ union ef4_multicast_hash {
  * struct ef4_nic - an Efx NIC
  * @name: Device name (net device name or bus id before net device registered)
  * @pci_dev: The PCI device
- * @node: List node for maintaning primary/secondary function lists
+ * @node: List node for maintaining primary/secondary function lists
  * @primary: &struct ef4_nic instance for the primary function of this
  *	controller.  May be the same structure, and may be %NULL if no
  *	primary function is bound.  Serialised by rtnl_lock.
-- 
cgit v1.2.3


From 341f67e424e572bfc034daa534c6fa667533e6a4 Mon Sep 17 00:00:00 2001
From: Tan Tee Min <tee.min.tan@intel.com>
Date: Tue, 23 Mar 2021 19:07:34 +0800
Subject: net: stmmac: Add hardware supported cross-timestamp

Cross timestamping is supported on Integrated Ethernet Controller in
Intel SoC such as EHL and TGL with Always Running Timer.

The hardware cross-timestamp result is made available to
applications through the PTP_SYS_OFFSET_PRECISE ioctl which calls
stmmac_getcrosststamp().

Device time is stored in the MAC Auxiliary register. The 64-bit System
time (ART timestamp) is stored in registers that are only addressable
by using MDIO space.

Signed-off-by: Tan Tee Min <tee.min.tan@intel.com>
Co-developed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   2 +
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c  | 108 +++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h       |   8 ++
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c   |   2 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |   3 +
 .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c  |  11 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c   |  32 ++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h   |  23 +++++
 include/linux/stmmac.h                             |   4 +
 9 files changed, 193 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1c0c60bdf854..95469059dca1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -388,6 +388,8 @@ struct dma_features {
 	unsigned int estsel;
 	unsigned int fpesel;
 	unsigned int tbssel;
+	/* Numbers of Auxiliary Snapshot Inputs */
+	unsigned int aux_snapshot_n;
 };
 
 /* RX Buffer size must be multiple of 4/8/16 bytes */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 763b549e3c2d..992294d25706 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -8,6 +8,7 @@
 #include "dwmac-intel.h"
 #include "dwmac4.h"
 #include "stmmac.h"
+#include "stmmac_ptp.h"
 
 #define INTEL_MGBE_ADHOC_ADDR	0x15
 #define INTEL_MGBE_XPCS_ADDR	0x16
@@ -240,6 +241,108 @@ static void intel_mgbe_ptp_clk_freq_config(void *npriv)
 	writel(gpio_value, priv->ioaddr + GMAC_GPIO_STATUS);
 }
 
+static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr,
+			u64 *art_time)
+{
+	u64 ns;
+
+	ns = mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE3);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE2);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE1);
+	ns <<= GMAC4_ART_TIME_SHIFT;
+	ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE0);
+
+	*art_time = ns;
+}
+
+static int intel_crosststamp(ktime_t *device,
+			     struct system_counterval_t *system,
+			     void *ctx)
+{
+	struct intel_priv_data *intel_priv;
+
+	struct stmmac_priv *priv = (struct stmmac_priv *)ctx;
+	void __iomem *ptpaddr = priv->ptpaddr;
+	void __iomem *ioaddr = priv->hw->pcsr;
+	unsigned long flags;
+	u64 art_time = 0;
+	u64 ptp_time = 0;
+	u32 num_snapshot;
+	u32 gpio_value;
+	u32 acr_value;
+	int ret;
+	u32 v;
+	int i;
+
+	if (!boot_cpu_has(X86_FEATURE_ART))
+		return -EOPNOTSUPP;
+
+	intel_priv = priv->plat->bsp_priv;
+
+	/* Enable Internal snapshot trigger */
+	acr_value = readl(ptpaddr + PTP_ACR);
+	acr_value &= ~PTP_ACR_MASK;
+	switch (priv->plat->int_snapshot_num) {
+	case AUX_SNAPSHOT0:
+		acr_value |= PTP_ACR_ATSEN0;
+		break;
+	case AUX_SNAPSHOT1:
+		acr_value |= PTP_ACR_ATSEN1;
+		break;
+	case AUX_SNAPSHOT2:
+		acr_value |= PTP_ACR_ATSEN2;
+		break;
+	case AUX_SNAPSHOT3:
+		acr_value |= PTP_ACR_ATSEN3;
+		break;
+	default:
+		return -EINVAL;
+	}
+	writel(acr_value, ptpaddr + PTP_ACR);
+
+	/* Clear FIFO */
+	acr_value = readl(ptpaddr + PTP_ACR);
+	acr_value |= PTP_ACR_ATSFC;
+	writel(acr_value, ptpaddr + PTP_ACR);
+
+	/* Trigger Internal snapshot signal
+	 * Create a rising edge by just toggle the GPO1 to low
+	 * and back to high.
+	 */
+	gpio_value = readl(ioaddr + GMAC_GPIO_STATUS);
+	gpio_value &= ~GMAC_GPO1;
+	writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
+	gpio_value |= GMAC_GPO1;
+	writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
+
+	/* Poll for time sync operation done */
+	ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
+				 (v & GMAC_INT_TSIE), 100, 10000);
+
+	if (ret == -ETIMEDOUT) {
+		pr_err("%s: Wait for time sync operation timeout\n", __func__);
+		return ret;
+	}
+
+	num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
+			GMAC_TIMESTAMP_ATSNS_MASK) >>
+			GMAC_TIMESTAMP_ATSNS_SHIFT;
+
+	/* Repeat until the timestamps are from the FIFO last segment */
+	for (i = 0; i < num_snapshot; i++) {
+		spin_lock_irqsave(&priv->ptp_lock, flags);
+		stmmac_get_ptptime(priv, ptpaddr, &ptp_time);
+		*device = ns_to_ktime(ptp_time);
+		spin_unlock_irqrestore(&priv->ptp_lock, flags);
+		get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time);
+		*system = convert_art_to_tsc(art_time);
+	}
+
+	return 0;
+}
+
 static void common_default_data(struct plat_stmmacenet_data *plat)
 {
 	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
@@ -384,6 +487,11 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR;
 	plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;
 
+	plat->int_snapshot_num = AUX_SNAPSHOT1;
+
+	plat->has_crossts = true;
+	plat->crosststamp = intel_crosststamp;
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index ef8502d2b6e6..462ca7ed095a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -50,6 +50,7 @@
 #define GMAC_L4_ADDR(reg)		(0x904 + (reg) * 0x30)
 #define GMAC_L3_ADDR0(reg)		(0x910 + (reg) * 0x30)
 #define GMAC_L3_ADDR1(reg)		(0x914 + (reg) * 0x30)
+#define GMAC_TIMESTAMP_STATUS		0x00000b20
 
 /* RX Queues Routing */
 #define GMAC_RXQCTRL_AVCPQ_MASK		GENMASK(2, 0)
@@ -144,6 +145,7 @@
 #define GMAC_INT_PCS_PHYIS		BIT(3)
 #define GMAC_INT_PMT_EN			BIT(4)
 #define GMAC_INT_LPI_EN			BIT(5)
+#define GMAC_INT_TSIE			BIT(12)
 
 #define	GMAC_PCS_IRQ_DEFAULT	(GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK |	\
 				 GMAC_INT_PCS_ANE)
@@ -260,6 +262,7 @@ enum power_event {
 #define GMAC_HW_RXFIFOSIZE		GENMASK(4, 0)
 
 /* MAC HW features2 bitmap */
+#define GMAC_HW_FEAT_AUXSNAPNUM		GENMASK(30, 28)
 #define GMAC_HW_FEAT_PPSOUTNUM		GENMASK(26, 24)
 #define GMAC_HW_FEAT_TXCHCNT		GENMASK(21, 18)
 #define GMAC_HW_FEAT_RXCHCNT		GENMASK(15, 12)
@@ -305,6 +308,11 @@ enum power_event {
 #define GMAC_L4DP0_SHIFT		16
 #define GMAC_L4SP0			GENMASK(15, 0)
 
+/* MAC Timestamp Status */
+#define GMAC_TIMESTAMP_AUXTSTRIG	BIT(2)
+#define GMAC_TIMESTAMP_ATSNS_MASK	GENMASK(29, 25)
+#define GMAC_TIMESTAMP_ATSNS_SHIFT	25
+
 /*  MTL registers */
 #define MTL_OPERATION_MODE		0x00000c00
 #define MTL_FRPE			BIT(15)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 8958778d16b7..8954b85eb850 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -412,6 +412,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
 
 	/* IEEE 1588-2002 */
 	dma_cap->time_stamp = 0;
+	/* Number of Auxiliary Snapshot Inputs */
+	dma_cap->aux_snapshot_n = (hw_cap & GMAC_HW_FEAT_AUXSNAPNUM) >> 28;
 
 	/* MAC HW feature3 */
 	hw_cap = readl(ioaddr + GMAC_HW_FEATURE3);
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 692541c7b419..59bf7078a754 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -508,6 +508,7 @@ struct stmmac_hwtimestamp {
 	int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
 			       int add_sub, int gmac4);
 	void (*get_systime) (void __iomem *ioaddr, u64 *systime);
+	void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time);
 };
 
 #define stmmac_config_hw_tstamping(__priv, __args...) \
@@ -522,6 +523,8 @@ struct stmmac_hwtimestamp {
 	stmmac_do_callback(__priv, ptp, adjust_systime, __args)
 #define stmmac_get_systime(__priv, __args...) \
 	stmmac_do_void_callback(__priv, ptp, get_systime, __args)
+#define stmmac_get_ptptime(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, get_ptptime, __args)
 
 /* Helpers to manage the descriptors for chain and ring modes */
 struct stmmac_mode_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index d291612eeafb..113c51bcc0b5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -153,6 +153,16 @@ static void get_systime(void __iomem *ioaddr, u64 *systime)
 		*systime = ns;
 }
 
+static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
+{
+	u64 ns;
+
+	ns = readl(ptpaddr + PTP_ATNR);
+	ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC;
+
+	*ptp_time = ns;
+}
+
 const struct stmmac_hwtimestamp stmmac_ptp = {
 	.config_hw_tstamping = config_hw_tstamping,
 	.init_systime = init_systime,
@@ -160,4 +170,5 @@ const struct stmmac_hwtimestamp stmmac_ptp = {
 	.config_addend = config_addend,
 	.adjust_systime = adjust_systime,
 	.get_systime = get_systime,
+	.get_ptptime = get_ptptime,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 8b10fd10446f..b164ae22e35f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -9,6 +9,7 @@
 *******************************************************************************/
 #include "stmmac.h"
 #include "stmmac_ptp.h"
+#include "dwmac4.h"
 
 /**
  * stmmac_adjust_freq
@@ -165,6 +166,36 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 	return ret;
 }
 
+/**
+ * stmmac_get_syncdevicetime
+ * @device: current device time
+ * @system: system counter value read synchronously with device time
+ * @ctx: context provided by timekeeping code
+ * Description: Read device and system clock simultaneously and return the
+ * corrected clock values in ns.
+ **/
+static int stmmac_get_syncdevicetime(ktime_t *device,
+				     struct system_counterval_t *system,
+				     void *ctx)
+{
+	struct stmmac_priv *priv = (struct stmmac_priv *)ctx;
+
+	if (priv->plat->crosststamp)
+		return priv->plat->crosststamp(device, system, ctx);
+	else
+		return -EOPNOTSUPP;
+}
+
+static int stmmac_getcrosststamp(struct ptp_clock_info *ptp,
+				 struct system_device_crosststamp *xtstamp)
+{
+	struct stmmac_priv *priv =
+		container_of(ptp, struct stmmac_priv, ptp_clock_ops);
+
+	return get_device_system_crosststamp(stmmac_get_syncdevicetime,
+					     priv, NULL, xtstamp);
+}
+
 /* structure describing a PTP hardware clock */
 static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.owner = THIS_MODULE,
@@ -180,6 +211,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.gettime64 = stmmac_get_time,
 	.settime64 = stmmac_set_time,
 	.enable = stmmac_enable,
+	.getcrosststamp = stmmac_getcrosststamp,
 };
 
 /**
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index 7abb1d47e7da..f88727ce4d30 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -23,6 +23,9 @@
 #define	PTP_STSUR	0x10	/* System Time – Seconds Update Reg */
 #define	PTP_STNSUR	0x14	/* System Time – Nanoseconds Update Reg */
 #define	PTP_TAR		0x18	/* Timestamp Addend Reg */
+#define	PTP_ACR		0x40	/* Auxiliary Control Reg */
+#define	PTP_ATNR	0x48	/* Auxiliary Timestamp - Nanoseconds Reg */
+#define	PTP_ATSR	0x4c	/* Auxiliary Timestamp - Seconds Reg */
 
 #define	PTP_STNSUR_ADDSUB_SHIFT	31
 #define	PTP_DIGITAL_ROLLOVER_MODE	0x3B9ACA00	/* 10e9-1 ns */
@@ -64,4 +67,24 @@
 #define	PTP_SSIR_SSINC_MASK		0xff
 #define	GMAC4_PTP_SSIR_SSINC_SHIFT	16
 
+/* Auxiliary Control defines */
+#define	PTP_ACR_ATSFC		BIT(0)	/* Auxiliary Snapshot FIFO Clear */
+#define	PTP_ACR_ATSEN0		BIT(4)	/* Auxiliary Snapshot 0 Enable */
+#define	PTP_ACR_ATSEN1		BIT(5)	/* Auxiliary Snapshot 1 Enable */
+#define	PTP_ACR_ATSEN2		BIT(6)	/* Auxiliary Snapshot 2 Enable */
+#define	PTP_ACR_ATSEN3		BIT(7)	/* Auxiliary Snapshot 3 Enable */
+#define	PTP_ACR_MASK		GENMASK(7, 4)	/* Aux Snapshot Mask */
+#define	PMC_ART_VALUE0		0x01	/* PMC_ART[15:0] timer value */
+#define	PMC_ART_VALUE1		0x02	/* PMC_ART[31:16] timer value */
+#define	PMC_ART_VALUE2		0x03	/* PMC_ART[47:32] timer value */
+#define	PMC_ART_VALUE3		0x04	/* PMC_ART[63:48] timer value */
+#define	GMAC4_ART_TIME_SHIFT	16	/* ART TIME 16-bits shift */
+
+enum aux_snapshot {
+	AUX_SNAPSHOT0 = 0x10,
+	AUX_SNAPSHOT1 = 0x20,
+	AUX_SNAPSHOT2 = 0x40,
+	AUX_SNAPSHOT3 = 0x80,
+};
+
 #endif	/* __STMMAC_PTP_H__ */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 10abc80b601e..5134e802f39a 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -186,6 +186,8 @@ struct plat_stmmacenet_data {
 	void (*exit)(struct platform_device *pdev, void *priv);
 	struct mac_device_info *(*setup)(void *priv);
 	int (*clks_config)(void *priv, bool enabled);
+	int (*crosststamp)(ktime_t *device, struct system_counterval_t *system,
+			   void *ctx);
 	void *bsp_priv;
 	struct clk *stmmac_clk;
 	struct clk *pclk;
@@ -206,5 +208,7 @@ struct plat_stmmacenet_data {
 	u8 vlan_fail_q;
 	unsigned int eee_usecs_rate;
 	struct pci_dev *pdev;
+	bool has_crossts;
+	int int_snapshot_num;
 };
 #endif
-- 
cgit v1.2.3


From b7fbc88692e60a4955ac54af0bcf7f2162761339 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 23 Mar 2021 13:53:29 +0100
Subject: octeontx2: fix -Wnonnull warning

When compile testing this driver on a platform on which probe() is
known to fail at compile time, gcc warns about the cgx_lmactype_string[]
array being uninitialized:

In function 'strncpy',
    inlined from 'link_status_user_format' at /git/arm-soc/drivers/net/ethernet/marvell/octeontx2/af/cgx.c:838:2,
    inlined from 'cgx_link_change_handler' at /git/arm-soc/drivers/net/ethernet/marvell/octeontx2/af/cgx.c:853:2:
include/linux/fortify-string.h:27:30: error: argument 2 null where non-null expected [-Werror=nonnull]
   27 | #define __underlying_strncpy __builtin_strncpy

Address this by turning the runtime initialization into a fixed array,
which should also produce better code.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 60 ++++++++++++-------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
index 68deae529bc9..fac6474ad694 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c
@@ -30,10 +30,35 @@
 static LIST_HEAD(cgx_list);
 
 /* Convert firmware speed encoding to user format(Mbps) */
-static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX];
+static const u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX] = {
+	[CGX_LINK_NONE] = 0,
+	[CGX_LINK_10M] = 10,
+	[CGX_LINK_100M] = 100,
+	[CGX_LINK_1G] = 1000,
+	[CGX_LINK_2HG] = 2500,
+	[CGX_LINK_5G] = 5000,
+	[CGX_LINK_10G] = 10000,
+	[CGX_LINK_20G] = 20000,
+	[CGX_LINK_25G] = 25000,
+	[CGX_LINK_40G] = 40000,
+	[CGX_LINK_50G] = 50000,
+	[CGX_LINK_80G] = 80000,
+	[CGX_LINK_100G] = 100000,
+};
 
 /* Convert firmware lmac type encoding to string */
-static char *cgx_lmactype_string[LMAC_MODE_MAX];
+static const char *cgx_lmactype_string[LMAC_MODE_MAX] = {
+	[LMAC_MODE_SGMII] = "SGMII",
+	[LMAC_MODE_XAUI] = "XAUI",
+	[LMAC_MODE_RXAUI] = "RXAUI",
+	[LMAC_MODE_10G_R] = "10G_R",
+	[LMAC_MODE_40G_R] = "40G_R",
+	[LMAC_MODE_QSGMII] = "QSGMII",
+	[LMAC_MODE_25G_R] = "25G_R",
+	[LMAC_MODE_50G_R] = "50G_R",
+	[LMAC_MODE_100G_R] = "100G_R",
+	[LMAC_MODE_USXGMII] = "USXGMII",
+};
 
 /* CGX PHY management internal APIs */
 static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en);
@@ -659,34 +684,6 @@ int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id)
 	return err;
 }
 
-static inline void cgx_link_usertable_init(void)
-{
-	cgx_speed_mbps[CGX_LINK_NONE] = 0;
-	cgx_speed_mbps[CGX_LINK_10M] = 10;
-	cgx_speed_mbps[CGX_LINK_100M] = 100;
-	cgx_speed_mbps[CGX_LINK_1G] = 1000;
-	cgx_speed_mbps[CGX_LINK_2HG] = 2500;
-	cgx_speed_mbps[CGX_LINK_5G] = 5000;
-	cgx_speed_mbps[CGX_LINK_10G] = 10000;
-	cgx_speed_mbps[CGX_LINK_20G] = 20000;
-	cgx_speed_mbps[CGX_LINK_25G] = 25000;
-	cgx_speed_mbps[CGX_LINK_40G] = 40000;
-	cgx_speed_mbps[CGX_LINK_50G] = 50000;
-	cgx_speed_mbps[CGX_LINK_80G] = 80000;
-	cgx_speed_mbps[CGX_LINK_100G] = 100000;
-
-	cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII";
-	cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI";
-	cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI";
-	cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R";
-	cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R";
-	cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII";
-	cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R";
-	cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R";
-	cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R";
-	cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII";
-}
-
 static int cgx_link_usertable_index_map(int speed)
 {
 	switch (speed) {
@@ -828,7 +825,7 @@ static inline void link_status_user_format(u64 lstat,
 					   struct cgx_link_user_info *linfo,
 					   struct cgx *cgx, u8 lmac_id)
 {
-	char *lmac_string;
+	const char *lmac_string;
 
 	linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat);
 	linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat);
@@ -1377,7 +1374,6 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	list_add(&cgx->cgx_list, &cgx_list);
 
-	cgx_link_usertable_init();
 
 	cgx_populate_features(cgx);
 
-- 
cgit v1.2.3


From 4adec7f81df8e4fbf55f9d5ca98afa39f15b050f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 23 Mar 2021 14:03:32 +0100
Subject: rhashtable: avoid -Wrestrict warning on overlapping sprintf output

sprintf() is declared with a restrict keyword to not allow input and
output to point to the same buffer:

lib/test_rhashtable.c: In function 'print_ht':
lib/test_rhashtable.c:504:4: error: 'sprintf' argument 3 overlaps destination object 'buff' [-Werror=restrict]
  504 |    sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
      |    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lib/test_rhashtable.c:489:7: note: destination object referenced by 'restrict'-qualified argument 1 was declared here
  489 |  char buff[512] = "";
      |       ^~~~

Rework this function to remember the last offset instead to
avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/test_rhashtable.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 76c607ee6db5..5a1dd4736b56 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -487,6 +487,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 	struct rhashtable *ht;
 	const struct bucket_table *tbl;
 	char buff[512] = "";
+	int offset = 0;
 	unsigned int i, cnt = 0;
 
 	ht = &rhlt->ht;
@@ -501,18 +502,18 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 		next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
 
 		if (!rht_is_a_nulls(pos)) {
-			sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
+			offset += sprintf(buff + offset, "\nbucket[%d] -> ", i);
 		}
 
 		while (!rht_is_a_nulls(pos)) {
 			struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
-			sprintf(buff, "%s[[", buff);
+			offset += sprintf(buff + offset, "[[");
 			do {
 				pos = &list->rhead;
 				list = rht_dereference(list->next, ht);
 				p = rht_obj(ht, pos);
 
-				sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid,
+				offset += sprintf(buff + offset, " val %d (tid=%d)%s", p->value.id, p->value.tid,
 					list? ", " : " ");
 				cnt++;
 			} while (list);
@@ -521,7 +522,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt)
 			next = !rht_is_a_nulls(pos) ?
 				rht_dereference(pos->next, ht) : NULL;
 
-			sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : "");
+			offset += sprintf(buff + offset, "]]%s", !rht_is_a_nulls(pos) ? " -> " : "");
 		}
 	}
 	printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
-- 
cgit v1.2.3


From 0ef25ed104ac17fa0586fbb076f24a5e8940b966 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Wed, 24 Mar 2021 00:46:40 +0800
Subject: net: phy: add genphy_c45_loopback

Add generic code to enable C45 PHY loopback into the common phy-c45.c
file. This will allow C45 PHY drivers aceess this by setting
.set_loopback.

Suggested-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 8 ++++++++
 include/linux/phy.h       | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 077f2929c45e..91e3acb9e397 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -560,6 +560,14 @@ int gen10g_config_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
+int genphy_c45_loopback(struct phy_device *phydev, bool enable)
+{
+	return phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+			      MDIO_PCS_CTRL1_LOOPBACK,
+			      enable ? MDIO_PCS_CTRL1_LOOPBACK : 0);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_loopback);
+
 struct phy_driver genphy_c45_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1a12e4436b5b..8e2cf84b2318 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1532,6 +1532,7 @@ int genphy_c45_read_mdix(struct phy_device *phydev);
 int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 int genphy_c45_read_status(struct phy_device *phydev);
 int genphy_c45_config_aneg(struct phy_device *phydev);
+int genphy_c45_loopback(struct phy_device *phydev, bool enable);
 
 /* Generic C45 PHY driver */
 extern struct phy_driver genphy_c45_driver;
-- 
cgit v1.2.3


From d137c70d0e7a3db91ed0c674acb561c115911100 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Wed, 24 Mar 2021 00:46:41 +0800
Subject: net: phy: marvell10g: Add PHY loopback support

Add support for PHY loopback for Marvell 88x2110 and Marvell 88x3310.

This allow user to perform PHY loopback test using ethtool selftest.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index b1bb9b8e1e4e..74b64e52ffa2 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -781,6 +781,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.get_tunable	= mv3310_get_tunable,
 		.set_tunable	= mv3310_set_tunable,
 		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
 	},
 	{
 		.phy_id		= MARVELL_PHY_ID_88E2110,
@@ -796,6 +797,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.get_tunable	= mv3310_get_tunable,
 		.set_tunable	= mv3310_set_tunable,
 		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
 	},
 };
 
-- 
cgit v1.2.3


From c3dde0ee7163856bf295fdbc09205b61581a7f92 Mon Sep 17 00:00:00 2001
From: Sai Kalyaan Palla <saikalyaan63@gmail.com>
Date: Tue, 23 Mar 2021 23:14:19 +0530
Subject: net: decnet: Fixed multiple Coding Style issues

Made changes to coding style as suggested by checkpatch.pl
    changes are of the type:
            space required before the open parenthesis '('
            space required after that ','

Signed-off-by: Sai Kalyaan Palla <saikalyaan63@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2f1497797861..32b1bed8ae51 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -92,7 +92,7 @@ struct dn_rt_hash_bucket {
 extern struct neigh_table dn_neigh_table;
 
 
-static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00};
+static unsigned char dn_hiord_addr[6] = {0xAA, 0x00, 0x04, 0x00, 0x00, 0x00};
 
 static const int dn_rt_min_delay = 2 * HZ;
 static const int dn_rt_max_delay = 10 * HZ;
@@ -362,7 +362,7 @@ static void dn_run_flush(struct timer_list *unused)
 		if (!rt)
 			goto nothing_to_declare;
 
-		for(; rt; rt = next) {
+		for (; rt; rt = next) {
 			next = rcu_dereference_raw(rt->dn_next);
 			RCU_INIT_POINTER(rt->dn_next, NULL);
 			dst_dev_put(&rt->dst);
@@ -902,7 +902,7 @@ static inline int dn_match_addr(__le16 addr1, __le16 addr2)
 {
 	__u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
 	int match = 16;
-	while(tmp) {
+	while (tmp) {
 		tmp >>= 1;
 		match--;
 	}
@@ -1388,7 +1388,7 @@ static int dn_route_input_slow(struct sk_buff *skb)
 		fld.saddr = src_map;
 	}
 
-	switch(res.type) {
+	switch (res.type) {
 	case RTN_UNICAST:
 		/*
 		 * Forwarding check here, we only check for forwarding
@@ -1531,7 +1531,7 @@ static int dn_route_input(struct sk_buff *skb)
 		return 0;
 
 	rcu_read_lock();
-	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
+	for (rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
 	    rt = rcu_dereference(rt->dn_next)) {
 		if ((rt->fld.saddr == cb->src) &&
 		    (rt->fld.daddr == cb->dst) &&
@@ -1744,13 +1744,13 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	s_h = cb->args[0];
 	s_idx = idx = cb->args[1];
-	for(h = 0; h <= dn_rt_hash_mask; h++) {
+	for (h = 0; h <= dn_rt_hash_mask; h++) {
 		if (h < s_h)
 			continue;
 		if (h > s_h)
 			s_idx = 0;
 		rcu_read_lock_bh();
-		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
+		for (rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
 			rt = rcu_dereference_bh(rt->dn_next), idx++) {
 			if (idx < s_idx)
@@ -1784,7 +1784,7 @@ static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
 	struct dn_route *rt = NULL;
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
+	for (s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
 		rcu_read_lock_bh();
 		rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
 		if (rt)
@@ -1814,7 +1814,7 @@ static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 	struct dn_route *rt = dn_rt_cache_get_first(seq);
 
 	if (rt) {
-		while(*pos && (rt = dn_rt_cache_get_next(seq, rt)))
+		while (*pos && (rt = dn_rt_cache_get_next(seq, rt)))
 			--*pos;
 	}
 	return *pos ? NULL : rt;
@@ -1869,21 +1869,21 @@ void __init dn_route_init(void)
 
 	goal = totalram_pages() >> (26 - PAGE_SHIFT);
 
-	for(order = 0; (1UL << order) < goal; order++)
+	for (order = 0; (1UL << order) < goal; order++)
 		/* NOTHING */;
 
 	/*
 	 * Only want 1024 entries max, since the table is very, very unlikely
 	 * to be larger than that.
 	 */
-	while(order && ((((1UL << order) * PAGE_SIZE) /
+	while (order && ((((1UL << order) * PAGE_SIZE) /
 				sizeof(struct dn_rt_hash_bucket)) >= 2048))
 		order--;
 
 	do {
 		dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
 			sizeof(struct dn_rt_hash_bucket);
-		while(dn_rt_hash_mask & (dn_rt_hash_mask - 1))
+		while (dn_rt_hash_mask & (dn_rt_hash_mask - 1))
 			dn_rt_hash_mask--;
 		dn_rt_hash_table = (struct dn_rt_hash_bucket *)
 			__get_free_pages(GFP_ATOMIC, order);
@@ -1898,7 +1898,7 @@ void __init dn_route_init(void)
 		(long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
 
 	dn_rt_hash_mask--;
-	for(i = 0; i <= dn_rt_hash_mask; i++) {
+	for (i = 0; i <= dn_rt_hash_mask; i++) {
 		spin_lock_init(&dn_rt_hash_table[i].lock);
 		dn_rt_hash_table[i].chain = NULL;
 	}
-- 
cgit v1.2.3


From 20fd4f421cf4c21ab37a8bf31db50c69f1b49355 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 24 Mar 2021 14:42:20 +0000
Subject: netdevsim: switch to memdup_user_nul()

Use memdup_user_nul() helper instead of open-coding to
simplify the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/health.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 21e2974660e7..04aebdf85747 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -235,15 +235,10 @@ static ssize_t nsim_dev_health_break_write(struct file *file,
 	char *break_msg;
 	int err;
 
-	break_msg = kmalloc(count + 1, GFP_KERNEL);
-	if (!break_msg)
-		return -ENOMEM;
+	break_msg = memdup_user_nul(data, count);
+	if (IS_ERR(break_msg))
+		return PTR_ERR(break_msg);
 
-	if (copy_from_user(break_msg, data, count)) {
-		err = -EFAULT;
-		goto out;
-	}
-	break_msg[count] = '\0';
 	if (break_msg[count - 1] == '\n')
 		break_msg[count - 1] = '\0';
 
-- 
cgit v1.2.3


From 110eccdb2469b9b54e509db1b3ea12a0626b7967 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 24 Mar 2021 17:44:54 +0200
Subject: net: enetc: don't depend on system endianness in
 enetc_set_vlan_ht_filter

ENETC has a 64-entry hash table for VLAN RX filtering per Station
Interface, which is accessed through two 32-bit registers: VHFR0 holding
the low portion, and VHFR1 holding the high portion.

The enetc_set_vlan_ht_filter function looks at the pf->vlan_ht_filter
bitmap, which is fundamentally an unsigned long variable, and casts it
to a u32 array of two elements. It puts the first u32 element into VHFR0
and the second u32 element into VHFR1.

It is easy to imagine that this will not work on big endian systems
(although, yes, we have bigger problems, because currently enetc assumes
that the CPU endianness is equal to the controller endianness, aka
little endian - but let's assume that we could add a cpu_to_le32 in
enetc_wd_reg and a le32_to_cpu in enetc_rd_reg).

Let's use lower_32_bits and upper_32_bits which are designed to work
regardless of endianness.

Tested that both the old and the new method produce the same results:

$ ethtool -K eth1 rx-vlan-filter on
$ ip link add link eth1 name eth1.100 type vlan id 100
enetc_set_vlan_ht_filter: method 1: si_idx 0 VHFR0 0x0 VHFR1 0x20
enetc_set_vlan_ht_filter: method 2: si_idx 0 VHFR0 0x0 VHFR1 0x20
$ ip link add link eth1 name eth1.101 type vlan id 101
enetc_set_vlan_ht_filter: method 1: si_idx 0 VHFR0 0x0 VHFR1 0x30
enetc_set_vlan_ht_filter: method 2: si_idx 0 VHFR0 0x0 VHFR1 0x30
$ ip link add link eth1 name eth1.34 type vlan id 34
enetc_set_vlan_ht_filter: method 1: si_idx 0 VHFR0 0x0 VHFR1 0x34
enetc_set_vlan_ht_filter: method 2: si_idx 0 VHFR0 0x0 VHFR1 0x34
$ ip link add link eth1 name eth1.1024 type vlan id 1024
enetc_set_vlan_ht_filter: method 1: si_idx 0 VHFR0 0x1 VHFR1 0x34
enetc_set_vlan_ht_filter: method 2: si_idx 0 VHFR0 0x1 VHFR1 0x34

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 3a7a9102eccb..9c69ca516192 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -248,10 +248,10 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 }
 
 static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx,
-				     u32 *hash)
+				     unsigned long hash)
 {
-	enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), *hash);
-	enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), *(hash + 1));
+	enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), lower_32_bits(hash));
+	enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), upper_32_bits(hash));
 }
 
 static int enetc_vid_hash_idx(unsigned int vid)
@@ -279,7 +279,7 @@ static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash)
 		}
 	}
 
-	enetc_set_vlan_ht_filter(&pf->si->hw, 0, (u32 *)pf->vlan_ht_filter);
+	enetc_set_vlan_ht_filter(&pf->si->hw, 0, *pf->vlan_ht_filter);
 }
 
 static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid)
-- 
cgit v1.2.3


From e366a39208e58ef460b9539e235413dc2b10bc75 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 24 Mar 2021 17:44:55 +0200
Subject: net: enetc: don't depend on system endianness in enetc_set_mac_ht_flt

When enetc runs out of exact match entries for unicast address
filtering, it switches to an approach based on hash tables, where
multiple MAC addresses might end up in the same bucket.

However, the enetc_set_mac_ht_flt function currently depends on the
system endianness, because it interprets the 64-bit hash value as an
array of two u32 elements. Modify this to use lower_32_bits and
upper_32_bits.

Tested by forcing enetc to go into hash table mode by creating two
macvlan upper interfaces:

ip link add link eno0 address 00:01:02:03:00:00 eno0.0 type macvlan && ip link set eno0.0 up
ip link add link eno0 address 00:01:02:03:00:01 eno0.1 type macvlan && ip link set eno0.1 up

and verified that the same bit values are written to the registers
before and after:

enetc_sync_mac_filters: addr 00:00:80:00:40:10 exact match 0
enetc_sync_mac_filters: addr 00:00:00:00:80:00 exact match 0
enetc_set_mac_ht_flt: hash 0x80008000000000 UMHFR0 0x0 UMHFR1 0x800080

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 9c69ca516192..5e95afd61c87 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -129,16 +129,20 @@ static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type)
 }
 
 static void enetc_set_mac_ht_flt(struct enetc_si *si, int si_idx, int type,
-				 u32 *hash)
+				 unsigned long hash)
 {
 	bool err = si->errata & ENETC_ERR_UCMCSWP;
 
 	if (type == UC) {
-		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), *hash);
-		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), *(hash + 1));
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err),
+			      lower_32_bits(hash));
+		enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx),
+			      upper_32_bits(hash));
 	} else { /* MC */
-		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), *hash);
-		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), *(hash + 1));
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err),
+			      lower_32_bits(hash));
+		enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx),
+			      upper_32_bits(hash));
 	}
 }
 
@@ -182,7 +186,7 @@ static void enetc_sync_mac_filters(struct enetc_pf *pf)
 		if (i == UC)
 			enetc_clear_mac_flt_entry(si, pos);
 
-		enetc_set_mac_ht_flt(si, 0, i, (u32 *)f->mac_hash_table);
+		enetc_set_mac_ht_flt(si, 0, i, *f->mac_hash_table);
 	}
 }
 
-- 
cgit v1.2.3


From 3c85a8b81cc89c712c4f44cb1a4d5547e472fb1f Mon Sep 17 00:00:00 2001
From: Cooper Lees <me@cooperlees.com>
Date: Tue, 23 Mar 2021 20:47:38 -0700
Subject: Add Open Routing Protocol ID to `rtnetlink.h`

- The Open Routing (Open/R) network protocol netlink handler uses ID 99
- Will also add to `/etc/iproute2/rt_protos` once this is accepted
- For more information: https://github.com/facebook/openr
Signed-off-by: From: Cooper Lees <me@cooperlees.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d35953bc7d53..5888492a5257 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -290,6 +290,7 @@ enum {
 #define RTPROT_MROUTED		17	/* Multicast daemon */
 #define RTPROT_KEEPALIVED	18	/* Keepalived daemon */
 #define RTPROT_BABEL		42	/* Babel daemon */
+#define RTPROT_OPENR		99	/* Open Routing (Open/R) Routes */
 #define RTPROT_BGP		186	/* BGP Routes */
 #define RTPROT_ISIS		187	/* ISIS Routes */
 #define RTPROT_OSPF		188	/* OSPF Routes */
-- 
cgit v1.2.3


From c6fc65f48072c9c6144ea2d145c011317bd03441 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:15 +0200
Subject: mlxsw: spectrum_router: Add support for resilient nexthop groups

Parse the configuration of resilient nexthop groups to existing mlxsw
data structures. Unlike non-resilient groups, nexthops without a valid
MAC or router interface (RIF) are programmed with a trap action instead
of not being programmed at all.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 26 +++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 75c9fc47cd69..db859c2bd810 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2911,7 +2911,8 @@ struct mlxsw_sp_nexthop_group_info {
 	u16 count;
 	int sum_norm_weight;
 	u8 adj_index_valid:1,
-	   gateway:1; /* routes using the group use a gateway */
+	   gateway:1, /* routes using the group use a gateway */
+	   is_resilient:1;
 	struct mlxsw_sp_nexthop nexthops[0];
 #define nh_rif	nexthops[0].rif
 };
@@ -3905,6 +3906,9 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 	if (!removing) {
 		nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD;
 		nh->should_offload = 1;
+	} else if (nh->nhgi->is_resilient) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
+		nh->should_offload = 1;
 	} else {
 		nh->should_offload = 0;
 	}
@@ -4484,6 +4488,15 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp,
 	if (nh_obj->is_reject)
 		mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh);
 
+	/* In a resilient nexthop group, all the nexthops must be written to
+	 * the adjacency table. Even if they do not have a valid neighbour or
+	 * RIF.
+	 */
+	if (nh_grp->nhgi->is_resilient && !nh->should_offload) {
+		nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP;
+		nh->should_offload = 1;
+	}
+
 	return 0;
 
 err_type_init:
@@ -4500,6 +4513,7 @@ static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
 	list_del(&nh->router_list_node);
 	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+	nh->should_offload = 0;
 }
 
 static int
@@ -4509,6 +4523,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_nexthop_group_info *nhgi;
 	struct mlxsw_sp_nexthop *nh;
+	bool is_resilient = false;
 	unsigned int nhs;
 	int err, i;
 
@@ -4519,6 +4534,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 	case NH_NOTIFIER_INFO_TYPE_GRP:
 		nhs = info->nh_grp->num_nh;
 		break;
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		nhs = info->nh_res_table->num_nh_buckets;
+		is_resilient = true;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -4529,6 +4548,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 	nh_grp->nhgi = nhgi;
 	nhgi->nh_grp = nh_grp;
 	nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info);
+	nhgi->is_resilient = is_resilient;
 	nhgi->count = nhs;
 	for (i = 0; i < nhgi->count; i++) {
 		struct nh_notifier_single_info *nh_obj;
@@ -4544,6 +4564,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 			nh_obj = &info->nh_grp->nh_entries[i].nh;
 			weight = info->nh_grp->nh_entries[i].weight;
 			break;
+		case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+			nh_obj = &info->nh_res_table->nhs[i];
+			weight = 1;
+			break;
 		default:
 			err = -EINVAL;
 			goto err_nexthop_obj_init;
-- 
cgit v1.2.3


From 62b67ff33bee9e1adf408ed3c708fdf3c69b15be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:16 +0200
Subject: mlxsw: spectrum_router: Add ability to overwrite adjacency entry only
 when inactive

Allow the driver to instruct the device to only overwrite an adjacency
entry if its activity is cleared. Currently, adjacency entry is always
overwritten, regardless of activity.

This will be used by subsequent patches to prevent replacement of active
nexthop buckets.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c   |  3 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c    |  9 ++++--
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h    |  3 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 34 +++++++++++++---------
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  2 +-
 5 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index af2093fc5025..9ff286ba9bf0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -1196,7 +1196,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 		else
 			mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
 		mlxsw_sp_nexthop_eth_update(mlxsw_sp,
-					    adj_index + adj_hash_index, nh);
+					    adj_index + adj_hash_index, nh,
+					    true);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 6ccca39bae84..23896260720f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -127,14 +127,17 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
 
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				  struct mlxsw_sp_ipip_entry *ipip_entry)
+				  struct mlxsw_sp_ipip_entry *ipip_entry,
+				  bool force)
 {
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
 	__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
 	char ratr_pl[MLXSW_REG_RATR_LEN];
+	enum mlxsw_reg_ratr_op op;
 
-	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-			    true, MLXSW_REG_RATR_TYPE_IPIP,
+	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
+		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
+	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP,
 			    adj_index, rif_index);
 	mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 87bef9880e5e..af7dd19f50e6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -40,7 +40,8 @@ struct mlxsw_sp_ipip_ops {
 	enum mlxsw_sp_l3proto ul_proto; /* Underlay. */
 
 	int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-			      struct mlxsw_sp_ipip_entry *ipip_entry);
+			      struct mlxsw_sp_ipip_entry *ipip_entry,
+			      bool force);
 
 	bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
 			    const struct net_device *ol_dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index db859c2bd810..63c2c7a84c64 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3419,16 +3419,19 @@ err_mass_update_vr:
 
 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
 					 u32 adj_index,
-					 struct mlxsw_sp_nexthop *nh)
+					 struct mlxsw_sp_nexthop *nh,
+					 bool force)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
 	char ratr_pl[MLXSW_REG_RATR_LEN];
+	enum mlxsw_reg_ratr_op op;
 	u16 rif_index;
 
 	rif_index = nh->rif ? nh->rif->rif_index :
 			      mlxsw_sp->router->lb_rif_index;
-	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
-			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
+	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
+		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
+	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET,
 			    adj_index, rif_index);
 	switch (nh->action) {
 	case MLXSW_SP_NEXTHOP_ACTION_FORWARD:
@@ -3456,7 +3459,7 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				struct mlxsw_sp_nexthop *nh)
+				struct mlxsw_sp_nexthop *nh, bool force)
 {
 	int i;
 
@@ -3464,7 +3467,7 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 		int err;
 
 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
-						    nh);
+						    nh, force);
 		if (err)
 			return err;
 	}
@@ -3474,17 +3477,19 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 
 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					  u32 adj_index,
-					  struct mlxsw_sp_nexthop *nh)
+					  struct mlxsw_sp_nexthop *nh,
+					  bool force)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 
 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
-	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
+	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
+					force);
 }
 
 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					u32 adj_index,
-					struct mlxsw_sp_nexthop *nh)
+					struct mlxsw_sp_nexthop *nh, bool force)
 {
 	int i;
 
@@ -3492,7 +3497,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 		int err;
 
 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
-						     nh);
+						     nh, force);
 		if (err)
 			return err;
 	}
@@ -3501,7 +3506,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				   struct mlxsw_sp_nexthop *nh)
+				   struct mlxsw_sp_nexthop *nh, bool force)
 {
 	/* When action is discard or trap, the nexthop must be
 	 * programmed as an Ethernet nexthop.
@@ -3509,9 +3514,11 @@ static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH ||
 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
-		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh);
+		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
+						   force);
 	else
-		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh);
+		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
+						    force);
 }
 
 static int
@@ -3534,7 +3541,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 		if (nh->update || reallocate) {
 			int err = 0;
 
-			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh);
+			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
+						      true);
 			if (err)
 				return err;
 			nh->update = 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 01fd9a3d5944..3bb2a06359a3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -209,7 +209,7 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter);
 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				struct mlxsw_sp_nexthop *nh);
+				struct mlxsw_sp_nexthop *nh, bool force);
 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop *nh);
 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
-- 
cgit v1.2.3


From 197fdfd107e30a59e96691273b0b175cbf2471b8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:17 +0200
Subject: mlxsw: spectrum_router: Pass payload pointer to nexthop update
 function

Have the caller pass a pointer to the payload of the RATR register to
the function updating a single nexthop / adjacency entry.

In a subsequent patch, this will allow the caller to make sure
replacement was successful by querying the state of the adjacency entry
after replacement and comparing with the initial request.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c   |  3 ++-
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.c    |  3 +--
 .../net/ethernet/mellanox/mlxsw/spectrum_ipip.h    |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 27 ++++++++++++----------
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  3 ++-
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 9ff286ba9bf0..1a2fef2a5379 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -1178,6 +1178,7 @@ out:
 
 static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 {
+	char ratr_pl[MLXSW_REG_RATR_LEN];
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_nexthop *nh;
 	u32 adj_hash_index = 0;
@@ -1197,7 +1198,7 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
 			mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
 		mlxsw_sp_nexthop_eth_update(mlxsw_sp,
 					    adj_index + adj_hash_index, nh,
-					    true);
+					    true, ratr_pl);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index 23896260720f..b8b08a6a1d10 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@ -128,11 +128,10 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr)
 static int
 mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 				  struct mlxsw_sp_ipip_entry *ipip_entry,
-				  bool force)
+				  bool force, char *ratr_pl)
 {
 	u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
 	__be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
-	char ratr_pl[MLXSW_REG_RATR_LEN];
 	enum mlxsw_reg_ratr_op op;
 
 	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index af7dd19f50e6..f0837b42d1d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -41,7 +41,7 @@ struct mlxsw_sp_ipip_ops {
 
 	int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 			      struct mlxsw_sp_ipip_entry *ipip_entry,
-			      bool force);
+			      bool force, char *ratr_pl);
 
 	bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp,
 			    const struct net_device *ol_dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 63c2c7a84c64..02200b183bf7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3420,10 +3420,9 @@ err_mass_update_vr:
 static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
 					 u32 adj_index,
 					 struct mlxsw_sp_nexthop *nh,
-					 bool force)
+					 bool force, char *ratr_pl)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
-	char ratr_pl[MLXSW_REG_RATR_LEN];
 	enum mlxsw_reg_ratr_op op;
 	u16 rif_index;
 
@@ -3459,7 +3458,8 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				struct mlxsw_sp_nexthop *nh, bool force)
+				struct mlxsw_sp_nexthop *nh, bool force,
+				char *ratr_pl)
 {
 	int i;
 
@@ -3467,7 +3467,7 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 		int err;
 
 		err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i,
-						    nh, force);
+						    nh, force, ratr_pl);
 		if (err)
 			return err;
 	}
@@ -3478,18 +3478,19 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					  u32 adj_index,
 					  struct mlxsw_sp_nexthop *nh,
-					  bool force)
+					  bool force, char *ratr_pl)
 {
 	const struct mlxsw_sp_ipip_ops *ipip_ops;
 
 	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
 	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry,
-					force);
+					force, ratr_pl);
 }
 
 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 					u32 adj_index,
-					struct mlxsw_sp_nexthop *nh, bool force)
+					struct mlxsw_sp_nexthop *nh, bool force,
+					char *ratr_pl)
 {
 	int i;
 
@@ -3497,7 +3498,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 		int err;
 
 		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
-						     nh, force);
+						     nh, force, ratr_pl);
 		if (err)
 			return err;
 	}
@@ -3506,7 +3507,8 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				   struct mlxsw_sp_nexthop *nh, bool force)
+				   struct mlxsw_sp_nexthop *nh, bool force,
+				   char *ratr_pl)
 {
 	/* When action is discard or trap, the nexthop must be
 	 * programmed as an Ethernet nexthop.
@@ -3515,10 +3517,10 @@ static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD ||
 	    nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
 		return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh,
-						   force);
+						   force, ratr_pl);
 	else
 		return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh,
-						    force);
+						    force, ratr_pl);
 }
 
 static int
@@ -3526,6 +3528,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_nexthop_group_info *nhgi,
 			      bool reallocate)
 {
+	char ratr_pl[MLXSW_REG_RATR_LEN];
 	u32 adj_index = nhgi->adj_index; /* base */
 	struct mlxsw_sp_nexthop *nh;
 	int i;
@@ -3542,7 +3545,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
 			int err = 0;
 
 			err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh,
-						      true);
+						      true, ratr_pl);
 			if (err)
 				return err;
 			nh->update = 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 3bb2a06359a3..b85c5f6c2262 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -209,7 +209,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_nexthop *nh, u64 *p_counter);
 int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
-				struct mlxsw_sp_nexthop *nh, bool force);
+				struct mlxsw_sp_nexthop *nh, bool force,
+				char *ratr_pl);
 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp_nexthop *nh);
 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
-- 
cgit v1.2.3


From 617a77f044ed7a92bb0c01c939d816f870947d5a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:18 +0200
Subject: mlxsw: spectrum_router: Add nexthop bucket replacement support

Replace a single nexthop bucket upon receiving a
'NEXTHOP_EVENT_BUCKET_REPLACE' notification.

When the 'force' parameter is not set, instruct the device to only
overwrite an adjacency entry if its activity is cleared, so as not to
break existing flows using the adjacency entry. The device does not
provide feedback if the replacement was successful in this case, so the
contents of the adjacency entry after the replacement are compared with
the replacement request.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 134 +++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 02200b183bf7..d6e91f1f48cc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4337,6 +4337,8 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
+
 static int
 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 				     const struct nh_notifier_single_info *nh,
@@ -4806,6 +4808,134 @@ static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp);
 }
 
+static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp,
+					     u32 adj_index, char *ratr_pl)
+{
+	MLXSW_REG_ZERO(ratr, ratr_pl);
+	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index);
+	mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16);
+
+	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
+}
+
+static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new)
+{
+	/* Clear the opcode and activity on both the old and new payload as
+	 * they are irrelevant for the comparison.
+	 */
+	mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_a_set(ratr_pl, 0);
+	mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ);
+	mlxsw_reg_ratr_a_set(ratr_pl_new, 0);
+
+	/* If the contents of the adjacency entry are consistent with the
+	 * replacement request, then replacement was successful.
+	 */
+	if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN))
+		return 0;
+
+	return -EINVAL;
+}
+
+static int
+mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_nexthop *nh,
+				       struct nh_notifier_info *info)
+{
+	u16 bucket_index = info->nh_res_bucket->bucket_index;
+	struct netlink_ext_ack *extack = info->extack;
+	bool force = info->nh_res_bucket->force;
+	char ratr_pl_new[MLXSW_REG_RATR_LEN];
+	char ratr_pl[MLXSW_REG_RATR_LEN];
+	u32 adj_index;
+	int err;
+
+	/* No point in trying an atomic replacement if the idle timer interval
+	 * is smaller than the interval in which we query and clear activity.
+	 */
+	force = info->nh_res_bucket->idle_timer_ms <
+		MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
+
+	adj_index = nh->nhgi->adj_index + bucket_index;
+	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket");
+		return err;
+	}
+
+	if (!force) {
+		err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index,
+							ratr_pl_new);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent");
+			return err;
+		}
+
+		err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement");
+			return err;
+		}
+	}
+
+	nh->update = 0;
+	nh->offloaded = 1;
+
+	return 0;
+}
+
+static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp,
+					       struct nh_notifier_info *info)
+{
+	u16 bucket_index = info->nh_res_bucket->bucket_index;
+	struct netlink_ext_ack *extack = info->extack;
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	struct nh_notifier_single_info *nh_obj;
+	struct mlxsw_sp_nexthop_group *nh_grp;
+	struct mlxsw_sp_nexthop *nh;
+	int err;
+
+	nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id);
+	if (!nh_grp) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found");
+		return -EINVAL;
+	}
+
+	nhgi = nh_grp->nhgi;
+
+	if (bucket_index >= nhgi->count) {
+		NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range");
+		return -EINVAL;
+	}
+
+	nh = &nhgi->nexthops[bucket_index];
+	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
+
+	nh_obj = &info->nh_res_bucket->new_nh;
+	err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement");
+		goto err_nexthop_obj_init;
+	}
+
+	err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info);
+	if (err)
+		goto err_nexthop_obj_bucket_adj_update;
+
+	return 0;
+
+err_nexthop_obj_bucket_adj_update:
+	mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh);
+err_nexthop_obj_init:
+	nh_obj = &info->nh_res_bucket->old_nh;
+	mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1);
+	/* The old adjacency entry was not overwritten */
+	nh->update = 0;
+	nh->offloaded = 1;
+	return err;
+}
+
 static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 				      unsigned long event, void *ptr)
 {
@@ -4827,6 +4957,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb,
 	case NEXTHOP_EVENT_DEL:
 		mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info);
 		break;
+	case NEXTHOP_EVENT_BUCKET_REPLACE:
+		err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp,
+							  info);
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From d7761cb30374d5fcd45dd91ec015b9e7f7d5a120 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:19 +0200
Subject: mlxsw: spectrum_router: Update hardware flags on nexthop buckets

So far, mlxsw only updated hardware flags ('offload' / 'trap') on
nexthop objects. For resilient nexthop groups, these flags need to be
updated on individual nexthop buckets as well.

Update these flags whenever updating the flags of the encapsulating
nexthop object and whenever a nexthop bucket is replaced.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d6e91f1f48cc..862c8667813b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -3762,10 +3762,30 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
 }
 
+static void
+mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp,
+					const struct mlxsw_sp_nexthop *nh,
+					u16 bucket_index)
+{
+	struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp;
+	bool offload = false, trap = false;
+
+	if (nh->offloaded) {
+		if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP)
+			trap = true;
+		else
+			offload = true;
+	}
+	nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
+				    bucket_index, offload, trap);
+}
+
 static void
 mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 					   struct mlxsw_sp_nexthop_group *nh_grp)
 {
+	int i;
+
 	/* Do not update the flags if the nexthop group is being destroyed
 	 * since:
 	 * 1. The nexthop objects is being deleted, in which case the flags are
@@ -3779,6 +3799,18 @@ mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
 
 	nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
 			     nh_grp->nhgi->adj_index_valid, false);
+
+	/* Update flags of individual nexthop buckets in case of a resilient
+	 * nexthop group.
+	 */
+	if (!nh_grp->nhgi->is_resilient)
+		return;
+
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i];
+
+		mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i);
+	}
 }
 
 static void
@@ -3832,6 +3864,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
 			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
 			goto set_trap;
 		}
+		/* Flags of individual nexthop buckets might need to be
+		 * updated.
+		 */
+		mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
 		return 0;
 	}
 	mlxsw_sp_nexthop_group_normalize(nhgi);
@@ -4881,6 +4917,7 @@ mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
 
 	nh->update = 0;
 	nh->offloaded = 1;
+	mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 75d495b02982f5fa7eeb3fec9d9616bb7ab958bd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:20 +0200
Subject: mlxsw: reg: Add Router Adjacency Table Activity Dump Register

The RATRAD register is used to dump and optionally clear activity bits
of router adjacency table entries. Will be used by the next patch to
query and clear the activity of nexthop buckets in a resilient nexthop
group.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 55 +++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index d33c79ad1810..900b4bf5bb5b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8130,6 +8130,60 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
 	mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
 }
 
+/* RATRAD - Router Adjacency Table Activity Dump Register
+ * ------------------------------------------------------
+ * The RATRAD register is used to dump and optionally clear activity bits of
+ * router adjacency table entries.
+ */
+#define MLXSW_REG_RATRAD_ID 0x8022
+#define MLXSW_REG_RATRAD_LEN 0x210
+
+MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN);
+
+enum {
+	/* Read activity */
+	MLXSW_REG_RATRAD_OP_READ_ACTIVITY,
+	/* Read and clear activity */
+	MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY,
+};
+
+/* reg_ratrad_op
+ * Access: Operation
+ */
+MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2);
+
+/* reg_ratrad_ecmp_size
+ * ecmp_size is the amount of sequential entries from adjacency_index. Valid
+ * ranges:
+ * Spectrum-1: 32-64, 512, 1024, 2048, 4096
+ * Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13);
+
+/* reg_ratrad_adjacency_index
+ * Index into the adjacency table.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24);
+
+/* reg_ratrad_activity_vector
+ * Activity bit per adjacency index.
+ * Bits higher than ecmp_size are reserved.
+ * Access: RO
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1);
+
+static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index,
+					 u16 ecmp_size)
+{
+	MLXSW_REG_ZERO(ratrad, payload);
+	mlxsw_reg_ratrad_op_set(payload,
+				MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY);
+	mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size);
+	mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index);
+}
+
 /* RIGR-V2 - Router Interface Group Register Version 2
  * ---------------------------------------------------
  * The RIGR_V2 register is used to add, remove and query egress interface list
@@ -12114,6 +12168,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(rtar),
 	MLXSW_REG(ratr),
 	MLXSW_REG(rtdp),
+	MLXSW_REG(ratrad),
 	MLXSW_REG(rdpm),
 	MLXSW_REG(ricnt),
 	MLXSW_REG(rrcr),
-- 
cgit v1.2.3


From debd2b3bf5735ec935f53f01834df6dbec35c8d3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:21 +0200
Subject: mlxsw: spectrum_router: Periodically update activity of nexthop
 buckets

The kernel periodically checks the idle time of nexthop buckets to
determine if they are idle and can be re-populated with a new nexthop.

When the resilient nexthop group is offloaded to hardware, the kernel
will not see activity on nexthop buckets unless it is reported from
hardware.

Therefore, periodically (every 1 second) query the hardware for activity
of adjacency entries used as part of a resilient nexthop group and
report it to the nexthop code.

The activity is only queried if resilient nexthop groups are in use. The
delayed work is canceled otherwise.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 100 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |   2 +
 2 files changed, 102 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 862c8667813b..1d74341596da 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2913,6 +2913,7 @@ struct mlxsw_sp_nexthop_group_info {
 	u8 adj_index_valid:1,
 	   gateway:1, /* routes using the group use a gateway */
 	   is_resilient:1;
+	struct list_head list; /* member in nh_res_grp_list */
 	struct mlxsw_sp_nexthop nexthops[0];
 #define nh_rif	nexthops[0].rif
 };
@@ -4373,8 +4374,85 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
 	}
 }
 
+static void
+mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp,
+			     const struct mlxsw_sp_nexthop_group *nh_grp,
+			     unsigned long *activity)
+{
+	char *ratrad_pl;
+	int i, err;
+
+	ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL);
+	if (!ratrad_pl)
+		return;
+
+	mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index,
+			      nh_grp->nhgi->count);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl);
+	if (err)
+		goto out;
+
+	for (i = 0; i < nh_grp->nhgi->count; i++) {
+		if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i))
+			continue;
+		bitmap_set(activity, i, 1);
+	}
+
+out:
+	kfree(ratrad_pl);
+}
+
 #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */
 
+static void
+mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp,
+				const struct mlxsw_sp_nexthop_group *nh_grp)
+{
+	unsigned long *activity;
+
+	activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL);
+	if (!activity)
+		return;
+
+	mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity);
+	nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id,
+					nh_grp->nhgi->count, activity);
+
+	bitmap_free(activity);
+}
+
+static void
+mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp)
+{
+	unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
+
+	mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw,
+			       msecs_to_jiffies(interval));
+}
+
+static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work)
+{
+	struct mlxsw_sp_nexthop_group_info *nhgi;
+	struct mlxsw_sp_router *router;
+	bool reschedule = false;
+
+	router = container_of(work, struct mlxsw_sp_router,
+			      nh_grp_activity_dw.work);
+
+	mutex_lock(&router->lock);
+
+	list_for_each_entry(nhgi, &router->nh_res_grp_list, list) {
+		mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp);
+		reschedule = true;
+	}
+
+	mutex_unlock(&router->lock);
+
+	if (!reschedule)
+		return;
+	mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp);
+}
+
 static int
 mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp,
 				     const struct nh_notifier_single_info *nh,
@@ -4632,6 +4710,15 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp,
 		goto err_group_refresh;
 	}
 
+	/* Add resilient nexthop groups to a list so that the activity of their
+	 * nexthop buckets will be periodically queried and cleared.
+	 */
+	if (nhgi->is_resilient) {
+		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
+			mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp);
+		list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list);
+	}
+
 	return 0;
 
 err_group_refresh:
@@ -4650,8 +4737,15 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_nexthop_group *nh_grp)
 {
 	struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi;
+	struct mlxsw_sp_router *router = mlxsw_sp->router;
 	int i;
 
+	if (nhgi->is_resilient) {
+		list_del(&nhgi->list);
+		if (list_empty(&mlxsw_sp->router->nh_res_grp_list))
+			cancel_delayed_work(&router->nh_grp_activity_dw);
+	}
+
 	for (i = nhgi->count - 1; i >= 0; i--) {
 		struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i];
 
@@ -9652,6 +9746,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_ll_op_ctx_init;
 
+	INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list);
+	INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw,
+			  mlxsw_sp_nh_grp_activity_work);
+
 	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
 	err = __mlxsw_sp_router_init(mlxsw_sp);
 	if (err)
@@ -9775,6 +9873,7 @@ err_ipips_init:
 err_rifs_init:
 	__mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
 	mlxsw_sp_router_ll_op_ctx_fini(router);
 err_ll_op_ctx_init:
 	mlxsw_sp_router_xm_fini(mlxsw_sp);
@@ -9806,6 +9905,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_ipips_fini(mlxsw_sp);
 	mlxsw_sp_rifs_fini(mlxsw_sp);
 	__mlxsw_sp_router_fini(mlxsw_sp);
+	cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw);
 	mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router);
 	mlxsw_sp_router_xm_fini(mlxsw_sp);
 	mutex_destroy(&mlxsw_sp->router->lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index b85c5f6c2262..be7708a375e1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -80,6 +80,8 @@ struct mlxsw_sp_router {
 	struct mlxsw_sp_router_xm *xm;
 	const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges;
 	size_t adj_grp_size_ranges_count;
+	struct delayed_work nh_grp_activity_dw;
+	struct list_head nh_res_grp_list;
 };
 
 struct mlxsw_sp_fib_entry_priv {
-- 
cgit v1.2.3


From 03490a8239156933366f5595250fe3dc5d0e18aa Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:22 +0200
Subject: mlxsw: spectrum_router: Enable resilient nexthop groups to be
 programmed

Now that mlxsw supports resilient nexthop groups, allow them to be
programmed after validating that their configuration conforms to the
device's limitations (e.g., number of buckets is within predefined
range).

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 86 +++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1d74341596da..6ccaa194733b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4519,11 +4519,86 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
+static int
+mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp,
+					     const struct nh_notifier_res_table_info *nh_res_table,
+					     struct netlink_ext_ack *extack)
+{
+	unsigned int alloc_size;
+	bool valid_size = false;
+	int err, i;
+
+	if (nh_res_table->num_nh_buckets < 32) {
+		NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) {
+		const struct mlxsw_sp_adj_grp_size_range *size_range;
+
+		size_range = &mlxsw_sp->router->adj_grp_size_ranges[i];
+
+		if (nh_res_table->num_nh_buckets >= size_range->start &&
+		    nh_res_table->num_nh_buckets <= size_range->end) {
+			valid_size = true;
+			break;
+		}
+	}
+
+	if (!valid_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets");
+		return -EINVAL;
+	}
+
+	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
+					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
+					      nh_res_table->num_nh_buckets,
+					      &alloc_size);
+	if (err || nh_res_table->num_nh_buckets != alloc_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp,
+					const struct nh_notifier_res_table_info *nh_res_table,
+					struct netlink_ext_ack *extack)
+{
+	int err;
+	u16 i;
+
+	err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp,
+							   nh_res_table,
+							   extack);
+	if (err)
+		return err;
+
+	for (i = 0; i < nh_res_table->num_nh_buckets; i++) {
+		const struct nh_notifier_single_info *nh;
+		int err;
+
+		nh = &nh_res_table->nhs[i];
+		err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
 					 unsigned long event,
 					 struct nh_notifier_info *info)
 {
-	if (event != NEXTHOP_EVENT_REPLACE)
+	struct nh_notifier_single_info *nh;
+
+	if (event != NEXTHOP_EVENT_REPLACE &&
+	    event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE &&
+	    event != NEXTHOP_EVENT_BUCKET_REPLACE)
 		return 0;
 
 	switch (info->type) {
@@ -4534,6 +4609,14 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp,
 							   info->nh_grp,
 							   info->extack);
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
+		return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp,
+							       info->nh_res_table,
+							       info->extack);
+	case NH_NOTIFIER_INFO_TYPE_RES_BUCKET:
+		nh = &info->nh_res_bucket->new_nh;
+		return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh,
+								 info->extack);
 	default:
 		NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type");
 		return -EOPNOTSUPP;
@@ -4551,6 +4634,7 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp,
 		return info->nh->gw_family || info->nh->is_reject ||
 		       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
 	case NH_NOTIFIER_INFO_TYPE_GRP:
+	case NH_NOTIFIER_INFO_TYPE_RES_TABLE:
 		/* Already validated earlier. */
 		return true;
 	default:
-- 
cgit v1.2.3


From 861584724c44e63bfb684090c70ade660dae6c69 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:23 +0200
Subject: selftests: mlxsw: Test unresolved neigh trap with resilient nexthop
 groups

The number of nexthop buckets in a resilient nexthop group never
changes, so when the gateway address of a nexthop cannot be resolved,
the nexthop buckets are programmed to trap packets to the CPU in order
to trigger resolution. For example:

 # ip nexthop add id 1 via 198.51.100.1 dev swp3
 # ip nexthop add id 10 group 1 type resilient buckets 32
 # ip nexthop bucket get id 10 index 0
 id 10 index 0 idle_time 1.44 nhid 1 trap

Where 198.51.100.1 is a made-up IP.

Test that in this case packets are indeed trapped to the CPU via the
unresolved neigh trap.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/mlxsw/devlink_trap_l3_exceptions.sh        | 31 ++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..42d44e27802c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -446,6 +446,35 @@ __invalid_nexthop_test()
 	log_test "Unresolved neigh: nexthop does not exist: $desc"
 }
 
+__invalid_nexthop_bucket_test()
+{
+	local desc=$1; shift
+	local dip=$1; shift
+	local via_add=$1; shift
+	local trap_name="unresolved_neigh"
+
+	RET=0
+
+	# Check that route to nexthop that does not exist triggers
+	# unresolved_neigh
+	ip nexthop add id 1 via $via_add dev $rp2
+	ip nexthop add id 10 group 1 type resilient buckets 32
+	ip route add $dip nhid 10
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	ip route del $dip nhid 10
+	ip nexthop del id 10
+	ip nexthop del id 1
+	log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
 unresolved_neigh_test()
 {
 	__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +482,8 @@ unresolved_neigh_test()
 	__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
 	__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
 		2001:db8:2::4
+	__invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+	__invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
 }
 
 vrf_without_routes_create()
-- 
cgit v1.2.3


From ffd3e9b07b9ee3242fa5f5bc76385b6a0e69437d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 24 Mar 2021 22:14:24 +0200
Subject: selftests: mlxsw: Add resilient nexthop groups configuration tests

Test that unsupported resilient nexthop group configurations are
rejected and that offload / trap indication is correctly set on nexthop
buckets in a resilient group.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 82 ++++++++++++++++++++++
 tools/testing/selftests/net/forwarding/lib.sh      |  5 ++
 2 files changed, 87 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index ed346da5d3cb..a217f9f6775b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -33,6 +33,7 @@ ALL_TESTS="
 	nexthop_obj_invalid_test
 	nexthop_obj_offload_test
 	nexthop_obj_group_offload_test
+	nexthop_obj_bucket_offload_test
 	nexthop_obj_blackhole_offload_test
 	nexthop_obj_route_offload_test
 	devlink_reload_test
@@ -739,11 +740,28 @@ nexthop_obj_invalid_test()
 
 	ip nexthop add id 1 dev $swp1
 	ip nexthop add id 2 dev $swp1
+	ip nexthop add id 3 via 192.0.2.3 dev $swp1
 	ip nexthop add id 10 group 1/2
 	check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
 
+	ip nexthop add id 10 group 3 type resilient buckets 7
+	check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 129
+	check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+	ip nexthop add id 10 group 1/2 type resilient buckets 32
+	check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+	ip nexthop add id 10 group 3 type resilient buckets 32
+	check_err $? "failed to configure a valid resilient nexthop group"
+	ip nexthop replace id 3 dev $swp1
+	check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
 	log_test "nexthop objects - invalid configurations"
 
+	ip nexthop del id 10
+	ip nexthop del id 3
 	ip nexthop del id 2
 	ip nexthop del id 1
 
@@ -858,6 +876,70 @@ nexthop_obj_group_offload_test()
 	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
 }
 
+nexthop_obj_bucket_offload_test()
+{
+	# Test offload indication of nexthop buckets
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 1
+	check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" wait_for_trap \
+		ip nexthop bucket show nhid 2
+	check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+	# Revalidate nexthop id 1 by changing its configuration
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 1
+	check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+	# Revalidate nexthop id 2 by changing its neighbour
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop bucket show nhid 2
+	check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+	log_test "nexthop bucket offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
 nexthop_obj_blackhole_offload_test()
 {
 	# Test offload indication of blackhole nexthop objects
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index be71012b8fc5..05c05e02bade 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -353,6 +353,11 @@ wait_for_offload()
 	"$@" | grep -q offload
 }
 
+wait_for_trap()
+{
+	"$@" | grep -q trap
+}
+
 until_counter_is()
 {
 	local expr=$1; shift
-- 
cgit v1.2.3


From aa6dd211e4b1dde9d5dc25d699d35f789ae7eeba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Mar 2021 14:53:37 -0700
Subject: inet: use bigger hash table for IP ID generation

In commit 73f156a6e8c1 ("inetpeer: get rid of ip_id_count")
I used a very small hash table that could be abused
by patient attackers to reveal sensitive information.

Switch to a dynamic sizing, depending on RAM size.

Typical big hosts will now use 128x more storage (2 MB)
to get a similar increase in security and reduction
of hash collisions.

As a bonus, use of alloc_large_system_hash() spreads
allocated memory among all NUMA nodes.

Fixes: 73f156a6e8c1 ("inetpeer: get rid of ip_id_count")
Reported-by: Amit Klein <aksecurity@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0470442ff61d..ea916df1bbf5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -66,6 +66,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
@@ -452,8 +453,10 @@ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
 }
 
-#define IP_IDENTS_SZ 2048u
-
+/* Hash tables of size 2048..262144 depending on RAM size.
+ * Each bucket uses 8 bytes.
+ */
+static u32 ip_idents_mask __read_mostly;
 static atomic_t *ip_idents __read_mostly;
 static u32 *ip_tstamps __read_mostly;
 
@@ -463,12 +466,16 @@ static u32 *ip_tstamps __read_mostly;
  */
 u32 ip_idents_reserve(u32 hash, int segs)
 {
-	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
-	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
-	u32 old = READ_ONCE(*p_tstamp);
-	u32 now = (u32)jiffies;
+	u32 bucket, old, now = (u32)jiffies;
+	atomic_t *p_id;
+	u32 *p_tstamp;
 	u32 delta = 0;
 
+	bucket = hash & ip_idents_mask;
+	p_tstamp = ip_tstamps + bucket;
+	p_id = ip_idents + bucket;
+	old = READ_ONCE(*p_tstamp);
+
 	if (old != now && cmpxchg(p_tstamp, old, now) == old)
 		delta = prandom_u32_max(now - old);
 
@@ -3557,18 +3564,25 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
 
 int __init ip_rt_init(void)
 {
+	void *idents_hash;
 	int cpu;
 
-	ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
-				  GFP_KERNEL);
-	if (!ip_idents)
-		panic("IP: failed to allocate ip_idents\n");
+	/* For modern hosts, this will use 2 MB of memory */
+	idents_hash = alloc_large_system_hash("IP idents",
+					      sizeof(*ip_idents) + sizeof(*ip_tstamps),
+					      0,
+					      16, /* one bucket per 64 KB */
+					      HASH_ZERO,
+					      NULL,
+					      &ip_idents_mask,
+					      2048,
+					      256*1024);
+
+	ip_idents = idents_hash;
 
-	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+	prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
 
-	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
-	if (!ip_tstamps)
-		panic("IP: failed to allocate ip_tstamps\n");
+	ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
 
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
-- 
cgit v1.2.3


From d1c5688087a0904606a77ae4803f13777f8dd7d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Mar 2021 15:01:38 -0700
Subject: tcp_metrics: tcpm_hash_bucket is strictly local

After commit 098a697b497e ("tcp_metrics: Use a single hash table
for all network namespaces."), tcpm_hash_bucket is local to
net/ipv4/tcp_metrics.c

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 70a2a085dd1a..9e3cb2722b80 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -11,7 +11,6 @@
 #include <linux/rcupdate.h>
 #include <linux/siphash.h>
 
-struct tcpm_hash_bucket;
 struct ctl_table_header;
 struct ipv4_devconf;
 struct fib_rules_ops;
-- 
cgit v1.2.3


From d7f3087b396d8aa4b4751b61fa8cbab82113bd59 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:23 -0500
Subject: net: ipa: reduce IPA version assumptions

Modify conditional tests throughout the IPA code so they do not
assume that IPA v3.5.1 is the oldest version supported.  Also remove
assumptions that IPA v4.5 is the newest version of IPA supported.

Augment versions in comments with "+", to be clearer that the
comment applies to a version and subsequent versions.  (E.g.,
"present for IPA v4.2+" instead of just "present for v4.2".)

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c          |  8 ++++----
 drivers/net/ipa/ipa_cmd.c      | 26 +++++++++++++++-----------
 drivers/net/ipa/ipa_endpoint.c | 25 ++++++++++++++-----------
 drivers/net/ipa/ipa_main.c     |  6 +++---
 drivers/net/ipa/ipa_qmi.c      |  2 +-
 5 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index b6355827bf90..7f2a8fce5e0d 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -827,14 +827,14 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 
 	/* Max prefetch is 1 segment (do not set MAX_PREFETCH_FMASK) */
 
-	/* We enable the doorbell engine for IPA v3.5.1 */
-	if (gsi->version == IPA_VERSION_3_5_1 && doorbell)
+	/* No need to use the doorbell engine starting at IPA v4.0 */
+	if (gsi->version < IPA_VERSION_4_0 && doorbell)
 		val |= USE_DB_ENG_FMASK;
 
 	/* v4.0 introduces an escape buffer for prefetch.  We use it
 	 * on all but the AP command channel.
 	 */
-	if (gsi->version != IPA_VERSION_3_5_1 && !channel->command) {
+	if (gsi->version >= IPA_VERSION_4_0 && !channel->command) {
 		/* If not otherwise set, prefetch buffers are used */
 		if (gsi->version < IPA_VERSION_4_5)
 			val |= USE_ESCAPE_BUF_ONLY_FMASK;
@@ -973,7 +973,7 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
 
 	gsi_channel_reset_command(channel);
 	/* Due to a hardware quirk we may need to reset RX channels twice. */
-	if (gsi->version == IPA_VERSION_3_5_1 && !channel->toward_ipa)
+	if (gsi->version < IPA_VERSION_4_0 && !channel->toward_ipa)
 		gsi_channel_reset_command(channel);
 
 	gsi_channel_program(channel, doorbell);
diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index 35e35852c25c..a0be25f1f442 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -71,13 +71,12 @@ struct ipa_cmd_hw_hdr_init_local {
 
 /* IPA_CMD_REGISTER_WRITE */
 
-/* For IPA v4.0+, this opcode gets modified with pipeline clear options */
-
+/* For IPA v4.0+, the pipeline clear options are encoded in the opcode */
 #define REGISTER_WRITE_OPCODE_SKIP_CLEAR_FMASK		GENMASK(8, 8)
 #define REGISTER_WRITE_OPCODE_CLEAR_OPTION_FMASK	GENMASK(10, 9)
 
 struct ipa_cmd_register_write {
-	__le16 flags;		/* Unused/reserved for IPA v3.5.1 */
+	__le16 flags;		/* Unused/reserved prior to IPA v4.0 */
 	__le16 offset;
 	__le32 value;
 	__le32 value_mask;
@@ -85,12 +84,12 @@ struct ipa_cmd_register_write {
 };
 
 /* Field masks for ipa_cmd_register_write structure fields */
-/* The next field is present for IPA v4.0 and above */
+/* The next field is present for IPA v4.0+ */
 #define REGISTER_WRITE_FLAGS_OFFSET_HIGH_FMASK		GENMASK(14, 11)
-/* The next field is present for IPA v3.5.1 only */
+/* The next field is not present for IPA v4.0+ */
 #define REGISTER_WRITE_FLAGS_SKIP_CLEAR_FMASK		GENMASK(15, 15)
 
-/* The next field and its values are present for IPA v3.5.1 only */
+/* The next field and its values are not present for IPA v4.0+ */
 #define REGISTER_WRITE_CLEAR_OPTIONS_FMASK		GENMASK(1, 0)
 
 /* IPA_CMD_IP_PACKET_INIT */
@@ -123,7 +122,7 @@ struct ipa_cmd_hw_dma_mem_mem {
 
 /* Field masks for ipa_cmd_hw_dma_mem_mem structure fields */
 #define DMA_SHARED_MEM_FLAGS_DIRECTION_FMASK		GENMASK(0, 0)
-/* The next two fields are present for IPA v3.5.1 only. */
+/* The next two fields are not present for IPA v4.0+ */
 #define DMA_SHARED_MEM_FLAGS_SKIP_CLEAR_FMASK		GENMASK(1, 1)
 #define DMA_SHARED_MEM_FLAGS_CLEAR_OPTIONS_FMASK	GENMASK(3, 2)
 
@@ -237,11 +236,12 @@ static bool ipa_cmd_register_write_offset_valid(struct ipa *ipa,
 	u32 bit_count;
 
 	/* The maximum offset in a register_write immediate command depends
-	 * on the version of IPA.  IPA v3.5.1 supports a 16 bit offset, but
-	 * newer versions allow some additional high-order bits.
+	 * on the version of IPA.  A 16 bit offset is always supported,
+	 * but starting with IPA v4.0 some additional high-order bits are
+	 * allowed.
 	 */
 	bit_count = BITS_PER_BYTE * sizeof(payload->offset);
-	if (ipa->version != IPA_VERSION_3_5_1)
+	if (ipa->version >= IPA_VERSION_4_0)
 		bit_count += hweight32(REGISTER_WRITE_FLAGS_OFFSET_HIGH_FMASK);
 	BUILD_BUG_ON(bit_count > 32);
 	offset_max = ~0U >> (32 - bit_count);
@@ -440,7 +440,11 @@ void ipa_cmd_register_write_add(struct gsi_trans *trans, u32 offset, u32 value,
 	/* pipeline_clear_src_grp is not used */
 	clear_option = clear_full ? pipeline_clear_full : pipeline_clear_hps;
 
-	if (ipa->version != IPA_VERSION_3_5_1) {
+	/* IPA v4.0+ represents the pipeline clear options in the opcode.  It
+	 * also supports a larger offset by encoding additional high-order
+	 * bits in the payload flags field.
+	 */
+	if (ipa->version >= IPA_VERSION_4_0) {
 		u16 offset_high;
 		u32 val;
 
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 88310d358557..5f93bd60c758 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -266,7 +266,7 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 	 * if (endpoint->toward_ipa)
 	 * 	assert(ipa->version != IPA_VERSION_4.2);
 	 * else
-	 * 	assert(ipa->version == IPA_VERSION_3_5_1);
+	 *	assert(ipa->version < IPA_VERSION_4_0);
 	 */
 	mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
 
@@ -347,7 +347,7 @@ ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable)
 {
 	bool suspended;
 
-	if (endpoint->ipa->version != IPA_VERSION_3_5_1)
+	if (endpoint->ipa->version >= IPA_VERSION_4_0)
 		return enable;	/* For IPA v4.0+, no change made */
 
 	/* assert(!endpoint->toward_ipa); */
@@ -515,7 +515,7 @@ static void ipa_endpoint_init_hdr(struct ipa_endpoint *endpoint)
 			/* Where IPA will write the length */
 			offset = offsetof(struct rmnet_map_header, pkt_len);
 			/* Upper bits are stored in HDR_EXT with IPA v4.5 */
-			if (version == IPA_VERSION_4_5)
+			if (version >= IPA_VERSION_4_5)
 				offset &= field_mask(HDR_OFST_PKT_SIZE_FMASK);
 
 			val |= HDR_OFST_PKT_SIZE_VALID_FMASK;
@@ -562,7 +562,7 @@ static void ipa_endpoint_init_hdr_ext(struct ipa_endpoint *endpoint)
 	/* IPA v4.5 adds some most-significant bits to a few fields,
 	 * two of which are defined in the HDR (not HDR_EXT) register.
 	 */
-	if (ipa->version == IPA_VERSION_4_5) {
+	if (ipa->version >= IPA_VERSION_4_5) {
 		/* HDR_TOTAL_LEN_OR_PAD_OFFSET is 0, so MSB is 0 */
 		if (endpoint->data->qmap && !endpoint->toward_ipa) {
 			u32 offset;
@@ -776,7 +776,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds)
 	if (!microseconds)
 		return 0;	/* Nothing to compute if timer period is 0 */
 
-	if (ipa->version == IPA_VERSION_4_5)
+	if (ipa->version >= IPA_VERSION_4_5)
 		return hol_block_timer_qtime_val(ipa, microseconds);
 
 	/* Use 64 bit arithmetic to avoid overflow... */
@@ -1468,8 +1468,7 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 	 * is active, we need to handle things specially to recover.
 	 * All other cases just need to reset the underlying GSI channel.
 	 */
-	special = ipa->version == IPA_VERSION_3_5_1 &&
-			!endpoint->toward_ipa &&
+	special = ipa->version < IPA_VERSION_4_0 && !endpoint->toward_ipa &&
 			endpoint->data->aggregation;
 	if (special && ipa_endpoint_aggr_active(endpoint))
 		ret = ipa_endpoint_reset_rx_aggr(endpoint);
@@ -1567,8 +1566,10 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 		(void)ipa_endpoint_program_suspend(endpoint, true);
 	}
 
-	/* IPA v3.5.1 doesn't use channel stop for suspend */
-	stop_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
+	/* Starting with IPA v4.0, endpoints are suspended by stopping the
+	 * underlying GSI channel rather than using endpoint suspend mode.
+	 */
+	stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
 	ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
 	if (ret)
 		dev_err(dev, "error %d suspending channel %u\n", ret,
@@ -1588,8 +1589,10 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 	if (!endpoint->toward_ipa)
 		(void)ipa_endpoint_program_suspend(endpoint, false);
 
-	/* IPA v3.5.1 doesn't use channel start for resume */
-	start_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
+	/* Starting with IPA v4.0, the underlying GSI channel must be
+	 * restarted for resume.
+	 */
+	start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
 	ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
 	if (ret)
 		dev_err(dev, "error %d resuming channel %u\n", ret,
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 64b92dfdd3f5..62d82d48aed6 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -227,8 +227,8 @@ static void ipa_hardware_config_comp(struct ipa *ipa)
 {
 	u32 val;
 
-	/* Nothing to configure for IPA v3.5.1 */
-	if (ipa->version == IPA_VERSION_3_5_1)
+	/* Nothing to configure prior to IPA v4.0 */
+	if (ipa->version < IPA_VERSION_4_0)
 		return;
 
 	val = ioread32(ipa->reg_virt + IPA_REG_COMP_CFG_OFFSET);
@@ -388,7 +388,7 @@ static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data)
 	}
 
 	/* Implement some hardware workarounds */
-	if (version != IPA_VERSION_3_5_1 && version < IPA_VERSION_4_5) {
+	if (version >= IPA_VERSION_4_0 && version < IPA_VERSION_4_5) {
 		/* Enable open global clocks (not needed for IPA v4.5) */
 		val = GLOBAL_FMASK;
 		val |= GLOBAL_2X_CLK_FMASK;
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index af8666b89b37..7b833e92a0ac 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -377,7 +377,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 
 	/* None of the stats fields are valid (IPA v4.0 and above) */
 
-	if (ipa->version != IPA_VERSION_3_5_1) {
+	if (ipa->version >= IPA_VERSION_4_0) {
 		mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM];
 		if (mem->size) {
 			req.hw_stats_quota_base_addr_valid = 1;
-- 
cgit v1.2.3


From eb09457c9d33b6d270fea099840cda0bb38d4e26 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:24 -0500
Subject: net: ipa: update version definitions

Add IPA version definitions for all IPA v3.x and v4.x.  Fix the GSI
version associated with IPA version 4.1.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_version.h | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ipa/ipa_version.h b/drivers/net/ipa/ipa_version.h
index 2944e2a89023..ee2b3d02f3cd 100644
--- a/drivers/net/ipa/ipa_version.h
+++ b/drivers/net/ipa/ipa_version.h
@@ -8,17 +8,32 @@
 
 /**
  * enum ipa_version
+ * @IPA_VERSION_3_0:	IPA version 3.0/GSI version 1.0
+ * @IPA_VERSION_3_1:	IPA version 3.1/GSI version 1.1
+ * @IPA_VERSION_3_5:	IPA version 3.5/GSI version 1.2
+ * @IPA_VERSION_3_5_1:	IPA version 3.5.1/GSI version 1.3
+ * @IPA_VERSION_4_0:	IPA version 4.0/GSI version 2.0
+ * @IPA_VERSION_4_1:	IPA version 4.1/GSI version 2.0
+ * @IPA_VERSION_4_2:	IPA version 4.2/GSI version 2.2
+ * @IPA_VERSION_4_5:	IPA version 4.5/GSI version 2.5
+ * @IPA_VERSION_4_7:	IPA version 4.7/GSI version 2.7
+ * @IPA_VERSION_4_9:	IPA version 4.9/GSI version 2.9
+ * @IPA_VERSION_4_11:	IPA version 4.11/GSI version 2.11 (2.1.1)
  *
  * Defines the version of IPA (and GSI) hardware present on the platform.
- * It seems this might be better defined elsewhere, but having it here gets
- * it where it's needed.
  */
 enum ipa_version {
-	IPA_VERSION_3_5_1,	/* GSI version 1.3.0 */
-	IPA_VERSION_4_0,	/* GSI version 2.0 */
-	IPA_VERSION_4_1,	/* GSI version 2.1 */
-	IPA_VERSION_4_2,	/* GSI version 2.2 */
-	IPA_VERSION_4_5,	/* GSI version 2.5 */
+	IPA_VERSION_3_0,
+	IPA_VERSION_3_1,
+	IPA_VERSION_3_5,
+	IPA_VERSION_3_5_1,
+	IPA_VERSION_4_0,
+	IPA_VERSION_4_1,
+	IPA_VERSION_4_2,
+	IPA_VERSION_4_5,
+	IPA_VERSION_4_7,
+	IPA_VERSION_4_9,
+	IPA_VERSION_4_11,
 };
 
 #endif /* _IPA_VERSION_H_ */
-- 
cgit v1.2.3


From 647a05f3ae98ba98b87dc1177a54d205fbaa1b66 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:25 -0500
Subject: net: ipa: define the ENDP_INIT_NAT register

Define the ENDP_INIT_NAT register for setting up the NAT
configuration for an endpoint.  We aren't using NAT at this
time, so explicitly set the type to BYPASS for all endpoints.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 17 ++++++++++++++++-
 drivers/net/ipa/ipa_reg.h      | 14 +++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 5f93bd60c758..38e83cd467b5 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 
 #include <linux/types.h>
@@ -468,6 +468,20 @@ static void ipa_endpoint_init_cfg(struct ipa_endpoint *endpoint)
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
 
+static void ipa_endpoint_init_nat(struct ipa_endpoint *endpoint)
+{
+	u32 offset;
+	u32 val;
+
+	if (!endpoint->toward_ipa)
+		return;
+
+	offset = IPA_REG_ENDP_INIT_NAT_N_OFFSET(endpoint->endpoint_id);
+	val = u32_encode_bits(IPA_NAT_BYPASS, NAT_EN_FMASK);
+
+	iowrite32(val, endpoint->ipa->reg_virt + offset);
+}
+
 /**
  * ipa_endpoint_init_hdr() - Initialize HDR endpoint configuration register
  * @endpoint:	Endpoint pointer
@@ -1488,6 +1502,7 @@ static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 	else
 		(void)ipa_endpoint_program_suspend(endpoint, false);
 	ipa_endpoint_init_cfg(endpoint);
+	ipa_endpoint_init_nat(endpoint);
 	ipa_endpoint_init_hdr(endpoint);
 	ipa_endpoint_init_hdr_ext(endpoint);
 	ipa_endpoint_init_hdr_metadata_mask(endpoint);
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 36fe746575f6..bba088e80cd1 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
+ * Copyright (C) 2018-2021 Linaro Ltd.
  */
 #ifndef _IPA_REG_H_
 #define _IPA_REG_H_
@@ -388,6 +388,18 @@ enum ipa_cs_offload_en {
 	IPA_CS_OFFLOAD_DL		= 0x2,
 };
 
+/* Valid only for TX (IPA consumer) endpoints */
+#define IPA_REG_ENDP_INIT_NAT_N_OFFSET(ep) \
+					(0x0000080c + 0x0070 * (ep))
+#define NAT_EN_FMASK				GENMASK(1, 0)
+
+/** enum ipa_nat_en - ENDP_INIT_NAT register NAT_EN field value */
+enum ipa_nat_en {
+	IPA_NAT_BYPASS			= 0x0,
+	IPA_NAT_SRC			= 0x1,
+	IPA_NAT_DST			= 0x2,
+};
+
 #define IPA_REG_ENDP_INIT_HDR_N_OFFSET(ep) \
 					(0x00000810 + 0x0070 * (ep))
 #define HDR_LEN_FMASK				GENMASK(5, 0)
-- 
cgit v1.2.3


From e6e49e435512c8bcf104e594d3cc34b8f3818492 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:26 -0500
Subject: net: ipa: limit local processing context address

Not all of the bits of the LOCAL_PKT_PROC_CNTXT register are valid.
Until IPA v4.5, there are 17 valid bits (though the bottom three
must be zero).  Starting with IPA v4.5, 18 bits are valid.

Introduce proc_cntxt_base_addr_encoded() to encode the base address
for use in the register using only the valid bits.

Shorten the name of the register (omit "_BASE") to avoid the need to
wrap the line in the one place it's used.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c |  6 ++++--
 drivers/net/ipa/ipa_reg.h | 14 ++++++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index f25029b9ec85..32907dde5dc6 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -61,6 +61,7 @@ int ipa_mem_setup(struct ipa *ipa)
 	struct gsi_trans *trans;
 	u32 offset;
 	u16 size;
+	u32 val;
 
 	/* Get a transaction to define the header memory region and to zero
 	 * the processing context and modem memory regions.
@@ -89,8 +90,9 @@ int ipa_mem_setup(struct ipa *ipa)
 	gsi_trans_commit_wait(trans);
 
 	/* Tell the hardware where the processing context area is located */
-	iowrite32(ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset,
-		  ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET);
+	offset = ipa->mem_offset + ipa->mem[IPA_MEM_MODEM_PROC_CTX].offset;
+	val = proc_cntxt_base_addr_encoded(ipa->version, offset);
+	iowrite32(val, ipa->reg_virt + IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET);
 
 	return 0;
 }
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index bba088e80cd1..cbfef27bbcf2 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -217,8 +217,18 @@ static inline u32 ipa_reg_bcr_val(enum ipa_version version)
 	return 0x00000000;
 }
 
-/* The value of the next register must be a multiple of 8 */
-#define IPA_REG_LOCAL_PKT_PROC_CNTXT_BASE_OFFSET	0x000001e8
+/* The value of the next register must be a multiple of 8 (bottom 3 bits 0) */
+#define IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET		0x000001e8
+
+/* Encoded value for LOCAL_PKT_PROC_CNTXT register BASE_ADDR field */
+static inline u32 proc_cntxt_base_addr_encoded(enum ipa_version version,
+					       u32 addr)
+{
+	if (version < IPA_VERSION_4_5)
+		return u32_encode_bits(addr, GENMASK(16, 0));
+
+	return u32_encode_bits(addr, GENMASK(17, 0));
+}
 
 /* ipa->available defines the valid bits in the AGGR_FORCE_CLOSE register */
 #define IPA_REG_AGGR_FORCE_CLOSE_OFFSET			0x000001ec
-- 
cgit v1.2.3


From 1910494ee32cb43e295f9bf471bdc1d28ada99f7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:27 -0500
Subject: net: ipa: move ipa_aggr_granularity_val()

We only use ipa_aggr_granularity_val() inside "ipa_main.c", so it
doesn't really need to be defined in a header file.  It makes some
sense to be grouped with the register definitions, but it is unlike
the other inline functions now defined in "ipa_reg.h".  So move it
into "ipa_main.c" where it's used.  TIMER_FREQUENCY is used only
by that function, so move that definition as well.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 15 +++++++++++++++
 drivers/net/ipa/ipa_reg.h  | 12 ------------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 62d82d48aed6..ba1bfc30210a 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -287,6 +287,21 @@ ipa_hardware_config_qsb(struct ipa *ipa, const struct ipa_data *data)
 	iowrite32(val, ipa->reg_virt + IPA_REG_QSB_MAX_READS_OFFSET);
 }
 
+/* The internal inactivity timer clock is used for the aggregation timer */
+#define TIMER_FREQUENCY	32000		/* 32 KHz inactivity timer clock */
+
+/* Compute the value to use in the COUNTER_CFG register AGGR_GRANULARITY
+ * field to represent the given number of microseconds.  The value is one
+ * less than the number of timer ticks in the requested period.  0 is not
+ * a valid granularity value.
+ */
+static u32 ipa_aggr_granularity_val(u32 usec)
+{
+	/* assert(usec != 0); */
+
+	return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
+}
+
 /* IPA uses unified Qtime starting at IPA v4.5, implementing various
  * timestamps and timers independent of the IPA core clock rate.  The
  * Qtimer is based on a 56-bit timestamp incremented at each tick of
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index cbfef27bbcf2..86fe2978e810 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -237,18 +237,6 @@ static inline u32 proc_cntxt_base_addr_encoded(enum ipa_version version,
 #define IPA_REG_COUNTER_CFG_OFFSET			0x000001f0
 #define AGGR_GRANULARITY_FMASK			GENMASK(8, 4)
 
-/* The internal inactivity timer clock is used for the aggregation timer */
-#define TIMER_FREQUENCY	32000		/* 32 KHz inactivity timer clock */
-
-/* Compute the value to use in the AGGR_GRANULARITY field representing the
- * given number of microseconds.  The value is one less than the number of
- * timer ticks in the requested period.  0 not a valid granularity value.
- */
-static inline u32 ipa_aggr_granularity_val(u32 usec)
-{
-	return DIV_ROUND_CLOSEST(usec * TIMER_FREQUENCY, USEC_PER_SEC) - 1;
-}
-
 /* The next register is not present for IPA v4.5 */
 #define IPA_REG_TX_CFG_OFFSET				0x000001fc
 /* The first three fields are present for IPA v3.5.1 only */
-- 
cgit v1.2.3


From 810a2e1f1073983eebcdf3d4e98d6183db863c6f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 24 Mar 2021 08:15:28 -0500
Subject: net: ipa: increase channels and events

Increase the maximum number of channels and event rings supported by
the driver, to allow the maximum available on the SDX55.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index efc980f96109..d5996bdb20ef 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -16,8 +16,8 @@
 #include "ipa_version.h"
 
 /* Maximum number of channels and event rings supported by the driver */
-#define GSI_CHANNEL_COUNT_MAX	17
-#define GSI_EVT_RING_COUNT_MAX	13
+#define GSI_CHANNEL_COUNT_MAX	23
+#define GSI_EVT_RING_COUNT_MAX	20
 
 /* Maximum TLV FIFO size for a channel; 64 here is arbitrary (and high) */
 #define GSI_TLV_MAX		64
-- 
cgit v1.2.3


From d280a2c2b740ed6d90827004625e605c0a06a696 Mon Sep 17 00:00:00 2001
From: Zhichao Cai <caizhichao@yulong.com>
Date: Wed, 24 Mar 2021 10:30:47 +0800
Subject: Simplify the code by using module_platform_driver macro

for ftmac100

Signed-off-by: Zhichao Cai <caizhichao@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftmac100.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 473b337b2e3b..5a1a8f2ea63c 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1177,18 +1177,7 @@ static struct platform_driver ftmac100_driver = {
 /******************************************************************************
  * initialization / finalization
  *****************************************************************************/
-static int __init ftmac100_init(void)
-{
-	return platform_driver_register(&ftmac100_driver);
-}
-
-static void __exit ftmac100_exit(void)
-{
-	platform_driver_unregister(&ftmac100_driver);
-}
-
-module_init(ftmac100_init);
-module_exit(ftmac100_exit);
+module_platform_driver(ftmac100_driver);
 
 MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>");
 MODULE_DESCRIPTION("FTMAC100 driver");
-- 
cgit v1.2.3


From 72a0f6d0529296d4c8c0fd2fb1c7f1e2d247a39e Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 24 Mar 2021 14:16:22 +0800
Subject: net/tls: Fix a typo in tls_device.c

s/beggining/beginning/

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index d9cd229aa111..790c6b7ecb26 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -601,7 +601,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 	if (!info ||
 	    before(seq, info->end_seq - info->len)) {
 		/* if retransmit_hint is irrelevant start
-		 * from the beggining of the list
+		 * from the beginning of the list
 		 */
 		info = list_first_entry_or_null(&context->records_list,
 						struct tls_record_info, list);
-- 
cgit v1.2.3


From 0e4161d0eda56e3e05456fdc2c346eab0a6e4aa8 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 24 Mar 2021 14:19:31 +0800
Subject: net/packet: Fix a typo in af_packet.c

s/sequencially/sequentially/

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e24b2841c643..118d585337d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2057,7 +2057,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
  * and skb->cb are mangled. It works because (and until) packets
  * falling here are owned by current CPU. Output packets are cloned
  * by dev_queue_xmit_nit(), input packets are processed by net_bh
- * sequencially, so that if we return skb to original state on exit,
+ * sequentially, so that if we return skb to original state on exit,
  * we will not harm anyone.
  */
 
-- 
cgit v1.2.3


From da1da87fa7fcf55871635f3f545c3b2932019213 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 24 Mar 2021 14:22:24 +0800
Subject: 6lowpan: Fix some typos in nhc_udp.c

s/Orignal/Original/
s/infered/inferred/

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/6lowpan/nhc_udp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index 8a3507524f7b..33f17bd8cda7 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -5,7 +5,7 @@
  *	Authors:
  *	Alexander Aring	<aar@pengutronix.de>
  *
- *	Orignal written by:
+ *	Original written by:
  *	Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
  *	Jon Smirl <jonsmirl@gmail.com>
  */
@@ -82,7 +82,7 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
 	if (fail)
 		return -EINVAL;
 
-	/* UDP length needs to be infered from the lower layers
+	/* UDP length needs to be inferred from the lower layers
 	 * here, we obtain the hint from the remaining size of the
 	 * frame
 	 */
-- 
cgit v1.2.3


From 5a5586112b929546e16029261a987c9197bfdfa2 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Wed, 24 Mar 2021 17:07:42 +0800
Subject: net: stmmac: support FPE link partner hand-shaking procedure

In order to discover whether remote station supports frame preemption,
local station sends verify mPacket and expects response mPacket in
return from the remote station.

So, we add the functions to send and handle event when verify mPacket
and response mPacket are exchanged between the networked stations.

The mechanism to handle different FPE states between local and remote
station (link partner) is implemented using workqueue which starts a
task each time there is some sign of verify & response mPacket exchange
as check in FPE IRQ event. The task retries couple of times to try to
spot the states that both stations are ready to enter FPE ON. This allows
different end points to enable FPE at different time and verify-response
mPacket can happen asynchronously. Ultimately, the task will only turn
FPE ON when local station have both exchange response in both directions.

Thanks to Voon Weifeng for implementing the core functions for detecting
FPE events and send mPacket and phylink related change.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Co-developed-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Co-developed-by: Tan Tee Min <tee.min.tan@intel.com>
Signed-off-by: Tan Tee Min <tee.min.tan@intel.com>
Co-developed-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |   7 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c |   8 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c      |  49 ++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h      |  11 ++
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |   7 +
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   7 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 183 ++++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c   |  39 ++++-
 include/linux/stmmac.h                            |  27 ++++
 9 files changed, 331 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 95469059dca1..d065b11b7b10 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -315,6 +315,13 @@ enum dma_irq_status {
 #define	CORE_IRQ_RX_PATH_IN_LPI_MODE	(1 << 2)
 #define	CORE_IRQ_RX_PATH_EXIT_LPI_MODE	(1 << 3)
 
+/* FPE defines */
+#define FPE_EVENT_UNKNOWN		0
+#define FPE_EVENT_TRSP			BIT(0)
+#define FPE_EVENT_TVER			BIT(1)
+#define FPE_EVENT_RRSP			BIT(2)
+#define FPE_EVENT_RVER			BIT(3)
+
 #define CORE_IRQ_MTL_RX_OVERFLOW	BIT(8)
 
 /* Physical Coding Sublayer */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 29f765a246a0..95864f014ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -53,6 +53,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
 	if (hw->pcs)
 		value |= GMAC_PCS_IRQ_DEFAULT;
 
+	/* Enable FPE interrupt */
+	if ((GMAC_HW_FEAT_FPESEL & readl(ioaddr + GMAC_HW_FEATURE3)) >> 26)
+		value |= GMAC_INT_FPE_EN;
+
 	writel(value, ioaddr + GMAC_INT_EN);
 }
 
@@ -1245,6 +1249,8 @@ const struct stmmac_ops dwmac410_ops = {
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
 	.fpe_configure = dwmac5_fpe_configure,
+	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
+	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
@@ -1294,6 +1300,8 @@ const struct stmmac_ops dwmac510_ops = {
 	.config_l4_filter = dwmac4_config_l4_filter,
 	.est_configure = dwmac5_est_configure,
 	.fpe_configure = dwmac5_fpe_configure,
+	.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
+	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 0ae85f8adf67..5b010ebfede9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -707,3 +707,52 @@ void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 	value |= EFPE;
 	writel(value, ioaddr + MAC_FPE_CTRL_STS);
 }
+
+int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev)
+{
+	u32 value;
+	int status;
+
+	status = FPE_EVENT_UNKNOWN;
+
+	value = readl(ioaddr + MAC_FPE_CTRL_STS);
+
+	if (value & TRSP) {
+		status |= FPE_EVENT_TRSP;
+		netdev_info(dev, "FPE: Respond mPacket is transmitted\n");
+	}
+
+	if (value & TVER) {
+		status |= FPE_EVENT_TVER;
+		netdev_info(dev, "FPE: Verify mPacket is transmitted\n");
+	}
+
+	if (value & RRSP) {
+		status |= FPE_EVENT_RRSP;
+		netdev_info(dev, "FPE: Respond mPacket is received\n");
+	}
+
+	if (value & RVER) {
+		status |= FPE_EVENT_RVER;
+		netdev_info(dev, "FPE: Verify mPacket is received\n");
+	}
+
+	return status;
+}
+
+void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type)
+{
+	u32 value;
+
+	value = readl(ioaddr + MAC_FPE_CTRL_STS);
+
+	if (type == MPACKET_VERIFY) {
+		value &= ~SRSP;
+		value |= SVER;
+	} else {
+		value &= ~SVER;
+		value |= SRSP;
+	}
+
+	writel(value, ioaddr + MAC_FPE_CTRL_STS);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 709bbfc9ae61..ff555d8b0cdf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -12,6 +12,12 @@
 #define TMOUTEN				BIT(0)
 
 #define MAC_FPE_CTRL_STS		0x00000234
+#define TRSP				BIT(19)
+#define TVER				BIT(18)
+#define RRSP				BIT(17)
+#define RVER				BIT(16)
+#define SRSP				BIT(2)
+#define SVER				BIT(1)
 #define EFPE				BIT(0)
 
 #define MAC_PPS_CONTROL			0x00000b70
@@ -128,6 +134,8 @@
 #define GMAC_RXQCTRL_VFFQ_SHIFT		17
 #define GMAC_RXQCTRL_VFFQE		BIT(16)
 
+#define GMAC_INT_FPE_EN			BIT(17)
+
 int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp);
 int dwmac5_safety_feat_irq_status(struct net_device *ndev,
 		void __iomem *ioaddr, unsigned int asp,
@@ -145,5 +153,8 @@ void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev,
 			   struct stmmac_extra_stats *x, u32 txqcnt);
 void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			  bool enable);
+void dwmac5_fpe_send_mpacket(void __iomem *ioaddr,
+			     enum stmmac_mpacket_type type);
+int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 59bf7078a754..45edac5f60db 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -397,6 +397,9 @@ struct stmmac_ops {
 			       struct stmmac_extra_stats *x, u32 txqcnt);
 	void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
 			      bool enable);
+	void (*fpe_send_mpacket)(void __iomem *ioaddr,
+				 enum stmmac_mpacket_type type);
+	int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -497,6 +500,10 @@ struct stmmac_ops {
 	stmmac_do_void_callback(__priv, mac, est_irq_status, __args)
 #define stmmac_fpe_configure(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, fpe_configure, __args)
+#define stmmac_fpe_send_mpacket(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, fpe_send_mpacket, __args)
+#define stmmac_fpe_irq_status(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, fpe_irq_status, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 375c503d2df8..4faad331a4ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -234,6 +234,12 @@ struct stmmac_priv {
 	struct workqueue_struct *wq;
 	struct work_struct service_task;
 
+	/* Workqueue for handling FPE hand-shaking */
+	unsigned long fpe_task_state;
+	struct workqueue_struct *fpe_wq;
+	struct work_struct fpe_task;
+	char wq_name[IFNAMSIZ + 4];
+
 	/* TC Handling */
 	unsigned int tc_entries_max;
 	unsigned int tc_off_max;
@@ -273,6 +279,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv);
 int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt);
 int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
 int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled);
+void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable);
 
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 8d7015d3a537..170296820af0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -971,6 +971,21 @@ static void stmmac_mac_an_restart(struct phylink_config *config)
 	/* Not Supported */
 }
 
+static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up)
+{
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+
+	if (is_up && *hs_enable) {
+		stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY);
+	} else {
+		*lo_state = FPE_EVENT_UNKNOWN;
+		*lp_state = FPE_EVENT_UNKNOWN;
+	}
+}
+
 static void stmmac_mac_link_down(struct phylink_config *config,
 				 unsigned int mode, phy_interface_t interface)
 {
@@ -981,6 +996,8 @@ static void stmmac_mac_link_down(struct phylink_config *config,
 	priv->tx_lpi_enabled = false;
 	stmmac_eee_init(priv);
 	stmmac_set_eee_pls(priv, priv->hw, false);
+
+	stmmac_fpe_link_state_handle(priv, false);
 }
 
 static void stmmac_mac_link_up(struct phylink_config *config,
@@ -1079,6 +1096,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 		priv->tx_lpi_enabled = priv->eee_enabled;
 		stmmac_set_eee_pls(priv, priv->hw, true);
 	}
+
+	stmmac_fpe_link_state_handle(priv, true);
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
@@ -2793,6 +2812,26 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
 	}
 }
 
+static int stmmac_fpe_start_wq(struct stmmac_priv *priv)
+{
+	char *name;
+
+	clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+
+	name = priv->wq_name;
+	sprintf(name, "%s-fpe", priv->dev->name);
+
+	priv->fpe_wq = create_singlethread_workqueue(name);
+	if (!priv->fpe_wq) {
+		netdev_err(priv->dev, "%s: Failed to create workqueue\n", name);
+
+		return -ENOMEM;
+	}
+	netdev_info(priv->dev, "FPE workqueue start");
+
+	return 0;
+}
+
 /**
  * stmmac_hw_setup - setup mac in a usable state.
  *  @dev : pointer to the device structure.
@@ -2929,6 +2968,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	/* Start the ball rolling... */
 	stmmac_start_all_dma(priv);
 
+	if (priv->dma_cap.fpesel) {
+		stmmac_fpe_start_wq(priv);
+
+		if (priv->plat->fpe_cfg->enable)
+			stmmac_fpe_handshake(priv, true);
+	}
+
 	return 0;
 }
 
@@ -3090,6 +3136,16 @@ init_phy_error:
 	return ret;
 }
 
+static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
+{
+	set_bit(__FPE_REMOVING, &priv->fpe_task_state);
+
+	if (priv->fpe_wq)
+		destroy_workqueue(priv->fpe_wq);
+
+	netdev_info(priv->dev, "FPE workqueue stop");
+}
+
 /**
  *  stmmac_release - close entry point of the driver
  *  @dev : device pointer.
@@ -3139,6 +3195,9 @@ static int stmmac_release(struct net_device *dev)
 
 	pm_runtime_put(priv->device);
 
+	if (priv->dma_cap.fpesel)
+		stmmac_fpe_stop_wq(priv);
+
 	return 0;
 }
 
@@ -4280,6 +4339,48 @@ static int stmmac_set_features(struct net_device *netdev,
 	return 0;
 }
 
+static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status)
+{
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+
+	if (status == FPE_EVENT_UNKNOWN || !*hs_enable)
+		return;
+
+	/* If LP has sent verify mPacket, LP is FPE capable */
+	if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) {
+		if (*lp_state < FPE_STATE_CAPABLE)
+			*lp_state = FPE_STATE_CAPABLE;
+
+		/* If user has requested FPE enable, quickly response */
+		if (*hs_enable)
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_RESPONSE);
+	}
+
+	/* If Local has sent verify mPacket, Local is FPE capable */
+	if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) {
+		if (*lo_state < FPE_STATE_CAPABLE)
+			*lo_state = FPE_STATE_CAPABLE;
+	}
+
+	/* If LP has sent response mPacket, LP is entering FPE ON */
+	if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP)
+		*lp_state = FPE_STATE_ENTERING_ON;
+
+	/* If Local has sent response mPacket, Local is entering FPE ON */
+	if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP)
+		*lo_state = FPE_STATE_ENTERING_ON;
+
+	if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) &&
+	    !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) &&
+	    priv->fpe_wq) {
+		queue_work(priv->fpe_wq, &priv->fpe_task);
+	}
+}
+
 /**
  *  stmmac_interrupt - main ISR
  *  @irq: interrupt number.
@@ -4318,6 +4419,13 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev,
 				      &priv->xstats, tx_cnt);
 
+	if (priv->dma_cap.fpesel) {
+		int status = stmmac_fpe_irq_status(priv, priv->ioaddr,
+						   priv->dev);
+
+		stmmac_fpe_event_status(priv, status);
+	}
+
 	/* To handle GMAC own interrupts */
 	if ((priv->plat->has_gmac) || xmac) {
 		int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
@@ -5065,6 +5173,68 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size)
 	return ret;
 }
 
+#define SEND_VERIFY_MPAKCET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n"
+static void stmmac_fpe_lp_task(struct work_struct *work)
+{
+	struct stmmac_priv *priv = container_of(work, struct stmmac_priv,
+						fpe_task);
+	struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg;
+	enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state;
+	enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state;
+	bool *hs_enable = &fpe_cfg->hs_enable;
+	bool *enable = &fpe_cfg->enable;
+	int retries = 20;
+
+	while (retries-- > 0) {
+		/* Bail out immediately if FPE handshake is OFF */
+		if (*lo_state == FPE_STATE_OFF || !*hs_enable)
+			break;
+
+		if (*lo_state == FPE_STATE_ENTERING_ON &&
+		    *lp_state == FPE_STATE_ENTERING_ON) {
+			stmmac_fpe_configure(priv, priv->ioaddr,
+					     priv->plat->tx_queues_to_use,
+					     priv->plat->rx_queues_to_use,
+					     *enable);
+
+			netdev_info(priv->dev, "configured FPE\n");
+
+			*lo_state = FPE_STATE_ON;
+			*lp_state = FPE_STATE_ON;
+			netdev_info(priv->dev, "!!! BOTH FPE stations ON\n");
+			break;
+		}
+
+		if ((*lo_state == FPE_STATE_CAPABLE ||
+		     *lo_state == FPE_STATE_ENTERING_ON) &&
+		     *lp_state != FPE_STATE_ON) {
+			netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT,
+				    *lo_state, *lp_state);
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_VERIFY);
+		}
+		/* Sleep then retry */
+		msleep(500);
+	}
+
+	clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+}
+
+void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable)
+{
+	if (priv->plat->fpe_cfg->hs_enable != enable) {
+		if (enable) {
+			stmmac_fpe_send_mpacket(priv, priv->ioaddr,
+						MPACKET_VERIFY);
+		} else {
+			priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF;
+			priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF;
+		}
+
+		priv->plat->fpe_cfg->hs_enable = enable;
+	}
+}
+
 /**
  * stmmac_dvr_probe
  * @device: device pointer
@@ -5122,6 +5292,9 @@ int stmmac_dvr_probe(struct device *device,
 
 	INIT_WORK(&priv->service_task, stmmac_service_task);
 
+	/* Initialize Link Partner FPE workqueue */
+	INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task);
+
 	/* Override with kernel parameters if supplied XXX CRS XXX
 	 * this needs to have multiple instances
 	 */
@@ -5435,8 +5608,18 @@ int stmmac_suspend(struct device *dev)
 		if (ret)
 			return ret;
 	}
+
 	mutex_unlock(&priv->lock);
 
+	if (priv->dma_cap.fpesel) {
+		/* Disable FPE */
+		stmmac_fpe_configure(priv, priv->ioaddr,
+				     priv->plat->tx_queues_to_use,
+				     priv->plat->rx_queues_to_use, false);
+
+		stmmac_fpe_handshake(priv, false);
+	}
+
 	priv->speed = SPEED_UNKNOWN;
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index b80cb2985b39..1d84ee359808 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -297,6 +297,17 @@ static int tc_init(struct stmmac_priv *priv)
 
 	dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n",
 			priv->tc_entries_max, priv->tc_off_max);
+
+	if (!priv->plat->fpe_cfg) {
+		priv->plat->fpe_cfg = devm_kzalloc(priv->device,
+						   sizeof(*priv->plat->fpe_cfg),
+						   GFP_KERNEL);
+		if (!priv->plat->fpe_cfg)
+			return -ENOMEM;
+	} else {
+		memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg));
+	}
+
 	return 0;
 }
 
@@ -829,13 +840,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
 	if (fpe && !priv->dma_cap.fpesel)
 		return -EOPNOTSUPP;
 
-	ret = stmmac_fpe_configure(priv, priv->ioaddr,
-				   priv->plat->tx_queues_to_use,
-				   priv->plat->rx_queues_to_use, fpe);
-	if (ret && fpe) {
-		netdev_err(priv->dev, "failed to enable Frame Preemption\n");
-		return ret;
-	}
+	/* Actual FPE register configuration will be done after FPE handshake
+	 * is success.
+	 */
+	priv->plat->fpe_cfg->enable = fpe;
 
 	ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
 				   priv->plat->clk_ptp_rate);
@@ -845,12 +853,29 @@ static int tc_setup_taprio(struct stmmac_priv *priv,
 	}
 
 	netdev_info(priv->dev, "configured EST\n");
+
+	if (fpe) {
+		stmmac_fpe_handshake(priv, true);
+		netdev_info(priv->dev, "start FPE handshake\n");
+	}
+
 	return 0;
 
 disable:
 	priv->plat->est->enable = false;
 	stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
 			     priv->plat->clk_ptp_rate);
+
+	priv->plat->fpe_cfg->enable = false;
+	stmmac_fpe_configure(priv, priv->ioaddr,
+			     priv->plat->tx_queues_to_use,
+			     priv->plat->rx_queues_to_use,
+			     false);
+	netdev_info(priv->dev, "disabled FPE\n");
+
+	stmmac_fpe_handshake(priv, false);
+	netdev_info(priv->dev, "stop FPE handshake\n");
+
 	return ret;
 }
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 5134e802f39a..febdb43d27e5 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -144,6 +144,32 @@ struct stmmac_txq_cfg {
 	int tbs_en;
 };
 
+/* FPE link state */
+enum stmmac_fpe_state {
+	FPE_STATE_OFF = 0,
+	FPE_STATE_CAPABLE = 1,
+	FPE_STATE_ENTERING_ON = 2,
+	FPE_STATE_ON = 3,
+};
+
+/* FPE link-partner hand-shaking mPacket type */
+enum stmmac_mpacket_type {
+	MPACKET_VERIFY = 0,
+	MPACKET_RESPONSE = 1,
+};
+
+enum stmmac_fpe_task_state_t {
+	__FPE_REMOVING,
+	__FPE_TASK_SCHED,
+};
+
+struct stmmac_fpe_cfg {
+	bool enable;				/* FPE enable */
+	bool hs_enable;				/* FPE handshake enable */
+	enum stmmac_fpe_state lp_fpe_state;	/* Link Partner FPE state */
+	enum stmmac_fpe_state lo_fpe_state;	/* Local station FPE state */
+};
+
 struct plat_stmmacenet_data {
 	int bus_id;
 	int phy_addr;
@@ -155,6 +181,7 @@ struct plat_stmmacenet_data {
 	struct device_node *mdio_node;
 	struct stmmac_dma_cfg *dma_cfg;
 	struct stmmac_est *est;
+	struct stmmac_fpe_cfg *fpe_cfg;
 	int clk_csr;
 	int has_gmac;
 	int enh_desc;
-- 
cgit v1.2.3


From 84c7f6c33f42a12eb036ebf0f0e3670799304120 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 24 Mar 2021 14:07:22 +0100
Subject: hinic: avoid gcc -Wrestrict warning

With extra warnings enabled, gcc complains that snprintf should not
take the same buffer as source and destination:

drivers/net/ethernet/huawei/hinic/hinic_ethtool.c: In function 'hinic_set_settings_to_hw':
drivers/net/ethernet/huawei/hinic/hinic_ethtool.c:480:9: error: 'snprintf' argument 4 overlaps destination object 'set_link_str' [-Werror=restrict]
  480 |   err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN,
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  481 |           "%sspeed %d ", set_link_str, speed);
      |           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/huawei/hinic/hinic_ethtool.c:464:7: note: destination object referenced by 'restrict'-qualified argument 1 was declared here
  464 |  char set_link_str[SET_LINK_STR_MAX_LEN] = {0};

Rewrite this to avoid the nested sprintf and instead use separate
buffers, which is simpler.

Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 25 +++++++++--------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index c7848c382a5e..dc024ef521c0 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -34,7 +34,7 @@
 #include "hinic_rx.h"
 #include "hinic_dev.h"
 
-#define SET_LINK_STR_MAX_LEN	128
+#define SET_LINK_STR_MAX_LEN	16
 
 #define GET_SUPPORTED_MODE	0
 #define GET_ADVERTISED_MODE	1
@@ -462,24 +462,19 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev,
 {
 	struct hinic_link_ksettings_info settings = {0};
 	char set_link_str[SET_LINK_STR_MAX_LEN] = {0};
+	const char *autoneg_str;
 	struct net_device *netdev = nic_dev->netdev;
 	enum nic_speed_level speed_level = 0;
 	int err;
 
-	err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN, "%s",
-		       (set_settings & HILINK_LINK_SET_AUTONEG) ?
-		       (autoneg ? "autong enable " : "autong disable ") : "");
-	if (err < 0 || err >= SET_LINK_STR_MAX_LEN) {
-		netif_err(nic_dev, drv, netdev, "Failed to snprintf link state, function return(%d) and dest_len(%d)\n",
-			  err, SET_LINK_STR_MAX_LEN);
-		return -EFAULT;
-	}
+	autoneg_str = (set_settings & HILINK_LINK_SET_AUTONEG) ?
+		      (autoneg ? "autong enable " : "autong disable ") : "";
 
 	if (set_settings & HILINK_LINK_SET_SPEED) {
 		speed_level = hinic_ethtool_to_hw_speed_level(speed);
 		err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN,
-			       "%sspeed %d ", set_link_str, speed);
-		if (err <= 0 || err >= SET_LINK_STR_MAX_LEN) {
+			       "speed %d ", speed);
+		if (err >= SET_LINK_STR_MAX_LEN) {
 			netif_err(nic_dev, drv, netdev, "Failed to snprintf link speed, function return(%d) and dest_len(%d)\n",
 				  err, SET_LINK_STR_MAX_LEN);
 			return -EFAULT;
@@ -494,11 +489,11 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev,
 	err = hinic_set_link_settings(nic_dev->hwdev, &settings);
 	if (err != HINIC_MGMT_CMD_UNSUPPORTED) {
 		if (err)
-			netif_err(nic_dev, drv, netdev, "Set %s failed\n",
-				  set_link_str);
+			netif_err(nic_dev, drv, netdev, "Set %s%sfailed\n",
+				  autoneg_str, set_link_str);
 		else
-			netif_info(nic_dev, drv, netdev, "Set %s successfully\n",
-				   set_link_str);
+			netif_info(nic_dev, drv, netdev, "Set %s%ssuccessfully\n",
+				   autoneg_str, set_link_str);
 
 		return err;
 	}
-- 
cgit v1.2.3


From 6ad2dd6c14d3989b44cdc17f1e7258bf613dd070 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 12:56:49 +0100
Subject: ipv6: fix clang Wformat warning

When building with 'make W=1', clang warns about a mismatched
format string:

net/ipv6/ah6.c:710:4: error: format specifies type 'unsigned short' but the argument has type 'int' [-Werror,-Wformat]
                        aalg_desc->uinfo.auth.icv_fullbits/8);
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/printk.h:375:34: note: expanded from macro 'pr_info'
        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                ~~~     ^~~~~~~~~~~
net/ipv6/esp6.c:1153:5: error: format specifies type 'unsigned short' but the argument has type 'int' [-Werror,-Wformat]
                                aalg_desc->uinfo.auth.icv_fullbits / 8);
                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/printk.h:375:34: note: expanded from macro 'pr_info'
        printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
                                ~~~     ^~~~~~~~~~~

Here, the result of dividing a 16-bit number by a 32-bit number
produces a 32-bit result, which is printed as a 16-bit integer.

Change the %hu format to the normal %u, which has the same effect
but avoids the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/ah6.c  | 2 +-
 net/ipv6/esp6.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 440080da805b..01c638f5d8b8 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -705,7 +705,7 @@ static int ah6_init_state(struct xfrm_state *x)
 
 	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
 	    crypto_ahash_digestsize(ahash)) {
-		pr_info("AH: %s digestsize %u != %hu\n",
+		pr_info("AH: %s digestsize %u != %u\n",
 			x->aalg->alg_name, crypto_ahash_digestsize(ahash),
 			aalg_desc->uinfo.auth.icv_fullbits/8);
 		goto error;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 153ad103ba74..831a588b04a2 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1147,7 +1147,7 @@ static int esp_init_authenc(struct xfrm_state *x)
 		err = -EINVAL;
 		if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
 		    crypto_aead_authsize(aead)) {
-			pr_info("ESP: %s digestsize %u != %hu\n",
+			pr_info("ESP: %s digestsize %u != %u\n",
 				x->aalg->alg_name,
 				crypto_aead_authsize(aead),
 				aalg_desc->uinfo.auth.icv_fullbits / 8);
-- 
cgit v1.2.3


From ac0565462e330a2b762ca5849a4140b29d725786 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:58:56 -0700
Subject: Bluetooth: btintel: Check firmware version before download

This checks the firmware build number, week and year against the
repective loaded version. If details are a match, skip the download
process.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c   | 106 +++++++++++++++++++++++++++++++++---------
 drivers/bluetooth/btintel.h   |   5 +-
 drivers/bluetooth/btusb.c     |  18 ++++++-
 drivers/bluetooth/hci_intel.c |   7 ++-
 4 files changed, 109 insertions(+), 27 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index fa97324454ea..3ff698a0bd25 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -24,6 +24,14 @@
 #define ECDSA_OFFSET		644
 #define ECDSA_HEADER_LEN	320
 
+#define CMD_WRITE_BOOT_PARAMS	0xfc0e
+struct cmd_write_boot_params {
+	u32 boot_addr;
+	u8  fw_build_num;
+	u8  fw_build_ww;
+	u8  fw_build_yy;
+} __packed;
+
 int btintel_check_bdaddr(struct hci_dev *hdev)
 {
 	struct hci_rp_read_bd_addr *bda;
@@ -841,7 +849,7 @@ static int btintel_sfi_ecdsa_header_secure_send(struct hci_dev *hdev,
 
 static int btintel_download_firmware_payload(struct hci_dev *hdev,
 					     const struct firmware *fw,
-					     u32 *boot_param, size_t offset)
+					     size_t offset)
 {
 	int err;
 	const u8 *fw_ptr;
@@ -854,21 +862,6 @@ static int btintel_download_firmware_payload(struct hci_dev *hdev,
 	while (fw_ptr - fw->data < fw->size) {
 		struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
 
-		/* Each SKU has a different reset parameter to use in the
-		 * HCI_Intel_Reset command and it is embedded in the firmware
-		 * data. So, instead of using static value per SKU, check
-		 * the firmware data and save it for later use.
-		 */
-		if (le16_to_cpu(cmd->opcode) == 0xfc0e) {
-			/* The boot parameter is the first 32-bit value
-			 * and rest of 3 octets are reserved.
-			 */
-			*boot_param = get_unaligned_le32(fw_ptr + frag_len +
-							 sizeof(*cmd));
-
-			bt_dev_dbg(hdev, "boot_param=0x%x", *boot_param);
-		}
-
 		frag_len += sizeof(*cmd) + cmd->plen;
 
 		/* The parameter length of the secure send command requires
@@ -897,28 +890,101 @@ done:
 	return err;
 }
 
+static bool btintel_firmware_version(struct hci_dev *hdev,
+				     u8 num, u8 ww, u8 yy,
+				     const struct firmware *fw,
+				     u32 *boot_addr)
+{
+	const u8 *fw_ptr;
+
+	fw_ptr = fw->data;
+
+	while (fw_ptr - fw->data < fw->size) {
+		struct hci_command_hdr *cmd = (void *)(fw_ptr);
+
+		/* Each SKU has a different reset parameter to use in the
+		 * HCI_Intel_Reset command and it is embedded in the firmware
+		 * data. So, instead of using static value per SKU, check
+		 * the firmware data and save it for later use.
+		 */
+		if (le16_to_cpu(cmd->opcode) == CMD_WRITE_BOOT_PARAMS) {
+			struct cmd_write_boot_params *params;
+
+			params = (void *)(fw_ptr + sizeof(*cmd));
+
+			bt_dev_info(hdev, "Boot Address: 0x%x",
+				    le32_to_cpu(params->boot_addr));
+
+			bt_dev_info(hdev, "Firmware Version: %u-%u.%u",
+				    params->fw_build_num, params->fw_build_ww,
+				    params->fw_build_yy);
+
+			return (num == params->fw_build_num &&
+				ww == params->fw_build_ww &&
+				yy == params->fw_build_yy);
+		}
+
+		fw_ptr += sizeof(*cmd) + cmd->plen;
+	}
+
+	return false;
+}
+
 int btintel_download_firmware(struct hci_dev *hdev,
+			      struct intel_version *ver,
 			      const struct firmware *fw,
 			      u32 *boot_param)
 {
 	int err;
 
+	/* SfP and WsP don't seem to update the firmware version on file
+	 * so version checking is currently not possible.
+	 */
+	switch (ver->hw_variant) {
+	case 0x0b:	/* SfP */
+	case 0x0c:	/* WsP */
+		/* Skip version checking */
+		break;
+	default:
+		/* Skip download if firmware has the same version */
+		if (btintel_firmware_version(hdev, ver->fw_build_num,
+					     ver->fw_build_ww, ver->fw_build_yy,
+					     fw, boot_param)) {
+			bt_dev_info(hdev, "Firmware already loaded");
+			/* Return -EALREADY to indicate that the firmware has
+			 * already been loaded.
+			 */
+			return -EALREADY;
+		}
+	}
+
 	err = btintel_sfi_rsa_header_secure_send(hdev, fw);
 	if (err)
 		return err;
 
-	return btintel_download_firmware_payload(hdev, fw, boot_param,
-						 RSA_HEADER_LEN);
+	return btintel_download_firmware_payload(hdev, fw, RSA_HEADER_LEN);
 }
 EXPORT_SYMBOL_GPL(btintel_download_firmware);
 
 int btintel_download_firmware_newgen(struct hci_dev *hdev,
+				     struct intel_version_tlv *ver,
 				     const struct firmware *fw, u32 *boot_param,
 				     u8 hw_variant, u8 sbe_type)
 {
 	int err;
 	u32 css_header_ver;
 
+	/* Skip download if firmware has the same version */
+	if (btintel_firmware_version(hdev, ver->min_fw_build_nn,
+				     ver->min_fw_build_cw, ver->min_fw_build_yy,
+				     fw, boot_param)) {
+		bt_dev_info(hdev, "Firmware already loaded");
+		/* Return -EALREADY to indicate that firmware has already been
+		 * loaded.
+		 */
+		return -EALREADY;
+	}
+
 	/* iBT hardware variants 0x0b, 0x0c, 0x11, 0x12, 0x13, 0x14 support
 	 * only RSA secure boot engine. Hence, the corresponding sfi file will
 	 * have RSA header of 644 bytes followed by Command Buffer.
@@ -948,7 +1014,7 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
 		if (err)
 			return err;
 
-		err = btintel_download_firmware_payload(hdev, fw, boot_param, RSA_HEADER_LEN);
+		err = btintel_download_firmware_payload(hdev, fw, RSA_HEADER_LEN);
 		if (err)
 			return err;
 	} else if (hw_variant >= 0x17) {
@@ -969,7 +1035,6 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
 				return err;
 
 			err = btintel_download_firmware_payload(hdev, fw,
-								boot_param,
 								RSA_HEADER_LEN + ECDSA_HEADER_LEN);
 			if (err)
 				return err;
@@ -979,7 +1044,6 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
 				return err;
 
 			err = btintel_download_firmware_payload(hdev, fw,
-								boot_param,
 								RSA_HEADER_LEN + ECDSA_HEADER_LEN);
 			if (err)
 				return err;
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 6511b091caf5..51f1f2c883b4 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -163,9 +163,10 @@ struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
 int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param);
 int btintel_read_boot_params(struct hci_dev *hdev,
 			     struct intel_boot_params *params);
-int btintel_download_firmware(struct hci_dev *dev, const struct firmware *fw,
-			      u32 *boot_param);
+int btintel_download_firmware(struct hci_dev *dev, struct intel_version *ver,
+			      const struct firmware *fw, u32 *boot_param);
 int btintel_download_firmware_newgen(struct hci_dev *hdev,
+				     struct intel_version_tlv *ver,
 				     const struct firmware *fw,
 				     u32 *boot_param, u8 hw_variant,
 				     u8 sbe_type);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 0d355bb45e08..de220ab7f231 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2557,10 +2557,17 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	set_bit(BTUSB_DOWNLOADING, &data->flags);
 
 	/* Start firmware downloading and get boot parameter */
-	err = btintel_download_firmware_newgen(hdev, fw, boot_param,
+	err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param,
 					       INTEL_HW_VARIANT(ver->cnvi_bt),
 					       ver->sbe_type);
 	if (err < 0) {
+		if (err == -EALREADY) {
+			/* Firmware has already been loaded */
+			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+			err = 0;
+			goto done;
+		}
+
 		/* When FW download fails, send Intel Reset to retry
 		 * FW download.
 		 */
@@ -2752,8 +2759,15 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	set_bit(BTUSB_DOWNLOADING, &data->flags);
 
 	/* Start firmware downloading and get boot parameter */
-	err = btintel_download_firmware(hdev, fw, boot_param);
+	err = btintel_download_firmware(hdev, ver, fw, boot_param);
 	if (err < 0) {
+		if (err == -EALREADY) {
+			/* Firmware has already been loaded */
+			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+			err = 0;
+			goto done;
+		}
+
 		/* When FW download fails, send Intel Reset to retry
 		 * FW download.
 		 */
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index b20a40fab83e..7249b91d9b91 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -735,7 +735,7 @@ static int intel_setup(struct hci_uart *hu)
 	set_bit(STATE_DOWNLOADING, &intel->flags);
 
 	/* Start firmware downloading and get boot parameter */
-	err = btintel_download_firmware(hdev, fw, &boot_param);
+	err = btintel_download_firmware(hdev, &ver, fw, &boot_param);
 	if (err < 0)
 		goto done;
 
@@ -784,7 +784,10 @@ static int intel_setup(struct hci_uart *hu)
 done:
 	release_firmware(fw);
 
-	if (err < 0)
+	/* Check if there was an error and if is not -EALREADY which means the
+	 * firmware has already been loaded.
+	 */
+	if (err < 0 && err != -EALREADY)
 		return err;
 
 	/* We need to restore the default speed before Intel reset */
-- 
cgit v1.2.3


From 9b16bfbf411664697817385096ce59df453e1fb6 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:58:57 -0700
Subject: Bluetooth: btintel: Move operational checks after version check

In order to allow new firmware to load, it first needs to check if the
firmware version on file matches the one loaded if it doesn't, then it
needs to revert to bootloader mode in order to load the new firmware.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 22 +++++++++++++++
 drivers/bluetooth/btusb.c   | 68 ++++++++++++++++-----------------------------
 2 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 3ff698a0bd25..13bc93a986c7 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -958,6 +958,17 @@ int btintel_download_firmware(struct hci_dev *hdev,
 		}
 	}
 
+	/* The firmware variant determines if the device is in bootloader
+	 * mode or is running operational firmware. The value 0x06 identifies
+	 * the bootloader and the value 0x23 identifies the operational
+	 * firmware.
+	 *
+	 * If the firmware version has changed that means it needs to be reset
+	 * to bootloader when operational so the new firmware can be loaded.
+	 */
+	if (ver->fw_variant == 0x23)
+		return -EINVAL;
+
 	err = btintel_sfi_rsa_header_secure_send(hdev, fw);
 	if (err)
 		return err;
@@ -985,6 +996,17 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
 		return -EALREADY;
 	}
 
+	/* The firmware variant determines if the device is in bootloader
+	 * mode or is running operational firmware. The value 0x01 identifies
+	 * the bootloader and the value 0x03 identifies the operational
+	 * firmware.
+	 *
+	 * If the firmware version has changed that means it needs to be reset
+	 * to bootloader when operational so the new firmware can be loaded.
+	 */
+	if (ver->img_type == 0x03)
+		return -EINVAL;
+
 	/* iBT hardware variants 0x0b, 0x0c, 0x11, 0x12, 0x13, 0x14 support
 	 * only RSA secure boot engine. Hence, the corresponding sfi file will
 	 * have RSA header of 644 bytes followed by Command Buffer.
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index de220ab7f231..a30f7ea9be36 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2402,11 +2402,17 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
 	return -EILSEQ;
 }
 
-static bool btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
+static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
 					     struct intel_boot_params *params,
 					     char *fw_name, size_t len,
 					     const char *suffix)
 {
+	/* The hardware platform number has a fixed value of 0x37 and
+	 * for now only accept this single value.
+	 */
+	if (ver->hw_platform != 0x37)
+		return -EINVAL;
+
 	switch (ver->hw_variant) {
 	case 0x0b:	/* SfP */
 	case 0x0c:	/* WsP */
@@ -2426,9 +2432,10 @@ static bool btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
 			suffix);
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
-	return true;
+
+	return 0;
 }
 
 static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv,
@@ -2483,7 +2490,6 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	if (ver->img_type == 0x03) {
 		clear_bit(BTUSB_BOOTLOADER, &data->flags);
 		btintel_check_bdaddr(hdev);
-		return 0;
 	}
 
 	/* Check for supported iBT hardware variants of this firmware
@@ -2628,35 +2634,6 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	if (!ver || !params)
 		return -EINVAL;
 
-	/* The hardware platform number has a fixed value of 0x37 and
-	 * for now only accept this single value.
-	 */
-	if (ver->hw_platform != 0x37) {
-		bt_dev_err(hdev, "Unsupported Intel hardware platform (%u)",
-			   ver->hw_platform);
-		return -EINVAL;
-	}
-
-	/* Check for supported iBT hardware variants of this firmware
-	 * loading method.
-	 *
-	 * This check has been put in place to ensure correct forward
-	 * compatibility options when newer hardware variants come along.
-	 */
-	switch (ver->hw_variant) {
-	case 0x0b:	/* SfP */
-	case 0x0c:	/* WsP */
-	case 0x11:	/* JfP */
-	case 0x12:	/* ThP */
-	case 0x13:	/* HrP */
-	case 0x14:	/* CcP */
-		break;
-	default:
-		bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
-			   ver->hw_variant);
-		return -EINVAL;
-	}
-
 	btintel_version_info(hdev, ver);
 
 	/* The firmware variant determines if the device is in bootloader
@@ -2675,16 +2652,18 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	if (ver->fw_variant == 0x23) {
 		clear_bit(BTUSB_BOOTLOADER, &data->flags);
 		btintel_check_bdaddr(hdev);
-		return 0;
-	}
 
-	/* If the device is not in bootloader mode, then the only possible
-	 * choice is to return an error and abort the device initialization.
-	 */
-	if (ver->fw_variant != 0x06) {
-		bt_dev_err(hdev, "Unsupported Intel firmware variant (%u)",
-			   ver->fw_variant);
-		return -ENODEV;
+		/* SfP and WsP don't seem to update the firmware version on file
+		 * so version checking is currently possible.
+		 */
+		switch (ver->hw_variant) {
+		case 0x0b:	/* SfP */
+		case 0x0c:	/* WsP */
+			return 0;
+		}
+
+		/* Proceed to download to check if the version matches */
+		goto download;
 	}
 
 	/* Read the secure boot parameters to identify the operating
@@ -2712,6 +2691,7 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 		set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
 	}
 
+download:
 	/* With this Intel bootloader only the hardware variant and device
 	 * revision information are used to select the right firmware for SfP
 	 * and WsP.
@@ -2735,7 +2715,7 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	 */
 	err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
 						sizeof(fwname), "sfi");
-	if (!err) {
+	if (err < 0) {
 		bt_dev_err(hdev, "Unsupported Intel firmware naming");
 		return -EINVAL;
 	}
@@ -2908,7 +2888,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	err = btusb_setup_intel_new_get_fw_name(&ver, &params, ddcname,
 						sizeof(ddcname), "ddc");
 
-	if (!err) {
+	if (err < 0) {
 		bt_dev_err(hdev, "Unsupported Intel firmware naming");
 	} else {
 		/* Once the device is running in operational mode, it needs to
-- 
cgit v1.2.3


From 0a460d8fe2db6887169a19b048ea0c90f8bdc3b7 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:58:58 -0700
Subject: Bluetooth: btintel: Consolidate intel_version_tlv parsing

This moves version checks of intel_version_tlv() to btintel_version_info_tlv().

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 50 ++++++++++++++++++++++++++++++++++++----
 drivers/bluetooth/btintel.h |  7 +++---
 drivers/bluetooth/btusb.c   | 56 +++------------------------------------------
 3 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 13bc93a986c7..bb2546c4374a 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -372,10 +372,53 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver)
 }
 EXPORT_SYMBOL_GPL(btintel_read_version);
 
-void btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
+int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version)
 {
 	const char *variant;
 
+	/* The hardware platform number has a fixed value of 0x37 and
+	 * for now only accept this single value.
+	 */
+	if (INTEL_HW_PLATFORM(version->cnvi_bt) != 0x37) {
+		bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)",
+			   INTEL_HW_PLATFORM(version->cnvi_bt));
+		return -EINVAL;
+	}
+
+	/* Check for supported iBT hardware variants of this firmware
+	 * loading method.
+	 *
+	 * This check has been put in place to ensure correct forward
+	 * compatibility options when newer hardware variants come along.
+	 */
+	switch (INTEL_HW_VARIANT(version->cnvi_bt)) {
+	case 0x17:	/* TyP */
+	case 0x18:	/* Slr */
+	case 0x19:	/* Slr-F */
+		break;
+	default:
+		bt_dev_err(hdev, "Unsupported Intel hardware variant (0x%x)",
+			   INTEL_HW_VARIANT(version->cnvi_bt));
+		return -EINVAL;
+	}
+
+	/* It is required that every single firmware fragment is acknowledged
+	 * with a command complete event. If the boot parameters indicate
+	 * that this bootloader does not send them, then abort the setup.
+	 */
+	if (version->limited_cce != 0x00) {
+		bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)",
+			   version->limited_cce);
+		return -EINVAL;
+	}
+
+	/* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */
+	if (version->sbe_type > 0x01) {
+		bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)",
+			   version->sbe_type);
+		return -EINVAL;
+	}
+
 	switch (version->img_type) {
 	case 0x01:
 		variant = "Bootloader";
@@ -397,15 +440,14 @@ void btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *ve
 		break;
 	default:
 		bt_dev_err(hdev, "Unsupported image type(%02x)", version->img_type);
-		goto done;
+		return -EINVAL;
 	}
 
 	bt_dev_info(hdev, "%s timestamp %u.%u buildtype %u build %u", variant,
 		    2000 + (version->timestamp >> 8), version->timestamp & 0xff,
 		    version->build_type, version->build_num);
 
-done:
-	return;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(btintel_version_info_tlv);
 
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 51f1f2c883b4..56f30eaa1e3c 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -149,7 +149,7 @@ int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable);
 void btintel_hw_error(struct hci_dev *hdev, u8 code);
 
 void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver);
-void btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version);
+int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version);
 int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
 			const void *param);
 int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name);
@@ -216,9 +216,10 @@ static inline void btintel_version_info(struct hci_dev *hdev,
 {
 }
 
-static inline void btintel_version_info_tlv(struct hci_dev *hdev,
-					    struct intel_version_tlv *version)
+static inline int btintel_version_info_tlv(struct hci_dev *hdev,
+					   struct intel_version_tlv *version)
 {
+	return -EOPNOTSUPP;
 }
 
 static inline int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type,
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index a30f7ea9be36..f163f174185b 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2465,15 +2465,6 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	if (!ver || !boot_param)
 		return -EINVAL;
 
-	/* The hardware platform number has a fixed value of 0x37 and
-	 * for now only accept this single value.
-	 */
-	if (INTEL_HW_PLATFORM(ver->cnvi_bt) != 0x37) {
-		bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)",
-			   INTEL_HW_PLATFORM(ver->cnvi_bt));
-		return -EINVAL;
-	}
-
 	/* The firmware variant determines if the device is in bootloader
 	 * mode or is running operational firmware. The value 0x03 identifies
 	 * the bootloader and the value 0x23 identifies the operational
@@ -2492,49 +2483,6 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 		btintel_check_bdaddr(hdev);
 	}
 
-	/* Check for supported iBT hardware variants of this firmware
-	 * loading method.
-	 *
-	 * This check has been put in place to ensure correct forward
-	 * compatibility options when newer hardware variants come along.
-	 */
-	switch (INTEL_HW_VARIANT(ver->cnvi_bt)) {
-	case 0x17:	/* TyP */
-	case 0x18:	/* Slr */
-	case 0x19:	/* Slr-F */
-		break;
-	default:
-		bt_dev_err(hdev, "Unsupported Intel hardware variant (0x%x)",
-			   INTEL_HW_VARIANT(ver->cnvi_bt));
-		return -EINVAL;
-	}
-
-	/* If the device is not in bootloader mode, then the only possible
-	 * choice is to return an error and abort the device initialization.
-	 */
-	if (ver->img_type != 0x01) {
-		bt_dev_err(hdev, "Unsupported Intel firmware variant (0x%x)",
-			   ver->img_type);
-		return -ENODEV;
-	}
-
-	/* It is required that every single firmware fragment is acknowledged
-	 * with a command complete event. If the boot parameters indicate
-	 * that this bootloader does not send them, then abort the setup.
-	 */
-	if (ver->limited_cce != 0x00) {
-		bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)",
-			   ver->limited_cce);
-		return -EINVAL;
-	}
-
-	/* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */
-	if (ver->sbe_type > 0x01) {
-		bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)",
-			   ver->sbe_type);
-		return -EINVAL;
-	}
-
 	/* If the OTP has no valid Bluetooth device address, then there will
 	 * also be no valid address for the operational firmware.
 	 */
@@ -2972,7 +2920,9 @@ static int btusb_setup_intel_newgen(struct hci_dev *hdev)
 		return err;
 	}
 
-	btintel_version_info_tlv(hdev, &version);
+	err = btintel_version_info_tlv(hdev, &version);
+	if (err)
+		return err;
 
 	err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param);
 	if (err)
-- 
cgit v1.2.3


From d68903da4e220d1e6b7c6ecdb853c36144c6acc9 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:58:59 -0700
Subject: Bluetooth: btintel: Consolidate intel_version parsing

This moves version checks of intel_version() to btintel_version_info().

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 36 ++++++++++++++++++++++++++++++++++--
 drivers/bluetooth/btintel.h |  7 ++++---
 drivers/bluetooth/btusb.c   | 12 ++++--------
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index bb2546c4374a..bddaa4f32566 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -216,10 +216,39 @@ void btintel_hw_error(struct hci_dev *hdev, u8 code)
 }
 EXPORT_SYMBOL_GPL(btintel_hw_error);
 
-void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
+int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
 {
 	const char *variant;
 
+	/* The hardware platform number has a fixed value of 0x37 and
+	 * for now only accept this single value.
+	 */
+	if (ver->hw_platform != 0x37) {
+		bt_dev_err(hdev, "Unsupported Intel hardware platform (%u)",
+			   ver->hw_platform);
+		return -EINVAL;
+	}
+
+	/* Check for supported iBT hardware variants of this firmware
+	 * loading method.
+	 *
+	 * This check has been put in place to ensure correct forward
+	 * compatibility options when newer hardware variants come along.
+	 */
+	switch (ver->hw_variant) {
+	case 0x0b:      /* SfP */
+	case 0x0c:      /* WsP */
+	case 0x11:      /* JfP */
+	case 0x12:      /* ThP */
+	case 0x13:      /* HrP */
+	case 0x14:      /* CcP */
+		break;
+	default:
+		bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
+			   ver->hw_variant);
+		return -EINVAL;
+	}
+
 	switch (ver->fw_variant) {
 	case 0x06:
 		variant = "Bootloader";
@@ -228,13 +257,16 @@ void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
 		variant = "Firmware";
 		break;
 	default:
-		return;
+		bt_dev_err(hdev, "Unsupported firmware variant(%02x)", ver->fw_variant);
+		return -EINVAL;
 	}
 
 	bt_dev_info(hdev, "%s revision %u.%u build %u week %u %u",
 		    variant, ver->fw_revision >> 4, ver->fw_revision & 0x0f,
 		    ver->fw_build_num, ver->fw_build_ww,
 		    2000 + ver->fw_build_yy);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(btintel_version_info);
 
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 56f30eaa1e3c..d184064a5e7c 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -148,7 +148,7 @@ int btintel_set_diag(struct hci_dev *hdev, bool enable);
 int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable);
 void btintel_hw_error(struct hci_dev *hdev, u8 code);
 
-void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver);
+int btintel_version_info(struct hci_dev *hdev, struct intel_version *ver);
 int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *version);
 int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
 			const void *param);
@@ -211,9 +211,10 @@ static inline void btintel_hw_error(struct hci_dev *hdev, u8 code)
 {
 }
 
-static inline void btintel_version_info(struct hci_dev *hdev,
-					struct intel_version *ver)
+static inline int btintel_version_info(struct hci_dev *hdev,
+				       struct intel_version *ver)
 {
+	return -EOPNOTSUPP;
 }
 
 static inline int btintel_version_info_tlv(struct hci_dev *hdev,
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index f163f174185b..2d3498b74296 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2407,12 +2407,6 @@ static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
 					     char *fw_name, size_t len,
 					     const char *suffix)
 {
-	/* The hardware platform number has a fixed value of 0x37 and
-	 * for now only accept this single value.
-	 */
-	if (ver->hw_platform != 0x37)
-		return -EINVAL;
-
 	switch (ver->hw_variant) {
 	case 0x0b:	/* SfP */
 	case 0x0c:	/* WsP */
@@ -2582,8 +2576,6 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	if (!ver || !params)
 		return -EINVAL;
 
-	btintel_version_info(hdev, ver);
-
 	/* The firmware variant determines if the device is in bootloader
 	 * mode or is running operational firmware. The value 0x06 identifies
 	 * the bootloader and the value 0x23 identifies the operational
@@ -2776,6 +2768,10 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 		return err;
 	}
 
+	err = btintel_version_info(hdev, &ver);
+	if (err)
+		return err;
+
 	err = btusb_intel_download_firmware(hdev, &ver, &params, &boot_param);
 	if (err)
 		return err;
-- 
cgit v1.2.3


From 21e31c6501d9c24dae8869628ccc80b36f2a3d01 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:59:00 -0700
Subject: Bluetooth: btusb: Consolidate code for waiting firmware download

This moves duplicated code for waiting firmware download completion to
a function that can be reused.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 108 +++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 60 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 2d3498b74296..dcdc434cf794 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2447,6 +2447,44 @@ static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv
 		 suffix);
 }
 
+static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+	ktime_t delta, rettime;
+	unsigned long long duration;
+	int err;
+
+	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+
+	bt_dev_info(hdev, "Waiting for firmware download to complete");
+
+	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
+				  TASK_INTERRUPTIBLE,
+				  msecs_to_jiffies(msec));
+	if (err == -EINTR) {
+		bt_dev_err(hdev, "Firmware loading interrupted");
+		return err;
+	}
+
+	if (err) {
+		bt_dev_err(hdev, "Firmware loading timeout");
+		return -ETIMEDOUT;
+	}
+
+	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
+		bt_dev_err(hdev, "Firmware loading failed");
+		return -ENOEXEC;
+	}
+
+	rettime = ktime_get();
+	delta = ktime_sub(rettime, calltime);
+	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
+
+	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
+
+	return 0;
+}
+
 static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 						struct intel_version_tlv *ver,
 						u32 *boot_param)
@@ -2455,6 +2493,7 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	char fwname[64];
 	int err;
 	struct btusb_data *data = hci_get_drvdata(hdev);
+	ktime_t calltime;
 
 	if (!ver || !boot_param)
 		return -EINVAL;
@@ -2502,6 +2541,8 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 		goto done;
 	}
 
+	calltime = ktime_get();
+
 	set_bit(BTUSB_DOWNLOADING, &data->flags);
 
 	/* Start firmware downloading and get boot parameter */
@@ -2522,9 +2563,6 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 		btintel_reset_to_bootloader(hdev);
 		goto done;
 	}
-	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-
-	bt_dev_info(hdev, "Waiting for firmware download to complete");
 
 	/* Before switching the device into operational mode and with that
 	 * booting the loaded firmware, wait for the bootloader notification
@@ -2537,26 +2575,9 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	 * and thus just timeout if that happens and fail the setup
 	 * of this device.
 	 */
-	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
-				  TASK_INTERRUPTIBLE,
-				  msecs_to_jiffies(5000));
-	if (err == -EINTR) {
-		bt_dev_err(hdev, "Firmware loading interrupted");
-		goto done;
-	}
-
-	if (err) {
-		bt_dev_err(hdev, "Firmware loading timeout");
-		err = -ETIMEDOUT;
+	err = btusb_download_wait(hdev, calltime, 5000);
+	if (err == -ETIMEDOUT)
 		btintel_reset_to_bootloader(hdev);
-		goto done;
-	}
-
-	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
-		bt_dev_err(hdev, "Firmware loading failed");
-		err = -ENOEXEC;
-		goto done;
-	}
 
 done:
 	release_firmware(fw);
@@ -2572,6 +2593,7 @@ static int btusb_intel_download_firmware(struct hci_dev *hdev,
 	char fwname[64];
 	int err;
 	struct btusb_data *data = hci_get_drvdata(hdev);
+	ktime_t calltime;
 
 	if (!ver || !params)
 		return -EINVAL;
@@ -2676,6 +2698,8 @@ download:
 		goto done;
 	}
 
+	calltime = ktime_get();
+
 	set_bit(BTUSB_DOWNLOADING, &data->flags);
 
 	/* Start firmware downloading and get boot parameter */
@@ -2694,9 +2718,6 @@ download:
 		btintel_reset_to_bootloader(hdev);
 		goto done;
 	}
-	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
-
-	bt_dev_info(hdev, "Waiting for firmware download to complete");
 
 	/* Before switching the device into operational mode and with that
 	 * booting the loaded firmware, wait for the bootloader notification
@@ -2709,26 +2730,9 @@ download:
 	 * and thus just timeout if that happens and fail the setup
 	 * of this device.
 	 */
-	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
-				  TASK_INTERRUPTIBLE,
-				  msecs_to_jiffies(5000));
-	if (err == -EINTR) {
-		bt_dev_err(hdev, "Firmware loading interrupted");
-		goto done;
-	}
-
-	if (err) {
-		bt_dev_err(hdev, "Firmware loading timeout");
-		err = -ETIMEDOUT;
+	err = btusb_download_wait(hdev, calltime, 5000);
+	if (err == -ETIMEDOUT)
 		btintel_reset_to_bootloader(hdev);
-		goto done;
-	}
-
-	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
-		bt_dev_err(hdev, "Firmware loading failed");
-		err = -ENOEXEC;
-		goto done;
-	}
 
 done:
 	release_firmware(fw);
@@ -2755,8 +2759,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	 */
 	boot_param = 0x00000000;
 
-	calltime = ktime_get();
-
 	/* Read the Intel version information to determine if the device
 	 * is in bootloader mode or if it already has operational firmware
 	 * loaded.
@@ -2780,12 +2782,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	if (ver.fw_variant == 0x23)
 		goto finish;
 
-	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
-	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-
-	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
-
 	calltime = ktime_get();
 
 	set_bit(BTUSB_BOOTING, &data->flags);
@@ -2903,8 +2899,6 @@ static int btusb_setup_intel_newgen(struct hci_dev *hdev)
 	 */
 	boot_param = 0x00000000;
 
-	calltime = ktime_get();
-
 	/* Read the Intel version information to determine if the device
 	 * is in bootloader mode or if it already has operational firmware
 	 * loaded.
@@ -2928,12 +2922,6 @@ static int btusb_setup_intel_newgen(struct hci_dev *hdev)
 	if (version.img_type == 0x03)
 		goto finish;
 
-	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
-	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
-
-	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
-
 	calltime = ktime_get();
 
 	set_bit(BTUSB_BOOTING, &data->flags);
-- 
cgit v1.2.3


From 604b3cf87fd217ac7d7aeaa94b0bebdf139a8c88 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 23 Mar 2021 11:59:01 -0700
Subject: Bluetooth: btusb: Consolidate code for waiting firmware to boot

This moves duplicated code for waiting firmware download completion to
a function that can be reused.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 148 +++++++++++++++++++++-------------------------
 1 file changed, 66 insertions(+), 82 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index dcdc434cf794..eb250f533265 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2739,6 +2739,68 @@ done:
 	return err;
 }
 
+static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+	ktime_t delta, rettime;
+	unsigned long long duration;
+	int err;
+
+	bt_dev_info(hdev, "Waiting for device to boot");
+
+	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
+				  TASK_INTERRUPTIBLE,
+				  msecs_to_jiffies(msec));
+	if (err == -EINTR) {
+		bt_dev_err(hdev, "Device boot interrupted");
+		return -EINTR;
+	}
+
+	if (err) {
+		bt_dev_err(hdev, "Device boot timeout");
+		return -ETIMEDOUT;
+	}
+
+	rettime = ktime_get();
+	delta = ktime_sub(rettime, calltime);
+	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+
+	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
+
+	return 0;
+}
+
+static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+	ktime_t calltime;
+	int err;
+
+	calltime = ktime_get();
+
+	set_bit(BTUSB_BOOTING, &data->flags);
+
+	err = btintel_send_intel_reset(hdev, boot_addr);
+	if (err) {
+		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
+		btintel_reset_to_bootloader(hdev);
+		return err;
+	}
+
+	/* The bootloader will not indicate when the device is ready. This
+	 * is done by the operational firmware sending bootup notification.
+	 *
+	 * Booting into operational firmware should not take longer than
+	 * 1 second. However if that happens, then just fail the setup
+	 * since something went wrong.
+	 */
+	err = btusb_boot_wait(hdev, calltime, 1000);
+	if (err == -ETIMEDOUT)
+		btintel_reset_to_bootloader(hdev);
+
+	return err;
+}
+
 static int btusb_setup_intel_new(struct hci_dev *hdev)
 {
 	struct btusb_data *data = hci_get_drvdata(hdev);
@@ -2746,8 +2808,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	struct intel_boot_params params;
 	u32 boot_param;
 	char ddcname[64];
-	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
 	int err;
 	struct intel_debug_features features;
 
@@ -2782,46 +2842,9 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	if (ver.fw_variant == 0x23)
 		goto finish;
 
-	calltime = ktime_get();
-
-	set_bit(BTUSB_BOOTING, &data->flags);
-
-	err = btintel_send_intel_reset(hdev, boot_param);
-	if (err) {
-		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
-		btintel_reset_to_bootloader(hdev);
+	err = btusb_intel_boot(hdev, boot_param);
+	if (err)
 		return err;
-	}
-
-	/* The bootloader will not indicate when the device is ready. This
-	 * is done by the operational firmware sending bootup notification.
-	 *
-	 * Booting into operational firmware should not take longer than
-	 * 1 second. However if that happens, then just fail the setup
-	 * since something went wrong.
-	 */
-	bt_dev_info(hdev, "Waiting for device to boot");
-
-	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
-				  TASK_INTERRUPTIBLE,
-				  msecs_to_jiffies(1000));
-
-	if (err == -EINTR) {
-		bt_dev_err(hdev, "Device boot interrupted");
-		return -EINTR;
-	}
-
-	if (err) {
-		bt_dev_err(hdev, "Device boot timeout");
-		btintel_reset_to_bootloader(hdev);
-		return -ETIMEDOUT;
-	}
-
-	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
-	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-
-	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
 
 	clear_bit(BTUSB_BOOTLOADER, &data->flags);
 
@@ -2885,8 +2908,6 @@ static int btusb_setup_intel_newgen(struct hci_dev *hdev)
 	struct btusb_data *data = hci_get_drvdata(hdev);
 	u32 boot_param;
 	char ddcname[64];
-	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
 	int err;
 	struct intel_debug_features features;
 	struct intel_version_tlv version;
@@ -2922,46 +2943,9 @@ static int btusb_setup_intel_newgen(struct hci_dev *hdev)
 	if (version.img_type == 0x03)
 		goto finish;
 
-	calltime = ktime_get();
-
-	set_bit(BTUSB_BOOTING, &data->flags);
-
-	err = btintel_send_intel_reset(hdev, boot_param);
-	if (err) {
-		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
-		btintel_reset_to_bootloader(hdev);
+	err = btusb_intel_boot(hdev, boot_param);
+	if (err)
 		return err;
-	}
-
-	/* The bootloader will not indicate when the device is ready. This
-	 * is done by the operational firmware sending bootup notification.
-	 *
-	 * Booting into operational firmware should not take longer than
-	 * 1 second. However if that happens, then just fail the setup
-	 * since something went wrong.
-	 */
-	bt_dev_info(hdev, "Waiting for device to boot");
-
-	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
-				  TASK_INTERRUPTIBLE,
-				  msecs_to_jiffies(1000));
-
-	if (err == -EINTR) {
-		bt_dev_err(hdev, "Device boot interrupted");
-		return -EINTR;
-	}
-
-	if (err) {
-		bt_dev_err(hdev, "Device boot timeout");
-		btintel_reset_to_bootloader(hdev);
-		return -ETIMEDOUT;
-	}
-
-	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
-	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
-
-	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
 
 	clear_bit(BTUSB_BOOTLOADER, &data->flags);
 
-- 
cgit v1.2.3


From 7de3a42cdc701f4fb09bb4906d69f4014db4af09 Mon Sep 17 00:00:00 2001
From: Lokendra Singh <lokendra.singh@intel.com>
Date: Tue, 23 Mar 2021 11:59:02 -0700
Subject: Bluetooth: btintel: Reorganized bootloader mode tlv checks in
 intel_version_tlv parsing

This moves limited_cce and sbe_type checks under bootloader during tlv parsing
as operational firmware don't have access to these values. Any attempt to read
such values in operational firmware will only fetch garbage data.

Signed-off-by: Lokendra Singh <lokendra.singh@intel.com>
Signed-off-by: Kiran K <kiran.k@intel.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index bddaa4f32566..4ddbf895c382 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -434,26 +434,26 @@ int btintel_version_info_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
 		return -EINVAL;
 	}
 
-	/* It is required that every single firmware fragment is acknowledged
-	 * with a command complete event. If the boot parameters indicate
-	 * that this bootloader does not send them, then abort the setup.
-	 */
-	if (version->limited_cce != 0x00) {
-		bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)",
-			   version->limited_cce);
-		return -EINVAL;
-	}
-
-	/* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */
-	if (version->sbe_type > 0x01) {
-		bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)",
-			   version->sbe_type);
-		return -EINVAL;
-	}
-
 	switch (version->img_type) {
 	case 0x01:
 		variant = "Bootloader";
+		/* It is required that every single firmware fragment is acknowledged
+		 * with a command complete event. If the boot parameters indicate
+		 * that this bootloader does not send them, then abort the setup.
+		 */
+		if (version->limited_cce != 0x00) {
+			bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)",
+				   version->limited_cce);
+			return -EINVAL;
+		}
+
+		/* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */
+		if (version->sbe_type > 0x01) {
+			bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)",
+				   version->sbe_type);
+			return -EINVAL;
+		}
+
 		bt_dev_info(hdev, "Device revision is %u", version->dev_rev_id);
 		bt_dev_info(hdev, "Secure boot is %s",
 			    version->secure_boot ? "enabled" : "disabled");
-- 
cgit v1.2.3


From 88981354730ce118a76aa030d19fd1308cd2007b Mon Sep 17 00:00:00 2001
From: Lokendra Singh <lokendra.singh@intel.com>
Date: Tue, 23 Mar 2021 11:59:03 -0700
Subject: Bluetooth: btintel: Collect tlv based active firmware build info in
 FW mode

In Operational firmware mode, 'Minimum FW version' TLV ID is not available.
So, we cannot fetch already running firmware info for comparison against
another build. However, It can be collected using a combination of other
TLV ID's information.

Signed-off-by: Lokendra Singh <lokendra.singh@intel.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 4ddbf895c382..6442acba76d1 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -537,12 +537,23 @@ int btintel_read_version_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver
 			version->img_type = tlv->val[0];
 			break;
 		case INTEL_TLV_TIME_STAMP:
+			/* If image type is Operational firmware (0x03), then
+			 * running FW Calendar Week and Year information can
+			 * be extracted from Timestamp information
+			 */
+			version->min_fw_build_cw = tlv->val[0];
+			version->min_fw_build_yy = tlv->val[1];
 			version->timestamp = get_unaligned_le16(tlv->val);
 			break;
 		case INTEL_TLV_BUILD_TYPE:
 			version->build_type = tlv->val[0];
 			break;
 		case INTEL_TLV_BUILD_NUM:
+			/* If image type is Operational firmware (0x03), then
+			 * running FW build number can be extracted from the
+			 * Build information
+			 */
+			version->min_fw_build_nn = tlv->val[0];
 			version->build_num = get_unaligned_le32(tlv->val);
 			break;
 		case INTEL_TLV_SECURE_BOOT:
-- 
cgit v1.2.3


From 1f4ec585e6618ad75d5a82e58482ea2c5b4b7167 Mon Sep 17 00:00:00 2001
From: Lokendra Singh <lokendra.singh@intel.com>
Date: Tue, 23 Mar 2021 11:59:04 -0700
Subject: Bluetooth: btintel: Skip reading firmware file version while in
 bootloader mode

This skips parsing the firmware version information from the firmware
file while controller is in bootloader mode. As in bootloader mode,
we are supposed to patch unconditionally.

Signed-off-by: Lokendra Singh <lokendra.singh@intel.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Tested-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btintel.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 6442acba76d1..e44b6993cf91 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -1031,6 +1031,10 @@ int btintel_download_firmware(struct hci_dev *hdev,
 		/* Skip version checking */
 		break;
 	default:
+		/* Skip reading firmware file version in bootloader mode */
+		if (ver->fw_variant == 0x06)
+			break;
+
 		/* Skip download if firmware has the same version */
 		if (btintel_firmware_version(hdev, ver->fw_build_num,
 					     ver->fw_build_ww, ver->fw_build_yy,
@@ -1070,15 +1074,19 @@ int btintel_download_firmware_newgen(struct hci_dev *hdev,
 	int err;
 	u32 css_header_ver;
 
-	/* Skip download if firmware has the same version */
-	if (btintel_firmware_version(hdev, ver->min_fw_build_nn,
-				     ver->min_fw_build_cw, ver->min_fw_build_yy,
-				     fw, boot_param)) {
-		bt_dev_info(hdev, "Firmware already loaded");
-		/* Return -EALREADY to indicate that firmware has already been
-		 * loaded.
-		 */
-		return -EALREADY;
+	/* Skip reading firmware file version in bootloader mode */
+	if (ver->img_type != 0x01) {
+		/* Skip download if firmware has the same version */
+		if (btintel_firmware_version(hdev, ver->min_fw_build_nn,
+					     ver->min_fw_build_cw,
+					     ver->min_fw_build_yy,
+					     fw, boot_param)) {
+			bt_dev_info(hdev, "Firmware already loaded");
+			/* Return -EALREADY to indicate that firmware has
+			 * already been loaded.
+			 */
+			return -EALREADY;
+		}
 	}
 
 	/* The firmware variant determines if the device is in bootloader
-- 
cgit v1.2.3


From 5153ceb9e622f4e27de461404edc73324da70f8c Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Thu, 25 Mar 2021 10:05:44 +0530
Subject: Bluetooth: L2CAP: Rudimentary typo fixes

s/minium/minimum/
s/procdure/procedure/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 56e1975cdef1..7641fdfb2628 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1694,7 +1694,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
 		smp_conn_security(hcon, hcon->pending_sec_level);
 
 	/* For LE slave connections, make sure the connection interval
-	 * is in the range of the minium and maximum interval that has
+	 * is in the range of the minimum and maximum interval that has
 	 * been configured for this connection. If not, then trigger
 	 * the connection update procedure.
 	 */
@@ -7552,7 +7552,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
 	BT_DBG("chan %p, len %d", chan, skb->len);
 
 	/* If we receive data on a fixed channel before the info req/rsp
-	 * procdure is done simply assume that the channel is supported
+	 * procedure is done simply assume that the channel is supported
 	 * and mark it as ready.
 	 */
 	if (chan->chan_type == L2CAP_CHAN_FIXED)
-- 
cgit v1.2.3


From 80847a71b270b172814dc7562f3eb59507fca61e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 26 Mar 2021 00:41:34 +0100
Subject: bpf: Undo ptr_to_map_key alu sanitation for now

Remove PTR_TO_MAP_KEY for the time being from being sanitized on pointer ALU
through sanitize_ptr_alu() mainly for 3 reasons:

  1) It's currently unused and not available from unprivileged. However that by
     itself is not yet a strong reason to drop the code.

  2) Commit 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") implemented
     the sanitation not fully correct in that unlike stack or map_value pointer
     it doesn't probe whether the access to the map key /after/ the simulated ALU
     operation is still in bounds. This means that the generated mask can truncate
     the offset in the non-speculative domain whereas it should only truncate in
     the speculative domain. The verifier should instead reject such program as
     we do for other types.

  3) Given the recent fixes from f232326f6966 ("bpf: Prohibit alu ops for pointer
     types not defining ptr_limit"), 10d2bb2e6b1d ("bpf: Fix off-by-one for area
     size in creating mask to left"), b5871dca250c ("bpf: Simplify alu_limit masking
     for pointer arithmetic") as well as 1b1597e64e1a ("bpf: Add sanity check for
     upper ptr_limit") the code changed quite a bit and the merge in efd13b71a3fa
     broke the PTR_TO_MAP_KEY case due to an incorrect merge conflict.

Remove the relevant pieces for the time being and we can rework the PTR_TO_MAP_KEY
case once everything settles.

Fixes: efd13b71a3fa ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 210169c25ead..85f9f842d15c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6062,19 +6062,6 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 		else
 			*ptr_limit = -off - 1;
 		return *ptr_limit >= max ? -ERANGE : 0;
-	case PTR_TO_MAP_KEY:
-		/* Currently, this code is not exercised as the only use
-		 * is bpf_for_each_map_elem() helper which requires
-		 * bpf_capble. The code has been tested manually for
-		 * future use.
-		 */
-		if (mask_to_left) {
-			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
-		} else {
-			off = ptr_reg->smin_value + ptr_reg->off;
-			*ptr_limit = ptr_reg->map_ptr->key_size - off;
-		}
-		return 0;
 	case PTR_TO_MAP_VALUE:
 		max = ptr_reg->map_ptr->value_size;
 		if (mask_to_left) {
@@ -6281,7 +6268,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
-	case PTR_TO_MAP_KEY:
 	case PTR_TO_MAP_VALUE:
 		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
 			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
-- 
cgit v1.2.3


From ed3038158e7b58dcf966cb7ec4ae98d152a5b794 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:11:55 -0700
Subject: ethtool: fec: fix typo in kdoc

s/porte/the port/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cde753bb2093..1433d6278018 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1378,7 +1378,7 @@ struct ethtool_per_queue_op {
 /**
  * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
  * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on porte
+ * @active_fec: FEC mode which is active on the port
  * @fec: Bitmask of supported/configured FEC modes
  * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
  *
-- 
cgit v1.2.3


From 408386817a9d32c88c9ac528749e9999d0e3f6a1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:11:56 -0700
Subject: ethtool: fec: remove long structure description

Digging through the mailing list archive @autoneg was part
of the first version of the RFC, this left over comment was
pointed out twice in review but wasn't removed.

The sentence is an exact copy-paste from pauseparam.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 1433d6278018..36bf435d232c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1381,10 +1381,6 @@ struct ethtool_per_queue_op {
  * @active_fec: FEC mode which is active on the port
  * @fec: Bitmask of supported/configured FEC modes
  * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
- *
- * Drivers should reject a non-zero setting of @autoneg when
- * autoneogotiation is disabled (or not supported) for the link.
- *
  */
 struct ethtool_fecparam {
 	__u32   cmd;
-- 
cgit v1.2.3


From 240e114411e74d2ee8121643e0c67717eb7c6982 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:11:57 -0700
Subject: ethtool: fec: sanitize ethtool_fecparam->reserved

struct ethtool_fecparam::reserved is never looked at by the core.
Make sure it's actually 0. Unfortunately we can't return an error
because old ethtool doesn't zero-initialize the structure for SET.
On GET we can be more verbose, there are no in tree (ab)users.

Fix up the kdoc on the structure. Remove the mention of FEC
bypass. Seems like a niche thing to configure in the first
place.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 +-
 net/ethtool/ioctl.c          | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 36bf435d232c..9e2682a67460 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1380,7 +1380,7 @@ struct ethtool_per_queue_op {
  * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
  * @active_fec: FEC mode which is active on the port
  * @fec: Bitmask of supported/configured FEC modes
- * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
+ * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
  */
 struct ethtool_fecparam {
 	__u32   cmd;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 0788cc3b3114..be3549023d89 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2568,6 +2568,9 @@ static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr)
 	if (rc)
 		return rc;
 
+	if (WARN_ON_ONCE(fecparam.reserved))
+		fecparam.reserved = 0;
+
 	if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
 		return -EFAULT;
 	return 0;
@@ -2583,6 +2586,8 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
 		return -EFAULT;
 
+	fecparam.reserved = 0;
+
 	return dev->ethtool_ops->set_fecparam(dev, &fecparam);
 }
 
-- 
cgit v1.2.3


From d3b37fc805d9ef697451730ebdfc7e35e6c2ace8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:11:58 -0700
Subject: ethtool: fec: sanitize ethtool_fecparam->active_fec

struct ethtool_fecparam::active_fec is a GET-only field,
all in-tree drivers correctly ignore it on SET. Clear
the field on SET to avoid any confusion. Again, we can't
reject non-zero now since ethtool user space does not
zero-init the param correctly.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 +-
 net/ethtool/ioctl.c          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9e2682a67460..517b68c5fcec 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1378,7 +1378,7 @@ struct ethtool_per_queue_op {
 /**
  * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
  * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on the port
+ * @active_fec: FEC mode which is active on the port, GET only.
  * @fec: Bitmask of supported/configured FEC modes
  * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
  */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index be3549023d89..237ffe5440ef 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2586,6 +2586,7 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
 		return -EFAULT;
 
+	fecparam.active_fec = 0;
 	fecparam.reserved = 0;
 
 	return dev->ethtool_ops->set_fecparam(dev, &fecparam);
-- 
cgit v1.2.3


From 42ce127d98641f2cb19cd499b5b3beb472c7eff1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:11:59 -0700
Subject: ethtool: fec: sanitize ethtool_fecparam->fec

Reject NONE on set, this mode means device does not support
FEC so it's a little out of place in the set interface.

This should be safe to do - user space ethtool does not allow
the use of NONE on set. A few drivers treat it the same as OFF,
but none use it instead of OFF.

Similarly reject an empty FEC mask. The common user space tool
will not send such requests and most drivers correctly reject
it already.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 237ffe5440ef..8797533ddc4b 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2586,6 +2586,9 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
 		return -EFAULT;
 
+	if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE_BIT)
+		return -EINVAL;
+
 	fecparam.active_fec = 0;
 	fecparam.reserved = 0;
 
-- 
cgit v1.2.3


From 6dbf94b264e62641d521975d0eddbeef36bacf3c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 24 Mar 2021 18:12:00 -0700
Subject: ethtool: clarify the ethtool FEC interface

The definition of the FEC driver interface is quite unclear.
Improve the documentation.

This is based on current driver and user space code, as well
as the discussions about the interface:

RFC v1 (24 Oct 2016): https://lore.kernel.org/netdev/1477363849-36517-1-git-send-email-vidya@cumulusnetworks.com/
 - this version has the autoneg field
 - no active_fec field
 - none vs off confusion is already present

RFC v2 (10 Feb 2017): https://lore.kernel.org/netdev/1486727004-11316-1-git-send-email-vidya@cumulusnetworks.com/
 - autoneg removed
 - active_fec added

v1 (10 Feb 2017): https://lore.kernel.org/netdev/1486751311-42019-1-git-send-email-vidya@cumulusnetworks.com/
 - no changes in the code

v1 (24 Jun 2017):  https://lore.kernel.org/netdev/1498331985-8525-1-git-send-email-roopa@cumulusnetworks.com/
 - include in tree user

v2 (27 Jul 2017): https://lore.kernel.org/netdev/1501199248-24695-1-git-send-email-roopa@cumulusnetworks.com/

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 517b68c5fcec..f6ef7d42c7a1 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1376,11 +1376,29 @@ struct ethtool_per_queue_op {
 };
 
 /**
- * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
+ * struct ethtool_fecparam - Ethernet Forward Error Correction parameters
  * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
- * @active_fec: FEC mode which is active on the port, GET only.
- * @fec: Bitmask of supported/configured FEC modes
+ * @active_fec: FEC mode which is active on the port, single bit set, GET only.
+ * @fec: Bitmask of configured FEC modes.
  * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
+ *
+ * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
+ * FEC settings are configured by link autonegotiation whenever it's enabled.
+ * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
+ *
+ * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings.
+ * It is recommended that drivers only accept a single bit set in @fec.
+ * When multiple bits are set in @fec drivers may pick mode in an implementation
+ * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other
+ * FEC modes, because it's unclear whether in this case other modes constrain
+ * AUTO or are independent choices.
+ * Drivers must reject SET requests if they support none of the requested modes.
+ *
+ * If device does not support FEC drivers may use %ETHTOOL_FEC_NONE instead
+ * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM.
+ *
+ * See enum ethtool_fec_config_bits for definition of valid bits for both
+ * @fec and @active_fec.
  */
 struct ethtool_fecparam {
 	__u32   cmd;
@@ -1392,11 +1410,16 @@ struct ethtool_fecparam {
 
 /**
  * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
- * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
+ * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported. Should not
+ *		      be used together with other bits. GET only.
+ * @ETHTOOL_FEC_AUTO: Select default/best FEC mode automatically, usually based
+ *		      link mode and SFP parameters read from module's EEPROM.
+ *		      This bit does _not_ mean autonegotiation.
  * @ETHTOOL_FEC_OFF: No FEC Mode
- * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
+ * @ETHTOOL_FEC_RS: Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_LLRS: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
+ *		      Consortium)
  */
 enum ethtool_fec_config_bits {
 	ETHTOOL_FEC_NONE_BIT,
-- 
cgit v1.2.3


From b83e214b2e04204f1fc674574362061492c37245 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Thu, 25 Mar 2021 08:56:41 +0700
Subject: tipc: add extack messages for bearer/media failure

Add extack error messages for -EINVAL errors when enabling bearer,
getting/setting properties for a media/bearer

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a4389ef08a98..1090f21fcfac 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -243,7 +243,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
 			      u32 disc_domain, u32 prio,
-			      struct nlattr *attr[])
+			      struct nlattr *attr[],
+			      struct netlink_ext_ack *extack)
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_bearer_names b_names;
@@ -257,17 +258,20 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 
 	if (!bearer_name_validate(name, &b_names)) {
 		errstr = "illegal name";
+		NL_SET_ERR_MSG(extack, "Illegal name");
 		goto rejected;
 	}
 
 	if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
 		errstr = "illegal priority";
+		NL_SET_ERR_MSG(extack, "Illegal priority");
 		goto rejected;
 	}
 
 	m = tipc_media_find(b_names.media_name);
 	if (!m) {
 		errstr = "media not registered";
+		NL_SET_ERR_MSG(extack, "Media not registered");
 		goto rejected;
 	}
 
@@ -281,6 +285,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 			break;
 		if (!strcmp(name, b->name)) {
 			errstr = "already enabled";
+			NL_SET_ERR_MSG(extack, "Already enabled");
 			goto rejected;
 		}
 		bearer_id++;
@@ -292,6 +297,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 			name, prio);
 		if (prio == TIPC_MIN_LINK_PRI) {
 			errstr = "cannot adjust to lower";
+			NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
 			goto rejected;
 		}
 		pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
@@ -302,6 +308,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 
 	if (bearer_id >= MAX_BEARERS) {
 		errstr = "max 3 bearers permitted";
+		NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
 		goto rejected;
 	}
 
@@ -315,6 +322,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	if (res) {
 		kfree(b);
 		errstr = "failed to enable media";
+		NL_SET_ERR_MSG(extack, "Failed to enable media");
 		goto rejected;
 	}
 
@@ -331,6 +339,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	if (res) {
 		bearer_disable(net, b);
 		errstr = "failed to create discoverer";
+		NL_SET_ERR_MSG(extack, "Failed to create discoverer");
 		goto rejected;
 	}
 
@@ -909,6 +918,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
 	bearer = tipc_bearer_find(net, name);
 	if (!bearer) {
 		err = -EINVAL;
+		NL_SET_ERR_MSG(info->extack, "Bearer not found");
 		goto err_out;
 	}
 
@@ -948,8 +958,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
 	name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
 	bearer = tipc_bearer_find(net, name);
-	if (!bearer)
+	if (!bearer) {
+		NL_SET_ERR_MSG(info->extack, "Bearer not found");
 		return -EINVAL;
+	}
 
 	bearer_disable(net, bearer);
 
@@ -1007,7 +1019,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 			prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 	}
 
-	return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+	return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+				  info->extack);
 }
 
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
@@ -1046,6 +1059,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
 	b = tipc_bearer_find(net, name);
 	if (!b) {
 		rtnl_unlock();
+		NL_SET_ERR_MSG(info->extack, "Bearer not found");
 		return -EINVAL;
 	}
 
@@ -1086,8 +1100,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 	name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
 	b = tipc_bearer_find(net, name);
-	if (!b)
+	if (!b) {
+		NL_SET_ERR_MSG(info->extack, "Bearer not found");
 		return -EINVAL;
+	}
 
 	if (attrs[TIPC_NLA_BEARER_PROP]) {
 		struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
@@ -1106,12 +1122,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 		if (props[TIPC_NLA_PROP_WIN])
 			b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
 		if (props[TIPC_NLA_PROP_MTU]) {
-			if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+			if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU property is unsupported");
 				return -EINVAL;
+			}
 #ifdef CONFIG_TIPC_MEDIA_UDP
 			if (tipc_udp_mtu_bad(nla_get_u32
-					     (props[TIPC_NLA_PROP_MTU])))
+					     (props[TIPC_NLA_PROP_MTU]))) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU value is out-of-range");
 				return -EINVAL;
+			}
 			b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
 			tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
 #endif
@@ -1239,6 +1261,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
 	rtnl_lock();
 	media = tipc_media_find(name);
 	if (!media) {
+		NL_SET_ERR_MSG(info->extack, "Media not found");
 		err = -EINVAL;
 		goto err_out;
 	}
@@ -1275,9 +1298,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
 	name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
 
 	m = tipc_media_find(name);
-	if (!m)
+	if (!m) {
+		NL_SET_ERR_MSG(info->extack, "Media not found");
 		return -EINVAL;
-
+	}
 	if (attrs[TIPC_NLA_MEDIA_PROP]) {
 		struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
 
@@ -1293,12 +1317,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
 		if (props[TIPC_NLA_PROP_WIN])
 			m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
 		if (props[TIPC_NLA_PROP_MTU]) {
-			if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+			if (m->type_id != TIPC_MEDIA_TYPE_UDP) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU property is unsupported");
 				return -EINVAL;
+			}
 #ifdef CONFIG_TIPC_MEDIA_UDP
 			if (tipc_udp_mtu_bad(nla_get_u32
-					     (props[TIPC_NLA_PROP_MTU])))
+					     (props[TIPC_NLA_PROP_MTU]))) {
+				NL_SET_ERR_MSG(info->extack,
+					       "MTU value is out-of-range");
 				return -EINVAL;
+			}
 			m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
 #endif
 		}
-- 
cgit v1.2.3


From a9bada338b6806c660e4fb20ab742d0d83a3ceac Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Thu, 25 Mar 2021 10:51:08 +0800
Subject: net: usb: lan78xx: remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Acked-by: Woojung Huh <Woojung.Huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index e81c5699c952..6acc5e904518 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2015 Microchip Technology
  */
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-- 
cgit v1.2.3


From f1dcffcc8abe3088da876d9eae481c3e31e2014d Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 11:01:55 +0800
Subject: net: Fix a misspell in socket.c

s/addres/address

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index 84a8049c2b09..27e3e7d53f8e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3568,7 +3568,7 @@ EXPORT_SYMBOL(kernel_accept);
  *	@addrlen: address length
  *	@flags: flags (O_NONBLOCK, ...)
  *
- *	For datagram sockets, @addr is the addres to which datagrams are sent
+ *	For datagram sockets, @addr is the address to which datagrams are sent
  *	by default, and the only address from which datagrams are received.
  *	For stream sockets, attempts to connect to @addr.
  *	Returns 0 or an error code.
-- 
cgit v1.2.3


From 711550a0b97e2e4ed2f1da484cf33e9dd1a963fb Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Thu, 25 Mar 2021 11:29:28 +0800
Subject: qede: remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 4d952036ba82..01ac1e93d27a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -7,7 +7,6 @@
 #include <linux/crash_dump.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/version.h>
 #include <linux/device.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-- 
cgit v1.2.3


From ba8be0d49caf3d47038c9c8b8a9953adce3db006 Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Thu, 25 Mar 2021 11:29:32 +0800
Subject: net: bcmgenet: remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 1c86eddb1b51..facde824bcaa 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -18,7 +18,6 @@
 #include <linux/delay.h>
 #include <linux/pm.h>
 #include <linux/clk.h>
-#include <linux/version.h>
 #include <linux/platform_device.h>
 #include <net/arp.h>
 
-- 
cgit v1.2.3


From 01dc080be6b8318c3a6d3df3486f8d919f5b89a0 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 25 Mar 2021 14:35:55 +0800
Subject: drivers: net: ethernet: struct sk_buff is declared duplicately

struct sk_buff has been declared. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_app.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 76d13af46a7a..3e9baff07100 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -18,7 +18,6 @@ struct netdev_bpf;
 struct netlink_ext_ack;
 struct pci_dev;
 struct sk_buff;
-struct sk_buff;
 struct nfp_app;
 struct nfp_cpp;
 struct nfp_pf;
-- 
cgit v1.2.3


From 3f9143f10c3d5055093b18fd3eaa8fc6d1b460f5 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 14:38:21 +0800
Subject: net: ceph: Fix a typo in osdmap.c

Modify "inital" to "initial" in net/ceph/osdmap.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/osdmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 2b1dd252f231..c959320c4775 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1069,7 +1069,7 @@ again:
 
 		/*
 		 * Do not return the error but go back to waiting.  We
-		 * have the inital workspace and the CRUSH computation
+		 * have the initial workspace and the CRUSH computation
 		 * time is bounded so we will get it eventually.
 		 */
 		WARN_ON(atomic_read(&wsm->total_ws) < 1);
-- 
cgit v1.2.3


From 897b9fae7a8ac1de2372a15234c944831c83ec26 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 14:38:22 +0800
Subject: net: core: Fix a typo in dev_addr_lists.c

Modify "funciton" to "function" in net/core/dev_addr_lists.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_addr_lists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index fa1c37ec40c9..1e5bde241185 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -228,7 +228,7 @@ EXPORT_SYMBOL(__hw_addr_unsync);
  *  @sync: function to call if address should be added
  *  @unsync: function to call if address should be removed
  *
- *  This funciton is intended to be called from the ndo_set_rx_mode
+ *  This function is intended to be called from the ndo_set_rx_mode
  *  function of devices that require explicit address add/remove
  *  notifications.  The unsync function may be NULL in which case
  *  the addresses requiring removal will simply be removed without
-- 
cgit v1.2.3


From e51443d54b4e6a2793c55d82fd04b79fbc8ba4d5 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 14:38:23 +0800
Subject: net: decnet: Fix a typo in dn_nsp_in.c

Modify "erronous" to "erroneous" in net/decnet/dn_nsp_in.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_nsp_in.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index c97bdca5ec30..1a12912b88d6 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -520,7 +520,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
 	fcval = *ptr;
 
 	/*
-	 * Here we ignore erronous packets which should really
+	 * Here we ignore erroneous packets which should really
 	 * should cause a connection abort. It is not critical
 	 * for now though.
 	 */
-- 
cgit v1.2.3


From 952a67f6f6a80ea5b2dae7f433ffc3cd55f8f8b3 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 14:38:24 +0800
Subject: net: dsa: Fix a typo in tag_rtl4_a.c

Modify "Apparantly" to "Apparently" in net/dsa/tag_rtl4_a.c..

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_rtl4_a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index e9176475bac8..cf8ac316f4c7 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -79,7 +79,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
 
 	/* The RTL4 header has its own custom Ethertype 0x8899 and that
 	 * starts right at the beginning of the packet, after the src
-	 * ethernet addr. Apparantly skb->data always points 2 bytes in,
+	 * ethernet addr. Apparently skb->data always points 2 bytes in,
 	 * behind the Ethertype.
 	 */
 	tag = skb->data - 2;
-- 
cgit v1.2.3


From cbd801b3b0716c374e050a8366bb43d71d62b6ec Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 25 Mar 2021 14:38:25 +0800
Subject: net: ipv4: Fix some typos

Modify "accomodate" to "accommodate" in net/ipv4/esp4.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a3271ec3e162..1ae920b93f39 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -309,7 +309,7 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
 					       struct esp_output_extra *extra)
 {
 	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits.  We will move it back after
+	 * accommodate the high bits.  We will move it back after
 	 * encryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
@@ -854,7 +854,7 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
 	struct ip_esp_hdr *esph;
 
 	/* For ESN we move the header forward by 4 bytes to
-	 * accomodate the high bits.  We will move it back after
+	 * accommodate the high bits.  We will move it back after
 	 * decryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-- 
cgit v1.2.3


From c32773c96131fd5a106993efa0078fbde435b2f6 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Thu, 25 Mar 2021 15:56:31 +0800
Subject: net: gve: convert strlcpy to strscpy

Usage of strlcpy in linux kernel has been recently deprecated[1], so
convert gve driver to strscpy

[1] https://lore.kernel.org/lkml/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL
=V6A6G1oUZcprmknw@mail.gmail.com/

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_ethtool.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index 0901fa6853ca..e40e052a8566 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -14,9 +14,9 @@ static void gve_get_drvinfo(struct net_device *netdev,
 {
 	struct gve_priv *priv = netdev_priv(netdev);
 
-	strlcpy(info->driver, "gve", sizeof(info->driver));
-	strlcpy(info->version, gve_version_str, sizeof(info->version));
-	strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
+	strscpy(info->driver, "gve", sizeof(info->driver));
+	strscpy(info->version, gve_version_str, sizeof(info->version));
+	strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
 }
 
 static void gve_set_msglevel(struct net_device *netdev, u32 value)
-- 
cgit v1.2.3


From f67435b555dfde67c830047fdfa1f4b91fbeaa56 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@huawei.com>
Date: Thu, 25 Mar 2021 15:56:32 +0800
Subject: net: gve: remove duplicated allowed

fix the WARNING of Possible repeated word: 'allowed'

Signed-off-by: Daode Huang <huangdaode@huawei.com>
Reviewed-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
index e40e052a8566..5fb05cf36b49 100644
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -388,7 +388,7 @@ static int gve_set_channels(struct net_device *netdev,
 
 	gve_get_channels(netdev, &old_settings);
 
-	/* Changing combined is not allowed allowed */
+	/* Changing combined is not allowed */
 	if (cmd->combined_count != old_settings.combined_count)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 866f1577ba69bde2b9f36c300f603596c7d84a62 Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Thu, 25 Mar 2021 17:19:54 +0800
Subject: net: dsa: b53: spi: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_spi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 413158275db8..ecb9f7f6b335 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -335,6 +335,7 @@ static const struct of_device_id b53_spi_of_match[] = {
 	{ .compatible = "brcm,bcm53128" },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, b53_spi_of_match);
 
 static struct spi_driver b53_spi_driver = {
 	.driver = {
-- 
cgit v1.2.3


From 96ef692841e0d7c0ce4c7160c674f57ff83f3b4c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 25 Mar 2021 10:31:39 +0100
Subject: r8169: remove rtl_hw_start_8168c_3

We can simply use rtl_hw_start_8168c_2() also for chip version 21.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 7a8bb7e833f3..1cd5c6f6d44f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2736,11 +2736,6 @@ static void rtl_hw_start_8168c_2(struct rtl8169_private *tp)
 	__rtl_hw_start_8168cp(tp);
 }
 
-static void rtl_hw_start_8168c_3(struct rtl8169_private *tp)
-{
-	rtl_hw_start_8168c_2(tp);
-}
-
 static void rtl_hw_start_8168c_4(struct rtl8169_private *tp)
 {
 	rtl_set_def_aspm_entry_latency(tp);
@@ -3653,7 +3648,7 @@ static void rtl_hw_config(struct rtl8169_private *tp)
 		[RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1,
 		[RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1,
 		[RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2,
-		[RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3,
+		[RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_2,
 		[RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4,
 		[RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2,
 		[RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3,
-- 
cgit v1.2.3


From ae8f5867d59086670ef61e0cbeabde927d4e13a0 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Thu, 25 Mar 2021 17:31:51 +0800
Subject: net: ethernet: mtk_eth_soc: remove unused variable 'count'

GCC reports the following warning with W=1:

drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c:80:9: warning:
 variable 'count' set but not used [-Wunused-but-set-variable]
   80 |  int i, count;
      |         ^~~~~

This variable is not used in function , this commit
remove it to fix the warning.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
index 8ae9efab6d02..98b1d3577bcd 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -77,9 +77,9 @@ static int
 mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
 {
 	struct mtk_ppe *ppe = m->private;
-	int i, count;
+	int i;
 
-	for (i = 0, count = 0; i < MTK_PPE_ENTRIES; i++) {
+	for (i = 0; i < MTK_PPE_ENTRIES; i++) {
 		struct mtk_foe_entry *entry = &ppe->foe_table[i];
 		struct mtk_foe_mac_info *l2;
 		struct mtk_flow_addr_info ai = {};
-- 
cgit v1.2.3


From 5d9034938720a15fa0f62db3e195c0c473c72c1b Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ribalda@chromium.org>
Date: Thu, 18 Mar 2021 21:22:22 +0100
Subject: bpf: Fix typo 'accesible' into 'accessible'

Trivial fix.

Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210318202223.164873-8-ribalda@chromium.org
---
 include/linux/bpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 39dce9d3c3a5..24678d6ecbcf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -56,7 +56,7 @@ struct bpf_iter_seq_info {
 	u32 seq_priv_size;
 };
 
-/* map is generic key/value storage optionally accesible by eBPF programs */
+/* map is generic key/value storage optionally accessible by eBPF programs */
 struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
 	int (*map_alloc_check)(union bpf_attr *attr);
-- 
cgit v1.2.3


From a46410d5e4975d701d526397156fa0815747dc2f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 24 Mar 2021 10:29:41 -0700
Subject: libbpf: Constify few bpf_program getters

bpf_program__get_type() and bpf_program__get_expected_attach_type() shouldn't
modify given bpf_program, so mark input parameter as const struct bpf_program.
This eliminates unnecessary compilation warnings or explicit casts in user
programs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210324172941.2609884-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 4 ++--
 tools/lib/bpf/libbpf.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c0fd2c718a7d..10a0a67699f1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8458,7 +8458,7 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
 	return fd;
 }
 
-enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
 {
 	return prog->type;
 }
@@ -8503,7 +8503,7 @@ BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
 
 enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog)
+bpf_program__get_expected_attach_type(const struct bpf_program *prog)
 {
 	return prog->expected_attach_type;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a1a424b9b8ff..89ade7d7b31c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -361,12 +361,12 @@ LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
 
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
 				      enum bpf_prog_type type);
 
 LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog);
+bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 LIBBPF_API void
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
 				      enum bpf_attach_type type);
-- 
cgit v1.2.3


From b8ecdaaaf328cf2914da99217368dc847fb9e968 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:32 -0500
Subject: net: ipa: update IPA register comments

Add and update IPA register definitions.  Extend these definitions
to incorporate a fairly small number of new symbols (register
offsets and fields) to support IPA v3.0, v3.1, v3.5, v4.0, v4.1,
v4.7, 4.9, and v4.11, and have the comments reflect when they are
valid.  None of the added symbols require changes elsewhere in the
code.

Update rsrc_grp_encoded() to support these other IPA versions.

Add kerneldoc comments for the IPA IRQ numbers and sequencer type.

Fix a few spots where the version check should be less restrictive
(missed by an earlier patch).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_reg.h | 185 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 129 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 86fe2978e810..735f6e809e04 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -66,14 +66,16 @@ struct ipa;
  */
 
 #define IPA_REG_COMP_CFG_OFFSET				0x0000003c
-/* The next field is not supported for IPA v4.1 */
+/* The next field is not supported for IPA v4.0+, not present for IPA v4.5+ */
 #define ENABLE_FMASK				GENMASK(0, 0)
+/* The next field is present for IPA v4.7+ */
+#define RAM_ARB_PRI_CLIENT_SAMP_FIX_DIS_FMASK	GENMASK(0, 0)
 #define GSI_SNOC_BYPASS_DIS_FMASK		GENMASK(1, 1)
 #define GEN_QMB_0_SNOC_BYPASS_DIS_FMASK		GENMASK(2, 2)
 #define GEN_QMB_1_SNOC_BYPASS_DIS_FMASK		GENMASK(3, 3)
-/* The next field is not present for IPA v4.5 */
+/* The next field is not present for IPA v4.5+ */
 #define IPA_DCMP_FAST_CLK_EN_FMASK		GENMASK(4, 4)
-/* The remaining fields are not present for IPA v3.5.1 */
+/* The next twelve fields are present for IPA v4.0+ */
 #define IPA_QMB_SELECT_CONS_EN_FMASK		GENMASK(5, 5)
 #define IPA_QMB_SELECT_PROD_EN_FMASK		GENMASK(6, 6)
 #define GSI_MULTI_INORDER_RD_DIS_FMASK		GENMASK(7, 7)
@@ -87,8 +89,14 @@ struct ipa;
 #define GSI_MULTI_AXI_MASTERS_DIS_FMASK		GENMASK(15, 15)
 #define IPA_QMB_SELECT_GLOBAL_EN_FMASK		GENMASK(16, 16)
 #define IPA_ATOMIC_FETCHER_ARB_LOCK_DIS_FMASK	GENMASK(20, 17)
-/* The next field is present for IPA v4.5 */
+/* The next field is present for IPA v4.5 and IPA v4.7 */
 #define IPA_FULL_FLUSH_WAIT_RSC_CLOSE_EN_FMASK	GENMASK(21, 21)
+/* The next five fields are present for IPA v4.9+ */
+#define QMB_RAM_RD_CACHE_DISABLE_FMASK		GENMASK(19, 19)
+#define GENQMB_AOOOWR_FMASK			GENMASK(20, 20)
+#define IF_OUT_OF_BUF_STOP_RESET_MASK_EN_FMASK	GENMASK(21, 21)
+#define GEN_QMB_1_DYNAMIC_ASIZE_FMASK		GENMASK(30, 30)
+#define GEN_QMB_0_DYNAMIC_ASIZE_FMASK		GENMASK(31, 31)
 
 #define IPA_REG_CLKON_CFG_OFFSET			0x00000044
 #define RX_FMASK				GENMASK(0, 0)
@@ -108,13 +116,15 @@ struct ipa;
 #define ACK_MNGR_FMASK				GENMASK(14, 14)
 #define D_DCPH_FMASK				GENMASK(15, 15)
 #define H_DCPH_FMASK				GENMASK(16, 16)
-/* The next field is not present for IPA v4.5 */
+/* The next field is not present for IPA v4.5+ */
 #define DCMP_FMASK				GENMASK(17, 17)
+/* The next three fields are present for IPA v3.5+ */
 #define NTF_TX_CMDQS_FMASK			GENMASK(18, 18)
 #define TX_0_FMASK				GENMASK(19, 19)
 #define TX_1_FMASK				GENMASK(20, 20)
+/* The next field is present for IPA v3.5.1+ */
 #define FNR_FMASK				GENMASK(21, 21)
-/* The remaining fields are not present for IPA v3.5.1 */
+/* The next eight fields are present for IPA v4.0+ */
 #define QSB2AXI_CMDQ_L_FMASK			GENMASK(22, 22)
 #define AGGR_WRAPPER_FMASK			GENMASK(23, 23)
 #define RAM_SLAVEWAY_FMASK			GENMASK(24, 24)
@@ -123,8 +133,10 @@ struct ipa;
 #define GSI_IF_FMASK				GENMASK(27, 27)
 #define GLOBAL_FMASK				GENMASK(28, 28)
 #define GLOBAL_2X_CLK_FMASK			GENMASK(29, 29)
-/* The next field is present for IPA v4.5 */
+/* The next field is present for IPA v4.5+ */
 #define DPL_FIFO_FMASK				GENMASK(30, 30)
+/* The next field is present for IPA v4.7+ */
+#define DRBIP_FMASK				GENMASK(31, 31)
 
 #define IPA_REG_ROUTE_OFFSET				0x00000048
 #define ROUTE_DIS_FMASK				GENMASK(0, 0)
@@ -145,13 +157,13 @@ struct ipa;
 #define IPA_REG_QSB_MAX_READS_OFFSET			0x00000078
 #define GEN_QMB_0_MAX_READS_FMASK		GENMASK(3, 0)
 #define GEN_QMB_1_MAX_READS_FMASK		GENMASK(7, 4)
-/* The next two fields are not present for IPA v3.5.1 */
+/* The next two fields are present for IPA v4.0+ */
 #define GEN_QMB_0_MAX_READS_BEATS_FMASK		GENMASK(23, 16)
 #define GEN_QMB_1_MAX_READS_BEATS_FMASK		GENMASK(31, 24)
 
 static inline u32 ipa_reg_filt_rout_hash_en_offset(enum ipa_version version)
 {
-	if (version == IPA_VERSION_3_5_1)
+	if (version < IPA_VERSION_4_0)
 		return 0x000008c;
 
 	return 0x0000148;
@@ -159,7 +171,7 @@ static inline u32 ipa_reg_filt_rout_hash_en_offset(enum ipa_version version)
 
 static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version)
 {
-	if (version == IPA_VERSION_3_5_1)
+	if (version < IPA_VERSION_4_0)
 		return 0x0000090;
 
 	return 0x000014c;
@@ -174,22 +186,23 @@ static inline u32 ipa_reg_filt_rout_hash_flush_offset(enum ipa_version version)
 /* ipa->available defines the valid bits in the STATE_AGGR_ACTIVE register */
 static inline u32 ipa_reg_state_aggr_active_offset(enum ipa_version version)
 {
-	if (version == IPA_VERSION_3_5_1)
+	if (version < IPA_VERSION_4_0)
 		return 0x0000010c;
 
 	return 0x000000b4;
 }
 
-/* The next register is not present for IPA v4.5 */
+/* The next register is not present for IPA v4.5+ */
 #define IPA_REG_BCR_OFFSET				0x000001d0
-/* The next two fields are not present for IPA v4.2 */
+/* The next two fields are not present for IPA v4.2+ */
 #define BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK		GENMASK(0, 0)
 #define BCR_TX_NOT_USING_BRESP_FMASK		GENMASK(1, 1)
-/* The next field is invalid for IPA v4.1 */
+/* The next field is invalid for IPA v4.0+ */
 #define BCR_TX_SUSPEND_IRQ_ASSERT_ONCE_FMASK	GENMASK(2, 2)
-/* The next two fields are not present for IPA v4.2 */
+/* The next two fields are not present for IPA v4.2+ */
 #define BCR_SUSPEND_L2_IRQ_FMASK		GENMASK(3, 3)
 #define BCR_HOLB_DROP_L2_IRQ_FMASK		GENMASK(4, 4)
+/* The next five fields are present for IPA v3.5+ */
 #define BCR_DUAL_TX_FMASK			GENMASK(5, 5)
 #define BCR_ENABLE_FILTER_DATA_CACHE_FMASK	GENMASK(6, 6)
 #define BCR_NOTIF_PRIORITY_OVER_ZLT_FMASK	GENMASK(7, 7)
@@ -233,35 +246,40 @@ static inline u32 proc_cntxt_base_addr_encoded(enum ipa_version version,
 /* ipa->available defines the valid bits in the AGGR_FORCE_CLOSE register */
 #define IPA_REG_AGGR_FORCE_CLOSE_OFFSET			0x000001ec
 
-/* The next register is not present for IPA v4.5 */
+/* The next register is not present for IPA v4.5+ */
 #define IPA_REG_COUNTER_CFG_OFFSET			0x000001f0
+/* The next field is not present for IPA v3.5+ */
+#define EOT_COAL_GRANULARITY			GENMASK(3, 0)
 #define AGGR_GRANULARITY_FMASK			GENMASK(8, 4)
 
-/* The next register is not present for IPA v4.5 */
+/* The next register is present for IPA v3.5+ */
 #define IPA_REG_TX_CFG_OFFSET				0x000001fc
-/* The first three fields are present for IPA v3.5.1 only */
+/* The next three fields are not present for IPA v4.0+ */
 #define TX0_PREFETCH_DISABLE_FMASK		GENMASK(0, 0)
 #define TX1_PREFETCH_DISABLE_FMASK		GENMASK(1, 1)
 #define PREFETCH_ALMOST_EMPTY_SIZE_FMASK	GENMASK(4, 2)
-/* The next six fields are present for IPA v4.0 and above */
+/* The next six fields are present for IPA v4.0+ */
 #define PREFETCH_ALMOST_EMPTY_SIZE_TX0_FMASK	GENMASK(5, 2)
 #define DMAW_SCND_OUTSD_PRED_THRESHOLD_FMASK	GENMASK(9, 6)
 #define DMAW_SCND_OUTSD_PRED_EN_FMASK		GENMASK(10, 10)
 #define DMAW_MAX_BEATS_256_DIS_FMASK		GENMASK(11, 11)
 #define PA_MASK_EN_FMASK			GENMASK(12, 12)
 #define PREFETCH_ALMOST_EMPTY_SIZE_TX1_FMASK	GENMASK(16, 13)
-/* The next field is present for IPA v4.5 */
+/* The next field is present for IPA v4.5+ */
 #define DUAL_TX_ENABLE_FMASK			GENMASK(17, 17)
-/* The next two fields are present for IPA v4.2 only */
+/* The next field is present for IPA v4.2+, but not IPA v4.5 */
 #define SSPND_PA_NO_START_STATE_FMASK		GENMASK(18, 18)
+/* The next field is present for IPA v4.2 only */
 #define SSPND_PA_NO_BQ_STATE_FMASK		GENMASK(19, 19)
 
+/* The next register is present for IPA v3.5+ */
 #define IPA_REG_FLAVOR_0_OFFSET				0x00000210
 #define IPA_MAX_PIPES_FMASK			GENMASK(3, 0)
 #define IPA_MAX_CONS_PIPES_FMASK		GENMASK(12, 8)
 #define IPA_MAX_PROD_PIPES_FMASK		GENMASK(20, 16)
 #define IPA_PROD_LOWEST_FMASK			GENMASK(27, 24)
 
+/* The next register is present for IPA v3.5+ */
 static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 {
 	if (version >= IPA_VERSION_4_2)
@@ -273,19 +291,19 @@ static inline u32 ipa_reg_idle_indication_cfg_offset(enum ipa_version version)
 #define ENTER_IDLE_DEBOUNCE_THRESH_FMASK	GENMASK(15, 0)
 #define CONST_NON_IDLE_ENABLE_FMASK		GENMASK(16, 16)
 
-/* The next register is present for IPA v4.5 */
+/* The next register is present for IPA v4.5+ */
 #define IPA_REG_QTIME_TIMESTAMP_CFG_OFFSET		0x0000024c
 #define DPL_TIMESTAMP_LSB_FMASK			GENMASK(4, 0)
 #define DPL_TIMESTAMP_SEL_FMASK			GENMASK(7, 7)
 #define TAG_TIMESTAMP_LSB_FMASK			GENMASK(12, 8)
 #define NAT_TIMESTAMP_LSB_FMASK			GENMASK(20, 16)
 
-/* The next register is present for IPA v4.5 */
+/* The next register is present for IPA v4.5+ */
 #define IPA_REG_TIMERS_XO_CLK_DIV_CFG_OFFSET		0x00000250
 #define DIV_VALUE_FMASK				GENMASK(8, 0)
 #define DIV_ENABLE_FMASK			GENMASK(31, 31)
 
-/* The next register is present for IPA v4.5 */
+/* The next register is present for IPA v4.5+ */
 #define IPA_REG_TIMERS_PULSE_GRAN_CFG_OFFSET		0x00000254
 #define GRAN_0_FMASK				GENMASK(2, 0)
 #define GRAN_1_FMASK				GENMASK(5, 3)
@@ -344,19 +362,17 @@ static inline u32 ipa_resource_group_dst_count(enum ipa_version version)
 	}
 }
 
-/* Not all of the following are valid (depends on the count, above) */
+/* Not all of the following are present (depends on IPA version) */
 #define IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000400 + 0x0020 * (rt))
 #define IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000404 + 0x0020 * (rt))
-/* The next register is only present for IPA v4.5 */
 #define IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000408 + 0x0020 * (rt))
 #define IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000500 + 0x0020 * (rt))
 #define IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000504 + 0x0020 * (rt))
-/* The next register is only present for IPA v4.5 */
 #define IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000508 + 0x0020 * (rt))
 /* The next four fields are used for all resource group registers */
@@ -368,8 +384,9 @@ static inline u32 ipa_resource_group_dst_count(enum ipa_version version)
 
 #define IPA_REG_ENDP_INIT_CTRL_N_OFFSET(ep) \
 					(0x00000800 + 0x0070 * (ep))
-/* The next field should only used for IPA v3.5.1 */
+/* Valid only for RX (IPA producer) endpoints (do not use for IPA v4.0+) */
 #define ENDP_SUSPEND_FMASK			GENMASK(0, 0)
+/* Valid only for TX (IPA consumer) endpoints */
 #define ENDP_DELAY_FMASK			GENMASK(1, 1)
 
 #define IPA_REG_ENDP_INIT_CFG_N_OFFSET(ep) \
@@ -379,11 +396,11 @@ static inline u32 ipa_resource_group_dst_count(enum ipa_version version)
 #define CS_METADATA_HDR_OFFSET_FMASK		GENMASK(6, 3)
 #define CS_GEN_QMB_MASTER_SEL_FMASK		GENMASK(8, 8)
 
-/** enum ipa_cs_offload_en - checksum offload field in ENDP_INIT_CFG_N */
+/** enum ipa_cs_offload_en - ENDP_INIT_CFG register CS_OFFLOAD_EN field value */
 enum ipa_cs_offload_en {
 	IPA_CS_OFFLOAD_NONE		= 0x0,
-	IPA_CS_OFFLOAD_UL		= 0x1,
-	IPA_CS_OFFLOAD_DL		= 0x2,
+	IPA_CS_OFFLOAD_UL		= 0x1,	/* Before IPA v4.5 (TX) */
+	IPA_CS_OFFLOAD_DL		= 0x2,	/* Before IPA v4.5 (RX) */
 };
 
 /* Valid only for TX (IPA consumer) endpoints */
@@ -406,11 +423,12 @@ enum ipa_nat_en {
 #define HDR_ADDITIONAL_CONST_LEN_FMASK		GENMASK(18, 13)
 #define HDR_OFST_PKT_SIZE_VALID_FMASK		GENMASK(19, 19)
 #define HDR_OFST_PKT_SIZE_FMASK			GENMASK(25, 20)
+/* The next field is not present for IPA v4.9+ */
 #define HDR_A5_MUX_FMASK			GENMASK(26, 26)
 #define HDR_LEN_INC_DEAGG_HDR_FMASK		GENMASK(27, 27)
-/* The next field is not present for IPA v4.5 */
+/* The next field is not present for IPA v4.5+ */
 #define HDR_METADATA_REG_VALID_FMASK		GENMASK(28, 28)
-/* The next two fields are present for IPA v4.5 */
+/* The next two fields are present for IPA v4.5+ */
 #define HDR_LEN_MSB_FMASK			GENMASK(29, 28)
 #define HDR_OFST_METADATA_MSB_FMASK		GENMASK(31, 30)
 
@@ -462,7 +480,7 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version,
 #define HDR_PAYLOAD_LEN_INC_PADDING_FMASK	GENMASK(3, 3)
 #define HDR_TOTAL_LEN_OR_PAD_OFFSET_FMASK	GENMASK(9, 4)
 #define HDR_PAD_TO_ALIGNMENT_FMASK		GENMASK(13, 10)
-/* The next three fields are present for IPA v4.5 */
+/* The next three fields are present for IPA v4.5+ */
 #define HDR_TOTAL_LEN_OR_PAD_OFFSET_MSB_FMASK	GENMASK(17, 16)
 #define HDR_OFST_PKT_SIZE_MSB_FMASK		GENMASK(19, 18)
 #define HDR_ADDITIONAL_CONST_LEN_MSB_FMASK	GENMASK(21, 20)
@@ -475,16 +493,18 @@ static inline u32 ipa_metadata_offset_encoded(enum ipa_version version,
 #define IPA_REG_ENDP_INIT_MODE_N_OFFSET(txep) \
 					(0x00000820 + 0x0070 * (txep))
 #define MODE_FMASK				GENMASK(2, 0)
-/* The next field is present for IPA v4.5 */
+/* The next field is present for IPA v4.5+ */
 #define DCPH_ENABLE_FMASK			GENMASK(3, 3)
 #define DEST_PIPE_INDEX_FMASK			GENMASK(8, 4)
 #define BYTE_THRESHOLD_FMASK			GENMASK(27, 12)
 #define PIPE_REPLICATION_EN_FMASK		GENMASK(28, 28)
 #define PAD_EN_FMASK				GENMASK(29, 29)
-/* The next register is not present for IPA v4.5 */
+/* The next field is not present for IPA v4.5+ */
 #define HDR_FTCH_DISABLE_FMASK			GENMASK(30, 30)
+/* The next field is present for IPA v4.9+ */
+#define DRBIP_ACL_ENABLE			GENMASK(30, 30)
 
-/** enum ipa_mode - mode field in ENDP_INIT_MODE_N */
+/** enum ipa_mode - ENDP_INIT_MODE register MODE field value */
 enum ipa_mode {
 	IPA_BASIC			= 0x0,
 	IPA_ENABLE_FRAMING_HDLC		= 0x1,
@@ -496,47 +516,54 @@ enum ipa_mode {
 					(0x00000824 +  0x0070 * (ep))
 #define AGGR_EN_FMASK				GENMASK(1, 0)
 #define AGGR_TYPE_FMASK				GENMASK(4, 2)
+
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_byte_limit_fmask(bool legacy)
 {
 	return legacy ? GENMASK(9, 5) : GENMASK(10, 5);
 }
 
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_time_limit_fmask(bool legacy)
 {
 	return legacy ? GENMASK(14, 10) : GENMASK(16, 12);
 }
 
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_pkt_limit_fmask(bool legacy)
 {
 	return legacy ? GENMASK(20, 15) : GENMASK(22, 17);
 }
 
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_sw_eof_active_fmask(bool legacy)
 {
 	return legacy ? GENMASK(21, 21) : GENMASK(23, 23);
 }
 
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_force_close_fmask(bool legacy)
 {
 	return legacy ? GENMASK(22, 22) : GENMASK(24, 24);
 }
 
+/* The legacy value is used for IPA hardware before IPA v4.5 */
 static inline u32 aggr_hard_byte_limit_enable_fmask(bool legacy)
 {
 	return legacy ? GENMASK(24, 24) : GENMASK(26, 26);
 }
 
-/* The next field is present for IPA v4.5 */
+/* The next field is present for IPA v4.5+ */
 #define AGGR_GRAN_SEL_FMASK			GENMASK(27, 27)
 
-/** enum ipa_aggr_en - aggregation enable field in ENDP_INIT_AGGR_N */
+/** enum ipa_aggr_en - ENDP_INIT_AGGR register AGGR_EN field value */
 enum ipa_aggr_en {
-	IPA_BYPASS_AGGR			= 0x0,
-	IPA_ENABLE_AGGR			= 0x1,
-	IPA_ENABLE_DEAGGR		= 0x2,
+	IPA_BYPASS_AGGR			= 0x0,	/* (TX, RX) */
+	IPA_ENABLE_AGGR			= 0x1,	/* (RX) */
+	IPA_ENABLE_DEAGGR		= 0x2,	/* (TX) */
 };
 
-/** enum ipa_aggr_type - aggregation type field in ENDP_INIT_AGGR_N */
+/** enum ipa_aggr_type - ENDP_INIT_AGGR register AGGR_TYPE field value */
 enum ipa_aggr_type {
 	IPA_MBIM_16			= 0x0,
 	IPA_HDLC			= 0x1,
@@ -577,14 +604,13 @@ enum ipa_aggr_type {
 /* Encoded value for ENDP_INIT_RSRC_GRP register RSRC_GRP field */
 static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
 {
-	switch (version) {
-	case IPA_VERSION_4_2:
-		return u32_encode_bits(rsrc_grp, GENMASK(0, 0));
-	case IPA_VERSION_4_5:
+	if (version < IPA_VERSION_3_5 || version == IPA_VERSION_4_5)
 		return u32_encode_bits(rsrc_grp, GENMASK(2, 0));
-	default:
-		return u32_encode_bits(rsrc_grp, GENMASK(1, 0));
-	}
+
+	if (version == IPA_VERSION_4_2 || version == IPA_VERSION_4_7)
+		return u32_encode_bits(rsrc_grp, GENMASK(0, 0));
+
+	return u32_encode_bits(rsrc_grp, GENMASK(1, 0));
 }
 
 /* Valid only for TX (IPA consumer) endpoints */
@@ -595,6 +621,13 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
 
 /**
  * enum ipa_seq_type - HPS and DPS sequencer type
+ * @IPA_SEQ_DMA:		 Perform DMA only
+ * @IPA_SEQ_1_PASS:		 One pass through the pipeline
+ * @IPA_SEQ_2_PASS_SKIP_LAST_UC: Two passes, skip the microcprocessor
+ * @IPA_SEQ_1_PASS_SKIP_LAST_UC: One pass, skip the microcprocessor
+ * @IPA_SEQ_2_PASS:		 Two passes through the pipeline
+ * @IPA_SEQ_3_PASS_SKIP_LAST_UC: Three passes, skip the microcprocessor
+ * @IPA_SEQ_DECIPHER:		 Optional deciphering step (combined)
  *
  * The low-order byte of the sequencer type register defines the number of
  * passes a packet takes through the IPA pipeline.  The last pass through can
@@ -604,7 +637,6 @@ static inline u32 rsrc_grp_encoded(enum ipa_version version, u32 rsrc_grp)
  * Note: not all combinations of ipa_seq_type and ipa_seq_rep_type are
  * supported (or meaningful).
  */
-#define IPA_SEQ_DECIPHER			0x11
 enum ipa_seq_type {
 	IPA_SEQ_DMA				= 0x00,
 	IPA_SEQ_1_PASS				= 0x02,
@@ -612,10 +644,13 @@ enum ipa_seq_type {
 	IPA_SEQ_1_PASS_SKIP_LAST_UC		= 0x06,
 	IPA_SEQ_2_PASS				= 0x0a,
 	IPA_SEQ_3_PASS_SKIP_LAST_UC		= 0x0c,
+	/* The next value can be ORed with the above */
+	IPA_SEQ_DECIPHER			= 0x11,
 };
 
 /**
  * enum ipa_seq_rep_type - replicated packet sequencer type
+ * @IPA_SEQ_REP_DMA_PARSER:	DMA parser for replicated packets
  *
  * This goes in the second byte of the endpoint sequencer type register.
  *
@@ -630,12 +665,12 @@ enum ipa_seq_rep_type {
 					(0x00000840 + 0x0070 * (ep))
 #define STATUS_EN_FMASK				GENMASK(0, 0)
 #define STATUS_ENDP_FMASK			GENMASK(5, 1)
-/* The next field is not present for IPA v4.5 */
+/* The next field is not present for IPA v4.5+ */
 #define STATUS_LOCATION_FMASK			GENMASK(8, 8)
-/* The next field is not present for IPA v3.5.1 */
+/* The next field is present for IPA v4.0+ */
 #define STATUS_PKT_SUPPRESS_FMASK		GENMASK(9, 9)
 
-/* The next register is only present for IPA versions that support hashing */
+/* The next register is not present for IPA v4.2 (which no hashing support) */
 #define IPA_REG_ENDP_FILTER_ROUTER_HSH_CFG_N_OFFSET(er) \
 					(0x0000085c + 0x0070 * (er))
 #define FILTER_HASH_MSK_SRC_ID_FMASK		GENMASK(0, 0)
@@ -675,12 +710,42 @@ enum ipa_seq_rep_type {
  * @IPA_IRQ_UC_0:	Microcontroller event interrupt
  * @IPA_IRQ_UC_1:	Microcontroller response interrupt
  * @IPA_IRQ_TX_SUSPEND:	Data ready interrupt
+ * @IPA_IRQ_COUNT:	Number of IRQ ids (must be last)
  *
  * IRQ types not described above are not currently used.
+ *
+ * @IPA_IRQ_BAD_SNOC_ACCESS:		(Not currently used)
+ * @IPA_IRQ_EOT_COAL:			(Not currently used)
+ * @IPA_IRQ_UC_2:			(Not currently used)
+ * @IPA_IRQ_UC_3:			(Not currently used)
+ * @IPA_IRQ_UC_IN_Q_NOT_EMPTY:		(Not currently used)
+ * @IPA_IRQ_UC_RX_CMD_Q_NOT_FULL:	(Not currently used)
+ * @IPA_IRQ_PROC_UC_ACK_Q_NOT_EMPTY:	(Not currently used)
+ * @IPA_IRQ_RX_ERR:			(Not currently used)
+ * @IPA_IRQ_DEAGGR_ERR:			(Not currently used)
+ * @IPA_IRQ_TX_ERR:			(Not currently used)
+ * @IPA_IRQ_STEP_MODE:			(Not currently used)
+ * @IPA_IRQ_PROC_ERR:			(Not currently used)
+ * @IPA_IRQ_TX_HOLB_DROP:		(Not currently used)
+ * @IPA_IRQ_BAM_GSI_IDLE:		(Not currently used)
+ * @IPA_IRQ_PIPE_YELLOW_BELOW:		(Not currently used)
+ * @IPA_IRQ_PIPE_RED_BELOW:		(Not currently used)
+ * @IPA_IRQ_PIPE_YELLOW_ABOVE:		(Not currently used)
+ * @IPA_IRQ_PIPE_RED_ABOVE:		(Not currently used)
+ * @IPA_IRQ_UCP:			(Not currently used)
+ * @IPA_IRQ_DCMP:			(Not currently used)
+ * @IPA_IRQ_GSI_EE:			(Not currently used)
+ * @IPA_IRQ_GSI_IPA_IF_TLV_RCVD:	(Not currently used)
+ * @IPA_IRQ_GSI_UC:			(Not currently used)
+ * @IPA_IRQ_TLV_LEN_MIN_DSM:		(Not currently used)
+ * @IPA_IRQ_DRBIP_PKT_EXCEED_MAX_SIZE_EN: (Not currently used)
+ * @IPA_IRQ_DRBIP_DATA_SCTR_CFG_ERROR_EN: (Not currently used)
+ * @IPA_IRQ_DRBIP_IMM_CMD_NO_FLSH_HZRD_EN: (Not currently used)
  */
 enum ipa_irq_id {
 	IPA_IRQ_BAD_SNOC_ACCESS			= 0x0,
-	/* Type (bit) 0x1 is not defined */
+	/* The next bit is not present for IPA v3.5+ */
+	IPA_IRQ_EOT_COAL			= 0x1,
 	IPA_IRQ_UC_0				= 0x2,
 	IPA_IRQ_UC_1				= 0x3,
 	IPA_IRQ_UC_2				= 0x4,
@@ -701,12 +766,17 @@ enum ipa_irq_id {
 	IPA_IRQ_PIPE_YELLOW_ABOVE		= 0x13,
 	IPA_IRQ_PIPE_RED_ABOVE			= 0x14,
 	IPA_IRQ_UCP				= 0x15,
+	/* The next bit is not present for IPA v4.5+ */
 	IPA_IRQ_DCMP				= 0x16,
 	IPA_IRQ_GSI_EE				= 0x17,
 	IPA_IRQ_GSI_IPA_IF_TLV_RCVD		= 0x18,
 	IPA_IRQ_GSI_UC				= 0x19,
-	/* The next bit is present for IPA v4.5 */
+	/* The next bit is present for IPA v4.5+ */
 	IPA_IRQ_TLV_LEN_MIN_DSM			= 0x1a,
+	/* The next three bits are present for IPA v4.9+ */
+	IPA_IRQ_DRBIP_PKT_EXCEED_MAX_SIZE_EN	= 0x1b,
+	IPA_IRQ_DRBIP_DATA_SCTR_CFG_ERROR_EN	= 0x1c,
+	IPA_IRQ_DRBIP_IMM_CMD_NO_FLSH_HZRD_EN	= 0x1d,
 	IPA_IRQ_COUNT,				/* Last; not an id */
 };
 
@@ -719,16 +789,19 @@ enum ipa_irq_id {
 /* ipa->available defines the valid bits in the SUSPEND_INFO register */
 #define IPA_REG_IRQ_SUSPEND_INFO_OFFSET \
 				IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(GSI_EE_AP)
+/* This register is at (0x00003098 + 0x1000 * (ee)) for IPA v3.0 */
 #define IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(ee) \
 					(0x00003030 + 0x1000 * (ee))
 
 /* ipa->available defines the valid bits in the IRQ_SUSPEND_EN register */
+/* This register is present for IPA v3.1+ */
 #define IPA_REG_IRQ_SUSPEND_EN_OFFSET \
 				IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(GSI_EE_AP)
 #define IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(ee) \
 					(0x00003034 + 0x1000 * (ee))
 
 /* ipa->available defines the valid bits in the IRQ_SUSPEND_CLR register */
+/* This register is present for IPA v3.1+ */
 #define IPA_REG_IRQ_SUSPEND_CLR_OFFSET \
 				IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(GSI_EE_AP)
 #define IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(ee) \
-- 
cgit v1.2.3


From cc5199ed50f2939743185fac94f1bcb47200684a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:33 -0500
Subject: net: ipa: update component config register

IPA version 4.9 and later use a different layout of some fields
found in the COMP_CFG register.

Define arbitration_lock_disable_encoded(), and use it to encode a
value into the ATOMIC_FETCHER_ARB_LOCK_DIS field based on the IPA
version.

And define full_flush_rsc_closure_en_encoded() to encode a value
into the FULL_FLUSH_WAIT_RSC_CLOSE_EN field based on the IPA
version.

The values of these fields are neither modified nor extracted by
current code, but this patch makes this possible for all supported
versions.

Fix a mistaken comment above ipa_hardware_config_comp() intended to
describe the purpose for the register.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c |  2 +-
 drivers/net/ipa/ipa_reg.h  | 32 +++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index ba1bfc30210a..f071e90de540 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -222,7 +222,7 @@ static void ipa_teardown(struct ipa *ipa)
 	gsi_teardown(&ipa->gsi);
 }
 
-/* Configure QMB Core Master Port selection */
+/* Configure bus access behavior for IPA components */
 static void ipa_hardware_config_comp(struct ipa *ipa)
 {
 	u32 val;
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 735f6e809e04..8a654ccda49e 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -88,9 +88,6 @@ struct ipa;
 #define GSI_SNOC_CNOC_LOOP_PROT_DISABLE_FMASK	GENMASK(14, 14)
 #define GSI_MULTI_AXI_MASTERS_DIS_FMASK		GENMASK(15, 15)
 #define IPA_QMB_SELECT_GLOBAL_EN_FMASK		GENMASK(16, 16)
-#define IPA_ATOMIC_FETCHER_ARB_LOCK_DIS_FMASK	GENMASK(20, 17)
-/* The next field is present for IPA v4.5 and IPA v4.7 */
-#define IPA_FULL_FLUSH_WAIT_RSC_CLOSE_EN_FMASK	GENMASK(21, 21)
 /* The next five fields are present for IPA v4.9+ */
 #define QMB_RAM_RD_CACHE_DISABLE_FMASK		GENMASK(19, 19)
 #define GENQMB_AOOOWR_FMASK			GENMASK(20, 20)
@@ -98,6 +95,35 @@ struct ipa;
 #define GEN_QMB_1_DYNAMIC_ASIZE_FMASK		GENMASK(30, 30)
 #define GEN_QMB_0_DYNAMIC_ASIZE_FMASK		GENMASK(31, 31)
 
+/* Encoded value for COMP_CFG register ATOMIC_FETCHER_ARB_LOCK_DIS field */
+static inline u32 arbitration_lock_disable_encoded(enum ipa_version version,
+						   u32 mask)
+{
+	/* assert(version >= IPA_VERSION_4_0); */
+
+	if (version < IPA_VERSION_4_9)
+		return u32_encode_bits(mask, GENMASK(20, 17));
+
+	if (version == IPA_VERSION_4_9)
+		return u32_encode_bits(mask, GENMASK(24, 22));
+
+	return u32_encode_bits(mask, GENMASK(23, 22));
+}
+
+/* Encoded value for COMP_CFG register FULL_FLUSH_WAIT_RS_CLOSURE_EN field */
+static inline u32 full_flush_rsc_closure_en_encoded(enum ipa_version version,
+						    bool enable)
+{
+	u32 val = enable ? 1 : 0;
+
+	/* assert(version >= IPA_VERSION_4_5); */
+
+	if (version == IPA_VERSION_4_5 || version == IPA_VERSION_4_7)
+		return u32_encode_bits(val, GENMASK(21, 21));
+
+	return u32_encode_bits(val, GENMASK(17, 17));
+}
+
 #define IPA_REG_CLKON_CFG_OFFSET			0x00000044
 #define RX_FMASK				GENMASK(0, 0)
 #define PROC_FMASK				GENMASK(1, 1)
-- 
cgit v1.2.3


From e666aa978a55d352b76bfa1f9f19c19ef9261467 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:34 -0500
Subject: net: ipa: support IPA interrupt addresses for IPA v4.7

Starting with IPA v4.7, registers related to IPA interrupts are
located at a fixed offset 0x1000 above than the addresses used for
earlier versions.  Define and use functions to provide the offset to
use for these registers based on IPA version.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_interrupt.c |  54 +++++++++++----
 drivers/net/ipa/ipa_reg.h       | 143 +++++++++++++++++++++++++++++-----------
 drivers/net/ipa/ipa_uc.c        |   5 +-
 3 files changed, 150 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c
index 61dd7605bcb6..c46df0b7c4e5 100644
--- a/drivers/net/ipa/ipa_interrupt.c
+++ b/drivers/net/ipa/ipa_interrupt.c
@@ -54,12 +54,14 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
 	bool uc_irq = ipa_interrupt_uc(interrupt, irq_id);
 	struct ipa *ipa = interrupt->ipa;
 	u32 mask = BIT(irq_id);
+	u32 offset;
 
 	/* For microcontroller interrupts, clear the interrupt right away,
 	 * "to avoid clearing unhandled interrupts."
 	 */
+	offset = ipa_reg_irq_clr_offset(ipa->version);
 	if (uc_irq)
-		iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET);
+		iowrite32(mask, ipa->reg_virt + offset);
 
 	if (irq_id < IPA_IRQ_COUNT && interrupt->handler[irq_id])
 		interrupt->handler[irq_id](interrupt->ipa, irq_id);
@@ -69,7 +71,7 @@ static void ipa_interrupt_process(struct ipa_interrupt *interrupt, u32 irq_id)
 	 * so defer clearing until after the handler has been called.
 	 */
 	if (!uc_irq)
-		iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET);
+		iowrite32(mask, ipa->reg_virt + offset);
 }
 
 /* Process all IPA interrupt types that have been signaled */
@@ -77,13 +79,15 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
 {
 	struct ipa *ipa = interrupt->ipa;
 	u32 enabled = interrupt->enabled;
+	u32 offset;
 	u32 mask;
 
 	/* The status register indicates which conditions are present,
 	 * including conditions whose interrupt is not enabled.  Handle
 	 * only the enabled ones.
 	 */
-	mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET);
+	offset = ipa_reg_irq_stts_offset(ipa->version);
+	mask = ioread32(ipa->reg_virt + offset);
 	while ((mask &= enabled)) {
 		do {
 			u32 irq_id = __ffs(mask);
@@ -92,7 +96,7 @@ static void ipa_interrupt_process_all(struct ipa_interrupt *interrupt)
 
 			ipa_interrupt_process(interrupt, irq_id);
 		} while (mask);
-		mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET);
+		mask = ioread32(ipa->reg_virt + offset);
 	}
 }
 
@@ -115,14 +119,17 @@ static irqreturn_t ipa_isr(int irq, void *dev_id)
 {
 	struct ipa_interrupt *interrupt = dev_id;
 	struct ipa *ipa = interrupt->ipa;
+	u32 offset;
 	u32 mask;
 
-	mask = ioread32(ipa->reg_virt + IPA_REG_IRQ_STTS_OFFSET);
+	offset = ipa_reg_irq_stts_offset(ipa->version);
+	mask = ioread32(ipa->reg_virt + offset);
 	if (mask & interrupt->enabled)
 		return IRQ_WAKE_THREAD;
 
 	/* Nothing in the mask was supposed to cause an interrupt */
-	iowrite32(mask, ipa->reg_virt + IPA_REG_IRQ_CLR_OFFSET);
+	offset = ipa_reg_irq_clr_offset(ipa->version);
+	iowrite32(mask, ipa->reg_virt + offset);
 
 	dev_err(&ipa->pdev->dev, "%s: unexpected interrupt, mask 0x%08x\n",
 		__func__, mask);
@@ -136,15 +143,22 @@ static void ipa_interrupt_suspend_control(struct ipa_interrupt *interrupt,
 {
 	struct ipa *ipa = interrupt->ipa;
 	u32 mask = BIT(endpoint_id);
+	u32 offset;
 	u32 val;
 
 	/* assert(mask & ipa->available); */
-	val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET);
+
+	/* IPA version 3.0 does not support TX_SUSPEND interrupt control */
+	if (ipa->version == IPA_VERSION_3_0)
+		return;
+
+	offset = ipa_reg_irq_suspend_en_offset(ipa->version);
+	val = ioread32(ipa->reg_virt + offset);
 	if (enable)
 		val |= mask;
 	else
 		val &= ~mask;
-	iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_SUSPEND_EN_OFFSET);
+	iowrite32(val, ipa->reg_virt + offset);
 }
 
 /* Enable TX_SUSPEND for an endpoint */
@@ -165,10 +179,18 @@ ipa_interrupt_suspend_disable(struct ipa_interrupt *interrupt, u32 endpoint_id)
 void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt)
 {
 	struct ipa *ipa = interrupt->ipa;
+	u32 offset;
 	u32 val;
 
-	val = ioread32(ipa->reg_virt + IPA_REG_IRQ_SUSPEND_INFO_OFFSET);
-	iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_SUSPEND_CLR_OFFSET);
+	offset = ipa_reg_irq_suspend_info_offset(ipa->version);
+	val = ioread32(ipa->reg_virt + offset);
+
+	/* SUSPEND interrupt status isn't cleared on IPA version 3.0 */
+	if (ipa->version == IPA_VERSION_3_0)
+		return;
+
+	offset = ipa_reg_irq_suspend_clr_offset(ipa->version);
+	iowrite32(val, ipa->reg_virt + offset);
 }
 
 /* Simulate arrival of an IPA TX_SUSPEND interrupt */
@@ -182,13 +204,15 @@ void ipa_interrupt_add(struct ipa_interrupt *interrupt,
 		       enum ipa_irq_id ipa_irq, ipa_irq_handler_t handler)
 {
 	struct ipa *ipa = interrupt->ipa;
+	u32 offset;
 
 	/* assert(ipa_irq < IPA_IRQ_COUNT); */
 	interrupt->handler[ipa_irq] = handler;
 
 	/* Update the IPA interrupt mask to enable it */
 	interrupt->enabled |= BIT(ipa_irq);
-	iowrite32(interrupt->enabled, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET);
+	offset = ipa_reg_irq_en_offset(ipa->version);
+	iowrite32(interrupt->enabled, ipa->reg_virt + offset);
 }
 
 /* Remove the handler for an IPA interrupt type */
@@ -196,11 +220,13 @@ void
 ipa_interrupt_remove(struct ipa_interrupt *interrupt, enum ipa_irq_id ipa_irq)
 {
 	struct ipa *ipa = interrupt->ipa;
+	u32 offset;
 
 	/* assert(ipa_irq < IPA_IRQ_COUNT); */
 	/* Update the IPA interrupt mask to disable it */
 	interrupt->enabled &= ~BIT(ipa_irq);
-	iowrite32(interrupt->enabled, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET);
+	offset = ipa_reg_irq_en_offset(ipa->version);
+	iowrite32(interrupt->enabled, ipa->reg_virt + offset);
 
 	interrupt->handler[ipa_irq] = NULL;
 }
@@ -211,6 +237,7 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
 	struct device *dev = &ipa->pdev->dev;
 	struct ipa_interrupt *interrupt;
 	unsigned int irq;
+	u32 offset;
 	int ret;
 
 	ret = platform_get_irq_byname(ipa->pdev, "ipa");
@@ -228,7 +255,8 @@ struct ipa_interrupt *ipa_interrupt_setup(struct ipa *ipa)
 	interrupt->irq = irq;
 
 	/* Start with all IPA interrupts disabled */
-	iowrite32(0, ipa->reg_virt + IPA_REG_IRQ_EN_OFFSET);
+	offset = ipa_reg_irq_en_offset(ipa->version);
+	iowrite32(0, ipa->reg_virt + offset);
 
 	ret = request_threaded_irq(irq, ipa_isr, ipa_isr_thread, IRQF_ONESHOT,
 				   "ipa", interrupt);
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 8a654ccda49e..8820e08d2535 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -717,20 +717,46 @@ enum ipa_seq_rep_type {
 #define ROUTER_HASH_MSK_METADATA_FMASK		GENMASK(22, 22)
 #define IPA_REG_ENDP_ROUTER_HASH_MSK_ALL	GENMASK(22, 16)
 
-#define IPA_REG_IRQ_STTS_OFFSET	\
-				IPA_REG_IRQ_STTS_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_STTS_EE_N_OFFSET(ee) \
-					(0x00003008 + 0x1000 * (ee))
-
-#define IPA_REG_IRQ_EN_OFFSET \
-				IPA_REG_IRQ_EN_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_EN_EE_N_OFFSET(ee) \
-					(0x0000300c + 0x1000 * (ee))
-
-#define IPA_REG_IRQ_CLR_OFFSET \
-				IPA_REG_IRQ_CLR_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_CLR_EE_N_OFFSET(ee) \
-					(0x00003010 + 0x1000 * (ee))
+static inline u32 ipa_reg_irq_stts_ee_n_offset(enum ipa_version version,
+					       u32 ee)
+{
+	if (version < IPA_VERSION_4_9)
+		return 0x00003008 + 0x1000 * ee;
+
+	return 0x00004008 + 0x1000 * ee;
+}
+
+static inline u32 ipa_reg_irq_stts_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_stts_ee_n_offset(version, GSI_EE_AP);
+}
+
+static inline u32 ipa_reg_irq_en_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	if (version < IPA_VERSION_4_9)
+		return 0x0000300c + 0x1000 * ee;
+
+	return 0x0000400c + 0x1000 * ee;
+}
+
+static inline u32 ipa_reg_irq_en_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_en_ee_n_offset(version, GSI_EE_AP);
+}
+
+static inline u32 ipa_reg_irq_clr_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	if (version < IPA_VERSION_4_9)
+		return 0x00003010 + 0x1000 * ee;
+
+	return 0x00004010 + 0x1000 * ee;
+}
+
+static inline u32 ipa_reg_irq_clr_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_clr_ee_n_offset(version, GSI_EE_AP);
+}
+
 /**
  * enum ipa_irq_id - Bit positions representing type of IPA IRQ
  * @IPA_IRQ_UC_0:	Microcontroller event interrupt
@@ -806,32 +832,75 @@ enum ipa_irq_id {
 	IPA_IRQ_COUNT,				/* Last; not an id */
 };
 
-#define IPA_REG_IRQ_UC_OFFSET \
-				IPA_REG_IRQ_UC_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_UC_EE_N_OFFSET(ee) \
-					(0x0000301c + 0x1000 * (ee))
+static inline u32 ipa_reg_irq_uc_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	if (version < IPA_VERSION_4_9)
+		return 0x0000301c + 0x1000 * ee;
+
+	return 0x0000401c + 0x1000 * ee;
+}
+
+static inline u32 ipa_reg_irq_uc_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_uc_ee_n_offset(version, GSI_EE_AP);
+}
+
 #define UC_INTR_FMASK				GENMASK(0, 0)
 
 /* ipa->available defines the valid bits in the SUSPEND_INFO register */
-#define IPA_REG_IRQ_SUSPEND_INFO_OFFSET \
-				IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(GSI_EE_AP)
-/* This register is at (0x00003098 + 0x1000 * (ee)) for IPA v3.0 */
-#define IPA_REG_IRQ_SUSPEND_INFO_EE_N_OFFSET(ee) \
-					(0x00003030 + 0x1000 * (ee))
-
-/* ipa->available defines the valid bits in the IRQ_SUSPEND_EN register */
-/* This register is present for IPA v3.1+ */
-#define IPA_REG_IRQ_SUSPEND_EN_OFFSET \
-				IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_SUSPEND_EN_EE_N_OFFSET(ee) \
-					(0x00003034 + 0x1000 * (ee))
-
-/* ipa->available defines the valid bits in the IRQ_SUSPEND_CLR register */
-/* This register is present for IPA v3.1+ */
-#define IPA_REG_IRQ_SUSPEND_CLR_OFFSET \
-				IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(GSI_EE_AP)
-#define IPA_REG_IRQ_SUSPEND_CLR_EE_N_OFFSET(ee) \
-					(0x00003038 + 0x1000 * (ee))
+static inline u32
+ipa_reg_irq_suspend_info_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	if (version == IPA_VERSION_3_0)
+		return 0x00003098 + 0x1000 * ee;
+
+	if (version < IPA_VERSION_4_9)
+		return 0x00003030 + 0x1000 * ee;
+
+	return 0x00004030 + 0x1000 * ee;
+}
+
+static inline u32
+ipa_reg_irq_suspend_info_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_suspend_info_ee_n_offset(version, GSI_EE_AP);
+}
+
+/* ipa->available defines the valid bits in the SUSPEND_EN register */
+static inline u32
+ipa_reg_irq_suspend_en_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	/* assert(version != IPA_VERSION_3_0); */
+
+	if (version < IPA_VERSION_4_9)
+		return 0x00003034 + 0x1000 * ee;
+
+	return 0x00004034 + 0x1000 * ee;
+}
+
+static inline u32
+ipa_reg_irq_suspend_en_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_suspend_en_ee_n_offset(version, GSI_EE_AP);
+}
+
+/* ipa->available defines the valid bits in the SUSPEND_CLR register */
+static inline u32
+ipa_reg_irq_suspend_clr_ee_n_offset(enum ipa_version version, u32 ee)
+{
+	/* assert(version != IPA_VERSION_3_0); */
+
+	if (version < IPA_VERSION_4_9)
+		return 0x00003038 + 0x1000 * ee;
+
+	return 0x00004038 + 0x1000 * ee;
+}
+
+static inline u32
+ipa_reg_irq_suspend_clr_offset(enum ipa_version version)
+{
+	return ipa_reg_irq_suspend_clr_ee_n_offset(version, GSI_EE_AP);
+}
 
 int ipa_reg_init(struct ipa *ipa);
 void ipa_reg_exit(struct ipa *ipa);
diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c
index dee58a6596d4..2756363e6938 100644
--- a/drivers/net/ipa/ipa_uc.c
+++ b/drivers/net/ipa/ipa_uc.c
@@ -192,6 +192,7 @@ void ipa_uc_teardown(struct ipa *ipa)
 static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param)
 {
 	struct ipa_uc_mem_area *shared = ipa_uc_shared(ipa);
+	u32 offset;
 	u32 val;
 
 	/* Fill in the command data */
@@ -203,8 +204,8 @@ static void send_uc_command(struct ipa *ipa, u32 command, u32 command_param)
 
 	/* Use an interrupt to tell the microcontroller the command is ready */
 	val = u32_encode_bits(1, UC_INTR_FMASK);
-
-	iowrite32(val, ipa->reg_virt + IPA_REG_IRQ_UC_OFFSET);
+	offset = ipa_reg_irq_uc_offset(ipa->version);
+	iowrite32(val, ipa->reg_virt + offset);
 }
 
 /* Tell the microcontroller the AP is shutting down */
-- 
cgit v1.2.3


From 4f57b2fa0744f2fffd61936facd258897834aad5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:35 -0500
Subject: net: ipa: GSI register cleanup

The main purpose of this is to extend these GSI register definitions
to support additional IPA versions.

This patch makes some minor updates to "gsi_reg.h":
  - Define a DB_IN_BYTES field in the channel QOS register
  - Add some comments clarifying when certain fields are valid
  - Add the definition of GSI_CH_DB_STOP channel command
  - Add a couple of blank lines
  - Move one comment and indent another
  - Delete two unused register definitions at the end.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi_reg.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index 1622d8cf8dea..6b53adbc667a 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -114,6 +114,9 @@ enum gsi_channel_type {
 /* The next two fields are present for IPA v4.5 and above */
 #define PREFETCH_MODE_FMASK		GENMASK(13, 10)
 #define EMPTY_LVL_THRSHOLD_FMASK	GENMASK(23, 16)
+/* The next field is present for IPA v4.9 and above */
+#define DB_IN_BYTES			GENMASK(24, 24)
+
 /** enum gsi_prefetch_mode - PREFETCH_MODE field in CH_C_QOS */
 enum gsi_prefetch_mode {
 	GSI_USE_PREFETCH_BUFS			= 0x0,
@@ -146,13 +149,13 @@ enum gsi_prefetch_mode {
 		GSI_EE_N_EV_CH_E_CNTXT_0_OFFSET((ev), GSI_EE_AP)
 #define GSI_EE_N_EV_CH_E_CNTXT_0_OFFSET(ev, ee) \
 		(0x0001d000 + 0x4000 * (ee) + 0x80 * (ev))
+/* enum gsi_channel_type defines EV_CHTYPE field values in EV_CH_E_CNTXT_0 */
 #define EV_CHTYPE_FMASK			GENMASK(3, 0)
 #define EV_EE_FMASK			GENMASK(7, 4)
 #define EV_EVCHID_FMASK			GENMASK(15, 8)
 #define EV_INTYPE_FMASK			GENMASK(16, 16)
 #define EV_CHSTATE_FMASK		GENMASK(23, 20)
 #define EV_ELEMENT_SIZE_FMASK		GENMASK(31, 24)
-/* enum gsi_channel_type defines EV_CHTYPE field values in EV_CH_E_CNTXT_0 */
 
 #define GSI_EV_CH_E_CNTXT_1_OFFSET(ev) \
 		GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET((ev), GSI_EE_AP)
@@ -248,6 +251,7 @@ enum gsi_ch_cmd_opcode {
 	GSI_CH_STOP				= 0x2,
 	GSI_CH_RESET				= 0x9,
 	GSI_CH_DE_ALLOC				= 0xa,
+	GSI_CH_DB_STOP				= 0xb,
 };
 
 #define GSI_EV_CH_CMD_OFFSET \
@@ -278,6 +282,7 @@ enum gsi_generic_cmd_opcode {
 	GSI_GENERIC_ALLOCATE_CHANNEL		= 0x2,
 };
 
+/* The next register is present for IPA v3.5.1 and above */
 #define GSI_GSI_HW_PARAM_2_OFFSET \
 			GSI_EE_N_GSI_HW_PARAM_2_OFFSET(GSI_EE_AP)
 #define GSI_EE_N_GSI_HW_PARAM_2_OFFSET(ee) \
@@ -300,7 +305,7 @@ enum gsi_generic_cmd_opcode {
 enum gsi_iram_size {
 	IRAM_SIZE_ONE_KB			= 0x0,
 	IRAM_SIZE_TWO_KB			= 0x1,
-/* The next two values are available for IPA v4.0 and above */
+	/* The next two values are available for IPA v4.0 and above */
 	IRAM_SIZE_TWO_N_HALF_KB			= 0x2,
 	IRAM_SIZE_THREE_KB			= 0x3,
 	/* The next two values are available for IPA v4.5 and above */
@@ -424,6 +429,8 @@ enum gsi_general_id {
 			GSI_EE_N_ERROR_LOG_OFFSET(GSI_EE_AP)
 #define GSI_EE_N_ERROR_LOG_OFFSET(ee) \
 			(0x0001f200 + 0x4000 * (ee))
+
+/* Fields below are present for IPA v3.5.1 and above */
 #define ERR_ARG3_FMASK			GENMASK(3, 0)
 #define ERR_ARG2_FMASK			GENMASK(7, 4)
 #define ERR_ARG1_FMASK			GENMASK(11, 8)
@@ -474,7 +481,4 @@ enum gsi_generic_ee_result {
 	GENERIC_EE_NO_RESOURCES			= 0x7,
 };
 
-#define USB_MAX_PACKET_FMASK		GENMASK(15, 15)	/* 0: HS; 1: SS */
-#define MHI_BASE_CHANNEL_FMASK		GENMASK(31, 24)
-
 #endif	/* _GSI_REG_H_ */
-- 
cgit v1.2.3


From 42839f9585a00b53691bc56e6a238029f1466959 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:36 -0500
Subject: net: ipa: update GSI ring size registers

Each GSI channel has a CNTXT_1 register that encodes the size of its
ring buffer.  The size of the field that records that is increased
starting at IPA v4.9.  Replace the use of a fixed-size field mask
with a new inline function that encodes that size value.

Similarly, the size of GSI event rings can be larger starting with
IPA v4.9, so create a function to encode that as well.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c     |  7 +++++--
 drivers/net/ipa/gsi_reg.h | 17 +++++++++++++++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 7f2a8fce5e0d..6f146288f9e4 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -701,7 +701,7 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id)
 	val |= u32_encode_bits(GSI_RING_ELEMENT_SIZE, EV_ELEMENT_SIZE_FMASK);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_0_OFFSET(evt_ring_id));
 
-	val = u32_encode_bits(size, EV_R_LENGTH_FMASK);
+	val = ev_r_length_encoded(gsi->version, size);
 	iowrite32(val, gsi->virt + GSI_EV_CH_E_CNTXT_1_OFFSET(evt_ring_id));
 
 	/* The context 2 and 3 registers store the low-order and
@@ -808,7 +808,7 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 	val |= u32_encode_bits(GSI_RING_ELEMENT_SIZE, ELEMENT_SIZE_FMASK);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id));
 
-	val = u32_encode_bits(size, R_LENGTH_FMASK);
+	val = r_length_encoded(gsi->version, size);
 	iowrite32(val, gsi->virt + GSI_CH_C_CNTXT_1_OFFSET(channel_id));
 
 	/* The context 2 and 3 registers store the low-order and
@@ -842,6 +842,9 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 			val |= u32_encode_bits(GSI_ESCAPE_BUF_ONLY,
 					       PREFETCH_MODE_FMASK);
 	}
+	/* All channels set DB_IN_BYTES */
+	if (gsi->version >= IPA_VERSION_4_9)
+		val |= DB_IN_BYTES;
 
 	iowrite32(val, gsi->virt + GSI_CH_C_QOS_OFFSET(channel_id));
 
diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index 6b53adbc667a..d964015a4409 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -90,7 +90,14 @@ enum gsi_channel_type {
 		GSI_EE_N_CH_C_CNTXT_1_OFFSET((ch), GSI_EE_AP)
 #define GSI_EE_N_CH_C_CNTXT_1_OFFSET(ch, ee) \
 		(0x0001c004 + 0x4000 * (ee) + 0x80 * (ch))
-#define R_LENGTH_FMASK			GENMASK(15, 0)
+
+/* Encoded value for CH_C_CNTXT_1 register R_LENGTH field */
+static inline u32 r_length_encoded(enum ipa_version version, u32 length)
+{
+	if (version < IPA_VERSION_4_9)
+		return u32_encode_bits(length, GENMASK(15, 0));
+	return u32_encode_bits(length, GENMASK(19, 0));
+}
 
 #define GSI_CH_C_CNTXT_2_OFFSET(ch) \
 		GSI_EE_N_CH_C_CNTXT_2_OFFSET((ch), GSI_EE_AP)
@@ -161,7 +168,13 @@ enum gsi_prefetch_mode {
 		GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET((ev), GSI_EE_AP)
 #define GSI_EE_N_EV_CH_E_CNTXT_1_OFFSET(ev, ee) \
 		(0x0001d004 + 0x4000 * (ee) + 0x80 * (ev))
-#define EV_R_LENGTH_FMASK		GENMASK(15, 0)
+/* Encoded value for EV_CH_C_CNTXT_1 register EV_R_LENGTH field */
+static inline u32 ev_r_length_encoded(enum ipa_version version, u32 length)
+{
+	if (version < IPA_VERSION_4_9)
+		return u32_encode_bits(length, GENMASK(15, 0));
+	return u32_encode_bits(length, GENMASK(19, 0));
+}
 
 #define GSI_EV_CH_E_CNTXT_2_OFFSET(ev) \
 		GSI_EE_N_EV_CH_E_CNTXT_2_OFFSET((ev), GSI_EE_AP)
-- 
cgit v1.2.3


From 2ad6f03b59332cd875d20c52ab18bb6a927a3213 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 25 Mar 2021 09:44:37 -0500
Subject: net: ipa: expand GSI channel types

IPA v4.5 (GSI v2.5) supports a larger set of channel protocols, and
adds an additional field to hold the most-significant bits of the
protocol identifier on a channel.

Add an inline function that encodes the protocol (including the
extra bits for newer versions of IPA), and define some additional
protocols.  At this point we still use only GPI protocol.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c     |  2 +-
 drivers/net/ipa/gsi_reg.h | 38 +++++++++++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 6f146288f9e4..585574af36ec 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -801,7 +801,7 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 	channel->tre_ring.index = 0;
 
 	/* We program all channels as GPI type/protocol */
-	val = u32_encode_bits(GSI_CHANNEL_TYPE_GPI, CHTYPE_PROTOCOL_FMASK);
+	val = chtype_protocol_encoded(gsi->version, GSI_CHANNEL_TYPE_GPI);
 	if (channel->toward_ipa)
 		val |= CHTYPE_DIR_FMASK;
 	val |= u32_encode_bits(channel->evt_ring_id, ERINDEX_FMASK);
diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index d964015a4409..b4ac0258d6e1 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -64,6 +64,21 @@
 			(0x0000c01c + 0x1000 * (ee))
 
 /* All other register offsets are relative to gsi->virt */
+
+/** enum gsi_channel_type - CHTYPE_PROTOCOL field values in CH_C_CNTXT_0 */
+enum gsi_channel_type {
+	GSI_CHANNEL_TYPE_MHI			= 0x0,
+	GSI_CHANNEL_TYPE_XHCI			= 0x1,
+	GSI_CHANNEL_TYPE_GPI			= 0x2,
+	GSI_CHANNEL_TYPE_XDCI			= 0x3,
+	GSI_CHANNEL_TYPE_WDI2			= 0x4,
+	GSI_CHANNEL_TYPE_GCI			= 0x5,
+	GSI_CHANNEL_TYPE_WDI3			= 0x6,
+	GSI_CHANNEL_TYPE_MHIP			= 0x7,
+	GSI_CHANNEL_TYPE_AQC			= 0x8,
+	GSI_CHANNEL_TYPE_11AD			= 0x9,
+};
+
 #define GSI_CH_C_CNTXT_0_OFFSET(ch) \
 		GSI_EE_N_CH_C_CNTXT_0_OFFSET((ch), GSI_EE_AP)
 #define GSI_EE_N_CH_C_CNTXT_0_OFFSET(ch, ee) \
@@ -78,13 +93,22 @@
 #define CHSTATE_FMASK			GENMASK(23, 20)
 #define ELEMENT_SIZE_FMASK		GENMASK(31, 24)
 
-/** enum gsi_channel_type - CHTYPE_PROTOCOL field values in CH_C_CNTXT_0 */
-enum gsi_channel_type {
-	GSI_CHANNEL_TYPE_MHI			= 0x0,
-	GSI_CHANNEL_TYPE_XHCI			= 0x1,
-	GSI_CHANNEL_TYPE_GPI			= 0x2,
-	GSI_CHANNEL_TYPE_XDCI			= 0x3,
-};
+/* Encoded value for CH_C_CNTXT_0 register channel protocol fields */
+static inline u32
+chtype_protocol_encoded(enum ipa_version version, enum gsi_channel_type type)
+{
+	u32 val;
+
+	val = u32_encode_bits(type, CHTYPE_PROTOCOL_FMASK);
+	if (version < IPA_VERSION_4_5)
+		return val;
+
+	/* Encode upper bit(s) as well */
+	type >>= hweight32(CHTYPE_PROTOCOL_FMASK);
+	val |= u32_encode_bits(type, CHTYPE_PROTOCOL_MSB_FMASK);
+
+	return val;
+}
 
 #define GSI_CH_C_CNTXT_1_OFFSET(ch) \
 		GSI_EE_N_CH_C_CNTXT_1_OFFSET((ch), GSI_EE_AP)
-- 
cgit v1.2.3


From 6c996e19949b34d7edebed4f6b0511145c036404 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 25 Mar 2021 15:52:45 +0100
Subject: net: change netdev_unregister_timeout_secs min value to 1

netdev_unregister_timeout_secs=0 can lead to printing the
"waiting for dev to become free" message every jiffy.
This is too frequent and unnecessary.
Set the min value to 1 second.

Also fix the merge issue introduced by
"net: make unregister netdev warning timeout configurable":
it changed "refcnt != 1" to "refcnt".

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Fixes: 5aa3afe107d9 ("net: make unregister netdev warning timeout configurable")
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/sysctl/net.rst | 2 +-
 net/core/dev.c                           | 2 +-
 net/core/sysctl_net_core.c               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 2090bfc69aa5..c941b214e0b7 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -320,7 +320,7 @@ waiting for a network device refcount to drop to 0 during device
 unregistration. A lower value may be useful during bisection to detect
 a leaked reference faster. A larger value may be useful to prevent false
 warnings on slow/loaded systems.
-Default value is 10, minimum 0, maximum 3600.
+Default value is 10, minimum 1, maximum 3600.
 
 optmem_max
 ----------
diff --git a/net/core/dev.c b/net/core/dev.c
index 48b529d59157..b4c67a5be606 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10460,7 +10460,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		refcnt = netdev_refcnt_read(dev);
 
-		if (refcnt &&
+		if (refcnt != 1 &&
 		    time_after(jiffies, warning_time +
 			       netdev_unregister_timeout_secs * HZ)) {
 			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d84c8a1b280e..c8496c1142c9 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -577,7 +577,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
+		.extra1		= SYSCTL_ONE,
 		.extra2		= &int_3600,
 	},
 	{ }
-- 
cgit v1.2.3


From 7e1c520c0d2028e24cf86df811d41dc4205dc5d4 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 26 Mar 2021 01:39:12 +0800
Subject: net: stmmac: introduce DMA interrupt status masking per traffic
 direction

In preparation to make stmmac support multi-vector MSI, we introduce the
interrupt status masking according to RX, TX or RXTX. Default to use RXTX
inside stmmac_dma_interrupt(), so there is no run-time logic difference
now.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |  6 ++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c  | 24 +++++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h   | 21 ++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c   |  7 ++++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h    | 22 +++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c    |  8 +++++++-
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h     |  6 ++++++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c |  8 +++++++-
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  7 ++++---
 10 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index d065b11b7b10..5afb36a5c94c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -309,6 +309,12 @@ enum dma_irq_status {
 	handle_tx = 0x8,
 };
 
+enum dma_irq_dir {
+	DMA_DIR_RX = 0x1,
+	DMA_DIR_TX = 0x2,
+	DMA_DIR_RXTX = 0x3,
+};
+
 /* EEE and LPI defines */
 #define	CORE_IRQ_TX_PATH_IN_LPI_MODE	(1 << 0)
 #define	CORE_IRQ_TX_PATH_EXIT_LPI_MODE	(1 << 1)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index e62efd166ec8..19e7ec30af4c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -239,6 +239,22 @@ static const struct emac_variant emac_variant_h6 = {
 #define EMAC_RX_EARLY_INT       BIT(13)
 #define EMAC_RGMII_STA_INT      BIT(16)
 
+#define EMAC_INT_MSK_COMMON	EMAC_RGMII_STA_INT
+#define EMAC_INT_MSK_TX		(EMAC_TX_INT | \
+				 EMAC_TX_DMA_STOP_INT | \
+				 EMAC_TX_BUF_UA_INT | \
+				 EMAC_TX_TIMEOUT_INT | \
+				 EMAC_TX_UNDERFLOW_INT | \
+				 EMAC_TX_EARLY_INT |\
+				 EMAC_INT_MSK_COMMON)
+#define EMAC_INT_MSK_RX		(EMAC_RX_INT | \
+				 EMAC_RX_BUF_UA_INT | \
+				 EMAC_RX_DMA_STOP_INT | \
+				 EMAC_RX_TIMEOUT_INT | \
+				 EMAC_RX_OVERFLOW_INT | \
+				 EMAC_RX_EARLY_INT | \
+				 EMAC_INT_MSK_COMMON)
+
 #define MAC_ADDR_TYPE_DST BIT(31)
 
 /* H3 specific bits for EPHY */
@@ -412,13 +428,19 @@ static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 }
 
 static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr,
-				     struct stmmac_extra_stats *x, u32 chan)
+				     struct stmmac_extra_stats *x, u32 chan,
+				     u32 dir)
 {
 	u32 v;
 	int ret = 0;
 
 	v = readl(ioaddr + EMAC_INT_STA);
 
+	if (dir == DMA_DIR_RX)
+		v &= EMAC_INT_MSK_RX;
+	else if (dir == DMA_DIR_TX)
+		v &= EMAC_INT_MSK_TX;
+
 	if (v & EMAC_TX_INT) {
 		ret |= handle_tx;
 		x->tx_normal_irq_n++;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 8391ca63d943..5c0c53832adb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -149,6 +149,25 @@
 #define DMA_CHAN_STATUS_TPS		BIT(1)
 #define DMA_CHAN_STATUS_TI		BIT(0)
 
+#define DMA_CHAN_STATUS_MSK_COMMON	(DMA_CHAN_STATUS_NIS | \
+					 DMA_CHAN_STATUS_AIS | \
+					 DMA_CHAN_STATUS_CDE | \
+					 DMA_CHAN_STATUS_FBE)
+
+#define DMA_CHAN_STATUS_MSK_RX		(DMA_CHAN_STATUS_REB | \
+					 DMA_CHAN_STATUS_ERI | \
+					 DMA_CHAN_STATUS_RWT | \
+					 DMA_CHAN_STATUS_RPS | \
+					 DMA_CHAN_STATUS_RBU | \
+					 DMA_CHAN_STATUS_RI | \
+					 DMA_CHAN_STATUS_MSK_COMMON)
+
+#define DMA_CHAN_STATUS_MSK_TX		(DMA_CHAN_STATUS_ETI | \
+					 DMA_CHAN_STATUS_TBU | \
+					 DMA_CHAN_STATUS_TPS | \
+					 DMA_CHAN_STATUS_TI | \
+					 DMA_CHAN_STATUS_MSK_COMMON)
+
 /* Interrupt enable bits per channel */
 #define DMA_CHAN_INTR_ENA_NIE		BIT(16)
 #define DMA_CHAN_INTR_ENA_AIE		BIT(15)
@@ -206,7 +225,7 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan);
 void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan);
 void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan);
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-			 struct stmmac_extra_stats *x, u32 chan);
+			 struct stmmac_extra_stats *x, u32 chan, u32 dir);
 void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
 void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan);
 void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 71e50751ef2d..3fa602dabf49 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -135,12 +135,17 @@ void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
 }
 
 int dwmac4_dma_interrupt(void __iomem *ioaddr,
-			 struct stmmac_extra_stats *x, u32 chan)
+			 struct stmmac_extra_stats *x, u32 chan, u32 dir)
 {
 	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
 	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
 	int ret = 0;
 
+	if (dir == DMA_DIR_RX)
+		intr_status &= DMA_CHAN_STATUS_MSK_RX;
+	else if (dir == DMA_DIR_TX)
+		intr_status &= DMA_CHAN_STATUS_MSK_TX;
+
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) {
 		if (unlikely(intr_status & DMA_CHAN_STATUS_RBU))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index e5dbd0bc257e..1914ad698cab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -128,6 +128,26 @@
 #define DMA_STATUS_TI	0x00000001	/* Transmit Interrupt */
 #define DMA_CONTROL_FTF		0x00100000	/* Flush transmit FIFO */
 
+#define DMA_STATUS_MSK_COMMON		(DMA_STATUS_NIS | \
+					 DMA_STATUS_AIS | \
+					 DMA_STATUS_FBI)
+
+#define DMA_STATUS_MSK_RX		(DMA_STATUS_ERI | \
+					 DMA_STATUS_RWT | \
+					 DMA_STATUS_RPS | \
+					 DMA_STATUS_RU | \
+					 DMA_STATUS_RI | \
+					 DMA_STATUS_OVF | \
+					 DMA_STATUS_MSK_COMMON)
+
+#define DMA_STATUS_MSK_TX		(DMA_STATUS_ETI | \
+					 DMA_STATUS_UNF | \
+					 DMA_STATUS_TJT | \
+					 DMA_STATUS_TU | \
+					 DMA_STATUS_TPS | \
+					 DMA_STATUS_TI | \
+					 DMA_STATUS_MSK_COMMON)
+
 #define NUM_DWMAC100_DMA_REGS	9
 #define NUM_DWMAC1000_DMA_REGS	23
 
@@ -139,7 +159,7 @@ void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan);
 void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan);
 void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan);
 int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x,
-			u32 chan);
+			u32 chan, u32 dir);
 int dwmac_dma_reset(void __iomem *ioaddr);
 
 #endif /* __DWMAC_DMA_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 57a53a600aa5..d1c31200bb91 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -155,7 +155,7 @@ static void show_rx_process_state(unsigned int status)
 #endif
 
 int dwmac_dma_interrupt(void __iomem *ioaddr,
-			struct stmmac_extra_stats *x, u32 chan)
+			struct stmmac_extra_stats *x, u32 chan, u32 dir)
 {
 	int ret = 0;
 	/* read the status register (CSR5) */
@@ -167,6 +167,12 @@ int dwmac_dma_interrupt(void __iomem *ioaddr,
 	show_tx_process_state(intr_status);
 	show_rx_process_state(intr_status);
 #endif
+
+	if (dir == DMA_DIR_RX)
+		intr_status &= DMA_STATUS_MSK_RX;
+	else if (dir == DMA_DIR_TX)
+		intr_status &= DMA_STATUS_MSK_TX;
+
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & DMA_STATUS_AIS)) {
 		if (unlikely(intr_status & DMA_STATUS_UNF)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 6c3b8a950f58..1913385df685 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -412,6 +412,12 @@
 #define XGMAC_TI			BIT(0)
 #define XGMAC_REGSIZE			((0x0000317c + (0x80 * 15)) / 4)
 
+#define XGMAC_DMA_STATUS_MSK_COMMON	(XGMAC_NIS | XGMAC_AIS | XGMAC_FBE)
+#define XGMAC_DMA_STATUS_MSK_RX		(XGMAC_RBU | XGMAC_RI | \
+					 XGMAC_DMA_STATUS_MSK_COMMON)
+#define XGMAC_DMA_STATUS_MSK_TX		(XGMAC_TBU | XGMAC_TPS | XGMAC_TI | \
+					 XGMAC_DMA_STATUS_MSK_COMMON)
+
 /* Descriptors */
 #define XGMAC_TDES0_LTV			BIT(31)
 #define XGMAC_TDES0_LT			GENMASK(7, 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index f2cab5b76732..906e985441a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -323,12 +323,18 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan)
 }
 
 static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
-				  struct stmmac_extra_stats *x, u32 chan)
+				  struct stmmac_extra_stats *x, u32 chan,
+				  u32 dir)
 {
 	u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
 	u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
 	int ret = 0;
 
+	if (dir == DMA_DIR_RX)
+		intr_status &= XGMAC_DMA_STATUS_MSK_RX;
+	else if (dir == DMA_DIR_TX)
+		intr_status &= XGMAC_DMA_STATUS_MSK_TX;
+
 	/* ABNORMAL interrupts */
 	if (unlikely(intr_status & XGMAC_AIS)) {
 		if (unlikely(intr_status & XGMAC_RBU)) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 45edac5f60db..c9cf89e21a41 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -201,7 +201,7 @@ struct stmmac_dma_ops {
 	void (*start_rx)(void __iomem *ioaddr, u32 chan);
 	void (*stop_rx)(void __iomem *ioaddr, u32 chan);
 	int (*dma_interrupt) (void __iomem *ioaddr,
-			      struct stmmac_extra_stats *x, u32 chan);
+			      struct stmmac_extra_stats *x, u32 chan, u32 dir);
 	/* If supported then get the optional core features */
 	void (*get_hw_feature)(void __iomem *ioaddr,
 			       struct dma_features *dma_cap);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 170296820af0..7c352d017eb2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2337,10 +2337,10 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
 	return false;
 }
 
-static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
+static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir)
 {
 	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
-						 &priv->xstats, chan);
+						 &priv->xstats, chan, dir);
 	struct stmmac_channel *ch = &priv->channel[chan];
 	unsigned long flags;
 
@@ -2386,7 +2386,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 		channels_to_check = ARRAY_SIZE(status);
 
 	for (chan = 0; chan < channels_to_check; chan++)
-		status[chan] = stmmac_napi_check(priv, chan);
+		status[chan] = stmmac_napi_check(priv, chan,
+						 DMA_DIR_RXTX);
 
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
-- 
cgit v1.2.3


From 29e6573c61aaa71010e711e6c3249c56a2e61b46 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 26 Mar 2021 01:39:13 +0800
Subject: net: stmmac: make stmmac_interrupt() function more friendly to MSI

Refactor stmmac_interrupt() by introducing stmmac_common_interrupt()
so that we prepare the ISR operation to be friendly to MSI later.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 54 +++++++++++++----------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7c352d017eb2..abe990b9b07b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4382,21 +4382,8 @@ static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status)
 	}
 }
 
-/**
- *  stmmac_interrupt - main ISR
- *  @irq: interrupt number.
- *  @dev_id: to pass the net device pointer (must be valid).
- *  Description: this is the main driver interrupt service routine.
- *  It can call:
- *  o DMA service routine (to manage incoming frame reception and transmission
- *    status)
- *  o Core interrupts to manage: remote wake-up, management counter, LPI
- *    interrupts.
- */
-static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
+static void stmmac_common_interrupt(struct stmmac_priv *priv)
 {
-	struct net_device *dev = (struct net_device *)dev_id;
-	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_cnt = priv->plat->rx_queues_to_use;
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
 	u32 queues_count;
@@ -4409,13 +4396,6 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	if (priv->irq_wake)
 		pm_wakeup_event(priv->device, 0);
 
-	/* Check if adapter is up */
-	if (test_bit(STMMAC_DOWN, &priv->state))
-		return IRQ_HANDLED;
-	/* Check if a fatal error happened */
-	if (stmmac_safety_feat_interrupt(priv))
-		return IRQ_HANDLED;
-
 	if (priv->dma_cap.estsel)
 		stmmac_est_irq_status(priv, priv->ioaddr, priv->dev,
 				      &priv->xstats, tx_cnt);
@@ -4457,11 +4437,39 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		/* PCS link status */
 		if (priv->hw->pcs) {
 			if (priv->xstats.pcs_link)
-				netif_carrier_on(dev);
+				netif_carrier_on(priv->dev);
 			else
-				netif_carrier_off(dev);
+				netif_carrier_off(priv->dev);
 		}
 	}
+}
+
+/**
+ *  stmmac_interrupt - main ISR
+ *  @irq: interrupt number.
+ *  @dev_id: to pass the net device pointer.
+ *  Description: this is the main driver interrupt service routine.
+ *  It can call:
+ *  o DMA service routine (to manage incoming frame reception and transmission
+ *    status)
+ *  o Core interrupts to manage: remote wake-up, management counter, LPI
+ *    interrupts.
+ */
+static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
+	/* Check if a fatal error happened */
+	if (stmmac_safety_feat_interrupt(priv))
+		return IRQ_HANDLED;
+
+	/* To handle Common interrupts */
+	stmmac_common_interrupt(priv);
 
 	/* To handle DMA interrupts */
 	stmmac_dma_interrupt(priv);
-- 
cgit v1.2.3


From 8532f613bc78b6e0e32b486e720848d3f5569287 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 26 Mar 2021 01:39:14 +0800
Subject: net: stmmac: introduce MSI Interrupt routines for mac, safety, RX &
 TX

Now we introduce MSI interrupt service routines and hook these routines
up if stmmac_open() sees valid irq line being requested:-

stmmac_mac_interrupt()    :- MAC (dev->irq), WOL (wol_irq), LPI (lpi_irq)
stmmac_safety_interrupt() :- Safety Feat Correctible Error (sfty_ce_irq)
                             & Uncorrectible Error (sfty_ue_irq)
stmmac_msi_intr_rx()      :- For all RX MSI irq (rx_irq)
stmmac_msi_intr_tx()      :- For all TX MSI irq (tx_irq)

Each of IRQs will have its unique name so that we can differentiate
them easily under /proc/interrupts.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |  15 +
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  16 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 437 +++++++++++++++++++---
 include/linux/stmmac.h                            |   8 +
 4 files changed, 431 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 5afb36a5c94c..c54a56b732b3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -259,6 +259,9 @@ struct stmmac_safety_stats {
 #define DMA_HW_FEAT_ACTPHYIF	0x70000000	/* Active/selected PHY iface */
 #define DEFAULT_DMA_PBL		8
 
+/* MSI defines */
+#define STMMAC_MSI_VEC_MAX	32
+
 /* PCS status and mask defines */
 #define	PCS_ANE_IRQ		BIT(2)	/* PCS Auto-Negotiation */
 #define	PCS_LINK_IRQ		BIT(1)	/* PCS Link */
@@ -315,6 +318,18 @@ enum dma_irq_dir {
 	DMA_DIR_RXTX = 0x3,
 };
 
+enum request_irq_err {
+	REQ_IRQ_ERR_ALL,
+	REQ_IRQ_ERR_TX,
+	REQ_IRQ_ERR_RX,
+	REQ_IRQ_ERR_SFTY_UE,
+	REQ_IRQ_ERR_SFTY_CE,
+	REQ_IRQ_ERR_LPI,
+	REQ_IRQ_ERR_WOL,
+	REQ_IRQ_ERR_MAC,
+	REQ_IRQ_ERR_NO,
+};
+
 /* EEE and LPI defines */
 #define	CORE_IRQ_TX_PATH_IN_LPI_MODE	(1 << 0)
 #define	CORE_IRQ_TX_PATH_EXIT_LPI_MODE	(1 << 1)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 4faad331a4ca..9966f6f10905 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -30,6 +30,10 @@ struct stmmac_resources {
 	int wol_irq;
 	int lpi_irq;
 	int irq;
+	int sfty_ce_irq;
+	int sfty_ue_irq;
+	int rx_irq[MTL_MAX_RX_QUEUES];
+	int tx_irq[MTL_MAX_TX_QUEUES];
 };
 
 struct stmmac_tx_info {
@@ -225,6 +229,18 @@ struct stmmac_priv {
 	void __iomem *mmcaddr;
 	void __iomem *ptpaddr;
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	int sfty_ce_irq;
+	int sfty_ue_irq;
+	int rx_irq[MTL_MAX_RX_QUEUES];
+	int tx_irq[MTL_MAX_TX_QUEUES];
+	/*irq name */
+	char int_name_mac[IFNAMSIZ + 9];
+	char int_name_wol[IFNAMSIZ + 9];
+	char int_name_lpi[IFNAMSIZ + 9];
+	char int_name_sfty_ce[IFNAMSIZ + 10];
+	char int_name_sfty_ue[IFNAMSIZ + 10];
+	char int_name_rx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 14];
+	char int_name_tx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 18];
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbgfs_dir;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index abe990b9b07b..459477db455c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -105,6 +105,11 @@ module_param(chain_mode, int, 0444);
 MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
 
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
+/* For MSI interrupts handling */
+static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id);
+static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id);
+static irqreturn_t stmmac_msi_intr_tx(int irq, void *data);
+static irqreturn_t stmmac_msi_intr_rx(int irq, void *data);
 
 #ifdef CONFIG_DEBUG_FS
 static const struct net_device_ops stmmac_netdev_ops;
@@ -2986,6 +2991,260 @@ static void stmmac_hw_teardown(struct net_device *dev)
 	clk_disable_unprepare(priv->plat->clk_ptp_ref);
 }
 
+static void stmmac_free_irq(struct net_device *dev,
+			    enum request_irq_err irq_err, int irq_idx)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int j;
+
+	switch (irq_err) {
+	case REQ_IRQ_ERR_ALL:
+		irq_idx = priv->plat->tx_queues_to_use;
+		fallthrough;
+	case REQ_IRQ_ERR_TX:
+		for (j = irq_idx - 1; j >= 0; j--) {
+			if (priv->tx_irq[j] > 0)
+				free_irq(priv->tx_irq[j], &priv->tx_queue[j]);
+		}
+		irq_idx = priv->plat->rx_queues_to_use;
+		fallthrough;
+	case REQ_IRQ_ERR_RX:
+		for (j = irq_idx - 1; j >= 0; j--) {
+			if (priv->rx_irq[j] > 0)
+				free_irq(priv->rx_irq[j], &priv->rx_queue[j]);
+		}
+
+		if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq)
+			free_irq(priv->sfty_ue_irq, dev);
+		fallthrough;
+	case REQ_IRQ_ERR_SFTY_UE:
+		if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq)
+			free_irq(priv->sfty_ce_irq, dev);
+		fallthrough;
+	case REQ_IRQ_ERR_SFTY_CE:
+		if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq)
+			free_irq(priv->lpi_irq, dev);
+		fallthrough;
+	case REQ_IRQ_ERR_LPI:
+		if (priv->wol_irq > 0 && priv->wol_irq != dev->irq)
+			free_irq(priv->wol_irq, dev);
+		fallthrough;
+	case REQ_IRQ_ERR_WOL:
+		free_irq(dev->irq, dev);
+		fallthrough;
+	case REQ_IRQ_ERR_MAC:
+	case REQ_IRQ_ERR_NO:
+		/* If MAC IRQ request error, no more IRQ to free */
+		break;
+	}
+}
+
+static int stmmac_request_irq_multi_msi(struct net_device *dev)
+{
+	enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int irq_idx = 0;
+	char *int_name;
+	int ret;
+	int i;
+
+	/* For common interrupt */
+	int_name = priv->int_name_mac;
+	sprintf(int_name, "%s:%s", dev->name, "mac");
+	ret = request_irq(dev->irq, stmmac_mac_interrupt,
+			  0, int_name, dev);
+	if (unlikely(ret < 0)) {
+		netdev_err(priv->dev,
+			   "%s: alloc mac MSI %d (error: %d)\n",
+			   __func__, dev->irq, ret);
+		irq_err = REQ_IRQ_ERR_MAC;
+		goto irq_error;
+	}
+
+	/* Request the Wake IRQ in case of another line
+	 * is used for WoL
+	 */
+	if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
+		int_name = priv->int_name_wol;
+		sprintf(int_name, "%s:%s", dev->name, "wol");
+		ret = request_irq(priv->wol_irq,
+				  stmmac_mac_interrupt,
+				  0, int_name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc wol MSI %d (error: %d)\n",
+				   __func__, priv->wol_irq, ret);
+			irq_err = REQ_IRQ_ERR_WOL;
+			goto irq_error;
+		}
+	}
+
+	/* Request the LPI IRQ in case of another line
+	 * is used for LPI
+	 */
+	if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) {
+		int_name = priv->int_name_lpi;
+		sprintf(int_name, "%s:%s", dev->name, "lpi");
+		ret = request_irq(priv->lpi_irq,
+				  stmmac_mac_interrupt,
+				  0, int_name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc lpi MSI %d (error: %d)\n",
+				   __func__, priv->lpi_irq, ret);
+			irq_err = REQ_IRQ_ERR_LPI;
+			goto irq_error;
+		}
+	}
+
+	/* Request the Safety Feature Correctible Error line in
+	 * case of another line is used
+	 */
+	if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) {
+		int_name = priv->int_name_sfty_ce;
+		sprintf(int_name, "%s:%s", dev->name, "safety-ce");
+		ret = request_irq(priv->sfty_ce_irq,
+				  stmmac_safety_interrupt,
+				  0, int_name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc sfty ce MSI %d (error: %d)\n",
+				   __func__, priv->sfty_ce_irq, ret);
+			irq_err = REQ_IRQ_ERR_SFTY_CE;
+			goto irq_error;
+		}
+	}
+
+	/* Request the Safety Feature Uncorrectible Error line in
+	 * case of another line is used
+	 */
+	if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) {
+		int_name = priv->int_name_sfty_ue;
+		sprintf(int_name, "%s:%s", dev->name, "safety-ue");
+		ret = request_irq(priv->sfty_ue_irq,
+				  stmmac_safety_interrupt,
+				  0, int_name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc sfty ue MSI %d (error: %d)\n",
+				   __func__, priv->sfty_ue_irq, ret);
+			irq_err = REQ_IRQ_ERR_SFTY_UE;
+			goto irq_error;
+		}
+	}
+
+	/* Request Rx MSI irq */
+	for (i = 0; i < priv->plat->rx_queues_to_use; i++) {
+		if (priv->rx_irq[i] == 0)
+			continue;
+
+		int_name = priv->int_name_rx_irq[i];
+		sprintf(int_name, "%s:%s-%d", dev->name, "rx", i);
+		ret = request_irq(priv->rx_irq[i],
+				  stmmac_msi_intr_rx,
+				  0, int_name, &priv->rx_queue[i]);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc rx-%d  MSI %d (error: %d)\n",
+				   __func__, i, priv->rx_irq[i], ret);
+			irq_err = REQ_IRQ_ERR_RX;
+			irq_idx = i;
+			goto irq_error;
+		}
+	}
+
+	/* Request Tx MSI irq */
+	for (i = 0; i < priv->plat->tx_queues_to_use; i++) {
+		if (priv->tx_irq[i] == 0)
+			continue;
+
+		int_name = priv->int_name_tx_irq[i];
+		sprintf(int_name, "%s:%s-%d", dev->name, "tx", i);
+		ret = request_irq(priv->tx_irq[i],
+				  stmmac_msi_intr_tx,
+				  0, int_name, &priv->tx_queue[i]);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: alloc tx-%d  MSI %d (error: %d)\n",
+				   __func__, i, priv->tx_irq[i], ret);
+			irq_err = REQ_IRQ_ERR_TX;
+			irq_idx = i;
+			goto irq_error;
+		}
+	}
+
+	return 0;
+
+irq_error:
+	stmmac_free_irq(dev, irq_err, irq_idx);
+	return ret;
+}
+
+static int stmmac_request_irq_single(struct net_device *dev)
+{
+	enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int ret;
+
+	ret = request_irq(dev->irq, stmmac_interrupt,
+			  IRQF_SHARED, dev->name, dev);
+	if (unlikely(ret < 0)) {
+		netdev_err(priv->dev,
+			   "%s: ERROR: allocating the IRQ %d (error: %d)\n",
+			   __func__, dev->irq, ret);
+		irq_err = REQ_IRQ_ERR_MAC;
+		return ret;
+	}
+
+	/* Request the Wake IRQ in case of another line
+	 * is used for WoL
+	 */
+	if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) {
+		ret = request_irq(priv->wol_irq, stmmac_interrupt,
+				  IRQF_SHARED, dev->name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
+				   __func__, priv->wol_irq, ret);
+			irq_err = REQ_IRQ_ERR_WOL;
+			return ret;
+		}
+	}
+
+	/* Request the IRQ lines */
+	if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) {
+		ret = request_irq(priv->lpi_irq, stmmac_interrupt,
+				  IRQF_SHARED, dev->name, dev);
+		if (unlikely(ret < 0)) {
+			netdev_err(priv->dev,
+				   "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
+				   __func__, priv->lpi_irq, ret);
+			irq_err = REQ_IRQ_ERR_LPI;
+			goto irq_error;
+		}
+	}
+
+	return 0;
+
+irq_error:
+	stmmac_free_irq(dev, irq_err, 0);
+	return ret;
+}
+
+static int stmmac_request_irq(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int ret;
+
+	/* Request the IRQ lines */
+	if (priv->plat->multi_msi_en)
+		ret = stmmac_request_irq_multi_msi(dev);
+	else
+		ret = stmmac_request_irq_single(dev);
+
+	return ret;
+}
+
 /**
  *  stmmac_open - open entry point of the driver
  *  @dev : pointer to the device structure.
@@ -3077,50 +3336,15 @@ static int stmmac_open(struct net_device *dev)
 	/* We may have called phylink_speed_down before */
 	phylink_speed_up(priv->phylink);
 
-	/* Request the IRQ lines */
-	ret = request_irq(dev->irq, stmmac_interrupt,
-			  IRQF_SHARED, dev->name, dev);
-	if (unlikely(ret < 0)) {
-		netdev_err(priv->dev,
-			   "%s: ERROR: allocating the IRQ %d (error: %d)\n",
-			   __func__, dev->irq, ret);
+	ret = stmmac_request_irq(dev);
+	if (ret)
 		goto irq_error;
-	}
-
-	/* Request the Wake IRQ in case of another line is used for WoL */
-	if (priv->wol_irq != dev->irq) {
-		ret = request_irq(priv->wol_irq, stmmac_interrupt,
-				  IRQF_SHARED, dev->name, dev);
-		if (unlikely(ret < 0)) {
-			netdev_err(priv->dev,
-				   "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
-				   __func__, priv->wol_irq, ret);
-			goto wolirq_error;
-		}
-	}
-
-	/* Request the IRQ lines */
-	if (priv->lpi_irq > 0) {
-		ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
-				  dev->name, dev);
-		if (unlikely(ret < 0)) {
-			netdev_err(priv->dev,
-				   "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
-				   __func__, priv->lpi_irq, ret);
-			goto lpiirq_error;
-		}
-	}
 
 	stmmac_enable_all_queues(priv);
 	netif_tx_start_all_queues(priv->dev);
 
 	return 0;
 
-lpiirq_error:
-	if (priv->wol_irq != dev->irq)
-		free_irq(priv->wol_irq, dev);
-wolirq_error:
-	free_irq(dev->irq, dev);
 irq_error:
 	phylink_stop(priv->phylink);
 
@@ -3170,11 +3394,7 @@ static int stmmac_release(struct net_device *dev)
 		hrtimer_cancel(&priv->tx_queue[chan].txtimer);
 
 	/* Free the IRQ lines */
-	free_irq(dev->irq, dev);
-	if (priv->wol_irq != dev->irq)
-		free_irq(priv->wol_irq, dev);
-	if (priv->lpi_irq > 0)
-		free_irq(priv->lpi_irq, dev);
+	stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0);
 
 	if (priv->eee_enabled) {
 		priv->tx_path_in_lpi_mode = false;
@@ -4477,15 +4697,136 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	if (unlikely(!dev)) {
+		netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+		return IRQ_NONE;
+	}
+
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
+	/* To handle Common interrupts */
+	stmmac_common_interrupt(priv);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	if (unlikely(!dev)) {
+		netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+		return IRQ_NONE;
+	}
+
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
+	/* Check if a fatal error happened */
+	stmmac_safety_feat_interrupt(priv);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_msi_intr_tx(int irq, void *data)
+{
+	struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)data;
+	int chan = tx_q->queue_index;
+	struct stmmac_priv *priv;
+	int status;
+
+	priv = container_of(tx_q, struct stmmac_priv, tx_queue[chan]);
+
+	if (unlikely(!data)) {
+		netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+		return IRQ_NONE;
+	}
+
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
+	status = stmmac_napi_check(priv, chan, DMA_DIR_TX);
+
+	if (unlikely(status & tx_hard_error_bump_tc)) {
+		/* Try to bump up the dma threshold on this failure */
+		if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
+		    tc <= 256) {
+			tc += 64;
+			if (priv->plat->force_thresh_dma_mode)
+				stmmac_set_dma_operation_mode(priv,
+							      tc,
+							      tc,
+							      chan);
+			else
+				stmmac_set_dma_operation_mode(priv,
+							      tc,
+							      SF_DMA_MODE,
+							      chan);
+			priv->xstats.threshold = tc;
+		}
+	} else if (unlikely(status == tx_hard_error)) {
+		stmmac_tx_err(priv, chan);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stmmac_msi_intr_rx(int irq, void *data)
+{
+	struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)data;
+	int chan = rx_q->queue_index;
+	struct stmmac_priv *priv;
+
+	priv = container_of(rx_q, struct stmmac_priv, rx_queue[chan]);
+
+	if (unlikely(!data)) {
+		netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
+		return IRQ_NONE;
+	}
+
+	/* Check if adapter is up */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return IRQ_HANDLED;
+
+	stmmac_napi_check(priv, chan, DMA_DIR_RX);
+
+	return IRQ_HANDLED;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 /* Polling receive - used by NETCONSOLE and other diagnostic tools
  * to allow network I/O with interrupts disabled.
  */
 static void stmmac_poll_controller(struct net_device *dev)
 {
-	disable_irq(dev->irq);
-	stmmac_interrupt(dev->irq, dev);
-	enable_irq(dev->irq);
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	/* If adapter is down, do nothing */
+	if (test_bit(STMMAC_DOWN, &priv->state))
+		return;
+
+	if (priv->plat->multi_msi_en) {
+		for (i = 0; i < priv->plat->rx_queues_to_use; i++)
+			stmmac_msi_intr_rx(0, &priv->rx_queue[i]);
+
+		for (i = 0; i < priv->plat->tx_queues_to_use; i++)
+			stmmac_msi_intr_tx(0, &priv->tx_queue[i]);
+	} else {
+		disable_irq(dev->irq);
+		stmmac_interrupt(dev->irq, dev);
+		enable_irq(dev->irq);
+	}
 }
 #endif
 
@@ -5283,6 +5624,12 @@ int stmmac_dvr_probe(struct device *device,
 	priv->dev->irq = res->irq;
 	priv->wol_irq = res->wol_irq;
 	priv->lpi_irq = res->lpi_irq;
+	priv->sfty_ce_irq = res->sfty_ce_irq;
+	priv->sfty_ue_irq = res->sfty_ue_irq;
+	for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
+		priv->rx_irq[i] = res->rx_irq[i];
+	for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
+		priv->tx_irq[i] = res->tx_irq[i];
 
 	if (!IS_ERR_OR_NULL(res->mac))
 		memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index febdb43d27e5..afc12b9385db 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -237,5 +237,13 @@ struct plat_stmmacenet_data {
 	struct pci_dev *pdev;
 	bool has_crossts;
 	int int_snapshot_num;
+	bool multi_msi_en;
+	int msi_mac_vec;
+	int msi_wol_vec;
+	int msi_lpi_vec;
+	int msi_sfty_ce_vec;
+	int msi_sfty_ue_vec;
+	int msi_rx_base_vec;
+	int msi_tx_base_vec;
 };
 #endif
-- 
cgit v1.2.3


From b42446b9b37ba444ba12d89dd9d45c2b68768f24 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Fri, 26 Mar 2021 01:39:15 +0800
Subject: stmmac: intel: add support for multi-vector msi and msi-x

Intel mgbe controller supports multi-vector interrupts:
msi_rx_vec	0,2,4,6,8,10,12,14
msi_tx_vec	1,3,5,7,9,11,13,15
msi_sfty_ue_vec	26
msi_sfty_ce_vec	27
msi_lpi_vec	28
msi_mac_vec	29

During probe(), the driver will starts with request allocation for
multi-vector interrupts. If it fails, then it will automatically fallback
to request allocation for single interrupts.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Co-developed-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 120 ++++++++++++++++++++--
 1 file changed, 111 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 992294d25706..08b4852eed4c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -492,6 +492,14 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->has_crossts = true;
 	plat->crosststamp = intel_crosststamp;
 
+	/* Setup MSI vector offset specific to Intel mGbE controller */
+	plat->msi_mac_vec = 29;
+	plat->msi_lpi_vec = 28;
+	plat->msi_sfty_ce_vec = 27;
+	plat->msi_sfty_ue_vec = 26;
+	plat->msi_rx_base_vec = 0;
+	plat->msi_tx_base_vec = 1;
+
 	return 0;
 }
 
@@ -776,6 +784,79 @@ static const struct stmmac_pci_info quark_info = {
 	.setup = quark_default_data,
 };
 
+static int stmmac_config_single_msi(struct pci_dev *pdev,
+				    struct plat_stmmacenet_data *plat,
+				    struct stmmac_resources *res)
+{
+	int ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0) {
+		dev_info(&pdev->dev, "%s: Single IRQ enablement failed\n",
+			 __func__);
+		return ret;
+	}
+
+	res->irq = pci_irq_vector(pdev, 0);
+	res->wol_irq = res->irq;
+	plat->multi_msi_en = 0;
+	dev_info(&pdev->dev, "%s: Single IRQ enablement successful\n",
+		 __func__);
+
+	return 0;
+}
+
+static int stmmac_config_multi_msi(struct pci_dev *pdev,
+				   struct plat_stmmacenet_data *plat,
+				   struct stmmac_resources *res)
+{
+	int ret;
+	int i;
+
+	if (plat->msi_rx_base_vec >= STMMAC_MSI_VEC_MAX ||
+	    plat->msi_tx_base_vec >= STMMAC_MSI_VEC_MAX) {
+		dev_info(&pdev->dev, "%s: Invalid RX & TX vector defined\n",
+			 __func__);
+		return -1;
+	}
+
+	ret = pci_alloc_irq_vectors(pdev, 2, STMMAC_MSI_VEC_MAX,
+				    PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dev_info(&pdev->dev, "%s: multi MSI enablement failed\n",
+			 __func__);
+		return ret;
+	}
+
+	/* For RX MSI */
+	for (i = 0; i < plat->rx_queues_to_use; i++) {
+		res->rx_irq[i] = pci_irq_vector(pdev,
+						plat->msi_rx_base_vec + i * 2);
+	}
+
+	/* For TX MSI */
+	for (i = 0; i < plat->tx_queues_to_use; i++) {
+		res->tx_irq[i] = pci_irq_vector(pdev,
+						plat->msi_tx_base_vec + i * 2);
+	}
+
+	if (plat->msi_mac_vec < STMMAC_MSI_VEC_MAX)
+		res->irq = pci_irq_vector(pdev, plat->msi_mac_vec);
+	if (plat->msi_wol_vec < STMMAC_MSI_VEC_MAX)
+		res->wol_irq = pci_irq_vector(pdev, plat->msi_wol_vec);
+	if (plat->msi_lpi_vec < STMMAC_MSI_VEC_MAX)
+		res->lpi_irq = pci_irq_vector(pdev, plat->msi_lpi_vec);
+	if (plat->msi_sfty_ce_vec < STMMAC_MSI_VEC_MAX)
+		res->sfty_ce_irq = pci_irq_vector(pdev, plat->msi_sfty_ce_vec);
+	if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX)
+		res->sfty_ue_irq = pci_irq_vector(pdev, plat->msi_sfty_ue_vec);
+
+	plat->multi_msi_en = 1;
+	dev_info(&pdev->dev, "%s: multi MSI enablement successful\n", __func__);
+
+	return 0;
+}
+
 /**
  * intel_eth_pci_probe
  *
@@ -833,18 +914,24 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	plat->bsp_priv = intel_priv;
 	intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR;
 
+	/* Initialize all MSI vectors to invalid so that it can be set
+	 * according to platform data settings below.
+	 * Note: MSI vector takes value from 0 upto 31 (STMMAC_MSI_VEC_MAX)
+	 */
+	plat->msi_mac_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_wol_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_lpi_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_sfty_ce_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_sfty_ue_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_rx_base_vec = STMMAC_MSI_VEC_MAX;
+	plat->msi_tx_base_vec = STMMAC_MSI_VEC_MAX;
+
 	ret = info->setup(pdev, plat);
 	if (ret)
 		return ret;
 
-	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
-	if (ret < 0)
-		return ret;
-
 	memset(&res, 0, sizeof(res));
 	res.addr = pcim_iomap_table(pdev)[0];
-	res.wol_irq = pci_irq_vector(pdev, 0);
-	res.irq = pci_irq_vector(pdev, 0);
 
 	if (plat->eee_usecs_rate > 0) {
 		u32 tx_lpi_usec;
@@ -853,13 +940,28 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 		writel(tx_lpi_usec, res.addr + GMAC_1US_TIC_COUNTER);
 	}
 
+	ret = stmmac_config_multi_msi(pdev, plat, &res);
+	if (ret) {
+		ret = stmmac_config_single_msi(pdev, plat, &res);
+		if (ret) {
+			dev_err(&pdev->dev, "%s: ERROR: failed to enable IRQ\n",
+				__func__);
+			goto err_alloc_irq;
+		}
+	}
+
 	ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
 	if (ret) {
-		pci_free_irq_vectors(pdev);
-		clk_disable_unprepare(plat->stmmac_clk);
-		clk_unregister_fixed_rate(plat->stmmac_clk);
+		goto err_dvr_probe;
 	}
 
+	return 0;
+
+err_dvr_probe:
+	pci_free_irq_vectors(pdev);
+err_alloc_irq:
+	clk_disable_unprepare(plat->stmmac_clk);
+	clk_unregister_fixed_rate(plat->stmmac_clk);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 6ccf12ae111e49324b439410066e8cc359aeee6d Mon Sep 17 00:00:00 2001
From: "Wong, Vee Khee" <vee.khee.wong@intel.com>
Date: Fri, 26 Mar 2021 01:39:16 +0800
Subject: net: stmmac: use interrupt mode INTM=1 for multi-MSI

For interrupt mode INTM=0, TX/RX transfer complete will trigger signal
not only on sbd_perch_[tx|rx]_intr_o (Transmit/Receive Per Channel) but
also on the sbd_intr_o (Common).

As for multi-MSI implementation, setting interrupt mode INTM=1 is more
efficient as each TX intr and RX intr (TI/RI) will be handled by TX/RX ISR
without the need of calling the common MAC ISR.

Updated the TX/RX NORMAL interrupts status checking process as the
NIS status bit is not asserted for any RI/TI events for INTM=1.

Signed-off-by: Wong, Vee Khee <vee.khee.wong@intel.com>
Co-developed-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  |  7 +++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h  |  3 +++
 drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c  | 23 +++++++++++------------
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  1 +
 include/linux/stmmac.h                            |  1 +
 5 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 8954b85eb850..cb17f6c35e54 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -161,6 +161,13 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
 		value |= DMA_SYS_BUS_EAME;
 
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
+
+	if (dma_cfg->multi_msi_en) {
+		value = readl(ioaddr + DMA_BUS_MODE);
+		value &= ~DMA_BUS_MODE_INTM_MASK;
+		value |= (DMA_BUS_MODE_INTM_MODE1 << DMA_BUS_MODE_INTM_SHIFT);
+		writel(value, ioaddr + DMA_BUS_MODE);
+	}
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 5c0c53832adb..05481eb13ba6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -25,6 +25,9 @@
 #define DMA_TBS_CTRL			0x00001050
 
 /* DMA Bus Mode bitmap */
+#define DMA_BUS_MODE_INTM_MASK		GENMASK(17, 16)
+#define DMA_BUS_MODE_INTM_SHIFT		16
+#define DMA_BUS_MODE_INTM_MODE1		0x1
 #define DMA_BUS_MODE_SFT_RESET		BIT(0)
 
 /* DMA SYS Bus Mode bitmap */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 3fa602dabf49..e63270267578 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -166,20 +166,19 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
 		}
 	}
 	/* TX/RX NORMAL interrupts */
-	if (likely(intr_status & DMA_CHAN_STATUS_NIS)) {
+	if (likely(intr_status & DMA_CHAN_STATUS_NIS))
 		x->normal_irq_n++;
-		if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-			x->rx_normal_irq_n++;
-			ret |= handle_rx;
-		}
-		if (likely(intr_status & (DMA_CHAN_STATUS_TI |
-					  DMA_CHAN_STATUS_TBU))) {
-			x->tx_normal_irq_n++;
-			ret |= handle_tx;
-		}
-		if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
-			x->rx_early_irq++;
+	if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
+		x->rx_normal_irq_n++;
+		ret |= handle_rx;
+	}
+	if (likely(intr_status & (DMA_CHAN_STATUS_TI |
+		DMA_CHAN_STATUS_TBU))) {
+		x->tx_normal_irq_n++;
+		ret |= handle_tx;
 	}
+	if (unlikely(intr_status & DMA_CHAN_STATUS_ERI))
+		x->rx_early_irq++;
 
 	writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan));
 	return ret;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 459477db455c..f4fa5402cd64 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5620,6 +5620,7 @@ int stmmac_dvr_probe(struct device *device,
 	priv->plat = plat_dat;
 	priv->ioaddr = res->addr;
 	priv->dev->base_addr = (unsigned long)res->addr;
+	priv->plat->dma_cfg->multi_msi_en = priv->plat->multi_msi_en;
 
 	priv->dev->irq = res->irq;
 	priv->wol_irq = res->wol_irq;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index afc12b9385db..e338ef7abc00 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -96,6 +96,7 @@ struct stmmac_dma_cfg {
 	int mixed_burst;
 	bool aal;
 	bool eame;
+	bool multi_msi_en;
 };
 
 #define AXI_BLEN	7
-- 
cgit v1.2.3


From cb9444130662c6c13022579c861098f212db2562 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Mar 2021 11:08:13 -0700
Subject: sysctl: add proc_dou8vec_minmax()

Networking has many sysctls that could fit in one u8.

This patch adds proc_dou8vec_minmax() for this purpose.

Note that the .extra1 and .extra2 fields are pointing
to integers, because it makes conversions easier.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/proc/proc_sysctl.c  |  6 +++++
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c        | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 984e42f8cb11..7256b8962e3c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1108,6 +1108,11 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
 			err |= sysctl_err(path, table, "array not allowed");
 	}
 
+	if (table->proc_handler == proc_dou8vec_minmax) {
+		if (table->maxlen != sizeof(u8))
+			err |= sysctl_err(path, table, "array not allowed");
+	}
+
 	return err;
 }
 
@@ -1123,6 +1128,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 		    (table->proc_handler == proc_douintvec) ||
 		    (table->proc_handler == proc_douintvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
+		    (table->proc_handler == proc_dou8vec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
 		    (table->proc_handler == proc_dointvec_ms_jiffies) ||
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 51298a4f4623..d99ca99837de 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -53,6 +53,8 @@ int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos);
+int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer,
+			size_t *lenp, loff_t *ppos);
 int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09b5dc1..90d2892ef6a3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1034,6 +1034,65 @@ int proc_douintvec_minmax(struct ctl_table *table, int write,
 				 do_proc_douintvec_minmax_conv, &param);
 }
 
+/**
+ * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
+ * values from/to the user buffer, treated as an ASCII string. Negative
+ * strings are not allowed.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Returns 0 on success or an error on write when the range check fails.
+ */
+int proc_dou8vec_minmax(struct ctl_table *table, int write,
+			void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp;
+	unsigned int min = 0, max = 255U, val;
+	u8 *data = table->data;
+	struct do_proc_douintvec_minmax_conv_param param = {
+		.min = &min,
+		.max = &max,
+	};
+	int res;
+
+	/* Do not support arrays yet. */
+	if (table->maxlen != sizeof(u8))
+		return -EINVAL;
+
+	if (table->extra1) {
+		min = *(unsigned int *) table->extra1;
+		if (min > 255U)
+			return -EINVAL;
+	}
+	if (table->extra2) {
+		max = *(unsigned int *) table->extra2;
+		if (max > 255U)
+			return -EINVAL;
+	}
+
+	tmp = *table;
+
+	tmp.maxlen = sizeof(val);
+	tmp.data = &val;
+	val = *data;
+	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
+				do_proc_douintvec_minmax_conv, &param);
+	if (res)
+		return res;
+	if (write)
+		*data = val;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
+
 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
 					unsigned int *valp,
 					int write, void *data)
@@ -1582,6 +1641,12 @@ int proc_douintvec_minmax(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
+int proc_dou8vec_minmax(struct ctl_table *table, int write,
+			void *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
 		    void *buffer, size_t *lenp, loff_t *ppos)
 {
-- 
cgit v1.2.3


From 4b6bbf17d4e1939afa72821879fc033d725e9491 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Mar 2021 11:08:14 -0700
Subject: ipv4: shrink netns_ipv4 with sysctl conversions

These sysctls that can fit in one byte instead of one int
are converted to save space and thus reduce cache line misses.

 - icmp_echo_ignore_all, icmp_echo_ignore_broadcasts,
 - icmp_ignore_bogus_error_responses, icmp_errors_use_inbound_ifaddr
 - tcp_ecn, tcp_ecn_fallback
 - ip_default_ttl, ip_no_pmtu_disc, ip_fwd_use_pmtu
 - ip_nonlocal_bind, ip_autobind_reuse
 - ip_dynaddr, ip_early_demux, raw_l3mdev_accept
 - nexthop_compat_mode, fwmark_reflect

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 32 +++++++++++------------
 net/ipv4/sysctl_net_ipv4.c | 64 +++++++++++++++++++++++-----------------------
 2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9e3cb2722b80..7b572d468fde 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -83,36 +83,36 @@ struct netns_ipv4 {
 	struct xt_table		*nat_table;
 #endif
 
-	int sysctl_icmp_echo_ignore_all;
-	int sysctl_icmp_echo_ignore_broadcasts;
-	int sysctl_icmp_ignore_bogus_error_responses;
+	u8 sysctl_icmp_echo_ignore_all;
+	u8 sysctl_icmp_echo_ignore_broadcasts;
+	u8 sysctl_icmp_ignore_bogus_error_responses;
+	u8 sysctl_icmp_errors_use_inbound_ifaddr;
 	int sysctl_icmp_ratelimit;
 	int sysctl_icmp_ratemask;
-	int sysctl_icmp_errors_use_inbound_ifaddr;
 
 	struct local_ports ip_local_ports;
 
-	int sysctl_tcp_ecn;
-	int sysctl_tcp_ecn_fallback;
+	u8 sysctl_tcp_ecn;
+	u8 sysctl_tcp_ecn_fallback;
 
-	int sysctl_ip_default_ttl;
-	int sysctl_ip_no_pmtu_disc;
-	int sysctl_ip_fwd_use_pmtu;
+	u8 sysctl_ip_default_ttl;
+	u8 sysctl_ip_no_pmtu_disc;
+	u8 sysctl_ip_fwd_use_pmtu;
 	int sysctl_ip_fwd_update_priority;
-	int sysctl_ip_nonlocal_bind;
-	int sysctl_ip_autobind_reuse;
+	u8 sysctl_ip_nonlocal_bind;
+	u8 sysctl_ip_autobind_reuse;
 	/* Shall we try to damage output packets if routing dev changes? */
-	int sysctl_ip_dynaddr;
-	int sysctl_ip_early_demux;
+	u8 sysctl_ip_dynaddr;
+	u8 sysctl_ip_early_demux;
 #ifdef CONFIG_NET_L3_MASTER_DEV
-	int sysctl_raw_l3mdev_accept;
+	u8 sysctl_raw_l3mdev_accept;
 #endif
 	int sysctl_tcp_early_demux;
 	int sysctl_udp_early_demux;
 
-	int sysctl_nexthop_compat_mode;
+	u8 sysctl_nexthop_compat_mode;
 
-	int sysctl_fwmark_reflect;
+	u8 sysctl_fwmark_reflect;
 	int sysctl_tcp_fwmark_accept;
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	int sysctl_tcp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f55095d3ed16..e5ff17526603 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -595,30 +595,30 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "icmp_echo_ignore_all",
 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_all,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "icmp_echo_ignore_broadcasts",
 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "icmp_ignore_bogus_error_responses",
 		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "icmp_errors_use_inbound_ifaddr",
 		.data		= &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "icmp_ratelimit",
@@ -645,9 +645,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "raw_l3mdev_accept",
 		.data		= &init_net.ipv4.sysctl_raw_l3mdev_accept,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
@@ -655,30 +655,30 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_ecn",
 		.data		= &init_net.ipv4.sysctl_tcp_ecn,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_ecn_fallback",
 		.data		= &init_net.ipv4.sysctl_tcp_ecn_fallback,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_dynaddr",
 		.data		= &init_net.ipv4.sysctl_ip_dynaddr,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_early_demux",
 		.data		= &init_net.ipv4.sysctl_ip_early_demux,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname       = "udp_early_demux",
@@ -697,18 +697,18 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname       = "nexthop_compat_mode",
 		.data           = &init_net.ipv4.sysctl_nexthop_compat_mode,
-		.maxlen         = sizeof(int),
+		.maxlen         = sizeof(u8),
 		.mode           = 0644,
-		.proc_handler   = proc_dointvec_minmax,
+		.proc_handler   = proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "ip_default_ttl",
 		.data		= &init_net.ipv4.sysctl_ip_default_ttl,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= &ip_ttl_min,
 		.extra2		= &ip_ttl_max,
 	},
@@ -729,16 +729,16 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "ip_no_pmtu_disc",
 		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_forward_use_pmtu",
 		.data		= &init_net.ipv4.sysctl_ip_fwd_use_pmtu,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_forward_update_priority",
@@ -752,25 +752,25 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "ip_nonlocal_bind",
 		.data		= &init_net.ipv4.sysctl_ip_nonlocal_bind,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_autobind_reuse",
 		.data		= &init_net.ipv4.sysctl_ip_autobind_reuse,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1         = SYSCTL_ZERO,
 		.extra2         = SYSCTL_ONE,
 	},
 	{
 		.procname	= "fwmark_reflect",
 		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_fwmark_accept",
-- 
cgit v1.2.3


From 1c69dedc8fa7c9684d48dc89994b4e0aceeae588 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Mar 2021 11:08:15 -0700
Subject: ipv4: convert ip_forward_update_priority sysctl to u8

This sysctl uses ip_fwd_update_priority() helper,
so the conversion needs to change it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7b572d468fde..d2c0a6592ff6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -98,7 +98,7 @@ struct netns_ipv4 {
 	u8 sysctl_ip_default_ttl;
 	u8 sysctl_ip_no_pmtu_disc;
 	u8 sysctl_ip_fwd_use_pmtu;
-	int sysctl_ip_fwd_update_priority;
+	u8 sysctl_ip_fwd_update_priority;
 	u8 sysctl_ip_nonlocal_bind;
 	u8 sysctl_ip_autobind_reuse;
 	/* Shall we try to damage output packets if routing dev changes? */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e5ff17526603..713e0c0c91e9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -209,7 +209,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
 
 	net = container_of(table->data, struct net,
 			   ipv4.sysctl_ip_fwd_update_priority);
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
 		call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE,
 					net);
@@ -743,7 +743,7 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "ip_forward_update_priority",
 		.data		= &init_net.ipv4.sysctl_ip_fwd_update_priority,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
 		.proc_handler   = ipv4_fwd_update_priority,
 		.extra1		= SYSCTL_ZERO,
-- 
cgit v1.2.3


From 2932bcda070d9a02548e57119b1ada8f018c40b5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Mar 2021 11:08:16 -0700
Subject: inet: convert tcp_early_demux and udp_early_demux to u8

For these sysctls, their dedicated helpers have
to use proc_dou8vec_minmax().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 4 ++--
 net/ipv4/sysctl_net_ipv4.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d2c0a6592ff6..00f250ee4419 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -107,8 +107,8 @@ struct netns_ipv4 {
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	u8 sysctl_raw_l3mdev_accept;
 #endif
-	int sysctl_tcp_early_demux;
-	int sysctl_udp_early_demux;
+	u8 sysctl_tcp_early_demux;
+	u8 sysctl_udp_early_demux;
 
 	u8 sysctl_nexthop_compat_mode;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 713e0c0c91e9..510a32635612 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -389,7 +389,7 @@ static int proc_tcp_early_demux(struct ctl_table *table, int write,
 {
 	int ret = 0;
 
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
 
 	if (write && !ret) {
 		int enabled = init_net.ipv4.sysctl_tcp_early_demux;
@@ -405,7 +405,7 @@ static int proc_udp_early_demux(struct ctl_table *table, int write,
 {
 	int ret = 0;
 
-	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
 
 	if (write && !ret) {
 		int enabled = init_net.ipv4.sysctl_udp_early_demux;
@@ -683,14 +683,14 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname       = "udp_early_demux",
 		.data           = &init_net.ipv4.sysctl_udp_early_demux,
-		.maxlen         = sizeof(int),
+		.maxlen         = sizeof(u8),
 		.mode           = 0644,
 		.proc_handler   = proc_udp_early_demux
 	},
 	{
 		.procname       = "tcp_early_demux",
 		.data           = &init_net.ipv4.sysctl_tcp_early_demux,
-		.maxlen         = sizeof(int),
+		.maxlen         = sizeof(u8),
 		.mode           = 0644,
 		.proc_handler   = proc_tcp_early_demux
 	},
-- 
cgit v1.2.3


From 4ecc1baf362c5df2dcabe242511e38ee28486545 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Mar 2021 11:08:17 -0700
Subject: tcp: convert elligible sysctls to u8

Many tcp sysctls are either bools or small ints that can fit into u8.

Reducing space taken by sysctls can save few cache line misses
when sending/receiving data while cpu caches are empty,
for example after cpu idle period.

This is hard to measure with typical network performance tests,
but after this patch, struct netns_ipv4 has shrunk
by three cache lines.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   |  68 +++++++++++------------
 net/ipv4/sysctl_net_ipv4.c | 136 ++++++++++++++++++++++-----------------------
 2 files changed, 102 insertions(+), 102 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 00f250ee4419..d377266d133f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -113,11 +113,11 @@ struct netns_ipv4 {
 	u8 sysctl_nexthop_compat_mode;
 
 	u8 sysctl_fwmark_reflect;
-	int sysctl_tcp_fwmark_accept;
+	u8 sysctl_tcp_fwmark_accept;
 #ifdef CONFIG_NET_L3_MASTER_DEV
-	int sysctl_tcp_l3mdev_accept;
+	u8 sysctl_tcp_l3mdev_accept;
 #endif
-	int sysctl_tcp_mtu_probing;
+	u8 sysctl_tcp_mtu_probing;
 	int sysctl_tcp_mtu_probe_floor;
 	int sysctl_tcp_base_mss;
 	int sysctl_tcp_min_snd_mss;
@@ -125,46 +125,47 @@ struct netns_ipv4 {
 	u32 sysctl_tcp_probe_interval;
 
 	int sysctl_tcp_keepalive_time;
-	int sysctl_tcp_keepalive_probes;
 	int sysctl_tcp_keepalive_intvl;
+	u8 sysctl_tcp_keepalive_probes;
 
-	int sysctl_tcp_syn_retries;
-	int sysctl_tcp_synack_retries;
-	int sysctl_tcp_syncookies;
+	u8 sysctl_tcp_syn_retries;
+	u8 sysctl_tcp_synack_retries;
+	u8 sysctl_tcp_syncookies;
 	int sysctl_tcp_reordering;
-	int sysctl_tcp_retries1;
-	int sysctl_tcp_retries2;
-	int sysctl_tcp_orphan_retries;
+	u8 sysctl_tcp_retries1;
+	u8 sysctl_tcp_retries2;
+	u8 sysctl_tcp_orphan_retries;
+	u8 sysctl_tcp_tw_reuse;
 	int sysctl_tcp_fin_timeout;
 	unsigned int sysctl_tcp_notsent_lowat;
-	int sysctl_tcp_tw_reuse;
-	int sysctl_tcp_sack;
-	int sysctl_tcp_window_scaling;
-	int sysctl_tcp_timestamps;
-	int sysctl_tcp_early_retrans;
-	int sysctl_tcp_recovery;
-	int sysctl_tcp_thin_linear_timeouts;
-	int sysctl_tcp_slow_start_after_idle;
-	int sysctl_tcp_retrans_collapse;
-	int sysctl_tcp_stdurg;
-	int sysctl_tcp_rfc1337;
-	int sysctl_tcp_abort_on_overflow;
-	int sysctl_tcp_fack;
+	u8 sysctl_tcp_sack;
+	u8 sysctl_tcp_window_scaling;
+	u8 sysctl_tcp_timestamps;
+	u8 sysctl_tcp_early_retrans;
+	u8 sysctl_tcp_recovery;
+	u8 sysctl_tcp_thin_linear_timeouts;
+	u8 sysctl_tcp_slow_start_after_idle;
+	u8 sysctl_tcp_retrans_collapse;
+	u8 sysctl_tcp_stdurg;
+	u8 sysctl_tcp_rfc1337;
+	u8 sysctl_tcp_abort_on_overflow;
+	u8 sysctl_tcp_fack; /* obsolete */
 	int sysctl_tcp_max_reordering;
-	int sysctl_tcp_dsack;
-	int sysctl_tcp_app_win;
 	int sysctl_tcp_adv_win_scale;
-	int sysctl_tcp_frto;
-	int sysctl_tcp_nometrics_save;
-	int sysctl_tcp_no_ssthresh_metrics_save;
-	int sysctl_tcp_moderate_rcvbuf;
-	int sysctl_tcp_tso_win_divisor;
-	int sysctl_tcp_workaround_signed_windows;
+	u8 sysctl_tcp_dsack;
+	u8 sysctl_tcp_app_win;
+	u8 sysctl_tcp_frto;
+	u8 sysctl_tcp_nometrics_save;
+	u8 sysctl_tcp_no_ssthresh_metrics_save;
+	u8 sysctl_tcp_moderate_rcvbuf;
+	u8 sysctl_tcp_tso_win_divisor;
+	u8 sysctl_tcp_workaround_signed_windows;
 	int sysctl_tcp_limit_output_bytes;
 	int sysctl_tcp_challenge_ack_limit;
-	int sysctl_tcp_min_tso_segs;
 	int sysctl_tcp_min_rtt_wlen;
-	int sysctl_tcp_autocorking;
+	u8 sysctl_tcp_min_tso_segs;
+	u8 sysctl_tcp_autocorking;
+	u8 sysctl_tcp_reflect_tos;
 	int sysctl_tcp_invalid_ratelimit;
 	int sysctl_tcp_pacing_ss_ratio;
 	int sysctl_tcp_pacing_ca_ratio;
@@ -182,7 +183,6 @@ struct netns_ipv4 {
 	unsigned int sysctl_tcp_fastopen_blackhole_timeout;
 	atomic_t tfo_active_disable_times;
 	unsigned long tfo_active_disable_stamp;
-	int sysctl_tcp_reflect_tos;
 
 	int sysctl_udp_wmem_min;
 	int sysctl_udp_rmem_min;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 510a32635612..442ff4be1bde 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -775,17 +775,17 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_fwmark_accept",
 		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	{
 		.procname	= "tcp_l3mdev_accept",
 		.data		= &init_net.ipv4.sysctl_tcp_l3mdev_accept,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
@@ -793,9 +793,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_mtu_probing",
 		.data		= &init_net.ipv4.sysctl_tcp_mtu_probing,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_base_mss",
@@ -897,9 +897,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_keepalive_probes",
 		.data		= &init_net.ipv4.sysctl_tcp_keepalive_probes,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_keepalive_intvl",
@@ -911,26 +911,26 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_syn_retries",
 		.data		= &init_net.ipv4.sysctl_tcp_syn_retries,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= &tcp_syn_retries_min,
 		.extra2		= &tcp_syn_retries_max
 	},
 	{
 		.procname	= "tcp_synack_retries",
 		.data		= &init_net.ipv4.sysctl_tcp_synack_retries,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 #ifdef CONFIG_SYN_COOKIES
 	{
 		.procname	= "tcp_syncookies",
 		.data		= &init_net.ipv4.sysctl_tcp_syncookies,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 #endif
 	{
@@ -943,24 +943,24 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_retries1",
 		.data		= &init_net.ipv4.sysctl_tcp_retries1,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra2		= &tcp_retr1_max
 	},
 	{
 		.procname	= "tcp_retries2",
 		.data		= &init_net.ipv4.sysctl_tcp_retries2,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_orphan_retries",
 		.data		= &init_net.ipv4.sysctl_tcp_orphan_retries,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_fin_timeout",
@@ -979,9 +979,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_tw_reuse",
 		.data		= &init_net.ipv4.sysctl_tcp_tw_reuse,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
@@ -1067,88 +1067,88 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_sack",
 		.data		= &init_net.ipv4.sysctl_tcp_sack,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_window_scaling",
 		.data		= &init_net.ipv4.sysctl_tcp_window_scaling,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_timestamps",
 		.data		= &init_net.ipv4.sysctl_tcp_timestamps,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_early_retrans",
 		.data		= &init_net.ipv4.sysctl_tcp_early_retrans,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &four,
 	},
 	{
 		.procname	= "tcp_recovery",
 		.data		= &init_net.ipv4.sysctl_tcp_recovery,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname       = "tcp_thin_linear_timeouts",
 		.data           = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
-		.maxlen         = sizeof(int),
+		.maxlen         = sizeof(u8),
 		.mode           = 0644,
-		.proc_handler   = proc_dointvec
+		.proc_handler   = proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_slow_start_after_idle",
 		.data		= &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_retrans_collapse",
 		.data		= &init_net.ipv4.sysctl_tcp_retrans_collapse,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_stdurg",
 		.data		= &init_net.ipv4.sysctl_tcp_stdurg,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_rfc1337",
 		.data		= &init_net.ipv4.sysctl_tcp_rfc1337,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_abort_on_overflow",
 		.data		= &init_net.ipv4.sysctl_tcp_abort_on_overflow,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_fack",
 		.data		= &init_net.ipv4.sysctl_tcp_fack,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_max_reordering",
@@ -1160,16 +1160,16 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_dsack",
 		.data		= &init_net.ipv4.sysctl_tcp_dsack,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_app_win",
 		.data		= &init_net.ipv4.sysctl_tcp_app_win,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_adv_win_scale",
@@ -1183,46 +1183,46 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_frto",
 		.data		= &init_net.ipv4.sysctl_tcp_frto,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_no_metrics_save",
 		.data		= &init_net.ipv4.sysctl_tcp_nometrics_save,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_no_ssthresh_metrics_save",
 		.data		= &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "tcp_moderate_rcvbuf",
 		.data		= &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_tso_win_divisor",
 		.data		= &init_net.ipv4.sysctl_tcp_tso_win_divisor,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_workaround_signed_windows",
 		.data		= &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "tcp_limit_output_bytes",
@@ -1241,9 +1241,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_min_tso_segs",
 		.data		= &init_net.ipv4.sysctl_tcp_min_tso_segs,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ONE,
 		.extra2		= &gso_max_segs,
 	},
@@ -1259,9 +1259,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_autocorking",
 		.data		= &init_net.ipv4.sysctl_tcp_autocorking,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
@@ -1332,9 +1332,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname       = "tcp_reflect_tos",
 		.data           = &init_net.ipv4.sysctl_tcp_reflect_tos,
-		.maxlen         = sizeof(int),
+		.maxlen         = sizeof(u8),
 		.mode           = 0644,
-		.proc_handler   = proc_dointvec_minmax,
+		.proc_handler   = proc_dou8vec_minmax,
 		.extra1         = SYSCTL_ZERO,
 		.extra2         = SYSCTL_ONE,
 	},
-- 
cgit v1.2.3


From 794d9b25817a813f1d2fb8e133ba6d2f53920853 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 25 Mar 2021 22:10:16 +0100
Subject: docs: nf_flowtable: fix compilation and warnings

... cannot be used in block quote, it breaks compilation, remove it.

Fix warnings due to missing blank line such as:

net-next/Documentation/networking/nf_flowtable.rst:142: WARNING: Block quote ends without a blank line; unexpected unindent.

Fixes: 143490cde566 ("docs: nf_flowtable: update documentation with enhancements")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/nf_flowtable.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst
index d87f253b9d39..d757c21c10f2 100644
--- a/Documentation/networking/nf_flowtable.rst
+++ b/Documentation/networking/nf_flowtable.rst
@@ -112,6 +112,7 @@ You can identify offloaded flows through the [OFFLOAD] tag when listing your
 connection tracking table.
 
 ::
+
 	# conntrack -L
 	tcp      6 src=10.141.10.2 dst=192.168.10.2 sport=52728 dport=5201 src=192.168.10.2 dst=192.168.10.1 sport=5201 dport=52728 [OFFLOAD] mark=0 use=2
 
@@ -138,6 +139,7 @@ allows the flowtable to define a fastpath bypass between the bridge ports
 device (represented as eth0) in your switch/router.
 
 ::
+
                       fastpath bypass
                .-------------------------.
               /                           \
@@ -168,12 +170,12 @@ connection tracking entry by specifying the counter statement in your flowtable
 definition, e.g.
 
 ::
+
 	table inet x {
 		flowtable f {
 			hook ingress priority 0; devices = { eth0, eth1 };
 			counter
 		}
-		...
 	}
 
 Counter support is available since Linux kernel 5.7.
@@ -185,12 +187,12 @@ If your network device provides hardware offload support, you can turn it on by
 means of the 'offload' flag in your flowtable definition, e.g.
 
 ::
+
 	table inet x {
 		flowtable f {
 			hook ingress priority 0; devices = { eth0, eth1 };
 			flags offload;
 		}
-		...
 	}
 
 There is a workqueue that adds the flows to the hardware. Note that a few
-- 
cgit v1.2.3


From b910eaaaa4b89976ef02e5d6448f3f73dc671d91 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 22 Mar 2021 22:51:46 -0700
Subject: bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper

Jiri Olsa reported a bug ([1]) in kernel where cgroup local
storage pointer may be NULL in bpf_get_local_storage() helper.
There are two issues uncovered by this bug:
  (1). kprobe or tracepoint prog incorrectly sets cgroup local storage
       before prog run,
  (2). due to change from preempt_disable to migrate_disable,
       preemption is possible and percpu storage might be overwritten
       by other tasks.

This issue (1) is fixed in [2]. This patch tried to address issue (2).
The following shows how things can go wrong:
  task 1:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 2:   bpf_cgroup_storage_set() for percpu local storage
         preemption happens
  task 1:   run bpf program

task 1 will effectively use the percpu local storage setting by task 2
which will be either NULL or incorrect ones.

Instead of just one common local storage per cpu, this patch fixed
the issue by permitting 8 local storages per cpu and each local
storage is identified by a task_struct pointer. This way, we
allow at most 8 nested preemption between bpf_cgroup_storage_set()
and bpf_cgroup_storage_unset(). The percpu local storage slot
is released (calling bpf_cgroup_storage_unset()) by the same task
after bpf program finished running.
bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set()
interface.

The patch is tested on top of [2] with reproducer in [1].
Without this patch, kernel will emit error in 2-3 minutes.
With this patch, after one hour, still no error.

 [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T
 [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com
---
 include/linux/bpf-cgroup.h | 57 +++++++++++++++++++++++++++++++++++++++-------
 include/linux/bpf.h        | 22 ++++++++++++++----
 kernel/bpf/helpers.c       | 15 ++++++++----
 kernel/bpf/local_storage.c |  5 ++--
 net/bpf/test_run.c         |  6 ++++-
 5 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c42e02b4d84b..6a29fe11485d 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
 struct bpf_cgroup_storage;
 struct ctl_table;
 struct ctl_table_header;
+struct task_struct;
 
 #ifdef CONFIG_CGROUP_BPF
 
 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+#define BPF_CGROUP_STORAGE_NEST_MAX	8
+
+struct bpf_cgroup_storage_info {
+	struct task_struct *task;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+};
+
+/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
+ * to use bpf cgroup storage simultaneously.
+ */
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 #define for_each_cgroup_storage_type(stype) \
 	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
-					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					 *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
 	enum bpf_cgroup_storage_type stype;
+	int i, err = 0;
+
+	preempt_disable();
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
+			continue;
+
+		this_cpu_write(bpf_cgroup_storage_info[i].task, current);
+		for_each_cgroup_storage_type(stype)
+			this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
+				       storage[stype]);
+		goto out;
+	}
+	err = -EBUSY;
+	WARN_ON_ONCE(1);
+
+out:
+	preempt_enable();
+	return err;
+}
+
+static inline void bpf_cgroup_storage_unset(void)
+{
+	int i;
+
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+			continue;
 
-	for_each_cgroup_storage_type(stype)
-		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
+		this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
+		return;
+	}
 }
 
 struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(
-	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
+static inline int bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
+static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
 					    struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 24678d6ecbcf..5a0801b420ca 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1106,6 +1106,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN						(1 << 0)
 
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
+ * if bpf_cgroup_storage_set() failed, the rest of programs
+ * will not execute. This should be a really rare scenario
+ * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
+ * preemptions all between bpf_cgroup_storage_set() and
+ * bpf_cgroup_storage_unset() on the same cpu.
+ */
 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
 	({								\
 		struct bpf_prog_array_item *_item;			\
@@ -1118,10 +1125,12 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		_array = rcu_dereference(array);			\
 		_item = &_array->items[0];				\
 		while ((_prog = READ_ONCE(_item->prog))) {		\
-			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
+				break;					\
 			func_ret = func(_prog, ctx);			\
 			_ret &= (func_ret & 1);				\
 			*(ret_flags) |= (func_ret >> 1);			\
+			bpf_cgroup_storage_unset();			\
 			_item++;					\
 		}							\
 		rcu_read_unlock();					\
@@ -1142,9 +1151,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 			goto _out;			\
 		_item = &_array->items[0];		\
 		while ((_prog = READ_ONCE(_item->prog))) {		\
-			if (set_cg_storage)		\
-				bpf_cgroup_storage_set(_item->cgroup_storage);	\
-			_ret &= func(_prog, ctx);	\
+			if (!set_cg_storage) {			\
+				_ret &= func(_prog, ctx);	\
+			} else {				\
+				if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
+					break;			\
+				_ret &= func(_prog, ctx);	\
+				bpf_cgroup_storage_unset();	\
+			}				\
 			_item++;			\
 		}					\
 _out:							\
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 074800226327..f306611c4ddf 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -382,8 +382,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage*,
-		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
+		bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -392,10 +392,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 	 * verifier checks that its value is correct.
 	 */
 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_storage *storage = NULL;
 	void *ptr;
+	int i;
 
-	storage = this_cpu_read(bpf_cgroup_storage[stype]);
+	for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
+		if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
+			continue;
+
+		storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
+		break;
+	}
 
 	if (stype == BPF_CGROUP_STORAGE_SHARED)
 		ptr = &READ_ONCE(storage->buf)->data[0];
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 2d4f9ac12377..bd11db9774c3 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -9,10 +9,11 @@
 #include <linux/slab.h>
 #include <uapi/linux/btf.h>
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
-
 #ifdef CONFIG_CGROUP_BPF
 
+DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
+	       bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
+
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0abdd67f44b1..4aabf71cd95d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -106,12 +106,16 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 
 	bpf_test_timer_enter(&t);
 	do {
-		bpf_cgroup_storage_set(storage);
+		ret = bpf_cgroup_storage_set(storage);
+		if (ret)
+			break;
 
 		if (xdp)
 			*retval = bpf_prog_run_xdp(prog, ctx);
 		else
 			*retval = BPF_PROG_RUN(prog, ctx);
+
+		bpf_cgroup_storage_unset();
 	} while (bpf_test_timer_continue(&t, repeat, &ret, time));
 	bpf_test_timer_leave(&t);
 
-- 
cgit v1.2.3


From cff908463d91a6b2fb8c8ab6c41d9c308c29fd42 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Mon, 22 Mar 2021 17:07:20 +0000
Subject: selftests/bpf: Better error messages for ima_setup.sh failures

The current implementation uses the CHECK_FAIL macro which does not
provide useful error messages when the script fails. Use the CHECK macro
instead and provide more descriptive messages to aid debugging.

Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210322170720.2926715-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b54bc0c351b7..0252f61d611a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -68,7 +68,8 @@ void test_test_ima(void)
 		goto close_prog;
 
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
-	if (CHECK_FAIL(system(cmd)))
+	err = system(cmd);
+	if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
 		goto close_clean;
 
 	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
@@ -81,7 +82,8 @@ void test_test_ima(void)
 
 close_clean:
 	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
-	CHECK_FAIL(system(cmd));
+	err = system(cmd);
+	CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
 close_prog:
 	ima__destroy(skel);
 }
-- 
cgit v1.2.3


From f56387c534cc54d2578b962692e574e3edd8c3f6 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Mon, 22 Mar 2021 23:50:53 -0300
Subject: bpf: Add support for batched ops in LPM trie maps

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210323025058.315763-2-pctammela@gmail.com
---
 kernel/bpf/lpm_trie.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index cec792a17e5f..1b7b8a6f34ee 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -726,6 +726,9 @@ const struct bpf_map_ops trie_map_ops = {
 	.map_lookup_elem = trie_lookup_elem,
 	.map_update_elem = trie_update_elem,
 	.map_delete_elem = trie_delete_elem,
+	.map_lookup_batch = generic_map_lookup_batch,
+	.map_update_batch = generic_map_update_batch,
+	.map_delete_batch = generic_map_delete_batch,
 	.map_check_btf = trie_check_btf,
 	.map_btf_name = "lpm_trie",
 	.map_btf_id = &trie_map_btf_id,
-- 
cgit v1.2.3


From e9bd8cbd970bedd3cca8ee334c76ab90feb78760 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Mon, 22 Mar 2021 23:50:54 -0300
Subject: bpf: selftests: Add tests for batched ops in LPM trie maps

Uses the already existing infrastructure for testing batched ops.
The testing code is essentially the same, with minor tweaks for this use
case.

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210323025058.315763-3-pctammela@gmail.com
---
 .../bpf/map_tests/lpm_trie_map_batch_ops.c         | 158 +++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c

diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..2e986e5e4cac
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+	__u32 prefix;
+	struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+			     struct test_lpm_key *keys, int *values)
+{
+	__u32 i;
+	int err;
+	char buff[16] = { 0 };
+	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+		.elem_flags = 0,
+		.flags = 0,
+	);
+
+	for (i = 0; i < max_entries; i++) {
+		keys[i].prefix = 32;
+		snprintf(buff, 16, "192.168.1.%d", i + 1);
+		inet_pton(AF_INET, buff, &keys[i].ipv4);
+		values[i] = i + 1;
+	}
+
+	err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+	CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+			     struct test_lpm_key *keys, int *values)
+{
+	char buff[16] = { 0 };
+	int lower_byte = 0;
+	__u32 i;
+
+	memset(visited, 0, max_entries * sizeof(*visited));
+	for (i = 0; i < max_entries; i++) {
+		inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+		CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+		      "sscanf()", "error: i %d\n", i);
+		CHECK(lower_byte != values[i], "key/value checking",
+		      "error: i %d key %s value %d\n", i, buff, values[i]);
+		visited[i] = 1;
+	}
+	for (i = 0; i < max_entries; i++) {
+		CHECK(visited[i] != 1, "visited checking",
+		      "error: keys array at index %d missing\n", i);
+	}
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+	struct bpf_create_map_attr xattr = {
+		.name = "lpm_trie_map",
+		.map_type = BPF_MAP_TYPE_LPM_TRIE,
+		.key_size = sizeof(struct test_lpm_key),
+		.value_size = sizeof(int),
+		.map_flags = BPF_F_NO_PREALLOC,
+	};
+	struct test_lpm_key *keys, key;
+	int map_fd, *values, *visited;
+	__u32 step, count, total, total_success;
+	const __u32 max_entries = 10;
+	__u64 batch = 0;
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+		.elem_flags = 0,
+		.flags = 0,
+	);
+
+	xattr.max_entries = max_entries;
+	map_fd = bpf_create_map_xattr(&xattr);
+	CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
+	      strerror(errno));
+
+	keys = malloc(max_entries * sizeof(struct test_lpm_key));
+	values = malloc(max_entries * sizeof(int));
+	visited = malloc(max_entries * sizeof(int));
+	CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+	      strerror(errno));
+
+	total_success = 0;
+	for (step = 1; step < max_entries; step++) {
+		map_batch_update(map_fd, max_entries, keys, values);
+		map_batch_verify(visited, max_entries, keys, values);
+		memset(keys, 0, max_entries * sizeof(*keys));
+		memset(values, 0, max_entries * sizeof(*values));
+		batch = 0;
+		total = 0;
+		/* iteratively lookup/delete elements with 'step'
+		 * elements each.
+		 */
+		count = step;
+		while (true) {
+			err = bpf_map_lookup_batch(map_fd,
+				total ? &batch : NULL, &batch,
+				keys + total, values + total, &count, &opts);
+
+			CHECK((err && errno != ENOENT), "lookup with steps",
+			      "error: %s\n", strerror(errno));
+
+			total += count;
+			if (err)
+				break;
+		}
+
+		CHECK(total != max_entries, "lookup with steps",
+		      "total = %u, max_entries = %u\n", total, max_entries);
+
+		map_batch_verify(visited, max_entries, keys, values);
+
+		total = 0;
+		count = step;
+		while (total < max_entries) {
+			if (max_entries - total < step)
+				count = max_entries - total;
+			err = bpf_map_delete_batch(map_fd, keys + total, &count,
+						   &opts);
+			CHECK((err && errno != ENOENT), "delete batch",
+			      "error: %s\n", strerror(errno));
+			total += count;
+			if (err)
+				break;
+		}
+		CHECK(total != max_entries, "delete with steps",
+		      "total = %u, max_entries = %u\n", total, max_entries);
+
+		/* check map is empty, errono == ENOENT */
+		err = bpf_map_get_next_key(map_fd, NULL, &key);
+		CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+		      "error: %s\n", strerror(errno));
+
+		total_success++;
+	}
+
+	CHECK(total_success == 0, "check total_success",
+	      "unexpected failure\n");
+
+	printf("%s:PASS\n", __func__);
+
+	free(keys);
+	free(values);
+	free(visited);
+}
-- 
cgit v1.2.3


From 155f556d64b1a48710f01305e14bb860734ed1e3 Mon Sep 17 00:00:00 2001
From: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
Date: Tue, 23 Mar 2021 01:09:52 -0300
Subject: libbpf: Add bpf object kern_version attribute setter

Unfortunately some distros don't have their kernel version defined
accurately in <linux/version.h> due to different long term support
reasons.

It is important to have a way to override the bpf kern_version
attribute during runtime: some old kernels might still check for
kern_version attribute during bpf_prog_load().

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@ubuntu.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210323040952.2118241-1-rafaeldtinoco@ubuntu.com
---
 tools/lib/bpf/libbpf.c   | 10 ++++++++++
 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 12 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 10a0a67699f1..cebb0e852cf8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8270,6 +8270,16 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
 	return obj->btf ? btf__fd(obj->btf) : -1;
 }
 
+int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
+{
+	if (obj->loaded)
+		return -EINVAL;
+
+	obj->kern_version = kern_version;
+
+	return 0;
+}
+
 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 			 bpf_object_clear_priv_t clear_priv)
 {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 89ade7d7b31c..f500621d28e5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -143,6 +143,7 @@ LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
 
 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
 
 struct btf;
 LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 279ae861f568..f5990f7208ce 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,4 +359,5 @@ LIBBPF_0.4.0 {
 		bpf_linker__finalize;
 		bpf_linker__free;
 		bpf_linker__new;
+		bpf_object__set_kversion;
 } LIBBPF_0.3.0;
-- 
cgit v1.2.3


From 6def6e47e24f53a9b1f1666fd36c37088c066cec Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Mon, 15 Mar 2021 11:27:01 -0700
Subject: net/mlx5e: alloc the correct size for indirection_rqt

The cited patch allocated the wrong size for the indirection_rqt table,
fix that.

Fixes: 2119bda642c4 ("net/mlx5e: allocate 'indirection_rqt' buffer dynamically")
CC: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4bd882a1018c..dbc06c71c170 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -447,11 +447,11 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
 
 static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
 {
-	u32 *indirection_rqt, rqn;
 	struct mlx5e_priv *priv = hp->func_priv;
 	int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+	u32 *indirection_rqt, rqn;
 
-	indirection_rqt = kzalloc(sz, GFP_KERNEL);
+	indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL);
 	if (!indirection_rqt)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 6debae2a9d1179e54abd9f45afe39ed196ffc225 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Sun, 7 Mar 2021 15:13:23 +0200
Subject: net/mlx5e: Pass q_counter indentifier as parameter to rq_param
 builders

Pass q_counter idintifier, instead of reading it from mlx5e_priv
parameter.
This is a step towards removing the mlx5e_priv parameter from all
params function and logic in the next patches of the series.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/params.h    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c      | 13 ++++++++-----
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index ea2cfb04b31a..97a5bf50a53b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -115,6 +115,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
 void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
+			  u16 q_counter,
 			  struct mlx5e_rq_param *param);
 void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 				 struct mlx5e_sq_param *param);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 41db93883fea..0bfbc8cbe840 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -245,7 +245,7 @@ static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *
 	params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
 	mlx5e_init_rq_type_params(priv->mdev, params);
 	params->sw_mtu = priv->netdev->max_mtu;
-	mlx5e_build_rq_param(priv, params, NULL, &t->rq_param);
+	mlx5e_build_rq_param(priv, params, NULL, priv->q_counter, &t->rq_param);
 }
 
 static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index f4bce1365639..d84a2299adf4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -40,7 +40,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
 				   struct mlx5e_xsk_param *xsk,
 				   struct mlx5e_channel_param *cparam)
 {
-	mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+	mlx5e_build_rq_param(priv, params, xsk, priv->q_counter, &cparam->rq);
 	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d40fc2672530..6aae2bce32f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2190,6 +2190,7 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
 void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
+			  u16 q_counter,
 			  struct mlx5e_rq_param *param)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -2218,7 +2219,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, log_wq_stride,
 		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
 	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
-	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
+	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
 	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 
@@ -2227,6 +2228,7 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+				      u16 q_counter,
 				      struct mlx5e_rq_param *param)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -2236,7 +2238,7 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq, wq, log_wq_stride,
 		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
-	MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
+	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
 
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 }
@@ -2386,11 +2388,12 @@ static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev)
 
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
 				      struct mlx5e_params *params,
+				      u16 q_counter,
 				      struct mlx5e_channel_param *cparam)
 {
 	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
 
-	mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
+	mlx5e_build_rq_param(priv, params, NULL, q_counter, &cparam->rq);
 
 	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
 	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev);
@@ -2415,7 +2418,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 	if (!chs->c || !cparam)
 		goto err_free;
 
-	mlx5e_build_channel_param(priv, &chs->params, cparam);
+	mlx5e_build_channel_param(priv, &chs->params, priv->q_counter, cparam);
 	for (i = 0; i < chs->num; i++) {
 		struct xsk_buff_pool *xsk_pool = NULL;
 
@@ -3376,7 +3379,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
 	struct mlx5e_cq *cq = &drop_rq->cq;
 	int err;
 
-	mlx5e_build_drop_rq_param(priv, &rq_param);
+	mlx5e_build_drop_rq_param(priv, priv->drop_rq_q_counter, &rq_param);
 
 	err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
 	if (err)
-- 
cgit v1.2.3


From b3a131c2a1602e0aa3fc6b0fdee519a997a9ca0e Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Sun, 7 Mar 2021 15:58:20 +0200
Subject: net/mlx5e: Move params logic into its dedicated file

Take params logic out of en_main.c, into the dedicated params.c.
Some functions are now hidden and become static.
No functional change here.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  12 -
 .../net/ethernet/mellanox/mlx5/core/en/params.c    | 482 ++++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/en/params.h    |  30 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 454 -------------------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   1 +
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |   1 +
 6 files changed, 497 insertions(+), 483 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ea3f3befe74..d1fca0670b12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -919,8 +919,6 @@ struct mlx5e_profile {
 void mlx5e_build_ptys2ethtool_map(void);
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
-				struct mlx5e_params *params);
 
 void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
@@ -1024,14 +1022,6 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 				   int num_channels);
 
-void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
-void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
-void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
-
-void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params);
 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
 void mlx5e_activate_rq(struct mlx5e_rq *rq);
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@ -1177,8 +1167,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
-			   struct mlx5e_params *params);
 void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
 			    u16 num_channels);
 void mlx5e_rx_dim_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 36381a2ed5a5..3a22f5760f3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -3,10 +3,12 @@
 
 #include "en/params.h"
 #include "en/txrx.h"
-#include "en_accel/tls_rxtx.h"
+#include "en/port.h"
+#include "en_accel/en_accel.h"
+#include "accel/ipsec.h"
 
-static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
-				   struct mlx5e_xsk_param *xsk)
+static bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
+			    struct mlx5e_xsk_param *xsk)
 {
 	return params->xdp_prog || xsk;
 }
@@ -37,8 +39,8 @@ u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
 	return linear_rq_headroom + hw_mtu;
 }
 
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
-				struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+				       struct mlx5e_xsk_param *xsk)
 {
 	u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
 
@@ -186,3 +188,473 @@ int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params)
 
 	return 0;
 }
+
+static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
+{
+	struct dim_cq_moder moder;
+
+	moder.cq_period_mode = cq_period_mode;
+	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
+	moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
+	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+		moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
+
+	return moder;
+}
+
+static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
+{
+	struct dim_cq_moder moder;
+
+	moder.cq_period_mode = cq_period_mode;
+	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
+	moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
+	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
+		moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
+
+	return moder;
+}
+
+static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
+{
+	return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
+		DIM_CQ_PERIOD_MODE_START_FROM_CQE :
+		DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	if (params->tx_dim_enabled) {
+		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+		params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
+	} else {
+		params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
+	}
+}
+
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	if (params->rx_dim_enabled) {
+		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
+
+		params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
+	} else {
+		params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
+	}
+}
+
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	mlx5e_reset_tx_moderation(params, cq_period_mode);
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
+			params->tx_cq_moderation.cq_period_mode ==
+				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
+{
+	mlx5e_reset_rx_moderation(params, cq_period_mode);
+	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+			params->rx_cq_moderation.cq_period_mode ==
+				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+}
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
+{
+	u32 link_speed = 0;
+	u32 pci_bw = 0;
+
+	mlx5e_port_max_linkspeed(mdev, &link_speed);
+	pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
+	mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
+			   link_speed, pci_bw);
+
+#define MLX5E_SLOW_PCI_RATIO (2)
+
+	return link_speed && pci_bw &&
+		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
+}
+
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
+				struct mlx5e_params *params)
+{
+	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
+		return false;
+
+	if (MLX5_IPSEC_DEV(mdev))
+		return false;
+
+	if (params->xdp_prog) {
+		/* XSK params are not considered here. If striding RQ is in use,
+		 * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
+		 * be called with the known XSK params.
+		 */
+		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+			return false;
+	}
+
+	return true;
+}
+
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+			       struct mlx5e_params *params)
+{
+	params->log_rq_mtu_frames = is_kdump_kernel() ?
+		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
+		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+
+	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+		       BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
+		       BIT(params->log_rq_mtu_frames),
+		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
+		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
+}
+
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+	params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
+		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+		MLX5_WQ_TYPE_CYCLIC;
+}
+
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+			   struct mlx5e_params *params)
+{
+	/* Prefer Striding RQ, unless any of the following holds:
+	 * - Striding RQ configuration is not possible/supported.
+	 * - Slow PCI heuristic.
+	 * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+	 *
+	 * No XSK params: checking the availability of striding RQ in general.
+	 */
+	if (!slow_pci_heuristic(mdev) &&
+	    mlx5e_striding_rq_possible(mdev, params) &&
+	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
+	     !mlx5e_rx_is_linear_skb(params, NULL)))
+		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+	mlx5e_set_rq_type(mdev, params);
+	mlx5e_init_rq_type_params(mdev, params);
+}
+
+/* Build queue parameters */
+
+void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
+{
+	*ccp = (struct mlx5e_create_cq_param) {
+		.napi = &c->napi,
+		.ch_stats = c->stats,
+		.node = cpu_to_node(c->cpu),
+		.ix = c->ix,
+	};
+}
+
+#define DEFAULT_FRAG_SIZE (2048)
+
+static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+				      struct mlx5e_params *params,
+				      struct mlx5e_xsk_param *xsk,
+				      struct mlx5e_rq_frags_info *info)
+{
+	u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	int frag_size_max = DEFAULT_FRAG_SIZE;
+	u32 buf_size = 0;
+	int i;
+
+	if (MLX5_IPSEC_DEV(mdev))
+		byte_count += MLX5E_METADATA_ETHER_LEN;
+
+	if (mlx5e_rx_is_linear_skb(params, xsk)) {
+		int frag_stride;
+
+		frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
+		frag_stride = roundup_pow_of_two(frag_stride);
+
+		info->arr[0].frag_size = byte_count;
+		info->arr[0].frag_stride = frag_stride;
+		info->num_frags = 1;
+		info->wqe_bulk = PAGE_SIZE / frag_stride;
+		goto out;
+	}
+
+	if (byte_count > PAGE_SIZE +
+	    (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
+		frag_size_max = PAGE_SIZE;
+
+	i = 0;
+	while (buf_size < byte_count) {
+		int frag_size = byte_count - buf_size;
+
+		if (i < MLX5E_MAX_RX_FRAGS - 1)
+			frag_size = min(frag_size, frag_size_max);
+
+		info->arr[i].frag_size = frag_size;
+		info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+
+		buf_size += frag_size;
+		i++;
+	}
+	info->num_frags = i;
+	/* number of different wqes sharing a page */
+	info->wqe_bulk = 1 + (info->num_frags % 2);
+
+out:
+	info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
+	info->log_num_frags = order_base_2(info->num_frags);
+}
+
+static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
+{
+	int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
+
+	switch (wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		sz += sizeof(struct mlx5e_rx_wqe_ll);
+		break;
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		sz += sizeof(struct mlx5e_rx_wqe_cyc);
+	}
+
+	return order_base_2(sz);
+}
+
+static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+					struct mlx5e_cq_param *param)
+{
+	void *cqc = param->cqc;
+
+	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
+}
+
+static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+				    struct mlx5e_params *params,
+				    struct mlx5e_xsk_param *xsk,
+				    struct mlx5e_cq_param *param)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	bool hw_stridx = false;
+	void *cqc = param->cqc;
+	u8 log_cq_size;
+
+	switch (params->rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+			mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+		hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
+		break;
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		log_cq_size = params->log_rq_mtu_frames;
+	}
+
+	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
+		MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
+			 MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
+		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+	}
+
+	mlx5e_build_common_cq_param(priv, param);
+	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
+}
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+			  struct mlx5e_params *params,
+			  struct mlx5e_xsk_param *xsk,
+			  u16 q_counter,
+			  struct mlx5e_rq_param *param)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	void *rqc = param->rqc;
+	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+	int ndsegs = 1;
+
+	switch (params->rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		MLX5_SET(wq, wq, log_wqe_num_of_strides,
+			 mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
+			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+		MLX5_SET(wq, wq, log_wqe_stride_size,
+			 mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
+			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
+		break;
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
+		mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+		ndsegs = param->frags_info.num_frags;
+	}
+
+	MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
+	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+	MLX5_SET(wq, wq, log_wq_stride,
+		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
+	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
+	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
+	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
+
+	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+	mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp);
+}
+
+void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+			       u16 q_counter,
+			       struct mlx5e_rq_param *param)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	void *rqc = param->rqc;
+	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+	MLX5_SET(wq, wq, log_wq_stride,
+		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
+	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
+
+	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
+}
+
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_cq_param *param)
+{
+	void *cqc = param->cqc;
+
+	MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
+
+	mlx5e_build_common_cq_param(priv, param);
+	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
+}
+
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+				 struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.hw_objs.pdn);
+
+	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
+}
+
+void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+			  struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+	bool allow_swp;
+
+	allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
+		    !!MLX5_IPSEC_DEV(priv->mdev);
+	mlx5e_build_sq_param_common(priv, param);
+	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
+	param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
+	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
+}
+
+static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+				     u8 log_wq_size,
+				     struct mlx5e_cq_param *param)
+{
+	void *cqc = param->cqc;
+
+	MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
+
+	mlx5e_build_common_cq_param(priv, param);
+
+	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+}
+
+static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
+{
+	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
+
+	return MLX5_GET(wq, wq, log_wq_sz);
+}
+
+static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
+				      struct mlx5e_rq_param *rqp)
+{
+	switch (params->rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
+			     order_base_2(MLX5E_UMR_WQEBBS) +
+			     mlx5e_get_rq_log_wq_sz(rqp->rqc));
+	default: /* MLX5_WQ_TYPE_CYCLIC */
+		return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+	}
+}
+
+static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev)
+{
+	if (netdev->hw_features & NETIF_F_HW_TLS_RX)
+		return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+
+	return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+}
+
+static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+				    u8 log_wq_size,
+				    struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+
+	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
+}
+
+static void mlx5e_build_async_icosq_param(struct mlx5e_priv *priv,
+					  u8 log_wq_size,
+					  struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+	param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
+	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
+}
+
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
+	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
+}
+
+void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+			       struct mlx5e_params *params,
+			       u16 q_counter,
+			       struct mlx5e_channel_param *cparam)
+{
+	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
+
+	mlx5e_build_rq_param(priv, params, NULL, q_counter, &cparam->rq);
+
+	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
+	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev);
+
+	mlx5e_build_sq_param(priv, params, &cparam->txq_sq);
+	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+	mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
+	mlx5e_build_async_icosq_param(priv, async_icosq_log_wq_sz, &cparam->async_icosq);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 97a5bf50a53b..a43776e8a86b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -84,12 +84,21 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
 
 /* Parameter calculations */
 
+void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
+
+bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
+bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+
 u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
 				 struct mlx5e_xsk_param *xsk);
 u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
 			     struct mlx5e_xsk_param *xsk);
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
-				struct mlx5e_xsk_param *xsk);
 u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
 				struct mlx5e_xsk_param *xsk);
 bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
@@ -117,26 +126,23 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 			  struct mlx5e_xsk_param *xsk,
 			  u16 q_counter,
 			  struct mlx5e_rq_param *param);
+void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+			       u16 q_counter,
+			       struct mlx5e_rq_param *param);
 void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 				 struct mlx5e_sq_param *param);
 void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
 			  struct mlx5e_sq_param *param);
-void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
-			     struct mlx5e_params *params,
-			     struct mlx5e_xsk_param *xsk,
-			     struct mlx5e_cq_param *param);
 void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 			     struct mlx5e_params *params,
 			     struct mlx5e_cq_param *param);
-void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
-			      u8 log_wq_size,
-			      struct mlx5e_cq_param *param);
-void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
-			     u8 log_wq_size,
-			     struct mlx5e_sq_param *param);
 void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
 			     struct mlx5e_params *params,
 			     struct mlx5e_sq_param *param);
+void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+			       struct mlx5e_params *params,
+			       u16 q_counter,
+			       struct mlx5e_channel_param *cparam);
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6aae2bce32f3..efa4ba1f7260 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -87,51 +87,6 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 	return true;
 }
 
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params)
-{
-	params->log_rq_mtu_frames = is_kdump_kernel() ?
-		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
-		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
-
-	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
-		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
-		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
-		       BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
-		       BIT(params->log_rq_mtu_frames),
-		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
-		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
-}
-
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
-				struct mlx5e_params *params)
-{
-	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
-		return false;
-
-	if (mlx5_fpga_is_ipsec_device(mdev))
-		return false;
-
-	if (params->xdp_prog) {
-		/* XSK params are not considered here. If striding RQ is in use,
-		 * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
-		 * be called with the known XSK params.
-		 */
-		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
-			return false;
-	}
-
-	return true;
-}
-
-void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
-{
-	params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
-		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
-		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
-		MLX5_WQ_TYPE_CYCLIC;
-}
-
 void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -1860,16 +1815,6 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
-void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
-{
-	*ccp = (struct mlx5e_create_cq_param) {
-		.napi = &c->napi,
-		.ch_stats = c->stats,
-		.node = cpu_to_node(c->cpu),
-		.ix = c->ix,
-	};
-}
-
 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -2111,299 +2056,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
 	kvfree(c);
 }
 
-#define DEFAULT_FRAG_SIZE (2048)
-
-static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
-				      struct mlx5e_params *params,
-				      struct mlx5e_xsk_param *xsk,
-				      struct mlx5e_rq_frags_info *info)
-{
-	u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	int frag_size_max = DEFAULT_FRAG_SIZE;
-	u32 buf_size = 0;
-	int i;
-
-	if (mlx5_fpga_is_ipsec_device(mdev))
-		byte_count += MLX5E_METADATA_ETHER_LEN;
-
-	if (mlx5e_rx_is_linear_skb(params, xsk)) {
-		int frag_stride;
-
-		frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
-		frag_stride = roundup_pow_of_two(frag_stride);
-
-		info->arr[0].frag_size = byte_count;
-		info->arr[0].frag_stride = frag_stride;
-		info->num_frags = 1;
-		info->wqe_bulk = PAGE_SIZE / frag_stride;
-		goto out;
-	}
-
-	if (byte_count > PAGE_SIZE +
-	    (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
-		frag_size_max = PAGE_SIZE;
-
-	i = 0;
-	while (buf_size < byte_count) {
-		int frag_size = byte_count - buf_size;
-
-		if (i < MLX5E_MAX_RX_FRAGS - 1)
-			frag_size = min(frag_size, frag_size_max);
-
-		info->arr[i].frag_size = frag_size;
-		info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
-
-		buf_size += frag_size;
-		i++;
-	}
-	info->num_frags = i;
-	/* number of different wqes sharing a page */
-	info->wqe_bulk = 1 + (info->num_frags % 2);
-
-out:
-	info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
-	info->log_num_frags = order_base_2(info->num_frags);
-}
-
-static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
-{
-	int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
-
-	switch (wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		sz += sizeof(struct mlx5e_rx_wqe_ll);
-		break;
-	default: /* MLX5_WQ_TYPE_CYCLIC */
-		sz += sizeof(struct mlx5e_rx_wqe_cyc);
-	}
-
-	return order_base_2(sz);
-}
-
-static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
-{
-	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
-	return MLX5_GET(wq, wq, log_wq_sz);
-}
-
-void mlx5e_build_rq_param(struct mlx5e_priv *priv,
-			  struct mlx5e_params *params,
-			  struct mlx5e_xsk_param *xsk,
-			  u16 q_counter,
-			  struct mlx5e_rq_param *param)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	void *rqc = param->rqc;
-	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-	int ndsegs = 1;
-
-	switch (params->rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
-			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
-		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
-			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
-		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
-		break;
-	default: /* MLX5_WQ_TYPE_CYCLIC */
-		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
-		mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
-		ndsegs = param->frags_info.num_frags;
-	}
-
-	MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
-	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
-	MLX5_SET(wq, wq, log_wq_stride,
-		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
-	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
-	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
-	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
-	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
-
-	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
-	mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp);
-}
-
-static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
-				      u16 q_counter,
-				      struct mlx5e_rq_param *param)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	void *rqc = param->rqc;
-	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
-	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
-	MLX5_SET(wq, wq, log_wq_stride,
-		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
-	MLX5_SET(rqc, rqc, counter_set_id, q_counter);
-
-	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
-}
-
-void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
-				 struct mlx5e_sq_param *param)
-{
-	void *sqc = param->sqc;
-	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
-	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
-	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.hw_objs.pdn);
-
-	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
-}
-
-void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
-			  struct mlx5e_sq_param *param)
-{
-	void *sqc = param->sqc;
-	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-	bool allow_swp;
-
-	allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
-		    !!MLX5_IPSEC_DEV(priv->mdev);
-	mlx5e_build_sq_param_common(priv, param);
-	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
-	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
-	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
-	param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
-	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
-}
-
-static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
-					struct mlx5e_cq_param *param)
-{
-	void *cqc = param->cqc;
-
-	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
-	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
-		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
-}
-
-void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
-			     struct mlx5e_params *params,
-			     struct mlx5e_xsk_param *xsk,
-			     struct mlx5e_cq_param *param)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	bool hw_stridx = false;
-	void *cqc = param->cqc;
-	u8 log_cq_size;
-
-	switch (params->rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
-			mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
-		hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
-		break;
-	default: /* MLX5_WQ_TYPE_CYCLIC */
-		log_cq_size = params->log_rq_mtu_frames;
-	}
-
-	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
-	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
-		MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
-			 MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
-		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
-	}
-
-	mlx5e_build_common_cq_param(priv, param);
-	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
-}
-
-void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
-			     struct mlx5e_params *params,
-			     struct mlx5e_cq_param *param)
-{
-	void *cqc = param->cqc;
-
-	MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
-
-	mlx5e_build_common_cq_param(priv, param);
-	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
-}
-
-void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
-			      u8 log_wq_size,
-			      struct mlx5e_cq_param *param)
-{
-	void *cqc = param->cqc;
-
-	MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
-
-	mlx5e_build_common_cq_param(priv, param);
-
-	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-}
-
-void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
-			     u8 log_wq_size,
-			     struct mlx5e_sq_param *param)
-{
-	void *sqc = param->sqc;
-	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
-	mlx5e_build_sq_param_common(priv, param);
-
-	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
-	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
-	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
-}
-
-void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
-			     struct mlx5e_params *params,
-			     struct mlx5e_sq_param *param)
-{
-	void *sqc = param->sqc;
-	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
-
-	mlx5e_build_sq_param_common(priv, param);
-	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
-	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
-	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
-}
-
-static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
-				      struct mlx5e_rq_param *rqp)
-{
-	switch (params->rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
-			     order_base_2(MLX5E_UMR_WQEBBS) +
-			     mlx5e_get_rq_log_wq_sz(rqp->rqc));
-	default: /* MLX5_WQ_TYPE_CYCLIC */
-		return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-	}
-}
-
-static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev)
-{
-	if (netdev->hw_features & NETIF_F_HW_TLS_RX)
-		return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
-
-	return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
-}
-
-static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
-				      struct mlx5e_params *params,
-				      u16 q_counter,
-				      struct mlx5e_channel_param *cparam)
-{
-	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
-
-	mlx5e_build_rq_param(priv, params, NULL, q_counter, &cparam->rq);
-
-	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
-	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev);
-
-	mlx5e_build_sq_param(priv, params, &cparam->txq_sq);
-	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
-	mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
-	mlx5e_build_icosq_param(priv, async_icosq_log_wq_sz, &cparam->async_icosq);
-}
-
 int mlx5e_open_channels(struct mlx5e_priv *priv,
 			struct mlx5e_channels *chs)
 {
@@ -4887,93 +4539,6 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 		indirection_rqt[i] = i % num_channels;
 }
 
-static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
-{
-	u32 link_speed = 0;
-	u32 pci_bw = 0;
-
-	mlx5e_port_max_linkspeed(mdev, &link_speed);
-	pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
-	mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
-			   link_speed, pci_bw);
-
-#define MLX5E_SLOW_PCI_RATIO (2)
-
-	return link_speed && pci_bw &&
-		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
-}
-
-static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
-{
-	struct dim_cq_moder moder;
-
-	moder.cq_period_mode = cq_period_mode;
-	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
-	moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
-	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-		moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
-
-	return moder;
-}
-
-static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
-{
-	struct dim_cq_moder moder;
-
-	moder.cq_period_mode = cq_period_mode;
-	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
-	moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
-	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
-		moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
-
-	return moder;
-}
-
-static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
-{
-	return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
-		DIM_CQ_PERIOD_MODE_START_FROM_CQE :
-		DIM_CQ_PERIOD_MODE_START_FROM_EQE;
-}
-
-void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
-{
-	if (params->tx_dim_enabled) {
-		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
-
-		params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
-	} else {
-		params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
-	}
-}
-
-void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
-{
-	if (params->rx_dim_enabled) {
-		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
-
-		params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
-	} else {
-		params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
-	}
-}
-
-void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
-{
-	mlx5e_reset_tx_moderation(params, cq_period_mode);
-	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
-			params->tx_cq_moderation.cq_period_mode ==
-				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-}
-
-void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
-{
-	mlx5e_reset_rx_moderation(params, cq_period_mode);
-	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
-			params->rx_cq_moderation.cq_period_mode ==
-				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-}
-
 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
 {
 	int i;
@@ -4986,25 +4551,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
 	return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
-			   struct mlx5e_params *params)
-{
-	/* Prefer Striding RQ, unless any of the following holds:
-	 * - Striding RQ configuration is not possible/supported.
-	 * - Slow PCI heuristic.
-	 * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
-	 *
-	 * No XSK params: checking the availability of striding RQ in general.
-	 */
-	if (!slow_pci_heuristic(mdev) &&
-	    mlx5e_striding_rq_possible(mdev, params) &&
-	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
-	     !mlx5e_rx_is_linear_skb(params, NULL)))
-		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
-	mlx5e_set_rq_type(mdev, params);
-	mlx5e_init_rq_type_params(mdev, params);
-}
-
 void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
 			    u16 num_channels)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4cc902e0d71b..e5123e9182c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -40,6 +40,7 @@
 #include "eswitch.h"
 #include "en.h"
 #include "en_rep.h"
+#include "en/params.h"
 #include "en/txrx.h"
 #include "en_tc.h"
 #include "en/rep/tc.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 48303286c133..79f26b56e0a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -33,6 +33,7 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mlx5/fs.h>
 #include "en.h"
+#include "en/params.h"
 #include "ipoib.h"
 
 #define IB_DEFAULT_Q_KEY   0xb1b
-- 
cgit v1.2.3


From 895649201845508895be99b82f9c2b9282019516 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 10 Mar 2021 14:46:59 +0200
Subject: net/mlx5e: Restrict usage of mlx5e_priv in params logic functions

Do not use generic struct mlx5e_priv as a parameter to param
functions, as it is too generic. All calculations of the channel's
param should be mainly based on struct mlx5_core_dev and
struct mlx5e_params. Additional info can be explicitly passed.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/params.c    | 94 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/en/params.h    | 17 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |  8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/qos.c   |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c  | 12 +--
 .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |  9 ++-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  6 +-
 8 files changed, 77 insertions(+), 75 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 3a22f5760f3b..7b2b52e75222 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -174,15 +174,15 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
 	return stop_room;
 }
 
-int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params)
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
 	size_t sq_size = 1 << params->log_sq_size;
 	u16 stop_room;
 
-	stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
+	stop_room = mlx5e_calc_sq_stop_room(mdev, params);
 	if (stop_room >= sq_size) {
-		netdev_err(priv->netdev, "Stop room %u is bigger than the SQ size %zu\n",
-			   stop_room, sq_size);
+		mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n",
+			      stop_room, sq_size);
 		return -EINVAL;
 	}
 
@@ -421,22 +421,21 @@ static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
 	return order_base_2(sz);
 }
 
-static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
+static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
 					struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
-	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
-	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+	MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
+	if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
 		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
 }
 
-static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 				    struct mlx5e_params *params,
 				    struct mlx5e_xsk_param *xsk,
 				    struct mlx5e_cq_param *param)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	bool hw_stridx = false;
 	void *cqc = param->cqc;
 	u8 log_cq_size;
@@ -458,17 +457,16 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
 	}
 
-	mlx5e_build_common_cq_param(priv, param);
+	mlx5e_build_common_cq_param(mdev, param);
 	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
-void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+void mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
 			  u16 q_counter,
 			  struct mlx5e_rq_param *param)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	int ndsegs = 1;
@@ -499,14 +497,13 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
-	mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp);
+	mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
 }
 
-void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
 			       u16 q_counter,
 			       struct mlx5e_rq_param *param)
 {
-	struct mlx5_core_dev *mdev = priv->mdev;
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
@@ -518,7 +515,7 @@ void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 }
 
-void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
 			     struct mlx5e_params *params,
 			     struct mlx5e_cq_param *param)
 {
@@ -526,40 +523,41 @@ void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 
 	MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
 
-	mlx5e_build_common_cq_param(priv, param);
+	mlx5e_build_common_cq_param(mdev, param);
 	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
 }
 
-void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
 				 struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
-	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.hw_objs.pdn);
+	MLX5_SET(wq, wq, pd,            mdev->mlx5e_res.hw_objs.pdn);
 
-	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
+	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 }
 
-void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+			  struct mlx5e_params *params,
 			  struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	bool allow_swp;
 
-	allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
-		    !!MLX5_IPSEC_DEV(priv->mdev);
-	mlx5e_build_sq_param_common(priv, param);
+	allow_swp = mlx5_geneve_tx_allowed(mdev) ||
+		    !!MLX5_IPSEC_DEV(mdev);
+	mlx5e_build_sq_param_common(mdev, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
 	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
-	param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
-	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
+	param->stop_room = mlx5e_calc_sq_stop_room(mdev, params);
+	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
-static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
 				     u8 log_wq_size,
 				     struct mlx5e_cq_param *param)
 {
@@ -567,7 +565,7 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
 
 	MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
 
-	mlx5e_build_common_cq_param(priv, param);
+	mlx5e_build_common_cq_param(mdev, param);
 
 	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 }
@@ -592,69 +590,69 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
 	}
 }
 
-static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev)
+static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
 {
-	if (netdev->hw_features & NETIF_F_HW_TLS_RX)
+	if (mlx5_accel_is_ktls_rx(mdev))
 		return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
 	return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 }
 
-static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
 				    u8 log_wq_size,
 				    struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
-	mlx5e_build_sq_param_common(priv, param);
+	mlx5e_build_sq_param_common(mdev, param);
 
 	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
-	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
-	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
+	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
+	mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
 }
 
-static void mlx5e_build_async_icosq_param(struct mlx5e_priv *priv,
+static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
 					  u8 log_wq_size,
 					  struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
-	mlx5e_build_sq_param_common(priv, param);
+	mlx5e_build_sq_param_common(mdev, param);
 	param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
-	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
+	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
 	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
-	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
+	mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
 }
 
-void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
 			     struct mlx5e_params *params,
 			     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
-	mlx5e_build_sq_param_common(priv, param);
+	mlx5e_build_sq_param_common(mdev, param);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
-	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
+	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
-void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+void mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params,
 			       u16 q_counter,
 			       struct mlx5e_channel_param *cparam)
 {
 	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
 
-	mlx5e_build_rq_param(priv, params, NULL, q_counter, &cparam->rq);
+	mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
 
 	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
-	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev);
+	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
 
-	mlx5e_build_sq_param(priv, params, &cparam->txq_sq);
-	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
-	mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
-	mlx5e_build_async_icosq_param(priv, async_icosq_log_wq_sz, &cparam->async_icosq);
+	mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
+	mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+	mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
+	mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index a43776e8a86b..602e41a2bddd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -121,30 +121,31 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 /* Build queue parameters */
 
 void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
-void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+void mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
 			  u16 q_counter,
 			  struct mlx5e_rq_param *param);
-void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
+void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
 			       u16 q_counter,
 			       struct mlx5e_rq_param *param);
-void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev,
 				 struct mlx5e_sq_param *param);
-void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
+void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
+			  struct mlx5e_params *params,
 			  struct mlx5e_sq_param *param);
-void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
 			     struct mlx5e_params *params,
 			     struct mlx5e_cq_param *param);
-void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
 			     struct mlx5e_params *params,
 			     struct mlx5e_sq_param *param);
-void mlx5e_build_channel_param(struct mlx5e_priv *priv,
+void mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params,
 			       u16 q_counter,
 			       struct mlx5e_channel_param *cparam);
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params);
+int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 
 #endif /* __MLX5_EN_PARAMS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index bb5d108f75d0..27b5c7b143b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -389,19 +389,19 @@ static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c)
 		mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq);
 }
 
-static void mlx5e_ptp_build_sq_param(struct mlx5e_priv *priv,
+static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
 				     struct mlx5e_params *params,
 				     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq;
 
-	mlx5e_build_sq_param_common(priv, param);
+	mlx5e_build_sq_param_common(mdev, param);
 
 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 	param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
+	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
 static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c,
@@ -419,7 +419,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c,
 	/* SQ */
 	params->log_sq_size = orig->log_sq_size;
 
-	mlx5e_ptp_build_sq_param(c->priv, params, &cparams->txq_sq_param);
+	mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
 }
 
 static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 12d7ad061237..5efe3278b0f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -232,8 +232,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
 
 	memset(&param_sq, 0, sizeof(param_sq));
 	memset(&param_cq, 0, sizeof(param_cq));
-	mlx5e_build_sq_param(priv, params, &param_sq);
-	mlx5e_build_tx_cq_param(priv, params, &param_cq);
+	mlx5e_build_sq_param(priv->mdev, params, &param_sq);
+	mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq);
 	err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
 	if (err)
 		goto err_free_sq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 0bfbc8cbe840..ba36657cd297 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -238,14 +238,16 @@ static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq)
 	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 }
 
-static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *t)
+static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
+				    int max_mtu, u16 q_counter,
+				    struct mlx5e_trap *t)
 {
 	struct mlx5e_params *params = &t->params;
 
 	params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
-	mlx5e_init_rq_type_params(priv->mdev, params);
-	params->sw_mtu = priv->netdev->max_mtu;
-	mlx5e_build_rq_param(priv, params, NULL, priv->q_counter, &t->rq_param);
+	mlx5e_init_rq_type_params(mdev, params);
+	params->sw_mtu = max_mtu;
+	mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param);
 }
 
 static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
@@ -259,7 +261,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
 	if (!t)
 		return ERR_PTR(-ENOMEM);
 
-	mlx5e_build_trap_params(priv, t);
+	mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t);
 
 	t->priv     = priv;
 	t->mdev     = priv->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index d84a2299adf4..313a708e351b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -35,13 +35,14 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
 	}
 }
 
-static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
+static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
 				   struct mlx5e_params *params,
 				   struct mlx5e_xsk_param *xsk,
+				   u16 q_counter,
 				   struct mlx5e_channel_param *cparam)
 {
-	mlx5e_build_rq_param(priv, params, xsk, priv->q_counter, &cparam->rq);
-	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+	mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
+	mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
 }
 
 int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
@@ -61,7 +62,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	if (!cparam)
 		return -ENOMEM;
 
-	mlx5e_build_xsk_cparam(priv, params, xsk, cparam);
+	mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam);
 
 	err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
 			    &c->xskrq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index f5f2a8fd0046..a2b23cdd2047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -368,7 +368,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 	new_channels.params.log_rq_mtu_frames = log_rq_size;
 	new_channels.params.log_sq_size = log_sq_size;
 
-	err = mlx5e_validate_params(priv, &new_channels.params);
+	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
 	if (err)
 		goto unlock;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index efa4ba1f7260..e3196bba5955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2070,7 +2070,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 	if (!chs->c || !cparam)
 		goto err_free;
 
-	mlx5e_build_channel_param(priv, &chs->params, priv->q_counter, cparam);
+	mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
 	for (i = 0; i < chs->num; i++) {
 		struct xsk_buff_pool *xsk_pool = NULL;
 
@@ -3031,7 +3031,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
 	struct mlx5e_cq *cq = &drop_rq->cq;
 	int err;
 
-	mlx5e_build_drop_rq_param(priv, priv->drop_rq_q_counter, &rq_param);
+	mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
 
 	err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
 	if (err)
@@ -3874,7 +3874,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 
 	new_channels.params = *params;
 	new_channels.params.sw_mtu = new_mtu;
-	err = mlx5e_validate_params(priv, &new_channels.params);
+	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From ea886000a8acc4744bdbacd6f01e1a67b52da04f Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 8 Feb 2021 16:00:36 +0200
Subject: net/mlx5e: Allow creating mpwqe info without channel

Change the signature of mlx5e_rq_alloc_mpwqe_info from receiving channel
pointer to receive the NUMA node. This allows creating mpwqe_info in
context of different channels types.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index e3196bba5955..395b44141ca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -214,18 +214,17 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 }
 
-static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
-				     struct mlx5e_channel *c)
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 {
 	int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
 	rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
 						  sizeof(*rq->mpwqe.info)),
-				       GFP_KERNEL, cpu_to_node(c->cpu));
+				       GFP_KERNEL, node);
 	if (!rq->mpwqe.info)
 		return -ENOMEM;
 
-	mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
+	mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
 
 	return 0;
 }
@@ -459,7 +458,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			goto err_rq_drop_page;
 		rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
 
-		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+		err = mlx5e_rq_alloc_mpwqe_info(rq, cpu_to_node(c->cpu));
 		if (err)
 			goto err_rq_mkey;
 		break;
-- 
cgit v1.2.3


From 183532b77ddcb90002ff49713f5af08725f71b4c Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 3 Mar 2021 12:07:01 +0200
Subject: net/mlx5: Add helper to set time-stamp translator on a queue

Translation method on the time-stamp is set by the capabilities. Avoid
code duplication by using a helper to set ptp_cyc2time callback on a
queue.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c    |  4 +---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   |  8 ++------
 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h | 11 +++++++++++
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 27b5c7b143b2..fb93edf910b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -183,9 +183,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	sq->stop_room = param->stop_room;
-	sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ?
-			   mlx5_real_time_cyc2time :
-			   mlx5_timecounter_cyc2time;
+	sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
 
 	node = dev_to_node(mlx5_core_dma_dev(mdev));
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 395b44141ca9..12d0545fefc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -404,9 +404,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	rq->xdpsq   = &c->rq_xdpsq;
 	rq->xsk_pool = xsk_pool;
-	rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ?
-			   mlx5_real_time_cyc2time :
-			   mlx5_timecounter_cyc2time;
+	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
 
 	if (rq->xsk_pool)
 		rq->stats = &c->priv->channel_stats[c->ix].xskrq;
@@ -1141,9 +1139,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	if (param->is_mpw)
 		set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
 	sq->stop_room = param->stop_room;
-	sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ?
-			   mlx5_real_time_cyc2time :
-			   mlx5_timecounter_cyc2time;
+	sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index a12c7da618a7..ceae6bc378e0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -105,4 +105,15 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock,
 }
 #endif
 
+static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev)
+{
+	return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time :
+					    mlx5_timecounter_cyc2time;
+}
+
+static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev)
+{
+	return mlx5_is_real_time_sq(mdev) ? mlx5_real_time_cyc2time :
+					    mlx5_timecounter_cyc2time;
+}
 #endif
-- 
cgit v1.2.3


From 869c5f9262476aba305082c0a2365847838b2d57 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Mar 2021 15:29:53 +0200
Subject: net/mlx5e: Generalize open RQ

Unify RQ creation for different RQ types. For each RQ type add a
separate open helper which initializes the RQ specific values and
trigger a call for generic open RQ function. Avoid passing the
mlx5e_channel pointer to the generic open RQ as a container, since the
RQ may reside under a different type of channel.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c  | 170 ++++-----------------
 .../net/ethernet/mellanox/mlx5/core/en/xsk/setup.c |  47 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 126 +++++++--------
 4 files changed, 147 insertions(+), 202 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d1fca0670b12..2a9b40a96b9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -961,9 +961,9 @@ struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types
 struct mlx5e_xsk_param;
 
 struct mlx5e_rq_param;
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
-		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
-		  struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq);
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+		  struct mlx5e_xsk_param *xsk, int node,
+		  struct mlx5e_rq *rq);
 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
 void mlx5e_close_rq(struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index ba36657cd297..987035346cfc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -30,163 +30,63 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *rqp,
-			       struct mlx5e_rq_stats *stats, struct mlx5e_params *params,
-			       struct mlx5e_ch_stats *ch_stats,
-			       struct mlx5e_rq *rq)
+static void mlx5e_free_trap_rq(struct mlx5e_rq *rq)
 {
-	void *rqc_wq = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct page_pool_params pp_params = {};
-	int node = dev_to_node(mdev->device);
-	u32 pool_size;
-	int wq_sz;
-	int err;
-	int i;
-
-	rqp->wq.db_numa_node = node;
-
-	rq->wq_type  = params->rq_wq_type;
-	rq->pdev     = mdev->device;
-	rq->netdev   = priv->netdev;
-	rq->mdev     = mdev;
-	rq->priv     = priv;
-	rq->stats    = stats;
-	rq->clock    = &mdev->clock;
-	rq->tstamp   = &priv->tstamp;
-	rq->hw_mtu   = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-
-	xdp_rxq_info_unused(&rq->xdp_rxq);
-
-	rq->buff.map_dir = DMA_FROM_DEVICE;
-	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, NULL);
-	pool_size = 1 << params->log_rq_mtu_frames;
-
-	err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl);
-	if (err)
-		return err;
-
-	rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
-
-	wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
-
-	rq->wqe.info = rqp->frags_info;
-	rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
-	rq->wqe.frags =	kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
-						 (wq_sz << rq->wqe.info.log_num_frags)),
-				      GFP_KERNEL, node);
-	if (!rq->wqe.frags) {
-		err = -ENOMEM;
-		goto err_wq_cyc_destroy;
-	}
-
-	err = mlx5e_init_di_list(rq, wq_sz, node);
-	if (err)
-		goto err_free_frags;
-
-	rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
-
-	mlx5e_rq_set_trap_handlers(rq, params);
-
-	/* Create a page_pool and register it with rxq */
-	pp_params.order     = 0;
-	pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
-	pp_params.pool_size = pool_size;
-	pp_params.nid       = node;
-	pp_params.dev       = mdev->device;
-	pp_params.dma_dir   = rq->buff.map_dir;
-
-	/* page_pool can be used even when there is no rq->xdp_prog,
-	 * given page_pool does not handle DMA mapping there is no
-	 * required state to clear. And page_pool gracefully handle
-	 * elevated refcnt.
-	 */
-	rq->page_pool = page_pool_create(&pp_params);
-	if (IS_ERR(rq->page_pool)) {
-		err = PTR_ERR(rq->page_pool);
-		rq->page_pool = NULL;
-		goto err_free_di_list;
-	}
-	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_rx_wqe_cyc *wqe =
-			mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
-		int f;
-
-		for (f = 0; f < rq->wqe.info.num_frags; f++) {
-			u32 frag_size = rq->wqe.info.arr[f].frag_size |
-				MLX5_HW_START_PADDING;
-
-			wqe->data[f].byte_count = cpu_to_be32(frag_size);
-			wqe->data[f].lkey = rq->mkey_be;
-		}
-		/* check if num_frags is not a pow of two */
-		if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
-			wqe->data[f].byte_count = 0;
-			wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
-			wqe->data[f].addr = 0;
-		}
-	}
-	return 0;
-
-err_free_di_list:
+	page_pool_destroy(rq->page_pool);
 	mlx5e_free_di_list(rq);
-err_free_frags:
 	kvfree(rq->wqe.frags);
-err_wq_cyc_destroy:
 	mlx5_wq_destroy(&rq->wq_ctrl);
-
-	return err;
 }
 
-static void mlx5e_free_trap_rq(struct mlx5e_rq *rq)
+static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params,
+			       struct mlx5e_rq *rq)
 {
-	page_pool_destroy(rq->page_pool);
-	mlx5e_free_di_list(rq);
-	kvfree(rq->wqe.frags);
-	mlx5_wq_destroy(&rq->wq_ctrl);
+	struct mlx5_core_dev *mdev = t->mdev;
+	struct mlx5e_priv *priv = t->priv;
+
+	rq->wq_type      = params->rq_wq_type;
+	rq->pdev         = mdev->device;
+	rq->netdev       = priv->netdev;
+	rq->priv         = priv;
+	rq->clock        = &mdev->clock;
+	rq->tstamp       = &priv->tstamp;
+	rq->mdev         = mdev;
+	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	rq->stats        = &priv->trap_stats.rq;
+	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+	xdp_rxq_info_unused(&rq->xdp_rxq);
+	mlx5e_rq_set_trap_handlers(rq, params);
 }
 
-static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct napi_struct *napi,
-			      struct mlx5e_rq_stats *stats, struct mlx5e_params *params,
-			      struct mlx5e_rq_param *rq_param,
-			      struct mlx5e_ch_stats *ch_stats,
-			      struct mlx5e_rq *rq)
+static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t)
 {
+	struct mlx5e_rq_param *rq_param = &t->rq_param;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_create_cq_param ccp = {};
 	struct dim_cq_moder trap_moder = {};
-	struct mlx5e_cq *cq = &rq->cq;
+	struct mlx5e_rq *rq = &t->rq;
+	int node;
 	int err;
 
-	ccp.node     = dev_to_node(mdev->device);
-	ccp.ch_stats = ch_stats;
-	ccp.napi     = napi;
+	node = dev_to_node(mdev->device);
+
+	ccp.node     = node;
+	ccp.ch_stats = t->stats;
+	ccp.napi     = &t->napi;
 	ccp.ix       = 0;
-	err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, cq);
+	err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq);
 	if (err)
 		return err;
 
-	err = mlx5e_alloc_trap_rq(priv, rq_param, stats, params, ch_stats, rq);
+	mlx5e_init_trap_rq(t, &t->params, rq);
+	err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq);
 	if (err)
 		goto err_destroy_cq;
 
-	err = mlx5e_create_rq(rq, rq_param);
-	if (err)
-		goto err_free_rq;
-
-	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
-	if (err)
-		goto err_destroy_rq;
-
 	return 0;
 
-err_destroy_rq:
-	mlx5e_destroy_rq(rq);
-	mlx5e_free_rx_descs(rq);
-err_free_rq:
-	mlx5e_free_trap_rq(rq);
 err_destroy_cq:
-	mlx5e_close_cq(cq);
+	mlx5e_close_cq(&rq->cq);
 
 	return err;
 }
@@ -273,11 +173,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
 
 	netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
 
-	err = mlx5e_open_trap_rq(priv, &t->napi,
-				 &priv->trap_stats.rq,
-				 &t->params, &t->rq_param,
-				 &priv->trap_stats.ch,
-				 &t->rq);
+	err = mlx5e_open_trap_rq(priv, t);
 	if (unlikely(err))
 		goto err_napi_del;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 313a708e351b..a8315f166696 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -45,6 +45,51 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
 	mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
 }
 
+static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
+			     struct mlx5e_params *params,
+			     struct xsk_buff_pool *pool,
+			     struct mlx5e_xsk_param *xsk,
+			     struct mlx5e_rq *rq)
+{
+	struct mlx5_core_dev *mdev = c->mdev;
+	int rq_xdp_ix;
+	int err;
+
+	rq->wq_type      = params->rq_wq_type;
+	rq->pdev         = c->pdev;
+	rq->netdev       = c->netdev;
+	rq->priv         = c->priv;
+	rq->tstamp       = c->tstamp;
+	rq->clock        = &mdev->clock;
+	rq->icosq        = &c->icosq;
+	rq->ix           = c->ix;
+	rq->mdev         = mdev;
+	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	rq->xdpsq        = &c->rq_xdpsq;
+	rq->xsk_pool     = pool;
+	rq->stats        = &c->priv->channel_stats[c->ix].xskrq;
+	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+	rq_xdp_ix        = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK;
+	err = mlx5e_rq_set_handlers(rq, params, xsk);
+	if (err)
+		return err;
+
+	return  xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
+}
+
+static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+			     struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool,
+			     struct mlx5e_xsk_param *xsk)
+{
+	int err;
+
+	err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq);
+	if (err)
+		return err;
+
+	return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq);
+}
+
 int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 		   struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool,
 		   struct mlx5e_channel *c)
@@ -69,7 +114,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	if (unlikely(err))
 		goto err_free_cparam;
 
-	err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq);
+	err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk);
 	if (unlikely(err))
 		goto err_close_rx_cq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 12d0545fefc4..08188b8b2d9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -373,56 +373,53 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
 	 __free_page(rq->wqe_overflow.page);
 }
 
-static int mlx5e_alloc_rq(struct mlx5e_channel *c,
-			  struct mlx5e_params *params,
+static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+			     struct mlx5e_rq *rq)
+{
+	struct mlx5_core_dev *mdev = c->mdev;
+	int err;
+
+	rq->wq_type      = params->rq_wq_type;
+	rq->pdev         = c->pdev;
+	rq->netdev       = c->netdev;
+	rq->priv         = c->priv;
+	rq->tstamp       = c->tstamp;
+	rq->clock        = &mdev->clock;
+	rq->icosq        = &c->icosq;
+	rq->ix           = c->ix;
+	rq->mdev         = mdev;
+	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	rq->xdpsq        = &c->rq_xdpsq;
+	rq->stats        = &c->priv->channel_stats[c->ix].rq;
+	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+	err = mlx5e_rq_set_handlers(rq, params, NULL);
+	if (err)
+		return err;
+
+	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+}
+
+static int mlx5e_alloc_rq(struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
-			  struct xsk_buff_pool *xsk_pool,
 			  struct mlx5e_rq_param *rqp,
-			  struct mlx5e_rq *rq)
+			  int node, struct mlx5e_rq *rq)
 {
 	struct page_pool_params pp_params = { 0 };
-	struct mlx5_core_dev *mdev = c->mdev;
+	struct mlx5_core_dev *mdev = rq->mdev;
 	void *rqc = rqp->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
-	u32 rq_xdp_ix;
 	u32 pool_size;
 	int wq_sz;
 	int err;
 	int i;
 
-	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
-
-	rq->wq_type = params->rq_wq_type;
-	rq->pdev    = c->pdev;
-	rq->netdev  = c->netdev;
-	rq->priv    = c->priv;
-	rq->tstamp  = c->tstamp;
-	rq->clock   = &mdev->clock;
-	rq->icosq   = &c->icosq;
-	rq->ix      = c->ix;
-	rq->mdev    = mdev;
-	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	rq->xdpsq   = &c->rq_xdpsq;
-	rq->xsk_pool = xsk_pool;
-	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
-
-	if (rq->xsk_pool)
-		rq->stats = &c->priv->channel_stats[c->ix].xskrq;
-	else
-		rq->stats = &c->priv->channel_stats[c->ix].rq;
+	rqp->wq.db_numa_node = node;
 	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
 
 	if (params->xdp_prog)
 		bpf_prog_inc(params->xdp_prog);
 	RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
 
-	rq_xdp_ix = rq->ix;
-	if (xsk)
-		rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
-	err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
-	if (err < 0)
-		goto err_rq_xdp_prog;
-
 	rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
 	pool_size = 1 << params->log_rq_mtu_frames;
@@ -432,7 +429,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
 					&rq->wq_ctrl);
 		if (err)
-			goto err_rq_xdp;
+			goto err_rq_xdp_prog;
 
 		err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
 		if (err)
@@ -456,7 +453,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			goto err_rq_drop_page;
 		rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
 
-		err = mlx5e_rq_alloc_mpwqe_info(rq, cpu_to_node(c->cpu));
+		err = mlx5e_rq_alloc_mpwqe_info(rq, node);
 		if (err)
 			goto err_rq_mkey;
 		break;
@@ -464,7 +461,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
 					 &rq->wq_ctrl);
 		if (err)
-			goto err_rq_xdp;
+			goto err_rq_xdp_prog;
 
 		rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
 
@@ -476,23 +473,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->wqe.frags =
 			kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
 					(wq_sz << rq->wqe.info.log_num_frags)),
-				      GFP_KERNEL, cpu_to_node(c->cpu));
+				      GFP_KERNEL, node);
 		if (!rq->wqe.frags) {
 			err = -ENOMEM;
 			goto err_rq_wq_destroy;
 		}
 
-		err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu));
+		err = mlx5e_init_di_list(rq, wq_sz, node);
 		if (err)
 			goto err_rq_frags;
 
-		rq->mkey_be = c->mkey_be;
+		rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
 	}
 
-	err = mlx5e_rq_set_handlers(rq, params, xsk);
-	if (err)
-		goto err_free_by_rq_type;
-
 	if (xsk) {
 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 						 MEM_TYPE_XSK_BUFF_POOL, NULL);
@@ -502,8 +495,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		pp_params.order     = 0;
 		pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
 		pp_params.pool_size = pool_size;
-		pp_params.nid       = cpu_to_node(c->cpu);
-		pp_params.dev       = c->pdev;
+		pp_params.nid       = node;
+		pp_params.dev       = rq->pdev;
 		pp_params.dma_dir   = rq->buff.map_dir;
 
 		/* page_pool can be used even when there is no rq->xdp_prog,
@@ -587,8 +580,6 @@ err_rq_frags:
 	}
 err_rq_wq_destroy:
 	mlx5_wq_destroy(&rq->wq_ctrl);
-err_rq_xdp:
-	xdp_rxq_info_unreg(&rq->xdp_rxq);
 err_rq_xdp_prog:
 	if (params->xdp_prog)
 		bpf_prog_put(params->xdp_prog);
@@ -840,13 +831,14 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 
 }
 
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
-		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
-		  struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq)
+int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
+		  struct mlx5e_xsk_param *xsk, int node,
+		  struct mlx5e_rq *rq)
 {
+	struct mlx5_core_dev *mdev = rq->mdev;
 	int err;
 
-	err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq);
+	err = mlx5e_alloc_rq(params, xsk, param, node, rq);
 	if (err)
 		return err;
 
@@ -858,28 +850,28 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	if (err)
 		goto err_destroy_rq;
 
-	if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev))
-		__set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be FPGA */
+	if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev))
+		__set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
 
-	if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
-		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
+	if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
+		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
 
 	if (params->rx_dim_enabled)
-		__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+		__set_bit(MLX5E_RQ_STATE_AM, &rq->state);
 
 	/* We disable csum_complete when XDP is enabled since
 	 * XDP programs might manipulate packets which will render
 	 * skb->checksum incorrect.
 	 */
-	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
-		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
+		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
 
 	/* For CQE compression on striding RQ, use stride index provided by
 	 * HW if capability is supported.
 	 */
 	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
-	    MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index))
-		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state);
+	    MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
+		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
 
 	return 0;
 
@@ -1810,6 +1802,18 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
 	return err;
 }
 
+static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
+			     struct mlx5e_rq_param *rq_params)
+{
+	int err;
+
+	err = mlx5e_init_rxq_rq(c, params, &c->rq);
+	if (err)
+		return err;
+
+	return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
+}
+
 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
 			     struct mlx5e_channel_param *cparam)
@@ -1870,7 +1874,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 			goto err_close_sqs;
 	}
 
-	err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
+	err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
 	if (err)
 		goto err_close_xdp_sq;
 
-- 
cgit v1.2.3


From a8dd7ac12fc3f6d37758e8c1e650600d71554a21 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 8 Feb 2021 18:25:56 +0200
Subject: net/mlx5e: Generalize RQ activation

Support RQ activation for RQs without an ICOSQ in the main flow, like
existing trap-RQ and like PTP-RQ that will be introduced in the coming
patches in the patchset.
With this patch, remove the wrapper in traps to deactivate the trap-RQ.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 15 ++-------------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  5 ++++-
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index 987035346cfc..d6e6641e9288 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -128,16 +128,6 @@ static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct
 	mlx5e_destroy_tir(mdev, tir);
 }
 
-static void mlx5e_activate_trap_rq(struct mlx5e_rq *rq)
-{
-	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-}
-
-static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq)
-{
-	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-}
-
 static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev,
 				    int max_mtu, u16 q_counter,
 				    struct mlx5e_trap *t)
@@ -202,15 +192,14 @@ void mlx5e_close_trap(struct mlx5e_trap *trap)
 static void mlx5e_activate_trap(struct mlx5e_trap *trap)
 {
 	napi_enable(&trap->napi);
-	mlx5e_activate_trap_rq(&trap->rq);
-	napi_schedule(&trap->napi);
+	mlx5e_activate_rq(&trap->rq);
 }
 
 void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
 {
 	struct mlx5e_trap *trap = priv->en_trap;
 
-	mlx5e_deactivate_trap_rq(&trap->rq);
+	mlx5e_deactivate_rq(&trap->rq);
 	napi_disable(&trap->napi);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 08188b8b2d9e..06df647b2beb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -886,7 +886,10 @@ err_free_rq:
 void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-	mlx5e_trigger_irq(rq->icosq);
+	if (rq->icosq)
+		mlx5e_trigger_irq(rq->icosq);
+	else
+		napi_schedule(rq->cq.napi);
 }
 
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
-- 
cgit v1.2.3


From e078e8df4224d1c422081192e9a6d3db85fa1634 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 8 Feb 2021 20:56:02 +0200
Subject: net/mlx5e: Generalize close RQ

Allow different flavours of RQ to use the same close flow. Add validity
checks to support different RQ types which not necessarily initialize
all the RQ's functionality.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 12 +-----------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++++-----
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index d6e6641e9288..86ab4e864fe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -30,14 +30,6 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static void mlx5e_free_trap_rq(struct mlx5e_rq *rq)
-{
-	page_pool_destroy(rq->page_pool);
-	mlx5e_free_di_list(rq);
-	kvfree(rq->wqe.frags);
-	mlx5_wq_destroy(&rq->wq_ctrl);
-}
-
 static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params,
 			       struct mlx5e_rq *rq)
 {
@@ -93,9 +85,7 @@ err_destroy_cq:
 
 static void mlx5e_close_trap_rq(struct mlx5e_rq *rq)
 {
-	mlx5e_destroy_rq(rq);
-	mlx5e_free_rx_descs(rq);
-	mlx5e_free_trap_rq(rq);
+	mlx5e_close_rq(rq);
 	mlx5e_close_cq(&rq->cq);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 06df647b2beb..f2884edd4b61 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -592,10 +592,12 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	struct bpf_prog *old_prog;
 	int i;
 
-	old_prog = rcu_dereference_protected(rq->xdp_prog,
-					     lockdep_is_held(&rq->priv->state_lock));
-	if (old_prog)
-		bpf_prog_put(old_prog);
+	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
+		old_prog = rcu_dereference_protected(rq->xdp_prog,
+						     lockdep_is_held(&rq->priv->state_lock));
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
 
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@ -901,7 +903,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
 void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
 	cancel_work_sync(&rq->dim.work);
-	cancel_work_sync(&rq->icosq->recover_work);
+	if (rq->icosq)
+		cancel_work_sync(&rq->icosq->recover_work);
 	cancel_work_sync(&rq->recover_work);
 	mlx5e_destroy_rq(rq);
 	mlx5e_free_rx_descs(rq);
-- 
cgit v1.2.3


From 42212d997155c24a51fde5dcba9e9de166e2221f Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 25 Feb 2021 17:46:25 +0200
Subject: net/mlx5e: Generalize direct-TIRs and direct-RQTs API

Add input parameter indicating the size of direct-TIRs/direct-RQTs array
to be created/destroyed. This allows next patches in the patch-set to
handle a single direct-TIR pointing to a direct-RQT with a single entry.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  8 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 43 ++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 15 +++++---
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 15 +++++---
 4 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2a9b40a96b9a..1158d8aefbe2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1080,10 +1080,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
 int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
 void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
 
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
 
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f2884edd4b61..df941fe808ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2223,12 +2223,12 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
 	return err;
 }
 
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
+int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
 {
 	int err;
 	int ix;
 
-	for (ix = 0; ix < priv->max_nch; ix++) {
+	for (ix = 0; ix < n; ix++) {
 		err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
 		if (unlikely(err))
 			goto err_destroy_rqts;
@@ -2244,11 +2244,11 @@ err_destroy_rqts:
 	return err;
 }
 
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
+void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
 {
 	int i;
 
-	for (i = 0; i < priv->max_nch; i++)
+	for (i = 0; i < n; i++)
 		mlx5e_destroy_rqt(priv, &tirs[i].rqt);
 }
 
@@ -3249,7 +3249,7 @@ err_destroy_inner_tirs:
 	return err;
 }
 
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
+int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
 {
 	struct mlx5e_tir *tir;
 	void *tirc;
@@ -3263,7 +3263,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 	if (!in)
 		return -ENOMEM;
 
-	for (ix = 0; ix < priv->max_nch; ix++) {
+	for (ix = 0; ix < n; ix++) {
 		memset(in, 0, inlen);
 		tir = &tirs[ix];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
@@ -3301,11 +3301,11 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
 		mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
 }
 
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
+void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
 {
 	int i;
 
-	for (i = 0; i < priv->max_nch; i++)
+	for (i = 0; i < n; i++)
 		mlx5e_destroy_tir(priv->mdev, &tirs[i]);
 }
 
@@ -4901,6 +4901,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u16 max_nch = priv->max_nch;
 	int err;
 
 	mlx5e_create_q_counters(priv);
@@ -4915,7 +4916,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_close_drop_rq;
 
-	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_rqts;
 
@@ -4923,15 +4924,15 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_tirs;
 
-	err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
+	err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch);
 	if (unlikely(err))
 		goto err_destroy_direct_tirs;
 
-	err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
+	err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch);
 	if (unlikely(err))
 		goto err_destroy_xsk_rqts;
 
@@ -4960,15 +4961,15 @@ err_tc_nic_cleanup:
 err_destroy_flow_steering:
 	mlx5e_destroy_flow_steering(priv);
 err_destroy_xsk_tirs:
-	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
 err_destroy_xsk_rqts:
-	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_tirs:
 	mlx5e_destroy_indirect_tirs(priv);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_rqts:
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 err_close_drop_rq:
@@ -4980,14 +4981,16 @@ err_destroy_q_counters:
 
 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
 {
+	u16 max_nch = priv->max_nch;
+
 	mlx5e_accel_cleanup_rx(priv);
 	mlx5e_tc_nic_cleanup(priv);
 	mlx5e_destroy_flow_steering(priv);
-	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
-	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
+	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_indirect_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 	mlx5e_close_drop_rq(&priv->drop_rq);
 	mlx5e_destroy_q_counters(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e5123e9182c0..b597fc3b192b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -753,6 +753,7 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u16 max_nch = priv->max_nch;
 	int err;
 
 	mlx5e_init_l2_addr(priv);
@@ -767,7 +768,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_close_drop_rq;
 
-	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_rqts;
 
@@ -775,7 +776,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_tirs;
 
@@ -800,11 +801,11 @@ err_destroy_root_ft:
 err_destroy_ttc_table:
 	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_tirs:
 	mlx5e_destroy_indirect_tirs(priv);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_rqts:
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 err_close_drop_rq:
@@ -814,13 +815,15 @@ err_close_drop_rq:
 
 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
+	u16 max_nch = priv->max_nch;
+
 	mlx5e_ethtool_cleanup_steering(priv);
 	rep_vport_rx_rule_destroy(priv);
 	mlx5e_destroy_rep_root_ft(priv);
 	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_indirect_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 	mlx5e_close_drop_rq(&priv->drop_rq);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 79f26b56e0a4..63ca73105149 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -373,6 +373,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u16 max_nch = priv->max_nch;
 	int err;
 
 	mlx5e_create_q_counters(priv);
@@ -387,7 +388,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_close_drop_rq;
 
-	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
+	err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_rqts;
 
@@ -395,7 +396,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 	if (err)
 		goto err_destroy_direct_rqts;
 
-	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
+	err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
 	if (err)
 		goto err_destroy_indirect_tirs;
 
@@ -406,11 +407,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 	return 0;
 
 err_destroy_direct_tirs:
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_tirs:
 	mlx5e_destroy_indirect_tirs(priv);
 err_destroy_direct_rqts:
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 err_destroy_indirect_rqts:
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 err_close_drop_rq:
@@ -422,10 +423,12 @@ err_destroy_q_counters:
 
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
+	u16 max_nch = priv->max_nch;
+
 	mlx5i_destroy_flow_steering(priv);
-	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
+	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_indirect_tirs(priv);
-	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
+	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
 	mlx5e_destroy_rqt(priv, &priv->indir_rqt);
 	mlx5e_close_drop_rq(&priv->drop_rq);
 	mlx5e_destroy_q_counters(priv);
-- 
cgit v1.2.3


From b0d35de441ab8cdb9f2f38976144e652cf0ee837 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Mar 2021 15:41:27 +0200
Subject: net/mlx5e: Generalize PTP implementation

Following patches in the set add support for RX PTP. Rename PTP prefix
from %s/port_ptp/ptp/g to include RX PTP too.

In addition rename indication (used in statistics context) that PTP-SQ
was opened: %s/port_ptp_opened/tx_ptp_opened/g. This will simplify adding
indication that PTP-RQ was opened.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       | 10 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   | 39 +++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h   | 12 +++----
 .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   |  8 ++---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 30 ++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 18 +++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |  2 +-
 8 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1158d8aefbe2..aad2a752f7e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -707,11 +707,11 @@ struct mlx5e_channel {
 	int                        cpu;
 };
 
-struct mlx5e_port_ptp;
+struct mlx5e_ptp;
 
 struct mlx5e_channels {
 	struct mlx5e_channel **c;
-	struct mlx5e_port_ptp  *port_ptp;
+	struct mlx5e_ptp      *ptp;
 	unsigned int           num;
 	struct mlx5e_params    params;
 };
@@ -726,7 +726,7 @@ struct mlx5e_channel_stats {
 	struct mlx5e_xdpsq_stats xsksq;
 } ____cacheline_aligned_in_smp;
 
-struct mlx5e_port_ptp_stats {
+struct mlx5e_ptp_stats {
 	struct mlx5e_ch_stats ch;
 	struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
@@ -855,10 +855,10 @@ struct mlx5e_priv {
 	struct mlx5e_stats         stats;
 	struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
 	struct mlx5e_channel_stats trap_stats;
-	struct mlx5e_port_ptp_stats port_ptp_stats;
+	struct mlx5e_ptp_stats     ptp_stats;
 	u16                        max_nch;
 	u8                         max_opened_tc;
-	bool                       port_ptp_opened;
+	bool                       tx_ptp_opened;
 	struct hwtstamp_config     tstamp;
 	u16                        q_counter;
 	u16                        drop_rq_q_counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index fb93edf910b9..2bc6d4362670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -116,8 +116,7 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
 
 static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 {
-	struct mlx5e_port_ptp *c = container_of(napi, struct mlx5e_port_ptp,
-						napi);
+	struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi);
 	struct mlx5e_ch_stats *ch_stats = c->stats;
 	bool busy = false;
 	int work_done = 0;
@@ -153,7 +152,7 @@ out:
 	return work_done;
 }
 
-static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
+static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
 				 struct mlx5e_params *params,
 				 struct mlx5e_sq_param *param,
 				 struct mlx5e_txqsq *sq, int tc,
@@ -177,7 +176,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix,
 	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
-	sq->stats     = &c->priv->port_ptp_stats.sq[tc];
+	sq->stats     = &c->priv->ptp_stats.sq[tc];
 	sq->ptpsq     = ptpsq;
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
@@ -241,7 +240,7 @@ static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo)
 	kvfree(skb_fifo->fifo);
 }
 
-static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
+static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,
 				int txq_ix, struct mlx5e_ptp_params *cparams,
 				int tc, struct mlx5e_ptpsq *ptpsq)
 {
@@ -291,7 +290,7 @@ static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq)
 	mlx5e_free_txqsq(sq);
 }
 
-static int mlx5e_ptp_open_txqsqs(struct mlx5e_port_ptp *c,
+static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c,
 				 struct mlx5e_ptp_params *cparams)
 {
 	struct mlx5e_params *params = &cparams->params;
@@ -319,7 +318,7 @@ close_txqsq:
 	return err;
 }
 
-static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c)
+static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c)
 {
 	int tc;
 
@@ -327,7 +326,7 @@ static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c)
 		mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
 }
 
-static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c,
+static int mlx5e_ptp_open_cqs(struct mlx5e_ptp *c,
 			      struct mlx5e_ptp_params *cparams)
 {
 	struct mlx5e_params *params = &cparams->params;
@@ -360,7 +359,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c,
 		if (err)
 			goto out_err_ts_cq;
 
-		ptpsq->cq_stats = &c->priv->port_ptp_stats.cq[tc];
+		ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc];
 	}
 
 	return 0;
@@ -376,7 +375,7 @@ out_err_txqsq_cq:
 	return err;
 }
 
-static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c)
+static void mlx5e_ptp_close_cqs(struct mlx5e_ptp *c)
 {
 	int tc;
 
@@ -402,7 +401,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
 	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
-static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c,
+static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
 				   struct mlx5e_ptp_params *cparams,
 				   struct mlx5e_params *orig)
 {
@@ -420,7 +419,7 @@ static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c,
 	mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
 }
 
-static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c,
+static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
 				 struct mlx5e_ptp_params *cparams)
 {
 	int err;
@@ -441,19 +440,19 @@ close_cqs:
 	return err;
 }
 
-static void mlx5e_ptp_close_queues(struct mlx5e_port_ptp *c)
+static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
 {
 	mlx5e_ptp_close_txqsqs(c);
 	mlx5e_ptp_close_cqs(c);
 }
 
-int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
-			u8 lag_port, struct mlx5e_port_ptp **cp)
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		   u8 lag_port, struct mlx5e_ptp **cp)
 {
 	struct net_device *netdev = priv->netdev;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_ptp_params *cparams;
-	struct mlx5e_port_ptp *c;
+	struct mlx5e_ptp *c;
 	unsigned int irq;
 	int err;
 	int eqn;
@@ -475,7 +474,7 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->netdev   = priv->netdev;
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
 	c->num_tc   = params->num_tc;
-	c->stats    = &priv->port_ptp_stats.ch;
+	c->stats    = &priv->ptp_stats.ch;
 	c->lag_port = lag_port;
 
 	netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
@@ -500,7 +499,7 @@ err_napi_del:
 	return err;
 }
 
-void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c)
+void mlx5e_ptp_close(struct mlx5e_ptp *c)
 {
 	mlx5e_ptp_close_queues(c);
 	netif_napi_del(&c->napi);
@@ -508,7 +507,7 @@ void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c)
 	kvfree(c);
 }
 
-void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c)
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
 {
 	int tc;
 
@@ -518,7 +517,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c)
 		mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
 }
 
-void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c)
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
 {
 	int tc;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 90c98ea63b7f..4cae06f2c312 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -17,7 +17,7 @@ struct mlx5e_ptpsq {
 	struct mlx5e_ptp_cq_stats *cq_stats;
 };
 
-struct mlx5e_port_ptp {
+struct mlx5e_ptp {
 	/* data path */
 	struct mlx5e_ptpsq         ptpsq[MLX5E_MAX_NUM_TC];
 	struct napi_struct         napi;
@@ -43,11 +43,11 @@ struct mlx5e_ptp_params {
 	struct mlx5e_sq_param      txq_sq_param;
 };
 
-int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
-			u8 lag_port, struct mlx5e_port_ptp **cp);
-void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c);
-void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c);
-void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c);
+int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
+		   u8 lag_port, struct mlx5e_ptp **cp);
+void mlx5e_ptp_close(struct mlx5e_ptp *c);
+void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
+void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
 
 enum {
 	MLX5E_SKB_CB_CQE_HWTSTAMP  = BIT(0),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 63ee3b9416de..e107801adf48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -315,8 +315,8 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte
 	if (err)
 		return err;
 
-	generic_ptpsq = priv->channels.port_ptp ?
-			&priv->channels.port_ptp->ptpsq[0] :
+	generic_ptpsq = priv->channels.ptp ?
+			&priv->channels.ptp->ptpsq[0] :
 			NULL;
 	if (!generic_ptpsq)
 		goto out;
@@ -346,7 +346,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp;
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 
 	int i, tc, err = 0;
 
@@ -460,7 +460,7 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
 static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
 					  struct devlink_fmsg *fmsg)
 {
-	struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp;
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 	struct mlx5_rsc_key key = {};
 	int i, tc, err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index a2b23cdd2047..cf319f06521d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -2032,7 +2032,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
 					 mlx5e_num_channels_changed_ctx, NULL);
 out:
 	if (!err)
-		priv->port_ptp_opened = true;
+		priv->tx_ptp_opened = true;
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index df941fe808ab..40a62d2e9558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2088,8 +2088,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 	}
 
 	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) {
-		err = mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port,
-					  &chs->port_ptp);
+		err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
 		if (err)
 			goto err_close_channels;
 	}
@@ -2103,8 +2102,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 	return 0;
 
 err_close_ptp:
-	if (chs->port_ptp)
-		mlx5e_port_ptp_close(chs->port_ptp);
+	if (chs->ptp)
+		mlx5e_ptp_close(chs->ptp);
 
 err_close_channels:
 	for (i--; i >= 0; i--)
@@ -2124,8 +2123,8 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs)
 	for (i = 0; i < chs->num; i++)
 		mlx5e_activate_channel(chs->c[i]);
 
-	if (chs->port_ptp)
-		mlx5e_ptp_activate_channel(chs->port_ptp);
+	if (chs->ptp)
+		mlx5e_ptp_activate_channel(chs->ptp);
 }
 
 #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
@@ -2152,8 +2151,8 @@ static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
 {
 	int i;
 
-	if (chs->port_ptp)
-		mlx5e_ptp_deactivate_channel(chs->port_ptp);
+	if (chs->ptp)
+		mlx5e_ptp_deactivate_channel(chs->ptp);
 
 	for (i = 0; i < chs->num; i++)
 		mlx5e_deactivate_channel(chs->c[i]);
@@ -2163,11 +2162,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
 {
 	int i;
 
-	if (chs->port_ptp) {
-		mlx5e_port_ptp_close(chs->port_ptp);
-		chs->port_ptp = NULL;
+	if (chs->ptp) {
+		mlx5e_ptp_close(chs->ptp);
+		chs->ptp = NULL;
 	}
-
 	for (i = 0; i < chs->num; i++)
 		mlx5e_close_channel(chs->c[i]);
 
@@ -2758,11 +2756,11 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
 		}
 	}
 
-	if (!priv->channels.port_ptp)
+	if (!priv->channels.ptp)
 		return;
 
 	for (tc = 0; tc < num_tc; tc++) {
-		struct mlx5e_port_ptp *c = priv->channels.port_ptp;
+		struct mlx5e_ptp *c = priv->channels.ptp;
 		struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
 
 		priv->txq2sq[sq->txq_ix] = sq;
@@ -3486,9 +3484,9 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
 			s->tx_dropped    += sq_stats->dropped;
 		}
 	}
-	if (priv->port_ptp_opened) {
+	if (priv->tx_ptp_opened) {
 		for (i = 0; i < priv->max_opened_tc; i++) {
-			struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i];
+			struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
 
 			s->tx_packets    += sq_stats->packets;
 			s->tx_bytes      += sq_stats->bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 92c5b81427b9..32331ac9288c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -407,13 +407,13 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
 {
 	int i;
 
-	if (!priv->port_ptp_opened)
+	if (!priv->tx_ptp_opened)
 		return;
 
-	mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch);
+	mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
 
 	for (i = 0; i < priv->max_opened_tc; i++) {
-		mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]);
+		mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
 
 		/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
 		barrier();
@@ -1851,7 +1851,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
 
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
 {
-	return priv->port_ptp_opened ?
+	return priv->tx_ptp_opened ?
 	       NUM_PTP_CH_STATS +
 	       ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) :
 	       0;
@@ -1861,7 +1861,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
 {
 	int i, tc;
 
-	if (!priv->port_ptp_opened)
+	if (!priv->tx_ptp_opened)
 		return idx;
 
 	for (i = 0; i < NUM_PTP_CH_STATS; i++)
@@ -1884,24 +1884,24 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp)
 {
 	int i, tc;
 
-	if (!priv->port_ptp_opened)
+	if (!priv->tx_ptp_opened)
 		return idx;
 
 	for (i = 0; i < NUM_PTP_CH_STATS; i++)
 		data[idx++] =
-			MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch,
+			MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch,
 					     ptp_ch_stats_desc, i);
 
 	for (tc = 0; tc < priv->max_opened_tc; tc++)
 		for (i = 0; i < NUM_PTP_SQ_STATS; i++)
 			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc],
+				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
 						     ptp_sq_stats_desc, i);
 
 	for (tc = 0; tc < priv->max_opened_tc; tc++)
 		for (i = 0; i < NUM_PTP_CQ_STATS; i++)
 			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc],
+				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
 						     ptp_cq_stats_desc, i);
 
 	return idx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index d2efe2455955..cfb6ffd7df54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -142,7 +142,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 				return txq_ix;
 		}
 
-		if (unlikely(priv->channels.port_ptp))
+		if (unlikely(priv->channels.ptp))
 			if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
 			    mlx5e_use_ptpsq(skb))
 				return mlx5e_select_ptpsq(dev, skb);
-- 
cgit v1.2.3


From e569cbd729245972cf9ef8469c3cb502892e5724 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 17 Jan 2021 15:25:27 +0200
Subject: net/mlx5e: Cleanup PTP

Reduce scope of mlx5e_ptp_params, move to its c file. Remove unneeded
variables from mlx5e_ptp_open and state bitmap from PTP channel. In
addition, remove channel index from PTP channel since it is set to a
hard coded value, use define instead.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 18 ++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h |  8 --------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 2bc6d4362670..92a41b1bcdb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -3,6 +3,14 @@
 
 #include "en/ptp.h"
 #include "en/txrx.h"
+#include "en/params.h"
+
+#define MLX5E_PTP_CHANNEL_IX 0
+
+struct mlx5e_ptp_params {
+	struct mlx5e_params params;
+	struct mlx5e_sq_param txq_sq_param;
+};
 
 struct mlx5e_skb_cb_hwtstamp {
 	ktime_t cqe_hwtstamp;
@@ -171,7 +179,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
 	sq->netdev    = c->netdev;
 	sq->priv      = c->priv;
 	sq->mdev      = mdev;
-	sq->ch_ix     = c->ix;
+	sq->ch_ix     = MLX5E_PTP_CHANNEL_IX;
 	sq->txq_ix    = txq_ix;
 	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
@@ -339,7 +347,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_ptp *c,
 	ccp.node     = dev_to_node(mlx5_core_dma_dev(c->mdev));
 	ccp.ch_stats = c->stats;
 	ccp.napi     = &c->napi;
-	ccp.ix       = c->ix;
+	ccp.ix       = MLX5E_PTP_CHANNEL_IX;
 
 	cq_param = &cparams->txq_sq_param.cqp;
 
@@ -453,13 +461,8 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_ptp_params *cparams;
 	struct mlx5e_ptp *c;
-	unsigned int irq;
 	int err;
-	int eqn;
 
-	err = mlx5_vector2eqn(priv->mdev, 0, &eqn, &irq);
-	if (err)
-		return err;
 
 	c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
 	cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
@@ -469,7 +472,6 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->priv     = priv;
 	c->mdev     = priv->mdev;
 	c->tstamp   = &priv->tstamp;
-	c->ix       = 0;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
 	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 4cae06f2c312..937530afaf14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -5,7 +5,6 @@
 #define __MLX5_EN_PTP_H__
 
 #include "en.h"
-#include "en/params.h"
 #include "en_stats.h"
 
 struct mlx5e_ptpsq {
@@ -34,13 +33,6 @@ struct mlx5e_ptp {
 	struct mlx5e_priv         *priv;
 	struct mlx5_core_dev      *mdev;
 	struct hwtstamp_config    *tstamp;
-	DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
-	int                        ix;
-};
-
-struct mlx5e_ptp_params {
-	struct mlx5e_params        params;
-	struct mlx5e_sq_param      txq_sq_param;
 };
 
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
-- 
cgit v1.2.3


From 31a91220a27d49b138af3b67d9252494ef810a18 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 15 Mar 2021 12:30:04 +0000
Subject: net/mlx5: Fix spelling mistakes in mlx5_core_info message

There are two spelling mistakes in a mlx5_core_info message. Fix them.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index a0a851640804..9ff163c5bcde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -340,7 +340,7 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
 		return -EIO;
 	}
 
-	mlx5_core_info(dev, "health revovery succeded\n");
+	mlx5_core_info(dev, "health recovery succeeded\n");
 	return 0;
 }
 
-- 
cgit v1.2.3


From b0c407ec503b8e97bbffd65cc4bed39e008c192f Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Wed, 24 Mar 2021 16:31:47 +0800
Subject: bpf: Remove unused headers

The header <linux/version.h> is useless in sampleip_kern.c
and trace_event_kern.c, remove it.

Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210324083147.149278-1-luwei32@huawei.com
---
 samples/bpf/sampleip_kern.c    | 1 -
 samples/bpf/trace_event_kern.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index f24806ac24e7..a3f8a3998e0a 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -4,7 +4,6 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
-#include <linux/version.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/bpf_perf_event.h>
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 7d3c66fb3f88..0bba5fcd7d24 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -5,7 +5,6 @@
  * License as published by the Free Software Foundation.
  */
 #include <linux/ptrace.h>
-#include <linux/version.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/bpf_perf_event.h>
 #include <uapi/linux/perf_event.h>
-- 
cgit v1.2.3


From fcb8d0d7587e0f2b7439d6c14a380fd17a450f96 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 25 Mar 2021 15:06:02 +0800
Subject: bpf: struct sock is declared twice in bpf_sk_storage header

struct sock has been declared twice, therefore remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210325070602.858024-1-wanjiabing@vivo.com
---
 include/net/bpf_sk_storage.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 0e85713f56df..2926f1f00d65 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -27,7 +27,6 @@ struct bpf_local_storage_elem;
 struct bpf_sk_storage_diag;
 struct sk_buff;
 struct nlattr;
-struct sock;
 
 #ifdef CONFIG_BPF_SYSCALL
 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
-- 
cgit v1.2.3


From 36e7985160782bc683001afe09e33a288435def0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 25 Mar 2021 21:30:36 -0700
Subject: libbpf: Preserve empty DATASEC BTFs during static linking

Ensure that BPF static linker preserves all DATASEC BTF types, even if some of
them might not have any variable information at all. This may happen if the
compiler promotes local initialized variable contents into .rodata section and
there are no global or static functions in the program.

For example,

  $ cat t.c
  struct t { char a; char b; char c; };
  void bar(struct t*);
  void find() {
     struct t tmp = {1, 2, 3};
     bar(&tmp);
  }

  $ clang -target bpf -O2 -g -S t.c
         .long   104                             # BTF_KIND_DATASEC(id = 8)
         .long   251658240                       # 0xf000000
         .long   0

         .ascii  ".rodata"                       # string offset=104

  $ clang -target bpf -O2 -g -c t.c
  $ readelf -S t.o | grep data
     [ 4] .rodata           PROGBITS         0000000000000000  00000090

Fixes: 8fd27bf69b86 ("libbpf: Add BPF static linker BTF and BTF.ext support")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210326043036.3081011-1-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 5e0aa2f2c0ca..a29d62ff8041 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -94,6 +94,7 @@ struct dst_sec {
 	int sec_sym_idx;
 
 	/* section's DATASEC variable info, emitted on BTF finalization */
+	bool has_btf;
 	int sec_var_cnt;
 	struct btf_var_secinfo *sec_vars;
 
@@ -1436,6 +1437,16 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 			continue;
 		dst_sec = &linker->secs[src_sec->dst_id];
 
+		/* Mark section as having BTF regardless of the presence of
+		 * variables. In some cases compiler might generate empty BTF
+		 * with no variables information. E.g., when promoting local
+		 * array/structure variable initial values and BPF object
+		 * file otherwise has no read-only static variables in
+		 * .rodata. We need to preserve such empty BTF and just set
+		 * correct section size.
+		 */
+		dst_sec->has_btf = true;
+
 		t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
 		src_var = btf_var_secinfos(t);
 		n = btf_vlen(t);
@@ -1717,7 +1728,7 @@ static int finalize_btf(struct bpf_linker *linker)
 	for (i = 1; i < linker->sec_cnt; i++) {
 		struct dst_sec *sec = &linker->secs[i];
 
-		if (!sec->sec_var_cnt)
+		if (!sec->has_btf)
 			continue;
 
 		id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
-- 
cgit v1.2.3


From 353cac0e108f0484b101fd8cc6c2c0c5d9100ca6 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 14 Mar 2021 14:49:38 +0100
Subject: Bluetooth: Fix mgmt status for LL Privacy experimental feature

The return error when trying to change the setting when a controller is
powered up, shall be MGMT_STATUS_REJECTED. However instead now the error
MGMT_STATUS_NOT_POWERED is used which is exactly the opposite.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 90334ac4a135..3df6f9cf23f3 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3982,7 +3982,7 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
 		if (hdev_is_powered(hdev))
 			return mgmt_cmd_status(sk, hdev->id,
 					       MGMT_OP_SET_EXP_FEATURE,
-					       MGMT_STATUS_NOT_POWERED);
+					       MGMT_STATUS_REJECTED);
 
 		/* Parameters are limited to a single octet */
 		if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
-- 
cgit v1.2.3


From 21dd118f8de318df2bebfcd44a722168bb705be7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 14 Mar 2021 14:56:08 +0100
Subject: Bluetooth: Fix wrong opcode error for read advertising features

The read advertising features error handling returns false the opcode
for the set advertising command.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3df6f9cf23f3..51c7e643669a 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7476,7 +7476,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 	 * advertising.
 	 */
 	if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
-		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES,
 				       MGMT_STATUS_NOT_SUPPORTED);
 
 	hci_dev_lock(hdev);
-- 
cgit v1.2.3


From 02431b6cdb753e099df32a337f083c9502ceb0a0 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 24 Mar 2021 15:10:55 +0100
Subject: Bluetooth: Add missing entries for PHY configuration commands

The list of supported mgmt commands for PHY configuration is missing, so
just add them.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 51c7e643669a..939254924a7b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -108,6 +108,8 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_START_LIMITED_DISCOVERY,
 	MGMT_OP_READ_EXT_INFO,
 	MGMT_OP_SET_APPEARANCE,
+	MGMT_OP_GET_PHY_CONFIGURATION,
+	MGMT_OP_SET_PHY_CONFIGURATION,
 	MGMT_OP_SET_BLOCKED_KEYS,
 	MGMT_OP_SET_WIDEBAND_SPEECH,
 	MGMT_OP_READ_CONTROLLER_CAP,
-- 
cgit v1.2.3


From 3d34a71ff8f8f95abd757c4fac70f07091b13314 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 24 Mar 2021 15:10:56 +0100
Subject: Bluetooth: Move the advertisement monitor events to correct list

The list of trusted events should contain the advertisement monitor
events and not the untrusted one, so move entries to the correct list.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 939254924a7b..2426a0d38c80 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -168,6 +168,8 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_PHY_CONFIGURATION_CHANGED,
 	MGMT_EV_EXP_FEATURE_CHANGED,
 	MGMT_EV_DEVICE_FLAGS_CHANGED,
+	MGMT_EV_ADV_MONITOR_ADDED,
+	MGMT_EV_ADV_MONITOR_REMOVED,
 	MGMT_EV_CONTROLLER_SUSPEND,
 	MGMT_EV_CONTROLLER_RESUME,
 };
@@ -198,8 +200,6 @@ static const u16 mgmt_untrusted_events[] = {
 	MGMT_EV_EXT_INDEX_REMOVED,
 	MGMT_EV_EXT_INFO_CHANGED,
 	MGMT_EV_EXP_FEATURE_CHANGED,
-	MGMT_EV_ADV_MONITOR_ADDED,
-	MGMT_EV_ADV_MONITOR_REMOVED,
 };
 
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
-- 
cgit v1.2.3


From d58cf00dcedb9882ba6e933443371444d8a23b77 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 24 Mar 2021 15:10:59 +0100
Subject: Bluetooth: Increment management interface revision

Increment the mgmt revision due to recent changes.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 2426a0d38c80..09e099c419f2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -40,7 +40,7 @@
 #include "msft.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	19
+#define MGMT_REVISION	20
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
-- 
cgit v1.2.3


From 43f8b9333d86d4e3a42e55a6e41c78c249ac0216 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 26 Mar 2021 09:36:20 +0800
Subject: net: hns3: remove unused code of vmdq

Vmdq is not supported yet, the num_vmdq_vport is always 0,
it's a bit confusing when using the num_vport, so remove
these unused codes of vmdq.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   2 -
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 240 +++++++++------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |   2 -
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  |   1 -
 4 files changed, 96 insertions(+), 149 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 565c5aa54f88..7feab8405d7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -506,8 +506,6 @@ struct hclge_pf_res_cmd {
 #define HCLGE_CFG_RD_LEN_BYTES	16
 #define HCLGE_CFG_RD_LEN_UNIT	4
 
-#define HCLGE_CFG_VMDQ_S	0
-#define HCLGE_CFG_VMDQ_M	GENMASK(7, 0)
 #define HCLGE_CFG_TC_NUM_S	8
 #define HCLGE_CFG_TC_NUM_M	GENMASK(15, 8)
 #define HCLGE_CFG_TQP_DESC_N_S	16
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 058317ce579c..4967e7200d57 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1292,9 +1292,6 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
 	req = (struct hclge_cfg_param_cmd *)desc[0].data;
 
 	/* get the configuration */
-	cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
-					      HCLGE_CFG_VMDQ_M,
-					      HCLGE_CFG_VMDQ_S);
 	cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
 				      HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
 	cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
@@ -1511,7 +1508,7 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev)
 		 "Running kdump kernel. Using minimal resources\n");
 
 	/* minimal queue pairs equals to the number of vports */
-	hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+	hdev->num_tqps = hdev->num_req_vfs + 1;
 	hdev->num_tx_desc = HCLGE_MIN_TX_DESC;
 	hdev->num_rx_desc = HCLGE_MIN_RX_DESC;
 }
@@ -1526,7 +1523,6 @@ static int hclge_configure(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	hdev->num_vmdq_vport = cfg.vmdq_vport_num;
 	hdev->base_tqp_pid = 0;
 	hdev->vf_rss_size_max = cfg.vf_rss_size_max;
 	hdev->pf_rss_size_max = cfg.pf_rss_size_max;
@@ -1777,7 +1773,7 @@ static int hclge_map_tqp(struct hclge_dev *hdev)
 	struct hclge_vport *vport = hdev->vport;
 	u16 i, num_vport;
 
-	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+	num_vport = hdev->num_req_vfs + 1;
 	for (i = 0; i < num_vport; i++)	{
 		int ret;
 
@@ -1819,7 +1815,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 	int ret;
 
 	/* We need to alloc a vport for main NIC of PF */
-	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
+	num_vport = hdev->num_req_vfs + 1;
 
 	if (hdev->num_tqps < num_vport) {
 		dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)",
@@ -2889,13 +2885,12 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status)
 
 static void hclge_update_link_status(struct hclge_dev *hdev)
 {
+	struct hnae3_handle *rhandle = &hdev->vport[0].roce;
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct hnae3_client *rclient = hdev->roce_client;
 	struct hnae3_client *client = hdev->nic_client;
-	struct hnae3_handle *rhandle;
-	struct hnae3_handle *handle;
 	int state;
 	int ret;
-	int i;
 
 	if (!client)
 		return;
@@ -2910,15 +2905,11 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 	}
 
 	if (state != hdev->hw.mac.link) {
-		for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
-			handle = &hdev->vport[i].nic;
-			client->ops->link_status_change(handle, state);
-			hclge_config_mac_tnl_int(hdev, state);
-			rhandle = &hdev->vport[i].roce;
-			if (rclient && rclient->ops->link_status_change)
-				rclient->ops->link_status_change(rhandle,
-								 state);
-		}
+		client->ops->link_status_change(handle, state);
+		hclge_config_mac_tnl_int(hdev, state);
+		if (rclient && rclient->ops->link_status_change)
+			rclient->ops->link_status_change(rhandle, state);
+
 		hdev->hw.mac.link = state;
 	}
 
@@ -3498,8 +3489,9 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev)
 int hclge_notify_client(struct hclge_dev *hdev,
 			enum hnae3_reset_notify_type type)
 {
+	struct hnae3_handle *handle = &hdev->vport[0].nic;
 	struct hnae3_client *client = hdev->nic_client;
-	u16 i;
+	int ret;
 
 	if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client)
 		return 0;
@@ -3507,27 +3499,20 @@ int hclge_notify_client(struct hclge_dev *hdev,
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
-		struct hnae3_handle *handle = &hdev->vport[i].nic;
-		int ret;
-
-		ret = client->ops->reset_notify(handle, type);
-		if (ret) {
-			dev_err(&hdev->pdev->dev,
-				"notify nic client failed %d(%d)\n", type, ret);
-			return ret;
-		}
-	}
+	ret = client->ops->reset_notify(handle, type);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n",
+			type, ret);
 
-	return 0;
+	return ret;
 }
 
 static int hclge_notify_roce_client(struct hclge_dev *hdev,
 				    enum hnae3_reset_notify_type type)
 {
+	struct hnae3_handle *handle = &hdev->vport[0].roce;
 	struct hnae3_client *client = hdev->roce_client;
 	int ret;
-	u16 i;
 
 	if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client)
 		return 0;
@@ -3535,17 +3520,10 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev,
 	if (!client->ops->reset_notify)
 		return -EOPNOTSUPP;
 
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
-		struct hnae3_handle *handle = &hdev->vport[i].roce;
-
-		ret = client->ops->reset_notify(handle, type);
-		if (ret) {
-			dev_err(&hdev->pdev->dev,
-				"notify roce client failed %d(%d)",
-				type, ret);
-			return ret;
-		}
-	}
+	ret = client->ops->reset_notify(handle, type);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)",
+			type, ret);
 
 	return ret;
 }
@@ -3613,7 +3591,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
 {
 	int i;
 
-	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) {
+	for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) {
 		struct hclge_vport *vport = &hdev->vport[i];
 		int ret;
 
@@ -3694,14 +3672,12 @@ void hclge_report_hw_error(struct hclge_dev *hdev,
 			   enum hnae3_hw_error_type type)
 {
 	struct hnae3_client *client = hdev->nic_client;
-	u16 i;
 
 	if (!client || !client->ops->process_hw_error ||
 	    !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
 		return;
 
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++)
-		client->ops->process_hw_error(&hdev->vport[i].nic, type);
+	client->ops->process_hw_error(&hdev->vport[0].nic, type);
 }
 
 static void hclge_handle_imp_error(struct hclge_dev *hdev)
@@ -4913,58 +4889,44 @@ int hclge_rss_init_hw(struct hclge_dev *hdev)
 
 void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
 {
-	struct hclge_vport *vport = hdev->vport;
-	int i, j;
+	struct hclge_vport *vport = &hdev->vport[0];
+	int i;
 
-	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
-		for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
-			vport[j].rss_indirection_tbl[i] =
-				i % vport[j].alloc_rss_size;
-	}
+	for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
+		vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size;
 }
 
 static int hclge_rss_init_cfg(struct hclge_dev *hdev)
 {
 	u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size;
-	int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
-	struct hclge_vport *vport = hdev->vport;
+	int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
+	struct hclge_vport *vport = &hdev->vport[0];
+	u16 *rss_ind_tbl;
 
 	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
 		rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
 
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
-		u16 *rss_ind_tbl;
-
-		vport[i].rss_tuple_sets.ipv4_tcp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[i].rss_tuple_sets.ipv4_udp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[i].rss_tuple_sets.ipv4_sctp_en =
-			HCLGE_RSS_INPUT_TUPLE_SCTP;
-		vport[i].rss_tuple_sets.ipv4_fragment_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[i].rss_tuple_sets.ipv6_tcp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[i].rss_tuple_sets.ipv6_udp_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-		vport[i].rss_tuple_sets.ipv6_sctp_en =
-			hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
-			HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT :
-			HCLGE_RSS_INPUT_TUPLE_SCTP;
-		vport[i].rss_tuple_sets.ipv6_fragment_en =
-			HCLGE_RSS_INPUT_TUPLE_OTHER;
-
-		vport[i].rss_algo = rss_algo;
-
-		rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
-					   sizeof(*rss_ind_tbl), GFP_KERNEL);
-		if (!rss_ind_tbl)
-			return -ENOMEM;
+	vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
+	vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+	vport->rss_tuple_sets.ipv6_sctp_en =
+		hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
+		HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT :
+		HCLGE_RSS_INPUT_TUPLE_SCTP;
+	vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+
+	vport->rss_algo = rss_algo;
+
+	rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
+				   sizeof(*rss_ind_tbl), GFP_KERNEL);
+	if (!rss_ind_tbl)
+		return -ENOMEM;
 
-		vport[i].rss_indirection_tbl = rss_ind_tbl;
-		memcpy(vport[i].rss_hash_key, hclge_hash_key,
-		       HCLGE_RSS_KEY_SIZE);
-	}
+	vport->rss_indirection_tbl = rss_ind_tbl;
+	memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE);
 
 	hclge_rss_indir_init_cfg(hdev);
 
@@ -9130,7 +9092,7 @@ static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx,
 		return true;
 
 	vf_idx += HCLGE_VF_VPORT_START_NUM;
-	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++)
+	for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++)
 		if (i != vf_idx &&
 		    ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac))
 			return true;
@@ -10771,7 +10733,6 @@ static void hclge_info_show(struct hclge_dev *hdev)
 	dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
 	dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
 	dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
-	dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport);
 	dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs);
 	dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
 	dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size);
@@ -10886,39 +10847,35 @@ static int hclge_init_client_instance(struct hnae3_client *client,
 				      struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
-	struct hclge_vport *vport;
-	int i, ret;
-
-	for (i = 0; i <  hdev->num_vmdq_vport + 1; i++) {
-		vport = &hdev->vport[i];
+	struct hclge_vport *vport = &hdev->vport[0];
+	int ret;
 
-		switch (client->type) {
-		case HNAE3_CLIENT_KNIC:
-			hdev->nic_client = client;
-			vport->nic.client = client;
-			ret = hclge_init_nic_client_instance(ae_dev, vport);
-			if (ret)
-				goto clear_nic;
+	switch (client->type) {
+	case HNAE3_CLIENT_KNIC:
+		hdev->nic_client = client;
+		vport->nic.client = client;
+		ret = hclge_init_nic_client_instance(ae_dev, vport);
+		if (ret)
+			goto clear_nic;
 
-			ret = hclge_init_roce_client_instance(ae_dev, vport);
-			if (ret)
-				goto clear_roce;
+		ret = hclge_init_roce_client_instance(ae_dev, vport);
+		if (ret)
+			goto clear_roce;
 
-			break;
-		case HNAE3_CLIENT_ROCE:
-			if (hnae3_dev_roce_supported(hdev)) {
-				hdev->roce_client = client;
-				vport->roce.client = client;
-			}
+		break;
+	case HNAE3_CLIENT_ROCE:
+		if (hnae3_dev_roce_supported(hdev)) {
+			hdev->roce_client = client;
+			vport->roce.client = client;
+		}
 
-			ret = hclge_init_roce_client_instance(ae_dev, vport);
-			if (ret)
-				goto clear_roce;
+		ret = hclge_init_roce_client_instance(ae_dev, vport);
+		if (ret)
+			goto clear_roce;
 
-			break;
-		default:
-			return -EINVAL;
-		}
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	return 0;
@@ -10937,32 +10894,27 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
 					 struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
-	struct hclge_vport *vport;
-	int i;
+	struct hclge_vport *vport = &hdev->vport[0];
 
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
-		vport = &hdev->vport[i];
-		if (hdev->roce_client) {
-			clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
-			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
-				msleep(HCLGE_WAIT_RESET_DONE);
-
-			hdev->roce_client->ops->uninit_instance(&vport->roce,
-								0);
-			hdev->roce_client = NULL;
-			vport->roce.client = NULL;
-		}
-		if (client->type == HNAE3_CLIENT_ROCE)
-			return;
-		if (hdev->nic_client && client->ops->uninit_instance) {
-			clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
-			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
-				msleep(HCLGE_WAIT_RESET_DONE);
-
-			client->ops->uninit_instance(&vport->nic, 0);
-			hdev->nic_client = NULL;
-			vport->nic.client = NULL;
-		}
+	if (hdev->roce_client) {
+		clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+		while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+			msleep(HCLGE_WAIT_RESET_DONE);
+
+		hdev->roce_client->ops->uninit_instance(&vport->roce, 0);
+		hdev->roce_client = NULL;
+		vport->roce.client = NULL;
+	}
+	if (client->type == HNAE3_CLIENT_ROCE)
+		return;
+	if (hdev->nic_client && client->ops->uninit_instance) {
+		clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+		while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+			msleep(HCLGE_WAIT_RESET_DONE);
+
+		client->ops->uninit_instance(&vport->nic, 0);
+		hdev->nic_client = NULL;
+		vport->nic.client = NULL;
 	}
 }
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 97e77e2f7539..dc3d29d41848 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -348,7 +348,6 @@ struct hclge_tc_info {
 };
 
 struct hclge_cfg {
-	u8 vmdq_vport_num;
 	u8 tc_num;
 	u16 tqp_desc_num;
 	u16 rx_buf_len;
@@ -811,7 +810,6 @@ struct hclge_dev {
 	struct hclge_rst_stats rst_stats;
 	struct semaphore reset_sem;	/* protect reset process */
 	u32 fw_version;
-	u16 num_vmdq_vport;		/* Num vmdq vport this PF has set up */
 	u16 num_tqps;			/* Num task queue pairs of this PF */
 	u16 num_req_vfs;		/* Num VFs requested for this PF */
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 8c27ecd819af..ade6e7f5be5b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -192,7 +192,6 @@ struct hclgevf_tqp {
 };
 
 struct hclgevf_cfg {
-	u8 vmdq_vport_num;
 	u8 tc_num;
 	u16 tqp_desc_num;
 	u16 rx_buf_len;
-- 
cgit v1.2.3


From c0127115ee2329dd57a65dceb139ec7cc39f48c7 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 26 Mar 2021 09:36:21 +0800
Subject: net: hns3: remove redundant blank lines

Remove some redundant blank lines.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c           | 7 -------
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c    | 1 -
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c    | 2 --
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   | 5 -----
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c  | 1 -
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 --
 6 files changed, 18 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 44b775efd5b9..c73de36b3843 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -210,7 +210,6 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
 	 * Rl defines rate of interrupts i.e. number of interrupts-per-second
 	 * GL and RL(Rate Limiter) are 2 ways to acheive interrupt coalescing
 	 */
-
 	if (rl_reg > 0 && !tqp_vector->tx_group.coal.adapt_enable &&
 	    !tqp_vector->rx_group.coal.adapt_enable)
 		/* According to the hardware, the range of rl_reg is
@@ -883,7 +882,6 @@ static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 			hns3_set_field(*ol_type_vlan_len_msec,
 				       HNS3_TXD_OL3T_S,
 				       HNS3_OL3T_IPV4_NO_CSUM);
-
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 		hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S,
 			       HNS3_OL3T_IPV6);
@@ -1296,7 +1294,6 @@ static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size,
 		return HNS3_MAX_TSO_BD_NUM + 1U;
 
 	bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
-
 	if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM)
 		return bd_num;
 
@@ -2965,7 +2962,6 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
 					  HNS3_RXD_L3ID_S);
 		l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M,
 					  HNS3_RXD_L4ID_S);
-
 		/* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */
 		if ((l3_type == HNS3_L3_TYPE_IPV4 ||
 		     l3_type == HNS3_L3_TYPE_IPV6) &&
@@ -3295,7 +3291,6 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
 
 	if (!skb) {
 		bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
-
 		/* Check valid BD */
 		if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B))))
 			return -ENXIO;
@@ -3557,7 +3552,6 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
 	hns3_for_each_ring(ring, tqp_vector->rx_group) {
 		int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget,
 						    hns3_rx_skb);
-
 		if (rx_cleaned >= rx_budget)
 			clean_complete = false;
 
@@ -4024,7 +4018,6 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
 			       hns3_buf_size2type(ring->buf_size));
 		hns3_write_dev(q, HNS3_RING_RX_RING_BD_NUM_REG,
 			       ring->desc_num / 8 - 1);
-
 	} else {
 		hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_L_REG,
 			       (u32)dma);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 3284a2cb52e6..76a482456f1f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -366,7 +366,6 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
 	u32 caps;
 
 	caps = __le32_to_cpu(cmd->caps[0]);
-
 	if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B))
 		set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 0ca7f1b984bf..7389fe905f53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1497,7 +1497,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
 	}
 
 	status = le32_to_cpu(desc[0].data[0]);
-
 	if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
 		if (status & HCLGE_ROCEE_RERR_INT_MASK)
 			dev_err(dev, "ROCEE RAS AXI rresp error\n");
@@ -1647,7 +1646,6 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 	}
 
 	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
-
 	if (status & HCLGE_RAS_REG_NFE_MASK ||
 	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
 		ae_dev->hw_err_reset_req = 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4967e7200d57..fbbc2a745fd8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -553,7 +553,6 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev)
 	int ret;
 
 	ret = hclge_mac_query_reg_num(hdev, &desc_num);
-
 	/* The firmware supports the new statistics acquisition method */
 	if (!ret)
 		ret = hclge_mac_update_stats_complete(hdev, desc_num);
@@ -784,7 +783,6 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset)
 			count += 1;
 			handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
 		}
-
 	} else if (stringset == ETH_SS_STATS) {
 		count = ARRAY_SIZE(g_mac_stats_string) +
 			hclge_tqps_get_sset_count(handle, stringset);
@@ -2191,7 +2189,6 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
 			COMPENSATE_HALF_MPS_NUM * half_mps;
 	min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT);
 	rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT);
-
 	if (rx_priv < min_rx_priv)
 		return false;
 
@@ -8608,7 +8605,6 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
 	if (status)
 		return status;
 	status = hclge_add_mac_vlan_tbl(vport, &req, desc);
-
 	/* if already overflow, not to print each time */
 	if (status == -ENOSPC &&
 	    !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE))
@@ -8657,7 +8653,6 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
 		else
 			/* Not all the vfid is zero, update the vfid */
 			status = hclge_add_mac_vlan_tbl(vport, &req, desc);
-
 	} else if (status == -ENOENT) {
 		status = 0;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 46700c427849..d8c5c5810b99 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -349,7 +349,6 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev,
 	u32 caps;
 
 	caps = __le32_to_cpu(cmd->caps[0]);
-
 	if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B))
 		set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B))
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 700e068764c8..5e512cd01a96 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -497,7 +497,6 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
 
 	link_state =
 		test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state;
-
 	if (link_state != hdev->hw.mac.link) {
 		client->ops->link_status_change(handle, !!link_state);
 		if (rclient && rclient->ops->link_status_change)
@@ -2356,7 +2355,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
 	/* fetch the events from their corresponding regs */
 	cmdq_stat_reg = hclgevf_read_dev(&hdev->hw,
 					 HCLGEVF_VECTOR0_CMDQ_STATE_REG);
-
 	if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
 		rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
 		dev_info(&hdev->pdev->dev,
-- 
cgit v1.2.3


From d914971df022e7abdb5f8fdfd901a655c9786c05 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Fri, 26 Mar 2021 09:36:22 +0800
Subject: net: hns3: remove redundant query in hclge_config_tm_hw_err_int()

According to the HW manual, the query operation is unnecessary
when the TM QCN error event is enabled, so remove it.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 7389fe905f53..d25291916b31 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -865,13 +865,7 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
 	}
 
 	/* configure TM QCN hw errors */
-	ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0);
-	if (ret) {
-		dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret);
-		return ret;
-	}
-
-	hclge_cmd_reuse_desc(&desc, false);
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false);
 	if (en)
 		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);
 
-- 
cgit v1.2.3


From 567d1dd3e4bc204e424553ccf2e1d47b0f5d03a8 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Fri, 26 Mar 2021 09:36:23 +0800
Subject: net: hns3: remove unused parameter from hclge_set_vf_vlan_common()

Parameter vf in hclge_set_vf_vlan_common() is unused now,
so remove it.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index fbbc2a745fd8..449ea9eafffc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9468,8 +9468,7 @@ static int hclge_check_vf_vlan_cmd_status(struct hclge_dev *hdev, u16 vfid,
 }
 
 static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
-				    bool is_kill, u16 vlan,
-				    __be16 proto)
+				    bool is_kill, u16 vlan)
 {
 	struct hclge_vport *vport = &hdev->vport[vfid];
 	struct hclge_desc desc[2];
@@ -9535,8 +9534,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto,
 	if (is_kill && !vlan_id)
 		return 0;
 
-	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id,
-				       proto);
+	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Set %u vport vlan filter config fail, ret =%d.\n",
-- 
cgit v1.2.3


From 1e49432b91d671fd690a6a99fcc082ae44b79faf Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 26 Mar 2021 09:36:24 +0800
Subject: net: hns3: remove unused parameter from hclge_dbg_dump_loopback()

Parameter cm_buf is never used, so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 1c699131e8df..85d306459e36 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1541,8 +1541,7 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
 	}
 }
 
-static void hclge_dbg_dump_loopback(struct hclge_dev *hdev,
-				    const char *cmd_buf)
+static void hclge_dbg_dump_loopback(struct hclge_dev *hdev)
 {
 	struct phy_device *phydev = hdev->hw.mac.phydev;
 	struct hclge_config_mac_mode_cmd *req_app;
@@ -1778,7 +1777,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 		hclge_dbg_dump_mac_tnl_status(hdev);
 	} else if (strncmp(cmd_buf, DUMP_LOOPBACK,
 		   strlen(DUMP_LOOPBACK)) == 0) {
-		hclge_dbg_dump_loopback(hdev, &cmd_buf[sizeof(DUMP_LOOPBACK)]);
+		hclge_dbg_dump_loopback(hdev);
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
 		hclge_dbg_dump_qs_shaper(hdev,
 					 &cmd_buf[sizeof("dump qs shaper")]);
-- 
cgit v1.2.3


From a1e144d7dc3c55aa4d451e3a23cd8f34cd65ee01 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 26 Mar 2021 09:36:25 +0800
Subject: net: hns3: fix prototype warning

Correct a report warning in hns3_ethtool.c

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index a1d69c56d119..0ae6140882b7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -307,7 +307,7 @@ out:
 }
 
 /**
- * hns3_nic_self_test - self test
+ * hns3_self_test - self test
  * @ndev: net device
  * @eth_test: test cmd
  * @data: test result
-- 
cgit v1.2.3


From f7be24f00702cf07d025c79ab30a15f7d7db5cac Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Fri, 26 Mar 2021 09:36:26 +0800
Subject: net: hns3: fix some typos in hclge_main.c

s/sucessful/successful/
s/serivce/service/
and remove a redundant new.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 449ea9eafffc..6bfaf14107fe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2217,7 +2217,7 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev,
 /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
  * @hdev: pointer to struct hclge_dev
  * @buf_alloc: pointer to buffer calculation data
- * @return: 0: calculate sucessful, negative: fail
+ * @return: 0: calculate successful, negative: fail
  */
 static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
 				struct hclge_pkt_buf_alloc *buf_alloc)
@@ -3358,7 +3358,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 		 * caused this event. Therefore, we will do below for now:
 		 * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we
 		 *    have defered type of reset to be used.
-		 * 2. Schedule the reset serivce task.
+		 * 2. Schedule the reset service task.
 		 * 3. When service task receives  HNAE3_UNKNOWN_RESET type it
 		 *    will fetch the correct type of reset.  This would be done
 		 *    by first decoding the types of errors.
@@ -8416,7 +8416,7 @@ int hclge_update_mac_list(struct hclge_vport *vport,
 
 	/* if the mac addr is already in the mac list, no need to add a new
 	 * one into it, just check the mac addr state, convert it to a new
-	 * new state, or just remove it, or do nothing.
+	 * state, or just remove it, or do nothing.
 	 */
 	mac_node = hclge_find_mac_node(list, addr);
 	if (mac_node) {
-- 
cgit v1.2.3


From 74d439b74ad3e05780d4cf3ab047345b443f7e67 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Fri, 26 Mar 2021 09:36:27 +0800
Subject: net: hns3: split function hclge_reset_rebuild()

hclge_reset_rebuild() is a bit too long. So add a new function
hclge_update_reset_level() to improve readability.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 26 +++++++++++++---------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6bfaf14107fe..d63951993000 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3940,6 +3940,21 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
 	return false;
 }
 
+static void hclge_update_reset_level(struct hclge_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	enum hnae3_reset_type reset_level;
+
+	/* if default_reset_request has a higher level reset request,
+	 * it should be handled as soon as possible. since some errors
+	 * need this kind of reset to fix.
+	 */
+	reset_level = hclge_get_reset_level(ae_dev,
+					    &hdev->default_reset_request);
+	if (reset_level != HNAE3_NONE_RESET)
+		set_bit(reset_level, &hdev->reset_request);
+}
+
 static int hclge_set_rst_done(struct hclge_dev *hdev)
 {
 	struct hclge_pf_rst_done_cmd *req;
@@ -4027,8 +4042,6 @@ static int hclge_reset_prepare(struct hclge_dev *hdev)
 
 static int hclge_reset_rebuild(struct hclge_dev *hdev)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
-	enum hnae3_reset_type reset_level;
 	int ret;
 
 	hdev->rst_stats.hw_reset_done_cnt++;
@@ -4072,14 +4085,7 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev)
 	hdev->rst_stats.reset_done_cnt++;
 	clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
 
-	/* if default_reset_request has a higher level reset request,
-	 * it should be handled as soon as possible. since some errors
-	 * need this kind of reset to fix.
-	 */
-	reset_level = hclge_get_reset_level(ae_dev,
-					    &hdev->default_reset_request);
-	if (reset_level != HNAE3_NONE_RESET)
-		set_bit(reset_level, &hdev->reset_request);
+	hclge_update_reset_level(hdev);
 
 	return 0;
 }
-- 
cgit v1.2.3


From b1261897b0902d870c483fb006a9443723a3d58b Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Fri, 26 Mar 2021 09:36:28 +0800
Subject: net: hns3: split out hclge_tm_vport_tc_info_update()

hclge_tm_vport_tc_info_update() is bloated, so split it into
separate functions for readability and maintainability.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 151afd1f0688..aa81b8c56fce 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -631,13 +631,12 @@ static u16 hclge_vport_get_tqp_num(struct hclge_vport *vport)
 	return sum;
 }
 
-static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
+static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport)
 {
 	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
 	struct hclge_dev *hdev = vport->back;
 	u16 vport_max_rss_size;
 	u16 max_rss_size;
-	u8 i;
 
 	/* TC configuration is shared by PF/VF in one port, only allow
 	 * one tc for VF for simplicity. VF's vport_id is non zero.
@@ -677,7 +676,15 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
 		/* Set to the maximum specification value (max_rss_size). */
 		kinfo->rss_size = max_rss_size;
 	}
+}
+
+static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
+{
+	struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo;
+	struct hclge_dev *hdev = vport->back;
+	u8 i;
 
+	hclge_tm_update_kinfo_rss_size(vport);
 	kinfo->num_tqps = hclge_vport_get_tqp_num(vport);
 	vport->dwrr = 100;  /* 100 percent as init */
 	vport->alloc_rss_size = kinfo->rss_size;
-- 
cgit v1.2.3


From ee3e6beaa015ff1526440bf31f1782b6daa772da Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:11 -0500
Subject: net: ipa: introduce ipa_resource.c

Separate the IPA resource-related code into a new source file,
"ipa_resource.c", and matching header file "ipa_resource.h".

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile       |   2 +-
 drivers/net/ipa/ipa_main.c     | 148 +-----------------------------
 drivers/net/ipa/ipa_reg.h      |  42 ---------
 drivers/net/ipa/ipa_resource.c | 204 +++++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_resource.h |  27 ++++++
 5 files changed, 234 insertions(+), 189 deletions(-)
 create mode 100644 drivers/net/ipa/ipa_resource.c
 create mode 100644 drivers/net/ipa/ipa_resource.h

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index afe5df1e6eee..14a7d8429baa 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -7,6 +7,6 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \
 				ipa_gsi.o ipa_smp2p.o ipa_uc.o \
 				ipa_endpoint.o ipa_cmd.o ipa_modem.o \
-				ipa_qmi.o ipa_qmi_msg.o
+				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
 ipa-y			+=	ipa_data-sdm845.o ipa_data-sc7180.o
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index f071e90de540..e18029152d78 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
+ * Copyright (C) 2018-2021 Linaro Ltd.
  */
 
 #include <linux/types.h>
@@ -22,6 +22,7 @@
 #include "ipa_clock.h"
 #include "ipa_data.h"
 #include "ipa_endpoint.h"
+#include "ipa_resource.h"
 #include "ipa_cmd.h"
 #include "ipa_reg.h"
 #include "ipa_mem.h"
@@ -452,151 +453,6 @@ static void ipa_hardware_deconfig(struct ipa *ipa)
 	ipa_hardware_dcd_deconfig(ipa);
 }
 
-#ifdef IPA_VALIDATION
-
-static bool ipa_resource_limits_valid(struct ipa *ipa,
-				      const struct ipa_resource_data *data)
-{
-	u32 group_count;
-	u32 i;
-	u32 j;
-
-	/* We program at most 6 source or destination resource group limits */
-	BUILD_BUG_ON(IPA_RESOURCE_GROUP_SRC_MAX > 6);
-
-	group_count = ipa_resource_group_src_count(ipa->version);
-	if (!group_count || group_count > IPA_RESOURCE_GROUP_SRC_MAX)
-		return false;
-
-	/* Return an error if a non-zero resource limit is specified
-	 * for a resource group not supported by hardware.
-	 */
-	for (i = 0; i < data->resource_src_count; i++) {
-		const struct ipa_resource_src *resource;
-
-		resource = &data->resource_src[i];
-		for (j = group_count; j < IPA_RESOURCE_GROUP_SRC_MAX; j++)
-			if (resource->limits[j].min || resource->limits[j].max)
-				return false;
-	}
-
-	group_count = ipa_resource_group_dst_count(ipa->version);
-	if (!group_count || group_count > IPA_RESOURCE_GROUP_DST_MAX)
-		return false;
-
-	for (i = 0; i < data->resource_dst_count; i++) {
-		const struct ipa_resource_dst *resource;
-
-		resource = &data->resource_dst[i];
-		for (j = group_count; j < IPA_RESOURCE_GROUP_DST_MAX; j++)
-			if (resource->limits[j].min || resource->limits[j].max)
-				return false;
-	}
-
-	return true;
-}
-
-#else /* !IPA_VALIDATION */
-
-static bool ipa_resource_limits_valid(struct ipa *ipa,
-				      const struct ipa_resource_data *data)
-{
-	return true;
-}
-
-#endif /* !IPA_VALIDATION */
-
-static void
-ipa_resource_config_common(struct ipa *ipa, u32 offset,
-			   const struct ipa_resource_limits *xlimits,
-			   const struct ipa_resource_limits *ylimits)
-{
-	u32 val;
-
-	val = u32_encode_bits(xlimits->min, X_MIN_LIM_FMASK);
-	val |= u32_encode_bits(xlimits->max, X_MAX_LIM_FMASK);
-	if (ylimits) {
-		val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK);
-		val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK);
-	}
-
-	iowrite32(val, ipa->reg_virt + offset);
-}
-
-static void ipa_resource_config_src(struct ipa *ipa,
-				    const struct ipa_resource_src *resource)
-{
-	u32 group_count = ipa_resource_group_src_count(ipa->version);
-	const struct ipa_resource_limits *ylimits;
-	u32 offset;
-
-	offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 1 ? NULL : &resource->limits[1];
-	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
-
-	if (group_count < 2)
-		return;
-
-	offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 3 ? NULL : &resource->limits[3];
-	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
-
-	if (group_count < 4)
-		return;
-
-	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 5 ? NULL : &resource->limits[5];
-	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
-}
-
-static void ipa_resource_config_dst(struct ipa *ipa,
-				    const struct ipa_resource_dst *resource)
-{
-	u32 group_count = ipa_resource_group_dst_count(ipa->version);
-	const struct ipa_resource_limits *ylimits;
-	u32 offset;
-
-	offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 1 ? NULL : &resource->limits[1];
-	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
-
-	if (group_count < 2)
-		return;
-
-	offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 3 ? NULL : &resource->limits[3];
-	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
-
-	if (group_count < 4)
-		return;
-
-	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
-	ylimits = group_count == 5 ? NULL : &resource->limits[5];
-	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
-}
-
-static int
-ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
-{
-	u32 i;
-
-	if (!ipa_resource_limits_valid(ipa, data))
-		return -EINVAL;
-
-	for (i = 0; i < data->resource_src_count; i++)
-		ipa_resource_config_src(ipa, &data->resource_src[i]);
-
-	for (i = 0; i < data->resource_dst_count; i++)
-		ipa_resource_config_dst(ipa, &data->resource_dst[i]);
-
-	return 0;
-}
-
-static void ipa_resource_deconfig(struct ipa *ipa)
-{
-	/* Nothing to do */
-}
-
 /**
  * ipa_config() - Configure IPA hardware
  * @ipa:	IPA pointer
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 8820e08d2535..9c798cef7b2e 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -346,48 +346,6 @@ enum ipa_pulse_gran {
 	IPA_GRAN_655350_US			= 0x7,
 };
 
-/* # IPA source resource groups available based on version */
-static inline u32 ipa_resource_group_src_count(enum ipa_version version)
-{
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		return 4;
-
-	case IPA_VERSION_4_2:
-		return 1;
-
-	case IPA_VERSION_4_5:
-		return 5;
-
-	default:
-		return 0;
-	}
-}
-
-/* # IPA destination resource groups available based on version */
-static inline u32 ipa_resource_group_dst_count(enum ipa_version version)
-{
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-		return 3;
-
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		return 4;
-
-	case IPA_VERSION_4_2:
-		return 1;
-
-	case IPA_VERSION_4_5:
-		return 5;
-
-	default:
-		return 0;
-	}
-}
-
 /* Not all of the following are present (depends on IPA version) */
 #define IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000400 + 0x0020 * (rt))
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
new file mode 100644
index 000000000000..2f0f2dca3678
--- /dev/null
+++ b/drivers/net/ipa/ipa_resource.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2018-2021 Linaro Ltd.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+#include "ipa.h"
+#include "ipa_data.h"
+#include "ipa_reg.h"
+#include "ipa_resource.h"
+
+/**
+ * DOC: IPA Resources
+ *
+ * The IPA manages a set of resources internally for various purposes.
+ * A given IPA version has a fixed number of resource types, and a fixed
+ * total number of resources of each type.  "Source" resource types
+ * are separate from "destination" resource types.
+ *
+ * Each version of IPA also has some number of resource groups.  Each
+ * endpoint is assigned to a resource group, and all endpoints in the
+ * same group share pools of each type of resource.  A subset of the
+ * total resources of each type is assigned for use by each group.
+ */
+
+/* # IPA source resource groups available based on version */
+static u32 ipa_resource_group_src_count(enum ipa_version version)
+{
+	switch (version) {
+	case IPA_VERSION_3_5_1:
+	case IPA_VERSION_4_0:
+	case IPA_VERSION_4_1:
+		return 4;
+
+	case IPA_VERSION_4_2:
+		return 1;
+
+	case IPA_VERSION_4_5:
+		return 5;
+
+	default:
+		return 0;
+	}
+}
+
+/* # IPA destination resource groups available based on version */
+static u32 ipa_resource_group_dst_count(enum ipa_version version)
+{
+	switch (version) {
+	case IPA_VERSION_3_5_1:
+		return 3;
+
+	case IPA_VERSION_4_0:
+	case IPA_VERSION_4_1:
+		return 4;
+
+	case IPA_VERSION_4_2:
+		return 1;
+
+	case IPA_VERSION_4_5:
+		return 5;
+
+	default:
+		return 0;
+	}
+}
+
+static bool ipa_resource_limits_valid(struct ipa *ipa,
+				      const struct ipa_resource_data *data)
+{
+#ifdef IPA_VALIDATION
+	u32 group_count;
+	u32 i;
+	u32 j;
+
+	/* We program at most 6 source or destination resource group limits */
+	BUILD_BUG_ON(IPA_RESOURCE_GROUP_SRC_MAX > 6);
+
+	group_count = ipa_resource_group_src_count(ipa->version);
+	if (!group_count || group_count > IPA_RESOURCE_GROUP_SRC_MAX)
+		return false;
+
+	/* Return an error if a non-zero resource limit is specified
+	 * for a resource group not supported by hardware.
+	 */
+	for (i = 0; i < data->resource_src_count; i++) {
+		const struct ipa_resource_src *resource;
+
+		resource = &data->resource_src[i];
+		for (j = group_count; j < IPA_RESOURCE_GROUP_SRC_MAX; j++)
+			if (resource->limits[j].min || resource->limits[j].max)
+				return false;
+	}
+
+	group_count = ipa_resource_group_dst_count(ipa->version);
+	if (!group_count || group_count > IPA_RESOURCE_GROUP_DST_MAX)
+		return false;
+
+	for (i = 0; i < data->resource_dst_count; i++) {
+		const struct ipa_resource_dst *resource;
+
+		resource = &data->resource_dst[i];
+		for (j = group_count; j < IPA_RESOURCE_GROUP_DST_MAX; j++)
+			if (resource->limits[j].min || resource->limits[j].max)
+				return false;
+	}
+#endif /* !IPA_VALIDATION */
+	return true;
+}
+
+static void
+ipa_resource_config_common(struct ipa *ipa, u32 offset,
+			   const struct ipa_resource_limits *xlimits,
+			   const struct ipa_resource_limits *ylimits)
+{
+	u32 val;
+
+	val = u32_encode_bits(xlimits->min, X_MIN_LIM_FMASK);
+	val |= u32_encode_bits(xlimits->max, X_MAX_LIM_FMASK);
+	if (ylimits) {
+		val |= u32_encode_bits(ylimits->min, Y_MIN_LIM_FMASK);
+		val |= u32_encode_bits(ylimits->max, Y_MAX_LIM_FMASK);
+	}
+
+	iowrite32(val, ipa->reg_virt + offset);
+}
+
+static void ipa_resource_config_src(struct ipa *ipa,
+				    const struct ipa_resource_src *resource)
+{
+	u32 group_count = ipa_resource_group_src_count(ipa->version);
+	const struct ipa_resource_limits *ylimits;
+	u32 offset;
+
+	offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 1 ? NULL : &resource->limits[1];
+	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
+
+	if (group_count < 2)
+		return;
+
+	offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 3 ? NULL : &resource->limits[3];
+	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
+
+	if (group_count < 4)
+		return;
+
+	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 5 ? NULL : &resource->limits[5];
+	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
+}
+
+static void ipa_resource_config_dst(struct ipa *ipa,
+				    const struct ipa_resource_dst *resource)
+{
+	u32 group_count = ipa_resource_group_dst_count(ipa->version);
+	const struct ipa_resource_limits *ylimits;
+	u32 offset;
+
+	offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 1 ? NULL : &resource->limits[1];
+	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
+
+	if (group_count < 2)
+		return;
+
+	offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 3 ? NULL : &resource->limits[3];
+	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
+
+	if (group_count < 4)
+		return;
+
+	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
+	ylimits = group_count == 5 ? NULL : &resource->limits[5];
+	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
+}
+
+/* Configure resources */
+int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
+{
+	u32 i;
+
+	if (!ipa_resource_limits_valid(ipa, data))
+		return -EINVAL;
+
+	for (i = 0; i < data->resource_src_count; i++)
+		ipa_resource_config_src(ipa, &data->resource_src[i]);
+
+	for (i = 0; i < data->resource_dst_count; i++)
+		ipa_resource_config_dst(ipa, &data->resource_dst[i]);
+
+	return 0;
+}
+
+/* Inverse of ipa_resource_config() */
+void ipa_resource_deconfig(struct ipa *ipa)
+{
+	/* Nothing to do */
+}
diff --git a/drivers/net/ipa/ipa_resource.h b/drivers/net/ipa/ipa_resource.h
new file mode 100644
index 000000000000..9f74036fb95c
--- /dev/null
+++ b/drivers/net/ipa/ipa_resource.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2019-2021 Linaro Ltd.
+ */
+#ifndef _IPA_RESOURCE_H_
+#define _IPA_RESOURCE_H_
+
+struct ipa;
+struct ipa_resource_data;
+
+/**
+ * ipa_resource_config() - Configure resources
+ * @ipa:	IPA pointer
+ * @data:	IPA resource configuration data
+ *
+ * Return:	true if all regions are valid, false otherwise
+ */
+int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data);
+
+/**
+ * ipa_resource_deconfig() - Inverse of ipa_resource_config()
+ * @ipa:	IPA pointer
+ */
+void ipa_resource_deconfig(struct ipa *ipa);
+
+#endif /* _IPA_RESOURCE_H_ */
-- 
cgit v1.2.3


From a749c6c03762f71ba4d04ead60b4f5df9ca2bf5e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:12 -0500
Subject: net: ipa: fix bug in resource group limit programming

If the number of resource groups supported by the hardware is less
than a certain number, we return early in ipa_resource_config_src()
and ipa_resource_config_dst() (to avoid programming resource limits
for non-existent groups).

Unfortunately, these checks are off by one.  Fix this problem in the
four places it occurs.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_resource.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 2f0f2dca3678..edd9d1e5cbb6 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -139,14 +139,14 @@ static void ipa_resource_config_src(struct ipa *ipa,
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
 
-	if (group_count < 2)
+	if (group_count < 3)
 		return;
 
 	offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
 	ylimits = group_count == 3 ? NULL : &resource->limits[3];
 	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
 
-	if (group_count < 4)
+	if (group_count < 5)
 		return;
 
 	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
@@ -165,14 +165,14 @@ static void ipa_resource_config_dst(struct ipa *ipa,
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
 
-	if (group_count < 2)
+	if (group_count < 3)
 		return;
 
 	offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
 	ylimits = group_count == 3 ? NULL : &resource->limits[3];
 	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
 
-	if (group_count < 4)
+	if (group_count < 5)
 		return;
 
 	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
-- 
cgit v1.2.3


From 47f71d6e677cfa2853203ea77c0a47c23fea92c0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:13 -0500
Subject: net: ipa: identify resource groups

Define a new ipa_resource_group_id enumerated type, whose members
have numeric values that match the resource group number used when
programming the hardware.  Each platform supports a different number
of source and destination resource groups, so define the type
separately for each platform in its configuration data file.

Use these new symbolic values when specifying the resource group an
endpoint is associated with.  And use them to index the limits
arrays for source and destination resources, making it clearer how
these values are used.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 38 +++++++++++++++-----------
 drivers/net/ipa/ipa_data-sdm845.c | 56 ++++++++++++++++++++++++---------------
 2 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 621ad15c9e67..e9b741832a1d 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
-/* Copyright (C) 2019-2020 Linaro Ltd. */
+/* Copyright (C) 2019-2021 Linaro Ltd. */
 
 #include <linux/log2.h>
 
@@ -9,6 +9,15 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* Resource groups used for the SC7180 SoC */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UL_DL	= 0,
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UL_DL_DPL	= 0,
+};
+
 /* QSB configuration for the SC7180 SoC. */
 static const struct ipa_qsb_data ipa_qsb_data[] = {
 	[IPA_QSB_MASTER_DDR] = {
@@ -32,7 +41,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 0,
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
 				.tx = {
@@ -53,7 +62,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 0,
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
 				.aggregation	= true,
 				.status_enable	= true,
 				.rx = {
@@ -75,7 +84,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		.endpoint = {
 			.filter_support	= true,
 			.config = {
-				.resource_group	= 0,
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
@@ -100,7 +109,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 0,
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
 				.checksum	= true,
 				.qmap		= true,
 				.aggregation	= true,
@@ -139,58 +148,57 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	},
 };
 
-/* For the SC7180, resource groups are allocated this way:
- *   group 0:	UL_DL
- */
+/* Source resource configuration data for the SC7180 SoC */
 static const struct ipa_resource_src ipa_resource_src[] = {
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 3,
 			.max = 63,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 3,
 			.max = 3,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 10,
 			.max = 10,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 1,
 			.max = 1,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 5,
 			.max = 5,
 		},
 	},
 };
 
+/* Destination resource configuration data for the SC7180 SoC */
 static const struct ipa_resource_dst ipa_resource_dst[] = {
 	{
 		.type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 3,
 			.max = 3,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_DST_DPS_DMARS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 1,
 			.max = 63,
 		},
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 6b5173f47444..b6ea6295e759 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 
 #include <linux/log2.h>
@@ -11,6 +11,20 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/* Resource groups used for the SDM845 SoC */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_LWA_DL	= 0,
+	IPA_RSRC_GROUP_SRC_UL_DL,
+	IPA_RSRC_GROUP_SRC_MHI_DMA,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_LWA_DL	= 0,
+	IPA_RSRC_GROUP_DST_UL_DL_DPL,
+	IPA_RSRC_GROUP_DST_UNUSED_2,
+};
+
 /* QSB configuration for the SDM845 SoC. */
 static const struct ipa_qsb_data ipa_qsb_data[] = {
 	[IPA_QSB_MASTER_DDR] = {
@@ -37,7 +51,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 1,
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
 				.dma_mode	= true,
 				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
 				.tx = {
@@ -58,7 +72,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 1,
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
 				.aggregation	= true,
 				.status_enable	= true,
 				.rx = {
@@ -80,7 +94,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		.endpoint = {
 			.filter_support	= true,
 			.config = {
-				.resource_group	= 1,
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
 				.checksum	= true,
 				.qmap		= true,
 				.status_enable	= true,
@@ -104,7 +118,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 		},
 		.endpoint = {
 			.config = {
-				.resource_group	= 1,
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
 				.checksum	= true,
 				.qmap		= true,
 				.aggregation	= true,
@@ -152,72 +166,70 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 	},
 };
 
-/* For the SDM845, resource groups are allocated this way:
- *   group 0:	LWA_DL
- *   group 1:	UL_DL
- */
+/* Source resource configuration data for the SDM845 SoC */
 static const struct ipa_resource_src ipa_resource_src[] = {
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 1,
 			.max = 255,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 1,
 			.max = 255,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 10,
 			.max = 10,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 10,
 			.max = 10,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 12,
 			.max = 12,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 14,
 			.max = 14,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 0,
 			.max = 63,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 0,
 			.max = 63,
 		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 14,
 			.max = 14,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 20,
 			.max = 20,
 		},
 	},
 };
 
+/* Destination resource configuration data for the SDM845 SoC */
 static const struct ipa_resource_dst ipa_resource_dst[] = {
 	{
 		.type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
 			.min = 4,
 			.max = 4,
 		},
@@ -228,11 +240,11 @@ static const struct ipa_resource_dst ipa_resource_dst[] = {
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_DST_DPS_DMARS,
-		.limits[0] = {
+		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
 			.min = 2,
 			.max = 63,
 		},
-		.limits[1] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 1,
 			.max = 63,
 		},
-- 
cgit v1.2.3


From 9ab7e72882668c5223c55581bdcd44fdf5e9d882 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:14 -0500
Subject: net: ipa: add some missing resource limits

Currently, the SDM845 configuration data defines resource limits for
the first two resource groups (for both source and destination
resource types).  The hardware supports additional resource groups,
and we should program the resource limits for those groups as well.

Even the "unused" destination resource group (number 2) should have
non-zero limits programmed in some cases, to ensure correct operation.

Add these missing resource group limit definitions to the SDM845
configuration data.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sdm845.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index b6ea6295e759..3bc5fcfdf960 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -178,6 +178,10 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.min = 1,
 			.max = 255,
 		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 1,
+			.max = 63,
+		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
@@ -189,6 +193,10 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.min = 10,
 			.max = 10,
 		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,
+			.max = 8,
+		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
@@ -200,6 +208,10 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.min = 14,
 			.max = 14,
 		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,
+			.max = 8,
+		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
@@ -211,6 +223,14 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.min = 0,
 			.max = 63,
 		},
+		.limits[IPA_RSRC_GROUP_SRC_MHI_DMA] = {
+			.min = 0,
+			.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,
+			.max = 63,
+		},
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
@@ -222,6 +242,10 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.min = 20,
 			.max = 20,
 		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 14,
+			.max = 14,
+		},
 	},
 };
 
@@ -237,6 +261,10 @@ static const struct ipa_resource_dst ipa_resource_dst[] = {
 			.min = 4,
 			.max = 4,
 		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 3,
+			.max = 3,
+		}
 	},
 	{
 		.type = IPA_RESOURCE_TYPE_DST_DPS_DMARS,
@@ -248,6 +276,10 @@ static const struct ipa_resource_dst ipa_resource_dst[] = {
 			.min = 1,
 			.max = 63,
 		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 1,
+			.max = 2,
+		}
 	},
 };
 
-- 
cgit v1.2.3


From fd2b7bc3211342e003d00625973318613e0aa2fe Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:15 -0500
Subject: net: ipa: combine resource type definitions

Combine the ipa_resource_type_src and ipa_resource_type_dst
enumerated types into a single enumerated type, ipa_resource_type.

Assign value 0 to the first element for the source and destination
types, so their numeric values are preserved.  Add some additional
commentary where these are defined, stating explicitly that code
assumes the first source and first destination member must have
numeric value 0.

Fix the kerneldoc comments for the ipa_gsi_endpoint_data structure.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data.h | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 7816583fc14a..ccd7fd0b801a 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 #ifndef _IPA_DATA_H_
 #define _IPA_DATA_H_
@@ -177,12 +177,12 @@ struct ipa_endpoint_data {
 
 /**
  * struct ipa_gsi_endpoint_data - GSI channel/IPA endpoint data
- * ee:		GSI execution environment ID
- * channel_id:	GSI channel ID
- * endpoint_id:	IPA endpoint ID
- * toward_ipa:	direction of data transfer
- * gsi:		GSI channel configuration data (see above)
- * ipa:		IPA endpoint configuration data (see above)
+ * @ee_id:	GSI execution environment ID
+ * @channel_id:	GSI channel ID
+ * @endpoint_id: IPA endpoint ID
+ * @toward_ipa:	direction of data transfer
+ * @channel:	GSI channel configuration data (see above)
+ * @endpoint:	IPA endpoint configuration data (see above)
  */
 struct ipa_gsi_endpoint_data {
 	u8 ee_id;		/* enum gsi_ee_id */
@@ -194,18 +194,17 @@ struct ipa_gsi_endpoint_data {
 	struct ipa_endpoint_data endpoint;
 };
 
-/** enum ipa_resource_type_src - source resource types */
-enum ipa_resource_type_src {
-	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS,
+/** enum ipa_resource_type - IPA resource types */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
 	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
 	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
 	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
 	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-};
 
-/** enum ipa_resource_type_dst - destination resource types */
-enum ipa_resource_type_dst {
-	IPA_RESOURCE_TYPE_DST_DATA_SECTORS,
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
 	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
 };
 
@@ -225,7 +224,7 @@ struct ipa_resource_limits {
  * @limits:	array of limits to use for each resource group
  */
 struct ipa_resource_src {
-	enum ipa_resource_type_src type;
+	enum ipa_resource_type type;	/* IPA_RESOURCE_TYPE_SRC_* */
 	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_SRC_MAX];
 };
 
@@ -235,7 +234,7 @@ struct ipa_resource_src {
  * @limits:	array of limits to use for each resource group
  */
 struct ipa_resource_dst {
-	enum ipa_resource_type_dst type;
+	enum ipa_resource_type type;	/* IPA_RESOURCE_TYPE_DST_* */
 	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_DST_MAX];
 };
 
-- 
cgit v1.2.3


From 4bcfb35e7af9413a4715ec2c74f51e20043c70e2 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:16 -0500
Subject: net: ipa: index resource limits with type

Remove the type field from the ipa_resource_src and ipa_resource_dst
structures, and instead use that value as the index into the arrays
of source and destination resources.

Change ipa_resource_config_src() and ipa_resource_config_dst() so
the resource type is passed in as an argument.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 21 +++++++--------------
 drivers/net/ipa/ipa_data-sdm845.c | 21 +++++++--------------
 drivers/net/ipa/ipa_data.h        |  4 ----
 drivers/net/ipa/ipa_resource.c    | 20 ++++++++++----------
 4 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index e9b741832a1d..eba14d7bc8ac 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -150,36 +150,31 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 
 /* Source resource configuration data for the SC7180 SoC */
 static const struct ipa_resource_src ipa_resource_src[] = {
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS,
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 3,
 			.max = 63,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 3,
 			.max = 3,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 10,
 			.max = 10,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 1,
 			.max = 1,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 5,
 			.max = 5,
@@ -189,15 +184,13 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 
 /* Destination resource configuration data for the SC7180 SoC */
 static const struct ipa_resource_dst ipa_resource_dst[] = {
-	{
-		.type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS,
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 3,
 			.max = 3,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 1,
 			.max = 63,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 3bc5fcfdf960..4a4b3bd8a17c 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -168,8 +168,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 
 /* Source resource configuration data for the SDM845 SoC */
 static const struct ipa_resource_src ipa_resource_src[] = {
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS,
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 1,
 			.max = 255,
@@ -183,8 +182,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.max = 63,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 10,
 			.max = 10,
@@ -198,8 +196,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.max = 8,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 12,
 			.max = 12,
@@ -213,8 +210,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.max = 8,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 0,
 			.max = 63,
@@ -232,8 +228,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 			.max = 63,
 		},
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 14,
 			.max = 14,
@@ -251,8 +246,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 
 /* Destination resource configuration data for the SDM845 SoC */
 static const struct ipa_resource_dst ipa_resource_dst[] = {
-	{
-		.type = IPA_RESOURCE_TYPE_DST_DATA_SECTORS,
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
 			.min = 4,
 			.max = 4,
@@ -266,8 +260,7 @@ static const struct ipa_resource_dst ipa_resource_dst[] = {
 			.max = 3,
 		}
 	},
-	{
-		.type = IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
 			.min = 2,
 			.max = 63,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index ccd7fd0b801a..44b93f93ee60 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -220,21 +220,17 @@ struct ipa_resource_limits {
 
 /**
  * struct ipa_resource_src - source endpoint group resource usage
- * @type:	source group resource type
  * @limits:	array of limits to use for each resource group
  */
 struct ipa_resource_src {
-	enum ipa_resource_type type;	/* IPA_RESOURCE_TYPE_SRC_* */
 	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_SRC_MAX];
 };
 
 /**
  * struct ipa_resource_dst - destination endpoint group resource usage
- * @type:	destination group resource type
  * @limits:	array of limits to use for each resource group
  */
 struct ipa_resource_dst {
-	enum ipa_resource_type type;	/* IPA_RESOURCE_TYPE_DST_* */
 	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_DST_MAX];
 };
 
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index edd9d1e5cbb6..506bcccaef64 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -128,54 +128,54 @@ ipa_resource_config_common(struct ipa *ipa, u32 offset,
 	iowrite32(val, ipa->reg_virt + offset);
 }
 
-static void ipa_resource_config_src(struct ipa *ipa,
+static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 				    const struct ipa_resource_src *resource)
 {
 	u32 group_count = ipa_resource_group_src_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
 	u32 offset;
 
-	offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
 
 	if (group_count < 3)
 		return;
 
-	offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_SRC_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 3 ? NULL : &resource->limits[3];
 	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
 
 	if (group_count < 5)
 		return;
 
-	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 5 ? NULL : &resource->limits[5];
 	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
 }
 
-static void ipa_resource_config_dst(struct ipa *ipa,
+static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
 				    const struct ipa_resource_dst *resource)
 {
 	u32 group_count = ipa_resource_group_dst_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
 	u32 offset;
 
-	offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
 
 	if (group_count < 3)
 		return;
 
-	offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 3 ? NULL : &resource->limits[3];
 	ipa_resource_config_common(ipa, offset, &resource->limits[2], ylimits);
 
 	if (group_count < 5)
 		return;
 
-	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource->type);
+	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 5 ? NULL : &resource->limits[5];
 	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
 }
@@ -189,10 +189,10 @@ int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
 		return -EINVAL;
 
 	for (i = 0; i < data->resource_src_count; i++)
-		ipa_resource_config_src(ipa, &data->resource_src[i]);
+		ipa_resource_config_src(ipa, i, &data->resource_src[i]);
 
 	for (i = 0; i < data->resource_dst_count; i++)
-		ipa_resource_config_dst(ipa, &data->resource_dst[i]);
+		ipa_resource_config_dst(ipa, i, &data->resource_dst[i]);
 
 	return 0;
 }
-- 
cgit v1.2.3


From cf9a10bd7c49ab59ef476711e8c8467ce738b6d9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:17 -0500
Subject: net: ipa: move ipa_resource_type definition

Most platforms have the same set of source and destination resource
types.  But some older platforms have some additional ones, and it's
possible different resources will be used in the future.

Move the definition of the ipa_resource_type enumerated type so it
is defined for each platform in its configuration data file.  This
permits each to have a distinct set of resources.

Shorten the data files slightly, by putting the min and max limit
values on the same line.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 35 ++++++++++-------
 drivers/net/ipa/ipa_data-sdm845.c | 80 ++++++++++++++++++---------------------
 drivers/net/ipa/ipa_data.h        | 14 -------
 3 files changed, 57 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index eba14d7bc8ac..24ff31517565 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -9,6 +9,20 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/** enum ipa_resource_type - IPA resource types */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
 /* Resource groups used for the SC7180 SoC */
 enum ipa_rsrc_group_id {
 	/* Source resource group identifiers */
@@ -152,32 +166,27 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 static const struct ipa_resource_src ipa_resource_src[] = {
 	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 3,
-			.max = 63,
+			.min = 3,	.max = 63,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 3,
-			.max = 3,
+			.min = 3,	.max = 3,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 10,
-			.max = 10,
+			.min = 10,	.max = 10,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 1,
-			.max = 1,
+			.min = 1,	.max = 1,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 5,
-			.max = 5,
+			.min = 5,	.max = 5,
 		},
 	},
 };
@@ -186,14 +195,12 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 static const struct ipa_resource_dst ipa_resource_dst[] = {
 	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 3,
-			.max = 3,
+			.min = 3,	.max = 3,
 		},
 	},
 	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 1,
-			.max = 63,
+			.min = 1,	.max = 63,
 		},
 	},
 };
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 4a4b3bd8a17c..357e8ba43a36 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -11,6 +11,20 @@
 #include "ipa_endpoint.h"
 #include "ipa_mem.h"
 
+/** enum ipa_resource_type - IPA resource types */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
 /* Resource groups used for the SDM845 SoC */
 enum ipa_rsrc_group_id {
 	/* Source resource group identifiers */
@@ -170,76 +184,60 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 static const struct ipa_resource_src ipa_resource_src[] = {
 	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 1,
-			.max = 255,
+			.min = 1,	.max = 255,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 1,
-			.max = 255,
+			.min = 1,	.max = 255,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 1,
-			.max = 63,
+			.min = 1,	.max = 63,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 10,
-			.max = 10,
+			.min = 10,	.max = 10,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 10,
-			.max = 10,
+			.min = 10,	.max = 10,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 8,
-			.max = 8,
+			.min = 8,	.max = 8,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 12,
-			.max = 12,
+			.min = 12,	.max = 12,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 14,
-			.max = 14,
+			.min = 14,	.max = 14,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 8,
-			.max = 8,
+			.min = 8,	.max = 8,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 0,
-			.max = 63,
+			.min = 0,	.max = 63,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 0,
-			.max = 63,
+			.min = 0,	.max = 63,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_MHI_DMA] = {
-			.min = 0,
-			.max = 63,
+			.min = 0,	.max = 63,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 0,
-			.max = 63,
+			.min = 0,	.max = 63,
 		},
 	},
 	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 14,
-			.max = 14,
+			.min = 14,	.max = 14,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 20,
-			.max = 20,
+			.min = 20,	.max = 20,
 		},
 		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 14,
-			.max = 14,
+			.min = 14,	.max = 14,
 		},
 	},
 };
@@ -248,30 +246,24 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 static const struct ipa_resource_dst ipa_resource_dst[] = {
 	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
-			.min = 4,
-			.max = 4,
+			.min = 4,	.max = 4,
 		},
 		.limits[1] = {
-			.min = 4,
-			.max = 4,
+			.min = 4,	.max = 4,
 		},
 		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
-			.min = 3,
-			.max = 3,
+			.min = 3,	.max = 3,
 		}
 	},
 	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
 		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
-			.min = 2,
-			.max = 63,
+			.min = 2,	.max = 63,
 		},
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 1,
-			.max = 63,
+			.min = 1,	.max = 63,
 		},
 		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
-			.min = 1,
-			.max = 2,
+			.min = 1,	.max = 2,
 		}
 	},
 };
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 44b93f93ee60..e1096d8ba575 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -194,20 +194,6 @@ struct ipa_gsi_endpoint_data {
 	struct ipa_endpoint_data endpoint;
 };
 
-/** enum ipa_resource_type - IPA resource types */
-enum ipa_resource_type {
-	/* Source resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
-	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
-	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-
-	/* Destination resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
-	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
-};
-
 /**
  * struct ipa_resource_limits - minimum and maximum resource counts
  * @min:	minimum number of resources of a given type
-- 
cgit v1.2.3


From d9d1cddf8b98e9752bbe528b7085d3a5d155fb2d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:18 -0500
Subject: net: ipa: combine source and destination group limits

Replace IPA_RESOURCE_GROUP_SRC_MAX and IPA_RESOURCE_GROUP_DST_MAX
with a single symbol, IPA_RESOURCE_GROUP_MAX.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data.h     | 11 +++++------
 drivers/net/ipa/ipa_resource.c | 10 +++++-----
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index e1096d8ba575..d6d14818a396 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -47,8 +47,7 @@
  */
 
 /* The maximum value returned by ipa_resource_group_{src,dst}_count() */
-#define IPA_RESOURCE_GROUP_SRC_MAX	5
-#define IPA_RESOURCE_GROUP_DST_MAX	5
+#define IPA_RESOURCE_GROUP_MAX	5
 
 /** enum ipa_qsb_master_id - array index for IPA QSB configuration data */
 enum ipa_qsb_master_id {
@@ -206,18 +205,18 @@ struct ipa_resource_limits {
 
 /**
  * struct ipa_resource_src - source endpoint group resource usage
- * @limits:	array of limits to use for each resource group
+ * @limits:	array of source resource limits, indexed by group
  */
 struct ipa_resource_src {
-	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_SRC_MAX];
+	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_MAX];
 };
 
 /**
  * struct ipa_resource_dst - destination endpoint group resource usage
- * @limits:	array of limits to use for each resource group
+ * @limits:	array of destination resource limits, indexed by group
  */
 struct ipa_resource_dst {
-	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_DST_MAX];
+	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_MAX];
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 506bcccaef64..38b95b6a5193 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -77,10 +77,10 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 	u32 j;
 
 	/* We program at most 6 source or destination resource group limits */
-	BUILD_BUG_ON(IPA_RESOURCE_GROUP_SRC_MAX > 6);
+	BUILD_BUG_ON(IPA_RESOURCE_GROUP_MAX > 6);
 
 	group_count = ipa_resource_group_src_count(ipa->version);
-	if (!group_count || group_count > IPA_RESOURCE_GROUP_SRC_MAX)
+	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
 		return false;
 
 	/* Return an error if a non-zero resource limit is specified
@@ -90,20 +90,20 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 		const struct ipa_resource_src *resource;
 
 		resource = &data->resource_src[i];
-		for (j = group_count; j < IPA_RESOURCE_GROUP_SRC_MAX; j++)
+		for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++)
 			if (resource->limits[j].min || resource->limits[j].max)
 				return false;
 	}
 
 	group_count = ipa_resource_group_dst_count(ipa->version);
-	if (!group_count || group_count > IPA_RESOURCE_GROUP_DST_MAX)
+	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
 		return false;
 
 	for (i = 0; i < data->resource_dst_count; i++) {
 		const struct ipa_resource_dst *resource;
 
 		resource = &data->resource_dst[i];
-		for (j = group_count; j < IPA_RESOURCE_GROUP_DST_MAX; j++)
+		for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++)
 			if (resource->limits[j].min || resource->limits[j].max)
 				return false;
 	}
-- 
cgit v1.2.3


From 7336ce1a7ae70335b895901f4b1893c7f40b6be5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:19 -0500
Subject: net: ipa: combine source and destation resource types

The ipa_resource_src and ipa_resource_dst structures are identical
in form, so just replace them with a single structure.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  4 ++--
 drivers/net/ipa/ipa_data-sdm845.c |  4 ++--
 drivers/net/ipa/ipa_data.h        | 18 +++++-------------
 drivers/net/ipa/ipa_resource.c    |  8 ++++----
 4 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 24ff31517565..631b50fc8d53 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -163,7 +163,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 };
 
 /* Source resource configuration data for the SC7180 SoC */
-static const struct ipa_resource_src ipa_resource_src[] = {
+static const struct ipa_resource ipa_resource_src[] = {
 	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
 			.min = 3,	.max = 63,
@@ -192,7 +192,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 };
 
 /* Destination resource configuration data for the SC7180 SoC */
-static const struct ipa_resource_dst ipa_resource_dst[] = {
+static const struct ipa_resource ipa_resource_dst[] = {
 	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
 			.min = 3,	.max = 3,
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 357e8ba43a36..3c9675ce556c 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -181,7 +181,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 };
 
 /* Source resource configuration data for the SDM845 SoC */
-static const struct ipa_resource_src ipa_resource_src[] = {
+static const struct ipa_resource ipa_resource_src[] = {
 	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
 		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
 			.min = 1,	.max = 255,
@@ -243,7 +243,7 @@ static const struct ipa_resource_src ipa_resource_src[] = {
 };
 
 /* Destination resource configuration data for the SDM845 SoC */
-static const struct ipa_resource_dst ipa_resource_dst[] = {
+static const struct ipa_resource ipa_resource_dst[] = {
 	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
 		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
 			.min = 4,	.max = 4,
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index d6d14818a396..9060586eb7cb 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -204,18 +204,10 @@ struct ipa_resource_limits {
 };
 
 /**
- * struct ipa_resource_src - source endpoint group resource usage
- * @limits:	array of source resource limits, indexed by group
+ * struct ipa_resource - resource group source or destination resource usage
+ * @limits:	array of resource limits, indexed by group
  */
-struct ipa_resource_src {
-	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_MAX];
-};
-
-/**
- * struct ipa_resource_dst - destination endpoint group resource usage
- * @limits:	array of destination resource limits, indexed by group
- */
-struct ipa_resource_dst {
+struct ipa_resource {
 	struct ipa_resource_limits limits[IPA_RESOURCE_GROUP_MAX];
 };
 
@@ -233,9 +225,9 @@ struct ipa_resource_dst {
  */
 struct ipa_resource_data {
 	u32 resource_src_count;
-	const struct ipa_resource_src *resource_src;
+	const struct ipa_resource *resource_src;
 	u32 resource_dst_count;
-	const struct ipa_resource_dst *resource_dst;
+	const struct ipa_resource *resource_dst;
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 38b95b6a5193..c7edacfa7d19 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -87,7 +87,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 	 * for a resource group not supported by hardware.
 	 */
 	for (i = 0; i < data->resource_src_count; i++) {
-		const struct ipa_resource_src *resource;
+		const struct ipa_resource *resource;
 
 		resource = &data->resource_src[i];
 		for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++)
@@ -100,7 +100,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 		return false;
 
 	for (i = 0; i < data->resource_dst_count; i++) {
-		const struct ipa_resource_dst *resource;
+		const struct ipa_resource *resource;
 
 		resource = &data->resource_dst[i];
 		for (j = group_count; j < IPA_RESOURCE_GROUP_MAX; j++)
@@ -129,7 +129,7 @@ ipa_resource_config_common(struct ipa *ipa, u32 offset,
 }
 
 static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
-				    const struct ipa_resource_src *resource)
+				    const struct ipa_resource *resource)
 {
 	u32 group_count = ipa_resource_group_src_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
@@ -155,7 +155,7 @@ static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 }
 
 static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
-				    const struct ipa_resource_dst *resource)
+				    const struct ipa_resource *resource)
 {
 	u32 group_count = ipa_resource_group_dst_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
-- 
cgit v1.2.3


From 93c03729c548ea30b8bb38f2ab51008f11babe2a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:20 -0500
Subject: net: ipa: pass data for source and dest resource config

Pass the resource data pointer to ipa_resource_config_src() and
ipa_resource_config_dst() to be used for configuring resource
limits.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_resource.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index c7edacfa7d19..3db4dd3bda9c 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -129,12 +129,15 @@ ipa_resource_config_common(struct ipa *ipa, u32 offset,
 }
 
 static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
-				    const struct ipa_resource *resource)
+				    const struct ipa_resource_data *data)
 {
 	u32 group_count = ipa_resource_group_src_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
+	const struct ipa_resource *resource;
 	u32 offset;
 
+	resource = &data->resource_src[resource_type];
+
 	offset = IPA_REG_SRC_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
@@ -155,12 +158,15 @@ static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 }
 
 static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
-				    const struct ipa_resource *resource)
+				    const struct ipa_resource_data *data)
 {
 	u32 group_count = ipa_resource_group_dst_count(ipa->version);
 	const struct ipa_resource_limits *ylimits;
+	const struct ipa_resource *resource;
 	u32 offset;
 
+	resource = &data->resource_dst[resource_type];
+
 	offset = IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 1 ? NULL : &resource->limits[1];
 	ipa_resource_config_common(ipa, offset, &resource->limits[0], ylimits);
@@ -189,10 +195,10 @@ int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
 		return -EINVAL;
 
 	for (i = 0; i < data->resource_src_count; i++)
-		ipa_resource_config_src(ipa, i, &data->resource_src[i]);
+		ipa_resource_config_src(ipa, i, data);
 
 	for (i = 0; i < data->resource_dst_count; i++)
-		ipa_resource_config_dst(ipa, i, &data->resource_dst[i]);
+		ipa_resource_config_dst(ipa, i, data);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 4fd704b3608a4c89260ea33895a694bc5385e00f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:21 -0500
Subject: net: ipa: record number of groups in data

The arrays of source and destination resource limits defined in
configuration data are of a fixed size--which is the maximum number
of resource groups supported for any platform.  Most platforms will
use fewer than that many groups.

Add new members to the ipa_rsrc_group_id enumerated type to define
the number of source and destination resource groups are defined for
the platform.  (This type is defined for each platform in its data
file.)

Add a new field to the resource configuration data that indicates
how many of the source and destination resource groups are actually
used for the platform, and initialize it with the count value.  This
allows us to determine the number of groups defined for the platform
without exposing the ipa_rsrc_group_id enumerated type.

As a result, we no longer need ipa_resource_group_src_count()
and ipa_resource_group_dst_count(), because each platform now
defines its supported number of resource groups.  So get rid of
those two functions.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  4 ++++
 drivers/net/ipa/ipa_data-sdm845.c |  4 ++++
 drivers/net/ipa/ipa_data.h        |  4 ++++
 drivers/net/ipa/ipa_resource.c    | 50 ++++-----------------------------------
 4 files changed, 16 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 631b50fc8d53..c9b6a6aaadac 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -27,9 +27,11 @@ enum ipa_resource_type {
 enum ipa_rsrc_group_id {
 	/* Source resource group identifiers */
 	IPA_RSRC_GROUP_SRC_UL_DL	= 0,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
 
 	/* Destination resource group identifiers */
 	IPA_RSRC_GROUP_DST_UL_DL_DPL	= 0,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
 };
 
 /* QSB configuration for the SC7180 SoC. */
@@ -207,6 +209,8 @@ static const struct ipa_resource ipa_resource_dst[] = {
 
 /* Resource configuration for the SC7180 SoC. */
 static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
 	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
 	.resource_src		= ipa_resource_src,
 	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 3c9675ce556c..e14e3fb1d970 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -32,11 +32,13 @@ enum ipa_rsrc_group_id {
 	IPA_RSRC_GROUP_SRC_UL_DL,
 	IPA_RSRC_GROUP_SRC_MHI_DMA,
 	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
 
 	/* Destination resource group identifiers */
 	IPA_RSRC_GROUP_DST_LWA_DL	= 0,
 	IPA_RSRC_GROUP_DST_UL_DL_DPL,
 	IPA_RSRC_GROUP_DST_UNUSED_2,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
 };
 
 /* QSB configuration for the SDM845 SoC. */
@@ -270,6 +272,8 @@ static const struct ipa_resource ipa_resource_dst[] = {
 
 /* Resource configuration for the SDM845 SoC. */
 static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
 	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
 	.resource_src		= ipa_resource_src,
 	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 9060586eb7cb..c5d763a3782f 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -213,6 +213,8 @@ struct ipa_resource {
 
 /**
  * struct ipa_resource_data - IPA resource configuration data
+ * @rsrc_group_src_count: number of source resource groups supported
+ * @rsrc_group_dst_count: number of destination resource groups supported
  * @resource_src_count:	number of entries in the resource_src array
  * @resource_src:	source endpoint group resources
  * @resource_dst_count:	number of entries in the resource_dst array
@@ -224,6 +226,8 @@ struct ipa_resource {
  * programming it at initialization time, so we specify it here.
  */
 struct ipa_resource_data {
+	u32 rsrc_group_src_count;
+	u32 rsrc_group_dst_count;
 	u32 resource_src_count;
 	const struct ipa_resource *resource_src;
 	u32 resource_dst_count;
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 3db4dd3bda9c..578ff070d405 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -26,48 +26,6 @@
  * total resources of each type is assigned for use by each group.
  */
 
-/* # IPA source resource groups available based on version */
-static u32 ipa_resource_group_src_count(enum ipa_version version)
-{
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		return 4;
-
-	case IPA_VERSION_4_2:
-		return 1;
-
-	case IPA_VERSION_4_5:
-		return 5;
-
-	default:
-		return 0;
-	}
-}
-
-/* # IPA destination resource groups available based on version */
-static u32 ipa_resource_group_dst_count(enum ipa_version version)
-{
-	switch (version) {
-	case IPA_VERSION_3_5_1:
-		return 3;
-
-	case IPA_VERSION_4_0:
-	case IPA_VERSION_4_1:
-		return 4;
-
-	case IPA_VERSION_4_2:
-		return 1;
-
-	case IPA_VERSION_4_5:
-		return 5;
-
-	default:
-		return 0;
-	}
-}
-
 static bool ipa_resource_limits_valid(struct ipa *ipa,
 				      const struct ipa_resource_data *data)
 {
@@ -79,7 +37,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 	/* We program at most 6 source or destination resource group limits */
 	BUILD_BUG_ON(IPA_RESOURCE_GROUP_MAX > 6);
 
-	group_count = ipa_resource_group_src_count(ipa->version);
+	group_count = data->rsrc_group_src_count;
 	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
 		return false;
 
@@ -95,7 +53,7 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 				return false;
 	}
 
-	group_count = ipa_resource_group_dst_count(ipa->version);
+	group_count = data->rsrc_group_src_count;
 	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
 		return false;
 
@@ -131,7 +89,7 @@ ipa_resource_config_common(struct ipa *ipa, u32 offset,
 static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 				    const struct ipa_resource_data *data)
 {
-	u32 group_count = ipa_resource_group_src_count(ipa->version);
+	u32 group_count = data->rsrc_group_src_count;
 	const struct ipa_resource_limits *ylimits;
 	const struct ipa_resource *resource;
 	u32 offset;
@@ -160,7 +118,7 @@ static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
 				    const struct ipa_resource_data *data)
 {
-	u32 group_count = ipa_resource_group_dst_count(ipa->version);
+	u32 group_count = data->rsrc_group_dst_count;
 	const struct ipa_resource_limits *ylimits;
 	const struct ipa_resource *resource;
 	u32 offset;
-- 
cgit v1.2.3


From 3219953bedc563ef988fa423f8ac67a441f69b4b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 26 Mar 2021 10:11:22 -0500
Subject: net: ipa: support more than 6 resource groups

IPA versions 3.0 and 3.1 support up to 8 resource groups.  There is
some interest in supporting these older versions of the hardware, so
update the resource configuration code to program resource limits
for these groups if specified.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data.h     |  4 ++--
 drivers/net/ipa/ipa_reg.h      |  4 ++++
 drivers/net/ipa/ipa_resource.c | 18 ++++++++++++++++--
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index c5d763a3782f..ea8f99286228 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -46,8 +46,8 @@
  * the IPA endpoint.
  */
 
-/* The maximum value returned by ipa_resource_group_{src,dst}_count() */
-#define IPA_RESOURCE_GROUP_MAX	5
+/* The maximum possible number of source or destination resource groups */
+#define IPA_RESOURCE_GROUP_MAX	8
 
 /** enum ipa_qsb_master_id - array index for IPA QSB configuration data */
 enum ipa_qsb_master_id {
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 9c798cef7b2e..de2a944bad86 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -353,12 +353,16 @@ enum ipa_pulse_gran {
 					(0x00000404 + 0x0020 * (rt))
 #define IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000408 + 0x0020 * (rt))
+#define IPA_REG_SRC_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(rt) \
+					(0x0000040c + 0x0020 * (rt))
 #define IPA_REG_DST_RSRC_GRP_01_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000500 + 0x0020 * (rt))
 #define IPA_REG_DST_RSRC_GRP_23_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000504 + 0x0020 * (rt))
 #define IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(rt) \
 					(0x00000508 + 0x0020 * (rt))
+#define IPA_REG_DST_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(rt) \
+					(0x0000050c + 0x0020 * (rt))
 /* The next four fields are used for all resource group registers */
 #define X_MIN_LIM_FMASK				GENMASK(5, 0)
 #define X_MAX_LIM_FMASK				GENMASK(13, 8)
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 578ff070d405..85f922d6f222 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -34,8 +34,8 @@ static bool ipa_resource_limits_valid(struct ipa *ipa,
 	u32 i;
 	u32 j;
 
-	/* We program at most 6 source or destination resource group limits */
-	BUILD_BUG_ON(IPA_RESOURCE_GROUP_MAX > 6);
+	/* We program at most 8 source or destination resource group limits */
+	BUILD_BUG_ON(IPA_RESOURCE_GROUP_MAX > 8);
 
 	group_count = data->rsrc_group_src_count;
 	if (!group_count || group_count > IPA_RESOURCE_GROUP_MAX)
@@ -113,6 +113,13 @@ static void ipa_resource_config_src(struct ipa *ipa, u32 resource_type,
 	offset = IPA_REG_SRC_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 5 ? NULL : &resource->limits[5];
 	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
+
+	if (group_count < 7)
+		return;
+
+	offset = IPA_REG_SRC_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(resource_type);
+	ylimits = group_count == 7 ? NULL : &resource->limits[7];
+	ipa_resource_config_common(ipa, offset, &resource->limits[6], ylimits);
 }
 
 static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
@@ -142,6 +149,13 @@ static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
 	offset = IPA_REG_DST_RSRC_GRP_45_RSRC_TYPE_N_OFFSET(resource_type);
 	ylimits = group_count == 5 ? NULL : &resource->limits[5];
 	ipa_resource_config_common(ipa, offset, &resource->limits[4], ylimits);
+
+	if (group_count < 7)
+		return;
+
+	offset = IPA_REG_DST_RSRC_GRP_67_RSRC_TYPE_N_OFFSET(resource_type);
+	ylimits = group_count == 7 ? NULL : &resource->limits[7];
+	ipa_resource_config_common(ipa, offset, &resource->limits[6], ylimits);
 }
 
 /* Configure resources */
-- 
cgit v1.2.3


From 2d6f5a2b5720cd3fdbaa21d8f5a6a192257b2a3c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 26 Mar 2021 11:26:30 -0700
Subject: mptcp: clean-up the rtx path

After the previous patch we can easily avoid invoking
the workqueue to perform the retransmission, if the
msk socket lock is held at rtx timer expiration.

This also simplifies the relevant code.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 42 +++++++++++-------------------------------
 net/mptcp/protocol.h |  1 +
 2 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1590b9d4cde2..171b77537dcb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2047,28 +2047,21 @@ out_err:
 	return copied;
 }
 
-static void mptcp_retransmit_handler(struct sock *sk)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-
-	set_bit(MPTCP_WORK_RTX, &msk->flags);
-	mptcp_schedule_work(sk);
-}
-
 static void mptcp_retransmit_timer(struct timer_list *t)
 {
 	struct inet_connection_sock *icsk = from_timer(icsk, t,
 						       icsk_retransmit_timer);
 	struct sock *sk = &icsk->icsk_inet.sk;
+	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
-		mptcp_retransmit_handler(sk);
+		/* we need a process context to retransmit */
+		if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
+			mptcp_schedule_work(sk);
 	} else {
 		/* delegate our work to tcp_release_cb() */
-		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED,
-				      &sk->sk_tsq_flags))
-			sock_hold(sk);
+		set_bit(MPTCP_RETRANSMIT, &msk->flags);
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
@@ -2958,17 +2951,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 	}
 }
 
-#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
-
 /* processes deferred events and flush wmem */
 static void mptcp_release_cb(struct sock *sk)
 {
-	unsigned long flags, nflags;
-
 	for (;;) {
-		flags = 0;
+		unsigned long flags = 0;
+
 		if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
 			flags |= BIT(MPTCP_PUSH_PENDING);
+		if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
+			flags |= BIT(MPTCP_RETRANSMIT);
 		if (!flags)
 			break;
 
@@ -2983,6 +2975,8 @@ static void mptcp_release_cb(struct sock *sk)
 		spin_unlock_bh(&sk->sk_lock.slock);
 		if (flags & BIT(MPTCP_PUSH_PENDING))
 			__mptcp_push_pending(sk, 0);
+		if (flags & BIT(MPTCP_RETRANSMIT))
+			__mptcp_retrans(sk);
 
 		cond_resched();
 		spin_lock_bh(&sk->sk_lock.slock);
@@ -2998,20 +2992,6 @@ static void mptcp_release_cb(struct sock *sk)
 	 */
 	__mptcp_update_wmem(sk);
 	__mptcp_update_rmem(sk);
-
-	do {
-		flags = sk->sk_tsq_flags;
-		if (!(flags & MPTCP_DEFERRED_ALL))
-			return;
-		nflags = flags & ~MPTCP_DEFERRED_ALL;
-	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
-
-	sock_release_ownership(sk);
-
-	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
-		mptcp_retransmit_handler(sk);
-		__sock_put(sk);
-	}
 }
 
 void mptcp_subflow_process_delegated(struct sock *ssk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 1111a99b024f..0116308f5f69 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -104,6 +104,7 @@
 #define MPTCP_PUSH_PENDING	6
 #define MPTCP_CLEAN_UNA		7
 #define MPTCP_ERROR_REPORT	8
+#define MPTCP_RETRANSMIT	9
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
-- 
cgit v1.2.3


From f7efc7771eac1d149c8981a6d203bce6f1c49210 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:31 -0700
Subject: mptcp: drop argument port from mptcp_pm_announce_addr

Drop the redundant argument 'port' from mptcp_pm_announce_addr, use the
port field of another argument 'addr' instead.

Fixes: 0f5c9e3f079f ("mptcp: add port parameter for mptcp_pm_announce_addr")
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c         | 6 +++---
 net/mptcp/pm_netlink.c | 9 +++------
 net/mptcp/protocol.h   | 2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 4cfd80f90003..51e60582b408 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -14,7 +14,7 @@
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
-			   bool echo, bool port)
+			   bool echo)
 {
 	u8 add_addr = READ_ONCE(msk->pm.addr_signal);
 
@@ -33,7 +33,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 		add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
 	if (addr->family == AF_INET6)
 		add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
-	if (port)
+	if (addr->port)
 		add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
 	WRITE_ONCE(msk->pm.addr_signal, add_addr);
 	return 0;
@@ -188,7 +188,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 	spin_lock_bh(&pm->lock);
 
 	if (!READ_ONCE(pm->accept_addr)) {
-		mptcp_pm_announce_addr(msk, addr, true, addr->port);
+		mptcp_pm_announce_addr(msk, addr, true);
 		mptcp_pm_add_addr_send_ack(msk);
 	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
 		pm->remote = *addr;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 5857b82c88bf..53c09db08058 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -308,7 +308,7 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 
 	if (!mptcp_pm_should_add_signal(msk)) {
 		pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
-		mptcp_pm_announce_addr(msk, &entry->addr, false, entry->addr.port);
+		mptcp_pm_announce_addr(msk, &entry->addr, false);
 		mptcp_pm_add_addr_send_ack(msk);
 		entry->retrans_times++;
 	}
@@ -417,7 +417,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 		if (local) {
 			if (mptcp_pm_alloc_anno_list(msk, local)) {
 				msk->pm.add_addr_signaled++;
-				mptcp_pm_announce_addr(msk, &local->addr, false, local->addr.port);
+				mptcp_pm_announce_addr(msk, &local->addr, false);
 				mptcp_pm_nl_add_addr_send_ack(msk);
 			}
 		} else {
@@ -468,7 +468,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	struct mptcp_addr_info remote;
 	struct mptcp_addr_info local;
 	unsigned int subflows_max;
-	bool use_port = false;
 
 	add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
 	subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -488,8 +487,6 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	remote = msk->pm.remote;
 	if (!remote.port)
 		remote.port = sk->sk_dport;
-	else
-		use_port = true;
 	memset(&local, 0, sizeof(local));
 	local.family = remote.family;
 
@@ -497,7 +494,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	__mptcp_subflow_connect(sk, &local, &remote);
 	spin_lock_bh(&msk->pm.lock);
 
-	mptcp_pm_announce_addr(msk, &remote, true, use_port);
+	mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
 	mptcp_pm_nl_add_addr_send_ack(msk);
 }
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0116308f5f69..2bcd6897ea7d 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -663,7 +663,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
-			   bool echo, bool port);
+			   bool echo);
 int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
 
-- 
cgit v1.2.3


From d84ad04941c3e30dec193d4c39fce07a4c513cb4 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:32 -0700
Subject: mptcp: skip connecting the connected address

This patch added a new helper named lookup_subflow_by_daddr to find
whether the destination address is in the msk's conn_list.

In mptcp_pm_nl_add_addr_received, use lookup_subflow_by_daddr to check
whether the announced address is already connected. If it is, skip
connecting this address and send out the echo.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 53c09db08058..4b4b87803f33 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -140,6 +140,24 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
 	return false;
 }
 
+static bool lookup_subflow_by_daddr(const struct list_head *list,
+				    struct mptcp_addr_info *daddr)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_addr_info cur;
+	struct sock_common *skc;
+
+	list_for_each_entry(subflow, list, node) {
+		skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
+
+		remote_address(skc, &cur);
+		if (addresses_equal(&cur, daddr, daddr->port))
+			return true;
+	}
+
+	return false;
+}
+
 static struct mptcp_pm_addr_entry *
 select_local_address(const struct pm_nl_pernet *pernet,
 		     struct mptcp_sock *msk)
@@ -475,6 +493,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	pr_debug("accepted %d:%d remote family %d",
 		 msk->pm.add_addr_accepted, add_addr_accept_max,
 		 msk->pm.remote.family);
+
+	if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
+		goto add_addr_echo;
+
 	msk->pm.add_addr_accepted++;
 	msk->pm.subflows++;
 	if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
@@ -494,6 +516,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	__mptcp_subflow_connect(sk, &local, &remote);
 	spin_lock_bh(&msk->pm.lock);
 
+add_addr_echo:
 	mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
 	mptcp_pm_nl_add_addr_send_ack(msk);
 }
-- 
cgit v1.2.3


From 62535200be178fe9360420557cfbcb94c1b93694 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:33 -0700
Subject: mptcp: drop unused subflow in mptcp_pm_subflow_established

This patch drops the unused parameter subflow in
mptcp_pm_subflow_established().

Fixes: 926bdeab5535 ("mptcp: Implement path manager interface commands")
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 2 +-
 net/mptcp/pm.c       | 3 +--
 net/mptcp/protocol.h | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 2b7eec93c9f5..2d2340b22f61 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -882,7 +882,7 @@ fully_established:
 	subflow->pm_notified = 1;
 	if (subflow->mp_join) {
 		clear_3rdack_retransmission(ssk);
-		mptcp_pm_subflow_established(msk, subflow);
+		mptcp_pm_subflow_established(msk);
 	} else {
 		mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
 	}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 51e60582b408..0a06d5947a73 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -152,8 +152,7 @@ void mptcp_pm_connection_closed(struct mptcp_sock *msk)
 	pr_debug("msk=%p", msk);
 }
 
-void mptcp_pm_subflow_established(struct mptcp_sock *msk,
-				  struct mptcp_subflow_context *subflow)
+void mptcp_pm_subflow_established(struct mptcp_sock *msk)
 {
 	struct mptcp_pm_data *pm = &msk->pm;
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 2bcd6897ea7d..d04161ec1cb2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -643,8 +643,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
 void mptcp_pm_connection_closed(struct mptcp_sock *msk);
-void mptcp_pm_subflow_established(struct mptcp_sock *msk,
-				  struct mptcp_subflow_context *subflow);
+void mptcp_pm_subflow_established(struct mptcp_sock *msk);
 void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
 void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 				const struct mptcp_addr_info *addr);
-- 
cgit v1.2.3


From 348d5c1dec60f5f50869a8e40315a66006897732 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:34 -0700
Subject: mptcp: move to next addr when timeout

This patch called mptcp_pm_subflow_established to move to the next address
when an ADD_ADDR has been retransmitted the maximum number of times.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 4b4b87803f33..c0c942c101cb 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -337,6 +337,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 
 	spin_unlock_bh(&msk->pm.lock);
 
+	if (entry->retrans_times == ADD_ADDR_RETRANS_MAX)
+		mptcp_pm_subflow_established(msk);
+
 out:
 	__sock_put(sk);
 }
-- 
cgit v1.2.3


From 2e580a63b5c214a89bcc3e243ee2058691cee001 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:35 -0700
Subject: selftests: mptcp: add cfg_do_w for cfg_remove

In some testcases, we need to slow down the transmitting process. This
patch added a new argument named cfg_do_w for cfg_remove to allow the
caller to pass an argument to cfg_remove.

In do_rnd_write, use this cfg_do_w to control the transmitting speed.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 77bb62feb872..69d89b5d666f 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -55,6 +55,7 @@ static int cfg_sndbuf;
 static int cfg_rcvbuf;
 static bool cfg_join;
 static bool cfg_remove;
+static unsigned int cfg_do_w;
 static int cfg_wait;
 
 static void die_usage(void)
@@ -272,8 +273,8 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 	if (cfg_join && first && do_w > 100)
 		do_w = 100;
 
-	if (cfg_remove && do_w > 50)
-		do_w = 50;
+	if (cfg_remove && do_w > cfg_do_w)
+		do_w = cfg_do_w;
 
 	bw = write(fd, buf, do_w);
 	if (bw < 0)
@@ -829,7 +830,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
+	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
@@ -840,6 +841,9 @@ static void parse_opts(int argc, char **argv)
 			cfg_remove = true;
 			cfg_mode = CFG_MODE_POLL;
 			cfg_wait = 400000;
+			cfg_do_w = atoi(optarg);
+			if (cfg_do_w <= 0)
+				cfg_do_w = 50;
 			break;
 		case 'l':
 			listen_mode = true;
-- 
cgit v1.2.3


From 8da6229b9524d9a4ea91ed1308f7e45bfe0b2799 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:36 -0700
Subject: selftests: mptcp: timeout testcases for multi addresses

This patch added the timeout testcases for multi addresses, valid and
invalid.

These testcases need to transmit 8 ADD_ADDRs, so add a new speed level
'least' to set 10 to mptcp_connect to slow down the transmitting process.
The original speed level 'slow' still uses 50.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 26 +++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fe990d8696a9..32379efa2276 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -234,8 +234,10 @@ do_transfer()
 
 	if [ $speed = "fast" ]; then
 		mptcp_connect="./mptcp_connect -j"
-	else
-		mptcp_connect="./mptcp_connect -r"
+	elif [ $speed = "slow" ]; then
+		mptcp_connect="./mptcp_connect -r 50"
+	elif [ $speed = "least" ]; then
+		mptcp_connect="./mptcp_connect -r 10"
 	fi
 
 	local local_addr
@@ -818,6 +820,26 @@ add_addr_timeout_tests()
 	run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
 	chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
 	chk_add_nr 4 0
+
+	# signal addresses timeout
+	reset_with_add_addr_timeout
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+	chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
+	chk_add_nr 8 0
+
+	# signal invalid addresses timeout
+	reset_with_add_addr_timeout
+	ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+	run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+	chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
+	chk_add_nr 8 0
 }
 
 remove_tests()
-- 
cgit v1.2.3


From d88c476f4a7dd69a2588470f6c4f8b663efa16c6 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:37 -0700
Subject: mptcp: export lookup_anno_list_by_saddr

This patch exported the static function lookup_anno_list_by_saddr, and
renamed it to mptcp_lookup_anno_list_by_saddr.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 10 +++++-----
 net/mptcp/protocol.h   |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c0c942c101cb..56d479b24803 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -263,9 +263,9 @@ static void check_work_pending(struct mptcp_sock *msk)
 		WRITE_ONCE(msk->pm.work_pending, false);
 }
 
-static struct mptcp_pm_add_entry *
-lookup_anno_list_by_saddr(struct mptcp_sock *msk,
-			  struct mptcp_addr_info *addr)
+struct mptcp_pm_add_entry *
+mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
+				struct mptcp_addr_info *addr)
 {
 	struct mptcp_pm_add_entry *entry;
 
@@ -352,7 +352,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 	struct sock *sk = (struct sock *)msk;
 
 	spin_lock_bh(&msk->pm.lock);
-	entry = lookup_anno_list_by_saddr(msk, addr);
+	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
 	if (entry)
 		entry->retrans_times = ADD_ADDR_RETRANS_MAX;
 	spin_unlock_bh(&msk->pm.lock);
@@ -372,7 +372,7 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 
 	lockdep_assert_held(&msk->pm.lock);
 
-	if (lookup_anno_list_by_saddr(msk, &entry->addr))
+	if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr))
 		return false;
 
 	add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d04161ec1cb2..9c51444b26cf 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -659,6 +659,9 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 		       struct mptcp_addr_info *addr);
+struct mptcp_pm_add_entry *
+mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
+				struct mptcp_addr_info *addr);
 
 int mptcp_pm_announce_addr(struct mptcp_sock *msk,
 			   const struct mptcp_addr_info *addr,
-- 
cgit v1.2.3


From 557963c383e8209b14de91bf2a0301a41b94d8c4 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:38 -0700
Subject: mptcp: move to next addr when subflow creation fail

When an invalid address was announced, the subflow couldn't be created
for this address. Therefore mptcp_pm_nl_subflow_established couldn't be
invoked. Then the next addresses in the local address list didn't have a
chance to be announced.

This patch invokes the new function mptcp_pm_add_addr_echoed when the
address is echoed. In it, use mptcp_lookup_anno_list_by_saddr to check
whether this address is in the anno_list. If it is, PM schedules the
status MPTCP_PM_SUBFLOW_ESTABLISHED to invoke
mptcp_pm_create_subflow_or_signal_addr to deal with the next address in
the local address list.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  |  1 +
 net/mptcp/pm.c       | 15 +++++++++++++++
 net/mptcp/protocol.h |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 2d2340b22f61..69cafaacc31b 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1040,6 +1040,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 			mptcp_pm_add_addr_received(msk, &addr);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
 		} else {
+			mptcp_pm_add_addr_echoed(msk, &addr);
 			mptcp_pm_del_add_timer(msk, &addr);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
 		}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 0a06d5947a73..966942d1013f 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -196,6 +196,21 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 	spin_unlock_bh(&pm->lock);
 }
 
+void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
+			      struct mptcp_addr_info *addr)
+{
+	struct mptcp_pm_data *pm = &msk->pm;
+
+	pr_debug("msk=%p", msk);
+
+	spin_lock_bh(&pm->lock);
+
+	if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
+		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);
+
+	spin_unlock_bh(&pm->lock);
+}
+
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
 {
 	if (!mptcp_pm_should_add_signal(msk))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 9c51444b26cf..b417b3591e07 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -647,6 +647,8 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk);
 void mptcp_pm_subflow_closed(struct mptcp_sock *msk, u8 id);
 void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 				const struct mptcp_addr_info *addr);
+void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
+			      struct mptcp_addr_info *addr);
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 			       const struct mptcp_rm_list *rm_list);
-- 
cgit v1.2.3


From b65d95adb802b41a501b75ee4646f4a49fc66eb4 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:39 -0700
Subject: mptcp: drop useless addr_signal clear

msk->pm.addr_signal is cleared in mptcp_pm_add_addr_signal, no need to
clear it in mptcp_pm_nl_add_addr_send_ack again. Drop it.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56d479b24803..743bd23b1f78 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -538,7 +538,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
 	subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
 	if (subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		u8 add_addr;
 
 		spin_unlock_bh(&msk->pm.lock);
 		pr_debug("send ack for add_addr%s%s",
@@ -549,13 +548,6 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
 		tcp_send_ack(ssk);
 		release_sock(ssk);
 		spin_lock_bh(&msk->pm.lock);
-
-		add_addr = READ_ONCE(msk->pm.addr_signal);
-		if (mptcp_pm_should_add_signal_ipv6(msk))
-			add_addr &= ~BIT(MPTCP_ADD_ADDR_IPV6);
-		if (mptcp_pm_should_add_signal_port(msk))
-			add_addr &= ~BIT(MPTCP_ADD_ADDR_PORT);
-		WRITE_ONCE(msk->pm.addr_signal, add_addr);
 	}
 }
 
-- 
cgit v1.2.3


From 8dd5efb1f91b09975295bd162441fe4a23edb3e2 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:40 -0700
Subject: mptcp: send ack for rm_addr

This patch changes the sending ACK conditions for the ADD_ADDR, send an
ACK packet for RM_ADDR too.

In mptcp_pm_remove_addr, invoke mptcp_pm_nl_add_addr_send_ack to send
the ACK packet.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c         |  1 +
 net/mptcp/pm_netlink.c | 10 +++++-----
 net/mptcp/protocol.h   |  1 +
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 966942d1013f..efa7deb96139 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -53,6 +53,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
 	msk->pm.rm_list_tx = *rm_list;
 	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
 	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
+	mptcp_pm_nl_add_addr_send_ack(msk);
 	return 0;
 }
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 743bd23b1f78..f71e910670bf 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -56,8 +56,6 @@ struct pm_nl_pernet {
 #define MPTCP_PM_ADDR_MAX	8
 #define ADD_ADDR_RETRANS_MAX	3
 
-static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
-
 static bool addresses_equal(const struct mptcp_addr_info *a,
 			    struct mptcp_addr_info *b, bool use_port)
 {
@@ -524,14 +522,15 @@ add_addr_echo:
 	mptcp_pm_nl_add_addr_send_ack(msk);
 }
 
-static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
+void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
 
 	msk_owned_by_me(msk);
 	lockdep_assert_held(&msk->pm.lock);
 
-	if (!mptcp_pm_should_add_signal(msk))
+	if (!mptcp_pm_should_add_signal(msk) &&
+	    !mptcp_pm_should_rm_signal(msk))
 		return;
 
 	__mptcp_flush_join_list(msk);
@@ -540,7 +539,8 @@ static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
 		spin_unlock_bh(&msk->pm.lock);
-		pr_debug("send ack for add_addr%s%s",
+		pr_debug("send ack for %s%s%s",
+			 mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
 			 mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
 			 mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b417b3591e07..6ce6ef58f092 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -650,6 +650,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
 			      struct mptcp_addr_info *addr);
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
+void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 			       const struct mptcp_rm_list *rm_list);
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
-- 
cgit v1.2.3


From b46a023810939c2839250711282bb66946666d27 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:41 -0700
Subject: mptcp: rename mptcp_pm_nl_add_addr_send_ack

Since mptcp_pm_nl_add_addr_send_ack is now used for both ADD_ADDR and
RM_ADDR cases, rename it to mptcp_pm_nl_addr_send_ack.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm.c         | 2 +-
 net/mptcp/pm_netlink.c | 8 ++++----
 net/mptcp/protocol.h   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index efa7deb96139..9d00fa6d22e9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -53,7 +53,7 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
 	msk->pm.rm_list_tx = *rm_list;
 	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
 	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
-	mptcp_pm_nl_add_addr_send_ack(msk);
+	mptcp_pm_nl_addr_send_ack(msk);
 	return 0;
 }
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index f71e910670bf..73b9245c87b2 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -437,7 +437,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 			if (mptcp_pm_alloc_anno_list(msk, local)) {
 				msk->pm.add_addr_signaled++;
 				mptcp_pm_announce_addr(msk, &local->addr, false);
-				mptcp_pm_nl_add_addr_send_ack(msk);
+				mptcp_pm_nl_addr_send_ack(msk);
 			}
 		} else {
 			/* pick failed, avoid fourther attempts later */
@@ -519,10 +519,10 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 
 add_addr_echo:
 	mptcp_pm_announce_addr(msk, &msk->pm.remote, true);
-	mptcp_pm_nl_add_addr_send_ack(msk);
+	mptcp_pm_nl_addr_send_ack(msk);
 }
 
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
+void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
 
@@ -642,7 +642,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 	}
 	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
 		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
-		mptcp_pm_nl_add_addr_send_ack(msk);
+		mptcp_pm_nl_addr_send_ack(msk);
 	}
 	if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
 		pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 6ce6ef58f092..e8c5ff2b8ace 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -650,7 +650,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
 void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
 			      struct mptcp_addr_info *addr);
 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk);
-void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
+void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk);
 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
 			       const struct mptcp_rm_list *rm_list);
 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
-- 
cgit v1.2.3


From ef360019db4043d53d631aec1e630bd6e6ce54f4 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 26 Mar 2021 11:26:42 -0700
Subject: selftests: mptcp: signal addresses testcases

This patch adds testcases for signalling multi valid and invalid
addresses for both signal_address_tests and remove_tests.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 58 +++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 32379efa2276..679de3abaf34 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -785,6 +785,28 @@ signal_address_tests()
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows and signal" 3 3 3
 	chk_add_nr 1 1
+
+	# signal addresses
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "signal addresses" 3 3 3
+	chk_add_nr 3 3
+
+	# signal invalid addresses
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "signal invalid addresses" 1 1 1
+	chk_add_nr 3 3
 }
 
 link_failure_tests()
@@ -896,6 +918,30 @@ remove_tests()
 	chk_add_nr 1 1
 	chk_rm_nr 2 2
 
+	# addresses remove
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+	chk_join_nr "remove addresses" 3 3 3
+	chk_add_nr 3 3
+	chk_rm_nr 3 3 invert
+
+	# invalid addresses remove
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+	chk_join_nr "remove invalid addresses" 1 1 1
+	chk_add_nr 3 3
+	chk_rm_nr 3 1 invert
+
 	# subflows and signal, flush
 	reset
 	ip netns exec $ns1 ./pm_nl_ctl limits 0 3
@@ -930,6 +976,18 @@ remove_tests()
 	chk_join_nr "flush addresses" 3 3 3
 	chk_add_nr 3 3
 	chk_rm_nr 3 3 invert
+
+	# invalid addresses flush
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+	run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
+	chk_join_nr "flush invalid addresses" 1 1 1
+	chk_add_nr 3 3
+	chk_rm_nr 3 1 invert
 }
 
 add_tests()
-- 
cgit v1.2.3


From ad1cd7856d870e5861ef80fbf3e4b0d68bb82a69 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 26 Mar 2021 13:22:21 -0700
Subject: ethtool: fec: add note about reuse of reserved

struct ethtool_fecparam::reserved can't be used in SET, because
ethtool user space doesn't zero-initialize the structure.
Make this clear.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f6ef7d42c7a1..9a47c3efd8ca 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1382,6 +1382,10 @@ struct ethtool_per_queue_op {
  * @fec: Bitmask of configured FEC modes.
  * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
  *
+ * Note that @reserved was never validated on input and ethtool user space
+ * left it uninitialized when calling SET. Hence going forward it can only be
+ * used to return a value to userspace with GET.
+ *
  * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
  * FEC settings are configured by link autonegotiation whenever it's enabled.
  * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
-- 
cgit v1.2.3


From cf2cc0bf4fde7b9db68d605bbe26457aea3685a0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 26 Mar 2021 13:22:22 -0700
Subject: ethtool: fec: fix FEC_NONE check

Dan points out we need to use the mask not the bit (which is 0).

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: 42ce127d9864 ("ethtool: fec: sanitize ethtool_fecparam->fec")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 8797533ddc4b..26b3e7086075 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2586,7 +2586,7 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
 	if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
 		return -EFAULT;
 
-	if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE_BIT)
+	if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE)
 		return -EINVAL;
 
 	fecparam.active_fec = 0;
-- 
cgit v1.2.3


From d04feecaf1543e538e856166e494daebe808d1fe Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 26 Mar 2021 13:22:23 -0700
Subject: ethtool: document the enum values not defines

kdoc does not have good support for documenting defines,
and we can't abuse the enum documentation because it
generates warnings.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9a47c3efd8ca..868b513d4f54 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1414,16 +1414,16 @@ struct ethtool_fecparam {
 
 /**
  * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
- * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported. Should not
- *		      be used together with other bits. GET only.
- * @ETHTOOL_FEC_AUTO: Select default/best FEC mode automatically, usually based
- *		      link mode and SFP parameters read from module's EEPROM.
- *		      This bit does _not_ mean autonegotiation.
- * @ETHTOOL_FEC_OFF: No FEC Mode
- * @ETHTOOL_FEC_RS: Reed-Solomon FEC Mode
- * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon FEC Mode
- * @ETHTOOL_FEC_LLRS: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
- *		      Consortium)
+ * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not
+ *			be used together with other bits. GET only.
+ * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually
+ *			based link mode and SFP parameters read from module's
+ *			EEPROM. This bit does _not_ mean autonegotiation.
+ * @ETHTOOL_FEC_OFF_BIT: No FEC Mode
+ * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode
+ * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
+ *			Consortium)
  */
 enum ethtool_fec_config_bits {
 	ETHTOOL_FEC_NONE_BIT,
-- 
cgit v1.2.3


From 2d9a93b4902be6a5504b5941dd15e9cd776aadca Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:51 +0000
Subject: mld: convert from timer to delayed work

mcast.c has several timers for delaying works.
Timer's expire handler is working under atomic context so it can't use
sleepable things such as GFP_KERNEL, mutex, etc.
In order to use sleepable APIs, it converts from timers to delayed work.
But there are some critical sections, which is used by both process
and BH context. So that it still uses spin_lock_bh() and rwlock.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   8 +--
 net/ipv6/mcast.c       | 140 ++++++++++++++++++++++++++++---------------------
 2 files changed, 83 insertions(+), 65 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 8bf5906073bc..af5244c9ca5c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -120,7 +120,7 @@ struct ifmcaddr6 {
 	unsigned int		mca_sfmode;
 	unsigned char		mca_crcount;
 	unsigned long		mca_sfcount[2];
-	struct timer_list	mca_timer;
+	struct delayed_work	mca_work;
 	unsigned int		mca_flags;
 	int			mca_users;
 	refcount_t		mca_refcnt;
@@ -179,9 +179,9 @@ struct inet6_dev {
 	unsigned long		mc_qri;		/* Query Response Interval */
 	unsigned long		mc_maxdelay;
 
-	struct timer_list	mc_gq_timer;	/* general query timer */
-	struct timer_list	mc_ifc_timer;	/* interface change timer */
-	struct timer_list	mc_dad_timer;	/* dad complete mc timer */
+	struct delayed_work	mc_gq_work;	/* general query work */
+	struct delayed_work	mc_ifc_work;	/* interface change work */
+	struct delayed_work	mc_dad_work;	/* dad complete mc work */
 
 	struct ifacaddr6	*ac_list;
 	rwlock_t		lock;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6c8604390266..692a6dec8959 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -29,7 +29,6 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/jiffies.h>
-#include <linux/times.h>
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -42,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/pkt_sched.h>
 #include <net/mld.h>
+#include <linux/workqueue.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -67,14 +67,13 @@ static int __mld2_query_bugs[] __attribute__((__unused__)) = {
 	BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4)
 };
 
+static struct workqueue_struct *mld_wq;
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
-static void igmp6_timer_handler(struct timer_list *t);
+static void mld_mca_work(struct work_struct *work);
 
-static void mld_gq_timer_expire(struct timer_list *t);
-static void mld_ifc_timer_expire(struct timer_list *t);
 static void mld_ifc_event(struct inet6_dev *idev);
 static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
@@ -713,7 +712,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 		igmp6_leave_group(mc);
 
 	spin_lock_bh(&mc->mca_lock);
-	if (del_timer(&mc->mca_timer))
+	if (cancel_delayed_work(&mc->mca_work))
 		refcount_dec(&mc->mca_refcnt);
 	spin_unlock_bh(&mc->mca_lock);
 }
@@ -854,7 +853,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 	if (!mc)
 		return NULL;
 
-	timer_setup(&mc->mca_timer, igmp6_timer_handler, 0);
+	INIT_DELAYED_WORK(&mc->mca_work, mld_mca_work);
 
 	mc->mca_addr = *addr;
 	mc->idev = idev; /* reference taken by caller */
@@ -1027,48 +1026,48 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 	return rv;
 }
 
-static void mld_gq_start_timer(struct inet6_dev *idev)
+static void mld_gq_start_work(struct inet6_dev *idev)
 {
 	unsigned long tv = prandom_u32() % idev->mc_maxdelay;
 
 	idev->mc_gq_running = 1;
-	if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_gq_stop_timer(struct inet6_dev *idev)
+static void mld_gq_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_gq_running = 0;
-	if (del_timer(&idev->mc_gq_timer))
+	if (cancel_delayed_work(&idev->mc_gq_work))
 		__in6_dev_put(idev);
 }
 
-static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
+static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
 
-	if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_ifc_stop_timer(struct inet6_dev *idev)
+static void mld_ifc_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_ifc_count = 0;
-	if (del_timer(&idev->mc_ifc_timer))
+	if (cancel_delayed_work(&idev->mc_ifc_work))
 		__in6_dev_put(idev);
 }
 
-static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
 
-	if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_dad_stop_timer(struct inet6_dev *idev)
+static void mld_dad_stop_work(struct inet6_dev *idev)
 {
-	if (del_timer(&idev->mc_dad_timer))
+	if (cancel_delayed_work(&idev->mc_dad_work))
 		__in6_dev_put(idev);
 }
 
@@ -1080,21 +1079,20 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 {
 	unsigned long delay = resptime;
 
-	/* Do not start timer for these addresses */
+	/* Do not start work for these addresses */
 	if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
 	    IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	if (del_timer(&ma->mca_timer)) {
+	if (cancel_delayed_work(&ma->mca_work)) {
 		refcount_dec(&ma->mca_refcnt);
-		delay = ma->mca_timer.expires - jiffies;
+		delay = ma->mca_work.timer.expires - jiffies;
 	}
 
 	if (delay >= resptime)
 		delay = prandom_u32() % resptime;
 
-	ma->mca_timer.expires = jiffies + delay;
-	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+	if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
 		refcount_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
@@ -1305,10 +1303,10 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
 	if (v1_query)
 		mld_set_v1_mode(idev);
 
-	/* cancel MLDv2 report timer */
-	mld_gq_stop_timer(idev);
-	/* cancel the interface change timer */
-	mld_ifc_stop_timer(idev);
+	/* cancel MLDv2 report work */
+	mld_gq_stop_work(idev);
+	/* cancel the interface change work */
+	mld_ifc_stop_work(idev);
 	/* clear deleted report items */
 	mld_clear_delrec(idev);
 
@@ -1398,7 +1396,7 @@ int igmp6_event_query(struct sk_buff *skb)
 			if (mlh2->mld2q_nsrcs)
 				return -EINVAL; /* no sources allowed */
 
-			mld_gq_start_timer(idev);
+			mld_gq_start_work(idev);
 			return 0;
 		}
 		/* mark sources to include, if group & source-specific */
@@ -1482,14 +1480,14 @@ int igmp6_event_report(struct sk_buff *skb)
 		return -ENODEV;
 
 	/*
-	 *	Cancel the timer for this group
+	 *	Cancel the work for this group
 	 */
 
 	read_lock_bh(&idev->lock);
 	for (ma = idev->mc_list; ma; ma = ma->next) {
 		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
 			spin_lock(&ma->mca_lock);
-			if (del_timer(&ma->mca_timer))
+			if (cancel_delayed_work(&ma->mca_work))
 				refcount_dec(&ma->mca_refcnt);
 			ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
 			spin_unlock(&ma->mca_lock);
@@ -2103,21 +2101,23 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
 		mld_send_initial_cr(idev);
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_dad_start_work(idev,
+					   unsolicited_report_interval(idev));
 	}
 }
 
-static void mld_dad_timer_expire(struct timer_list *t)
+static void mld_dad_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer);
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_dad_work);
 
 	mld_send_initial_cr(idev);
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_dad_start_work(idev,
+					   unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }
@@ -2416,12 +2416,12 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 	delay = prandom_u32() % unsolicited_report_interval(ma->idev);
 
 	spin_lock_bh(&ma->mca_lock);
-	if (del_timer(&ma->mca_timer)) {
+	if (cancel_delayed_work(&ma->mca_work)) {
 		refcount_dec(&ma->mca_refcnt);
-		delay = ma->mca_timer.expires - jiffies;
+		delay = ma->mca_work.timer.expires - jiffies;
 	}
 
-	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+	if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
 		refcount_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
 	spin_unlock_bh(&ma->mca_lock);
@@ -2458,25 +2458,29 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma)
 	}
 }
 
-static void mld_gq_timer_expire(struct timer_list *t)
+static void mld_gq_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer);
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_gq_work);
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
 	in6_dev_put(idev);
 }
 
-static void mld_ifc_timer_expire(struct timer_list *t)
+static void mld_ifc_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer);
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_ifc_work);
 
 	mld_send_cr(idev);
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
 		if (idev->mc_ifc_count)
-			mld_ifc_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_ifc_start_work(idev,
+					   unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }
@@ -2486,22 +2490,23 @@ static void mld_ifc_event(struct inet6_dev *idev)
 	if (mld_in_v1_mode(idev))
 		return;
 	idev->mc_ifc_count = idev->mc_qrv;
-	mld_ifc_start_timer(idev, 1);
+	mld_ifc_start_work(idev, 1);
 }
 
-static void igmp6_timer_handler(struct timer_list *t)
+static void mld_mca_work(struct work_struct *work)
 {
-	struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer);
+	struct ifmcaddr6 *ma = container_of(to_delayed_work(work),
+					    struct ifmcaddr6, mca_work);
 
 	if (mld_in_v1_mode(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
 
-	spin_lock(&ma->mca_lock);
+	spin_lock_bh(&ma->mca_lock);
 	ma->mca_flags |=  MAF_LAST_REPORTER;
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
-	spin_unlock(&ma->mca_lock);
+	spin_unlock_bh(&ma->mca_lock);
 	ma_put(ma);
 }
 
@@ -2537,12 +2542,12 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	for (i = idev->mc_list; i; i = i->next)
 		igmp6_group_dropped(i);
 
-	/* Should stop timer after group drop. or we will
-	 * start timer again in mld_ifc_event()
+	/* Should stop work after group drop. or we will
+	 * start work again in mld_ifc_event()
 	 */
-	mld_ifc_stop_timer(idev);
-	mld_gq_stop_timer(idev);
-	mld_dad_stop_timer(idev);
+	mld_ifc_stop_work(idev);
+	mld_gq_stop_work(idev);
+	mld_dad_stop_work(idev);
 	read_unlock_bh(&idev->lock);
 }
 
@@ -2579,11 +2584,11 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
 	write_lock_bh(&idev->lock);
 	spin_lock_init(&idev->mc_lock);
 	idev->mc_gq_running = 0;
-	timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0);
+	INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work);
 	idev->mc_tomb = NULL;
 	idev->mc_ifc_count = 0;
-	timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0);
-	timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0);
+	INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work);
+	INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work);
 	ipv6_mc_reset(idev);
 	write_unlock_bh(&idev->lock);
 }
@@ -2596,7 +2601,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *i;
 
-	/* Deactivate timers */
+	/* Deactivate works */
 	ipv6_mc_down(idev);
 	mld_clear_delrec(idev);
 
@@ -2763,7 +2768,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 		   &im->mca_addr,
 		   im->mca_users, im->mca_flags,
 		   (im->mca_flags&MAF_TIMER_RUNNING) ?
-		   jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
+		   jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0);
 	return 0;
 }
 
@@ -3002,7 +3007,19 @@ static struct pernet_operations igmp6_net_ops = {
 
 int __init igmp6_init(void)
 {
-	return register_pernet_subsys(&igmp6_net_ops);
+	int err;
+
+	err = register_pernet_subsys(&igmp6_net_ops);
+	if (err)
+		return err;
+
+	mld_wq = create_workqueue("mld");
+	if (!mld_wq) {
+		unregister_pernet_subsys(&igmp6_net_ops);
+		return -ENOMEM;
+	}
+
+	return err;
 }
 
 int __init igmp6_late_init(void)
@@ -3013,6 +3030,7 @@ int __init igmp6_late_init(void)
 void igmp6_cleanup(void)
 {
 	unregister_pernet_subsys(&igmp6_net_ops);
+	destroy_workqueue(mld_wq);
 }
 
 void igmp6_late_cleanup(void)
-- 
cgit v1.2.3


From cf2ce339b401bc53ee131f0ce38bae32a949925e Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:52 +0000
Subject: mld: get rid of inet6_dev->mc_lock

The purpose of mc_lock is to protect inet6_dev->mc_tomb.
But mc_tomb is already protected by RTNL and all functions,
which manipulate mc_tomb are called under RTNL.
So, mc_lock is not needed.
Furthermore, it is spinlock so the critical section is atomic.
In order to reduce atomic context, it should be removed.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 1 -
 net/ipv6/mcast.c       | 9 ---------
 2 files changed, 10 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index af5244c9ca5c..1080d2248304 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -167,7 +167,6 @@ struct inet6_dev {
 
 	struct ifmcaddr6	*mc_list;
 	struct ifmcaddr6	*mc_tomb;
-	spinlock_t		mc_lock;
 
 	unsigned char		mc_qrv;		/* Query Robustness Variable */
 	unsigned char		mc_gq_running;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 692a6dec8959..35962aa3cc22 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -752,10 +752,8 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	}
 	spin_unlock_bh(&im->mca_lock);
 
-	spin_lock_bh(&idev->mc_lock);
 	pmc->next = idev->mc_tomb;
 	idev->mc_tomb = pmc;
-	spin_unlock_bh(&idev->mc_lock);
 }
 
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
@@ -764,7 +762,6 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	struct ip6_sf_list *psf;
 	struct in6_addr *pmca = &im->mca_addr;
 
-	spin_lock_bh(&idev->mc_lock);
 	pmc_prev = NULL;
 	for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) {
 		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
@@ -777,7 +774,6 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		else
 			idev->mc_tomb = pmc->next;
 	}
-	spin_unlock_bh(&idev->mc_lock);
 
 	spin_lock_bh(&im->mca_lock);
 	if (pmc) {
@@ -801,10 +797,8 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *nextpmc;
 
-	spin_lock_bh(&idev->mc_lock);
 	pmc = idev->mc_tomb;
 	idev->mc_tomb = NULL;
-	spin_unlock_bh(&idev->mc_lock);
 
 	for (; pmc; pmc = nextpmc) {
 		nextpmc = pmc->next;
@@ -1907,7 +1901,6 @@ static void mld_send_cr(struct inet6_dev *idev)
 	int type, dtype;
 
 	read_lock_bh(&idev->lock);
-	spin_lock(&idev->mc_lock);
 
 	/* deleted MCA's */
 	pmc_prev = NULL;
@@ -1941,7 +1934,6 @@ static void mld_send_cr(struct inet6_dev *idev)
 		} else
 			pmc_prev = pmc;
 	}
-	spin_unlock(&idev->mc_lock);
 
 	/* change recs */
 	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
@@ -2582,7 +2574,6 @@ void ipv6_mc_up(struct inet6_dev *idev)
 void ipv6_mc_init_dev(struct inet6_dev *idev)
 {
 	write_lock_bh(&idev->lock);
-	spin_lock_init(&idev->mc_lock);
 	idev->mc_gq_running = 0;
 	INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work);
 	idev->mc_tomb = NULL;
-- 
cgit v1.2.3


From 882ba1f73c06831f2a21044ebd8864c485ac04f2 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:53 +0000
Subject: mld: convert ipv6_mc_socklist->sflist to RCU

The sflist has been protected by rwlock so that the critical section
is atomic context.
In order to switch this context, changing locking is needed.
The sflist actually already protected by RTNL So if it's converted
to use RCU, its control path context can be switched to sleepable.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |  4 ++--
 net/ipv6/mcast.c       | 52 +++++++++++++++++++++-----------------------------
 2 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 1080d2248304..062294aeeb6d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -78,6 +78,7 @@ struct inet6_ifaddr {
 struct ip6_sf_socklist {
 	unsigned int		sl_max;
 	unsigned int		sl_count;
+	struct rcu_head		rcu;
 	struct in6_addr		sl_addr[];
 };
 
@@ -91,8 +92,7 @@ struct ipv6_mc_socklist {
 	int			ifindex;
 	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ipv6_mc_socklist __rcu *next;
-	rwlock_t		sflock;
-	struct ip6_sf_socklist	*sflist;
+	struct ip6_sf_socklist	__rcu *sflist;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 35962aa3cc22..9da55d23a13c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -178,8 +178,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
 
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = mode;
-	rwlock_init(&mc_lst->sflock);
-	mc_lst->sflist = NULL;
+	RCU_INIT_POINTER(mc_lst->sflist, NULL);
 
 	/*
 	 *	now add/increase the group membership on the device
@@ -335,7 +334,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	struct net *net = sock_net(sk);
 	int i, j, rv;
 	int leavegroup = 0;
-	int pmclocked = 0;
 	int err;
 
 	source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
@@ -364,7 +362,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		goto done;
 	}
 	/* if a source filter was set, must be the same mode as before */
-	if (pmc->sflist) {
+	if (rcu_access_pointer(pmc->sflist)) {
 		if (pmc->sfmode != omode) {
 			err = -EINVAL;
 			goto done;
@@ -376,10 +374,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
-	write_lock(&pmc->sflock);
-	pmclocked = 1;
-
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
@@ -429,9 +424,11 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		if (psl) {
 			for (i = 0; i < psl->sl_count; i++)
 				newpsl->sl_addr[i] = psl->sl_addr[i];
-			sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+			atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+			kfree_rcu(psl, rcu);
 		}
-		pmc->sflist = psl = newpsl;
+		psl = newpsl;
+		rcu_assign_pointer(pmc->sflist, psl);
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i = 0; i < psl->sl_count; i++) {
@@ -447,8 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
-	if (pmclocked)
-		write_unlock(&pmc->sflock);
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 	if (leavegroup)
@@ -526,17 +521,16 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
 	}
 
-	write_lock(&pmc->sflock);
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
 			psl->sl_count, psl->sl_addr, 0);
-		sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+		kfree_rcu(psl, rcu);
 	} else
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
-	pmc->sflist = newpsl;
+	rcu_assign_pointer(pmc->sflist, newpsl);
 	pmc->sfmode = gsf->gf_fmode;
-	write_unlock(&pmc->sflock);
 	err = 0;
 done:
 	read_unlock_bh(&idev->lock);
@@ -585,16 +579,14 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	if (!pmc)		/* must have a prior join */
 		goto done;
 	gsf->gf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = rtnl_dereference(pmc->sflist);
 	count = psl ? psl->sl_count : 0;
 	read_unlock_bh(&idev->lock);
 	rcu_read_unlock();
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
-	/* changes to psl require the socket lock, and a write lock
-	 * on pmc->sflock. We have the socket lock so reading here is safe.
-	 */
+
 	for (i = 0; i < copycount; i++, p++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
@@ -630,8 +622,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 		rcu_read_unlock();
 		return np->mc_all;
 	}
-	read_lock(&mc->sflock);
-	psl = mc->sflist;
+	psl = rcu_dereference(mc->sflist);
 	if (!psl) {
 		rv = mc->sfmode == MCAST_EXCLUDE;
 	} else {
@@ -646,7 +637,6 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
 			rv = false;
 	}
-	read_unlock(&mc->sflock);
 	rcu_read_unlock();
 
 	return rv;
@@ -2422,19 +2412,21 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 			    struct inet6_dev *idev)
 {
+	struct ip6_sf_socklist *psl;
 	int err;
 
-	write_lock_bh(&iml->sflock);
-	if (!iml->sflist) {
+	psl = rtnl_dereference(iml->sflist);
+
+	if (!psl) {
 		/* any-source empty exclude case */
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
 	} else {
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
-				iml->sflist->sl_count, iml->sflist->sl_addr, 0);
-		sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
-		iml->sflist = NULL;
+				psl->sl_count, psl->sl_addr, 0);
+		RCU_INIT_POINTER(iml->sflist, NULL);
+		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+		kfree_rcu(psl, rcu);
 	}
-	write_unlock_bh(&iml->sflock);
 	return err;
 }
 
-- 
cgit v1.2.3


From 4b200e398953c237c86d32bf26d4cb2a96556a6f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:54 +0000
Subject: mld: convert ip6_sf_list to RCU

The ip6_sf_list has been protected by mca_lock(spin_lock) so that the
critical section is atomic context. In order to switch this context,
changing locking is needed. The ip6_sf_list actually already protected
by RTNL So if it's converted to use RCU, its control path context can
be switched to sleepable.
But It doesn't remove mca_lock yet because ifmcaddr6 isn't converted
to RCU yet. So, It's not fully converted to the sleepable context.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   7 +-
 net/ipv6/mcast.c       | 200 +++++++++++++++++++++++++++++++------------------
 2 files changed, 130 insertions(+), 77 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 062294aeeb6d..7875a3208426 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -97,12 +97,13 @@ struct ipv6_mc_socklist {
 };
 
 struct ip6_sf_list {
-	struct ip6_sf_list	*sf_next;
+	struct ip6_sf_list __rcu *sf_next;
 	struct in6_addr		sf_addr;
 	unsigned long		sf_count[2];	/* include/exclude counts */
 	unsigned char		sf_gsresp;	/* include in g & s response? */
 	unsigned char		sf_oldin;	/* change state */
 	unsigned char		sf_crcount;	/* retrans. left to send */
+	struct rcu_head		rcu;
 };
 
 #define MAF_TIMER_RUNNING	0x01
@@ -115,8 +116,8 @@ struct ifmcaddr6 {
 	struct in6_addr		mca_addr;
 	struct inet6_dev	*idev;
 	struct ifmcaddr6	*next;
-	struct ip6_sf_list	*mca_sources;
-	struct ip6_sf_list	*mca_tomb;
+	struct ip6_sf_list	__rcu *mca_sources;
+	struct ip6_sf_list	__rcu *mca_tomb;
 	unsigned int		mca_sfmode;
 	unsigned char		mca_crcount;
 	unsigned long		mca_sfcount[2];
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9da55d23a13c..bc0fb4815c97 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -113,10 +113,25 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
  */
 
 #define for_each_pmc_rcu(np, pmc)				\
-	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
-	     pmc != NULL;					\
+	for (pmc = rcu_dereference((np)->ipv6_mc_list);		\
+	     pmc;						\
 	     pmc = rcu_dereference(pmc->next))
 
+#define for_each_psf_rtnl(mc, psf)				\
+	for (psf = rtnl_dereference((mc)->mca_sources);		\
+	     psf;						\
+	     psf = rtnl_dereference(psf->sf_next))
+
+#define for_each_psf_rcu(mc, psf)				\
+	for (psf = rcu_dereference((mc)->mca_sources);		\
+	     psf;						\
+	     psf = rcu_dereference(psf->sf_next))
+
+#define for_each_psf_tomb(mc, psf)				\
+	for (psf = rtnl_dereference((mc)->mca_tomb);		\
+	     psf;						\
+	     psf = rtnl_dereference(psf->sf_next))
+
 static int unsolicited_report_interval(struct inet6_dev *idev)
 {
 	int iv;
@@ -734,10 +749,14 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	if (pmc->mca_sfmode == MCAST_INCLUDE) {
 		struct ip6_sf_list *psf;
 
-		pmc->mca_tomb = im->mca_tomb;
-		pmc->mca_sources = im->mca_sources;
-		im->mca_tomb = im->mca_sources = NULL;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		rcu_assign_pointer(pmc->mca_tomb,
+				   rtnl_dereference(im->mca_tomb));
+		rcu_assign_pointer(pmc->mca_sources,
+				   rtnl_dereference(im->mca_sources));
+		RCU_INIT_POINTER(im->mca_tomb, NULL);
+		RCU_INIT_POINTER(im->mca_sources, NULL);
+
+		for_each_psf_rtnl(pmc, psf)
 			psf->sf_crcount = pmc->mca_crcount;
 	}
 	spin_unlock_bh(&im->mca_lock);
@@ -748,9 +767,9 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
-	struct ifmcaddr6 *pmc, *pmc_prev;
-	struct ip6_sf_list *psf;
+	struct ip6_sf_list *psf, *sources, *tomb;
 	struct in6_addr *pmca = &im->mca_addr;
+	struct ifmcaddr6 *pmc, *pmc_prev;
 
 	pmc_prev = NULL;
 	for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) {
@@ -769,9 +788,16 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	if (pmc) {
 		im->idev = pmc->idev;
 		if (im->mca_sfmode == MCAST_INCLUDE) {
-			swap(im->mca_tomb, pmc->mca_tomb);
-			swap(im->mca_sources, pmc->mca_sources);
-			for (psf = im->mca_sources; psf; psf = psf->sf_next)
+			tomb = rcu_replace_pointer(im->mca_tomb,
+						   rtnl_dereference(pmc->mca_tomb),
+						   lockdep_rtnl_is_held());
+			rcu_assign_pointer(pmc->mca_tomb, tomb);
+
+			sources = rcu_replace_pointer(im->mca_sources,
+						      rtnl_dereference(pmc->mca_sources),
+						      lockdep_rtnl_is_held());
+			rcu_assign_pointer(pmc->mca_sources, sources);
+			for_each_psf_rtnl(im, psf)
 				psf->sf_crcount = idev->mc_qrv;
 		} else {
 			im->mca_crcount = idev->mc_qrv;
@@ -803,12 +829,12 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 		struct ip6_sf_list *psf, *psf_next;
 
 		spin_lock_bh(&pmc->mca_lock);
-		psf = pmc->mca_tomb;
-		pmc->mca_tomb = NULL;
+		psf = rtnl_dereference(pmc->mca_tomb);
+		RCU_INIT_POINTER(pmc->mca_tomb, NULL);
 		spin_unlock_bh(&pmc->mca_lock);
 		for (; psf; psf = psf_next) {
-			psf_next = psf->sf_next;
-			kfree(psf);
+			psf_next = rtnl_dereference(psf->sf_next);
+			kfree_rcu(psf, rcu);
 		}
 	}
 	read_unlock_bh(&idev->lock);
@@ -990,7 +1016,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 				struct ip6_sf_list *psf;
 
 				spin_lock_bh(&mc->mca_lock);
-				for (psf = mc->mca_sources; psf; psf = psf->sf_next) {
+				for_each_psf_rcu(mc, psf) {
 					if (ipv6_addr_equal(&psf->sf_addr, src_addr))
 						break;
 				}
@@ -1089,7 +1115,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	int i, scount;
 
 	scount = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rcu(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1122,7 +1148,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	/* mark INCLUDE-mode sources */
 
 	scount = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rcu(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1532,7 +1558,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
 	struct ip6_sf_list *psf;
 	int scount = 0;
 
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
 			continue;
 		scount++;
@@ -1707,14 +1733,16 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 #define AVAILABLE(skb)	((skb) ? skb_availroom(skb) : 0)
 
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-	int type, int gdeleted, int sdeleted, int crsend)
+				int type, int gdeleted, int sdeleted,
+				int crsend)
 {
+	struct ip6_sf_list *psf, *psf_prev, *psf_next;
+	int scount, stotal, first, isquery, truncate;
+	struct ip6_sf_list __rcu **psf_list;
 	struct inet6_dev *idev = pmc->idev;
 	struct net_device *dev = idev->dev;
-	struct mld2_report *pmr;
 	struct mld2_grec *pgr = NULL;
-	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, stotal, first, isquery, truncate;
+	struct mld2_report *pmr;
 	unsigned int mtu;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
@@ -1733,7 +1761,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
 
-	if (!*psf_list)
+	if (!rcu_access_pointer(*psf_list))
 		goto empty_source;
 
 	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
@@ -1749,10 +1777,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	}
 	first = 1;
 	psf_prev = NULL;
-	for (psf = *psf_list; psf; psf = psf_next) {
+	for (psf = rtnl_dereference(*psf_list);
+	     psf;
+	     psf = psf_next) {
 		struct in6_addr *psrc;
 
-		psf_next = psf->sf_next;
+		psf_next = rtnl_dereference(psf->sf_next);
 
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
 			psf_prev = psf;
@@ -1799,10 +1829,12 @@ decrease_sf_crcount:
 			psf->sf_crcount--;
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
-					psf_prev->sf_next = psf->sf_next;
+					rcu_assign_pointer(psf_prev->sf_next,
+							   rtnl_dereference(psf->sf_next));
 				else
-					*psf_list = psf->sf_next;
-				kfree(psf);
+					rcu_assign_pointer(*psf_list,
+							   rtnl_dereference(psf->sf_next));
+				kfree_rcu(psf, rcu);
 				continue;
 			}
 		}
@@ -1866,21 +1898,26 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 /*
  * remove zero-count source records from a source filter list
  */
-static void mld_clear_zeros(struct ip6_sf_list **ppsf)
+static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf)
 {
 	struct ip6_sf_list *psf_prev, *psf_next, *psf;
 
 	psf_prev = NULL;
-	for (psf = *ppsf; psf; psf = psf_next) {
-		psf_next = psf->sf_next;
+	for (psf = rtnl_dereference(*ppsf);
+	     psf;
+	     psf = psf_next) {
+		psf_next = rtnl_dereference(psf->sf_next);
 		if (psf->sf_crcount == 0) {
 			if (psf_prev)
-				psf_prev->sf_next = psf->sf_next;
+				rcu_assign_pointer(psf_prev->sf_next,
+						   rtnl_dereference(psf->sf_next));
 			else
-				*ppsf = psf->sf_next;
-			kfree(psf);
-		} else
+				rcu_assign_pointer(*ppsf,
+						   rtnl_dereference(psf->sf_next));
+			kfree_rcu(psf, rcu);
+		} else {
 			psf_prev = psf;
+		}
 	}
 }
 
@@ -1913,8 +1950,9 @@ static void mld_send_cr(struct inet6_dev *idev)
 				mld_clear_zeros(&pmc->mca_sources);
 			}
 		}
-		if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
-		    !pmc->mca_sources) {
+		if (pmc->mca_crcount == 0 &&
+		    !rcu_access_pointer(pmc->mca_tomb) &&
+		    !rcu_access_pointer(pmc->mca_sources)) {
 			if (pmc_prev)
 				pmc_prev->next = pmc_next;
 			else
@@ -2111,7 +2149,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	int rv = 0;
 
 	psf_prev = NULL;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
@@ -2126,17 +2164,22 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 
 		/* no more filters for this source */
 		if (psf_prev)
-			psf_prev->sf_next = psf->sf_next;
+			rcu_assign_pointer(psf_prev->sf_next,
+					   rtnl_dereference(psf->sf_next));
 		else
-			pmc->mca_sources = psf->sf_next;
+			rcu_assign_pointer(pmc->mca_sources,
+					   rtnl_dereference(psf->sf_next));
+
 		if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
 		    !mld_in_v1_mode(idev)) {
 			psf->sf_crcount = idev->mc_qrv;
-			psf->sf_next = pmc->mca_tomb;
-			pmc->mca_tomb = psf;
+			rcu_assign_pointer(psf->sf_next,
+					   rtnl_dereference(pmc->mca_tomb));
+			rcu_assign_pointer(pmc->mca_tomb, psf);
 			rv = 1;
-		} else
-			kfree(psf);
+		} else {
+			kfree_rcu(psf, rcu);
+		}
 	}
 	return rv;
 }
@@ -2188,7 +2231,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		pmc->mca_sfmode = MCAST_INCLUDE;
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		for_each_psf_rtnl(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(pmc->idev);
 	} else if (sf_setstate(pmc) || changerec)
@@ -2207,7 +2250,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 	struct ip6_sf_list *psf, *psf_prev;
 
 	psf_prev = NULL;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
@@ -2219,9 +2262,10 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 
 		psf->sf_addr = *psfsrc;
 		if (psf_prev) {
-			psf_prev->sf_next = psf;
-		} else
-			pmc->mca_sources = psf;
+			rcu_assign_pointer(psf_prev->sf_next, psf);
+		} else {
+			rcu_assign_pointer(pmc->mca_sources, psf);
+		}
 	}
 	psf->sf_count[sfmode]++;
 	return 0;
@@ -2232,13 +2276,15 @@ static void sf_markstate(struct ifmcaddr6 *pmc)
 	struct ip6_sf_list *psf;
 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
 
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+	for_each_psf_rtnl(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			psf->sf_oldin = mca_xcount ==
 				psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
-		} else
+		} else {
 			psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
+		}
+	}
 }
 
 static int sf_setstate(struct ifmcaddr6 *pmc)
@@ -2249,7 +2295,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 	int new_in, rv;
 
 	rv = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
@@ -2259,8 +2305,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 			if (!psf->sf_oldin) {
 				struct ip6_sf_list *prev = NULL;
 
-				for (dpsf = pmc->mca_tomb; dpsf;
-				     dpsf = dpsf->sf_next) {
+				for_each_psf_tomb(pmc, dpsf) {
 					if (ipv6_addr_equal(&dpsf->sf_addr,
 					    &psf->sf_addr))
 						break;
@@ -2268,10 +2313,12 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				}
 				if (dpsf) {
 					if (prev)
-						prev->sf_next = dpsf->sf_next;
+						rcu_assign_pointer(prev->sf_next,
+								   rtnl_dereference(dpsf->sf_next));
 					else
-						pmc->mca_tomb = dpsf->sf_next;
-					kfree(dpsf);
+						rcu_assign_pointer(pmc->mca_tomb,
+								   rtnl_dereference(dpsf->sf_next));
+					kfree_rcu(dpsf, rcu);
 				}
 				psf->sf_crcount = qrv;
 				rv++;
@@ -2282,7 +2329,8 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 			 * add or update "delete" records if an active filter
 			 * is now inactive
 			 */
-			for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next)
+
+			for_each_psf_tomb(pmc, dpsf)
 				if (ipv6_addr_equal(&dpsf->sf_addr,
 				    &psf->sf_addr))
 					break;
@@ -2291,9 +2339,9 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				if (!dpsf)
 					continue;
 				*dpsf = *psf;
-				/* pmc->mca_lock held by callers */
-				dpsf->sf_next = pmc->mca_tomb;
-				pmc->mca_tomb = dpsf;
+				rcu_assign_pointer(dpsf->sf_next,
+						   rtnl_dereference(pmc->mca_tomb));
+				rcu_assign_pointer(pmc->mca_tomb, dpsf);
 			}
 			dpsf->sf_crcount = qrv;
 			rv++;
@@ -2356,7 +2404,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		for_each_psf_rtnl(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(idev);
 	} else if (sf_setstate(pmc))
@@ -2370,16 +2418,20 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf, *nextpsf;
 
-	for (psf = pmc->mca_tomb; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
+	for (psf = rtnl_dereference(pmc->mca_tomb);
+	     psf;
+	     psf = nextpsf) {
+		nextpsf = rtnl_dereference(psf->sf_next);
+		kfree_rcu(psf, rcu);
 	}
-	pmc->mca_tomb = NULL;
-	for (psf = pmc->mca_sources; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
+	RCU_INIT_POINTER(pmc->mca_tomb, NULL);
+	for (psf = rtnl_dereference(pmc->mca_sources);
+	     psf;
+	     psf = nextpsf) {
+		nextpsf = rtnl_dereference(psf->sf_next);
+		kfree_rcu(psf, rcu);
 	}
-	pmc->mca_sources = NULL;
+	RCU_INIT_POINTER(pmc->mca_sources, NULL);
 	pmc->mca_sfmode = MCAST_EXCLUDE;
 	pmc->mca_sfcount[MCAST_INCLUDE] = 0;
 	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
@@ -2789,7 +2841,7 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 		im = idev->mc_list;
 		if (likely(im)) {
 			spin_lock_bh(&im->mca_lock);
-			psf = im->mca_sources;
+			psf = rcu_dereference(im->mca_sources);
 			if (likely(psf)) {
 				state->im = im;
 				state->idev = idev;
@@ -2806,7 +2858,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
-	psf = psf->sf_next;
+	psf = rcu_dereference(psf->sf_next);
 	while (!psf) {
 		spin_unlock_bh(&state->im->mca_lock);
 		state->im = state->im->next;
@@ -2828,7 +2880,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 		if (!state->im)
 			break;
 		spin_lock_bh(&state->im->mca_lock);
-		psf = state->im->mca_sources;
+		psf = rcu_dereference(state->im->mca_sources);
 	}
 out:
 	return psf;
-- 
cgit v1.2.3


From 88e2ca3080947fe22eb520c1f8231e79a105d011 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:55 +0000
Subject: mld: convert ifmcaddr6 to RCU

The ifmcaddr6 has been protected by inet6_dev->lock(rwlock) so that
the critical section is atomic context. In order to switch this context,
changing locking is needed. The ifmcaddr6 actually already protected by
RTNL So if it's converted to use RCU, its control path context can be
switched to sleepable.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c |   6 +-
 include/net/if_inet6.h          |   7 +-
 net/batman-adv/multicast.c      |   6 +-
 net/ipv6/addrconf.c             |   9 +-
 net/ipv6/addrconf_core.c        |   2 +-
 net/ipv6/af_inet6.c             |   2 +-
 net/ipv6/mcast.c                | 296 +++++++++++++++++-----------------------
 7 files changed, 140 insertions(+), 188 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 35b42275a06c..d308ff744a29 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1098,8 +1098,9 @@ walk_ipv6:
 	tmp.disp_flag = QETH_DISP_ADDR_ADD;
 	tmp.is_multicast = 1;
 
-	read_lock_bh(&in6_dev->lock);
-	for (im6 = in6_dev->mc_list; im6 != NULL; im6 = im6->next) {
+	for (im6 = rtnl_dereference(in6_dev->mc_list);
+	     im6;
+	     im6 = rtnl_dereference(im6->next)) {
 		tmp.u.a6.addr = im6->mca_addr;
 
 		ipm = qeth_l3_find_addr_by_ip(card, &tmp);
@@ -1117,7 +1118,6 @@ walk_ipv6:
 			 qeth_l3_ipaddr_hash(ipm));
 
 	}
-	read_unlock_bh(&in6_dev->lock);
 
 out:
 	return 0;
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 7875a3208426..521158e05c18 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -115,7 +115,7 @@ struct ip6_sf_list {
 struct ifmcaddr6 {
 	struct in6_addr		mca_addr;
 	struct inet6_dev	*idev;
-	struct ifmcaddr6	*next;
+	struct ifmcaddr6	__rcu *next;
 	struct ip6_sf_list	__rcu *mca_sources;
 	struct ip6_sf_list	__rcu *mca_tomb;
 	unsigned int		mca_sfmode;
@@ -128,6 +128,7 @@ struct ifmcaddr6 {
 	spinlock_t		mca_lock;
 	unsigned long		mca_cstamp;
 	unsigned long		mca_tstamp;
+	struct rcu_head		rcu;
 };
 
 /* Anycast stuff */
@@ -166,8 +167,8 @@ struct inet6_dev {
 
 	struct list_head	addr_list;
 
-	struct ifmcaddr6	*mc_list;
-	struct ifmcaddr6	*mc_tomb;
+	struct ifmcaddr6	__rcu *mc_list;
+	struct ifmcaddr6	__rcu *mc_tomb;
 
 	unsigned char		mc_qrv;		/* Query Robustness Variable */
 	unsigned char		mc_gq_running;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 28166402d30c..1d63c8cbbfe7 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -454,8 +454,9 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
 		return 0;
 	}
 
-	read_lock_bh(&in6_dev->lock);
-	for (pmc6 = in6_dev->mc_list; pmc6; pmc6 = pmc6->next) {
+	for (pmc6 = rcu_dereference(in6_dev->mc_list);
+	     pmc6;
+	     pmc6 = rcu_dereference(pmc6->next)) {
 		if (IPV6_ADDR_MC_SCOPE(&pmc6->mca_addr) <
 		    IPV6_ADDR_SCOPE_LINKLOCAL)
 			continue;
@@ -484,7 +485,6 @@ batadv_mcast_mla_softif_get_ipv6(struct net_device *dev,
 		hlist_add_head(&new->list, mcast_list);
 		ret++;
 	}
-	read_unlock_bh(&in6_dev->lock);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f2337fb756ac..b502f78d5091 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5107,17 +5107,20 @@ next:
 		break;
 	}
 	case MULTICAST_ADDR:
+		read_unlock_bh(&idev->lock);
 		fillargs->event = RTM_GETMULTICAST;
 
 		/* multicast address */
-		for (ifmca = idev->mc_list; ifmca;
-		     ifmca = ifmca->next, ip_idx++) {
+		for (ifmca = rcu_dereference(idev->mc_list);
+		     ifmca;
+		     ifmca = rcu_dereference(ifmca->next), ip_idx++) {
 			if (ip_idx < s_ip_idx)
 				continue;
 			err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
 			if (err < 0)
 				break;
 		}
+		read_lock_bh(&idev->lock);
 		break;
 	case ANYCAST_ADDR:
 		fillargs->event = RTM_GETANYCAST;
@@ -6093,10 +6096,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
-	rcu_read_lock_bh();
 	if (likely(ifp->idev->dead == 0))
 		__ipv6_ifa_notify(event, ifp);
-	rcu_read_unlock_bh();
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index c70c192bc91b..a36626afbc02 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -250,7 +250,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 	struct net_device *dev = idev->dev;
 
 	WARN_ON(!list_empty(&idev->addr_list));
-	WARN_ON(idev->mc_list);
+	WARN_ON(rcu_access_pointer(idev->mc_list));
 	WARN_ON(timer_pending(&idev->rs_timer));
 
 #ifdef NET_REFCNT_DEBUG
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 802f5111805a..3c9bacffc9c3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -222,7 +222,7 @@ lookup_protocol:
 	inet->mc_loop	= 1;
 	inet->mc_ttl	= 1;
 	inet->mc_index	= 0;
-	inet->mc_list	= NULL;
+	RCU_INIT_POINTER(inet->mc_list, NULL);
 	inet->rcv_tos	= 0;
 
 	if (net->ipv4.sysctl_ip_no_pmtu_disc)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bc0fb4815c97..75541cf53153 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -112,6 +112,11 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
  *	socket join on multicast group
  */
 
+#define for_each_pmc_rtnl(np, pmc)				\
+	for (pmc = rtnl_dereference((np)->ipv6_mc_list);	\
+	     pmc;						\
+	     pmc = rtnl_dereference(pmc->next))
+
 #define for_each_pmc_rcu(np, pmc)				\
 	for (pmc = rcu_dereference((np)->ipv6_mc_list);		\
 	     pmc;						\
@@ -132,6 +137,21 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 	     psf;						\
 	     psf = rtnl_dereference(psf->sf_next))
 
+#define for_each_mc_rtnl(idev, mc)				\
+	for (mc = rtnl_dereference((idev)->mc_list);		\
+	     mc;						\
+	     mc = rtnl_dereference(mc->next))
+
+#define for_each_mc_rcu(idev, mc)				\
+	for (mc = rcu_dereference((idev)->mc_list);             \
+	     mc;                                                \
+	     mc = rcu_dereference(mc->next))
+
+#define for_each_mc_tomb(idev, mc)				\
+	for (mc = rtnl_dereference((idev)->mc_tomb);		\
+	     mc;						\
+	     mc = rtnl_dereference(mc->next))
+
 static int unsolicited_report_interval(struct inet6_dev *idev)
 {
 	int iv;
@@ -158,15 +178,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	rcu_read_lock();
-	for_each_pmc_rcu(np, mc_lst) {
+	for_each_pmc_rtnl(np, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
-		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			rcu_read_unlock();
+		    ipv6_addr_equal(&mc_lst->addr, addr))
 			return -EADDRINUSE;
-		}
 	}
-	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
 
@@ -268,10 +284,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 }
 EXPORT_SYMBOL(ipv6_sock_mc_drop);
 
-/* called with rcu_read_lock() */
-static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
-					     const struct in6_addr *group,
-					     int ifindex)
+static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net,
+					      const struct in6_addr *group,
+					      int ifindex)
 {
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
@@ -283,19 +298,17 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 			dev = rt->dst.dev;
 			ip6_rt_put(rt);
 		}
-	} else
-		dev = dev_get_by_index_rcu(net, ifindex);
+	} else {
+		dev = __dev_get_by_index(net, ifindex);
+	}
 
 	if (!dev)
 		return NULL;
 	idev = __in6_dev_get(dev);
 	if (!idev)
 		return NULL;
-	read_lock_bh(&idev->lock);
-	if (idev->dead) {
-		read_unlock_bh(&idev->lock);
+	if (idev->dead)
 		return NULL;
-	}
 	return idev;
 }
 
@@ -357,16 +370,13 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
-	if (!idev) {
-		rcu_read_unlock();
+	idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface);
+	if (!idev)
 		return -ENODEV;
-	}
 
 	err = -EADDRNOTAVAIL;
 
-	for_each_pmc_rcu(inet6, pmc) {
+	for_each_pmc_rtnl(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -459,8 +469,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
@@ -486,13 +494,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	    gsf->gf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
-
-	if (!idev) {
-		rcu_read_unlock();
+	idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface);
+	if (!idev)
 		return -ENODEV;
-	}
 
 	err = 0;
 
@@ -501,7 +505,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 		goto done;
 	}
 
-	for_each_pmc_rcu(inet6, pmc) {
+	for_each_pmc_rtnl(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -548,8 +552,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	pmc->sfmode = gsf->gf_fmode;
 	err = 0;
 done:
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
 	return err;
@@ -571,13 +573,9 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
-
-	if (!idev) {
-		rcu_read_unlock();
+	idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface);
+	if (!idev)
 		return -ENODEV;
-	}
 
 	err = -EADDRNOTAVAIL;
 	/* changes to the ipv6_mc_list require the socket lock and
@@ -585,19 +583,18 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	 * so reading the list is safe.
 	 */
 
-	for_each_pmc_rcu(inet6, pmc) {
+	for_each_pmc_rtnl(inet6, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
 			break;
 	}
 	if (!pmc)		/* must have a prior join */
-		goto done;
+		return err;
+
 	gsf->gf_fmode = pmc->sfmode;
 	psl = rtnl_dereference(pmc->sflist);
 	count = psl ? psl->sl_count : 0;
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
@@ -614,10 +611,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 			return -EFAULT;
 	}
 	return 0;
-done:
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
-	return err;
 }
 
 bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
@@ -761,8 +754,8 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	}
 	spin_unlock_bh(&im->mca_lock);
 
-	pmc->next = idev->mc_tomb;
-	idev->mc_tomb = pmc;
+	rcu_assign_pointer(pmc->next, idev->mc_tomb);
+	rcu_assign_pointer(idev->mc_tomb, pmc);
 }
 
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
@@ -772,16 +765,16 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	struct ifmcaddr6 *pmc, *pmc_prev;
 
 	pmc_prev = NULL;
-	for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) {
+	for_each_mc_tomb(idev, pmc) {
 		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
 			break;
 		pmc_prev = pmc;
 	}
 	if (pmc) {
 		if (pmc_prev)
-			pmc_prev->next = pmc->next;
+			rcu_assign_pointer(pmc_prev->next, pmc->next);
 		else
-			idev->mc_tomb = pmc->next;
+			rcu_assign_pointer(idev->mc_tomb, pmc->next);
 	}
 
 	spin_lock_bh(&im->mca_lock);
@@ -804,7 +797,7 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		}
 		in6_dev_put(pmc->idev);
 		ip6_mc_clear_src(pmc);
-		kfree(pmc);
+		kfree_rcu(pmc, rcu);
 	}
 	spin_unlock_bh(&im->mca_lock);
 }
@@ -813,19 +806,18 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *nextpmc;
 
-	pmc = idev->mc_tomb;
-	idev->mc_tomb = NULL;
+	pmc = rtnl_dereference(idev->mc_tomb);
+	RCU_INIT_POINTER(idev->mc_tomb, NULL);
 
 	for (; pmc; pmc = nextpmc) {
-		nextpmc = pmc->next;
+		nextpmc = rtnl_dereference(pmc->next);
 		ip6_mc_clear_src(pmc);
 		in6_dev_put(pmc->idev);
-		kfree(pmc);
+		kfree_rcu(pmc, rcu);
 	}
 
 	/* clear dead sources, too */
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+	for_each_mc_rtnl(idev, pmc) {
 		struct ip6_sf_list *psf, *psf_next;
 
 		spin_lock_bh(&pmc->mca_lock);
@@ -837,7 +829,6 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 			kfree_rcu(psf, rcu);
 		}
 	}
-	read_unlock_bh(&idev->lock);
 }
 
 static void mca_get(struct ifmcaddr6 *mc)
@@ -849,7 +840,7 @@ static void ma_put(struct ifmcaddr6 *mc)
 {
 	if (refcount_dec_and_test(&mc->mca_refcnt)) {
 		in6_dev_put(mc->idev);
-		kfree(mc);
+		kfree_rcu(mc, rcu);
 	}
 }
 
@@ -900,17 +891,14 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 	if (!idev)
 		return -EINVAL;
 
-	write_lock_bh(&idev->lock);
 	if (idev->dead) {
-		write_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
 		return -ENODEV;
 	}
 
-	for (mc = idev->mc_list; mc; mc = mc->next) {
+	for_each_mc_rtnl(idev, mc) {
 		if (ipv6_addr_equal(&mc->mca_addr, addr)) {
 			mc->mca_users++;
-			write_unlock_bh(&idev->lock);
 			ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
 			in6_dev_put(idev);
 			return 0;
@@ -919,19 +907,14 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 
 	mc = mca_alloc(idev, addr, mode);
 	if (!mc) {
-		write_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
 		return -ENOMEM;
 	}
 
-	mc->next = idev->mc_list;
-	idev->mc_list = mc;
+	rcu_assign_pointer(mc->next, idev->mc_list);
+	rcu_assign_pointer(idev->mc_list, mc);
 
-	/* Hold this for the code below before we unlock,
-	 * it is already exposed via idev->mc_list.
-	 */
 	mca_get(mc);
-	write_unlock_bh(&idev->lock);
 
 	mld_del_delrec(idev, mc);
 	igmp6_group_added(mc);
@@ -950,16 +933,16 @@ EXPORT_SYMBOL(ipv6_dev_mc_inc);
  */
 int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
-	struct ifmcaddr6 *ma, **map;
+	struct ifmcaddr6 *ma, __rcu **map;
 
 	ASSERT_RTNL();
 
-	write_lock_bh(&idev->lock);
-	for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) {
+	for (map = &idev->mc_list;
+	     (ma = rtnl_dereference(*map));
+	     map = &ma->next) {
 		if (ipv6_addr_equal(&ma->mca_addr, addr)) {
 			if (--ma->mca_users == 0) {
 				*map = ma->next;
-				write_unlock_bh(&idev->lock);
 
 				igmp6_group_dropped(ma);
 				ip6_mc_clear_src(ma);
@@ -967,11 +950,9 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 				ma_put(ma);
 				return 0;
 			}
-			write_unlock_bh(&idev->lock);
 			return 0;
 		}
 	}
-	write_unlock_bh(&idev->lock);
 
 	return -ENOENT;
 }
@@ -1006,8 +987,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		for (mc = idev->mc_list; mc; mc = mc->next) {
+		for_each_mc_rcu(idev, mc) {
 			if (ipv6_addr_equal(&mc->mca_addr, group))
 				break;
 		}
@@ -1030,7 +1010,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 			} else
 				rv = true; /* don't filter unspecified source */
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 	return rv;
@@ -1082,9 +1061,8 @@ static void mld_dad_stop_work(struct inet6_dev *idev)
 }
 
 /*
- *	IGMP handling (alias multicast ICMPv6 messages)
+ * IGMP handling (alias multicast ICMPv6 messages)
  */
-
 static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 {
 	unsigned long delay = resptime;
@@ -1422,15 +1400,14 @@ int igmp6_event_query(struct sk_buff *skb)
 		return -EINVAL;
 	}
 
-	read_lock_bh(&idev->lock);
 	if (group_type == IPV6_ADDR_ANY) {
-		for (ma = idev->mc_list; ma; ma = ma->next) {
+		for_each_mc_rcu(idev, ma) {
 			spin_lock_bh(&ma->mca_lock);
 			igmp6_group_queried(ma, max_delay);
 			spin_unlock_bh(&ma->mca_lock);
 		}
 	} else {
-		for (ma = idev->mc_list; ma; ma = ma->next) {
+		for_each_mc_rcu(idev, ma) {
 			if (!ipv6_addr_equal(group, &ma->mca_addr))
 				continue;
 			spin_lock_bh(&ma->mca_lock);
@@ -1452,7 +1429,6 @@ int igmp6_event_query(struct sk_buff *skb)
 			break;
 		}
 	}
-	read_unlock_bh(&idev->lock);
 
 	return 0;
 }
@@ -1493,18 +1469,17 @@ int igmp6_event_report(struct sk_buff *skb)
 	 *	Cancel the work for this group
 	 */
 
-	read_lock_bh(&idev->lock);
-	for (ma = idev->mc_list; ma; ma = ma->next) {
+	for_each_mc_rcu(idev, ma) {
 		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
 			spin_lock(&ma->mca_lock);
 			if (cancel_delayed_work(&ma->mca_work))
 				refcount_dec(&ma->mca_refcnt);
-			ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
+			ma->mca_flags &= ~(MAF_LAST_REPORTER |
+					   MAF_TIMER_RUNNING);
 			spin_unlock(&ma->mca_lock);
 			break;
 		}
 	}
-	read_unlock_bh(&idev->lock);
 	return 0;
 }
 
@@ -1868,9 +1843,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 	struct sk_buff *skb = NULL;
 	int type;
 
-	read_lock_bh(&idev->lock);
 	if (!pmc) {
-		for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+		for_each_mc_rtnl(idev, pmc) {
 			if (pmc->mca_flags & MAF_NOREPORT)
 				continue;
 			spin_lock_bh(&pmc->mca_lock);
@@ -1890,7 +1864,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 		skb = add_grec(skb, pmc, type, 0, 0, 0);
 		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (skb)
 		mld_sendpack(skb);
 }
@@ -1927,12 +1900,12 @@ static void mld_send_cr(struct inet6_dev *idev)
 	struct sk_buff *skb = NULL;
 	int type, dtype;
 
-	read_lock_bh(&idev->lock);
-
 	/* deleted MCA's */
 	pmc_prev = NULL;
-	for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) {
-		pmc_next = pmc->next;
+	for (pmc = rtnl_dereference(idev->mc_tomb);
+	     pmc;
+	     pmc = pmc_next) {
+		pmc_next = rtnl_dereference(pmc->next);
 		if (pmc->mca_sfmode == MCAST_INCLUDE) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_BLOCK_OLD_SOURCES;
@@ -1954,17 +1927,17 @@ static void mld_send_cr(struct inet6_dev *idev)
 		    !rcu_access_pointer(pmc->mca_tomb) &&
 		    !rcu_access_pointer(pmc->mca_sources)) {
 			if (pmc_prev)
-				pmc_prev->next = pmc_next;
+				rcu_assign_pointer(pmc_prev->next, pmc_next);
 			else
-				idev->mc_tomb = pmc_next;
+				rcu_assign_pointer(idev->mc_tomb, pmc_next);
 			in6_dev_put(pmc->idev);
-			kfree(pmc);
+			kfree_rcu(pmc, rcu);
 		} else
 			pmc_prev = pmc;
 	}
 
 	/* change recs */
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+	for_each_mc_rtnl(idev, pmc) {
 		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			type = MLD2_BLOCK_OLD_SOURCES;
@@ -1987,7 +1960,6 @@ static void mld_send_cr(struct inet6_dev *idev)
 		}
 		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (!skb)
 		return;
 	(void) mld_sendpack(skb);
@@ -2099,8 +2071,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 		return;
 
 	skb = NULL;
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+	for_each_mc_rtnl(idev, pmc) {
 		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_CHANGE_TO_EXCLUDE;
@@ -2109,7 +2080,6 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 		skb = add_grec(skb, pmc, type, 0, 0, 1);
 		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (skb)
 		mld_sendpack(skb);
 }
@@ -2132,7 +2102,9 @@ static void mld_dad_work(struct work_struct *work)
 					      struct inet6_dev,
 					      mc_dad_work);
 
+	rtnl_lock();
 	mld_send_initial_cr(idev);
+	rtnl_unlock();
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
@@ -2194,24 +2166,22 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 
 	if (!idev)
 		return -ENODEV;
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+
+	for_each_mc_rtnl(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
-	if (!pmc) {
-		/* MCA not found?? bug */
-		read_unlock_bh(&idev->lock);
+	if (!pmc)
 		return -ESRCH;
-	}
 	spin_lock_bh(&pmc->mca_lock);
+
 	sf_markstate(pmc);
 	if (!delta) {
 		if (!pmc->mca_sfcount[sfmode]) {
 			spin_unlock_bh(&pmc->mca_lock);
-			read_unlock_bh(&idev->lock);
 			return -EINVAL;
 		}
+
 		pmc->mca_sfcount[sfmode]--;
 	}
 	err = 0;
@@ -2237,7 +2207,6 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	} else if (sf_setstate(pmc) || changerec)
 		mld_ifc_event(pmc->idev);
 	spin_unlock_bh(&pmc->mca_lock);
-	read_unlock_bh(&idev->lock);
 	return err;
 }
 
@@ -2363,16 +2332,13 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 
 	if (!idev)
 		return -ENODEV;
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+
+	for_each_mc_rtnl(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
-	if (!pmc) {
-		/* MCA not found?? bug */
-		read_unlock_bh(&idev->lock);
+	if (!pmc)
 		return -ESRCH;
-	}
 	spin_lock_bh(&pmc->mca_lock);
 
 	sf_markstate(pmc);
@@ -2407,10 +2373,10 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		for_each_psf_rtnl(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(idev);
-	} else if (sf_setstate(pmc))
+	} else if (sf_setstate(pmc)) {
 		mld_ifc_event(idev);
+	}
 	spin_unlock_bh(&pmc->mca_lock);
-	read_unlock_bh(&idev->lock);
 	return err;
 }
 
@@ -2485,9 +2451,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 static void igmp6_leave_group(struct ifmcaddr6 *ma)
 {
 	if (mld_in_v1_mode(ma->idev)) {
-		if (ma->mca_flags & MAF_LAST_REPORTER)
+		if (ma->mca_flags & MAF_LAST_REPORTER) {
 			igmp6_send(&ma->mca_addr, ma->idev->dev,
 				ICMPV6_MGM_REDUCTION);
+		}
 	} else {
 		mld_add_delrec(ma->idev, ma);
 		mld_ifc_event(ma->idev);
@@ -2500,8 +2467,12 @@ static void mld_gq_work(struct work_struct *work)
 					      struct inet6_dev,
 					      mc_gq_work);
 
-	idev->mc_gq_running = 0;
+	rtnl_lock();
 	mld_send_report(idev, NULL);
+	rtnl_unlock();
+
+	idev->mc_gq_running = 0;
+
 	in6_dev_put(idev);
 }
 
@@ -2511,7 +2482,10 @@ static void mld_ifc_work(struct work_struct *work)
 					      struct inet6_dev,
 					      mc_ifc_work);
 
+	rtnl_lock();
 	mld_send_cr(idev);
+	rtnl_unlock();
+
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
 		if (idev->mc_ifc_count)
@@ -2525,6 +2499,7 @@ static void mld_ifc_event(struct inet6_dev *idev)
 {
 	if (mld_in_v1_mode(idev))
 		return;
+
 	idev->mc_ifc_count = idev->mc_qrv;
 	mld_ifc_start_work(idev, 1);
 }
@@ -2534,10 +2509,12 @@ static void mld_mca_work(struct work_struct *work)
 	struct ifmcaddr6 *ma = container_of(to_delayed_work(work),
 					    struct ifmcaddr6, mca_work);
 
+	rtnl_lock();
 	if (mld_in_v1_mode(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
+	rtnl_unlock();
 
 	spin_lock_bh(&ma->mca_lock);
 	ma->mca_flags |=  MAF_LAST_REPORTER;
@@ -2554,10 +2531,8 @@ void ipv6_mc_unmap(struct inet6_dev *idev)
 
 	/* Install multicast list, except for all-nodes (already installed) */
 
-	read_lock_bh(&idev->lock);
-	for (i = idev->mc_list; i; i = i->next)
+	for_each_mc_rtnl(idev, i)
 		igmp6_group_dropped(i);
-	read_unlock_bh(&idev->lock);
 }
 
 void ipv6_mc_remap(struct inet6_dev *idev)
@@ -2572,10 +2547,7 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	struct ifmcaddr6 *i;
 
 	/* Withdraw multicast list */
-
-	read_lock_bh(&idev->lock);
-
-	for (i = idev->mc_list; i; i = i->next)
+	for_each_mc_rtnl(idev, i)
 		igmp6_group_dropped(i);
 
 	/* Should stop work after group drop. or we will
@@ -2584,7 +2556,6 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	mld_ifc_stop_work(idev);
 	mld_gq_stop_work(idev);
 	mld_dad_stop_work(idev);
-	read_unlock_bh(&idev->lock);
 }
 
 static void ipv6_mc_reset(struct inet6_dev *idev)
@@ -2604,28 +2575,24 @@ void ipv6_mc_up(struct inet6_dev *idev)
 
 	/* Install multicast list, except for all-nodes (already installed) */
 
-	read_lock_bh(&idev->lock);
 	ipv6_mc_reset(idev);
-	for (i = idev->mc_list; i; i = i->next) {
+	for_each_mc_rtnl(idev, i) {
 		mld_del_delrec(idev, i);
 		igmp6_group_added(i);
 	}
-	read_unlock_bh(&idev->lock);
 }
 
 /* IPv6 device initialization. */
 
 void ipv6_mc_init_dev(struct inet6_dev *idev)
 {
-	write_lock_bh(&idev->lock);
 	idev->mc_gq_running = 0;
 	INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work);
-	idev->mc_tomb = NULL;
+	RCU_INIT_POINTER(idev->mc_tomb, NULL);
 	idev->mc_ifc_count = 0;
 	INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work);
 	INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work);
 	ipv6_mc_reset(idev);
-	write_unlock_bh(&idev->lock);
 }
 
 /*
@@ -2650,16 +2617,12 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		__ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
 
-	write_lock_bh(&idev->lock);
-	while ((i = idev->mc_list) != NULL) {
-		idev->mc_list = i->next;
+	while ((i = rtnl_dereference(idev->mc_list))) {
+		rcu_assign_pointer(idev->mc_list, rtnl_dereference(i->next));
 
-		write_unlock_bh(&idev->lock);
 		ip6_mc_clear_src(i);
 		ma_put(i);
-		write_lock_bh(&idev->lock);
 	}
-	write_unlock_bh(&idev->lock);
 }
 
 static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
@@ -2669,12 +2632,11 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
 	ASSERT_RTNL();
 
 	if (mld_in_v1_mode(idev)) {
-		read_lock_bh(&idev->lock);
-		for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+		for_each_mc_rtnl(idev, pmc)
 			igmp6_join_group(pmc);
-		read_unlock_bh(&idev->lock);
-	} else
+	} else {
 		mld_send_report(idev, NULL);
+	}
 }
 
 static int ipv6_mc_netdev_event(struct notifier_block *this,
@@ -2721,13 +2683,12 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
-		read_lock_bh(&idev->lock);
-		im = idev->mc_list;
+
+		im = rcu_dereference(idev->mc_list);
 		if (im) {
 			state->idev = idev;
 			break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	return im;
 }
@@ -2736,11 +2697,8 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
-	im = im->next;
+	im = rcu_dereference(im->next);
 	while (!im) {
-		if (likely(state->idev))
-			read_unlock_bh(&state->idev->lock);
-
 		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
@@ -2749,8 +2707,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
-		read_lock_bh(&state->idev->lock);
-		im = state->idev->mc_list;
+		im = rcu_dereference(state->idev->mc_list);
 	}
 	return im;
 }
@@ -2784,10 +2741,8 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
-	if (likely(state->idev)) {
-		read_unlock_bh(&state->idev->lock);
+	if (likely(state->idev))
 		state->idev = NULL;
-	}
 	state->dev = NULL;
 	rcu_read_unlock();
 }
@@ -2802,7 +2757,7 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 		   state->dev->ifindex, state->dev->name,
 		   &im->mca_addr,
 		   im->mca_users, im->mca_flags,
-		   (im->mca_flags&MAF_TIMER_RUNNING) ?
+		   (im->mca_flags & MAF_TIMER_RUNNING) ?
 		   jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0);
 	return 0;
 }
@@ -2837,8 +2792,8 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 		idev = __in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
-		read_lock_bh(&idev->lock);
-		im = idev->mc_list;
+
+		im = rcu_dereference(idev->mc_list);
 		if (likely(im)) {
 			spin_lock_bh(&im->mca_lock);
 			psf = rcu_dereference(im->mca_sources);
@@ -2849,7 +2804,6 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 			}
 			spin_unlock_bh(&im->mca_lock);
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	return psf;
 }
@@ -2861,11 +2815,8 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 	psf = rcu_dereference(psf->sf_next);
 	while (!psf) {
 		spin_unlock_bh(&state->im->mca_lock);
-		state->im = state->im->next;
+		state->im = rcu_dereference(state->im->next);
 		while (!state->im) {
-			if (likely(state->idev))
-				read_unlock_bh(&state->idev->lock);
-
 			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
@@ -2874,8 +2825,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 			state->idev = __in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
-			read_lock_bh(&state->idev->lock);
-			state->im = state->idev->mc_list;
+			state->im = rcu_dereference(state->idev->mc_list);
 		}
 		if (!state->im)
 			break;
@@ -2917,14 +2867,14 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 	__releases(RCU)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
 	if (likely(state->im)) {
 		spin_unlock_bh(&state->im->mca_lock);
 		state->im = NULL;
 	}
-	if (likely(state->idev)) {
-		read_unlock_bh(&state->idev->lock);
+	if (likely(state->idev))
 		state->idev = NULL;
-	}
+
 	state->dev = NULL;
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From f185de28d9ae6c978135993769352e523ee8df06 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:56 +0000
Subject: mld: add new workqueues for process mld events

When query/report packets are received, mld module processes them.
But they are processed under BH context so it couldn't use sleepable
functions. So, in order to switch context, the two workqueues are
added which processes query and report event.

In the struct inet6_dev, mc_{query | report}_queue are added so it
is per-interface queue.
And mc_{query | report}_work are workqueue structure.

When the query or report event is received, skb is queued to proper
queue and worker function is scheduled immediately.
Workqueues and queues are protected by spinlock, which is
mc_{query | report}_lock, and worker functions are protected by RTNL.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   9 +-
 include/net/mld.h      |   3 +
 net/ipv6/icmp.c        |   4 +-
 net/ipv6/mcast.c       | 280 ++++++++++++++++++++++++++++++++++---------------
 4 files changed, 210 insertions(+), 86 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 521158e05c18..882e0f88756f 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -125,7 +125,6 @@ struct ifmcaddr6 {
 	unsigned int		mca_flags;
 	int			mca_users;
 	refcount_t		mca_refcnt;
-	spinlock_t		mca_lock;
 	unsigned long		mca_cstamp;
 	unsigned long		mca_tstamp;
 	struct rcu_head		rcu;
@@ -183,6 +182,14 @@ struct inet6_dev {
 	struct delayed_work	mc_gq_work;	/* general query work */
 	struct delayed_work	mc_ifc_work;	/* interface change work */
 	struct delayed_work	mc_dad_work;	/* dad complete mc work */
+	struct delayed_work	mc_query_work;	/* mld query work */
+	struct delayed_work	mc_report_work;	/* mld report work */
+
+	struct sk_buff_head	mc_query_queue;		/* mld query queue */
+	struct sk_buff_head	mc_report_queue;	/* mld report queue */
+
+	spinlock_t		mc_query_lock;	/* mld query queue lock */
+	spinlock_t		mc_report_lock;	/* mld query report lock */
 
 	struct ifacaddr6	*ac_list;
 	rwlock_t		lock;
diff --git a/include/net/mld.h b/include/net/mld.h
index 496bddb59942..c07359808493 100644
--- a/include/net/mld.h
+++ b/include/net/mld.h
@@ -92,6 +92,9 @@ struct mld2_query {
 #define MLD_EXP_MIN_LIMIT	32768UL
 #define MLDV1_MRD_MAX_COMPAT	(MLD_EXP_MIN_LIMIT - 1)
 
+#define MLD_MAX_QUEUE		8
+#define MLD_MAX_SKBS		32
+
 static inline unsigned long mldv2_mrc(const struct mld2_query *mlh2)
 {
 	/* RFC3810, 5.1.3. Maximum Response Code */
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fd1f896115c1..29d38d6b55fb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -944,11 +944,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
 	case ICMPV6_MGM_QUERY:
 		igmp6_event_query(skb);
-		break;
+		return 0;
 
 	case ICMPV6_MGM_REPORT:
 		igmp6_event_report(skb);
-		break;
+		return 0;
 
 	case ICMPV6_MGM_REDUCTION:
 	case ICMPV6_NI_QUERY:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 75541cf53153..3ad754388933 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -439,7 +439,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 		if (psl)
 			count += psl->sl_max;
-		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -517,7 +517,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	}
 	if (gsf->gf_numsrc) {
 		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
-							  GFP_ATOMIC);
+							  GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -659,13 +659,11 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	    IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	spin_lock_bh(&mc->mca_lock);
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
 			dev_mc_add(dev, buf);
 	}
-	spin_unlock_bh(&mc->mca_lock);
 
 	if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
 		return;
@@ -695,24 +693,20 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	    IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	spin_lock_bh(&mc->mca_lock);
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
 			dev_mc_del(dev, buf);
 	}
 
-	spin_unlock_bh(&mc->mca_lock);
 	if (mc->mca_flags & MAF_NOREPORT)
 		return;
 
 	if (!mc->idev->dead)
 		igmp6_leave_group(mc);
 
-	spin_lock_bh(&mc->mca_lock);
 	if (cancel_delayed_work(&mc->mca_work))
 		refcount_dec(&mc->mca_refcnt);
-	spin_unlock_bh(&mc->mca_lock);
 }
 
 /*
@@ -728,12 +722,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC);
+	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
 
-	spin_lock_bh(&im->mca_lock);
-	spin_lock_init(&pmc->mca_lock);
 	pmc->idev = im->idev;
 	in6_dev_hold(idev);
 	pmc->mca_addr = im->mca_addr;
@@ -752,7 +744,6 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		for_each_psf_rtnl(pmc, psf)
 			psf->sf_crcount = pmc->mca_crcount;
 	}
-	spin_unlock_bh(&im->mca_lock);
 
 	rcu_assign_pointer(pmc->next, idev->mc_tomb);
 	rcu_assign_pointer(idev->mc_tomb, pmc);
@@ -777,7 +768,6 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 			rcu_assign_pointer(idev->mc_tomb, pmc->next);
 	}
 
-	spin_lock_bh(&im->mca_lock);
 	if (pmc) {
 		im->idev = pmc->idev;
 		if (im->mca_sfmode == MCAST_INCLUDE) {
@@ -799,7 +789,6 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		ip6_mc_clear_src(pmc);
 		kfree_rcu(pmc, rcu);
 	}
-	spin_unlock_bh(&im->mca_lock);
 }
 
 static void mld_clear_delrec(struct inet6_dev *idev)
@@ -820,10 +809,8 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 	for_each_mc_rtnl(idev, pmc) {
 		struct ip6_sf_list *psf, *psf_next;
 
-		spin_lock_bh(&pmc->mca_lock);
 		psf = rtnl_dereference(pmc->mca_tomb);
 		RCU_INIT_POINTER(pmc->mca_tomb, NULL);
-		spin_unlock_bh(&pmc->mca_lock);
 		for (; psf; psf = psf_next) {
 			psf_next = rtnl_dereference(psf->sf_next);
 			kfree_rcu(psf, rcu);
@@ -831,6 +818,26 @@ static void mld_clear_delrec(struct inet6_dev *idev)
 	}
 }
 
+static void mld_clear_query(struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+
+	spin_lock_bh(&idev->mc_query_lock);
+	while ((skb = __skb_dequeue(&idev->mc_query_queue)))
+		kfree_skb(skb);
+	spin_unlock_bh(&idev->mc_query_lock);
+}
+
+static void mld_clear_report(struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+
+	spin_lock_bh(&idev->mc_report_lock);
+	while ((skb = __skb_dequeue(&idev->mc_report_queue)))
+		kfree_skb(skb);
+	spin_unlock_bh(&idev->mc_report_lock);
+}
+
 static void mca_get(struct ifmcaddr6 *mc)
 {
 	refcount_inc(&mc->mca_refcnt);
@@ -850,7 +857,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 {
 	struct ifmcaddr6 *mc;
 
-	mc = kzalloc(sizeof(*mc), GFP_ATOMIC);
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 	if (!mc)
 		return NULL;
 
@@ -862,7 +869,6 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 	/* mca_stamp should be updated upon changes */
 	mc->mca_cstamp = mc->mca_tstamp = jiffies;
 	refcount_set(&mc->mca_refcnt, 1);
-	spin_lock_init(&mc->mca_lock);
 
 	mc->mca_sfmode = mode;
 	mc->mca_sfcount[mode] = 1;
@@ -995,7 +1001,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 			if (src_addr && !ipv6_addr_any(src_addr)) {
 				struct ip6_sf_list *psf;
 
-				spin_lock_bh(&mc->mca_lock);
 				for_each_psf_rcu(mc, psf) {
 					if (ipv6_addr_equal(&psf->sf_addr, src_addr))
 						break;
@@ -1006,7 +1011,6 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 						mc->mca_sfcount[MCAST_EXCLUDE];
 				else
 					rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
-				spin_unlock_bh(&mc->mca_lock);
 			} else
 				rv = true; /* don't filter unspecified source */
 		}
@@ -1060,6 +1064,20 @@ static void mld_dad_stop_work(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
+static void mld_query_stop_work(struct inet6_dev *idev)
+{
+	spin_lock_bh(&idev->mc_query_lock);
+	if (cancel_delayed_work(&idev->mc_query_work))
+		__in6_dev_put(idev);
+	spin_unlock_bh(&idev->mc_query_lock);
+}
+
+static void mld_report_stop_work(struct inet6_dev *idev)
+{
+	if (cancel_delayed_work_sync(&idev->mc_report_work))
+		__in6_dev_put(idev);
+}
+
 /*
  * IGMP handling (alias multicast ICMPv6 messages)
  */
@@ -1093,7 +1111,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	int i, scount;
 
 	scount = 0;
-	for_each_psf_rcu(pmc, psf) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1126,7 +1144,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	/* mark INCLUDE-mode sources */
 
 	scount = 0;
-	for_each_psf_rcu(pmc, psf) {
+	for_each_psf_rtnl(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1317,19 +1335,42 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
 
 /* called with rcu_read_lock() */
 int igmp6_event_query(struct sk_buff *skb)
+{
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+	if (!idev)
+		return -EINVAL;
+
+	if (idev->dead) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	spin_lock_bh(&idev->mc_query_lock);
+	if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
+		__skb_queue_tail(&idev->mc_query_queue, skb);
+		if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
+			in6_dev_hold(idev);
+	}
+	spin_unlock_bh(&idev->mc_query_lock);
+
+	return 0;
+}
+
+static void __mld_query_work(struct sk_buff *skb)
 {
 	struct mld2_query *mlh2 = NULL;
-	struct ifmcaddr6 *ma;
 	const struct in6_addr *group;
 	unsigned long max_delay;
 	struct inet6_dev *idev;
+	struct ifmcaddr6 *ma;
 	struct mld_msg *mld;
 	int group_type;
 	int mark = 0;
 	int len, err;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
-		return -EINVAL;
+		goto out;
 
 	/* compute payload length excluding extension headers */
 	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
@@ -1346,11 +1387,11 @@ int igmp6_event_query(struct sk_buff *skb)
 	    ipv6_hdr(skb)->hop_limit != 1 ||
 	    !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
 	    IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
-		return -EINVAL;
+		goto out;
 
 	idev = __in6_dev_get(skb->dev);
 	if (!idev)
-		return 0;
+		goto out;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 	group = &mld->mld_mca;
@@ -1358,59 +1399,56 @@ int igmp6_event_query(struct sk_buff *skb)
 
 	if (group_type != IPV6_ADDR_ANY &&
 	    !(group_type&IPV6_ADDR_MULTICAST))
-		return -EINVAL;
+		goto out;
 
 	if (len < MLD_V1_QUERY_LEN) {
-		return -EINVAL;
+		goto out;
 	} else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) {
 		err = mld_process_v1(idev, mld, &max_delay,
 				     len == MLD_V1_QUERY_LEN);
 		if (err < 0)
-			return err;
+			goto out;
 	} else if (len >= MLD_V2_QUERY_LEN_MIN) {
 		int srcs_offset = sizeof(struct mld2_query) -
 				  sizeof(struct icmp6hdr);
 
 		if (!pskb_may_pull(skb, srcs_offset))
-			return -EINVAL;
+			goto out;
 
 		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 
 		err = mld_process_v2(idev, mlh2, &max_delay);
 		if (err < 0)
-			return err;
+			goto out;
 
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
 			if (mlh2->mld2q_nsrcs)
-				return -EINVAL; /* no sources allowed */
+				goto out; /* no sources allowed */
 
 			mld_gq_start_work(idev);
-			return 0;
+			goto out;
 		}
 		/* mark sources to include, if group & source-specific */
 		if (mlh2->mld2q_nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset +
 			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
-				return -EINVAL;
+				goto out;
 
 			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
 	} else {
-		return -EINVAL;
+		goto out;
 	}
 
 	if (group_type == IPV6_ADDR_ANY) {
-		for_each_mc_rcu(idev, ma) {
-			spin_lock_bh(&ma->mca_lock);
+		for_each_mc_rtnl(idev, ma) {
 			igmp6_group_queried(ma, max_delay);
-			spin_unlock_bh(&ma->mca_lock);
 		}
 	} else {
-		for_each_mc_rcu(idev, ma) {
+		for_each_mc_rtnl(idev, ma) {
 			if (!ipv6_addr_equal(group, &ma->mca_addr))
 				continue;
-			spin_lock_bh(&ma->mca_lock);
 			if (ma->mca_flags & MAF_TIMER_RUNNING) {
 				/* gsquery <- gsquery && mark */
 				if (!mark)
@@ -1425,16 +1463,72 @@ int igmp6_event_query(struct sk_buff *skb)
 			if (!(ma->mca_flags & MAF_GSQUERY) ||
 			    mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
 				igmp6_group_queried(ma, max_delay);
-			spin_unlock_bh(&ma->mca_lock);
 			break;
 		}
 	}
 
-	return 0;
+out:
+	consume_skb(skb);
+}
+
+static void mld_query_work(struct work_struct *work)
+{
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_query_work);
+	struct sk_buff_head q;
+	struct sk_buff *skb;
+	bool rework = false;
+	int cnt = 0;
+
+	skb_queue_head_init(&q);
+
+	spin_lock_bh(&idev->mc_query_lock);
+	while ((skb = __skb_dequeue(&idev->mc_query_queue))) {
+		__skb_queue_tail(&q, skb);
+
+		if (++cnt >= MLD_MAX_QUEUE) {
+			rework = true;
+			schedule_delayed_work(&idev->mc_query_work, 0);
+			break;
+		}
+	}
+	spin_unlock_bh(&idev->mc_query_lock);
+
+	rtnl_lock();
+	while ((skb = __skb_dequeue(&q)))
+		__mld_query_work(skb);
+	rtnl_unlock();
+
+	if (!rework)
+		in6_dev_put(idev);
 }
 
 /* called with rcu_read_lock() */
 int igmp6_event_report(struct sk_buff *skb)
+{
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+	if (!idev)
+		return -EINVAL;
+
+	if (idev->dead) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	spin_lock_bh(&idev->mc_report_lock);
+	if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
+		__skb_queue_tail(&idev->mc_report_queue, skb);
+		if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
+			in6_dev_hold(idev);
+	}
+	spin_unlock_bh(&idev->mc_report_lock);
+
+	return 0;
+}
+
+static void __mld_report_work(struct sk_buff *skb)
 {
 	struct ifmcaddr6 *ma;
 	struct inet6_dev *idev;
@@ -1443,15 +1537,15 @@ int igmp6_event_report(struct sk_buff *skb)
 
 	/* Our own report looped back. Ignore it. */
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		return 0;
+		goto out;
 
 	/* send our report if the MC router may not have heard this report */
 	if (skb->pkt_type != PACKET_MULTICAST &&
 	    skb->pkt_type != PACKET_BROADCAST)
-		return 0;
+		goto out;
 
 	if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
-		return -EINVAL;
+		goto out;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 
@@ -1459,28 +1553,60 @@ int igmp6_event_report(struct sk_buff *skb)
 	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
 	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
-		return -EINVAL;
+		goto out;
 
 	idev = __in6_dev_get(skb->dev);
 	if (!idev)
-		return -ENODEV;
+		goto out;
 
 	/*
 	 *	Cancel the work for this group
 	 */
 
-	for_each_mc_rcu(idev, ma) {
+	for_each_mc_rtnl(idev, ma) {
 		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
-			spin_lock(&ma->mca_lock);
 			if (cancel_delayed_work(&ma->mca_work))
 				refcount_dec(&ma->mca_refcnt);
 			ma->mca_flags &= ~(MAF_LAST_REPORTER |
 					   MAF_TIMER_RUNNING);
-			spin_unlock(&ma->mca_lock);
 			break;
 		}
 	}
-	return 0;
+
+out:
+	consume_skb(skb);
+}
+
+static void mld_report_work(struct work_struct *work)
+{
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_report_work);
+	struct sk_buff_head q;
+	struct sk_buff *skb;
+	bool rework = false;
+	int cnt = 0;
+
+	skb_queue_head_init(&q);
+	spin_lock_bh(&idev->mc_report_lock);
+	while ((skb = __skb_dequeue(&idev->mc_report_queue))) {
+		__skb_queue_tail(&q, skb);
+
+		if (++cnt >= MLD_MAX_QUEUE) {
+			rework = true;
+			schedule_delayed_work(&idev->mc_report_work, 0);
+			break;
+		}
+	}
+	spin_unlock_bh(&idev->mc_report_lock);
+
+	rtnl_lock();
+	while ((skb = __skb_dequeue(&q)))
+		__mld_report_work(skb);
+	rtnl_unlock();
+
+	if (!rework)
+		in6_dev_put(idev);
 }
 
 static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
@@ -1847,22 +1973,18 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 		for_each_mc_rtnl(idev, pmc) {
 			if (pmc->mca_flags & MAF_NOREPORT)
 				continue;
-			spin_lock_bh(&pmc->mca_lock);
 			if (pmc->mca_sfcount[MCAST_EXCLUDE])
 				type = MLD2_MODE_IS_EXCLUDE;
 			else
 				type = MLD2_MODE_IS_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0, 0);
-			spin_unlock_bh(&pmc->mca_lock);
 		}
 	} else {
-		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_MODE_IS_EXCLUDE;
 		else
 			type = MLD2_MODE_IS_INCLUDE;
 		skb = add_grec(skb, pmc, type, 0, 0, 0);
-		spin_unlock_bh(&pmc->mca_lock);
 	}
 	if (skb)
 		mld_sendpack(skb);
@@ -1938,7 +2060,6 @@ static void mld_send_cr(struct inet6_dev *idev)
 
 	/* change recs */
 	for_each_mc_rtnl(idev, pmc) {
-		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_ALLOW_NEW_SOURCES;
@@ -1958,7 +2079,6 @@ static void mld_send_cr(struct inet6_dev *idev)
 			skb = add_grec(skb, pmc, type, 0, 0, 0);
 			pmc->mca_crcount--;
 		}
-		spin_unlock_bh(&pmc->mca_lock);
 	}
 	if (!skb)
 		return;
@@ -2072,13 +2192,11 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 
 	skb = NULL;
 	for_each_mc_rtnl(idev, pmc) {
-		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_CHANGE_TO_EXCLUDE;
 		else
 			type = MLD2_ALLOW_NEW_SOURCES;
 		skb = add_grec(skb, pmc, type, 0, 0, 1);
-		spin_unlock_bh(&pmc->mca_lock);
 	}
 	if (skb)
 		mld_sendpack(skb);
@@ -2104,13 +2222,13 @@ static void mld_dad_work(struct work_struct *work)
 
 	rtnl_lock();
 	mld_send_initial_cr(idev);
-	rtnl_unlock();
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
 			mld_dad_start_work(idev,
 					   unsolicited_report_interval(idev));
 	}
+	rtnl_unlock();
 	in6_dev_put(idev);
 }
 
@@ -2173,12 +2291,10 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	}
 	if (!pmc)
 		return -ESRCH;
-	spin_lock_bh(&pmc->mca_lock);
 
 	sf_markstate(pmc);
 	if (!delta) {
 		if (!pmc->mca_sfcount[sfmode]) {
-			spin_unlock_bh(&pmc->mca_lock);
 			return -EINVAL;
 		}
 
@@ -2206,7 +2322,6 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		mld_ifc_event(pmc->idev);
 	} else if (sf_setstate(pmc) || changerec)
 		mld_ifc_event(pmc->idev);
-	spin_unlock_bh(&pmc->mca_lock);
 	return err;
 }
 
@@ -2225,7 +2340,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kzalloc(sizeof(*psf), GFP_KERNEL);
 		if (!psf)
 			return -ENOBUFS;
 
@@ -2304,7 +2419,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				    &psf->sf_addr))
 					break;
 			if (!dpsf) {
-				dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				dpsf = kmalloc(sizeof(*dpsf), GFP_KERNEL);
 				if (!dpsf)
 					continue;
 				*dpsf = *psf;
@@ -2339,7 +2454,6 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	}
 	if (!pmc)
 		return -ESRCH;
-	spin_lock_bh(&pmc->mca_lock);
 
 	sf_markstate(pmc);
 	isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
@@ -2376,7 +2490,6 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	} else if (sf_setstate(pmc)) {
 		mld_ifc_event(idev);
 	}
-	spin_unlock_bh(&pmc->mca_lock);
 	return err;
 }
 
@@ -2415,7 +2528,6 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 
 	delay = prandom_u32() % unsolicited_report_interval(ma->idev);
 
-	spin_lock_bh(&ma->mca_lock);
 	if (cancel_delayed_work(&ma->mca_work)) {
 		refcount_dec(&ma->mca_refcnt);
 		delay = ma->mca_work.timer.expires - jiffies;
@@ -2424,7 +2536,6 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 	if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
 		refcount_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
-	spin_unlock_bh(&ma->mca_lock);
 }
 
 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
@@ -2469,9 +2580,8 @@ static void mld_gq_work(struct work_struct *work)
 
 	rtnl_lock();
 	mld_send_report(idev, NULL);
-	rtnl_unlock();
-
 	idev->mc_gq_running = 0;
+	rtnl_unlock();
 
 	in6_dev_put(idev);
 }
@@ -2484,7 +2594,6 @@ static void mld_ifc_work(struct work_struct *work)
 
 	rtnl_lock();
 	mld_send_cr(idev);
-	rtnl_unlock();
 
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
@@ -2492,6 +2601,7 @@ static void mld_ifc_work(struct work_struct *work)
 			mld_ifc_start_work(idev,
 					   unsolicited_report_interval(idev));
 	}
+	rtnl_unlock();
 	in6_dev_put(idev);
 }
 
@@ -2514,12 +2624,10 @@ static void mld_mca_work(struct work_struct *work)
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
-	rtnl_unlock();
-
-	spin_lock_bh(&ma->mca_lock);
 	ma->mca_flags |=  MAF_LAST_REPORTER;
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
-	spin_unlock_bh(&ma->mca_lock);
+	rtnl_unlock();
+
 	ma_put(ma);
 }
 
@@ -2553,6 +2661,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
 	/* Should stop work after group drop. or we will
 	 * start work again in mld_ifc_event()
 	 */
+	synchronize_net();
+	mld_query_stop_work(idev);
+	mld_report_stop_work(idev);
 	mld_ifc_stop_work(idev);
 	mld_gq_stop_work(idev);
 	mld_dad_stop_work(idev);
@@ -2592,6 +2703,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
 	idev->mc_ifc_count = 0;
 	INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work);
 	INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work);
+	INIT_DELAYED_WORK(&idev->mc_query_work, mld_query_work);
+	INIT_DELAYED_WORK(&idev->mc_report_work, mld_report_work);
+	skb_queue_head_init(&idev->mc_query_queue);
+	skb_queue_head_init(&idev->mc_report_queue);
+	spin_lock_init(&idev->mc_query_lock);
+	spin_lock_init(&idev->mc_report_lock);
 	ipv6_mc_reset(idev);
 }
 
@@ -2606,6 +2723,8 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 	/* Deactivate works */
 	ipv6_mc_down(idev);
 	mld_clear_delrec(idev);
+	mld_clear_query(idev);
+	mld_clear_report(idev);
 
 	/* Delete all-nodes address. */
 	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
@@ -2795,14 +2914,12 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 
 		im = rcu_dereference(idev->mc_list);
 		if (likely(im)) {
-			spin_lock_bh(&im->mca_lock);
 			psf = rcu_dereference(im->mca_sources);
 			if (likely(psf)) {
 				state->im = im;
 				state->idev = idev;
 				break;
 			}
-			spin_unlock_bh(&im->mca_lock);
 		}
 	}
 	return psf;
@@ -2814,7 +2931,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 
 	psf = rcu_dereference(psf->sf_next);
 	while (!psf) {
-		spin_unlock_bh(&state->im->mca_lock);
 		state->im = rcu_dereference(state->im->next);
 		while (!state->im) {
 			state->dev = next_net_device_rcu(state->dev);
@@ -2829,7 +2945,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 		}
 		if (!state->im)
 			break;
-		spin_lock_bh(&state->im->mca_lock);
 		psf = rcu_dereference(state->im->mca_sources);
 	}
 out:
@@ -2868,10 +2983,8 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
-	if (likely(state->im)) {
-		spin_unlock_bh(&state->im->mca_lock);
+	if (likely(state->im))
 		state->im = NULL;
-	}
 	if (likely(state->idev))
 		state->idev = NULL;
 
@@ -2955,6 +3068,7 @@ static int __net_init igmp6_net_init(struct net *net)
 	}
 
 	inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
+	net->ipv6.igmp_sk->sk_allocation = GFP_KERNEL;
 
 	err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6,
 				   SOCK_RAW, IPPROTO_ICMPV6, net);
-- 
cgit v1.2.3


From 63ed8de4be81b699ca727e9f8e3344bd487806d7 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 25 Mar 2021 16:16:57 +0000
Subject: mld: add mc_lock for protecting per-interface mld data

The purpose of this lock is to avoid a bottleneck in the query/report
event handler logic.

By previous patches, almost all mld data is protected by RTNL.
So, the query and report event handler, which is data path logic
acquires RTNL too. Therefore if a lot of query and report events
are received, it uses RTNL for a long time.
So it makes the control-plane bottleneck because of using RTNL.
In order to avoid this bottleneck, mc_lock is added.

mc_lock protect only per-interface mld data and per-interface mld
data is used in the query/report event handler logic.
So, no longer rtnl_lock is needed in the query/report event handler logic.
Therefore bottleneck will be disappeared by mc_lock.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   1 +
 net/ipv6/mcast.c       | 309 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 194 insertions(+), 116 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 882e0f88756f..71bb4cc4d05d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -190,6 +190,7 @@ struct inet6_dev {
 
 	spinlock_t		mc_query_lock;	/* mld query queue lock */
 	spinlock_t		mc_report_lock;	/* mld query report lock */
+	struct mutex		mc_lock;	/* mld global lock */
 
 	struct ifacaddr6	*ac_list;
 	rwlock_t		lock;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3ad754388933..49b0cebfdcdc 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -111,6 +111,8 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 /*
  *	socket join on multicast group
  */
+#define mc_dereference(e, idev) \
+	rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock))
 
 #define for_each_pmc_rtnl(np, pmc)				\
 	for (pmc = rtnl_dereference((np)->ipv6_mc_list);	\
@@ -122,10 +124,10 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 	     pmc;						\
 	     pmc = rcu_dereference(pmc->next))
 
-#define for_each_psf_rtnl(mc, psf)				\
-	for (psf = rtnl_dereference((mc)->mca_sources);		\
+#define for_each_psf_mclock(mc, psf)				\
+	for (psf = mc_dereference((mc)->mca_sources, mc->idev);	\
 	     psf;						\
-	     psf = rtnl_dereference(psf->sf_next))
+	     psf = mc_dereference(psf->sf_next, mc->idev))
 
 #define for_each_psf_rcu(mc, psf)				\
 	for (psf = rcu_dereference((mc)->mca_sources);		\
@@ -133,14 +135,14 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 	     psf = rcu_dereference(psf->sf_next))
 
 #define for_each_psf_tomb(mc, psf)				\
-	for (psf = rtnl_dereference((mc)->mca_tomb);		\
+	for (psf = mc_dereference((mc)->mca_tomb, mc->idev);	\
 	     psf;						\
-	     psf = rtnl_dereference(psf->sf_next))
+	     psf = mc_dereference(psf->sf_next, mc->idev))
 
-#define for_each_mc_rtnl(idev, mc)				\
-	for (mc = rtnl_dereference((idev)->mc_list);		\
+#define for_each_mc_mclock(idev, mc)				\
+	for (mc = mc_dereference((idev)->mc_list, idev);	\
 	     mc;						\
-	     mc = rtnl_dereference(mc->next))
+	     mc = mc_dereference(mc->next, idev))
 
 #define for_each_mc_rcu(idev, mc)				\
 	for (mc = rcu_dereference((idev)->mc_list);             \
@@ -148,9 +150,9 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 	     mc = rcu_dereference(mc->next))
 
 #define for_each_mc_tomb(idev, mc)				\
-	for (mc = rtnl_dereference((idev)->mc_tomb);		\
+	for (mc = mc_dereference((idev)->mc_tomb, idev);	\
 	     mc;						\
-	     mc = rtnl_dereference(mc->next))
+	     mc = mc_dereference(mc->next, idev))
 
 static int unsolicited_report_interval(struct inet6_dev *idev)
 {
@@ -268,11 +270,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			if (dev) {
 				struct inet6_dev *idev = __in6_dev_get(dev);
 
-				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+				ip6_mc_leave_src(sk, mc_lst, idev);
 				if (idev)
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-			} else
-				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+			} else {
+				ip6_mc_leave_src(sk, mc_lst, NULL);
+			}
 
 			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 			kfree_rcu(mc_lst, rcu);
@@ -329,11 +332,12 @@ void __ipv6_sock_mc_close(struct sock *sk)
 		if (dev) {
 			struct inet6_dev *idev = __in6_dev_get(dev);
 
-			(void) ip6_mc_leave_src(sk, mc_lst, idev);
+			ip6_mc_leave_src(sk, mc_lst, idev);
 			if (idev)
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-		} else
-			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+		} else {
+			ip6_mc_leave_src(sk, mc_lst, NULL);
+		}
 
 		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 		kfree_rcu(mc_lst, rcu);
@@ -376,6 +380,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
 	err = -EADDRNOTAVAIL;
 
+	mutex_lock(&idev->mc_lock);
 	for_each_pmc_rtnl(inet6, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
@@ -469,6 +474,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
+	mutex_unlock(&idev->mc_lock);
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
@@ -529,25 +535,33 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 			psin6 = (struct sockaddr_in6 *)list;
 			newpsl->sl_addr[i] = psin6->sin6_addr;
 		}
+		mutex_lock(&idev->mc_lock);
 		err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
-			newpsl->sl_count, newpsl->sl_addr, 0);
+				     newpsl->sl_count, newpsl->sl_addr, 0);
 		if (err) {
+			mutex_unlock(&idev->mc_lock);
 			sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
 			goto done;
 		}
+		mutex_unlock(&idev->mc_lock);
 	} else {
 		newpsl = NULL;
-		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+		mutex_lock(&idev->mc_lock);
+		ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+		mutex_unlock(&idev->mc_lock);
 	}
 
+	mutex_lock(&idev->mc_lock);
 	psl = rtnl_dereference(pmc->sflist);
 	if (psl) {
-		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
-			psl->sl_count, psl->sl_addr, 0);
+		ip6_mc_del_src(idev, group, pmc->sfmode,
+			       psl->sl_count, psl->sl_addr, 0);
 		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
 		kfree_rcu(psl, rcu);
-	} else
-		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+	} else {
+		ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+	}
+	mutex_unlock(&idev->mc_lock);
 	rcu_assign_pointer(pmc->sflist, newpsl);
 	pmc->sfmode = gsf->gf_fmode;
 	err = 0;
@@ -650,6 +664,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	return rv;
 }
 
+/* called with mc_lock */
 static void igmp6_group_added(struct ifmcaddr6 *mc)
 {
 	struct net_device *dev = mc->idev->dev;
@@ -684,6 +699,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	mld_ifc_event(mc->idev);
 }
 
+/* called with mc_lock */
 static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 {
 	struct net_device *dev = mc->idev->dev;
@@ -711,6 +727,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 
 /*
  * deleted ifmcaddr6 manipulation
+ * called with mc_lock
  */
 static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
@@ -735,13 +752,13 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		struct ip6_sf_list *psf;
 
 		rcu_assign_pointer(pmc->mca_tomb,
-				   rtnl_dereference(im->mca_tomb));
+				   mc_dereference(im->mca_tomb, idev));
 		rcu_assign_pointer(pmc->mca_sources,
-				   rtnl_dereference(im->mca_sources));
+				   mc_dereference(im->mca_sources, idev));
 		RCU_INIT_POINTER(im->mca_tomb, NULL);
 		RCU_INIT_POINTER(im->mca_sources, NULL);
 
-		for_each_psf_rtnl(pmc, psf)
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = pmc->mca_crcount;
 	}
 
@@ -749,6 +766,7 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	rcu_assign_pointer(idev->mc_tomb, pmc);
 }
 
+/* called with mc_lock */
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
 	struct ip6_sf_list *psf, *sources, *tomb;
@@ -772,15 +790,15 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 		im->idev = pmc->idev;
 		if (im->mca_sfmode == MCAST_INCLUDE) {
 			tomb = rcu_replace_pointer(im->mca_tomb,
-						   rtnl_dereference(pmc->mca_tomb),
-						   lockdep_rtnl_is_held());
+						   mc_dereference(pmc->mca_tomb, pmc->idev),
+						   lockdep_is_held(&im->idev->mc_lock));
 			rcu_assign_pointer(pmc->mca_tomb, tomb);
 
 			sources = rcu_replace_pointer(im->mca_sources,
-						      rtnl_dereference(pmc->mca_sources),
-						      lockdep_rtnl_is_held());
+						      mc_dereference(pmc->mca_sources, pmc->idev),
+						      lockdep_is_held(&im->idev->mc_lock));
 			rcu_assign_pointer(pmc->mca_sources, sources);
-			for_each_psf_rtnl(im, psf)
+			for_each_psf_mclock(im, psf)
 				psf->sf_crcount = idev->mc_qrv;
 		} else {
 			im->mca_crcount = idev->mc_qrv;
@@ -791,28 +809,29 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	}
 }
 
+/* called with mc_lock */
 static void mld_clear_delrec(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *nextpmc;
 
-	pmc = rtnl_dereference(idev->mc_tomb);
+	pmc = mc_dereference(idev->mc_tomb, idev);
 	RCU_INIT_POINTER(idev->mc_tomb, NULL);
 
 	for (; pmc; pmc = nextpmc) {
-		nextpmc = rtnl_dereference(pmc->next);
+		nextpmc = mc_dereference(pmc->next, idev);
 		ip6_mc_clear_src(pmc);
 		in6_dev_put(pmc->idev);
 		kfree_rcu(pmc, rcu);
 	}
 
 	/* clear dead sources, too */
-	for_each_mc_rtnl(idev, pmc) {
+	for_each_mc_mclock(idev, pmc) {
 		struct ip6_sf_list *psf, *psf_next;
 
-		psf = rtnl_dereference(pmc->mca_tomb);
+		psf = mc_dereference(pmc->mca_tomb, idev);
 		RCU_INIT_POINTER(pmc->mca_tomb, NULL);
 		for (; psf; psf = psf_next) {
-			psf_next = rtnl_dereference(psf->sf_next);
+			psf_next = mc_dereference(psf->sf_next, idev);
 			kfree_rcu(psf, rcu);
 		}
 	}
@@ -851,6 +870,7 @@ static void ma_put(struct ifmcaddr6 *mc)
 	}
 }
 
+/* called with mc_lock */
 static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 				   const struct in6_addr *addr,
 				   unsigned int mode)
@@ -902,10 +922,12 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 		return -ENODEV;
 	}
 
-	for_each_mc_rtnl(idev, mc) {
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, mc) {
 		if (ipv6_addr_equal(&mc->mca_addr, addr)) {
 			mc->mca_users++;
 			ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
+			mutex_unlock(&idev->mc_lock);
 			in6_dev_put(idev);
 			return 0;
 		}
@@ -913,6 +935,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 
 	mc = mca_alloc(idev, addr, mode);
 	if (!mc) {
+		mutex_unlock(&idev->mc_lock);
 		in6_dev_put(idev);
 		return -ENOMEM;
 	}
@@ -924,6 +947,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 
 	mld_del_delrec(idev, mc);
 	igmp6_group_added(mc);
+	mutex_unlock(&idev->mc_lock);
 	ma_put(mc);
 	return 0;
 }
@@ -935,7 +959,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 EXPORT_SYMBOL(ipv6_dev_mc_inc);
 
 /*
- *	device multicast group del
+ * device multicast group del
  */
 int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
@@ -943,8 +967,9 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 
 	ASSERT_RTNL();
 
+	mutex_lock(&idev->mc_lock);
 	for (map = &idev->mc_list;
-	     (ma = rtnl_dereference(*map));
+	     (ma = mc_dereference(*map, idev));
 	     map = &ma->next) {
 		if (ipv6_addr_equal(&ma->mca_addr, addr)) {
 			if (--ma->mca_users == 0) {
@@ -952,14 +977,17 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 
 				igmp6_group_dropped(ma);
 				ip6_mc_clear_src(ma);
+				mutex_unlock(&idev->mc_lock);
 
 				ma_put(ma);
 				return 0;
 			}
+			mutex_unlock(&idev->mc_lock);
 			return 0;
 		}
 	}
 
+	mutex_unlock(&idev->mc_lock);
 	return -ENOENT;
 }
 
@@ -1019,6 +1047,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 	return rv;
 }
 
+/* called with mc_lock */
 static void mld_gq_start_work(struct inet6_dev *idev)
 {
 	unsigned long tv = prandom_u32() % idev->mc_maxdelay;
@@ -1028,6 +1057,7 @@ static void mld_gq_start_work(struct inet6_dev *idev)
 		in6_dev_hold(idev);
 }
 
+/* called with mc_lock */
 static void mld_gq_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_gq_running = 0;
@@ -1035,6 +1065,7 @@ static void mld_gq_stop_work(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
@@ -1043,6 +1074,7 @@ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
 		in6_dev_hold(idev);
 }
 
+/* called with mc_lock */
 static void mld_ifc_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_ifc_count = 0;
@@ -1050,6 +1082,7 @@ static void mld_ifc_stop_work(struct inet6_dev *idev)
 		__in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
@@ -1080,6 +1113,7 @@ static void mld_report_stop_work(struct inet6_dev *idev)
 
 /*
  * IGMP handling (alias multicast ICMPv6 messages)
+ * called with mc_lock
  */
 static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 {
@@ -1103,7 +1137,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
-/* mark EXCLUDE-mode sources */
+/* mark EXCLUDE-mode sources
+ * called with mc_lock
+ */
 static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 			     const struct in6_addr *srcs)
 {
@@ -1111,7 +1147,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	int i, scount;
 
 	scount = 0;
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1132,6 +1168,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	return true;
 }
 
+/* called with mc_lock */
 static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 			    const struct in6_addr *srcs)
 {
@@ -1144,7 +1181,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	/* mark INCLUDE-mode sources */
 
 	scount = 0;
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1370,7 +1407,7 @@ static void __mld_query_work(struct sk_buff *skb)
 	int len, err;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
-		goto out;
+		goto kfree_skb;
 
 	/* compute payload length excluding extension headers */
 	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
@@ -1387,11 +1424,11 @@ static void __mld_query_work(struct sk_buff *skb)
 	    ipv6_hdr(skb)->hop_limit != 1 ||
 	    !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
 	    IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
-		goto out;
+		goto kfree_skb;
 
-	idev = __in6_dev_get(skb->dev);
+	idev = in6_dev_get(skb->dev);
 	if (!idev)
-		goto out;
+		goto kfree_skb;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 	group = &mld->mld_mca;
@@ -1442,11 +1479,11 @@ static void __mld_query_work(struct sk_buff *skb)
 	}
 
 	if (group_type == IPV6_ADDR_ANY) {
-		for_each_mc_rtnl(idev, ma) {
+		for_each_mc_mclock(idev, ma) {
 			igmp6_group_queried(ma, max_delay);
 		}
 	} else {
-		for_each_mc_rtnl(idev, ma) {
+		for_each_mc_mclock(idev, ma) {
 			if (!ipv6_addr_equal(group, &ma->mca_addr))
 				continue;
 			if (ma->mca_flags & MAF_TIMER_RUNNING) {
@@ -1468,6 +1505,8 @@ static void __mld_query_work(struct sk_buff *skb)
 	}
 
 out:
+	in6_dev_put(idev);
+kfree_skb:
 	consume_skb(skb);
 }
 
@@ -1495,10 +1534,10 @@ static void mld_query_work(struct work_struct *work)
 	}
 	spin_unlock_bh(&idev->mc_query_lock);
 
-	rtnl_lock();
+	mutex_lock(&idev->mc_lock);
 	while ((skb = __skb_dequeue(&q)))
 		__mld_query_work(skb);
-	rtnl_unlock();
+	mutex_unlock(&idev->mc_lock);
 
 	if (!rework)
 		in6_dev_put(idev);
@@ -1530,22 +1569,22 @@ int igmp6_event_report(struct sk_buff *skb)
 
 static void __mld_report_work(struct sk_buff *skb)
 {
-	struct ifmcaddr6 *ma;
 	struct inet6_dev *idev;
+	struct ifmcaddr6 *ma;
 	struct mld_msg *mld;
 	int addr_type;
 
 	/* Our own report looped back. Ignore it. */
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		goto out;
+		goto kfree_skb;
 
 	/* send our report if the MC router may not have heard this report */
 	if (skb->pkt_type != PACKET_MULTICAST &&
 	    skb->pkt_type != PACKET_BROADCAST)
-		goto out;
+		goto kfree_skb;
 
 	if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
-		goto out;
+		goto kfree_skb;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 
@@ -1553,17 +1592,17 @@ static void __mld_report_work(struct sk_buff *skb)
 	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
 	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
-		goto out;
+		goto kfree_skb;
 
-	idev = __in6_dev_get(skb->dev);
+	idev = in6_dev_get(skb->dev);
 	if (!idev)
-		goto out;
+		goto kfree_skb;
 
 	/*
 	 *	Cancel the work for this group
 	 */
 
-	for_each_mc_rtnl(idev, ma) {
+	for_each_mc_mclock(idev, ma) {
 		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
 			if (cancel_delayed_work(&ma->mca_work))
 				refcount_dec(&ma->mca_refcnt);
@@ -1573,7 +1612,8 @@ static void __mld_report_work(struct sk_buff *skb)
 		}
 	}
 
-out:
+	in6_dev_put(idev);
+kfree_skb:
 	consume_skb(skb);
 }
 
@@ -1600,10 +1640,10 @@ static void mld_report_work(struct work_struct *work)
 	}
 	spin_unlock_bh(&idev->mc_report_lock);
 
-	rtnl_lock();
+	mutex_lock(&idev->mc_lock);
 	while ((skb = __skb_dequeue(&q)))
 		__mld_report_work(skb);
-	rtnl_unlock();
+	mutex_unlock(&idev->mc_lock);
 
 	if (!rework)
 		in6_dev_put(idev);
@@ -1659,7 +1699,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
 	struct ip6_sf_list *psf;
 	int scount = 0;
 
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
 			continue;
 		scount++;
@@ -1833,6 +1873,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 #define AVAILABLE(skb)	((skb) ? skb_availroom(skb) : 0)
 
+/* called with mc_lock */
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 				int type, int gdeleted, int sdeleted,
 				int crsend)
@@ -1878,12 +1919,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	}
 	first = 1;
 	psf_prev = NULL;
-	for (psf = rtnl_dereference(*psf_list);
+	for (psf = mc_dereference(*psf_list, idev);
 	     psf;
 	     psf = psf_next) {
 		struct in6_addr *psrc;
 
-		psf_next = rtnl_dereference(psf->sf_next);
+		psf_next = mc_dereference(psf->sf_next, idev);
 
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
 			psf_prev = psf;
@@ -1931,10 +1972,10 @@ decrease_sf_crcount:
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
 					rcu_assign_pointer(psf_prev->sf_next,
-							   rtnl_dereference(psf->sf_next));
+							   mc_dereference(psf->sf_next, idev));
 				else
 					rcu_assign_pointer(*psf_list,
-							   rtnl_dereference(psf->sf_next));
+							   mc_dereference(psf->sf_next, idev));
 				kfree_rcu(psf, rcu);
 				continue;
 			}
@@ -1964,13 +2005,14 @@ empty_source:
 	return skb;
 }
 
+/* called with mc_lock */
 static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 {
 	struct sk_buff *skb = NULL;
 	int type;
 
 	if (!pmc) {
-		for_each_mc_rtnl(idev, pmc) {
+		for_each_mc_mclock(idev, pmc) {
 			if (pmc->mca_flags & MAF_NOREPORT)
 				continue;
 			if (pmc->mca_sfcount[MCAST_EXCLUDE])
@@ -1992,23 +2034,24 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 
 /*
  * remove zero-count source records from a source filter list
+ * called with mc_lock
  */
-static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf)
+static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev)
 {
 	struct ip6_sf_list *psf_prev, *psf_next, *psf;
 
 	psf_prev = NULL;
-	for (psf = rtnl_dereference(*ppsf);
+	for (psf = mc_dereference(*ppsf, idev);
 	     psf;
 	     psf = psf_next) {
-		psf_next = rtnl_dereference(psf->sf_next);
+		psf_next = mc_dereference(psf->sf_next, idev);
 		if (psf->sf_crcount == 0) {
 			if (psf_prev)
 				rcu_assign_pointer(psf_prev->sf_next,
-						   rtnl_dereference(psf->sf_next));
+						   mc_dereference(psf->sf_next, idev));
 			else
 				rcu_assign_pointer(*ppsf,
-						   rtnl_dereference(psf->sf_next));
+						   mc_dereference(psf->sf_next, idev));
 			kfree_rcu(psf, rcu);
 		} else {
 			psf_prev = psf;
@@ -2016,6 +2059,7 @@ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf)
 	}
 }
 
+/* called with mc_lock */
 static void mld_send_cr(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
@@ -2024,10 +2068,10 @@ static void mld_send_cr(struct inet6_dev *idev)
 
 	/* deleted MCA's */
 	pmc_prev = NULL;
-	for (pmc = rtnl_dereference(idev->mc_tomb);
+	for (pmc = mc_dereference(idev->mc_tomb, idev);
 	     pmc;
 	     pmc = pmc_next) {
-		pmc_next = rtnl_dereference(pmc->next);
+		pmc_next = mc_dereference(pmc->next, idev);
 		if (pmc->mca_sfmode == MCAST_INCLUDE) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_BLOCK_OLD_SOURCES;
@@ -2041,8 +2085,8 @@ static void mld_send_cr(struct inet6_dev *idev)
 			}
 			pmc->mca_crcount--;
 			if (pmc->mca_crcount == 0) {
-				mld_clear_zeros(&pmc->mca_tomb);
-				mld_clear_zeros(&pmc->mca_sources);
+				mld_clear_zeros(&pmc->mca_tomb, idev);
+				mld_clear_zeros(&pmc->mca_sources, idev);
 			}
 		}
 		if (pmc->mca_crcount == 0 &&
@@ -2059,7 +2103,7 @@ static void mld_send_cr(struct inet6_dev *idev)
 	}
 
 	/* change recs */
-	for_each_mc_rtnl(idev, pmc) {
+	for_each_mc_mclock(idev, pmc) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_ALLOW_NEW_SOURCES;
@@ -2181,6 +2225,7 @@ err_out:
 	goto out;
 }
 
+/* called with mc_lock */
 static void mld_send_initial_cr(struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
@@ -2191,7 +2236,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 		return;
 
 	skb = NULL;
-	for_each_mc_rtnl(idev, pmc) {
+	for_each_mc_mclock(idev, pmc) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_CHANGE_TO_EXCLUDE;
 		else
@@ -2204,6 +2249,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 
 void ipv6_mc_dad_complete(struct inet6_dev *idev)
 {
+	mutex_lock(&idev->mc_lock);
 	idev->mc_dad_count = idev->mc_qrv;
 	if (idev->mc_dad_count) {
 		mld_send_initial_cr(idev);
@@ -2212,6 +2258,7 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
 			mld_dad_start_work(idev,
 					   unsolicited_report_interval(idev));
 	}
+	mutex_unlock(&idev->mc_lock);
 }
 
 static void mld_dad_work(struct work_struct *work)
@@ -2219,8 +2266,7 @@ static void mld_dad_work(struct work_struct *work)
 	struct inet6_dev *idev = container_of(to_delayed_work(work),
 					      struct inet6_dev,
 					      mc_dad_work);
-
-	rtnl_lock();
+	mutex_lock(&idev->mc_lock);
 	mld_send_initial_cr(idev);
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
@@ -2228,10 +2274,11 @@ static void mld_dad_work(struct work_struct *work)
 			mld_dad_start_work(idev,
 					   unsolicited_report_interval(idev));
 	}
-	rtnl_unlock();
+	mutex_unlock(&idev->mc_lock);
 	in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	const struct in6_addr *psfsrc)
 {
@@ -2239,7 +2286,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	int rv = 0;
 
 	psf_prev = NULL;
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
@@ -2255,16 +2302,16 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 		/* no more filters for this source */
 		if (psf_prev)
 			rcu_assign_pointer(psf_prev->sf_next,
-					   rtnl_dereference(psf->sf_next));
+					   mc_dereference(psf->sf_next, idev));
 		else
 			rcu_assign_pointer(pmc->mca_sources,
-					   rtnl_dereference(psf->sf_next));
+					   mc_dereference(psf->sf_next, idev));
 
 		if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
 		    !mld_in_v1_mode(idev)) {
 			psf->sf_crcount = idev->mc_qrv;
 			rcu_assign_pointer(psf->sf_next,
-					   rtnl_dereference(pmc->mca_tomb));
+					   mc_dereference(pmc->mca_tomb, idev));
 			rcu_assign_pointer(pmc->mca_tomb, psf);
 			rv = 1;
 		} else {
@@ -2274,6 +2321,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	return rv;
 }
 
+/* called with mc_lock */
 static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
 			  int delta)
@@ -2285,7 +2333,7 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	if (!idev)
 		return -ENODEV;
 
-	for_each_mc_rtnl(idev, pmc) {
+	for_each_mc_mclock(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
@@ -2294,9 +2342,8 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 
 	sf_markstate(pmc);
 	if (!delta) {
-		if (!pmc->mca_sfcount[sfmode]) {
+		if (!pmc->mca_sfcount[sfmode])
 			return -EINVAL;
-		}
 
 		pmc->mca_sfcount[sfmode]--;
 	}
@@ -2317,16 +2364,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		pmc->mca_sfmode = MCAST_INCLUDE;
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for_each_psf_rtnl(pmc, psf)
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(pmc->idev);
-	} else if (sf_setstate(pmc) || changerec)
+	} else if (sf_setstate(pmc) || changerec) {
 		mld_ifc_event(pmc->idev);
+	}
+
 	return err;
 }
 
 /*
  * Add multicast single-source filter to the interface list
+ * called with mc_lock
  */
 static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 	const struct in6_addr *psfsrc)
@@ -2334,7 +2384,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 	struct ip6_sf_list *psf, *psf_prev;
 
 	psf_prev = NULL;
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
@@ -2355,12 +2405,13 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 	return 0;
 }
 
+/* called with mc_lock */
 static void sf_markstate(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf;
 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
 
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			psf->sf_oldin = mca_xcount ==
 				psf->sf_count[MCAST_EXCLUDE] &&
@@ -2371,6 +2422,7 @@ static void sf_markstate(struct ifmcaddr6 *pmc)
 	}
 }
 
+/* called with mc_lock */
 static int sf_setstate(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf, *dpsf;
@@ -2379,7 +2431,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 	int new_in, rv;
 
 	rv = 0;
-	for_each_psf_rtnl(pmc, psf) {
+	for_each_psf_mclock(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
@@ -2398,10 +2450,12 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				if (dpsf) {
 					if (prev)
 						rcu_assign_pointer(prev->sf_next,
-								   rtnl_dereference(dpsf->sf_next));
+								   mc_dereference(dpsf->sf_next,
+										  pmc->idev));
 					else
 						rcu_assign_pointer(pmc->mca_tomb,
-								   rtnl_dereference(dpsf->sf_next));
+								   mc_dereference(dpsf->sf_next,
+										  pmc->idev));
 					kfree_rcu(dpsf, rcu);
 				}
 				psf->sf_crcount = qrv;
@@ -2424,7 +2478,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 					continue;
 				*dpsf = *psf;
 				rcu_assign_pointer(dpsf->sf_next,
-						   rtnl_dereference(pmc->mca_tomb));
+						   mc_dereference(pmc->mca_tomb, pmc->idev));
 				rcu_assign_pointer(pmc->mca_tomb, dpsf);
 			}
 			dpsf->sf_crcount = qrv;
@@ -2436,6 +2490,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 
 /*
  * Add multicast source filter list to the interface list
+ * called with mc_lock
  */
 static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
@@ -2448,7 +2503,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	if (!idev)
 		return -ENODEV;
 
-	for_each_mc_rtnl(idev, pmc) {
+	for_each_mc_mclock(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
@@ -2484,7 +2539,7 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for_each_psf_rtnl(pmc, psf)
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(idev);
 	} else if (sf_setstate(pmc)) {
@@ -2493,21 +2548,22 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	return err;
 }
 
+/* called with mc_lock */
 static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf, *nextpsf;
 
-	for (psf = rtnl_dereference(pmc->mca_tomb);
+	for (psf = mc_dereference(pmc->mca_tomb, pmc->idev);
 	     psf;
 	     psf = nextpsf) {
-		nextpsf = rtnl_dereference(psf->sf_next);
+		nextpsf = mc_dereference(psf->sf_next, pmc->idev);
 		kfree_rcu(psf, rcu);
 	}
 	RCU_INIT_POINTER(pmc->mca_tomb, NULL);
-	for (psf = rtnl_dereference(pmc->mca_sources);
+	for (psf = mc_dereference(pmc->mca_sources, pmc->idev);
 	     psf;
 	     psf = nextpsf) {
-		nextpsf = rtnl_dereference(psf->sf_next);
+		nextpsf = mc_dereference(psf->sf_next, pmc->idev);
 		kfree_rcu(psf, rcu);
 	}
 	RCU_INIT_POINTER(pmc->mca_sources, NULL);
@@ -2516,7 +2572,7 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
 	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
 }
 
-
+/* called with mc_lock */
 static void igmp6_join_group(struct ifmcaddr6 *ma)
 {
 	unsigned long delay;
@@ -2546,19 +2602,27 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 
 	psl = rtnl_dereference(iml->sflist);
 
+	if (idev)
+		mutex_lock(&idev->mc_lock);
+
 	if (!psl) {
 		/* any-source empty exclude case */
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
 	} else {
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
-				psl->sl_count, psl->sl_addr, 0);
+				     psl->sl_count, psl->sl_addr, 0);
 		RCU_INIT_POINTER(iml->sflist, NULL);
 		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
 		kfree_rcu(psl, rcu);
 	}
+
+	if (idev)
+		mutex_unlock(&idev->mc_lock);
+
 	return err;
 }
 
+/* called with mc_lock */
 static void igmp6_leave_group(struct ifmcaddr6 *ma)
 {
 	if (mld_in_v1_mode(ma->idev)) {
@@ -2578,10 +2642,10 @@ static void mld_gq_work(struct work_struct *work)
 					      struct inet6_dev,
 					      mc_gq_work);
 
-	rtnl_lock();
+	mutex_lock(&idev->mc_lock);
 	mld_send_report(idev, NULL);
 	idev->mc_gq_running = 0;
-	rtnl_unlock();
+	mutex_unlock(&idev->mc_lock);
 
 	in6_dev_put(idev);
 }
@@ -2592,7 +2656,7 @@ static void mld_ifc_work(struct work_struct *work)
 					      struct inet6_dev,
 					      mc_ifc_work);
 
-	rtnl_lock();
+	mutex_lock(&idev->mc_lock);
 	mld_send_cr(idev);
 
 	if (idev->mc_ifc_count) {
@@ -2601,10 +2665,11 @@ static void mld_ifc_work(struct work_struct *work)
 			mld_ifc_start_work(idev,
 					   unsolicited_report_interval(idev));
 	}
-	rtnl_unlock();
+	mutex_unlock(&idev->mc_lock);
 	in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static void mld_ifc_event(struct inet6_dev *idev)
 {
 	if (mld_in_v1_mode(idev))
@@ -2619,14 +2684,14 @@ static void mld_mca_work(struct work_struct *work)
 	struct ifmcaddr6 *ma = container_of(to_delayed_work(work),
 					    struct ifmcaddr6, mca_work);
 
-	rtnl_lock();
+	mutex_lock(&ma->idev->mc_lock);
 	if (mld_in_v1_mode(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
 	ma->mca_flags |=  MAF_LAST_REPORTER;
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
-	rtnl_unlock();
+	mutex_unlock(&ma->idev->mc_lock);
 
 	ma_put(ma);
 }
@@ -2639,8 +2704,10 @@ void ipv6_mc_unmap(struct inet6_dev *idev)
 
 	/* Install multicast list, except for all-nodes (already installed) */
 
-	for_each_mc_rtnl(idev, i)
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, i)
 		igmp6_group_dropped(i);
+	mutex_unlock(&idev->mc_lock);
 }
 
 void ipv6_mc_remap(struct inet6_dev *idev)
@@ -2649,14 +2716,15 @@ void ipv6_mc_remap(struct inet6_dev *idev)
 }
 
 /* Device going down */
-
 void ipv6_mc_down(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *i;
 
+	mutex_lock(&idev->mc_lock);
 	/* Withdraw multicast list */
-	for_each_mc_rtnl(idev, i)
+	for_each_mc_mclock(idev, i)
 		igmp6_group_dropped(i);
+	mutex_unlock(&idev->mc_lock);
 
 	/* Should stop work after group drop. or we will
 	 * start work again in mld_ifc_event()
@@ -2687,10 +2755,12 @@ void ipv6_mc_up(struct inet6_dev *idev)
 	/* Install multicast list, except for all-nodes (already installed) */
 
 	ipv6_mc_reset(idev);
-	for_each_mc_rtnl(idev, i) {
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, i) {
 		mld_del_delrec(idev, i);
 		igmp6_group_added(i);
 	}
+	mutex_unlock(&idev->mc_lock);
 }
 
 /* IPv6 device initialization. */
@@ -2709,6 +2779,7 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
 	skb_queue_head_init(&idev->mc_report_queue);
 	spin_lock_init(&idev->mc_query_lock);
 	spin_lock_init(&idev->mc_report_lock);
+	mutex_init(&idev->mc_lock);
 	ipv6_mc_reset(idev);
 }
 
@@ -2722,7 +2793,9 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 
 	/* Deactivate works */
 	ipv6_mc_down(idev);
+	mutex_lock(&idev->mc_lock);
 	mld_clear_delrec(idev);
+	mutex_unlock(&idev->mc_lock);
 	mld_clear_query(idev);
 	mld_clear_report(idev);
 
@@ -2736,12 +2809,14 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		__ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
 
-	while ((i = rtnl_dereference(idev->mc_list))) {
-		rcu_assign_pointer(idev->mc_list, rtnl_dereference(i->next));
+	mutex_lock(&idev->mc_lock);
+	while ((i = mc_dereference(idev->mc_list, idev))) {
+		rcu_assign_pointer(idev->mc_list, mc_dereference(i->next, idev));
 
 		ip6_mc_clear_src(i);
 		ma_put(i);
 	}
+	mutex_unlock(&idev->mc_lock);
 }
 
 static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
@@ -2750,12 +2825,14 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
 
 	ASSERT_RTNL();
 
+	mutex_lock(&idev->mc_lock);
 	if (mld_in_v1_mode(idev)) {
-		for_each_mc_rtnl(idev, pmc)
+		for_each_mc_mclock(idev, pmc)
 			igmp6_join_group(pmc);
 	} else {
 		mld_send_report(idev, NULL);
 	}
+	mutex_unlock(&idev->mc_lock);
 }
 
 static int ipv6_mc_netdev_event(struct notifier_block *this,
-- 
cgit v1.2.3


From a0e55dcd2fa9198fae0e9e088a65d36897748760 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Thu, 25 Mar 2021 18:04:37 -0600
Subject: dt-bindings: net: xilinx_axienet: Document additional clocks

Update DT bindings to describe all of the clocks that the axienet
driver will now be able to make use of.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/xilinx_axienet.txt     | 25 ++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
index 2cd452419ed0..b8e4894bc634 100644
--- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt
+++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt
@@ -42,11 +42,23 @@ Optional properties:
 		  support both 1000BaseX and SGMII modes. If set, the phy-mode
 		  should be set to match the mode selected on core reset (i.e.
 		  by the basex_or_sgmii core input line).
-- clocks	: AXI bus clock for the device. Refer to common clock bindings.
-		  Used to calculate MDIO clock divisor. If not specified, it is
-		  auto-detected from the CPU clock (but only on platforms where
-		  this is possible). New device trees should specify this - the
-		  auto detection is only for backward compatibility.
+- clock-names: 	  Tuple listing input clock names. Possible clocks:
+		  s_axi_lite_clk: Clock for AXI register slave interface
+		  axis_clk: AXI4-Stream clock for TXD RXD TXC and RXS interfaces
+		  ref_clk: Ethernet reference clock, used by signal delay
+			   primitives and transceivers
+		  mgt_clk: MGT reference clock (used by optional internal
+			   PCS/PMA PHY)
+
+		  Note that if s_axi_lite_clk is not specified by name, the
+		  first clock of any name is used for this. If that is also not
+		  specified, the clock rate is auto-detected from the CPU clock
+		  (but only on platforms where this is possible). New device
+		  trees should specify all applicable clocks by name - the
+		  fallbacks to an unnamed clock or to CPU clock are only for
+		  backward compatibility.
+- clocks: 	  Phandles to input clocks matching clock-names. Refer to common
+		  clock bindings.
 - axistream-connected: Reference to another node which contains the resources
 		       for the AXI DMA controller used by this device.
 		       If this is specified, the DMA-related resources from that
@@ -62,7 +74,8 @@ Example:
 		device_type = "network";
 		interrupt-parent = <&microblaze_0_axi_intc>;
 		interrupts = <2 0 1>;
-		clocks = <&axi_clk>;
+		clock-names = "s_axi_lite_clk", "axis_clk", "ref_clk", "mgt_clk";
+		clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>;
 		phy-mode = "mii";
 		reg = <0x40c00000 0x40000 0x50c00000 0x40000>;
 		xlnx,rxcsum = <0x2>;
-- 
cgit v1.2.3


From b11bfb9a19f9d790eea10cbd338b6b7f086c6dca Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Thu, 25 Mar 2021 18:04:38 -0600
Subject: net: axienet: Enable more clocks

This driver was only enabling the first clock on the device, regardless
of its name. However, this controller logic can have multiple clocks
which should all be enabled. Add support for enabling additional clocks.
The clock names used are matching those used in the Xilinx version of this
driver as well as the Xilinx device tree generator, except for mgt_clk
which is not present there.

For backward compatibility, if no named clocks are present, the first
clock present is used for determining the MDIO bus clock divider.

Reviewed-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/xilinx_axienet.h      |  8 ++++--
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 34 ++++++++++++++++++-----
 drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c |  4 +--
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 1e966a39967e..708769349f76 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -376,6 +376,8 @@ struct axidma_bd {
 	struct sk_buff *skb;
 } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
 
+#define XAE_NUM_MISC_CLOCKS 3
+
 /**
  * struct axienet_local - axienet private per device data
  * @ndev:	Pointer for net_device to which it will be attached.
@@ -385,7 +387,8 @@ struct axidma_bd {
  * @phylink_config: phylink configuration settings
  * @pcs_phy:	Reference to PCS/PMA PHY if used
  * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
- * @clk:	Clock for AXI bus
+ * @axi_clk:	AXI4-Lite bus clock
+ * @misc_clks:	Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
  * @mii_bus:	Pointer to MII bus structure
  * @mii_clk_div: MII bus clock divider value
  * @regs_start: Resource start for axienet device addresses
@@ -434,7 +437,8 @@ struct axienet_local {
 
 	bool switch_x_sgmii;
 
-	struct clk *clk;
+	struct clk *axi_clk;
+	struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS];
 
 	struct mii_bus *mii_bus;
 	u8 mii_clk_div;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 5d677db0aee5..9635101fbb88 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1863,17 +1863,35 @@ static int axienet_probe(struct platform_device *pdev)
 	lp->rx_bd_num = RX_BD_NUM_DEFAULT;
 	lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 
-	lp->clk = devm_clk_get_optional(&pdev->dev, NULL);
-	if (IS_ERR(lp->clk)) {
-		ret = PTR_ERR(lp->clk);
+	lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
+	if (!lp->axi_clk) {
+		/* For backward compatibility, if named AXI clock is not present,
+		 * treat the first clock specified as the AXI clock.
+		 */
+		lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL);
+	}
+	if (IS_ERR(lp->axi_clk)) {
+		ret = PTR_ERR(lp->axi_clk);
 		goto free_netdev;
 	}
-	ret = clk_prepare_enable(lp->clk);
+	ret = clk_prepare_enable(lp->axi_clk);
 	if (ret) {
-		dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret);
+		dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret);
 		goto free_netdev;
 	}
 
+	lp->misc_clks[0].id = "axis_clk";
+	lp->misc_clks[1].id = "ref_clk";
+	lp->misc_clks[2].id = "mgt_clk";
+
+	ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	if (ret)
+		goto cleanup_clk;
+
+	ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	if (ret)
+		goto cleanup_clk;
+
 	/* Map device registers */
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
@@ -2109,7 +2127,8 @@ cleanup_mdio:
 	of_node_put(lp->phy_node);
 
 cleanup_clk:
-	clk_disable_unprepare(lp->clk);
+	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	clk_disable_unprepare(lp->axi_clk);
 
 free_netdev:
 	free_netdev(ndev);
@@ -2132,7 +2151,8 @@ static int axienet_remove(struct platform_device *pdev)
 
 	axienet_mdio_teardown(lp);
 
-	clk_disable_unprepare(lp->clk);
+	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	clk_disable_unprepare(lp->axi_clk);
 
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 9c014cee34b2..48f544f6c999 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -159,8 +159,8 @@ int axienet_mdio_enable(struct axienet_local *lp)
 
 	lp->mii_clk_div = 0;
 
-	if (lp->clk) {
-		host_clock = clk_get_rate(lp->clk);
+	if (lp->axi_clk) {
+		host_clock = clk_get_rate(lp->axi_clk);
 	} else {
 		struct device_node *np1;
 
-- 
cgit v1.2.3


From aeab5cfbc8c79c33f367f65c0566c9a33a50cd30 Mon Sep 17 00:00:00 2001
From: Xu Jia <xujia39@huawei.com>
Date: Fri, 26 Mar 2021 10:40:46 +0800
Subject: net: ethernet: remove duplicated include

Remove duplicated include from mtk_ppe_offload.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xu Jia <xujia39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index d0c46786571f..4975106fbc42 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -5,7 +5,6 @@
 
 #include <linux/if_ether.h>
 #include <linux/rhashtable.h>
-#include <linux/if_ether.h>
 #include <linux/ip.h>
 #include <net/flow_offload.h>
 #include <net/pkt_cls.h>
-- 
cgit v1.2.3


From 63c173ff7aa3b58b1f5cd4227f53455a78cea627 Mon Sep 17 00:00:00 2001
From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Date: Fri, 26 Mar 2021 17:10:46 +0800
Subject: net: stmmac: Fix kernel panic due to NULL pointer dereference of
 fpe_cfg

In this patch, "net: stmmac: support FPE link partner hand-shaking
procedure", priv->plat->fpe_cfg wouldn`t be "devm_kzalloc"ed if
dma_cap->frpsel is 0 (Flexible Rx Parser is not supported in SoC) in
tc_init(). So, fpe_cfg will be remain as NULL and accessing it will cause
kernel panic.

To fix this, move the "devm_kzalloc"ing of priv->plat->fpe_cfg before
dma_cap->frpsel checking in tc_init(). Additionally, checking of
priv->dma_cap.fpesel is added before calling stmmac_fpe_link_state_handle()
as only FPE supported SoC is allowed to call the function.

Below is the kernel panic dump reported by Marek Szyprowski
<m.szyprowski@samsung.com>:

meson8b-dwmac ff3f0000.ethernet eth0: PHY [0.0:00] driver [RTL8211F Gigabit Ethernet] (irq=35)
meson8b-dwmac ff3f0000.ethernet eth0: No Safety Features support found
meson8b-dwmac ff3f0000.ethernet eth0: PTP not supported by HW
meson8b-dwmac ff3f0000.ethernet eth0: configuring for phy/rgmii link mode
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000001
Mem abort info:
...
user pgtable: 4k pages, 48-bit VAs, pgdp=00000000044eb000
[0000000000000001] pgd=0000000000000000, p4d=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT SMP
Modules linked in: dw_hdmi_i2s_audio dw_hdmi_cec meson_gxl realtek meson_gxbb_wdt snd_soc_meson_axg_sound_card dwmac_generic axg_audio meson_dw_hdmi crct10dif_ce snd_soc_meson_card_utils snd_soc_meson_axg_tdmout panfrost rc_odroid gpu_sched reset_meson_audio_arb meson_ir snd_soc_meson_g12a_tohdmitx snd_soc_meson_axg_frddr sclk_div clk_phase snd_soc_meson_codec_glue dwmac_meson8b snd_soc_meson_axg_fifo stmmac_platform meson_rng meson_drm stmmac rtc_meson_vrtc rng_core meson_canvas pwm_meson dw_hdmi mdio_mux_meson_g12a pcs_xpcs snd_soc_meson_axg_tdm_interface snd_soc_meson_axg_tdm_formatter nvmem_meson_efuse display_connector
CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.12.0-rc4-next-20210325+
Hardware name: Hardkernel ODROID-C4 (DT)
Workqueue: events_power_efficient phylink_resolve
pstate: 20400009 (nzCv daif +PAN -UAO -TCO BTYPE=--)
pc : stmmac_mac_link_up+0x14c/0x348 [stmmac]
lr : stmmac_mac_link_up+0x284/0x348 [stmmac] ...
Call trace:
 stmmac_mac_link_up+0x14c/0x348 [stmmac]
 phylink_resolve+0x104/0x420
 process_one_work+0x2a8/0x718
 worker_thread+0x48/0x460
 kthread+0x134/0x160
 ret_from_fork+0x10/0x18
Code: b971ba60 350007c0 f958c260 f9402000 (39400401)
---[ end trace 0c9deb6c510228aa ]---

Fixes: 5a5586112b92 ("net: stmmac: support FPE link partner hand-shaking
procedure")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  6 ++++--
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c   | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f4fa5402cd64..3a4f0c2d42a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1002,7 +1002,8 @@ static void stmmac_mac_link_down(struct phylink_config *config,
 	stmmac_eee_init(priv);
 	stmmac_set_eee_pls(priv, priv->hw, false);
 
-	stmmac_fpe_link_state_handle(priv, false);
+	if (priv->dma_cap.fpesel)
+		stmmac_fpe_link_state_handle(priv, false);
 }
 
 static void stmmac_mac_link_up(struct phylink_config *config,
@@ -1102,7 +1103,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 		stmmac_set_eee_pls(priv, priv->hw, true);
 	}
 
-	stmmac_fpe_link_state_handle(priv, true);
+	if (priv->dma_cap.fpesel)
+		stmmac_fpe_link_state_handle(priv, true);
 }
 
 static const struct phylink_mac_ops stmmac_phylink_mac_ops = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 1d84ee359808..4e70efc45458 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -254,6 +254,16 @@ static int tc_init(struct stmmac_priv *priv)
 			 priv->flow_entries_max);
 	}
 
+	if (!priv->plat->fpe_cfg) {
+		priv->plat->fpe_cfg = devm_kzalloc(priv->device,
+						   sizeof(*priv->plat->fpe_cfg),
+						   GFP_KERNEL);
+		if (!priv->plat->fpe_cfg)
+			return -ENOMEM;
+	} else {
+		memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg));
+	}
+
 	/* Fail silently as we can still use remaining features, e.g. CBS */
 	if (!dma_cap->frpsel)
 		return 0;
@@ -298,16 +308,6 @@ static int tc_init(struct stmmac_priv *priv)
 	dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n",
 			priv->tc_entries_max, priv->tc_off_max);
 
-	if (!priv->plat->fpe_cfg) {
-		priv->plat->fpe_cfg = devm_kzalloc(priv->device,
-						   sizeof(*priv->plat->fpe_cfg),
-						   GFP_KERNEL);
-		if (!priv->plat->fpe_cfg)
-			return -ENOMEM;
-	} else {
-		memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg));
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From bc556d3edd0d3062b22fb5ce4d192650c4ddc2a6 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Fri, 26 Mar 2021 16:14:14 +0700
Subject: tipc: fix kernel-doc warnings

Fix kernel-doc warning introduced in
commit b83e214b2e04 ("tipc: add extack messages for bearer/media failure"):

net/tipc/bearer.c:248: warning: Function parameter or member 'extack' not described in 'tipc_enable_bearer'

Fixes: b83e214b2e04 ("tipc: add extack messages for bearer/media failure")
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 1090f21fcfac..d47e0b940ac9 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -240,6 +240,7 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  * @disc_domain: bearer domain
  * @prio: bearer priority
  * @attr: nlattr array
+ * @extack: netlink extended ack
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
 			      u32 disc_domain, u32 prio,
-- 
cgit v1.2.3


From 26440a63a1ac59e76bbe727dde8f89b7efef3e18 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 26 Mar 2021 18:13:48 +0800
Subject: net: llc: Correct some function names in header

Fix the following make W=1 kernel build warning:

 net/llc/llc_c_ev.c:622: warning: expecting prototype for conn_ev_qlfy_last_frame_eq_1(). Prototype was for llc_conn_ev_qlfy_last_frame_eq_1() instead
 net/llc/llc_c_ev.c:636: warning: expecting prototype for conn_ev_qlfy_last_frame_eq_0(). Prototype was for llc_conn_ev_qlfy_last_frame_eq_0() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_c_ev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c
index 523fdd1cf781..d6627a80cb45 100644
--- a/net/llc/llc_c_ev.c
+++ b/net/llc/llc_c_ev.c
@@ -608,7 +608,7 @@ int llc_conn_ev_qlfy_p_flag_eq_1(struct sock *sk, struct sk_buff *skb)
 }
 
 /**
- *	conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window
+ *	llc_conn_ev_qlfy_last_frame_eq_1 - checks if frame is last in tx window
  *	@sk: current connection structure.
  *	@skb: current event.
  *
@@ -624,7 +624,7 @@ int llc_conn_ev_qlfy_last_frame_eq_1(struct sock *sk, struct sk_buff *skb)
 }
 
 /**
- *	conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window
+ *	llc_conn_ev_qlfy_last_frame_eq_0 - checks if frame isn't last in tx window
  *	@sk: current connection structure.
  *	@skb: current event.
  *
-- 
cgit v1.2.3


From 8114f099d93729642f70fe4fc40f159e208acfc4 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 26 Mar 2021 18:13:49 +0800
Subject: net: llc: Correct function name llc_sap_action_unitdata_ind() in
 header

Fix the following make W=1 kernel build warning:

  net/llc/llc_s_ac.c:38: warning: expecting prototype for llc_sap_action_unit_data_ind(). Prototype was for llc_sap_action_unitdata_ind() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_s_ac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index 7ae4cc684d3a..b554f26c68ee 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -27,7 +27,7 @@
 
 
 /**
- *	llc_sap_action_unit_data_ind - forward UI PDU to network layer
+ *	llc_sap_action_unitdata_ind - forward UI PDU to network layer
  *	@sap: SAP
  *	@skb: the event to forward
  *
-- 
cgit v1.2.3


From 72e6afe6b4b3aee69b2dae1ac8b32efc503b48ab Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 26 Mar 2021 18:13:50 +0800
Subject: net: llc: Correct function name llc_pdu_set_pf_bit() in header

Fix the following make W=1 kernel build warning:

 net/llc/llc_pdu.c:36: warning: expecting prototype for pdu_set_pf_bit(). Prototype was for llc_pdu_set_pf_bit() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_pdu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/llc/llc_pdu.c b/net/llc/llc_pdu.c
index 792d195c8bae..63749dde542f 100644
--- a/net/llc/llc_pdu.c
+++ b/net/llc/llc_pdu.c
@@ -24,7 +24,7 @@ void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type)
 }
 
 /**
- *	pdu_set_pf_bit - sets poll/final bit in LLC header
+ *	llc_pdu_set_pf_bit - sets poll/final bit in LLC header
  *	@skb: Frame to set bit in
  *	@bit_value: poll/final bit (0 or 1).
  *
-- 
cgit v1.2.3


From a1281601f88e924a2e8c7572065d3e9fecf3c3fb Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Fri, 26 Mar 2021 20:31:38 +0800
Subject: farsync: use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/farsync.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 686a25d3b512..5de71e44fc5a 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -573,7 +573,7 @@ static DECLARE_TASKLET(fst_tx_task, fst_process_tx_work_q);
 static DECLARE_TASKLET(fst_int_task, fst_process_int_work_q);
 
 static struct fst_card_info *fst_card_array[FST_MAX_CARDS];
-static spinlock_t fst_work_q_lock;
+static DEFINE_SPINLOCK(fst_work_q_lock);
 static u64 fst_work_txq;
 static u64 fst_work_intq;
 
@@ -2648,7 +2648,6 @@ fst_init(void)
 
 	for (i = 0; i < FST_MAX_CARDS; i++)
 		fst_card_array[i] = NULL;
-	spin_lock_init(&fst_work_q_lock);
 	return pci_register_driver(&fst_driver);
 }
 
-- 
cgit v1.2.3


From e16301fbe1837c9594f9c1957c28fd1bb18fbd15 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:51:30 -0700
Subject: bpf: Simplify freeing logic in linfo and jited_linfo

This patch simplifies the linfo freeing logic by combining
"bpf_prog_free_jited_linfo()" and "bpf_prog_free_unused_jited_linfo()"
into the new "bpf_prog_jit_attempt_done()".
It is a prep work for the kernel function call support.  In a later
patch, freeing the kernel function call descriptors will also
be done in the "bpf_prog_jit_attempt_done()".

"bpf_prog_free_linfo()" is removed since it is only called by
"__bpf_prog_put_noref()".  The kvfree() are directly called
instead.

It also takes this chance to s/kcalloc/kvcalloc/ for the jited_linfo
allocation.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015130.1544323-1-kafai@fb.com
---
 include/linux/filter.h |  3 +--
 kernel/bpf/core.c      | 35 ++++++++++++-----------------------
 kernel/bpf/syscall.c   |  3 ++-
 kernel/bpf/verifier.c  |  4 ++--
 4 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b2b85b2cad8e..0d9c710eb050 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -877,8 +877,7 @@ void bpf_prog_free_linfo(struct bpf_prog *prog);
 void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 			       const u32 *insn_to_jit_off);
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
-void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
+void bpf_prog_jit_attempt_done(struct bpf_prog *prog);
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
 struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 75244ecb2389..a35eb3d7b126 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -143,25 +143,22 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
 	if (!prog->aux->nr_linfo || !prog->jit_requested)
 		return 0;
 
-	prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
-					 sizeof(*prog->aux->jited_linfo),
-					 GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+	prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
+					  sizeof(*prog->aux->jited_linfo),
+					  GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
 	if (!prog->aux->jited_linfo)
 		return -ENOMEM;
 
 	return 0;
 }
 
-void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
+void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
 {
-	kfree(prog->aux->jited_linfo);
-	prog->aux->jited_linfo = NULL;
-}
-
-void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
-{
-	if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
-		bpf_prog_free_jited_linfo(prog);
+	if (prog->aux->jited_linfo &&
+	    (!prog->jited || !prog->aux->jited_linfo[0])) {
+		kvfree(prog->aux->jited_linfo);
+		prog->aux->jited_linfo = NULL;
+	}
 }
 
 /* The jit engine is responsible to provide an array
@@ -217,12 +214,6 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
 			insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
 }
 
-void bpf_prog_free_linfo(struct bpf_prog *prog)
-{
-	bpf_prog_free_jited_linfo(prog);
-	kvfree(prog->aux->linfo);
-}
-
 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags)
 {
@@ -1866,15 +1857,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 			return fp;
 
 		fp = bpf_int_jit_compile(fp);
-		if (!fp->jited) {
-			bpf_prog_free_jited_linfo(fp);
+		bpf_prog_jit_attempt_done(fp);
 #ifdef CONFIG_BPF_JIT_ALWAYS_ON
+		if (!fp->jited) {
 			*err = -ENOTSUPP;
 			return fp;
-#endif
-		} else {
-			bpf_prog_free_unused_jited_linfo(fp);
 		}
+#endif
 	} else {
 		*err = bpf_prog_offload_compile(fp);
 		if (*err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 250503482cda..eaf85bf51c5a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1694,7 +1694,8 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
 {
 	bpf_prog_kallsyms_del_all(prog);
 	btf_put(prog->aux->btf);
-	bpf_prog_free_linfo(prog);
+	kvfree(prog->aux->jited_linfo);
+	kvfree(prog->aux->linfo);
 	if (prog->aux->attach_btf)
 		btf_put(prog->aux->attach_btf);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 85f9f842d15c..b7df3f06a279 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11741,7 +11741,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	prog->bpf_func = func[0]->bpf_func;
 	prog->aux->func = func;
 	prog->aux->func_cnt = env->subprog_cnt;
-	bpf_prog_free_unused_jited_linfo(prog);
+	bpf_prog_jit_attempt_done(prog);
 	return 0;
 out_free:
 	for (i = 0; i < env->subprog_cnt; i++) {
@@ -11764,7 +11764,7 @@ out_undo_insn:
 		insn->off = 0;
 		insn->imm = env->insn_aux_data[i].call_imm;
 	}
-	bpf_prog_free_jited_linfo(prog);
+	bpf_prog_jit_attempt_done(prog);
 	return err;
 }
 
-- 
cgit v1.2.3


From 34747c4120418143097d4343312a0ca96c986d86 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:51:36 -0700
Subject: bpf: Refactor btf_check_func_arg_match

This patch moved the subprog specific logic from
btf_check_func_arg_match() to the new btf_check_subprog_arg_match().
The core logic is left in btf_check_func_arg_match() which
will be reused later to check the kernel function call.

The "if (!btf_type_is_ptr(t))" is checked first to improve the
indentation which will be useful for a later patch.

Some of the "btf_kind_str[]" usages is replaced with the shortcut
"btf_type_str(t)".

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015136.1544504-1-kafai@fb.com
---
 include/linux/bpf.h   |   4 +-
 include/linux/btf.h   |   5 ++
 kernel/bpf/btf.c      | 159 +++++++++++++++++++++++++++-----------------------
 kernel/bpf/verifier.c |   4 +-
 4 files changed, 95 insertions(+), 77 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5a0801b420ca..eaae618a90b5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1545,8 +1545,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 			   struct btf_func_model *m);
 
 struct bpf_reg_state;
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
-			     struct bpf_reg_state *regs);
+int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
+				struct bpf_reg_state *regs);
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 			  struct bpf_reg_state *reg);
 int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9c1b52738bbe..8a05687a4ee2 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -141,6 +141,11 @@ static inline bool btf_type_is_enum(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM;
 }
 
+static inline bool btf_type_is_scalar(const struct btf_type *t)
+{
+	return btf_type_is_int(t) || btf_type_is_enum(t);
+}
+
 static inline bool btf_type_is_typedef(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 369faeddf1df..3c489adacf3b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4377,7 +4377,7 @@ static u8 bpf_ctx_convert_map[] = {
 #undef BPF_LINK_TYPE
 
 static const struct btf_member *
-btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
 		      const struct btf_type *t, enum bpf_prog_type prog_type,
 		      int arg)
 {
@@ -5362,122 +5362,135 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr
 	return btf_check_func_type_match(log, btf1, t1, btf2, t2);
 }
 
-/* Compare BTF of a function with given bpf_reg_state.
- * Returns:
- * EFAULT - there is a verifier bug. Abort verification.
- * EINVAL - there is a type mismatch or BTF is not available.
- * 0 - BTF matches with what bpf_reg_state expects.
- * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
- */
-int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
-			     struct bpf_reg_state *regs)
+static int btf_check_func_arg_match(struct bpf_verifier_env *env,
+				    const struct btf *btf, u32 func_id,
+				    struct bpf_reg_state *regs,
+				    bool ptr_to_mem_ok)
 {
 	struct bpf_verifier_log *log = &env->log;
-	struct bpf_prog *prog = env->prog;
-	struct btf *btf = prog->aux->btf;
-	const struct btf_param *args;
+	const char *func_name, *ref_tname;
 	const struct btf_type *t, *ref_t;
-	u32 i, nargs, btf_id, type_size;
-	const char *tname;
-	bool is_global;
-
-	if (!prog->aux->func_info)
-		return -EINVAL;
-
-	btf_id = prog->aux->func_info[subprog].type_id;
-	if (!btf_id)
-		return -EFAULT;
-
-	if (prog->aux->func_info_aux[subprog].unreliable)
-		return -EINVAL;
+	const struct btf_param *args;
+	u32 i, nargs;
 
-	t = btf_type_by_id(btf, btf_id);
+	t = btf_type_by_id(btf, func_id);
 	if (!t || !btf_type_is_func(t)) {
 		/* These checks were already done by the verifier while loading
 		 * struct bpf_func_info
 		 */
-		bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
-			subprog);
+		bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n",
+			func_id);
 		return -EFAULT;
 	}
-	tname = btf_name_by_offset(btf, t->name_off);
+	func_name = btf_name_by_offset(btf, t->name_off);
 
 	t = btf_type_by_id(btf, t->type);
 	if (!t || !btf_type_is_func_proto(t)) {
-		bpf_log(log, "Invalid BTF of func %s\n", tname);
+		bpf_log(log, "Invalid BTF of func %s\n", func_name);
 		return -EFAULT;
 	}
 	args = (const struct btf_param *)(t + 1);
 	nargs = btf_type_vlen(t);
 	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
-		bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
+		bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs,
 			MAX_BPF_FUNC_REG_ARGS);
-		goto out;
+		return -EINVAL;
 	}
 
-	is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
 	/* check that BTF function arguments match actual types that the
 	 * verifier sees.
 	 */
 	for (i = 0; i < nargs; i++) {
-		struct bpf_reg_state *reg = &regs[i + 1];
+		u32 regno = i + 1;
+		struct bpf_reg_state *reg = &regs[regno];
 
-		t = btf_type_by_id(btf, args[i].type);
-		while (btf_type_is_modifier(t))
-			t = btf_type_by_id(btf, t->type);
-		if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
+		if (btf_type_is_scalar(t)) {
 			if (reg->type == SCALAR_VALUE)
 				continue;
-			bpf_log(log, "R%d is not a scalar\n", i + 1);
-			goto out;
+			bpf_log(log, "R%d is not a scalar\n", regno);
+			return -EINVAL;
 		}
-		if (btf_type_is_ptr(t)) {
+
+		if (!btf_type_is_ptr(t)) {
+			bpf_log(log, "Unrecognized arg#%d type %s\n",
+				i, btf_type_str(t));
+			return -EINVAL;
+		}
+
+		ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+		if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) {
 			/* If function expects ctx type in BTF check that caller
 			 * is passing PTR_TO_CTX.
 			 */
-			if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) {
-				if (reg->type != PTR_TO_CTX) {
-					bpf_log(log,
-						"arg#%d expected pointer to ctx, but got %s\n",
-						i, btf_kind_str[BTF_INFO_KIND(t->info)]);
-					goto out;
-				}
-				if (check_ctx_reg(env, reg, i + 1))
-					goto out;
-				continue;
+			if (reg->type != PTR_TO_CTX) {
+				bpf_log(log,
+					"arg#%d expected pointer to ctx, but got %s\n",
+					i, btf_type_str(t));
+				return -EINVAL;
 			}
+			if (check_ctx_reg(env, reg, regno))
+				return -EINVAL;
+		} else if (ptr_to_mem_ok) {
+			const struct btf_type *resolve_ret;
+			u32 type_size;
 
-			if (!is_global)
-				goto out;
-
-			t = btf_type_skip_modifiers(btf, t->type, NULL);
-
-			ref_t = btf_resolve_size(btf, t, &type_size);
-			if (IS_ERR(ref_t)) {
+			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
+			if (IS_ERR(resolve_ret)) {
 				bpf_log(log,
-				    "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
-				    i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
-					PTR_ERR(ref_t));
-				goto out;
+					"arg#%d reference type('%s %s') size cannot be determined: %ld\n",
+					i, btf_type_str(ref_t), ref_tname,
+					PTR_ERR(resolve_ret));
+				return -EINVAL;
 			}
 
-			if (check_mem_reg(env, reg, i + 1, type_size))
-				goto out;
-
-			continue;
+			if (check_mem_reg(env, reg, regno, type_size))
+				return -EINVAL;
+		} else {
+			return -EINVAL;
 		}
-		bpf_log(log, "Unrecognized arg#%d type %s\n",
-			i, btf_kind_str[BTF_INFO_KIND(t->info)]);
-		goto out;
 	}
+
 	return 0;
-out:
+}
+
+/* Compare BTF of a function with given bpf_reg_state.
+ * Returns:
+ * EFAULT - there is a verifier bug. Abort verification.
+ * EINVAL - there is a type mismatch or BTF is not available.
+ * 0 - BTF matches with what bpf_reg_state expects.
+ * Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
+ */
+int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
+				struct bpf_reg_state *regs)
+{
+	struct bpf_prog *prog = env->prog;
+	struct btf *btf = prog->aux->btf;
+	bool is_global;
+	u32 btf_id;
+	int err;
+
+	if (!prog->aux->func_info)
+		return -EINVAL;
+
+	btf_id = prog->aux->func_info[subprog].type_id;
+	if (!btf_id)
+		return -EFAULT;
+
+	if (prog->aux->func_info_aux[subprog].unreliable)
+		return -EINVAL;
+
+	is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
+	err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
+
 	/* Compiler optimizations can remove arguments from static functions
 	 * or mismatched type can be passed into a global function.
 	 * In such cases mark the function as unreliable from BTF point of view.
 	 */
-	prog->aux->func_info_aux[subprog].unreliable = true;
-	return -EINVAL;
+	if (err)
+		prog->aux->func_info_aux[subprog].unreliable = true;
+	return err;
 }
 
 /* Convert BTF of a function into bpf_reg_state if possible
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b7df3f06a279..b31e62daafbd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5365,7 +5365,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	func_info_aux = env->prog->aux->func_info_aux;
 	if (func_info_aux)
 		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
-	err = btf_check_func_arg_match(env, subprog, caller->regs);
+	err = btf_check_subprog_arg_match(env, subprog, caller->regs);
 	if (err == -EFAULT)
 		return err;
 	if (is_global) {
@@ -12288,7 +12288,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 		/* 1st arg to a function */
 		regs[BPF_REG_1].type = PTR_TO_CTX;
 		mark_reg_known_zero(env, regs, BPF_REG_1);
-		ret = btf_check_func_arg_match(env, subprog, regs);
+		ret = btf_check_subprog_arg_match(env, subprog, regs);
 		if (ret == -EFAULT)
 			/* unlikely verifier bug. abort.
 			 * ret == 0 and ret < 0 are sadly acceptable for
-- 
cgit v1.2.3


From e6ac2450d6dee3121cd8bbf2907b78a68a8a353d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:51:42 -0700
Subject: bpf: Support bpf program calling kernel function

This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.

The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()").  Those
functions have already been used by some kernel tcp-cc implementations.

This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation,  For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.

The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed.  The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.

This patch is to make the required changes in the bpf verifier.

First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function.  This will handle the
PTR_TO_BTF_ID reg.  It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.

In the later libbpf patch, the insn calling a kernel function will
look like:

insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */

[ For the future calling function-in-kernel-module support, an array
  of module btf_fds can be passed at the load time and insn->off
  can be used to index into this array. ]

At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc".  Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT.  Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".

In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
   A new bpf_verifier_ops "check_kfunc_call" is added to do that.
   The bpf-tcp-cc struct_ops program will implement this function in
   a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
   used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.

At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base).  If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.

gpl_compatible program is required to call kernel function.

This feature currently requires JIT.

The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
---
 arch/x86/net/bpf_jit_comp.c                      |   5 +
 include/linux/bpf.h                              |  24 ++
 include/linux/btf.h                              |   1 +
 include/linux/filter.h                           |   1 +
 include/uapi/linux/bpf.h                         |   4 +
 kernel/bpf/btf.c                                 |  65 +++-
 kernel/bpf/core.c                                |  18 +-
 kernel/bpf/disasm.c                              |  13 +-
 kernel/bpf/syscall.c                             |   1 +
 kernel/bpf/verifier.c                            | 368 +++++++++++++++++++++--
 tools/include/uapi/linux/bpf.h                   |   4 +
 tools/testing/selftests/bpf/verifier/calls.c     |  12 +-
 tools/testing/selftests/bpf/verifier/dead_code.c |  10 +-
 13 files changed, 480 insertions(+), 46 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b35fc8023884..9eead60f0301 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2346,3 +2346,8 @@ out:
 					   tmp : orig_prog);
 	return prog;
 }
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+	return true;
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index eaae618a90b5..b5b7967e3ff3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -427,6 +427,7 @@ enum bpf_reg_type {
 	PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
 	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
+	__BPF_REG_TYPE_MAX,
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -480,6 +481,7 @@ struct bpf_verifier_ops {
 				 const struct btf_type *t, int off, int size,
 				 enum bpf_access_type atype,
 				 u32 *next_btf_id);
+	bool (*check_kfunc_call)(u32 kfunc_btf_id);
 };
 
 struct bpf_prog_offload_ops {
@@ -796,6 +798,8 @@ struct btf_mod_pair {
 	struct module *module;
 };
 
+struct bpf_kfunc_desc_tab;
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
 	u32 used_map_cnt;
@@ -832,6 +836,7 @@ struct bpf_prog_aux {
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
 	struct bpf_jit_poke_descriptor *poke_tab;
+	struct bpf_kfunc_desc_tab *kfunc_tab;
 	u32 size_poke_tab;
 	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
@@ -1547,6 +1552,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 struct bpf_reg_state;
 int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
 				struct bpf_reg_state *regs);
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+			      const struct btf *btf, u32 func_id,
+			      struct bpf_reg_state *regs);
 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
 			  struct bpf_reg_state *reg);
 int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@ -1557,6 +1565,10 @@ struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
 void bpf_task_storage_free(struct task_struct *task);
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+			 const struct bpf_insn *insn);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1737,6 +1749,18 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 static inline void bpf_task_storage_free(struct task_struct *task)
 {
 }
+
+static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+	return false;
+}
+
+static inline const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+			 const struct bpf_insn *insn)
+{
+	return NULL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8a05687a4ee2..3bac66e0183a 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -110,6 +110,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
 const struct btf_type *
 btf_resolve_size(const struct btf *btf, const struct btf_type *type,
 		 u32 *type_size);
+const char *btf_type_str(const struct btf_type *t);
 
 #define for_each_member(i, struct_type, member)			\
 	for (i = 0, member = btf_type_member(struct_type);	\
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0d9c710eb050..eecfd82db648 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -918,6 +918,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_kfunc_call(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 008edc1dc8c1..598716742593 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1117,6 +1117,10 @@ enum bpf_link_type {
  * offset to another bpf function
  */
 #define BPF_PSEUDO_CALL		1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL	2
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
 enum {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 3c489adacf3b..ec8afc4bc560 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -283,7 +283,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_FLOAT]	= "FLOAT",
 };
 
-static const char *btf_type_str(const struct btf_type *t)
+const char *btf_type_str(const struct btf_type *t)
 {
 	return btf_kind_str[BTF_INFO_KIND(t->info)];
 }
@@ -5362,6 +5362,14 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr
 	return btf_check_func_type_match(log, btf1, t1, btf2, t2);
 }
 
+static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
+#ifdef CONFIG_NET
+	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+#endif
+};
+
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 				    const struct btf *btf, u32 func_id,
 				    struct bpf_reg_state *regs,
@@ -5371,12 +5379,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 	const char *func_name, *ref_tname;
 	const struct btf_type *t, *ref_t;
 	const struct btf_param *args;
-	u32 i, nargs;
+	u32 i, nargs, ref_id;
 
 	t = btf_type_by_id(btf, func_id);
 	if (!t || !btf_type_is_func(t)) {
 		/* These checks were already done by the verifier while loading
-		 * struct bpf_func_info
+		 * struct bpf_func_info or in add_kfunc_call().
 		 */
 		bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n",
 			func_id);
@@ -5418,9 +5426,49 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 			return -EINVAL;
 		}
 
-		ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
 		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
-		if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) {
+		if (btf_is_kernel(btf)) {
+			const struct btf_type *reg_ref_t;
+			const struct btf *reg_btf;
+			const char *reg_ref_tname;
+			u32 reg_ref_id;
+
+			if (!btf_type_is_struct(ref_t)) {
+				bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
+					func_name, i, btf_type_str(ref_t),
+					ref_tname);
+				return -EINVAL;
+			}
+
+			if (reg->type == PTR_TO_BTF_ID) {
+				reg_btf = reg->btf;
+				reg_ref_id = reg->btf_id;
+			} else if (reg2btf_ids[reg->type]) {
+				reg_btf = btf_vmlinux;
+				reg_ref_id = *reg2btf_ids[reg->type];
+			} else {
+				bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
+					func_name, i,
+					btf_type_str(ref_t), ref_tname, regno);
+				return -EINVAL;
+			}
+
+			reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
+							    &reg_ref_id);
+			reg_ref_tname = btf_name_by_offset(reg_btf,
+							   reg_ref_t->name_off);
+			if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
+						  reg->off, btf, ref_id)) {
+				bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
+					func_name, i,
+					btf_type_str(ref_t), ref_tname,
+					regno, btf_type_str(reg_ref_t),
+					reg_ref_tname);
+				return -EINVAL;
+			}
+		} else if (btf_get_prog_ctx_type(log, btf, t,
+						 env->prog->type, i)) {
 			/* If function expects ctx type in BTF check that caller
 			 * is passing PTR_TO_CTX.
 			 */
@@ -5493,6 +5541,13 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
 	return err;
 }
 
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+			      const struct btf *btf, u32 func_id,
+			      struct bpf_reg_state *regs)
+{
+	return btf_check_func_arg_match(env, btf, func_id, regs, false);
+}
+
 /* Convert BTF of a function into bpf_reg_state if possible
  * Returns:
  * EFAULT - there is a verifier bug. Abort verification.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a35eb3d7b126..f5423251c118 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -159,6 +159,9 @@ void bpf_prog_jit_attempt_done(struct bpf_prog *prog)
 		kvfree(prog->aux->jited_linfo);
 		prog->aux->jited_linfo = NULL;
 	}
+
+	kfree(prog->aux->kfunc_tab);
+	prog->aux->kfunc_tab = NULL;
 }
 
 /* The jit engine is responsible to provide an array
@@ -1840,9 +1843,15 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	/* In case of BPF to BPF calls, verifier did all the prep
 	 * work with regards to JITing, etc.
 	 */
+	bool jit_needed = false;
+
 	if (fp->bpf_func)
 		goto finalize;
 
+	if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
+	    bpf_prog_has_kfunc_call(fp))
+		jit_needed = true;
+
 	bpf_prog_select_func(fp);
 
 	/* eBPF JITs can rewrite the program in case constant
@@ -1858,12 +1867,10 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 
 		fp = bpf_int_jit_compile(fp);
 		bpf_prog_jit_attempt_done(fp);
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
-		if (!fp->jited) {
+		if (!fp->jited && jit_needed) {
 			*err = -ENOTSUPP;
 			return fp;
 		}
-#endif
 	} else {
 		*err = bpf_prog_offload_compile(fp);
 		if (*err)
@@ -2343,6 +2350,11 @@ bool __weak bpf_jit_needs_zext(void)
 	return false;
 }
 
+bool __weak bpf_jit_supports_kfunc_call(void)
+{
+	return false;
+}
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 3acc7e0b6916..dad821c8ecd0 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -19,16 +19,23 @@ static const char *__func_get_name(const struct bpf_insn_cbs *cbs,
 {
 	BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
 
-	if (insn->src_reg != BPF_PSEUDO_CALL &&
+	if (!insn->src_reg &&
 	    insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID &&
 	    func_id_str[insn->imm])
 		return func_id_str[insn->imm];
 
-	if (cbs && cbs->cb_call)
-		return cbs->cb_call(cbs->private_data, insn);
+	if (cbs && cbs->cb_call) {
+		const char *res;
+
+		res = cbs->cb_call(cbs->private_data, insn);
+		if (res)
+			return res;
+	}
 
 	if (insn->src_reg == BPF_PSEUDO_CALL)
 		snprintf(buff, len, "%+d", insn->imm);
+	else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+		snprintf(buff, len, "kernel-function");
 
 	return buff;
 }
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eaf85bf51c5a..9603de81811a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1696,6 +1696,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
 	btf_put(prog->aux->btf);
 	kvfree(prog->aux->jited_linfo);
 	kvfree(prog->aux->linfo);
+	kfree(prog->aux->kfunc_tab);
 	if (prog->aux->attach_btf)
 		btf_put(prog->aux->attach_btf);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b31e62daafbd..852541a435ef 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_CALL;
 }
 
+static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+}
+
 static bool bpf_pseudo_func(const struct bpf_insn *insn)
 {
 	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
@@ -1554,47 +1560,205 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 		verbose(env, "too many subprograms\n");
 		return -E2BIG;
 	}
+	/* determine subprog starts. The end is one before the next starts */
 	env->subprog_info[env->subprog_cnt++].start = off;
 	sort(env->subprog_info, env->subprog_cnt,
 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
 	return env->subprog_cnt - 1;
 }
 
-static int check_subprogs(struct bpf_verifier_env *env)
+struct bpf_kfunc_desc {
+	struct btf_func_model func_model;
+	u32 func_id;
+	s32 imm;
+};
+
+#define MAX_KFUNC_DESCS 256
+struct bpf_kfunc_desc_tab {
+	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
+	u32 nr_descs;
+};
+
+static int kfunc_desc_cmp_by_id(const void *a, const void *b)
+{
+	const struct bpf_kfunc_desc *d0 = a;
+	const struct bpf_kfunc_desc *d1 = b;
+
+	/* func_id is not greater than BTF_MAX_TYPE */
+	return d0->func_id - d1->func_id;
+}
+
+static const struct bpf_kfunc_desc *
+find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
+{
+	struct bpf_kfunc_desc desc = {
+		.func_id = func_id,
+	};
+	struct bpf_kfunc_desc_tab *tab;
+
+	tab = prog->aux->kfunc_tab;
+	return bsearch(&desc, tab->descs, tab->nr_descs,
+		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
+}
+
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
+{
+	const struct btf_type *func, *func_proto;
+	struct bpf_kfunc_desc_tab *tab;
+	struct bpf_prog_aux *prog_aux;
+	struct bpf_kfunc_desc *desc;
+	const char *func_name;
+	unsigned long addr;
+	int err;
+
+	prog_aux = env->prog->aux;
+	tab = prog_aux->kfunc_tab;
+	if (!tab) {
+		if (!btf_vmlinux) {
+			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
+			return -ENOTSUPP;
+		}
+
+		if (!env->prog->jit_requested) {
+			verbose(env, "JIT is required for calling kernel function\n");
+			return -ENOTSUPP;
+		}
+
+		if (!bpf_jit_supports_kfunc_call()) {
+			verbose(env, "JIT does not support calling kernel function\n");
+			return -ENOTSUPP;
+		}
+
+		if (!env->prog->gpl_compatible) {
+			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
+			return -EINVAL;
+		}
+
+		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+		if (!tab)
+			return -ENOMEM;
+		prog_aux->kfunc_tab = tab;
+	}
+
+	if (find_kfunc_desc(env->prog, func_id))
+		return 0;
+
+	if (tab->nr_descs == MAX_KFUNC_DESCS) {
+		verbose(env, "too many different kernel function calls\n");
+		return -E2BIG;
+	}
+
+	func = btf_type_by_id(btf_vmlinux, func_id);
+	if (!func || !btf_type_is_func(func)) {
+		verbose(env, "kernel btf_id %u is not a function\n",
+			func_id);
+		return -EINVAL;
+	}
+	func_proto = btf_type_by_id(btf_vmlinux, func->type);
+	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
+		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
+			func_id);
+		return -EINVAL;
+	}
+
+	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+	addr = kallsyms_lookup_name(func_name);
+	if (!addr) {
+		verbose(env, "cannot find address for kernel function %s\n",
+			func_name);
+		return -EINVAL;
+	}
+
+	desc = &tab->descs[tab->nr_descs++];
+	desc->func_id = func_id;
+	desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
+	err = btf_distill_func_proto(&env->log, btf_vmlinux,
+				     func_proto, func_name,
+				     &desc->func_model);
+	if (!err)
+		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+		     kfunc_desc_cmp_by_id, NULL);
+	return err;
+}
+
+static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
+{
+	const struct bpf_kfunc_desc *d0 = a;
+	const struct bpf_kfunc_desc *d1 = b;
+
+	if (d0->imm > d1->imm)
+		return 1;
+	else if (d0->imm < d1->imm)
+		return -1;
+	return 0;
+}
+
+static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
+{
+	struct bpf_kfunc_desc_tab *tab;
+
+	tab = prog->aux->kfunc_tab;
+	if (!tab)
+		return;
+
+	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+	     kfunc_desc_cmp_by_imm, NULL);
+}
+
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+	return !!prog->aux->kfunc_tab;
+}
+
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+			 const struct bpf_insn *insn)
+{
+	const struct bpf_kfunc_desc desc = {
+		.imm = insn->imm,
+	};
+	const struct bpf_kfunc_desc *res;
+	struct bpf_kfunc_desc_tab *tab;
+
+	tab = prog->aux->kfunc_tab;
+	res = bsearch(&desc, tab->descs, tab->nr_descs,
+		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
+
+	return res ? &res->func_model : NULL;
+}
+
+static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 {
-	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
+	int i, ret, insn_cnt = env->prog->len;
 
 	/* Add entry function. */
 	ret = add_subprog(env, 0);
-	if (ret < 0)
+	if (ret)
 		return ret;
 
-	/* determine subprog starts. The end is one before the next starts */
-	for (i = 0; i < insn_cnt; i++) {
-		if (bpf_pseudo_func(insn + i)) {
-			if (!env->bpf_capable) {
-				verbose(env,
-					"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
-				return -EPERM;
-			}
-			ret = add_subprog(env, i + insn[i].imm + 1);
-			if (ret < 0)
-				return ret;
-			/* remember subprog */
-			insn[i + 1].imm = ret;
-			continue;
-		}
-		if (!bpf_pseudo_call(insn + i))
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
+		    !bpf_pseudo_kfunc_call(insn))
 			continue;
+
 		if (!env->bpf_capable) {
-			verbose(env,
-				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
+			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
 			return -EPERM;
 		}
-		ret = add_subprog(env, i + insn[i].imm + 1);
+
+		if (bpf_pseudo_func(insn)) {
+			ret = add_subprog(env, i + insn->imm + 1);
+			if (ret >= 0)
+				/* remember subprog */
+				insn[1].imm = ret;
+		} else if (bpf_pseudo_call(insn)) {
+			ret = add_subprog(env, i + insn->imm + 1);
+		} else {
+			ret = add_kfunc_call(env, insn->imm);
+		}
+
 		if (ret < 0)
 			return ret;
 	}
@@ -1608,6 +1772,16 @@ static int check_subprogs(struct bpf_verifier_env *env)
 		for (i = 0; i < env->subprog_cnt; i++)
 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
 
+	return 0;
+}
+
+static int check_subprogs(struct bpf_verifier_env *env)
+{
+	int i, subprog_start, subprog_end, off, cur_subprog = 0;
+	struct bpf_subprog_info *subprog = env->subprog_info;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+
 	/* now check that all jumps are within the same subprog */
 	subprog_start = subprog[cur_subprog].start;
 	subprog_end = subprog[cur_subprog + 1].start;
@@ -1916,6 +2090,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
 	return i;
 }
 
+static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
+{
+	const struct btf_type *func;
+
+	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
+		return NULL;
+
+	func = btf_type_by_id(btf_vmlinux, insn->imm);
+	return btf_name_by_offset(btf_vmlinux, func->name_off);
+}
+
 /* For given verifier state backtrack_insn() is called from the last insn to
  * the first insn. Its purpose is to compute a bitmask of registers and
  * stack slots that needs precision in the parent verifier state.
@@ -1924,6 +2109,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx,
 			  u32 *reg_mask, u64 *stack_mask)
 {
 	const struct bpf_insn_cbs cbs = {
+		.cb_call	= disasm_kfunc_name,
 		.cb_print	= verbose,
 		.private_data	= env,
 	};
@@ -5960,6 +6146,98 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	return 0;
 }
 
+/* mark_btf_func_reg_size() is used when the reg size is determined by
+ * the BTF func_proto's return value size and argument.
+ */
+static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
+				   size_t reg_size)
+{
+	struct bpf_reg_state *reg = &cur_regs(env)[regno];
+
+	if (regno == BPF_REG_0) {
+		/* Function return value */
+		reg->live |= REG_LIVE_WRITTEN;
+		reg->subreg_def = reg_size == sizeof(u64) ?
+			DEF_NOT_SUBREG : env->insn_idx + 1;
+	} else {
+		/* Function argument */
+		if (reg_size == sizeof(u64)) {
+			mark_insn_zext(env, reg);
+			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+		} else {
+			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
+		}
+	}
+}
+
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+	const struct btf_type *t, *func, *func_proto, *ptr_type;
+	struct bpf_reg_state *regs = cur_regs(env);
+	const char *func_name, *ptr_type_name;
+	u32 i, nargs, func_id, ptr_type_id;
+	const struct btf_param *args;
+	int err;
+
+	func_id = insn->imm;
+	func = btf_type_by_id(btf_vmlinux, func_id);
+	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+	func_proto = btf_type_by_id(btf_vmlinux, func->type);
+
+	if (!env->ops->check_kfunc_call ||
+	    !env->ops->check_kfunc_call(func_id)) {
+		verbose(env, "calling kernel function %s is not allowed\n",
+			func_name);
+		return -EACCES;
+	}
+
+	/* Check the arguments */
+	err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
+	if (err)
+		return err;
+
+	for (i = 0; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(env, regs, caller_saved[i]);
+
+	/* Check return type */
+	t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
+	if (btf_type_is_scalar(t)) {
+		mark_reg_unknown(env, regs, BPF_REG_0);
+		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
+	} else if (btf_type_is_ptr(t)) {
+		ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
+						   &ptr_type_id);
+		if (!btf_type_is_struct(ptr_type)) {
+			ptr_type_name = btf_name_by_offset(btf_vmlinux,
+							   ptr_type->name_off);
+			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
+				func_name, btf_type_str(ptr_type),
+				ptr_type_name);
+			return -EINVAL;
+		}
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].btf = btf_vmlinux;
+		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
+		regs[BPF_REG_0].btf_id = ptr_type_id;
+		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
+
+	nargs = btf_type_vlen(func_proto);
+	args = (const struct btf_param *)(func_proto + 1);
+	for (i = 0; i < nargs; i++) {
+		u32 regno = i + 1;
+
+		t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
+		if (btf_type_is_ptr(t))
+			mark_btf_func_reg_size(env, regno, sizeof(void *));
+		else
+			/* scalar. ensured by btf_check_kfunc_arg_match() */
+			mark_btf_func_reg_size(env, regno, t->size);
+	}
+
+	return 0;
+}
+
 static bool signed_add_overflows(s64 a, s64 b)
 {
 	/* Do the add in u64, where overflow is well-defined */
@@ -10162,6 +10440,7 @@ static int do_check(struct bpf_verifier_env *env)
 
 		if (env->log.level & BPF_LOG_LEVEL) {
 			const struct bpf_insn_cbs cbs = {
+				.cb_call	= disasm_kfunc_name,
 				.cb_print	= verbose,
 				.private_data	= env,
 			};
@@ -10309,7 +10588,8 @@ static int do_check(struct bpf_verifier_env *env)
 				if (BPF_SRC(insn->code) != BPF_K ||
 				    insn->off != 0 ||
 				    (insn->src_reg != BPF_REG_0 &&
-				     insn->src_reg != BPF_PSEUDO_CALL) ||
+				     insn->src_reg != BPF_PSEUDO_CALL &&
+				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
 				    insn->dst_reg != BPF_REG_0 ||
 				    class == BPF_JMP32) {
 					verbose(env, "BPF_CALL uses reserved fields\n");
@@ -10324,6 +10604,8 @@ static int do_check(struct bpf_verifier_env *env)
 				}
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
+				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+					err = check_kfunc_call(env, insn);
 				else
 					err = check_helper_call(env, insn, &env->insn_idx);
 				if (err)
@@ -11634,6 +11916,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->name[0] = 'F';
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
 		func[i]->jit_requested = 1;
+		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
 		func[i]->aux->linfo = prog->aux->linfo;
 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
@@ -11773,6 +12056,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 	struct bpf_prog *prog = env->prog;
 	struct bpf_insn *insn = prog->insnsi;
+	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
 	int i, depth;
 #endif
 	int err = 0;
@@ -11786,6 +12070,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 			return err;
 	}
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	if (has_kfunc_call) {
+		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
+		return -EINVAL;
+	}
 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
 		 * have to be rejected, since interpreter doesn't support them yet.
@@ -11814,6 +12102,26 @@ static int fixup_call_args(struct bpf_verifier_env *env)
 	return err;
 }
 
+static int fixup_kfunc_call(struct bpf_verifier_env *env,
+			    struct bpf_insn *insn)
+{
+	const struct bpf_kfunc_desc *desc;
+
+	/* insn->imm has the btf func_id. Replace it with
+	 * an address (relative to __bpf_base_call).
+	 */
+	desc = find_kfunc_desc(env->prog, insn->imm);
+	if (!desc) {
+		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
+			insn->imm);
+		return -EFAULT;
+	}
+
+	insn->imm = desc->imm;
+
+	return 0;
+}
+
 /* Do various post-verification rewrites in a single program pass.
  * These rewrites simplify JIT and interpreter implementations.
  */
@@ -11949,6 +12257,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			continue;
 		if (insn->src_reg == BPF_PSEUDO_CALL)
 			continue;
+		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+			ret = fixup_kfunc_call(env, insn);
+			if (ret)
+				return ret;
+			continue;
+		}
 
 		if (insn->imm == BPF_FUNC_get_route_realm)
 			prog->dst_needed = 1;
@@ -12178,6 +12492,8 @@ patch_call_imm:
 		}
 	}
 
+	sort_kfunc_descs_by_imm(env->prog);
+
 	return 0;
 }
 
@@ -12883,6 +13199,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (!env->explored_states)
 		goto skip_full_check;
 
+	ret = add_subprog_and_kfunc(env);
+	if (ret < 0)
+		goto skip_full_check;
+
 	ret = check_subprogs(env);
 	if (ret < 0)
 		goto skip_full_check;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2d3036e292a9..ab9f2233607c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1117,6 +1117,10 @@ enum bpf_link_type {
  * offset to another bpf function
  */
 #define BPF_PSEUDO_CALL		1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL	2
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
 enum {
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index eb888c8479c3..336a749673d1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 1,
@@ -136,7 +136,7 @@
 {
 	"calls: wrong src reg",
 	.insns = {
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	},
@@ -397,7 +397,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.fixup_map_hash_48b = { 3 },
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
@@ -1977,7 +1977,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
@@ -2003,7 +2003,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
@@ -2028,7 +2028,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..17fe33a75034 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -103,7 +103,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -121,7 +121,7 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -137,7 +137,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -152,7 +152,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for",
+	.errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
-- 
cgit v1.2.3


From 797b84f727bce9c64ea2c85899c1ba283df54c16 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:51:49 -0700
Subject: bpf: Support kernel function call in x86-32

This patch adds kernel function call support to the x86-32 bpf jit.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015149.1545267-1-kafai@fb.com
---
 arch/x86/net/bpf_jit_comp32.c | 198 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index d17b67c69f89..0a7a2870f111 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1390,6 +1390,19 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
 	*pprog = prog;
 }
 
+static void emit_push_r32(const u8 src[], u8 **pprog)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	/* mov ecx,dword ptr [ebp+off] */
+	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
+	/* push ecx */
+	EMIT1(0x51);
+
+	*pprog = prog;
+}
+
 static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
 {
 	u8 jmp_cond;
@@ -1459,6 +1472,174 @@ static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
 	return jmp_cond;
 }
 
+/* i386 kernel compiles with "-mregparm=3".  From gcc document:
+ *
+ * ==== snippet ====
+ * regparm (number)
+ *	On x86-32 targets, the regparm attribute causes the compiler
+ *	to pass arguments number one to (number) if they are of integral
+ *	type in registers EAX, EDX, and ECX instead of on the stack.
+ *	Functions that take a variable number of arguments continue
+ *	to be passed all of their arguments on the stack.
+ * ==== snippet ====
+ *
+ * The first three args of a function will be considered for
+ * putting into the 32bit register EAX, EDX, and ECX.
+ *
+ * Two 32bit registers are used to pass a 64bit arg.
+ *
+ * For example,
+ * void foo(u32 a, u32 b, u32 c, u32 d):
+ *	u32 a: EAX
+ *	u32 b: EDX
+ *	u32 c: ECX
+ *	u32 d: stack
+ *
+ * void foo(u64 a, u32 b, u32 c):
+ *	u64 a: EAX (lo32) EDX (hi32)
+ *	u32 b: ECX
+ *	u32 c: stack
+ *
+ * void foo(u32 a, u64 b, u32 c):
+ *	u32 a: EAX
+ *	u64 b: EDX (lo32) ECX (hi32)
+ *	u32 c: stack
+ *
+ * void foo(u32 a, u32 b, u64 c):
+ *	u32 a: EAX
+ *	u32 b: EDX
+ *	u64 c: stack
+ *
+ * The return value will be stored in the EAX (and EDX for 64bit value).
+ *
+ * For example,
+ * u32 foo(u32 a, u32 b, u32 c):
+ *	return value: EAX
+ *
+ * u64 foo(u32 a, u32 b, u32 c):
+ *	return value: EAX (lo32) EDX (hi32)
+ *
+ * Notes:
+ *	The verifier only accepts function having integer and pointers
+ *	as its args and return value, so it does not have
+ *	struct-by-value.
+ *
+ * emit_kfunc_call() finds out the btf_func_model by calling
+ * bpf_jit_find_kfunc_model().  A btf_func_model
+ * has the details about the number of args, size of each arg,
+ * and the size of the return value.
+ *
+ * It first decides how many args can be passed by EAX, EDX, and ECX.
+ * That will decide what args should be pushed to the stack:
+ * [first_stack_regno, last_stack_regno] are the bpf regnos
+ * that should be pushed to the stack.
+ *
+ * It will first push all args to the stack because the push
+ * will need to use ECX.  Then, it moves
+ * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
+ *
+ * When emitting a call (0xE8), it needs to figure out
+ * the jmp_offset relative to the jit-insn address immediately
+ * following the call (0xE8) instruction.  At this point, it knows
+ * the end of the jit-insn address after completely translated the
+ * current (BPF_JMP | BPF_CALL) bpf-insn.  It is passed as "end_addr"
+ * to the emit_kfunc_call().  Thus, it can learn the "immediate-follow-call"
+ * address by figuring out how many jit-insn is generated between
+ * the call (0xE8) and the end_addr:
+ *	- 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
+ *	  is arg pushed to the stack.
+ *	- 0-2 jit-insns (3 bytes each) to handle the return value.
+ */
+static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
+			   const struct bpf_insn *insn, u8 **pprog)
+{
+	const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
+	int i, cnt = 0, first_stack_regno, last_stack_regno;
+	int free_arg_regs = ARRAY_SIZE(arg_regs);
+	const struct btf_func_model *fm;
+	int bytes_in_stack = 0;
+	const u8 *cur_arg_reg;
+	u8 *prog = *pprog;
+	s64 jmp_offset;
+
+	fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
+	if (!fm)
+		return -EINVAL;
+
+	first_stack_regno = BPF_REG_1;
+	for (i = 0; i < fm->nr_args; i++) {
+		int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;
+
+		if (regs_needed > free_arg_regs)
+			break;
+
+		free_arg_regs -= regs_needed;
+		first_stack_regno++;
+	}
+
+	/* Push the args to the stack */
+	last_stack_regno = BPF_REG_0 + fm->nr_args;
+	for (i = last_stack_regno; i >= first_stack_regno; i--) {
+		if (fm->arg_size[i - 1] > sizeof(u32)) {
+			emit_push_r64(bpf2ia32[i], &prog);
+			bytes_in_stack += 8;
+		} else {
+			emit_push_r32(bpf2ia32[i], &prog);
+			bytes_in_stack += 4;
+		}
+	}
+
+	cur_arg_reg = &arg_regs[0];
+	for (i = BPF_REG_1; i < first_stack_regno; i++) {
+		/* mov e[adc]x,dword ptr [ebp+off] */
+		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
+		      STACK_VAR(bpf2ia32[i][0]));
+		if (fm->arg_size[i - 1] > sizeof(u32))
+			/* mov e[adc]x,dword ptr [ebp+off] */
+			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
+			      STACK_VAR(bpf2ia32[i][1]));
+	}
+
+	if (bytes_in_stack)
+		/* add esp,"bytes_in_stack" */
+		end_addr -= 3;
+
+	/* mov dword ptr [ebp+off],edx */
+	if (fm->ret_size > sizeof(u32))
+		end_addr -= 3;
+
+	/* mov dword ptr [ebp+off],eax */
+	if (fm->ret_size)
+		end_addr -= 3;
+
+	jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
+	if (!is_simm32(jmp_offset)) {
+		pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
+		       jmp_offset);
+		return -EINVAL;
+	}
+
+	EMIT1_off32(0xE8, jmp_offset);
+
+	if (fm->ret_size)
+		/* mov dword ptr [ebp+off],eax */
+		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+		      STACK_VAR(bpf2ia32[BPF_REG_0][0]));
+
+	if (fm->ret_size > sizeof(u32))
+		/* mov dword ptr [ebp+off],edx */
+		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+		      STACK_VAR(bpf2ia32[BPF_REG_0][1]));
+
+	if (bytes_in_stack)
+		/* add esp,"bytes_in_stack" */
+		EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);
+
+	*pprog = prog;
+
+	return 0;
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
@@ -1888,6 +2069,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			if (insn->src_reg == BPF_PSEUDO_CALL)
 				goto notyet;
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+				int err;
+
+				err = emit_kfunc_call(bpf_prog,
+						      image + addrs[i],
+						      insn, &prog);
+
+				if (err)
+					return err;
+				break;
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 
@@ -2393,3 +2586,8 @@ out:
 					   tmp : orig_prog);
 	return prog;
 }
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+	return true;
+}
-- 
cgit v1.2.3


From d22f6ad18709e93622b6115ec9a5e42ed96b5d82 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:51:55 -0700
Subject: tcp: Rename bictcp function prefix to cubictcp

The cubic functions in tcp_cubic.c are using the bictcp prefix as
in tcp_bic.c.  This patch gives it the proper name cubictcp
because the later patch will allow the bpf prog to directly
call the cubictcp implementation.  Renaming them will avoid
the name collision when trying to find the intended
one to call during bpf prog load time.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015155.1545532-1-kafai@fb.com
---
 net/ipv4/tcp_cubic.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ffcbe46dacdb..4a30deaa9a37 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -124,7 +124,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
-static void bictcp_init(struct sock *sk)
+static void cubictcp_init(struct sock *sk)
 {
 	struct bictcp *ca = inet_csk_ca(sk);
 
@@ -137,7 +137,7 @@ static void bictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
 		struct bictcp *ca = inet_csk_ca(sk);
@@ -319,7 +319,7 @@ tcp_friendliness:
 	ca->cnt = max(ca->cnt, 2U);
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -338,7 +338,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	tcp_cong_avoid_ai(tp, ca->cnt, acked);
 }
 
-static u32 bictcp_recalc_ssthresh(struct sock *sk)
+static u32 cubictcp_recalc_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -355,7 +355,7 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-static void bictcp_state(struct sock *sk, u8 new_state)
+static void cubictcp_state(struct sock *sk, u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
 		bictcp_reset(inet_csk_ca(sk));
@@ -442,7 +442,7 @@ static void hystart_update(struct sock *sk, u32 delay)
 	}
 }
 
-static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
+static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -471,13 +471,13 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
 }
 
 static struct tcp_congestion_ops cubictcp __read_mostly = {
-	.init		= bictcp_init,
-	.ssthresh	= bictcp_recalc_ssthresh,
-	.cong_avoid	= bictcp_cong_avoid,
-	.set_state	= bictcp_state,
+	.init		= cubictcp_init,
+	.ssthresh	= cubictcp_recalc_ssthresh,
+	.cong_avoid	= cubictcp_cong_avoid,
+	.set_state	= cubictcp_state,
 	.undo_cwnd	= tcp_reno_undo_cwnd,
-	.cwnd_event	= bictcp_cwnd_event,
-	.pkts_acked     = bictcp_acked,
+	.cwnd_event	= cubictcp_cwnd_event,
+	.pkts_acked     = cubictcp_acked,
 	.owner		= THIS_MODULE,
 	.name		= "cubic",
 };
-- 
cgit v1.2.3


From e78aea8b2170be1b88c96a4d138422986a737336 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:01 -0700
Subject: bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc

This patch puts some tcp cong helper functions, tcp_slow_start()
and tcp_cong_avoid_ai(), into the allowlist for the bpf-tcp-cc
program.

A few tcp cc implementation functions are also put into the
allowlist.  A potential use case is the bpf-tcp-cc implementation
may only want to override a subset of a tcp_congestion_ops.  For others,
the bpf-tcp-cc can directly call the kernel counter parts instead of
re-implementing (or copy-and-pasting) them to the bpf program.

They will only be available to the bpf-tcp-cc typed program.
The allowlist functions are not bounded to a fixed ABI contract.
When any of them has changed, the bpf-tcp-cc program has to be changed
like any in-tree/out-of-tree kernel tcp-cc implementations do also.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015201.1546345-1-kafai@fb.com
---
 net/ipv4/bpf_tcp_ca.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index d520e61649c8..40520b77a307 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -5,6 +5,7 @@
 #include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/filter.h>
 #include <net/tcp.h>
 #include <net/bpf_sk_storage.h>
@@ -178,10 +179,50 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
 	}
 }
 
+BTF_SET_START(bpf_tcp_ca_kfunc_ids)
+BTF_ID(func, tcp_reno_ssthresh)
+BTF_ID(func, tcp_reno_cong_avoid)
+BTF_ID(func, tcp_reno_undo_cwnd)
+BTF_ID(func, tcp_slow_start)
+BTF_ID(func, tcp_cong_avoid_ai)
+#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
+BTF_ID(func, cubictcp_init)
+BTF_ID(func, cubictcp_recalc_ssthresh)
+BTF_ID(func, cubictcp_cong_avoid)
+BTF_ID(func, cubictcp_state)
+BTF_ID(func, cubictcp_cwnd_event)
+BTF_ID(func, cubictcp_acked)
+#endif
+#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP)
+BTF_ID(func, dctcp_init)
+BTF_ID(func, dctcp_update_alpha)
+BTF_ID(func, dctcp_cwnd_event)
+BTF_ID(func, dctcp_ssthresh)
+BTF_ID(func, dctcp_cwnd_undo)
+BTF_ID(func, dctcp_state)
+#endif
+#if IS_BUILTIN(CONFIG_TCP_CONG_BBR)
+BTF_ID(func, bbr_init)
+BTF_ID(func, bbr_main)
+BTF_ID(func, bbr_sndbuf_expand)
+BTF_ID(func, bbr_undo_cwnd)
+BTF_ID(func, bbr_cwnd_even),
+BTF_ID(func, bbr_ssthresh)
+BTF_ID(func, bbr_min_tso_segs)
+BTF_ID(func, bbr_set_state)
+#endif
+BTF_SET_END(bpf_tcp_ca_kfunc_ids)
+
+static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
+{
+	return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
+}
+
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
 	.get_func_proto		= bpf_tcp_ca_get_func_proto,
 	.is_valid_access	= bpf_tcp_ca_is_valid_access,
 	.btf_struct_access	= bpf_tcp_ca_btf_struct_access,
+	.check_kfunc_call	= bpf_tcp_ca_check_kfunc_call,
 };
 
 static int bpf_tcp_ca_init_member(const struct btf_type *t,
-- 
cgit v1.2.3


From 933d1aa32409ef4209c8065ee4ede68236659cd2 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:07 -0700
Subject: libbpf: Refactor bpf_object__resolve_ksyms_btf_id

This patch refactors most of the logic from
bpf_object__resolve_ksyms_btf_id() into a new function
bpf_object__resolve_ksym_var_btf_id().
It is to get ready for a later patch adding
bpf_object__resolve_ksym_func_btf_id() which resolves
a kernel function to the running kernel btf_id.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015207.1546749-1-kafai@fb.com
---
 tools/lib/bpf/libbpf.c | 124 ++++++++++++++++++++++++++-----------------------
 1 file changed, 67 insertions(+), 57 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index cebb0e852cf8..e289155510ca 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7395,75 +7395,85 @@ out:
 	return err;
 }
 
-static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+					       struct extern_desc *ext)
 {
-	struct extern_desc *ext;
+	const struct btf_type *targ_var, *targ_type;
+	__u32 targ_type_id, local_type_id;
+	const char *targ_var_name;
+	int i, id, btf_fd, err;
 	struct btf *btf;
-	int i, j, id, btf_fd, err;
 
-	for (i = 0; i < obj->nr_extern; i++) {
-		const struct btf_type *targ_var, *targ_type;
-		__u32 targ_type_id, local_type_id;
-		const char *targ_var_name;
-		int ret;
+	btf = obj->btf_vmlinux;
+	btf_fd = 0;
+	id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
+	if (id == -ENOENT) {
+		err = load_module_btfs(obj);
+		if (err)
+			return err;
 
-		ext = &obj->externs[i];
-		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
-			continue;
+		for (i = 0; i < obj->btf_module_cnt; i++) {
+			btf = obj->btf_modules[i].btf;
+			/* we assume module BTF FD is always >0 */
+			btf_fd = obj->btf_modules[i].fd;
+			id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
+			if (id != -ENOENT)
+				break;
+		}
+	}
+	if (id <= 0) {
+		pr_warn("extern (var ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
+			ext->name);
+		return -ESRCH;
+	}
 
-		btf = obj->btf_vmlinux;
-		btf_fd = 0;
-		id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
-		if (id == -ENOENT) {
-			err = load_module_btfs(obj);
-			if (err)
-				return err;
+	/* find local type_id */
+	local_type_id = ext->ksym.type_id;
 
-			for (j = 0; j < obj->btf_module_cnt; j++) {
-				btf = obj->btf_modules[j].btf;
-				/* we assume module BTF FD is always >0 */
-				btf_fd = obj->btf_modules[j].fd;
-				id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
-				if (id != -ENOENT)
-					break;
-			}
-		}
-		if (id <= 0) {
-			pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
-				ext->name);
-			return -ESRCH;
-		}
+	/* find target type_id */
+	targ_var = btf__type_by_id(btf, id);
+	targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
+	targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
 
-		/* find local type_id */
-		local_type_id = ext->ksym.type_id;
+	err = bpf_core_types_are_compat(obj->btf, local_type_id,
+					btf, targ_type_id);
+	if (err <= 0) {
+		const struct btf_type *local_type;
+		const char *targ_name, *local_name;
 
-		/* find target type_id */
-		targ_var = btf__type_by_id(btf, id);
-		targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
-		targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+		local_type = btf__type_by_id(obj->btf, local_type_id);
+		local_name = btf__name_by_offset(obj->btf, local_type->name_off);
+		targ_name = btf__name_by_offset(btf, targ_type->name_off);
 
-		ret = bpf_core_types_are_compat(obj->btf, local_type_id,
-						btf, targ_type_id);
-		if (ret <= 0) {
-			const struct btf_type *local_type;
-			const char *targ_name, *local_name;
+		pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+			ext->name, local_type_id,
+			btf_kind_str(local_type), local_name, targ_type_id,
+			btf_kind_str(targ_type), targ_name);
+		return -EINVAL;
+	}
 
-			local_type = btf__type_by_id(obj->btf, local_type_id);
-			local_name = btf__name_by_offset(obj->btf, local_type->name_off);
-			targ_name = btf__name_by_offset(btf, targ_type->name_off);
+	ext->is_set = true;
+	ext->ksym.kernel_btf_obj_fd = btf_fd;
+	ext->ksym.kernel_btf_id = id;
+	pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
+		 ext->name, id, btf_kind_str(targ_var), targ_var_name);
 
-			pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
-				ext->name, local_type_id,
-				btf_kind_str(local_type), local_name, targ_type_id,
-				btf_kind_str(targ_type), targ_name);
-			return -EINVAL;
-		}
+	return 0;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+	struct extern_desc *ext;
+	int i, err;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+			continue;
 
-		ext->is_set = true;
-		ext->ksym.kernel_btf_obj_fd = btf_fd;
-		ext->ksym.kernel_btf_id = id;
-		pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
-			 ext->name, id, btf_kind_str(targ_var), targ_var_name);
+		err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+		if (err)
+			return err;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 774e132e83d0f10a7ebbfe7db1debdaed6013f83 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:14 -0700
Subject: libbpf: Refactor codes for finding btf id of a kernel symbol

This patch refactors code, that finds kernel btf_id by kind
and symbol name, to a new function find_ksym_btf_id().

It also adds a new helper __btf_kind_str() to return
a string by the numeric kind value.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015214.1547069-1-kafai@fb.com
---
 tools/lib/bpf/libbpf.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e289155510ca..2f5595ae0b84 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1921,9 +1921,9 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
 	return btf_is_func_proto(t) ? t : NULL;
 }
 
-static const char *btf_kind_str(const struct btf_type *t)
+static const char *__btf_kind_str(__u16 kind)
 {
-	switch (btf_kind(t)) {
+	switch (kind) {
 	case BTF_KIND_UNKN: return "void";
 	case BTF_KIND_INT: return "int";
 	case BTF_KIND_PTR: return "ptr";
@@ -1945,6 +1945,11 @@ static const char *btf_kind_str(const struct btf_type *t)
 	}
 }
 
+static const char *btf_kind_str(const struct btf_type *t)
+{
+	return __btf_kind_str(btf_kind(t));
+}
+
 /*
  * Fetch integer attribute of BTF map definition. Such attributes are
  * represented using a pointer to an array, in which dimensionality of array
@@ -7395,18 +7400,17 @@ out:
 	return err;
 }
 
-static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
-					       struct extern_desc *ext)
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+			    __u16 kind, struct btf **res_btf,
+			    int *res_btf_fd)
 {
-	const struct btf_type *targ_var, *targ_type;
-	__u32 targ_type_id, local_type_id;
-	const char *targ_var_name;
 	int i, id, btf_fd, err;
 	struct btf *btf;
 
 	btf = obj->btf_vmlinux;
 	btf_fd = 0;
-	id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
+	id = btf__find_by_name_kind(btf, ksym_name, kind);
+
 	if (id == -ENOENT) {
 		err = load_module_btfs(obj);
 		if (err)
@@ -7416,17 +7420,35 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 			btf = obj->btf_modules[i].btf;
 			/* we assume module BTF FD is always >0 */
 			btf_fd = obj->btf_modules[i].fd;
-			id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
+			id = btf__find_by_name_kind(btf, ksym_name, kind);
 			if (id != -ENOENT)
 				break;
 		}
 	}
 	if (id <= 0) {
-		pr_warn("extern (var ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
-			ext->name);
+		pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
+			__btf_kind_str(kind), ksym_name);
 		return -ESRCH;
 	}
 
+	*res_btf = btf;
+	*res_btf_fd = btf_fd;
+	return id;
+}
+
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+					       struct extern_desc *ext)
+{
+	const struct btf_type *targ_var, *targ_type;
+	__u32 targ_type_id, local_type_id;
+	const char *targ_var_name;
+	int id, btf_fd = 0, err;
+	struct btf *btf = NULL;
+
+	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
+	if (id < 0)
+		return id;
+
 	/* find local type_id */
 	local_type_id = ext->ksym.type_id;
 
-- 
cgit v1.2.3


From 0c091e5c2d37696589a3e0131a809b5499899995 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:21 -0700
Subject: libbpf: Rename RELO_EXTERN to RELO_EXTERN_VAR

This patch renames RELO_EXTERN to RELO_EXTERN_VAR.
It is to avoid the confusion with a later patch adding
RELO_EXTERN_FUNC.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015221.1547722-1-kafai@fb.com
---
 tools/lib/bpf/libbpf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2f5595ae0b84..0ac53dce37b7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -185,7 +185,7 @@ enum reloc_type {
 	RELO_LD64,
 	RELO_CALL,
 	RELO_DATA,
-	RELO_EXTERN,
+	RELO_EXTERN_VAR,
 	RELO_SUBPROG_ADDR,
 };
 
@@ -3455,7 +3455,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		}
 		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
 			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
-		reloc_desc->type = RELO_EXTERN;
+		reloc_desc->type = RELO_EXTERN_VAR;
 		reloc_desc->insn_idx = insn_idx;
 		reloc_desc->sym_off = i; /* sym_off stores extern index */
 		return 0;
@@ -6218,7 +6218,7 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			insn[0].imm = obj->maps[relo->map_idx].fd;
 			relo->processed = true;
 			break;
-		case RELO_EXTERN:
+		case RELO_EXTERN_VAR:
 			ext = &obj->externs[relo->sym_off];
 			if (ext->type == EXT_KCFG) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-- 
cgit v1.2.3


From aa0b8d43e9537d371cbd3f272d3403f2b15201af Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:27 -0700
Subject: libbpf: Record extern sym relocation first

This patch records the extern sym relocs first before recording
subprog relocs.  The later patch will have relocs for extern
kernel function call which is also using BPF_JMP | BPF_CALL.
It will be easier to handle the extern symbols first in
the later patch.

is_call_insn() helper is added.  The existing is_ldimm64() helper
is renamed to is_ldimm64_insn() for consistency.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015227.1548623-1-kafai@fb.com
---
 tools/lib/bpf/libbpf.c | 63 +++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0ac53dce37b7..e1615c274250 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -573,14 +573,19 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
 	       insn->off == 0;
 }
 
-static bool is_ldimm64(struct bpf_insn *insn)
+static bool is_ldimm64_insn(struct bpf_insn *insn)
 {
 	return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
 }
 
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL);
+}
+
 static bool insn_is_pseudo_func(struct bpf_insn *insn)
 {
-	return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
 }
 
 static int
@@ -3408,31 +3413,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 
 	reloc_desc->processed = false;
 
-	/* sub-program call relocation */
-	if (insn->code == (BPF_JMP | BPF_CALL)) {
-		if (insn->src_reg != BPF_PSEUDO_CALL) {
-			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
-			return -LIBBPF_ERRNO__RELOC;
-		}
-		/* text_shndx can be 0, if no default "main" program exists */
-		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
-			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
-			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
-				prog->name, sym_name, sym_sec_name);
-			return -LIBBPF_ERRNO__RELOC;
-		}
-		if (sym->st_value % BPF_INSN_SZ) {
-			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
-				prog->name, sym_name, (size_t)sym->st_value);
-			return -LIBBPF_ERRNO__RELOC;
-		}
-		reloc_desc->type = RELO_CALL;
-		reloc_desc->insn_idx = insn_idx;
-		reloc_desc->sym_off = sym->st_value;
-		return 0;
-	}
-
-	if (!is_ldimm64(insn)) {
+	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
 		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 			prog->name, sym_name, insn_idx, insn->code);
 		return -LIBBPF_ERRNO__RELOC;
@@ -3461,6 +3442,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return 0;
 	}
 
+	/* sub-program call relocation */
+	if (is_call_insn(insn)) {
+		if (insn->src_reg != BPF_PSEUDO_CALL) {
+			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		/* text_shndx can be 0, if no default "main" program exists */
+		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+				prog->name, sym_name, sym_sec_name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		if (sym->st_value % BPF_INSN_SZ) {
+			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+				prog->name, sym_name, (size_t)sym->st_value);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		reloc_desc->type = RELO_CALL;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = sym->st_value;
+		return 0;
+	}
+
 	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
 		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
 			prog->name, sym_name, shdr_idx);
@@ -5700,7 +5705,7 @@ poison:
 		/* poison second part of ldimm64 to avoid confusing error from
 		 * verifier about "unknown opcode 00"
 		 */
-		if (is_ldimm64(insn))
+		if (is_ldimm64_insn(insn))
 			bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
 		bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
 		return 0;
@@ -5776,7 +5781,7 @@ poison:
 	case BPF_LD: {
 		__u64 imm;
 
-		if (!is_ldimm64(insn) ||
+		if (!is_ldimm64_insn(insn) ||
 		    insn[0].src_reg != 0 || insn[0].off != 0 ||
 		    insn_idx + 1 >= prog->insns_cnt ||
 		    insn[1].code != 0 || insn[1].dst_reg != 0 ||
-- 
cgit v1.2.3


From 5bd022ec01f060f30672cc6383b8b04e75a4310d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:34 -0700
Subject: libbpf: Support extern kernel function

This patch is to make libbpf able to handle the following extern
kernel function declaration and do the needed relocations before
loading the bpf program to the kernel.

extern int foo(struct sock *) __attribute__((section(".ksyms")))

In the collect extern phase, needed changes is made to
bpf_object__collect_externs() and find_extern_btf_id() to collect
extern function in ".ksyms" section.  The func in the BTF datasec also
needs to be replaced by an int var.  The idea is similar to the existing
handling in extern var.  In case the BTF may not have a var, a dummy ksym
var is added at the beginning of bpf_object__collect_externs()
if there is func under ksyms datasec.  It will also change the
func linkage from extern to global which the kernel can support.
It also assigns a param name if it does not have one.

In the collect relo phase, it will record the kernel function
call as RELO_EXTERN_FUNC.

bpf_object__resolve_ksym_func_btf_id() is added to find the func
btf_id of the running kernel.

During actual relocation, it will patch the BPF_CALL instruction with
src_reg = BPF_PSEUDO_FUNC_CALL and insn->imm set to the running
kernel func's btf_id.

The required LLVM patch: https://reviews.llvm.org/D93563

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015234.1548923-1-kafai@fb.com
---
 tools/lib/bpf/libbpf.c | 174 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 12 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e1615c274250..7aad78dbb4b4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -186,6 +186,7 @@ enum reloc_type {
 	RELO_CALL,
 	RELO_DATA,
 	RELO_EXTERN_VAR,
+	RELO_EXTERN_FUNC,
 	RELO_SUBPROG_ADDR,
 };
 
@@ -1955,6 +1956,11 @@ static const char *btf_kind_str(const struct btf_type *t)
 	return __btf_kind_str(btf_kind(t));
 }
 
+static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+	return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
+}
+
 /*
  * Fetch integer attribute of BTF map definition. Such attributes are
  * represented using a pointer to an array, in which dimensionality of array
@@ -3019,7 +3025,7 @@ static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 {
 	const struct btf_type *t;
-	const char *var_name;
+	const char *tname;
 	int i, n;
 
 	if (!btf)
@@ -3029,14 +3035,18 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 	for (i = 1; i <= n; i++) {
 		t = btf__type_by_id(btf, i);
 
-		if (!btf_is_var(t))
+		if (!btf_is_var(t) && !btf_is_func(t))
 			continue;
 
-		var_name = btf__name_by_offset(btf, t->name_off);
-		if (strcmp(var_name, ext_name))
+		tname = btf__name_by_offset(btf, t->name_off);
+		if (strcmp(tname, ext_name))
 			continue;
 
-		if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+		if (btf_is_var(t) &&
+		    btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+			return -EINVAL;
+
+		if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
 			return -EINVAL;
 
 		return i;
@@ -3149,12 +3159,48 @@ static int find_int_btf_id(const struct btf *btf)
 	return 0;
 }
 
+static int add_dummy_ksym_var(struct btf *btf)
+{
+	int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+	const struct btf_var_secinfo *vs;
+	const struct btf_type *sec;
+
+	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
+					    BTF_KIND_DATASEC);
+	if (sec_btf_id < 0)
+		return 0;
+
+	sec = btf__type_by_id(btf, sec_btf_id);
+	vs = btf_var_secinfos(sec);
+	for (i = 0; i < btf_vlen(sec); i++, vs++) {
+		const struct btf_type *vt;
+
+		vt = btf__type_by_id(btf, vs->type);
+		if (btf_is_func(vt))
+			break;
+	}
+
+	/* No func in ksyms sec.  No need to add dummy var. */
+	if (i == btf_vlen(sec))
+		return 0;
+
+	int_btf_id = find_int_btf_id(btf);
+	dummy_var_btf_id = btf__add_var(btf,
+					"dummy_ksym",
+					BTF_VAR_GLOBAL_ALLOCATED,
+					int_btf_id);
+	if (dummy_var_btf_id < 0)
+		pr_warn("cannot create a dummy_ksym var\n");
+
+	return dummy_var_btf_id;
+}
+
 static int bpf_object__collect_externs(struct bpf_object *obj)
 {
 	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 	const struct btf_type *t;
 	struct extern_desc *ext;
-	int i, n, off;
+	int i, n, off, dummy_var_btf_id;
 	const char *ext_name, *sec_name;
 	Elf_Scn *scn;
 	GElf_Shdr sh;
@@ -3166,6 +3212,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	if (elf_sec_hdr(obj, scn, &sh))
 		return -LIBBPF_ERRNO__FORMAT;
 
+	dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+	if (dummy_var_btf_id < 0)
+		return dummy_var_btf_id;
+
 	n = sh.sh_size / sh.sh_entsize;
 	pr_debug("looking for externs among %d symbols...\n", n);
 
@@ -3210,6 +3260,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
 
 		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+			if (btf_is_func(t)) {
+				pr_warn("extern function %s is unsupported under %s section\n",
+					ext->name, KCONFIG_SEC);
+				return -ENOTSUP;
+			}
 			kcfg_sec = sec;
 			ext->type = EXT_KCFG;
 			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
@@ -3231,6 +3286,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 				return -ENOTSUP;
 			}
 		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+			if (btf_is_func(t) && ext->is_weak) {
+				pr_warn("extern weak function %s is unsupported\n",
+					ext->name);
+				return -ENOTSUP;
+			}
 			ksym_sec = sec;
 			ext->type = EXT_KSYM;
 			skip_mods_and_typedefs(obj->btf, t->type,
@@ -3257,7 +3317,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 		 * extern variables in DATASEC
 		 */
 		int int_btf_id = find_int_btf_id(obj->btf);
+		/* For extern function, a dummy_var added earlier
+		 * will be used to replace the vs->type and
+		 * its name string will be used to refill
+		 * the missing param's name.
+		 */
+		const struct btf_type *dummy_var;
 
+		dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
 		for (i = 0; i < obj->nr_extern; i++) {
 			ext = &obj->externs[i];
 			if (ext->type != EXT_KSYM)
@@ -3276,12 +3343,32 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
 			ext = find_extern_by_name(obj, ext_name);
 			if (!ext) {
-				pr_warn("failed to find extern definition for BTF var '%s'\n",
-					ext_name);
+				pr_warn("failed to find extern definition for BTF %s '%s'\n",
+					btf_kind_str(vt), ext_name);
 				return -ESRCH;
 			}
-			btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
-			vt->type = int_btf_id;
+			if (btf_is_func(vt)) {
+				const struct btf_type *func_proto;
+				struct btf_param *param;
+				int j;
+
+				func_proto = btf__type_by_id(obj->btf,
+							     vt->type);
+				param = btf_params(func_proto);
+				/* Reuse the dummy_var string if the
+				 * func proto does not have param name.
+				 */
+				for (j = 0; j < btf_vlen(func_proto); j++)
+					if (param[j].type && !param[j].name_off)
+						param[j].name_off =
+							dummy_var->name_off;
+				vs->type = dummy_var_btf_id;
+				vt->info &= ~0xffff;
+				vt->info |= BTF_FUNC_GLOBAL;
+			} else {
+				btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+				vt->type = int_btf_id;
+			}
 			vs->offset = off;
 			vs->size = sizeof(int);
 		}
@@ -3436,7 +3523,10 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		}
 		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
 			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
-		reloc_desc->type = RELO_EXTERN_VAR;
+		if (insn->code == (BPF_JMP | BPF_CALL))
+			reloc_desc->type = RELO_EXTERN_FUNC;
+		else
+			reloc_desc->type = RELO_EXTERN_VAR;
 		reloc_desc->insn_idx = insn_idx;
 		reloc_desc->sym_off = i; /* sym_off stores extern index */
 		return 0;
@@ -6241,6 +6331,12 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 			}
 			relo->processed = true;
 			break;
+		case RELO_EXTERN_FUNC:
+			ext = &obj->externs[relo->sym_off];
+			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+			insn[0].imm = ext->ksym.kernel_btf_id;
+			relo->processed = true;
+			break;
 		case RELO_SUBPROG_ADDR:
 			insn[0].src_reg = BPF_PSEUDO_FUNC;
 			/* will be handled as a follow up pass */
@@ -7361,6 +7457,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 {
 	char sym_type, sym_name[500];
 	unsigned long long sym_addr;
+	const struct btf_type *t;
 	struct extern_desc *ext;
 	int ret, err = 0;
 	FILE *f;
@@ -7387,6 +7484,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 		if (!ext || ext->type != EXT_KSYM)
 			continue;
 
+		t = btf__type_by_id(obj->btf, ext->btf_id);
+		if (!btf_is_var(t))
+			continue;
+
 		if (ext->is_set && ext->ksym.addr != sym_addr) {
 			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
 				sym_name, ext->ksym.addr, sym_addr);
@@ -7488,8 +7589,53 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 	return 0;
 }
 
+static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
+						struct extern_desc *ext)
+{
+	int local_func_proto_id, kfunc_proto_id, kfunc_id;
+	const struct btf_type *kern_func;
+	struct btf *kern_btf = NULL;
+	int ret, kern_btf_fd = 0;
+
+	local_func_proto_id = ext->ksym.type_id;
+
+	kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
+				    &kern_btf, &kern_btf_fd);
+	if (kfunc_id < 0) {
+		pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
+			ext->name);
+		return kfunc_id;
+	}
+
+	if (kern_btf != obj->btf_vmlinux) {
+		pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
+			ext->name);
+		return -ENOTSUP;
+	}
+
+	kern_func = btf__type_by_id(kern_btf, kfunc_id);
+	kfunc_proto_id = kern_func->type;
+
+	ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+					kern_btf, kfunc_proto_id);
+	if (ret <= 0) {
+		pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
+			ext->name, local_func_proto_id, kfunc_proto_id);
+		return -EINVAL;
+	}
+
+	ext->is_set = true;
+	ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
+	ext->ksym.kernel_btf_id = kfunc_id;
+	pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
+		 ext->name, kfunc_id);
+
+	return 0;
+}
+
 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 {
+	const struct btf_type *t;
 	struct extern_desc *ext;
 	int i, err;
 
@@ -7498,7 +7644,11 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
 			continue;
 
-		err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+		t = btf__type_by_id(obj->btf, ext->btf_id);
+		if (btf_is_var(t))
+			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+		else
+			err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
 		if (err)
 			return err;
 	}
-- 
cgit v1.2.3


From 39cd9e0f6783fd2dd2b0e95500e34575b3707ed8 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:40 -0700
Subject: bpf: selftests: Rename bictcp to bpf_cubic

As a similar chanage in the kernel, this patch gives the proper
name to the bpf cubic.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015240.1550074-1-kafai@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_cubic.c | 30 +++++++++++++--------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 6939bfd8690f..33c4d2bded64 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -174,8 +174,8 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
  * as long as it is used in one of the func ptr
  * under SEC(".struct_ops").
  */
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
 	struct bictcp *ca = inet_csk_ca(sk);
 
@@ -192,7 +192,7 @@ void BPF_PROG(bictcp_init, struct sock *sk)
  * The remaining tcp-cubic functions have an easier way.
  */
 SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
 		struct bictcp *ca = inet_csk_ca(sk);
@@ -384,7 +384,7 @@ tcp_friendliness:
 }
 
 /* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -403,7 +403,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
 	tcp_cong_avoid_ai(tp, ca->cnt, acked);
 }
 
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
@@ -420,7 +420,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
 	return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
 {
 	if (new_state == TCP_CA_Loss) {
 		bictcp_reset(inet_csk_ca(sk));
@@ -496,7 +496,7 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
 	}
 }
 
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
 		    const struct ack_sample *sample)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
@@ -525,7 +525,7 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
 		hystart_update(sk, delay);
 }
 
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -534,12 +534,12 @@ __u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
 
 SEC(".struct_ops")
 struct tcp_congestion_ops cubic = {
-	.init		= (void *)bictcp_init,
-	.ssthresh	= (void *)bictcp_recalc_ssthresh,
-	.cong_avoid	= (void *)bictcp_cong_avoid,
-	.set_state	= (void *)bictcp_state,
-	.undo_cwnd	= (void *)tcp_reno_undo_cwnd,
-	.cwnd_event	= (void *)bictcp_cwnd_event,
-	.pkts_acked     = (void *)bictcp_acked,
+	.init		= (void *)bpf_cubic_init,
+	.ssthresh	= (void *)bpf_cubic_recalc_ssthresh,
+	.cong_avoid	= (void *)bpf_cubic_cong_avoid,
+	.set_state	= (void *)bpf_cubic_state,
+	.undo_cwnd	= (void *)bpf_cubic_undo_cwnd,
+	.cwnd_event	= (void *)bpf_cubic_cwnd_event,
+	.pkts_acked     = (void *)bpf_cubic_acked,
 	.name		= "bpf_cubic",
 };
-- 
cgit v1.2.3


From 78e60bbbe8e8f614b6a453d8a780d9e6f77749a8 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:46 -0700
Subject: bpf: selftests: Bpf_cubic and bpf_dctcp calling kernel functions

This patch removes the bpf implementation of tcp_slow_start()
and tcp_cong_avoid_ai().  Instead, it directly uses the kernel
implementation.

It also replaces the bpf_cubic_undo_cwnd implementation by directly
calling tcp_reno_undo_cwnd().  bpf_dctcp also directly calls
tcp_reno_cong_avoid() instead.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015246.1551062-1-kafai@fb.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 29 ++-------------------------
 tools/testing/selftests/bpf/progs/bpf_cubic.c |  6 +++---
 tools/testing/selftests/bpf/progs/bpf_dctcp.c | 22 ++++++--------------
 3 files changed, 11 insertions(+), 46 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 91f0fac632f4..029589c008c9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -187,16 +187,6 @@ struct tcp_congestion_ops {
 	typeof(y) __y = (y);			\
 	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
 
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
-	__u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
-	acked -= cwnd - tp->snd_cwnd;
-	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
-	return acked;
-}
-
 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
 {
 	return tp->snd_cwnd < tp->snd_ssthresh;
@@ -213,22 +203,7 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
 	return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
 }
 
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
-{
-	/* If credits accumulated at a higher w, apply them gently now. */
-	if (tp->snd_cwnd_cnt >= w) {
-		tp->snd_cwnd_cnt = 0;
-		tp->snd_cwnd++;
-	}
-
-	tp->snd_cwnd_cnt += acked;
-	if (tp->snd_cwnd_cnt >= w) {
-		__u32 delta = tp->snd_cwnd_cnt / w;
-
-		tp->snd_cwnd_cnt -= delta * w;
-		tp->snd_cwnd += delta;
-	}
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
-}
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
 #endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 33c4d2bded64..f62df4d023f9 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -525,11 +525,11 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
 		hystart_update(sk, delay);
 }
 
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
+
 __u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
-
-	return max(tp->snd_cwnd, tp->prior_cwnd);
+	return tcp_reno_undo_cwnd(sk);
 }
 
 SEC(".struct_ops")
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 4dc1a967776a..fd42247da8b4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -194,22 +194,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
 	return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
 }
 
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (!tcp_is_cwnd_limited(sk))
-		return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
 
-	/* In "safe" area, increase. */
-	if (tcp_in_slow_start(tp)) {
-		acked = tcp_slow_start(tp, acked);
-		if (!acked)
-			return;
-	}
-	/* In dangerous area, increase slowly. */
-	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops/dctcp_reno_cong_avoid")
+void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+	tcp_reno_cong_avoid(sk, ack, acked);
 }
 
 SEC(".struct_ops")
@@ -226,7 +216,7 @@ struct tcp_congestion_ops dctcp = {
 	.in_ack_event   = (void *)dctcp_update_alpha,
 	.cwnd_event	= (void *)dctcp_cwnd_event,
 	.ssthresh	= (void *)dctcp_ssthresh,
-	.cong_avoid	= (void *)tcp_reno_cong_avoid,
+	.cong_avoid	= (void *)dctcp_cong_avoid,
 	.undo_cwnd	= (void *)dctcp_cwnd_undo,
 	.set_state	= (void *)dctcp_state,
 	.flags		= TCP_CONG_NEEDS_ECN,
-- 
cgit v1.2.3


From 7bd1590d4eba1583f6ee85e8cfe556505f761e19 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 24 Mar 2021 18:52:52 -0700
Subject: bpf: selftests: Add kfunc_call test

This patch adds a few kernel function bpf_kfunc_call_test*() for the
selftest's test_run purpose.  They will be allowed for tc_cls prog.

The selftest calling the kernel function bpf_kfunc_call_test*()
is also added in this patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015252.1551395-1-kafai@fb.com
---
 include/linux/bpf.h                                |  6 +++
 net/bpf/test_run.c                                 | 28 ++++++++++
 net/core/filter.c                                  |  1 +
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  | 59 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/kfunc_call_test.c  | 47 +++++++++++++++++
 .../selftests/bpf/progs/kfunc_call_test_subprog.c  | 42 +++++++++++++++
 6 files changed, 183 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/kfunc_call.c
 create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_test.c
 create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b5b7967e3ff3..9fdd839b418c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1532,6 +1532,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 				const union bpf_attr *kattr,
 				union bpf_attr __user *uattr);
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -1731,6 +1732,11 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
+static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+	return false;
+}
+
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 4aabf71cd95d..a5d72c48fb66 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2017 Facebook
  */
 #include <linux/bpf.h>
+#include <linux/btf_ids.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/etherdevice.h>
@@ -213,10 +214,37 @@ int noinline bpf_modify_return_test(int a, int *b)
 	*b += 1;
 	return a + *b;
 }
+
+u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+	return a + b + c + d;
+}
+
+int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+	return a + b;
+}
+
+struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+{
+	return sk;
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
 
+BTF_SET_START(test_sk_kfunc_ids)
+BTF_ID(func, bpf_kfunc_call_test1)
+BTF_ID(func, bpf_kfunc_call_test2)
+BTF_ID(func, bpf_kfunc_call_test3)
+BTF_SET_END(test_sk_kfunc_ids)
+
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+{
+	return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
+}
+
 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
 			   u32 headroom, u32 tailroom)
 {
diff --git a/net/core/filter.c b/net/core/filter.c
index 17dc159ec40c..cae56d08a670 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9813,6 +9813,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
 	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
 	.gen_prologue		= tc_cls_act_prologue,
 	.gen_ld_abs		= bpf_gen_ld_abs,
+	.check_kfunc_call	= bpf_prog_test_check_kfunc_call,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..7fc0951ee75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test_subprog.skel.h"
+
+static void test_main(void)
+{
+	struct kfunc_call_test *skel;
+	int prog_fd, retval, err;
+
+	skel = kfunc_call_test__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		return;
+
+	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	ASSERT_OK(err, "bpf_prog_test_run(test1)");
+	ASSERT_EQ(retval, 12, "test1-retval");
+
+	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	ASSERT_OK(err, "bpf_prog_test_run(test2)");
+	ASSERT_EQ(retval, 3, "test2-retval");
+
+	kfunc_call_test__destroy(skel);
+}
+
+static void test_subprog(void)
+{
+	struct kfunc_call_test_subprog *skel;
+	int prog_fd, retval, err;
+
+	skel = kfunc_call_test_subprog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		return;
+
+	prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	ASSERT_OK(err, "bpf_prog_test_run(test1)");
+	ASSERT_EQ(retval, 10, "test1-retval");
+	ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+	ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+
+	kfunc_call_test_subprog__destroy(skel);
+}
+
+void test_kfunc_call(void)
+{
+	if (test__start_subtest("main"))
+		test_main();
+
+	if (test__start_subtest("subprog"))
+		test_subprog();
+}
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..470f8723e463
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+				  __u32 c, __u64 d) __ksym;
+
+SEC("classifier")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+	struct bpf_sock *sk = skb->sk;
+
+	if (!sk)
+		return -1;
+
+	sk = bpf_sk_fullsock(sk);
+	if (!sk)
+		return -1;
+
+	return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+	struct bpf_sock *sk = skb->sk;
+	__u64 a = 1ULL << 32;
+	__u32 ret;
+
+	if (!sk)
+		return -1;
+
+	sk = bpf_sk_fullsock(sk);
+	if (!sk)
+		return -1;
+
+	a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+	ret = a >> 32;   /* ret should be 2 */
+	ret += (__u32)a; /* ret should be 12 */
+
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..b2dcb7d9cb03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern const int bpf_prog_active __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+				  __u32 c, __u64 d) __ksym;
+extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+int active_res = -1;
+int sk_state = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+	struct bpf_sock *sk = skb->sk;
+	int *active;
+
+	if (!sk)
+		return -1;
+
+	sk = bpf_sk_fullsock(sk);
+	if (!sk)
+		return -1;
+
+	active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+					bpf_get_smp_processor_id());
+	if (active)
+		active_res = *active;
+
+	sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+	return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+	return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 7e32a09fdcb38d97f148f72bf78a3b49e8d1612d Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Sun, 28 Mar 2021 17:35:15 +0530
Subject: bpf: tcp: Remove comma which is causing build error

Currently, building the bpf-next source with the CONFIG_BPF_SYSCALL
enabled is causing a compilation error:

"net/ipv4/bpf_tcp_ca.c:209:28: error: expected identifier or '(' before
',' token"

Fix this by removing an unnecessary comma.

Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc")
Reported-by: syzbot+0b74d8ec3bf0cc4e4209@syzkaller.appspotmail.com
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210328120515.113895-1-atulgopinathan@gmail.com
---
 net/ipv4/bpf_tcp_ca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 40520b77a307..12777d444d0f 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -206,7 +206,7 @@ BTF_ID(func, bbr_init)
 BTF_ID(func, bbr_main)
 BTF_ID(func, bbr_sndbuf_expand)
 BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_even),
+BTF_ID(func, bbr_cwnd_even)
 BTF_ID(func, bbr_ssthresh)
 BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
-- 
cgit v1.2.3


From 8406d38fde5c3a2d3182b30f9a3b457aa79949e4 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:36 +0530
Subject: af_x25.c: Fix a spello

s/facilties/facilities/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ff687b97b2d9..44d6566dd23e 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1018,7 +1018,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 
 	/*
 	 * current neighbour/link might impose additional limits
-	 * on certain facilties
+	 * on certain facilities
 	 */
 
 	x25_limit_facilities(&facilities, nb);
-- 
cgit v1.2.3


From e919ee389c18c04c2eb9d4b0fdbc9b52545cce37 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:37 +0530
Subject: bearer.h: Spellos fixed

s/initalized/initialized/ ...three different places

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 6bf4550aa1ac..57c6a1a719e2 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -154,9 +154,9 @@ struct tipc_media {
  * care of initializing all other fields.
  */
 struct tipc_bearer {
-	void __rcu *media_ptr;			/* initalized by media */
-	u32 mtu;				/* initalized by media */
-	struct tipc_media_addr addr;		/* initalized by media */
+	void __rcu *media_ptr;			/* initialized by media */
+	u32 mtu;				/* initialized by media */
+	struct tipc_media_addr addr;		/* initialized by media */
 	char name[TIPC_MAX_BEARER_NAME];
 	struct tipc_media *media;
 	struct tipc_media_addr bcast_addr;
-- 
cgit v1.2.3


From a66e04ce0e01ec8be981a583ae200ac1f0dbd736 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:38 +0530
Subject: ipv4: ip_output.c: Couple of typo fixes

s/readibility/readability/
s/insufficent/insufficient/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3aab53beb4ea..c3efc7d658f6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -34,7 +34,7 @@
  *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
  *		Andi Kleen	:	Split fast and slow ip_build_xmit path
  *					for decreased register pressure on x86
- *					and more readibility.
+ *					and more readability.
  *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
  *					silently drop skb instead of failing with -EPERM.
  *		Detlev Wengorz	:	Copy protocol for fragments.
@@ -262,7 +262,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
 	 *    interface with a smaller MTU.
 	 *  - Arriving GRO skb (or GSO skb in a virtualized environment) that is
 	 *    bridged to a NETIF_F_TSO tunnel stacked over an interface with an
-	 *    insufficent MTU.
+	 *    insufficient MTU.
 	 */
 	features = netif_skb_features(skb);
 	BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET);
-- 
cgit v1.2.3


From e5ca43e82d91212e24686fafca118f25cf985bfb Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:39 +0530
Subject: ipv4: tcp_lp.c: Couple of typo fixes

s/resrved/reserved/
s/within/within/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_lp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index e6459537d4d2..82b36ec3f2f8 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -63,7 +63,7 @@ enum tcp_lp_state {
  * @sowd: smoothed OWD << 3
  * @owd_min: min OWD
  * @owd_max: max OWD
- * @owd_max_rsv: resrved max owd
+ * @owd_max_rsv: reserved max owd
  * @remote_hz: estimated remote HZ
  * @remote_ref_time: remote reference time
  * @local_ref_time: local reference time
@@ -305,7 +305,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 
 	/* FIXME: try to reset owd_min and owd_max here
 	 * so decrease the chance the min/max is no longer suitable
-	 * and will usually within threshold when whithin inference */
+	 * and will usually within threshold when within inference */
 	lp->owd_min = lp->sowd >> 3;
 	lp->owd_max = lp->sowd >> 2;
 	lp->owd_max_rsv = lp->sowd >> 2;
-- 
cgit v1.2.3


From 912b519afc8f13743b473910504e8bfb9eb7de77 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:40 +0530
Subject: ipv6: addrconf.c: Fix a typo

s/Identifers/Identifiers/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b502f78d5091..120073ffb666 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2358,7 +2358,7 @@ regen:
 	/* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1:
 	 * check if generated address is not inappropriate:
 	 *
-	 * - Reserved IPv6 Interface Identifers
+	 * - Reserved IPv6 Interface Identifiers
 	 * - XXX: already assigned to an address on the device
 	 */
 
-- 
cgit v1.2.3


From 89e8347f0ff4b8bfe23e174e6661902582754394 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:41 +0530
Subject: ipv6: route.c: A spello fix

s/notfication/notification/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ebb7519bec2a..d99dd4d36252 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6075,7 +6075,7 @@ void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
 
 	if (!rcu_access_pointer(f6i->fib6_node))
 		/* The route was removed from the tree, do not send
-		 * notfication.
+		 * notification.
 		 */
 		return;
 
-- 
cgit v1.2.3


From bf05d48dbda80d864dbdb46c6641954df3bf45d3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:42 +0530
Subject: iucv: af_iucv.c: Couple of typo fixes

s/unitialized/uninitialized/
s/notifcations/notifications/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6092d5cb7168..0fdb389c3390 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -621,7 +621,7 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
 	for_each_netdev_rcu(&init_net, dev) {
 		if (!memcmp(dev->perm_addr, uid, 8)) {
 			memcpy(iucv->src_user_id, sa->siucv_user_id, 8);
-			/* Check for unitialized siucv_name */
+			/* Check for uninitialized siucv_name */
 			if (strncmp(sa->siucv_name, "        ", 8) == 0)
 				__iucv_auto_name(iucv);
 			else
@@ -2134,7 +2134,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 }
 
 /**
- * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets
+ * afiucv_hs_callback_txnotify() - handle send notifications from HiperSockets
  *                                 transport
  **/
 static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n)
-- 
cgit v1.2.3


From 71a2fae50895b32cd600c0c4eff5df9c9c9933da Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:43 +0530
Subject: kcm: kcmsock.c: Couple of typo fixes

s/synchonization/synchronization/
s/aready/already/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d0b56ffbb057..6201965bd822 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -663,7 +663,7 @@ do_frag:
 
 				/* Hard failure in sending message, abort this
 				 * psock since it has lost framing
-				 * synchonization and retry sending the
+				 * synchronization and retry sending the
 				 * message from the beginning.
 				 */
 				kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
@@ -1419,7 +1419,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
 
 	write_lock_bh(&csk->sk_callback_lock);
 
-	/* Check if sk_user_data is aready by KCM or someone else.
+	/* Check if sk_user_data is already by KCM or someone else.
 	 * Must be done under lock to prevent race conditions.
 	 */
 	if (csk->sk_user_data) {
-- 
cgit v1.2.3


From 61f8406010843584eaf04d195fbd707f654cfb89 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:44 +0530
Subject: llc: llc_core.c: COuple of typo fixes

s/searchs/searches/   ....two different places.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 64d4bef04e73..6e387aadffce 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -59,10 +59,10 @@ out:
 }
 
 /**
- *	llc_sap_find - searchs a SAP in station
+ *	llc_sap_find - searches a SAP in station
  *	@sap_value: sap to be found
  *
- *	Searchs for a sap in the sap list of the LLC's station upon the sap ID.
+ *	Searches for a sap in the sap list of the LLC's station upon the sap ID.
  *	If the sap is found it will be refcounted and the user will have to do
  *	a llc_sap_put after use.
  *	Returns the sap or %NULL if not found.
-- 
cgit v1.2.3


From b18dacab6bc4a31b08b134a23d67f9fb2dd5a844 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:45 +0530
Subject: mac80211: cfg.c: A typo fix

s/assocaited/associated/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/cfg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 68a0de02b561..a0a11624a5be 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1486,7 +1486,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 		sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
 
 	/* auth flags will be set later for TDLS,
-	 * and for unassociated stations that move to assocaited */
+	 * and for unassociated stations that move to associated */
 	if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
 	    !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) &&
 	      (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
-- 
cgit v1.2.3


From 55320b82d634b15a6ac6c5cdbbde1ced2cbfa06d Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:46 +0530
Subject: mptcp: subflow.c: Fix a typo

s/concerened/concerned/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d17d39ccdf34..6c074d3db0ed 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1081,7 +1081,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
  * In mptcp, rwin is about the mptcp-level connection data.
  *
  * Data that is still on the ssk rx queue can thus be ignored,
- * as far as mptcp peer is concerened that data is still inflight.
+ * as far as mptcp peer is concerned that data is still inflight.
  * DSS ACK is updated when skb is moved to the mptcp rx queue.
  */
 void mptcp_space(const struct sock *ssk, int *space, int *full_space)
-- 
cgit v1.2.3


From 195a8ec4033b4124f6864892e71dcef24ba74a5a Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:47 +0530
Subject: ncsi: internal.h: Fix a spello

s/Firware/Firmware/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index e37102546be6..49031f804276 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -100,7 +100,7 @@ enum {
 struct ncsi_channel_version {
 	u32 version;		/* Supported BCD encoded NCSI version */
 	u32 alpha2;		/* Supported BCD encoded NCSI version */
-	u8  fw_name[12];	/* Firware name string                */
+	u8  fw_name[12];	/* Firmware name string                */
 	u32 fw_version;		/* Firmware version                   */
 	u16 pci_ids[4];		/* PCI identification                 */
 	u32 mf_id;		/* Manufacture ID                     */
-- 
cgit v1.2.3


From f60d94f0d7b42dd1caed258ff23b93e038bde745 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:48 +0530
Subject: netfilter: ipvs: A spello fix

s/registerd/registered/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0c132ff9b446..128690c512df 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2398,7 +2398,7 @@ static int __net_init __ip_vs_init(struct net *net)
 	if (ipvs == NULL)
 		return -ENOMEM;
 
-	/* Hold the beast until a service is registerd */
+	/* Hold the beast until a service is registered */
 	ipvs->enable = 0;
 	ipvs->net = net;
 	/* Counters used for creating unique names */
-- 
cgit v1.2.3


From bcae6d5faf3fe4746b9e96a8a3d6918cc05dc252 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:49 +0530
Subject: netfilter: nf_conntrack_acct.c: A typo fix

s/Accouting/Accounting/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_acct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 2ccda8ace796..91bc8df3e4b0 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/* Accouting handling for netfilter. */
+/* Accounting handling for netfilter. */
 
 /*
  * (C) 2008 Krzysztof Piotr Oledzki <ole@ans.pl>
-- 
cgit v1.2.3


From 0184235ec6d1decb56740d9c99fdd0035b1d4c9d Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:50 +0530
Subject: node.c: A typo fix

s/synching/syncing/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 61c38eaaa298..707d0dc71fad 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2014,7 +2014,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 		return true;
 	}
 
-	/* No synching needed if only one link */
+	/* No syncing needed if only one link */
 	if (!pl || !tipc_link_is_up(pl))
 		return true;
 
-- 
cgit v1.2.3


From f2e3093172b9726f3e16a47d5d83ce2edf4060f0 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:51 +0530
Subject: reg.c: Fix a spello

s/ingoring/ignoring/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/reg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 21536c48deec..68db914df642 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3404,7 +3404,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy)
 }
 
 /*
- * Restoring regulatory settings involves ingoring any
+ * Restoring regulatory settings involves ignoring any
  * possibly stale country IE information and user regulatory
  * settings if so desired, this includes any beacon hints
  * learned as we could have traveled outside to another country
-- 
cgit v1.2.3


From fb373c8455af40faf72d7b8c7f53ed302bd554d9 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:52 +0530
Subject: sm_statefuns.c: Mundane spello fixes

s/simulataneous/simultaneous/    ....in three dirrent places.
s/tempory/temporary/
s/interpeter/interpreter/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_statefuns.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index af2b7041fa4e..7632714c1e5b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1452,7 +1452,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
 	return 'E';
 }
 
-/* Common helper routine for both duplicate and simulataneous INIT
+/* Common helper routine for both duplicate and simultaneous INIT
  * chunk handling.
  */
 static enum sctp_disposition sctp_sf_do_unexpected_init(
@@ -1685,7 +1685,7 @@ enum sctp_disposition sctp_sf_do_5_2_1_siminit(
 					void *arg,
 					struct sctp_cmd_seq *commands)
 {
-	/* Call helper to do the real work for both simulataneous and
+	/* Call helper to do the real work for both simultaneous and
 	 * duplicate INIT chunk handling.
 	 */
 	return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
@@ -1740,7 +1740,7 @@ enum sctp_disposition sctp_sf_do_5_2_2_dupinit(
 					void *arg,
 					struct sctp_cmd_seq *commands)
 {
-	/* Call helper to do the real work for both simulataneous and
+	/* Call helper to do the real work for both simultaneous and
 	 * duplicate INIT chunk handling.
 	 */
 	return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
@@ -2221,11 +2221,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
 		break;
 	}
 
-	/* Delete the tempory new association. */
+	/* Delete the temporary new association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
-	/* Restore association pointer to provide SCTP command interpeter
+	/* Restore association pointer to provide SCTP command interpreter
 	 * with a valid context in case it needs to manipulate
 	 * the queues */
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC,
-- 
cgit v1.2.3


From aa8ef1b9abd413d5c062d16e3d6b2fb418f9091c Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:53 +0530
Subject: xfrm_policy.c : Mundane typo fix

s/sucessful/successful/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b74f28cabe24..156347fd7e2e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -688,7 +688,7 @@ static void xfrm_hash_resize(struct work_struct *work)
 }
 
 /* Make sure *pol can be inserted into fastbin.
- * Useful to check that later insert requests will be sucessful
+ * Useful to check that later insert requests will be successful
  * (provided xfrm_policy_lock is held throughout).
  */
 static struct xfrm_pol_inexact_bin *
-- 
cgit v1.2.3


From a7fd0e6d758f0f29268438287ecf7873c069a3ae Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sat, 27 Mar 2021 04:42:54 +0530
Subject: xfrm_user.c: Added a punctuation

s/wouldnt/wouldn\'t/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5a0ef4361e43..df8bc8fc724c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1761,7 +1761,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	/* shouldn't excl be based on nlh flags??
 	 * Aha! this is anti-netlink really i.e  more pfkey derived
-	 * in netlink excl is a flag and you wouldnt need
+	 * in netlink excl is a flag and you wouldn't need
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
-- 
cgit v1.2.3


From c127ffa23e41e6cb2251d5c30e54d60777034caa Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 26 Mar 2021 14:09:37 +0100
Subject: selftests: tc-testing: add action police selftest for packets per
 second

Add selftest cases in action police for packets per second.
These tests depend on corresponding iproute2 command support.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/police.json        | 48 ++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index b8268da5adaa..8e45792703ed 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -764,5 +764,53 @@
         "teardown": [
             "$TC actions flush action police"
         ]
+    },
+    {
+        "id": "cdd7",
+        "name": "Add valid police action with packets per second rate limit",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f5bc",
+        "name": "Add invalid police action with both bps and pps",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 53b61f29367df398243b7298ad1e5793c289a493 Mon Sep 17 00:00:00 2001
From: Baowen Zheng <baowen.zheng@corigine.com>
Date: Fri, 26 Mar 2021 14:09:38 +0100
Subject: selftests: forwarding: Add tc-police tests for packets per second

Test tc-police action for packets per second.
The test is mainly in scenarios Rx policing and Tx policing.
The test passes with veth pairs ports.

Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/lib.sh      |  9 ++++
 .../testing/selftests/net/forwarding/tc_police.sh  | 56 ++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 05c05e02bade..42e28c983d41 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -772,6 +772,15 @@ rate()
 	echo $((8 * (t1 - t0) / interval))
 }
 
+packets_rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $(((t1 - t0) / interval))
+}
+
 mac_get()
 {
 	local if_name=$1
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 160f9cccdfb7..4f9f17cb45d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -35,6 +35,8 @@ ALL_TESTS="
 	police_shared_test
 	police_rx_mirror_test
 	police_tx_mirror_test
+	police_pps_rx_test
+	police_pps_tx_test
 "
 NUM_NETIFS=6
 source tc_common.sh
@@ -290,6 +292,60 @@ police_tx_mirror_test()
 	police_mirror_common_test $rp2 egress "police tx and mirror"
 }
 
+police_pps_common_test()
+{
+	local test_name=$1; shift
+
+	RET=0
+
+	# Rule to measure bandwidth on ingress of $h2
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action drop
+
+	mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+		-t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+	local t0=$(tc_rule_stats_get $h2 1 ingress .packets)
+	sleep 10
+	local t1=$(tc_rule_stats_get $h2 1 ingress .packets)
+
+	local er=$((2000))
+	local nr=$(packets_rate $t0 $t1 10)
+	local nr_pct=$((100 * (nr - er) / er))
+	((-10 <= nr_pct && nr_pct <= 10))
+	check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+	log_test "$test_name"
+
+	{ kill %% && wait %%; } 2>/dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_rx_test()
+{
+	# Rule to police traffic destined to $h2 on ingress of $rp1
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+	police_pps_common_test "police pps on rx"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_tx_test()
+{
+	# Rule to police traffic destined to $h2 on egress of $rp2
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+		dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+		action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+	police_pps_common_test "police pps on tx"
+
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3


From 214037a146ffb796d3f4b012e818360d2bb29f6b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 26 Mar 2021 19:28:47 +0000
Subject: drivers: net: smc91x: remove redundant initialization of pointer gpio

The pointer gpio is being initialized with a value that is
never read and it is being updated later with a new value. The
initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smc91x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 891b49281bc6..cbde83f620a0 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2204,7 +2204,7 @@ static int try_toggle_control_gpio(struct device *dev,
 				   const char *name, int index,
 				   int value, unsigned int nsdelay)
 {
-	struct gpio_desc *gpio = *desc;
+	struct gpio_desc *gpio;
 	enum gpiod_flags flags = value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH;
 
 	gpio = devm_gpiod_get_index_optional(dev, name, index, flags);
-- 
cgit v1.2.3


From ebf893958c131f75c4cfa77e43e8efd67628bd31 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Sat, 27 Mar 2021 10:27:22 +0800
Subject: net: rds: Fix a typo

Modify "beween" to "between" in net/rds/send.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/send.c b/net/rds/send.c
index 985d0b7713ac..53444397de66 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1225,7 +1225,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		}
 		/* If the socket is already bound to a link local address,
 		 * it can only send to peers on the same link.  But allow
-		 * communicating beween link local and non-link local address.
+		 * communicating between link local and non-link local address.
 		 */
 		if (scope_id != rs->rs_bound_scope_id) {
 			if (!scope_id) {
-- 
cgit v1.2.3


From 21c00a186fac6e035eef5e6751f1e2d2609f969c Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Sat, 27 Mar 2021 10:27:23 +0800
Subject: net: sctp: Fix some typos

Modify "unkown" to "unknown" in net/sctp/sm_make_chunk.c and
Modify "orginal" to "original" in net/sctp/socket.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 2 +-
 net/sctp/socket.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f77484df097b..54e6a708d06e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3217,7 +3217,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc,
 				return false;
 			break;
 		default:
-			/* This is unkown to us, reject! */
+			/* This is unknown to us, reject! */
 			return false;
 		}
 	}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a710917c5ac7..76a388b5021c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9327,7 +9327,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
 		net_enable_timestamp();
 
-	/* Set newsk security attributes from orginal sk and connection
+	/* Set newsk security attributes from original sk and connection
 	 * security attribute from ep.
 	 */
 	security_sctp_sk_clone(ep, sk, newsk);
-- 
cgit v1.2.3


From 9195f06b2d0fd0d1cc1552970d890c21f6b9492f Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Sat, 27 Mar 2021 10:27:24 +0800
Subject: net: vsock: Fix a typo

Modify "occured" to "occurred" in net/vmw_vsock/af_vsock.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/af_vsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index bc7fb9bf3351..92a72f0e0d94 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1855,7 +1855,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	if (!transport || sk->sk_state != TCP_ESTABLISHED) {
 		/* Recvmsg is supposed to return 0 if a peer performs an
 		 * orderly shutdown. Differentiate between that case and when a
-		 * peer has not connected or a local shutdown occured with the
+		 * peer has not connected or a local shutdown occurred with the
 		 * SOCK_DONE flag.
 		 */
 		if (sock_flag(sk, SOCK_DONE))
-- 
cgit v1.2.3


From de1d1ee3e3e9f028623e7beb4c090a2b68572f10 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 26 Mar 2021 14:20:22 +0100
Subject: nexthop: Rename artifacts related to legacy multipath nexthop groups

After resilient next-hop groups have been added recently, there are two
types of multipath next-hop groups: the legacy "mpath", and the new
"resilient". Calling the legacy next-hop group type "mpath" is unfortunate,
because that describes the fact that a packet could be forwarded in one of
several paths, which is also true for the resilient next-hop groups.

Therefore, to make the naming clearer, rename various artifacts to reflect
the assumptions made. Therefore as of this patch:

- The flag for multipath groups is nh_grp_entry::is_multipath. This
  includes the legacy and resilient groups, as well as any future group
  types that behave as multipath groups.
  Functions that assume this have "mpath" in the name.

- The flag for legacy multipath groups is nh_grp_entry::hash_threshold.
  Functions that assume this have "hthr" in the name.

- The flag for resilient groups is nh_grp_entry::resilient.
  Functions that assume this have "res" in the name.

Besides the above, struct nh_grp_entry::mpath was renamed to ::hthr as
well.

UAPI artifacts were obviously left intact.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h |  4 ++--
 net/ipv4/nexthop.c    | 56 +++++++++++++++++++++++++--------------------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 28145f714801..10e1777877e6 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -102,7 +102,7 @@ struct nh_grp_entry {
 	union {
 		struct {
 			atomic_t	upper_bound;
-		} mpath;
+		} hthr;
 		struct {
 			/* Member on uw_nh_entries. */
 			struct list_head	uw_nh_entry;
@@ -120,7 +120,7 @@ struct nh_group {
 	struct nh_group		*spare; /* spare group for removals */
 	u16			num_nh;
 	bool			is_multipath;
-	bool			mpath;
+	bool			hash_threshold;
 	bool			resilient;
 	bool			fdb_nh;
 	bool			has_v4;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f09fe3a5608f..5a2fc8798d20 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -116,8 +116,8 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
 	kfree(info->nh);
 }
 
-static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
-				    struct nh_group *nhg)
+static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
+				       struct nh_group *nhg)
 {
 	u16 num_nh = nhg->num_nh;
 	int i;
@@ -181,8 +181,8 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
 {
 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
-	if (nhg->mpath)
-		return nh_notifier_mp_info_init(info, nhg);
+	if (nhg->hash_threshold)
+		return nh_notifier_mpath_info_init(info, nhg);
 	else if (nhg->resilient)
 		return nh_notifier_res_table_info_init(info, nhg);
 	return -EINVAL;
@@ -193,7 +193,7 @@ static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
 {
 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
-	if (nhg->mpath)
+	if (nhg->hash_threshold)
 		kfree(info->nh_grp);
 	else if (nhg->resilient)
 		vfree(info->nh_res_table);
@@ -406,7 +406,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
 	 * could potentially veto it in case of unsupported configuration.
 	 */
 	nhg = rtnl_dereference(nh->nh_grp);
-	err = nh_notifier_mp_info_init(&info, nhg);
+	err = nh_notifier_mpath_info_init(&info, nhg);
 	if (err) {
 		NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
 		return err;
@@ -661,7 +661,7 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 	u16 group_type = 0;
 	int i;
 
-	if (nhg->mpath)
+	if (nhg->hash_threshold)
 		group_type = NEXTHOP_GRP_TYPE_MPATH;
 	else if (nhg->resilient)
 		group_type = NEXTHOP_GRP_TYPE_RES;
@@ -992,9 +992,9 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
 		/* Nesting groups within groups is not supported. */
-		if (nhg->mpath) {
+		if (nhg->hash_threshold) {
 			NL_SET_ERR_MSG(extack,
-				       "Multipath group can not be a nexthop within a group");
+				       "Hash-threshold group can not be a nexthop within a group");
 			return false;
 		}
 		if (nhg->resilient) {
@@ -1151,7 +1151,7 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
 	return !!(state & NUD_VALID);
 }
 
-static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
 {
 	struct nexthop *rc = NULL;
 	int i;
@@ -1160,7 +1160,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
 		struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 		struct nh_info *nhi;
 
-		if (hash > atomic_read(&nhge->mpath.upper_bound))
+		if (hash > atomic_read(&nhge->hthr.upper_bound))
 			continue;
 
 		nhi = rcu_dereference(nhge->nh->nh_info);
@@ -1212,8 +1212,8 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 		return nh;
 
 	nhg = rcu_dereference(nh->nh_grp);
-	if (nhg->mpath)
-		return nexthop_select_path_mp(nhg, hash);
+	if (nhg->hash_threshold)
+		return nexthop_select_path_hthr(nhg, hash);
 	else if (nhg->resilient)
 		return nexthop_select_path_res(nhg, hash);
 
@@ -1710,7 +1710,7 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
 	nh_res_table_upkeep(old_res_table, true, false);
 }
 
-static void nh_mp_group_rebalance(struct nh_group *nhg)
+static void nh_hthr_group_rebalance(struct nh_group *nhg)
 {
 	int total = 0;
 	int w = 0;
@@ -1725,7 +1725,7 @@ static void nh_mp_group_rebalance(struct nh_group *nhg)
 
 		w += nhge->weight;
 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
-		atomic_set(&nhge->mpath.upper_bound, upper_bound);
+		atomic_set(&nhge->hthr.upper_bound, upper_bound);
 	}
 }
 
@@ -1752,7 +1752,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 
 	newg->has_v4 = false;
 	newg->is_multipath = nhg->is_multipath;
-	newg->mpath = nhg->mpath;
+	newg->hash_threshold = nhg->hash_threshold;
 	newg->resilient = nhg->resilient;
 	newg->fdb_nh = nhg->fdb_nh;
 	newg->num_nh = nhg->num_nh;
@@ -1781,8 +1781,8 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 		j++;
 	}
 
-	if (newg->mpath)
-		nh_mp_group_rebalance(newg);
+	if (newg->hash_threshold)
+		nh_hthr_group_rebalance(newg);
 	else if (newg->resilient)
 		replace_nexthop_grp_res(nhg, newg);
 
@@ -1794,7 +1794,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 	/* Removal of a NH from a resilient group is notified through
 	 * bucket notifications.
 	 */
-	if (newg->mpath) {
+	if (newg->hash_threshold) {
 		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
 					     &extack);
 		if (err)
@@ -1928,12 +1928,12 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
 	oldg = rtnl_dereference(old->nh_grp);
 	newg = rtnl_dereference(new->nh_grp);
 
-	if (newg->mpath != oldg->mpath) {
+	if (newg->hash_threshold != oldg->hash_threshold) {
 		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with one of a different type.");
 		return -EINVAL;
 	}
 
-	if (newg->mpath) {
+	if (newg->hash_threshold) {
 		err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new,
 					     extack);
 		if (err)
@@ -2063,7 +2063,7 @@ static int replace_nexthop_single_notify(struct net *net,
 	struct nh_group *nhg = rtnl_dereference(group_nh->nh_grp);
 	struct nh_res_table *res_table;
 
-	if (nhg->mpath) {
+	if (nhg->hash_threshold) {
 		return call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE,
 					      group_nh, extack);
 	} else if (nhg->resilient) {
@@ -2328,8 +2328,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
 	rb_link_node_rcu(&new_nh->rb_node, parent, pp);
 	rb_insert_color(&new_nh->rb_node, root);
 
-	/* The initial insertion is a full notification for mpath as well
-	 * as resilient groups.
+	/* The initial insertion is a full notification for hash-threshold as
+	 * well as resilient groups.
 	 */
 	rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
 	if (rc)
@@ -2438,7 +2438,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
 	}
 
 	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
-		nhg->mpath = 1;
+		nhg->hash_threshold = 1;
 		nhg->is_multipath = true;
 	} else if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_RES) {
 		struct nh_res_table *res_table;
@@ -2455,10 +2455,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		nhg->is_multipath = true;
 	}
 
-	WARN_ON_ONCE(nhg->mpath + nhg->resilient != 1);
+	WARN_ON_ONCE(nhg->hash_threshold + nhg->resilient != 1);
 
-	if (nhg->mpath)
-		nh_mp_group_rebalance(nhg);
+	if (nhg->hash_threshold)
+		nh_hthr_group_rebalance(nhg);
 
 	if (cfg->nh_fdb)
 		nhg->fdb_nh = 1;
-- 
cgit v1.2.3


From 54422bd436e084e6c74aff6026c1767f1570ab26 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Sat, 27 Mar 2021 12:33:39 +0800
Subject: net: hns3: no return statement in hclge_clear_arfs_rules

drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c: In function 'hclge_clear_arfs_rules':
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c:7173:1: error: no return statement in function returning non-void [-Werror=return-type]
 7173 | }
      | ^
cc1: some warnings being treated as errors
make[6]: *** [scripts/Makefile.build:273: drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.o] Error 1
make[5]: *** [scripts/Makefile.build:534: drivers/net/ethernet/hisilicon/hns3/hns3pf] Error 2
make[4]: *** [scripts/Makefile.build:534: drivers/net/ethernet/hisilicon/hns3] Error 2
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [scripts/Makefile.build:534: drivers/net/ethernet/hisilicon] Error 2
make[2]: *** [scripts/Makefile.build:534: drivers/net/ethernet] Error 2
make[1]: *** [scripts/Makefile.build:534: drivers/net] Error 2
make[1]: *** Waiting for unfinished jobs....
make: *** [Makefile:1980: drivers] Error 2

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d63951993000..d964e2c527f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7133,8 +7133,8 @@ static int hclge_clear_arfs_rules(struct hclge_dev *hdev)
 	}
 	hclge_sync_fd_state(hdev);
 
-	return 0;
 #endif
+	return 0;
 }
 
 static void hclge_get_cls_key_basic(const struct flow_rule *flow,
-- 
cgit v1.2.3


From 4732315ca9fe9f9f9327d6e4b0a2140446e9c48c Mon Sep 17 00:00:00 2001
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Date: Fri, 26 Mar 2021 23:07:52 -0700
Subject: net: dsa: mt7530: clean up core and TRGMII clock setup

Three minor changes:

- When disabling PLL, there is no need to call core_write_mmd_indirect
  directly, use the core_write wrapper instead like the rest of the code
  in the function does. This change helps with consistency and
  readability. Move the comment to the definition of
  core_read_mmd_indirect where it belongs.

- Disable both core and TRGMII Tx clocks prior to reconfiguring.
  Previously, only the core clock was disabled, but not TRGMII Tx clock.
  So disable both, then configure them, then re-enable both, for
  consistency.

- The core clock enable bit (REG_GSWCK_EN) is written redundantly three
  times. Simplify the code and only write the register only once at the
  end of clock reconfiguration to enable both core and TRGMII Tx clocks.

Tested on Ubiquiti ER-X running the GMAC0 and MT7530 in TRGMII mode.

Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index c442a5885fca..2bd1bab71497 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -67,6 +67,11 @@ static const struct mt7530_mib_desc mt7530_mib[] = {
 	MIB_DESC(1, 0xb8, "RxArlDrop"),
 };
 
+/* Since phy_device has not yet been created and
+ * phy_{read,write}_mmd_indirect is not available, we provide our own
+ * core_{read,write}_mmd_indirect with core_{clear,write,set} wrappers
+ * to complete this function.
+ */
 static int
 core_read_mmd_indirect(struct mt7530_priv *priv, int prtad, int devad)
 {
@@ -435,19 +440,13 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 		mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
 			     TD_DM_DRVP(8) | TD_DM_DRVN(8));
 
-	/* Setup core clock for MT7530 */
-	/* Disable MT7530 core clock */
-	core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
+	/* Disable MT7530 core and TRGMII Tx clocks */
+	core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
+		   REG_GSWCK_EN | REG_TRGMIICK_EN);
 
-	/* Disable PLL, since phy_device has not yet been created
-	 * provided for phy_[read,write]_mmd_indirect is called, we
-	 * provide our own core_write_mmd_indirect to complete this
-	 * function.
-	 */
-	core_write_mmd_indirect(priv,
-				CORE_GSWPLL_GRP1,
-				MDIO_MMD_VEND2,
-				0);
+	/* Setup core clock for MT7530 */
+	/* Disable PLL */
+	core_write(priv, CORE_GSWPLL_GRP1, 0);
 
 	/* Set core clock into 500Mhz */
 	core_write(priv, CORE_GSWPLL_GRP2,
@@ -460,11 +459,7 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 		   RG_GSWPLL_POSDIV_200M(2) |
 		   RG_GSWPLL_FBKDIV_200M(32));
 
-	/* Enable MT7530 core clock */
-	core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
-
 	/* Setup the MT7530 TRGMII Tx Clock */
-	core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
 	core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
 	core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
 	core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
@@ -478,6 +473,8 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 	core_write(priv, CORE_PLL_GROUP7,
 		   RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
 		   RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
+
+	/* Enable MT7530 core and TRGMII Tx clocks */
 	core_set(priv, CORE_TRGMII_GSW_CLK_CG,
 		 REG_GSWCK_EN | REG_TRGMIICK_EN);
 
-- 
cgit v1.2.3


From 37569287cba1246a5057de32ac42d6c8941c714b Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:48 +0800
Subject: l3mdev: Correct function names in the kerneldoc comments

Fix the following W=1 kernel build warning(s):

 net/l3mdev/l3mdev.c:111: warning: expecting prototype for l3mdev_master_ifindex(). Prototype was for l3mdev_master_ifindex_rcu() instead
 net/l3mdev/l3mdev.c:145: warning: expecting prototype for l3mdev_master_upper_ifindex_by_index(). Prototype was for l3mdev_master_upper_ifindex_by_index_rcu() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l3mdev/l3mdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index ad7730b68772..17927966abb3 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -103,7 +103,7 @@ unlock:
 EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id);
 
 /**
- *	l3mdev_master_ifindex - get index of L3 master device
+ *	l3mdev_master_ifindex_rcu - get index of L3 master device
  *	@dev: targeted interface
  */
 
@@ -136,7 +136,7 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
 EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
 
 /**
- *	l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
+ *	l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3 master
  *					       device
  *	@net: network namespace for device index lookup
  *	@ifindex: targeted interface
-- 
cgit v1.2.3


From 3ba937fb95e877f73b33c5b482f303dd9a8bf4fa Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:49 +0800
Subject: netlabel: Correct function name netlbl_mgmt_add() in the kerneldoc
 comments

Fix the following W=1 kernel build warning(s):

 net/netlabel/netlabel_mgmt.c:78: warning: expecting prototype for netlbl_mgmt_add(). Prototype was for netlbl_mgmt_add_common() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlabel/netlabel_mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index df1b41ed73fd..ca52f5085989 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -63,7 +63,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
  */
 
 /**
- * netlbl_mgmt_add - Handle an ADD message
+ * netlbl_mgmt_add_common - Handle an ADD message
  * @info: the Generic NETLINK info block
  * @audit_info: NetLabel audit information
  *
-- 
cgit v1.2.3


From af825087433fb94a431edf387c1265463fce3bd1 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:50 +0800
Subject: net: core: Correct function name dev_uc_flush() in the kerneldoc

Fix the following W=1 kernel build warning(s):

 net/core/dev_addr_lists.c:732: warning: expecting prototype for dev_uc_flush(). Prototype was for dev_uc_init() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_addr_lists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 1e5bde241185..45ae6eeb2964 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -723,7 +723,7 @@ void dev_uc_flush(struct net_device *dev)
 EXPORT_SYMBOL(dev_uc_flush);
 
 /**
- *	dev_uc_flush - Init unicast address list
+ *	dev_uc_init - Init unicast address list
  *	@dev: device
  *
  *	Init unicast address list.
-- 
cgit v1.2.3


From bb2882bc6c54672b4c57a2108a18ec3acc7c878c Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:51 +0800
Subject: net: core: Correct function name netevent_unregister_notifier() in
 the kerneldoc

Fix the following W=1 kernel build warning(s):

 net/core/netevent.c:45: warning: expecting prototype for netevent_unregister_notifier(). Prototype was for unregister_netevent_notifier() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netevent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/netevent.c b/net/core/netevent.c
index d76ed7739c70..5bb615e963cc 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -32,7 +32,7 @@ int register_netevent_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(register_netevent_notifier);
 
 /**
- *	netevent_unregister_notifier - unregister a netevent notifier block
+ *	unregister_netevent_notifier - unregister a netevent notifier block
  *	@nb: notifier
  *
  *	Unregister a notifier previously registered by
-- 
cgit v1.2.3


From 8bf94a92505e6e9d46a76230b4900238685aa2ae Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:52 +0800
Subject: net: 9p: Correct function name errstr2errno() in the kerneldoc
 comments

Fix the following W=1 kernel build warning(s):

 net/9p/error.c:207: warning: expecting prototype for errstr2errno(). Prototype was for p9_errstr2errno() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/error.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/9p/error.c b/net/9p/error.c
index 231f355fa9c6..61c18daf3050 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -197,7 +197,7 @@ int p9_error_init(void)
 EXPORT_SYMBOL(p9_error_init);
 
 /**
- * errstr2errno - convert error string to error number
+ * p9_errstr2errno - convert error string to error number
  * @errstr: error string
  * @len: length of error string
  *
-- 
cgit v1.2.3


From 54e625e3bd1dd59f6d0b95730fa9be2604aceb1c Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:53 +0800
Subject: 9p/trans_fd: Correct function name p9_mux_destroy() in the kerneldoc

Fix the following W=1 kernel build warning(s):

 net/9p/trans_fd.c:881: warning: expecting prototype for p9_mux_destroy(). Prototype was for p9_conn_destroy() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_fd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index fa158397bb63..f4dd0456beaf 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -872,7 +872,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 }
 
 /**
- * p9_mux_destroy - cancels all pending requests of mux
+ * p9_conn_destroy - cancels all pending requests of mux
  * @m: mux to destroy
  *
  */
-- 
cgit v1.2.3


From 03ff7371cba41903b9f53617a44093051ead3fe7 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:54 +0800
Subject: net: 9p: Correct function names in the kerneldoc comments

Fix the following W=1 kernel build warning(s):

 net/9p/client.c:133: warning: expecting prototype for parse_options(). Prototype was for parse_opts() instead
 net/9p/client.c:269: warning: expecting prototype for p9_req_alloc(). Prototype was for p9_tag_alloc() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 0a9019da18f3..b7b958f61faf 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -122,7 +122,7 @@ static int get_protocol_version(char *s)
 }
 
 /**
- * parse_options - parse mount options into client structure
+ * parse_opts - parse mount options into client structure
  * @opts: options string passed from mount
  * @clnt: existing v9fs client information
  *
@@ -256,7 +256,7 @@ EXPORT_SYMBOL(p9_fcall_fini);
 static struct kmem_cache *p9_req_cache;
 
 /**
- * p9_req_alloc - Allocate a new request.
+ * p9_tag_alloc - Allocate a new request.
  * @c: Client session.
  * @type: Transaction type.
  * @max_size: Maximum packet size for this request.
-- 
cgit v1.2.3


From f7b88985a1ae71d302596c2f65c6e22eea207fc8 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:55 +0800
Subject: ip6_tunnel:: Correct function name parse_tvl_tnl_enc_lim() in the
 kerneldoc comments

Fix the following W=1 kernel build warning(s):

 net/ipv6/ip6_tunnel.c:401: warning: expecting prototype for parse_tvl_tnl_enc_lim(). Prototype was for ip6_tnl_parse_tlv_enc_lim() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3fa0eca5a06f..cd78f5b2cd75 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -388,7 +388,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
 }
 
 /**
- * parse_tvl_tnl_enc_lim - handle encapsulation limit option
+ * ip6_tnl_parse_tlv_enc_lim - handle encapsulation limit option
  *   @skb: received socket buffer
  *   @raw: the ICMPv6 error message data
  *
-- 
cgit v1.2.3


From b6908cf795e9687d6323834cf5c6c67a52f64464 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sat, 27 Mar 2021 16:15:56 +0800
Subject: NFC: digital: Correct function name in the kerneldoc comments

Fix the following W=1 kernel build warning(s):

 net/nfc/digital_core.c:473: warning: expecting prototype for start_poll operation(). Prototype was for digital_start_poll() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/digital_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index da7e2112771f..5044c7db577e 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -457,7 +457,7 @@ static void digital_add_poll_tech(struct nfc_digital_dev *ddev, u8 rf_tech,
 }
 
 /**
- * start_poll operation
+ * digital_start_poll - start_poll operation
  * @nfc_dev: device to be polled
  * @im_protocols: bitset of nfc initiator protocols to be used for polling
  * @tm_protocols: bitset of nfc transport protocols to be used for polling
-- 
cgit v1.2.3


From 284fda1eff8a8b27d2cafd7dc8fb423d13720f21 Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Sat, 27 Mar 2021 10:29:32 +0100
Subject: sit: use min

Opportunity for min()

Generated by: scripts/coccinelle/misc/minmax.cocci

CC: Denis Efremov <efremov@linux.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 63ccd9f2dccc..b9bd2723f89a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -325,7 +325,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
 
 	rcu_read_lock();
 
-	ca = t->prl_count < cmax ? t->prl_count : cmax;
+	ca = min(t->prl_count, cmax);
 
 	if (!kp) {
 		/* We don't try hard to allocate much memory for
-- 
cgit v1.2.3


From 30f347ae7cc1178c431f968a89d4b4a375bc0d39 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 27 Mar 2021 17:33:22 +0800
Subject: net: stmmac: fix missing unlock on error in stmmac_suspend()

Add the missing unlock before return from stmmac_suspend()
in the error handling case.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3a4f0c2d42a3..d34388b1ffcc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5964,8 +5964,10 @@ int stmmac_suspend(struct device *dev)
 		/* Disable clock in case of PWM is off */
 		clk_disable_unprepare(priv->plat->clk_ptp_ref);
 		ret = pm_runtime_force_suspend(dev);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&priv->lock);
 			return ret;
+		}
 	}
 
 	mutex_unlock(&priv->lock);
-- 
cgit v1.2.3


From 8d93a4f9ccfdeba2c6a2b6d1e070e4974734fe8c Mon Sep 17 00:00:00 2001
From: Junlin Yang <yangjunlin@yulong.com>
Date: Sat, 27 Mar 2021 17:56:17 +0800
Subject: mt76: Convert to DEFINE_SHOW_ATTRIBUTE

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Junlin Yang <yangjunlin@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 36 ++++------------------
 .../net/wireless/mediatek/mt76/mt7921/debugfs.c    | 18 ++---------
 2 files changed, 9 insertions(+), 45 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 77dcd71e49a5..7bef36feb9c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -192,7 +192,7 @@ mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s)
 }
 
 static int
-mt7915_tx_stats_read(struct seq_file *file, void *data)
+mt7915_tx_stats_show(struct seq_file *file, void *data)
 {
 	struct mt7915_dev *dev = file->private;
 	int stat[8], i, n;
@@ -222,19 +222,7 @@ mt7915_tx_stats_read(struct seq_file *file, void *data)
 	return 0;
 }
 
-static int
-mt7915_tx_stats_open(struct inode *inode, struct file *f)
-{
-	return single_open(f, mt7915_tx_stats_read, inode->i_private);
-}
-
-static const struct file_operations fops_tx_stats = {
-	.open = mt7915_tx_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
+DEFINE_SHOW_ATTRIBUTE(mt7915_tx_stats);
 
 static int mt7915_read_temperature(struct seq_file *s, void *data)
 {
@@ -379,7 +367,7 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 				    mt7915_queues_read);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir,
 				    mt7915_queues_acq);
-	debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats);
+	debugfs_create_file("tx_stats", 0400, dir, dev, &mt7915_tx_stats_fops);
 	debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug);
 	debugfs_create_file("implicit_txbf", 0600, dir, dev,
 			    &fops_implicit_txbf);
@@ -412,7 +400,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_fixed_rate, NULL,
 			 mt7915_sta_fixed_rate_set, "%llx\n");
 
 static int
-mt7915_sta_stats_read(struct seq_file *s, void *data)
+mt7915_sta_stats_show(struct seq_file *s, void *data)
 {
 	struct ieee80211_sta *sta = s->private;
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
@@ -455,24 +443,12 @@ mt7915_sta_stats_read(struct seq_file *s, void *data)
 	return 0;
 }
 
-static int
-mt7915_sta_stats_open(struct inode *inode, struct file *f)
-{
-	return single_open(f, mt7915_sta_stats_read, inode->i_private);
-}
-
-static const struct file_operations fops_sta_stats = {
-	.open = mt7915_sta_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
+DEFINE_SHOW_ATTRIBUTE(mt7915_sta_stats);
 
 void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir)
 {
 	debugfs_create_file("fixed_rate", 0600, dir, sta, &fops_fixed_rate);
-	debugfs_create_file("stats", 0400, dir, sta, &fops_sta_stats);
+	debugfs_create_file("stats", 0400, dir, sta, &mt7915_sta_stats_fops);
 }
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 0dc8e25e18e4..c1a64ff6a197 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -62,7 +62,7 @@ mt7921_ampdu_stat_read_phy(struct mt7921_phy *phy,
 }
 
 static int
-mt7921_tx_stats_read(struct seq_file *file, void *data)
+mt7921_tx_stats_show(struct seq_file *file, void *data)
 {
 	struct mt7921_dev *dev = file->private;
 	int stat[8], i, n;
@@ -88,19 +88,7 @@ mt7921_tx_stats_read(struct seq_file *file, void *data)
 	return 0;
 }
 
-static int
-mt7921_tx_stats_open(struct inode *inode, struct file *f)
-{
-	return single_open(f, mt7921_tx_stats_read, inode->i_private);
-}
-
-static const struct file_operations fops_tx_stats = {
-	.open = mt7921_tx_stats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
+DEFINE_SHOW_ATTRIBUTE(mt7921_tx_stats);
 
 static int
 mt7921_queues_acq(struct seq_file *s, void *data)
@@ -239,7 +227,7 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
 				    mt7921_queues_read);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir,
 				    mt7921_queues_acq);
-	debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats);
+	debugfs_create_file("tx_stats", 0400, dir, dev, &mt7921_tx_stats_fops);
 	debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug);
 	debugfs_create_file("runtime-pm", 0600, dir, dev, &fops_pm);
 	debugfs_create_file("idle-timeout", 0600, dir, dev,
-- 
cgit v1.2.3


From 8e99ca3fdb31051372b9e8f3a563e59147e0ee10 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 17:56:18 +0800
Subject: mt76: mt7615: remove redundant dev_err call in mt7622_wmac_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/mediatek/mt76/mt7615/soc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
index 9aa5183c7a56..be9a69fe1b38 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
@@ -40,10 +40,8 @@ static int mt7622_wmac_probe(struct platform_device *pdev)
 		return irq;
 
 	mem_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mem_base)) {
-		dev_err(&pdev->dev, "Failed to get memory resource\n");
+	if (IS_ERR(mem_base))
 		return PTR_ERR(mem_base);
-	}
 
 	return mt7615_mmio_probe(&pdev->dev, mem_base, irq, mt7615e_reg_map);
 }
-- 
cgit v1.2.3


From 656151aaa623c20bbfcbd5b4e43acbea8d23410f Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 18:07:18 +0800
Subject: net: dsa: hellcreek: Remove redundant dev_err call in
 hellcreek_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 918be7eb626f..4d78219da253 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1842,10 +1842,8 @@ static int hellcreek_probe(struct platform_device *pdev)
 	}
 
 	hellcreek->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(hellcreek->base)) {
-		dev_err(dev, "No memory available!\n");
+	if (IS_ERR(hellcreek->base))
 		return PTR_ERR(hellcreek->base);
-	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ptp");
 	if (!res) {
@@ -1854,10 +1852,8 @@ static int hellcreek_probe(struct platform_device *pdev)
 	}
 
 	hellcreek->ptp_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(hellcreek->ptp_base)) {
-		dev_err(dev, "No memory available!\n");
+	if (IS_ERR(hellcreek->ptp_base))
 		return PTR_ERR(hellcreek->ptp_base);
-	}
 
 	ret = hellcreek_detect(hellcreek);
 	if (ret) {
-- 
cgit v1.2.3


From d759c1bd2696c8d72d0f824eb32eadac6eb210c8 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 18:31:51 +0800
Subject: net: lantiq: Remove redundant dev_err call in xrx200_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 51ed8a54d380..0f8ef8f1232c 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -460,10 +460,8 @@ static int xrx200_probe(struct platform_device *pdev)
 	}
 
 	priv->pmac_reg = devm_ioremap_resource(dev, res);
-	if (IS_ERR(priv->pmac_reg)) {
-		dev_err(dev, "failed to request and remap io ranges\n");
+	if (IS_ERR(priv->pmac_reg))
 		return PTR_ERR(priv->pmac_reg);
-	}
 
 	priv->chan_rx.dma.irq = platform_get_irq_byname(pdev, "rx");
 	if (priv->chan_rx.dma.irq < 0)
-- 
cgit v1.2.3


From 9d0365448b5b954bba1b551ade5b273d629446bb Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 18:37:54 +0800
Subject: net: moxa: remove redundant dev_err call in moxart_mac_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/moxa/moxart_ether.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 49fd843c4c8a..b85733942053 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -485,7 +485,6 @@ static int moxart_mac_probe(struct platform_device *pdev)
 	ndev->base_addr = res->start;
 	priv->base = devm_ioremap_resource(p_dev, res);
 	if (IS_ERR(priv->base)) {
-		dev_err(p_dev, "devm_ioremap_resource failed\n");
 		ret = PTR_ERR(priv->base);
 		goto init_fail;
 	}
-- 
cgit v1.2.3


From 862d3f2c9bd100198fb1b0f7e7e155541b98e2bf Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:05 -0500
Subject: net: ipa: fix all kernel-doc warnings

Fix all warnings produced when running:
  scripts/kernel-doc -none drivers/net/ipa/*.[ch]

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi_private.h   |  2 +-
 drivers/net/ipa/gsi_trans.h     |  5 +++--
 drivers/net/ipa/ipa.h           |  7 ++++---
 drivers/net/ipa/ipa_cmd.h       | 19 +++++++++++++------
 drivers/net/ipa/ipa_endpoint.h  | 18 +++++++++++++++---
 drivers/net/ipa/ipa_interrupt.h |  1 +
 drivers/net/ipa/ipa_mem.h       |  2 +-
 drivers/net/ipa/ipa_qmi.h       | 14 +++++++++-----
 drivers/net/ipa/ipa_smp2p.h     |  2 +-
 drivers/net/ipa/ipa_table.h     |  3 ++-
 10 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ipa/gsi_private.h b/drivers/net/ipa/gsi_private.h
index 1785c9d3344d..ed7bc26f85e9 100644
--- a/drivers/net/ipa/gsi_private.h
+++ b/drivers/net/ipa/gsi_private.h
@@ -100,7 +100,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel);
 /**
  * gsi_ring_virt() - Return virtual address for a ring entry
  * @ring:	Ring whose address is to be translated
- * @addr:	Index (slot number) of entry
+ * @index:	Index (slot number) of entry
  */
 void *gsi_ring_virt(struct gsi_ring *ring, u32 index);
 
diff --git a/drivers/net/ipa/gsi_trans.h b/drivers/net/ipa/gsi_trans.h
index 3a4ab8a94d82..17fd1822d8a9 100644
--- a/drivers/net/ipa/gsi_trans.h
+++ b/drivers/net/ipa/gsi_trans.h
@@ -71,7 +71,7 @@ struct gsi_trans {
 
 /**
  * gsi_trans_pool_init() - Initialize a pool of structures for transactions
- * @gsi:	GSI pointer
+ * @pool:	GSI transaction poll pointer
  * @size:	Size of elements in the pool
  * @count:	Minimum number of elements in the pool
  * @max_alloc:	Maximum number of elements allocated at a time from pool
@@ -123,7 +123,8 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
 void *gsi_trans_pool_alloc_dma(struct gsi_trans_pool *pool, dma_addr_t *addr);
 
 /**
- * gsi_trans_pool_exit() - Inverse of gsi_trans_pool_init()
+ * gsi_trans_pool_exit_dma() - Inverse of gsi_trans_pool_init_dma()
+ * @dev:	Device used for DMA
  * @pool:	Pool pointer
  */
 void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool);
diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index 802077631371..e7ff376cb5b7 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -44,6 +44,8 @@ enum ipa_flag {
  * @version:		IPA hardware version
  * @pdev:		Platform device
  * @completion:		Used to signal pipeline clear transfer complete
+ * @nb:			Notifier block used for remoteproc SSR
+ * @notifier:		Remoteproc SSR notifier
  * @smp2p:		SMP2P information
  * @clock:		IPA clocking information
  * @table_addr:		DMA address of filter/route table content
@@ -58,13 +60,12 @@ enum ipa_flag {
  * @mem_size:		Total size (bytes) of memory at @mem_virt
  * @mem:		Array of IPA-local memory region descriptors
  * @imem_iova:		I/O virtual address of IPA region in IMEM
- * @imem_size;		Size of IMEM region
+ * @imem_size:		Size of IMEM region
  * @smem_iova:		I/O virtual address of IPA region in SMEM
- * @smem_size;		Size of SMEM region
+ * @smem_size:		Size of SMEM region
  * @zero_addr:		DMA address of preallocated zero-filled memory
  * @zero_virt:		Virtual address of preallocated zero-filled memory
  * @zero_size:		Size (bytes) of preallocated zero-filled memory
- * @wakeup_source:	Wakeup source information
  * @available:		Bit mask indicating endpoints hardware supports
  * @filter_map:		Bit mask indicating endpoints that support filtering
  * @initialized:	Bit mask indicating endpoints initialized
diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h
index 6dd3d35cf315..b99262281f41 100644
--- a/drivers/net/ipa/ipa_cmd.h
+++ b/drivers/net/ipa/ipa_cmd.h
@@ -20,11 +20,18 @@ struct gsi_channel;
 /**
  * enum ipa_cmd_opcode:	IPA immediate commands
  *
- * All immediate commands are issued using the AP command TX endpoint.
- * The numeric values here are the opcodes for IPA v3.5.1 hardware.
+ * @IPA_CMD_IP_V4_FILTER_INIT:	Initialize IPv4 filter table
+ * @IPA_CMD_IP_V6_FILTER_INIT:	Initialize IPv6 filter table
+ * @IPA_CMD_IP_V4_ROUTING_INIT:	Initialize IPv4 routing table
+ * @IPA_CMD_IP_V6_ROUTING_INIT:	Initialize IPv6 routing table
+ * @IPA_CMD_HDR_INIT_LOCAL:	Initialize IPA-local header memory
+ * @IPA_CMD_REGISTER_WRITE:	Register write performed by IPA
+ * @IPA_CMD_IP_PACKET_INIT:	Set up next packet's destination endpoint
+ * @IPA_CMD_DMA_SHARED_MEM:	DMA command performed by IPA
+ * @IPA_CMD_IP_PACKET_TAG_STATUS: Have next packet generate tag * status
+ * @IPA_CMD_NONE:		Special (invalid) "not a command" value
  *
- * IPA_CMD_NONE is a special (invalid) value that's used to indicate
- * a request is *not* an immediate command.
+ * All immediate commands are issued using the AP command TX endpoint.
  */
 enum ipa_cmd_opcode {
 	IPA_CMD_NONE			= 0x0,
@@ -96,7 +103,7 @@ static inline bool ipa_cmd_data_valid(struct ipa *ipa)
  *
  * Return:	0 if successful, or a negative error code
  */
-int ipa_cmd_pool_init(struct gsi_channel *gsi_channel, u32 tre_count);
+int ipa_cmd_pool_init(struct gsi_channel *channel, u32 tre_count);
 
 /**
  * ipa_cmd_pool_exit() - Inverse of ipa_cmd_pool_init()
@@ -124,7 +131,7 @@ void ipa_cmd_table_init_add(struct gsi_trans *trans, enum ipa_cmd_opcode opcode,
 
 /**
  * ipa_cmd_hdr_init_local_add() - Add a header init command to a transaction
- * @ipa:	IPA structure
+ * @trans:	GSI transaction
  * @offset:	Offset of header memory in IPA local space
  * @size:	Size of header memory
  * @addr:	DMA address of buffer to be written from
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index c6c55ea35394..0d410b050456 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -41,8 +41,20 @@ enum ipa_endpoint_name {
 
 /**
  * struct ipa_endpoint - IPA endpoint information
- * @channel_id:	EP's GSI channel
- * @evt_ring_id: EP's GSI channel event ring
+ * @ipa:		IPA pointer
+ * @ee_id:		Execution environmnent endpoint is associated with
+ * @channel_id:		GSI channel used by the endpoint
+ * @endpoint_id:	IPA endpoint number
+ * @toward_ipa:		Endpoint direction (true = TX, false = RX)
+ * @data:		Endpoint configuration data
+ * @trans_tre_max:	Maximum number of TRE descriptors per transaction
+ * @evt_ring_id:	GSI event ring used by the endpoint
+ * @netdev:		Network device pointer, if endpoint uses one
+ * @replenish_enabled:	Whether receive buffer replenishing is enabled
+ * @replenish_ready:	Number of replenish transactions without doorbell
+ * @replenish_saved:	Replenish requests held while disabled
+ * @replenish_backlog:	Number of buffers needed to fill hardware queue
+ * @replenish_work:	Work item used for repeated replenish failures
  */
 struct ipa_endpoint {
 	struct ipa *ipa;
@@ -52,7 +64,7 @@ struct ipa_endpoint {
 	bool toward_ipa;
 	const struct ipa_endpoint_config_data *data;
 
-	u32 trans_tre_max;	/* maximum descriptors per transaction */
+	u32 trans_tre_max;
 	u32 evt_ring_id;
 
 	/* Net device this endpoint is associated with, if any */
diff --git a/drivers/net/ipa/ipa_interrupt.h b/drivers/net/ipa/ipa_interrupt.h
index b5d63a0cd19e..d5c486a6800d 100644
--- a/drivers/net/ipa/ipa_interrupt.h
+++ b/drivers/net/ipa/ipa_interrupt.h
@@ -24,6 +24,7 @@ typedef void (*ipa_irq_handler_t)(struct ipa *ipa, enum ipa_irq_id irq_id);
 
 /**
  * ipa_interrupt_add() - Register a handler for an IPA interrupt type
+ * @interrupt:	IPA interrupt structure
  * @irq_id:	IPA interrupt type
  * @handler:	Handler function for the interrupt
  *
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index f82e8939622b..df61ef48df36 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -77,7 +77,7 @@ enum ipa_mem_id {
  * struct ipa_mem - IPA local memory region description
  * @offset:		offset in IPA memory space to base of the region
  * @size:		size in bytes base of the region
- * @canary_count	# 32-bit "canary" values that precede region
+ * @canary_count:	Number of 32-bit "canary" values that precede region
  */
 struct ipa_mem {
 	u32 offset;
diff --git a/drivers/net/ipa/ipa_qmi.h b/drivers/net/ipa/ipa_qmi.h
index 3993687593d0..b6f2055d35a6 100644
--- a/drivers/net/ipa/ipa_qmi.h
+++ b/drivers/net/ipa/ipa_qmi.h
@@ -13,11 +13,15 @@ struct ipa;
 
 /**
  * struct ipa_qmi - QMI state associated with an IPA
- * @client_handle	- used to send an QMI requests to the modem
- * @server_handle	- used to handle QMI requests from the modem
- * @initialized		- whether QMI initialization has completed
- * @indication_register_received - tracks modem request receipt
- * @init_driver_response_received - tracks modem response receipt
+ * @client_handle:	Used to send an QMI requests to the modem
+ * @server_handle:	Used to handle QMI requests from the modem
+ * @modem_sq:		QMAP socket address for the modem QMI server
+ * @init_driver_work:	Work structure used for INIT_DRIVER message handling
+ * @initial_boot:	True if first boot has not yet completed
+ * @uc_ready:		True once DRIVER_INIT_COMPLETE request received
+ * @modem_ready:	True when INIT_DRIVER response received
+ * @indication_requested: True when INDICATION_REGISTER request received
+ * @indication_sent:	True when INIT_COMPLETE indication sent
  */
 struct ipa_qmi {
 	struct qmi_handle client_handle;
diff --git a/drivers/net/ipa/ipa_smp2p.h b/drivers/net/ipa/ipa_smp2p.h
index bf0e4063cfd9..20319438a841 100644
--- a/drivers/net/ipa/ipa_smp2p.h
+++ b/drivers/net/ipa/ipa_smp2p.h
@@ -28,7 +28,7 @@ void ipa_smp2p_exit(struct ipa *ipa);
 
 /**
  * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling
- * @IPA:	IPA pointer
+ * @ipa:	IPA pointer
  *
  * Prevent handling of the "setup ready" interrupt from the modem.
  * This is used before initiating shutdown of the driver.
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index 889c2e93b122..e14108ed62bd 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -24,7 +24,7 @@ struct ipa;
 /**
  * ipa_table_valid() - Validate route and filter table memory regions
  * @ipa:	IPA pointer
-
+ *
  * Return:	true if all regions are valid, false otherwise
  */
 bool ipa_table_valid(struct ipa *ipa);
@@ -32,6 +32,7 @@ bool ipa_table_valid(struct ipa *ipa);
 /**
  * ipa_filter_map_valid() - Validate a filter table endpoint bitmap
  * @ipa:	IPA pointer
+ * @filter_mask: Filter table endpoint bitmap to check
  *
  * Return:	true if all regions are valid, false otherwise
  */
-- 
cgit v1.2.3


From e695bed28a5da539144676d979f76c111f9f1020 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:06 -0500
Subject: net: ipa: store BCR register values in config data

The backward compatibility register value is a platform-specific
property that is not stored in the platform data.  Create a data
field where this can be represented, and get rid ipa_reg_bcr_val().

This register is not present starting with IPA v4.5.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c |  1 +
 drivers/net/ipa/ipa_data-sdm845.c |  5 +++++
 drivers/net/ipa/ipa_data.h        |  2 ++
 drivers/net/ipa/ipa_main.c        |  4 ++--
 drivers/net/ipa/ipa_reg.h         | 21 ---------------------
 5 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index c9b6a6aaadac..810c673be56e 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -349,6 +349,7 @@ static const struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SC7180 SoC. */
 const struct ipa_data ipa_data_sc7180 = {
 	.version	= IPA_VERSION_4_2,
+	/* backward_compat value is 0 */
 	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
 	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index e14e3fb1d970..49a18b1047c5 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -397,6 +397,11 @@ static const struct ipa_clock_data ipa_clock_data = {
 /* Configuration data for the SDM845 SoC. */
 const struct ipa_data ipa_data_sdm845 = {
 	.version	= IPA_VERSION_3_5_1,
+	.backward_compat = BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
+			   BCR_TX_NOT_USING_BRESP_FMASK |
+			   BCR_SUSPEND_L2_IRQ_FMASK |
+			   BCR_HOLB_DROP_L2_IRQ_FMASK |
+			   BCR_DUAL_TX_FMASK,
 	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
 	.qsb_data	= ipa_qsb_data,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index ea8f99286228..843d818f78e1 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -279,6 +279,7 @@ struct ipa_clock_data {
 /**
  * struct ipa_data - combined IPA/GSI configuration data
  * @version:		IPA hardware version
+ * @backward_compat:	BCR register value (prior to IPA v4.5 only)
  * @qsb_count:		number of entries in the qsb_data array
  * @qsb_data:		Qualcomm System Bus configuration data
  * @endpoint_count:	number of entries in the endpoint_data array
@@ -289,6 +290,7 @@ struct ipa_clock_data {
  */
 struct ipa_data {
 	enum ipa_version version;
+	u32 backward_compat;
 	u32 qsb_count;		/* number of entries in qsb_data[] */
 	const struct ipa_qsb_data *qsb_data;
 	u32 endpoint_count;	/* number of entries in endpoint_data[] */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index e18029152d78..afb8eb5618f7 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -397,9 +397,9 @@ static void ipa_hardware_config(struct ipa *ipa, const struct ipa_data *data)
 	u32 granularity;
 	u32 val;
 
-	/* IPA v4.5 has no backward compatibility register */
+	/* IPA v4.5+ has no backward compatibility register */
 	if (version < IPA_VERSION_4_5) {
-		val = ipa_reg_bcr_val(version);
+		val = data->backward_compat;
 		iowrite32(val, ipa->reg_virt + IPA_REG_BCR_OFFSET);
 	}
 
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index de2a944bad86..286ea9634c49 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -235,27 +235,6 @@ static inline u32 ipa_reg_state_aggr_active_offset(enum ipa_version version)
 #define BCR_FILTER_PREFETCH_EN_FMASK		GENMASK(8, 8)
 #define BCR_ROUTER_PREFETCH_EN_FMASK		GENMASK(9, 9)
 
-/* Backward compatibility register value to use for each version */
-static inline u32 ipa_reg_bcr_val(enum ipa_version version)
-{
-	if (version == IPA_VERSION_3_5_1)
-		return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
-			BCR_TX_NOT_USING_BRESP_FMASK |
-			BCR_SUSPEND_L2_IRQ_FMASK |
-			BCR_HOLB_DROP_L2_IRQ_FMASK |
-			BCR_DUAL_TX_FMASK;
-
-	if (version == IPA_VERSION_4_0 || version == IPA_VERSION_4_1)
-		return BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
-			BCR_SUSPEND_L2_IRQ_FMASK |
-			BCR_HOLB_DROP_L2_IRQ_FMASK |
-			BCR_DUAL_TX_FMASK;
-
-	/* assert(version != IPA_VERSION_4_5); */
-
-	return 0x00000000;
-}
-
 /* The value of the next register must be a multiple of 8 (bottom 3 bits 0) */
 #define IPA_REG_LOCAL_PKT_PROC_CNTXT_OFFSET		0x000001e8
 
-- 
cgit v1.2.3


From d21d1f33b190f7c18e83899dfbc5a48ab173ef15 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:07 -0500
Subject: net: ipa: don't define endpoints unnecessarily

We don't typically need much information about modem endpoints.
Normally we need to specify information about modem endpoints in
configuration data in only two cases:
  - When a modem TX endpoint supports filtering
  - When another endpoint's configuration refers to it

For the first case, the AP initializes the filter table, and must
know how many endpoints (AP and modem) support filtering.  An
example of the second case is the AP->modem TX endpoint, which
defines the modem<-AP RX endpoint as its status endpoint.

There is one exception to this, and it's due to a hardware quirk.
For IPA v4.2 (only) there is a problem related to allocating GSI
channels.  And to work around this, the AP allocates *all* GSI
channels at startup time--including those used by the modem.

Get rid of the configuration information for two endpoints not
required for the SDM845.  SC7180 runs IPA v4.2, so we can't
eliminate any modem endpoint definitions there.

Two more minor changes:
  - Reorder the members defined for the ipa_endpoint_name enumerated
    type to match the order used in configuration data files when
    defining endpoints.
  - Add a new name, IPA_ENDPOINT_MODEM_DL_NLO_TX, which can be used
    for IPA v4.5+.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sdm845.c | 12 ------------
 drivers/net/ipa/ipa_endpoint.h    | 11 ++++++-----
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 49a18b1047c5..ed0bfe0634d9 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -144,12 +144,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			},
 		},
 	},
-	[IPA_ENDPOINT_MODEM_COMMAND_TX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 1,
-		.endpoint_id	= 4,
-		.toward_ipa	= true,
-	},
 	[IPA_ENDPOINT_MODEM_LAN_TX] = {
 		.ee_id		= GSI_EE_MODEM,
 		.channel_id	= 0,
@@ -159,12 +153,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 			.filter_support	= true,
 		},
 	},
-	[IPA_ENDPOINT_MODEM_LAN_RX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 3,
-		.endpoint_id	= 13,
-		.toward_ipa	= false,
-	},
 	[IPA_ENDPOINT_MODEM_AP_TX] = {
 		.ee_id		= GSI_EE_MODEM,
 		.channel_id	= 4,
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 0d410b050456..f034a9e6ef21 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -25,15 +25,16 @@ struct ipa_gsi_endpoint_data;
 #define IPA_MTU			ETH_DATA_LEN
 
 enum ipa_endpoint_name {
-	IPA_ENDPOINT_AP_MODEM_TX,
-	IPA_ENDPOINT_MODEM_LAN_TX,
-	IPA_ENDPOINT_MODEM_COMMAND_TX,
 	IPA_ENDPOINT_AP_COMMAND_TX,
-	IPA_ENDPOINT_MODEM_AP_TX,
 	IPA_ENDPOINT_AP_LAN_RX,
+	IPA_ENDPOINT_AP_MODEM_TX,
 	IPA_ENDPOINT_AP_MODEM_RX,
-	IPA_ENDPOINT_MODEM_AP_RX,
+	IPA_ENDPOINT_MODEM_COMMAND_TX,
+	IPA_ENDPOINT_MODEM_LAN_TX,
 	IPA_ENDPOINT_MODEM_LAN_RX,
+	IPA_ENDPOINT_MODEM_AP_TX,
+	IPA_ENDPOINT_MODEM_AP_RX,
+	IPA_ENDPOINT_MODEM_DL_NLO_TX,
 	IPA_ENDPOINT_COUNT,	/* Number of names (not an index) */
 };
 
-- 
cgit v1.2.3


From fc566dab45f9eab9b07c971daedd0843e688e777 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:08 -0500
Subject: net: ipa: switch to version based configuration

Rename the SDM845 configuration data file so that its name is
derived from its IPA version.  I am not aware of any special IPA
behavior or handling that would be based on a specific SoC (as
opposed to a specific version of the IPA it contains).

Update a few other references to the code that talk about the SDM845
rather than just IPA v3.5.1.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Kconfig           |   3 +-
 drivers/net/ipa/Makefile          |   2 +-
 drivers/net/ipa/ipa_data-sdm845.c | 400 -------------------------------------
 drivers/net/ipa/ipa_data-v3.5.1.c | 402 ++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h        |   2 +-
 drivers/net/ipa/ipa_main.c        |   2 +-
 6 files changed, 406 insertions(+), 405 deletions(-)
 delete mode 100644 drivers/net/ipa/ipa_data-sdm845.c
 create mode 100644 drivers/net/ipa/ipa_data-v3.5.1.c

diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
index 90a90262e0d0..8f99cfa14680 100644
--- a/drivers/net/ipa/Kconfig
+++ b/drivers/net/ipa/Kconfig
@@ -12,8 +12,7 @@ config QCOM_IPA
 	  that is capable of generic hardware handling of IP packets,
 	  including routing, filtering, and NAT.  Currently the IPA
 	  driver supports only basic transport of network traffic
-	  between the AP and modem, on the Qualcomm SDM845 and SC7180
-	  SoCs.
+	  between the AP and modem.
 
 	  Note that if selected, the selection type must match that
 	  of QCOM_Q6V5_COMMON (Y or M).
diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index 14a7d8429baa..a4a42fc7b840 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -9,4 +9,4 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_endpoint.o ipa_cmd.o ipa_modem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
-ipa-y			+=	ipa_data-sdm845.o ipa_data-sc7180.o
+ipa-y			+=	ipa_data-v3.5.1.o ipa_data-sc7180.o
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
deleted file mode 100644
index ed0bfe0634d9..000000000000
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ /dev/null
@@ -1,400 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2021 Linaro Ltd.
- */
-
-#include <linux/log2.h>
-
-#include "gsi.h"
-#include "ipa_data.h"
-#include "ipa_endpoint.h"
-#include "ipa_mem.h"
-
-/** enum ipa_resource_type - IPA resource types */
-enum ipa_resource_type {
-	/* Source resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
-	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
-	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-
-	/* Destination resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
-	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
-};
-
-/* Resource groups used for the SDM845 SoC */
-enum ipa_rsrc_group_id {
-	/* Source resource group identifiers */
-	IPA_RSRC_GROUP_SRC_LWA_DL	= 0,
-	IPA_RSRC_GROUP_SRC_UL_DL,
-	IPA_RSRC_GROUP_SRC_MHI_DMA,
-	IPA_RSRC_GROUP_SRC_UC_RX_Q,
-	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
-
-	/* Destination resource group identifiers */
-	IPA_RSRC_GROUP_DST_LWA_DL	= 0,
-	IPA_RSRC_GROUP_DST_UL_DL_DPL,
-	IPA_RSRC_GROUP_DST_UNUSED_2,
-	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
-};
-
-/* QSB configuration for the SDM845 SoC. */
-static const struct ipa_qsb_data ipa_qsb_data[] = {
-	[IPA_QSB_MASTER_DDR] = {
-		.max_writes	= 8,
-		.max_reads	= 8,
-	},
-	[IPA_QSB_MASTER_PCIE] = {
-		.max_writes	= 4,
-		.max_reads	= 12,
-	},
-};
-
-/* Endpoint configuration for the SDM845 SoC. */
-static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
-	[IPA_ENDPOINT_AP_COMMAND_TX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 4,
-		.endpoint_id	= 5,
-		.toward_ipa	= true,
-		.channel = {
-			.tre_count	= 512,
-			.event_count	= 256,
-			.tlv_count	= 20,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
-				.dma_mode	= true,
-				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
-				.tx = {
-					.seq_type = IPA_SEQ_DMA,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_LAN_RX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 5,
-		.endpoint_id	= 9,
-		.toward_ipa	= false,
-		.channel = {
-			.tre_count	= 256,
-			.event_count	= 256,
-			.tlv_count	= 8,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
-				.aggregation	= true,
-				.status_enable	= true,
-				.rx = {
-					.pad_align	= ilog2(sizeof(u32)),
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_MODEM_TX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 3,
-		.endpoint_id	= 2,
-		.toward_ipa	= true,
-		.channel = {
-			.tre_count	= 512,
-			.event_count	= 512,
-			.tlv_count	= 16,
-		},
-		.endpoint = {
-			.filter_support	= true,
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
-				.checksum	= true,
-				.qmap		= true,
-				.status_enable	= true,
-				.tx = {
-					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
-					.status_endpoint =
-						IPA_ENDPOINT_MODEM_AP_RX,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_MODEM_RX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 6,
-		.endpoint_id	= 10,
-		.toward_ipa	= false,
-		.channel = {
-			.tre_count	= 256,
-			.event_count	= 256,
-			.tlv_count	= 8,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
-				.checksum	= true,
-				.qmap		= true,
-				.aggregation	= true,
-				.rx = {
-					.aggr_close_eof	= true,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_MODEM_LAN_TX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 0,
-		.endpoint_id	= 3,
-		.toward_ipa	= true,
-		.endpoint = {
-			.filter_support	= true,
-		},
-	},
-	[IPA_ENDPOINT_MODEM_AP_TX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 4,
-		.endpoint_id	= 6,
-		.toward_ipa	= true,
-		.endpoint = {
-			.filter_support	= true,
-		},
-	},
-	[IPA_ENDPOINT_MODEM_AP_RX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 2,
-		.endpoint_id	= 12,
-		.toward_ipa	= false,
-	},
-};
-
-/* Source resource configuration data for the SDM845 SoC */
-static const struct ipa_resource ipa_resource_src[] = {
-	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
-		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 1,	.max = 255,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 1,	.max = 255,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 1,	.max = 63,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
-		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 10,	.max = 10,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 10,	.max = 10,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 8,	.max = 8,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
-		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 12,	.max = 12,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 14,	.max = 14,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 8,	.max = 8,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
-		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 0,	.max = 63,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 0,	.max = 63,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_MHI_DMA] = {
-			.min = 0,	.max = 63,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 0,	.max = 63,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
-		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
-			.min = 14,	.max = 14,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 20,	.max = 20,
-		},
-		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
-			.min = 14,	.max = 14,
-		},
-	},
-};
-
-/* Destination resource configuration data for the SDM845 SoC */
-static const struct ipa_resource ipa_resource_dst[] = {
-	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
-		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
-			.min = 4,	.max = 4,
-		},
-		.limits[1] = {
-			.min = 4,	.max = 4,
-		},
-		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
-			.min = 3,	.max = 3,
-		}
-	},
-	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
-		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
-			.min = 2,	.max = 63,
-		},
-		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 1,	.max = 63,
-		},
-		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
-			.min = 1,	.max = 2,
-		}
-	},
-};
-
-/* Resource configuration for the SDM845 SoC. */
-static const struct ipa_resource_data ipa_resource_data = {
-	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
-	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
-	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
-	.resource_src		= ipa_resource_src,
-	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
-	.resource_dst		= ipa_resource_dst,
-};
-
-/* IPA-resident memory region configuration for the SDM845 SoC. */
-static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
-		.offset		= 0x0000,
-		.size		= 0x0080,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_UC_INFO] = {
-		.offset		= 0x0080,
-		.size		= 0x0200,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
-		.offset		= 0x0288,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_FILTER] = {
-		.offset		= 0x0308,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
-		.offset		= 0x0388,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_FILTER] = {
-		.offset		= 0x0408,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
-		.offset		= 0x0488,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_ROUTE] = {
-		.offset		= 0x0508,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
-		.offset		= 0x0588,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_ROUTE] = {
-		.offset		= 0x0608,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_MODEM_HEADER] = {
-		.offset		= 0x0688,
-		.size		= 0x0140,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
-		.offset		= 0x07d0,
-		.size		= 0x0200,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_AP_PROC_CTX] = {
-		.offset		= 0x09d0,
-		.size		= 0x0200,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_MODEM] = {
-		.offset		= 0x0bd8,
-		.size		= 0x1024,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_UC_EVENT_RING] = {
-		.offset		= 0x1c00,
-		.size		= 0x0400,
-		.canary_count	= 1,
-	},
-};
-
-static const struct ipa_mem_data ipa_mem_data = {
-	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
-	.local		= ipa_mem_local_data,
-	.imem_addr	= 0x146bd000,
-	.imem_size	= 0x00002000,
-	.smem_id	= 497,
-	.smem_size	= 0x00002000,
-};
-
-/* Interconnect bandwidths are in 1000 byte/second units */
-static const struct ipa_interconnect_data ipa_interconnect_data[] = {
-	{
-		.name			= "memory",
-		.peak_bandwidth		= 600000,	/* 600 MBps */
-		.average_bandwidth	= 80000,	/* 80 MBps */
-	},
-	/* Average bandwidth is unused for the next two interconnects */
-	{
-		.name			= "imem",
-		.peak_bandwidth		= 350000,	/* 350 MBps */
-		.average_bandwidth	= 0,		/* unused */
-	},
-	{
-		.name			= "config",
-		.peak_bandwidth		= 40000,	/* 40 MBps */
-		.average_bandwidth	= 0,		/* unused */
-	},
-};
-
-static const struct ipa_clock_data ipa_clock_data = {
-	.core_clock_rate	= 75 * 1000 * 1000,	/* Hz */
-	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
-	.interconnect_data	= ipa_interconnect_data,
-};
-
-/* Configuration data for the SDM845 SoC. */
-const struct ipa_data ipa_data_sdm845 = {
-	.version	= IPA_VERSION_3_5_1,
-	.backward_compat = BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
-			   BCR_TX_NOT_USING_BRESP_FMASK |
-			   BCR_SUSPEND_L2_IRQ_FMASK |
-			   BCR_HOLB_DROP_L2_IRQ_FMASK |
-			   BCR_DUAL_TX_FMASK,
-	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
-	.qsb_data	= ipa_qsb_data,
-	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
-	.endpoint_data	= ipa_gsi_endpoint_data,
-	.resource_data	= &ipa_resource_data,
-	.mem_data	= &ipa_mem_data,
-	.clock_data	= &ipa_clock_data,
-};
diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
new file mode 100644
index 000000000000..57703e95a3f9
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2019-2021 Linaro Ltd.
+ */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v3.5.1 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v3.5.1 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_LWA_DL	= 0,
+	IPA_RSRC_GROUP_SRC_UL_DL,
+	IPA_RSRC_GROUP_SRC_MHI_DMA,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_LWA_DL	= 0,
+	IPA_RSRC_GROUP_DST_UL_DL_DPL,
+	IPA_RSRC_GROUP_DST_UNUSED_2,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 8,
+	},
+	[IPA_QSB_MASTER_PCIE] = {
+		.max_writes	= 4,
+		.max_reads	= 12,
+	},
+};
+
+/* Endpoint datdata for an SoC having IPA v3.5.1 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 4,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 256,
+			.tlv_count	= 20,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 5,
+		.endpoint_id	= 9,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 8,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 3,
+		.endpoint_id	= 2,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 16,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.checksum	= true,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 6,
+		.endpoint_id	= 10,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 8,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.checksum	= true,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_LAN_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 3,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 4,
+		.endpoint_id	= 6,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 2,
+		.endpoint_id	= 12,
+		.toward_ipa	= false,
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
+			.min = 1,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 1,	.max = 255,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 1,	.max = 63,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
+			.min = 10,	.max = 10,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 10,	.max = 10,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
+			.min = 12,	.max = 12,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 14,	.max = 14,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_MHI_DMA] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 63,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_LWA_DL] = {
+			.min = 14,	.max = 14,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 20,	.max = 20,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 14,	.max = 14,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
+			.min = 4,	.max = 4,
+		},
+		.limits[1] = {
+			.min = 4,	.max = 4,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 3,	.max = 3,
+		}
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_LWA_DL] = {
+			.min = 2,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 1,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 1,	.max = 2,
+		}
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v3.5.1 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	[IPA_MEM_UC_SHARED] = {
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_INFO] = {
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_V4_FILTER_HASHED] = {
+		.offset		= 0x0288,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_FILTER] = {
+		.offset		= 0x0308,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER_HASHED] = {
+		.offset		= 0x0388,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER] = {
+		.offset		= 0x0408,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.offset		= 0x0488,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE] = {
+		.offset		= 0x0508,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.offset		= 0x0588,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE] = {
+		.offset		= 0x0608,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_HEADER] = {
+		.offset		= 0x0688,
+		.size		= 0x0140,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_PROC_CTX] = {
+		.offset		= 0x07d0,
+		.size		= 0x0200,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_PROC_CTX] = {
+		.offset		= 0x09d0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM] = {
+		.offset		= 0x0bd8,
+		.size		= 0x1024,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_EVENT_RING] = {
+		.offset		= 0x1c00,
+		.size		= 0x0400,
+		.canary_count	= 1,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146bd000,
+	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00002000,
+};
+
+/* Interconnect bandwidths are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "memory",
+		.peak_bandwidth		= 600000,	/* 600 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps */
+	},
+	/* Average bandwidth is unused for the next two interconnects */
+	{
+		.name			= "imem",
+		.peak_bandwidth		= 350000,	/* 350 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+	{
+		.name			= "config",
+		.peak_bandwidth		= 40000,	/* 40 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v3.5.1 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 75 * 1000 * 1000,	/* Hz */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v3.5.1 */
+const struct ipa_data ipa_data_v3_5_1 = {
+	.version	= IPA_VERSION_3_5_1,
+	.backward_compat = BCR_CMDQ_L_LACK_ONE_ENTRY_FMASK |
+			   BCR_TX_NOT_USING_BRESP_FMASK |
+			   BCR_SUSPEND_L2_IRQ_FMASK |
+			   BCR_HOLB_DROP_L2_IRQ_FMASK |
+			   BCR_DUAL_TX_FMASK,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 843d818f78e1..17cdc376e95f 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -300,7 +300,7 @@ struct ipa_data {
 	const struct ipa_clock_data *clock_data;
 };
 
-extern const struct ipa_data ipa_data_sdm845;
+extern const struct ipa_data ipa_data_v3_5_1;
 extern const struct ipa_data ipa_data_sc7180;
 
 #endif /* _IPA_DATA_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index afb8eb5618f7..37e85a5ab75c 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -578,7 +578,7 @@ out_release_firmware:
 static const struct of_device_id ipa_match[] = {
 	{
 		.compatible	= "qcom,sdm845-ipa",
-		.data		= &ipa_data_sdm845,
+		.data		= &ipa_data_v3_5_1,
 	},
 	{
 		.compatible	= "qcom,sc7180-ipa",
-- 
cgit v1.2.3


From 782d767a2d0fb08bc4d5d26f789769a84b88400b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:09 -0500
Subject: net: ipa: use version based configuration for SC7180

Rename the SC7180 configuration data file so that its name is
derived from its IPA version.

Update a few other references to the code that talk about the SC7180
rather than just IPA v4.2.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile          |   2 +-
 drivers/net/ipa/ipa_data-sc7180.c | 360 -------------------------------------
 drivers/net/ipa/ipa_data-v4.2.c   | 362 ++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h        |   2 +-
 drivers/net/ipa/ipa_main.c        |   2 +-
 5 files changed, 365 insertions(+), 363 deletions(-)
 delete mode 100644 drivers/net/ipa/ipa_data-sc7180.c
 create mode 100644 drivers/net/ipa/ipa_data-v4.2.c

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index a4a42fc7b840..6abd1db9fe33 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -9,4 +9,4 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_endpoint.o ipa_cmd.o ipa_modem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
-ipa-y			+=	ipa_data-v3.5.1.o ipa_data-sc7180.o
+ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o
diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
deleted file mode 100644
index 810c673be56e..000000000000
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ /dev/null
@@ -1,360 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* Copyright (C) 2019-2021 Linaro Ltd. */
-
-#include <linux/log2.h>
-
-#include "gsi.h"
-#include "ipa_data.h"
-#include "ipa_endpoint.h"
-#include "ipa_mem.h"
-
-/** enum ipa_resource_type - IPA resource types */
-enum ipa_resource_type {
-	/* Source resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
-	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
-	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
-	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
-
-	/* Destination resource types; first must have value 0 */
-	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
-	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
-};
-
-/* Resource groups used for the SC7180 SoC */
-enum ipa_rsrc_group_id {
-	/* Source resource group identifiers */
-	IPA_RSRC_GROUP_SRC_UL_DL	= 0,
-	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
-
-	/* Destination resource group identifiers */
-	IPA_RSRC_GROUP_DST_UL_DL_DPL	= 0,
-	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
-};
-
-/* QSB configuration for the SC7180 SoC. */
-static const struct ipa_qsb_data ipa_qsb_data[] = {
-	[IPA_QSB_MASTER_DDR] = {
-		.max_writes	= 8,
-		.max_reads	= 12,
-		/* no outstanding read byte (beat) limit */
-	},
-};
-
-/* Endpoint configuration for the SC7180 SoC. */
-static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
-	[IPA_ENDPOINT_AP_COMMAND_TX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 1,
-		.endpoint_id	= 6,
-		.toward_ipa	= true,
-		.channel = {
-			.tre_count	= 256,
-			.event_count	= 256,
-			.tlv_count	= 20,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
-				.dma_mode	= true,
-				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
-				.tx = {
-					.seq_type = IPA_SEQ_DMA,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_LAN_RX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 2,
-		.endpoint_id	= 8,
-		.toward_ipa	= false,
-		.channel = {
-			.tre_count	= 256,
-			.event_count	= 256,
-			.tlv_count	= 6,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
-				.aggregation	= true,
-				.status_enable	= true,
-				.rx = {
-					.pad_align	= ilog2(sizeof(u32)),
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_MODEM_TX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 0,
-		.endpoint_id	= 1,
-		.toward_ipa	= true,
-		.channel = {
-			.tre_count	= 512,
-			.event_count	= 512,
-			.tlv_count	= 8,
-		},
-		.endpoint = {
-			.filter_support	= true,
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
-				.checksum	= true,
-				.qmap		= true,
-				.status_enable	= true,
-				.tx = {
-					.seq_type = IPA_SEQ_1_PASS_SKIP_LAST_UC,
-					.seq_rep_type = IPA_SEQ_REP_DMA_PARSER,
-					.status_endpoint =
-						IPA_ENDPOINT_MODEM_AP_RX,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_AP_MODEM_RX] = {
-		.ee_id		= GSI_EE_AP,
-		.channel_id	= 3,
-		.endpoint_id	= 9,
-		.toward_ipa	= false,
-		.channel = {
-			.tre_count	= 256,
-			.event_count	= 256,
-			.tlv_count	= 6,
-		},
-		.endpoint = {
-			.config = {
-				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
-				.checksum	= true,
-				.qmap		= true,
-				.aggregation	= true,
-				.rx = {
-					.aggr_close_eof	= true,
-				},
-			},
-		},
-	},
-	[IPA_ENDPOINT_MODEM_COMMAND_TX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 1,
-		.endpoint_id	= 5,
-		.toward_ipa	= true,
-	},
-	[IPA_ENDPOINT_MODEM_LAN_RX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 3,
-		.endpoint_id	= 11,
-		.toward_ipa	= false,
-	},
-	[IPA_ENDPOINT_MODEM_AP_TX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 0,
-		.endpoint_id	= 4,
-		.toward_ipa	= true,
-		.endpoint = {
-			.filter_support	= true,
-		},
-	},
-	[IPA_ENDPOINT_MODEM_AP_RX] = {
-		.ee_id		= GSI_EE_MODEM,
-		.channel_id	= 2,
-		.endpoint_id	= 10,
-		.toward_ipa	= false,
-	},
-};
-
-/* Source resource configuration data for the SC7180 SoC */
-static const struct ipa_resource ipa_resource_src[] = {
-	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 3,	.max = 63,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 3,	.max = 3,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 10,	.max = 10,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 1,	.max = 1,
-		},
-	},
-	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
-		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
-			.min = 5,	.max = 5,
-		},
-	},
-};
-
-/* Destination resource configuration data for the SC7180 SoC */
-static const struct ipa_resource ipa_resource_dst[] = {
-	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
-		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 3,	.max = 3,
-		},
-	},
-	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
-		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
-			.min = 1,	.max = 63,
-		},
-	},
-};
-
-/* Resource configuration for the SC7180 SoC. */
-static const struct ipa_resource_data ipa_resource_data = {
-	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
-	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
-	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
-	.resource_src		= ipa_resource_src,
-	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
-	.resource_dst		= ipa_resource_dst,
-};
-
-/* IPA-resident memory region configuration for the SC7180 SoC. */
-static const struct ipa_mem ipa_mem_local_data[] = {
-	[IPA_MEM_UC_SHARED] = {
-		.offset		= 0x0000,
-		.size		= 0x0080,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_UC_INFO] = {
-		.offset		= 0x0080,
-		.size		= 0x0200,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_V4_FILTER_HASHED] = {
-		.offset		= 0x0288,
-		.size		= 0,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_FILTER] = {
-		.offset		= 0x0290,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_FILTER_HASHED] = {
-		.offset		= 0x0310,
-		.size		= 0,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_FILTER] = {
-		.offset		= 0x0318,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_ROUTE_HASHED] = {
-		.offset		= 0x0398,
-		.size		= 0,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V4_ROUTE] = {
-		.offset		= 0x03a0,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_ROUTE_HASHED] = {
-		.offset		= 0x0420,
-		.size		= 0,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_V6_ROUTE] = {
-		.offset		= 0x0428,
-		.size		= 0x0078,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_MODEM_HEADER] = {
-		.offset		= 0x04a8,
-		.size		= 0x0140,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_MODEM_PROC_CTX] = {
-		.offset		= 0x05f0,
-		.size		= 0x0200,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_AP_PROC_CTX] = {
-		.offset		= 0x07f0,
-		.size		= 0x0200,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_PDN_CONFIG] = {
-		.offset		= 0x09f8,
-		.size		= 0x0050,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_STATS_QUOTA_MODEM] = {
-		.offset		= 0x0a50,
-		.size		= 0x0060,
-		.canary_count	= 2,
-	},
-	[IPA_MEM_STATS_TETHERING] = {
-		.offset		= 0x0ab0,
-		.size		= 0x0140,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_MODEM] = {
-		.offset		= 0x0bf0,
-		.size		= 0x140c,
-		.canary_count	= 0,
-	},
-	[IPA_MEM_UC_EVENT_RING] = {
-		.offset		= 0x2000,
-		.size		= 0,
-		.canary_count	= 1,
-	},
-};
-
-static const struct ipa_mem_data ipa_mem_data = {
-	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
-	.local		= ipa_mem_local_data,
-	.imem_addr	= 0x146a8000,
-	.imem_size	= 0x00002000,
-	.smem_id	= 497,
-	.smem_size	= 0x00002000,
-};
-
-/* Interconnect bandwidths are in 1000 byte/second units */
-static const struct ipa_interconnect_data ipa_interconnect_data[] = {
-	{
-		.name			= "memory",
-		.peak_bandwidth		= 465000,	/* 465 MBps */
-		.average_bandwidth	= 80000,	/* 80 MBps */
-	},
-	/* Average bandwidth is unused for the next two interconnects */
-	{
-		.name			= "imem",
-		.peak_bandwidth		= 68570,	/* 68.570 MBps */
-		.average_bandwidth	= 0,		/* unused */
-	},
-	{
-		.name			= "config",
-		.peak_bandwidth		= 30000,	/* 30 MBps */
-		.average_bandwidth	= 0,		/* unused */
-	},
-};
-
-static const struct ipa_clock_data ipa_clock_data = {
-	.core_clock_rate	= 100 * 1000 * 1000,	/* Hz */
-	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
-	.interconnect_data	= ipa_interconnect_data,
-};
-
-/* Configuration data for the SC7180 SoC. */
-const struct ipa_data ipa_data_sc7180 = {
-	.version	= IPA_VERSION_4_2,
-	/* backward_compat value is 0 */
-	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
-	.qsb_data	= ipa_qsb_data,
-	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
-	.endpoint_data	= ipa_gsi_endpoint_data,
-	.resource_data	= &ipa_resource_data,
-	.mem_data	= &ipa_mem_data,
-	.clock_data	= &ipa_clock_data,
-};
diff --git a/drivers/net/ipa/ipa_data-v4.2.c b/drivers/net/ipa/ipa_data-v4.2.c
new file mode 100644
index 000000000000..8744f19c6401
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v4.2.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2019-2021 Linaro Ltd. */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.2 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v4.2 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UL_DL	= 0,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UL_DL_DPL	= 0,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v4.2 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes	= 8,
+		.max_reads	= 12,
+		/* no outstanding read byte (beat) limit */
+	},
+};
+
+/* Endpoint configuration data for an SoC having IPA v4.2 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 1,
+		.endpoint_id	= 6,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 20,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 2,
+		.endpoint_id	= 8,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 6,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 0,
+		.endpoint_id	= 1,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 8,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.checksum	= true,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_1_PASS_SKIP_LAST_UC,
+					.seq_rep_type = IPA_SEQ_REP_DMA_PARSER,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 3,
+		.endpoint_id	= 9,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 6,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.checksum	= true,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_COMMAND_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 1,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+	},
+	[IPA_ENDPOINT_MODEM_LAN_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 3,
+		.endpoint_id	= 11,
+		.toward_ipa	= false,
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 4,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 2,
+		.endpoint_id	= 10,
+		.toward_ipa	= false,
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v4.2 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 3,	.max = 63,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 3,	.max = 3,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 10,	.max = 10,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 1,	.max = 1,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 5,	.max = 5,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v4.2 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 3,	.max = 3,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 1,	.max = 63,
+		},
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v4.2 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v4.2 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	[IPA_MEM_UC_SHARED] = {
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_INFO] = {
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_V4_FILTER_HASHED] = {
+		.offset		= 0x0288,
+		.size		= 0,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_FILTER] = {
+		.offset		= 0x0290,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER_HASHED] = {
+		.offset		= 0x0310,
+		.size		= 0,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER] = {
+		.offset		= 0x0318,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.offset		= 0x0398,
+		.size		= 0,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE] = {
+		.offset		= 0x03a0,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.offset		= 0x0420,
+		.size		= 0,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE] = {
+		.offset		= 0x0428,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_HEADER] = {
+		.offset		= 0x04a8,
+		.size		= 0x0140,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_PROC_CTX] = {
+		.offset		= 0x05f0,
+		.size		= 0x0200,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_PROC_CTX] = {
+		.offset		= 0x07f0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_PDN_CONFIG] = {
+		.offset		= 0x09f8,
+		.size		= 0x0050,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.offset		= 0x0a50,
+		.size		= 0x0060,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_STATS_TETHERING] = {
+		.offset		= 0x0ab0,
+		.size		= 0x0140,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM] = {
+		.offset		= 0x0bf0,
+		.size		= 0x140c,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_EVENT_RING] = {
+		.offset		= 0x2000,
+		.size		= 0,
+		.canary_count	= 1,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v4.2 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146a8000,
+	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00002000,
+};
+
+/* Interconnect rates are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "memory",
+		.peak_bandwidth		= 465000,	/* 465 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps */
+	},
+	/* Average bandwidth is unused for the next two interconnects */
+	{
+		.name			= "imem",
+		.peak_bandwidth		= 68570,	/* 68.570 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+	{
+		.name			= "config",
+		.peak_bandwidth		= 30000,	/* 30 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v4.2 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 100 * 1000 * 1000,	/* Hz */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v4.2 */
+const struct ipa_data ipa_data_v4_2 = {
+	.version	= IPA_VERSION_4_2,
+	/* backward_compat value is 0 */
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 17cdc376e95f..e97342e80d53 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -301,6 +301,6 @@ struct ipa_data {
 };
 
 extern const struct ipa_data ipa_data_v3_5_1;
-extern const struct ipa_data ipa_data_sc7180;
+extern const struct ipa_data ipa_data_v4_2;
 
 #endif /* _IPA_DATA_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 37e85a5ab75c..a970d10e650e 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -582,7 +582,7 @@ static const struct of_device_id ipa_match[] = {
 	},
 	{
 		.compatible	= "qcom,sc7180-ipa",
-		.data		= &ipa_data_sc7180,
+		.data		= &ipa_data_v4_2,
 	},
 	{ },
 };
-- 
cgit v1.2.3


From 19aaf72c0c7a26ab7ffc655a6d84da6a379f899b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:10 -0500
Subject: net: ipa: DMA addresses are nicely aligned

A recent patch avoided doing 64-bit modulo operations by checking
the alignment of some DMA allocations using only the lower 32 bits
of the address.

David Laight pointed out (after the fix was committed) that DMA
allocations might already satisfy the alignment requirements.  And
he was right.

Remove the alignment checks that occur after DMA allocation requests,
and update comments to explain why the constraint is satisfied.  The
only place IPA_TABLE_ALIGN was used was to check the alignment; it is
therefore no longer needed, so get rid of it.

Add comments where GSI_RING_ELEMENT_SIZE and the tre_count and
event_count channel data fields are defined to make explicit they
are required to be powers of 2.

Revise a comment in gsi_trans_pool_init_dma(), taking into account
that dma_alloc_coherent() guarantees its result is aligned to a page
size (or order thereof).

Don't bother printing an error if a DMA allocation fails.

Suggested-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c         | 13 ++++---------
 drivers/net/ipa/gsi_private.h |  2 +-
 drivers/net/ipa/gsi_trans.c   |  9 ++++-----
 drivers/net/ipa/ipa_data.h    |  4 ++--
 drivers/net/ipa/ipa_table.c   | 24 ++++++------------------
 5 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 585574af36ec..1c835b3e1a43 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -1444,18 +1444,13 @@ static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count)
 	dma_addr_t addr;
 
 	/* Hardware requires a 2^n ring size, with alignment equal to size.
-	 * The size is a power of 2, so we can check alignment using just
-	 * the bottom 32 bits for a DMA address of any size.
+	 * The DMA address returned by dma_alloc_coherent() is guaranteed to
+	 * be a power-of-2 number of pages, which satisfies the requirement.
 	 */
 	ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL);
-	if (ring->virt && lower_32_bits(addr) % size) {
-		dma_free_coherent(dev, size, ring->virt, addr);
-		dev_err(dev, "unable to alloc 0x%x-aligned ring buffer\n",
-			size);
-		return -EINVAL;	/* Not a good error value, but distinct */
-	} else if (!ring->virt) {
+	if (!ring->virt)
 		return -ENOMEM;
-	}
+
 	ring->addr = addr;
 	ring->count = count;
 
diff --git a/drivers/net/ipa/gsi_private.h b/drivers/net/ipa/gsi_private.h
index ed7bc26f85e9..ea333a244cf5 100644
--- a/drivers/net/ipa/gsi_private.h
+++ b/drivers/net/ipa/gsi_private.h
@@ -14,7 +14,7 @@ struct gsi_trans;
 struct gsi_ring;
 struct gsi_channel;
 
-#define GSI_RING_ELEMENT_SIZE	16	/* bytes */
+#define GSI_RING_ELEMENT_SIZE	16	/* bytes; must be a power of 2 */
 
 /* Return the entry that follows one provided in a transaction pool */
 void *gsi_trans_pool_next(struct gsi_trans_pool *pool, void *element);
diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 6c3ed5b17b80..70c2b585f98d 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -153,11 +153,10 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
 	size = __roundup_pow_of_two(size);
 	total_size = (count + max_alloc - 1) * size;
 
-	/* The allocator will give us a power-of-2 number of pages.  But we
-	 * can't guarantee that, so request it.  That way we won't waste any
-	 * memory that would be available beyond the required space.
-	 *
-	 * Note that gsi_trans_pool_exit_dma() assumes the total allocated
+	/* The allocator will give us a power-of-2 number of pages
+	 * sufficient to satisfy our request.  Round up our requested
+	 * size to avoid any unused space in the allocation.  This way
+	 * gsi_trans_pool_exit_dma() can assume the total allocated
 	 * size is exactly (count * size).
 	 */
 	total_size = get_order(total_size) << PAGE_SHIFT;
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index e97342e80d53..769f68923527 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -90,8 +90,8 @@ struct ipa_qsb_data {
  * that can be included in a single transaction.
  */
 struct gsi_channel_data {
-	u16 tre_count;
-	u16 event_count;
+	u16 tre_count;			/* must be a power of 2 */
+	u16 event_count;		/* must be a power of 2 */
 	u8 tlv_count;
 };
 
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 4236a50ff03a..d9538661755f 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -96,9 +96,6 @@
  *                 ----------------------
  */
 
-/* IPA hardware constrains filter and route tables alignment */
-#define IPA_TABLE_ALIGN			128	/* Minimum table alignment */
-
 /* Assignment of route table entries to the modem and AP */
 #define IPA_ROUTE_MODEM_MIN		0
 #define IPA_ROUTE_MODEM_COUNT		8
@@ -652,26 +649,17 @@ int ipa_table_init(struct ipa *ipa)
 
 	ipa_table_validate_build();
 
+	/* The IPA hardware requires route and filter table rules to be
+	 * aligned on a 128-byte boundary.  We put the "zero rule" at the
+	 * base of the table area allocated here.  The DMA address returned
+	 * by dma_alloc_coherent() is guaranteed to be a power-of-2 number
+	 * of pages, which satisfies the rule alignment requirement.
+	 */
 	size = IPA_ZERO_RULE_SIZE + (1 + count) * IPA_TABLE_ENTRY_SIZE;
 	virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL);
 	if (!virt)
 		return -ENOMEM;
 
-	/* We put the "zero rule" at the base of our table area.  The IPA
-	 * hardware requires route and filter table rules to be aligned
-	 * on a 128-byte boundary.  As long as the alignment constraint
-	 * is a power of 2, we can check alignment using just the bottom
-	 * 32 bits for a DMA address of any size.
-	 */
-	BUILD_BUG_ON(!is_power_of_2(IPA_TABLE_ALIGN));
-	if (lower_32_bits(addr) % IPA_TABLE_ALIGN) {
-		dev_err(dev, "table address %pad not %u-byte aligned\n",
-			&addr, IPA_TABLE_ALIGN);
-		dma_free_coherent(dev, size, virt, addr);
-
-		return -ERANGE;
-	}
-
 	ipa->table_virt = virt;
 	ipa->table_addr = addr;
 
-- 
cgit v1.2.3


From 4ea29143ebe6c453f5fddc80ffe4ed046f44aa3a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sun, 28 Mar 2021 12:31:11 -0500
Subject: net: ipa: kill IPA_TABLE_ENTRY_SIZE

Entries in an IPA route or filter table are 64-bit little-endian
addresses, each of which refers to a routing or filtering rule.

The format of these table slots are fixed, but IPA_TABLE_ENTRY_SIZE
is used to define their size.  This symbol doesn't really add value,
and I think it unnecessarily obscures what a table entry *is*.

So get rid of IPA_TABLE_ENTRY_SIZE, and just use sizeof(__le64) in
its place throughout the code.

Update the comments in "ipa_table.c" to provide a little better
explanation of these table slots.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_cmd.c   |  2 +-
 drivers/net/ipa/ipa_qmi.c   | 10 +++-----
 drivers/net/ipa/ipa_table.c | 59 ++++++++++++++++++++++++++-------------------
 drivers/net/ipa/ipa_table.h |  3 ---
 4 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index 2ac6dd8413de..525cdf28d9ea 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -153,7 +153,7 @@ static void ipa_cmd_validate_build(void)
 	 * of entries, as and IPv4 and IPv6 route tables have the same number
 	 * of entries.
 	 */
-#define TABLE_SIZE	(TABLE_COUNT_MAX * IPA_TABLE_ENTRY_SIZE)
+#define TABLE_SIZE	(TABLE_COUNT_MAX * sizeof(__le64))
 #define TABLE_COUNT_MAX	max_t(u32, IPA_ROUTE_COUNT_MAX, IPA_FILTER_COUNT_MAX)
 	BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_HASH_SIZE_FMASK));
 	BUILD_BUG_ON(TABLE_SIZE > field_max(IP_FLTRT_FLAGS_NHASH_SIZE_FMASK));
diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c
index ccdb4a6a4c75..593665efbcf9 100644
--- a/drivers/net/ipa/ipa_qmi.c
+++ b/drivers/net/ipa/ipa_qmi.c
@@ -308,12 +308,12 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 	mem = &ipa->mem[IPA_MEM_V4_ROUTE];
 	req.v4_route_tbl_info_valid = 1;
 	req.v4_route_tbl_info.start = ipa->mem_offset + mem->offset;
-	req.v4_route_tbl_info.count = mem->size / IPA_TABLE_ENTRY_SIZE;
+	req.v4_route_tbl_info.count = mem->size / sizeof(__le64);
 
 	mem = &ipa->mem[IPA_MEM_V6_ROUTE];
 	req.v6_route_tbl_info_valid = 1;
 	req.v6_route_tbl_info.start = ipa->mem_offset + mem->offset;
-	req.v6_route_tbl_info.count = mem->size / IPA_TABLE_ENTRY_SIZE;
+	req.v6_route_tbl_info.count = mem->size / sizeof(__le64);
 
 	mem = &ipa->mem[IPA_MEM_V4_FILTER];
 	req.v4_filter_tbl_start_valid = 1;
@@ -352,8 +352,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 		req.v4_hash_route_tbl_info_valid = 1;
 		req.v4_hash_route_tbl_info.start =
 				ipa->mem_offset + mem->offset;
-		req.v4_hash_route_tbl_info.count =
-				mem->size / IPA_TABLE_ENTRY_SIZE;
+		req.v4_hash_route_tbl_info.count = mem->size / sizeof(__le64);
 	}
 
 	mem = &ipa->mem[IPA_MEM_V6_ROUTE_HASHED];
@@ -361,8 +360,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi)
 		req.v6_hash_route_tbl_info_valid = 1;
 		req.v6_hash_route_tbl_info.start =
 			ipa->mem_offset + mem->offset;
-		req.v6_hash_route_tbl_info.count =
-			mem->size / IPA_TABLE_ENTRY_SIZE;
+		req.v6_hash_route_tbl_info.count = mem->size / sizeof(__le64);
 	}
 
 	mem = &ipa->mem[IPA_MEM_V4_FILTER_HASHED];
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index d9538661755f..401b568df6a3 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -27,28 +27,38 @@
 /**
  * DOC: IPA Filter and Route Tables
  *
- * The IPA has tables defined in its local shared memory that define filter
- * and routing rules.  Each entry in these tables contains a 64-bit DMA
- * address that refers to DRAM (system memory) containing a rule definition.
+ * The IPA has tables defined in its local (IPA-resident) memory that define
+ * filter and routing rules.  An entry in either of these tables is a little
+ * endian 64-bit "slot" that holds the address of a rule definition.  (The
+ * size of these slots is 64 bits regardless of the host DMA address size.)
+ *
+ * Separate tables (both filter and route) used for IPv4 and IPv6.  There
+ * are normally another set of "hashed" filter and route tables, which are
+ * used with a hash of message metadata.  Hashed operation is not supported
+ * by all IPA hardware (IPA v4.2 doesn't support hashed tables).
+ *
+ * Rules can be in local memory or in DRAM (system memory).  The offset of
+ * an object (such as a route or filter table) in IPA-resident memory must
+ * 128-byte aligned.  An object in system memory (such as a route or filter
+ * rule) must be at an 8-byte aligned address.  We currently only place
+ * route or filter rules in system memory.
+ *
  * A rule consists of a contiguous block of 32-bit values terminated with
  * 32 zero bits.  A special "zero entry" rule consisting of 64 zero bits
  * represents "no filtering" or "no routing," and is the reset value for
- * filter or route table rules.  Separate tables (both filter and route)
- * used for IPv4 and IPv6.  Additionally, there can be hashed filter or
- * route tables, which are used when a hash of message metadata matches.
- * Hashed operation is not supported by all IPA hardware.
+ * filter or route table rules.
  *
  * Each filter rule is associated with an AP or modem TX endpoint, though
- * not all TX endpoints support filtering.  The first 64-bit entry in a
+ * not all TX endpoints support filtering.  The first 64-bit slot in a
  * filter table is a bitmap indicating which endpoints have entries in
  * the table.  The low-order bit (bit 0) in this bitmap represents a
  * special global filter, which applies to all traffic.  This is not
  * used in the current code.  Bit 1, if set, indicates that there is an
- * entry (i.e. a DMA address referring to a rule) for endpoint 0 in the
- * table.  Bit 2, if set, indicates there is an entry for endpoint 1,
- * and so on.  Space is set aside in IPA local memory to hold as many
- * filter table entries as might be required, but typically they are not
- * all used.
+ * entry (i.e. slot containing a system address referring to a rule) for
+ * endpoint 0 in the table.  Bit 3, if set, indicates there is an entry
+ * for endpoint 2, and so on.  Space is set aside in IPA local memory to
+ * hold as many filter table entries as might be required, but typically
+ * they are not all used.
  *
  * The AP initializes all entries in a filter table to refer to a "zero"
  * entry.  Once initialized the modem and AP update the entries for
@@ -122,8 +132,7 @@ static void ipa_table_validate_build(void)
 	 * code in ipa_table_init() uses a pointer to __le64 to
 	 * initialize tables.
 	 */
-	BUILD_BUG_ON(sizeof(dma_addr_t) > IPA_TABLE_ENTRY_SIZE);
-	BUILD_BUG_ON(sizeof(__le64) != IPA_TABLE_ENTRY_SIZE);
+	BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(__le64));
 
 	/* A "zero rule" is used to represent no filtering or no routing.
 	 * It is a 64-bit block of zeroed memory.  Code in ipa_table_init()
@@ -154,7 +163,7 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed)
 		else
 			mem = hashed ? &ipa->mem[IPA_MEM_V4_ROUTE_HASHED]
 				     : &ipa->mem[IPA_MEM_V4_ROUTE];
-		size = IPA_ROUTE_COUNT_MAX * IPA_TABLE_ENTRY_SIZE;
+		size = IPA_ROUTE_COUNT_MAX * sizeof(__le64);
 	} else {
 		if (ipv6)
 			mem = hashed ? &ipa->mem[IPA_MEM_V6_FILTER_HASHED]
@@ -162,7 +171,7 @@ ipa_table_valid_one(struct ipa *ipa, bool route, bool ipv6, bool hashed)
 		else
 			mem = hashed ? &ipa->mem[IPA_MEM_V4_FILTER_HASHED]
 				     : &ipa->mem[IPA_MEM_V4_FILTER];
-		size = (1 + IPA_FILTER_COUNT_MAX) * IPA_TABLE_ENTRY_SIZE;
+		size = (1 + IPA_FILTER_COUNT_MAX) * sizeof(__le64);
 	}
 
 	if (!ipa_cmd_table_valid(ipa, mem, route, ipv6, hashed))
@@ -261,8 +270,8 @@ static void ipa_table_reset_add(struct gsi_trans *trans, bool filter,
 	if (filter)
 		first++;	/* skip over bitmap */
 
-	offset = mem->offset + first * IPA_TABLE_ENTRY_SIZE;
-	size = count * IPA_TABLE_ENTRY_SIZE;
+	offset = mem->offset + first * sizeof(__le64);
+	size = count * sizeof(__le64);
 	addr = ipa_table_addr(ipa, false, count);
 
 	ipa_cmd_dma_shared_mem_add(trans, offset, size, addr, true);
@@ -444,11 +453,11 @@ static void ipa_table_init_add(struct gsi_trans *trans, bool filter,
 		count = hweight32(ipa->filter_map);
 		hash_count = hash_mem->size ? count : 0;
 	} else {
-		count = mem->size / IPA_TABLE_ENTRY_SIZE;
-		hash_count = hash_mem->size / IPA_TABLE_ENTRY_SIZE;
+		count = mem->size / sizeof(__le64);
+		hash_count = hash_mem->size / sizeof(__le64);
 	}
-	size = count * IPA_TABLE_ENTRY_SIZE;
-	hash_size = hash_count * IPA_TABLE_ENTRY_SIZE;
+	size = count * sizeof(__le64);
+	hash_size = hash_count * sizeof(__le64);
 
 	addr = ipa_table_addr(ipa, filter, count);
 	hash_addr = ipa_table_addr(ipa, filter, hash_count);
@@ -655,7 +664,7 @@ int ipa_table_init(struct ipa *ipa)
 	 * by dma_alloc_coherent() is guaranteed to be a power-of-2 number
 	 * of pages, which satisfies the rule alignment requirement.
 	 */
-	size = IPA_ZERO_RULE_SIZE + (1 + count) * IPA_TABLE_ENTRY_SIZE;
+	size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64);
 	virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL);
 	if (!virt)
 		return -ENOMEM;
@@ -687,7 +696,7 @@ void ipa_table_exit(struct ipa *ipa)
 	struct device *dev = &ipa->pdev->dev;
 	size_t size;
 
-	size = IPA_ZERO_RULE_SIZE + (1 + count) * IPA_TABLE_ENTRY_SIZE;
+	size = IPA_ZERO_RULE_SIZE + (1 + count) * sizeof(__le64);
 
 	dma_free_coherent(dev, size, ipa->table_virt, ipa->table_addr);
 	ipa->table_addr = 0;
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index e14108ed62bd..018045b95aad 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -10,9 +10,6 @@
 
 struct ipa;
 
-/* The size of a filter or route table entry */
-#define IPA_TABLE_ENTRY_SIZE	sizeof(__le64)	/* Holds a physical address */
-
 /* The maximum number of filter table entries (IPv4, IPv6; hashed or not) */
 #define IPA_FILTER_COUNT_MAX	14
 
-- 
cgit v1.2.3


From 21cfd2db9f51c0454d44a103ff12398c2236d3a8 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sun, 28 Mar 2021 17:32:13 -0700
Subject: bpf: tcp: Fix an error in the bpf_tcp_ca_kfunc_ids list

There is a typo in the bbr function, s/even/event/.
This patch fixes it.

Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329003213.2274210-1-kafai@fb.com
---
 net/ipv4/bpf_tcp_ca.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 12777d444d0f..6bb7b335ff9f 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -206,7 +206,7 @@ BTF_ID(func, bbr_init)
 BTF_ID(func, bbr_main)
 BTF_ID(func, bbr_sndbuf_expand)
 BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_even)
+BTF_ID(func, bbr_cwnd_event)
 BTF_ID(func, bbr_ssthresh)
 BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
-- 
cgit v1.2.3


From 2f019ebd5330c38e3c28fc8d35cc0bce6f4ddfe4 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:15 -0800
Subject: igc: Remove unused argument from igc_tx_cmd_type()

The 'skb' argument from igc_tx_cmd_type() is not used so remove it.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index baa45a1f3a65..e1e38d797f29 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1029,7 +1029,7 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
 	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :	\
 	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
 
-static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+static u32 igc_tx_cmd_type(u32 tx_flags)
 {
 	/* set type for advanced descriptor with frame checksum insertion */
 	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
@@ -1078,7 +1078,7 @@ static int igc_tx_map(struct igc_ring *tx_ring,
 	u16 i = tx_ring->next_to_use;
 	unsigned int data_len, size;
 	dma_addr_t dma;
-	u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
+	u32 cmd_type = igc_tx_cmd_type(tx_flags);
 
 	tx_desc = IGC_TX_DESC(tx_ring, i);
 
-- 
cgit v1.2.3


From 613cf199fd10a610d46dd6b3a6eb3b14fe2b1104 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:16 -0800
Subject: igc: Introduce igc_rx_buffer_flip() helper

The igc driver implements the same page recycling scheme from other
Intel drivers which reuses the page by flipping the buffer. The code
to handle buffer flips is duplicated in many locations so introduce
the igc_rx_buffer_flip() helper and use it where applicable.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 43 +++++++++++++++----------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e1e38d797f29..1afbc903aaae 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1499,6 +1499,16 @@ static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
 	return rx_buffer;
 }
 
+static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
+			       unsigned int truesize)
+{
+#if (PAGE_SIZE < 8192)
+	buffer->page_offset ^= truesize;
+#else
+	buffer->page_offset += truesize;
+#endif
+}
+
 /**
  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
@@ -1513,20 +1523,19 @@ static void igc_add_rx_frag(struct igc_ring *rx_ring,
 			    struct sk_buff *skb,
 			    unsigned int size)
 {
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize;
 
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
-			rx_buffer->page_offset, size, truesize);
-	rx_buffer->page_offset ^= truesize;
+#if (PAGE_SIZE < 8192)
+	truesize = igc_rx_pg_size(rx_ring) / 2;
 #else
-	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
-				SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
-				SKB_DATA_ALIGN(size);
+	truesize = ring_uses_build_skb(rx_ring) ?
+		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+		   SKB_DATA_ALIGN(size);
+#endif
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
 			rx_buffer->page_offset, size, truesize);
-	rx_buffer->page_offset += truesize;
-#endif
+
+	igc_rx_buffer_flip(rx_buffer, truesize);
 }
 
 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
@@ -1555,13 +1564,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 	skb_reserve(skb, IGC_SKB_PAD);
 	__skb_put(skb, size);
 
-	/* update buffer offset */
-#if (PAGE_SIZE < 8192)
-	rx_buffer->page_offset ^= truesize;
-#else
-	rx_buffer->page_offset += truesize;
-#endif
-
+	igc_rx_buffer_flip(rx_buffer, truesize);
 	return skb;
 }
 
@@ -1607,11 +1610,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 		skb_add_rx_frag(skb, 0, rx_buffer->page,
 				(va + headlen) - page_address(rx_buffer->page),
 				size, truesize);
-#if (PAGE_SIZE < 8192)
-		rx_buffer->page_offset ^= truesize;
-#else
-		rx_buffer->page_offset += truesize;
-#endif
+		igc_rx_buffer_flip(rx_buffer, truesize);
 	} else {
 		rx_buffer->pagecnt_bias++;
 	}
-- 
cgit v1.2.3


From a39f5e530559e0e39ab26b28ce5fa9550fd3ec5b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:17 -0800
Subject: igc: Introduce igc_get_rx_frame_truesize() helper

The RX frame truesize calculation is scattered throughout the RX code.
This patch creates the helper function igc_get_rx_frame_truesize() and
uses it where applicable.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1afbc903aaae..d6947c1d6f49 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1509,6 +1509,22 @@ static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
 #endif
 }
 
+static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
+					      unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = igc_rx_pg_size(ring) / 2;
+#else
+	truesize = ring_uses_build_skb(ring) ?
+		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+		   SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
 /**
  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
  * @rx_ring: rx descriptor ring to transact packets on
@@ -1544,12 +1560,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 				     unsigned int size)
 {
 	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
-				SKB_DATA_ALIGN(IGC_SKB_PAD + size);
-#endif
+	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
@@ -1574,11 +1585,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 					 unsigned int size)
 {
 	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
-#else
-	unsigned int truesize = SKB_DATA_ALIGN(size);
-#endif
+	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
 	unsigned int headlen;
 	struct sk_buff *skb;
 
-- 
cgit v1.2.3


From e1ed4f92a6256ec11445de51c2b5a7d89289f8d1 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:18 -0800
Subject: igc: Refactor Rx timestamp handling

Refactor the Rx timestamp handling in preparation to land
XDP support.

Rx timestamps are put in the Rx buffer by hardware, before the packet
data. When creating the xdp buffer, we will need to check the Rx
descriptor to determine if the buffer contains timestamp information
and consider the offset when setting xdp.data.

The Rx descriptor check is already done in igc_construct_skb(). To
avoid code duplication, this patch moves the timestamp handling to
igc_clean_rx_irq() so both skb and xdp paths can reuse it.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |  3 +--
 drivers/net/ethernet/intel/igc/igc_main.c | 30 ++++++++++++++++++++----------
 drivers/net/ethernet/intel/igc/igc_ptp.c  | 25 +++++++++++++------------
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 1b08a7dc7bc4..c6d38c6a1a6a 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -547,8 +547,7 @@ void igc_ptp_init(struct igc_adapter *adapter);
 void igc_ptp_reset(struct igc_adapter *adapter);
 void igc_ptp_suspend(struct igc_adapter *adapter);
 void igc_ptp_stop(struct igc_adapter *adapter);
-void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
-			 struct sk_buff *skb);
+ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf);
 int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
 void igc_ptp_tx_hang(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index d6947c1d6f49..1eacb4ebd9f6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1581,10 +1581,11 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 
 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 					 struct igc_rx_buffer *rx_buffer,
-					 union igc_adv_rx_desc *rx_desc,
-					 unsigned int size)
+					 unsigned int size, int pkt_offset,
+					 ktime_t timestamp)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset +
+		   pkt_offset;
 	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
 	unsigned int headlen;
 	struct sk_buff *skb;
@@ -1597,11 +1598,8 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 	if (unlikely(!skb))
 		return NULL;
 
-	if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) {
-		igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
-		va += IGC_TS_HDR_LEN;
-		size -= IGC_TS_HDR_LEN;
-	}
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
 
 	/* Determine available headroom for copy */
 	headlen = size;
@@ -1895,6 +1893,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 	while (likely(total_packets < budget)) {
 		union igc_adv_rx_desc *rx_desc;
 		struct igc_rx_buffer *rx_buffer;
+		ktime_t timestamp = 0;
+		int pkt_offset = 0;
 		unsigned int size;
 
 		/* return some buffers to hardware, one at a time is too slow */
@@ -1916,14 +1916,24 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 
 		rx_buffer = igc_get_rx_buffer(rx_ring, size);
 
+		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
+			void *pktbuf = page_address(rx_buffer->page) +
+				       rx_buffer->page_offset;
+
+			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+							pktbuf);
+			pkt_offset = IGC_TS_HDR_LEN;
+			size -= IGC_TS_HDR_LEN;
+		}
+
 		/* retrieve a buffer from the ring */
 		if (skb)
 			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
 		else if (ring_uses_build_skb(rx_ring))
 			skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
 		else
-			skb = igc_construct_skb(rx_ring, rx_buffer,
-						rx_desc, size);
+			skb = igc_construct_skb(rx_ring, rx_buffer, size,
+						pkt_offset, timestamp);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 545f4d0e67cf..dfa3b247fcd8 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -153,20 +153,20 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
 
 /**
  * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer
- * @q_vector: Pointer to interrupt specific structure
- * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
+ * @adapter: Pointer to adapter the packet buffer belongs to
+ * @buf: Pointer to packet buffer
  *
  * This function retrieves the timestamp saved in the beginning of packet
  * buffer. While two timestamps are available, one in timer0 reference and the
  * other in timer1 reference, this function considers only the timestamp in
  * timer0 reference.
+ *
+ * Returns timestamp value.
  */
-void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
-			 struct sk_buff *skb)
+ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf)
 {
-	struct igc_adapter *adapter = q_vector->adapter;
-	u64 regval;
+	ktime_t timestamp;
+	u32 secs, nsecs;
 	int adjust;
 
 	/* Timestamps are saved in little endian at the beginning of the packet
@@ -178,9 +178,10 @@ void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
 	 * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds
 	 * part of the timestamp.
 	 */
-	regval = le32_to_cpu(va[2]);
-	regval |= (u64)le32_to_cpu(va[3]) << 32;
-	igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+	nsecs = le32_to_cpu(buf[2]);
+	secs = le32_to_cpu(buf[3]);
+
+	timestamp = ktime_set(secs, nsecs);
 
 	/* Adjust timestamp for the RX latency based on link speed */
 	switch (adapter->link_speed) {
@@ -201,8 +202,8 @@ void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
 		netdev_warn_once(adapter->netdev, "Imprecise timestamp\n");
 		break;
 	}
-	skb_hwtstamps(skb)->hwtstamp =
-		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+
+	return ktime_sub_ns(timestamp, adjust);
 }
 
 static void igc_ptp_disable_rx_timestamp(struct igc_adapter *adapter)
-- 
cgit v1.2.3


From 1bf33f71f9813688bf5ca5a4db0743bb4a4dd563 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:19 -0800
Subject: igc: Add set/clear large buffer helpers

While commit 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings")
introduced code to handle larger packet buffers, it missed the
set/clear helpers which enable/disable that feature. Introduce
the missing helpers which will be use in the next patch.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index c6d38c6a1a6a..b00cd8696b6d 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -504,6 +504,10 @@ enum igc_ring_flags_t {
 
 #define ring_uses_large_buffer(ring) \
 	test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define set_ring_uses_large_buffer(ring) \
+	set_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define clear_ring_uses_large_buffer(ring) \
+	clear_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
 
 #define ring_uses_build_skb(ring) \
 	test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
-- 
cgit v1.2.3


From 26575105d6ed8e2a8e43bd008fc7d98b75b90d5c Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:20 -0800
Subject: igc: Add initial XDP support

Add the initial XDP support to the igc driver. For now, only XDP_PASS,
XDP_DROP, XDP_ABORTED actions are supported. Upcoming patches will add
support for the remaining XDP actions.

XDP configuration helpers are defined in a new file, igc_xdp.c. These
helpers are utilized in igc_main.c to implement the ndo_bpf callback.
XDP-related code that belongs to the driver's hot path is landed in
igc_main.c.

By default, the driver uses Rx buffers with 2 KB size. When XDP is
enabled, it uses larger buffers so we have enough space to accommodate
the headroom and tailroom required by XDP infrastructure. Also, the
driver doesn't support XDP functionality with frames that span over
multiple buffers so jumbo frames are not allowed for now.

The approach implemented follows the approach implemented in other Intel
drivers as much as possible for the sake of consistency across the
drivers.

Quick comment regarding igc_build_skb(): this patch doesn't touch it
because the function is never called. It seems its support is
incomplete/in progress. The function was added by commit 0507ef8a0372b
("igc: Add transmit and receive fastpath and interrupt handlers") but
ring_uses_build_skb() always return False since the IGC_RING_FLAG_RX_
BUILD_SKB_ENABLED isn't set anywhere in the driver code.

This patch has been tested with the sample app "xdp1" located in
samples/bpf/ dir.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/Makefile   |   2 +-
 drivers/net/ethernet/intel/igc/igc.h      |   2 +
 drivers/net/ethernet/intel/igc/igc_main.c | 118 +++++++++++++++++++++++++++---
 drivers/net/ethernet/intel/igc/igc_xdp.c  |  33 +++++++++
 drivers/net/ethernet/intel/igc/igc_xdp.h  |  10 +++
 5 files changed, 153 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/igc/igc_xdp.c
 create mode 100644 drivers/net/ethernet/intel/igc/igc_xdp.h

diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index 1c3051db9085..95d1e8c490a4 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -8,4 +8,4 @@
 obj-$(CONFIG_IGC) += igc.o
 
 igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
-igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o
+igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index b00cd8696b6d..3a1737227222 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -219,6 +219,8 @@ struct igc_adapter {
 	ktime_t ptp_reset_start; /* Reset time in clock mono */
 
 	char fw_version[32];
+
+	struct bpf_prog *xdp_prog;
 };
 
 void igc_up(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1eacb4ebd9f6..7da31da523c8 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -10,17 +10,22 @@
 #include <linux/ip.h>
 #include <linux/pm_runtime.h>
 #include <net/pkt_sched.h>
+#include <linux/bpf_trace.h>
 
 #include <net/ipv6.h>
 
 #include "igc.h"
 #include "igc_hw.h"
 #include "igc_tsn.h"
+#include "igc_xdp.h"
 
 #define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
+#define IGC_XDP_PASS		0
+#define IGC_XDP_CONSUMED	BIT(0)
+
 static int debug = -1;
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -375,6 +380,8 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring)
 			i = 0;
 	}
 
+	clear_ring_uses_large_buffer(rx_ring);
+
 	rx_ring->next_to_alloc = 0;
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
@@ -497,6 +504,11 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 	return err;
 }
 
+static bool igc_xdp_is_enabled(struct igc_adapter *adapter)
+{
+	return !!adapter->xdp_prog;
+}
+
 /**
  * igc_configure_rx_ring - Configure a receive ring after Reset
  * @adapter: board private structure
@@ -513,6 +525,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
 	u32 srrctl = 0, rxdctl = 0;
 	u64 rdba = ring->dma;
 
+	if (igc_xdp_is_enabled(adapter))
+		set_ring_uses_large_buffer(ring);
+
 	/* disable the queue */
 	wr32(IGC_RXDCTL(reg_idx), 0);
 
@@ -1581,12 +1596,12 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
 
 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
 					 struct igc_rx_buffer *rx_buffer,
-					 unsigned int size, int pkt_offset,
+					 struct xdp_buff *xdp,
 					 ktime_t timestamp)
 {
-	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset +
-		   pkt_offset;
+	unsigned int size = xdp->data_end - xdp->data;
 	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
+	void *va = xdp->data;
 	unsigned int headlen;
 	struct sk_buff *skb;
 
@@ -1730,6 +1745,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
 				union igc_adv_rx_desc *rx_desc,
 				struct sk_buff *skb)
 {
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
 	if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
 		struct net_device *netdev = rx_ring->netdev;
 
@@ -1769,7 +1788,14 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring,
 
 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
 {
-	return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
+	struct igc_adapter *adapter = rx_ring->q_vector->adapter;
+
+	if (ring_uses_build_skb(rx_ring))
+		return IGC_SKB_PAD;
+	if (igc_xdp_is_enabled(adapter))
+		return XDP_PACKET_HEADROOM;
+
+	return 0;
 }
 
 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
@@ -1883,6 +1909,42 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
 	}
 }
 
+static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
+					struct xdp_buff *xdp)
+{
+	struct bpf_prog *prog;
+	int res;
+	u32 act;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(adapter->xdp_prog);
+	if (!prog) {
+		res = IGC_XDP_PASS;
+		goto unlock;
+	}
+
+	act = bpf_prog_run_xdp(prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		res = IGC_XDP_PASS;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		res = IGC_XDP_CONSUMED;
+		break;
+	}
+
+unlock:
+	rcu_read_unlock();
+	return ERR_PTR(-res);
+}
+
 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 {
 	unsigned int total_bytes = 0, total_packets = 0;
@@ -1894,8 +1956,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		union igc_adv_rx_desc *rx_desc;
 		struct igc_rx_buffer *rx_buffer;
 		ktime_t timestamp = 0;
+		struct xdp_buff xdp;
 		int pkt_offset = 0;
 		unsigned int size;
+		void *pktbuf;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
@@ -1916,24 +1980,38 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 
 		rx_buffer = igc_get_rx_buffer(rx_ring, size);
 
-		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
-			void *pktbuf = page_address(rx_buffer->page) +
-				       rx_buffer->page_offset;
+		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
 
+		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
 			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
 							pktbuf);
 			pkt_offset = IGC_TS_HDR_LEN;
 			size -= IGC_TS_HDR_LEN;
 		}
 
-		/* retrieve a buffer from the ring */
-		if (skb)
+		if (!skb) {
+			struct igc_adapter *adapter = q_vector->adapter;
+
+			xdp.data = pktbuf + pkt_offset;
+			xdp.data_end = xdp.data + size;
+			xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring);
+			xdp_set_data_meta_invalid(&xdp);
+			xdp.frame_sz = igc_get_rx_frame_truesize(rx_ring, size);
+
+			skb = igc_xdp_run_prog(adapter, &xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			rx_buffer->pagecnt_bias++;
+			total_packets++;
+			total_bytes += size;
+		} else if (skb)
 			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
 		else if (ring_uses_build_skb(rx_ring))
 			skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
 		else
-			skb = igc_construct_skb(rx_ring, rx_buffer, size,
-						pkt_offset, timestamp);
+			skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
+						timestamp);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
@@ -3874,6 +3952,11 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
+	if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
+		netdev_dbg(netdev, "Jumbo frames not supported with XDP");
+		return -EINVAL;
+	}
+
 	/* adjust max frame to be at least the size of a standard frame */
 	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
 		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
@@ -4860,6 +4943,18 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	}
 }
 
+static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops igc_netdev_ops = {
 	.ndo_open		= igc_open,
 	.ndo_stop		= igc_close,
@@ -4873,6 +4968,7 @@ static const struct net_device_ops igc_netdev_ops = {
 	.ndo_features_check	= igc_features_check,
 	.ndo_do_ioctl		= igc_ioctl,
 	.ndo_setup_tc		= igc_setup_tc,
+	.ndo_bpf		= igc_bpf,
 };
 
 /* PCIe configuration access */
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
new file mode 100644
index 000000000000..27c886a254f1
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+#include "igc.h"
+#include "igc_xdp.h"
+
+int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
+		     struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = adapter->netdev;
+	bool if_running = netif_running(dev);
+	struct bpf_prog *old_prog;
+
+	if (dev->mtu > ETH_DATA_LEN) {
+		/* For now, the driver doesn't support XDP functionality with
+		 * jumbo frames so we return error.
+		 */
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (if_running)
+		igc_close(dev);
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (if_running)
+		igc_open(dev);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
new file mode 100644
index 000000000000..8a410bcefe1a
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Intel Corporation. */
+
+#ifndef _IGC_XDP_H_
+#define _IGC_XDP_H_
+
+int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
+		     struct netlink_ext_ack *extack);
+
+#endif /* _IGC_XDP_H_ */
-- 
cgit v1.2.3


From 73f1071c1d2952b8c93cd6cd99744768c59ec840 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:21 -0800
Subject: igc: Add support for XDP_TX action

Add support for XDP_TX action which enables XDP programs to transmit
back receiving frames.

I225 controller has only 4 Tx hardware queues. Since XDP programs may
not even issue an XDP_TX action, this patch doesn't reserve dedicated
queues just for XDP like other Intel drivers do. Instead, the queues
are shared between the network stack and XDP. The netdev queue lock is
used to ensure mutual exclusion.

Since frames can now be transmitted via XDP_TX, the igc_tx_buffer
structure is modified so we are able to save a reference to the xdp
frame for later clean up once the packet is transmitted. The tx_buffer
is mapped to either a skb or a xdpf so we use a union to save the skb
or xdpf pointer and have a bit in tx_flags to indicate which field to
use.

This patch has been tested with the sample app "xdp2" located in
samples/bpf/ dir.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |   9 +-
 drivers/net/ethernet/intel/igc/igc_main.c | 176 ++++++++++++++++++++++++++++--
 drivers/net/ethernet/intel/igc/igc_xdp.c  |  27 +++++
 drivers/net/ethernet/intel/igc/igc_xdp.h  |   3 +
 4 files changed, 204 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 3a1737227222..91493a73355d 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -111,6 +111,8 @@ struct igc_ring {
 			struct sk_buff *skb;
 		};
 	};
+
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_internodealigned_in_smp;
 
 /* Board specific private data structure */
@@ -375,6 +377,8 @@ enum igc_tx_flags {
 	/* olinfo flags */
 	IGC_TX_FLAGS_IPV4	= 0x10,
 	IGC_TX_FLAGS_CSUM	= 0x20,
+
+	IGC_TX_FLAGS_XDP	= 0x100,
 };
 
 enum igc_boards {
@@ -397,7 +401,10 @@ enum igc_boards {
 struct igc_tx_buffer {
 	union igc_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
 	unsigned int bytecount;
 	u16 gso_segs;
 	__be16 protocol;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7da31da523c8..5ad360e52038 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -25,6 +25,7 @@
 
 #define IGC_XDP_PASS		0
 #define IGC_XDP_CONSUMED	BIT(0)
+#define IGC_XDP_TX		BIT(1)
 
 static int debug = -1;
 
@@ -181,8 +182,10 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
 	while (i != tx_ring->next_to_use) {
 		union igc_adv_tx_desc *eop_desc, *tx_desc;
 
-		/* Free all the Tx ring sk_buffs */
-		dev_kfree_skb_any(tx_buffer->skb);
+		if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+			xdp_return_frame(tx_buffer->xdpf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -410,6 +413,8 @@ void igc_free_rx_resources(struct igc_ring *rx_ring)
 {
 	igc_clean_rx_ring(rx_ring);
 
+	igc_xdp_unregister_rxq_info(rx_ring);
+
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 
@@ -447,7 +452,11 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
 {
 	struct net_device *ndev = rx_ring->netdev;
 	struct device *dev = rx_ring->dev;
-	int size, desc_len;
+	int size, desc_len, res;
+
+	res = igc_xdp_register_rxq_info(rx_ring);
+	if (res < 0)
+		return res;
 
 	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
 	rx_ring->rx_buffer_info = vzalloc(size);
@@ -473,6 +482,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
 	return 0;
 
 err:
+	igc_xdp_unregister_rxq_info(rx_ring);
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
@@ -1909,6 +1919,101 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
 	}
 }
 
+static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
+				  struct xdp_frame *xdpf,
+				  struct igc_ring *ring)
+{
+	dma_addr_t dma;
+
+	dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma)) {
+		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+		return -ENOMEM;
+	}
+
+	buffer->xdpf = xdpf;
+	buffer->tx_flags = IGC_TX_FLAGS_XDP;
+	buffer->protocol = 0;
+	buffer->bytecount = xdpf->len;
+	buffer->gso_segs = 1;
+	buffer->time_stamp = jiffies;
+	dma_unmap_len_set(buffer, len, xdpf->len);
+	dma_unmap_addr_set(buffer, dma, dma);
+	return 0;
+}
+
+/* This function requires __netif_tx_lock is held by the caller. */
+static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
+				      struct xdp_frame *xdpf)
+{
+	struct igc_tx_buffer *buffer;
+	union igc_adv_tx_desc *desc;
+	u32 cmd_type, olinfo_status;
+	int err;
+
+	if (!igc_desc_unused(ring))
+		return -EBUSY;
+
+	buffer = &ring->tx_buffer_info[ring->next_to_use];
+	err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
+	if (err)
+		return err;
+
+	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+		   buffer->bytecount;
+	olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+	desc = IGC_TX_DESC(ring, ring->next_to_use);
+	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
+
+	netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
+
+	buffer->next_to_watch = desc;
+
+	ring->next_to_use++;
+	if (ring->next_to_use == ring->count)
+		ring->next_to_use = 0;
+
+	return 0;
+}
+
+static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
+					    int cpu)
+{
+	int index = cpu;
+
+	if (unlikely(index < 0))
+		index = 0;
+
+	while (index >= adapter->num_tx_queues)
+		index -= adapter->num_tx_queues;
+
+	return adapter->tx_ring[index];
+}
+
+static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+	int res;
+
+	if (unlikely(!xdpf))
+		return -EFAULT;
+
+	ring = igc_xdp_get_tx_ring(adapter, cpu);
+	nq = txring_txq(ring);
+
+	__netif_tx_lock(nq, cpu);
+	res = igc_xdp_init_tx_descriptor(ring, xdpf);
+	__netif_tx_unlock(nq);
+	return res;
+}
+
 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 					struct xdp_buff *xdp)
 {
@@ -1929,6 +2034,12 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 	case XDP_PASS:
 		res = IGC_XDP_PASS;
 		break;
+	case XDP_TX:
+		if (igc_xdp_xmit_back(adapter, xdp) < 0)
+			res = IGC_XDP_CONSUMED;
+		else
+			res = IGC_XDP_TX;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -1945,20 +2056,49 @@ unlock:
 	return ERR_PTR(-res);
 }
 
+/* This function assumes __netif_tx_lock is held by the caller. */
+static void igc_flush_tx_descriptors(struct igc_ring *ring)
+{
+	/* Once tail pointer is updated, hardware can fetch the descriptors
+	 * any time so we issue a write membar here to ensure all memory
+	 * writes are complete before the tail pointer is updated.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+}
+
+static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
+{
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+
+	if (status & IGC_XDP_TX) {
+		ring = igc_xdp_get_tx_ring(adapter, cpu);
+		nq = txring_txq(ring);
+
+		__netif_tx_lock(nq, cpu);
+		igc_flush_tx_descriptors(ring);
+		__netif_tx_unlock(nq);
+	}
+}
+
 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 {
 	unsigned int total_bytes = 0, total_packets = 0;
+	struct igc_adapter *adapter = q_vector->adapter;
 	struct igc_ring *rx_ring = q_vector->rx.ring;
 	struct sk_buff *skb = rx_ring->skb;
 	u16 cleaned_count = igc_desc_unused(rx_ring);
+	int xdp_status = 0;
 
 	while (likely(total_packets < budget)) {
 		union igc_adv_rx_desc *rx_desc;
 		struct igc_rx_buffer *rx_buffer;
+		unsigned int size, truesize;
 		ktime_t timestamp = 0;
 		struct xdp_buff xdp;
 		int pkt_offset = 0;
-		unsigned int size;
 		void *pktbuf;
 
 		/* return some buffers to hardware, one at a time is too slow */
@@ -1979,6 +2119,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		dma_rmb();
 
 		rx_buffer = igc_get_rx_buffer(rx_ring, size);
+		truesize = igc_get_rx_frame_truesize(rx_ring, size);
 
 		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
 
@@ -1990,19 +2131,29 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		}
 
 		if (!skb) {
-			struct igc_adapter *adapter = q_vector->adapter;
-
 			xdp.data = pktbuf + pkt_offset;
 			xdp.data_end = xdp.data + size;
 			xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring);
 			xdp_set_data_meta_invalid(&xdp);
-			xdp.frame_sz = igc_get_rx_frame_truesize(rx_ring, size);
+			xdp.frame_sz = truesize;
+			xdp.rxq = &rx_ring->xdp_rxq;
 
 			skb = igc_xdp_run_prog(adapter, &xdp);
 		}
 
 		if (IS_ERR(skb)) {
-			rx_buffer->pagecnt_bias++;
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			switch (xdp_res) {
+			case IGC_XDP_CONSUMED:
+				rx_buffer->pagecnt_bias++;
+				break;
+			case IGC_XDP_TX:
+				igc_rx_buffer_flip(rx_buffer, truesize);
+				xdp_status |= xdp_res;
+				break;
+			}
+
 			total_packets++;
 			total_bytes += size;
 		} else if (skb)
@@ -2048,6 +2199,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		total_packets++;
 	}
 
+	if (xdp_status)
+		igc_finalize_xdp(adapter, xdp_status);
+
 	/* place incomplete frames back on ring for completion */
 	rx_ring->skb = skb;
 
@@ -2109,8 +2263,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
 
-		/* free the skb */
-		napi_consume_skb(tx_buffer->skb, napi_budget);
+		if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+			xdp_return_frame(tx_buffer->xdpf);
+		else
+			napi_consume_skb(tx_buffer->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index 27c886a254f1..11133c4619bb 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -31,3 +31,30 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 
 	return 0;
 }
+
+int igc_xdp_register_rxq_info(struct igc_ring *ring)
+{
+	struct net_device *dev = ring->netdev;
+	int err;
+
+	err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0);
+	if (err) {
+		netdev_err(dev, "Failed to register xdp rxq info\n");
+		return err;
+	}
+
+	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+					 NULL);
+	if (err) {
+		netdev_err(dev, "Failed to register xdp rxq mem model\n");
+		xdp_rxq_info_unreg(&ring->xdp_rxq);
+		return err;
+	}
+
+	return 0;
+}
+
+void igc_xdp_unregister_rxq_info(struct igc_ring *ring)
+{
+	xdp_rxq_info_unreg(&ring->xdp_rxq);
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
index 8a410bcefe1a..cfecb515b718 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.h
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -7,4 +7,7 @@
 int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
 		     struct netlink_ext_ack *extack);
 
+int igc_xdp_register_rxq_info(struct igc_ring *ring);
+void igc_xdp_unregister_rxq_info(struct igc_ring *ring);
+
 #endif /* _IGC_XDP_H_ */
-- 
cgit v1.2.3


From 4ff3203610928cac82d5627ce803559e78d61b91 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 9 Mar 2021 23:13:22 -0800
Subject: igc: Add support for XDP_REDIRECT action

Add support for the XDP_REDIRECT action which enables XDP programs to
redirect packets arriving at I225 NIC. It also implements the
ndo_xdp_xmit ops, enabling the igc driver to transmit packets forwarded
to it by xdp programs running on other interfaces.

The patch tweaks the driver's page counting and recycling scheme as
described in the following two commits and implemented by other Intel
drivers in order to properly support XDP_REDIRECT action:
  commit 8ce29c679a6e ("i40e: tweak page counting for XDP_REDIRECT")
  commit 75aab4e10ae6 ("i40e: avoid premature Rx buffer reuse")

This patch has been tested with the sample apps "xdp_redirect_cpu" and
"xdp_redirect_map" located in samples/bpf/.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Signed-off-by: Vedang Patel <vedang.patel@intel.com>
Signed-off-by: Jithu Joseph <jithu.joseph@intel.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 84 +++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 5ad360e52038..10765491e357 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -26,6 +26,7 @@
 #define IGC_XDP_PASS		0
 #define IGC_XDP_CONSUMED	BIT(0)
 #define IGC_XDP_TX		BIT(1)
+#define IGC_XDP_REDIRECT	BIT(2)
 
 static int debug = -1;
 
@@ -1505,11 +1506,18 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
 }
 
 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
-					       const unsigned int size)
+					       const unsigned int size,
+					       int *rx_buffer_pgcnt)
 {
 	struct igc_rx_buffer *rx_buffer;
 
 	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	*rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buffer->page);
+#else
+		0;
+#endif
 	prefetchw(rx_buffer->page);
 
 	/* we are reusing so sync this buffer for CPU use */
@@ -1677,7 +1685,8 @@ static void igc_reuse_rx_page(struct igc_ring *rx_ring,
 	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
 }
 
-static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
+static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
+				  int rx_buffer_pgcnt)
 {
 	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
 	struct page *page = rx_buffer->page;
@@ -1688,7 +1697,7 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
 
 #if (PAGE_SIZE < 8192)
 	/* if we are only owner of page we can reuse it */
-	if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
 		return false;
 #else
 #define IGC_LAST_OFFSET \
@@ -1702,8 +1711,8 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
 	 * the pagecnt_bias and page count so that we fully restock the
 	 * number of references the driver holds.
 	 */
-	if (unlikely(!pagecnt_bias)) {
-		page_ref_add(page, USHRT_MAX);
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
 		rx_buffer->pagecnt_bias = USHRT_MAX;
 	}
 
@@ -1776,9 +1785,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
 }
 
 static void igc_put_rx_buffer(struct igc_ring *rx_ring,
-			      struct igc_rx_buffer *rx_buffer)
+			      struct igc_rx_buffer *rx_buffer,
+			      int rx_buffer_pgcnt)
 {
-	if (igc_can_reuse_rx_page(rx_buffer)) {
+	if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
 		/* hand second half of page back to the ring */
 		igc_reuse_rx_page(rx_ring, rx_buffer);
 	} else {
@@ -1844,7 +1854,8 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
 	bi->dma = dma;
 	bi->page = page;
 	bi->page_offset = igc_rx_offset(rx_ring);
-	bi->pagecnt_bias = 1;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
 
 	return true;
 }
@@ -2040,6 +2051,12 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 		else
 			res = IGC_XDP_TX;
 		break;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
+			res = IGC_XDP_CONSUMED;
+		else
+			res = IGC_XDP_REDIRECT;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -2081,6 +2098,9 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
 		igc_flush_tx_descriptors(ring);
 		__netif_tx_unlock(nq);
 	}
+
+	if (status & IGC_XDP_REDIRECT)
+		xdp_do_flush();
 }
 
 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
@@ -2090,7 +2110,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 	struct igc_ring *rx_ring = q_vector->rx.ring;
 	struct sk_buff *skb = rx_ring->skb;
 	u16 cleaned_count = igc_desc_unused(rx_ring);
-	int xdp_status = 0;
+	int xdp_status = 0, rx_buffer_pgcnt;
 
 	while (likely(total_packets < budget)) {
 		union igc_adv_rx_desc *rx_desc;
@@ -2118,7 +2138,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 		 */
 		dma_rmb();
 
-		rx_buffer = igc_get_rx_buffer(rx_ring, size);
+		rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
 		truesize = igc_get_rx_frame_truesize(rx_ring, size);
 
 		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
@@ -2149,6 +2169,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 				rx_buffer->pagecnt_bias++;
 				break;
 			case IGC_XDP_TX:
+			case IGC_XDP_REDIRECT:
 				igc_rx_buffer_flip(rx_buffer, truesize);
 				xdp_status |= xdp_res;
 				break;
@@ -2171,7 +2192,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
 			break;
 		}
 
-		igc_put_rx_buffer(rx_ring, rx_buffer);
+		igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
 		cleaned_count++;
 
 		/* fetch next buffer in frame if non-eop */
@@ -5111,6 +5132,46 @@ static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	}
 }
 
+static int igc_xdp_xmit(struct net_device *dev, int num_frames,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+	int i, drops;
+
+	if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	ring = igc_xdp_get_tx_ring(adapter, cpu);
+	nq = txring_txq(ring);
+
+	__netif_tx_lock(nq, cpu);
+
+	drops = 0;
+	for (i = 0; i < num_frames; i++) {
+		int err;
+		struct xdp_frame *xdpf = frames[i];
+
+		err = igc_xdp_init_tx_descriptor(ring, xdpf);
+		if (err) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (flags & XDP_XMIT_FLUSH)
+		igc_flush_tx_descriptors(ring);
+
+	__netif_tx_unlock(nq);
+
+	return num_frames - drops;
+}
+
 static const struct net_device_ops igc_netdev_ops = {
 	.ndo_open		= igc_open,
 	.ndo_stop		= igc_close,
@@ -5125,6 +5186,7 @@ static const struct net_device_ops igc_netdev_ops = {
 	.ndo_do_ioctl		= igc_ioctl,
 	.ndo_setup_tc		= igc_setup_tc,
 	.ndo_bpf		= igc_bpf,
+	.ndo_xdp_xmit		= igc_xdp_xmit,
 };
 
 /* PCIe configuration access */
-- 
cgit v1.2.3


From 37f368d8d09d2190e64cdb1cd73d56e3ad50c3df Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 28 Mar 2021 22:46:47 +0100
Subject: lan743x: remove redundant intializations of pointers adapter and
 phydev

The pointers adapter and phydev are being initialized with values that
are never read and are being updated later with a new value. The
initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index c5de8f46cdd3..91a755efe2e6 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -730,8 +730,8 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev,
 static int lan743x_ethtool_set_eee(struct net_device *netdev,
 				   struct ethtool_eee *eee)
 {
-	struct lan743x_adapter *adapter = netdev_priv(netdev);
-	struct phy_device *phydev = NULL;
+	struct lan743x_adapter *adapter;
+	struct phy_device *phydev;
 	u32 buf = 0;
 	int ret = 0;
 
-- 
cgit v1.2.3


From 6be836818872329b5c8daa58ea2ac33945850cfe Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Mon, 29 Mar 2021 09:38:32 +0800
Subject: net: mdio: Remove redundant dev_err call in mdio_mux_iproc_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-mux-bcm-iproc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/mdio/mdio-mux-bcm-iproc.c b/drivers/net/mdio/mdio-mux-bcm-iproc.c
index 641cfa41f492..03261e6b9ceb 100644
--- a/drivers/net/mdio/mdio-mux-bcm-iproc.c
+++ b/drivers/net/mdio/mdio-mux-bcm-iproc.c
@@ -197,10 +197,8 @@ static int mdio_mux_iproc_probe(struct platform_device *pdev)
 		res->end = res->start + MDIO_REG_ADDR_SPACE_SIZE - 1;
 	}
 	md->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(md->base)) {
-		dev_err(&pdev->dev, "failed to ioremap register\n");
+	if (IS_ERR(md->base))
 		return PTR_ERR(md->base);
-	}
 
 	md->mii_bus = devm_mdiobus_alloc(&pdev->dev);
 	if (!md->mii_bus) {
-- 
cgit v1.2.3


From a956b21596f300506d20dd9d2a2a039772193a8f Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Mon, 29 Mar 2021 09:45:13 +0800
Subject: net: axienet: Remove redundant dev_err call in axienet_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 9635101fbb88..92cf9051d557 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1896,7 +1896,6 @@ static int axienet_probe(struct platform_device *pdev)
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
 	if (IS_ERR(lp->regs)) {
-		dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n");
 		ret = PTR_ERR(lp->regs);
 		goto cleanup_clk;
 	}
-- 
cgit v1.2.3


From 3d0dbd546345689aba8c4a00b486abc51622e920 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Mon, 29 Mar 2021 09:49:32 +0800
Subject: net: stmmac: remove redundant dev_err call in qcom_ethqos_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index bfc4a92f1d92..a674b7d6b49a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -477,7 +477,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii");
 	ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(ethqos->rgmii_base)) {
-		dev_err(&pdev->dev, "Can't get rgmii base\n");
 		ret = PTR_ERR(ethqos->rgmii_base);
 		goto err_mem;
 	}
-- 
cgit v1.2.3


From a180be79db4a7eeab575b76b0838087932953caf Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Mon, 29 Mar 2021 09:54:05 +0800
Subject: net: mscc: ocelot: remove redundant dev_err call in
 vsc9959_mdio_bus_alloc()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 5ff623ee76a6..789fe08cae50 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1057,10 +1057,8 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
 	res.end += felix->imdio_base;
 
 	imdio_regs = devm_ioremap_resource(dev, &res);
-	if (IS_ERR(imdio_regs)) {
-		dev_err(dev, "failed to map internal MDIO registers\n");
+	if (IS_ERR(imdio_regs))
 		return PTR_ERR(imdio_regs);
-	}
 
 	hw = enetc_hw_alloc(dev, imdio_regs);
 	if (IS_ERR(hw)) {
-- 
cgit v1.2.3


From 989f7178b06666ec23ada614b2047c21d85b2a0a Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 29 Mar 2021 11:57:45 +0800
Subject: net: hns3: fix missing rule state assignment

Currently, when adding flow director rule, it missed to set
rule state. Which may cause the rule state in software is
unconsistent with hardware.

Fixes: fc4243b8de8b ("net: hns3: refactor flow director configuration")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d964e2c527f0..57cee12329a4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6439,7 +6439,8 @@ static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
 	if (ret)
 		goto out;
 
-	hclge_update_fd_list(hdev, HCLGE_FD_ACTIVE, rule->location, rule);
+	rule->state = HCLGE_FD_ACTIVE;
+	hclge_update_fd_list(hdev, rule->state, rule->location, rule);
 	hdev->fd_active_type = rule->rule_type;
 
 out:
@@ -7002,6 +7003,7 @@ static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples,
 	rule->action = 0;
 	rule->vf_id = 0;
 	rule->rule_type = HCLGE_FD_ARFS_ACTIVE;
+	rule->state = HCLGE_FD_TO_ADD;
 	if (tuples->ether_proto == ETH_P_IP) {
 		if (tuples->ip_proto == IPPROTO_TCP)
 			rule->flow_type = TCP_V4_FLOW;
@@ -7064,8 +7066,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 		rule->arfs.flow_id = flow_id;
 		rule->queue_id = queue_id;
 		hclge_fd_build_arfs_rule(&new_tuples, rule);
-		hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, rule->location,
-				     rule);
+		hclge_update_fd_list(hdev, rule->state, rule->location, rule);
 		hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE;
 	} else if (rule->queue_id != queue_id) {
 		rule->queue_id = queue_id;
-- 
cgit v1.2.3


From 64ff58fa3bfcf26bc893ea425a0553b561ca5298 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 29 Mar 2021 11:57:46 +0800
Subject: net: hns3: fix use-after-free issue for hclge_add_fd_entry_common()

When new rule state is TO_ADD or ACTIVE, and there is already a
rule with same location in the fd_rule_list, the new rule will
be freed after modifying the old rule. It may cause user-after-free
issue when access rule again in hclge_add_fd_entry_common().

Fixes: fc4243b8de8b ("net: hns3: refactor flow director configuration")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 57cee12329a4..cf99feef987b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6440,8 +6440,8 @@ static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
 		goto out;
 
 	rule->state = HCLGE_FD_ACTIVE;
-	hclge_update_fd_list(hdev, rule->state, rule->location, rule);
 	hdev->fd_active_type = rule->rule_type;
+	hclge_update_fd_list(hdev, rule->state, rule->location, rule);
 
 out:
 	spin_unlock_bh(&hdev->fd_rule_lock);
-- 
cgit v1.2.3


From a2ee6fd28a190588e142ad8ea9d40069cd3c9f98 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Mon, 29 Mar 2021 11:57:47 +0800
Subject: net: hns3: remediate a potential overflow risk of bd_num_list

The array size of bd_num_list is a fixed value, it may have potential
overflow risk when array size of hclge_dfx_bd_offset_list is greater
than that fixed value. So modify bd_num_list as a pointer and allocate
memory for it according to array size of hclge_dfx_bd_offset_list.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 27 ++++++++++++++++------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cf99feef987b..a814fab9d54d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11930,7 +11930,6 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num,
 #define REG_LEN_PER_LINE	(REG_NUM_PER_LINE * sizeof(u32))
 #define REG_SEPARATOR_LINE	1
 #define REG_NUM_REMAIN_MASK	3
-#define BD_LIST_MAX_NUM		30
 
 int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc)
 {
@@ -12024,15 +12023,19 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len)
 {
 	u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
 	int data_len_per_desc, bd_num, i;
-	int bd_num_list[BD_LIST_MAX_NUM];
+	int *bd_num_list;
 	u32 data_len;
 	int ret;
 
+	bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL);
+	if (!bd_num_list)
+		return -ENOMEM;
+
 	ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Get dfx reg bd num fail, status is %d.\n", ret);
-		return ret;
+		goto out;
 	}
 
 	data_len_per_desc = sizeof_field(struct hclge_desc, data);
@@ -12043,6 +12046,8 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len)
 		*len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE;
 	}
 
+out:
+	kfree(bd_num_list);
 	return ret;
 }
 
@@ -12050,16 +12055,20 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
 {
 	u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
 	int bd_num, bd_num_max, buf_len, i;
-	int bd_num_list[BD_LIST_MAX_NUM];
 	struct hclge_desc *desc_src;
+	int *bd_num_list;
 	u32 *reg = data;
 	int ret;
 
+	bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL);
+	if (!bd_num_list)
+		return -ENOMEM;
+
 	ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
 			"Get dfx reg bd num fail, status is %d.\n", ret);
-		return ret;
+		goto out;
 	}
 
 	bd_num_max = bd_num_list[0];
@@ -12068,8 +12077,10 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
 
 	buf_len = sizeof(*desc_src) * bd_num_max;
 	desc_src = kzalloc(buf_len, GFP_KERNEL);
-	if (!desc_src)
-		return -ENOMEM;
+	if (!desc_src) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	for (i = 0; i < dfx_reg_type_num; i++) {
 		bd_num = bd_num_list[i];
@@ -12085,6 +12096,8 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data)
 	}
 
 	kfree(desc_src);
+out:
+	kfree(bd_num_list);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 5be36fb7855442e0299cf483d40457315a84cdf1 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 29 Mar 2021 11:57:48 +0800
Subject: net: hns3: remove the rss_size limitation by vector num

Currently, if user hasn't change channel number, the rss_size
is limited to be no more than the vector number, in order to
keep one vector only being mapped to one queue. But the queue
number of each tc can be different, and one vector also can
be mapped by multiple queues. So remove this limitation.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index aa81b8c56fce..ebb962bad451 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -664,15 +664,6 @@ static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport)
 		kinfo->rss_size = kinfo->req_rss_size;
 	} else if (kinfo->rss_size > max_rss_size ||
 		   (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
-		/* if user not set rss, the rss_size should compare with the
-		 * valid msi numbers to ensure one to one map between tqp and
-		 * irq as default.
-		 */
-		if (!kinfo->req_rss_size)
-			max_rss_size = min_t(u16, max_rss_size,
-					     (hdev->num_nic_msi - 1) /
-					     kinfo->tc_info.num_tc);
-
 		/* Set to the maximum specification value (max_rss_size). */
 		kinfo->rss_size = max_rss_size;
 	}
-- 
cgit v1.2.3


From 8fa865510069aa1364f50761d38418dec4c163df Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Mon, 29 Mar 2021 11:57:49 +0800
Subject: net: hns3: optimize the process of queue reset

Currently, the queue reset process needs to be performed one by
one, which is inefficient. However, the queue reset of the same
function is always performed at the same time. Therefore, according
to the UM, command HCLGE_OPC_CFG_RST_TRIGGER can be used to reset
all queues of the same function at a time, in order to optimize
the queue reset process.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    |   8 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   8 +-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 185 ++++++++++++---------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |   3 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c |  26 ++-
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  |  59 +++++--
 7 files changed, 182 insertions(+), 109 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 01d6bfc0917c..a234116ba0e5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -579,7 +579,7 @@ struct hnae3_ae_ops {
 				      int vector_num,
 				      struct hnae3_ring_chain_node *vr_chain);
 
-	int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+	int (*reset_queue)(struct hnae3_handle *handle);
 	u32 (*get_fw_version)(struct hnae3_handle *handle);
 	void (*get_mdix_mode)(struct hnae3_handle *handle,
 			      u8 *tp_mdix_ctrl, u8 *tp_mdix);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c73de36b3843..384800949d18 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4455,11 +4455,11 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 	int i, j;
 	int ret;
 
-	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		ret = h->ae_algo->ops->reset_queue(h, i);
-		if (ret)
-			return ret;
+	ret = h->ae_algo->ops->reset_queue(h);
+	if (ret)
+		return ret;
 
+	for (i = 0; i < h->kinfo.num_tqps; i++) {
 		hns3_init_ring_hw(&priv->ring[i]);
 
 		/* We need to clear tx ring here because self test will
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 7feab8405d7c..c6fc22e29581 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -948,10 +948,16 @@ struct hclge_reset_tqp_queue_cmd {
 
 #define HCLGE_CFG_RESET_MAC_B		3
 #define HCLGE_CFG_RESET_FUNC_B		7
+#define HCLGE_CFG_RESET_RCB_B		1
 struct hclge_reset_cmd {
 	u8 mac_func_reset;
 	u8 fun_reset_vfid;
-	u8 rsv[22];
+	u8 fun_reset_rcb;
+	u8 rsv;
+	__le16 fun_reset_rcb_vqid_start;
+	__le16 fun_reset_rcb_vqid_num;
+	u8 fun_reset_rcb_return_status;
+	u8 rsv1[15];
 };
 
 #define HCLGE_PF_RESET_DONE_BIT		BIT(0)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a814fab9d54d..bc805d5fb16e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7797,13 +7797,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en)
 	return ret;
 }
 
-static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id,
-			    int stream_id, bool enable)
+static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id,
+				     u16 stream_id, bool enable)
 {
 	struct hclge_desc desc;
 	struct hclge_cfg_com_tqp_queue_cmd *req =
 		(struct hclge_cfg_com_tqp_queue_cmd *)desc.data;
-	int ret;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
 	req->tqp_id = cpu_to_le16(tqp_id);
@@ -7811,20 +7810,30 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id,
 	if (enable)
 		req->enable |= 1U << HCLGE_TQP_ENABLE_B;
 
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-	if (ret)
-		dev_err(&hdev->pdev->dev,
-			"Tqp enable fail, status =%d.\n", ret);
-	return ret;
+	return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+	u16 i;
+
+	for (i = 0; i < handle->kinfo.num_tqps; i++) {
+		ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static int hclge_set_loopback(struct hnae3_handle *handle,
 			      enum hnae3_loop loop_mode, bool en)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
-	struct hnae3_knic_private_info *kinfo;
 	struct hclge_dev *hdev = vport->back;
-	int i, ret;
+	int ret;
 
 	/* Loopback can be enabled in three places: SSU, MAC, and serdes. By
 	 * default, SSU loopback is enabled, so if the SMAC and the DMAC are
@@ -7861,14 +7870,12 @@ static int hclge_set_loopback(struct hnae3_handle *handle,
 	if (ret)
 		return ret;
 
-	kinfo = &vport->nic.kinfo;
-	for (i = 0; i < kinfo->num_tqps; i++) {
-		ret = hclge_tqp_enable(hdev, i, 0, en);
-		if (ret)
-			return ret;
-	}
+	ret = hclge_tqp_enable(handle, en);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n",
+			en ? "enable" : "disable", ret);
 
-	return 0;
+	return ret;
 }
 
 static int hclge_set_default_loopback(struct hclge_dev *hdev)
@@ -7955,7 +7962,6 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int i;
 
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 	spin_lock_bh(&hdev->fd_rule_lock);
@@ -7972,8 +7978,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 		return;
 	}
 
-	for (i = 0; i < handle->kinfo.num_tqps; i++)
-		hclge_reset_tqp(handle, i);
+	hclge_reset_tqp(handle);
 
 	hclge_config_mac_tnl_int(hdev, false);
 
@@ -10347,7 +10352,7 @@ out:
 	return ret;
 }
 
-static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
+static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id,
 				    bool enable)
 {
 	struct hclge_reset_tqp_queue_cmd *req;
@@ -10403,94 +10408,114 @@ u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id)
 	return tqp->index;
 }
 
-int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclge_reset_tqp_cmd(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int reset_try_times = 0;
+	u16 reset_try_times = 0;
 	int reset_status;
 	u16 queue_gid;
 	int ret;
+	u16 i;
 
-	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
-
-	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
-		return ret;
-	}
+	for (i = 0; i < handle->kinfo.num_tqps; i++) {
+		queue_gid = hclge_covert_handle_qid_global(handle, i);
+		ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to send reset tqp cmd, ret = %d\n",
+				ret);
+			return ret;
+		}
 
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
-	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"Send reset tqp cmd fail, ret = %d\n", ret);
-		return ret;
-	}
+		while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
+			reset_status = hclge_get_reset_status(hdev, queue_gid);
+			if (reset_status)
+				break;
 
-	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
-		reset_status = hclge_get_reset_status(hdev, queue_gid);
-		if (reset_status)
-			break;
+			/* Wait for tqp hw reset */
+			usleep_range(1000, 1200);
+		}
 
-		/* Wait for tqp hw reset */
-		usleep_range(1000, 1200);
-	}
+		if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
+			dev_err(&hdev->pdev->dev,
+				"wait for tqp hw reset timeout\n");
+			return -ETIME;
+		}
 
-	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
-		dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
-		return ret;
+		ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to deassert soft reset, ret = %d\n",
+				ret);
+			return ret;
+		}
+		reset_try_times = 0;
 	}
-
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
-	if (ret)
-		dev_err(&hdev->pdev->dev,
-			"Deassert the soft reset fail, ret = %d\n", ret);
-
-	return ret;
+	return 0;
 }
 
-void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
+static int hclge_reset_rcb(struct hnae3_handle *handle)
 {
-	struct hnae3_handle *handle = &vport->nic;
+#define HCLGE_RESET_RCB_NOT_SUPPORT	0U
+#define HCLGE_RESET_RCB_SUCCESS		1U
+
+	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
-	int reset_try_times = 0;
-	int reset_status;
+	struct hclge_reset_cmd *req;
+	struct hclge_desc desc;
+	u8 return_status;
 	u16 queue_gid;
 	int ret;
 
-	if (queue_id >= handle->kinfo.num_tqps) {
-		dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n",
-			 queue_id);
-		return;
-	}
+	queue_gid = hclge_covert_handle_qid_global(handle, 0);
 
-	queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
+	req = (struct hclge_reset_cmd *)desc.data;
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false);
+	hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1);
+	req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid);
+	req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps);
 
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "Send reset tqp cmd fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev,
+			"failed to send rcb reset cmd, ret = %d\n", ret);
+		return ret;
 	}
 
-	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
-		reset_status = hclge_get_reset_status(hdev, queue_gid);
-		if (reset_status)
-			break;
+	return_status = req->fun_reset_rcb_return_status;
+	if (return_status == HCLGE_RESET_RCB_SUCCESS)
+		return 0;
 
-		/* Wait for tqp hw reset */
-		usleep_range(1000, 1200);
+	if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) {
+		dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n",
+			return_status);
+		return -EIO;
 	}
 
-	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
-		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
-		return;
+	/* if reset rcb cmd is unsupported, we need to send reset tqp cmd
+	 * again to reset all tqps
+	 */
+	return hclge_reset_tqp_cmd(handle);
+}
+
+int hclge_reset_tqp(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	int ret;
+
+	/* only need to disable PF's tqp */
+	if (!vport->vport_id) {
+		ret = hclge_tqp_enable(handle, false);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to disable tqp, ret = %d\n", ret);
+			return ret;
+		}
 	}
 
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
-	if (ret)
-		dev_warn(&hdev->pdev->dev,
-			 "Deassert the soft reset fail, ret = %d\n", ret);
+	return hclge_reset_rcb(handle);
 }
 
 static u32 hclge_get_fw_version(struct hnae3_handle *handle)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index dc3d29d41848..c1aaf7c534c9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1053,8 +1053,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev);
 void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
-int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
-void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
+int hclge_reset_tqp(struct hnae3_handle *handle);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
 int hclge_vport_start(struct hclge_vport *vport);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 51a36e74f088..c88607bdda59 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -550,14 +550,32 @@ static void hclge_get_link_mode(struct hclge_vport *vport,
 			   HCLGE_MBX_LINK_STAT_MODE, dest_vfid);
 }
 
-static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
-				     struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+static int hclge_mbx_reset_vf_queue(struct hclge_vport *vport,
+				    struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+				    struct hclge_respond_to_vf_msg *resp_msg)
 {
+#define HCLGE_RESET_ALL_QUEUE_DONE	1U
+	struct hnae3_handle *handle = &vport->nic;
+	struct hclge_dev *hdev = vport->back;
 	u16 queue_id;
+	int ret;
 
 	memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id));
+	resp_msg->data[0] = HCLGE_RESET_ALL_QUEUE_DONE;
+	resp_msg->len = sizeof(u8);
 
-	hclge_reset_vf_queue(vport, queue_id);
+	/* pf will reset vf's all queues at a time. So it is unnecessary
+	 * to reset queues if queue_id > 0, just return success.
+	 */
+	if (queue_id > 0)
+		return 0;
+
+	ret = hclge_reset_tqp(handle);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "failed to reset vf %u queue, ret = %d\n",
+			vport->vport_id - HCLGE_VF_VPORT_START_NUM, ret);
+
+	return ret;
 }
 
 static int hclge_reset_vf(struct hclge_vport *vport)
@@ -783,7 +801,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 					ret);
 			break;
 		case HCLGE_MBX_QUEUE_RESET:
-			hclge_mbx_reset_vf_queue(vport, req);
+			ret = hclge_mbx_reset_vf_queue(vport, req, &resp_msg);
 			break;
 		case HCLGE_MBX_RESET:
 			ret = hclge_reset_vf(vport);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5e512cd01a96..ac3afacbc4ce 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1240,12 +1240,11 @@ static void hclgevf_sync_promisc_mode(struct hclgevf_dev *hdev)
 	}
 }
 
-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
-			      int stream_id, bool enable)
+static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id,
+				       u16 stream_id, bool enable)
 {
 	struct hclgevf_cfg_com_tqp_queue_cmd *req;
 	struct hclgevf_desc desc;
-	int status;
 
 	req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data;
 
@@ -1256,12 +1255,22 @@ static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 	if (enable)
 		req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;
 
-	status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
-	if (status)
-		dev_err(&hdev->pdev->dev,
-			"TQP enable fail, status =%d.\n", status);
+	return hclgevf_cmd_send(&hdev->hw, &desc, 1);
+}
 
-	return status;
+static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	int ret;
+	u16 i;
+
+	for (i = 0; i < handle->kinfo.num_tqps; i++) {
+		ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
@@ -1710,20 +1719,39 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
 }
 
-static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclgevf_reset_tqp(struct hnae3_handle *handle)
 {
+#define HCLGEVF_RESET_ALL_QUEUE_DONE	1U
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	struct hclge_vf_to_pf_msg send_msg;
+	u8 return_status = 0;
 	int ret;
+	u16 i;
 
 	/* disable vf queue before send queue reset msg to PF */
-	ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
-	if (ret)
+	ret = hclgevf_tqp_enable(handle, false);
+	if (ret) {
+		dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n",
+			ret);
 		return ret;
+	}
 
 	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0);
-	memcpy(send_msg.data, &queue_id, sizeof(queue_id));
-	return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
+
+	ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status,
+				   sizeof(return_status));
+	if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE)
+		return ret;
+
+	for (i = 1; i < handle->kinfo.num_tqps; i++) {
+		hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0);
+		memcpy(send_msg.data, &i, sizeof(i));
+		ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu)
@@ -2636,14 +2664,11 @@ static int hclgevf_ae_start(struct hnae3_handle *handle)
 static void hclgevf_ae_stop(struct hnae3_handle *handle)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	int i;
 
 	set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
 
 	if (hdev->reset_type != HNAE3_VF_RESET)
-		for (i = 0; i < handle->kinfo.num_tqps; i++)
-			if (hclgevf_reset_tqp(handle, i))
-				break;
+		hclgevf_reset_tqp(handle);
 
 	hclgevf_reset_tqp_stats(handle);
 	hclgevf_update_link_status(hdev, 0);
-- 
cgit v1.2.3


From d5d5e0193ee8f88efbbc7f1471087255657bc19a Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 29 Mar 2021 11:57:50 +0800
Subject: net: hns3: add handling for xmit skb with recursive fraglist

Currently hns3 driver only handle the xmit skb with one level of
fraglist skb, add handling for multi level by calling hns3_tx_bd_num()
recursively when calculating bd num and calling hns3_fill_skb_to_desc()
recursively when filling tx desc.

When the skb has a fraglist level of 24, the skb is simply dropped and
stats.max_recursion_level is added to record the error. Move the stat
handling from hns3_nic_net_xmit() to hns3_nic_maybe_stop_tx() in order
to handle different error stat and add the 'max_recursion_level' and
'hw_limitation' stat.

Note that the max recursive level as 24 is chose according to below:
commit 48a1df65334b ("skbuff: return -EMSGSIZE in skb_to_sgvec to
prevent overflow").

And that we are not able to find a testcase to verify the recursive
fraglist case, so Fixes tag is not provided.

Reported-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 115 +++++++++++++--------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |   2 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   2 +
 3 files changed, 78 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 384800949d18..2dbcd959a356 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1275,30 +1275,29 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
 }
 
 static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size,
-				   u8 max_non_tso_bd_num)
+				   u8 max_non_tso_bd_num, unsigned int bd_num,
+				   unsigned int recursion_level)
 {
+#define HNS3_MAX_RECURSION_LEVEL	24
+
 	struct sk_buff *frag_skb;
-	unsigned int bd_num = 0;
 
 	/* If the total len is within the max bd limit */
-	if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) &&
+	if (likely(skb->len <= HNS3_MAX_BD_SIZE && !recursion_level &&
+		   !skb_has_frag_list(skb) &&
 		   skb_shinfo(skb)->nr_frags < max_non_tso_bd_num))
 		return skb_shinfo(skb)->nr_frags + 1U;
 
-	/* The below case will always be linearized, return
-	 * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
-	 */
-	if (unlikely(skb->len > HNS3_MAX_TSO_SIZE ||
-		     (!skb_is_gso(skb) && skb->len >
-		      HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))))
-		return HNS3_MAX_TSO_BD_NUM + 1U;
+	if (unlikely(recursion_level >= HNS3_MAX_RECURSION_LEVEL))
+		return UINT_MAX;
 
 	bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
 	if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM)
 		return bd_num;
 
 	skb_walk_frags(skb, frag_skb) {
-		bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num);
+		bd_num = hns3_tx_bd_num(frag_skb, bd_size, max_non_tso_bd_num,
+					bd_num, recursion_level + 1);
 		if (bd_num > HNS3_MAX_TSO_BD_NUM)
 			return bd_num;
 	}
@@ -1358,6 +1357,43 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size)
 		size[i] = skb_frag_size(&shinfo->frags[i]);
 }
 
+static int hns3_skb_linearize(struct hns3_enet_ring *ring,
+			      struct sk_buff *skb,
+			      u8 max_non_tso_bd_num,
+			      unsigned int bd_num)
+{
+	/* 'bd_num == UINT_MAX' means the skb' fraglist has a
+	 * recursion level of over HNS3_MAX_RECURSION_LEVEL.
+	 */
+	if (bd_num == UINT_MAX) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.over_max_recursion++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	/* The skb->len has exceeded the hw limitation, linearization
+	 * will not help.
+	 */
+	if (skb->len > HNS3_MAX_TSO_SIZE ||
+	    (!skb_is_gso(skb) && skb->len >
+	     HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.hw_limitation++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	if (__skb_linearize(skb)) {
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
 				  struct net_device *netdev,
 				  struct sk_buff *skb)
@@ -1367,7 +1403,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
 	unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
 	unsigned int bd_num;
 
-	bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num);
+	bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num, 0, 0);
 	if (unlikely(bd_num > max_non_tso_bd_num)) {
 		if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
 		    !hns3_skb_need_linearized(skb, bd_size, bd_num,
@@ -1376,16 +1412,11 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
 			goto out;
 		}
 
-		if (__skb_linearize(skb))
+		if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num,
+				       bd_num))
 			return -ENOMEM;
 
 		bd_num = hns3_tx_bd_count(skb->len);
-		if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
-		    (!skb_is_gso(skb) &&
-		     bd_num > max_non_tso_bd_num)) {
-			trace_hns3_over_max_bd(skb);
-			return -ENOMEM;
-		}
 
 		u64_stats_update_begin(&ring->syncp);
 		ring->stats.tx_copy++;
@@ -1409,6 +1440,10 @@ out:
 		return bd_num;
 	}
 
+	u64_stats_update_begin(&ring->syncp);
+	ring->stats.tx_busy++;
+	u64_stats_update_end(&ring->syncp);
+
 	return -EBUSY;
 }
 
@@ -1456,6 +1491,7 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
 				 struct sk_buff *skb, enum hns_desc_type type)
 {
 	unsigned int size = skb_headlen(skb);
+	struct sk_buff *frag_skb;
 	int i, ret, bd_num = 0;
 
 	if (size) {
@@ -1480,6 +1516,15 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
 		bd_num += ret;
 	}
 
+	skb_walk_frags(skb, frag_skb) {
+		ret = hns3_fill_skb_to_desc(ring, frag_skb,
+					    DESC_TYPE_FRAGLIST_SKB);
+		if (unlikely(ret < 0))
+			return ret;
+
+		bd_num += ret;
+	}
+
 	return bd_num;
 }
 
@@ -1510,8 +1555,6 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
 	struct netdev_queue *dev_queue;
 	int pre_ntu, next_to_use_head;
-	struct sk_buff *frag_skb;
-	int bd_num = 0;
 	bool doorbell;
 	int ret;
 
@@ -1527,15 +1570,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	ret = hns3_nic_maybe_stop_tx(ring, netdev, skb);
 	if (unlikely(ret <= 0)) {
 		if (ret == -EBUSY) {
-			u64_stats_update_begin(&ring->syncp);
-			ring->stats.tx_busy++;
-			u64_stats_update_end(&ring->syncp);
 			hns3_tx_doorbell(ring, 0, true);
 			return NETDEV_TX_BUSY;
-		} else if (ret == -ENOMEM) {
-			u64_stats_update_begin(&ring->syncp);
-			ring->stats.sw_err_cnt++;
-			u64_stats_update_end(&ring->syncp);
 		}
 
 		hns3_rl_err(netdev, "xmit error: %d!\n", ret);
@@ -1548,21 +1584,14 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	if (unlikely(ret < 0))
 		goto fill_err;
 
+	/* 'ret < 0' means filling error, 'ret == 0' means skb->len is
+	 * zero, which is unlikely, and 'ret > 0' means how many tx desc
+	 * need to be notified to the hw.
+	 */
 	ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
-	if (unlikely(ret < 0))
+	if (unlikely(ret <= 0))
 		goto fill_err;
 
-	bd_num += ret;
-
-	skb_walk_frags(skb, frag_skb) {
-		ret = hns3_fill_skb_to_desc(ring, frag_skb,
-					    DESC_TYPE_FRAGLIST_SKB);
-		if (unlikely(ret < 0))
-			goto fill_err;
-
-		bd_num += ret;
-	}
-
 	pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
 					(ring->desc_num - 1);
 	ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
@@ -1573,7 +1602,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
 	doorbell = __netdev_tx_sent_queue(dev_queue, skb->len,
 					  netdev_xmit_more());
-	hns3_tx_doorbell(ring, bd_num, doorbell);
+	hns3_tx_doorbell(ring, ret, doorbell);
 
 	return NETDEV_TX_OK;
 
@@ -1745,11 +1774,15 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
 			tx_drop += ring->stats.tx_l4_proto_err;
 			tx_drop += ring->stats.tx_l2l3l4_err;
 			tx_drop += ring->stats.tx_tso_err;
+			tx_drop += ring->stats.over_max_recursion;
+			tx_drop += ring->stats.hw_limitation;
 			tx_errors += ring->stats.sw_err_cnt;
 			tx_errors += ring->stats.tx_vlan_err;
 			tx_errors += ring->stats.tx_l4_proto_err;
 			tx_errors += ring->stats.tx_l2l3l4_err;
 			tx_errors += ring->stats.tx_tso_err;
+			tx_errors += ring->stats.over_max_recursion;
+			tx_errors += ring->stats.hw_limitation;
 		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
 		/* fetch the rx stats */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index d069b04ee587..e44224e23315 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -376,6 +376,8 @@ struct ring_stats {
 			u64 tx_l4_proto_err;
 			u64 tx_l2l3l4_err;
 			u64 tx_tso_err;
+			u64 over_max_recursion;
+			u64 hw_limitation;
 		};
 		struct {
 			u64 rx_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 0ae6140882b7..b48faf769b1c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -44,6 +44,8 @@ static const struct hns3_stats hns3_txq_stats[] = {
 	HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err),
 	HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err),
 	HNS3_TQP_STAT("tso_err", tx_tso_err),
+	HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
+	HNS3_TQP_STAT("hw_limitation", hw_limitation),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
-- 
cgit v1.2.3


From 811c0830eb4ca8811ed80fe40378f622b9844835 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 29 Mar 2021 11:57:51 +0800
Subject: net: hns3: add tx send size handling for tso skb

The actual size on wire for tso skb should be (gso_segs - 1) *
hdr + skb->len instead of skb->len, which can be seen by user using
'ethtool -S ethX' cmd, and 'Byte Queue Limit' also use the send size
stat to do the queue limiting, so add send_bytes in the desc_cb to
record the actual send size for a skb. And send_bytes is only for tx
desc_cb and page_offset is only for rx desc, so reuse the same space
for both of them.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 25 ++++++++++++++++++-------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h |  7 ++++++-
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 2dbcd959a356..5e08ac9fac7c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -694,7 +694,7 @@ void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
 }
 
 static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
-			u16 *mss, u32 *type_cs_vlan_tso)
+			u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes)
 {
 	u32 l4_offset, hdr_len;
 	union l3_hdr_info l3;
@@ -750,6 +750,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs,
 				     (__force __wsum)htonl(l4_paylen));
 	}
 
+	*send_bytes = (skb_shinfo(skb)->gso_segs - 1) * hdr_len + skb->len;
+
 	/* find the txbd field values */
 	*paylen_fdop_ol4cs = skb->len - hdr_len;
 	hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_TSO_B, 1);
@@ -1076,7 +1078,8 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb)
 }
 
 static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
-			      struct sk_buff *skb, struct hns3_desc *desc)
+			      struct sk_buff *skb, struct hns3_desc *desc,
+			      struct hns3_desc_cb *desc_cb)
 {
 	u32 ol_type_vlan_len_msec = 0;
 	u32 paylen_ol4cs = skb->len;
@@ -1105,6 +1108,8 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 			       1);
 	}
 
+	desc_cb->send_bytes = skb->len;
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 ol4_proto, il4_proto;
 
@@ -1140,7 +1145,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring,
 		}
 
 		ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum,
-				   &type_cs_vlan_tso);
+				   &type_cs_vlan_tso, &desc_cb->send_bytes);
 		if (unlikely(ret < 0)) {
 			u64_stats_update_begin(&ring->syncp);
 			ring->stats.tx_tso_err++;
@@ -1553,6 +1558,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
+	struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use];
 	struct netdev_queue *dev_queue;
 	int pre_ntu, next_to_use_head;
 	bool doorbell;
@@ -1580,7 +1586,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	next_to_use_head = ring->next_to_use;
 
-	ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use]);
+	ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use],
+				 desc_cb);
 	if (unlikely(ret < 0))
 		goto fill_err;
 
@@ -1600,7 +1607,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	/* Complete translate all packets */
 	dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
-	doorbell = __netdev_tx_sent_queue(dev_queue, skb->len,
+	doorbell = __netdev_tx_sent_queue(dev_queue, desc_cb->send_bytes,
 					  netdev_xmit_more());
 	hns3_tx_doorbell(ring, ret, doorbell);
 
@@ -2721,8 +2728,12 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
 			break;
 
 		desc_cb = &ring->desc_cb[ntc];
-		(*pkts) += (desc_cb->type == DESC_TYPE_SKB);
-		(*bytes) += desc_cb->length;
+
+		if (desc_cb->type == DESC_TYPE_SKB) {
+			(*pkts)++;
+			(*bytes) += desc_cb->send_bytes;
+		}
+
 		/* desc_cb will be cleaned, after hnae3_free_buffer_detach */
 		hns3_free_buffer_detach(ring, ntc, budget);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index e44224e23315..daa04aeb0942 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -298,7 +298,12 @@ struct hns3_desc_cb {
 
 	/* priv data for the desc, e.g. skb when use with ip stack */
 	void *priv;
-	u32 page_offset;
+
+	union {
+		u32 page_offset;	/* for rx */
+		u32 send_bytes;		/* for tx */
+	};
+
 	u32 length;     /* length of the buffer */
 
 	u16 reuse_flag;
-- 
cgit v1.2.3


From 33a8f7649913e4aeda34bc1294302688112c9c56 Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Mon, 29 Mar 2021 11:57:52 +0800
Subject: net: hns3: expand the tc config command

The device HNAE3_DEVICE_VERSION_V3 supports up to 1280 queues
and qsets for one function, so the bitwidth of tc_offset, meaning
the tqps index, needs to expand from 10 bits to 11 bits.

The device HNAE3_DEVICE_VERSION_V3 supports up to 512 queues on
one TC. The tc_size, meaning the exponent with base 2 of queues
supported on TC, which needs to expand from 3 bits to 4 bits.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h  | 7 +++++--
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index 8a37a22a176b..c6dc11b32aa7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -223,11 +223,14 @@ struct hclgevf_rss_indirection_table_cmd {
 };
 
 #define HCLGEVF_RSS_TC_OFFSET_S		0
-#define HCLGEVF_RSS_TC_OFFSET_M		(0x3ff << HCLGEVF_RSS_TC_OFFSET_S)
+#define HCLGEVF_RSS_TC_OFFSET_M		GENMASK(10, 0)
+#define HCLGEVF_RSS_TC_SIZE_MSB_B	11
 #define HCLGEVF_RSS_TC_SIZE_S		12
-#define HCLGEVF_RSS_TC_SIZE_M		(0x7 << HCLGEVF_RSS_TC_SIZE_S)
+#define HCLGEVF_RSS_TC_SIZE_M		GENMASK(14, 12)
 #define HCLGEVF_RSS_TC_VALID_B		15
 #define HCLGEVF_MAX_TC_NUM		8
+#define HCLGEVF_RSS_TC_SIZE_MSB_OFFSET	3
+
 struct hclgevf_rss_tc_mode_cmd {
 	__le16 rss_tc_mode[HCLGEVF_MAX_TC_NUM];
 	u8 rsv[8];
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index ac3afacbc4ce..1682769112d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -706,6 +706,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev,  u16 rss_size)
 			      (tc_valid[i] & 0x1));
 		hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M,
 				HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
+		hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B,
+			      tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET &
+			      0x1);
 		hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M,
 				HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
 
-- 
cgit v1.2.3


From 97b9e5c131f16e2e487139ba596f9e6df927ae87 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Mon, 29 Mar 2021 11:57:53 +0800
Subject: net: hns3: add stats logging when skb padding fails

skb_put_padto() may fails because of memory failure, sw_err_cnt
is already used to log memory failure in hns3_skb_linearize(),
so use it to log the memory failure for skb_put_padto() too.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 5e08ac9fac7c..f59cd719b5b4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1567,6 +1567,11 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 	/* Hardware can only handle short frames above 32 bytes */
 	if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) {
 		hns3_tx_doorbell(ring, 0, !netdev_xmit_more());
+
+		u64_stats_update_begin(&ring->syncp);
+		ring->stats.sw_err_cnt++;
+		u64_stats_update_end(&ring->syncp);
+
 		return NETDEV_TX_OK;
 	}
 
-- 
cgit v1.2.3


From b52f6425481ce7963b86ef1a171b28f422ffbea1 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 29 Mar 2021 16:01:09 +0800
Subject: net: marvell: Delete duplicate word in comments

Delete duplicate word in two comments.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 4 +---
 drivers/net/ethernet/marvell/skge.c   | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 20307eec8988..d7104c29d625 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -4175,9 +4175,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
 				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
 
 		if (cpu == elected_cpu)
-			/* Map the default receive queue queue to the
-			 * elected CPU
-			 */
+			/* Map the default receive queue to the elected CPU */
 			rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def);
 
 		/* We update the TX queue map only if we have one
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 8a9c0f490bfb..c0882c77b302 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -1617,7 +1617,7 @@ static void genesis_mac_init(struct skge_hw *hw, int port)
 		xm_write16(hw, port, XM_TX_THR, 512);
 
 	/*
-	 * Enable the reception of all error frames. This is is
+	 * Enable the reception of all error frames. This is
 	 * a necessary evil due to the design of the XMAC. The
 	 * XMAC's receive FIFO is only 8K in size, however jumbo
 	 * frames can be up to 9000 bytes in length. When bad
-- 
cgit v1.2.3


From df4a17a98d7f08569b3d2d1e666041f083452aa3 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 29 Mar 2021 16:01:10 +0800
Subject: net: marvell: Fix the trailing format of some block comments

Use a trailing */ on a separate line for block comments.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 6 ++++--
 drivers/net/ethernet/marvell/mvneta.c      | 6 ++++--
 drivers/net/ethernet/marvell/sky2.c        | 9 ++++++---
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 3bfb659b5c99..ca1681aa951a 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -700,7 +700,8 @@ static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb,
 			   ip_hdr(skb)->ihl << TX_IHL_SHIFT;
 
 		/* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL
-		 * it seems we don't need to pass the initial checksum. */
+		 * it seems we don't need to pass the initial checksum.
+		 */
 		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_UDP:
 			cmd |= UDP_FRAME;
@@ -790,7 +791,8 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length,
 		WARN(1, "failed to prepare checksum!");
 
 	/* Should we set this? Can't use the value from skb_tx_csum()
-	 * as it's not the correct initial L4 checksum to use. */
+	 * as it's not the correct initial L4 checksum to use.
+	 */
 	desc->l4i_chk = 0;
 
 	desc->byte_cnt = hdr_len;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d7104c29d625..d514ec68e47b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3993,7 +3993,8 @@ static void mvneta_mac_config(struct phylink_config *config, unsigned int mode,
 
 	/* Armada 370 documentation says we can only change the port mode
 	 * and in-band enable when the link is down, so force it down
-	 * while making these changes. We also do this for GMAC_CTRL2 */
+	 * while making these changes. We also do this for GMAC_CTRL2
+	 */
 	if ((new_ctrl0 ^ gmac_ctrl0) & MVNETA_GMAC0_PORT_1000BASE_X ||
 	    (new_ctrl2 ^ gmac_ctrl2) & MVNETA_GMAC2_INBAND_AN_ENABLE ||
 	    (new_an  ^ gmac_an) & MVNETA_GMAC_INBAND_AN_ENABLE) {
@@ -4905,7 +4906,8 @@ static int mvneta_ethtool_set_eee(struct net_device *dev,
 	u32 lpi_ctl0;
 
 	/* The Armada 37x documents do not give limits for this other than
-	 * it being an 8-bit register. */
+	 * it being an 8-bit register.
+	 */
 	if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 2a752fb6b758..68c154d715d6 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -55,7 +55,8 @@
 #define RX_DEF_PENDING		RX_MAX_PENDING
 
 /* This is the worst case number of transmit list elements for a single skb:
-   VLAN:GSO + CKSUM + Data + skb_frags * DMA */
+ * VLAN:GSO + CKSUM + Data + skb_frags * DMA
+ */
 #define MAX_SKB_TX_LE	(2 + (sizeof(dma_addr_t)/sizeof(u32))*(MAX_SKB_FRAGS+1))
 #define TX_MIN_PENDING		(MAX_SKB_TX_LE+1)
 #define TX_MAX_PENDING		1024
@@ -1529,7 +1530,8 @@ static void sky2_rx_start(struct sky2_port *sky2)
 		sky2_write32(hw, Q_ADDR(rxq, Q_WM), BMU_WM_PEX);
 
 	/* These chips have no ram buffer?
-	 * MAC Rx RAM Read is controlled by hardware */
+	 * MAC Rx RAM Read is controlled by hardware
+	 */
 	if (hw->chip_id == CHIP_ID_YUKON_EC_U &&
 	    hw->chip_rev > CHIP_REV_YU_EC_U_A0)
 		sky2_write32(hw, Q_ADDR(rxq, Q_TEST), F_M_RX_RAM_DIS);
@@ -4684,7 +4686,8 @@ static __exit void sky2_debug_cleanup(void)
 #endif
 
 /* Two copies of network device operations to handle special case of
-   not allowing netpoll on second port */
+ * not allowing netpoll on second port
+ */
 static const struct net_device_ops sky2_netdev_ops[2] = {
   {
 	.ndo_open		= sky2_open,
-- 
cgit v1.2.3


From 9abcaa96ce6d7ba2c54d2c6093ecffd53d46bbb2 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 29 Mar 2021 16:01:11 +0800
Subject: net: marvell: Delete extra spaces

Just delete three extra spaces.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 drivers/net/ethernet/marvell/skge.c   | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d514ec68e47b..f20dfd1d7a6b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1087,7 +1087,7 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
 	return 0;
 }
 
-static  int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
+static int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
 {
 	u32 wsize;
 	u8 target, attr;
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index c0882c77b302..69072dd12305 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -2959,8 +2959,9 @@ static void genesis_set_multicast(struct net_device *dev)
 
 static void yukon_add_filter(u8 filter[8], const u8 *addr)
 {
-	 u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f;
-	 filter[bit/8] |= 1 << (bit%8);
+	u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f;
+
+	filter[bit / 8] |= 1 << (bit % 8);
 }
 
 static void yukon_set_multicast(struct net_device *dev)
-- 
cgit v1.2.3


From 9568387c9f5193257414bc213c6022dcdeacee09 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Mon, 29 Mar 2021 16:01:12 +0800
Subject: net: marvell: Fix an alignment problem

Use tab instead of space to align the code.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/skge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 69072dd12305..d4bb27ba1419 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -3850,7 +3850,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 
 	/* Only used for Genesis XMAC */
 	if (is_genesis(hw))
-	    timer_setup(&skge->link_timer, xm_link_timer, 0);
+		timer_setup(&skge->link_timer, xm_link_timer, 0);
 	else {
 		dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
 		                   NETIF_F_RXCSUM;
-- 
cgit v1.2.3


From 4947e7309a31fdab1078b477b98b4cb1a28f80d8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:43 +0300
Subject: mlxsw: spectrum_matchall: Perform protocol check earlier

Perform the protocol check earlier in the function instead of repeating
it for every action. Example:

 # tc filter add dev swp1 ingress proto ip matchall skip_sw action sample group 1 rate 100
 Error: matchall rules only supported with 'all' protocol.
 We have an error talking to the kernel

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index ce58a795c6fc..9252e23fd082 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -238,6 +238,11 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		flower_prio_valid = true;
 	}
 
+	if (protocol != htons(ETH_P_ALL)) {
+		NL_SET_ERR_MSG(f->common.extack, "matchall rules only supported with 'all' protocol");
+		return -EOPNOTSUPP;
+	}
+
 	mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL);
 	if (!mall_entry)
 		return -ENOMEM;
@@ -247,7 +252,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 
 	act = &f->rule->action.entries[0];
 
-	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
+	if (act->id == FLOW_ACTION_MIRRED) {
 		if (flower_prio_valid && mall_entry->ingress &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
@@ -262,8 +267,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
-	} else if (act->id == FLOW_ACTION_SAMPLE &&
-		   protocol == htons(ETH_P_ALL)) {
+	} else if (act->id == FLOW_ACTION_SAMPLE) {
 		if (flower_prio_valid &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
-- 
cgit v1.2.3


From 50401f292434d5fb99f3f0c9b861f8a84c151ecc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:44 +0300
Subject: mlxsw: spectrum_matchall: Convert if statements to a switch statement

Previous patch moved the protocol check out of the action check, so
these if statements can now be converted to a switch statement. Perform
the conversion.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 9252e23fd082..af0a20581a37 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -252,7 +252,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 
 	act = &f->rule->action.entries[0];
 
-	if (act->id == FLOW_ACTION_MIRRED) {
+	switch (act->id) {
+	case FLOW_ACTION_MIRRED:
 		if (flower_prio_valid && mall_entry->ingress &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
@@ -267,7 +268,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
-	} else if (act->id == FLOW_ACTION_SAMPLE) {
+		break;
+	case FLOW_ACTION_SAMPLE:
 		if (flower_prio_valid &&
 		    mall_entry->priority >= flower_min_prio) {
 			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
@@ -279,7 +281,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 		mall_entry->sample.params.truncate = act->sample.truncate;
 		mall_entry->sample.params.trunc_size = act->sample.trunc_size;
 		mall_entry->sample.params.rate = act->sample.rate;
-	} else {
+		break;
+	default:
 		err = -EOPNOTSUPP;
 		goto errout;
 	}
-- 
cgit v1.2.3


From b24303048a6b23d27c4c12b9843265c0eef80ffd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:45 +0300
Subject: mlxsw: spectrum_matchall: Perform priority checks earlier

Perform the priority check earlier in the function instead of repeating
it for every action. This fixes a bug that allowed matchall rules with
sample action to be added in front of flower rules on egress.

Fixes: 54d0e963f683 ("mlxsw: spectrum_matchall: Add support for egress sampling")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 31 +++++++++-------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index af0a20581a37..07b371cd9818 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -250,32 +250,27 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
 	mall_entry->priority = f->common.prio;
 	mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
 
+	if (flower_prio_valid && mall_entry->ingress &&
+	    mall_entry->priority >= flower_min_prio) {
+		NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+	if (flower_prio_valid && !mall_entry->ingress &&
+	    mall_entry->priority <= flower_max_prio) {
+		NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules");
+		err = -EOPNOTSUPP;
+		goto errout;
+	}
+
 	act = &f->rule->action.entries[0];
 
 	switch (act->id) {
 	case FLOW_ACTION_MIRRED:
-		if (flower_prio_valid && mall_entry->ingress &&
-		    mall_entry->priority >= flower_min_prio) {
-			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
-		if (flower_prio_valid && !mall_entry->ingress &&
-		    mall_entry->priority <= flower_max_prio) {
-			NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
 		break;
 	case FLOW_ACTION_SAMPLE:
-		if (flower_prio_valid &&
-		    mall_entry->priority >= flower_min_prio) {
-			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
 		mall_entry->sample.params.psample_group = act->sample.psample_group;
 		mall_entry->sample.params.truncate = act->sample.truncate;
-- 
cgit v1.2.3


From c3572a0b731fc0de13afef300f1f5afb929e4d4c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:46 +0300
Subject: selftests: mlxsw: Test matchall failure with protocol match

The driver can only offload matchall rules that do not match on a
protocol. Test that matchall rules that match on a protocol are vetoed.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/tc_restrictions.sh      | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index b4dbda706c4d..5ec3beb637c8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
 	matchall_mirror_behind_flower_ingress_test
 	matchall_sample_behind_flower_ingress_test
 	matchall_mirror_behind_flower_egress_test
+	matchall_proto_match_test
 	police_limits_test
 	multi_police_test
 "
@@ -291,6 +292,22 @@ matchall_mirror_behind_flower_egress_test()
 	matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
 }
 
+matchall_proto_match_test()
+{
+	RET=0
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		matchall skip_sw \
+		action sample group 1 rate 100
+	check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall protocol match"
+}
+
 police_limits_test()
 {
 	RET=0
-- 
cgit v1.2.3


From 17b96a5cbe3d0c743c49776e90d892844c226a56 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:47 +0300
Subject: mlxsw: spectrum: Veto sampling if already enabled on port

The per-port sampling triggers (i.e., ingress / egress) cannot be
enabled twice. Meaning, the below configuration will not result in
packets being sampled twice:

 # tc filter add dev swp1 ingress matchall skip_sw action sample rate 100 group 1
 # tc filter add dev swp1 ingress matchall skip_sw action sample rate 100 group 1

Therefore, reject such configurations.

Fixes: 90f53c53ec4a ("mlxsw: spectrum: Start using sampling triggers hash table")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index efc7acb4842c..bca0354482cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2668,6 +2668,11 @@ mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
 		return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key,
 							 params);
 
+	if (trigger_node->trigger.local_port) {
+		NL_SET_ERR_MSG_MOD(extack, "Sampling already enabled on port");
+		return -EINVAL;
+	}
+
 	if (trigger_node->params.psample_group != params->psample_group ||
 	    trigger_node->params.truncate != params->truncate ||
 	    trigger_node->params.rate != params->rate ||
-- 
cgit v1.2.3


From 7ede22e6583290c832306e23414cc2c1e336c4b7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 29 Mar 2021 13:09:48 +0300
Subject: selftests: mlxsw: Test vetoing of double sampling

Test that two sampling rules cannot be configured on the same port with
the same trigger.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/tc_sample.sh       | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 57b05f042787..093bed088ad0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -34,6 +34,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
 ALL_TESTS="
 	tc_sample_rate_test
 	tc_sample_max_rate_test
+	tc_sample_conflict_test
 	tc_sample_group_conflict_test
 	tc_sample_md_iif_test
 	tc_sample_md_lag_iif_test
@@ -272,6 +273,35 @@ tc_sample_max_rate_test()
 	log_test "tc sample maximum rate"
 }
 
+tc_sample_conflict_test()
+{
+	RET=0
+
+	# Test that two sampling rules cannot be configured on the same port,
+	# even when they share the same parameters.
+
+	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule"
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 1 &> /dev/null
+	check_fail $? "Managed to configure second sampling rule"
+
+	# Delete the first rule and make sure the second rule can now be
+	# configured.
+
+	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+		skip_sw action sample rate 1024 group 1
+	check_err $? "Failed to configure sampling rule after deletion"
+
+	log_test "tc sample conflict test"
+
+	tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
 tc_sample_group_conflict_test()
 {
 	RET=0
-- 
cgit v1.2.3


From 4db0964a75a2835c7e4a5051cf99e0e63f9775f9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 29 Mar 2021 20:40:46 +0800
Subject: net: phy: Correct function name mdiobus_register_board_info() in
 comment

Fix the following make W=1 kernel build warning:

 drivers/net/phy/mdio-boardinfo.c:63: warning: expecting prototype for mdio_register_board_info(). Prototype was for mdiobus_register_board_info() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-boardinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c
index 033df435f76c..2de679a68115 100644
--- a/drivers/net/phy/mdio-boardinfo.c
+++ b/drivers/net/phy/mdio-boardinfo.c
@@ -50,7 +50,7 @@ void mdiobus_setup_mdiodev_from_board_info(struct mii_bus *bus,
 EXPORT_SYMBOL(mdiobus_setup_mdiodev_from_board_info);
 
 /**
- * mdio_register_board_info - register MDIO devices for a given board
+ * mdiobus_register_board_info - register MDIO devices for a given board
  * @info: array of devices descriptors
  * @n: number of descriptors provided
  * Context: can sleep
-- 
cgit v1.2.3


From acf61b3d84ccb325924a1ae320403f82858dba0f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 29 Mar 2021 20:42:57 +0800
Subject: net: bonding: Correct function name bond_change_active_slave() in
 comment

Fix the following make W=1 kernel build warning:

 drivers/net/bonding/bond_main.c:982: warning: expecting prototype for change_active_interface(). Prototype was for bond_change_active_slave() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 74cbbb22470b..d5ca38aa8aa9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -964,7 +964,7 @@ static bool bond_should_notify_peers(struct bonding *bond)
 }
 
 /**
- * change_active_interface - change the active slave into the specified one
+ * bond_change_active_slave - change the active slave into the specified one
  * @bond: our bonding struct
  * @new_active: the new slave to make the active one
  *
-- 
cgit v1.2.3


From 177cb7876dced42e7ab8736e108afd1fe8dc03ea Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 29 Mar 2021 20:44:27 +0800
Subject: net: mdio: Correct function name mdio45_links_ok() in comment

Fix the following make W=1 kernel build warning:

 drivers/net/mdio.c:95: warning: expecting prototype for mdio_link_ok(). Prototype was for mdio45_links_ok() instead

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mdio.c b/drivers/net/mdio.c
index 5e72cc55afbd..e08c90ac0c6e 100644
--- a/drivers/net/mdio.c
+++ b/drivers/net/mdio.c
@@ -83,7 +83,7 @@ int mdio_set_flag(const struct mdio_if_info *mdio,
 EXPORT_SYMBOL(mdio_set_flag);
 
 /**
- * mdio_link_ok - is link status up/OK
+ * mdio45_links_ok - is link status up/OK
  * @mdio: MDIO interface
  * @mmd_mask: Mask for MMDs to check
  *
-- 
cgit v1.2.3


From 87f2c6716f6408b9992d7f2247d1fcc190de2c92 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Mon, 29 Mar 2021 17:57:31 +0200
Subject: Documentation: net: Document resilient next-hop groups

Add a document describing the principles behind resilient next-hop groups,
and some notes about how to configure and offload them.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst                 |   1 +
 .../networking/nexthop-group-resilient.rst         | 293 +++++++++++++++++++++
 2 files changed, 294 insertions(+)
 create mode 100644 Documentation/networking/nexthop-group-resilient.rst

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b8a29997d433..e9ce55992aa9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -76,6 +76,7 @@ Contents:
    netdevices
    netfilter-sysctl
    netif-msg
+   nexthop-group-resilient
    nf_conntrack-sysctl
    nf_flowtable
    openvswitch
diff --git a/Documentation/networking/nexthop-group-resilient.rst b/Documentation/networking/nexthop-group-resilient.rst
new file mode 100644
index 000000000000..fabecee24d85
--- /dev/null
+++ b/Documentation/networking/nexthop-group-resilient.rst
@@ -0,0 +1,293 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Resilient Next-hop Groups
+=========================
+
+Resilient groups are a type of next-hop group that is aimed at minimizing
+disruption in flow routing across changes to the group composition and
+weights of constituent next hops.
+
+The idea behind resilient hashing groups is best explained in contrast to
+the legacy multipath next-hop group, which uses the hash-threshold
+algorithm, described in RFC 2992.
+
+To select a next hop, hash-threshold algorithm first assigns a range of
+hashes to each next hop in the group, and then selects the next hop by
+comparing the SKB hash with the individual ranges. When a next hop is
+removed from the group, the ranges are recomputed, which leads to
+reassignment of parts of hash space from one next hop to another. RFC 2992
+illustrates it thus::
+
+             +-------+-------+-------+-------+-------+
+             |   1   |   2   |   3   |   4   |   5   |
+             +-------+-+-----+---+---+-----+-+-------+
+             |    1    |    2    |    4    |    5    |
+             +---------+---------+---------+---------+
+
+              Before and after deletion of next hop 3
+	      under the hash-threshold algorithm.
+
+Note how next hop 2 gave up part of the hash space in favor of next hop 1,
+and 4 in favor of 5. While there will usually be some overlap between the
+previous and the new distribution, some traffic flows change the next hop
+that they resolve to.
+
+If a multipath group is used for load-balancing between multiple servers,
+this hash space reassignment causes an issue that packets from a single
+flow suddenly end up arriving at a server that does not expect them. This
+can result in TCP connections being reset.
+
+If a multipath group is used for load-balancing among available paths to
+the same server, the issue is that different latencies and reordering along
+the way causes the packets to arrive in the wrong order, resulting in
+degraded application performance.
+
+To mitigate the above-mentioned flow redirection, resilient next-hop groups
+insert another layer of indirection between the hash space and its
+constituent next hops: a hash table. The selection algorithm uses SKB hash
+to choose a hash table bucket, then reads the next hop that this bucket
+contains, and forwards traffic there.
+
+This indirection brings an important feature. In the hash-threshold
+algorithm, the range of hashes associated with a next hop must be
+continuous. With a hash table, mapping between the hash table buckets and
+the individual next hops is arbitrary. Therefore when a next hop is deleted
+the buckets that held it are simply reassigned to other next hops::
+
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |1|1|1|1|2|2|2|2|3|3|3|3|4|4|4|4|5|5|5|5|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	                     v v v v
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	    |1|1|1|1|2|2|2|2|1|2|4|5|4|4|4|4|5|5|5|5|
+	    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+	    Before and after deletion of next hop 3
+	    under the resilient hashing algorithm.
+
+When weights of next hops in a group are altered, it may be possible to
+choose a subset of buckets that are currently not used for forwarding
+traffic, and use those to satisfy the new next-hop distribution demands,
+keeping the "busy" buckets intact. This way, established flows are ideally
+kept being forwarded to the same endpoints through the same paths as before
+the next-hop group change.
+
+Algorithm
+---------
+
+In a nutshell, the algorithm works as follows. Each next hop deserves a
+certain number of buckets, according to its weight and the number of
+buckets in the hash table. In accordance with the source code, we will call
+this number a "wants count" of a next hop. In case of an event that might
+cause bucket allocation change, the wants counts for individual next hops
+are updated.
+
+Next hops that have fewer buckets than their wants count, are called
+"underweight". Those that have more are "overweight". If there are no
+overweight (and therefore no underweight) next hops in the group, it is
+said to be "balanced".
+
+Each bucket maintains a last-used timer. Every time a packet is forwarded
+through a bucket, this timer is updated to current jiffies value. One
+attribute of a resilient group is then the "idle timer", which is the
+amount of time that a bucket must not be hit by traffic in order for it to
+be considered "idle". Buckets that are not idle are busy.
+
+After assigning wants counts to next hops, an "upkeep" algorithm runs. For
+buckets:
+
+1) that have no assigned next hop, or
+2) whose next hop has been removed, or
+3) that are idle and their next hop is overweight,
+
+upkeep changes the next hop that the bucket references to one of the
+underweight next hops. If, after considering all buckets in this manner,
+there are still underweight next hops, another upkeep run is scheduled to a
+future time.
+
+There may not be enough "idle" buckets to satisfy the updated wants counts
+of all next hops. Another attribute of a resilient group is the "unbalanced
+timer". This timer can be set to 0, in which case the table will stay out
+of balance until idle buckets do appear, possibly never. If set to a
+non-zero value, the value represents the period of time that the table is
+permitted to stay out of balance.
+
+With this in mind, we update the above list of conditions with one more
+item. Thus buckets:
+
+4) whose next hop is overweight, and the amount of time that the table has
+   been out of balance exceeds the unbalanced timer, if that is non-zero,
+
+\... are migrated as well.
+
+Offloading & Driver Feedback
+----------------------------
+
+When offloading resilient groups, the algorithm that distributes buckets
+among next hops is still the one in SW. Drivers are notified of updates to
+next hop groups in the following three ways:
+
+- Full group notification with the type
+  ``NH_NOTIFIER_INFO_TYPE_RES_TABLE``. This is used just after the group is
+  created and buckets populated for the first time.
+
+- Single-bucket notifications of the type
+  ``NH_NOTIFIER_INFO_TYPE_RES_BUCKET``, which is used for notifications of
+  individual migrations within an already-established group.
+
+- Pre-replace notification, ``NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE``. This
+  is sent before the group is replaced, and is a way for the driver to veto
+  the group before committing anything to the HW.
+
+Some single-bucket notifications are forced, as indicated by the "force"
+flag in the notification. Those are used for the cases where e.g. the next
+hop associated with the bucket was removed, and the bucket really must be
+migrated.
+
+Non-forced notifications can be overridden by the driver by returning an
+error code. The use case for this is that the driver notifies the HW that a
+bucket should be migrated, but the HW discovers that the bucket has in fact
+been hit by traffic.
+
+A second way for the HW to report that a bucket is busy is through the
+``nexthop_res_grp_activity_update()`` API. The buckets identified this way
+as busy are treated as if traffic hit them.
+
+Offloaded buckets should be flagged as either "offload" or "trap". This is
+done through the ``nexthop_bucket_set_hw_flags()`` API.
+
+Netlink UAPI
+------------
+
+Resilient Group Replacement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Resilient groups are configured using the ``RTM_NEWNEXTHOP`` message in the
+same manner as other multipath groups. The following changes apply to the
+attributes passed in the netlink message:
+
+  =================== =========================================================
+  ``NHA_GROUP_TYPE``  Should be ``NEXTHOP_GRP_TYPE_RES`` for resilient group.
+  ``NHA_RES_GROUP``   A nest that contains attributes specific to resilient
+                      groups.
+  =================== =========================================================
+
+``NHA_RES_GROUP`` payload:
+
+  =================================== =========================================
+  ``NHA_RES_GROUP_BUCKETS``           Number of buckets in the hash table.
+  ``NHA_RES_GROUP_IDLE_TIMER``        Idle timer in units of clock_t.
+  ``NHA_RES_GROUP_UNBALANCED_TIMER``  Unbalanced timer in units of clock_t.
+  =================================== =========================================
+
+Next Hop Get
+^^^^^^^^^^^^
+
+Requests to get resilient next-hop groups use the ``RTM_GETNEXTHOP``
+message in exactly the same way as other next hop get requests. The
+response attributes match the replacement attributes cited above, except
+``NHA_RES_GROUP`` payload will include the following attribute:
+
+  =================================== =========================================
+  ``NHA_RES_GROUP_UNBALANCED_TIME``   How long has the resilient group been out
+                                      of balance, in units of clock_t.
+  =================================== =========================================
+
+Bucket Get
+^^^^^^^^^^
+
+The message ``RTM_GETNEXTHOPBUCKET`` without the ``NLM_F_DUMP`` flag is
+used to request a single bucket. The attributes recognized at get requests
+are:
+
+  =================== =========================================================
+  ``NHA_ID``          ID of the next-hop group that the bucket belongs to.
+  ``NHA_RES_BUCKET``  A nest that contains attributes specific to bucket.
+  =================== =========================================================
+
+``NHA_RES_BUCKET`` payload:
+
+  ======================== ====================================================
+  ``NHA_RES_BUCKET_INDEX`` Index of bucket in the resilient table.
+  ======================== ====================================================
+
+Bucket Dumps
+^^^^^^^^^^^^
+
+The message ``RTM_GETNEXTHOPBUCKET`` with the ``NLM_F_DUMP`` flag is used
+to request a dump of matching buckets. The attributes recognized at dump
+requests are:
+
+  =================== =========================================================
+  ``NHA_ID``          If specified, limits the dump to just the next-hop group
+                      with this ID.
+  ``NHA_OIF``         If specified, limits the dump to buckets that contain
+                      next hops that use the device with this ifindex.
+  ``NHA_MASTER``      If specified, limits the dump to buckets that contain
+                      next hops that use a device in the VRF with this ifindex.
+  ``NHA_RES_BUCKET``  A nest that contains attributes specific to bucket.
+  =================== =========================================================
+
+``NHA_RES_BUCKET`` payload:
+
+  ======================== ====================================================
+  ``NHA_RES_BUCKET_NH_ID`` If specified, limits the dump to just the buckets
+                           that contain the next hop with this ID.
+  ======================== ====================================================
+
+Usage
+-----
+
+To illustrate the usage, consider the following commands::
+
+	# ip nexthop add id 1 via 192.0.2.2 dev eth0
+	# ip nexthop add id 2 via 192.0.2.3 dev eth0
+	# ip nexthop add id 10 group 1/2 type resilient \
+		buckets 8 idle_timer 60 unbalanced_timer 300
+
+The last command creates a resilient next-hop group. It will have 8 buckets
+(which is unusually low number, and used here for demonstration purposes
+only), each bucket will be considered idle when no traffic hits it for at
+least 60 seconds, and if the table remains out of balance for 300 seconds,
+it will be forcefully brought into balance.
+
+Changing next-hop weights leads to change in bucket allocation::
+
+	# ip nexthop replace id 10 group 1,3/2 type resilient
+
+This can be confirmed by looking at individual buckets::
+
+	# ip nexthop bucket show id 10
+	id 10 index 0 idle_time 5.59 nhid 1
+	id 10 index 1 idle_time 5.59 nhid 1
+	id 10 index 2 idle_time 8.74 nhid 2
+	id 10 index 3 idle_time 8.74 nhid 2
+	id 10 index 4 idle_time 8.74 nhid 1
+	id 10 index 5 idle_time 8.74 nhid 1
+	id 10 index 6 idle_time 8.74 nhid 1
+	id 10 index 7 idle_time 8.74 nhid 1
+
+Note the two buckets that have a shorter idle time. Those are the ones that
+were migrated after the next-hop replace command to satisfy the new demand
+that next hop 1 be given 6 buckets instead of 4.
+
+Netdevsim
+---------
+
+The netdevsim driver implements a mock offload of resilient groups, and
+exposes debugfs interface that allows marking individual buckets as busy.
+For example, the following will mark bucket 23 in next-hop group 10 as
+active::
+
+	# echo 10 23 > /sys/kernel/debug/netdevsim/netdevsim10/fib/nexthop_bucket_activity
+
+In addition, another debugfs interface can be used to configure that the
+next attempt to migrate a bucket should fail::
+
+	# echo 1 > /sys/kernel/debug/netdevsim/netdevsim10/fib/fail_nexthop_bucket_replace
+
+Besides serving as an example, the interfaces that netdevsim exposes are
+useful in automated testing, and
+``tools/testing/selftests/drivers/net/netdevsim/nexthop.sh`` makes use of
+them to test the algorithm.
-- 
cgit v1.2.3


From 24ad92c841c9fa548dc944821721944405ef7963 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 29 Mar 2021 12:23:54 +0100
Subject: ieee802154: hwsim: remove redundant initialization of variable res

The variable res is being initialized with a value that is
never read and it is being updated later with a new value.
The initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ieee802154/mac802154_hwsim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c
index c0bf7d78276e..da9135231c07 100644
--- a/drivers/net/ieee802154/mac802154_hwsim.c
+++ b/drivers/net/ieee802154/mac802154_hwsim.c
@@ -268,7 +268,7 @@ static int hwsim_get_radio(struct sk_buff *skb, struct hwsim_phy *phy,
 			   struct netlink_callback *cb, int flags)
 {
 	void *hdr;
-	int res = -EMSGSIZE;
+	int res;
 
 	hdr = genlmsg_put(skb, portid, seq, &hwsim_genl_family, flags,
 			  MAC802154_HWSIM_CMD_GET_RADIO);
-- 
cgit v1.2.3


From d9f0713c9217fdd31077f890c2e15232ad2f0772 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 29 Mar 2021 17:39:31 +0200
Subject: net: mhi: Add support for non-linear MBIM skb processing

Currently, if skb is non-linear, due to MHI skb chaining, it is
linearized in MBIM RX handler prior MBIM decoding, causing extra
allocation and copy that can be as large as the maximum MBIM frame
size (32K).

This change introduces MBIM decoding for non-linear skb, allowing to
process 'large' non-linear MBIM packets without skb linearization.
The IP packets are simply extracted from the MBIM frame using the
skb_copy_bits helper.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/proto_mbim.c | 51 ++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
index 75b5484c40d5..5a176c3eed53 100644
--- a/drivers/net/mhi/proto_mbim.c
+++ b/drivers/net/mhi/proto_mbim.c
@@ -91,20 +91,11 @@ static int mbim_rx_verify_nth16(struct sk_buff *skb)
 	return le16_to_cpu(nth16->wNdpIndex);
 }
 
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, int ndpoffset)
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
 {
 	struct mhi_net_dev *dev = netdev_priv(skb->dev);
-	struct usb_cdc_ncm_ndp16 *ndp16;
 	int ret;
 
-	if (ndpoffset + sizeof(struct usb_cdc_ncm_ndp16) > skb->len) {
-		netif_dbg(dev, rx_err, dev->ndev, "invalid NDP offset  <%u>\n",
-			  ndpoffset);
-		return -EINVAL;
-	}
-
-	ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset);
-
 	if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
 		netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
 			  le16_to_cpu(ndp16->wLength));
@@ -130,9 +121,6 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 	struct net_device *ndev = mhi_netdev->ndev;
 	int ndpoffset;
 
-	if (skb_linearize(skb))
-		goto error;
-
 	/* Check NTB header and retrieve first NDP offset */
 	ndpoffset = mbim_rx_verify_nth16(skb);
 	if (ndpoffset < 0) {
@@ -142,12 +130,19 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 
 	/* Process each NDP */
 	while (1) {
-		struct usb_cdc_ncm_ndp16 *ndp16;
-		struct usb_cdc_ncm_dpe16 *dpe16;
-		int nframes, n;
+		struct usb_cdc_ncm_ndp16 ndp16;
+		struct usb_cdc_ncm_dpe16 dpe16;
+		int nframes, n, dpeoffset;
+
+		if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+			net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
+					    ndev->name, ndpoffset);
+			__mbim_length_errors_inc(mhi_netdev);
+			goto error;
+		}
 
 		/* Check NDP header and retrieve number of datagrams */
-		nframes = mbim_rx_verify_ndp16(skb, ndpoffset);
+		nframes = mbim_rx_verify_ndp16(skb, &ndp16);
 		if (nframes < 0) {
 			net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
 			__mbim_length_errors_inc(mhi_netdev);
@@ -155,8 +150,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 		}
 
 		 /* Only IP data type supported, no DSS in MHI context */
-		ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset);
-		if ((ndp16->dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+		if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
 				!= cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
 			net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
 			__mbim_errors_inc(mhi_netdev);
@@ -164,19 +158,24 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 		}
 
 		/* Only primary IP session 0 (0x00) supported for now */
-		if (ndp16->dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
+		if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
 			net_err_ratelimited("%s: bad packet session\n", ndev->name);
 			__mbim_errors_inc(mhi_netdev);
 			goto next_ndp;
 		}
 
 		/* de-aggregate and deliver IP packets */
-		dpe16 = ndp16->dpe16;
-		for (n = 0; n < nframes; n++, dpe16++) {
-			u16 dgram_offset = le16_to_cpu(dpe16->wDatagramIndex);
-			u16 dgram_len = le16_to_cpu(dpe16->wDatagramLength);
+		dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+		for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+			u16 dgram_offset, dgram_len;
 			struct sk_buff *skbn;
 
+			if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+				break;
+
+			dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+			dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
 			if (!dgram_offset || !dgram_len)
 				break; /* null terminator */
 
@@ -185,7 +184,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 				continue;
 
 			skb_put(skbn, dgram_len);
-			memcpy(skbn->data, skb->data + dgram_offset, dgram_len);
+			skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
 
 			switch (skbn->data[0] & 0xf0) {
 			case 0x40:
@@ -206,7 +205,7 @@ static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
 		}
 next_ndp:
 		/* Other NDP to process? */
-		ndpoffset = (int)le16_to_cpu(ndp16->wNextNdpIndex);
+		ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
 		if (!ndpoffset)
 			break;
 	}
-- 
cgit v1.2.3


From 3af562a37b7f1a5dbeb50a00ace280ba2d984762 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Mon, 29 Mar 2021 17:39:32 +0200
Subject: net: mhi: Allow decoupled MTU/MRU

MBIM protocol makes the mhi network interface asymmetric, ingress data
received from MHI is MBIM protocol, possibly containing multiple
aggregated IP packets, while egress data received from network stack is
IP protocol.

This changes allows a 'protocol' to specify its own MRU, that when
specified is used to allocate MHI RX buffers (skb).

For MBIM, Set the default MTU to 1500, which is the usual network MTU
for WWAN IP packets, and MRU to 3.5K (for allocation efficiency),
allowing skb to fit in an usual 4K page (including padding,
skb_shared_info, ...).

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/mhi.h        |  1 +
 drivers/net/mhi/net.c        |  4 +++-
 drivers/net/mhi/proto_mbim.c | 11 +++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h
index 12e7407d712a..1d0c499d27a3 100644
--- a/drivers/net/mhi/mhi.h
+++ b/drivers/net/mhi/mhi.h
@@ -29,6 +29,7 @@ struct mhi_net_dev {
 	struct mhi_net_stats stats;
 	u32 rx_queue_sz;
 	int msg_enable;
+	unsigned int mru;
 };
 
 struct mhi_net_proto {
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index f59960876083..5ec7a29f668d 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -265,10 +265,12 @@ static void mhi_net_rx_refill_work(struct work_struct *work)
 						      rx_refill.work);
 	struct net_device *ndev = mhi_netdev->ndev;
 	struct mhi_device *mdev = mhi_netdev->mdev;
-	int size = READ_ONCE(ndev->mtu);
 	struct sk_buff *skb;
+	unsigned int size;
 	int err;
 
+	size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
+
 	while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
 		skb = netdev_alloc_skb(ndev, size);
 		if (unlikely(!skb))
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
index 5a176c3eed53..fc72b3f6ec9e 100644
--- a/drivers/net/mhi/proto_mbim.c
+++ b/drivers/net/mhi/proto_mbim.c
@@ -26,6 +26,15 @@
 
 #define MBIM_NDP16_SIGN_MASK 0x00ffffff
 
+/* Usual WWAN MTU */
+#define MHI_MBIM_DEFAULT_MTU 1500
+
+/* 3500 allows to optimize skb allocation, the skbs will basically fit in
+ * one 4K page. Large MBIM packets will simply be split over several MHI
+ * transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_MBIM_DEFAULT_MRU 3500
+
 struct mbim_context {
 	u16 rx_seq;
 	u16 tx_seq;
@@ -281,6 +290,8 @@ static int mbim_init(struct mhi_net_dev *mhi_netdev)
 		return -ENOMEM;
 
 	ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+	ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+	mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 02fdc14d9bf15d2a2b6aab1f3fd247fe2d70cf1b Mon Sep 17 00:00:00 2001
From: Jon Maloy <jmaloy@redhat.com>
Date: Mon, 29 Mar 2021 13:17:31 -0400
Subject: tipc: fix htmldoc and smatch warnings

We fix a warning from the htmldoc tool and an indentation error reported
by smatch. There are no functional changes in this commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 +-
 net/tipc/subscr.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 117a472a8e61..f21162aa0cf7 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1450,7 +1450,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 		ua = (struct tipc_uaddr *)&tsk->peer;
 		if (!syn && ua->family != AF_TIPC)
 			return -EDESTADDRREQ;
-		 atype = ua->addrtype;
+		atype = ua->addrtype;
 	}
 
 	if (unlikely(syn)) {
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index ddea6554ec46..60b877531b66 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -49,12 +49,13 @@ struct tipc_conn;
 
 /**
  * struct tipc_subscription - TIPC network topology subscription object
+ * @s: host-endian copy of the user subscription
+ * @evt: template for events generated by subscription
  * @kref: reference count for this subscription
  * @net: network namespace associated with subscription
  * @timer: timer governing subscription duration (optional)
  * @service_list: adjacent subscriptions in name sequence's subscription list
  * @sub_list: adjacent subscriptions in subscriber's subscription list
- * @evt: template for events generated by subscription
  * @conid: connection identifier of topology server
  * @inactive: true if this subscription is inactive
  * @lock: serialize up/down and timer events
-- 
cgit v1.2.3


From 7f700334be9aeb91d5d86ef9ad2d901b9b453e9b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Mar 2021 11:39:51 -0700
Subject: ip6_gre: proper dev_{hold|put} in ndo_[un]init methods

After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger
a warning [1]

Issue here is that:

- all dev_put() should be paired with a corresponding dev_hold(),
  and vice versa.

- A driver doing a dev_put() in its ndo_uninit() MUST also
  do a dev_hold() in its ndo_init(), only when ndo_init()
  is returning 0.

Otherwise, register_netdevice() would call ndo_uninit()
in its error path and release a refcount too soon.

ip6_gre for example (among others problematic drivers)
has to use dev_hold() in ip6gre_tunnel_init_common()
instead of from ip6gre_newlink_common(), covering
both ip6gre_tunnel_init() and ip6gre_tap_init()/

Note that ip6gre_tunnel_init_common() is not called from
ip6erspan_tap_init() thus we also need to add a dev_hold() there,
as ip6erspan_tunnel_uninit() does call dev_put()

[1]
refcount_t: decrement hit 0; leaking memory.
WARNING: CPU: 0 PID: 8422 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Modules linked in:
CPU: 1 PID: 8422 Comm: syz-executor854 Not tainted 5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58
RSP: 0018:ffffc900018befd0 EFLAGS: 00010282
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ffff88801ef19c40 RSI: ffffffff815c51f5 RDI: fffff52000317dec
RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff888018cf4568
R13: ffff888018cf4c00 R14: ffff8880228f2000 R15: ffffffff8d659b80
FS:  00000000014eb300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055d7bf2b3138 CR3: 0000000014933000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __refcount_dec include/linux/refcount.h:344 [inline]
 refcount_dec include/linux/refcount.h:359 [inline]
 dev_put include/linux/netdevice.h:4135 [inline]
 ip6gre_tunnel_uninit+0x3d7/0x440 net/ipv6/ip6_gre.c:420
 register_netdevice+0xadf/0x1500 net/core/dev.c:10308
 ip6gre_newlink_common.constprop.0+0x158/0x410 net/ipv6/ip6_gre.c:1984
 ip6gre_newlink+0x275/0x7a0 net/ipv6/ip6_gre.c:2017
 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443
 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502
 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46

Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1baf43aacb2e..9689bf9f46f3 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1496,6 +1496,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 	}
 	ip6gre_tnl_init_features(dev);
 
+	dev_hold(dev);
 	return 0;
 
 cleanup_dst_cache_init:
@@ -1889,6 +1890,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	ip6erspan_tnl_link_config(tunnel, 1);
 
+	dev_hold(dev);
 	return 0;
 
 cleanup_dst_cache_init:
@@ -1988,8 +1990,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
 	if (tb[IFLA_MTU])
 		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
 
-	dev_hold(dev);
-
 out:
 	return err;
 }
-- 
cgit v1.2.3


From 40cb881b5aaa0b69a7d93dec8440d5c62dae299f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Mar 2021 12:12:54 -0700
Subject: ip6_vti: proper dev_{hold|put} in ndo_[un]init methods

After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger
a warning [1]

Issue here is that:

- all dev_put() should be paired with a corresponding prior dev_hold().

- A driver doing a dev_put() in its ndo_uninit() MUST also
  do a dev_hold() in its ndo_init(), only when ndo_init()
  is returning 0.

Otherwise, register_netdevice() would call ndo_uninit()
in its error path and release a refcount too soon.

Therefore, we need to move dev_hold() call from
vti6_tnl_create2() to vti6_dev_init_gen()

[1]
WARNING: CPU: 0 PID: 15951 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Modules linked in:
CPU: 0 PID: 15951 Comm: syz-executor.3 Not tainted 5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58
RSP: 0018:ffffc90001eaef28 EFLAGS: 00010282
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000000000040000 RSI: ffffffff815c51f5 RDI: fffff520003d5dd7
RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff88801bb1c568
R13: ffff88801f69e800 R14: 00000000ffffffff R15: ffff888050889d40
FS:  00007fc79314e700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f1c1ff47108 CR3: 0000000020fd5000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __refcount_dec include/linux/refcount.h:344 [inline]
 refcount_dec include/linux/refcount.h:359 [inline]
 dev_put include/linux/netdevice.h:4135 [inline]
 vti6_dev_uninit+0x31a/0x360 net/ipv6/ip6_vti.c:297
 register_netdevice+0xadf/0x1500 net/core/dev.c:10308
 vti6_tnl_create2+0x1b5/0x400 net/ipv6/ip6_vti.c:190
 vti6_newlink+0x9d/0xd0 net/ipv6/ip6_vti.c:1020
 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443
 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502
 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x331/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmmsg+0x195/0x470 net/socket.c:2490
 __do_sys_sendmmsg net/socket.c:2519 [inline]
 __se_sys_sendmmsg net/socket.c:2516 [inline]
 __x64_sys_sendmmsg+0x99/0x100 net/socket.c:2516

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index f10e7a72ea62..a018afdb3e06 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -193,7 +193,6 @@ static int vti6_tnl_create2(struct net_device *dev)
 
 	strcpy(t->parms.name, dev->name);
 
-	dev_hold(dev);
 	vti6_tnl_link(ip6n, t);
 
 	return 0;
@@ -932,6 +931,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
+	dev_hold(dev);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6289a98f0817a4a457750d6345e754838eae9439 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Mar 2021 12:25:22 -0700
Subject: sit: proper dev_{hold|put} in ndo_[un]init methods

After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger
a warning [1]

Issue here is that:

- all dev_put() should be paired with a corresponding prior dev_hold().

- A driver doing a dev_put() in its ndo_uninit() MUST also
  do a dev_hold() in its ndo_init(), only when ndo_init()
  is returning 0.

Otherwise, register_netdevice() would call ndo_uninit()
in its error path and release a refcount too soon.

Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b9bd2723f89a..488d3181aec3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -218,8 +218,6 @@ static int ipip6_tunnel_create(struct net_device *dev)
 
 	ipip6_tunnel_clone_6rd(dev, sitn);
 
-	dev_hold(dev);
-
 	ipip6_tunnel_link(sitn, t);
 	return 0;
 
@@ -1456,7 +1454,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 		dev->tstats = NULL;
 		return err;
 	}
-
+	dev_hold(dev);
 	return 0;
 }
 
-- 
cgit v1.2.3


From d24f511b04b8b159b705ec32a3b8782667d1b06a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Mar 2021 10:40:49 -0700
Subject: tcp: fix tcp_min_tso_segs sysctl

tcp_min_tso_segs is now stored in u8, so max value is 255.

255 limit is enforced by proc_dou8vec_minmax().

We can therefore remove the gso_max_segs variable.

Fixes: 47996b489bdc ("tcp: convert elligible sysctls to u8")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 442ff4be1bde..3a7e5cf5d6cc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -31,7 +31,6 @@
 static int two = 2;
 static int four = 4;
 static int thousand = 1000;
-static int gso_max_segs = GSO_MAX_SEGS;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -1245,7 +1244,6 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ONE,
-		.extra2		= &gso_max_segs,
 	},
 	{
 		.procname	= "tcp_min_rtt_wlen",
-- 
cgit v1.2.3


From d0922bf7981799fd86e248de330fb4152399d6c2 Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Mon, 29 Mar 2021 16:21:35 -0700
Subject: hv_netvsc: Add error handling while switching data path

Add error handling in case of failure to send switching data path message
to the host.

Reported-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h |  6 +++++-
 drivers/net/hyperv/netvsc.c     | 35 +++++++++++++++++++++++++++++++----
 drivers/net/hyperv/netvsc_drv.c | 18 ++++++++++++------
 3 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 59ac04a610ad..442c520ab8f3 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -269,7 +269,7 @@ int rndis_filter_receive(struct net_device *ndev,
 int rndis_filter_set_device_mac(struct netvsc_device *ndev,
 				const char *mac);
 
-void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
+int netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
 
 #define NVSP_INVALID_PROTOCOL_VERSION	((u32)0xFFFFFFFF)
 
@@ -1718,4 +1718,8 @@ struct rndis_message {
 #define TRANSPORT_INFO_IPV6_TCP 0x10
 #define TRANSPORT_INFO_IPV6_UDP 0x20
 
+#define RETRY_US_LO	5000
+#define RETRY_US_HI	10000
+#define RETRY_MAX	2000	/* >10 sec */
+
 #endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 5bce24731502..9d07c9ce4be2 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,12 +31,13 @@
  * Switch the data path from the synthetic interface to the VF
  * interface.
  */
-void netvsc_switch_datapath(struct net_device *ndev, bool vf)
+int netvsc_switch_datapath(struct net_device *ndev, bool vf)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 	struct hv_device *dev = net_device_ctx->device_ctx;
 	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
 	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
+	int ret, retry = 0;
 
 	/* Block sending traffic to VF if it's about to be gone */
 	if (!vf)
@@ -51,15 +52,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 		init_pkt->msg.v4_msg.active_dp.active_datapath =
 			NVSP_DATAPATH_SYNTHETIC;
 
+again:
 	trace_nvsp_send(ndev, init_pkt);
 
-	vmbus_sendpacket(dev->channel, init_pkt,
+	ret = vmbus_sendpacket(dev->channel, init_pkt,
 			       sizeof(struct nvsp_message),
-			       (unsigned long)init_pkt,
-			       VM_PKT_DATA_INBAND,
+			       (unsigned long)init_pkt, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+
+	/* If failed to switch to/from VF, let data_path_is_vf stay false,
+	 * so we use synthetic path to send data.
+	 */
+	if (ret) {
+		if (ret != -EAGAIN) {
+			netdev_err(ndev,
+				   "Unable to send sw datapath msg, err: %d\n",
+				   ret);
+			return ret;
+		}
+
+		if (retry++ < RETRY_MAX) {
+			usleep_range(RETRY_US_LO, RETRY_US_HI);
+			goto again;
+		} else {
+			netdev_err(
+				ndev,
+				"Retry failed to send sw datapath msg, err: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
 	wait_for_completion(&nv_dev->channel_init_wait);
 	net_device_ctx->data_path_is_vf = vf;
+
+	return 0;
 }
 
 /* Worker to setup sub channels on initial setup
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 97b5c9b60503..7349a70af083 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -38,9 +38,6 @@
 #include "hyperv_net.h"
 
 #define RING_SIZE_MIN	64
-#define RETRY_US_LO	5000
-#define RETRY_US_HI	10000
-#define RETRY_MAX	2000	/* >10 sec */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
@@ -2402,6 +2399,7 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
 	struct netvsc_device *netvsc_dev;
 	struct net_device *ndev;
 	bool vf_is_up = false;
+	int ret;
 
 	if (event != NETDEV_GOING_DOWN)
 		vf_is_up = netif_running(vf_netdev);
@@ -2418,9 +2416,17 @@ static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event)
 	if (net_device_ctx->data_path_is_vf == vf_is_up)
 		return NOTIFY_OK;
 
-	netvsc_switch_datapath(ndev, vf_is_up);
-	netdev_info(ndev, "Data path switched %s VF: %s\n",
-		    vf_is_up ? "to" : "from", vf_netdev->name);
+	ret = netvsc_switch_datapath(ndev, vf_is_up);
+
+	if (ret) {
+		netdev_err(ndev,
+			   "Data path failed to switch %s VF: %s, err: %d\n",
+			   vf_is_up ? "to" : "from", vf_netdev->name, ret);
+		return NOTIFY_DONE;
+	} else {
+		netdev_info(ndev, "Data path switched %s VF: %s\n",
+			    vf_is_up ? "to" : "from", vf_netdev->name);
+	}
 
 	return NOTIFY_OK;
 }
-- 
cgit v1.2.3


From 7aae231ac93b9d9c45487dcafd844fa756069f3b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 29 Mar 2021 15:13:57 -0700
Subject: bpf: tcp: Limit calling some tcp cc functions to
 CONFIG_DYNAMIC_FTRACE

pahole currently only generates the btf_id for external function and
ftrace-able function.  Some functions in the bpf_tcp_ca_kfunc_ids
are static (e.g. cubictcp_init).  Thus, unless CONFIG_DYNAMIC_FTRACE
is set, btf_ids for those functions will not be generated and the
compilation fails during resolve_btfids.

This patch limits those functions to CONFIG_DYNAMIC_FTRACE.  I will
address the pahole generation in a followup and then remove the
CONFIG_DYNAMIC_FTRACE limitation.

Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc")
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329221357.834438-1-kafai@fb.com
---
 net/ipv4/bpf_tcp_ca.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 6bb7b335ff9f..dff4f0eb96b0 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
+#ifdef CONFIG_DYNAMIC_FTRACE
 #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
 BTF_ID(func, cubictcp_init)
 BTF_ID(func, cubictcp_recalc_ssthresh)
@@ -211,6 +212,7 @@ BTF_ID(func, bbr_ssthresh)
 BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
+#endif  /* CONFIG_DYNAMIC_FTRACE */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
 static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
-- 
cgit v1.2.3


From 24c22dd0918bd136b7bea0f3a521ccf355fb432b Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 11 Jan 2021 16:45:21 +0200
Subject: net/mlx5e: Add states to PTP channel

Add PTP TX state to PTP channel, which indicates the corresponding SQ is
available. Further patches in the set extend PTP channel to include RQ.
The PTP channel state will be used for separation and coexistence of RX
and TX PTP. Enhance conditions to verify the TX PTP state is set.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   | 73 +++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h   |  6 ++
 .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   | 12 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 11 ++--
 5 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 92a41b1bcdb0..286bd2345da1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -134,9 +134,11 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 
 	ch_stats->poll++;
 
-	for (i = 0; i < c->num_tc; i++) {
-		busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget);
-		busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		for (i = 0; i < c->num_tc; i++) {
+			busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget);
+			busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
+		}
 	}
 
 	if (busy) {
@@ -149,9 +151,11 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 
 	ch_stats->arm++;
 
-	for (i = 0; i < c->num_tc; i++) {
-		mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq);
-		mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		for (i = 0; i < c->num_tc; i++) {
+			mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq);
+			mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
+		}
 	}
 
 out:
@@ -422,9 +426,10 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
 	params->num_tc = orig->num_tc;
 
 	/* SQ */
-	params->log_sq_size = orig->log_sq_size;
-
-	mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		params->log_sq_size = orig->log_sq_size;
+		mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
+	}
 }
 
 static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
@@ -432,26 +437,38 @@ static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
 {
 	int err;
 
-	err = mlx5e_ptp_open_cqs(c, cparams);
-	if (err)
-		return err;
-
-	err = mlx5e_ptp_open_txqsqs(c, cparams);
-	if (err)
-		goto close_cqs;
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		err = mlx5e_ptp_open_cqs(c, cparams);
+		if (err)
+			return err;
 
+		err = mlx5e_ptp_open_txqsqs(c, cparams);
+		if (err)
+			goto close_cqs;
+	}
 	return 0;
 
 close_cqs:
-	mlx5e_ptp_close_cqs(c);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+		mlx5e_ptp_close_cqs(c);
 
 	return err;
 }
 
 static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
 {
-	mlx5e_ptp_close_txqsqs(c);
-	mlx5e_ptp_close_cqs(c);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		mlx5e_ptp_close_txqsqs(c);
+		mlx5e_ptp_close_cqs(c);
+	}
+}
+
+static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
+{
+	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS))
+		__set_bit(MLX5E_PTP_STATE_TX, c->state);
+
+	return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
 }
 
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
@@ -479,6 +496,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->stats    = &priv->ptp_stats.ch;
 	c->lag_port = lag_port;
 
+	err = mlx5e_ptp_set_state(c, params);
+	if (err)
+		goto err_free;
+
 	netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
 
 	mlx5e_ptp_build_params(c, cparams, params);
@@ -495,7 +516,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 
 err_napi_del:
 	netif_napi_del(&c->napi);
-
+err_free:
 	kvfree(cparams);
 	kvfree(c);
 	return err;
@@ -515,16 +536,20 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
 
 	napi_enable(&c->napi);
 
-	for (tc = 0; tc < c->num_tc; tc++)
-		mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		for (tc = 0; tc < c->num_tc; tc++)
+			mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
+	}
 }
 
 void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
 {
 	int tc;
 
-	for (tc = 0; tc < c->num_tc; tc++)
-		mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
+		for (tc = 0; tc < c->num_tc; tc++)
+			mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
+	}
 
 	napi_disable(&c->napi);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 937530afaf14..36c46274a46a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -16,6 +16,11 @@ struct mlx5e_ptpsq {
 	struct mlx5e_ptp_cq_stats *cq_stats;
 };
 
+enum {
+	MLX5E_PTP_STATE_TX,
+	MLX5E_PTP_STATE_NUM_STATES,
+};
+
 struct mlx5e_ptp {
 	/* data path */
 	struct mlx5e_ptpsq         ptpsq[MLX5E_MAX_NUM_TC];
@@ -33,6 +38,7 @@ struct mlx5e_ptp {
 	struct mlx5e_priv         *priv;
 	struct mlx5_core_dev      *mdev;
 	struct hwtstamp_config    *tstamp;
+	DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
 };
 
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index e107801adf48..1a0505bd1e9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -304,6 +304,7 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_txqsq *generic_sq = priv->txq2sq[0];
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 	struct mlx5e_ptpsq *generic_ptpsq;
 	int err;
 
@@ -315,12 +316,11 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte
 	if (err)
 		return err;
 
-	generic_ptpsq = priv->channels.ptp ?
-			&priv->channels.ptp->ptpsq[0] :
-			NULL;
-	if (!generic_ptpsq)
+	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
 		goto out;
 
+	generic_ptpsq = &ptp_ch->ptpsq[0];
+
 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
 	if (err)
 		return err;
@@ -375,7 +375,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 		}
 	}
 
-	if (!ptp_ch)
+	if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state))
 		goto close_sqs_nest;
 
 	for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
@@ -497,7 +497,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
 		}
 	}
 
-	if (ptp_ch) {
+	if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) {
 		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
 			struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 40a62d2e9558..458dd079ca89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2759,6 +2759,9 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
 	if (!priv->channels.ptp)
 		return;
 
+	if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
+		return;
+
 	for (tc = 0; tc < num_tc; tc++) {
 		struct mlx5e_ptp *c = priv->channels.ptp;
 		struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index cfb6ffd7df54..8ba62671f5f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -133,6 +133,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 	/* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
 	num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
 	if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
+		struct mlx5e_ptp *ptp_channel;
+
 		/* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
 		u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
 
@@ -142,10 +144,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 				return txq_ix;
 		}
 
-		if (unlikely(priv->channels.ptp))
-			if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
-			    mlx5e_use_ptpsq(skb))
-				return mlx5e_select_ptpsq(dev, skb);
+		ptp_channel = READ_ONCE(priv->channels.ptp);
+		if (unlikely(ptp_channel) &&
+		    test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
+		    mlx5e_use_ptpsq(skb))
+			return mlx5e_select_ptpsq(dev, skb);
 
 		txq_ix = netdev_pick_tx(dev, skb, NULL);
 		/* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
-- 
cgit v1.2.3


From a099da8ffcf6d4f6d41719bd41878ff529ab6b55 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Mar 2021 15:47:37 +0200
Subject: net/mlx5e: Add RQ to PTP channel

Enhance PTP channel to allow PTP without disabling CQE compression. Add
RQ, TIR and PTP_RX_STATE to PTP channel. When this bit is set, PTP
channel manages its RQ, and PTP traffic is directed to the PTP-RQ which
is not affected by compression.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h     |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 123 +++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h |   2 +
 3 files changed, 118 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index aad2a752f7e3..33db35980970 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -730,6 +730,7 @@ struct mlx5e_ptp_stats {
 	struct mlx5e_ch_stats ch;
 	struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
+	struct mlx5e_rq_stats rq;
 } ____cacheline_aligned_in_smp;
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 286bd2345da1..7f7dfaed9fb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -10,6 +10,7 @@
 struct mlx5e_ptp_params {
 	struct mlx5e_params params;
 	struct mlx5e_sq_param txq_sq_param;
+	struct mlx5e_rq_param rq_param;
 };
 
 struct mlx5e_skb_cb_hwtstamp {
@@ -126,6 +127,7 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi);
 	struct mlx5e_ch_stats *ch_stats = c->stats;
+	struct mlx5e_rq *rq = &c->rq;
 	bool busy = false;
 	int work_done = 0;
 	int i;
@@ -140,6 +142,14 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 			busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget);
 		}
 	}
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) {
+		work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
+		busy |= work_done == budget;
+		busy |= INDIRECT_CALL_2(rq->post_wqes,
+					mlx5e_post_rx_mpwqes,
+					mlx5e_post_rx_wqes,
+					rq);
+	}
 
 	if (busy) {
 		work_done = budget;
@@ -157,6 +167,8 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget)
 			mlx5e_cq_arm(&c->ptpsq[i].ts_cq);
 		}
 	}
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		mlx5e_cq_arm(&rq->cq);
 
 out:
 	rcu_read_unlock();
@@ -338,8 +350,8 @@ static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c)
 		mlx5e_ptp_close_txqsq(&c->ptpsq[tc]);
 }
 
-static int mlx5e_ptp_open_cqs(struct mlx5e_ptp *c,
-			      struct mlx5e_ptp_params *cparams)
+static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c,
+				 struct mlx5e_ptp_params *cparams)
 {
 	struct mlx5e_params *params = &cparams->params;
 	struct mlx5e_create_cq_param ccp = {};
@@ -387,7 +399,25 @@ out_err_txqsq_cq:
 	return err;
 }
 
-static void mlx5e_ptp_close_cqs(struct mlx5e_ptp *c)
+static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c,
+				struct mlx5e_ptp_params *cparams)
+{
+	struct mlx5e_create_cq_param ccp = {};
+	struct dim_cq_moder ptp_moder = {};
+	struct mlx5e_cq_param *cq_param;
+	struct mlx5e_cq *cq = &c->rq.cq;
+
+	ccp.node     = dev_to_node(mlx5_core_dma_dev(c->mdev));
+	ccp.ch_stats = c->stats;
+	ccp.napi     = &c->napi;
+	ccp.ix       = MLX5E_PTP_CHANNEL_IX;
+
+	cq_param = &cparams->rq_param.cqp;
+
+	return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq);
+}
+
+static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c)
 {
 	int tc;
 
@@ -413,6 +443,20 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
 	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
+static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev,
+				     struct net_device *netdev,
+				     u16 q_counter,
+				     struct mlx5e_ptp_params *ptp_params)
+{
+	struct mlx5e_rq_param *rq_params = &ptp_params->rq_param;
+	struct mlx5e_params *params = &ptp_params->params;
+
+	params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
+	mlx5e_init_rq_type_params(mdev, params);
+	params->sw_mtu = netdev->max_mtu;
+	mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params);
+}
+
 static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
 				   struct mlx5e_ptp_params *cparams,
 				   struct mlx5e_params *orig)
@@ -430,6 +474,45 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
 		params->log_sq_size = orig->log_sq_size;
 		mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
 	}
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+}
+
+static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+			     struct mlx5e_rq *rq)
+{
+	struct mlx5_core_dev *mdev = c->mdev;
+	struct mlx5e_priv *priv = c->priv;
+	int err;
+
+	rq->wq_type      = params->rq_wq_type;
+	rq->pdev         = mdev->device;
+	rq->netdev       = priv->netdev;
+	rq->priv         = priv;
+	rq->clock        = &mdev->clock;
+	rq->tstamp       = &priv->tstamp;
+	rq->mdev         = mdev;
+	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+	rq->stats        = &c->priv->ptp_stats.rq;
+	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
+	err = mlx5e_rq_set_handlers(rq, params, false);
+	if (err)
+		return err;
+
+	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
+}
+
+static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
+			     struct mlx5e_rq_param *rq_param)
+{
+	int node = dev_to_node(c->mdev->device);
+	int err;
+
+	err = mlx5e_init_ptp_rq(c, params, &c->rq);
+	if (err)
+		return err;
+
+	return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq);
 }
 
 static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
@@ -438,28 +521,47 @@ static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c,
 	int err;
 
 	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
-		err = mlx5e_ptp_open_cqs(c, cparams);
+		err = mlx5e_ptp_open_tx_cqs(c, cparams);
 		if (err)
 			return err;
 
 		err = mlx5e_ptp_open_txqsqs(c, cparams);
 		if (err)
-			goto close_cqs;
+			goto close_tx_cqs;
+	}
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+		err = mlx5e_ptp_open_rx_cq(c, cparams);
+		if (err)
+			goto close_txqsq;
+
+		err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param);
+		if (err)
+			goto close_rx_cq;
 	}
 	return 0;
 
-close_cqs:
+close_rx_cq:
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		mlx5e_close_cq(&c->rq.cq);
+close_txqsq:
+	if (test_bit(MLX5E_PTP_STATE_TX, c->state))
+		mlx5e_ptp_close_txqsqs(c);
+close_tx_cqs:
 	if (test_bit(MLX5E_PTP_STATE_TX, c->state))
-		mlx5e_ptp_close_cqs(c);
+		mlx5e_ptp_close_tx_cqs(c);
 
 	return err;
 }
 
 static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c)
 {
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+		mlx5e_close_rq(&c->rq);
+		mlx5e_close_cq(&c->rq.cq);
+	}
 	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
 		mlx5e_ptp_close_txqsqs(c);
-		mlx5e_ptp_close_cqs(c);
+		mlx5e_ptp_close_tx_cqs(c);
 	}
 }
 
@@ -540,12 +642,17 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
 		for (tc = 0; tc < c->num_tc; tc++)
 			mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
 	}
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		mlx5e_activate_rq(&c->rq);
 }
 
 void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
 {
 	int tc;
 
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		mlx5e_deactivate_rq(&c->rq);
+
 	if (test_bit(MLX5E_PTP_STATE_TX, c->state)) {
 		for (tc = 0; tc < c->num_tc; tc++)
 			mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 36c46274a46a..cc6a48a43233 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -18,12 +18,14 @@ struct mlx5e_ptpsq {
 
 enum {
 	MLX5E_PTP_STATE_TX,
+	MLX5E_PTP_STATE_RX,
 	MLX5E_PTP_STATE_NUM_STATES,
 };
 
 struct mlx5e_ptp {
 	/* data path */
 	struct mlx5e_ptpsq         ptpsq[MLX5E_MAX_NUM_TC];
+	struct mlx5e_rq            rq;
 	struct napi_struct         napi;
 	struct device             *pdev;
 	struct net_device         *netdev;
-- 
cgit v1.2.3


From a28359e922c681a23c01e4858175d9e98c5ff88a Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Mar 2021 15:55:04 +0200
Subject: net/mlx5e: Add PTP-RX statistics

Like PTP-TX, once the PTP-RX is opened, corresponding statistics appear.
Add indication that PTP-RX was ever opened: rx_ptp_opened. If any of the
PTP RX or TX were opened, display the PTP channel's statistics.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   7 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 114 ++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   1 +
 5 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 33db35980970..8f6ccd54057a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -860,6 +860,7 @@ struct mlx5e_priv {
 	u16                        max_nch;
 	u8                         max_opened_tc;
 	bool                       tx_ptp_opened;
+	bool                       rx_ptp_opened;
 	struct hwtstamp_config     tstamp;
 	u16                        q_counter;
 	u16                        drop_rq_q_counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 7f7dfaed9fb4..4595d2388d83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -610,6 +610,9 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	if (unlikely(err))
 		goto err_napi_del;
 
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+		priv->rx_ptp_opened = true;
+
 	*cp = c;
 
 	kvfree(cparams);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 458dd079ca89..7ecde284c57c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3496,6 +3496,13 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
 			s->tx_dropped    += sq_stats->dropped;
 		}
 	}
+	if (priv->rx_ptp_opened) {
+		struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
+
+		s->rx_packets   += rq_stats->packets;
+		s->rx_bytes     += rq_stats->bytes;
+		s->multicast    += rq_stats->mcast_packets;
+	}
 }
 
 void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 32331ac9288c..f67e51d8291a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -407,13 +407,21 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
 {
 	int i;
 
-	if (!priv->tx_ptp_opened)
+	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
 		return;
 
 	mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
 
-	for (i = 0; i < priv->max_opened_tc; i++) {
-		mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
+	if (priv->tx_ptp_opened) {
+		for (i = 0; i < priv->max_opened_tc; i++) {
+			mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
+
+			/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+			barrier();
+		}
+	}
+	if (priv->rx_ptp_opened) {
+		mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
 
 		/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
 		barrier();
@@ -1760,6 +1768,38 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
 };
 
+static const struct counter_desc ptp_rq_stats_desc[] = {
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
+	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
+};
+
 static const struct counter_desc qos_sq_stats_desc[] = {
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
@@ -1805,6 +1845,7 @@ static const struct counter_desc qos_sq_stats_desc[] = {
 #define NUM_PTP_SQ_STATS		ARRAY_SIZE(ptp_sq_stats_desc)
 #define NUM_PTP_CH_STATS		ARRAY_SIZE(ptp_ch_stats_desc)
 #define NUM_PTP_CQ_STATS		ARRAY_SIZE(ptp_cq_stats_desc)
+#define NUM_PTP_RQ_STATS                ARRAY_SIZE(ptp_rq_stats_desc)
 #define NUM_QOS_SQ_STATS		ARRAY_SIZE(qos_sq_stats_desc)
 
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
@@ -1851,32 +1892,46 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
 
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
 {
-	return priv->tx_ptp_opened ?
-	       NUM_PTP_CH_STATS +
-	       ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) :
-	       0;
+	int num = NUM_PTP_CH_STATS;
+
+	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
+		return 0;
+
+	if (priv->tx_ptp_opened)
+		num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc;
+	if (priv->rx_ptp_opened)
+		num += NUM_PTP_RQ_STATS;
+
+	return num;
 }
 
 static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
 {
 	int i, tc;
 
-	if (!priv->tx_ptp_opened)
+	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
 		return idx;
 
 	for (i = 0; i < NUM_PTP_CH_STATS; i++)
 		sprintf(data + (idx++) * ETH_GSTRING_LEN,
 			ptp_ch_stats_desc[i].format);
 
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
-		for (i = 0; i < NUM_PTP_SQ_STATS; i++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				ptp_sq_stats_desc[i].format, tc);
+	if (priv->tx_ptp_opened) {
+		for (tc = 0; tc < priv->max_opened_tc; tc++)
+			for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+				sprintf(data + (idx++) * ETH_GSTRING_LEN,
+					ptp_sq_stats_desc[i].format, tc);
 
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
-		for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+		for (tc = 0; tc < priv->max_opened_tc; tc++)
+			for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+				sprintf(data + (idx++) * ETH_GSTRING_LEN,
+					ptp_cq_stats_desc[i].format, tc);
+	}
+	if (priv->rx_ptp_opened) {
+		for (i = 0; i < NUM_PTP_RQ_STATS; i++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
-				ptp_cq_stats_desc[i].format, tc);
+				ptp_rq_stats_desc[i].format);
+	}
 	return idx;
 }
 
@@ -1884,7 +1939,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp)
 {
 	int i, tc;
 
-	if (!priv->tx_ptp_opened)
+	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
 		return idx;
 
 	for (i = 0; i < NUM_PTP_CH_STATS; i++)
@@ -1892,18 +1947,25 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp)
 			MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch,
 					     ptp_ch_stats_desc, i);
 
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
-		for (i = 0; i < NUM_PTP_SQ_STATS; i++)
-			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
-						     ptp_sq_stats_desc, i);
+	if (priv->tx_ptp_opened) {
+		for (tc = 0; tc < priv->max_opened_tc; tc++)
+			for (i = 0; i < NUM_PTP_SQ_STATS; i++)
+				data[idx++] =
+					MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
+							     ptp_sq_stats_desc, i);
 
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
-		for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+		for (tc = 0; tc < priv->max_opened_tc; tc++)
+			for (i = 0; i < NUM_PTP_CQ_STATS; i++)
+				data[idx++] =
+					MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
+							     ptp_cq_stats_desc, i);
+	}
+	if (priv->rx_ptp_opened) {
+		for (i = 0; i < NUM_PTP_RQ_STATS; i++)
 			data[idx++] =
-				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
-						     ptp_cq_stats_desc, i);
-
+				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq,
+						     ptp_rq_stats_desc, i);
+	}
 	return idx;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 93c41312fb03..ca398eac09c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -54,6 +54,7 @@
 #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
 #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
 
 #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
 
-- 
cgit v1.2.3


From 3adb60b6a3ed9f233daa632d35cec79fe4781372 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 25 Feb 2021 19:55:20 +0200
Subject: net:mlx5e: Add PTP-TIR and PTP-RQT

Add PTP-TIR and initiate its RQT to allow PTP-RQ to integrate into the
safe-reopen flow on configuration change. Add rx_ptp_support flag on a
profile and turn it on for ETH driver. With this flag set, create a
redirect-RQT for PTP-RQ.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |  9 +++++
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 40 +++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  2 ++
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  1 +
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c |  1 +
 7 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8f6ccd54057a..f31b5ccc27d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -837,6 +837,7 @@ struct mlx5e_priv {
 	struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
 	struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
 	struct mlx5e_tir           xsk_tir[MLX5E_MAX_NUM_CHANNELS];
+	struct mlx5e_tir           ptp_tir;
 	struct mlx5e_rss_params    rss_params;
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
@@ -916,6 +917,7 @@ struct mlx5e_profile {
 	const struct mlx5e_rx_handlers *rx_handlers;
 	int	max_tc;
 	u8	rq_groups;
+	bool	rx_ptp_support;
 };
 
 void mlx5e_build_ptys2ethtool_map(void);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 4595d2388d83..c1c41c8656dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -663,3 +663,12 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
 
 	napi_disable(&c->napi);
 }
+
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
+{
+	if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+		return -EINVAL;
+
+	*rqn = c->rq.rqn;
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index cc6a48a43233..460b167887bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -48,6 +48,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 void mlx5e_ptp_close(struct mlx5e_ptp *c);
 void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
 void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
+int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
 
 enum {
 	MLX5E_SKB_CB_CQE_HWTSTAMP  = BIT(0),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7ecde284c57c..7cf12342afe6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2329,7 +2329,8 @@ static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix,
 }
 
 static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
-				struct mlx5e_redirect_rqt_param rrp)
+				struct mlx5e_redirect_rqt_param rrp,
+				struct mlx5e_redirect_rqt_param *ptp_rrp)
 {
 	u32 rqtn;
 	int ix;
@@ -2355,11 +2356,17 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
 		rqtn = priv->direct_tir[ix].rqt.rqtn;
 		mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
 	}
+	if (ptp_rrp) {
+		rqtn = priv->ptp_tir.rqt.rqtn;
+		mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp);
+	}
 }
 
 static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
 					    struct mlx5e_channels *chs)
 {
+	bool rx_ptp_support = priv->profile->rx_ptp_support;
+	struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL;
 	struct mlx5e_redirect_rqt_param rrp = {
 		.is_rss        = true,
 		{
@@ -2369,12 +2376,22 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
 			}
 		},
 	};
+	struct mlx5e_redirect_rqt_param ptp_rrp;
+
+	if (rx_ptp_support) {
+		u32 ptp_rqn;
 
-	mlx5e_redirect_rqts(priv, rrp);
+		ptp_rrp.is_rss = false;
+		ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ?
+			      priv->drop_rq.rqn : ptp_rqn;
+		ptp_rrp_p = &ptp_rrp;
+	}
+	mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p);
 }
 
 static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
 {
+	bool rx_ptp_support = priv->profile->rx_ptp_support;
 	struct mlx5e_redirect_rqt_param drop_rrp = {
 		.is_rss = false,
 		{
@@ -2382,7 +2399,7 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
 		},
 	};
 
-	mlx5e_redirect_rqts(priv, drop_rrp);
+	mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL);
 }
 
 static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
@@ -4944,10 +4961,18 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 	if (unlikely(err))
 		goto err_destroy_xsk_rqts;
 
+	err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1);
+	if (err)
+		goto err_destroy_xsk_tirs;
+
+	err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1);
+	if (err)
+		goto err_destroy_ptp_rqt;
+
 	err = mlx5e_create_flow_steering(priv);
 	if (err) {
 		mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-		goto err_destroy_xsk_tirs;
+		goto err_destroy_ptp_direct_tir;
 	}
 
 	err = mlx5e_tc_nic_init(priv);
@@ -4968,6 +4993,10 @@ err_tc_nic_cleanup:
 	mlx5e_tc_nic_cleanup(priv);
 err_destroy_flow_steering:
 	mlx5e_destroy_flow_steering(priv);
+err_destroy_ptp_direct_tir:
+	mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
+err_destroy_ptp_rqt:
+	mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
 err_destroy_xsk_tirs:
 	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
 err_destroy_xsk_rqts:
@@ -4994,6 +5023,8 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
 	mlx5e_accel_cleanup_rx(priv);
 	mlx5e_tc_nic_cleanup(priv);
 	mlx5e_destroy_flow_steering(priv);
+	mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
+	mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
 	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
 	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
 	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
@@ -5106,6 +5137,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
 	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(XSK),
 	.stats_grps	   = mlx5e_nic_stats_grps,
 	.stats_grps_num	   = mlx5e_nic_stats_grps_num,
+	.rx_ptp_support    = true,
 };
 
 /* mlx5e generic netdev management API (move to en_common.c) */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index b597fc3b192b..9ef8e4a671a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1062,6 +1062,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = {
 	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
 	.stats_grps		= mlx5e_rep_stats_grps,
 	.stats_grps_num		= mlx5e_rep_stats_grps_num,
+	.rx_ptp_support		= false,
 };
 
 static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
@@ -1082,6 +1083,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
 	.rq_groups		= MLX5E_NUM_RQ_GROUPS(XSK),
 	.stats_grps		= mlx5e_ul_rep_stats_grps,
 	.stats_grps_num		= mlx5e_ul_rep_stats_grps_num,
+	.rx_ptp_support		= false,
 };
 
 /* e-Switch vport representors */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 63ca73105149..b65b0cefc5b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -473,6 +473,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
 	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
 	.stats_grps        = mlx5i_stats_grps,
 	.stats_grps_num    = mlx5i_stats_grps_num,
+	.rx_ptp_support    = false,
 };
 
 /* mlx5i netdev NDos */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 3d0a18a0bed4..18ee21b06a00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
 	.rx_handlers       = &mlx5i_rx_handlers,
 	.max_tc		   = MLX5I_MAX_NUM_TC,
 	.rq_groups	   = MLX5E_NUM_RQ_GROUPS(REGULAR),
+	.rx_ptp_support	   = false,
 };
 
 const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
-- 
cgit v1.2.3


From 19cfa36b18d8dc76d72f74b5209875e31641e219 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 12 Jan 2021 17:26:02 +0200
Subject: net/mlx5e: Refactor RX reporter diagnostics

Break RX diagnostics function into smaller helpers. This enables easier
enhancement in the next patch in the set.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 104 +++++++++++++--------
 1 file changed, 66 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 34b3b316b688..78d801bac8f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -230,8 +230,9 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state,
 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 }
 
-static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
-						   struct devlink_fmsg *fmsg)
+static int
+mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq,
+						  struct devlink_fmsg *fmsg)
 {
 	u16 wqe_counter;
 	int wqes_sz;
@@ -247,14 +248,6 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 	wq_head = mlx5e_rqwq_get_head(rq);
 	wqe_counter = mlx5e_rqwq_get_wqe_counter(rq);
 
-	err = devlink_fmsg_obj_nest_start(fmsg);
-	if (err)
-		return err;
-
-	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
-	if (err)
-		return err;
-
 	err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
 	if (err)
 		return err;
@@ -300,61 +293,96 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
 			return err;
 	}
 
-	err = devlink_fmsg_obj_nest_end(fmsg);
+	return 0;
+}
+
+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
+						   struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
 	if (err)
 		return err;
 
-	return 0;
+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix);
+	if (err)
+		return err;
+
+	err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+	if (err)
+		return err;
+
+	return devlink_fmsg_obj_nest_end(fmsg);
 }
 
-static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_fmsg *fmsg,
-				      struct netlink_ext_ack *extack)
+static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
+						 struct devlink_fmsg *fmsg)
 {
-	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	struct mlx5e_params *params = &priv->channels.params;
-	struct mlx5e_rq *generic_rq;
+	struct mlx5e_priv *priv = rq->priv;
+	struct mlx5e_params *params;
 	u32 rq_stride, rq_sz;
-	int i, err = 0;
-
-	mutex_lock(&priv->state_lock);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
-		goto unlock;
+	int err;
 
-	generic_rq = &priv->channels.c[0]->rq;
-	rq_sz = mlx5e_rqwq_get_size(generic_rq);
+	params = &priv->channels.params;
+	rq_sz = mlx5e_rqwq_get_size(rq);
 	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
 
-	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
-	if (err)
-		goto unlock;
-
 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
 	if (err)
-		goto unlock;
+		return err;
 
 	err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
 	if (err)
-		goto unlock;
+		return err;
 
 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
 	if (err)
-		goto unlock;
+		return err;
 
 	err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
 	if (err)
-		goto unlock;
+		return err;
 
-	err = mlx5e_health_cq_common_diag_fmsg(&generic_rq->cq, fmsg);
+	err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
 	if (err)
-		goto unlock;
+		return err;
 
-	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int
+mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
+					 struct devlink_fmsg *fmsg)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
+	int err;
+
+	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
 	if (err)
+		return err;
+
+	err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg);
+	if (err)
+		return err;
+
+	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
+				      struct devlink_fmsg *fmsg,
+				      struct netlink_ext_ack *extack)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	int i, err = 0;
+
+	mutex_lock(&priv->state_lock);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		goto unlock;
 
-	err = mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+	err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg);
 	if (err)
 		goto unlock;
 
-- 
cgit v1.2.3


From b8fb10939ff43be7599df3a264474c180ba7234c Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 12 Jan 2021 18:50:03 +0200
Subject: net/mlx5e: Add PTP RQ to RX reporter

When present, add the PTP RQ to the RX reporter.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 68 +++++++++++++++++++++-
 1 file changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 78d801bac8f5..f9fdf3606bbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -5,6 +5,7 @@
 #include "params.h"
 #include "txrx.h"
 #include "devlink.h"
+#include "ptp.h"
 
 static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
 {
@@ -351,12 +352,34 @@ static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 }
 
+static int
+mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch,
+					     struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter);
+	if (err)
+		return err;
+
+	err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg);
+	if (err)
+		return err;
+
+	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
+}
+
 static int
 mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter,
 					 struct devlink_fmsg *fmsg)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
 	struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq;
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 	int err;
 
 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config");
@@ -367,14 +390,45 @@ mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte
 	if (err)
 		return err;
 
+	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+		err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg);
+		if (err)
+			return err;
+	}
+
 	return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
 }
 
+static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq,
+							  struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp");
+	if (err)
+		return err;
+
+	err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
 				      struct devlink_fmsg *fmsg,
 				      struct netlink_ext_ack *extack)
 {
 	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 	int i, err = 0;
 
 	mutex_lock(&priv->state_lock);
@@ -397,9 +451,12 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
 		if (err)
 			goto unlock;
 	}
+	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+		err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg);
+		if (err)
+			goto unlock;
+	}
 	err = devlink_fmsg_arr_pair_nest_end(fmsg);
-	if (err)
-		goto unlock;
 unlock:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -531,6 +588,7 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms
 static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
 					  struct devlink_fmsg *fmsg)
 {
+	struct mlx5e_ptp *ptp_ch = priv->channels.ptp;
 	struct mlx5_rsc_key key = {};
 	int i, err;
 
@@ -563,6 +621,12 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv,
 			return err;
 	}
 
+	if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) {
+		err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ");
+		if (err)
+			return err;
+	}
+
 	return devlink_fmsg_arr_pair_nest_end(fmsg);
 }
 
-- 
cgit v1.2.3


From c809cf665e28449ba7fec93089718bc8751a52cc Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 21 Jan 2021 09:32:52 +0200
Subject: net/mlx5e: Cleanup Flow Steering level

Flow Steering levels are used to determine the order between the tables.
As of today, each one of these tables follows the TTC table, and hijacks
its traffic, and cannot be combined together for now. Putting them in
the same layer better reflects the situation.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index a16297e7e2ac..3dfec5943a33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -138,10 +138,10 @@ enum {
 	MLX5E_TTC_FT_LEVEL,
 	MLX5E_INNER_TTC_FT_LEVEL,
 #ifdef CONFIG_MLX5_EN_TLS
-	MLX5E_ACCEL_FS_TCP_FT_LEVEL,
+	MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #endif
 #ifdef CONFIG_MLX5_EN_ARFS
-	MLX5E_ARFS_FT_LEVEL,
+	MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #endif
 #ifdef CONFIG_MLX5_EN_IPSEC
 	MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f5517ea2f6be..d2c0e61527ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -105,7 +105,7 @@
 #define ETHTOOL_PRIO_NUM_LEVELS 1
 #define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel/{esp, esp_err}} */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 7
 #define KERNEL_NIC_NUM_PRIOS 1
 /* One more level for tc */
-- 
cgit v1.2.3


From 1c80bd6843881fbef7d198981ea0decc184954fd Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 14 Jan 2021 17:26:35 +0200
Subject: net/mlx5e: Introduce Flow Steering UDP API

Add a new FS API which captures the UDP traffic from the traffic
classifier into a dedicated FS table. This API handles both UDP over
IPv4 and IPv6 in the same manner. The tables (one for UDPv4 and another
for UDPv6) consist of a group matching the UDP destination port and a
must-be-last group which contains a default rule redirecting the
unmatched packets back to the RSS logic.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h    |   4 +
 .../mellanox/mlx5/core/en/fs_tt_redirect.c         | 343 +++++++++++++++++++++
 .../mellanox/mlx5/core/en/fs_tt_redirect.h         |  19 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |   2 +-
 5 files changed, 368 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8cb2625472c3..9cf7de72df52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -27,7 +27,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
 		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
 		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
-		en/qos.o en/trap.o
+		en/qos.o en/trap.o en/fs_tt_redirect.o
 
 #
 # Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 3dfec5943a33..496f5b9fe070 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -137,6 +137,7 @@ enum {
 	MLX5E_L2_FT_LEVEL,
 	MLX5E_TTC_FT_LEVEL,
 	MLX5E_INNER_TTC_FT_LEVEL,
+	MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #ifdef CONFIG_MLX5_EN_TLS
 	MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #endif
@@ -241,6 +242,8 @@ static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) {	return -EOPNOTSU
 struct mlx5e_accel_fs_tcp;
 #endif
 
+struct mlx5e_fs_udp;
+
 struct mlx5e_flow_steering {
 	struct mlx5_flow_namespace      *ns;
 	struct mlx5_flow_namespace      *egress_ns;
@@ -259,6 +262,7 @@ struct mlx5e_flow_steering {
 #ifdef CONFIG_MLX5_EN_TLS
 	struct mlx5e_accel_fs_tcp      *accel_tcp;
 #endif
+	struct mlx5e_fs_udp            *udp;
 };
 
 struct ttc_params {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
new file mode 100644
index 000000000000..c37a7a7929c3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include "en/fs_tt_redirect.h"
+#include "fs_core.h"
+
+enum fs_udp_type {
+	FS_IPV4_UDP,
+	FS_IPV6_UDP,
+	FS_UDP_NUM_TYPES,
+};
+
+struct mlx5e_fs_udp {
+	struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES];
+	struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES];
+	int ref_cnt;
+};
+
+static char *fs_udp_type2str(enum fs_udp_type i)
+{
+	switch (i) {
+	case FS_IPV4_UDP:
+		return "UDP v4";
+	default: /* FS_IPV6_UDP */
+		return "UDP v6";
+	}
+}
+
+static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+{
+	switch (i) {
+	case FS_IPV4_UDP:
+		return MLX5E_TT_IPV4_UDP;
+	default: /* FS_IPV6_UDP */
+		return MLX5E_TT_IPV6_UDP;
+	}
+}
+
+static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+{
+	switch (i) {
+	case MLX5E_TT_IPV4_UDP:
+		return FS_IPV4_UDP;
+	case MLX5E_TT_IPV6_UDP:
+		return FS_IPV6_UDP;
+	default:
+		return FS_UDP_NUM_TYPES;
+	}
+}
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule)
+{
+	mlx5_del_flow_rules(rule);
+}
+
+static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type,
+				  u16 udp_dport)
+{
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+		 type == FS_IPV4_UDP ? 4 : 6);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+				  enum mlx5e_traffic_types ttc_type,
+				  u32 tir_num, u16 d_port)
+{
+	enum fs_udp_type type = tt2fs_udp(ttc_type);
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_table *ft = NULL;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	struct mlx5e_fs_udp *fs_udp;
+	int err;
+
+	if (type == FS_UDP_NUM_TYPES)
+		return ERR_PTR(-EINVAL);
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	fs_udp = priv->fs.udp;
+	ft = fs_udp->tables[type].t;
+
+	fs_udp_set_dport_flow(spec, type, d_port);
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	dest.tir_num = tir_num;
+
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+	kvfree(spec);
+
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n",
+			   __func__, fs_udp_type2str(type), err);
+	}
+	return rule;
+}
+
+static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type)
+{
+	struct mlx5e_flow_table *fs_udp_t;
+	struct mlx5_flow_destination dest;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5e_fs_udp *fs_udp;
+	int err;
+
+	fs_udp = priv->fs.udp;
+	fs_udp_t = &fs_udp->tables[type];
+
+	dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+	rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev,
+			   "%s: add default rule failed, fs type=%d, err %d\n",
+			   __func__, type, err);
+		return err;
+	}
+
+	fs_udp->default_rules[type] = rule;
+	return 0;
+}
+
+#define MLX5E_FS_UDP_NUM_GROUPS	(2)
+#define MLX5E_FS_UDP_GROUP1_SIZE	(BIT(16))
+#define MLX5E_FS_UDP_GROUP2_SIZE	(BIT(0))
+#define MLX5E_FS_UDP_TABLE_SIZE		(MLX5E_FS_UDP_GROUP1_SIZE +\
+					 MLX5E_FS_UDP_GROUP2_SIZE)
+static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	void *outer_headers_c;
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if  (!in || !ft->g) {
+		kfree(ft->g);
+		kvfree(in);
+		return -ENOMEM;
+	}
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+	switch (type) {
+	case FS_IPV4_UDP:
+	case FS_IPV6_UDP:
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+	/* Match on udp protocol, Ipv4/6 and dport */
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_FS_UDP_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Default Flow Group */
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_FS_UDP_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+out:
+	kvfree(in);
+
+	return err;
+}
+
+static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type)
+{
+	struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type];
+	struct mlx5_flow_table_attr ft_attr = {};
+	int err;
+
+	ft->num_groups = 0;
+
+	ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+	ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
+	ft_attr.prio = MLX5E_NIC_PRIO;
+
+	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+	if (IS_ERR(ft->t)) {
+		err = PTR_ERR(ft->t);
+		ft->t = NULL;
+		return err;
+	}
+
+	netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n",
+		   fs_udp_type2str(type), ft->t->id, ft->t->level);
+
+	err = fs_udp_create_groups(ft, type);
+	if (err)
+		goto err;
+
+	err = fs_udp_add_default_rule(priv, type);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	mlx5e_destroy_flow_table(ft);
+	return err;
+}
+
+static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
+{
+	if (IS_ERR_OR_NULL(fs_udp->tables[i].t))
+		return;
+
+	mlx5_del_flow_rules(fs_udp->default_rules[i]);
+	mlx5e_destroy_flow_table(&fs_udp->tables[i]);
+	fs_udp->tables[i].t = NULL;
+}
+
+static int fs_udp_disable(struct mlx5e_priv *priv)
+{
+	int err, i;
+
+	for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+		/* Modify ttc rules destination to point back to the indir TIRs */
+		err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+		if (err) {
+			netdev_err(priv->netdev,
+				   "%s: modify ttc[%d] default destination failed, err(%d)\n",
+				   __func__, fs_udp2tt(i), err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int fs_udp_enable(struct mlx5e_priv *priv)
+{
+	struct mlx5_flow_destination dest = {};
+	int err, i;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+		dest.ft = priv->fs.udp->tables[i].t;
+
+		/* Modify ttc rules destination to point on the accel_fs FTs */
+		err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+		if (err) {
+			netdev_err(priv->netdev,
+				   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+				   __func__, fs_udp2tt(i), err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
+{
+	struct mlx5e_fs_udp *fs_udp = priv->fs.udp;
+	int i;
+
+	if (!fs_udp)
+		return;
+
+	if (--fs_udp->ref_cnt)
+		return;
+
+	fs_udp_disable(priv);
+
+	for (i = 0; i < FS_UDP_NUM_TYPES; i++)
+		fs_udp_destroy_table(fs_udp, i);
+
+	kfree(fs_udp);
+	priv->fs.udp = NULL;
+}
+
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv)
+{
+	int i, err;
+
+	if (priv->fs.udp) {
+		priv->fs.udp->ref_cnt++;
+		return 0;
+	}
+
+	priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL);
+	if (!priv->fs.udp)
+		return -ENOMEM;
+
+	for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
+		err = fs_udp_create_table(priv, i);
+		if (err)
+			goto err_destroy_tables;
+	}
+
+	err = fs_udp_enable(priv);
+	if (err)
+		goto err_destroy_tables;
+
+	priv->fs.udp->ref_cnt = 1;
+
+	return 0;
+
+err_destroy_tables:
+	while (--i >= 0)
+		fs_udp_destroy_table(priv->fs.udp, i);
+
+	kfree(priv->fs.udp);
+	priv->fs.udp = NULL;
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
new file mode 100644
index 000000000000..b840d5cafb57
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5E_FS_TT_REDIRECT_H__
+#define __MLX5E_FS_TT_REDIRECT_H__
+
+#include "en.h"
+#include "en/fs.h"
+
+void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
+
+/* UDP traffic type redirect */
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+				  enum mlx5e_traffic_types ttc_type,
+				  u32 tir_num, u16 d_port);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d2c0e61527ab..b9ebacdcbdfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -105,7 +105,7 @@
 #define ETHTOOL_PRIO_NUM_LEVELS 1
 #define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel/{esp, esp_err}} */
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/aRFS/accel/{esp, esp_err}} */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 7
 #define KERNEL_NIC_NUM_PRIOS 1
 /* One more level for tc */
-- 
cgit v1.2.3


From 0f575c20bf0686caf3d82d6c626c2e1e4a4c36e6 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 17 Jan 2021 08:58:04 +0200
Subject: net/mlx5e: Introduce Flow Steering ANY API

Add a new FS API which captures the ANY traffic from the traffic
classifier into a dedicated FS table. The table consists of a group
matching the ethertype and a must-be-last group which contains a default
rule redirecting the unmatched packets back to the RSS logic.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h    |   3 +
 .../mellanox/mlx5/core/en/fs_tt_redirect.c         | 262 +++++++++++++++++++++
 .../mellanox/mlx5/core/en/fs_tt_redirect.h         |   7 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |   2 +-
 4 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 496f5b9fe070..c61fbb9c6fa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -138,6 +138,7 @@ enum {
 	MLX5E_TTC_FT_LEVEL,
 	MLX5E_INNER_TTC_FT_LEVEL,
 	MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
+	MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #ifdef CONFIG_MLX5_EN_TLS
 	MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1,
 #endif
@@ -243,6 +244,7 @@ struct mlx5e_accel_fs_tcp;
 #endif
 
 struct mlx5e_fs_udp;
+struct mlx5e_fs_any;
 
 struct mlx5e_flow_steering {
 	struct mlx5_flow_namespace      *ns;
@@ -263,6 +265,7 @@ struct mlx5e_flow_steering {
 	struct mlx5e_accel_fs_tcp      *accel_tcp;
 #endif
 	struct mlx5e_fs_udp            *udp;
+	struct mlx5e_fs_any            *any;
 };
 
 struct ttc_params {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index c37a7a7929c3..909faa6c89d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -17,6 +17,12 @@ struct mlx5e_fs_udp {
 	int ref_cnt;
 };
 
+struct mlx5e_fs_any {
+	struct mlx5e_flow_table table;
+	struct mlx5_flow_handle *default_rule;
+	int ref_cnt;
+};
+
 static char *fs_udp_type2str(enum fs_udp_type i)
 {
 	switch (i) {
@@ -341,3 +347,259 @@ err_destroy_tables:
 	priv->fs.udp = NULL;
 	return err;
 }
+
+static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type)
+{
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type);
+}
+
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+				  u32 tir_num, u16 ether_type)
+{
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_table *ft = NULL;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	struct mlx5e_fs_any *fs_any;
+	int err;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return ERR_PTR(-ENOMEM);
+
+	fs_any = priv->fs.any;
+	ft = fs_any->table.t;
+
+	fs_any_set_ethertype_flow(spec, ether_type);
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	dest.tir_num = tir_num;
+
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
+	kvfree(spec);
+
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n",
+			   __func__, err);
+	}
+	return rule;
+}
+
+static int fs_any_add_default_rule(struct mlx5e_priv *priv)
+{
+	struct mlx5e_flow_table *fs_any_t;
+	struct mlx5_flow_destination dest;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	struct mlx5e_fs_any *fs_any;
+	int err;
+
+	fs_any = priv->fs.any;
+	fs_any_t = &fs_any->table;
+
+	dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+	rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev,
+			   "%s: add default rule failed, fs type=ANY, err %d\n",
+			   __func__, err);
+		return err;
+	}
+
+	fs_any->default_rule = rule;
+	return 0;
+}
+
+#define MLX5E_FS_ANY_NUM_GROUPS	(2)
+#define MLX5E_FS_ANY_GROUP1_SIZE	(BIT(16))
+#define MLX5E_FS_ANY_GROUP2_SIZE	(BIT(0))
+#define MLX5E_FS_ANY_TABLE_SIZE		(MLX5E_FS_ANY_GROUP1_SIZE +\
+					 MLX5E_FS_ANY_GROUP2_SIZE)
+
+static int fs_any_create_groups(struct mlx5e_flow_table *ft)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	void *outer_headers_c;
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if  (!in || !ft->g) {
+		kfree(ft->g);
+		kvfree(in);
+		return -ENOMEM;
+	}
+
+	/* Match on ethertype */
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_FS_ANY_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Default Flow Group */
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_FS_ANY_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+	kvfree(in);
+
+	return err;
+}
+
+static int fs_any_create_table(struct mlx5e_priv *priv)
+{
+	struct mlx5e_flow_table *ft = &priv->fs.any->table;
+	struct mlx5_flow_table_attr ft_attr = {};
+	int err;
+
+	ft->num_groups = 0;
+
+	ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
+	ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
+	ft_attr.prio = MLX5E_NIC_PRIO;
+
+	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+	if (IS_ERR(ft->t)) {
+		err = PTR_ERR(ft->t);
+		ft->t = NULL;
+		return err;
+	}
+
+	netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n",
+		   ft->t->id, ft->t->level);
+
+	err = fs_any_create_groups(ft);
+	if (err)
+		goto err;
+
+	err = fs_any_add_default_rule(priv);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	mlx5e_destroy_flow_table(ft);
+	return err;
+}
+
+static int fs_any_disable(struct mlx5e_priv *priv)
+{
+	int err;
+
+	/* Modify ttc rules destination to point back to the indir TIRs */
+	err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+	if (err) {
+		netdev_err(priv->netdev,
+			   "%s: modify ttc[%d] default destination failed, err(%d)\n",
+			   __func__, MLX5E_TT_ANY, err);
+		return err;
+	}
+	return 0;
+}
+
+static int fs_any_enable(struct mlx5e_priv *priv)
+{
+	struct mlx5_flow_destination dest = {};
+	int err;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = priv->fs.any->table.t;
+
+	/* Modify ttc rules destination to point on the accel_fs FTs */
+	err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+	if (err) {
+		netdev_err(priv->netdev,
+			   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+			   __func__, MLX5E_TT_ANY, err);
+		return err;
+	}
+	return 0;
+}
+
+static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
+{
+	if (IS_ERR_OR_NULL(fs_any->table.t))
+		return;
+
+	mlx5_del_flow_rules(fs_any->default_rule);
+	mlx5e_destroy_flow_table(&fs_any->table);
+	fs_any->table.t = NULL;
+}
+
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
+{
+	struct mlx5e_fs_any *fs_any = priv->fs.any;
+
+	if (!fs_any)
+		return;
+
+	if (--fs_any->ref_cnt)
+		return;
+
+	fs_any_disable(priv);
+
+	fs_any_destroy_table(fs_any);
+
+	kfree(fs_any);
+	priv->fs.any = NULL;
+}
+
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv)
+{
+	int err;
+
+	if (priv->fs.any) {
+		priv->fs.any->ref_cnt++;
+		return 0;
+	}
+
+	priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL);
+	if (!priv->fs.any)
+		return -ENOMEM;
+
+	err = fs_any_create_table(priv);
+	if (err)
+		return err;
+
+	err = fs_any_enable(priv);
+	if (err)
+		goto err_destroy_table;
+
+	priv->fs.any->ref_cnt = 1;
+
+	return 0;
+
+err_destroy_table:
+	fs_any_destroy_table(priv->fs.any);
+
+	kfree(priv->fs.any);
+	priv->fs.any = NULL;
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
index b840d5cafb57..8385df24eb99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -16,4 +16,11 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
 				  u32 tir_num, u16 d_port);
 void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
 int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
+
+/* ANY traffic type redirect*/
+struct mlx5_flow_handle *
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+				  u32 tir_num, u16 ether_type);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv);
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b9ebacdcbdfe..dbd910656574 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -105,7 +105,7 @@
 #define ETHTOOL_PRIO_NUM_LEVELS 1
 #define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/aRFS/accel/{esp, esp_err}} */
+/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 7
 #define KERNEL_NIC_NUM_PRIOS 1
 /* One more level for tc */
-- 
cgit v1.2.3


From e5fe49465d463ca6c029869e42e9ba5e895cce02 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 16 Feb 2021 12:32:48 +0200
Subject: net/mlx5e: Add PTP Flow Steering support

When opening PTP channel with MLX5E_PTP_STATE_RX set, add the
corresponding flow steering rules. Capture UDP packets with destination
port 319 and L2 packets with ethertype 0x88F7 and steer them into the RQ
of the PTP channel.
Add API that manages the flow steering rules to be used in the following
patches via safe_reopen_channels mechanism.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h  |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 134 ++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c  |   8 ++
 4 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index c61fbb9c6fa8..d53fb1e31b05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -245,6 +245,7 @@ struct mlx5e_accel_fs_tcp;
 
 struct mlx5e_fs_udp;
 struct mlx5e_fs_any;
+struct mlx5e_ptp_fs;
 
 struct mlx5e_flow_steering {
 	struct mlx5_flow_namespace      *ns;
@@ -266,6 +267,7 @@ struct mlx5e_flow_steering {
 #endif
 	struct mlx5e_fs_udp            *udp;
 	struct mlx5e_fs_any            *any;
+	struct mlx5e_ptp_fs            *ptp_fs;
 };
 
 struct ttc_params {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index c1c41c8656dc..995a0947b2d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -1,9 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 // Copyright (c) 2020 Mellanox Technologies
 
+#include <linux/ptp_classify.h>
 #include "en/ptp.h"
 #include "en/txrx.h"
 #include "en/params.h"
+#include "en/fs_tt_redirect.h"
+
+struct mlx5e_ptp_fs {
+	struct mlx5_flow_handle *l2_rule;
+	struct mlx5_flow_handle *udp_v4_rule;
+	struct mlx5_flow_handle *udp_v6_rule;
+	bool valid;
+};
 
 #define MLX5E_PTP_CHANNEL_IX 0
 
@@ -573,6 +582,78 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
 	return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
 }
 
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+
+	if (!ptp_fs->valid)
+		return;
+
+	mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
+	mlx5e_fs_tt_redirect_any_destroy(priv);
+
+	mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+	mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+	mlx5e_fs_tt_redirect_udp_destroy(priv);
+	ptp_fs->valid = false;
+}
+
+static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+	struct mlx5_flow_handle *rule;
+	u32 tirn = priv->ptp_tir.tirn;
+	int err;
+
+	if (ptp_fs->valid)
+		return 0;
+
+	err = mlx5e_fs_tt_redirect_udp_create(priv);
+	if (err)
+		goto out_free;
+
+	rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+						 tirn, PTP_EV_PORT);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		goto out_destroy_fs_udp;
+	}
+	ptp_fs->udp_v4_rule = rule;
+
+	rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+						 tirn, PTP_EV_PORT);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		goto out_destroy_udp_v4_rule;
+	}
+	ptp_fs->udp_v6_rule = rule;
+
+	err = mlx5e_fs_tt_redirect_any_create(priv);
+	if (err)
+		goto out_destroy_udp_v6_rule;
+
+	rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		goto out_destroy_fs_any;
+	}
+	ptp_fs->l2_rule = rule;
+	ptp_fs->valid = true;
+
+	return 0;
+
+out_destroy_fs_any:
+	mlx5e_fs_tt_redirect_any_destroy(priv);
+out_destroy_udp_v6_rule:
+	mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
+out_destroy_udp_v4_rule:
+	mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
+out_destroy_fs_udp:
+	mlx5e_fs_tt_redirect_udp_destroy(priv);
+out_free:
+	return err;
+}
+
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 		   u8 lag_port, struct mlx5e_ptp **cp)
 {
@@ -645,8 +726,10 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
 		for (tc = 0; tc < c->num_tc; tc++)
 			mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq);
 	}
-	if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+	if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+		mlx5e_ptp_rx_set_fs(c->priv);
 		mlx5e_activate_rq(&c->rq);
+	}
 }
 
 void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c)
@@ -672,3 +755,52 @@ int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
 	*rqn = c->rq.rqn;
 	return 0;
 }
+
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ptp_fs *ptp_fs;
+
+	if (!priv->profile->rx_ptp_support)
+		return 0;
+
+	ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
+	if (!ptp_fs)
+		return -ENOMEM;
+
+	priv->fs.ptp_fs = ptp_fs;
+	return 0;
+}
+
+void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv)
+{
+	struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+
+	if (!priv->profile->rx_ptp_support)
+		return;
+
+	mlx5e_ptp_rx_unset_fs(priv);
+	kfree(ptp_fs);
+}
+
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
+{
+	struct mlx5e_ptp *c = priv->channels.ptp;
+
+	if (!priv->profile->rx_ptp_support)
+		return 0;
+
+	if (set) {
+		if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+			netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules");
+			return -EINVAL;
+		}
+		return mlx5e_ptp_rx_set_fs(priv);
+	}
+	/* set == false */
+	if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+		netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
+		return -EINVAL;
+	}
+	mlx5e_ptp_rx_unset_fs(priv);
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index 460b167887bc..ab935cce952b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -49,6 +49,9 @@ void mlx5e_ptp_close(struct mlx5e_ptp *c);
 void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
 void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
 int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv);
+int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
 
 enum {
 	MLX5E_SKB_CB_CQE_HWTSTAMP  = BIT(0),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index cf1d3c9c88af..98f0b857947e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -38,6 +38,7 @@
 #include "en.h"
 #include "en_rep.h"
 #include "lib/mpfs.h"
+#include "en/ptp.h"
 
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 				  struct mlx5e_l2_rule *ai, int type);
@@ -1792,10 +1793,16 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
 		goto err_destroy_l2_table;
 	}
 
+	err = mlx5e_ptp_alloc_rx_fs(priv);
+	if (err)
+		goto err_destory_vlan_table;
+
 	mlx5e_ethtool_init_steering(priv);
 
 	return 0;
 
+err_destory_vlan_table:
+	mlx5e_destroy_vlan_table(priv);
 err_destroy_l2_table:
 	mlx5e_destroy_l2_table(priv);
 err_destroy_ttc_table:
@@ -1810,6 +1817,7 @@ err_destroy_arfs_tables:
 
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
 {
+	mlx5e_ptp_free_rx_fs(priv);
 	mlx5e_destroy_vlan_table(priv);
 	mlx5e_destroy_l2_table(priv);
 	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-- 
cgit v1.2.3


From 960fbfe222a490622cfb3949061b20f83ef46fb0 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 20 Jan 2021 16:59:27 +0200
Subject: net/mlx5e: Allow coexistence of CQE compression and HW TS PTP

Update setting HW time-stamp to allow coexistence with CQE compression.
Turn on RX PTP indication and try to reopen the channels. On success,
coexistence with CQE compression is enabled. Otherwise, fall-back to
turning off CQE compression.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c  |  3 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 43 ++++++++++++++++-------
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f31b5ccc27d0..2ad12ee9d100 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -269,6 +269,7 @@ struct mlx5e_params {
 	struct mlx5e_xsk *xsk;
 	unsigned int sw_mtu;
 	int hard_mtu;
+	bool ptp_rx;
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 995a0947b2d5..72e7dd6d78c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -579,6 +579,9 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
 	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS))
 		__set_bit(MLX5E_PTP_STATE_TX, c->state);
 
+	if (params->ptp_rx)
+		__set_bit(MLX5E_PTP_STATE_RX, c->state);
+
 	return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7cf12342afe6..c6227725733a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2087,7 +2087,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 			goto err_close_channels;
 	}
 
-	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) {
+	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
 		err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
 		if (err)
 			goto err_close_channels;
@@ -2688,6 +2688,8 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 	nch = priv->channels.params.num_channels;
 	ntc = priv->channels.params.num_tc;
 	num_rxqs = nch * priv->profile->rq_groups;
+	if (priv->channels.params.ptp_rx)
+		num_rxqs++;
 
 	mlx5e_netdev_set_tcs(netdev, nch, ntc);
 
@@ -3968,9 +3970,18 @@ static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
 	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
 }
 
+static int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
+{
+	bool set  = *(bool *)ctx;
+
+	return mlx5e_ptp_rx_manage_fs(priv, set);
+}
+
 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 {
+	struct mlx5e_channels new_channels = {};
 	struct hwtstamp_config config;
+	bool rx_cqe_compress_def;
 	int err;
 
 	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
@@ -3990,11 +4001,13 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	}
 
 	mutex_lock(&priv->state_lock);
+	new_channels.params = priv->channels.params;
+	rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+
 	/* RX HW timestamp */
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		/* Reset CQE compression to Admin default */
-		mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+		new_channels.params.ptp_rx = false;
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_SOME:
@@ -4011,15 +4024,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 	case HWTSTAMP_FILTER_NTP_ALL:
-		/* Disable CQE compression */
-		if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
-			netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
-		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
-		if (err) {
-			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
-			mutex_unlock(&priv->state_lock);
-			return err;
-		}
+		new_channels.params.ptp_rx = rx_cqe_compress_def;
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
 		break;
 	default:
@@ -4027,6 +4032,20 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 		return -ERANGE;
 	}
 
+	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
+		goto out;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		priv->channels.params = new_channels.params;
+		goto out;
+	}
+	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
+					 &new_channels.params.ptp_rx);
+	if (err) {
+		mutex_unlock(&priv->state_lock);
+		return err;
+	}
+out:
 	memcpy(&priv->tstamp, &config, sizeof(config));
 	mutex_unlock(&priv->state_lock);
 
-- 
cgit v1.2.3


From 885b8cfb161ed3d8f41e7b37e14d35bd8d3aaf6b Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 13 Jan 2021 09:54:22 +0200
Subject: net/mlx5e: Update ethtool setting of CQE compression

Remove restriction blocking configuration of CQE compression when PTP rx
filter is set. Instead turn on indication for RX PTP, and try to reopen
the channels.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h         |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 14 ++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2ad12ee9d100..b425b4a539bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1023,6 +1023,7 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
 
 void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
 				   int num_channels);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index cf319f06521d..964558086ad6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -34,6 +34,7 @@
 #include "en/port.h"
 #include "en/params.h"
 #include "en/xsk/pool.h"
+#include "en/ptp.h"
 #include "lib/clock.h"
 
 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@ -1865,13 +1866,19 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 
 	new_channels.params = priv->channels.params;
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
+		new_channels.params.ptp_rx = new_val;
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		priv->channels.params = new_channels.params;
 		return 0;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
+		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	else
+		err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
+						 &new_channels.params.ptp_rx);
 	if (err)
 		return err;
 
@@ -1892,11 +1899,6 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
 	if (!MLX5_CAP_GEN(mdev, cqe_compression))
 		return -EOPNOTSUPP;
 
-	if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
-		netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
-		return -EINVAL;
-	}
-
 	err = mlx5e_modify_rx_cqe_compression_locked(priv, enable);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c6227725733a..db2942b61fd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3970,7 +3970,7 @@ static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
 	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
 }
 
-static int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
 {
 	bool set  = *(bool *)ctx;
 
-- 
cgit v1.2.3


From 8560b0e7633b97be53a7209fb1ca3efeaae7aa88 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sun, 28 Feb 2021 09:38:35 +0100
Subject: MAINTAINERS: remove Dan Murphy from m_can and tcan4x5x

Dan Murphy's email address at ti.com doesn't work anymore, mails
bounce with:

| 550 Invalid recipient <dmurphy@ti.com> (#5.1.1)

For now remove all CAN related entries of Dan from the Maintainers
file.

Link: https://lore.kernel.org/r/20210228094218.40015-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 MAINTAINERS | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 11177892c373..45c400e9d5fc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10910,7 +10910,6 @@ T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/radio/radio-maxiradio*
 
 MCAN MMIO DEVICE DRIVER
-M:	Dan Murphy <dmurphy@ti.com>
 M:	Pankaj Sharma <pankj.sharma@samsung.com>
 L:	linux-can@vger.kernel.org
 S:	Maintained
@@ -17983,13 +17982,6 @@ L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Odd Fixes
 F:	sound/soc/codecs/tas571x*
 
-TI TCAN4X5X DEVICE DRIVER
-M:	Dan Murphy <dmurphy@ti.com>
-L:	linux-can@vger.kernel.org
-S:	Maintained
-F:	Documentation/devicetree/bindings/net/can/tcan4x5x.txt
-F:	drivers/net/can/m_can/tcan4x5x*
-
 TI TRF7970A NFC DRIVER
 M:	Mark Greer <mgreer@animalcreek.com>
 L:	linux-wireless@vger.kernel.org
-- 
cgit v1.2.3


From ba23dc6dcab5da1d421a8811f7ed0bc40a4efabb Mon Sep 17 00:00:00 2001
From: Pankaj Sharma <pankj.sharma@samsung.com>
Date: Thu, 18 Mar 2021 16:56:34 +0530
Subject: MAINTAINERS: Update MCAN MMIO device driver maintainer

Update Chandrasekar Ramakrishnan as maintainer for mcan mmio device driver as I
will be moving to a different role.

Signed-off-by: Pankaj Sharma <pankj.sharma@samsung.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 45c400e9d5fc..217c7470bfa9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10910,7 +10910,7 @@ T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/radio/radio-maxiradio*
 
 MCAN MMIO DEVICE DRIVER
-M:	Pankaj Sharma <pankj.sharma@samsung.com>
+M:	Chandrasekar Ramakrishnan <rcsekar@samsung.com>
 L:	linux-can@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
-- 
cgit v1.2.3


From 7119d7864bc5a02e1ec31497a16666ffafcc2465 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 10 Feb 2021 08:42:04 +0100
Subject: can: dev: always create TX echo skb

So far the creation of the TX echo skb was optional and can be
controlled by the local sender of a CAN frame.

It turns out that the TX echo CAN skb can be piggybacked to carry
information in the driver from the TX- to the TX-complete handler.

Several drivers already use the return value of
can_get_echo_skb() (which is the length of the data field in the CAN
frame) for their number of transferred bytes statistics. The
statistics are not working if CAN echo skbs are disabled.

Another use case is to calculate and set the CAN frame length on the
wire, which is needed for BQL support in both the TX and TX-completion
handler.

For now in can_put_echo_skb(), which is called from the TX handler,
the skb carrying the CAN frame is discarded if no TX echo is
requested, leading to the above illustrated problems.

This patch changes the can_put_echo_skb() function, so that the echo
skb is always generated. If the sender requests no echo, the echo skb
is consumed in __can_get_echo_skb() without being passed into the RX
handler of the networking stack, but the CAN data length and CAN frame
length information is properly returned.

Link: https://lore.kernel.org/r/20210309211904.3348700-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/skb.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c
index 6a64fe410987..22b0472a5fad 100644
--- a/drivers/net/can/dev/skb.c
+++ b/drivers/net/can/dev/skb.c
@@ -45,7 +45,7 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 	BUG_ON(idx >= priv->echo_skb_max);
 
 	/* check flag whether this packet has to be looped back */
-	if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK ||
+	if (!(dev->flags & IFF_ECHO) ||
 	    (skb->protocol != htons(ETH_P_CAN) &&
 	     skb->protocol != htons(ETH_P_CANFD))) {
 		kfree_skb(skb);
@@ -58,7 +58,6 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 			return -ENOMEM;
 
 		/* make settings for echo to reduce code in irq context */
-		skb->pkt_type = PACKET_BROADCAST;
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		skb->dev = dev;
 
@@ -111,6 +110,13 @@ __can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr,
 
 		priv->echo_skb[idx] = NULL;
 
+		if (skb->pkt_type == PACKET_LOOPBACK) {
+			skb->pkt_type = PACKET_BROADCAST;
+		} else {
+			dev_consume_skb_any(skb);
+			return NULL;
+		}
+
 		return skb;
 	}
 
-- 
cgit v1.2.3


From 4168d079aa41498639b2c64b4583375bcdf360d9 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 19 Mar 2021 15:08:13 +0100
Subject: can: dev: can_free_echo_skb(): don't crash the kernel if
 can_priv::echo_skb is accessed out of bounds

A out of bounds access to "struct can_priv::echo_skb" leads to a
kernel crash. Better print a sensible warning message instead and try
to recover.

This patch is similar to:

| e7a6994d043a ("can: dev: __can_get_echo_skb(): Don't crash the kernel
|               if can_priv::echo_skb is accessed out of bounds")

Link: https://lore.kernel.org/r/20210319142700.305648-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/skb.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c
index 22b0472a5fad..2256391ddbb3 100644
--- a/drivers/net/can/dev/skb.c
+++ b/drivers/net/can/dev/skb.c
@@ -157,7 +157,11 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
-	BUG_ON(idx >= priv->echo_skb_max);
+	if (idx >= priv->echo_skb_max) {
+		netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n",
+			   __func__, idx, priv->echo_skb_max);
+		return;
+	}
 
 	if (priv->echo_skb[idx]) {
 		dev_kfree_skb_any(priv->echo_skb[idx]);
-- 
cgit v1.2.3


From f318482a1c57315d0efccd2861f153f55c2117c6 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 19 Mar 2021 15:21:32 +0100
Subject: can: dev: can_free_echo_skb(): extend to return can frame length

In order to implement byte queue limits (bql) in CAN drivers, the
length of the CAN frame needs to be passed into the networking stack
even if the transmission failed for some reason.

To avoid to calculate this length twice, extend can_free_echo_skb() to
return that value. Convert all users of this function, too.

This patch is the natural extension of commit:

| 9420e1d495e2 ("can: dev: can_get_echo_skb(): extend to return can
|                frame length")

Link: https://lore.kernel.org/r/20210319142700.305648-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/skb.c                        | 11 +++++++++--
 drivers/net/can/grcan.c                          |  2 +-
 drivers/net/can/m_can/m_can.c                    |  2 +-
 drivers/net/can/rcar/rcar_can.c                  |  2 +-
 drivers/net/can/rcar/rcar_canfd.c                |  2 +-
 drivers/net/can/sja1000/sja1000.c                |  2 +-
 drivers/net/can/spi/hi311x.c                     |  2 +-
 drivers/net/can/spi/mcp251x.c                    |  2 +-
 drivers/net/can/usb/ems_usb.c                    |  2 +-
 drivers/net/can/usb/esd_usb2.c                   |  4 ++--
 drivers/net/can/usb/gs_usb.c                     |  2 +-
 drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c |  2 +-
 drivers/net/can/usb/mcba_usb.c                   |  2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_core.c     |  2 +-
 drivers/net/can/usb/ucan.c                       |  6 +++---
 drivers/net/can/usb/usb_8dev.c                   |  2 +-
 include/linux/can/skb.h                          |  3 ++-
 17 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c
index 2256391ddbb3..387c0bc0fb9c 100644
--- a/drivers/net/can/dev/skb.c
+++ b/drivers/net/can/dev/skb.c
@@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(can_get_echo_skb);
  *
  * The function is typically called when TX failed.
  */
-void can_free_echo_skb(struct net_device *dev, unsigned int idx)
+void can_free_echo_skb(struct net_device *dev, unsigned int idx,
+		       unsigned int *frame_len_ptr)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
@@ -164,7 +165,13 @@ void can_free_echo_skb(struct net_device *dev, unsigned int idx)
 	}
 
 	if (priv->echo_skb[idx]) {
-		dev_kfree_skb_any(priv->echo_skb[idx]);
+		struct sk_buff *skb = priv->echo_skb[idx];
+		struct can_skb_priv *can_skb_priv = can_skb_prv(skb);
+
+		if (frame_len_ptr)
+			*frame_len_ptr = can_skb_priv->frame_len;
+
+		dev_kfree_skb_any(skb);
 		priv->echo_skb[idx] = NULL;
 	}
 }
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index 4a8453290530..78e27940b2af 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -520,7 +520,7 @@ static int catch_up_echo_skb(struct net_device *dev, int budget, bool echo)
 			can_get_echo_skb(dev, i, NULL);
 		} else {
 			/* For cleanup of untransmitted messages */
-			can_free_echo_skb(dev, i);
+			can_free_echo_skb(dev, i, NULL);
 		}
 
 		priv->eskbp = grcan_ring_add(priv->eskbp, GRCAN_MSG_SIZE,
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 0c8d36bc668c..2ae3da16cbfe 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -425,7 +425,7 @@ static void m_can_clean(struct net_device *net)
 			putidx = ((m_can_read(cdev, M_CAN_TXFQS) &
 				   TXFQS_TFQPI_MASK) >> TXFQS_TFQPI_SHIFT);
 
-		can_free_echo_skb(cdev->net, putidx);
+		can_free_echo_skb(cdev->net, putidx, NULL);
 		cdev->tx_skb = NULL;
 	}
 }
diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
index 4870c4ea190a..00e4533c8bdd 100644
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -217,7 +217,7 @@ static void tx_failure_cleanup(struct net_device *ndev)
 	int i;
 
 	for (i = 0; i < RCAR_CAN_FIFO_DEPTH; i++)
-		can_free_echo_skb(ndev, i);
+		can_free_echo_skb(ndev, i, NULL);
 }
 
 static void rcar_can_error(struct net_device *ndev)
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index d8d233e62990..311e6ca3bdc4 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -617,7 +617,7 @@ static void rcar_canfd_tx_failure_cleanup(struct net_device *ndev)
 	u32 i;
 
 	for (i = 0; i < RCANFD_FIFO_DEPTH; i++)
-		can_free_echo_skb(ndev, i);
+		can_free_echo_skb(ndev, i, NULL);
 }
 
 static int rcar_canfd_reset_controller(struct rcar_canfd_global *gpriv)
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 9e86488ba55f..3fad54646746 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -525,7 +525,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id)
 			if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT &&
 			    !(status & SR_TCS)) {
 				stats->tx_errors++;
-				can_free_echo_skb(dev, 0);
+				can_free_echo_skb(dev, 0, NULL);
 			} else {
 				/* transmission complete */
 				stats->tx_bytes +=
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index c3e020c90111..6f5d6d04a8b9 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -179,7 +179,7 @@ static void hi3110_clean(struct net_device *net)
 		net->stats.tx_errors++;
 	dev_kfree_skb(priv->tx_skb);
 	if (priv->tx_len)
-		can_free_echo_skb(priv->net, 0);
+		can_free_echo_skb(priv->net, 0, NULL);
 	priv->tx_skb = NULL;
 	priv->tx_len = 0;
 }
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index f69fb4238a65..80ab1593ca31 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -276,7 +276,7 @@ static void mcp251x_clean(struct net_device *net)
 		net->stats.tx_errors++;
 	dev_kfree_skb(priv->tx_skb);
 	if (priv->tx_len)
-		can_free_echo_skb(priv->net, 0);
+		can_free_echo_skb(priv->net, 0, NULL);
 	priv->tx_skb = NULL;
 	priv->tx_len = 0;
 }
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 18f40eb20360..5af69787d9d5 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -807,7 +807,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
 
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (unlikely(err)) {
-		can_free_echo_skb(netdev, context->echo_index);
+		can_free_echo_skb(netdev, context->echo_index, NULL);
 
 		usb_unanchor_urb(urb);
 		usb_free_coherent(dev->udev, size, buf, urb->transfer_dma);
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 562acbf454fd..65b58f8fc328 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -360,7 +360,7 @@ static void esd_usb2_tx_done_msg(struct esd_usb2_net_priv *priv,
 		can_get_echo_skb(netdev, context->echo_index, NULL);
 	} else {
 		stats->tx_errors++;
-		can_free_echo_skb(netdev, context->echo_index);
+		can_free_echo_skb(netdev, context->echo_index, NULL);
 	}
 
 	/* Release context */
@@ -793,7 +793,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb,
 
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err) {
-		can_free_echo_skb(netdev, context->echo_index);
+		can_free_echo_skb(netdev, context->echo_index, NULL);
 
 		atomic_dec(&priv->active_tx_jobs);
 		usb_unanchor_urb(urb);
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index a00dc1904415..5e892bef46b0 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -533,7 +533,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb,
 	if (unlikely(rc)) {			/* usb send failed */
 		atomic_dec(&dev->active_tx_urbs);
 
-		can_free_echo_skb(netdev, idx);
+		can_free_echo_skb(netdev, idx, NULL);
 		gs_free_tx_context(txc);
 
 		usb_unanchor_urb(urb);
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 4e97da8434ab..90ebcae13409 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -593,7 +593,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 	if (unlikely(err)) {
 		spin_lock_irqsave(&priv->tx_contexts_lock, flags);
 
-		can_free_echo_skb(netdev, context->echo_index);
+		can_free_echo_skb(netdev, context->echo_index, NULL);
 		context->echo_index = dev->max_tx_urbs;
 		--priv->active_tx_contexts;
 		netif_wake_queue(netdev);
diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 1f649d178010..029e77dfa773 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -364,7 +364,7 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 
 xmit_failed:
-	can_free_echo_skb(priv->netdev, ctx->ndx);
+	can_free_echo_skb(priv->netdev, ctx->ndx, NULL);
 	mcba_usb_free_ctx(ctx);
 	dev_kfree_skb(skb);
 	stats->tx_dropped++;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 573b11559d73..29227b5851fe 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -371,7 +371,7 @@ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb,
 
 	err = usb_submit_urb(urb, GFP_ATOMIC);
 	if (err) {
-		can_free_echo_skb(netdev, context->echo_index);
+		can_free_echo_skb(netdev, context->echo_index, NULL);
 
 		usb_unanchor_urb(urb);
 
diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index fa403c080871..11fddedc36d4 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -675,7 +675,7 @@ static void ucan_tx_complete_msg(struct ucan_priv *up,
 			can_get_echo_skb(up->netdev, echo_index, NULL);
 		} else {
 			up->netdev->stats.tx_dropped++;
-			can_free_echo_skb(up->netdev, echo_index);
+			can_free_echo_skb(up->netdev, echo_index, NULL);
 		}
 		spin_unlock_irqrestore(&up->echo_skb_lock, flags);
 	}
@@ -843,7 +843,7 @@ static void ucan_write_bulk_callback(struct urb *urb)
 
 		/* update counters an cleanup */
 		spin_lock_irqsave(&up->echo_skb_lock, flags);
-		can_free_echo_skb(up->netdev, context - up->context_array);
+		can_free_echo_skb(up->netdev, context - up->context_array, NULL);
 		spin_unlock_irqrestore(&up->echo_skb_lock, flags);
 
 		up->netdev->stats.tx_dropped++;
@@ -1157,7 +1157,7 @@ static netdev_tx_t ucan_start_xmit(struct sk_buff *skb,
 		 * frees the skb
 		 */
 		spin_lock_irqsave(&up->echo_skb_lock, flags);
-		can_free_echo_skb(up->netdev, echo_index);
+		can_free_echo_skb(up->netdev, echo_index, NULL);
 		spin_unlock_irqrestore(&up->echo_skb_lock, flags);
 
 		if (ret == -ENODEV) {
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index e8c42430a4fc..b6e7ef0d5bc6 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -691,7 +691,7 @@ nofreecontext:
 	return NETDEV_TX_BUSY;
 
 failed:
-	can_free_echo_skb(netdev, context->echo_index);
+	can_free_echo_skb(netdev, context->echo_index, NULL);
 
 	usb_unanchor_urb(urb);
 	usb_free_coherent(priv->udev, size, buf, urb->transfer_dma);
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index d438eb058069..d311bc369a39 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -23,7 +23,8 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
 				   u8 *len_ptr, unsigned int *frame_len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx,
 			      unsigned int *frame_len_ptr);
-void can_free_echo_skb(struct net_device *dev, unsigned int idx);
+void can_free_echo_skb(struct net_device *dev, unsigned int idx,
+		       unsigned int *frame_len_ptr);
 struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf);
 struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 				struct canfd_frame **cfd);
-- 
cgit v1.2.3


From 289ea9e4ae595545e736a63ccaadba65f880e9a4 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 24 Feb 2021 09:20:04 +0900
Subject: can: add new CAN FD bittiming parameters: Transmitter Delay
 Compensation (TDC)

At high bit rates, the propagation delay from the TX pin to the RX pin
of the transceiver causes measurement errors: the sample point on the
RX pin might occur on the previous bit.

This issue is addressed in ISO 11898-1 section 11.3.3 "Transmitter
delay compensation" (TDC).

This patch adds two new structures: can_tdc and can_tdc_const in order
to implement this TDC.

The structures are then added to can_priv.

A controller supports TDC if an only if can_priv::tdc_const is not
NULL.

TDC is active if and only if:
  - fd flag is on
  - can_priv::tdc.tdco is not zero.
It is the driver responsibility to check those two conditions are met.

No new controller modes are introduced (i.e. no CAN_CTRL_MODE_TDC) in
order not to be redundant with above logic.

The names of the parameters are chosen to match existing CAN
controllers specification. References:
  - Bosch C_CAN FD8:
https://www.bosch-semiconductors.com/media/ip_modules/pdf_2/c_can_fd8/users_manual_c_can_fd8_r210_1.pdf
  - Microchip CAN FD Controller Module:
http://ww1.microchip.com/downloads/en/DeviceDoc/MCP251XXFD-CAN-FD-Controller-Module-Family-Reference-Manual-20005678B.pdf
  - SAM E701/S70/V70/V71 Family:
https://www.mouser.com/datasheet/2/268/60001527A-1284321.pdf

Link: https://lore.kernel.org/r/20210224002008.4158-2-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/bittiming.h | 65 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/can/dev.h       |  3 ++
 2 files changed, 68 insertions(+)

diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 707575c668f4..b31a49f19b47 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ * Copyright (c) 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
  */
 
 #ifndef _CAN_BITTIMING_H
@@ -10,6 +11,70 @@
 
 #define CAN_SYNC_SEG 1
 
+/*
+ * struct can_tdc - CAN FD Transmission Delay Compensation parameters
+ *
+ * At high bit rates, the propagation delay from the TX pin to the RX
+ * pin of the transceiver causes measurement errors: the sample point
+ * on the RX pin might occur on the previous bit.
+ *
+ * To solve this issue, ISO 11898-1 introduces in section 11.3.3
+ * "Transmitter delay compensation" a SSP (Secondary Sample Point)
+ * equal to the distance, in time quanta, from the start of the bit
+ * time on the TX pin to the actual measurement on the RX pin.
+ *
+ * This structure contains the parameters to calculate that SSP.
+ *
+ * @tdcv: Transmitter Delay Compensation Value. Distance, in time
+ *	quanta, from when the bit is sent on the TX pin to when it is
+ *	received on the RX pin of the transmitter. Possible options:
+ *
+ *	  O: automatic mode. The controller dynamically measure @tdcv
+ *	  for each transmitted CAN FD frame.
+ *
+ *	  Other values: manual mode. Use the fixed provided value.
+ *
+ * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
+ *	quanta, defining the distance between the start of the bit
+ *	reception on the RX pin of the transceiver and the SSP
+ *	position such as SSP = @tdcv + @tdco.
+ *
+ *	If @tdco is zero, then TDC is disabled and both @tdcv and
+ *	@tdcf should be ignored.
+ *
+ * @tdcf: Transmitter Delay Compensation Filter window. Defines the
+ *	minimum value for the SSP position in time quanta. If SSP is
+ *	less than @tdcf, then no delay compensations occur and the
+ *	normal sampling point is used instead. The feature is enabled
+ *	if and only if @tdcv is set to zero (automatic mode) and @tdcf
+ *	is configured to a value greater than @tdco.
+ */
+struct can_tdc {
+	u32 tdcv;
+	u32 tdco;
+	u32 tdcf;
+};
+
+/*
+ * struct can_tdc_const - CAN hardware-dependent constant for
+ *	Transmission Delay Compensation
+ *
+ * @tdcv_max: Transmitter Delay Compensation Value maximum value.
+ *	Should be set to zero if the controller does not support
+ *	manual mode for tdcv.
+ * @tdco_max: Transmitter Delay Compensation Offset maximum value.
+ *	Should not be zero. If the controller does not support TDC,
+ *	then the pointer to this structure should be NULL.
+ * @tdcf_max: Transmitter Delay Compensation Filter window maximum
+ *	value. Should be set to zero if the controller does not
+ *	support this feature.
+ */
+struct can_tdc_const {
+	u32 tdcv_max;
+	u32 tdco_max;
+	u32 tdcf_max;
+};
+
 #ifdef CONFIG_CAN_CALC_BITTIMING
 int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 		       const struct can_bittiming_const *btc);
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index ac4d83a1ab81..4795da0eb949 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -42,6 +42,9 @@ struct can_priv {
 	struct can_bittiming bittiming, data_bittiming;
 	const struct can_bittiming_const *bittiming_const,
 		*data_bittiming_const;
+	struct can_tdc tdc;
+	const struct can_tdc_const *tdc_const;
+
 	const u16 *termination_const;
 	unsigned int termination_const_cnt;
 	u16 termination;
-- 
cgit v1.2.3


From 4c9258dd26fdb3bacb35e767fa55c9a03a78a08e Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 24 Feb 2021 09:20:05 +0900
Subject: can: dev: reorder struct can_priv members for better packing

Save eight bytes of holes on x86-64 architectures by reordering struct
can_priv members.

Before:

$ pahole -C can_priv drivers/net/can/dev/dev.o
struct can_priv {
	struct net_device *        dev;                  /*     0     8 */
	struct can_device_stats    can_stats;            /*     8    24 */
	struct can_bittiming       bittiming;            /*    32    32 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct can_bittiming       data_bittiming;       /*    64    32 */
	const struct can_bittiming_const  * bittiming_const; /*    96     8 */
	const struct can_bittiming_const  * data_bittiming_const; /*   104     8 */
	struct can_tdc             tdc;                  /*   112    12 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) --- */
	const struct can_tdc_const  * tdc_const;         /*   128     8 */
	const u16  *               termination_const;    /*   136     8 */
	unsigned int               termination_const_cnt; /*   144     4 */
	u16                        termination;          /*   148     2 */

	/* XXX 2 bytes hole, try to pack */

	const u32  *               bitrate_const;        /*   152     8 */
	unsigned int               bitrate_const_cnt;    /*   160     4 */

	/* XXX 4 bytes hole, try to pack */

	const u32  *               data_bitrate_const;   /*   168     8 */
	unsigned int               data_bitrate_const_cnt; /*   176     4 */
	u32                        bitrate_max;          /*   180     4 */
	struct can_clock           clock;                /*   184     4 */
	enum can_state             state;                /*   188     4 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	u32                        ctrlmode;             /*   192     4 */
	u32                        ctrlmode_supported;   /*   196     4 */
	u32                        ctrlmode_static;      /*   200     4 */
	int                        restart_ms;           /*   204     4 */
	struct delayed_work        restart_work;         /*   208   168 */

	/* XXX last struct has 4 bytes of padding */

	/* --- cacheline 5 boundary (320 bytes) was 56 bytes ago --- */
	int                        (*do_set_bittiming)(struct net_device *); /*   376     8 */
	/* --- cacheline 6 boundary (384 bytes) --- */
	int                        (*do_set_data_bittiming)(struct net_device *); /*   384     8 */
	int                        (*do_set_mode)(struct net_device *, enum can_mode); /*   392     8 */
	int                        (*do_set_termination)(struct net_device *, u16); /*   400     8 */
	int                        (*do_get_state)(const struct net_device  *, enum can_state *); /*   408     8 */
	int                        (*do_get_berr_counter)(const struct net_device  *, struct can_berr_counter *); /*   416     8 */
	unsigned int               echo_skb_max;         /*   424     4 */

	/* XXX 4 bytes hole, try to pack */

	struct sk_buff * *         echo_skb;             /*   432     8 */

	/* size: 440, cachelines: 7, members: 31 */
	/* sum members: 426, holes: 4, sum holes: 14 */
	/* paddings: 1, sum paddings: 4 */
	/* last cacheline: 56 bytes */
};

After:

$ pahole -C can_priv drivers/net/can/dev/dev.o
struct can_priv {
	struct net_device *        dev;                  /*     0     8 */
	struct can_device_stats    can_stats;            /*     8    24 */
	const struct can_bittiming_const  * bittiming_const; /*    32     8 */
	const struct can_bittiming_const  * data_bittiming_const; /*    40     8 */
	struct can_bittiming       bittiming;            /*    48    32 */
	/* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
	struct can_bittiming       data_bittiming;       /*    80    32 */
	const struct can_tdc_const  * tdc_const;         /*   112     8 */
	struct can_tdc             tdc;                  /*   120    12 */
	/* --- cacheline 2 boundary (128 bytes) was 4 bytes ago --- */
	unsigned int               bitrate_const_cnt;    /*   132     4 */
	const u32  *               bitrate_const;        /*   136     8 */
	const u32  *               data_bitrate_const;   /*   144     8 */
	unsigned int               data_bitrate_const_cnt; /*   152     4 */
	u32                        bitrate_max;          /*   156     4 */
	struct can_clock           clock;                /*   160     4 */
	unsigned int               termination_const_cnt; /*   164     4 */
	const u16  *               termination_const;    /*   168     8 */
	u16                        termination;          /*   176     2 */

	/* XXX 2 bytes hole, try to pack */

	enum can_state             state;                /*   180     4 */
	u32                        ctrlmode;             /*   184     4 */
	u32                        ctrlmode_supported;   /*   188     4 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	u32                        ctrlmode_static;      /*   192     4 */
	int                        restart_ms;           /*   196     4 */
	struct delayed_work        restart_work;         /*   200   168 */

	/* XXX last struct has 4 bytes of padding */

	/* --- cacheline 5 boundary (320 bytes) was 48 bytes ago --- */
	int                        (*do_set_bittiming)(struct net_device *); /*   368     8 */
	int                        (*do_set_data_bittiming)(struct net_device *); /*   376     8 */
	/* --- cacheline 6 boundary (384 bytes) --- */
	int                        (*do_set_mode)(struct net_device *, enum can_mode); /*   384     8 */
	int                        (*do_set_termination)(struct net_device *, u16); /*   392     8 */
	int                        (*do_get_state)(const struct net_device  *, enum can_state *); /*   400     8 */
	int                        (*do_get_berr_counter)(const struct net_device  *, struct can_berr_counter *); /*   408     8 */
	unsigned int               echo_skb_max;         /*   416     4 */

	/* XXX 4 bytes hole, try to pack */

	struct sk_buff * *         echo_skb;             /*   424     8 */

	/* size: 432, cachelines: 7, members: 31 */
	/* sum members: 426, holes: 2, sum holes: 6 */
	/* paddings: 1, sum paddings: 4 */
	/* last cacheline: 48 bytes */
};

Link: https://lore.kernel.org/r/20210224002008.4158-3-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 4795da0eb949..27b275e463da 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -39,22 +39,23 @@ struct can_priv {
 	struct net_device *dev;
 	struct can_device_stats can_stats;
 
-	struct can_bittiming bittiming, data_bittiming;
 	const struct can_bittiming_const *bittiming_const,
 		*data_bittiming_const;
-	struct can_tdc tdc;
+	struct can_bittiming bittiming, data_bittiming;
 	const struct can_tdc_const *tdc_const;
+	struct can_tdc tdc;
 
-	const u16 *termination_const;
-	unsigned int termination_const_cnt;
-	u16 termination;
-	const u32 *bitrate_const;
 	unsigned int bitrate_const_cnt;
+	const u32 *bitrate_const;
 	const u32 *data_bitrate_const;
 	unsigned int data_bitrate_const_cnt;
 	u32 bitrate_max;
 	struct can_clock clock;
 
+	unsigned int termination_const_cnt;
+	const u16 *termination_const;
+	u16 termination;
+
 	enum can_state state;
 
 	/* CAN controller features - see include/uapi/linux/can/netlink.h */
-- 
cgit v1.2.3


From cfd98c838cbea6d084830d841f06ebaf0bea36de Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 24 Feb 2021 09:20:06 +0900
Subject: can: netlink: move '=' operators back to previous line (checkpatch
 fix)

Fix the warning triggered by having an '=' at the beginning of the
line by moving it back to the previous line. Also replace all
indentations with a single space so that future entries can be more
easily added.

Extract of ./scripts/checkpatch.pl -f drivers/net/can/dev/netlink.c:

CHECK: Assignment operator '=' should be on the previous line
+       [IFLA_CAN_BITTIMING_CONST]
+                               = { .len = sizeof(struct can_bittiming_const) },

CHECK: Assignment operator '=' should be on the previous line
+       [IFLA_CAN_DATA_BITTIMING]
+                               = { .len = sizeof(struct can_bittiming) },

CHECK: Assignment operator '=' should be on the previous line
+       [IFLA_CAN_DATA_BITTIMING_CONST]
+                               = { .len = sizeof(struct can_bittiming_const) },

Link: https://lore.kernel.org/r/20210224002008.4158-4-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/netlink.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index f5d79e6e5483..8443480a703d 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -8,20 +8,17 @@
 #include <net/rtnetlink.h>
 
 static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = {
-	[IFLA_CAN_STATE]	= { .type = NLA_U32 },
-	[IFLA_CAN_CTRLMODE]	= { .len = sizeof(struct can_ctrlmode) },
-	[IFLA_CAN_RESTART_MS]	= { .type = NLA_U32 },
-	[IFLA_CAN_RESTART]	= { .type = NLA_U32 },
-	[IFLA_CAN_BITTIMING]	= { .len = sizeof(struct can_bittiming) },
-	[IFLA_CAN_BITTIMING_CONST]
-				= { .len = sizeof(struct can_bittiming_const) },
-	[IFLA_CAN_CLOCK]	= { .len = sizeof(struct can_clock) },
-	[IFLA_CAN_BERR_COUNTER]	= { .len = sizeof(struct can_berr_counter) },
-	[IFLA_CAN_DATA_BITTIMING]
-				= { .len = sizeof(struct can_bittiming) },
-	[IFLA_CAN_DATA_BITTIMING_CONST]
-				= { .len = sizeof(struct can_bittiming_const) },
-	[IFLA_CAN_TERMINATION]	= { .type = NLA_U16 },
+	[IFLA_CAN_STATE] = { .type = NLA_U32 },
+	[IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) },
+	[IFLA_CAN_RESTART_MS] = { .type = NLA_U32 },
+	[IFLA_CAN_RESTART] = { .type = NLA_U32 },
+	[IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) },
+	[IFLA_CAN_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) },
+	[IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) },
+	[IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) },
+	[IFLA_CAN_DATA_BITTIMING] = { .len = sizeof(struct can_bittiming) },
+	[IFLA_CAN_DATA_BITTIMING_CONST]	= { .len = sizeof(struct can_bittiming_const) },
+	[IFLA_CAN_TERMINATION] = { .type = NLA_U16 },
 };
 
 static int can_validate(struct nlattr *tb[], struct nlattr *data[],
-- 
cgit v1.2.3


From c25cc7993243fdc00ab7e608e3764819538015ab Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Wed, 24 Feb 2021 09:20:08 +0900
Subject: can: bittiming: add calculation for CAN FD Transmitter Delay
 Compensation (TDC)

The logic for the tdco calculation is to just reuse the normal sample
point: tdco = sp. Because the sample point is expressed in tenth of
percent and the tdco is expressed in time quanta, a conversion is
needed.

At the end,
     ssp = tdcv + tdco
         = tdcv + sp.

Another popular method is to set tdco to the middle of the bit:
     tdc->tdco = can_bit_time(dbt) / 2
During benchmark tests, we could not find a clear advantages for one
of the two methods.

The tdco calculation is triggered each time the data_bittiming is
changed so that users relying on automated calculation can use the
netlink interface the exact same way without need of new parameters.
For example, a command such as:
	ip link set canX type can bitrate 500000 dbitrate 4000000 fd on
would trigger the calculation.

The user using CONFIG_CAN_CALC_BITTIMING who does not want automated
calculation needs to manually set tdco to zero.
For example with:
	ip link set canX type can tdco 0 bitrate 500000 dbitrate 4000000 fd on
(if the tdco parameter is provided in a previous command, it will be
overwritten).

If tdcv is set to zero (default), it is automatically calculated by
the transiver for each frame. As such, there is no code in the kernel
to calculate it.

tdcf has no automated calculation functions because we could not
figure out a formula for this parameter.

Link: https://lore.kernel.org/r/20210224002008.4158-6-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/bittiming.c | 24 ++++++++++++++++++++++++
 drivers/net/can/dev/netlink.c   |  2 ++
 include/linux/can/bittiming.h   |  6 ++++++
 3 files changed, 32 insertions(+)

diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
index f7fe226bb395..2907e60c9a57 100644
--- a/drivers/net/can/dev/bittiming.c
+++ b/drivers/net/can/dev/bittiming.c
@@ -174,6 +174,30 @@ int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 
 	return 0;
 }
+
+void can_calc_tdco(struct net_device *dev)
+{
+	struct can_priv *priv = netdev_priv(dev);
+	const struct can_bittiming *dbt = &priv->data_bittiming;
+	struct can_tdc *tdc = &priv->tdc;
+	const struct can_tdc_const *tdc_const = priv->tdc_const;
+
+	if (!tdc_const)
+		return;
+
+	/* As specified in ISO 11898-1 section 11.3.3 "Transmitter
+	 * delay compensation" (TDC) is only applicable if data BRP is
+	 * one or two.
+	 */
+	if (dbt->brp == 1 || dbt->brp == 2) {
+		/* Reuse "normal" sample point and convert it to time quanta */
+		u32 sample_point_in_tq = can_bit_time(dbt) * dbt->sample_point / 1000;
+
+		tdc->tdco = min(sample_point_in_tq, tdc_const->tdco_max);
+	} else {
+		tdc->tdco = 0;
+	}
+}
 #endif /* CONFIG_CAN_CALC_BITTIMING */
 
 /* Checks the validity of the specified bit-timing parameters prop_seg,
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 8443480a703d..e38c2566aff4 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -186,6 +186,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
 
 		memcpy(&priv->data_bittiming, &dbt, sizeof(dbt));
 
+		can_calc_tdco(dev);
+
 		if (priv->do_set_data_bittiming) {
 			/* Finally, set the bit-timing registers */
 			err = priv->do_set_data_bittiming(dev);
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index b31a49f19b47..3c4cad7b52c0 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -78,6 +78,8 @@ struct can_tdc_const {
 #ifdef CONFIG_CAN_CALC_BITTIMING
 int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 		       const struct can_bittiming_const *btc);
+
+void can_calc_tdco(struct net_device *dev);
 #else /* !CONFIG_CAN_CALC_BITTIMING */
 static inline int
 can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
@@ -86,6 +88,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	netdev_err(dev, "bit-timing calculation not available\n");
 	return -EINVAL;
 }
+
+static inline void can_calc_tdco(struct net_device *dev)
+{
+}
 #endif /* CONFIG_CAN_CALC_BITTIMING */
 
 int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt,
-- 
cgit v1.2.3


From 1d7750760b70ba8b0e641146eee1b3a343d1b292 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 6 Mar 2021 14:40:40 +0900
Subject: can: bittiming: add CAN_KBPS, CAN_MBPS and CAN_MHZ macros

Add three macro to simplify the readability of big bit timing numbers:
  - CAN_KBPS: kilobits per second (one thousand)
  - CAN_MBPS: megabits per second (one million)
  - CAN_MHZ: megahertz per second (one million)

Example:
	u32 bitrate_max = 8 * CAN_MBPS;
	struct can_clock clock = {.freq = 80 * CAN_MHZ};
instead of:
	u32 bitrate_max = 8000000;
	struct can_clock clock = {.freq = 80000000};

Apply the new macro to driver/net/can/dev/bittiming.c.

Link: https://lore.kernel.org/r/20210306054040.76483-1-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/bittiming.c | 4 ++--
 include/linux/can/bittiming.h   | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
index 2907e60c9a57..f49170eadd54 100644
--- a/drivers/net/can/dev/bittiming.c
+++ b/drivers/net/can/dev/bittiming.c
@@ -81,9 +81,9 @@ int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	if (bt->sample_point) {
 		sample_point_nominal = bt->sample_point;
 	} else {
-		if (bt->bitrate > 800000)
+		if (bt->bitrate > 800 * CAN_KBPS)
 			sample_point_nominal = 750;
-		else if (bt->bitrate > 500000)
+		else if (bt->bitrate > 500 * CAN_KBPS)
 			sample_point_nominal = 800;
 		else
 			sample_point_nominal = 875;
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 3c4cad7b52c0..ae7a3411167c 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -11,6 +11,14 @@
 
 #define CAN_SYNC_SEG 1
 
+
+/* Kilobits and Megabits per second */
+#define CAN_KBPS 1000UL
+#define CAN_MBPS 1000000UL
+
+/* Megahertz */
+#define CAN_MHZ 1000000UL
+
 /*
  * struct can_tdc - CAN FD Transmission Delay Compensation parameters
  *
-- 
cgit v1.2.3


From 51894cbae49e8c8dba01ee9f7f5030d1f81f4fa9 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sat, 13 Feb 2021 12:48:28 +0100
Subject: can: grcan: add missing Kconfig dependency to HAS_IOMEM

On ARCHs without IOMEM support the grcan driver fails to link due to
missing iomem functionality. This patch adds the missing Kconfig
dependency to HAS_IOMEM.

Link: https://lore.kernel.org/r/20210309140424.3331010-1-mkl@pengutronix.de
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 1c28eade6bec..e355d3974977 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -103,7 +103,7 @@ config CAN_FLEXCAN
 
 config CAN_GRCAN
 	tristate "Aeroflex Gaisler GRCAN and GRHCAN CAN devices"
-	depends on OF && HAS_DMA
+	depends on OF && HAS_DMA && HAS_IOMEM
 	help
 	  Say Y here if you want to use Aeroflex Gaisler GRCAN or GRHCAN.
 	  Note that the driver supports little endian, even though little
-- 
cgit v1.2.3


From a3497afbe9268cf64e431e9808916f1318d26b3d Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Thu, 4 Feb 2021 13:42:48 +0100
Subject: can: xilinx_can: Simplify code by using dev_err_probe()

Use already prepared dev_err_probe() introduced by commit a787e5400a1c
("driver core: add device probe log helper").
It simplifies EPROBE_DEFER handling.

Also unify message format for similar error cases.

Link: https://lore.kernel.org/r/91af0945ed7397b08f1af0c829450620bd92b804.1612442564.git.michal.simek@xilinx.com
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/xilinx_can.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 37fa19c62d73..3b883e607d8b 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -1772,17 +1772,15 @@ static int xcan_probe(struct platform_device *pdev)
 	/* Getting the CAN can_clk info */
 	priv->can_clk = devm_clk_get(&pdev->dev, "can_clk");
 	if (IS_ERR(priv->can_clk)) {
-		if (PTR_ERR(priv->can_clk) != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "Device clock not found.\n");
-		ret = PTR_ERR(priv->can_clk);
+		ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->can_clk),
+				    "device clock not found\n");
 		goto err_free;
 	}
 
 	priv->bus_clk = devm_clk_get(&pdev->dev, devtype->bus_clk_name);
 	if (IS_ERR(priv->bus_clk)) {
-		if (PTR_ERR(priv->bus_clk) != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "bus clock not found\n");
-		ret = PTR_ERR(priv->bus_clk);
+		ret = dev_err_probe(&pdev->dev, PTR_ERR(priv->bus_clk),
+				    "bus clock not found\n");
 		goto err_free;
 	}
 
-- 
cgit v1.2.3


From 27868a8fc1d0ba7abeac3e24b1a723c21c76581b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 4 Feb 2021 17:26:13 +0100
Subject: can: ucan: fix alignment constraints

struct ucan_message_in contains member with 4-byte alignment
but is itself marked as unaligned, which triggers a warning:

drivers/net/can/usb/ucan.c:249:1: warning: alignment 1 of 'struct ucan_message_in' is less than 4 [-Wpacked-not-aligned]

Mark the outer structure to have the same alignment as the inner
one.

Link: https://lore.kernel.org/r/20210204162625.3099392-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ucan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index 11fddedc36d4..1679cbe45ded 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -246,7 +246,7 @@ struct ucan_message_in {
 		 */
 		struct ucan_tx_complete_entry_t can_tx_complete_msg[0];
 	} __aligned(0x4) msg;
-} __packed;
+} __packed __aligned(0x4);
 
 /* Macros to calculate message lengths */
 #define UCAN_OUT_HDR_SIZE offsetof(struct ucan_message_out, msg)
-- 
cgit v1.2.3


From cfe2a4ca1e0691c3e1f899e04e883c3d584e89fd Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Tue, 9 Mar 2021 09:21:28 +0100
Subject: can: peak_usb: pcan_usb_pro_encode_msg(): use macros for flags
 instead of plain integers

This patch replaces the plain integers used for flags in
pcan_usb_pro_encode_msg() by macros which are already defined.

Link: https://lore.kernel.org/r/20210309082128.23125-4-s.grosjean@peak-system.com
Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
[mkl: split into two patches]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index 18fa180ecc81..902900d4f7c1 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -776,9 +776,9 @@ static int pcan_usb_pro_encode_msg(struct peak_usb_device *dev,
 
 	flags = 0;
 	if (cf->can_id & CAN_EFF_FLAG)
-		flags |= 0x02;
+		flags |= PCAN_USBPRO_EXT;
 	if (cf->can_id & CAN_RTR_FLAG)
-		flags |= 0x01;
+		flags |= PCAN_USBPRO_RTR;
 
 	pcan_msg_add_rec(&usb_msg, data_type, 0, flags, len, cf->can_id,
 			 cf->data);
-- 
cgit v1.2.3


From a7e8511ffda6a81ba6b49c22d0ed296caeff438c Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Tue, 9 Mar 2021 09:21:26 +0100
Subject: can: peak_usb: add support of ethtool set_phys_id()

This patch makes it possible to specifically flash the LED of a CAN
port of the CAN-USB interfaces of PEAK-System.

Link: https://lore.kernel.org/r/20210309122141.3276927-1-mkl@pengutronix.de
Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
[mkl: use common prefix PCAN_ for defines]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c      | 47 ++++++++++++++++++++++++++++
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  4 +++
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 ++
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 34 ++++++++++++++++++++
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c  | 34 +++++++++++++++++++-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.h  |  6 ++++
 6 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index e393e8457d77..ba509aed7b4c 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/usb.h>
 #include <linux/module.h>
+#include <linux/ethtool.h>
 
 #include <linux/can.h>
 #include <linux/can/dev.h>
@@ -40,6 +41,7 @@
 #define PCAN_USB_CMD_REGISTER	9
 #define PCAN_USB_CMD_EXT_VCC	10
 #define PCAN_USB_CMD_ERR_FR	11
+#define PCAN_USB_CMD_LED	12
 
 /* PCAN_USB_CMD_SET_BUS number arg */
 #define PCAN_USB_BUS_XCVER		2
@@ -248,6 +250,15 @@ static int pcan_usb_set_ext_vcc(struct peak_usb_device *dev, u8 onoff)
 	return pcan_usb_send_cmd(dev, PCAN_USB_CMD_EXT_VCC, PCAN_USB_SET, args);
 }
 
+static int pcan_usb_set_led(struct peak_usb_device *dev, u8 onoff)
+{
+	u8 args[PCAN_USB_CMD_ARGS_LEN] = {
+		[0] = !!onoff,
+	};
+
+	return pcan_usb_send_cmd(dev, PCAN_USB_CMD_LED, PCAN_USB_SET, args);
+}
+
 /*
  * set bittiming value to can
  */
@@ -971,6 +982,40 @@ static int pcan_usb_probe(struct usb_interface *intf)
 	return 0;
 }
 
+static int pcan_usb_set_phys_id(struct net_device *netdev,
+				enum ethtool_phys_id_state state)
+{
+	struct peak_usb_device *dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		/* call ON/OFF twice a second */
+		return 2;
+
+	case ETHTOOL_ID_OFF:
+		err = pcan_usb_set_led(dev, 0);
+		break;
+
+	case ETHTOOL_ID_ON:
+		fallthrough;
+
+	case ETHTOOL_ID_INACTIVE:
+		/* restore LED default */
+		err = pcan_usb_set_led(dev, 1);
+		break;
+
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static const struct ethtool_ops pcan_usb_ethtool_ops = {
+	.set_phys_id = pcan_usb_set_phys_id,
+};
+
 /*
  * describe the PCAN-USB adapter
  */
@@ -1001,6 +1046,8 @@ const struct peak_usb_adapter pcan_usb = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb),
 
+	.ethtool_ops = &pcan_usb_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 16,
 	.ts_period = 24575, /* calibration period in ts. */
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 29227b5851fe..ad006edf474d 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/usb.h>
+#include <linux/ethtool.h>
 
 #include <linux/can.h>
 #include <linux/can/dev.h>
@@ -820,6 +821,9 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter,
 
 	netdev->flags |= IFF_ECHO; /* we support local echo */
 
+	/* add ethtool support */
+	netdev->ethtool_ops = peak_usb_adapter->ethtool_ops;
+
 	init_usb_anchor(&dev->rx_submitted);
 
 	init_usb_anchor(&dev->tx_submitted);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 4b1528a42a7b..e15b4c78f309 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -46,6 +46,8 @@ struct peak_usb_adapter {
 	const struct can_bittiming_const * const data_bittiming_const;
 	unsigned int ctrl_count;
 
+	const struct ethtool_ops *ethtool_ops;
+
 	int (*intf_probe)(struct usb_interface *intf);
 
 	int (*dev_init)(struct peak_usb_device *dev);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index bae078579c0d..b3e56ee03cd5 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -7,6 +7,7 @@
 #include <linux/netdevice.h>
 #include <linux/usb.h>
 #include <linux/module.h>
+#include <linux/ethtool.h>
 
 #include <linux/can.h>
 #include <linux/can/dev.h>
@@ -1006,6 +1007,31 @@ static void pcan_usb_fd_free(struct peak_usb_device *dev)
 	}
 }
 
+/* blink LED's */
+static int pcan_usb_fd_set_phys_id(struct net_device *netdev,
+				   enum ethtool_phys_id_state state)
+{
+	struct peak_usb_device *dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_FAST);
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		err = pcan_usb_fd_set_can_led(dev, PCAN_UFD_LED_DEF);
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static const struct ethtool_ops pcan_usb_fd_ethtool_ops = {
+	.set_phys_id = pcan_usb_fd_set_phys_id,
+};
+
 /* describes the PCAN-USB FD adapter */
 static const struct can_bittiming_const pcan_usb_fd_const = {
 	.name = "pcan_usb_fd",
@@ -1047,6 +1073,8 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
 
+	.ethtool_ops = &pcan_usb_fd_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 32,
 	.ts_period = 1000000, /* calibration period in ts. */
@@ -1120,6 +1148,8 @@ const struct peak_usb_adapter pcan_usb_chip = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
 
+	.ethtool_ops = &pcan_usb_fd_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 32,
 	.ts_period = 1000000, /* calibration period in ts. */
@@ -1193,6 +1223,8 @@ const struct peak_usb_adapter pcan_usb_pro_fd = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
 
+	.ethtool_ops = &pcan_usb_fd_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 32,
 	.ts_period = 1000000, /* calibration period in ts. */
@@ -1266,6 +1298,8 @@ const struct peak_usb_adapter pcan_usb_x6 = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
 
+	.ethtool_ops = &pcan_usb_fd_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 32,
 	.ts_period = 1000000, /* calibration period in ts. */
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index 902900d4f7c1..b314d2eaaece 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -9,6 +9,7 @@
 #include <linux/netdevice.h>
 #include <linux/usb.h>
 #include <linux/module.h>
+#include <linux/ethtool.h>
 
 #include <linux/can.h>
 #include <linux/can/dev.h>
@@ -906,7 +907,7 @@ static int pcan_usb_pro_init(struct peak_usb_device *dev)
 	usb_if->dev[dev->ctrl_idx] = dev;
 
 	/* set LED in default state (end of init phase) */
-	pcan_usb_pro_set_led(dev, 0, 1);
+	pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_DEVICE, 1);
 
 	kfree(bi);
 	kfree(fi);
@@ -990,6 +991,35 @@ int pcan_usb_pro_probe(struct usb_interface *intf)
 	return 0;
 }
 
+static int pcan_usb_pro_set_phys_id(struct net_device *netdev,
+				    enum ethtool_phys_id_state state)
+{
+	struct peak_usb_device *dev = netdev_priv(netdev);
+	int err = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		/* fast blinking forever */
+		err = pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_BLINK_FAST,
+					   0xffffffff);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		/* restore LED default */
+		err = pcan_usb_pro_set_led(dev, PCAN_USBPRO_LED_DEVICE, 1);
+		break;
+
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static const struct ethtool_ops pcan_usb_pro_ethtool_ops = {
+	.set_phys_id = pcan_usb_pro_set_phys_id,
+};
+
 /*
  * describe the PCAN-USB Pro adapter
  */
@@ -1018,6 +1048,8 @@ const struct peak_usb_adapter pcan_usb_pro = {
 	/* size of device private data */
 	.sizeof_dev_private = sizeof(struct pcan_usb_pro_device),
 
+	.ethtool_ops = &pcan_usb_pro_ethtool_ops,
+
 	/* timestamps usage */
 	.ts_used_bits = 32,
 	.ts_period = 1000000, /* calibration period in ts. */
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
index 6bb12357d078..6f4504300e23 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
@@ -115,6 +115,12 @@ struct __packed pcan_usb_pro_devid {
 	__le32 serial_num;
 };
 
+#define PCAN_USBPRO_LED_DEVICE		0x00
+#define PCAN_USBPRO_LED_BLINK_FAST	0x01
+#define PCAN_USBPRO_LED_BLINK_SLOW	0x02
+#define PCAN_USBPRO_LED_ON		0x03
+#define PCAN_USBPRO_LED_OFF		0x04
+
 struct __packed pcan_usb_pro_setled {
 	u8  data_type;
 	u8  channel;
-- 
cgit v1.2.3


From 58b29aa9d47128ec6ee8fd731b0f137a82f0b9ea Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Tue, 9 Mar 2021 09:21:28 +0100
Subject: can: peak_usb: add support of ONE_SHOT mode

This patch adds "ONE-SHOT" mode support to the following CAN-USB
PEAK-System GmbH interfaces:
- PCAN-USB X6
- PCAN-USB FD
- PCAN-USB Pro FD
- PCAN-Chip USB
- PCAN-USB Pro

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
[mkl: split into two patches]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c  | 12 ++++++++----
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c |  8 +++++++-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index b3e56ee03cd5..6f62b6f51051 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -774,6 +774,10 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev,
 			tx_msg_flags |= PUCAN_MSG_RTR;
 	}
 
+	/* Single-Shot frame */
+	if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+		tx_msg_flags |= PUCAN_MSG_SINGLE_SHOT;
+
 	tx_msg->flags = cpu_to_le16(tx_msg_flags);
 	tx_msg->channel_dlc = PUCAN_MSG_CHANNEL_DLC(dev->ctrl_idx, dlc);
 	memcpy(tx_msg->d, cfd->data, cfd->len);
@@ -1063,7 +1067,7 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
 	.ctrlmode_supported = CAN_CTRLMODE_FD |
 			CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
-			CAN_CTRLMODE_CC_LEN8_DLC,
+			CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC,
 	.clock = {
 		.freq = PCAN_UFD_CRYSTAL_HZ,
 	},
@@ -1138,7 +1142,7 @@ const struct peak_usb_adapter pcan_usb_chip = {
 	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
 	.ctrlmode_supported = CAN_CTRLMODE_FD |
 		CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
-		CAN_CTRLMODE_CC_LEN8_DLC,
+		CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC,
 	.clock = {
 		.freq = PCAN_UFD_CRYSTAL_HZ,
 	},
@@ -1213,7 +1217,7 @@ const struct peak_usb_adapter pcan_usb_pro_fd = {
 	.ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT,
 	.ctrlmode_supported = CAN_CTRLMODE_FD |
 			CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
-			CAN_CTRLMODE_CC_LEN8_DLC,
+			CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC,
 	.clock = {
 		.freq = PCAN_UFD_CRYSTAL_HZ,
 	},
@@ -1288,7 +1292,7 @@ const struct peak_usb_adapter pcan_usb_x6 = {
 	.ctrl_count = PCAN_USBPROFD_CHANNEL_COUNT,
 	.ctrlmode_supported = CAN_CTRLMODE_FD |
 			CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
-			CAN_CTRLMODE_CC_LEN8_DLC,
+			CAN_CTRLMODE_ONE_SHOT | CAN_CTRLMODE_CC_LEN8_DLC,
 	.clock = {
 		.freq = PCAN_UFD_CRYSTAL_HZ,
 	},
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index b314d2eaaece..2d1b645af76c 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -37,6 +37,7 @@
 
 #define PCAN_USBPRO_RTR			0x01
 #define PCAN_USBPRO_EXT			0x02
+#define PCAN_USBPRO_SS			0x08
 
 #define PCAN_USBPRO_CMD_BUFFER_SIZE	512
 
@@ -781,6 +782,10 @@ static int pcan_usb_pro_encode_msg(struct peak_usb_device *dev,
 	if (cf->can_id & CAN_RTR_FLAG)
 		flags |= PCAN_USBPRO_RTR;
 
+	/* Single-Shot frame */
+	if (dev->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT)
+		flags |= PCAN_USBPRO_SS;
+
 	pcan_msg_add_rec(&usb_msg, data_type, 0, flags, len, cf->can_id,
 			 cf->data);
 
@@ -1039,7 +1044,8 @@ const struct peak_usb_adapter pcan_usb_pro = {
 	.name = "PCAN-USB Pro",
 	.device_id = PCAN_USBPRO_PRODUCT_ID,
 	.ctrl_count = PCAN_USBPRO_CHANNEL_COUNT,
-	.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+	.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY |
+			      CAN_CTRLMODE_ONE_SHOT,
 	.clock = {
 		.freq = PCAN_USBPRO_CRYSTAL_HZ,
 	},
-- 
cgit v1.2.3


From 8fa12201b6521a8752a2474229a81b0aa09c2b93 Mon Sep 17 00:00:00 2001
From: Xulin Sun <xulin.sun@windriver.com>
Date: Fri, 5 Feb 2021 15:25:59 +0800
Subject: can: m_can: m_can_class_allocate_dev(): remove impossible error
 return judgment

If the CAN net device has been successfully allocated, its private
data structure is impossible to be empty, remove this redundant error
return judgment.

Link: https://lore.kernel.org/r/20210205072559.13241-2-xulin.sun@windriver.com
Signed-off-by: Xulin Sun <xulin.sun@windriver.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 2ae3da16cbfe..12a75ebe9ce1 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1787,11 +1787,6 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev,
 	}
 
 	class_dev = netdev_priv(net_dev);
-	if (!class_dev) {
-		dev_err(dev, "Failed to init netdev cdevate");
-		goto out;
-	}
-
 	class_dev->net = net_dev;
 	class_dev->dev = dev;
 	SET_NETDEV_DEV(net_dev, dev);
-- 
cgit v1.2.3


From 17447f08202d7599a61bc218343b8e7da0a23fa6 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Mon, 8 Mar 2021 10:24:26 +0000
Subject: can: m_can: add infrastructure for internal timestamps

Add infrastucture to allow internal timestamps from the M_CAN to be
configured and retrieved.

Link: https://lore.kernel.org/r/20210308102427.63916-2-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 12a75ebe9ce1..9f7cfe91f7ff 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -8,6 +8,7 @@
  * https://github.com/linux-can/can-doc/tree/master/m_can
  */
 
+#include <linux/bitfield.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -148,6 +149,16 @@ enum m_can_reg {
 #define NBTP_NTSEG2_SHIFT	0
 #define NBTP_NTSEG2_MASK	(0x7f << NBTP_NTSEG2_SHIFT)
 
+/* Timestamp Counter Configuration Register (TSCC) */
+#define TSCC_TCP_MASK		GENMASK(19, 16)
+#define TSCC_TSS_MASK		GENMASK(1, 0)
+#define TSCC_TSS_DISABLE	0x0
+#define TSCC_TSS_INTERNAL	0x1
+#define TSCC_TSS_EXTERNAL	0x2
+
+/* Timestamp Counter Value Register (TSCV) */
+#define TSCV_TSC_MASK		GENMASK(15, 0)
+
 /* Error Counter Register(ECR) */
 #define ECR_RP			BIT(15)
 #define ECR_REC_SHIFT		8
@@ -302,6 +313,7 @@ enum m_can_reg {
 #define RX_BUF_ANMF		BIT(31)
 #define RX_BUF_FDF		BIT(21)
 #define RX_BUF_BRS		BIT(20)
+#define RX_BUF_RXTS_MASK	GENMASK(15, 0)
 
 /* Tx Buffer Element */
 /* T0 */
@@ -319,6 +331,7 @@ enum m_can_reg {
 /* E1 */
 #define TX_EVENT_MM_SHIFT	TX_BUF_MM_SHIFT
 #define TX_EVENT_MM_MASK	(0xff << TX_EVENT_MM_SHIFT)
+#define TX_EVENT_TXTS_MASK	GENMASK(15, 0)
 
 static inline u32 m_can_read(struct m_can_classdev *cdev, enum m_can_reg reg)
 {
@@ -413,6 +426,20 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev)
 	m_can_write(cdev, M_CAN_ILE, 0x0);
 }
 
+/* Retrieve internal timestamp counter from TSCV.TSC, and shift it to 32-bit
+ * width.
+ */
+static u32 m_can_get_timestamp(struct m_can_classdev *cdev)
+{
+	u32 tscv;
+	u32 tsc;
+
+	tscv = m_can_read(cdev, M_CAN_TSCV);
+	tsc = FIELD_GET(TSCV_TSC_MASK, tscv);
+
+	return (tsc << 16);
+}
+
 static void m_can_clean(struct net_device *net)
 {
 	struct m_can_classdev *cdev = netdev_priv(net);
-- 
cgit v1.2.3


From df06fd678260bca919ea894281ec54ce10e45ce6 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Mon, 8 Mar 2021 10:24:27 +0000
Subject: can: m_can: m_can_chip_config(): enable and configure internal
 timestamps

This is a prerequisite for transitioning the m_can driver to rx-offload,
which works best with TX and RX timestamps.

The timestamps provided by M_CAN are 16-bit, timed according to the
nominal bit timing, and may be prescaled by a multiplier up to 16. We
choose the highest prescalar so that the timestamp wraps every 2^20 bit
times, or 209 ms at a bus speed of 5 Mbit/s. Timestamps will have a
precision of 16 bit times.

Link: https://lore.kernel.org/r/20210308102427.63916-3-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 9f7cfe91f7ff..7df81e38b043 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1135,6 +1135,7 @@ static int m_can_set_bittiming(struct net_device *dev)
  *		- >= v3.1.x: TX FIFO is used
  * - configure mode
  * - setup bittiming
+ * - configure timestamp generation
  */
 static void m_can_chip_config(struct net_device *dev)
 {
@@ -1246,6 +1247,10 @@ static void m_can_chip_config(struct net_device *dev)
 	/* set bittiming params */
 	m_can_set_bittiming(dev);
 
+	/* enable internal timestamp generation, with a prescalar of 16. The
+	 * prescalar is applied to the nominal bit timing */
+	m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf));
+
 	m_can_config_endisable(cdev, false);
 
 	if (cdev->ops->init)
-- 
cgit v1.2.3


From 1be37d3b0414e3db47f6fcba6c16286bbae0cb65 Mon Sep 17 00:00:00 2001
From: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Date: Mon, 8 Mar 2021 10:24:28 +0000
Subject: can: m_can: fix periph RX path: use rx-offload to ensure skbs are
 sent from softirq context

For peripheral devices, m_can sent skbs directly from a threaded irq
instead of from a softirq context, breaking the tcan4x5x peripheral
driver completely. This patch transitions the driver to use the
rx-offload helper for peripherals, ensuring the skbs are sent from the
correct context, with h/w timestamping to ensure correct ordering.

Link: https://lore.kernel.org/r/20210308102427.63916-4-torin@maxiluxsystems.com
Signed-off-by: Torin Cooper-Bennun <torin@maxiluxsystems.com>
[mkl: m_can_class_register(): update error handling]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 121 ++++++++++++++++++++++++++++++++++++------
 drivers/net/can/m_can/m_can.h |   2 +
 2 files changed, 107 insertions(+), 16 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 7df81e38b043..890ed826a355 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -457,6 +457,21 @@ static void m_can_clean(struct net_device *net)
 	}
 }
 
+/* For peripherals, pass skb to rx-offload, which will push skb from
+ * napi. For non-peripherals, RX is done in napi already, so push
+ * directly. timestamp is used to ensure good skb ordering in
+ * rx-offload and is ignored for non-peripherals.
+*/
+static void m_can_receive_skb(struct m_can_classdev *cdev,
+			      struct sk_buff *skb,
+			      u32 timestamp)
+{
+	if (cdev->is_peripheral)
+		can_rx_offload_queue_sorted(&cdev->offload, skb, timestamp);
+	else
+		netif_receive_skb(skb);
+}
+
 static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
 {
 	struct net_device_stats *stats = &dev->stats;
@@ -464,6 +479,7 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
 	struct canfd_frame *cf;
 	struct sk_buff *skb;
 	u32 id, fgi, dlc;
+	u32 timestamp = 0;
 	int i;
 
 	/* calculate the fifo get index for where to read data */
@@ -512,7 +528,9 @@ static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
 	stats->rx_packets++;
 	stats->rx_bytes += cf->len;
 
-	netif_receive_skb(skb);
+	timestamp = FIELD_GET(RX_BUF_RXTS_MASK, dlc);
+
+	m_can_receive_skb(cdev, skb, timestamp);
 }
 
 static int m_can_do_rx_poll(struct net_device *dev, int quota)
@@ -543,9 +561,11 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
 
 static int m_can_handle_lost_msg(struct net_device *dev)
 {
+	struct m_can_classdev *cdev = netdev_priv(dev);
 	struct net_device_stats *stats = &dev->stats;
 	struct sk_buff *skb;
 	struct can_frame *frame;
+	u32 timestamp = 0;
 
 	netdev_err(dev, "msg lost in rxf0\n");
 
@@ -559,7 +579,10 @@ static int m_can_handle_lost_msg(struct net_device *dev)
 	frame->can_id |= CAN_ERR_CRTL;
 	frame->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
 
-	netif_receive_skb(skb);
+	if (cdev->is_peripheral)
+		timestamp = m_can_get_timestamp(cdev);
+
+	m_can_receive_skb(cdev, skb, timestamp);
 
 	return 1;
 }
@@ -571,6 +594,7 @@ static int m_can_handle_lec_err(struct net_device *dev,
 	struct net_device_stats *stats = &dev->stats;
 	struct can_frame *cf;
 	struct sk_buff *skb;
+	u32 timestamp = 0;
 
 	cdev->can.can_stats.bus_error++;
 	stats->rx_errors++;
@@ -616,7 +640,11 @@ static int m_can_handle_lec_err(struct net_device *dev,
 
 	stats->rx_packets++;
 	stats->rx_bytes += cf->len;
-	netif_receive_skb(skb);
+
+	if (cdev->is_peripheral)
+		timestamp = m_can_get_timestamp(cdev);
+
+	m_can_receive_skb(cdev, skb, timestamp);
 
 	return 1;
 }
@@ -674,6 +702,7 @@ static int m_can_handle_state_change(struct net_device *dev,
 	struct sk_buff *skb;
 	struct can_berr_counter bec;
 	unsigned int ecr;
+	u32 timestamp = 0;
 
 	switch (new_state) {
 	case CAN_STATE_ERROR_WARNING:
@@ -735,7 +764,11 @@ static int m_can_handle_state_change(struct net_device *dev,
 
 	stats->rx_packets++;
 	stats->rx_bytes += cf->len;
-	netif_receive_skb(skb);
+
+	if (cdev->is_peripheral)
+		timestamp = m_can_get_timestamp(cdev);
+
+	m_can_receive_skb(cdev, skb, timestamp);
 
 	return 1;
 }
@@ -800,6 +833,7 @@ static int m_can_handle_protocol_error(struct net_device *dev, u32 irqstatus)
 	struct m_can_classdev *cdev = netdev_priv(dev);
 	struct can_frame *cf;
 	struct sk_buff *skb;
+	u32 timestamp = 0;
 
 	/* propagate the error condition to the CAN stack */
 	skb = alloc_can_err_skb(dev, &cf);
@@ -821,7 +855,11 @@ static int m_can_handle_protocol_error(struct net_device *dev, u32 irqstatus)
 		netdev_dbg(dev, "allocation of skb failed\n");
 		return 0;
 	}
-	netif_receive_skb(skb);
+
+	if (cdev->is_peripheral)
+		timestamp = m_can_get_timestamp(cdev);
+
+	m_can_receive_skb(cdev, skb, timestamp);
 
 	return 1;
 }
@@ -922,6 +960,29 @@ static int m_can_poll(struct napi_struct *napi, int quota)
 	return work_done;
 }
 
+/* Echo tx skb and update net stats. Peripherals use rx-offload for
+ * echo. timestamp is used for peripherals to ensure correct ordering
+ * by rx-offload, and is ignored for non-peripherals.
+*/
+static void m_can_tx_update_stats(struct m_can_classdev *cdev,
+				  unsigned int msg_mark,
+				  u32 timestamp)
+{
+	struct net_device *dev = cdev->net;
+	struct net_device_stats *stats = &dev->stats;
+
+	if (cdev->is_peripheral)
+		stats->tx_bytes +=
+			can_rx_offload_get_echo_skb(&cdev->offload,
+						    msg_mark,
+						    timestamp,
+						    NULL);
+	else
+		stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL);
+
+	stats->tx_packets++;
+}
+
 static void m_can_echo_tx_event(struct net_device *dev)
 {
 	u32 txe_count = 0;
@@ -931,7 +992,6 @@ static void m_can_echo_tx_event(struct net_device *dev)
 	unsigned int msg_mark;
 
 	struct m_can_classdev *cdev = netdev_priv(dev);
-	struct net_device_stats *stats = &dev->stats;
 
 	/* read tx event fifo status */
 	m_can_txefs = m_can_read(cdev, M_CAN_TXEFS);
@@ -941,21 +1001,23 @@ static void m_can_echo_tx_event(struct net_device *dev)
 
 	/* Get and process all sent elements */
 	for (i = 0; i < txe_count; i++) {
+		u32 txe, timestamp = 0;
+
 		/* retrieve get index */
 		fgi = (m_can_read(cdev, M_CAN_TXEFS) & TXEFS_EFGI_MASK) >>
 			TXEFS_EFGI_SHIFT;
 
-		/* get message marker */
-		msg_mark = (m_can_txe_fifo_read(cdev, fgi, 4) &
-			    TX_EVENT_MM_MASK) >> TX_EVENT_MM_SHIFT;
+		/* get message marker, timestamp */
+		txe = m_can_txe_fifo_read(cdev, fgi, 4);
+		msg_mark = (txe & TX_EVENT_MM_MASK) >> TX_EVENT_MM_SHIFT;
+		timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
 
 		/* ack txe element */
 		m_can_write(cdev, M_CAN_TXEFA, (TXEFA_EFAI_MASK &
 						(fgi << TXEFA_EFAI_SHIFT)));
 
 		/* update stats */
-		stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL);
-		stats->tx_packets++;
+		m_can_tx_update_stats(cdev, msg_mark, timestamp);
 	}
 }
 
@@ -963,7 +1025,6 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
 {
 	struct net_device *dev = (struct net_device *)dev_id;
 	struct m_can_classdev *cdev = netdev_priv(dev);
-	struct net_device_stats *stats = &dev->stats;
 	u32 ir;
 
 	if (pm_runtime_suspended(cdev->dev))
@@ -996,8 +1057,12 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
 	if (cdev->version == 30) {
 		if (ir & IR_TC) {
 			/* Transmission Complete Interrupt*/
-			stats->tx_bytes += can_get_echo_skb(dev, 0, NULL);
-			stats->tx_packets++;
+			u32 timestamp = 0;
+
+			if (cdev->is_peripheral)
+				timestamp = m_can_get_timestamp(cdev);
+			m_can_tx_update_stats(cdev, 0, timestamp);
+
 			can_led_event(dev, CAN_LED_EVENT_TX);
 			netif_wake_queue(dev);
 		}
@@ -1458,6 +1523,9 @@ static int m_can_close(struct net_device *dev)
 		cdev->tx_wq = NULL;
 	}
 
+	if (cdev->is_peripheral)
+		can_rx_offload_disable(&cdev->offload);
+
 	close_candev(dev);
 	can_led_event(dev, CAN_LED_EVENT_STOP);
 
@@ -1656,6 +1724,9 @@ static int m_can_open(struct net_device *dev)
 		goto exit_disable_clks;
 	}
 
+	if (cdev->is_peripheral)
+		can_rx_offload_enable(&cdev->offload);
+
 	/* register interrupt handler */
 	if (cdev->is_peripheral) {
 		cdev->tx_skb = NULL;
@@ -1697,6 +1768,8 @@ exit_irq_fail:
 	if (cdev->is_peripheral)
 		destroy_workqueue(cdev->tx_wq);
 out_wq_fail:
+	if (cdev->is_peripheral)
+		can_rx_offload_disable(&cdev->offload);
 	close_candev(dev);
 exit_disable_clks:
 	m_can_clk_stop(cdev);
@@ -1845,15 +1918,22 @@ int m_can_class_register(struct m_can_classdev *cdev)
 			return ret;
 	}
 
+	if (cdev->is_peripheral) {
+		ret = can_rx_offload_add_manual(cdev->net, &cdev->offload,
+						M_CAN_NAPI_WEIGHT);
+		if (ret)
+			goto clk_disable;
+	}
+
 	ret = m_can_dev_setup(cdev);
 	if (ret)
-		goto clk_disable;
+		goto rx_offload_del;
 
 	ret = register_m_can_dev(cdev->net);
 	if (ret) {
 		dev_err(cdev->dev, "registering %s failed (err=%d)\n",
 			cdev->net->name, ret);
-		goto clk_disable;
+		goto rx_offload_del;
 	}
 
 	devm_can_led_init(cdev->net);
@@ -1866,6 +1946,13 @@ int m_can_class_register(struct m_can_classdev *cdev)
 	/* Probe finished
 	 * Stop clocks. They will be reactivated once the M_CAN device is opened
 	 */
+	m_can_clk_stop(cdev);
+
+	return 0;
+
+rx_offload_del:
+	if (cdev->is_peripheral)
+		can_rx_offload_del(&cdev->offload);
 clk_disable:
 	m_can_clk_stop(cdev);
 
@@ -1875,6 +1962,8 @@ EXPORT_SYMBOL_GPL(m_can_class_register);
 
 void m_can_class_unregister(struct m_can_classdev *cdev)
 {
+	if (cdev->is_peripheral)
+		can_rx_offload_del(&cdev->offload);
 	unregister_candev(cdev->net);
 }
 EXPORT_SYMBOL_GPL(m_can_class_unregister);
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 3fda84cef351..ace071c3e58c 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -8,6 +8,7 @@
 
 #include <linux/can/core.h>
 #include <linux/can/led.h>
+#include <linux/can/rx-offload.h>
 #include <linux/completion.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
@@ -71,6 +72,7 @@ struct m_can_ops {
 
 struct m_can_classdev {
 	struct can_priv can;
+	struct can_rx_offload offload;
 	struct napi_struct napi;
 	struct net_device *net;
 	struct device *dev;
-- 
cgit v1.2.3


From 6c23fe67e8dc825ef2fd34d1dac12fc970140a8b Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 23 Mar 2021 10:10:25 +0800
Subject: can: tcan4x5x: remove duplicate include of regmap.h

linux/regmap.h has been included at line 13, so remove the duplicate
one at line 14.

Fixes: 67def4ef8bb9 ("can: tcan4x5x: move regmap code into seperate file")
Link: https://lore.kernel.org/r/20210323021026.140460-1-wanjiabing@vivo.com
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/tcan4x5x.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/m_can/tcan4x5x.h b/drivers/net/can/m_can/tcan4x5x.h
index c66da829b795..e62c030d3e1e 100644
--- a/drivers/net/can/m_can/tcan4x5x.h
+++ b/drivers/net/can/m_can/tcan4x5x.h
@@ -11,7 +11,6 @@
 
 #include <linux/gpio/consumer.h>
 #include <linux/regmap.h>
-#include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
 
-- 
cgit v1.2.3


From e0ab3dd5f98fcca95a8290578833552e496fabaf Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 23 Dec 2020 16:50:25 +0100
Subject: can: mcp251xfd: add dev coredump support

For easier debugging this patch adds dev coredump support to the
driver. A dev coredump is generated in case the chip fails to start or
an error in the interrupt handler is detected.

The dev coredump consists of all chip registers and chip memory, as
well as the driver's internal state of the TEF-, RX- and TX-FIFOs, it
can be analyzed with the mcp251xfd-dump tool of the can-utils:

https://github.com/linux-can/can-utils/tree/master/mcp251xfd
Link: https://lore.kernel.org/r/20210304160328.2752293-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/Kconfig          |   1 +
 drivers/net/can/spi/mcp251xfd/Makefile         |   2 +
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c |   2 +
 drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c | 285 +++++++++++++++++++++++++
 drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h |  45 ++++
 drivers/net/can/spi/mcp251xfd/mcp251xfd.h      |   8 +
 6 files changed, 343 insertions(+)
 create mode 100644 drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
 create mode 100644 drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h

diff --git a/drivers/net/can/spi/mcp251xfd/Kconfig b/drivers/net/can/spi/mcp251xfd/Kconfig
index f5a147a92cb2..dd0fc0a54be1 100644
--- a/drivers/net/can/spi/mcp251xfd/Kconfig
+++ b/drivers/net/can/spi/mcp251xfd/Kconfig
@@ -3,6 +3,7 @@
 config CAN_MCP251XFD
 	tristate "Microchip MCP251xFD SPI CAN controllers"
 	select REGMAP
+	select WANT_DEV_COREDUMP
 	help
 	  Driver for the Microchip MCP251XFD SPI FD-CAN controller
 	  family.
diff --git a/drivers/net/can/spi/mcp251xfd/Makefile b/drivers/net/can/spi/mcp251xfd/Makefile
index cb71244cbe89..e87e668a08a0 100644
--- a/drivers/net/can/spi/mcp251xfd/Makefile
+++ b/drivers/net/can/spi/mcp251xfd/Makefile
@@ -6,3 +6,5 @@ mcp251xfd-objs :=
 mcp251xfd-objs += mcp251xfd-core.o
 mcp251xfd-objs += mcp251xfd-crc16.o
 mcp251xfd-objs += mcp251xfd-regmap.o
+
+mcp251xfd-$(CONFIG_DEV_COREDUMP) += mcp251xfd-dump.o
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 799e9d5d3481..88c6efeebe06 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1097,6 +1097,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv)
 	return 0;
 
  out_chip_stop:
+	mcp251xfd_dump(priv);
 	mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
 
 	return err;
@@ -2277,6 +2278,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
  out_fail:
 	netdev_err(priv->ndev, "IRQ handler returned %d (intf=0x%08x).\n",
 		   err, priv->regs_status.intf);
+	mcp251xfd_dump(priv);
 	mcp251xfd_chip_interrupts_disable(priv);
 
 	return handled;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
new file mode 100644
index 000000000000..ffae8fdd3af0
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2020, 2021 Pengutronix,
+//               Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (C) 2015-2018 Etnaviv Project
+//
+
+#include <linux/devcoredump.h>
+
+#include "mcp251xfd.h"
+#include "mcp251xfd-dump.h"
+
+struct mcp251xfd_dump_iter {
+	void *start;
+	struct mcp251xfd_dump_object_header *hdr;
+	void *data;
+};
+
+struct mcp251xfd_dump_reg_space {
+	u16 base;
+	u16 size;
+};
+
+struct mcp251xfd_dump_ring {
+	enum mcp251xfd_dump_object_ring_key key;
+	u32 val;
+};
+
+static const struct mcp251xfd_dump_reg_space mcp251xfd_dump_reg_space[] = {
+	{
+		.base = MCP251XFD_REG_CON,
+		.size = MCP251XFD_REG_FLTOBJ(32) - MCP251XFD_REG_CON,
+	}, {
+		.base = MCP251XFD_RAM_START,
+		.size = MCP251XFD_RAM_SIZE,
+	}, {
+		.base = MCP251XFD_REG_OSC,
+		.size = MCP251XFD_REG_DEVID - MCP251XFD_REG_OSC,
+	},
+};
+
+static void mcp251xfd_dump_header(struct mcp251xfd_dump_iter *iter,
+				  enum mcp251xfd_dump_object_type object_type,
+				  const void *data_end)
+{
+	struct mcp251xfd_dump_object_header *hdr = iter->hdr;
+	unsigned int len;
+
+	len = data_end - iter->data;
+	if (!len)
+		return;
+
+	hdr->magic = cpu_to_le32(MCP251XFD_DUMP_MAGIC);
+	hdr->type = cpu_to_le32(object_type);
+	hdr->offset = cpu_to_le32(iter->data - iter->start);
+	hdr->len = cpu_to_le32(len);
+
+	iter->hdr++;
+	iter->data += len;
+}
+
+static void mcp251xfd_dump_registers(const struct mcp251xfd_priv *priv,
+				     struct mcp251xfd_dump_iter *iter)
+{
+	const int val_bytes = regmap_get_val_bytes(priv->map_rx);
+	struct mcp251xfd_dump_object_reg *reg = iter->data;
+	unsigned int i, j;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(mcp251xfd_dump_reg_space); i++) {
+		const struct mcp251xfd_dump_reg_space *reg_space;
+		void *buf;
+
+		reg_space = &mcp251xfd_dump_reg_space[i];
+
+		buf = kmalloc(reg_space->size, GFP_KERNEL);
+		if (!buf)
+			goto out;
+
+		err = regmap_bulk_read(priv->map_reg, reg_space->base,
+				       buf, reg_space->size / val_bytes);
+		if (err) {
+			kfree(buf);
+			continue;
+		}
+
+		for (j = 0; j < reg_space->size; j += sizeof(u32), reg++) {
+			reg->reg = cpu_to_le32(reg_space->base + j);
+			reg->val = cpu_to_le32p(buf + j);
+		}
+
+		kfree(buf);
+	}
+
+ out:
+	mcp251xfd_dump_header(iter, MCP251XFD_DUMP_OBJECT_TYPE_REG, reg);
+}
+
+static void mcp251xfd_dump_ring(struct mcp251xfd_dump_iter *iter,
+				enum mcp251xfd_dump_object_type object_type,
+				const struct mcp251xfd_dump_ring *dump_ring,
+				unsigned int len)
+{
+	struct mcp251xfd_dump_object_reg *reg = iter->data;
+	unsigned int i;
+
+	for (i = 0; i < len; i++, reg++) {
+		reg->reg = cpu_to_le32(dump_ring[i].key);
+		reg->val = cpu_to_le32(dump_ring[i].val);
+	}
+
+	mcp251xfd_dump_header(iter, object_type, reg);
+}
+
+static void mcp251xfd_dump_tef_ring(const struct mcp251xfd_priv *priv,
+				    struct mcp251xfd_dump_iter *iter)
+{
+	const struct mcp251xfd_tef_ring *tef = priv->tef;
+	const struct mcp251xfd_tx_ring *tx = priv->tx;
+	const struct mcp251xfd_dump_ring dump_ring[] = {
+		{
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD,
+			.val = tef->head,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL,
+			.val = tef->tail,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE,
+			.val = 0,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR,
+			.val = 0,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR,
+			.val = 0,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM,
+			.val = tx->obj_num,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE,
+			.val = sizeof(struct mcp251xfd_hw_tef_obj),
+		},
+	};
+
+	mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_TEF,
+			    dump_ring, ARRAY_SIZE(dump_ring));
+}
+
+static void mcp251xfd_dump_rx_ring_one(const struct mcp251xfd_priv *priv,
+				       struct mcp251xfd_dump_iter *iter,
+				       const struct mcp251xfd_rx_ring *rx)
+{
+	const struct mcp251xfd_dump_ring dump_ring[] = {
+		{
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD,
+			.val = rx->head,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL,
+			.val = rx->tail,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE,
+			.val = rx->base,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR,
+			.val = rx->nr,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR,
+			.val = rx->fifo_nr,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM,
+			.val = rx->obj_num,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE,
+			.val = rx->obj_size,
+		},
+	};
+
+	mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_RX,
+			    dump_ring, ARRAY_SIZE(dump_ring));
+}
+
+static void mcp251xfd_dump_rx_ring(const struct mcp251xfd_priv *priv,
+				   struct mcp251xfd_dump_iter *iter)
+{
+	struct mcp251xfd_rx_ring *rx_ring;
+	unsigned int i;
+
+	mcp251xfd_for_each_rx_ring(priv, rx_ring, i)
+		mcp251xfd_dump_rx_ring_one(priv, iter, rx_ring);
+}
+
+static void mcp251xfd_dump_tx_ring(const struct mcp251xfd_priv *priv,
+				   struct mcp251xfd_dump_iter *iter)
+{
+	const struct mcp251xfd_tx_ring *tx = priv->tx;
+	const struct mcp251xfd_dump_ring dump_ring[] = {
+		{
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD,
+			.val = tx->head,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL,
+			.val = tx->tail,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_BASE,
+			.val = tx->base,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_NR,
+			.val = 0,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR,
+			.val = MCP251XFD_TX_FIFO,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM,
+			.val = tx->obj_num,
+		}, {
+			.key = MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE,
+			.val = tx->obj_size,
+		},
+	};
+
+	mcp251xfd_dump_ring(iter, MCP251XFD_DUMP_OBJECT_TYPE_TX,
+			    dump_ring, ARRAY_SIZE(dump_ring));
+}
+
+static void mcp251xfd_dump_end(const struct mcp251xfd_priv *priv,
+			       struct mcp251xfd_dump_iter *iter)
+{
+	struct mcp251xfd_dump_object_header *hdr = iter->hdr;
+
+	hdr->magic = cpu_to_le32(MCP251XFD_DUMP_MAGIC);
+	hdr->type = cpu_to_le32(MCP251XFD_DUMP_OBJECT_TYPE_END);
+	hdr->offset = cpu_to_le32(0);
+	hdr->len = cpu_to_le32(0);
+
+	/* provoke NULL pointer access, if used after END object */
+	iter->hdr = NULL;
+}
+
+void mcp251xfd_dump(const struct mcp251xfd_priv *priv)
+{
+	struct mcp251xfd_dump_iter iter;
+	unsigned int rings_num, obj_num;
+	unsigned int file_size = 0;
+	unsigned int i;
+
+	/* register space + end marker */
+	obj_num = 2;
+
+	/* register space */
+	for (i = 0; i < ARRAY_SIZE(mcp251xfd_dump_reg_space); i++)
+		file_size += mcp251xfd_dump_reg_space[i].size / sizeof(u32) *
+			sizeof(struct mcp251xfd_dump_object_reg);
+
+	/* TEF ring, RX ring, TX rings */
+	rings_num = 1 + priv->rx_ring_num + 1;
+	obj_num += rings_num;
+	file_size += rings_num * __MCP251XFD_DUMP_OBJECT_RING_KEY_MAX  *
+		sizeof(struct mcp251xfd_dump_object_reg);
+
+	/* size of the headers */
+	file_size += sizeof(*iter.hdr) * obj_num;
+
+	/* allocate the file in vmalloc memory, it's likely to be big */
+	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+			       __GFP_ZERO | __GFP_NORETRY);
+	if (!iter.start) {
+		netdev_warn(priv->ndev, "Failed to allocate devcoredump file.\n");
+		return;
+	}
+
+	/* point the data member after the headers */
+	iter.hdr = iter.start;
+	iter.data = &iter.hdr[obj_num];
+
+	mcp251xfd_dump_registers(priv, &iter);
+	mcp251xfd_dump_tef_ring(priv, &iter);
+	mcp251xfd_dump_rx_ring(priv, &iter);
+	mcp251xfd_dump_tx_ring(priv, &iter);
+	mcp251xfd_dump_end(priv, &iter);
+
+	dev_coredumpv(&priv->spi->dev, iter.start,
+		      iter.data - iter.start, GFP_KERNEL);
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h
new file mode 100644
index 000000000000..e7560b0712eb
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-dump.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+ *
+ * Copyright (c) 2019, 2020, 2021 Pengutronix,
+ *               Marc Kleine-Budde <kernel@pengutronix.de>
+ */
+
+#ifndef _MCP251XFD_DUMP_H
+#define _MCP251XFD_DUMP_H
+
+#define MCP251XFD_DUMP_MAGIC 0x1825434d
+
+enum mcp251xfd_dump_object_type {
+	MCP251XFD_DUMP_OBJECT_TYPE_REG,
+	MCP251XFD_DUMP_OBJECT_TYPE_TEF,
+	MCP251XFD_DUMP_OBJECT_TYPE_RX,
+	MCP251XFD_DUMP_OBJECT_TYPE_TX,
+	MCP251XFD_DUMP_OBJECT_TYPE_END = -1,
+};
+
+enum mcp251xfd_dump_object_ring_key {
+	MCP251XFD_DUMP_OBJECT_RING_KEY_HEAD,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_TAIL,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_BASE,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_NR,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_FIFO_NR,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_NUM,
+	MCP251XFD_DUMP_OBJECT_RING_KEY_OBJ_SIZE,
+	__MCP251XFD_DUMP_OBJECT_RING_KEY_MAX,
+};
+
+struct mcp251xfd_dump_object_header {
+	__le32 magic;
+	__le32 type;
+	__le32 offset;
+	__le32 len;
+};
+
+struct mcp251xfd_dump_object_reg {
+	__le32 reg;
+	__le32 val;
+};
+
+#endif
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 480bd4480bdf..fe8be4a80798 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -838,4 +838,12 @@ u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size,
 			     const void *data, size_t data_size);
 u16 mcp251xfd_crc16_compute(const void *data, size_t data_size);
 
+#if IS_ENABLED(CONFIG_DEV_COREDUMP)
+void mcp251xfd_dump(const struct mcp251xfd_priv *priv);
+#else
+static inline void mcp251xfd_dump(const struct mcp251xfd_priv *priv)
+{
+}
+#endif
+
 #endif
-- 
cgit v1.2.3


From eb94b74ccda607f3c0e441d793ff9f90fc3b09ea Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sun, 14 Feb 2021 00:25:45 +0100
Subject: can: mcp251xfd: simplify UINC handling

In the patches:

| 1f652bb6bae7 can: mcp25xxfd: rx-path: reduce number of SPI core requests to set UINC bit
| 68c0c1c7f966 can: mcp251xfd: tef-path: reduce number of SPI core requests to set UINC bit

the setting of the UINC bit in the TEF and RX FIFO was batched into a
single SPI message consisting of several transfers. All transfers but
the last need to have the cs_change set to 1.

In the original patches the array of prepared transfers is send from
the beginning with the length depending on the number of read TEF/RX
objects. The cs_change of the last transfer is temporarily set to
0 during send.

This patch removes the modification of cs_change by preparing the last
transfer with cs_change to 0 and all other to 1. When sending the SPI
message the driver now starts with an offset into the array, so that
it always ends on the last entry in the array, which has the cs_change
set to 0.

Link: https://lore.kernel.org/r/20210304160328.2752293-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 69 ++++++++++++++------------
 1 file changed, 37 insertions(+), 32 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 88c6efeebe06..0c7bd1aa7719 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2,8 +2,8 @@
 //
 // mcp251xfd - Microchip MCP251xFD Family CAN controller driver
 //
-// Copyright (c) 2019, 2020 Pengutronix,
-//                          Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2019, 2020, 2021 Pengutronix,
+//               Marc Kleine-Budde <kernel@pengutronix.de>
 //
 // Based on:
 //
@@ -330,6 +330,7 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 	struct mcp251xfd_tx_ring *tx_ring;
 	struct mcp251xfd_rx_ring *rx_ring, *prev_rx_ring = NULL;
 	struct mcp251xfd_tx_obj *tx_obj;
+	struct spi_transfer *xfer;
 	u32 val;
 	u16 addr;
 	u8 len;
@@ -347,8 +348,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 					      addr, val, val);
 
 	for (j = 0; j < ARRAY_SIZE(tef_ring->uinc_xfer); j++) {
-		struct spi_transfer *xfer;
-
 		xfer = &tef_ring->uinc_xfer[j];
 		xfer->tx_buf = &tef_ring->uinc_buf;
 		xfer->len = len;
@@ -357,6 +356,15 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 		xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
 	}
 
+	/* "cs_change == 1" on the last transfer results in an active
+	 * chip select after the complete SPI message. This causes the
+	 * controller to interpret the next register access as
+	 * data. Set "cs_change" of the last transfer to "0" to
+	 * properly deactivate the chip select at the end of the
+	 * message.
+	 */
+	xfer->cs_change = 0;
+
 	/* TX */
 	tx_ring = priv->tx;
 	tx_ring->head = 0;
@@ -397,8 +405,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 						      addr, val, val);
 
 		for (j = 0; j < ARRAY_SIZE(rx_ring->uinc_xfer); j++) {
-			struct spi_transfer *xfer;
-
 			xfer = &rx_ring->uinc_xfer[j];
 			xfer->tx_buf = &rx_ring->uinc_buf;
 			xfer->len = len;
@@ -406,6 +412,15 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 			xfer->cs_change_delay.value = 0;
 			xfer->cs_change_delay.unit = SPI_DELAY_UNIT_NSECS;
 		}
+
+		/* "cs_change == 1" on the last transfer results in an
+		 * active chip select after the complete SPI
+		 * message. This causes the controller to interpret
+		 * the next register access as data. Set "cs_change"
+		 * of the last transfer to "0" to properly deactivate
+		 * the chip select at the end of the message.
+		 */
+		xfer->cs_change = 0;
 	}
 }
 
@@ -1366,25 +1381,20 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 	if (len) {
 		struct mcp251xfd_tef_ring *ring = priv->tef;
 		struct mcp251xfd_tx_ring *tx_ring = priv->tx;
-		struct spi_transfer *last_xfer;
+		int offset;
 
 		/* Increment the TEF FIFO tail pointer 'len' times in
 		 * a single SPI message.
 		 *
 		 * Note:
-		 *
-		 * "cs_change == 1" on the last transfer results in an
-		 * active chip select after the complete SPI
-		 * message. This causes the controller to interpret
-		 * the next register access as data. Temporary set
-		 * "cs_change" of the last transfer to "0" to properly
-		 * deactivate the chip select at the end of the
-		 * message.
+		 * Calculate offset, so that the SPI transfer ends on
+		 * the last message of the uinc_xfer array, which has
+		 * "cs_change == 0", to properly deactivate the chip
+		 * select.
 		 */
-		last_xfer = &ring->uinc_xfer[len - 1];
-		last_xfer->cs_change = 0;
-		err = spi_sync_transfer(priv->spi, ring->uinc_xfer, len);
-		last_xfer->cs_change = 1;
+		offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+		err = spi_sync_transfer(priv->spi,
+					ring->uinc_xfer + offset, len);
 		if (err)
 			return err;
 
@@ -1536,7 +1546,7 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
 		return err;
 
 	while ((len = mcp251xfd_get_rx_linear_len(ring))) {
-		struct spi_transfer *last_xfer;
+		int offset;
 
 		rx_tail = mcp251xfd_get_rx_tail(ring);
 
@@ -1557,19 +1567,14 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv,
 		 * single SPI message.
 		 *
 		 * Note:
-		 *
-		 * "cs_change == 1" on the last transfer results in an
-		 * active chip select after the complete SPI
-		 * message. This causes the controller to interpret
-		 * the next register access as data. Temporary set
-		 * "cs_change" of the last transfer to "0" to properly
-		 * deactivate the chip select at the end of the
-		 * message.
+		 * Calculate offset, so that the SPI transfer ends on
+		 * the last message of the uinc_xfer array, which has
+		 * "cs_change == 0", to properly deactivate the chip
+		 * select.
 		 */
-		last_xfer = &ring->uinc_xfer[len - 1];
-		last_xfer->cs_change = 0;
-		err = spi_sync_transfer(priv->spi, ring->uinc_xfer, len);
-		last_xfer->cs_change = 1;
+		offset = ARRAY_SIZE(ring->uinc_xfer) - len;
+		err = spi_sync_transfer(priv->spi,
+					ring->uinc_xfer + offset, len);
 		if (err)
 			return err;
 
-- 
cgit v1.2.3


From ae2e9940112064ca21a807f543822a1eea2731d6 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 2 Mar 2021 16:46:52 +0100
Subject: can: mcp251xfd: move netdevice.h to mcp251xfd.h

The netdevice.h header is needed in mcp251xfd.h, so that it can be
included without further headers.

Link: https://lore.kernel.org/r/20210304160328.2752293-4-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 -
 drivers/net/can/spi/mcp251xfd/mcp251xfd.h      | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 0c7bd1aa7719..e2e5b5c7f736 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -16,7 +16,6 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/module.h>
-#include <linux/netdevice.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index fe8be4a80798..d0a0d2f91dac 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -15,6 +15,7 @@
 #include <linux/can/rx-offload.h>
 #include <linux/gpio/consumer.h>
 #include <linux/kernel.h>
+#include <linux/netdevice.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
-- 
cgit v1.2.3


From dc09e7e37152d1d18511cd590980d3982a3a0daa Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 3 Mar 2021 11:43:41 +0100
Subject: can: mcp251xfd: mcp251xfd_get_timestamp(): move to mcp251xfd.h

This is a preparation patch, it moves the mcp251xfd_get_timestamp()
function into the mcp251xfd.h file.

Link: https://lore.kernel.org/r/20210304160328.2752293-5-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 6 ------
 drivers/net/can/spi/mcp251xfd/mcp251xfd.h      | 6 ++++++
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index e2e5b5c7f736..41e322954d9a 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1597,12 +1597,6 @@ static int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv)
 	return 0;
 }
 
-static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv,
-					  u32 *timestamp)
-{
-	return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp);
-}
-
 static struct sk_buff *
 mcp251xfd_alloc_can_err_skb(const struct mcp251xfd_priv *priv,
 			    struct can_frame **cf, u32 *timestamp)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index d0a0d2f91dac..074c5adf9b94 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -728,6 +728,12 @@ mcp251xfd_spi_cmd_write(const struct mcp251xfd_priv *priv,
 	return data;
 }
 
+static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv,
+					  u32 *timestamp)
+{
+	return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp);
+}
+
 static inline u16 mcp251xfd_get_tef_obj_addr(u8 n)
 {
 	return MCP251XFD_RAM_START +
-- 
cgit v1.2.3


From efd8d98dfb900f96370cc7722ccb7959e58557c7 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 2 Mar 2021 15:58:18 +0100
Subject: can: mcp251xfd: add HW timestamp infrastructure

This patch add the HW timestamping infrastructure. The mcp251xfd has a
free running timer of 32 bit width, running at max 40MHz, which wraps
around every 107 seconds. The current timestamp is latched into RX and
TEF objects automatically be the CAN controller.

This patch sets up a cyclecounter, timecounter and delayed worker
infrastructure (which runs every 45 seconds) to convert the timer into
a proper 64 bit based ns timestamp.

Link: https://lore.kernel.org/r/20210304160328.2752293-6-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/Makefile             |  1 +
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c     |  3 +
 .../net/can/spi/mcp251xfd/mcp251xfd-timestamp.c    | 71 ++++++++++++++++++++++
 drivers/net/can/spi/mcp251xfd/mcp251xfd.h          | 13 ++++
 4 files changed, 88 insertions(+)
 create mode 100644 drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c

diff --git a/drivers/net/can/spi/mcp251xfd/Makefile b/drivers/net/can/spi/mcp251xfd/Makefile
index e87e668a08a0..3cba3b9447ea 100644
--- a/drivers/net/can/spi/mcp251xfd/Makefile
+++ b/drivers/net/can/spi/mcp251xfd/Makefile
@@ -6,5 +6,6 @@ mcp251xfd-objs :=
 mcp251xfd-objs += mcp251xfd-core.o
 mcp251xfd-objs += mcp251xfd-crc16.o
 mcp251xfd-objs += mcp251xfd-regmap.o
+mcp251xfd-objs += mcp251xfd-timestamp.o
 
 mcp251xfd-$(CONFIG_DEV_COREDUMP) += mcp251xfd-dump.o
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 41e322954d9a..6cdc05b02403 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -2493,6 +2493,7 @@ static int mcp251xfd_open(struct net_device *ndev)
 	if (err)
 		goto out_transceiver_disable;
 
+	mcp251xfd_timestamp_init(priv);
 	can_rx_offload_enable(&priv->offload);
 
 	err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
@@ -2513,6 +2514,7 @@ static int mcp251xfd_open(struct net_device *ndev)
 	free_irq(spi->irq, priv);
  out_can_rx_offload_disable:
 	can_rx_offload_disable(&priv->offload);
+	mcp251xfd_timestamp_stop(priv);
  out_transceiver_disable:
 	mcp251xfd_transceiver_disable(priv);
  out_mcp251xfd_ring_free:
@@ -2534,6 +2536,7 @@ static int mcp251xfd_stop(struct net_device *ndev)
 	mcp251xfd_chip_interrupts_disable(priv);
 	free_irq(ndev->irq, priv);
 	can_rx_offload_disable(&priv->offload);
+	mcp251xfd_timestamp_stop(priv);
 	mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
 	mcp251xfd_transceiver_disable(priv);
 	mcp251xfd_ring_free(priv);
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
new file mode 100644
index 000000000000..ed3169274d24
--- /dev/null
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
+//
+// Copyright (c) 2021 Pengutronix,
+//               Marc Kleine-Budde <kernel@pengutronix.de>
+//
+
+#include <linux/clocksource.h>
+#include <linux/workqueue.h>
+
+#include "mcp251xfd.h"
+
+static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc)
+{
+	struct mcp251xfd_priv *priv;
+	u32 timestamp = 0;
+	int err;
+
+	priv = container_of(cc, struct mcp251xfd_priv, cc);
+	err = mcp251xfd_get_timestamp(priv, &timestamp);
+	if (err)
+		netdev_err(priv->ndev,
+			   "Error %d while reading timestamp. HW timestamps may be inaccurate.",
+			   err);
+
+	return timestamp;
+}
+
+static void mcp251xfd_timestamp_work(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct mcp251xfd_priv *priv;
+
+	priv = container_of(delayed_work, struct mcp251xfd_priv, timestamp);
+	timecounter_read(&priv->tc);
+
+	schedule_delayed_work(&priv->timestamp,
+			      MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
+}
+
+void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+				 struct sk_buff *skb, u32 timestamp)
+{
+	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+	u64 ns;
+
+	ns = timecounter_cyc2time(&priv->tc, timestamp);
+	hwtstamps->hwtstamp = ns_to_ktime(ns);
+}
+
+void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv)
+{
+	struct cyclecounter *cc = &priv->cc;
+
+	cc->read = mcp251xfd_timestamp_read;
+	cc->mask = CYCLECOUNTER_MASK(32);
+	cc->shift = 1;
+	cc->mult = clocksource_hz2mult(priv->can.clock.freq, cc->shift);
+
+	timecounter_init(&priv->tc, &priv->cc, ktime_get_real_ns());
+
+	INIT_DELAYED_WORK(&priv->timestamp, mcp251xfd_timestamp_work);
+	schedule_delayed_work(&priv->timestamp,
+			      MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ);
+}
+
+void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv)
+{
+	cancel_delayed_work_sync(&priv->timestamp);
+}
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 074c5adf9b94..1002f3902ad2 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -19,6 +19,8 @@
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
+#include <linux/timecounter.h>
+#include <linux/workqueue.h>
 
 /* MPC251x registers */
 
@@ -395,6 +397,9 @@
 #define MCP251XFD_SYSCLOCK_HZ_MAX 40000000
 #define MCP251XFD_SYSCLOCK_HZ_MIN 1000000
 #define MCP251XFD_SPICLOCK_HZ_MAX 20000000
+#define MCP251XFD_TIMESTAMP_WORK_DELAY_SEC 45
+static_assert(MCP251XFD_TIMESTAMP_WORK_DELAY_SEC <
+	      CYCLECOUNTER_MASK(32) / MCP251XFD_SYSCLOCK_HZ_MAX / 2);
 #define MCP251XFD_OSC_PLL_MULTIPLIER 10
 #define MCP251XFD_OSC_STAB_SLEEP_US (3 * USEC_PER_MSEC)
 #define MCP251XFD_OSC_STAB_TIMEOUT_US (10 * MCP251XFD_OSC_STAB_SLEEP_US)
@@ -596,6 +601,10 @@ struct mcp251xfd_priv {
 	struct mcp251xfd_ecc ecc;
 	struct mcp251xfd_regs_status regs_status;
 
+	struct cyclecounter cc;
+	struct timecounter tc;
+	struct delayed_work timestamp;
+
 	struct gpio_desc *rx_int;
 	struct clk *clk;
 	struct regulator *reg_vdd;
@@ -844,6 +853,10 @@ int mcp251xfd_regmap_init(struct mcp251xfd_priv *priv);
 u16 mcp251xfd_crc16_compute2(const void *cmd, size_t cmd_size,
 			     const void *data, size_t data_size);
 u16 mcp251xfd_crc16_compute(const void *data, size_t data_size);
+void mcp251xfd_skb_set_timestamp(struct mcp251xfd_priv *priv,
+				 struct sk_buff *skb, u32 timestamp);
+void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv);
+void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv);
 
 #if IS_ENABLED(CONFIG_DEV_COREDUMP)
 void mcp251xfd_dump(const struct mcp251xfd_priv *priv);
-- 
cgit v1.2.3


From 5f02a49c6605fbd85c00acd19a10e149bba5c162 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 2 Mar 2021 15:58:18 +0100
Subject: can: mcp251xfd: add HW timestamp to RX, TX and error CAN frames

This patch uses the previously added mcp251xfd_skb_set_timestamp()
function to convert the timestamp done by the CAN controller into a
proper skb hw timestamp.

Link: https://lore.kernel.org/r/20210304161209.2754463-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 6cdc05b02403..142eb4506b55 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1265,7 +1265,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
 			   const struct mcp251xfd_hw_tef_obj *hw_tef_obj)
 {
 	struct net_device_stats *stats = &priv->ndev->stats;
-	u32 seq, seq_masked, tef_tail_masked;
+	struct sk_buff *skb;
+	u32 seq, seq_masked, tef_tail_masked, tef_tail;
 
 	seq = FIELD_GET(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK,
 			hw_tef_obj->flags);
@@ -1281,9 +1282,13 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
 	if (seq_masked != tef_tail_masked)
 		return mcp251xfd_handle_tefif_recover(priv, seq);
 
+	tef_tail = mcp251xfd_get_tef_tail(priv);
+	skb = priv->can.echo_skb[tef_tail];
+	if (skb)
+		mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
 	stats->tx_bytes +=
 		can_rx_offload_get_echo_skb(&priv->offload,
-					    mcp251xfd_get_tef_tail(priv),
+					    tef_tail,
 					    hw_tef_obj->ts, NULL);
 	stats->tx_packets++;
 	priv->tef->tail++;
@@ -1442,7 +1447,7 @@ mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv,
 }
 
 static void
-mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
+mcp251xfd_hw_rx_obj_to_skb(struct mcp251xfd_priv *priv,
 			   const struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj,
 			   struct sk_buff *skb)
 {
@@ -1485,6 +1490,8 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv,
 
 	if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR))
 		memcpy(cfd->data, hw_rx_obj->data, cfd->len);
+
+	mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts);
 }
 
 static int
@@ -1598,16 +1605,21 @@ static int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv)
 }
 
 static struct sk_buff *
-mcp251xfd_alloc_can_err_skb(const struct mcp251xfd_priv *priv,
+mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv,
 			    struct can_frame **cf, u32 *timestamp)
 {
+	struct sk_buff *skb;
 	int err;
 
 	err = mcp251xfd_get_timestamp(priv, timestamp);
 	if (err)
 		return NULL;
 
-	return alloc_can_err_skb(priv->ndev, cf);
+	skb = alloc_can_err_skb(priv->ndev, cf);
+	if (skb)
+		mcp251xfd_skb_set_timestamp(priv, skb, *timestamp);
+
+	return skb;
 }
 
 static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv)
@@ -1759,6 +1771,7 @@ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv)
 	if (!cf)
 		return 0;
 
+	mcp251xfd_skb_set_timestamp(priv, skb, timestamp);
 	err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
 	if (err)
 		stats->rx_fifo_errors++;
-- 
cgit v1.2.3


From 172f6d3a031b5ecb22e7dd8c4462f4eeabde3d63 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:16:14 +0100
Subject: can: c_can: convert block comments to network style comments

This patch converts all block comments to network subsystem style
block comments.

Link: https://lore.kernel.org/r/20210304154240.2747987-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c     | 52 +++++++++++++--------------------------
 drivers/net/can/c_can/c_can_pci.c |  3 +--
 2 files changed, 18 insertions(+), 37 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 6958830cb983..6629951e4935 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -161,9 +161,7 @@
 
 #define IF_MCONT_TX		(IF_MCONT_TXIE | IF_MCONT_EOB)
 
-/*
- * Use IF1 for RX and IF2 for TX
- */
+/* Use IF1 for RX and IF2 for TX */
 #define IF_RX			0
 #define IF_TX			1
 
@@ -189,8 +187,7 @@ enum c_can_lec_type {
 	LEC_MASK = LEC_UNUSED,
 };
 
-/*
- * c_can error types:
+/* c_can error types:
  * Bus errors (BUS_OFF, ERROR_WARNING, ERROR_PASSIVE) are supported
  */
 enum c_can_bus_error_types {
@@ -268,8 +265,7 @@ static inline void c_can_object_put(struct net_device *dev, int iface,
 	c_can_obj_update(dev, iface, cmd | IF_COMM_WR, obj);
 }
 
-/*
- * Note: According to documentation clearing TXIE while MSGVAL is set
+/* Note: According to documentation clearing TXIE while MSGVAL is set
  * is not allowed, but works nicely on C/DCAN. And that lowers the I/O
  * load significantly.
  */
@@ -309,8 +305,7 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
 	if (!rtr)
 		arb |= IF_ARB_TRANSMIT;
 
-	/*
-	 * If we change the DIR bit, we need to invalidate the buffer
+	/* If we change the DIR bit, we need to invalidate the buffer
 	 * first, i.e. clear the MSGVAL flag in the arbiter.
 	 */
 	if (rtr != (bool)test_bit(idx, &priv->tx_dir)) {
@@ -447,8 +442,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
 
 	if (can_dropped_invalid_skb(dev, skb))
 		return NETDEV_TX_OK;
-	/*
-	 * This is not a FIFO. C/D_CAN sends out the buffers
+	/* This is not a FIFO. C/D_CAN sends out the buffers
 	 * prioritized. The lowest buffer number wins.
 	 */
 	idx = fls(atomic_read(&priv->tx_active));
@@ -457,8 +451,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
 	/* If this is the last buffer, stop the xmit queue */
 	if (idx == C_CAN_MSG_OBJ_TX_NUM - 1)
 		netif_stop_queue(dev);
-	/*
-	 * Store the message in the interface so we can call
+	/* Store the message in the interface so we can call
 	 * can_put_echo_skb(). We must do this before we enable
 	 * transmit as we might race against do_tx().
 	 */
@@ -527,8 +520,7 @@ static int c_can_set_bittiming(struct net_device *dev)
 	return c_can_wait_for_ctrl_init(dev, priv, 0);
 }
 
-/*
- * Configure C_CAN message objects for Tx and Rx purposes:
+/* Configure C_CAN message objects for Tx and Rx purposes:
  * C_CAN provides a total of 32 message objects that can be configured
  * either for Tx or Rx purposes. Here the first 16 message objects are used as
  * a reception FIFO. The end of reception FIFO is signified by the EoB bit
@@ -572,8 +564,7 @@ static int c_can_software_reset(struct net_device *dev)
 	return 0;
 }
 
-/*
- * Configure C_CAN chip:
+/* Configure C_CAN chip:
  * - enable/disable auto-retransmission
  * - set operating mode
  * - configure message objects
@@ -739,8 +730,7 @@ static void c_can_do_tx(struct net_device *dev)
 	}
 }
 
-/*
- * If we have a gap in the pending bits, that means we either
+/* If we have a gap in the pending bits, that means we either
  * raced with the hardware or failed to readout all upper
  * objects in the last run due to quota limit.
  */
@@ -751,8 +741,7 @@ static u32 c_can_adjust_pending(u32 pend)
 	if (pend == RECEIVE_OBJECT_BITS)
 		return pend;
 
-	/*
-	 * If the last set bit is larger than the number of pending
+	/* If the last set bit is larger than the number of pending
 	 * bits we have a gap.
 	 */
 	weight = hweight32(pend);
@@ -762,8 +751,7 @@ static u32 c_can_adjust_pending(u32 pend)
 	if (lasts == weight)
 		return pend;
 
-	/*
-	 * Find the first set bit after the gap. We walk backwards
+	/* Find the first set bit after the gap. We walk backwards
 	 * from the last set bit.
 	 */
 	for (lasts--; pend & (1 << (lasts - 1)); lasts--);
@@ -803,8 +791,7 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 			continue;
 		}
 
-		/*
-		 * This really should not happen, but this covers some
+		/* This really should not happen, but this covers some
 		 * odd HW behaviour. Do not remove that unless you
 		 * want to brick your machine.
 		 */
@@ -830,8 +817,7 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv)
 	return pend;
 }
 
-/*
- * theory of operation:
+/* theory of operation:
  *
  * c_can core saves a received CAN message into the first free message
  * object it finds free (starting with the lowest). Bits NEWDAT and
@@ -848,8 +834,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 	struct c_can_priv *priv = netdev_priv(dev);
 	u32 pkts = 0, pend = 0, toread, n;
 
-	/*
-	 * It is faster to read only one 16bit register. This is only possible
+	/* It is faster to read only one 16bit register. This is only possible
 	 * for a maximum number of 16 objects.
 	 */
 	BUILD_BUG_ON_MSG(C_CAN_MSG_OBJ_RX_LAST > 16,
@@ -860,8 +845,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 			pend = c_can_get_pending(priv);
 			if (!pend)
 				break;
-			/*
-			 * If the pending field has a gap, handle the
+			/* If the pending field has a gap, handle the
 			 * bits above the gap first.
 			 */
 			toread = c_can_adjust_pending(pend);
@@ -979,8 +963,7 @@ static int c_can_handle_bus_err(struct net_device *dev,
 	struct can_frame *cf;
 	struct sk_buff *skb;
 
-	/*
-	 * early exit if no lec update or no error.
+	/* early exit if no lec update or no error.
 	 * no lec update means that no CAN bus event has been detected
 	 * since CPU wrote 0x7 value to status reg.
 	 */
@@ -999,8 +982,7 @@ static int c_can_handle_bus_err(struct net_device *dev,
 	if (unlikely(!skb))
 		return 0;
 
-	/*
-	 * check for 'last error code' which tells us the
+	/* check for 'last error code' which tells us the
 	 * type of the last error to occur on the CAN bus
 	 */
 	cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 7efb60b50876..9d7a4a335249 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -41,8 +41,7 @@ struct c_can_pci_data {
 	void (*init)(const struct c_can_priv *priv, bool enable);
 };
 
-/*
- * 16-bit c_can registers can be arranged differently in the memory
+/* 16-bit c_can registers can be arranged differently in the memory
  * architecture of different implementations. For example: 16-bit
  * registers can be aligned to a 16-bit boundary or 32-bit boundary etc.
  * Handle the same by providing a common read/write interface.
-- 
cgit v1.2.3


From beb7e88a2650ae7bb8ec6e4b73d2de816893d68e Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:16:14 +0100
Subject: can: c_can: remove unnecessary blank lines and add suggested ones

This patch removes unnecessary blank lines and add suggested ones, so
that checkpatch doesn't complain anymore.

Link: https://lore.kernel.org/r/20210304154240.2747987-3-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c     | 2 --
 drivers/net/can/c_can/c_can_pci.c | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 6629951e4935..cb0707a899d5 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -132,7 +132,6 @@
 /* For the high buffers we clear the interrupt bit and newdat */
 #define IF_COMM_RCV_HIGH	(IF_COMM_RCV_LOW | IF_COMM_CLR_NEWDAT)
 
-
 /* Receive setup of message objects */
 #define IF_COMM_RCV_SETUP	(IF_COMM_MASK | IF_COMM_ARB | IF_COMM_CONTROL)
 
@@ -250,7 +249,6 @@ static void c_can_obj_update(struct net_device *dev, int iface, u32 cmd, u32 obj
 		udelay(1);
 	}
 	netdev_err(dev, "Updating object timed out\n");
-
 }
 
 static inline void c_can_object_get(struct net_device *dev, int iface,
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 9d7a4a335249..35a914e70cc9 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -278,6 +278,7 @@ static const struct pci_device_id c_can_pci_tbl[] = {
 		 c_can_pch),
 	{},
 };
+
 static struct pci_driver c_can_pci_driver = {
 	.name = KBUILD_MODNAME,
 	.id_table = c_can_pci_tbl,
-- 
cgit v1.2.3


From 2de0ea97ade0d087699af329ecd212b2967bcf58 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:16:14 +0100
Subject: can: c_can: fix indention

This patch fixes the indention in the driver.

Link: https://lore.kernel.org/r/20210304154240.2747987-4-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c     | 12 ++++++------
 drivers/net/can/c_can/c_can.h     | 10 +++++-----
 drivers/net/can/c_can/c_can_pci.c | 16 ++++++++--------
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index cb0707a899d5..cd98d30dd083 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -502,7 +502,7 @@ static int c_can_set_bittiming(struct net_device *dev)
 	reg_brpe = brpe & BRP_EXT_BRPE_MASK;
 
 	netdev_info(dev,
-		"setting BTR=%04x BRPE=%04x\n", reg_btr, reg_brpe);
+		    "setting BTR=%04x BRPE=%04x\n", reg_btr, reg_brpe);
 
 	ctrl_save = priv->read_reg(priv, C_CAN_CTRL_REG);
 	ctrl_save &= ~CONTROL_INIT;
@@ -836,7 +836,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 	 * for a maximum number of 16 objects.
 	 */
 	BUILD_BUG_ON_MSG(C_CAN_MSG_OBJ_RX_LAST > 16,
-			"Implementation does not support more message objects than 16");
+			 "Implementation does not support more message objects than 16");
 
 	while (quota > 0) {
 		if (!pend) {
@@ -865,7 +865,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 }
 
 static int c_can_handle_state_change(struct net_device *dev,
-				enum c_can_bus_error_types error_type)
+				     enum c_can_bus_error_types error_type)
 {
 	unsigned int reg_err_counter;
 	unsigned int rx_err_passive;
@@ -1127,7 +1127,7 @@ static int c_can_open(struct net_device *dev)
 
 	/* register interrupt handler */
 	err = request_irq(dev->irq, &c_can_isr, IRQF_SHARED, dev->name,
-				dev);
+			  dev);
 	if (err < 0) {
 		netdev_err(dev, "failed to request interrupt\n");
 		goto exit_irq_fail;
@@ -1219,7 +1219,7 @@ int c_can_power_down(struct net_device *dev)
 	/* Wait for the PDA bit to get set */
 	time_out = jiffies + msecs_to_jiffies(INIT_WAIT_MS);
 	while (!(priv->read_reg(priv, C_CAN_STS_REG) & STATUS_PDA) &&
-				time_after(time_out, jiffies))
+	       time_after(time_out, jiffies))
 		cpu_relax();
 
 	if (time_after(jiffies, time_out))
@@ -1260,7 +1260,7 @@ int c_can_power_up(struct net_device *dev)
 	/* Wait for the PDA bit to get clear */
 	time_out = jiffies + msecs_to_jiffies(INIT_WAIT_MS);
 	while ((priv->read_reg(priv, C_CAN_STS_REG) & STATUS_PDA) &&
-				time_after(time_out, jiffies))
+	       time_after(time_out, jiffies))
 		cpu_relax();
 
 	if (time_after(jiffies, time_out)) {
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 92213d3d96eb..3d07285e46c1 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -201,16 +201,16 @@ struct c_can_priv {
 	atomic_t sie_pending;
 	unsigned long tx_dir;
 	int last_status;
-	u16 (*read_reg) (const struct c_can_priv *priv, enum reg index);
-	void (*write_reg) (const struct c_can_priv *priv, enum reg index, u16 val);
-	u32 (*read_reg32) (const struct c_can_priv *priv, enum reg index);
-	void (*write_reg32) (const struct c_can_priv *priv, enum reg index, u32 val);
+	u16 (*read_reg)(const struct c_can_priv *priv, enum reg index);
+	void (*write_reg)(const struct c_can_priv *priv, enum reg index, u16 val);
+	u32 (*read_reg32)(const struct c_can_priv *priv, enum reg index);
+	void (*write_reg32)(const struct c_can_priv *priv, enum reg index, u32 val);
 	void __iomem *base;
 	const u16 *regs;
 	void *priv;		/* for board-specific data */
 	enum c_can_dev_id type;
 	struct c_can_raminit raminit_sys;	/* RAMINIT via syscon regmap */
-	void (*raminit) (const struct c_can_priv *priv, bool enable);
+	void (*raminit)(const struct c_can_priv *priv, bool enable);
 	u32 comm_rcv_high;
 	u32 rxmasked;
 	u32 dlc[C_CAN_MSG_OBJ_TX_NUM];
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 35a914e70cc9..248baa350ef0 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -47,25 +47,25 @@ struct c_can_pci_data {
  * Handle the same by providing a common read/write interface.
  */
 static u16 c_can_pci_read_reg_aligned_to_16bit(const struct c_can_priv *priv,
-						enum reg index)
+					       enum reg index)
 {
 	return readw(priv->base + priv->regs[index]);
 }
 
 static void c_can_pci_write_reg_aligned_to_16bit(const struct c_can_priv *priv,
-						enum reg index, u16 val)
+						 enum reg index, u16 val)
 {
 	writew(val, priv->base + priv->regs[index]);
 }
 
 static u16 c_can_pci_read_reg_aligned_to_32bit(const struct c_can_priv *priv,
-						enum reg index)
+					       enum reg index)
 {
 	return readw(priv->base + 2 * priv->regs[index]);
 }
 
 static void c_can_pci_write_reg_aligned_to_32bit(const struct c_can_priv *priv,
-						enum reg index, u16 val)
+						 enum reg index, u16 val)
 {
 	writew(val, priv->base + 2 * priv->regs[index]);
 }
@@ -87,13 +87,13 @@ static u32 c_can_pci_read_reg32(const struct c_can_priv *priv, enum reg index)
 	u32 val;
 
 	val = priv->read_reg(priv, index);
-	val |= ((u32) priv->read_reg(priv, index + 1)) << 16;
+	val |= ((u32)priv->read_reg(priv, index + 1)) << 16;
 
 	return val;
 }
 
 static void c_can_pci_write_reg32(const struct c_can_priv *priv, enum reg index,
-		u32 val)
+				  u32 val)
 {
 	priv->write_reg(priv, index + 1, val >> 16);
 	priv->write_reg(priv, index, val);
@@ -216,7 +216,7 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 	}
 
 	dev_dbg(&pdev->dev, "%s device registered (regs=%p, irq=%d)\n",
-		 KBUILD_MODNAME, priv->regs, dev->irq);
+		KBUILD_MODNAME, priv->regs, dev->irq);
 
 	return 0;
 
@@ -251,7 +251,7 @@ static void c_can_pci_remove(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 }
 
-static const struct c_can_pci_data c_can_sta2x11= {
+static const struct c_can_pci_data c_can_sta2x11 = {
 	.type = BOSCH_C_CAN,
 	.reg_align = C_CAN_REG_ALIGN_32,
 	.freq = 52000000, /* 52 Mhz */
-- 
cgit v1.2.3


From 0c1b0138d641316bba1871e127fc1c7ef0e029e3 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:20:31 +0100
Subject: can: c_can: fix print formating string

This patch fixes the print format string in the driver, so that it
stays in a single line.

Link: https://lore.kernel.org/r/20210304154240.2747987-5-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can_pci.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 248baa350ef0..8a3628f76594 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -141,8 +141,7 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 			 pci_resource_len(pdev, c_can_pci_data->bar));
 	if (!addr) {
 		dev_err(&pdev->dev,
-			"device has no PCI memory resources, "
-			"failing adapter\n");
+			"device has no PCI memory resources, failing adapter\n");
 		ret = -ENOMEM;
 		goto out_release_regions;
 	}
-- 
cgit v1.2.3


From 995380f3fbfbce3d700293f375aae0a1ddad3266 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:21:36 +0100
Subject: can: c_can: replace double assignments by two single ones

This patch replaces the double assignments by two single ones, to make
checkpatch happy.

Link: https://lore.kernel.org/r/20210304154240.2747987-6-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index cd98d30dd083..7d63b227275d 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -703,7 +703,8 @@ static void c_can_do_tx(struct net_device *dev)
 	struct net_device_stats *stats = &dev->stats;
 	u32 idx, obj, pkts = 0, bytes = 0, pend, clr;
 
-	clr = pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
+	pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
+	clr = pend;
 
 	while ((idx = ffs(pend))) {
 		idx--;
@@ -1029,7 +1030,8 @@ static int c_can_poll(struct napi_struct *napi, int quota)
 
 	/* Only read the status register if a status interrupt was pending */
 	if (atomic_xchg(&priv->sie_pending, 0)) {
-		priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG);
+		priv->last_status = priv->read_reg(priv, C_CAN_STS_REG);
+		curr = priv->last_status;
 		/* Ack status on C_CAN. D_CAN is self clearing */
 		if (priv->type != BOSCH_D_CAN)
 			priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
-- 
cgit v1.2.3


From dd477500c70b9e721bcf612bce1ddf5752a2de2b Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 4 Mar 2021 15:23:19 +0100
Subject: can: c_can: fix remaining checkpatch warnings

This patch fixes the remaining checkpatch warnings in the driver.

Link: https://lore.kernel.org/r/20210304154240.2747987-7-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c     | 3 ++-
 drivers/net/can/c_can/c_can_pci.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 7d63b227275d..eeef1f7b53c7 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -753,7 +753,8 @@ static u32 c_can_adjust_pending(u32 pend)
 	/* Find the first set bit after the gap. We walk backwards
 	 * from the last set bit.
 	 */
-	for (lasts--; pend & (1 << (lasts - 1)); lasts--);
+	for (lasts--; pend & (1 << (lasts - 1)); lasts--)
+		;
 
 	return pend & ~((1 << lasts) - 1);
 }
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 8a3628f76594..9a8d5e8eb645 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -267,7 +267,7 @@ static const struct c_can_pci_data c_can_pch = {
 
 #define C_CAN_ID(_vend, _dev, _driverdata) {		\
 	PCI_DEVICE(_vend, _dev),			\
-	.driver_data = (unsigned long)&_driverdata,	\
+	.driver_data = (unsigned long)&(_driverdata),	\
 }
 
 static const struct pci_device_id c_can_pci_tbl[] = {
-- 
cgit v1.2.3


From f65735c203d5af0c32f0c89d6a431900fb8b83e2 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:30 +0100
Subject: can: c_can: remove unused code

Commit 9d23a9818cb1 ("can: c_can: Remove unused inline function") left
behind C_CAN_MSG_OBJ_TX_LAST constant.

Commit fa39b54ccf28 ("can: c_can: Get rid of pointless interrupts") left
behind C_CAN_MSG_RX_LOW_LAST and C_CAN_MSG_OBJ_RX_SPLIT constants.

The removed code also made a comment useless and misleading.

Link: https://lore.kernel.org/r/20210302215435.18286-2-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 3 +--
 drivers/net/can/c_can/c_can.h | 4 ----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index eeef1f7b53c7..b31c3beddec8 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -822,8 +822,7 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv)
  * c_can core saves a received CAN message into the first free message
  * object it finds free (starting with the lowest). Bits NEWDAT and
  * INTPND are set for this message object indicating that a new message
- * has arrived. To work-around this issue, we keep two groups of message
- * objects whose partitioning is defined by C_CAN_MSG_OBJ_RX_SPLIT.
+ * has arrived.
  *
  * We clear the newdat bit right away.
  *
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 3d07285e46c1..7a7d3fb4b1c3 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -32,11 +32,7 @@
 				C_CAN_MSG_OBJ_RX_NUM - 1)
 
 #define C_CAN_MSG_OBJ_TX_FIRST	(C_CAN_MSG_OBJ_RX_LAST + 1)
-#define C_CAN_MSG_OBJ_TX_LAST	(C_CAN_MSG_OBJ_TX_FIRST + \
-				C_CAN_MSG_OBJ_TX_NUM - 1)
 
-#define C_CAN_MSG_OBJ_RX_SPLIT	9
-#define C_CAN_MSG_RX_LOW_LAST	(C_CAN_MSG_OBJ_RX_SPLIT - 1)
 #define RECEIVE_OBJECT_BITS	0x0000ffff
 
 enum reg {
-- 
cgit v1.2.3


From c8a6b44388cb60a3851520902e286c202fc5c725 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:31 +0100
Subject: can: c_can: fix indentation

Commit 524369e2391f ("can: c_can: remove obsolete STRICT_FRAME_ORDERING Kconfig option")
left behind wrong indentation, fix it.

Link: https://lore.kernel.org/r/20210302215435.18286-3-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index b31c3beddec8..8212f3d98aa9 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -762,7 +762,7 @@ static u32 c_can_adjust_pending(u32 pend)
 static inline void c_can_rx_object_get(struct net_device *dev,
 				       struct c_can_priv *priv, u32 obj)
 {
-		c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high);
+	c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high);
 }
 
 static inline void c_can_rx_finalize(struct net_device *dev,
-- 
cgit v1.2.3


From eddf67115040b9e875e8a153816df89f66b4c5b6 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:32 +0100
Subject: can: c_can: add a comment about IF_RX interface's use

After reading the commit 640916db2bf7 ("can: c_can: Make it SMP safe")
it may sound strange to see the IF_RX interface used by the
can_inval_tx_object function. A comment was added to avoid any
misunderstanding.

Link: https://lore.kernel.org/r/20210302215435.18286-4-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 8212f3d98aa9..980abf6a8609 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -710,6 +710,11 @@ static void c_can_do_tx(struct net_device *dev)
 		idx--;
 		pend &= ~(1 << idx);
 		obj = idx + C_CAN_MSG_OBJ_TX_FIRST;
+
+		/* We use IF_RX interface instead of IF_TX because we
+		 * are called from c_can_poll(), which runs inside
+		 * NAPI. We are not trasmitting.
+		 */
 		c_can_inval_tx_object(dev, IF_RX, obj);
 		can_get_echo_skb(dev, idx, NULL);
 		bytes += priv->dlc[idx];
-- 
cgit v1.2.3


From fcbded019855136a3d99d74ef8b44e8f87120fb2 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:33 +0100
Subject: can: c_can: use 32-bit write to set arbitration register

The arbitration register is already set up with 32-bit writes in the
other parts of the code except for this point.

Link: https://lore.kernel.org/r/20210302215435.18286-5-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 980abf6a8609..8fd20304f1ec 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -279,8 +279,7 @@ static void c_can_inval_msg_object(struct net_device *dev, int iface, int obj)
 {
 	struct c_can_priv *priv = netdev_priv(dev);
 
-	priv->write_reg(priv, C_CAN_IFACE(ARB1_REG, iface), 0);
-	priv->write_reg(priv, C_CAN_IFACE(ARB2_REG, iface), 0);
+	priv->write_reg32(priv, C_CAN_IFACE(ARB1_REG, iface), 0);
 	c_can_inval_tx_object(dev, iface, obj);
 }
 
-- 
cgit v1.2.3


From 13831ce69c775fb8186275fdeb91fa6daff2196c Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:34 +0100
Subject: can: c_can: prepare to up the message objects number

As pointed by commit c0a9f4d396c9 ("can: c_can: Reduce register
access") the "driver casts the 16 message objects in stone, which is
completely braindead as contemporary hardware has up to 128 message
objects".

The patch prepares the module to extend the number of message objects
beyond the 32 currently managed. This was achieved by transforming the
constants used to manage RX/TX messages into variables without
changing the driver policy.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20210302215435.18286-6-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c          | 50 +++++++++++++++++++++-------------
 drivers/net/can/c_can/c_can.h          | 23 +++++++---------
 drivers/net/can/c_can/c_can_pci.c      |  2 +-
 drivers/net/can/c_can/c_can_platform.c |  2 +-
 4 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 8fd20304f1ec..032f8200668a 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -170,9 +170,6 @@
 /* Wait for ~1 sec for INIT bit */
 #define INIT_WAIT_MS		1000
 
-/* napi related */
-#define C_CAN_NAPI_WEIGHT	C_CAN_MSG_OBJ_RX_NUM
-
 /* c_can lec values */
 enum c_can_lec_type {
 	LEC_NO_ERROR = 0,
@@ -306,7 +303,7 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
 	 * first, i.e. clear the MSGVAL flag in the arbiter.
 	 */
 	if (rtr != (bool)test_bit(idx, &priv->tx_dir)) {
-		u32 obj = idx + C_CAN_MSG_OBJ_TX_FIRST;
+		u32 obj = idx + priv->msg_obj_tx_first;
 
 		c_can_inval_msg_object(dev, iface, obj);
 		change_bit(idx, &priv->tx_dir);
@@ -443,10 +440,10 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
 	 * prioritized. The lowest buffer number wins.
 	 */
 	idx = fls(atomic_read(&priv->tx_active));
-	obj = idx + C_CAN_MSG_OBJ_TX_FIRST;
+	obj = idx + priv->msg_obj_tx_first;
 
 	/* If this is the last buffer, stop the xmit queue */
-	if (idx == C_CAN_MSG_OBJ_TX_NUM - 1)
+	if (idx == priv->msg_obj_tx_num - 1)
 		netif_stop_queue(dev);
 	/* Store the message in the interface so we can call
 	 * can_put_echo_skb(). We must do this before we enable
@@ -527,17 +524,18 @@ static int c_can_set_bittiming(struct net_device *dev)
  */
 static void c_can_configure_msg_objects(struct net_device *dev)
 {
+	struct c_can_priv *priv = netdev_priv(dev);
 	int i;
 
 	/* first invalidate all message objects */
-	for (i = C_CAN_MSG_OBJ_RX_FIRST; i <= C_CAN_NO_OF_OBJECTS; i++)
+	for (i = priv->msg_obj_rx_first; i <= priv->msg_obj_num; i++)
 		c_can_inval_msg_object(dev, IF_RX, i);
 
 	/* setup receive message objects */
-	for (i = C_CAN_MSG_OBJ_RX_FIRST; i < C_CAN_MSG_OBJ_RX_LAST; i++)
+	for (i = priv->msg_obj_rx_first; i < priv->msg_obj_rx_last; i++)
 		c_can_setup_receive_object(dev, IF_RX, i, 0, 0, IF_MCONT_RCV);
 
-	c_can_setup_receive_object(dev, IF_RX, C_CAN_MSG_OBJ_RX_LAST, 0, 0,
+	c_can_setup_receive_object(dev, IF_RX, priv->msg_obj_rx_last, 0, 0,
 				   IF_MCONT_RCV_EOB);
 }
 
@@ -708,7 +706,7 @@ static void c_can_do_tx(struct net_device *dev)
 	while ((idx = ffs(pend))) {
 		idx--;
 		pend &= ~(1 << idx);
-		obj = idx + C_CAN_MSG_OBJ_TX_FIRST;
+		obj = idx + priv->msg_obj_tx_first;
 
 		/* We use IF_RX interface instead of IF_TX because we
 		 * are called from c_can_poll(), which runs inside
@@ -723,7 +721,7 @@ static void c_can_do_tx(struct net_device *dev)
 	/* Clear the bits in the tx_active mask */
 	atomic_sub(clr, &priv->tx_active);
 
-	if (clr & (1 << (C_CAN_MSG_OBJ_TX_NUM - 1)))
+	if (clr & (1 << (priv->msg_obj_tx_num - 1)))
 		netif_wake_queue(dev);
 
 	if (pkts) {
@@ -737,11 +735,11 @@ static void c_can_do_tx(struct net_device *dev)
  * raced with the hardware or failed to readout all upper
  * objects in the last run due to quota limit.
  */
-static u32 c_can_adjust_pending(u32 pend)
+static u32 c_can_adjust_pending(u32 pend, u32 rx_mask)
 {
 	u32 weight, lasts;
 
-	if (pend == RECEIVE_OBJECT_BITS)
+	if (pend == rx_mask)
 		return pend;
 
 	/* If the last set bit is larger than the number of pending
@@ -840,8 +838,7 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 	/* It is faster to read only one 16bit register. This is only possible
 	 * for a maximum number of 16 objects.
 	 */
-	BUILD_BUG_ON_MSG(C_CAN_MSG_OBJ_RX_LAST > 16,
-			 "Implementation does not support more message objects than 16");
+	WARN_ON(priv->msg_obj_rx_last > 16);
 
 	while (quota > 0) {
 		if (!pend) {
@@ -851,7 +848,8 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 			/* If the pending field has a gap, handle the
 			 * bits above the gap first.
 			 */
-			toread = c_can_adjust_pending(pend);
+			toread = c_can_adjust_pending(pend,
+						      priv->msg_obj_rx_mask);
 		} else {
 			toread = pend;
 		}
@@ -1181,17 +1179,31 @@ static int c_can_close(struct net_device *dev)
 	return 0;
 }
 
-struct net_device *alloc_c_can_dev(void)
+struct net_device *alloc_c_can_dev(int msg_obj_num)
 {
 	struct net_device *dev;
 	struct c_can_priv *priv;
+	int msg_obj_tx_num = msg_obj_num / 2;
 
-	dev = alloc_candev(sizeof(struct c_can_priv), C_CAN_MSG_OBJ_TX_NUM);
+	dev = alloc_candev(struct_size(priv, dlc, msg_obj_tx_num),
+			   msg_obj_tx_num);
 	if (!dev)
 		return NULL;
 
 	priv = netdev_priv(dev);
-	netif_napi_add(dev, &priv->napi, c_can_poll, C_CAN_NAPI_WEIGHT);
+	priv->msg_obj_num = msg_obj_num;
+	priv->msg_obj_rx_num = msg_obj_num - msg_obj_tx_num;
+	priv->msg_obj_rx_first = 1;
+	priv->msg_obj_rx_last =
+		priv->msg_obj_rx_first + priv->msg_obj_rx_num - 1;
+	priv->msg_obj_rx_mask = GENMASK(priv->msg_obj_rx_num - 1, 0);
+
+	priv->msg_obj_tx_num = msg_obj_tx_num;
+	priv->msg_obj_tx_first = priv->msg_obj_rx_last + 1;
+	priv->msg_obj_tx_last =
+		priv->msg_obj_tx_first + priv->msg_obj_tx_num - 1;
+
+	netif_napi_add(dev, &priv->napi, c_can_poll, priv->msg_obj_rx_num);
 
 	priv->dev = dev;
 	priv->can.bittiming_const = &c_can_bittiming_const;
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 7a7d3fb4b1c3..ee703b7ed42d 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -22,18 +22,7 @@
 #ifndef C_CAN_H
 #define C_CAN_H
 
-/* message object split */
 #define C_CAN_NO_OF_OBJECTS	32
-#define C_CAN_MSG_OBJ_RX_NUM	16
-#define C_CAN_MSG_OBJ_TX_NUM	16
-
-#define C_CAN_MSG_OBJ_RX_FIRST	1
-#define C_CAN_MSG_OBJ_RX_LAST	(C_CAN_MSG_OBJ_RX_FIRST + \
-				C_CAN_MSG_OBJ_RX_NUM - 1)
-
-#define C_CAN_MSG_OBJ_TX_FIRST	(C_CAN_MSG_OBJ_RX_LAST + 1)
-
-#define RECEIVE_OBJECT_BITS	0x0000ffff
 
 enum reg {
 	C_CAN_CTRL_REG = 0,
@@ -193,6 +182,14 @@ struct c_can_priv {
 	struct napi_struct napi;
 	struct net_device *dev;
 	struct device *device;
+	unsigned int msg_obj_num;
+	unsigned int msg_obj_rx_num;
+	unsigned int msg_obj_tx_num;
+	unsigned int msg_obj_rx_first;
+	unsigned int msg_obj_rx_last;
+	unsigned int msg_obj_tx_first;
+	unsigned int msg_obj_tx_last;
+	u32 msg_obj_rx_mask;
 	atomic_t tx_active;
 	atomic_t sie_pending;
 	unsigned long tx_dir;
@@ -209,10 +206,10 @@ struct c_can_priv {
 	void (*raminit)(const struct c_can_priv *priv, bool enable);
 	u32 comm_rcv_high;
 	u32 rxmasked;
-	u32 dlc[C_CAN_MSG_OBJ_TX_NUM];
+	u32 dlc[];
 };
 
-struct net_device *alloc_c_can_dev(void);
+struct net_device *alloc_c_can_dev(int msg_obj_num);
 void free_c_can_dev(struct net_device *dev);
 int register_c_can_dev(struct net_device *dev);
 void unregister_c_can_dev(struct net_device *dev);
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index 9a8d5e8eb645..fa419e795498 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -147,7 +147,7 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 	}
 
 	/* allocate the c_can device */
-	dev = alloc_c_can_dev();
+	dev = alloc_c_can_dev(C_CAN_NO_OF_OBJECTS);
 	if (!dev) {
 		ret = -ENOMEM;
 		goto out_iounmap;
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 47b251b1607c..7ece36f61c6f 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -294,7 +294,7 @@ static int c_can_plat_probe(struct platform_device *pdev)
 	}
 
 	/* allocate the c_can device */
-	dev = alloc_c_can_dev();
+	dev = alloc_c_can_dev(C_CAN_NO_OF_OBJECTS);
 	if (!dev) {
 		ret = -ENOMEM;
 		goto exit;
-- 
cgit v1.2.3


From 132f2d45fb2302a582aef617ea766f3fa52a084c Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dariobin@libero.it>
Date: Tue, 2 Mar 2021 22:54:35 +0100
Subject: can: c_can: add support to 64 message objects

D_CAN controller supports 16, 32, 64 or 128 message objects, comparing
to 32 on C_CAN. AM335x/AM437x Sitara processors and DRA7 SOC all
instantiate a D_CAN controller with 64 message objects, as described
in the "DCAN features" subsection of the CAN chapter of their
technical reference manuals.

The driver policy has been kept unchanged, and as in the previous
version, the first half of the message objects is used for reception
and the second for transmission.

The I/O load is increased only in the case of 64 message objects,
keeping it unchanged in the case of 32. Two 32-bit read accesses are
in fact required, which however remained at 16-bit for configurations
with 32 message objects.

Link: https://lore.kernel.org/r/20210302215435.18286-7-dariobin@libero.it
Signed-off-by: Dario Binacchi <dariobin@libero.it>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.c          | 27 +++++++++++++++------------
 drivers/net/can/c_can/c_can.h          |  5 +++--
 drivers/net/can/c_can/c_can_pci.c      |  6 +++++-
 drivers/net/can/c_can/c_can_platform.c |  6 +++++-
 4 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 032f8200668a..313793f6922d 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -454,7 +454,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb,
 	can_put_echo_skb(skb, dev, idx, 0);
 
 	/* Update the active bits */
-	atomic_add((1 << idx), &priv->tx_active);
+	atomic_add(BIT(idx), &priv->tx_active);
 	/* Start transmission */
 	c_can_object_put(dev, IF_TX, obj, IF_COMM_TX);
 
@@ -700,12 +700,15 @@ static void c_can_do_tx(struct net_device *dev)
 	struct net_device_stats *stats = &dev->stats;
 	u32 idx, obj, pkts = 0, bytes = 0, pend, clr;
 
-	pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
+	if (priv->msg_obj_tx_last > 32)
+		pend = priv->read_reg32(priv, C_CAN_INTPND3_REG);
+	else
+		pend = priv->read_reg(priv, C_CAN_INTPND2_REG);
 	clr = pend;
 
 	while ((idx = ffs(pend))) {
 		idx--;
-		pend &= ~(1 << idx);
+		pend &= ~BIT(idx);
 		obj = idx + priv->msg_obj_tx_first;
 
 		/* We use IF_RX interface instead of IF_TX because we
@@ -721,7 +724,7 @@ static void c_can_do_tx(struct net_device *dev)
 	/* Clear the bits in the tx_active mask */
 	atomic_sub(clr, &priv->tx_active);
 
-	if (clr & (1 << (priv->msg_obj_tx_num - 1)))
+	if (clr & BIT(priv->msg_obj_tx_num - 1))
 		netif_wake_queue(dev);
 
 	if (pkts) {
@@ -755,10 +758,10 @@ static u32 c_can_adjust_pending(u32 pend, u32 rx_mask)
 	/* Find the first set bit after the gap. We walk backwards
 	 * from the last set bit.
 	 */
-	for (lasts--; pend & (1 << (lasts - 1)); lasts--)
+	for (lasts--; pend & BIT(lasts - 1); lasts--)
 		;
 
-	return pend & ~((1 << lasts) - 1);
+	return pend & ~GENMASK(lasts - 1, 0);
 }
 
 static inline void c_can_rx_object_get(struct net_device *dev,
@@ -814,7 +817,12 @@ static int c_can_read_objects(struct net_device *dev, struct c_can_priv *priv,
 
 static inline u32 c_can_get_pending(struct c_can_priv *priv)
 {
-	u32 pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
+	u32 pend;
+
+	if (priv->msg_obj_rx_last > 16)
+		pend = priv->read_reg32(priv, C_CAN_NEWDAT1_REG);
+	else
+		pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG);
 
 	return pend;
 }
@@ -835,11 +843,6 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota)
 	struct c_can_priv *priv = netdev_priv(dev);
 	u32 pkts = 0, pend = 0, toread, n;
 
-	/* It is faster to read only one 16bit register. This is only possible
-	 * for a maximum number of 16 objects.
-	 */
-	WARN_ON(priv->msg_obj_rx_last > 16);
-
 	while (quota > 0) {
 		if (!pend) {
 			pend = c_can_get_pending(priv);
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index ee703b7ed42d..8acedd9e63a7 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -22,8 +22,6 @@
 #ifndef C_CAN_H
 #define C_CAN_H
 
-#define C_CAN_NO_OF_OBJECTS	32
-
 enum reg {
 	C_CAN_CTRL_REG = 0,
 	C_CAN_CTRL_EX_REG,
@@ -61,6 +59,7 @@ enum reg {
 	C_CAN_NEWDAT2_REG,
 	C_CAN_INTPND1_REG,
 	C_CAN_INTPND2_REG,
+	C_CAN_INTPND3_REG,
 	C_CAN_MSGVAL1_REG,
 	C_CAN_MSGVAL2_REG,
 	C_CAN_FUNCTION_REG,
@@ -122,6 +121,7 @@ static const u16 __maybe_unused reg_map_d_can[] = {
 	[C_CAN_NEWDAT2_REG]	= 0x9E,
 	[C_CAN_INTPND1_REG]	= 0xB0,
 	[C_CAN_INTPND2_REG]	= 0xB2,
+	[C_CAN_INTPND3_REG]	= 0xB4,
 	[C_CAN_MSGVAL1_REG]	= 0xC4,
 	[C_CAN_MSGVAL2_REG]	= 0xC6,
 	[C_CAN_IF1_COMREQ_REG]	= 0x100,
@@ -161,6 +161,7 @@ struct raminit_bits {
 
 struct c_can_driver_data {
 	enum c_can_dev_id id;
+	unsigned int msg_obj_num;
 
 	/* RAMINIT register description. Optional. */
 	const struct raminit_bits *raminit_bits; /* Array of START/DONE bit positions */
diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c
index fa419e795498..bf2f8c3da1c1 100644
--- a/drivers/net/can/c_can/c_can_pci.c
+++ b/drivers/net/can/c_can/c_can_pci.c
@@ -31,6 +31,8 @@ enum c_can_pci_reg_align {
 struct c_can_pci_data {
 	/* Specify if is C_CAN or D_CAN */
 	enum c_can_dev_id type;
+	/* Number of message objects */
+	unsigned int msg_obj_num;
 	/* Set the register alignment in the memory */
 	enum c_can_pci_reg_align reg_align;
 	/* Set the frequency */
@@ -147,7 +149,7 @@ static int c_can_pci_probe(struct pci_dev *pdev,
 	}
 
 	/* allocate the c_can device */
-	dev = alloc_c_can_dev(C_CAN_NO_OF_OBJECTS);
+	dev = alloc_c_can_dev(c_can_pci_data->msg_obj_num);
 	if (!dev) {
 		ret = -ENOMEM;
 		goto out_iounmap;
@@ -252,6 +254,7 @@ static void c_can_pci_remove(struct pci_dev *pdev)
 
 static const struct c_can_pci_data c_can_sta2x11 = {
 	.type = BOSCH_C_CAN,
+	.msg_obj_num = 32,
 	.reg_align = C_CAN_REG_ALIGN_32,
 	.freq = 52000000, /* 52 Mhz */
 	.bar = 0,
@@ -259,6 +262,7 @@ static const struct c_can_pci_data c_can_sta2x11 = {
 
 static const struct c_can_pci_data c_can_pch = {
 	.type = BOSCH_C_CAN,
+	.msg_obj_num = 32,
 	.reg_align = C_CAN_REG_32,
 	.freq = 50000000, /* 50 MHz */
 	.init = c_can_pci_reset_pch,
diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c
index 7ece36f61c6f..36950363682f 100644
--- a/drivers/net/can/c_can/c_can_platform.c
+++ b/drivers/net/can/c_can/c_can_platform.c
@@ -193,10 +193,12 @@ static void c_can_hw_raminit(const struct c_can_priv *priv, bool enable)
 
 static const struct c_can_driver_data c_can_drvdata = {
 	.id = BOSCH_C_CAN,
+	.msg_obj_num = 32,
 };
 
 static const struct c_can_driver_data d_can_drvdata = {
 	.id = BOSCH_D_CAN,
+	.msg_obj_num = 32,
 };
 
 static const struct raminit_bits dra7_raminit_bits[] = {
@@ -206,6 +208,7 @@ static const struct raminit_bits dra7_raminit_bits[] = {
 
 static const struct c_can_driver_data dra7_dcan_drvdata = {
 	.id = BOSCH_D_CAN,
+	.msg_obj_num = 64,
 	.raminit_num = ARRAY_SIZE(dra7_raminit_bits),
 	.raminit_bits = dra7_raminit_bits,
 	.raminit_pulse = true,
@@ -218,6 +221,7 @@ static const struct raminit_bits am3352_raminit_bits[] = {
 
 static const struct c_can_driver_data am3352_dcan_drvdata = {
 	.id = BOSCH_D_CAN,
+	.msg_obj_num = 64,
 	.raminit_num = ARRAY_SIZE(am3352_raminit_bits),
 	.raminit_bits = am3352_raminit_bits,
 };
@@ -294,7 +298,7 @@ static int c_can_plat_probe(struct platform_device *pdev)
 	}
 
 	/* allocate the c_can device */
-	dev = alloc_c_can_dev(C_CAN_NO_OF_OBJECTS);
+	dev = alloc_c_can_dev(drvdata->msg_obj_num);
 	if (!dev) {
 		ret = -ENOMEM;
 		goto exit;
-- 
cgit v1.2.3


From 5bdca94ff30d99168c3c09394da664c3c37c6834 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 29 Mar 2021 22:41:50 -0700
Subject: bpf: Update bpf_design_QA.rst to clarify the kfunc call is not ABI

This patch updates bpf_design_QA.rst to clarify that the kernel
function callable by bpf program is not an ABI.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210330054150.2933542-1-kafai@fb.com
---
 Documentation/bpf/bpf_design_QA.rst | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index 0e15f9b05c9d..437de2a7a5de 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -258,3 +258,18 @@ Q: Can BPF functionality such as new program or map types, new
 helpers, etc be added out of kernel module code?
 
 A: NO.
+
+Q: Directly calling kernel function is an ABI?
+----------------------------------------------
+Q: Some kernel functions (e.g. tcp_slow_start) can be called
+by BPF programs.  Do these kernel functions become an ABI?
+
+A: NO.
+
+The kernel function protos will change and the bpf programs will be
+rejected by the verifier.  Also, for example, some of the bpf-callable
+kernel functions have already been used by other kernel tcp
+cc (congestion-control) implementations.  If any of these kernel
+functions has changed, both the in-tree and out-of-tree kernel tcp cc
+implementations have to be changed.  The same goes for the bpf
+programs and they have to be adjusted accordingly.
-- 
cgit v1.2.3


From 2ba4badca9977b64c966b0177920daadbd5501fe Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 29 Mar 2021 22:41:56 -0700
Subject: bpf: selftests: Update clang requirement in README.rst for testing
 kfunc call

This patch updates the README.rst to specify the clang requirement
to compile the bpf selftests that call kernel function.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210330054156.2933804-1-kafai@fb.com
---
 tools/testing/selftests/bpf/README.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3464161c8eea..65fe318d1e71 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -179,3 +179,17 @@ types, which was introduced in `Clang 13`__. The older Clang versions will
 either crash when compiling these tests, or generate an incorrect BTF.
 
 __  https://reviews.llvm.org/D83289
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF.  It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+  libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://reviews.llvm.org/D93563
-- 
cgit v1.2.3


From 05d817031ff9686a8206039b19e37616cf9e1d44 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 26 Mar 2021 21:25:02 -0700
Subject: libbpf: Fix memory leak when emitting final btf_ext

Free temporary allocated memory used to construct finalized .BTF.ext data.
Found by Coverity static analysis on libbpf's Github repo.

Fixes: 8fd27bf69b86 ("libbpf: Add BPF static linker BTF and BTF.ext support")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210327042502.969745-1-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index a29d62ff8041..46b16cbdcda3 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1906,8 +1906,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
 			struct dst_sec *sec = &linker->secs[i];
 
 			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
-			if (sz < 0)
-				return sz;
+			if (sz < 0) {
+				err = sz;
+				goto out;
+			}
 
 			cur += sz;
 		}
@@ -1921,8 +1923,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
 			struct dst_sec *sec = &linker->secs[i];
 
 			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
-			if (sz < 0)
-				return sz;
+			if (sz < 0) {
+				err = sz;
+				goto out;
+			}
 
 			cur += sz;
 		}
@@ -1936,8 +1940,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
 			struct dst_sec *sec = &linker->secs[i];
 
 			sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
-			if (sz < 0)
-				return sz;
+			if (sz < 0) {
+				err = sz;
+				goto out;
+			}
 
 			cur += sz;
 		}
@@ -1948,8 +1954,10 @@ static int finalize_btf_ext(struct bpf_linker *linker)
 	if (err) {
 		linker->btf_ext = NULL;
 		pr_warn("failed to parse final .BTF.ext data: %d\n", err);
-		return err;
+		goto out;
 	}
 
-	return 0;
+out:
+	free(data);
+	return err;
 }
-- 
cgit v1.2.3


From 9f33df73a929ed91dddad036f518d690b3094eda Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:00 +0200
Subject: selftests: xsk: Don't call worker_pkt_dump() for stats test

For TEST_TYPE_STATS, worker_pkt_validate() that places frames onto
pkt_buf is not called. Therefore, when dump mode is set, don't call
worker_pkt_dump() for mentioned test type, so that it won't crash on
pkt_buf() access.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-2-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1e21a3172687..09429ed2ddf6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -999,7 +999,7 @@ static void testapp_validate(void)
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
 
-	if (debug_pkt_dump) {
+	if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
 		worker_pkt_dump();
 		for (int iter = 0; iter < num_frames - 1; iter++) {
 			free(pkt_buf[iter]->payload);
-- 
cgit v1.2.3


From e623bfdef713ddda1b9f57d1759df3a1cd97255a Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:01 +0200
Subject: selftests: xsk: Remove struct ifaceconfigobj

ifaceconfigobj is not really useful, it is possible to keep the
functionality and simplify the code.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-3-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 65 +++++++++++++++-----------------
 tools/testing/selftests/bpf/xdpxceiver.h |  9 -----
 2 files changed, 30 insertions(+), 44 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 09429ed2ddf6..e2ff3cd48ccf 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -93,6 +93,13 @@ typedef __u16 __sum16;
 #include "xdpxceiver.h"
 #include "../kselftest.h"
 
+static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static const char *IP1 = "192.168.100.162";
+static const char *IP2 = "192.168.100.161";
+static const u16 UDP_PORT1 = 2020;
+static const u16 UDP_PORT2 = 2121;
+
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
 	if (configured_mode == TEST_MODE_UNCONFIGURED) {
@@ -1053,25 +1060,24 @@ static void testapp_stats(void)
 	print_ksft_result();
 }
 
-static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
+static void init_iface(struct ifobject *ifobj, const char *dst_mac,
+		       const char *src_mac, const char *dst_ip,
+		       const char *src_ip, const u16 dst_port,
+		       const u16 src_port)
 {
-	/*Init interface0 */
-	ifdict[0]->fv.vector = tx;
-	memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN);
-	memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN);
-	ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr;
-	ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr;
-	ifdict[0]->dst_port = ifaceconfig->dst_port;
-	ifdict[0]->src_port = ifaceconfig->src_port;
-
-	/*Init interface1 */
-	ifdict[1]->fv.vector = rx;
-	memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN);
-	memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN);
-	ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr;
-	ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr;
-	ifdict[1]->dst_port = ifaceconfig->src_port;
-	ifdict[1]->src_port = ifaceconfig->dst_port;
+	struct in_addr ip;
+
+	memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
+	memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
+
+	inet_aton(dst_ip, &ip);
+	ifobj->dst_ip = ip.s_addr;
+
+	inet_aton(src_ip, &ip);
+	ifobj->src_ip = ip.s_addr;
+
+	ifobj->dst_port = dst_port;
+	ifobj->src_port = src_port;
 }
 
 static void *nsdisablemodethread(void *args)
@@ -1175,26 +1181,11 @@ static void run_pkt_test(int mode, int type)
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+	int i, j;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
 		exit_with_error(errno);
 
-	const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
-	const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
-	const char *IP1 = "192.168.100.162";
-	const char *IP2 = "192.168.100.161";
-	u16 UDP_DST_PORT = 2020;
-	u16 UDP_SRC_PORT = 2121;
-	int i, j;
-
-	ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
-	memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
-	memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
-	inet_aton(IP1, &ifaceconfig->dst_ip);
-	inet_aton(IP2, &ifaceconfig->src_ip);
-	ifaceconfig->dst_port = UDP_DST_PORT;
-	ifaceconfig->src_port = UDP_SRC_PORT;
-
 	for (int i = 0; i < MAX_INTERFACES; i++) {
 		ifdict[i] = malloc(sizeof(struct ifobject));
 		if (!ifdict[i])
@@ -1209,7 +1200,11 @@ int main(int argc, char **argv)
 
 	num_frames = ++opt_pkt_count;
 
-	init_iface_config(ifaceconfig);
+	ifdict[0]->fv.vector = tx;
+	init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2);
+
+	ifdict[1]->fv.vector = rx;
+	init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1);
 
 	disable_xdp_mode(XDP_FLAGS_DRV_MODE);
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 30314ef305c2..8f9308099318 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -125,15 +125,6 @@ struct generic_data {
 	u32 seqnum;
 };
 
-struct ifaceconfigobj {
-	u8 dst_mac[ETH_ALEN];
-	u8 src_mac[ETH_ALEN];
-	struct in_addr dst_ip;
-	struct in_addr src_ip;
-	u16 src_port;
-	u16 dst_port;
-} *ifaceconfig;
-
 struct ifobject {
 	int ifindex;
 	int ifdict_index;
-- 
cgit v1.2.3


From 7519c387e69d367075bf493de8a9ea427c9d2a1b Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:02 +0200
Subject: selftests: xsk: Remove unused function

Probably it was ported from xdpsock but is not used anywhere.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-4-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index e2ff3cd48ccf..593e8be41fce 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -153,19 +153,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
 	return dest;
 }
 
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
-	/* add up 16-bit and 16-bit for 16+c bit */
-	x = (x & 0xffff) + (x >> 16);
-	/* add up carry.. */
-	x = (x & 0xffff) + (x >> 16);
-	return x;
-}
-
 /*
  * Fold a partial checksum
  * This function code has been taken from
-- 
cgit v1.2.3


From 965d2cb0f675e5f60af441c9bd430c284d66179d Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:03 +0200
Subject: selftests: xsk: Remove inline keyword from source file

Follow the kernel coding style guidelines and let compiler do the
decision about inlining.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-5-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 593e8be41fce..77880abd818c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -158,7 +158,7 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
  * This function code has been taken from
  * Linux kernel include/asm-generic/checksum.h
  */
-static inline __u16 csum_fold(__u32 csum)
+static __u16 csum_fold(__u32 csum)
 {
 	u32 sum = (__force u32)csum;
 
@@ -171,7 +171,7 @@ static inline __u16 csum_fold(__u32 csum)
  * This function code has been taken from
  * Linux kernel lib/checksum.c
  */
-static inline u32 from64to32(u64 x)
+static u32 from64to32(u64 x)
 {
 	/* add up 32-bit and 32-bit for 32+c bit */
 	x = (x & 0xffffffff) + (x >> 32);
@@ -180,13 +180,11 @@ static inline u32 from64to32(u64 x)
 	return (u32)x;
 }
 
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
-
 /*
  * This function code has been taken from
  * Linux kernel lib/checksum.c
  */
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
 {
 	unsigned long long s = (__force u32)sum;
 
@@ -204,13 +202,12 @@ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u3
  * This function has been taken from
  * Linux kernel include/asm-generic/checksum.h
  */
-static inline __u16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
 {
 	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
 
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
 {
 	u32 csum = 0;
 	u32 cnt = 0;
@@ -500,7 +497,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
 	exit_with_error(errno);
 }
 
-static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
 {
 	unsigned int rcvd;
 	u32 idx;
@@ -605,7 +602,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 	complete_tx_only(xsk, batch_size);
 }
 
-static inline int get_batch_size(int pkt_cnt)
+static int get_batch_size(int pkt_cnt)
 {
 	if (!opt_pkt_count)
 		return BATCH_SIZE;
-- 
cgit v1.2.3


From aa2d61c154f9387883664e3873bc0700419640a3 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:04 +0200
Subject: selftests: xsk: Simplify frame traversal in dumping thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Store offsets to each layer in a separate variables rather than compute
them every single time.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-6-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 47 ++++++++++++++------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 77880abd818c..ae2f7d71f041 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -658,45 +658,40 @@ static void tx_only_all(struct ifobject *ifobject)
 
 static void worker_pkt_dump(void)
 {
-	struct in_addr ipaddr;
+	struct ethhdr *ethhdr;
+	struct iphdr *iphdr;
+	struct udphdr *udphdr;
+	char s[128];
+	int payload;
+	void *ptr;
 
 	fprintf(stdout, "---------------------------------------\n");
 	for (int iter = 0; iter < num_frames - 1; iter++) {
+		ptr = pkt_buf[iter]->payload;
+		ethhdr = ptr;
+		iphdr = ptr + sizeof(*ethhdr);
+		udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
+
 		/*extract L2 frame */
 		fprintf(stdout, "DEBUG>> L2: dst mac: ");
 		for (int i = 0; i < ETH_ALEN; i++)
-			fprintf(stdout, "%02X", ((struct ethhdr *)
-						 pkt_buf[iter]->payload)->h_dest[i]);
+			fprintf(stdout, "%02X", ethhdr->h_dest[i]);
 
 		fprintf(stdout, "\nDEBUG>> L2: src mac: ");
 		for (int i = 0; i < ETH_ALEN; i++)
-			fprintf(stdout, "%02X", ((struct ethhdr *)
-						 pkt_buf[iter]->payload)->h_source[i]);
+			fprintf(stdout, "%02X", ethhdr->h_source[i]);
 
 		/*extract L3 frame */
-		fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
-			((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
-
-		ipaddr.s_addr =
-		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
-		fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
-
-		ipaddr.s_addr =
-		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
-		fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
-
+		fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+			inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+			inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
 		/*extract L4 frame */
-		fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
-			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
-						 sizeof(struct ethhdr) +
-						 sizeof(struct iphdr)))->source));
-
-		fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
-			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
-						 sizeof(struct ethhdr) +
-						 sizeof(struct iphdr)))->dest));
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
 		/*extract L5 frame */
-		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+		payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
 
 		if (payload == EOT) {
 			print_verbose("End-of-transmission frame received\n");
-- 
cgit v1.2.3


From 10397994d30f2de51bfd9321ed9ddb789464f572 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:05 +0200
Subject: libbpf: xsk: Use bpf_link

Currently, if there are multiple xdpsock instances running on a single
interface and in case one of the instances is terminated, the rest of
them are left in an inoperable state due to the fact of unloaded XDP
prog from interface.

Consider the scenario below:

// load xdp prog and xskmap and add entry to xskmap at idx 10
$ sudo ./xdpsock -i ens801f0 -t -q 10

// add entry to xskmap at idx 11
$ sudo ./xdpsock -i ens801f0 -t -q 11

terminate one of the processes and another one is unable to work due to
the fact that the XDP prog was unloaded from interface.

To address that, step away from setting bpf prog in favour of bpf_link.
This means that refcounting of BPF resources will be done automatically
by bpf_link itself.

Provide backward compatibility by checking if underlying system is
bpf_link capable. Do this by looking up/creating bpf_link on loopback
device. If it failed in any way, stick with netlink-based XDP prog.
therwise, use bpf_link-based logic.

When setting up BPF resources during xsk socket creation, check whether
bpf_link for a given ifindex already exists via set of calls to
bpf_link_get_next_id -> bpf_link_get_fd_by_id -> bpf_obj_get_info_by_fd
and comparing the ifindexes from bpf_link and xsk socket.

For case where resources exist but they are not AF_XDP related, bail out
and ask user to remove existing prog and then retry.

Lastly, do a bit of refactoring within __xsk_setup_xdp_prog and pull out
existing code branches based on prog_id value onto separate functions
that are responsible for resource initialization if prog_id was 0 and
for lookup existing resources for non-zero prog_id as that implies that
XDP program is present on the underlying net device. This in turn makes
it easier to follow, especially the teardown part of both branches.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-7-maciej.fijalkowski@intel.com
---
 tools/lib/bpf/xsk.c | 258 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 213 insertions(+), 45 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 526fc35c0b23..95da0e19f4a5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -28,6 +28,7 @@
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <linux/if_link.h>
 
 #include "bpf.h"
 #include "libbpf.h"
@@ -70,8 +71,10 @@ struct xsk_ctx {
 	int ifindex;
 	struct list_head list;
 	int prog_fd;
+	int link_fd;
 	int xsks_map_fd;
 	char ifname[IFNAMSIZ];
+	bool has_bpf_link;
 };
 
 struct xsk_socket {
@@ -409,7 +412,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 	static const int log_buf_size = 16 * 1024;
 	struct xsk_ctx *ctx = xsk->ctx;
 	char log_buf[log_buf_size];
-	int err, prog_fd;
+	int prog_fd;
 
 	/* This is the fallback C-program:
 	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@ -499,14 +502,41 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		return prog_fd;
 	}
 
-	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
-				  xsk->config.xdp_flags);
+	ctx->prog_fd = prog_fd;
+	return 0;
+}
+
+static int xsk_create_bpf_link(struct xsk_socket *xsk)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	struct xsk_ctx *ctx = xsk->ctx;
+	__u32 prog_id = 0;
+	int link_fd;
+	int err;
+
+	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
 	if (err) {
-		close(prog_fd);
+		pr_warn("getting XDP prog id failed\n");
 		return err;
 	}
 
-	ctx->prog_fd = prog_fd;
+	/* if there's a netlink-based XDP prog loaded on interface, bail out
+	 * and ask user to do the removal by himself
+	 */
+	if (prog_id) {
+		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+		return -EINVAL;
+	}
+
+	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
+	if (link_fd < 0) {
+		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+		return link_fd;
+	}
+
+	ctx->link_fd = link_fd;
 	return 0;
 }
 
@@ -625,7 +655,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 		close(fd);
 	}
 
-	err = 0;
 	if (ctx->xsks_map_fd == -1)
 		err = -ENOENT;
 
@@ -642,6 +671,98 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 				   &xsk->fd, 0);
 }
 
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+{
+	struct bpf_link_info link_info;
+	__u32 link_len;
+	__u32 id = 0;
+	int err;
+	int fd;
+
+	while (true) {
+		err = bpf_link_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT) {
+				err = 0;
+				break;
+			}
+			pr_warn("can't get next link: %s\n", strerror(errno));
+			break;
+		}
+
+		fd = bpf_link_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
+			pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
+			err = -errno;
+			break;
+		}
+
+		link_len = sizeof(struct bpf_link_info);
+		memset(&link_info, 0, link_len);
+		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
+		if (err) {
+			pr_warn("can't get link info: %s\n", strerror(errno));
+			close(fd);
+			break;
+		}
+		if (link_info.type == BPF_LINK_TYPE_XDP) {
+			if (link_info.xdp.ifindex == ifindex) {
+				*link_fd = fd;
+				if (prog_id)
+					*prog_id = link_info.prog_id;
+				break;
+			}
+		}
+		close(fd);
+	}
+
+	return err;
+}
+
+static bool xsk_probe_bpf_link(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+			    .flags = XDP_FLAGS_SKB_MODE);
+	struct bpf_load_program_attr prog_attr;
+	struct bpf_insn insns[2] = {
+		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+		BPF_EXIT_INSN()
+	};
+	int prog_fd, link_fd = -1;
+	int ifindex_lo = 1;
+	bool ret = false;
+	int err;
+
+	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
+	if (err)
+		return ret;
+
+	if (link_fd >= 0)
+		return true;
+
+	memset(&prog_attr, 0, sizeof(prog_attr));
+	prog_attr.prog_type = BPF_PROG_TYPE_XDP;
+	prog_attr.insns = insns;
+	prog_attr.insns_cnt = ARRAY_SIZE(insns);
+	prog_attr.license = "GPL";
+
+	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+	if (prog_fd < 0)
+		return ret;
+
+	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
+	close(prog_fd);
+
+	if (link_fd >= 0) {
+		ret = true;
+		close(link_fd);
+	}
+
+	return ret;
+}
+
 static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 {
 	char ifname[IFNAMSIZ];
@@ -663,64 +784,108 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 	ctx->ifname[IFNAMSIZ - 1] = 0;
 
 	xsk->ctx = ctx;
+	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
 
 	return 0;
 }
 
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
-				int *xsks_map_fd)
+static int xsk_init_xdp_res(struct xsk_socket *xsk,
+			    int *xsks_map_fd)
 {
-	struct xsk_socket *xsk = _xdp;
 	struct xsk_ctx *ctx = xsk->ctx;
-	__u32 prog_id = 0;
 	int err;
 
-	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
-				  xsk->config.xdp_flags);
+	err = xsk_create_bpf_maps(xsk);
 	if (err)
 		return err;
 
-	if (!prog_id) {
-		err = xsk_create_bpf_maps(xsk);
-		if (err)
-			return err;
+	err = xsk_load_xdp_prog(xsk);
+	if (err)
+		goto err_load_xdp_prog;
 
-		err = xsk_load_xdp_prog(xsk);
-		if (err) {
-			goto err_load_xdp_prog;
-		}
-	} else {
-		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
-		if (ctx->prog_fd < 0)
-			return -errno;
-		err = xsk_lookup_bpf_maps(xsk);
-		if (err) {
-			close(ctx->prog_fd);
-			return err;
-		}
-	}
+	if (ctx->has_bpf_link)
+		err = xsk_create_bpf_link(xsk);
+	else
+		err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
+					  xsk->config.xdp_flags);
 
-	if (xsk->rx) {
-		err = xsk_set_bpf_maps(xsk);
-		if (err) {
-			if (!prog_id) {
-				goto err_set_bpf_maps;
-			} else {
-				close(ctx->prog_fd);
-				return err;
-			}
-		}
-	}
-	if (xsks_map_fd)
-		*xsks_map_fd = ctx->xsks_map_fd;
+	if (err)
+		goto err_attach_xdp_prog;
 
-	return 0;
+	if (!xsk->rx)
+		return err;
+
+	err = xsk_set_bpf_maps(xsk);
+	if (err)
+		goto err_set_bpf_maps;
+
+	return err;
 
 err_set_bpf_maps:
+	if (ctx->has_bpf_link)
+		close(ctx->link_fd);
+	else
+		bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_attach_xdp_prog:
 	close(ctx->prog_fd);
-	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
 err_load_xdp_prog:
 	xsk_delete_bpf_maps(xsk);
+	return err;
+}
+
+static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+	int err;
+
+	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (ctx->prog_fd < 0) {
+		err = -errno;
+		goto err_prog_fd;
+	}
+	err = xsk_lookup_bpf_maps(xsk);
+	if (err)
+		goto err_lookup_maps;
+
+	if (!xsk->rx)
+		return err;
+
+	err = xsk_set_bpf_maps(xsk);
+	if (err)
+		goto err_set_maps;
+
+	return err;
+
+err_set_maps:
+	close(ctx->xsks_map_fd);
+err_lookup_maps:
+	close(ctx->prog_fd);
+err_prog_fd:
+	if (ctx->has_bpf_link)
+		close(ctx->link_fd);
+	return err;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+{
+	struct xsk_socket *xsk = _xdp;
+	struct xsk_ctx *ctx = xsk->ctx;
+	__u32 prog_id = 0;
+	int err;
+
+	if (ctx->has_bpf_link)
+		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
+	else
+		err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+
+	if (err)
+		return err;
+
+	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
+			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
+
+	if (!err && xsks_map_fd)
+		*xsks_map_fd = ctx->xsks_map_fd;
 
 	return err;
 }
@@ -898,6 +1063,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 		}
 	}
 	xsk->ctx = ctx;
+	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
 
 	if (rx) {
 		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1054,6 +1220,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	if (ctx->prog_fd != -1) {
 		xsk_delete_bpf_maps(xsk);
 		close(ctx->prog_fd);
+		if (ctx->has_bpf_link)
+			close(ctx->link_fd);
 	}
 
 	err = xsk_get_mmap_offsets(xsk->fd, &off);
-- 
cgit v1.2.3


From c9d27c9e8dc7ccced65007a4d5d3640cad42adfc Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:06 +0200
Subject: samples: bpf: Do not unload prog within xdpsock

With the introduction of bpf_link in xsk's libbpf part, there's no
further need for explicit unload of prog on xdpsock's termination. When
process dies, the bpf_link's refcount will be decremented and resources
will be unloaded/freed under the hood in case when there are no more
active users.

While at it, don't dump stats on error path.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-8-maciej.fijalkowski@intel.com
---
 samples/bpf/xdpsock_user.c | 55 ++++++++++++----------------------------------
 1 file changed, 14 insertions(+), 41 deletions(-)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 1e2a1105d0e6..aa696854be78 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -96,7 +96,6 @@ static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 static int opt_timeout = 1000;
 static bool opt_need_wakeup = true;
 static u32 opt_num_xsks = 1;
-static u32 prog_id;
 static bool opt_busy_poll;
 static bool opt_reduced_cap;
 
@@ -462,59 +461,37 @@ static void *poller(void *arg)
 	return NULL;
 }
 
-static void remove_xdp_program(void)
+static void int_exit(int sig)
 {
-	u32 curr_prog_id = 0;
-	int cmd = CLOSE_CONN;
-
-	if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) {
-		printf("bpf_get_link_xdp_id failed\n");
-		exit(EXIT_FAILURE);
-	}
-	if (prog_id == curr_prog_id)
-		bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags);
-	else if (!curr_prog_id)
-		printf("couldn't find a prog id on a given interface\n");
-	else
-		printf("program on interface changed, not removing\n");
-
-	if (opt_reduced_cap) {
-		if (write(sock, &cmd, sizeof(int)) < 0) {
-			fprintf(stderr, "Error writing into stream socket: %s", strerror(errno));
-			exit(EXIT_FAILURE);
-		}
-	}
+	benchmark_done = true;
 }
 
-static void int_exit(int sig)
+static void __exit_with_error(int error, const char *file, const char *func,
+			      int line)
 {
-	benchmark_done = true;
+	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
+		line, error, strerror(error));
+	exit(EXIT_FAILURE);
 }
 
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
 static void xdpsock_cleanup(void)
 {
 	struct xsk_umem *umem = xsks[0]->umem->umem;
-	int i;
+	int i, cmd = CLOSE_CONN;
 
 	dump_stats();
 	for (i = 0; i < num_socks; i++)
 		xsk_socket__delete(xsks[i]->xsk);
 	(void)xsk_umem__delete(umem);
-	remove_xdp_program();
-}
 
-static void __exit_with_error(int error, const char *file, const char *func,
-			      int line)
-{
-	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
-		line, error, strerror(error));
-	dump_stats();
-	remove_xdp_program();
-	exit(EXIT_FAILURE);
+	if (opt_reduced_cap) {
+		if (write(sock, &cmd, sizeof(int)) < 0)
+			exit_with_error(errno);
+	}
 }
 
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
-						 __LINE__)
 static void swap_mac_addresses(void *data)
 {
 	struct ether_header *eth = (struct ether_header *)data;
@@ -880,10 +857,6 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
 	if (ret)
 		exit_with_error(-ret);
 
-	ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags);
-	if (ret)
-		exit_with_error(-ret);
-
 	xsk->app_stats.rx_empty_polls = 0;
 	xsk->app_stats.fill_fail_polls = 0;
 	xsk->app_stats.copy_tx_sendtos = 0;
-- 
cgit v1.2.3


From ef9280789773c974b45f809d58b47b481f2cf9f5 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:07 +0200
Subject: selftests: xsk: Remove thread for netns switch

Currently, there is a dedicated thread for following remote ns operations:
- grabbing the ifindex of the interface moved to remote netns
- removing xdp prog from that interface

With bpf_link usage in place, this can be simply omitted, so remove
mentioned thread, as BPF resources will be managed by bpf_link itself,
so there's no further need for creating the thread that will switch to
remote netns and do the cleanup.

Keep most of the logic for switching the ns, though, but make
switch_namespace() return the fd so that it will be possible to close it
at the process termination time. Get rid of logic around making sure
that it's possible to switch ns in validate_interfaces().

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-9-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 123 +++----------------------------
 tools/testing/selftests/bpf/xdpxceiver.h |  10 +--
 2 files changed, 14 insertions(+), 119 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index ae2f7d71f041..a856b2935d66 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -355,12 +355,15 @@ static void usage(const char *prog)
 	ksft_print_msg(str, prog);
 }
 
-static bool switch_namespace(int idx)
+static int switch_namespace(const char *nsname)
 {
 	char fqns[26] = "/var/run/netns/";
 	int nsfd;
 
-	strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+	if (!nsname || strlen(nsname) == 0)
+		return -1;
+
+	strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
 	nsfd = open(fqns, O_RDONLY);
 
 	if (nsfd == -1)
@@ -369,26 +372,9 @@ static bool switch_namespace(int idx)
 	if (setns(nsfd, 0) == -1)
 		exit_with_error(errno);
 
-	return true;
-}
-
-static void *nsswitchthread(void *args)
-{
-	struct targs *targs = args;
+	print_verbose("NS switched: %s\n", nsname);
 
-	targs->retptr = false;
-
-	if (switch_namespace(targs->idx)) {
-		ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname);
-		if (!ifdict[targs->idx]->ifindex) {
-			ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
-					      __func__, ifdict[targs->idx]->ifname);
-		} else {
-			print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname);
-			targs->retptr = true;
-		}
-	}
-	pthread_exit(NULL);
+	return nsfd;
 }
 
 static int validate_interfaces(void)
@@ -400,33 +386,6 @@ static int validate_interfaces(void)
 			ret = false;
 			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
 		}
-		if (strcmp(ifdict[i]->nsname, "")) {
-			struct targs *targs;
-
-			targs = malloc(sizeof(*targs));
-			if (!targs)
-				exit_with_error(errno);
-
-			targs->idx = i;
-			if (pthread_create(&ns_thread, NULL, nsswitchthread, targs))
-				exit_with_error(errno);
-
-			pthread_join(ns_thread, NULL);
-
-			if (targs->retptr)
-				print_verbose("NS switched: %s\n", ifdict[i]->nsname);
-
-			free(targs);
-		} else {
-			ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
-			if (!ifdict[i]->ifindex) {
-				ksft_test_result_fail
-				    ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
-				ret = false;
-			} else {
-				print_verbose("Interface found: %s\n", ifdict[i]->ifname);
-			}
-		}
 	}
 	return ret;
 }
@@ -843,8 +802,7 @@ static void *worker_testapp_validate(void *arg)
 		if (bufs == MAP_FAILED)
 			exit_with_error(errno);
 
-		if (strcmp(ifobject->nsname, ""))
-			switch_namespace(ifobject->ifdict_index);
+		ifobject->ns_fd = switch_namespace(ifobject->nsname);
 	}
 
 	if (ifobject->fv.vector == tx) {
@@ -1059,60 +1017,6 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac,
 	ifobj->src_port = src_port;
 }
 
-static void *nsdisablemodethread(void *args)
-{
-	struct targs *targs = args;
-
-	targs->retptr = false;
-
-	if (switch_namespace(targs->idx)) {
-		targs->retptr = bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags);
-	} else {
-		targs->retptr = errno;
-		print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname);
-	}
-
-	pthread_exit(NULL);
-}
-
-static void disable_xdp_mode(int mode)
-{
-	int err = 0;
-	__u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode;
-	char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv";
-
-	for (int i = 0; i < MAX_INTERFACES; i++) {
-		if (strcmp(ifdict[i]->nsname, "")) {
-			struct targs *targs;
-
-			targs = malloc(sizeof(*targs));
-			memset(targs, 0, sizeof(*targs));
-			if (!targs)
-				exit_with_error(errno);
-
-			targs->idx = i;
-			targs->flags = flags;
-			if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs))
-				exit_with_error(errno);
-
-			pthread_join(ns_thread, NULL);
-			err = targs->retptr;
-			free(targs);
-		} else {
-			err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags);
-		}
-
-		if (err) {
-			print_verbose("Failed to disable %s mode on interface %s\n",
-						mode_str, ifdict[i]->ifname);
-			exit_with_error(err);
-		}
-
-		print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname);
-		configured_mode = mode & XDP_FLAGS_SKB_MODE ? TEST_MODE_DRV : TEST_MODE_SKB;
-	}
-}
-
 static void run_pkt_test(int mode, int type)
 {
 	test_type = type;
@@ -1132,13 +1036,9 @@ static void run_pkt_test(int mode, int type)
 
 	switch (mode) {
 	case (TEST_MODE_SKB):
-		if (configured_mode == TEST_MODE_DRV)
-			disable_xdp_mode(XDP_FLAGS_DRV_MODE);
 		xdp_flags |= XDP_FLAGS_SKB_MODE;
 		break;
 	case (TEST_MODE_DRV):
-		if (configured_mode == TEST_MODE_SKB)
-			disable_xdp_mode(XDP_FLAGS_SKB_MODE);
 		xdp_flags |= XDP_FLAGS_DRV_MODE;
 		break;
 	default:
@@ -1185,8 +1085,6 @@ int main(int argc, char **argv)
 	ifdict[1]->fv.vector = rx;
 	init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1);
 
-	disable_xdp_mode(XDP_FLAGS_DRV_MODE);
-
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
 	for (i = 0; i < TEST_MODE_MAX; i++) {
@@ -1194,8 +1092,11 @@ int main(int argc, char **argv)
 			run_pkt_test(i, j);
 	}
 
-	for (int i = 0; i < MAX_INTERFACES; i++)
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		if (ifdict[i]->ns_fd != -1)
+			close(ifdict[i]->ns_fd);
 		free(ifdict[i]);
+	}
 
 	ksft_exit_pass();
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 8f9308099318..493f7498d40e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -126,7 +126,7 @@ struct generic_data {
 };
 
 struct ifobject {
-	int ifindex;
+	int ns_fd;
 	int ifdict_index;
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
@@ -150,15 +150,9 @@ pthread_mutex_t sync_mutex;
 pthread_mutex_t sync_mutex_tx;
 pthread_cond_t signal_rx_condition;
 pthread_cond_t signal_tx_condition;
-pthread_t t0, t1, ns_thread;
+pthread_t t0, t1;
 pthread_attr_t attr;
 
-struct targs {
-	u8 retptr;
-	int idx;
-	u32 flags;
-};
-
 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
 struct head_s *head_p;
 struct pkt {
-- 
cgit v1.2.3


From 9866bcd6635c264aaf3de1d89e44773a269048eb Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:08 +0200
Subject: selftests: xsk: Split worker thread

Let's a have a separate Tx/Rx worker threads instead of a one common
thread packed with Tx/Rx specific checks.

Move mmap for umem buffer space and a switch_namespace() call to
thread_common_ops.

This also allows for a bunch of simplifactions that are the subject of
the next commits. The final result will be a code base that is much
easier to follow.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-10-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 156 +++++++++++++++----------------
 1 file changed, 77 insertions(+), 79 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index a856b2935d66..cb6583b8ad67 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -760,6 +760,15 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mut
 	int ctr = 0;
 	int ret;
 
+	pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (bufs == MAP_FAILED)
+		exit_with_error(errno);
+
+	ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
 	xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
 	ret = xsk_configure_socket(ifobject);
 
@@ -782,9 +791,12 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mut
 
 	if (ctr >= SOCK_RECONF_CTR)
 		exit_with_error(ret);
+
+	print_verbose("Interface [%s] vector [%s]\n",
+		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
 }
 
-static void *worker_testapp_validate(void *arg)
+static void *worker_testapp_validate_tx(void *arg)
 {
 	struct udphdr *udp_hdr =
 	    (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -792,97 +804,83 @@ static void *worker_testapp_validate(void *arg)
 	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
 	struct ifobject *ifobject = (struct ifobject *)arg;
 	struct generic_data data;
+	int spinningrxctr = 0;
 	void *bufs = NULL;
 
-	pthread_attr_setstacksize(&attr, THREAD_STACK);
-
-	if (!bidi_pass) {
-		bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
-			    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-		if (bufs == MAP_FAILED)
-			exit_with_error(errno);
+	if (!bidi_pass)
+		thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
 
-		ifobject->ns_fd = switch_namespace(ifobject->nsname);
+	while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
+		spinningrxctr++;
+		usleep(USLEEP_MAX);
 	}
 
-	if (ifobject->fv.vector == tx) {
-		int spinningrxctr = 0;
+	for (int i = 0; i < num_frames; i++) {
+		/*send EOT frame */
+		if (i == (num_frames - 1))
+			data.seqnum = -1;
+		else
+			data.seqnum = i;
+		gen_udp_hdr(&data, ifobject, udp_hdr);
+		gen_ip_hdr(ifobject, ip_hdr);
+		gen_udp_csum(udp_hdr, ip_hdr);
+		gen_eth_hdr(ifobject, eth_hdr);
+		gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
+	}
 
-		if (!bidi_pass)
-			thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
+	print_verbose("Sending %d packets on interface %s\n",
+		      (opt_pkt_count - 1), ifobject->ifname);
+	tx_only_all(ifobject);
 
-		while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
-			spinningrxctr++;
-			usleep(USLEEP_MAX);
-		}
+	if (test_type != TEST_TYPE_BIDI || bidi_pass) {
+		xsk_socket__delete(ifobject->xsk->xsk);
+		(void)xsk_umem__delete(ifobject->umem->umem);
+	}
+	pthread_exit(NULL);
+}
 
-		print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname);
-		for (int i = 0; i < num_frames; i++) {
-			/*send EOT frame */
-			if (i == (num_frames - 1))
-				data.seqnum = -1;
-			else
-				data.seqnum = i;
-			gen_udp_hdr(&data, ifobject, udp_hdr);
-			gen_ip_hdr(ifobject, ip_hdr);
-			gen_udp_csum(udp_hdr, ip_hdr);
-			gen_eth_hdr(ifobject, eth_hdr);
-			gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
-		}
+static void *worker_testapp_validate_rx(void *arg)
+{
+	struct ifobject *ifobject = (struct ifobject *)arg;
+	struct pollfd fds[MAX_SOCKS] = { };
+	void *bufs = NULL;
 
-		print_verbose("Sending %d packets on interface %s\n",
-			       (opt_pkt_count - 1), ifobject->ifname);
-		tx_only_all(ifobject);
-	} else if (ifobject->fv.vector == rx) {
-		struct pollfd fds[MAX_SOCKS] = { };
-		int ret;
-
-		if (!bidi_pass)
-			thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
-
-		print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname);
-		if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
-			xsk_populate_fill_ring(ifobject->umem);
-
-		TAILQ_INIT(&head);
-		if (debug_pkt_dump) {
-			pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
-			if (!pkt_buf)
-				exit_with_error(errno);
-		}
+	if (!bidi_pass)
+		thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
 
-		fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
-		fds[0].events = POLLIN;
+	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
+		xsk_populate_fill_ring(ifobject->umem);
 
-		pthread_mutex_lock(&sync_mutex);
-		pthread_cond_signal(&signal_rx_condition);
-		pthread_mutex_unlock(&sync_mutex);
+	TAILQ_INIT(&head);
+	if (debug_pkt_dump) {
+		pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
+		if (!pkt_buf)
+			exit_with_error(errno);
+	}
 
-		while (1) {
-			if (test_type == TEST_TYPE_POLL) {
-				ret = poll(fds, 1, POLL_TMOUT);
-				if (ret <= 0)
-					continue;
-			}
+	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
+	fds[0].events = POLLIN;
 
-			if (test_type != TEST_TYPE_STATS) {
-				rx_pkt(ifobject->xsk, fds);
-				worker_pkt_validate();
-			} else {
-				worker_stats_validate(ifobject);
-			}
+	pthread_mutex_lock(&sync_mutex);
+	pthread_cond_signal(&signal_rx_condition);
+	pthread_mutex_unlock(&sync_mutex);
 
-			if (sigvar)
-				break;
+	while (1) {
+		if (test_type != TEST_TYPE_STATS) {
+			rx_pkt(ifobject->xsk, fds);
+			worker_pkt_validate();
+		} else {
+			worker_stats_validate(ifobject);
 		}
+		if (sigvar)
+			break;
+	}
 
-		if (test_type != TEST_TYPE_STATS)
-			print_verbose("Received %d packets on interface %s\n",
-				pkt_counter, ifobject->ifname);
+	print_verbose("Received %d packets on interface %s\n",
+		      pkt_counter, ifobject->ifname);
 
-		if (test_type == TEST_TYPE_TEARDOWN)
-			print_verbose("Destroying socket\n");
-	}
+	if (test_type == TEST_TYPE_TEARDOWN)
+		print_verbose("Destroying socket\n");
 
 	if ((test_type != TEST_TYPE_BIDI) || bidi_pass) {
 		xsk_socket__delete(ifobject->xsk->xsk);
@@ -911,12 +909,12 @@ static void testapp_validate(void)
 
 	/*Spawn RX thread */
 	if (!bidi || !bidi_pass) {
-		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
+		if (pthread_create(&t0, &attr, worker_testapp_validate_rx, ifdict[1]))
 			exit_with_error(errno);
 	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[0]->fv.vector = rx;
-		if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
+		if (pthread_create(&t0, &attr, worker_testapp_validate_rx, ifdict[0]))
 			exit_with_error(errno);
 	}
 
@@ -931,12 +929,12 @@ static void testapp_validate(void)
 
 	/*Spawn TX thread */
 	if (!bidi || !bidi_pass) {
-		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
+		if (pthread_create(&t1, &attr, worker_testapp_validate_tx, ifdict[0]))
 			exit_with_error(errno);
 	} else if (bidi && bidi_pass) {
 		/*switch Tx/Rx vectors */
 		ifdict[1]->fv.vector = tx;
-		if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
+		if (pthread_create(&t1, &attr, worker_testapp_validate_tx, ifdict[1]))
 			exit_with_error(errno);
 	}
 
-- 
cgit v1.2.3


From 99f9bcb65705dda07288b759c569d30d8a4f297c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:09 +0200
Subject: selftests: xsk: Remove Tx synchronization resources

Tx thread needs to be started after the Rx side is fully initialized so
that packets are not xmitted until xsk Rx socket is ready to be used.

It can be observed that atomic variable spinning_tx is not checked from
Rx side in any way, so thread_common_ops can be modified to only address
the spinning_rx. This means that spinning_tx can be removed altogheter.

signal_tx_condition is never utilized, so simply remove it.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-11-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 15 +++++++--------
 tools/testing/selftests/bpf/xdpxceiver.h |  2 --
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index cb6583b8ad67..9f7ee9a38752 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -126,7 +126,6 @@ static void pthread_init_mutex(void)
 	pthread_mutex_init(&sync_mutex, NULL);
 	pthread_mutex_init(&sync_mutex_tx, NULL);
 	pthread_cond_init(&signal_rx_condition, NULL);
-	pthread_cond_init(&signal_tx_condition, NULL);
 }
 
 static void pthread_destroy_mutex(void)
@@ -134,7 +133,6 @@ static void pthread_destroy_mutex(void)
 	pthread_mutex_destroy(&sync_mutex);
 	pthread_mutex_destroy(&sync_mutex_tx);
 	pthread_cond_destroy(&signal_rx_condition);
-	pthread_cond_destroy(&signal_tx_condition);
 }
 
 static void *memset32_htonl(void *dest, u32 val, u32 size)
@@ -754,8 +752,7 @@ static void worker_pkt_validate(void)
 	}
 }
 
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr,
-			      atomic_int *spinningptr)
+static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr)
 {
 	int ctr = 0;
 	int ret;
@@ -780,13 +777,15 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mut
 	 */
 	pthread_mutex_lock(mutexptr);
 	while (ret && ctr < SOCK_RECONF_CTR) {
-		atomic_store(spinningptr, 1);
+		if (ifobject->fv.vector == rx)
+			atomic_store(&spinning_rx, 1);
 		xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
 		ret = xsk_configure_socket(ifobject);
 		usleep(USLEEP_MAX);
 		ctr++;
 	}
-	atomic_store(spinningptr, 0);
+	if (ifobject->fv.vector == rx)
+		atomic_store(&spinning_rx, 0);
 	pthread_mutex_unlock(mutexptr);
 
 	if (ctr >= SOCK_RECONF_CTR)
@@ -808,7 +807,7 @@ static void *worker_testapp_validate_tx(void *arg)
 	void *bufs = NULL;
 
 	if (!bidi_pass)
-		thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
+		thread_common_ops(ifobject, bufs, &sync_mutex_tx);
 
 	while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
 		spinningrxctr++;
@@ -846,7 +845,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	void *bufs = NULL;
 
 	if (!bidi_pass)
-		thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
+		thread_common_ops(ifobject, bufs, &sync_mutex_tx);
 
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
 		xsk_populate_fill_ring(ifobject->umem);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 493f7498d40e..483be41229c6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -144,12 +144,10 @@ struct ifobject {
 static struct ifobject *ifdict[MAX_INTERFACES];
 
 /*threads*/
-atomic_int spinning_tx;
 atomic_int spinning_rx;
 pthread_mutex_t sync_mutex;
 pthread_mutex_t sync_mutex_tx;
 pthread_cond_t signal_rx_condition;
-pthread_cond_t signal_tx_condition;
 pthread_t t0, t1;
 pthread_attr_t attr;
 
-- 
cgit v1.2.3


From 9445f8c765838edf84dd0d3910ff309bdab8f95f Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:10 +0200
Subject: selftests: xsk: Refactor teardown/bidi test cases and
 testapp_validate

Currently, there is a testapp_sockets() that acts like a wrapper around
testapp_validate() and it is called for bidi and teardown test types.
Other test types call testapp_validate() directly.

Split testapp_sockets() onto two separate functions so a bunch of bidi
specific logic can be moved there and out of testapp_validate() itself.

Introduce function pointer to ifobject struct which will be used for
assigning the Rx/Tx function that is assigned to worker thread. Let's
also have a global ifobject Rx/Tx pointers so it's easier to swap the
vectors on a second run of a bi-directional test. Thread creation now is
easey to follow.

switching_notify variable is useless, info about vector switch can be
printed based on bidi_pass state.

Last but not least, init/destroy synchronization variables only once,
not per each test.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-12-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 117 ++++++++++++++++++-------------
 tools/testing/selftests/bpf/xdpxceiver.h |  14 ++--
 2 files changed, 78 insertions(+), 53 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 9f7ee9a38752..c0ebca8f55f8 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -896,26 +896,10 @@ static void testapp_validate(void)
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	if ((test_type == TEST_TYPE_BIDI) && bidi_pass) {
-		pthread_init_mutex();
-		if (!switching_notify) {
-			print_verbose("Switching Tx/Rx vectors\n");
-			switching_notify++;
-		}
-	}
-
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	if (!bidi || !bidi_pass) {
-		if (pthread_create(&t0, &attr, worker_testapp_validate_rx, ifdict[1]))
-			exit_with_error(errno);
-	} else if (bidi && bidi_pass) {
-		/*switch Tx/Rx vectors */
-		ifdict[0]->fv.vector = rx;
-		if (pthread_create(&t0, &attr, worker_testapp_validate_rx, ifdict[0]))
-			exit_with_error(errno);
-	}
+	pthread_create(&t0, &attr, ifdict_rx->func_ptr, ifdict_rx);
 
 	if (clock_gettime(CLOCK_REALTIME, &max_wait))
 		exit_with_error(errno);
@@ -927,15 +911,7 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	if (!bidi || !bidi_pass) {
-		if (pthread_create(&t1, &attr, worker_testapp_validate_tx, ifdict[0]))
-			exit_with_error(errno);
-	} else if (bidi && bidi_pass) {
-		/*switch Tx/Rx vectors */
-		ifdict[1]->fv.vector = tx;
-		if (pthread_create(&t1, &attr, worker_testapp_validate_tx, ifdict[1]))
-			exit_with_error(errno);
-	}
+	pthread_create(&t1, &attr, ifdict_tx->func_ptr, ifdict_tx);
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
@@ -953,18 +929,53 @@ static void testapp_validate(void)
 		print_ksft_result();
 }
 
-static void testapp_sockets(void)
+static void testapp_teardown(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+		pkt_counter = 0;
+		prev_pkt = -1;
+		sigvar = 0;
+		print_verbose("Creating socket\n");
+		testapp_validate();
+	}
+
+	print_ksft_result();
+}
+
+static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+	void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
+	enum fvector tmp_vector = ifobj1->fv.vector;
+
+	ifobj1->func_ptr = ifobj2->func_ptr;
+	ifobj1->fv.vector = ifobj2->fv.vector;
+
+	ifobj2->func_ptr = tmp_func_ptr;
+	ifobj2->fv.vector = tmp_vector;
+
+	ifdict_tx = ifobj1;
+	ifdict_rx = ifobj2;
+}
+
+static void testapp_bidi(void)
 {
-	for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER);
-	     i++) {
+	for (int i = 0; i < MAX_BIDI_ITER; i++) {
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
 		print_verbose("Creating socket\n");
 		testapp_validate();
-		test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass;
+		if (!bidi_pass) {
+			print_verbose("Switching Tx/Rx vectors\n");
+			swap_vectors(ifdict[1], ifdict[0]);
+		}
+		bidi_pass++;
 	}
 
+	swap_vectors(ifdict[0], ifdict[1]);
+
 	print_ksft_result();
 }
 
@@ -997,7 +1008,7 @@ static void testapp_stats(void)
 static void init_iface(struct ifobject *ifobj, const char *dst_mac,
 		       const char *src_mac, const char *dst_ip,
 		       const char *src_ip, const u16 dst_port,
-		       const u16 src_port)
+		       const u16 src_port, enum fvector vector)
 {
 	struct in_addr ip;
 
@@ -1012,6 +1023,16 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac,
 
 	ifobj->dst_port = dst_port;
 	ifobj->src_port = src_port;
+
+	if (vector == tx) {
+		ifobj->fv.vector = tx;
+		ifobj->func_ptr = worker_testapp_validate_tx;
+		ifdict_tx = ifobj;
+	} else {
+		ifobj->fv.vector = rx;
+		ifobj->func_ptr = worker_testapp_validate_rx;
+		ifdict_rx = ifobj;
+	}
 }
 
 static void run_pkt_test(int mode, int type)
@@ -1021,11 +1042,8 @@ static void run_pkt_test(int mode, int type)
 	/* reset defaults after potential previous test */
 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	pkt_counter = 0;
-	switching_notify = 0;
 	bidi_pass = 0;
 	prev_pkt = -1;
-	ifdict[0]->fv.vector = tx;
-	ifdict[1]->fv.vector = rx;
 	sigvar = 0;
 	stat_test_type = -1;
 	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
@@ -1042,16 +1060,20 @@ static void run_pkt_test(int mode, int type)
 		break;
 	}
 
-	pthread_init_mutex();
-
-	if (test_type == TEST_TYPE_STATS)
+	switch (test_type) {
+	case TEST_TYPE_STATS:
 		testapp_stats();
-	else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI))
+		break;
+	case TEST_TYPE_TEARDOWN:
+		testapp_teardown();
+		break;
+	case TEST_TYPE_BIDI:
+		testapp_bidi();
+		break;
+	default:
 		testapp_validate();
-	else
-		testapp_sockets();
-
-	pthread_destroy_mutex();
+		break;
+	}
 }
 
 int main(int argc, char **argv)
@@ -1076,14 +1098,13 @@ int main(int argc, char **argv)
 
 	num_frames = ++opt_pkt_count;
 
-	ifdict[0]->fv.vector = tx;
-	init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2);
-
-	ifdict[1]->fv.vector = rx;
-	init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1);
+	init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+	init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
+	pthread_init_mutex();
+
 	for (i = 0; i < TEST_MODE_MAX; i++) {
 		for (j = 0; j < TEST_TYPE_MAX; j++)
 			run_pkt_test(i, j);
@@ -1095,6 +1116,8 @@ int main(int argc, char **argv)
 		free(ifdict[i]);
 	}
 
+	pthread_destroy_mutex();
+
 	ksft_exit_pass();
 
 	return 0;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 483be41229c6..3945746900af 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -77,7 +77,6 @@ enum STAT_TEST_TYPES {
 static int configured_mode = TEST_MODE_UNCONFIGURED;
 static u8 debug_pkt_dump;
 static u32 num_frames;
-static u8 switching_notify;
 static u8 bidi_pass;
 static int test_type;
 
@@ -126,22 +125,25 @@ struct generic_data {
 };
 
 struct ifobject {
-	int ns_fd;
-	int ifdict_index;
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
-	struct flow_vector fv;
 	struct xsk_socket_info *xsk;
 	struct xsk_umem_info *umem;
-	u8 dst_mac[ETH_ALEN];
-	u8 src_mac[ETH_ALEN];
+	void *(*func_ptr)(void *arg);
+	struct flow_vector fv;
+	int ns_fd;
+	int ifdict_index;
 	u32 dst_ip;
 	u32 src_ip;
 	u16 src_port;
 	u16 dst_port;
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
 };
 
 static struct ifobject *ifdict[MAX_INTERFACES];
+static struct ifobject *ifdict_rx;
+static struct ifobject *ifdict_tx;
 
 /*threads*/
 atomic_int spinning_rx;
-- 
cgit v1.2.3


From 0464b1ed07677f869005bde3fade1580b48de67e Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:11 +0200
Subject: selftests: xsk: Remove sync_mutex_tx and atomic var

Although thread_common_ops() are called in both Tx and Rx threads,
testapp_validate() will not spawn Tx thread until Rx thread signals that
it has finished its initialization via condition variable.

Therefore, locking in thread_common_ops is not needed and furthermore Tx
thread does not have to spin on atomic variable.

Note that this simplification wouldn't be possible if there would still
be a common worker thread.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-13-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 31 +++++++------------------------
 tools/testing/selftests/bpf/xdpxceiver.h |  2 --
 2 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index c0ebca8f55f8..35e14ad8097e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -121,17 +121,15 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
 			       test_type == TEST_TYPE_STATS ? "Stats" : ""))
 
-static void pthread_init_mutex(void)
+static void init_sync_resources(void)
 {
 	pthread_mutex_init(&sync_mutex, NULL);
-	pthread_mutex_init(&sync_mutex_tx, NULL);
 	pthread_cond_init(&signal_rx_condition, NULL);
 }
 
-static void pthread_destroy_mutex(void)
+static void destroy_sync_resources(void)
 {
 	pthread_mutex_destroy(&sync_mutex);
-	pthread_mutex_destroy(&sync_mutex_tx);
 	pthread_cond_destroy(&signal_rx_condition);
 }
 
@@ -752,7 +750,7 @@ static void worker_pkt_validate(void)
 	}
 }
 
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr)
+static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 {
 	int ctr = 0;
 	int ret;
@@ -771,22 +769,13 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mut
 
 	/* Retry Create Socket if it fails as xsk_socket__create()
 	 * is asynchronous
-	 *
-	 * Essential to lock Mutex here to prevent Tx thread from
-	 * entering before Rx and causing a deadlock
 	 */
-	pthread_mutex_lock(mutexptr);
 	while (ret && ctr < SOCK_RECONF_CTR) {
-		if (ifobject->fv.vector == rx)
-			atomic_store(&spinning_rx, 1);
 		xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
 		ret = xsk_configure_socket(ifobject);
 		usleep(USLEEP_MAX);
 		ctr++;
 	}
-	if (ifobject->fv.vector == rx)
-		atomic_store(&spinning_rx, 0);
-	pthread_mutex_unlock(mutexptr);
 
 	if (ctr >= SOCK_RECONF_CTR)
 		exit_with_error(ret);
@@ -803,16 +792,10 @@ static void *worker_testapp_validate_tx(void *arg)
 	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
 	struct ifobject *ifobject = (struct ifobject *)arg;
 	struct generic_data data;
-	int spinningrxctr = 0;
 	void *bufs = NULL;
 
 	if (!bidi_pass)
-		thread_common_ops(ifobject, bufs, &sync_mutex_tx);
-
-	while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
-		spinningrxctr++;
-		usleep(USLEEP_MAX);
-	}
+		thread_common_ops(ifobject, bufs);
 
 	for (int i = 0; i < num_frames; i++) {
 		/*send EOT frame */
@@ -845,7 +828,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	void *bufs = NULL;
 
 	if (!bidi_pass)
-		thread_common_ops(ifobject, bufs, &sync_mutex_tx);
+		thread_common_ops(ifobject, bufs);
 
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
 		xsk_populate_fill_ring(ifobject->umem);
@@ -1103,7 +1086,7 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
-	pthread_init_mutex();
+	init_sync_resources();
 
 	for (i = 0; i < TEST_MODE_MAX; i++) {
 		for (j = 0; j < TEST_TYPE_MAX; j++)
@@ -1116,7 +1099,7 @@ int main(int argc, char **argv)
 		free(ifdict[i]);
 	}
 
-	pthread_destroy_mutex();
+	destroy_sync_resources();
 
 	ksft_exit_pass();
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 3945746900af..e304229d8a4c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -146,9 +146,7 @@ static struct ifobject *ifdict_rx;
 static struct ifobject *ifdict_tx;
 
 /*threads*/
-atomic_int spinning_rx;
 pthread_mutex_t sync_mutex;
-pthread_mutex_t sync_mutex_tx;
 pthread_cond_t signal_rx_condition;
 pthread_t t0, t1;
 pthread_attr_t attr;
-- 
cgit v1.2.3


From 34829eec3b698219a5cbc09a174b2a7407b3b4c1 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:12 +0200
Subject: veth: Implement ethtool's get_channels() callback

Libbpf's xsk part calls get_channels() API to retrieve the queue count
of the underlying driver so that XSKMAP is sized accordingly.

Implement that in veth so multi queue scenarios can work properly.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-14-maciej.fijalkowski@intel.com
---
 drivers/net/veth.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 91b73db37555..9e525646df1d 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -218,6 +218,17 @@ static void veth_get_ethtool_stats(struct net_device *dev,
 	}
 }
 
+static void veth_get_channels(struct net_device *dev,
+			      struct ethtool_channels *channels)
+{
+	channels->tx_count = dev->real_num_tx_queues;
+	channels->rx_count = dev->real_num_rx_queues;
+	channels->max_tx = dev->real_num_tx_queues;
+	channels->max_rx = dev->real_num_rx_queues;
+	channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+	channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+}
+
 static const struct ethtool_ops veth_ethtool_ops = {
 	.get_drvinfo		= veth_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -226,6 +237,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
 	.get_ethtool_stats	= veth_get_ethtool_stats,
 	.get_link_ksettings	= veth_get_link_ksettings,
 	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_channels		= veth_get_channels,
 };
 
 /* general routines */
-- 
cgit v1.2.3


From 27e1ca2525de264901b5c2d9d0c4403c3fe8608c Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Mar 2021 00:43:13 +0200
Subject: selftests: xsk: Implement bpf_link test

Introduce a test that is supposed to verify the persistence of BPF
resources based on underlying bpf_link usage.

Test will:
1) create and bind two sockets on queue ids 0 and 1
2) run a traffic on queue ids 0
3) remove xsk sockets from queue 0 on both veth interfaces
4) run a traffic on queues ids 1

Running traffic successfully on qids 1 means that BPF resources were
not removed on step 3).

In order to make it work, change the command that creates veth pair to
have the 4 queue pairs by default.

Introduce the arrays of xsks and umems to ifobject struct but keep a
pointers to single entities, so rest of the logic around Rx/Tx can be
kept as-is.

For umem handling, double the size of mmapped space and split that
between the two sockets.

Rename also bidi_pass to a variable 'second_step' of a boolean type as
it's now used also for the test that is introduced here and it doesn't
have anything in common with bi-directional testing.

Drop opt_queue command line argument as it wasn't working before anyway.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-15-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  |   3 +-
 tools/testing/selftests/bpf/xdpxceiver.c | 179 +++++++++++++++++++++++--------
 tools/testing/selftests/bpf/xdpxceiver.h |   7 +-
 3 files changed, 139 insertions(+), 50 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 56d4474e2c83..46633a3bfb0b 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -107,7 +107,7 @@ setup_vethPairs() {
 	        echo "setting up ${VETH0}: namespace: ${NS0}"
 	fi
 	ip netns add ${NS1}
-	ip link add ${VETH0} type veth peer name ${VETH1}
+	ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
 	if [ -f /proc/net/if_inet6 ]; then
 		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
 	fi
@@ -118,6 +118,7 @@ setup_vethPairs() {
 	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
 	ip netns exec ${NS1} ip link set ${VETH1} up
+	ip netns exec ${NS1} ip link set dev lo up
 	ip link set ${VETH0} up
 }
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 35e14ad8097e..5c0d6dbd076f 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -41,8 +41,12 @@
  *            Reduce the size of the RX ring to a fraction of the fill ring size.
  *       iv.  fill queue empty
  *            Do not populate the fill queue and then try to receive pkts.
+ *    f. bpf_link resource persistence
+ *       Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ *       then remove xsk sockets from queue 0 on both veth interfaces and
+ *       finally run a traffic on queues ids 1
  *
- * Total tests: 10
+ * Total tests: 12
  *
  * Flow:
  * -----
@@ -115,11 +119,12 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\
+	(ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
 			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
 			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
 			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
-			       test_type == TEST_TYPE_STATS ? "Stats" : ""))
+			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
+			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
 
 static void init_sync_resources(void)
 {
@@ -258,9 +263,8 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
 	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
 }
 
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
 {
-	int ret;
 	struct xsk_umem_config cfg = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
@@ -268,17 +272,22 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
 		.frame_headroom = frame_headroom,
 		.flags = XSK_UMEM__DEFAULT_FLAGS
 	};
+	int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	struct xsk_umem_info *umem;
+	int ret;
 
-	data->umem = calloc(1, sizeof(struct xsk_umem_info));
-	if (!data->umem)
+	umem = calloc(1, sizeof(struct xsk_umem_info));
+	if (!umem)
 		exit_with_error(errno);
 
-	ret = xsk_umem__create(&data->umem->umem, buffer, size,
-			       &data->umem->fq, &data->umem->cq, &cfg);
+	ret = xsk_umem__create(&umem->umem, buffer, size,
+			       &umem->fq, &umem->cq, &cfg);
 	if (ret)
 		exit_with_error(ret);
 
-	data->umem->buffer = buffer;
+	umem->buffer = buffer;
+
+	data->umem_arr[idx] = umem;
 }
 
 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
@@ -294,18 +303,19 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
 	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
 }
 
-static int xsk_configure_socket(struct ifobject *ifobject)
+static int xsk_configure_socket(struct ifobject *ifobject, int idx)
 {
 	struct xsk_socket_config cfg;
+	struct xsk_socket_info *xsk;
 	struct xsk_ring_cons *rxr;
 	struct xsk_ring_prod *txr;
 	int ret;
 
-	ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
-	if (!ifobject->xsk)
+	xsk = calloc(1, sizeof(struct xsk_socket_info));
+	if (!xsk)
 		exit_with_error(errno);
 
-	ifobject->xsk->umem = ifobject->umem;
+	xsk->umem = ifobject->umem;
 	cfg.rx_size = rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
@@ -313,19 +323,20 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 	cfg.bind_flags = xdp_bind_flags;
 
 	if (test_type != TEST_TYPE_BIDI) {
-		rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
-		txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+		rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
+		txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
 	} else {
-		rxr = &ifobject->xsk->rx;
-		txr = &ifobject->xsk->tx;
+		rxr = &xsk->rx;
+		txr = &xsk->tx;
 	}
 
-	ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
-				 opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
-
+	ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
+				 ifobject->umem->umem, rxr, txr, &cfg);
 	if (ret)
 		return 1;
 
+	ifobject->xsk_arr[idx] = xsk;
+
 	return 0;
 }
 
@@ -393,7 +404,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -413,9 +424,6 @@ static void parse_command_line(int argc, char **argv)
 				       MAX_INTERFACES_NAMESPACE_CHARS);
 			interface_index++;
 			break;
-		case 'q':
-			opt_queue = atoi(optarg);
-			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -752,27 +760,33 @@ static void worker_pkt_validate(void)
 
 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 {
+	int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
 	int ctr = 0;
 	int ret;
 
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+	ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
+	if (test_type == TEST_TYPE_BPF_RES)
+		umem_sz *= 2;
+
+	bufs = mmap(NULL, umem_sz,
 		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	if (bufs == MAP_FAILED)
 		exit_with_error(errno);
 
-	ifobject->ns_fd = switch_namespace(ifobject->nsname);
-
-	xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
-	ret = xsk_configure_socket(ifobject);
+	xsk_configure_umem(ifobject, bufs, 0);
+	ifobject->umem = ifobject->umem_arr[0];
+	ret = xsk_configure_socket(ifobject, 0);
 
 	/* Retry Create Socket if it fails as xsk_socket__create()
 	 * is asynchronous
 	 */
 	while (ret && ctr < SOCK_RECONF_CTR) {
-		xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
-		ret = xsk_configure_socket(ifobject);
+		xsk_configure_umem(ifobject, bufs, 0);
+		ifobject->umem = ifobject->umem_arr[0];
+		ret = xsk_configure_socket(ifobject, 0);
 		usleep(USLEEP_MAX);
 		ctr++;
 	}
@@ -780,10 +794,34 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 	if (ctr >= SOCK_RECONF_CTR)
 		exit_with_error(ret);
 
+	ifobject->umem = ifobject->umem_arr[0];
+	ifobject->xsk = ifobject->xsk_arr[0];
+
+	if (test_type == TEST_TYPE_BPF_RES) {
+		xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+		ifobject->umem = ifobject->umem_arr[1];
+		ret = xsk_configure_socket(ifobject, 1);
+	}
+
+	ifobject->umem = ifobject->umem_arr[0];
+	ifobject->xsk = ifobject->xsk_arr[0];
 	print_verbose("Interface [%s] vector [%s]\n",
 		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
 }
 
+static bool testapp_is_test_two_stepped(void)
+{
+	return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
+}
+
+static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
+{
+	if (testapp_is_test_two_stepped()) {
+		xsk_socket__delete(ifobj->xsk->xsk);
+		(void)xsk_umem__delete(ifobj->umem->umem);
+	}
+}
+
 static void *worker_testapp_validate_tx(void *arg)
 {
 	struct udphdr *udp_hdr =
@@ -794,7 +832,7 @@ static void *worker_testapp_validate_tx(void *arg)
 	struct generic_data data;
 	void *bufs = NULL;
 
-	if (!bidi_pass)
+	if (!second_step)
 		thread_common_ops(ifobject, bufs);
 
 	for (int i = 0; i < num_frames; i++) {
@@ -814,10 +852,7 @@ static void *worker_testapp_validate_tx(void *arg)
 		      (opt_pkt_count - 1), ifobject->ifname);
 	tx_only_all(ifobject);
 
-	if (test_type != TEST_TYPE_BIDI || bidi_pass) {
-		xsk_socket__delete(ifobject->xsk->xsk);
-		(void)xsk_umem__delete(ifobject->umem->umem);
-	}
+	testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
@@ -827,7 +862,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	struct pollfd fds[MAX_SOCKS] = { };
 	void *bufs = NULL;
 
-	if (!bidi_pass)
+	if (!second_step)
 		thread_common_ops(ifobject, bufs);
 
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
@@ -864,10 +899,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	if (test_type == TEST_TYPE_TEARDOWN)
 		print_verbose("Destroying socket\n");
 
-	if ((test_type != TEST_TYPE_BIDI) || bidi_pass) {
-		xsk_socket__delete(ifobject->xsk->xsk);
-		(void)xsk_umem__delete(ifobject->umem->umem);
-	}
+	testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
@@ -875,6 +907,7 @@ static void testapp_validate(void)
 {
 	struct timespec max_wait = { 0, 0 };
 	bool bidi = test_type == TEST_TYPE_BIDI;
+	bool bpf = test_type == TEST_TYPE_BPF_RES;
 
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
@@ -908,7 +941,7 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS))
+	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
 		print_ksft_result();
 }
 
@@ -950,11 +983,11 @@ static void testapp_bidi(void)
 		sigvar = 0;
 		print_verbose("Creating socket\n");
 		testapp_validate();
-		if (!bidi_pass) {
+		if (!second_step) {
 			print_verbose("Switching Tx/Rx vectors\n");
 			swap_vectors(ifdict[1], ifdict[0]);
 		}
-		bidi_pass++;
+		second_step = true;
 	}
 
 	swap_vectors(ifdict[0], ifdict[1]);
@@ -962,6 +995,36 @@ static void testapp_bidi(void)
 	print_ksft_result();
 }
 
+static void swap_xsk_res(void)
+{
+	xsk_socket__delete(ifdict_tx->xsk->xsk);
+	xsk_umem__delete(ifdict_tx->umem->umem);
+	xsk_socket__delete(ifdict_rx->xsk->xsk);
+	xsk_umem__delete(ifdict_rx->umem->umem);
+	ifdict_tx->umem = ifdict_tx->umem_arr[1];
+	ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
+	ifdict_rx->umem = ifdict_rx->umem_arr[1];
+	ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+}
+
+static void testapp_bpf_res(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_BPF_ITER; i++) {
+		pkt_counter = 0;
+		prev_pkt = -1;
+		sigvar = 0;
+		print_verbose("Creating socket\n");
+		testapp_validate();
+		if (!second_step)
+			swap_xsk_res();
+		second_step = true;
+	}
+
+	print_ksft_result();
+}
+
 static void testapp_stats(void)
 {
 	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
@@ -1025,13 +1088,15 @@ static void run_pkt_test(int mode, int type)
 	/* reset defaults after potential previous test */
 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	pkt_counter = 0;
-	bidi_pass = 0;
+	second_step = 0;
 	prev_pkt = -1;
 	sigvar = 0;
 	stat_test_type = -1;
 	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
 
+	configured_mode = mode;
+
 	switch (mode) {
 	case (TEST_MODE_SKB):
 		xdp_flags |= XDP_FLAGS_SKB_MODE;
@@ -1053,6 +1118,9 @@ static void run_pkt_test(int mode, int type)
 	case TEST_TYPE_BIDI:
 		testapp_bidi();
 		break;
+	case TEST_TYPE_BPF_RES:
+		testapp_bpf_res();
+		break;
 	default:
 		testapp_validate();
 		break;
@@ -1062,6 +1130,7 @@ static void run_pkt_test(int mode, int type)
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+	bool failure = false;
 	int i, j;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
@@ -1073,6 +1142,16 @@ int main(int argc, char **argv)
 			exit_with_error(errno);
 
 		ifdict[i]->ifdict_index = i;
+		ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+		if (!ifdict[i]->xsk_arr) {
+			failure = true;
+			goto cleanup;
+		}
+		ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+		if (!ifdict[i]->umem_arr) {
+			failure = true;
+			goto cleanup;
+		}
 	}
 
 	setlocale(LC_ALL, "");
@@ -1093,13 +1172,19 @@ int main(int argc, char **argv)
 			run_pkt_test(i, j);
 	}
 
+	destroy_sync_resources();
+
+cleanup:
 	for (int i = 0; i < MAX_INTERFACES; i++) {
 		if (ifdict[i]->ns_fd != -1)
 			close(ifdict[i]->ns_fd);
+		free(ifdict[i]->xsk_arr);
+		free(ifdict[i]->umem_arr);
 		free(ifdict[i]);
 	}
 
-	destroy_sync_resources();
+	if (failure)
+		exit_with_error(errno);
 
 	ksft_exit_pass();
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e304229d8a4c..e431ecb9bb95 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -23,6 +23,7 @@
 #define MAX_SOCKS 1
 #define MAX_TEARDOWN_ITER 10
 #define MAX_BIDI_ITER 2
+#define MAX_BPF_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -63,6 +64,7 @@ enum TEST_TYPES {
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
 	TEST_TYPE_STATS,
+	TEST_TYPE_BPF_RES,
 	TEST_TYPE_MAX
 };
 
@@ -77,10 +79,9 @@ enum STAT_TEST_TYPES {
 static int configured_mode = TEST_MODE_UNCONFIGURED;
 static u8 debug_pkt_dump;
 static u32 num_frames;
-static u8 bidi_pass;
+static bool second_step;
 static int test_type;
 
-static int opt_queue;
 static int opt_pkt_count;
 static u8 opt_verbose;
 
@@ -128,6 +129,8 @@ struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
 	struct xsk_socket_info *xsk;
+	struct xsk_socket_info **xsk_arr;
+	struct xsk_umem_info **umem_arr;
 	struct xsk_umem_info *umem;
 	void *(*func_ptr)(void *arg);
 	struct flow_vector fv;
-- 
cgit v1.2.3


From 7651910257c8fb1ec76b50bef0330fcf739105c7 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Tue, 30 Mar 2021 00:43:14 +0200
Subject: selftests: xsk: Remove thread attribute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is really no reason to have a non-default thread stack
size. Remove that.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-16-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 9 ++-------
 tools/testing/selftests/bpf/xdpxceiver.h | 2 --
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 5c0d6dbd076f..ec09b8fe1178 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -764,8 +764,6 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 	int ctr = 0;
 	int ret;
 
-	pthread_attr_setstacksize(&attr, THREAD_STACK);
-
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
 	if (test_type == TEST_TYPE_BPF_RES)
@@ -909,13 +907,10 @@ static void testapp_validate(void)
 	bool bidi = test_type == TEST_TYPE_BIDI;
 	bool bpf = test_type == TEST_TYPE_BPF_RES;
 
-	pthread_attr_init(&attr);
-	pthread_attr_setstacksize(&attr, THREAD_STACK);
-
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	pthread_create(&t0, &attr, ifdict_rx->func_ptr, ifdict_rx);
+	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
 
 	if (clock_gettime(CLOCK_REALTIME, &max_wait))
 		exit_with_error(errno);
@@ -927,7 +922,7 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	pthread_create(&t1, &attr, ifdict_tx->func_ptr, ifdict_tx);
+	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e431ecb9bb95..78863820fb81 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -37,7 +37,6 @@
 #define TMOUT_SEC (3)
 #define EOT (-1)
 #define USLEEP_MAX 200000
-#define THREAD_STACK 60000000
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
@@ -152,7 +151,6 @@ static struct ifobject *ifdict_tx;
 pthread_mutex_t sync_mutex;
 pthread_cond_t signal_rx_condition;
 pthread_t t0, t1;
-pthread_attr_t attr;
 
 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
 struct head_s *head_p;
-- 
cgit v1.2.3


From 96539f1c5efb0022b94412e8623722aad23dee6b Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Tue, 30 Mar 2021 00:43:15 +0200
Subject: selftests: xsk: Remove mutex and condition variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The usage of the condition variable is broken, and overkill. Replace it
with a pthread barrier.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-17-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 33 +++++---------------------------
 tools/testing/selftests/bpf/xdpxceiver.h |  3 +--
 2 files changed, 6 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index ec09b8fe1178..6d87bdf3574a 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -126,18 +126,6 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
 			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
 
-static void init_sync_resources(void)
-{
-	pthread_mutex_init(&sync_mutex, NULL);
-	pthread_cond_init(&signal_rx_condition, NULL);
-}
-
-static void destroy_sync_resources(void)
-{
-	pthread_mutex_destroy(&sync_mutex);
-	pthread_cond_destroy(&signal_rx_condition);
-}
-
 static void *memset32_htonl(void *dest, u32 val, u32 size)
 {
 	u32 *ptr = (u32 *)dest;
@@ -876,9 +864,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
 	fds[0].events = POLLIN;
 
-	pthread_mutex_lock(&sync_mutex);
-	pthread_cond_signal(&signal_rx_condition);
-	pthread_mutex_unlock(&sync_mutex);
+	pthread_barrier_wait(&barr);
 
 	while (1) {
 		if (test_type != TEST_TYPE_STATS) {
@@ -903,24 +889,19 @@ static void *worker_testapp_validate_rx(void *arg)
 
 static void testapp_validate(void)
 {
-	struct timespec max_wait = { 0, 0 };
 	bool bidi = test_type == TEST_TYPE_BIDI;
 	bool bpf = test_type == TEST_TYPE_BPF_RES;
 
-	pthread_mutex_lock(&sync_mutex);
+	if (pthread_barrier_init(&barr, NULL, 2))
+		exit_with_error(errno);
 
 	/*Spawn RX thread */
 	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
 
-	if (clock_gettime(CLOCK_REALTIME, &max_wait))
-		exit_with_error(errno);
-	max_wait.tv_sec += TMOUT_SEC;
-
-	if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
+	pthread_barrier_wait(&barr);
+	if (pthread_barrier_destroy(&barr))
 		exit_with_error(errno);
 
-	pthread_mutex_unlock(&sync_mutex);
-
 	/*Spawn TX thread */
 	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
 
@@ -1160,15 +1141,11 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
-	init_sync_resources();
-
 	for (i = 0; i < TEST_MODE_MAX; i++) {
 		for (j = 0; j < TEST_TYPE_MAX; j++)
 			run_pkt_test(i, j);
 	}
 
-	destroy_sync_resources();
-
 cleanup:
 	for (int i = 0; i < MAX_INTERFACES; i++) {
 		if (ifdict[i]->ns_fd != -1)
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 78863820fb81..ef219c0785eb 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -148,8 +148,7 @@ static struct ifobject *ifdict_rx;
 static struct ifobject *ifdict_tx;
 
 /*threads*/
-pthread_mutex_t sync_mutex;
-pthread_cond_t signal_rx_condition;
+pthread_barrier_t barr;
 pthread_t t0, t1;
 
 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-- 
cgit v1.2.3


From ae6b6a17800f34dd5215286b44a4e99a0a1cf862 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Tue, 30 Mar 2021 00:43:16 +0200
Subject: selftests: xsk: Remove unused defines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove two unused defines.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210329224316.17793-18-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 7 +++----
 tools/testing/selftests/bpf/xdpxceiver.h | 2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 6d87bdf3574a..1135fb980814 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -456,7 +456,7 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
 	if (!xsk->outstanding_tx)
 		return;
 
-	if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
 		kick_tx(xsk);
 
 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
@@ -544,9 +544,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
 	xsk_ring_prod__submit(&xsk->tx, batch_size);
 	if (!tx_invalid_test) {
 		xsk->outstanding_tx += batch_size;
-	} else {
-		if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
-			kick_tx(xsk);
+	} else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+		kick_tx(xsk);
 	}
 	*frameptr += batch_size;
 	*frameptr %= num_frames;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index ef219c0785eb..6c428b276ab6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -34,13 +34,11 @@
 #define IP_PKT_TOS 0x9
 #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define TMOUT_SEC (3)
 #define EOT (-1)
 #define USLEEP_MAX 200000
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
-#define NEED_WAKEUP true
 #define DEFAULT_PKT_CNT 10000
 #define RX_FULL_RXQSIZE 32
 
-- 
cgit v1.2.3


From 35796c1d343871fa75a6e6b0f4584182cbeae6ac Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 30 Mar 2021 21:15:26 +0200
Subject: batman-adv: Fix misspelled "wont"

checkpatch started to complain about the mispelling of:

  CHECK: 'wont' may be misspelled - perhaps 'won't'?
  #459: FILE: ./net/batman-adv/bat_iv_ogm.c:459:
  +    * - the resulting packet wont be bigger than

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_iv_ogm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a5e313cd6f44..789f257be24f 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -456,7 +456,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
 	 * if:
 	 *
 	 * - the send time is within our MAX_AGGREGATION_MS time
-	 * - the resulting packet wont be bigger than
+	 * - the resulting packet won't be bigger than
 	 *   MAX_AGGREGATION_BYTES
 	 * otherwise aggregation is not possible
 	 */
-- 
cgit v1.2.3


From fdb5cc6ab3b6a1c0122d3644a63ef9dc7a610d35 Mon Sep 17 00:00:00 2001
From: Andre Edich <andre.edich@microchip.com>
Date: Mon, 29 Mar 2021 11:45:36 +0200
Subject: net: phy: lan87xx: fix access to wrong register of LAN87xx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function lan87xx_config_aneg_ext was introduced to configure
LAN95xxA but as well writes to undocumented register of LAN87xx.
This fix prevents that access.

The function lan87xx_config_aneg_ext gets more suitable for the new
behavior name.

Reported-by: Måns Rullgård <mans@mansr.com>
Fixes: 05b35e7eb9a1 ("smsc95xx: add phylib support")
Signed-off-by: Andre Edich <andre.edich@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/smsc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index ddb78fb4d6dc..d8cac02a79b9 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -185,10 +185,13 @@ static int lan87xx_config_aneg(struct phy_device *phydev)
 	return genphy_config_aneg(phydev);
 }
 
-static int lan87xx_config_aneg_ext(struct phy_device *phydev)
+static int lan95xx_config_aneg_ext(struct phy_device *phydev)
 {
 	int rc;
 
+	if (phydev->phy_id != 0x0007c0f0) /* not (LAN9500A or LAN9505A) */
+		return lan87xx_config_aneg(phydev);
+
 	/* Extend Manual AutoMDIX timer */
 	rc = phy_read(phydev, PHY_EDPD_CONFIG);
 	if (rc < 0)
@@ -441,7 +444,7 @@ static struct phy_driver smsc_phy_driver[] = {
 	.read_status	= lan87xx_read_status,
 	.config_init	= smsc_phy_config_init,
 	.soft_reset	= smsc_phy_reset,
-	.config_aneg	= lan87xx_config_aneg_ext,
+	.config_aneg	= lan95xx_config_aneg_ext,
 
 	/* IRQ related */
 	.config_intr	= smsc_phy_config_intr,
-- 
cgit v1.2.3


From 2b246b2569cd2ac6ff700d0dce56b8bae29b1842 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:15 -0700
Subject: icmp: add support for RFC 8335 PROBE

Add definitions for PROBE ICMP types and codes.

Add AFI definitions for IP and IPV6 as specified by IANA

Add a struct to represent the additional header when probing by IP
address (ctype == 3) for use in parsing incoming PROBE messages

Add a struct to represent the entire Interface Identification Object
(IIO) section of an incoming PROBE packet

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/icmp.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h
index fb169a50895e..222325d1d80e 100644
--- a/include/uapi/linux/icmp.h
+++ b/include/uapi/linux/icmp.h
@@ -20,6 +20,9 @@
 
 #include <linux/types.h>
 #include <asm/byteorder.h>
+#include <linux/in.h>
+#include <linux/if.h>
+#include <linux/in6.h>
 
 #define ICMP_ECHOREPLY		0	/* Echo Reply			*/
 #define ICMP_DEST_UNREACH	3	/* Destination Unreachable	*/
@@ -66,6 +69,23 @@
 #define ICMP_EXC_TTL		0	/* TTL count exceeded		*/
 #define ICMP_EXC_FRAGTIME	1	/* Fragment Reass time exceeded	*/
 
+/* Codes for EXT_ECHO (PROBE) */
+#define ICMP_EXT_ECHO		42
+#define ICMP_EXT_ECHOREPLY	43
+#define ICMP_EXT_MAL_QUERY	1	/* Malformed Query */
+#define ICMP_EXT_NO_IF		2	/* No such Interface */
+#define ICMP_EXT_NO_TABLE_ENT	3	/* No such Table Entry */
+#define ICMP_EXT_MULT_IFS	4	/* Multiple Interfaces Satisfy Query */
+
+/* Constants for EXT_ECHO (PROBE) */
+#define EXT_ECHOREPLY_ACTIVE	(1 << 2)/* active bit in reply message */
+#define EXT_ECHOREPLY_IPV4	(1 << 1)/* ipv4 bit in reply message */
+#define EXT_ECHOREPLY_IPV6	1	/* ipv6 bit in reply message */
+#define EXT_ECHO_CTYPE_NAME	1
+#define EXT_ECHO_CTYPE_INDEX	2
+#define EXT_ECHO_CTYPE_ADDR	3
+#define ICMP_AFI_IP		1	/* Address Family Identifier for ipv4 */
+#define ICMP_AFI_IP6		2	/* Address Family Identifier for ipv6 */
 
 struct icmphdr {
   __u8		type;
@@ -118,4 +138,26 @@ struct icmp_extobj_hdr {
 	__u8		class_type;
 };
 
+/* RFC 8335: 2.1 Header for c-type 3 payload */
+struct icmp_ext_echo_ctype3_hdr {
+	__be16		afi;
+	__u8		addrlen;
+	__u8		reserved;
+};
+
+/* RFC 8335: 2.1 Interface Identification Object */
+struct icmp_ext_echo_iio {
+	struct icmp_extobj_hdr extobj_hdr;
+	union {
+		char name[IFNAMSIZ];
+		__be32 ifindex;
+		struct {
+			struct icmp_ext_echo_ctype3_hdr ctype3_hdr;
+			union {
+				struct in_addr	ipv4_addr;
+				struct in6_addr	ipv6_addr;
+			} ip_addr;
+		} addr;
+	} ident;
+};
 #endif /* _UAPI_LINUX_ICMP_H */
-- 
cgit v1.2.3


From 750f4fc2a12f6632b5aa04526bf57fa06bfe8467 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:21 -0700
Subject: ICMPV6: add support for RFC 8335 PROBE

Add definitions for the ICMPV6 type of Extended Echo Request and
Extended Echo Reply, as defined by sections 2 and 3 of RFC 8335.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/icmpv6.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index 0564fd7ccde4..ecaece3af38d 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -140,6 +140,9 @@ struct icmp6hdr {
 #define ICMPV6_UNK_OPTION		2
 #define ICMPV6_HDR_INCOMP		3
 
+/* Codes for EXT_ECHO (PROBE) */
+#define ICMPV6_EXT_ECHO_REQUEST		160
+#define ICMPV6_EXT_ECHO_REPLY		161
 /*
  *	constants for (set|get)sockopt
  */
-- 
cgit v1.2.3


From f1b8fa9fa5865c58c093cde6d782104c22df9088 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:29 -0700
Subject: net: add sysctl for enabling RFC 8335 PROBE messages

Section 8 of RFC 8335 specifies potential security concerns of
responding to PROBE requests, and states that nodes that support PROBE
functionality MUST be able to enable/disable responses and that
responses MUST be disabled by default

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 6 ++++++
 include/net/netns/ipv4.h               | 1 +
 net/ipv4/sysctl_net_ipv4.c             | 9 +++++++++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index c7952ac5bd2f..4130bce40765 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1143,6 +1143,12 @@ icmp_echo_ignore_all - BOOLEAN
 
 	Default: 0
 
+icmp_echo_enable_probe - BOOLEAN
+        If set to one, then the kernel will respond to RFC 8335 PROBE
+        requests sent to it.
+
+        Default: 0
+
 icmp_echo_ignore_broadcasts - BOOLEAN
 	If set non-zero, then the kernel will ignore all ICMP ECHO and
 	TIMESTAMP requests sent to it via broadcast/multicast.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d377266d133f..9c8dd424d79b 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -84,6 +84,7 @@ struct netns_ipv4 {
 #endif
 
 	u8 sysctl_icmp_echo_ignore_all;
+	u8 sysctl_icmp_echo_enable_probe;
 	u8 sysctl_icmp_echo_ignore_broadcasts;
 	u8 sysctl_icmp_ignore_bogus_error_responses;
 	u8 sysctl_icmp_errors_use_inbound_ifaddr;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3a7e5cf5d6cc..e3cb2d96b55e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -598,6 +598,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dou8vec_minmax,
 	},
+	{
+		.procname	= "icmp_echo_enable_probe",
+		.data		= &init_net.ipv4.sysctl_icmp_echo_enable_probe,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
+	},
 	{
 		.procname	= "icmp_echo_ignore_broadcasts",
 		.data		= &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
-- 
cgit v1.2.3


From 08baf54f01f5b5f06fe242cfc236629c76969cd4 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:36 -0700
Subject: net: add support for sending RFC 8335 PROBE messages

Modify the ping_supported function to support PROBE message types. This
allows tools such as the ping command in the iputils package to be
modified to send PROBE requests through the existing framework for
sending ping requests.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8b943f85fff9..1c9f71a37258 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -453,7 +453,9 @@ EXPORT_SYMBOL_GPL(ping_bind);
 static inline int ping_supported(int family, int type, int code)
 {
 	return (family == AF_INET && type == ICMP_ECHO && code == 0) ||
-	       (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0);
+	       (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) ||
+	       (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) ||
+	       (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0);
 }
 
 /*
-- 
cgit v1.2.3


From 504a40113cc4f329dd75bbf6e4b060603224d814 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:43 -0700
Subject: ipv6: add ipv6_dev_find to stubs

Add ipv6_dev_find to ipv6_stub to allow lookup of net_devices by IPV6
address in net/ipv4/icmp.c.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6_stubs.h | 2 ++
 net/ipv6/addrconf_core.c | 7 +++++++
 net/ipv6/af_inet6.c      | 1 +
 3 files changed, 10 insertions(+)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 8fce558b5fea..afbce90c4480 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -66,6 +66,8 @@ struct ipv6_stub {
 
 	int (*ipv6_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
 			     int (*output)(struct net *, struct sock *, struct sk_buff *));
+	struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr,
+					    struct net_device *dev);
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
 
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index a36626afbc02..1d4054bb345b 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -198,6 +198,12 @@ static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct s
 	return -EAFNOSUPPORT;
 }
 
+static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr,
+						     struct net_device *dev)
+{
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow,
 	.ipv6_route_input  = eafnosupport_ipv6_route_input,
@@ -209,6 +215,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.fib6_nh_init	   = eafnosupport_fib6_nh_init,
 	.ip6_del_rt	   = eafnosupport_ip6_del_rt,
 	.ipv6_fragment	   = eafnosupport_ipv6_fragment,
+	.ipv6_dev_find     = eafnosupport_ipv6_dev_find,
 };
 EXPORT_SYMBOL_GPL(ipv6_stub);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3c9bacffc9c3..4f7ca5807046 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1032,6 +1032,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 #endif
 	.nd_tbl	= &nd_tbl,
 	.ipv6_fragment = ip6_fragment,
+	.ipv6_dev_find = ipv6_dev_find,
 };
 
 static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
-- 
cgit v1.2.3


From d329ea5bd8845f0b196bf41b18b6173340d6e0e4 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 29 Mar 2021 18:45:51 -0700
Subject: icmp: add response to RFC 8335 PROBE messages

Modify the icmp_rcv function to check PROBE messages and call icmp_echo
if a PROBE request is detected.

Modify the existing icmp_echo function to respond ot both ping and PROBE
requests.

This was tested using a custom modification to the iputils package and
wireshark. It supports IPV4 probing by name, ifindex, and probing by
both IPV4 and IPV6 addresses. It currently does not support responding
to probes off the proxy node (see RFC 8335 Section 2).

The modification to the iputils package is still in development and can
be found here: https://github.com/Juniper-Clinic-2020/iputils.git. It
supports full sending functionality of PROBE requests, but currently
does not parse the response messages, which is why Wireshark is required
to verify the sent and recieved PROBE messages. The modification adds
the ``-e'' flag to the command which allows the user to specify the
interface identifier to query the probed host. An example usage would be
<./ping -4 -e 1 [destination]> to send a PROBE request of ifindex 1 to the
destination node.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 121 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 616e2dc1c8fa..76990e13a2f9 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -971,7 +971,7 @@ static bool icmp_redirect(struct sk_buff *skb)
 }
 
 /*
- *	Handle ICMP_ECHO ("ping") requests.
+ *	Handle ICMP_ECHO ("ping") and ICMP_EXT_ECHO ("PROBE") requests.
  *
  *	RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
  *		  requests.
@@ -979,27 +979,125 @@ static bool icmp_redirect(struct sk_buff *skb)
  *		  included in the reply.
  *	RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring
  *		  echo requests, MUST have default=NOT.
+ *	RFC 8335: 8 MUST have a config option to enable/disable ICMP
+ *		  Extended Echo Functionality, MUST be disabled by default
  *	See also WRT handling of options once they are done and working.
  */
 
 static bool icmp_echo(struct sk_buff *skb)
 {
+	struct icmp_ext_hdr *ext_hdr, _ext_hdr;
+	struct icmp_ext_echo_iio *iio, _iio;
+	struct icmp_bxm icmp_param;
+	struct net_device *dev;
+	char buff[IFNAMSIZ];
 	struct net *net;
+	u16 ident_len;
+	u8 status;
 
 	net = dev_net(skb_dst(skb)->dev);
-	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
-		struct icmp_bxm icmp_param;
+	/* should there be an ICMP stat for ignored echos? */
+	if (net->ipv4.sysctl_icmp_echo_ignore_all)
+		return true;
+
+	icmp_param.data.icmph	   = *icmp_hdr(skb);
+	icmp_param.skb		   = skb;
+	icmp_param.offset	   = 0;
+	icmp_param.data_len	   = skb->len;
+	icmp_param.head_len	   = sizeof(struct icmphdr);
 
-		icmp_param.data.icmph	   = *icmp_hdr(skb);
+	if (icmp_param.data.icmph.type == ICMP_ECHO) {
 		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
-		icmp_param.skb		   = skb;
-		icmp_param.offset	   = 0;
-		icmp_param.data_len	   = skb->len;
-		icmp_param.head_len	   = sizeof(struct icmphdr);
-		icmp_reply(&icmp_param, skb);
+		goto send_reply;
 	}
-	/* should there be an ICMP stat for ignored echos? */
-	return true;
+	if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+		return true;
+	/* We currently only support probing interfaces on the proxy node
+	 * Check to ensure L-bit is set
+	 */
+	if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1))
+		return true;
+	/* Clear status bits in reply message */
+	icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
+	icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
+	ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
+	/* Size of iio is class_type dependent.
+	 * Only check header here and assign length based on ctype in the switch statement
+	 */
+	iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
+	if (!ext_hdr || !iio)
+		goto send_mal_query;
+	if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
+		goto send_mal_query;
+	ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
+	status = 0;
+	dev = NULL;
+	switch (iio->extobj_hdr.class_type) {
+	case EXT_ECHO_CTYPE_NAME:
+		iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
+		if (ident_len >= IFNAMSIZ)
+			goto send_mal_query;
+		memset(buff, 0, sizeof(buff));
+		memcpy(buff, &iio->ident.name, ident_len);
+		dev = dev_get_by_name(net, buff);
+		break;
+	case EXT_ECHO_CTYPE_INDEX:
+		iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
+					 sizeof(iio->ident.ifindex), &_iio);
+		if (ident_len != sizeof(iio->ident.ifindex))
+			goto send_mal_query;
+		dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
+		break;
+	case EXT_ECHO_CTYPE_ADDR:
+		if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+				 iio->ident.addr.ctype3_hdr.addrlen)
+			goto send_mal_query;
+		switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
+		case ICMP_AFI_IP:
+			iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
+						 sizeof(struct in_addr), &_iio);
+			if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+					 sizeof(struct in_addr))
+				goto send_mal_query;
+			dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr);
+			break;
+#if IS_ENABLED(CONFIG_IPV6)
+		case ICMP_AFI_IP6:
+			iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
+			if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+					 sizeof(struct in6_addr))
+				goto send_mal_query;
+			dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
+			if (dev)
+				dev_hold(dev);
+			break;
+#endif
+		default:
+			goto send_mal_query;
+		}
+		break;
+	default:
+		goto send_mal_query;
+	}
+	if (!dev) {
+		icmp_param.data.icmph.code = ICMP_EXT_NO_IF;
+		goto send_reply;
+	}
+	/* Fill bits in reply message */
+	if (dev->flags & IFF_UP)
+		status |= EXT_ECHOREPLY_ACTIVE;
+	if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
+		status |= EXT_ECHOREPLY_IPV4;
+	if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
+		status |= EXT_ECHOREPLY_IPV6;
+	dev_put(dev);
+	icmp_param.data.icmph.un.echo.sequence |= htons(status);
+send_reply:
+	icmp_reply(&icmp_param, skb);
+		return true;
+send_mal_query:
+	icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY;
+	goto send_reply;
 }
 
 /*
@@ -1088,6 +1186,16 @@ int icmp_rcv(struct sk_buff *skb)
 	icmph = icmp_hdr(skb);
 
 	ICMPMSGIN_INC_STATS(net, icmph->type);
+
+	/* Check for ICMP Extended Echo (PROBE) messages */
+	if (icmph->type == ICMP_EXT_ECHO) {
+		/* We can't use icmp_pointers[].handler() because it is an array of
+		 * size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
+		 */
+		success = icmp_echo(skb);
+		goto success_check;
+	}
+
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
@@ -1097,7 +1205,6 @@ int icmp_rcv(struct sk_buff *skb)
 	if (icmph->type > NR_ICMP_TYPES)
 		goto error;
 
-
 	/*
 	 *	Parse the ICMP message
 	 */
@@ -1123,7 +1230,7 @@ int icmp_rcv(struct sk_buff *skb)
 	}
 
 	success = icmp_pointers[icmph->type].handler(skb);
-
+success_check:
 	if (success)  {
 		consume_skb(skb);
 		return NET_RX_SUCCESS;
@@ -1340,6 +1447,7 @@ static int __net_init icmp_sk_init(struct net *net)
 
 	/* Control parameters for ECHO replies. */
 	net->ipv4.sysctl_icmp_echo_ignore_all = 0;
+	net->ipv4.sysctl_icmp_echo_enable_probe = 0;
 	net->ipv4.sysctl_icmp_echo_ignore_broadcasts = 1;
 
 	/* Control parameter - ignore bogus broadcast responses? */
-- 
cgit v1.2.3


From 77053fb7b428099bfdc2ec81a923587a2692d84b Mon Sep 17 00:00:00 2001
From: Shixin Liu <liushixin2@huawei.com>
Date: Tue, 30 Mar 2021 10:24:14 +0800
Subject: mISDN: Use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Changelog:
From v1:
1. fix the mistake reported by kernel test robot.

Signed-off-by: Shixin Liu <liushixin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 7 ++-----
 drivers/isdn/mISDN/dsp_core.c          | 3 +--
 drivers/isdn/mISDN/l1oip_core.c        | 3 +--
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 14092152b786..4f7eaa17fb27 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -173,13 +173,13 @@
 #define	MAX_FRAGS	(32 * MAX_CARDS)
 
 static LIST_HEAD(HFClist);
-static spinlock_t HFClock; /* global hfc list lock */
+static DEFINE_SPINLOCK(HFClock); /* global hfc list lock */
 
 static void ph_state_change(struct dchannel *);
 
 static struct hfc_multi *syncmaster;
 static int plxsd_master; /* if we have a master card (yet) */
-static spinlock_t plx_lock; /* may not acquire other lock inside */
+static DEFINE_SPINLOCK(plx_lock); /* may not acquire other lock inside */
 
 #define	TYP_E1		1
 #define	TYP_4S		4
@@ -5480,9 +5480,6 @@ HFCmulti_init(void)
 	printk(KERN_DEBUG "%s: IRQ_DEBUG IS ENABLED!\n", __func__);
 #endif
 
-	spin_lock_init(&HFClock);
-	spin_lock_init(&plx_lock);
-
 	if (debug & DEBUG_HFCMULTI_INIT)
 		printk(KERN_DEBUG "%s: init entered\n", __func__);
 
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 4946ea14bf74..8766095cd6e7 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -176,7 +176,7 @@ MODULE_LICENSE("GPL");
 
 /*int spinnest = 0;*/
 
-spinlock_t dsp_lock; /* global dsp lock */
+DEFINE_SPINLOCK(dsp_lock); /* global dsp lock */
 struct list_head dsp_ilist;
 struct list_head conf_ilist;
 int dsp_debug;
@@ -1169,7 +1169,6 @@ static int __init dsp_init(void)
 	printk(KERN_INFO "mISDN_dsp: DSP clocks every %d samples. This equals "
 	       "%d jiffies.\n", dsp_poll, dsp_tics);
 
-	spin_lock_init(&dsp_lock);
 	INIT_LIST_HEAD(&dsp_ilist);
 	INIT_LIST_HEAD(&conf_ilist);
 
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index facbd886ee1c..62fad8f1fc42 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -229,7 +229,7 @@
 static const char *l1oip_revision = "2.00";
 
 static int l1oip_cnt;
-static spinlock_t l1oip_lock;
+static DEFINE_SPINLOCK(l1oip_lock);
 static struct list_head l1oip_ilist;
 
 #define MAX_CARDS	16
@@ -1441,7 +1441,6 @@ l1oip_init(void)
 	       l1oip_revision);
 
 	INIT_LIST_HEAD(&l1oip_ilist);
-	spin_lock_init(&l1oip_lock);
 
 	if (l1oip_4bit_alloc(ulaw))
 		return -ENOMEM;
-- 
cgit v1.2.3


From 5979415d00d42fd66d4227a34a6fa528003ae88a Mon Sep 17 00:00:00 2001
From: Shixin Liu <liushixin2@huawei.com>
Date: Tue, 30 Mar 2021 10:24:15 +0800
Subject: mISDN: Use LIST_HEAD() for list_head

There's no need to declare a list and then init it manually,
just use the LIST_HEAD() macro.

Signed-off-by: Shixin Liu <liushixin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/dsp_core.c   | 7 ++-----
 drivers/isdn/mISDN/l1oip_core.c | 4 +---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 8766095cd6e7..386084530c2f 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -177,8 +177,8 @@ MODULE_LICENSE("GPL");
 /*int spinnest = 0;*/
 
 DEFINE_SPINLOCK(dsp_lock); /* global dsp lock */
-struct list_head dsp_ilist;
-struct list_head conf_ilist;
+LIST_HEAD(dsp_ilist);
+LIST_HEAD(conf_ilist);
 int dsp_debug;
 int dsp_options;
 int dsp_poll, dsp_tics;
@@ -1169,9 +1169,6 @@ static int __init dsp_init(void)
 	printk(KERN_INFO "mISDN_dsp: DSP clocks every %d samples. This equals "
 	       "%d jiffies.\n", dsp_poll, dsp_tics);
 
-	INIT_LIST_HEAD(&dsp_ilist);
-	INIT_LIST_HEAD(&conf_ilist);
-
 	/* init conversion tables */
 	dsp_audio_generate_law_tables();
 	dsp_silence = (dsp_options & DSP_OPT_ULAW) ? 0xff : 0x2a;
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index 62fad8f1fc42..2c40412466e6 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -230,7 +230,7 @@ static const char *l1oip_revision = "2.00";
 
 static int l1oip_cnt;
 static DEFINE_SPINLOCK(l1oip_lock);
-static struct list_head l1oip_ilist;
+static LIST_HEAD(l1oip_ilist);
 
 #define MAX_CARDS	16
 static u_int type[MAX_CARDS];
@@ -1440,8 +1440,6 @@ l1oip_init(void)
 	printk(KERN_INFO "mISDN: Layer-1-over-IP driver Rev. %s\n",
 	       l1oip_revision);
 
-	INIT_LIST_HEAD(&l1oip_ilist);
-
 	if (l1oip_4bit_alloc(ulaw))
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 913d55037616659c04763e756f948fcbaef0bbee Mon Sep 17 00:00:00 2001
From: He Fengqing <hefengqing@huawei.com>
Date: Tue, 30 Mar 2021 02:48:43 +0000
Subject: bpf: Remove unused bpf_load_pointer

Remove unused bpf_load_pointer function in filter.h. The last user of it has
been removed with 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind").

Signed-off-by: He Fengqing <hefengqing@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210330024843.3479844-1-hefengqing@huawei.com
---
 include/linux/filter.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index eecfd82db648..9a09547bc7ba 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1246,15 +1246,6 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
 void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
 					   int k, unsigned int size);
 
-static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
-				     unsigned int size, void *buffer)
-{
-	if (k >= 0)
-		return skb_header_pointer(skb, k, size, buffer);
-
-	return bpf_internal_load_pointer_neg_helper(skb, k, size);
-}
-
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
-- 
cgit v1.2.3


From 1c137d4777b5b66c64854ba469ab0650ff0dc5bd Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Tue, 30 Mar 2021 10:46:53 +0800
Subject: stmmac: intel: add cross time-stamping freq difference adjustment

Cross time-stamping mechanism used in certain instance of Intel mGbE
may run at different clock frequency in comparison to the clock
frequency used by processor, so we introduce cross T/S frequency
adjustment to ensure TSC calculation is correct when processor got the
cross time-stamps.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 27 +++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 08b4852eed4c..3d9a57043af2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -22,8 +22,13 @@
 #define PCH_PTP_CLK_FREQ_19_2MHZ	(GMAC_GPO0)
 #define PCH_PTP_CLK_FREQ_200MHZ		(0)
 
+/* Cross-timestamping defines */
+#define ART_CPUID_LEAF		0x15
+#define EHL_PSE_ART_MHZ		19200000
+
 struct intel_priv_data {
 	int mdio_adhoc_addr;	/* mdio address for serdes & etc */
+	unsigned long crossts_adj;
 	bool is_pse;
 };
 
@@ -340,9 +345,26 @@ static int intel_crosststamp(ktime_t *device,
 		*system = convert_art_to_tsc(art_time);
 	}
 
+	system->cycles *= intel_priv->crossts_adj;
+
 	return 0;
 }
 
+static void intel_mgbe_pse_crossts_adj(struct intel_priv_data *intel_priv,
+				       int base)
+{
+	if (boot_cpu_has(X86_FEATURE_ART)) {
+		unsigned int art_freq;
+
+		/* On systems that support ART, ART frequency can be obtained
+		 * from ECX register of CPUID leaf (0x15).
+		 */
+		art_freq = cpuid_ecx(ART_CPUID_LEAF);
+		do_div(art_freq, base);
+		intel_priv->crossts_adj = art_freq;
+	}
+}
+
 static void common_default_data(struct plat_stmmacenet_data *plat)
 {
 	plat->clk_csr = 2;	/* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */
@@ -551,6 +573,8 @@ static int ehl_pse0_common_data(struct pci_dev *pdev,
 	plat->bus_id = 2;
 	plat->addr64 = 32;
 
+	intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ);
+
 	return ehl_common_data(pdev, plat);
 }
 
@@ -587,6 +611,8 @@ static int ehl_pse1_common_data(struct pci_dev *pdev,
 	plat->bus_id = 3;
 	plat->addr64 = 32;
 
+	intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ);
+
 	return ehl_common_data(pdev, plat);
 }
 
@@ -913,6 +939,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 
 	plat->bsp_priv = intel_priv;
 	intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR;
+	intel_priv->crossts_adj = 1;
 
 	/* Initialize all MSI vectors to invalid so that it can be set
 	 * according to platform data settings below.
-- 
cgit v1.2.3


From cda1893e9f7c1d78e391dbb6ef1798cd32354113 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 30 Mar 2021 20:55:39 +0800
Subject: net: mhi: remove pointless conditional before kfree_skb()

It already has null pointer check in kfree_skb(),
remove pointless pointer check before kfree_skb().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index 5ec7a29f668d..0d8293a47a56 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -361,8 +361,7 @@ static void mhi_net_remove(struct mhi_device *mhi_dev)
 
 	mhi_unprepare_from_transfer(mhi_netdev->mdev);
 
-	if (mhi_netdev->skbagg_head)
-		kfree_skb(mhi_netdev->skbagg_head);
+	kfree_skb(mhi_netdev->skbagg_head);
 
 	free_netdev(mhi_netdev->ndev);
 }
-- 
cgit v1.2.3


From 235fc0e36d35270e1ff426f26bbea6154b8863a5 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 26 Mar 2021 19:43:48 +0000
Subject: bpf: Remove redundant assignment of variable id

The variable id is being assigned a value that is never read, the
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210326194348.623782-1-colin.king@canonical.com
---
 kernel/bpf/btf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ec8afc4bc560..0600ed325fa0 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -789,7 +789,6 @@ static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
 
 	while (btf_type_is_modifier(t) &&
 	       BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
-		id = t->type;
 		t = btf_type_by_id(btf, t->type);
 	}
 
-- 
cgit v1.2.3


From db3187ae21bb0cff44430a7510cf2d2b23e41cd8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:05 +0100
Subject: netfilter: nf_log_ipv4: rename to nf_log_syslog

Netfilter has multiple log modules:
 nf_log_arp
 nf_log_bridge
 nf_log_ipv4
 nf_log_ipv6
 nf_log_netdev
 nfnetlink_log
 nf_log_common

With the exception of nfnetlink_log (packet is sent to userspace for
dissection/logging), all of them log to the kernel ringbuffer.

This is the first part of a series to merge all modules except
nfnetlink_log into a single module: nf_log_syslog.

This allows to reduce code.  After the series, only two log modules remain:
nfnetlink_log and nf_log_syslog. The latter provides the same
functionality as the old per-af log modules.

This renames nf_log_ipv4 to nf_log_syslog.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig       |   5 +-
 net/ipv4/netfilter/Makefile      |   1 -
 net/ipv4/netfilter/nf_log_ipv4.c | 395 ---------------------------------------
 net/netfilter/Kconfig            |  14 +-
 net/netfilter/Makefile           |   1 +
 net/netfilter/nf_log_syslog.c    | 391 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 409 insertions(+), 398 deletions(-)
 delete mode 100644 net/ipv4/netfilter/nf_log_ipv4.c
 create mode 100644 net/netfilter/nf_log_syslog.c

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a2f4f894be2b..aadb98e43fb1 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -81,7 +81,10 @@ config NF_LOG_ARP
 config NF_LOG_IPV4
 	tristate "IPv4 packet logging"
 	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
+	select NF_LOG_SYSLOG
+	help
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG.
 
 config NF_REJECT_IPV4
 	tristate "IPv4 packet rejection"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7c497c78105f..abd133048b42 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
 
 # logging
 obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
-obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
 
 # reject
 obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
deleted file mode 100644
index d07583fac8f8..000000000000
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ /dev/null
@@ -1,395 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <net/ipv6.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/xt_LOG.h>
-#include <net/netfilter/nf_log.h>
-
-static const struct nf_loginfo default_loginfo = {
-	.type	= NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_DEFAULT_MASK,
-		},
-	},
-};
-
-/* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
-			     const struct nf_loginfo *info,
-			     const struct sk_buff *skb, unsigned int iphoff)
-{
-	struct iphdr _iph;
-	const struct iphdr *ih;
-	unsigned int logflags;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-	else
-		logflags = NF_LOG_DEFAULT_MASK;
-
-	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
-	if (ih == NULL) {
-		nf_log_buf_add(m, "TRUNCATED");
-		return;
-	}
-
-	/* Important fields:
-	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
-	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
-
-	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
-		       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
-		       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
-	/* Max length: 6 "CE DF MF " */
-	if (ntohs(ih->frag_off) & IP_CE)
-		nf_log_buf_add(m, "CE ");
-	if (ntohs(ih->frag_off) & IP_DF)
-		nf_log_buf_add(m, "DF ");
-	if (ntohs(ih->frag_off) & IP_MF)
-		nf_log_buf_add(m, "MF ");
-
-	/* Max length: 11 "FRAG:65535 " */
-	if (ntohs(ih->frag_off) & IP_OFFSET)
-		nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
-	if ((logflags & NF_LOG_IPOPT) &&
-	    ih->ihl * 4 > sizeof(struct iphdr)) {
-		const unsigned char *op;
-		unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
-		unsigned int i, optsize;
-
-		optsize = ih->ihl * 4 - sizeof(struct iphdr);
-		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
-					optsize, _opt);
-		if (op == NULL) {
-			nf_log_buf_add(m, "TRUNCATED");
-			return;
-		}
-
-		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		nf_log_buf_add(m, "OPT (");
-		for (i = 0; i < optsize; i++)
-			nf_log_buf_add(m, "%02X", op[i]);
-		nf_log_buf_add(m, ") ");
-	}
-
-	switch (ih->protocol) {
-	case IPPROTO_TCP:
-		if (nf_log_dump_tcp_header(m, skb, ih->protocol,
-					   ntohs(ih->frag_off) & IP_OFFSET,
-					   iphoff+ih->ihl*4, logflags))
-			return;
-		break;
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-		if (nf_log_dump_udp_header(m, skb, ih->protocol,
-					   ntohs(ih->frag_off) & IP_OFFSET,
-					   iphoff+ih->ihl*4))
-			return;
-		break;
-	case IPPROTO_ICMP: {
-		struct icmphdr _icmph;
-		const struct icmphdr *ich;
-		static const size_t required_len[NR_ICMP_TYPES+1]
-			= { [ICMP_ECHOREPLY] = 4,
-			    [ICMP_DEST_UNREACH]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_SOURCE_QUENCH]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_REDIRECT]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_ECHO] = 4,
-			    [ICMP_TIME_EXCEEDED]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_PARAMETERPROB]
-			    = 8 + sizeof(struct iphdr),
-			    [ICMP_TIMESTAMP] = 20,
-			    [ICMP_TIMESTAMPREPLY] = 20,
-			    [ICMP_ADDRESS] = 12,
-			    [ICMP_ADDRESSREPLY] = 12 };
-
-		/* Max length: 11 "PROTO=ICMP " */
-		nf_log_buf_add(m, "PROTO=ICMP ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
-					 sizeof(_icmph), &_icmph);
-		if (ich == NULL) {
-			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
-				       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Max length: 18 "TYPE=255 CODE=255 " */
-		nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		if (ich->type <= NR_ICMP_TYPES &&
-		    required_len[ich->type] &&
-		    skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
-				       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		switch (ich->type) {
-		case ICMP_ECHOREPLY:
-		case ICMP_ECHO:
-			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			nf_log_buf_add(m, "ID=%u SEQ=%u ",
-				       ntohs(ich->un.echo.id),
-				       ntohs(ich->un.echo.sequence));
-			break;
-
-		case ICMP_PARAMETERPROB:
-			/* Max length: 14 "PARAMETER=255 " */
-			nf_log_buf_add(m, "PARAMETER=%u ",
-				       ntohl(ich->un.gateway) >> 24);
-			break;
-		case ICMP_REDIRECT:
-			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
-			fallthrough;
-		case ICMP_DEST_UNREACH:
-		case ICMP_SOURCE_QUENCH:
-		case ICMP_TIME_EXCEEDED:
-			/* Max length: 3+maxlen */
-			if (!iphoff) { /* Only recurse once. */
-				nf_log_buf_add(m, "[");
-				dump_ipv4_packet(net, m, info, skb,
-					    iphoff + ih->ihl*4+sizeof(_icmph));
-				nf_log_buf_add(m, "] ");
-			}
-
-			/* Max length: 10 "MTU=65535 " */
-			if (ich->type == ICMP_DEST_UNREACH &&
-			    ich->code == ICMP_FRAG_NEEDED) {
-				nf_log_buf_add(m, "MTU=%u ",
-					       ntohs(ich->un.frag.mtu));
-			}
-		}
-		break;
-	}
-	/* Max Length */
-	case IPPROTO_AH: {
-		struct ip_auth_hdr _ahdr;
-		const struct ip_auth_hdr *ah;
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 9 "PROTO=AH " */
-		nf_log_buf_add(m, "PROTO=AH ");
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
-					sizeof(_ahdr), &_ahdr);
-		if (ah == NULL) {
-			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
-				       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Length: 15 "SPI=0xF1234567 " */
-		nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
-		break;
-	}
-	case IPPROTO_ESP: {
-		struct ip_esp_hdr _esph;
-		const struct ip_esp_hdr *eh;
-
-		/* Max length: 10 "PROTO=ESP " */
-		nf_log_buf_add(m, "PROTO=ESP ");
-
-		if (ntohs(ih->frag_off) & IP_OFFSET)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
-					sizeof(_esph), &_esph);
-		if (eh == NULL) {
-			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
-				       skb->len - iphoff - ih->ihl*4);
-			break;
-		}
-
-		/* Length: 15 "SPI=0xF1234567 " */
-		nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
-		break;
-	}
-	/* Max length: 10 "PROTO 255 " */
-	default:
-		nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
-	}
-
-	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & NF_LOG_UID) && !iphoff)
-		nf_log_dump_sk_uid_gid(net, m, skb->sk);
-
-	/* Max length: 16 "MARK=0xFFFFFFFF " */
-	if (!iphoff && skb->mark)
-		nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
-
-	/* Proto    Max log string length */
-	/* IP:	    40+46+6+11+127 = 230 */
-	/* TCP:     10+max(25,20+30+13+9+32+11+127) = 252 */
-	/* UDP:     10+max(25,20) = 35 */
-	/* UDPLITE: 14+max(25,20) = 39 */
-	/* ICMP:    11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
-	/* ESP:     10+max(25)+15 = 50 */
-	/* AH:	    9+max(25)+15 = 49 */
-	/* unknown: 10 */
-
-	/* (ICMP allows recursion one level deep) */
-	/* maxlen =  IP + ICMP +  IP + max(TCP,UDP,ICMP,unknown) */
-	/* maxlen = 230+   91  + 230 + 252 = 803 */
-}
-
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
-			    const struct nf_loginfo *info,
-			    const struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	unsigned int logflags = 0;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-
-	if (!(logflags & NF_LOG_MACDECODE))
-		goto fallback;
-
-	switch (dev->type) {
-	case ARPHRD_ETHER:
-		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
-			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
-		nf_log_dump_vlan(m, skb);
-		nf_log_buf_add(m, "MACPROTO=%04x ",
-			       ntohs(eth_hdr(skb)->h_proto));
-		return;
-	default:
-		break;
-	}
-
-fallback:
-	nf_log_buf_add(m, "MAC=");
-	if (dev->hard_header_len &&
-	    skb->mac_header != skb->network_header) {
-		const unsigned char *p = skb_mac_header(skb);
-		unsigned int i;
-
-		nf_log_buf_add(m, "%02x", *p++);
-		for (i = 1; i < dev->hard_header_len; i++, p++)
-			nf_log_buf_add(m, ":%02x", *p);
-	}
-	nf_log_buf_add(m, " ");
-}
-
-static void nf_log_ip_packet(struct net *net, u_int8_t pf,
-			     unsigned int hooknum, const struct sk_buff *skb,
-			     const struct net_device *in,
-			     const struct net_device *out,
-			     const struct nf_loginfo *loginfo,
-			     const char *prefix)
-{
-	struct nf_log_buf *m;
-
-	/* FIXME: Disabled from containers until syslog ns is supported */
-	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
-		return;
-
-	m = nf_log_buf_open();
-
-	if (!loginfo)
-		loginfo = &default_loginfo;
-
-	nf_log_dump_packet_common(m, pf, hooknum, skb, in,
-				  out, loginfo, prefix);
-
-	if (in != NULL)
-		dump_ipv4_mac_header(m, loginfo, skb);
-
-	dump_ipv4_packet(net, m, loginfo, skb, 0);
-
-	nf_log_buf_close(m);
-}
-
-static struct nf_logger nf_ip_logger __read_mostly = {
-	.name		= "nf_log_ipv4",
-	.type		= NF_LOG_TYPE_LOG,
-	.logfn		= nf_log_ip_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init nf_log_ipv4_net_init(struct net *net)
-{
-	return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
-}
-
-static void __net_exit nf_log_ipv4_net_exit(struct net *net)
-{
-	nf_log_unset(net, &nf_ip_logger);
-}
-
-static struct pernet_operations nf_log_ipv4_net_ops = {
-	.init = nf_log_ipv4_net_init,
-	.exit = nf_log_ipv4_net_exit,
-};
-
-static int __init nf_log_ipv4_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&nf_log_ipv4_net_ops);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
-	if (ret < 0) {
-		pr_err("failed to register logger\n");
-		goto err1;
-	}
-
-	return 0;
-
-err1:
-	unregister_pernet_subsys(&nf_log_ipv4_net_ops);
-	return ret;
-}
-
-static void __exit nf_log_ipv4_exit(void)
-{
-	unregister_pernet_subsys(&nf_log_ipv4_net_ops);
-	nf_log_unregister(&nf_ip_logger);
-}
-
-module_init(nf_log_ipv4_init);
-module_exit(nf_log_ipv4_exit);
-
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 1a92063c73a4..d5c047190eb9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -78,6 +78,18 @@ config NF_LOG_NETDEV
 	tristate "Netdev packet logging"
 	select NF_LOG_COMMON
 
+config NF_LOG_SYSLOG
+	tristate "Syslog packet logging"
+	default m if NETFILTER_ADVANCED=n
+	select NF_LOG_COMMON
+	help
+	  This option enable support for packet logging via syslog.
+	  It supports IPv4 and common transport protocols such as TCP and UDP.
+	  This is a simpler but less flexible logging method compared to
+	  CONFIG_NETFILTER_NETLINK_LOG.
+	  If both are enabled the backend to use can be configured at run-time
+	  by means of per-address-family sysctl tunables.
+
 if NF_CONNTRACK
 config NETFILTER_CONNCOUNT
 	tristate
@@ -923,7 +935,7 @@ config NETFILTER_XT_TARGET_LED
 config NETFILTER_XT_TARGET_LOG
 	tristate "LOG target support"
 	select NF_LOG_COMMON
-	select NF_LOG_IPV4
+	select NF_LOG_SYSLOG
 	select NF_LOG_IPV6 if IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
 	help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 33da7bf1b68e..59642d9ab7a5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -50,6 +50,7 @@ nf_nat-y	:= nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
 
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
+obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
 
 # packet logging for netdev family
 obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
new file mode 100644
index 000000000000..e6fe156e77c7
--- /dev/null
+++ b/net/netfilter/nf_log_syslog.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static const struct nf_loginfo default_loginfo = {
+	.type	= NF_LOG_TYPE_LOG,
+	.u = {
+		.log = {
+			.level	  = LOGLEVEL_NOTICE,
+			.logflags = NF_LOG_DEFAULT_MASK,
+		},
+	},
+};
+
+/* One level of recursion won't kill us */
+static noinline_for_stack void
+dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
+		 const struct nf_loginfo *info,
+		 const struct sk_buff *skb, unsigned int iphoff)
+{
+	const struct iphdr *ih;
+	unsigned int logflags;
+	struct iphdr _iph;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+	else
+		logflags = NF_LOG_DEFAULT_MASK;
+
+	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+	if (!ih) {
+		nf_log_buf_add(m, "TRUNCATED");
+		return;
+	}
+
+	/* Important fields:
+	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options.
+	 * Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 "
+	 */
+	nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
+
+	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+	nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+		       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+		       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+	/* Max length: 6 "CE DF MF " */
+	if (ntohs(ih->frag_off) & IP_CE)
+		nf_log_buf_add(m, "CE ");
+	if (ntohs(ih->frag_off) & IP_DF)
+		nf_log_buf_add(m, "DF ");
+	if (ntohs(ih->frag_off) & IP_MF)
+		nf_log_buf_add(m, "MF ");
+
+	/* Max length: 11 "FRAG:65535 " */
+	if (ntohs(ih->frag_off) & IP_OFFSET)
+		nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+	if ((logflags & NF_LOG_IPOPT) &&
+	    ih->ihl * 4 > sizeof(struct iphdr)) {
+		unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
+		const unsigned char *op;
+		unsigned int i, optsize;
+
+		optsize = ih->ihl * 4 - sizeof(struct iphdr);
+		op = skb_header_pointer(skb, iphoff + sizeof(_iph),
+					optsize, _opt);
+		if (!op) {
+			nf_log_buf_add(m, "TRUNCATED");
+			return;
+		}
+
+		/* Max length: 127 "OPT (" 15*4*2chars ") " */
+		nf_log_buf_add(m, "OPT (");
+		for (i = 0; i < optsize; i++)
+			nf_log_buf_add(m, "%02X", op[i]);
+		nf_log_buf_add(m, ") ");
+	}
+
+	switch (ih->protocol) {
+	case IPPROTO_TCP:
+		if (nf_log_dump_tcp_header(m, skb, ih->protocol,
+					   ntohs(ih->frag_off) & IP_OFFSET,
+					   iphoff + ih->ihl * 4, logflags))
+			return;
+		break;
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+		if (nf_log_dump_udp_header(m, skb, ih->protocol,
+					   ntohs(ih->frag_off) & IP_OFFSET,
+					   iphoff + ih->ihl * 4))
+			return;
+		break;
+	case IPPROTO_ICMP: {
+		static const size_t required_len[NR_ICMP_TYPES + 1] = {
+			[ICMP_ECHOREPLY] = 4,
+			[ICMP_DEST_UNREACH] = 8 + sizeof(struct iphdr),
+			[ICMP_SOURCE_QUENCH] = 8 + sizeof(struct iphdr),
+			[ICMP_REDIRECT] = 8 + sizeof(struct iphdr),
+			[ICMP_ECHO] = 4,
+			[ICMP_TIME_EXCEEDED] = 8 + sizeof(struct iphdr),
+			[ICMP_PARAMETERPROB] = 8 + sizeof(struct iphdr),
+			[ICMP_TIMESTAMP] = 20,
+			[ICMP_TIMESTAMPREPLY] = 20,
+			[ICMP_ADDRESS] = 12,
+			[ICMP_ADDRESSREPLY] = 12 };
+		const struct icmphdr *ich;
+		struct icmphdr _icmph;
+
+		/* Max length: 11 "PROTO=ICMP " */
+		nf_log_buf_add(m, "PROTO=ICMP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+					 sizeof(_icmph), &_icmph);
+		if (!ich) {
+			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+				       skb->len - iphoff - ih->ihl * 4);
+			break;
+		}
+
+		/* Max length: 18 "TYPE=255 CODE=255 " */
+		nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		if (ich->type <= NR_ICMP_TYPES &&
+		    required_len[ich->type] &&
+		    skb->len - iphoff - ih->ihl * 4 < required_len[ich->type]) {
+			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+				       skb->len - iphoff - ih->ihl * 4);
+			break;
+		}
+
+		switch (ich->type) {
+		case ICMP_ECHOREPLY:
+		case ICMP_ECHO:
+			/* Max length: 19 "ID=65535 SEQ=65535 " */
+			nf_log_buf_add(m, "ID=%u SEQ=%u ",
+				       ntohs(ich->un.echo.id),
+				       ntohs(ich->un.echo.sequence));
+			break;
+
+		case ICMP_PARAMETERPROB:
+			/* Max length: 14 "PARAMETER=255 " */
+			nf_log_buf_add(m, "PARAMETER=%u ",
+				       ntohl(ich->un.gateway) >> 24);
+			break;
+		case ICMP_REDIRECT:
+			/* Max length: 24 "GATEWAY=255.255.255.255 " */
+			nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
+			fallthrough;
+		case ICMP_DEST_UNREACH:
+		case ICMP_SOURCE_QUENCH:
+		case ICMP_TIME_EXCEEDED:
+			/* Max length: 3+maxlen */
+			if (!iphoff) { /* Only recurse once. */
+				nf_log_buf_add(m, "[");
+				dump_ipv4_packet(net, m, info, skb,
+						 iphoff + ih->ihl * 4 + sizeof(_icmph));
+				nf_log_buf_add(m, "] ");
+			}
+
+			/* Max length: 10 "MTU=65535 " */
+			if (ich->type == ICMP_DEST_UNREACH &&
+			    ich->code == ICMP_FRAG_NEEDED) {
+				nf_log_buf_add(m, "MTU=%u ",
+					       ntohs(ich->un.frag.mtu));
+			}
+		}
+		break;
+	}
+	/* Max Length */
+	case IPPROTO_AH: {
+		const struct ip_auth_hdr *ah;
+		struct ip_auth_hdr _ahdr;
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 9 "PROTO=AH " */
+		nf_log_buf_add(m, "PROTO=AH ");
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ah = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+					sizeof(_ahdr), &_ahdr);
+		if (!ah) {
+			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+				       skb->len - iphoff - ih->ihl * 4);
+			break;
+		}
+
+		/* Length: 15 "SPI=0xF1234567 " */
+		nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+		break;
+	}
+	case IPPROTO_ESP: {
+		const struct ip_esp_hdr *eh;
+		struct ip_esp_hdr _esph;
+
+		/* Max length: 10 "PROTO=ESP " */
+		nf_log_buf_add(m, "PROTO=ESP ");
+
+		if (ntohs(ih->frag_off) & IP_OFFSET)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		eh = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+					sizeof(_esph), &_esph);
+		if (!eh) {
+			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+				       skb->len - iphoff - ih->ihl * 4);
+			break;
+		}
+
+		/* Length: 15 "SPI=0xF1234567 " */
+		nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
+		break;
+	}
+	/* Max length: 10 "PROTO 255 " */
+	default:
+		nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
+	}
+
+	/* Max length: 15 "UID=4294967295 " */
+	if ((logflags & NF_LOG_UID) && !iphoff)
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
+
+	/* Max length: 16 "MARK=0xFFFFFFFF " */
+	if (!iphoff && skb->mark)
+		nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+
+	/* Proto    Max log string length */
+	/* IP:	    40+46+6+11+127 = 230 */
+	/* TCP:     10+max(25,20+30+13+9+32+11+127) = 252 */
+	/* UDP:     10+max(25,20) = 35 */
+	/* UDPLITE: 14+max(25,20) = 39 */
+	/* ICMP:    11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+	/* ESP:     10+max(25)+15 = 50 */
+	/* AH:	    9+max(25)+15 = 49 */
+	/* unknown: 10 */
+
+	/* (ICMP allows recursion one level deep) */
+	/* maxlen =  IP + ICMP +  IP + max(TCP,UDP,ICMP,unknown) */
+	/* maxlen = 230+   91  + 230 + 252 = 803 */
+}
+
+static void dump_ipv4_mac_header(struct nf_log_buf *m,
+				 const struct nf_loginfo *info,
+				 const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & NF_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
+			       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	nf_log_buf_add(m, "MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int i;
+
+		nf_log_buf_add(m, "%02x", *p++);
+		for (i = 1; i < dev->hard_header_len; i++, p++)
+			nf_log_buf_add(m, ":%02x", *p);
+	}
+	nf_log_buf_add(m, " ");
+}
+
+static void nf_log_ip_packet(struct net *net, u_int8_t pf,
+			     unsigned int hooknum, const struct sk_buff *skb,
+			     const struct net_device *in,
+			     const struct net_device *out,
+			     const struct nf_loginfo *loginfo,
+			     const char *prefix)
+{
+	struct nf_log_buf *m;
+
+	/* FIXME: Disabled from containers until syslog ns is supported */
+	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+		return;
+
+	m = nf_log_buf_open();
+
+	if (!loginfo)
+		loginfo = &default_loginfo;
+
+	nf_log_dump_packet_common(m, pf, hooknum, skb, in,
+				  out, loginfo, prefix);
+
+	if (in)
+		dump_ipv4_mac_header(m, loginfo, skb);
+
+	dump_ipv4_packet(net, m, loginfo, skb, 0);
+
+	nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip_logger __read_mostly = {
+	.name		= "nf_log_ipv4",
+	.type		= NF_LOG_TYPE_LOG,
+	.logfn		= nf_log_ip_packet,
+	.me		= THIS_MODULE,
+};
+
+static int __net_init nf_log_syslog_net_init(struct net *net)
+{
+	return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+}
+
+static void __net_exit nf_log_syslog_net_exit(struct net *net)
+{
+	nf_log_unset(net, &nf_ip_logger);
+}
+
+static struct pernet_operations nf_log_syslog_net_ops = {
+	.init = nf_log_syslog_net_init,
+	.exit = nf_log_syslog_net_exit,
+};
+
+static int __init nf_log_syslog_init(void)
+{
+	int ret;
+
+	ret = register_pernet_subsys(&nf_log_syslog_net_ops);
+	if (ret < 0)
+		return ret;
+
+	ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
+	if (ret < 0)
+		goto err1;
+
+	return 0;
+
+err1:
+	unregister_pernet_subsys(&nf_log_syslog_net_ops);
+	return ret;
+}
+
+static void __exit nf_log_syslog_exit(void)
+{
+	unregister_pernet_subsys(&nf_log_syslog_net_ops);
+	nf_log_unregister(&nf_ip_logger);
+}
+
+module_init(nf_log_syslog_init);
+module_exit(nf_log_syslog_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter syslog packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf_log_ipv4");
+MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
-- 
cgit v1.2.3


From f11d61e7957d9dd54c4d87bb59da83fa949c15cb Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:06 +0100
Subject: netfilter: nf_log_arp: merge with nf_log_syslog

similar to previous change: nf_log_syslog now covers ARP logging
as well, the old nf_log_arp module is removed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig      |   5 +-
 net/ipv4/netfilter/Makefile     |   3 -
 net/ipv4/netfilter/nf_log_arp.c | 172 ----------------------------------------
 net/netfilter/nf_log_syslog.c   | 113 +++++++++++++++++++++++++-
 4 files changed, 115 insertions(+), 178 deletions(-)
 delete mode 100644 net/ipv4/netfilter/nf_log_arp.c

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index aadb98e43fb1..63cb953bd019 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -76,7 +76,10 @@ config NF_DUP_IPV4
 config NF_LOG_ARP
 	tristate "ARP packet logging"
 	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
+	select NF_LOG_SYSLOG
+	help
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG.
 
 config NF_LOG_IPV4
 	tristate "IPv4 packet logging"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index abd133048b42..f38fb1368ddb 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -9,9 +9,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
 obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
 
-# logging
-obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
-
 # reject
 obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
 
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
deleted file mode 100644
index 136030ad2e54..000000000000
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ /dev/null
@@ -1,172 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * Based on code from ebt_log from:
- *
- * Bart De Schuymer <bdschuym@pandora.be>
- * Harald Welte <laforge@netfilter.org>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/xt_LOG.h>
-#include <net/netfilter/nf_log.h>
-
-static const struct nf_loginfo default_loginfo = {
-	.type	= NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_DEFAULT_MASK,
-		},
-	},
-};
-
-struct arppayload {
-	unsigned char mac_src[ETH_ALEN];
-	unsigned char ip_src[4];
-	unsigned char mac_dst[ETH_ALEN];
-	unsigned char ip_dst[4];
-};
-
-static void dump_arp_packet(struct nf_log_buf *m,
-			    const struct nf_loginfo *info,
-			    const struct sk_buff *skb, unsigned int nhoff)
-{
-	const struct arppayload *ap;
-	struct arppayload _arpp;
-	const struct arphdr *ah;
-	unsigned int logflags;
-	struct arphdr _arph;
-
-	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
-	if (ah == NULL) {
-		nf_log_buf_add(m, "TRUNCATED");
-		return;
-	}
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-	else
-		logflags = NF_LOG_DEFAULT_MASK;
-
-	if (logflags & NF_LOG_MACDECODE) {
-		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
-			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
-		nf_log_dump_vlan(m, skb);
-		nf_log_buf_add(m, "MACPROTO=%04x ",
-			       ntohs(eth_hdr(skb)->h_proto));
-	}
-
-	nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
-		       ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
-
-	/* If it's for Ethernet and the lengths are OK, then log the ARP
-	 * payload.
-	 */
-	if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
-	    ah->ar_hln != ETH_ALEN ||
-	    ah->ar_pln != sizeof(__be32))
-		return;
-
-	ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
-	if (ap == NULL) {
-		nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
-			       skb->len - sizeof(_arph));
-		return;
-	}
-	nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
-		       ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
-}
-
-static void nf_log_arp_packet(struct net *net, u_int8_t pf,
-			      unsigned int hooknum, const struct sk_buff *skb,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      const struct nf_loginfo *loginfo,
-			      const char *prefix)
-{
-	struct nf_log_buf *m;
-
-	/* FIXME: Disabled from containers until syslog ns is supported */
-	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
-		return;
-
-	m = nf_log_buf_open();
-
-	if (!loginfo)
-		loginfo = &default_loginfo;
-
-	nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
-				  prefix);
-	dump_arp_packet(m, loginfo, skb, 0);
-
-	nf_log_buf_close(m);
-}
-
-static struct nf_logger nf_arp_logger __read_mostly = {
-	.name		= "nf_log_arp",
-	.type		= NF_LOG_TYPE_LOG,
-	.logfn		= nf_log_arp_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init nf_log_arp_net_init(struct net *net)
-{
-	return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
-}
-
-static void __net_exit nf_log_arp_net_exit(struct net *net)
-{
-	nf_log_unset(net, &nf_arp_logger);
-}
-
-static struct pernet_operations nf_log_arp_net_ops = {
-	.init = nf_log_arp_net_init,
-	.exit = nf_log_arp_net_exit,
-};
-
-static int __init nf_log_arp_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&nf_log_arp_net_ops);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger);
-	if (ret < 0) {
-		pr_err("failed to register logger\n");
-		goto err1;
-	}
-
-	return 0;
-
-err1:
-	unregister_pernet_subsys(&nf_log_arp_net_ops);
-	return ret;
-}
-
-static void __exit nf_log_arp_exit(void)
-{
-	unregister_pernet_subsys(&nf_log_arp_net_ops);
-	nf_log_unregister(&nf_arp_logger);
-}
-
-module_init(nf_log_arp_init);
-module_exit(nf_log_arp_exit);
-
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_DESCRIPTION("Netfilter ARP packet logging");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NF_LOGGER(3, 0);
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index e6fe156e77c7..c01769c6d641 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -31,6 +31,95 @@ static const struct nf_loginfo default_loginfo = {
 	},
 };
 
+struct arppayload {
+	unsigned char mac_src[ETH_ALEN];
+	unsigned char ip_src[4];
+	unsigned char mac_dst[ETH_ALEN];
+	unsigned char ip_dst[4];
+};
+
+static void noinline_for_stack
+dump_arp_packet(struct nf_log_buf *m,
+		const struct nf_loginfo *info,
+		const struct sk_buff *skb, unsigned int nhoff)
+{
+	const struct arppayload *ap;
+	struct arppayload _arpp;
+	const struct arphdr *ah;
+	unsigned int logflags;
+	struct arphdr _arph;
+
+	ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+	if (!ah) {
+		nf_log_buf_add(m, "TRUNCATED");
+		return;
+	}
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+	else
+		logflags = NF_LOG_DEFAULT_MASK;
+
+	if (logflags & NF_LOG_MACDECODE) {
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
+			       ntohs(eth_hdr(skb)->h_proto));
+	}
+
+	nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
+		       ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
+	/* If it's for Ethernet and the lengths are OK, then log the ARP
+	 * payload.
+	 */
+	if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
+	    ah->ar_hln != ETH_ALEN ||
+	    ah->ar_pln != sizeof(__be32))
+		return;
+
+	ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+	if (!ap) {
+		nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
+			       skb->len - sizeof(_arph));
+		return;
+	}
+	nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
+		       ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+}
+
+static void nf_log_arp_packet(struct net *net, u_int8_t pf,
+			      unsigned int hooknum, const struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      const struct nf_loginfo *loginfo,
+			      const char *prefix)
+{
+	struct nf_log_buf *m;
+
+	/* FIXME: Disabled from containers until syslog ns is supported */
+	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+		return;
+
+	m = nf_log_buf_open();
+
+	if (!loginfo)
+		loginfo = &default_loginfo;
+
+	nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+				  prefix);
+	dump_arp_packet(m, loginfo, skb, 0);
+
+	nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_arp_logger __read_mostly = {
+	.name		= "nf_log_arp",
+	.type		= NF_LOG_TYPE_LOG,
+	.logfn		= nf_log_arp_packet,
+	.me		= THIS_MODULE,
+};
+
 /* One level of recursion won't kill us */
 static noinline_for_stack void
 dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
@@ -343,12 +432,23 @@ static struct nf_logger nf_ip_logger __read_mostly = {
 
 static int __net_init nf_log_syslog_net_init(struct net *net)
 {
-	return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+	int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+
+	if (ret)
+		return ret;
+
+	ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
+	if (ret)
+		goto err1;
+err1:
+	nf_log_unset(net, &nf_arp_logger);
+	return ret;
 }
 
 static void __net_exit nf_log_syslog_net_exit(struct net *net)
 {
 	nf_log_unset(net, &nf_ip_logger);
+	nf_log_unset(net, &nf_arp_logger);
 }
 
 static struct pernet_operations nf_log_syslog_net_ops = {
@@ -368,9 +468,15 @@ static int __init nf_log_syslog_init(void)
 	if (ret < 0)
 		goto err1;
 
-	return 0;
+	ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger);
+	if (ret < 0)
+		goto err2;
 
+	return 0;
+err2:
+	nf_log_unregister(&nf_arp_logger);
 err1:
+	pr_err("failed to register logger\n");
 	unregister_pernet_subsys(&nf_log_syslog_net_ops);
 	return ret;
 }
@@ -379,6 +485,7 @@ static void __exit nf_log_syslog_exit(void)
 {
 	unregister_pernet_subsys(&nf_log_syslog_net_ops);
 	nf_log_unregister(&nf_ip_logger);
+	nf_log_unregister(&nf_arp_logger);
 }
 
 module_init(nf_log_syslog_init);
@@ -387,5 +494,7 @@ module_exit(nf_log_syslog_exit);
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Netfilter syslog packet logging");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf_log_arp");
 MODULE_ALIAS("nf_log_ipv4");
 MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
+MODULE_ALIAS_NF_LOGGER(3, 0);
-- 
cgit v1.2.3


From f5466caab9a8d2f363d2e0730a99f5916df892f5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:07 +0100
Subject: netfilter: nf_log_ipv6: merge with nf_log_syslog

This removes the nf_log_ipv6 module, the functionality is now
provided by nf_log_syslog.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/Kconfig       |   5 +-
 net/ipv6/netfilter/Makefile      |   3 -
 net/ipv6/netfilter/nf_log_ipv6.c | 427 ---------------------------------------
 net/netfilter/nf_log_syslog.c    | 358 +++++++++++++++++++++++++++++++-
 4 files changed, 360 insertions(+), 433 deletions(-)
 delete mode 100644 net/ipv6/netfilter/nf_log_ipv6.c

diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 262bb51a2d99..f22233e44ee9 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -69,7 +69,10 @@ config NF_REJECT_IPV6
 config NF_LOG_IPV6
 	tristate "IPv6 packet logging"
 	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
+	select NF_LOG_SYSLOG
+	help
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects CONFIG_NF_LOG_SYSLOG.
 
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 731a74c60dca..b85383606df7 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -18,9 +18,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
 obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o
 
-# logging
-obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
-
 # reject
 obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
 
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
deleted file mode 100644
index 8210ff34ed9b..000000000000
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ /dev/null
@@ -1,427 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <net/ipv6.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/xt_LOG.h>
-#include <net/netfilter/nf_log.h>
-
-static const struct nf_loginfo default_loginfo = {
-	.type	= NF_LOG_TYPE_LOG,
-	.u = {
-		.log = {
-			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_DEFAULT_MASK,
-		},
-	},
-};
-
-/* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
-			     const struct nf_loginfo *info,
-			     const struct sk_buff *skb, unsigned int ip6hoff,
-			     int recurse)
-{
-	u_int8_t currenthdr;
-	int fragment;
-	struct ipv6hdr _ip6h;
-	const struct ipv6hdr *ih;
-	unsigned int ptr;
-	unsigned int hdrlen = 0;
-	unsigned int logflags;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-	else
-		logflags = NF_LOG_DEFAULT_MASK;
-
-	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
-	if (ih == NULL) {
-		nf_log_buf_add(m, "TRUNCATED");
-		return;
-	}
-
-	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
-	nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
-
-	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
-	nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
-	       ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
-	       (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
-	       ih->hop_limit,
-	       (ntohl(*(__be32 *)ih) & 0x000fffff));
-
-	fragment = 0;
-	ptr = ip6hoff + sizeof(struct ipv6hdr);
-	currenthdr = ih->nexthdr;
-	while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
-		struct ipv6_opt_hdr _hdr;
-		const struct ipv6_opt_hdr *hp;
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		if (hp == NULL) {
-			nf_log_buf_add(m, "TRUNCATED");
-			return;
-		}
-
-		/* Max length: 48 "OPT (...) " */
-		if (logflags & NF_LOG_IPOPT)
-			nf_log_buf_add(m, "OPT ( ");
-
-		switch (currenthdr) {
-		case IPPROTO_FRAGMENT: {
-			struct frag_hdr _fhdr;
-			const struct frag_hdr *fh;
-
-			nf_log_buf_add(m, "FRAG:");
-			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
-						&_fhdr);
-			if (fh == NULL) {
-				nf_log_buf_add(m, "TRUNCATED ");
-				return;
-			}
-
-			/* Max length: 6 "65535 " */
-			nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
-
-			/* Max length: 11 "INCOMPLETE " */
-			if (fh->frag_off & htons(0x0001))
-				nf_log_buf_add(m, "INCOMPLETE ");
-
-			nf_log_buf_add(m, "ID:%08x ",
-				       ntohl(fh->identification));
-
-			if (ntohs(fh->frag_off) & 0xFFF8)
-				fragment = 1;
-
-			hdrlen = 8;
-
-			break;
-		}
-		case IPPROTO_DSTOPTS:
-		case IPPROTO_ROUTING:
-		case IPPROTO_HOPOPTS:
-			if (fragment) {
-				if (logflags & NF_LOG_IPOPT)
-					nf_log_buf_add(m, ")");
-				return;
-			}
-			hdrlen = ipv6_optlen(hp);
-			break;
-		/* Max Length */
-		case IPPROTO_AH:
-			if (logflags & NF_LOG_IPOPT) {
-				struct ip_auth_hdr _ahdr;
-				const struct ip_auth_hdr *ah;
-
-				/* Max length: 3 "AH " */
-				nf_log_buf_add(m, "AH ");
-
-				if (fragment) {
-					nf_log_buf_add(m, ")");
-					return;
-				}
-
-				ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
-							&_ahdr);
-				if (ah == NULL) {
-					/*
-					 * Max length: 26 "INCOMPLETE [65535
-					 *  bytes] )"
-					 */
-					nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
-						       skb->len - ptr);
-					return;
-				}
-
-				/* Length: 15 "SPI=0xF1234567 */
-				nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
-
-			}
-
-			hdrlen = ipv6_authlen(hp);
-			break;
-		case IPPROTO_ESP:
-			if (logflags & NF_LOG_IPOPT) {
-				struct ip_esp_hdr _esph;
-				const struct ip_esp_hdr *eh;
-
-				/* Max length: 4 "ESP " */
-				nf_log_buf_add(m, "ESP ");
-
-				if (fragment) {
-					nf_log_buf_add(m, ")");
-					return;
-				}
-
-				/*
-				 * Max length: 26 "INCOMPLETE [65535 bytes] )"
-				 */
-				eh = skb_header_pointer(skb, ptr, sizeof(_esph),
-							&_esph);
-				if (eh == NULL) {
-					nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
-						       skb->len - ptr);
-					return;
-				}
-
-				/* Length: 16 "SPI=0xF1234567 )" */
-				nf_log_buf_add(m, "SPI=0x%x )",
-					       ntohl(eh->spi));
-			}
-			return;
-		default:
-			/* Max length: 20 "Unknown Ext Hdr 255" */
-			nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
-			return;
-		}
-		if (logflags & NF_LOG_IPOPT)
-			nf_log_buf_add(m, ") ");
-
-		currenthdr = hp->nexthdr;
-		ptr += hdrlen;
-	}
-
-	switch (currenthdr) {
-	case IPPROTO_TCP:
-		if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
-					   ptr, logflags))
-			return;
-		break;
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-		if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
-			return;
-		break;
-	case IPPROTO_ICMPV6: {
-		struct icmp6hdr _icmp6h;
-		const struct icmp6hdr *ic;
-
-		/* Max length: 13 "PROTO=ICMPv6 " */
-		nf_log_buf_add(m, "PROTO=ICMPv6 ");
-
-		if (fragment)
-			break;
-
-		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-		ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
-		if (ic == NULL) {
-			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
-				       skb->len - ptr);
-			return;
-		}
-
-		/* Max length: 18 "TYPE=255 CODE=255 " */
-		nf_log_buf_add(m, "TYPE=%u CODE=%u ",
-			       ic->icmp6_type, ic->icmp6_code);
-
-		switch (ic->icmp6_type) {
-		case ICMPV6_ECHO_REQUEST:
-		case ICMPV6_ECHO_REPLY:
-			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			nf_log_buf_add(m, "ID=%u SEQ=%u ",
-				ntohs(ic->icmp6_identifier),
-				ntohs(ic->icmp6_sequence));
-			break;
-		case ICMPV6_MGM_QUERY:
-		case ICMPV6_MGM_REPORT:
-		case ICMPV6_MGM_REDUCTION:
-			break;
-
-		case ICMPV6_PARAMPROB:
-			/* Max length: 17 "POINTER=ffffffff " */
-			nf_log_buf_add(m, "POINTER=%08x ",
-				       ntohl(ic->icmp6_pointer));
-			fallthrough;
-		case ICMPV6_DEST_UNREACH:
-		case ICMPV6_PKT_TOOBIG:
-		case ICMPV6_TIME_EXCEED:
-			/* Max length: 3+maxlen */
-			if (recurse) {
-				nf_log_buf_add(m, "[");
-				dump_ipv6_packet(net, m, info, skb,
-						 ptr + sizeof(_icmp6h), 0);
-				nf_log_buf_add(m, "] ");
-			}
-
-			/* Max length: 10 "MTU=65535 " */
-			if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
-				nf_log_buf_add(m, "MTU=%u ",
-					       ntohl(ic->icmp6_mtu));
-			}
-		}
-		break;
-	}
-	/* Max length: 10 "PROTO=255 " */
-	default:
-		nf_log_buf_add(m, "PROTO=%u ", currenthdr);
-	}
-
-	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & NF_LOG_UID) && recurse)
-		nf_log_dump_sk_uid_gid(net, m, skb->sk);
-
-	/* Max length: 16 "MARK=0xFFFFFFFF " */
-	if (recurse && skb->mark)
-		nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
-}
-
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
-				 const struct nf_loginfo *info,
-				 const struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	unsigned int logflags = 0;
-
-	if (info->type == NF_LOG_TYPE_LOG)
-		logflags = info->u.log.logflags;
-
-	if (!(logflags & NF_LOG_MACDECODE))
-		goto fallback;
-
-	switch (dev->type) {
-	case ARPHRD_ETHER:
-		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
-			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
-		nf_log_dump_vlan(m, skb);
-		nf_log_buf_add(m, "MACPROTO=%04x ",
-			       ntohs(eth_hdr(skb)->h_proto));
-		return;
-	default:
-		break;
-	}
-
-fallback:
-	nf_log_buf_add(m, "MAC=");
-	if (dev->hard_header_len &&
-	    skb->mac_header != skb->network_header) {
-		const unsigned char *p = skb_mac_header(skb);
-		unsigned int len = dev->hard_header_len;
-		unsigned int i;
-
-		if (dev->type == ARPHRD_SIT) {
-			p -= ETH_HLEN;
-
-			if (p < skb->head)
-				p = NULL;
-		}
-
-		if (p != NULL) {
-			nf_log_buf_add(m, "%02x", *p++);
-			for (i = 1; i < len; i++)
-				nf_log_buf_add(m, ":%02x", *p++);
-		}
-		nf_log_buf_add(m, " ");
-
-		if (dev->type == ARPHRD_SIT) {
-			const struct iphdr *iph =
-				(struct iphdr *)skb_mac_header(skb);
-			nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
-				       &iph->daddr);
-		}
-	} else {
-		nf_log_buf_add(m, " ");
-	}
-}
-
-static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
-			      unsigned int hooknum, const struct sk_buff *skb,
-			      const struct net_device *in,
-			      const struct net_device *out,
-			      const struct nf_loginfo *loginfo,
-			      const char *prefix)
-{
-	struct nf_log_buf *m;
-
-	/* FIXME: Disabled from containers until syslog ns is supported */
-	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
-		return;
-
-	m = nf_log_buf_open();
-
-	if (!loginfo)
-		loginfo = &default_loginfo;
-
-	nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
-				  loginfo, prefix);
-
-	if (in != NULL)
-		dump_ipv6_mac_header(m, loginfo, skb);
-
-	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
-
-	nf_log_buf_close(m);
-}
-
-static struct nf_logger nf_ip6_logger __read_mostly = {
-	.name		= "nf_log_ipv6",
-	.type		= NF_LOG_TYPE_LOG,
-	.logfn		= nf_log_ip6_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init nf_log_ipv6_net_init(struct net *net)
-{
-	return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
-}
-
-static void __net_exit nf_log_ipv6_net_exit(struct net *net)
-{
-	nf_log_unset(net, &nf_ip6_logger);
-}
-
-static struct pernet_operations nf_log_ipv6_net_ops = {
-	.init = nf_log_ipv6_net_init,
-	.exit = nf_log_ipv6_net_exit,
-};
-
-static int __init nf_log_ipv6_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&nf_log_ipv6_net_ops);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
-	if (ret < 0) {
-		pr_err("failed to register logger\n");
-		goto err1;
-	}
-
-	return 0;
-
-err1:
-	unregister_pernet_subsys(&nf_log_ipv6_net_ops);
-	return ret;
-}
-
-static void __exit nf_log_ipv6_exit(void)
-{
-	unregister_pernet_subsys(&nf_log_ipv6_net_ops);
-	nf_log_unregister(&nf_ip6_logger);
-}
-
-module_init(nf_log_ipv6_init);
-module_exit(nf_log_ipv6_exit);
-
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Netfilter IPv6 packet logging");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index c01769c6d641..9ba71bc2ef84 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -18,6 +18,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
@@ -355,6 +356,249 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
 	/* maxlen = 230+   91  + 230 + 252 = 803 */
 }
 
+static noinline_for_stack void
+dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
+		 const struct nf_loginfo *info,
+		 const struct sk_buff *skb, unsigned int ip6hoff,
+		 int recurse)
+{
+	const struct ipv6hdr *ih;
+	unsigned int hdrlen = 0;
+	unsigned int logflags;
+	struct ipv6hdr _ip6h;
+	unsigned int ptr;
+	u8 currenthdr;
+	int fragment;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+	else
+		logflags = NF_LOG_DEFAULT_MASK;
+
+	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
+	if (!ih) {
+		nf_log_buf_add(m, "TRUNCATED");
+		return;
+	}
+
+	/* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
+	nf_log_buf_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
+
+	/* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
+	nf_log_buf_add(m, "LEN=%zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
+		       ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
+		       (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
+		       ih->hop_limit,
+		       (ntohl(*(__be32 *)ih) & 0x000fffff));
+
+	fragment = 0;
+	ptr = ip6hoff + sizeof(struct ipv6hdr);
+	currenthdr = ih->nexthdr;
+	while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
+		struct ipv6_opt_hdr _hdr;
+		const struct ipv6_opt_hdr *hp;
+
+		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+		if (!hp) {
+			nf_log_buf_add(m, "TRUNCATED");
+			return;
+		}
+
+		/* Max length: 48 "OPT (...) " */
+		if (logflags & NF_LOG_IPOPT)
+			nf_log_buf_add(m, "OPT ( ");
+
+		switch (currenthdr) {
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr _fhdr;
+			const struct frag_hdr *fh;
+
+			nf_log_buf_add(m, "FRAG:");
+			fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
+						&_fhdr);
+			if (!fh) {
+				nf_log_buf_add(m, "TRUNCATED ");
+				return;
+			}
+
+			/* Max length: 6 "65535 " */
+			nf_log_buf_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
+
+			/* Max length: 11 "INCOMPLETE " */
+			if (fh->frag_off & htons(0x0001))
+				nf_log_buf_add(m, "INCOMPLETE ");
+
+			nf_log_buf_add(m, "ID:%08x ",
+				       ntohl(fh->identification));
+
+			if (ntohs(fh->frag_off) & 0xFFF8)
+				fragment = 1;
+
+			hdrlen = 8;
+			break;
+		}
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_ROUTING:
+		case IPPROTO_HOPOPTS:
+			if (fragment) {
+				if (logflags & NF_LOG_IPOPT)
+					nf_log_buf_add(m, ")");
+				return;
+			}
+			hdrlen = ipv6_optlen(hp);
+			break;
+		/* Max Length */
+		case IPPROTO_AH:
+			if (logflags & NF_LOG_IPOPT) {
+				struct ip_auth_hdr _ahdr;
+				const struct ip_auth_hdr *ah;
+
+				/* Max length: 3 "AH " */
+				nf_log_buf_add(m, "AH ");
+
+				if (fragment) {
+					nf_log_buf_add(m, ")");
+					return;
+				}
+
+				ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
+							&_ahdr);
+				if (!ah) {
+					/* Max length: 26 "INCOMPLETE [65535 bytes] )" */
+					nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+						       skb->len - ptr);
+					return;
+				}
+
+				/* Length: 15 "SPI=0xF1234567 */
+				nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+			}
+
+			hdrlen = ipv6_authlen(hp);
+			break;
+		case IPPROTO_ESP:
+			if (logflags & NF_LOG_IPOPT) {
+				struct ip_esp_hdr _esph;
+				const struct ip_esp_hdr *eh;
+
+				/* Max length: 4 "ESP " */
+				nf_log_buf_add(m, "ESP ");
+
+				if (fragment) {
+					nf_log_buf_add(m, ")");
+					return;
+				}
+
+				/* Max length: 26 "INCOMPLETE [65535 bytes] )" */
+				eh = skb_header_pointer(skb, ptr, sizeof(_esph),
+							&_esph);
+				if (!eh) {
+					nf_log_buf_add(m, "INCOMPLETE [%u bytes] )",
+						       skb->len - ptr);
+					return;
+				}
+
+				/* Length: 16 "SPI=0xF1234567 )" */
+				nf_log_buf_add(m, "SPI=0x%x )",
+					       ntohl(eh->spi));
+			}
+			return;
+		default:
+			/* Max length: 20 "Unknown Ext Hdr 255" */
+			nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
+			return;
+		}
+		if (logflags & NF_LOG_IPOPT)
+			nf_log_buf_add(m, ") ");
+
+		currenthdr = hp->nexthdr;
+		ptr += hdrlen;
+	}
+
+	switch (currenthdr) {
+	case IPPROTO_TCP:
+		if (nf_log_dump_tcp_header(m, skb, currenthdr, fragment,
+					   ptr, logflags))
+			return;
+		break;
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+		if (nf_log_dump_udp_header(m, skb, currenthdr, fragment, ptr))
+			return;
+		break;
+	case IPPROTO_ICMPV6: {
+		struct icmp6hdr _icmp6h;
+		const struct icmp6hdr *ic;
+
+		/* Max length: 13 "PROTO=ICMPv6 " */
+		nf_log_buf_add(m, "PROTO=ICMPv6 ");
+
+		if (fragment)
+			break;
+
+		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+		ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
+		if (!ic) {
+			nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+				       skb->len - ptr);
+			return;
+		}
+
+		/* Max length: 18 "TYPE=255 CODE=255 " */
+		nf_log_buf_add(m, "TYPE=%u CODE=%u ",
+			       ic->icmp6_type, ic->icmp6_code);
+
+		switch (ic->icmp6_type) {
+		case ICMPV6_ECHO_REQUEST:
+		case ICMPV6_ECHO_REPLY:
+			/* Max length: 19 "ID=65535 SEQ=65535 " */
+			nf_log_buf_add(m, "ID=%u SEQ=%u ",
+				       ntohs(ic->icmp6_identifier),
+				       ntohs(ic->icmp6_sequence));
+			break;
+		case ICMPV6_MGM_QUERY:
+		case ICMPV6_MGM_REPORT:
+		case ICMPV6_MGM_REDUCTION:
+			break;
+
+		case ICMPV6_PARAMPROB:
+			/* Max length: 17 "POINTER=ffffffff " */
+			nf_log_buf_add(m, "POINTER=%08x ",
+				       ntohl(ic->icmp6_pointer));
+			fallthrough;
+		case ICMPV6_DEST_UNREACH:
+		case ICMPV6_PKT_TOOBIG:
+		case ICMPV6_TIME_EXCEED:
+			/* Max length: 3+maxlen */
+			if (recurse) {
+				nf_log_buf_add(m, "[");
+				dump_ipv6_packet(net, m, info, skb,
+						 ptr + sizeof(_icmp6h), 0);
+				nf_log_buf_add(m, "] ");
+			}
+
+			/* Max length: 10 "MTU=65535 " */
+			if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) {
+				nf_log_buf_add(m, "MTU=%u ",
+					       ntohl(ic->icmp6_mtu));
+			}
+		}
+		break;
+	}
+	/* Max length: 10 "PROTO=255 " */
+	default:
+		nf_log_buf_add(m, "PROTO=%u ", currenthdr);
+	}
+
+	/* Max length: 15 "UID=4294967295 " */
+	if ((logflags & NF_LOG_UID) && recurse)
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
+
+	/* Max length: 16 "MARK=0xFFFFFFFF " */
+	if (recurse && skb->mark)
+		nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+}
+
 static void dump_ipv4_mac_header(struct nf_log_buf *m,
 				 const struct nf_loginfo *info,
 				 const struct sk_buff *skb)
@@ -430,6 +674,100 @@ static struct nf_logger nf_ip_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };
 
+static void dump_ipv6_mac_header(struct nf_log_buf *m,
+				 const struct nf_loginfo *info,
+				 const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & NF_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+			       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+		nf_log_dump_vlan(m, skb);
+		nf_log_buf_add(m, "MACPROTO=%04x ",
+			       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	nf_log_buf_add(m, "MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int len = dev->hard_header_len;
+		unsigned int i;
+
+		if (dev->type == ARPHRD_SIT) {
+			p -= ETH_HLEN;
+
+			if (p < skb->head)
+				p = NULL;
+		}
+
+		if (p) {
+			nf_log_buf_add(m, "%02x", *p++);
+			for (i = 1; i < len; i++)
+				nf_log_buf_add(m, ":%02x", *p++);
+		}
+		nf_log_buf_add(m, " ");
+
+		if (dev->type == ARPHRD_SIT) {
+			const struct iphdr *iph =
+				(struct iphdr *)skb_mac_header(skb);
+			nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
+				       &iph->daddr);
+		}
+	} else {
+		nf_log_buf_add(m, " ");
+	}
+}
+
+static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
+			      unsigned int hooknum, const struct sk_buff *skb,
+			      const struct net_device *in,
+			      const struct net_device *out,
+			      const struct nf_loginfo *loginfo,
+			      const char *prefix)
+{
+	struct nf_log_buf *m;
+
+	/* FIXME: Disabled from containers until syslog ns is supported */
+	if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+		return;
+
+	m = nf_log_buf_open();
+
+	if (!loginfo)
+		loginfo = &default_loginfo;
+
+	nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
+				  loginfo, prefix);
+
+	if (in)
+		dump_ipv6_mac_header(m, loginfo, skb);
+
+	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
+
+	nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip6_logger __read_mostly = {
+	.name		= "nf_log_ipv6",
+	.type		= NF_LOG_TYPE_LOG,
+	.logfn		= nf_log_ip6_packet,
+	.me		= THIS_MODULE,
+};
+
 static int __net_init nf_log_syslog_net_init(struct net *net)
 {
 	int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
@@ -440,8 +778,15 @@ static int __net_init nf_log_syslog_net_init(struct net *net)
 	ret = nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
 	if (ret)
 		goto err1;
-err1:
+
+	ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
+	if (ret)
+		goto err2;
+	return 0;
+err2:
 	nf_log_unset(net, &nf_arp_logger);
+err1:
+	nf_log_unset(net, &nf_ip_logger);
 	return ret;
 }
 
@@ -472,9 +817,15 @@ static int __init nf_log_syslog_init(void)
 	if (ret < 0)
 		goto err2;
 
+	ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger);
+	if (ret < 0)
+		goto err3;
+
 	return 0;
-err2:
+err3:
 	nf_log_unregister(&nf_arp_logger);
+err2:
+	nf_log_unregister(&nf_ip_logger);
 err1:
 	pr_err("failed to register logger\n");
 	unregister_pernet_subsys(&nf_log_syslog_net_ops);
@@ -486,6 +837,7 @@ static void __exit nf_log_syslog_exit(void)
 	unregister_pernet_subsys(&nf_log_syslog_net_ops);
 	nf_log_unregister(&nf_ip_logger);
 	nf_log_unregister(&nf_arp_logger);
+	nf_log_unregister(&nf_ip6_logger);
 }
 
 module_init(nf_log_syslog_init);
@@ -496,5 +848,7 @@ MODULE_DESCRIPTION("Netfilter syslog packet logging");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("nf_log_arp");
 MODULE_ALIAS("nf_log_ipv4");
+MODULE_ALIAS("nf_log_ipv6");
 MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
 MODULE_ALIAS_NF_LOGGER(3, 0);
+MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
-- 
cgit v1.2.3


From 1510618e45cb9fb77aede5544f8309b64f8b7400 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:08 +0100
Subject: netfilter: nf_log_netdev: merge with nf_log_syslog

Provide netdev family support from the nf_log_syslog module.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig         |  4 ---
 net/netfilter/Makefile        |  3 --
 net/netfilter/nf_log_netdev.c | 78 -------------------------------------------
 net/netfilter/nf_log_syslog.c | 36 ++++++++++++++++++++
 4 files changed, 36 insertions(+), 85 deletions(-)
 delete mode 100644 net/netfilter/nf_log_netdev.c

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d5c047190eb9..6aef981a8446 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -74,10 +74,6 @@ config NF_CONNTRACK
 config NF_LOG_COMMON
 	tristate
 
-config NF_LOG_NETDEV
-	tristate "Netdev packet logging"
-	select NF_LOG_COMMON
-
 config NF_LOG_SYSLOG
 	tristate "Syslog packet logging"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 59642d9ab7a5..429be36fe4c7 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -52,9 +52,6 @@ nf_nat-y	:= nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
 obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
 
-# packet logging for netdev family
-obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
-
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
deleted file mode 100644
index 968dafa684c9..000000000000
--- a/net/netfilter/nf_log_netdev.c
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_log.h>
-
-static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
-				 unsigned int hooknum,
-				 const struct sk_buff *skb,
-				 const struct net_device *in,
-				 const struct net_device *out,
-				 const struct nf_loginfo *loginfo,
-				 const char *prefix)
-{
-	nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out,
-			loginfo, prefix);
-}
-
-static struct nf_logger nf_netdev_logger __read_mostly = {
-	.name		= "nf_log_netdev",
-	.type		= NF_LOG_TYPE_LOG,
-	.logfn		= nf_log_netdev_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init nf_log_netdev_net_init(struct net *net)
-{
-	return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
-}
-
-static void __net_exit nf_log_netdev_net_exit(struct net *net)
-{
-	nf_log_unset(net, &nf_netdev_logger);
-}
-
-static struct pernet_operations nf_log_netdev_net_ops = {
-	.init = nf_log_netdev_net_init,
-	.exit = nf_log_netdev_net_exit,
-};
-
-static int __init nf_log_netdev_init(void)
-{
-	int ret;
-
-	/* Request to load the real packet loggers. */
-	nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
-	nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
-	nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
-
-	ret = register_pernet_subsys(&nf_log_netdev_net_ops);
-	if (ret < 0)
-		return ret;
-
-	nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
-	return 0;
-}
-
-static void __exit nf_log_netdev_exit(void)
-{
-	unregister_pernet_subsys(&nf_log_netdev_net_ops);
-	nf_log_unregister(&nf_netdev_logger);
-}
-
-module_init(nf_log_netdev_init);
-module_exit(nf_log_netdev_exit);
-
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_DESCRIPTION("Netfilter netdev packet logging");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 9ba71bc2ef84..617e0071c0c4 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -768,6 +768,25 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };
 
+static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
+				 unsigned int hooknum,
+				 const struct sk_buff *skb,
+				 const struct net_device *in,
+				 const struct net_device *out,
+				 const struct nf_loginfo *loginfo,
+				 const char *prefix)
+{
+	nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out,
+			loginfo, prefix);
+}
+
+static struct nf_logger nf_netdev_logger __read_mostly = {
+	.name		= "nf_log_netdev",
+	.type		= NF_LOG_TYPE_LOG,
+	.logfn		= nf_log_netdev_packet,
+	.me		= THIS_MODULE,
+};
+
 static int __net_init nf_log_syslog_net_init(struct net *net)
 {
 	int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
@@ -782,7 +801,13 @@ static int __net_init nf_log_syslog_net_init(struct net *net)
 	ret = nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
 	if (ret)
 		goto err2;
+
+	ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
+	if (ret)
+		goto err3;
 	return 0;
+err3:
+	nf_log_unset(net, &nf_ip6_logger);
 err2:
 	nf_log_unset(net, &nf_arp_logger);
 err1:
@@ -794,6 +819,8 @@ static void __net_exit nf_log_syslog_net_exit(struct net *net)
 {
 	nf_log_unset(net, &nf_ip_logger);
 	nf_log_unset(net, &nf_arp_logger);
+	nf_log_unset(net, &nf_ip6_logger);
+	nf_log_unset(net, &nf_netdev_logger);
 }
 
 static struct pernet_operations nf_log_syslog_net_ops = {
@@ -821,7 +848,13 @@ static int __init nf_log_syslog_init(void)
 	if (ret < 0)
 		goto err3;
 
+	ret = nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
+	if (ret < 0)
+		goto err4;
+
 	return 0;
+err4:
+	nf_log_unregister(&nf_ip6_logger);
 err3:
 	nf_log_unregister(&nf_arp_logger);
 err2:
@@ -838,6 +871,7 @@ static void __exit nf_log_syslog_exit(void)
 	nf_log_unregister(&nf_ip_logger);
 	nf_log_unregister(&nf_arp_logger);
 	nf_log_unregister(&nf_ip6_logger);
+	nf_log_unregister(&nf_netdev_logger);
 }
 
 module_init(nf_log_syslog_init);
@@ -849,6 +883,8 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("nf_log_arp");
 MODULE_ALIAS("nf_log_ipv4");
 MODULE_ALIAS("nf_log_ipv6");
+MODULE_ALIAS("nf_log_netdev");
 MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
 MODULE_ALIAS_NF_LOGGER(3, 0);
+MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 0);
-- 
cgit v1.2.3


From 0f1b2a4912b270395d4f61bfcc1ff8d4903dde4a Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:44 +0800
Subject: mips/sgi-ip27: Delete obsolete TODO file

The TODO file here has not been updated for 15 years, and the function
development described in the file have been implemented or abandoned.

Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/sgi-ip27/TODO | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 arch/mips/sgi-ip27/TODO

diff --git a/arch/mips/sgi-ip27/TODO b/arch/mips/sgi-ip27/TODO
deleted file mode 100644
index 160857ff1483..000000000000
--- a/arch/mips/sgi-ip27/TODO
+++ /dev/null
@@ -1,19 +0,0 @@
-1. Need to figure out why PCI writes to the IOC3 hang, and if it is okay
-not to write to the IOC3 ever.
-2. Need to figure out RRB allocation in bridge_startup().
-3. Need to figure out why address swaizzling is needed in inw/outw for
-Qlogic scsi controllers.
-4. Need to integrate ip27-klconfig.c:find_lboard and
-ip27-init.c:find_lbaord_real. DONE
-5. Is it okay to set calias space on all nodes as 0, instead of 8k as
-in irix?
-6. Investigate why things do not work without the setup_test() call
-being invoked on all nodes in ip27-memory.c.
-8. Too many do_page_faults invoked - investigate.
-9. start_thread must turn off UX64 ... and define tlb_refill_debug.
-10. Need a bad pmd table, bad pte table. __bad_pmd_table/__bad_pagetable
-does not agree with pgd_bad/pmd_bad.
-11. All intrs (ip27_do_irq handlers) are targeted at cpu A on the node.
-This might need to change later. Only the timer intr is set up to be
-received on both Cpu A and B. (ip27_do_irq()/bridge_startup())
-13. Cache flushing (specially the SMP version) has to be investigated.
-- 
cgit v1.2.3


From ef843f261b881aaf5a8e4fa90815a97b37dab1f2 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:45 +0800
Subject: scsi/aacraid: Delete obsolete TODO file

The TODO file here has not been updated from 2.6.12 for more than 15 years.
Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/aacraid/TODO | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 drivers/scsi/aacraid/TODO

diff --git a/drivers/scsi/aacraid/TODO b/drivers/scsi/aacraid/TODO
deleted file mode 100644
index 78dc863eff4f..000000000000
--- a/drivers/scsi/aacraid/TODO
+++ /dev/null
@@ -1,3 +0,0 @@
-o	Testing
-o	More testing
-o	I/O size increase
-- 
cgit v1.2.3


From 22612b4e60397c038ef91ed06e741f1063d6a56a Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:46 +0800
Subject: fs/befs: Delete obsolete TODO file

The TODO file here has not been updated from 2005, and the function
development described in the file have been implemented or abandoned.

Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/befs/TODO | 14 --------------
 1 file changed, 14 deletions(-)
 delete mode 100644 fs/befs/TODO

diff --git a/fs/befs/TODO b/fs/befs/TODO
deleted file mode 100644
index 3250921aa2e6..000000000000
--- a/fs/befs/TODO
+++ /dev/null
@@ -1,14 +0,0 @@
-TODO
-==========
-
-* Convert comments to the Kernel-Doc format.
-
-* Befs_fs.h has gotten big and messy. No reason not to break it up into 
-	smaller peices.
-
-* See if Alexander Viro's option parser made it into the kernel tree. 
-	Use that if we can. (include/linux/parser.h)
-
-* See if we really need separate types for on-disk and in-memory 
-	representations of the superblock and inode.
-
-- 
cgit v1.2.3


From ab36ba4f3a81c5562cabab9988c6aa8ef65a5543 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:47 +0800
Subject: fs/jffs2: Delete obsolete TODO file

The TODO file here has not been updated for 14 years, and the function
development described in the file have been implemented or abandoned.

Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/jffs2/TODO | 37 -------------------------------------
 1 file changed, 37 deletions(-)
 delete mode 100644 fs/jffs2/TODO

diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO
deleted file mode 100644
index ca28964abd4b..000000000000
--- a/fs/jffs2/TODO
+++ /dev/null
@@ -1,37 +0,0 @@
-
- - support asynchronous operation -- add a per-fs 'reserved_space' count,
-   let each outstanding write reserve the _maximum_ amount of physical
-   space it could take. Let GC flush the outstanding writes because the
-   reservations will necessarily be pessimistic. With this we could even
-   do shared writable mmap, if we can have a fs hook for do_wp_page() to
-   make the reservation.
- - disable compression in commit_write()?
- - fine-tune the allocation / GC thresholds
- - chattr support - turning on/off and tuning compression per-inode
- - checkpointing (do we need this? scan is quite fast)
- - make the scan code populate real inodes so read_inode just after 
-	mount doesn't have to read the flash twice for large files.
-	Make this a per-inode option, changeable with chattr, so you can
-	decide which inodes should be in-core immediately after mount.
- - test, test, test
-
- - NAND flash support:
-	- almost done :)
-	- use bad block check instead of the hardwired byte check
-
- - Optimisations:
-   - Split writes so they go to two separate blocks rather than just c->nextblock.
-	By writing _new_ nodes to one block, and garbage-collected REF_PRISTINE
-	nodes to a different one, we can separate clean nodes from those which
-	are likely to become dirty, and end up with blocks which are each far
-	closer to 100% or 0% clean, hence speeding up later GC progress dramatically.
-   - Stop keeping name in-core with struct jffs2_full_dirent. If we keep the hash in 
-     the full dirent, we only need to go to the flash in lookup() when we think we've
-     got a match, and in readdir(). 
-   - Doubly-linked next_in_ino list to allow us to free obsoleted raw_node_refs immediately?
-   - Remove size from jffs2_raw_node_frag. 
-
-dedekind:
-1. __jffs2_flush_wbuf() has a strange 'pad' parameter. Eliminate.
-2. get_sb()->build_fs()->scan() path... Why get_sb() removes scan()'s crap in
-   case of failure? scan() does not clean everything. Fix.
-- 
cgit v1.2.3


From 8d9e5bbf5c68c25ab2d93e910a258ec7193df1e2 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:48 +0800
Subject: net/ax25: Delete obsolete TODO file

The TODO file here has not been updated for 13 years, and the function
development described in the file have been implemented or abandoned.

Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/TODO | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 net/ax25/TODO

diff --git a/net/ax25/TODO b/net/ax25/TODO
deleted file mode 100644
index 69fb4e368d92..000000000000
--- a/net/ax25/TODO
+++ /dev/null
@@ -1,20 +0,0 @@
-Do the ax25_list_lock, ax25_dev_lock, linkfail_lockreally, ax25_frag_lock and
-listen_lock have to be bh-safe?
-
-Do the netrom and rose locks have to be bh-safe?
-
-A device might be deleted after lookup in the SIOCADDRT ioctl but before it's
-being used.
-
-Routes to a device being taken down might be deleted by ax25_rt_device_down
-but added by somebody else before the device has been deleted fully.
-
-The ax25_rt_find_route synopsys is pervert but I somehow had to deal with
-the race caused by the static variable in it's previous implementation.
-
-Implement proper socket locking in netrom and rose.
-
-Check socket locking when ax25_rcv is sending to raw sockets.  In particular
-ax25_send_to_raw() seems fishy.  Heck - ax25_rcv is fishy.
-
-Handle XID and TEST frames properly.
-- 
cgit v1.2.3


From b9aa074b896b8d33269d085f360cd614c36e52d2 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 30 Mar 2021 15:02:49 +0800
Subject: net/decnet: Delete obsolete TODO file

The TODO file here has not been updated from 2005, and the function
development described in the file have been implemented or abandoned.

Its existence will mislead developers seeking to view outdated information.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/TODO | 40 ----------------------------------------
 1 file changed, 40 deletions(-)
 delete mode 100644 net/decnet/TODO

diff --git a/net/decnet/TODO b/net/decnet/TODO
deleted file mode 100644
index 358e9eb49016..000000000000
--- a/net/decnet/TODO
+++ /dev/null
@@ -1,40 +0,0 @@
-Steve's quick list of things that need finishing off:
-[they are in no particular order and range from the trivial to the long winded]
-
- o Proper timeouts on each neighbour (in routing mode) rather than
-   just the 60 second On-Ethernet cache value.
-
- o Support for X.25 linklayer
-
- o Support for DDCMP link layer
-
- o The DDCMP device itself
-
- o PPP support (rfc1762)
-
- o Lots of testing with real applications
-
- o Verify errors etc. against POSIX 1003.1g (draft)
-
- o Using send/recvmsg() to get at connect/disconnect data (POSIX 1003.1g)
-   [maybe this should be done at socket level... the control data in the
-    send/recvmsg() calls should simply be a vector of set/getsockopt()
-    calls]
-
- o check MSG_CTRUNC is set where it should be.
-
- o Find all the commonality between DECnet and IPv4 routing code and extract
-   it into a small library of routines. [probably a project for 2.7.xx]
-
- o Add perfect socket hashing - an idea suggested by Paul Koning. Currently
-   we have a half-way house scheme which seems to work reasonably well, but
-   the full scheme is still worth implementing, its not not top of my list
-   right now.
-
- o Add session control message flow control
-
- o Add NSP message flow control
-
- o DECnet sendpages() function
-
- o AIO for DECnet
-- 
cgit v1.2.3


From 059ff70c8cabe0b9d170033ae82bc052aeaac059 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 30 Mar 2021 15:27:53 +0800
Subject: net: i40e: remove repeated words

Remove repeated words "to" and "try".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0f84ed0143e4..1555d6009bf5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7339,7 +7339,7 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
 	qcount = min_t(int, vsi->alloc_queue_pairs,
 		       i40e_pf_get_max_q_per_tc(vsi->back));
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-		/* For the TC that is not enabled set the offset to to default
+		/* For the TC that is not enabled set the offset to default
 		 * queue and allocate one queue for the given TC.
 		 */
 		vsi->tc_config.tc_info[i].qoffset = 0;
@@ -10625,7 +10625,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
 	 * need to rebuild the switch model in the HW.
 	 *
 	 * If there were VEBs but the reconstitution failed, we'll try
-	 * try to recover minimal use by getting the basic PF VSI working.
+	 * to recover minimal use by getting the basic PF VSI working.
 	 */
 	if (vsi->uplink_seid != pf->mac_seid) {
 		dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
-- 
cgit v1.2.3


From 252b5d373564c37e35c982bdfa473ebe0dd5f30e Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 30 Mar 2021 15:27:54 +0800
Subject: net: bonding: remove repeated word

Remove repeated word "that".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c3091e00dd5f..3455f2cc13f2 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1098,7 +1098,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
  * If @slave's permanent hw address is different both from its current
  * address and from @bond's address, then somewhere in the bond there's
  * a slave that has @slave's permanet address as its current address.
- * We'll make sure that that slave no longer uses @slave's permanent address.
+ * We'll make sure that slave no longer uses @slave's permanent address.
  *
  * Caller must hold RTNL and no other locks
  */
-- 
cgit v1.2.3


From fec76125baf7390e1ad1354e32ee6b2f986cd01c Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 30 Mar 2021 15:27:55 +0800
Subject: net: phy: remove repeated word

Remove repeated word "to".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 823518554079..dadf75ff3ab9 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -510,7 +510,7 @@ static int mdiobus_create_device(struct mii_bus *bus,
  *   on a given bus, and attach them to the bus. Drivers should use
  *   mdiobus_register() rather than __mdiobus_register() unless they
  *   need to pass a specific owner module. MDIO devices which are not
- *   PHYs will not be brought up by this function. They are expected to
+ *   PHYs will not be brought up by this function. They are expected
  *   to be explicitly listed in DT and instantiated by of_mdiobus_register().
  *
  * Returns 0 on success or < 0 on error.
-- 
cgit v1.2.3


From 497abc87cf9969601f4213d9db0e2f90eb044088 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Tue, 30 Mar 2021 15:27:56 +0800
Subject: net: ipa: remove repeated words

Remove repeated words "that" and "the".

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Acked-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 38e83cd467b5..dd24179383c1 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -809,7 +809,7 @@ static u32 hol_block_timer_val(struct ipa *ipa, u32 microseconds)
 	 * The best precision is achieved when the base value is as
 	 * large as possible.  Find the highest set bit in the tick
 	 * count, and extract the number of bits in the base field
-	 * such that that high bit is included.
+	 * such that high bit is included.
 	 */
 	high = fls(ticks);		/* 1..32 */
 	width = HWEIGHT32(BASE_VALUE_FMASK);
@@ -1448,7 +1448,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 	if (ret)
 		goto out_suspend_again;
 
-	/* Finally, reset and reconfigure the channel again (re-enabling the
+	/* Finally, reset and reconfigure the channel again (re-enabling
 	 * the doorbell engine if appropriate).  Sleep for 1 millisecond to
 	 * complete the channel reset sequence.  Finish by suspending the
 	 * channel again (if necessary).
-- 
cgit v1.2.3


From dc5fa2073f63965faad232af0adf9b7e50b5f340 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 30 Mar 2021 17:51:30 +0800
Subject: ppp: deflate: Remove useless call "zlib_inflateEnd"

Fix the following whitescan warning:

Calling "zlib_inflateEnd(&state->strm)" is only useful for its return
value, which is ignored.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_deflate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c
index c457f849e553..e6d48e5c65a3 100644
--- a/drivers/net/ppp/ppp_deflate.c
+++ b/drivers/net/ppp/ppp_deflate.c
@@ -279,7 +279,6 @@ static void z_decomp_free(void *arg)
 	struct ppp_deflate_state *state = (struct ppp_deflate_state *) arg;
 
 	if (state) {
-		zlib_inflateEnd(&state->strm);
 		vfree(state->strm.workspace);
 		kfree(state);
 	}
-- 
cgit v1.2.3


From 000ac44da7d0adfc5e62e6c019246a4afeeffd04 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:49 +0200
Subject: udp: fixup csum for GSO receive slow path

When UDP packets generated locally by a socket with UDP_SEGMENT
traverse the following path:

UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) ->
	UDP tunnel (rx) -> UDP socket (no UDP_GRO)

ip_summed will be set to CHECKSUM_PARTIAL at creation time and
such checksum mode will be preserved in the above path up to the
UDP tunnel receive code where we have:

 __iptunnel_pull_header() -> skb_pull_rcsum() ->
skb_postpull_rcsum() -> __skb_postpull_rcsum()

The latter will convert the skb to CHECKSUM_NONE.

The UDP GSO packet will be later segmented as part of the rx socket
receive operation, and will present a CHECKSUM_NONE after segmentation.

Additionally the segmented packets UDP CB still refers to the original
GSO packet len. Overall that causes unexpected/wrong csum validation
errors later in the UDP receive path.

We could possibly address the issue with some additional checks and
csum mangling in the UDP tunnel code. Since the issue affects only
this UDP receive slow path, let's set a suitable csum status there.

Note that SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets lacking an UDP
encapsulation present a valid checksum when landing to udp_queue_rcv_skb(),
as the UDP checksum has been validated by the GRO engine.

v2 -> v3:
 - even more verbose commit message and comments

v1 -> v2:
 - restrict the csum update to the packets strictly needing them
 - hopefully clarify the commit message and code comments

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 23 +++++++++++++++++++++++
 net/ipv4/udp.c    |  2 ++
 net/ipv6/udp.c    |  1 +
 3 files changed, 26 insertions(+)

diff --git a/include/net/udp.h b/include/net/udp.h
index d4d064c59232..adf2ff8ac87c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -515,6 +515,29 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 	return segs;
 }
 
+static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
+{
+	/* UDP-lite can't land here - no GRO */
+	WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
+
+	/* UDP packets generated with UDP_SEGMENT and traversing:
+	 *
+	 * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
+	 *
+	 * can reach an UDP socket with CHECKSUM_NONE, because
+	 * __iptunnel_pull_header() converts CHECKSUM_PARTIAL into NONE.
+	 * SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST packets with no UDP tunnel will
+	 * have a valid checksum, as the GRO engine validates the UDP csum
+	 * before the aggregation and nobody strips such info in between.
+	 * Instead of adding another check in the tunnel fastpath, we can force
+	 * a valid csum after the segmentation.
+	 * Additionally fixup the UDP CB.
+	 */
+	UDP_SKB_CB(skb)->cscov = skb->len;
+	if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
+		skb->csum_valid = 1;
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a0478b17243..fe85dcf8c008 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2178,6 +2178,8 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	segs = udp_rcv_segment(sk, skb, true);
 	skb_list_walk_safe(segs, skb, next) {
 		__skb_pull(skb, skb_transport_offset(skb));
+
+		udp_post_segment_fix_csum(skb);
 		ret = udp_queue_rcv_one_skb(sk, skb);
 		if (ret > 0)
 			ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d25e5a9252fd..fa2f54738392 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -749,6 +749,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	skb_list_walk_safe(segs, skb, next) {
 		__skb_pull(skb, skb_transport_offset(skb));
 
+		udp_post_segment_fix_csum(skb);
 		ret = udpv6_queue_rcv_one_skb(sk, skb);
 		if (ret > 0)
 			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
-- 
cgit v1.2.3


From 18f25dc399901426dff61e676ba603ff52c666f7 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:50 +0200
Subject: udp: skip L4 aggregation for UDP tunnel packets

If NETIF_F_GRO_FRAGLIST or NETIF_F_GRO_UDP_FWD are enabled, and there
are UDP tunnels available in the system, udp_gro_receive() could end-up
doing L4 aggregation (either SKB_GSO_UDP_L4 or SKB_GSO_FRAGLIST) at
the outer UDP tunnel level for packets effectively carrying and UDP
tunnel header.

That could cause inner protocol corruption. If e.g. the relevant
packets carry a vxlan header, different vxlan ids will be ignored/
aggregated to the same GSO packet. Inner headers will be ignored, too,
so that e.g. TCP over vxlan push packets will be held in the GRO
engine till the next flush, etc.

Just skip the SKB_GSO_UDP_L4 and SKB_GSO_FRAGLIST code path if the
current packet could land in a UDP tunnel, and let udp_gro_receive()
do GRO via udp_sk(sk)->gro_receive.

The check implemented in this patch is broader than what is strictly
needed, as the existing UDP tunnel could be e.g. configured on top of
a different device: we could end-up skipping GRO at-all for some packets.

Anyhow, that is a very thin corner case and covering it will add quite
a bit of complexity.

v1 -> v2:
 - hopefully clarify the commit message

Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
Fixes: 36707061d6ba ("udp: allow forwarding of plain (non-fraglisted) UDP GRO packets")
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index c5b4b586570f..25134a3548e9 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -515,21 +515,24 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
 
+	/* we can do L4 aggregation only if the packet can't land in a tunnel
+	 * otherwise we could corrupt the inner stream
+	 */
 	NAPI_GRO_CB(skb)->is_flist = 0;
-	if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
-		NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
+	if (!sk || !udp_sk(sk)->gro_receive) {
+		if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
+			NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;
 
-	if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
-	    (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
-		pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+		if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
+		    (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
+			pp = call_gro_receive(udp_gro_receive_segment, head, skb);
 		return pp;
 	}
 
-	if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
+	if (NAPI_GRO_CB(skb)->encap_mark ||
 	    (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
 	     NAPI_GRO_CB(skb)->csum_cnt == 0 &&
-	     !NAPI_GRO_CB(skb)->csum_valid) ||
-	    !udp_sk(sk)->gro_receive)
+	     !NAPI_GRO_CB(skb)->csum_valid))
 		goto out;
 
 	/* mark that this skb passed once through the tunnel gro layer */
-- 
cgit v1.2.3


From e0e3070a9bc9bb3d3e00b37da872ed5b0adbd077 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:51 +0200
Subject: udp: properly complete L4 GRO over UDP tunnel packet

After the previous patch, the stack can do L4 UDP aggregation
on top of a UDP tunnel.

In such scenario, udp{4,6}_gro_complete will be called twice. This function
will enter its is_flist branch immediately, even though that is only
correct on the second call, as GSO_FRAGLIST is only relevant for the
inner packet.

Instead, we need to try first UDP tunnel-based aggregation, if the GRO
packet requires that.

This patch changes udp{4,6}_gro_complete to skip the frag list processing
when while encap_mark == 1, identifying processing of the outer tunnel
header.
Additionally, clears the field in udp_gro_complete() so that we can enter
the frag list path on the next round, for the inner header.

v1 -> v2:
 - hopefully clarified the commit message

Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp_offload.c | 8 +++++++-
 net/ipv6/udp_offload.c | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25134a3548e9..54e06b88af69 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -642,6 +642,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
 		skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
 					: SKB_GSO_UDP_TUNNEL;
 
+		/* clear the encap mark, so that inner frag_list gro_complete
+		 * can take place
+		 */
+		NAPI_GRO_CB(skb)->encap_mark = 0;
+
 		/* Set encapsulation before calling into inner gro_complete()
 		 * functions to make them set up the inner offsets.
 		 */
@@ -665,7 +670,8 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (NAPI_GRO_CB(skb)->is_flist) {
+	/* do fraglist only if there is no outer UDP encap (or we already processed it) */
+	if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) {
 		uh->len = htons(skb->len - nhoff);
 
 		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index faa823c24292..b3d9ed96e5ea 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -163,7 +163,8 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
 
-	if (NAPI_GRO_CB(skb)->is_flist) {
+	/* do fraglist only if there is no outer UDP encap (or we already processed it) */
+	if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) {
 		uh->len = htons(skb->len - nhoff);
 
 		skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
-- 
cgit v1.2.3


From 78352f73dc5047f3f744764cc45912498c52f3c9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:52 +0200
Subject: udp: never accept GSO_FRAGLIST packets

Currently the UDP protocol delivers GSO_FRAGLIST packets to
the sockets without the expected segmentation.

This change addresses the issue introducing and maintaining
a couple of new fields to explicitly accept SKB_GSO_UDP_L4
or GSO_FRAGLIST packets. Additionally updates  udp_unexpected_gso()
accordingly.

UDP sockets enabling UDP_GRO stil keep accept_udp_fraglist
zeroed.

v1 -> v2:
 - use 2 bits instead of a whole GSO bitmask (Willem)

Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h | 16 +++++++++++++---
 net/ipv4/udp.c      |  3 +++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index aa84597bdc33..ae58ff3b6b5b 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -51,7 +51,9 @@ struct udp_sock {
 					   * different encapsulation layer set
 					   * this
 					   */
-			 gro_enabled:1;	/* Can accept GRO packets */
+			 gro_enabled:1,	/* Request GRO aggregation */
+			 accept_udp_l4:1,
+			 accept_udp_fraglist:1;
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -131,8 +133,16 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
 
 static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 {
-	return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) &&
-	       skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4;
+	if (!skb_is_gso(skb))
+		return false;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
+		return true;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
+		return true;
+
+	return false;
 }
 
 #define udp_portaddr_for_each_entry(__sk, list) \
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fe85dcf8c008..c0695ce42dc5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2666,9 +2666,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 
 	case UDP_GRO:
 		lock_sock(sk);
+
+		/* when enabling GRO, accept the related GSO packet type */
 		if (valbool)
 			udp_tunnel_encap_enable(sk->sk_socket);
 		up->gro_enabled = valbool;
+		up->accept_udp_l4 = valbool;
 		release_sock(sk);
 		break;
 
-- 
cgit v1.2.3


From d18931a92a0b5feddd8a39d097b90ae2867db02f Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:53 +0200
Subject: vxlan: allow L4 GRO passthrough

When passing up an UDP GSO packet with L4 aggregation, there is
no need to segment it at the vxlan level. We can propagate the
packet untouched and let it be segmented later, if needed.

Introduce an helper to allow let the UDP socket to accept any
L4 aggregation and use it in the vxlan driver.

v1 -> v2:
 - updated to use the newly introduced UDP socket 'accept*' fields

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 1 +
 include/linux/udp.h | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7665817f3cb6..39ee1300cdd9 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3484,6 +3484,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
 	if (err < 0)
 		return ERR_PTR(err);
 
+	udp_allow_gso(sock->sk);
 	return sock;
 }
 
diff --git a/include/linux/udp.h b/include/linux/udp.h
index ae58ff3b6b5b..ae66dadd8543 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -145,6 +145,12 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 	return false;
 }
 
+static inline void udp_allow_gso(struct sock *sk)
+{
+	udp_sk(sk)->accept_udp_l4 = 1;
+	udp_sk(sk)->accept_udp_fraglist = 1;
+}
+
 #define udp_portaddr_for_each_entry(__sk, list) \
 	hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
-- 
cgit v1.2.3


From 61630c4f052b197191d1d8f3a749d204a5cbdeae Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:54 +0200
Subject: geneve: allow UDP L4 GRO passthrou

Similar to the previous commit, let even geneve
passthrou the L4 GRO packets

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 4ac0373326ef..5d7a2b1469f4 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -461,6 +461,7 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
 	if (err < 0)
 		return ERR_PTR(err);
 
+	udp_allow_gso(sock->sk);
 	return sock;
 }
 
-- 
cgit v1.2.3


From b03ef676ba6dc53c728e936a1709d4bf35362184 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:55 +0200
Subject: bareudp: allow UDP L4 GRO passthrou

Similar to the previous commit, let even geneve
passthrou the L4 GRO packets

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bareudp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 7511bca9c15e..edfad93e7b68 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -218,6 +218,7 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port)
 	if (err < 0)
 		return ERR_PTR(err);
 
+	udp_allow_gso(sock->sk);
 	return sock;
 }
 
-- 
cgit v1.2.3


From a062260a9d5fce22977744ed6dab64b7b1508ab3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 30 Mar 2021 12:28:56 +0200
Subject: selftests: net: add UDP GRO forwarding self-tests

Create a bunch of virtual topologies and verify that
NETIF_F_GRO_FRAGLIST or NETIF_F_GRO_UDP_FWD-enabled
devices aggregate the ingress packets as expected.
Additionally check that the aggregate packets are
segmented correctly when landing on a socket

Also test SKB_GSO_FRAGLIST and SKB_GSO_UDP_L4 aggregation
on top of UDP tunnel (vxlan)

v1 -> v2:
 - hopefully clarify the commit message
 - moved the overlay network ipv6 range into the 'documentation'
   reserved range (Willem)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile      |   1 +
 tools/testing/selftests/net/udpgro_fwd.sh | 251 ++++++++++++++++++++++++++++++
 2 files changed, 252 insertions(+)
 create mode 100755 tools/testing/selftests/net/udpgro_fwd.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 25f198bec0b2..2d71b283dde3 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -23,6 +23,7 @@ TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS += bareudp.sh
 TEST_PROGS += unicast_extensions.sh
+TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
new file mode 100755
index 000000000000..a8fa64136282
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly BASE="ns-$(mktemp -u XXXXXX)"
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+# "overlay" network used for UDP over UDP tunnel traffic
+readonly OL_NET_V4=172.16.1.
+readonly OL_NET_V6=2001:db8:1::
+readonly NPROCS=`nproc`
+
+cleanup() {
+	local ns
+	local -r jobs="$(jobs -p)"
+	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+	local net
+	local ns
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns add $ns
+		ip -n $ns link set dev lo up
+	done
+
+	ip link add name veth$SRC type veth peer name veth$DST
+
+	for ns in $SRC $DST; do
+		ip link set dev veth$ns netns $BASE$ns
+		ip -n $BASE$ns link set dev veth$ns up
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+	done
+	ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+}
+
+create_vxlan_endpoint() {
+	local -r netns=$1
+	local -r bm_dev=$2
+	local -r bm_rem_addr=$3
+	local -r vxlan_dev=$4
+	local -r vxlan_id=$5
+	local -r vxlan_port=4789
+
+	ip -n $netns link set dev $bm_dev up
+	ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \
+				dstport $vxlan_port remote $bm_rem_addr
+	ip -n $netns link set dev $vxlan_dev up
+}
+
+create_vxlan_pair() {
+	local ns
+
+	create_ns
+
+	for ns in $SRC $DST; do
+		# note that 3 - $SRC == $DST and 3 - $DST == $SRC
+		create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4
+		ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24
+	done
+	for ns in $SRC $DST; do
+		create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+		ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+	done
+}
+
+is_ipv6() {
+	if [[ $1 =~ .*:.* ]]; then
+		return 0
+	fi
+	return 1
+}
+
+run_test() {
+	local -r msg=$1
+	local -r dst=$2
+	local -r pkts=$3
+	local -r vxpkts=$4
+	local bind=$5
+	local rx_args=""
+	local rx_family="-4"
+	local family=-4
+	local filter=IpInReceives
+	local ipt=iptables
+
+	printf "%-40s" "$msg"
+
+	if is_ipv6 $dst; then
+		# rx program does not support '-6' and implies ipv6 usage by default
+		rx_family=""
+		family=-6
+		filter=Ip6InReceives
+		ipt=ip6tables
+	fi
+
+	rx_args="$rx_family"
+	[ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+	# send a single GSO packet, segmented in 10 UDP frames.
+	# Always expect 10 UDP frames on RX side as rx socket does
+	# not enable GRO
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+	ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+	local spid=$!
+	sleep 0.1
+	ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+							  sed -e 's/\[//' -e 's/:.*//'`
+	if [ $rcv != $pkts ]; then
+		echo " fail - received $rvs packets, expected $pkts"
+		ret=1
+		return
+	fi
+
+	local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+							    sed -e 's/\[//' -e 's/:.*//'`
+
+	# upper net can generate a little noise, allow some tolerance
+	if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+		echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+		ret=1
+		return
+	fi
+	echo " ok"
+}
+
+run_bench() {
+	local -r msg=$1
+	local -r dst=$2
+	local family=-4
+
+	printf "%-40s" "$msg"
+	if [ $NPROCS -lt 2 ]; then
+		echo " skip - needed 2 CPUs found $NPROCS"
+		return
+	fi
+
+	is_ipv6 $dst && family=-6
+
+	# bind the sender and the receiver to different CPUs to try
+	# get reproducible results
+	ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+	ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10  &
+	local spid=$!
+	sleep 0.1
+	ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+}
+
+for family in 4 6; do
+	BM_NET=$BM_NET_V4
+	OL_NET=$OL_NET_V4
+	IPT=iptables
+	SUFFIX=24
+	VXDEV=vxlan
+
+	if [ $family = 6 ]; then
+		BM_NET=$BM_NET_V6
+		OL_NET=$OL_NET_V6
+		SUFFIX="64 nodad"
+		VXDEV=vxlan6
+		IPT=ip6tables
+	fi
+
+	echo "IPv$family"
+
+	create_ns
+	run_test "No GRO" $BM_NET$DST 10 0
+	cleanup
+
+	create_ns
+	ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+	run_test "GRO frag list" $BM_NET$DST 1 0
+	cleanup
+
+	# UDP GRO fwd skips aggregation when find an udp socket with the GRO option
+	# if there is an UDP tunnel in the running system, such lookup happen
+	# take place.
+	# use NAT to circumvent GRO FWD check
+	create_ns
+	ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
+					-j DNAT --to-destination $BM_NET$DST
+	run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST
+	cleanup
+
+	create_ns
+	run_bench "UDP fwd perf" $BM_NET$DST
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	run_bench "UDP GRO fwd perf" $BM_NET$DST
+	cleanup
+
+	create_vxlan_pair
+	ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+	run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+	cleanup
+
+	# use NAT to circumvent GRO FWD check
+	create_vxlan_pair
+	ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
+					-j DNAT --to-destination $OL_NET$DST
+
+	# load arp cache before running the test to reduce the amount of
+	# stray traffic on top of the UDP tunnel
+	ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
+	run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+	cleanup
+
+	create_vxlan_pair
+	run_bench "UDP tunnel fwd perf" $OL_NET$DST
+	ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+	run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+	cleanup
+done
+
+exit $ret
-- 
cgit v1.2.3


From e48792a9ec7898444fec6577611a6205e36dfdb9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 30 Mar 2021 13:41:10 +0300
Subject: tc-testing: add simple action change test

Use act_simple to verify that action created with 'tc actions change'
command exists after command returns. The goal is to verify internal action
API reference counting to ensure that the case when netlink message has
NLM_F_REPLACE flag set but action with specified index doesn't exist is
handled correctly.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/simple.json        | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index 8e8c1ae12260..e15f708b0fa4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -23,6 +23,30 @@
             "$TC actions flush action simple"
         ]
     },
+    {
+        "id": "4297",
+        "name": "Add simple action with change command",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions change action simple sdata \"Not changed\" index 60",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
+    },
     {
         "id": "6d4c",
         "name": "Add simple action with duplicate index",
-- 
cgit v1.2.3


From 6aa6791d1a0fa9cf371287d2134f95a936da408a Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 30 Mar 2021 17:54:15 +0300
Subject: dpaa2-switch: fix the translation between the bridge and dpsw STP
 states

The numerical values used for STP states are different between the
bridge and the MC ABI therefore, the direct usage of the
BR_STATE_* macros directly in the structures passed to the firmware is
incorrect.

Create a separate function that translates between the bridge STP states
and the enum that holds the STP state as seen by the Management Complex.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 23 +++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index a9b30a72ddad..073316d0a77c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -318,17 +318,34 @@ static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv,
 	return 0;
 }
 
+static enum dpsw_stp_state br_stp_state_to_dpsw(u8 state)
+{
+	switch (state) {
+	case BR_STATE_DISABLED:
+		return DPSW_STP_STATE_DISABLED;
+	case BR_STATE_LISTENING:
+		return DPSW_STP_STATE_LISTENING;
+	case BR_STATE_LEARNING:
+		return DPSW_STP_STATE_LEARNING;
+	case BR_STATE_FORWARDING:
+		return DPSW_STP_STATE_FORWARDING;
+	case BR_STATE_BLOCKING:
+		return DPSW_STP_STATE_BLOCKING;
+	default:
+		return DPSW_STP_STATE_DISABLED;
+	}
+}
+
 static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state)
 {
-	struct dpsw_stp_cfg stp_cfg = {
-		.state = state,
-	};
+	struct dpsw_stp_cfg stp_cfg = {0};
 	int err;
 	u16 vid;
 
 	if (!netif_running(port_priv->netdev) || state == port_priv->stp_state)
 		return 0;	/* Nothing to do */
 
+	stp_cfg.state = br_stp_state_to_dpsw(state);
 	for (vid = 0; vid <= VLAN_VID_MASK; vid++) {
 		if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) {
 			stp_cfg.vlan_id = vid;
-- 
cgit v1.2.3


From 90f07102352945efcd75ade6e9293bfe0306b3fe Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 30 Mar 2021 17:54:16 +0300
Subject: dpaa2-switch: create and assign an ACL table per port

In order to trap frames to the CPU, the DPAA2 switch uses the ACL table.
At probe time, create an ACL table for each switch port so that in the
next patches we can use this to trap STP frames and redirect them to the
control interface.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    |  23 ++++-
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |   4 +
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h    |  26 +++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.c        | 112 +++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.h        |  29 ++++++
 5 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 073316d0a77c..0683aa34f49c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2659,8 +2659,10 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	struct net_device *netdev = port_priv->netdev;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	struct dpsw_fdb_cfg fdb_cfg = {0};
-	struct dpaa2_switch_fdb *fdb;
+	struct dpsw_acl_if_cfg acl_if_cfg;
 	struct dpsw_if_attr dpsw_if_attr;
+	struct dpaa2_switch_fdb *fdb;
+	struct dpsw_acl_cfg acl_cfg;
 	u16 fdb_id;
 	int err;
 
@@ -2702,6 +2704,25 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	if (err)
 		return err;
 
+	/* Create an ACL table to be used by this switch port */
+	acl_cfg.max_entries = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES;
+	err = dpsw_acl_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &port_priv->acl_tbl, &acl_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add err %d\n", err);
+		return err;
+	}
+
+	acl_if_cfg.if_id[0] = port_priv->idx;
+	acl_if_cfg.num_ifs = 1;
+	err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      port_priv->acl_tbl, &acl_if_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
+		dpsw_acl_remove(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				port_priv->acl_tbl);
+	}
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 549218994243..655937887960 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -79,6 +79,8 @@
 #define DPAA2_SWITCH_NEEDED_HEADROOM \
 	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
 
+#define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES	16
+
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
 struct ethsw_core;
@@ -113,6 +115,8 @@ struct ethsw_port_priv {
 	struct dpaa2_switch_fdb	*fdb;
 	bool			bcast_flood;
 	bool			ucast_flood;
+
+	u16			acl_tbl;
 };
 
 /* Switch data */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 24b17d6e09af..89f757a2de77 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -74,6 +74,11 @@
 #define DPSW_CMDID_FDB_REMOVE_MULTICAST     DPSW_CMD_ID(0x087)
 #define DPSW_CMDID_FDB_DUMP                 DPSW_CMD_ID(0x08A)
 
+#define DPSW_CMDID_ACL_ADD                  DPSW_CMD_ID(0x090)
+#define DPSW_CMDID_ACL_REMOVE               DPSW_CMD_ID(0x091)
+#define DPSW_CMDID_ACL_ADD_IF               DPSW_CMD_ID(0x094)
+#define DPSW_CMDID_ACL_REMOVE_IF            DPSW_CMD_ID(0x095)
+
 #define DPSW_CMDID_IF_GET_PORT_MAC_ADDR     DPSW_CMD_ID(0x0A7)
 
 #define DPSW_CMDID_CTRL_IF_GET_ATTR         DPSW_CMD_ID(0x0A0)
@@ -457,5 +462,26 @@ struct dpsw_cmd_if_set_learning_mode {
 	/* only the first 4 bits from LSB */
 	u8 mode;
 };
+
+struct dpsw_cmd_acl_add {
+	__le16 pad;
+	__le16 max_entries;
+};
+
+struct dpsw_rsp_acl_add {
+	__le16 acl_id;
+};
+
+struct dpsw_cmd_acl_remove {
+	__le16 acl_id;
+};
+
+struct dpsw_cmd_acl_if {
+	__le16 acl_id;
+	__le16 num_ifs;
+	__le32 pad;
+	__le64 if_id;
+};
+
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index 6c787d4b85f9..ad4b62b3c669 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1354,3 +1354,115 @@ int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_acl_add() - Create an ACL table
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	Returned ACL ID, for future references
+ * @cfg:	ACL configuration
+ *
+ * Create Access Control List table. Multiple ACLs can be created and
+ * co-exist in L2 switch
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id,
+		 const struct dpsw_acl_cfg *cfg)
+{
+	struct dpsw_cmd_acl_add *cmd_params;
+	struct dpsw_rsp_acl_add *rsp_params;
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD, cmd_flags, token);
+	cmd_params = (struct dpsw_cmd_acl_add *)cmd.params;
+	cmd_params->max_entries = cpu_to_le16(cfg->max_entries);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	rsp_params = (struct dpsw_rsp_acl_add *)cmd.params;
+	*acl_id = le16_to_cpu(rsp_params->acl_id);
+
+	return 0;
+}
+
+/**
+ * dpsw_acl_remove() - Remove an ACL table from L2 switch.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	ACL ID
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id)
+{
+	struct dpsw_cmd_acl_remove *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_remove *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_acl_add_if() - Associate interface/interfaces with an ACL table.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	ACL ID
+ * @cfg:	Interfaces list
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id, const struct dpsw_acl_if_cfg *cfg)
+{
+	struct dpsw_cmd_acl_if *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_IF, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_if *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_acl_remove_if() - De-associate interface/interfaces from an ACL table
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	ACL ID
+ * @cfg:	Interfaces list
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_if_cfg *cfg)
+{
+	struct dpsw_cmd_acl_if *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_IF, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_if *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs);
+	build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 96837b10cc94..35b4749cdcdb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -628,4 +628,33 @@ int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 			      u16 if_id, enum dpsw_learning_mode mode);
 
+/**
+ * struct dpsw_acl_cfg - ACL Configuration
+ * @max_entries: Number of ACL rules
+ */
+struct dpsw_acl_cfg {
+	u16 max_entries;
+};
+
+int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id,
+		 const struct dpsw_acl_cfg *cfg);
+
+int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id);
+
+/**
+ * struct dpsw_acl_if_cfg - List of interfaces to associate with an ACL table
+ * @num_ifs: Number of interfaces
+ * @if_id: List of interfaces
+ */
+struct dpsw_acl_if_cfg {
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id, const struct dpsw_acl_if_cfg *cfg);
+
+int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_if_cfg *cfg);
 #endif /* __FSL_DPSW_H */
-- 
cgit v1.2.3


From 62734c7405b749a760c88c3e33d250c9efde263e Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 30 Mar 2021 17:54:17 +0300
Subject: dpaa2-switch: keep track of the current learning state per port

Keep track of the current learning state per port so that we can
reference it in the next patches when setting up a STP state.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 4 ++++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 0683aa34f49c..45090d003b3d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1329,6 +1329,7 @@ static int dpaa2_switch_port_bridge_flags(struct net_device *netdev,
 		err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
 		if (err)
 			return err;
+		port_priv->learn_ena = learn_ena;
 	}
 
 	if (flags.mask & (BR_BCAST_FLOOD | BR_FLOOD | BR_MCAST_FLOOD)) {
@@ -1637,6 +1638,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
 	/* Inherit the initial bridge port learning state */
 	learn_ena = br_port_flag_is_set(netdev, BR_LEARNING);
 	err = dpaa2_switch_port_set_learning(port_priv, learn_ena);
+	port_priv->learn_ena = learn_ena;
 
 	/* Setup the egress flood policy (broadcast, unknown unicast) */
 	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
@@ -1719,6 +1721,7 @@ static int dpaa2_switch_port_bridge_leave(struct net_device *netdev)
 	err = dpaa2_switch_port_set_learning(port_priv, false);
 	if (err)
 		return err;
+	port_priv->learn_ena = false;
 
 	/* Add the VLAN 1 as PVID when not under a bridge. We need this since
 	 * the dpaa2 switch interfaces are not capable to be VLAN unaware
@@ -2839,6 +2842,7 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	err = dpaa2_switch_port_set_learning(port_priv, false);
 	if (err)
 		goto err_port_probe;
+	port_priv->learn_ena = false;
 
 	return 0;
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 655937887960..35990761ce8f 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -115,6 +115,7 @@ struct ethsw_port_priv {
 	struct dpaa2_switch_fdb	*fdb;
 	bool			bcast_flood;
 	bool			ucast_flood;
+	bool			learn_ena;
 
 	u16			acl_tbl;
 };
-- 
cgit v1.2.3


From 1a64ed129cce958921bebbde0842d9a0ba0205ba Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 30 Mar 2021 17:54:18 +0300
Subject: dpaa2-switch: trap STP frames to the CPU

Add an ACL entry in each port's ACL table to redirect any frame that
has the destination MAC address equal to the STP dmac to the control
interface.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 68 ++++++++++++++++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  2 +
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h    | 49 ++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.c        | 78 ++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpsw.h        | 92 ++++++++++++++++++++++
 5 files changed, 289 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 45090d003b3d..72b7ba003538 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2652,8 +2652,72 @@ err_close:
 	return err;
 }
 
+/* Add an ACL to redirect frames with specific destination MAC address to
+ * control interface
+ */
+static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
+					   const char *mac)
+{
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_acl_entry_cfg acl_entry_cfg;
+	struct dpsw_acl_fields *acl_h;
+	struct dpsw_acl_fields *acl_m;
+	struct dpsw_acl_key acl_key;
+	struct device *dev;
+	u8 *cmd_buff;
+	int err;
+
+	dev = port_priv->netdev->dev.parent;
+	acl_h = &acl_key.match;
+	acl_m = &acl_key.mask;
+
+	if (port_priv->acl_num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) {
+		netdev_err(netdev, "ACL full\n");
+		return -ENOMEM;
+	}
+
+	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
+	memset(&acl_key, 0, sizeof(acl_key));
+
+	/* Match on the destination MAC address */
+	ether_addr_copy(acl_h->l2_dest_mac, mac);
+	eth_broadcast_addr(acl_m->l2_dest_mac);
+
+	cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL);
+	if (!cmd_buff)
+		return -ENOMEM;
+	dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff);
+
+	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
+	acl_entry_cfg.precedence = port_priv->acl_num_rules;
+	acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
+	acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff,
+						DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+						DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, acl_entry_cfg.key_iova))) {
+		netdev_err(netdev, "DMA mapping failed\n");
+		return -EFAULT;
+	}
+
+	err = dpsw_acl_add_entry(port_priv->ethsw_data->mc_io, 0,
+				 port_priv->ethsw_data->dpsw_handle,
+				 port_priv->acl_tbl, &acl_entry_cfg);
+
+	dma_unmap_single(dev, acl_entry_cfg.key_iova, sizeof(cmd_buff),
+			 DMA_TO_DEVICE);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add_entry() failed %d\n", err);
+		return err;
+	}
+
+	port_priv->acl_num_rules++;
+
+	return 0;
+}
+
 static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 {
+	const char stpa[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
 	struct switchdev_obj_port_vlan vlan = {
 		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
 		.vid = DEFAULT_VLAN_ID,
@@ -2726,6 +2790,10 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 				port_priv->acl_tbl);
 	}
 
+	err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
+	if (err)
+		return err;
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 35990761ce8f..0ae1d27c811e 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -80,6 +80,7 @@
 	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
 
 #define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES	16
+#define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE	256
 
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
 
@@ -118,6 +119,7 @@ struct ethsw_port_priv {
 	bool			learn_ena;
 
 	u16			acl_tbl;
+	u8			acl_num_rules;
 };
 
 /* Switch data */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 89f757a2de77..1747cee19a72 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -76,6 +76,7 @@
 
 #define DPSW_CMDID_ACL_ADD                  DPSW_CMD_ID(0x090)
 #define DPSW_CMDID_ACL_REMOVE               DPSW_CMD_ID(0x091)
+#define DPSW_CMDID_ACL_ADD_ENTRY            DPSW_CMD_ID(0x092)
 #define DPSW_CMDID_ACL_ADD_IF               DPSW_CMD_ID(0x094)
 #define DPSW_CMDID_ACL_REMOVE_IF            DPSW_CMD_ID(0x095)
 
@@ -483,5 +484,53 @@ struct dpsw_cmd_acl_if {
 	__le64 if_id;
 };
 
+struct dpsw_prep_acl_entry {
+	u8 match_l2_dest_mac[6];
+	__le16 match_l2_tpid;
+
+	u8 match_l2_source_mac[6];
+	__le16 match_l2_vlan_id;
+
+	__le32 match_l3_dest_ip;
+	__le32 match_l3_source_ip;
+
+	__le16 match_l4_dest_port;
+	__le16 match_l4_source_port;
+	__le16 match_l2_ether_type;
+	u8 match_l2_pcp_dei;
+	u8 match_l3_dscp;
+
+	u8 mask_l2_dest_mac[6];
+	__le16 mask_l2_tpid;
+
+	u8 mask_l2_source_mac[6];
+	__le16 mask_l2_vlan_id;
+
+	__le32 mask_l3_dest_ip;
+	__le32 mask_l3_source_ip;
+
+	__le16 mask_l4_dest_port;
+	__le16 mask_l4_source_port;
+	__le16 mask_l2_ether_type;
+	u8 mask_l2_pcp_dei;
+	u8 mask_l3_dscp;
+
+	u8 match_l3_protocol;
+	u8 mask_l3_protocol;
+};
+
+#define DPSW_RESULT_ACTION_SHIFT	0
+#define DPSW_RESULT_ACTION_SIZE		4
+
+struct dpsw_cmd_acl_entry {
+	__le16 acl_id;
+	__le16 result_if_id;
+	__le32 precedence;
+	/* from LSB only the first 4 bits */
+	u8 result_action;
+	u8 pad[7];
+	__le64 pad2[4];
+	__le64 key_iova;
+};
 #pragma pack(pop)
 #endif /* __FSL_DPSW_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index ad4b62b3c669..6704efe89bc1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1466,3 +1466,81 @@ int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_acl_prepare_entry_cfg() - Setup an ACL entry
+ * @key:		Key
+ * @entry_cfg_buf:	Zeroed 256 bytes of memory before mapping it to DMA
+ *
+ * This function has to be called before adding or removing acl_entry
+ *
+ */
+void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key,
+				u8 *entry_cfg_buf)
+{
+	struct dpsw_prep_acl_entry *ext_params;
+	int i;
+
+	ext_params = (struct dpsw_prep_acl_entry *)entry_cfg_buf;
+
+	for (i = 0; i < 6; i++) {
+		ext_params->match_l2_dest_mac[i] = key->match.l2_dest_mac[5 - i];
+		ext_params->match_l2_source_mac[i] = key->match.l2_source_mac[5 - i];
+		ext_params->mask_l2_dest_mac[i] = key->mask.l2_dest_mac[5 - i];
+		ext_params->mask_l2_source_mac[i] = key->mask.l2_source_mac[5 - i];
+	}
+
+	ext_params->match_l2_tpid = cpu_to_le16(key->match.l2_tpid);
+	ext_params->match_l2_vlan_id = cpu_to_le16(key->match.l2_vlan_id);
+	ext_params->match_l3_dest_ip = cpu_to_le32(key->match.l3_dest_ip);
+	ext_params->match_l3_source_ip = cpu_to_le32(key->match.l3_source_ip);
+	ext_params->match_l4_dest_port = cpu_to_le16(key->match.l4_dest_port);
+	ext_params->match_l4_source_port = cpu_to_le16(key->match.l4_source_port);
+	ext_params->match_l2_ether_type = cpu_to_le16(key->match.l2_ether_type);
+	ext_params->match_l2_pcp_dei = key->match.l2_pcp_dei;
+	ext_params->match_l3_dscp = key->match.l3_dscp;
+
+	ext_params->mask_l2_tpid = cpu_to_le16(key->mask.l2_tpid);
+	ext_params->mask_l2_vlan_id = cpu_to_le16(key->mask.l2_vlan_id);
+	ext_params->mask_l3_dest_ip = cpu_to_le32(key->mask.l3_dest_ip);
+	ext_params->mask_l3_source_ip = cpu_to_le32(key->mask.l3_source_ip);
+	ext_params->mask_l4_dest_port = cpu_to_le16(key->mask.l4_dest_port);
+	ext_params->mask_l4_source_port = cpu_to_le16(key->mask.l4_source_port);
+	ext_params->mask_l2_ether_type = cpu_to_le16(key->mask.l2_ether_type);
+	ext_params->mask_l2_pcp_dei = key->mask.l2_pcp_dei;
+	ext_params->mask_l3_dscp = key->mask.l3_dscp;
+	ext_params->match_l3_protocol = key->match.l3_protocol;
+	ext_params->mask_l3_protocol = key->mask.l3_protocol;
+}
+
+/**
+ * dpsw_acl_add_entry() - Add a rule to the ACL table.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	ACL ID
+ * @cfg:	Entry configuration
+ *
+ * warning: This function has to be called after dpsw_acl_prepare_entry_cfg()
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_entry_cfg *cfg)
+{
+	struct dpsw_cmd_acl_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_ENTRY, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id);
+	cmd_params->precedence = cpu_to_le32(cfg->precedence);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	dpsw_set_field(cmd_params->result_action,
+		       RESULT_ACTION,
+		       cfg->result.action);
+
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 35b4749cdcdb..08e37c475ae8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -657,4 +657,96 @@ int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 
 int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 		       u16 acl_id, const struct dpsw_acl_if_cfg *cfg);
+
+/**
+ * struct dpsw_acl_fields - ACL fields.
+ * @l2_dest_mac: Destination MAC address: BPDU, Multicast, Broadcast, Unicast,
+ *			slow protocols, MVRP, STP
+ * @l2_source_mac: Source MAC address
+ * @l2_tpid: Layer 2 (Ethernet) protocol type, used to identify the following
+ *		protocols: MPLS, PTP, PFC, ARP, Jumbo frames, LLDP, IEEE802.1ae,
+ *		Q-in-Q, IPv4, IPv6, PPPoE
+ * @l2_pcp_dei: indicate which protocol is encapsulated in the payload
+ * @l2_vlan_id: layer 2 VLAN ID
+ * @l2_ether_type: layer 2 Ethernet type
+ * @l3_dscp: Layer 3 differentiated services code point
+ * @l3_protocol: Tells the Network layer at the destination host, to which
+ *		Protocol this packet belongs to. The following protocol are
+ *		supported: ICMP, IGMP, IPv4 (encapsulation), TCP, IPv6
+ *		(encapsulation), GRE, PTP
+ * @l3_source_ip: Source IPv4 IP
+ * @l3_dest_ip: Destination IPv4 IP
+ * @l4_source_port: Source TCP/UDP Port
+ * @l4_dest_port: Destination TCP/UDP Port
+ */
+struct dpsw_acl_fields {
+	u8 l2_dest_mac[6];
+	u8 l2_source_mac[6];
+	u16 l2_tpid;
+	u8 l2_pcp_dei;
+	u16 l2_vlan_id;
+	u16 l2_ether_type;
+	u8 l3_dscp;
+	u8 l3_protocol;
+	u32 l3_source_ip;
+	u32 l3_dest_ip;
+	u16 l4_source_port;
+	u16 l4_dest_port;
+};
+
+/**
+ * struct dpsw_acl_key - ACL key
+ * @match: Match fields
+ * @mask: Mask: b'1 - valid, b'0 don't care
+ */
+struct dpsw_acl_key {
+	struct dpsw_acl_fields match;
+	struct dpsw_acl_fields mask;
+};
+
+/**
+ * enum dpsw_acl_action - action to be run on the ACL rule match
+ * @DPSW_ACL_ACTION_DROP: Drop frame
+ * @DPSW_ACL_ACTION_REDIRECT: Redirect to certain port
+ * @DPSW_ACL_ACTION_ACCEPT: Accept frame
+ * @DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF: Redirect to control interface
+ */
+enum dpsw_acl_action {
+	DPSW_ACL_ACTION_DROP,
+	DPSW_ACL_ACTION_REDIRECT,
+	DPSW_ACL_ACTION_ACCEPT,
+	DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF
+};
+
+/**
+ * struct dpsw_acl_result - ACL action
+ * @action: Action should be taken when	ACL entry hit
+ * @if_id:  Interface IDs to redirect frame. Valid only if redirect selected for
+ *		 action
+ */
+struct dpsw_acl_result {
+	enum dpsw_acl_action action;
+	u16 if_id;
+};
+
+/**
+ * struct dpsw_acl_entry_cfg - ACL entry
+ * @key_iova: I/O virtual address of DMA-able memory filled with key after call
+ *				to dpsw_acl_prepare_entry_cfg()
+ * @result: Required action when entry hit occurs
+ * @precedence: Precedence inside ACL 0 is lowest; This priority can not change
+ *		during the lifetime of a Policy. It is user responsibility to
+ *		space the priorities according to consequent rule additions.
+ */
+struct dpsw_acl_entry_cfg {
+	u64 key_iova;
+	struct dpsw_acl_result result;
+	int precedence;
+};
+
+void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key,
+				u8 *entry_cfg_buf);
+
+int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_entry_cfg *cfg);
 #endif /* __FSL_DPSW_H */
-- 
cgit v1.2.3


From bc96781a895996c66f1d20411202f24000bfbab8 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 30 Mar 2021 17:54:19 +0300
Subject: dpaa2-switch: setup learning state on STP state change

Depending on what STP state a port is in, the learning on that port
should be enabled or disabled.

When the STP state is DISABLED, BLOCKING or LISTENING no learning should
be happening irrespective of what the bridge previously requested. The
learning state is changed to be the one setup by the bridge when the STP
state is LEARNING or FORWARDING.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 34 +++++++++++++++++-----
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 72b7ba003538..80efc8116963 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1250,14 +1250,6 @@ static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev)
 	fsl_mc_free_irqs(sw_dev);
 }
 
-static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
-						u8 state)
-{
-	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
-	return dpaa2_switch_port_set_stp_state(port_priv, state);
-}
-
 static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, bool enable)
 {
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
@@ -1280,6 +1272,32 @@ static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, boo
 	return err;
 }
 
+static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev,
+						u8 state)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = dpaa2_switch_port_set_stp_state(port_priv, state);
+	if (err)
+		return err;
+
+	switch (state) {
+	case BR_STATE_DISABLED:
+	case BR_STATE_BLOCKING:
+	case BR_STATE_LISTENING:
+		err = dpaa2_switch_port_set_learning(port_priv, false);
+		break;
+	case BR_STATE_LEARNING:
+	case BR_STATE_FORWARDING:
+		err = dpaa2_switch_port_set_learning(port_priv,
+						     port_priv->learn_ena);
+		break;
+	}
+
+	return err;
+}
+
 static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv,
 				   struct switchdev_brport_flags flags)
 {
-- 
cgit v1.2.3


From 0f4e7f4e77b2078955f3cf78f0b818a0d2b898e1 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 30 Mar 2021 12:52:07 -0700
Subject: ionic: count dma errors

Increment our dma-error counter in a couple of spots
that were missed before.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 5985f7c504a9..42d29cd2ca47 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -609,6 +609,7 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
 			    struct ionic_desc_info *desc_info)
 {
 	struct ionic_buf_info *buf_info = desc_info->bufs;
+	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	unsigned int nfrags;
@@ -616,8 +617,10 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
 	int frag_idx;
 
 	dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb));
-	if (dma_mapping_error(dev, dma_addr))
+	if (dma_mapping_error(dev, dma_addr)) {
+		stats->dma_map_err++;
 		return -EIO;
+	}
 	buf_info->dma_addr = dma_addr;
 	buf_info->len = skb_headlen(skb);
 	buf_info++;
@@ -626,8 +629,10 @@ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb,
 	nfrags = skb_shinfo(skb)->nr_frags;
 	for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) {
 		dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag));
-		if (dma_mapping_error(dev, dma_addr))
+		if (dma_mapping_error(dev, dma_addr)) {
+			stats->dma_map_err++;
 			goto dma_fail;
+		}
 		buf_info->dma_addr = dma_addr;
 		buf_info->len = skb_frag_size(frag);
 		buf_info++;
-- 
cgit v1.2.3


From 230efff47adbea8274810b593a9caf8e46c2c1f7 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 30 Mar 2021 12:52:08 -0700
Subject: ionic: fix sizeof usage

Use the actual pointer that we care about as the subject of the
sizeof, rather than a struct name.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 889d234e2ffa..a51be25723a5 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -676,20 +676,20 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif)
 
 	err = -ENOMEM;
 	lif->txqcqs = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif,
-				   sizeof(struct ionic_qcq *), GFP_KERNEL);
+				   sizeof(*lif->txqcqs), GFP_KERNEL);
 	if (!lif->txqcqs)
 		goto err_out;
 	lif->rxqcqs = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif,
-				   sizeof(struct ionic_qcq *), GFP_KERNEL);
+				   sizeof(*lif->rxqcqs), GFP_KERNEL);
 	if (!lif->rxqcqs)
 		goto err_out;
 
 	lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif,
-				     sizeof(struct ionic_tx_stats), GFP_KERNEL);
+				     sizeof(*lif->txqstats), GFP_KERNEL);
 	if (!lif->txqstats)
 		goto err_out;
 	lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif,
-				     sizeof(struct ionic_rx_stats), GFP_KERNEL);
+				     sizeof(*lif->rxqstats), GFP_KERNEL);
 	if (!lif->rxqstats)
 		goto err_out;
 
-- 
cgit v1.2.3


From b2b9a8d7ed134ae72e5ef8d0003d054af8e5e2bf Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 30 Mar 2021 12:52:09 -0700
Subject: ionic: avoid races in ionic_heartbeat_check

Rework the heartbeat checks to be sure that we're getting an
atomic operation.  Through testing we found occasions where a
separate thread could clash with this check and cause erroneous
heartbeat check results.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.c | 93 +++++++++++++++----------
 drivers/net/ethernet/pensando/ionic/ionic_dev.h |  7 +-
 2 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 0532f7cf086d..0e8e88c69e1c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t)
 		return;
 
 	hb = ionic_heartbeat_check(ionic);
+	dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n",
+		__func__, hb, netif_running(lif->netdev),
+		test_bit(IONIC_LIF_F_UP, lif->state));
 
 	if (hb >= 0 &&
 	    !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
@@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic)
 		return -EFAULT;
 	}
 
-	idev->last_fw_status = 0xff;
 	timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
 	ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
+
+	/* set times to ensure the first check will proceed */
+	atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
+	idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
+	/* init as ready, so no transition if the first check succeeds */
+	idev->last_fw_hb = 0;
+	idev->fw_hb_ready = true;
+	idev->fw_status_ready = true;
+
 	mod_timer(&ionic->watchdog_timer,
 		  round_jiffies(jiffies + ionic->watchdog_period));
 
@@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic)
 int ionic_heartbeat_check(struct ionic *ionic)
 {
 	struct ionic_dev *idev = &ionic->idev;
-	unsigned long hb_time;
+	unsigned long check_time, last_check_time;
+	bool fw_status_ready, fw_hb_ready;
 	u8 fw_status;
-	u32 hb;
+	u32 fw_hb;
 
-	/* wait a little more than one second before testing again */
-	hb_time = jiffies;
-	if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
+	/* wait a least one second before testing again */
+	check_time = jiffies;
+	last_check_time = atomic_long_read(&idev->last_check_time);
+do_check_time:
+	if (time_before(check_time, last_check_time + HZ))
 		return 0;
+	if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time,
+					     &last_check_time, check_time)) {
+		/* if called concurrently, only the first should proceed. */
+		dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__);
+		goto do_check_time;
+	}
 
 	/* firmware is useful only if the running bit is set and
 	 * fw_status != 0xff (bad PCI read)
 	 */
 	fw_status = ioread8(&idev->dev_info_regs->fw_status);
-	if (fw_status != 0xff)
-		fw_status &= IONIC_FW_STS_F_RUNNING;  /* use only the run bit */
+	fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
 
 	/* is this a transition? */
-	if (fw_status != idev->last_fw_status &&
-	    idev->last_fw_status != 0xff) {
+	if (fw_status_ready != idev->fw_status_ready) {
 		struct ionic_lif *lif = ionic->lif;
 		bool trigger = false;
 
-		if (!fw_status || fw_status == 0xff) {
+		idev->fw_status_ready = fw_status_ready;
+
+		if (!fw_status_ready) {
 			dev_info(ionic->dev, "FW stopped %u\n", fw_status);
 			if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
 				trigger = true;
@@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic)
 			struct ionic_deferred_work *work;
 
 			work = kzalloc(sizeof(*work), GFP_ATOMIC);
-			if (!work) {
-				dev_err(ionic->dev, "LIF reset trigger dropped\n");
-			} else {
+			if (work) {
 				work->type = IONIC_DW_TYPE_LIF_RESET;
-				if (fw_status & IONIC_FW_STS_F_RUNNING &&
-				    fw_status != 0xff)
-					work->fw_status = 1;
+				work->fw_status = fw_status_ready;
 				ionic_lif_deferred_enqueue(&lif->deferred, work);
 			}
 		}
 	}
-	idev->last_fw_status = fw_status;
 
-	if (!fw_status || fw_status == 0xff)
+	if (!fw_status_ready)
 		return -ENXIO;
 
-	/* early FW has no heartbeat, else FW will return non-zero */
-	hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
-	if (!hb)
+	/* wait at least one watchdog period since the last heartbeat */
+	last_check_time = idev->last_hb_time;
+	if (time_before(check_time, last_check_time + ionic->watchdog_period))
 		return 0;
 
-	/* are we stalled? */
-	if (hb == idev->last_hb) {
-		/* only complain once for each stall seen */
-		if (idev->last_hb_time != 1) {
-			dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
-				 idev->last_hb);
-			idev->last_hb_time = 1;
-		}
+	fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
+	fw_hb_ready = fw_hb != idev->last_fw_hb;
 
-		return -ENXIO;
+	/* early FW version had no heartbeat, so fake it */
+	if (!fw_hb_ready && !fw_hb)
+		fw_hb_ready = true;
+
+	dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n",
+		__func__, fw_hb, idev->last_fw_hb, fw_hb_ready);
+
+	idev->last_fw_hb = fw_hb;
+
+	/* log a transition */
+	if (fw_hb_ready != idev->fw_hb_ready) {
+		idev->fw_hb_ready = fw_hb_ready;
+		if (!fw_hb_ready)
+			dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb);
+		else
+			dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb);
 	}
 
-	if (idev->last_hb_time == 1)
-		dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
+	if (!fw_hb_ready)
+		return -ENXIO;
 
-	idev->last_hb = hb;
-	idev->last_hb_time = hb_time;
+	idev->last_hb_time = check_time;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index ca7e55455165..0c0533737b2b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -4,6 +4,7 @@
 #ifndef _IONIC_DEV_H_
 #define _IONIC_DEV_H_
 
+#include <linux/atomic.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 
@@ -135,9 +136,11 @@ struct ionic_dev {
 	union ionic_dev_info_regs __iomem *dev_info_regs;
 	union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
 
+	atomic_long_t last_check_time;
 	unsigned long last_hb_time;
-	u32 last_hb;
-	u8 last_fw_status;
+	u32 last_fw_hb;
+	bool fw_hb_ready;
+	bool fw_status_ready;
 
 	u64 __iomem *db_pages;
 	dma_addr_t phy_db_pages;
-- 
cgit v1.2.3


From aa620993b1e58e19a556bc3a7f0e15ac08bf2e46 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 30 Mar 2021 12:52:10 -0700
Subject: ionic: pull per-q stats work out of queue loops

Abstract out the per-queue data collection work into separate
functions from the per-queue loops in the stats reporting,
similar to what Alex did for the data label strings in
commit acebe5b6107c ("ionic: Update driver to use ethtool_sprintf")

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_stats.c | 219 ++++++++++++----------
 1 file changed, 125 insertions(+), 94 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 308b4ac6c57b..ed9cf93d9acd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -177,31 +177,42 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = {
 
 #define MAX_Q(lif)   ((lif)->netdev->real_num_tx_queues)
 
+static void ionic_add_lif_txq_stats(struct ionic_lif *lif, int q_num,
+				    struct ionic_lif_sw_stats *stats)
+{
+	struct ionic_tx_stats *txstats = &lif->txqstats[q_num];
+
+	stats->tx_packets += txstats->pkts;
+	stats->tx_bytes += txstats->bytes;
+	stats->tx_tso += txstats->tso;
+	stats->tx_tso_bytes += txstats->tso_bytes;
+	stats->tx_csum_none += txstats->csum_none;
+	stats->tx_csum += txstats->csum;
+}
+
+static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num,
+				    struct ionic_lif_sw_stats *stats)
+{
+	struct ionic_rx_stats *rxstats = &lif->rxqstats[q_num];
+
+	stats->rx_packets += rxstats->pkts;
+	stats->rx_bytes += rxstats->bytes;
+	stats->rx_csum_none += rxstats->csum_none;
+	stats->rx_csum_complete += rxstats->csum_complete;
+	stats->rx_csum_error += rxstats->csum_error;
+}
+
 static void ionic_get_lif_stats(struct ionic_lif *lif,
 				struct ionic_lif_sw_stats *stats)
 {
-	struct ionic_tx_stats *txstats;
-	struct ionic_rx_stats *rxstats;
 	struct rtnl_link_stats64 ns;
 	int q_num;
 
 	memset(stats, 0, sizeof(*stats));
 
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		txstats = &lif->txqstats[q_num];
-		stats->tx_packets += txstats->pkts;
-		stats->tx_bytes += txstats->bytes;
-		stats->tx_tso += txstats->tso;
-		stats->tx_tso_bytes += txstats->tso_bytes;
-		stats->tx_csum_none += txstats->csum_none;
-		stats->tx_csum += txstats->csum;
-
-		rxstats = &lif->rxqstats[q_num];
-		stats->rx_packets += rxstats->pkts;
-		stats->rx_bytes += rxstats->bytes;
-		stats->rx_csum_none += rxstats->csum_none;
-		stats->rx_csum_complete += rxstats->csum_complete;
-		stats->rx_csum_error += rxstats->csum_error;
+		ionic_add_lif_txq_stats(lif, q_num, stats);
+		ionic_add_lif_rxq_stats(lif, q_num, stats);
 	}
 
 	ionic_get_stats64(lif->netdev, &ns);
@@ -214,16 +225,12 @@ static void ionic_get_lif_stats(struct ionic_lif *lif,
 
 static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 {
-	u64 total = 0;
+	u64 total = 0, tx_queues = MAX_Q(lif), rx_queues = MAX_Q(lif);
 
 	/* lif stats */
 	total += IONIC_NUM_LIF_STATS;
-
-	/* tx stats */
-	total += MAX_Q(lif) * IONIC_NUM_TX_STATS;
-
-	/* rx stats */
-	total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
+	total += tx_queues * IONIC_NUM_TX_STATS;
+	total += rx_queues * IONIC_NUM_RX_STATS;
 
 	/* port stats */
 	total += IONIC_NUM_PORT_STATS;
@@ -231,13 +238,13 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 	if (test_bit(IONIC_LIF_F_UP, lif->state) &&
 	    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
 		/* tx debug stats */
-		total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+		total += tx_queues * (IONIC_NUM_DBG_CQ_STATS +
 				      IONIC_NUM_TX_Q_STATS +
 				      IONIC_NUM_DBG_INTR_STATS +
 				      IONIC_MAX_NUM_SG_CNTR);
 
 		/* rx debug stats */
-		total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
+		total += rx_queues * (IONIC_NUM_DBG_CQ_STATS +
 				      IONIC_NUM_DBG_INTR_STATS +
 				      IONIC_NUM_DBG_NAPI_STATS +
 				      IONIC_MAX_NUM_NAPI_CNTR);
@@ -315,13 +322,99 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 		ionic_sw_stats_get_rx_strings(lif, buf, q_num);
 }
 
+static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf,
+					  int q_num)
+{
+	struct ionic_tx_stats *txstats;
+	struct ionic_qcq *txqcq;
+	int i;
+
+	txstats = &lif->txqstats[q_num];
+
+	for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
+		**buf = IONIC_READ_STAT64(txstats, &ionic_tx_stats_desc[i]);
+		(*buf)++;
+	}
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	txqcq = lif->txqcqs[q_num];
+	for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&txqcq->q,
+					  &ionic_txq_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&txqcq->cq,
+					  &ionic_dbg_cq_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&txqcq->intr,
+					  &ionic_dbg_intr_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&txqcq->napi_stats,
+					  &ionic_dbg_napi_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+		**buf = txqcq->napi_stats.work_done_cntr[i];
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
+		**buf = txstats->sg_cntr[i];
+		(*buf)++;
+	}
+}
+
+static void ionic_sw_stats_get_rxq_values(struct ionic_lif *lif, u64 **buf,
+					  int q_num)
+{
+	struct ionic_rx_stats *rxstats;
+	struct ionic_qcq *rxqcq;
+	int i;
+
+	rxstats = &lif->rxqstats[q_num];
+
+	for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
+		**buf = IONIC_READ_STAT64(rxstats, &ionic_rx_stats_desc[i]);
+		(*buf)++;
+	}
+
+	if (!test_bit(IONIC_LIF_F_UP, lif->state) ||
+	    !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
+		return;
+
+	rxqcq = lif->rxqcqs[q_num];
+	for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&rxqcq->cq,
+					  &ionic_dbg_cq_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&rxqcq->intr,
+					  &ionic_dbg_intr_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
+		**buf = IONIC_READ_STAT64(&rxqcq->napi_stats,
+					  &ionic_dbg_napi_stats_desc[i]);
+		(*buf)++;
+	}
+	for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
+		**buf = rxqcq->napi_stats.work_done_cntr[i];
+		(*buf)++;
+	}
+}
+
 static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 {
 	struct ionic_port_stats *port_stats;
 	struct ionic_lif_sw_stats lif_stats;
-	struct ionic_qcq *txqcq, *rxqcq;
-	struct ionic_tx_stats *txstats;
-	struct ionic_rx_stats *rxstats;
 	int i, q_num;
 
 	ionic_get_lif_stats(lif, &lif_stats);
@@ -338,73 +431,11 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 		(*buf)++;
 	}
 
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		txstats = &lif->txqstats[q_num];
-
-		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
-			**buf = IONIC_READ_STAT64(txstats,
-						  &ionic_tx_stats_desc[i]);
-			(*buf)++;
-		}
-
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			txqcq = lif->txqcqs[q_num];
-			for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&txqcq->q,
-						      &ionic_txq_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&txqcq->cq,
-						   &ionic_dbg_cq_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&txqcq->intr,
-						 &ionic_dbg_intr_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
-				**buf = txstats->sg_cntr[i];
-				(*buf)++;
-			}
-		}
-	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_txq_values(lif, buf, q_num);
 
-	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
-		rxstats = &lif->rxqstats[q_num];
-
-		for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
-			**buf = IONIC_READ_STAT64(rxstats,
-						  &ionic_rx_stats_desc[i]);
-			(*buf)++;
-		}
-
-		if (test_bit(IONIC_LIF_F_UP, lif->state) &&
-		    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
-			rxqcq = lif->rxqcqs[q_num];
-			for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&rxqcq->cq,
-						   &ionic_dbg_cq_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&rxqcq->intr,
-						 &ionic_dbg_intr_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) {
-				**buf = IONIC_READ_STAT64(&rxqcq->napi_stats,
-						 &ionic_dbg_napi_stats_desc[i]);
-				(*buf)++;
-			}
-			for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) {
-				**buf = rxqcq->napi_stats.work_done_cntr[i];
-				(*buf)++;
-			}
-		}
-	}
+	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
+		ionic_sw_stats_get_rxq_values(lif, buf, q_num);
 }
 
 const struct ionic_stats_group_intf ionic_stats_groups[] = {
-- 
cgit v1.2.3


From b8128656a5edd23a91542c9da849cd878a54148d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Mar 2021 14:06:13 -0700
Subject: net: fix icmp_echo_enable_probe sysctl

sysctl_icmp_echo_enable_probe is an u8.

ipv4_net_table entry should use
 .maxlen       = sizeof(u8).
 .proc_handler = proc_dou8vec_minmax,

Fixes: f1b8fa9fa586 ("net: add sysctl for enabling RFC 8335 PROBE messages")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e3cb2d96b55e..9199f507a005 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -601,9 +601,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "icmp_echo_enable_probe",
 		.data		= &init_net.ipv4.sysctl_icmp_echo_enable_probe,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE
 	},
-- 
cgit v1.2.3


From 774c8a8dcb3cba72e37394dbc7803fe575e1292c Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 30 Mar 2021 17:08:51 -0700
Subject: mptcp: remove all subflows involving id 0 address

There's only one subflow involving the non-zero id address, but there
may be multi subflows involving the id 0 address.

Here's an example:

 local_id=0, remote_id=0
 local_id=1, remote_id=0
 local_id=0, remote_id=1

If the removing address id is 0, all the subflows involving the id 0
address need to be removed.

In mptcp_pm_nl_rm_addr_received/mptcp_pm_nl_rm_subflow_received, the
"break" prevents the iteration to the next subflow, so this patch
dropped them.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 73b9245c87b2..87a6133fd778 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -621,8 +621,6 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
 			WRITE_ONCE(msk->pm.accept_addr, true);
 
 			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
-
-			break;
 		}
 	}
 }
@@ -695,8 +693,6 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
 			msk->pm.subflows--;
 
 			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
-
-			break;
 		}
 	}
 }
-- 
cgit v1.2.3


From 9f12e97bf16cb4032ae199537e5a1500dfafee90 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 30 Mar 2021 17:08:52 -0700
Subject: mptcp: unify RM_ADDR and RM_SUBFLOW receiving

There are some duplicate code in mptcp_pm_nl_rm_addr_received and
mptcp_pm_nl_rm_subflow_received. This patch unifies them into a new
function named mptcp_pm_nl_rm_addr_or_subflow. In it, use the input
parameter rm_type to identify it's now removing an address or a subflow.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 82 ++++++++++++++++++++------------------------------
 1 file changed, 33 insertions(+), 49 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 87a6133fd778..e00397f2abf1 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -586,45 +586,68 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
 	return -EINVAL;
 }
 
-static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+					   const struct mptcp_rm_list *rm_list,
+					   enum linux_mptcp_mib_field rm_type)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
 	struct sock *sk = (struct sock *)msk;
 	u8 i;
 
-	pr_debug("address rm_list_nr %d", msk->pm.rm_list_rx.nr);
+	pr_debug("%s rm_list_nr %d",
+		 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);
 
 	msk_owned_by_me(msk);
 
-	if (!msk->pm.rm_list_rx.nr)
+	if (!rm_list->nr)
 		return;
 
 	if (list_empty(&msk->conn_list))
 		return;
 
-	for (i = 0; i < msk->pm.rm_list_rx.nr; i++) {
+	for (i = 0; i < rm_list->nr; i++) {
 		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
+			u8 id = subflow->local_id;
+
+			if (rm_type == MPTCP_MIB_RMADDR)
+				id = subflow->remote_id;
 
-			if (msk->pm.rm_list_rx.ids[i] != subflow->remote_id)
+			if (rm_list->ids[i] != id)
 				continue;
 
-			pr_debug(" -> address rm_list_ids[%d]=%u", i, msk->pm.rm_list_rx.ids[i]);
+			pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u",
+				 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
+				 i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
 			spin_unlock_bh(&msk->pm.lock);
 			mptcp_subflow_shutdown(sk, ssk, how);
 			mptcp_close_ssk(sk, ssk, subflow);
 			spin_lock_bh(&msk->pm.lock);
 
-			msk->pm.add_addr_accepted--;
+			if (rm_type == MPTCP_MIB_RMADDR) {
+				msk->pm.add_addr_accepted--;
+				WRITE_ONCE(msk->pm.accept_addr, true);
+			} else if (rm_type == MPTCP_MIB_RMSUBFLOW) {
+				msk->pm.local_addr_used--;
+			}
 			msk->pm.subflows--;
-			WRITE_ONCE(msk->pm.accept_addr, true);
-
-			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMADDR);
+			__MPTCP_INC_STATS(sock_net(sk), rm_type);
 		}
 	}
 }
 
+static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
+{
+	mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
+}
+
+void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
+				     const struct mptcp_rm_list *rm_list)
+{
+	mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
+}
+
 void mptcp_pm_nl_work(struct mptcp_sock *msk)
 {
 	struct mptcp_pm_data *pm = &msk->pm;
@@ -658,45 +681,6 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 }
 
-void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
-				     const struct mptcp_rm_list *rm_list)
-{
-	struct mptcp_subflow_context *subflow, *tmp;
-	struct sock *sk = (struct sock *)msk;
-	u8 i;
-
-	pr_debug("subflow rm_list_nr %d", rm_list->nr);
-
-	msk_owned_by_me(msk);
-
-	if (!rm_list->nr)
-		return;
-
-	if (list_empty(&msk->conn_list))
-		return;
-
-	for (i = 0; i < rm_list->nr; i++) {
-		list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
-			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
-
-			if (rm_list->ids[i] != subflow->local_id)
-				continue;
-
-			pr_debug(" -> subflow rm_list_ids[%d]=%u", i, rm_list->ids[i]);
-			spin_unlock_bh(&msk->pm.lock);
-			mptcp_subflow_shutdown(sk, ssk, how);
-			mptcp_close_ssk(sk, ssk, subflow);
-			spin_lock_bh(&msk->pm.lock);
-
-			msk->pm.local_addr_used--;
-			msk->pm.subflows--;
-
-			__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
-		}
-	}
-}
-
 static bool address_use_port(struct mptcp_pm_addr_entry *entry)
 {
 	return (entry->addr.flags &
-- 
cgit v1.2.3


From 740d798e8767d8a449902b1a1bbc70facfce19b5 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 30 Mar 2021 17:08:53 -0700
Subject: mptcp: remove id 0 address

This patch added a new function mptcp_nl_remove_id_zero_address to
remove the id 0 address.

In this function, traverse all the existing msk sockets to find the
msk matched the input IP address. Then fill the removing list with
id 0, and pass it to mptcp_pm_remove_addr and mptcp_pm_remove_subflow.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Suggested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index e00397f2abf1..cadafafa1049 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1156,6 +1156,41 @@ static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
 	}
 }
 
+static int mptcp_nl_remove_id_zero_address(struct net *net,
+					   struct mptcp_addr_info *addr)
+{
+	struct mptcp_rm_list list = { .nr = 0 };
+	long s_slot = 0, s_num = 0;
+	struct mptcp_sock *msk;
+
+	list.ids[list.nr++] = 0;
+
+	while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
+		struct sock *sk = (struct sock *)msk;
+		struct mptcp_addr_info msk_local;
+
+		if (list_empty(&msk->conn_list))
+			goto next;
+
+		local_address((struct sock_common *)msk, &msk_local);
+		if (!addresses_equal(&msk_local, addr, addr->port))
+			goto next;
+
+		lock_sock(sk);
+		spin_lock_bh(&msk->pm.lock);
+		mptcp_pm_remove_addr(msk, &list);
+		mptcp_pm_nl_rm_subflow_received(msk, &list);
+		spin_unlock_bh(&msk->pm.lock);
+		release_sock(sk);
+
+next:
+		sock_put(sk);
+		cond_resched();
+	}
+
+	return 0;
+}
+
 static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
@@ -1168,6 +1203,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 	if (ret < 0)
 		return ret;
 
+	/* the zero id address is special: the first address used by the msk
+	 * always gets such an id, so different subflows can have different zero
+	 * id addresses. Additionally zero id is not accounted for in id_bitmap.
+	 * Let's use an 'mptcp_rm_list' instead of the common remove code.
+	 */
+	if (addr.addr.id == 0)
+		return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);
+
 	spin_lock_bh(&pernet->lock);
 	entry = __lookup_addr_by_id(pernet, addr.addr.id);
 	if (!entry) {
-- 
cgit v1.2.3


From 6254ad408820ca84283798c8fdbda3b01259a9a8 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Tue, 30 Mar 2021 17:08:54 -0700
Subject: selftests: mptcp: avoid calling pm_nl_ctl with bad IDs

IDs are supposed to be between 0 and 255.

In pm_nl_ctl, for both the 'add' and 'get' instruction, the ID is casted
in a u_int8_t. So if we give 256, we will delete ID 0. Obviously, the
goal is not to delete this ID by giving 256.

We could modify pm_nl_ctl and stop if the ID is negative or higher than
255 but probably better not to increase the number of lines for such
things in this tool which is only used in selftests. Instead, we use it
within the limits.

This modification also means that we will no longer add a new ID for the
2nd entry. That's why we removed an expected entry from the dump and
introduced with
commit dc8eb10e95a8 ("selftests: mptcp: add testcases for setting the address ID").

So now we delete ID 9 like before and we add entries for IDs 10 to 255
that are deleted just after.

Note that this could be seen as a fix but it was not really an issue so
far: we were simply playing with ID 0/1 once again. With the following
commit ("selftests: mptcp: add addr argument for del_addr"), it will be
different because ID 0 is going to required an address. We don't want
errors when trying to delete ID 0 without the address argument.

Acked-and-tested-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index a617e293734c..3c741abe034e 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -100,12 +100,12 @@ done
 check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
 check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
 
-for i in `seq 9 256`; do
+ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in `seq 10 255`; do
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
 	ip netns exec $ns1 ./pm_nl_ctl del $i
-	ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1))
 done
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags  10.0.1.1
-id 2 flags  10.0.0.9
 id 3 flags signal,backup 10.0.1.3
 id 4 flags signal 10.0.1.4
 id 5 flags signal 10.0.1.5
-- 
cgit v1.2.3


From 2d121c9a882a93d5b229fc13deb10036a1b35967 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 30 Mar 2021 17:08:55 -0700
Subject: selftests: mptcp: add addr argument for del_addr

For the id 0 address, different MPTCP connections could be using
different IP addresses for id 0.

This patch added an extra argument IP address for del_addr when
using id 0.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 34 ++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7b4167f3f9a2..115decfdc1ef 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -26,7 +26,7 @@ static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
 	fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
-	fprintf(stderr, "\tdel <id>\n");
+	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
 	fprintf(stderr, "\tflush\n");
@@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 		  1024];
 	struct rtattr *rta, *nest;
 	struct nlmsghdr *nh;
+	u_int16_t family;
 	int nest_start;
 	u_int8_t id;
 	int off = 0;
@@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
 			    MPTCP_PM_VER);
 
-	/* the only argument is the address id */
-	if (argc != 3)
+	/* the only argument is the address id (nonzero) */
+	if (argc != 3 && argc != 4)
 		syntax(argv);
 
 	id = atoi(argv[2]);
+	/* zero id with the IP address */
+	if (!id && argc != 4)
+		syntax(argv);
 
 	nest_start = off;
 	nest = (void *)(data + off);
@@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
 	rta->rta_len = RTA_LENGTH(1);
 	memcpy(RTA_DATA(rta), &id, 1);
 	off += NLMSG_ALIGN(rta->rta_len);
+
+	if (!id) {
+		/* addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else {
+			error(1, errno, "can't parse ip %s", argv[3]);
+		}
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+	}
 	nest->rta_len = off - nest_start;
 
 	do_nl_req(fd, nh, off, 0);
-- 
cgit v1.2.3


From 5e287fe761495eb669b5e2543919bd5124edecf1 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 30 Mar 2021 17:08:56 -0700
Subject: selftests: mptcp: remove id 0 address testcases

This patch added the testcases for removing the id 0 subflow and the id 0
address.

In do_transfer, use the removing addresses number '9' for deleting the id
0 address.

Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 35 +++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 679de3abaf34..d2273b88e72c 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -294,9 +294,12 @@ do_transfer()
 					let id+=1
 				done
 			fi
-		else
+		elif [ $rm_nr_ns1 -eq 8 ]; then
 			sleep 1
 			ip netns exec ${listener_ns} ./pm_nl_ctl flush
+		elif [ $rm_nr_ns1 -eq 9 ]; then
+			sleep 1
+			ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
 		fi
 	fi
 
@@ -333,9 +336,18 @@ do_transfer()
 					let id+=1
 				done
 			fi
-		else
+		elif [ $rm_nr_ns2 -eq 8 ]; then
 			sleep 1
 			ip netns exec ${connector_ns} ./pm_nl_ctl flush
+		elif [ $rm_nr_ns2 -eq 9 ]; then
+			local addr
+			if is_v6 "${connect_addr}"; then
+				addr="dead:beef:1::2"
+			else
+				addr="10.0.1.2"
+			fi
+			sleep 1
+			ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
 		fi
 	fi
 
@@ -988,6 +1000,25 @@ remove_tests()
 	chk_join_nr "flush invalid addresses" 1 1 1
 	chk_add_nr 3 3
 	chk_rm_nr 3 1 invert
+
+	# remove id 0 subflow
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+	run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+	chk_join_nr "remove id 0 subflow" 1 1 1
+	chk_rm_nr 1 1
+
+	# remove id 0 address
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+	ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+	run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+	chk_join_nr "remove id 0 address" 1 1 1
+	chk_add_nr 1 1
+	chk_rm_nr 1 1 invert
 }
 
 add_tests()
-- 
cgit v1.2.3


From 7866f265b824589f2cf45fce8bf82d152557bd7c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 30 Mar 2021 09:58:41 +0300
Subject: mlxsw: spectrum_router: Only perform atomic nexthop bucket
 replacement when requested

When cleared, the 'force' parameter in nexthop bucket replacement
notifications indicates that a driver should try to perform an atomic
replacement. Meaning, only update the contents of the bucket if it is
inactive.

Since mlxsw only queries buckets' activity once every second, there is
no point in trying an atomic replacement if the idle timer interval is
smaller than 1 second.

Currently, mlxsw ignores the original value of 'force' and will always
try an atomic replacement if the idle timer is not smaller than 1
second.

Fix this by taking the original value of 'force' into account and never
promoting a non-atomic replacement to an atomic one.

Fixes: 617a77f044ed ("mlxsw: spectrum_router: Add nexthop bucket replacement support")
Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 6ccaa194733b..41259c0004d1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5068,8 +5068,9 @@ mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp,
 	/* No point in trying an atomic replacement if the idle timer interval
 	 * is smaller than the interval in which we query and clear activity.
 	 */
-	force = info->nh_res_bucket->idle_timer_ms <
-		MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL;
+	if (!force && info->nh_res_bucket->idle_timer_ms <
+	    MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL)
+		force = true;
 
 	adj_index = nh->nhgi->adj_index + bucket_index;
 	err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl);
-- 
cgit v1.2.3


From 28110056f2d07a576ca045a38f80de051b13582a Mon Sep 17 00:00:00 2001
From: Eric Lin <dslin1010@gmail.com>
Date: Wed, 31 Mar 2021 09:04:17 +0800
Subject: net: ethernet: Fix typo of 'network' in comment

Signed-off-by: Eric Lin <dslin1010@gmail.com>
Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-velocity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index b65767f9e499..fecc4d7b00b0 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2525,7 +2525,7 @@ static int velocity_close(struct net_device *dev)
  *	@skb: buffer to transmit
  *	@dev: network device
  *
- *	Called by the networ layer to request a packet is queued to
+ *	Called by the network layer to request a packet is queued to
  *	the velocity. Returns zero on success.
  */
 static netdev_tx_t velocity_xmit(struct sk_buff *skb,
-- 
cgit v1.2.3


From 63f8af0fc34197a276674fa0d4d865aeff1f0172 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Tue, 23 Mar 2021 01:47:52 +0000
Subject: selftests/bpf: Add an option for a debug shell in vmtest.sh

The newly introduced -s command line option starts an interactive shell.
If a command is specified, the shell is started after the command
finishes executing. It's useful to have a shell especially when
debugging failing tests or developing new tests.

Since the user may terminate the VM forcefully, an extra "sync" is added
after the execution of the command to persist any logs from the command
into the log file.

Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210323014752.3198283-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/vmtest.sh | 39 +++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 22554894db99..8889b3f55236 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -24,15 +24,15 @@ EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
 usage()
 {
 	cat <<EOF
-Usage: $0 [-i] [-d <output_dir>] -- [<command>]
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
 
 <command> is the command you would normally run when you are in
 tools/testing/selftests/bpf. e.g:
 
 	$0 -- ./test_progs -t test_lsm
 
-If no command is specified, "${DEFAULT_COMMAND}" will be run by
-default.
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
 
 If you build your kernel using KBUILD_OUTPUT= or O= options, these
 can be passed as environment variables to the script:
@@ -49,6 +49,9 @@ Options:
 	-d)		Update the output directory (default: ${OUTPUT_DIR})
 	-j)		Number of jobs for compilation, similar to -j in make
 			(default: ${NUM_COMPILE_JOBS})
+	-s)		Instead of powering off the VM, start an interactive
+			shell. If <command> is specified, the shell runs after
+			the command finishes executing
 EOF
 }
 
@@ -149,6 +152,7 @@ update_init_script()
 	local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
 	local init_script="${init_script_dir}/S50-startup"
 	local command="$1"
+	local exit_command="$2"
 
 	mount_image
 
@@ -162,9 +166,10 @@ EOF
 
 	fi
 
-	sudo bash -c "cat >${init_script}" <<EOF
-#!/bin/bash
+	sudo bash -c "echo '#!/bin/bash' > ${init_script}"
 
+	if [[ "${command}" != "" ]]; then
+		sudo bash -c "cat >>${init_script}" <<EOF
 # Have a default value in the exit status file
 # incase the VM is forcefully stopped.
 echo "130" > "/root/${EXIT_STATUS_FILE}"
@@ -175,9 +180,12 @@ echo "130" > "/root/${EXIT_STATUS_FILE}"
 	stdbuf -oL -eL ${command}
 	echo "\$?" > "/root/${EXIT_STATUS_FILE}"
 } 2>&1 | tee "/root/${LOG_FILE}"
-poweroff -f
+# Ensure that the logs are written to disk
+sync
 EOF
+	fi
 
+	sudo bash -c "echo ${exit_command} >> ${init_script}"
 	sudo chmod a+x "${init_script}"
 	unmount_image
 }
@@ -277,8 +285,10 @@ main()
 	local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
 	local command="${DEFAULT_COMMAND}"
 	local update_image="no"
+	local exit_command="poweroff -f"
+	local debug_shell="no"
 
-	while getopts 'hkid:j:' opt; do
+	while getopts 'hskid:j:' opt; do
 		case ${opt} in
 		i)
 			update_image="yes"
@@ -289,6 +299,11 @@ main()
 		j)
 			NUM_COMPILE_JOBS="$OPTARG"
 			;;
+		s)
+			command=""
+			debug_shell="yes"
+			exit_command="bash"
+			;;
 		h)
 			usage
 			exit 0
@@ -307,7 +322,7 @@ main()
 	done
 	shift $((OPTIND -1))
 
-	if [[ $# -eq 0 ]]; then
+	if [[ $# -eq 0  && "${debug_shell}" == "no" ]]; then
 		echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
 	else
 		command="$@"
@@ -355,10 +370,12 @@ main()
 	fi
 
 	update_selftests "${kernel_checkout}" "${make_command}"
-	update_init_script "${command}"
+	update_init_script "${command}" "${exit_command}"
 	run_vm "${kernel_bzimage}"
-	copy_logs
-	echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+	if [[ "${command}" != "" ]]; then
+		copy_logs
+		echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+	fi
 }
 
 catch()
-- 
cgit v1.2.3


From e27bfefb21f28d5295432f042b5d9d7871100c35 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 29 Mar 2021 15:31:43 -0700
Subject: tools/resolve_btfids: Fix warnings

* make eprintf static, used only in main.c
* initialize ret in eprintf
* remove unused *tmp

v3:
* remove another err (Song Liu)

v2:
* remove unused 'int err = -1'

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210329223143.3659983-1-sdf@google.com
---
 tools/bpf/resolve_btfids/main.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 80d966cfcaa1..7550fd9c3188 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -115,10 +115,10 @@ struct object {
 
 static int verbose;
 
-int eprintf(int level, int var, const char *fmt, ...)
+static int eprintf(int level, int var, const char *fmt, ...)
 {
 	va_list args;
-	int ret;
+	int ret = 0;
 
 	if (var >= level) {
 		va_start(args, fmt);
@@ -385,7 +385,7 @@ static int elf_collect(struct object *obj)
 static int symbols_collect(struct object *obj)
 {
 	Elf_Scn *scn = NULL;
-	int n, i, err = 0;
+	int n, i;
 	GElf_Shdr sh;
 	char *name;
 
@@ -402,11 +402,10 @@ static int symbols_collect(struct object *obj)
 	 * Scan symbols and look for the ones starting with
 	 * __BTF_ID__* over .BTF_ids section.
 	 */
-	for (i = 0; !err && i < n; i++) {
-		char *tmp, *prefix;
+	for (i = 0; i < n; i++) {
+		char *prefix;
 		struct btf_id *id;
 		GElf_Sym sym;
-		int err = -1;
 
 		if (!gelf_getsym(obj->efile.symbols, i, &sym))
 			return -1;
-- 
cgit v1.2.3


From 77ccee96a67422ac05fc47327cac4e4287fd0d8a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:09 +0100
Subject: netfilter: nf_log_bridge: merge with nf_log_syslog

Provide bridge log support from nf_log_syslog.

After the merge there is no need to load the "real packet loggers",
all of them now reside in the same module.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_log.h       |  1 -
 net/bridge/netfilter/Kconfig         |  4 --
 net/bridge/netfilter/Makefile        |  3 --
 net/bridge/netfilter/nf_log_bridge.c | 79 ------------------------------------
 net/netfilter/nf_log.c               |  7 ----
 net/netfilter/nf_log_syslog.c        | 22 ++++++++++
 6 files changed, 22 insertions(+), 94 deletions(-)
 delete mode 100644 net/bridge/netfilter/nf_log_bridge.c

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 716db4a0fed8..a6b85068c294 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -68,7 +68,6 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf);
 
 int nf_logger_find_get(int pf, enum nf_log_type type);
 void nf_logger_put(int pf, enum nf_log_type type);
-void nf_logger_request_module(int pf, enum nf_log_type type);
 
 #define MODULE_ALIAS_NF_LOGGER(family, type) \
 	MODULE_ALIAS("nf-logger-" __stringify(family) "-" __stringify(type))
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index ac5372121e60..7f304a19ac1b 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -23,10 +23,6 @@ config NFT_BRIDGE_REJECT
 	help
 	  Add support to reject packets.
 
-config NF_LOG_BRIDGE
-	tristate "Bridge packet logging"
-	select NF_LOG_COMMON
-
 endif # NF_TABLES_BRIDGE
 
 config NF_CONNTRACK_BRIDGE
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 8e2c5759d964..1c9ce49ab651 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -9,9 +9,6 @@ obj-$(CONFIG_NFT_BRIDGE_REJECT)  += nft_reject_bridge.o
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
 
-# packet logging
-obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
-
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 
 # tables
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
deleted file mode 100644
index 1ad61d1017b6..000000000000
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_bridge.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_log.h>
-
-static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
-				 unsigned int hooknum,
-				 const struct sk_buff *skb,
-				 const struct net_device *in,
-				 const struct net_device *out,
-				 const struct nf_loginfo *loginfo,
-				 const char *prefix)
-{
-	nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb,
-			in, out, loginfo, prefix);
-}
-
-static struct nf_logger nf_bridge_logger __read_mostly = {
-	.name		= "nf_log_bridge",
-	.type		= NF_LOG_TYPE_LOG,
-	.logfn		= nf_log_bridge_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init nf_log_bridge_net_init(struct net *net)
-{
-	return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
-}
-
-static void __net_exit nf_log_bridge_net_exit(struct net *net)
-{
-	nf_log_unset(net, &nf_bridge_logger);
-}
-
-static struct pernet_operations nf_log_bridge_net_ops = {
-	.init = nf_log_bridge_net_init,
-	.exit = nf_log_bridge_net_exit,
-};
-
-static int __init nf_log_bridge_init(void)
-{
-	int ret;
-
-	/* Request to load the real packet loggers. */
-	nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
-	nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
-	nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
-
-	ret = register_pernet_subsys(&nf_log_bridge_net_ops);
-	if (ret < 0)
-		return ret;
-
-	nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger);
-	return 0;
-}
-
-static void __exit nf_log_bridge_exit(void)
-{
-	unregister_pernet_subsys(&nf_log_bridge_net_ops);
-	nf_log_unregister(&nf_bridge_logger);
-}
-
-module_init(nf_log_bridge_init);
-module_exit(nf_log_bridge_exit);
-
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_DESCRIPTION("Netfilter bridge packet logging");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 6cb9f9474b05..eaa8181f5ef7 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -151,13 +151,6 @@ void nf_log_unbind_pf(struct net *net, u_int8_t pf)
 }
 EXPORT_SYMBOL(nf_log_unbind_pf);
 
-void nf_logger_request_module(int pf, enum nf_log_type type)
-{
-	if (loggers[pf][type] == NULL)
-		request_module("nf-logger-%u-%u", pf, type);
-}
-EXPORT_SYMBOL_GPL(nf_logger_request_module);
-
 int nf_logger_find_get(int pf, enum nf_log_type type)
 {
 	struct nf_logger *logger;
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 617e0071c0c4..6b56251de22a 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -787,6 +787,13 @@ static struct nf_logger nf_netdev_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };
 
+static struct nf_logger nf_bridge_logger __read_mostly = {
+	.name		= "nf_log_bridge",
+	.type		= NF_LOG_TYPE_LOG,
+	.logfn		= nf_log_netdev_packet,
+	.me		= THIS_MODULE,
+};
+
 static int __net_init nf_log_syslog_net_init(struct net *net)
 {
 	int ret = nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
@@ -805,7 +812,13 @@ static int __net_init nf_log_syslog_net_init(struct net *net)
 	ret = nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
 	if (ret)
 		goto err3;
+
+	ret = nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
+	if (ret)
+		goto err4;
 	return 0;
+err4:
+	nf_log_unset(net, &nf_netdev_logger);
 err3:
 	nf_log_unset(net, &nf_ip6_logger);
 err2:
@@ -852,7 +865,13 @@ static int __init nf_log_syslog_init(void)
 	if (ret < 0)
 		goto err4;
 
+	ret = nf_log_register(NFPROTO_BRIDGE, &nf_bridge_logger);
+	if (ret < 0)
+		goto err5;
+
 	return 0;
+err5:
+	nf_log_unregister(&nf_netdev_logger);
 err4:
 	nf_log_unregister(&nf_ip6_logger);
 err3:
@@ -872,6 +891,7 @@ static void __exit nf_log_syslog_exit(void)
 	nf_log_unregister(&nf_arp_logger);
 	nf_log_unregister(&nf_ip6_logger);
 	nf_log_unregister(&nf_netdev_logger);
+	nf_log_unregister(&nf_bridge_logger);
 }
 
 module_init(nf_log_syslog_init);
@@ -881,9 +901,11 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Netfilter syslog packet logging");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("nf_log_arp");
+MODULE_ALIAS("nf_log_bridge");
 MODULE_ALIAS("nf_log_ipv4");
 MODULE_ALIAS("nf_log_ipv6");
 MODULE_ALIAS("nf_log_netdev");
+MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 0);
 MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
 MODULE_ALIAS_NF_LOGGER(3, 0);
 MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
-- 
cgit v1.2.3


From e465cccd0b9de113a81280bd52ee717bf5e3d1a2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:10 +0100
Subject: netfilter: nf_log_common: merge with nf_log_syslog

Remove nf_log_common.  Now that all per-af modules have been merged
there is no longer a need to provide a helper module.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_log.h |  24 -----
 net/netfilter/Kconfig          |   8 +-
 net/netfilter/Makefile         |   2 -
 net/netfilter/nf_log_common.c  | 224 -----------------------------------------
 net/netfilter/nf_log_syslog.c  | 181 ++++++++++++++++++++++++++++++++-
 5 files changed, 181 insertions(+), 258 deletions(-)
 delete mode 100644 net/netfilter/nf_log_common.c

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index a6b85068c294..e55eedc84ed7 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -98,28 +98,4 @@ struct nf_log_buf;
 struct nf_log_buf *nf_log_buf_open(void);
 __printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...);
 void nf_log_buf_close(struct nf_log_buf *m);
-
-/* common logging functions */
-int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
-			   u8 proto, int fragment, unsigned int offset);
-int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
-			   u8 proto, int fragment, unsigned int offset,
-			   unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
-			    struct sock *sk);
-void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
-void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
-			       unsigned int hooknum, const struct sk_buff *skb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       const struct nf_loginfo *loginfo,
-			       const char *prefix);
-void nf_log_l2packet(struct net *net, u_int8_t pf,
-		     __be16 protocol,
-		     unsigned int hooknum,
-		     const struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     const struct nf_loginfo *loginfo, const char *prefix);
-
 #endif /* _NF_LOG_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6aef981a8446..fcd8682704c4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -71,16 +71,13 @@ config NF_CONNTRACK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_LOG_COMMON
-	tristate
-
 config NF_LOG_SYSLOG
 	tristate "Syslog packet logging"
 	default m if NETFILTER_ADVANCED=n
-	select NF_LOG_COMMON
 	help
 	  This option enable support for packet logging via syslog.
-	  It supports IPv4 and common transport protocols such as TCP and UDP.
+	  It supports IPv4, IPV6, ARP and common transport protocols such
+	  as TCP and UDP.
 	  This is a simpler but less flexible logging method compared to
 	  CONFIG_NETFILTER_NETLINK_LOG.
 	  If both are enabled the backend to use can be configured at run-time
@@ -930,7 +927,6 @@ config NETFILTER_XT_TARGET_LED
 
 config NETFILTER_XT_TARGET_LOG
 	tristate "LOG target support"
-	select NF_LOG_COMMON
 	select NF_LOG_SYSLOG
 	select NF_LOG_IPV6 if IP6_NF_IPTABLES
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 429be36fe4c7..e80e010354b1 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -48,8 +48,6 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
 nf_nat-y	:= nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
 
-# generic transport layer logging
-obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
 obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o
 
 obj-$(CONFIG_NF_NAT) += nf_nat.o
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
deleted file mode 100644
index fd7c5f0f5c25..000000000000
--- a/net/netfilter/nf_log_common.c
+++ /dev/null
@@ -1,224 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/if_arp.h>
-#include <linux/ip.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_bridge.h>
-#include <linux/netfilter/xt_LOG.h>
-#include <net/netfilter/nf_log.h>
-
-int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
-			   u8 proto, int fragment, unsigned int offset)
-{
-	struct udphdr _udph;
-	const struct udphdr *uh;
-
-	if (proto == IPPROTO_UDP)
-		/* Max length: 10 "PROTO=UDP "     */
-		nf_log_buf_add(m, "PROTO=UDP ");
-	else	/* Max length: 14 "PROTO=UDPLITE " */
-		nf_log_buf_add(m, "PROTO=UDPLITE ");
-
-	if (fragment)
-		goto out;
-
-	/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-	uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
-	if (uh == NULL) {
-		nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
-
-		return 1;
-	}
-
-	/* Max length: 20 "SPT=65535 DPT=65535 " */
-	nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ",
-		       ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len));
-
-out:
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_log_dump_udp_header);
-
-int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
-			   u8 proto, int fragment, unsigned int offset,
-			   unsigned int logflags)
-{
-	struct tcphdr _tcph;
-	const struct tcphdr *th;
-
-	/* Max length: 10 "PROTO=TCP " */
-	nf_log_buf_add(m, "PROTO=TCP ");
-
-	if (fragment)
-		return 0;
-
-	/* Max length: 25 "INCOMPLETE [65535 bytes] " */
-	th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
-		return 1;
-	}
-
-	/* Max length: 20 "SPT=65535 DPT=65535 " */
-	nf_log_buf_add(m, "SPT=%u DPT=%u ",
-		       ntohs(th->source), ntohs(th->dest));
-	/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-	if (logflags & NF_LOG_TCPSEQ) {
-		nf_log_buf_add(m, "SEQ=%u ACK=%u ",
-			       ntohl(th->seq), ntohl(th->ack_seq));
-	}
-
-	/* Max length: 13 "WINDOW=65535 " */
-	nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window));
-	/* Max length: 9 "RES=0x3C " */
-	nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
-					    TCP_RESERVED_BITS) >> 22));
-	/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
-	if (th->cwr)
-		nf_log_buf_add(m, "CWR ");
-	if (th->ece)
-		nf_log_buf_add(m, "ECE ");
-	if (th->urg)
-		nf_log_buf_add(m, "URG ");
-	if (th->ack)
-		nf_log_buf_add(m, "ACK ");
-	if (th->psh)
-		nf_log_buf_add(m, "PSH ");
-	if (th->rst)
-		nf_log_buf_add(m, "RST ");
-	if (th->syn)
-		nf_log_buf_add(m, "SYN ");
-	if (th->fin)
-		nf_log_buf_add(m, "FIN ");
-	/* Max length: 11 "URGP=65535 " */
-	nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
-
-	if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
-		u_int8_t _opt[60 - sizeof(struct tcphdr)];
-		const u_int8_t *op;
-		unsigned int i;
-		unsigned int optsize = th->doff*4 - sizeof(struct tcphdr);
-
-		op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
-					optsize, _opt);
-		if (op == NULL) {
-			nf_log_buf_add(m, "OPT (TRUNCATED)");
-			return 1;
-		}
-
-		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		nf_log_buf_add(m, "OPT (");
-		for (i = 0; i < optsize; i++)
-			nf_log_buf_add(m, "%02X", op[i]);
-
-		nf_log_buf_add(m, ") ");
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
-
-void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
-			    struct sock *sk)
-{
-	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
-		return;
-
-	read_lock_bh(&sk->sk_callback_lock);
-	if (sk->sk_socket && sk->sk_socket->file) {
-		const struct cred *cred = sk->sk_socket->file->f_cred;
-		nf_log_buf_add(m, "UID=%u GID=%u ",
-			from_kuid_munged(&init_user_ns, cred->fsuid),
-			from_kgid_munged(&init_user_ns, cred->fsgid));
-	}
-	read_unlock_bh(&sk->sk_callback_lock);
-}
-EXPORT_SYMBOL_GPL(nf_log_dump_sk_uid_gid);
-
-void
-nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
-			  unsigned int hooknum, const struct sk_buff *skb,
-			  const struct net_device *in,
-			  const struct net_device *out,
-			  const struct nf_loginfo *loginfo, const char *prefix)
-{
-	const struct net_device *physoutdev __maybe_unused;
-	const struct net_device *physindev __maybe_unused;
-
-	nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
-	       '0' + loginfo->u.log.level, prefix,
-	       in ? in->name : "",
-	       out ? out->name : "");
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	physindev = nf_bridge_get_physindev(skb);
-	if (physindev && in != physindev)
-		nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
-	physoutdev = nf_bridge_get_physoutdev(skb);
-	if (physoutdev && out != physoutdev)
-		nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
-#endif
-}
-EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
-
-void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
-{
-	u16 vid;
-
-	if (!skb_vlan_tag_present(skb))
-		return;
-
-	vid = skb_vlan_tag_get(skb);
-	nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
-}
-EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
-
-/* bridge and netdev logging families share this code. */
-void nf_log_l2packet(struct net *net, u_int8_t pf,
-		     __be16 protocol,
-		     unsigned int hooknum,
-		     const struct sk_buff *skb,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     const struct nf_loginfo *loginfo,
-		     const char *prefix)
-{
-	switch (protocol) {
-	case htons(ETH_P_IP):
-		nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
-			      loginfo, "%s", prefix);
-		break;
-	case htons(ETH_P_IPV6):
-		nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
-			      loginfo, "%s", prefix);
-		break;
-	case htons(ETH_P_ARP):
-	case htons(ETH_P_RARP):
-		nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
-			      loginfo, "%s", prefix);
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(nf_log_l2packet);
-
-static int __init nf_log_common_init(void)
-{
-	return 0;
-}
-
-static void __exit nf_log_common_exit(void) {}
-
-module_init(nf_log_common_init);
-module_exit(nf_log_common_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 6b56251de22a..2518818ed479 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -18,6 +18,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
@@ -39,6 +40,16 @@ struct arppayload {
 	unsigned char ip_dst[4];
 };
 
+static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+	u16 vid;
+
+	if (!skb_vlan_tag_present(skb))
+		return;
+
+	vid = skb_vlan_tag_get(skb);
+	nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
 static void noinline_for_stack
 dump_arp_packet(struct nf_log_buf *m,
 		const struct nf_loginfo *info,
@@ -89,6 +100,30 @@ dump_arp_packet(struct nf_log_buf *m,
 		       ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
 }
 
+static void
+nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
+			  unsigned int hooknum, const struct sk_buff *skb,
+			  const struct net_device *in,
+			  const struct net_device *out,
+			  const struct nf_loginfo *loginfo, const char *prefix)
+{
+	const struct net_device *physoutdev __maybe_unused;
+	const struct net_device *physindev __maybe_unused;
+
+	nf_log_buf_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
+		       '0' + loginfo->u.log.level, prefix,
+			in ? in->name : "",
+			out ? out->name : "");
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+	physindev = nf_bridge_get_physindev(skb);
+	if (physindev && in != physindev)
+		nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
+	physoutdev = nf_bridge_get_physoutdev(skb);
+	if (physoutdev && out != physoutdev)
+		nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
+#endif
+}
+
 static void nf_log_arp_packet(struct net *net, u_int8_t pf,
 			      unsigned int hooknum, const struct sk_buff *skb,
 			      const struct net_device *in,
@@ -121,6 +156,138 @@ static struct nf_logger nf_arp_logger __read_mostly = {
 	.me		= THIS_MODULE,
 };
 
+static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+				   struct sock *sk)
+{
+	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
+		return;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	if (sk->sk_socket && sk->sk_socket->file) {
+		const struct cred *cred = sk->sk_socket->file->f_cred;
+
+		nf_log_buf_add(m, "UID=%u GID=%u ",
+			       from_kuid_munged(&init_user_ns, cred->fsuid),
+			       from_kgid_munged(&init_user_ns, cred->fsgid));
+	}
+	read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static noinline_for_stack int
+nf_log_dump_tcp_header(struct nf_log_buf *m,
+		       const struct sk_buff *skb,
+		       u8 proto, int fragment,
+		       unsigned int offset,
+		       unsigned int logflags)
+{
+	struct tcphdr _tcph;
+	const struct tcphdr *th;
+
+	/* Max length: 10 "PROTO=TCP " */
+	nf_log_buf_add(m, "PROTO=TCP ");
+
+	if (fragment)
+		return 0;
+
+	/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+	th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+	if (!th) {
+		nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+		return 1;
+	}
+
+	/* Max length: 20 "SPT=65535 DPT=65535 " */
+	nf_log_buf_add(m, "SPT=%u DPT=%u ",
+		       ntohs(th->source), ntohs(th->dest));
+	/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
+	if (logflags & NF_LOG_TCPSEQ) {
+		nf_log_buf_add(m, "SEQ=%u ACK=%u ",
+			       ntohl(th->seq), ntohl(th->ack_seq));
+	}
+
+	/* Max length: 13 "WINDOW=65535 " */
+	nf_log_buf_add(m, "WINDOW=%u ", ntohs(th->window));
+	/* Max length: 9 "RES=0x3C " */
+	nf_log_buf_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) &
+					    TCP_RESERVED_BITS) >> 22));
+	/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
+	if (th->cwr)
+		nf_log_buf_add(m, "CWR ");
+	if (th->ece)
+		nf_log_buf_add(m, "ECE ");
+	if (th->urg)
+		nf_log_buf_add(m, "URG ");
+	if (th->ack)
+		nf_log_buf_add(m, "ACK ");
+	if (th->psh)
+		nf_log_buf_add(m, "PSH ");
+	if (th->rst)
+		nf_log_buf_add(m, "RST ");
+	if (th->syn)
+		nf_log_buf_add(m, "SYN ");
+	if (th->fin)
+		nf_log_buf_add(m, "FIN ");
+	/* Max length: 11 "URGP=65535 " */
+	nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
+
+	if ((logflags & NF_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) {
+		unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr);
+		u8 _opt[60 - sizeof(struct tcphdr)];
+		unsigned int i;
+		const u8 *op;
+
+		op = skb_header_pointer(skb, offset + sizeof(struct tcphdr),
+					optsize, _opt);
+		if (!op) {
+			nf_log_buf_add(m, "OPT (TRUNCATED)");
+			return 1;
+		}
+
+		/* Max length: 127 "OPT (" 15*4*2chars ") " */
+		nf_log_buf_add(m, "OPT (");
+		for (i = 0; i < optsize; i++)
+			nf_log_buf_add(m, "%02X", op[i]);
+
+		nf_log_buf_add(m, ") ");
+	}
+
+	return 0;
+}
+
+static noinline_for_stack int
+nf_log_dump_udp_header(struct nf_log_buf *m,
+		       const struct sk_buff *skb,
+		       u8 proto, int fragment,
+		       unsigned int offset)
+{
+	struct udphdr _udph;
+	const struct udphdr *uh;
+
+	if (proto == IPPROTO_UDP)
+		/* Max length: 10 "PROTO=UDP "     */
+		nf_log_buf_add(m, "PROTO=UDP ");
+	else	/* Max length: 14 "PROTO=UDPLITE " */
+		nf_log_buf_add(m, "PROTO=UDPLITE ");
+
+	if (fragment)
+		goto out;
+
+	/* Max length: 25 "INCOMPLETE [65535 bytes] " */
+	uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+	if (!uh) {
+		nf_log_buf_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset);
+
+		return 1;
+	}
+
+	/* Max length: 20 "SPT=65535 DPT=65535 " */
+	nf_log_buf_add(m, "SPT=%u DPT=%u LEN=%u ",
+		       ntohs(uh->source), ntohs(uh->dest), ntohs(uh->len));
+
+out:
+	return 0;
+}
+
 /* One level of recursion won't kill us */
 static noinline_for_stack void
 dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
@@ -776,8 +943,18 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
 				 const struct nf_loginfo *loginfo,
 				 const char *prefix)
 {
-	nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out,
-			loginfo, prefix);
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		nf_log_ip_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+		break;
+	case htons(ETH_P_IPV6):
+		nf_log_ip6_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+		break;
+	case htons(ETH_P_ARP):
+	case htons(ETH_P_RARP):
+		nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+		break;
+	}
 }
 
 static struct nf_logger nf_netdev_logger __read_mostly = {
-- 
cgit v1.2.3


From a38b5b56d6f4678a04c3c1943000953593391bf6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:11 +0100
Subject: netfilter: nf_log: add module softdeps

xt_LOG has no direct dependency on the syslog-based logger, it relies
on the nf_log core to probe the requested backend.

Now that all syslog-based loggers reside in the same module, we can
just add a soft dependency on nf_log_syslog and let modprobe take
care of it.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_LOG.c   | 1 +
 net/netfilter/xt_NFLOG.c | 1 +
 net/netfilter/xt_TRACE.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index a1e79b517c01..2ff75f7637b0 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -108,3 +108,4 @@ MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
 MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging");
 MODULE_ALIAS("ipt_LOG");
 MODULE_ALIAS("ip6t_LOG");
+MODULE_SOFTDEP("pre: nf_log_syslog");
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 6e83ce3000db..fb5793208059 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -79,3 +79,4 @@ static void __exit nflog_tg_exit(void)
 
 module_init(nflog_tg_init);
 module_exit(nflog_tg_exit);
+MODULE_SOFTDEP("pre: nfnetlink_log");
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 349ab5609b1b..5582dce98cae 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -52,3 +52,4 @@ static void __exit trace_tg_exit(void)
 
 module_init(trace_tg_init);
 module_exit(trace_tg_exit);
+MODULE_SOFTDEP("pre: nf_log_syslog");
-- 
cgit v1.2.3


From cefa31a9d46112c0706c218ea549bccf298a0068 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 25 Mar 2021 18:25:12 +0100
Subject: netfilter: nft_log: perform module load from nf_tables

modprobe calls from the nf_logger_find_get() API causes deadlock in very
special cases because they occur with the nf_tables transaction mutex held.

In the specific case of nf_log, deadlock is via:

 A nf_tables -> transaction mutex -> nft_log -> modprobe -> nf_log_syslog \
	    -> pernet_ops rwsem -> wait for C
 B netlink event -> rtnl_mutex -> nf_tables transaction mutex -> wait for A
 C close() -> ip6mr_sk_done -> rtnl_mutex -> wait for B

Earlier patch added NFLOG/xt_LOG module softdeps to avoid the need to load
the backend module during a transaction.

For nft_log we would have to add a softdep for both nfnetlink_log or
nf_log_syslog, since we do not know in advance which of the two backends
are going to be configured.

This defers the modprobe op until after the transaction mutex is released.

Tested-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +++++
 net/netfilter/nf_log.c            |  3 ---
 net/netfilter/nf_tables_api.c     |  5 +++--
 net/netfilter/nft_log.c           | 20 +++++++++++++++++++-
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0cef5ad9768a..8fefa112ae89 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1562,4 +1562,9 @@ void nf_tables_trans_destroy_flush_work(void);
 int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
 __be64 nf_jiffies64_to_msecs(u64 input);
 
+#ifdef CONFIG_MODULES
+__printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...);
+#else
+static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; }
+#endif
 #endif /* _NET_NF_TABLES_H */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index eaa8181f5ef7..edee7fa944c1 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -170,9 +170,6 @@ int nf_logger_find_get(int pf, enum nf_log_type type)
 		return 0;
 	}
 
-	if (rcu_access_pointer(loggers[pf][type]) == NULL)
-		request_module("nf-logger-%u-%u", pf, type);
-
 	rcu_read_lock();
 	logger = rcu_dereference(loggers[pf][type]);
 	if (logger == NULL)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fc2526b8bd55..c09b67f2f64c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -586,8 +586,8 @@ struct nft_module_request {
 };
 
 #ifdef CONFIG_MODULES
-static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
-					     ...)
+__printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
+				      ...)
 {
 	char module_name[MODULE_NAME_LEN];
 	struct nft_module_request *req;
@@ -620,6 +620,7 @@ static __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
 
 	return -EAGAIN;
 }
+EXPORT_SYMBOL_GPL(nft_request_module);
 #endif
 
 static void lockdep_nfnl_nft_mutex_not_held(void)
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a06a46b039c5..54f6c2035e84 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -128,6 +128,20 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
 	[NFTA_LOG_FLAGS]	= { .type = NLA_U32 },
 };
 
+static int nft_log_modprobe(struct net *net, enum nf_log_type t)
+{
+	switch (t) {
+	case NF_LOG_TYPE_LOG:
+		return nft_request_module(net, "%s", "nf_log_syslog");
+	case NF_LOG_TYPE_ULOG:
+		return nft_request_module(net, "%s", "nfnetlink_log");
+	case NF_LOG_TYPE_MAX:
+		break;
+	}
+
+	return -ENOENT;
+}
+
 static int nft_log_init(const struct nft_ctx *ctx,
 			const struct nft_expr *expr,
 			const struct nlattr * const tb[])
@@ -197,8 +211,12 @@ static int nft_log_init(const struct nft_ctx *ctx,
 		return 0;
 
 	err = nf_logger_find_get(ctx->family, li->type);
-	if (err < 0)
+	if (err < 0) {
+		if (nft_log_modprobe(ctx->net, li->type) == -EAGAIN)
+			err = -EAGAIN;
+
 		goto err1;
+	}
 
 	return 0;
 
-- 
cgit v1.2.3


From c520292f29b8047285bcfbc2322fa2a9bf02521a Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Fri, 26 Mar 2021 13:38:59 -0400
Subject: audit: log nftables configuration change events once per table

Reduce logging of nftables events to a level similar to iptables.
Restore the table field to list the table, adding the generation.

Indicate the op as the most significant operation in the event.

A couple of sample events:

type=PROCTITLE msg=audit(2021-03-18 09:30:49.801:143) : proctitle=/usr/bin/python3 -s /usr/sbin/firewalld --nofork --nopid
type=SYSCALL msg=audit(2021-03-18 09:30:49.801:143) : arch=x86_64 syscall=sendmsg success=yes exit=172 a0=0x6 a1=0x7ffdcfcbe650 a2=0x0 a3=0x7ffdcfcbd52c items=0 ppid=1 pid=367 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=roo
t sgid=root fsgid=root tty=(none) ses=unset comm=firewalld exe=/usr/bin/python3.9 subj=system_u:system_r:firewalld_t:s0 key=(null)
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.801:143) : table=firewalld:2 family=ipv6 entries=1 op=nft_register_table pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.801:143) : table=firewalld:2 family=ipv4 entries=1 op=nft_register_table pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.801:143) : table=firewalld:2 family=inet entries=1 op=nft_register_table pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld

type=PROCTITLE msg=audit(2021-03-18 09:30:49.839:144) : proctitle=/usr/bin/python3 -s /usr/sbin/firewalld --nofork --nopid
type=SYSCALL msg=audit(2021-03-18 09:30:49.839:144) : arch=x86_64 syscall=sendmsg success=yes exit=22792 a0=0x6 a1=0x7ffdcfcbe650 a2=0x0 a3=0x7ffdcfcbd52c items=0 ppid=1 pid=367 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=r
oot sgid=root fsgid=root tty=(none) ses=unset comm=firewalld exe=/usr/bin/python3.9 subj=system_u:system_r:firewalld_t:s0 key=(null)
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.839:144) : table=firewalld:3 family=ipv6 entries=30 op=nft_register_chain pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.839:144) : table=firewalld:3 family=ipv4 entries=30 op=nft_register_chain pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld
type=NETFILTER_CFG msg=audit(2021-03-18 09:30:49.839:144) : table=firewalld:3 family=inet entries=165 op=nft_register_chain pid=367 subj=system_u:system_r:firewalld_t:s0 comm=firewalld

The issue was originally documented in
https://github.com/linux-audit/audit-kernel/issues/124

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 186 +++++++++++++++++++++++-------------------
 1 file changed, 103 insertions(+), 83 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c09b67f2f64c..b59fba717a39 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -66,6 +66,41 @@ static const struct rhashtable_params nft_objname_ht_params = {
 	.automatic_shrinking	= true,
 };
 
+struct nft_audit_data {
+	struct nft_table *table;
+	int entries;
+	int op;
+	struct list_head list;
+};
+
+static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
+	[NFT_MSG_NEWTABLE]	= AUDIT_NFT_OP_TABLE_REGISTER,
+	[NFT_MSG_GETTABLE]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELTABLE]	= AUDIT_NFT_OP_TABLE_UNREGISTER,
+	[NFT_MSG_NEWCHAIN]	= AUDIT_NFT_OP_CHAIN_REGISTER,
+	[NFT_MSG_GETCHAIN]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELCHAIN]	= AUDIT_NFT_OP_CHAIN_UNREGISTER,
+	[NFT_MSG_NEWRULE]	= AUDIT_NFT_OP_RULE_REGISTER,
+	[NFT_MSG_GETRULE]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELRULE]	= AUDIT_NFT_OP_RULE_UNREGISTER,
+	[NFT_MSG_NEWSET]	= AUDIT_NFT_OP_SET_REGISTER,
+	[NFT_MSG_GETSET]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELSET]	= AUDIT_NFT_OP_SET_UNREGISTER,
+	[NFT_MSG_NEWSETELEM]	= AUDIT_NFT_OP_SETELEM_REGISTER,
+	[NFT_MSG_GETSETELEM]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELSETELEM]	= AUDIT_NFT_OP_SETELEM_UNREGISTER,
+	[NFT_MSG_NEWGEN]	= AUDIT_NFT_OP_GEN_REGISTER,
+	[NFT_MSG_GETGEN]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_TRACE]		= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_NEWOBJ]	= AUDIT_NFT_OP_OBJ_REGISTER,
+	[NFT_MSG_GETOBJ]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELOBJ]	= AUDIT_NFT_OP_OBJ_UNREGISTER,
+	[NFT_MSG_GETOBJ_RESET]	= AUDIT_NFT_OP_OBJ_RESET,
+	[NFT_MSG_NEWFLOWTABLE]	= AUDIT_NFT_OP_FLOWTABLE_REGISTER,
+	[NFT_MSG_GETFLOWTABLE]	= AUDIT_NFT_OP_INVALID,
+	[NFT_MSG_DELFLOWTABLE]	= AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+};
+
 static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 {
 	switch (net->nft.validate_state) {
@@ -718,17 +753,6 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(GFP_KERNEL, "%s:%llu;?:0",
-			      ctx->table->name, ctx->table->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			ctx->table->use,
-			event == NFT_MSG_NEWTABLE ?
-				AUDIT_NFT_OP_TABLE_REGISTER :
-				AUDIT_NFT_OP_TABLE_UNREGISTER,
-			GFP_KERNEL);
-	kfree(buf);
 
 	if (!ctx->report &&
 	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -1502,18 +1526,6 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
-			      ctx->table->name, ctx->table->handle,
-			      ctx->chain->name, ctx->chain->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			ctx->chain->use,
-			event == NFT_MSG_NEWCHAIN ?
-				AUDIT_NFT_OP_CHAIN_REGISTER :
-				AUDIT_NFT_OP_CHAIN_UNREGISTER,
-			GFP_KERNEL);
-	kfree(buf);
 
 	if (!ctx->report &&
 	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -2866,18 +2878,6 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 {
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
-			      ctx->table->name, ctx->table->handle,
-			      ctx->chain->name, ctx->chain->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			rule->handle,
-			event == NFT_MSG_NEWRULE ?
-				AUDIT_NFT_OP_RULE_REGISTER :
-				AUDIT_NFT_OP_RULE_UNREGISTER,
-			GFP_KERNEL);
-	kfree(buf);
 
 	if (!ctx->report &&
 	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -3912,18 +3912,6 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
 	struct sk_buff *skb;
 	u32 portid = ctx->portid;
 	int err;
-	char *buf = kasprintf(gfp_flags, "%s:%llu;%s:%llu",
-			      ctx->table->name, ctx->table->handle,
-			      set->name, set->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			set->field_count,
-			event == NFT_MSG_NEWSET ?
-				AUDIT_NFT_OP_SET_REGISTER :
-				AUDIT_NFT_OP_SET_UNREGISTER,
-			gfp_flags);
-	kfree(buf);
 
 	if (!ctx->report &&
 	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -5108,18 +5096,6 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 	u32 portid = ctx->portid;
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
-			      ctx->table->name, ctx->table->handle,
-			      set->name, set->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			set->handle,
-			event == NFT_MSG_NEWSETELEM ?
-				AUDIT_NFT_OP_SETELEM_REGISTER :
-				AUDIT_NFT_OP_SETELEM_UNREGISTER,
-			GFP_KERNEL);
-	kfree(buf);
 
 	if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
 		return;
@@ -6321,12 +6297,11 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 			    filter->type != NFT_OBJECT_UNSPEC &&
 			    obj->ops->type->type != filter->type)
 				goto cont;
-
 			if (reset) {
 				char *buf = kasprintf(GFP_ATOMIC,
-						      "%s:%llu;?:0",
+						      "%s:%u",
 						      table->name,
-						      table->handle);
+						      net->nft.base_seq);
 
 				audit_log_nfcfg(buf,
 						family,
@@ -6447,8 +6422,8 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 		reset = true;
 
 	if (reset) {
-		char *buf = kasprintf(GFP_ATOMIC, "%s:%llu;?:0",
-				      table->name, table->handle);
+		char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+				      table->name, net->nft.base_seq);
 
 		audit_log_nfcfg(buf,
 				family,
@@ -6536,15 +6511,15 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
 {
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(gfp, "%s:%llu;?:0",
-			      table->name, table->handle);
+	char *buf = kasprintf(gfp, "%s:%u",
+			      table->name, net->nft.base_seq);
 
 	audit_log_nfcfg(buf,
 			family,
 			obj->handle,
 			event == NFT_MSG_NEWOBJ ?
-				AUDIT_NFT_OP_OBJ_REGISTER :
-				AUDIT_NFT_OP_OBJ_UNREGISTER,
+				 AUDIT_NFT_OP_OBJ_REGISTER :
+				 AUDIT_NFT_OP_OBJ_UNREGISTER,
 			gfp);
 	kfree(buf);
 
@@ -7362,18 +7337,6 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 {
 	struct sk_buff *skb;
 	int err;
-	char *buf = kasprintf(GFP_KERNEL, "%s:%llu;%s:%llu",
-			      flowtable->table->name, flowtable->table->handle,
-			      flowtable->name, flowtable->handle);
-
-	audit_log_nfcfg(buf,
-			ctx->family,
-			flowtable->hooknum,
-			event == NFT_MSG_NEWFLOWTABLE ?
-				AUDIT_NFT_OP_FLOWTABLE_REGISTER :
-				AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
-			GFP_KERNEL);
-	kfree(buf);
 
 	if (!ctx->report &&
 	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
@@ -7494,9 +7457,6 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
 	struct sk_buff *skb2;
 	int err;
 
-	audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq,
-			AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL);
-
 	if (!nlmsg_report(nlh) &&
 	    !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
 		return;
@@ -8035,12 +7995,64 @@ new_batch:
 	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
 }
 
+static int nf_tables_commit_audit_alloc(struct list_head *adl,
+					struct nft_table *table)
+{
+	struct nft_audit_data *adp;
+
+	list_for_each_entry(adp, adl, list) {
+		if (adp->table == table)
+			return 0;
+	}
+	adp = kzalloc(sizeof(*adp), GFP_KERNEL);
+	if (!adp)
+		return -ENOMEM;
+	adp->table = table;
+	list_add(&adp->list, adl);
+	return 0;
+}
+
+static void nf_tables_commit_audit_collect(struct list_head *adl,
+					   struct nft_table *table, u32 op)
+{
+	struct nft_audit_data *adp;
+
+	list_for_each_entry(adp, adl, list) {
+		if (adp->table == table)
+			goto found;
+	}
+	WARN_ONCE("table=%s not expected in commit list", table->name);
+	return;
+found:
+	adp->entries++;
+	if (!adp->op || adp->op > op)
+		adp->op = op;
+}
+
+#define AUNFTABLENAMELEN (NFT_TABLE_MAXNAMELEN + 22)
+
+static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
+{
+	struct nft_audit_data *adp, *adn;
+	char aubuf[AUNFTABLENAMELEN];
+
+	list_for_each_entry_safe(adp, adn, adl, list) {
+		snprintf(aubuf, AUNFTABLENAMELEN, "%s:%u", adp->table->name,
+			 generation);
+		audit_log_nfcfg(aubuf, adp->table->family, adp->entries,
+				nft2audit_op[adp->op], GFP_KERNEL);
+		list_del(&adp->list);
+		kfree(adp);
+	}
+}
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_chain *chain;
 	struct nft_table *table;
+	LIST_HEAD(adl);
 	int err;
 
 	if (list_empty(&net->nft.commit_list)) {
@@ -8060,6 +8072,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		int ret;
 
+		ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
+		if (ret) {
+			nf_tables_commit_chain_prepare_cancel(net);
+			return ret;
+		}
 		if (trans->msg_type == NFT_MSG_NEWRULE ||
 		    trans->msg_type == NFT_MSG_DELRULE) {
 			chain = trans->ctx.chain;
@@ -8088,6 +8105,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	net->nft.gencursor = nft_gencursor_next(net);
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		nf_tables_commit_audit_collect(&adl, trans->ctx.table,
+					       trans->msg_type);
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
@@ -8240,6 +8259,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 	nft_commit_notify(net, NETLINK_CB(skb).portid);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+	nf_tables_commit_audit_log(&adl, net->nft.base_seq);
 	nf_tables_commit_release(net);
 
 	return 0;
-- 
cgit v1.2.3


From 5c701e71961af0ec8227ea615f1646dbe98aea1a Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Sat, 27 Mar 2021 10:54:47 +0800
Subject: netfilter: ipset: Remove duplicate declaration

struct ip_set is declared twice. One is declared at 79th line,
so remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 46d9a0c26c67..10279c4830ac 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -124,8 +124,6 @@ struct ip_set_ext {
 	bool target;
 };
 
-struct ip_set;
-
 #define ext_timeout(e, s)	\
 ((unsigned long *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_TIMEOUT]))
 #define ext_counter(e, s)	\
-- 
cgit v1.2.3


From 8b9229d15877ec77775633f058d14145f6eb98fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 28 Mar 2021 23:08:55 +0200
Subject: netfilter: flowtable: dst_check() from garbage collector path

Move dst_check() to the garbage collector path. Stale routes trigger the
flow entry teardown state which makes affected flows go back to the
classic forwarding path to re-evaluate flow offloading.

IPv6 requires the dst cookie to work, store it in the flow_tuple,
otherwise dst_check() always fails.

Fixes: e5075c0badaa ("netfilter: flowtable: call dst_check() to fall back to classic forwarding")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  5 ++++-
 net/netfilter/nf_flow_table_core.c    | 37 ++++++++++++++++++++++++++++++++++-
 net/netfilter/nf_flow_table_ip.c      | 22 ++++-----------------
 3 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 4d991c1e93ef..583b327d8fc0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -129,7 +129,10 @@ struct flow_offload_tuple {
 					in_vlan_ingress:2;
 	u16				mtu;
 	union {
-		struct dst_entry	*dst_cache;
+		struct {
+			struct dst_entry *dst_cache;
+			u32		dst_cookie;
+		};
 		struct {
 			u32		ifidx;
 			u32		hw_ifidx;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1bce1d2805c4..76573bae6664 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -74,6 +74,18 @@ err_ct_refcnt:
 }
 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 
+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+{
+	const struct rt6_info *rt;
+
+	if (flow_tuple->l3proto == NFPROTO_IPV6) {
+		rt = (const struct rt6_info *)flow_tuple->dst_cache;
+		return rt6_get_cookie(rt);
+	}
+
+	return 0;
+}
+
 static int flow_offload_fill_route(struct flow_offload *flow,
 				   const struct nf_flow_route *route,
 				   enum flow_offload_tuple_dir dir)
@@ -116,6 +128,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 			return -1;
 
 		flow_tuple->dst_cache = dst;
+		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
 		break;
 	}
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
@@ -390,11 +403,33 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
 	return err;
 }
 
+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
+{
+	struct dst_entry *dst;
+
+	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
+	    tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
+		dst = tuple->dst_cache;
+		if (!dst_check(dst, tuple->dst_cookie))
+			return true;
+	}
+
+	return false;
+}
+
+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
+{
+	return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
+	       flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
+}
+
 static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
 {
 	struct nf_flowtable *flow_table = data;
 
-	if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct))
+	if (nf_flow_has_expired(flow) ||
+	    nf_ct_is_dying(flow->ct) ||
+	    nf_flow_has_stale_dst(flow))
 		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
 
 	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 12cb0cc6958c..889cf88d3dba 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -364,15 +364,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
 		return NF_ACCEPT;
 
-	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
-	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
-		rt = (struct rtable *)tuplehash->tuple.dst_cache;
-		if (!dst_check(&rt->dst, 0)) {
-			flow_offload_teardown(flow);
-			return NF_ACCEPT;
-		}
-	}
-
 	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
@@ -391,6 +382,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
 		IPCB(skb)->iif = skb->dev->ifindex;
 		IPCB(skb)->flags = IPSKB_FORWARDED;
@@ -399,6 +391,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 
 	switch (tuplehash->tuple.xmit_type) {
 	case FLOW_OFFLOAD_XMIT_NEIGH:
+		rt = (struct rtable *)tuplehash->tuple.dst_cache;
 		outdev = rt->dst.dev;
 		skb->dev = outdev;
 		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
@@ -607,15 +600,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
 		return NF_ACCEPT;
 
-	if (tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
-	    tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
-		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
-		if (!dst_check(&rt->dst, 0)) {
-			flow_offload_teardown(flow);
-			return NF_ACCEPT;
-		}
-	}
-
 	if (skb_try_make_writable(skb, thoff + hdrsize))
 		return NF_DROP;
 
@@ -633,6 +617,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
 
 	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		IP6CB(skb)->iif = skb->dev->ifindex;
 		IP6CB(skb)->flags = IP6SKB_FORWARDED;
@@ -641,6 +626,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 
 	switch (tuplehash->tuple.xmit_type) {
 	case FLOW_OFFLOAD_XMIT_NEIGH:
+		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
 		outdev = rt->dst.dev;
 		skb->dev = outdev;
 		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
-- 
cgit v1.2.3


From 7726c9ce71b047924ee55fe87b5cc10a8714d120 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 29 Mar 2021 21:55:41 +0800
Subject: netfilter: nftables: remove unnecessary spin_lock_init()

The spinlock nf_tables_destroy_list_lock is initialized statically.
It is unnecessary to initialize by spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b59fba717a39..b30eee645513 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9214,7 +9214,6 @@ static int __init nf_tables_module_init(void)
 {
 	int err;
 
-	spin_lock_init(&nf_tables_destroy_list_lock);
 	err = register_pernet_subsys(&nf_tables_net_ops);
 	if (err < 0)
 		return err;
-- 
cgit v1.2.3


From 802b805162a1b7d8391c40ac8a878e9e63287aff Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 31 Mar 2021 00:18:02 +0200
Subject: netfilter: nftables: add helper function to set the base sequence
 number

This patch adds a helper function to calculate the base sequence number
field that is stored in the nfnetlink header. Use the helper function
whenever possible.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b30eee645513..e894d70b5d5f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -688,6 +688,11 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
 	return ERR_PTR(-ENOENT);
 }
 
+static __be16 nft_base_seq(const struct net *net)
+{
+	return htons(net->nft.base_seq & 0xffff);
+}
+
 static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
 				    .len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -712,7 +717,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
 	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -1473,7 +1478,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
 		goto nla_put_failure;
@@ -2832,7 +2837,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
 		goto nla_put_failure;
@@ -3820,7 +3825,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= ctx->family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(ctx->net);
 
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
@@ -4831,7 +4836,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family = table->family;
 	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id	    = nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
@@ -4903,7 +4908,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= ctx->family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(ctx->net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(ctx->net);
 
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
@@ -6233,7 +6238,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
 	    nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
@@ -7146,7 +7151,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= family;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
 	    nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
@@ -7391,7 +7396,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family	= AF_UNSPEC;
 	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
+	nfmsg->res_id		= nft_base_seq(net);
 
 	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
 	    nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
-- 
cgit v1.2.3


From 19c28b1374fb1073a9ec873a6c10bf5f16b10b9d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 30 Mar 2021 16:58:37 +0200
Subject: netfilter: add helper function to set up the nfnetlink header and use
 it

This patch adds a helper function to set up the netlink and nfnetlink headers.
Update existing codebase to use it.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  27 ++++++++++
 net/netfilter/ipset/ip_set_core.c    |  17 ++----
 net/netfilter/nf_conntrack_netlink.c |  77 ++++++++------------------
 net/netfilter/nf_tables_api.c        | 102 ++++++++++-------------------------
 net/netfilter/nf_tables_trace.c      |   9 +---
 net/netfilter/nfnetlink_acct.c       |  11 ++--
 net/netfilter/nfnetlink_cthelper.c   |  11 ++--
 net/netfilter/nfnetlink_cttimeout.c  |  22 +++-----
 net/netfilter/nfnetlink_log.c        |  11 ++--
 net/netfilter/nfnetlink_queue.c      |  12 ++---
 net/netfilter/nft_compat.c           |  11 ++--
 11 files changed, 102 insertions(+), 208 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index f6267e2883f2..791d516e1e88 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -57,6 +57,33 @@ static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
 	return subsys << 8 | msg_type;
 }
 
+static inline void nfnl_fill_hdr(struct nlmsghdr *nlh, u8 family, u8 version,
+				 __be16 res_id)
+{
+	struct nfgenmsg *nfmsg;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = family;
+	nfmsg->version = version;
+	nfmsg->res_id = res_id;
+}
+
+static inline struct nlmsghdr *nfnl_msg_put(struct sk_buff *skb, u32 portid,
+					    u32 seq, int type, int flags,
+					    u8 family, u8 version,
+					    __be16 res_id)
+{
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
+	if (!nlh)
+		return NULL;
+
+	nfnl_fill_hdr(nlh, family, version, res_id);
+
+	return nlh;
+}
+
 void nfnl_lock(__u8 subsys_id);
 void nfnl_unlock(__u8 subsys_id);
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 89009c82a6b2..359ff8ec236a 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -963,20 +963,9 @@ static struct nlmsghdr *
 start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 	  enum ipset_cmd cmd)
 {
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-
-	nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
-			sizeof(*nfmsg), flags);
-	if (!nlh)
-		return NULL;
-
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = NFPROTO_IPV4;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
-	return nlh;
+	return nfnl_msg_put(skb, portid, seq,
+			    nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
+			    NFPROTO_IPV4, NFNETLINK_V0, 0);
 }
 
 /* Create a set */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1d519b0e51a5..c67a6ec22a74 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -555,22 +555,17 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 {
 	const struct nf_conntrack_zone *zone;
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
 	unsigned int event;
 
 	if (portid)
 		flags |= NLM_F_MULTI;
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct),
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = nf_ct_l3num(ct);
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
 	zone = nf_ct_zone(ct);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
@@ -713,7 +708,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	const struct nf_conntrack_zone *zone;
 	struct net *net;
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
 	struct nf_conn *ct = item->ct;
 	struct sk_buff *skb;
@@ -743,15 +737,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		goto errout;
 
 	type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
-	nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct),
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = nf_ct_l3num(ct);
-	nfmsg->version	= NFNETLINK_V0;
-	nfmsg->res_id	= 0;
-
 	zone = nf_ct_zone(ct);
 
 	nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG);
@@ -2490,20 +2480,15 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 				__u16 cpu, const struct ip_conntrack_stat *st)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
 			      IPCTNL_MSG_CT_GET_STATS_CPU);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, htons(cpu));
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(cpu);
-
 	if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
 	    nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
@@ -2575,20 +2560,15 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 			    struct net *net)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
 	if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
 		goto nla_put_failure;
 
@@ -3085,19 +3065,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 			int event, const struct nf_conntrack_expect *exp)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags,
+			   exp->tuple.src.l3num, NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = exp->tuple.src.l3num;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
 	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
 		goto nla_put_failure;
 
@@ -3117,7 +3092,6 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	struct nf_conntrack_expect *exp = item->exp;
 	struct net *net = nf_ct_exp_net(exp);
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	struct sk_buff *skb;
 	unsigned int type, group;
 	int flags = 0;
@@ -3140,15 +3114,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 		goto errout;
 
 	type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
-	nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, item->portid, 0, type, flags,
+			   exp->tuple.src.l3num, NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = exp->tuple.src.l3num;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
 	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
 		goto nla_put_failure;
 
@@ -3716,20 +3686,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
 			     const struct ip_conntrack_stat *st)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
 			      IPCTNL_MSG_EXP_GET_STATS_CPU);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, htons(cpu));
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = htons(cpu);
-
 	if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) ||
 	    nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) ||
 	    nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete)))
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e894d70b5d5f..005f1c620fc0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -707,18 +707,13 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 				     int family, const struct nft_table *table)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+			   NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
 	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
 	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
@@ -1468,18 +1463,13 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 				     const struct nft_chain *chain)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+			   NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
@@ -2825,20 +2815,15 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
 				    const struct nft_rule *prule)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	const struct nft_expr *expr, *next;
 	struct nlattr *list;
 	u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
 
-	nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
+			   nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
@@ -3809,7 +3794,6 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
 static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 			      const struct nft_set *set, u16 event, u16 flags)
 {
-	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	u32 portid = ctx->portid;
 	struct nlattr *nest;
@@ -3817,16 +3801,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	int i;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
-			flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+			   NFNETLINK_V0, nft_base_seq(ctx->net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= ctx->family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(ctx->net);
-
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -4795,7 +4774,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nft_set *set;
 	struct nft_set_dump_args args;
 	bool set_found = false;
-	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 portid, seq;
@@ -4828,16 +4806,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	portid = NETLINK_CB(cb->skb).portid;
 	seq    = cb->nlh->nlmsg_seq;
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
-			NLM_F_MULTI);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
+			   table->family, NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = table->family;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
@@ -4894,22 +4867,16 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
 				       const struct nft_set *set,
 				       const struct nft_set_elem *elem)
 {
-	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	int err;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
-			flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+			   NFNETLINK_V0, nft_base_seq(ctx->net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= ctx->family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(ctx->net);
-
 	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
 		goto nla_put_failure;
 	if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -6227,19 +6194,14 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 				   int family, const struct nft_table *table,
 				   struct nft_object *obj, bool reset)
 {
-	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+			   NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
 	    nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
 	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
@@ -7139,20 +7101,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 					 struct list_head *hook_list)
 {
 	struct nlattr *nest, *nest_devs;
-	struct nfgenmsg *nfmsg;
 	struct nft_hook *hook;
 	struct nlmsghdr *nlh;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+			   NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
 	    nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
 	    nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
@@ -7385,19 +7342,14 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 				   u32 portid, u32 seq)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	char buf[TASK_COMM_LEN];
 	int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
 
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
+			   NFNETLINK_V0, nft_base_seq(net));
+	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= AF_UNSPEC;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= nft_base_seq(net);
-
 	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
 	    nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
 	    nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 87b36da5cd98..0cf3278007ba 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -183,7 +183,6 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
 void nft_trace_notify(struct nft_traceinfo *info)
 {
 	const struct nft_pktinfo *pkt = info->pkt;
-	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
 	unsigned int size;
@@ -219,15 +218,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
 		return;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
-	nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+	nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
+			   NFNETLINK_V0, 0);
 	if (!nlh)
 		goto nla_put_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family	= info->basechain->type->family;
-	nfmsg->version		= NFNETLINK_V0;
-	nfmsg->res_id		= 0;
-
 	if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
 		goto nla_put_failure;
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 0fa1653b5f19..bb930f3b06c7 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -145,21 +145,16 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		   int event, struct nf_acct *acct)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	u64 pkts, bytes;
 	u32 old_flags;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
 	if (nla_put_string(skb, NFACCT_NAME, acct->name))
 		goto nla_put_failure;
 
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 0f94fce1d3ed..22f6f7fcc724 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -526,20 +526,15 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 			int event, struct nf_conntrack_helper *helper)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	int status;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
 	if (nla_put_string(skb, NFCTH_NAME, helper->name))
 		goto nla_put_failure;
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 89a381f7f945..de831a257512 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -160,22 +160,17 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		       int event, struct ctnl_timeout *timeout)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
 	struct nlattr *nest_parms;
 	int ret;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
 	if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
 	    nla_put_be16(skb, CTA_TIMEOUT_L3PROTO,
 			 htons(timeout->timeout.l3num)) ||
@@ -382,21 +377,16 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 			    const unsigned int *timeouts)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	struct nlattr *nest_parms;
 	int ret;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = AF_UNSPEC;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
 	if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) ||
 	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
 		goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 26776b88a539..d5f458d0ff3d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -456,20 +456,15 @@ __build_packet_message(struct nfnl_log_net *log,
 {
 	struct nfulnl_msg_packet_hdr pmsg;
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	sk_buff_data_t old_tail = inst->skb->tail;
 	struct sock *sk;
 	const unsigned char *hwhdrp;
 
-	nlh = nlmsg_put(inst->skb, 0, 0,
-			nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
-			sizeof(struct nfgenmsg), 0);
+	nlh = nfnl_msg_put(inst->skb, 0, 0,
+			   nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
+			   0, pf, NFNETLINK_V0, htons(inst->group_num));
 	if (!nlh)
 		return -1;
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = pf;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = htons(inst->group_num);
 
 	memset(&pmsg, 0, sizeof(pmsg));
 	pmsg.hw_protocol	= skb->protocol;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 48a07914fd94..37e81d895e61 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -383,7 +383,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	struct nlattr *nla;
 	struct nfqnl_msg_packet_hdr *pmsg;
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	struct sk_buff *entskb = entry->skb;
 	struct net_device *indev;
 	struct net_device *outdev;
@@ -471,18 +470,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 		goto nlmsg_failure;
 	}
 
-	nlh = nlmsg_put(skb, 0, 0,
-			nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
-			sizeof(struct nfgenmsg), 0);
+	nlh = nfnl_msg_put(skb, 0, 0,
+			   nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
+			   0, entry->state.pf, NFNETLINK_V0,
+			   htons(queue->queue_num));
 	if (!nlh) {
 		skb_tx_error(entskb);
 		kfree_skb(skb);
 		goto nlmsg_failure;
 	}
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = entry->state.pf;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = htons(queue->queue_num);
 
 	nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
 	pmsg = nla_data(nla);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8e56f353ff35..b8dbd20a6a4c 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -591,19 +591,14 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		      int rev, int target)
 {
 	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event);
-	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+			   NFNETLINK_V0, 0);
+	if (!nlh)
 		goto nlmsg_failure;
 
-	nfmsg = nlmsg_data(nlh);
-	nfmsg->nfgen_family = family;
-	nfmsg->version = NFNETLINK_V0;
-	nfmsg->res_id = 0;
-
 	if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
 	    nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
 	    nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
-- 
cgit v1.2.3


From 1e5d1f69d9fb8ea0679f9e85915e8e7fdacfbe7a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 29 Mar 2021 20:59:52 -0700
Subject: ethtool: support FEC settings over netlink

Add FEC API to netlink.

This is not a 1-to-1 conversion.

FEC settings already depend on link modes to tell user which
modes are supported. Take this further an use link modes for
manual configuration. Old struct ethtool_fecparam is still
used to talk to the drivers, so we need to translate back
and forth. We can revisit the internal API if number of FEC
encodings starts to grow.

Enforce only one active FEC bit (by using a bit position
rather than another mask).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst |  62 ++++++-
 include/uapi/linux/ethtool_netlink.h         |  17 ++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/fec.c                            | 238 +++++++++++++++++++++++++++
 net/ethtool/netlink.c                        |  19 +++
 net/ethtool/netlink.h                        |   4 +
 6 files changed, 339 insertions(+), 3 deletions(-)
 create mode 100644 net/ethtool/fec.c

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 05073482db05..4bdb4298f178 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -208,6 +208,8 @@ Userspace to kernel:
   ``ETHTOOL_MSG_CABLE_TEST_ACT``        action start cable test
   ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``    action start raw TDR cable test
   ``ETHTOOL_MSG_TUNNEL_INFO_GET``       get tunnel offload info
+  ``ETHTOOL_MSG_FEC_GET``               get FEC settings
+  ``ETHTOOL_MSG_FEC_SET``               set FEC settings
   ===================================== ================================
 
 Kernel to userspace:
@@ -242,6 +244,8 @@ Kernel to userspace:
   ``ETHTOOL_MSG_CABLE_TEST_NTF``        Cable test results
   ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF``    Cable test TDR results
   ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info
+  ``ETHTOOL_MSG_FEC_GET_REPLY``         FEC settings
+  ``ETHTOOL_MSG_FEC_NTF``               FEC settings
   ===================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -1280,6 +1284,60 @@ Kernel response contents:
 For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
 the table contains static entries, hard-coded by the NIC.
 
+FEC_GET
+=======
+
+Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request.
+
+Request contents:
+
+  =====================================  ======  ==========================
+  ``ETHTOOL_A_FEC_HEADER``               nested  request header
+  =====================================  ======  ==========================
+
+Kernel response contents:
+
+  =====================================  ======  ==========================
+  ``ETHTOOL_A_FEC_HEADER``               nested  request header
+  ``ETHTOOL_A_FEC_MODES``                bitset  configured modes
+  ``ETHTOOL_A_FEC_AUTO``                 bool    FEC mode auto selection
+  ``ETHTOOL_A_FEC_ACTIVE``               u32     index of active FEC mode
+  =====================================  ======  ==========================
+
+``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently
+active on the interface. This attribute may not be present if device does
+not support FEC.
+
+``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when
+autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero driver will
+select the FEC mode automatically based on the parameters of the SFP module.
+This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface.
+``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode
+bits (rather than old ``ETHTOOL_FEC_*`` bits).
+
+FEC_SET
+=======
+
+Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request.
+
+Request contents:
+
+  =====================================  ======  ==========================
+  ``ETHTOOL_A_FEC_HEADER``               nested  request header
+  ``ETHTOOL_A_FEC_MODES``                bitset  configured modes
+  ``ETHTOOL_A_FEC_AUTO``                 bool    FEC mode auto selection
+  =====================================  ======  ==========================
+
+``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise
+FEC mode is selected as part of autonegotiation.
+
+``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. It's recommended
+to set only one bit, if multiple bits are set driver may choose between them
+in an implementation specific way.
+
+``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP
+module parameters. This does not mean autonegotiation.
+
 Request translation
 ===================
 
@@ -1373,8 +1431,8 @@ are netlink only.
                                       ``ETHTOOL_MSG_LINKMODES_SET``
   ``ETHTOOL_PHY_GTUNABLE``            n/a
   ``ETHTOOL_PHY_STUNABLE``            n/a
-  ``ETHTOOL_GFECPARAM``               n/a
-  ``ETHTOOL_SFECPARAM``               n/a
+  ``ETHTOOL_GFECPARAM``               ``ETHTOOL_MSG_FEC_GET``
+  ``ETHTOOL_SFECPARAM``               ``ETHTOOL_MSG_FEC_SET``
   n/a                                 ''ETHTOOL_MSG_CABLE_TEST_ACT''
   n/a                                 ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT''
   n/a                                 ``ETHTOOL_MSG_TUNNEL_INFO_GET``
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index a286635ac9b8..7f1bdb5b31ba 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -42,6 +42,8 @@ enum {
 	ETHTOOL_MSG_CABLE_TEST_ACT,
 	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
+	ETHTOOL_MSG_FEC_GET,
+	ETHTOOL_MSG_FEC_SET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -80,6 +82,8 @@ enum {
 	ETHTOOL_MSG_CABLE_TEST_NTF,
 	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
+	ETHTOOL_MSG_FEC_GET_REPLY,
+	ETHTOOL_MSG_FEC_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -629,6 +633,19 @@ enum {
 	ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1)
 };
 
+/* FEC */
+
+enum {
+	ETHTOOL_A_FEC_UNSPEC,
+	ETHTOOL_A_FEC_HEADER,				/* nest - _A_HEADER_* */
+	ETHTOOL_A_FEC_MODES,				/* bitset */
+	ETHTOOL_A_FEC_AUTO,				/* u8 */
+	ETHTOOL_A_FEC_ACTIVE,				/* u32 */
+
+	__ETHTOOL_A_FEC_CNT,
+	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 7a849ff22dad..c2dc9033a8f7 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o
+		   tunnels.o fec.o
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
new file mode 100644
index 000000000000..31454b9188bd
--- /dev/null
+++ b/net/ethtool/fec.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct fec_req_info {
+	struct ethnl_req_info		base;
+};
+
+struct fec_reply_data {
+	struct ethnl_reply_data		base;
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes);
+	u32 active_fec;
+	u8 fec_auto;
+};
+
+#define FEC_REPDATA(__reply_base) \
+	container_of(__reply_base, struct fec_reply_data, base)
+
+#define ETHTOOL_FEC_MASK	((ETHTOOL_FEC_LLRS << 1) - 1)
+
+const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = {
+	[ETHTOOL_A_FEC_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static void
+ethtool_fec_to_link_modes(u32 fec, unsigned long *link_modes, u8 *fec_auto)
+{
+	if (fec_auto)
+		*fec_auto = !!(fec & ETHTOOL_FEC_AUTO);
+
+	if (fec & ETHTOOL_FEC_OFF)
+		__set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes);
+	if (fec & ETHTOOL_FEC_RS)
+		__set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes);
+	if (fec & ETHTOOL_FEC_BASER)
+		__set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes);
+	if (fec & ETHTOOL_FEC_LLRS)
+		__set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes);
+}
+
+static int
+ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec,
+			       unsigned long *link_modes, u8 fec_auto)
+{
+	memset(fec, 0, sizeof(*fec));
+
+	if (fec_auto)
+		fec->fec |= ETHTOOL_FEC_AUTO;
+
+	if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes))
+		fec->fec |= ETHTOOL_FEC_OFF;
+	if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes))
+		fec->fec |= ETHTOOL_FEC_RS;
+	if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes))
+		fec->fec |= ETHTOOL_FEC_BASER;
+	if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes))
+		fec->fec |= ETHTOOL_FEC_LLRS;
+
+	if (!bitmap_empty(link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int fec_prepare_data(const struct ethnl_req_info *req_base,
+			    struct ethnl_reply_data *reply_base,
+			    struct genl_info *info)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {};
+	struct fec_reply_data *data = FEC_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	struct ethtool_fecparam fec = {};
+	int ret;
+
+	if (!dev->ethtool_ops->get_fecparam)
+		return -EOPNOTSUPP;
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+	ret = dev->ethtool_ops->get_fecparam(dev, &fec);
+	ethnl_ops_complete(dev);
+	if (ret)
+		return ret;
+
+	WARN_ON_ONCE(fec.reserved);
+
+	ethtool_fec_to_link_modes(fec.fec, data->fec_link_modes,
+				  &data->fec_auto);
+
+	ethtool_fec_to_link_modes(fec.active_fec, active_fec_modes, NULL);
+	data->active_fec = find_first_bit(active_fec_modes,
+					  __ETHTOOL_LINK_MODE_MASK_NBITS);
+	/* Don't report attr if no FEC mode set. Note that
+	 * ethtool_fecparam_to_link_modes() ignores NONE and AUTO.
+	 */
+	if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS)
+		data->active_fec = 0;
+
+	return 0;
+}
+
+static int fec_reply_size(const struct ethnl_req_info *req_base,
+			  const struct ethnl_reply_data *reply_base)
+{
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	const struct fec_reply_data *data = FEC_REPDATA(reply_base);
+	int len = 0;
+	int ret;
+
+	ret = ethnl_bitset_size(data->fec_link_modes, NULL,
+				__ETHTOOL_LINK_MODE_MASK_NBITS,
+				link_mode_names, compact);
+	if (ret < 0)
+		return ret;
+	len += ret;
+
+	len += nla_total_size(sizeof(u8)) +	/* _FEC_AUTO */
+	       nla_total_size(sizeof(u32));	/* _FEC_ACTIVE */
+
+	return len;
+}
+
+static int fec_fill_reply(struct sk_buff *skb,
+			  const struct ethnl_req_info *req_base,
+			  const struct ethnl_reply_data *reply_base)
+{
+	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+	const struct fec_reply_data *data = FEC_REPDATA(reply_base);
+	int ret;
+
+	ret = ethnl_put_bitset(skb, ETHTOOL_A_FEC_MODES,
+			       data->fec_link_modes, NULL,
+			       __ETHTOOL_LINK_MODE_MASK_NBITS,
+			       link_mode_names, compact);
+	if (ret < 0)
+		return ret;
+
+	if (nla_put_u8(skb, ETHTOOL_A_FEC_AUTO, data->fec_auto) ||
+	    (data->active_fec &&
+	     nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec)))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+const struct ethnl_request_ops ethnl_fec_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_FEC_GET,
+	.reply_cmd		= ETHTOOL_MSG_FEC_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_FEC_HEADER,
+	.req_info_size		= sizeof(struct fec_req_info),
+	.reply_data_size	= sizeof(struct fec_reply_data),
+
+	.prepare_data		= fec_prepare_data,
+	.reply_size		= fec_reply_size,
+	.fill_reply		= fec_fill_reply,
+};
+
+/* FEC_SET */
+
+const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = {
+	[ETHTOOL_A_FEC_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_FEC_MODES]	= { .type = NLA_NESTED },
+	[ETHTOOL_A_FEC_AUTO]	= NLA_POLICY_MAX(NLA_U8, 1),
+};
+
+int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {};
+	struct ethnl_req_info req_info = {};
+	struct nlattr **tb = info->attrs;
+	struct ethtool_fecparam fec = {};
+	const struct ethtool_ops *ops;
+	struct net_device *dev;
+	bool mod = false;
+	u8 fec_auto;
+	int ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEC_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+	ops = dev->ethtool_ops;
+	ret = -EOPNOTSUPP;
+	if (!ops->get_fecparam || !ops->set_fecparam)
+		goto out_dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+	ret = ops->get_fecparam(dev, &fec);
+	if (ret < 0)
+		goto out_ops;
+
+	ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto);
+
+	ret = ethnl_update_bitset(fec_link_modes,
+				  __ETHTOOL_LINK_MODE_MASK_NBITS,
+				  tb[ETHTOOL_A_FEC_MODES],
+				  link_mode_names, info->extack, &mod);
+	if (ret < 0)
+		goto out_ops;
+	ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod);
+
+	ret = 0;
+	if (!mod)
+		goto out_ops;
+
+	ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto);
+	if (ret) {
+		NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
+				    "invalid FEC modes requested");
+		goto out_ops;
+	}
+	if (!fec.fec) {
+		ret = -EINVAL;
+		NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
+				    "no FEC modes set");
+		goto out_ops;
+	}
+
+	ret = dev->ethtool_ops->set_fecparam(dev, &fec);
+	if (ret < 0)
+		goto out_ops;
+	ethtool_notify(dev, ETHTOOL_MSG_FEC_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 50d3c8896f91..705a4b201564 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -244,6 +244,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_COALESCE_GET]	= &ethnl_coalesce_request_ops,
 	[ETHTOOL_MSG_PAUSE_GET]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
+	[ETHTOOL_MSG_FEC_GET]		= &ethnl_fec_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
 };
 
@@ -551,6 +552,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
 	[ETHTOOL_MSG_COALESCE_NTF]	= &ethnl_coalesce_request_ops,
 	[ETHTOOL_MSG_PAUSE_NTF]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_NTF]		= &ethnl_eee_request_ops,
+	[ETHTOOL_MSG_FEC_NTF]		= &ethnl_fec_request_ops,
 };
 
 /* default notification handler */
@@ -643,6 +645,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
 	[ETHTOOL_MSG_COALESCE_NTF]	= ethnl_default_notify,
 	[ETHTOOL_MSG_PAUSE_NTF]		= ethnl_default_notify,
 	[ETHTOOL_MSG_EEE_NTF]		= ethnl_default_notify,
+	[ETHTOOL_MSG_FEC_NTF]		= ethnl_default_notify,
 };
 
 void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -912,6 +915,22 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_tunnel_info_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_tunnel_info_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_FEC_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_fec_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_fec_get_policy) - 1,
+	},
+	{
+		.cmd	= ETHTOOL_MSG_FEC_SET,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_set_fec,
+		.policy = ethnl_fec_set_policy,
+		.maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 6eabd58d81bf..785f7ee45930 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -344,6 +344,7 @@ extern const struct ethnl_request_ops ethnl_coalesce_request_ops;
 extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_fec_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -375,6 +376,8 @@ extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER +
 extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1];
 extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1];
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
+extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
+extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -392,5 +395,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_start(struct netlink_callback *cb);
 int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
-- 
cgit v1.2.3


From 0d7f76dc11e6df6b883f625c8343aa8fa1f6874b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 29 Mar 2021 20:59:53 -0700
Subject: netdevsim: add FEC settings support

Add support for ethtool FEC and some ethtool error injection.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/ethtool.c   | 36 ++++++++++++++++++++++++++++++++++++
 drivers/net/netdevsim/netdevsim.h |  3 +++
 2 files changed, 39 insertions(+)

diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
index 166f0d6cbcf7..c9ae52595a8f 100644
--- a/drivers/net/netdevsim/ethtool.c
+++ b/drivers/net/netdevsim/ethtool.c
@@ -77,6 +77,34 @@ static int nsim_set_ringparam(struct net_device *dev,
 	return 0;
 }
 
+static int
+nsim_get_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam)
+{
+	struct netdevsim *ns = netdev_priv(dev);
+
+	if (ns->ethtool.get_err)
+		return -ns->ethtool.get_err;
+	memcpy(fecparam, &ns->ethtool.fec, sizeof(ns->ethtool.fec));
+	return 0;
+}
+
+static int
+nsim_set_fecparam(struct net_device *dev, struct ethtool_fecparam *fecparam)
+{
+	struct netdevsim *ns = netdev_priv(dev);
+	u32 fec;
+
+	if (ns->ethtool.set_err)
+		return -ns->ethtool.set_err;
+	memcpy(&ns->ethtool.fec, fecparam, sizeof(ns->ethtool.fec));
+	fec = fecparam->fec;
+	if (fec == ETHTOOL_FEC_AUTO)
+		fec |= ETHTOOL_FEC_OFF;
+	fec |= ETHTOOL_FEC_NONE;
+	ns->ethtool.fec.active_fec = 1 << (fls(fec) - 1);
+	return 0;
+}
+
 static const struct ethtool_ops nsim_ethtool_ops = {
 	.supported_coalesce_params	= ETHTOOL_COALESCE_ALL_PARAMS,
 	.get_pause_stats	        = nsim_get_pause_stats,
@@ -86,6 +114,8 @@ static const struct ethtool_ops nsim_ethtool_ops = {
 	.get_coalesce			= nsim_get_coalesce,
 	.get_ringparam			= nsim_get_ringparam,
 	.set_ringparam			= nsim_set_ringparam,
+	.get_fecparam			= nsim_get_fecparam,
+	.set_fecparam			= nsim_set_fecparam,
 };
 
 static void nsim_ethtool_ring_init(struct netdevsim *ns)
@@ -104,8 +134,14 @@ void nsim_ethtool_init(struct netdevsim *ns)
 
 	nsim_ethtool_ring_init(ns);
 
+	ns->ethtool.fec.fec = ETHTOOL_FEC_NONE;
+	ns->ethtool.fec.active_fec = ETHTOOL_FEC_NONE;
+
 	ethtool = debugfs_create_dir("ethtool", ns->nsim_dev_port->ddir);
 
+	debugfs_create_u32("get_err", 0600, ethtool, &ns->ethtool.get_err);
+	debugfs_create_u32("set_err", 0600, ethtool, &ns->ethtool.set_err);
+
 	dir = debugfs_create_dir("pause", ethtool);
 	debugfs_create_bool("report_stats_rx", 0600, dir,
 			    &ns->ethtool.pauseparam.report_stats_rx);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d735c21def4b..7ff24e03577b 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -60,9 +60,12 @@ struct nsim_ethtool_pauseparam {
 };
 
 struct nsim_ethtool {
+	u32 get_err;
+	u32 set_err;
 	struct nsim_ethtool_pauseparam pauseparam;
 	struct ethtool_coalesce coalesce;
 	struct ethtool_ringparam ring;
+	struct ethtool_fecparam fec;
 };
 
 struct netdevsim {
-- 
cgit v1.2.3


From 1da07e5db3564789eb598bc772ea50b547194691 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 29 Mar 2021 20:59:54 -0700
Subject: selftests: ethtool: add a netdevsim FEC test

Test FEC settings, iterate over configs.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/netdevsim/ethtool-common.sh        |   5 +-
 .../selftests/drivers/net/netdevsim/ethtool-fec.sh | 110 +++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index 9f64d5c7107b..7ca1f030d209 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -24,8 +24,11 @@ function check {
     local code=$1
     local str=$2
     local exp_str=$3
+    local exp_fail=$4
 
-    if [ $code -ne 0 ]; then
+    [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+    if [ $code $cop 0 ]; then
 	((num_errors++))
 	return
     fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..0c56746e9ce0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "Configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "Configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+    check $?
+    s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+    check $? "$s" "Configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test mutliple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+    check $?
+
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+    check $? "$s" '"Auto"'
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+    check $? "$s" '"Off"'
+
+    $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+    check $?
+
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+    check $? "$s" '"Auto"
+"RS"'
+    s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+    check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV  >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
-- 
cgit v1.2.3


From 48bb5697269a7cbe5194dbb044dc38c517e34c58 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Mar 2021 23:45:51 -0700
Subject: ip6_tunnel: sit: proper dev_{hold|put} in ndo_[un]init methods

Same reasons than for the previous commits :
6289a98f0817 ("sit: proper dev_{hold|put} in ndo_[un]init methods")
40cb881b5aaa ("ip6_vti: proper dev_{hold|put} in ndo_[un]init methods")
7f700334be9a ("ip6_gre: proper dev_{hold|put} in ndo_[un]init methods")

After adopting CONFIG_PCPU_DEV_REFCNT=n option, syzbot was able to trigger
a warning [1]

Issue here is that:

- all dev_put() should be paired with a corresponding prior dev_hold().

- A driver doing a dev_put() in its ndo_uninit() MUST also
  do a dev_hold() in its ndo_init(), only when ndo_init()
  is returning 0.

Otherwise, register_netdevice() would call ndo_uninit()
in its error path and release a refcount too soon.

[1]
WARNING: CPU: 1 PID: 21059 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Modules linked in:
CPU: 1 PID: 21059 Comm: syz-executor.4 Not tainted 5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
Code: 1d 6a 5a e8 09 31 ff 89 de e8 8d 1a ab fd 84 db 75 e0 e8 d4 13 ab fd 48 c7 c7 a0 e1 c1 89 c6 05 4a 5a e8 09 01 e8 2e 36 fb 04 <0f> 0b eb c4 e8 b8 13 ab fd 0f b6 1d 39 5a e8 09 31 ff 89 de e8 58
RSP: 0018:ffffc900025aefe8 EFLAGS: 00010282
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000000000040000 RSI: ffffffff815c51f5 RDI: fffff520004b5def
RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff815bdf8e R11: 0000000000000000 R12: ffff888023488568
R13: ffff8880254e9000 R14: 00000000dfd82cfd R15: ffff88802ee2d7c0
FS:  00007f13bc590700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f0943e74000 CR3: 0000000025273000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __refcount_dec include/linux/refcount.h:344 [inline]
 refcount_dec include/linux/refcount.h:359 [inline]
 dev_put include/linux/netdevice.h:4135 [inline]
 ip6_tnl_dev_uninit+0x370/0x3d0 net/ipv6/ip6_tunnel.c:387
 register_netdevice+0xadf/0x1500 net/core/dev.c:10308
 ip6_tnl_create2+0x1b5/0x400 net/ipv6/ip6_tunnel.c:263
 ip6_tnl_newlink+0x312/0x580 net/ipv6/ip6_tunnel.c:2052
 __rtnl_newlink+0x1062/0x1710 net/core/rtnetlink.c:3443
 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3491
 rtnetlink_rcv_msg+0x44e/0xad0 net/core/rtnetlink.c:5553
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2502
 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
 netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338
 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:674
 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 919067cc845f ("net: add CONFIG_PCPU_DEV_REFCNT")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cd78f5b2cd75..67ee9d58ec5e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -266,7 +266,6 @@ static int ip6_tnl_create2(struct net_device *dev)
 
 	strcpy(t->parms.name, dev->name);
 
-	dev_hold(dev);
 	ip6_tnl_link(ip6n, t);
 	return 0;
 
@@ -1882,6 +1881,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
 	dev->min_mtu = ETH_MIN_MTU;
 	dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
 
+	dev_hold(dev);
 	return 0;
 
 destroy_dst:
-- 
cgit v1.2.3


From 634da4c118434cf8a0c5eabce9eb58502ef1521c Mon Sep 17 00:00:00 2001
From: Benita Bose <benita.bose@intel.com>
Date: Tue, 2 Mar 2021 10:12:02 -0800
Subject: ice: Add Support for XPS

Enable and configure XPS. The driver code implemented sets up the Transmit
Packet Steering Map, which in turn will be used by the kernel in queue
selection during Tx.

Signed-off-by: Benita Bose <benita.bose@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c | 23 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_txrx.h |  6 ++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 1148d768f8ed..be26775a7dfe 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -214,6 +214,26 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
 	return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
 }
 
+/**
+ * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring
+ * @ring: The Tx ring to configure
+ *
+ * This enables/disables XPS for a given Tx descriptor ring
+ * based on the TCs enabled for the VSI that ring belongs to.
+ */
+static void ice_cfg_xps_tx_ring(struct ice_ring *ring)
+{
+	if (!ring->q_vector || !ring->netdev)
+		return;
+
+	/* We only initialize XPS once, so as not to overwrite user settings */
+	if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state))
+		return;
+
+	netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask,
+			    ring->q_index);
+}
+
 /**
  * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
  * @ring: The Tx ring to configure
@@ -664,6 +684,9 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
 	u16 pf_q;
 	u8 tc;
 
+	/* Configure XPS */
+	ice_cfg_xps_tx_ring(ring);
+
 	pf_q = ring->reg_idx;
 	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
 	/* copy context contents into the qg_buf */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 5dab77504fa5..40e34ede6e58 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -195,6 +195,11 @@ struct ice_rxq_stats {
 	u64 gro_dropped; /* GRO returned dropped */
 };
 
+enum ice_ring_state_t {
+	ICE_TX_XPS_INIT_DONE,
+	ICE_TX_NBITS,
+};
+
 /* this enum matches hardware bits and is meant to be used by DYN_CTLN
  * registers and QINT registers or more generally anywhere in the manual
  * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
@@ -292,6 +297,7 @@ struct ice_ring {
 	};
 
 	struct rcu_head rcu;		/* to avoid race on free */
+	DECLARE_BITMAP(xps_state, ICE_TX_NBITS);	/* XPS Config State */
 	struct bpf_prog *xdp_prog;
 	struct xsk_buff_pool *xsk_pool;
 	u16 rx_offset;
-- 
cgit v1.2.3


From 1e23f076b25424508d4576fba176888944b9b2b0 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 2 Mar 2021 10:12:03 -0800
Subject: ice: Delay netdev registration

Once a netdev is registered, the corresponding network interface can
be immediately used by userspace utilities (like say NetworkManager).
This can be problematic if the driver technically isn't fully up yet.

Move netdev registration to the end of probe, as by this time the
driver data structures and device will be initialized as expected.

However, delaying netdev registration causes a failure in the aRFS flow
where netdev->reg_state == NETREG_REGISTERED condition is checked. It's
not clear why this check was added to begin with, so remove it.
Local testing didn't indicate any issues with this change.

The state bit check in ice_open was put in as a stop-gap measure to
prevent a premature interface up operation. This is no longer needed,
so remove it.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_arfs.c |  6 +-
 drivers/net/ethernet/intel/ice/ice_main.c | 93 +++++++++++++++----------------
 2 files changed, 47 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
index 6560acd76c94..88d98c9e5f91 100644
--- a/drivers/net/ethernet/intel/ice/ice_arfs.c
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -581,8 +581,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
 		return;
 
 	netdev = vsi->netdev;
-	if (!netdev || !netdev->rx_cpu_rmap ||
-	    netdev->reg_state != NETREG_REGISTERED)
+	if (!netdev || !netdev->rx_cpu_rmap)
 		return;
 
 	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
@@ -604,8 +603,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
 
 	pf = vsi->back;
 	netdev = vsi->netdev;
-	if (!pf || !netdev || !vsi->num_q_vectors ||
-	    vsi->netdev->reg_state != NETREG_REGISTERED)
+	if (!pf || !netdev || !vsi->num_q_vectors)
 		return -EINVAL;
 
 	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f318d7f607e4..a881b4b6bce5 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -140,21 +140,10 @@ static int ice_init_mac_fltr(struct ice_pf *pf)
 
 	perm_addr = vsi->port_info->mac.perm_addr;
 	status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
-	if (!status)
-		return 0;
-
-	/* We aren't useful with no MAC filters, so unregister if we
-	 * had an error
-	 */
-	if (vsi->netdev->reg_state == NETREG_REGISTERED) {
-		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n",
-			ice_stat_str(status));
-		unregister_netdev(vsi->netdev);
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
-	}
+	if (status)
+		return -EIO;
 
-	return -EIO;
+	return 0;
 }
 
 /**
@@ -2982,18 +2971,11 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	struct ice_netdev_priv *np;
 	struct net_device *netdev;
 	u8 mac_addr[ETH_ALEN];
-	int err;
-
-	err = ice_devlink_create_port(vsi);
-	if (err)
-		return err;
 
 	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
 				    vsi->alloc_rxq);
-	if (!netdev) {
-		err = -ENOMEM;
-		goto err_destroy_devlink_port;
-	}
+	if (!netdev)
+		return -ENOMEM;
 
 	vsi->netdev = netdev;
 	np = netdev_priv(netdev);
@@ -3021,25 +3003,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	netdev->min_mtu = ETH_MIN_MTU;
 	netdev->max_mtu = ICE_MAX_MTU;
 
-	err = register_netdev(vsi->netdev);
-	if (err)
-		goto err_free_netdev;
-
-	devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
-
-	netif_carrier_off(vsi->netdev);
-
-	/* make sure transmit queues start off as stopped */
-	netif_tx_stop_all_queues(vsi->netdev);
-
 	return 0;
-
-err_free_netdev:
-	free_netdev(vsi->netdev);
-	vsi->netdev = NULL;
-err_destroy_devlink_port:
-	ice_devlink_destroy_port(vsi);
-	return err;
 }
 
 /**
@@ -3237,8 +3201,6 @@ unroll_napi_add:
 	if (vsi) {
 		ice_napi_del(vsi);
 		if (vsi->netdev) {
-			if (vsi->netdev->reg_state == NETREG_REGISTERED)
-				unregister_netdev(vsi->netdev);
 			free_netdev(vsi->netdev);
 			vsi->netdev = NULL;
 		}
@@ -3992,6 +3954,40 @@ static void ice_print_wake_reason(struct ice_pf *pf)
 	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
 }
 
+/**
+ * ice_register_netdev - register netdev and devlink port
+ * @pf: pointer to the PF struct
+ */
+static int ice_register_netdev(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+	int err = 0;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi || !vsi->netdev)
+		return -EIO;
+
+	err = register_netdev(vsi->netdev);
+	if (err)
+		goto err_register_netdev;
+
+	netif_carrier_off(vsi->netdev);
+	netif_tx_stop_all_queues(vsi->netdev);
+	err = ice_devlink_create_port(vsi);
+	if (err)
+		goto err_devlink_create;
+
+	devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
+
+	return 0;
+err_devlink_create:
+	unregister_netdev(vsi->netdev);
+err_register_netdev:
+	free_netdev(vsi->netdev);
+	vsi->netdev = NULL;
+	return err;
+}
+
 /**
  * ice_probe - Device initialization routine
  * @pdev: PCI device information struct
@@ -4272,10 +4268,16 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	pcie_print_link_status(pf->pdev);
 
 probe_done:
+	err = ice_register_netdev(pf);
+	if (err)
+		goto err_netdev_reg;
+
 	/* ready to go, so clear down state bit */
 	clear_bit(__ICE_DOWN, pf->state);
+
 	return 0;
 
+err_netdev_reg:
 err_send_version_unroll:
 	ice_vsi_release_all(pf);
 err_alloc_sw_unroll:
@@ -6654,11 +6656,6 @@ int ice_open(struct net_device *netdev)
 		return -EIO;
 	}
 
-	if (test_bit(__ICE_DOWN, pf->state)) {
-		netdev_err(netdev, "device is not ready yet\n");
-		return -EBUSY;
-	}
-
 	netif_carrier_off(netdev);
 
 	pi = vsi->port_info;
-- 
cgit v1.2.3


From a05983c3d024d173af0a80dc7197a21f64d52df6 Mon Sep 17 00:00:00 2001
From: Dan Nowlin <dan.nowlin@intel.com>
Date: Tue, 2 Mar 2021 10:12:04 -0800
Subject: ice: Update to use package info from ice segment

There are two package versions in the package binary. Today, these two
version numbers are the same. However, in the future that may change.

Update code to use the package info from the ice segment metadata
section, which is the package information that is actually downloaded to
the firmware during the download package process.

Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h |  1 +
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c  | 40 ++++++++++++++-----------
 drivers/net/ethernet/intel/ice/ice_flex_type.h  |  9 ++++++
 drivers/net/ethernet/intel/ice/ice_type.h       |  8 ++---
 4 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 80186589153b..5108046476f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1790,6 +1790,7 @@ struct ice_pkg_ver {
 };
 
 #define ICE_PKG_NAME_SIZE	32
+#define ICE_SEG_ID_SIZE		28
 #define ICE_SEG_NAME_SIZE	28
 
 struct ice_aqc_get_pkg_info {
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index afe77f7a3199..4b83960876f4 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -1063,32 +1063,36 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
 static enum ice_status
 ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
 {
-	struct ice_global_metadata_seg *meta_seg;
 	struct ice_generic_seg_hdr *seg_hdr;
 
 	if (!pkg_hdr)
 		return ICE_ERR_PARAM;
 
-	meta_seg = (struct ice_global_metadata_seg *)
-		   ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr);
-	if (meta_seg) {
-		hw->pkg_ver = meta_seg->pkg_ver;
-		memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name));
+	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
+	if (seg_hdr) {
+		struct ice_meta_sect *meta;
+		struct ice_pkg_enum state;
+
+		memset(&state, 0, sizeof(state));
+
+		/* Get package information from the Metadata Section */
+		meta = ice_pkg_enum_section((struct ice_seg *)seg_hdr, &state,
+					    ICE_SID_METADATA);
+		if (!meta) {
+			ice_debug(hw, ICE_DBG_INIT, "Did not find ice metadata section in package\n");
+			return ICE_ERR_CFG;
+		}
+
+		hw->pkg_ver = meta->ver;
+		memcpy(hw->pkg_name, meta->name, sizeof(meta->name));
 
 		ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n",
-			  meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor,
-			  meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft,
-			  meta_seg->pkg_name);
-	} else {
-		ice_debug(hw, ICE_DBG_INIT, "Did not find metadata segment in driver package\n");
-		return ICE_ERR_CFG;
-	}
+			  meta->ver.major, meta->ver.minor, meta->ver.update,
+			  meta->ver.draft, meta->name);
 
-	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
-	if (seg_hdr) {
-		hw->ice_pkg_ver = seg_hdr->seg_format_ver;
-		memcpy(hw->ice_pkg_name, seg_hdr->seg_id,
-		       sizeof(hw->ice_pkg_name));
+		hw->ice_seg_fmt_ver = seg_hdr->seg_format_ver;
+		memcpy(hw->ice_seg_id, seg_hdr->seg_id,
+		       sizeof(hw->ice_seg_id));
 
 		ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n",
 			  seg_hdr->seg_format_ver.major,
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index abc156ce9d8c..9806183920de 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -109,6 +109,7 @@ struct ice_buf_hdr {
 	(ent_sz))
 
 /* ice package section IDs */
+#define ICE_SID_METADATA		1
 #define ICE_SID_XLT0_SW			10
 #define ICE_SID_XLT_KEY_BUILDER_SW	11
 #define ICE_SID_XLT1_SW			12
@@ -117,6 +118,14 @@ struct ice_buf_hdr {
 #define ICE_SID_PROFID_REDIR_SW		15
 #define ICE_SID_FLD_VEC_SW		16
 #define ICE_SID_CDID_KEY_BUILDER_SW	17
+
+struct ice_meta_sect {
+	struct ice_pkg_ver ver;
+#define ICE_META_SECT_NAME_SIZE	28
+	char name[ICE_META_SECT_NAME_SIZE];
+	__le32 track_id;
+};
+
 #define ICE_SID_CDID_REDIR_SW		18
 
 #define ICE_SID_XLT0_ACL		20
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 2893143d9e62..ba157b383127 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -720,13 +720,13 @@ struct ice_hw {
 
 	enum ice_aq_err pkg_dwnld_status;
 
-	/* Driver's package ver - (from the Metadata seg) */
+	/* Driver's package ver - (from the Ice Metadata section) */
 	struct ice_pkg_ver pkg_ver;
 	u8 pkg_name[ICE_PKG_NAME_SIZE];
 
-	/* Driver's Ice package version (from the Ice seg) */
-	struct ice_pkg_ver ice_pkg_ver;
-	u8 ice_pkg_name[ICE_PKG_NAME_SIZE];
+	/* Driver's Ice segment format version and ID (from the Ice seg) */
+	struct ice_pkg_ver ice_seg_fmt_ver;
+	u8 ice_seg_id[ICE_SEG_ID_SIZE];
 
 	/* Pointer to the ice segment */
 	struct ice_seg *seg;
-- 
cgit v1.2.3


From 2ec5638559c13b923250eccf495d2a033fccb3e7 Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Tue, 2 Mar 2021 10:12:05 -0800
Subject: ice: handle increasing Tx or Rx ring sizes

There is an issue when the Tx or Rx ring size increases using
'ethtool -L ...' where the new rings don't get the correct ITR
values because when we rebuild the VSI we don't know that some
of the rings may be new.

Fix this by looking at the original number of rings and
determining if the rings in ice_vsi_rebuild_set_coalesce()
were not present in the original rings received in
ice_vsi_rebuild_get_coalesce().

Also change the code to return an error if we can't allocate
memory for the coalesce data in ice_vsi_rebuild().

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 123 ++++++++++++++++++++++--------
 drivers/net/ethernet/intel/ice/ice_txrx.h |   2 +
 2 files changed, 92 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c345432fac72..0763696c3d08 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2866,38 +2866,46 @@ int ice_vsi_release(struct ice_vsi *vsi)
 }
 
 /**
- * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector
+ * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector
  * @q_vector: pointer to q_vector which is being updated
- * @coalesce: pointer to array of struct with stored coalesce
+ * @stored_intrl_setting: original INTRL setting
  *
  * Set coalesce param in q_vector and update these parameters in HW.
  */
 static void
-ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector,
-				struct ice_coalesce_stored *coalesce)
+ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector,
+				      u16 stored_intrl_setting)
 {
-	struct ice_ring_container *rx_rc = &q_vector->rx;
-	struct ice_ring_container *tx_rc = &q_vector->tx;
 	struct ice_hw *hw = &q_vector->vsi->back->hw;
 
-	tx_rc->itr_setting = coalesce->itr_tx;
-	rx_rc->itr_setting = coalesce->itr_rx;
-
-	/* dynamic ITR values will be updated during Tx/Rx */
-	if (!ITR_IS_DYNAMIC(tx_rc->itr_setting))
-		wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(tx_rc->itr_setting) >>
-		     ICE_ITR_GRAN_S);
-	if (!ITR_IS_DYNAMIC(rx_rc->itr_setting))
-		wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rx_rc->itr_setting) >>
-		     ICE_ITR_GRAN_S);
-
-	q_vector->intrl = coalesce->intrl;
+	q_vector->intrl = stored_intrl_setting;
 	wr32(hw, GLINT_RATE(q_vector->reg_idx),
 	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
 }
 
+/**
+ * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector
+ * @q_vector: pointer to q_vector which is being updated
+ * @rc: pointer to ring container
+ * @stored_itr_setting: original ITR setting
+ *
+ * Set coalesce param in q_vector and update these parameters in HW.
+ */
+static void
+ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector,
+				    struct ice_ring_container *rc,
+				    u16 stored_itr_setting)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	rc->itr_setting = stored_itr_setting;
+
+	/* dynamic ITR values will be updated during Tx/Rx */
+	if (!ITR_IS_DYNAMIC(rc->itr_setting))
+		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+		     ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S);
+}
+
 /**
  * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
  * @vsi: VSI connected with q_vectors
@@ -2917,6 +2925,11 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
 		coalesce[i].itr_tx = q_vector->tx.itr_setting;
 		coalesce[i].itr_rx = q_vector->rx.itr_setting;
 		coalesce[i].intrl = q_vector->intrl;
+
+		if (i < vsi->num_txq)
+			coalesce[i].tx_valid = true;
+		if (i < vsi->num_rxq)
+			coalesce[i].rx_valid = true;
 	}
 
 	return vsi->num_q_vectors;
@@ -2941,17 +2954,59 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
 	if ((size && !coalesce) || !vsi)
 		return;
 
-	for (i = 0; i < size && i < vsi->num_q_vectors; i++)
-		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
-						&coalesce[i]);
-
-	/* number of q_vectors increased, so assume coalesce settings were
-	 * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use
-	 * the previous settings from q_vector 0 for all of the new q_vectors
+	/* There are a couple of cases that have to be handled here:
+	 *   1. The case where the number of queue vectors stays the same, but
+	 *      the number of Tx or Rx rings changes (the first for loop)
+	 *   2. The case where the number of queue vectors increased (the
+	 *      second for loop)
 	 */
-	for (; i < vsi->num_q_vectors; i++)
-		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
-						&coalesce[0]);
+	for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
+		/* There are 2 cases to handle here and they are the same for
+		 * both Tx and Rx:
+		 *   if the entry was valid previously (coalesce[i].[tr]x_valid
+		 *   and the loop variable is less than the number of rings
+		 *   allocated, then write the previous values
+		 *
+		 *   if the entry was not valid previously, but the number of
+		 *   rings is less than are allocated (this means the number of
+		 *   rings increased from previously), then write out the
+		 *   values in the first element
+		 */
+		if (i < vsi->alloc_rxq && coalesce[i].rx_valid)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->rx,
+							    coalesce[i].itr_rx);
+		else if (i < vsi->alloc_rxq)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->rx,
+							    coalesce[0].itr_rx);
+
+		if (i < vsi->alloc_txq && coalesce[i].tx_valid)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->tx,
+							    coalesce[i].itr_tx);
+		else if (i < vsi->alloc_txq)
+			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+							    &vsi->q_vectors[i]->tx,
+							    coalesce[0].itr_tx);
+
+		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
+						      coalesce[i].intrl);
+	}
+
+	/* the number of queue vectors increased so write whatever is in
+	 * the first element
+	 */
+	for (; i < vsi->num_q_vectors; i++) {
+		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+						    &vsi->q_vectors[i]->tx,
+						    coalesce[0].itr_tx);
+		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
+						    &vsi->q_vectors[i]->rx,
+						    coalesce[0].itr_rx);
+		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
+						      coalesce[0].intrl);
+	}
 }
 
 /**
@@ -2980,9 +3035,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 
 	coalesce = kcalloc(vsi->num_q_vectors,
 			   sizeof(struct ice_coalesce_stored), GFP_KERNEL);
-	if (coalesce)
-		prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi,
-								  coalesce);
+	if (!coalesce)
+		return -ENOMEM;
+
+	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
 	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
 	ice_vsi_free_q_vectors(vsi);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 40e34ede6e58..ffe0d271dec7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -357,6 +357,8 @@ struct ice_coalesce_stored {
 	u16 itr_tx;
 	u16 itr_rx;
 	u8 intrl;
+	u8 tx_valid;
+	u8 rx_valid;
 };
 
 /* iterator for handling rings in ring container */
-- 
cgit v1.2.3


From 5c57145a49bd93b72878ef9b4a1d798753b9faff Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Tue, 2 Mar 2021 10:12:07 -0800
Subject: ice: change link misconfiguration message

Change link misconfiguration message since the configuration
could be intended by the user.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index a881b4b6bce5..336db16d0e12 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -598,7 +598,7 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi)
 	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
 	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
 	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
-		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
+		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
 		break;
 	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
 		netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
-- 
cgit v1.2.3


From 800c1443cbe1bd17f11e3580744894c3ee1a5c8e Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Tue, 2 Mar 2021 10:12:08 -0800
Subject: ice: remove unnecessary duplicated AQ command flag setting

Commit a012dca9f7a2 ("ice: add ethtool -m support for reading i2c eeprom
modules") unnecessarily added the ICE_AQ_FLAG_BUF flag to the descriptor
when sending the indirect Read/Write SFF EEPROM AQ command. The flag is
already added later in the code flow for all indirect AQ commands, i.e.
commands that provide an additional data buffer.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 1898325e62b5..b906ca4dad36 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3186,7 +3186,7 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
 
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
 	cmd = &desc.params.read_write_sff_param;
-	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
 	cmd->lport_num = (u8)(lport & 0xff);
 	cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
 	cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
-- 
cgit v1.2.3


From 805f980bfe0e7d07cd4cd5d1183722f0355c0f4a Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 2 Mar 2021 10:12:09 -0800
Subject: ice: Check for bail out condition early

Check for bail out condition before calling ice_aq_sff_eeprom

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 4f738425fb44..109dd6962b1d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3926,14 +3926,14 @@ ice_get_module_eeprom(struct net_device *netdev,
 	u8 value = 0;
 	u8 page = 0;
 
-	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
-				   &value, 1, 0, NULL);
-	if (status)
-		return -EIO;
-
 	if (!ee || !ee->len || !data)
 		return -EINVAL;
 
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0,
+				   NULL);
+	if (status)
+		return -EIO;
+
 	if (value == ICE_MODULE_TYPE_SFP)
 		is_sfp = true;
 
-- 
cgit v1.2.3


From 36ac7911fae7575b8cebf9326a23901cba28c015 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Tue, 2 Mar 2021 10:12:10 -0800
Subject: ice: correct memory allocation call

Use *malloc() instead of *calloc() when allocating only a single object as
opposed to an array of objects.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_switch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 67c965a3f5d2..5e5683a3eb23 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -920,7 +920,7 @@ ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
 	struct ice_vsi_list_map_info *v_map;
 	int i;
 
-	v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL);
+	v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL);
 	if (!v_map)
 		return NULL;
 
-- 
cgit v1.2.3


From 94a936981a3eae3f1e9d1bf02c32d3f559a7aa95 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 2 Mar 2021 10:12:11 -0800
Subject: ice: rename ptype bitmap

Align all ptype bitmap to follow ice_ptypes_xxx prefix.

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flow.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 8e8bfc6fa2b4..571245799646 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -238,7 +238,7 @@ static const u32 ice_ptypes_ipv6_il[] = {
 };
 
 /* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */
-static const u32 ice_ipv4_ofos_no_l4[] = {
+static const u32 ice_ptypes_ipv4_ofos_no_l4[] = {
 	0x10C00000, 0x04000800, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -262,7 +262,7 @@ static const u32 ice_ptypes_arp_of[] = {
 };
 
 /* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
-static const u32 ice_ipv4_il_no_l4[] = {
+static const u32 ice_ptypes_ipv4_il_no_l4[] = {
 	0x60000000, 0x18043008, 0x80000002, 0x6010c021,
 	0x00000008, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -274,7 +274,7 @@ static const u32 ice_ipv4_il_no_l4[] = {
 };
 
 /* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */
-static const u32 ice_ipv6_ofos_no_l4[] = {
+static const u32 ice_ptypes_ipv6_ofos_no_l4[] = {
 	0x00000000, 0x00000000, 0x43000000, 0x10002000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -286,7 +286,7 @@ static const u32 ice_ipv6_ofos_no_l4[] = {
 };
 
 /* Packet types for packets with an Innermost/Last IPv6 header - no L4 */
-static const u32 ice_ipv6_il_no_l4[] = {
+static const u32 ice_ptypes_ipv6_il_no_l4[] = {
 	0x00000000, 0x02180430, 0x0000010c, 0x086010c0,
 	0x00000430, 0x00000000, 0x00000000, 0x00000000,
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -749,8 +749,8 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   ICE_FLOW_PTYPE_MAX);
 		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
 			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
-			src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 :
-				(const unsigned long *)ice_ipv4_il_no_l4;
+			src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos_no_l4 :
+				(const unsigned long *)ice_ptypes_ipv4_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) {
@@ -760,8 +760,8 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 				   ICE_FLOW_PTYPE_MAX);
 		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
 			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
-			src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 :
-				(const unsigned long *)ice_ipv6_il_no_l4;
+			src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos_no_l4 :
+				(const unsigned long *)ice_ptypes_ipv6_il_no_l4;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
 		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) {
-- 
cgit v1.2.3


From 8134d5ff9788d3e7f63f963a211927a60ce462d6 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:15:33 -0800
Subject: ice: Change ice_vsi_setup_q_map() to not depend on RSS

Currently, ice_vsi_setup_q_map() depends on the VSI's rss_size. However,
the Rx Queue Mapping section of the VSI context has no dependency on RSS.
Instead, limit the maximum number of Rx queues per TC based on the Rx
Queue mapping section of the VSI context, which currently allows for up
to 256 Rx queues per TC.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h     |  1 +
 drivers/net/ethernet/intel/ice/ice_lib.c | 50 ++++++++++----------------------
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 9bf346133cbd..ecbf62eddcc9 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -89,6 +89,7 @@
 #define ICE_INVAL_Q_INDEX	0xffff
 #define ICE_INVAL_VFID		256
 
+#define ICE_MAX_RXQS_PER_TC		256	/* Used when setting VSI context per TC Rx queues */
 #define ICE_MAX_RESET_WAIT		20
 
 #define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0763696c3d08..f2cf914cba69 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -742,11 +742,10 @@ static void ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt)
  */
 static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 {
-	u16 offset = 0, qmap = 0, tx_count = 0;
+	u16 offset = 0, qmap = 0, tx_count = 0, pow = 0;
+	u16 num_txq_per_tc, num_rxq_per_tc;
 	u16 qcount_tx = vsi->alloc_txq;
 	u16 qcount_rx = vsi->alloc_rxq;
-	u16 tx_numq_tc, rx_numq_tc;
-	u16 pow = 0, max_rss = 0;
 	bool ena_tc0 = false;
 	u8 netdev_tc = 0;
 	int i;
@@ -764,12 +763,15 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 		vsi->tc_cfg.ena_tc |= 1;
 	}
 
-	rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc;
-	if (!rx_numq_tc)
-		rx_numq_tc = 1;
-	tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc;
-	if (!tx_numq_tc)
-		tx_numq_tc = 1;
+	num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
+	if (!num_rxq_per_tc)
+		num_rxq_per_tc = 1;
+	num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc;
+	if (!num_txq_per_tc)
+		num_txq_per_tc = 1;
+
+	/* find the (rounded up) power-of-2 of qcount */
+	pow = (u16)order_base_2(num_rxq_per_tc);
 
 	/* TC mapping is a function of the number of Rx queues assigned to the
 	 * VSI for each traffic class and the offset of these queues.
@@ -782,26 +784,6 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 	 *
 	 * Setup number and offset of Rx queues for all TCs for the VSI
 	 */
-
-	qcount_rx = rx_numq_tc;
-
-	/* qcount will change if RSS is enabled */
-	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
-		if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) {
-			if (vsi->type == ICE_VSI_PF)
-				max_rss = ICE_MAX_LG_RSS_QS;
-			else
-				max_rss = ICE_MAX_RSS_QS_PER_VF;
-			qcount_rx = min_t(u16, rx_numq_tc, max_rss);
-			if (!vsi->req_rxq)
-				qcount_rx = min_t(u16, qcount_rx,
-						  vsi->rss_size);
-		}
-	}
-
-	/* find the (rounded up) power-of-2 of qcount */
-	pow = (u16)order_base_2(qcount_rx);
-
 	ice_for_each_traffic_class(i) {
 		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
 			/* TC is not enabled */
@@ -815,16 +797,16 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 
 		/* TC is enabled */
 		vsi->tc_cfg.tc_info[i].qoffset = offset;
-		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
-		vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc;
+		vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc;
+		vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc;
 		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
 
 		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
 			ICE_AQ_VSI_TC_Q_OFFSET_M) |
 			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
 			 ICE_AQ_VSI_TC_Q_NUM_M);
-		offset += qcount_rx;
-		tx_count += tx_numq_tc;
+		offset += num_rxq_per_tc;
+		tx_count += num_txq_per_tc;
 		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
 	}
 
@@ -837,7 +819,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 	if (offset)
 		vsi->num_rxq = offset;
 	else
-		vsi->num_rxq = qcount_rx;
+		vsi->num_rxq = num_rxq_per_tc;
 
 	vsi->num_txq = tx_count;
 
-- 
cgit v1.2.3


From e3c53928a3b2b3ec983955a838547aa7344822be Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:15:35 -0800
Subject: ice: Refactor get/set RSS LUT to use struct parameter

Update ice_aq_get_rss_lut() and ice_aq_set_rss_lut() to take a new
structure ice_aq_get_set_rss_params instead of passing individual
parameters. This is done for 2 reasons:

1. Reduce the number of parameters passed to the functions.
2. Reduce the amount of change required if the arguments ever need to be
   updated in the future.

Also, reduce duplicate code that was checking for an invalid vsi_handle
and lut parameter by moving the checks to the lower level
__ice_aq_get_set_rss_lut().

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c  | 54 +++++++++++++---------------
 drivers/net/ethernet/intel/ice/ice_common.h  |  6 ++--
 drivers/net/ethernet/intel/ice/ice_ethtool.c |  9 +++--
 drivers/net/ethernet/intel/ice/ice_lib.c     |  9 +++--
 drivers/net/ethernet/intel/ice/ice_main.c    | 18 +++++++---
 drivers/net/ethernet/intel/ice/ice_type.h    |  8 +++++
 6 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index b906ca4dad36..54df00ee912b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3206,23 +3206,33 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
 /**
  * __ice_aq_get_set_rss_lut
  * @hw: pointer to the hardware structure
- * @vsi_id: VSI FW index
- * @lut_type: LUT table type
- * @lut: pointer to the LUT buffer provided by the caller
- * @lut_size: size of the LUT buffer
- * @glob_lut_idx: global LUT index
+ * @params: RSS LUT parameters
  * @set: set true to set the table, false to get the table
  *
  * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
  */
 static enum ice_status
-__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
-			 u16 lut_size, u8 glob_lut_idx, bool set)
+__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set)
 {
+	u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle;
 	struct ice_aqc_get_set_rss_lut *cmd_resp;
 	struct ice_aq_desc desc;
 	enum ice_status status;
-	u16 flags = 0;
+	u8 *lut;
+
+	if (!params)
+		return ICE_ERR_PARAM;
+
+	vsi_handle = params->vsi_handle;
+	lut = params->lut;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
+		return ICE_ERR_PARAM;
+
+	lut_size = params->lut_size;
+	lut_type = params->lut_type;
+	glob_lut_idx = params->global_lut_id;
+	vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
 
 	cmd_resp = &desc.params.get_set_rss_lut;
 
@@ -3296,43 +3306,27 @@ ice_aq_get_set_rss_lut_exit:
 /**
  * ice_aq_get_rss_lut
  * @hw: pointer to the hardware structure
- * @vsi_handle: software VSI handle
- * @lut_type: LUT table type
- * @lut: pointer to the LUT buffer provided by the caller
- * @lut_size: size of the LUT buffer
+ * @get_params: RSS LUT parameters used to specify which RSS LUT to get
  *
  * get the RSS lookup table, PF or VSI type
  */
 enum ice_status
-ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
-		   u8 *lut, u16 lut_size)
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params)
 {
-	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
-		return ICE_ERR_PARAM;
-
-	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
-					lut_type, lut, lut_size, 0, false);
+	return __ice_aq_get_set_rss_lut(hw, get_params, false);
 }
 
 /**
  * ice_aq_set_rss_lut
  * @hw: pointer to the hardware structure
- * @vsi_handle: software VSI handle
- * @lut_type: LUT table type
- * @lut: pointer to the LUT buffer provided by the caller
- * @lut_size: size of the LUT buffer
+ * @set_params: RSS LUT parameters used to specify how to set the RSS LUT
  *
  * set the RSS lookup table, PF or VSI type
  */
 enum ice_status
-ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
-		   u8 *lut, u16 lut_size)
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params)
 {
-	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
-		return ICE_ERR_PARAM;
-
-	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
-					lut_type, lut, lut_size, 0, true);
+	return __ice_aq_get_set_rss_lut(hw, set_params, true);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index baf4064fcbfe..81fd69cb1485 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -51,11 +51,9 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 		  u32 rxq_index);
 
 enum ice_status
-ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
-		   u16 lut_size);
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
 enum ice_status
-ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut,
-		   u16 lut_size);
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params);
 enum ice_status
 ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
 		   struct ice_aqc_get_set_rss_keys *keys);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 109dd6962b1d..0840fcf2f2cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3328,6 +3328,7 @@ static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
  */
 static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 {
+	struct ice_aq_get_set_rss_lut_params set_params = {};
 	struct ice_pf *pf = vsi->back;
 	enum ice_status status;
 	struct device *dev;
@@ -3353,8 +3354,12 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 
 	/* create/set RSS LUT */
 	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
-	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
-				    vsi->rss_table_size);
+	set_params.vsi_handle = vsi->idx;
+	set_params.lut_size = vsi->rss_table_size;
+	set_params.lut_type = vsi->rss_lut_type;
+	set_params.lut = lut;
+	set_params.global_lut_id = 0;
+	status = ice_aq_set_rss_lut(hw, &set_params);
 	if (status) {
 		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 			ice_stat_str(status),
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f2cf914cba69..22bcccd1073b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1337,6 +1337,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
  */
 static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 {
+	struct ice_aq_get_set_rss_lut_params set_params = {};
 	struct ice_aqc_get_set_rss_keys *key;
 	struct ice_pf *pf = vsi->back;
 	enum ice_status status;
@@ -1356,8 +1357,12 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	else
 		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
 
-	status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut,
-				    vsi->rss_table_size);
+	set_params.vsi_handle = vsi->idx;
+	set_params.lut_size = vsi->rss_table_size;
+	set_params.lut_type = vsi->rss_lut_type;
+	set_params.lut = lut;
+	set_params.global_lut_id = 0;
+	status = ice_aq_set_rss_lut(&pf->hw, &set_params);
 
 	if (status) {
 		dev_err(dev, "set_rss_lut failed, error %s\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 336db16d0e12..8085f0ab2441 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6348,8 +6348,13 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 	}
 
 	if (lut) {
-		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
-					    lut, lut_size);
+		struct ice_aq_get_set_rss_lut_params set_params = {
+			.vsi_handle = vsi->idx, .lut_size = lut_size,
+			.lut_type = vsi->rss_lut_type, .lut = lut,
+			.global_lut_id = 0
+		};
+
+		status = ice_aq_set_rss_lut(hw, &set_params);
 		if (status) {
 			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 				ice_stat_str(status),
@@ -6392,8 +6397,13 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 	}
 
 	if (lut) {
-		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
-					    lut, lut_size);
+		struct ice_aq_get_set_rss_lut_params get_params = {
+			.vsi_handle = vsi->idx, .lut_size = lut_size,
+			.lut_type = vsi->rss_lut_type, .lut = lut,
+			.global_lut_id = 0
+		};
+
+		status = ice_aq_get_rss_lut(hw, &get_params);
 		if (status) {
 			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
 				ice_stat_str(status),
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index ba157b383127..276ebcc309dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -827,6 +827,14 @@ struct ice_hw_port_stats {
 	u64 fd_sb_match;
 };
 
+struct ice_aq_get_set_rss_lut_params {
+	u16 vsi_handle;		/* software VSI handle */
+	u16 lut_size;		/* size of the LUT buffer */
+	u8 lut_type;		/* type of the LUT (i.e. VSI, PF, Global) */
+	u8 *lut;		/* input RSS LUT for set and output RSS LUT for get */
+	u8 global_lut_id;	/* only valid when lut_type is global */
+};
+
 /* Checksum and Shadow RAM pointers */
 #define ICE_SR_NVM_CTRL_WORD		0x00
 #define ICE_SR_BOOT_CFG_PTR		0x132
-- 
cgit v1.2.3


From b66a972abb6b4a2fe8e0444ba2a2d3718c79d5ad Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:15:36 -0800
Subject: ice: Refactor ice_set/get_rss into LUT and key specific functions

Currently ice_set/get_rss are used to set/get the RSS LUT and/or RSS
key. However nearly everywhere these functions are called only the LUT
or key are set/get. Also, making this change reduces how many things
ice_set/get_rss are doing. Fix this by adding ice_set/get_rss_lut and
ice_set/get_rss_key functions.

Also, consolidate all calls for setting/getting the RSS LUT and RSS Key
to use ice_set/get_rss_lut() and ice_set/get_rss_key().

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |   6 +-
 drivers/net/ethernet/intel/ice/ice_ethtool.c     |  43 ++++---
 drivers/net/ethernet/intel/ice/ice_lib.c         |  42 ++-----
 drivers/net/ethernet/intel/ice/ice_main.c        | 139 +++++++++++++----------
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c |   4 +-
 5 files changed, 117 insertions(+), 117 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index ecbf62eddcc9..17705db51022 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -621,8 +621,10 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi);
 int
 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	     u32 flags);
-int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
-int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 0840fcf2f2cc..998629b9767f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3140,7 +3140,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
-	int ret = 0, i;
+	int err, i;
 	u8 *lut;
 
 	if (hfunc)
@@ -3159,17 +3159,20 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 	if (!lut)
 		return -ENOMEM;
 
-	if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
-		ret = -EIO;
+	err = ice_get_rss_key(vsi, key);
+	if (err)
+		goto out;
+
+	err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err)
 		goto out;
-	}
 
 	for (i = 0; i < vsi->rss_table_size; i++)
 		indir[i] = (u32)(lut[i]);
 
 out:
 	kfree(lut);
-	return ret;
+	return err;
 }
 
 /**
@@ -3190,7 +3193,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	struct device *dev;
-	u8 *seed = NULL;
+	int err;
 
 	dev = ice_pf_to_dev(pf);
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
@@ -3211,7 +3214,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 				return -ENOMEM;
 		}
 		memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
-		seed = vsi->rss_hkey_user;
+
+		err = ice_set_rss_key(vsi, vsi->rss_hkey_user);
+		if (err)
+			return err;
 	}
 
 	if (!vsi->rss_lut_user) {
@@ -3232,8 +3238,9 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
 				 vsi->rss_size);
 	}
 
-	if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size))
-		return -EIO;
+	err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size);
+	if (err)
+		return err;
 
 	return 0;
 }
@@ -3328,12 +3335,10 @@ static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
  */
 static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 {
-	struct ice_aq_get_set_rss_lut_params set_params = {};
 	struct ice_pf *pf = vsi->back;
-	enum ice_status status;
 	struct device *dev;
 	struct ice_hw *hw;
-	int err = 0;
+	int err;
 	u8 *lut;
 
 	dev = ice_pf_to_dev(pf);
@@ -3354,18 +3359,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 
 	/* create/set RSS LUT */
 	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
-	set_params.vsi_handle = vsi->idx;
-	set_params.lut_size = vsi->rss_table_size;
-	set_params.lut_type = vsi->rss_lut_type;
-	set_params.lut = lut;
-	set_params.global_lut_id = 0;
-	status = ice_aq_set_rss_lut(hw, &set_params);
-	if (status) {
-		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
-			ice_stat_str(status),
+	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err)
+		dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err,
 			ice_aq_str(hw->adminq.sq_last_status));
-		err = -EIO;
-	}
 
 	kfree(lut);
 	return err;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 22bcccd1073b..d3b99b1ea32a 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1326,7 +1326,7 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
 					 vsi->rss_size);
 	}
 
-	err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
+	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
 	kfree(lut);
 	return err;
 }
@@ -1337,13 +1337,10 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
  */
 static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 {
-	struct ice_aq_get_set_rss_lut_params set_params = {};
-	struct ice_aqc_get_set_rss_keys *key;
 	struct ice_pf *pf = vsi->back;
-	enum ice_status status;
 	struct device *dev;
-	int err = 0;
-	u8 *lut;
+	u8 *lut, *key;
+	int err;
 
 	dev = ice_pf_to_dev(pf);
 	vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
@@ -1357,41 +1354,26 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	else
 		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
 
-	set_params.vsi_handle = vsi->idx;
-	set_params.lut_size = vsi->rss_table_size;
-	set_params.lut_type = vsi->rss_lut_type;
-	set_params.lut = lut;
-	set_params.global_lut_id = 0;
-	status = ice_aq_set_rss_lut(&pf->hw, &set_params);
-
-	if (status) {
-		dev_err(dev, "set_rss_lut failed, error %s\n",
-			ice_stat_str(status));
-		err = -EIO;
+	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err) {
+		dev_err(dev, "set_rss_lut failed, error %d\n", err);
 		goto ice_vsi_cfg_rss_exit;
 	}
 
-	key = kzalloc(sizeof(*key), GFP_KERNEL);
+	key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL);
 	if (!key) {
 		err = -ENOMEM;
 		goto ice_vsi_cfg_rss_exit;
 	}
 
 	if (vsi->rss_hkey_user)
-		memcpy(key,
-		       (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user,
-		       ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+		memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
 	else
-		netdev_rss_key_fill((void *)key,
-				    ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
-
-	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
+		netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
 
-	if (status) {
-		dev_err(dev, "set_rss_key failed, error %s\n",
-			ice_stat_str(status));
-		err = -EIO;
-	}
+	err = ice_set_rss_key(vsi, key);
+	if (err)
+		dev_err(dev, "set_rss_key failed, error %d\n", err);
 
 	kfree(key);
 ice_vsi_cfg_rss_exit:
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8085f0ab2441..9410af2067d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -6317,99 +6317,118 @@ const char *ice_stat_str(enum ice_status stat_err)
 }
 
 /**
- * ice_set_rss - Set RSS keys and lut
+ * ice_set_rss_lut - Set RSS LUT
  * @vsi: Pointer to VSI structure
- * @seed: RSS hash seed
  * @lut: Lookup table
  * @lut_size: Lookup table size
  *
  * Returns 0 on success, negative on failure
  */
-int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
+	struct ice_aq_get_set_rss_lut_params params = {};
+	struct ice_hw *hw = &vsi->back->hw;
 	enum ice_status status;
-	struct device *dev;
 
-	dev = ice_pf_to_dev(pf);
-	if (seed) {
-		struct ice_aqc_get_set_rss_keys *buf =
-				  (struct ice_aqc_get_set_rss_keys *)seed;
+	if (!lut)
+		return -EINVAL;
 
-		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
+	params.vsi_handle = vsi->idx;
+	params.lut_size = lut_size;
+	params.lut_type = vsi->rss_lut_type;
+	params.lut = lut;
 
-		if (status) {
-			dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n",
-				ice_stat_str(status),
-				ice_aq_str(hw->adminq.sq_last_status));
-			return -EIO;
-		}
+	status = ice_aq_set_rss_lut(hw, &params);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		return -EIO;
 	}
 
-	if (lut) {
-		struct ice_aq_get_set_rss_lut_params set_params = {
-			.vsi_handle = vsi->idx, .lut_size = lut_size,
-			.lut_type = vsi->rss_lut_type, .lut = lut,
-			.global_lut_id = 0
-		};
+	return 0;
+}
 
-		status = ice_aq_set_rss_lut(hw, &set_params);
-		if (status) {
-			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
-				ice_stat_str(status),
-				ice_aq_str(hw->adminq.sq_last_status));
-			return -EIO;
-		}
+/**
+ * ice_set_rss_key - Set RSS key
+ * @vsi: Pointer to the VSI structure
+ * @seed: RSS hash seed
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	enum ice_status status;
+
+	if (!seed)
+		return -EINVAL;
+
+	status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		return -EIO;
 	}
 
 	return 0;
 }
 
 /**
- * ice_get_rss - Get RSS keys and lut
+ * ice_get_rss_lut - Get RSS LUT
  * @vsi: Pointer to VSI structure
- * @seed: Buffer to store the keys
  * @lut: Buffer to store the lookup table entries
  * @lut_size: Size of buffer to store the lookup table entries
  *
  * Returns 0 on success, negative on failure
  */
-int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
 {
-	struct ice_pf *pf = vsi->back;
-	struct ice_hw *hw = &pf->hw;
+	struct ice_aq_get_set_rss_lut_params params = {};
+	struct ice_hw *hw = &vsi->back->hw;
 	enum ice_status status;
-	struct device *dev;
 
-	dev = ice_pf_to_dev(pf);
-	if (seed) {
-		struct ice_aqc_get_set_rss_keys *buf =
-				  (struct ice_aqc_get_set_rss_keys *)seed;
+	if (!lut)
+		return -EINVAL;
 
-		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
-		if (status) {
-			dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n",
-				ice_stat_str(status),
-				ice_aq_str(hw->adminq.sq_last_status));
-			return -EIO;
-		}
+	params.vsi_handle = vsi->idx;
+	params.lut_size = lut_size;
+	params.lut_type = vsi->rss_lut_type;
+	params.lut = lut;
+
+	status = ice_aq_get_rss_lut(hw, &params);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		return -EIO;
 	}
 
-	if (lut) {
-		struct ice_aq_get_set_rss_lut_params get_params = {
-			.vsi_handle = vsi->idx, .lut_size = lut_size,
-			.lut_type = vsi->rss_lut_type, .lut = lut,
-			.global_lut_id = 0
-		};
+	return 0;
+}
 
-		status = ice_aq_get_rss_lut(hw, &get_params);
-		if (status) {
-			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
-				ice_stat_str(status),
-				ice_aq_str(hw->adminq.sq_last_status));
-			return -EIO;
-		}
+/**
+ * ice_get_rss_key - Get RSS key
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the key in
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	enum ice_status status;
+
+	if (!seed)
+		return -EINVAL;
+
+	status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		return -EIO;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 78679ece2e08..e68d52a6b11d 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2233,7 +2233,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (ice_set_rss(vsi, vrk->key, NULL, 0))
+	if (ice_set_rss_key(vsi, vrk->key))
 		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
 error_param:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
@@ -2280,7 +2280,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
+	if (ice_set_rss_lut(vsi, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE))
 		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
 error_param:
 	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
-- 
cgit v1.2.3


From e97fb1aea9056299d013aa30783d6a995136a2c8 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 2 Mar 2021 10:15:37 -0800
Subject: ice: Consolidate VSI state and flags

struct ice_vsi has two fields, state and flags which seem to
be serving the same purpose. Consolidate them into one field
'state'.

enum ice_state is used to represent state information of the PF.
While some of these enum values can be use to represent VSI state,
it makes more sense to represent VSI state with its own enum. So
derive a new enum ice_vsi_state from ice_vsi_flags and ice_state
and use it. Also rename enum ice_state to ice_pf_state for clarity.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h         | 21 ++++++------
 drivers/net/ethernet/intel/ice/ice_ethtool.c |  2 +-
 drivers/net/ethernet/intel/ice/ice_lib.c     | 14 ++++----
 drivers/net/ethernet/intel/ice/ice_main.c    | 50 ++++++++++++++--------------
 drivers/net/ethernet/intel/ice/ice_txrx.c    |  6 ++--
 5 files changed, 47 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 17705db51022..02badaaf818c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -193,7 +193,7 @@ struct ice_sw {
 	u8 dflt_vsi_ena:1;	/* true if above dflt_vsi is enabled */
 };
 
-enum ice_state {
+enum ice_pf_state {
 	__ICE_TESTING,
 	__ICE_DOWN,
 	__ICE_NEEDS_RESTART,
@@ -236,12 +236,14 @@ enum ice_state {
 	__ICE_STATE_NBITS		/* must be last */
 };
 
-enum ice_vsi_flags {
-	ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
-	ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
-	ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
-	ICE_VSI_FLAG_PROMISC_CHANGED,
-	ICE_VSI_FLAG_NBITS		/* must be last */
+enum ice_vsi_state {
+	ICE_VSI_DOWN,
+	ICE_VSI_NEEDS_RESTART,
+	ICE_VSI_UMAC_FLTR_CHANGED,
+	ICE_VSI_MMAC_FLTR_CHANGED,
+	ICE_VSI_VLAN_FLTR_CHANGED,
+	ICE_VSI_PROMISC_CHANGED,
+	ICE_VSI_STATE_NBITS		/* must be last */
 };
 
 /* struct that defines a VSI, associated with a dev */
@@ -257,8 +259,7 @@ struct ice_vsi {
 	irqreturn_t (*irq_handler)(int irq, void *data);
 
 	u64 tx_linearize;
-	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
-	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
+	DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS);
 	unsigned int current_netdev_flags;
 	u32 tx_restart;
 	u32 tx_busy;
@@ -504,7 +505,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
 	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
 	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
 	if (vsi)
-		if (test_bit(__ICE_DOWN, vsi->state))
+		if (test_bit(ICE_VSI_DOWN, vsi->state))
 			return;
 	wr32(hw, GLINT_DYN_CTL(vector), val);
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 998629b9767f..15152e63f204 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2886,7 +2886,7 @@ process_link:
 	/* Bring interface down, copy in the new ring info, then restore the
 	 * interface. if VSI is up, bring it down and then back up
 	 */
-	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
 		ice_down(vsi);
 
 		if (tx_rings) {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index d3b99b1ea32a..6041ca2830de 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -422,7 +422,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 
 	vsi->type = vsi_type;
 	vsi->back = pf;
-	set_bit(__ICE_DOWN, vsi->state);
+	set_bit(ICE_VSI_DOWN, vsi->state);
 
 	if (vsi_type == ICE_VSI_VF)
 		ice_vsi_set_num_qs(vsi, vf_id);
@@ -2593,7 +2593,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
  */
 void ice_vsi_close(struct ice_vsi *vsi)
 {
-	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
+	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
 		ice_down(vsi);
 
 	ice_vsi_free_irq(vsi);
@@ -2610,10 +2610,10 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
 {
 	int err = 0;
 
-	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
+	if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state))
 		return 0;
 
-	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
+	clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
 
 	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
 		if (netif_running(vsi->netdev)) {
@@ -2639,10 +2639,10 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
  */
 void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
 {
-	if (test_bit(__ICE_DOWN, vsi->state))
+	if (test_bit(ICE_VSI_DOWN, vsi->state))
 		return;
 
-	set_bit(__ICE_NEEDS_RESTART, vsi->state);
+	set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
 
 	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
 		if (netif_running(vsi->netdev)) {
@@ -2812,7 +2812,7 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	ice_vsi_free_q_vectors(vsi);
 
 	/* make sure unregister_netdev() was called by checking __ICE_DOWN */
-	if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
+	if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) {
 		free_netdev(vsi->netdev);
 		vsi->netdev = NULL;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9410af2067d2..b3c1cadecf21 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -84,7 +84,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 			break;
 		}
 
-	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
+	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
 		return;
 
 	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
@@ -198,9 +198,9 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
  */
 static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
 {
-	return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
-	       test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
-	       test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
+	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) ||
+	       test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 }
 
 /**
@@ -267,9 +267,9 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	INIT_LIST_HEAD(&vsi->tmp_unsync_list);
 
 	if (ice_vsi_fltr_changed(vsi)) {
-		clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
-		clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
-		clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+		clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 
 		/* grab the netdev's addr_list_lock */
 		netif_addr_lock_bh(netdev);
@@ -350,8 +350,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	}
 
 	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
-	    test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
-		clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
+		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
 		if (vsi->current_netdev_flags & IFF_PROMISC) {
 			/* Apply Rx filter rule to get traffic from wire */
 			if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
@@ -384,12 +384,12 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	goto exit;
 
 out_promisc:
-	set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
 	goto exit;
 out:
 	/* if something went wrong then set the changed flag so we try again */
-	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
-	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 exit:
 	clear_bit(__ICE_CFG_BUSY, vsi->state);
 	return err;
@@ -753,7 +753,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
 	if (!vsi)
 		return;
 
-	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev)
+	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
 		return;
 
 	if (vsi->type == ICE_VSI_PF) {
@@ -2009,7 +2009,7 @@ static void ice_check_media_subtask(struct ice_pf *pf)
 		/* PHY settings are reset on media insertion, reconfigure
 		 * PHY to preserve settings.
 		 */
-		if (test_bit(__ICE_DOWN, vsi->state) &&
+		if (test_bit(ICE_VSI_DOWN, vsi->state) &&
 		    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
 			return;
 
@@ -2520,7 +2520,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 	}
 
 	/* need to stop netdev while setting up the program for Rx rings */
-	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
+	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
 		ret = ice_down(vsi);
 		if (ret) {
 			NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
@@ -3103,7 +3103,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
 	 */
 	ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
 	if (!ret)
-		set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+		set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 
 	return ret;
 }
@@ -3142,7 +3142,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
 	if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi))
 		ret = ice_cfg_vlan_pruning(vsi, false, false);
 
-	set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
 	return ret;
 }
 
@@ -4973,8 +4973,8 @@ static void ice_set_rx_mode(struct net_device *netdev)
 	 * ndo_set_rx_mode may be triggered even without a change in netdev
 	 * flags
 	 */
-	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
-	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
+	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
 
 	/* schedule our worker thread which will take care of
@@ -5247,7 +5247,7 @@ static int ice_up_complete(struct ice_vsi *vsi)
 	if (err)
 		return err;
 
-	clear_bit(__ICE_DOWN, vsi->state);
+	clear_bit(ICE_VSI_DOWN, vsi->state);
 	ice_napi_enable_all(vsi);
 	ice_vsi_ena_irq(vsi);
 
@@ -5390,7 +5390,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi)
 	struct ice_eth_stats *cur_es = &vsi->eth_stats;
 	struct ice_pf *pf = vsi->back;
 
-	if (test_bit(__ICE_DOWN, vsi->state) ||
+	if (test_bit(ICE_VSI_DOWN, vsi->state) ||
 	    test_bit(__ICE_CFG_BUSY, pf->state))
 		return;
 
@@ -5595,7 +5595,7 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
 	 * But, only call the update routine and read the registers if VSI is
 	 * not down.
 	 */
-	if (!test_bit(__ICE_DOWN, vsi->state))
+	if (!test_bit(ICE_VSI_DOWN, vsi->state))
 		ice_update_vsi_ring_stats(vsi);
 	stats->tx_packets = vsi_stats->tx_packets;
 	stats->tx_bytes = vsi_stats->tx_bytes;
@@ -5795,7 +5795,7 @@ int ice_vsi_open_ctrl(struct ice_vsi *vsi)
 	if (err)
 		goto err_up_complete;
 
-	clear_bit(__ICE_DOWN, vsi->state);
+	clear_bit(ICE_VSI_DOWN, vsi->state);
 	ice_vsi_ena_irq(vsi);
 
 	return 0;
@@ -6182,7 +6182,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	netdev->mtu = (unsigned int)new_mtu;
 
 	/* if VSI is up, bring it down and then back up */
-	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
+	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
 		int err;
 
 		err = ice_down(vsi);
@@ -6651,7 +6651,7 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 	default:
 		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
 		set_bit(__ICE_DOWN, pf->state);
-		set_bit(__ICE_NEEDS_RESTART, vsi->state);
+		set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
 		set_bit(__ICE_SERVICE_DIS, pf->state);
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index c71f2fbbb262..dfdf2c1fa9d3 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -309,7 +309,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
 		smp_mb();
 		if (__netif_subqueue_stopped(tx_ring->netdev,
 					     tx_ring->q_index) &&
-		    !test_bit(__ICE_DOWN, vsi->state)) {
+		    !test_bit(ICE_VSI_DOWN, vsi->state)) {
 			netif_wake_subqueue(tx_ring->netdev,
 					    tx_ring->q_index);
 			++tx_ring->tx_stats.restart_q;
@@ -569,7 +569,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	struct ice_ring *xdp_ring;
 	int nxmit = 0, i;
 
-	if (test_bit(__ICE_DOWN, vsi->state))
+	if (test_bit(ICE_VSI_DOWN, vsi->state))
 		return -ENETDOWN;
 
 	if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
@@ -1520,7 +1520,7 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
 			q_vector->itr_countdown--;
 	}
 
-	if (!test_bit(__ICE_DOWN, vsi->state))
+	if (!test_bit(ICE_VSI_DOWN, vsi->state))
 		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
 }
 
-- 
cgit v1.2.3


From 0c3e94c247938b63218e661fcd1a935edb0db215 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Tue, 2 Mar 2021 10:15:42 -0800
Subject: ice: cleanup style issues

A few style issues reported by checkpatch have snuck into the code; resolve
the style issues.

COMPLEX_MACRO: Macros with complex values should be enclosed in parentheses

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_controlq.h  | 4 ++--
 drivers/net/ethernet/intel/ice/ice_flex_type.h | 4 ++--
 drivers/net/ethernet/intel/ice/ice_flow.c      | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index faaa08e8171b..7d0905f25ddc 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -14,8 +14,8 @@
 	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
 
 #define ICE_CTL_Q_DESC_UNUSED(R) \
-	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
-	      (R)->next_to_clean - (R)->next_to_use - 1)
+	((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	       (R)->next_to_clean - (R)->next_to_use - 1))
 
 /* Defines that help manage the driver vs FW API checks.
  * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index 9806183920de..7d8b517a63c9 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -497,8 +497,8 @@ struct ice_xlt1 {
 #define ICE_PF_NUM_S	13
 #define ICE_PF_NUM_M	(0x07 << ICE_PF_NUM_S)
 #define ICE_VSIG_VALUE(vsig, pf_id) \
-	(u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
-	      (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M))
+	((u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
+	       (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)))
 #define ICE_DEFAULT_VSIG	0
 
 /* XLT2 Table */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 571245799646..4d59eb96383b 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -2008,9 +2008,9 @@ ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
  * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled
  */
 #define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \
-	(u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \
-	      (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \
-	      ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0))
+	((u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \
+	       (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \
+	       ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)))
 
 /**
  * ice_add_rss_cfg_sync - add an RSS configuration
-- 
cgit v1.2.3


From a07cc1786dab69b896af226cf58163237837ee72 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Tue, 2 Mar 2021 10:15:43 -0800
Subject: ice: Correct comment block style

The following is reported by checkpatch, correct it.

-----------------------------------------------
drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
-----------------------------------------------
WARNING:NETWORKING_BLOCK_COMMENT_STYLE: networking block comments don't use an empty /* line, use /* Comment...
FILE: drivers/net/ethernet/intel/ice/ice_adminq_cmd.h:1428:
+/*
+ * Send to PF command (indirect 0x0801) ID is only used by PF

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 5108046476f1..b9491ef5f21c 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1407,8 +1407,7 @@ struct ice_aqc_nvm_comp_tbl {
 	u8 cvs[]; /* Component Version String */
 } __packed;
 
-/*
- * Send to PF command (indirect 0x0801) ID is only used by PF
+/* Send to PF command (indirect 0x0801) ID is only used by PF
  *
  * Send to VF command (indirect 0x0802) ID is only used by PF
  *
-- 
cgit v1.2.3


From 53f7c5e1406110b9b8da4b7e2c66023a16bb8714 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Wed, 31 Mar 2021 10:35:53 +0800
Subject: net: ethernet: stmicro: Remove duplicate struct declaration

struct stmmac_safety_stats is declared twice. One has been
declared at 29th line. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/hwif.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index c9cf89e21a41..2b5022ef1e52 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -280,7 +280,6 @@ struct stmmac_dma_ops {
 struct mac_device_info;
 struct net_device;
 struct rgmii_adv;
-struct stmmac_safety_stats;
 struct stmmac_tc_entry;
 struct stmmac_pps_cfg;
 struct stmmac_rss;
-- 
cgit v1.2.3


From 3cbf7530a163d048a6376cd22fecb9cdcb23b192 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 31 Mar 2021 05:36:42 +0100
Subject: qrtr: Convert qrtr_ports from IDR to XArray

The XArray interface is easier for this driver to use.  Also fixes a
bug reported by the improper use of GFP_ATOMIC.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index dfc820ee553a..4b46c69e14ab 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,6 +20,8 @@
 /* auto-bind range */
 #define QRTR_MIN_EPH_SOCKET 0x4000
 #define QRTR_MAX_EPH_SOCKET 0x7fff
+#define QRTR_EPH_PORT_RANGE \
+		XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
 
 /**
  * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
@@ -106,8 +108,7 @@ static LIST_HEAD(qrtr_all_nodes);
 static DEFINE_MUTEX(qrtr_node_lock);
 
 /* local port allocation management */
-static DEFINE_IDR(qrtr_ports);
-static DEFINE_MUTEX(qrtr_port_lock);
+static DEFINE_XARRAY_ALLOC(qrtr_ports);
 
 /**
  * struct qrtr_node - endpoint node
@@ -653,7 +654,7 @@ static struct qrtr_sock *qrtr_port_lookup(int port)
 		port = 0;
 
 	rcu_read_lock();
-	ipc = idr_find(&qrtr_ports, port);
+	ipc = xa_load(&qrtr_ports, port);
 	if (ipc)
 		sock_hold(&ipc->sk);
 	rcu_read_unlock();
@@ -695,9 +696,7 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
 
 	__sock_put(&ipc->sk);
 
-	mutex_lock(&qrtr_port_lock);
-	idr_remove(&qrtr_ports, port);
-	mutex_unlock(&qrtr_port_lock);
+	xa_erase(&qrtr_ports, port);
 
 	/* Ensure that if qrtr_port_lookup() did enter the RCU read section we
 	 * wait for it to up increment the refcount */
@@ -716,29 +715,20 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
  */
 static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
 {
-	u32 min_port;
 	int rc;
 
-	mutex_lock(&qrtr_port_lock);
 	if (!*port) {
-		min_port = QRTR_MIN_EPH_SOCKET;
-		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC);
-		if (!rc)
-			*port = min_port;
+		rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
+				GFP_KERNEL);
 	} else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
 		rc = -EACCES;
 	} else if (*port == QRTR_PORT_CTRL) {
-		min_port = 0;
-		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC);
+		rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
 	} else {
-		min_port = *port;
-		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC);
-		if (!rc)
-			*port = min_port;
+		rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
 	}
-	mutex_unlock(&qrtr_port_lock);
 
-	if (rc == -ENOSPC)
+	if (rc == -EBUSY)
 		return -EADDRINUSE;
 	else if (rc < 0)
 		return rc;
@@ -752,20 +742,16 @@ static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
 static void qrtr_reset_ports(void)
 {
 	struct qrtr_sock *ipc;
-	int id;
-
-	mutex_lock(&qrtr_port_lock);
-	idr_for_each_entry(&qrtr_ports, ipc, id) {
-		/* Don't reset control port */
-		if (id == 0)
-			continue;
+	unsigned long index;
 
+	rcu_read_lock();
+	xa_for_each_start(&qrtr_ports, index, ipc, 1) {
 		sock_hold(&ipc->sk);
 		ipc->sk.sk_err = ENETRESET;
 		ipc->sk.sk_error_report(&ipc->sk);
 		sock_put(&ipc->sk);
 	}
-	mutex_unlock(&qrtr_port_lock);
+	rcu_read_unlock();
 }
 
 /* Bind socket to address.
-- 
cgit v1.2.3


From b788ff0a7d7dc04da4c938d56cbe96c7fa261983 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:28 +0800
Subject: net: ena: fix inaccurate print type

Use "%u" to replace "hu%".

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 02087d443e73..764852ead1d6 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -863,7 +863,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
 
 	if (unlikely(i == timeout)) {
 		netdev_err(ena_dev->net_device,
-			   "Reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n",
+			   "Reading reg failed for timeout. expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n",
 			   mmio_read->seq_num, offset, read_resp->req_id,
 			   read_resp->reg_off);
 		ret = ENA_MMIO_READ_TIMEOUT;
@@ -2396,7 +2396,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 		if (key) {
 			if (key_len != sizeof(hash_key->key)) {
 				netdev_err(ena_dev->net_device,
-					   "key len (%hu) doesn't equal the supported size (%zu)\n",
+					   "key len (%u) doesn't equal the supported size (%zu)\n",
 					   key_len, sizeof(hash_key->key));
 				return -EINVAL;
 			}
-- 
cgit v1.2.3


From e355fa6a3f405d3a3a1d86f2e2e332fb3d4e05d8 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:29 +0800
Subject: net: ena: remove extra words from comments

Remove the redundant "for" from the commment.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 5c062c51b4cb..881f88754bf6 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3975,7 +3975,7 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
 	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
 	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
 	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
-	/* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */
+	/* 1 IRQ for mgmnt and 1 IRQs for each IO direction */
 	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
 	if (unlikely(!max_num_io_queues)) {
 		dev_err(&pdev->dev, "The device doesn't have io queues\n");
-- 
cgit v1.2.3


From ca3fc0aa08370260a1180ac4366cf58fbefc841c Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:30 +0800
Subject: net: amd8111e: fix inappropriate spaces

Delete unncecessary spaces and add some reasonable spaces according to the
coding-style of kernel.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/amd8111e.c | 362 ++++++++++++++++++------------------
 1 file changed, 177 insertions(+), 185 deletions(-)

diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 960d483e8997..4a1220cc6f10 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -100,19 +100,19 @@ static int amd8111e_read_phy(struct amd8111e_priv *lp,
 {
 	void __iomem *mmio = lp->mmio;
 	unsigned int reg_val;
-	unsigned int repeat= REPEAT_CNT;
+	unsigned int repeat = REPEAT_CNT;
 
 	reg_val = readl(mmio + PHY_ACCESS);
 	while (reg_val & PHY_CMD_ACTIVE)
-		reg_val = readl( mmio + PHY_ACCESS );
+		reg_val = readl(mmio + PHY_ACCESS);
 
-	writel( PHY_RD_CMD | ((phy_id & 0x1f) << 21) |
-			   ((reg & 0x1f) << 16),  mmio +PHY_ACCESS);
-	do{
+	writel(PHY_RD_CMD | ((phy_id & 0x1f) << 21) |
+			   ((reg & 0x1f) << 16), mmio + PHY_ACCESS);
+	do {
 		reg_val = readl(mmio + PHY_ACCESS);
 		udelay(30);  /* It takes 30 us to read/write data */
 	} while (--repeat && (reg_val & PHY_CMD_ACTIVE));
-	if(reg_val & PHY_RD_ERR)
+	if (reg_val & PHY_RD_ERR)
 		goto err_phy_read;
 
 	*val = reg_val & 0xffff;
@@ -133,17 +133,17 @@ static int amd8111e_write_phy(struct amd8111e_priv *lp,
 
 	reg_val = readl(mmio + PHY_ACCESS);
 	while (reg_val & PHY_CMD_ACTIVE)
-		reg_val = readl( mmio + PHY_ACCESS );
+		reg_val = readl(mmio + PHY_ACCESS);
 
-	writel( PHY_WR_CMD | ((phy_id & 0x1f) << 21) |
+	writel(PHY_WR_CMD | ((phy_id & 0x1f) << 21) |
 			   ((reg & 0x1f) << 16)|val, mmio + PHY_ACCESS);
 
-	do{
+	do {
 		reg_val = readl(mmio + PHY_ACCESS);
 		udelay(30);  /* It takes 30 us to read/write the data */
 	} while (--repeat && (reg_val & PHY_CMD_ACTIVE));
 
-	if(reg_val & PHY_RD_ERR)
+	if (reg_val & PHY_RD_ERR)
 		goto err_phy_write;
 
 	return 0;
@@ -159,7 +159,7 @@ static int amd8111e_mdio_read(struct net_device *dev, int phy_id, int reg_num)
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	unsigned int reg_val;
 
-	amd8111e_read_phy(lp,phy_id,reg_num,&reg_val);
+	amd8111e_read_phy(lp, phy_id, reg_num, &reg_val);
 	return reg_val;
 
 }
@@ -179,17 +179,17 @@ static void amd8111e_mdio_write(struct net_device *dev,
 static void amd8111e_set_ext_phy(struct net_device *dev)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
-	u32 bmcr,advert,tmp;
+	u32 bmcr, advert, tmp;
 
 	/* Determine mii register values to set the speed */
 	advert = amd8111e_mdio_read(dev, lp->ext_phy_addr, MII_ADVERTISE);
 	tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-	switch (lp->ext_phy_option){
+	switch (lp->ext_phy_option) {
 
 		default:
 		case SPEED_AUTONEG: /* advertise all values */
-			tmp |= ( ADVERTISE_10HALF|ADVERTISE_10FULL|
-				ADVERTISE_100HALF|ADVERTISE_100FULL) ;
+			tmp |= (ADVERTISE_10HALF | ADVERTISE_10FULL |
+				ADVERTISE_100HALF | ADVERTISE_100FULL);
 			break;
 		case SPEED10_HALF:
 			tmp |= ADVERTISE_10HALF;
@@ -224,20 +224,20 @@ static int amd8111e_free_skbs(struct net_device *dev)
 	int i;
 
 	/* Freeing transmit skbs */
-	for(i = 0; i < NUM_TX_BUFFERS; i++){
-		if(lp->tx_skbuff[i]){
+	for (i = 0; i < NUM_TX_BUFFERS; i++) {
+		if (lp->tx_skbuff[i]) {
 			dma_unmap_single(&lp->pci_dev->dev,
 					 lp->tx_dma_addr[i],
 					 lp->tx_skbuff[i]->len, DMA_TO_DEVICE);
-			dev_kfree_skb (lp->tx_skbuff[i]);
+			dev_kfree_skb(lp->tx_skbuff[i]);
 			lp->tx_skbuff[i] = NULL;
 			lp->tx_dma_addr[i] = 0;
 		}
 	}
 	/* Freeing previously allocated receive buffers */
-	for (i = 0; i < NUM_RX_BUFFERS; i++){
+	for (i = 0; i < NUM_RX_BUFFERS; i++) {
 		rx_skbuff = lp->rx_skbuff[i];
-		if(rx_skbuff != NULL){
+		if (rx_skbuff != NULL) {
 			dma_unmap_single(&lp->pci_dev->dev,
 					 lp->rx_dma_addr[i],
 					 lp->rx_buff_len - 2, DMA_FROM_DEVICE);
@@ -258,13 +258,13 @@ static inline void amd8111e_set_rx_buff_len(struct net_device *dev)
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	unsigned int mtu = dev->mtu;
 
-	if (mtu > ETH_DATA_LEN){
+	if (mtu > ETH_DATA_LEN) {
 		/* MTU + ethernet header + FCS
 		 * + optional VLAN tag + skb reserve space 2
 		 */
 		lp->rx_buff_len = mtu + ETH_HLEN + 10;
 		lp->options |= OPTION_JUMBO_ENABLE;
-	} else{
+	} else {
 		lp->rx_buff_len = PKT_BUFF_SZ;
 		lp->options &= ~OPTION_JUMBO_ENABLE;
 	}
@@ -285,11 +285,11 @@ static int amd8111e_init_ring(struct net_device *dev)
 	lp->tx_ring_idx = 0;
 
 
-	if(lp->opened)
+	if (lp->opened)
 		/* Free previously allocated transmit and receive skbs */
 		amd8111e_free_skbs(dev);
 
-	else{
+	else {
 		/* allocate the tx and rx descriptors */
 		lp->tx_ring = dma_alloc_coherent(&lp->pci_dev->dev,
 			sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR,
@@ -312,12 +312,12 @@ static int amd8111e_init_ring(struct net_device *dev)
 
 		lp->rx_skbuff[i] = netdev_alloc_skb(dev, lp->rx_buff_len);
 		if (!lp->rx_skbuff[i]) {
-				/* Release previos allocated skbs */
-				for(--i; i >= 0 ;i--)
-					dev_kfree_skb(lp->rx_skbuff[i]);
-				goto err_free_rx_ring;
+			/* Release previos allocated skbs */
+			for (--i; i >= 0; i--)
+				dev_kfree_skb(lp->rx_skbuff[i]);
+			goto err_free_rx_ring;
 		}
-		skb_reserve(lp->rx_skbuff[i],2);
+		skb_reserve(lp->rx_skbuff[i], 2);
 	}
         /* Initilaizing receive descriptors */
 	for (i = 0; i < NUM_RX_BUFFERS; i++) {
@@ -375,40 +375,40 @@ static int amd8111e_set_coalesce(struct net_device *dev, enum coal_mode cmod)
 		case RX_INTR_COAL :
 			timeout = coal_conf->rx_timeout;
 			event_count = coal_conf->rx_event_count;
-			if( timeout > MAX_TIMEOUT ||
-					event_count > MAX_EVENT_COUNT )
+			if (timeout > MAX_TIMEOUT ||
+			    event_count > MAX_EVENT_COUNT)
 				return -EINVAL;
 
 			timeout = timeout * DELAY_TIMER_CONV;
 			writel(VAL0|STINTEN, mmio+INTEN0);
-			writel((u32)DLY_INT_A_R0|( event_count<< 16 )|timeout,
-							mmio+DLY_INT_A);
+			writel((u32)DLY_INT_A_R0 | (event_count << 16) |
+				timeout, mmio + DLY_INT_A);
 			break;
 
-		case TX_INTR_COAL :
+		case TX_INTR_COAL:
 			timeout = coal_conf->tx_timeout;
 			event_count = coal_conf->tx_event_count;
-			if( timeout > MAX_TIMEOUT ||
-					event_count > MAX_EVENT_COUNT )
+			if (timeout > MAX_TIMEOUT ||
+			    event_count > MAX_EVENT_COUNT)
 				return -EINVAL;
 
 
 			timeout = timeout * DELAY_TIMER_CONV;
-			writel(VAL0|STINTEN,mmio+INTEN0);
-			writel((u32)DLY_INT_B_T0|( event_count<< 16 )|timeout,
-							 mmio+DLY_INT_B);
+			writel(VAL0 | STINTEN, mmio + INTEN0);
+			writel((u32)DLY_INT_B_T0 | (event_count << 16) |
+				timeout, mmio + DLY_INT_B);
 			break;
 
 		case DISABLE_COAL:
-			writel(0,mmio+STVAL);
-			writel(STINTEN, mmio+INTEN0);
-			writel(0, mmio +DLY_INT_B);
-			writel(0, mmio+DLY_INT_A);
+			writel(0, mmio + STVAL);
+			writel(STINTEN, mmio + INTEN0);
+			writel(0, mmio + DLY_INT_B);
+			writel(0, mmio + DLY_INT_A);
 			break;
 		 case ENABLE_COAL:
 		       /* Start the timer */
-			writel((u32)SOFT_TIMER_FREQ, mmio+STVAL); /*  0.5 sec */
-			writel(VAL0|STINTEN, mmio+INTEN0);
+			writel((u32)SOFT_TIMER_FREQ, mmio + STVAL); /* 0.5 sec */
+			writel(VAL0 | STINTEN, mmio + INTEN0);
 			break;
 		default:
 			break;
@@ -423,67 +423,67 @@ static int amd8111e_restart(struct net_device *dev)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	void __iomem *mmio = lp->mmio;
-	int i,reg_val;
+	int i, reg_val;
 
 	/* stop the chip */
 	writel(RUN, mmio + CMD0);
 
-	if(amd8111e_init_ring(dev))
+	if (amd8111e_init_ring(dev))
 		return -ENOMEM;
 
 	/* enable the port manager and set auto negotiation always */
-	writel((u32) VAL1|EN_PMGR, mmio + CMD3 );
-	writel((u32)XPHYANE|XPHYRST , mmio + CTRL2);
+	writel((u32)VAL1 | EN_PMGR, mmio + CMD3);
+	writel((u32)XPHYANE | XPHYRST, mmio + CTRL2);
 
 	amd8111e_set_ext_phy(dev);
 
 	/* set control registers */
 	reg_val = readl(mmio + CTRL1);
 	reg_val &= ~XMTSP_MASK;
-	writel( reg_val| XMTSP_128 | CACHE_ALIGN, mmio + CTRL1 );
+	writel(reg_val | XMTSP_128 | CACHE_ALIGN, mmio + CTRL1);
 
 	/* enable interrupt */
-	writel( APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN |
+	writel(APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN |
 		APINT0EN | MIIPDTINTEN | MCCIINTEN | MCCINTEN | MREINTEN |
 		SPNDINTEN | MPINTEN | SINTEN | STINTEN, mmio + INTEN0);
 
 	writel(VAL3 | LCINTEN | VAL1 | TINTEN0 | VAL0 | RINTEN0, mmio + INTEN0);
 
 	/* initialize tx and rx ring base addresses */
-	writel((u32)lp->tx_ring_dma_addr,mmio + XMT_RING_BASE_ADDR0);
-	writel((u32)lp->rx_ring_dma_addr,mmio+ RCV_RING_BASE_ADDR0);
+	writel((u32)lp->tx_ring_dma_addr, mmio + XMT_RING_BASE_ADDR0);
+	writel((u32)lp->rx_ring_dma_addr, mmio + RCV_RING_BASE_ADDR0);
 
 	writew((u32)NUM_TX_RING_DR, mmio + XMT_RING_LEN0);
 	writew((u16)NUM_RX_RING_DR, mmio + RCV_RING_LEN0);
 
 	/* set default IPG to 96 */
-	writew((u32)DEFAULT_IPG,mmio+IPG);
+	writew((u32)DEFAULT_IPG, mmio + IPG);
 	writew((u32)(DEFAULT_IPG-IFS1_DELTA), mmio + IFS1);
 
-	if(lp->options & OPTION_JUMBO_ENABLE){
+	if (lp->options & OPTION_JUMBO_ENABLE) {
 		writel((u32)VAL2|JUMBO, mmio + CMD3);
 		/* Reset REX_UFLO */
-		writel( REX_UFLO, mmio + CMD2);
+		writel(REX_UFLO, mmio + CMD2);
 		/* Should not set REX_UFLO for jumbo frames */
-		writel( VAL0 | APAD_XMT|REX_RTRY , mmio + CMD2);
-	}else{
-		writel( VAL0 | APAD_XMT | REX_RTRY|REX_UFLO, mmio + CMD2);
+		writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2);
+	} else {
+		writel(VAL0 | APAD_XMT | REX_RTRY | REX_UFLO, mmio + CMD2);
 		writel((u32)JUMBO, mmio + CMD3);
 	}
 
 #if AMD8111E_VLAN_TAG_USED
-	writel((u32) VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3);
+	writel((u32)VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3);
 #endif
-	writel( VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2 );
+	writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2);
 
 	/* Setting the MAC address to the device */
 	for (i = 0; i < ETH_ALEN; i++)
-		writeb( dev->dev_addr[i], mmio + PADR + i );
+		writeb(dev->dev_addr[i], mmio + PADR + i);
 
 	/* Enable interrupt coalesce */
-	if(lp->options & OPTION_INTR_COAL_ENABLE){
+	if (lp->options & OPTION_INTR_COAL_ENABLE) {
 		netdev_info(dev, "Interrupt Coalescing Enabled.\n");
-		amd8111e_set_coalesce(dev,ENABLE_COAL);
+		amd8111e_set_coalesce(dev, ENABLE_COAL);
 	}
 
 	/* set RUN bit to start the chip */
@@ -499,11 +499,11 @@ static int amd8111e_restart(struct net_device *dev)
 static void amd8111e_init_hw_default(struct amd8111e_priv *lp)
 {
 	unsigned int reg_val;
-	unsigned int logic_filter[2] ={0,};
+	unsigned int logic_filter[2] = {0,};
 	void __iomem *mmio = lp->mmio;
 
 
-        /* stop the chip */
+	/* stop the chip */
 	writel(RUN, mmio + CMD0);
 
 	/* AUTOPOLL0 Register *//*TBD default value is 8100 in FPS */
@@ -519,13 +519,13 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp)
 	writel(0, mmio + XMT_RING_BASE_ADDR3);
 
 	/* Clear CMD0  */
-	writel(CMD0_CLEAR,mmio + CMD0);
+	writel(CMD0_CLEAR, mmio + CMD0);
 
 	/* Clear CMD2 */
-	writel(CMD2_CLEAR, mmio +CMD2);
+	writel(CMD2_CLEAR, mmio + CMD2);
 
 	/* Clear CMD7 */
-	writel(CMD7_CLEAR , mmio + CMD7);
+	writel(CMD7_CLEAR, mmio + CMD7);
 
 	/* Clear DLY_INT_A and DLY_INT_B */
 	writel(0x0, mmio + DLY_INT_A);
@@ -542,16 +542,16 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp)
 	writel(0x0, mmio + STVAL);
 
 	/* Clear INTEN0 */
-	writel( INTEN0_CLEAR, mmio + INTEN0);
+	writel(INTEN0_CLEAR, mmio + INTEN0);
 
 	/* Clear LADRF */
-	writel(0x0 , mmio + LADRF);
+	writel(0x0, mmio + LADRF);
 
 	/* Set SRAM_SIZE & SRAM_BOUNDARY registers  */
-	writel( 0x80010,mmio + SRAM_SIZE);
+	writel(0x80010, mmio + SRAM_SIZE);
 
 	/* Clear RCV_RING0_LEN */
-	writel(0x0, mmio +  RCV_RING_LEN0);
+	writel(0x0, mmio + RCV_RING_LEN0);
 
 	/* Clear XMT_RING0/1/2/3_LEN */
 	writel(0x0, mmio +  XMT_RING_LEN0);
@@ -571,10 +571,10 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp)
 	/* SRAM_SIZE register */
 	reg_val = readl(mmio + SRAM_SIZE);
 
-	if(lp->options & OPTION_JUMBO_ENABLE)
-		writel( VAL2|JUMBO, mmio + CMD3);
+	if (lp->options & OPTION_JUMBO_ENABLE)
+		writel(VAL2 | JUMBO, mmio + CMD3);
 #if AMD8111E_VLAN_TAG_USED
-	writel(VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3 );
+	writel(VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3);
 #endif
 	/* Set default value to CTRL1 Register */
 	writel(CTRL1_DEFAULT, mmio + CTRL1);
@@ -616,14 +616,14 @@ static void amd8111e_stop_chip(struct amd8111e_priv *lp)
 static void amd8111e_free_ring(struct amd8111e_priv *lp)
 {
 	/* Free transmit and receive descriptor rings */
-	if(lp->rx_ring){
+	if (lp->rx_ring) {
 		dma_free_coherent(&lp->pci_dev->dev,
 				  sizeof(struct amd8111e_rx_dr) * NUM_RX_RING_DR,
 				  lp->rx_ring, lp->rx_ring_dma_addr);
 		lp->rx_ring = NULL;
 	}
 
-	if(lp->tx_ring){
+	if (lp->tx_ring) {
 		dma_free_coherent(&lp->pci_dev->dev,
 				  sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR,
 				  lp->tx_ring, lp->tx_ring_dma_addr);
@@ -643,11 +643,11 @@ static int amd8111e_tx(struct net_device *dev)
 	int tx_index;
 	int status;
 	/* Complete all the transmit packet */
-	while (lp->tx_complete_idx != lp->tx_idx){
+	while (lp->tx_complete_idx != lp->tx_idx) {
 		tx_index =  lp->tx_complete_idx & TX_RING_DR_MOD_MASK;
 		status = le16_to_cpu(lp->tx_ring[tx_index].tx_flags);
 
-		if(status & OWN_BIT)
+		if (status & OWN_BIT)
 			break;	/* It still hasn't been Txed */
 
 		lp->tx_ring[tx_index].buff_phy_addr = 0;
@@ -669,10 +669,10 @@ static int amd8111e_tx(struct net_device *dev)
 			le16_to_cpu(lp->tx_ring[tx_index].buff_count);
 
 		if (netif_queue_stopped(dev) &&
-			lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS +2){
+			lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS + 2) {
 			/* The ring is no longer full, clear tbusy. */
 			/* lp->tx_full = 0; */
-			netif_wake_queue (dev);
+			netif_wake_queue(dev);
 		}
 	}
 	return 0;
@@ -685,7 +685,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 	struct net_device *dev = lp->amd8111e_net_dev;
 	int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK;
 	void __iomem *mmio = lp->mmio;
-	struct sk_buff *skb,*new_skb;
+	struct sk_buff *skb, *new_skb;
 	int min_pkt_len, status;
 	int num_rx_pkt = 0;
 	short pkt_len;
@@ -710,7 +710,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 			goto err_next_pkt;
 		}
 		/* check for STP and ENP */
-		if (!((status & STP_BIT) && (status & ENP_BIT))){
+		if (!((status & STP_BIT) && (status & ENP_BIT))) {
 			/* resetting flags */
 			lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS;
 			goto err_next_pkt;
@@ -755,7 +755,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 		skb->protocol = eth_type_trans(skb, dev);
 
 #if AMD8111E_VLAN_TAG_USED
-		if (vtag == TT_VLAN_TAGGED){
+		if (vtag == TT_VLAN_TAGGED) {
 			u16 vlan_tag = le16_to_cpu(lp->rx_ring[rx_index].tag_ctrl_info);
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 		}
@@ -793,25 +793,25 @@ err_next_pkt:
 static int amd8111e_link_change(struct net_device *dev)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
-	int status0,speed;
+	int status0, speed;
 
 	/* read the link change */
-     	status0 = readl(lp->mmio + STAT0);
+	status0 = readl(lp->mmio + STAT0);
 
-	if(status0 & LINK_STATS){
-		if(status0 & AUTONEG_COMPLETE)
+	if (status0 & LINK_STATS) {
+		if (status0 & AUTONEG_COMPLETE)
 			lp->link_config.autoneg = AUTONEG_ENABLE;
 		else
 			lp->link_config.autoneg = AUTONEG_DISABLE;
 
-		if(status0 & FULL_DPLX)
+		if (status0 & FULL_DPLX)
 			lp->link_config.duplex = DUPLEX_FULL;
 		else
 			lp->link_config.duplex = DUPLEX_HALF;
 		speed = (status0 & SPEED_MASK) >> 7;
-		if(speed == PHY_SPEED_10)
+		if (speed == PHY_SPEED_10)
 			lp->link_config.speed = SPEED_10;
-		else if(speed == PHY_SPEED_100)
+		else if (speed == PHY_SPEED_100)
 			lp->link_config.speed = SPEED_100;
 
 		netdev_info(dev, "Link is Up. Speed is %s Mbps %s Duplex\n",
@@ -821,8 +821,7 @@ static int amd8111e_link_change(struct net_device *dev)
 							"Full" : "Half");
 
 		netif_carrier_on(dev);
-	}
-	else{
+	} else {
 		lp->link_config.speed = SPEED_INVALID;
 		lp->link_config.duplex = DUPLEX_INVALID;
 		lp->link_config.autoneg = AUTONEG_INVALID;
@@ -840,7 +839,7 @@ static int amd8111e_read_mib(void __iomem *mmio, u8 MIB_COUNTER)
 	unsigned  int data;
 	unsigned int repeat = REPEAT_CNT;
 
-	writew( MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR);
+	writew(MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR);
 	do {
 		status = readw(mmio + MIB_ADDR);
 		udelay(2);	/* controller takes MAX 2 us to get mib data */
@@ -863,7 +862,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev)
 
 	if (!lp->opened)
 		return new_stats;
-	spin_lock_irqsave (&lp->lock, flags);
+	spin_lock_irqsave(&lp->lock, flags);
 
 	/* stats.rx_packets */
 	new_stats->rx_packets = amd8111e_read_mib(mmio, rcv_broadcast_pkts)+
@@ -943,7 +942,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev)
 	/* Reset the mibs for collecting new statistics */
 	/* writew(MIB_CLEAR, mmio + MIB_ADDR);*/
 
-	spin_unlock_irqrestore (&lp->lock, flags);
+	spin_unlock_irqrestore(&lp->lock, flags);
 
 	return new_stats;
 }
@@ -974,96 +973,90 @@ static int amd8111e_calc_coalesce(struct net_device *dev)
 	rx_data_rate = coal_conf->rx_bytes - coal_conf->rx_prev_bytes;
 	coal_conf->rx_prev_bytes =  coal_conf->rx_bytes;
 
-	if(rx_pkt_rate < 800){
-		if(coal_conf->rx_coal_type != NO_COALESCE){
+	if (rx_pkt_rate < 800) {
+		if (coal_conf->rx_coal_type != NO_COALESCE) {
 
 			coal_conf->rx_timeout = 0x0;
 			coal_conf->rx_event_count = 0;
-			amd8111e_set_coalesce(dev,RX_INTR_COAL);
+			amd8111e_set_coalesce(dev, RX_INTR_COAL);
 			coal_conf->rx_coal_type = NO_COALESCE;
 		}
-	}
-	else{
+	} else {
 
 		rx_pkt_size = rx_data_rate/rx_pkt_rate;
-		if (rx_pkt_size < 128){
-			if(coal_conf->rx_coal_type != NO_COALESCE){
+		if (rx_pkt_size < 128) {
+			if (coal_conf->rx_coal_type != NO_COALESCE) {
 
 				coal_conf->rx_timeout = 0;
 				coal_conf->rx_event_count = 0;
-				amd8111e_set_coalesce(dev,RX_INTR_COAL);
+				amd8111e_set_coalesce(dev, RX_INTR_COAL);
 				coal_conf->rx_coal_type = NO_COALESCE;
 			}
 
-		}
-		else if ( (rx_pkt_size >= 128) && (rx_pkt_size < 512) ){
+		} else if ((rx_pkt_size >= 128) && (rx_pkt_size < 512)) {
 
-			if(coal_conf->rx_coal_type !=  LOW_COALESCE){
+			if (coal_conf->rx_coal_type !=  LOW_COALESCE) {
 				coal_conf->rx_timeout = 1;
 				coal_conf->rx_event_count = 4;
-				amd8111e_set_coalesce(dev,RX_INTR_COAL);
+				amd8111e_set_coalesce(dev, RX_INTR_COAL);
 				coal_conf->rx_coal_type = LOW_COALESCE;
 			}
-		}
-		else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)){
+		} else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)) {
 
-			if(coal_conf->rx_coal_type !=  MEDIUM_COALESCE){
+			if (coal_conf->rx_coal_type != MEDIUM_COALESCE) {
 				coal_conf->rx_timeout = 1;
 				coal_conf->rx_event_count = 4;
-				amd8111e_set_coalesce(dev,RX_INTR_COAL);
+				amd8111e_set_coalesce(dev, RX_INTR_COAL);
 				coal_conf->rx_coal_type = MEDIUM_COALESCE;
 			}
 
-		}
-		else if(rx_pkt_size >= 1024){
-			if(coal_conf->rx_coal_type !=  HIGH_COALESCE){
+		} else if (rx_pkt_size >= 1024) {
+
+			if (coal_conf->rx_coal_type !=  HIGH_COALESCE) {
 				coal_conf->rx_timeout = 2;
 				coal_conf->rx_event_count = 3;
-				amd8111e_set_coalesce(dev,RX_INTR_COAL);
+				amd8111e_set_coalesce(dev, RX_INTR_COAL);
 				coal_conf->rx_coal_type = HIGH_COALESCE;
 			}
 		}
 	}
-    	/* NOW FOR TX INTR COALESC */
-	if(tx_pkt_rate < 800){
-		if(coal_conf->tx_coal_type != NO_COALESCE){
+	/* NOW FOR TX INTR COALESC */
+	if (tx_pkt_rate < 800) {
+		if (coal_conf->tx_coal_type != NO_COALESCE) {
 
 			coal_conf->tx_timeout = 0x0;
 			coal_conf->tx_event_count = 0;
-			amd8111e_set_coalesce(dev,TX_INTR_COAL);
+			amd8111e_set_coalesce(dev, TX_INTR_COAL);
 			coal_conf->tx_coal_type = NO_COALESCE;
 		}
-	}
-	else{
+	} else {
 
 		tx_pkt_size = tx_data_rate/tx_pkt_rate;
-		if (tx_pkt_size < 128){
+		if (tx_pkt_size < 128) {
 
-			if(coal_conf->tx_coal_type != NO_COALESCE){
+			if (coal_conf->tx_coal_type != NO_COALESCE) {
 
 				coal_conf->tx_timeout = 0;
 				coal_conf->tx_event_count = 0;
-				amd8111e_set_coalesce(dev,TX_INTR_COAL);
+				amd8111e_set_coalesce(dev, TX_INTR_COAL);
 				coal_conf->tx_coal_type = NO_COALESCE;
 			}
 
-		}
-		else if ( (tx_pkt_size >= 128) && (tx_pkt_size < 512) ){
+		} else if ((tx_pkt_size >= 128) && (tx_pkt_size < 512)) {
 
-			if(coal_conf->tx_coal_type !=  LOW_COALESCE){
+			if (coal_conf->tx_coal_type != LOW_COALESCE) {
 				coal_conf->tx_timeout = 1;
 				coal_conf->tx_event_count = 2;
-				amd8111e_set_coalesce(dev,TX_INTR_COAL);
+				amd8111e_set_coalesce(dev, TX_INTR_COAL);
 				coal_conf->tx_coal_type = LOW_COALESCE;
 
 			}
-		}
-		else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)){
+		} else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)) {
 
-			if(coal_conf->tx_coal_type !=  MEDIUM_COALESCE){
+			if (coal_conf->tx_coal_type != MEDIUM_COALESCE) {
 				coal_conf->tx_timeout = 2;
 				coal_conf->tx_event_count = 5;
-				amd8111e_set_coalesce(dev,TX_INTR_COAL);
+				amd8111e_set_coalesce(dev, TX_INTR_COAL);
 				coal_conf->tx_coal_type = MEDIUM_COALESCE;
 			}
 		} else if (tx_pkt_size >= 1024) {
@@ -1091,7 +1084,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 	unsigned int intr0, intren0;
 	unsigned int handled = 1;
 
-	if(unlikely(dev == NULL))
+	if (unlikely(dev == NULL))
 		return IRQ_NONE;
 
 	spin_lock(&lp->lock);
@@ -1105,7 +1098,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 
 	/* Process all the INT event until INTR bit is clear. */
 
-	if (!(intr0 & INTR)){
+	if (!(intr0 & INTR)) {
 		handled = 0;
 		goto err_no_interrupt;
 	}
@@ -1140,7 +1133,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 		amd8111e_calc_coalesce(dev);
 
 err_no_interrupt:
-	writel( VAL0 | INTREN,mmio + CMD0);
+	writel(VAL0 | INTREN, mmio + CMD0);
 
 	spin_unlock(&lp->lock);
 
@@ -1180,7 +1173,7 @@ static int amd8111e_close(struct net_device *dev)
 	netif_carrier_off(lp->amd8111e_net_dev);
 
 	/* Delete ipg timer */
-	if(lp->options & OPTION_DYN_IPG_ENABLE)
+	if (lp->options & OPTION_DYN_IPG_ENABLE)
 		del_timer_sync(&lp->ipg_data.ipg_timer);
 
 	spin_unlock_irq(&lp->lock);
@@ -1200,8 +1193,8 @@ static int amd8111e_open(struct net_device *dev)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
 
-	if(dev->irq ==0 || request_irq(dev->irq, amd8111e_interrupt, IRQF_SHARED,
-					 dev->name, dev))
+	if (dev->irq == 0 || request_irq(dev->irq, amd8111e_interrupt,
+					 IRQF_SHARED, dev->name, dev))
 		return -EAGAIN;
 
 	napi_enable(&lp->napi);
@@ -1210,7 +1203,7 @@ static int amd8111e_open(struct net_device *dev)
 
 	amd8111e_init_hw_default(lp);
 
-	if(amd8111e_restart(dev)){
+	if (amd8111e_restart(dev)) {
 		spin_unlock_irq(&lp->lock);
 		napi_disable(&lp->napi);
 		if (dev->irq)
@@ -1218,7 +1211,7 @@ static int amd8111e_open(struct net_device *dev)
 		return -ENOMEM;
 	}
 	/* Start ipg timer */
-	if(lp->options & OPTION_DYN_IPG_ENABLE){
+	if (lp->options & OPTION_DYN_IPG_ENABLE) {
 		add_timer(&lp->ipg_data.ipg_timer);
 		netdev_info(dev, "Dynamic IPG Enabled\n");
 	}
@@ -1289,10 +1282,10 @@ static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb,
 	lp->tx_idx++;
 
 	/* Trigger an immediate send poll. */
-	writel( VAL1 | TDMD0, lp->mmio + CMD0);
-	writel( VAL2 | RDMD0,lp->mmio + CMD0);
+	writel(VAL1 | TDMD0, lp->mmio + CMD0);
+	writel(VAL2 | RDMD0, lp->mmio + CMD0);
 
-	if(amd8111e_tx_queue_avail(lp) < 0){
+	if (amd8111e_tx_queue_avail(lp) < 0) {
 		netif_stop_queue(dev);
 	}
 	spin_unlock_irqrestore(&lp->lock, flags);
@@ -1326,15 +1319,15 @@ static void amd8111e_set_multicast_list(struct net_device *dev)
 {
 	struct netdev_hw_addr *ha;
 	struct amd8111e_priv *lp = netdev_priv(dev);
-	u32 mc_filter[2] ;
+	u32 mc_filter[2];
 	int bit_num;
 
-	if(dev->flags & IFF_PROMISC){
-		writel( VAL2 | PROM, lp->mmio + CMD2);
+	if (dev->flags & IFF_PROMISC) {
+		writel(VAL2 | PROM, lp->mmio + CMD2);
 		return;
 	}
 	else
-		writel( PROM, lp->mmio + CMD2);
+		writel(PROM, lp->mmio + CMD2);
 	if (dev->flags & IFF_ALLMULTI ||
 	    netdev_mc_count(dev) > MAX_FILTER_SIZE) {
 		/* get all multicast packet */
@@ -1439,7 +1432,7 @@ static int amd8111e_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol_
 	if (wol_info->wolopts & WAKE_MAGIC)
 		lp->options |=
 			(OPTION_WOL_ENABLE | OPTION_WAKE_MAGIC_ENABLE);
-	else if(wol_info->wolopts & WAKE_PHY)
+	else if (wol_info->wolopts & WAKE_PHY)
 		lp->options |=
 			(OPTION_WOL_ENABLE | OPTION_WAKE_PHY_ENABLE);
 	else
@@ -1464,14 +1457,14 @@ static const struct ethtool_ops ops = {
  * gets/sets driver speed, gets memory mapped register values, forces
  * auto negotiation, sets/gets WOL options for ethtool application.
  */
-static int amd8111e_ioctl(struct net_device *dev , struct ifreq *ifr, int cmd)
+static int amd8111e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct mii_ioctl_data *data = if_mii(ifr);
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	int err;
 	u32 mii_regval;
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGMIIPHY:
 		data->phy_id = lp->ext_phy_addr;
 
@@ -1511,7 +1504,7 @@ static int amd8111e_set_mac_address(struct net_device *dev, void *p)
 	spin_lock_irq(&lp->lock);
 	/* Setting the MAC address to the device */
 	for (i = 0; i < ETH_ALEN; i++)
-		writeb( dev->dev_addr[i], lp->mmio + PADR + i );
+		writeb(dev->dev_addr[i], lp->mmio + PADR + i);
 
 	spin_unlock_irq(&lp->lock);
 
@@ -1536,22 +1529,22 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu)
 
 	spin_lock_irq(&lp->lock);
 
-        /* stop the chip */
+	/* stop the chip */
 	writel(RUN, lp->mmio + CMD0);
 
 	dev->mtu = new_mtu;
 
 	err = amd8111e_restart(dev);
 	spin_unlock_irq(&lp->lock);
-	if(!err)
+	if (!err)
 		netif_start_queue(dev);
 	return err;
 }
 
 static int amd8111e_enable_magicpkt(struct amd8111e_priv *lp)
 {
-	writel( VAL1|MPPLBA, lp->mmio + CMD3);
-	writel( VAL0|MPEN_SW, lp->mmio + CMD7);
+	writel(VAL1 | MPPLBA, lp->mmio + CMD3);
+	writel(VAL0 | MPEN_SW, lp->mmio + CMD7);
 
 	/* To eliminate PCI posting bug */
 	readl(lp->mmio + CMD7);
@@ -1562,7 +1555,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp)
 {
 
 	/* Adapter is already stoped/suspended/interrupt-disabled */
-	writel(VAL0|LCMODE_SW,lp->mmio + CMD7);
+	writel(VAL0 | LCMODE_SW, lp->mmio + CMD7);
 
 	/* To eliminate PCI posting bug */
 	readl(lp->mmio + CMD7);
@@ -1584,7 +1577,7 @@ static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue)
 	spin_lock_irq(&lp->lock);
 	err = amd8111e_restart(dev);
 	spin_unlock_irq(&lp->lock);
-	if(!err)
+	if (!err)
 		netif_wake_queue(dev);
 }
 
@@ -1605,22 +1598,21 @@ static int __maybe_unused amd8111e_suspend(struct device *dev_d)
 
 	/* stop chip */
 	spin_lock_irq(&lp->lock);
-	if(lp->options & OPTION_DYN_IPG_ENABLE)
+	if (lp->options & OPTION_DYN_IPG_ENABLE)
 		del_timer_sync(&lp->ipg_data.ipg_timer);
 	amd8111e_stop_chip(lp);
 	spin_unlock_irq(&lp->lock);
 
-	if(lp->options & OPTION_WOL_ENABLE){
+	if (lp->options & OPTION_WOL_ENABLE) {
 		 /* enable wol */
-		if(lp->options & OPTION_WAKE_MAGIC_ENABLE)
+		if (lp->options & OPTION_WAKE_MAGIC_ENABLE)
 			amd8111e_enable_magicpkt(lp);
-		if(lp->options & OPTION_WAKE_PHY_ENABLE)
+		if (lp->options & OPTION_WAKE_PHY_ENABLE)
 			amd8111e_enable_link_change(lp);
 
 		device_set_wakeup_enable(dev_d, 1);
 
-	}
-	else{
+	} else {
 		device_set_wakeup_enable(dev_d, 0);
 	}
 
@@ -1640,7 +1632,7 @@ static int __maybe_unused amd8111e_resume(struct device *dev_d)
 	spin_lock_irq(&lp->lock);
 	amd8111e_restart(dev);
 	/* Restart ipg timer */
-	if(lp->options & OPTION_DYN_IPG_ENABLE)
+	if (lp->options & OPTION_DYN_IPG_ENABLE)
 		mod_timer(&lp->ipg_data.ipg_timer,
 				jiffies + IPG_CONVERGE_JIFFIES);
 	spin_unlock_irq(&lp->lock);
@@ -1657,14 +1649,14 @@ static void amd8111e_config_ipg(struct timer_list *t)
 	unsigned int total_col_cnt;
 	unsigned int tmp_ipg;
 
-	if(lp->link_config.duplex == DUPLEX_FULL){
+	if (lp->link_config.duplex == DUPLEX_FULL) {
 		ipg_data->ipg = DEFAULT_IPG;
 		return;
 	}
 
-	if(ipg_data->ipg_state == SSTATE){
+	if (ipg_data->ipg_state == SSTATE) {
 
-		if(ipg_data->timer_tick == IPG_STABLE_TIME){
+		if (ipg_data->timer_tick == IPG_STABLE_TIME) {
 
 			ipg_data->timer_tick = 0;
 			ipg_data->ipg = MIN_IPG - IPG_STEP;
@@ -1676,7 +1668,7 @@ static void amd8111e_config_ipg(struct timer_list *t)
 			ipg_data->timer_tick++;
 	}
 
-	if(ipg_data->ipg_state == CSTATE){
+	if (ipg_data->ipg_state == CSTATE) {
 
 		/* Get the current collision count */
 
@@ -1684,10 +1676,10 @@ static void amd8111e_config_ipg(struct timer_list *t)
 				amd8111e_read_mib(mmio, xmt_collisions);
 
 		if ((total_col_cnt - prev_col_cnt) <
-				(ipg_data->diff_col_cnt)){
+				(ipg_data->diff_col_cnt)) {
 
 			ipg_data->diff_col_cnt =
-				total_col_cnt - prev_col_cnt ;
+				total_col_cnt - prev_col_cnt;
 
 			ipg_data->ipg = ipg_data->current_ipg;
 		}
@@ -1696,7 +1688,7 @@ static void amd8111e_config_ipg(struct timer_list *t)
 
 		if (ipg_data->current_ipg <= MAX_IPG)
 			tmp_ipg = ipg_data->current_ipg;
-		else{
+		else {
 			tmp_ipg = ipg_data->ipg;
 			ipg_data->ipg_state = SSTATE;
 		}
@@ -1748,24 +1740,24 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 				  const struct pci_device_id *ent)
 {
 	int err, i;
-	unsigned long reg_addr,reg_len;
+	unsigned long reg_addr, reg_len;
 	struct amd8111e_priv *lp;
 	struct net_device *dev;
 
 	err = pci_enable_device(pdev);
-	if(err){
+	if (err) {
 		dev_err(&pdev->dev, "Cannot enable new PCI device\n");
 		return err;
 	}
 
-	if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)){
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "Cannot find PCI base address\n");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
 
 	err = pci_request_regions(pdev, MODULE_NAME);
-	if(err){
+	if (err) {
 		dev_err(&pdev->dev, "Cannot obtain PCI resources\n");
 		goto err_disable_pdev;
 	}
@@ -1798,7 +1790,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 #if AMD8111E_VLAN_TAG_USED
-	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX ;
+	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 #endif
 
 	lp = netdev_priv(dev);
@@ -1821,16 +1813,16 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 
 	/* Setting user defined parametrs */
 	lp->ext_phy_option = speed_duplex[card_idx];
-	if(coalesce[card_idx])
+	if (coalesce[card_idx])
 		lp->options |= OPTION_INTR_COAL_ENABLE;
-	if(dynamic_ipg[card_idx++])
+	if (dynamic_ipg[card_idx++])
 		lp->options |= OPTION_DYN_IPG_ENABLE;
 
 
 	/* Initialize driver entry points */
 	dev->netdev_ops = &amd8111e_netdev_ops;
 	dev->ethtool_ops = &ops;
-	dev->irq =pdev->irq;
+	dev->irq = pdev->irq;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
 	dev->min_mtu = AMD8111E_MIN_MTU;
 	dev->max_mtu = AMD8111E_MAX_MTU;
@@ -1861,7 +1853,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, dev);
 
 	/* Initialize software ipg timer */
-	if(lp->options & OPTION_DYN_IPG_ENABLE){
+	if (lp->options & OPTION_DYN_IPG_ENABLE) {
 		timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0);
 		lp->ipg_data.ipg_timer.expires = jiffies +
 						 IPG_CONVERGE_JIFFIES;
@@ -1870,7 +1862,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 	}
 
 	/*  display driver and device information */
-    	chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000)>>28;
+	chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000) >> 28;
 	dev_info(&pdev->dev, "[ Rev %x ] PCI 10/100BaseT Ethernet %pM\n",
 		 chip_version, dev->dev_addr);
 	if (lp->ext_phy_id)
@@ -1879,7 +1871,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 	else
 		dev_info(&pdev->dev, "Couldn't detect MII PHY, assuming address 0x01\n");
 
-    	return 0;
+	return 0;
 
 err_free_dev:
 	free_netdev(dev);
@@ -1919,7 +1911,7 @@ MODULE_DEVICE_TABLE(pci, amd8111e_pci_tbl);
 static SIMPLE_DEV_PM_OPS(amd8111e_pm_ops, amd8111e_suspend, amd8111e_resume);
 
 static struct pci_driver amd8111e_driver = {
-	.name   	= MODULE_NAME,
+	.name		= MODULE_NAME,
 	.id_table	= amd8111e_pci_tbl,
 	.probe		= amd8111e_probe_one,
 	.remove		= amd8111e_remove_one,
-- 
cgit v1.2.3


From 3f6ebcffaf673490ec95024a8d6e67b890cc53e2 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:31 +0800
Subject: net: amd: correct some format issues

There should be a blank line after declarations.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/hplance.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c
index e10aceb2b767..6784f8748638 100644
--- a/drivers/net/ethernet/amd/hplance.c
+++ b/drivers/net/ethernet/amd/hplance.c
@@ -170,6 +170,7 @@ static void hplance_init(struct net_device *dev, struct dio_dev *d)
 static void hplance_writerap(void *priv, unsigned short value)
 {
 	struct lance_private *lp = (struct lance_private *)priv;
+
 	do {
 		out_be16(lp->base + HPLANCE_REGOFF + LANCE_RAP, value);
 	} while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0);
@@ -178,6 +179,7 @@ static void hplance_writerap(void *priv, unsigned short value)
 static void hplance_writerdp(void *priv, unsigned short value)
 {
 	struct lance_private *lp = (struct lance_private *)priv;
+
 	do {
 		out_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP, value);
 	} while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0);
@@ -187,6 +189,7 @@ static unsigned short hplance_readrdp(void *priv)
 {
 	struct lance_private *lp = (struct lance_private *)priv;
 	__u16 value;
+
 	do {
 		value = in_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP);
 	} while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0);
-- 
cgit v1.2.3


From 1f78ff4ff7089b8265278d0bbf937fd8e5958dcf Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:32 +0800
Subject: net: ocelot: fix a trailling format issue with block comments

Use a tralling */ on a separate line for block comments.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 1a36b416fd9b..8d06ffaf318a 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -484,7 +484,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port,
 			   DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
 
 	/* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of
-	 * reset */
+	 * reset
+	 */
 	ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed),
 			   DEV_CLOCK_CFG);
 
-- 
cgit v1.2.3


From 142c1d2ed96604ec09bbc4076d2a8d09271850d3 Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Wed, 31 Mar 2021 16:18:33 +0800
Subject: net: toshiba: fix the trailing format of some block comments

Use a trailling */ on a separate line for block comments.

Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/toshiba/spider_net.c | 42 ++++++++++++++++++++-----------
 drivers/net/ethernet/toshiba/tc35815.c    |  3 ++-
 2 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index d5a75ef7e3ca..226a76633e65 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -146,7 +146,8 @@ spider_net_read_phy(struct net_device *netdev, int mii_id, int reg)
 
 	/* we don't use semaphores to wait for an SPIDER_NET_GPROPCMPINT
 	 * interrupt, as we poll for the completion of the read operation
-	 * in spider_net_read_phy. Should take about 50 us */
+	 * in spider_net_read_phy. Should take about 50 us
+	 */
 	do {
 		readvalue = spider_net_read_reg(card, SPIDER_NET_GPCROPCMD);
 	} while (readvalue & SPIDER_NET_GPREXEC);
@@ -387,7 +388,8 @@ spider_net_prepare_rx_descr(struct spider_net_card *card,
 		(~(SPIDER_NET_RXBUF_ALIGN - 1));
 
 	/* and we need to have it 128 byte aligned, therefore we allocate a
-	 * bit more */
+	 * bit more
+	 */
 	/* allocate an skb */
 	descr->skb = netdev_alloc_skb(card->netdev,
 				      bufsize + SPIDER_NET_RXBUF_ALIGN - 1);
@@ -488,7 +490,8 @@ spider_net_refill_rx_chain(struct spider_net_card *card)
 	/* one context doing the refill (and a second context seeing that
 	 * and omitting it) is ok. If called by NAPI, we'll be called again
 	 * as spider_net_decode_one_descr is called several times. If some
-	 * interrupt calls us, the NAPI is about to clean up anyway. */
+	 * interrupt calls us, the NAPI is about to clean up anyway.
+	 */
 	if (!spin_trylock_irqsave(&chain->lock, flags))
 		return;
 
@@ -523,14 +526,16 @@ spider_net_alloc_rx_skbs(struct spider_net_card *card)
 
 	/* Put at least one buffer into the chain. if this fails,
 	 * we've got a problem. If not, spider_net_refill_rx_chain
-	 * will do the rest at the end of this function. */
+	 * will do the rest at the end of this function.
+	 */
 	if (spider_net_prepare_rx_descr(card, chain->head))
 		goto error;
 	else
 		chain->head = chain->head->next;
 
 	/* This will allocate the rest of the rx buffers;
-	 * if not, it's business as usual later on. */
+	 * if not, it's business as usual later on.
+	 */
 	spider_net_refill_rx_chain(card);
 	spider_net_enable_rxdmac(card);
 	return 0;
@@ -706,7 +711,8 @@ spider_net_set_low_watermark(struct spider_net_card *card)
 	int i;
 
 	/* Measure the length of the queue. Measurement does not
-	 * need to be precise -- does not need a lock. */
+	 * need to be precise -- does not need a lock.
+	 */
 	while (descr != card->tx_chain.head) {
 		status = descr->hwdescr->dmac_cmd_status & SPIDER_NET_DESCR_NOT_IN_USE;
 		if (status == SPIDER_NET_DESCR_NOT_IN_USE)
@@ -786,7 +792,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal)
 
 			/* fallthrough, if we release the descriptors
 			 * brutally (then we don't care about
-			 * SPIDER_NET_DESCR_CARDOWNED) */
+			 * SPIDER_NET_DESCR_CARDOWNED)
+			 */
 			fallthrough;
 
 		case SPIDER_NET_DESCR_RESPONSE_ERROR:
@@ -948,7 +955,8 @@ spider_net_pass_skb_up(struct spider_net_descr *descr,
 	skb_put(skb, hwdescr->valid_size);
 
 	/* the card seems to add 2 bytes of junk in front
-	 * of the ethernet frame */
+	 * of the ethernet frame
+	 */
 #define SPIDER_MISALIGN		2
 	skb_pull(skb, SPIDER_MISALIGN);
 	skb->protocol = eth_type_trans(skb, netdev);
@@ -1382,7 +1390,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 		/* PHY read operation completed */
 		/* we don't use semaphores, as we poll for the completion
 		 * of the read operation in spider_net_read_phy. Should take
-		 * about 50 us */
+		 * about 50 us
+		 */
 		show_error = 0;
 		break;
 	case SPIDER_NET_GPWFFINT:
@@ -1450,7 +1459,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg,
 	{
 	case SPIDER_NET_GTMFLLINT:
 		/* TX RAM full may happen on a usual case.
-		 * Logging is not needed. */
+		 * Logging is not needed.
+		 */
 		show_error = 0;
 		break;
 	case SPIDER_NET_GRFDFLLINT:
@@ -1694,7 +1704,8 @@ spider_net_enable_card(struct spider_net_card *card)
 {
 	int i;
 	/* the following array consists of (register),(value) pairs
-	 * that are set in this function. A register of 0 ends the list */
+	 * that are set in this function. A register of 0 ends the list
+	 */
 	u32 regs[][2] = {
 		{ SPIDER_NET_GRESUMINTNUM, 0 },
 		{ SPIDER_NET_GREINTNUM, 0 },
@@ -1757,7 +1768,8 @@ spider_net_enable_card(struct spider_net_card *card)
 	spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE);
 
 	/* set chain tail address for RX chains and
-	 * enable DMA */
+	 * enable DMA
+	 */
 	spider_net_enable_rxchtails(card);
 	spider_net_enable_rxdmac(card);
 
@@ -1995,7 +2007,8 @@ static void spider_net_link_phy(struct timer_list *t)
 
 		case BCM54XX_UNKNOWN:
 			/* copper, fiber with and without failed,
-			 * retry from beginning */
+			 * retry from beginning
+			 */
 			spider_net_setup_aneg(card);
 			card->medium = BCM54XX_COPPER;
 			break;
@@ -2263,7 +2276,8 @@ spider_net_setup_netdev(struct spider_net_card *card)
 		netdev->features |= NETIF_F_RXCSUM;
 	netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX;
 	/* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
-	 *		NETIF_F_HW_VLAN_CTAG_FILTER */
+	 *		NETIF_F_HW_VLAN_CTAG_FILTER
+	 */
 
 	/* MTU range: 64 - 2294 */
 	netdev->min_mtu = SPIDER_NET_MIN_MTU;
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index 7a6e5ff8e5d4..fedb2bf69261 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1914,7 +1914,8 @@ tc35815_set_multicast_list(struct net_device *dev)
 
 	if (dev->flags & IFF_PROMISC) {
 		/* With some (all?) 100MHalf HUB, controller will hang
-		 * if we enabled promiscuous mode before linkup... */
+		 * if we enabled promiscuous mode before linkup...
+		 */
 		struct tc35815_local *lp = netdev_priv(dev);
 
 		if (!lp->link)
-- 
cgit v1.2.3


From 44d043b53d3867523960f79c6a909c976e15f3f7 Mon Sep 17 00:00:00 2001
From: Yangyang Li <liyangyang20@huawei.com>
Date: Wed, 31 Mar 2021 16:18:34 +0800
Subject: net: lpc_eth: fix format warnings of block comments

Fix the following format warning:
1. Block comments use * on subsequent lines
2. Block comments use a trailing */ on a separate line

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/nxp/lpc_eth.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index d3cbb4215f5c..e72fd33a214c 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1044,7 +1044,8 @@ static netdev_tx_t lpc_eth_hard_start_xmit(struct sk_buff *skb,
 
 	if (pldat->num_used_tx_buffs >= (ENET_TX_DESC - 1)) {
 		/* This function should never be called when there are no
-		   buffers */
+		 * buffers
+		 */
 		netif_stop_queue(ndev);
 		spin_unlock_irq(&pldat->lock);
 		WARN(1, "BUG! TX request when no free TX buffers!\n");
@@ -1318,7 +1319,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 		pldat->dma_buff_size = PAGE_ALIGN(pldat->dma_buff_size);
 
 		/* Allocate a chunk of memory for the DMA ethernet buffers
-		   and descriptors */
+		 * and descriptors
+		 */
 		pldat->dma_buff_base_v =
 			dma_alloc_coherent(dev,
 					   pldat->dma_buff_size, &dma_handle,
@@ -1365,7 +1367,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	__lpc_mii_mngt_reset(pldat);
 
 	/* Force default PHY interface setup in chip, this will probably be
-	   changed by the PHY driver */
+	 * changed by the PHY driver
+	 */
 	pldat->link = 0;
 	pldat->speed = 100;
 	pldat->duplex = DUPLEX_FULL;
-- 
cgit v1.2.3


From 1caf8d39c58f3f63193d02928c8dce3fa07cee52 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:05 -0700
Subject: inet: shrink inet_timewait_death_row by 48 bytes

struct inet_timewait_death_row uses two cache lines, because we want
tw_count to use a full cache line to avoid false sharing.

Rework its definition and placement in netns_ipv4 so that:

1) We add 60 bytes of padding after tw_count to avoid
  false sharing, knowing that tcp_death_row will
  have ____cacheline_aligned_in_smp attribute.

2) We do not risk padding before tcp_death_row, because
  we move it at the beginning of netns_ipv4, even if new
 fields are added later.

3) We do not waste 48 bytes of padding after it.

Note that I have not changed dccp.

pahole result for struct netns_ipv4 before/after the patch :

/* size: 832, cachelines: 13, members: 139 */
/* sum members: 721, holes: 12, sum holes: 95 */
/* padding: 16 */
/* paddings: 2, sum paddings: 55 */

->

/* size: 768, cachelines: 12, members: 139 */
/* sum members: 673, holes: 11, sum holes: 39 */
/* padding: 56 */
/* paddings: 2, sum paddings: 7 */
/* forced alignments: 1 */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9c8dd424d79b..1085ed4e0788 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -32,14 +32,18 @@ struct inet_hashinfo;
 
 struct inet_timewait_death_row {
 	atomic_t		tw_count;
+	char			tw_pad[L1_CACHE_BYTES - sizeof(atomic_t)];
 
-	struct inet_hashinfo 	*hashinfo ____cacheline_aligned_in_smp;
+	struct inet_hashinfo 	*hashinfo;
 	int			sysctl_max_tw_buckets;
 };
 
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
+	/* Please keep tcp_death_row at first field in netns_ipv4 */
+	struct inet_timewait_death_row tcp_death_row ____cacheline_aligned_in_smp;
+
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*forw_hdr;
 	struct ctl_table_header	*frags_hdr;
@@ -175,7 +179,6 @@ struct netns_ipv4 {
 	int sysctl_tcp_comp_sack_nr;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	unsigned long sysctl_tcp_comp_sack_slack_ns;
-	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_fastopen;
 	const struct tcp_congestion_ops __rcu  *tcp_congestion_control;
-- 
cgit v1.2.3


From 490f33c4e70431d0a4d01666a6525fdd43299cde Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:06 -0700
Subject: inet: shrink netns_ipv4 by another cache line

By shuffling around some fields to remove 8 bytes of hole,
we can save one cache line.

pahole result before/after the patch :

/* size: 768, cachelines: 12, members: 139 */
/* sum members: 673, holes: 11, sum holes: 39 */
/* padding: 56 */
/* paddings: 2, sum paddings: 7 */
/* forced alignments: 1 */

->

/* size: 704, cachelines: 11, members: 139 */
/* sum members: 673, holes: 10, sum holes: 31 */
/* paddings: 2, sum paddings: 7 */
/* forced alignments: 1 */

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1085ed4e0788..538ed69919dc 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -57,17 +57,17 @@ struct netns_ipv4 {
 	struct mutex		ra_mutex;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
-	bool			fib_has_custom_rules;
-	unsigned int		fib_rules_require_fldissect;
 	struct fib_table __rcu	*fib_main;
 	struct fib_table __rcu	*fib_default;
+	unsigned int		fib_rules_require_fldissect;
+	bool			fib_has_custom_rules;
 #endif
 	bool			fib_has_custom_local_routes;
+	bool			fib_offload_disabled;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	int			fib_num_tclassid_users;
 #endif
 	struct hlist_head	*fib_table_hash;
-	bool			fib_offload_disabled;
 	struct sock		*fibnl;
 
 	struct sock  * __percpu	*icmp_sk;
-- 
cgit v1.2.3


From b2908fac5b7b23c03fa1d3e1055ad95ba305c871 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:07 -0700
Subject: ipv4: convert fib_notify_on_flag_change sysctl to u8

Reduce footprint of sysctls.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 538ed69919dc..b187ac597b8c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -191,7 +191,7 @@ struct netns_ipv4 {
 	int sysctl_udp_wmem_min;
 	int sysctl_udp_rmem_min;
 
-	int sysctl_fib_notify_on_flag_change;
+	u8 sysctl_fib_notify_on_flag_change;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	int sysctl_udp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9199f507a005..a2352d8d88cc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1364,9 +1364,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "fib_notify_on_flag_change",
 		.data		= &init_net.ipv4.sysctl_fib_notify_on_flag_change,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &two,
 	},
-- 
cgit v1.2.3


From cd04bd022258f4aa6e8392c8133dbbf31da0f12f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:08 -0700
Subject: ipv4: convert udp_l3mdev_accept sysctl to u8

Reduce footprint of sysctls.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b187ac597b8c..d309b1b89715 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -194,7 +194,7 @@ struct netns_ipv4 {
 	u8 sysctl_fib_notify_on_flag_change;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
-	int sysctl_udp_l3mdev_accept;
+	u8 sysctl_udp_l3mdev_accept;
 #endif
 
 	int sysctl_igmp_max_memberships;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a2352d8d88cc..1b6ce649a433 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1065,9 +1065,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "udp_l3mdev_accept",
 		.data		= &init_net.ipv4.sysctl_udp_l3mdev_accept,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
-- 
cgit v1.2.3


From be205fe6ec4ffd6875f69e61205163fb686a5c74 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:09 -0700
Subject: ipv4: convert fib_multipath_{use_neigh|hash_policy} sysctls to u8

Make room for better packing of netns_ipv4

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 4 ++--
 net/ipv4/sysctl_net_ipv4.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d309b1b89715..eb6ca07d3b0f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -220,8 +220,8 @@ struct netns_ipv4 {
 #endif
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-	int sysctl_fib_multipath_use_neigh;
-	int sysctl_fib_multipath_hash_policy;
+	u8 sysctl_fib_multipath_use_neigh;
+	u8 sysctl_fib_multipath_hash_policy;
 #endif
 
 	struct fib_notifier_ops	*notifier_ops;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1b6ce649a433..ad75d6bb2df7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -456,7 +456,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
 	    ipv4.sysctl_fib_multipath_hash_policy);
 	int ret;
 
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
 		call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
 
@@ -1038,16 +1038,16 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "fib_multipath_use_neigh",
 		.data		= &init_net.ipv4.sysctl_fib_multipath_use_neigh,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
 	{
 		.procname	= "fib_multipath_hash_policy",
 		.data		= &init_net.ipv4.sysctl_fib_multipath_hash_policy,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
 		.proc_handler	= proc_fib_multipath_hash_policy,
 		.extra1		= SYSCTL_ZERO,
-- 
cgit v1.2.3


From 7d4b37ebb934aa32a54666fe9153d127c33ff89a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:10 -0700
Subject: ipv4: convert igmp_link_local_mcast_reports sysctl to u8

This sysctl is a bool, can use less storage.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index eb6ca07d3b0f..fafcedf64383 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -197,9 +197,9 @@ struct netns_ipv4 {
 	u8 sysctl_udp_l3mdev_accept;
 #endif
 
+	u8 sysctl_igmp_llm_reports;
 	int sysctl_igmp_max_memberships;
 	int sysctl_igmp_max_msf;
-	int sysctl_igmp_llm_reports;
 	int sysctl_igmp_qrv;
 
 	struct ping_group_range ping_group_range;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ad75d6bb2df7..fd2b35065bb2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -848,9 +848,9 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "igmp_link_local_mcast_reports",
 		.data		= &init_net.ipv4.sysctl_igmp_llm_reports,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "igmp_max_memberships",
-- 
cgit v1.2.3


From 1c3289c931740f235b29be5182e5f2dfb004593d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:11 -0700
Subject: tcp: convert tcp_comp_sack_nr sysctl to u8

tcp_comp_sack_nr max value was already 255.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index fafcedf64383..87e1612497ea 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -171,12 +171,12 @@ struct netns_ipv4 {
 	u8 sysctl_tcp_min_tso_segs;
 	u8 sysctl_tcp_autocorking;
 	u8 sysctl_tcp_reflect_tos;
+	u8 sysctl_tcp_comp_sack_nr;
 	int sysctl_tcp_invalid_ratelimit;
 	int sysctl_tcp_pacing_ss_ratio;
 	int sysctl_tcp_pacing_ca_ratio;
 	int sysctl_tcp_wmem[3];
 	int sysctl_tcp_rmem[3];
-	int sysctl_tcp_comp_sack_nr;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	unsigned long sysctl_tcp_comp_sack_slack_ns;
 	int sysctl_max_syn_backlog;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fd2b35065bb2..a09e466ce11d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -46,7 +46,6 @@ static int tcp_syn_retries_min = 1;
 static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
-static int comp_sack_nr_max = 255;
 static u32 u32_max_div_HZ = UINT_MAX / HZ;
 static int one_day_secs = 24 * 3600;
 
@@ -1330,11 +1329,10 @@ static struct ctl_table ipv4_net_table[] = {
 	{
 		.procname	= "tcp_comp_sack_nr",
 		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_nr,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1		= SYSCTL_ZERO,
-		.extra2		= &comp_sack_nr_max,
 	},
 	{
 		.procname       = "tcp_reflect_tos",
-- 
cgit v1.2.3


From a6175633a2af0eae07127311563d2a75096c111a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:12 -0700
Subject: ipv6: convert elligible sysctls to u8

Convert most sysctls that can fit in a byte.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h   | 24 ++++++++++++------------
 net/ipv6/icmp.c            | 12 ++++++------
 net/ipv6/sysctl_net_ipv6.c | 38 ++++++++++++++++++--------------------
 3 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21c0debbd39e..84f4a8bec397 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -20,7 +20,6 @@ struct netns_sysctl_ipv6 {
 	struct ctl_table_header *frags_hdr;
 	struct ctl_table_header *xfrm6_hdr;
 #endif
-	int bindv6only;
 	int flush_delay;
 	int ip6_rt_max_size;
 	int ip6_rt_gc_min_interval;
@@ -29,21 +28,22 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_gc_elasticity;
 	int ip6_rt_mtu_expires;
 	int ip6_rt_min_advmss;
-	int multipath_hash_policy;
-	int flowlabel_consistency;
-	int auto_flowlabels;
+	u8 bindv6only;
+	u8 multipath_hash_policy;
+	u8 flowlabel_consistency;
+	u8 auto_flowlabels;
 	int icmpv6_time;
-	int icmpv6_echo_ignore_all;
-	int icmpv6_echo_ignore_multicast;
-	int icmpv6_echo_ignore_anycast;
+	u8 icmpv6_echo_ignore_all;
+	u8 icmpv6_echo_ignore_multicast;
+	u8 icmpv6_echo_ignore_anycast;
 	DECLARE_BITMAP(icmpv6_ratemask, ICMPV6_MSG_MAX + 1);
 	unsigned long *icmpv6_ratemask_ptr;
-	int anycast_src_echo_reply;
-	int ip_nonlocal_bind;
-	int fwmark_reflect;
+	u8 anycast_src_echo_reply;
+	u8 ip_nonlocal_bind;
+	u8 fwmark_reflect;
+	u8 flowlabel_state_ranges;
 	int idgen_retries;
 	int idgen_delay;
-	int flowlabel_state_ranges;
 	int flowlabel_reflect;
 	int max_dst_opts_cnt;
 	int max_hbh_opts_cnt;
@@ -51,7 +51,7 @@ struct netns_sysctl_ipv6 {
 	int max_hbh_opts_len;
 	int seg6_flowlabel;
 	bool skip_notify_on_dev_down;
-	int fib_notify_on_flag_change;
+	u8 fib_notify_on_flag_change;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 29d38d6b55fb..1bca2b09d77e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1169,23 +1169,23 @@ static struct ctl_table ipv6_icmp_table_template[] = {
 	{
 		.procname	= "echo_ignore_all",
 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "echo_ignore_multicast",
 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "echo_ignore_anycast",
 		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler = proc_dointvec,
+		.proc_handler = proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ratemask",
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 263ab43ed06b..27102c3d6e1d 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -23,7 +23,6 @@
 
 static int two = 2;
 static int flowlabel_reflect_max = 0x7;
-static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
 static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
@@ -34,7 +33,7 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
 
 	net = container_of(table->data, struct net,
 			   ipv6.sysctl.multipath_hash_policy);
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
 	if (write && ret == 0)
 		call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
 
@@ -45,39 +44,38 @@ static struct ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "bindv6only",
 		.data		= &init_net.ipv6.sysctl.bindv6only,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "anycast_src_echo_reply",
 		.data		= &init_net.ipv6.sysctl.anycast_src_echo_reply,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "flowlabel_consistency",
 		.data		= &init_net.ipv6.sysctl.flowlabel_consistency,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "auto_flowlabels",
 		.data		= &init_net.ipv6.sysctl.auto_flowlabels,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &auto_flowlabels_min,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra2		= &auto_flowlabels_max
 	},
 	{
 		.procname	= "fwmark_reflect",
 		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "idgen_retries",
@@ -96,16 +94,16 @@ static struct ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "flowlabel_state_ranges",
 		.data		= &init_net.ipv6.sysctl.flowlabel_state_ranges,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "ip_nonlocal_bind",
 		.data		= &init_net.ipv6.sysctl.ip_nonlocal_bind,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	{
 		.procname	= "flowlabel_reflect",
@@ -147,7 +145,7 @@ static struct ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "fib_multipath_hash_policy",
 		.data		= &init_net.ipv6.sysctl.multipath_hash_policy,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
 		.proc_handler   = proc_rt6_multipath_hash_policy,
 		.extra1		= SYSCTL_ZERO,
@@ -163,9 +161,9 @@ static struct ctl_table ipv6_table_template[] = {
 	{
 		.procname	= "fib_notify_on_flag_change",
 		.data		= &init_net.ipv6.sysctl.fib_notify_on_flag_change,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1         = SYSCTL_ZERO,
 		.extra2         = &two,
 	},
-- 
cgit v1.2.3


From 0dd39d952f75a678b2ebcac8bd60f449f303c755 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 10:52:13 -0700
Subject: ipv6: move ip6_dst_ops first in netns_ipv6

ip6_dst_ops have cache line alignement.

Moving it at beginning of netns_ipv6
removes a 48 byte hole, and shrinks netns_ipv6
from 12 to 11 cache lines.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 84f4a8bec397..808f0f79ea9c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -55,6 +55,9 @@ struct netns_sysctl_ipv6 {
 };
 
 struct netns_ipv6 {
+	/* Keep ip6_dst_ops at the beginning of netns_sysctl_ipv6 */
+	struct dst_ops		ip6_dst_ops;
+
 	struct netns_sysctl_ipv6 sysctl;
 	struct ipv6_devconf	*devconf_all;
 	struct ipv6_devconf	*devconf_dflt;
@@ -76,7 +79,6 @@ struct netns_ipv6 {
 	struct hlist_head       *fib_table_hash;
 	struct fib6_table       *fib6_main_tbl;
 	struct list_head	fib6_walkers;
-	struct dst_ops		ip6_dst_ops;
 	rwlock_t		fib6_walker_lock;
 	spinlock_t		fib6_gc_lock;
 	unsigned int		 ip6_rt_gc_expire;
-- 
cgit v1.2.3


From ac1db7acea67777be1ba86e36e058c479eab6508 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 31 Mar 2021 16:36:02 +0800
Subject: net/tipc: fix missing destroy_workqueue() on error in
 tipc_crypto_start()

Add the missing destroy_workqueue() before return from
tipc_crypto_start() in the error handling case.

Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/crypto.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 6f64acef73dc..76b8428c94a7 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1492,6 +1492,8 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
 	/* Allocate statistic structure */
 	c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
 	if (!c->stats) {
+		if (c->wq)
+			destroy_workqueue(c->wq);
 		kfree_sensitive(c);
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3


From 0d7a7b2014b1a499a0fe24c9f3063d7856b5aaaf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Mar 2021 14:38:11 -0700
Subject: ipv6: remove extra dev_hold() for fallback tunnels

My previous commits added a dev_hold() in tunnels ndo_init(),
but forgot to remove it from special functions setting up fallback tunnels.

Fallback tunnels do call their respective ndo_init()

This leads to various reports like :

unregister_netdevice: waiting for ip6gre0 to become free. Usage count = 2

Fixes: 48bb5697269a ("ip6_tunnel: sit: proper dev_{hold|put} in ndo_[un]init methods")
Fixes: 6289a98f0817 ("sit: proper dev_{hold|put} in ndo_[un]init methods")
Fixes: 40cb881b5aaa ("ip6_vti: proper dev_{hold|put} in ndo_[un]init methods")
Fixes: 7f700334be9a ("ip6_gre: proper dev_{hold|put} in ndo_[un]init methods")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c    | 3 ---
 net/ipv6/ip6_tunnel.c | 1 -
 net/ipv6/ip6_vti.c    | 1 -
 net/ipv6/sit.c        | 1 -
 4 files changed, 6 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 9689bf9f46f3..bc224f917bbd 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -387,7 +387,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
 	if (!(nt->parms.o_flags & TUNNEL_SEQ))
 		dev->features |= NETIF_F_LLTX;
 
-	dev_hold(dev);
 	ip6gre_tunnel_link(ign, nt);
 	return nt;
 
@@ -1539,8 +1538,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
 	strcpy(tunnel->parms.name, dev->name);
 
 	tunnel->hlen		= sizeof(struct ipv6hdr) + 4;
-
-	dev_hold(dev);
 }
 
 static struct inet6_protocol ip6gre_protocol __read_mostly = {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 67ee9d58ec5e..07a0a06a9b52 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1925,7 +1925,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
 	t->parms.proto = IPPROTO_IPV6;
-	dev_hold(dev);
 
 	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index a018afdb3e06..856e46ad0895 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -963,7 +963,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
 	struct vti6_net *ip6n = net_generic(net, vti6_net_id);
 
 	t->parms.proto = IPPROTO_IPV6;
-	dev_hold(dev);
 
 	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 488d3181aec3..ff2ca2e7c7f5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1470,7 +1470,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 	iph->ihl		= 5;
 	iph->ttl		= 64;
 
-	dev_hold(dev);
 	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
 }
 
-- 
cgit v1.2.3


From 2fa423f5f0c6891effd4d5c8bdb91d418001da11 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:49 +0300
Subject: net: enetc: consume the error RX buffer descriptors in a dedicated
 function

We can and should check the RX BD errors before starting to build the
skb. The only apparent reason why things are done in this backwards
order is to spare one call to enetc_rxbd_next.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 43 +++++++++++++++++-----------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 5a54976e6a28..362cfba7ce14 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -605,6 +605,28 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
 	enetc_put_rx_buff(rx_ring, rx_swbd);
 }
 
+static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
+					      u32 bd_status,
+					      union enetc_rx_bd **rxbd, int *i)
+{
+	if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
+		return false;
+
+	enetc_rxbd_next(rx_ring, rxbd, i);
+
+	while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
+		dma_rmb();
+		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
+
+		enetc_rxbd_next(rx_ring, rxbd, i);
+	}
+
+	rx_ring->ndev->stats.rx_dropped++;
+	rx_ring->ndev->stats.rx_errors++;
+
+	return true;
+}
+
 #define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */
 
 static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
@@ -634,6 +656,11 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
 		dma_rmb(); /* for reading other rxbd fields */
+
+		if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
+						      &rxbd, &i))
+			break;
+
 		size = le16_to_cpu(rxbd->r.buf_len);
 		skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
 		if (!skb)
@@ -645,22 +672,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		enetc_rxbd_next(rx_ring, &rxbd, &i);
 
-		if (unlikely(bd_status &
-			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
-			dev_kfree_skb(skb);
-			while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
-				dma_rmb();
-				bd_status = le32_to_cpu(rxbd->r.lstatus);
-
-				enetc_rxbd_next(rx_ring, &rxbd, &i);
-			}
-
-			rx_ring->ndev->stats.rx_dropped++;
-			rx_ring->ndev->stats.rx_errors++;
-
-			break;
-		}
-
 		/* not last BD in frame? */
 		while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
 			bd_status = le32_to_cpu(rxbd->r.lstatus);
-- 
cgit v1.2.3


From a800abd3ecb9acc55821f7ac9bba6c956b36a595 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:50 +0300
Subject: net: enetc: move skb creation into enetc_build_skb

We need to build an skb from two code paths now: from the plain RX data
path and from the XDP data path when the verdict is XDP_PASS.

Create a new enetc_build_skb function which contains the essential steps
for building an skb based on the first and last positions of buffer
descriptors within the RX ring.

We also squash the enetc_process_skb function into enetc_build_skb,
because what that function did wasn't very meaningful on its own.

The "rx_frm_cnt++" instruction has been moved around napi_gro_receive
for cosmetic reasons, to be in the same spot as rx_byte_cnt++, which
itself must be before napi_gro_receive, because that's when we lose
ownership of the skb.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 81 +++++++++++++++-------------
 1 file changed, 44 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 362cfba7ce14..b2071b8dc316 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -513,13 +513,6 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 #endif
 }
 
-static void enetc_process_skb(struct enetc_bdr *rx_ring,
-			      struct sk_buff *skb)
-{
-	skb_record_rx_queue(skb, rx_ring->index);
-	skb->protocol = eth_type_trans(skb, rx_ring->ndev);
-}
-
 static bool enetc_page_reusable(struct page *page)
 {
 	return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1);
@@ -627,6 +620,47 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
 	return true;
 }
 
+static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring,
+				       u32 bd_status, union enetc_rx_bd **rxbd,
+				       int *i, int *cleaned_cnt)
+{
+	struct sk_buff *skb;
+	u16 size;
+
+	size = le16_to_cpu((*rxbd)->r.buf_len);
+	skb = enetc_map_rx_buff_to_skb(rx_ring, *i, size);
+	if (!skb)
+		return NULL;
+
+	enetc_get_offloads(rx_ring, *rxbd, skb);
+
+	(*cleaned_cnt)++;
+
+	enetc_rxbd_next(rx_ring, rxbd, i);
+
+	/* not last BD in frame? */
+	while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
+		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
+		size = ENETC_RXB_DMA_SIZE;
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			dma_rmb();
+			size = le16_to_cpu((*rxbd)->r.buf_len);
+		}
+
+		enetc_add_rx_buff_to_skb(rx_ring, *i, size, skb);
+
+		(*cleaned_cnt)++;
+
+		enetc_rxbd_next(rx_ring, rxbd, i);
+	}
+
+	skb_record_rx_queue(skb, rx_ring->index);
+	skb->protocol = eth_type_trans(skb, rx_ring->ndev);
+
+	return skb;
+}
+
 #define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */
 
 static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
@@ -643,7 +677,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 		union enetc_rx_bd *rxbd;
 		struct sk_buff *skb;
 		u32 bd_status;
-		u16 size;
 
 		if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
 			cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
@@ -661,41 +694,15 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 						      &rxbd, &i))
 			break;
 
-		size = le16_to_cpu(rxbd->r.buf_len);
-		skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
+		skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i,
+				      &cleaned_cnt);
 		if (!skb)
 			break;
 
-		enetc_get_offloads(rx_ring, rxbd, skb);
-
-		cleaned_cnt++;
-
-		enetc_rxbd_next(rx_ring, &rxbd, &i);
-
-		/* not last BD in frame? */
-		while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
-			bd_status = le32_to_cpu(rxbd->r.lstatus);
-			size = ENETC_RXB_DMA_SIZE;
-
-			if (bd_status & ENETC_RXBD_LSTATUS_F) {
-				dma_rmb();
-				size = le16_to_cpu(rxbd->r.buf_len);
-			}
-
-			enetc_add_rx_buff_to_skb(rx_ring, i, size, skb);
-
-			cleaned_cnt++;
-
-			enetc_rxbd_next(rx_ring, &rxbd, &i);
-		}
-
 		rx_byte_cnt += skb->len;
-
-		enetc_process_skb(rx_ring, skb);
+		rx_frm_cnt++;
 
 		napi_gro_receive(napi, skb);
-
-		rx_frm_cnt++;
 	}
 
 	rx_ring->next_to_clean = i;
-- 
cgit v1.2.3


From d504498d2eb3bfcbef4ddf3f51eb9f1391c8149f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:51 +0300
Subject: net: enetc: add a dedicated is_eof bit in the TX software BD

In the transmit path, if we have a scatter/gather frame, it is put into
multiple software buffer descriptors, the last of which has the skb
pointer populated (which is necessary for rearming the TX MSI vector and
for collecting the two-step TX timestamp from the TX confirmation path).

At the moment, this is sufficient, but with XDP_TX, we'll need to
service TX software buffer descriptors that don't have an skb pointer,
however they might be final nonetheless. So add a dedicated bit for
final software BDs that we populate and check explicitly. Also, we keep
looking just for an skb when doing TX timestamping, because we don't
want/need that for XDP.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 7 +++----
 drivers/net/ethernet/freescale/enetc/enetc.h | 1 +
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index b2071b8dc316..37d2d142a744 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -157,6 +157,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 	temp_bd.flags = flags;
 	*txbd = temp_bd;
 
+	tx_ring->tx_swbd[i].is_eof = true;
 	tx_ring->tx_swbd[i].skb = skb;
 
 	enetc_bdr_idx_inc(tx_ring, &i);
@@ -316,8 +317,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 	do_tstamp = false;
 
 	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
-		bool is_eof = !!tx_swbd->skb;
-
 		if (unlikely(tx_swbd->check_wb)) {
 			struct enetc_ndev_priv *priv = netdev_priv(ndev);
 			union enetc_tx_bd *txbd;
@@ -335,7 +334,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 		if (likely(tx_swbd->dma))
 			enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
-		if (is_eof) {
+		if (tx_swbd->skb) {
 			if (unlikely(do_tstamp)) {
 				enetc_tstamp_tx(tx_swbd->skb, tstamp);
 				do_tstamp = false;
@@ -355,7 +354,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 		}
 
 		/* BD iteration loop end */
-		if (is_eof) {
+		if (tx_swbd->is_eof) {
 			tx_frm_cnt++;
 			/* re-arm interrupt source */
 			enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) |
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 773e412b9f4e..d9e75644b89c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -25,6 +25,7 @@ struct enetc_tx_swbd {
 	u8 is_dma_page:1;
 	u8 check_wb:1;
 	u8 do_tstamp:1;
+	u8 is_eof:1;
 };
 
 #define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
-- 
cgit v1.2.3


From 1ee8d6f3bebbdaa7692732c91685b27ae4c612be Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:52 +0300
Subject: net: enetc: clean the TX software BD on the TX confirmation path

With the future introduction of some new fields into enetc_tx_swbd such
as is_xdp_tx, is_xdp_redirect etc, we need not only to set these bits
to true from the XDP_TX/XDP_REDIRECT code path, but also to false from
the old code paths.

This is because TX software buffer descriptors are kept in a ring that
is shadow of the hardware TX ring, so these structures keep getting
reused, and there is always the possibility that when a software BD is
reused (after we ran a full circle through the TX ring), the old user of
the tx_swbd had set is_xdp_tx = true, and now we are sending a regular
skb, which would need to set is_xdp_tx = false.

To be minimally invasive to the old code paths, let's just scrub the
software TX BD in the TX confirmation path (enetc_clean_tx_ring), once
we know that nobody uses this software TX BD (tx_ring->next_to_clean
hasn't yet been updated, and the TX paths check enetc_bd_unused which
tells them if there's any more space in the TX ring for a new enqueue).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 37d2d142a744..ade05518b496 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -344,6 +344,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 		}
 
 		tx_byte_cnt += tx_swbd->len;
+		/* Scrub the swbd here so we don't have to do that
+		 * when we reuse it during xmit
+		 */
+		memset(tx_swbd, 0, sizeof(*tx_swbd));
 
 		bds_to_clean--;
 		tx_swbd++;
-- 
cgit v1.2.3


From 65d0cbb414cee012ceee9991d09f5e7c30b49fcc Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:53 +0300
Subject: net: enetc: move up enetc_reuse_page and enetc_page_reusable

For XDP_TX, we need to call enetc_reuse_page from enetc_clean_tx_ring,
so we need to avoid a forward declaration.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 38 ++++++++++++++--------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index ade05518b496..38301d0d7f0c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -275,6 +275,25 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 	return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
 }
 
+static bool enetc_page_reusable(struct page *page)
+{
+	return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1);
+}
+
+static void enetc_reuse_page(struct enetc_bdr *rx_ring,
+			     struct enetc_rx_swbd *old)
+{
+	struct enetc_rx_swbd *new;
+
+	new = &rx_ring->rx_swbd[rx_ring->next_to_alloc];
+
+	/* next buf that may reuse a page */
+	enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc);
+
+	/* copy page reference */
+	*new = *old;
+}
+
 static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
 				u64 *tstamp)
 {
@@ -516,25 +535,6 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 #endif
 }
 
-static bool enetc_page_reusable(struct page *page)
-{
-	return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1);
-}
-
-static void enetc_reuse_page(struct enetc_bdr *rx_ring,
-			     struct enetc_rx_swbd *old)
-{
-	struct enetc_rx_swbd *new;
-
-	new = &rx_ring->rx_swbd[rx_ring->next_to_alloc];
-
-	/* next buf that may reuse a page */
-	enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc);
-
-	/* copy page reference */
-	*new = *old;
-}
-
 static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
 					       int i, u16 size)
 {
-- 
cgit v1.2.3


From d1b15102dd16adc17fd5e4db8a485e6459f98906 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:54 +0300
Subject: net: enetc: add support for XDP_DROP and XDP_PASS

For the RX ring, enetc uses an allocation scheme based on pages split
into two buffers, which is already very efficient in terms of preventing
reallocations / maximizing reuse, so I see no reason why I would change
that.

 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half B | half B | half B | half B | half B | half B | half B |
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half A | half A | half A | half A | half A | half A | half A | RX ring
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
     ^                                                     ^
     |                                                     |
 next_to_clean                                       next_to_alloc
                                                      next_to_use

                   +--------+--------+--------+--------+--------+
                   |        |        |        |        |        |
                   | half B | half B | half B | half B | half B |
                   |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |        |        |        |        |        |
 | half B | half B | half A | half A | half A | half A | half A | RX ring
 |        |        |        |        |        |        |        |
 +--------+--------+--------+--------+--------+--------+--------+
 |        |        |   ^                                   ^
 | half A | half A |   |                                   |
 |        |        | next_to_clean                   next_to_use
 +--------+--------+
              ^
              |
         next_to_alloc

then when enetc_refill_rx_ring is called, whose purpose is to advance
next_to_use, it sees that it can take buffers up to next_to_alloc, and
it says "oh, hey, rx_swbd->page isn't NULL, I don't need to allocate
one!".

The only problem is that for default PAGE_SIZE values of 4096, buffer
sizes are 2048 bytes. While this is enough for normal skb allocations at
an MTU of 1500 bytes, for XDP it isn't, because the XDP headroom is 256
bytes, and including skb_shared_info and alignment, we end up being able
to make use of only 1472 bytes, which is insufficient for the default
MTU.

To solve that problem, we implement scatter/gather processing in the
driver, because we would really like to keep the existing allocation
scheme. A packet of 1500 bytes is received in a buffer of 1472 bytes and
another one of 28 bytes.

Because the headroom required by XDP is different (and much larger) than
the one required by the network stack, whenever a BPF program is added
or deleted on the port, we drain the existing RX buffers and seed new
ones with the required headroom. We also keep the required headroom in
rx_ring->buffer_offset.

The simplest way to implement XDP_PASS, where an skb must be created, is
to create an xdp_buff based on the next_to_clean RX BDs, but not clear
those BDs from the RX ring yet, just keep the original index at which
the BDs for this frame started. Then, if the verdict is XDP_PASS,
instead of converting the xdb_buff to an skb, we replay a call to
enetc_build_skb (just as in the normal enetc_clean_rx_ring case),
starting from the original BD index.

We would also like to be minimally invasive to the regular RX data path,
and not check whether there is a BPF program attached to the ring on
every packet. So we create a separate RX ring processing function for
XDP.

Because we only install/remove the BPF program while the interface is
down, we forgo the rcu_read_lock() in enetc_clean_rx_ring, since there
shouldn't be any circumstance in which we are processing packets and
there is a potentially freed BPF program attached to the RX ring.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c       | 284 +++++++++++++++++++--
 drivers/net/ethernet/freescale/enetc/enetc.h       |  14 +
 .../net/ethernet/freescale/enetc/enetc_ethtool.c   |   2 +
 drivers/net/ethernet/freescale/enetc/enetc_pf.c    |   1 +
 4 files changed, 281 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 38301d0d7f0c..58bb0b78585a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2,6 +2,7 @@
 /* Copyright 2017-2019 NXP */
 
 #include "enetc.h"
+#include <linux/bpf_trace.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/vmalloc.h>
@@ -420,7 +421,7 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring,
 
 	rx_swbd->dma = addr;
 	rx_swbd->page = page;
-	rx_swbd->page_offset = ENETC_RXB_PAD;
+	rx_swbd->page_offset = rx_ring->buffer_offset;
 
 	return true;
 }
@@ -550,6 +551,8 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
 			      struct enetc_rx_swbd *rx_swbd)
 {
 	if (likely(enetc_page_reusable(rx_swbd->page))) {
+		size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;
+
 		rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE;
 		page_ref_inc(rx_swbd->page);
 
@@ -558,8 +561,7 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
 		/* sync for use by the device */
 		dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
 						 rx_swbd->page_offset,
-						 ENETC_RXB_DMA_SIZE,
-						 DMA_FROM_DEVICE);
+						 buffer_size, DMA_FROM_DEVICE);
 	} else {
 		dma_unmap_page(rx_ring->dev, rx_swbd->dma,
 			       PAGE_SIZE, DMA_FROM_DEVICE);
@@ -576,13 +578,13 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
 	void *ba;
 
 	ba = page_address(rx_swbd->page) + rx_swbd->page_offset;
-	skb = build_skb(ba - ENETC_RXB_PAD, ENETC_RXB_TRUESIZE);
+	skb = build_skb(ba - rx_ring->buffer_offset, ENETC_RXB_TRUESIZE);
 	if (unlikely(!skb)) {
 		rx_ring->stats.rx_alloc_errs++;
 		return NULL;
 	}
 
-	skb_reserve(skb, ENETC_RXB_PAD);
+	skb_reserve(skb, rx_ring->buffer_offset);
 	__skb_put(skb, size);
 
 	enetc_put_rx_buff(rx_ring, rx_swbd);
@@ -625,7 +627,7 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
 
 static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring,
 				       u32 bd_status, union enetc_rx_bd **rxbd,
-				       int *i, int *cleaned_cnt)
+				       int *i, int *cleaned_cnt, int buffer_size)
 {
 	struct sk_buff *skb;
 	u16 size;
@@ -644,7 +646,7 @@ static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring,
 	/* not last BD in frame? */
 	while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
 		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
-		size = ENETC_RXB_DMA_SIZE;
+		size = buffer_size;
 
 		if (bd_status & ENETC_RXBD_LSTATUS_F) {
 			dma_rmb();
@@ -698,7 +700,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 			break;
 
 		skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i,
-				      &cleaned_cnt);
+				      &cleaned_cnt, ENETC_RXB_DMA_SIZE);
 		if (!skb)
 			break;
 
@@ -716,10 +718,174 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 	return rx_frm_cnt;
 }
 
+static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
+				     struct xdp_buff *xdp_buff, u16 size)
+{
+	struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
+	void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset;
+	struct skb_shared_info *shinfo;
+
+	xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset,
+			 rx_ring->buffer_offset, size, false);
+
+	shinfo = xdp_get_shared_info_from_buff(xdp_buff);
+	shinfo->nr_frags = 0;
+}
+
+static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
+				     u16 size, struct xdp_buff *xdp_buff)
+{
+	struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp_buff);
+	struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
+	skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags];
+
+	skb_frag_off_set(frag, rx_swbd->page_offset);
+	skb_frag_size_set(frag, size);
+	__skb_frag_set_page(frag, rx_swbd->page);
+
+	shinfo->nr_frags++;
+}
+
+static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
+				 union enetc_rx_bd **rxbd, int *i,
+				 int *cleaned_cnt, struct xdp_buff *xdp_buff)
+{
+	u16 size = le16_to_cpu((*rxbd)->r.buf_len);
+
+	xdp_init_buff(xdp_buff, ENETC_RXB_TRUESIZE, &rx_ring->xdp.rxq);
+
+	enetc_map_rx_buff_to_xdp(rx_ring, *i, xdp_buff, size);
+	(*cleaned_cnt)++;
+	enetc_rxbd_next(rx_ring, rxbd, i);
+
+	/* not last BD in frame? */
+	while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
+		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
+		size = ENETC_RXB_DMA_SIZE_XDP;
+
+		if (bd_status & ENETC_RXBD_LSTATUS_F) {
+			dma_rmb();
+			size = le16_to_cpu((*rxbd)->r.buf_len);
+		}
+
+		enetc_add_rx_buff_to_xdp(rx_ring, *i, size, xdp_buff);
+		(*cleaned_cnt)++;
+		enetc_rxbd_next(rx_ring, rxbd, i);
+	}
+}
+
+/* Reuse the current page without performing half-page buffer flipping */
+static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
+			       struct enetc_rx_swbd *rx_swbd)
+{
+	enetc_reuse_page(rx_ring, rx_swbd);
+
+	/* sync for use by the device */
+	dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
+					 rx_swbd->page_offset,
+					 ENETC_RXB_DMA_SIZE_XDP,
+					 DMA_FROM_DEVICE);
+
+	rx_swbd->page = NULL;
+}
+
+static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
+			   int rx_ring_last)
+{
+	while (rx_ring_first != rx_ring_last) {
+		enetc_put_xdp_buff(rx_ring,
+				   &rx_ring->rx_swbd[rx_ring_first]);
+		enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
+	}
+	rx_ring->stats.xdp_drops++;
+}
+
+static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
+				   struct napi_struct *napi, int work_limit,
+				   struct bpf_prog *prog)
+{
+	int rx_frm_cnt = 0, rx_byte_cnt = 0;
+	int cleaned_cnt, i;
+	u32 xdp_act;
+
+	cleaned_cnt = enetc_bd_unused(rx_ring);
+	/* next descriptor to process */
+	i = rx_ring->next_to_clean;
+
+	while (likely(rx_frm_cnt < work_limit)) {
+		union enetc_rx_bd *rxbd, *orig_rxbd;
+		int orig_i, orig_cleaned_cnt;
+		struct xdp_buff xdp_buff;
+		struct sk_buff *skb;
+		u32 bd_status;
+
+		if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
+			cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
+							    cleaned_cnt);
+
+		rxbd = enetc_rxbd(rx_ring, i);
+		bd_status = le32_to_cpu(rxbd->r.lstatus);
+		if (!bd_status)
+			break;
+
+		enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
+		dma_rmb(); /* for reading other rxbd fields */
+
+		if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
+						      &rxbd, &i))
+			break;
+
+		orig_rxbd = rxbd;
+		orig_cleaned_cnt = cleaned_cnt;
+		orig_i = i;
+
+		enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i,
+				     &cleaned_cnt, &xdp_buff);
+
+		xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
+
+		switch (xdp_act) {
+		case XDP_ABORTED:
+			trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
+			fallthrough;
+		case XDP_DROP:
+			enetc_xdp_drop(rx_ring, orig_i, i);
+			break;
+		case XDP_PASS:
+			rxbd = orig_rxbd;
+			cleaned_cnt = orig_cleaned_cnt;
+			i = orig_i;
+
+			skb = enetc_build_skb(rx_ring, bd_status, &rxbd,
+					      &i, &cleaned_cnt,
+					      ENETC_RXB_DMA_SIZE_XDP);
+			if (unlikely(!skb))
+				/* Exit the switch/case, not the loop */
+				break;
+
+			napi_gro_receive(napi, skb);
+			break;
+		default:
+			bpf_warn_invalid_xdp_action(xdp_act);
+		}
+
+		rx_frm_cnt++;
+	}
+
+	rx_ring->next_to_clean = i;
+
+	rx_ring->stats.packets += rx_frm_cnt;
+	rx_ring->stats.bytes += rx_byte_cnt;
+
+	return rx_frm_cnt;
+}
+
 static int enetc_poll(struct napi_struct *napi, int budget)
 {
 	struct enetc_int_vector
 		*v = container_of(napi, struct enetc_int_vector, napi);
+	struct enetc_bdr *rx_ring = &v->rx_ring;
+	struct bpf_prog *prog;
 	bool complete = true;
 	int work_done;
 	int i;
@@ -730,7 +896,11 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
 			complete = false;
 
-	work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget);
+	prog = rx_ring->xdp.prog;
+	if (prog)
+		work_done = enetc_clean_rx_ring_xdp(rx_ring, napi, budget, prog);
+	else
+		work_done = enetc_clean_rx_ring(rx_ring, napi, budget);
 	if (work_done == budget)
 		complete = false;
 	if (work_done)
@@ -1120,7 +1290,10 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	enetc_rxbdr_wr(hw, idx, ENETC_RBLENR,
 		       ENETC_RTBLENR_LEN(rx_ring->bd_count));
 
-	enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE);
+	if (rx_ring->xdp.prog)
+		enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE_XDP);
+	else
+		enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE);
 
 	enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
 
@@ -1511,6 +1684,54 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 	}
 }
 
+static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog,
+				struct netlink_ext_ack *extack)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	bool is_up;
+	int i;
+
+	/* The buffer layout is changing, so we need to drain the old
+	 * RX buffers and seed new ones.
+	 */
+	is_up = netif_running(dev);
+	if (is_up)
+		dev_close(dev);
+
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	for (i = 0; i < priv->num_rx_rings; i++) {
+		struct enetc_bdr *rx_ring = priv->rx_ring[i];
+
+		rx_ring->xdp.prog = prog;
+
+		if (prog)
+			rx_ring->buffer_offset = XDP_PACKET_HEADROOM;
+		else
+			rx_ring->buffer_offset = ENETC_RXB_PAD;
+	}
+
+	if (is_up)
+		return dev_open(dev, extack);
+
+	return 0;
+}
+
+int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return enetc_setup_xdp_prog(dev, xdp->prog, xdp->extack);
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 struct net_device_stats *enetc_get_stats(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -1727,6 +1948,28 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 
 		priv->int_vector[i] = v;
 
+		bdr = &v->rx_ring;
+		bdr->index = i;
+		bdr->ndev = priv->ndev;
+		bdr->dev = priv->dev;
+		bdr->bd_count = priv->rx_bd_count;
+		bdr->buffer_offset = ENETC_RXB_PAD;
+		priv->rx_ring[i] = bdr;
+
+		err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0);
+		if (err) {
+			kfree(v);
+			goto fail;
+		}
+
+		err = xdp_rxq_info_reg_mem_model(&bdr->xdp.rxq,
+						 MEM_TYPE_PAGE_SHARED, NULL);
+		if (err) {
+			xdp_rxq_info_unreg(&bdr->xdp.rxq);
+			kfree(v);
+			goto fail;
+		}
+
 		/* init defaults for adaptive IC */
 		if (priv->ic_mode & ENETC_IC_RX_ADAPTIVE) {
 			v->rx_ictt = 0x1;
@@ -1754,22 +1997,20 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 			bdr->bd_count = priv->tx_bd_count;
 			priv->tx_ring[idx] = bdr;
 		}
-
-		bdr = &v->rx_ring;
-		bdr->index = i;
-		bdr->ndev = priv->ndev;
-		bdr->dev = priv->dev;
-		bdr->bd_count = priv->rx_bd_count;
-		priv->rx_ring[i] = bdr;
 	}
 
 	return 0;
 
 fail:
 	while (i--) {
-		netif_napi_del(&priv->int_vector[i]->napi);
-		cancel_work_sync(&priv->int_vector[i]->rx_dim.work);
-		kfree(priv->int_vector[i]);
+		struct enetc_int_vector *v = priv->int_vector[i];
+		struct enetc_bdr *rx_ring = &v->rx_ring;
+
+		xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq);
+		xdp_rxq_info_unreg(&rx_ring->xdp.rxq);
+		netif_napi_del(&v->napi);
+		cancel_work_sync(&v->rx_dim.work);
+		kfree(v);
 	}
 
 	pci_free_irq_vectors(pdev);
@@ -1783,7 +2024,10 @@ void enetc_free_msix(struct enetc_ndev_priv *priv)
 
 	for (i = 0; i < priv->bdr_int_num; i++) {
 		struct enetc_int_vector *v = priv->int_vector[i];
+		struct enetc_bdr *rx_ring = &v->rx_ring;
 
+		xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq);
+		xdp_rxq_info_unreg(&rx_ring->xdp.rxq);
 		netif_napi_del(&v->napi);
 		cancel_work_sync(&v->rx_dim.work);
 	}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index d9e75644b89c..5815addfe966 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -33,6 +33,8 @@ struct enetc_tx_swbd {
 #define ENETC_RXB_PAD		NET_SKB_PAD /* add extra space if needed */
 #define ENETC_RXB_DMA_SIZE	\
 	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
+#define ENETC_RXB_DMA_SIZE_XDP	\
+	(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)
 
 struct enetc_rx_swbd {
 	dma_addr_t dma;
@@ -44,6 +46,12 @@ struct enetc_ring_stats {
 	unsigned int packets;
 	unsigned int bytes;
 	unsigned int rx_alloc_errs;
+	unsigned int xdp_drops;
+};
+
+struct enetc_xdp_data {
+	struct xdp_rxq_info rxq;
+	struct bpf_prog *prog;
 };
 
 #define ENETC_RX_RING_DEFAULT_SIZE	512
@@ -72,6 +80,9 @@ struct enetc_bdr {
 	};
 	void __iomem *idr; /* Interrupt Detect Register pointer */
 
+	int buffer_offset;
+	struct enetc_xdp_data xdp;
+
 	struct enetc_ring_stats stats;
 
 	dma_addr_t bd_dma_base;
@@ -276,6 +287,8 @@ struct enetc_ndev_priv {
 	struct phylink *phylink;
 	int ic_mode;
 	u32 tx_ictt;
+
+	struct bpf_prog *xdp_prog;
 };
 
 /* Messaging */
@@ -315,6 +328,7 @@ int enetc_set_features(struct net_device *ndev,
 int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 		   void *type_data);
+int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
 
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 89e558135432..0183c13f8c1e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -192,6 +192,7 @@ static const struct {
 static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Rx ring %2d frames",
 	"Rx ring %2d alloc errors",
+	"Rx ring %2d XDP drops",
 };
 
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
@@ -273,6 +274,7 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
 	for (i = 0; i < priv->num_rx_rings; i++) {
 		data[o++] = priv->rx_ring[i]->stats.packets;
 		data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
+		data[o++] = priv->rx_ring[i]->stats.xdp_drops;
 	}
 
 	if (!enetc_si_is_pf(priv->si))
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 5e95afd61c87..0484dbe13422 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -707,6 +707,7 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_set_features	= enetc_pf_set_features,
 	.ndo_do_ioctl		= enetc_ioctl,
 	.ndo_setup_tc		= enetc_setup_tc,
+	.ndo_bpf		= enetc_setup_bpf,
 };
 
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
-- 
cgit v1.2.3


From 7ed2bc80074ed4ed30e0cab323305bde851f7a87 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:55 +0300
Subject: net: enetc: add support for XDP_TX

For reflecting packets back into the interface they came from, we create
an array of TX software BDs derived from the RX software BDs. Therefore,
we need to extend the TX software BD structure to contain most of the
stuff that's already present in the RX software BD structure, for
reasons that will become evident in a moment.

For a frame with the XDP_TX verdict, we don't reuse any buffer right
away as we do for XDP_DROP (the same page half) or XDP_PASS (the other
page half, same as the skb code path).

Because the buffer transfers ownership from the RX ring to the TX ring,
reusing any page half right away is very dangerous. So what we can do is
we can recycle the same page half as soon as TX is complete.

The code path is:
enetc_poll
-> enetc_clean_rx_ring_xdp
   -> enetc_xdp_tx
   -> enetc_refill_rx_ring
(time passes, another MSI interrupt is raised)
enetc_poll
-> enetc_clean_tx_ring
   -> enetc_recycle_xdp_tx_buff

But that creates a problem, because there is a potentially large time
window between enetc_xdp_tx and enetc_recycle_xdp_tx_buff, period in
which we'll have less and less RX buffers.

Basically, when the ship starts sinking, the knee-jerk reaction is to
let enetc_refill_rx_ring do what it does for the standard skb code path
(refill every 16 consumed buffers), but that turns out to be very
inefficient. The problem is that we have no rx_swbd->page at our
disposal from the enetc_reuse_page path, so enetc_refill_rx_ring would
have to call enetc_new_page for every buffer that we refill (if we
choose to refill at this early stage). Very inefficient, it only makes
the problem worse, because page allocation is an expensive process, and
CPU time is exactly what we're lacking.

Additionally, there is an even bigger problem: if we let
enetc_refill_rx_ring top up the ring's buffers again from the RX path,
remember that the buffers sent to transmission haven't disappeared
anywhere. They will be eventually sent, and processed in
enetc_clean_tx_ring, and an attempt will be made to recycle them.
But surprise, the RX ring is already full of new buffers, because we
were premature in deciding that we should refill. So not only we took
the expensive decision of allocating new pages, but now we must throw
away perfectly good and reusable buffers.

So what we do is we implement an elastic refill mechanism, which keeps
track of the number of in-flight XDP_TX buffer descriptors. We top up
the RX ring only up to the total ring capacity minus the number of BDs
that are in flight (because we know that those BDs will return to us
eventually).

The enetc driver manages 1 RX ring per CPU, and the default TX ring
management is the same. So we do XDP_TX towards the TX ring of the same
index, because it is affined to the same CPU. This will probably not
produce great results when we have a tc-taprio/tc-mqprio qdisc on the
interface, because in that case, the number of TX rings might be
greater, but I didn't add any checks for that yet (mostly because I
didn't know what checks to add).

It should also be noted that we need to change the DMA mapping direction
for RX buffers, since they may now be reflected into the TX ring of the
same device. We choose to use DMA_BIDIRECTIONAL instead of unmapping and
remapping as DMA_TO_DEVICE, because performance is better this way.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c       | 217 ++++++++++++++++++---
 drivers/net/ethernet/freescale/enetc/enetc.h       |  25 +++
 .../net/ethernet/freescale/enetc/enetc_ethtool.c   |  11 +-
 3 files changed, 228 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 58bb0b78585a..ba5313a5d7a4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -8,21 +8,20 @@
 #include <linux/vmalloc.h>
 #include <net/pkt_sched.h>
 
-/* ENETC overhead: optional extension BD + 1 BD gap */
-#define ENETC_TXBDS_NEEDED(val)	((val) + 2)
-/* max # of chained Tx BDs is 15, including head and extension BD */
-#define ENETC_MAX_SKB_FRAGS	13
-#define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
-
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
 				struct enetc_tx_swbd *tx_swbd)
 {
+	/* For XDP_TX, pages come from RX, whereas for the other contexts where
+	 * we have is_dma_page_set, those come from skb_frag_dma_map. We need
+	 * to match the DMA mapping length, so we need to differentiate those.
+	 */
 	if (tx_swbd->is_dma_page)
 		dma_unmap_page(tx_ring->dev, tx_swbd->dma,
-			       tx_swbd->len, DMA_TO_DEVICE);
+			       tx_swbd->is_xdp_tx ? PAGE_SIZE : tx_swbd->len,
+			       tx_swbd->dir);
 	else
 		dma_unmap_single(tx_ring->dev, tx_swbd->dma,
-				 tx_swbd->len, DMA_TO_DEVICE);
+				 tx_swbd->len, tx_swbd->dir);
 	tx_swbd->dma = 0;
 }
 
@@ -38,6 +37,13 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
 	}
 }
 
+/* Let H/W know BD ring has been updated */
+static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring)
+{
+	/* includes wmb() */
+	enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use);
+}
+
 static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 			      int active_offloads)
 {
@@ -68,6 +74,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 	tx_swbd->dma = dma;
 	tx_swbd->len = len;
 	tx_swbd->is_dma_page = 0;
+	tx_swbd->dir = DMA_TO_DEVICE;
 	count++;
 
 	do_vlan = skb_vlan_tag_present(skb);
@@ -150,6 +157,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 		tx_swbd->dma = dma;
 		tx_swbd->len = len;
 		tx_swbd->is_dma_page = 1;
+		tx_swbd->dir = DMA_TO_DEVICE;
 		count++;
 	}
 
@@ -166,8 +174,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 
 	skb_tx_timestamp(skb);
 
-	/* let H/W know BD ring has been updated */
-	enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */
+	enetc_update_tx_ring_tail(tx_ring);
 
 	return count;
 
@@ -320,6 +327,43 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
 	}
 }
 
+static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
+				      struct enetc_tx_swbd *tx_swbd)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
+	struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index];
+	struct enetc_rx_swbd rx_swbd = {
+		.dma = tx_swbd->dma,
+		.page = tx_swbd->page,
+		.page_offset = tx_swbd->page_offset,
+		.dir = tx_swbd->dir,
+		.len = tx_swbd->len,
+	};
+
+	if (likely(enetc_swbd_unused(rx_ring))) {
+		enetc_reuse_page(rx_ring, &rx_swbd);
+
+		/* sync for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, rx_swbd.dma,
+						 rx_swbd.page_offset,
+						 ENETC_RXB_DMA_SIZE_XDP,
+						 rx_swbd.dir);
+
+		rx_ring->stats.recycles++;
+	} else {
+		/* RX ring is already full, we need to unmap and free the
+		 * page, since there's nothing useful we can do with it.
+		 */
+		rx_ring->stats.recycle_failures++;
+
+		dma_unmap_page(rx_ring->dev, rx_swbd.dma, PAGE_SIZE,
+			       rx_swbd.dir);
+		__free_page(rx_swbd.page);
+	}
+
+	rx_ring->xdp.xdp_tx_in_flight--;
+}
+
 static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 {
 	struct net_device *ndev = tx_ring->ndev;
@@ -351,7 +395,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 			}
 		}
 
-		if (likely(tx_swbd->dma))
+		if (tx_swbd->is_xdp_tx)
+			enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd);
+		else if (likely(tx_swbd->dma))
 			enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
 		if (tx_swbd->skb) {
@@ -405,6 +451,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 static bool enetc_new_page(struct enetc_bdr *rx_ring,
 			   struct enetc_rx_swbd *rx_swbd)
 {
+	bool xdp = !!(rx_ring->xdp.prog);
 	struct page *page;
 	dma_addr_t addr;
 
@@ -412,7 +459,10 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring,
 	if (unlikely(!page))
 		return false;
 
-	addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	/* For XDP_TX, we forgo dma_unmap -> dma_map */
+	rx_swbd->dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+
+	addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, rx_swbd->dir);
 	if (unlikely(dma_mapping_error(rx_ring->dev, addr))) {
 		__free_page(page);
 
@@ -536,6 +586,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
 #endif
 }
 
+/* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS,
+ * so it needs to work with both DMA_FROM_DEVICE as well as DMA_BIDIRECTIONAL
+ * mapped buffers.
+ */
 static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
 					       int i, u16 size)
 {
@@ -543,7 +597,7 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
 
 	dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma,
 				      rx_swbd->page_offset,
-				      size, DMA_FROM_DEVICE);
+				      size, rx_swbd->dir);
 	return rx_swbd;
 }
 
@@ -561,10 +615,10 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
 		/* sync for use by the device */
 		dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
 						 rx_swbd->page_offset,
-						 buffer_size, DMA_FROM_DEVICE);
+						 buffer_size, rx_swbd->dir);
 	} else {
-		dma_unmap_page(rx_ring->dev, rx_swbd->dma,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+			       rx_swbd->dir);
 	}
 
 	rx_swbd->page = NULL;
@@ -718,6 +772,61 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 	return rx_frm_cnt;
 }
 
+static void enetc_xdp_map_tx_buff(struct enetc_bdr *tx_ring, int i,
+				  struct enetc_tx_swbd *tx_swbd,
+				  int frm_len)
+{
+	union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
+
+	prefetchw(txbd);
+
+	enetc_clear_tx_bd(txbd);
+	txbd->addr = cpu_to_le64(tx_swbd->dma + tx_swbd->page_offset);
+	txbd->buf_len = cpu_to_le16(tx_swbd->len);
+	txbd->frm_len = cpu_to_le16(frm_len);
+
+	memcpy(&tx_ring->tx_swbd[i], tx_swbd, sizeof(*tx_swbd));
+}
+
+/* Puts in the TX ring one XDP frame, mapped as an array of TX software buffer
+ * descriptors.
+ */
+static bool enetc_xdp_tx(struct enetc_bdr *tx_ring,
+			 struct enetc_tx_swbd *xdp_tx_arr, int num_tx_swbd)
+{
+	struct enetc_tx_swbd *tmp_tx_swbd = xdp_tx_arr;
+	int i, k, frm_len = tmp_tx_swbd->len;
+
+	if (unlikely(enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd)))
+		return false;
+
+	while (unlikely(!tmp_tx_swbd->is_eof)) {
+		tmp_tx_swbd++;
+		frm_len += tmp_tx_swbd->len;
+	}
+
+	i = tx_ring->next_to_use;
+
+	for (k = 0; k < num_tx_swbd; k++) {
+		struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[k];
+
+		enetc_xdp_map_tx_buff(tx_ring, i, xdp_tx_swbd, frm_len);
+
+		/* last BD needs 'F' bit set */
+		if (xdp_tx_swbd->is_eof) {
+			union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
+
+			txbd->flags = ENETC_TXBD_FLAGS_F;
+		}
+
+		enetc_bdr_idx_inc(tx_ring, &i);
+	}
+
+	tx_ring->next_to_use = i;
+
+	return true;
+}
+
 static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
 				     struct xdp_buff *xdp_buff, u16 size)
 {
@@ -725,6 +834,9 @@ static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
 	void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset;
 	struct skb_shared_info *shinfo;
 
+	/* To be used for XDP_TX */
+	rx_swbd->len = size;
+
 	xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset,
 			 rx_ring->buffer_offset, size, false);
 
@@ -739,6 +851,9 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
 	struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
 	skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags];
 
+	/* To be used for XDP_TX */
+	rx_swbd->len = size;
+
 	skb_frag_off_set(frag, rx_swbd->page_offset);
 	skb_frag_size_set(frag, size);
 	__skb_frag_set_page(frag, rx_swbd->page);
@@ -780,15 +895,48 @@ static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
 {
 	enetc_reuse_page(rx_ring, rx_swbd);
 
-	/* sync for use by the device */
 	dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
 					 rx_swbd->page_offset,
 					 ENETC_RXB_DMA_SIZE_XDP,
-					 DMA_FROM_DEVICE);
+					 rx_swbd->dir);
 
 	rx_swbd->page = NULL;
 }
 
+/* Convert RX buffer descriptors to TX buffer descriptors. These will be
+ * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
+ * RX software BDs because the ownership of the buffer no longer belongs to the
+ * RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page.
+ */
+static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
+					struct enetc_bdr *rx_ring,
+					int rx_ring_first, int rx_ring_last)
+{
+	int n = 0;
+
+	for (; rx_ring_first != rx_ring_last;
+	     n++, enetc_bdr_idx_inc(rx_ring, &rx_ring_first)) {
+		struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
+		struct enetc_tx_swbd *tx_swbd = &xdp_tx_arr[n];
+
+		/* No need to dma_map, we already have DMA_BIDIRECTIONAL */
+		tx_swbd->dma = rx_swbd->dma;
+		tx_swbd->dir = rx_swbd->dir;
+		tx_swbd->page = rx_swbd->page;
+		tx_swbd->page_offset = rx_swbd->page_offset;
+		tx_swbd->len = rx_swbd->len;
+		tx_swbd->is_dma_page = true;
+		tx_swbd->is_xdp_tx = true;
+		tx_swbd->is_eof = false;
+		memset(rx_swbd, 0, sizeof(*rx_swbd));
+	}
+
+	/* We rely on caller providing an rx_ring_last > rx_ring_first */
+	xdp_tx_arr[n - 1].is_eof = true;
+
+	return n;
+}
+
 static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
 			   int rx_ring_last)
 {
@@ -804,6 +952,10 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				   struct napi_struct *napi, int work_limit,
 				   struct bpf_prog *prog)
 {
+	struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
+	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
+	struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
+	int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0;
 	int rx_frm_cnt = 0, rx_byte_cnt = 0;
 	int cleaned_cnt, i;
 	u32 xdp_act;
@@ -819,10 +971,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 		struct sk_buff *skb;
 		u32 bd_status;
 
-		if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
-			cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
-							    cleaned_cnt);
-
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
 		if (!bd_status)
@@ -865,6 +1013,20 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 
 			napi_gro_receive(napi, skb);
 			break;
+		case XDP_TX:
+			xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr,
+								     rx_ring,
+								     orig_i, i);
+
+			if (!enetc_xdp_tx(tx_ring, xdp_tx_arr, xdp_tx_bd_cnt)) {
+				enetc_xdp_drop(rx_ring, orig_i, i);
+				tx_ring->stats.xdp_tx_drops++;
+			} else {
+				tx_ring->stats.xdp_tx += xdp_tx_bd_cnt;
+				rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
+				xdp_tx_frm_cnt++;
+			}
+			break;
 		default:
 			bpf_warn_invalid_xdp_action(xdp_act);
 		}
@@ -877,6 +1039,13 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 	rx_ring->stats.packets += rx_frm_cnt;
 	rx_ring->stats.bytes += rx_byte_cnt;
 
+	if (xdp_tx_frm_cnt)
+		enetc_update_tx_ring_tail(tx_ring);
+
+	if (cleaned_cnt > rx_ring->xdp.xdp_tx_in_flight)
+		enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) -
+				     rx_ring->xdp.xdp_tx_in_flight);
+
 	return rx_frm_cnt;
 }
 
@@ -1141,8 +1310,8 @@ static void enetc_free_rx_ring(struct enetc_bdr *rx_ring)
 		if (!rx_swbd->page)
 			continue;
 
-		dma_unmap_page(rx_ring->dev, rx_swbd->dma,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+			       rx_swbd->dir);
 		__free_page(rx_swbd->page);
 		rx_swbd->page = NULL;
 	}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 5815addfe966..864da962ae21 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -21,11 +21,15 @@
 struct enetc_tx_swbd {
 	struct sk_buff *skb;
 	dma_addr_t dma;
+	struct page *page;	/* valid only if is_xdp_tx */
+	u16 page_offset;	/* valid only if is_xdp_tx */
 	u16 len;
+	enum dma_data_direction dir;
 	u8 is_dma_page:1;
 	u8 check_wb:1;
 	u8 do_tstamp:1;
 	u8 is_eof:1;
+	u8 is_xdp_tx:1;
 };
 
 #define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
@@ -40,18 +44,31 @@ struct enetc_rx_swbd {
 	dma_addr_t dma;
 	struct page *page;
 	u16 page_offset;
+	enum dma_data_direction dir;
+	u16 len;
 };
 
+/* ENETC overhead: optional extension BD + 1 BD gap */
+#define ENETC_TXBDS_NEEDED(val)	((val) + 2)
+/* max # of chained Tx BDs is 15, including head and extension BD */
+#define ENETC_MAX_SKB_FRAGS	13
+#define ENETC_TXBDS_MAX_NEEDED	ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
+
 struct enetc_ring_stats {
 	unsigned int packets;
 	unsigned int bytes;
 	unsigned int rx_alloc_errs;
 	unsigned int xdp_drops;
+	unsigned int xdp_tx;
+	unsigned int xdp_tx_drops;
+	unsigned int recycles;
+	unsigned int recycle_failures;
 };
 
 struct enetc_xdp_data {
 	struct xdp_rxq_info rxq;
 	struct bpf_prog *prog;
+	int xdp_tx_in_flight;
 };
 
 #define ENETC_RX_RING_DEFAULT_SIZE	512
@@ -104,6 +121,14 @@ static inline int enetc_bd_unused(struct enetc_bdr *bdr)
 	return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1;
 }
 
+static inline int enetc_swbd_unused(struct enetc_bdr *bdr)
+{
+	if (bdr->next_to_clean > bdr->next_to_alloc)
+		return bdr->next_to_clean - bdr->next_to_alloc - 1;
+
+	return bdr->bd_count + bdr->next_to_clean - bdr->next_to_alloc - 1;
+}
+
 /* Control BD ring */
 #define ENETC_CBDR_DEFAULT_SIZE	64
 struct enetc_cbdr {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 0183c13f8c1e..37821a8b225e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -193,10 +193,14 @@ static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Rx ring %2d frames",
 	"Rx ring %2d alloc errors",
 	"Rx ring %2d XDP drops",
+	"Rx ring %2d recycles",
+	"Rx ring %2d recycle failures",
 };
 
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Tx ring %2d frames",
+	"Tx ring %2d XDP frames",
+	"Tx ring %2d XDP drops",
 };
 
 static int enetc_get_sset_count(struct net_device *ndev, int sset)
@@ -268,13 +272,18 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
 	for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++)
 		data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg);
 
-	for (i = 0; i < priv->num_tx_rings; i++)
+	for (i = 0; i < priv->num_tx_rings; i++) {
 		data[o++] = priv->tx_ring[i]->stats.packets;
+		data[o++] = priv->tx_ring[i]->stats.xdp_tx;
+		data[o++] = priv->tx_ring[i]->stats.xdp_tx_drops;
+	}
 
 	for (i = 0; i < priv->num_rx_rings; i++) {
 		data[o++] = priv->rx_ring[i]->stats.packets;
 		data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
 		data[o++] = priv->rx_ring[i]->stats.xdp_drops;
+		data[o++] = priv->rx_ring[i]->stats.recycles;
+		data[o++] = priv->rx_ring[i]->stats.recycle_failures;
 	}
 
 	if (!enetc_si_is_pf(priv->si))
-- 
cgit v1.2.3


From d6a2829e82cff9e5ec10b8ee293488b57399ed01 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:56 +0300
Subject: net: enetc: increase RX ring default size

As explained in the XDP_TX patch, when receiving a burst of frames with
the XDP_TX verdict, there is a momentary dip in the number of available
RX buffers. The system will eventually recover as TX completions will
start kicking in and refilling our RX BD ring again. But until that
happens, we need to survive with as few out-of-buffer discards as
possible.

This increases the memory footprint of the driver in order to avoid
discards at 2.5Gbps line rate 64B packet sizes, the maximum speed
available for testing on 1 port on NXP LS1028A.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 864da962ae21..d0619fcbbe97 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -71,7 +71,7 @@ struct enetc_xdp_data {
 	int xdp_tx_in_flight;
 };
 
-#define ENETC_RX_RING_DEFAULT_SIZE	512
+#define ENETC_RX_RING_DEFAULT_SIZE	2048
 #define ENETC_TX_RING_DEFAULT_SIZE	256
 #define ENETC_DEFAULT_TX_WORK		(ENETC_TX_RING_DEFAULT_SIZE / 2)
 
-- 
cgit v1.2.3


From 9d2b68cc108db2fdb35022ed2d88cfb305c441a6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Mar 2021 23:08:57 +0300
Subject: net: enetc: add support for XDP_REDIRECT

The driver implementation of the XDP_REDIRECT action reuses parts from
XDP_TX, most notably the enetc_xdp_tx function which transmits an array
of TX software BDs. Only this time, the buffers don't have DMA mappings,
we need to create them.

When a BPF program reaches the XDP_REDIRECT verdict for a frame, we can
employ the same buffer reuse strategy as for the normal processing path
and for XDP_PASS: we can flip to the other page half and seed that to
the RX ring.

Note that scatter/gather support is there, but disabled due to lack of
multi-buffer support in XDP (which is added by this series):
https://patchwork.kernel.org/project/netdevbpf/cover/cover.1616179034.git.lorenzo@kernel.org/

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c       | 212 +++++++++++++++++++--
 drivers/net/ethernet/freescale/enetc/enetc.h       |  11 +-
 .../net/ethernet/freescale/enetc/enetc_ethtool.c   |   6 +
 drivers/net/ethernet/freescale/enetc/enetc_pf.c    |   1 +
 4 files changed, 218 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index ba5313a5d7a4..57049ae97201 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -8,6 +8,23 @@
 #include <linux/vmalloc.h>
 #include <net/pkt_sched.h>
 
+static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
+{
+	if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect)
+		return NULL;
+
+	return tx_swbd->skb;
+}
+
+static struct xdp_frame *
+enetc_tx_swbd_get_xdp_frame(struct enetc_tx_swbd *tx_swbd)
+{
+	if (tx_swbd->is_xdp_redirect)
+		return tx_swbd->xdp_frame;
+
+	return NULL;
+}
+
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
 				struct enetc_tx_swbd *tx_swbd)
 {
@@ -25,14 +42,20 @@ static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
 	tx_swbd->dma = 0;
 }
 
-static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
-			      struct enetc_tx_swbd *tx_swbd)
+static void enetc_free_tx_frame(struct enetc_bdr *tx_ring,
+				struct enetc_tx_swbd *tx_swbd)
 {
+	struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
+	struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
+
 	if (tx_swbd->dma)
 		enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
-	if (tx_swbd->skb) {
-		dev_kfree_skb_any(tx_swbd->skb);
+	if (xdp_frame) {
+		xdp_return_frame(tx_swbd->xdp_frame);
+		tx_swbd->xdp_frame = NULL;
+	} else if (skb) {
+		dev_kfree_skb_any(skb);
 		tx_swbd->skb = NULL;
 	}
 }
@@ -183,7 +206,7 @@ dma_err:
 
 	do {
 		tx_swbd = &tx_ring->tx_swbd[i];
-		enetc_free_tx_skb(tx_ring, tx_swbd);
+		enetc_free_tx_frame(tx_ring, tx_swbd);
 		if (i == 0)
 			i = tx_ring->bd_count;
 		i--;
@@ -381,6 +404,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 	do_tstamp = false;
 
 	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
+		struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
+		struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
+
 		if (unlikely(tx_swbd->check_wb)) {
 			struct enetc_ndev_priv *priv = netdev_priv(ndev);
 			union enetc_tx_bd *txbd;
@@ -400,12 +426,15 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 		else if (likely(tx_swbd->dma))
 			enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
-		if (tx_swbd->skb) {
+		if (xdp_frame) {
+			xdp_return_frame(xdp_frame);
+			tx_swbd->xdp_frame = NULL;
+		} else if (skb) {
 			if (unlikely(do_tstamp)) {
-				enetc_tstamp_tx(tx_swbd->skb, tstamp);
+				enetc_tstamp_tx(skb, tstamp);
 				do_tstamp = false;
 			}
-			napi_consume_skb(tx_swbd->skb, napi_budget);
+			napi_consume_skb(skb, napi_budget);
 			tx_swbd->skb = NULL;
 		}
 
@@ -827,6 +856,109 @@ static bool enetc_xdp_tx(struct enetc_bdr *tx_ring,
 	return true;
 }
 
+static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring,
+					  struct enetc_tx_swbd *xdp_tx_arr,
+					  struct xdp_frame *xdp_frame)
+{
+	struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[0];
+	struct skb_shared_info *shinfo;
+	void *data = xdp_frame->data;
+	int len = xdp_frame->len;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	unsigned int f;
+	int n = 0;
+
+	dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
+		netdev_err(tx_ring->ndev, "DMA map error\n");
+		return -1;
+	}
+
+	xdp_tx_swbd->dma = dma;
+	xdp_tx_swbd->dir = DMA_TO_DEVICE;
+	xdp_tx_swbd->len = len;
+	xdp_tx_swbd->is_xdp_redirect = true;
+	xdp_tx_swbd->is_eof = false;
+	xdp_tx_swbd->xdp_frame = NULL;
+
+	n++;
+	xdp_tx_swbd = &xdp_tx_arr[n];
+
+	shinfo = xdp_get_shared_info_from_frame(xdp_frame);
+
+	for (f = 0, frag = &shinfo->frags[0]; f < shinfo->nr_frags;
+	     f++, frag++) {
+		data = skb_frag_address(frag);
+		len = skb_frag_size(frag);
+
+		dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
+			/* Undo the DMA mapping for all fragments */
+			while (n-- >= 0)
+				enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]);
+
+			netdev_err(tx_ring->ndev, "DMA map error\n");
+			return -1;
+		}
+
+		xdp_tx_swbd->dma = dma;
+		xdp_tx_swbd->dir = DMA_TO_DEVICE;
+		xdp_tx_swbd->len = len;
+		xdp_tx_swbd->is_xdp_redirect = true;
+		xdp_tx_swbd->is_eof = false;
+		xdp_tx_swbd->xdp_frame = NULL;
+
+		n++;
+		xdp_tx_swbd = &xdp_tx_arr[n];
+	}
+
+	xdp_tx_arr[n - 1].is_eof = true;
+	xdp_tx_arr[n - 1].xdp_frame = xdp_frame;
+
+	return n;
+}
+
+int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
+		   struct xdp_frame **frames, u32 flags)
+{
+	struct enetc_tx_swbd xdp_redirect_arr[ENETC_MAX_SKB_FRAGS] = {0};
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct enetc_bdr *tx_ring;
+	int xdp_tx_bd_cnt, i, k;
+	int xdp_tx_frm_cnt = 0;
+
+	tx_ring = priv->tx_ring[smp_processor_id()];
+
+	prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
+
+	for (k = 0; k < num_frames; k++) {
+		xdp_tx_bd_cnt = enetc_xdp_frame_to_xdp_tx_swbd(tx_ring,
+							       xdp_redirect_arr,
+							       frames[k]);
+		if (unlikely(xdp_tx_bd_cnt < 0))
+			break;
+
+		if (unlikely(!enetc_xdp_tx(tx_ring, xdp_redirect_arr,
+					   xdp_tx_bd_cnt))) {
+			for (i = 0; i < xdp_tx_bd_cnt; i++)
+				enetc_unmap_tx_buff(tx_ring,
+						    &xdp_redirect_arr[i]);
+			tx_ring->stats.xdp_tx_drops++;
+			break;
+		}
+
+		xdp_tx_frm_cnt++;
+	}
+
+	if (unlikely((flags & XDP_XMIT_FLUSH) || k != xdp_tx_frm_cnt))
+		enetc_update_tx_ring_tail(tx_ring);
+
+	tx_ring->stats.xdp_tx += xdp_tx_frm_cnt;
+
+	return xdp_tx_frm_cnt;
+}
+
 static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
 				     struct xdp_buff *xdp_buff, u16 size)
 {
@@ -948,14 +1080,31 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
 	rx_ring->stats.xdp_drops++;
 }
 
+static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first,
+			   int rx_ring_last)
+{
+	while (rx_ring_first != rx_ring_last) {
+		struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
+
+		if (rx_swbd->page) {
+			dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
+				       rx_swbd->dir);
+			__free_page(rx_swbd->page);
+			rx_swbd->page = NULL;
+		}
+		enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
+	}
+	rx_ring->stats.xdp_redirect_failures++;
+}
+
 static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				   struct napi_struct *napi, int work_limit,
 				   struct bpf_prog *prog)
 {
+	int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0;
 	struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
 	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
 	struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
-	int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0;
 	int rx_frm_cnt = 0, rx_byte_cnt = 0;
 	int cleaned_cnt, i;
 	u32 xdp_act;
@@ -969,6 +1118,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 		int orig_i, orig_cleaned_cnt;
 		struct xdp_buff xdp_buff;
 		struct sk_buff *skb;
+		int tmp_orig_i, err;
 		u32 bd_status;
 
 		rxbd = enetc_rxbd(rx_ring, i);
@@ -1026,6 +1176,43 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
 				xdp_tx_frm_cnt++;
 			}
+			break;
+		case XDP_REDIRECT:
+			/* xdp_return_frame does not support S/G in the sense
+			 * that it leaks the fragments (__xdp_return should not
+			 * call page_frag_free only for the initial buffer).
+			 * Until XDP_REDIRECT gains support for S/G let's keep
+			 * the code structure in place, but dead. We drop the
+			 * S/G frames ourselves to avoid memory leaks which
+			 * would otherwise leave the kernel OOM.
+			 */
+			if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) {
+				enetc_xdp_drop(rx_ring, orig_i, i);
+				rx_ring->stats.xdp_redirect_sg++;
+				break;
+			}
+
+			tmp_orig_i = orig_i;
+
+			while (orig_i != i) {
+				enetc_put_rx_buff(rx_ring,
+						  &rx_ring->rx_swbd[orig_i]);
+				enetc_bdr_idx_inc(rx_ring, &orig_i);
+			}
+
+			err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog);
+			if (unlikely(err)) {
+				enetc_xdp_free(rx_ring, tmp_orig_i, i);
+			} else {
+				xdp_redirect_frm_cnt++;
+				rx_ring->stats.xdp_redirect++;
+			}
+
+			if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) {
+				xdp_do_flush_map();
+				xdp_redirect_frm_cnt = 0;
+			}
+
 			break;
 		default:
 			bpf_warn_invalid_xdp_action(xdp_act);
@@ -1039,6 +1226,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 	rx_ring->stats.packets += rx_frm_cnt;
 	rx_ring->stats.bytes += rx_byte_cnt;
 
+	if (xdp_redirect_frm_cnt)
+		xdp_do_flush_map();
+
 	if (xdp_tx_frm_cnt)
 		enetc_update_tx_ring_tail(tx_ring);
 
@@ -1173,7 +1363,7 @@ static void enetc_free_txbdr(struct enetc_bdr *txr)
 	int size, i;
 
 	for (i = 0; i < txr->bd_count; i++)
-		enetc_free_tx_skb(txr, &txr->tx_swbd[i]);
+		enetc_free_tx_frame(txr, &txr->tx_swbd[i]);
 
 	size = txr->bd_count * sizeof(union enetc_tx_bd);
 
@@ -1290,7 +1480,7 @@ static void enetc_free_tx_ring(struct enetc_bdr *tx_ring)
 	for (i = 0; i < tx_ring->bd_count; i++) {
 		struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i];
 
-		enetc_free_tx_skb(tx_ring, tx_swbd);
+		enetc_free_tx_frame(tx_ring, tx_swbd);
 	}
 
 	tx_ring->next_to_clean = 0;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index d0619fcbbe97..05474f46b0d9 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -19,7 +19,10 @@
 				(ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN))
 
 struct enetc_tx_swbd {
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdp_frame;
+	};
 	dma_addr_t dma;
 	struct page *page;	/* valid only if is_xdp_tx */
 	u16 page_offset;	/* valid only if is_xdp_tx */
@@ -30,6 +33,7 @@ struct enetc_tx_swbd {
 	u8 do_tstamp:1;
 	u8 is_eof:1;
 	u8 is_xdp_tx:1;
+	u8 is_xdp_redirect:1;
 };
 
 #define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
@@ -61,6 +65,9 @@ struct enetc_ring_stats {
 	unsigned int xdp_drops;
 	unsigned int xdp_tx;
 	unsigned int xdp_tx_drops;
+	unsigned int xdp_redirect;
+	unsigned int xdp_redirect_failures;
+	unsigned int xdp_redirect_sg;
 	unsigned int recycles;
 	unsigned int recycle_failures;
 };
@@ -354,6 +361,8 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 		   void *type_data);
 int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
+int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
+		   struct xdp_frame **frames, u32 flags);
 
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 37821a8b225e..7cc81b453bd7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -195,6 +195,9 @@ static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
 	"Rx ring %2d XDP drops",
 	"Rx ring %2d recycles",
 	"Rx ring %2d recycle failures",
+	"Rx ring %2d redirects",
+	"Rx ring %2d redirect failures",
+	"Rx ring %2d redirect S/G",
 };
 
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
@@ -284,6 +287,9 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
 		data[o++] = priv->rx_ring[i]->stats.xdp_drops;
 		data[o++] = priv->rx_ring[i]->stats.recycles;
 		data[o++] = priv->rx_ring[i]->stats.recycle_failures;
+		data[o++] = priv->rx_ring[i]->stats.xdp_redirect;
+		data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures;
+		data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg;
 	}
 
 	if (!enetc_si_is_pf(priv->si))
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 0484dbe13422..f61fedf462e5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -708,6 +708,7 @@ static const struct net_device_ops enetc_ndev_ops = {
 	.ndo_do_ioctl		= enetc_ioctl,
 	.ndo_setup_tc		= enetc_setup_tc,
 	.ndo_bpf		= enetc_setup_bpf,
+	.ndo_xdp_xmit		= enetc_xdp_xmit,
 };
 
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
-- 
cgit v1.2.3


From b494ba5a3cf822fa99fb941cd1c293da21f4f927 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Thu, 1 Apr 2021 00:18:25 +0800
Subject: net: stmmac: enable MTL ECC Error Address Status Over-ride by default

Turn on the MEEAO field of MTL_ECC_Control_Register by default.

As the MTL ECC Error Address Status Over-ride(MEEAO) is set by default,
the following error address fields will hold the last valid address
where the error is detected.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Tan Tee Min <tee.min.tan@intel.com>
Co-developed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 5b010ebfede9..d8c6ff725237 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -192,6 +192,7 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp)
 
 	/* 1. Enable Safety Features */
 	value = readl(ioaddr + MTL_ECC_CONTROL);
+	value |= MEEAO; /* MTL ECC Error Addr Status Override */
 	value |= TSOEE; /* TSO ECC */
 	value |= MRXPEE; /* MTL RX Parser ECC */
 	value |= MESTEE; /* MTL EST ECC */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index ff555d8b0cdf..6b2fd37b29ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -98,6 +98,7 @@
 #define ADDR				GENMASK(15, 0)
 #define MTL_RXP_IACC_DATA		0x00000cb4
 #define MTL_ECC_CONTROL			0x00000cc0
+#define MEEAO				BIT(8)
 #define TSOEE				BIT(4)
 #define MRXPEE				BIT(3)
 #define MESTEE				BIT(2)
-- 
cgit v1.2.3


From 917e2e6c57980e2255c5eb8ddd77ed670ae49752 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Wed, 31 Mar 2021 15:34:37 +0200
Subject: net: mediatek: add flow offload for mt7623

mt7623 uses offload version 2 too

tested on Bananapi-R2

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 0396f0db855f..810def064f11 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3202,6 +3202,7 @@ static const struct mtk_soc_data mt7623_data = {
 	.hw_features = MTK_HW_FEATURES,
 	.required_clks = MT7623_CLKS_BITMAP,
 	.required_pctl = true,
+	.offload_version = 2,
 };
 
 static const struct mtk_soc_data mt7629_data = {
-- 
cgit v1.2.3


From 040806343bb4ef6365166eae666ced8a91b95321 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Thu, 1 Apr 2021 00:40:20 +0000
Subject: selftests/net: so_txtime multi-host support

SO_TXTIME hardware offload requires testing across devices, either
between machines or separate network namespaces.

Split up SO_TXTIME test into tx and rx modes, so traffic can be
sent from one process to another. Create a veth-pair on different
namespaces and bind each process to an end point via [-S]ource and
[-D]estination parameters. Optional start [-t]ime parameter can be
passed to synchronize the test across the hosts (with synchorinzed
clocks).

Signed-off-by: Carlos Llamas <cmllamas@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/so_txtime.c  | 247 +++++++++++++++++++++++--------
 tools/testing/selftests/net/so_txtime.sh |  97 +++++++++---
 2 files changed, 259 insertions(+), 85 deletions(-)

diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index b4cca382d125..59067f64b775 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -2,9 +2,12 @@
 /*
  * Test the SO_TXTIME API
  *
- * Takes two streams of { payload, delivery time }[], one input and one output.
- * Sends the input stream and verifies arrival matches the output stream.
- * The two streams can differ due to out-of-order delivery and drops.
+ * Takes a stream of { payload, delivery time }[], to be sent across two
+ * processes. Start this program on two separate network namespaces or
+ * connected hosts, one instance in transmit mode and the other in receive
+ * mode using the '-r' option. Receiver will compare arrival timestamps to
+ * the expected stream. Sender will read transmit timestamps from the error
+ * queue. The streams can differ due to out-of-order delivery and drops.
  */
 
 #define _GNU_SOURCE
@@ -28,14 +31,17 @@
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#include <poll.h>
 
 static int	cfg_clockid	= CLOCK_TAI;
-static bool	cfg_do_ipv4;
-static bool	cfg_do_ipv6;
 static uint16_t	cfg_port	= 8000;
 static int	cfg_variance_us	= 4000;
+static uint64_t	cfg_start_time_ns;
+static int	cfg_mark;
+static bool	cfg_rx;
 
 static uint64_t glob_tstart;
+static uint64_t tdeliver_max;
 
 /* encode one timed transmission (of a 1B payload) */
 struct timed_send {
@@ -44,18 +50,21 @@ struct timed_send {
 };
 
 #define MAX_NUM_PKT	8
-static struct timed_send cfg_in[MAX_NUM_PKT];
-static struct timed_send cfg_out[MAX_NUM_PKT];
+static struct timed_send cfg_buf[MAX_NUM_PKT];
 static int cfg_num_pkt;
 
 static int cfg_errq_level;
 static int cfg_errq_type;
 
-static uint64_t gettime_ns(void)
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+static socklen_t cfg_alen;
+
+static uint64_t gettime_ns(clockid_t clock)
 {
 	struct timespec ts;
 
-	if (clock_gettime(cfg_clockid, &ts))
+	if (clock_gettime(clock, &ts))
 		error(1, errno, "gettime");
 
 	return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
@@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
 
 	msg.msg_iov = &iov;
 	msg.msg_iovlen = 1;
+	msg.msg_name = (struct sockaddr *)&cfg_dst_addr;
+	msg.msg_namelen = cfg_alen;
 
 	if (ts->delay_us >= 0) {
 		memset(control, 0, sizeof(control));
@@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
 		msg.msg_controllen = sizeof(control);
 
 		tdeliver = glob_tstart + ts->delay_us * 1000;
+		tdeliver_max = tdeliver_max > tdeliver ?
+			       tdeliver_max : tdeliver;
 
 		cm = CMSG_FIRSTHDR(&msg);
 		cm->cmsg_level = SOL_SOCKET;
@@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts)
 
 }
 
-static bool do_recv_one(int fdr, struct timed_send *ts)
+static void do_recv_one(int fdr, struct timed_send *ts)
 {
 	int64_t tstop, texpect;
 	char rbuf[2];
@@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
 
 	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
 	if (ret == -1 && errno == EAGAIN)
-		return true;
+		error(1, EAGAIN, "recv: timeout");
 	if (ret == -1)
 		error(1, errno, "read");
 	if (ret != 1)
 		error(1, 0, "read: %dB", ret);
 
-	tstop = (gettime_ns() - glob_tstart) / 1000;
+	tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000;
 	texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
 
 	fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
@@ -123,8 +136,6 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
 
 	if (llabs(tstop - texpect) > cfg_variance_us)
 		error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
-
-	return false;
 }
 
 static void do_recv_verify_empty(int fdr)
@@ -137,18 +148,18 @@ static void do_recv_verify_empty(int fdr)
 		error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
 }
 
-static void do_recv_errqueue_timeout(int fdt)
+static int do_recv_errqueue_timeout(int fdt)
 {
 	char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
 		     CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
 	char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
 		  sizeof(struct udphdr) + 1];
 	struct sock_extended_err *err;
+	int ret, num_tstamp = 0;
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
 	struct cmsghdr *cm;
 	int64_t tstamp = 0;
-	int ret;
 
 	iov.iov_base = data;
 	iov.iov_len = sizeof(data);
@@ -206,9 +217,47 @@ static void do_recv_errqueue_timeout(int fdt)
 
 		msg.msg_flags = 0;
 		msg.msg_controllen = sizeof(control);
+		num_tstamp++;
 	}
 
-	error(1, 0, "recv: timeout");
+	return num_tstamp;
+}
+
+static void recv_errqueue_msgs(int fdt)
+{
+	struct pollfd pfd = { .fd = fdt, .events = POLLERR };
+	const int timeout_ms = 10;
+	int ret, num_tstamp = 0;
+
+	do {
+		ret = poll(&pfd, 1, timeout_ms);
+		if (ret == -1)
+			error(1, errno, "poll");
+
+		if (ret && (pfd.revents & POLLERR))
+			num_tstamp += do_recv_errqueue_timeout(fdt);
+
+		if (num_tstamp == cfg_num_pkt)
+			break;
+
+	} while (gettime_ns(cfg_clockid) < tdeliver_max);
+}
+
+static void start_time_wait(void)
+{
+	uint64_t now;
+	int err;
+
+	if (!cfg_start_time_ns)
+		return;
+
+	now = gettime_ns(CLOCK_REALTIME);
+	if (cfg_start_time_ns < now)
+		return;
+
+	err = usleep((cfg_start_time_ns - now) / 1000);
+	if (err)
+		error(1, errno, "usleep");
 }
 
 static void setsockopt_txtime(int fd)
@@ -245,6 +294,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen)
 
 	setsockopt_txtime(fd);
 
+	if (cfg_mark &&
+	    setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark)))
+		error(1, errno, "setsockopt mark");
+
 	return fd;
 }
 
@@ -266,31 +319,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen)
 	return fd;
 }
 
-static void do_test(struct sockaddr *addr, socklen_t alen)
+static void do_test_tx(struct sockaddr *addr, socklen_t alen)
 {
-	int fdt, fdr, i;
+	int fdt, i;
 
 	fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
 			addr->sa_family == PF_INET ? '4' : '6',
 			cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
 
 	fdt = setup_tx(addr, alen);
-	fdr = setup_rx(addr, alen);
 
-	glob_tstart = gettime_ns();
+	start_time_wait();
+	glob_tstart = gettime_ns(cfg_clockid);
 
 	for (i = 0; i < cfg_num_pkt; i++)
-		do_send_one(fdt, &cfg_in[i]);
+		do_send_one(fdt, &cfg_buf[i]);
+
+	recv_errqueue_msgs(fdt);
+
+	if (close(fdt))
+		error(1, errno, "close t");
+}
+
+static void do_test_rx(struct sockaddr *addr, socklen_t alen)
+{
+	int fdr, i;
+
+	fdr = setup_rx(addr, alen);
+
+	start_time_wait();
+	glob_tstart = gettime_ns(cfg_clockid);
+
 	for (i = 0; i < cfg_num_pkt; i++)
-		if (do_recv_one(fdr, &cfg_out[i]))
-			do_recv_errqueue_timeout(fdt);
+		do_recv_one(fdr, &cfg_buf[i]);
 
 	do_recv_verify_empty(fdr);
 
 	if (close(fdr))
 		error(1, errno, "close r");
-	if (close(fdt))
-		error(1, errno, "close t");
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	}
 }
 
 static int parse_io(const char *optarg, struct timed_send *array)
@@ -323,17 +415,46 @@ static int parse_io(const char *optarg, struct timed_send *array)
 	return aoff / 2;
 }
 
+static void usage(const char *progname)
+{
+	fprintf(stderr, "\nUsage: %s [options] <payload>\n"
+			"Options:\n"
+			"  -4            only IPv4\n"
+			"  -6            only IPv6\n"
+			"  -c <clock>    monotonic (default) or tai\n"
+			"  -D <addr>     destination IP address (server)\n"
+			"  -S <addr>     source IP address (client)\n"
+			"  -r            run rx mode\n"
+			"  -t <nsec>     start time (UTC nanoseconds)\n"
+			"  -m <mark>     socket mark\n"
+			"\n",
+			progname);
+	exit(1);
+}
+
 static void parse_opts(int argc, char **argv)
 {
-	int c, ilen, olen;
+	char *daddr = NULL, *saddr = NULL;
+	int domain = PF_UNSPEC;
+	int c;
 
-	while ((c = getopt(argc, argv, "46c:")) != -1) {
+	while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) {
 		switch (c) {
 		case '4':
-			cfg_do_ipv4 = true;
+			if (domain != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			domain = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			cfg_errq_level = SOL_IP;
+			cfg_errq_type = IP_RECVERR;
 			break;
 		case '6':
-			cfg_do_ipv6 = true;
+			if (domain != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			domain = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			cfg_errq_level = SOL_IPV6;
+			cfg_errq_type = IPV6_RECVERR;
 			break;
 		case 'c':
 			if (!strcmp(optarg, "tai"))
@@ -344,50 +465,50 @@ static void parse_opts(int argc, char **argv)
 			else
 				error(1, 0, "unknown clock id %s", optarg);
 			break;
+		case 'S':
+			saddr = optarg;
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'r':
+			cfg_rx = true;
+			break;
+		case 't':
+			cfg_start_time_ns = strtol(optarg, NULL, 0);
+			break;
+		case 'm':
+			cfg_mark = strtol(optarg, NULL, 0);
+			break;
 		default:
-			error(1, 0, "parse error at %d", optind);
+			usage(argv[0]);
 		}
 	}
 
-	if (argc - optind != 2)
-		error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+	if (argc - optind != 1)
+		usage(argv[0]);
+
+	if (domain == PF_UNSPEC)
+		error(1, 0, "Pass one of -4 or -6");
+	if (!daddr)
+		error(1, 0, "-D <server addr> required\n");
+	if (!cfg_rx && !saddr)
+		error(1, 0, "-S <client addr> required\n");
 
-	ilen = parse_io(argv[optind], cfg_in);
-	olen = parse_io(argv[optind + 1], cfg_out);
-	if (ilen != olen)
-		error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
-	cfg_num_pkt = ilen;
+	setup_sockaddr(domain, daddr, &cfg_dst_addr);
+	setup_sockaddr(domain, saddr, &cfg_src_addr);
+
+	cfg_num_pkt = parse_io(argv[optind], cfg_buf);
 }
 
 int main(int argc, char **argv)
 {
 	parse_opts(argc, argv);
 
-	if (cfg_do_ipv6) {
-		struct sockaddr_in6 addr6 = {0};
-
-		addr6.sin6_family = AF_INET6;
-		addr6.sin6_port = htons(cfg_port);
-		addr6.sin6_addr = in6addr_loopback;
-
-		cfg_errq_level = SOL_IPV6;
-		cfg_errq_type = IPV6_RECVERR;
-
-		do_test((void *)&addr6, sizeof(addr6));
-	}
-
-	if (cfg_do_ipv4) {
-		struct sockaddr_in addr4 = {0};
-
-		addr4.sin_family = AF_INET;
-		addr4.sin_port = htons(cfg_port);
-		addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
-		cfg_errq_level = SOL_IP;
-		cfg_errq_type = IP_RECVERR;
-
-		do_test((void *)&addr4, sizeof(addr4));
-	}
+	if (cfg_rx)
+		do_test_rx((void *)&cfg_dst_addr, cfg_alen);
+	else
+		do_test_tx((void *)&cfg_src_addr, cfg_alen);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f7800eaecb1..3f06f4d286a9 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -3,32 +3,85 @@
 #
 # Regression tests for the SO_TXTIME interface
 
-# Run in network namespace
-if [[ $# -eq 0 ]]; then
-	if ! ./in_netns.sh $0 __subprocess; then
-		# test is time sensitive, can be flaky
-		echo "test failed: retry once"
-		./in_netns.sh $0 __subprocess
+set -e
+
+readonly DEV="veth0"
+readonly BIN="./so_txtime"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" netns "${NS1}" type veth \
+  peer name "${DEV}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+do_test() {
+	local readonly IP="$1"
+	local readonly CLOCK="$2"
+	local readonly TXARGS="$3"
+	local readonly RXARGS="$4"
+
+	if [[ "${IP}" == "4" ]]; then
+		local readonly SADDR="${SADDR4}"
+		local readonly DADDR="${DADDR4}"
+	elif [[ "${IP}" == "6" ]]; then
+		local readonly SADDR="${SADDR6}"
+		local readonly DADDR="${DADDR6}"
+	else
+		echo "Invalid IP version ${IP}"
+		exit 1
 	fi
 
-	exit $?
-fi
+	local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+	ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
+	ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
+	wait "$!"
+}
 
-set -e
+ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+do_test 4 mono a,-1 a,-1
+do_test 6 mono a,0 a,0
+do_test 6 mono a,10 a,10
+do_test 4 mono a,10,b,20 a,10,b,20
+do_test 6 mono a,20,b,10 b,20,a,20
 
-tc qdisc add dev lo root fq
-./so_txtime -4 -6 -c mono a,-1 a,-1
-./so_txtime -4 -6 -c mono a,0 a,0
-./so_txtime -4 -6 -c mono a,10 a,10
-./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
-./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
-	! ./so_txtime -4 -6 -c tai a,-1 a,-1
-	! ./so_txtime -4 -6 -c tai a,0 a,0
-	./so_txtime -4 -6 -c tai a,10 a,10
-	./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
-	./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
+	! do_test 4 tai a,-1 a,-1
+	! do_test 6 tai a,0 a,0
+	do_test 6 tai a,10 a,10
+	do_test 4 tai a,10,b,20 a,10,b,20
+	do_test 6 tai a,20,b,10 b,10,a,20
 else
 	echo "tc ($(tc -V)) does not support qdisc etf. skipping"
 fi
-- 
cgit v1.2.3


From 37f0e514db660f03f8982b8f4fbbd4b2740abe7d Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:22 -0700
Subject: skmsg: Lock ingress_skb when purging

Currently we purge the ingress_skb queue only when psock
refcnt goes down to 0, so locking the queue is not necessary,
but in order to be called during ->close, we have to lock it
here.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-2-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 07f54015238a..bebf84ed4e30 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -634,7 +634,7 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
 {
 	struct sk_buff *skb;
 
-	while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) {
+	while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 	}
-- 
cgit v1.2.3


From b01fd6e802b6d0a635176f943315670b679d8d7b Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:23 -0700
Subject: skmsg: Introduce a spinlock to protect ingress_msg

Currently we rely on lock_sock to protect ingress_msg,
it is too big for this, we can actually just use a spinlock
to protect this list like protecting other skb queues.

__tcp_bpf_recvmsg() is still special because of peeking,
it still has to use lock_sock.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-3-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 net/core/skmsg.c      |  3 +++
 net/ipv4/tcp_bpf.c    | 18 ++++++------------
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 6c09d94be2e9..f2d45a73b2b2 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -89,6 +89,7 @@ struct sk_psock {
 #endif
 	struct sk_buff_head		ingress_skb;
 	struct list_head		ingress_msg;
+	spinlock_t			ingress_lock;
 	unsigned long			state;
 	struct list_head		link;
 	spinlock_t			link_lock;
@@ -284,7 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
+	spin_lock_bh(&psock->ingress_lock);
 	list_add_tail(&msg->list, &psock->ingress_msg);
+	spin_unlock_bh(&psock->ingress_lock);
+}
+
+static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	if (msg)
+		list_del(&msg->list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
+					       struct sk_msg *msg)
+{
+	struct sk_msg *ret;
+
+	spin_lock_bh(&psock->ingress_lock);
+	if (list_is_last(&msg->list, &psock->ingress_msg))
+		ret = NULL;
+	else
+		ret = list_next_entry(msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return ret;
 }
 
 static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@ -292,6 +331,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
 	return psock ? list_empty(&psock->ingress_msg) : true;
 }
 
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+	if (msg->skb)
+		consume_skb(msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 {
 	struct sock *sk = psock->sk;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bebf84ed4e30..305dddc51857 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -592,6 +592,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 
 	INIT_WORK(&psock->work, sk_psock_backlog);
 	INIT_LIST_HEAD(&psock->ingress_msg);
+	spin_lock_init(&psock->ingress_lock);
 	skb_queue_head_init(&psock->ingress_skb);
 
 	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
@@ -638,7 +639,9 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 	}
+	spin_lock_bh(&psock->ingress_lock);
 	__sk_psock_purge_ingress_msg(psock);
+	spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_link_destroy(struct sk_psock *psock)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 17c322b875fd..ae980716d896 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -18,9 +18,7 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 	struct sk_msg *msg_rx;
 	int i, copied = 0;
 
-	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-					  struct sk_msg, list);
-
+	msg_rx = sk_psock_peek_msg(psock);
 	while (copied != len) {
 		struct scatterlist *sge;
 
@@ -68,22 +66,18 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		} while (i != msg_rx->sg.end);
 
 		if (unlikely(peek)) {
-			if (msg_rx == list_last_entry(&psock->ingress_msg,
-						      struct sk_msg, list))
+			msg_rx = sk_psock_next_msg(psock, msg_rx);
+			if (!msg_rx)
 				break;
-			msg_rx = list_next_entry(msg_rx, list);
 			continue;
 		}
 
 		msg_rx->sg.start = i;
 		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
-			list_del(&msg_rx->list);
-			if (msg_rx->skb)
-				consume_skb(msg_rx->skb);
-			kfree(msg_rx);
+			msg_rx = sk_psock_dequeue_msg(psock);
+			kfree_sk_msg(msg_rx);
 		}
-		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-						  struct sk_msg, list);
+		msg_rx = sk_psock_peek_msg(psock);
 	}
 
 	return copied;
-- 
cgit v1.2.3


From 0739cd28f2645e814586c7536ba5da9825cb8029 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:24 -0700
Subject: net: Introduce skb_send_sock() for sock_map

We only have skb_send_sock_locked() which requires callers
to use lock_sock(). Introduce a variant skb_send_sock()
which locks on its own, callers do not need to lock it
any more. This will save us from adding a ->sendmsg_locked
for each protocol.

To reuse the code, pass function pointers to __skb_send_sock()
and build skb_send_sock() and skb_send_sock_locked() on top.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-4-xiyou.wangcong@gmail.com
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 55 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8def85fcc22..dbf820a50a39 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3626,6 +3626,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 		    unsigned int flags);
 int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
 			 int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
 int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e8320b5d651a..3ad9e8425ab2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2500,9 +2500,32 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
 }
 EXPORT_SYMBOL_GPL(skb_splice_bits);
 
-/* Send skb data on a socket. Socket must be locked. */
-int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
-			 int len)
+static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg,
+			    struct kvec *vec, size_t num, size_t size)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (!sock)
+		return -EINVAL;
+	return kernel_sendmsg(sock, msg, vec, num, size);
+}
+
+static int sendpage_unlocked(struct sock *sk, struct page *page, int offset,
+			     size_t size, int flags)
+{
+	struct socket *sock = sk->sk_socket;
+
+	if (!sock)
+		return -EINVAL;
+	return kernel_sendpage(sock, page, offset, size, flags);
+}
+
+typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg,
+			    struct kvec *vec, size_t num, size_t size);
+typedef int (*sendpage_func)(struct sock *sk, struct page *page, int offset,
+			     size_t size, int flags);
+static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
+			   int len, sendmsg_func sendmsg, sendpage_func sendpage)
 {
 	unsigned int orig_len = len;
 	struct sk_buff *head = skb;
@@ -2522,7 +2545,8 @@ do_frag_list:
 		memset(&msg, 0, sizeof(msg));
 		msg.msg_flags = MSG_DONTWAIT;
 
-		ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
+		ret = INDIRECT_CALL_2(sendmsg, kernel_sendmsg_locked,
+				      sendmsg_unlocked, sk, &msg, &kv, 1, slen);
 		if (ret <= 0)
 			goto error;
 
@@ -2553,9 +2577,11 @@ do_frag_list:
 		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
 
 		while (slen) {
-			ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
-						     skb_frag_off(frag) + offset,
-						     slen, MSG_DONTWAIT);
+			ret = INDIRECT_CALL_2(sendpage, kernel_sendpage_locked,
+					      sendpage_unlocked, sk,
+					      skb_frag_page(frag),
+					      skb_frag_off(frag) + offset,
+					      slen, MSG_DONTWAIT);
 			if (ret <= 0)
 				goto error;
 
@@ -2587,8 +2613,23 @@ out:
 error:
 	return orig_len == len ? ret : orig_len - len;
 }
+
+/* Send skb data on a socket. Socket must be locked. */
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+			 int len)
+{
+	return __skb_send_sock(sk, skb, offset, len, kernel_sendmsg_locked,
+			       kernel_sendpage_locked);
+}
 EXPORT_SYMBOL_GPL(skb_send_sock_locked);
 
+/* Send skb data on a socket. Socket must be unlocked. */
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
+{
+	return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked,
+			       sendpage_unlocked);
+}
+
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer
-- 
cgit v1.2.3


From 799aa7f98d53e0f541fa6b4dc9aa47b4ff2178e3 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:25 -0700
Subject: skmsg: Avoid lock_sock() in sk_psock_backlog()

We do not have to lock the sock to avoid losing sk_socket,
instead we can purge all the ingress queues when we close
the socket. Sending or receiving packets after orphaning
socket makes no sense.

We do purge these queues when psock refcnt reaches zero but
here we want to purge them explicitly in sock_map_close().
There are also some nasty race conditions on testing bit
SK_PSOCK_TX_ENABLED and queuing/canceling the psock work,
we can expand psock->ingress_lock a bit to protect them too.

As noticed by John, we still have to lock the psock->work,
because the same work item could be running concurrently on
different CPU's.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-5-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h |  2 ++
 net/core/skmsg.c      | 50 ++++++++++++++++++++++++++++++++++----------------
 net/core/sock_map.c   |  1 +
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index f2d45a73b2b2..7382c4b518d7 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -99,6 +99,7 @@ struct sk_psock {
 	void (*saved_write_space)(struct sock *sk);
 	void (*saved_data_ready)(struct sock *sk);
 	struct proto			*sk_proto;
+	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
 	struct work_struct		work;
 	union {
@@ -347,6 +348,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node);
+void sk_psock_stop(struct sk_psock *psock, bool wait);
 
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 305dddc51857..9c25020086a9 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -497,7 +497,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 	if (!ingress) {
 		if (!sock_writeable(psock->sk))
 			return -EAGAIN;
-		return skb_send_sock_locked(psock->sk, skb, off, len);
+		return skb_send_sock(psock->sk, skb, off, len);
 	}
 	return sk_psock_skb_ingress(psock, skb);
 }
@@ -511,8 +511,7 @@ static void sk_psock_backlog(struct work_struct *work)
 	u32 len, off;
 	int ret;
 
-	/* Lock sock to avoid losing sk_socket during loop. */
-	lock_sock(psock->sk);
+	mutex_lock(&psock->work_mutex);
 	if (state->skb) {
 		skb = state->skb;
 		len = state->len;
@@ -529,7 +528,7 @@ start:
 		skb_bpf_redirect_clear(skb);
 		do {
 			ret = -EIO;
-			if (likely(psock->sk->sk_socket))
+			if (!sock_flag(psock->sk, SOCK_DEAD))
 				ret = sk_psock_handle_skb(psock, skb, off,
 							  len, ingress);
 			if (ret <= 0) {
@@ -553,7 +552,7 @@ start:
 			kfree_skb(skb);
 	}
 end:
-	release_sock(psock->sk);
+	mutex_unlock(&psock->work_mutex);
 }
 
 struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -591,6 +590,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 	spin_lock_init(&psock->link_lock);
 
 	INIT_WORK(&psock->work, sk_psock_backlog);
+	mutex_init(&psock->work_mutex);
 	INIT_LIST_HEAD(&psock->ingress_msg);
 	spin_lock_init(&psock->ingress_lock);
 	skb_queue_head_init(&psock->ingress_skb);
@@ -631,7 +631,7 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 	}
 }
 
-static void sk_psock_zap_ingress(struct sk_psock *psock)
+static void __sk_psock_zap_ingress(struct sk_psock *psock)
 {
 	struct sk_buff *skb;
 
@@ -639,9 +639,7 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 	}
-	spin_lock_bh(&psock->ingress_lock);
 	__sk_psock_purge_ingress_msg(psock);
-	spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_link_destroy(struct sk_psock *psock)
@@ -654,6 +652,18 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
 	}
 }
 
+void sk_psock_stop(struct sk_psock *psock, bool wait)
+{
+	spin_lock_bh(&psock->ingress_lock);
+	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+	sk_psock_cork_free(psock);
+	__sk_psock_zap_ingress(psock);
+	spin_unlock_bh(&psock->ingress_lock);
+
+	if (wait)
+		cancel_work_sync(&psock->work);
+}
+
 static void sk_psock_done_strp(struct sk_psock *psock);
 
 static void sk_psock_destroy_deferred(struct work_struct *gc)
@@ -665,12 +675,12 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
 	sk_psock_done_strp(psock);
 
 	cancel_work_sync(&psock->work);
+	mutex_destroy(&psock->work_mutex);
 
 	psock_progs_drop(&psock->progs);
 
 	sk_psock_link_destroy(psock);
 	sk_psock_cork_free(psock);
-	sk_psock_zap_ingress(psock);
 
 	if (psock->sk_redir)
 		sock_put(psock->sk_redir);
@@ -688,8 +698,7 @@ static void sk_psock_destroy(struct rcu_head *rcu)
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-	sk_psock_cork_free(psock);
-	sk_psock_zap_ingress(psock);
+	sk_psock_stop(psock, false);
 
 	write_lock_bh(&sk->sk_callback_lock);
 	sk_psock_restore_proto(sk, psock);
@@ -699,7 +708,6 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 	else if (psock->progs.stream_verdict)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
-	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 
 	call_rcu(&psock->rcu, sk_psock_destroy);
 }
@@ -770,14 +778,20 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
 	 * error that caused the pipe to break. We can't send a packet on
 	 * a socket that is in this state so we drop the skb.
 	 */
-	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
-	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
+		kfree_skb(skb);
+		return;
+	}
+	spin_lock_bh(&psock_other->ingress_lock);
+	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+		spin_unlock_bh(&psock_other->ingress_lock);
 		kfree_skb(skb);
 		return;
 	}
 
 	skb_queue_tail(&psock_other->ingress_skb, skb);
 	schedule_work(&psock_other->work);
+	spin_unlock_bh(&psock_other->ingress_lock);
 }
 
 static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
@@ -845,8 +859,12 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 			err = sk_psock_skb_ingress_self(psock, skb);
 		}
 		if (err < 0) {
-			skb_queue_tail(&psock->ingress_skb, skb);
-			schedule_work(&psock->work);
+			spin_lock_bh(&psock->ingress_lock);
+			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+				skb_queue_tail(&psock->ingress_skb, skb);
+				schedule_work(&psock->work);
+			}
+			spin_unlock_bh(&psock->ingress_lock);
 		}
 		break;
 	case __SK_REDIRECT:
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index dd53a7771d7e..e564fdeaada1 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1540,6 +1540,7 @@ void sock_map_close(struct sock *sk, long timeout)
 	saved_close = psock->saved_close;
 	sock_map_remove_links(sk, psock);
 	rcu_read_unlock();
+	sk_psock_stop(psock, true);
 	release_sock(sk);
 	saved_close(sk, timeout);
 }
-- 
cgit v1.2.3


From 7786dfc41a74e0567557b5c4a28fc8482f5f5691 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:26 -0700
Subject: skmsg: Use rcu work for destroying psock

The RCU callback sk_psock_destroy() only queues work psock->gc,
so we can just switch to rcu work to simplify the code.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-6-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h |  5 +----
 net/core/skmsg.c      | 17 +++++------------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 7382c4b518d7..e7aba150539d 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -102,10 +102,7 @@ struct sk_psock {
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
 	struct work_struct		work;
-	union {
-		struct rcu_head		rcu;
-		struct work_struct	gc;
-	};
+	struct rcu_work			rwork;
 };
 
 int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9c25020086a9..d43d43905d2c 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -666,10 +666,10 @@ void sk_psock_stop(struct sk_psock *psock, bool wait)
 
 static void sk_psock_done_strp(struct sk_psock *psock);
 
-static void sk_psock_destroy_deferred(struct work_struct *gc)
+static void sk_psock_destroy(struct work_struct *work)
 {
-	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
-
+	struct sk_psock *psock = container_of(to_rcu_work(work),
+					      struct sk_psock, rwork);
 	/* No sk_callback_lock since already detached. */
 
 	sk_psock_done_strp(psock);
@@ -688,14 +688,6 @@ static void sk_psock_destroy_deferred(struct work_struct *gc)
 	kfree(psock);
 }
 
-static void sk_psock_destroy(struct rcu_head *rcu)
-{
-	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
-
-	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
-	schedule_work(&psock->gc);
-}
-
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
 	sk_psock_stop(psock, false);
@@ -709,7 +701,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 
-	call_rcu(&psock->rcu, sk_psock_destroy);
+	INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
+	queue_rcu_work(system_wq, &psock->rwork);
 }
 EXPORT_SYMBOL_GPL(sk_psock_drop);
 
-- 
cgit v1.2.3


From 190179f65ba8bc18dc1d38435b7932505ca5544f Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:27 -0700
Subject: skmsg: Use GFP_KERNEL in sk_psock_create_ingress_msg()

This function is only called in process context.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-7-xiyou.wangcong@gmail.com
---
 net/core/skmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index d43d43905d2c..656eceab73bc 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -410,7 +410,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 		return NULL;
 
-	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
 	if (unlikely(!msg))
 		return NULL;
 
-- 
cgit v1.2.3


From 2004fdbd8a2b56757691717639f86d0eea3ab5b4 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:28 -0700
Subject: sock_map: Simplify sock_map_link() a bit

sock_map_link() passes down map progs, but it is confusing
to see both map progs and psock progs. Make the map progs
more obvious by retrieving it directly with sock_map_progs()
inside sock_map_link(). Now it is aligned with
sock_map_link_no_progs() too.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-8-xiyou.wangcong@gmail.com
---
 net/core/sock_map.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e564fdeaada1..d06face0f16c 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -26,6 +26,7 @@ struct bpf_stab {
 
 static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 				struct bpf_prog *old, u32 which);
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
 
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
@@ -224,10 +225,10 @@ out:
 	return psock;
 }
 
-static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
-			 struct sock *sk)
+static int sock_map_link(struct bpf_map *map, struct sock *sk)
 {
 	struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
+	struct sk_psock_progs *progs = sock_map_progs(map);
 	struct sk_psock *psock;
 	int ret;
 
@@ -492,7 +493,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 	 * and sk_write_space callbacks overridden.
 	 */
 	if (sock_map_redirect_allowed(sk))
-		ret = sock_map_link(map, &stab->progs, sk);
+		ret = sock_map_link(map, sk);
 	else
 		ret = sock_map_link_no_progs(map, sk);
 	if (ret < 0)
@@ -1004,7 +1005,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
 	 * and sk_write_space callbacks overridden.
 	 */
 	if (sock_map_redirect_allowed(sk))
-		ret = sock_map_link(map, &htab->progs, sk);
+		ret = sock_map_link(map, sk);
 	else
 		ret = sock_map_link_no_progs(map, sk);
 	if (ret < 0)
-- 
cgit v1.2.3


From b017055255d620b365299c3824610e0098414664 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:29 -0700
Subject: sock_map: Kill sock_map_link_no_progs()

Now we can fold sock_map_link_no_progs() into sock_map_link()
and get rid of sock_map_link_no_progs().

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-9-xiyou.wangcong@gmail.com
---
 net/core/sock_map.c | 55 +++++++++++++++--------------------------------------
 1 file changed, 15 insertions(+), 40 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d06face0f16c..42d797291d34 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -225,13 +225,24 @@ out:
 	return psock;
 }
 
+static bool sock_map_redirect_allowed(const struct sock *sk);
+
 static int sock_map_link(struct bpf_map *map, struct sock *sk)
 {
-	struct bpf_prog *msg_parser, *stream_parser, *stream_verdict;
 	struct sk_psock_progs *progs = sock_map_progs(map);
+	struct bpf_prog *stream_verdict = NULL;
+	struct bpf_prog *stream_parser = NULL;
+	struct bpf_prog *msg_parser = NULL;
 	struct sk_psock *psock;
 	int ret;
 
+	/* Only sockets we can redirect into/from in BPF need to hold
+	 * refs to parser/verdict progs and have their sk_data_ready
+	 * and sk_write_space callbacks overridden.
+	 */
+	if (!sock_map_redirect_allowed(sk))
+		goto no_progs;
+
 	stream_verdict = READ_ONCE(progs->stream_verdict);
 	if (stream_verdict) {
 		stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -257,6 +268,7 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
 		}
 	}
 
+no_progs:
 	psock = sock_map_psock_get_checked(sk);
 	if (IS_ERR(psock)) {
 		ret = PTR_ERR(psock);
@@ -316,27 +328,6 @@ out_put_stream_verdict:
 	return ret;
 }
 
-static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
-{
-	struct sk_psock *psock;
-	int ret;
-
-	psock = sock_map_psock_get_checked(sk);
-	if (IS_ERR(psock))
-		return PTR_ERR(psock);
-
-	if (!psock) {
-		psock = sk_psock_init(sk, map->numa_node);
-		if (IS_ERR(psock))
-			return PTR_ERR(psock);
-	}
-
-	ret = sock_map_init_proto(sk, psock);
-	if (ret < 0)
-		sk_psock_put(sk, psock);
-	return ret;
-}
-
 static void sock_map_free(struct bpf_map *map)
 {
 	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
@@ -467,8 +458,6 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
 	return 0;
 }
 
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
 static int sock_map_update_common(struct bpf_map *map, u32 idx,
 				  struct sock *sk, u64 flags)
 {
@@ -488,14 +477,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 	if (!link)
 		return -ENOMEM;
 
-	/* Only sockets we can redirect into/from in BPF need to hold
-	 * refs to parser/verdict progs and have their sk_data_ready
-	 * and sk_write_space callbacks overridden.
-	 */
-	if (sock_map_redirect_allowed(sk))
-		ret = sock_map_link(map, sk);
-	else
-		ret = sock_map_link_no_progs(map, sk);
+	ret = sock_map_link(map, sk);
 	if (ret < 0)
 		goto out_free;
 
@@ -1000,14 +982,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
 	if (!link)
 		return -ENOMEM;
 
-	/* Only sockets we can redirect into/from in BPF need to hold
-	 * refs to parser/verdict progs and have their sk_data_ready
-	 * and sk_write_space callbacks overridden.
-	 */
-	if (sock_map_redirect_allowed(sk))
-		ret = sock_map_link(map, sk);
-	else
-		ret = sock_map_link_no_progs(map, sk);
+	ret = sock_map_link(map, sk);
 	if (ret < 0)
 		goto out_free;
 
-- 
cgit v1.2.3


From a7ba4558e69a3c2ae4ca521f015832ef44799538 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:30 -0700
Subject: sock_map: Introduce BPF_SK_SKB_VERDICT

Reusing BPF_SK_SKB_STREAM_VERDICT is possible but its name is
confusing and more importantly we still want to distinguish them
from user-space. So we can just reuse the stream verdict code but
introduce a new type of eBPF program, skb_verdict. Users are not
allowed to attach stream_verdict and skb_verdict programs to the
same map.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-10-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h          |  2 ++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/syscall.c           |  1 +
 net/core/skmsg.c               |  4 +++-
 net/core/sock_map.c            | 28 ++++++++++++++++++++++++++++
 tools/bpf/bpftool/common.c     |  1 +
 tools/bpf/bpftool/prog.c       |  1 +
 tools/include/uapi/linux/bpf.h |  1 +
 8 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e7aba150539d..c83dbc2d81d9 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -58,6 +58,7 @@ struct sk_psock_progs {
 	struct bpf_prog			*msg_parser;
 	struct bpf_prog			*stream_parser;
 	struct bpf_prog			*stream_verdict;
+	struct bpf_prog			*skb_verdict;
 };
 
 enum sk_psock_state_bits {
@@ -487,6 +488,7 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
 	psock_set_prog(&progs->msg_parser, NULL);
 	psock_set_prog(&progs->stream_parser, NULL);
 	psock_set_prog(&progs->stream_verdict, NULL);
+	psock_set_prog(&progs->skb_verdict, NULL);
 }
 
 int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 598716742593..49371eba98ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -957,6 +957,7 @@ enum bpf_attach_type {
 	BPF_XDP_CPUMAP,
 	BPF_SK_LOOKUP,
 	BPF_XDP,
+	BPF_SK_SKB_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9603de81811a..6428634da57e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2948,6 +2948,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_SK_MSG;
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
+	case BPF_SK_SKB_VERDICT:
 		return BPF_PROG_TYPE_SK_SKB;
 	case BPF_LIRC_MODE2:
 		return BPF_PROG_TYPE_LIRC_MODE2;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 656eceab73bc..a045812d7c78 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -697,7 +697,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 	rcu_assign_sk_user_data(sk, NULL);
 	if (psock->progs.stream_parser)
 		sk_psock_stop_strp(sk, psock);
-	else if (psock->progs.stream_verdict)
+	else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
 		sk_psock_stop_verdict(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -1024,6 +1024,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	}
 	skb_set_owner_r(skb, sk);
 	prog = READ_ONCE(psock->progs.stream_verdict);
+	if (!prog)
+		prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 42d797291d34..c2a0411e08a8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -156,6 +156,8 @@ static void sock_map_del_link(struct sock *sk,
 				strp_stop = true;
 			if (psock->saved_data_ready && stab->progs.stream_verdict)
 				verdict_stop = true;
+			if (psock->saved_data_ready && stab->progs.skb_verdict)
+				verdict_stop = true;
 			list_del(&link->list);
 			sk_psock_free_link(link);
 		}
@@ -232,6 +234,7 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
 	struct sk_psock_progs *progs = sock_map_progs(map);
 	struct bpf_prog *stream_verdict = NULL;
 	struct bpf_prog *stream_parser = NULL;
+	struct bpf_prog *skb_verdict = NULL;
 	struct bpf_prog *msg_parser = NULL;
 	struct sk_psock *psock;
 	int ret;
@@ -268,6 +271,15 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
 		}
 	}
 
+	skb_verdict = READ_ONCE(progs->skb_verdict);
+	if (skb_verdict) {
+		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
+		if (IS_ERR(skb_verdict)) {
+			ret = PTR_ERR(skb_verdict);
+			goto out_put_msg_parser;
+		}
+	}
+
 no_progs:
 	psock = sock_map_psock_get_checked(sk);
 	if (IS_ERR(psock)) {
@@ -278,6 +290,9 @@ no_progs:
 	if (psock) {
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
 		    (stream_parser  && READ_ONCE(psock->progs.stream_parser)) ||
+		    (skb_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
+		    (skb_verdict && READ_ONCE(psock->progs.stream_verdict)) ||
+		    (stream_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
 		    (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
 			sk_psock_put(sk, psock);
 			ret = -EBUSY;
@@ -309,6 +324,9 @@ no_progs:
 	} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
 		psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
 		sk_psock_start_verdict(sk,psock);
+	} else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) {
+		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+		sk_psock_start_verdict(sk, psock);
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 	return 0;
@@ -317,6 +335,9 @@ out_unlock_drop:
 out_drop:
 	sk_psock_put(sk, psock);
 out_progs:
+	if (skb_verdict)
+		bpf_prog_put(skb_verdict);
+out_put_msg_parser:
 	if (msg_parser)
 		bpf_prog_put(msg_parser);
 out_put_stream_parser:
@@ -1442,8 +1463,15 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
+		if (progs->skb_verdict)
+			return -EBUSY;
 		pprog = &progs->stream_verdict;
 		break;
+	case BPF_SK_SKB_VERDICT:
+		if (progs->stream_verdict)
+			return -EBUSY;
+		pprog = &progs->skb_verdict;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 65303664417e..1828bba19020 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -57,6 +57,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 
 	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
 	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
+	[BPF_SK_SKB_VERDICT]		= "sk_skb_verdict",
 	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
 	[BPF_LIRC_MODE2]		= "lirc_mode2",
 	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f2b915b20546..3f067d2d7584 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -76,6 +76,7 @@ enum dump_mode {
 static const char * const attach_type_strings[] = {
 	[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
 	[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+	[BPF_SK_SKB_VERDICT] = "skb_verdict",
 	[BPF_SK_MSG_VERDICT] = "msg_verdict",
 	[BPF_FLOW_DISSECTOR] = "flow_dissector",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ab9f2233607c..69902603012c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -957,6 +957,7 @@ enum bpf_attach_type {
 	BPF_XDP_CPUMAP,
 	BPF_SK_LOOKUP,
 	BPF_XDP,
+	BPF_SK_SKB_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
cgit v1.2.3


From 8a59f9d1e3d4340659fdfee8879dc09a6f2546e1 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:31 -0700
Subject: sock: Introduce sk->sk_prot->psock_update_sk_prot()

Currently sockmap calls into each protocol to update the struct
proto and replace it. This certainly won't work when the protocol
is implemented as a module, for example, AF_UNIX.

Introduce a new ops sk->sk_prot->psock_update_sk_prot(), so each
protocol can implement its own way to replace the struct proto.
This also helps get rid of symbol dependencies on CONFIG_INET.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-11-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 18 +++---------------
 include/net/sock.h    |  3 +++
 include/net/tcp.h     |  1 +
 include/net/udp.h     |  1 +
 net/core/skmsg.c      |  5 -----
 net/core/sock_map.c   | 24 ++++--------------------
 net/ipv4/tcp_bpf.c    | 24 +++++++++++++++++++++---
 net/ipv4/tcp_ipv4.c   |  3 +++
 net/ipv4/udp.c        |  3 +++
 net/ipv4/udp_bpf.c    | 15 +++++++++++++--
 net/ipv6/tcp_ipv6.c   |  3 +++
 net/ipv6/udp.c        |  3 +++
 12 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c83dbc2d81d9..5e800ddc2dc6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -99,6 +99,7 @@ struct sk_psock {
 	void (*saved_close)(struct sock *sk, long timeout);
 	void (*saved_write_space)(struct sock *sk);
 	void (*saved_data_ready)(struct sock *sk);
+	int  (*psock_update_sk_prot)(struct sock *sk, bool restore);
 	struct proto			*sk_proto;
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
@@ -395,25 +396,12 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
 	}
 }
 
-static inline void sk_psock_update_proto(struct sock *sk,
-					 struct sk_psock *psock,
-					 struct proto *ops)
-{
-	/* Pairs with lockless read in sk_clone_lock() */
-	WRITE_ONCE(sk->sk_prot, ops);
-}
-
 static inline void sk_psock_restore_proto(struct sock *sk,
 					  struct sk_psock *psock)
 {
 	sk->sk_prot->unhash = psock->saved_unhash;
-	if (inet_csk_has_ulp(sk)) {
-		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
-	} else {
-		sk->sk_write_space = psock->saved_write_space;
-		/* Pairs with lockless read in sk_clone_lock() */
-		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
-	}
+	if (psock->psock_update_sk_prot)
+		psock->psock_update_sk_prot(sk, true);
 }
 
 static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/include/net/sock.h b/include/net/sock.h
index 0b6266fd6bf6..8b4155e756c2 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1184,6 +1184,9 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+#ifdef CONFIG_BPF_SYSCALL
+	int			(*psock_update_sk_prot)(struct sock *sk, bool restore);
+#endif
 
 	/* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 075de26f449d..2efa4e5ea23d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2203,6 +2203,7 @@ struct sk_psock;
 
 #ifdef CONFIG_BPF_SYSCALL
 struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int tcp_bpf_update_proto(struct sock *sk, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 #endif /* CONFIG_BPF_SYSCALL */
 
diff --git a/include/net/udp.h b/include/net/udp.h
index d4d064c59232..df7cc1edc200 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -518,6 +518,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 #ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int udp_bpf_update_proto(struct sock *sk, bool restore);
 #endif
 
 #endif	/* _UDP_H */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a045812d7c78..9fc83f7cc1a0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -562,11 +562,6 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 
 	write_lock_bh(&sk->sk_callback_lock);
 
-	if (inet_csk_has_ulp(sk)) {
-		psock = ERR_PTR(-EINVAL);
-		goto out;
-	}
-
 	if (sk->sk_user_data) {
 		psock = ERR_PTR(-EBUSY);
 		goto out;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index c2a0411e08a8..2915c7c8778b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -185,26 +185,10 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
 
 static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
 {
-	struct proto *prot;
-
-	switch (sk->sk_type) {
-	case SOCK_STREAM:
-		prot = tcp_bpf_get_proto(sk, psock);
-		break;
-
-	case SOCK_DGRAM:
-		prot = udp_bpf_get_proto(sk, psock);
-		break;
-
-	default:
+	if (!sk->sk_prot->psock_update_sk_prot)
 		return -EINVAL;
-	}
-
-	if (IS_ERR(prot))
-		return PTR_ERR(prot);
-
-	sk_psock_update_proto(sk, psock, prot);
-	return 0;
+	psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
+	return sk->sk_prot->psock_update_sk_prot(sk, false);
 }
 
 static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
@@ -556,7 +540,7 @@ static bool sock_map_redirect_allowed(const struct sock *sk)
 
 static bool sock_map_sk_is_suitable(const struct sock *sk)
 {
-	return sk_is_tcp(sk) || sk_is_udp(sk);
+	return !!sk->sk_prot->psock_update_sk_prot;
 }
 
 static bool sock_map_sk_state_allowed(const struct sock *sk)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ae980716d896..ac8cfbaeacd2 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -595,20 +595,38 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
 	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
 }
 
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int tcp_bpf_update_proto(struct sock *sk, bool restore)
 {
+	struct sk_psock *psock = sk_psock(sk);
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
 
+	if (restore) {
+		if (inet_csk_has_ulp(sk)) {
+			tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+		} else {
+			sk->sk_write_space = psock->saved_write_space;
+			/* Pairs with lockless read in sk_clone_lock() */
+			WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+		}
+		return 0;
+	}
+
+	if (inet_csk_has_ulp(sk))
+		return -EINVAL;
+
 	if (sk->sk_family == AF_INET6) {
 		if (tcp_bpf_assert_proto_ops(psock->sk_proto))
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 
 		tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 	}
 
-	return &tcp_bpf_prots[family][config];
+	/* Pairs with lockless read in sk_clone_lock() */
+	WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
 
 /* If a child got cloned from a listening socket that had tcp_bpf
  * protocol callbacks installed, we need to restore the callbacks to
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index daad4f99db32..dfc6d1c0e710 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2806,6 +2806,9 @@ struct proto tcp_prot = {
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+	.psock_update_sk_prot	= tcp_bpf_update_proto,
+#endif
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
 	.leave_memory_pressure	= tcp_leave_memory_pressure,
 	.stream_memory_free	= tcp_stream_memory_free,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a0478b17243..38952aaee3a1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2849,6 +2849,9 @@ struct proto udp_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v4_rehash,
 	.get_port		= udp_v4_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+	.psock_update_sk_prot	= udp_bpf_update_proto,
+#endif
 	.memory_allocated	= &udp_memory_allocated,
 	.sysctl_mem		= sysctl_udp_mem,
 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a94791efc1a..6001f93cd3a0 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -41,12 +41,23 @@ static int __init udp_bpf_v4_build_proto(void)
 }
 core_initcall(udp_bpf_v4_build_proto);
 
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int udp_bpf_update_proto(struct sock *sk, bool restore)
 {
 	int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
+	struct sk_psock *psock = sk_psock(sk);
+
+	if (restore) {
+		sk->sk_write_space = psock->saved_write_space;
+		/* Pairs with lockless read in sk_clone_lock() */
+		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+		return 0;
+	}
 
 	if (sk->sk_family == AF_INET6)
 		udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 
-	return &udp_bpf_prots[family];
+	/* Pairs with lockless read in sk_clone_lock() */
+	WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(udp_bpf_update_proto);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d0f007741e8e..bff22d6ef516 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2139,6 +2139,9 @@ struct proto tcpv6_prot = {
 	.hash			= inet6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+	.psock_update_sk_prot	= tcp_bpf_update_proto,
+#endif
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
 	.leave_memory_pressure	= tcp_leave_memory_pressure,
 	.stream_memory_free	= tcp_stream_memory_free,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d25e5a9252fd..ef2c75bb4771 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1713,6 +1713,9 @@ struct proto udpv6_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v6_rehash,
 	.get_port		= udp_v6_get_port,
+#ifdef CONFIG_BPF_SYSCALL
+	.psock_update_sk_prot	= udp_bpf_update_proto,
+#endif
 	.memory_allocated	= &udp_memory_allocated,
 	.sysctl_mem		= sysctl_udp_mem,
 	.sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
-- 
cgit v1.2.3


From d7f571188ecf25c244789b883c878ec7c64b5b08 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:32 -0700
Subject: udp: Implement ->read_sock() for sockmap

This is similar to tcp_read_sock(), except we do not need
to worry about connections, we just need to retrieve skb
from UDP receive queue.

Note, the return value of ->read_sock() is unused in
sk_psock_verdict_data_ready(), and UDP still does not
support splice() due to lack of ->splice_read(), so users
can not reach udp_read_sock() directly.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-12-xiyou.wangcong@gmail.com
---
 include/net/udp.h   |  2 ++
 net/ipv4/af_inet.c  |  1 +
 net/ipv4/udp.c      | 29 +++++++++++++++++++++++++++++
 net/ipv6/af_inet6.c |  1 +
 4 files changed, 33 insertions(+)

diff --git a/include/net/udp.h b/include/net/udp.h
index df7cc1edc200..347b62a753c3 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -329,6 +329,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1355e6c0d567..f17870ee558b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1070,6 +1070,7 @@ const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
+	.read_sock	   = udp_read_sock,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 38952aaee3a1..4d02f6839e38 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1782,6 +1782,35 @@ busy_check:
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
+		  sk_read_actor_t recv_actor)
+{
+	int copied = 0;
+
+	while (1) {
+		struct sk_buff *skb;
+		int err, used;
+
+		skb = skb_recv_udp(sk, 0, 1, &err);
+		if (!skb)
+			return err;
+		used = recv_actor(desc, skb, 0, skb->len);
+		if (used <= 0) {
+			if (!copied)
+				copied = used;
+			break;
+		} else if (used <= skb->len) {
+			copied += used;
+		}
+
+		if (!desc->count)
+			break;
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL(udp_read_sock);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 802f5111805a..71de739b4a9e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,6 +714,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
+	.read_sock	   = udp_read_sock,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,
-- 
cgit v1.2.3


From 2bc793e3272a13e337416c057cb81c5396ad91d1 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:33 -0700
Subject: skmsg: Extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data()

Although these two functions are only used by TCP, they are not
specific to TCP at all, both operate on skmsg and ingress_msg,
so fit in net/core/skmsg.c very well.

And we will need them for non-TCP, so rename and move them to
skmsg.c and export them to modules.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-13-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h |   4 ++
 include/net/tcp.h     |   2 -
 net/core/skmsg.c      |  98 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_bpf.c    | 100 +-------------------------------------------------
 net/tls/tls_sw.c      |   4 +-
 5 files changed, 106 insertions(+), 102 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 5e800ddc2dc6..f78e90a04a69 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -125,6 +125,10 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
 int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 			     struct sk_msg *msg, u32 bytes);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+		     long timeo, int *err);
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+		   int len, int flags);
 
 static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2efa4e5ea23d..31b1696c62ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2209,8 +2209,6 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 
 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 			  int flags);
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-		      struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
 
 #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9fc83f7cc1a0..92a83c02562a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,6 +399,104 @@ out:
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+		     long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret = 0;
+
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		return 1;
+
+	if (!timeo)
+		return ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_wait_data);
+
+/* Receive sk_msg from psock->ingress_msg to @msg. */
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+		   int len, int flags)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+	int peek = flags & MSG_PEEK;
+	struct sk_msg *msg_rx;
+	int i, copied = 0;
+
+	msg_rx = sk_psock_peek_msg(psock);
+	while (copied != len) {
+		struct scatterlist *sge;
+
+		if (unlikely(!msg_rx))
+			break;
+
+		i = msg_rx->sg.start;
+		do {
+			struct page *page;
+			int copy;
+
+			sge = sk_msg_elem(msg_rx, i);
+			copy = sge->length;
+			page = sg_page(sge);
+			if (copied + copy > len)
+				copy = len - copied;
+			copy = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (!copy)
+				return copied ? copied : -EFAULT;
+
+			copied += copy;
+			if (likely(!peek)) {
+				sge->offset += copy;
+				sge->length -= copy;
+				if (!msg_rx->skb)
+					sk_mem_uncharge(sk, copy);
+				msg_rx->sg.size -= copy;
+
+				if (!sge->length) {
+					sk_msg_iter_var_next(i);
+					if (!msg_rx->skb)
+						put_page(page);
+				}
+			} else {
+				/* Lets not optimize peek case if copy_page_to_iter
+				 * didn't copy the entire length lets just break.
+				 */
+				if (copy != sge->length)
+					return copied;
+				sk_msg_iter_var_next(i);
+			}
+
+			if (copied == len)
+				break;
+		} while (i != msg_rx->sg.end);
+
+		if (unlikely(peek)) {
+			msg_rx = sk_psock_next_msg(psock, msg_rx);
+			if (!msg_rx)
+				break;
+			continue;
+		}
+
+		msg_rx->sg.start = i;
+		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+			msg_rx = sk_psock_dequeue_msg(psock);
+			kfree_sk_msg(msg_rx);
+		}
+		msg_rx = sk_psock_peek_msg(psock);
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+
 static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 						  struct sk_buff *skb)
 {
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ac8cfbaeacd2..3d622a0d0753 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,80 +10,6 @@
 #include <net/inet_common.h>
 #include <net/tls.h>
 
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-		      struct msghdr *msg, int len, int flags)
-{
-	struct iov_iter *iter = &msg->msg_iter;
-	int peek = flags & MSG_PEEK;
-	struct sk_msg *msg_rx;
-	int i, copied = 0;
-
-	msg_rx = sk_psock_peek_msg(psock);
-	while (copied != len) {
-		struct scatterlist *sge;
-
-		if (unlikely(!msg_rx))
-			break;
-
-		i = msg_rx->sg.start;
-		do {
-			struct page *page;
-			int copy;
-
-			sge = sk_msg_elem(msg_rx, i);
-			copy = sge->length;
-			page = sg_page(sge);
-			if (copied + copy > len)
-				copy = len - copied;
-			copy = copy_page_to_iter(page, sge->offset, copy, iter);
-			if (!copy)
-				return copied ? copied : -EFAULT;
-
-			copied += copy;
-			if (likely(!peek)) {
-				sge->offset += copy;
-				sge->length -= copy;
-				if (!msg_rx->skb)
-					sk_mem_uncharge(sk, copy);
-				msg_rx->sg.size -= copy;
-
-				if (!sge->length) {
-					sk_msg_iter_var_next(i);
-					if (!msg_rx->skb)
-						put_page(page);
-				}
-			} else {
-				/* Lets not optimize peek case if copy_page_to_iter
-				 * didn't copy the entire length lets just break.
-				 */
-				if (copy != sge->length)
-					return copied;
-				sk_msg_iter_var_next(i);
-			}
-
-			if (copied == len)
-				break;
-		} while (i != msg_rx->sg.end);
-
-		if (unlikely(peek)) {
-			msg_rx = sk_psock_next_msg(psock, msg_rx);
-			if (!msg_rx)
-				break;
-			continue;
-		}
-
-		msg_rx->sg.start = i;
-		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
-			msg_rx = sk_psock_dequeue_msg(psock);
-			kfree_sk_msg(msg_rx);
-		}
-		msg_rx = sk_psock_peek_msg(psock);
-	}
-
-	return copied;
-}
-EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
-
 static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 			   struct sk_msg *msg, u32 apply_bytes, int flags)
 {
@@ -237,28 +163,6 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
 	return !empty;
 }
 
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
-			     int flags, long timeo, int *err)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	int ret = 0;
-
-	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		return 1;
-
-	if (!timeo)
-		return ret;
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	ret = sk_wait_event(sk, &timeo,
-			    !list_empty(&psock->ingress_msg) ||
-			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	remove_wait_queue(sk_sleep(sk), &wait);
-	return ret;
-}
-
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		    int nonblock, int flags, int *addr_len)
 {
@@ -278,13 +182,13 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	}
 	lock_sock(sk);
 msg_bytes_ready:
-	copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
 	if (!copied) {
 		int data, err = 0;
 		long timeo;
 
 		timeo = sock_rcvtimeo(sk, nonblock);
-		data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
 		if (data) {
 			if (!sk_psock_queue_empty(psock))
 				goto msg_bytes_ready;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 01d933ae5f16..1dcb34dfd56b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1789,8 +1789,8 @@ int tls_sw_recvmsg(struct sock *sk,
 		skb = tls_wait_data(sk, psock, flags, timeo, &err);
 		if (!skb) {
 			if (psock) {
-				int ret = __tcp_bpf_recvmsg(sk, psock,
-							    msg, len, flags);
+				int ret = sk_msg_recvmsg(sk, psock, msg, len,
+							 flags);
 
 				if (ret > 0) {
 					decrypted += ret;
-- 
cgit v1.2.3


From 1f5be6b3b063767202f815d5571f7d03729f1282 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:34 -0700
Subject: udp: Implement udp_bpf_recvmsg() for sockmap

We have to implement udp_bpf_recvmsg() to replace the ->recvmsg()
to retrieve skmsg from ingress_msg.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-14-xiyou.wangcong@gmail.com
---
 net/ipv4/udp_bpf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 6001f93cd3a0..7d5c4ebf42fe 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -4,6 +4,68 @@
 #include <linux/skmsg.h>
 #include <net/sock.h>
 #include <net/udp.h>
+#include <net/inet_common.h>
+
+#include "udp_impl.h"
+
+static struct proto *udpv6_prot_saved __read_mostly;
+
+static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			  int noblock, int flags, int *addr_len)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
+						 addr_len);
+#endif
+	return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+}
+
+static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied, ret;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	lock_sock(sk);
+	if (sk_psock_queue_empty(psock)) {
+		ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+		goto out;
+	}
+
+msg_bytes_ready:
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		int data, err = 0;
+		long timeo;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+		if (data) {
+			if (!sk_psock_queue_empty(psock))
+				goto msg_bytes_ready;
+			ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+			goto out;
+		}
+		if (err) {
+			ret = err;
+			goto out;
+		}
+		copied = -EAGAIN;
+	}
+	ret = copied;
+out:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return ret;
+}
 
 enum {
 	UDP_BPF_IPV4,
@@ -11,7 +73,6 @@ enum {
 	UDP_BPF_NUM_PROTS,
 };
 
-static struct proto *udpv6_prot_saved __read_mostly;
 static DEFINE_SPINLOCK(udpv6_prot_lock);
 static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
 
@@ -20,6 +81,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
 	*prot        = *base;
 	prot->unhash = sock_map_unhash;
 	prot->close  = sock_map_close;
+	prot->recvmsg = udp_bpf_recvmsg;
 }
 
 static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
-- 
cgit v1.2.3


From 122e6c79efe1c25816118aca9cfabe54e99c2432 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:35 -0700
Subject: sock_map: Update sock type checks for UDP

Now UDP supports sockmap and redirection, we can safely update
the sock type checks for it accordingly.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-15-xiyou.wangcong@gmail.com
---
 net/core/sock_map.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 2915c7c8778b..3d190d22b0d8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -535,7 +535,10 @@ static bool sk_is_udp(const struct sock *sk)
 
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
-	return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+	if (sk_is_tcp(sk))
+		return sk->sk_state != TCP_LISTEN;
+	else
+		return sk->sk_state == TCP_ESTABLISHED;
 }
 
 static bool sock_map_sk_is_suitable(const struct sock *sk)
-- 
cgit v1.2.3


From d6378af615275435ce6e390a538c980ac19b6659 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:36 -0700
Subject: selftests/bpf: Add a test case for udp sockmap

Add a test case to ensure redirection between two UDP sockets work.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-16-xiyou.wangcong@gmail.com
---
 .../selftests/bpf/prog_tests/sockmap_listen.c      | 136 +++++++++++++++++++++
 .../selftests/bpf/progs/test_sockmap_listen.c      |  22 ++++
 2 files changed, 158 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index c26e6bf05e49..648d9ae898d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1603,6 +1603,141 @@ static void test_reuseport(struct test_sockmap_listen *skel,
 	}
 }
 
+static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
+				   int verd_mapfd, enum redir_mode mode)
+{
+	const char *log_prefix = redir_mode_str(mode);
+	struct sockaddr_storage addr;
+	int c0, c1, p0, p1;
+	unsigned int pass;
+	socklen_t len;
+	int err, n;
+	u64 value;
+	u32 key;
+	char b;
+
+	zero_verdict_count(verd_mapfd);
+
+	p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (p0 < 0)
+		return;
+	len = sizeof(addr);
+	err = xgetsockname(p0, sockaddr(&addr), &len);
+	if (err)
+		goto close_peer0;
+
+	c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+	if (c0 < 0)
+		goto close_peer0;
+	err = xconnect(c0, sockaddr(&addr), len);
+	if (err)
+		goto close_cli0;
+	err = xgetsockname(c0, sockaddr(&addr), &len);
+	if (err)
+		goto close_cli0;
+	err = xconnect(p0, sockaddr(&addr), len);
+	if (err)
+		goto close_cli0;
+
+	p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+	if (p1 < 0)
+		goto close_cli0;
+	err = xgetsockname(p1, sockaddr(&addr), &len);
+	if (err)
+		goto close_cli0;
+
+	c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+	if (c1 < 0)
+		goto close_peer1;
+	err = xconnect(c1, sockaddr(&addr), len);
+	if (err)
+		goto close_cli1;
+	err = xgetsockname(c1, sockaddr(&addr), &len);
+	if (err)
+		goto close_cli1;
+	err = xconnect(p1, sockaddr(&addr), len);
+	if (err)
+		goto close_cli1;
+
+	key = 0;
+	value = p0;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_cli1;
+
+	key = 1;
+	value = p1;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		goto close_cli1;
+
+	n = write(c1, "a", 1);
+	if (n < 0)
+		FAIL_ERRNO("%s: write", log_prefix);
+	if (n == 0)
+		FAIL("%s: incomplete write", log_prefix);
+	if (n < 1)
+		goto close_cli1;
+
+	key = SK_PASS;
+	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+	if (err)
+		goto close_cli1;
+	if (pass != 1)
+		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+	n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+	if (n < 0)
+		FAIL_ERRNO("%s: read", log_prefix);
+	if (n == 0)
+		FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+	xclose(c1);
+close_peer1:
+	xclose(p1);
+close_cli0:
+	xclose(c0);
+close_peer0:
+	xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+				       struct bpf_map *inner_map, int family)
+{
+	int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+	int sock_map = bpf_map__fd(inner_map);
+	int err;
+
+	err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+	if (err)
+		return;
+
+	skel->bss->test_ingress = false;
+	udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+			       REDIR_EGRESS);
+	skel->bss->test_ingress = true;
+	udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+			       REDIR_INGRESS);
+
+	xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+			   int family)
+{
+	const char *family_name, *map_name;
+	char s[MAX_TEST_NAME];
+
+	family_name = family_str(family);
+	map_name = map_type_str(map);
+	snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+	if (!test__start_subtest(s))
+		return;
+	udp_skb_redir_to_connected(skel, map, family);
+}
+
 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 		      int family)
 {
@@ -1611,6 +1746,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 	test_redir(skel, map, family, SOCK_STREAM);
 	test_reuseport(skel, map, family, SOCK_STREAM);
 	test_reuseport(skel, map, family, SOCK_DGRAM);
+	test_udp_redir(skel, map, family);
 }
 
 void test_sockmap_listen(void)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index fa221141e9c1..a39eba9f5201 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -29,6 +29,7 @@ struct {
 } verdict_map SEC(".maps");
 
 static volatile bool test_sockmap; /* toggled by user-space */
+static volatile bool test_ingress; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
 int prog_stream_parser(struct __sk_buff *skb)
@@ -55,6 +56,27 @@ int prog_stream_verdict(struct __sk_buff *skb)
 	return verdict;
 }
 
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	unsigned int *count;
+	__u32 zero = 0;
+	int verdict;
+
+	if (test_sockmap)
+		verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+					      test_ingress ? BPF_F_INGRESS : 0);
+	else
+		verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+					       test_ingress ? BPF_F_INGRESS : 0);
+
+	count = bpf_map_lookup_elem(&verdict_map, &verdict);
+	if (count)
+		(*count)++;
+
+	return verdict;
+}
+
 SEC("sk_msg")
 int prog_msg_verdict(struct sk_msg_md *msg)
 {
-- 
cgit v1.2.3


From 8d7cb74f2ccb5486ab8c631a8fcdc7621bbbc42c Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 30 Mar 2021 19:32:37 -0700
Subject: selftests/bpf: Add a test case for loading BPF_SK_SKB_VERDICT

This adds a test case to ensure BPF_SK_SKB_VERDICT and
BPF_SK_STREAM_VERDICT will never be attached at the same time.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-17-xiyou.wangcong@gmail.com
---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 40 ++++++++++++++++++++++
 .../bpf/progs/test_sockmap_skb_verdict_attach.c    | 18 ++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b8b48cac2ac3..ab77596b64e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -7,6 +7,7 @@
 #include "test_skmsg_load_helpers.skel.h"
 #include "test_sockmap_update.skel.h"
 #include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
@@ -281,6 +282,39 @@ out:
 	bpf_iter_sockmap__destroy(skel);
 }
 
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+					    enum bpf_attach_type second)
+{
+	struct test_sockmap_skb_verdict_attach *skel;
+	int err, map, verdict;
+
+	skel = test_sockmap_skb_verdict_attach__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_sockmap_skb_verdict_attach__open_and_load");
+		return;
+	}
+
+	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	map = bpf_map__fd(skel->maps.sock_map);
+
+	err = bpf_prog_attach(verdict, map, first, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach");
+		goto out;
+	}
+
+	err = bpf_prog_attach(verdict, map, second, 0);
+	assert(err == -1 && errno == EBUSY);
+
+	err = bpf_prog_detach2(verdict, map, first);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_detach2");
+		goto out;
+	}
+out:
+	test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -301,4 +335,10 @@ void test_sockmap_basic(void)
 		test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash copy"))
 		test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap skb_verdict attach")) {
+		test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+						BPF_SK_SKB_STREAM_VERDICT);
+		test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+						BPF_SK_SKB_VERDICT);
+	}
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..2d31f66e4f23
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From f20a46c3044c3f75232b3d0e2d09af9b25efaf45 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Thu, 1 Apr 2021 09:30:48 +0700
Subject: tipc: fix unique bearer names sanity check

When enabling a bearer by name, we don't sanity check its name with
higher slot in bearer list. This may have the effect that the name
of an already enabled bearer bypasses the check.

To fix the above issue, we just perform an extra checking with all
existing bearers.

Fixes: cb30a63384bc9 ("tipc: refactor function tipc_enable_bearer()")
Cc: stable@vger.kernel.org
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d47e0b940ac9..443f8e5b9477 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -256,6 +256,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	int bearer_id = 0;
 	int res = -EINVAL;
 	char *errstr = "";
+	u32 i;
 
 	if (!bearer_name_validate(name, &b_names)) {
 		errstr = "illegal name";
@@ -280,31 +281,38 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 		prio = m->priority;
 
 	/* Check new bearer vs existing ones and find free bearer id if any */
-	while (bearer_id < MAX_BEARERS) {
-		b = rtnl_dereference(tn->bearer_list[bearer_id]);
-		if (!b)
-			break;
+	bearer_id = MAX_BEARERS;
+	i = MAX_BEARERS;
+	while (i-- != 0) {
+		b = rtnl_dereference(tn->bearer_list[i]);
+		if (!b) {
+			bearer_id = i;
+			continue;
+		}
 		if (!strcmp(name, b->name)) {
 			errstr = "already enabled";
 			NL_SET_ERR_MSG(extack, "Already enabled");
 			goto rejected;
 		}
-		bearer_id++;
-		if (b->priority != prio)
-			continue;
-		if (++with_this_prio <= 2)
-			continue;
-		pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
-			name, prio);
-		if (prio == TIPC_MIN_LINK_PRI) {
-			errstr = "cannot adjust to lower";
-			NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
-			goto rejected;
+
+		if (b->priority == prio &&
+		    (++with_this_prio > 2)) {
+			pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+				name, prio);
+
+			if (prio == TIPC_MIN_LINK_PRI) {
+				errstr = "cannot adjust to lower";
+				NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
+				goto rejected;
+			}
+
+			pr_warn("Bearer <%s>: trying with adjusted priority\n",
+				name);
+			prio--;
+			bearer_id = MAX_BEARERS;
+			i = MAX_BEARERS;
+			with_this_prio = 1;
 		}
-		pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
-		prio--;
-		bearer_id = 0;
-		with_this_prio = 1;
 	}
 
 	if (bearer_id >= MAX_BEARERS) {
-- 
cgit v1.2.3


From b7a320c3a1ecb20d24173b668288441dbb7fe7dd Mon Sep 17 00:00:00 2001
From: Xu Jia <xujia39@huawei.com>
Date: Thu, 1 Apr 2021 11:22:23 +0800
Subject: net: ipv6: Refactor in rt6_age_examine_exception

The logic in rt6_age_examine_exception is confusing. The commit is
to refactor the code.

Signed-off-by: Xu Jia <xujia39@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d99dd4d36252..28801ae80548 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2085,13 +2085,10 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
 
 	if (rt->rt6i_flags & RTF_GATEWAY) {
 		struct neighbour *neigh;
-		__u8 neigh_flags = 0;
 
 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
-		if (neigh)
-			neigh_flags = neigh->flags;
 
-		if (!(neigh_flags & NTF_ROUTER)) {
+		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
 			RT6_TRACE("purging route %p via non-router but gateway\n",
 				  rt);
 			rt6_remove_exception(bucket, rt6_ex);
-- 
cgit v1.2.3


From 8accc467758efd3a03368f9fec633b04dde80895 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Thu, 1 Apr 2021 14:02:50 +0800
Subject: stmmac: intel: use managed PCI function on probe and resume

Update dwmac-intel to use managed function, i.e. pcim_enable_device().

This will allow devres framework to call resource free function for us.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 3d9a57043af2..add95e20548d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -924,7 +924,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	/* Enable pci device */
-	ret = pci_enable_device(pdev);
+	ret = pcim_enable_device(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n",
 			__func__);
@@ -1006,13 +1006,9 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
 
 	stmmac_dvr_remove(&pdev->dev);
 
-	pci_free_irq_vectors(pdev);
-
 	clk_unregister_fixed_rate(priv->plat->stmmac_clk);
 
 	pcim_iounmap_regions(pdev, BIT(0));
-
-	pci_disable_device(pdev);
 }
 
 static int __maybe_unused intel_eth_pci_suspend(struct device *dev)
@@ -1028,7 +1024,6 @@ static int __maybe_unused intel_eth_pci_suspend(struct device *dev)
 	if (ret)
 		return ret;
 
-	pci_disable_device(pdev);
 	pci_wake_from_d3(pdev, true);
 	return 0;
 }
@@ -1041,7 +1036,7 @@ static int __maybe_unused intel_eth_pci_resume(struct device *dev)
 	pci_restore_state(pdev);
 	pci_set_power_state(pdev, PCI_D0);
 
-	ret = pci_enable_device(pdev);
+	ret = pcim_enable_device(pdev);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 2237778d8c215fe7921da02d25fd32e964aa8ee5 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Thu, 1 Apr 2021 14:06:28 +0800
Subject: net: stmmac: remove unnecessary pci_enable_msi() call

The commit d2a029bde37b ("stmmac: pci: add MSI support for Intel Quark
X1000") introduced a pci_enable_msi() call in stmmac_pci.c.

With the commit 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to
contain all Intel platform"), Intel Quark platform related codes
have been moved to the newly created driver.

Removing this unnecessary pci_enable_msi() call as there are no other
devices that uses stmmac-pci and need MSI to be enabled.

Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 272cb47af9f2..95e0e4d6f74d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -198,8 +198,6 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
-	pci_enable_msi(pdev);
-
 	memset(&res, 0, sizeof(res));
 	res.addr = pcim_iomap_table(pdev)[i];
 	res.wol_irq = pdev->irq;
-- 
cgit v1.2.3


From 9fadafa46f4813b9e3d8f76d3525c83499a26d36 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 1 Apr 2021 15:08:22 +0800
Subject: include: net: Remove repeated struct declaration

struct ctl_table_header is declared twice. One is declared
at 46th line. The blew one is not needed. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index dcaee24a4d87..47457048ab86 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -407,7 +407,6 @@ int register_pernet_device(struct pernet_operations *);
 void unregister_pernet_device(struct pernet_operations *);
 
 struct ctl_table;
-struct ctl_table_header;
 
 #ifdef CONFIG_SYSCTL
 int net_sysctl_init(void);
-- 
cgit v1.2.3


From ec7e48ca4bc765e5460ec118ff84de6260dac855 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 1 Apr 2021 16:40:29 +0800
Subject: net: smc: Remove repeated struct declaration

struct smc_clc_msg_local is declared twice. One is declared at
301st line. The blew one is not needed. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index e8e448771f85..6d6fd1397c87 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -410,7 +410,6 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev,
 
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
-struct smc_clc_msg_local;
 
 void smc_lgr_cleanup_early(struct smc_connection *conn);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
-- 
cgit v1.2.3


From c8ad0cf37c0061043e52a992f6d5f4dc54dea8f1 Mon Sep 17 00:00:00 2001
From: Lu Wei <luwei32@huawei.com>
Date: Thu, 1 Apr 2021 17:27:01 +0800
Subject: net: hns: Fix some typos

Fix some typos.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 7a1a0a90a1a3..5e349c0bdecc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1235,7 +1235,7 @@ static int hns_nic_init_affinity_mask(int q_num, int ring_idx,
 {
 	int cpu;
 
-	/* Diffrent irq banlance between 16core and 32core.
+	/* Different irq balance between 16core and 32core.
 	 * The cpu mask set by ring index according to the ring flag
 	 * which indicate the ring is tx or rx.
 	 */
@@ -1592,7 +1592,7 @@ static void hns_disable_serdes_lb(struct net_device *ndev)
  *       which buffer size is 4096.
  *    2. we set the chip serdes loopback and set rss indirection to the ring.
  *    3. construct 64-bytes ip broadcast packages, wait the associated rx ring
- *       recieving all packages and it will fetch new descriptions.
+ *       receiving all packages and it will fetch new descriptions.
  *    4. recover to the original state.
  *
  *@ndev: net device
@@ -1621,7 +1621,7 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev)
 	if (!org_indir)
 		return -ENOMEM;
 
-	/* store the orginal indirection */
+	/* store the original indirection */
 	ops->get_rss(h, org_indir, NULL, NULL);
 
 	cur_indir = kzalloc(indir_size, GFP_KERNEL);
-- 
cgit v1.2.3


From e228c0de904c8c5eb732dafb49a8446c1794dc72 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 1 Apr 2021 22:20:15 +0800
Subject: lan743x: remove redundant semi-colon

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 1c3e204d727c..e7ab5f3f73fd 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3004,7 +3004,7 @@ static int lan743x_pm_suspend(struct device *dev)
 		lan743x_pm_set_wol(adapter);
 
 	/* Host sets PME_En, put D3hot */
-	return pci_prepare_to_sleep(pdev);;
+	return pci_prepare_to_sleep(pdev);
 }
 
 static int lan743x_pm_resume(struct device *dev)
-- 
cgit v1.2.3


From a7a80b17c750c0f4b2c15cc61be5ff7c1d29f8d6 Mon Sep 17 00:00:00 2001
From: Otto Hollmann <otto.hollmann@suse.com>
Date: Thu, 1 Apr 2021 17:57:05 +0200
Subject: net: document a side effect of ip_local_reserved_ports

If there is overlapp between ip_local_port_range and ip_local_reserved_ports with a huge reserved block, it will affect probability of selecting ephemeral ports, see file net/ipv4/inet_hashtables.c:723

    int __inet_hash_connect(
    ...
            for (i = 0; i < remaining; i += 2, port += 2) {
                    if (unlikely(port >= high))
                            port -= remaining;
                    if (inet_is_local_reserved_port(net, port))
                            continue;

    E.g. if there is reserved block of 10000 ports, two ports right after this block will be 5000 more likely selected than others.
    If this was intended, we can/should add note into documentation as proposed in this commit, otherwise we should think about different solution. One option could be mapping table of continuous port ranges. Second option could be letting user to modify step (port+=2) in above loop, e.g. using new sysctl parameter.

Signed-off-by: Otto Hollmann <otto.hollmann@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 4130bce40765..9701906f63f9 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1073,7 +1073,9 @@ ip_local_reserved_ports - list of comma separated ranges
 
 	although this is redundant. However such a setting is useful
 	if later the port range is changed to a value that will
-	include the reserved ports.
+	include the reserved ports. Also keep in mind, that overlapping
+	of these ranges may affect probability of selecting ephemeral
+	ports which are right after block of reserved ports.
 
 	Default: Empty
 
-- 
cgit v1.2.3


From 5a32fcdb1e686e90a2c5f2d095fb79aa2fb3f176 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Apr 2021 09:42:33 -0700
Subject: net: phy: broadcom: Add statistics for all Gigabit PHYs

All Gigabit PHYs use the same register layout as far as fetching
statistics goes. Fast Ethernet PHYs do not all support statistics, and
the BCM54616S would require some switching between the coper and fiber
modes to fetch the appropriate statistics which is not supported yet.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 76 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 82fe5f43f0e9..7bf3011b8e77 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -671,13 +671,13 @@ static irqreturn_t brcm_fet_handle_interrupt(struct phy_device *phydev)
 	return IRQ_HANDLED;
 }
 
-struct bcm53xx_phy_priv {
+struct bcm54xx_phy_priv {
 	u64	*stats;
 };
 
-static int bcm53xx_phy_probe(struct phy_device *phydev)
+static int bcm54xx_phy_probe(struct phy_device *phydev)
 {
-	struct bcm53xx_phy_priv *priv;
+	struct bcm54xx_phy_priv *priv;
 
 	priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -694,10 +694,10 @@ static int bcm53xx_phy_probe(struct phy_device *phydev)
 	return 0;
 }
 
-static void bcm53xx_phy_get_stats(struct phy_device *phydev,
-				  struct ethtool_stats *stats, u64 *data)
+static void bcm54xx_get_stats(struct phy_device *phydev,
+			      struct ethtool_stats *stats, u64 *data)
 {
-	struct bcm53xx_phy_priv *priv = phydev->priv;
+	struct bcm54xx_phy_priv *priv = phydev->priv;
 
 	bcm_phy_get_stats(phydev, priv->stats, stats, data);
 }
@@ -708,6 +708,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5411",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -716,6 +720,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5421",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -724,6 +732,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM54210E",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -732,6 +744,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5461",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -740,6 +756,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM54612E",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -759,6 +779,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5464",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -769,6 +793,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5481",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= bcm5481_config_aneg,
 	.config_intr	= bcm_phy_config_intr,
@@ -778,6 +806,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM54810",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init    = bcm54xx_config_init,
 	.config_aneg    = bcm5481_config_aneg,
 	.config_intr    = bcm_phy_config_intr,
@@ -789,6 +821,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM54811",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init    = bcm54811_config_init,
 	.config_aneg    = bcm5481_config_aneg,
 	.config_intr    = bcm_phy_config_intr,
@@ -800,6 +836,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM5482",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -808,6 +848,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM50610",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -816,6 +860,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM50610M",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -824,6 +872,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Broadcom BCM57780",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -851,8 +903,8 @@ static struct phy_driver broadcom_drivers[] = {
 	/* PHY_GBIT_FEATURES */
 	.get_sset_count	= bcm_phy_get_sset_count,
 	.get_strings	= bcm_phy_get_strings,
-	.get_stats	= bcm53xx_phy_get_stats,
-	.probe		= bcm53xx_phy_probe,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 }, {
 	.phy_id		= PHY_ID_BCM53125,
 	.phy_id_mask	= 0xfffffff0,
@@ -861,8 +913,8 @@ static struct phy_driver broadcom_drivers[] = {
 	/* PHY_GBIT_FEATURES */
 	.get_sset_count	= bcm_phy_get_sset_count,
 	.get_strings	= bcm_phy_get_strings,
-	.get_stats	= bcm53xx_phy_get_stats,
-	.probe		= bcm53xx_phy_probe,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
@@ -871,6 +923,10 @@ static struct phy_driver broadcom_drivers[] = {
 	.phy_id_mask    = 0xfffffff0,
 	.name           = "Broadcom BCM89610",
 	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm54xx_get_stats,
+	.probe		= bcm54xx_phy_probe,
 	.config_init    = bcm54xx_config_init,
 	.config_intr    = bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
-- 
cgit v1.2.3


From bd78980be1a68d14524c51c4b4170782fada622b Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Thu, 1 Apr 2021 23:36:07 +0100
Subject: net: usb: ax88179_178a: initialize local variables before use

Use memset to initialize local array in drivers/net/usb/ax88179_178a.c, and
also set a local u16 and u32 variable to 0. Fixes a KMSAN found uninit-value bug
reported by syzbot at:
https://syzkaller.appspot.com/bug?id=00371c73c72f72487c1d0bfe0cc9d00de339d5aa

Reported-by: syzbot+4993e4a0e237f1b53747@syzkaller.appspotmail.com
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index d650b39b6e5d..c1316718304d 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -296,12 +296,12 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 	int ret;
 
 	if (2 == size) {
-		u16 buf;
+		u16 buf = 0;
 		ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0);
 		le16_to_cpus(&buf);
 		*((u16 *)data) = buf;
 	} else if (4 == size) {
-		u32 buf;
+		u32 buf = 0;
 		ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0);
 		le32_to_cpus(&buf);
 		*((u32 *)data) = buf;
@@ -1296,6 +1296,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
 {
 	u8 mac[ETH_ALEN];
 
+	memset(mac, 0, sizeof(mac));
+
 	/* Maybe the boot loader passed the MAC address via device tree */
 	if (!eth_platform_get_mac_address(&dev->udev->dev, mac)) {
 		netif_dbg(dev, ifup, dev->net,
-- 
cgit v1.2.3


From 82a1242619d0db39ca710538fa2a53623a1022c8 Mon Sep 17 00:00:00 2001
From: Meng Yu <yumeng18@huawei.com>
Date: Thu, 1 Apr 2021 14:50:38 +0800
Subject: Bluetooth: Remove 'return' in void function

void function return statements are not generally useful.

Signed-off-by: Meng Yu <yumeng18@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c    | 2 --
 net/bluetooth/l2cap_core.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index cff4944d5b66..19f7e42db1b0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -841,8 +841,6 @@ static void chan_close_cb(struct l2cap_chan *chan)
 	} else {
 		spin_unlock(&devices_lock);
 	}
-
-	return;
 }
 
 static void chan_state_change_cb(struct l2cap_chan *chan, int state, int err)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7641fdfb2628..a3b1d40b8750 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -676,8 +676,6 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
 		skb_queue_purge(&chan->tx_q);
 		break;
 	}
-
-	return;
 }
 EXPORT_SYMBOL_GPL(l2cap_chan_del);
 
-- 
cgit v1.2.3


From 149b3f13b4b11175c81105f32b048260e63fdc34 Mon Sep 17 00:00:00 2001
From: Meng Yu <yumeng18@huawei.com>
Date: Thu, 1 Apr 2021 14:50:39 +0800
Subject: Bluetooth: Coding style fix

1. Add space when needed;
2. Block comments style fix;
3. Move open brace '{' following function definitions to the next line;
4. Remove unnecessary braces '{}' for single statement blocks.

Signed-off-by: Meng Yu <yumeng18@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/6lowpan.c     | 3 +--
 net/bluetooth/hci_debugfs.c | 8 ++++----
 net/bluetooth/hci_event.c   | 2 +-
 net/bluetooth/hci_request.c | 6 ++----
 net/bluetooth/l2cap_core.c  | 5 +++--
 5 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 19f7e42db1b0..97617d02c8f9 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -205,8 +205,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
 		}
 	}
 
-	/* use the neighbour cache for matching addresses assigned by SLAAC
-	*/
+	/* use the neighbour cache for matching addresses assigned by SLAAC */
 	neigh = __ipv6_neigh_lookup(dev->netdev, nexthop);
 	if (neigh) {
 		list_for_each_entry_rcu(peer, &dev->peers, list) {
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 1a0ab58bfad0..47f4f21fbc1a 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -274,7 +274,7 @@ static ssize_t use_debug_keys_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS) ? 'Y' : 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -292,7 +292,7 @@ static ssize_t sc_only_mode_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_SC_ONLY) ? 'Y' : 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -428,7 +428,7 @@ static ssize_t ssp_debug_mode_read(struct file *file, char __user *user_buf,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = hdev->ssp_debug_mode ? 'Y': 'N';
+	buf[0] = hdev->ssp_debug_mode ? 'Y' : 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
@@ -742,7 +742,7 @@ static ssize_t force_static_address_read(struct file *file,
 	struct hci_dev *hdev = file->private_data;
 	char buf[3];
 
-	buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y': 'N';
+	buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ? 'Y' : 'N';
 	buf[1] = '\n';
 	buf[2] = '\0';
 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 341c8ce93648..593eafa282e3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5891,7 +5891,7 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev,
 			params->conn_latency = latency;
 			params->supervision_timeout = timeout;
 			store_hint = 0x01;
-		} else{
+		} else {
 			store_hint = 0x00;
 		}
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 8ace5d34b01e..6cd9d41eda44 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1647,9 +1647,8 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
 {
 	u8 scan_rsp_len = 0;
 
-	if (hdev->appearance) {
+	if (hdev->appearance)
 		scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
-	}
 
 	return append_local_name(hdev, ptr, scan_rsp_len);
 }
@@ -1667,9 +1666,8 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
 
 	instance_flags = adv_instance->flags;
 
-	if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) {
+	if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance)
 		scan_rsp_len = append_appearance(hdev, ptr, scan_rsp_len);
-	}
 
 	memcpy(&ptr[scan_rsp_len], adv_instance->scan_rsp_data,
 	       adv_instance->scan_rsp_len);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a3b1d40b8750..140669456b6f 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -652,7 +652,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
 	if (test_bit(CONF_NOT_COMPLETE, &chan->conf_state))
 		return;
 
-	switch(chan->mode) {
+	switch (chan->mode) {
 	case L2CAP_MODE_BASIC:
 		break;
 
@@ -7770,7 +7770,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 	return conn;
 }
 
-static bool is_valid_psm(u16 psm, u8 dst_type) {
+static bool is_valid_psm(u16 psm, u8 dst_type)
+{
 	if (!psm)
 		return false;
 
-- 
cgit v1.2.3


From 06752d1678b49ac471f11950ebd6d29541828bba Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Thu, 1 Apr 2021 11:11:33 +0800
Subject: Bluetooth: Check inquiry status before sending one

There is a possibility where HCI_INQUIRY flag is set but we still
send HCI_OP_INQUIRY anyway.

Such a case can be reproduced by connecting to an LE device while
active scanning. When the device is discovered, we initiate a
connection, stop LE Scan, and send Discovery MGMT with status
disabled, but we don't cancel the inquiry.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Reviewed-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 6cd9d41eda44..5b4eb87b19f0 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2950,6 +2950,9 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt)
 	const u8 liac[3] = { 0x00, 0x8b, 0x9e };
 	struct hci_cp_inquiry cp;
 
+	if (test_bit(HCI_INQUIRY, &req->hdev->flags))
+		return 0;
+
 	bt_dev_dbg(req->hdev, "");
 
 	hci_dev_lock(req->hdev);
-- 
cgit v1.2.3


From 0ae8ef674eb391d5a832967eb03cfe06904b31d0 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 29 Mar 2021 10:27:04 -0700
Subject: Bluetooth: SMP: Fix variable dereferenced before check 'conn'

This fixes kbuild findings:

smatch warnings:
net/bluetooth/smp.c:1633 smp_user_confirm_reply() warn: variable
dereferenced before check 'conn' (see line 1631)

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 2def90668173..5c17acfb1645 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -1628,11 +1628,11 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
 	u32 value;
 	int err;
 
-	bt_dev_dbg(conn->hcon->hdev, "");
-
 	if (!conn)
 		return -ENOTCONN;
 
+	bt_dev_dbg(conn->hcon->hdev, "");
+
 	chan = conn->smp;
 	if (!chan)
 		return -ENOTCONN;
-- 
cgit v1.2.3


From 27e554a4fcd84e499bf0a82122b8c4c3f1de38b6 Mon Sep 17 00:00:00 2001
From: "mark-yw.chen" <mark-yw.chen@mediatek.com>
Date: Mon, 29 Mar 2021 01:18:33 +0800
Subject: Bluetooth: btusb: Enable quirk boolean flag for Mediatek Chip.

Adding support LE scatternet and WBS for Mediatek Chip

Signed-off-by: mark-yw.chen <mark-yw.chen@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index eb250f533265..192cb8c191bc 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -399,7 +399,9 @@ static const struct usb_device_id blacklist_table[] = {
 
 	/* MediaTek Bluetooth devices */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0e8d, 0xe0, 0x01, 0x01),
-	  .driver_info = BTUSB_MEDIATEK },
+	  .driver_info = BTUSB_MEDIATEK |
+			 BTUSB_WIDEBAND_SPEECH |
+			 BTUSB_VALID_LE_STATES },
 
 	/* Additional MediaTek MT7615E Bluetooth devices */
 	{ USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK},
-- 
cgit v1.2.3


From 57a3a98d7c0ab17a264e30fdc166a0646f900b99 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:55:59 -0700
Subject: ionic: add new queue features to interface

Add queue feature extensions to prepare for features that
can be queue specific, in addition to the general queue
features already defined.  While we're here, change the
existing feature ids from #defines to enum.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h |  1 +
 drivers/net/ethernet/pensando/ionic/ionic_if.h  | 27 +++++++++++++++++++------
 drivers/net/ethernet/pensando/ionic/ionic_lif.c |  3 +++
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 0c0533737b2b..68e5e7a97801 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -222,6 +222,7 @@ struct ionic_queue {
 	u64 stop;
 	u64 wake;
 	u64 drop;
+	u64 features;
 	struct ionic_dev *idev;
 	unsigned int type;
 	unsigned int hw_index;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 88210142395d..23043ce0a5d8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -345,6 +345,23 @@ enum ionic_logical_qtype {
 	IONIC_QTYPE_MAX     = 16,
 };
 
+/**
+ * enum ionic_q_feature - Common Features for most queue types
+ *
+ * Common features use bits 0-15. Per-queue-type features use higher bits.
+ *
+ * @IONIC_QIDENT_F_CQ:      Queue has completion ring
+ * @IONIC_QIDENT_F_SG:      Queue has scatter/gather ring
+ * @IONIC_QIDENT_F_EQ:      Queue can use event queue
+ * @IONIC_QIDENT_F_CMB:     Queue is in cmb bar
+ */
+enum ionic_q_feature {
+	IONIC_QIDENT_F_CQ		= BIT_ULL(0),
+	IONIC_QIDENT_F_SG		= BIT_ULL(1),
+	IONIC_QIDENT_F_EQ		= BIT_ULL(2),
+	IONIC_QIDENT_F_CMB		= BIT_ULL(3),
+};
+
 /**
  * struct ionic_lif_logical_qtype - Descriptor of logical to HW queue type
  * @qtype:          Hardware Queue Type
@@ -529,7 +546,7 @@ struct ionic_q_identify_comp {
  * union ionic_q_identity - queue identity information
  *     @version:        Queue type version that can be used with FW
  *     @supported:      Bitfield of queue versions, first bit = ver 0
- *     @features:       Queue features
+ *     @features:       Queue features (enum ionic_q_feature, etc)
  *     @desc_sz:        Descriptor size
  *     @comp_sz:        Completion descriptor size
  *     @sg_desc_sz:     Scatter/Gather descriptor size
@@ -541,10 +558,6 @@ union ionic_q_identity {
 		u8      version;
 		u8      supported;
 		u8      rsvd[6];
-#define IONIC_QIDENT_F_CQ	0x01	/* queue has completion ring */
-#define IONIC_QIDENT_F_SG	0x02	/* queue has scatter/gather ring */
-#define IONIC_QIDENT_F_EQ	0x04	/* queue can use event queue */
-#define IONIC_QIDENT_F_CMB	0x08	/* queue is in cmb bar */
 		__le64  features;
 		__le16  desc_sz;
 		__le16  comp_sz;
@@ -585,6 +598,7 @@ union ionic_q_identity {
  * @ring_base:    Queue ring base address
  * @cq_ring_base: Completion queue ring base address
  * @sg_ring_base: Scatter/Gather ring base address
+ * @features:     Mask of queue features to enable, if not in the flags above.
  */
 struct ionic_q_init_cmd {
 	u8     opcode;
@@ -608,7 +622,8 @@ struct ionic_q_init_cmd {
 	__le64 ring_base;
 	__le64 cq_ring_base;
 	__le64 sg_ring_base;
-	u8     rsvd2[20];
+	u8     rsvd2[12];
+	__le64 features;
 } __packed;
 
 /**
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index a51be25723a5..1b89549b243b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -731,6 +731,7 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 			.ring_base = cpu_to_le64(q->base_pa),
 			.cq_ring_base = cpu_to_le64(cq->base_pa),
 			.sg_ring_base = cpu_to_le64(q->sg_base_pa),
+			.features = cpu_to_le64(q->features),
 		},
 	};
 	unsigned int intr_index;
@@ -791,6 +792,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 			.ring_base = cpu_to_le64(q->base_pa),
 			.cq_ring_base = cpu_to_le64(cq->base_pa),
 			.sg_ring_base = cpu_to_le64(q->sg_base_pa),
+			.features = cpu_to_le64(q->features),
 		},
 	};
 	int err;
@@ -2214,6 +2216,7 @@ static const struct net_device_ops ionic_netdev_ops = {
 static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 {
 	/* only swapping the queues, not the napi, flags, or other stuff */
+	swap(a->q.features,   b->q.features);
 	swap(a->q.num_descs,  b->q.num_descs);
 	swap(a->q.base,       b->q.base);
 	swap(a->q.base_pa,    b->q.base_pa);
-- 
cgit v1.2.3


From 0ec9f6669a7dc3041a0d9619489de05ac0146e3b Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:00 -0700
Subject: ionic: add handling of larger descriptors

In preparating for hardware timestamping, we need to support
large Tx and Rx completion descriptors.  Here we add the new
queue feature ids and handling for the completion descriptor
sizes.

We only are adding support for the Rx 2x sized completion
descriptors in the general Rx queues for now as we will be
using it for PTP Rx support, and we don't have an immediate
use for the large descriptors in the general Tx queues yet;
it will be used in a special Tx queues added in one of the
next few patches.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_if.h   | 12 ++++
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  | 75 ++++++++++++++++--------
 drivers/net/ethernet/pensando/ionic/ionic_lif.h  |  3 +
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 12 +++-
 4 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 23043ce0a5d8..1299630fcde8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -354,12 +354,24 @@ enum ionic_logical_qtype {
  * @IONIC_QIDENT_F_SG:      Queue has scatter/gather ring
  * @IONIC_QIDENT_F_EQ:      Queue can use event queue
  * @IONIC_QIDENT_F_CMB:     Queue is in cmb bar
+ * @IONIC_Q_F_2X_DESC:      Double main descriptor size
+ * @IONIC_Q_F_2X_CQ_DESC:   Double cq descriptor size
+ * @IONIC_Q_F_2X_SG_DESC:   Double sg descriptor size
+ * @IONIC_Q_F_4X_DESC:      Quadruple main descriptor size
+ * @IONIC_Q_F_4X_CQ_DESC:   Quadruple cq descriptor size
+ * @IONIC_Q_F_4X_SG_DESC:   Quadruple sg descriptor size
  */
 enum ionic_q_feature {
 	IONIC_QIDENT_F_CQ		= BIT_ULL(0),
 	IONIC_QIDENT_F_SG		= BIT_ULL(1),
 	IONIC_QIDENT_F_EQ		= BIT_ULL(2),
 	IONIC_QIDENT_F_CMB		= BIT_ULL(3),
+	IONIC_Q_F_2X_DESC		= BIT_ULL(4),
+	IONIC_Q_F_2X_CQ_DESC		= BIT_ULL(5),
+	IONIC_Q_F_2X_SG_DESC		= BIT_ULL(6),
+	IONIC_Q_F_4X_DESC		= BIT_ULL(7),
+	IONIC_Q_F_4X_CQ_DESC		= BIT_ULL(8),
+	IONIC_Q_F_4X_SG_DESC		= BIT_ULL(9),
 };
 
 /**
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1b89549b243b..9a61b2bbb652 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1722,11 +1722,15 @@ static void ionic_txrx_free(struct ionic_lif *lif)
 
 static int ionic_txrx_alloc(struct ionic_lif *lif)
 {
-	unsigned int sg_desc_sz;
+	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
 	unsigned int flags;
 	unsigned int i;
 	int err = 0;
 
+	num_desc = lif->ntxq_descs;
+	desc_sz = sizeof(struct ionic_txq_desc);
+	comp_sz = sizeof(struct ionic_txq_comp);
+
 	if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
 	    lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz ==
 					  sizeof(struct ionic_txq_sg_desc_v1))
@@ -1739,10 +1743,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
 		flags |= IONIC_QCQ_F_INTR;
 	for (i = 0; i < lif->nxqs; i++) {
 		err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
-				      lif->ntxq_descs,
-				      sizeof(struct ionic_txq_desc),
-				      sizeof(struct ionic_txq_comp),
-				      sg_desc_sz,
+				      num_desc, desc_sz, comp_sz, sg_desc_sz,
 				      lif->kern_pid, &lif->txqcqs[i]);
 		if (err)
 			goto err_out;
@@ -1759,16 +1760,24 @@ static int ionic_txrx_alloc(struct ionic_lif *lif)
 	}
 
 	flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR;
+
+	num_desc = lif->nrxq_descs;
+	desc_sz = sizeof(struct ionic_rxq_desc);
+	comp_sz = sizeof(struct ionic_rxq_comp);
+	sg_desc_sz = sizeof(struct ionic_rxq_sg_desc);
+
+	if (lif->rxq_features & IONIC_Q_F_2X_CQ_DESC)
+		comp_sz *= 2;
+
 	for (i = 0; i < lif->nxqs; i++) {
 		err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
-				      lif->nrxq_descs,
-				      sizeof(struct ionic_rxq_desc),
-				      sizeof(struct ionic_rxq_comp),
-				      sizeof(struct ionic_rxq_sg_desc),
+				      num_desc, desc_sz, comp_sz, sg_desc_sz,
 				      lif->kern_pid, &lif->rxqcqs[i]);
 		if (err)
 			goto err_out;
 
+		lif->rxqcqs[i]->q.features = lif->rxq_features;
+
 		ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
 				     lif->rxqcqs[i]->intr.index,
 				     lif->rx_coalesce_hw);
@@ -2218,6 +2227,7 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	/* only swapping the queues, not the napi, flags, or other stuff */
 	swap(a->q.features,   b->q.features);
 	swap(a->q.num_descs,  b->q.num_descs);
+	swap(a->q.desc_size,  b->q.desc_size);
 	swap(a->q.base,       b->q.base);
 	swap(a->q.base_pa,    b->q.base_pa);
 	swap(a->q.info,       b->q.info);
@@ -2225,6 +2235,7 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	swap(a->q_base_pa,    b->q_base_pa);
 	swap(a->q_size,       b->q_size);
 
+	swap(a->q.sg_desc_size, b->q.sg_desc_size);
 	swap(a->q.sg_base,    b->q.sg_base);
 	swap(a->q.sg_base_pa, b->q.sg_base_pa);
 	swap(a->sg_base,      b->sg_base);
@@ -2232,6 +2243,7 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	swap(a->sg_size,      b->sg_size);
 
 	swap(a->cq.num_descs, b->cq.num_descs);
+	swap(a->cq.desc_size, b->cq.desc_size);
 	swap(a->cq.base,      b->cq.base);
 	swap(a->cq.base_pa,   b->cq.base_pa);
 	swap(a->cq.info,      b->cq.info);
@@ -2246,9 +2258,9 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 int ionic_reconfigure_queues(struct ionic_lif *lif,
 			     struct ionic_queue_params *qparam)
 {
+	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
 	struct ionic_qcq **tx_qcqs = NULL;
 	struct ionic_qcq **rx_qcqs = NULL;
-	unsigned int sg_desc_sz;
 	unsigned int flags;
 	int err = -ENOMEM;
 	unsigned int i;
@@ -2260,7 +2272,9 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 		if (!tx_qcqs)
 			goto err_out;
 	}
-	if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs) {
+	if (qparam->nxqs != lif->nxqs ||
+	    qparam->nrxq_descs != lif->nrxq_descs ||
+	    qparam->rxq_features != lif->rxq_features) {
 		rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif,
 				       sizeof(struct ionic_qcq *), GFP_KERNEL);
 		if (!rx_qcqs)
@@ -2270,21 +2284,22 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 	/* allocate new desc_info and rings, but leave the interrupt setup
 	 * until later so as to not mess with the still-running queues
 	 */
-	if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
-	    lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz ==
-					  sizeof(struct ionic_txq_sg_desc_v1))
-		sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1);
-	else
-		sg_desc_sz = sizeof(struct ionic_txq_sg_desc);
-
 	if (tx_qcqs) {
+		num_desc = qparam->ntxq_descs;
+		desc_sz = sizeof(struct ionic_txq_desc);
+		comp_sz = sizeof(struct ionic_txq_comp);
+
+		if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
+		    lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz ==
+		    sizeof(struct ionic_txq_sg_desc_v1))
+			sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1);
+		else
+			sg_desc_sz = sizeof(struct ionic_txq_sg_desc);
+
 		for (i = 0; i < qparam->nxqs; i++) {
 			flags = lif->txqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
 			err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
-					      qparam->ntxq_descs,
-					      sizeof(struct ionic_txq_desc),
-					      sizeof(struct ionic_txq_comp),
-					      sg_desc_sz,
+					      num_desc, desc_sz, comp_sz, sg_desc_sz,
 					      lif->kern_pid, &tx_qcqs[i]);
 			if (err)
 				goto err_out;
@@ -2292,16 +2307,23 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 	}
 
 	if (rx_qcqs) {
+		num_desc = qparam->nrxq_descs;
+		desc_sz = sizeof(struct ionic_rxq_desc);
+		comp_sz = sizeof(struct ionic_rxq_comp);
+		sg_desc_sz = sizeof(struct ionic_rxq_sg_desc);
+
+		if (qparam->rxq_features & IONIC_Q_F_2X_CQ_DESC)
+			comp_sz *= 2;
+
 		for (i = 0; i < qparam->nxqs; i++) {
 			flags = lif->rxqcqs[i]->flags & ~IONIC_QCQ_F_INTR;
 			err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags,
-					      qparam->nrxq_descs,
-					      sizeof(struct ionic_rxq_desc),
-					      sizeof(struct ionic_rxq_comp),
-					      sizeof(struct ionic_rxq_sg_desc),
+					      num_desc, desc_sz, comp_sz, sg_desc_sz,
 					      lif->kern_pid, &rx_qcqs[i]);
 			if (err)
 				goto err_out;
+
+			rx_qcqs[i]->q.features = qparam->rxq_features;
 		}
 	}
 
@@ -2388,6 +2410,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif,
 	}
 
 	swap(lif->nxqs, qparam->nxqs);
+	swap(lif->rxq_features, qparam->rxq_features);
 
 err_out_reinit_unlock:
 	/* re-init the queues, but don't lose an error code */
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index be5cc89b2bd9..93d3058aed77 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -183,6 +183,7 @@ struct ionic_lif {
 	unsigned int ntxq_descs;
 	unsigned int nrxq_descs;
 	u32 rx_copybreak;
+	u64 rxq_features;
 	unsigned int rx_mode;
 	u64 hw_features;
 	bool registered;
@@ -221,6 +222,7 @@ struct ionic_queue_params {
 	unsigned int ntxq_descs;
 	unsigned int nrxq_descs;
 	unsigned int intr_split;
+	u64 rxq_features;
 };
 
 static inline void ionic_init_queue_params(struct ionic_lif *lif,
@@ -230,6 +232,7 @@ static inline void ionic_init_queue_params(struct ionic_lif *lif,
 	qparam->ntxq_descs = lif->ntxq_descs;
 	qparam->nrxq_descs = lif->nrxq_descs;
 	qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state);
+	qparam->rxq_features = lif->rxq_features;
 }
 
 static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 42d29cd2ca47..4859120eec72 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -229,12 +229,14 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
+	struct ionic_rxq_comp *comp;
 	struct sk_buff *skb;
 
+	comp = cq_info->cq_desc + qcq->cq.desc_size - sizeof(*comp);
+
 	stats = q_to_rx_stats(q);
 
 	if (comp->status) {
@@ -304,9 +306,11 @@ static void ionic_rx_clean(struct ionic_queue *q,
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
+	struct ionic_rxq_comp *comp;
+
+	comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp);
 
 	if (!color_match(comp->pkt_type_color, cq->done_color))
 		return false;
@@ -693,13 +697,15 @@ static void ionic_tx_clean(struct ionic_queue *q,
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_txq_comp *comp = cq_info->txcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
+	struct ionic_txq_comp *comp;
 	int bytes = 0;
 	int pkts = 0;
 	u16 index;
 
+	comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp);
+
 	if (!color_match(comp->color, cq->done_color))
 		return false;
 
-- 
cgit v1.2.3


From 3da258439e89d03515e5bc4ff021012f2ec87260 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:01 -0700
Subject: ionic: add hw timestamp structs to interface

The interface for hardware timestamping includes a new FW
request, device identity fields, Tx and Rx queue feature bits, a
new Rx filter type, the beginnings of Rx packet classifications,
and hardware timestamp registers.

If the IONIC_ETH_HW_TIMESTAMP bit is shown in the
ionic_lif_config features bit string, then we have support
for the hw clock registers.  If the IONIC_RXQ_F_HWSTAMP and
IONIC_TXQ_F_HWSTAMP features are shown in the ionic_q_identity
features, then the queues can support HW timestamps on packets.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h |   1 +
 drivers/net/ethernet/pensando/ionic/ionic_if.h  | 175 +++++++++++++++++++++++-
 2 files changed, 170 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 68e5e7a97801..25c52c042246 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -59,6 +59,7 @@ static_assert(sizeof(struct ionic_dev_getattr_cmd) == 64);
 static_assert(sizeof(struct ionic_dev_getattr_comp) == 16);
 static_assert(sizeof(struct ionic_dev_setattr_cmd) == 64);
 static_assert(sizeof(struct ionic_dev_setattr_comp) == 16);
+static_assert(sizeof(struct ionic_lif_setphc_cmd) == 64);
 
 /* Port commands */
 static_assert(sizeof(struct ionic_port_identify_cmd) == 64);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 1299630fcde8..0478b48d9895 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -34,6 +34,7 @@ enum ionic_cmd_opcode {
 	IONIC_CMD_LIF_RESET			= 22,
 	IONIC_CMD_LIF_GETATTR			= 23,
 	IONIC_CMD_LIF_SETATTR			= 24,
+	IONIC_CMD_LIF_SETPHC			= 25,
 
 	IONIC_CMD_RX_MODE_SET			= 30,
 	IONIC_CMD_RX_FILTER_ADD			= 31,
@@ -269,6 +270,9 @@ union ionic_drv_identity {
  *                    value in usecs to device units using:
  *                    device units = usecs * mult / div
  * @eq_count:         Number of shared event queues
+ * @hwstamp_mask:     Bitmask for subtraction of hardware tick values.
+ * @hwstamp_mult:     Hardware tick to nanosecond multiplier.
+ * @hwstamp_shift:    Hardware tick to nanosecond divisor (power of two).
  */
 union ionic_dev_identity {
 	struct {
@@ -283,6 +287,9 @@ union ionic_dev_identity {
 		__le32 intr_coal_mult;
 		__le32 intr_coal_div;
 		__le32 eq_count;
+		__le64 hwstamp_mask;
+		__le32 hwstamp_mult;
+		__le32 hwstamp_shift;
 	};
 	__le32 words[478];
 };
@@ -374,6 +381,39 @@ enum ionic_q_feature {
 	IONIC_Q_F_4X_SG_DESC		= BIT_ULL(9),
 };
 
+/**
+ * enum ionic_rxq_feature - RXQ-specific Features
+ *
+ * Per-queue-type features use bits 16 and higher.
+ *
+ * @IONIC_RXQ_F_HWSTAMP:   Queue supports Hardware Timestamping
+ */
+enum ionic_rxq_feature {
+	IONIC_RXQ_F_HWSTAMP		= BIT_ULL(16),
+};
+
+/**
+ * enum ionic_txq_feature - TXQ-specific Features
+ *
+ * Per-queue-type features use bits 16 and higher.
+ *
+ * @IONIC_TXQ_F_HWSTAMP:   Queue supports Hardware Timestamping
+ */
+enum ionic_txq_feature {
+	IONIC_TXQ_F_HWSTAMP		= BIT(16),
+};
+
+/**
+ * struct ionic_hwstamp_bits - Hardware timestamp decoding bits
+ * @IONIC_HWSTAMP_INVALID:          Invalid hardware timestamp value
+ * @IONIC_HWSTAMP_CQ_NEGOFFSET:     Timestamp field negative offset
+ *                                  from the base cq descriptor.
+ */
+enum ionic_hwstamp_bits {
+	IONIC_HWSTAMP_INVALID	    = ~0ull,
+	IONIC_HWSTAMP_CQ_NEGOFFSET  = 8,
+};
+
 /**
  * struct ionic_lif_logical_qtype - Descriptor of logical to HW queue type
  * @qtype:          Hardware Queue Type
@@ -434,6 +474,8 @@ union ionic_lif_config {
  *     @max_mcast_filters:  Number of perfect multicast addresses supported
  *     @min_frame_size:     Minimum size of frames to be sent
  *     @max_frame_size:     Maximum size of frames to be sent
+ *     @hwstamp_tx_modes:   Bitmask of BIT_ULL(enum ionic_txstamp_mode)
+ *     @hwstamp_rx_filters: Bitmask of enum ionic_pkt_class
  *     @config:             LIF config struct with features, mtu, mac, q counts
  *
  * @rdma:                RDMA identify structure
@@ -467,7 +509,10 @@ union ionic_lif_identity {
 			__le16 rss_ind_tbl_sz;
 			__le32 min_frame_size;
 			__le32 max_frame_size;
-			u8 rsvd2[106];
+			u8 rsvd2[2];
+			__le64 hwstamp_tx_modes;
+			__le64 hwstamp_rx_filters;
+			u8 rsvd3[88];
 			union ionic_lif_config config;
 		} __packed eth;
 
@@ -1046,7 +1091,64 @@ enum ionic_eth_hw_features {
 	IONIC_ETH_HW_TSO_UDP_CSUM	= BIT(16),
 	IONIC_ETH_HW_RX_CSUM_GENEVE	= BIT(17),
 	IONIC_ETH_HW_TX_CSUM_GENEVE	= BIT(18),
-	IONIC_ETH_HW_TSO_GENEVE		= BIT(19)
+	IONIC_ETH_HW_TSO_GENEVE		= BIT(19),
+	IONIC_ETH_HW_TIMESTAMP		= BIT(20),
+};
+
+/**
+ * enum ionic_pkt_class - Packet classification mask.
+ *
+ * Used with rx steering filter, packets indicated by the mask can be steered
+ * toward a specific receive queue.
+ *
+ * @IONIC_PKT_CLS_NTP_ALL:          All NTP packets.
+ * @IONIC_PKT_CLS_PTP1_SYNC:        PTPv1 sync
+ * @IONIC_PKT_CLS_PTP1_DREQ:        PTPv1 delay-request
+ * @IONIC_PKT_CLS_PTP1_ALL:         PTPv1 all packets
+ * @IONIC_PKT_CLS_PTP2_L4_SYNC:     PTPv2-UDP sync
+ * @IONIC_PKT_CLS_PTP2_L4_DREQ:     PTPv2-UDP delay-request
+ * @IONIC_PKT_CLS_PTP2_L4_ALL:      PTPv2-UDP all packets
+ * @IONIC_PKT_CLS_PTP2_L2_SYNC:     PTPv2-ETH sync
+ * @IONIC_PKT_CLS_PTP2_L2_DREQ:     PTPv2-ETH delay-request
+ * @IONIC_PKT_CLS_PTP2_L2_ALL:      PTPv2-ETH all packets
+ * @IONIC_PKT_CLS_PTP2_SYNC:        PTPv2 sync
+ * @IONIC_PKT_CLS_PTP2_DREQ:        PTPv2 delay-request
+ * @IONIC_PKT_CLS_PTP2_ALL:         PTPv2 all packets
+ * @IONIC_PKT_CLS_PTP_SYNC:         PTP sync
+ * @IONIC_PKT_CLS_PTP_DREQ:         PTP delay-request
+ * @IONIC_PKT_CLS_PTP_ALL:          PTP all packets
+ */
+enum ionic_pkt_class {
+	IONIC_PKT_CLS_NTP_ALL		= BIT(0),
+
+	IONIC_PKT_CLS_PTP1_SYNC		= BIT(1),
+	IONIC_PKT_CLS_PTP1_DREQ		= BIT(2),
+	IONIC_PKT_CLS_PTP1_ALL		= BIT(3) |
+		IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP1_DREQ,
+
+	IONIC_PKT_CLS_PTP2_L4_SYNC	= BIT(4),
+	IONIC_PKT_CLS_PTP2_L4_DREQ	= BIT(5),
+	IONIC_PKT_CLS_PTP2_L4_ALL	= BIT(6) |
+		IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L4_DREQ,
+
+	IONIC_PKT_CLS_PTP2_L2_SYNC	= BIT(7),
+	IONIC_PKT_CLS_PTP2_L2_DREQ	= BIT(8),
+	IONIC_PKT_CLS_PTP2_L2_ALL	= BIT(9) |
+		IONIC_PKT_CLS_PTP2_L2_SYNC | IONIC_PKT_CLS_PTP2_L2_DREQ,
+
+	IONIC_PKT_CLS_PTP2_SYNC		=
+		IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L2_SYNC,
+	IONIC_PKT_CLS_PTP2_DREQ		=
+		IONIC_PKT_CLS_PTP2_L4_DREQ | IONIC_PKT_CLS_PTP2_L2_DREQ,
+	IONIC_PKT_CLS_PTP2_ALL		=
+		IONIC_PKT_CLS_PTP2_L4_ALL | IONIC_PKT_CLS_PTP2_L2_ALL,
+
+	IONIC_PKT_CLS_PTP_SYNC		=
+		IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP2_SYNC,
+	IONIC_PKT_CLS_PTP_DREQ		=
+		IONIC_PKT_CLS_PTP1_DREQ | IONIC_PKT_CLS_PTP2_DREQ,
+	IONIC_PKT_CLS_PTP_ALL		=
+		IONIC_PKT_CLS_PTP1_ALL | IONIC_PKT_CLS_PTP2_ALL,
 };
 
 /**
@@ -1355,6 +1457,20 @@ enum ionic_stats_ctl_cmd {
 	IONIC_STATS_CTL_RESET		= 0,
 };
 
+/**
+ * enum ionic_txstamp_mode - List of TX Timestamping Modes
+ * @IONIC_TXSTAMP_OFF:           Disable TX hardware timetamping.
+ * @IONIC_TXSTAMP_ON:            Enable local TX hardware timetamping.
+ * @IONIC_TXSTAMP_ONESTEP_SYNC:  Modify TX PTP Sync packets.
+ * @IONIC_TXSTAMP_ONESTEP_P2P:   Modify TX PTP Sync and PDelayResp.
+ */
+enum ionic_txstamp_mode {
+	IONIC_TXSTAMP_OFF		= 0,
+	IONIC_TXSTAMP_ON		= 1,
+	IONIC_TXSTAMP_ONESTEP_SYNC	= 2,
+	IONIC_TXSTAMP_ONESTEP_P2P	= 3,
+};
+
 /**
  * enum ionic_port_attr - List of device attributes
  * @IONIC_PORT_ATTR_STATE:      Port state attribute
@@ -1597,6 +1713,7 @@ enum ionic_rss_hash_types {
  * @IONIC_LIF_ATTR_FEATURES:    LIF features attribute
  * @IONIC_LIF_ATTR_RSS:         LIF RSS attribute
  * @IONIC_LIF_ATTR_STATS_CTRL:  LIF statistics control attribute
+ * @IONIC_LIF_ATTR_TXSTAMP:     LIF TX timestamping mode
  */
 enum ionic_lif_attr {
 	IONIC_LIF_ATTR_STATE        = 0,
@@ -1606,6 +1723,7 @@ enum ionic_lif_attr {
 	IONIC_LIF_ATTR_FEATURES     = 4,
 	IONIC_LIF_ATTR_RSS          = 5,
 	IONIC_LIF_ATTR_STATS_CTRL   = 6,
+	IONIC_LIF_ATTR_TXSTAMP      = 7,
 };
 
 /**
@@ -1623,6 +1741,7 @@ enum ionic_lif_attr {
  *              @key:       The hash secret key
  *              @addr:      Address for the indirection table shared memory
  * @stats_ctl:  stats control commands (enum ionic_stats_ctl_cmd)
+ * @txstamp:    TX Timestamping Mode (enum ionic_txstamp_mode)
  */
 struct ionic_lif_setattr_cmd {
 	u8     opcode;
@@ -1641,6 +1760,7 @@ struct ionic_lif_setattr_cmd {
 			__le64 addr;
 		} rss;
 		u8      stats_ctl;
+		__le16 txstamp_mode;
 		u8      rsvd[60];
 	} __packed;
 };
@@ -1685,6 +1805,7 @@ struct ionic_lif_getattr_cmd {
  * @mtu:        Mtu
  * @mac:        Station mac
  * @features:   Features (enum ionic_eth_hw_features)
+ * @txstamp:    TX Timestamping Mode (enum ionic_txstamp_mode)
  * @color:      Color bit
  */
 struct ionic_lif_getattr_comp {
@@ -1696,11 +1817,35 @@ struct ionic_lif_getattr_comp {
 		__le32  mtu;
 		u8      mac[6];
 		__le64  features;
+		__le16  txstamp_mode;
 		u8      rsvd2[11];
 	} __packed;
 	u8     color;
 };
 
+/**
+ * struct ionic_lif_setphc_cmd - Set LIF PTP Hardware Clock
+ * @opcode:     Opcode
+ * @lif_index:  LIF index
+ * @tick:       Hardware stamp tick of an instant in time.
+ * @nsec:       Nanosecond stamp of the same instant.
+ * @frac:       Fractional nanoseconds at the same instant.
+ * @mult:       Cycle to nanosecond multiplier.
+ * @shift:      Cycle to nanosecond divisor (power of two).
+ */
+struct ionic_lif_setphc_cmd {
+	u8	opcode;
+	u8	rsvd1;
+	__le16  lif_index;
+	u8      rsvd2[4];
+	__le64	tick;
+	__le64	nsec;
+	__le64	frac;
+	__le32	mult;
+	__le32	shift;
+	u8     rsvd3[24];
+};
+
 enum ionic_rx_mode {
 	IONIC_RX_MODE_F_UNICAST		= BIT(0),
 	IONIC_RX_MODE_F_MULTICAST	= BIT(1),
@@ -1733,9 +1878,10 @@ struct ionic_rx_mode_set_cmd {
 typedef struct ionic_admin_comp ionic_rx_mode_set_comp;
 
 enum ionic_rx_filter_match_type {
-	IONIC_RX_FILTER_MATCH_VLAN = 0,
-	IONIC_RX_FILTER_MATCH_MAC,
-	IONIC_RX_FILTER_MATCH_MAC_VLAN,
+	IONIC_RX_FILTER_MATCH_VLAN	= 0x0,
+	IONIC_RX_FILTER_MATCH_MAC	= 0x1,
+	IONIC_RX_FILTER_MATCH_MAC_VLAN	= 0x2,
+	IONIC_RX_FILTER_STEER_PKTCLASS	= 0x10,
 };
 
 /**
@@ -1752,6 +1898,7 @@ enum ionic_rx_filter_match_type {
  * @mac_vlan:   MACVLAN filter
  *              @vlan:  VLAN ID
  *              @addr:  MAC address (network-byte order)
+ * @pkt_class:  Packet classification filter
  */
 struct ionic_rx_filter_add_cmd {
 	u8     opcode;
@@ -1770,8 +1917,9 @@ struct ionic_rx_filter_add_cmd {
 			__le16 vlan;
 			u8     addr[6];
 		} mac_vlan;
+		__le64 pkt_class;
 		u8 rsvd[54];
-	};
+	} __packed;
 };
 
 /**
@@ -2771,6 +2919,16 @@ union ionic_dev_cmd_comp {
 	struct ionic_fw_control_comp fw_control;
 };
 
+/**
+ * struct ionic_hwstamp_regs - Hardware current timestamp registers
+ * @tick_low:        Low 32 bits of hardware timestamp
+ * @tick_high:       High 32 bits of hardware timestamp
+ */
+struct ionic_hwstamp_regs {
+	u32    tick_low;
+	u32    tick_high;
+};
+
 /**
  * union ionic_dev_info_regs - Device info register format (read-only)
  * @signature:       Signature value of 0x44455649 ('DEVI')
@@ -2781,6 +2939,7 @@ union ionic_dev_cmd_comp {
  * @fw_heartbeat:    Firmware heartbeat counter
  * @serial_num:      Serial number
  * @fw_version:      Firmware version
+ * @hwstamp_regs:    Hardware current timestamp registers
  */
 union ionic_dev_info_regs {
 #define IONIC_DEVINFO_FWVERS_BUFLEN 32
@@ -2795,6 +2954,8 @@ union ionic_dev_info_regs {
 		u32    fw_heartbeat;
 		char   fw_version[IONIC_DEVINFO_FWVERS_BUFLEN];
 		char   serial_num[IONIC_DEVINFO_SERIAL_BUFLEN];
+		u8     rsvd_pad1024[948];
+		struct ionic_hwstamp_regs hwstamp;
 	};
 	u32 words[512];
 };
@@ -2842,6 +3003,7 @@ union ionic_adminq_cmd {
 	struct ionic_q_control_cmd q_control;
 	struct ionic_lif_setattr_cmd lif_setattr;
 	struct ionic_lif_getattr_cmd lif_getattr;
+	struct ionic_lif_setphc_cmd lif_setphc;
 	struct ionic_rx_mode_set_cmd rx_mode_set;
 	struct ionic_rx_filter_add_cmd rx_filter_add;
 	struct ionic_rx_filter_del_cmd rx_filter_del;
@@ -2858,6 +3020,7 @@ union ionic_adminq_comp {
 	struct ionic_q_init_comp q_init;
 	struct ionic_lif_setattr_comp lif_setattr;
 	struct ionic_lif_getattr_comp lif_getattr;
+	struct ionic_admin_comp lif_setphc;
 	struct ionic_rx_filter_add_comp rx_filter_add;
 	struct ionic_fw_control_comp fw_control;
 };
-- 
cgit v1.2.3


From 4f1704faa0131ab70a097145a6f144c00051e46a Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:02 -0700
Subject: ionic: split adminq post and wait calls

Split the wait part out of adminq_post_wait() into a separate
function so that a caller can have finer grain control over
the sequencing of operations and locking.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic.h      |  2 ++
 drivers/net/ethernet/pensando/ionic/ionic_main.c | 15 +++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 084a924431d5..18e92103c711 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -64,6 +64,8 @@ struct ionic_admin_ctx {
 	union ionic_adminq_comp comp;
 };
 
+int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
+int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err);
 int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx);
 int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait);
 int ionic_set_dma_mask(struct ionic *ionic);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index c4b2906a2ae6..8c27fbe0e312 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -256,7 +256,7 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 	complete_all(&ctx->work);
 }
 
-static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 {
 	struct ionic_desc_info *desc_info;
 	unsigned long irqflags;
@@ -295,14 +295,12 @@ err_out:
 	return err;
 }
 
-int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err)
 {
 	struct net_device *netdev = lif->netdev;
 	unsigned long remaining;
 	const char *name;
-	int err;
 
-	err = ionic_adminq_post(lif, ctx);
 	if (err) {
 		if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) {
 			name = ionic_opcode_to_str(ctx->cmd.cmd.opcode);
@@ -317,6 +315,15 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
 	return ionic_adminq_check_err(lif, ctx, (remaining == 0));
 }
 
+int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx)
+{
+	int err;
+
+	err = ionic_adminq_post(lif, ctx);
+
+	return ionic_adminq_wait(lif, ctx, err);
+}
+
 static void ionic_dev_cmd_clean(struct ionic *ionic)
 {
 	union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs;
-- 
cgit v1.2.3


From fee6efce565d1e1c45c31de8e05faed530d0a28c Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:03 -0700
Subject: ionic: add hw timestamp support files

This adds the file of code for supporting Tx and Rx hardware
timestamps and the raw clock interface, but does not yet link
it in for compiling or use.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 589 ++++++++++++++++++++++++
 1 file changed, 589 insertions(+)
 create mode 100644 drivers/net/ethernet/pensando/ionic/ionic_phc.c

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
new file mode 100644
index 000000000000..86ae5011ac9b
--- /dev/null
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 - 2021 Pensando Systems, Inc */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "ionic.h"
+#include "ionic_bus.h"
+#include "ionic_lif.h"
+#include "ionic_ethtool.h"
+
+static int ionic_hwstamp_tx_mode(int config_tx_type)
+{
+	switch (config_tx_type) {
+	case HWTSTAMP_TX_OFF:
+		return IONIC_TXSTAMP_OFF;
+	case HWTSTAMP_TX_ON:
+		return IONIC_TXSTAMP_ON;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		return IONIC_TXSTAMP_ONESTEP_SYNC;
+#ifdef HAVE_HWSTAMP_TX_ONESTEP_P2P
+	case HWTSTAMP_TX_ONESTEP_P2P:
+		return IONIC_TXSTAMP_ONESTEP_P2P;
+#endif
+	default:
+		return -ERANGE;
+	}
+}
+
+static u64 ionic_hwstamp_rx_filt(int config_rx_filter)
+{
+	switch (config_rx_filter) {
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		return IONIC_PKT_CLS_PTP1_ALL;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		return IONIC_PKT_CLS_PTP1_SYNC;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		return IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP1_DREQ;
+
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+		return IONIC_PKT_CLS_PTP2_L4_ALL;
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		return IONIC_PKT_CLS_PTP2_L4_SYNC;
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		return IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L4_DREQ;
+
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+		return IONIC_PKT_CLS_PTP2_L2_ALL;
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+		return IONIC_PKT_CLS_PTP2_L2_SYNC;
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		return IONIC_PKT_CLS_PTP2_L2_SYNC | IONIC_PKT_CLS_PTP2_L2_DREQ;
+
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		return IONIC_PKT_CLS_PTP2_ALL;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+		return IONIC_PKT_CLS_PTP2_SYNC;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		return IONIC_PKT_CLS_PTP2_SYNC | IONIC_PKT_CLS_PTP2_DREQ;
+
+	case HWTSTAMP_FILTER_NTP_ALL:
+		return IONIC_PKT_CLS_NTP_ALL;
+
+	default:
+		return 0;
+	}
+}
+
+int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
+{
+	struct ionic *ionic = lif->ionic;
+	struct hwtstamp_config config;
+	int tx_mode = 0;
+	u64 rx_filt = 0;
+	int err, err2;
+	bool rx_all;
+	__le64 mask;
+
+	if (!lif->phc || !lif->phc->ptp)
+		return -EOPNOTSUPP;
+
+	if (ifr) {
+		if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+			return -EFAULT;
+	} else {
+		/* if called with ifr == NULL, behave as if called with the
+		 * current ts_config from the initial cleared state.
+		 */
+		memcpy(&config, &lif->phc->ts_config, sizeof(config));
+		memset(&lif->phc->ts_config, 0, sizeof(config));
+	}
+
+	tx_mode = ionic_hwstamp_tx_mode(config.tx_type);
+	if (tx_mode < 0)
+		return tx_mode;
+
+	mask = cpu_to_le64(BIT_ULL(tx_mode));
+	if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask)
+		return -ERANGE;
+
+	rx_filt = ionic_hwstamp_rx_filt(config.rx_filter);
+	rx_all = config.rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt;
+
+	mask = cpu_to_le64(rx_filt);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) != mask) {
+		rx_filt = 0;
+		rx_all = true;
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+	}
+
+	dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n",
+		config.rx_filter, rx_filt, rx_all);
+
+	mutex_lock(&lif->phc->config_lock);
+
+	if (tx_mode) {
+		err = ionic_lif_create_hwstamp_txq(lif);
+		if (err)
+			goto err_queues;
+	}
+
+	if (rx_filt) {
+		err = ionic_lif_create_hwstamp_rxq(lif);
+		if (err)
+			goto err_queues;
+	}
+
+	if (tx_mode != lif->phc->ts_config_tx_mode) {
+		err = ionic_lif_set_hwstamp_txmode(lif, tx_mode);
+		if (err)
+			goto err_txmode;
+	}
+
+	if (rx_filt != lif->phc->ts_config_rx_filt) {
+		err = ionic_lif_set_hwstamp_rxfilt(lif, rx_filt);
+		if (err)
+			goto err_rxfilt;
+	}
+
+	if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) {
+		err = ionic_lif_config_hwstamp_rxq_all(lif, rx_all);
+		if (err)
+			goto err_rxall;
+	}
+
+	if (ifr) {
+		err = copy_to_user(ifr->ifr_data, &config, sizeof(config));
+		if (err) {
+			err = -EFAULT;
+			goto err_final;
+		}
+	}
+
+	memcpy(&lif->phc->ts_config, &config, sizeof(config));
+	lif->phc->ts_config_rx_filt = rx_filt;
+	lif->phc->ts_config_tx_mode = tx_mode;
+
+	mutex_unlock(&lif->phc->config_lock);
+
+	return 0;
+
+err_final:
+	if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) {
+		rx_all = lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL;
+		err2 = ionic_lif_config_hwstamp_rxq_all(lif, rx_all);
+		if (err2)
+			dev_err(ionic->dev,
+				"Failed to revert all-rxq timestamp config: %d\n", err2);
+	}
+err_rxall:
+	if (rx_filt != lif->phc->ts_config_rx_filt) {
+		rx_filt = lif->phc->ts_config_rx_filt;
+		err2 = ionic_lif_set_hwstamp_rxfilt(lif, rx_filt);
+		if (err2)
+			dev_err(ionic->dev,
+				"Failed to revert rx timestamp filter: %d\n", err2);
+	}
+err_rxfilt:
+	if (tx_mode != lif->phc->ts_config_tx_mode) {
+		tx_mode = lif->phc->ts_config_tx_mode;
+		err2 = ionic_lif_set_hwstamp_txmode(lif, tx_mode);
+		if (err2)
+			dev_err(ionic->dev,
+				"Failed to revert tx timestamp mode: %d\n", err2);
+	}
+err_txmode:
+	/* special queues remain allocated, just unused */
+err_queues:
+	mutex_unlock(&lif->phc->config_lock);
+	return err;
+}
+
+int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+
+	if (!lif->phc || !lif->phc->ptp)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&lif->phc->config_lock);
+	memcpy(&config, &lif->phc->ts_config, sizeof(config));
+	mutex_unlock(&lif->phc->config_lock);
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config));
+}
+
+static u64 ionic_hwstamp_read(struct ionic *ionic,
+			      struct ptp_system_timestamp *sts)
+{
+	u32 tick_high_before, tick_high, tick_low;
+
+	/* read and discard low part to defeat hw staging of high part */
+	(void)ioread32(&ionic->idev.hwstamp_regs->tick_low);
+
+	tick_high_before = ioread32(&ionic->idev.hwstamp_regs->tick_high);
+
+	ptp_read_system_prets(sts);
+	tick_low = ioread32(&ionic->idev.hwstamp_regs->tick_low);
+	ptp_read_system_postts(sts);
+
+	tick_high = ioread32(&ionic->idev.hwstamp_regs->tick_high);
+
+	/* If tick_high changed, re-read tick_low once more.  Assume tick_high
+	 * cannot change again so soon as in the span of re-reading tick_low.
+	 */
+	if (tick_high != tick_high_before) {
+		ptp_read_system_prets(sts);
+		tick_low = ioread32(&ionic->idev.hwstamp_regs->tick_low);
+		ptp_read_system_postts(sts);
+	}
+
+	return (u64)tick_low | ((u64)tick_high << 32);
+}
+
+static u64 ionic_cc_read(const struct cyclecounter *cc)
+{
+	struct ionic_phc *phc = container_of(cc, struct ionic_phc, cc);
+	struct ionic *ionic = phc->lif->ionic;
+
+	return ionic_hwstamp_read(ionic, NULL);
+}
+
+static int ionic_setphc_cmd(struct ionic_phc *phc, struct ionic_admin_ctx *ctx)
+{
+	ctx->work = COMPLETION_INITIALIZER_ONSTACK(ctx->work);
+
+	ctx->cmd.lif_setphc.opcode = IONIC_CMD_LIF_SETPHC;
+	ctx->cmd.lif_setphc.lif_index = cpu_to_le16(phc->lif->index);
+
+	ctx->cmd.lif_setphc.tick = cpu_to_le64(phc->tc.cycle_last);
+	ctx->cmd.lif_setphc.nsec = cpu_to_le64(phc->tc.nsec);
+	ctx->cmd.lif_setphc.frac = cpu_to_le64(phc->tc.frac);
+	ctx->cmd.lif_setphc.mult = cpu_to_le32(phc->cc.mult);
+	ctx->cmd.lif_setphc.shift = cpu_to_le32(phc->cc.shift);
+
+	return ionic_adminq_post(phc->lif, ctx);
+}
+
+static int ionic_phc_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+	struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info);
+	struct ionic_admin_ctx ctx = {};
+	unsigned long irqflags;
+	s64 adj;
+	int err;
+
+	/* Reject phc adjustments during device upgrade */
+	if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state))
+		return -EBUSY;
+
+	/* Adjustment value scaled by 2^16 million */
+	adj = (s64)scaled_ppm * phc->init_cc_mult;
+
+	/* Adjustment value to scale */
+	adj /= (s64)SCALED_PPM;
+
+	/* Final adjusted multiplier */
+	adj += phc->init_cc_mult;
+
+	spin_lock_irqsave(&phc->lock, irqflags);
+
+	/* update the point-in-time basis to now, before adjusting the rate */
+	timecounter_read(&phc->tc);
+	phc->cc.mult = adj;
+
+	/* Setphc commands are posted in-order, sequenced by phc->lock.  We
+	 * need to drop the lock before waiting for the command to complete.
+	 */
+	err = ionic_setphc_cmd(phc, &ctx);
+
+	spin_unlock_irqrestore(&phc->lock, irqflags);
+
+	return ionic_adminq_wait(phc->lif, &ctx, err);
+}
+
+static int ionic_phc_adjtime(struct ptp_clock_info *info, s64 delta)
+{
+	struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info);
+	struct ionic_admin_ctx ctx = {};
+	unsigned long irqflags;
+	int err;
+
+	/* Reject phc adjustments during device upgrade */
+	if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state))
+		return -EBUSY;
+
+	spin_lock_irqsave(&phc->lock, irqflags);
+
+	timecounter_adjtime(&phc->tc, delta);
+
+	/* Setphc commands are posted in-order, sequenced by phc->lock.  We
+	 * need to drop the lock before waiting for the command to complete.
+	 */
+	err = ionic_setphc_cmd(phc, &ctx);
+
+	spin_unlock_irqrestore(&phc->lock, irqflags);
+
+	return ionic_adminq_wait(phc->lif, &ctx, err);
+}
+
+static int ionic_phc_settime64(struct ptp_clock_info *info,
+			       const struct timespec64 *ts)
+{
+	struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info);
+	struct ionic_admin_ctx ctx = {};
+	unsigned long irqflags;
+	int err;
+	u64 ns;
+
+	/* Reject phc adjustments during device upgrade */
+	if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state))
+		return -EBUSY;
+
+	ns = timespec64_to_ns(ts);
+
+	spin_lock_irqsave(&phc->lock, irqflags);
+
+	timecounter_init(&phc->tc, &phc->cc, ns);
+
+	/* Setphc commands are posted in-order, sequenced by phc->lock.  We
+	 * need to drop the lock before waiting for the command to complete.
+	 */
+	err = ionic_setphc_cmd(phc, &ctx);
+
+	spin_unlock_irqrestore(&phc->lock, irqflags);
+
+	return ionic_adminq_wait(phc->lif, &ctx, err);
+}
+
+static int ionic_phc_gettimex64(struct ptp_clock_info *info,
+				struct timespec64 *ts,
+				struct ptp_system_timestamp *sts)
+{
+	struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info);
+	struct ionic *ionic = phc->lif->ionic;
+	unsigned long irqflags;
+	u64 tick, ns;
+
+	/* Do not attempt to read device time during upgrade */
+	if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state))
+		return -EBUSY;
+
+	spin_lock_irqsave(&phc->lock, irqflags);
+
+	tick = ionic_hwstamp_read(ionic, sts);
+
+	ns = timecounter_cyc2time(&phc->tc, tick);
+
+	spin_unlock_irqrestore(&phc->lock, irqflags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static long ionic_phc_aux_work(struct ptp_clock_info *info)
+{
+	struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info);
+	struct ionic_admin_ctx ctx = {};
+	unsigned long irqflags;
+	int err;
+
+	/* Do not update phc during device upgrade, but keep polling to resume
+	 * after upgrade.  Since we don't update the point in time basis, there
+	 * is no expectation that we are maintaining the phc time during the
+	 * upgrade.  After upgrade, it will need to be readjusted back to the
+	 * correct time by the ptp daemon.
+	 */
+	if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state))
+		return phc->aux_work_delay;
+
+	spin_lock_irqsave(&phc->lock, irqflags);
+
+	/* update point-in-time basis to now */
+	timecounter_read(&phc->tc);
+
+	/* Setphc commands are posted in-order, sequenced by phc->lock.  We
+	 * need to drop the lock before waiting for the command to complete.
+	 */
+	err = ionic_setphc_cmd(phc, &ctx);
+
+	spin_unlock_irqrestore(&phc->lock, irqflags);
+
+	ionic_adminq_wait(phc->lif, &ctx, err);
+
+	return phc->aux_work_delay;
+}
+
+ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 tick)
+{
+	unsigned long irqflags;
+	u64 ns;
+
+	if (!lif->phc)
+		return 0;
+
+	spin_lock_irqsave(&lif->phc->lock, irqflags);
+	ns = timecounter_cyc2time(&lif->phc->tc, tick);
+	spin_unlock_irqrestore(&lif->phc->lock, irqflags);
+
+	return ns_to_ktime(ns);
+}
+
+static const struct ptp_clock_info ionic_ptp_info = {
+	.owner		= THIS_MODULE,
+	.name		= "ionic_ptp",
+	.adjfine	= ionic_phc_adjfine,
+	.adjtime	= ionic_phc_adjtime,
+	.gettimex64	= ionic_phc_gettimex64,
+	.settime64	= ionic_phc_settime64,
+	.do_aux_work	= ionic_phc_aux_work,
+};
+
+void ionic_lif_register_phc(struct ionic_lif *lif)
+{
+	if (!lif->phc || !(lif->hw_features & IONIC_ETH_HW_TIMESTAMP))
+		return;
+
+	lif->phc->ptp = ptp_clock_register(&lif->phc->ptp_info, lif->ionic->dev);
+
+	if (IS_ERR(lif->phc->ptp)) {
+		dev_warn(lif->ionic->dev, "Cannot register phc device: %ld\n",
+			 PTR_ERR(lif->phc->ptp));
+
+		lif->phc->ptp = NULL;
+	}
+
+	if (lif->phc->ptp)
+		ptp_schedule_worker(lif->phc->ptp, lif->phc->aux_work_delay);
+}
+
+void ionic_lif_unregister_phc(struct ionic_lif *lif)
+{
+	if (!lif->phc || !lif->phc->ptp)
+		return;
+
+	ptp_clock_unregister(lif->phc->ptp);
+
+	lif->phc->ptp = NULL;
+}
+
+void ionic_lif_alloc_phc(struct ionic_lif *lif)
+{
+	struct ionic *ionic = lif->ionic;
+	struct ionic_phc *phc;
+	u64 delay, diff, mult;
+	u64 frac = 0;
+	u64 features;
+	u32 shift;
+
+	if (!ionic->idev.hwstamp_regs)
+		return;
+
+	features = le64_to_cpu(ionic->ident.lif.eth.config.features);
+	if (!(features & IONIC_ETH_HW_TIMESTAMP))
+		return;
+
+	phc = devm_kzalloc(ionic->dev, sizeof(*phc), GFP_KERNEL);
+	if (!phc)
+		return;
+
+	phc->lif = lif;
+
+	phc->cc.read = ionic_cc_read;
+	phc->cc.mask = le64_to_cpu(ionic->ident.dev.hwstamp_mask);
+	phc->cc.mult = le32_to_cpu(ionic->ident.dev.hwstamp_mult);
+	phc->cc.shift = le32_to_cpu(ionic->ident.dev.hwstamp_shift);
+
+	if (!phc->cc.mult) {
+		dev_err(lif->ionic->dev,
+			"Invalid device PHC mask multiplier %u, disabling HW timestamp support\n",
+			phc->cc.mult);
+		devm_kfree(lif->ionic->dev, phc);
+		lif->phc = NULL;
+		return;
+	}
+
+	dev_dbg(lif->ionic->dev, "Device PHC mask %#llx mult %u shift %u\n",
+		phc->cc.mask, phc->cc.mult, phc->cc.shift);
+
+	spin_lock_init(&phc->lock);
+	mutex_init(&phc->config_lock);
+
+	/* max ticks is limited by the multiplier, or by the update period. */
+	if (phc->cc.shift + 2 + ilog2(IONIC_PHC_UPDATE_NS) >= 64) {
+		/* max ticks that do not overflow when multiplied by max
+		 * adjusted multiplier (twice the initial multiplier)
+		 */
+		diff = U64_MAX / phc->cc.mult / 2;
+	} else {
+		/* approx ticks at four times the update period */
+		diff = (u64)IONIC_PHC_UPDATE_NS << (phc->cc.shift + 2);
+		diff = DIV_ROUND_UP(diff, phc->cc.mult);
+	}
+
+	/* transform to bitmask */
+	diff |= diff >> 1;
+	diff |= diff >> 2;
+	diff |= diff >> 4;
+	diff |= diff >> 8;
+	diff |= diff >> 16;
+	diff |= diff >> 32;
+
+	/* constrain to the hardware bitmask, and use this as the bitmask */
+	diff &= phc->cc.mask;
+	phc->cc.mask = diff;
+
+	/* the wrap period is now defined by diff (or phc->cc.mask)
+	 *
+	 * we will update the time basis at about 1/4 the wrap period, so
+	 * should not see a difference of more than +/- diff/4.
+	 *
+	 * this is sufficient not see a difference of more than +/- diff/2, as
+	 * required by timecounter_cyc2time, to detect an old time stamp.
+	 *
+	 * adjust the initial multiplier, being careful to avoid overflow:
+	 *  - do not overflow 63 bits: init_cc_mult * SCALED_PPM
+	 *  - do not overflow 64 bits: max_mult * (diff / 2)
+	 *
+	 * we want to increase the initial multiplier as much as possible, to
+	 * allow for more precise adjustment in ionic_phc_adjfine.
+	 *
+	 * only adjust the multiplier if we can double it or more.
+	 */
+	mult = U64_MAX / 2 / max(diff / 2, SCALED_PPM);
+	shift = mult / phc->cc.mult;
+	if (shift >= 2) {
+		/* initial multiplier will be 2^n of hardware cc.mult */
+		shift = fls(shift);
+		/* increase cc.mult and cc.shift by the same 2^n and n. */
+		phc->cc.mult <<= shift;
+		phc->cc.shift += shift;
+	}
+
+	dev_dbg(lif->ionic->dev, "Initial PHC mask %#llx mult %u shift %u\n",
+		phc->cc.mask, phc->cc.mult, phc->cc.shift);
+
+	/* frequency adjustments are relative to the initial multiplier */
+	phc->init_cc_mult = phc->cc.mult;
+
+	timecounter_init(&phc->tc, &phc->cc, ktime_get_real_ns());
+
+	/* Update cycle_last at 1/4 the wrap period, or IONIC_PHC_UPDATE_NS */
+	delay = min_t(u64, IONIC_PHC_UPDATE_NS,
+		      cyclecounter_cyc2ns(&phc->cc, diff / 4, 0, &frac));
+	dev_dbg(lif->ionic->dev, "Work delay %llu ms\n", delay / NSEC_PER_MSEC);
+
+	phc->aux_work_delay = nsecs_to_jiffies(delay);
+
+	phc->ptp_info = ionic_ptp_info;
+
+	/* We have allowed to adjust the multiplier up to +/- 1 part per 1.
+	 * Here expressed as NORMAL_PPB (1 billion parts per billion).
+	 */
+	phc->ptp_info.max_adj = NORMAL_PPB;
+
+	lif->phc = phc;
+}
+
+void ionic_lif_free_phc(struct ionic_lif *lif)
+{
+	if (!lif->phc)
+		return;
+
+	mutex_destroy(&lif->phc->config_lock);
+
+	devm_kfree(lif->ionic->dev, lif->phc);
+	lif->phc = NULL;
+}
-- 
cgit v1.2.3


From 61db421da31b6fb3767df46c9963e87f2a1d2418 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:04 -0700
Subject: ionic: link in the new hw timestamp code

These are changes to compile and link the new code, but no
new feature support is available or advertised yet.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/Makefile     |  1 +
 drivers/net/ethernet/pensando/ionic/ionic.h      |  4 ++
 drivers/net/ethernet/pensando/ionic/ionic_dev.c  |  2 +
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |  1 +
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  | 25 ++++++++
 drivers/net/ethernet/pensando/ionic/ionic_lif.h  | 72 ++++++++++++++++++++++++
 drivers/net/ethernet/pensando/ionic/ionic_main.c |  2 +
 7 files changed, 107 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile
index 8d3c2d3cb10d..4e7642a2d25f 100644
--- a/drivers/net/ethernet/pensando/ionic/Makefile
+++ b/drivers/net/ethernet/pensando/ionic/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_IONIC) := ionic.o
 ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \
 	   ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \
 	   ionic_txrx.o ionic_stats.o ionic_fw.o
+ionic-$(CONFIG_PTP_1588_CLOCK) += ionic_phc.o
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 18e92103c711..66204106f83e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -20,6 +20,10 @@ struct ionic_lif;
 
 #define DEVCMD_TIMEOUT  10
 
+#define IONIC_PHC_UPDATE_NS	10000000000	    /* 10s in nanoseconds */
+#define NORMAL_PPB		1000000000	    /* one billion parts per billion */
+#define SCALED_PPM		(1000000ull << 16)  /* 2^16 million parts per 2^16 million */
+
 struct ionic_vf {
 	u16	 index;
 	u8	 macaddr[6];
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 0e8e88c69e1c..1dfe962e22e0 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -79,6 +79,8 @@ int ionic_dev_setup(struct ionic *ionic)
 	idev->intr_status = bar->vaddr + IONIC_BAR0_INTR_STATUS_OFFSET;
 	idev->intr_ctrl = bar->vaddr + IONIC_BAR0_INTR_CTRL_OFFSET;
 
+	idev->hwstamp_regs = &idev->dev_info_regs->hwstamp;
+
 	sig = ioread32(&idev->dev_info_regs->signature);
 	if (sig != IONIC_DEV_INFO_SIGNATURE) {
 		dev_err(dev, "Incompatible firmware signature %x", sig);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 25c52c042246..28994e01fa0a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -136,6 +136,7 @@ struct ionic_devinfo {
 struct ionic_dev {
 	union ionic_dev_info_regs __iomem *dev_info_regs;
 	union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
+	struct ionic_hwstamp_regs __iomem *hwstamp_regs;
 
 	atomic_long_t last_check_time;
 	unsigned long last_hb_time;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 9a61b2bbb652..ced80de4d92a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -830,6 +830,31 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 	return 0;
 }
 
+int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
+{
+	return 0;
+}
+
+int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
+{
+	return 0;
+}
+
+int ionic_lif_config_hwstamp_rxq_all(struct ionic_lif *lif, bool rx_all)
+{
+	return 0;
+}
+
+int ionic_lif_set_hwstamp_txmode(struct ionic_lif *lif, u16 txstamp_mode)
+{
+	return 0;
+}
+
+int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
+{
+	return 0;
+}
+
 static bool ionic_notifyq_service(struct ionic_cq *cq,
 				  struct ionic_cq_info *cq_info)
 {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 93d3058aed77..ea3b086af179 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -4,6 +4,9 @@
 #ifndef _IONIC_LIF_H_
 #define _IONIC_LIF_H_
 
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
+#include <uapi/linux/net_tstamp.h>
 #include <linux/dim.h>
 #include <linux/pci.h>
 #include "ionic_rx_filter.h"
@@ -36,6 +39,8 @@ struct ionic_tx_stats {
 	u64 crc32_csum;
 	u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR];
 	u64 dma_map_err;
+	u64 hwstamp_valid;
+	u64 hwstamp_invalid;
 };
 
 struct ionic_rx_stats {
@@ -49,6 +54,8 @@ struct ionic_rx_stats {
 	u64 csum_error;
 	u64 dma_map_err;
 	u64 alloc_err;
+	u64 hwstamp_valid;
+	u64 hwstamp_invalid;
 };
 
 #define IONIC_QCQ_F_INITED		BIT(0)
@@ -125,6 +132,10 @@ struct ionic_lif_sw_stats {
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
 	u64 rx_csum_error;
+	u64 tx_hwstamp_valid;
+	u64 tx_hwstamp_invalid;
+	u64 rx_hwstamp_valid;
+	u64 rx_hwstamp_invalid;
 	u64 hw_tx_dropped;
 	u64 hw_rx_dropped;
 	u64 hw_rx_over_errors;
@@ -158,6 +169,8 @@ struct ionic_qtype_info {
 	u16 sg_desc_stride;
 };
 
+struct ionic_phc;
+
 #define IONIC_LIF_NAME_MAX_SZ		32
 struct ionic_lif {
 	struct net_device *netdev;
@@ -170,8 +183,10 @@ struct ionic_lif {
 	struct ionic_qcq *adminqcq;
 	struct ionic_qcq *notifyqcq;
 	struct ionic_qcq **txqcqs;
+	struct ionic_qcq *hwstamp_txq;
 	struct ionic_tx_stats *txqstats;
 	struct ionic_qcq **rxqcqs;
+	struct ionic_qcq *hwstamp_rxq;
 	struct ionic_rx_stats *rxqstats;
 	struct ionic_deferred deferred;
 	struct work_struct tx_timeout_work;
@@ -214,9 +229,29 @@ struct ionic_lif {
 	unsigned long *dbid_inuse;
 	unsigned int dbid_count;
 
+	struct ionic_phc *phc;
+
 	struct dentry *dentry;
 };
 
+struct ionic_phc {
+	spinlock_t lock; /* lock for cc and tc */
+	struct cyclecounter cc;
+	struct timecounter tc;
+
+	struct mutex config_lock; /* lock for ts_config */
+	struct hwtstamp_config ts_config;
+	u64 ts_config_rx_filt;
+	u32 ts_config_tx_mode;
+
+	u32 init_cc_mult;
+	long aux_work_delay;
+
+	struct ptp_clock_info ptp_info;
+	struct ptp_clock *ptp;
+	struct ionic_lif *lif;
+};
+
 struct ionic_queue_params {
 	unsigned int nxqs;
 	unsigned int ntxq_descs;
@@ -265,6 +300,43 @@ void ionic_lif_unregister(struct ionic_lif *lif);
 int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 		       union ionic_lif_identity *lif_ident);
 int ionic_lif_size(struct ionic *ionic);
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
+int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
+ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
+void ionic_lif_register_phc(struct ionic_lif *lif);
+void ionic_lif_unregister_phc(struct ionic_lif *lif);
+void ionic_lif_alloc_phc(struct ionic_lif *lif);
+void ionic_lif_free_phc(struct ionic_lif *lif);
+#else
+static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter)
+{
+	return ns_to_ktime(0);
+}
+
+static inline void ionic_lif_register_phc(struct ionic_lif *lif) {}
+static inline void ionic_lif_unregister_phc(struct ionic_lif *lif) {}
+static inline void ionic_lif_alloc_phc(struct ionic_lif *lif) {}
+static inline void ionic_lif_free_phc(struct ionic_lif *lif) {}
+#endif
+
+int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif);
+int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif);
+int ionic_lif_config_hwstamp_rxq_all(struct ionic_lif *lif, bool rx_all);
+int ionic_lif_set_hwstamp_txmode(struct ionic_lif *lif, u16 txstamp_mode);
+int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class);
+
 int ionic_lif_rss_config(struct ionic_lif *lif, u16 types,
 			 const u8 *key, const u32 *indir);
 int ionic_reconfigure_queues(struct ionic_lif *lif,
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 8c27fbe0e312..61cfe2120817 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -148,6 +148,8 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
 		return "IONIC_CMD_LIF_SETATTR";
 	case IONIC_CMD_LIF_GETATTR:
 		return "IONIC_CMD_LIF_GETATTR";
+	case IONIC_CMD_LIF_SETPHC:
+		return "IONIC_CMD_LIF_SETPHC";
 	case IONIC_CMD_RX_MODE_SET:
 		return "IONIC_CMD_RX_MODE_SET";
 	case IONIC_CMD_RX_FILTER_ADD:
-- 
cgit v1.2.3


From ab470bbe7aba1f9cbed52965ccdc41652951f110 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:05 -0700
Subject: ionic: add rx filtering for hw timestamp steering

Add handling of the new Rx packet classification filter type.
This simple bit of classification allows for steering packets
to a separate Rx queue for processing.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_rx_filter.c   | 21 +++++++++++++++++++++
 .../net/ethernet/pensando/ionic/ionic_rx_filter.h   |  1 +
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
index cd0076fc3044..d71316d9ded2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c
@@ -140,6 +140,9 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
 	case IONIC_RX_FILTER_MATCH_MAC_VLAN:
 		key = le16_to_cpu(ac->mac_vlan.vlan);
 		break;
+	case IONIC_RX_FILTER_STEER_PKTCLASS:
+		key = 0;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -210,3 +213,21 @@ struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif,
 
 	return NULL;
 }
+
+struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif)
+{
+	struct ionic_rx_filter *f;
+	struct hlist_head *head;
+	unsigned int key;
+
+	key = hash_32(0, IONIC_RX_FILTER_HASH_BITS);
+	head = &lif->rx_filters.by_hash[key];
+
+	hlist_for_each_entry(f, head, by_hash) {
+		if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_STEER_PKTCLASS)
+			continue;
+		return f;
+	}
+
+	return NULL;
+}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
index cf8f4c0a961c..1ead48be3c83 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h
@@ -31,5 +31,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index,
 			 u32 hash, struct ionic_admin_ctx *ctx);
 struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid);
 struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr);
+struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif);
 
 #endif /* _IONIC_RX_FILTER_H_ */
-- 
cgit v1.2.3


From f0790bcd36063f2850301982f167128139a51f62 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:06 -0700
Subject: ionic: set up hw timestamp queues

We do hardware timestamping through a separate Tx queue,
and optionally through a separate Rx queue.  These queues
are allocated, freed, and tracked separately from the basic
queue arrays.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 266 +++++++++++++++++++++++-
 1 file changed, 261 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index ced80de4d92a..26c066ed74c4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -684,11 +684,11 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif)
 	if (!lif->rxqcqs)
 		goto err_out;
 
-	lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif,
+	lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif + 1,
 				     sizeof(*lif->txqstats), GFP_KERNEL);
 	if (!lif->txqstats)
 		goto err_out;
-	lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif,
+	lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif + 1,
 				     sizeof(*lif->rxqstats), GFP_KERNEL);
 	if (!lif->rxqstats)
 		goto err_out;
@@ -832,27 +832,250 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 
 int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif)
 {
+	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
+	unsigned int txq_i, flags;
+	struct ionic_qcq *txq;
+	u64 features;
+	int err;
+
+	mutex_lock(&lif->queue_lock);
+
+	if (lif->hwstamp_txq)
+		goto out;
+
+	features = IONIC_Q_F_2X_CQ_DESC | IONIC_TXQ_F_HWSTAMP;
+
+	num_desc = IONIC_MIN_TXRX_DESC;
+	desc_sz = sizeof(struct ionic_txq_desc);
+	comp_sz = 2 * sizeof(struct ionic_txq_comp);
+
+	if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
+	    lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == sizeof(struct ionic_txq_sg_desc_v1))
+		sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1);
+	else
+		sg_desc_sz = sizeof(struct ionic_txq_sg_desc);
+
+	txq_i = lif->ionic->ntxqs_per_lif;
+	flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
+
+	err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, txq_i, "hwstamp_tx", flags,
+			      num_desc, desc_sz, comp_sz, sg_desc_sz,
+			      lif->kern_pid, &txq);
+	if (err)
+		goto err_qcq_alloc;
+
+	txq->q.features = features;
+
+	ionic_link_qcq_interrupts(lif->adminqcq, txq);
+	ionic_debugfs_add_qcq(lif, txq);
+
+	lif->hwstamp_txq = txq;
+
+	if (netif_running(lif->netdev)) {
+		err = ionic_lif_txq_init(lif, txq);
+		if (err)
+			goto err_qcq_init;
+
+		if (test_bit(IONIC_LIF_F_UP, lif->state)) {
+			err = ionic_qcq_enable(txq);
+			if (err)
+				goto err_qcq_enable;
+		}
+	}
+
+out:
+	mutex_unlock(&lif->queue_lock);
+
 	return 0;
+
+err_qcq_enable:
+	ionic_lif_qcq_deinit(lif, txq);
+err_qcq_init:
+	lif->hwstamp_txq = NULL;
+	ionic_debugfs_del_qcq(txq);
+	ionic_qcq_free(lif, txq);
+	devm_kfree(lif->ionic->dev, txq);
+err_qcq_alloc:
+	mutex_unlock(&lif->queue_lock);
+	return err;
 }
 
 int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif)
 {
+	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
+	unsigned int rxq_i, flags;
+	struct ionic_qcq *rxq;
+	u64 features;
+	int err;
+
+	mutex_lock(&lif->queue_lock);
+
+	if (lif->hwstamp_rxq)
+		goto out;
+
+	features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP;
+
+	num_desc = IONIC_MIN_TXRX_DESC;
+	desc_sz = sizeof(struct ionic_rxq_desc);
+	comp_sz = 2 * sizeof(struct ionic_rxq_comp);
+	sg_desc_sz = sizeof(struct ionic_rxq_sg_desc);
+
+	rxq_i = lif->ionic->nrxqs_per_lif;
+	flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG;
+
+	err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, rxq_i, "hwstamp_rx", flags,
+			      num_desc, desc_sz, comp_sz, sg_desc_sz,
+			      lif->kern_pid, &rxq);
+	if (err)
+		goto err_qcq_alloc;
+
+	rxq->q.features = features;
+
+	ionic_link_qcq_interrupts(lif->adminqcq, rxq);
+	ionic_debugfs_add_qcq(lif, rxq);
+
+	lif->hwstamp_rxq = rxq;
+
+	if (netif_running(lif->netdev)) {
+		err = ionic_lif_rxq_init(lif, rxq);
+		if (err)
+			goto err_qcq_init;
+
+		if (test_bit(IONIC_LIF_F_UP, lif->state)) {
+			ionic_rx_fill(&rxq->q);
+			err = ionic_qcq_enable(rxq);
+			if (err)
+				goto err_qcq_enable;
+		}
+	}
+
+out:
+	mutex_unlock(&lif->queue_lock);
+
 	return 0;
+
+err_qcq_enable:
+	ionic_lif_qcq_deinit(lif, rxq);
+err_qcq_init:
+	lif->hwstamp_rxq = NULL;
+	ionic_debugfs_del_qcq(rxq);
+	ionic_qcq_free(lif, rxq);
+	devm_kfree(lif->ionic->dev, rxq);
+err_qcq_alloc:
+	mutex_unlock(&lif->queue_lock);
+	return err;
 }
 
 int ionic_lif_config_hwstamp_rxq_all(struct ionic_lif *lif, bool rx_all)
 {
-	return 0;
+	struct ionic_queue_params qparam;
+
+	ionic_init_queue_params(lif, &qparam);
+
+	if (rx_all)
+		qparam.rxq_features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP;
+	else
+		qparam.rxq_features = 0;
+
+	/* if we're not running, just set the values and return */
+	if (!netif_running(lif->netdev)) {
+		lif->rxq_features = qparam.rxq_features;
+		return 0;
+	}
+
+	return ionic_reconfigure_queues(lif, &qparam);
 }
 
 int ionic_lif_set_hwstamp_txmode(struct ionic_lif *lif, u16 txstamp_mode)
 {
-	return 0;
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.lif_setattr = {
+			.opcode = IONIC_CMD_LIF_SETATTR,
+			.index = cpu_to_le16(lif->index),
+			.attr = IONIC_LIF_ATTR_TXSTAMP,
+			.txstamp_mode = cpu_to_le16(txstamp_mode),
+		},
+	};
+
+	return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static void ionic_lif_del_hwstamp_rxfilt(struct ionic_lif *lif)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_del = {
+			.opcode = IONIC_CMD_RX_FILTER_DEL,
+			.lif_index = cpu_to_le16(lif->index),
+		},
+	};
+	struct ionic_rx_filter *f;
+	u32 filter_id;
+	int err;
+
+	spin_lock_bh(&lif->rx_filters.lock);
+
+	f = ionic_rx_filter_rxsteer(lif);
+	if (!f) {
+		spin_unlock_bh(&lif->rx_filters.lock);
+		return;
+	}
+
+	filter_id = f->filter_id;
+	ionic_rx_filter_free(lif, f);
+
+	spin_unlock_bh(&lif->rx_filters.lock);
+
+	netdev_dbg(lif->netdev, "rx_filter del RXSTEER (id %d)\n", filter_id);
+
+	ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(filter_id);
+
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err && err != -EEXIST)
+		netdev_dbg(lif->netdev, "failed to delete rx_filter RXSTEER (id %d)\n", filter_id);
+}
+
+static int ionic_lif_add_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
+{
+	struct ionic_admin_ctx ctx = {
+		.work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+		.cmd.rx_filter_add = {
+			.opcode = IONIC_CMD_RX_FILTER_ADD,
+			.lif_index = cpu_to_le16(lif->index),
+			.match = cpu_to_le16(IONIC_RX_FILTER_STEER_PKTCLASS),
+			.pkt_class = cpu_to_le64(pkt_class),
+		},
+	};
+	u8 qtype;
+	u32 qid;
+	int err;
+
+	if (!lif->hwstamp_rxq)
+		return -EINVAL;
+
+	qtype = lif->hwstamp_rxq->q.type;
+	ctx.cmd.rx_filter_add.qtype = qtype;
+
+	qid = lif->hwstamp_rxq->q.index;
+	ctx.cmd.rx_filter_add.qid = cpu_to_le32(qid);
+
+	netdev_dbg(lif->netdev, "rx_filter add RXSTEER\n");
+	err = ionic_adminq_post_wait(lif, &ctx);
+	if (err && err != -EEXIST)
+		return err;
+
+	return ionic_rx_filter_save(lif, 0, qid, 0, &ctx);
 }
 
 int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class)
 {
-	return 0;
+	ionic_lif_del_hwstamp_rxfilt(lif);
+
+	if (!pkt_class)
+		return 0;
+
+	return ionic_lif_add_hwstamp_rxfilt(lif, pkt_class);
 }
 
 static bool ionic_notifyq_service(struct ionic_cq *cq,
@@ -1695,11 +1918,17 @@ static void ionic_txrx_disable(struct ionic_lif *lif)
 			err = ionic_qcq_disable(lif->txqcqs[i], (err != -ETIMEDOUT));
 	}
 
+	if (lif->hwstamp_txq)
+		err = ionic_qcq_disable(lif->hwstamp_txq, (err != -ETIMEDOUT));
+
 	if (lif->rxqcqs) {
 		for (i = 0; i < lif->nxqs; i++)
 			err = ionic_qcq_disable(lif->rxqcqs[i], (err != -ETIMEDOUT));
 	}
 
+	if (lif->hwstamp_rxq)
+		err = ionic_qcq_disable(lif->hwstamp_rxq, (err != -ETIMEDOUT));
+
 	ionic_lif_quiesce(lif);
 }
 
@@ -1743,6 +1972,18 @@ static void ionic_txrx_free(struct ionic_lif *lif)
 			lif->rxqcqs[i] = NULL;
 		}
 	}
+
+	if (lif->hwstamp_txq) {
+		ionic_qcq_free(lif, lif->hwstamp_txq);
+		devm_kfree(lif->ionic->dev, lif->hwstamp_txq);
+		lif->hwstamp_txq = NULL;
+	}
+
+	if (lif->hwstamp_rxq) {
+		ionic_qcq_free(lif, lif->hwstamp_rxq);
+		devm_kfree(lif->ionic->dev, lif->hwstamp_rxq);
+		lif->hwstamp_rxq = NULL;
+	}
 }
 
 static int ionic_txrx_alloc(struct ionic_lif *lif)
@@ -2587,6 +2828,8 @@ int ionic_lif_alloc(struct ionic *ionic)
 	}
 	netdev_rss_key_fill(lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE);
 
+	ionic_lif_alloc_phc(lif);
+
 	return 0;
 
 err_out_free_qcqs:
@@ -2707,6 +2950,8 @@ void ionic_lif_free(struct ionic_lif *lif)
 {
 	struct device *dev = lif->ionic->dev;
 
+	ionic_lif_free_phc(lif);
+
 	/* free rss indirection table */
 	dma_free_coherent(dev, lif->rss_ind_tbl_sz, lif->rss_ind_tbl,
 			  lif->rss_ind_tbl_pa);
@@ -3075,6 +3320,7 @@ void ionic_lif_unregister(struct ionic_lif *lif)
 
 	if (lif->netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(lif->netdev);
+
 	lif->registered = false;
 }
 
@@ -3214,6 +3460,16 @@ int ionic_lif_size(struct ionic *ionic)
 	ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]);
 	nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]);
 
+	/* reserve last queue id for hardware timestamping */
+	if (lc->features & cpu_to_le64(IONIC_ETH_HW_TIMESTAMP)) {
+		if (ntxqs_per_lif <= 1 || nrxqs_per_lif <= 1) {
+			lc->features &= cpu_to_le64(~IONIC_ETH_HW_TIMESTAMP);
+		} else {
+			ntxqs_per_lif -= 1;
+			nrxqs_per_lif -= 1;
+		}
+	}
+
 	nxqs = min(ntxqs_per_lif, nrxqs_per_lif);
 	nxqs = min(nxqs, num_online_cpus());
 	neqs = min(neqs_per_lif, num_online_cpus());
-- 
cgit v1.2.3


From a8771bfe05549f3068532c60e3682441ff8159fd Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:07 -0700
Subject: ionic: add and enable tx and rx timestamp handling

The Tx and Rx timestamped packets are handled through separate
queues.  Here we set them up, service them, and tear them down
along with the normal Tx and Rx queues.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |   2 +-
 drivers/net/ethernet/pensando/ionic/ionic_lif.c  |  51 ++++++++-
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 126 +++++++++++++++++++----
 drivers/net/ethernet/pensando/ionic/ionic_txrx.h |   3 +
 4 files changed, 157 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 28994e01fa0a..c25cf9b744c5 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -220,11 +220,11 @@ struct ionic_queue {
 	unsigned int index;
 	unsigned int num_descs;
 	unsigned int max_sg_elems;
+	u64 features;
 	u64 dbell_count;
 	u64 stop;
 	u64 wake;
 	u64 drop;
-	u64 features;
 	struct ionic_dev *idev;
 	unsigned int type;
 	unsigned int hw_index;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 26c066ed74c4..e14c93fbbd68 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1145,9 +1145,12 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 	struct ionic_dev *idev = &lif->ionic->idev;
 	unsigned long irqflags;
 	unsigned int flags = 0;
+	int rx_work = 0;
+	int tx_work = 0;
 	int n_work = 0;
 	int a_work = 0;
 	int work_done;
+	int credits;
 
 	if (lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED)
 		n_work = ionic_cq_service(&lif->notifyqcq->cq, budget,
@@ -1159,7 +1162,15 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 					  ionic_adminq_service, NULL, NULL);
 	spin_unlock_irqrestore(&lif->adminq_lock, irqflags);
 
-	work_done = max(n_work, a_work);
+	if (lif->hwstamp_rxq)
+		rx_work = ionic_cq_service(&lif->hwstamp_rxq->cq, budget,
+					   ionic_rx_service, NULL, NULL);
+
+	if (lif->hwstamp_txq)
+		tx_work = ionic_cq_service(&lif->hwstamp_txq->cq, budget,
+					   ionic_tx_service, NULL, NULL);
+
+	work_done = max(max(n_work, a_work), max(rx_work, tx_work));
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		flags |= IONIC_INTR_CRED_UNMASK;
 		intr->rearm_count++;
@@ -1167,9 +1178,8 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 
 	if (work_done || flags) {
 		flags |= IONIC_INTR_CRED_RESET_COALESCE;
-		ionic_intr_credits(idev->intr_ctrl,
-				   intr->index,
-				   n_work + a_work, flags);
+		credits = n_work + a_work + rx_work + tx_work;
+		ionic_intr_credits(idev->intr_ctrl, intr->index, credits, flags);
 	}
 
 	return work_done;
@@ -1529,6 +1539,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif,
 	int err;
 
 	ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features);
+
 	err = ionic_adminq_post_wait(lif, &ctx);
 	if (err)
 		return err;
@@ -1951,6 +1962,17 @@ static void ionic_txrx_deinit(struct ionic_lif *lif)
 		}
 	}
 	lif->rx_mode = 0;
+
+	if (lif->hwstamp_txq) {
+		ionic_lif_qcq_deinit(lif, lif->hwstamp_txq);
+		ionic_tx_flush(&lif->hwstamp_txq->cq);
+		ionic_tx_empty(&lif->hwstamp_txq->q);
+	}
+
+	if (lif->hwstamp_rxq) {
+		ionic_lif_qcq_deinit(lif, lif->hwstamp_rxq);
+		ionic_rx_empty(&lif->hwstamp_rxq->q);
+	}
 }
 
 static void ionic_txrx_free(struct ionic_lif *lif)
@@ -2122,8 +2144,26 @@ static int ionic_txrx_enable(struct ionic_lif *lif)
 		}
 	}
 
+	if (lif->hwstamp_rxq) {
+		ionic_rx_fill(&lif->hwstamp_rxq->q);
+		err = ionic_qcq_enable(lif->hwstamp_rxq);
+		if (err)
+			goto err_out_hwstamp_rx;
+	}
+
+	if (lif->hwstamp_txq) {
+		err = ionic_qcq_enable(lif->hwstamp_txq);
+		if (err)
+			goto err_out_hwstamp_tx;
+	}
+
 	return 0;
 
+err_out_hwstamp_tx:
+	if (lif->hwstamp_rxq)
+		derr = ionic_qcq_disable(lif->hwstamp_rxq, (derr != -ETIMEDOUT));
+err_out_hwstamp_rx:
+	i = lif->nxqs;
 err_out:
 	while (i--) {
 		derr = ionic_qcq_disable(lif->txqcqs[i], (derr != -ETIMEDOUT));
@@ -2929,6 +2969,9 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 			goto err_txrx_free;
 	}
 
+	/* restore the hardware timestamping queues */
+	ionic_lif_hwstamp_set(lif, NULL);
+
 	clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
 	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 4859120eec72..3478b0f2495f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -11,8 +11,6 @@
 #include "ionic_txrx.h"
 
 
-static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
-
 static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
 				  ionic_desc_cb cb_func, void *cb_arg)
 {
@@ -298,13 +296,33 @@ static void ionic_rx_clean(struct ionic_queue *q,
 		stats->vlan_stripped++;
 	}
 
+	if (unlikely(q->features & IONIC_RXQ_F_HWSTAMP)) {
+		__le64 *cq_desc_hwstamp;
+		u64 hwstamp;
+
+		cq_desc_hwstamp =
+			cq_info->cq_desc +
+			qcq->cq.desc_size -
+			sizeof(struct ionic_rxq_comp) -
+			IONIC_HWSTAMP_CQ_NEGOFFSET;
+
+		hwstamp = le64_to_cpu(*cq_desc_hwstamp);
+
+		if (hwstamp != IONIC_HWSTAMP_INVALID) {
+			skb_hwtstamps(skb)->hwtstamp = ionic_lif_phc_ktime(q->lif, hwstamp);
+			stats->hwstamp_valid++;
+		} else {
+			stats->hwstamp_invalid++;
+		}
+	}
+
 	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
 		napi_gro_receive(&qcq->napi, skb);
 	else
 		napi_gro_frags(&qcq->napi);
 }
 
-static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
@@ -665,9 +683,11 @@ static void ionic_tx_clean(struct ionic_queue *q,
 {
 	struct ionic_buf_info *buf_info = desc_info->bufs;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
+	struct ionic_qcq *qcq = q_to_qcq(q);
+	struct sk_buff *skb = cb_arg;
 	struct device *dev = q->dev;
-	u16 queue_index;
 	unsigned int i;
+	u16 qi;
 
 	if (desc_info->nbufs) {
 		dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr,
@@ -678,24 +698,49 @@ static void ionic_tx_clean(struct ionic_queue *q,
 				       buf_info->len, DMA_TO_DEVICE);
 	}
 
-	if (cb_arg) {
-		struct sk_buff *skb = cb_arg;
+	if (!skb)
+		return;
 
-		queue_index = skb_get_queue_mapping(skb);
-		if (unlikely(__netif_subqueue_stopped(q->lif->netdev,
-						      queue_index))) {
-			netif_wake_subqueue(q->lif->netdev, queue_index);
-			q->wake++;
-		}
+	qi = skb_get_queue_mapping(skb);
+
+	if (unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) {
+		if (cq_info) {
+			struct skb_shared_hwtstamps hwts = {};
+			__le64 *cq_desc_hwstamp;
+			u64 hwstamp;
+
+			cq_desc_hwstamp =
+				cq_info->cq_desc +
+				qcq->cq.desc_size -
+				sizeof(struct ionic_txq_comp) -
+				IONIC_HWSTAMP_CQ_NEGOFFSET;
+
+			hwstamp = le64_to_cpu(*cq_desc_hwstamp);
 
-		desc_info->bytes = skb->len;
-		stats->clean++;
+			if (hwstamp != IONIC_HWSTAMP_INVALID) {
+				hwts.hwtstamp = ionic_lif_phc_ktime(q->lif, hwstamp);
 
-		dev_consume_skb_any(skb);
+				skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+				skb_tstamp_tx(skb, &hwts);
+
+				stats->hwstamp_valid++;
+			} else {
+				stats->hwstamp_invalid++;
+			}
+		}
+
+	} else if (unlikely(__netif_subqueue_stopped(q->lif->netdev, qi))) {
+		netif_wake_subqueue(q->lif->netdev, qi);
+		q->wake++;
 	}
+
+	desc_info->bytes = skb->len;
+	stats->clean++;
+
+	dev_consume_skb_any(skb);
 }
 
-static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
+bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
@@ -726,7 +771,7 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 		desc_info->cb_arg = NULL;
 	} while (index != le16_to_cpu(comp->comp_index));
 
-	if (pkts && bytes)
+	if (pkts && bytes && !unlikely(q->features & IONIC_TXQ_F_HWSTAMP))
 		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
 
 	return true;
@@ -764,7 +809,7 @@ void ionic_tx_empty(struct ionic_queue *q)
 		desc_info->cb_arg = NULL;
 	}
 
-	if (pkts && bytes)
+	if (pkts && bytes && !unlikely(q->features & IONIC_TXQ_F_HWSTAMP))
 		netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes);
 }
 
@@ -838,7 +883,8 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc
 
 	if (start) {
 		skb_tx_timestamp(skb);
-		netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+		if (!unlikely(q->features & IONIC_TXQ_F_HWSTAMP))
+			netdev_tx_sent_queue(q_to_ndq(q), skb->len);
 		ionic_txq_post(q, false, ionic_tx_clean, skb);
 	} else {
 		ionic_txq_post(q, done, NULL, NULL);
@@ -1085,7 +1131,8 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 	stats->pkts++;
 	stats->bytes += skb->len;
 
-	netdev_tx_sent_queue(q_to_ndq(q), skb->len);
+	if (!unlikely(q->features & IONIC_TXQ_F_HWSTAMP))
+		netdev_tx_sent_queue(q_to_ndq(q), skb->len);
 	ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
 
 	return 0;
@@ -1137,6 +1184,41 @@ static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs)
 	return stopped;
 }
 
+static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb,
+					    struct net_device *netdev)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic_queue *q = &lif->hwstamp_txq->q;
+	int err, ndescs;
+
+	/* Does not stop/start txq, because we post to a separate tx queue
+	 * for timestamping, and if a packet can't be posted immediately to
+	 * the timestamping queue, it is dropped.
+	 */
+
+	ndescs = ionic_tx_descs_needed(q, skb);
+	if (unlikely(ndescs < 0))
+		goto err_out_drop;
+
+	if (unlikely(!ionic_q_has_space(q, ndescs)))
+		goto err_out_drop;
+
+	if (skb_is_gso(skb))
+		err = ionic_tx_tso(q, skb);
+	else
+		err = ionic_tx(q, skb);
+
+	if (err)
+		goto err_out_drop;
+
+	return NETDEV_TX_OK;
+
+err_out_drop:
+	q->drop++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
 netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	u16 queue_index = skb_get_queue_mapping(skb);
@@ -1150,6 +1232,10 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 		return NETDEV_TX_OK;
 	}
 
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		if (lif->hwstamp_txq)
+			return ionic_start_hwstamp_xmit(skb, netdev);
+
 	if (unlikely(queue_index >= lif->nxqs))
 		queue_index = 0;
 	q = &lif->txqcqs[queue_index]->q;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
index 7667b72232b8..d7cbaad8a6fb 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h
@@ -14,4 +14,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget);
 int ionic_txrx_napi(struct napi_struct *napi, int budget);
 netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 
+bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
+
 #endif /* _IONIC_TXRX_H_ */
-- 
cgit v1.2.3


From f8ba81da73fc56b693dc5d91c767c882618e0004 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:08 -0700
Subject: ionic: add ethtool support for PTP

Add the get_ts_info() callback for ethtool support of
timestamping information.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_ethtool.c    | 93 ++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index b1e78b452fad..71db1e2c7d8a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -849,6 +849,98 @@ static int ionic_get_module_eeprom(struct net_device *netdev,
 	return 0;
 }
 
+static int ionic_get_ts_info(struct net_device *netdev,
+			     struct ethtool_ts_info *info)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+	struct ionic *ionic = lif->ionic;
+	__le64 mask;
+
+	if (!lif->phc || !lif->phc->ptp)
+		return ethtool_op_get_ts_info(netdev, info);
+
+	info->phc_index = ptp_clock_index(lif->phc->ptp);
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	/* tx modes */
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+			 BIT(HWTSTAMP_TX_ON);
+
+	mask = cpu_to_le64(BIT_ULL(IONIC_TXSTAMP_ONESTEP_SYNC));
+	if (ionic->ident.lif.eth.hwstamp_tx_modes & mask)
+		info->tx_types |= BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+
+	mask = cpu_to_le64(BIT_ULL(IONIC_TXSTAMP_ONESTEP_P2P));
+	if (ionic->ident.lif.eth.hwstamp_tx_modes & mask)
+		info->tx_types |= BIT(HWTSTAMP_TX_ONESTEP_P2P);
+
+	/* rx filters */
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_ALL);
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_NTP_ALL);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_NTP_ALL;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_SYNC);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_SYNC;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_DREQ);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_ALL);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_SYNC);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_SYNC;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_DREQ);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_ALL);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_SYNC);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_SYNC;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_DREQ);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_ALL);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_SYNC);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_SYNC;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_DREQ);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_DELAY_REQ;
+
+	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_ALL);
+	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
+		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+	return 0;
+}
+
 static int ionic_nway_reset(struct net_device *netdev)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
@@ -906,6 +998,7 @@ static const struct ethtool_ops ionic_ethtool_ops = {
 	.set_pauseparam		= ionic_set_pauseparam,
 	.get_fecparam		= ionic_get_fecparam,
 	.set_fecparam		= ionic_set_fecparam,
+	.get_ts_info		= ionic_get_ts_info,
 	.nway_reset		= ionic_nway_reset,
 };
 
-- 
cgit v1.2.3


From 196f56c07f91fb24d33c5026036d97d3d3154dd2 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:09 -0700
Subject: ionic: ethtool ptp stats

Add the new hwstamp stats to our ethtool stats output.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_stats.c | 38 ++++++++++++++++++++---
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index ed9cf93d9acd..58a854666c62 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -130,6 +130,8 @@ static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
 	IONIC_TX_STAT_DESC(frags),
 	IONIC_TX_STAT_DESC(tso),
 	IONIC_TX_STAT_DESC(tso_bytes),
+	IONIC_TX_STAT_DESC(hwstamp_valid),
+	IONIC_TX_STAT_DESC(hwstamp_invalid),
 	IONIC_TX_STAT_DESC(csum_none),
 	IONIC_TX_STAT_DESC(csum),
 	IONIC_TX_STAT_DESC(vlan_inserted),
@@ -143,6 +145,8 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
 	IONIC_RX_STAT_DESC(csum_none),
 	IONIC_RX_STAT_DESC(csum_complete),
 	IONIC_RX_STAT_DESC(csum_error),
+	IONIC_RX_STAT_DESC(hwstamp_valid),
+	IONIC_RX_STAT_DESC(hwstamp_invalid),
 	IONIC_RX_STAT_DESC(dropped),
 	IONIC_RX_STAT_DESC(vlan_stripped),
 };
@@ -188,6 +192,8 @@ static void ionic_add_lif_txq_stats(struct ionic_lif *lif, int q_num,
 	stats->tx_tso_bytes += txstats->tso_bytes;
 	stats->tx_csum_none += txstats->csum_none;
 	stats->tx_csum += txstats->csum;
+	stats->tx_hwstamp_valid += txstats->hwstamp_valid;
+	stats->tx_hwstamp_invalid += txstats->hwstamp_invalid;
 }
 
 static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num,
@@ -200,6 +206,8 @@ static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num,
 	stats->rx_csum_none += rxstats->csum_none;
 	stats->rx_csum_complete += rxstats->csum_complete;
 	stats->rx_csum_error += rxstats->csum_error;
+	stats->rx_hwstamp_valid += rxstats->hwstamp_valid;
+	stats->rx_hwstamp_invalid += rxstats->hwstamp_invalid;
 }
 
 static void ionic_get_lif_stats(struct ionic_lif *lif,
@@ -215,6 +223,12 @@ static void ionic_get_lif_stats(struct ionic_lif *lif,
 		ionic_add_lif_rxq_stats(lif, q_num, stats);
 	}
 
+	if (lif->hwstamp_txq)
+		ionic_add_lif_txq_stats(lif, lif->hwstamp_txq->q.index, stats);
+
+	if (lif->hwstamp_rxq)
+		ionic_add_lif_rxq_stats(lif, lif->hwstamp_rxq->q.index, stats);
+
 	ionic_get_stats64(lif->netdev, &ns);
 	stats->hw_tx_dropped = ns.tx_dropped;
 	stats->hw_rx_dropped = ns.rx_dropped;
@@ -227,14 +241,18 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 {
 	u64 total = 0, tx_queues = MAX_Q(lif), rx_queues = MAX_Q(lif);
 
-	/* lif stats */
+	if (lif->hwstamp_txq)
+		tx_queues += 1;
+
+	if (lif->hwstamp_rxq)
+		rx_queues += 1;
+
 	total += IONIC_NUM_LIF_STATS;
+	total += IONIC_NUM_PORT_STATS;
+
 	total += tx_queues * IONIC_NUM_TX_STATS;
 	total += rx_queues * IONIC_NUM_RX_STATS;
 
-	/* port stats */
-	total += IONIC_NUM_PORT_STATS;
-
 	if (test_bit(IONIC_LIF_F_UP, lif->state) &&
 	    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
 		/* tx debug stats */
@@ -318,8 +336,14 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
 		ionic_sw_stats_get_tx_strings(lif, buf, q_num);
 
+	if (lif->hwstamp_txq)
+		ionic_sw_stats_get_tx_strings(lif, buf, lif->hwstamp_txq->q.index);
+
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
 		ionic_sw_stats_get_rx_strings(lif, buf, q_num);
+
+	if (lif->hwstamp_rxq)
+		ionic_sw_stats_get_rx_strings(lif, buf, lif->hwstamp_rxq->q.index);
 }
 
 static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf,
@@ -434,8 +458,14 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
 		ionic_sw_stats_get_txq_values(lif, buf, q_num);
 
+	if (lif->hwstamp_txq)
+		ionic_sw_stats_get_txq_values(lif, buf, lif->hwstamp_txq->q.index);
+
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++)
 		ionic_sw_stats_get_rxq_values(lif, buf, q_num);
+
+	if (lif->hwstamp_rxq)
+		ionic_sw_stats_get_rxq_values(lif, buf, lif->hwstamp_rxq->q.index);
 }
 
 const struct ionic_stats_group_intf ionic_stats_groups[] = {
-- 
cgit v1.2.3


From afeefec6773607552c450ad2eeb43f39173c2d5c Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Thu, 1 Apr 2021 10:56:10 -0700
Subject: ionic: advertise support for hardware timestamps

Let the network stack know we've got support for timestamping
the packets.

Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index e14c93fbbd68..ee56fed12e07 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1540,6 +1540,9 @@ static int ionic_set_nic_features(struct ionic_lif *lif,
 
 	ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features);
 
+	if (lif->phc)
+		ctx.cmd.lif_setattr.features |= cpu_to_le64(IONIC_ETH_HW_TIMESTAMP);
+
 	err = ionic_adminq_post_wait(lif, &ctx);
 	if (err)
 		return err;
@@ -1587,6 +1590,8 @@ static int ionic_set_nic_features(struct ionic_lif *lif,
 		dev_dbg(dev, "feature ETH_HW_TSO_UDP\n");
 	if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM)
 		dev_dbg(dev, "feature ETH_HW_TSO_UDP_CSUM\n");
+	if (lif->hw_features & IONIC_ETH_HW_TIMESTAMP)
+		dev_dbg(dev, "feature ETH_HW_TIMESTAMP\n");
 
 	return 0;
 }
@@ -2260,6 +2265,20 @@ static int ionic_stop(struct net_device *netdev)
 	return 0;
 }
 
+static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct ionic_lif *lif = netdev_priv(netdev);
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return ionic_lif_hwstamp_set(lif, ifr);
+	case SIOCGHWTSTAMP:
+		return ionic_lif_hwstamp_get(lif, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int ionic_get_vf_config(struct net_device *netdev,
 			       int vf, struct ifla_vf_info *ivf)
 {
@@ -2508,6 +2527,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 static const struct net_device_ops ionic_netdev_ops = {
 	.ndo_open               = ionic_open,
 	.ndo_stop               = ionic_stop,
+	.ndo_do_ioctl		= ionic_do_ioctl,
 	.ndo_start_xmit		= ionic_start_xmit,
 	.ndo_get_stats64	= ionic_get_stats64,
 	.ndo_set_rx_mode	= ionic_ndo_set_rx_mode,
@@ -3331,6 +3351,8 @@ int ionic_lif_register(struct ionic_lif *lif)
 {
 	int err;
 
+	ionic_lif_register_phc(lif);
+
 	INIT_WORK(&lif->ionic->nb_work, ionic_lif_notify_work);
 
 	lif->ionic->nb.notifier_call = ionic_lif_notify;
@@ -3343,6 +3365,7 @@ int ionic_lif_register(struct ionic_lif *lif)
 	err = register_netdev(lif->netdev);
 	if (err) {
 		dev_err(lif->ionic->dev, "Cannot register net device, aborting\n");
+		ionic_lif_unregister_phc(lif);
 		return err;
 	}
 
@@ -3364,6 +3387,8 @@ void ionic_lif_unregister(struct ionic_lif *lif)
 	if (lif->netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(lif->netdev);
 
+	ionic_lif_unregister_phc(lif);
+
 	lif->registered = false;
 }
 
-- 
cgit v1.2.3


From a16195e35cd013fc10023e79640bb1612e5d4457 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 1 Apr 2021 16:19:41 -0700
Subject: mptcp: add mib for token creation fallback

If the MPTCP protocol is unable to create a new token,
the socket fallback to plain TCP, let's keep track
of such events via a specific MIB.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/mib.c      | 1 +
 net/mptcp/mib.h      | 1 +
 net/mptcp/protocol.c | 4 +++-
 net/mptcp/subflow.c  | 3 +++
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 3780c29c321d..b0429aca4f76 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -13,6 +13,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
 	SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
 	SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+	SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
 	SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
 	SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
 	SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 72afbc135f8e..50e1668c9a01 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -6,6 +6,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_MPCAPABLEPASSIVEACK,	/* Received third ACK with MP_CAPABLE */
 	MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
 	MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+	MPTCP_MIB_TOKENFALLBACKINIT,	/* Could not init/allocate token */
 	MPTCP_MIB_RETRANSSEGS,		/* Segments retransmitted at the MPTCP-level */
 	MPTCP_MIB_JOINNOTOKEN,		/* Received MP_JOIN but the token was not found */
 	MPTCP_MIB_JOINSYNRX,		/* Received a SYN + MP_JOIN */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 171b77537dcb..3b50e8cc0c5f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3224,8 +3224,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
 		mptcp_subflow_early_fallback(msk, subflow);
 #endif
-	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
+	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
+		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
 		mptcp_subflow_early_fallback(msk, subflow);
+	}
 
 do_connect:
 	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6c074d3db0ed..b96e8dc01f08 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -165,6 +165,7 @@ again:
 			if (mptcp_token_exists(subflow_req->token)) {
 				if (retries-- > 0)
 					goto again;
+				SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
 			} else {
 				subflow_req->mp_capable = 1;
 			}
@@ -176,6 +177,8 @@ again:
 			subflow_req->mp_capable = 1;
 		else if (retries-- > 0)
 			goto again;
+		else
+			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
 
 	} else if (mp_opt.mp_join && listener->request_mptcp) {
 		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
-- 
cgit v1.2.3


From 5695eb8891f9eb5317ca0c3af7a773468524022d Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 1 Apr 2021 16:19:42 -0700
Subject: mptcp: add active MPC mibs

We are not currently tracking the active MPTCP connection
attempts. Let's add the related counters.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/mib.c      | 2 ++
 net/mptcp/mib.h      | 2 ++
 net/mptcp/protocol.c | 2 ++
 net/mptcp/subflow.c  | 1 +
 4 files changed, 7 insertions(+)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index b0429aca4f76..eb2dc6dbe212 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -10,6 +10,8 @@
 
 static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE),
+	SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE),
+	SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK),
 	SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
 	SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
 	SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 50e1668c9a01..f0da4f060fe1 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -3,6 +3,8 @@
 enum linux_mptcp_mib_field {
 	MPTCP_MIB_NUM = 0,
 	MPTCP_MIB_MPCAPABLEPASSIVE,	/* Received SYN with MP_CAPABLE */
+	MPTCP_MIB_MPCAPABLEACTIVE,	/* Sent SYN with MP_CAPABLE */
+	MPTCP_MIB_MPCAPABLEACTIVEACK,	/* Received SYN/ACK with MP_CAPABLE */
 	MPTCP_MIB_MPCAPABLEPASSIVEACK,	/* Received third ACK with MP_CAPABLE */
 	MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
 	MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3b50e8cc0c5f..0c916d48cad8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3228,6 +3228,8 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
 		mptcp_subflow_early_fallback(msk, subflow);
 	}
+	if (likely(!__mptcp_check_fallback(msk)))
+		MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
 
 do_connect:
 	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index b96e8dc01f08..7a5f50d00d4b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -395,6 +395,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 		subflow->remote_key = mp_opt.sndr_key;
 		pr_debug("subflow=%p, remote_key=%llu", subflow,
 			 subflow->remote_key);
+		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
 		mptcp_finish_connect(sk);
 	} else if (subflow->request_join) {
 		u8 hmac[SHA256_DIGEST_SIZE];
-- 
cgit v1.2.3


From 781bf13d4f3b033002f7f6728ac0b0d1ebe8f176 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 1 Apr 2021 16:19:43 -0700
Subject: mptcp: remove unneeded check on first subflow

Currently we explicitly check for the first subflow being
NULL in a couple of places, even if we don't need any
special actions in such scenario.

Just drop the unneeded checks, to avoid confusion.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 2 +-
 net/mptcp/protocol.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 69cafaacc31b..68361d28dc67 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -952,7 +952,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us
 	 * should match. If they mismatch, the peer is misbehaving and
 	 * we will prefer the most recent information.
 	 */
-	if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
+	if (READ_ONCE(msk->rcv_data_fin))
 		return false;
 
 	WRITE_ONCE(msk->rcv_data_fin_seq,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0c916d48cad8..531ee24aa827 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -493,7 +493,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
 	u64 rcv_data_fin_seq;
 	bool ret = false;
 
-	if (__mptcp_check_fallback(msk) || !msk->first)
+	if (__mptcp_check_fallback(msk))
 		return ret;
 
 	/* Need to ack a DATA_FIN received from a peer while this side
-- 
cgit v1.2.3


From dc87efdb1a5cd46134a9d490480160e303bc6eef Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:19:44 -0700
Subject: mptcp: add mptcp reset option support

The MPTCP reset option allows to carry a mptcp-specific error code that
provides more information on the nature of a connection reset.

Reset option data received gets stored in the subflow context so it can
be sent to userspace via the 'subflow closed' netlink event.

When a subflow is closed, the desired error code that should be sent to
the peer is also placed in the subflow context structure.

If a reset is sent before subflow establishment could complete, e.g. on
HMAC failure during an MP_JOIN operation, the mptcp skb extension is
used to store the reset information.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h        | 18 +++++++++++--
 include/uapi/linux/mptcp.h | 11 ++++++++
 net/ipv4/tcp_ipv4.c        | 21 ++++++++++++---
 net/ipv6/tcp_ipv6.c        | 14 +++++++++-
 net/mptcp/options.c        | 67 ++++++++++++++++++++++++++++++++++++++++++----
 net/mptcp/pm_netlink.c     | 12 +++++++++
 net/mptcp/protocol.c       | 12 ++++++---
 net/mptcp/protocol.h       | 14 +++++++++-
 net/mptcp/subflow.c        | 30 ++++++++++++++++++---
 9 files changed, 180 insertions(+), 19 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index cea69c801595..16fe34d139c3 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -30,8 +30,8 @@ struct mptcp_ext {
 			ack64:1,
 			mpc_map:1,
 			frozen:1,
-			__unused:1;
-	/* one byte hole */
+			reset_transient:1;
+	u8		reset_reason:4;
 };
 
 #define MPTCP_RM_IDS_MAX	8
@@ -58,6 +58,8 @@ struct mptcp_out_options {
 	struct mptcp_rm_list rm_list;
 	u8 join_id;
 	u8 backup;
+	u8 reset_reason:4;
+	u8 reset_transient:1;
 	u32 nonce;
 	u64 thmac;
 	u32 token;
@@ -156,6 +158,16 @@ void mptcp_seq_show(struct seq_file *seq);
 int mptcp_subflow_init_cookie_req(struct request_sock *req,
 				  const struct sock *sk_listener,
 				  struct sk_buff *skb);
+
+__be32 mptcp_get_reset_option(const struct sk_buff *skb);
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
+{
+	if (skb_ext_exist(skb, SKB_EXT_MPTCP))
+		return mptcp_get_reset_option(skb);
+
+	return htonl(0u);
+}
 #else
 
 static inline void mptcp_init(void)
@@ -236,6 +248,8 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
 {
 	return 0; /* TCP fallback */
 }
+
+static inline __be32 mptcp_reset_option(const struct sk_buff *skb)  { return htonl(0u); }
 #endif /* CONFIG_MPTCP */
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index e1172c1ffdfd..8eb3c0844bff 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -174,10 +174,21 @@ enum mptcp_event_attr {
 	MPTCP_ATTR_FLAGS,	/* u16 */
 	MPTCP_ATTR_TIMEOUT,	/* u32 */
 	MPTCP_ATTR_IF_IDX,	/* s32 */
+	MPTCP_ATTR_RESET_REASON,/* u32 */
+	MPTCP_ATTR_RESET_FLAGS, /* u32 */
 
 	__MPTCP_ATTR_AFTER_LAST
 };
 
 #define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
 
+/* MPTCP Reset reason codes, rfc8684 */
+#define MPTCP_RST_EUNSPEC	0
+#define MPTCP_RST_EMPTCP	1
+#define MPTCP_RST_ERESOURCE	2
+#define MPTCP_RST_EPROHIBIT	3
+#define MPTCP_RST_EWQ2BIG	4
+#define MPTCP_RST_EBADPERF	5
+#define MPTCP_RST_EMIDDLEBOX	6
+
 #endif /* _UAPI_MPTCP_H */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dfc6d1c0e710..312184cead57 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -655,14 +655,18 @@ EXPORT_SYMBOL(tcp_v4_send_check);
  *	Exception: precedence violation. We do not implement it in any case.
  */
 
+#ifdef CONFIG_TCP_MD5SIG
+#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED
+#else
+#define OPTION_BYTES sizeof(__be32)
+#endif
+
 static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
-#ifdef CONFIG_TCP_MD5SIG
-		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
-#endif
+		__be32 opt[OPTION_BYTES / sizeof(__be32)];
 	} rep;
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
@@ -770,6 +774,17 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 				     ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
+	/* Can't co-exist with TCPMD5, hence check rep.opt[0] */
+	if (rep.opt[0] == 0) {
+		__be32 mrst = mptcp_reset_option(skb);
+
+		if (mrst) {
+			rep.opt[0] = mrst;
+			arg.iov[0].iov_len += sizeof(mrst);
+			rep.th.doff = arg.iov[0].iov_len / 4;
+		}
+	}
+
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bff22d6ef516..5f47c0b6e3de 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -879,8 +879,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
 	struct sock *ctl_sk = net->ipv6.tcp_sk;
 	unsigned int tot_len = sizeof(struct tcphdr);
+	__be32 mrst = 0, *topt;
 	struct dst_entry *dst;
-	__be32 *topt;
 	__u32 mark = 0;
 
 	if (tsecr)
@@ -890,6 +890,15 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
+#ifdef CONFIG_MPTCP
+	if (rst && !key) {
+		mrst = mptcp_reset_option(skb);
+
+		if (mrst)
+			tot_len += sizeof(__be32);
+	}
+#endif
+
 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 			 GFP_ATOMIC);
 	if (!buff)
@@ -920,6 +929,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 		*topt++ = htonl(tsecr);
 	}
 
+	if (mrst)
+		*topt++ = mrst;
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (key) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 68361d28dc67..4b7119eb2c31 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -305,6 +305,18 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		mp_opt->fastclose = 1;
 		break;
 
+	case MPTCPOPT_RST:
+		if (opsize != TCPOLEN_MPTCP_RST)
+			break;
+
+		if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
+			break;
+		mp_opt->reset = 1;
+		flags = *ptr++;
+		mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
+		mp_opt->reset_reason = *ptr;
+		break;
+
 	default:
 		break;
 	}
@@ -327,6 +339,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 	mp_opt->rm_addr = 0;
 	mp_opt->dss = 0;
 	mp_opt->mp_prio = 0;
+	mp_opt->reset = 0;
 
 	length = (th->doff * 4) - sizeof(struct tcphdr);
 	ptr = (const unsigned char *)(th + 1);
@@ -726,6 +739,22 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
 	return true;
 }
 
+static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+						   unsigned int *size,
+						   unsigned int remaining,
+						   struct mptcp_out_options *opts)
+{
+	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+	if (remaining < TCPOLEN_MPTCP_RST)
+		return;
+
+	*size = TCPOLEN_MPTCP_RST;
+	opts->suboptions |= OPTION_MPTCP_RST;
+	opts->reset_transient = subflow->reset_transient;
+	opts->reset_reason = subflow->reset_reason;
+}
+
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 			       unsigned int *size, unsigned int remaining,
 			       struct mptcp_out_options *opts)
@@ -741,11 +770,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
 	if (unlikely(__mptcp_check_fallback(msk)))
 		return false;
 
-	/* prevent adding of any MPTCP related options on reset packet
-	 * until we support MP_TCPRST/MP_FASTCLOSE
-	 */
-	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
-		return false;
+	if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
+		mptcp_established_options_rst(sk, skb, size, remaining, opts);
+		return true;
+	}
 
 	snd_data_fin = mptcp_data_fin_enabled(msk);
 	if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
@@ -1062,6 +1090,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		mp_opt.mp_prio = 0;
 	}
 
+	if (mp_opt.reset) {
+		subflow->reset_seen = 1;
+		subflow->reset_reason = mp_opt.reset_reason;
+		subflow->reset_transient = mp_opt.reset_transient;
+	}
+
 	if (!mp_opt.dss)
 		return;
 
@@ -1289,6 +1323,12 @@ mp_capable_done:
 		ptr += 5;
 	}
 
+	if (OPTION_MPTCP_RST & opts->suboptions)
+		*ptr++ = mptcp_option(MPTCPOPT_RST,
+				      TCPOLEN_MPTCP_RST,
+				      opts->reset_transient,
+				      opts->reset_reason);
+
 	if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
 		struct mptcp_ext *mpext = &opts->ext_copy;
 		u8 len = TCPOLEN_MPTCP_DSS_BASE;
@@ -1340,3 +1380,20 @@ mp_capable_done:
 	if (tp)
 		mptcp_set_rwin(tp);
 }
+
+__be32 mptcp_get_reset_option(const struct sk_buff *skb)
+{
+	const struct mptcp_ext *ext = mptcp_get_ext(skb);
+	u8 flags, reason;
+
+	if (ext) {
+		flags = ext->reset_transient;
+		reason = ext->reset_reason;
+
+		return mptcp_option(MPTCPOPT_RST, TCPOLEN_MPTCP_RST,
+				    flags, reason);
+	}
+
+	return htonl(0u);
+}
+EXPORT_SYMBOL_GPL(mptcp_get_reset_option);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index cadafafa1049..51be6c34b339 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1687,9 +1687,21 @@ static int mptcp_event_sub_closed(struct sk_buff *skb,
 				  const struct mptcp_sock *msk,
 				  const struct sock *ssk)
 {
+	const struct mptcp_subflow_context *sf;
+
 	if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
 		return -EMSGSIZE;
 
+	sf = mptcp_subflow_ctx(ssk);
+	if (!sf->reset_seen)
+		return 0;
+
+	if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason))
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 531ee24aa827..e894345d10c1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3090,14 +3090,18 @@ bool mptcp_finish_join(struct sock *ssk)
 	pr_debug("msk=%p, subflow=%p", msk, subflow);
 
 	/* mptcp socket already closing? */
-	if (!mptcp_is_fully_established(parent))
+	if (!mptcp_is_fully_established(parent)) {
+		subflow->reset_reason = MPTCP_RST_EMPTCP;
 		return false;
+	}
 
 	if (!msk->pm.server_side)
 		goto out;
 
-	if (!mptcp_pm_allow_new_subflow(msk))
+	if (!mptcp_pm_allow_new_subflow(msk)) {
+		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
 		return false;
+	}
 
 	/* active connections are already on conn_list, and we can't acquire
 	 * msk lock here.
@@ -3111,8 +3115,10 @@ bool mptcp_finish_join(struct sock *ssk)
 		sock_hold(ssk);
 	}
 	spin_unlock_bh(&msk->join_list_lock);
-	if (!ret)
+	if (!ret) {
+		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
 		return false;
+	}
 
 	/* attach to msk socket only after we are sure he will deal with us
 	 * at close time
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e8c5ff2b8ace..40e9b05856cd 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,7 @@
 #define OPTION_MPTCP_RM_ADDR	BIT(8)
 #define OPTION_MPTCP_FASTCLOSE	BIT(9)
 #define OPTION_MPTCP_PRIO	BIT(10)
+#define OPTION_MPTCP_RST	BIT(11)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
@@ -36,6 +37,7 @@
 #define MPTCPOPT_MP_PRIO	5
 #define MPTCPOPT_MP_FAIL	6
 #define MPTCPOPT_MP_FASTCLOSE	7
+#define MPTCPOPT_RST		8
 
 /* MPTCP suboption lengths */
 #define TCPOLEN_MPTCP_MPC_SYN		4
@@ -65,6 +67,7 @@
 #define TCPOLEN_MPTCP_PRIO		3
 #define TCPOLEN_MPTCP_PRIO_ALIGN	4
 #define TCPOLEN_MPTCP_FASTCLOSE		12
+#define TCPOLEN_MPTCP_RST		4
 
 /* MPTCP MP_JOIN flags */
 #define MPTCPOPT_BACKUP		BIT(0)
@@ -94,6 +97,9 @@
 /* MPTCP MP_PRIO flags */
 #define MPTCP_PRIO_BKUP		BIT(0)
 
+/* MPTCP TCPRST flags */
+#define MPTCP_RST_TRANSIENT	BIT(0)
+
 /* MPTCP socket flags */
 #define MPTCP_DATA_READY	0
 #define MPTCP_NOSPACE		1
@@ -123,6 +129,7 @@ struct mptcp_options_received {
 	u16	mp_capable : 1,
 		mp_join : 1,
 		fastclose : 1,
+		reset : 1,
 		dss : 1,
 		add_addr : 1,
 		rm_addr : 1,
@@ -152,6 +159,8 @@ struct mptcp_options_received {
 	};
 	u64	ahmac;
 	u16	port;
+	u8	reset_reason:4;
+	u8	reset_transient:1;
 };
 
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -422,6 +431,9 @@ struct mptcp_subflow_context {
 	u8	hmac[MPTCPOPT_HMAC_LEN];
 	u8	local_id;
 	u8	remote_id;
+	u8	reset_seen:1;
+	u8	reset_transient:1;
+	u8	reset_reason:4;
 
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
@@ -742,7 +754,7 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
 
-static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
 {
 	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
 }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 7a5f50d00d4b..223d6be5fc3b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -115,6 +115,16 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc
 	return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
 }
 
+static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
+{
+	struct mptcp_ext *mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
+
+	if (mpext) {
+		memset(mpext, 0, sizeof(*mpext));
+		mpext->reset_reason = reason;
+	}
+}
+
 /* Init mptcp request socket.
  *
  * Returns an error code if a JOIN has failed and a TCP reset
@@ -190,8 +200,10 @@ again:
 		subflow_req->msk = subflow_token_join_request(req);
 
 		/* Can't fall back to TCP in this case. */
-		if (!subflow_req->msk)
+		if (!subflow_req->msk) {
+			subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
 			return -EPERM;
+		}
 
 		if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
 			pr_debug("syn inet_sport=%d %d",
@@ -400,8 +412,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 	} else if (subflow->request_join) {
 		u8 hmac[SHA256_DIGEST_SIZE];
 
-		if (!mp_opt.mp_join)
+		if (!mp_opt.mp_join) {
+			subflow->reset_reason = MPTCP_RST_EMPTCP;
 			goto do_reset;
+		}
 
 		subflow->thmac = mp_opt.thmac;
 		subflow->remote_nonce = mp_opt.nonce;
@@ -410,6 +424,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 
 		if (!subflow_thmac_valid(subflow)) {
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
+			subflow->reset_reason = MPTCP_RST_EMPTCP;
 			goto do_reset;
 		}
 
@@ -438,6 +453,7 @@ fallback:
 	return;
 
 do_reset:
+	subflow->reset_transient = 0;
 	mptcp_subflow_reset(sk);
 }
 
@@ -654,8 +670,10 @@ create_child:
 		 * to reset the context to non MPTCP status.
 		 */
 		if (!ctx || fallback) {
-			if (fallback_is_fatal)
+			if (fallback_is_fatal) {
+				subflow_add_reset_reason(skb, MPTCP_RST_EMPTCP);
 				goto dispose_child;
+			}
 
 			subflow_drop_ctx(child);
 			goto out;
@@ -690,8 +708,10 @@ create_child:
 			struct mptcp_sock *owner;
 
 			owner = subflow_req->msk;
-			if (!owner)
+			if (!owner) {
+				subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
 				goto dispose_child;
+			}
 
 			/* move the msk reference ownership to the subflow */
 			subflow_req->msk = NULL;
@@ -1056,6 +1076,8 @@ fatal:
 	smp_wmb();
 	ssk->sk_error_report(ssk);
 	tcp_set_state(ssk, TCP_CLOSE);
+	subflow->reset_transient = 0;
+	subflow->reset_reason = MPTCP_RST_EMPTCP;
 	tcp_send_active_reset(ssk, GFP_ATOMIC);
 	subflow->data_avail = 0;
 	return false;
-- 
cgit v1.2.3


From 5888a61cb4e00695075bbacfd86f3fa73af00413 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 1 Apr 2021 16:19:45 -0700
Subject: selftests: mptcp: launch mptcp_connect with timeout

'mptcp_connect' already has a timeout for poll() but in some cases, it
is not enough.

With "timeout" tool, we will force the command to fail if it doesn't
finish on time. Thanks to that, the script will continue and display
details about the current state before marking the test as failed.
Displaying this state is very important to be able to understand the
issue. Best to have our CI reporting the issue than just "the test
hanged".

Note that in mptcp_connect.sh, we were using a long timeout to validate
the fact we cannot create a socket if a sysctl is set. We don't need
this timeout.

In diag.sh, we want to send signals to mptcp_connect instances that have
been started in the netns. But we cannot send this signal to 'timeout'
otherwise that will stop the timeout and messages telling us SIGUSR1 has
been received will be printed. Instead of trying to find the right PID
and storing them in an array, we can simply use the output of
'ip netns pids' which is all the PIDs we want to send signal to.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/160
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/diag.sh          | 55 ++++++++++++++--------
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 15 ++++--
 tools/testing/selftests/net/mptcp/mptcp_join.sh    | 22 ++++++---
 tools/testing/selftests/net/mptcp/simult_flows.sh  | 13 +++--
 4 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 39edce4f541c..2674ba20d524 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
 ns="ns1-$rndh"
 ksft_skip=4
 test_cnt=1
+timeout_poll=100
+timeout_test=$((timeout_poll * 2 + 1))
 ret=0
-pids=()
 
 flush_pids()
 {
@@ -14,18 +15,14 @@ flush_pids()
 	# give it some time
 	sleep 1.1
 
-	for pid in ${pids[@]}; do
-		[ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
-	done
-	pids=()
+	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
 }
 
 cleanup()
 {
+	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
+
 	ip netns del $ns
-	for pid in ${pids[@]}; do
-		[ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
-	done
 }
 
 ip -Version > /dev/null 2>&1
@@ -79,39 +76,57 @@ trap cleanup EXIT
 ip netns add $ns
 ip -n $ns link set dev lo up
 
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+echo "a" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10000 -l -t ${timeout_poll} \
+				0.0.0.0 >/dev/null &
 sleep 0.1
-pids[0]=$!
 chk_msk_nr 0 "no msk on netns creation"
 
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+echo "b" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10000 -j -t ${timeout_poll} \
+				127.0.0.1 >/dev/null &
 sleep 0.1
-pids[1]=$!
 chk_msk_nr 2 "after MPC handshake "
 chk_msk_remote_key_nr 2 "....chk remote_key"
 chk_msk_fallback_nr 0 "....chk no fallback"
 flush_pids
 
 
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
-pids[0]=$!
+echo "a" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
+				0.0.0.0 >/dev/null &
 sleep 0.1
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
-pids[1]=$!
+echo "b" | \
+	timeout ${timeout_test} \
+		ip netns exec $ns \
+			./mptcp_connect -p 10001 -j -t ${timeout_poll} \
+				127.0.0.1 >/dev/null &
 sleep 0.1
 chk_msk_fallback_nr 1 "check fallback"
 flush_pids
 
 NR_CLIENTS=100
 for I in `seq 1 $NR_CLIENTS`; do
-	echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null  &
-	pids[$((I*2))]=$!
+	echo "a" | \
+		timeout ${timeout_test} \
+			ip netns exec $ns \
+				./mptcp_connect -p $((I+10001)) -l -w 10 \
+					-t ${timeout_poll} 0.0.0.0 >/dev/null &
 done
 sleep 0.1
 
 for I in `seq 1 $NR_CLIENTS`; do
-	echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
-	pids[$((I*2 + 1))]=$!
+	echo "b" | \
+		timeout ${timeout_test} \
+			ip netns exec $ns \
+				./mptcp_connect -p $((I+10001)) -w 10 \
+					-t ${timeout_poll} 127.0.0.1 >/dev/null &
 done
 sleep 1.5
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 10a030b53b23..65b3b983efc2 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -11,7 +11,8 @@ cin=""
 cout=""
 ksft_skip=4
 capture=false
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
 ipv6=true
 ethtool_random_on=true
 tc_delay="$((RANDOM%50))"
@@ -273,7 +274,7 @@ check_mptcp_disabled()
 	ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
 
 	local err=0
-	LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+	LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
 		grep -q "^socket: Protocol not available$" && err=1
 	ip netns delete ${disabled_ns}
 
@@ -430,14 +431,20 @@ do_transfer()
 	local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
 	local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
 
-	ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+	timeout ${timeout_test} \
+		ip netns exec ${listener_ns} \
+			./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+				$extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
 	wait_local_port_listen "${listener_ns}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
-	ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+	timeout ${timeout_test} \
+		ip netns exec ${connector_ns} \
+			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+				$extra_args $connect_addr < "$cin" > "$cout" &
 	local cpid=$!
 
 	wait $cpid
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index d2273b88e72c..56ffdad01526 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,8 @@ cin=""
 cinsent=""
 cout=""
 ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
 mptcp_connect=""
 capture=0
 do_all_tests=1
@@ -247,17 +248,26 @@ do_transfer()
 		local_addr="0.0.0.0"
 	fi
 
-	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \
-		-s ${srv_proto} ${local_addr} < "$sin" > "$sout" &
+	timeout ${timeout_test} \
+		ip netns exec ${listener_ns} \
+			$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+				${local_addr} < "$sin" > "$sout" &
 	spid=$!
 
 	sleep 1
 
 	if [ "$test_link_fail" -eq 0 ];then
-		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+		timeout ${timeout_test} \
+			ip netns exec ${connector_ns} \
+				$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+					$connect_addr < "$cin" > "$cout" &
 	else
-		( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \
-		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" &
+		( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+			tee "$cinsent" | \
+			timeout ${timeout_test} \
+				ip netns exec ${connector_ns} \
+					$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+						$connect_addr > "$cout" &
 	fi
 	cpid=$!
 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f039ee57eb3c..3aeef3bcb101 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -7,7 +7,8 @@ ns2="ns2-$rndh"
 ns3="ns3-$rndh"
 capture=false
 ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
 test_cnt=1
 ret=0
 bail=0
@@ -157,14 +158,20 @@ do_transfer()
 		sleep 1
 	fi
 
-	ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+	timeout ${timeout_test} \
+		ip netns exec ${ns3} \
+			./mptcp_connect -jt ${timeout_poll} -l -p $port \
+				0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
 	wait_local_port_listen "${ns3}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
-	ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+	timeout ${timeout_test} \
+		ip netns exec ${ns1} \
+			./mptcp_connect -jt ${timeout_poll} -p $port \
+				10.0.3.3 < "$cin" > "$cout" &
 	local cpid=$!
 
 	wait $cpid
-- 
cgit v1.2.3


From 76e5e27ca98748242ba7c1fc24f06c09002eee45 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 1 Apr 2021 16:19:46 -0700
Subject: selftests: mptcp: init nstat history

Not to be impacted by packets sent between sub-tests.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 65b3b983efc2..385cdc98aed8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -426,6 +426,13 @@ do_transfer()
 		sleep 1
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat -n
+	if [ ${listener_ns} != ${connector_ns} ]; then
+		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+			nstat -n
+	fi
+
 	local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
 	local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
 	local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
-- 
cgit v1.2.3


From c2a55e8fd80f1cd9a16a24a08f6df50fc20a65ed Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 1 Apr 2021 16:19:47 -0700
Subject: selftests: mptcp: dump more info on mpjoin errors

Very occasionally, MPTCP selftests fail. Yeah, I saw that at least once!

Here we provide more details in case of errors with mptcp_join.sh script
like it was done with mptcp_connect.sh, see
commit 767389c8dd55 ("selftests: mptcp: dump more info on errors")

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 56ffdad01526..abeb24b7f8ec 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -78,6 +78,7 @@ cleanup_partial()
 
 	for netns in "$ns1" "$ns2"; do
 		ip netns del $netns
+		rm -f /tmp/$netns.{nstat,out}
 	done
 }
 
@@ -233,6 +234,11 @@ do_transfer()
 		sleep 1
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat -n
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat -n
+
 	if [ $speed = "fast" ]; then
 		mptcp_connect="./mptcp_connect -j"
 	elif [ $speed = "slow" ]; then
@@ -383,12 +389,19 @@ do_transfer()
 	    kill $cappid
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat | grep Tcp > /tmp/${listener_ns}.out
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat | grep Tcp > /tmp/${connector_ns}.out
+
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		echo " client exit code $retc, server $rets" 1>&2
 		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+		cat /tmp/${listener_ns}.out
 		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+		cat /tmp/${connector_ns}.out
 
 		cat "$capout"
 		ret=1
-- 
cgit v1.2.3


From 28d137cc8c0bd2c9501b8eb0855b631289c7b4a3 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 2 Apr 2021 12:55:30 +0300
Subject: dpaa2-eth: rename dpaa2_eth_xdp_release_buf into
 dpaa2_eth_recycle_buf

Rename the dpaa2_eth_xdp_release_buf function into dpaa2_eth_recycle_buf
since in the next patches we'll be using the same recycle mechanism for
the normal stack path beside for XDP_DROP.

Also, rename the array which holds the buffers to be recycled so that it
does not have any reference to XDP.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 26 ++++++++++++------------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  6 ++++--
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index fc0eb82cdd6a..f545cb99388a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -223,31 +223,31 @@ static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array,
 	}
 }
 
-static void dpaa2_eth_xdp_release_buf(struct dpaa2_eth_priv *priv,
-				      struct dpaa2_eth_channel *ch,
-				      dma_addr_t addr)
+static void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv,
+				  struct dpaa2_eth_channel *ch,
+				  dma_addr_t addr)
 {
 	int retries = 0;
 	int err;
 
-	ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
-	if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
+	ch->recycled_bufs[ch->recycled_bufs_cnt++] = addr;
+	if (ch->recycled_bufs_cnt < DPAA2_ETH_BUFS_PER_CMD)
 		return;
 
 	while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
-					       ch->xdp.drop_bufs,
-					       ch->xdp.drop_cnt)) == -EBUSY) {
+					       ch->recycled_bufs,
+					       ch->recycled_bufs_cnt)) == -EBUSY) {
 		if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES)
 			break;
 		cpu_relax();
 	}
 
 	if (err) {
-		dpaa2_eth_free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
-		ch->buf_count -= ch->xdp.drop_cnt;
+		dpaa2_eth_free_bufs(priv, ch->recycled_bufs, ch->recycled_bufs_cnt);
+		ch->buf_count -= ch->recycled_bufs_cnt;
 	}
 
-	ch->xdp.drop_cnt = 0;
+	ch->recycled_bufs_cnt = 0;
 }
 
 static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv,
@@ -300,7 +300,7 @@ static void dpaa2_eth_xdp_tx_flush(struct dpaa2_eth_priv *priv,
 		ch->stats.xdp_tx++;
 	}
 	for (i = enqueued; i < fq->xdp_tx_fds.num; i++) {
-		dpaa2_eth_xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
+		dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
 		percpu_stats->tx_errors++;
 		ch->stats.xdp_tx_err++;
 	}
@@ -382,7 +382,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
 		trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
 		fallthrough;
 	case XDP_DROP:
-		dpaa2_eth_xdp_release_buf(priv, ch, addr);
+		dpaa2_eth_recycle_buf(priv, ch, addr);
 		ch->stats.xdp_drop++;
 		break;
 	case XDP_REDIRECT:
@@ -403,7 +403,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
 				free_pages((unsigned long)vaddr, 0);
 			} else {
 				ch->buf_count++;
-				dpaa2_eth_xdp_release_buf(priv, ch, addr);
+				dpaa2_eth_recycle_buf(priv, ch, addr);
 			}
 			ch->stats.xdp_drop++;
 		} else {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 9b6a89709ce1..9ba31c2706bb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -438,8 +438,6 @@ struct dpaa2_eth_fq {
 
 struct dpaa2_eth_ch_xdp {
 	struct bpf_prog *prog;
-	u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD];
-	int drop_cnt;
 	unsigned int res;
 };
 
@@ -457,6 +455,10 @@ struct dpaa2_eth_channel {
 	struct dpaa2_eth_ch_xdp xdp;
 	struct xdp_rxq_info xdp_rxq;
 	struct list_head *rx_list;
+
+	/* Buffers to be recycled back in the buffer pool */
+	u64 recycled_bufs[DPAA2_ETH_BUFS_PER_CMD];
+	int recycled_bufs_cnt;
 };
 
 struct dpaa2_eth_dist_fields {
-- 
cgit v1.2.3


From 50f826999a80a100218b0cbf4f14057bc0edb3a3 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 2 Apr 2021 12:55:31 +0300
Subject: dpaa2-eth: add rx copybreak support

DMA unmapping, allocating a new buffer and DMA mapping it back on the
refill path is really not that efficient. Proper buffer recycling (page
pool, flipping the page and using the other half) cannot be done for
DPAA2 since it's not a ring based controller but it rather deals with
multiple queues which all get their buffers from the same buffer pool on
Rx.

To circumvent these limitations, add support for Rx copybreak. For small
sized packets instead of creating a skb around the buffer in which the
frame was received, allocate a new sk buffer altogether, copy the
contents of the frame and release the initial page back into the buffer
pool.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 37 ++++++++++++++++++++++--
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  2 ++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index f545cb99388a..535b9079943c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -418,6 +418,34 @@ out:
 	return xdp_act;
 }
 
+static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
+					   const struct dpaa2_fd *fd,
+					   void *fd_vaddr)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	struct sk_buff *skb = NULL;
+	unsigned int skb_len;
+
+	if (fd_length > DPAA2_ETH_DEFAULT_COPYBREAK)
+		return NULL;
+
+	skb_len = fd_length + dpaa2_eth_needed_headroom(NULL);
+
+	skb = napi_alloc_skb(&ch->napi, skb_len);
+	if (!skb)
+		return NULL;
+
+	skb_reserve(skb, dpaa2_eth_needed_headroom(NULL));
+	skb_put(skb, fd_length);
+
+	memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
+
+	dpaa2_eth_recycle_buf(ch->priv, ch, dpaa2_fd_get_addr(fd));
+
+	return skb;
+}
+
 /* Main Rx frame processing routine */
 static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 			 struct dpaa2_eth_channel *ch,
@@ -459,9 +487,12 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
 			return;
 		}
 
-		dma_unmap_page(dev, addr, priv->rx_buf_size,
-			       DMA_BIDIRECTIONAL);
-		skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+		skb = dpaa2_eth_copybreak(ch, fd, vaddr);
+		if (!skb) {
+			dma_unmap_page(dev, addr, priv->rx_buf_size,
+				       DMA_BIDIRECTIONAL);
+			skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr);
+		}
 	} else if (fd_format == dpaa2_fd_sg) {
 		WARN_ON(priv->xdp_prog);
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 9ba31c2706bb..f8d2b4769983 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -489,6 +489,8 @@ struct dpaa2_eth_trap_data {
 	struct dpaa2_eth_priv *priv;
 };
 
+#define DPAA2_ETH_DEFAULT_COPYBREAK	512
+
 /* Driver private data */
 struct dpaa2_eth_priv {
 	struct net_device *net_dev;
-- 
cgit v1.2.3


From 8ed3cefc260e2ef2107cbd9484e4025f60c37bb5 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 2 Apr 2021 12:55:32 +0300
Subject: dpaa2-eth: export the rx copybreak value as an ethtool tunable

It's useful, especially for debugging purposes, to have the Rx copybreak
value changeable at runtime. Export it as an ethtool tunable.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   |  7 ++--
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h   |  2 ++
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 40 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 535b9079943c..e0c3c58e2ac7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -423,11 +423,12 @@ static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
 					   void *fd_vaddr)
 {
 	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	struct dpaa2_eth_priv *priv = ch->priv;
 	u32 fd_length = dpaa2_fd_get_len(fd);
 	struct sk_buff *skb = NULL;
 	unsigned int skb_len;
 
-	if (fd_length > DPAA2_ETH_DEFAULT_COPYBREAK)
+	if (fd_length > priv->rx_copybreak)
 		return NULL;
 
 	skb_len = fd_length + dpaa2_eth_needed_headroom(NULL);
@@ -441,7 +442,7 @@ static struct sk_buff *dpaa2_eth_copybreak(struct dpaa2_eth_channel *ch,
 
 	memcpy(skb->data, fd_vaddr + fd_offset, fd_length);
 
-	dpaa2_eth_recycle_buf(ch->priv, ch, dpaa2_fd_get_addr(fd));
+	dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd));
 
 	return skb;
 }
@@ -4333,6 +4334,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 
 	skb_queue_head_init(&priv->tx_skbs);
 
+	priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK;
+
 	/* Obtain a MC portal */
 	err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
 				     &priv->mc_io);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index f8d2b4769983..cdb623d5f2c1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -571,6 +571,8 @@ struct dpaa2_eth_priv {
 	struct devlink *devlink;
 	struct dpaa2_eth_trap_data *trap_data;
 	struct devlink_port devlink_port;
+
+	u32 rx_copybreak;
 };
 
 struct dpaa2_eth_devlink_priv {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index bf59708b869e..ad5e374eeccf 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -782,6 +782,44 @@ static int dpaa2_eth_get_ts_info(struct net_device *dev,
 	return 0;
 }
 
+static int dpaa2_eth_get_tunable(struct net_device *net_dev,
+				 const struct ethtool_tunable *tuna,
+				 void *data)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int err = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = priv->rx_copybreak;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int dpaa2_eth_set_tunable(struct net_device *net_dev,
+				 const struct ethtool_tunable *tuna,
+				 const void *data)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int err = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		priv->rx_copybreak = *(u32 *)data;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
 const struct ethtool_ops dpaa2_ethtool_ops = {
 	.get_drvinfo = dpaa2_eth_get_drvinfo,
 	.nway_reset = dpaa2_eth_nway_reset,
@@ -796,4 +834,6 @@ const struct ethtool_ops dpaa2_ethtool_ops = {
 	.get_rxnfc = dpaa2_eth_get_rxnfc,
 	.set_rxnfc = dpaa2_eth_set_rxnfc,
 	.get_ts_info = dpaa2_eth_get_ts_info,
+	.get_tunable = dpaa2_eth_get_tunable,
+	.set_tunable = dpaa2_eth_set_tunable,
 };
-- 
cgit v1.2.3


From ca4d4c34ae9aa5c3c0da76662c5e549d2fc0cc86 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 2 Apr 2021 14:44:42 +0300
Subject: nfc: pn533: prevent potential memory corruption

If the "type_a->nfcid_len" is too large then it would lead to memory
corruption in pn533_target_found_type_a() when we do:

	memcpy(nfc_tgt->nfcid1, tgt_type_a->nfcid_data, nfc_tgt->nfcid1_len);

Fixes: c3b1e1e8a76f ("NFC: Export NFCID1 from pn533")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/pn533/pn533.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index f1469ac8ff42..3fe5b81eda2d 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -706,6 +706,9 @@ static bool pn533_target_type_a_is_valid(struct pn533_target_type_a *type_a,
 	if (PN533_TYPE_A_SEL_CASCADE(type_a->sel_res) != 0)
 		return false;
 
+	if (type_a->nfcid_len > NFC_NFCID1_MAXSIZE)
+		return false;
+
 	return true;
 }
 
-- 
cgit v1.2.3


From 7f040aa322617acf1978b8de140f1bf9ff916617 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 2 Apr 2021 11:07:46 -0700
Subject: net: reorganize fields in netns_mib

Order fields to increase locality for most used protocols.

udplite and icmp are moved at the end.

Same for proc_net_devsnmp6 which is not used in fast path.

This potentially saves one cache line miss for typical TCP/UDP over IPv4/IPv6.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/mib.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 59b2c3a3db42..7e373664b1e7 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -5,22 +5,19 @@
 #include <net/snmp.h>
 
 struct netns_mib {
-	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 	DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+	DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+#endif
+
+	DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
 	DEFINE_SNMP_STAT(struct linux_mib, net_statistics);
-	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
-	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
-	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
-	DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
 
+	DEFINE_SNMP_STAT(struct udp_mib, udp_statistics);
 #if IS_ENABLED(CONFIG_IPV6)
-	struct proc_dir_entry *proc_net_devsnmp6;
 	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
-	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
-	DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
-	DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
-	DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
 #endif
+
 #ifdef CONFIG_XFRM_STATISTICS
 	DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics);
 #endif
@@ -30,6 +27,19 @@ struct netns_mib {
 #ifdef CONFIG_MPTCP
 	DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics);
 #endif
+
+	DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
+#endif
+
+	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
+	DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
+#if IS_ENABLED(CONFIG_IPV6)
+	DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+	DEFINE_SNMP_STAT_ATOMIC(struct icmpv6msg_mib, icmpv6msg_statistics);
+	struct proc_dir_entry *proc_net_devsnmp6;
+#endif
 };
 
 #endif
-- 
cgit v1.2.3


From 82506665179209e43d3c9d39ffa42f8c8ff968bd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 2 Apr 2021 11:10:37 -0700
Subject: tcp: reorder tcp_congestion_ops for better cache locality

Group all the often used fields in the first cache line,
to reduce cache line misses.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 42 +++++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 31b1696c62ba..eaea43afcc97 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1035,44 +1035,56 @@ struct rate_sample {
 };
 
 struct tcp_congestion_ops {
-	struct list_head	list;
-	u32 key;
-	u32 flags;
-
-	/* initialize private data (optional) */
-	void (*init)(struct sock *sk);
-	/* cleanup private data  (optional) */
-	void (*release)(struct sock *sk);
+/* fast path fields are put first to fill one cache line */
 
 	/* return slow start threshold (required) */
 	u32 (*ssthresh)(struct sock *sk);
+
 	/* do new cwnd calculation (required) */
 	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
+
 	/* call before changing ca_state (optional) */
 	void (*set_state)(struct sock *sk, u8 new_state);
+
 	/* call when cwnd event occurs (optional) */
 	void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
+
 	/* call when ack arrives (optional) */
 	void (*in_ack_event)(struct sock *sk, u32 flags);
-	/* new value of cwnd after loss (required) */
-	u32  (*undo_cwnd)(struct sock *sk);
+
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+
 	/* override sysctl_tcp_min_tso_segs */
 	u32 (*min_tso_segs)(struct sock *sk);
-	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
-	u32 (*sndbuf_expand)(struct sock *sk);
+
 	/* call when packets are delivered to update cwnd and pacing rate,
 	 * after all the ca_state processing. (optional)
 	 */
 	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+
+
+	/* new value of cwnd after loss (required) */
+	u32  (*undo_cwnd)(struct sock *sk);
+	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
+	u32 (*sndbuf_expand)(struct sock *sk);
+
+/* control/slow paths put last */
 	/* get info for inet_diag (optional) */
 	size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
 			   union tcp_cc_info *info);
 
-	char 		name[TCP_CA_NAME_MAX];
-	struct module 	*owner;
-};
+	char 			name[TCP_CA_NAME_MAX];
+	struct module		*owner;
+	struct list_head	list;
+	u32			key;
+	u32			flags;
+
+	/* initialize private data (optional) */
+	void (*init)(struct sock *sk);
+	/* cleanup private data  (optional) */
+	void (*release)(struct sock *sk);
+} ____cacheline_aligned_in_smp;
 
 int tcp_register_congestion_control(struct tcp_congestion_ops *type);
 void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
-- 
cgit v1.2.3


From 116c76c51035ec49af0894e8e8ad71334f9ad7aa Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Mon, 15 Mar 2021 19:59:52 +0200
Subject: net/mlx5: CT: Add support for matching on ct_state inv and rel flags

Add support for matching on ct_state inv and rel flags.

Currently the support is only for match on -inv and -rel.
Matching on +inv and +rel will be rejected.

Example:
$ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \
  ct_state -est-rel+trk \
  action mirred egress redirect dev ens1f0_1
$ tc filter add dev ens1f0_1 ingress prio 1 chain 1 proto ip flower \
  ct_state +trk+est-inv \
  action mirred egress redirect dev ens1f0_0

Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 26 ++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index df13e5094034..1c44000ad675 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -29,6 +29,8 @@
 #define MLX5_CT_STATE_TRK_BIT BIT(2)
 #define MLX5_CT_STATE_NAT_BIT BIT(3)
 #define MLX5_CT_STATE_REPLY_BIT BIT(4)
+#define MLX5_CT_STATE_RELATED_BIT BIT(5)
+#define MLX5_CT_STATE_INVALID_BIT BIT(6)
 
 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@@ -1207,8 +1209,8 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
 		     struct mlx5_ct_attr *ct_attr,
 		     struct netlink_ext_ack *extack)
 {
+	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
-	bool trk, est, untrk, unest, new, rpl, unrpl;
 	struct flow_dissector_key_ct *mask, *key;
 	u32 ctstate = 0, ctstate_mask = 0;
 	u16 ct_state_on, ct_state_off;
@@ -1236,7 +1238,9 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
 			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
-			      TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
+			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
+			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
+			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "only ct_state trk, est, new and rpl are supported for offload");
 		return -EOPNOTSUPP;
@@ -1248,9 +1252,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
 	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
 	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
+	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
+	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
 
 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
@@ -1258,6 +1266,20 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
 	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
+	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
+	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
+
+	if (rel) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "matching on ct_state +rel isn't supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (inv) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "matching on ct_state +inv isn't supported");
+		return -EOPNOTSUPP;
+	}
 
 	if (new) {
 		NL_SET_ERR_MSG_MOD(extack,
-- 
cgit v1.2.3


From cadb129ffdfecd2aada3b590cd3132060e665e33 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 3 Feb 2021 06:59:32 +0200
Subject: net/mlx5: E-Switch, cut down mlx5_vport_info structure size by 8
 bytes

Structure mlx5_vport_info consumes 40 bytes of space due to a hole
in it. After packing it reduces to 32 bytes.

Currently:
pahole -C mlx5_vport_info drivers/net/ethernet/mellanox/mlx5/core/eswitch.o
struct mlx5_vport_info {
        u8                         mac[6];               /*     0     6 */
        u16                        vlan;                 /*     6     2 */
        u8                         qos;                  /*     8     1 */

        /* XXX 7 bytes hole, try to pack */

        u64                        node_guid;            /*    16     8 */
        int                        link_state;           /*    24     4 */
        u32                        min_rate;             /*    28     4 */
        u32                        max_rate;             /*    32     4 */
        bool                       spoofchk;             /*    36     1 */
        bool                       trusted;              /*    37     1 */

        /* size: 40, cachelines: 1, members: 9 */
        /* sum members: 31, holes: 1, sum holes: 7 */
        /* padding: 2 */
        /* last cacheline: 40 bytes */
};

After packing:

$ pahole -C mlx5_vport_info drivers/net/ethernet/mellanox/mlx5/core/eswitch.o

struct mlx5_vport_info {
        u8                         mac[6];               /*     0     6 */
        u16                        vlan;                 /*     6     2 */
        u64                        node_guid;            /*     8     8 */
        int                        link_state;           /*    16     4 */
        u32                        min_rate;             /*    20     4 */
        u32                        max_rate;             /*    24     4 */
        u8                         qos;                  /*    28     1 */
        u8                         spoofchk:1;           /*    29: 0  1 */
        u8                         trusted:1;            /*    29: 1  1 */

        /* size: 32, cachelines: 1, members: 9 */
        /* padding: 2 */
        /* bit_padding: 6 bits */
        /* last cacheline: 32 bytes */
};

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 56d85cedb9bd..0adffab20244 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -118,13 +118,13 @@ struct mlx5_vport_drop_stats {
 struct mlx5_vport_info {
 	u8                      mac[ETH_ALEN];
 	u16                     vlan;
-	u8                      qos;
 	u64                     node_guid;
 	int                     link_state;
 	u32                     min_rate;
 	u32                     max_rate;
-	bool                    spoofchk;
-	bool                    trusted;
+	u8                      qos;
+	u8                      spoofchk: 1;
+	u8                      trusted: 1;
 };
 
 /* Vport context events */
-- 
cgit v1.2.3


From e591605f801ecfa2202b2682bf7af96e1d3129df Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 3 Feb 2021 15:32:50 +0200
Subject: net/mlx5: E-Switch, move QoS specific fields to existing qos struct

Function QoS related fields are already defined in qos related struct.
min and max rate are left out to mlx5_vport_info struct.

Move them to existing qos struct.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 26 +++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  4 ++--
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6b260dacf853..6cf04a366f99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1235,7 +1235,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 		return err;
 
 	/* Attach vport to the eswitch rate limiter */
-	esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share);
+	esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
 
 	if (mlx5_esw_is_manager_vport(esw, vport_num))
 		return 0;
@@ -2078,8 +2078,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 	ivi->qos = evport->info.qos;
 	ivi->spoofchk = evport->info.spoofchk;
 	ivi->trusted = evport->info.trusted;
-	ivi->min_tx_rate = evport->info.min_rate;
-	ivi->max_tx_rate = evport->info.max_rate;
+	ivi->min_tx_rate = evport->qos.min_rate;
+	ivi->max_tx_rate = evport->qos.max_rate;
 	mutex_unlock(&esw->state_lock);
 
 	return 0;
@@ -2319,9 +2319,9 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
 	int i;
 
 	mlx5_esw_for_all_vports(esw, i, evport) {
-		if (!evport->enabled || evport->info.min_rate < max_guarantee)
+		if (!evport->enabled || evport->qos.min_rate < max_guarantee)
 			continue;
-		max_guarantee = evport->info.min_rate;
+		max_guarantee = evport->qos.min_rate;
 	}
 
 	if (max_guarantee)
@@ -2343,8 +2343,8 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
 	mlx5_esw_for_all_vports(esw, i, evport) {
 		if (!evport->enabled)
 			continue;
-		vport_min_rate = evport->info.min_rate;
-		vport_max_rate = evport->info.max_rate;
+		vport_min_rate = evport->qos.min_rate;
+		vport_max_rate = evport->qos.max_rate;
 		bw_share = 0;
 
 		if (divider)
@@ -2391,24 +2391,24 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 
 	mutex_lock(&esw->state_lock);
 
-	if (min_rate == evport->info.min_rate)
+	if (min_rate == evport->qos.min_rate)
 		goto set_max_rate;
 
-	previous_min_rate = evport->info.min_rate;
-	evport->info.min_rate = min_rate;
+	previous_min_rate = evport->qos.min_rate;
+	evport->qos.min_rate = min_rate;
 	err = normalize_vports_min_rate(esw);
 	if (err) {
-		evport->info.min_rate = previous_min_rate;
+		evport->qos.min_rate = previous_min_rate;
 		goto unlock;
 	}
 
 set_max_rate:
-	if (max_rate == evport->info.max_rate)
+	if (max_rate == evport->qos.max_rate)
 		goto unlock;
 
 	err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
 	if (!err)
-		evport->info.max_rate = max_rate;
+		evport->qos.max_rate = max_rate;
 
 unlock:
 	mutex_unlock(&esw->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 0adffab20244..64db903068c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -120,8 +120,6 @@ struct mlx5_vport_info {
 	u16                     vlan;
 	u64                     node_guid;
 	int                     link_state;
-	u32                     min_rate;
-	u32                     max_rate;
 	u8                      qos;
 	u8                      spoofchk: 1;
 	u8                      trusted: 1;
@@ -154,6 +152,8 @@ struct mlx5_vport {
 		bool            enabled;
 		u32             esw_tsar_ix;
 		u32             bw_share;
+		u32 min_rate;
+		u32 max_rate;
 	} qos;
 
 	bool                    enabled;
-- 
cgit v1.2.3


From c6baac47d9e64d42f98ef745ed1f2925efc78440 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 9 Feb 2021 09:44:27 +0200
Subject: net/mlx5: Use unsigned int for free_count

Fix the warning due to missing int.

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
+       unsigned free_count;

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index c0656d4782e1..110c0837f95b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -61,7 +61,7 @@ struct fw_page {
 	u32			function;
 	unsigned long		bitmask;
 	struct list_head	list;
-	unsigned		free_count;
+	unsigned int free_count;
 };
 
 enum {
-- 
cgit v1.2.3


From 4c4c0a89abd5c08e91df9bcce4ebcb3433bbb9bf Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 19 Feb 2021 08:18:12 +0200
Subject: net/mlx5: Pack mlx5_rl_entry structure

mlx5_rl_entry structure is not properly packed as shown below. Due to this
an array of size 9144 bytes allocated which is aligned to 16Kbytes.
Hence, pack the structure and avoid the wastage.

This offers 8Kbytes of saving per mlx5_core_dev struct.

pahole -C mlx5_rl_entry  drivers/net/ethernet/mellanox/mlx5/core/en_main.o

Existing layout:

struct mlx5_rl_entry {
        u8                         rl_raw[48];           /*     0    48 */
        u16                        index;                /*    48     2 */

        /* XXX 6 bytes hole, try to pack */

        u64                        refcount;             /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u16                        uid;                  /*    64     2 */
        u8                         dedicated:1;          /*    66: 0  1 */

        /* size: 72, cachelines: 2, members: 5 */
        /* sum members: 60, holes: 1, sum holes: 6 */
        /* sum bitfield members: 1 bits (0 bytes) */
        /* padding: 5 */
        /* bit_padding: 7 bits */
        /* last cacheline: 8 bytes */
};

After alignment:

struct mlx5_rl_entry {
        u8                         rl_raw[48];           /*     0    48 */
        u64                        refcount;             /*    48     8 */
        u16                        index;                /*    56     2 */
        u16                        uid;                  /*    58     2 */
        u8                         dedicated:1;          /*    60: 0  1 */

        /* size: 64, cachelines: 1, members: 5 */
        /* padding: 3 */
        /* bit_padding: 7 bits */
};

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 23bb01d7c9b9..a9bd7e3bd554 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -517,8 +517,8 @@ struct mlx5_rate_limit {
 
 struct mlx5_rl_entry {
 	u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)];
-	u16 index;
 	u64 refcount;
+	u16 index;
 	u16 uid;
 	u8 dedicated : 1;
 };
-- 
cgit v1.2.3


From 16e74672a21b95ebc6fb42420d9a8b91b7e947b0 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 24 Feb 2021 11:04:19 +0200
Subject: net/mlx5: Do not hold mutex while reading table constants

Table max_size, min and max rate are constants initialized while table
is created. Reading it doesn't need to hold a table mutex. Hence, read
them without holding table mutex.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 99039c47ef33..2accc0f324f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -180,16 +180,18 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 	int err = 0;
 	u32 rate;
 
-	rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit);
-	mutex_lock(&table->rl_lock);
+	if (!table->max_size)
+		return -EOPNOTSUPP;
 
+	rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit);
 	if (!rate || !mlx5_rl_is_in_range(dev, rate)) {
 		mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n",
 			      rate, table->min_rate, table->max_rate);
-		err = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
+	mutex_lock(&table->rl_lock);
+
 	entry = find_rl_entry(table, rl_in, uid, dedicated_entry);
 	if (!entry) {
 		mlx5_core_err(dev, "Max number of %u rates reached\n",
-- 
cgit v1.2.3


From 51ccc9f5f106caf293230e10dcd494a4e33cc68e Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 19 Feb 2021 09:29:56 +0200
Subject: net/mlx5: Use helpers to allocate and free rl table entries

User helper routines to allocate and free rate limit table entries.
Subsequent patch extends use of these helpers to do allocation
during rate entry allocation callback.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 55 ++++++++++++++++++----------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 2accc0f324f3..208fd3cad970 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -172,6 +172,35 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
 }
 EXPORT_SYMBOL(mlx5_rl_are_equal);
 
+static int mlx5_rl_table_alloc(struct mlx5_rl_table *table)
+{
+	int i;
+
+	table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
+				  GFP_KERNEL);
+	if (!table->rl_entry)
+		return -ENOMEM;
+
+	/* The index represents the index in HW rate limit table
+	 * Index 0 is reserved for unlimited rate
+	 */
+	for (i = 0; i < table->max_size; i++)
+		table->rl_entry[i].index = i + 1;
+
+	return 0;
+}
+
+static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table)
+{
+	int i;
+
+	/* Clear all configured rates */
+	for (i = 0; i < table->max_size; i++)
+		if (table->rl_entry[i].refcount)
+			mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false);
+	kfree(table->rl_entry);
+}
+
 int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 			 bool dedicated_entry, u16 *index)
 {
@@ -302,7 +331,7 @@ EXPORT_SYMBOL(mlx5_rl_remove_rate);
 int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
-	int i;
+	int err;
 
 	mutex_init(&table->rl_lock);
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
@@ -315,18 +344,10 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 	table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
 	table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
 
-	table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
-				  GFP_KERNEL);
-	if (!table->rl_entry)
-		return -ENOMEM;
-
-	/* The index represents the index in HW rate limit table
-	 * Index 0 is reserved for unlimited rate
-	 */
-	for (i = 0; i < table->max_size; i++)
-		table->rl_entry[i].index = i + 1;
+	err = mlx5_rl_table_alloc(table);
+	if (err)
+		return err;
 
-	/* Index 0 is reserved */
 	mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
 		       table->max_size,
 		       table->min_rate >> 10,
@@ -338,13 +359,9 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
-	int i;
 
-	/* Clear all configured rates */
-	for (i = 0; i < table->max_size; i++)
-		if (table->rl_entry[i].refcount)
-			mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i],
-						   false);
+	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing))
+		return;
 
-	kfree(dev->priv.rl_table.rl_entry);
+	mlx5_rl_table_free(dev, table);
 }
-- 
cgit v1.2.3


From 97d85aba2543af790aaf7f4eb1c9f643a1718519 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Thu, 25 Feb 2021 13:38:07 +0200
Subject: net/mlx5: Use helper to increment, decrement rate entry refcount

Rate limit entry refcount can be incremented uniformly when it is newly
allocated or reused.
So simplify the code to increment refcount at one place.

Use decrement refcount helper in two routines.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 34 +++++++++++++++-------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 208fd3cad970..08792fe701e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -201,6 +201,19 @@ static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *
 	kfree(table->rl_entry);
 }
 
+static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry)
+{
+	entry->refcount++;
+}
+
+static void
+mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry)
+{
+	entry->refcount--;
+	if (!entry->refcount)
+		mlx5_set_pp_rate_limit_cmd(dev, entry, false);
+}
+
 int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 			 bool dedicated_entry, u16 *index)
 {
@@ -228,13 +241,10 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 		err = -ENOSPC;
 		goto out;
 	}
-	if (entry->refcount) {
-		/* rate already configured */
-		entry->refcount++;
-	} else {
+	if (!entry->refcount) {
+		/* new rate limit */
 		memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw));
 		entry->uid = uid;
-		/* new rate limit */
 		err = mlx5_set_pp_rate_limit_cmd(dev, entry, true);
 		if (err) {
 			mlx5_core_err(
@@ -248,9 +258,9 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 			goto out;
 		}
 
-		entry->refcount = 1;
 		entry->dedicated = dedicated_entry;
 	}
+	mlx5_rl_entry_get(entry);
 	*index = entry->index;
 
 out:
@@ -266,10 +276,7 @@ void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index)
 
 	mutex_lock(&table->rl_lock);
 	entry = &table->rl_entry[index - 1];
-	entry->refcount--;
-	if (!entry->refcount)
-		/* need to remove rate */
-		mlx5_set_pp_rate_limit_cmd(dev, entry, false);
+	mlx5_rl_entry_put(dev, entry);
 	mutex_unlock(&table->rl_lock);
 }
 EXPORT_SYMBOL(mlx5_rl_remove_rate_raw);
@@ -317,12 +324,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
 			       rl->rate, rl->max_burst_sz, rl->typical_pkt_sz);
 		goto out;
 	}
-
-	entry->refcount--;
-	if (!entry->refcount)
-		/* need to remove rate */
-		mlx5_set_pp_rate_limit_cmd(dev, entry, false);
-
+	mlx5_rl_entry_put(dev, entry);
 out:
 	mutex_unlock(&table->rl_lock);
 }
-- 
cgit v1.2.3


From 6b30b6d4d36c978e0ab0f22e85bf3c646732e98b Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 19 Feb 2021 12:06:54 +0200
Subject: net/mlx5: Allocate rate limit table when rate is configured

A device supports 128 rate limiters. A static table allocation consumes
8KB of memory even when rate is not configured.

Instead, allocate the table when at least one rate is configured.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 46 ++++++++++++++++++++++------
 include/linux/mlx5/driver.h                  |  1 +
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 08792fe701e3..0526e3798c09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -117,6 +117,9 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 	bool empty_found = false;
 	int i;
 
+	lockdep_assert_held(&table->rl_lock);
+	WARN_ON(!table->rl_entry);
+
 	for (i = 0; i < table->max_size; i++) {
 		if (dedicated) {
 			if (!table->rl_entry[i].refcount)
@@ -172,10 +175,17 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0,
 }
 EXPORT_SYMBOL(mlx5_rl_are_equal);
 
-static int mlx5_rl_table_alloc(struct mlx5_rl_table *table)
+static int mlx5_rl_table_get(struct mlx5_rl_table *table)
 {
 	int i;
 
+	lockdep_assert_held(&table->rl_lock);
+
+	if (table->rl_entry) {
+		table->refcount++;
+		return 0;
+	}
+
 	table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry),
 				  GFP_KERNEL);
 	if (!table->rl_entry)
@@ -187,13 +197,27 @@ static int mlx5_rl_table_alloc(struct mlx5_rl_table *table)
 	for (i = 0; i < table->max_size; i++)
 		table->rl_entry[i].index = i + 1;
 
+	table->refcount++;
 	return 0;
 }
 
+static void mlx5_rl_table_put(struct mlx5_rl_table *table)
+{
+	lockdep_assert_held(&table->rl_lock);
+	if (--table->refcount)
+		return;
+
+	kfree(table->rl_entry);
+	table->rl_entry = NULL;
+}
+
 static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table)
 {
 	int i;
 
+	if (!table->rl_entry)
+		return;
+
 	/* Clear all configured rates */
 	for (i = 0; i < table->max_size; i++)
 		if (table->rl_entry[i].refcount)
@@ -219,8 +243,8 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
 	struct mlx5_rl_entry *entry;
-	int err = 0;
 	u32 rate;
+	int err;
 
 	if (!table->max_size)
 		return -EOPNOTSUPP;
@@ -233,13 +257,16 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 	}
 
 	mutex_lock(&table->rl_lock);
+	err = mlx5_rl_table_get(table);
+	if (err)
+		goto out;
 
 	entry = find_rl_entry(table, rl_in, uid, dedicated_entry);
 	if (!entry) {
 		mlx5_core_err(dev, "Max number of %u rates reached\n",
 			      table->max_size);
 		err = -ENOSPC;
-		goto out;
+		goto rl_err;
 	}
 	if (!entry->refcount) {
 		/* new rate limit */
@@ -255,14 +282,18 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid,
 					 burst_upper_bound),
 				MLX5_GET(set_pp_rate_limit_context, rl_in,
 					 typical_packet_size));
-			goto out;
+			goto rl_err;
 		}
 
 		entry->dedicated = dedicated_entry;
 	}
 	mlx5_rl_entry_get(entry);
 	*index = entry->index;
+	mutex_unlock(&table->rl_lock);
+	return 0;
 
+rl_err:
+	mlx5_rl_table_put(table);
 out:
 	mutex_unlock(&table->rl_lock);
 	return err;
@@ -277,6 +308,7 @@ void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index)
 	mutex_lock(&table->rl_lock);
 	entry = &table->rl_entry[index - 1];
 	mlx5_rl_entry_put(dev, entry);
+	mlx5_rl_table_put(table);
 	mutex_unlock(&table->rl_lock);
 }
 EXPORT_SYMBOL(mlx5_rl_remove_rate_raw);
@@ -325,6 +357,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl)
 		goto out;
 	}
 	mlx5_rl_entry_put(dev, entry);
+	mlx5_rl_table_put(table);
 out:
 	mutex_unlock(&table->rl_lock);
 }
@@ -333,7 +366,6 @@ EXPORT_SYMBOL(mlx5_rl_remove_rate);
 int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
-	int err;
 
 	mutex_init(&table->rl_lock);
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
@@ -346,10 +378,6 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 	table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
 	table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate);
 
-	err = mlx5_rl_table_alloc(table);
-	if (err)
-		return err;
-
 	mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n",
 		       table->max_size,
 		       table->min_rate >> 10,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a9bd7e3bd554..baf38b5a2a8c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -530,6 +530,7 @@ struct mlx5_rl_table {
 	u32                     max_rate;
 	u32                     min_rate;
 	struct mlx5_rl_entry   *rl_entry;
+	u64 refcount;
 };
 
 struct mlx5_core_roce {
-- 
cgit v1.2.3


From 19779f28c96dfcc748fa079a2ffdec624feee5cd Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 19 Feb 2021 09:36:54 +0200
Subject: net/mlx5: Pair mutex_destory with mutex_init for rate limit table

Add missing mutex_destroy() to pair with mutex_init().

This should be done only when table is initialized, hence perform
mutex_init() only when table is initialized.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 0526e3798c09..7161220afe30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -367,12 +367,13 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev)
 {
 	struct mlx5_rl_table *table = &dev->priv.rl_table;
 
-	mutex_init(&table->rl_lock);
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) {
 		table->max_size = 0;
 		return 0;
 	}
 
+	mutex_init(&table->rl_lock);
+
 	/* First entry is reserved for unlimited rate */
 	table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1;
 	table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate);
@@ -394,4 +395,5 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
 		return;
 
 	mlx5_rl_table_free(dev, table);
+	mutex_destroy(&table->rl_lock);
 }
-- 
cgit v1.2.3


From b47e1056257cdeae9cbe55926c13a98519f950b1 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 3 Feb 2021 06:59:32 +0200
Subject: net/mlx5: E-Switch, cut down mlx5_vport_info structure size by 8
 bytes

Structure mlx5_vport_info consumes 40 bytes of space due to a hole
in it. After packing it reduces to 32 bytes.

Currently:
pahole -C mlx5_vport_info drivers/net/ethernet/mellanox/mlx5/core/eswitch.o
struct mlx5_vport_info {
        u8                         mac[6];               /*     0     6 */
        u16                        vlan;                 /*     6     2 */
        u8                         qos;                  /*     8     1 */

        /* XXX 7 bytes hole, try to pack */

        u64                        node_guid;            /*    16     8 */
        int                        link_state;           /*    24     4 */
        u32                        min_rate;             /*    28     4 */
        u32                        max_rate;             /*    32     4 */
        bool                       spoofchk;             /*    36     1 */
        bool                       trusted;              /*    37     1 */

        /* size: 40, cachelines: 1, members: 9 */
        /* sum members: 31, holes: 1, sum holes: 7 */
        /* padding: 2 */
        /* last cacheline: 40 bytes */
};

After packing:

$ pahole -C mlx5_vport_info drivers/net/ethernet/mellanox/mlx5/core/eswitch.o

struct mlx5_vport_info {
        u8                         mac[6];               /*     0     6 */
        u16                        vlan;                 /*     6     2 */
        u64                        node_guid;            /*     8     8 */
        int                        link_state;           /*    16     4 */
        u32                        min_rate;             /*    20     4 */
        u32                        max_rate;             /*    24     4 */
        u8                         qos;                  /*    28     1 */
        u8                         spoofchk:1;           /*    29: 0  1 */
        u8                         trusted:1;            /*    29: 1  1 */

        /* size: 32, cachelines: 1, members: 9 */
        /* padding: 2 */
        /* bit_padding: 6 bits */
        /* last cacheline: 32 bytes */
};

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 64db903068c1..5d50b127f81a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -120,6 +120,8 @@ struct mlx5_vport_info {
 	u16                     vlan;
 	u64                     node_guid;
 	int                     link_state;
+	u32                     min_rate;
+	u32                     max_rate;
 	u8                      qos;
 	u8                      spoofchk: 1;
 	u8                      trusted: 1;
-- 
cgit v1.2.3


From 233dd7d6565e5f4a691e33306eda2d047e98a4bb Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 3 Feb 2021 15:32:50 +0200
Subject: net/mlx5: E-Switch, move QoS specific fields to existing qos struct

Function QoS related fields are already defined in qos related struct.
min and max rate are left out to mlx5_vport_info struct.

Move them to existing qos struct.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5d50b127f81a..64db903068c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -120,8 +120,6 @@ struct mlx5_vport_info {
 	u16                     vlan;
 	u64                     node_guid;
 	int                     link_state;
-	u32                     min_rate;
-	u32                     max_rate;
 	u8                      qos;
 	u8                      spoofchk: 1;
 	u8                      trusted: 1;
-- 
cgit v1.2.3


From 8802b8a44ef869d5985be817654373311212acfb Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 15 Mar 2021 13:31:40 +0200
Subject: net/mlx5: Use ida_alloc_range() instead of ida_simple_alloc()

ida_simple_alloc() and remove functions are deprecated.
Related change:
commit 3264ceec8f17 ("lib/idr.c: document that ida_simple_{get,remove}() are deprecated")

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c    |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c    | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 22bee4990232..d43a05e77f67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -707,7 +707,7 @@ static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 	}
 
 	if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) {
-		err = ida_simple_get(&fipsec->halloc, 1, 0, GFP_KERNEL);
+		err = ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL);
 		if (err < 0) {
 			context = ERR_PTR(err);
 			goto exists;
@@ -758,7 +758,7 @@ delete_hash:
 unlock_hash:
 	mutex_unlock(&fipsec->sa_hash_lock);
 	if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
-		ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle);
+		ida_free(&fipsec->halloc, sa_ctx->sa_handle);
 exists:
 	mutex_unlock(&fpga_xfrm->lock);
 	kfree(sa_ctx);
@@ -852,7 +852,7 @@ mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
 
 	if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action &
 	    MLX5_ACCEL_ESP_ACTION_DECRYPT)
-		ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle);
+		ida_free(&fipsec->halloc, sa_ctx->sa_handle);
 
 	mutex_lock(&fipsec->sa_hash_lock);
 	WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index dbd910656574..0216bd63a42d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -590,7 +590,7 @@ static void del_sw_fte(struct fs_node *node)
 				     &fte->hash,
 				     rhash_fte);
 	WARN_ON(err);
-	ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+	ida_free(&fg->fte_allocator, fte->index - fg->start_index);
 	kmem_cache_free(steering->ftes_cache, fte);
 }
 
@@ -640,7 +640,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
 	int index;
 	int ret;
 
-	index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+	index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL);
 	if (index < 0)
 		return index;
 
@@ -656,7 +656,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
 	return 0;
 
 err_ida_remove:
-	ida_simple_remove(&fg->fte_allocator, index);
+	ida_free(&fg->fte_allocator, index);
 	return ret;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index 97324d9d4f2a..3f9869c7e326 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -88,12 +88,12 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count)
 int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
 {
 	int end = dev->roce.reserved_gids.start +
-		  dev->roce.reserved_gids.count;
+		  dev->roce.reserved_gids.count - 1;
 	int index = 0;
 
-	index = ida_simple_get(&dev->roce.reserved_gids.ida,
-			       dev->roce.reserved_gids.start, end,
-			       GFP_KERNEL);
+	index = ida_alloc_range(&dev->roce.reserved_gids.ida,
+				dev->roce.reserved_gids.start, end,
+				GFP_KERNEL);
 	if (index < 0)
 		return index;
 
@@ -105,7 +105,7 @@ int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index)
 void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index)
 {
 	mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index);
-	ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index);
+	ida_free(&dev->roce.reserved_gids.ida, gid_index);
 }
 
 unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev)
-- 
cgit v1.2.3


From bb5696570b0b670dbbb45ed57dc246020b4e2a06 Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Thu, 11 Mar 2021 20:37:21 +0200
Subject: net/mlx5e: Reject tc rules which redirect from a VF to itself

Since there are self loopback prevention mechanisms at the
VF level, offloading such rules which redirect from a VF
to itself in the eswitch will break the datapath since the
packets will be dropped once they go back to the vport they
came from.

Therefore, offloading such rules will be rejected and left to
be handled by SW.

Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dbc06c71c170..a4a4cdecbdea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3111,6 +3111,13 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
 	return (fsystem_guid == psystem_guid);
 }
 
+static bool same_vf_reps(struct mlx5e_priv *priv,
+			 struct net_device *out_dev)
+{
+	return mlx5e_eswitch_vf_rep(priv->netdev) &&
+	       priv->netdev == out_dev;
+}
+
 static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
 				   const struct flow_action_entry *act,
 				   struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -3796,6 +3803,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 					return -EOPNOTSUPP;
 				}
 
+				if (same_vf_reps(priv, out_dev)) {
+					NL_SET_ERR_MSG_MOD(extack,
+							   "can't forward from a VF to itself");
+					return -EOPNOTSUPP;
+				}
+
 				out_priv = netdev_priv(out_dev);
 				rpriv = out_priv->ppriv;
 				esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
-- 
cgit v1.2.3


From f6755b80d693c585c87b56d1e3034528609d25c2 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@nvidia.com>
Date: Fri, 26 Feb 2021 15:51:12 -0800
Subject: net/mlx5e: Dynamic alloc arfs table for netdev when needed

Dynamic allocate arfs table in mlx5e_priv for EN netdev
when needed. Don't allocate it for representor netdev.

Signed-off-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h   | 28 +------
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 95 ++++++++++++++++-------
 2 files changed, 69 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index d53fb1e31b05..cbfe31fb2b12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -200,31 +200,7 @@ static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev,
 #endif /* CONFIG_MLX5_EN_RXNFC */
 
 #ifdef CONFIG_MLX5_EN_ARFS
-#define ARFS_HASH_SHIFT BITS_PER_BYTE
-#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
-
-struct arfs_table {
-	struct mlx5e_flow_table  ft;
-	struct mlx5_flow_handle	 *default_rule;
-	struct hlist_head	 rules_hash[ARFS_HASH_SIZE];
-};
-
-enum  arfs_type {
-	ARFS_IPV4_TCP,
-	ARFS_IPV6_TCP,
-	ARFS_IPV4_UDP,
-	ARFS_IPV6_UDP,
-	ARFS_NUM_TYPES,
-};
-
-struct mlx5e_arfs_tables {
-	struct arfs_table arfs_tables[ARFS_NUM_TYPES];
-	/* Protect aRFS rules list */
-	spinlock_t                     arfs_lock;
-	struct list_head               rules;
-	int                            last_filter_id;
-	struct workqueue_struct        *wq;
-};
+struct mlx5e_arfs_tables;
 
 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
 void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
@@ -260,7 +236,7 @@ struct mlx5e_flow_steering {
 	struct mlx5e_ttc_table          ttc;
 	struct mlx5e_ttc_table          inner_ttc;
 #ifdef CONFIG_MLX5_EN_ARFS
-	struct mlx5e_arfs_tables        arfs;
+	struct mlx5e_arfs_tables       *arfs;
 #endif
 #ifdef CONFIG_MLX5_EN_TLS
 	struct mlx5e_accel_fs_tcp      *accel_tcp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 39475f6565c7..d5b1eb74d5e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -36,6 +36,32 @@
 #include <linux/ipv6.h>
 #include "en.h"
 
+#define ARFS_HASH_SHIFT BITS_PER_BYTE
+#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
+
+struct arfs_table {
+	struct mlx5e_flow_table  ft;
+	struct mlx5_flow_handle	 *default_rule;
+	struct hlist_head	 rules_hash[ARFS_HASH_SIZE];
+};
+
+enum arfs_type {
+	ARFS_IPV4_TCP,
+	ARFS_IPV6_TCP,
+	ARFS_IPV4_UDP,
+	ARFS_IPV6_UDP,
+	ARFS_NUM_TYPES,
+};
+
+struct mlx5e_arfs_tables {
+	struct arfs_table arfs_tables[ARFS_NUM_TYPES];
+	/* Protect aRFS rules list */
+	spinlock_t                     arfs_lock;
+	struct list_head               rules;
+	int                            last_filter_id;
+	struct workqueue_struct        *wq;
+};
+
 struct arfs_tuple {
 	__be16 etype;
 	u8     ip_proto;
@@ -121,7 +147,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	for (i = 0; i < ARFS_NUM_TYPES; i++) {
-		dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
+		dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
 		/* Modify ttc rules destination to point on the aRFS FTs */
 		err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
 		if (err) {
@@ -141,25 +167,31 @@ static void arfs_destroy_table(struct arfs_table *arfs_t)
 	mlx5e_destroy_flow_table(&arfs_t->ft);
 }
 
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv)
 {
 	int i;
 
-	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
-		return;
-
 	arfs_del_rules(priv);
-	destroy_workqueue(priv->fs.arfs.wq);
+	destroy_workqueue(priv->fs.arfs->wq);
 	for (i = 0; i < ARFS_NUM_TYPES; i++) {
-		if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
-			arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
+		if (!IS_ERR_OR_NULL(priv->fs.arfs->arfs_tables[i].ft.t))
+			arfs_destroy_table(&priv->fs.arfs->arfs_tables[i]);
 	}
 }
 
+void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
+{
+	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
+		return;
+
+	_mlx5e_cleanup_tables(priv);
+	kvfree(priv->fs.arfs);
+}
+
 static int arfs_add_default_rule(struct mlx5e_priv *priv,
 				 enum arfs_type type)
 {
-	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+	struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
 	struct mlx5e_tir *tir = priv->indir_tir;
 	struct mlx5_flow_destination dest = {};
 	MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -290,7 +322,7 @@ out:
 static int arfs_create_table(struct mlx5e_priv *priv,
 			     enum arfs_type type)
 {
-	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+	struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
 	struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
 	struct mlx5_flow_table_attr ft_attr = {};
 	int err;
@@ -330,20 +362,27 @@ int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
 	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
 		return 0;
 
-	spin_lock_init(&priv->fs.arfs.arfs_lock);
-	INIT_LIST_HEAD(&priv->fs.arfs.rules);
-	priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
-	if (!priv->fs.arfs.wq)
+	priv->fs.arfs = kvzalloc(sizeof(*priv->fs.arfs), GFP_KERNEL);
+	if (!priv->fs.arfs)
 		return -ENOMEM;
 
+	spin_lock_init(&priv->fs.arfs->arfs_lock);
+	INIT_LIST_HEAD(&priv->fs.arfs->rules);
+	priv->fs.arfs->wq = create_singlethread_workqueue("mlx5e_arfs");
+	if (!priv->fs.arfs->wq)
+		goto err;
+
 	for (i = 0; i < ARFS_NUM_TYPES; i++) {
 		err = arfs_create_table(priv, i);
 		if (err)
-			goto err;
+			goto err_des;
 	}
 	return 0;
+
+err_des:
+	_mlx5e_cleanup_tables(priv);
 err:
-	mlx5e_arfs_destroy_tables(priv);
+	kvfree(priv->fs.arfs);
 	return err;
 }
 
@@ -358,8 +397,8 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 	int j;
 
 	HLIST_HEAD(del_list);
-	spin_lock_bh(&priv->fs.arfs.arfs_lock);
-	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+	spin_lock_bh(&priv->fs.arfs->arfs_lock);
+	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
 		if (!work_pending(&arfs_rule->arfs_work) &&
 		    rps_may_expire_flow(priv->netdev,
 					arfs_rule->rxq, arfs_rule->flow_id,
@@ -370,7 +409,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 				break;
 		}
 	}
-	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+	spin_unlock_bh(&priv->fs.arfs->arfs_lock);
 	hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
 		if (arfs_rule->rule)
 			mlx5_del_flow_rules(arfs_rule->rule);
@@ -387,12 +426,12 @@ static void arfs_del_rules(struct mlx5e_priv *priv)
 	int j;
 
 	HLIST_HEAD(del_list);
-	spin_lock_bh(&priv->fs.arfs.arfs_lock);
-	mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
+	spin_lock_bh(&priv->fs.arfs->arfs_lock);
+	mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
 		hlist_del_init(&rule->hlist);
 		hlist_add_head(&rule->hlist, &del_list);
 	}
-	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+	spin_unlock_bh(&priv->fs.arfs->arfs_lock);
 
 	hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
 		cancel_work_sync(&rule->arfs_work);
@@ -436,7 +475,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
 					      struct arfs_rule *arfs_rule)
 {
-	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+	struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
 	struct arfs_tuple *tuple = &arfs_rule->tuple;
 	struct mlx5_flow_handle *rule = NULL;
 	struct mlx5_flow_destination dest = {};
@@ -554,9 +593,9 @@ static void arfs_handle_work(struct work_struct *work)
 
 	mutex_lock(&priv->state_lock);
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		spin_lock_bh(&priv->fs.arfs.arfs_lock);
+		spin_lock_bh(&priv->fs.arfs->arfs_lock);
 		hlist_del(&arfs_rule->hlist);
-		spin_unlock_bh(&priv->fs.arfs.arfs_lock);
+		spin_unlock_bh(&priv->fs.arfs->arfs_lock);
 
 		mutex_unlock(&priv->state_lock);
 		kfree(arfs_rule);
@@ -609,7 +648,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
 	tuple->dst_port = fk->ports.dst;
 
 	rule->flow_id = flow_id;
-	rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
+	rule->filter_id = priv->fs.arfs->last_filter_id++ % RPS_NO_FILTER;
 
 	hlist_add_head(&rule->hlist,
 		       arfs_hash_bucket(arfs_t, tuple->src_port,
@@ -653,7 +692,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 			u16 rxq_index, u32 flow_id)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
+	struct mlx5e_arfs_tables *arfs = priv->fs.arfs;
 	struct arfs_table *arfs_t;
 	struct arfs_rule *arfs_rule;
 	struct flow_keys fk;
@@ -687,7 +726,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 			return -ENOMEM;
 		}
 	}
-	queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
+	queue_work(priv->fs.arfs->wq, &arfs_rule->arfs_work);
 	spin_unlock_bh(&arfs->arfs_lock);
 	return arfs_rule->filter_id;
 }
-- 
cgit v1.2.3


From 6783f0a21a3ce60e2ac9e56814afcbb01f98cc2e Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@nvidia.com>
Date: Fri, 26 Feb 2021 21:17:12 -0800
Subject: net/mlx5e: Dynamic alloc vlan table for netdev when needed

Dynamic allocate vlan table in mlx5e_priv for EN netdev
when needed. Don't allocate it for representor netdev.

Signed-off-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h   |  18 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c   | 132 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   |   4 +-
 4 files changed, 90 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index cbfe31fb2b12..373bd89484cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -49,18 +49,10 @@ struct mlx5e_promisc_table {
 	struct mlx5_flow_handle	*rule;
 };
 
-struct mlx5e_vlan_table {
-	struct mlx5e_flow_table		ft;
-	DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
-	DECLARE_BITMAP(active_svlans, VLAN_N_VID);
-	struct mlx5_flow_handle	*active_cvlans_rule[VLAN_N_VID];
-	struct mlx5_flow_handle	*active_svlans_rule[VLAN_N_VID];
-	struct mlx5_flow_handle	*untagged_rule;
-	struct mlx5_flow_handle	*any_cvlan_rule;
-	struct mlx5_flow_handle	*any_svlan_rule;
-	struct mlx5_flow_handle	*trap_rule;
-	bool			cvlan_filter_disabled;
-};
+/* Forward declaration and APIs to get private fields of vlan_table */
+struct mlx5e_vlan_table;
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan);
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan);
 
 struct mlx5e_l2_table {
 	struct mlx5e_flow_table    ft;
@@ -231,7 +223,7 @@ struct mlx5e_flow_steering {
 #endif
 	struct mlx5e_tc_table           tc;
 	struct mlx5e_promisc_table      promisc;
-	struct mlx5e_vlan_table         vlan;
+	struct mlx5e_vlan_table         *vlan;
 	struct mlx5e_l2_table           l2;
 	struct mlx5e_ttc_table          ttc;
 	struct mlx5e_ttc_table          inner_ttc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 98f0b857947e..0d571a0c76d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -108,6 +108,29 @@ static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn)
 	kfree(hn);
 }
 
+struct mlx5e_vlan_table {
+	struct mlx5e_flow_table		ft;
+	DECLARE_BITMAP(active_cvlans, VLAN_N_VID);
+	DECLARE_BITMAP(active_svlans, VLAN_N_VID);
+	struct mlx5_flow_handle	*active_cvlans_rule[VLAN_N_VID];
+	struct mlx5_flow_handle	*active_svlans_rule[VLAN_N_VID];
+	struct mlx5_flow_handle	*untagged_rule;
+	struct mlx5_flow_handle	*any_cvlan_rule;
+	struct mlx5_flow_handle	*any_svlan_rule;
+	struct mlx5_flow_handle	*trap_rule;
+	bool			cvlan_filter_disabled;
+};
+
+unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan)
+{
+	return vlan->active_svlans;
+}
+
+struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan)
+{
+	return vlan->ft.t;
+}
+
 static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 {
 	struct net_device *ndev = priv->netdev;
@@ -119,7 +142,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 	int i;
 
 	list_size = 0;
-	for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID)
+	for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID)
 		list_size++;
 
 	max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list);
@@ -136,7 +159,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv)
 		return -ENOMEM;
 
 	i = 0;
-	for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+	for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
 		if (i >= list_size)
 			break;
 		vlans[i++] = vlan;
@@ -163,7 +186,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 enum mlx5e_vlan_rule_type rule_type,
 				 u16 vid, struct mlx5_flow_spec *spec)
 {
-	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
+	struct mlx5_flow_table *ft = priv->fs.vlan->ft.t;
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_handle **rule_p;
 	MLX5_DECLARE_FLOW_ACT(flow_act);
@@ -180,24 +203,24 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		 * disabled in match value means both S & C tags
 		 * don't exist (untagged of both)
 		 */
-		rule_p = &priv->fs.vlan.untagged_rule;
+		rule_p = &priv->fs.vlan->untagged_rule;
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
-		rule_p = &priv->fs.vlan.any_cvlan_rule;
+		rule_p = &priv->fs.vlan->any_cvlan_rule;
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
-		rule_p = &priv->fs.vlan.any_svlan_rule;
+		rule_p = &priv->fs.vlan->any_svlan_rule;
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.svlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
 		break;
 	case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
-		rule_p = &priv->fs.vlan.active_svlans_rule[vid];
+		rule_p = &priv->fs.vlan->active_svlans_rule[vid];
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.svlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1);
@@ -207,7 +230,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 			 vid);
 		break;
 	default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */
-		rule_p = &priv->fs.vlan.active_cvlans_rule[vid];
+		rule_p = &priv->fs.vlan->active_cvlans_rule[vid];
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1);
@@ -257,33 +280,33 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
 {
 	switch (rule_type) {
 	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
-		if (priv->fs.vlan.untagged_rule) {
-			mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
-			priv->fs.vlan.untagged_rule = NULL;
+		if (priv->fs.vlan->untagged_rule) {
+			mlx5_del_flow_rules(priv->fs.vlan->untagged_rule);
+			priv->fs.vlan->untagged_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID:
-		if (priv->fs.vlan.any_cvlan_rule) {
-			mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule);
-			priv->fs.vlan.any_cvlan_rule = NULL;
+		if (priv->fs.vlan->any_cvlan_rule) {
+			mlx5_del_flow_rules(priv->fs.vlan->any_cvlan_rule);
+			priv->fs.vlan->any_cvlan_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID:
-		if (priv->fs.vlan.any_svlan_rule) {
-			mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule);
-			priv->fs.vlan.any_svlan_rule = NULL;
+		if (priv->fs.vlan->any_svlan_rule) {
+			mlx5_del_flow_rules(priv->fs.vlan->any_svlan_rule);
+			priv->fs.vlan->any_svlan_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID:
-		if (priv->fs.vlan.active_svlans_rule[vid]) {
-			mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]);
-			priv->fs.vlan.active_svlans_rule[vid] = NULL;
+		if (priv->fs.vlan->active_svlans_rule[vid]) {
+			mlx5_del_flow_rules(priv->fs.vlan->active_svlans_rule[vid]);
+			priv->fs.vlan->active_svlans_rule[vid] = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID:
-		if (priv->fs.vlan.active_cvlans_rule[vid]) {
-			mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]);
-			priv->fs.vlan.active_cvlans_rule[vid] = NULL;
+		if (priv->fs.vlan->active_cvlans_rule[vid]) {
+			mlx5_del_flow_rules(priv->fs.vlan->active_cvlans_rule[vid]);
+			priv->fs.vlan->active_cvlans_rule[vid] = NULL;
 		}
 		mlx5e_vport_context_update_vlans(priv);
 		break;
@@ -330,27 +353,27 @@ mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num)
 
 int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num)
 {
-	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
+	struct mlx5_flow_table *ft = priv->fs.vlan->ft.t;
 	struct mlx5_flow_handle *rule;
 	int err;
 
 	rule = mlx5e_add_trap_rule(ft, trap_id, tir_num);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
-		priv->fs.vlan.trap_rule = NULL;
+		priv->fs.vlan->trap_rule = NULL;
 		netdev_err(priv->netdev, "%s: add VLAN trap rule failed, err %d\n",
 			   __func__, err);
 		return err;
 	}
-	priv->fs.vlan.trap_rule = rule;
+	priv->fs.vlan->trap_rule = rule;
 	return 0;
 }
 
 void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv)
 {
-	if (priv->fs.vlan.trap_rule) {
-		mlx5_del_flow_rules(priv->fs.vlan.trap_rule);
-		priv->fs.vlan.trap_rule = NULL;
+	if (priv->fs.vlan->trap_rule) {
+		mlx5_del_flow_rules(priv->fs.vlan->trap_rule);
+		priv->fs.vlan->trap_rule = NULL;
 	}
 }
 
@@ -382,10 +405,10 @@ void mlx5e_remove_mac_trap(struct mlx5e_priv *priv)
 
 void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
 {
-	if (!priv->fs.vlan.cvlan_filter_disabled)
+	if (!priv->fs.vlan->cvlan_filter_disabled)
 		return;
 
-	priv->fs.vlan.cvlan_filter_disabled = false;
+	priv->fs.vlan->cvlan_filter_disabled = false;
 	if (priv->netdev->flags & IFF_PROMISC)
 		return;
 	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
@@ -393,10 +416,10 @@ void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv)
 
 void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv)
 {
-	if (priv->fs.vlan.cvlan_filter_disabled)
+	if (priv->fs.vlan->cvlan_filter_disabled)
 		return;
 
-	priv->fs.vlan.cvlan_filter_disabled = true;
+	priv->fs.vlan->cvlan_filter_disabled = true;
 	if (priv->netdev->flags & IFF_PROMISC)
 		return;
 	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0);
@@ -406,11 +429,11 @@ static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid)
 {
 	int err;
 
-	set_bit(vid, priv->fs.vlan.active_cvlans);
+	set_bit(vid, priv->fs.vlan->active_cvlans);
 
 	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
 	if (err)
-		clear_bit(vid, priv->fs.vlan.active_cvlans);
+		clear_bit(vid, priv->fs.vlan->active_cvlans);
 
 	return err;
 }
@@ -420,11 +443,11 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid)
 	struct net_device *netdev = priv->netdev;
 	int err;
 
-	set_bit(vid, priv->fs.vlan.active_svlans);
+	set_bit(vid, priv->fs.vlan->active_svlans);
 
 	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
 	if (err) {
-		clear_bit(vid, priv->fs.vlan.active_svlans);
+		clear_bit(vid, priv->fs.vlan->active_svlans);
 		return err;
 	}
 
@@ -456,10 +479,10 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 		return 0; /* no vlan table for uplink rep */
 
 	if (be16_to_cpu(proto) == ETH_P_8021Q) {
-		clear_bit(vid, priv->fs.vlan.active_cvlans);
+		clear_bit(vid, priv->fs.vlan->active_cvlans);
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid);
 	} else if (be16_to_cpu(proto) == ETH_P_8021AD) {
-		clear_bit(vid, priv->fs.vlan.active_svlans);
+		clear_bit(vid, priv->fs.vlan->active_svlans);
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid);
 		netdev_update_features(dev);
 	}
@@ -473,14 +496,14 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
 
 	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
 
-	for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+	for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
 		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
 	}
 
-	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+	for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID)
 		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
 
-	if (priv->fs.vlan.cvlan_filter_disabled)
+	if (priv->fs.vlan->cvlan_filter_disabled)
 		mlx5e_add_any_vid_rules(priv);
 }
 
@@ -490,11 +513,11 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
 
 	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
 
-	for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) {
+	for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) {
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i);
 	}
 
-	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
+	for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID)
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
 
 	WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
@@ -504,7 +527,7 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
 	/* must be called after DESTROY bit is set and
 	 * set_rx_mode is called and flushed
 	 */
-	if (priv->fs.vlan.cvlan_filter_disabled)
+	if (priv->fs.vlan->cvlan_filter_disabled)
 		mlx5e_del_any_vid_rules(priv);
 }
 
@@ -1692,10 +1715,15 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft)
 
 static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
 {
-	struct mlx5e_flow_table *ft = &priv->fs.vlan.ft;
 	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5e_flow_table *ft;
 	int err;
 
+	priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL);
+	if (!priv->fs.vlan)
+		return -ENOMEM;
+
+	ft = &priv->fs.vlan->ft;
 	ft->num_groups = 0;
 
 	ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
@@ -1703,12 +1731,11 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
 	ft_attr.prio = MLX5E_NIC_PRIO;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
-
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
-		ft->t = NULL;
-		return err;
+		goto err_free_t;
 	}
+
 	ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL);
 	if (!ft->g) {
 		err = -ENOMEM;
@@ -1727,7 +1754,9 @@ err_free_g:
 	kfree(ft->g);
 err_destroy_vlan_table:
 	mlx5_destroy_flow_table(ft->t);
-	ft->t = NULL;
+err_free_t:
+	kvfree(priv->fs.vlan);
+	priv->fs.vlan = NULL;
 
 	return err;
 }
@@ -1735,7 +1764,8 @@ err_destroy_vlan_table:
 static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
 {
 	mlx5e_del_vlan_rules(priv);
-	mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
+	mlx5e_destroy_flow_table(&priv->fs.vlan->ft);
+	kvfree(priv->fs.vlan);
 }
 
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index db2942b61fd5..773449c1424b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3823,7 +3823,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 
 	mutex_lock(&priv->state_lock);
 	params = &priv->channels.params;
-	if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
+	if (!priv->fs.vlan ||
+	    !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) {
 		/* HW strips the outer C-tag header, this is a problem
 		 * for S-tag traffic.
 		 */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a4a4cdecbdea..bb1e0d442b5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -906,7 +906,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
 			if (IS_ERR(dest[dest_ix].ft))
 				return ERR_CAST(dest[dest_ix].ft);
 		} else {
-			dest[dest_ix].ft = priv->fs.vlan.ft.t;
+			dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
 		}
 		dest_ix++;
 	}
@@ -4753,7 +4753,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
 	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
 	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
-	attr.default_ft = priv->fs.vlan.ft.t;
+	attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
 
 	tc->chains = mlx5_chains_create(dev, &attr);
 	if (IS_ERR(tc->chains)) {
-- 
cgit v1.2.3


From 007bdc12d4b466566784c1a39b9fb3985f875b3d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 1 Apr 2021 15:25:56 -0700
Subject: bpf, selftests: test_maps generating unrecognized data section

With a relatively recent clang master branch test_map skips a section,

 libbpf: elf: skipping unrecognized data section(5) .rodata.str1.1

the cause is some pointless strings from bpf_printks in the BPF program
loaded during testing. After just removing the prints to fix above error
Daniel points out the program is a bit pointless and could be simply the
empty program returning SK_PASS.

Here we do just that and return simply SK_PASS. This program is used with
test_maps selftests to test insert/remove of a program into the sockmap
and sockhash maps. Its not testing actual functionality of the TCP
sockmap programs, these are tested from test_sockmap. So we shouldn't
lose in test coverage and fix above warnings. This original test was
added before test_sockmap existed and has been copied around ever since,
clean it up now.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/161731595664.74613.1603087410166945302.stgit@john-XPS-13-9370
---
 tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..eeaf6e75c9a2 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -8,18 +8,6 @@ int _version SEC("version") = 1;
 SEC("sk_msg1")
 int bpf_prog1(struct sk_msg_md *msg)
 {
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-
-	char *d;
-
-	if (data + 8 > data_end)
-		return SK_DROP;
-
-	bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
-	d = (char *)data;
-	bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
 	return SK_PASS;
 }
 
-- 
cgit v1.2.3


From 2ec9898e9c70b93a5741af3f6af6dbceca569a47 Mon Sep 17 00:00:00 2001
From: He Fengqing <hefengqing@huawei.com>
Date: Wed, 31 Mar 2021 07:51:35 +0000
Subject: bpf: Remove unused parameter from ___bpf_prog_run

'stack' parameter is not used in ___bpf_prog_run() after f696b8f471ec
("bpf: split bpf core interpreter"), the base address have been set to
FP reg. So consequently remove it.

Signed-off-by: He Fengqing <hefengqing@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210331075135.3850782-1-hefengqing@huawei.com
---
 kernel/bpf/core.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f5423251c118..5e31ee9f7512 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1363,11 +1363,10 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
  *	__bpf_prog_run - run eBPF program on a given context
  *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
  *	@insn: is the array of eBPF instructions
- *	@stack: is the eBPF storage stack
  *
  * Decode and execute eBPF instructions.
  */
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 {
 #define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
 #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
@@ -1701,7 +1700,7 @@ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn
 \
 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
 	ARG1 = (u64) (unsigned long) ctx; \
-	return ___bpf_prog_run(regs, insn, stack); \
+	return ___bpf_prog_run(regs, insn); \
 }
 
 #define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
@@ -1718,7 +1717,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
 	BPF_R3 = r3; \
 	BPF_R4 = r4; \
 	BPF_R5 = r5; \
-	return ___bpf_prog_run(regs, insn, stack); \
+	return ___bpf_prog_run(regs, insn); \
 }
 
 #define EVAL1(FN, X) FN(X)
-- 
cgit v1.2.3


From 2daae89666ad253281bb3d6a027c00a702c02eff Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 1 Apr 2021 14:46:37 +0800
Subject: bpf, cgroup: Delete repeated struct bpf_prog declaration

struct bpf_prog is declared twice. There is one declaration which is
independent on the macro at 18th line. So the below one is not needed
though. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210401064637.993327-1-wanjiabing@vivo.com
---
 include/linux/bpf-cgroup.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 6a29fe11485d..8b77d08d4b47 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -458,7 +458,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr);
 #else
 
-struct bpf_prog;
 struct cgroup_bpf {};
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
-- 
cgit v1.2.3


From 6ac4c6f887f5a8efb6a6952798c09a2562022966 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 1 Apr 2021 15:20:37 +0800
Subject: bpf: Remove repeated struct btf_type declaration

struct btf_type is declared twice. One is declared at 35th line. The below
one is not needed, hence remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210401072037.995849-1-wanjiabing@vivo.com
---
 include/linux/bpf.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9fdd839b418c..ff8cd68c01b3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -928,7 +928,6 @@ struct bpf_link_primer {
 };
 
 struct bpf_struct_ops_value;
-struct btf_type;
 struct btf_member;
 
 #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
-- 
cgit v1.2.3


From f07669df4c8df0b7134ae94be20a8b61bd157168 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 2 Apr 2021 09:26:34 +0800
Subject: libbpf: Remove redundant semi-colon

Remove redundant semi-colon in finalize_btf_ext().

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210402012634.1965453-1-yangyingliang@huawei.com
---
 tools/lib/bpf/linker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 46b16cbdcda3..4e08bc07e635 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1895,7 +1895,7 @@ static int finalize_btf_ext(struct bpf_linker *linker)
 	hdr->func_info_len = funcs_sz;
 	hdr->line_info_off = funcs_sz;
 	hdr->line_info_len = lines_sz;
-	hdr->core_relo_off = funcs_sz + lines_sz;;
+	hdr->core_relo_off = funcs_sz + lines_sz;
 	hdr->core_relo_len = core_relos_sz;
 
 	if (funcs_sz) {
-- 
cgit v1.2.3


From f73ea1eb4cce66376cc1dd94b4a083ffb9eeb123 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 2 Apr 2021 17:29:21 -0700
Subject: bpf: selftests: Specify CONFIG_DYNAMIC_FTRACE in the testing config

The tracing test and the recent kfunc call test require
CONFIG_DYNAMIC_FTRACE.  This patch adds it to the config file.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210403002921.3419721-1-kafai@fb.com
---
 tools/testing/selftests/bpf/config | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37e1f303fc11..5192305159ec 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_READ_POLICY=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
-- 
cgit v1.2.3


From daf47a7c109a166c812b2d701d66aa8905c54ec1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 30 Mar 2021 08:42:32 +0200
Subject: netfilter: ipvs: do not printk on netns creation

This causes dmesg spew during normal operation, so remove this.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Julian Anastasov <ja@ssi.bg>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index cf925906f59b..ef1f45e43b63 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -591,8 +591,6 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 		ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
 		if (ret)
 			goto err_unreg;
-		pr_info("%s: loaded support on port[%d] = %u\n",
-			app->name, i, ports[i]);
 	}
 	return 0;
 
-- 
cgit v1.2.3


From dadf33c9f6b5f694e842d224a4d071f59ac665ee Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 2 Apr 2021 14:45:44 +0300
Subject: netfilter: nftables: fix a warning message in
 nf_tables_commit_audit_collect()

The first argument of a WARN_ONCE() is a condition.  This WARN_ONCE()
will only print the table name, and is potentially problematic if the
table name has a %s in it.

Fixes: c520292f29b8 ("audit: log nftables configuration change events once per table")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 005f1c620fc0..edb51c9ebab0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7978,7 +7978,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl,
 		if (adp->table == table)
 			goto found;
 	}
-	WARN_ONCE("table=%s not expected in commit list", table->name);
+	WARN_ONCE(1, "table=%s not expected in commit list", table->name);
 	return;
 found:
 	adp->entries++;
-- 
cgit v1.2.3


From 8c56049fec6c3f6dd2388d9e90f4d669fb33ae0d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 31 Mar 2021 23:10:15 +0200
Subject: netfilter: nftables: remove documentation on static functions

Since 4f16d25c68ec ("netfilter: nftables: add nft_parse_register_load()
and use it") and 345023b0db31 ("netfilter: nftables: add
nft_parse_register_store() and use it"), the following functions are not
exported symbols anymore:

- nft_parse_register()
- nft_validate_register_load()
- nft_validate_register_store()

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index edb51c9ebab0..a24de59e6c69 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8615,15 +8615,6 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
 }
 EXPORT_SYMBOL_GPL(nft_parse_u32_check);
 
-/**
- *	nft_parse_register - parse a register value from a netlink attribute
- *
- *	@attr: netlink attribute
- *
- *	Parse and translate a register value from a netlink attribute.
- *	Registers used to be 128 bit wide, these register numbers will be
- *	mapped to the corresponding 32 bit register numbers.
- */
 static unsigned int nft_parse_register(const struct nlattr *attr)
 {
 	unsigned int reg;
@@ -8659,15 +8650,6 @@ int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
 }
 EXPORT_SYMBOL_GPL(nft_dump_register);
 
-/**
- *	nft_validate_register_load - validate a load from a register
- *
- *	@reg: the register number
- *	@len: the length of the data
- *
- * 	Validate that the input register is one of the general purpose
- * 	registers and that the length of the load is within the bounds.
- */
 static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
 {
 	if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
@@ -8695,20 +8677,6 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
 }
 EXPORT_SYMBOL_GPL(nft_parse_register_load);
 
-/**
- *	nft_validate_register_store - validate an expressions' register store
- *
- *	@ctx: context of the expression performing the load
- * 	@reg: the destination register number
- * 	@data: the data to load
- * 	@type: the data type
- * 	@len: the length of the data
- *
- * 	Validate that a data load uses the appropriate data type for
- * 	the destination register and the length is within the bounds.
- * 	A value of NULL for the data means that its runtime gathered
- * 	data.
- */
 static int nft_validate_register_store(const struct nft_ctx *ctx,
 				       enum nft_registers reg,
 				       const struct nft_data *data,
-- 
cgit v1.2.3


From 8deec94c6040bb4a767f6e9456a0a44c7f2e713e Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:12 +0800
Subject: net: stmmac: set IRQ affinity hint for multi MSI vectors

Certain platform likes Intel mGBE has independent hardware IRQ resources
for TX and RX DMA operation. In preparation to support XDP TX, we add IRQ
affinity hint to group both RX and TX queue of the same queue ID to the
same CPU.

Changes in v2:
 - IRQ affinity hint need to set to null before IRQ is released.
   Thanks to issue reported by Song, Yoong Siang.

Reported-by: Song, Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d34388b1ffcc..9d63e8c365ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3005,15 +3005,19 @@ static void stmmac_free_irq(struct net_device *dev,
 		fallthrough;
 	case REQ_IRQ_ERR_TX:
 		for (j = irq_idx - 1; j >= 0; j--) {
-			if (priv->tx_irq[j] > 0)
+			if (priv->tx_irq[j] > 0) {
+				irq_set_affinity_hint(priv->tx_irq[j], NULL);
 				free_irq(priv->tx_irq[j], &priv->tx_queue[j]);
+			}
 		}
 		irq_idx = priv->plat->rx_queues_to_use;
 		fallthrough;
 	case REQ_IRQ_ERR_RX:
 		for (j = irq_idx - 1; j >= 0; j--) {
-			if (priv->rx_irq[j] > 0)
+			if (priv->rx_irq[j] > 0) {
+				irq_set_affinity_hint(priv->rx_irq[j], NULL);
 				free_irq(priv->rx_irq[j], &priv->rx_queue[j]);
+			}
 		}
 
 		if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq)
@@ -3045,6 +3049,7 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
 {
 	enum request_irq_err irq_err = REQ_IRQ_ERR_NO;
 	struct stmmac_priv *priv = netdev_priv(dev);
+	cpumask_t cpu_mask;
 	int irq_idx = 0;
 	char *int_name;
 	int ret;
@@ -3153,6 +3158,9 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
 			irq_idx = i;
 			goto irq_error;
 		}
+		cpumask_clear(&cpu_mask);
+		cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
+		irq_set_affinity_hint(priv->rx_irq[i], &cpu_mask);
 	}
 
 	/* Request Tx MSI irq */
@@ -3173,6 +3181,9 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev)
 			irq_idx = i;
 			goto irq_error;
 		}
+		cpumask_clear(&cpu_mask);
+		cpumask_set_cpu(i % num_online_cpus(), &cpu_mask);
+		irq_set_affinity_hint(priv->tx_irq[i], &cpu_mask);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From d08d32d101938a26e692c7aee0d452fbeec9d3f3 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:13 +0800
Subject: net: stmmac: make SPH enable/disable to be configurable

SPH functionality splits header and payload according to split mode and
offsef fields (SPLM and SPLOFST). It is beneficials for Linux network
stack RX processing however it adds a lot of complexity in XDP
processing.

So, this patch makes the split-header (SPH) capability of the controller
is stored in "priv->sph_cap" and the enabling/disabling of SPH is decided
by "priv->sph".

This is to prepare initial XDP enabling for stmmac to disable the use of
SPH whenever XDP is enabled.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 9966f6f10905..e293423f98c3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -160,6 +160,7 @@ struct stmmac_priv {
 	bool tx_path_in_lpi_mode;
 	bool tso;
 	int sph;
+	int sph_cap;
 	u32 sarc_type;
 
 	unsigned int dma_buf_sz;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9d63e8c365ae..18e34a1e2367 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2858,6 +2858,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 rx_cnt = priv->plat->rx_queues_to_use;
 	u32 tx_cnt = priv->plat->tx_queues_to_use;
+	bool sph_en;
 	u32 chan;
 	int ret;
 
@@ -2952,10 +2953,10 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	}
 
 	/* Enable Split Header */
-	if (priv->sph && priv->hw->rx_csum) {
-		for (chan = 0; chan < rx_cnt; chan++)
-			stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
-	}
+	sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+	for (chan = 0; chan < rx_cnt; chan++)
+		stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+
 
 	/* VLAN Tag Insertion */
 	if (priv->dma_cap.vlins)
@@ -5708,7 +5709,8 @@ int stmmac_dvr_probe(struct device *device,
 
 	if (priv->dma_cap.sphen) {
 		ndev->hw_features |= NETIF_F_GRO;
-		priv->sph = true;
+		priv->sph_cap = true;
+		priv->sph = priv->sph_cap;
 		dev_info(priv->device, "SPH feature enabled\n");
 	}
 
-- 
cgit v1.2.3


From d96febedfde250735ac6ed50966ec5453feda355 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:14 +0800
Subject: net: stmmac: arrange Tx tail pointer update to
 stmmac_flush_tx_descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch organizes TX tail pointer update into a new function called
stmmac_flush_tx_descriptors() so that we can reuse it in stmmac_xmit(),
stmmac_tso_xmit() and up-coming XDP implementation.

Changes to v2:
 - Fix for warning: unused variable ‘desc_size’
   https://patchwork.hopto.org/static/nipa/457321/12170149/build_32bit/stderr

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 56 +++++++++++------------
 1 file changed, 26 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 18e34a1e2367..cb1b2a180429 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3518,6 +3518,28 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des,
 	}
 }
 
+static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	int desc_size;
+
+	if (likely(priv->extend_desc))
+		desc_size = sizeof(struct dma_extended_desc);
+	else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+		desc_size = sizeof(struct dma_edesc);
+	else
+		desc_size = sizeof(struct dma_desc);
+
+	/* The own bit must be the latest setting done when prepare the
+	 * descriptor and then barrier is needed to make sure that
+	 * all is coherent before granting the DMA engine.
+	 */
+	wmb();
+
+	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
+	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+}
+
 /**
  *  stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO)
  *  @skb : the socket buffer
@@ -3549,10 +3571,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct dma_desc *desc, *first, *mss_desc = NULL;
 	struct stmmac_priv *priv = netdev_priv(dev);
-	int desc_size, tmp_pay_len = 0, first_tx;
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	u32 queue = skb_get_queue_mapping(skb);
 	unsigned int first_entry, tx_packets;
+	int tmp_pay_len = 0, first_tx;
 	struct stmmac_tx_queue *tx_q;
 	bool has_vlan, set_ic;
 	u8 proto_hdr_len, hdr;
@@ -3750,12 +3772,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		stmmac_set_tx_owner(priv, mss_desc);
 	}
 
-	/* The own bit must be the latest setting done when prepare the
-	 * descriptor and then barrier is needed to make sure that
-	 * all is coherent before granting the DMA engine.
-	 */
-	wmb();
-
 	if (netif_msg_pktdata(priv)) {
 		pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n",
 			__func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry,
@@ -3766,13 +3782,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
-	if (tx_q->tbs & STMMAC_TBS_AVAIL)
-		desc_size = sizeof(struct dma_edesc);
-	else
-		desc_size = sizeof(struct dma_desc);
-
-	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
-	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+	stmmac_flush_tx_descriptors(priv, queue);
 	stmmac_tx_timer_arm(priv, queue);
 
 	return NETDEV_TX_OK;
@@ -3802,10 +3812,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int gso = skb_shinfo(skb)->gso_type;
 	struct dma_edesc *tbs_desc = NULL;
-	int entry, desc_size, first_tx;
 	struct dma_desc *desc, *first;
 	struct stmmac_tx_queue *tx_q;
 	bool has_vlan, set_ic;
+	int entry, first_tx;
 	dma_addr_t des;
 
 	tx_q = &priv->tx_queue[queue];
@@ -4007,25 +4017,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	stmmac_set_tx_owner(priv, first);
 
-	/* The own bit must be the latest setting done when prepare the
-	 * descriptor and then barrier is needed to make sure that
-	 * all is coherent before granting the DMA engine.
-	 */
-	wmb();
-
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
 
-	if (likely(priv->extend_desc))
-		desc_size = sizeof(struct dma_extended_desc);
-	else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-		desc_size = sizeof(struct dma_edesc);
-	else
-		desc_size = sizeof(struct dma_desc);
-
-	tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size);
-	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+	stmmac_flush_tx_descriptors(priv, queue);
 	stmmac_tx_timer_arm(priv, queue);
 
 	return NETDEV_TX_OK;
-- 
cgit v1.2.3


From 5fabb01207a2d3439a6abe1d08640de9c942945f Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:15 +0800
Subject: net: stmmac: Add initial XDP support

This patch adds the initial XDP support to stmmac driver. It supports
XDP_PASS, XDP_DROP and XDP_ABORTED actions. Upcoming patches will add
support for XDP_TX and XDP_REDIRECT.

To support XDP headroom, this patch adds page_offset into RX buffer and
change the dma_sync_single_for_device|cpu(). The DMA address used for
RX operation are changed to take into page_offset too. As page_pool
can handle dma_sync_single_for_device() on behalf of driver with
PP_FLAG_DMA_SYNC_DEV flag, we skip doing that in stmmac driver.

Current stmmac driver supports split header support (SPH) in RX but
the flexibility of splitting header and payload at different position
makes it very complex to be supported for XDP processing. In addition,
jumbo frame is not supported in XDP to keep the initial codes simple.

This patch has been tested with the sample app "xdp1" located in
samples/bpf directory for both SKB and Native (XDP) mode. The burst
traffic generated using pktgen_sample03_burst_single_flow.sh in
samples/pktgen directory.

Changes in v3:
 - factor in xdp header and tail adjustment done by XDP program.
   Thanks to Jakub Kicinski for pointing out the gap in v2.

Changes in v2:
 - fix for "warning: variable 'len' set but not used" reported by lkp.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Makefile      |   1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  21 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 150 ++++++++++++++++++----
 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c  |  40 ++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h  |  12 ++
 5 files changed, 197 insertions(+), 27 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h

diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 366740ab9c5a..f2e478b884b0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -6,6 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
 	      mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o	\
 	      dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \
 	      stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \
+	      stmmac_xdp.o \
 	      $(stmmac-y)
 
 stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e293423f98c3..e72224c8fbac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -68,8 +68,9 @@ struct stmmac_tx_queue {
 
 struct stmmac_rx_buffer {
 	struct page *page;
-	struct page *sec_page;
 	dma_addr_t addr;
+	__u32 page_offset;
+	struct page *sec_page;
 	dma_addr_t sec_addr;
 };
 
@@ -269,6 +270,9 @@ struct stmmac_priv {
 
 	/* Receive Side Scaling */
 	struct stmmac_rss rss;
+
+	/* XDP BPF Program */
+	struct bpf_prog *xdp_prog;
 };
 
 enum stmmac_state {
@@ -285,6 +289,8 @@ void stmmac_set_ethtool_ops(struct net_device *netdev);
 
 void stmmac_ptp_register(struct stmmac_priv *priv);
 void stmmac_ptp_unregister(struct stmmac_priv *priv);
+int stmmac_open(struct net_device *dev);
+int stmmac_release(struct net_device *dev);
 int stmmac_resume(struct device *dev);
 int stmmac_suspend(struct device *dev);
 int stmmac_dvr_remove(struct device *dev);
@@ -298,6 +304,19 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
 int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled);
 void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable);
 
+static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv)
+{
+	return !!priv->xdp_prog;
+}
+
+static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
+{
+	if (stmmac_xdp_is_enabled(priv))
+		return XDP_PACKET_HEADROOM;
+
+	return 0;
+}
+
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
 			 struct ethtool_test *etest, u64 *buf);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cb1b2a180429..0dad8ab93eb5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -38,9 +38,11 @@
 #include <linux/net_tstamp.h>
 #include <linux/phylink.h>
 #include <linux/udp.h>
+#include <linux/bpf_trace.h>
 #include <net/pkt_cls.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
+#include "stmmac_xdp.h"
 #include <linux/reset.h>
 #include <linux/of_mdio.h>
 #include "dwmac1000.h"
@@ -67,6 +69,9 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define STMMAC_TX_THRESH(x)	((x)->dma_tx_size / 4)
 #define STMMAC_RX_THRESH(x)	((x)->dma_rx_size / 4)
 
+#define STMMAC_XDP_PASS		0
+#define STMMAC_XDP_CONSUMED	BIT(0)
+
 static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
 MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
@@ -1384,6 +1389,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 	buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
 	if (!buf->page)
 		return -ENOMEM;
+	buf->page_offset = stmmac_rx_offset(priv);
 
 	if (priv->sph) {
 		buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
@@ -1397,7 +1403,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 		stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
 	}
 
-	buf->addr = page_pool_get_dma_addr(buf->page);
+	buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset;
+
 	stmmac_set_desc_addr(priv, p, buf->addr);
 	if (priv->dma_buf_sz == BUF_SIZE_16KiB)
 		stmmac_init_desc3(priv, p);
@@ -1503,7 +1510,8 @@ static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
 				if (!buf->page)
 					goto err_reinit_rx_buffers;
 
-				buf->addr = page_pool_get_dma_addr(buf->page);
+				buf->addr = page_pool_get_dma_addr(buf->page) +
+					    buf->page_offset;
 			}
 
 			if (priv->sph && !buf->sec_page) {
@@ -1821,6 +1829,7 @@ static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
  */
 static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 {
+	bool xdp_prog = stmmac_xdp_is_enabled(priv);
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	int ret = -ENOMEM;
 	u32 queue;
@@ -1834,13 +1843,15 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 		rx_q->queue_index = queue;
 		rx_q->priv_data = priv;
 
-		pp_params.flags = PP_FLAG_DMA_MAP;
+		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
 		pp_params.pool_size = priv->dma_rx_size;
 		num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
 		pp_params.order = ilog2(num_pages);
 		pp_params.nid = dev_to_node(priv->device);
 		pp_params.dev = priv->device;
-		pp_params.dma_dir = DMA_FROM_DEVICE;
+		pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+		pp_params.offset = stmmac_rx_offset(priv);
+		pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages);
 
 		rx_q->page_pool = page_pool_create(&pp_params);
 		if (IS_ERR(rx_q->page_pool)) {
@@ -3268,7 +3279,7 @@ static int stmmac_request_irq(struct net_device *dev)
  *  0 on success and an appropriate (-)ve integer as defined in errno.h
  *  file on failure.
  */
-static int stmmac_open(struct net_device *dev)
+int stmmac_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int bfsize = 0;
@@ -3391,7 +3402,7 @@ static void stmmac_fpe_stop_wq(struct stmmac_priv *priv)
  *  Description:
  *  This is the stop entry point of the driver.
  */
-static int stmmac_release(struct net_device *dev)
+int stmmac_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	u32 chan;
@@ -4064,11 +4075,9 @@ static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb)
 static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-	int len, dirty = stmmac_rx_dirty(priv, queue);
+	int dirty = stmmac_rx_dirty(priv, queue);
 	unsigned int entry = rx_q->dirty_rx;
 
-	len = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
-
 	while (dirty-- > 0) {
 		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
 		struct dma_desc *p;
@@ -4091,18 +4100,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 				break;
 
 			buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
-
-			dma_sync_single_for_device(priv->device, buf->sec_addr,
-						   len, DMA_FROM_DEVICE);
 		}
 
-		buf->addr = page_pool_get_dma_addr(buf->page);
-
-		/* Sync whole allocation to device. This will invalidate old
-		 * data.
-		 */
-		dma_sync_single_for_device(priv->device, buf->addr, len,
-					   DMA_FROM_DEVICE);
+		buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset;
 
 		stmmac_set_desc_addr(priv, p, buf->addr);
 		if (priv->sph)
@@ -4181,6 +4181,42 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
 	return plen - len;
 }
 
+static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
+					   struct xdp_buff *xdp)
+{
+	struct bpf_prog *prog;
+	int res;
+	u32 act;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(priv->xdp_prog);
+	if (!prog) {
+		res = STMMAC_XDP_PASS;
+		goto unlock;
+	}
+
+	act = bpf_prog_run_xdp(prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		res = STMMAC_XDP_PASS;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(priv->dev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		res = STMMAC_XDP_CONSUMED;
+		break;
+	}
+
+unlock:
+	rcu_read_unlock();
+	return ERR_PTR(-res);
+}
+
 /**
  * stmmac_rx - manage the receive process
  * @priv: driver private structure
@@ -4196,8 +4232,14 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 	unsigned int count = 0, error = 0, len = 0;
 	int status = 0, coe = priv->hw->rx_csum;
 	unsigned int next_entry = rx_q->cur_rx;
+	enum dma_data_direction dma_dir;
 	unsigned int desc_size;
 	struct sk_buff *skb = NULL;
+	struct xdp_buff xdp;
+	int buf_sz;
+
+	dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
+	buf_sz = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
 
 	if (netif_msg_rx_status(priv)) {
 		void *rx_head;
@@ -4315,6 +4357,45 @@ read_again:
 		}
 
 		if (!skb) {
+			dma_sync_single_for_cpu(priv->device, buf->addr,
+						buf1_len, dma_dir);
+
+			xdp.data = page_address(buf->page) + buf->page_offset;
+			xdp.data_end = xdp.data + buf1_len;
+			xdp.data_hard_start = page_address(buf->page);
+			xdp_set_data_meta_invalid(&xdp);
+			xdp.frame_sz = buf_sz;
+
+			skb = stmmac_xdp_run_prog(priv, &xdp);
+
+			/* For Not XDP_PASS verdict */
+			if (IS_ERR(skb)) {
+				unsigned int xdp_res = -PTR_ERR(skb);
+
+				if (xdp_res & STMMAC_XDP_CONSUMED) {
+					page_pool_recycle_direct(rx_q->page_pool,
+								 buf->page);
+					buf->page = NULL;
+					priv->dev->stats.rx_dropped++;
+
+					/* Clear skb as it was set as
+					 * status by XDP program.
+					 */
+					skb = NULL;
+
+					if (unlikely((status & rx_not_ls)))
+						goto read_again;
+
+					count++;
+					continue;
+				}
+			}
+		}
+
+		if (!skb) {
+			/* XDP program may expand or reduce tail */
+			buf1_len = xdp.data_end - xdp.data;
+
 			skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
 			if (!skb) {
 				priv->dev->stats.rx_dropped++;
@@ -4322,10 +4403,8 @@ read_again:
 				goto drain_data;
 			}
 
-			dma_sync_single_for_cpu(priv->device, buf->addr,
-						buf1_len, DMA_FROM_DEVICE);
-			skb_copy_to_linear_data(skb, page_address(buf->page),
-						buf1_len);
+			/* XDP program may adjust header */
+			skb_copy_to_linear_data(skb, xdp.data, buf1_len);
 			skb_put(skb, buf1_len);
 
 			/* Data payload copied into SKB, page ready for recycle */
@@ -4333,9 +4412,9 @@ read_again:
 			buf->page = NULL;
 		} else if (buf1_len) {
 			dma_sync_single_for_cpu(priv->device, buf->addr,
-						buf1_len, DMA_FROM_DEVICE);
+						buf1_len, dma_dir);
 			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-					buf->page, 0, buf1_len,
+					buf->page, buf->page_offset, buf1_len,
 					priv->dma_buf_sz);
 
 			/* Data payload appended into SKB */
@@ -4345,7 +4424,7 @@ read_again:
 
 		if (buf2_len) {
 			dma_sync_single_for_cpu(priv->device, buf->sec_addr,
-						buf2_len, DMA_FROM_DEVICE);
+						buf2_len, dma_dir);
 			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 					buf->sec_page, 0, buf2_len,
 					priv->dma_buf_sz);
@@ -4503,6 +4582,11 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 		return -EBUSY;
 	}
 
+	if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) {
+		netdev_dbg(priv->dev, "Jumbo frames not supported for XDP\n");
+		return -EINVAL;
+	}
+
 	new_mtu = STMMAC_ALIGN(new_mtu);
 
 	/* If condition true, FIFO is too small or MTU too large */
@@ -4564,6 +4648,7 @@ static int stmmac_set_features(struct net_device *netdev,
 	stmmac_rx_ipc(priv, priv->hw);
 
 	sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+
 	for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
 		stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
 
@@ -5299,6 +5384,18 @@ del_vlan_error:
 	return ret;
 }
 
+static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return stmmac_xdp_set_prog(priv, bpf->prog, bpf->extack);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_open = stmmac_open,
 	.ndo_start_xmit = stmmac_xmit,
@@ -5317,6 +5414,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_set_mac_address = stmmac_set_mac_address,
 	.ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
+	.ndo_bpf = stmmac_bpf,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
new file mode 100644
index 000000000000..bf38d231860b
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Intel Corporation. */
+
+#include "stmmac.h"
+#include "stmmac_xdp.h"
+
+int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
+			struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = priv->dev;
+	struct bpf_prog *old_prog;
+	bool need_update;
+	bool if_running;
+
+	if_running = netif_running(dev);
+
+	if (prog && dev->mtu > ETH_DATA_LEN) {
+		/* For now, the driver doesn't support XDP functionality with
+		 * jumbo frames so we return error.
+		 */
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported");
+		return -EOPNOTSUPP;
+	}
+
+	need_update = !!priv->xdp_prog != !!prog;
+	if (if_running && need_update)
+		stmmac_release(dev);
+
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	/* Disable RX SPH for XDP operation */
+	priv->sph = priv->sph_cap && !stmmac_xdp_is_enabled(priv);
+
+	if (if_running && need_update)
+		stmmac_open(dev);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h
new file mode 100644
index 000000000000..93948569d92a
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _STMMAC_XDP_H_
+#define _STMMAC_XDP_H_
+
+#define STMMAC_MAX_RX_BUF_SIZE(num)	(((num) * PAGE_SIZE) - XDP_PACKET_HEADROOM)
+
+int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
+			struct netlink_ext_ack *extack);
+
+#endif /* _STMMAC_XDP_H_ */
-- 
cgit v1.2.3


From be8b38a722e68ffa069b7dfa887369c33d6ea886 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:16 +0800
Subject: net: stmmac: Add support for XDP_TX action

This patch adds support for XDP_TX action which enables XDP program to
transmit back received frames.

This patch has been tested with the "xdp2" app located in samples/bpf
dir. The DUT receives burst traffic packet generated using pktgen script
'pktgen_sample03_burst_single_flow.sh'.

v4: Moved stmmac_tx_timer_arm() to be done once at the end of NAPI RX.
    Fixed stmmac_xdp_xmit_back() to return STMMAC_XDP_CONSUMED if
    XDP buffer to frame conversion fails. Thanks to Jakub's input.

v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are
    sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski
    for pointing out.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  12 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 238 ++++++++++++++++++++--
 2 files changed, 232 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e72224c8fbac..a93e22a6be59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -36,12 +36,18 @@ struct stmmac_resources {
 	int tx_irq[MTL_MAX_TX_QUEUES];
 };
 
+enum stmmac_txbuf_type {
+	STMMAC_TXBUF_T_SKB,
+	STMMAC_TXBUF_T_XDP_TX,
+};
+
 struct stmmac_tx_info {
 	dma_addr_t buf;
 	bool map_as_page;
 	unsigned len;
 	bool last_segment;
 	bool is_jumbo;
+	enum stmmac_txbuf_type buf_type;
 };
 
 #define STMMAC_TBS_AVAIL	BIT(0)
@@ -57,7 +63,10 @@ struct stmmac_tx_queue {
 	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
 	struct dma_edesc *dma_entx;
 	struct dma_desc *dma_tx;
-	struct sk_buff **tx_skbuff;
+	union {
+		struct sk_buff **tx_skbuff;
+		struct xdp_frame **xdpf;
+	};
 	struct stmmac_tx_info *tx_skbuff_dma;
 	unsigned int cur_tx;
 	unsigned int dirty_tx;
@@ -77,6 +86,7 @@ struct stmmac_rx_buffer {
 struct stmmac_rx_queue {
 	u32 rx_count_frames;
 	u32 queue_index;
+	struct xdp_rxq_info xdp_rxq;
 	struct page_pool *page_pool;
 	struct stmmac_rx_buffer *buf_pool;
 	struct stmmac_priv *priv_data;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0dad8ab93eb5..65163b51f8ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -71,6 +71,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 
 #define STMMAC_XDP_PASS		0
 #define STMMAC_XDP_CONSUMED	BIT(0)
+#define STMMAC_XDP_TX		BIT(1)
 
 static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
@@ -1442,7 +1443,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 
-	if (tx_q->tx_skbuff_dma[i].buf) {
+	if (tx_q->tx_skbuff_dma[i].buf &&
+	    tx_q->tx_skbuff_dma[i].buf_type != STMMAC_TXBUF_T_XDP_TX) {
 		if (tx_q->tx_skbuff_dma[i].map_as_page)
 			dma_unmap_page(priv->device,
 				       tx_q->tx_skbuff_dma[i].buf,
@@ -1455,12 +1457,20 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 					 DMA_TO_DEVICE);
 	}
 
-	if (tx_q->tx_skbuff[i]) {
+	if (tx_q->xdpf[i] &&
+	    tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+		xdp_return_frame(tx_q->xdpf[i]);
+		tx_q->xdpf[i] = NULL;
+	}
+
+	if (tx_q->tx_skbuff[i] &&
+	    tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) {
 		dev_kfree_skb_any(tx_q->tx_skbuff[i]);
 		tx_q->tx_skbuff[i] = NULL;
-		tx_q->tx_skbuff_dma[i].buf = 0;
-		tx_q->tx_skbuff_dma[i].map_as_page = false;
 	}
+
+	tx_q->tx_skbuff_dma[i].buf = 0;
+	tx_q->tx_skbuff_dma[i].map_as_page = false;
 }
 
 /**
@@ -1568,6 +1578,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+		int ret;
 
 		netif_dbg(priv, probe, priv->dev,
 			  "(%s) dma_rx_phy=0x%08x\n", __func__,
@@ -1575,6 +1586,14 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 
 		stmmac_clear_rx_descriptors(priv, queue);
 
+		WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+						   MEM_TYPE_PAGE_POOL,
+						   rx_q->page_pool));
+
+		netdev_info(priv->dev,
+			    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
+			    rx_q->queue_index);
+
 		for (i = 0; i < priv->dma_rx_size; i++) {
 			struct dma_desc *p;
 
@@ -1775,6 +1794,9 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
 					  sizeof(struct dma_extended_desc),
 					  rx_q->dma_erx, rx_q->dma_rx_phy);
 
+		if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq))
+			xdp_rxq_info_unreg(&rx_q->xdp_rxq);
+
 		kfree(rx_q->buf_pool);
 		if (rx_q->page_pool)
 			page_pool_destroy(rx_q->page_pool);
@@ -1837,8 +1859,10 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 	/* RX queues buffers and DMA */
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+		struct stmmac_channel *ch = &priv->channel[queue];
 		struct page_pool_params pp_params = { 0 };
 		unsigned int num_pages;
+		int ret;
 
 		rx_q->queue_index = queue;
 		rx_q->priv_data = priv;
@@ -1884,6 +1908,14 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
 			if (!rx_q->dma_rx)
 				goto err_dma;
 		}
+
+		ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev,
+				       rx_q->queue_index,
+				       ch->rx_napi.napi_id);
+		if (ret) {
+			netdev_err(priv->dev, "Failed to register xdp rxq info\n");
+			goto err_dma;
+		}
 	}
 
 	return 0;
@@ -1985,11 +2017,13 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv)
  */
 static void free_dma_desc_resources(struct stmmac_priv *priv)
 {
-	/* Release the DMA RX socket buffers */
-	free_dma_rx_desc_resources(priv);
-
 	/* Release the DMA TX socket buffers */
 	free_dma_tx_desc_resources(priv);
+
+	/* Release the DMA RX socket buffers later
+	 * to ensure all pending XDP_TX buffers are returned.
+	 */
+	free_dma_rx_desc_resources(priv);
 }
 
 /**
@@ -2181,10 +2215,22 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	entry = tx_q->dirty_tx;
 	while ((entry != tx_q->cur_tx) && (count < budget)) {
-		struct sk_buff *skb = tx_q->tx_skbuff[entry];
+		struct xdp_frame *xdpf;
+		struct sk_buff *skb;
 		struct dma_desc *p;
 		int status;
 
+		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+			xdpf = tx_q->xdpf[entry];
+			skb = NULL;
+		} else if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
+			xdpf = NULL;
+			skb = tx_q->tx_skbuff[entry];
+		} else {
+			xdpf = NULL;
+			skb = NULL;
+		}
+
 		if (priv->extend_desc)
 			p = (struct dma_desc *)(tx_q->dma_etx + entry);
 		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
@@ -2214,10 +2260,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 				priv->dev->stats.tx_packets++;
 				priv->xstats.tx_pkt_n++;
 			}
-			stmmac_get_tx_hwtstamp(priv, p, skb);
+			if (skb)
+				stmmac_get_tx_hwtstamp(priv, p, skb);
 		}
 
-		if (likely(tx_q->tx_skbuff_dma[entry].buf)) {
+		if (likely(tx_q->tx_skbuff_dma[entry].buf &&
+			   tx_q->tx_skbuff_dma[entry].buf_type != STMMAC_TXBUF_T_XDP_TX)) {
 			if (tx_q->tx_skbuff_dma[entry].map_as_page)
 				dma_unmap_page(priv->device,
 					       tx_q->tx_skbuff_dma[entry].buf,
@@ -2238,11 +2286,19 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		tx_q->tx_skbuff_dma[entry].last_segment = false;
 		tx_q->tx_skbuff_dma[entry].is_jumbo = false;
 
-		if (likely(skb != NULL)) {
-			pkts_compl++;
-			bytes_compl += skb->len;
-			dev_consume_skb_any(skb);
-			tx_q->tx_skbuff[entry] = NULL;
+		if (xdpf &&
+		    tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			tx_q->xdpf[entry] = NULL;
+		}
+
+		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
+			if (likely(skb)) {
+				pkts_compl++;
+				bytes_compl += skb->len;
+				dev_consume_skb_any(skb);
+				tx_q->tx_skbuff[entry] = NULL;
+			}
 		}
 
 		stmmac_release_tx_desc(priv, p, priv->mode);
@@ -3667,6 +3723,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_q->tx_skbuff_dma[first_entry].buf = des;
 	tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
+	tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
+	tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
 
 	if (priv->dma_cap.addr64 <= 32) {
 		first->des0 = cpu_to_le32(des);
@@ -3702,12 +3760,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des;
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag);
 		tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true;
+		tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
 	}
 
 	tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true;
 
 	/* Only the last descriptor gets to point to the skb. */
 	tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+	tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB;
 
 	/* Manage tx mitigation */
 	tx_packets = (tx_q->cur_tx + 1) - first_tx;
@@ -3914,6 +3974,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_q->tx_skbuff_dma[entry].map_as_page = true;
 		tx_q->tx_skbuff_dma[entry].len = len;
 		tx_q->tx_skbuff_dma[entry].last_segment = last_segment;
+		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
 
 		/* Prepare the descriptor and set the own bit too */
 		stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion,
@@ -3922,6 +3983,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Only the last descriptor gets to point to the skb. */
 	tx_q->tx_skbuff[entry] = skb;
+	tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB;
 
 	/* According to the coalesce parameter the IC bit for the latest
 	 * segment is reset and the timer re-started to clean the tx status.
@@ -4000,6 +4062,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto dma_map_err;
 
 		tx_q->tx_skbuff_dma[first_entry].buf = des;
+		tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB;
+		tx_q->tx_skbuff_dma[first_entry].map_as_page = false;
 
 		stmmac_set_desc_addr(priv, first, des);
 
@@ -4181,6 +4245,110 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
 	return plen - len;
 }
 
+static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
+				struct xdp_frame *xdpf)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	struct page *page = virt_to_page(xdpf->data);
+	unsigned int entry = tx_q->cur_tx;
+	struct dma_desc *tx_desc;
+	dma_addr_t dma_addr;
+	bool set_ic;
+
+	if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv))
+		return STMMAC_XDP_CONSUMED;
+
+	if (likely(priv->extend_desc))
+		tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+	else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+		tx_desc = &tx_q->dma_entx[entry].basic;
+	else
+		tx_desc = tx_q->dma_tx + entry;
+
+	dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) +
+		   xdpf->headroom;
+	dma_sync_single_for_device(priv->device, dma_addr,
+				   xdpf->len, DMA_BIDIRECTIONAL);
+
+	tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
+
+	tx_q->tx_skbuff_dma[entry].buf = dma_addr;
+	tx_q->tx_skbuff_dma[entry].map_as_page = false;
+	tx_q->tx_skbuff_dma[entry].len = xdpf->len;
+	tx_q->tx_skbuff_dma[entry].last_segment = true;
+	tx_q->tx_skbuff_dma[entry].is_jumbo = false;
+
+	tx_q->xdpf[entry] = xdpf;
+
+	stmmac_set_desc_addr(priv, tx_desc, dma_addr);
+
+	stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len,
+			       true, priv->mode, true, true,
+			       xdpf->len);
+
+	tx_q->tx_count_frames++;
+
+	if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0)
+		set_ic = true;
+	else
+		set_ic = false;
+
+	if (set_ic) {
+		tx_q->tx_count_frames = 0;
+		stmmac_set_tx_ic(priv, tx_desc);
+		priv->xstats.tx_set_ic_bit++;
+	}
+
+	stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+	entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size);
+	tx_q->cur_tx = entry;
+
+	return STMMAC_XDP_TX;
+}
+
+static int stmmac_xdp_get_tx_queue(struct stmmac_priv *priv,
+				   int cpu)
+{
+	int index = cpu;
+
+	if (unlikely(index < 0))
+		index = 0;
+
+	while (index >= priv->plat->tx_queues_to_use)
+		index -= priv->plat->tx_queues_to_use;
+
+	return index;
+}
+
+static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
+				struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	int queue;
+	int res;
+
+	if (unlikely(!xdpf))
+		return STMMAC_XDP_CONSUMED;
+
+	queue = stmmac_xdp_get_tx_queue(priv, cpu);
+	nq = netdev_get_tx_queue(priv->dev, queue);
+
+	__netif_tx_lock(nq, cpu);
+	/* Avoids TX time-out as we are sharing with slow path */
+	nq->trans_start = jiffies;
+
+	res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf);
+	if (res == STMMAC_XDP_TX)
+		stmmac_flush_tx_descriptors(priv, queue);
+
+	__netif_tx_unlock(nq);
+
+	return res;
+}
+
 static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
 					   struct xdp_buff *xdp)
 {
@@ -4201,6 +4369,9 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
 	case XDP_PASS:
 		res = STMMAC_XDP_PASS;
 		break;
+	case XDP_TX:
+		res = stmmac_xdp_xmit_back(priv, xdp);
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -4217,6 +4388,18 @@ unlock:
 	return ERR_PTR(-res);
 }
 
+static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv,
+				   int xdp_status)
+{
+	int cpu = smp_processor_id();
+	int queue;
+
+	queue = stmmac_xdp_get_tx_queue(priv, cpu);
+
+	if (xdp_status & STMMAC_XDP_TX)
+		stmmac_tx_timer_arm(priv, queue);
+}
+
 /**
  * stmmac_rx - manage the receive process
  * @priv: driver private structure
@@ -4236,6 +4419,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 	unsigned int desc_size;
 	struct sk_buff *skb = NULL;
 	struct xdp_buff xdp;
+	int xdp_status = 0;
 	int buf_sz;
 
 	dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
@@ -4357,6 +4541,8 @@ read_again:
 		}
 
 		if (!skb) {
+			unsigned int pre_len, sync_len;
+
 			dma_sync_single_for_cpu(priv->device, buf->addr,
 						buf1_len, dma_dir);
 
@@ -4365,16 +4551,26 @@ read_again:
 			xdp.data_hard_start = page_address(buf->page);
 			xdp_set_data_meta_invalid(&xdp);
 			xdp.frame_sz = buf_sz;
+			xdp.rxq = &rx_q->xdp_rxq;
 
+			pre_len = xdp.data_end - xdp.data_hard_start -
+				  buf->page_offset;
 			skb = stmmac_xdp_run_prog(priv, &xdp);
+			/* Due xdp_adjust_tail: DMA sync for_device
+			 * cover max len CPU touch
+			 */
+			sync_len = xdp.data_end - xdp.data_hard_start -
+				   buf->page_offset;
+			sync_len = max(sync_len, pre_len);
 
 			/* For Not XDP_PASS verdict */
 			if (IS_ERR(skb)) {
 				unsigned int xdp_res = -PTR_ERR(skb);
 
 				if (xdp_res & STMMAC_XDP_CONSUMED) {
-					page_pool_recycle_direct(rx_q->page_pool,
-								 buf->page);
+					page_pool_put_page(rx_q->page_pool,
+							   virt_to_head_page(xdp.data),
+							   sync_len, true);
 					buf->page = NULL;
 					priv->dev->stats.rx_dropped++;
 
@@ -4388,6 +4584,12 @@ read_again:
 
 					count++;
 					continue;
+				} else if (xdp_res & STMMAC_XDP_TX) {
+					xdp_status |= xdp_res;
+					buf->page = NULL;
+					skb = NULL;
+					count++;
+					continue;
 				}
 			}
 		}
@@ -4470,6 +4672,8 @@ drain_data:
 		rx_q->state.len = len;
 	}
 
+	stmmac_finalize_xdp_rx(priv, xdp_status);
+
 	stmmac_rx_refill(priv, queue);
 
 	priv->xstats.rx_pkt_n += count;
-- 
cgit v1.2.3


From 8b278a5b69a2298055b99895ddc5b5f4ab430df5 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Thu, 1 Apr 2021 10:11:17 +0800
Subject: net: stmmac: Add support for XDP_REDIRECT action

This patch adds the support of XDP_REDIRECT to another remote cpu for
further action. It also implements ndo_xdp_xmit ops, enabling the driver
to transmit packets forwarded to it by XDP program running on another
interface.

This patch has been tested using "xdp_redirect_cpu" for XDP_REDIRECT
+ drop testing. It also been tested with "xdp_redirect" sample app
which can be used to exercise ndo_xdp_xmit ops. The burst traffics are
generated using pktgen_sample03_burst_single_flow.sh in samples/pktgen
directory.

v4: Move xdp_do_flush() processing into stmmac_finalize_xdp_rx() and
    combined the XDP verdict of XDP TX and REDIRECT together.

v3: Added 'nq->trans_start = jiffies' to avoid TX time-out as we are
    sharing TX queue between slow path and XDP. Thanks to Jakub Kicinski
    for point out.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 94 ++++++++++++++++++++---
 2 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index a93e22a6be59..c49debb62b05 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -39,6 +39,7 @@ struct stmmac_resources {
 enum stmmac_txbuf_type {
 	STMMAC_TXBUF_T_SKB,
 	STMMAC_TXBUF_T_XDP_TX,
+	STMMAC_TXBUF_T_XDP_NDO,
 };
 
 struct stmmac_tx_info {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 65163b51f8ad..77285646c5fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -72,6 +72,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define STMMAC_XDP_PASS		0
 #define STMMAC_XDP_CONSUMED	BIT(0)
 #define STMMAC_XDP_TX		BIT(1)
+#define STMMAC_XDP_REDIRECT	BIT(2)
 
 static int flow_ctrl = FLOW_AUTO;
 module_param(flow_ctrl, int, 0644);
@@ -1458,7 +1459,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 	}
 
 	if (tx_q->xdpf[i] &&
-	    tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+	    (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX ||
+	     tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_NDO)) {
 		xdp_return_frame(tx_q->xdpf[i]);
 		tx_q->xdpf[i] = NULL;
 	}
@@ -2220,7 +2222,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		struct dma_desc *p;
 		int status;
 
-		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX) {
+		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX ||
+		    tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_NDO) {
 			xdpf = tx_q->xdpf[entry];
 			skb = NULL;
 		} else if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
@@ -2292,6 +2295,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 			tx_q->xdpf[entry] = NULL;
 		}
 
+		if (xdpf &&
+		    tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_NDO) {
+			xdp_return_frame(xdpf);
+			tx_q->xdpf[entry] = NULL;
+		}
+
 		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
 			if (likely(skb)) {
 				pkts_compl++;
@@ -4246,10 +4255,9 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
 }
 
 static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
-				struct xdp_frame *xdpf)
+				struct xdp_frame *xdpf, bool dma_map)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-	struct page *page = virt_to_page(xdpf->data);
 	unsigned int entry = tx_q->cur_tx;
 	struct dma_desc *tx_desc;
 	dma_addr_t dma_addr;
@@ -4265,12 +4273,23 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
 	else
 		tx_desc = tx_q->dma_tx + entry;
 
-	dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) +
-		   xdpf->headroom;
-	dma_sync_single_for_device(priv->device, dma_addr,
-				   xdpf->len, DMA_BIDIRECTIONAL);
+	if (dma_map) {
+		dma_addr = dma_map_single(priv->device, xdpf->data,
+					  xdpf->len, DMA_TO_DEVICE);
+		if (dma_mapping_error(priv->device, dma_addr))
+			return STMMAC_XDP_CONSUMED;
+
+		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO;
+	} else {
+		struct page *page = virt_to_page(xdpf->data);
+
+		dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) +
+			   xdpf->headroom;
+		dma_sync_single_for_device(priv->device, dma_addr,
+					   xdpf->len, DMA_BIDIRECTIONAL);
 
-	tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
+		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX;
+	}
 
 	tx_q->tx_skbuff_dma[entry].buf = dma_addr;
 	tx_q->tx_skbuff_dma[entry].map_as_page = false;
@@ -4340,7 +4359,7 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
 	/* Avoids TX time-out as we are sharing with slow path */
 	nq->trans_start = jiffies;
 
-	res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf);
+	res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf, false);
 	if (res == STMMAC_XDP_TX)
 		stmmac_flush_tx_descriptors(priv, queue);
 
@@ -4372,6 +4391,12 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
 	case XDP_TX:
 		res = stmmac_xdp_xmit_back(priv, xdp);
 		break;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(priv->dev, xdp, prog) < 0)
+			res = STMMAC_XDP_CONSUMED;
+		else
+			res = STMMAC_XDP_REDIRECT;
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -4398,6 +4423,9 @@ static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv,
 
 	if (xdp_status & STMMAC_XDP_TX)
 		stmmac_tx_timer_arm(priv, queue);
+
+	if (xdp_status & STMMAC_XDP_REDIRECT)
+		xdp_do_flush();
 }
 
 /**
@@ -4584,7 +4612,8 @@ read_again:
 
 					count++;
 					continue;
-				} else if (xdp_res & STMMAC_XDP_TX) {
+				} else if (xdp_res & (STMMAC_XDP_TX |
+						      STMMAC_XDP_REDIRECT)) {
 					xdp_status |= xdp_res;
 					buf->page = NULL;
 					skb = NULL;
@@ -5600,6 +5629,48 @@ static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	}
 }
 
+static int stmmac_xdp_xmit(struct net_device *dev, int num_frames,
+			   struct xdp_frame **frames, u32 flags)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	int i, nxmit = 0;
+	int queue;
+
+	if (unlikely(test_bit(STMMAC_DOWN, &priv->state)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	queue = stmmac_xdp_get_tx_queue(priv, cpu);
+	nq = netdev_get_tx_queue(priv->dev, queue);
+
+	__netif_tx_lock(nq, cpu);
+	/* Avoids TX time-out as we are sharing with slow path */
+	nq->trans_start = jiffies;
+
+	for (i = 0; i < num_frames; i++) {
+		int res;
+
+		res = stmmac_xdp_xmit_xdpf(priv, queue, frames[i], true);
+		if (res == STMMAC_XDP_CONSUMED)
+			break;
+
+		nxmit++;
+	}
+
+	if (flags & XDP_XMIT_FLUSH) {
+		stmmac_flush_tx_descriptors(priv, queue);
+		stmmac_tx_timer_arm(priv, queue);
+	}
+
+	__netif_tx_unlock(nq);
+
+	return nxmit;
+}
+
 static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_open = stmmac_open,
 	.ndo_start_xmit = stmmac_xmit,
@@ -5619,6 +5690,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
 	.ndo_bpf = stmmac_bpf,
+	.ndo_xdp_xmit = stmmac_xdp_xmit,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
-- 
cgit v1.2.3


From c3d5c2d96d69f2578d6fbf66e39cc2cf840d9812 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 4 Apr 2021 10:22:18 +0300
Subject: PCI/IOV: Add sysfs MSI-X vector assignment interface

A typical cloud provider SR-IOV use case is to create many VFs for use by
guest VMs. The VFs may not be assigned to a VM until a customer requests a
VM of a certain size, e.g., number of CPUs. A VF may need MSI-X vectors
proportional to the number of CPUs in the VM, but there is no standard way
to change the number of MSI-X vectors supported by a VF.

Some Mellanox ConnectX devices support dynamic assignment of MSI-X vectors
to SR-IOV VFs. This can be done by the PF driver after VFs are enabled,
and it can be done without affecting VFs that are already in use. The
hardware supports a limited pool of MSI-X vectors that can be assigned to
the PF or to individual VFs.  This is device-specific behavior that
requires support in the PF driver.

Add a read-only "sriov_vf_total_msix" sysfs file for the PF and a writable
"sriov_vf_msix_count" file for each VF. Management software may use these
to learn how many MSI-X vectors are available and to dynamically assign
them to VFs before the VFs are passed through to a VM.

If the PF driver implements the ->sriov_get_vf_total_msix() callback,
"sriov_vf_total_msix" contains the total number of MSI-X vectors available
for distribution among VFs.

If no driver is bound to the VF, writing "N" to "sriov_vf_msix_count" uses
the PF driver ->sriov_set_msix_vec_count() callback to assign "N" MSI-X
vectors to the VF.  When a VF driver subsequently reads the MSI-X Message
Control register, it will see the new Table Size "N".

Link: https://lore.kernel.org/linux-pci/20210314124256.70253-2-leon@kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 Documentation/ABI/testing/sysfs-bus-pci |  29 +++++++++
 drivers/pci/iov.c                       | 102 ++++++++++++++++++++++++++++++--
 drivers/pci/pci-sysfs.c                 |   3 +-
 drivers/pci/pci.h                       |   3 +-
 include/linux/pci.h                     |   8 +++
 5 files changed, 137 insertions(+), 8 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 25c9c39770c6..e5cfd170b491 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -375,3 +375,32 @@ Description:
 		The value comes from the PCI kernel device state and can be one
 		of: "unknown", "error", "D0", D1", "D2", "D3hot", "D3cold".
 		The file is read only.
+
+What:		/sys/bus/pci/devices/.../sriov_vf_total_msix
+Date:		January 2021
+Contact:	Leon Romanovsky <leonro@nvidia.com>
+Description:
+		This file is associated with a SR-IOV physical function (PF).
+		It contains the total number of MSI-X vectors available for
+		assignment to all virtual functions (VFs) associated with PF.
+		The value will be zero if the device doesn't support this
+		functionality. For supported devices, the value will be
+		constant and won't be changed after MSI-X vectors assignment.
+
+What:		/sys/bus/pci/devices/.../sriov_vf_msix_count
+Date:		January 2021
+Contact:	Leon Romanovsky <leonro@nvidia.com>
+Description:
+		This file is associated with a SR-IOV virtual function (VF).
+		It allows configuration of the number of MSI-X vectors for
+		the VF. This allows devices that have a global pool of MSI-X
+		vectors to optimally divide them between VFs based on VF usage.
+
+		The values accepted are:
+		 * > 0 - this number will be reported as the Table Size in the
+			 VF's MSI-X capability
+		 * < 0 - not valid
+		 * = 0 - will reset to the device default value
+
+		The file is writable if the PF is bound to a driver that
+		implements ->sriov_set_msix_vec_count().
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 4afd4ee4f7f0..afc06e6ce115 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
 	return (dev->devfn + dev->sriov->offset +
 		dev->sriov->stride * vf_id) & 0xff;
 }
+EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
 
 /*
  * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
@@ -157,6 +158,92 @@ failed:
 	return rc;
 }
 
+#ifdef CONFIG_PCI_MSI
+static ssize_t sriov_vf_total_msix_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u32 vf_total_msix = 0;
+
+	device_lock(dev);
+	if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
+		goto unlock;
+
+	vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
+unlock:
+	device_unlock(dev);
+	return sysfs_emit(buf, "%u\n", vf_total_msix);
+}
+static DEVICE_ATTR_RO(sriov_vf_total_msix);
+
+static ssize_t sriov_vf_msix_count_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct pci_dev *vf_dev = to_pci_dev(dev);
+	struct pci_dev *pdev = pci_physfn(vf_dev);
+	int val, ret;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val < 0)
+		return -EINVAL;
+
+	device_lock(&pdev->dev);
+	if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
+		ret = -EOPNOTSUPP;
+		goto err_pdev;
+	}
+
+	device_lock(&vf_dev->dev);
+	if (vf_dev->driver) {
+		/*
+		 * A driver is already attached to this VF and has configured
+		 * itself based on the current MSI-X vector count. Changing
+		 * the vector size could mess up the driver, so block it.
+		 */
+		ret = -EBUSY;
+		goto err_dev;
+	}
+
+	ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
+
+err_dev:
+	device_unlock(&vf_dev->dev);
+err_pdev:
+	device_unlock(&pdev->dev);
+	return ret ? : count;
+}
+static DEVICE_ATTR_WO(sriov_vf_msix_count);
+#endif
+
+static struct attribute *sriov_vf_dev_attrs[] = {
+#ifdef CONFIG_PCI_MSI
+	&dev_attr_sriov_vf_msix_count.attr,
+#endif
+	NULL,
+};
+
+static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (!pdev->is_virtfn)
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group sriov_vf_dev_attr_group = {
+	.attrs = sriov_vf_dev_attrs,
+	.is_visible = sriov_vf_attrs_are_visible,
+};
+
 int pci_iov_add_virtfn(struct pci_dev *dev, int id)
 {
 	int i;
@@ -400,18 +487,21 @@ static DEVICE_ATTR_RO(sriov_stride);
 static DEVICE_ATTR_RO(sriov_vf_device);
 static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
 
-static struct attribute *sriov_dev_attrs[] = {
+static struct attribute *sriov_pf_dev_attrs[] = {
 	&dev_attr_sriov_totalvfs.attr,
 	&dev_attr_sriov_numvfs.attr,
 	&dev_attr_sriov_offset.attr,
 	&dev_attr_sriov_stride.attr,
 	&dev_attr_sriov_vf_device.attr,
 	&dev_attr_sriov_drivers_autoprobe.attr,
+#ifdef CONFIG_PCI_MSI
+	&dev_attr_sriov_vf_total_msix.attr,
+#endif
 	NULL,
 };
 
-static umode_t sriov_attrs_are_visible(struct kobject *kobj,
-				       struct attribute *a, int n)
+static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
 {
 	struct device *dev = kobj_to_dev(kobj);
 
@@ -421,9 +511,9 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj,
 	return a->mode;
 }
 
-const struct attribute_group sriov_dev_attr_group = {
-	.attrs = sriov_dev_attrs,
-	.is_visible = sriov_attrs_are_visible,
+const struct attribute_group sriov_pf_dev_attr_group = {
+	.attrs = sriov_pf_dev_attrs,
+	.is_visible = sriov_pf_attrs_are_visible,
 };
 
 int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index f8afd54ca3e1..a6b8fbbba6d2 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1567,7 +1567,8 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
 	&pci_dev_attr_group,
 	&pci_dev_hp_attr_group,
 #ifdef CONFIG_PCI_IOV
-	&sriov_dev_attr_group,
+	&sriov_pf_dev_attr_group,
+	&sriov_vf_dev_attr_group,
 #endif
 	&pci_bridge_attr_group,
 	&pcie_dev_attr_group,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index ef7c4661314f..afb87b917f07 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -501,7 +501,8 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 void pci_restore_iov_state(struct pci_dev *dev);
 int pci_iov_bus_range(struct pci_bus *bus);
-extern const struct attribute_group sriov_dev_attr_group;
+extern const struct attribute_group sriov_pf_dev_attr_group;
+extern const struct attribute_group sriov_vf_dev_attr_group;
 #else
 static inline int pci_iov_init(struct pci_dev *dev)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 86c799c97b77..9b575a676888 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -856,6 +856,12 @@ struct module;
  *		e.g. drivers/net/e100.c.
  * @sriov_configure: Optional driver callback to allow configuration of
  *		number of VFs to enable via sysfs "sriov_numvfs" file.
+ * @sriov_set_msix_vec_count: PF Driver callback to change number of MSI-X
+ *              vectors on a VF. Triggered via sysfs "sriov_vf_msix_count".
+ *              This will change MSI-X Table Size in the VF Message Control
+ *              registers.
+ * @sriov_get_vf_total_msix: PF driver callback to get the total number of
+ *              MSI-X vectors available for distribution to the VFs.
  * @err_handler: See Documentation/PCI/pci-error-recovery.rst
  * @groups:	Sysfs attribute groups.
  * @driver:	Driver model structure.
@@ -871,6 +877,8 @@ struct pci_driver {
 	int  (*resume)(struct pci_dev *dev);	/* Device woken up */
 	void (*shutdown)(struct pci_dev *dev);
 	int  (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
+	int  (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
+	u32  (*sriov_get_vf_total_msix)(struct pci_dev *pf);
 	const struct pci_error_handlers *err_handler;
 	const struct attribute_group **groups;
 	struct device_driver	driver;
-- 
cgit v1.2.3


From 0b989c1e37053196676b2238f82195bd5a339d58 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 14 Mar 2021 14:42:54 +0200
Subject: net/mlx5: Add dynamic MSI-X capabilities bits

These new fields declare the number of MSI-X vectors that is possible to
allocate on the VF through PF configuration.

Value must be in range defined by min_dynamic_vf_msix_table_size and
max_dynamic_vf_msix_table_size.

The driver should continue to query its MSI-X table through PCI
configuration header.

Link: https://lore.kernel.org/linux-pci/20210314124256.70253-3-leon@kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3ee7a86f39e4..432290b58a0b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1683,7 +1683,16 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   reserved_at_6e0[0x10];
 	u8	   sf_base_id[0x10];
 
-	u8	   reserved_at_700[0x80];
+	u8	   reserved_at_700[0x8];
+	u8	   num_total_dynamic_vf_msix[0x18];
+	u8	   reserved_at_720[0x14];
+	u8	   dynamic_msix_table_size[0xc];
+	u8	   reserved_at_740[0xc];
+	u8	   min_dynamic_vf_msix_table_size[0x4];
+	u8	   reserved_at_750[0x4];
+	u8	   max_dynamic_vf_msix_table_size[0xc];
+
+	u8	   reserved_at_760[0x20];
 	u8	   vhca_tunnel_commands[0x40];
 	u8	   reserved_at_7c0[0x40];
 };
-- 
cgit v1.2.3


From 604774add516c6847745cb381d46907e229ab0bf Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 14 Mar 2021 14:42:55 +0200
Subject: net/mlx5: Dynamically assign MSI-X vectors count

The number of MSI-X vectors is a PCI property visible through lspci. The
field is read-only and configured by the device. The mlx5 devices work in
a static or dynamic assignment mode.

Static assignment means that all newly created VFs have a preset number of
MSI-X vectors determined by device configuration parameters. This can
result in some VFs having too many or too few MSI-X vectors. Till now this
has been the only means of fine-tuning the MSI-X vector count and it was
acceptable for small numbers of VFs.

With dynamic assignment the inefficiency of having a fixed number of MSI-X
vectors can be avoided with each VF having exactly the required
vectors. Userspace will provide this information while provisioning the VF
for use, based on the intended use. For instance if being used with a VM,
the MSI-X vector count might be matched to the CPU count of the VM.

For compatibility mlx5 continues to start up with MSI-X vector assignment,
but the kernel can now access a larger dynamic vector pool and assign more
vectors to created VFs.

Link: https://lore.kernel.org/linux-pci/20210314124256.70253-4-leon@kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  4 ++
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  5 ++
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c  | 73 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c    | 13 +++-
 4 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c568896cfb23..0489712865b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -571,6 +571,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 
 	mlx5_vhca_state_cap_handle(dev, set_hca_cap);
 
+	if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix))
+		MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
+			 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
+
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index efe403c7e354..f0aed664dd35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -174,6 +174,11 @@ int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 		       struct notifier_block *nb);
 int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 		       struct notifier_block *nb);
+
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
+			    int msix_vec_count);
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
+
 struct cpumask *
 mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx);
 struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index a61e09aff152..19e3e978267e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -61,6 +61,79 @@ static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx)
 	return &irq_table->irq[vecidx];
 }
 
+/**
+ * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
+ *                                   to be ssigned to each VF.
+ * @dev: PF to work on
+ * @num_vfs: Number of enabled VFs
+ */
+int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
+{
+	int num_vf_msix, min_msix, max_msix;
+
+	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+	if (!num_vf_msix)
+		return 0;
+
+	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+	/* Limit maximum number of MSI-X vectors so the default configuration
+	 * has some available in the pool. This will allow the user to increase
+	 * the number of vectors in a VF without having to first size-down other
+	 * VFs.
+	 */
+	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
+}
+
+/**
+ * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
+ * @dev: PF to work on
+ * @function_id: Internal PCI VF function IDd
+ * @msix_vec_count: Number of MSI-X vectors to set
+ */
+int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
+			    int msix_vec_count)
+{
+	int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	int num_vf_msix, min_msix, max_msix;
+	void *hca_cap, *cap;
+	int ret;
+
+	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+	if (!num_vf_msix)
+		return 0;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
+		return -EOPNOTSUPP;
+
+	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
+	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
+
+	if (msix_vec_count < min_msix)
+		return -EINVAL;
+
+	if (msix_vec_count > max_msix)
+		return -EOVERFLOW;
+
+	hca_cap = kzalloc(sz, GFP_KERNEL);
+	if (!hca_cap)
+		return -ENOMEM;
+
+	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
+
+	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
+	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
+
+	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
+		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
+	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+	kfree(hca_cap);
+	return ret;
+}
+
 int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx,
 		       struct notifier_block *nb)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 3094d20297a9..f0ec86a1c8a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -71,8 +71,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
 static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int err;
-	int vf;
+	int err, vf, num_msix_count;
 
 	if (!MLX5_ESWITCH_MANAGER(dev))
 		goto enable_vfs_hca;
@@ -85,12 +84,22 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 	}
 
 enable_vfs_hca:
+	num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
 	for (vf = 0; vf < num_vfs; vf++) {
 		err = mlx5_core_enable_hca(dev, vf + 1);
 		if (err) {
 			mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
 			continue;
 		}
+
+		err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count);
+		if (err) {
+			mlx5_core_warn(dev,
+				       "failed to set MSI-X vector counts VF %d, err %d\n",
+				       vf, err);
+			continue;
+		}
+
 		sriov->vfs_ctx[vf].enabled = 1;
 		if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
 			err = sriov_restore_guids(dev, vf);
-- 
cgit v1.2.3


From e71b75f73763d88665b3a19c5a4d52d559aa7732 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 14 Mar 2021 14:42:56 +0200
Subject: net/mlx5: Implement sriov_get_vf_total_msix/count() callbacks

The mlx5 implementation executes a firmware command on the PF to change
the configuration of the selected VF.

Link: https://lore.kernel.org/linux-pci/20210314124256.70253-5-leon@kernel.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  2 ++
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  7 +++++
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c    | 35 ++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0489712865b7..edca6bc87639 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1700,6 +1700,8 @@ static struct pci_driver mlx5_core_driver = {
 	.shutdown	= shutdown,
 	.err_handler	= &mlx5_err_handler,
 	.sriov_configure   = mlx5_core_sriov_configure,
+	.sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
+	.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
 };
 
 static void mlx5_core_verify_params(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index f0aed664dd35..99007f2d0424 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -140,6 +140,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
 void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
@@ -278,4 +279,10 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
 
 void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work);
+static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+
+	return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+}
 #endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index f0ec86a1c8a6..2338989d4403 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -187,6 +187,41 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
 	return err ? err : num_vfs;
 }
 
+int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count)
+{
+	struct pci_dev *pf = pci_physfn(vf);
+	struct mlx5_core_sriov *sriov;
+	struct mlx5_core_dev *dev;
+	int num_vf_msix, id;
+
+	dev = pci_get_drvdata(pf);
+	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
+	if (!num_vf_msix)
+		return -EOPNOTSUPP;
+
+	if (!msix_vec_count)
+		msix_vec_count =
+			mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf));
+
+	sriov = &dev->priv.sriov;
+
+	/* Reversed translation of PCI VF function number to the internal
+	 * function_id, which exists in the name of virtfn symlink.
+	 */
+	for (id = 0; id < pci_num_vf(pf); id++) {
+		if (!sriov->vfs_ctx[id].enabled)
+			continue;
+
+		if (vf->devfn == pci_iov_virtfn_devfn(pf, id))
+			break;
+	}
+
+	if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled)
+		return -EINVAL;
+
+	return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count);
+}
+
 int mlx5_sriov_attach(struct mlx5_core_dev *dev)
 {
 	if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev))
-- 
cgit v1.2.3


From 7d42e84eb99daf9b7feef37e8f2ea1eaf975346b Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 09:11:03 +0200
Subject: net: openvswitch: Use 'skb_push_rcsum()' instead of hand coding it

'skb_push()'/'skb_postpush_rcsum()' can be replaced by an equivalent
'skb_push_rcsum()' which is less verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c    | 6 ++----
 net/openvswitch/vport-netdev.c | 7 +++----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 71cec03e8612..c29b0ef1fc27 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -809,8 +809,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
 
 	err = nf_nat_packet(ct, ctinfo, hooknum, skb);
 push:
-	skb_push(skb, nh_off);
-	skb_postpush_rcsum(skb, skb->data, nh_off);
+	skb_push_rcsum(skb, nh_off);
 
 	return err;
 }
@@ -1322,8 +1321,7 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 	else
 		err = ovs_ct_lookup(net, key, info, skb);
 
-	skb_push(skb, nh_ofs);
-	skb_postpush_rcsum(skb, skb->data, nh_ofs);
+	skb_push_rcsum(skb, nh_ofs);
 	if (err)
 		kfree_skb(skb);
 	return err;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 57d6436e6f6a..8e1a88f13622 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -44,10 +44,9 @@ static void netdev_port_receive(struct sk_buff *skb)
 	if (unlikely(!skb))
 		return;
 
-	if (skb->dev->type == ARPHRD_ETHER) {
-		skb_push(skb, ETH_HLEN);
-		skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
-	}
+	if (skb->dev->type == ARPHRD_ETHER)
+		skb_push_rcsum(skb, ETH_HLEN);
+
 	ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
 	return;
 error:
-- 
cgit v1.2.3


From 1e1032b0c4afaed7739a6681ff6b4cb120b82994 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Mon, 5 Apr 2021 12:01:19 +0800
Subject: libbpf: Fix KERNEL_VERSION macro

Add missing ')' for KERNEL_VERSION macro.

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210405040119.802188-1-hengqi.chen@gmail.com
---
 tools/lib/bpf/bpf_helpers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index cc2e51c64a54..b904128626c2 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -51,7 +51,7 @@
 #endif
 
 #ifndef KERNEL_VERSION
-#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
 #endif
 
 /*
-- 
cgit v1.2.3


From 514e1150da9cd8d7978d990a353636cf1a7a87c2 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Fri, 2 Apr 2021 02:30:00 -0700
Subject: net: x25: Queue received packets in the drivers instead of per-CPU
 queues

X.25 Layer 3 (the Packet Layer) expects layer 2 to provide a reliable
datalink service such that no packets are reordered or dropped. And
X.25 Layer 2 (the LAPB layer) is indeed designed to provide such service.

However, this reliability is not preserved when a driver calls "netif_rx"
to deliver the received packets to layer 3, because "netif_rx" will put
the packets into per-CPU queues before they are delivered to layer 3.
If there are multiple CPUs, the order of the packets may not be preserved.
The per-CPU queues may also drop packets if there are too many.

Therefore, we should not call "netif_rx" to let it queue the packets.
Instead, we should use our own queue that won't reorder or drop packets.

This patch changes all X.25 drivers to use their own queues instead of
calling "netif_rx". The patch also documents this requirement in the
"x25-iface" documentation.

Cc: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/x25-iface.rst | 65 +++++-----------------------------
 drivers/net/wan/hdlc_x25.c             | 30 ++++++++++++++--
 drivers/net/wan/lapbether.c            | 49 ++++++++++++++++++++++---
 3 files changed, 80 insertions(+), 64 deletions(-)

diff --git a/Documentation/networking/x25-iface.rst b/Documentation/networking/x25-iface.rst
index df401891dce6..f34e9ec64937 100644
--- a/Documentation/networking/x25-iface.rst
+++ b/Documentation/networking/x25-iface.rst
@@ -70,60 +70,13 @@ First Byte = 0x03 (X25_IFACE_PARAMS)
 LAPB parameters. To be defined.
 
 
+Requirements for the device driver
+----------------------------------
 
-Possible Problems
-=================
-
-(Henner Eisen, 2000-10-28)
-
-The X.25 packet layer protocol depends on a reliable datalink service.
-The LAPB protocol provides such reliable service. But this reliability
-is not preserved by the Linux network device driver interface:
-
-- With Linux 2.4.x (and above) SMP kernels, packet ordering is not
-  preserved. Even if a device driver calls netif_rx(skb1) and later
-  netif_rx(skb2), skb2 might be delivered to the network layer
-  earlier that skb1.
-- Data passed upstream by means of netif_rx() might be dropped by the
-  kernel if the backlog queue is congested.
-
-The X.25 packet layer protocol will detect this and reset the virtual
-call in question. But many upper layer protocols are not designed to
-handle such N-Reset events gracefully. And frequent N-Reset events
-will always degrade performance.
-
-Thus, driver authors should make netif_rx() as reliable as possible:
-
-SMP re-ordering will not occur if the driver's interrupt handler is
-always executed on the same CPU. Thus,
-
-- Driver authors should use irq affinity for the interrupt handler.
-
-The probability of packet loss due to backlog congestion can be
-reduced by the following measures or a combination thereof:
-
-(1) Drivers for kernel versions 2.4.x and above should always check the
-    return value of netif_rx(). If it returns NET_RX_DROP, the
-    driver's LAPB protocol must not confirm reception of the frame
-    to the peer.
-    This will reliably suppress packet loss. The LAPB protocol will
-    automatically cause the peer to re-transmit the dropped packet
-    later.
-    The lapb module interface was modified to support this. Its
-    data_indication() method should now transparently pass the
-    netif_rx() return value to the (lapb module) caller.
-(2) Drivers for kernel versions 2.2.x should always check the global
-    variable netdev_dropping when a new frame is received. The driver
-    should only call netif_rx() if netdev_dropping is zero. Otherwise
-    the driver should not confirm delivery of the frame and drop it.
-    Alternatively, the driver can queue the frame internally and call
-    netif_rx() later when netif_dropping is 0 again. In that case, delivery
-    confirmation should also be deferred such that the internal queue
-    cannot grow to much.
-    This will not reliably avoid packet loss, but the probability
-    of packet loss in netif_rx() path will be significantly reduced.
-(3) Additionally, driver authors might consider to support
-    CONFIG_NET_HW_FLOWCONTROL. This allows the driver to be woken up
-    when a previously congested backlog queue becomes empty again.
-    The driver could uses this for flow-controlling the peer by means
-    of the LAPB protocol's flow-control service.
+Packets should not be reordered or dropped when delivering between the
+Packet Layer and the device driver.
+
+To avoid packets from being reordered or dropped when delivering from
+the device driver to the Packet Layer, the device driver should not
+call "netif_rx" to deliver the received packets. Instead, it should
+call "netif_receive_skb_core" from softirq context to deliver them.
diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c
index 5a6a945f6c81..ba8c36c7ea91 100644
--- a/drivers/net/wan/hdlc_x25.c
+++ b/drivers/net/wan/hdlc_x25.c
@@ -25,6 +25,8 @@ struct x25_state {
 	x25_hdlc_proto settings;
 	bool up;
 	spinlock_t up_lock; /* Protects "up" */
+	struct sk_buff_head rx_queue;
+	struct tasklet_struct rx_tasklet;
 };
 
 static int x25_ioctl(struct net_device *dev, struct ifreq *ifr);
@@ -34,14 +36,27 @@ static struct x25_state *state(hdlc_device *hdlc)
 	return hdlc->state;
 }
 
+static void x25_rx_queue_kick(struct tasklet_struct *t)
+{
+	struct x25_state *x25st = from_tasklet(x25st, t, rx_tasklet);
+	struct sk_buff *skb = skb_dequeue(&x25st->rx_queue);
+
+	while (skb) {
+		netif_receive_skb_core(skb);
+		skb = skb_dequeue(&x25st->rx_queue);
+	}
+}
+
 /* These functions are callbacks called by LAPB layer */
 
 static void x25_connect_disconnect(struct net_device *dev, int reason, int code)
 {
+	struct x25_state *x25st = state(dev_to_hdlc(dev));
 	struct sk_buff *skb;
 	unsigned char *ptr;
 
-	if ((skb = dev_alloc_skb(1)) == NULL) {
+	skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
+	if (!skb) {
 		netdev_err(dev, "out of memory\n");
 		return;
 	}
@@ -50,7 +65,9 @@ static void x25_connect_disconnect(struct net_device *dev, int reason, int code)
 	*ptr = code;
 
 	skb->protocol = x25_type_trans(skb, dev);
-	netif_rx(skb);
+
+	skb_queue_tail(&x25st->rx_queue, skb);
+	tasklet_schedule(&x25st->rx_tasklet);
 }
 
 
@@ -71,6 +88,7 @@ static void x25_disconnected(struct net_device *dev, int reason)
 
 static int x25_data_indication(struct net_device *dev, struct sk_buff *skb)
 {
+	struct x25_state *x25st = state(dev_to_hdlc(dev));
 	unsigned char *ptr;
 
 	if (skb_cow(skb, 1)) {
@@ -84,7 +102,10 @@ static int x25_data_indication(struct net_device *dev, struct sk_buff *skb)
 	*ptr = X25_IFACE_DATA;
 
 	skb->protocol = x25_type_trans(skb, dev);
-	return netif_rx(skb);
+
+	skb_queue_tail(&x25st->rx_queue, skb);
+	tasklet_schedule(&x25st->rx_tasklet);
+	return NET_RX_SUCCESS;
 }
 
 
@@ -223,6 +244,7 @@ static void x25_close(struct net_device *dev)
 	spin_unlock_bh(&x25st->up_lock);
 
 	lapb_unregister(dev);
+	tasklet_kill(&x25st->rx_tasklet);
 }
 
 
@@ -338,6 +360,8 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
 		memcpy(&state(hdlc)->settings, &new_settings, size);
 		state(hdlc)->up = false;
 		spin_lock_init(&state(hdlc)->up_lock);
+		skb_queue_head_init(&state(hdlc)->rx_queue);
+		tasklet_setup(&state(hdlc)->rx_tasklet, x25_rx_queue_kick);
 
 		/* There's no header_ops so hard_header_len should be 0. */
 		dev->hard_header_len = 0;
diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
index 45d74285265a..59646865a3a4 100644
--- a/drivers/net/wan/lapbether.c
+++ b/drivers/net/wan/lapbether.c
@@ -53,6 +53,8 @@ struct lapbethdev {
 	struct net_device	*axdev;		/* lapbeth device (lapb#) */
 	bool			up;
 	spinlock_t		up_lock;	/* Protects "up" */
+	struct sk_buff_head	rx_queue;
+	struct napi_struct	napi;
 };
 
 static LIST_HEAD(lapbeth_devices);
@@ -83,6 +85,26 @@ static __inline__ int dev_is_ethdev(struct net_device *dev)
 
 /* ------------------------------------------------------------------------ */
 
+static int lapbeth_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct lapbethdev *lapbeth = container_of(napi, struct lapbethdev,
+						  napi);
+	struct sk_buff *skb;
+	int processed = 0;
+
+	for (; processed < budget; ++processed) {
+		skb = skb_dequeue(&lapbeth->rx_queue);
+		if (!skb)
+			break;
+		netif_receive_skb_core(skb);
+	}
+
+	if (processed < budget)
+		napi_complete(napi);
+
+	return processed;
+}
+
 /*
  *	Receive a LAPB frame via an ethernet interface.
  */
@@ -135,6 +157,7 @@ drop:
 
 static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	unsigned char *ptr;
 
 	if (skb_cow(skb, 1)) {
@@ -148,7 +171,10 @@ static int lapbeth_data_indication(struct net_device *dev, struct sk_buff *skb)
 	*ptr = X25_IFACE_DATA;
 
 	skb->protocol = x25_type_trans(skb, dev);
-	return netif_rx(skb);
+
+	skb_queue_tail(&lapbeth->rx_queue, skb);
+	napi_schedule(&lapbeth->napi);
+	return NET_RX_SUCCESS;
 }
 
 /*
@@ -233,8 +259,9 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb)
 
 static void lapbeth_connected(struct net_device *dev, int reason)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	unsigned char *ptr;
-	struct sk_buff *skb = dev_alloc_skb(1);
+	struct sk_buff *skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
 
 	if (!skb) {
 		pr_err("out of memory\n");
@@ -245,13 +272,16 @@ static void lapbeth_connected(struct net_device *dev, int reason)
 	*ptr = X25_IFACE_CONNECT;
 
 	skb->protocol = x25_type_trans(skb, dev);
-	netif_rx(skb);
+
+	skb_queue_tail(&lapbeth->rx_queue, skb);
+	napi_schedule(&lapbeth->napi);
 }
 
 static void lapbeth_disconnected(struct net_device *dev, int reason)
 {
+	struct lapbethdev *lapbeth = netdev_priv(dev);
 	unsigned char *ptr;
-	struct sk_buff *skb = dev_alloc_skb(1);
+	struct sk_buff *skb = __dev_alloc_skb(1, GFP_ATOMIC | __GFP_NOMEMALLOC);
 
 	if (!skb) {
 		pr_err("out of memory\n");
@@ -262,7 +292,9 @@ static void lapbeth_disconnected(struct net_device *dev, int reason)
 	*ptr = X25_IFACE_DISCONNECT;
 
 	skb->protocol = x25_type_trans(skb, dev);
-	netif_rx(skb);
+
+	skb_queue_tail(&lapbeth->rx_queue, skb);
+	napi_schedule(&lapbeth->napi);
 }
 
 /*
@@ -293,6 +325,8 @@ static int lapbeth_open(struct net_device *dev)
 	struct lapbethdev *lapbeth = netdev_priv(dev);
 	int err;
 
+	napi_enable(&lapbeth->napi);
+
 	if ((err = lapb_register(dev, &lapbeth_callbacks)) != LAPB_OK) {
 		pr_err("lapb_register error: %d\n", err);
 		return -ENODEV;
@@ -317,6 +351,8 @@ static int lapbeth_close(struct net_device *dev)
 	if ((err = lapb_unregister(dev)) != LAPB_OK)
 		pr_err("lapb_unregister error: %d\n", err);
 
+	napi_disable(&lapbeth->napi);
+
 	return 0;
 }
 
@@ -374,6 +410,9 @@ static int lapbeth_new_device(struct net_device *dev)
 	lapbeth->up = false;
 	spin_lock_init(&lapbeth->up_lock);
 
+	skb_queue_head_init(&lapbeth->rx_queue);
+	netif_napi_add(ndev, &lapbeth->napi, lapbeth_napi_poll, 16);
+
 	rc = -EIO;
 	if (register_netdevice(ndev))
 		goto fail;
-- 
cgit v1.2.3


From 0282bc6ae86d84b035afac792b5d5ff67707cdd2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 08:33:44 +0200
Subject: net: ag71xx: Slightly simplify 'ag71xx_rx_packets()'

There is no need to use 'list_for_each_entry_safe' here, as nothing is
removed from the list in the 'for' loop.
Use 'list_for_each_entry' instead, it is slightly less verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/ag71xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index a60ce9030581..7352f98123c7 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1658,9 +1658,9 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 	struct net_device *ndev = ag->ndev;
 	int ring_mask, ring_size, done = 0;
 	unsigned int pktlen_mask, offset;
-	struct sk_buff *next, *skb;
 	struct ag71xx_ring *ring;
 	struct list_head rx_list;
+	struct sk_buff *skb;
 
 	ring = &ag->rx_ring;
 	pktlen_mask = ag->dcfg->desc_pktlen_mask;
@@ -1725,7 +1725,7 @@ next:
 
 	ag71xx_ring_rx_refill(ag);
 
-	list_for_each_entry_safe(skb, next, &rx_list, list)
+	list_for_each_entry(skb, &rx_list, list)
 		skb->protocol = eth_type_trans(skb, ndev);
 	netif_receive_skb_list(&rx_list);
 
-- 
cgit v1.2.3


From c3105f8485775943b2ccde09c7163510c161c965 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 10:54:37 +0200
Subject: ibmvnic: Use 'skb_frag_address()' instead of hand coding it

'page_address(skb_frag_page()) + skb_frag_off()' can be replaced by an
equivalent 'skb_frag_address()' which is less verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 9c6438d3b3a5..473411542911 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1678,9 +1678,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-			memcpy(dst + cur,
-			       page_address(skb_frag_page(frag)) +
-			       skb_frag_off(frag), skb_frag_size(frag));
+			memcpy(dst + cur, skb_frag_address(frag),
+			       skb_frag_size(frag));
 			cur += skb_frag_size(frag);
 		}
 	} else {
-- 
cgit v1.2.3


From c438a801e0bb2a4db1d3183a9482af891577a281 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 11:45:11 +0200
Subject: sfc: Use 'skb_add_rx_frag()' instead of hand coding it

Some lines of code can be merged into an equivalent 'skb_add_rx_frag()'
call which is less verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/rx.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 89c5c75f479f..17b8119c48e5 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -94,12 +94,11 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 		rx_buf->len -= hdr_len;
 
 		for (;;) {
-			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-					   rx_buf->page, rx_buf->page_offset,
-					   rx_buf->len);
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+					rx_buf->page, rx_buf->page_offset,
+					rx_buf->len, efx->rx_buffer_truesize);
 			rx_buf->page = NULL;
-			skb->len += rx_buf->len;
-			skb->data_len += rx_buf->len;
+
 			if (skb_shinfo(skb)->nr_frags == n_frags)
 				break;
 
@@ -111,8 +110,6 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 		n_frags = 0;
 	}
 
-	skb->truesize += n_frags * efx->rx_buffer_truesize;
-
 	/* Move past the ethernet header */
 	skb->protocol = eth_type_trans(skb, efx->net_dev);
 
-- 
cgit v1.2.3


From 1ec3d02f9cdf2a4b89664145c7965aa46398d31e Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 14:42:44 +0200
Subject: qede: Remove a erroneous ++ in 'qede_rx_build_jumbo()'

This ++ is confusing. It looks duplicated with the one already performed in
'skb_fill_page_desc()'.

In fact, it is harmless. 'nr_frags' is written twice with the same value.
Once, because of the nr_frags++, and once because of the 'nr_frags = i + 1'
in 'skb_fill_page_desc()'.

So axe this post-increment to avoid confusion.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 102d0e0808d5..ee3e45e38cb7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1209,7 +1209,7 @@ static int qede_rx_build_jumbo(struct qede_dev *edev,
 		dma_unmap_page(rxq->dev, bd->mapping,
 			       PAGE_SIZE, DMA_FROM_DEVICE);
 
-		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
 				   bd->data, rxq->rx_headroom, cur_size);
 
 		skb->truesize += PAGE_SIZE;
-- 
cgit v1.2.3


From 7190e9d8e13164cdc38241095edcc392ed32f3ea Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 4 Apr 2021 14:42:57 +0200
Subject: qede: Use 'skb_add_rx_frag()' instead of hand coding it

Some lines of code can be merged into an equivalent 'skb_add_rx_frag()'
call which is less verbose.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index ee3e45e38cb7..8e150dd4f899 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1209,12 +1209,9 @@ static int qede_rx_build_jumbo(struct qede_dev *edev,
 		dma_unmap_page(rxq->dev, bd->mapping,
 			       PAGE_SIZE, DMA_FROM_DEVICE);
 
-		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
-				   bd->data, rxq->rx_headroom, cur_size);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, bd->data,
+				rxq->rx_headroom, cur_size, PAGE_SIZE);
 
-		skb->truesize += PAGE_SIZE;
-		skb->data_len += cur_size;
-		skb->len += cur_size;
 		pkt_len -= cur_size;
 	}
 
-- 
cgit v1.2.3


From 4b4b84468aa27d0a18be8cb727f246aa35a3406d Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 4 Apr 2021 13:38:23 +0000
Subject: mld: change lockdep annotation for ip6_sf_socklist and
 ipv6_mc_socklist

struct ip6_sf_socklist and ipv6_mc_socklist are per-socket MLD data.
These data are protected by rtnl lock, socket lock, and RCU.
So, when these are used, it verifies whether rtnl lock is acquired or not.

ip6_mc_msfget() is called by do_ipv6_getsockopt().
But caller doesn't acquire rtnl lock.
So, when these data are used in the ip6_mc_msfget() lockdep warns about it.
But accessing these is actually safe because socket lock was acquired by
do_ipv6_getsockopt().

So, it changes lockdep annotation from rtnl lock to socket lock.
(rtnl_dereference -> sock_dereference)

Locking graph for mld data is like below:

When writing mld data:
do_ipv6_setsockopt()
    rtnl_lock
    lock_sock
    (mld functions)
        idev->mc_lock(if per-interface mld data is modified)

When reading mld data:
do_ipv6_getsockopt()
    lock_sock
    ip6_mc_msfget()

Splat looks like:
=============================
WARNING: suspicious RCU usage
5.12.0-rc4+ #503 Not tainted
-----------------------------
net/ipv6/mcast.c:610 suspicious rcu_dereference_protected() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
1 lock held by mcast-listener-/923:
 #0: ffff888007958a70 (sk_lock-AF_INET6){+.+.}-{0:0}, at:
ipv6_get_msfilter+0xaf/0x190

stack backtrace:
CPU: 1 PID: 923 Comm: mcast-listener- Not tainted 5.12.0-rc4+ #503
Call Trace:
 dump_stack+0xa4/0xe5
 ip6_mc_msfget+0x553/0x6c0
 ? ipv6_sock_mc_join_ssm+0x10/0x10
 ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0
 ? mark_held_locks+0xb7/0x120
 ? lockdep_hardirqs_on_prepare+0x27c/0x3e0
 ? __local_bh_enable_ip+0xa5/0xf0
 ? lock_sock_nested+0x82/0xf0
 ipv6_get_msfilter+0xc3/0x190
 ? compat_ipv6_get_msfilter+0x300/0x300
 ? lock_downgrade+0x690/0x690
 do_ipv6_getsockopt.isra.6.constprop.13+0x1809/0x29e0
 ? do_ipv6_mcast_group_source+0x150/0x150
 ? register_lock_class+0x1750/0x1750
 ? kvm_sched_clock_read+0x14/0x30
 ? sched_clock+0x5/0x10
 ? sched_clock_cpu+0x18/0x170
 ? find_held_lock+0x3a/0x1c0
 ? lock_downgrade+0x690/0x690
 ? ipv6_getsockopt+0xdb/0x1b0
 ipv6_getsockopt+0xdb/0x1b0
[ ... ]

Fixes: 88e2ca308094 ("mld: convert ifmcaddr6 to RCU")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c     | 48 +++++++++++++++++++++++-------------------------
 net/tipc/udp_media.c |  2 ++
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 49b0cebfdcdc..ff536a158b85 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -114,10 +114,13 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 #define mc_dereference(e, idev) \
 	rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock))
 
-#define for_each_pmc_rtnl(np, pmc)				\
-	for (pmc = rtnl_dereference((np)->ipv6_mc_list);	\
+#define sock_dereference(e, sk) \
+	rcu_dereference_protected(e, lockdep_sock_is_held(sk))
+
+#define for_each_pmc_socklock(np, sk, pmc)			\
+	for (pmc = sock_dereference((np)->ipv6_mc_list, sk);	\
 	     pmc;						\
-	     pmc = rtnl_dereference(pmc->next))
+	     pmc = sock_dereference(pmc->next, sk))
 
 #define for_each_pmc_rcu(np, pmc)				\
 	for (pmc = rcu_dereference((np)->ipv6_mc_list);		\
@@ -180,7 +183,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	for_each_pmc_rtnl(np, mc_lst) {
+	for_each_pmc_socklock(np, sk, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr))
 			return -EADDRINUSE;
@@ -258,7 +261,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -EINVAL;
 
 	for (lnk = &np->ipv6_mc_list;
-	     (mc_lst = rtnl_dereference(*lnk)) != NULL;
+	     (mc_lst = sock_dereference(*lnk, sk)) != NULL;
 	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
@@ -323,7 +326,7 @@ void __ipv6_sock_mc_close(struct sock *sk)
 
 	ASSERT_RTNL();
 
-	while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
+	while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
@@ -350,8 +353,11 @@ void ipv6_sock_mc_close(struct sock *sk)
 
 	if (!rcu_access_pointer(np->ipv6_mc_list))
 		return;
+
 	rtnl_lock();
+	lock_sock(sk);
 	__ipv6_sock_mc_close(sk);
+	release_sock(sk);
 	rtnl_unlock();
 }
 
@@ -381,7 +387,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	err = -EADDRNOTAVAIL;
 
 	mutex_lock(&idev->mc_lock);
-	for_each_pmc_rtnl(inet6, pmc) {
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -404,7 +410,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
-	psl = rtnl_dereference(pmc->sflist);
+	psl = sock_dereference(pmc->sflist, sk);
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
@@ -511,7 +517,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 		goto done;
 	}
 
-	for_each_pmc_rtnl(inet6, pmc) {
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -552,7 +558,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	}
 
 	mutex_lock(&idev->mc_lock);
-	psl = rtnl_dereference(pmc->sflist);
+	psl = sock_dereference(pmc->sflist, sk);
 	if (psl) {
 		ip6_mc_del_src(idev, group, pmc->sfmode,
 			       psl->sl_count, psl->sl_addr, 0);
@@ -574,40 +580,32 @@ done:
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		  struct sockaddr_storage __user *p)
 {
-	int err, i, count, copycount;
+	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	const struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
-	struct inet6_dev *idev;
-	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
-	struct net *net = sock_net(sk);
+	int i, count, copycount;
 
 	group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
 
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface);
-	if (!idev)
-		return -ENODEV;
-
-	err = -EADDRNOTAVAIL;
 	/* changes to the ipv6_mc_list require the socket lock and
-	 * rtnl lock. We have the socket lock and rcu read lock,
-	 * so reading the list is safe.
+	 * rtnl lock. We have the socket lock, so reading the list is safe.
 	 */
 
-	for_each_pmc_rtnl(inet6, pmc) {
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
 			break;
 	}
 	if (!pmc)		/* must have a prior join */
-		return err;
+		return -EADDRNOTAVAIL;
 
 	gsf->gf_fmode = pmc->sfmode;
-	psl = rtnl_dereference(pmc->sflist);
+	psl = sock_dereference(pmc->sflist, sk);
 	count = psl ? psl->sl_count : 0;
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
@@ -2600,7 +2598,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 	struct ip6_sf_socklist *psl;
 	int err;
 
-	psl = rtnl_dereference(iml->sflist);
+	psl = sock_dereference(iml->sflist, sk);
 
 	if (idev)
 		mutex_lock(&idev->mc_lock);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 21e75e28e86a..e556d2cdc064 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -414,8 +414,10 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
 		err = ip_mc_join_group(sk, &mreqn);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
+		lock_sock(sk);
 		err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
 						   &remote->ipv6);
+		release_sock(sk);
 #endif
 	}
 	return err;
-- 
cgit v1.2.3


From 85d091a794f883ea19b421c38e158558f9b24b60 Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Sun, 4 Apr 2021 22:23:15 +0800
Subject: tipc: Fix a kernel-doc warning in name_table.c

Fix kernel-doc warning:

Documentation/networking/tipc:66: /home/sfr/next/next/net/tipc/name_table.c
  :558: WARNING: Unexpected indentation.
Documentation/networking/tipc:66: /home/sfr/next/next/net/tipc/name_table.c
  :559: WARNING: Block quote ends without a blank line; unexpected unindent.

Due to blank line missing.

Fixes: 908148bc5046 ("tipc: refactor tipc_sendmsg() and tipc_lookup_anycast()")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/netdev/20210318172255.63185609@canb.auug.org.au/
Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 6db9f9e7c0ac..b4017945d3e5 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -553,7 +553,9 @@ exit:
  *
  * On entry, a non-zero 'sk->node' indicates the node where we want lookup to be
  * performed, which may not be this one.
+ *
  * On exit:
+ *
  * - If lookup is deferred to another node, leave 'sk->node' unchanged and
  *   return 'true'.
  * - If lookup is successful, set the 'sk->node' and 'sk->ref' (== portid) which
-- 
cgit v1.2.3


From d3295869c40cb69a2c599000009b3fde43cec2ec Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Mon, 5 Apr 2021 18:54:35 +0800
Subject: net: nfc: Fix spelling errors in net/nfc module

These patches fix a series of spelling errors in net/nfc module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/digital_dep.c | 2 +-
 net/nfc/nci/core.c    | 2 +-
 net/nfc/nci/uart.c    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 5971fb6f51cc..1150731126e2 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1217,7 +1217,7 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
 
 		/* ACK */
 		if (ddev->atn_count) {
-			/* The target has previously recevied one or more ATN
+			/* The target has previously received one or more ATN
 			 * PDUs.
 			 */
 			ddev->atn_count = 0;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 59257400697d..9a585332ea84 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1507,7 +1507,7 @@ static void nci_rx_work(struct work_struct *work)
 		}
 	}
 
-	/* check if a data exchange timout has occurred */
+	/* check if a data exchange timeout has occurred */
 	if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) {
 		/* complete the data exchange transaction, if exists */
 		if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags))
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 1204c438e87d..6af5752cde09 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -234,7 +234,7 @@ static void nci_uart_tty_wakeup(struct tty_struct *tty)
  *     Called by tty low level driver when receive data is
  *     available.
  *
- * Arguments:  tty          pointer to tty isntance data
+ * Arguments:  tty          pointer to tty instance data
  *             data         pointer to received data
  *             flags        pointer to flags for data
  *             count        count of received data in bytes
@@ -374,7 +374,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
 		data += chunk_len;
 		count -= chunk_len;
 
-		/* Chcek if packet is fully received */
+		/* Check if packet is fully received */
 		if (nu->rx_packet_len == nu->rx_skb->len) {
 			/* Pass RX packet to driver */
 			if (nu->ops.recv(nu, nu->rx_skb) != 0)
-- 
cgit v1.2.3


From eeb85a14ee3494febb85ccfbee0772eda0823b13 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Mon, 5 Apr 2021 00:12:23 -0700
Subject: net: Allow to specify ifindex when device is moved to another
 namespace

Currently, we can specify ifindex on link creation. This change allows
to specify ifindex when a device is moved to another network namespace.

Even now, a device ifindex can be changed if there is another device
with the same ifindex in the target namespace. So this change doesn't
introduce completely new behavior, it adds more control to the process.

CRIU users want to restore containers with pre-created network devices.
A user will provide network devices and instructions where they have to
be restored, then CRIU will restore network namespaces and move devices
into them. The problem is that devices have to be restored with the same
indexes that they have before C/R.

Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Suggested-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c |  2 +-
 include/linux/netdevice.h       |  3 ++-
 net/core/dev.c                  | 24 +++++++++++++++++-------
 net/core/rtnetlink.c            | 19 +++++++++++++++----
 net/ieee802154/core.c           |  4 ++--
 net/wireless/core.c             |  4 ++--
 6 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7349a70af083..8c0c70e1da77 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2354,7 +2354,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	 */
 	if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
 		ret = dev_change_net_namespace(vf_netdev,
-					       dev_net(ndev), "eth%d");
+					       dev_net(ndev), "eth%d", 0);
 		if (ret)
 			netdev_err(vf_netdev,
 				   "could not move to same namespace as %s: %d\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f57b70fc251f..b482236c0e99 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4026,7 +4026,8 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
 int dev_change_name(struct net_device *, const char *);
 int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
-int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int dev_change_net_namespace(struct net_device *dev, struct net *net,
+			     const char *pat, int new_ifindex);
 int __dev_set_mtu(struct net_device *, int);
 int dev_validate_mtu(struct net_device *dev, int mtu,
 		     struct netlink_ext_ack *extack);
diff --git a/net/core/dev.c b/net/core/dev.c
index b4c67a5be606..9d1a8fac793f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11067,6 +11067,8 @@ EXPORT_SYMBOL(unregister_netdev);
  *	@net: network namespace
  *	@pat: If not NULL name pattern to try if the current device name
  *	      is already taken in the destination network namespace.
+ *	@new_ifindex: If not zero, specifies device index in the target
+ *	              namespace.
  *
  *	This function shuts down a device interface and moves it
  *	to a new network namespace. On success 0 is returned, on
@@ -11075,10 +11077,11 @@ EXPORT_SYMBOL(unregister_netdev);
  *	Callers must hold the rtnl semaphore.
  */
 
-int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
+int dev_change_net_namespace(struct net_device *dev, struct net *net,
+			     const char *pat, int new_ifindex)
 {
 	struct net *net_old = dev_net(dev);
-	int err, new_nsid, new_ifindex;
+	int err, new_nsid;
 
 	ASSERT_RTNL();
 
@@ -11109,6 +11112,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 			goto out;
 	}
 
+	/* Check that new_ifindex isn't used yet. */
+	err = -EBUSY;
+	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
+		goto out;
+
 	/*
 	 * And now a mini version of register_netdevice unregister_netdevice.
 	 */
@@ -11136,10 +11144,12 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
 	/* If there is an ifindex conflict assign a new one */
-	if (__dev_get_by_index(net, dev->ifindex))
-		new_ifindex = dev_new_index(net);
-	else
-		new_ifindex = dev->ifindex;
+	if (!new_ifindex) {
+		if (__dev_get_by_index(net, dev->ifindex))
+			new_ifindex = dev_new_index(net);
+		else
+			new_ifindex = dev->ifindex;
+	}
 
 	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
 			    new_ifindex);
@@ -11448,7 +11458,7 @@ static void __net_exit default_device_exit(struct net *net)
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
 		if (__dev_get_by_name(&init_net, fb_name))
 			snprintf(fb_name, IFNAMSIZ, "dev%%d");
-		err = dev_change_net_namespace(dev, &init_net, fb_name);
+		err = dev_change_net_namespace(dev, &init_net, fb_name, 0);
 		if (err) {
 			pr_emerg("%s: failed to move %s to init_net: %d\n",
 				 __func__, dev->name, err);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1bdcb33fb561..d51252afde0a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2266,6 +2266,9 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 			return -EINVAL;
 	}
 
+	if (tb[IFLA_NEW_IFINDEX] && nla_get_s32(tb[IFLA_NEW_IFINDEX]) <= 0)
+		return -EINVAL;
+
 	if (tb[IFLA_AF_SPEC]) {
 		struct nlattr *af;
 		int rem, err;
@@ -2603,14 +2606,22 @@ static int do_setlink(const struct sk_buff *skb,
 		return err;
 
 	if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
-		struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
-							    tb, CAP_NET_ADMIN);
+		struct net *net;
+		int new_ifindex;
+
+		net = rtnl_link_get_net_capable(skb, dev_net(dev),
+						tb, CAP_NET_ADMIN);
 		if (IS_ERR(net)) {
 			err = PTR_ERR(net);
 			goto errout;
 		}
 
-		err = dev_change_net_namespace(dev, net, ifname);
+		if (tb[IFLA_NEW_IFINDEX])
+			new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]);
+		else
+			new_ifindex = 0;
+
+		err = dev_change_net_namespace(dev, net, ifname, new_ifindex);
 		put_net(net);
 		if (err)
 			goto errout;
@@ -3452,7 +3463,7 @@ replay:
 	if (err < 0)
 		goto out_unregister;
 	if (link_net) {
-		err = dev_change_net_namespace(dev, dest_net, ifname);
+		err = dev_change_net_namespace(dev, dest_net, ifname, 0);
 		if (err < 0)
 			goto out_unregister;
 	}
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index de259b5170ab..ec3068937fc3 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -205,7 +205,7 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
 		if (!wpan_dev->netdev)
 			continue;
 		wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
-		err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
+		err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d", 0);
 		if (err)
 			break;
 		wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
@@ -222,7 +222,7 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
 				continue;
 			wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
 			err = dev_change_net_namespace(wpan_dev->netdev, net,
-						       "wpan%d");
+						       "wpan%d", 0);
 			WARN_ON(err);
 			wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
 		}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a2785379df6e..fabb677b7d58 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -165,7 +165,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 		if (!wdev->netdev)
 			continue;
 		wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
-		err = dev_change_net_namespace(wdev->netdev, net, "wlan%d");
+		err = dev_change_net_namespace(wdev->netdev, net, "wlan%d", 0);
 		if (err)
 			break;
 		wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
@@ -182,7 +182,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 				continue;
 			wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
 			err = dev_change_net_namespace(wdev->netdev, net,
-							"wlan%d");
+							"wlan%d", 0);
 			WARN_ON(err);
 			wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
 		}
-- 
cgit v1.2.3


From d392ecd1bc29ae15b0e284d5f732c2d36f244271 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Mon, 5 Apr 2021 18:28:25 +0100
Subject: net: hns3: Limiting the scope of vector_ring_chain variable

Limiting the scope of the variable vector_ring_chain to the block where it
is used.

Fixes: 424eb834a9be ("net: hns3: Unified HNS3 {VF|PF} Ethernet Driver for hip08 SoC")
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index f59cd719b5b4..076bfb76bdb9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3747,7 +3747,6 @@ static void hns3_nic_set_cpumask(struct hns3_nic_priv *priv)
 
 static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 {
-	struct hnae3_ring_chain_node vector_ring_chain;
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	int ret;
@@ -3779,6 +3778,8 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 	}
 
 	for (i = 0; i < priv->vector_num; i++) {
+		struct hnae3_ring_chain_node vector_ring_chain;
+
 		tqp_vector = &priv->tqp_vector[i];
 
 		tqp_vector->rx_group.total_bytes = 0;
-- 
cgit v1.2.3


From cc0626c2aaed8e475efdd85fa374b497a7192e35 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 5 Apr 2021 20:15:48 +0200
Subject: net: smsc911x: skip acpi_device_id table when !CONFIG_ACPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver can match via multiple methods.  Its acpi_device_id table is
referenced via ACPI_PTR() so it will be unused for !CONFIG_ACPI builds:

  drivers/net/ethernet/smsc/smsc911x.c:2652:36: warning:
    ‘smsc911x_acpi_match’ defined but not used [-Wunused-const-variable=]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smsc911x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 606c79de93a6..556a9790cdcf 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2649,11 +2649,13 @@ static const struct of_device_id smsc911x_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, smsc911x_dt_ids);
 #endif
 
+#ifdef CONFIG_ACPI
 static const struct acpi_device_id smsc911x_acpi_match[] = {
 	{ "ARMH9118", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, smsc911x_acpi_match);
+#endif
 
 static struct platform_driver smsc911x_driver = {
 	.probe = smsc911x_drv_probe,
-- 
cgit v1.2.3


From 237c609f8744a8d5415f40a7ee731957934b0eef Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:04 +0200
Subject: netfilter: nfnetlink: add and use nfnetlink_broadcast

This removes the only reference of net->nfnl outside of the nfnetlink
module.  This allows to move net->nfnl to net_generic infra.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h | 2 ++
 net/netfilter/nfnetlink.c           | 7 +++++++
 net/netfilter/nfnetlink_acct.c      | 3 +--
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 791d516e1e88..d4c14257db5d 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -51,6 +51,8 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
 		   unsigned int group, int echo, gfp_t flags);
 int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
 int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid);
+void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid,
+			 __u32 group, gfp_t allocation);
 
 static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type)
 {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index d3df66a39b5e..06e106b3ed85 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -178,6 +178,13 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid)
 }
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
+void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid,
+			 __u32 group, gfp_t allocation)
+{
+	netlink_broadcast(net->nfnl, skb, portid, group, allocation);
+}
+EXPORT_SYMBOL_GPL(nfnetlink_broadcast);
+
 /* Process one complete nfnetlink message. */
 static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index bb930f3b06c7..6895f31c5fbb 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -469,8 +469,7 @@ static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 		kfree_skb(skb);
 		return;
 	}
-	netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
-			  GFP_ATOMIC);
+	nfnetlink_broadcast(net, skb, 0, NFNLGRP_ACCT_QUOTA, GFP_ATOMIC);
 }
 
 int nfnl_acct_overquota(struct net *net, struct nf_acct *nfacct)
-- 
cgit v1.2.3


From 1be05ea766d763c52b99edf258dfe8bd1d054697 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:05 +0200
Subject: netfilter: nfnetlink: use net_generic infra

No need to place it in struct net, nfnetlink is a module and usage
doesn't occur in fastpath.

Also remove rcu usage:

Not a single reader of net->nfnl uses rcu accessors.

When exit_batch callbacks are executed the net namespace is already dead
so no calls to these functions are possible anymore (else we'd get NULL
deref crash too).

If the module is removed, then modules that call any of those functions
have been removed too so no calls to nfnl functions are possible either.

The nfnl and nfl_stash pointers in struct net are no longer used, they
will be removed in a followup patch to minimize changes to struct net
(causes rebuild for entire network stack).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink.c | 62 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 06e106b3ed85..06f5886f652e 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -28,6 +28,7 @@
 #include <linux/sched/signal.h>
 
 #include <net/netlink.h>
+#include <net/netns/generic.h>
 #include <linux/netfilter/nfnetlink.h>
 
 MODULE_LICENSE("GPL");
@@ -41,6 +42,12 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket");
 
 #define NFNL_MAX_ATTR_COUNT	32
 
+static unsigned int nfnetlink_pernet_id __read_mostly;
+
+struct nfnl_net {
+	struct sock *nfnl;
+};
+
 static struct {
 	struct mutex				mutex;
 	const struct nfnetlink_subsystem __rcu	*subsys;
@@ -75,6 +82,11 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
 	[NFNLGRP_NFTRACE]		= NFNL_SUBSYS_NFTABLES,
 };
 
+static struct nfnl_net *nfnl_pernet(struct net *net)
+{
+	return net_generic(net, nfnetlink_pernet_id);
+}
+
 void nfnl_lock(__u8 subsys_id)
 {
 	mutex_lock(&table[subsys_id].mutex);
@@ -149,28 +161,35 @@ nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss)
 
 int nfnetlink_has_listeners(struct net *net, unsigned int group)
 {
-	return netlink_has_listeners(net->nfnl, group);
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+	return netlink_has_listeners(nfnlnet->nfnl, group);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
 int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
 		   unsigned int group, int echo, gfp_t flags)
 {
-	return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+	return nlmsg_notify(nfnlnet->nfnl, skb, portid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
 int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
 {
-	return netlink_set_err(net->nfnl, portid, group, error);
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+	return netlink_set_err(nfnlnet->nfnl, portid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
 int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid)
 {
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
 	int err;
 
-	err = nlmsg_unicast(net->nfnl, skb, portid);
+	err = nlmsg_unicast(nfnlnet->nfnl, skb, portid);
 	if (err == -EAGAIN)
 		err = -ENOBUFS;
 
@@ -181,7 +200,9 @@ EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid,
 			 __u32 group, gfp_t allocation)
 {
-	netlink_broadcast(net->nfnl, skb, portid, group, allocation);
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+	netlink_broadcast(nfnlnet->nfnl, skb, portid, group, allocation);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_broadcast);
 
@@ -201,6 +222,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	type = nlh->nlmsg_type;
 replay:
 	rcu_read_lock();
+
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_MODULES
@@ -224,6 +246,7 @@ replay:
 
 	{
 		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+		struct nfnl_net *nfnlnet = nfnl_pernet(net);
 		u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 		struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
 		struct nlattr *attr = (void *)nlh + min_len;
@@ -245,7 +268,7 @@ replay:
 		}
 
 		if (nc->call_rcu) {
-			err = nc->call_rcu(net, net->nfnl, skb, nlh,
+			err = nc->call_rcu(net, nfnlnet->nfnl, skb, nlh,
 					   (const struct nlattr **)cda,
 					   extack);
 			rcu_read_unlock();
@@ -256,7 +279,7 @@ replay:
 			    nfnetlink_find_client(type, ss) != nc)
 				err = -EAGAIN;
 			else if (nc->call)
-				err = nc->call(net, net->nfnl, skb, nlh,
+				err = nc->call(net, nfnlnet->nfnl, skb, nlh,
 					       (const struct nlattr **)cda,
 					       extack);
 			else
@@ -460,7 +483,9 @@ replay_abort:
 				goto ack;
 
 			if (nc->call_batch) {
-				err = nc->call_batch(net, net->nfnl, skb, nlh,
+				struct nfnl_net *nfnlnet = nfnl_pernet(net);
+
+				err = nc->call_batch(net, nfnlnet->nfnl, skb, nlh,
 						     (const struct nlattr **)cda,
 						     &extack);
 			}
@@ -629,7 +654,7 @@ static int nfnetlink_bind(struct net *net, int group)
 
 static int __net_init nfnetlink_net_init(struct net *net)
 {
-	struct sock *nfnl;
+	struct nfnl_net *nfnlnet = nfnl_pernet(net);
 	struct netlink_kernel_cfg cfg = {
 		.groups	= NFNLGRP_MAX,
 		.input	= nfnetlink_rcv,
@@ -638,28 +663,29 @@ static int __net_init nfnetlink_net_init(struct net *net)
 #endif
 	};
 
-	nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
-	if (!nfnl)
+	nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
+	if (!nfnlnet->nfnl)
 		return -ENOMEM;
-	net->nfnl_stash = nfnl;
-	rcu_assign_pointer(net->nfnl, nfnl);
 	return 0;
 }
 
 static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 {
+	struct nfnl_net *nfnlnet;
 	struct net *net;
 
-	list_for_each_entry(net, net_exit_list, exit_list)
-		RCU_INIT_POINTER(net->nfnl, NULL);
-	synchronize_net();
-	list_for_each_entry(net, net_exit_list, exit_list)
-		netlink_kernel_release(net->nfnl_stash);
+	list_for_each_entry(net, net_exit_list, exit_list) {
+		nfnlnet = nfnl_pernet(net);
+
+		netlink_kernel_release(nfnlnet->nfnl);
+	}
 }
 
 static struct pernet_operations nfnetlink_net_ops = {
 	.init		= nfnetlink_net_init,
 	.exit_batch	= nfnetlink_net_exit_batch,
+	.id		= &nfnetlink_pernet_id,
+	.size		= sizeof(struct nfnl_net),
 };
 
 static int __init nfnetlink_init(void)
-- 
cgit v1.2.3


From ebfbe67568a734a449203097b02c4ab884257535 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:06 +0200
Subject: netfilter: cttimeout: use net_generic infra

reduce size of struct net and make this self-contained.
The member in struct net is kept to minimize changes to struct net
layout, it will be removed in a separate patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cttimeout.c | 41 +++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index de831a257512..46da5548d0b3 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -20,6 +20,7 @@
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -30,6 +31,12 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
 
+static unsigned int nfct_timeout_id __read_mostly;
+
+struct nfct_timeout_pernet {
+	struct list_head	nfct_timeout_list;
+};
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
 MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
@@ -42,6 +49,11 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 	[CTA_TIMEOUT_DATA]	= { .type = NLA_NESTED },
 };
 
+static struct nfct_timeout_pernet *nfct_timeout_pernet(struct net *net)
+{
+	return net_generic(net, nfct_timeout_id);
+}
+
 static int
 ctnl_timeout_parse_policy(void *timeout,
 			  const struct nf_conntrack_l4proto *l4proto,
@@ -77,6 +89,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
 	__u16 l3num;
 	__u8 l4num;
 	const struct nf_conntrack_l4proto *l4proto;
@@ -94,7 +107,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
 
-	list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
+	list_for_each_entry(timeout, &pernet->nfct_timeout_list, head) {
 		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
 
@@ -146,7 +159,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 	timeout->timeout.l3num = l3num;
 	timeout->timeout.l4proto = l4proto;
 	refcount_set(&timeout->refcnt, 1);
-	list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
+	list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
 
 	return 0;
 err:
@@ -201,6 +214,7 @@ nla_put_failure:
 static int
 ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nfct_timeout_pernet *pernet;
 	struct net *net = sock_net(skb->sk);
 	struct ctnl_timeout *cur, *last;
 
@@ -212,7 +226,8 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		cb->args[1] = 0;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
+	pernet = nfct_timeout_pernet(net);
+	list_for_each_entry_rcu(cur, &pernet->nfct_timeout_list, head) {
 		if (last) {
 			if (cur != last)
 				continue;
@@ -239,6 +254,7 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
 	int ret = -ENOENT;
 	char *name;
 	struct ctnl_timeout *cur;
@@ -254,7 +270,7 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 		return -EINVAL;
 	name = nla_data(cda[CTA_TIMEOUT_NAME]);
 
-	list_for_each_entry(cur, &net->nfct_timeout_list, head) {
+	list_for_each_entry(cur, &pernet->nfct_timeout_list, head) {
 		struct sk_buff *skb2;
 
 		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
@@ -310,12 +326,13 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 				 const struct nlattr * const cda[],
 				 struct netlink_ext_ack *extack)
 {
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
 	struct ctnl_timeout *cur, *tmp;
 	int ret = -ENOENT;
 	char *name;
 
 	if (!cda[CTA_TIMEOUT_NAME]) {
-		list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
+		list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list,
 					 head)
 			ctnl_timeout_try_del(net, cur);
 
@@ -323,7 +340,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 	}
 	name = nla_data(cda[CTA_TIMEOUT_NAME]);
 
-	list_for_each_entry(cur, &net->nfct_timeout_list, head) {
+	list_for_each_entry(cur, &pernet->nfct_timeout_list, head) {
 		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
 
@@ -503,9 +520,10 @@ err:
 static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
 						   const char *name)
 {
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
 	struct ctnl_timeout *timeout, *matching = NULL;
 
-	list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
+	list_for_each_entry_rcu(timeout, &pernet->nfct_timeout_list, head) {
 		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
 
@@ -563,19 +581,22 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
 
 static int __net_init cttimeout_net_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->nfct_timeout_list);
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+
+	INIT_LIST_HEAD(&pernet->nfct_timeout_list);
 
 	return 0;
 }
 
 static void __net_exit cttimeout_net_exit(struct net *net)
 {
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
 	struct ctnl_timeout *cur, *tmp;
 
 	nf_ct_unconfirmed_destroy(net);
 	nf_ct_untimeout(net, NULL);
 
-	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
+	list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
 
 		if (refcount_dec_and_test(&cur->refcnt))
@@ -586,6 +607,8 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 static struct pernet_operations cttimeout_ops = {
 	.init	= cttimeout_net_init,
 	.exit	= cttimeout_net_exit,
+	.id     = &nfct_timeout_id,
+	.size   = sizeof(struct nfct_timeout_pernet),
 };
 
 static int __init cttimeout_init(void)
-- 
cgit v1.2.3


From 8b0adbe3e38dbe5aae9edf6f5159ffdca7cfbdf1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:07 +0200
Subject: netfilter: nf_defrag_ipv6: use net_generic infra

This allows followup patch to remove these members from struct net.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv6/nf_defrag_ipv6.h |  6 +++
 net/ipv6/netfilter/nf_conntrack_reasm.c     | 68 +++++++++++++++--------------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c   | 15 ++++---
 3 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 6d31cd041143..ece923e2035b 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -13,4 +13,10 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
 
 struct inet_frags_ctl;
 
+struct nft_ct_frag6_pernet {
+	struct ctl_table_header *nf_frag_frags_hdr;
+	struct fqdir	*fqdir;
+	unsigned int users;
+};
+
 #endif /* _NF_DEFRAG_IPV6_H */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c129ad334eb3..a0108415275f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -15,28 +15,13 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/jiffies.h>
 #include <linux/net.h>
-#include <linux/list.h>
 #include <linux/netdevice.h>
-#include <linux/in6.h>
 #include <linux/ipv6.h>
-#include <linux/icmpv6.h>
-#include <linux/random.h>
 #include <linux/slab.h>
 
-#include <net/sock.h>
-#include <net/snmp.h>
 #include <net/ipv6_frag.h>
 
-#include <net/protocol.h>
-#include <net/transp_v6.h>
-#include <net/rawv6.h>
-#include <net/ndisc.h>
-#include <net/addrconf.h>
-#include <net/inet_ecn.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #include <linux/sysctl.h>
 #include <linux/netfilter.h>
@@ -44,11 +29,18 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netns/generic.h>
 
 static const char nf_frags_cache_name[] = "nf-frags";
 
+unsigned int nf_frag_pernet_id __read_mostly;
 static struct inet_frags nf_frags;
 
+static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net)
+{
+	return net_generic(net, nf_frag_pernet_id);
+}
+
 #ifdef CONFIG_SYSCTL
 
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
@@ -75,6 +67,7 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 
 static int nf_ct_frag6_sysctl_register(struct net *net)
 {
+	struct nft_ct_frag6_pernet *nf_frag;
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
 
@@ -86,18 +79,20 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 			goto err_alloc;
 	}
 
-	table[0].data	= &net->nf_frag.fqdir->timeout;
-	table[1].data	= &net->nf_frag.fqdir->low_thresh;
-	table[1].extra2	= &net->nf_frag.fqdir->high_thresh;
-	table[2].data	= &net->nf_frag.fqdir->high_thresh;
-	table[2].extra1	= &net->nf_frag.fqdir->low_thresh;
-	table[2].extra2	= &init_net.nf_frag.fqdir->high_thresh;
+	nf_frag = nf_frag_pernet(net);
+
+	table[0].data	= &nf_frag->fqdir->timeout;
+	table[1].data	= &nf_frag->fqdir->low_thresh;
+	table[1].extra2	= &nf_frag->fqdir->high_thresh;
+	table[2].data	= &nf_frag->fqdir->high_thresh;
+	table[2].extra1	= &nf_frag->fqdir->low_thresh;
+	table[2].extra2	= &nf_frag->fqdir->high_thresh;
 
 	hdr = register_net_sysctl(net, "net/netfilter", table);
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->nf_frag_frags_hdr = hdr;
+	nf_frag->nf_frag_frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -109,10 +104,11 @@ err_alloc:
 
 static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 {
+	struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net);
 	struct ctl_table *table;
 
-	table = net->nf_frag_frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->nf_frag_frags_hdr);
+	table = nf_frag->nf_frag_frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(nf_frag->nf_frag_frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }
@@ -149,6 +145,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
 static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
 				  const struct ipv6hdr *hdr, int iif)
 {
+	struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net);
 	struct frag_v6_compare_key key = {
 		.id = id,
 		.saddr = hdr->saddr,
@@ -158,7 +155,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
 	};
 	struct inet_frag_queue *q;
 
-	q = inet_frag_find(net->nf_frag.fqdir, &key);
+	q = inet_frag_find(nf_frag->fqdir, &key);
 	if (!q)
 		return NULL;
 
@@ -495,37 +492,44 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
 
 static int nf_ct_net_init(struct net *net)
 {
+	struct nft_ct_frag6_pernet *nf_frag  = nf_frag_pernet(net);
 	int res;
 
-	res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
+	res = fqdir_init(&nf_frag->fqdir, &nf_frags, net);
 	if (res < 0)
 		return res;
 
-	net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+	nf_frag->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+	nf_frag->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+	nf_frag->fqdir->timeout = IPV6_FRAG_TIMEOUT;
 
 	res = nf_ct_frag6_sysctl_register(net);
 	if (res < 0)
-		fqdir_exit(net->nf_frag.fqdir);
+		fqdir_exit(nf_frag->fqdir);
 	return res;
 }
 
 static void nf_ct_net_pre_exit(struct net *net)
 {
-	fqdir_pre_exit(net->nf_frag.fqdir);
+	struct nft_ct_frag6_pernet *nf_frag  = nf_frag_pernet(net);
+
+	fqdir_pre_exit(nf_frag->fqdir);
 }
 
 static void nf_ct_net_exit(struct net *net)
 {
+	struct nft_ct_frag6_pernet *nf_frag  = nf_frag_pernet(net);
+
 	nf_ct_frags6_sysctl_unregister(net);
-	fqdir_exit(net->nf_frag.fqdir);
+	fqdir_exit(nf_frag->fqdir);
 }
 
 static struct pernet_operations nf_ct_net_ops = {
 	.init		= nf_ct_net_init,
 	.pre_exit	= nf_ct_net_pre_exit,
 	.exit		= nf_ct_net_exit,
+	.id		= &nf_frag_pernet_id,
+	.size		= sizeof(struct nft_ct_frag6_pernet),
 };
 
 static const struct rhashtable_params nfct_rhash_params = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 6646a87fb5dc..402dc4ca9504 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -25,6 +25,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
+extern unsigned int nf_frag_pernet_id;
+
 static DEFINE_MUTEX(defrag6_mutex);
 
 static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -89,10 +91,12 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = {
 
 static void __net_exit defrag6_net_exit(struct net *net)
 {
-	if (net->nf.defrag_ipv6) {
+	struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
+
+	if (nf_frag->users) {
 		nf_unregister_net_hooks(net, ipv6_defrag_ops,
 					ARRAY_SIZE(ipv6_defrag_ops));
-		net->nf.defrag_ipv6 = false;
+		nf_frag->users = 0;
 	}
 }
 
@@ -130,21 +134,22 @@ static void __exit nf_defrag_fini(void)
 
 int nf_defrag_ipv6_enable(struct net *net)
 {
+	struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
 	int err = 0;
 
 	might_sleep();
 
-	if (net->nf.defrag_ipv6)
+	if (nf_frag->users)
 		return 0;
 
 	mutex_lock(&defrag6_mutex);
-	if (net->nf.defrag_ipv6)
+	if (nf_frag->users)
 		goto out_unlock;
 
 	err = nf_register_net_hooks(net, ipv6_defrag_ops,
 				    ARRAY_SIZE(ipv6_defrag_ops));
 	if (err == 0)
-		net->nf.defrag_ipv6 = true;
+		nf_frag->users = 1;
 
  out_unlock:
 	mutex_unlock(&defrag6_mutex);
-- 
cgit v1.2.3


From 7b1957b04956bfd8ea0280bf84a724e5cd0a7f44 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:08 +0200
Subject: netfilter: nf_defrag_ipv4: use net_generic infra

This allows followup patch to remove the defrag_ipv4 member from struct
net.  It also allows to auto-remove the hooks later on by adding a
_disable() function.  This will be done later in a follow patch series.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_defrag_ipv4.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 8115611aa47d..ffdcc2b9360f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -20,8 +20,13 @@
 #endif
 #include <net/netfilter/nf_conntrack_zones.h>
 
+static unsigned int defrag4_pernet_id __read_mostly;
 static DEFINE_MUTEX(defrag4_mutex);
 
+struct defrag4_pernet {
+	unsigned int users;
+};
+
 static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
 				   u_int32_t user)
 {
@@ -106,15 +111,19 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = {
 
 static void __net_exit defrag4_net_exit(struct net *net)
 {
-	if (net->nf.defrag_ipv4) {
+	struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
+
+	if (nf_defrag->users) {
 		nf_unregister_net_hooks(net, ipv4_defrag_ops,
 					ARRAY_SIZE(ipv4_defrag_ops));
-		net->nf.defrag_ipv4 = false;
+		nf_defrag->users = 0;
 	}
 }
 
 static struct pernet_operations defrag4_net_ops = {
 	.exit = defrag4_net_exit,
+	.id   = &defrag4_pernet_id,
+	.size = sizeof(struct defrag4_pernet),
 };
 
 static int __init nf_defrag_init(void)
@@ -129,21 +138,22 @@ static void __exit nf_defrag_fini(void)
 
 int nf_defrag_ipv4_enable(struct net *net)
 {
+	struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
 	int err = 0;
 
 	might_sleep();
 
-	if (net->nf.defrag_ipv4)
+	if (nf_defrag->users)
 		return 0;
 
 	mutex_lock(&defrag4_mutex);
-	if (net->nf.defrag_ipv4)
+	if (nf_defrag->users)
 		goto out_unlock;
 
 	err = nf_register_net_hooks(net, ipv4_defrag_ops,
 				    ARRAY_SIZE(ipv4_defrag_ops));
 	if (err == 0)
-		net->nf.defrag_ipv4 = true;
+		nf_defrag->users = 1;
 
  out_unlock:
 	mutex_unlock(&defrag4_mutex);
-- 
cgit v1.2.3


From 5b53951cfc85329d29df9d07b1e905f4563546a5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:09 +0200
Subject: netfilter: ebtables: use net_generic infra

ebtables currently uses net->xt.tables[BRIDGE], but upcoming
patch will move net->xt.tables away from struct net.

To avoid exposing x_tables internals to ebtables, use a private list
instead.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index ebe33b60efd6..11625d05bbbc 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -24,6 +24,7 @@
 #include <linux/cpumask.h>
 #include <linux/audit.h>
 #include <net/sock.h>
+#include <net/netns/generic.h>
 /* needed for logical [in,out]-dev filtering */
 #include "../br_private.h"
 
@@ -39,8 +40,11 @@
 #define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \
 				 COUNTER_OFFSET(n) * cpu))
 
+struct ebt_pernet {
+	struct list_head tables;
+};
 
-
+static unsigned int ebt_pernet_id __read_mostly;
 static DEFINE_MUTEX(ebt_mutex);
 
 #ifdef CONFIG_COMPAT
@@ -336,7 +340,9 @@ static inline struct ebt_table *
 find_table_lock(struct net *net, const char *name, int *error,
 		struct mutex *mutex)
 {
-	return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name,
+	struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+
+	return find_inlist_lock(&ebt_net->tables, name,
 				"ebtable_", error, mutex);
 }
 
@@ -1136,6 +1142,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 		       const struct nf_hook_ops *ops, struct ebt_table **res)
 {
+	struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
 	struct ebt_table_info *newinfo;
 	struct ebt_table *t, *table;
 	struct ebt_replace_kernel *repl;
@@ -1194,7 +1201,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 	table->private = newinfo;
 	rwlock_init(&table->lock);
 	mutex_lock(&ebt_mutex);
-	list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) {
+	list_for_each_entry(t, &ebt_net->tables, list) {
 		if (strcmp(t->name, table->name) == 0) {
 			ret = -EEXIST;
 			goto free_unlock;
@@ -1206,7 +1213,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 		ret = -ENOENT;
 		goto free_unlock;
 	}
-	list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
+	list_add(&table->list, &ebt_net->tables);
 	mutex_unlock(&ebt_mutex);
 
 	WRITE_ONCE(*res, table);
@@ -2412,6 +2419,20 @@ static struct nf_sockopt_ops ebt_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+static int __net_init ebt_pernet_init(struct net *net)
+{
+	struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+
+	INIT_LIST_HEAD(&ebt_net->tables);
+	return 0;
+}
+
+static struct pernet_operations ebt_net_ops = {
+	.init = ebt_pernet_init,
+	.id   = &ebt_pernet_id,
+	.size = sizeof(struct ebt_pernet),
+};
+
 static int __init ebtables_init(void)
 {
 	int ret;
@@ -2425,13 +2446,21 @@ static int __init ebtables_init(void)
 		return ret;
 	}
 
+	ret = register_pernet_subsys(&ebt_net_ops);
+	if (ret < 0) {
+		nf_unregister_sockopt(&ebt_sockopts);
+		xt_unregister_target(&ebt_standard_target);
+		return ret;
+	}
+
 	return 0;
 }
 
-static void __exit ebtables_fini(void)
+static void ebtables_fini(void)
 {
 	nf_unregister_sockopt(&ebt_sockopts);
 	xt_unregister_target(&ebt_standard_target);
+	unregister_pernet_subsys(&ebt_net_ops);
 }
 
 EXPORT_SYMBOL(ebt_register_table);
-- 
cgit v1.2.3


From 0854db2aaef3fcdd3498a9d299c60adea2aa3dc6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:10 +0200
Subject: netfilter: nf_tables: use net_generic infra for transaction data

This moves all nf_tables pernet data from struct net to a net_generic
extension, with the exception of the gencursor.

The latter is used in the data path and also outside of the nf_tables
core. All others are only used from the configuration plane.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  11 ++
 net/netfilter/nf_tables_api.c     | 313 ++++++++++++++++++++++++--------------
 net/netfilter/nf_tables_offload.c |  30 ++--
 net/netfilter/nft_chain_filter.c  |  11 +-
 net/netfilter/nft_dynset.c        |   6 +-
 5 files changed, 243 insertions(+), 128 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8fefa112ae89..f0f7a3c5da6a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1567,4 +1567,15 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, ...);
 #else
 static inline int nft_request_module(struct net *net, const char *fmt, ...) { return -ENOENT; }
 #endif
+
+struct nftables_pernet {
+	struct list_head	tables;
+	struct list_head	commit_list;
+	struct list_head	module_list;
+	struct list_head	notify_list;
+	struct mutex		commit_mutex;
+	unsigned int		base_seq;
+	u8			validate_state;
+};
+
 #endif /* _NET_NF_TABLES_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a24de59e6c69..1b881a84bd01 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -21,10 +21,13 @@
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_offload.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 
 #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
 
+unsigned int nf_tables_net_id __read_mostly;
+
 static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
@@ -103,7 +106,9 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
 
 static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 {
-	switch (net->nft.validate_state) {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+	switch (nft_net->validate_state) {
 	case NFT_VALIDATE_SKIP:
 		WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);
 		break;
@@ -114,7 +119,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 			return;
 	}
 
-	net->nft.validate_state = new_validate_state;
+	nft_net->validate_state = new_validate_state;
 }
 static void nf_tables_trans_destroy_work(struct work_struct *w);
 static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
@@ -169,13 +174,15 @@ static void nft_trans_destroy(struct nft_trans *trans)
 
 static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
 {
+	struct nftables_pernet *nft_net;
 	struct net *net = ctx->net;
 	struct nft_trans *trans;
 
 	if (!nft_set_is_anonymous(set))
 		return;
 
-	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWSET:
 			if (nft_trans_set(trans) == set)
@@ -269,6 +276,14 @@ static void nf_tables_unregister_hook(struct net *net,
 		nf_unregister_net_hook(net, &basechain->ops);
 }
 
+static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
+{
+	struct nftables_pernet *nft_net;
+
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_add_tail(&trans->list, &nft_net->commit_list);
+}
+
 static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
 {
 	struct nft_trans *trans;
@@ -280,7 +295,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
 	if (msg_type == NFT_MSG_NEWTABLE)
 		nft_activate_next(ctx->net, ctx->table);
 
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 	return 0;
 }
 
@@ -313,7 +328,7 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
 		}
 	}
 
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 	return trans;
 }
 
@@ -386,7 +401,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
 			ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
 	}
 	nft_trans_rule(trans) = rule;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return trans;
 }
@@ -452,7 +467,7 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
 		nft_activate_next(ctx->net, set);
 	}
 	nft_trans_set(trans) = set;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 }
@@ -484,7 +499,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
 		nft_activate_next(ctx->net, obj);
 
 	nft_trans_obj(trans) = obj;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 }
@@ -517,7 +532,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
 		nft_activate_next(ctx->net, flowtable);
 
 	nft_trans_flowtable(trans) = flowtable;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 }
@@ -545,13 +560,15 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 					  const struct nlattr *nla,
 					  u8 family, u8 genmask, u32 nlpid)
 {
+	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list,
-				lockdep_is_held(&net->nft.commit_mutex)) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_for_each_entry_rcu(table, &nft_net->tables, list,
+				lockdep_is_held(&nft_net->commit_mutex)) {
 		if (!nla_strcmp(nla, table->name) &&
 		    table->family == family &&
 		    nft_active_genmask(table, genmask)) {
@@ -570,9 +587,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 						   const struct nlattr *nla,
 						   u8 genmask)
 {
+	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 
-	list_for_each_entry(table, &net->nft.tables, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_for_each_entry(table, &nft_net->tables, list) {
 		if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
 		    nft_active_genmask(table, genmask))
 			return table;
@@ -625,6 +644,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
 				      ...)
 {
 	char module_name[MODULE_NAME_LEN];
+	struct nftables_pernet *nft_net;
 	struct nft_module_request *req;
 	va_list args;
 	int ret;
@@ -635,7 +655,8 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
 	if (ret >= MODULE_NAME_LEN)
 		return 0;
 
-	list_for_each_entry(req, &net->nft.module_list, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_for_each_entry(req, &nft_net->module_list, list) {
 		if (!strcmp(req->module, module_name)) {
 			if (req->done)
 				return 0;
@@ -651,7 +672,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
 
 	req->done = false;
 	strlcpy(req->module, module_name, MODULE_NAME_LEN);
-	list_add_tail(&req->list, &net->nft.module_list);
+	list_add_tail(&req->list, &nft_net->module_list);
 
 	return -EAGAIN;
 }
@@ -690,7 +711,9 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
 
 static __be16 nft_base_seq(const struct net *net)
 {
-	return htons(net->nft.base_seq & 0xffff);
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+	return htons(nft_net->base_seq & 0xffff);
 }
 
 static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
@@ -751,6 +774,7 @@ static void nft_notify_enqueue(struct sk_buff *skb, bool report,
 
 static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 {
+	struct nftables_pernet *nft_net;
 	struct sk_buff *skb;
 	int err;
 
@@ -769,7 +793,8 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_net = net_generic(ctx->net, nf_tables_net_id);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -779,15 +804,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 				 struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nftables_pernet *nft_net;
 	const struct nft_table *table;
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
@@ -972,7 +999,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 
 	nft_trans_table_flags(trans) = flags;
 	nft_trans_table_update(trans) = true;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 	return 0;
 err:
 	nft_trans_destroy(trans);
@@ -1035,6 +1062,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[],
 			      struct netlink_ext_ack *extack)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -1044,7 +1072,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	u32 flags = 0;
 	int err;
 
-	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 	attr = nla[NFTA_TABLE_NAME];
 	table = nft_table_lookup(net, attr, family, genmask,
 				 NETLINK_CB(skb).portid);
@@ -1105,7 +1133,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		goto err_trans;
 
-	list_add_tail_rcu(&table->list, &net->nft.tables);
+	list_add_tail_rcu(&table->list, &nft_net->tables);
 	return 0;
 err_trans:
 	rhltable_destroy(&table->chains_ht);
@@ -1193,11 +1221,12 @@ out:
 
 static int nft_flush(struct nft_ctx *ctx, int family)
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	struct nft_table *table, *nt;
 	const struct nlattr * const *nla = ctx->nla;
 	int err = 0;
 
-	list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+	list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
 		if (family != AF_UNSPEC && table->family != family)
 			continue;
 
@@ -1316,7 +1345,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 static bool lockdep_commit_lock_is_held(const struct net *net)
 {
 #ifdef CONFIG_PROVE_LOCKING
-	return lockdep_is_held(&net->nft.commit_mutex);
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+	return lockdep_is_held(&nft_net->commit_mutex);
 #else
 	return true;
 #endif
@@ -1519,6 +1550,7 @@ nla_put_failure:
 
 static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 {
+	struct nftables_pernet *nft_net;
 	struct sk_buff *skb;
 	int err;
 
@@ -1538,7 +1570,8 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_net = net_generic(ctx->net, nf_tables_net_id);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -1553,11 +1586,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct nftables_pernet *nft_net;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
@@ -1873,11 +1908,12 @@ static int nft_chain_parse_hook(struct net *net,
 				struct nft_chain_hook *hook, u8 family,
 				bool autoload)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
 	const struct nft_chain_type *type;
 	int err;
 
-	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 	lockdep_nfnl_nft_mutex_not_held();
 
 	err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX,
@@ -2266,6 +2302,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 
 	if (nla[NFTA_CHAIN_HANDLE] &&
 	    nla[NFTA_CHAIN_NAME]) {
+		struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 		struct nft_trans *tmp;
 		char *name;
 
@@ -2275,7 +2312,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 			goto err;
 
 		err = -EEXIST;
-		list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) {
+		list_for_each_entry(tmp, &nft_net->commit_list, list) {
 			if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
 			    tmp->ctx.table == table &&
 			    nft_trans_chain_update(tmp) &&
@@ -2289,7 +2326,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 
 		nft_trans_chain_name(trans) = name;
 	}
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 err:
@@ -2301,10 +2338,11 @@ err:
 static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
 					       const struct nlattr *nla)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	u32 id = ntohl(nla_get_be32(nla));
 	struct nft_trans *trans;
 
-	list_for_each_entry(trans, &net->nft.commit_list, list) {
+	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		struct nft_chain *chain = trans->ctx.chain;
 
 		if (trans->msg_type == NFT_MSG_NEWCHAIN &&
@@ -2319,6 +2357,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[],
 			      struct netlink_ext_ack *extack)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -2330,7 +2369,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	u64 handle = 0;
 	u32 flags = 0;
 
-	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask,
 				 NETLINK_CB(skb).portid);
@@ -2866,6 +2905,7 @@ nla_put_failure:
 static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 				  const struct nft_rule *rule, int event)
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	struct sk_buff *skb;
 	int err;
 
@@ -2885,7 +2925,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -2943,11 +2983,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	unsigned int idx = 0;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct nftables_pernet *nft_net;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
@@ -3178,6 +3220,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     const struct nlattr * const nla[],
 			     struct netlink_ext_ack *extack)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	struct nft_expr_info *info = NULL;
@@ -3195,7 +3238,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	int err, rem;
 	u64 handle, pos_handle;
 
-	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
 				 NETLINK_CB(skb).portid);
@@ -3367,7 +3410,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	kvfree(info);
 	chain->use++;
 
-	if (net->nft.validate_state == NFT_VALIDATE_DO)
+	if (nft_net->validate_state == NFT_VALIDATE_DO)
 		return nft_table_validate(net, table);
 
 	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
@@ -3396,10 +3439,11 @@ err1:
 static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 					     const struct nlattr *nla)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	u32 id = ntohl(nla_get_be32(nla));
 	struct nft_trans *trans;
 
-	list_for_each_entry(trans, &net->nft.commit_list, list) {
+	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		struct nft_rule *rule = nft_trans_rule(trans);
 
 		if (trans->msg_type == NFT_MSG_NEWRULE &&
@@ -3512,13 +3556,14 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 		   const struct nft_set_desc *desc,
 		   enum nft_set_policies policy)
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	const struct nft_set_ops *ops, *bops;
 	struct nft_set_estimate est, best;
 	const struct nft_set_type *type;
 	u32 flags = 0;
 	int i;
 
-	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 	lockdep_nfnl_nft_mutex_not_held();
 
 	if (nla[NFTA_SET_FLAGS] != NULL)
@@ -3656,10 +3701,11 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
 static struct nft_set *nft_set_lookup_byid(const struct net *net,
 					   const struct nlattr *nla, u8 genmask)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans;
 	u32 id = ntohl(nla_get_be32(nla));
 
-	list_for_each_entry(trans, &net->nft.commit_list, list) {
+	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		if (trans->msg_type == NFT_MSG_NEWSET) {
 			struct nft_set *set = nft_trans_set(trans);
 
@@ -3893,6 +3939,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
 				 const struct nft_set *set, int event,
 			         gfp_t gfp_flags)
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	struct sk_buff *skb;
 	u32 portid = ctx->portid;
 	int err;
@@ -3911,7 +3958,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -3924,14 +3971,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
 	struct net *net = sock_net(skb->sk);
 	struct nft_ctx *ctx = cb->data, ctx_set;
+	struct nftables_pernet *nft_net;
 
 	if (cb->args[1])
 		return skb->len;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (ctx->family != NFPROTO_UNSPEC &&
 		    ctx->family != table->family)
 			continue;
@@ -4770,6 +4819,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nft_set_dump_ctx *dump_ctx = cb->data;
 	struct net *net = sock_net(skb->sk);
+	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_set_dump_args args;
@@ -4780,7 +4830,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	int event;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
 		    dump_ctx->ctx.family != table->family)
 			continue;
@@ -5064,6 +5115,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 				     const struct nft_set_elem *elem,
 				     int event, u16 flags)
 {
+	struct nftables_pernet *nft_net;
 	struct net *net = ctx->net;
 	u32 portid = ctx->portid;
 	struct sk_buff *skb;
@@ -5083,7 +5135,8 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_net = net_generic(net, nf_tables_net_id);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -5551,7 +5604,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	nft_trans_elem(trans) = elem;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 	return 0;
 
 err_set_full:
@@ -5582,6 +5635,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 				const struct nlattr * const nla[],
 				struct netlink_ext_ack *extack)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
 	struct nft_set *set;
@@ -5610,7 +5664,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 			return err;
 	}
 
-	if (net->nft.validate_state == NFT_VALIDATE_DO)
+	if (nft_net->validate_state == NFT_VALIDATE_DO)
 		return nft_table_validate(net, ctx.table);
 
 	return 0;
@@ -5746,7 +5800,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	nft_set_elem_deactivate(ctx->net, set, &elem);
 
 	nft_trans_elem(trans) = elem;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 	return 0;
 
 fail_ops:
@@ -5780,7 +5834,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
 	nft_set_elem_deactivate(ctx->net, set, elem);
 	nft_trans_elem_set(trans) = set;
 	nft_trans_elem(trans) = *elem;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 err1:
@@ -6074,7 +6128,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
 	nft_trans_obj(trans) = obj;
 	nft_trans_obj_update(trans) = true;
 	nft_trans_obj_newobj(trans) = newobj;
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 
@@ -6236,6 +6290,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nft_obj_filter *filter = cb->data;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct nftables_pernet *nft_net;
 	struct nft_object *obj;
 	bool reset = false;
 
@@ -6243,9 +6298,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 		reset = true;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
@@ -6268,7 +6324,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 				char *buf = kasprintf(GFP_ATOMIC,
 						      "%s:%u",
 						      table->name,
-						      net->nft.base_seq);
+						      nft_net->base_seq);
 
 				audit_log_nfcfg(buf,
 						family,
@@ -6389,8 +6445,11 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 		reset = true;
 
 	if (reset) {
-		char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
-				      table->name, net->nft.base_seq);
+		const struct nftables_pernet *nft_net;
+		char *buf;
+
+		nft_net = net_generic(net, nf_tables_net_id);
+		buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
 
 		audit_log_nfcfg(buf,
 				family,
@@ -6476,10 +6535,11 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq, int event,
 		    int family, int report, gfp_t gfp)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct sk_buff *skb;
 	int err;
 	char *buf = kasprintf(gfp, "%s:%u",
-			      table->name, net->nft.base_seq);
+			      table->name, nft_net->base_seq);
 
 	audit_log_nfcfg(buf,
 			family,
@@ -6505,7 +6565,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, report, &net->nft.notify_list);
+	nft_notify_enqueue(skb, report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -6837,7 +6897,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
 	INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
 	list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans));
 
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 
@@ -7025,7 +7085,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
 	INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
 	nft_flowtable_hook_release(&flowtable_hook);
 
-	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 
@@ -7157,12 +7217,14 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
+	struct nftables_pernet *nft_net;
 	const struct nft_table *table;
 
 	rcu_read_lock();
-	cb->seq = net->nft.base_seq;
+	nft_net = net_generic(net, nf_tables_net_id);
+	cb->seq = nft_net->base_seq;
 
-	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (family != NFPROTO_UNSPEC && family != table->family)
 			continue;
 
@@ -7297,6 +7359,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 				       struct list_head *hook_list,
 				       int event)
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	struct sk_buff *skb;
 	int err;
 
@@ -7316,7 +7379,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
+	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
@@ -7341,6 +7404,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 				   u32 portid, u32 seq)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nlmsghdr *nlh;
 	char buf[TASK_COMM_LEN];
 	int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
@@ -7350,7 +7414,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 	if (!nlh)
 		goto nla_put_failure;
 
-	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+	if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
 	    nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
 	    nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
 		goto nla_put_failure;
@@ -7385,6 +7449,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct nft_flowtable *flowtable;
+	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 	struct net *net;
 
@@ -7392,13 +7457,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
 		return 0;
 
 	net = dev_net(dev);
-	mutex_lock(&net->nft.commit_mutex);
-	list_for_each_entry(table, &net->nft.tables, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	mutex_lock(&nft_net->commit_mutex);
+	list_for_each_entry(table, &nft_net->tables, list) {
 		list_for_each_entry(flowtable, &table->flowtables, list) {
 			nft_flowtable_event(event, dev, flowtable);
 		}
 	}
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 
 	return NOTIFY_DONE;
 }
@@ -7579,16 +7645,17 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 
 static int nf_tables_validate(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_table *table;
 
-	switch (net->nft.validate_state) {
+	switch (nft_net->validate_state) {
 	case NFT_VALIDATE_SKIP:
 		break;
 	case NFT_VALIDATE_NEED:
 		nft_validate_state_update(net, NFT_VALIDATE_DO);
 		fallthrough;
 	case NFT_VALIDATE_DO:
-		list_for_each_entry(table, &net->nft.tables, list) {
+		list_for_each_entry(table, &nft_net->tables, list) {
 			if (nft_table_validate(net, table) < 0)
 				return -EAGAIN;
 		}
@@ -7763,9 +7830,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
 
 static void nf_tables_commit_chain_prepare_cancel(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans, *next;
 
-	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+	list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
 		struct nft_chain *chain = trans->ctx.chain;
 
 		if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -7874,10 +7942,11 @@ static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
 
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_module_request *req, *next;
 
-	WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
-	list_for_each_entry_safe(req, next, &net->nft.module_list, list) {
+	WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+	list_for_each_entry_safe(req, next, &nft_net->module_list, list) {
 		WARN_ON_ONCE(!req->done);
 		list_del(&req->list);
 		kfree(req);
@@ -7886,6 +7955,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net)
 
 static void nf_tables_commit_release(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans;
 
 	/* all side effects have to be made visible.
@@ -7895,35 +7965,36 @@ static void nf_tables_commit_release(struct net *net)
 	 * Memory reclaim happens asynchronously from work queue
 	 * to prevent expensive synchronize_rcu() in commit phase.
 	 */
-	if (list_empty(&net->nft.commit_list)) {
+	if (list_empty(&nft_net->commit_list)) {
 		nf_tables_module_autoload_cleanup(net);
-		mutex_unlock(&net->nft.commit_mutex);
+		mutex_unlock(&nft_net->commit_mutex);
 		return;
 	}
 
-	trans = list_last_entry(&net->nft.commit_list,
+	trans = list_last_entry(&nft_net->commit_list,
 				struct nft_trans, list);
 	get_net(trans->ctx.net);
 	WARN_ON_ONCE(trans->put_net);
 
 	trans->put_net = true;
 	spin_lock(&nf_tables_destroy_list_lock);
-	list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list);
+	list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
 	spin_unlock(&nf_tables_destroy_list_lock);
 
 	nf_tables_module_autoload_cleanup(net);
 	schedule_work(&trans_destroy_work);
 
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 }
 
 static void nft_commit_notify(struct net *net, u32 portid)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct sk_buff *batch_skb = NULL, *nskb, *skb;
 	unsigned char *data;
 	int len;
 
-	list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) {
+	list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) {
 		if (!batch_skb) {
 new_batch:
 			batch_skb = skb;
@@ -7949,7 +8020,7 @@ new_batch:
 			       NFT_CB(batch_skb).report, GFP_KERNEL);
 	}
 
-	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
+	WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
 }
 
 static int nf_tables_commit_audit_alloc(struct list_head *adl,
@@ -8005,6 +8076,7 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
 
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_chain *chain;
@@ -8012,8 +8084,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	LIST_HEAD(adl);
 	int err;
 
-	if (list_empty(&net->nft.commit_list)) {
-		mutex_unlock(&net->nft.commit_mutex);
+	if (list_empty(&nft_net->commit_list)) {
+		mutex_unlock(&nft_net->commit_mutex);
 		return 0;
 	}
 
@@ -8026,7 +8098,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		return err;
 
 	/* 1.  Allocate space for next generation rules_gen_X[] */
-	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+	list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
 		int ret;
 
 		ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
@@ -8047,7 +8119,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	}
 
 	/* step 2.  Make rules_gen_X visible to packet path */
-	list_for_each_entry(table, &net->nft.tables, list) {
+	list_for_each_entry(table, &nft_net->tables, list) {
 		list_for_each_entry(chain, &table->chains, list)
 			nf_tables_commit_chain(net, chain);
 	}
@@ -8056,12 +8128,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 	 * Bump generation counter, invalidate any dump in progress.
 	 * Cannot fail after this point.
 	 */
-	while (++net->nft.base_seq == 0);
+	while (++nft_net->base_seq == 0)
+		;
 
 	/* step 3. Start new generation, rules_gen_X now in use. */
 	net->nft.gencursor = nft_gencursor_next(net);
 
-	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+	list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
 		nf_tables_commit_audit_collect(&adl, trans->ctx.table,
 					       trans->msg_type);
 		switch (trans->msg_type) {
@@ -8216,7 +8289,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 	nft_commit_notify(net, NETLINK_CB(skb).portid);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
-	nf_tables_commit_audit_log(&adl, net->nft.base_seq);
+	nf_tables_commit_audit_log(&adl, nft_net->base_seq);
 	nf_tables_commit_release(net);
 
 	return 0;
@@ -8224,17 +8297,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 static void nf_tables_module_autoload(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_module_request *req, *next;
 	LIST_HEAD(module_list);
 
-	list_splice_init(&net->nft.module_list, &module_list);
-	mutex_unlock(&net->nft.commit_mutex);
+	list_splice_init(&nft_net->module_list, &module_list);
+	mutex_unlock(&nft_net->commit_mutex);
 	list_for_each_entry_safe(req, next, &module_list, list) {
 		request_module("%s", req->module);
 		req->done = true;
 	}
-	mutex_lock(&net->nft.commit_mutex);
-	list_splice(&module_list, &net->nft.module_list);
+	mutex_lock(&nft_net->commit_mutex);
+	list_splice(&module_list, &nft_net->module_list);
 }
 
 static void nf_tables_abort_release(struct nft_trans *trans)
@@ -8271,6 +8345,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 
 static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_hook *hook;
@@ -8279,7 +8354,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 	    nf_tables_validate(net) < 0)
 		return -EAGAIN;
 
-	list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+	list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
 					 list) {
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
@@ -8403,7 +8478,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 	synchronize_rcu();
 
 	list_for_each_entry_safe_reverse(trans, next,
-					 &net->nft.commit_list, list) {
+					 &nft_net->commit_list, list) {
 		list_del(&trans->list);
 		nf_tables_abort_release(trans);
 	}
@@ -8424,22 +8499,24 @@ static void nf_tables_cleanup(struct net *net)
 static int nf_tables_abort(struct net *net, struct sk_buff *skb,
 			   enum nfnl_abort_action action)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	int ret = __nf_tables_abort(net, action);
 
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 
 	return ret;
 }
 
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	bool genid_ok;
 
-	mutex_lock(&net->nft.commit_mutex);
+	mutex_lock(&nft_net->commit_mutex);
 
-	genid_ok = genid == 0 || net->nft.base_seq == genid;
+	genid_ok = genid == 0 || nft_net->base_seq == genid;
 	if (!genid_ok)
-		mutex_unlock(&net->nft.commit_mutex);
+		mutex_unlock(&nft_net->commit_mutex);
 
 	/* else, commit mutex has to be released by commit or abort function */
 	return genid_ok;
@@ -8994,9 +9071,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table)
 
 static void __nft_release_hooks(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_table *table;
 
-	list_for_each_entry(table, &net->nft.tables, list) {
+	list_for_each_entry(table, &nft_net->tables, list) {
 		if (nft_table_has_owner(table))
 			continue;
 
@@ -9053,9 +9131,10 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
 
 static void __nft_release_tables(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_table *table, *nt;
 
-	list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+	list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
 		if (nft_table_has_owner(table))
 			continue;
 
@@ -9066,6 +9145,7 @@ static void __nft_release_tables(struct net *net)
 static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
 {
+	struct nftables_pernet *nft_net;
 	struct netlink_notify *n = ptr;
 	struct nft_table *table, *nt;
 	struct net *net = n->net;
@@ -9074,8 +9154,9 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
 	if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
 		return NOTIFY_DONE;
 
-	mutex_lock(&net->nft.commit_mutex);
-	list_for_each_entry(table, &net->nft.tables, list) {
+	nft_net = net_generic(net, nf_tables_net_id);
+	mutex_lock(&nft_net->commit_mutex);
+	list_for_each_entry(table, &nft_net->tables, list) {
 		if (nft_table_has_owner(table) &&
 		    n->portid == table->nlpid) {
 			__nft_release_hook(net, table);
@@ -9084,13 +9165,13 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
 	}
 	if (release) {
 		synchronize_rcu();
-		list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
+		list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
 			if (nft_table_has_owner(table) &&
 			    n->portid == table->nlpid)
 				__nft_release_table(net, table);
 		}
 	}
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 
 	return NOTIFY_DONE;
 }
@@ -9101,13 +9182,15 @@ static struct notifier_block nft_nl_notifier = {
 
 static int __net_init nf_tables_init_net(struct net *net)
 {
-	INIT_LIST_HEAD(&net->nft.tables);
-	INIT_LIST_HEAD(&net->nft.commit_list);
-	INIT_LIST_HEAD(&net->nft.module_list);
-	INIT_LIST_HEAD(&net->nft.notify_list);
-	mutex_init(&net->nft.commit_mutex);
-	net->nft.base_seq = 1;
-	net->nft.validate_state = NFT_VALIDATE_SKIP;
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+	INIT_LIST_HEAD(&nft_net->tables);
+	INIT_LIST_HEAD(&nft_net->commit_list);
+	INIT_LIST_HEAD(&nft_net->module_list);
+	INIT_LIST_HEAD(&nft_net->notify_list);
+	mutex_init(&nft_net->commit_mutex);
+	nft_net->base_seq = 1;
+	nft_net->validate_state = NFT_VALIDATE_SKIP;
 
 	return 0;
 }
@@ -9119,20 +9202,24 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
-	mutex_lock(&net->nft.commit_mutex);
-	if (!list_empty(&net->nft.commit_list))
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+	mutex_lock(&nft_net->commit_mutex);
+	if (!list_empty(&nft_net->commit_list))
 		__nf_tables_abort(net, NFNL_ABORT_NONE);
 	__nft_release_tables(net);
-	mutex_unlock(&net->nft.commit_mutex);
-	WARN_ON_ONCE(!list_empty(&net->nft.tables));
-	WARN_ON_ONCE(!list_empty(&net->nft.module_list));
-	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
+	mutex_unlock(&nft_net->commit_mutex);
+	WARN_ON_ONCE(!list_empty(&nft_net->tables));
+	WARN_ON_ONCE(!list_empty(&nft_net->module_list));
+	WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
 }
 
 static struct pernet_operations nf_tables_net_ops = {
 	.init		= nf_tables_init_net,
 	.pre_exit	= nf_tables_pre_exit_net,
 	.exit		= nf_tables_exit_net,
+	.id		= &nf_tables_net_id,
+	.size		= sizeof(struct nftables_pernet),
 };
 
 static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 9ae14270c543..43b56eff3b04 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -7,6 +7,8 @@
 #include <net/netfilter/nf_tables_offload.h>
 #include <net/pkt_cls.h>
 
+extern unsigned int nf_tables_net_id;
+
 static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
 {
 	struct nft_flow_rule *flow;
@@ -307,16 +309,18 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
 	struct nft_base_chain *basechain = block_cb->indr.data;
 	struct net_device *dev = block_cb->indr.dev;
 	struct netlink_ext_ack extack = {};
+	struct nftables_pernet *nft_net;
 	struct net *net = dev_net(dev);
 	struct flow_block_offload bo;
 
 	nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
 				    basechain, &extack);
-	mutex_lock(&net->nft.commit_mutex);
+	nft_net = net_generic(net, nf_tables_net_id);
+	mutex_lock(&nft_net->commit_mutex);
 	list_del(&block_cb->driver_list);
 	list_move(&block_cb->list, &bo.cb_list);
 	nft_flow_offload_unbind(&bo, basechain);
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 }
 
 static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain,
@@ -412,9 +416,10 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
 static void nft_flow_rule_offload_abort(struct net *net,
 					struct nft_trans *trans)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	int err = 0;
 
-	list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) {
+	list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
 		if (trans->ctx.family != NFPROTO_NETDEV)
 			continue;
 
@@ -460,11 +465,12 @@ static void nft_flow_rule_offload_abort(struct net *net,
 
 int nft_flow_rule_offload_commit(struct net *net)
 {
+	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
 	struct nft_trans *trans;
 	int err = 0;
 	u8 policy;
 
-	list_for_each_entry(trans, &net->nft.commit_list, list) {
+	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		if (trans->ctx.family != NFPROTO_NETDEV)
 			continue;
 
@@ -516,7 +522,7 @@ int nft_flow_rule_offload_commit(struct net *net)
 		}
 	}
 
-	list_for_each_entry(trans, &net->nft.commit_list, list) {
+	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		if (trans->ctx.family != NFPROTO_NETDEV)
 			continue;
 
@@ -536,15 +542,15 @@ int nft_flow_rule_offload_commit(struct net *net)
 	return err;
 }
 
-static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
+static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net,
+						 struct net_device *dev)
 {
 	struct nft_base_chain *basechain;
-	struct net *net = dev_net(dev);
 	struct nft_hook *hook, *found;
 	const struct nft_table *table;
 	struct nft_chain *chain;
 
-	list_for_each_entry(table, &net->nft.tables, list) {
+	list_for_each_entry(table, &nft_net->tables, list) {
 		if (table->family != NFPROTO_NETDEV)
 			continue;
 
@@ -576,19 +582,21 @@ static int nft_offload_netdev_event(struct notifier_block *this,
 				    unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct nftables_pernet *nft_net;
 	struct net *net = dev_net(dev);
 	struct nft_chain *chain;
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	mutex_lock(&net->nft.commit_mutex);
-	chain = __nft_offload_get_chain(dev);
+	nft_net = net_generic(net, nf_tables_net_id);
+	mutex_lock(&nft_net->commit_mutex);
+	chain = __nft_offload_get_chain(nft_net, dev);
 	if (chain)
 		nft_flow_block_chain(nft_base_chain(chain), dev,
 				     FLOW_BLOCK_UNBIND);
 
-	mutex_unlock(&net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 
 	return NOTIFY_DONE;
 }
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index ff8528ad3dc6..7a9aa57b195b 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/netfilter/nf_tables.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
@@ -10,6 +11,8 @@
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
+extern unsigned int nf_tables_net_id;
+
 #ifdef CONFIG_NF_TABLES_IPV4
 static unsigned int nft_do_chain_ipv4(void *priv,
 				      struct sk_buff *skb,
@@ -355,6 +358,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 				  unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 	struct nft_chain *chain, *nr;
 	struct nft_ctx ctx = {
@@ -365,8 +369,9 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	    event != NETDEV_CHANGENAME)
 		return NOTIFY_DONE;
 
-	mutex_lock(&ctx.net->nft.commit_mutex);
-	list_for_each_entry(table, &ctx.net->nft.tables, list) {
+	nft_net = net_generic(ctx.net, nf_tables_net_id);
+	mutex_lock(&nft_net->commit_mutex);
+	list_for_each_entry(table, &nft_net->tables, list) {
 		if (table->family != NFPROTO_NETDEV)
 			continue;
 
@@ -380,7 +385,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 			nft_netdev_event(event, dev, &ctx);
 		}
 	}
-	mutex_unlock(&ctx.net->nft.commit_mutex);
+	mutex_unlock(&nft_net->commit_mutex);
 
 	return NOTIFY_DONE;
 }
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index d44a70c11b3f..f9437a0dcfef 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -11,6 +11,9 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
+#include <net/netns/generic.h>
+
+extern unsigned int nf_tables_net_id;
 
 struct nft_dynset {
 	struct nft_set			*set;
@@ -161,13 +164,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			   const struct nft_expr *expr,
 			   const struct nlattr * const tb[])
 {
+	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
 	struct nft_dynset *priv = nft_expr_priv(expr);
 	u8 genmask = nft_genmask_next(ctx->net);
 	struct nft_set *set;
 	u64 timeout;
 	int err, i;
 
-	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+	lockdep_assert_held(&nft_net->commit_mutex);
 
 	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
 	    tb[NFTA_DYNSET_OP] == NULL ||
-- 
cgit v1.2.3


From 1d610d4d31a8ed36d57f88108331edc7585570b7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:11 +0200
Subject: netfilter: x_tables: move known table lists to net_generic infra

Will reduce struct net size by 208 bytes.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/x_tables.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 6bd31a7a27fc..8e23ef2673e4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -24,6 +24,7 @@
 #include <linux/audit.h>
 #include <linux/user_namespace.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
@@ -38,6 +39,10 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define XT_PCPU_BLOCK_SIZE 4096
 #define XT_MAX_TABLE_SIZE	(512 * 1024 * 1024)
 
+struct xt_pernet {
+	struct list_head tables[NFPROTO_NUMPROTO];
+};
+
 struct compat_delta {
 	unsigned int offset; /* offset in kernel */
 	int delta; /* delta in 32bit user land */
@@ -55,7 +60,8 @@ struct xt_af {
 #endif
 };
 
-static struct xt_af *xt;
+static unsigned int xt_pernet_id __read_mostly;
+static struct xt_af *xt __read_mostly;
 
 static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_UNSPEC] = "x",
@@ -1203,10 +1209,11 @@ EXPORT_SYMBOL(xt_free_table_info);
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
 {
+	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
 	struct xt_table *t, *found = NULL;
 
 	mutex_lock(&xt[af].mutex);
-	list_for_each_entry(t, &net->xt.tables[af], list)
+	list_for_each_entry(t, &xt_net->tables[af], list)
 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
 			return t;
 
@@ -1214,7 +1221,8 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 		goto out;
 
 	/* Table doesn't exist in this netns, re-try init */
-	list_for_each_entry(t, &init_net.xt.tables[af], list) {
+	xt_net = net_generic(&init_net, xt_pernet_id);
+	list_for_each_entry(t, &xt_net->tables[af], list) {
 		int err;
 
 		if (strcmp(t->name, name))
@@ -1237,8 +1245,9 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 	if (!found)
 		goto out;
 
+	xt_net = net_generic(net, xt_pernet_id);
 	/* and once again: */
-	list_for_each_entry(t, &net->xt.tables[af], list)
+	list_for_each_entry(t, &xt_net->tables[af], list)
 		if (strcmp(t->name, name) == 0)
 			return t;
 
@@ -1423,9 +1432,10 @@ struct xt_table *xt_register_table(struct net *net,
 				   struct xt_table_info *bootstrap,
 				   struct xt_table_info *newinfo)
 {
-	int ret;
+	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
+	int ret;
 
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
@@ -1436,7 +1446,7 @@ struct xt_table *xt_register_table(struct net *net,
 
 	mutex_lock(&xt[table->af].mutex);
 	/* Don't autoload: we'd eat our tail... */
-	list_for_each_entry(t, &net->xt.tables[table->af], list) {
+	list_for_each_entry(t, &xt_net->tables[table->af], list) {
 		if (strcmp(t->name, table->name) == 0) {
 			ret = -EEXIST;
 			goto unlock;
@@ -1455,7 +1465,7 @@ struct xt_table *xt_register_table(struct net *net,
 	/* save number of initial entries */
 	private->initial_entries = private->number;
 
-	list_add(&table->list, &net->xt.tables[table->af]);
+	list_add(&table->list, &xt_net->tables[table->af]);
 	mutex_unlock(&xt[table->af].mutex);
 	return table;
 
@@ -1486,19 +1496,25 @@ EXPORT_SYMBOL_GPL(xt_unregister_table);
 #ifdef CONFIG_PROC_FS
 static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
 	struct net *net = seq_file_net(seq);
-	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
+	struct xt_pernet *xt_net;
+
+	xt_net = net_generic(net, xt_pernet_id);
 
 	mutex_lock(&xt[af].mutex);
-	return seq_list_start(&net->xt.tables[af], *pos);
+	return seq_list_start(&xt_net->tables[af], *pos);
 }
 
 static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	u8 af = (unsigned long)PDE_DATA(file_inode(seq->file));
 	struct net *net = seq_file_net(seq);
-	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
+	struct xt_pernet *xt_net;
+
+	xt_net = net_generic(net, xt_pernet_id);
 
-	return seq_list_next(v, &net->xt.tables[af], pos);
+	return seq_list_next(v, &xt_net->tables[af], pos);
 }
 
 static void xt_table_seq_stop(struct seq_file *seq, void *v)
@@ -1864,24 +1880,28 @@ EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
 
 static int __net_init xt_net_init(struct net *net)
 {
+	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
 	int i;
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
-		INIT_LIST_HEAD(&net->xt.tables[i]);
+		INIT_LIST_HEAD(&xt_net->tables[i]);
 	return 0;
 }
 
 static void __net_exit xt_net_exit(struct net *net)
 {
+	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
 	int i;
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
-		WARN_ON_ONCE(!list_empty(&net->xt.tables[i]));
+		WARN_ON_ONCE(!list_empty(&xt_net->tables[i]));
 }
 
 static struct pernet_operations xt_net_ops = {
 	.init = xt_net_init,
 	.exit = xt_net_exit,
+	.id   = &xt_pernet_id,
+	.size = sizeof(struct xt_pernet),
 };
 
 static int __init xt_init(void)
-- 
cgit v1.2.3


From 7b5974709faf7628a036d3f0f14d4f403f536eac Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:12 +0200
Subject: netfilter: conntrack: move sysctl pointer to net_generic infra

No need to keep this in struct net, place it in the net_generic data.
The sysctl pointer is removed from struct net in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h    |  3 +++
 net/netfilter/nf_conntrack_standalone.c | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 439379ca9ffa..ef405a134307 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -47,6 +47,9 @@ struct nf_conntrack_net {
 	unsigned int users4;
 	unsigned int users6;
 	unsigned int users_bridge;
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header	*sysctl_header;
+#endif
 };
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0ee702d374b0..3f2cc7b04b20 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1027,6 +1027,7 @@ static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
 
 static int nf_conntrack_standalone_init_sysctl(struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	struct nf_udp_net *un = nf_udp_pernet(net);
 	struct ctl_table *table;
 
@@ -1072,8 +1073,8 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
 	}
 
-	net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
-	if (!net->ct.sysctl_header)
+	cnet->sysctl_header = register_net_sysctl(net, "net/netfilter", table);
+	if (!cnet->sysctl_header)
 		goto out_unregister_netfilter;
 
 	return 0;
@@ -1085,10 +1086,11 @@ out_unregister_netfilter:
 
 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	struct ctl_table *table;
 
-	table = net->ct.sysctl_header->ctl_table_arg;
-	unregister_net_sysctl_table(net->ct.sysctl_header);
+	table = cnet->sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(cnet->sysctl_header);
 	kfree(table);
 }
 #else
-- 
cgit v1.2.3


From 1379940bf809ba643eb10950c932f72d0191aa43 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:13 +0200
Subject: netfilter: conntrack: move ecache dwork to net_generic infra

dwork struct is large (>128 byte) and not needed when conntrack module
is not loaded.

Place it in net_generic data instead.  The struct net dwork member is now
obsolete and will be removed in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h        |  4 ++++
 include/net/netfilter/nf_conntrack_ecache.h | 33 +++++++++++------------------
 net/netfilter/nf_conntrack_core.c           |  7 ++++--
 net/netfilter/nf_conntrack_ecache.c         | 31 ++++++++++++++++++++++-----
 4 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index ef405a134307..86d86c860ede 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -50,6 +50,10 @@ struct nf_conntrack_net {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	struct delayed_work ecache_dwork;
+	struct netns_ct *ct_net;
+#endif
 };
 
 #include <linux/types.h>
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index eb81f9195e28..d00ba6048e44 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -171,12 +171,18 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			       struct nf_conntrack_expect *exp,
 			       u32 portid, int report);
 
+void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state);
+
 void nf_conntrack_ecache_pernet_init(struct net *net);
 void nf_conntrack_ecache_pernet_fini(struct net *net);
 
 int nf_conntrack_ecache_init(void);
 void nf_conntrack_ecache_fini(void);
 
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net)
+{
+	return net->ct.ecache_dwork_pending;
+}
 #else /* CONFIG_NF_CONNTRACK_EVENTS */
 
 static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
@@ -186,6 +192,11 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
 {
 }
 
+static inline void nf_conntrack_ecache_work(struct net *net,
+					    enum nf_ct_ecache_state s)
+{
+}
+
 static inline void nf_conntrack_ecache_pernet_init(struct net *net)
 {
 }
@@ -203,26 +214,6 @@ static inline void nf_conntrack_ecache_fini(void)
 {
 }
 
+static inline bool nf_conntrack_ecache_dwork_pending(const struct net *net) { return false; }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
-
-static inline void nf_conntrack_ecache_delayed_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	if (!delayed_work_pending(&net->ct.ecache_dwork)) {
-		schedule_delayed_work(&net->ct.ecache_dwork, HZ);
-		net->ct.ecache_dwork_pending = true;
-	}
-#endif
-}
-
-static inline void nf_conntrack_ecache_work(struct net *net)
-{
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-	if (net->ct.ecache_dwork_pending) {
-		net->ct.ecache_dwork_pending = false;
-		mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
-	}
-#endif
-}
-
 #endif /*_NF_CONNTRACK_ECACHE_H*/
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ff0168736f6e..ace3e8265e0a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -656,6 +656,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 {
 	struct nf_conn_tstamp *tstamp;
+	struct net *net;
 
 	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
 		return false;
@@ -670,11 +671,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
 		 * be done by event cache worker on redelivery.
 		 */
 		nf_ct_delete_from_lists(ct);
-		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
+		nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL);
 		return false;
 	}
 
-	nf_conntrack_ecache_work(nf_ct_net(ct));
+	net = nf_ct_net(ct);
+	if (nf_conntrack_ecache_dwork_pending(net))
+		nf_conntrack_ecache_work(net, NFCT_ECACHE_DESTROY_SENT);
 	nf_ct_delete_from_lists(ct);
 	nf_ct_put(ct);
 	return true;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 7956c9f19899..759d87aef95f 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,6 +27,8 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
+extern unsigned int nf_conntrack_net_id;
+
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
 
 #define ECACHE_RETRY_WAIT (HZ/10)
@@ -96,8 +98,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
 
 static void ecache_work(struct work_struct *work)
 {
-	struct netns_ct *ctnet =
-		container_of(work, struct netns_ct, ecache_dwork.work);
+	struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
+	struct netns_ct *ctnet = cnet->ct_net;
 	int cpu, delay = -1;
 	struct ct_pcpu *pcpu;
 
@@ -127,7 +129,7 @@ static void ecache_work(struct work_struct *work)
 
 	ctnet->ecache_dwork_pending = delay > 0;
 	if (delay >= 0)
-		schedule_delayed_work(&ctnet->ecache_dwork, delay);
+		schedule_delayed_work(&cnet->ecache_dwork, delay);
 }
 
 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@@ -344,6 +346,20 @@ void nf_ct_expect_unregister_notifier(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
+void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+	if (state == NFCT_ECACHE_DESTROY_FAIL &&
+	    !delayed_work_pending(&cnet->ecache_dwork)) {
+		schedule_delayed_work(&cnet->ecache_dwork, HZ);
+		net->ct.ecache_dwork_pending = true;
+	} else if (state == NFCT_ECACHE_DESTROY_SENT) {
+		net->ct.ecache_dwork_pending = false;
+		mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+	}
+}
+
 #define NF_CT_EVENTS_DEFAULT 1
 static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
 
@@ -355,13 +371,18 @@ static const struct nf_ct_ext_type event_extend = {
 
 void nf_conntrack_ecache_pernet_init(struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
 	net->ct.sysctl_events = nf_ct_events;
-	INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
+	cnet->ct_net = &net->ct;
+	INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
 }
 
 void nf_conntrack_ecache_pernet_fini(struct net *net)
 {
-	cancel_delayed_work_sync(&net->ct.ecache_dwork);
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+	cancel_delayed_work_sync(&cnet->ecache_dwork);
 }
 
 int nf_conntrack_ecache_init(void)
-- 
cgit v1.2.3


From db3685b4046f8b629bbf73caa33751ce567ea8ff Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 1 Apr 2021 16:11:14 +0200
Subject: net: remove obsolete members from struct net

all have been moved to generic_net infra. On x86_64, this reduces
struct net size from 70 to 63 cache lines (4480 to 4032 byte).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/net_namespace.h   | 9 ---------
 include/net/netns/conntrack.h | 4 ----
 include/net/netns/netfilter.h | 6 ------
 include/net/netns/nftables.h  | 7 -------
 include/net/netns/x_tables.h  | 1 -
 5 files changed, 27 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index dcaee24a4d87..fdb16dc33703 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -142,15 +142,6 @@ struct net {
 #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
 	struct netns_nftables	nft;
 #endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
-	struct netns_nf_frag	nf_frag;
-	struct ctl_table_header *nf_frag_frags_hdr;
-#endif
-	struct sock		*nfnl;
-	struct sock		*nfnl_stash;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-	struct list_head	nfct_timeout_list;
-#endif
 #endif
 #ifdef CONFIG_WEXT_CORE
 	struct sk_buff_head	wext_nlevents;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 806454e767bf..e5f664d69ead 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -96,13 +96,9 @@ struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-	struct delayed_work ecache_dwork;
 	bool ecache_dwork_pending;
 #endif
 	bool			auto_assign_helper_warned;
-#ifdef CONFIG_SYSCTL
-	struct ctl_table_header	*sysctl_header;
-#endif
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
 	int			sysctl_events;
 	int			sysctl_acct;
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index ca043342c0eb..15e2b13fb0c0 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -28,11 +28,5 @@ struct netns_nf {
 #if IS_ENABLED(CONFIG_DECNET)
 	struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
 #endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-	bool			defrag_ipv4;
-#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
-	bool			defrag_ipv6;
-#endif
 };
 #endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 6c0806bd8d1e..8c77832d0240 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -5,14 +5,7 @@
 #include <linux/list.h>
 
 struct netns_nftables {
-	struct list_head	tables;
-	struct list_head	commit_list;
-	struct list_head	module_list;
-	struct list_head	notify_list;
-	struct mutex		commit_mutex;
-	unsigned int		base_seq;
 	u8			gencursor;
-	u8			validate_state;
 };
 
 #endif
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 9bc5a12fdbb0..83c8ea2e87a6 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -8,7 +8,6 @@
 struct ebt_table;
 
 struct netns_xt {
-	struct list_head tables[NFPROTO_NUMPROTO];
 	bool notrack_deprecated_warning;
 	bool clusterip_deprecated_warning;
 #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
-- 
cgit v1.2.3


From 93917fd224fdaebd0864143468e358300d1ffe36 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Tue, 6 Apr 2021 14:26:24 +0800
Subject: Bluetooth: use the correct print format for L2CAP debug statements

Use the correct print format. Printing an unsigned int value should use %u
instead of %d. For details, please read document:
Documentation/core-api/printk-formats.rst

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 140669456b6f..b6a88b8256c7 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -492,14 +492,14 @@ static void l2cap_chan_destroy(struct kref *kref)
 
 void l2cap_chan_hold(struct l2cap_chan *c)
 {
-	BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
+	BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
 
 	kref_get(&c->kref);
 }
 
 void l2cap_chan_put(struct l2cap_chan *c)
 {
-	BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
+	BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
 
 	kref_put(&c->kref, l2cap_chan_destroy);
 }
@@ -7261,7 +7261,7 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
 	    L2CAP_TXSEQ_EXPECTED) {
 		l2cap_pass_to_tx(chan, control);
 
-		BT_DBG("buffer_seq %d->%d", chan->buffer_seq,
+		BT_DBG("buffer_seq %u->%u", chan->buffer_seq,
 		       __next_seq(chan, chan->buffer_seq));
 
 		chan->buffer_seq = __next_seq(chan, chan->buffer_seq);
@@ -8365,7 +8365,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 	if (!conn)
 		goto drop;
 
-	BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags);
+	BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags);
 
 	switch (flags) {
 	case ACL_START:
@@ -8395,10 +8395,10 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 			return;
 		}
 
-		BT_DBG("Start: total len %d, frag len %d", len, skb->len);
+		BT_DBG("Start: total len %d, frag len %u", len, skb->len);
 
 		if (skb->len > len) {
-			BT_ERR("Frame is too long (len %d, expected len %d)",
+			BT_ERR("Frame is too long (len %u, expected len %d)",
 			       skb->len, len);
 			l2cap_conn_unreliable(conn, ECOMM);
 			goto drop;
@@ -8411,7 +8411,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 		break;
 
 	case ACL_CONT:
-		BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
+		BT_DBG("Cont: frag len %u (expecting %u)", skb->len, conn->rx_len);
 
 		if (!conn->rx_skb) {
 			BT_ERR("Unexpected continuation frame (len %d)", skb->len);
@@ -8432,7 +8432,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 		}
 
 		if (skb->len > conn->rx_len) {
-			BT_ERR("Fragment is too long (len %d, expected %d)",
+			BT_ERR("Fragment is too long (len %u, expected %u)",
 			       skb->len, conn->rx_len);
 			l2cap_recv_reset(conn);
 			l2cap_conn_unreliable(conn, ECOMM);
-- 
cgit v1.2.3


From 25e70886c20005ac7facbd997ff4c3526dcd0de3 Mon Sep 17 00:00:00 2001
From: Daniel Winkler <danielwinkler@google.com>
Date: Mon, 5 Apr 2021 16:33:04 -0700
Subject: Bluetooth: Use ext adv handle from requests in CCs

Some extended advertising hci command complete events are still using
hdev->cur_adv_instance to map the request to the correct advertisement
handle. However, with extended advertising, "current instance" doesn't
make sense as we can have multiple concurrent advertisements. This
change switches these command complete handlers to use the advertising
handle from the request/event, to ensure we will always use the correct
advertising handle regardless of the state of hdev->cur_adv_instance.

This change is tested on hatch and kefka chromebooks and run through
single- and multi-advertising automated tests to confirm callbacks
report tx power to the correct advertising handle, etc.

Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Daniel Winkler <danielwinkler@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 593eafa282e3..016b2999f219 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1212,12 +1212,11 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (!hdev->cur_adv_instance) {
+	if (!cp->handle) {
 		/* Store in hdev for instance 0 (Set adv and Directed advs) */
 		bacpy(&hdev->random_addr, &cp->bdaddr);
 	} else {
-		adv_instance = hci_find_adv_instance(hdev,
-						     hdev->cur_adv_instance);
+		adv_instance = hci_find_adv_instance(hdev, cp->handle);
 		if (adv_instance)
 			bacpy(&adv_instance->random_addr, &cp->bdaddr);
 	}
@@ -1778,17 +1777,16 @@ static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
 
 	hci_dev_lock(hdev);
 	hdev->adv_addr_type = cp->own_addr_type;
-	if (!hdev->cur_adv_instance) {
+	if (!cp->handle) {
 		/* Store in hdev for instance 0 */
 		hdev->adv_tx_power = rp->tx_power;
 	} else {
-		adv_instance = hci_find_adv_instance(hdev,
-						     hdev->cur_adv_instance);
+		adv_instance = hci_find_adv_instance(hdev, cp->handle);
 		if (adv_instance)
 			adv_instance->tx_power = rp->tx_power;
 	}
 	/* Update adv data as tx power is known now */
-	hci_req_update_adv_data(hdev, hdev->cur_adv_instance);
+	hci_req_update_adv_data(hdev, cp->handle);
 
 	hci_dev_unlock(hdev);
 }
@@ -5308,12 +5306,12 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
 			return;
 
-		if (!hdev->cur_adv_instance) {
+		if (!ev->handle) {
 			bacpy(&conn->resp_addr, &hdev->random_addr);
 			return;
 		}
 
-		adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
+		adv_instance = hci_find_adv_instance(hdev, ev->handle);
 		if (adv_instance)
 			bacpy(&conn->resp_addr, &adv_instance->random_addr);
 	}
-- 
cgit v1.2.3


From b6f1b79deabd32f89adbf24ef7b30f82d029808a Mon Sep 17 00:00:00 2001
From: Daniel Winkler <danielwinkler@google.com>
Date: Mon, 5 Apr 2021 16:33:05 -0700
Subject: Bluetooth: Do not set cur_adv_instance in adv param MGMT request

We set hdev->cur_adv_instance in the adv param MGMT request to allow the
callback to the hci param request to set the tx power to the correct
instance. Now that the callbacks use the advertising handle from the hci
request (as they should), this workaround is no longer necessary.

Furthermore, this change resolves a race condition that is more
prevalent when using the extended advertising MGMT calls - if
hdev->cur_adv_instance is set in the params request, then when the data
request is called, we believe our new instance is already active. This
treats it as an update and immediately schedules the instance with the
controller, which has a potential race with the software rotation adv
update. By not setting hdev->cur_adv_instance too early, the new
instance is queued as it should be, to be used when the rotation comes
around again.

This change is tested on harrison peak to confirm that it resolves the
race condition on registration, and that there is no regression in
single- and multi-advertising automated tests.

Reviewed-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Daniel Winkler <danielwinkler@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/mgmt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 09e099c419f2..59f8016c4866 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7979,7 +7979,6 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev,
 		goto unlock;
 	}
 
-	hdev->cur_adv_instance = cp->instance;
 	/* Submit request for advertising params if ext adv available */
 	if (ext_adv_capable(hdev)) {
 		hci_req_init(&req, hdev);
-- 
cgit v1.2.3


From abb638b311da86eba61c4b2be8eb13375a56b7be Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <sathish.narasimman@intel.com>
Date: Mon, 5 Apr 2021 20:00:23 +0530
Subject: Bluetooth: Handle own address type change with HCI_ENABLE_LL_PRIVACY

own_address_type has to changed to 0x02 and 0x03 only when
HCI_ENABLE_LL_PRIVACY flag is set.

Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 5b4eb87b19f0..0d78122342d5 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2043,7 +2043,8 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
 		/* If Controller supports LL Privacy use own address type is
 		 * 0x03
 		 */
-		if (use_ll_privacy(hdev))
+		if (use_ll_privacy(hdev) &&
+		    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
 			*own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
 		else
 			*own_addr_type = ADDR_LE_DEV_RANDOM;
@@ -2517,7 +2518,8 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
 		/* If Controller supports LL Privacy use own address type is
 		 * 0x03
 		 */
-		if (use_ll_privacy(hdev))
+		if (use_ll_privacy(hdev) &&
+		    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
 			*own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
 		else
 			*own_addr_type = ADDR_LE_DEV_RANDOM;
-- 
cgit v1.2.3


From 8ce85ada0a05e21a5386ba5c417c52ab00fcd0d1 Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <sathish.narasimman@intel.com>
Date: Mon, 5 Apr 2021 20:00:41 +0530
Subject: Bluetooth: LL privacy allow RPA

allow RPA to add bd address to whitelist

Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 0d78122342d5..560b74d421a8 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -847,6 +847,10 @@ static u8 update_white_list(struct hci_request *req)
 	 */
 	bool allow_rpa = hdev->suspended;
 
+	if (use_ll_privacy(hdev) &&
+	    hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
+		allow_rpa = true;
+
 	/* Go through the current white list programmed into the
 	 * controller one by one and check if that address is still
 	 * in the list of pending connections or list of devices to
-- 
cgit v1.2.3


From f67743f9e03a67dbbf931d1787e6faf50766e521 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 6 Apr 2021 21:55:52 +0200
Subject: Bluetooth: Add support for reading AOSP vendor capabilities

When drivers indicate support for AOSP vendor extension, initialize them
and read its capabilities.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 11 +++++++++++
 net/bluetooth/Kconfig            |  7 +++++++
 net/bluetooth/Makefile           |  1 +
 net/bluetooth/aosp.c             | 35 +++++++++++++++++++++++++++++++++++
 net/bluetooth/aosp.h             | 16 ++++++++++++++++
 net/bluetooth/hci_core.c         |  3 +++
 6 files changed, 73 insertions(+)
 create mode 100644 net/bluetooth/aosp.c
 create mode 100644 net/bluetooth/aosp.h

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ca4ac6603b9a..aa2879a3b0dd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -586,6 +586,10 @@ struct hci_dev {
 	void			*msft_data;
 #endif
 
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+	bool			aosp_capable;
+#endif
+
 	int (*open)(struct hci_dev *hdev);
 	int (*close)(struct hci_dev *hdev);
 	int (*flush)(struct hci_dev *hdev);
@@ -1239,6 +1243,13 @@ static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode)
 #endif
 }
 
+static inline void hci_set_aosp_capable(struct hci_dev *hdev)
+{
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+	hdev->aosp_capable = true;
+#endif
+}
+
 int hci_dev_open(__u16 dev);
 int hci_dev_close(__u16 dev);
 int hci_dev_do_close(struct hci_dev *hdev);
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 400c5130dc0a..e0ab4cd7afc3 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -99,6 +99,13 @@ config BT_MSFTEXT
 	  This options enables support for the Microsoft defined HCI
 	  vendor extensions.
 
+config BT_AOSPEXT
+	bool "Enable Android Open Source Project extensions"
+	depends on BT
+	help
+	  This options enables support for the Android Open Source
+	  Project defined HCI vendor extensions.
+
 config BT_DEBUGFS
 	bool "Export Bluetooth internals in debugfs"
 	depends on BT && DEBUG_FS
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 1c645fba8c49..cc0995301f93 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -20,5 +20,6 @@ bluetooth-$(CONFIG_BT_BREDR) += sco.o
 bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
 bluetooth-$(CONFIG_BT_LEDS) += leds.o
 bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o
+bluetooth-$(CONFIG_BT_AOSPEXT) += aosp.o
 bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
 bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
diff --git a/net/bluetooth/aosp.c b/net/bluetooth/aosp.c
new file mode 100644
index 000000000000..a1b7762335a5
--- /dev/null
+++ b/net/bluetooth/aosp.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "aosp.h"
+
+void aosp_do_open(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	if (!hdev->aosp_capable)
+		return;
+
+	bt_dev_dbg(hdev, "Initialize AOSP extension");
+
+	/* LE Get Vendor Capabilities Command */
+	skb = __hci_cmd_sync(hdev, hci_opcode_pack(0x3f, 0x153), 0, NULL,
+			     HCI_CMD_TIMEOUT);
+	if (IS_ERR(skb))
+		return;
+
+	kfree_skb(skb);
+}
+
+void aosp_do_close(struct hci_dev *hdev)
+{
+	if (!hdev->aosp_capable)
+		return;
+
+	bt_dev_dbg(hdev, "Cleanup of AOSP extension");
+}
diff --git a/net/bluetooth/aosp.h b/net/bluetooth/aosp.h
new file mode 100644
index 000000000000..328fc6d39f70
--- /dev/null
+++ b/net/bluetooth/aosp.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Intel Corporation
+ */
+
+#if IS_ENABLED(CONFIG_BT_AOSPEXT)
+
+void aosp_do_open(struct hci_dev *hdev);
+void aosp_do_close(struct hci_dev *hdev);
+
+#else
+
+static inline void aosp_do_open(struct hci_dev *hdev) {}
+static inline void aosp_do_close(struct hci_dev *hdev) {}
+
+#endif
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b0d9c36acc03..0da9b3274986 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -44,6 +44,7 @@
 #include "smp.h"
 #include "leds.h"
 #include "msft.h"
+#include "aosp.h"
 
 static void hci_rx_work(struct work_struct *work);
 static void hci_cmd_work(struct work_struct *work);
@@ -1586,6 +1587,7 @@ setup_failed:
 		ret = hdev->set_diag(hdev, true);
 
 	msft_do_open(hdev);
+	aosp_do_open(hdev);
 
 	clear_bit(HCI_INIT, &hdev->flags);
 
@@ -1782,6 +1784,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
 
 	hci_sock_dev_event(hdev, HCI_DEV_DOWN);
 
+	aosp_do_close(hdev);
 	msft_do_close(hdev);
 
 	if (hdev->flush)
-- 
cgit v1.2.3


From 928dc406802dc4547b5ef84c3075fe144d4cbcf1 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Fri, 2 Apr 2021 22:27:15 -0700
Subject: bpf, udp: Remove some pointless comments

These comments in udp_bpf_update_proto() are copied from the
original TCP code and apparently do not apply to UDP. Just
remove them.

Reported-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210403052715.13854-1-xiyou.wangcong@gmail.com
---
 net/ipv4/udp_bpf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7d5c4ebf42fe..4a7e38c5d842 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -110,7 +110,6 @@ int udp_bpf_update_proto(struct sock *sk, bool restore)
 
 	if (restore) {
 		sk->sk_write_space = psock->saved_write_space;
-		/* Pairs with lockless read in sk_clone_lock() */
 		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
 		return 0;
 	}
@@ -118,7 +117,6 @@ int udp_bpf_update_proto(struct sock *sk, bool restore)
 	if (sk->sk_family == AF_INET6)
 		udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 
-	/* Pairs with lockless read in sk_clone_lock() */
 	WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
 	return 0;
 }
-- 
cgit v1.2.3


From 957dca3df624abcbf895f5081fc664693aa0b363 Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <musamaanjum@gmail.com>
Date: Tue, 6 Apr 2021 00:49:04 +0500
Subject: bpf, inode: Remove second initialization of the bpf_preload_lock

bpf_preload_lock is already defined with DEFINE_MUTEX(). There is no
need to initialize it again. Remove the extraneous initialization.

Signed-off-by: Muhammad Usama Anjum <musamaanjum@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210405194904.GA148013@LEGION
---
 kernel/bpf/inode.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 1576ff331ee4..f441d521ef77 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -816,8 +816,6 @@ static int __init bpf_init(void)
 {
 	int ret;
 
-	mutex_init(&bpf_preload_lock);
-
 	ret = sysfs_create_mount_point(fs_kobj, "bpf");
 	if (ret)
 		return ret;
-- 
cgit v1.2.3


From e880f8b3a24a73704731a7227ed5fee14bd90192 Mon Sep 17 00:00:00 2001
From: Manoj Basapathi <manojbm@codeaurora.org>
Date: Mon, 5 Apr 2021 22:32:42 +0530
Subject: tcp: Reset tcp connections in SYN-SENT state

Userspace sends tcp connection (sock) destroy on network switch
i.e switching the default network of the device between multiple
networks(Cellular/Wifi/Ethernet).

Kernel though doesn't send reset for the connections in SYN-SENT state
and these connections continue to remain.
Even as per RFC 793, there is no hard rule to not send RST on ABORT in
this state.

Modify tcp_abort and tcp_disconnect behavior to send RST for connections
in syn-sent state to avoid lingering connections on network switch.

Signed-off-by: Manoj Basapathi <manojbm@codeaurora.org>
Signed-off-by: Sauvik Saha <ssaha@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e14fd0c50c10..627a472161fb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2888,7 +2888,7 @@ static inline bool tcp_need_reset(int state)
 {
 	return (1 << state) &
 	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
-		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
+		TCPF_FIN_WAIT2 | TCPF_SYN_RECV | TCPF_SYN_SENT);
 }
 
 static void tcp_rtx_queue_purge(struct sock *sk)
@@ -2954,8 +2954,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 		 */
 		tcp_send_active_reset(sk, gfp_any());
 		sk->sk_err = ECONNRESET;
-	} else if (old_state == TCP_SYN_SENT)
-		sk->sk_err = ECONNRESET;
+	}
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-- 
cgit v1.2.3


From 77651900cede54930cd8a039dcd4583bfa308807 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 5 Apr 2021 16:13:41 -0700
Subject: usbnet: add _mii suffix to usbnet_set/get_link_ksettings

The generic functions assumed devices provided an MDIO interface (accessed
via older mii code, not phylib). This is true only for genuine ethernet.

Devices with a higher level of abstraction or based on different
technologies do not have MDIO. To support this case, first rename
the existing functions with _mii suffix.

v2: rebased on changed upstream
v3: changed names to clearly say that this does NOT use phylib
v4: moved hunks to correct patch; reworded commmit messages

Signed-off-by : Oliver Neukum <oneukum@suse.com>
Tested-by: Roland Dreier <roland@kernel.org>
Reviewed-by: Grant Grundler <grundler@chromium.org>
Tested-by: Grant Grundler <grundler@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/asix_devices.c | 12 ++++++------
 drivers/net/usb/cdc_ncm.c      |  4 ++--
 drivers/net/usb/dm9601.c       |  4 ++--
 drivers/net/usb/mcs7830.c      |  4 ++--
 drivers/net/usb/sierra_net.c   |  4 ++--
 drivers/net/usb/smsc75xx.c     |  4 ++--
 drivers/net/usb/sr9700.c       |  4 ++--
 drivers/net/usb/sr9800.c       |  4 ++--
 drivers/net/usb/usbnet.c       | 15 +++++++++------
 include/linux/usb/usbnet.h     |  4 ++--
 10 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 6e13d8165852..19a8fafb8f04 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -125,8 +125,8 @@ static const struct ethtool_ops ax88172_ethtool_ops = {
 	.get_eeprom		= asix_get_eeprom,
 	.set_eeprom		= asix_set_eeprom,
 	.nway_reset		= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static void ax88172_set_multicast(struct net_device *net)
@@ -291,8 +291,8 @@ static const struct ethtool_ops ax88772_ethtool_ops = {
 	.get_eeprom		= asix_get_eeprom,
 	.set_eeprom		= asix_set_eeprom,
 	.nway_reset		= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static int ax88772_link_reset(struct usbnet *dev)
@@ -782,8 +782,8 @@ static const struct ethtool_ops ax88178_ethtool_ops = {
 	.get_eeprom		= asix_get_eeprom,
 	.set_eeprom		= asix_set_eeprom,
 	.nway_reset		= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static int marvell_phy_init(struct usbnet *dev)
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 8ae565a801b5..04f3851dd48b 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -142,8 +142,8 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 	.get_sset_count    = cdc_ncm_get_sset_count,
 	.get_strings       = cdc_ncm_get_strings,
 	.get_ethtool_stats = cdc_ncm_get_ethtool_stats,
-	.get_link_ksettings      = usbnet_get_link_ksettings,
-	.set_link_ksettings      = usbnet_set_link_ksettings,
+	.get_link_ksettings      = usbnet_get_link_ksettings_mii,
+	.set_link_ksettings      = usbnet_set_link_ksettings_mii,
 };
 
 static u32 cdc_ncm_check_rx_max(struct usbnet *dev, u32 new_rx)
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index b5d2ac55a874..89cc61d7a675 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -282,8 +282,8 @@ static const struct ethtool_ops dm9601_ethtool_ops = {
 	.get_eeprom_len	= dm9601_get_eeprom_len,
 	.get_eeprom	= dm9601_get_eeprom,
 	.nway_reset	= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static void dm9601_set_multicast(struct net_device *net)
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index fc512b780d15..9f9352a4522f 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -452,8 +452,8 @@ static const struct ethtool_ops mcs7830_ethtool_ops = {
 	.get_msglevel		= usbnet_get_msglevel,
 	.set_msglevel		= usbnet_set_msglevel,
 	.nway_reset		= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static const struct net_device_ops mcs7830_netdev_ops = {
diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 55a244eca5ca..55025202dc4f 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -629,8 +629,8 @@ static const struct ethtool_ops sierra_net_ethtool_ops = {
 	.get_msglevel = usbnet_get_msglevel,
 	.set_msglevel = usbnet_set_msglevel,
 	.nway_reset = usbnet_nway_reset,
-	.get_link_ksettings = usbnet_get_link_ksettings,
-	.set_link_ksettings = usbnet_set_link_ksettings,
+	.get_link_ksettings = usbnet_get_link_ksettings_mii,
+	.set_link_ksettings = usbnet_set_link_ksettings_mii,
 };
 
 static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap)
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 4353b370249f..f8cdabb9ef5a 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -741,8 +741,8 @@ static const struct ethtool_ops smsc75xx_ethtool_ops = {
 	.set_eeprom	= smsc75xx_ethtool_set_eeprom,
 	.get_wol	= smsc75xx_ethtool_get_wol,
 	.set_wol	= smsc75xx_ethtool_set_wol,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 878557ad03ad..ce29261263cd 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -250,8 +250,8 @@ static const struct ethtool_ops sr9700_ethtool_ops = {
 	.get_eeprom_len	= sr9700_get_eeprom_len,
 	.get_eeprom	= sr9700_get_eeprom,
 	.nway_reset	= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static void sr9700_set_multicast(struct net_device *netdev)
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index da56735d7755..a822d81310d5 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -527,8 +527,8 @@ static const struct ethtool_ops sr9800_ethtool_ops = {
 	.get_eeprom_len	= sr_get_eeprom_len,
 	.get_eeprom	= sr_get_eeprom,
 	.nway_reset	= usbnet_nway_reset,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 static int sr9800_link_reset(struct usbnet *dev)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index f4f37ecfed58..5b4629c80b4b 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -944,7 +944,10 @@ EXPORT_SYMBOL_GPL(usbnet_open);
  * they'll probably want to use this base set.
  */
 
-int usbnet_get_link_ksettings(struct net_device *net,
+/* These methods are written on the assumption that the device
+ * uses MII
+ */
+int usbnet_get_link_ksettings_mii(struct net_device *net,
 			      struct ethtool_link_ksettings *cmd)
 {
 	struct usbnet *dev = netdev_priv(net);
@@ -956,9 +959,9 @@ int usbnet_get_link_ksettings(struct net_device *net,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings);
+EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings_mii);
 
-int usbnet_set_link_ksettings(struct net_device *net,
+int usbnet_set_link_ksettings_mii(struct net_device *net,
 			      const struct ethtool_link_ksettings *cmd)
 {
 	struct usbnet *dev = netdev_priv(net);
@@ -978,7 +981,7 @@ int usbnet_set_link_ksettings(struct net_device *net,
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings);
+EXPORT_SYMBOL_GPL(usbnet_set_link_ksettings_mii);
 
 u32 usbnet_get_link (struct net_device *net)
 {
@@ -1043,8 +1046,8 @@ static const struct ethtool_ops usbnet_ethtool_ops = {
 	.get_msglevel		= usbnet_get_msglevel,
 	.set_msglevel		= usbnet_set_msglevel,
 	.get_ts_info		= ethtool_op_get_ts_info,
-	.get_link_ksettings	= usbnet_get_link_ksettings,
-	.set_link_ksettings	= usbnet_set_link_ksettings,
+	.get_link_ksettings	= usbnet_get_link_ksettings_mii,
+	.set_link_ksettings	= usbnet_set_link_ksettings_mii,
 };
 
 /*-------------------------------------------------------------------------*/
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index cfbfd6fe01df..a89e1452107d 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -267,9 +267,9 @@ extern void usbnet_pause_rx(struct usbnet *);
 extern void usbnet_resume_rx(struct usbnet *);
 extern void usbnet_purge_paused_rxq(struct usbnet *);
 
-extern int usbnet_get_link_ksettings(struct net_device *net,
+extern int usbnet_get_link_ksettings_mii(struct net_device *net,
 				     struct ethtool_link_ksettings *cmd);
-extern int usbnet_set_link_ksettings(struct net_device *net,
+extern int usbnet_set_link_ksettings_mii(struct net_device *net,
 				     const struct ethtool_link_ksettings *cmd);
 extern u32 usbnet_get_link(struct net_device *net);
 extern u32 usbnet_get_msglevel(struct net_device *);
-- 
cgit v1.2.3


From 956baa99571bbaf88f3e91190dfb498c685b0e21 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 5 Apr 2021 16:13:42 -0700
Subject: usbnet: add method for reporting speed without MII

The old method for reporting link speed assumed a driver uses the
generic phy (mii) MDIO read/write functions. CDC devices don't
expose the phy.

Add a primitive internal version reporting back directly what
the CDC notification/status operations recorded.

v2: rebased on upstream
v3: changed names and made clear which units are used
v4: moved hunks to correct patch; rewrote commmit messages

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Tested-by: Roland Dreier <roland@kernel.org>
Reviewed-by: Grant Grundler <grundler@chromium.org>
Tested-by: Grant Grundler <grundler@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 23 +++++++++++++++++++++++
 include/linux/usb/usbnet.h |  7 +++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 5b4629c80b4b..ecf62849f4c1 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -961,6 +961,27 @@ int usbnet_get_link_ksettings_mii(struct net_device *net,
 }
 EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings_mii);
 
+int usbnet_get_link_ksettings_internal(struct net_device *net,
+					struct ethtool_link_ksettings *cmd)
+{
+	struct usbnet *dev = netdev_priv(net);
+
+	/* the assumption that speed is equal on tx and rx
+	 * is deeply engrained into the networking layer.
+	 * For wireless stuff it is not true.
+	 * We assume that rx_speed matters more.
+	 */
+	if (dev->rx_speed != SPEED_UNSET)
+		cmd->base.speed = dev->rx_speed / 1000000;
+	else if (dev->tx_speed != SPEED_UNSET)
+		cmd->base.speed = dev->tx_speed / 1000000;
+	else
+		cmd->base.speed = SPEED_UNKNOWN;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(usbnet_get_link_ksettings_internal);
+
 int usbnet_set_link_ksettings_mii(struct net_device *net,
 			      const struct ethtool_link_ksettings *cmd)
 {
@@ -1664,6 +1685,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	dev->intf = udev;
 	dev->driver_info = info;
 	dev->driver_name = name;
+	dev->rx_speed = SPEED_UNSET;
+	dev->tx_speed = SPEED_UNSET;
 
 	net->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!net->tstats)
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index a89e1452107d..8336e86ce606 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -53,6 +53,9 @@ struct usbnet {
 	u32			hard_mtu;	/* count any extra framing */
 	size_t			rx_urb_size;	/* size for rx urbs */
 	struct mii_if_info	mii;
+	long			rx_speed;	/* If MII not used */
+	long			tx_speed;	/* If MII not used */
+#		define SPEED_UNSET	-1
 
 	/* various kinds of pending driver work */
 	struct sk_buff_head	rxq;
@@ -81,8 +84,6 @@ struct usbnet {
 #		define EVENT_LINK_CHANGE	11
 #		define EVENT_SET_RX_MODE	12
 #		define EVENT_NO_IP_ALIGN	13
-	u32			rx_speed;	/* in bps - NOT Mbps */
-	u32			tx_speed;	/* in bps - NOT Mbps */
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
@@ -271,6 +272,8 @@ extern int usbnet_get_link_ksettings_mii(struct net_device *net,
 				     struct ethtool_link_ksettings *cmd);
 extern int usbnet_set_link_ksettings_mii(struct net_device *net,
 				     const struct ethtool_link_ksettings *cmd);
+extern int usbnet_get_link_ksettings_internal(struct net_device *net,
+				     struct ethtool_link_ksettings *cmd);
 extern u32 usbnet_get_link(struct net_device *net);
 extern u32 usbnet_get_msglevel(struct net_device *);
 extern void usbnet_set_msglevel(struct net_device *, u32);
-- 
cgit v1.2.3


From eb47c274d8c44e1a1d7e44f9e82776e5adb5649b Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 5 Apr 2021 16:13:43 -0700
Subject: net: cdc_ncm: record speed in status method

Until very recently, the usbnet framework only had support functions
for devices which reported the link speed by explicitly querying the
PHY over a MDIO interface. However, the cdc_ncm devices send
notifications when the link state or link speeds change and do not
expose the PHY (or modem) directly.

Support funtions (e.g. usbnet_get_link_ksettings_internal()) to directly
query state recorded by the cdc_ncm driver were added in a previous patch.

So instead of cdc_ncm spewing the link speed into the dmesg buffer,
record the link speed encoded in these notifications and tell the
usbnet framework to use the new functions to get link speed/state.
Link speed/state is now available via ethtool.

This is especially useful given all current RTL8156 devices emit
a connection/speed status notification every 32ms and this would
fill the dmesg buffer. This implementation replaces the one
recently submitted in de658a195ee23ca6aaffe197d1d2ea040beea0a2 :
   "net: usb: cdc_ncm: don't spew notifications"

v2: rebased on upstream
v3: changed variable names
v4: rewrote commit message

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Tested-by: Roland Dreier <roland@kernel.org>
Signed-off-by: Grant Grundler <grundler@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 55 +++++++++++++++--------------------------------
 1 file changed, 17 insertions(+), 38 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 04f3851dd48b..b04055fd1b79 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -133,17 +133,17 @@ static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 s
 static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx);
 
 static const struct ethtool_ops cdc_ncm_ethtool_ops = {
-	.get_link          = usbnet_get_link,
-	.nway_reset        = usbnet_nway_reset,
-	.get_drvinfo       = usbnet_get_drvinfo,
-	.get_msglevel      = usbnet_get_msglevel,
-	.set_msglevel      = usbnet_set_msglevel,
-	.get_ts_info       = ethtool_op_get_ts_info,
-	.get_sset_count    = cdc_ncm_get_sset_count,
-	.get_strings       = cdc_ncm_get_strings,
-	.get_ethtool_stats = cdc_ncm_get_ethtool_stats,
-	.get_link_ksettings      = usbnet_get_link_ksettings_mii,
-	.set_link_ksettings      = usbnet_set_link_ksettings_mii,
+	.get_link		= usbnet_get_link,
+	.nway_reset		= usbnet_nway_reset,
+	.get_drvinfo		= usbnet_get_drvinfo,
+	.get_msglevel		= usbnet_get_msglevel,
+	.set_msglevel		= usbnet_set_msglevel,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_sset_count		= cdc_ncm_get_sset_count,
+	.get_strings		= cdc_ncm_get_strings,
+	.get_ethtool_stats	= cdc_ncm_get_ethtool_stats,
+	.get_link_ksettings	= usbnet_get_link_ksettings_internal,
+	.set_link_ksettings	= NULL,
 };
 
 static u32 cdc_ncm_check_rx_max(struct usbnet *dev, u32 new_rx)
@@ -1825,33 +1825,9 @@ static void
 cdc_ncm_speed_change(struct usbnet *dev,
 		     struct usb_cdc_speed_change *data)
 {
-	uint32_t rx_speed = le32_to_cpu(data->DLBitRRate);
-	uint32_t tx_speed = le32_to_cpu(data->ULBitRate);
-
-	/* if the speed hasn't changed, don't report it.
-	 * RTL8156 shipped before 2021 sends notification about every 32ms.
-	 */
-	if (dev->rx_speed == rx_speed && dev->tx_speed == tx_speed)
-		return;
-
-	dev->rx_speed = rx_speed;
-	dev->tx_speed = tx_speed;
-
-	/*
-	 * Currently the USB-NET API does not support reporting the actual
-	 * device speed. Do print it instead.
-	 */
-	if ((tx_speed > 1000000) && (rx_speed > 1000000)) {
-		netif_info(dev, link, dev->net,
-			   "%u mbit/s downlink %u mbit/s uplink\n",
-			   (unsigned int)(rx_speed / 1000000U),
-			   (unsigned int)(tx_speed / 1000000U));
-	} else {
-		netif_info(dev, link, dev->net,
-			   "%u kbit/s downlink %u kbit/s uplink\n",
-			   (unsigned int)(rx_speed / 1000U),
-			   (unsigned int)(tx_speed / 1000U));
-	}
+	/* RTL8156 shipped before 2021 sends notification about every 32ms. */
+	dev->rx_speed = le32_to_cpu(data->DLBitRRate);
+	dev->tx_speed = le32_to_cpu(data->ULBitRate);
 }
 
 static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
@@ -1877,6 +1853,9 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
+		/* RTL8156 shipped before 2021 sends notification about
+		 * every 32ms. Don't forward notification if state is same.
+		 */
 		if (netif_carrier_ok(dev->net) != !!event->wValue)
 			usbnet_link_change(dev, !!event->wValue, 0);
 		break;
-- 
cgit v1.2.3


From d42ebcbb635386d759985c775b5728c2f93037d1 Mon Sep 17 00:00:00 2001
From: Grant Grundler <grundler@chromium.org>
Date: Mon, 5 Apr 2021 16:13:44 -0700
Subject: net: cdc_ether: record speed in status method

Until very recently, the usbnet framework only had support functions
for devices which reported the link speed by explicitly querying the
PHY over a MDIO interface. However, the cdc_ether devices send
notifications when the link state or link speeds change and do not
expose the PHY (or modem) directly.

Support funtions (e.g. usbnet_get_link_ksettings_internal()) to directly
query state recorded by the cdc_ether driver were added in a previous patch.

Instead of cdc_ether spewing the link speed into the dmesg buffer,
record the link speed encoded in these notifications and tell the
usbnet framework to use the new functions to get link speed/state.

User space can now get the most recent link speed/state using ethtool.

v4: added to series since cdc_ether uses same notifications
    as cdc_ncm driver.

Signed-off-by: Grant Grundler <grundler@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index a9b551028659..7eb0109e9baa 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -92,6 +92,18 @@ void usbnet_cdc_update_filter(struct usbnet *dev)
 }
 EXPORT_SYMBOL_GPL(usbnet_cdc_update_filter);
 
+/* We need to override usbnet_*_link_ksettings in bind() */
+static const struct ethtool_ops cdc_ether_ethtool_ops = {
+	.get_link		= usbnet_get_link,
+	.nway_reset		= usbnet_nway_reset,
+	.get_drvinfo		= usbnet_get_drvinfo,
+	.get_msglevel		= usbnet_get_msglevel,
+	.set_msglevel		= usbnet_set_msglevel,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_link_ksettings	= usbnet_get_link_ksettings_internal,
+	.set_link_ksettings	= NULL,
+};
+
 /* probes control interface, claims data interface, collects the bulk
  * endpoints, activates data interface (if needed), maybe sets MTU.
  * all pure cdc, except for certain firmware workarounds, and knowing
@@ -310,6 +322,9 @@ skip:
 		return -ENODEV;
 	}
 
+	/* override ethtool_ops */
+	dev->net->ethtool_ops = &cdc_ether_ethtool_ops;
+
 	return 0;
 
 bad_desc:
@@ -379,12 +394,10 @@ EXPORT_SYMBOL_GPL(usbnet_cdc_unbind);
  * (by Brad Hards) talked with, with more functionality.
  */
 
-static void dumpspeed(struct usbnet *dev, __le32 *speeds)
+static void speed_change(struct usbnet *dev, __le32 *speeds)
 {
-	netif_info(dev, timer, dev->net,
-		   "link speeds: %u kbps up, %u kbps down\n",
-		   __le32_to_cpu(speeds[0]) / 1000,
-		   __le32_to_cpu(speeds[1]) / 1000);
+	dev->tx_speed = __le32_to_cpu(speeds[0]);
+	dev->rx_speed = __le32_to_cpu(speeds[1]);
 }
 
 void usbnet_cdc_status(struct usbnet *dev, struct urb *urb)
@@ -396,7 +409,7 @@ void usbnet_cdc_status(struct usbnet *dev, struct urb *urb)
 
 	/* SPEED_CHANGE can get split into two 8-byte packets */
 	if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) {
-		dumpspeed(dev, (__le32 *) urb->transfer_buffer);
+		speed_change(dev, (__le32 *) urb->transfer_buffer);
 		return;
 	}
 
@@ -413,7 +426,7 @@ void usbnet_cdc_status(struct usbnet *dev, struct urb *urb)
 		if (urb->actual_length != (sizeof(*event) + 8))
 			set_bit(EVENT_STS_SPLIT, &dev->flags);
 		else
-			dumpspeed(dev, (__le32 *) &event[1]);
+			speed_change(dev, (__le32 *) &event[1]);
 		break;
 	/* USB_CDC_NOTIFY_RESPONSE_AVAILABLE can happen too (e.g. RNDIS),
 	 * but there are no standard formats for the response data.
-- 
cgit v1.2.3


From b58c4649d94eebb1257e0716e4e948dcaa4ae5b5 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Tue, 6 Apr 2021 09:59:54 +0800
Subject: nfc: s3fwrn5: remove unnecessary label

In function s3fwrn5_nci_post_setup, the variable ret is assigned then
goto out label, which just return ret, so we use return to replace it.
Other goto sentences are similar, we use return sentences to replace
goto sentences and delete out label.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/s3fwrn5/core.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c
index c00b7a07c3ee..865d3e3d1528 100644
--- a/drivers/nfc/s3fwrn5/core.c
+++ b/drivers/nfc/s3fwrn5/core.c
@@ -124,13 +124,12 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev)
 
 	if (s3fwrn5_firmware_init(info)) {
 		//skip bootloader mode
-		ret = 0;
-		goto out;
+		return 0;
 	}
 
 	ret = s3fwrn5_firmware_update(info);
 	if (ret < 0)
-		goto out;
+		return ret;
 
 	/* NCI core reset */
 
@@ -139,12 +138,9 @@ static int s3fwrn5_nci_post_setup(struct nci_dev *ndev)
 
 	ret = nci_core_reset(info->ndev);
 	if (ret < 0)
-		goto out;
-
-	ret = nci_core_init(info->ndev);
+		return ret;
 
-out:
-	return ret;
+	return nci_core_init(info->ndev);
 }
 
 static struct nci_ops s3fwrn5_nci_ops = {
-- 
cgit v1.2.3


From 3b2c32f96edc0a7602da1b17f5b5eed09e16ad37 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Tue, 6 Apr 2021 11:04:33 +0800
Subject: net: ethernet: mtk_eth_soc: remove unneeded semicolon

Eliminate the following coccicheck warning:
 drivers/net/ethernet/mediatek/mtk_ppe.c:270:2-3: Unneeded semicolon

Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index a1a9959a2461..71e1ccea6e72 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -267,7 +267,7 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
 	default:
 		WARN_ON_ONCE(1);
 		return -EINVAL;
-	};
+	}
 
 	for (i = 0; i < 4; i++)
 		src[i] = be32_to_cpu(src_addr[i]);
-- 
cgit v1.2.3


From be107538c5296fb888938ec3a32da21bb1733655 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Tue, 6 Apr 2021 11:18:13 +0800
Subject: netdevsim: remove unneeded semicolon

Eliminate the following coccicheck warning:
 drivers/net/netdevsim/fib.c:569:2-3: Unneeded semicolon

Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/fib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index fda6f37e7055..213d3e5056c8 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -566,7 +566,7 @@ nsim_fib6_rt_create(struct nsim_fib_data *data,
 err_fib6_rt_nh_del:
 	for (i--; i >= 0; i--) {
 		nsim_fib6_rt_nh_del(fib6_rt, rt_arr[i]);
-	};
+	}
 	nsim_fib_rt_fini(&fib6_rt->common);
 	kfree(fib6_rt);
 	return ERR_PTR(err);
-- 
cgit v1.2.3


From 3036ec035c4dc7e88439a61f5a714cc3988ab4cf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 6 Apr 2021 13:13:06 +0300
Subject: stmmac: intel: Drop duplicate ID in the list of PCI device IDs

The PCI device IDs are defined with a prefix PCI_DEVICE_ID.
There is no need to repeat the ID part at the end of each definition.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 60 +++++++++++------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index add95e20548d..8ba87c0be976 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -1048,41 +1048,41 @@ static int __maybe_unused intel_eth_pci_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend,
 			 intel_eth_pci_resume);
 
-#define PCI_DEVICE_ID_INTEL_QUARK_ID			0x0937
-#define PCI_DEVICE_ID_INTEL_EHL_RGMII1G_ID		0x4b30
-#define PCI_DEVICE_ID_INTEL_EHL_SGMII1G_ID		0x4b31
-#define PCI_DEVICE_ID_INTEL_EHL_SGMII2G5_ID		0x4b32
+#define PCI_DEVICE_ID_INTEL_QUARK		0x0937
+#define PCI_DEVICE_ID_INTEL_EHL_RGMII1G		0x4b30
+#define PCI_DEVICE_ID_INTEL_EHL_SGMII1G		0x4b31
+#define PCI_DEVICE_ID_INTEL_EHL_SGMII2G5	0x4b32
 /* Intel(R) Programmable Services Engine (Intel(R) PSE) consist of 2 MAC
  * which are named PSE0 and PSE1
  */
-#define PCI_DEVICE_ID_INTEL_EHL_PSE0_RGMII1G_ID		0x4ba0
-#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII1G_ID		0x4ba1
-#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII2G5_ID	0x4ba2
-#define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID		0x4bb0
-#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID		0x4bb1
-#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID	0x4bb2
-#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID		0x43ac
-#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID		0x43a2
-#define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID		0xa0ac
-#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0_ID		0x7aac
-#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1_ID		0x7aad
+#define PCI_DEVICE_ID_INTEL_EHL_PSE0_RGMII1G	0x4ba0
+#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII1G	0x4ba1
+#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII2G5	0x4ba2
+#define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G	0x4bb0
+#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G	0x4bb1
+#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5	0x4bb2
+#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0	0x43ac
+#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1	0x43a2
+#define PCI_DEVICE_ID_INTEL_TGL_SGMII1G		0xa0ac
+#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0	0x7aac
+#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1	0x7aad
 
 static const struct pci_device_id intel_eth_pci_id_table[] = {
-	{ PCI_DEVICE_DATA(INTEL, QUARK_ID, &quark_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_RGMII1G_ID, &ehl_rgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII1G_ID, &ehl_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5_ID, &ehl_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G_ID, &ehl_pse0_rgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G_ID, &ehl_pse0_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5_ID, &ehl_pse0_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_phy0_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_phy0_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_phy1_info) },
-	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_phy0_info) },
-	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_phy1_info) },
+	{ PCI_DEVICE_DATA(INTEL, QUARK, &quark_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_RGMII1G, &ehl_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII1G, &ehl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5, &ehl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G, &ehl_pse0_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G, &ehl_pse0_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5, &ehl_pse0_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G, &ehl_pse1_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G, &ehl_pse1_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5, &ehl_pse1_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G, &tgl_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0, &tgl_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1, &tgl_sgmii1g_phy1_info) },
+	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0, &adls_sgmii1g_phy0_info) },
+	{ PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1, &adls_sgmii1g_phy1_info) },
 	{}
 };
 MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table);
-- 
cgit v1.2.3


From a460513ed4b6994bfeb7bd86f72853140bc1ac12 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 6 Apr 2021 13:22:51 +0300
Subject: time64.h: Consolidated PSEC_PER_SEC definition

We have currently three users of the PSEC_PER_SEC each of them defining it
individually. Instead, move it to time64.h to be available for everyone.

There is a new user coming with the same constant in use. It will also
make its life easier.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_ptp.c           | 2 ++
 drivers/phy/phy-core-mipi-dphy.c                 | 2 --
 drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c | 8 ++++----
 include/soc/mscc/ocelot_ptp.h                    | 2 --
 include/vdso/time64.h                            | 1 +
 5 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
index a33ab315cc6b..87ad2137ba06 100644
--- a/drivers/net/ethernet/mscc/ocelot_ptp.c
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2017 Microsemi Corporation
  * Copyright 2020 NXP
  */
+#include <linux/time64.h>
+
 #include <soc/mscc/ocelot_ptp.h>
 #include <soc/mscc/ocelot_sys.h>
 #include <soc/mscc/ocelot.h>
diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c
index 14e0551cd319..77fe65367ce5 100644
--- a/drivers/phy/phy-core-mipi-dphy.c
+++ b/drivers/phy/phy-core-mipi-dphy.c
@@ -12,8 +12,6 @@
 #include <linux/phy/phy.h>
 #include <linux/phy/phy-mipi-dphy.h>
 
-#define PSEC_PER_SEC	1000000000000LL
-
 /*
  * Minimum D-PHY timings based on MIPI D-PHY specification. Derived
  * from the valid ranges specified in Section 6.9, Table 14, Page 41
diff --git a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
index 8af8c6c5cc02..347dc79a18c1 100644
--- a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c
@@ -11,16 +11,16 @@
 #include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
+#include <linux/time64.h>
+
 #include <linux/phy/phy.h>
 #include <linux/phy/phy-mipi-dphy.h>
-#include <linux/pm_runtime.h>
-#include <linux/mfd/syscon.h>
-
-#define PSEC_PER_SEC	1000000000000LL
 
 #define UPDATE(x, h, l)	(((x) << (l)) & GENMASK((h), (l)))
 
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index 6a7388fa7cc5..ded497d72bdb 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -37,8 +37,6 @@ enum {
 
 #define PTP_CFG_MISC_PTP_EN		BIT(2)
 
-#define PSEC_PER_SEC			1000000000000LL
-
 #define PTP_CFG_CLK_ADJ_CFG_ENA		BIT(0)
 #define PTP_CFG_CLK_ADJ_CFG_DIR		BIT(1)
 
diff --git a/include/vdso/time64.h b/include/vdso/time64.h
index 9d43c3f5e89d..b40cfa2aa33c 100644
--- a/include/vdso/time64.h
+++ b/include/vdso/time64.h
@@ -9,6 +9,7 @@
 #define NSEC_PER_MSEC	1000000L
 #define USEC_PER_SEC	1000000L
 #define NSEC_PER_SEC	1000000000L
+#define PSEC_PER_SEC	1000000000000LL
 #define FSEC_PER_SEC	1000000000000000LL
 
 #endif /* __VDSO_TIME64_H */
-- 
cgit v1.2.3


From 631a44ed2560672c27a149134a4d8cb963c07779 Mon Sep 17 00:00:00 2001
From: Peng Zhang <peng.zhang@corigine.com>
Date: Tue, 6 Apr 2021 17:54:52 +0200
Subject: nfp: flower: add support for packet-per-second policing

Allow hardware offload of a policer action attached to a matchall filter
which enforces a packets-per-second rate-limit.

e.g.
tc filter add dev tap1 parent ffff: u32 match \
        u32 0 0 police pkts_rate 3000 pkts_burst 1000

Signed-off-by: Peng Zhang <peng.zhang@corigine.com>
Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/main.h   |   4 +-
 .../net/ethernet/netronome/nfp/flower/qos_conf.c   | 161 +++++++++++++++------
 2 files changed, 119 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index caf12eec9945..e13e26e72ca0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -47,6 +47,7 @@ struct nfp_app;
 #define NFP_FL_FEATS_PRE_TUN_RULES	BIT(6)
 #define NFP_FL_FEATS_IPV6_TUN		BIT(7)
 #define NFP_FL_FEATS_VLAN_QINQ		BIT(8)
+#define NFP_FL_FEATS_QOS_PPS		BIT(9)
 #define NFP_FL_FEATS_HOST_ACK		BIT(31)
 
 #define NFP_FL_ENABLE_FLOW_MERGE	BIT(0)
@@ -61,7 +62,8 @@ struct nfp_app;
 	NFP_FL_FEATS_FLOW_MOD | \
 	NFP_FL_FEATS_PRE_TUN_RULES | \
 	NFP_FL_FEATS_IPV6_TUN | \
-	NFP_FL_FEATS_VLAN_QINQ)
+	NFP_FL_FEATS_VLAN_QINQ | \
+	NFP_FL_FEATS_QOS_PPS)
 
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
index 88bea6ad59bc..784c6dbf8bc4 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -10,19 +10,26 @@
 #include "../nfp_port.h"
 
 #define NFP_FL_QOS_UPDATE		msecs_to_jiffies(1000)
+#define NFP_FL_QOS_PPS  BIT(15)
 
 struct nfp_police_cfg_head {
 	__be32 flags_opts;
 	__be32 port;
 };
 
+enum NFP_FL_QOS_TYPES {
+	NFP_FL_QOS_TYPE_BPS,
+	NFP_FL_QOS_TYPE_PPS,
+	NFP_FL_QOS_TYPE_MAX,
+};
+
 /* Police cmsg for configuring a trTCM traffic conditioner (8W/32B)
  * See RFC 2698 for more details.
  * ----------------------------------------------------------------
  *    3                   2                   1
  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |                          Flag options                         |
+ * |             Reserved          |p|         Reserved            |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                          Port Ingress                         |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -38,6 +45,9 @@ struct nfp_police_cfg_head {
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  * |                    Committed Information Rate                 |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * Word[0](FLag options):
+ * [15] p(pps) 1 for pps ,0 for bps
+ *
  */
 struct nfp_police_config {
 	struct nfp_police_cfg_head head;
@@ -62,13 +72,18 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 				struct tc_cls_matchall_offload *flow,
 				struct netlink_ext_ack *extack)
 {
-	struct flow_action_entry *action = &flow->rule->action.entries[0];
+	struct flow_action_entry *paction = &flow->rule->action.entries[0];
+	u32 action_num = flow->rule->action.num_entries;
 	struct nfp_flower_priv *fl_priv = app->priv;
+	struct flow_action_entry *action = NULL;
 	struct nfp_flower_repr_priv *repr_priv;
 	struct nfp_police_config *config;
+	u32 netdev_port_id, i;
 	struct nfp_repr *repr;
 	struct sk_buff *skb;
-	u32 netdev_port_id;
+	bool pps_support;
+	u32 bps_num = 0;
+	u32 pps_num = 0;
 	u32 burst;
 	u64 rate;
 
@@ -78,6 +93,8 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 	}
 	repr = netdev_priv(netdev);
 	repr_priv = repr->app_priv;
+	netdev_port_id = nfp_repr_get_port_id(netdev);
+	pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS);
 
 	if (repr_priv->block_shared) {
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on shared blocks");
@@ -89,9 +106,18 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
-	if (!flow_offload_has_one_action(&flow->rule->action)) {
-		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires a single action");
-		return -EOPNOTSUPP;
+	if (pps_support) {
+		if (action_num > 2 || action_num == 0) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "unsupported offload: qos rate limit offload only support action number 1 or 2");
+			return -EOPNOTSUPP;
+		}
+	} else {
+		if (!flow_offload_has_one_action(&flow->rule->action)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "unsupported offload: qos rate limit offload requires a single action");
+			return -EOPNOTSUPP;
+		}
 	}
 
 	if (flow->common.prio != 1) {
@@ -99,36 +125,69 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
-	if (action->id != FLOW_ACTION_POLICE) {
-		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires police action");
-		return -EOPNOTSUPP;
-	}
-
-	if (action->police.rate_pkt_ps) {
-		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not support packets per second");
-		return -EOPNOTSUPP;
+	for (i = 0 ; i < action_num; i++) {
+		action = paction + i;
+		if (action->id != FLOW_ACTION_POLICE) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "unsupported offload: qos rate limit offload requires police action");
+			return -EOPNOTSUPP;
+		}
+		if (action->police.rate_bytes_ps > 0) {
+			if (bps_num++) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "unsupported offload: qos rate limit offload only support one BPS action");
+				return -EOPNOTSUPP;
+			}
+		}
+		if (action->police.rate_pkt_ps > 0) {
+			if (!pps_support) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "unsupported offload: FW does not support PPS action");
+				return -EOPNOTSUPP;
+			}
+			if (pps_num++) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "unsupported offload: qos rate limit offload only support one PPS action");
+				return -EOPNOTSUPP;
+			}
+		}
 	}
 
-	rate = action->police.rate_bytes_ps;
-	burst = action->police.burst;
-	netdev_port_id = nfp_repr_get_port_id(netdev);
-
-	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
-				    NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	config = nfp_flower_cmsg_get_data(skb);
-	memset(config, 0, sizeof(struct nfp_police_config));
-	config->head.port = cpu_to_be32(netdev_port_id);
-	config->bkt_tkn_p = cpu_to_be32(burst);
-	config->bkt_tkn_c = cpu_to_be32(burst);
-	config->pbs = cpu_to_be32(burst);
-	config->cbs = cpu_to_be32(burst);
-	config->pir = cpu_to_be32(rate);
-	config->cir = cpu_to_be32(rate);
-	nfp_ctrl_tx(repr->app->ctrl, skb);
+	for (i = 0 ; i < action_num; i++) {
+		/* Set QoS data for this interface */
+		action = paction + i;
+		if (action->police.rate_bytes_ps > 0) {
+			rate = action->police.rate_bytes_ps;
+			burst = action->police.burst;
+		} else if (action->police.rate_pkt_ps > 0) {
+			rate = action->police.rate_pkt_ps;
+			burst = action->police.burst_pkt;
+		} else {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "unsupported offload: qos rate limit is not BPS or PPS");
+			continue;
+		}
 
+		if (rate != 0) {
+			skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
+						    NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL);
+			if (!skb)
+				return -ENOMEM;
+
+			config = nfp_flower_cmsg_get_data(skb);
+			memset(config, 0, sizeof(struct nfp_police_config));
+			if (action->police.rate_pkt_ps > 0)
+				config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS);
+			config->head.port = cpu_to_be32(netdev_port_id);
+			config->bkt_tkn_p = cpu_to_be32(burst);
+			config->bkt_tkn_c = cpu_to_be32(burst);
+			config->pbs = cpu_to_be32(burst);
+			config->cbs = cpu_to_be32(burst);
+			config->pir = cpu_to_be32(rate);
+			config->cir = cpu_to_be32(rate);
+			nfp_ctrl_tx(repr->app->ctrl, skb);
+		}
+	}
 	repr_priv->qos_table.netdev_port_id = netdev_port_id;
 	fl_priv->qos_rate_limiters++;
 	if (fl_priv->qos_rate_limiters == 1)
@@ -146,9 +205,10 @@ nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 	struct nfp_flower_priv *fl_priv = app->priv;
 	struct nfp_flower_repr_priv *repr_priv;
 	struct nfp_police_config *config;
+	u32 netdev_port_id, i;
 	struct nfp_repr *repr;
 	struct sk_buff *skb;
-	u32 netdev_port_id;
+	bool pps_support;
 
 	if (!nfp_netdev_is_nfp_repr(netdev)) {
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port");
@@ -158,27 +218,38 @@ nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 
 	netdev_port_id = nfp_repr_get_port_id(netdev);
 	repr_priv = repr->app_priv;
+	pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS);
 
 	if (!repr_priv->qos_table.netdev_port_id) {
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot remove qos entry that does not exist");
 		return -EOPNOTSUPP;
 	}
 
-	skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
-				    NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	/* Clear all qos associate data for this interface */
 	memset(&repr_priv->qos_table, 0, sizeof(struct nfp_fl_qos));
 	fl_priv->qos_rate_limiters--;
 	if (!fl_priv->qos_rate_limiters)
 		cancel_delayed_work_sync(&fl_priv->qos_stats_work);
-
-	config = nfp_flower_cmsg_get_data(skb);
-	memset(config, 0, sizeof(struct nfp_police_config));
-	config->head.port = cpu_to_be32(netdev_port_id);
-	nfp_ctrl_tx(repr->app->ctrl, skb);
+	for (i = 0 ; i < NFP_FL_QOS_TYPE_MAX; i++) {
+		if (i == NFP_FL_QOS_TYPE_PPS && !pps_support)
+			break;
+		/* 0:bps 1:pps
+		 * Clear QoS data for this interface.
+		 * There is no need to check if a specific QOS_TYPE was
+		 * configured as the firmware handles clearing a QoS entry
+		 * safely, even if it wasn't explicitly added.
+		 */
+		skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config),
+					    NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+
+		config = nfp_flower_cmsg_get_data(skb);
+		memset(config, 0, sizeof(struct nfp_police_config));
+		if (i == NFP_FL_QOS_TYPE_PPS)
+			config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS);
+		config->head.port = cpu_to_be32(netdev_port_id);
+		nfp_ctrl_tx(repr->app->ctrl, skb);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 0b35e0deb5bee7d4882356d6663522c1562a8321 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 6 Apr 2021 15:59:31 -0700
Subject: docs: ethtool: correct quotes

Quotes to backticks. All commands use backticks since the names
are constants.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 4bdb4298f178..fd84f4ed898a 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1433,7 +1433,7 @@ are netlink only.
   ``ETHTOOL_PHY_STUNABLE``            n/a
   ``ETHTOOL_GFECPARAM``               ``ETHTOOL_MSG_FEC_GET``
   ``ETHTOOL_SFECPARAM``               ``ETHTOOL_MSG_FEC_SET``
-  n/a                                 ''ETHTOOL_MSG_CABLE_TEST_ACT''
-  n/a                                 ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT''
+  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_ACT``
+  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``
   n/a                                 ``ETHTOOL_MSG_TUNNEL_INFO_GET``
   =================================== =====================================
-- 
cgit v1.2.3


From 4c7f40287aa5eb6ea8f3a6c3225ab5ebc6a45415 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 31 Aug 2020 13:22:20 +0800
Subject: net/mlx5: E-switch, Move vport table functions to a new file

Currently, the vport table functions are in common eswitch offload
file. This file is too big. Move the vport table create, delete and
lookup functions to a separate file. Put the file in esw directory.

Pre-step for generalizing its functionality for serving both the
mirroring and the sample features.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/esw/vporttbl.c | 136 +++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  12 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 215 ++++-----------------
 4 files changed, 183 insertions(+), 182 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9cf7de72df52..4c86b68ad26c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -53,7 +53,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
 				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
-				      esw/devlink_port.o
+				      esw/devlink_port.o esw/vporttbl.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
new file mode 100644
index 000000000000..8219c5d50db0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 Mellanox Technologies.
+
+#include "eswitch.h"
+
+#define MLX5_ESW_VPORT_TABLE_SIZE 128
+#define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
+
+/* This struct is used as a key to the hash table and we need it to be packed
+ * so hash result is consistent
+ */
+struct mlx5_vport_key {
+	u32 chain;
+	u16 prio;
+	u16 vport;
+	u16 vhca_id;
+} __packed;
+
+struct mlx5_vport_table {
+	struct hlist_node hlist;
+	struct mlx5_flow_table *fdb;
+	u32 num_rules;
+	struct mlx5_vport_key key;
+};
+
+static struct mlx5_flow_table *
+esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_table *fdb;
+
+	ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS;
+	ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE;
+	ft_attr.prio = FDB_PER_VPORT;
+	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
+	if (IS_ERR(fdb)) {
+		esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
+			 PTR_ERR(fdb));
+	}
+
+	return fdb;
+}
+
+static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
+				  struct mlx5_vport_tbl_attr *attr,
+				  struct mlx5_vport_key *key)
+{
+	key->vport = attr->vport;
+	key->chain = attr->chain;
+	key->prio = attr->prio;
+	key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+	return jhash(key, sizeof(*key), 0);
+}
+
+/* caller must hold vports.lock */
+static struct mlx5_vport_table *
+esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key)
+{
+	struct mlx5_vport_table *e;
+
+	hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key)
+		if (!memcmp(&e->key, skey, sizeof(*skey)))
+			return e;
+
+	return NULL;
+}
+
+struct mlx5_flow_table *
+esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *fdb;
+	struct mlx5_vport_table *e;
+	struct mlx5_vport_key skey;
+	u32 hkey;
+
+	mutex_lock(&esw->fdb_table.offloads.vports.lock);
+	hkey = flow_attr_to_vport_key(esw, attr, &skey);
+	e = esw_vport_tbl_lookup(esw, &skey, hkey);
+	if (e) {
+		e->num_rules++;
+		goto out;
+	}
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e) {
+		fdb = ERR_PTR(-ENOMEM);
+		goto err_alloc;
+	}
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!ns) {
+		esw_warn(dev, "Failed to get FDB namespace\n");
+		fdb = ERR_PTR(-ENOENT);
+		goto err_ns;
+	}
+
+	fdb = esw_vport_tbl_create(esw, ns);
+	if (IS_ERR(fdb))
+		goto err_ns;
+
+	e->fdb = fdb;
+	e->num_rules = 1;
+	e->key = skey;
+	hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey);
+out:
+	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+	return e->fdb;
+
+err_ns:
+	kfree(e);
+err_alloc:
+	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+	return fdb;
+}
+
+void
+esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+{
+	struct mlx5_vport_table *e;
+	struct mlx5_vport_key key;
+	u32 hkey;
+
+	mutex_lock(&esw->fdb_table.offloads.vports.lock);
+	hkey = flow_attr_to_vport_key(esw, attr, &key);
+	e = esw_vport_tbl_lookup(esw, &key, hkey);
+	if (!e || --e->num_rules)
+		goto out;
+
+	hash_del(&e->hlist);
+	mlx5_destroy_flow_table(e->fdb);
+	kfree(e);
+out:
+	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 64db903068c1..70eeb8d1ae03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -713,8 +713,16 @@ void
 esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
 				      struct mlx5_vport *vport);
 
-int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw);
-void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw);
+struct mlx5_vport_tbl_attr {
+	u16 chain;
+	u16 prio;
+	u16 vport;
+};
+
+struct mlx5_flow_table *
+esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+void
+esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
 
 struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d5de6bf622ce..3caf6c0b3296 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -54,185 +54,6 @@
 #define MLX5_ESW_MISS_FLOWS (2)
 #define UPLINK_REP_INDEX 0
 
-/* Per vport tables */
-
-#define MLX5_ESW_VPORT_TABLE_SIZE 128
-
-/* This struct is used as a key to the hash table and we need it to be packed
- * so hash result is consistent
- */
-struct mlx5_vport_key {
-	u32 chain;
-	u16 prio;
-	u16 vport;
-	u16 vhca_id;
-} __packed;
-
-struct mlx5_vport_tbl_attr {
-	u16 chain;
-	u16 prio;
-	u16 vport;
-};
-
-struct mlx5_vport_table {
-	struct hlist_node hlist;
-	struct mlx5_flow_table *fdb;
-	u32 num_rules;
-	struct mlx5_vport_key key;
-};
-
-#define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
-
-static struct mlx5_flow_table *
-esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns)
-{
-	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5_flow_table *fdb;
-
-	ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS;
-	ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE;
-	ft_attr.prio = FDB_PER_VPORT;
-	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
-	if (IS_ERR(fdb)) {
-		esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
-			 PTR_ERR(fdb));
-	}
-
-	return fdb;
-}
-
-static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
-				  struct mlx5_vport_tbl_attr *attr,
-				  struct mlx5_vport_key *key)
-{
-	key->vport = attr->vport;
-	key->chain = attr->chain;
-	key->prio = attr->prio;
-	key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
-	return jhash(key, sizeof(*key), 0);
-}
-
-/* caller must hold vports.lock */
-static struct mlx5_vport_table *
-esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key)
-{
-	struct mlx5_vport_table *e;
-
-	hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key)
-		if (!memcmp(&e->key, skey, sizeof(*skey)))
-			return e;
-
-	return NULL;
-}
-
-static void
-esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
-{
-	struct mlx5_vport_table *e;
-	struct mlx5_vport_key key;
-	u32 hkey;
-
-	mutex_lock(&esw->fdb_table.offloads.vports.lock);
-	hkey = flow_attr_to_vport_key(esw, attr, &key);
-	e = esw_vport_tbl_lookup(esw, &key, hkey);
-	if (!e || --e->num_rules)
-		goto out;
-
-	hash_del(&e->hlist);
-	mlx5_destroy_flow_table(e->fdb);
-	kfree(e);
-out:
-	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
-}
-
-static struct mlx5_flow_table *
-esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
-{
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *ns;
-	struct mlx5_flow_table *fdb;
-	struct mlx5_vport_table *e;
-	struct mlx5_vport_key skey;
-	u32 hkey;
-
-	mutex_lock(&esw->fdb_table.offloads.vports.lock);
-	hkey = flow_attr_to_vport_key(esw, attr, &skey);
-	e = esw_vport_tbl_lookup(esw, &skey, hkey);
-	if (e) {
-		e->num_rules++;
-		goto out;
-	}
-
-	e = kzalloc(sizeof(*e), GFP_KERNEL);
-	if (!e) {
-		fdb = ERR_PTR(-ENOMEM);
-		goto err_alloc;
-	}
-
-	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
-	if (!ns) {
-		esw_warn(dev, "Failed to get FDB namespace\n");
-		fdb = ERR_PTR(-ENOENT);
-		goto err_ns;
-	}
-
-	fdb = esw_vport_tbl_create(esw, ns);
-	if (IS_ERR(fdb))
-		goto err_ns;
-
-	e->fdb = fdb;
-	e->num_rules = 1;
-	e->key = skey;
-	hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey);
-out:
-	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
-	return e->fdb;
-
-err_ns:
-	kfree(e);
-err_alloc:
-	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
-	return fdb;
-}
-
-int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
-{
-	struct mlx5_vport_tbl_attr attr;
-	struct mlx5_flow_table *fdb;
-	struct mlx5_vport *vport;
-	int i;
-
-	attr.chain = 0;
-	attr.prio = 1;
-	mlx5_esw_for_all_vports(esw, i, vport) {
-		attr.vport = vport->vport;
-		fdb = esw_vport_tbl_get(esw, &attr);
-		if (IS_ERR(fdb))
-			goto out;
-	}
-	return 0;
-
-out:
-	mlx5_esw_vport_tbl_put(esw);
-	return PTR_ERR(fdb);
-}
-
-void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
-{
-	struct mlx5_vport_tbl_attr attr;
-	struct mlx5_vport *vport;
-	int i;
-
-	attr.chain = 0;
-	attr.prio = 1;
-	mlx5_esw_for_all_vports(esw, i, vport) {
-		attr.vport = vport->vport;
-		esw_vport_tbl_put(esw, &attr);
-	}
-}
-
-/* End: Per vport tables */
-
 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
 						     u16 vport_num)
 {
@@ -1514,6 +1335,42 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
 }
 
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+static void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport_tbl_attr attr;
+	struct mlx5_vport *vport;
+	int i;
+
+	attr.chain = 0;
+	attr.prio = 1;
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		attr.vport = vport->vport;
+		esw_vport_tbl_put(esw, &attr);
+	}
+}
+
+static int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport_tbl_attr attr;
+	struct mlx5_flow_table *fdb;
+	struct mlx5_vport *vport;
+	int i;
+
+	attr.chain = 0;
+	attr.prio = 1;
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		attr.vport = vport->vport;
+		fdb = esw_vport_tbl_get(esw, &attr);
+		if (IS_ERR(fdb))
+			goto out;
+	}
+	return 0;
+
+out:
+	mlx5_esw_vport_tbl_put(esw);
+	return PTR_ERR(fdb);
+}
+
 #define fdb_modify_header_fwd_to_table_supported(esw) \
 	(MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
 static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
-- 
cgit v1.2.3


From 0a9e2307870bdb627421b4b0bbac04b1ab8e1688 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Thu, 14 Jan 2021 15:12:36 +0800
Subject: net/mlx5: E-switch, Rename functions to follow naming convention.

Public api starts with mlx5 and remove mlx5 for non-public api.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/vporttbl.c |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  4 ++--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 26 +++++++++++-----------
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
index 8219c5d50db0..6c4246181615 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -66,7 +66,7 @@ esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32
 }
 
 struct mlx5_flow_table *
-esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
 {
 	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_flow_namespace *ns;
@@ -116,7 +116,7 @@ err_alloc:
 }
 
 void
-esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
 {
 	struct mlx5_vport_table *e;
 	struct mlx5_vport_key key;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 70eeb8d1ae03..b7d1f8854ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -720,9 +720,9 @@ struct mlx5_vport_tbl_attr {
 };
 
 struct mlx5_flow_table *
-esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
 void
-esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
+mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr);
 
 struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3caf6c0b3296..63e22e9e5ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -483,7 +483,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		fwd_attr.prio = attr->prio;
 		fwd_attr.vport = esw_attr->in_rep->vport;
 
-		fdb = esw_vport_tbl_get(esw, &fwd_attr);
+		fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
 	} else {
 		if (attr->chain || attr->prio)
 			fdb = mlx5_chains_get_table(chains, attr->chain,
@@ -515,7 +515,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
 err_add_rule:
 	if (split)
-		esw_vport_tbl_put(esw, &fwd_attr);
+		mlx5_esw_vporttbl_put(esw, &fwd_attr);
 	else if (attr->chain || attr->prio)
 		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
 err_esw_get:
@@ -548,7 +548,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 	fwd_attr.chain = attr->chain;
 	fwd_attr.prio = attr->prio;
 	fwd_attr.vport = esw_attr->in_rep->vport;
-	fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr);
+	fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
 	if (IS_ERR(fwd_fdb)) {
 		rule = ERR_CAST(fwd_fdb);
 		goto err_get_fwd;
@@ -593,7 +593,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 	return rule;
 err_chain_src_rewrite:
 	esw_put_dest_tables_loop(esw, attr, 0, i);
-	esw_vport_tbl_put(esw, &fwd_attr);
+	mlx5_esw_vporttbl_put(esw, &fwd_attr);
 err_get_fwd:
 	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
 err_get_fast:
@@ -631,12 +631,12 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
 	}
 
 	if (fwd_rule)  {
-		esw_vport_tbl_put(esw, &fwd_attr);
+		mlx5_esw_vporttbl_put(esw, &fwd_attr);
 		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
 		esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
 	} else {
 		if (split)
-			esw_vport_tbl_put(esw, &fwd_attr);
+			mlx5_esw_vporttbl_put(esw, &fwd_attr);
 		else if (attr->chain || attr->prio)
 			mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
 		esw_cleanup_dests(esw, attr);
@@ -1335,7 +1335,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw,
 }
 
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
-static void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
+static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport_tbl_attr attr;
 	struct mlx5_vport *vport;
@@ -1345,11 +1345,11 @@ static void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
 	attr.prio = 1;
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		attr.vport = vport->vport;
-		esw_vport_tbl_put(esw, &attr);
+		mlx5_esw_vporttbl_put(esw, &attr);
 	}
 }
 
-static int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
+static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport_tbl_attr attr;
 	struct mlx5_flow_table *fdb;
@@ -1360,14 +1360,14 @@ static int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
 	attr.prio = 1;
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		attr.vport = vport->vport;
-		fdb = esw_vport_tbl_get(esw, &attr);
+		fdb = mlx5_esw_vporttbl_get(esw, &attr);
 		if (IS_ERR(fdb))
 			goto out;
 	}
 	return 0;
 
 out:
-	mlx5_esw_vport_tbl_put(esw);
+	esw_vport_tbl_put(esw);
 	return PTR_ERR(fdb);
 }
 
@@ -1448,7 +1448,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
 
 	/* Open level 1 for split fdb rules now if prios isn't supported  */
 	if (!mlx5_chains_prios_supported(chains)) {
-		err = mlx5_esw_vport_tbl_get(esw);
+		err = esw_vport_tbl_get(esw);
 		if (err)
 			goto level_1_err;
 	}
@@ -1472,7 +1472,7 @@ static void
 esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
 {
 	if (!mlx5_chains_prios_supported(chains))
-		mlx5_esw_vport_tbl_put(esw);
+		esw_vport_tbl_put(esw);
 	mlx5_chains_put_table(chains, 0, 1, 0);
 	mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
 	mlx5_chains_destroy(chains);
-- 
cgit v1.2.3


From c796bb7cd2307d5fb815e919b964bf2d65c95ec3 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 31 Aug 2020 13:23:25 +0800
Subject: net/mlx5: E-switch, Generalize per vport table API

Currently, per vport table was used only for port mirroring actions.
However, sample action will also require a per vport table instance.

Generalize the vport table API to work with multiple namespaces where
each namespace manages its own vport table instance.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c    | 15 ++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h         |  7 +++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c    | 14 ++++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
index 6c4246181615..abba1b801048 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -3,9 +3,6 @@
 
 #include "eswitch.h"
 
-#define MLX5_ESW_VPORT_TABLE_SIZE 128
-#define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
-
 /* This struct is used as a key to the hash table and we need it to be packed
  * so hash result is consistent
  */
@@ -14,6 +11,7 @@ struct mlx5_vport_key {
 	u16 prio;
 	u16 vport;
 	u16 vhca_id;
+	const struct esw_vport_tbl_namespace *vport_ns;
 } __packed;
 
 struct mlx5_vport_table {
@@ -24,14 +22,16 @@ struct mlx5_vport_table {
 };
 
 static struct mlx5_flow_table *
-esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns)
+esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns,
+		     const struct esw_vport_tbl_namespace *vport_ns)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *fdb;
 
-	ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS;
-	ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE;
+	ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups;
+	ft_attr.max_fte = vport_ns->max_fte;
 	ft_attr.prio = FDB_PER_VPORT;
+	ft_attr.flags = vport_ns->flags;
 	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 	if (IS_ERR(fdb)) {
 		esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
@@ -49,6 +49,7 @@ static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
 	key->chain = attr->chain;
 	key->prio = attr->prio;
 	key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+	key->vport_ns  = attr->vport_ns;
 	return jhash(key, sizeof(*key), 0);
 }
 
@@ -96,7 +97,7 @@ mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr
 		goto err_ns;
 	}
 
-	fdb = esw_vport_tbl_create(esw, ns);
+	fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns);
 	if (IS_ERR(fdb))
 		goto err_ns;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b7d1f8854ef4..e0415676821a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -713,10 +713,17 @@ void
 esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
 				      struct mlx5_vport *vport);
 
+struct esw_vport_tbl_namespace {
+	int max_fte;
+	int max_num_groups;
+	u32 flags;
+};
+
 struct mlx5_vport_tbl_attr {
 	u16 chain;
 	u16 prio;
 	u16 vport;
+	const struct esw_vport_tbl_namespace *vport_ns;
 };
 
 struct mlx5_flow_table *
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 63e22e9e5ad1..8ac4b60ea225 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -54,6 +54,15 @@
 #define MLX5_ESW_MISS_FLOWS (2)
 #define UPLINK_REP_INDEX 0
 
+#define MLX5_ESW_VPORT_TBL_SIZE 128
+#define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
+
+static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
+	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
+	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
+	.flags = 0,
+};
+
 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
 						     u16 vport_num)
 {
@@ -482,6 +491,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		fwd_attr.chain = attr->chain;
 		fwd_attr.prio = attr->prio;
 		fwd_attr.vport = esw_attr->in_rep->vport;
+		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 
 		fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
 	} else {
@@ -548,6 +558,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 	fwd_attr.chain = attr->chain;
 	fwd_attr.prio = attr->prio;
 	fwd_attr.vport = esw_attr->in_rep->vport;
+	fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 	fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
 	if (IS_ERR(fwd_fdb)) {
 		rule = ERR_CAST(fwd_fdb);
@@ -628,6 +639,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
 		fwd_attr.chain = attr->chain;
 		fwd_attr.prio = attr->prio;
 		fwd_attr.vport = esw_attr->in_rep->vport;
+		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 	}
 
 	if (fwd_rule)  {
@@ -1345,6 +1357,7 @@ static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
 	attr.prio = 1;
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		attr.vport = vport->vport;
+		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 		mlx5_esw_vporttbl_put(esw, &attr);
 	}
 }
@@ -1360,6 +1373,7 @@ static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
 	attr.prio = 1;
 	mlx5_esw_for_all_vports(esw, i, vport) {
 		attr.vport = vport->vport;
+		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 		fdb = mlx5_esw_vporttbl_get(esw, &attr);
 		if (IS_ERR(fdb))
 			goto out;
-- 
cgit v1.2.3


From c1904360dde8fb4f79bec4751c7540df10d57ec2 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Fri, 9 Oct 2020 11:06:33 +0800
Subject: net/mlx5: E-switch, Set per vport table default group number

Different per voprt table is created using a different per vport table
namespace. Because we can't use variable to set the namespace member
value.  If max group number is 0 in the namespace, use the eswitch
default max group number.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
index abba1b801048..9e72118f2e4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c
@@ -28,7 +28,10 @@ esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns,
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_table *fdb;
 
-	ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups;
+	if (vport_ns->max_num_groups)
+		ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups;
+	else
+		ft_attr.autogroup.max_num_groups = esw->params.large_group_num;
 	ft_attr.max_fte = vport_ns->max_fte;
 	ft_attr.prio = FDB_PER_VPORT;
 	ft_attr.flags = vport_ns->flags;
-- 
cgit v1.2.3


From a91d98a0a2b8e4c433b7341708f7d706e0cf1c8e Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Thu, 10 Sep 2020 15:28:02 +0800
Subject: net/mlx5: Map register values to restore objects

Currently reg_c0 lower 16 bits and reg_b are used to store the chain
id that missed in FDB and NIC tables accordingly. However, the
registers' values may index a restore object, rather than a single u32
value. Different object types can be used to restore mutually exclusive
contexts such as chain id and sample group id.

Use the mapping object to associate an index with a restore object
as a prestep for supporting additional restore types.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    | 38 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  9 +++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 13 ++++++--
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++-----
 .../ethernet/mellanox/mlx5/core/lib/fs_chains.c    | 20 ++++++++----
 .../ethernet/mellanox/mlx5/core/lib/fs_chains.h    |  3 +-
 include/linux/mlx5/eswitch.h                       |  9 +++--
 7 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 11a44d30adc7..dde83cba85c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -618,9 +618,10 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 			     struct mlx5e_tc_update_priv *tc_priv)
 {
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-	u32 chain = 0, reg_c0, reg_c1, tunnel_id, zone_restore_id;
+	u32 reg_c0, reg_c1, tunnel_id, zone_restore_id;
 	struct mlx5_rep_uplink_priv *uplink_priv;
 	struct mlx5e_rep_priv *uplink_rpriv;
+	struct mlx5_mapped_obj mapped_obj;
 	struct tc_skb_ext *tc_skb_ext;
 	struct mlx5_eswitch *esw;
 	struct mlx5e_priv *priv;
@@ -640,30 +641,35 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
 
-	err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain);
+	err = mlx5_get_mapped_object(esw_chains(esw), reg_c0, &mapped_obj);
 	if (err) {
 		netdev_dbg(priv->netdev,
-			   "Couldn't find chain for chain tag: %d, err: %d\n",
+			   "Couldn't find mapped object for reg_c0: %d, err: %d\n",
 			   reg_c0, err);
 		return false;
 	}
 
-	if (chain) {
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
-		if (!tc_skb_ext) {
-			WARN_ON(1);
-			return false;
-		}
+	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+		if (mapped_obj.chain) {
+			tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+			if (!tc_skb_ext) {
+				WARN_ON(1);
+				return false;
+			}
 
-		tc_skb_ext->chain = chain;
+			tc_skb_ext->chain = mapped_obj.chain;
 
-		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
+			zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
 
-		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-		uplink_priv = &uplink_rpriv->uplink_priv;
-		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
-					      zone_restore_id))
-			return false;
+			uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+			uplink_priv = &uplink_rpriv->uplink_priv;
+			if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
+						      zone_restore_id))
+				return false;
+		}
+	} else {
+		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+		return false;
 	}
 
 	tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index bb1e0d442b5c..9b5607ddb9a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4973,6 +4973,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
 	struct mlx5e_priv *priv = netdev_priv(skb->dev);
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
+	struct mlx5_mapped_obj mapped_obj;
 	struct tc_skb_ext *tc_skb_ext;
 	int err;
 
@@ -4980,7 +4981,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
 
-	err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
+	err = mlx5_get_mapped_object(nic_chains(priv), chain_tag, &mapped_obj);
 	if (err) {
 		netdev_dbg(priv->netdev,
 			   "Couldn't find chain for chain tag: %d, err: %d\n",
@@ -4988,7 +4989,8 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 		return false;
 	}
 
-	if (chain) {
+	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+		chain = mapped_obj.chain;
 		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
 		if (WARN_ON(!tc_skb_ext))
 			return false;
@@ -5001,6 +5003,9 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
 					      zone_restore_id))
 			return false;
+	} else {
+		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
+		return false;
 	}
 #endif /* CONFIG_NET_TC_SKB_EXT */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index e0415676821a..c5b35e7f8aed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -47,6 +47,17 @@
 #include "sf/sf.h"
 #include "en/tc_ct.h"
 
+enum mlx5_mapped_obj_type {
+	MLX5_MAPPED_OBJ_CHAIN,
+};
+
+struct mlx5_mapped_obj {
+	enum mlx5_mapped_obj_type type;
+	union {
+		u32 chain;
+	};
+};
+
 #ifdef CONFIG_MLX5_ESWITCH
 
 #define ESW_OFFLOADS_DEFAULT_NUM_GROUPS 15
@@ -733,8 +744,6 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr
 
 struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag);
-u32
-esw_get_max_restore_tag(struct mlx5_eswitch *esw);
 
 int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num);
 void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8ac4b60ea225..117d9fa93ff5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1286,7 +1286,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 			    misc_parameters_2);
 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
-		 ESW_CHAIN_TAG_METADATA_MASK);
+		 ESW_REG_C0_USER_DATA_METADATA_MASK);
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 			    misc_parameters_2);
 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
@@ -1312,12 +1312,6 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 	return flow_rule;
 }
 
-u32
-esw_get_max_restore_tag(struct mlx5_eswitch *esw)
-{
-	return ESW_CHAIN_TAG_METADATA_MASK;
-}
-
 #define MAX_PF_SQ 256
 #define MAX_SQ_NVPORTS 32
 
@@ -1434,7 +1428,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
 	attr.max_ft_sz = fdb_max;
 	attr.max_grp_num = esw->params.large_group_num;
 	attr.default_ft = miss_fdb;
-	attr.max_restore_tag = esw_get_max_restore_tag(esw);
+	attr.max_restore_tag = ESW_REG_C0_USER_DATA_METADATA_MASK;
 
 	chains = mlx5_chains_create(dev, &attr);
 	if (IS_ERR(chains)) {
@@ -1928,7 +1922,7 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw)
 		goto out_free;
 	}
 
-	ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS;
+	ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
 	ft = mlx5_create_flow_table(ns, &ft_attr);
 	if (IS_ERR(ft)) {
 		err = PTR_ERR(ft);
@@ -1943,7 +1937,7 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw)
 			    misc_parameters_2);
 
 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
-		 ESW_CHAIN_TAG_METADATA_MASK);
+		 ESW_REG_C0_USER_DATA_METADATA_MASK);
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
 		 ft_attr.max_fte - 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 381325b4a863..00ff809dcfe8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -832,8 +832,7 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
 	if (err)
 		goto init_prios_ht_err;
 
-	mapping = mapping_create(sizeof(u32), attr->max_restore_tag,
-				 true);
+	mapping = mapping_create(sizeof(struct mlx5_mapped_obj), attr->max_restore_tag, true);
 	if (IS_ERR(mapping)) {
 		err = PTR_ERR(mapping);
 		goto mapping_err;
@@ -884,21 +883,28 @@ int
 mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
 			      u32 *chain_mapping)
 {
-	return mapping_add(chains_mapping(chains), &chain, chain_mapping);
+	struct mapping_ctx *ctx = chains->chains_mapping;
+	struct mlx5_mapped_obj mapped_obj = {};
+
+	mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN;
+	mapped_obj.chain = chain;
+	return mapping_add(ctx, &mapped_obj, chain_mapping);
 }
 
 int
 mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
 {
-	return mapping_remove(chains_mapping(chains), chain_mapping);
+	struct mapping_ctx *ctx = chains->chains_mapping;
+
+	return mapping_remove(ctx, chain_mapping);
 }
 
-int mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag,
-			   u32 *chain)
+int
+mlx5_get_mapped_object(struct mlx5_fs_chains *chains, u32 tag, struct mlx5_mapped_obj *obj)
 {
 	int err;
 
-	err = mapping_find(chains_mapping(chains), tag, chain);
+	err = mapping_find(chains->chains_mapping, tag, obj);
 	if (err) {
 		mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag);
 		return -ENOENT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
index 6d5be31b05dd..75a3bba12a78 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -7,6 +7,7 @@
 #include <linux/mlx5/fs.h>
 
 struct mlx5_fs_chains;
+struct mlx5_mapped_obj;
 
 enum mlx5_chains_flags {
 	MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
@@ -64,7 +65,7 @@ mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
 void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
 
 int
-mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, u32 *chain);
+mlx5_get_mapped_object(struct mlx5_fs_chains *chains, u32 tag, struct mlx5_mapped_obj *obj);
 
 void
 mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 994c2c8cb4fd..125ae482383b 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -74,20 +74,19 @@ bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw);
 bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
 
 /* Reg C0 usage:
- * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_CHAIN_TAG(16) >
+ * Reg C0 = < ESW_PFNUM_BITS(4) | ESW_VPORT BITS(12) | ESW_REG_C0_OBJ(16) >
  *
  * Highest 4 bits of the reg c0 is the PF_NUM (range 0-15), 12 bits of
  * unique non-zero vport id (range 1-4095). The rest (lowest 16 bits) is left
- * for tc chain tag restoration.
+ * for user data objects managed by a common mapping context.
  * PFNUM + VPORT comprise the SOURCE_PORT matching.
  */
 #define ESW_VPORT_BITS 12
 #define ESW_PFNUM_BITS 4
 #define ESW_SOURCE_PORT_METADATA_BITS (ESW_PFNUM_BITS + ESW_VPORT_BITS)
 #define ESW_SOURCE_PORT_METADATA_OFFSET (32 - ESW_SOURCE_PORT_METADATA_BITS)
-#define ESW_CHAIN_TAG_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
-#define ESW_CHAIN_TAG_METADATA_MASK GENMASK(ESW_CHAIN_TAG_METADATA_BITS - 1,\
-					    0)
+#define ESW_REG_C0_USER_DATA_METADATA_BITS (32 - ESW_SOURCE_PORT_METADATA_BITS)
+#define ESW_REG_C0_USER_DATA_METADATA_MASK GENMASK(ESW_REG_C0_USER_DATA_METADATA_BITS - 1, 0)
 
 static inline u32 mlx5_eswitch_get_vport_metadata_mask(void)
 {
-- 
cgit v1.2.3


From c935568271b5b25baea65ad623f146de72570cd7 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 31 Aug 2020 13:27:53 +0800
Subject: net/mlx5: Instantiate separate mapping objects for FDB and NIC tables

Currently, the u32 chain id is mapped to u16 value which is stored on
the lower 16 bits of reg_c0 for FDB and reg_b for NIC tables. The
mapping is internally maintained by the chains object. However, with
the introduction of reg_c0 objects the fdb may store more than just
the chain id on reg_c0. This is not relevant for NIC tables.

Separate the chains mapping instantiation for FDB and NIC tables.
Remove the mapping from the chains object. For FDB tables, create
the mapping per eswitch. For NIC tables, create the mapping per tc
table. Pass the corresponding mapping pointer when creating the
chains object.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  5 +--
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h    |  1 +
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 19 +++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  1 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 16 ++++++++-
 .../ethernet/mellanox/mlx5/core/lib/fs_chains.c    | 42 ++++------------------
 .../ethernet/mellanox/mlx5/core/lib/fs_chains.h    |  5 +--
 8 files changed, 43 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 4c86b68ad26c..5bedc2c4d26f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -37,9 +37,10 @@ mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag_mp.o lib/geneve.o lib/port_tun.o \
-					en_rep.o en/rep/bond.o en/mod_hdr.o
+					en_rep.o en/rep/bond.o en/mod_hdr.o \
+					en/mapping.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
-					en/mapping.o lib/fs_chains.o en/tc_tun.o \
+					lib/fs_chains.o en/tc_tun.o \
 					esw/indir_table.o en/tc_tun_encap.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
 					en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 373bd89484cb..1d5ce07b83f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -29,6 +29,7 @@ struct mlx5e_tc_table {
 	struct netdev_net_notifier	netdevice_nn;
 
 	struct mlx5_tc_ct_priv         *ct;
+	struct mapping_ctx             *mapping;
 };
 
 struct mlx5e_flow_table {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index dde83cba85c3..9b55a5c394d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -641,7 +641,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
 
-	err = mlx5_get_mapped_object(esw_chains(esw), reg_c0, &mapped_obj);
+	err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
 	if (err) {
 		netdev_dbg(priv->netdev,
 			   "Couldn't find mapped object for reg_c0: %d, err: %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9b5607ddb9a2..c938215c8fbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4731,6 +4731,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
 	struct mlx5_core_dev *dev = priv->mdev;
+	struct mapping_ctx *chains_mapping;
 	struct mlx5_chains_attr attr = {};
 	int err;
 
@@ -4745,15 +4746,22 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
 
-	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+	chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
+					MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+	if (IS_ERR(chains_mapping)) {
+		err = PTR_ERR(chains_mapping);
+		goto err_mapping;
+	}
+	tc->mapping = chains_mapping;
+
+	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
 		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
 			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
-		attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
-	}
 	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
 	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
 	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
 	attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
+	attr.mapping = chains_mapping;
 
 	tc->chains = mlx5_chains_create(dev, &attr);
 	if (IS_ERR(tc->chains)) {
@@ -4780,6 +4788,8 @@ err_reg:
 	mlx5_tc_ct_clean(tc->ct);
 	mlx5_chains_destroy(tc->chains);
 err_chains:
+	mapping_destroy(chains_mapping);
+err_mapping:
 	rhashtable_destroy(&tc->ht);
 	return err;
 }
@@ -4814,6 +4824,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 	mutex_destroy(&tc->t_lock);
 
 	mlx5_tc_ct_clean(tc->ct);
+	mapping_destroy(tc->mapping);
 	mlx5_chains_destroy(tc->chains);
 }
 
@@ -4981,7 +4992,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
 
-	err = mlx5_get_mapped_object(nic_chains(priv), chain_tag, &mapped_obj);
+	err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
 	if (err) {
 		netdev_dbg(priv->netdev,
 			   "Couldn't find chain for chain tag: %d, err: %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c5b35e7f8aed..a97396330160 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -217,6 +217,7 @@ struct mlx5_esw_offload {
 	struct mlx5_flow_table *ft_offloads_restore;
 	struct mlx5_flow_group *restore_group;
 	struct mlx5_modify_hdr *restore_copy_hdr_id;
+	struct mapping_ctx *reg_c0_obj_pool;
 
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 117d9fa93ff5..510e9b8b24fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -47,6 +47,7 @@
 #include "lib/eq.h"
 #include "lib/fs_chains.h"
 #include "en_tc.h"
+#include "en/mapping.h"
 
 /* There are two match-all miss flows, one for unicast dst mac and
  * one for multicast.
@@ -1428,7 +1429,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
 	attr.max_ft_sz = fdb_max;
 	attr.max_grp_num = esw->params.large_group_num;
 	attr.default_ft = miss_fdb;
-	attr.max_restore_tag = ESW_REG_C0_USER_DATA_METADATA_MASK;
+	attr.mapping = esw->offloads.reg_c0_obj_pool;
 
 	chains = mlx5_chains_create(dev, &attr);
 	if (IS_ERR(chains)) {
@@ -2595,6 +2596,7 @@ static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
 
 int esw_offloads_enable(struct mlx5_eswitch *esw)
 {
+	struct mapping_ctx *reg_c0_obj_pool;
 	struct mlx5_vport *vport;
 	int err, i;
 
@@ -2622,6 +2624,15 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 	if (err)
 		goto err_vport_metadata;
 
+	reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
+					 ESW_REG_C0_USER_DATA_METADATA_MASK,
+					 true);
+	if (IS_ERR(reg_c0_obj_pool)) {
+		err = PTR_ERR(reg_c0_obj_pool);
+		goto err_pool;
+	}
+	esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;
+
 	err = esw_offloads_steering_init(esw);
 	if (err)
 		goto err_steering_init;
@@ -2648,6 +2659,8 @@ err_vports:
 err_uplink:
 	esw_offloads_steering_cleanup(esw);
 err_steering_init:
+	mapping_destroy(reg_c0_obj_pool);
+err_pool:
 	esw_set_passing_vport_metadata(esw, false);
 err_vport_metadata:
 	esw_offloads_metadata_uninit(esw);
@@ -2686,6 +2699,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
 	esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
 	esw_set_passing_vport_metadata(esw, false);
 	esw_offloads_steering_cleanup(esw);
+	mapping_destroy(esw->offloads.reg_c0_obj_pool);
 	esw_offloads_metadata_uninit(esw);
 	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
 	mlx5_rdma_disable_roce(esw->dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 00ff809dcfe8..00ef10a1a9f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -7,15 +7,11 @@
 
 #include "lib/fs_chains.h"
 #include "en/mapping.h"
-#include "mlx5_core.h"
 #include "fs_core.h"
-#include "eswitch.h"
-#include "en.h"
 #include "en_tc.h"
 
 #define chains_lock(chains) ((chains)->lock)
 #define chains_ht(chains) ((chains)->chains_ht)
-#define chains_mapping(chains) ((chains)->chains_mapping)
 #define prios_ht(chains) ((chains)->prios_ht)
 #define ft_pool_left(chains) ((chains)->ft_left)
 #define tc_default_ft(chains) ((chains)->tc_default_ft)
@@ -300,7 +296,7 @@ create_chain_restore(struct fs_chain *chain)
 	    !mlx5_chains_prios_supported(chains))
 		return 0;
 
-	err = mapping_add(chains_mapping(chains), &chain->chain, &index);
+	err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
 	if (err)
 		return err;
 	if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
@@ -310,10 +306,8 @@ create_chain_restore(struct fs_chain *chain)
 		 *
 		 * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
 		 */
-		err = mapping_add(chains_mapping(chains),
-				  &chain->chain, &index);
-		mapping_remove(chains_mapping(chains),
-			       MLX5_FS_DEFAULT_FLOW_TAG);
+		err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
+		mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG);
 		if (err)
 			return err;
 	}
@@ -361,7 +355,7 @@ err_mod_hdr:
 		mlx5_del_flow_rules(chain->restore_rule);
 err_rule:
 	/* Datapath can't find this mapping, so we can safely remove it */
-	mapping_remove(chains_mapping(chains), chain->id);
+	mapping_remove(chains->chains_mapping, chain->id);
 	return err;
 }
 
@@ -376,7 +370,7 @@ static void destroy_chain_restore(struct fs_chain *chain)
 		mlx5_del_flow_rules(chain->restore_rule);
 
 	mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
-	mapping_remove(chains_mapping(chains), chain->id);
+	mapping_remove(chains->chains_mapping, chain->id);
 }
 
 static struct fs_chain *
@@ -797,7 +791,6 @@ static struct mlx5_fs_chains *
 mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
 {
 	struct mlx5_fs_chains *chains_priv;
-	struct mapping_ctx *mapping;
 	u32 max_flow_counter;
 	int err;
 
@@ -816,6 +809,7 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
 	chains_priv->flags = attr->flags;
 	chains_priv->ns = attr->ns;
 	chains_priv->group_num = attr->max_grp_num;
+	chains_priv->chains_mapping = attr->mapping;
 	tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
 
 	mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
@@ -832,19 +826,10 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
 	if (err)
 		goto init_prios_ht_err;
 
-	mapping = mapping_create(sizeof(struct mlx5_mapped_obj), attr->max_restore_tag, true);
-	if (IS_ERR(mapping)) {
-		err = PTR_ERR(mapping);
-		goto mapping_err;
-	}
-	chains_mapping(chains_priv) = mapping;
-
 	mutex_init(&chains_lock(chains_priv));
 
 	return chains_priv;
 
-mapping_err:
-	rhashtable_destroy(&prios_ht(chains_priv));
 init_prios_ht_err:
 	rhashtable_destroy(&chains_ht(chains_priv));
 init_chains_ht_err:
@@ -856,7 +841,6 @@ static void
 mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
 {
 	mutex_destroy(&chains_lock(chains));
-	mapping_destroy(chains_mapping(chains));
 	rhashtable_destroy(&prios_ht(chains));
 	rhashtable_destroy(&chains_ht(chains));
 
@@ -898,17 +882,3 @@ mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
 
 	return mapping_remove(ctx, chain_mapping);
 }
-
-int
-mlx5_get_mapped_object(struct mlx5_fs_chains *chains, u32 tag, struct mlx5_mapped_obj *obj)
-{
-	int err;
-
-	err = mapping_find(chains->chains_mapping, tag, obj);
-	if (err) {
-		mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag);
-		return -ENOENT;
-	}
-
-	return 0;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
index 75a3bba12a78..e96f345e7dae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -21,7 +21,7 @@ struct mlx5_chains_attr {
 	u32 max_ft_sz;
 	u32 max_grp_num;
 	struct mlx5_flow_table *default_ft;
-	u32 max_restore_tag;
+	struct mapping_ctx *mapping;
 };
 
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
@@ -64,9 +64,6 @@ struct mlx5_fs_chains *
 mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr);
 void mlx5_chains_destroy(struct mlx5_fs_chains *chains);
 
-int
-mlx5_get_mapped_object(struct mlx5_fs_chains *chains, u32 tag, struct mlx5_mapped_obj *obj);
-
 void
 mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
 		       struct mlx5_flow_table *ft);
-- 
cgit v1.2.3


From 41c2fd949803a5ff8acfed1d81c0bbd62d8f660d Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 31 Aug 2020 13:28:35 +0800
Subject: net/mlx5e: TC, Parse sample action

Parse TC sample action and save sample parameters in flow attribute
data structure.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/tc_priv.h   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 28 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/esw/sample.h   | 13 ++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  2 ++
 4 files changed, 44 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index c223591ffc22..d1599b7b944b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -27,6 +27,7 @@ enum {
 	MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP        = MLX5E_TC_FLOW_BASE + 8,
 	MLX5E_TC_FLOW_FLAG_TUN_RX                = MLX5E_TC_FLOW_BASE + 9,
 	MLX5E_TC_FLOW_FLAG_FAILED                = MLX5E_TC_FLOW_BASE + 10,
+	MLX5E_TC_FLOW_FLAG_SAMPLE                = MLX5E_TC_FLOW_BASE + 11,
 };
 
 struct mlx5e_tc_flow_parse_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index c938215c8fbc..85782d12ffb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -47,6 +47,7 @@
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_mpls.h>
+#include <net/psample.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
 #include <net/bareudp.h>
@@ -1481,6 +1482,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 	if (flow_flag_test(flow, L3_TO_L2_DECAP))
 		mlx5e_detach_decap(priv, flow);
 
+	kfree(flow->attr->esw_attr->sample);
 	kfree(flow->attr);
 }
 
@@ -3627,6 +3629,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 	bool ft_flow = mlx5e_is_ft_flow(flow);
 	const struct flow_action_entry *act;
 	struct mlx5_esw_flow_attr *esw_attr;
+	struct mlx5_sample_attr sample = {};
 	bool encap = false, decap = false;
 	u32 action = attr->action;
 	int err, i, if_count = 0;
@@ -3881,6 +3884,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			attr->dest_chain = act->chain_index;
 			break;
 		case FLOW_ACTION_CT:
+			if (flow_flag_test(flow, SAMPLE)) {
+				NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
+				return -EOPNOTSUPP;
+			}
 			err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
 			if (err)
 				return err;
@@ -3888,6 +3895,17 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			flow_flag_set(flow, CT);
 			esw_attr->split_count = esw_attr->out_count;
 			break;
+		case FLOW_ACTION_SAMPLE:
+			if (flow_flag_test(flow, CT)) {
+				NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
+				return -EOPNOTSUPP;
+			}
+			sample.rate = act->sample.rate;
+			sample.group_num = act->sample.psample_group->group_num;
+			if (act->sample.truncate)
+				sample.trunc_size = act->sample.trunc_size;
+			flow_flag_set(flow, SAMPLE);
+			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
 			return -EOPNOTSUPP;
@@ -3966,6 +3984,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 		return -EOPNOTSUPP;
 	}
 
+	/* Allocate sample attribute only when there is a sample action and
+	 * no errors after parsing.
+	 */
+	if (flow_flag_test(flow, SAMPLE)) {
+		esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL);
+		if (!esw_attr->sample)
+			return -ENOMEM;
+		*esw_attr->sample = sample;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
new file mode 100644
index 000000000000..35a5e6dddcd0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+struct mlx5_sample_attr {
+	u32 group_num;
+	u32 rate;
+	u32 trunc_size;
+};
+
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a97396330160..9b26bd67e2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -46,6 +46,7 @@
 #include "lib/fs_chains.h"
 #include "sf/sf.h"
 #include "en/tc_ct.h"
+#include "esw/sample.h"
 
 enum mlx5_mapped_obj_type {
 	MLX5_MAPPED_OBJ_CHAIN,
@@ -440,6 +441,7 @@ struct mlx5_esw_flow_attr {
 	} dests[MLX5_MAX_FLOW_FWD_VPORTS];
 	struct mlx5_rx_tun_attr *rx_tun_attr;
 	struct mlx5_pkt_reformat *decap_pkt_reformat;
+	struct mlx5_sample_attr *sample;
 };
 
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
-- 
cgit v1.2.3


From 2a9ab10a5689a4612d441df1cc628c381dc75ed3 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Fri, 18 Sep 2020 17:46:57 +0800
Subject: net/mlx5e: TC, Add sampler termination table API

Sampled packets are sent to software using termination tables. There
is only one rule in that table that is to forward sampled packets to
the e-switch management vport.

Create a sampler termination table and rule for each eswitch.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig    | 12 +++
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 11 +++
 .../net/ethernet/mellanox/mlx5/core/esw/sample.c   | 96 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/esw/sample.h   |  8 ++
 6 files changed, 129 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 9d623e38d783..461a43f338e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -104,6 +104,18 @@ config MLX5_TC_CT
 
 	  If unsure, set to Y
 
+config MLX5_TC_SAMPLE
+	bool "MLX5 TC sample offload support"
+	depends on MLX5_CLS_ACT
+	default y
+	help
+	  Say Y here if you want to support offloading sample rules via tc
+	  sample action.
+	  If set to N, will not be able to configure tc rules with sample
+	  action.
+
+	  If unsure, set to Y
+
 config MLX5_CORE_EN_DCB
 	bool "Data Center Bridging (DCB) Support"
 	default y
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5bedc2c4d26f..8bde58379ac6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -55,6 +55,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
 				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
 				      esw/devlink_port.o esw/vporttbl.o
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 931fa619cb01..22585015c7a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -89,6 +89,7 @@ struct mlx5_rep_uplink_priv {
 	struct mapping_ctx *tunnel_enc_opts_mapping;
 
 	struct mlx5_tc_ct_priv *ct_priv;
+	struct mlx5_esw_psample *esw_psample;
 
 	/* support eswitch vports bonding */
 	struct mlx5e_rep_bond *bond;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 85782d12ffb2..1a403112defd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -66,6 +66,7 @@
 #include "en/mod_hdr.h"
 #include "en/tc_priv.h"
 #include "en/tc_tun_encap.h"
+#include "esw/sample.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "lib/fs_chains.h"
@@ -4876,6 +4877,10 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 					       &esw->offloads.mod_hdr,
 					       MLX5_FLOW_NAMESPACE_FDB);
 
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
+#endif
+
 	mapping = mapping_create(sizeof(struct tunnel_match_key),
 				 TUNNEL_INFO_BITS_MASK, true);
 	if (IS_ERR(mapping)) {
@@ -4913,6 +4918,9 @@ err_ht_init:
 err_enc_opts_mapping:
 	mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+#endif
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
 	netdev_warn(priv->netdev,
 		    "Failed to initialize tc (eswitch), err: %d", err);
@@ -4931,6 +4939,9 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
 	mapping_destroy(uplink_priv->tunnel_mapping);
 
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+#endif
 	mlx5_tc_ct_clean(uplink_priv->ct_priv);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
new file mode 100644
index 000000000000..9bd996e8d28a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include "esw/sample.h"
+#include "eswitch.h"
+
+struct mlx5_esw_psample {
+	struct mlx5e_priv *priv;
+	struct mlx5_flow_table *termtbl;
+	struct mlx5_flow_handle *termtbl_rule;
+};
+
+static int
+sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
+{
+	struct mlx5_core_dev *dev = esw_psample->priv->mdev;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_act act = {};
+	int err;
+
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table))  {
+		mlx5_core_warn(dev, "termination table is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!root_ns) {
+		mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+
+	ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
+	ft_attr.autogroup.max_num_groups = 1;
+	ft_attr.prio = FDB_SLOW_PATH;
+	ft_attr.max_fte = 1;
+	ft_attr.level = 1;
+	esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+	if (IS_ERR(esw_psample->termtbl)) {
+		err = PTR_ERR(esw_psample->termtbl);
+		mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
+		return err;
+	}
+
+	act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	dest.vport.num = esw->manager_vport;
+	esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1);
+	if (IS_ERR(esw_psample->termtbl_rule)) {
+		err = PTR_ERR(esw_psample->termtbl_rule);
+		mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
+		mlx5_destroy_flow_table(esw_psample->termtbl);
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
+{
+	mlx5_del_flow_rules(esw_psample->termtbl_rule);
+	mlx5_destroy_flow_table(esw_psample->termtbl);
+}
+
+struct mlx5_esw_psample *
+mlx5_esw_sample_init(struct mlx5e_priv *priv)
+{
+	struct mlx5_esw_psample *esw_psample;
+	int err;
+
+	esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL);
+	if (!esw_psample)
+		return ERR_PTR(-ENOMEM);
+	esw_psample->priv = priv;
+	err = sampler_termtbl_create(esw_psample);
+	if (err)
+		goto err_termtbl;
+
+	return esw_psample;
+
+err_termtbl:
+	kfree(esw_psample);
+	return ERR_PTR(err);
+}
+
+void
+mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
+{
+	if (IS_ERR_OR_NULL(esw_psample))
+		return;
+
+	sampler_termtbl_destroy(esw_psample);
+	kfree(esw_psample);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
index 35a5e6dddcd0..e42e3cb01c8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
@@ -4,10 +4,18 @@
 #ifndef __MLX5_EN_TC_SAMPLE_H__
 #define __MLX5_EN_TC_SAMPLE_H__
 
+#include "en.h"
+
 struct mlx5_sample_attr {
 	u32 group_num;
 	u32 rate;
 	u32 trunc_size;
 };
 
+struct mlx5_esw_psample *
+mlx5_esw_sample_init(struct mlx5e_priv *priv);
+
+void
+mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample);
+
 #endif /* __MLX5_EN_TC_SAMPLE_H__ */
-- 
cgit v1.2.3


From 11ecd6c60b4ee6f5d5bcd91f5b87e50bad8f142a Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 21 Sep 2020 13:25:54 +0800
Subject: net/mlx5e: TC, Add sampler object API

In order to offload sample action, HW introduces sampler object. The
sampler object samples packets according to the provided sample ratio.
Sampled packets are duplicated. One copy is processed by a termination
table, named the sample table, which sends the packet up to software.
The second copy is processed by the default table.

Instantiate sampler object. Re-use identical sampler object for
the same sample ratio, sample table and default table as a prestep for
offloading tc sample actions.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/sample.c   | 131 +++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
index 9bd996e8d28a..37e33670bb24 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
@@ -3,11 +3,28 @@
 
 #include "esw/sample.h"
 #include "eswitch.h"
+#include "en_tc.h"
+#include "fs_core.h"
 
 struct mlx5_esw_psample {
 	struct mlx5e_priv *priv;
 	struct mlx5_flow_table *termtbl;
 	struct mlx5_flow_handle *termtbl_rule;
+	DECLARE_HASHTABLE(hashtbl, 8);
+	struct mutex ht_lock; /* protect hashtbl */
+};
+
+struct mlx5_sampler {
+	struct hlist_node hlist;
+	u32 sampler_id;
+	u32 sample_ratio;
+	u32 sample_table_id;
+	u32 default_table_id;
+	int count;
+};
+
+struct mlx5_sample_flow {
+	struct mlx5_sampler *sampler;
 };
 
 static int
@@ -64,6 +81,117 @@ sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
 	mlx5_destroy_flow_table(esw_psample->termtbl);
 }
 
+static int
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler)
+{
+	u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	u64 general_obj_types;
+	void *obj;
+	int err;
+
+	general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
+	if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
+		return -EOPNOTSUPP;
+	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
+		return -EOPNOTSUPP;
+
+	obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
+	MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
+	MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
+	MLX5_SET(sampler_obj, obj, level, 1);
+	MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
+	MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
+	MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return err;
+}
+
+static void
+sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
+
+	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static u32
+sampler_hash(u32 sample_ratio, u32 default_table_id)
+{
+	return jhash_2words(sample_ratio, default_table_id, 0);
+}
+
+static int
+sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
+{
+	return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
+}
+
+static struct mlx5_sampler *
+sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id)
+{
+	struct mlx5_sampler *sampler;
+	u32 hash_key;
+	int err;
+
+	mutex_lock(&esw_psample->ht_lock);
+	hash_key = sampler_hash(sample_ratio, default_table_id);
+	hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key)
+		if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
+				 sample_ratio, default_table_id))
+			goto add_ref;
+
+	sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
+	if (!sampler) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+
+	sampler->sample_table_id = esw_psample->termtbl->id;
+	sampler->default_table_id = default_table_id;
+	sampler->sample_ratio = sample_ratio;
+
+	err = sampler_obj_create(esw_psample->priv->mdev, sampler);
+	if (err)
+		goto err_create;
+
+	hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key);
+
+add_ref:
+	sampler->count++;
+	mutex_unlock(&esw_psample->ht_lock);
+	return sampler;
+
+err_create:
+	kfree(sampler);
+err_alloc:
+	mutex_unlock(&esw_psample->ht_lock);
+	return ERR_PTR(err);
+}
+
+static void
+sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
+{
+	mutex_lock(&esw_psample->ht_lock);
+	if (--sampler->count == 0) {
+		hash_del(&sampler->hlist);
+		sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id);
+		kfree(sampler);
+	}
+	mutex_unlock(&esw_psample->ht_lock);
+}
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv)
 {
@@ -78,6 +206,8 @@ mlx5_esw_sample_init(struct mlx5e_priv *priv)
 	if (err)
 		goto err_termtbl;
 
+	mutex_init(&esw_psample->ht_lock);
+
 	return esw_psample;
 
 err_termtbl:
@@ -91,6 +221,7 @@ mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
 	if (IS_ERR_OR_NULL(esw_psample))
 		return;
 
+	mutex_destroy(&esw_psample->ht_lock);
 	sampler_termtbl_destroy(esw_psample);
 	kfree(esw_psample);
 }
-- 
cgit v1.2.3


From 36a3196256bf3310e5e7142b0e61787f7a201abd Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Tue, 26 Jan 2021 11:08:28 +0800
Subject: net/mlx5e: TC, Add sampler restore handle API

Use common object pool to create an object ID to map sample parameters.
Allocate a modify header action to write the object ID to reg_c0 lower
16 bits. Create a restore rule to pass the object ID to software. So
software can identify sampled packets via the object ID and send it to
userspace.

Aggregate the modify header action, restore rule and object ID to a
sample restore handle. Re-use identical sample restore handle for
the same object ID.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/sample.c   | 108 +++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
index 37e33670bb24..79a0e49b2799 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2021 Mellanox Technologies. */
 
+#include <linux/skbuff.h>
+#include <net/psample.h>
 #include "esw/sample.h"
 #include "eswitch.h"
 #include "en_tc.h"
@@ -12,6 +14,8 @@ struct mlx5_esw_psample {
 	struct mlx5_flow_handle *termtbl_rule;
 	DECLARE_HASHTABLE(hashtbl, 8);
 	struct mutex ht_lock; /* protect hashtbl */
+	DECLARE_HASHTABLE(restore_hashtbl, 8);
+	struct mutex restore_lock; /* protect restore_hashtbl */
 };
 
 struct mlx5_sampler {
@@ -25,6 +29,15 @@ struct mlx5_sampler {
 
 struct mlx5_sample_flow {
 	struct mlx5_sampler *sampler;
+	struct mlx5_sample_restore *restore;
+};
+
+struct mlx5_sample_restore {
+	struct hlist_node hlist;
+	struct mlx5_modify_hdr *modify_hdr;
+	struct mlx5_flow_handle *rule;
+	u32 obj_id;
+	int count;
 };
 
 static int
@@ -192,6 +205,99 @@ sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
 	mutex_unlock(&esw_psample->ht_lock);
 }
 
+static struct mlx5_modify_hdr *
+sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+	struct mlx5_modify_hdr *modify_hdr;
+	int err;
+
+	err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+					CHAIN_TO_REG, obj_id);
+	if (err)
+		goto err_set_regc0;
+
+	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
+					      mod_acts.num_actions,
+					      mod_acts.actions);
+	if (IS_ERR(modify_hdr)) {
+		err = PTR_ERR(modify_hdr);
+		goto err_modify_hdr;
+	}
+
+	dealloc_mod_hdr_actions(&mod_acts);
+	return modify_hdr;
+
+err_modify_hdr:
+	dealloc_mod_hdr_actions(&mod_acts);
+err_set_regc0:
+	return ERR_PTR(err);
+}
+
+static struct mlx5_sample_restore *
+sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
+{
+	struct mlx5_core_dev *mdev = esw_psample->priv->mdev;
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	struct mlx5_sample_restore *restore;
+	struct mlx5_modify_hdr *modify_hdr;
+	int err;
+
+	mutex_lock(&esw_psample->restore_lock);
+	hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id)
+		if (restore->obj_id == obj_id)
+			goto add_ref;
+
+	restore = kzalloc(sizeof(*restore), GFP_KERNEL);
+	if (!restore) {
+		err = -ENOMEM;
+		goto err_alloc;
+	}
+	restore->obj_id = obj_id;
+
+	modify_hdr = sample_metadata_rule_get(mdev, obj_id);
+	if (IS_ERR(modify_hdr)) {
+		err = PTR_ERR(modify_hdr);
+		goto err_modify_hdr;
+	}
+	restore->modify_hdr = modify_hdr;
+
+	restore->rule = esw_add_restore_rule(esw, obj_id);
+	if (IS_ERR(restore->rule)) {
+		err = PTR_ERR(restore->rule);
+		goto err_restore;
+	}
+
+	hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id);
+add_ref:
+	restore->count++;
+	mutex_unlock(&esw_psample->restore_lock);
+	return restore;
+
+err_restore:
+	mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
+err_modify_hdr:
+	kfree(restore);
+err_alloc:
+	mutex_unlock(&esw_psample->restore_lock);
+	return ERR_PTR(err);
+}
+
+static void
+sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore)
+{
+	mutex_lock(&esw_psample->restore_lock);
+	if (--restore->count == 0)
+		hash_del(&restore->hlist);
+	mutex_unlock(&esw_psample->restore_lock);
+
+	if (!restore->count) {
+		mlx5_del_flow_rules(restore->rule);
+		mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr);
+		kfree(restore);
+	}
+}
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv)
 {
@@ -207,6 +313,7 @@ mlx5_esw_sample_init(struct mlx5e_priv *priv)
 		goto err_termtbl;
 
 	mutex_init(&esw_psample->ht_lock);
+	mutex_init(&esw_psample->restore_lock);
 
 	return esw_psample;
 
@@ -221,6 +328,7 @@ mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
 	if (IS_ERR_OR_NULL(esw_psample))
 		return;
 
+	mutex_destroy(&esw_psample->restore_lock);
 	mutex_destroy(&esw_psample->ht_lock);
 	sampler_termtbl_destroy(esw_psample);
 	kfree(esw_psample);
-- 
cgit v1.2.3


From 7319a1cc3ca9f719e4ef1b1882f6fdb57d0719b8 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Fri, 15 Jan 2021 16:03:47 +0800
Subject: net/mlx5e: TC, Refactor tc update skb function

As a pre-step to process sampled packet in this function.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    | 59 ++++++++++++----------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 9b55a5c394d0..a25ec309d298 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -611,20 +611,46 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
 
 	return true;
 }
+
+static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
+			      struct mlx5e_tc_update_priv *tc_priv)
+{
+	struct mlx5e_priv *priv = netdev_priv(skb->dev);
+	u32 tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
+
+	if (chain) {
+		struct mlx5_rep_uplink_priv *uplink_priv;
+		struct mlx5e_rep_priv *uplink_rpriv;
+		struct tc_skb_ext *tc_skb_ext;
+		struct mlx5_eswitch *esw;
+		u32 zone_restore_id;
+
+		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		if (!tc_skb_ext) {
+			WARN_ON(1);
+			return false;
+		}
+		tc_skb_ext->chain = chain;
+		zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
+		esw = priv->mdev->priv.eswitch;
+		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+		uplink_priv = &uplink_rpriv->uplink_priv;
+		if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
+					      zone_restore_id))
+			return false;
+	}
+	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+}
 #endif /* CONFIG_NET_TC_SKB_EXT */
 
 bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 			     struct sk_buff *skb,
 			     struct mlx5e_tc_update_priv *tc_priv)
 {
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-	u32 reg_c0, reg_c1, tunnel_id, zone_restore_id;
-	struct mlx5_rep_uplink_priv *uplink_priv;
-	struct mlx5e_rep_priv *uplink_rpriv;
 	struct mlx5_mapped_obj mapped_obj;
-	struct tc_skb_ext *tc_skb_ext;
 	struct mlx5_eswitch *esw;
 	struct mlx5e_priv *priv;
+	u32 reg_c0, reg_c1;
 	int err;
 
 	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -640,7 +666,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	priv = netdev_priv(skb->dev);
 	esw = priv->mdev->priv.eswitch;
-
 	err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
 	if (err) {
 		netdev_dbg(priv->netdev,
@@ -649,31 +674,13 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 		return false;
 	}
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
-		if (mapped_obj.chain) {
-			tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
-			if (!tc_skb_ext) {
-				WARN_ON(1);
-				return false;
-			}
-
-			tc_skb_ext->chain = mapped_obj.chain;
-
-			zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK;
-
-			uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-			uplink_priv = &uplink_rpriv->uplink_priv;
-			if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb,
-						      zone_restore_id))
-				return false;
-		}
+		return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv);
 	} else {
 		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
 		return false;
 	}
-
-	tunnel_id = reg_c1 >> ESW_TUN_OFFSET;
-	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
 #endif /* CONFIG_NET_TC_SKB_EXT */
 
 	return true;
-- 
cgit v1.2.3


From be9dc0047450f7d8c0462a6ef3f7c26bced16911 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Tue, 26 Jan 2021 11:15:46 +0800
Subject: net/mlx5e: TC, Handle sampled packets

Mark the sampled packets with a sample restore object. Send sampled
packets using the psample api.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c  | 14 +++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c | 15 +++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h    |  6 ++++++
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index a25ec309d298..6cdc52d50a48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -17,6 +17,7 @@
 #include "en/mapping.h"
 #include "en/tc_tun.h"
 #include "lib/port_tun.h"
+#include "esw/sample.h"
 
 struct mlx5e_rep_indr_block_priv {
 	struct net_device *netdev;
@@ -675,13 +676,20 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 	}
 
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN)
 		return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv);
-	} else {
+#endif /* CONFIG_NET_TC_SKB_EXT */
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+		mlx5_esw_sample_skb(skb, &mapped_obj);
+		return false;
+	}
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+	if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE &&
+	    mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) {
 		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
 		return false;
 	}
-#endif /* CONFIG_NET_TC_SKB_EXT */
 
 	return true;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
index 79a0e49b2799..629a0a28a7ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
@@ -298,6 +298,21 @@ sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_rest
 	}
 }
 
+void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+{
+	u32 trunc_size = mapped_obj->sample.trunc_size;
+	struct psample_group psample_group = {};
+	struct psample_metadata md = {};
+
+	md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
+	md.in_ifindex = skb->dev->ifindex;
+	psample_group.group_num = mapped_obj->sample.group_id;
+	psample_group.net = &init_net;
+	skb_push(skb, skb->mac_len);
+
+	psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
+}
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
index e42e3cb01c8c..82bff97bd9b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
@@ -5,6 +5,7 @@
 #define __MLX5_EN_TC_SAMPLE_H__
 
 #include "en.h"
+#include "eswitch.h"
 
 struct mlx5_sample_attr {
 	u32 group_num;
@@ -12,6 +13,8 @@ struct mlx5_sample_attr {
 	u32 trunc_size;
 };
 
+void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9b26bd67e2b8..ed4d7f8f798f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -50,12 +50,18 @@
 
 enum mlx5_mapped_obj_type {
 	MLX5_MAPPED_OBJ_CHAIN,
+	MLX5_MAPPED_OBJ_SAMPLE,
 };
 
 struct mlx5_mapped_obj {
 	enum mlx5_mapped_obj_type type;
 	union {
 		u32 chain;
+		struct {
+			u32 group_id;
+			u32 rate;
+			u32 trunc_size;
+		} sample;
 	};
 };
 
-- 
cgit v1.2.3


From f94d6389f6a8abd04760dcd015d14961260a8000 Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Mon, 21 Sep 2020 16:45:07 +0800
Subject: net/mlx5e: TC, Add support to offload sample action

The following diagram illustrates the hardware model for tc sample action:

        +---------------------+
        + original flow table +
        +---------------------+
        +   original match    +
        +---------------------+
                   |
                   v
+------------------------------------------------+
+                Flow Sampler Object             +
+------------------------------------------------+
+                    sample ratio                +
+------------------------------------------------+
+    sample table id    |    default table id    +
+------------------------------------------------+
           |                            |
           v                            v
+-----------------------------+  +----------------------------------------+
+        sample table         +  + default table per <vport, chain, prio> +
+-----------------------------+  +----------------------------------------+
+ forward to management vport +  +            original match              +
+-----------------------------+  +----------------------------------------+
                                 +            other actions               +
                                 +----------------------------------------+

The sample action is translated to a goto flow table object
destination which samples packets according to the provided
sample ratio. Sampled packets are duplicated. One copy is
processed by a termination table, named the sample table,
which sends the packet to the eswitch manager port (that will
be processed by software).

The second copy is processed by the default table which executes
the subsequent actions. The default table is created per <vport,
chain, prio> tuple as rules with different prios and chains may
overlap.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  30 +++
 .../net/ethernet/mellanox/mlx5/core/esw/sample.c   | 235 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/esw/sample.h   |  18 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   4 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  43 +++-
 5 files changed, 328 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1a403112defd..d157d1b9cad6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -223,6 +223,25 @@ get_ct_priv(struct mlx5e_priv *priv)
 	return priv->fs.tc.ct;
 }
 
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+static struct mlx5_esw_psample *
+get_sample_priv(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+
+	if (is_mdev_switchdev_mode(priv->mdev)) {
+		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+		uplink_priv = &uplink_rpriv->uplink_priv;
+
+		return uplink_priv->esw_psample;
+	}
+
+	return NULL;
+}
+#endif
+
 struct mlx5_flow_handle *
 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
 		    struct mlx5_flow_spec *spec,
@@ -1092,6 +1111,10 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 		rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
 					       flow, spec, attr,
 					       mod_hdr_acts);
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	} else if (flow_flag_test(flow, SAMPLE)) {
+		rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr);
+#endif
 	} else {
 		rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 	}
@@ -1127,6 +1150,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
 		return;
 	}
 
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+	if (flow_flag_test(flow, SAMPLE)) {
+		mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+		return;
+	}
+#endif
+
 	if (attr->esw_attr->split_count)
 		mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
index 629a0a28a7ba..794012c5c476 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
@@ -3,11 +3,20 @@
 
 #include <linux/skbuff.h>
 #include <net/psample.h>
+#include "en/mapping.h"
 #include "esw/sample.h"
 #include "eswitch.h"
 #include "en_tc.h"
 #include "fs_core.h"
 
+#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
+
+static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
+	.max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
+	.max_num_groups = 0,    /* default num of groups */
+	.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
+};
+
 struct mlx5_esw_psample {
 	struct mlx5e_priv *priv;
 	struct mlx5_flow_table *termtbl;
@@ -30,6 +39,9 @@ struct mlx5_sampler {
 struct mlx5_sample_flow {
 	struct mlx5_sampler *sampler;
 	struct mlx5_sample_restore *restore;
+	struct mlx5_flow_attr *pre_attr;
+	struct mlx5_flow_handle *pre_rule;
+	struct mlx5_flow_handle *rule;
 };
 
 struct mlx5_sample_restore {
@@ -313,6 +325,229 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
 	psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
 }
 
+/* For the following typical flow table:
+ *
+ * +-------------------------------+
+ * +       original flow table     +
+ * +-------------------------------+
+ * +         original match        +
+ * +-------------------------------+
+ * + sample action + other actions +
+ * +-------------------------------+
+ *
+ * We translate the tc filter with sample action to the following HW model:
+ *
+ *         +---------------------+
+ *         + original flow table +
+ *         +---------------------+
+ *         +   original match    +
+ *         +---------------------+
+ *                    |
+ *                    v
+ * +------------------------------------------------+
+ * +                Flow Sampler Object             +
+ * +------------------------------------------------+
+ * +                    sample ratio                +
+ * +------------------------------------------------+
+ * +    sample table id    |    default table id    +
+ * +------------------------------------------------+
+ *            |                            |
+ *            v                            v
+ * +-----------------------------+  +----------------------------------------+
+ * +        sample table         +  + default table per <vport, chain, prio> +
+ * +-----------------------------+  +----------------------------------------+
+ * + forward to management vport +  +            original match              +
+ * +-----------------------------+  +----------------------------------------+
+ *                                  +            other actions               +
+ *                                  +----------------------------------------+
+ */
+struct mlx5_flow_handle *
+mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
+			struct mlx5_flow_spec *spec,
+			struct mlx5_flow_attr *attr)
+{
+	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+	struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+	struct mlx5_esw_flow_attr *pre_esw_attr;
+	struct mlx5_mapped_obj restore_obj = {};
+	struct mlx5_sample_flow *sample_flow;
+	struct mlx5_sample_attr *sample_attr;
+	struct mlx5_flow_table *default_tbl;
+	struct mlx5_flow_attr *pre_attr;
+	struct mlx5_eswitch *esw;
+	u32 obj_id;
+	int err;
+
+	if (IS_ERR_OR_NULL(esw_psample))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/* If slow path flag is set, eg. when the neigh is invalid for encap,
+	 * don't offload sample action.
+	 */
+	esw = esw_psample->priv->mdev->priv.eswitch;
+	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
+		return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+
+	sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
+	if (!sample_flow)
+		return ERR_PTR(-ENOMEM);
+	esw_attr->sample->sample_flow = sample_flow;
+
+	/* Allocate default table per vport, chain and prio. Otherwise, there is
+	 * only one default table for the same sampler object. Rules with different
+	 * prio and chain may overlap. For CT sample action, per vport default
+	 * table is needed to resotre the metadata.
+	 */
+	per_vport_tbl_attr.chain = attr->chain;
+	per_vport_tbl_attr.prio = attr->prio;
+	per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+	per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+	default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+	if (IS_ERR(default_tbl)) {
+		err = PTR_ERR(default_tbl);
+		goto err_default_tbl;
+	}
+
+	/* Perform the original matches on the default table.
+	 * Offload all actions except the sample action.
+	 */
+	esw_attr->sample->sample_default_tbl = default_tbl;
+	/* When offloading sample and encap action, if there is no valid
+	 * neigh data struct, a slow path rule is offloaded first. Source
+	 * port metadata match is set at that time. A per vport table is
+	 * already allocated. No need to match it again. So clear the source
+	 * port metadata match.
+	 */
+	mlx5_eswitch_clear_rule_source_port(esw, spec);
+	sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+	if (IS_ERR(sample_flow->rule)) {
+		err = PTR_ERR(sample_flow->rule);
+		goto err_offload_rule;
+	}
+
+	/* Create sampler object. */
+	sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id);
+	if (IS_ERR(sample_flow->sampler)) {
+		err = PTR_ERR(sample_flow->sampler);
+		goto err_sampler;
+	}
+
+	/* Create an id mapping reg_c0 value to sample object. */
+	restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
+	restore_obj.sample.group_id = esw_attr->sample->group_num;
+	restore_obj.sample.rate = esw_attr->sample->rate;
+	restore_obj.sample.trunc_size = esw_attr->sample->trunc_size;
+	err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
+	if (err)
+		goto err_obj_id;
+	esw_attr->sample->restore_obj_id = obj_id;
+
+	/* Create sample restore context. */
+	sample_flow->restore = sample_restore_get(esw_psample, obj_id);
+	if (IS_ERR(sample_flow->restore)) {
+		err = PTR_ERR(sample_flow->restore);
+		goto err_sample_restore;
+	}
+
+	/* Perform the original matches on the original table. Offload the
+	 * sample action. The destination is the sampler object.
+	 */
+	pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+	if (!pre_attr) {
+		err = -ENOMEM;
+		goto err_alloc_flow_attr;
+	}
+	sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL);
+	if (!sample_attr) {
+		err = -ENOMEM;
+		goto err_alloc_sample_attr;
+	}
+	pre_esw_attr = pre_attr->esw_attr;
+	pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
+	pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+	pre_attr->chain = attr->chain;
+	pre_attr->prio = attr->prio;
+	pre_esw_attr->sample = sample_attr;
+	pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id;
+	pre_esw_attr->in_mdev = esw_attr->in_mdev;
+	pre_esw_attr->in_rep = esw_attr->in_rep;
+	sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
+	if (IS_ERR(sample_flow->pre_rule)) {
+		err = PTR_ERR(sample_flow->pre_rule);
+		goto err_pre_offload_rule;
+	}
+	sample_flow->pre_attr = pre_attr;
+
+	return sample_flow->rule;
+
+err_pre_offload_rule:
+	kfree(sample_attr);
+err_alloc_sample_attr:
+	kfree(pre_attr);
+err_alloc_flow_attr:
+	sample_restore_put(esw_psample, sample_flow->restore);
+err_sample_restore:
+	mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
+err_obj_id:
+	sampler_put(esw_psample, sample_flow->sampler);
+err_sampler:
+	/* For sample offload, rule is added in default_tbl. No need to call
+	 * mlx5_esw_chains_put_table()
+	 */
+	attr->prio = 0;
+	attr->chain = 0;
+	mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
+err_offload_rule:
+	mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+	return ERR_PTR(err);
+}
+
+void
+mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample,
+			  struct mlx5_flow_handle *rule,
+			  struct mlx5_flow_attr *attr)
+{
+	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+	struct mlx5_sample_flow *sample_flow;
+	struct mlx5_vport_tbl_attr tbl_attr;
+	struct mlx5_flow_attr *pre_attr;
+	struct mlx5_eswitch *esw;
+
+	if (IS_ERR_OR_NULL(esw_psample))
+		return;
+
+	/* If slow path flag is set, sample action is not offloaded.
+	 * No need to delete sample rule.
+	 */
+	esw = esw_psample->priv->mdev->priv.eswitch;
+	if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
+		mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+		return;
+	}
+
+	sample_flow = esw_attr->sample->sample_flow;
+	pre_attr = sample_flow->pre_attr;
+	memset(pre_attr, 0, sizeof(*pre_attr));
+	esw = esw_psample->priv->mdev->priv.eswitch;
+	mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr);
+	mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
+
+	sample_restore_put(esw_psample, sample_flow->restore);
+	mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id);
+	sampler_put(esw_psample, sample_flow->sampler);
+	tbl_attr.chain = attr->chain;
+	tbl_attr.prio = attr->prio;
+	tbl_attr.vport = esw_attr->in_rep->vport;
+	tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+	mlx5_esw_vporttbl_put(esw, &tbl_attr);
+
+	kfree(pre_attr->esw_attr->sample);
+	kfree(pre_attr);
+	kfree(sample_flow);
+}
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
index 82bff97bd9b7..2a3f4be10030 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
@@ -7,14 +7,32 @@
 #include "en.h"
 #include "eswitch.h"
 
+struct mlx5e_priv;
+struct mlx5_flow_attr;
+struct mlx5_esw_psample;
+
 struct mlx5_sample_attr {
 	u32 group_num;
 	u32 rate;
 	u32 trunc_size;
+	u32 restore_obj_id;
+	u32 sampler_id;
+	struct mlx5_flow_table *sample_default_tbl;
+	struct mlx5_sample_flow *sample_flow;
 };
 
 void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
 
+struct mlx5_flow_handle *
+mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv,
+			struct mlx5_flow_spec *spec,
+			struct mlx5_flow_attr *attr);
+
+void
+mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv,
+			  struct mlx5_flow_handle *rule,
+			  struct mlx5_flow_attr *attr);
+
 struct mlx5_esw_psample *
 mlx5_esw_sample_init(struct mlx5e_priv *priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ed4d7f8f798f..deafb0e03787 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -376,6 +376,9 @@ void
 mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
 			 struct mlx5_termtbl_handle *tt);
 
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec);
+
 struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
@@ -423,6 +426,7 @@ enum {
 	MLX5_ESW_ATTR_FLAG_SLOW_PATH     = BIT(1),
 	MLX5_ESW_ATTR_FLAG_NO_IN_PORT    = BIT(2),
 	MLX5_ESW_ATTR_FLAG_SRC_REWRITE   = BIT(3),
+	MLX5_ESW_ATTR_FLAG_SAMPLE        = BIT(4),
 };
 
 struct mlx5_esw_flow_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 510e9b8b24fe..ac92ffc8a5d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -86,6 +86,26 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw,
 				MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
 }
 
+/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits
+ * are not needed as well in the following process. So clear them all for simplicity.
+ */
+void
+mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
+{
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		void *misc2;
+
+		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
+		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
+		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
+
+		if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
+			spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
+	}
+}
+
 static void
 mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
 				  struct mlx5_flow_spec *spec,
@@ -156,6 +176,19 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
 					 true);
 }
 
+static int
+esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
+		       struct mlx5_flow_act *flow_act,
+		       struct mlx5_esw_flow_attr *esw_attr,
+		       int i)
+{
+	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+	dest[i].sampler_id = esw_attr->sample->sampler_id;
+
+	return 0;
+}
+
 static int
 esw_setup_ft_dest(struct mlx5_flow_destination *dest,
 		  struct mlx5_flow_act *flow_act,
@@ -385,7 +418,10 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
 	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
 		attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
 
-	if (attr->dest_ft) {
+	if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
+		esw_setup_sampler_dest(dest, flow_act, esw_attr, *i);
+		(*i)++;
+	} else if (attr->dest_ft) {
 		esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
 		(*i)++;
 	} else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
@@ -488,7 +524,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		flow_act.modify_hdr = attr->modify_hdr;
 
-	if (split) {
+	/* esw_attr->sample is allocated only when there is a sample action */
+	if (esw_attr->sample && esw_attr->sample->sample_default_tbl) {
+		fdb = esw_attr->sample->sample_default_tbl;
+	} else if (split) {
 		fwd_attr.chain = attr->chain;
 		fwd_attr.prio = attr->prio;
 		fwd_attr.vport = esw_attr->in_rep->vport;
-- 
cgit v1.2.3


From c8129487441e11617fab9770d9f98d9ae8fc7b3a Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 2 Apr 2021 12:05:39 +0200
Subject: can: skb: alloc_can{,fd}_skb(): set "cf" to NULL if skb allocation
 fails

The handling of CAN bus errors typically consist of allocating a CAN
error SKB using alloc_can_err_skb() followed by stats handling and
filling the error details in the newly allocated CAN error SKB. Even
if the allocation of the SKB fails the stats handling should not be
skipped.

The common pattern in CAN drivers is to allocate the skb and work on
the struct can_frame pointer "cf", if it has been assigned by
alloc_can_err_skb().

|	skb = alloc_can_err_skb(priv->ndev, &cf);
|
| 	/* RX errors */
| 	if (bdiag1 & (MCP251XFD_REG_BDIAG1_DCRCERR |
| 		      MCP251XFD_REG_BDIAG1_NCRCERR)) {
| 		netdev_dbg(priv->ndev, "CRC error\n");
|
| 		stats->rx_errors++;
| 		if (cf)
| 			cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
| 	}

In case of an OOM alloc_can_err_skb() returns NULL, but doesn't set
"cf" to NULL as well. For the above pattern to work the "cf" has to be
initialized to NULL, which is easily forgotten.

To solve this kind of problems, set "cf" to NULL if
alloc_can_err_skb() returns NULL.

Link: https://lore.kernel.org/r/20210402102245.1512583-1-mkl@pengutronix.de
Suggested-by: Vincent MAILHOL <mailhol.vincent@wanadoo.fr>
Reviewed-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/skb.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c
index 387c0bc0fb9c..61660248c69e 100644
--- a/drivers/net/can/dev/skb.c
+++ b/drivers/net/can/dev/skb.c
@@ -183,8 +183,11 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 
 	skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) +
 			       sizeof(struct can_frame));
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		*cf = NULL;
+
 		return NULL;
+	}
 
 	skb->protocol = htons(ETH_P_CAN);
 	skb->pkt_type = PACKET_BROADCAST;
@@ -211,8 +214,11 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 
 	skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) +
 			       sizeof(struct canfd_frame));
-	if (unlikely(!skb))
+	if (unlikely(!skb)) {
+		*cfd = NULL;
+
 		return NULL;
+	}
 
 	skb->protocol = htons(ETH_P_CANFD);
 	skb->pkt_type = PACKET_BROADCAST;
-- 
cgit v1.2.3


From 644022b1de9efc901494b8931c78700d49d35d3d Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 1 Apr 2021 10:37:31 +0200
Subject: can: m_can: m_can_receive_skb(): add missing error handling to
 can_rx_offload_queue_sorted() call

In commit 1be37d3b0414 ("can: m_can: fix periph RX path: use
rx-offload to ensure skbs are sent from softirq context") the RX path
for peripherals (i.e. SPI based m_can controllers) was converted to
the rx-offload infrastructure. However, the error handling for
can_rx_offload_queue_sorted() was forgotten.
can_rx_offload_queue_sorted() will return with an error if the
internal queue is full.

This patch adds the missing error handling, by increasing the
rx_fifo_errors.

Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context")
Link: https://lore.kernel.org/r/20210401084515.1455013-1-mkl@pengutronix.de
Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Addresses-Coverity-ID: 1503583 ("Error handling issues")
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Torin Cooper-Bennun <torin@maxiluxsystems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 890ed826a355..34073cd077e4 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -466,10 +466,17 @@ static void m_can_receive_skb(struct m_can_classdev *cdev,
 			      struct sk_buff *skb,
 			      u32 timestamp)
 {
-	if (cdev->is_peripheral)
-		can_rx_offload_queue_sorted(&cdev->offload, skb, timestamp);
-	else
+	if (cdev->is_peripheral) {
+		struct net_device_stats *stats = &cdev->net->stats;
+		int err;
+
+		err = can_rx_offload_queue_sorted(&cdev->offload, skb,
+						  timestamp);
+		if (err)
+			stats->rx_fifo_errors++;
+	} else {
 		netif_receive_skb(skb);
+	}
 }
 
 static void m_can_read_fifo(struct net_device *dev, u32 rxfs)
-- 
cgit v1.2.3


From 8dc987519ae97fd3b994871720ae5f089654b599 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 24 Jul 2019 11:51:32 +0200
Subject: can: c_can: remove unused enum BOSCH_C_CAN_PLATFORM

This patch removes the unused enum BOSCH_C_CAN_PLATFORM.

Link: https://lore.kernel.org/r/20210406110617.1865592-2-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/c_can/c_can.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 8acedd9e63a7..06045f610f0e 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -149,7 +149,6 @@ static const u16 __maybe_unused reg_map_d_can[] = {
 };
 
 enum c_can_dev_id {
-	BOSCH_C_CAN_PLATFORM,
 	BOSCH_C_CAN,
 	BOSCH_D_CAN,
 };
-- 
cgit v1.2.3


From 0084e298acfe643c68daf0a316bb27618ec2a852 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sun, 13 Dec 2020 17:25:15 +0100
Subject: can: mcp251xfd: add BQL support

This patch re-adds BQL support to the driver. Support for
netdev_xmit_more() will be added in a separate patch series.

Link: https://lore.kernel.org/r/20210406110617.1865592-3-mkl@pengutronix.de
Cc: Manivannan Sadhasivam <mani@kernel.org>
Cc: Thomas Kopp <thomas.kopp@microchip.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 142eb4506b55..970dc570e7a5 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -335,6 +335,8 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
 	u8 len;
 	int i, j;
 
+	netdev_reset_queue(priv->ndev);
+
 	/* TEF */
 	tef_ring = priv->tef;
 	tef_ring->head = 0;
@@ -1262,7 +1264,8 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
 
 static int
 mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
-			   const struct mcp251xfd_hw_tef_obj *hw_tef_obj)
+			   const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
+			   unsigned int *frame_len_ptr)
 {
 	struct net_device_stats *stats = &priv->ndev->stats;
 	struct sk_buff *skb;
@@ -1288,8 +1291,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
 		mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts);
 	stats->tx_bytes +=
 		can_rx_offload_get_echo_skb(&priv->offload,
-					    tef_tail,
-					    hw_tef_obj->ts, NULL);
+					    tef_tail, hw_tef_obj->ts,
+					    frame_len_ptr);
 	stats->tx_packets++;
 	priv->tef->tail++;
 
@@ -1347,6 +1350,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 {
 	struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
+	unsigned int total_frame_len = 0;
 	u8 tef_tail, len, l;
 	int err, i;
 
@@ -1368,7 +1372,9 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 	}
 
 	for (i = 0; i < len; i++) {
-		err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]);
+		unsigned int frame_len = 0;
+
+		err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
 		/* -EAGAIN means the Sequence Number in the TEF
 		 * doesn't match our tef_tail. This can happen if we
 		 * read the TEF objects too early. Leave loop let the
@@ -1378,6 +1384,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 			goto out_netif_wake_queue;
 		if (err)
 			return err;
+
+		total_frame_len += frame_len;
 	}
 
  out_netif_wake_queue:
@@ -1403,6 +1411,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 			return err;
 
 		tx_ring->tail += len;
+		netdev_completed_queue(priv->ndev, len, total_frame_len);
 
 		err = mcp251xfd_check_tef_tail(priv);
 		if (err)
@@ -2446,6 +2455,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
 	struct mcp251xfd_priv *priv = netdev_priv(ndev);
 	struct mcp251xfd_tx_ring *tx_ring = priv->tx;
 	struct mcp251xfd_tx_obj *tx_obj;
+	unsigned int frame_len;
 	u8 tx_head;
 	int err;
 
@@ -2464,7 +2474,10 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
 	if (mcp251xfd_get_tx_free(tx_ring) == 0)
 		netif_stop_queue(ndev);
 
-	can_put_echo_skb(skb, ndev, tx_head, 0);
+	frame_len = can_skb_get_frame_len(skb);
+	err = can_put_echo_skb(skb, ndev, tx_head, frame_len);
+	if (!err)
+		netdev_sent_queue(priv->ndev, frame_len);
 
 	err = mcp251xfd_tx_obj_write(priv, tx_obj);
 	if (err)
-- 
cgit v1.2.3


From ef7a8c3e75994f2bdaa27b481de39ea000119d11 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 15 Mar 2021 08:59:15 +0100
Subject: can: mcp251xfd: mcp251xfd_regmap_crc_read_one(): Factor out crc check
 into separate function

This patch factors out the crc check into a separate function. This is
preparation for the next patch.

Link: https://lore.kernel.org/r/20210406110617.1865592-4-mkl@pengutronix.de
Cc: Manivannan Sadhasivam <mani@kernel.org>
Cc: Thomas Kopp <thomas.kopp@microchip.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c | 30 ++++++++++++++++--------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
index 314f868b3465..35557ac43c03 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
@@ -232,13 +232,31 @@ mcp251xfd_regmap_crc_write(void *context,
 						 count - data_offset);
 }
 
+static int
+mcp251xfd_regmap_crc_read_check_crc(const struct mcp251xfd_map_buf_crc * const buf_rx,
+				    const struct mcp251xfd_map_buf_crc * const buf_tx,
+				    unsigned int data_len)
+{
+	u16 crc_received, crc_calculated;
+
+	crc_received = get_unaligned_be16(buf_rx->data + data_len);
+	crc_calculated = mcp251xfd_crc16_compute2(&buf_tx->cmd,
+						  sizeof(buf_tx->cmd),
+						  buf_rx->data,
+						  data_len);
+	if (crc_received != crc_calculated)
+		return -EBADMSG;
+
+	return 0;
+}
+
+
 static int
 mcp251xfd_regmap_crc_read_one(struct mcp251xfd_priv *priv,
 			      struct spi_message *msg, unsigned int data_len)
 {
 	const struct mcp251xfd_map_buf_crc *buf_rx = priv->map_buf_crc_rx;
 	const struct mcp251xfd_map_buf_crc *buf_tx = priv->map_buf_crc_tx;
-	u16 crc_received, crc_calculated;
 	int err;
 
 	BUILD_BUG_ON(sizeof(buf_rx->cmd) != sizeof(__be16) + sizeof(u8));
@@ -248,15 +266,7 @@ mcp251xfd_regmap_crc_read_one(struct mcp251xfd_priv *priv,
 	if (err)
 		return err;
 
-	crc_received = get_unaligned_be16(buf_rx->data + data_len);
-	crc_calculated = mcp251xfd_crc16_compute2(&buf_tx->cmd,
-						  sizeof(buf_tx->cmd),
-						  buf_rx->data,
-						  data_len);
-	if (crc_received != crc_calculated)
-		return -EBADMSG;
-
-	return 0;
+	return mcp251xfd_regmap_crc_read_check_crc(buf_rx, buf_tx, data_len);
 }
 
 static int
-- 
cgit v1.2.3


From c7eb923c3caf4c6a183465cc012dc368b199a4b2 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 15 Mar 2021 10:59:09 +0100
Subject: can: mcp251xfd: mcp251xfd_regmap_crc_read(): work around broken CRC
 on TBC register

MCP251XFD_REG_TBC is the time base counter register. It increments
once per SYS clock tick, which is 20 or 40 MHz. Observation shows that
if the lowest byte (which is transferred first on the SPI bus) of that
register is 0x00 or 0x80 the calculated CRC doesn't always match the
transferred one.

To reproduce this problem let the driver read the TBC register in a
high frequency. This can be done by attaching only the mcp251xfd CAN
controller to a valid terminated CAN bus and send a single CAN frame.
As there are no other CAN controller on the bus, the sent CAN frame is
not ACKed and the mcp251xfd repeats it. If user space enables the bus
error reporting, each of the NACK errors is reported with a time
stamp (which is read from the TBC register) to user space.

$ ip link set can0 down
$ ip link set can0 up type can bitrate 500000 berr-reporting on
$ cansend can0 4FF#ff.01.00.00.00.00.00.00

This leads to several error messages per second:

| mcp251xfd spi0.0 can0: CRC read error at address 0x0010 (length=4, data=00 3a 86 da, CRC=0x7753) retrying.
| mcp251xfd spi0.0 can0: CRC read error at address 0x0010 (length=4, data=80 01 b4 da, CRC=0x5830) retrying.
| mcp251xfd spi0.0 can0: CRC read error at address 0x0010 (length=4, data=00 e9 23 db, CRC=0xa723) retrying.
| mcp251xfd spi0.0 can0: CRC read error at address 0x0010 (length=4, data=00 8a 30 db, CRC=0x4a9c) retrying.
| mcp251xfd spi0.0 can0: CRC read error at address 0x0010 (length=4, data=80 f3 43 db, CRC=0x66d2) retrying.

If the highest bit in the lowest byte is flipped the transferred CRC
matches the calculated one. We assume for now the CRC calculation in
the chip works on wrong data and the transferred data is correct.

This patch implements the following workaround:

- If a CRC read error on the TBC register is detected and the lowest
  byte is 0x00 or 0x80, the highest bit of the lowest byte is flipped
  and the CRC is calculated again.
- If the CRC now matches, the _original_ data is passed to the reader.
  For now we assume transferred data was OK.

Link: https://lore.kernel.org/r/20210406110617.1865592-5-mkl@pengutronix.de
Cc: Manivannan Sadhasivam <mani@kernel.org>
Cc: Thomas Kopp <thomas.kopp@microchip.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c | 34 ++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
index 35557ac43c03..297491516a26 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-regmap.c
@@ -321,6 +321,40 @@ mcp251xfd_regmap_crc_read(void *context,
 		if (err != -EBADMSG)
 			return err;
 
+		/* MCP251XFD_REG_TBC is the time base counter
+		 * register. It increments once per SYS clock tick,
+		 * which is 20 or 40 MHz.
+		 *
+		 * Observation shows that if the lowest byte (which is
+		 * transferred first on the SPI bus) of that register
+		 * is 0x00 or 0x80 the calculated CRC doesn't always
+		 * match the transferred one.
+		 *
+		 * If the highest bit in the lowest byte is flipped
+		 * the transferred CRC matches the calculated one. We
+		 * assume for now the CRC calculation in the chip
+		 * works on wrong data and the transferred data is
+		 * correct.
+		 */
+		if (reg == MCP251XFD_REG_TBC &&
+		    (buf_rx->data[0] == 0x0 || buf_rx->data[0] == 0x80)) {
+			/* Flip highest bit in lowest byte of le32 */
+			buf_rx->data[0] ^= 0x80;
+
+			/* re-check CRC */
+			err = mcp251xfd_regmap_crc_read_check_crc(buf_rx,
+								  buf_tx,
+								  val_len);
+			if (!err) {
+				/* If CRC is now correct, assume
+				 * transferred data was OK, flip bit
+				 * back to original value.
+				 */
+				buf_rx->data[0] ^= 0x80;
+				goto out;
+			}
+		}
+
 		/* MCP251XFD_REG_OSC is the first ever reg we read from.
 		 *
 		 * The chip may be in deep sleep and this SPI transfer
-- 
cgit v1.2.3


From daa83ab039546a906953f1821e493ddc980ed889 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:15:57 -0700
Subject: mptcp: move flags and ifindex out of mptcp_addr_info

This patch moved the flags and ifindex fields from struct mptcp_addr_info
to struct mptcp_pm_addr_entry. Add the flags and ifindex values as two new
parameters to __mptcp_subflow_connect.

In mptcp_pm_create_subflow_or_signal_addr, pass the local address entry's
flags and ifindex fields to __mptcp_subflow_connect.

In mptcp_pm_nl_add_addr_received, just pass two zeros to it.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/pm_netlink.c | 41 ++++++++++++++++++++++-------------------
 net/mptcp/protocol.h   |  5 ++---
 net/mptcp/subflow.c    |  7 ++++---
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 51be6c34b339..6ba040897738 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -25,6 +25,8 @@ static int pm_nl_pernet_id;
 struct mptcp_pm_addr_entry {
 	struct list_head	list;
 	struct mptcp_addr_info	addr;
+	u8			flags;
+	int			ifindex;
 	struct rcu_head		rcu;
 	struct socket		*lsk;
 };
@@ -168,7 +170,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
 	rcu_read_lock();
 	__mptcp_flush_join_list(msk);
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
-		if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
+		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
 			continue;
 
 		if (entry->addr.family != sk->sk_family) {
@@ -206,7 +208,7 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
 	 * can lead to additional addresses not being announced.
 	 */
 	list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
-		if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+		if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
 			continue;
 		if (i++ == pos) {
 			ret = entry;
@@ -459,7 +461,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 			check_work_pending(msk);
 			remote_address((struct sock_common *)sk, &remote);
 			spin_unlock_bh(&msk->pm.lock);
-			__mptcp_subflow_connect(sk, &local->addr, &remote);
+			__mptcp_subflow_connect(sk, &local->addr, &remote,
+						local->flags, local->ifindex);
 			spin_lock_bh(&msk->pm.lock);
 			return;
 		}
@@ -514,7 +517,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
 	local.family = remote.family;
 
 	spin_unlock_bh(&msk->pm.lock);
-	__mptcp_subflow_connect(sk, &local, &remote);
+	__mptcp_subflow_connect(sk, &local, &remote, 0, 0);
 	spin_lock_bh(&msk->pm.lock);
 
 add_addr_echo:
@@ -683,7 +686,7 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 
 static bool address_use_port(struct mptcp_pm_addr_entry *entry)
 {
-	return (entry->addr.flags &
+	return (entry->flags &
 		(MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) ==
 		MPTCP_PM_ADDR_FLAG_SIGNAL;
 }
@@ -735,11 +738,11 @@ find_next:
 	if (entry->addr.id > pernet->next_id)
 		pernet->next_id = entry->addr.id;
 
-	if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
 		addr_max = pernet->add_addr_signal_max;
 		WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1);
 	}
-	if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
 		addr_max = pernet->local_addr_max;
 		WRITE_ONCE(pernet->local_addr_max, addr_max + 1);
 	}
@@ -841,10 +844,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
 		return -ENOMEM;
 
 	entry->addr = skc_local;
-	entry->addr.ifindex = 0;
-	entry->addr.flags = 0;
 	entry->addr.id = 0;
 	entry->addr.port = 0;
+	entry->ifindex = 0;
+	entry->flags = 0;
 	entry->lsk = NULL;
 	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
 	if (ret < 0)
@@ -959,14 +962,14 @@ skip_family:
 	if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
 		u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
 
-		entry->addr.ifindex = val;
+		entry->ifindex = val;
 	}
 
 	if (tb[MPTCP_PM_ADDR_ATTR_ID])
 		entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
 
 	if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
-		entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
+		entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
 
 	if (tb[MPTCP_PM_ADDR_ATTR_PORT])
 		entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
@@ -1218,11 +1221,11 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
 		spin_unlock_bh(&pernet->lock);
 		return -EINVAL;
 	}
-	if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+	if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
 		addr_max = pernet->add_addr_signal_max;
 		WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1);
 	}
-	if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+	if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
 		addr_max = pernet->local_addr_max;
 		WRITE_ONCE(pernet->local_addr_max, addr_max - 1);
 	}
@@ -1338,10 +1341,10 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb,
 		goto nla_put_failure;
 	if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id))
 		goto nla_put_failure;
-	if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags))
+	if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags))
 		goto nla_put_failure;
-	if (entry->addr.ifindex &&
-	    nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->addr.ifindex))
+	if (entry->ifindex &&
+	    nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex))
 		goto nla_put_failure;
 
 	if (addr->family == AF_INET &&
@@ -1569,7 +1572,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
 	if (ret < 0)
 		return ret;
 
-	if (addr.addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+	if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
 		bkup = 1;
 
 	list_for_each_entry(entry, &pernet->local_addr_list, list) {
@@ -1579,9 +1582,9 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
 				return ret;
 
 			if (bkup)
-				entry->addr.flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+				entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
 			else
-				entry->addr.flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+				entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
 		}
 	}
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 40e9b05856cd..cb5dad522f39 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -173,8 +173,6 @@ struct mptcp_addr_info {
 	sa_family_t		family;
 	__be16			port;
 	u8			id;
-	u8			flags;
-	int			ifindex;
 	union {
 		struct in_addr addr;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -557,7 +555,8 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
 
 /* called with sk socket lock held */
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
-			    const struct mptcp_addr_info *remote);
+			    const struct mptcp_addr_info *remote,
+			    u8 flags, int ifindex);
 int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
 void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 			 struct sockaddr_storage *addr,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 223d6be5fc3b..3c19a5265a0f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1256,7 +1256,8 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 }
 
 int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
-			    const struct mptcp_addr_info *remote)
+			    const struct mptcp_addr_info *remote,
+			    u8 flags, int ifindex)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_subflow_context *subflow;
@@ -1300,7 +1301,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	if (addr.ss_family == AF_INET6)
 		addrlen = sizeof(struct sockaddr_in6);
 #endif
-	ssk->sk_bound_dev_if = loc->ifindex;
+	ssk->sk_bound_dev_if = ifindex;
 	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
 	if (err)
 		goto failed;
@@ -1312,7 +1313,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	subflow->local_id = local_id;
 	subflow->remote_id = remote_id;
 	subflow->request_join = 1;
-	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+	subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
 	mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
 
 	mptcp_add_pending_subflow(msk, subflow);
-- 
cgit v1.2.3


From 30f60bae80922582a16e80b070171a865fce58cd Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:15:58 -0700
Subject: mptcp: use mptcp_addr_info in mptcp_out_options

This patch moved the mptcp_addr_info struct from protocol.h to mptcp.h,
added a new struct mptcp_addr_info member addr in struct mptcp_out_options,
and dropped the original addr, addr6, addr_id and port fields in it. Then
we can use opts->addr to get the adding address from PM directly using
mptcp_pm_add_addr_signal.

Since the port number became big-endian now, use ntohs to convert it
before sending it out with the ADD_ADDR suboption. Also convert it
when passing it to add_addr_generate_hmac or printing it out.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  | 21 +++++++++++++--------
 net/mptcp/options.c  | 44 ++++++++++++++++++++------------------------
 net/mptcp/protocol.h | 12 ------------
 3 files changed, 33 insertions(+), 44 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 16fe34d139c3..83f23774b908 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -41,20 +41,25 @@ struct mptcp_rm_list {
 	u8 nr;
 };
 
+struct mptcp_addr_info {
+	u8			id;
+	sa_family_t		family;
+	__be16			port;
+	union {
+		struct in_addr	addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+		struct in6_addr	addr6;
+#endif
+	};
+};
+
 struct mptcp_out_options {
 #if IS_ENABLED(CONFIG_MPTCP)
 	u16 suboptions;
 	u64 sndr_key;
 	u64 rcvr_key;
-	union {
-		struct in_addr addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		struct in6_addr addr6;
-#endif
-	};
-	u8 addr_id;
-	u16 port;
 	u64 ahmac;
+	struct mptcp_addr_info addr;
 	struct mptcp_rm_list rm_list;
 	u8 join_id;
 	u8 backup;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 4b7119eb2c31..352c128337a7 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -626,7 +626,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
 	bool drop_other_suboptions = false;
 	unsigned int opt_size = *size;
-	struct mptcp_addr_info saddr;
 	bool echo;
 	bool port;
 	int len;
@@ -643,45 +642,40 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	}
 
 	if (!mptcp_pm_should_add_signal(msk) ||
-	    !(mptcp_pm_add_addr_signal(msk, remaining, &saddr, &echo, &port)))
+	    !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
 		return false;
 
-	len = mptcp_add_addr_len(saddr.family, echo, port);
+	len = mptcp_add_addr_len(opts->addr.family, echo, port);
 	if (remaining < len)
 		return false;
 
 	*size = len;
 	if (drop_other_suboptions)
 		*size -= opt_size;
-	opts->addr_id = saddr.id;
-	if (port)
-		opts->port = ntohs(saddr.port);
-	if (saddr.family == AF_INET) {
+	if (opts->addr.family == AF_INET) {
 		opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
-		opts->addr = saddr.addr;
 		if (!echo) {
 			opts->ahmac = add_addr_generate_hmac(msk->local_key,
 							     msk->remote_key,
-							     opts->addr_id,
-							     &opts->addr,
-							     opts->port);
+							     opts->addr.id,
+							     &opts->addr.addr,
+							     ntohs(opts->addr.port));
 		}
 	}
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	else if (saddr.family == AF_INET6) {
+	else if (opts->addr.family == AF_INET6) {
 		opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
-		opts->addr6 = saddr.addr6;
 		if (!echo) {
 			opts->ahmac = add_addr6_generate_hmac(msk->local_key,
 							      msk->remote_key,
-							      opts->addr_id,
-							      &opts->addr6,
-							      opts->port);
+							      opts->addr.id,
+							      &opts->addr.addr6,
+							      ntohs(opts->addr.port));
 		}
 	}
 #endif
 	pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
-		 opts->addr_id, opts->ahmac, echo, opts->port);
+		 opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
 
 	return true;
 }
@@ -1217,7 +1211,7 @@ mp_capable_done:
 			len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
 #endif
 
-		if (opts->port)
+		if (opts->addr.port)
 			len += TCPOLEN_MPTCP_PORT_LEN;
 
 		if (opts->ahmac) {
@@ -1226,28 +1220,30 @@ mp_capable_done:
 		}
 
 		*ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
-				      len, echo, opts->addr_id);
+				      len, echo, opts->addr.id);
 		if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
-			memcpy((u8 *)ptr, (u8 *)&opts->addr.s_addr, 4);
+			memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4);
 			ptr += 1;
 		}
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 		else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
-			memcpy((u8 *)ptr, opts->addr6.s6_addr, 16);
+			memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16);
 			ptr += 4;
 		}
 #endif
 
-		if (!opts->port) {
+		if (!opts->addr.port) {
 			if (opts->ahmac) {
 				put_unaligned_be64(opts->ahmac, ptr);
 				ptr += 2;
 			}
 		} else {
+			u16 port = ntohs(opts->addr.port);
+
 			if (opts->ahmac) {
 				u8 *bptr = (u8 *)ptr;
 
-				put_unaligned_be16(opts->port, bptr);
+				put_unaligned_be16(port, bptr);
 				bptr += 2;
 				put_unaligned_be64(opts->ahmac, bptr);
 				bptr += 8;
@@ -1256,7 +1252,7 @@ mp_capable_done:
 
 				ptr += 3;
 			} else {
-				put_unaligned_be32(opts->port << 16 |
+				put_unaligned_be32(port << 16 |
 						   TCPOPT_NOP << 8 |
 						   TCPOPT_NOP, ptr);
 				ptr += 1;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index cb5dad522f39..4890dbb9f710 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -169,18 +169,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 		     ((nib & 0xF) << 8) | field);
 }
 
-struct mptcp_addr_info {
-	sa_family_t		family;
-	__be16			port;
-	u8			id;
-	union {
-		struct in_addr addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		struct in6_addr addr6;
-#endif
-	};
-};
-
 enum mptcp_pm_status {
 	MPTCP_PM_ADD_ADDR_RECEIVED,
 	MPTCP_PM_ADD_ADDR_SEND_ACK,
-- 
cgit v1.2.3


From fef6b7ecfbd465b7e27530e797c00b7384f78e44 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:15:59 -0700
Subject: mptcp: drop OPTION_MPTCP_ADD_ADDR6

Since the family field was added in struct mptcp_out_options, no need to
use OPTION_MPTCP_ADD_ADDR6 to identify the IPv6 address. Drop it.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 15 +++++----------
 net/mptcp/protocol.h |  9 ++++-----
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 352c128337a7..3a4c939b3aff 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -652,8 +652,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	*size = len;
 	if (drop_other_suboptions)
 		*size -= opt_size;
+	opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
 	if (opts->addr.family == AF_INET) {
-		opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
 		if (!echo) {
 			opts->ahmac = add_addr_generate_hmac(msk->local_key,
 							     msk->remote_key,
@@ -664,7 +664,6 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	}
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 	else if (opts->addr.family == AF_INET6) {
-		opts->suboptions |= OPTION_MPTCP_ADD_ADDR6;
 		if (!echo) {
 			opts->ahmac = add_addr6_generate_hmac(msk->local_key,
 							      msk->remote_key,
@@ -1198,16 +1197,12 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
 	}
 
 mp_capable_done:
-	if ((OPTION_MPTCP_ADD_ADDR
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	     | OPTION_MPTCP_ADD_ADDR6
-#endif
-	    ) & opts->suboptions) {
+	if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
 		u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
 		u8 echo = MPTCP_ADDR_ECHO;
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions)
+		if (opts->addr.family == AF_INET6)
 			len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
 #endif
 
@@ -1221,12 +1216,12 @@ mp_capable_done:
 
 		*ptr++ = mptcp_option(MPTCPOPT_ADD_ADDR,
 				      len, echo, opts->addr.id);
-		if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+		if (opts->addr.family == AF_INET) {
 			memcpy((u8 *)ptr, (u8 *)&opts->addr.addr.s_addr, 4);
 			ptr += 1;
 		}
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		else if (OPTION_MPTCP_ADD_ADDR6 & opts->suboptions) {
+		else if (opts->addr.family == AF_INET6) {
 			memcpy((u8 *)ptr, opts->addr.addr6.s6_addr, 16);
 			ptr += 4;
 		}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 4890dbb9f710..7c5fd06ceaf2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -22,11 +22,10 @@
 #define OPTION_MPTCP_MPJ_SYNACK	BIT(4)
 #define OPTION_MPTCP_MPJ_ACK	BIT(5)
 #define OPTION_MPTCP_ADD_ADDR	BIT(6)
-#define OPTION_MPTCP_ADD_ADDR6	BIT(7)
-#define OPTION_MPTCP_RM_ADDR	BIT(8)
-#define OPTION_MPTCP_FASTCLOSE	BIT(9)
-#define OPTION_MPTCP_PRIO	BIT(10)
-#define OPTION_MPTCP_RST	BIT(11)
+#define OPTION_MPTCP_RM_ADDR	BIT(7)
+#define OPTION_MPTCP_FASTCLOSE	BIT(8)
+#define OPTION_MPTCP_PRIO	BIT(9)
+#define OPTION_MPTCP_RST	BIT(10)
 
 /* MPTCP option subtypes */
 #define MPTCPOPT_MP_CAPABLE	0
-- 
cgit v1.2.3


From f7dafee18538a3933f2cb9d4dcf8b98ff1276ffb Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:16:00 -0700
Subject: mptcp: use mptcp_addr_info in mptcp_options_received

This patch added a new struct mptcp_addr_info member addr in struct
mptcp_options_received, and dropped the original family, addr_id, addr,
addr6 and port fields in it. Then we can pass the parameter mp_opt.addr
directly to mptcp_pm_add_addr_received and mptcp_pm_add_addr_echoed.

Since the port number became big-endian now, use htons to convert the
incoming port number to it. Also use ntohs to convert it when passing
it to add_addr_generate_hmac or printing it out.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 58 ++++++++++++++++++++--------------------------------
 net/mptcp/protocol.h | 10 +--------
 2 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 3a4c939b3aff..8d28f2e0de82 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -220,45 +220,45 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		if (!mp_opt->echo) {
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
-				mp_opt->family = MPTCP_ADDR_IPVERSION_4;
+				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_4;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
 				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
-				mp_opt->family = MPTCP_ADDR_IPVERSION_6;
+				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_6;
 #endif
 			else
 				break;
 		} else {
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
-				mp_opt->family = MPTCP_ADDR_IPVERSION_4;
+				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_4;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
 				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
-				mp_opt->family = MPTCP_ADDR_IPVERSION_6;
+				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_6;
 #endif
 			else
 				break;
 		}
 
 		mp_opt->add_addr = 1;
-		mp_opt->addr_id = *ptr++;
-		if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
-			memcpy((u8 *)&mp_opt->addr.s_addr, (u8 *)ptr, 4);
+		mp_opt->addr.id = *ptr++;
+		if (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_4) {
+			memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
 			ptr += 4;
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT) {
-				mp_opt->port = get_unaligned_be16(ptr);
+				mp_opt->addr.port = htons(get_unaligned_be16(ptr));
 				ptr += 2;
 			}
 		}
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 		else {
-			memcpy(mp_opt->addr6.s6_addr, (u8 *)ptr, 16);
+			memcpy(mp_opt->addr.addr6.s6_addr, (u8 *)ptr, 16);
 			ptr += 16;
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT) {
-				mp_opt->port = get_unaligned_be16(ptr);
+				mp_opt->addr.port = htons(get_unaligned_be16(ptr));
 				ptr += 2;
 			}
 		}
@@ -268,8 +268,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			ptr += 8;
 		}
 		pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
-			 (mp_opt->family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
-			 mp_opt->addr_id, mp_opt->ahmac, mp_opt->echo, mp_opt->port);
+			 (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
+			 mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
 		break;
 
 	case MPTCPOPT_RM_ADDR:
@@ -335,7 +335,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 	mp_opt->add_addr = 0;
 	mp_opt->ahmac = 0;
 	mp_opt->fastclose = 0;
-	mp_opt->port = 0;
+	mp_opt->addr.port = 0;
 	mp_opt->rm_addr = 0;
 	mp_opt->dss = 0;
 	mp_opt->mp_prio = 0;
@@ -991,17 +991,17 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
 	if (mp_opt->echo)
 		return true;
 
-	if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
+	if (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_4)
 		hmac = add_addr_generate_hmac(msk->remote_key,
 					      msk->local_key,
-					      mp_opt->addr_id, &mp_opt->addr,
-					      mp_opt->port);
+					      mp_opt->addr.id, &mp_opt->addr.addr,
+					      ntohs(mp_opt->addr.port));
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 	else
 		hmac = add_addr6_generate_hmac(msk->remote_key,
 					       msk->local_key,
-					       mp_opt->addr_id, &mp_opt->addr6,
-					       mp_opt->port);
+					       mp_opt->addr.id, &mp_opt->addr.addr6,
+					       ntohs(mp_opt->addr.port));
 #endif
 
 	pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
@@ -1043,30 +1043,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 	}
 
 	if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
-		struct mptcp_addr_info addr;
-
-		addr.port = htons(mp_opt.port);
-		addr.id = mp_opt.addr_id;
-		if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
-			addr.family = AF_INET;
-			addr.addr = mp_opt.addr;
-		}
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
-			addr.family = AF_INET6;
-			addr.addr6 = mp_opt.addr6;
-		}
-#endif
 		if (!mp_opt.echo) {
-			mptcp_pm_add_addr_received(msk, &addr);
+			mptcp_pm_add_addr_received(msk, &mp_opt.addr);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
 		} else {
-			mptcp_pm_add_addr_echoed(msk, &addr);
-			mptcp_pm_del_add_timer(msk, &addr);
+			mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+			mptcp_pm_del_add_timer(msk, &mp_opt.addr);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
 		}
 
-		if (mp_opt.port)
+		if (mp_opt.addr.port)
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
 
 		mp_opt.add_addr = 0;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7c5fd06ceaf2..ca3013facbba 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -133,7 +133,6 @@ struct mptcp_options_received {
 		add_addr : 1,
 		rm_addr : 1,
 		mp_prio : 1,
-		family : 4,
 		echo : 1,
 		backup : 1;
 	u32	token;
@@ -148,16 +147,9 @@ struct mptcp_options_received {
 		ack64:1,
 		mpc_map:1,
 		__unused:2;
-	u8	addr_id;
+	struct mptcp_addr_info addr;
 	struct mptcp_rm_list rm_list;
-	union {
-		struct in_addr	addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-		struct in6_addr	addr6;
-#endif
-	};
 	u64	ahmac;
-	u16	port;
 	u8	reset_reason:4;
 	u8	reset_transient:1;
 };
-- 
cgit v1.2.3


From 1b1a6ef597c7f662da847499d02ad519c1a8b1b3 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:16:01 -0700
Subject: mptcp: drop MPTCP_ADDR_IPVERSION_4/6

Since the type of the address family in struct mptcp_options_received
became sa_family_t, we should set AF_INET/AF_INET6 to it, instead of
using MPTCP_ADDR_IPVERSION_4/6.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 14 +++++++-------
 net/mptcp/protocol.h |  2 --
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8d28f2e0de82..3bdb92a3b480 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -220,22 +220,22 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 		if (!mp_opt->echo) {
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT)
-				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_4;
+				mp_opt->addr.family = AF_INET;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6 ||
 				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_PORT)
-				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_6;
+				mp_opt->addr.family = AF_INET6;
 #endif
 			else
 				break;
 		} else {
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE ||
 			    opsize == TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT)
-				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_4;
+				mp_opt->addr.family = AF_INET;
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 			else if (opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE ||
 				 opsize == TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT)
-				mp_opt->addr.family = MPTCP_ADDR_IPVERSION_6;
+				mp_opt->addr.family = AF_INET6;
 #endif
 			else
 				break;
@@ -243,7 +243,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 
 		mp_opt->add_addr = 1;
 		mp_opt->addr.id = *ptr++;
-		if (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_4) {
+		if (mp_opt->addr.family == AF_INET) {
 			memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
 			ptr += 4;
 			if (opsize == TCPOLEN_MPTCP_ADD_ADDR_PORT ||
@@ -268,7 +268,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			ptr += 8;
 		}
 		pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d",
-			 (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_6) ? "6" : "",
+			 (mp_opt->addr.family == AF_INET6) ? "6" : "",
 			 mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port));
 		break;
 
@@ -991,7 +991,7 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
 	if (mp_opt->echo)
 		return true;
 
-	if (mp_opt->addr.family == MPTCP_ADDR_IPVERSION_4)
+	if (mp_opt->addr.family == AF_INET)
 		hmac = add_addr_generate_hmac(msk->remote_key,
 					      msk->local_key,
 					      mp_opt->addr.id, &mp_opt->addr.addr,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index ca3013facbba..d8de1e961ab0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -90,8 +90,6 @@
 
 /* MPTCP ADD_ADDR flags */
 #define MPTCP_ADDR_ECHO		BIT(0)
-#define MPTCP_ADDR_IPVERSION_4	4
-#define MPTCP_ADDR_IPVERSION_6	6
 
 /* MPTCP MP_PRIO flags */
 #define MPTCP_PRIO_BKUP		BIT(0)
-- 
cgit v1.2.3


From 761c124ed9698581e88d7babb9001401724435dd Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:16:02 -0700
Subject: mptcp: unify add_addr(6)_generate_hmac

The length of the IPv4 address is 4 octets and IPv6 is 16. That's the only
difference between add_addr_generate_hmac and add_addr6_generate_hmac.

This patch dropped the duplicate code and unify them into one.

Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 81 +++++++++++++++++------------------------------------
 1 file changed, 25 insertions(+), 56 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 3bdb92a3b480..c7eb61d0564c 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -583,39 +583,32 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 	return true;
 }
 
-static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
-				  struct in_addr *addr, u16 port)
-{
-	u8 hmac[SHA256_DIGEST_SIZE];
-	u8 msg[7];
-
-	msg[0] = addr_id;
-	memcpy(&msg[1], &addr->s_addr, 4);
-	msg[5] = port >> 8;
-	msg[6] = port & 0xFF;
-
-	mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
-
-	return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
-}
-
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
-				   struct in6_addr *addr, u16 port)
+static u64 add_addr_generate_hmac(u64 key1, u64 key2,
+				  struct mptcp_addr_info *addr)
 {
+	u16 port = ntohs(addr->port);
 	u8 hmac[SHA256_DIGEST_SIZE];
 	u8 msg[19];
+	int i = 0;
 
-	msg[0] = addr_id;
-	memcpy(&msg[1], &addr->s6_addr, 16);
-	msg[17] = port >> 8;
-	msg[18] = port & 0xFF;
+	msg[i++] = addr->id;
+	if (addr->family == AF_INET) {
+		memcpy(&msg[i], &addr->addr.s_addr, 4);
+		i += 4;
+	}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+	else if (addr->family == AF_INET6) {
+		memcpy(&msg[i], &addr->addr6.s6_addr, 16);
+		i += 16;
+	}
+#endif
+	msg[i++] = port >> 8;
+	msg[i++] = port & 0xFF;
 
-	mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
+	mptcp_crypto_hmac_sha(key1, key2, msg, i, hmac);
 
 	return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]);
 }
-#endif
 
 static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *skb,
 					       unsigned int *size,
@@ -653,26 +646,11 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	if (drop_other_suboptions)
 		*size -= opt_size;
 	opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
-	if (opts->addr.family == AF_INET) {
-		if (!echo) {
-			opts->ahmac = add_addr_generate_hmac(msk->local_key,
-							     msk->remote_key,
-							     opts->addr.id,
-							     &opts->addr.addr,
-							     ntohs(opts->addr.port));
-		}
+	if (!echo) {
+		opts->ahmac = add_addr_generate_hmac(msk->local_key,
+						     msk->remote_key,
+						     &opts->addr);
 	}
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	else if (opts->addr.family == AF_INET6) {
-		if (!echo) {
-			opts->ahmac = add_addr6_generate_hmac(msk->local_key,
-							      msk->remote_key,
-							      opts->addr.id,
-							      &opts->addr.addr6,
-							      ntohs(opts->addr.port));
-		}
-	}
-#endif
 	pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
 		 opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
 
@@ -991,18 +969,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
 	if (mp_opt->echo)
 		return true;
 
-	if (mp_opt->addr.family == AF_INET)
-		hmac = add_addr_generate_hmac(msk->remote_key,
-					      msk->local_key,
-					      mp_opt->addr.id, &mp_opt->addr.addr,
-					      ntohs(mp_opt->addr.port));
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-	else
-		hmac = add_addr6_generate_hmac(msk->remote_key,
-					       msk->local_key,
-					       mp_opt->addr.id, &mp_opt->addr.addr6,
-					       ntohs(mp_opt->addr.port));
-#endif
+	hmac = add_addr_generate_hmac(msk->remote_key,
+				      msk->local_key,
+				      &mp_opt->addr);
 
 	pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
 		 msk, (unsigned long long)hmac,
-- 
cgit v1.2.3


From c3eaa5f667cb60ea23c1bbc59e83c4f71de18e48 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 6 Apr 2021 17:16:03 -0700
Subject: selftests: mptcp: add the net device name testcase

This patch added a new testcase for setting the net device name. In it,
pass the net device name to pm_nl_ctl to set the ifindex field of struct
mptcp_pm_addr_entry.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index abeb24b7f8ec..fd99485cf2a4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -777,6 +777,14 @@ subflows_tests()
 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+	# single subflow, dev
+	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3
+	run_tests $ns1 $ns2 10.0.1.1
+	chk_join_nr "single subflow, dev" 1 1 1
 }
 
 signal_address_tests()
-- 
cgit v1.2.3


From 07f8252fe0e3c2b6320eeff18bdc5b7fb8845cb3 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 6 Apr 2021 17:16:04 -0700
Subject: mptcp: drop all sub-options except ADD_ADDR when the echo bit is set

Current Linux carries echo-ed ADD_ADDR over pure TCP ACKs, so there is no
need to add a DSS element that would fit only ADD_ADDR with IPv4 address.
Drop the DSS from echo-ed ADD_ADDR, regardless of the IP version.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c7eb61d0564c..d51c3ad54d9a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -624,7 +624,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
 	int len;
 
 	if ((mptcp_pm_should_add_signal_ipv6(msk) ||
-	     mptcp_pm_should_add_signal_port(msk)) &&
+	     mptcp_pm_should_add_signal_port(msk) ||
+	     mptcp_pm_should_add_signal_echo(msk)) &&
 	    skb && skb_is_tcp_pure_ack(skb)) {
 		pr_debug("drop other suboptions");
 		opts->suboptions = 0;
-- 
cgit v1.2.3


From 56f15e2cb1f77fbcf9df38de7e5dcb4b37070196 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 6 Apr 2021 17:23:59 -0700
Subject: ethtool: document PHY tunable callbacks

Add missing kdoc for phy tunable callbacks.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3583f7fc075c..5c631a298994 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -410,6 +410,8 @@ struct ethtool_pause_stats {
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
  *	This is only useful if the device maintains PHY statistics and
  *	cannot use the standard PHY library helpers.
+ * @get_phy_tunable: Read the value of a PHY tunable.
+ * @set_phy_tunable: Set the value of a PHY tunable.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
-- 
cgit v1.2.3


From 017d6250ad719d3ddab9070446db1e9fd4ba4a65 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 6 Apr 2021 09:32:50 +0800
Subject: stmmac: intel: Enable SERDES PHY rx clk for PSE

EHL PSE SGMII mode requires to ungate the SERDES PHY rx clk for power up
sequence and vice versa.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 10 ++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 8ba87c0be976..60566598d644 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -153,6 +153,11 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data)
 		return data;
 	}
 
+	/* PSE only - ungate SGMII PHY Rx Clock */
+	if (intel_priv->is_pse)
+		mdiobus_modify(priv->mii, serdes_phy_addr, SERDES_GCR0,
+			       0, SERDES_PHY_RX_CLK);
+
 	return 0;
 }
 
@@ -168,6 +173,11 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 
 	serdes_phy_addr = intel_priv->mdio_adhoc_addr;
 
+	/* PSE only - gate SGMII PHY Rx Clock */
+	if (intel_priv->is_pse)
+		mdiobus_modify(priv->mii, serdes_phy_addr, SERDES_GCR0,
+			       SERDES_PHY_RX_CLK, 0);
+
 	/*  move power state to P3 */
 	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
index e723096c0b15..542acb8ce467 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h
@@ -14,6 +14,7 @@
 
 /* SERDES defines */
 #define SERDES_PLL_CLK		BIT(0)		/* PLL clk valid signal */
+#define SERDES_PHY_RX_CLK	BIT(1)		/* PSE SGMII PHY rx clk */
 #define SERDES_RST		BIT(2)		/* Serdes Reset */
 #define SERDES_PWR_ST_MASK	GENMASK(6, 4)	/* Serdes Power state*/
 #define SERDES_PWR_ST_SHIFT	4
-- 
cgit v1.2.3


From d567fd6e82faa61756810b9b0dfd9d0da11e960e Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Tue, 6 Apr 2021 15:27:33 +0300
Subject: mlxsw: core: Remove critical trip points from thermal zones

Disable software thermal protection by removing critical trip points
from all thermal zones.

The software thermal protection is redundant given there are two layers
of protection below it in firmware and hardware. The first layer is
performed by firmware, the second, in case firmware was not able to
perform protection, by hardware.
The temperature threshold set for hardware protection is always higher
than for firmware.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 27 +++++-----------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index bf85ce9835d7..37fb2e1fb278 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -19,7 +19,6 @@
 #define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
 #define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
 #define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
-#define MLXSW_THERMAL_ASIC_TEMP_CRIT	140000	/* 140C */
 #define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
 #define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
 #define MLXSW_THERMAL_ZONE_MAX_NAME	16
@@ -45,7 +44,6 @@ enum mlxsw_thermal_trips {
 	MLXSW_THERMAL_TEMP_TRIP_NORM,
 	MLXSW_THERMAL_TEMP_TRIP_HIGH,
 	MLXSW_THERMAL_TEMP_TRIP_HOT,
-	MLXSW_THERMAL_TEMP_TRIP_CRIT,
 };
 
 struct mlxsw_thermal_trip {
@@ -75,16 +73,9 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = {
 	{	/* Warning */
 		.type		= THERMAL_TRIP_HOT,
 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
-		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
 		.min_state	= MLXSW_THERMAL_MAX_STATE,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	},
-	{	/* Critical - soft poweroff */
-		.type		= THERMAL_TRIP_CRITICAL,
-		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
-		.min_state	= MLXSW_THERMAL_MAX_STATE,
-		.max_state	= MLXSW_THERMAL_MAX_STATE,
-	}
 };
 
 #define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)
@@ -154,7 +145,6 @@ mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
-	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
 }
 
 static int
@@ -183,11 +173,10 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
 	}
 
 	/* According to the system thermal requirements, the thermal zones are
-	 * defined with four trip points. The critical and emergency
+	 * defined with three trip points. The critical and emergency
 	 * temperature thresholds, provided by QSFP module are set as "active"
-	 * and "hot" trip points, "normal" and "critical" trip points are
-	 * derived from "active" and "hot" by subtracting or adding double
-	 * hysteresis value.
+	 * and "hot" trip points, "normal" trip point is derived from "active"
+	 * by subtracting double hysteresis value.
 	 */
 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
@@ -196,8 +185,6 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
-	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
-					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
 
 	return 0;
 }
@@ -210,7 +197,7 @@ static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
 	struct mlxsw_thermal_trip *trip = trips;
 	unsigned int score, delta, i, shift = 1;
 
-	/* Calculate thermal zone score, if temperature is above the critical
+	/* Calculate thermal zone score, if temperature is above the hot
 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
 	 */
 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
@@ -333,8 +320,7 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
 {
 	struct mlxsw_thermal *thermal = tzdev->devdata;
 
-	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
-	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
 		return -EINVAL;
 
 	thermal->trips[trip].temp = temp;
@@ -502,8 +488,7 @@ mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
 {
 	struct mlxsw_thermal_module *tz = tzdev->devdata;
 
-	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
-	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
 		return -EINVAL;
 
 	tz->trips[trip].temp = temp;
-- 
cgit v1.2.3


From 872fff333fb1a100a54dfb7fd20fb418bb7e1ba5 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Wed, 7 Apr 2021 11:16:38 +0800
Subject: nfc/fdp: remove unnecessary assignment and label
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In function fdp_nci_patch_otp and fdp_nci_patch_ram，many goto
out statements are used, and out label just return variable r.
in some places,just jump to the out label, and in other places,
assign a value to the variable r,then jump to the out label.
It is unnecessary, we just use return sentences to replace goto
sentences and delete out label.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/fdp/fdp.c | 42 ++++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c
index ee2baa2b2fcf..fe0719ed81a0 100644
--- a/drivers/nfc/fdp/fdp.c
+++ b/drivers/nfc/fdp/fdp.c
@@ -344,7 +344,7 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
 	int r = 0;
 
 	if (info->otp_version >= info->otp_patch_version)
-		goto out;
+		return r;
 
 	info->setup_patch_sent = 0;
 	info->setup_reset_ntf = 0;
@@ -353,19 +353,17 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
 	/* Patch init request */
 	r = fdp_nci_patch_cmd(ndev, NCI_PATCH_TYPE_OTP);
 	if (r)
-		goto out;
+		return r;
 
 	/* Patch data connection creation */
 	conn_id = fdp_nci_create_conn(ndev);
-	if (conn_id < 0) {
-		r = conn_id;
-		goto out;
-	}
+	if (conn_id < 0)
+		return conn_id;
 
 	/* Send the patch over the data connection */
 	r = fdp_nci_send_patch(ndev, conn_id, NCI_PATCH_TYPE_OTP);
 	if (r)
-		goto out;
+		return r;
 
 	/* Wait for all the packets to be send over i2c */
 	wait_event_interruptible(info->setup_wq,
@@ -377,13 +375,12 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
 	/* Close the data connection */
 	r = nci_core_conn_close(info->ndev, conn_id);
 	if (r)
-		goto out;
+		return r;
 
 	/* Patch finish message */
 	if (fdp_nci_patch_cmd(ndev, NCI_PATCH_TYPE_EOT)) {
 		nfc_err(dev, "OTP patch error 0x%x\n", r);
-		r = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* If the patch notification didn't arrive yet, wait for it */
@@ -393,8 +390,7 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
 	r = info->setup_patch_status;
 	if (r) {
 		nfc_err(dev, "OTP patch error 0x%x\n", r);
-		r = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/*
@@ -403,7 +399,6 @@ static int fdp_nci_patch_otp(struct nci_dev *ndev)
 	 */
 	wait_event_interruptible(info->setup_wq, info->setup_reset_ntf);
 
-out:
 	return r;
 }
 
@@ -415,7 +410,7 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
 	int r = 0;
 
 	if (info->ram_version >= info->ram_patch_version)
-		goto out;
+		return r;
 
 	info->setup_patch_sent = 0;
 	info->setup_reset_ntf = 0;
@@ -424,19 +419,17 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
 	/* Patch init request */
 	r = fdp_nci_patch_cmd(ndev, NCI_PATCH_TYPE_RAM);
 	if (r)
-		goto out;
+		return r;
 
 	/* Patch data connection creation */
 	conn_id = fdp_nci_create_conn(ndev);
-	if (conn_id < 0) {
-		r = conn_id;
-		goto out;
-	}
+	if (conn_id < 0)
+		return conn_id;
 
 	/* Send the patch over the data connection */
 	r = fdp_nci_send_patch(ndev, conn_id, NCI_PATCH_TYPE_RAM);
 	if (r)
-		goto out;
+		return r;
 
 	/* Wait for all the packets to be send over i2c */
 	wait_event_interruptible(info->setup_wq,
@@ -448,13 +441,12 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
 	/* Close the data connection */
 	r = nci_core_conn_close(info->ndev, conn_id);
 	if (r)
-		goto out;
+		return r;
 
 	/* Patch finish message */
 	if (fdp_nci_patch_cmd(ndev, NCI_PATCH_TYPE_EOT)) {
 		nfc_err(dev, "RAM patch error 0x%x\n", r);
-		r = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* If the patch notification didn't arrive yet, wait for it */
@@ -464,8 +456,7 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
 	r = info->setup_patch_status;
 	if (r) {
 		nfc_err(dev, "RAM patch error 0x%x\n", r);
-		r = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/*
@@ -474,7 +465,6 @@ static int fdp_nci_patch_ram(struct nci_dev *ndev)
 	 */
 	wait_event_interruptible(info->setup_wq, info->setup_reset_ntf);
 
-out:
 	return r;
 }
 
-- 
cgit v1.2.3


From 7e4a51319d3a71ac8002c96f817bcbeb36789b07 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Tue, 6 Apr 2021 23:40:03 -0700
Subject: net: introduce nla_policy for IFLA_NEW_IFINDEX

In this case, we don't need to check that new_ifindex is positive in
validate_linkmsg.

Fixes: eeb85a14ee34 ("net: Allow to specify ifindex when device is moved to another namespace")
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d51252afde0a..9108a7e6c0c0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1877,6 +1877,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 				    .len = ALTIFNAMSIZ - 1 },
 	[IFLA_PERM_ADDRESS]	= { .type = NLA_REJECT },
 	[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
+	[IFLA_NEW_IFINDEX]	= NLA_POLICY_MIN(NLA_S32, 1),
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2266,9 +2267,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 			return -EINVAL;
 	}
 
-	if (tb[IFLA_NEW_IFINDEX] && nla_get_s32(tb[IFLA_NEW_IFINDEX]) <= 0)
-		return -EINVAL;
-
 	if (tb[IFLA_AF_SPEC]) {
 		struct nlattr *af;
 		int rem, err;
-- 
cgit v1.2.3


From 0854fa82c96ca37a35e954b7079c0bfd795affb1 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Tue, 6 Apr 2021 23:40:51 -0700
Subject: net: remove the new_ifindex argument from dev_change_net_namespace

Here is only one place where we want to specify new_ifindex. In all
other cases, callers pass 0 as new_ifindex. It looks reasonable to add a
low-level function with new_ifindex and to convert
dev_change_net_namespace to a static inline wrapper.

Fixes: eeb85a14ee34 ("net: Allow to specify ifindex when device is moved to another namespace")
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c |  2 +-
 include/linux/netdevice.h       |  8 +++++++-
 net/core/dev.c                  | 10 +++++-----
 net/core/rtnetlink.c            |  4 ++--
 net/ieee802154/core.c           |  4 ++--
 net/wireless/core.c             |  4 ++--
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 8c0c70e1da77..7349a70af083 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2354,7 +2354,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 	 */
 	if (!net_eq(dev_net(ndev), dev_net(vf_netdev))) {
 		ret = dev_change_net_namespace(vf_netdev,
-					       dev_net(ndev), "eth%d", 0);
+					       dev_net(ndev), "eth%d");
 		if (ret)
 			netdev_err(vf_netdev,
 				   "could not move to same namespace as %s: %d\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b482236c0e99..5cbc950b34df 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4026,8 +4026,14 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
 int dev_change_name(struct net_device *, const char *);
 int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+			       const char *pat, int new_ifindex);
+static inline
 int dev_change_net_namespace(struct net_device *dev, struct net *net,
-			     const char *pat, int new_ifindex);
+			     const char *pat)
+{
+	return __dev_change_net_namespace(dev, net, pat, 0);
+}
 int __dev_set_mtu(struct net_device *, int);
 int dev_validate_mtu(struct net_device *dev, int mtu,
 		     struct netlink_ext_ack *extack);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9d1a8fac793f..33ff4a944109 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11062,7 +11062,7 @@ void unregister_netdev(struct net_device *dev)
 EXPORT_SYMBOL(unregister_netdev);
 
 /**
- *	dev_change_net_namespace - move device to different nethost namespace
+ *	__dev_change_net_namespace - move device to different nethost namespace
  *	@dev: device
  *	@net: network namespace
  *	@pat: If not NULL name pattern to try if the current device name
@@ -11077,8 +11077,8 @@ EXPORT_SYMBOL(unregister_netdev);
  *	Callers must hold the rtnl semaphore.
  */
 
-int dev_change_net_namespace(struct net_device *dev, struct net *net,
-			     const char *pat, int new_ifindex)
+int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+			       const char *pat, int new_ifindex)
 {
 	struct net *net_old = dev_net(dev);
 	int err, new_nsid;
@@ -11202,7 +11202,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net,
 out:
 	return err;
 }
-EXPORT_SYMBOL_GPL(dev_change_net_namespace);
+EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
 
 static int dev_cpu_dead(unsigned int oldcpu)
 {
@@ -11458,7 +11458,7 @@ static void __net_exit default_device_exit(struct net *net)
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
 		if (__dev_get_by_name(&init_net, fb_name))
 			snprintf(fb_name, IFNAMSIZ, "dev%%d");
-		err = dev_change_net_namespace(dev, &init_net, fb_name, 0);
+		err = dev_change_net_namespace(dev, &init_net, fb_name);
 		if (err) {
 			pr_emerg("%s: failed to move %s to init_net: %d\n",
 				 __func__, dev->name, err);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9108a7e6c0c0..9f1f55785a6f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2619,7 +2619,7 @@ static int do_setlink(const struct sk_buff *skb,
 		else
 			new_ifindex = 0;
 
-		err = dev_change_net_namespace(dev, net, ifname, new_ifindex);
+		err = __dev_change_net_namespace(dev, net, ifname, new_ifindex);
 		put_net(net);
 		if (err)
 			goto errout;
@@ -3461,7 +3461,7 @@ replay:
 	if (err < 0)
 		goto out_unregister;
 	if (link_net) {
-		err = dev_change_net_namespace(dev, dest_net, ifname, 0);
+		err = dev_change_net_namespace(dev, dest_net, ifname);
 		if (err < 0)
 			goto out_unregister;
 	}
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index ec3068937fc3..de259b5170ab 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -205,7 +205,7 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
 		if (!wpan_dev->netdev)
 			continue;
 		wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
-		err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d", 0);
+		err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
 		if (err)
 			break;
 		wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
@@ -222,7 +222,7 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
 				continue;
 			wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
 			err = dev_change_net_namespace(wpan_dev->netdev, net,
-						       "wpan%d", 0);
+						       "wpan%d");
 			WARN_ON(err);
 			wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
 		}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index fabb677b7d58..a2785379df6e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -165,7 +165,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 		if (!wdev->netdev)
 			continue;
 		wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
-		err = dev_change_net_namespace(wdev->netdev, net, "wlan%d", 0);
+		err = dev_change_net_namespace(wdev->netdev, net, "wlan%d");
 		if (err)
 			break;
 		wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
@@ -182,7 +182,7 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 				continue;
 			wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
 			err = dev_change_net_namespace(wdev->netdev, net,
-							"wlan%d", 0);
+							"wlan%d");
 			WARN_ON(err);
 			wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
 		}
-- 
cgit v1.2.3


From 7b3ae17f0f681027a5de532c6a94d99334d97633 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 7 Apr 2021 10:39:22 +0100
Subject: xircom: remove redundant error check on variable err

The error check on err is always false as err is always 0 at the
port_found label. The code is redundant and can be removed.

Addresses-Coverity: ("Logically dead code")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xircom/xirc2ps_cs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 3e337142b516..2049d76a0e68 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -798,8 +798,6 @@ xirc2ps_config(struct pcmcia_device * link)
 	    goto config_error;
     }
   port_found:
-    if (err)
-	 goto config_error;
 
     /****************
      * Now allocate an interrupt line.	Note that this does not
-- 
cgit v1.2.3


From 298b58f00c0f86868ea717426beb5c1198772f81 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 7 Apr 2021 11:12:48 +0100
Subject: liquidio: Fix unintented sign extension of a left shift of a u16

The macro CN23XX_PEM_BAR1_INDEX_REG is being used to shift oct->pcie_port
(a u16) left 24 places. There are two subtle issues here, first the
shift gets promoted to an signed int and then sign extended to a u64.
If oct->pcie_port is 0x80 or more then the upper bits get sign extended
to 1. Secondly shfiting a u16 24 bits will lead to an overflow so it
needs to be cast to a u64 for all the bits to not overflow.

It is entirely possible that the u16 port value is never large enough
for this to fail, but it is useful to fix unintended overflows such
as this.

Fix this by casting the port parameter to the macro to a u64 before
the shift.

Addresses-Coverity: ("Unintended sign extension")
Fixes: 5bc67f587ba7 ("liquidio: CN23XX register definitions")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
index e6d4ad99cc38..3f1c189646f4 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
@@ -521,7 +521,7 @@
 #define    CN23XX_BAR1_INDEX_OFFSET                3
 
 #define    CN23XX_PEM_BAR1_INDEX_REG(port, idx)		\
-		(CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \
+		(CN23XX_PEM_BAR1_INDEX_START + (((u64)port) << CN23XX_PEM_OFFSET) + \
 		 ((idx) << CN23XX_BAR1_INDEX_OFFSET))
 
 /*############################ DPI #########################*/
-- 
cgit v1.2.3


From a18f19e912014782c6ec67038e07f2dfea81625b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 7 Apr 2021 12:48:56 +0200
Subject: net: wan: z85230: drop unused async state

According to the changelog, asynchronous mode was dropped sometime
before v2.2. Let's get rid of the unused driver-specific async state as
well so that it doesn't show up when doing tree-wide tty work.

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/z85230.h | 39 ---------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/drivers/net/wan/z85230.h b/drivers/net/wan/z85230.h
index 1081d171e477..462cb620bc5d 100644
--- a/drivers/net/wan/z85230.h
+++ b/drivers/net/wan/z85230.h
@@ -327,45 +327,6 @@ struct z8530_channel
 	void		*private;	/* For our owner */
 	struct net_device	*netdevice;	/* Network layer device */
 
-	/*
-	 *	Async features
-	 */
-
-	struct tty_struct 	*tty;		/* Attached terminal */
-	int			line;		/* Minor number */
-	wait_queue_head_t	open_wait;	/* Tasks waiting to open */
-	wait_queue_head_t	close_wait;	/* and for close to end */
-	unsigned long		event;		/* Pending events */
-	int			fdcount;    	/* # of fd on device */
-	int			blocked_open;	/* # of blocked opens */
-	int			x_char;		/* XON/XOF char */
-	unsigned char 		*xmit_buf;	/* Transmit pointer */
-	int			xmit_head;	/* Transmit ring */
-	int			xmit_tail;
-	int			xmit_cnt;
-	int			flags;	
-	int			timeout;
-	int			xmit_fifo_size;	/* Transmit FIFO info */
-
-	int			close_delay;	/* Do we wait for drain on close ? */
-	unsigned short		closing_wait;
-
-	/* We need to know the current clock divisor
-	 * to read the bps rate the chip has currently
-	 * loaded.
-	 */
-
-	unsigned char		clk_divisor;  /* May be 1, 16, 32, or 64 */
-	int			zs_baud;
-
-	int			magic;
-	int			baud_base;		/* Baud parameters */
-	int			custom_divisor;
-
-
-	unsigned char		tx_active; /* character is being xmitted */
-	unsigned char		tx_stopped; /* output is suspended */
-
 	spinlock_t		*lock;	  /* Device lock */
 };
 
-- 
cgit v1.2.3


From 4e92cac843d3c1bfe033de9edbde483525ebd1c4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:04 +0000
Subject: net: encx24j600: use module_spi_driver to simplify the code

module_spi_driver() makes the code simpler by eliminating
boilerplate code.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index a66a179236fd..3658c4ae3c37 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1117,17 +1117,7 @@ static struct spi_driver encx24j600_spi_net_driver = {
 	.id_table	= encx24j600_spi_id_table,
 };
 
-static int __init encx24j600_init(void)
-{
-	return spi_register_driver(&encx24j600_spi_net_driver);
-}
-module_init(encx24j600_init);
-
-static void encx24j600_exit(void)
-{
-	spi_unregister_driver(&encx24j600_spi_net_driver);
-}
-module_exit(encx24j600_exit);
+module_spi_driver(encx24j600_spi_net_driver);
 
 MODULE_DESCRIPTION(DRV_NAME " ethernet driver");
 MODULE_AUTHOR("Jon Ringle <jringle@gridpoint.com>");
-- 
cgit v1.2.3


From 1ffa6604431aadbd36e2f95c0078a2455518464e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:05 +0000
Subject: enic: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f04ec53544ae..f48957a17c3a 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -3040,15 +3040,4 @@ static struct pci_driver enic_driver = {
 	.remove = enic_remove,
 };
 
-static int __init enic_init_module(void)
-{
-	return pci_register_driver(&enic_driver);
-}
-
-static void __exit enic_cleanup_module(void)
-{
-	pci_unregister_driver(&enic_driver);
-}
-
-module_init(enic_init_module);
-module_exit(enic_cleanup_module);
+module_pci_driver(enic_driver);
-- 
cgit v1.2.3


From 95b2fbdb9321168fc557fdc4a5d58c9bf9d13fef Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:07 +0000
Subject: tulip: windbond-840: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/winbond-840.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 89cbdc1f4857..514df170ec5d 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -1629,15 +1629,4 @@ static struct pci_driver w840_driver = {
 	.driver.pm	= &w840_pm_ops,
 };
 
-static int __init w840_init(void)
-{
-	return pci_register_driver(&w840_driver);
-}
-
-static void __exit w840_exit(void)
-{
-	pci_unregister_driver(&w840_driver);
-}
-
-module_init(w840_init);
-module_exit(w840_exit);
+module_pci_driver(w840_driver);
-- 
cgit v1.2.3


From 02f2743ecd7baa64f0e716cde4f5b2fd18811955 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:08 +0000
Subject: tulip: de2104x: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/de2104x.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index c3cbe55205a7..b018195f0243 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -2193,15 +2193,4 @@ static struct pci_driver de_driver = {
 	.driver.pm	= &de_pm_ops,
 };
 
-static int __init de_init (void)
-{
-	return pci_register_driver(&de_driver);
-}
-
-static void __exit de_exit (void)
-{
-	pci_unregister_driver (&de_driver);
-}
-
-module_init(de_init);
-module_exit(de_exit);
+module_pci_driver(de_driver);
-- 
cgit v1.2.3


From f670149a4f5f17190a828b6631e6c8de74efa975 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:09 +0000
Subject: net: sundance: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dlink/sundance.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index e3a8858915b3..c22ed8ac7d9c 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -1982,17 +1982,4 @@ static struct pci_driver sundance_driver = {
 	.driver.pm	= &sundance_pm_ops,
 };
 
-static int __init sundance_init(void)
-{
-	return pci_register_driver(&sundance_driver);
-}
-
-static void __exit sundance_exit(void)
-{
-	pci_unregister_driver(&sundance_driver);
-}
-
-module_init(sundance_init);
-module_exit(sundance_exit);
-
-
+module_pci_driver(sundance_driver);
-- 
cgit v1.2.3


From 6381c45b28385451c5bb54d131475d38849639f3 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:11 +0000
Subject: net: atheros: atl2: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atlx/atl2.c | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index f016f2e12ee7..0cc0db04c27d 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1675,29 +1675,7 @@ static struct pci_driver atl2_driver = {
 	.shutdown = atl2_shutdown,
 };
 
-/**
- * atl2_init_module - Driver Registration Routine
- *
- * atl2_init_module is the first routine called when the driver is
- * loaded. All it does is register with the PCI subsystem.
- */
-static int __init atl2_init_module(void)
-{
-	return pci_register_driver(&atl2_driver);
-}
-module_init(atl2_init_module);
-
-/**
- * atl2_exit_module - Driver Exit Cleanup Routine
- *
- * atl2_exit_module is called just before the driver is removed
- * from memory.
- */
-static void __exit atl2_exit_module(void)
-{
-	pci_unregister_driver(&atl2_driver);
-}
-module_exit(atl2_exit_module);
+module_pci_driver(atl2_driver);
 
 static void atl2_read_pci_cfg(struct atl2_hw *hw, u32 reg, u16 *value)
 {
-- 
cgit v1.2.3


From 3cd52c1e32fe7dfee09815ced702db9ee9f84ec9 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 7 Apr 2021 15:07:12 +0000
Subject: net: fealnx: use module_pci_driver to simplify the code

Use the module_pci_driver() macro to make the code simpler
by eliminating module_init and module_exit calls.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/fealnx.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index c696651dd735..0908771aa9ac 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -1948,15 +1948,4 @@ static struct pci_driver fealnx_driver = {
 	.remove		= fealnx_remove_one,
 };
 
-static int __init fealnx_init(void)
-{
-	return pci_register_driver(&fealnx_driver);
-}
-
-static void __exit fealnx_exit(void)
-{
-	pci_unregister_driver(&fealnx_driver);
-}
-
-module_init(fealnx_init);
-module_exit(fealnx_exit);
+module_pci_driver(fealnx_driver);
-- 
cgit v1.2.3


From 3056df93f7a83f456d20536c92260db0b1290ef5 Mon Sep 17 00:00:00 2001
From: Chinh T Cao <chinh.t.cao@intel.com>
Date: Thu, 25 Mar 2021 15:35:04 -0700
Subject: ice: Re-send some AQ commands, as result of EBUSY AQ error

Retry sending some AQ commands, as result of EBUSY AQ error.
ice_aqc_opc_get_link_topo
ice_aqc_opc_lldp_stop
ice_aqc_opc_lldp_start
ice_aqc_opc_lldp_filter_ctrl

This change follows the latest guidelines from HW team. It is
better to retry the same AQ command several times, as the result
of EBUSY, instead of returning error to the caller right away.

Signed-off-by: Chinh T Cao <chinh.t.cao@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c   | 81 ++++++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_common.h   |  3 +
 drivers/net/ethernet/intel/ice/ice_controlq.c |  2 +-
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 54df00ee912b..bde19e30394d 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1292,6 +1292,85 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = {
  */
 DEFINE_MUTEX(ice_global_cfg_lock_sw);
 
+/**
+ * ice_should_retry_sq_send_cmd
+ * @opcode: AQ opcode
+ *
+ * Decide if we should retry the send command routine for the ATQ, depending
+ * on the opcode.
+ */
+static bool ice_should_retry_sq_send_cmd(u16 opcode)
+{
+	switch (opcode) {
+	case ice_aqc_opc_get_link_topo:
+	case ice_aqc_opc_lldp_stop:
+	case ice_aqc_opc_lldp_start:
+	case ice_aqc_opc_lldp_filter_ctrl:
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_sq_send_cmd_retry - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Retry sending the FW Admin Queue command, multiple times, to the FW Admin
+ * Queue if the EBUSY AQ error is returned.
+ */
+static enum ice_status
+ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		      struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc_cpy;
+	enum ice_status status;
+	bool is_cmd_for_retry;
+	u8 *buf_cpy = NULL;
+	u8 idx = 0;
+	u16 opcode;
+
+	opcode = le16_to_cpu(desc->opcode);
+	is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode);
+	memset(&desc_cpy, 0, sizeof(desc_cpy));
+
+	if (is_cmd_for_retry) {
+		if (buf) {
+			buf_cpy = kzalloc(buf_size, GFP_KERNEL);
+			if (!buf_cpy)
+				return ICE_ERR_NO_MEMORY;
+		}
+
+		memcpy(&desc_cpy, desc, sizeof(desc_cpy));
+	}
+
+	do {
+		status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd);
+
+		if (!is_cmd_for_retry || !status ||
+		    hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY)
+			break;
+
+		if (buf_cpy)
+			memcpy(buf, buf_cpy, buf_size);
+
+		memcpy(desc, &desc_cpy, sizeof(desc_cpy));
+
+		mdelay(ICE_SQ_SEND_DELAY_TIME_MS);
+
+	} while (++idx < ICE_SQ_SEND_MAX_EXECUTE);
+
+	kfree(buf_cpy);
+
+	return status;
+}
+
 /**
  * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
  * @hw: pointer to the HW struct
@@ -1333,7 +1412,7 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
 		break;
 	}
 
-	status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
+	status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd);
 	if (lock_acquired)
 		mutex_unlock(&ice_global_cfg_lock_sw);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 81fd69cb1485..d14406b11c92 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -11,6 +11,9 @@
 #include "ice_switch.h"
 #include <linux/avf/virtchnl.h>
 
+#define ICE_SQ_SEND_DELAY_TIME_MS	10
+#define ICE_SQ_SEND_MAX_EXECUTE		3
+
 enum ice_status ice_init_hw(struct ice_hw *hw);
 void ice_deinit_hw(struct ice_hw *hw);
 enum ice_status ice_check_reset(struct ice_hw *hw);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index b2d8a5932b1d..0f207a42ea77 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -892,7 +892,7 @@ static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
  * ice_sq_send_cmd - send command to Control Queue (ATQ)
  * @hw: pointer to the HW struct
  * @cq: pointer to the specific Control queue
- * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @desc: prefilled descriptor describing the command
  * @buf: buffer to use for indirect commands (or NULL for direct commands)
  * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
  * @cd: pointer to command details structure
-- 
cgit v1.2.3


From 7fb09a737536fc7bff75c7018133acb30328ca07 Mon Sep 17 00:00:00 2001
From: Victor Raj <victor.raj@intel.com>
Date: Thu, 25 Mar 2021 15:35:05 -0700
Subject: ice: Modify recursive way of adding nodes

Remove the recursive way of adding the nodes to the layer in order
to reduce the stack usage. Instead the algorithm is modified to use
a while loop.

The previous code was scanning recursively the nodes horizontally.
The total stack consumption will be based on number of nodes present
on that layer. In some cases it can consume more stack.

Signed-off-by: Victor Raj <victor.raj@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_sched.c | 130 +++++++++++++++++------------
 1 file changed, 77 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index f890337cc24a..97562051fe14 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -919,7 +919,7 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
 }
 
 /**
- * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * ice_sched_add_nodes_to_hw_layer - Add nodes to HW layer
  * @pi: port information structure
  * @tc_node: pointer to TC node
  * @parent: pointer to parent node
@@ -928,82 +928,106 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
  * @first_node_teid: pointer to the first node TEID
  * @num_nodes_added: pointer to number of nodes added
  *
- * This function add nodes to a given layer.
+ * Add nodes into specific HW layer.
  */
 static enum ice_status
-ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
-			     struct ice_sched_node *tc_node,
-			     struct ice_sched_node *parent, u8 layer,
-			     u16 num_nodes, u32 *first_node_teid,
-			     u16 *num_nodes_added)
+ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi,
+				struct ice_sched_node *tc_node,
+				struct ice_sched_node *parent, u8 layer,
+				u16 num_nodes, u32 *first_node_teid,
+				u16 *num_nodes_added)
 {
-	u32 *first_teid_ptr = first_node_teid;
-	u16 new_num_nodes, max_child_nodes;
-	enum ice_status status = 0;
-	struct ice_hw *hw = pi->hw;
-	u16 num_added = 0;
-	u32 temp;
+	u16 max_child_nodes;
 
 	*num_nodes_added = 0;
 
 	if (!num_nodes)
-		return status;
+		return 0;
 
-	if (!parent || layer < hw->sw_entry_point_layer)
+	if (!parent || layer < pi->hw->sw_entry_point_layer)
 		return ICE_ERR_PARAM;
 
 	/* max children per node per layer */
-	max_child_nodes = hw->max_children[parent->tx_sched_layer];
+	max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
 
-	/* current number of children + required nodes exceed max children ? */
+	/* current number of children + required nodes exceed max children */
 	if ((parent->num_children + num_nodes) > max_child_nodes) {
 		/* Fail if the parent is a TC node */
 		if (parent == tc_node)
 			return ICE_ERR_CFG;
+		return ICE_ERR_MAX_LIMIT;
+	}
+
+	return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+				   num_nodes_added, first_node_teid);
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function add nodes to a given layer.
+ */
+static enum ice_status
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+			     struct ice_sched_node *tc_node,
+			     struct ice_sched_node *parent, u8 layer,
+			     u16 num_nodes, u32 *first_node_teid,
+			     u16 *num_nodes_added)
+{
+	u32 *first_teid_ptr = first_node_teid;
+	u16 new_num_nodes = num_nodes;
+	enum ice_status status = 0;
 
+	*num_nodes_added = 0;
+	while (*num_nodes_added < num_nodes) {
+		u16 max_child_nodes, num_added = 0;
+		u32 temp;
+
+		status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent,
+							 layer,	new_num_nodes,
+							 first_teid_ptr,
+							 &num_added);
+		if (!status)
+			*num_nodes_added += num_added;
+		/* added more nodes than requested ? */
+		if (*num_nodes_added > num_nodes) {
+			ice_debug(pi->hw, ICE_DBG_SCHED, "added extra nodes %d %d\n", num_nodes,
+				  *num_nodes_added);
+			status = ICE_ERR_CFG;
+			break;
+		}
+		/* break if all the nodes are added successfully */
+		if (!status && (*num_nodes_added == num_nodes))
+			break;
+		/* break if the error is not max limit */
+		if (status && status != ICE_ERR_MAX_LIMIT)
+			break;
+		/* Exceeded the max children */
+		max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
 		/* utilize all the spaces if the parent is not full */
 		if (parent->num_children < max_child_nodes) {
 			new_num_nodes = max_child_nodes - parent->num_children;
-			/* this recursion is intentional, and wouldn't
-			 * go more than 2 calls
+		} else {
+			/* This parent is full, try the next sibling */
+			parent = parent->sibling;
+			/* Don't modify the first node TEID memory if the
+			 * first node was added already in the above call.
+			 * Instead send some temp memory for all other
+			 * recursive calls.
 			 */
-			status = ice_sched_add_nodes_to_layer(pi, tc_node,
-							      parent, layer,
-							      new_num_nodes,
-							      first_node_teid,
-							      &num_added);
-			if (status)
-				return status;
+			if (num_added)
+				first_teid_ptr = &temp;
 
-			*num_nodes_added += num_added;
+			new_num_nodes = num_nodes - *num_nodes_added;
 		}
-		/* Don't modify the first node TEID memory if the first node was
-		 * added already in the above call. Instead send some temp
-		 * memory for all other recursive calls.
-		 */
-		if (num_added)
-			first_teid_ptr = &temp;
-
-		new_num_nodes = num_nodes - num_added;
-
-		/* This parent is full, try the next sibling */
-		parent = parent->sibling;
-
-		/* this recursion is intentional, for 1024 queues
-		 * per VSI, it goes max of 16 iterations.
-		 * 1024 / 8 = 128 layer 8 nodes
-		 * 128 /8 = 16 (add 8 nodes per iteration)
-		 */
-		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
-						      layer, new_num_nodes,
-						      first_teid_ptr,
-						      &num_added);
-		*num_nodes_added += num_added;
-		return status;
 	}
-
-	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
-				     num_nodes_added, first_node_teid);
 	return status;
 }
 
-- 
cgit v1.2.3


From d6730a871e68f10c786cdee59aebd6f92d49d249 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:06 -0700
Subject: ice: Align macro names to the specification

For get PHY abilities AQ, the specification defines "report modes"
as "with media", "without media" and "active configuration". For
clarity, rename macros to align with the specification.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 10 +++++-----
 drivers/net/ethernet/intel/ice/ice_common.c     | 13 +++++++------
 drivers/net/ethernet/intel/ice/ice_ethtool.c    | 12 ++++++------
 drivers/net/ethernet/intel/ice/ice_main.c       | 12 ++++++------
 4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index b9491ef5f21c..3f3d51bf0019 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -882,11 +882,11 @@ struct ice_aqc_get_phy_caps {
 	 * 01b - Report topology capabilities
 	 * 10b - Report SW configured
 	 */
-#define ICE_AQC_REPORT_MODE_S		1
-#define ICE_AQC_REPORT_MODE_M		(3 << ICE_AQC_REPORT_MODE_S)
-#define ICE_AQC_REPORT_NVM_CAP		0
-#define ICE_AQC_REPORT_TOPO_CAP		BIT(1)
-#define ICE_AQC_REPORT_SW_CFG		BIT(2)
+#define ICE_AQC_REPORT_MODE_S			1
+#define ICE_AQC_REPORT_MODE_M			(3 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA	0
+#define ICE_AQC_REPORT_TOPO_CAP_MEDIA		BIT(1)
+#define ICE_AQC_REPORT_ACTIVE_CFG		BIT(2)
 	__le32 reserved1;
 	__le32 addr_high;
 	__le32 addr_low;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index bde19e30394d..509dce475bff 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -191,7 +191,7 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
 	ice_debug(hw, ICE_DBG_LINK, "   module_type[2] = 0x%x\n",
 		  pcaps->module_type[2]);
 
-	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) {
+	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) {
 		pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
 		pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
 		memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
@@ -922,7 +922,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 
 	/* Initialize port_info struct with PHY capabilities */
 	status = ice_aq_get_phy_caps(hw->port_info, false,
-				     ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL);
+				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
 	devm_kfree(ice_hw_to_dev(hw), pcaps);
 	if (status)
 		dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n",
@@ -2734,7 +2735,7 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi)
 		if (!pcaps)
 			return ICE_ERR_NO_MEMORY;
 
-		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 					     pcaps, NULL);
 
 		devm_kfree(ice_hw_to_dev(hw), pcaps);
@@ -2894,8 +2895,8 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
 		return ICE_ERR_NO_MEMORY;
 
 	/* Get the current PHY config */
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
-				     NULL);
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
+				     pcaps, NULL);
 	if (status) {
 		*aq_failures = ICE_SET_FC_AQ_FAIL_GET;
 		goto out;
@@ -3041,7 +3042,7 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
 	if (!pcaps)
 		return ICE_ERR_NO_MEMORY;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
 				     NULL);
 	if (status)
 		goto out;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 15152e63f204..84e42e598c85 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1060,7 +1060,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
 	if (!caps)
 		return -ENOMEM;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 				     caps, NULL);
 	if (status) {
 		err = -EAGAIN;
@@ -2000,7 +2000,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 		return -ENOMEM;
 
 	status = ice_aq_get_phy_caps(vsi->port_info, false,
-				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
+				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
 	if (status) {
 		err = -EIO;
 		goto done;
@@ -2037,7 +2037,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
 
 	status = ice_aq_get_phy_caps(vsi->port_info, false,
-				     ICE_AQC_REPORT_TOPO_CAP, caps, NULL);
+				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL);
 	if (status) {
 		err = -EIO;
 		goto done;
@@ -2243,7 +2243,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 		return -ENOMEM;
 
 	/* Get the PHY capabilities based on media */
-	status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP,
+	status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 				     abilities, NULL);
 	if (status) {
 		err = -EAGAIN;
@@ -2972,7 +2972,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 		return;
 
 	/* Get current PHY config */
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
 				     NULL);
 	if (status)
 		goto out;
@@ -3039,7 +3039,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 		return -ENOMEM;
 
 	/* Get current PHY config */
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
 				     NULL);
 	if (status) {
 		kfree(pcaps);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index b3c1cadecf21..4379bbece4d9 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -720,7 +720,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
 	}
 
 	status = ice_aq_get_phy_caps(vsi->port_info, false,
-				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
+				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
 	if (status)
 		netdev_info(vsi->netdev, "Get phy capability failed.\n");
 
@@ -1631,7 +1631,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
 	if (!pcaps)
 		return -ENOMEM;
 
-	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
 				      NULL);
 	if (retcode) {
 		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
@@ -1691,7 +1691,7 @@ static int ice_init_nvm_phy_type(struct ice_port_info *pi)
 	if (!pcaps)
 		return -ENOMEM;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps,
 				     NULL);
 
 	if (status) {
@@ -1807,7 +1807,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 	if (!pcaps)
 		return -ENOMEM;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
 				     NULL);
 	if (status) {
 		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
@@ -1886,7 +1886,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 		return -ENOMEM;
 
 	/* Get current PHY config */
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
 				     NULL);
 	if (status) {
 		dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n",
@@ -1904,7 +1904,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 
 	/* Use PHY topology as baseline for configuration */
 	memset(pcaps, 0, sizeof(*pcaps));
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
 				     NULL);
 	if (status) {
 		dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n",
-- 
cgit v1.2.3


From d348d51771b9db562b9b8c88c3ac76953741b906 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:07 -0700
Subject: ice: Ignore EMODE return for opcode 0x0605

When link is owned by manageability, the driver is not allowed to fiddle
with link. FW returns ICE_AQ_RC_EMODE if the driver attempts to do so.
This patch adds a new function ice_set_link which abstracts the call to
ice_aq_set_link_restart_an and provides a clean way to turn on/off link.

While making this change, I also spotted that an int variable was being
used to hold both an ice_status return code and the Linux errno return
code. This pattern more often than not results in the driver inadvertently
returning ice_status back to kernel which is a major boo-boo. Clean it up.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +++---------
 drivers/net/ethernet/intel/ice/ice_lib.c     | 37 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.h     |  2 ++
 drivers/net/ethernet/intel/ice/ice_main.c    | 41 +++++++++++-----------------
 4 files changed, 59 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 84e42e598c85..0db31a89658a 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1095,24 +1095,15 @@ static int ice_nway_reset(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
-	struct ice_port_info *pi;
-	enum ice_status status;
+	int err;
 
-	pi = vsi->port_info;
 	/* If VSI state is up, then restart autoneg with link up */
 	if (!test_bit(__ICE_DOWN, vsi->back->state))
-		status = ice_aq_set_link_restart_an(pi, true, NULL);
+		err = ice_set_link(vsi, true);
 	else
-		status = ice_aq_set_link_restart_an(pi, false, NULL);
+		err = ice_set_link(vsi, false);
 
-	if (status) {
-		netdev_info(netdev, "link restart failed, err %s aq_err %s\n",
-			    ice_stat_str(status),
-			    ice_aq_str(pi->hw->adminq.sq_last_status));
-		return -EIO;
-	}
-
-	return 0;
+	return err;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 6041ca2830de..5edc0da8b8c3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3423,3 +3423,40 @@ int ice_clear_dflt_vsi(struct ice_sw *sw)
 
 	return 0;
 }
+
+/**
+ * ice_set_link - turn on/off physical link
+ * @vsi: VSI to modify physical link on
+ * @ena: turn on/off physical link
+ */
+int ice_set_link(struct ice_vsi *vsi, bool ena)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_port_info *pi = vsi->port_info;
+	struct ice_hw *hw = pi->hw;
+	enum ice_status status;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	status = ice_aq_set_link_restart_an(pi, ena, NULL);
+
+	/* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE.
+	 * this is not a fatal error, so print a warning message and return
+	 * a success code. Return an error if FW returns an error code other
+	 * than ICE_AQ_RC_EMODE
+	 */
+	if (status == ICE_ERR_AQ_ERROR) {
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+			dev_warn(dev, "can't set link to %s, err %s aq_err %s. not fatal, continuing\n",
+				 (ena ? "ON" : "OFF"), ice_stat_str(status),
+				 ice_aq_str(hw->adminq.sq_last_status));
+	} else if (status) {
+		dev_err(dev, "can't set link to %s, err %s aq_err %s\n",
+			(ena ? "ON" : "OFF"), ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 3da17895a2b1..462c3ab7abad 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -45,6 +45,8 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
 
+int ice_set_link(struct ice_vsi *vsi, bool ena);
+
 #ifdef CONFIG_DCB
 int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
 #endif /* CONFIG_DCB */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4379bbece4d9..b976de4b5e6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -873,10 +873,10 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 {
 	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_phy_info *phy_info;
+	enum ice_status status;
 	struct ice_vsi *vsi;
 	u16 old_link_speed;
 	bool old_link;
-	int result;
 
 	phy_info = &pi->phy;
 	phy_info->link_info_old = phy_info->link_info;
@@ -887,10 +887,11 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	/* update the link info structures and re-enable link events,
 	 * don't bail on failure due to other book keeping needed
 	 */
-	result = ice_update_link_info(pi);
-	if (result)
-		dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
-			pi->lport);
+	status = ice_update_link_info(pi);
+	if (status)
+		dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n",
+			pi->lport, ice_stat_str(status),
+			ice_aq_str(pi->hw->adminq.sq_last_status));
 
 	/* Check if the link state is up after updating link info, and treat
 	 * this event as an UP event since the link is actually UP now.
@@ -906,18 +907,12 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
 	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
 		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
-
-		result = ice_aq_set_link_restart_an(pi, false, NULL);
-		if (result) {
-			dev_dbg(dev, "Failed to set link down, VSI %d error %d\n",
-				vsi->vsi_num, result);
-			return result;
-		}
+		ice_set_link(vsi, false);
 	}
 
 	/* if the old link up/down and speed is the same as the new */
 	if (link_up == old_link && link_speed == old_link_speed)
-		return result;
+		return 0;
 
 	if (ice_is_dcb_active(pf)) {
 		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@ -931,7 +926,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
 
 	ice_vc_notify_link_state(pf);
 
-	return result;
+	return 0;
 }
 
 /**
@@ -6678,6 +6673,7 @@ int ice_open(struct net_device *netdev)
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	struct ice_port_info *pi;
+	enum ice_status status;
 	int err;
 
 	if (test_bit(__ICE_NEEDS_RESTART, pf->state)) {
@@ -6688,11 +6684,11 @@ int ice_open(struct net_device *netdev)
 	netif_carrier_off(netdev);
 
 	pi = vsi->port_info;
-	err = ice_update_link_info(pi);
-	if (err) {
-		netdev_err(netdev, "Failed to get link info, error %d\n",
-			   err);
-		return err;
+	status = ice_update_link_info(pi);
+	if (status) {
+		netdev_err(netdev, "Failed to get link info, error %s\n",
+			   ice_stat_str(status));
+		return -EIO;
 	}
 
 	/* Set PHY if there is media, otherwise, turn off PHY */
@@ -6715,12 +6711,7 @@ int ice_open(struct net_device *netdev)
 		}
 	} else {
 		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
-		err = ice_aq_set_link_restart_an(pi, false, NULL);
-		if (err) {
-			netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n",
-				   vsi->vsi_num, err);
-			return err;
-		}
+		ice_set_link(vsi, false);
 	}
 
 	err = ice_vsi_open(vsi);
-- 
cgit v1.2.3


From fd3dc1655eda6173566d56eaeb54f27ab4c9e33c Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:08 -0700
Subject: ice: Remove unnecessary checker loop

The loop checking for PF VSI doesn't make any sense. The VSI type
backing the netdev passed to ice_set_link_ksettings will always be
of type ICE_PF_VSI. Remove it.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 0db31a89658a..c8a8fdac4977 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2198,8 +2198,8 @@ ice_set_link_ksettings(struct net_device *netdev,
 	struct ethtool_link_ksettings safe_ks, copy_ks;
 	struct ice_aqc_get_phy_caps_data *abilities;
 	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
-	u16 adv_link_speed, curr_link_speed, idx;
 	struct ice_aqc_set_phy_cfg_data config;
+	u16 adv_link_speed, curr_link_speed;
 	struct ice_pf *pf = np->vsi->back;
 	struct ice_port_info *p;
 	u8 autoneg_changed = 0;
@@ -2214,14 +2214,6 @@ ice_set_link_ksettings(struct net_device *netdev,
 	if (!p)
 		return -EOPNOTSUPP;
 
-	/* Check if this is LAN VSI */
-	ice_for_each_vsi(pf, idx)
-		if (pf->vsi[idx]->type == ICE_VSI_PF) {
-			if (np->vsi != pf->vsi[idx])
-				return -EOPNOTSUPP;
-			break;
-		}
-
 	if (p->phy.media_type != ICE_MEDIA_BASET &&
 	    p->phy.media_type != ICE_MEDIA_FIBER &&
 	    p->phy.media_type != ICE_MEDIA_BACKPLANE &&
-- 
cgit v1.2.3


From 0be39bb4c7c8c358f7baf10296db2426f7cf814c Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:09 -0700
Subject: ice: Rename a couple of variables

In ice_set_link_ksettings, change 'abilities' to 'phy_caps' and 'p' to
'pi'. This is more consistent with similar usages elsewhere in the
driver.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 50 ++++++++++++++--------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index c8a8fdac4977..a31b6cf8d599 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2196,12 +2196,12 @@ ice_set_link_ksettings(struct net_device *netdev,
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ethtool_link_ksettings safe_ks, copy_ks;
-	struct ice_aqc_get_phy_caps_data *abilities;
 	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
+	struct ice_aqc_get_phy_caps_data *phy_caps;
 	struct ice_aqc_set_phy_cfg_data config;
 	u16 adv_link_speed, curr_link_speed;
 	struct ice_pf *pf = np->vsi->back;
-	struct ice_port_info *p;
+	struct ice_port_info *pi;
 	u8 autoneg_changed = 0;
 	enum ice_status status;
 	u64 phy_type_high = 0;
@@ -2209,25 +2209,25 @@ ice_set_link_ksettings(struct net_device *netdev,
 	int err = 0;
 	bool linkup;
 
-	p = np->vsi->port_info;
+	pi = np->vsi->port_info;
 
-	if (!p)
+	if (!pi)
 		return -EOPNOTSUPP;
 
-	if (p->phy.media_type != ICE_MEDIA_BASET &&
-	    p->phy.media_type != ICE_MEDIA_FIBER &&
-	    p->phy.media_type != ICE_MEDIA_BACKPLANE &&
-	    p->phy.media_type != ICE_MEDIA_DA &&
-	    p->phy.link_info.link_info & ICE_AQ_LINK_UP)
+	if (pi->phy.media_type != ICE_MEDIA_BASET &&
+	    pi->phy.media_type != ICE_MEDIA_FIBER &&
+	    pi->phy.media_type != ICE_MEDIA_BACKPLANE &&
+	    pi->phy.media_type != ICE_MEDIA_DA &&
+	    pi->phy.link_info.link_info & ICE_AQ_LINK_UP)
 		return -EOPNOTSUPP;
 
-	abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
-	if (!abilities)
+	phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
+	if (!phy_caps)
 		return -ENOMEM;
 
 	/* Get the PHY capabilities based on media */
-	status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
-				     abilities, NULL);
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+				     phy_caps, NULL);
 	if (status) {
 		err = -EAGAIN;
 		goto done;
@@ -2289,26 +2289,26 @@ ice_set_link_ksettings(struct net_device *netdev,
 	 * configuration is initialized during probe from PHY capabilities
 	 * software mode, and updated on set PHY configuration.
 	 */
-	memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config));
+	memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
 
 	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
 
 	/* Check autoneg */
-	err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed,
+	err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed,
 				netdev);
 
 	if (err)
 		goto done;
 
 	/* Call to get the current link speed */
-	p->phy.get_link_info = true;
-	status = ice_get_link_status(p, &linkup);
+	pi->phy.get_link_info = true;
+	status = ice_get_link_status(pi, &linkup);
 	if (status) {
 		err = -EAGAIN;
 		goto done;
 	}
 
-	curr_link_speed = p->phy.link_info.link_speed;
+	curr_link_speed = pi->phy.link_info.link_speed;
 	adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
 
 	/* If speed didn't get set, set it to what it currently is.
@@ -2327,7 +2327,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	}
 
 	/* save the requested speeds */
-	p->phy.link_info.req_speeds = adv_link_speed;
+	pi->phy.link_info.req_speeds = adv_link_speed;
 
 	/* set link and auto negotiation so changes take effect */
 	config.caps |= ICE_AQ_PHY_ENA_LINK;
@@ -2343,9 +2343,9 @@ ice_set_link_ksettings(struct net_device *netdev,
 	 * for set PHY configuration
 	 */
 	config.phy_type_high = cpu_to_le64(phy_type_high) &
-			abilities->phy_type_high;
+			phy_caps->phy_type_high;
 	config.phy_type_low = cpu_to_le64(phy_type_low) &
-			abilities->phy_type_low;
+			phy_caps->phy_type_low;
 
 	if (!(config.phy_type_high || config.phy_type_low)) {
 		/* If there is no intersection and lenient mode is enabled, then
@@ -2365,7 +2365,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	}
 
 	/* If link is up put link down */
-	if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) {
+	if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) {
 		/* Tell the OS link is going down, the link will go
 		 * back up when fw says it is ready asynchronously
 		 */
@@ -2375,7 +2375,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	}
 
 	/* make the aq call */
-	status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL);
+	status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
 	if (status) {
 		netdev_info(netdev, "Set phy config failed,\n");
 		err = -EAGAIN;
@@ -2383,9 +2383,9 @@ ice_set_link_ksettings(struct net_device *netdev,
 	}
 
 	/* Save speed request */
-	p->phy.curr_user_speed_req = adv_link_speed;
+	pi->phy.curr_user_speed_req = adv_link_speed;
 done:
-	kfree(abilities);
+	kfree(phy_caps);
 	clear_bit(__ICE_CFG_BUSY, pf->state);
 
 	return err;
-- 
cgit v1.2.3


From 450f10e794199b49d0a134f5dae47896c8ab0f44 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:10 -0700
Subject: ice: Fix error return codes in ice_set_link_ksettings

Return more appropriate error codes so that the right error
message is communicated to the user by ethtool.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index a31b6cf8d599..41795d263dcd 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2212,7 +2212,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	pi = np->vsi->port_info;
 
 	if (!pi)
-		return -EOPNOTSUPP;
+		return -EIO;
 
 	if (pi->phy.media_type != ICE_MEDIA_BASET &&
 	    pi->phy.media_type != ICE_MEDIA_FIBER &&
@@ -2229,7 +2229,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 				     phy_caps, NULL);
 	if (status) {
-		err = -EAGAIN;
+		err = -EIO;
 		goto done;
 	}
 
@@ -2252,7 +2252,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 			   __ETHTOOL_LINK_MODE_MASK_NBITS)) {
 		if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
 			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
-		err = -EINVAL;
+		err = -EOPNOTSUPP;
 		goto done;
 	}
 
@@ -2304,7 +2304,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	pi->phy.get_link_info = true;
 	status = ice_get_link_status(pi, &linkup);
 	if (status) {
-		err = -EAGAIN;
+		err = -EIO;
 		goto done;
 	}
 
@@ -2335,7 +2335,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	/* check if there is a PHY type for the requested advertised speed */
 	if (!(phy_type_low || phy_type_high)) {
 		netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
-		err = -EAGAIN;
+		err = -EOPNOTSUPP;
 		goto done;
 	}
 
@@ -2359,7 +2359,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 					      pf->nvm_phy_type_lo;
 		} else {
 			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
-			err = -EAGAIN;
+			err = -EOPNOTSUPP;
 			goto done;
 		}
 	}
@@ -2378,7 +2378,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
 	if (status) {
 		netdev_info(netdev, "Set phy config failed,\n");
-		err = -EAGAIN;
+		err = -EIO;
 		goto done;
 	}
 
-- 
cgit v1.2.3


From 178a666daa0e32d0dcc2035d54a6071b568b974d Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:11 -0700
Subject: ice: Replace some memsets and memcpys with assignment

In ice_set_link_ksettings, use assignment instead of memset/memcpy
where possible

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 41795d263dcd..5dd84ccf6756 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2195,8 +2195,9 @@ ice_set_link_ksettings(struct net_device *netdev,
 		       const struct ethtool_link_ksettings *ks)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
-	struct ethtool_link_ksettings safe_ks, copy_ks;
 	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
+	struct ethtool_link_ksettings copy_ks = *ks;
+	struct ethtool_link_ksettings safe_ks = {};
 	struct ice_aqc_get_phy_caps_data *phy_caps;
 	struct ice_aqc_set_phy_cfg_data config;
 	u16 adv_link_speed, curr_link_speed;
@@ -2233,14 +2234,9 @@ ice_set_link_ksettings(struct net_device *netdev,
 		goto done;
 	}
 
-	/* copy the ksettings to copy_ks to avoid modifying the original */
-	memcpy(&copy_ks, ks, sizeof(copy_ks));
-
 	/* save autoneg out of ksettings */
 	autoneg = copy_ks.base.autoneg;
 
-	memset(&safe_ks, 0, sizeof(safe_ks));
-
 	/* Get link modes supported by hardware.*/
 	ice_phy_type_to_ethtool(netdev, &safe_ks);
 
@@ -2289,7 +2285,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	 * configuration is initialized during probe from PHY capabilities
 	 * software mode, and updated on set PHY configuration.
 	 */
-	memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
+	config = pi->phy.curr_user_phy_cfg;
 
 	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
 
-- 
cgit v1.2.3


From 0a02944feaa75df4309103b8a19c56960cf32164 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:12 -0700
Subject: ice: Use default configuration mode for PHY configuration

Recent firmware supports a new "get PHY capabilities" mode
ICE_AQC_REPORT_DFLT_CFG which makes it unnecessary for the driver
to track and apply NVM based default link overrides.

If FW AQ API version supports it, use Report Default Configuration.
Add check function for Report Default Configuration support and update
accordingly.

Also change adv_phy_type_[lo|hi] to advert_phy_type[lo|hi] for
clarity.

Co-developed-by: Mateusz Pacuszka <mateuszx.pacuszka@intel.com>
Signed-off-by: Mateusz Pacuszka <mateuszx.pacuszka@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 12 ++---
 drivers/net/ethernet/intel/ice/ice_common.c     | 33 +++++++++++++-
 drivers/net/ethernet/intel/ice/ice_common.h     |  1 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c    | 59 +++++++++++++++----------
 drivers/net/ethernet/intel/ice/ice_main.c       | 35 ++++++++++-----
 drivers/net/ethernet/intel/ice/ice_type.h       |  5 +++
 6 files changed, 103 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 3f3d51bf0019..5cdfe406af84 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -877,16 +877,18 @@ struct ice_aqc_get_phy_caps {
 	__le16 param0;
 	/* 18.0 - Report qualified modules */
 #define ICE_AQC_GET_PHY_RQM		BIT(0)
-	/* 18.1 - 18.2 : Report mode
-	 * 00b - Report NVM capabilities
-	 * 01b - Report topology capabilities
-	 * 10b - Report SW configured
+	/* 18.1 - 18.3 : Report mode
+	 * 000b - Report NVM capabilities
+	 * 001b - Report topology capabilities
+	 * 010b - Report SW configured
+	 * 100b - Report default capabilities
 	 */
 #define ICE_AQC_REPORT_MODE_S			1
-#define ICE_AQC_REPORT_MODE_M			(3 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_MODE_M			(7 << ICE_AQC_REPORT_MODE_S)
 #define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA	0
 #define ICE_AQC_REPORT_TOPO_CAP_MEDIA		BIT(1)
 #define ICE_AQC_REPORT_ACTIVE_CFG		BIT(2)
+#define ICE_AQC_REPORT_DFLT_CFG		BIT(3)
 	__le32 reserved1;
 	__le32 addr_high;
 	__le32 addr_low;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 509dce475bff..8485450aff80 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -158,6 +158,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
 		return ICE_ERR_PARAM;
 	hw = pi->hw;
 
+	if (report_mode == ICE_AQC_REPORT_DFLT_CFG &&
+	    !ice_fw_supports_report_dflt_cfg(hw))
+		return ICE_ERR_PARAM;
+
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
 
 	if (qual_mods)
@@ -3034,16 +3038,21 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
 {
 	struct ice_aqc_get_phy_caps_data *pcaps;
 	enum ice_status status;
+	struct ice_hw *hw;
 
 	if (!pi || !cfg)
 		return ICE_ERR_BAD_PTR;
 
+	hw = pi->hw;
+
 	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
 	if (!pcaps)
 		return ICE_ERR_NO_MEMORY;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
-				     NULL);
+	status = ice_aq_get_phy_caps(pi, false,
+				     (ice_fw_supports_report_dflt_cfg(hw) ?
+				      ICE_AQC_REPORT_DFLT_CFG :
+				      ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL);
 	if (status)
 		goto out;
 
@@ -4492,3 +4501,23 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
 }
+
+/**
+ * ice_fw_supports_report_dflt_cfg
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports report default configuration
+ */
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw)
+{
+	if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) {
+		if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN)
+			return true;
+		if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN &&
+		    hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH)
+			return true;
+	} else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) {
+		return true;
+	}
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index d14406b11c92..7a9d2dfb21a2 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -179,4 +179,5 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
 bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
 enum ice_status
 ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw);
 #endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 5dd84ccf6756..4ca8d5880cfc 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1445,8 +1445,8 @@ void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low)
 	do {								     \
 		if (req_speeds & (aq_link_speed) ||			     \
 		    (!req_speeds &&					     \
-		     (adv_phy_type_lo & phy_type_mask_lo ||		     \
-		      adv_phy_type_hi & phy_type_mask_hi)))		     \
+		     (advert_phy_type_lo & phy_type_mask_lo ||		     \
+		      advert_phy_type_hi & phy_type_mask_hi)))		     \
 			ethtool_link_ksettings_add_link_mode(ks, advertising,\
 							ethtool_link_mode);  \
 	} while (0)
@@ -1463,10 +1463,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
+	u64 advert_phy_type_lo = 0;
+	u64 advert_phy_type_hi = 0;
 	u64 phy_type_mask_lo = 0;
 	u64 phy_type_mask_hi = 0;
-	u64 adv_phy_type_lo = 0;
-	u64 adv_phy_type_hi = 0;
 	u64 phy_types_high = 0;
 	u64 phy_types_low = 0;
 	u16 req_speeds;
@@ -1484,28 +1484,35 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	 * requested by user.
 	 */
 	if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
-		struct ice_link_default_override_tlv *ldo;
-
-		ldo = &pf->link_dflt_override;
 		phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
 		phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
 
 		ice_mask_min_supported_speeds(phy_types_high, &phy_types_low);
-
-		/* If override enabled and PHY mask set, then
-		 * Advertising link mode is the intersection of the PHY
-		 * types without media and the override PHY mask.
+		/* determine advertised modes based on link override only
+		 * if it's supported and if the FW doesn't abstract the
+		 * driver from having to account for link overrides
 		 */
-		if (ldo->options & ICE_LINK_OVERRIDE_EN &&
-		    (ldo->phy_type_low || ldo->phy_type_high)) {
-			adv_phy_type_lo =
-				le64_to_cpu(pf->nvm_phy_type_lo) &
-				ldo->phy_type_low;
-			adv_phy_type_hi =
-				le64_to_cpu(pf->nvm_phy_type_hi) &
-				ldo->phy_type_high;
+		if (ice_fw_supports_link_override(&pf->hw) &&
+		    !ice_fw_supports_report_dflt_cfg(&pf->hw)) {
+			struct ice_link_default_override_tlv *ldo;
+
+			ldo = &pf->link_dflt_override;
+			/* If override enabled and PHY mask set, then
+			 * Advertising link mode is the intersection of the PHY
+			 * types without media and the override PHY mask.
+			 */
+			if (ldo->options & ICE_LINK_OVERRIDE_EN &&
+			    (ldo->phy_type_low || ldo->phy_type_high)) {
+				advert_phy_type_lo =
+					le64_to_cpu(pf->nvm_phy_type_lo) &
+					ldo->phy_type_low;
+				advert_phy_type_hi =
+					le64_to_cpu(pf->nvm_phy_type_hi) &
+					ldo->phy_type_high;
+			}
 		}
 	} else {
+		/* strict mode */
 		phy_types_low = vsi->port_info->phy.phy_type_low;
 		phy_types_high = vsi->port_info->phy.phy_type_high;
 	}
@@ -1513,9 +1520,9 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 	/* If Advertising link mode PHY type is not using override PHY type,
 	 * then use PHY type with media.
 	 */
-	if (!adv_phy_type_lo && !adv_phy_type_hi) {
-		adv_phy_type_lo = vsi->port_info->phy.phy_type_low;
-		adv_phy_type_hi = vsi->port_info->phy.phy_type_high;
+	if (!advert_phy_type_lo && !advert_phy_type_hi) {
+		advert_phy_type_lo = vsi->port_info->phy.phy_type_low;
+		advert_phy_type_hi = vsi->port_info->phy.phy_type_high;
 	}
 
 	ethtool_link_ksettings_zero_link_mode(ks, supported);
@@ -2227,8 +2234,12 @@ ice_set_link_ksettings(struct net_device *netdev,
 		return -ENOMEM;
 
 	/* Get the PHY capabilities based on media */
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
-				     phy_caps, NULL);
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					     phy_caps, NULL);
+	else
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					     phy_caps, NULL);
 	if (status) {
 		err = -EIO;
 		goto done;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index b976de4b5e6f..6bc2215b674d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1732,7 +1732,7 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi)
  * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
  * @pi: port info structure
  *
- * If default override is enabled, initialized the user PHY cfg speed and FEC
+ * If default override is enabled, initialize the user PHY cfg speed and FEC
  * settings using the default override mask from the NVM.
  *
  * The PHY should only be configured with the default override settings the
@@ -1741,6 +1741,9 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi)
  * and the PHY has not been configured with the default override settings. The
  * state is set here, and cleared in ice_configure_phy the first time the PHY is
  * configured.
+ *
+ * This function should be called only if the FW doesn't support default
+ * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
  */
 static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
 {
@@ -1802,8 +1805,12 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 	if (!pcaps)
 		return -ENOMEM;
 
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
-				     NULL);
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					     pcaps, NULL);
+	else
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					     pcaps, NULL);
 	if (status) {
 		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
 		err = -EIO;
@@ -1818,17 +1825,19 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
 		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
 
-		/* if link default override is enabled, initialize user PHY
-		 * configuration with link default override values
+		/* if the FW supports default PHY configuration mode, then the driver
+		 * does not have to apply link override settings. If not,
+		 * initialize user PHY configuration with link override values
 		 */
-		if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) {
+		if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
+		    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
 			ice_init_phy_cfg_dflt_override(pi);
 			goto out;
 		}
 	}
 
-	/* if link default override is not enabled, initialize PHY using
-	 * topology with media
+	/* if link default override is not enabled, set user flow control and
+	 * FEC settings based on what get_phy_caps returned
 	 */
 	phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
 						      pcaps->link_fec_options);
@@ -1899,10 +1908,14 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 
 	/* Use PHY topology as baseline for configuration */
 	memset(pcaps, 0, sizeof(*pcaps));
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
-				     NULL);
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					     pcaps, NULL);
+	else
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					     pcaps, NULL);
 	if (status) {
-		dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n",
+		dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n",
 			vsi->vsi_num, ice_stat_str(status));
 		err = -EIO;
 		goto done;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 276ebcc309dc..2efc91b58c9e 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -941,4 +941,9 @@ struct ice_aq_get_set_rss_lut_params {
 #define ICE_FW_API_LLDP_FLTR_MIN	7
 #define ICE_FW_API_LLDP_FLTR_PATCH	1
 
+/* AQ API version for report default configuration */
+#define ICE_FW_API_REPORT_DFLT_CFG_MAJ		1
+#define ICE_FW_API_REPORT_DFLT_CFG_MIN		7
+#define ICE_FW_API_REPORT_DFLT_CFG_PATCH	3
+
 #endif /* _ICE_TYPE_H_ */
-- 
cgit v1.2.3


From 75751c80d6d841ec1f803f0a6c9b116345458d16 Mon Sep 17 00:00:00 2001
From: Jeb Cramer <jeb.j.cramer@intel.com>
Date: Thu, 25 Mar 2021 15:35:13 -0700
Subject: ice: Limit forced overrides based on FW version

Beyond a specific version of firmware, there is no need to provide
override values to the firmware when setting PHY capabilities.  In this
case, we do not need to indicate whether we're in Strict or Lenient Link
Mode.

In the case of translating capabilities to the configuration structure,
the module compliance enforcement is already correctly set by firmware,
so the extra code block is redundant.

Signed-off-by: Jeb Cramer <jeb.j.cramer@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 8485450aff80..b13a630ea1b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3013,17 +3013,6 @@ ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
 	cfg->link_fec_opt = caps->link_fec_options;
 	cfg->module_compliance_enforcement =
 		caps->module_compliance_enforcement;
-
-	if (ice_fw_supports_link_override(pi->hw)) {
-		struct ice_link_default_override_tlv tlv;
-
-		if (ice_get_link_default_override(&tlv, pi))
-			return;
-
-		if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)
-			cfg->module_compliance_enforcement |=
-				ICE_LINK_OVERRIDE_STRICT_MODE;
-	}
 }
 
 /**
@@ -3091,7 +3080,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
 		break;
 	}
 
-	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) {
+	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
+	    !ice_fw_supports_report_dflt_cfg(hw)) {
 		struct ice_link_default_override_tlv tlv;
 
 		if (ice_get_link_default_override(&tlv, pi))
-- 
cgit v1.2.3


From dc6aaa139fb74d51c446a624ce8a7fb543e49e57 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:14 -0700
Subject: ice: Remove unnecessary variable

In ice_init_phy_user_cfg, vsi is used only to get to hw. Remove this
and just use pi->hw

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6bc2215b674d..c81eb27e83a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1791,16 +1791,11 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 	struct ice_phy_info *phy = &pi->phy;
 	struct ice_pf *pf = pi->hw->back;
 	enum ice_status status;
-	struct ice_vsi *vsi;
 	int err = 0;
 
 	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
 		return -EIO;
 
-	vsi = ice_get_main_vsi(pf);
-	if (!vsi)
-		return -EINVAL;
-
 	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
 	if (!pcaps)
 		return -ENOMEM;
@@ -1820,7 +1815,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 	ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
 
 	/* check if lenient mode is supported and enabled */
-	if (ice_fw_supports_link_override(&vsi->back->hw) &&
+	if (ice_fw_supports_link_override(pi->hw) &&
 	    !(pcaps->module_compliance_enforcement &
 	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
 		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
-- 
cgit v1.2.3


From efc1eddb28aaeb445800041d1fbb9db4e977bf01 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:15 -0700
Subject: ice: Use local variable instead of pointer derefs

Replace multiple instances of vsi->back and pi->phy with equivalent
local variables

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c81eb27e83a6..73dd272acbf9 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1857,27 +1857,24 @@ err_out:
 static int ice_configure_phy(struct ice_vsi *vsi)
 {
 	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_port_info *pi = vsi->port_info;
 	struct ice_aqc_get_phy_caps_data *pcaps;
 	struct ice_aqc_set_phy_cfg_data *cfg;
-	struct ice_port_info *pi;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = vsi->back;
 	enum ice_status status;
 	int err = 0;
 
-	pi = vsi->port_info;
-	if (!pi)
-		return -EINVAL;
-
 	/* Ensure we have media as we cannot configure a medialess port */
-	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
 		return -EPERM;
 
 	ice_print_topo_conflict(vsi);
 
-	if (vsi->port_info->phy.link_info.topo_media_conflict ==
-	    ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
+	if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
 		return -EPERM;
 
-	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
 		return ice_force_phys_link_state(vsi, true);
 
 	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
@@ -1898,7 +1895,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 	 * there's nothing to do
 	 */
 	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
-	    ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg))
+	    ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
 		goto done;
 
 	/* Use PHY topology as baseline for configuration */
@@ -1929,8 +1926,8 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 	 */
 	if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
 			       vsi->back->state)) {
-		cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low;
-		cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high;
+		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
+		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
 	} else {
 		u64 phy_low = 0, phy_high = 0;
 
@@ -1948,7 +1945,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 	}
 
 	/* FEC */
-	ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
+	ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
 
 	/* Can't provide what was requested; use PHY capabilities */
 	if (cfg->link_fec_opt !=
@@ -1960,12 +1957,12 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 	/* Flow Control - always supported; no need to check against
 	 * capabilities
 	 */
-	ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req);
+	ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
 
 	/* Enable link and link update */
 	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
 
-	status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+	status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
 	if (status) {
 		dev_err(dev, "Failed to set phy config, VSI %d error %s\n",
 			vsi->vsi_num, ice_stat_str(status));
-- 
cgit v1.2.3


From 51fe27e179b1fba5504068bdf02453acfabe96b0 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 25 Mar 2021 15:35:16 -0700
Subject: ice: Remove rx_gro_dropped stat

Tracking of the rx_gro_dropped statistic was removed in
commit f73fc40327c0 ("ice: drop dead code in ice_receive_skb()").
Remove the associated variables and its reporting to ethtool stats.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h         | 1 -
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 1 -
 drivers/net/ethernet/intel/ice/ice_main.c    | 4 +---
 drivers/net/ethernet/intel/ice/ice_txrx.h    | 1 -
 4 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 02badaaf818c..721afa0f0a88 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -265,7 +265,6 @@ struct ice_vsi {
 	u32 tx_busy;
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
-	u32 rx_gro_dropped;
 	u16 num_q_vectors;
 	u16 base_vector;		/* IRQ base for OS reserved vectors */
 	enum ice_vsi_type type;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 4ca8d5880cfc..51d3a929ecfd 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -60,7 +60,6 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = {
 	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
 	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
 	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
-	ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped),
 	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
 	ICE_VSI_STAT("tx_linearize", tx_linearize),
 	ICE_VSI_STAT("tx_busy", tx_busy),
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 73dd272acbf9..3c73ee4a6c0c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5354,7 +5354,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 	vsi->tx_linearize = 0;
 	vsi->rx_buf_failed = 0;
 	vsi->rx_page_failed = 0;
-	vsi->rx_gro_dropped = 0;
 
 	rcu_read_lock();
 
@@ -5369,7 +5368,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 		vsi_stats->rx_bytes += bytes;
 		vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
 		vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
-		vsi->rx_gro_dropped += ring->rx_stats.gro_dropped;
 	}
 
 	/* update XDP Tx rings counters */
@@ -5401,7 +5399,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi)
 	ice_update_eth_stats(vsi);
 
 	cur_ns->tx_errors = cur_es->tx_errors;
-	cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped;
+	cur_ns->rx_dropped = cur_es->rx_discards;
 	cur_ns->tx_dropped = cur_es->tx_discards;
 	cur_ns->multicast = cur_es->rx_multicast;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index ffe0d271dec7..701552d88bea 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -192,7 +192,6 @@ struct ice_rxq_stats {
 	u64 non_eop_descs;
 	u64 alloc_page_failed;
 	u64 alloc_buf_failed;
-	u64 gro_dropped; /* GRO returned dropped */
 };
 
 enum ice_ring_state_t {
-- 
cgit v1.2.3


From 771015b90b8686a20f9f328a050ef624e490f475 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Thu, 25 Mar 2021 15:35:17 -0700
Subject: ice: Remove unnecessary checks in add/kill_vid ndo ops

Currently the driver is doing two unnecessary checks. First both ops are
checking if the VLAN ID passed in is less than VLAN_N_VID and second
both ops are checking to see if a port VLAN is configured on the VSI.

The first check is already handled by the 8021q driver so this is an
unnecessary check. The second check is unnecessary because the PF VSI is
never put into a port VLAN.

Remove these checks.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 3c73ee4a6c0c..5ab35c1d6121 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3078,15 +3078,6 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
 	struct ice_vsi *vsi = np->vsi;
 	int ret;
 
-	if (vid >= VLAN_N_VID) {
-		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
-			   vid, VLAN_N_VID);
-		return -EINVAL;
-	}
-
-	if (vsi->info.pvid)
-		return -EINVAL;
-
 	/* VLAN 0 is added by default during load/reset */
 	if (!vid)
 		return 0;
@@ -3124,9 +3115,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
 	struct ice_vsi *vsi = np->vsi;
 	int ret;
 
-	if (vsi->info.pvid)
-		return -EINVAL;
-
 	/* don't allow removal of VLAN 0 */
 	if (!vid)
 		return 0;
-- 
cgit v1.2.3


From 2e20521b80c76ba517a060a5db752a9ca21c597c Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Tue, 2 Mar 2021 10:15:44 -0800
Subject: ice: Remove unnecessary blank line

Checkpatch reports the following, fix it.

-----------------------------------------
drivers/net/ethernet/intel/ice/ice_main.c
-----------------------------------------
CHECK:BRACES: Blank lines aren't necessary before a close brace '}'
FILE: drivers/net/ethernet/intel/ice/ice_main.c:455:
+
+}

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5ab35c1d6121..30935aaa8935 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -436,7 +436,6 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
 
 	for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
 		pf->vf_agg_node[node].num_vsis = 0;
-
 }
 
 /**
-- 
cgit v1.2.3


From ed7247f30982bbc6f8bffd12a0c76398b90ec319 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Tue, 6 Apr 2021 20:11:56 +0800
Subject: rfkill: use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Link: https://lore.kernel.org/r/1617711116-49370-1-git-send-email-huangguobin4@huawei.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/input.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 4b01baea1d4a..598d0a61bda7 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -36,7 +36,7 @@ module_param_named(master_switch_mode, rfkill_master_switch_mode, uint, 0);
 MODULE_PARM_DESC(master_switch_mode,
 	"SW_RFKILL_ALL ON should: 0=do nothing (only unlock); 1=restore; 2=unblock all");
 
-static spinlock_t rfkill_op_lock;
+static DEFINE_SPINLOCK(rfkill_op_lock);
 static bool rfkill_op_pending;
 static unsigned long rfkill_sw_pending[BITS_TO_LONGS(NUM_RFKILL_TYPES)];
 static unsigned long rfkill_sw_state[BITS_TO_LONGS(NUM_RFKILL_TYPES)];
@@ -330,8 +330,6 @@ int __init rfkill_handler_init(void)
 		return -EINVAL;
 	}
 
-	spin_lock_init(&rfkill_op_lock);
-
 	/* Avoid delay at first schedule */
 	rfkill_last_scheduled =
 			jiffies - msecs_to_jiffies(RFKILL_OPS_DELAY) - 1;
-- 
cgit v1.2.3


From 30a70d18e862d505a34f2b21ccb83f5d4792e2d5 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 17:59:20 +0800
Subject: mac80211_hwsim: use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Link: https://lore.kernel.org/r/1616839160-6654-1-git-send-email-huangguobin4@huawei.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index fa7d4c20dc13..d56d2095a0d4 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -596,7 +596,7 @@ static const struct nl80211_vendor_cmd_info mac80211_hwsim_vendor_events[] = {
 	{ .vendor_id = OUI_QCA, .subcmd = 1 },
 };
 
-static spinlock_t hwsim_radio_lock;
+static DEFINE_SPINLOCK(hwsim_radio_lock);
 static LIST_HEAD(hwsim_radios);
 static struct rhashtable hwsim_radios_rht;
 static int hwsim_radio_idx;
@@ -763,7 +763,7 @@ static const struct nla_policy hwsim_genl_policy[HWSIM_ATTR_MAX + 1] = {
 /* MAC80211_HWSIM virtio queues */
 static struct virtqueue *hwsim_vqs[HWSIM_NUM_VQS];
 static bool hwsim_virtio_enabled;
-static spinlock_t hwsim_virtio_lock;
+static DEFINE_SPINLOCK(hwsim_virtio_lock);
 
 static void hwsim_virtio_rx_work(struct work_struct *work);
 static DECLARE_WORK(hwsim_virtio_rx, hwsim_virtio_rx_work);
@@ -4410,8 +4410,6 @@ static struct virtio_driver virtio_hwsim = {
 
 static int hwsim_register_virtio_driver(void)
 {
-	spin_lock_init(&hwsim_virtio_lock);
-
 	return register_virtio_driver(&virtio_hwsim);
 }
 
@@ -4440,8 +4438,6 @@ static int __init init_mac80211_hwsim(void)
 	if (channels < 1)
 		return -EINVAL;
 
-	spin_lock_init(&hwsim_radio_lock);
-
 	err = rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
 	if (err)
 		return err;
-- 
cgit v1.2.3


From 81d94f47beaa561840776087d22a809b17c11033 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Thu, 25 Mar 2021 22:38:54 +0800
Subject: cfg80211: regulatory: use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Link: https://lore.kernel.org/r/20210325143854.13186-1-linqiheng@huawei.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 68db914df642..0406ce7334fa 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -126,7 +126,7 @@ static int reg_num_devs_support_basehint;
  * is relevant for all registered devices.
  */
 static bool reg_is_indoor;
-static spinlock_t reg_indoor_lock;
+static DEFINE_SPINLOCK(reg_indoor_lock);
 
 /* Used to track the userspace process controlling the indoor setting */
 static u32 reg_is_indoor_portid;
@@ -210,11 +210,11 @@ static struct regulatory_request *get_last_request(void)
 
 /* Used to queue up regulatory hints */
 static LIST_HEAD(reg_requests_list);
-static spinlock_t reg_requests_lock;
+static DEFINE_SPINLOCK(reg_requests_lock);
 
 /* Used to queue up beacon hints for review */
 static LIST_HEAD(reg_pending_beacons);
-static spinlock_t reg_pending_beacons_lock;
+static DEFINE_SPINLOCK(reg_pending_beacons_lock);
 
 /* Used to keep track of processed beacon hints */
 static LIST_HEAD(reg_beacon_list);
@@ -4262,10 +4262,6 @@ int __init regulatory_init(void)
 	if (IS_ERR(reg_pdev))
 		return PTR_ERR(reg_pdev);
 
-	spin_lock_init(&reg_requests_lock);
-	spin_lock_init(&reg_pending_beacons_lock);
-	spin_lock_init(&reg_indoor_lock);
-
 	rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
 
 	user_alpha2[0] = '9';
-- 
cgit v1.2.3


From 026dfac85f0a70cd1fd00181f1079b2c256b0e12 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 26 Mar 2021 02:48:43 +0000
Subject: mac80211: minstrel_ht: remove unused variable 'mg' in
 minstrel_ht_next_jump_rate()

GCC reports the following warning with W=1:

net/mac80211/rc80211_minstrel_ht.c:871:34: warning:
 variable 'mg' set but not used [-Wunused-but-set-variable]
  871 |  struct minstrel_mcs_group_data *mg;
      |                                  ^~

This variable is not used in function , this commit
remove it to fix the warning.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20210326024843.987941-1-weiyongjun1@huawei.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index ecad9b10984f..f21c85eb906a 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -868,7 +868,6 @@ static u16
 minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur,
 			   u32 slow_rate_dur, int *slow_rate_ofs)
 {
-	struct minstrel_mcs_group_data *mg;
 	struct minstrel_rate_stats *mrs;
 	u32 max_duration = slow_rate_dur;
 	int i, index, offset;
@@ -886,7 +885,6 @@ minstrel_ht_next_jump_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur,
 		u8 type;
 
 		group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups);
-		mg = &mi->groups[group];
 
 		supported = mi->supported[group];
 		if (!supported)
-- 
cgit v1.2.3


From 958574cbcc3ae31f18bbe0b1d7f3ab3f2bd109f1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 28 Mar 2021 22:37:29 +0100
Subject: mac80211: remove redundant assignment of variable result

The variable result is being assigned a value that is never
read and it is being updated later with a new value. The
initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210328213729.65819-1-colin.king@canonical.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5d06de61047a..2fba9db56e47 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1684,7 +1684,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_vif *vif;
 	struct sk_buff *skb;
-	bool result = true;
+	bool result;
 	__le16 fc;
 
 	if (WARN_ON(skb_queue_empty(skbs)))
-- 
cgit v1.2.3


From 272cd0e8d4a639a87fe2fa40323871043ad9876e Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Mon, 22 Feb 2021 13:20:59 -0800
Subject: nl80211: Add missing line in nl80211_fils_discovery_policy

Add NL80211_FILS_DISCOVERY_ATTR_TMPL explicitly in
nl80211_fils_discovery_policy definition.

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Link: https://lore.kernel.org/r/20210222212059.22492-1-alokad@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 034af85f79d8..adfa07c67b44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -407,9 +407,10 @@ static const struct nla_policy
 nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = {
 	[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] = NLA_POLICY_MAX(NLA_U32, 10000),
 	[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] = NLA_POLICY_MAX(NLA_U32, 10000),
-	NLA_POLICY_RANGE(NLA_BINARY,
-			 NL80211_FILS_DISCOVERY_TMPL_MIN_LEN,
-			 IEEE80211_MAX_DATA_LEN),
+	[NL80211_FILS_DISCOVERY_ATTR_TMPL] =
+			NLA_POLICY_RANGE(NLA_BINARY,
+					 NL80211_FILS_DISCOVERY_TMPL_MIN_LEN,
+					 IEEE80211_MAX_DATA_LEN),
 };
 
 static const struct nla_policy
-- 
cgit v1.2.3


From 73bc9e0af5945d03d398668dd97885742a5e85f7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 19 Mar 2021 23:28:01 +0100
Subject: mac80211: don't apply flow control on management frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some cases (depending on the driver, but it's true e.g. for
iwlwifi) we're using an internal TXQ for management packets,
mostly to simplify the code and to have a place to queue them.
However, it appears that in certain cases we can confuse the
code and management frames are dropped, which is certainly not
what we want.

Short-circuit the processing of management frames. To keep the
impact minimal, only put them on the frags queue and check the
tid == management only for doing that and to skip the airtime
fairness checks, if applicable.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20210319232800.0e876c800866.Id2b66eb5a17f3869b776c39b5ca713272ea09d5d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2fba9db56e47..ac765dd19d02 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  *
  * Transmit and frame generation functions.
  */
@@ -1388,8 +1388,17 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 	ieee80211_set_skb_enqueue_time(skb);
 
 	spin_lock_bh(&fq->lock);
-	fq_tin_enqueue(fq, tin, flow_idx, skb,
-		       fq_skb_free_func);
+	/*
+	 * For management frames, don't really apply codel etc.,
+	 * we don't want to apply any shaping or anything we just
+	 * want to simplify the driver API by having them on the
+	 * txqi.
+	 */
+	if (unlikely(txqi->txq.tid == IEEE80211_NUM_TIDS))
+		__skb_queue_tail(&txqi->frags, skb);
+	else
+		fq_tin_enqueue(fq, tin, flow_idx, skb,
+			       fq_skb_free_func);
 	spin_unlock_bh(&fq->lock);
 }
 
@@ -3835,6 +3844,9 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
 	if (!txq->sta)
 		return true;
 
+	if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
+		return true;
+
 	sta = container_of(txq->sta, struct sta_info, sta);
 	if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
 	    sta->airtime[txq->ac].aql_limit_low)
-- 
cgit v1.2.3


From 196900fd97e26292f0bb90758690db4cd5e64e98 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 8 Mar 2021 23:01:49 +0100
Subject: mac80211: set sk_pacing_shift for 802.3 txpath

Similar to 802.11 txpath, set socket sk_pacing_shift for 802.3 tx path.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/7230abc48dcf940657838546cdaef7dce691ecdd.1615240733.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ac765dd19d02..7aaa71ed49b0 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4162,6 +4162,9 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
 	unsigned long flags;
 	int q = info->hw_queue;
 
+	if (sta)
+		sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift);
+
 	if (ieee80211_queue_skb(local, sdata, sta, skb))
 		return true;
 
-- 
cgit v1.2.3


From 0750cfd8b7fd491c9d7c8bd19d9ac380cd3c84ee Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Thu, 11 Mar 2021 15:03:33 -0800
Subject: nl80211: better document CMD_ROAM behavior

The docs were very sparse with how exactly CMD_ROAM should be
used. Specifically related to BSS information normally obtained
through a user space scan.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Link: https://lore.kernel.org/r/20210311230333.103934-1-prestwoj@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ac78da99fccd..5e30c7f6c484 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -655,6 +655,9 @@
  *	When a security association was established on an 802.1X network using
  *	fast transition, this event should be followed by an
  *	%NL80211_CMD_PORT_AUTHORIZED event.
+ *	Following a %NL80211_CMD_ROAM event userspace can issue
+ *	%NL80211_CMD_GET_SCAN in order to obtain the scan information for the
+ *	new BSS the card/driver roamed to.
  * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
  *	userspace that a connection was dropped by the AP or due to other
  *	reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and
-- 
cgit v1.2.3


From afd2daa26c7abd734d78bd274fc6c59a15e61063 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 6 Apr 2021 21:55:53 +0200
Subject: Bluetooth: Add support for virtio transport driver

This adds support for Bluetooth HCI transport over virtio.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 drivers/bluetooth/Kconfig       |  10 +
 drivers/bluetooth/Makefile      |   2 +
 drivers/bluetooth/virtio_bt.c   | 401 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_bt.h  |  31 ++++
 include/uapi/linux/virtio_ids.h |   1 +
 5 files changed, 445 insertions(+)
 create mode 100644 drivers/bluetooth/virtio_bt.c
 create mode 100644 include/uapi/linux/virtio_bt.h

diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index 4e73a531b377..851842372c9b 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -425,4 +425,14 @@ config BT_HCIRSI
 	  Say Y here to compile support for HCI over Redpine into the
 	  kernel or say M to compile as a module.
 
+config BT_VIRTIO
+	tristate "Virtio Bluetooth driver"
+	depends on VIRTIO
+	help
+	  Virtio Bluetooth support driver.
+	  This driver supports Virtio Bluetooth devices.
+
+	  Say Y here to compile support for HCI over Virtio into the
+	  kernel or say M to compile as a module.
+
 endmenu
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 1a58a3ae142c..16286ea2655d 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_BT_BCM)		+= btbcm.o
 obj-$(CONFIG_BT_RTL)		+= btrtl.o
 obj-$(CONFIG_BT_QCA)		+= btqca.o
 
+obj-$(CONFIG_BT_VIRTIO)		+= virtio_bt.o
+
 obj-$(CONFIG_BT_HCIUART_NOKIA)	+= hci_nokia.o
 
 obj-$(CONFIG_BT_HCIRSI)		+= btrsi.o
diff --git a/drivers/bluetooth/virtio_bt.c b/drivers/bluetooth/virtio_bt.c
new file mode 100644
index 000000000000..c804db7e90f8
--- /dev/null
+++ b/drivers/bluetooth/virtio_bt.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_bt.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#define VERSION "0.1"
+
+enum {
+	VIRTBT_VQ_TX,
+	VIRTBT_VQ_RX,
+	VIRTBT_NUM_VQS,
+};
+
+struct virtio_bluetooth {
+	struct virtio_device *vdev;
+	struct virtqueue *vqs[VIRTBT_NUM_VQS];
+	struct work_struct rx;
+	struct hci_dev *hdev;
+};
+
+static int virtbt_add_inbuf(struct virtio_bluetooth *vbt)
+{
+	struct virtqueue *vq = vbt->vqs[VIRTBT_VQ_RX];
+	struct scatterlist sg[1];
+	struct sk_buff *skb;
+	int err;
+
+	skb = alloc_skb(1000, GFP_KERNEL);
+	sg_init_one(sg, skb->data, 1000);
+
+	err = virtqueue_add_inbuf(vq, sg, 1, skb, GFP_KERNEL);
+	if (err < 0) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	return 0;
+}
+
+static int virtbt_open(struct hci_dev *hdev)
+{
+	struct virtio_bluetooth *vbt = hci_get_drvdata(hdev);
+
+	if (virtbt_add_inbuf(vbt) < 0)
+		return -EIO;
+
+	virtqueue_kick(vbt->vqs[VIRTBT_VQ_RX]);
+	return 0;
+}
+
+static int virtbt_close(struct hci_dev *hdev)
+{
+	struct virtio_bluetooth *vbt = hci_get_drvdata(hdev);
+	int i;
+
+	cancel_work_sync(&vbt->rx);
+
+	for (i = 0; i < ARRAY_SIZE(vbt->vqs); i++) {
+		struct virtqueue *vq = vbt->vqs[i];
+		struct sk_buff *skb;
+
+		while ((skb = virtqueue_detach_unused_buf(vq)))
+			kfree_skb(skb);
+	}
+
+	return 0;
+}
+
+static int virtbt_flush(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int virtbt_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct virtio_bluetooth *vbt = hci_get_drvdata(hdev);
+	struct scatterlist sg[1];
+	int err;
+
+	memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1);
+
+	sg_init_one(sg, skb->data, skb->len);
+	err = virtqueue_add_outbuf(vbt->vqs[VIRTBT_VQ_TX], sg, 1, skb,
+				   GFP_KERNEL);
+	if (err) {
+		kfree_skb(skb);
+		return err;
+	}
+
+	virtqueue_kick(vbt->vqs[VIRTBT_VQ_TX]);
+	return 0;
+}
+
+static int virtbt_setup_zephyr(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	/* Read Build Information */
+	skb = __hci_cmd_sync(hdev, 0xfc08, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	bt_dev_info(hdev, "%s", (char *)(skb->data + 1));
+
+	hci_set_fw_info(hdev, "%s", skb->data + 1);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int virtbt_set_bdaddr_zephyr(struct hci_dev *hdev,
+				    const bdaddr_t *bdaddr)
+{
+	struct sk_buff *skb;
+
+	/* Write BD_ADDR */
+	skb = __hci_cmd_sync(hdev, 0xfc06, 6, bdaddr, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int virtbt_setup_intel(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	/* Intel Read Version */
+	skb = __hci_cmd_sync(hdev, 0xfc05, 0, NULL, HCI_CMD_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int virtbt_set_bdaddr_intel(struct hci_dev *hdev, const bdaddr_t *bdaddr)
+{
+	struct sk_buff *skb;
+
+	/* Intel Write BD Address */
+	skb = __hci_cmd_sync(hdev, 0xfc31, 6, bdaddr, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int virtbt_setup_realtek(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	/* Read ROM Version */
+	skb = __hci_cmd_sync(hdev, 0xfc6d, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	bt_dev_info(hdev, "ROM version %u", *((__u8 *) (skb->data + 1)));
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static int virtbt_shutdown_generic(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	/* Reset */
+	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	kfree_skb(skb);
+	return 0;
+}
+
+static void virtbt_rx_handle(struct virtio_bluetooth *vbt, struct sk_buff *skb)
+{
+	__u8 pkt_type;
+
+	pkt_type = *((__u8 *) skb->data);
+	skb_pull(skb, 1);
+
+	switch (pkt_type) {
+	case HCI_EVENT_PKT:
+	case HCI_ACLDATA_PKT:
+	case HCI_SCODATA_PKT:
+	case HCI_ISODATA_PKT:
+		hci_skb_pkt_type(skb) = pkt_type;
+		hci_recv_frame(vbt->hdev, skb);
+		break;
+	}
+}
+
+static void virtbt_rx_work(struct work_struct *work)
+{
+	struct virtio_bluetooth *vbt = container_of(work,
+						    struct virtio_bluetooth, rx);
+	struct sk_buff *skb;
+	unsigned int len;
+
+	skb = virtqueue_get_buf(vbt->vqs[VIRTBT_VQ_RX], &len);
+	if (!skb)
+		return;
+
+	skb->len = len;
+	virtbt_rx_handle(vbt, skb);
+
+	if (virtbt_add_inbuf(vbt) < 0)
+		return;
+
+	virtqueue_kick(vbt->vqs[VIRTBT_VQ_RX]);
+}
+
+static void virtbt_tx_done(struct virtqueue *vq)
+{
+	struct sk_buff *skb;
+	unsigned int len;
+
+	while ((skb = virtqueue_get_buf(vq, &len)))
+		kfree_skb(skb);
+}
+
+static void virtbt_rx_done(struct virtqueue *vq)
+{
+	struct virtio_bluetooth *vbt = vq->vdev->priv;
+
+	schedule_work(&vbt->rx);
+}
+
+static int virtbt_probe(struct virtio_device *vdev)
+{
+	vq_callback_t *callbacks[VIRTBT_NUM_VQS] = {
+		[VIRTBT_VQ_TX] = virtbt_tx_done,
+		[VIRTBT_VQ_RX] = virtbt_rx_done,
+	};
+	const char *names[VIRTBT_NUM_VQS] = {
+		[VIRTBT_VQ_TX] = "tx",
+		[VIRTBT_VQ_RX] = "rx",
+	};
+	struct virtio_bluetooth *vbt;
+	struct hci_dev *hdev;
+	int err;
+	__u8 type;
+
+	if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+		return -ENODEV;
+
+	type = virtio_cread8(vdev, offsetof(struct virtio_bt_config, type));
+
+	switch (type) {
+	case VIRTIO_BT_CONFIG_TYPE_PRIMARY:
+	case VIRTIO_BT_CONFIG_TYPE_AMP:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	vbt = kzalloc(sizeof(*vbt), GFP_KERNEL);
+	if (!vbt)
+		return -ENOMEM;
+
+	vdev->priv = vbt;
+	vbt->vdev = vdev;
+
+	INIT_WORK(&vbt->rx, virtbt_rx_work);
+
+	err = virtio_find_vqs(vdev, VIRTBT_NUM_VQS, vbt->vqs, callbacks,
+			      names, NULL);
+	if (err)
+		return err;
+
+	hdev = hci_alloc_dev();
+	if (!hdev) {
+		err = -ENOMEM;
+		goto failed;
+	}
+
+	vbt->hdev = hdev;
+
+	hdev->bus = HCI_VIRTIO;
+	hdev->dev_type = type;
+	hci_set_drvdata(hdev, vbt);
+
+	hdev->open  = virtbt_open;
+	hdev->close = virtbt_close;
+	hdev->flush = virtbt_flush;
+	hdev->send  = virtbt_send_frame;
+
+	if (virtio_has_feature(vdev, VIRTIO_BT_F_VND_HCI)) {
+		__u16 vendor;
+
+		virtio_cread(vdev, struct virtio_bt_config, vendor, &vendor);
+
+		switch (vendor) {
+		case VIRTIO_BT_CONFIG_VENDOR_ZEPHYR:
+			hdev->manufacturer = 1521;
+			hdev->setup = virtbt_setup_zephyr;
+			hdev->shutdown = virtbt_shutdown_generic;
+			hdev->set_bdaddr = virtbt_set_bdaddr_zephyr;
+			break;
+
+		case VIRTIO_BT_CONFIG_VENDOR_INTEL:
+			hdev->manufacturer = 2;
+			hdev->setup = virtbt_setup_intel;
+			hdev->shutdown = virtbt_shutdown_generic;
+			hdev->set_bdaddr = virtbt_set_bdaddr_intel;
+			set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
+			set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+			set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+			break;
+
+		case VIRTIO_BT_CONFIG_VENDOR_REALTEK:
+			hdev->manufacturer = 93;
+			hdev->setup = virtbt_setup_realtek;
+			hdev->shutdown = virtbt_shutdown_generic;
+			set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+			set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+			break;
+		}
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BT_F_MSFT_EXT)) {
+		__u16 msft_opcode;
+
+		virtio_cread(vdev, struct virtio_bt_config,
+			     msft_opcode, &msft_opcode);
+
+		hci_set_msft_opcode(hdev, msft_opcode);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BT_F_AOSP_EXT))
+		hci_set_aosp_capable(hdev);
+
+	if (hci_register_dev(hdev) < 0) {
+		hci_free_dev(hdev);
+		err = -EBUSY;
+		goto failed;
+	}
+
+	return 0;
+
+failed:
+	vdev->config->del_vqs(vdev);
+	return err;
+}
+
+static void virtbt_remove(struct virtio_device *vdev)
+{
+	struct virtio_bluetooth *vbt = vdev->priv;
+	struct hci_dev *hdev = vbt->hdev;
+
+	hci_unregister_dev(hdev);
+	vdev->config->reset(vdev);
+
+	hci_free_dev(hdev);
+	vbt->hdev = NULL;
+
+	vdev->config->del_vqs(vdev);
+	kfree(vbt);
+}
+
+static struct virtio_device_id virtbt_table[] = {
+	{ VIRTIO_ID_BT, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+MODULE_DEVICE_TABLE(virtio, virtbt_table);
+
+static const unsigned int virtbt_features[] = {
+	VIRTIO_BT_F_VND_HCI,
+	VIRTIO_BT_F_MSFT_EXT,
+	VIRTIO_BT_F_AOSP_EXT,
+};
+
+static struct virtio_driver virtbt_driver = {
+	.driver.name         = KBUILD_MODNAME,
+	.driver.owner        = THIS_MODULE,
+	.feature_table       = virtbt_features,
+	.feature_table_size  = ARRAY_SIZE(virtbt_features),
+	.id_table            = virtbt_table,
+	.probe               = virtbt_probe,
+	.remove              = virtbt_remove,
+};
+
+module_virtio_driver(virtbt_driver);
+
+MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_DESCRIPTION("Generic Bluetooth VIRTIO driver ver " VERSION);
+MODULE_VERSION(VERSION);
+MODULE_LICENSE("GPL");
diff --git a/include/uapi/linux/virtio_bt.h b/include/uapi/linux/virtio_bt.h
new file mode 100644
index 000000000000..a7bd48daa9a9
--- /dev/null
+++ b/include/uapi/linux/virtio_bt.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _UAPI_LINUX_VIRTIO_BT_H
+#define _UAPI_LINUX_VIRTIO_BT_H
+
+#include <linux/virtio_types.h>
+
+/* Feature bits */
+#define VIRTIO_BT_F_VND_HCI	0	/* Indicates vendor command support */
+#define VIRTIO_BT_F_MSFT_EXT	1	/* Indicates MSFT vendor support */
+#define VIRTIO_BT_F_AOSP_EXT	2	/* Indicates AOSP vendor support */
+
+enum virtio_bt_config_type {
+	VIRTIO_BT_CONFIG_TYPE_PRIMARY	= 0,
+	VIRTIO_BT_CONFIG_TYPE_AMP	= 1,
+};
+
+enum virtio_bt_config_vendor {
+	VIRTIO_BT_CONFIG_VENDOR_NONE	= 0,
+	VIRTIO_BT_CONFIG_VENDOR_ZEPHYR	= 1,
+	VIRTIO_BT_CONFIG_VENDOR_INTEL	= 2,
+	VIRTIO_BT_CONFIG_VENDOR_REALTEK	= 3,
+};
+
+struct virtio_bt_config {
+	__u8  type;
+	__u16 vendor;
+	__u16 msft_opcode;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_VIRTIO_BT_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index bc1c0621f5ed..b4f468e9441d 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -53,6 +53,7 @@
 #define VIRTIO_ID_MEM			24 /* virtio mem */
 #define VIRTIO_ID_FS			26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM			27 /* virtio pmem */
+#define VIRTIO_ID_BT			28 /* virtio bluetooth */
 #define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit v1.2.3


From cfa15cca51ef10d9aa363ed1d6907c40343ce34a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 6 Apr 2021 21:55:54 +0200
Subject: Bluetooth: Fix default values for advertising interval

The DISCOV_LE_FAST_ADV_INT_{MIN,MAX} contants are in msec, but then used
later on directly while it is suppose to be N * 0.625 ms according to
the Bluetooth Core specification.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index aa2879a3b0dd..58f7eada26fd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1754,8 +1754,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
 #define DISCOV_INTERLEAVED_INQUIRY_LEN	0x04
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 #define DISCOV_LE_RESTART_DELAY		msecs_to_jiffies(200)	/* msec */
-#define DISCOV_LE_FAST_ADV_INT_MIN     100     /* msec */
-#define DISCOV_LE_FAST_ADV_INT_MAX     150     /* msec */
+#define DISCOV_LE_FAST_ADV_INT_MIN	0x00A0	/* 100 msec */
+#define DISCOV_LE_FAST_ADV_INT_MAX	0x00F0	/* 150 msec */
 
 void mgmt_fill_version_info(void *ver);
 int mgmt_new_settings(struct hci_dev *hdev);
-- 
cgit v1.2.3


From ba29d0360a092997074e07524cf6f798a95a71d8 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 6 Apr 2021 21:55:55 +0200
Subject: Bluetooth: Set defaults for le_scan_{int,window}_adv_monitor

The le_scan_{int,window}_adv_monitor settings have not been set with a
sensible default.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 net/bluetooth/hci_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0da9b3274986..fd12f1652bdf 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3763,6 +3763,8 @@ struct hci_dev *hci_alloc_dev(void)
 	hdev->le_scan_window_suspend = 0x0012;
 	hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
 	hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
+	hdev->le_scan_int_adv_monitor = 0x0060;
+	hdev->le_scan_window_adv_monitor = 0x0030;
 	hdev->le_scan_int_connect = 0x0060;
 	hdev->le_scan_window_connect = 0x0060;
 	hdev->le_conn_min_interval = 0x0018;
-- 
cgit v1.2.3


From a61d67188f29ff678e94fb3ffba6c6d292e852c7 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 6 Apr 2021 21:55:56 +0200
Subject: Bluetooth: Allow Microsoft extension to indicate curve validation

Some controllers don't support the Simple Pairing Options feature that
can indicate the support for P-192 and P-256 public key validation.
However they might support the Microsoft vendor extension that can
indicate the validiation capability as well.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/mgmt.c             | 5 ++++-
 net/bluetooth/msft.c             | 8 ++++++++
 net/bluetooth/msft.h             | 6 ++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 58f7eada26fd..c73ac52af186 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -584,6 +584,7 @@ struct hci_dev {
 #if IS_ENABLED(CONFIG_BT_MSFTEXT)
 	__u16			msft_opcode;
 	void			*msft_data;
+	bool			msft_curve_validity;
 #endif
 
 #if IS_ENABLED(CONFIG_BT_AOSPEXT)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 59f8016c4866..f9be7f9084d6 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3730,8 +3730,11 @@ static int read_controller_cap(struct sock *sk, struct hci_dev *hdev,
 
 	/* When the Read Simple Pairing Options command is supported, then
 	 * the remote public key validation is supported.
+	 *
+	 * Alternatively, when Microsoft extensions are available, they can
+	 * indicate support for public key validation as well.
 	 */
-	if (hdev->commands[41] & 0x08)
+	if ((hdev->commands[41] & 0x08) || msft_curve_validity(hdev))
 		flags |= 0x01;	/* Remote public key validation (BR/EDR) */
 
 	flags |= 0x02;		/* Remote public key validation (LE) */
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index 47b104f318e9..e28f15439ce4 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -142,6 +142,9 @@ static bool read_supported_features(struct hci_dev *hdev,
 	msft->evt_prefix_len = rp->evt_prefix_len;
 	msft->features = __le64_to_cpu(rp->features);
 
+	if (msft->features & MSFT_FEATURE_MASK_CURVE_VALIDITY)
+		hdev->msft_curve_validity = true;
+
 	kfree_skb(skb);
 	return true;
 
@@ -605,3 +608,8 @@ int msft_set_filter_enable(struct hci_dev *hdev, bool enable)
 
 	return err;
 }
+
+bool msft_curve_validity(struct hci_dev *hdev)
+{
+	return hdev->msft_curve_validity;
+}
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
index 88ed613dfa08..6e56d94b88d8 100644
--- a/net/bluetooth/msft.h
+++ b/net/bluetooth/msft.h
@@ -22,6 +22,7 @@ int msft_remove_monitor(struct hci_dev *hdev, struct adv_monitor *monitor,
 			u16 handle);
 void msft_req_add_set_filter_enable(struct hci_request *req, bool enable);
 int msft_set_filter_enable(struct hci_dev *hdev, bool enable);
+bool msft_curve_validity(struct hci_dev *hdev);
 
 #else
 
@@ -54,4 +55,9 @@ static inline int msft_set_filter_enable(struct hci_dev *hdev, bool enable)
 	return -EOPNOTSUPP;
 }
 
+static inline bool msft_curve_validity(struct hci_dev *hdev)
+{
+	return false;
+}
+
 #endif
-- 
cgit v1.2.3


From 6f779a66dc84450ceb4825024d3e337f42e633de Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 22 Mar 2021 22:46:31 +0200
Subject: cfg80211: allow specifying a reason for hw_rfkill

rfkill now allows to report a reason for the hw_rfkill state.
Allow cfg80211 drivers to specify this reason.
Keep the current API to use the default reason
(RFKILL_HARD_BLOCK_SIGNAL).

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20210322204633.102581-4-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 11 ++++++++++-
 net/wireless/core.c    |  7 ++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 911fae42b0c0..3b296f2b7a2c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -11,6 +11,7 @@
  */
 
 #include <linux/ethtool.h>
+#include <uapi/linux/rfkill.h>
 #include <linux/netdevice.h>
 #include <linux/debugfs.h>
 #include <linux/list.h>
@@ -6636,8 +6637,16 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
  * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state
  * @wiphy: the wiphy
  * @blocked: block status
+ * @reason: one of reasons in &enum rfkill_hard_block_reasons
  */
-void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked);
+void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
+				      enum rfkill_hard_block_reasons reason);
+
+static inline void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
+{
+	wiphy_rfkill_set_hw_state_reason(wiphy, blocked,
+					 RFKILL_HARD_BLOCK_SIGNAL);
+}
 
 /**
  * wiphy_rfkill_start_polling - start polling rfkill
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a2785379df6e..adfbcb33fb8f 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1092,14 +1092,15 @@ void wiphy_free(struct wiphy *wiphy)
 }
 EXPORT_SYMBOL(wiphy_free);
 
-void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked)
+void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
+				      enum rfkill_hard_block_reasons reason)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
-	if (rfkill_set_hw_state(rdev->rfkill, blocked))
+	if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason))
 		schedule_work(&rdev->rfkill_block);
 }
-EXPORT_SYMBOL(wiphy_rfkill_set_hw_state);
+EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
 
 void cfg80211_cqm_config_free(struct wireless_dev *wdev)
 {
-- 
cgit v1.2.3


From d84d13d6f6e03a7aaac9e7d064a11cf6f5087da6 Mon Sep 17 00:00:00 2001
From: Vamsi Krishna <vamsin@codeaurora.org>
Date: Tue, 2 Mar 2021 20:20:36 +0530
Subject: nl80211: Add interface to indicate TDLS peer's HE capability

Enhance enum nl80211_tdls_peer_capability to configure TDLS peer's
support for HE mode. Userspace decodes the TDLS setup response frame
and confugures the HE mode support to driver if the peer has advertized
HE mode support in TDLS setup response frame. The driver uses this
information to decide whether to include HE operation IE in TDLS setup
confirmation frame.

Signed-off-by: Vamsi Krishna <vamsin@codeaurora.org>
Link: https://lore.kernel.org/r/1614696636-30144-1-git-send-email-vamsin@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5e30c7f6c484..18dfe744bcb5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6298,11 +6298,13 @@ struct nl80211_vendor_cmd_info {
  * @NL80211_TDLS_PEER_HT: TDLS peer is HT capable.
  * @NL80211_TDLS_PEER_VHT: TDLS peer is VHT capable.
  * @NL80211_TDLS_PEER_WMM: TDLS peer is WMM capable.
+ * @NL80211_TDLS_PEER_HE: TDLS peer is HE capable.
  */
 enum nl80211_tdls_peer_capability {
 	NL80211_TDLS_PEER_HT = 1<<0,
 	NL80211_TDLS_PEER_VHT = 1<<1,
 	NL80211_TDLS_PEER_WMM = 1<<2,
+	NL80211_TDLS_PEER_HE = 1<<3,
 };
 
 /**
-- 
cgit v1.2.3


From 55f8205e7dddb2151def733cefbbf63deba9e1a5 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 30 Mar 2021 07:05:16 +0530
Subject: mac80211: Allow concurrent monitor iface and ethernet rx decap

Some HW/driver can support passing ethernet rx decap frames and
raw 802.11 frames for the monitor interface concurrently and
via separate RX calls to mac80211. Packets going to the monitor
interface(s) would be in 802.11 format and thus not have the
RX_FLAG_8023 set, and 802.11 format monitoring frames should have
RX_FLAG_ONLY_MONITOR set.

Drivers doing such can enable the SUPPORTS_CONC_MON_RX_DECAP to
allow using ethernet decap offload while a monitor interface is
active, currently RX decapsulation offload gets disabled when a
monitor interface is added.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Link: https://lore.kernel.org/r/1617068116-32253-1-git-send-email-srirrama@codeaurora.org
[add proper documentation, rewrite commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 +++++++
 net/mac80211/debugfs.c | 1 +
 net/mac80211/iface.c   | 3 ++-
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2d1d629e5d14..c21a0e27b35e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2399,6 +2399,12 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation
  *	offload
  *
+ * @IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP: Hardware supports concurrent rx
+ *	decapsulation offload and passing raw 802.11 frames for monitor iface.
+ *	If this is supported, the driver must pass both 802.3 frames for real
+ *	usage and 802.11 frames with %RX_FLAG_ONLY_MONITOR set for monitor to
+ *	the stack.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2453,6 +2459,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
 	IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD,
 	IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD,
+	IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 5296898875ff..9245c0421bda 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -456,6 +456,7 @@ static const char *hw_flag_names[] = {
 	FLAG(AMPDU_KEYBORDER_SUPPORT),
 	FLAG(SUPPORTS_TX_ENCAP_OFFLOAD),
 	FLAG(SUPPORTS_RX_DECAP_OFFLOAD),
+	FLAG(SUPPORTS_CONC_MON_RX_DECAP),
 #undef FLAG
 };
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b80c9b016b2b..b1c170939e44 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -807,7 +807,8 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat
 	    ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) {
 		flags |= IEEE80211_OFFLOAD_DECAP_ENABLED;
 
-		if (local->monitors)
+		if (local->monitors &&
+		    !ieee80211_hw_check(&local->hw, SUPPORTS_CONC_MON_RX_DECAP))
 			flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
 	} else {
 		flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED;
-- 
cgit v1.2.3


From 96a7109a16665255b65d021e24141c2edae0e202 Mon Sep 17 00:00:00 2001
From: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Date: Thu, 1 Apr 2021 18:44:55 +0200
Subject: mac80211: Set priority and queue mapping for injected frames

Some drivers, for example mt76, use the skb priority field, and
expects that to be consistent with the skb queue mapping. On some
frame injection code paths that was not true, and it broke frame
injection. Now the skb queue mapping is set according to the skb
priority value when the frame is injected. The skb priority value
is also derived from the frame data for all frame types, as it
was done prior to commit dbd50a851c50 (only allocate one queue
when using iTXQs). Fixes frame injection with the mt76 driver on
MT7610E chipset.

Signed-off-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Link: https://lore.kernel.org/r/20210401164455.978245-1-johan.almbladh@anyfinetworks.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7aaa71ed49b0..3d6c5b8ec0c2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2276,17 +2276,6 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 						    payload[7]);
 	}
 
-	/* Initialize skb->priority for QoS frames. If the DONT_REORDER flag
-	 * is set, stick to the default value for skb->priority to assure
-	 * frames injected with this flag are not reordered relative to each
-	 * other.
-	 */
-	if (ieee80211_is_data_qos(hdr->frame_control) &&
-	    !(info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) {
-		u8 *p = ieee80211_get_qos_ctl(hdr);
-		skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK;
-	}
-
 	rcu_read_lock();
 
 	/*
@@ -2350,6 +2339,15 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 
 	info->band = chandef->chan->band;
 
+	/* Initialize skb->priority according to frame type and TID class,
+	 * with respect to the sub interface that the frame will actually
+	 * be transmitted on. If the DONT_REORDER flag is set, the original
+	 * skb-priority is preserved to assure frames injected with this
+	 * flag are not reordered relative to each other.
+	 */
+	ieee80211_select_queue_80211(sdata, skb, hdr);
+	skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
+
 	/* remove the injection radiotap header */
 	skb_pull(skb, len_rthdr);
 
-- 
cgit v1.2.3


From d6843d1ee283137723b4a8c76244607ce6db1951 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 8 Apr 2021 14:31:25 +0200
Subject: mac80211: clear the beacon's CRC after channel switch

After channel switch, we should consider any beacon with a
CSA IE as a new switch. If the CSA IE is a leftover from
before the switch that the AP forgot to remove, we'll get
a CSA-to-Self.

This caused issues in iwlwifi where the firmware saw a beacon
with a CSA-to-Self with mode = 1 on the new channel after a
switch. The firmware considered this a new switch and closed
its queues. Since the beacon didn't change between before and
after the switch, we wouldn't handle it (the CRC is the same)
and we wouldn't let the firmware open its queues again or
disconnect if the CSA IE stays for too long.

Clear the CRC valid state after we switch to make sure that
we handle the beacon and handle the CSA IE as required.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20210408143124.b9e68aa98304.I465afb55ca2c7d59f7bf610c6046a1fd732b4c28@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ce4e3855fec1..5f89aae9ea23 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1295,6 +1295,11 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata)
 
 	sdata->vif.csa_active = false;
 	ifmgd->csa_waiting_bcn = false;
+	/*
+	 * If the CSA IE is still present on the beacon after the switch,
+	 * we need to consider it as a new CSA (possibly to self).
+	 */
+	ifmgd->beacon_crc_valid = false;
 
 	ret = drv_post_channel_switch(sdata);
 	if (ret) {
-- 
cgit v1.2.3


From db878e27a98106a70315d264cc92230d84009e72 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 8 Apr 2021 14:31:50 +0200
Subject: mac80211: bail out if cipher schemes are invalid

If any of the cipher schemes specified by the driver are invalid, bail
out and fail the registration rather than just warning.  Otherwise, we
might later crash when we try to use the invalid cipher scheme, e.g.
if the hdr_len is (significantly) less than the pn_offs + pn_len, we'd
have an out-of-bounds access in RX validation.

Fixes: 2475b1cc0d52 ("mac80211: add generic cipher scheme support")
Link: https://lore.kernel.org/r/20210408143149.38a3a13a1b19.I6b7f5790fa0958ed8049cf02ac2a535c61e9bc96@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1b9c82616606..0331f3a3c40e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1141,8 +1141,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (local->hw.wiphy->max_scan_ie_len)
 		local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
 
-	WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
-					 local->hw.n_cipher_schemes));
+	if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
+					     local->hw.n_cipher_schemes))) {
+		result = -EINVAL;
+		goto fail_workqueue;
+	}
 
 	result = ieee80211_init_cipher_suites(local);
 	if (result < 0)
-- 
cgit v1.2.3


From 8a16ffdc4cf37c1e6204054b0fb44052c8a48f0d Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 8 Apr 2021 14:32:24 +0200
Subject: cfg80211: Remove wrong RNR IE validation check

Remove a wrong length check for RNR information element as it
can have arbitrary length.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Link: https://lore.kernel.org/r/20210408143224.c7eeaf1a5270.Iead7762982e941a1cbff93f68bf8b5139447ff0c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 019952d4fc7d..c3b51efff5c6 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -589,7 +589,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
 
 	elem = cfg80211_find_elem(WLAN_EID_REDUCED_NEIGHBOR_REPORT, ies->data,
 				  ies->len);
-	if (!elem || elem->datalen > IEEE80211_MAX_SSID_LEN)
+	if (!elem)
 		return 0;
 
 	pos = elem->data;
-- 
cgit v1.2.3


From bd79d9aa6145897c0180964e8e68fcf9ba6b2206 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:39 +0200
Subject: net: phy: marvell10g: rename register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MV_V2_PORT_MAC_TYPE_* is part of the CTRL register. Rename to
MV_V2_PORT_CTRL_MACTYPE_*.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 74b64e52ffa2..9b514124af0d 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -80,8 +80,8 @@ enum {
 	MV_V2_PORT_CTRL		= 0xf001,
 	MV_V2_PORT_CTRL_SWRST	= BIT(15),
 	MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
-	MV_V2_PORT_MAC_TYPE_MASK = 0x7,
-	MV_V2_PORT_MAC_TYPE_RATE_MATCH = 0x6,
+	MV_V2_PORT_CTRL_MACTYPE_MASK = 0x7,
+	MV_V2_PORT_CTRL_MACTYPE_RATE_MATCH = 0x6,
 	/* Temperature control/read registers (88X3310 only) */
 	MV_V2_TEMP_CTRL		= 0xf08a,
 	MV_V2_TEMP_CTRL_MASK	= 0xc000,
@@ -477,8 +477,8 @@ static int mv3310_config_init(struct phy_device *phydev)
 	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
 	if (val < 0)
 		return val;
-	priv->rate_match = ((val & MV_V2_PORT_MAC_TYPE_MASK) ==
-			MV_V2_PORT_MAC_TYPE_RATE_MATCH);
+	priv->rate_match = ((val & MV_V2_PORT_CTRL_MACTYPE_MASK) ==
+			MV_V2_PORT_CTRL_MACTYPE_RATE_MATCH);
 
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
-- 
cgit v1.2.3


From 283828142fad71a6bab4c1a6b0ba28c323fd36a0 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:40 +0200
Subject: net: phy: marvell10g: fix typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This space should be a tab instead.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 9b514124af0d..f2f0da9717be 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -79,7 +79,7 @@ enum {
 	/* Vendor2 MMD registers */
 	MV_V2_PORT_CTRL		= 0xf001,
 	MV_V2_PORT_CTRL_SWRST	= BIT(15),
-	MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
+	MV_V2_PORT_CTRL_PWRDOWN	= BIT(11),
 	MV_V2_PORT_CTRL_MACTYPE_MASK = 0x7,
 	MV_V2_PORT_CTRL_MACTYPE_RATE_MATCH = 0x6,
 	/* Temperature control/read registers (88X3310 only) */
-- 
cgit v1.2.3


From 0d3755428d6990637cf6dc45f007980a1dce28d1 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:41 +0200
Subject: net: phy: marvell10g: allow 5gbase-r and usxgmii
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These modes are also supported by these PHYs.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index f2f0da9717be..881a0717846e 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -462,9 +462,11 @@ static int mv3310_config_init(struct phy_device *phydev)
 	/* Check that the PHY interface type is compatible */
 	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
 	    phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
+	    phydev->interface != PHY_INTERFACE_MODE_5GBASER &&
 	    phydev->interface != PHY_INTERFACE_MODE_XAUI &&
 	    phydev->interface != PHY_INTERFACE_MODE_RXAUI &&
-	    phydev->interface != PHY_INTERFACE_MODE_10GBASER)
+	    phydev->interface != PHY_INTERFACE_MODE_10GBASER &&
+	    phydev->interface != PHY_INTERFACE_MODE_USXGMII)
 		return -ENODEV;
 
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
@@ -599,6 +601,7 @@ static void mv3310_update_interface(struct phy_device *phydev)
 
 	if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
 	     phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
+	     phydev->interface == PHY_INTERFACE_MODE_5GBASER ||
 	     phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&
 	    phydev->link) {
 		/* The PHY automatically switches its serdes interface (and
@@ -611,6 +614,9 @@ static void mv3310_update_interface(struct phy_device *phydev)
 		case SPEED_10000:
 			phydev->interface = PHY_INTERFACE_MODE_10GBASER;
 			break;
+		case SPEED_5000:
+			phydev->interface = PHY_INTERFACE_MODE_5GBASER;
+			break;
 		case SPEED_2500:
 			phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
 			break;
-- 
cgit v1.2.3


From 9893f31690162bf90ef7f85d9dda80731696d586 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:42 +0200
Subject: net: phy: marvell10g: indicate 88X33x0 only port control registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename port control registers to indicate that they are valid only for
88X33x0, not for 88E21x0.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 881a0717846e..7552a658a513 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -78,10 +78,10 @@ enum {
 
 	/* Vendor2 MMD registers */
 	MV_V2_PORT_CTRL		= 0xf001,
-	MV_V2_PORT_CTRL_SWRST	= BIT(15),
-	MV_V2_PORT_CTRL_PWRDOWN	= BIT(11),
-	MV_V2_PORT_CTRL_MACTYPE_MASK = 0x7,
-	MV_V2_PORT_CTRL_MACTYPE_RATE_MATCH = 0x6,
+	MV_V2_PORT_CTRL_PWRDOWN			= BIT(11),
+	MV_V2_33X0_PORT_CTRL_SWRST		= BIT(15),
+	MV_V2_33X0_PORT_CTRL_MACTYPE_MASK	= 0x7,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_RATE_MATCH	= 0x6,
 	/* Temperature control/read registers (88X3310 only) */
 	MV_V2_TEMP_CTRL		= 0xf08a,
 	MV_V2_TEMP_CTRL_MASK	= 0xc000,
@@ -268,7 +268,7 @@ static int mv3310_power_up(struct phy_device *phydev)
 		return ret;
 
 	return phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL,
-				MV_V2_PORT_CTRL_SWRST);
+				MV_V2_33X0_PORT_CTRL_SWRST);
 }
 
 static int mv3310_reset(struct phy_device *phydev, u32 unit)
@@ -479,8 +479,8 @@ static int mv3310_config_init(struct phy_device *phydev)
 	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
 	if (val < 0)
 		return val;
-	priv->rate_match = ((val & MV_V2_PORT_CTRL_MACTYPE_MASK) ==
-			MV_V2_PORT_CTRL_MACTYPE_RATE_MATCH);
+	priv->rate_match = ((val & MV_V2_33X0_PORT_CTRL_MACTYPE_MASK) ==
+			MV_V2_33X0_PORT_CTRL_MACTYPE_RATE_MATCH);
 
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
-- 
cgit v1.2.3


From f8ee45fcbc5a94177d031b22f4e5a47d45874037 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:43 +0200
Subject: net: phy: marvell10g: add all MACTYPE definitions for 88X33x0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add all MACTYPE definitions for 88X3310, 88X3310P, 88X3340 and 88X3340P.

In order to have consistent naming, rename
MV_V2_33X0_PORT_CTRL_MACTYPE_RATE_MATCH to
MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 7552a658a513..7d9a45437b69 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -78,10 +78,18 @@ enum {
 
 	/* Vendor2 MMD registers */
 	MV_V2_PORT_CTRL		= 0xf001,
-	MV_V2_PORT_CTRL_PWRDOWN			= BIT(11),
-	MV_V2_33X0_PORT_CTRL_SWRST		= BIT(15),
-	MV_V2_33X0_PORT_CTRL_MACTYPE_MASK	= 0x7,
-	MV_V2_33X0_PORT_CTRL_MACTYPE_RATE_MATCH	= 0x6,
+	MV_V2_PORT_CTRL_PWRDOWN					= BIT(11),
+	MV_V2_33X0_PORT_CTRL_SWRST				= BIT(15),
+	MV_V2_33X0_PORT_CTRL_MACTYPE_MASK			= 0x7,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI			= 0x0,
+	MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH		= 0x1,
+	MV_V2_3340_PORT_CTRL_MACTYPE_RXAUI_NO_SGMII_AN		= 0x1,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI_RATE_MATCH		= 0x2,
+	MV_V2_3310_PORT_CTRL_MACTYPE_XAUI			= 0x3,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER			= 0x4,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN	= 0x5,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH	= 0x6,
+	MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII			= 0x7,
 	/* Temperature control/read registers (88X3310 only) */
 	MV_V2_TEMP_CTRL		= 0xf08a,
 	MV_V2_TEMP_CTRL_MASK	= 0xc000,
@@ -480,7 +488,7 @@ static int mv3310_config_init(struct phy_device *phydev)
 	if (val < 0)
 		return val;
 	priv->rate_match = ((val & MV_V2_33X0_PORT_CTRL_MACTYPE_MASK) ==
-			MV_V2_33X0_PORT_CTRL_MACTYPE_RATE_MATCH);
+			MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH);
 
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
-- 
cgit v1.2.3


From 9ab0fbd0ffcebe0ba95f7f05af49bea273ffd4db Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:44 +0200
Subject: net: phy: marvell10g: add MACTYPE definitions for 88E21xx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add all MACTYPE definitions for 88E2110, 88E2180, 88E2111 and 88E2181.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 7d9a45437b69..556c9b43860e 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -35,6 +35,15 @@
 enum {
 	MV_PMA_FW_VER0		= 0xc011,
 	MV_PMA_FW_VER1		= 0xc012,
+	MV_PMA_21X0_PORT_CTRL	= 0xc04a,
+	MV_PMA_21X0_PORT_CTRL_SWRST				= BIT(15),
+	MV_PMA_21X0_PORT_CTRL_MACTYPE_MASK			= 0x7,
+	MV_PMA_21X0_PORT_CTRL_MACTYPE_USXGMII			= 0x0,
+	MV_PMA_2180_PORT_CTRL_MACTYPE_DXGMII			= 0x1,
+	MV_PMA_2180_PORT_CTRL_MACTYPE_QXGMII			= 0x2,
+	MV_PMA_21X0_PORT_CTRL_MACTYPE_5GBASER			= 0x4,
+	MV_PMA_21X0_PORT_CTRL_MACTYPE_5GBASER_NO_SGMII_AN	= 0x5,
+	MV_PMA_21X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH	= 0x6,
 	MV_PMA_BOOT		= 0xc050,
 	MV_PMA_BOOT_FATAL	= BIT(0),
 
-- 
cgit v1.2.3


From 97bbe3bd6922e7ccf8ecd308640a7da55ae3c2f9 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:45 +0200
Subject: net: phy: marvell10g: support all rate matching modes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for all rate matching modes for 88X3310 (currently only
10gbase-r is supported, but xaui and rxaui can also be used).

Add support for rate matching for 88E2110 (on 88E2110 the MACTYPE
register is at a different place).

Currently rate matching mode is selected by strapping pins (by setting
the MACTYPE register). There is work in progress to enable this driver
to deduce the best MACTYPE from the knowledge of which interface modes
are supported by the host, but this work is not finished yet.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 103 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 92 insertions(+), 11 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 556c9b43860e..b0b3fccac65f 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -108,14 +108,25 @@ enum {
 	MV_V2_TEMP_UNKNOWN	= 0x9600, /* unknown function */
 };
 
+struct mv3310_chip {
+	int (*get_mactype)(struct phy_device *phydev);
+	int (*init_interface)(struct phy_device *phydev, int mactype);
+};
+
 struct mv3310_priv {
 	u32 firmware_ver;
 	bool rate_match;
+	phy_interface_t const_interface;
 
 	struct device *hwmon_dev;
 	char *hwmon_name;
 };
 
+static const struct mv3310_chip *to_mv3310_chip(struct phy_device *phydev)
+{
+	return phydev->drv->driver_data;
+}
+
 #ifdef CONFIG_HWMON
 static umode_t mv3310_hwmon_is_visible(const void *data,
 				       enum hwmon_sensor_types type,
@@ -470,11 +481,67 @@ static bool mv3310_has_pma_ngbaset_quirk(struct phy_device *phydev)
 		MV_PHY_ALASKA_NBT_QUIRK_MASK) == MV_PHY_ALASKA_NBT_QUIRK_REV;
 }
 
-static int mv3310_config_init(struct phy_device *phydev)
+static int mv2110_get_mactype(struct phy_device *phydev)
+{
+	int mactype;
+
+	mactype = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_PMA_21X0_PORT_CTRL);
+	if (mactype < 0)
+		return mactype;
+
+	return mactype & MV_PMA_21X0_PORT_CTRL_MACTYPE_MASK;
+}
+
+static int mv3310_get_mactype(struct phy_device *phydev)
+{
+	int mactype;
+
+	mactype = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
+	if (mactype < 0)
+		return mactype;
+
+	return mactype & MV_V2_33X0_PORT_CTRL_MACTYPE_MASK;
+}
+
+static int mv2110_init_interface(struct phy_device *phydev, int mactype)
 {
 	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
-	int err;
-	int val;
+
+	priv->rate_match = false;
+
+	if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH) {
+		priv->rate_match = true;
+		priv->const_interface = PHY_INTERFACE_MODE_10GBASER;
+	}
+
+	return 0;
+}
+
+static int mv3310_init_interface(struct phy_device *phydev, int mactype)
+{
+	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
+
+	priv->rate_match = false;
+
+	if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH ||
+	    mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI_RATE_MATCH ||
+	    mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH)
+		priv->rate_match = true;
+
+	if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH)
+		priv->const_interface = PHY_INTERFACE_MODE_10GBASER;
+	else if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI_RATE_MATCH)
+		priv->const_interface = PHY_INTERFACE_MODE_RXAUI;
+	else if (mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH)
+		priv->const_interface = PHY_INTERFACE_MODE_XAUI;
+
+	return 0;
+}
+
+static int mv3310_config_init(struct phy_device *phydev)
+{
+	const struct mv3310_chip *chip = to_mv3310_chip(phydev);
+	int err, mactype;
 
 	/* Check that the PHY interface type is compatible */
 	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
@@ -493,11 +560,13 @@ static int mv3310_config_init(struct phy_device *phydev)
 	if (err)
 		return err;
 
-	val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL);
-	if (val < 0)
-		return val;
-	priv->rate_match = ((val & MV_V2_33X0_PORT_CTRL_MACTYPE_MASK) ==
-			MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH);
+	mactype = chip->get_mactype(phydev);
+	if (mactype < 0)
+		return mactype;
+
+	err = chip->init_interface(phydev, mactype);
+	if (err)
+		return err;
 
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
@@ -607,12 +676,12 @@ static void mv3310_update_interface(struct phy_device *phydev)
 {
 	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
 
-	/* In "XFI with Rate Matching" mode the PHY interface is fixed at
-	 * 10Gb. The PHY adapts the rate to actual wire speed with help of
+	/* In all of the "* with Rate Matching" modes the PHY interface is fixed
+	 * at 10Gb. The PHY adapts the rate to actual wire speed with help of
 	 * internal 16KB buffer.
 	 */
 	if (priv->rate_match) {
-		phydev->interface = PHY_INTERFACE_MODE_10GBASER;
+		phydev->interface = priv->const_interface;
 		return;
 	}
 
@@ -788,11 +857,22 @@ static int mv3310_set_tunable(struct phy_device *phydev,
 	}
 }
 
+static const struct mv3310_chip mv3310_type = {
+	.get_mactype = mv3310_get_mactype,
+	.init_interface = mv3310_init_interface,
+};
+
+static const struct mv3310_chip mv2110_type = {
+	.get_mactype = mv2110_get_mactype,
+	.init_interface = mv2110_init_interface,
+};
+
 static struct phy_driver mv3310_drivers[] = {
 	{
 		.phy_id		= MARVELL_PHY_ID_88X3310,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x3310",
+		.driver_data	= &mv3310_type,
 		.get_features	= mv3310_get_features,
 		.config_init	= mv3310_config_init,
 		.probe		= mv3310_probe,
@@ -810,6 +890,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.phy_id		= MARVELL_PHY_ID_88E2110,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x2110",
+		.driver_data	= &mv2110_type,
 		.probe		= mv3310_probe,
 		.suspend	= mv3310_suspend,
 		.resume		= mv3310_resume,
-- 
cgit v1.2.3


From 261a74c64bb67fbd9795e4ece94c2579f3c866f2 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:46 +0200
Subject: net: phy: marvell10g: check for correct supported interface mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 88E2110 does not support xaui nor rxaui modes. Check for correct
interface mode for different chips.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index b0b3fccac65f..a7c6b1944b05 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -109,11 +109,14 @@ enum {
 };
 
 struct mv3310_chip {
+	void (*init_supported_interfaces)(unsigned long *mask);
 	int (*get_mactype)(struct phy_device *phydev);
 	int (*init_interface)(struct phy_device *phydev, int mactype);
 };
 
 struct mv3310_priv {
+	DECLARE_BITMAP(supported_interfaces, PHY_INTERFACE_MODE_MAX);
+
 	u32 firmware_ver;
 	bool rate_match;
 	phy_interface_t const_interface;
@@ -391,6 +394,7 @@ static const struct sfp_upstream_ops mv3310_sfp_ops = {
 
 static int mv3310_probe(struct phy_device *phydev)
 {
+	const struct mv3310_chip *chip = to_mv3310_chip(phydev);
 	struct mv3310_priv *priv;
 	u32 mmd_mask = MDIO_DEVS_PMAPMD | MDIO_DEVS_AN;
 	int ret;
@@ -440,6 +444,8 @@ static int mv3310_probe(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
+	chip->init_supported_interfaces(priv->supported_interfaces);
+
 	return phy_sfp_probe(phydev, &mv3310_sfp_ops);
 }
 
@@ -540,17 +546,12 @@ static int mv3310_init_interface(struct phy_device *phydev, int mactype)
 
 static int mv3310_config_init(struct phy_device *phydev)
 {
+	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
 	const struct mv3310_chip *chip = to_mv3310_chip(phydev);
 	int err, mactype;
 
 	/* Check that the PHY interface type is compatible */
-	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
-	    phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
-	    phydev->interface != PHY_INTERFACE_MODE_5GBASER &&
-	    phydev->interface != PHY_INTERFACE_MODE_XAUI &&
-	    phydev->interface != PHY_INTERFACE_MODE_RXAUI &&
-	    phydev->interface != PHY_INTERFACE_MODE_10GBASER &&
-	    phydev->interface != PHY_INTERFACE_MODE_USXGMII)
+	if (!test_bit(phydev->interface, priv->supported_interfaces))
 		return -ENODEV;
 
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
@@ -857,12 +858,34 @@ static int mv3310_set_tunable(struct phy_device *phydev,
 	}
 }
 
+static void mv3310_init_supported_interfaces(unsigned long *mask)
+{
+	__set_bit(PHY_INTERFACE_MODE_SGMII, mask);
+	__set_bit(PHY_INTERFACE_MODE_2500BASEX, mask);
+	__set_bit(PHY_INTERFACE_MODE_5GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_XAUI, mask);
+	__set_bit(PHY_INTERFACE_MODE_RXAUI, mask);
+	__set_bit(PHY_INTERFACE_MODE_10GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
+}
+
+static void mv2110_init_supported_interfaces(unsigned long *mask)
+{
+	__set_bit(PHY_INTERFACE_MODE_SGMII, mask);
+	__set_bit(PHY_INTERFACE_MODE_2500BASEX, mask);
+	__set_bit(PHY_INTERFACE_MODE_5GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_10GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
+}
+
 static const struct mv3310_chip mv3310_type = {
+	.init_supported_interfaces = mv3310_init_supported_interfaces,
 	.get_mactype = mv3310_get_mactype,
 	.init_interface = mv3310_init_interface,
 };
 
 static const struct mv3310_chip mv2110_type = {
+	.init_supported_interfaces = mv2110_init_supported_interfaces,
 	.get_mactype = mv2110_get_mactype,
 	.init_interface = mv2110_init_interface,
 };
-- 
cgit v1.2.3


From 884d9a6758a1a704367e7bb3d3cc814a6fe8f20c Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:47 +0200
Subject: net: phy: marvell10g: store temperature read method in chip strucutre
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we have a chip structure, we can store the temperature reading
method in this structure (OOP style).

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index a7c6b1944b05..20d3e572c935 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -112,6 +112,10 @@ struct mv3310_chip {
 	void (*init_supported_interfaces)(unsigned long *mask);
 	int (*get_mactype)(struct phy_device *phydev);
 	int (*init_interface)(struct phy_device *phydev, int mactype);
+
+#ifdef CONFIG_HWMON
+	int (*hwmon_read_temp_reg)(struct phy_device *phydev);
+#endif
 };
 
 struct mv3310_priv {
@@ -152,18 +156,11 @@ static int mv2110_hwmon_read_temp_reg(struct phy_device *phydev)
 	return phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_TEMP);
 }
 
-static int mv10g_hwmon_read_temp_reg(struct phy_device *phydev)
-{
-	if (phydev->drv->phy_id == MARVELL_PHY_ID_88X3310)
-		return mv3310_hwmon_read_temp_reg(phydev);
-	else /* MARVELL_PHY_ID_88E2110 */
-		return mv2110_hwmon_read_temp_reg(phydev);
-}
-
 static int mv3310_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 			     u32 attr, int channel, long *value)
 {
 	struct phy_device *phydev = dev_get_drvdata(dev);
+	const struct mv3310_chip *chip = to_mv3310_chip(phydev);
 	int temp;
 
 	if (type == hwmon_chip && attr == hwmon_chip_update_interval) {
@@ -172,7 +169,7 @@ static int mv3310_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 	}
 
 	if (type == hwmon_temp && attr == hwmon_temp_input) {
-		temp = mv10g_hwmon_read_temp_reg(phydev);
+		temp = chip->hwmon_read_temp_reg(phydev);
 		if (temp < 0)
 			return temp;
 
@@ -882,12 +879,20 @@ static const struct mv3310_chip mv3310_type = {
 	.init_supported_interfaces = mv3310_init_supported_interfaces,
 	.get_mactype = mv3310_get_mactype,
 	.init_interface = mv3310_init_interface,
+
+#ifdef CONFIG_HWMON
+	.hwmon_read_temp_reg = mv3310_hwmon_read_temp_reg,
+#endif
 };
 
 static const struct mv3310_chip mv2110_type = {
 	.init_supported_interfaces = mv2110_init_supported_interfaces,
 	.get_mactype = mv2110_get_mactype,
 	.init_interface = mv2110_init_interface,
+
+#ifdef CONFIG_HWMON
+	.hwmon_read_temp_reg = mv2110_hwmon_read_temp_reg,
+#endif
 };
 
 static struct phy_driver mv3310_drivers[] = {
-- 
cgit v1.2.3


From ccbf2891de9813fc35b6f35cee97731532e9d386 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:48 +0200
Subject: net: phy: marvell10g: support other MACTYPEs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the only "changing" MACTYPE we support is when the PHY changes
between
  10gbase-r / 5gbase-r / 2500base-x / sgmii

Add support for
  usxgmii
  xaui / 5gbase-r / 2500base-x / sgmii
  rxaui / 5gbase-r / 2500base-x / sgmii
and also
  5gbase-r / 2500base-x / sgmii
for 88E2110.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 90 ++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 36 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 20d3e572c935..2dc1317e601e 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -512,10 +512,18 @@ static int mv2110_init_interface(struct phy_device *phydev, int mactype)
 
 	priv->rate_match = false;
 
-	if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH) {
+	if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH)
 		priv->rate_match = true;
+
+	if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_USXGMII)
+		priv->const_interface = PHY_INTERFACE_MODE_USXGMII;
+	else if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH)
 		priv->const_interface = PHY_INTERFACE_MODE_10GBASER;
-	}
+	else if (mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_5GBASER ||
+		 mactype == MV_PMA_21X0_PORT_CTRL_MACTYPE_5GBASER_NO_SGMII_AN)
+		priv->const_interface = PHY_INTERFACE_MODE_NA;
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -531,12 +539,20 @@ static int mv3310_init_interface(struct phy_device *phydev, int mactype)
 	    mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH)
 		priv->rate_match = true;
 
-	if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH)
+	if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_USXGMII)
+		priv->const_interface = PHY_INTERFACE_MODE_USXGMII;
+	else if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_RATE_MATCH ||
+		 mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER_NO_SGMII_AN ||
+		 mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_10GBASER)
 		priv->const_interface = PHY_INTERFACE_MODE_10GBASER;
-	else if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI_RATE_MATCH)
+	else if (mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI_RATE_MATCH ||
+		 mactype == MV_V2_33X0_PORT_CTRL_MACTYPE_RXAUI)
 		priv->const_interface = PHY_INTERFACE_MODE_RXAUI;
-	else if (mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH)
+	else if (mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI_RATE_MATCH ||
+		 mactype == MV_V2_3310_PORT_CTRL_MACTYPE_XAUI)
 		priv->const_interface = PHY_INTERFACE_MODE_XAUI;
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -563,8 +579,10 @@ static int mv3310_config_init(struct phy_device *phydev)
 		return mactype;
 
 	err = chip->init_interface(phydev, mactype);
-	if (err)
+	if (err) {
+		phydev_err(phydev, "MACTYPE configuration invalid\n");
 		return err;
+	}
 
 	/* Enable EDPD mode - saving 600mW */
 	return mv3310_set_edpd(phydev, ETHTOOL_PHY_EDPD_DFLT_TX_MSECS);
@@ -674,44 +692,44 @@ static void mv3310_update_interface(struct phy_device *phydev)
 {
 	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
 
+	if (!phydev->link)
+		return;
+
 	/* In all of the "* with Rate Matching" modes the PHY interface is fixed
 	 * at 10Gb. The PHY adapts the rate to actual wire speed with help of
 	 * internal 16KB buffer.
+	 *
+	 * In USXGMII mode the PHY interface mode is also fixed.
 	 */
-	if (priv->rate_match) {
+	if (priv->rate_match ||
+	    priv->const_interface == PHY_INTERFACE_MODE_USXGMII) {
 		phydev->interface = priv->const_interface;
 		return;
 	}
 
-	if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
-	     phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
-	     phydev->interface == PHY_INTERFACE_MODE_5GBASER ||
-	     phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&
-	    phydev->link) {
-		/* The PHY automatically switches its serdes interface (and
-		 * active PHYXS instance) between Cisco SGMII, 10GBase-R and
-		 * 2500BaseX modes according to the speed.  Florian suggests
-		 * setting phydev->interface to communicate this to the MAC.
-		 * Only do this if we are already in one of the above modes.
-		 */
-		switch (phydev->speed) {
-		case SPEED_10000:
-			phydev->interface = PHY_INTERFACE_MODE_10GBASER;
-			break;
-		case SPEED_5000:
-			phydev->interface = PHY_INTERFACE_MODE_5GBASER;
-			break;
-		case SPEED_2500:
-			phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
-			break;
-		case SPEED_1000:
-		case SPEED_100:
-		case SPEED_10:
-			phydev->interface = PHY_INTERFACE_MODE_SGMII;
-			break;
-		default:
-			break;
-		}
+	/* The PHY automatically switches its serdes interface (and active PHYXS
+	 * instance) between Cisco SGMII, 2500BaseX, 5GBase-R and 10GBase-R /
+	 * xaui / rxaui modes according to the speed.
+	 * Florian suggests setting phydev->interface to communicate this to the
+	 * MAC. Only do this if we are already in one of the above modes.
+	 */
+	switch (phydev->speed) {
+	case SPEED_10000:
+		phydev->interface = priv->const_interface;
+		break;
+	case SPEED_5000:
+		phydev->interface = PHY_INTERFACE_MODE_5GBASER;
+		break;
+	case SPEED_2500:
+		phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+		break;
+	case SPEED_1000:
+	case SPEED_100:
+	case SPEED_10:
+		phydev->interface = PHY_INTERFACE_MODE_SGMII;
+		break;
+	default:
+		break;
 	}
 }
 
-- 
cgit v1.2.3


From 9885d016ffa9465f91498e7a70c413c30446ad49 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:49 +0200
Subject: net: phy: marvell10g: add separate structure for 88X3340
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 88X3340 contains 4 cores similar to 88X3310, but there is a
difference: it does not support xaui host mode. Instead the
corresponding MACTYPE means
  rxaui / 5gbase-r / 2500base-x / sgmii without AN

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 58 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/marvell_phy.h  |  6 ++++-
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 2dc1317e601e..f74dfd993d8b 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -557,6 +557,21 @@ static int mv3310_init_interface(struct phy_device *phydev, int mactype)
 	return 0;
 }
 
+static int mv3340_init_interface(struct phy_device *phydev, int mactype)
+{
+	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
+	int err = 0;
+
+	priv->rate_match = false;
+
+	if (mactype == MV_V2_3340_PORT_CTRL_MACTYPE_RXAUI_NO_SGMII_AN)
+		priv->const_interface = PHY_INTERFACE_MODE_RXAUI;
+	else
+		err = mv3310_init_interface(phydev, mactype);
+
+	return err;
+}
+
 static int mv3310_config_init(struct phy_device *phydev)
 {
 	struct mv3310_priv *priv = dev_get_drvdata(&phydev->mdio.dev);
@@ -884,6 +899,16 @@ static void mv3310_init_supported_interfaces(unsigned long *mask)
 	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
 }
 
+static void mv3340_init_supported_interfaces(unsigned long *mask)
+{
+	__set_bit(PHY_INTERFACE_MODE_SGMII, mask);
+	__set_bit(PHY_INTERFACE_MODE_2500BASEX, mask);
+	__set_bit(PHY_INTERFACE_MODE_5GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_RXAUI, mask);
+	__set_bit(PHY_INTERFACE_MODE_10GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
+}
+
 static void mv2110_init_supported_interfaces(unsigned long *mask)
 {
 	__set_bit(PHY_INTERFACE_MODE_SGMII, mask);
@@ -903,6 +928,16 @@ static const struct mv3310_chip mv3310_type = {
 #endif
 };
 
+static const struct mv3310_chip mv3340_type = {
+	.init_supported_interfaces = mv3340_init_supported_interfaces,
+	.get_mactype = mv3310_get_mactype,
+	.init_interface = mv3340_init_interface,
+
+#ifdef CONFIG_HWMON
+	.hwmon_read_temp_reg = mv3310_hwmon_read_temp_reg,
+#endif
+};
+
 static const struct mv3310_chip mv2110_type = {
 	.init_supported_interfaces = mv2110_init_supported_interfaces,
 	.get_mactype = mv2110_get_mactype,
@@ -916,7 +951,7 @@ static const struct mv3310_chip mv2110_type = {
 static struct phy_driver mv3310_drivers[] = {
 	{
 		.phy_id		= MARVELL_PHY_ID_88X3310,
-		.phy_id_mask	= MARVELL_PHY_ID_MASK,
+		.phy_id_mask	= MARVELL_PHY_ID_88X33X0_MASK,
 		.name		= "mv88x3310",
 		.driver_data	= &mv3310_type,
 		.get_features	= mv3310_get_features,
@@ -932,6 +967,24 @@ static struct phy_driver mv3310_drivers[] = {
 		.remove		= mv3310_remove,
 		.set_loopback	= genphy_c45_loopback,
 	},
+	{
+		.phy_id		= MARVELL_PHY_ID_88X3340,
+		.phy_id_mask	= MARVELL_PHY_ID_88X33X0_MASK,
+		.name		= "mv88x3340",
+		.driver_data	= &mv3340_type,
+		.get_features	= mv3310_get_features,
+		.config_init	= mv3310_config_init,
+		.probe		= mv3310_probe,
+		.suspend	= mv3310_suspend,
+		.resume		= mv3310_resume,
+		.config_aneg	= mv3310_config_aneg,
+		.aneg_done	= mv3310_aneg_done,
+		.read_status	= mv3310_read_status,
+		.get_tunable	= mv3310_get_tunable,
+		.set_tunable	= mv3310_set_tunable,
+		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
+	},
 	{
 		.phy_id		= MARVELL_PHY_ID_88E2110,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
@@ -954,7 +1007,8 @@ static struct phy_driver mv3310_drivers[] = {
 module_phy_driver(mv3310_drivers);
 
 static struct mdio_device_id __maybe_unused mv3310_tbl[] = {
-	{ MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_88X33X0_MASK },
+	{ MARVELL_PHY_ID_88X3340, MARVELL_PHY_ID_88X33X0_MASK },
 	{ MARVELL_PHY_ID_88E2110, MARVELL_PHY_ID_MASK },
 	{ },
 };
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 274abd5fbac3..6b11a5411082 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -22,10 +22,14 @@
 #define MARVELL_PHY_ID_88E1545		0x01410ea0
 #define MARVELL_PHY_ID_88E1548P		0x01410ec0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
-#define MARVELL_PHY_ID_88X3310		0x002b09a0
 #define MARVELL_PHY_ID_88E2110		0x002b09b0
 #define MARVELL_PHY_ID_88X2222		0x01410f10
 
+/* PHY IDs and mask for Alaska 10G PHYs */
+#define MARVELL_PHY_ID_88X33X0_MASK	0xfffffff8
+#define MARVELL_PHY_ID_88X3310		0x002b09a0
+#define MARVELL_PHY_ID_88X3340		0x002b09a8
+
 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
 #define MARVELL_PHY_ID_88E1111_FINISAR	0x01ff0cc0
 
-- 
cgit v1.2.3


From c89f27d4d239d263a8b5abc597a963bb6b15a661 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:50 +0200
Subject: net: phy: marvell10g: fix driver name for mv88e2110
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver name "mv88x2110" should be instead "mv88e2110".

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index f74dfd993d8b..3c99757f0306 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -988,7 +988,7 @@ static struct phy_driver mv3310_drivers[] = {
 	{
 		.phy_id		= MARVELL_PHY_ID_88E2110,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
-		.name		= "mv88x2110",
+		.name		= "mv88e2110",
 		.driver_data	= &mv2110_type,
 		.probe		= mv3310_probe,
 		.suspend	= mv3310_suspend,
-- 
cgit v1.2.3


From 53f111cbfac6f2000c604994ddf01d3299d7de6b Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:51 +0200
Subject: net: phy: add constants for 2.5G and 5G speed in PCS speed register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add constants for 2.5G and 5G speed in PCS speed register into mdio.h.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 3f302e2523b2..bdf77dffa5a4 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -120,6 +120,8 @@
 #define MDIO_PMA_SPEED_100		0x0020	/* 100M capable */
 #define MDIO_PMA_SPEED_10		0x0040	/* 10M capable */
 #define MDIO_PCS_SPEED_10P2B		0x0002	/* 10PASS-TS/2BASE-TL capable */
+#define MDIO_PCS_SPEED_2_5G		0x0040	/* 2.5G capable */
+#define MDIO_PCS_SPEED_5G		0x0080	/* 5G capable */
 
 /* Device present registers. */
 #define MDIO_DEVS_PRESENT(devad)	(1 << (devad))
-- 
cgit v1.2.3


From 0fca947cbb27ddb28155996e29385b4ddf5d94f3 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:52 +0200
Subject: net: phy: marvell10g: differentiate 88E2110 vs 88E2111
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

88E2111 is a variant of 88E2110 which does not support 5 gigabit speeds.

Differentiate these variants via the match_phy_device() method, since
they have the same PHY ID.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 62 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 3c99757f0306..fcf4db4e5665 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -918,6 +918,14 @@ static void mv2110_init_supported_interfaces(unsigned long *mask)
 	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
 }
 
+static void mv2111_init_supported_interfaces(unsigned long *mask)
+{
+	__set_bit(PHY_INTERFACE_MODE_SGMII, mask);
+	__set_bit(PHY_INTERFACE_MODE_2500BASEX, mask);
+	__set_bit(PHY_INTERFACE_MODE_10GBASER, mask);
+	__set_bit(PHY_INTERFACE_MODE_USXGMII, mask);
+}
+
 static const struct mv3310_chip mv3310_type = {
 	.init_supported_interfaces = mv3310_init_supported_interfaces,
 	.get_mactype = mv3310_get_mactype,
@@ -948,6 +956,41 @@ static const struct mv3310_chip mv2110_type = {
 #endif
 };
 
+static const struct mv3310_chip mv2111_type = {
+	.init_supported_interfaces = mv2111_init_supported_interfaces,
+	.get_mactype = mv2110_get_mactype,
+	.init_interface = mv2110_init_interface,
+
+#ifdef CONFIG_HWMON
+	.hwmon_read_temp_reg = mv2110_hwmon_read_temp_reg,
+#endif
+};
+
+static int mv211x_match_phy_device(struct phy_device *phydev, bool has_5g)
+{
+	int val;
+
+	if ((phydev->c45_ids.device_ids[MDIO_MMD_PMAPMD] &
+	     MARVELL_PHY_ID_MASK) != MARVELL_PHY_ID_88E2110)
+		return 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_SPEED);
+	if (val < 0)
+		return val;
+
+	return !!(val & MDIO_PCS_SPEED_5G) == has_5g;
+}
+
+static int mv2110_match_phy_device(struct phy_device *phydev)
+{
+	return mv211x_match_phy_device(phydev, true);
+}
+
+static int mv2111_match_phy_device(struct phy_device *phydev)
+{
+	return mv211x_match_phy_device(phydev, false);
+}
+
 static struct phy_driver mv3310_drivers[] = {
 	{
 		.phy_id		= MARVELL_PHY_ID_88X3310,
@@ -988,6 +1031,7 @@ static struct phy_driver mv3310_drivers[] = {
 	{
 		.phy_id		= MARVELL_PHY_ID_88E2110,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
+		.match_phy_device = mv2110_match_phy_device,
 		.name		= "mv88e2110",
 		.driver_data	= &mv2110_type,
 		.probe		= mv3310_probe,
@@ -1002,6 +1046,24 @@ static struct phy_driver mv3310_drivers[] = {
 		.remove		= mv3310_remove,
 		.set_loopback	= genphy_c45_loopback,
 	},
+	{
+		.phy_id		= MARVELL_PHY_ID_88E2110,
+		.phy_id_mask	= MARVELL_PHY_ID_MASK,
+		.match_phy_device = mv2111_match_phy_device,
+		.name		= "mv88e2111",
+		.driver_data	= &mv2111_type,
+		.probe		= mv3310_probe,
+		.suspend	= mv3310_suspend,
+		.resume		= mv3310_resume,
+		.config_init	= mv3310_config_init,
+		.config_aneg	= mv3310_config_aneg,
+		.aneg_done	= mv3310_aneg_done,
+		.read_status	= mv3310_read_status,
+		.get_tunable	= mv3310_get_tunable,
+		.set_tunable	= mv3310_set_tunable,
+		.remove		= mv3310_remove,
+		.set_loopback	= genphy_c45_loopback,
+	},
 };
 
 module_phy_driver(mv3310_drivers);
-- 
cgit v1.2.3


From c7dce05e63ebc238ad941796d2ca0a90b5902ad3 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:53 +0200
Subject: net: phy: marvell10g: change module description
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This module supports not only Alaska X, but also Alaska M.

Change module description appropriately.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index fcf4db4e5665..bbbc6ac8fa82 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -1075,5 +1075,5 @@ static struct mdio_device_id __maybe_unused mv3310_tbl[] = {
 	{ },
 };
 MODULE_DEVICE_TABLE(mdio, mv3310_tbl);
-MODULE_DESCRIPTION("Marvell Alaska X 10Gigabit Ethernet PHY driver (MV88X3310)");
+MODULE_DESCRIPTION("Marvell Alaska X/M multi-gigabit Ethernet PHY driver");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 9187b6cfe7fcc6f2f5330e82768652bc4931ffb3 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 7 Apr 2021 22:22:54 +0200
Subject: MAINTAINERS: add myself as maintainer of marvell10g driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add myself as maintainer of the marvell10g ethernet PHY driver, in
addition to Russell King.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 217c7470bfa9..3ea9539821b5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10695,6 +10695,7 @@ F:	include/linux/mv643xx.h
 
 MARVELL MV88X3310 PHY DRIVER
 M:	Russell King <linux@armlinux.org.uk>
+M:	Marek Behun <marek.behun@nic.cz>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/phy/marvell10g.c
-- 
cgit v1.2.3


From 33c252e1ba8b9ba00831bc520f7017c816429179 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:54 -0700
Subject: ionic: fix up a couple of code style nits

Clean up variable declarations.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index ee56fed12e07..4e22e50922cd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2015,9 +2015,8 @@ static void ionic_txrx_free(struct ionic_lif *lif)
 
 static int ionic_txrx_alloc(struct ionic_lif *lif)
 {
-	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
-	unsigned int flags;
-	unsigned int i;
+	unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz;
+	unsigned int flags, i;
 	int err = 0;
 
 	num_desc = lif->ntxq_descs;
@@ -2584,12 +2583,11 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 int ionic_reconfigure_queues(struct ionic_lif *lif,
 			     struct ionic_queue_params *qparam)
 {
-	unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz;
+	unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz;
 	struct ionic_qcq **tx_qcqs = NULL;
 	struct ionic_qcq **rx_qcqs = NULL;
-	unsigned int flags;
+	unsigned int flags, i;
 	int err = -ENOMEM;
-	unsigned int i;
 
 	/* allocate temporary qcq arrays to hold new queue structs */
 	if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) {
-- 
cgit v1.2.3


From e1edcc966ae8579b76db720a94084866bd7dcd6c Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:55 -0700
Subject: ionic: remove unnecessary compat ifdef

We don't need to look for HAVE_HWSTAMP_TX_ONESTEP_P2P in the
upstream kernel.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index 86ae5011ac9b..5d5da61284e7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -18,10 +18,8 @@ static int ionic_hwstamp_tx_mode(int config_tx_type)
 		return IONIC_TXSTAMP_ON;
 	case HWTSTAMP_TX_ONESTEP_SYNC:
 		return IONIC_TXSTAMP_ONESTEP_SYNC;
-#ifdef HAVE_HWSTAMP_TX_ONESTEP_P2P
 	case HWTSTAMP_TX_ONESTEP_P2P:
 		return IONIC_TXSTAMP_ONESTEP_P2P;
-#endif
 	default:
 		return -ERANGE;
 	}
-- 
cgit v1.2.3


From e2ce148e948e7cf6267351ea407ac45a6b561a54 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:56 -0700
Subject: ionic: check for valid tx_mode on SKBTX_HW_TSTAMP xmit

Make sure the device is in a Tx offload mode before calling the
hwstamp offload xmit.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 3478b0f2495f..765050a5f7a8 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -1233,7 +1233,7 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
-		if (lif->hwstamp_txq)
+		if (lif->hwstamp_txq && lif->phc->ts_config_tx_mode)
 			return ionic_start_hwstamp_xmit(skb, netdev);
 
 	if (unlikely(queue_index >= lif->nxqs))
-- 
cgit v1.2.3


From bd7856bcd498498a91b275904b9aa43882cf85a0 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:57 -0700
Subject: ionic: add SKBTX_IN_PROGRESS

Set the SKBTX_IN_PROGRESS when offloading the Tx timestamp.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 765050a5f7a8..08934888575c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -1203,6 +1203,7 @@ static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb,
 	if (unlikely(!ionic_q_has_space(q, ndescs)))
 		goto err_out_drop;
 
+	skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP;
 	if (skb_is_gso(skb))
 		err = ionic_tx_tso(q, skb);
 	else
-- 
cgit v1.2.3


From 51117874554d7883c9b8051d361cb6a5c847c31a Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:58 -0700
Subject: ionic: re-start ptp after queues up

When returning after a firmware reset, re-start the
PTP after we've restarted the general queues.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 4e22e50922cd..8cf6477b9899 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2987,14 +2987,14 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 			goto err_txrx_free;
 	}
 
-	/* restore the hardware timestamping queues */
-	ionic_lif_hwstamp_set(lif, NULL);
-
 	clear_bit(IONIC_LIF_F_FW_RESET, lif->state);
 	ionic_link_status_check_request(lif, CAN_SLEEP);
 	netif_device_attach(lif->netdev);
 	dev_info(ionic->dev, "FW Up: LIFs restarted\n");
 
+	/* restore the hardware timestamping queues */
+	ionic_lif_hwstamp_set(lif, NULL);
+
 	return;
 
 err_txrx_free:
-- 
cgit v1.2.3


From 99b5bea04f0f0dc9e6278536a269e1f4795b73e1 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:19:59 -0700
Subject: ionic: ignore EBUSY on queue start

When starting the queues in the link-check, don't go into
the BROKEN state if the return was EBUSY.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 8cf6477b9899..eae774c0a2d9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -135,7 +135,7 @@ static void ionic_link_status_check(struct ionic_lif *lif)
 		if (netdev->flags & IFF_UP && netif_running(netdev)) {
 			mutex_lock(&lif->queue_lock);
 			err = ionic_start_queues(lif);
-			if (err) {
+			if (err && err != -EBUSY) {
 				netdev_err(lif->netdev,
 					   "Failed to start queues: %d\n", err);
 				set_bit(IONIC_LIF_F_BROKEN, lif->state);
-- 
cgit v1.2.3


From 829600ce5e4e985a8a49c445c5e5c72ca613ce41 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:20:00 -0700
Subject: ionic: add ts_config replay

Split the call into ionic_lif_hwstamp_set() to have two
separate interfaces, one from the ioctl() for changing the
configuration and one for replaying the current configuration
after a FW RESET.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c |  2 +-
 drivers/net/ethernet/pensando/ionic/ionic_lif.h |  6 ++
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 84 ++++++++++++++++---------
 3 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index eae774c0a2d9..af3a5368529c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2993,7 +2993,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 	dev_info(ionic->dev, "FW Up: LIFs restarted\n");
 
 	/* restore the hardware timestamping queues */
-	ionic_lif_hwstamp_set(lif, NULL);
+	ionic_lif_hwstamp_replay(lif);
 
 	return;
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index ea3b086af179..346506f01715 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -302,6 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 int ionic_lif_size(struct ionic *ionic);
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+int ionic_lif_hwstamp_replay(struct ionic_lif *lif);
 int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
 ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -310,6 +311,11 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif);
 void ionic_lif_alloc_phc(struct ionic_lif *lif);
 void ionic_lif_free_phc(struct ionic_lif *lif);
 #else
+static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 {
 	return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index 5d5da61284e7..2bb749097d9e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -64,10 +64,12 @@ static u64 ionic_hwstamp_rx_filt(int config_rx_filter)
 	}
 }
 
-int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
+static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
+					   struct hwtstamp_config *new_ts)
 {
 	struct ionic *ionic = lif->ionic;
-	struct hwtstamp_config config;
+	struct hwtstamp_config *config;
+	struct hwtstamp_config ts;
 	int tx_mode = 0;
 	u64 rx_filt = 0;
 	int err, err2;
@@ -77,18 +79,23 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 	if (!lif->phc || !lif->phc->ptp)
 		return -EOPNOTSUPP;
 
-	if (ifr) {
-		if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
-			return -EFAULT;
+	if (new_ts) {
+		config = new_ts;
 	} else {
-		/* if called with ifr == NULL, behave as if called with the
-		 * current ts_config from the initial cleared state.
+		/* If called with new_ts == NULL, replay the previous request
+		 * primarily for recovery after a FW_RESET.
+		 * We saved the previous configuration request info, so copy
+		 * the previous request for reference, clear the current state
+		 * to match the device's reset state, and run with it.
 		 */
-		memcpy(&config, &lif->phc->ts_config, sizeof(config));
-		memset(&lif->phc->ts_config, 0, sizeof(config));
+		config = &ts;
+		memcpy(config, &lif->phc->ts_config, sizeof(*config));
+		memset(&lif->phc->ts_config, 0, sizeof(lif->phc->ts_config));
+		lif->phc->ts_config_tx_mode = 0;
+		lif->phc->ts_config_rx_filt = 0;
 	}
 
-	tx_mode = ionic_hwstamp_tx_mode(config.tx_type);
+	tx_mode = ionic_hwstamp_tx_mode(config->tx_type);
 	if (tx_mode < 0)
 		return tx_mode;
 
@@ -96,18 +103,18 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 	if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask)
 		return -ERANGE;
 
-	rx_filt = ionic_hwstamp_rx_filt(config.rx_filter);
-	rx_all = config.rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt;
+	rx_filt = ionic_hwstamp_rx_filt(config->rx_filter);
+	rx_all = config->rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt;
 
 	mask = cpu_to_le64(rx_filt);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) != mask) {
 		rx_filt = 0;
 		rx_all = true;
-		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
 	}
 
 	dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n",
-		config.rx_filter, rx_filt, rx_all);
+		config->rx_filter, rx_filt, rx_all);
 
 	mutex_lock(&lif->phc->config_lock);
 
@@ -141,15 +148,7 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 			goto err_rxall;
 	}
 
-	if (ifr) {
-		err = copy_to_user(ifr->ifr_data, &config, sizeof(config));
-		if (err) {
-			err = -EFAULT;
-			goto err_final;
-		}
-	}
-
-	memcpy(&lif->phc->ts_config, &config, sizeof(config));
+	memcpy(&lif->phc->ts_config, config, sizeof(*config));
 	lif->phc->ts_config_rx_filt = rx_filt;
 	lif->phc->ts_config_tx_mode = tx_mode;
 
@@ -157,14 +156,6 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 
 	return 0;
 
-err_final:
-	if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) {
-		rx_all = lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL;
-		err2 = ionic_lif_config_hwstamp_rxq_all(lif, rx_all);
-		if (err2)
-			dev_err(ionic->dev,
-				"Failed to revert all-rxq timestamp config: %d\n", err2);
-	}
 err_rxall:
 	if (rx_filt != lif->phc->ts_config_rx_filt) {
 		rx_filt = lif->phc->ts_config_rx_filt;
@@ -188,6 +179,37 @@ err_queues:
 	return err;
 }
 
+int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = ionic_lif_hwstamp_set_ts_config(lif, &config);
+	if (err) {
+		netdev_info(lif->netdev, "hwstamp set failed: %d\n", err);
+		return err;
+	}
+
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+
+	return 0;
+}
+
+int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+{
+	int err;
+
+	err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
+	if (err)
+		netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
+
+	return err;
+}
+
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
 {
 	struct hwtstamp_config config;
-- 
cgit v1.2.3


From f3318099658edcc899b95cdf54257edd428e7fbb Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Wed, 7 Apr 2021 16:20:01 -0700
Subject: ionic: extend ts_config set locking

Make sure the configuration is locked before
operating on it for the replay.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index 2bb749097d9e..177dbf89affd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -79,6 +79,8 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
 	if (!lif->phc || !lif->phc->ptp)
 		return -EOPNOTSUPP;
 
+	mutex_lock(&lif->phc->config_lock);
+
 	if (new_ts) {
 		config = new_ts;
 	} else {
@@ -96,12 +98,16 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
 	}
 
 	tx_mode = ionic_hwstamp_tx_mode(config->tx_type);
-	if (tx_mode < 0)
-		return tx_mode;
+	if (tx_mode < 0) {
+		err = tx_mode;
+		goto err_queues;
+	}
 
 	mask = cpu_to_le64(BIT_ULL(tx_mode));
-	if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask)
-		return -ERANGE;
+	if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask) {
+		err = -ERANGE;
+		goto err_queues;
+	}
 
 	rx_filt = ionic_hwstamp_rx_filt(config->rx_filter);
 	rx_all = config->rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt;
@@ -116,8 +122,6 @@ static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif,
 	dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n",
 		config->rx_filter, rx_filt, rx_all);
 
-	mutex_lock(&lif->phc->config_lock);
-
 	if (tx_mode) {
 		err = ionic_lif_create_hwstamp_txq(lif);
 		if (err)
-- 
cgit v1.2.3


From bb1890d5f97425766a865d75f99fd556d5dc6893 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Thu, 8 Apr 2021 11:40:04 +0800
Subject: net: hns3: change flr_prepare/flr_done function names

The flr_prepare/flr_done functions are not only used in the FLR scenario,
but also used in the suspend/resume.

Change the function names to prepare_for_reset/rebuild_for_reset, change
the flr_prepare/flr_done to reset_prepare/reset_done in hnae3_ae_ops.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  5 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    |  8 +++----
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 27 ++++++++++++----------
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 27 ++++++++++++----------
 4 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a234116ba0e5..1d2189047781 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -474,8 +474,9 @@ struct hnae3_ae_dev {
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
 	void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev);
-	void (*flr_prepare)(struct hnae3_ae_dev *ae_dev);
-	void (*flr_done)(struct hnae3_ae_dev *ae_dev);
+	void (*reset_prepare)(struct hnae3_ae_dev *ae_dev,
+			      enum hnae3_reset_type rst_type);
+	void (*reset_done)(struct hnae3_ae_dev *ae_dev);
 	int (*init_client_instance)(struct hnae3_client *client,
 				    struct hnae3_ae_dev *ae_dev);
 	void (*uninit_client_instance)(struct hnae3_client *client,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 076bfb76bdb9..67fc5aa42bdd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2423,8 +2423,8 @@ static void hns3_reset_prepare(struct pci_dev *pdev)
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
 
 	dev_info(&pdev->dev, "FLR prepare\n");
-	if (ae_dev && ae_dev->ops && ae_dev->ops->flr_prepare)
-		ae_dev->ops->flr_prepare(ae_dev);
+	if (ae_dev && ae_dev->ops && ae_dev->ops->reset_prepare)
+		ae_dev->ops->reset_prepare(ae_dev, HNAE3_FLR_RESET);
 }
 
 static void hns3_reset_done(struct pci_dev *pdev)
@@ -2432,8 +2432,8 @@ static void hns3_reset_done(struct pci_dev *pdev)
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
 
 	dev_info(&pdev->dev, "FLR done\n");
-	if (ae_dev && ae_dev->ops && ae_dev->ops->flr_done)
-		ae_dev->ops->flr_done(ae_dev);
+	if (ae_dev && ae_dev->ops && ae_dev->ops->reset_done)
+		ae_dev->ops->reset_done(ae_dev);
 }
 
 static const struct pci_error_handlers hns3_err_handler = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bc805d5fb16e..47c95dcec3d7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11058,10 +11058,11 @@ static void hclge_state_uninit(struct hclge_dev *hdev)
 		cancel_delayed_work_sync(&hdev->service_task);
 }
 
-static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev)
+static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev,
+					enum hnae3_reset_type rst_type)
 {
-#define HCLGE_FLR_RETRY_WAIT_MS	500
-#define HCLGE_FLR_RETRY_CNT	5
+#define HCLGE_RESET_RETRY_WAIT_MS	500
+#define HCLGE_RESET_RETRY_CNT	5
 
 	struct hclge_dev *hdev = ae_dev->priv;
 	int retry_cnt = 0;
@@ -11070,30 +11071,32 @@ static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev)
 retry:
 	down(&hdev->reset_sem);
 	set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
-	hdev->reset_type = HNAE3_FLR_RESET;
+	hdev->reset_type = rst_type;
 	ret = hclge_reset_prepare(hdev);
 	if (ret || hdev->reset_pending) {
-		dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
+		dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n",
 			ret);
 		if (hdev->reset_pending ||
-		    retry_cnt++ < HCLGE_FLR_RETRY_CNT) {
+		    retry_cnt++ < HCLGE_RESET_RETRY_CNT) {
 			dev_err(&hdev->pdev->dev,
 				"reset_pending:0x%lx, retry_cnt:%d\n",
 				hdev->reset_pending, retry_cnt);
 			clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
 			up(&hdev->reset_sem);
-			msleep(HCLGE_FLR_RETRY_WAIT_MS);
+			msleep(HCLGE_RESET_RETRY_WAIT_MS);
 			goto retry;
 		}
 	}
 
-	/* disable misc vector before FLR done */
+	/* disable misc vector before reset done */
 	hclge_enable_vector(&hdev->misc_vector, false);
 	set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
-	hdev->rst_stats.flr_rst_cnt++;
+
+	if (hdev->reset_type == HNAE3_FLR_RESET)
+		hdev->rst_stats.flr_rst_cnt++;
 }
 
-static void hclge_flr_done(struct hnae3_ae_dev *ae_dev)
+static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
 	int ret;
@@ -12466,8 +12469,8 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset,
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
-	.flr_prepare = hclge_flr_prepare,
-	.flr_done = hclge_flr_done,
+	.reset_prepare = hclge_reset_prepare_general,
+	.reset_done = hclge_reset_done,
 	.init_client_instance = hclge_init_client_instance,
 	.uninit_client_instance = hclge_uninit_client_instance,
 	.map_ring_to_vector = hclge_map_ring_to_vector,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 1682769112d0..c7d5c1726499 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2114,10 +2114,11 @@ static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en)
 	writel(en ? 1 : 0, vector->addr);
 }
 
-static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_reset_prepare_general(struct hnae3_ae_dev *ae_dev,
+					  enum hnae3_reset_type rst_type)
 {
-#define HCLGEVF_FLR_RETRY_WAIT_MS	500
-#define HCLGEVF_FLR_RETRY_CNT		5
+#define HCLGEVF_RESET_RETRY_WAIT_MS	500
+#define HCLGEVF_RESET_RETRY_CNT		5
 
 	struct hclgevf_dev *hdev = ae_dev->priv;
 	int retry_cnt = 0;
@@ -2126,29 +2127,31 @@ static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev)
 retry:
 	down(&hdev->reset_sem);
 	set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
-	hdev->reset_type = HNAE3_FLR_RESET;
+	hdev->reset_type = rst_type;
 	ret = hclgevf_reset_prepare(hdev);
 	if (ret) {
-		dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
+		dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n",
 			ret);
 		if (hdev->reset_pending ||
-		    retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) {
+		    retry_cnt++ < HCLGEVF_RESET_RETRY_CNT) {
 			dev_err(&hdev->pdev->dev,
 				"reset_pending:0x%lx, retry_cnt:%d\n",
 				hdev->reset_pending, retry_cnt);
 			clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
 			up(&hdev->reset_sem);
-			msleep(HCLGEVF_FLR_RETRY_WAIT_MS);
+			msleep(HCLGEVF_RESET_RETRY_WAIT_MS);
 			goto retry;
 		}
 	}
 
-	/* disable misc vector before FLR done */
+	/* disable misc vector before reset done */
 	hclgevf_enable_vector(&hdev->misc_vector, false);
-	hdev->rst_stats.flr_rst_cnt++;
+
+	if (hdev->reset_type == HNAE3_FLR_RESET)
+		hdev->rst_stats.flr_rst_cnt++;
 }
 
-static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev)
+static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclgevf_dev *hdev = ae_dev->priv;
 	int ret;
@@ -3748,8 +3751,8 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state,
 static const struct hnae3_ae_ops hclgevf_ops = {
 	.init_ae_dev = hclgevf_init_ae_dev,
 	.uninit_ae_dev = hclgevf_uninit_ae_dev,
-	.flr_prepare = hclgevf_flr_prepare,
-	.flr_done = hclgevf_flr_done,
+	.reset_prepare = hclgevf_reset_prepare_general,
+	.reset_done = hclgevf_reset_done,
 	.init_client_instance = hclgevf_init_client_instance,
 	.uninit_client_instance = hclgevf_uninit_client_instance,
 	.start = hclgevf_ae_start,
-- 
cgit v1.2.3


From 715c58e94f0d78907bfccde12c2ca5236502c53e Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Thu, 8 Apr 2021 11:40:05 +0800
Subject: net: hns3: add suspend and resume pm_ops

To implement the system suspend/resume functions, the NIC driver needs
to support:
1. When the system enters the suspend mode, the driver needs to
implement the suspend callback function of the NIC device. The driver
needs to mute the device, stop all RX/TX activities of the device, and
unmap the interrupt.
2. When the system enters the resume mode, the driver needs to
implement the resume callback function of the NIC device and restore
the device to the state before suspension.

When the system enters the suspend and resume mode, the NIC driver
actually executes the PF function reset process.

When the PFs are suspending/resuming, VFs also enter the suspend/resume
state because the PFs trigger the VFs to reset, therefore no operation
is required when the VF pci_driver is suspending or resuming.

Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 29 ++++++++++++++++++++++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    |  2 ++
 2 files changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 67fc5aa42bdd..25afe5a3348c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2365,6 +2365,32 @@ static void hns3_shutdown(struct pci_dev *pdev)
 		pci_set_power_state(pdev, PCI_D3hot);
 }
 
+static int __maybe_unused hns3_suspend(struct device *dev)
+{
+	struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev);
+
+	if (hns3_is_phys_func(ae_dev->pdev)) {
+		dev_info(dev, "Begin to suspend.\n");
+		if (ae_dev && ae_dev->ops && ae_dev->ops->reset_prepare)
+			ae_dev->ops->reset_prepare(ae_dev, HNAE3_FUNC_RESET);
+	}
+
+	return 0;
+}
+
+static int __maybe_unused hns3_resume(struct device *dev)
+{
+	struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev);
+
+	if (hns3_is_phys_func(ae_dev->pdev)) {
+		dev_info(dev, "Begin to resume.\n");
+		if (ae_dev && ae_dev->ops && ae_dev->ops->reset_done)
+			ae_dev->ops->reset_done(ae_dev);
+	}
+
+	return 0;
+}
+
 static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
 					    pci_channel_state_t state)
 {
@@ -2443,12 +2469,15 @@ static const struct pci_error_handlers hns3_err_handler = {
 	.reset_done	= hns3_reset_done,
 };
 
+static SIMPLE_DEV_PM_OPS(hns3_pm_ops, hns3_suspend, hns3_resume);
+
 static struct pci_driver hns3_driver = {
 	.name     = hns3_driver_name,
 	.id_table = hns3_pci_tbl,
 	.probe    = hns3_probe,
 	.remove   = hns3_remove,
 	.shutdown = hns3_shutdown,
+	.driver.pm  = &hns3_pm_ops,
 	.sriov_configure = hns3_pci_sriov_configure,
 	.err_handler    = &hns3_err_handler,
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 47c95dcec3d7..c446b63be503 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11094,6 +11094,8 @@ retry:
 
 	if (hdev->reset_type == HNAE3_FLR_RESET)
 		hdev->rst_stats.flr_rst_cnt++;
+	else if (hdev->reset_type == HNAE3_FUNC_RESET)
+		hdev->rst_stats.pf_rst_cnt++;
 }
 
 static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
-- 
cgit v1.2.3


From fbe82b3db3e58edca33a7e7d9157eb7bdda6e537 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 8 Apr 2021 15:00:41 +0800
Subject: net: qed: remove unused including <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_ll2_if.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 2f64ed79cee9..ea273ba1c991 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -12,7 +12,6 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/skbuff.h>
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/qed/qed_if.h>
-- 
cgit v1.2.3


From 79749ae19de6863d0748419d413b231b0d57e28a Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 7 Apr 2021 18:46:42 +0300
Subject: tc-testing: add simple action test to verify batch add cleanup

Verify cleanup of failed actions batch add where second action in batch
fails after successful init of first action.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/simple.json        | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index e15f708b0fa4..d5bcbb919dcc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -175,5 +175,35 @@
         "teardown": [
             "$TC actions flush action simple"
         ]
+    },
+    {
+        "id": "8d07",
+        "name": "Verify cleanup of failed actions batch add",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action simple sdata \"2\" index 2",
+            [
+                "$TC actions add action simple sdata \"1\" index 1 action simple sdata \"2\" index 2",
+                255
+            ],
+            "$TC actions flush action simple"
+        ],
+        "cmdUnderTest": "$TC actions add action simple sdata \"2\" index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <2>.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 652e3124c3ee60d9770e070afab0e0a862e9bfd7 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 7 Apr 2021 18:46:43 +0300
Subject: tc-testing: add simple action test to verify batch change cleanup

Verify cleanup of failed actions batch change where second action in batch
fails after successful init of first action.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/simple.json        | 29 ++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index d5bcbb919dcc..e0c5f060ccb9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -205,5 +205,34 @@
         "teardown": [
             "$TC actions flush action simple"
         ]
+    },
+    {
+        "id": "a68a",
+        "name": "Verify cleanup of failed actions batch change",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC actions change action simple sdata \"1\" index 1 action simple sdata \"2\" goto chain 42 index 2",
+                255
+            ],
+            "$TC actions flush action simple"
+        ],
+        "cmdUnderTest": "$TC actions add action simple sdata \"1\" index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <1>.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
     }
 ]
-- 
cgit v1.2.3


From ff182bc572cec4ca757775bf2a33a3ce8611227a Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:02 +0300
Subject: selftests/bpf: test_progs/sockopt_sk: Remove version

As pointed by Andrii Nakryiko, _version is useless now, remove it.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-1-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/progs/sockopt_sk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d3597f81e6e9..978a68005966 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -6,7 +6,6 @@
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
 
 #ifndef PAGE_SIZE
 #define PAGE_SIZE 4096
-- 
cgit v1.2.3


From cad99cce133dd5377f22da1e75d7eb5c4b886d75 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:03 +0300
Subject: selftests/bpf: test_progs/sockopt_sk: Convert to use BPF skeleton

Switch the test to use BPF skeleton to save some boilerplate and
make it easy to access bpf program bss segment.

The latter will be used to pass PAGE_SIZE from userspace since there
is no convenient way for bpf program to get it from inside of the
kernel.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-2-yauheni.kaliuta@redhat.com
---
 .../testing/selftests/bpf/prog_tests/sockopt_sk.c  | 65 ++++++----------------
 1 file changed, 17 insertions(+), 48 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index d5b44b135c00..7274b12abe17 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -3,6 +3,7 @@
 #include "cgroup_helpers.h"
 
 #include <linux/tcp.h>
+#include "sockopt_sk.skel.h"
 
 #ifndef SOL_TCP
 #define SOL_TCP IPPROTO_TCP
@@ -191,60 +192,28 @@ err:
 	return -1;
 }
 
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
-	enum bpf_attach_type attach_type;
-	enum bpf_prog_type prog_type;
-	struct bpf_program *prog;
-	int err;
-
-	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
-	if (err) {
-		log_err("Failed to deduct types for %s BPF program", title);
-		return -1;
-	}
-
-	prog = bpf_object__find_program_by_title(obj, title);
-	if (!prog) {
-		log_err("Failed to find %s BPF program", title);
-		return -1;
-	}
-
-	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
-			      attach_type, 0);
-	if (err) {
-		log_err("Failed to attach %s BPF program", title);
-		return -1;
-	}
-
-	return 0;
-}
-
 static void run_test(int cgroup_fd)
 {
-	struct bpf_prog_load_attr attr = {
-		.file = "./sockopt_sk.o",
-	};
-	struct bpf_object *obj;
-	int ignored;
-	int err;
-
-	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
-	if (CHECK_FAIL(err))
-		return;
+	struct sockopt_sk *skel;
+
+	skel = sockopt_sk__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
+		goto cleanup;
 
-	err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
-	if (CHECK_FAIL(err))
-		goto close_bpf_object;
+	skel->links._setsockopt =
+		bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+		goto cleanup;
 
-	err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
-	if (CHECK_FAIL(err))
-		goto close_bpf_object;
+	skel->links._getsockopt =
+		bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+		goto cleanup;
 
-	CHECK_FAIL(getsetsockopt());
+	ASSERT_OK(getsetsockopt(), "getsetsockopt");
 
-close_bpf_object:
-	bpf_object__close(obj);
+cleanup:
+	sockopt_sk__destroy(skel);
 }
 
 void test_sockopt_sk(void)
-- 
cgit v1.2.3


From 361d32028c7d52d28d7f0562193a2f4a41d10351 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:04 +0300
Subject: selftests/bpf: Pass page size from userspace in sockopt_sk

Since there is no convenient way for bpf program to get PAGE_SIZE
from inside of the kernel, pass the value from userspace.

Zero-initialize the variable in bpf prog, otherwise it will cause
problems on some versions of Clang.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-3-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_sk.c |  2 ++
 tools/testing/selftests/bpf/progs/sockopt_sk.c      | 10 ++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 7274b12abe17..4b937e5dbaca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -200,6 +200,8 @@ static void run_test(int cgroup_fd)
 	if (!ASSERT_OK_PTR(skel, "skel_load"))
 		goto cleanup;
 
+	skel->bss->page_size = getpagesize();
+
 	skel->links._setsockopt =
 		bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
 	if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 978a68005966..8acdb99b5959 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -7,9 +7,7 @@
 
 char _license[] SEC("license") = "GPL";
 
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
+int page_size = 0; /* userspace should set it */
 
 #ifndef SOL_TCP
 #define SOL_TCP IPPROTO_TCP
@@ -89,7 +87,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
 		 * program can only see the first PAGE_SIZE
 		 * bytes of data.
 		 */
-		if (optval_end - optval != PAGE_SIZE)
+		if (optval_end - optval != page_size)
 			return 0; /* EPERM, unexpected data size */
 
 		return 1;
@@ -160,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
 
 	if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
 		/* Original optlen is larger than PAGE_SIZE. */
-		if (ctx->optlen != PAGE_SIZE * 2)
+		if (ctx->optlen != page_size * 2)
 			return 0; /* EPERM, unexpected data size */
 
 		if (optval + 1 > optval_end)
@@ -174,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
 		 * program can only see the first PAGE_SIZE
 		 * bytes of data.
 		 */
-		if (optval_end - optval != PAGE_SIZE)
+		if (optval_end - optval != page_size)
 			return 0; /* EPERM, unexpected data size */
 
 		return 1;
-- 
cgit v1.2.3


From 7a85e4dfa7f5d052ae5016e96f1ee426a8870e78 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:05 +0300
Subject: selftests/bpf: Pass page size from userspace in map_ptr

Use ASSERT to check result but keep CHECK where format was used to
report error.

Use bpf_map__set_max_entries() to set map size dynamically from
userspace according to page size.

Zero-initialize the variable in bpf prog, otherwise it will cause
problems on some versions of Clang.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-4-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/map_ptr.c | 15 +++++++++++++--
 tools/testing/selftests/bpf/progs/map_ptr_kern.c |  4 ++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index c230a573c373..4972f92205c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -12,11 +12,22 @@ void test_map_ptr(void)
 	__u32 duration = 0, retval;
 	char buf[128];
 	int err;
+	int page_size = getpagesize();
 
-	skel = map_ptr_kern__open_and_load();
-	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+	skel = map_ptr_kern__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
 
+	err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
+	if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+		goto cleanup;
+
+	err = map_ptr_kern__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	skel->bss->page_size = page_size;
+
 	err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
 				sizeof(pkt_v4), buf, NULL, &retval, NULL);
 
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d8850bc6a9f1..d1d304c980f0 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
 
 enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
 __u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
 
 #define VERIFY_TYPE(type, func) ({	\
 	g_map_type = type;		\
@@ -635,7 +636,6 @@ struct bpf_ringbuf_map {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } m_ringbuf SEC(".maps");
 
 static inline int check_ringbuf(void)
@@ -643,7 +643,7 @@ static inline int check_ringbuf(void)
 	struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
 	struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
 
-	VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+	VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
 
 	return 1;
 }
-- 
cgit v1.2.3


From 34090aaf256e469a39401cc04d5cd43275ccd97d Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:06 +0300
Subject: selftests/bpf: mmap: Use runtime page size

Replace hardcoded 4096 with runtime value in the userspace part of
the test and set bpf table sizes dynamically according to the value.

Do not switch to ASSERT macros, keep CHECK, for consistency with the
rest of the test. Can be a separate cleanup patch.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-5-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/mmap.c | 24 +++++++++++++++++++-----
 tools/testing/selftests/bpf/progs/test_mmap.c |  2 --
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 9c3c5c0f068f..37b002ca1167 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -29,22 +29,36 @@ void test_mmap(void)
 	struct test_mmap *skel;
 	__u64 val = 0;
 
-	skel = test_mmap__open_and_load();
-	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+	skel = test_mmap__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
+	err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	/* at least 4 pages of data */
+	err = bpf_map__set_max_entries(skel->maps.data_map,
+				       4 * (page_size / sizeof(u64)));
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	err = test_mmap__load(skel);
+	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+		goto cleanup;
+
 	bss_map = skel->maps.bss;
 	data_map = skel->maps.data_map;
 	data_map_fd = bpf_map__fd(data_map);
 
 	rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
-	tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+	tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
 	if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
-		munmap(tmp1, 4096);
+		munmap(tmp1, page_size);
 		goto cleanup;
 	}
 	/* now double-check if it's mmap()'able at all */
-	tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+	tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
 	if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4096);
 	__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
 	__type(key, __u32);
 	__type(value, char);
@@ -17,7 +16,6 @@ struct {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 512 * 4); /* at least 4 pages of data */
 	__uint(map_flags, BPF_F_MMAPABLE);
 	__type(key, __u32);
 	__type(value, __u64);
-- 
cgit v1.2.3


From 23a65766066bb6e42a44e9097ddf79f72292a19f Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:07 +0300
Subject: selftests/bpf: ringbuf: Use runtime page size

Replace hardcoded 4096 with runtime value in the userspace part of
the test and set bpf table sizes dynamically according to the value.

Do not switch to ASSERT macros, keep CHECK, for consistency with the
rest of the test. Can be a separate cleanup patch.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-6-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 17 +++++++++++++----
 tools/testing/selftests/bpf/progs/test_ringbuf.c |  1 -
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index fddbc5db5d6a..de78617f6550 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -87,11 +87,20 @@ void test_ringbuf(void)
 	pthread_t thread;
 	long bg_ret = -1;
 	int err, cnt;
+	int page_size = getpagesize();
 
-	skel = test_ringbuf__open_and_load();
-	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+	skel = test_ringbuf__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
+	err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	err = test_ringbuf__load(skel);
+	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+		goto cleanup;
+
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
@@ -110,9 +119,9 @@ void test_ringbuf(void)
 	CHECK(skel->bss->avail_data != 3 * rec_sz,
 	      "err_avail_size", "exp %ld, got %ld\n",
 	      3L * rec_sz, skel->bss->avail_data);
-	CHECK(skel->bss->ring_size != 4096,
+	CHECK(skel->bss->ring_size != page_size,
 	      "err_ring_size", "exp %ld, got %ld\n",
-	      4096L, skel->bss->ring_size);
+	      (long)page_size, skel->bss->ring_size);
 	CHECK(skel->bss->cons_pos != 0,
 	      "err_cons_pos", "exp %ld, got %ld\n",
 	      0L, skel->bss->cons_pos);
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..6b3f288b7c63 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -15,7 +15,6 @@ struct sample {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf SEC(".maps");
 
 /* inputs */
-- 
cgit v1.2.3


From b3278099b2f6e81771c6c2b70fcf9a56e9ba5d93 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Date: Thu, 8 Apr 2021 09:13:08 +0300
Subject: libbpf: Add bpf_map__inner_map API

The API gives access to inner map for map in map types (array or
hash of map). It will be used to dynamically set max_entries in it.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-7-yauheni.kaliuta@redhat.com
---
 tools/lib/bpf/libbpf.c   | 10 ++++++++++
 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 12 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7aad78dbb4b4..ed5586cce227 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2194,6 +2194,7 @@ static int parse_btf_map_def(struct bpf_object *obj,
 			map->inner_map = calloc(1, sizeof(*map->inner_map));
 			if (!map->inner_map)
 				return -ENOMEM;
+			map->inner_map->fd = -1;
 			map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
 			map->inner_map->name = malloc(strlen(map->name) +
 						      sizeof(".inner") + 1);
@@ -3845,6 +3846,14 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
 	return map->def.max_entries;
 }
 
+struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
+{
+	if (!bpf_map_type__is_map_in_map(map->def.type))
+		return NULL;
+
+	return map->inner_map;
+}
+
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
 	if (map->fd >= 0)
@@ -9476,6 +9485,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 		pr_warn("error: inner_map_fd already specified\n");
 		return -EINVAL;
 	}
+	zfree(&map->inner_map);
 	map->inner_map_fd = fd;
 	return 0;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f500621d28e5..bec4e6a6e31d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -480,6 +480,7 @@ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
 
 LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
 
 LIBBPF_API long libbpf_get_error(const void *ptr);
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f5990f7208ce..b9b29baf1df8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -359,5 +359,6 @@ LIBBPF_0.4.0 {
 		bpf_linker__finalize;
 		bpf_linker__free;
 		bpf_linker__new;
+		bpf_map__inner_map;
 		bpf_object__set_kversion;
 } LIBBPF_0.3.0;
-- 
cgit v1.2.3


From f3f4c23e1238783f3d719cfbda6c5e2fd03a48c4 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:09 +0300
Subject: selftests/bpf: ringbuf_multi: Use runtime page size

Set bpf table sizes dynamically according to the runtime page size
value.

Do not switch to ASSERT macros, keep CHECK, for consistency with the
rest of the test. Can be a separate cleanup patch.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-8-yauheni.kaliuta@redhat.com
---
 .../selftests/bpf/prog_tests/ringbuf_multi.c       | 23 +++++++++++++++++++---
 .../selftests/bpf/progs/test_ringbuf_multi.c       |  1 -
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index d37161e59bb2..159de99621c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,30 @@ static int process_sample(void *ctx, void *data, size_t len)
 void test_ringbuf_multi(void)
 {
 	struct test_ringbuf_multi *skel;
-	struct ring_buffer *ringbuf;
+	struct ring_buffer *ringbuf = NULL;
 	int err;
+	int page_size = getpagesize();
 
-	skel = test_ringbuf_multi__open_and_load();
-	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+	skel = test_ringbuf_multi__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
+	err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
+	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+		goto cleanup;
+
+	err = test_ringbuf_multi__load(skel);
+	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+		goto cleanup;
+
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..055c10b2ff80 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,6 @@ struct sample {
 
 struct ringbuf_map {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
-	__uint(max_entries, 1 << 12);
 } ringbuf1 SEC(".maps"),
   ringbuf2 SEC(".maps");
 
-- 
cgit v1.2.3


From cfc0889cebccbc398a9d7a14e4e1b4724afe4bb3 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Thu, 8 Apr 2021 09:13:10 +0300
Subject: selftests/bpf: ringbuf_multi: Test bpf_map__set_inner_map_fd

Test map__set_inner_map_fd() interaction with map-in-map
initialization. Use hashmap of maps just to make it different to
existing array of maps.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210408061310.95877-9-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 14 ++++++++++++++
 tools/testing/selftests/bpf/progs/test_ringbuf_multi.c | 11 +++++++++++
 2 files changed, 25 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 159de99621c7..cef63e703924 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -44,6 +44,7 @@ void test_ringbuf_multi(void)
 	struct ring_buffer *ringbuf = NULL;
 	int err;
 	int page_size = getpagesize();
+	int proto_fd = -1;
 
 	skel = test_ringbuf_multi__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
@@ -61,10 +62,21 @@ void test_ringbuf_multi(void)
 	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
 		goto cleanup;
 
+	proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
+	if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+		goto cleanup;
+
+	err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+	if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+		goto cleanup;
+
 	err = test_ringbuf_multi__load(skel);
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
 
+	close(proto_fd);
+	proto_fd = -1;
+
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
@@ -114,6 +126,8 @@ void test_ringbuf_multi(void)
 	      2L, skel->bss->total);
 
 cleanup:
+	if (proto_fd >= 0)
+		close(proto_fd);
 	ring_buffer__free(ringbuf);
 	test_ringbuf_multi__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index 055c10b2ff80..197b86546dca 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -30,6 +30,17 @@ struct {
 	},
 };
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+	.values = {
+		[0] = &ringbuf1,
+	},
+};
+
 /* inputs */
 int pid = 0;
 int target_ring = 0;
-- 
cgit v1.2.3


From b98b33043c95a3886b5feb73814f8882a9cceaad Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 8 Apr 2021 19:45:02 +0200
Subject: net: dccp: use net_generic storage

DCCP is virtually never used, so no need to use space in struct net for it.

Put the pernet ipv4/v6 socket in the dccp ipv4/ipv6 modules instead.

Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/r/20210408174502.1625-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/net_namespace.h |  4 ----
 include/net/netns/dccp.h    | 12 ------------
 net/dccp/ipv4.c             | 24 ++++++++++++++++++++----
 net/dccp/ipv6.c             | 24 ++++++++++++++++++++----
 4 files changed, 40 insertions(+), 24 deletions(-)
 delete mode 100644 include/net/netns/dccp.h

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3802c8322ab0..fa5887143f0d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -22,7 +22,6 @@
 #include <net/netns/nexthop.h>
 #include <net/netns/ieee802154_6lowpan.h>
 #include <net/netns/sctp.h>
-#include <net/netns/dccp.h>
 #include <net/netns/netfilter.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -130,9 +129,6 @@ struct net {
 #if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
 	struct netns_sctp	sctp;
 #endif
-#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
-	struct netns_dccp	dccp;
-#endif
 #ifdef CONFIG_NETFILTER
 	struct netns_nf		nf;
 	struct netns_xt		xt;
diff --git a/include/net/netns/dccp.h b/include/net/netns/dccp.h
deleted file mode 100644
index cdbc4f5b8390..000000000000
--- a/include/net/netns/dccp.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NETNS_DCCP_H__
-#define __NETNS_DCCP_H__
-
-struct sock;
-
-struct netns_dccp {
-	struct sock *v4_ctl_sk;
-	struct sock *v6_ctl_sk;
-};
-
-#endif
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 2455b0c0e486..ffc601a3b329 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -23,14 +23,21 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 #include <net/secure_seq.h>
+#include <net/netns/generic.h>
 
 #include "ackvec.h"
 #include "ccid.h"
 #include "dccp.h"
 #include "feat.h"
 
+struct dccp_v4_pernet {
+	struct sock *v4_ctl_sk;
+};
+
+static unsigned int dccp_v4_pernet_id __read_mostly;
+
 /*
- * The per-net dccp.v4_ctl_sk socket is used for responding to
+ * The per-net v4_ctl_sk socket is used for responding to
  * the Out-of-the-blue (OOTB) packets. A control sock will be created
  * for this socket at the initialization time.
  */
@@ -513,7 +520,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	struct sk_buff *skb;
 	struct dst_entry *dst;
 	struct net *net = dev_net(skb_dst(rxskb)->dev);
-	struct sock *ctl_sk = net->dccp.v4_ctl_sk;
+	struct dccp_v4_pernet *pn;
+	struct sock *ctl_sk;
 
 	/* Never send a reset in response to a reset. */
 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
@@ -522,6 +530,8 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	if (skb_rtable(rxskb)->rt_type != RTN_LOCAL)
 		return;
 
+	pn = net_generic(net, dccp_v4_pernet_id);
+	ctl_sk = pn->v4_ctl_sk;
 	dst = dccp_v4_route_skb(net, ctl_sk, rxskb);
 	if (dst == NULL)
 		return;
@@ -1005,16 +1015,20 @@ static struct inet_protosw dccp_v4_protosw = {
 
 static int __net_init dccp_v4_init_net(struct net *net)
 {
+	struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
+
 	if (dccp_hashinfo.bhash == NULL)
 		return -ESOCKTNOSUPPORT;
 
-	return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET,
+	return inet_ctl_sock_create(&pn->v4_ctl_sk, PF_INET,
 				    SOCK_DCCP, IPPROTO_DCCP, net);
 }
 
 static void __net_exit dccp_v4_exit_net(struct net *net)
 {
-	inet_ctl_sock_destroy(net->dccp.v4_ctl_sk);
+	struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
+
+	inet_ctl_sock_destroy(pn->v4_ctl_sk);
 }
 
 static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
@@ -1026,6 +1040,8 @@ static struct pernet_operations dccp_v4_ops = {
 	.init	= dccp_v4_init_net,
 	.exit	= dccp_v4_exit_net,
 	.exit_batch = dccp_v4_exit_batch,
+	.id	= &dccp_v4_pernet_id,
+	.size   = sizeof(struct dccp_v4_pernet),
 };
 
 static int __init dccp_v4_init(void)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 2be5c69824f9..6f5304db5a67 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -27,13 +27,20 @@
 #include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 #include <net/secure_seq.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 
 #include "dccp.h"
 #include "ipv6.h"
 #include "feat.h"
 
-/* The per-net dccp.v6_ctl_sk is used for sending RSTs and ACKs */
+struct dccp_v6_pernet {
+	struct sock *v6_ctl_sk;
+};
+
+static unsigned int dccp_v6_pernet_id __read_mostly;
+
+/* The per-net v6_ctl_sk is used for sending RSTs and ACKs */
 
 static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
 static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
@@ -254,7 +261,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	struct sk_buff *skb;
 	struct flowi6 fl6;
 	struct net *net = dev_net(skb_dst(rxskb)->dev);
-	struct sock *ctl_sk = net->dccp.v6_ctl_sk;
+	struct dccp_v6_pernet *pn;
+	struct sock *ctl_sk;
 	struct dst_entry *dst;
 
 	if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
@@ -263,6 +271,8 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	if (!ipv6_unicast_destination(rxskb))
 		return;
 
+	pn = net_generic(net, dccp_v6_pernet_id);
+	ctl_sk = pn->v6_ctl_sk;
 	skb = dccp_ctl_make_reset(ctl_sk, rxskb);
 	if (skb == NULL)
 		return;
@@ -1089,16 +1099,20 @@ static struct inet_protosw dccp_v6_protosw = {
 
 static int __net_init dccp_v6_init_net(struct net *net)
 {
+	struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
+
 	if (dccp_hashinfo.bhash == NULL)
 		return -ESOCKTNOSUPPORT;
 
-	return inet_ctl_sock_create(&net->dccp.v6_ctl_sk, PF_INET6,
+	return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6,
 				    SOCK_DCCP, IPPROTO_DCCP, net);
 }
 
 static void __net_exit dccp_v6_exit_net(struct net *net)
 {
-	inet_ctl_sock_destroy(net->dccp.v6_ctl_sk);
+	struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
+
+	inet_ctl_sock_destroy(pn->v6_ctl_sk);
 }
 
 static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
@@ -1110,6 +1124,8 @@ static struct pernet_operations dccp_v6_ops = {
 	.init   = dccp_v6_init_net,
 	.exit   = dccp_v6_exit_net,
 	.exit_batch = dccp_v6_exit_batch,
+	.id	= &dccp_v6_pernet_id,
+	.size   = sizeof(struct dccp_v6_pernet),
 };
 
 static int __init dccp_v6_init(void)
-- 
cgit v1.2.3


From a7150e382267d5d2bb3e1a2a07776e97646b0952 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 9 Apr 2021 10:02:37 -0700
Subject: Revert "tcp: Reset tcp connections in SYN-SENT state"

This reverts commit e880f8b3a24a73704731a7227ed5fee14bd90192.

1) Patch has not been properly tested, and is wrong [1]
2) Patch submission did not include TCP maintainer (this is me)

[1]
divide error: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 8426 Comm: syz-executor478 Not tainted 5.12.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:__tcp_select_window+0x56d/0xad0 net/ipv4/tcp_output.c:3015
Code: 44 89 ff e8 d5 cd f0 f9 45 39 e7 0f 8d 20 ff ff ff e8 f7 c7 f0 f9 44 89 e3 e9 13 ff ff ff e8 ea c7 f0 f9 44 89 e0 44 89 e3 99 <f7> 7c 24 04 29 d3 e9 fc fe ff ff e8 d3 c7 f0 f9 41 f7 dc bf 1f 00
RSP: 0018:ffffc9000184fac0 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffffffff87832e76 RDI: 0000000000000003
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: ffffffff87832e14 R11: 0000000000000000 R12: 0000000000000000
R13: 1ffff92000309f5c R14: 0000000000000000 R15: 0000000000000000
FS:  00000000023eb300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fc2b5f426c0 CR3: 000000001c5cf000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 tcp_select_window net/ipv4/tcp_output.c:264 [inline]
 __tcp_transmit_skb+0xa82/0x38f0 net/ipv4/tcp_output.c:1351
 tcp_transmit_skb net/ipv4/tcp_output.c:1423 [inline]
 tcp_send_active_reset+0x475/0x8e0 net/ipv4/tcp_output.c:3449
 tcp_disconnect+0x15a9/0x1e60 net/ipv4/tcp.c:2955
 inet_shutdown+0x260/0x430 net/ipv4/af_inet.c:905
 __sys_shutdown_sock net/socket.c:2189 [inline]
 __sys_shutdown_sock net/socket.c:2183 [inline]
 __sys_shutdown+0xf1/0x1b0 net/socket.c:2201
 __do_sys_shutdown net/socket.c:2209 [inline]
 __se_sys_shutdown net/socket.c:2207 [inline]
 __x64_sys_shutdown+0x50/0x70 net/socket.c:2207
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: e880f8b3a24a ("tcp: Reset tcp connections in SYN-SENT state")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Manoj Basapathi <manojbm@codeaurora.org>
Cc: Sauvik Saha <ssaha@codeaurora.org>
Link: https://lore.kernel.org/r/20210409170237.274904-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/tcp.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 627a472161fb..e14fd0c50c10 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2888,7 +2888,7 @@ static inline bool tcp_need_reset(int state)
 {
 	return (1 << state) &
 	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
-		TCPF_FIN_WAIT2 | TCPF_SYN_RECV | TCPF_SYN_SENT);
+		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
 }
 
 static void tcp_rtx_queue_purge(struct sock *sk)
@@ -2954,7 +2954,8 @@ int tcp_disconnect(struct sock *sk, int flags)
 		 */
 		tcp_send_active_reset(sk, gfp_any());
 		sk->sk_err = ECONNRESET;
-	}
+	} else if (old_state == TCP_SYN_SENT)
+		sk->sk_err = ECONNRESET;
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-- 
cgit v1.2.3


From fba863b816049b03f3fbb07b10ebdcfe5c4141f7 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 7 Apr 2021 17:51:56 +0200
Subject: net: phy: make PHY PM ops a no-op if MAC driver manages PHY PM

Resume callback of the PHY driver is called after the one for the MAC
driver. The PHY driver resume callback calls phy_init_hw(), and this is
potentially problematic if the MAC driver calls phy_start() in its resume
callback. One issue was reported with the fec driver and a KSZ8081 PHY
which seems to become unstable if a soft reset is triggered during aneg.

The new flag allows MAC drivers to indicate that they take care of
suspending/resuming the PHY. Then the MAC PM callbacks can handle
any dependency between MAC and PHY PM.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy_device.c | 6 ++++++
 include/linux/phy.h          | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index a009d1769b08..73d29fd5e03d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -273,6 +273,9 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
 {
 	struct phy_device *phydev = to_phy_device(dev);
 
+	if (phydev->mac_managed_pm)
+		return 0;
+
 	/* We must stop the state machine manually, otherwise it stops out of
 	 * control, possibly with the phydev->lock held. Upon resume, netdev
 	 * may call phy routines that try to grab the same lock, and that may
@@ -294,6 +297,9 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
 	struct phy_device *phydev = to_phy_device(dev);
 	int ret;
 
+	if (phydev->mac_managed_pm)
+		return 0;
+
 	if (!phydev->suspended_by_mdio_bus)
 		goto no_resume;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 8e2cf84b2318..98fb441dd72e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -493,6 +493,7 @@ struct macsec_ops;
  * @loopback_enabled: Set true if this PHY has been loopbacked successfully.
  * @downshifted_rate: Set true if link speed has been downshifted.
  * @is_on_sfp_module: Set true if PHY is located on an SFP module.
+ * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
  * @irq: IRQ number of the PHY's interrupt (-1 if none)
@@ -567,6 +568,7 @@ struct phy_device {
 	unsigned loopback_enabled:1;
 	unsigned downshifted_rate:1;
 	unsigned is_on_sfp_module:1;
+	unsigned mac_managed_pm:1;
 
 	unsigned autoneg:1;
 	/* The most recently read link state */
-- 
cgit v1.2.3


From 557d5dc83f6831b4e54d141e9b121850406f9a60 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 7 Apr 2021 17:52:45 +0200
Subject: net: fec: use mac-managed PHY PM

Use the new mac_managed_pm flag to work around an issue with KSZ8081 PHY
that becomes unstable when a soft reset is triggered during aneg.

Reported-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Tested-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3db882322b2b..70aea9c274fe 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2048,6 +2048,8 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 	fep->link = 0;
 	fep->full_duplex = 0;
 
+	phy_dev->mac_managed_pm = 1;
+
 	phy_attached_info(phy_dev);
 
 	return 0;
@@ -3864,6 +3866,7 @@ static int __maybe_unused fec_resume(struct device *dev)
 		netif_device_attach(ndev);
 		netif_tx_unlock_bh(ndev);
 		napi_enable(&fep->napi);
+		phy_init_hw(ndev->phydev);
 		phy_start(ndev->phydev);
 	}
 	rtnl_unlock();
-- 
cgit v1.2.3


From 5c2280fc2ee4c6f0ee1f0d0adf6b76f1023b5e99 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 7 Apr 2021 17:53:52 +0200
Subject: r8169: use mac-managed PHY PM

Use the new mac_managed_pm flag to indicate that the driver takes care
of PHY power management.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1cd5c6f6d44f..85031b4721fa 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4660,6 +4660,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
 static void rtl8169_up(struct rtl8169_private *tp)
 {
 	pci_set_master(tp->pci_dev);
+	phy_init_hw(tp->phydev);
 	phy_resume(tp->phydev);
 	rtl8169_init_phy(tp);
 	napi_enable(&tp->napi);
@@ -5085,6 +5086,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 		return -EUNATCH;
 	}
 
+	tp->phydev->mac_managed_pm = 1;
+
 	/* PHY will be woken up in rtl_open() */
 	phy_suspend(tp->phydev);
 
-- 
cgit v1.2.3


From 524e001b7dcaf50ade6eb115745561a46e374b06 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Fri, 9 Apr 2021 19:53:39 +0800
Subject: cxgb4: remove unneeded if-null-free check

Eliminate the following coccicheck warning:

drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c:529:3-9: WARNING:
 NULL check before some freeing functions is not needed.
drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c:533:2-8: WARNING:
 NULL check before some freeing functions is not needed.
drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c:161:2-7: WARNING:
 NULL check before some freeing functions is not needed.
drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c:327:3-9: WARNING:
 NULL check before some freeing functions is not needed.

Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Link: https://lore.kernel.org/r/20210409115339.4598-1-linqiheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c     | 3 +--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c  | 3 +--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c | 8 ++------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index ce28820c57c9..12fcf84d67ad 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -323,8 +323,7 @@ void t4_cleanup_clip_tbl(struct adapter *adap)
 	struct clip_tbl *ctbl = adap->clipt;
 
 	if (ctbl) {
-		if (ctbl->cl_list)
-			kvfree(ctbl->cl_list);
+		kvfree(ctbl->cl_list);
 		kvfree(ctbl);
 	}
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 77648e4ab4cc..dd66b244466d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -157,8 +157,7 @@ static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init)
 
 static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init)
 {
-	if (pdbg_init->compress_buff)
-		vfree(pdbg_init->compress_buff);
+	vfree(pdbg_init->compress_buff);
 }
 
 int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index dede02505ceb..a5d2f84dcdd5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -524,13 +524,9 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap)
 out_no_mem:
 	for (i = 0; i < t->size; i++) {
 		struct cxgb4_link *link = &t->table[i];
-
-		if (link->tid_map)
-			kvfree(link->tid_map);
+		kvfree(link->tid_map);
 	}
-
-	if (t)
-		kvfree(t);
+	kvfree(t);
 
 	return NULL;
 }
-- 
cgit v1.2.3


From 626b598aa8becc95ad1c56e721c90ddfc6a8c409 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 9 Apr 2021 15:24:28 +0300
Subject: net: enetc: fix array underflow in error handling code

This loop will try to unmap enetc_unmap_tx_buff[-1] and crash.

Fixes: 9d2b68cc108d ("net: enetc: add support for XDP_REDIRECT")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/YHBHfCY/yv3EnM9z@mwanda
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 57049ae97201..d86395775ed0 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -895,7 +895,7 @@ static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring,
 		dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
 			/* Undo the DMA mapping for all fragments */
-			while (n-- >= 0)
+			while (--n >= 0)
 				enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]);
 
 			netdev_err(tx_ring->ndev, "DMA map error\n");
-- 
cgit v1.2.3


From a93580a02dbf9e67c4f801e2ff1b32ec4c748516 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 9 Apr 2021 22:27:59 +0300
Subject: net: enetc: fix TX ring interrupt storm

The blamed commit introduced a bit in the TX software buffer descriptor
structure for determining whether a BD is final or not; we rearm the TX
interrupt vector for every frame (hence final BD) transmitted.

But there is a problem with the patch: it replaced a condition whose
expression is a bool which was evaluated at the beginning of the "while"
loop with a bool expression that is evaluated on the spot: tx_swbd->is_eof.

The problem with the latter expression is that the tx_swbd has already
been incremented at that stage, so the tx_swbd->is_eof check is in fact
with the _next_ software BD. Which is _not_ final.

The effect is that the CPU is in 100% load with ksoftirqd because it
does not acknowledge the TX interrupt, so the handler keeps getting
called again and again.

The fix is to restore the code structure, and keep the local bool is_eof
variable, just to assign it the tx_swbd->is_eof value instead of
!!tx_swbd->skb.

Fixes: d504498d2eb3 ("net: enetc: add a dedicated is_eof bit in the TX software BD")
Reported-by: Alex Marginean <alexandru.marginean@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Link: https://lore.kernel.org/r/20210409192759.3895104-1-olteanv@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index d86395775ed0..182d808451bd 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -406,6 +406,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
 		struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
 		struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
+		bool is_eof = tx_swbd->is_eof;
 
 		if (unlikely(tx_swbd->check_wb)) {
 			struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -453,7 +454,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 		}
 
 		/* BD iteration loop end */
-		if (tx_swbd->is_eof) {
+		if (is_eof) {
 			tx_frm_cnt++;
 			/* re-arm interrupt source */
 			enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) |
-- 
cgit v1.2.3


From 6c5e6b4ccc1bb9ac56579a9aed25d517d2318be6 Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Fri, 9 Apr 2021 10:16:13 +0300
Subject: enetc: Use generic rule to map Tx rings to interrupt vectors

Even if the current mapping is correct for the 1 CPU and 2 CPU cases
(currently enetc is included in SoCs with up to 2 CPUs only), better
use a generic rule for the mapping to cover all possible cases.
The number of CPUs is the same as the number of interrupt vectors:

Per device Tx rings -
device_tx_ring[idx], where idx = 0..n_rings_total-1

Per interrupt vector Tx rings -
int_vector[i].ring[j], where i = 0..n_int_vects-1
			     j = 0..n_rings_per_v-1

Mapping rule -
n_rings_per_v = n_rings_total / n_int_vects
for i = 0..n_int_vects - 1:
	for j = 0..n_rings_per_v - 1:
		idx = n_int_vects * j + i
		int_vector[i].ring[j] <- device_tx_ring[idx]

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210409071613.28912-1-claudiu.manoil@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 182d808451bd..41bfc6e623bf 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2344,11 +2344,7 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 			int idx;
 
 			/* default tx ring mapping policy */
-			if (priv->bdr_int_num == ENETC_MAX_BDR_INT)
-				idx = 2 * j + i; /* 2 CPUs */
-			else
-				idx = j + i * v_tx_rings; /* default */
-
+			idx = priv->bdr_int_num * j + i;
 			__set_bit(idx, &v->tx_rings_map);
 			bdr = &v->tx_ring[j];
 			bdr->index = idx;
-- 
cgit v1.2.3


From 7ad3bd52cbcb7d263d320a9dca5cfe409734f62e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:16 -0500
Subject: net: ipa: relax pool entry size requirement

I no longer know why a validation check ensured the size of an entry
passed to gsi_trans_pool_init() was restricted to be a multiple of 8.
For 32-bit builds, this condition doesn't always hold, and for DMA
pools, the size is rounded up to a power of 2 anyway.

Remove this restriction.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi_trans.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 70c2b585f98d..8c795a6a8598 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -91,7 +91,7 @@ int gsi_trans_pool_init(struct gsi_trans_pool *pool, size_t size, u32 count,
 	void *virt;
 
 #ifdef IPA_VALIDATE
-	if (!size || size % 8)
+	if (!size)
 		return -EINVAL;
 	if (count < max_alloc)
 		return -EINVAL;
@@ -141,7 +141,7 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool,
 	void *virt;
 
 #ifdef IPA_VALIDATE
-	if (!size || size % 8)
+	if (!size)
 		return -EINVAL;
 	if (count < max_alloc)
 		return -EINVAL;
-- 
cgit v1.2.3


From 49e76a418981293bcd8cb1e32df5b0e00e34d8ff Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:17 -0500
Subject: net: ipa: update sequence type for modem TX endpoint

On IPA v3.5.1, the sequencer type for the modem TX endpoint does not
define the replication portion in the same way the downstream code
does.  This difference doesn't affect the behavior of the upstream
code, but I'd prefer the two code bases use the same configuration
value here.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_data-v3.5.1.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ipa/ipa_data-v3.5.1.c b/drivers/net/ipa/ipa_data-v3.5.1.c
index 57703e95a3f9..ead1a82f32f5 100644
--- a/drivers/net/ipa/ipa_data-v3.5.1.c
+++ b/drivers/net/ipa/ipa_data-v3.5.1.c
@@ -116,6 +116,7 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 				.status_enable	= true,
 				.tx = {
 					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.seq_rep_type = IPA_SEQ_REP_DMA_PARSER,
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
 				},
-- 
cgit v1.2.3


From 57f63faf05625db10a49743287d3cbcb820d2afd Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:18 -0500
Subject: net: ipa: only set endpoint netdev pointer when in use

In ipa_modem_start(), we set endpoint netdev pointers before the
network device is registered.  If registration fails, we don't undo
those assignments.  Instead, wait to assign the netdev pointer until
after registration succeeds.

Set these endpoint netdev pointers to NULL in ipa_modem_stop()
before unregistering the network device.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_modem.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
index 9b08eb823984..8a6ccebde288 100644
--- a/drivers/net/ipa/ipa_modem.c
+++ b/drivers/net/ipa/ipa_modem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2014-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2018-2020 Linaro Ltd.
+ * Copyright (C) 2018-2021 Linaro Ltd.
  */
 
 #include <linux/errno.h>
@@ -213,18 +213,18 @@ int ipa_modem_start(struct ipa *ipa)
 		goto out_set_state;
 	}
 
-	ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
-	ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
-
 	SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
 	priv = netdev_priv(netdev);
 	priv->ipa = ipa;
 
 	ret = register_netdev(netdev);
-	if (ret)
-		free_netdev(netdev);
-	else
+	if (!ret) {
 		ipa->modem_netdev = netdev;
+		ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+		ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+	} else {
+		free_netdev(netdev);
+	}
 
 out_set_state:
 	if (ret)
@@ -263,6 +263,8 @@ int ipa_modem_stop(struct ipa *ipa)
 		if (ret)
 			goto out_set_state;
 
+		ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+		ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
 		ipa->modem_netdev = NULL;
 		unregister_netdev(netdev);
 		free_netdev(netdev);
-- 
cgit v1.2.3


From 077e770f2601e9786331c00aa6f8b02cfdbc7fd9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:19 -0500
Subject: net: ipa: ipa_stop() does not return an error

In ipa_modem_stop(), if the modem netdev pointer is non-null we call
ipa_stop().  We check for an error and if one is returned we handle
it.  But ipa_stop() never returns an error, so this extra handling
is unnecessary.  Simplify the code in ipa_modem_stop() based on the
knowledge no error handling is needed at this spot.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_modem.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c
index 8a6ccebde288..af9aedbde717 100644
--- a/drivers/net/ipa/ipa_modem.c
+++ b/drivers/net/ipa/ipa_modem.c
@@ -240,7 +240,6 @@ int ipa_modem_stop(struct ipa *ipa)
 {
 	struct net_device *netdev = ipa->modem_netdev;
 	enum ipa_modem_state state;
-	int ret;
 
 	/* Only attempt to stop the modem if it's running */
 	state = atomic_cmpxchg(&ipa->modem_state, IPA_MODEM_STATE_RUNNING,
@@ -257,29 +256,20 @@ int ipa_modem_stop(struct ipa *ipa)
 	/* Prevent the modem from triggering a call to ipa_setup() */
 	ipa_smp2p_disable(ipa);
 
+	/* Stop the queue and disable the endpoints if it's open */
 	if (netdev) {
-		/* Stop the queue and disable the endpoints if it's open */
-		ret = ipa_stop(netdev);
-		if (ret)
-			goto out_set_state;
-
+		(void)ipa_stop(netdev);
 		ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
 		ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
 		ipa->modem_netdev = NULL;
 		unregister_netdev(netdev);
 		free_netdev(netdev);
-	} else {
-		ret = 0;
 	}
 
-out_set_state:
-	if (ret)
-		atomic_set(&ipa->modem_state, IPA_MODEM_STATE_RUNNING);
-	else
-		atomic_set(&ipa->modem_state, IPA_MODEM_STATE_STOPPED);
+	atomic_set(&ipa->modem_state, IPA_MODEM_STATE_STOPPED);
 	smp_mb__after_atomic();
 
-	return ret;
+	return 0;
 }
 
 /* Treat a "clean" modem stop the same as a crash */
-- 
cgit v1.2.3


From 74858b63c47cfbacafb26ea9701b26ffa18acd4a Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:20 -0500
Subject: net: ipa: get rid of empty IPA functions

There are place holder functions in the IPA code that do nothing.
For the most part these are inverse functions, for example, once the
routing or filter tables are set up there is no need to perform any
matching teardown activity at shutdown, or in the case of an error.

These can be safely removed, resulting in some code simplification.
Add comments in these spots making it explicit that there is no
inverse.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_main.c     | 29 +++++++++--------------------
 drivers/net/ipa/ipa_mem.c      |  9 +++------
 drivers/net/ipa/ipa_mem.h      |  5 ++---
 drivers/net/ipa/ipa_resource.c |  8 +-------
 drivers/net/ipa/ipa_resource.h |  8 ++------
 drivers/net/ipa/ipa_table.c    | 26 +++-----------------------
 drivers/net/ipa/ipa_table.h    | 16 ++++------------
 7 files changed, 24 insertions(+), 77 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index a970d10e650e..bfed151f5d6d 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -147,13 +147,13 @@ int ipa_setup(struct ipa *ipa)
 	if (ret)
 		goto err_endpoint_teardown;
 
-	ret = ipa_mem_setup(ipa);
+	ret = ipa_mem_setup(ipa);	/* No matching teardown required */
 	if (ret)
 		goto err_command_disable;
 
-	ret = ipa_table_setup(ipa);
+	ret = ipa_table_setup(ipa);	/* No matching teardown required */
 	if (ret)
-		goto err_mem_teardown;
+		goto err_command_disable;
 
 	/* Enable the exception handling endpoint, and tell the hardware
 	 * to use it by default.
@@ -161,7 +161,7 @@ int ipa_setup(struct ipa *ipa)
 	exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX];
 	ret = ipa_endpoint_enable_one(exception_endpoint);
 	if (ret)
-		goto err_table_teardown;
+		goto err_command_disable;
 
 	ipa_endpoint_default_route_set(ipa, exception_endpoint->endpoint_id);
 
@@ -179,10 +179,6 @@ int ipa_setup(struct ipa *ipa)
 err_default_route_clear:
 	ipa_endpoint_default_route_clear(ipa);
 	ipa_endpoint_disable_one(exception_endpoint);
-err_table_teardown:
-	ipa_table_teardown(ipa);
-err_mem_teardown:
-	ipa_mem_teardown(ipa);
 err_command_disable:
 	ipa_endpoint_disable_one(command_endpoint);
 err_endpoint_teardown:
@@ -211,8 +207,6 @@ static void ipa_teardown(struct ipa *ipa)
 	ipa_endpoint_default_route_clear(ipa);
 	exception_endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX];
 	ipa_endpoint_disable_one(exception_endpoint);
-	ipa_table_teardown(ipa);
-	ipa_mem_teardown(ipa);
 	command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
 	ipa_endpoint_disable_one(command_endpoint);
 	ipa_endpoint_teardown(ipa);
@@ -480,23 +474,20 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 	if (ret)
 		goto err_endpoint_deconfig;
 
-	ipa_table_config(ipa);
+	ipa_table_config(ipa);		/* No deconfig required */
 
-	/* Assign resource limitation to each group */
+	/* Assign resource limitation to each group; no deconfig required */
 	ret = ipa_resource_config(ipa, data->resource_data);
 	if (ret)
-		goto err_table_deconfig;
+		goto err_mem_deconfig;
 
 	ret = ipa_modem_config(ipa);
 	if (ret)
-		goto err_resource_deconfig;
+		goto err_mem_deconfig;
 
 	return 0;
 
-err_resource_deconfig:
-	ipa_resource_deconfig(ipa);
-err_table_deconfig:
-	ipa_table_deconfig(ipa);
+err_mem_deconfig:
 	ipa_mem_deconfig(ipa);
 err_endpoint_deconfig:
 	ipa_endpoint_deconfig(ipa);
@@ -514,8 +505,6 @@ err_hardware_deconfig:
 static void ipa_deconfig(struct ipa *ipa)
 {
 	ipa_modem_deconfig(ipa);
-	ipa_resource_deconfig(ipa);
-	ipa_table_deconfig(ipa);
 	ipa_mem_deconfig(ipa);
 	ipa_endpoint_deconfig(ipa);
 	ipa_hardware_deconfig(ipa);
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 32907dde5dc6..c5c3b1b7e67d 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 
 #include <linux/types.h>
@@ -53,6 +53,8 @@ ipa_mem_zero_region_add(struct gsi_trans *trans, const struct ipa_mem *mem)
  * The AP informs the modem where its portions of memory are located
  * in a QMI exchange that occurs at modem startup.
  *
+ * There is no need for a matching ipa_mem_teardown() function.
+ *
  * Return:	0 if successful, or a negative error code
  */
 int ipa_mem_setup(struct ipa *ipa)
@@ -97,11 +99,6 @@ int ipa_mem_setup(struct ipa *ipa)
 	return 0;
 }
 
-void ipa_mem_teardown(struct ipa *ipa)
-{
-	/* Nothing to do */
-}
-
 #ifdef IPA_VALIDATE
 
 static bool ipa_mem_valid(struct ipa *ipa, enum ipa_mem_id mem_id)
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index df61ef48df36..9ca8a47bd4af 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2020 Linaro Ltd.
+ * Copyright (C) 2019-2021 Linaro Ltd.
  */
 #ifndef _IPA_MEM_H_
 #define _IPA_MEM_H_
@@ -88,8 +88,7 @@ struct ipa_mem {
 int ipa_mem_config(struct ipa *ipa);
 void ipa_mem_deconfig(struct ipa *ipa);
 
-int ipa_mem_setup(struct ipa *ipa);
-void ipa_mem_teardown(struct ipa *ipa);
+int ipa_mem_setup(struct ipa *ipa);	/* No ipa_mem_teardown() needed */
 
 int ipa_mem_zero_modem(struct ipa *ipa);
 
diff --git a/drivers/net/ipa/ipa_resource.c b/drivers/net/ipa/ipa_resource.c
index 85f922d6f222..3b2dc216d3a6 100644
--- a/drivers/net/ipa/ipa_resource.c
+++ b/drivers/net/ipa/ipa_resource.c
@@ -158,7 +158,7 @@ static void ipa_resource_config_dst(struct ipa *ipa, u32 resource_type,
 	ipa_resource_config_common(ipa, offset, &resource->limits[6], ylimits);
 }
 
-/* Configure resources */
+/* Configure resources; there is no ipa_resource_deconfig() */
 int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
 {
 	u32 i;
@@ -174,9 +174,3 @@ int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
 
 	return 0;
 }
-
-/* Inverse of ipa_resource_config() */
-void ipa_resource_deconfig(struct ipa *ipa)
-{
-	/* Nothing to do */
-}
diff --git a/drivers/net/ipa/ipa_resource.h b/drivers/net/ipa/ipa_resource.h
index 9f74036fb95c..ef5818bff180 100644
--- a/drivers/net/ipa/ipa_resource.h
+++ b/drivers/net/ipa/ipa_resource.h
@@ -14,14 +14,10 @@ struct ipa_resource_data;
  * @ipa:	IPA pointer
  * @data:	IPA resource configuration data
  *
+ * There is no need for a matching ipa_resource_deconfig() function.
+ *
  * Return:	true if all regions are valid, false otherwise
  */
 int ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data);
 
-/**
- * ipa_resource_deconfig() - Inverse of ipa_resource_config()
- * @ipa:	IPA pointer
- */
-void ipa_resource_deconfig(struct ipa *ipa);
-
 #endif /* _IPA_RESOURCE_H_ */
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 401b568df6a3..3168d72f4245 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -497,11 +497,6 @@ int ipa_table_setup(struct ipa *ipa)
 	return 0;
 }
 
-void ipa_table_teardown(struct ipa *ipa)
-{
-	/* Nothing to do */	/* XXX Maybe reset the tables? */
-}
-
 /**
  * ipa_filter_tuple_zero() - Zero an endpoint's hashed filter tuple
  * @endpoint:	Endpoint whose filter hash tuple should be zeroed
@@ -525,6 +520,7 @@ static void ipa_filter_tuple_zero(struct ipa_endpoint *endpoint)
 	iowrite32(val, endpoint->ipa->reg_virt + offset);
 }
 
+/* Configure a hashed filter table; there is no ipa_filter_deconfig() */
 static void ipa_filter_config(struct ipa *ipa, bool modem)
 {
 	enum gsi_ee_id ee_id = modem ? GSI_EE_MODEM : GSI_EE_AP;
@@ -545,11 +541,6 @@ static void ipa_filter_config(struct ipa *ipa, bool modem)
 	}
 }
 
-static void ipa_filter_deconfig(struct ipa *ipa, bool modem)
-{
-	/* Nothing to do */
-}
-
 static bool ipa_route_id_modem(u32 route_id)
 {
 	return route_id >= IPA_ROUTE_MODEM_MIN &&
@@ -576,6 +567,7 @@ static void ipa_route_tuple_zero(struct ipa *ipa, u32 route_id)
 	iowrite32(val, ipa->reg_virt + offset);
 }
 
+/* Configure a hashed route table; there is no ipa_route_deconfig() */
 static void ipa_route_config(struct ipa *ipa, bool modem)
 {
 	u32 route_id;
@@ -588,11 +580,7 @@ static void ipa_route_config(struct ipa *ipa, bool modem)
 			ipa_route_tuple_zero(ipa, route_id);
 }
 
-static void ipa_route_deconfig(struct ipa *ipa, bool modem)
-{
-	/* Nothing to do */
-}
-
+/* Configure a filter and route tables; there is no ipa_table_deconfig() */
 void ipa_table_config(struct ipa *ipa)
 {
 	ipa_filter_config(ipa, false);
@@ -601,14 +589,6 @@ void ipa_table_config(struct ipa *ipa)
 	ipa_route_config(ipa, true);
 }
 
-void ipa_table_deconfig(struct ipa *ipa)
-{
-	ipa_route_deconfig(ipa, true);
-	ipa_route_deconfig(ipa, false);
-	ipa_filter_deconfig(ipa, true);
-	ipa_filter_deconfig(ipa, false);
-}
-
 /*
  * Initialize a coherent DMA allocation containing initialized filter and
  * route table data.  This is used when initializing or resetting the IPA
diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h
index 018045b95aad..1e2be9fce2f8 100644
--- a/drivers/net/ipa/ipa_table.h
+++ b/drivers/net/ipa/ipa_table.h
@@ -74,27 +74,19 @@ int ipa_table_hash_flush(struct ipa *ipa);
 /**
  * ipa_table_setup() - Set up filter and route tables
  * @ipa:	IPA pointer
+ *
+ * There is no need for a matching ipa_table_teardown() function.
  */
 int ipa_table_setup(struct ipa *ipa);
 
-/**
- * ipa_table_teardown() - Inverse of ipa_table_setup()
- * @ipa:	IPA pointer
- */
-void ipa_table_teardown(struct ipa *ipa);
-
 /**
  * ipa_table_config() - Configure filter and route tables
  * @ipa:	IPA pointer
+ *
+ * There is no need for a matching ipa_table_deconfig() function.
  */
 void ipa_table_config(struct ipa *ipa);
 
-/**
- * ipa_table_deconfig() - Inverse of ipa_table_config()
- * @ipa:	IPA pointer
- */
-void ipa_table_deconfig(struct ipa *ipa);
-
 /**
  * ipa_table_init() - Do early initialization of filter and route tables
  * @ipa:	IPA pointer
-- 
cgit v1.2.3


From 57ab8ca42fa07fdbd02828a0b90de704605f70cd Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:21 -0500
Subject: net: ipa: get rid of empty GSI functions

There are place holder functions in the GSI code that do nothing.
Remove these, knowing we can add something back in their place if
they're really needed someday.

Some of these are inverse functions (such as teardown to match setup).
Explicitly comment that there is no inverse in these cases.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi.c | 54 ++++++---------------------------------------------
 1 file changed, 6 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 1c835b3e1a43..9f06663cef26 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -198,7 +198,7 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id)
 	gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
 }
 
-/* Turn off all GSI interrupts initially */
+/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
 static void gsi_irq_setup(struct gsi *gsi)
 {
 	/* Disable all interrupt types */
@@ -217,12 +217,6 @@ static void gsi_irq_setup(struct gsi *gsi)
 	iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
 }
 
-/* Turn off all GSI interrupts when we're all done */
-static void gsi_irq_teardown(struct gsi *gsi)
-{
-	/* Nothing to do */
-}
-
 /* Event ring commands are performed one at a time.  Their completion
  * is signaled by the event ring control GSI interrupt type, which is
  * only enabled when we issue an event ring command.  Only the event
@@ -786,7 +780,7 @@ static void gsi_channel_trans_quiesce(struct gsi_channel *channel)
 	}
 }
 
-/* Program a channel for use */
+/* Program a channel for use; there is no gsi_channel_deprogram() */
 static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 {
 	size_t size = channel->tre_ring.count * GSI_RING_ELEMENT_SIZE;
@@ -874,11 +868,6 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
 	/* All done! */
 }
 
-static void gsi_channel_deprogram(struct gsi_channel *channel)
-{
-	/* Nothing to do */
-}
-
 static int __gsi_channel_start(struct gsi_channel *channel, bool start)
 {
 	struct gsi *gsi = channel->gsi;
@@ -1623,18 +1612,6 @@ static u32 gsi_event_bitmap_init(u32 evt_ring_max)
 	return event_bitmap;
 }
 
-/* Setup function for event rings */
-static void gsi_evt_ring_setup(struct gsi *gsi)
-{
-	/* Nothing to do */
-}
-
-/* Inverse of gsi_evt_ring_setup() */
-static void gsi_evt_ring_teardown(struct gsi *gsi)
-{
-	/* Nothing to do */
-}
-
 /* Setup function for a single channel */
 static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id)
 {
@@ -1684,7 +1661,6 @@ static void gsi_channel_teardown_one(struct gsi *gsi, u32 channel_id)
 
 	netif_napi_del(&channel->napi);
 
-	gsi_channel_deprogram(channel);
 	gsi_channel_de_alloc_command(gsi, channel_id);
 	gsi_evt_ring_reset_command(gsi, evt_ring_id);
 	gsi_evt_ring_de_alloc_command(gsi, evt_ring_id);
@@ -1759,7 +1735,6 @@ static int gsi_channel_setup(struct gsi *gsi)
 	u32 mask;
 	int ret;
 
-	gsi_evt_ring_setup(gsi);
 	gsi_irq_enable(gsi);
 
 	mutex_lock(&gsi->mutex);
@@ -1819,7 +1794,6 @@ err_unwind:
 	mutex_unlock(&gsi->mutex);
 
 	gsi_irq_disable(gsi);
-	gsi_evt_ring_teardown(gsi);
 
 	return ret;
 }
@@ -1848,7 +1822,6 @@ static void gsi_channel_teardown(struct gsi *gsi)
 	mutex_unlock(&gsi->mutex);
 
 	gsi_irq_disable(gsi);
-	gsi_evt_ring_teardown(gsi);
 }
 
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
@@ -1856,7 +1829,6 @@ int gsi_setup(struct gsi *gsi)
 {
 	struct device *dev = gsi->dev;
 	u32 val;
-	int ret;
 
 	/* Here is where we first touch the GSI hardware */
 	val = ioread32(gsi->virt + GSI_GSI_STATUS_OFFSET);
@@ -1865,7 +1837,7 @@ int gsi_setup(struct gsi *gsi)
 		return -EIO;
 	}
 
-	gsi_irq_setup(gsi);
+	gsi_irq_setup(gsi);		/* No matching teardown required */
 
 	val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
 
@@ -1899,18 +1871,13 @@ int gsi_setup(struct gsi *gsi)
 	/* Writing 1 indicates IRQ interrupts; 0 would be MSI */
 	iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
 
-	ret = gsi_channel_setup(gsi);
-	if (ret)
-		gsi_irq_teardown(gsi);
-
-	return ret;
+	return gsi_channel_setup(gsi);
 }
 
 /* Inverse of gsi_setup() */
 void gsi_teardown(struct gsi *gsi)
 {
 	gsi_channel_teardown(gsi);
-	gsi_irq_teardown(gsi);
 }
 
 /* Initialize a channel's event ring */
@@ -1952,7 +1919,7 @@ static void gsi_channel_evt_ring_exit(struct gsi_channel *channel)
 	gsi_evt_ring_id_free(gsi, evt_ring_id);
 }
 
-/* Init function for event rings */
+/* Init function for event rings; there is no gsi_evt_ring_exit() */
 static void gsi_evt_ring_init(struct gsi *gsi)
 {
 	u32 evt_ring_id = 0;
@@ -1964,12 +1931,6 @@ static void gsi_evt_ring_init(struct gsi *gsi)
 	while (++evt_ring_id < GSI_EVT_RING_COUNT_MAX);
 }
 
-/* Inverse of gsi_evt_ring_init() */
-static void gsi_evt_ring_exit(struct gsi *gsi)
-{
-	/* Nothing to do */
-}
-
 static bool gsi_channel_data_valid(struct gsi *gsi,
 				   const struct ipa_gsi_endpoint_data *data)
 {
@@ -2114,7 +2075,7 @@ static int gsi_channel_init(struct gsi *gsi, u32 count,
 	/* IPA v4.2 requires the AP to allocate channels for the modem */
 	modem_alloc = gsi->version == IPA_VERSION_4_2;
 
-	gsi_evt_ring_init(gsi);
+	gsi_evt_ring_init(gsi);			/* No matching exit required */
 
 	/* The endpoint data array is indexed by endpoint name */
 	for (i = 0; i < count; i++) {
@@ -2148,7 +2109,6 @@ err_unwind:
 		}
 		gsi_channel_exit_one(&gsi->channel[data->channel_id]);
 	}
-	gsi_evt_ring_exit(gsi);
 
 	return ret;
 }
@@ -2162,8 +2122,6 @@ static void gsi_channel_exit(struct gsi *gsi)
 		gsi_channel_exit_one(&gsi->channel[channel_id]);
 	while (channel_id--);
 	gsi->modem_channel_bitmap = 0;
-
-	gsi_evt_ring_exit(gsi);
 }
 
 /* Init function for GSI.  GSI hardware does not need to be "ready" */
-- 
cgit v1.2.3


From 602a1c76f84720c56ad4f6a56a330954cab87d05 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 13:07:22 -0500
Subject: net: ipa: three small fixes

Some time ago changes were made to stop referring to clearing the
hardware pipeline as a "tag process."  Fix a comment to use the
newer terminology.

Get rid of a pointless double-negation of the Boolean toward_ipa
flag in ipa_endpoint_config().

make ipa_endpoint_exit_one() private; it's only referenced inside
"ipa_endpoint.c".

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_endpoint.c | 6 +++---
 drivers/net/ipa/ipa_endpoint.h | 2 --
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index dd24179383c1..72751843b2e4 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -397,7 +397,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa)
 	/* We need one command per modem TX endpoint.  We can get an upper
 	 * bound on that by assuming all initialized endpoints are modem->IPA.
 	 * That won't happen, and we could be more precise, but this is fine
-	 * for now.  We need to end the transaction with a "tag process."
+	 * for now.  End the transaction with commands to clear the pipeline.
 	 */
 	count = hweight32(initialized) + ipa_cmd_pipeline_clear_count();
 	trans = ipa_cmd_trans_alloc(ipa, count);
@@ -1755,7 +1755,7 @@ int ipa_endpoint_config(struct ipa *ipa)
 
 		/* Make sure it's pointing in the right direction */
 		endpoint = &ipa->endpoint[endpoint_id];
-		if ((endpoint_id < rx_base) != !!endpoint->toward_ipa) {
+		if ((endpoint_id < rx_base) != endpoint->toward_ipa) {
 			dev_err(dev, "endpoint id %u wrong direction\n",
 				endpoint_id);
 			ret = -EINVAL;
@@ -1791,7 +1791,7 @@ static void ipa_endpoint_init_one(struct ipa *ipa, enum ipa_endpoint_name name,
 	ipa->initialized |= BIT(endpoint->endpoint_id);
 }
 
-void ipa_endpoint_exit_one(struct ipa_endpoint *endpoint)
+static void ipa_endpoint_exit_one(struct ipa_endpoint *endpoint)
 {
 	endpoint->ipa->initialized &= ~BIT(endpoint->endpoint_id);
 
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index f034a9e6ef21..0a859d10312d 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -87,8 +87,6 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa);
 
 int ipa_endpoint_skb_tx(struct ipa_endpoint *endpoint, struct sk_buff *skb);
 
-void ipa_endpoint_exit_one(struct ipa_endpoint *endpoint);
-
 int ipa_endpoint_enable_one(struct ipa_endpoint *endpoint);
 void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint);
 
-- 
cgit v1.2.3


From 7b33ec8b93c9fa401a8fcd92ebde3129a1beb844 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 19 Mar 2021 13:42:12 +0800
Subject: rtw88: add flush hci support

Though mac queue flushing has been supported, sometimes data may be waiting
on interface from host to chip. If it occurs, there may still be data that
flows into mac just after we do flush. To avoid that, we add the hci part
of flushing.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210319054218.3319-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/hci.h      | 16 +++++++
 drivers/net/wireless/realtek/rtw88/mac80211.c |  2 +
 drivers/net/wireless/realtek/rtw88/pci.c      | 69 +++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/hci.h b/drivers/net/wireless/realtek/rtw88/hci.h
index 2cba327e6218..4c6fc6fb3f83 100644
--- a/drivers/net/wireless/realtek/rtw88/hci.h
+++ b/drivers/net/wireless/realtek/rtw88/hci.h
@@ -11,6 +11,7 @@ struct rtw_hci_ops {
 			struct rtw_tx_pkt_info *pkt_info,
 			struct sk_buff *skb);
 	void (*tx_kick_off)(struct rtw_dev *rtwdev);
+	void (*flush_queues)(struct rtw_dev *rtwdev, u32 queues, bool drop);
 	int (*setup)(struct rtw_dev *rtwdev);
 	int (*start)(struct rtw_dev *rtwdev);
 	void (*stop)(struct rtw_dev *rtwdev);
@@ -258,4 +259,19 @@ static inline enum rtw_hci_type rtw_hci_type(struct rtw_dev *rtwdev)
 	return rtwdev->hci.type;
 }
 
+static inline void rtw_hci_flush_queues(struct rtw_dev *rtwdev, u32 queues,
+					bool drop)
+{
+	if (rtwdev->hci.ops->flush_queues)
+		rtwdev->hci.ops->flush_queues(rtwdev, queues, drop);
+}
+
+static inline void rtw_hci_flush_all_queues(struct rtw_dev *rtwdev, bool drop)
+{
+	if (rtwdev->hci.ops->flush_queues)
+		rtwdev->hci.ops->flush_queues(rtwdev,
+					      BIT(rtwdev->hw->queues) - 1,
+					      drop);
+}
+
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 2351dfb0d2e2..333df6b38113 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -520,6 +520,7 @@ static int rtw_ops_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 				  hw_key_type, hw_key_idx);
 		break;
 	case DISABLE_KEY:
+		rtw_hci_flush_all_queues(rtwdev, false);
 		rtw_mac_flush_all_queues(rtwdev, false);
 		rtw_sec_clear_cam(rtwdev, sec, key->hw_key_idx);
 		break;
@@ -670,6 +671,7 @@ static void rtw_ops_flush(struct ieee80211_hw *hw,
 	mutex_lock(&rtwdev->mutex);
 	rtw_leave_lps_deep(rtwdev);
 
+	rtw_hci_flush_queues(rtwdev, queues, drop);
 	rtw_mac_flush_queues(rtwdev, queues, drop);
 	mutex_unlock(&rtwdev->mutex);
 }
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 786a48649946..b8115b31839e 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -671,6 +671,8 @@ static u8 ac_to_hwq[] = {
 	[IEEE80211_AC_BK] = RTW_TX_QUEUE_BK,
 };
 
+static_assert(ARRAY_SIZE(ac_to_hwq) == IEEE80211_NUM_ACS);
+
 static u8 rtw_hw_queue_mapping(struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -727,6 +729,72 @@ static void rtw_pci_dma_check(struct rtw_dev *rtwdev,
 	rtwpci->rx_tag = (rtwpci->rx_tag + 1) % RX_TAG_MAX;
 }
 
+static u32 __pci_get_hw_tx_ring_rp(struct rtw_dev *rtwdev, u8 pci_q)
+{
+	u32 bd_idx_addr = rtw_pci_tx_queue_idx_addr[pci_q];
+	u32 bd_idx = rtw_read16(rtwdev, bd_idx_addr + 2);
+
+	return FIELD_GET(TRX_BD_IDX_MASK, bd_idx);
+}
+
+static void __pci_flush_queue(struct rtw_dev *rtwdev, u8 pci_q, bool drop)
+{
+	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
+	struct rtw_pci_tx_ring *ring = &rtwpci->tx_rings[pci_q];
+	u32 cur_rp;
+	u8 i;
+
+	/* Because the time taked by the I/O in __pci_get_hw_tx_ring_rp is a
+	 * bit dynamic, it's hard to define a reasonable fixed total timeout to
+	 * use read_poll_timeout* helper. Instead, we can ensure a reasonable
+	 * polling times, so we just use for loop with udelay here.
+	 */
+	for (i = 0; i < 30; i++) {
+		cur_rp = __pci_get_hw_tx_ring_rp(rtwdev, pci_q);
+		if (cur_rp == ring->r.wp)
+			return;
+
+		udelay(1);
+	}
+
+	if (!drop)
+		rtw_warn(rtwdev, "timed out to flush pci tx ring[%d]\n", pci_q);
+}
+
+static void __rtw_pci_flush_queues(struct rtw_dev *rtwdev, u32 pci_queues,
+				   bool drop)
+{
+	u8 q;
+
+	for (q = 0; q < RTK_MAX_TX_QUEUE_NUM; q++) {
+		/* It may be not necessary to flush BCN and H2C tx queues. */
+		if (q == RTW_TX_QUEUE_BCN || q == RTW_TX_QUEUE_H2C)
+			continue;
+
+		if (pci_queues & BIT(q))
+			__pci_flush_queue(rtwdev, q, drop);
+	}
+}
+
+static void rtw_pci_flush_queues(struct rtw_dev *rtwdev, u32 queues, bool drop)
+{
+	u32 pci_queues = 0;
+	u8 i;
+
+	/* If all of the hardware queues are requested to flush,
+	 * flush all of the pci queues.
+	 */
+	if (queues == BIT(rtwdev->hw->queues) - 1) {
+		pci_queues = BIT(RTK_MAX_TX_QUEUE_NUM) - 1;
+	} else {
+		for (i = 0; i < rtwdev->hw->queues; i++)
+			if (queues & BIT(i))
+				pci_queues |= BIT(ac_to_hwq[i]);
+	}
+
+	__rtw_pci_flush_queues(rtwdev, pci_queues, drop);
+}
+
 static void rtw_pci_tx_kick_off_queue(struct rtw_dev *rtwdev, u8 queue)
 {
 	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
@@ -1490,6 +1558,7 @@ static void rtw_pci_destroy(struct rtw_dev *rtwdev, struct pci_dev *pdev)
 static struct rtw_hci_ops rtw_pci_ops = {
 	.tx_write = rtw_pci_tx_write,
 	.tx_kick_off = rtw_pci_tx_kick_off,
+	.flush_queues = rtw_pci_flush_queues,
 	.setup = rtw_pci_setup,
 	.start = rtw_pci_start,
 	.stop = rtw_pci_stop,
-- 
cgit v1.2.3


From 76325506e8111ed013ff9d1f9e86bf015bda234d Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 19 Mar 2021 13:42:14 +0800
Subject: rtw88: fix DIG min setting

DIG min is expected to be set according to chip. And actually we assigned
it under each chip information. However, we didn't use the setting when we
did DIG process. It is unexpected, so we fix it.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210319054218.3319-4-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/phy.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index e114ddecac09..f96edc049718 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -316,7 +316,8 @@ rtw_phy_dig_check_damping(struct rtw_dm_info *dm_info)
 	return damping;
 }
 
-static void rtw_phy_dig_get_boundary(struct rtw_dm_info *dm_info,
+static void rtw_phy_dig_get_boundary(struct rtw_dev *rtwdev,
+				     struct rtw_dm_info *dm_info,
 				     u8 *upper, u8 *lower, bool linked)
 {
 	u8 dig_max, dig_min, dig_mid;
@@ -325,8 +326,7 @@ static void rtw_phy_dig_get_boundary(struct rtw_dm_info *dm_info,
 	if (linked) {
 		dig_max = DIG_PERF_MAX;
 		dig_mid = DIG_PERF_MID;
-		/* 22B=0x1c, 22C=0x20 */
-		dig_min = 0x1c;
+		dig_min = rtwdev->chip->dig_min;
 		min_rssi = max_t(u8, dm_info->min_rssi, dig_min);
 	} else {
 		dig_max = DIG_CVRG_MAX;
@@ -437,7 +437,8 @@ static void rtw_phy_dig(struct rtw_dev *rtwdev)
 	 * the peers connected with us, meanwhile make sure the igi value does
 	 * not beyond the hardware limitation
 	 */
-	rtw_phy_dig_get_boundary(dm_info, &upper_bound, &lower_bound, linked);
+	rtw_phy_dig_get_boundary(rtwdev, dm_info, &upper_bound, &lower_bound,
+				 linked);
 	cur_igi = clamp_t(u8, cur_igi, lower_bound, upper_bound);
 
 	/* record current igi value and false alarm statistics for further
-- 
cgit v1.2.3


From a08398833ec22a245c6386a8688ccbec10f03363 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 19 Mar 2021 13:42:15 +0800
Subject: rtw88: 8822c: update tx power limit table to RF v40.1

update tx power limits to RF v40 and apply fix on FCC's channel 12 and 13,
so change the patch level to 1.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210319054218.3319-5-pkshih@realtek.com
---
 .../net/wireless/realtek/rtw88/rtw8822c_table.c    | 686 ++++++++++-----------
 1 file changed, 343 insertions(+), 343 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
index ad5715c65de3..822f3da91f1b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
@@ -40863,7 +40863,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 0, 1, 144, 76, },
 	{ 9, 1, 0, 1, 144, 127, },
 	{ 0, 1, 0, 1, 149, 76, },
-	{ 2, 1, 0, 1, 149, -128, },
+	{ 2, 1, 0, 1, 149, 54, },
 	{ 1, 1, 0, 1, 149, 127, },
 	{ 3, 1, 0, 1, 149, 76, },
 	{ 4, 1, 0, 1, 149, 74, },
@@ -40871,9 +40871,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 149, 76, },
 	{ 7, 1, 0, 1, 149, 54, },
 	{ 8, 1, 0, 1, 149, 76, },
-	{ 9, 1, 0, 1, 149, -128, },
+	{ 9, 1, 0, 1, 149, 54, },
 	{ 0, 1, 0, 1, 153, 76, },
-	{ 2, 1, 0, 1, 153, -128, },
+	{ 2, 1, 0, 1, 153, 54, },
 	{ 1, 1, 0, 1, 153, 127, },
 	{ 3, 1, 0, 1, 153, 76, },
 	{ 4, 1, 0, 1, 153, 74, },
@@ -40881,9 +40881,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 153, 76, },
 	{ 7, 1, 0, 1, 153, 54, },
 	{ 8, 1, 0, 1, 153, 76, },
-	{ 9, 1, 0, 1, 153, -128, },
+	{ 9, 1, 0, 1, 153, 54, },
 	{ 0, 1, 0, 1, 157, 76, },
-	{ 2, 1, 0, 1, 157, -128, },
+	{ 2, 1, 0, 1, 157, 54, },
 	{ 1, 1, 0, 1, 157, 127, },
 	{ 3, 1, 0, 1, 157, 76, },
 	{ 4, 1, 0, 1, 157, 74, },
@@ -40891,9 +40891,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 157, 76, },
 	{ 7, 1, 0, 1, 157, 54, },
 	{ 8, 1, 0, 1, 157, 76, },
-	{ 9, 1, 0, 1, 157, -128, },
+	{ 9, 1, 0, 1, 157, 54, },
 	{ 0, 1, 0, 1, 161, 76, },
-	{ 2, 1, 0, 1, 161, -128, },
+	{ 2, 1, 0, 1, 161, 54, },
 	{ 1, 1, 0, 1, 161, 127, },
 	{ 3, 1, 0, 1, 161, 76, },
 	{ 4, 1, 0, 1, 161, 74, },
@@ -40901,9 +40901,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 161, 76, },
 	{ 7, 1, 0, 1, 161, 54, },
 	{ 8, 1, 0, 1, 161, 76, },
-	{ 9, 1, 0, 1, 161, -128, },
+	{ 9, 1, 0, 1, 161, 54, },
 	{ 0, 1, 0, 1, 165, 76, },
-	{ 2, 1, 0, 1, 165, -128, },
+	{ 2, 1, 0, 1, 165, 54, },
 	{ 1, 1, 0, 1, 165, 127, },
 	{ 3, 1, 0, 1, 165, 76, },
 	{ 4, 1, 0, 1, 165, 74, },
@@ -40911,7 +40911,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 165, 76, },
 	{ 7, 1, 0, 1, 165, 54, },
 	{ 8, 1, 0, 1, 165, 76, },
-	{ 9, 1, 0, 1, 165, -128, },
+	{ 9, 1, 0, 1, 165, 54, },
 	{ 0, 1, 0, 2, 36, 72, },
 	{ 2, 1, 0, 2, 36, 62, },
 	{ 1, 1, 0, 2, 36, 62, },
@@ -41113,7 +41113,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 0, 2, 144, 76, },
 	{ 9, 1, 0, 2, 144, 127, },
 	{ 0, 1, 0, 2, 149, 76, },
-	{ 2, 1, 0, 2, 149, -128, },
+	{ 2, 1, 0, 2, 149, 54, },
 	{ 1, 1, 0, 2, 149, 127, },
 	{ 3, 1, 0, 2, 149, 76, },
 	{ 4, 1, 0, 2, 149, 74, },
@@ -41121,9 +41121,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 149, 76, },
 	{ 7, 1, 0, 2, 149, 54, },
 	{ 8, 1, 0, 2, 149, 76, },
-	{ 9, 1, 0, 2, 149, -128, },
+	{ 9, 1, 0, 2, 149, 54, },
 	{ 0, 1, 0, 2, 153, 76, },
-	{ 2, 1, 0, 2, 153, -128, },
+	{ 2, 1, 0, 2, 153, 54, },
 	{ 1, 1, 0, 2, 153, 127, },
 	{ 3, 1, 0, 2, 153, 76, },
 	{ 4, 1, 0, 2, 153, 74, },
@@ -41131,9 +41131,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 153, 76, },
 	{ 7, 1, 0, 2, 153, 54, },
 	{ 8, 1, 0, 2, 153, 76, },
-	{ 9, 1, 0, 2, 153, -128, },
+	{ 9, 1, 0, 2, 153, 54, },
 	{ 0, 1, 0, 2, 157, 76, },
-	{ 2, 1, 0, 2, 157, -128, },
+	{ 2, 1, 0, 2, 157, 54, },
 	{ 1, 1, 0, 2, 157, 127, },
 	{ 3, 1, 0, 2, 157, 76, },
 	{ 4, 1, 0, 2, 157, 74, },
@@ -41141,9 +41141,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 157, 76, },
 	{ 7, 1, 0, 2, 157, 54, },
 	{ 8, 1, 0, 2, 157, 76, },
-	{ 9, 1, 0, 2, 157, -128, },
+	{ 9, 1, 0, 2, 157, 54, },
 	{ 0, 1, 0, 2, 161, 76, },
-	{ 2, 1, 0, 2, 161, -128, },
+	{ 2, 1, 0, 2, 161, 54, },
 	{ 1, 1, 0, 2, 161, 127, },
 	{ 3, 1, 0, 2, 161, 76, },
 	{ 4, 1, 0, 2, 161, 74, },
@@ -41151,9 +41151,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 161, 76, },
 	{ 7, 1, 0, 2, 161, 54, },
 	{ 8, 1, 0, 2, 161, 76, },
-	{ 9, 1, 0, 2, 161, -128, },
+	{ 9, 1, 0, 2, 161, 54, },
 	{ 0, 1, 0, 2, 165, 76, },
-	{ 2, 1, 0, 2, 165, -128, },
+	{ 2, 1, 0, 2, 165, 54, },
 	{ 1, 1, 0, 2, 165, 127, },
 	{ 3, 1, 0, 2, 165, 76, },
 	{ 4, 1, 0, 2, 165, 74, },
@@ -41161,7 +41161,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 165, 76, },
 	{ 7, 1, 0, 2, 165, 54, },
 	{ 8, 1, 0, 2, 165, 76, },
-	{ 9, 1, 0, 2, 165, -128, },
+	{ 9, 1, 0, 2, 165, 54, },
 	{ 0, 1, 0, 3, 36, 68, },
 	{ 2, 1, 0, 3, 36, 38, },
 	{ 1, 1, 0, 3, 36, 50, },
@@ -41363,7 +41363,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 0, 3, 144, 68, },
 	{ 9, 1, 0, 3, 144, 127, },
 	{ 0, 1, 0, 3, 149, 76, },
-	{ 2, 1, 0, 3, 149, -128, },
+	{ 2, 1, 0, 3, 149, 30, },
 	{ 1, 1, 0, 3, 149, 127, },
 	{ 3, 1, 0, 3, 149, 76, },
 	{ 4, 1, 0, 3, 149, 60, },
@@ -41371,9 +41371,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 149, 76, },
 	{ 7, 1, 0, 3, 149, 30, },
 	{ 8, 1, 0, 3, 149, 72, },
-	{ 9, 1, 0, 3, 149, -128, },
+	{ 9, 1, 0, 3, 149, 30, },
 	{ 0, 1, 0, 3, 153, 76, },
-	{ 2, 1, 0, 3, 153, -128, },
+	{ 2, 1, 0, 3, 153, 30, },
 	{ 1, 1, 0, 3, 153, 127, },
 	{ 3, 1, 0, 3, 153, 76, },
 	{ 4, 1, 0, 3, 153, 60, },
@@ -41381,9 +41381,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 153, 76, },
 	{ 7, 1, 0, 3, 153, 30, },
 	{ 8, 1, 0, 3, 153, 76, },
-	{ 9, 1, 0, 3, 153, -128, },
+	{ 9, 1, 0, 3, 153, 30, },
 	{ 0, 1, 0, 3, 157, 76, },
-	{ 2, 1, 0, 3, 157, -128, },
+	{ 2, 1, 0, 3, 157, 30, },
 	{ 1, 1, 0, 3, 157, 127, },
 	{ 3, 1, 0, 3, 157, 76, },
 	{ 4, 1, 0, 3, 157, 60, },
@@ -41391,9 +41391,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 157, 76, },
 	{ 7, 1, 0, 3, 157, 30, },
 	{ 8, 1, 0, 3, 157, 76, },
-	{ 9, 1, 0, 3, 157, -128, },
+	{ 9, 1, 0, 3, 157, 30, },
 	{ 0, 1, 0, 3, 161, 76, },
-	{ 2, 1, 0, 3, 161, -128, },
+	{ 2, 1, 0, 3, 161, 30, },
 	{ 1, 1, 0, 3, 161, 127, },
 	{ 3, 1, 0, 3, 161, 76, },
 	{ 4, 1, 0, 3, 161, 60, },
@@ -41401,9 +41401,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 161, 76, },
 	{ 7, 1, 0, 3, 161, 30, },
 	{ 8, 1, 0, 3, 161, 76, },
-	{ 9, 1, 0, 3, 161, -128, },
+	{ 9, 1, 0, 3, 161, 30, },
 	{ 0, 1, 0, 3, 165, 76, },
-	{ 2, 1, 0, 3, 165, -128, },
+	{ 2, 1, 0, 3, 165, 30, },
 	{ 1, 1, 0, 3, 165, 127, },
 	{ 3, 1, 0, 3, 165, 76, },
 	{ 4, 1, 0, 3, 165, 60, },
@@ -41411,7 +41411,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 165, 76, },
 	{ 7, 1, 0, 3, 165, 30, },
 	{ 8, 1, 0, 3, 165, 76, },
-	{ 9, 1, 0, 3, 165, -128, },
+	{ 9, 1, 0, 3, 165, 30, },
 	{ 0, 1, 1, 2, 38, 66, },
 	{ 2, 1, 1, 2, 38, 64, },
 	{ 1, 1, 1, 2, 38, 62, },
@@ -41513,7 +41513,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 1, 2, 142, 72, },
 	{ 9, 1, 1, 2, 142, 127, },
 	{ 0, 1, 1, 2, 151, 72, },
-	{ 2, 1, 1, 2, 151, -128, },
+	{ 2, 1, 1, 2, 151, 54, },
 	{ 1, 1, 1, 2, 151, 127, },
 	{ 3, 1, 1, 2, 151, 72, },
 	{ 4, 1, 1, 2, 151, 72, },
@@ -41521,9 +41521,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 151, 72, },
 	{ 7, 1, 1, 2, 151, 54, },
 	{ 8, 1, 1, 2, 151, 72, },
-	{ 9, 1, 1, 2, 151, -128, },
+	{ 9, 1, 1, 2, 151, 54, },
 	{ 0, 1, 1, 2, 159, 72, },
-	{ 2, 1, 1, 2, 159, -128, },
+	{ 2, 1, 1, 2, 159, 54, },
 	{ 1, 1, 1, 2, 159, 127, },
 	{ 3, 1, 1, 2, 159, 72, },
 	{ 4, 1, 1, 2, 159, 72, },
@@ -41531,7 +41531,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 159, 72, },
 	{ 7, 1, 1, 2, 159, 54, },
 	{ 8, 1, 1, 2, 159, 72, },
-	{ 9, 1, 1, 2, 159, -128, },
+	{ 9, 1, 1, 2, 159, 54, },
 	{ 0, 1, 1, 3, 38, 60, },
 	{ 2, 1, 1, 3, 38, 40, },
 	{ 1, 1, 1, 3, 38, 50, },
@@ -41633,7 +41633,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 1, 3, 142, 68, },
 	{ 9, 1, 1, 3, 142, 127, },
 	{ 0, 1, 1, 3, 151, 72, },
-	{ 2, 1, 1, 3, 151, -128, },
+	{ 2, 1, 1, 3, 151, 30, },
 	{ 1, 1, 1, 3, 151, 127, },
 	{ 3, 1, 1, 3, 151, 72, },
 	{ 4, 1, 1, 3, 151, 66, },
@@ -41641,9 +41641,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 151, 72, },
 	{ 7, 1, 1, 3, 151, 30, },
 	{ 8, 1, 1, 3, 151, 68, },
-	{ 9, 1, 1, 3, 151, -128, },
+	{ 9, 1, 1, 3, 151, 30, },
 	{ 0, 1, 1, 3, 159, 72, },
-	{ 2, 1, 1, 3, 159, -128, },
+	{ 2, 1, 1, 3, 159, 30, },
 	{ 1, 1, 1, 3, 159, 127, },
 	{ 3, 1, 1, 3, 159, 72, },
 	{ 4, 1, 1, 3, 159, 66, },
@@ -41651,7 +41651,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 159, 72, },
 	{ 7, 1, 1, 3, 159, 30, },
 	{ 8, 1, 1, 3, 159, 72, },
-	{ 9, 1, 1, 3, 159, -128, },
+	{ 9, 1, 1, 3, 159, 30, },
 	{ 0, 1, 2, 4, 42, 64, },
 	{ 2, 1, 2, 4, 42, 64, },
 	{ 1, 1, 2, 4, 42, 64, },
@@ -41703,7 +41703,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 2, 4, 138, 72, },
 	{ 9, 1, 2, 4, 138, 127, },
 	{ 0, 1, 2, 4, 155, 72, },
-	{ 2, 1, 2, 4, 155, -128, },
+	{ 2, 1, 2, 4, 155, 54, },
 	{ 1, 1, 2, 4, 155, 127, },
 	{ 3, 1, 2, 4, 155, 72, },
 	{ 4, 1, 2, 4, 155, 68, },
@@ -41711,7 +41711,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 155, 72, },
 	{ 7, 1, 2, 4, 155, 54, },
 	{ 8, 1, 2, 4, 155, 68, },
-	{ 9, 1, 2, 4, 155, -128, },
+	{ 9, 1, 2, 4, 155, 54, },
 	{ 0, 1, 2, 5, 42, 54, },
 	{ 2, 1, 2, 5, 42, 40, },
 	{ 1, 1, 2, 5, 42, 50, },
@@ -41763,7 +41763,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 8, 1, 2, 5, 138, 66, },
 	{ 9, 1, 2, 5, 138, 127, },
 	{ 0, 1, 2, 5, 155, 62, },
-	{ 2, 1, 2, 5, 155, -128, },
+	{ 2, 1, 2, 5, 155, 30, },
 	{ 1, 1, 2, 5, 155, 127, },
 	{ 3, 1, 2, 5, 155, 62, },
 	{ 4, 1, 2, 5, 155, 58, },
@@ -41771,145 +41771,145 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 155, 62, },
 	{ 7, 1, 2, 5, 155, 30, },
 	{ 8, 1, 2, 5, 155, 62, },
-	{ 9, 1, 2, 5, 155, -128, },
+	{ 9, 1, 2, 5, 155, 30, },
 };
 
 RTW_DECL_TABLE_TXPWR_LMT(rtw8822c_txpwr_lmt_type0);
 
 static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 0, 0, 0, 0, 1, 72, },
-	{ 2, 0, 0, 0, 1, 60, },
-	{ 1, 0, 0, 0, 1, 68, },
+	{ 2, 0, 0, 0, 1, 56, },
+	{ 1, 0, 0, 0, 1, 72, },
 	{ 3, 0, 0, 0, 1, 72, },
 	{ 4, 0, 0, 0, 1, 76, },
-	{ 5, 0, 0, 0, 1, 60, },
+	{ 5, 0, 0, 0, 1, 56, },
 	{ 6, 0, 0, 0, 1, 72, },
 	{ 7, 0, 0, 0, 1, 60, },
 	{ 8, 0, 0, 0, 1, 72, },
 	{ 9, 0, 0, 0, 1, 60, },
 	{ 0, 0, 0, 0, 2, 72, },
-	{ 2, 0, 0, 0, 2, 60, },
-	{ 1, 0, 0, 0, 2, 68, },
+	{ 2, 0, 0, 0, 2, 56, },
+	{ 1, 0, 0, 0, 2, 72, },
 	{ 3, 0, 0, 0, 2, 72, },
 	{ 4, 0, 0, 0, 2, 76, },
-	{ 5, 0, 0, 0, 2, 60, },
+	{ 5, 0, 0, 0, 2, 56, },
 	{ 6, 0, 0, 0, 2, 72, },
 	{ 7, 0, 0, 0, 2, 60, },
 	{ 8, 0, 0, 0, 2, 72, },
 	{ 9, 0, 0, 0, 2, 60, },
 	{ 0, 0, 0, 0, 3, 76, },
-	{ 2, 0, 0, 0, 3, 60, },
-	{ 1, 0, 0, 0, 3, 68, },
+	{ 2, 0, 0, 0, 3, 56, },
+	{ 1, 0, 0, 0, 3, 72, },
 	{ 3, 0, 0, 0, 3, 76, },
 	{ 4, 0, 0, 0, 3, 76, },
-	{ 5, 0, 0, 0, 3, 60, },
+	{ 5, 0, 0, 0, 3, 56, },
 	{ 6, 0, 0, 0, 3, 76, },
 	{ 7, 0, 0, 0, 3, 60, },
 	{ 8, 0, 0, 0, 3, 76, },
 	{ 9, 0, 0, 0, 3, 60, },
 	{ 0, 0, 0, 0, 4, 76, },
-	{ 2, 0, 0, 0, 4, 60, },
-	{ 1, 0, 0, 0, 4, 68, },
+	{ 2, 0, 0, 0, 4, 56, },
+	{ 1, 0, 0, 0, 4, 72, },
 	{ 3, 0, 0, 0, 4, 76, },
 	{ 4, 0, 0, 0, 4, 76, },
-	{ 5, 0, 0, 0, 4, 60, },
+	{ 5, 0, 0, 0, 4, 56, },
 	{ 6, 0, 0, 0, 4, 76, },
 	{ 7, 0, 0, 0, 4, 60, },
 	{ 8, 0, 0, 0, 4, 76, },
 	{ 9, 0, 0, 0, 4, 60, },
 	{ 0, 0, 0, 0, 5, 76, },
-	{ 2, 0, 0, 0, 5, 60, },
-	{ 1, 0, 0, 0, 5, 68, },
+	{ 2, 0, 0, 0, 5, 56, },
+	{ 1, 0, 0, 0, 5, 72, },
 	{ 3, 0, 0, 0, 5, 76, },
 	{ 4, 0, 0, 0, 5, 76, },
-	{ 5, 0, 0, 0, 5, 60, },
+	{ 5, 0, 0, 0, 5, 56, },
 	{ 6, 0, 0, 0, 5, 76, },
 	{ 7, 0, 0, 0, 5, 60, },
 	{ 8, 0, 0, 0, 5, 76, },
 	{ 9, 0, 0, 0, 5, 60, },
 	{ 0, 0, 0, 0, 6, 76, },
-	{ 2, 0, 0, 0, 6, 60, },
-	{ 1, 0, 0, 0, 6, 68, },
+	{ 2, 0, 0, 0, 6, 56, },
+	{ 1, 0, 0, 0, 6, 72, },
 	{ 3, 0, 0, 0, 6, 76, },
 	{ 4, 0, 0, 0, 6, 76, },
-	{ 5, 0, 0, 0, 6, 60, },
+	{ 5, 0, 0, 0, 6, 56, },
 	{ 6, 0, 0, 0, 6, 76, },
 	{ 7, 0, 0, 0, 6, 60, },
 	{ 8, 0, 0, 0, 6, 76, },
 	{ 9, 0, 0, 0, 6, 60, },
 	{ 0, 0, 0, 0, 7, 76, },
-	{ 2, 0, 0, 0, 7, 60, },
-	{ 1, 0, 0, 0, 7, 68, },
+	{ 2, 0, 0, 0, 7, 56, },
+	{ 1, 0, 0, 0, 7, 72, },
 	{ 3, 0, 0, 0, 7, 76, },
 	{ 4, 0, 0, 0, 7, 76, },
-	{ 5, 0, 0, 0, 7, 60, },
+	{ 5, 0, 0, 0, 7, 56, },
 	{ 6, 0, 0, 0, 7, 76, },
 	{ 7, 0, 0, 0, 7, 60, },
 	{ 8, 0, 0, 0, 7, 76, },
 	{ 9, 0, 0, 0, 7, 60, },
 	{ 0, 0, 0, 0, 8, 76, },
-	{ 2, 0, 0, 0, 8, 60, },
-	{ 1, 0, 0, 0, 8, 68, },
+	{ 2, 0, 0, 0, 8, 56, },
+	{ 1, 0, 0, 0, 8, 72, },
 	{ 3, 0, 0, 0, 8, 76, },
 	{ 4, 0, 0, 0, 8, 76, },
-	{ 5, 0, 0, 0, 8, 60, },
+	{ 5, 0, 0, 0, 8, 56, },
 	{ 6, 0, 0, 0, 8, 76, },
 	{ 7, 0, 0, 0, 8, 60, },
 	{ 8, 0, 0, 0, 8, 76, },
 	{ 9, 0, 0, 0, 8, 60, },
 	{ 0, 0, 0, 0, 9, 76, },
-	{ 2, 0, 0, 0, 9, 60, },
-	{ 1, 0, 0, 0, 9, 68, },
+	{ 2, 0, 0, 0, 9, 56, },
+	{ 1, 0, 0, 0, 9, 72, },
 	{ 3, 0, 0, 0, 9, 76, },
 	{ 4, 0, 0, 0, 9, 76, },
-	{ 5, 0, 0, 0, 9, 60, },
+	{ 5, 0, 0, 0, 9, 56, },
 	{ 6, 0, 0, 0, 9, 76, },
 	{ 7, 0, 0, 0, 9, 60, },
 	{ 8, 0, 0, 0, 9, 76, },
 	{ 9, 0, 0, 0, 9, 60, },
 	{ 0, 0, 0, 0, 10, 72, },
-	{ 2, 0, 0, 0, 10, 60, },
-	{ 1, 0, 0, 0, 10, 68, },
+	{ 2, 0, 0, 0, 10, 56, },
+	{ 1, 0, 0, 0, 10, 72, },
 	{ 3, 0, 0, 0, 10, 72, },
 	{ 4, 0, 0, 0, 10, 76, },
-	{ 5, 0, 0, 0, 10, 60, },
+	{ 5, 0, 0, 0, 10, 56, },
 	{ 6, 0, 0, 0, 10, 72, },
 	{ 7, 0, 0, 0, 10, 60, },
 	{ 8, 0, 0, 0, 10, 72, },
 	{ 9, 0, 0, 0, 10, 60, },
 	{ 0, 0, 0, 0, 11, 72, },
-	{ 2, 0, 0, 0, 11, 60, },
-	{ 1, 0, 0, 0, 11, 68, },
+	{ 2, 0, 0, 0, 11, 56, },
+	{ 1, 0, 0, 0, 11, 72, },
 	{ 3, 0, 0, 0, 11, 72, },
 	{ 4, 0, 0, 0, 11, 76, },
-	{ 5, 0, 0, 0, 11, 60, },
+	{ 5, 0, 0, 0, 11, 56, },
 	{ 6, 0, 0, 0, 11, 72, },
 	{ 7, 0, 0, 0, 11, 60, },
 	{ 8, 0, 0, 0, 11, 72, },
 	{ 9, 0, 0, 0, 11, 60, },
 	{ 0, 0, 0, 0, 12, 44, },
-	{ 2, 0, 0, 0, 12, 60, },
-	{ 1, 0, 0, 0, 12, 68, },
+	{ 2, 0, 0, 0, 12, 56, },
+	{ 1, 0, 0, 0, 12, 72, },
 	{ 3, 0, 0, 0, 12, 52, },
 	{ 4, 0, 0, 0, 12, 76, },
-	{ 5, 0, 0, 0, 12, 60, },
+	{ 5, 0, 0, 0, 12, 56, },
 	{ 6, 0, 0, 0, 12, 52, },
 	{ 7, 0, 0, 0, 12, 60, },
 	{ 8, 0, 0, 0, 12, 52, },
 	{ 9, 0, 0, 0, 12, 60, },
 	{ 0, 0, 0, 0, 13, 40, },
-	{ 2, 0, 0, 0, 13, 60, },
-	{ 1, 0, 0, 0, 13, 68, },
+	{ 2, 0, 0, 0, 13, 56, },
+	{ 1, 0, 0, 0, 13, 72, },
 	{ 3, 0, 0, 0, 13, 48, },
 	{ 4, 0, 0, 0, 13, 76, },
-	{ 5, 0, 0, 0, 13, 60, },
+	{ 5, 0, 0, 0, 13, 56, },
 	{ 6, 0, 0, 0, 13, 48, },
 	{ 7, 0, 0, 0, 13, 60, },
 	{ 8, 0, 0, 0, 13, 48, },
 	{ 9, 0, 0, 0, 13, 60, },
 	{ 0, 0, 0, 0, 14, 127, },
 	{ 2, 0, 0, 0, 14, 127, },
-	{ 1, 0, 0, 0, 14, 68, },
+	{ 1, 0, 0, 0, 14, 72, },
 	{ 3, 0, 0, 0, 14, 127, },
 	{ 4, 0, 0, 0, 14, 127, },
 	{ 5, 0, 0, 0, 14, 127, },
@@ -42041,7 +42041,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 1, 13, 60, },
 	{ 1, 0, 0, 1, 13, 76, },
 	{ 3, 0, 0, 1, 13, 28, },
-	{ 4, 0, 0, 1, 13, 70, },
+	{ 4, 0, 0, 1, 13, 74, },
 	{ 5, 0, 0, 1, 13, 60, },
 	{ 6, 0, 0, 1, 13, 28, },
 	{ 7, 0, 0, 1, 13, 60, },
@@ -42181,7 +42181,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 2, 13, 60, },
 	{ 1, 0, 0, 2, 13, 76, },
 	{ 3, 0, 0, 2, 13, 28, },
-	{ 4, 0, 0, 2, 13, 72, },
+	{ 4, 0, 0, 2, 13, 74, },
 	{ 5, 0, 0, 2, 13, 60, },
 	{ 6, 0, 0, 2, 13, 28, },
 	{ 7, 0, 0, 2, 13, 60, },
@@ -42201,7 +42201,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 1, 36, },
 	{ 1, 0, 0, 3, 1, 66, },
 	{ 3, 0, 0, 3, 1, 52, },
-	{ 4, 0, 0, 3, 1, 68, },
+	{ 4, 0, 0, 3, 1, 72, },
 	{ 5, 0, 0, 3, 1, 36, },
 	{ 6, 0, 0, 3, 1, 52, },
 	{ 7, 0, 0, 3, 1, 36, },
@@ -42211,7 +42211,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 2, 36, },
 	{ 1, 0, 0, 3, 2, 66, },
 	{ 3, 0, 0, 3, 2, 60, },
-	{ 4, 0, 0, 3, 2, 70, },
+	{ 4, 0, 0, 3, 2, 72, },
 	{ 5, 0, 0, 3, 2, 36, },
 	{ 6, 0, 0, 3, 2, 60, },
 	{ 7, 0, 0, 3, 2, 36, },
@@ -42221,7 +42221,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 3, 36, },
 	{ 1, 0, 0, 3, 3, 66, },
 	{ 3, 0, 0, 3, 3, 64, },
-	{ 4, 0, 0, 3, 3, 70, },
+	{ 4, 0, 0, 3, 3, 72, },
 	{ 5, 0, 0, 3, 3, 36, },
 	{ 6, 0, 0, 3, 3, 64, },
 	{ 7, 0, 0, 3, 3, 36, },
@@ -42231,7 +42231,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 4, 36, },
 	{ 1, 0, 0, 3, 4, 66, },
 	{ 3, 0, 0, 3, 4, 68, },
-	{ 4, 0, 0, 3, 4, 70, },
+	{ 4, 0, 0, 3, 4, 72, },
 	{ 5, 0, 0, 3, 4, 36, },
 	{ 6, 0, 0, 3, 4, 68, },
 	{ 7, 0, 0, 3, 4, 36, },
@@ -42241,7 +42241,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 5, 36, },
 	{ 1, 0, 0, 3, 5, 66, },
 	{ 3, 0, 0, 3, 5, 76, },
-	{ 4, 0, 0, 3, 5, 70, },
+	{ 4, 0, 0, 3, 5, 72, },
 	{ 5, 0, 0, 3, 5, 36, },
 	{ 6, 0, 0, 3, 5, 76, },
 	{ 7, 0, 0, 3, 5, 36, },
@@ -42251,7 +42251,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 6, 36, },
 	{ 1, 0, 0, 3, 6, 66, },
 	{ 3, 0, 0, 3, 6, 76, },
-	{ 4, 0, 0, 3, 6, 70, },
+	{ 4, 0, 0, 3, 6, 72, },
 	{ 5, 0, 0, 3, 6, 36, },
 	{ 6, 0, 0, 3, 6, 76, },
 	{ 7, 0, 0, 3, 6, 36, },
@@ -42261,7 +42261,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 7, 36, },
 	{ 1, 0, 0, 3, 7, 66, },
 	{ 3, 0, 0, 3, 7, 76, },
-	{ 4, 0, 0, 3, 7, 70, },
+	{ 4, 0, 0, 3, 7, 72, },
 	{ 5, 0, 0, 3, 7, 36, },
 	{ 6, 0, 0, 3, 7, 76, },
 	{ 7, 0, 0, 3, 7, 36, },
@@ -42271,7 +42271,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 8, 36, },
 	{ 1, 0, 0, 3, 8, 66, },
 	{ 3, 0, 0, 3, 8, 68, },
-	{ 4, 0, 0, 3, 8, 70, },
+	{ 4, 0, 0, 3, 8, 72, },
 	{ 5, 0, 0, 3, 8, 36, },
 	{ 6, 0, 0, 3, 8, 68, },
 	{ 7, 0, 0, 3, 8, 36, },
@@ -42281,7 +42281,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 9, 36, },
 	{ 1, 0, 0, 3, 9, 66, },
 	{ 3, 0, 0, 3, 9, 64, },
-	{ 4, 0, 0, 3, 9, 70, },
+	{ 4, 0, 0, 3, 9, 72, },
 	{ 5, 0, 0, 3, 9, 36, },
 	{ 6, 0, 0, 3, 9, 64, },
 	{ 7, 0, 0, 3, 9, 36, },
@@ -42291,7 +42291,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 10, 36, },
 	{ 1, 0, 0, 3, 10, 66, },
 	{ 3, 0, 0, 3, 10, 60, },
-	{ 4, 0, 0, 3, 10, 70, },
+	{ 4, 0, 0, 3, 10, 72, },
 	{ 5, 0, 0, 3, 10, 36, },
 	{ 6, 0, 0, 3, 10, 60, },
 	{ 7, 0, 0, 3, 10, 36, },
@@ -42301,7 +42301,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 11, 36, },
 	{ 1, 0, 0, 3, 11, 66, },
 	{ 3, 0, 0, 3, 11, 52, },
-	{ 4, 0, 0, 3, 11, 70, },
+	{ 4, 0, 0, 3, 11, 72, },
 	{ 5, 0, 0, 3, 11, 36, },
 	{ 6, 0, 0, 3, 11, 52, },
 	{ 7, 0, 0, 3, 11, 36, },
@@ -42311,7 +42311,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 12, 36, },
 	{ 1, 0, 0, 3, 12, 66, },
 	{ 3, 0, 0, 3, 12, 40, },
-	{ 4, 0, 0, 3, 12, 70, },
+	{ 4, 0, 0, 3, 12, 72, },
 	{ 5, 0, 0, 3, 12, 36, },
 	{ 6, 0, 0, 3, 12, 40, },
 	{ 7, 0, 0, 3, 12, 36, },
@@ -42321,7 +42321,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 0, 3, 13, 36, },
 	{ 1, 0, 0, 3, 13, 66, },
 	{ 3, 0, 0, 3, 13, 28, },
-	{ 4, 0, 0, 3, 13, 62, },
+	{ 4, 0, 0, 3, 13, 68, },
 	{ 5, 0, 0, 3, 13, 36, },
 	{ 6, 0, 0, 3, 13, 28, },
 	{ 7, 0, 0, 3, 13, 36, },
@@ -42501,7 +42501,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 0, 1, 3, 3, 36, },
 	{ 1, 0, 1, 3, 3, 66, },
 	{ 3, 0, 1, 3, 3, 48, },
-	{ 4, 0, 1, 3, 3, 66, },
+	{ 4, 0, 1, 3, 3, 68, },
 	{ 5, 0, 1, 3, 3, 36, },
 	{ 6, 0, 1, 3, 3, 48, },
 	{ 7, 0, 1, 3, 3, 36, },
@@ -42618,137 +42618,137 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 0, 1, 3, 14, 127, },
 	{ 9, 0, 1, 3, 14, 127, },
 	{ 0, 1, 0, 1, 36, 74, },
-	{ 2, 1, 0, 1, 36, 62, },
-	{ 1, 1, 0, 1, 36, 60, },
+	{ 2, 1, 0, 1, 36, 58, },
+	{ 1, 1, 0, 1, 36, 62, },
 	{ 3, 1, 0, 1, 36, 62, },
-	{ 4, 1, 0, 1, 36, 76, },
-	{ 5, 1, 0, 1, 36, 62, },
+	{ 4, 1, 0, 1, 36, 74, },
+	{ 5, 1, 0, 1, 36, 58, },
 	{ 6, 1, 0, 1, 36, 64, },
 	{ 7, 1, 0, 1, 36, 54, },
 	{ 8, 1, 0, 1, 36, 62, },
 	{ 9, 1, 0, 1, 36, 62, },
 	{ 0, 1, 0, 1, 40, 76, },
-	{ 2, 1, 0, 1, 40, 62, },
+	{ 2, 1, 0, 1, 40, 58, },
 	{ 1, 1, 0, 1, 40, 62, },
 	{ 3, 1, 0, 1, 40, 62, },
 	{ 4, 1, 0, 1, 40, 76, },
-	{ 5, 1, 0, 1, 40, 62, },
+	{ 5, 1, 0, 1, 40, 58, },
 	{ 6, 1, 0, 1, 40, 64, },
 	{ 7, 1, 0, 1, 40, 54, },
 	{ 8, 1, 0, 1, 40, 62, },
 	{ 9, 1, 0, 1, 40, 62, },
 	{ 0, 1, 0, 1, 44, 76, },
-	{ 2, 1, 0, 1, 44, 62, },
+	{ 2, 1, 0, 1, 44, 58, },
 	{ 1, 1, 0, 1, 44, 62, },
 	{ 3, 1, 0, 1, 44, 62, },
 	{ 4, 1, 0, 1, 44, 76, },
-	{ 5, 1, 0, 1, 44, 62, },
+	{ 5, 1, 0, 1, 44, 58, },
 	{ 6, 1, 0, 1, 44, 64, },
 	{ 7, 1, 0, 1, 44, 54, },
 	{ 8, 1, 0, 1, 44, 62, },
 	{ 9, 1, 0, 1, 44, 62, },
 	{ 0, 1, 0, 1, 48, 76, },
-	{ 2, 1, 0, 1, 48, 62, },
+	{ 2, 1, 0, 1, 48, 58, },
 	{ 1, 1, 0, 1, 48, 62, },
 	{ 3, 1, 0, 1, 48, 62, },
-	{ 4, 1, 0, 1, 48, 54, },
-	{ 5, 1, 0, 1, 48, 62, },
+	{ 4, 1, 0, 1, 48, 58, },
+	{ 5, 1, 0, 1, 48, 58, },
 	{ 6, 1, 0, 1, 48, 64, },
 	{ 7, 1, 0, 1, 48, 54, },
 	{ 8, 1, 0, 1, 48, 62, },
 	{ 9, 1, 0, 1, 48, 62, },
 	{ 0, 1, 0, 1, 52, 76, },
-	{ 2, 1, 0, 1, 52, 62, },
+	{ 2, 1, 0, 1, 52, 58, },
 	{ 1, 1, 0, 1, 52, 62, },
 	{ 3, 1, 0, 1, 52, 64, },
 	{ 4, 1, 0, 1, 52, 76, },
-	{ 5, 1, 0, 1, 52, 62, },
+	{ 5, 1, 0, 1, 52, 58, },
 	{ 6, 1, 0, 1, 52, 76, },
 	{ 7, 1, 0, 1, 52, 54, },
 	{ 8, 1, 0, 1, 52, 76, },
 	{ 9, 1, 0, 1, 52, 62, },
 	{ 0, 1, 0, 1, 56, 76, },
-	{ 2, 1, 0, 1, 56, 62, },
+	{ 2, 1, 0, 1, 56, 58, },
 	{ 1, 1, 0, 1, 56, 62, },
 	{ 3, 1, 0, 1, 56, 64, },
 	{ 4, 1, 0, 1, 56, 76, },
-	{ 5, 1, 0, 1, 56, 62, },
+	{ 5, 1, 0, 1, 56, 58, },
 	{ 6, 1, 0, 1, 56, 76, },
 	{ 7, 1, 0, 1, 56, 54, },
 	{ 8, 1, 0, 1, 56, 76, },
 	{ 9, 1, 0, 1, 56, 62, },
 	{ 0, 1, 0, 1, 60, 76, },
-	{ 2, 1, 0, 1, 60, 62, },
+	{ 2, 1, 0, 1, 60, 58, },
 	{ 1, 1, 0, 1, 60, 62, },
 	{ 3, 1, 0, 1, 60, 64, },
 	{ 4, 1, 0, 1, 60, 76, },
-	{ 5, 1, 0, 1, 60, 62, },
+	{ 5, 1, 0, 1, 60, 58, },
 	{ 6, 1, 0, 1, 60, 76, },
 	{ 7, 1, 0, 1, 60, 54, },
 	{ 8, 1, 0, 1, 60, 76, },
 	{ 9, 1, 0, 1, 60, 62, },
-	{ 0, 1, 0, 1, 64, 74, },
-	{ 2, 1, 0, 1, 64, 62, },
-	{ 1, 1, 0, 1, 64, 60, },
+	{ 0, 1, 0, 1, 64, 76, },
+	{ 2, 1, 0, 1, 64, 58, },
+	{ 1, 1, 0, 1, 64, 62, },
 	{ 3, 1, 0, 1, 64, 64, },
 	{ 4, 1, 0, 1, 64, 76, },
-	{ 5, 1, 0, 1, 64, 62, },
+	{ 5, 1, 0, 1, 64, 58, },
 	{ 6, 1, 0, 1, 64, 74, },
 	{ 7, 1, 0, 1, 64, 54, },
 	{ 8, 1, 0, 1, 64, 74, },
 	{ 9, 1, 0, 1, 64, 62, },
-	{ 0, 1, 0, 1, 100, 72, },
-	{ 2, 1, 0, 1, 100, 62, },
+	{ 0, 1, 0, 1, 100, 68, },
+	{ 2, 1, 0, 1, 100, 58, },
 	{ 1, 1, 0, 1, 100, 76, },
-	{ 3, 1, 0, 1, 100, 72, },
+	{ 3, 1, 0, 1, 100, 68, },
 	{ 4, 1, 0, 1, 100, 76, },
-	{ 5, 1, 0, 1, 100, 62, },
+	{ 5, 1, 0, 1, 100, 58, },
 	{ 6, 1, 0, 1, 100, 72, },
 	{ 7, 1, 0, 1, 100, 54, },
 	{ 8, 1, 0, 1, 100, 72, },
 	{ 9, 1, 0, 1, 100, 127, },
 	{ 0, 1, 0, 1, 104, 76, },
-	{ 2, 1, 0, 1, 104, 62, },
+	{ 2, 1, 0, 1, 104, 58, },
 	{ 1, 1, 0, 1, 104, 76, },
 	{ 3, 1, 0, 1, 104, 76, },
 	{ 4, 1, 0, 1, 104, 76, },
-	{ 5, 1, 0, 1, 104, 62, },
+	{ 5, 1, 0, 1, 104, 58, },
 	{ 6, 1, 0, 1, 104, 76, },
 	{ 7, 1, 0, 1, 104, 54, },
 	{ 8, 1, 0, 1, 104, 76, },
 	{ 9, 1, 0, 1, 104, 127, },
 	{ 0, 1, 0, 1, 108, 76, },
-	{ 2, 1, 0, 1, 108, 62, },
+	{ 2, 1, 0, 1, 108, 58, },
 	{ 1, 1, 0, 1, 108, 76, },
 	{ 3, 1, 0, 1, 108, 76, },
 	{ 4, 1, 0, 1, 108, 76, },
-	{ 5, 1, 0, 1, 108, 62, },
+	{ 5, 1, 0, 1, 108, 58, },
 	{ 6, 1, 0, 1, 108, 76, },
 	{ 7, 1, 0, 1, 108, 54, },
 	{ 8, 1, 0, 1, 108, 76, },
 	{ 9, 1, 0, 1, 108, 127, },
 	{ 0, 1, 0, 1, 112, 76, },
-	{ 2, 1, 0, 1, 112, 62, },
+	{ 2, 1, 0, 1, 112, 58, },
 	{ 1, 1, 0, 1, 112, 76, },
 	{ 3, 1, 0, 1, 112, 76, },
 	{ 4, 1, 0, 1, 112, 76, },
-	{ 5, 1, 0, 1, 112, 62, },
+	{ 5, 1, 0, 1, 112, 58, },
 	{ 6, 1, 0, 1, 112, 76, },
 	{ 7, 1, 0, 1, 112, 54, },
 	{ 8, 1, 0, 1, 112, 76, },
 	{ 9, 1, 0, 1, 112, 127, },
 	{ 0, 1, 0, 1, 116, 76, },
-	{ 2, 1, 0, 1, 116, 62, },
+	{ 2, 1, 0, 1, 116, 58, },
 	{ 1, 1, 0, 1, 116, 76, },
 	{ 3, 1, 0, 1, 116, 76, },
 	{ 4, 1, 0, 1, 116, 76, },
-	{ 5, 1, 0, 1, 116, 62, },
+	{ 5, 1, 0, 1, 116, 58, },
 	{ 6, 1, 0, 1, 116, 76, },
 	{ 7, 1, 0, 1, 116, 54, },
 	{ 8, 1, 0, 1, 116, 76, },
 	{ 9, 1, 0, 1, 116, 127, },
 	{ 0, 1, 0, 1, 120, 76, },
-	{ 2, 1, 0, 1, 120, 62, },
+	{ 2, 1, 0, 1, 120, 58, },
 	{ 1, 1, 0, 1, 120, 76, },
 	{ 3, 1, 0, 1, 120, 127, },
 	{ 4, 1, 0, 1, 120, 76, },
@@ -42758,7 +42758,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 1, 120, 76, },
 	{ 9, 1, 0, 1, 120, 127, },
 	{ 0, 1, 0, 1, 124, 76, },
-	{ 2, 1, 0, 1, 124, 62, },
+	{ 2, 1, 0, 1, 124, 58, },
 	{ 1, 1, 0, 1, 124, 76, },
 	{ 3, 1, 0, 1, 124, 127, },
 	{ 4, 1, 0, 1, 124, 76, },
@@ -42768,7 +42768,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 1, 124, 76, },
 	{ 9, 1, 0, 1, 124, 127, },
 	{ 0, 1, 0, 1, 128, 76, },
-	{ 2, 1, 0, 1, 128, 62, },
+	{ 2, 1, 0, 1, 128, 58, },
 	{ 1, 1, 0, 1, 128, 76, },
 	{ 3, 1, 0, 1, 128, 127, },
 	{ 4, 1, 0, 1, 128, 76, },
@@ -42778,38 +42778,38 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 1, 128, 76, },
 	{ 9, 1, 0, 1, 128, 127, },
 	{ 0, 1, 0, 1, 132, 76, },
-	{ 2, 1, 0, 1, 132, 62, },
+	{ 2, 1, 0, 1, 132, 58, },
 	{ 1, 1, 0, 1, 132, 76, },
 	{ 3, 1, 0, 1, 132, 76, },
 	{ 4, 1, 0, 1, 132, 76, },
-	{ 5, 1, 0, 1, 132, 62, },
+	{ 5, 1, 0, 1, 132, 58, },
 	{ 6, 1, 0, 1, 132, 76, },
 	{ 7, 1, 0, 1, 132, 54, },
 	{ 8, 1, 0, 1, 132, 76, },
 	{ 9, 1, 0, 1, 132, 127, },
 	{ 0, 1, 0, 1, 136, 76, },
-	{ 2, 1, 0, 1, 136, 62, },
+	{ 2, 1, 0, 1, 136, 58, },
 	{ 1, 1, 0, 1, 136, 76, },
 	{ 3, 1, 0, 1, 136, 76, },
 	{ 4, 1, 0, 1, 136, 76, },
-	{ 5, 1, 0, 1, 136, 62, },
+	{ 5, 1, 0, 1, 136, 58, },
 	{ 6, 1, 0, 1, 136, 76, },
 	{ 7, 1, 0, 1, 136, 54, },
 	{ 8, 1, 0, 1, 136, 76, },
 	{ 9, 1, 0, 1, 136, 127, },
-	{ 0, 1, 0, 1, 140, 72, },
-	{ 2, 1, 0, 1, 140, 62, },
+	{ 0, 1, 0, 1, 140, 74, },
+	{ 2, 1, 0, 1, 140, 58, },
 	{ 1, 1, 0, 1, 140, 76, },
-	{ 3, 1, 0, 1, 140, 72, },
+	{ 3, 1, 0, 1, 140, 74, },
 	{ 4, 1, 0, 1, 140, 76, },
-	{ 5, 1, 0, 1, 140, 62, },
+	{ 5, 1, 0, 1, 140, 58, },
 	{ 6, 1, 0, 1, 140, 72, },
 	{ 7, 1, 0, 1, 140, 54, },
 	{ 8, 1, 0, 1, 140, 72, },
 	{ 9, 1, 0, 1, 140, 127, },
 	{ 0, 1, 0, 1, 144, 76, },
 	{ 2, 1, 0, 1, 144, 127, },
-	{ 1, 1, 0, 1, 144, 127, },
+	{ 1, 1, 0, 1, 144, 76, },
 	{ 3, 1, 0, 1, 144, 76, },
 	{ 4, 1, 0, 1, 144, 76, },
 	{ 5, 1, 0, 1, 144, 127, },
@@ -42818,7 +42818,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 1, 144, 76, },
 	{ 9, 1, 0, 1, 144, 127, },
 	{ 0, 1, 0, 1, 149, 76, },
-	{ 2, 1, 0, 1, 149, -128, },
+	{ 2, 1, 0, 1, 149, 28, },
 	{ 1, 1, 0, 1, 149, 127, },
 	{ 3, 1, 0, 1, 149, 76, },
 	{ 4, 1, 0, 1, 149, 74, },
@@ -42826,9 +42826,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 1, 149, 76, },
 	{ 7, 1, 0, 1, 149, 54, },
 	{ 8, 1, 0, 1, 149, 76, },
-	{ 9, 1, 0, 1, 149, -128, },
+	{ 9, 1, 0, 1, 149, 28, },
 	{ 0, 1, 0, 1, 153, 76, },
-	{ 2, 1, 0, 1, 153, -128, },
+	{ 2, 1, 0, 1, 153, 28, },
 	{ 1, 1, 0, 1, 153, 127, },
 	{ 3, 1, 0, 1, 153, 76, },
 	{ 4, 1, 0, 1, 153, 74, },
@@ -42836,9 +42836,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 1, 153, 76, },
 	{ 7, 1, 0, 1, 153, 54, },
 	{ 8, 1, 0, 1, 153, 76, },
-	{ 9, 1, 0, 1, 153, -128, },
+	{ 9, 1, 0, 1, 153, 28, },
 	{ 0, 1, 0, 1, 157, 76, },
-	{ 2, 1, 0, 1, 157, -128, },
+	{ 2, 1, 0, 1, 157, 28, },
 	{ 1, 1, 0, 1, 157, 127, },
 	{ 3, 1, 0, 1, 157, 76, },
 	{ 4, 1, 0, 1, 157, 74, },
@@ -42846,9 +42846,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 1, 157, 76, },
 	{ 7, 1, 0, 1, 157, 54, },
 	{ 8, 1, 0, 1, 157, 76, },
-	{ 9, 1, 0, 1, 157, -128, },
+	{ 9, 1, 0, 1, 157, 28, },
 	{ 0, 1, 0, 1, 161, 76, },
-	{ 2, 1, 0, 1, 161, -128, },
+	{ 2, 1, 0, 1, 161, 28, },
 	{ 1, 1, 0, 1, 161, 127, },
 	{ 3, 1, 0, 1, 161, 76, },
 	{ 4, 1, 0, 1, 161, 74, },
@@ -42856,9 +42856,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 1, 161, 76, },
 	{ 7, 1, 0, 1, 161, 54, },
 	{ 8, 1, 0, 1, 161, 76, },
-	{ 9, 1, 0, 1, 161, -128, },
+	{ 9, 1, 0, 1, 161, 28, },
 	{ 0, 1, 0, 1, 165, 76, },
-	{ 2, 1, 0, 1, 165, -128, },
+	{ 2, 1, 0, 1, 165, 28, },
 	{ 1, 1, 0, 1, 165, 127, },
 	{ 3, 1, 0, 1, 165, 76, },
 	{ 4, 1, 0, 1, 165, 74, },
@@ -42866,139 +42866,139 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 1, 165, 76, },
 	{ 7, 1, 0, 1, 165, 54, },
 	{ 8, 1, 0, 1, 165, 76, },
-	{ 9, 1, 0, 1, 165, -128, },
-	{ 0, 1, 0, 2, 36, 72, },
-	{ 2, 1, 0, 2, 36, 62, },
-	{ 1, 1, 0, 2, 36, 62, },
+	{ 9, 1, 0, 1, 165, 28, },
+	{ 0, 1, 0, 2, 36, 70, },
+	{ 2, 1, 0, 2, 36, 58, },
+	{ 1, 1, 0, 2, 36, 64, },
 	{ 3, 1, 0, 2, 36, 62, },
 	{ 4, 1, 0, 2, 36, 76, },
-	{ 5, 1, 0, 2, 36, 62, },
+	{ 5, 1, 0, 2, 36, 58, },
 	{ 6, 1, 0, 2, 36, 64, },
 	{ 7, 1, 0, 2, 36, 54, },
 	{ 8, 1, 0, 2, 36, 62, },
 	{ 9, 1, 0, 2, 36, 62, },
 	{ 0, 1, 0, 2, 40, 76, },
-	{ 2, 1, 0, 2, 40, 62, },
+	{ 2, 1, 0, 2, 40, 58, },
 	{ 1, 1, 0, 2, 40, 62, },
 	{ 3, 1, 0, 2, 40, 62, },
 	{ 4, 1, 0, 2, 40, 76, },
-	{ 5, 1, 0, 2, 40, 62, },
+	{ 5, 1, 0, 2, 40, 58, },
 	{ 6, 1, 0, 2, 40, 64, },
 	{ 7, 1, 0, 2, 40, 54, },
 	{ 8, 1, 0, 2, 40, 62, },
 	{ 9, 1, 0, 2, 40, 62, },
 	{ 0, 1, 0, 2, 44, 76, },
-	{ 2, 1, 0, 2, 44, 62, },
+	{ 2, 1, 0, 2, 44, 58, },
 	{ 1, 1, 0, 2, 44, 62, },
 	{ 3, 1, 0, 2, 44, 62, },
 	{ 4, 1, 0, 2, 44, 76, },
-	{ 5, 1, 0, 2, 44, 62, },
+	{ 5, 1, 0, 2, 44, 58, },
 	{ 6, 1, 0, 2, 44, 64, },
 	{ 7, 1, 0, 2, 44, 54, },
 	{ 8, 1, 0, 2, 44, 62, },
 	{ 9, 1, 0, 2, 44, 62, },
 	{ 0, 1, 0, 2, 48, 76, },
-	{ 2, 1, 0, 2, 48, 62, },
+	{ 2, 1, 0, 2, 48, 58, },
 	{ 1, 1, 0, 2, 48, 62, },
 	{ 3, 1, 0, 2, 48, 62, },
-	{ 4, 1, 0, 2, 48, 54, },
-	{ 5, 1, 0, 2, 48, 62, },
+	{ 4, 1, 0, 2, 48, 58, },
+	{ 5, 1, 0, 2, 48, 58, },
 	{ 6, 1, 0, 2, 48, 64, },
 	{ 7, 1, 0, 2, 48, 54, },
 	{ 8, 1, 0, 2, 48, 62, },
 	{ 9, 1, 0, 2, 48, 62, },
 	{ 0, 1, 0, 2, 52, 76, },
-	{ 2, 1, 0, 2, 52, 62, },
+	{ 2, 1, 0, 2, 52, 58, },
 	{ 1, 1, 0, 2, 52, 62, },
 	{ 3, 1, 0, 2, 52, 64, },
 	{ 4, 1, 0, 2, 52, 76, },
-	{ 5, 1, 0, 2, 52, 62, },
+	{ 5, 1, 0, 2, 52, 58, },
 	{ 6, 1, 0, 2, 52, 76, },
 	{ 7, 1, 0, 2, 52, 54, },
 	{ 8, 1, 0, 2, 52, 76, },
 	{ 9, 1, 0, 2, 52, 62, },
 	{ 0, 1, 0, 2, 56, 76, },
-	{ 2, 1, 0, 2, 56, 62, },
+	{ 2, 1, 0, 2, 56, 58, },
 	{ 1, 1, 0, 2, 56, 62, },
 	{ 3, 1, 0, 2, 56, 64, },
 	{ 4, 1, 0, 2, 56, 76, },
-	{ 5, 1, 0, 2, 56, 62, },
+	{ 5, 1, 0, 2, 56, 58, },
 	{ 6, 1, 0, 2, 56, 76, },
 	{ 7, 1, 0, 2, 56, 54, },
 	{ 8, 1, 0, 2, 56, 76, },
 	{ 9, 1, 0, 2, 56, 62, },
 	{ 0, 1, 0, 2, 60, 76, },
-	{ 2, 1, 0, 2, 60, 62, },
+	{ 2, 1, 0, 2, 60, 58, },
 	{ 1, 1, 0, 2, 60, 62, },
 	{ 3, 1, 0, 2, 60, 64, },
 	{ 4, 1, 0, 2, 60, 76, },
-	{ 5, 1, 0, 2, 60, 62, },
+	{ 5, 1, 0, 2, 60, 58, },
 	{ 6, 1, 0, 2, 60, 76, },
 	{ 7, 1, 0, 2, 60, 54, },
 	{ 8, 1, 0, 2, 60, 76, },
 	{ 9, 1, 0, 2, 60, 62, },
-	{ 0, 1, 0, 2, 64, 74, },
-	{ 2, 1, 0, 2, 64, 62, },
-	{ 1, 1, 0, 2, 64, 60, },
+	{ 0, 1, 0, 2, 64, 70, },
+	{ 2, 1, 0, 2, 64, 58, },
+	{ 1, 1, 0, 2, 64, 62, },
 	{ 3, 1, 0, 2, 64, 64, },
 	{ 4, 1, 0, 2, 64, 74, },
-	{ 5, 1, 0, 2, 64, 62, },
+	{ 5, 1, 0, 2, 64, 58, },
 	{ 6, 1, 0, 2, 64, 74, },
 	{ 7, 1, 0, 2, 64, 54, },
 	{ 8, 1, 0, 2, 64, 74, },
 	{ 9, 1, 0, 2, 64, 62, },
-	{ 0, 1, 0, 2, 100, 70, },
-	{ 2, 1, 0, 2, 100, 62, },
+	{ 0, 1, 0, 2, 100, 66, },
+	{ 2, 1, 0, 2, 100, 58, },
 	{ 1, 1, 0, 2, 100, 76, },
-	{ 3, 1, 0, 2, 100, 70, },
+	{ 3, 1, 0, 2, 100, 66, },
 	{ 4, 1, 0, 2, 100, 76, },
-	{ 5, 1, 0, 2, 100, 62, },
+	{ 5, 1, 0, 2, 100, 58, },
 	{ 6, 1, 0, 2, 100, 70, },
 	{ 7, 1, 0, 2, 100, 54, },
 	{ 8, 1, 0, 2, 100, 70, },
 	{ 9, 1, 0, 2, 100, 127, },
 	{ 0, 1, 0, 2, 104, 76, },
-	{ 2, 1, 0, 2, 104, 62, },
+	{ 2, 1, 0, 2, 104, 58, },
 	{ 1, 1, 0, 2, 104, 76, },
 	{ 3, 1, 0, 2, 104, 76, },
 	{ 4, 1, 0, 2, 104, 76, },
-	{ 5, 1, 0, 2, 104, 62, },
+	{ 5, 1, 0, 2, 104, 58, },
 	{ 6, 1, 0, 2, 104, 76, },
 	{ 7, 1, 0, 2, 104, 54, },
 	{ 8, 1, 0, 2, 104, 76, },
 	{ 9, 1, 0, 2, 104, 127, },
 	{ 0, 1, 0, 2, 108, 76, },
-	{ 2, 1, 0, 2, 108, 62, },
+	{ 2, 1, 0, 2, 108, 58, },
 	{ 1, 1, 0, 2, 108, 76, },
 	{ 3, 1, 0, 2, 108, 76, },
 	{ 4, 1, 0, 2, 108, 76, },
-	{ 5, 1, 0, 2, 108, 62, },
+	{ 5, 1, 0, 2, 108, 58, },
 	{ 6, 1, 0, 2, 108, 76, },
 	{ 7, 1, 0, 2, 108, 54, },
 	{ 8, 1, 0, 2, 108, 76, },
 	{ 9, 1, 0, 2, 108, 127, },
 	{ 0, 1, 0, 2, 112, 76, },
-	{ 2, 1, 0, 2, 112, 62, },
+	{ 2, 1, 0, 2, 112, 58, },
 	{ 1, 1, 0, 2, 112, 76, },
 	{ 3, 1, 0, 2, 112, 76, },
 	{ 4, 1, 0, 2, 112, 76, },
-	{ 5, 1, 0, 2, 112, 62, },
+	{ 5, 1, 0, 2, 112, 58, },
 	{ 6, 1, 0, 2, 112, 76, },
 	{ 7, 1, 0, 2, 112, 54, },
 	{ 8, 1, 0, 2, 112, 76, },
 	{ 9, 1, 0, 2, 112, 127, },
 	{ 0, 1, 0, 2, 116, 76, },
-	{ 2, 1, 0, 2, 116, 62, },
+	{ 2, 1, 0, 2, 116, 58, },
 	{ 1, 1, 0, 2, 116, 76, },
 	{ 3, 1, 0, 2, 116, 76, },
 	{ 4, 1, 0, 2, 116, 76, },
-	{ 5, 1, 0, 2, 116, 62, },
+	{ 5, 1, 0, 2, 116, 58, },
 	{ 6, 1, 0, 2, 116, 76, },
 	{ 7, 1, 0, 2, 116, 54, },
 	{ 8, 1, 0, 2, 116, 76, },
 	{ 9, 1, 0, 2, 116, 127, },
 	{ 0, 1, 0, 2, 120, 76, },
-	{ 2, 1, 0, 2, 120, 62, },
+	{ 2, 1, 0, 2, 120, 58, },
 	{ 1, 1, 0, 2, 120, 76, },
 	{ 3, 1, 0, 2, 120, 127, },
 	{ 4, 1, 0, 2, 120, 76, },
@@ -43008,7 +43008,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 2, 120, 76, },
 	{ 9, 1, 0, 2, 120, 127, },
 	{ 0, 1, 0, 2, 124, 76, },
-	{ 2, 1, 0, 2, 124, 62, },
+	{ 2, 1, 0, 2, 124, 58, },
 	{ 1, 1, 0, 2, 124, 76, },
 	{ 3, 1, 0, 2, 124, 127, },
 	{ 4, 1, 0, 2, 124, 76, },
@@ -43018,7 +43018,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 2, 124, 76, },
 	{ 9, 1, 0, 2, 124, 127, },
 	{ 0, 1, 0, 2, 128, 76, },
-	{ 2, 1, 0, 2, 128, 62, },
+	{ 2, 1, 0, 2, 128, 58, },
 	{ 1, 1, 0, 2, 128, 76, },
 	{ 3, 1, 0, 2, 128, 127, },
 	{ 4, 1, 0, 2, 128, 76, },
@@ -43028,38 +43028,38 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 2, 128, 76, },
 	{ 9, 1, 0, 2, 128, 127, },
 	{ 0, 1, 0, 2, 132, 76, },
-	{ 2, 1, 0, 2, 132, 62, },
+	{ 2, 1, 0, 2, 132, 58, },
 	{ 1, 1, 0, 2, 132, 76, },
 	{ 3, 1, 0, 2, 132, 76, },
 	{ 4, 1, 0, 2, 132, 76, },
-	{ 5, 1, 0, 2, 132, 62, },
+	{ 5, 1, 0, 2, 132, 58, },
 	{ 6, 1, 0, 2, 132, 76, },
 	{ 7, 1, 0, 2, 132, 54, },
 	{ 8, 1, 0, 2, 132, 76, },
 	{ 9, 1, 0, 2, 132, 127, },
 	{ 0, 1, 0, 2, 136, 76, },
-	{ 2, 1, 0, 2, 136, 62, },
+	{ 2, 1, 0, 2, 136, 58, },
 	{ 1, 1, 0, 2, 136, 76, },
 	{ 3, 1, 0, 2, 136, 76, },
 	{ 4, 1, 0, 2, 136, 76, },
-	{ 5, 1, 0, 2, 136, 62, },
+	{ 5, 1, 0, 2, 136, 58, },
 	{ 6, 1, 0, 2, 136, 76, },
 	{ 7, 1, 0, 2, 136, 54, },
 	{ 8, 1, 0, 2, 136, 76, },
 	{ 9, 1, 0, 2, 136, 127, },
-	{ 0, 1, 0, 2, 140, 70, },
-	{ 2, 1, 0, 2, 140, 62, },
+	{ 0, 1, 0, 2, 140, 66, },
+	{ 2, 1, 0, 2, 140, 58, },
 	{ 1, 1, 0, 2, 140, 76, },
-	{ 3, 1, 0, 2, 140, 70, },
+	{ 3, 1, 0, 2, 140, 66, },
 	{ 4, 1, 0, 2, 140, 76, },
-	{ 5, 1, 0, 2, 140, 62, },
+	{ 5, 1, 0, 2, 140, 58, },
 	{ 6, 1, 0, 2, 140, 70, },
 	{ 7, 1, 0, 2, 140, 54, },
 	{ 8, 1, 0, 2, 140, 70, },
 	{ 9, 1, 0, 2, 140, 127, },
 	{ 0, 1, 0, 2, 144, 76, },
 	{ 2, 1, 0, 2, 144, 127, },
-	{ 1, 1, 0, 2, 144, 127, },
+	{ 1, 1, 0, 2, 144, 76, },
 	{ 3, 1, 0, 2, 144, 76, },
 	{ 4, 1, 0, 2, 144, 76, },
 	{ 5, 1, 0, 2, 144, 127, },
@@ -43068,7 +43068,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 0, 2, 144, 76, },
 	{ 9, 1, 0, 2, 144, 127, },
 	{ 0, 1, 0, 2, 149, 76, },
-	{ 2, 1, 0, 2, 149, -128, },
+	{ 2, 1, 0, 2, 149, 28, },
 	{ 1, 1, 0, 2, 149, 127, },
 	{ 3, 1, 0, 2, 149, 76, },
 	{ 4, 1, 0, 2, 149, 74, },
@@ -43076,9 +43076,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 2, 149, 76, },
 	{ 7, 1, 0, 2, 149, 54, },
 	{ 8, 1, 0, 2, 149, 76, },
-	{ 9, 1, 0, 2, 149, -128, },
+	{ 9, 1, 0, 2, 149, 28, },
 	{ 0, 1, 0, 2, 153, 76, },
-	{ 2, 1, 0, 2, 153, -128, },
+	{ 2, 1, 0, 2, 153, 28, },
 	{ 1, 1, 0, 2, 153, 127, },
 	{ 3, 1, 0, 2, 153, 76, },
 	{ 4, 1, 0, 2, 153, 74, },
@@ -43086,9 +43086,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 2, 153, 76, },
 	{ 7, 1, 0, 2, 153, 54, },
 	{ 8, 1, 0, 2, 153, 76, },
-	{ 9, 1, 0, 2, 153, -128, },
+	{ 9, 1, 0, 2, 153, 28, },
 	{ 0, 1, 0, 2, 157, 76, },
-	{ 2, 1, 0, 2, 157, -128, },
+	{ 2, 1, 0, 2, 157, 28, },
 	{ 1, 1, 0, 2, 157, 127, },
 	{ 3, 1, 0, 2, 157, 76, },
 	{ 4, 1, 0, 2, 157, 74, },
@@ -43096,9 +43096,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 2, 157, 76, },
 	{ 7, 1, 0, 2, 157, 54, },
 	{ 8, 1, 0, 2, 157, 76, },
-	{ 9, 1, 0, 2, 157, -128, },
+	{ 9, 1, 0, 2, 157, 28, },
 	{ 0, 1, 0, 2, 161, 76, },
-	{ 2, 1, 0, 2, 161, -128, },
+	{ 2, 1, 0, 2, 161, 28, },
 	{ 1, 1, 0, 2, 161, 127, },
 	{ 3, 1, 0, 2, 161, 76, },
 	{ 4, 1, 0, 2, 161, 74, },
@@ -43106,9 +43106,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 2, 161, 76, },
 	{ 7, 1, 0, 2, 161, 54, },
 	{ 8, 1, 0, 2, 161, 76, },
-	{ 9, 1, 0, 2, 161, -128, },
+	{ 9, 1, 0, 2, 161, 28, },
 	{ 0, 1, 0, 2, 165, 76, },
-	{ 2, 1, 0, 2, 165, -128, },
+	{ 2, 1, 0, 2, 165, 28, },
 	{ 1, 1, 0, 2, 165, 127, },
 	{ 3, 1, 0, 2, 165, 76, },
 	{ 4, 1, 0, 2, 165, 74, },
@@ -43116,262 +43116,262 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 0, 2, 165, 76, },
 	{ 7, 1, 0, 2, 165, 54, },
 	{ 8, 1, 0, 2, 165, 76, },
-	{ 9, 1, 0, 2, 165, -128, },
-	{ 0, 1, 0, 3, 36, 68, },
-	{ 2, 1, 0, 3, 36, 38, },
+	{ 9, 1, 0, 2, 165, 28, },
+	{ 0, 1, 0, 3, 36, 64, },
+	{ 2, 1, 0, 3, 36, 36, },
 	{ 1, 1, 0, 3, 36, 50, },
 	{ 3, 1, 0, 3, 36, 38, },
 	{ 4, 1, 0, 3, 36, 66, },
-	{ 5, 1, 0, 3, 36, 38, },
+	{ 5, 1, 0, 3, 36, 36, },
 	{ 6, 1, 0, 3, 36, 52, },
 	{ 7, 1, 0, 3, 36, 30, },
 	{ 8, 1, 0, 3, 36, 50, },
 	{ 9, 1, 0, 3, 36, 38, },
 	{ 0, 1, 0, 3, 40, 68, },
-	{ 2, 1, 0, 3, 40, 38, },
+	{ 2, 1, 0, 3, 40, 36, },
 	{ 1, 1, 0, 3, 40, 50, },
 	{ 3, 1, 0, 3, 40, 38, },
 	{ 4, 1, 0, 3, 40, 66, },
-	{ 5, 1, 0, 3, 40, 38, },
+	{ 5, 1, 0, 3, 40, 36, },
 	{ 6, 1, 0, 3, 40, 52, },
 	{ 7, 1, 0, 3, 40, 30, },
 	{ 8, 1, 0, 3, 40, 50, },
 	{ 9, 1, 0, 3, 40, 38, },
 	{ 0, 1, 0, 3, 44, 68, },
-	{ 2, 1, 0, 3, 44, 38, },
+	{ 2, 1, 0, 3, 44, 36, },
 	{ 1, 1, 0, 3, 44, 50, },
 	{ 3, 1, 0, 3, 44, 38, },
 	{ 4, 1, 0, 3, 44, 66, },
-	{ 5, 1, 0, 3, 44, 38, },
+	{ 5, 1, 0, 3, 44, 36, },
 	{ 6, 1, 0, 3, 44, 52, },
 	{ 7, 1, 0, 3, 44, 30, },
 	{ 8, 1, 0, 3, 44, 50, },
 	{ 9, 1, 0, 3, 44, 38, },
 	{ 0, 1, 0, 3, 48, 68, },
-	{ 2, 1, 0, 3, 48, 38, },
+	{ 2, 1, 0, 3, 48, 36, },
 	{ 1, 1, 0, 3, 48, 50, },
 	{ 3, 1, 0, 3, 48, 38, },
-	{ 4, 1, 0, 3, 48, 36, },
-	{ 5, 1, 0, 3, 48, 38, },
+	{ 4, 1, 0, 3, 48, 42, },
+	{ 5, 1, 0, 3, 48, 36, },
 	{ 6, 1, 0, 3, 48, 52, },
 	{ 7, 1, 0, 3, 48, 30, },
 	{ 8, 1, 0, 3, 48, 50, },
 	{ 9, 1, 0, 3, 48, 38, },
 	{ 0, 1, 0, 3, 52, 68, },
-	{ 2, 1, 0, 3, 52, 38, },
+	{ 2, 1, 0, 3, 52, 36, },
 	{ 1, 1, 0, 3, 52, 50, },
 	{ 3, 1, 0, 3, 52, 40, },
 	{ 4, 1, 0, 3, 52, 66, },
-	{ 5, 1, 0, 3, 52, 38, },
+	{ 5, 1, 0, 3, 52, 36, },
 	{ 6, 1, 0, 3, 52, 68, },
 	{ 7, 1, 0, 3, 52, 30, },
 	{ 8, 1, 0, 3, 52, 68, },
 	{ 9, 1, 0, 3, 52, 38, },
 	{ 0, 1, 0, 3, 56, 68, },
-	{ 2, 1, 0, 3, 56, 38, },
+	{ 2, 1, 0, 3, 56, 36, },
 	{ 1, 1, 0, 3, 56, 50, },
 	{ 3, 1, 0, 3, 56, 40, },
 	{ 4, 1, 0, 3, 56, 66, },
-	{ 5, 1, 0, 3, 56, 38, },
+	{ 5, 1, 0, 3, 56, 36, },
 	{ 6, 1, 0, 3, 56, 68, },
 	{ 7, 1, 0, 3, 56, 30, },
 	{ 8, 1, 0, 3, 56, 68, },
 	{ 9, 1, 0, 3, 56, 38, },
-	{ 0, 1, 0, 3, 60, 66, },
-	{ 2, 1, 0, 3, 60, 38, },
+	{ 0, 1, 0, 3, 60, 68, },
+	{ 2, 1, 0, 3, 60, 36, },
 	{ 1, 1, 0, 3, 60, 50, },
 	{ 3, 1, 0, 3, 60, 40, },
 	{ 4, 1, 0, 3, 60, 66, },
-	{ 5, 1, 0, 3, 60, 38, },
+	{ 5, 1, 0, 3, 60, 36, },
 	{ 6, 1, 0, 3, 60, 66, },
 	{ 7, 1, 0, 3, 60, 30, },
 	{ 8, 1, 0, 3, 60, 66, },
 	{ 9, 1, 0, 3, 60, 38, },
-	{ 0, 1, 0, 3, 64, 68, },
-	{ 2, 1, 0, 3, 64, 38, },
+	{ 0, 1, 0, 3, 64, 66, },
+	{ 2, 1, 0, 3, 64, 36, },
 	{ 1, 1, 0, 3, 64, 50, },
 	{ 3, 1, 0, 3, 64, 40, },
 	{ 4, 1, 0, 3, 64, 66, },
-	{ 5, 1, 0, 3, 64, 38, },
+	{ 5, 1, 0, 3, 64, 36, },
 	{ 6, 1, 0, 3, 64, 68, },
 	{ 7, 1, 0, 3, 64, 30, },
 	{ 8, 1, 0, 3, 64, 68, },
 	{ 9, 1, 0, 3, 64, 38, },
-	{ 0, 1, 0, 3, 100, 60, },
-	{ 2, 1, 0, 3, 100, 38, },
+	{ 0, 1, 0, 3, 100, 64, },
+	{ 2, 1, 0, 3, 100, 36, },
 	{ 1, 1, 0, 3, 100, 70, },
-	{ 3, 1, 0, 3, 100, 60, },
-	{ 4, 1, 0, 3, 100, 64, },
-	{ 5, 1, 0, 3, 100, 38, },
+	{ 3, 1, 0, 3, 100, 64, },
+	{ 4, 1, 0, 3, 100, 66, },
+	{ 5, 1, 0, 3, 100, 36, },
 	{ 6, 1, 0, 3, 100, 60, },
 	{ 7, 1, 0, 3, 100, 30, },
 	{ 8, 1, 0, 3, 100, 60, },
 	{ 9, 1, 0, 3, 100, 127, },
 	{ 0, 1, 0, 3, 104, 68, },
-	{ 2, 1, 0, 3, 104, 38, },
+	{ 2, 1, 0, 3, 104, 36, },
 	{ 1, 1, 0, 3, 104, 70, },
 	{ 3, 1, 0, 3, 104, 68, },
-	{ 4, 1, 0, 3, 104, 64, },
-	{ 5, 1, 0, 3, 104, 38, },
+	{ 4, 1, 0, 3, 104, 66, },
+	{ 5, 1, 0, 3, 104, 36, },
 	{ 6, 1, 0, 3, 104, 68, },
 	{ 7, 1, 0, 3, 104, 30, },
 	{ 8, 1, 0, 3, 104, 68, },
 	{ 9, 1, 0, 3, 104, 127, },
 	{ 0, 1, 0, 3, 108, 68, },
-	{ 2, 1, 0, 3, 108, 38, },
+	{ 2, 1, 0, 3, 108, 36, },
 	{ 1, 1, 0, 3, 108, 70, },
 	{ 3, 1, 0, 3, 108, 68, },
-	{ 4, 1, 0, 3, 108, 64, },
-	{ 5, 1, 0, 3, 108, 38, },
+	{ 4, 1, 0, 3, 108, 66, },
+	{ 5, 1, 0, 3, 108, 36, },
 	{ 6, 1, 0, 3, 108, 68, },
 	{ 7, 1, 0, 3, 108, 30, },
 	{ 8, 1, 0, 3, 108, 68, },
 	{ 9, 1, 0, 3, 108, 127, },
 	{ 0, 1, 0, 3, 112, 68, },
-	{ 2, 1, 0, 3, 112, 38, },
+	{ 2, 1, 0, 3, 112, 36, },
 	{ 1, 1, 0, 3, 112, 70, },
 	{ 3, 1, 0, 3, 112, 68, },
-	{ 4, 1, 0, 3, 112, 64, },
-	{ 5, 1, 0, 3, 112, 38, },
+	{ 4, 1, 0, 3, 112, 66, },
+	{ 5, 1, 0, 3, 112, 36, },
 	{ 6, 1, 0, 3, 112, 68, },
 	{ 7, 1, 0, 3, 112, 30, },
 	{ 8, 1, 0, 3, 112, 68, },
 	{ 9, 1, 0, 3, 112, 127, },
 	{ 0, 1, 0, 3, 116, 68, },
-	{ 2, 1, 0, 3, 116, 38, },
+	{ 2, 1, 0, 3, 116, 36, },
 	{ 1, 1, 0, 3, 116, 70, },
 	{ 3, 1, 0, 3, 116, 68, },
-	{ 4, 1, 0, 3, 116, 64, },
-	{ 5, 1, 0, 3, 116, 38, },
+	{ 4, 1, 0, 3, 116, 66, },
+	{ 5, 1, 0, 3, 116, 36, },
 	{ 6, 1, 0, 3, 116, 68, },
 	{ 7, 1, 0, 3, 116, 30, },
 	{ 8, 1, 0, 3, 116, 68, },
 	{ 9, 1, 0, 3, 116, 127, },
 	{ 0, 1, 0, 3, 120, 68, },
-	{ 2, 1, 0, 3, 120, 38, },
+	{ 2, 1, 0, 3, 120, 36, },
 	{ 1, 1, 0, 3, 120, 70, },
 	{ 3, 1, 0, 3, 120, 127, },
-	{ 4, 1, 0, 3, 120, 64, },
+	{ 4, 1, 0, 3, 120, 66, },
 	{ 5, 1, 0, 3, 120, 127, },
 	{ 6, 1, 0, 3, 120, 68, },
 	{ 7, 1, 0, 3, 120, 30, },
 	{ 8, 1, 0, 3, 120, 68, },
 	{ 9, 1, 0, 3, 120, 127, },
 	{ 0, 1, 0, 3, 124, 68, },
-	{ 2, 1, 0, 3, 124, 38, },
+	{ 2, 1, 0, 3, 124, 36, },
 	{ 1, 1, 0, 3, 124, 70, },
 	{ 3, 1, 0, 3, 124, 127, },
-	{ 4, 1, 0, 3, 124, 64, },
+	{ 4, 1, 0, 3, 124, 66, },
 	{ 5, 1, 0, 3, 124, 127, },
 	{ 6, 1, 0, 3, 124, 68, },
 	{ 7, 1, 0, 3, 124, 30, },
 	{ 8, 1, 0, 3, 124, 68, },
 	{ 9, 1, 0, 3, 124, 127, },
 	{ 0, 1, 0, 3, 128, 68, },
-	{ 2, 1, 0, 3, 128, 38, },
+	{ 2, 1, 0, 3, 128, 36, },
 	{ 1, 1, 0, 3, 128, 70, },
 	{ 3, 1, 0, 3, 128, 127, },
-	{ 4, 1, 0, 3, 128, 64, },
+	{ 4, 1, 0, 3, 128, 66, },
 	{ 5, 1, 0, 3, 128, 127, },
 	{ 6, 1, 0, 3, 128, 68, },
 	{ 7, 1, 0, 3, 128, 30, },
 	{ 8, 1, 0, 3, 128, 68, },
 	{ 9, 1, 0, 3, 128, 127, },
 	{ 0, 1, 0, 3, 132, 68, },
-	{ 2, 1, 0, 3, 132, 38, },
+	{ 2, 1, 0, 3, 132, 36, },
 	{ 1, 1, 0, 3, 132, 70, },
 	{ 3, 1, 0, 3, 132, 68, },
-	{ 4, 1, 0, 3, 132, 64, },
-	{ 5, 1, 0, 3, 132, 38, },
+	{ 4, 1, 0, 3, 132, 66, },
+	{ 5, 1, 0, 3, 132, 36, },
 	{ 6, 1, 0, 3, 132, 68, },
 	{ 7, 1, 0, 3, 132, 30, },
 	{ 8, 1, 0, 3, 132, 68, },
 	{ 9, 1, 0, 3, 132, 127, },
 	{ 0, 1, 0, 3, 136, 68, },
-	{ 2, 1, 0, 3, 136, 38, },
+	{ 2, 1, 0, 3, 136, 36, },
 	{ 1, 1, 0, 3, 136, 70, },
 	{ 3, 1, 0, 3, 136, 68, },
-	{ 4, 1, 0, 3, 136, 64, },
-	{ 5, 1, 0, 3, 136, 38, },
+	{ 4, 1, 0, 3, 136, 66, },
+	{ 5, 1, 0, 3, 136, 36, },
 	{ 6, 1, 0, 3, 136, 68, },
 	{ 7, 1, 0, 3, 136, 30, },
 	{ 8, 1, 0, 3, 136, 68, },
 	{ 9, 1, 0, 3, 136, 127, },
-	{ 0, 1, 0, 3, 140, 60, },
-	{ 2, 1, 0, 3, 140, 38, },
+	{ 0, 1, 0, 3, 140, 58, },
+	{ 2, 1, 0, 3, 140, 36, },
 	{ 1, 1, 0, 3, 140, 70, },
-	{ 3, 1, 0, 3, 140, 60, },
-	{ 4, 1, 0, 3, 140, 64, },
-	{ 5, 1, 0, 3, 140, 38, },
+	{ 3, 1, 0, 3, 140, 58, },
+	{ 4, 1, 0, 3, 140, 66, },
+	{ 5, 1, 0, 3, 140, 36, },
 	{ 6, 1, 0, 3, 140, 60, },
 	{ 7, 1, 0, 3, 140, 30, },
 	{ 8, 1, 0, 3, 140, 60, },
 	{ 9, 1, 0, 3, 140, 127, },
 	{ 0, 1, 0, 3, 144, 68, },
 	{ 2, 1, 0, 3, 144, 127, },
-	{ 1, 1, 0, 3, 144, 127, },
+	{ 1, 1, 0, 3, 144, 70, },
 	{ 3, 1, 0, 3, 144, 68, },
-	{ 4, 1, 0, 3, 144, 64, },
+	{ 4, 1, 0, 3, 144, 66, },
 	{ 5, 1, 0, 3, 144, 127, },
 	{ 6, 1, 0, 3, 144, 68, },
 	{ 7, 1, 0, 3, 144, 127, },
 	{ 8, 1, 0, 3, 144, 68, },
 	{ 9, 1, 0, 3, 144, 127, },
 	{ 0, 1, 0, 3, 149, 76, },
-	{ 2, 1, 0, 3, 149, -128, },
+	{ 2, 1, 0, 3, 149, 4, },
 	{ 1, 1, 0, 3, 149, 127, },
 	{ 3, 1, 0, 3, 149, 76, },
-	{ 4, 1, 0, 3, 149, 60, },
+	{ 4, 1, 0, 3, 149, 62, },
 	{ 5, 1, 0, 3, 149, 76, },
 	{ 6, 1, 0, 3, 149, 76, },
 	{ 7, 1, 0, 3, 149, 30, },
 	{ 8, 1, 0, 3, 149, 72, },
-	{ 9, 1, 0, 3, 149, -128, },
+	{ 9, 1, 0, 3, 149, 4, },
 	{ 0, 1, 0, 3, 153, 76, },
-	{ 2, 1, 0, 3, 153, -128, },
+	{ 2, 1, 0, 3, 153, 4, },
 	{ 1, 1, 0, 3, 153, 127, },
 	{ 3, 1, 0, 3, 153, 76, },
-	{ 4, 1, 0, 3, 153, 60, },
+	{ 4, 1, 0, 3, 153, 62, },
 	{ 5, 1, 0, 3, 153, 76, },
 	{ 6, 1, 0, 3, 153, 76, },
 	{ 7, 1, 0, 3, 153, 30, },
 	{ 8, 1, 0, 3, 153, 76, },
-	{ 9, 1, 0, 3, 153, -128, },
+	{ 9, 1, 0, 3, 153, 4, },
 	{ 0, 1, 0, 3, 157, 76, },
-	{ 2, 1, 0, 3, 157, -128, },
+	{ 2, 1, 0, 3, 157, 4, },
 	{ 1, 1, 0, 3, 157, 127, },
 	{ 3, 1, 0, 3, 157, 76, },
-	{ 4, 1, 0, 3, 157, 60, },
+	{ 4, 1, 0, 3, 157, 62, },
 	{ 5, 1, 0, 3, 157, 76, },
 	{ 6, 1, 0, 3, 157, 76, },
 	{ 7, 1, 0, 3, 157, 30, },
 	{ 8, 1, 0, 3, 157, 76, },
-	{ 9, 1, 0, 3, 157, -128, },
+	{ 9, 1, 0, 3, 157, 4, },
 	{ 0, 1, 0, 3, 161, 76, },
-	{ 2, 1, 0, 3, 161, -128, },
+	{ 2, 1, 0, 3, 161, 4, },
 	{ 1, 1, 0, 3, 161, 127, },
 	{ 3, 1, 0, 3, 161, 76, },
-	{ 4, 1, 0, 3, 161, 60, },
+	{ 4, 1, 0, 3, 161, 62, },
 	{ 5, 1, 0, 3, 161, 76, },
 	{ 6, 1, 0, 3, 161, 76, },
 	{ 7, 1, 0, 3, 161, 30, },
 	{ 8, 1, 0, 3, 161, 76, },
-	{ 9, 1, 0, 3, 161, -128, },
+	{ 9, 1, 0, 3, 161, 4, },
 	{ 0, 1, 0, 3, 165, 76, },
-	{ 2, 1, 0, 3, 165, -128, },
+	{ 2, 1, 0, 3, 165, 4, },
 	{ 1, 1, 0, 3, 165, 127, },
 	{ 3, 1, 0, 3, 165, 76, },
-	{ 4, 1, 0, 3, 165, 60, },
+	{ 4, 1, 0, 3, 165, 62, },
 	{ 5, 1, 0, 3, 165, 76, },
 	{ 6, 1, 0, 3, 165, 76, },
 	{ 7, 1, 0, 3, 165, 30, },
 	{ 8, 1, 0, 3, 165, 76, },
-	{ 9, 1, 0, 3, 165, -128, },
+	{ 9, 1, 0, 3, 165, 4, },
 	{ 0, 1, 1, 2, 38, 66, },
 	{ 2, 1, 1, 2, 38, 64, },
-	{ 1, 1, 1, 2, 38, 62, },
+	{ 1, 1, 1, 2, 38, 64, },
 	{ 3, 1, 1, 2, 38, 64, },
-	{ 4, 1, 1, 2, 38, 72, },
+	{ 4, 1, 1, 2, 38, 64, },
 	{ 5, 1, 1, 2, 38, 64, },
 	{ 6, 1, 1, 2, 38, 64, },
 	{ 7, 1, 1, 2, 38, 54, },
@@ -43379,9 +43379,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 9, 1, 1, 2, 38, 64, },
 	{ 0, 1, 1, 2, 46, 72, },
 	{ 2, 1, 1, 2, 46, 64, },
-	{ 1, 1, 1, 2, 46, 62, },
+	{ 1, 1, 1, 2, 46, 64, },
 	{ 3, 1, 1, 2, 46, 64, },
-	{ 4, 1, 1, 2, 46, 60, },
+	{ 4, 1, 1, 2, 46, 70, },
 	{ 5, 1, 1, 2, 46, 64, },
 	{ 6, 1, 1, 2, 46, 64, },
 	{ 7, 1, 1, 2, 46, 54, },
@@ -43389,7 +43389,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 9, 1, 1, 2, 46, 64, },
 	{ 0, 1, 1, 2, 54, 72, },
 	{ 2, 1, 1, 2, 54, 64, },
-	{ 1, 1, 1, 2, 54, 62, },
+	{ 1, 1, 1, 2, 54, 64, },
 	{ 3, 1, 1, 2, 54, 64, },
 	{ 4, 1, 1, 2, 54, 72, },
 	{ 5, 1, 1, 2, 54, 64, },
@@ -43397,21 +43397,21 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 7, 1, 1, 2, 54, 54, },
 	{ 8, 1, 1, 2, 54, 72, },
 	{ 9, 1, 1, 2, 54, 64, },
-	{ 0, 1, 1, 2, 62, 64, },
+	{ 0, 1, 1, 2, 62, 60, },
 	{ 2, 1, 1, 2, 62, 64, },
 	{ 1, 1, 1, 2, 62, 62, },
-	{ 3, 1, 1, 2, 62, 64, },
-	{ 4, 1, 1, 2, 62, 70, },
+	{ 3, 1, 1, 2, 62, 60, },
+	{ 4, 1, 1, 2, 62, 60, },
 	{ 5, 1, 1, 2, 62, 64, },
 	{ 6, 1, 1, 2, 62, 64, },
 	{ 7, 1, 1, 2, 62, 54, },
 	{ 8, 1, 1, 2, 62, 64, },
 	{ 9, 1, 1, 2, 62, 64, },
-	{ 0, 1, 1, 2, 102, 58, },
+	{ 0, 1, 1, 2, 102, 60, },
 	{ 2, 1, 1, 2, 102, 64, },
 	{ 1, 1, 1, 2, 102, 72, },
-	{ 3, 1, 1, 2, 102, 58, },
-	{ 4, 1, 1, 2, 102, 72, },
+	{ 3, 1, 1, 2, 102, 60, },
+	{ 4, 1, 1, 2, 102, 64, },
 	{ 5, 1, 1, 2, 102, 64, },
 	{ 6, 1, 1, 2, 102, 58, },
 	{ 7, 1, 1, 2, 102, 54, },
@@ -43459,7 +43459,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 9, 1, 1, 2, 134, 127, },
 	{ 0, 1, 1, 2, 142, 72, },
 	{ 2, 1, 1, 2, 142, 127, },
-	{ 1, 1, 1, 2, 142, 127, },
+	{ 1, 1, 1, 2, 142, 72, },
 	{ 3, 1, 1, 2, 142, 72, },
 	{ 4, 1, 1, 2, 142, 72, },
 	{ 5, 1, 1, 2, 142, 127, },
@@ -43468,7 +43468,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 8, 1, 1, 2, 142, 72, },
 	{ 9, 1, 1, 2, 142, 127, },
 	{ 0, 1, 1, 2, 151, 72, },
-	{ 2, 1, 1, 2, 151, -128, },
+	{ 2, 1, 1, 2, 151, 28, },
 	{ 1, 1, 1, 2, 151, 127, },
 	{ 3, 1, 1, 2, 151, 72, },
 	{ 4, 1, 1, 2, 151, 72, },
@@ -43476,9 +43476,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 1, 2, 151, 72, },
 	{ 7, 1, 1, 2, 151, 54, },
 	{ 8, 1, 1, 2, 151, 72, },
-	{ 9, 1, 1, 2, 151, -128, },
+	{ 9, 1, 1, 2, 151, 28, },
 	{ 0, 1, 1, 2, 159, 72, },
-	{ 2, 1, 1, 2, 159, -128, },
+	{ 2, 1, 1, 2, 159, 28, },
 	{ 1, 1, 1, 2, 159, 127, },
 	{ 3, 1, 1, 2, 159, 72, },
 	{ 4, 1, 1, 2, 159, 72, },
@@ -43486,12 +43486,12 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 1, 2, 159, 72, },
 	{ 7, 1, 1, 2, 159, 54, },
 	{ 8, 1, 1, 2, 159, 72, },
-	{ 9, 1, 1, 2, 159, -128, },
+	{ 9, 1, 1, 2, 159, 28, },
 	{ 0, 1, 1, 3, 38, 60, },
 	{ 2, 1, 1, 3, 38, 40, },
 	{ 1, 1, 1, 3, 38, 50, },
 	{ 3, 1, 1, 3, 38, 40, },
-	{ 4, 1, 1, 3, 38, 62, },
+	{ 4, 1, 1, 3, 38, 54, },
 	{ 5, 1, 1, 3, 38, 40, },
 	{ 6, 1, 1, 3, 38, 52, },
 	{ 7, 1, 1, 3, 38, 30, },
@@ -43501,7 +43501,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 46, 40, },
 	{ 1, 1, 1, 3, 46, 50, },
 	{ 3, 1, 1, 3, 46, 40, },
-	{ 4, 1, 1, 3, 46, 46, },
+	{ 4, 1, 1, 3, 46, 54, },
 	{ 5, 1, 1, 3, 46, 40, },
 	{ 6, 1, 1, 3, 46, 52, },
 	{ 7, 1, 1, 3, 46, 30, },
@@ -43511,7 +43511,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 54, 40, },
 	{ 1, 1, 1, 3, 54, 50, },
 	{ 3, 1, 1, 3, 54, 40, },
-	{ 4, 1, 1, 3, 54, 62, },
+	{ 4, 1, 1, 3, 54, 66, },
 	{ 5, 1, 1, 3, 54, 40, },
 	{ 6, 1, 1, 3, 54, 68, },
 	{ 7, 1, 1, 3, 54, 30, },
@@ -43521,17 +43521,17 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 62, 40, },
 	{ 1, 1, 1, 3, 62, 48, },
 	{ 3, 1, 1, 3, 62, 40, },
-	{ 4, 1, 1, 3, 62, 58, },
+	{ 4, 1, 1, 3, 62, 50, },
 	{ 5, 1, 1, 3, 62, 40, },
 	{ 6, 1, 1, 3, 62, 58, },
 	{ 7, 1, 1, 3, 62, 30, },
 	{ 8, 1, 1, 3, 62, 58, },
 	{ 9, 1, 1, 3, 62, 40, },
-	{ 0, 1, 1, 3, 102, 54, },
+	{ 0, 1, 1, 3, 102, 56, },
 	{ 2, 1, 1, 3, 102, 40, },
 	{ 1, 1, 1, 3, 102, 70, },
-	{ 3, 1, 1, 3, 102, 54, },
-	{ 4, 1, 1, 3, 102, 64, },
+	{ 3, 1, 1, 3, 102, 56, },
+	{ 4, 1, 1, 3, 102, 54, },
 	{ 5, 1, 1, 3, 102, 40, },
 	{ 6, 1, 1, 3, 102, 54, },
 	{ 7, 1, 1, 3, 102, 30, },
@@ -43541,7 +43541,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 110, 40, },
 	{ 1, 1, 1, 3, 110, 70, },
 	{ 3, 1, 1, 3, 110, 68, },
-	{ 4, 1, 1, 3, 110, 64, },
+	{ 4, 1, 1, 3, 110, 66, },
 	{ 5, 1, 1, 3, 110, 40, },
 	{ 6, 1, 1, 3, 110, 68, },
 	{ 7, 1, 1, 3, 110, 30, },
@@ -43551,7 +43551,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 118, 40, },
 	{ 1, 1, 1, 3, 118, 70, },
 	{ 3, 1, 1, 3, 118, 127, },
-	{ 4, 1, 1, 3, 118, 64, },
+	{ 4, 1, 1, 3, 118, 66, },
 	{ 5, 1, 1, 3, 118, 127, },
 	{ 6, 1, 1, 3, 118, 68, },
 	{ 7, 1, 1, 3, 118, 30, },
@@ -43561,7 +43561,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 126, 40, },
 	{ 1, 1, 1, 3, 126, 70, },
 	{ 3, 1, 1, 3, 126, 127, },
-	{ 4, 1, 1, 3, 126, 64, },
+	{ 4, 1, 1, 3, 126, 66, },
 	{ 5, 1, 1, 3, 126, 127, },
 	{ 6, 1, 1, 3, 126, 68, },
 	{ 7, 1, 1, 3, 126, 30, },
@@ -43571,7 +43571,7 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 2, 1, 1, 3, 134, 40, },
 	{ 1, 1, 1, 3, 134, 70, },
 	{ 3, 1, 1, 3, 134, 68, },
-	{ 4, 1, 1, 3, 134, 64, },
+	{ 4, 1, 1, 3, 134, 66, },
 	{ 5, 1, 1, 3, 134, 40, },
 	{ 6, 1, 1, 3, 134, 68, },
 	{ 7, 1, 1, 3, 134, 30, },
@@ -43579,16 +43579,16 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 9, 1, 1, 3, 134, 127, },
 	{ 0, 1, 1, 3, 142, 68, },
 	{ 2, 1, 1, 3, 142, 127, },
-	{ 1, 1, 1, 3, 142, 127, },
+	{ 1, 1, 1, 3, 142, 70, },
 	{ 3, 1, 1, 3, 142, 68, },
-	{ 4, 1, 1, 3, 142, 64, },
+	{ 4, 1, 1, 3, 142, 66, },
 	{ 5, 1, 1, 3, 142, 127, },
 	{ 6, 1, 1, 3, 142, 68, },
 	{ 7, 1, 1, 3, 142, 127, },
 	{ 8, 1, 1, 3, 142, 68, },
 	{ 9, 1, 1, 3, 142, 127, },
 	{ 0, 1, 1, 3, 151, 72, },
-	{ 2, 1, 1, 3, 151, -128, },
+	{ 2, 1, 1, 3, 151, 4, },
 	{ 1, 1, 1, 3, 151, 127, },
 	{ 3, 1, 1, 3, 151, 72, },
 	{ 4, 1, 1, 3, 151, 66, },
@@ -43596,9 +43596,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 1, 3, 151, 72, },
 	{ 7, 1, 1, 3, 151, 30, },
 	{ 8, 1, 1, 3, 151, 68, },
-	{ 9, 1, 1, 3, 151, -128, },
+	{ 9, 1, 1, 3, 151, 4, },
 	{ 0, 1, 1, 3, 159, 72, },
-	{ 2, 1, 1, 3, 159, -128, },
+	{ 2, 1, 1, 3, 159, 4, },
 	{ 1, 1, 1, 3, 159, 127, },
 	{ 3, 1, 1, 3, 159, 72, },
 	{ 4, 1, 1, 3, 159, 66, },
@@ -43606,32 +43606,32 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 6, 1, 1, 3, 159, 72, },
 	{ 7, 1, 1, 3, 159, 30, },
 	{ 8, 1, 1, 3, 159, 72, },
-	{ 9, 1, 1, 3, 159, -128, },
-	{ 0, 1, 2, 4, 42, 64, },
+	{ 9, 1, 1, 3, 159, 4, },
+	{ 0, 1, 2, 4, 42, 68, },
 	{ 2, 1, 2, 4, 42, 64, },
 	{ 1, 1, 2, 4, 42, 64, },
 	{ 3, 1, 2, 4, 42, 64, },
-	{ 4, 1, 2, 4, 42, 68, },
+	{ 4, 1, 2, 4, 42, 60, },
 	{ 5, 1, 2, 4, 42, 64, },
 	{ 6, 1, 2, 4, 42, 64, },
 	{ 7, 1, 2, 4, 42, 54, },
 	{ 8, 1, 2, 4, 42, 62, },
 	{ 9, 1, 2, 4, 42, 64, },
-	{ 0, 1, 2, 4, 58, 62, },
+	{ 0, 1, 2, 4, 58, 60, },
 	{ 2, 1, 2, 4, 58, 64, },
 	{ 1, 1, 2, 4, 58, 64, },
-	{ 3, 1, 2, 4, 58, 62, },
-	{ 4, 1, 2, 4, 58, 64, },
+	{ 3, 1, 2, 4, 58, 60, },
+	{ 4, 1, 2, 4, 58, 56, },
 	{ 5, 1, 2, 4, 58, 64, },
 	{ 6, 1, 2, 4, 58, 62, },
 	{ 7, 1, 2, 4, 58, 54, },
 	{ 8, 1, 2, 4, 58, 62, },
 	{ 9, 1, 2, 4, 58, 64, },
-	{ 0, 1, 2, 4, 106, 58, },
+	{ 0, 1, 2, 4, 106, 60, },
 	{ 2, 1, 2, 4, 106, 64, },
 	{ 1, 1, 2, 4, 106, 72, },
-	{ 3, 1, 2, 4, 106, 58, },
-	{ 4, 1, 2, 4, 106, 66, },
+	{ 3, 1, 2, 4, 106, 60, },
+	{ 4, 1, 2, 4, 106, 58, },
 	{ 5, 1, 2, 4, 106, 64, },
 	{ 6, 1, 2, 4, 106, 58, },
 	{ 7, 1, 2, 4, 106, 54, },
@@ -43649,84 +43649,84 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
 	{ 9, 1, 2, 4, 122, 127, },
 	{ 0, 1, 2, 4, 138, 72, },
 	{ 2, 1, 2, 4, 138, 127, },
-	{ 1, 1, 2, 4, 138, 127, },
+	{ 1, 1, 2, 4, 138, 72, },
 	{ 3, 1, 2, 4, 138, 72, },
-	{ 4, 1, 2, 4, 138, 68, },
+	{ 4, 1, 2, 4, 138, 70, },
 	{ 5, 1, 2, 4, 138, 127, },
 	{ 6, 1, 2, 4, 138, 72, },
 	{ 7, 1, 2, 4, 138, 127, },
 	{ 8, 1, 2, 4, 138, 72, },
 	{ 9, 1, 2, 4, 138, 127, },
 	{ 0, 1, 2, 4, 155, 72, },
-	{ 2, 1, 2, 4, 155, -128, },
+	{ 2, 1, 2, 4, 155, 28, },
 	{ 1, 1, 2, 4, 155, 127, },
 	{ 3, 1, 2, 4, 155, 72, },
-	{ 4, 1, 2, 4, 155, 68, },
+	{ 4, 1, 2, 4, 155, 62, },
 	{ 5, 1, 2, 4, 155, 72, },
 	{ 6, 1, 2, 4, 155, 72, },
 	{ 7, 1, 2, 4, 155, 54, },
 	{ 8, 1, 2, 4, 155, 68, },
-	{ 9, 1, 2, 4, 155, -128, },
-	{ 0, 1, 2, 5, 42, 54, },
+	{ 9, 1, 2, 4, 155, 28, },
+	{ 0, 1, 2, 5, 42, 56, },
 	{ 2, 1, 2, 5, 42, 40, },
 	{ 1, 1, 2, 5, 42, 50, },
 	{ 3, 1, 2, 5, 42, 40, },
-	{ 4, 1, 2, 5, 42, 58, },
+	{ 4, 1, 2, 5, 42, 50, },
 	{ 5, 1, 2, 5, 42, 40, },
 	{ 6, 1, 2, 5, 42, 52, },
 	{ 7, 1, 2, 5, 42, 30, },
 	{ 8, 1, 2, 5, 42, 50, },
 	{ 9, 1, 2, 5, 42, 40, },
-	{ 0, 1, 2, 5, 58, 52, },
+	{ 0, 1, 2, 5, 58, 54, },
 	{ 2, 1, 2, 5, 58, 40, },
 	{ 1, 1, 2, 5, 58, 50, },
 	{ 3, 1, 2, 5, 58, 40, },
-	{ 4, 1, 2, 5, 58, 56, },
+	{ 4, 1, 2, 5, 58, 46, },
 	{ 5, 1, 2, 5, 58, 40, },
 	{ 6, 1, 2, 5, 58, 52, },
 	{ 7, 1, 2, 5, 58, 30, },
 	{ 8, 1, 2, 5, 58, 52, },
 	{ 9, 1, 2, 5, 58, 40, },
-	{ 0, 1, 2, 5, 106, 50, },
+	{ 0, 1, 2, 5, 106, 48, },
 	{ 2, 1, 2, 5, 106, 40, },
 	{ 1, 1, 2, 5, 106, 72, },
-	{ 3, 1, 2, 5, 106, 50, },
-	{ 4, 1, 2, 5, 106, 56, },
+	{ 3, 1, 2, 5, 106, 48, },
+	{ 4, 1, 2, 5, 106, 50, },
 	{ 5, 1, 2, 5, 106, 40, },
 	{ 6, 1, 2, 5, 106, 50, },
 	{ 7, 1, 2, 5, 106, 30, },
 	{ 8, 1, 2, 5, 106, 50, },
 	{ 9, 1, 2, 5, 106, 127, },
-	{ 0, 1, 2, 5, 122, 66, },
+	{ 0, 1, 2, 5, 122, 70, },
 	{ 2, 1, 2, 5, 122, 40, },
 	{ 1, 1, 2, 5, 122, 72, },
 	{ 3, 1, 2, 5, 122, 127, },
-	{ 4, 1, 2, 5, 122, 56, },
+	{ 4, 1, 2, 5, 122, 62, },
 	{ 5, 1, 2, 5, 122, 127, },
 	{ 6, 1, 2, 5, 122, 66, },
 	{ 7, 1, 2, 5, 122, 30, },
 	{ 8, 1, 2, 5, 122, 66, },
 	{ 9, 1, 2, 5, 122, 127, },
-	{ 0, 1, 2, 5, 138, 66, },
+	{ 0, 1, 2, 5, 138, 70, },
 	{ 2, 1, 2, 5, 138, 127, },
-	{ 1, 1, 2, 5, 138, 127, },
-	{ 3, 1, 2, 5, 138, 66, },
-	{ 4, 1, 2, 5, 138, 58, },
+	{ 1, 1, 2, 5, 138, 72, },
+	{ 3, 1, 2, 5, 138, 70, },
+	{ 4, 1, 2, 5, 138, 62, },
 	{ 5, 1, 2, 5, 138, 127, },
 	{ 6, 1, 2, 5, 138, 66, },
 	{ 7, 1, 2, 5, 138, 127, },
 	{ 8, 1, 2, 5, 138, 66, },
 	{ 9, 1, 2, 5, 138, 127, },
-	{ 0, 1, 2, 5, 155, 62, },
-	{ 2, 1, 2, 5, 155, -128, },
+	{ 0, 1, 2, 5, 155, 72, },
+	{ 2, 1, 2, 5, 155, 4, },
 	{ 1, 1, 2, 5, 155, 127, },
-	{ 3, 1, 2, 5, 155, 62, },
-	{ 4, 1, 2, 5, 155, 58, },
+	{ 3, 1, 2, 5, 155, 72, },
+	{ 4, 1, 2, 5, 155, 52, },
 	{ 5, 1, 2, 5, 155, 72, },
 	{ 6, 1, 2, 5, 155, 62, },
 	{ 7, 1, 2, 5, 155, 30, },
 	{ 8, 1, 2, 5, 155, 62, },
-	{ 9, 1, 2, 5, 155, -128, },
+	{ 9, 1, 2, 5, 155, 4, },
 };
 
 RTW_DECL_TABLE_TXPWR_LMT(rtw8822c_txpwr_lmt_type5);
-- 
cgit v1.2.3


From 7ae7784ec2a812c07d2ca91a6538ef2470154fb6 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Fri, 19 Mar 2021 13:42:16 +0800
Subject: rtw88: 8822c: add LC calibration for RTL8822C

Fix power tracking issue by replacing unnecessary IQ calibration
with LC calibration.
When thermal difference exceeds limitation, let RF circuit adjsut
its characteristic to fit in current environment.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210319054218.3319-6-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  2 ++
 drivers/net/wireless/realtek/rtw88/phy.c      | 14 ++++++++++++++
 drivers/net/wireless/realtek/rtw88/phy.h      |  1 +
 drivers/net/wireless/realtek/rtw88/reg.h      |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 27 +++++++++++++++++++++++++--
 5 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index d185209ee3cc..98b18daae1fb 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1167,6 +1167,7 @@ struct rtw_chip_info {
 	bool en_dis_dpd;
 	u16 dpd_ratemask;
 	u8 iqk_threshold;
+	u8 lck_threshold;
 	const struct rtw_pwr_track_tbl *pwr_track_tbl;
 
 	u8 bfer_su_max_num;
@@ -1535,6 +1536,7 @@ struct rtw_dm_info {
 	u32 rrsr_mask_min;
 	u8 thermal_avg[RTW_RF_PATH_MAX];
 	u8 thermal_meter_k;
+	u8 thermal_meter_lck;
 	s8 delta_power_index[RTW_RF_PATH_MAX];
 	s8 delta_power_index_last[RTW_RF_PATH_MAX];
 	u8 default_ofdm_index;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index f96edc049718..b3c0a38771a7 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -2220,6 +2220,20 @@ s8 rtw_phy_pwrtrack_get_pwridx(struct rtw_dev *rtwdev,
 }
 EXPORT_SYMBOL(rtw_phy_pwrtrack_get_pwridx);
 
+bool rtw_phy_pwrtrack_need_lck(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 delta_lck;
+
+	delta_lck = abs(dm_info->thermal_avg[0] - dm_info->thermal_meter_lck);
+	if (delta_lck >= rtwdev->chip->lck_threshold) {
+		dm_info->thermal_meter_lck = dm_info->thermal_avg[0];
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(rtw_phy_pwrtrack_need_lck);
+
 bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev)
 {
 	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.h b/drivers/net/wireless/realtek/rtw88/phy.h
index a4fcfb878550..a0742a69446d 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.h
+++ b/drivers/net/wireless/realtek/rtw88/phy.h
@@ -55,6 +55,7 @@ u8 rtw_phy_pwrtrack_get_delta(struct rtw_dev *rtwdev, u8 path);
 s8 rtw_phy_pwrtrack_get_pwridx(struct rtw_dev *rtwdev,
 			       struct rtw_swing_table *swing_table,
 			       u8 tbl_path, u8 therm_path, u8 delta);
+bool rtw_phy_pwrtrack_need_lck(struct rtw_dev *rtwdev);
 bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev);
 void rtw_phy_config_swing_table(struct rtw_dev *rtwdev,
 				struct rtw_swing_table *swing_table);
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index e3a981ad01a6..a85fe29f13f7 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -653,8 +653,13 @@
 #define RF_TXATANK	0x64
 #define RF_TRXIQ	0x66
 #define RF_RXIQGEN	0x8d
+#define RF_SYN_PFD	0xb0
 #define RF_XTALX2	0xb8
+#define RF_SYN_CTRL	0xbb
 #define RF_MALSEL	0xbe
+#define RF_SYN_AAC	0xc9
+#define RF_AAC_CTRL	0xca
+#define RF_FAST_LCK	0xcc
 #define RF_RCKD		0xde
 #define RF_TXADBG	0xde
 #define RF_LUTDBG	0xdf
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index a1f1da3db029..9453a20f966a 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1126,6 +1126,7 @@ static void rtw8822c_pwrtrack_init(struct rtw_dev *rtwdev)
 
 	dm_info->pwr_trk_triggered = false;
 	dm_info->thermal_meter_k = rtwdev->efuse.thermal_meter_k;
+	dm_info->thermal_meter_lck = rtwdev->efuse.thermal_meter_k;
 }
 
 static void rtw8822c_phy_set_param(struct rtw_dev *rtwdev)
@@ -2117,6 +2118,26 @@ static void rtw8822c_false_alarm_statistics(struct rtw_dev *rtwdev)
 	rtw_write32_set(rtwdev, REG_RX_BREAK, BIT_COM_RX_GCK_EN);
 }
 
+static void rtw8822c_do_lck(struct rtw_dev *rtwdev)
+{
+	u32 val;
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_SYN_CTRL, RFREG_MASK, 0x80010);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_SYN_PFD, RFREG_MASK, 0x1F0FA);
+	fsleep(1);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_AAC_CTRL, RFREG_MASK, 0x80000);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_SYN_AAC, RFREG_MASK, 0x80001);
+	read_poll_timeout(rtw_read_rf, val, val != 0x1, 1000, 100000,
+			  true, rtwdev, RF_PATH_A, RF_AAC_CTRL, 0x1000);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_SYN_PFD, RFREG_MASK, 0x1F0F8);
+	rtw_write_rf(rtwdev, RF_PATH_B, RF_SYN_CTRL, RFREG_MASK, 0x80010);
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_FAST_LCK, RFREG_MASK, 0x0f000);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_FAST_LCK, RFREG_MASK, 0x4f000);
+	fsleep(1);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_FAST_LCK, RFREG_MASK, 0x0f000);
+}
+
 static void rtw8822c_do_iqk(struct rtw_dev *rtwdev)
 {
 	struct rtw_iqk_para para = {0};
@@ -3547,11 +3568,12 @@ static void __rtw8822c_pwr_track(struct rtw_dev *rtwdev)
 
 	rtw_phy_config_swing_table(rtwdev, &swing_table);
 
+	if (rtw_phy_pwrtrack_need_lck(rtwdev))
+		rtw8822c_do_lck(rtwdev);
+
 	for (i = 0; i < rtwdev->hal.rf_path_num; i++)
 		rtw8822c_pwr_track_path(rtwdev, &swing_table, i);
 
-	if (rtw_phy_pwrtrack_need_iqk(rtwdev))
-		rtw8822c_do_iqk(rtwdev);
 }
 
 static void rtw8822c_pwr_track(struct rtw_dev *rtwdev)
@@ -4361,6 +4383,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.dpd_ratemask = DIS_DPD_RATEALL,
 	.pwr_track_tbl = &rtw8822c_rtw_pwr_track_tbl,
 	.iqk_threshold = 8,
+	.lck_threshold = 8,
 	.bfer_su_max_num = 2,
 	.bfer_mu_max_num = 1,
 	.rx_ldpc = true,
-- 
cgit v1.2.3


From 4517f811258d81a4666a7d0228a774dfbdb1e4fd Mon Sep 17 00:00:00 2001
From: Ching-Te Ku <ku920601@realtek.com>
Date: Fri, 19 Mar 2021 13:42:18 +0800
Subject: rtw88: coex: fix A2DP stutters while WL busy + WL scan

While WL scan, WL is more high priority than BT. The packets from AP
will be a big interference to A2DP. It will lead to A2DP stutters. Stop
answering CTS to AP to decrease AP's packets Tx while WL scan + WL busy.
Enable BT AFH feature to make BT leave away from WL channel.

Desired BT firmware BT-COEX version: 0x1c
Desired WL firmware version: 9.9.x

Signed-off-by: Ching-Te Ku <ku920601@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210319054218.3319-8-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.c     | 8 +++++---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index 7eee2c5ecb11..cedbf3825848 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -787,7 +787,6 @@ static void rtw_coex_update_wl_ch_info(struct rtw_dev *rtwdev, u8 type)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
 	struct rtw_coex_dm *coex_dm = &rtwdev->coex.dm;
-	struct rtw_efuse *efuse = &rtwdev->efuse;
 	u8 link = 0;
 	u8 center_chan = 0;
 	u8 bw;
@@ -798,7 +797,7 @@ static void rtw_coex_update_wl_ch_info(struct rtw_dev *rtwdev, u8 type)
 	if (type != COEX_MEDIA_DISCONNECT)
 		center_chan = rtwdev->hal.current_channel;
 
-	if (center_chan == 0 || (efuse->share_ant && center_chan <= 14)) {
+	if (center_chan == 0) {
 		link = 0;
 		center_chan = 0;
 		bw = 0;
@@ -2325,8 +2324,11 @@ static void rtw_coex_action_wl_linkscan(struct rtw_dev *rtwdev)
 	if (efuse->share_ant) { /* Shared-Ant */
 		if (coex_stat->bt_a2dp_exist) {
 			slot_type = TDMA_4SLOT;
-			table_case = 9;
 			tdma_case = 11;
+			if (coex_stat->wl_gl_busy)
+				table_case = 26;
+			else
+				table_case = 9;
 		} else {
 			table_case = 9;
 			tdma_case = 7;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 9453a20f966a..07ff0d442e5b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -4393,7 +4393,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.wowlan_stub = &rtw_wowlan_stub_8822c,
 	.max_sched_scan_ssids = 4,
 #endif
-	.coex_para_ver = 0x201029,
+	.coex_para_ver = 0x2103181c,
 	.bt_desired_ver = 0x1c,
 	.scbd_support = true,
 	.new_scbd10_def = true,
-- 
cgit v1.2.3


From c9eaee0c2ec6b1002044fb698cdfb5d9ef4ed28c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 15 Mar 2021 12:23:37 +0300
Subject: rtw88: Fix an error code in rtw_debugfs_set_rsvd_page()

The sscanf() function returns the number of matches (0 or 1 in this
case).  It doesn't return error codes.  We should return -EINVAL if the
string is invalid

Fixes: c376c1fc87b7 ("rtw88: add h2c command in debugfs")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/YE8nmatMDBDDWkjq@mwanda
---
 drivers/net/wireless/realtek/rtw88/debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index 4539b673f6fd..5c44fa87ed2e 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -271,7 +271,7 @@ static ssize_t rtw_debugfs_set_rsvd_page(struct file *filp,
 
 	if (num != 2) {
 		rtw_warn(rtwdev, "invalid arguments\n");
-		return num;
+		return -EINVAL;
 	}
 
 	debugfs_priv->rsvd_page.page_offset = offset;
-- 
cgit v1.2.3


From 87fce88658ba047ae62e83497d3f3c5dc22fa6f9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 25 Feb 2021 18:32:41 +0000
Subject: mt7601u: fix always true expression

Currently the expression ~nic_conf1 is always true because nic_conf1
is a u16 and according to 6.5.3.3 of the C standard the ~ operator
promotes the u16 to an integer before flipping all the bits. Thus
the top 16 bits of the integer result are all set so the expression
is always true.  If the intention was to flip all the bits of nic_conf1
then casting the integer result back to a u16 is a suitabel fix.

Interestingly static analyzers seem to thing a bitwise ! should be
used instead of ~ for this scenario, so I think the original intent
of the expression may need some extra consideration.

Addresses-Coverity: ("Logical vs. bitwise operator")
Fixes: c869f77d6abb ("add mt7601u driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Jakub Kicinski <kubakici@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210225183241.1002129-1-colin.king@canonical.com
---
 drivers/net/wireless/mediatek/mt7601u/eeprom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
index c868582c5d22..aa3b64902cf9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
@@ -99,7 +99,7 @@ mt7601u_has_tssi(struct mt7601u_dev *dev, u8 *eeprom)
 {
 	u16 nic_conf1 = get_unaligned_le16(eeprom + MT_EE_NIC_CONF_1);
 
-	return ~nic_conf1 && (nic_conf1 & MT_EE_NIC_CONF_1_TX_ALC_EN);
+	return (u16)~nic_conf1 && (nic_conf1 & MT_EE_NIC_CONF_1_TX_ALC_EN);
 }
 
 static void
-- 
cgit v1.2.3


From 670d9e53886c942b7f29c475b2b494278e586921 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 28 Feb 2021 19:23:09 +0100
Subject: mt7601u: enable TDLS support

Notify mac80211 the mt7601u chipset support 802.11 TDLS. The feature has
been tested with a mt7610u peer.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/8f9fd662cdbbb70ba896f1bea80e696b15011d3f.1614536496.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt7601u/init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c
index cada48800928..5d9e952b2966 100644
--- a/drivers/net/wireless/mediatek/mt7601u/init.c
+++ b/drivers/net/wireless/mediatek/mt7601u/init.c
@@ -610,6 +610,7 @@ int mt7601u_register_device(struct mt7601u_dev *dev)
 
 	wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
 	wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION);
+	wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS;
 
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
 
-- 
cgit v1.2.3


From fa9f5d0e0b45a06802f7cb3afed237be6066821e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 11:43:33 +0100
Subject: iwlegacy: avoid -Wempty-body warning

There are a couple of warnings in this driver when building with W=1:

drivers/net/wireless/intel/iwlegacy/common.c: In function 'il_power_set_mode':
drivers/net/wireless/intel/iwlegacy/common.c:1195:60: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body]
 1195 |                                 il->chain_noise_data.state);
      |                                                            ^
drivers/net/wireless/intel/iwlegacy/common.c: In function 'il_do_scan_abort':
drivers/net/wireless/intel/iwlegacy/common.c:1343:57: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body]

Change the empty debug macros to no_printk(), which avoids the
warnings and adds useful format string checks.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210322104343.948660-3-arnd@kernel.org
---
 drivers/net/wireless/intel/iwlegacy/3945-mac.c | 2 --
 drivers/net/wireless/intel/iwlegacy/common.c   | 2 --
 drivers/net/wireless/intel/iwlegacy/common.h   | 2 +-
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index 4ca8212d4fa4..6ff2674f8466 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -751,9 +751,7 @@ il3945_hdl_alive(struct il_priv *il, struct il_rx_buf *rxb)
 static void
 il3945_hdl_add_sta(struct il_priv *il, struct il_rx_buf *rxb)
 {
-#ifdef CONFIG_IWLEGACY_DEBUG
 	struct il_rx_pkt *pkt = rxb_addr(rxb);
-#endif
 
 	D_RX("Received C_ADD_STA: 0x%02X\n", pkt->u.status);
 }
diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
index 0651a6a416d1..219fed91cac5 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.c
+++ b/drivers/net/wireless/intel/iwlegacy/common.c
@@ -1430,10 +1430,8 @@ static void
 il_hdl_scan_complete(struct il_priv *il, struct il_rx_buf *rxb)
 {
 
-#ifdef CONFIG_IWLEGACY_DEBUG
 	struct il_rx_pkt *pkt = rxb_addr(rxb);
 	struct il_scancomplete_notification *scan_notif = (void *)pkt->u.raw;
-#endif
 
 	D_SCAN("Scan complete: %d channels (TSF 0x%08X:%08X) - %d\n",
 	       scan_notif->scanned_channels, scan_notif->tsf_low,
diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h
index ea1b1bb7ddcb..40877ef1fbf2 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.h
+++ b/drivers/net/wireless/intel/iwlegacy/common.h
@@ -2937,7 +2937,7 @@ do {									\
 } while (0)
 
 #else
-#define IL_DBG(level, fmt, args...)
+#define IL_DBG(level, fmt, args...) no_printk(fmt, ##args)
 static inline void
 il_print_hex_dump(struct il_priv *il, int level, const void *p, u32 len)
 {
-- 
cgit v1.2.3


From e195dad1411594809cb955c96a7a514bcc4ad638 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 16 Dec 2020 16:05:51 +0100
Subject: mt76: add support for 802.3 rx frames

Do not try to access the header when receiving them

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/agg-rx.c      | 13 +++++++------
 drivers/net/wireless/mediatek/mt76/mac80211.c    | 23 +++++++++++++++++------
 drivers/net/wireless/mediatek/mt76/mt76.h        |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c  |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  |  2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mac.c |  4 +++-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  |  2 +-
 8 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index df25c00d9e06..144e8a8910ba 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -122,6 +122,7 @@ mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
 	struct ieee80211_bar *bar = mt76_skb_get_hdr(skb);
 	struct mt76_wcid *wcid = status->wcid;
 	struct mt76_rx_tid *tid;
+	u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
 	u16 seqno;
 
 	if (!ieee80211_is_ctl(bar->frame_control))
@@ -130,9 +131,9 @@ mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
 	if (!ieee80211_is_back_req(bar->frame_control))
 		return;
 
-	status->tid = le16_to_cpu(bar->control) >> 12;
+	status->qos_ctl = tidno = le16_to_cpu(bar->control) >> 12;
 	seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(bar->start_seq_num));
-	tid = rcu_dereference(wcid->aggr[status->tid]);
+	tid = rcu_dereference(wcid->aggr[tidno]);
 	if (!tid)
 		return;
 
@@ -147,12 +148,12 @@ mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
 void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
-	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
 	struct mt76_wcid *wcid = status->wcid;
 	struct ieee80211_sta *sta;
 	struct mt76_rx_tid *tid;
 	bool sn_less;
 	u16 seqno, head, size, idx;
+	u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
 	u8 ackp;
 
 	__skb_queue_tail(frames, skb);
@@ -161,18 +162,18 @@ void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
 	if (!sta)
 		return;
 
-	if (!status->aggr) {
+	if (!status->aggr && !(status->flag & RX_FLAG_8023)) {
 		mt76_rx_aggr_check_ctl(skb, frames);
 		return;
 	}
 
 	/* not part of a BA session */
-	ackp = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_ACK_POLICY_MASK;
+	ackp = status->qos_ctl & IEEE80211_QOS_CTL_ACK_POLICY_MASK;
 	if (ackp != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
 	    ackp != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
 		return;
 
-	tid = rcu_dereference(wcid->aggr[status->tid]);
+	tid = rcu_dereference(wcid->aggr[tidno]);
 	if (!tid)
 		return;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 696d00d1976c..f9381bd96244 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -737,6 +737,7 @@ mt76_check_ccmp_pn(struct sk_buff *skb)
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_wcid *wcid = status->wcid;
 	struct ieee80211_hdr *hdr;
+	u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
 	int ret;
 
 	if (!(status->flag & RX_FLAG_DECRYPTED))
@@ -757,12 +758,12 @@ mt76_check_ccmp_pn(struct sk_buff *skb)
 	}
 
 	BUILD_BUG_ON(sizeof(status->iv) != sizeof(wcid->rx_key_pn[0]));
-	ret = memcmp(status->iv, wcid->rx_key_pn[status->tid],
+	ret = memcmp(status->iv, wcid->rx_key_pn[tidno],
 		     sizeof(status->iv));
 	if (ret <= 0)
 		return -EINVAL; /* replay */
 
-	memcpy(wcid->rx_key_pn[status->tid], status->iv, sizeof(status->iv));
+	memcpy(wcid->rx_key_pn[tidno], status->iv, sizeof(status->iv));
 
 	if (status->flag & RX_FLAG_IV_STRIPPED)
 		status->flag |= RX_FLAG_PN_VALIDATED;
@@ -785,6 +786,7 @@ mt76_airtime_report(struct mt76_dev *dev, struct mt76_rx_status *status,
 	};
 	struct ieee80211_sta *sta;
 	u32 airtime;
+	u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
 
 	airtime = ieee80211_calc_rx_airtime(dev->hw, &info, len);
 	spin_lock(&dev->cc_lock);
@@ -795,7 +797,7 @@ mt76_airtime_report(struct mt76_dev *dev, struct mt76_rx_status *status,
 		return;
 
 	sta = container_of((void *)wcid, struct ieee80211_sta, drv_priv);
-	ieee80211_sta_register_airtime(sta, status->tid, 0, airtime);
+	ieee80211_sta_register_airtime(sta, tidno, 0, airtime);
 }
 
 static void
@@ -823,7 +825,6 @@ mt76_airtime_flush_ampdu(struct mt76_dev *dev)
 static void
 mt76_airtime_check(struct mt76_dev *dev, struct sk_buff *skb)
 {
-	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_wcid *wcid = status->wcid;
 
@@ -831,6 +832,11 @@ mt76_airtime_check(struct mt76_dev *dev, struct sk_buff *skb)
 		return;
 
 	if (!wcid || !wcid->sta) {
+		struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
+
+		if (status->flag & RX_FLAG_8023)
+			return;
+
 		if (!ether_addr_equal(hdr->addr1, dev->phy.macaddr))
 			return;
 
@@ -864,10 +870,12 @@ mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb)
 	struct ieee80211_sta *sta;
 	struct ieee80211_hw *hw;
 	struct mt76_wcid *wcid = status->wcid;
+	u8 tidno = status->qos_ctl & IEEE80211_QOS_CTL_TID_MASK;
 	bool ps;
 
 	hw = mt76_phy_hw(dev, status->ext_phy);
-	if (ieee80211_is_pspoll(hdr->frame_control) && !wcid) {
+	if (ieee80211_is_pspoll(hdr->frame_control) && !wcid &&
+	    !(status->flag & RX_FLAG_8023)) {
 		sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr2, NULL);
 		if (sta)
 			wcid = status->wcid = (struct mt76_wcid *)sta->drv_priv;
@@ -885,6 +893,9 @@ mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb)
 
 	wcid->inactive_count = 0;
 
+	if (status->flag & RX_FLAG_8023)
+		return;
+
 	if (!test_bit(MT_WCID_FLAG_CHECK_PS, &wcid->flags))
 		return;
 
@@ -902,7 +913,7 @@ mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb)
 
 	if (ps && (ieee80211_is_data_qos(hdr->frame_control) ||
 		   ieee80211_is_qos_nullfunc(hdr->frame_control)))
-		ieee80211_sta_uapsd_trigger(sta, status->tid);
+		ieee80211_sta_uapsd_trigger(sta, tidno);
 
 	if (!!test_bit(MT_WCID_FLAG_PS, &wcid->flags) == ps)
 		return;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 8bf45497cfca..a3455e2e1545 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -499,7 +499,7 @@ struct mt76_rx_status {
 
 	u8 ext_phy:1;
 	u8 aggr:1;
-	u8 tid;
+	u8 qos_ctl;
 	u16 seqno;
 
 	u16 freq;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index cc4e7bc48294..c29b60a87819 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -651,7 +651,7 @@ mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb)
 
 	status->aggr = unicast &&
 		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
 	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 59fdd0fc2ad4..6a835568f4a3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -458,7 +458,7 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	status->aggr = unicast &&
 		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
 	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 771bad60e1bc..0da37867cb64 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -770,6 +770,7 @@ int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb,
 			   void *rxi)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
+	struct ieee80211_hdr *hdr;
 	struct mt76x02_rxwi *rxwi = rxi;
 	struct mt76x02_sta *sta;
 	u32 rxinfo = le32_to_cpu(rxwi->rxinfo);
@@ -864,7 +865,8 @@ int mt76x02_mac_process_rx(struct mt76x02_dev *dev, struct sk_buff *skb,
 	status->freq = dev->mphy.chandef.chan->center_freq;
 	status->band = dev->mphy.chandef.chan->band;
 
-	status->tid = FIELD_GET(MT_RXWI_TID, tid_sn);
+	hdr = (struct ieee80211_hdr *)skb->data;
+	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
 	status->seqno = FIELD_GET(MT_RXWI_SN, tid_sn);
 
 	return mt76x02_mac_process_rate(dev, status, rate);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index e5a258958ac9..63524b48e291 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -556,7 +556,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	status->aggr = unicast &&
 		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
 	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 3f9097481a5e..9468c9c3b9cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -530,7 +530,7 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 
 	status->aggr = unicast &&
 		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
 	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 
 	return 0;
-- 
cgit v1.2.3


From cc4b3c139ad308fcd0086baa9dc13bf60e1b802f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 11 Oct 2020 23:37:12 +0200
Subject: mt76: mt7915: enable hw rx-amsdu de-aggregation

Enable hw rx-amsdu de-aggregation support available in 7915 devices.
This is a preliminary patch to enable rx checksum offload

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c    | 49 +++++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt76.h        |  7 ++++
 drivers/net/wireless/mediatek/mt76/mt7915/init.c |  4 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  | 16 +++++++-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.h  |  4 ++
 5 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index f9381bd96244..ef31026ac9d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -508,6 +508,39 @@ void mt76_free_device(struct mt76_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt76_free_device);
 
+static void mt76_rx_release_amsdu(struct mt76_phy *phy, enum mt76_rxq_id q)
+{
+	struct sk_buff *skb = phy->rx_amsdu[q].head;
+	struct mt76_dev *dev = phy->dev;
+
+	phy->rx_amsdu[q].head = NULL;
+	phy->rx_amsdu[q].tail = NULL;
+	__skb_queue_tail(&dev->rx_skb[q], skb);
+}
+
+static void mt76_rx_release_burst(struct mt76_phy *phy, enum mt76_rxq_id q,
+				  struct sk_buff *skb)
+{
+	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
+
+	if (phy->rx_amsdu[q].head &&
+	    (!status->amsdu || status->first_amsdu ||
+	     status->seqno != phy->rx_amsdu[q].seqno))
+		mt76_rx_release_amsdu(phy, q);
+
+	if (!phy->rx_amsdu[q].head) {
+		phy->rx_amsdu[q].tail = &skb_shinfo(skb)->frag_list;
+		phy->rx_amsdu[q].seqno = status->seqno;
+		phy->rx_amsdu[q].head = skb;
+	} else {
+		*phy->rx_amsdu[q].tail = skb;
+		phy->rx_amsdu[q].tail = &skb->next;
+	}
+
+	if (!status->amsdu || status->last_amsdu)
+		mt76_rx_release_amsdu(phy, q);
+}
+
 void mt76_rx(struct mt76_dev *dev, enum mt76_rxq_id q, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
@@ -525,7 +558,8 @@ void mt76_rx(struct mt76_dev *dev, enum mt76_rxq_id q, struct sk_buff *skb)
 			phy->test.rx_stats.fcs_error[q]++;
 	}
 #endif
-	__skb_queue_tail(&dev->rx_skb[q], skb);
+
+	mt76_rx_release_burst(phy, q, skb);
 }
 EXPORT_SYMBOL_GPL(mt76_rx);
 
@@ -937,13 +971,26 @@ void mt76_rx_complete(struct mt76_dev *dev, struct sk_buff_head *frames,
 
 	spin_lock(&dev->rx_lock);
 	while ((skb = __skb_dequeue(frames)) != NULL) {
+		struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
+
 		if (mt76_check_ccmp_pn(skb)) {
 			dev_kfree_skb(skb);
 			continue;
 		}
 
+		skb_shinfo(skb)->frag_list = NULL;
 		mt76_rx_convert(dev, skb, &hw, &sta);
 		ieee80211_rx_list(hw, sta, skb, &list);
+
+		/* subsequent amsdu frames */
+		while (nskb) {
+			skb = nskb;
+			nskb = nskb->next;
+			skb->next = NULL;
+
+			mt76_rx_convert(dev, skb, &hw, &sta);
+			ieee80211_rx_list(hw, sta, skb, &list);
+		}
 	}
 	spin_unlock(&dev->rx_lock);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index a3455e2e1545..b301e5de30cc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -507,6 +507,7 @@ struct mt76_rx_status {
 	u8 enc_flags;
 	u8 encoding:2, bw:3, he_ru:3;
 	u8 he_gi:2, he_dcm:1;
+	u8 amsdu:1, first_amsdu:1, last_amsdu:1;
 	u8 rate_idx;
 	u8 nss;
 	u8 band;
@@ -600,6 +601,12 @@ struct mt76_phy {
 
 	struct delayed_work mac_work;
 	u8 mac_work_count;
+
+	struct {
+		struct sk_buff *head;
+		struct sk_buff **tail;
+		u16 seqno;
+	} rx_amsdu[__MT_RXQ_MAX];
 };
 
 struct mt76_dev {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index ad4e5b95158b..76dfcb76fb03 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -153,8 +153,8 @@ static void mt7915_mac_init(struct mt7915_dev *dev)
 	int i;
 
 	mt76_rmw_field(dev, MT_MDP_DCR1, MT_MDP_DCR1_MAX_RX_LEN, 1536);
-	/* disable hardware de-agg */
-	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
+	/* enable hardware de-agg */
+	mt76_set(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
 
 	for (i = 0; i < MT7915_WTBL_SIZE; i++)
 		mt7915_mac_wtbl_update(dev, i,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 63524b48e291..48408ec4ad3c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -320,8 +320,9 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 rxd3 = le32_to_cpu(rxd[3]);
+	u32 rxd4 = le32_to_cpu(rxd[4]);
 	bool unicast, insert_ccmp_hdr = false;
-	u8 remove_pad;
+	u8 remove_pad, amsdu_info;
 	int i, idx;
 
 	memset(status, 0, sizeof(*status));
@@ -338,6 +339,9 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	if (!test_bit(MT76_STATE_RUNNING, &mphy->state))
 		return -EINVAL;
 
+	if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+		return -EINVAL;
+
 	unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
 	status->wcid = mt7915_rx_get_wcid(dev, idx, unicast);
@@ -541,6 +545,16 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	skb_pull(skb, (u8 *)rxd - skb->data + 2 * remove_pad);
 
+	amsdu_info = FIELD_GET(MT_RXD4_NORMAL_PAYLOAD_FORMAT, rxd4);
+	status->amsdu = !!amsdu_info;
+	if (status->amsdu) {
+		status->first_amsdu = amsdu_info == MT_RXD4_FIRST_AMSDU_FRAME;
+		status->last_amsdu = amsdu_info == MT_RXD4_LAST_AMSDU_FRAME;
+		memmove(skb->data + 2, skb->data,
+			ieee80211_get_hdrlen_from_skb(skb));
+		skb_pull(skb, 2);
+	}
+
 	if (insert_ccmp_hdr) {
 		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 96ff3fb0d1f3..6ad8af835fd4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -86,6 +86,10 @@ enum rx_pkt_type {
 
 /* RXD DW4 */
 #define MT_RXD4_NORMAL_PAYLOAD_FORMAT	GENMASK(1, 0)
+#define MT_RXD4_FIRST_AMSDU_FRAME	GENMASK(1, 0)
+#define MT_RXD4_MID_AMSDU_FRAME		BIT(1)
+#define MT_RXD4_LAST_AMSDU_FRAME	BIT(0)
+
 #define MT_RXD4_NORMAL_PATTERN_DROP	BIT(9)
 #define MT_RXD4_NORMAL_CLS		BIT(10)
 #define MT_RXD4_NORMAL_OFLD		GENMASK(12, 11)
-- 
cgit v1.2.3


From 94244d2ea503455cddec5a4de28d59c74e1aaf47 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 16 Dec 2020 19:04:24 +0100
Subject: mt76: mt7915: add rx checksum offload support

Set skb->ip_summed to CHECKSUM_UNNECESSARY if the hardware has validated
the IP and TCP/UDP checksum

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 76dfcb76fb03..34a4fee24c48 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -93,6 +93,7 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 	hw->queues = 4;
 	hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	hw->netdev_features = NETIF_F_RXCSUM;
 
 	phy->slottime = 9;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 48408ec4ad3c..83dd504e7bf1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -317,10 +317,12 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	__le32 *rxd = (__le32 *)skb->data;
 	__le32 *rxv = NULL;
 	u32 mode = 0;
+	u32 rxd0 = le32_to_cpu(rxd[0]);
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 rxd3 = le32_to_cpu(rxd[3]);
 	u32 rxd4 = le32_to_cpu(rxd[4]);
+	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
 	bool unicast, insert_ccmp_hdr = false;
 	u8 remove_pad, amsdu_info;
 	int i, idx;
@@ -366,6 +368,9 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	if (!sband->channels)
 		return -EINVAL;
 
+	if ((rxd0 & csum_mask) == csum_mask)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
 	if (rxd1 & MT_RXD1_NORMAL_FCS_ERR)
 		status->flag |= RX_FLAG_FAILED_FCS_CRC;
 
-- 
cgit v1.2.3


From 90e3abf07c80a70f31227eea861f306312d5dbea Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 16 Dec 2020 21:13:13 +0100
Subject: mt76: mt7915: add support for rx decapsulation offload

For AP and Client mode, the hardware can pass received packets as 802.3 frames
that can be passed to the network stack as-is.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h        |  1 +
 drivers/net/wireless/mediatek/mt76/mt7915/init.c |  1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  | 44 +++++++++++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7915/mac.h  | 11 ++++++
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 21 +++++++++--
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c  | 25 ++++++++++++++
 6 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index b301e5de30cc..695b58d7eadc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -197,6 +197,7 @@ enum mt76_wcid_flags {
 	MT_WCID_FLAG_CHECK_PS,
 	MT_WCID_FLAG_PS,
 	MT_WCID_FLAG_4ADDR,
+	MT_WCID_FLAG_HDR_TRANS,
 };
 
 #define MT76_N_WCIDS 288
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 34a4fee24c48..c599b44a728c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -109,6 +109,7 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 
 	ieee80211_hw_set(hw, HAS_RATE_CONTROL);
 	ieee80211_hw_set(hw, SUPPORTS_TX_ENCAP_OFFLOAD);
+	ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD);
 	ieee80211_hw_set(hw, WANT_MONITOR_VIF);
 
 	hw->max_tx_fragments = 4;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 83dd504e7bf1..1a7291caf099 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -325,6 +325,10 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
 	bool unicast, insert_ccmp_hdr = false;
 	u8 remove_pad, amsdu_info;
+	bool hdr_trans;
+	u16 seq_ctrl = 0;
+	u8 qos_ctl = 0;
+	__le16 fc = 0;
 	int i, idx;
 
 	memset(status, 0, sizeof(*status));
@@ -346,6 +350,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
+	hdr_trans = rxd2 & MT_RXD2_NORMAL_HDR_TRANS;
 	status->wcid = mt7915_rx_get_wcid(dev, idx, unicast);
 
 	if (status->wcid) {
@@ -404,6 +409,13 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	rxd += 6;
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_4) {
+		u32 v0 = le32_to_cpu(rxd[0]);
+		u32 v2 = le32_to_cpu(rxd[2]);
+
+		fc = cpu_to_le16(FIELD_GET(MT_RXD6_FRAME_CONTROL, v0));
+		qos_ctl = FIELD_GET(MT_RXD8_QOS_CTL, v2);
+		seq_ctrl = FIELD_GET(MT_RXD8_SEQ_CTRL, v2);
+
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
@@ -555,28 +567,42 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	if (status->amsdu) {
 		status->first_amsdu = amsdu_info == MT_RXD4_FIRST_AMSDU_FRAME;
 		status->last_amsdu = amsdu_info == MT_RXD4_LAST_AMSDU_FRAME;
-		memmove(skb->data + 2, skb->data,
-			ieee80211_get_hdrlen_from_skb(skb));
-		skb_pull(skb, 2);
+		if (!hdr_trans) {
+			memmove(skb->data + 2, skb->data,
+				ieee80211_get_hdrlen_from_skb(skb));
+			skb_pull(skb, 2);
+		}
 	}
 
-	if (insert_ccmp_hdr) {
+	if (insert_ccmp_hdr && !hdr_trans) {
 		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
 		mt76_insert_ccmp_hdr(skb, key_id);
 	}
 
+	if (!hdr_trans) {
+		hdr = mt76_skb_get_hdr(skb);
+		fc = hdr->frame_control;
+		if (ieee80211_is_data_qos(fc)) {
+			seq_ctrl = le16_to_cpu(hdr->seq_ctrl);
+			qos_ctl = *ieee80211_get_qos_ctl(hdr);
+		}
+	} else {
+		status->flag &= ~(RX_FLAG_RADIOTAP_HE |
+				  RX_FLAG_RADIOTAP_HE_MU);
+		status->flag |= RX_FLAG_8023;
+	}
+
 	if (rxv && status->flag & RX_FLAG_RADIOTAP_HE)
 		mt7915_mac_decode_he_radiotap(skb, status, rxv, mode);
 
-	hdr = mt76_skb_get_hdr(skb);
-	if (!status->wcid || !ieee80211_is_data_qos(hdr->frame_control))
+	if (!status->wcid || !ieee80211_is_data_qos(fc))
 		return 0;
 
 	status->aggr = unicast &&
-		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
-	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+		       !ieee80211_is_qos_nullfunc(fc);
+	status->qos_ctl = qos_ctl;
+	status->seqno = IEEE80211_SEQ_TO_SN(seq_ctrl);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 6ad8af835fd4..0f929fb53027 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -101,6 +101,17 @@ enum rx_pkt_type {
 
 #define MT_RXV_HDR_BAND_IDX		BIT(24)
 
+/* RXD GROUP4 */
+#define MT_RXD6_FRAME_CONTROL		GENMASK(15, 0)
+#define MT_RXD6_TA_LO			GENMASK(31, 16)
+
+#define MT_RXD7_TA_HI			GENMASK(31, 0)
+
+#define MT_RXD8_SEQ_CTRL		GENMASK(15, 0)
+#define MT_RXD8_QOS_CTL			GENMASK(31, 16)
+
+#define MT_RXD9_HT_CONTROL		GENMASK(31, 0)
+
 /* P-RXV */
 #define MT_PRXV_TX_RATE			GENMASK(6, 0)
 #define MT_PRXV_TX_DCM			BIT(4)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index d4969b2e1ffb..2406733f542d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -34,14 +34,14 @@ static int mt7915_start(struct ieee80211_hw *hw)
 
 	if (!running) {
 		mt7915_mcu_set_pm(dev, 0, 0);
-		mt7915_mcu_set_mac(dev, 0, true, false);
+		mt7915_mcu_set_mac(dev, 0, true, true);
 		mt7915_mcu_set_scs(dev, 0, true);
 		mt7915_mac_enable_nf(dev, 0);
 	}
 
 	if (phy != &dev->phy) {
 		mt7915_mcu_set_pm(dev, 1, 0);
-		mt7915_mcu_set_mac(dev, 1, true, false);
+		mt7915_mcu_set_mac(dev, 1, true, true);
 		mt7915_mcu_set_scs(dev, 1, true);
 		mt7915_mac_enable_nf(dev, 1);
 	}
@@ -888,6 +888,22 @@ static void mt7915_sta_set_4addr(struct ieee80211_hw *hw,
 	mt7915_mcu_sta_update_hdr_trans(dev, vif, sta);
 }
 
+static void mt7915_sta_set_decap_offload(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_sta *sta,
+				 bool enabled)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+
+	if (enabled)
+		set_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+	else
+		clear_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+
+	mt7915_mcu_sta_update_hdr_trans(dev, vif, sta);
+}
+
 const struct ieee80211_ops mt7915_ops = {
 	.tx = mt7915_tx,
 	.start = mt7915_start,
@@ -920,6 +936,7 @@ const struct ieee80211_ops mt7915_ops = {
 	.set_coverage_class = mt7915_set_coverage_class,
 	.sta_statistics = mt7915_sta_statistics,
 	.sta_set_4addr = mt7915_sta_set_4addr,
+	.sta_set_decap_offload = mt7915_sta_set_decap_offload,
 	CFG80211_TESTMODE_CMD(mt76_testmode_cmd)
 	CFG80211_TESTMODE_DUMP(mt76_testmode_dump)
 #ifdef CONFIG_MAC80211_DEBUGFS
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 195929242b72..6a8a1aa50756 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1685,6 +1685,7 @@ mt7915_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 		return;
 
 	msta = (struct mt7915_sta *)sta->drv_priv;
+	htr->no_rx_trans = !test_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
 	if (test_bit(MT_WCID_FLAG_4ADDR, &msta->wcid.flags)) {
 		htr->to_ds = true;
 		htr->from_ds = true;
@@ -2869,6 +2870,27 @@ void mt7915_mcu_exit(struct mt7915_dev *dev)
 	skb_queue_purge(&dev->mt76.mcu.res_q);
 }
 
+static int
+mt7915_mcu_set_rx_hdr_trans_blacklist(struct mt7915_dev *dev, int band)
+{
+	struct {
+		u8 operation;
+		u8 count;
+		u8 _rsv[2];
+		u8 index;
+		u8 enable;
+		__le16 etype;
+	} req = {
+		.operation = 1,
+		.count = 1,
+		.enable = 1,
+		.etype = cpu_to_le16(ETH_P_PAE),
+	};
+
+	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(RX_HDR_TRANS),
+				 &req, sizeof(req), false);
+}
+
 int mt7915_mcu_set_mac(struct mt7915_dev *dev, int band,
 		       bool enable, bool hdr_trans)
 {
@@ -2899,6 +2921,9 @@ int mt7915_mcu_set_mac(struct mt7915_dev *dev, int band,
 	if (ret)
 		return ret;
 
+	if (hdr_trans)
+		mt7915_mcu_set_rx_hdr_trans_blacklist(dev, band);
+
 	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD(MAC_INIT_CTRL),
 				 &req_mac, sizeof(req_mac), true);
 }
-- 
cgit v1.2.3


From 730d6d0da8d8f5905faafe645a5b3c08ac3f5a8f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 8 Feb 2021 17:07:27 +0100
Subject: mt76: mt7615: fix key set/delete issues

There were multiple issues in the current key set/remove code:
- deleting a key with the previous key index deletes the current key
- BIP key would only be uploaded correctly initially and corrupted on rekey

Rework the code to better keep track of multiple keys and check for the
key index before deleting the current key

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h        |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 97 +++++++++++++-----------
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 18 +++--
 3 files changed, 65 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 695b58d7eadc..5561f81c64f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -223,6 +223,7 @@ struct mt76_wcid {
 
 	u16 idx;
 	u8 hw_key_idx;
+	u8 hw_key_idx2;
 
 	u8 sta:1;
 	u8 ext_phy:1;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6a835568f4a3..c97ec70772f3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1033,7 +1033,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_set_rates);
 static int
 mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			   struct ieee80211_key_conf *key,
-			   enum mt7615_cipher_type cipher,
+			   enum mt7615_cipher_type cipher, u16 cipher_mask,
 			   enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4;
@@ -1050,22 +1050,22 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			memcpy(data + 16, key->key + 24, 8);
 			memcpy(data + 24, key->key + 16, 8);
 		} else {
-			if (cipher != MT_CIPHER_BIP_CMAC_128 && wcid->cipher)
-				memmove(data + 16, data, 16);
-			if (cipher != MT_CIPHER_BIP_CMAC_128 || !wcid->cipher)
+			if (cipher_mask == BIT(cipher))
 				memcpy(data, key->key, key->keylen);
-			else if (cipher == MT_CIPHER_BIP_CMAC_128)
+			else if (cipher != MT_CIPHER_BIP_CMAC_128)
+				memcpy(data, key->key, 16);
+			if (cipher == MT_CIPHER_BIP_CMAC_128)
 				memcpy(data + 16, key->key, 16);
 		}
 	} else {
-		if (wcid->cipher & ~BIT(cipher)) {
-			if (cipher != MT_CIPHER_BIP_CMAC_128)
-				memmove(data, data + 16, 16);
+		if (cipher == MT_CIPHER_BIP_CMAC_128)
 			memset(data + 16, 0, 16);
-		} else {
+		else if (cipher_mask)
+			memset(data, 0, 16);
+		if (!cipher_mask)
 			memset(data, 0, sizeof(data));
-		}
 	}
+
 	mt76_wr_copy(dev, addr, data, sizeof(data));
 
 	return 0;
@@ -1073,7 +1073,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 static int
 mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			  enum mt7615_cipher_type cipher,
+			  enum mt7615_cipher_type cipher, u16 cipher_mask,
 			  int keyidx, enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1;
@@ -1083,20 +1083,23 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 	w0 = mt76_rr(dev, addr);
 	w1 = mt76_rr(dev, addr + 4);
-	if (cmd == SET_KEY) {
-		w0 |= MT_WTBL_W0_RX_KEY_VALID |
-		      FIELD_PREP(MT_WTBL_W0_RX_IK_VALID,
-				 cipher == MT_CIPHER_BIP_CMAC_128);
-		if (cipher != MT_CIPHER_BIP_CMAC_128 ||
-		    !wcid->cipher)
-			w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx);
-	}  else {
-		if (!(wcid->cipher & ~BIT(cipher)))
-			w0 &= ~(MT_WTBL_W0_RX_KEY_VALID |
-				MT_WTBL_W0_KEY_IDX);
-		if (cipher == MT_CIPHER_BIP_CMAC_128)
-			w0 &= ~MT_WTBL_W0_RX_IK_VALID;
+
+	if (cipher_mask)
+		w0 |= MT_WTBL_W0_RX_KEY_VALID;
+	else
+		w0 &= ~(MT_WTBL_W0_RX_KEY_VALID | MT_WTBL_W0_KEY_IDX);
+	if (cipher_mask & BIT(MT_CIPHER_BIP_CMAC_128))
+		w0 |= MT_WTBL_W0_RX_IK_VALID;
+	else
+		w0 &= ~MT_WTBL_W0_RX_IK_VALID;
+
+	if (cmd == SET_KEY &&
+	    (cipher != MT_CIPHER_BIP_CMAC_128 ||
+	     cipher_mask == BIT(cipher))) {
+		w0 &= ~MT_WTBL_W0_KEY_IDX;
+		w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx);
 	}
+
 	mt76_wr(dev, MT_WTBL_RICR0, w0);
 	mt76_wr(dev, MT_WTBL_RICR1, w1);
 
@@ -1109,24 +1112,25 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 static void
 mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			      enum mt7615_cipher_type cipher,
+			      enum mt7615_cipher_type cipher, u16 cipher_mask,
 			      enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx);
 
-	if (cmd == SET_KEY) {
-		if (cipher != MT_CIPHER_BIP_CMAC_128 || !wcid->cipher)
-			mt76_rmw(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE,
-				 FIELD_PREP(MT_WTBL_W2_KEY_TYPE, cipher));
-	} else {
-		if (cipher != MT_CIPHER_BIP_CMAC_128 &&
-		    wcid->cipher & BIT(MT_CIPHER_BIP_CMAC_128))
-			mt76_rmw(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE,
-				 FIELD_PREP(MT_WTBL_W2_KEY_TYPE,
-					    MT_CIPHER_BIP_CMAC_128));
-		else if (!(wcid->cipher & ~BIT(cipher)))
-			mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE);
+	if (!cipher_mask) {
+		mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE);
+		return;
 	}
+
+	if (cmd != SET_KEY)
+		return;
+
+	if (cipher == MT_CIPHER_BIP_CMAC_128 &&
+	    cipher_mask & ~BIT(MT_CIPHER_BIP_CMAC_128))
+		return;
+
+	mt76_rmw(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE,
+		 FIELD_PREP(MT_WTBL_W2_KEY_TYPE, cipher));
 }
 
 int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
@@ -1135,25 +1139,30 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			      enum set_key_cmd cmd)
 {
 	enum mt7615_cipher_type cipher;
+	u16 cipher_mask = wcid->cipher;
 	int err;
 
 	cipher = mt7615_mac_get_cipher(key->cipher);
 	if (cipher == MT_CIPHER_NONE)
 		return -EOPNOTSUPP;
 
-	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cmd);
-	err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cmd);
+	if (cmd == SET_KEY)
+		cipher_mask |= BIT(cipher);
+	else
+		cipher_mask &= ~BIT(cipher);
+
+	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask, cmd);
+	err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask,
+					 cmd);
 	if (err < 0)
 		return err;
 
-	err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, key->keyidx, cmd);
+	err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, cipher_mask,
+					key->keyidx, cmd);
 	if (err < 0)
 		return err;
 
-	if (cmd == SET_KEY)
-		wcid->cipher |= BIT(cipher);
-	else
-		wcid->cipher &= ~BIT(cipher);
+	wcid->cipher = cipher_mask;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 25faf486d279..1aa6928f88fc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -337,7 +337,8 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	struct mt7615_sta *msta = sta ? (struct mt7615_sta *)sta->drv_priv :
 				  &mvif->sta;
 	struct mt76_wcid *wcid = &msta->wcid;
-	int idx = key->keyidx, err;
+	int idx = key->keyidx, err = 0;
+	u8 *wcid_keyidx = &wcid->hw_key_idx;
 
 	/* The hardware does not support per-STA RX GTK, fallback
 	 * to software mode for these.
@@ -352,6 +353,7 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	/* fall back to sw encryption for unsupported ciphers */
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_AES_CMAC:
+		wcid_keyidx = &wcid->hw_key_idx2;
 		key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
@@ -369,12 +371,13 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 
 	mt7615_mutex_acquire(dev);
 
-	if (cmd == SET_KEY) {
-		key->hw_key_idx = wcid->idx;
-		wcid->hw_key_idx = idx;
-	} else if (idx == wcid->hw_key_idx) {
-		wcid->hw_key_idx = -1;
-	}
+	if (cmd == SET_KEY)
+		*wcid_keyidx = idx;
+	else if (idx == *wcid_keyidx)
+		*wcid_keyidx = -1;
+	else
+		goto out;
+
 	mt76_wcid_key_setup(&dev->mt76, wcid,
 			    cmd == SET_KEY ? key : NULL);
 
@@ -383,6 +386,7 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	else
 		err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
 
+out:
 	mt7615_mutex_release(dev);
 
 	return err;
-- 
cgit v1.2.3


From ebee7885bb12a8fe2c2f9bac87dbd87a05b645f9 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 13 Feb 2021 18:42:15 +0100
Subject: mt76: mt7615: fix tx skb dma unmap

The first pointer in the txp needs to be unmapped as well, otherwise it will
leak DMA mapping entries

Fixes: 27d5c528a7ca ("mt76: fix double DMA unmap of the first buffer on 7615/7915")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index c97ec70772f3..9bae2f66e1ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -690,7 +690,7 @@ mt7615_txp_skb_unmap_fw(struct mt76_dev *dev, struct mt7615_fw_txp *txp)
 {
 	int i;
 
-	for (i = 1; i < txp->nbuf; i++)
+	for (i = 0; i < txp->nbuf; i++)
 		dma_unmap_single(dev->dev, le32_to_cpu(txp->buf[i]),
 				 le16_to_cpu(txp->len[i]), DMA_TO_DEVICE);
 }
-- 
cgit v1.2.3


From 7dcf3c04f0aca746517a77433b33d40868ca4749 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 13 Feb 2021 18:44:10 +0100
Subject: mt76: mt7915: fix tx skb dma unmap

The first pointer in the txp needs to be unmapped as well, otherwise it will
leak DMA mapping entries

Reported-by: Ben Greear <greearb@candelatech.com>
Fixes: 27d5c528a7ca ("mt76: fix double DMA unmap of the first buffer on 7615/7915")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 1a7291caf099..b5a41cb7fbda 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1136,7 +1136,7 @@ void mt7915_txp_skb_unmap(struct mt76_dev *dev,
 	int i;
 
 	txp = mt7915_txwi_to_txp(dev, t);
-	for (i = 1; i < txp->nbuf; i++)
+	for (i = 0; i < txp->nbuf; i++)
 		dma_unmap_single(dev->dev, le32_to_cpu(txp->buf[i]),
 				 le16_to_cpu(txp->len[i]), DMA_TO_DEVICE);
 }
-- 
cgit v1.2.3


From 858ebf446bee7d5077bd99488aae617908c3f4fe Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Tue, 2 Feb 2021 09:59:53 +0100
Subject: mt76: mt7615: support loading EEPROM for MT7613BE

EEPROM blobs for MT7613BE radios start with (little endian) 0x7663,
which is also the PCI device ID for this device. The EEPROM is required
for the radio to work at useful power levels, otherwise only the lowest
power level is available.

Suggested-by: Georgi Vlaev <georgi.vlaev@konsulko.com>
Tested-by: Stijn Segers <foss@volatilesystems.org>
Signed-off-by: Sander Vanheule <sander@svanheule.net>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 2eab23898c77..6dbaaf95ee38 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -86,6 +86,7 @@ static int mt7615_check_eeprom(struct mt76_dev *dev)
 	switch (val) {
 	case 0x7615:
 	case 0x7622:
+	case 0x7663:
 		return 0;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3


From c1941b8902794c55328a220c5bf3654b1b54ba09 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 30 Jan 2021 13:28:36 +0100
Subject: mt76: mt7921: enable random mac addr during scanning

Notify mac80211 mt7921 devices support random mac address during hw
frequency scanning

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 89a13b4a74a4..4070f7156aa5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -95,6 +95,7 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 	wiphy->reg_notifier = mt7921_regd_notifier;
 
+	wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL);
 
 	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
-- 
cgit v1.2.3


From c2fa8edcca3b87c1d5cd6b9de00cb649abd636f5 Mon Sep 17 00:00:00 2001
From: Nigel Christian <nigel.l.christian@gmail.com>
Date: Sat, 13 Feb 2021 22:02:37 -0500
Subject: mt76: mt7921: remove unnecessary variable

In mt7921_pm_set() the variable "ret" is initialized to zero
and then returned. Remove it and return zero.

Signed-off-by: Nigel Christian <nigel.l.christian@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 0dc8e25e18e4..4bc3dd99bd14 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -164,7 +164,6 @@ mt7921_pm_set(void *data, u64 val)
 {
 	struct mt7921_dev *dev = data;
 	struct mt76_phy *mphy = dev->phy.mt76;
-	int ret = 0;
 
 	mt7921_mutex_acquire(dev);
 
@@ -175,7 +174,7 @@ mt7921_pm_set(void *data, u64 val)
 					    mt7921_pm_interface_iter, mphy->priv);
 	mt7921_mutex_release(dev);
 
-	return ret;
+	return 0;
 }
 
 static int
-- 
cgit v1.2.3


From 00ac71ff8dc8b8b9ea07947164e8e5e06c520807 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 6 Feb 2021 11:35:41 +0100
Subject: mt76: mt7921: removed unused definitions in mcu.h

Get rid of the following definitions in mt7921/mcu.h since they are not
actually used:
- MT7921_WTBL_UPDATE_MAX_SIZE
- MT7921_STA_UPDATE_MAX_SIZE
- MT7921_WTBL_UPDATE_BA_SIZE

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.h | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index 2fdc62367b3f..fd67838cb565 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -251,29 +251,6 @@ enum {
 	MT_IBF = BIT(1)		/* implicit beamforming */
 };
 
-#define MT7921_WTBL_UPDATE_MAX_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_generic) +	\
-					 sizeof(struct wtbl_rx) +	\
-					 sizeof(struct wtbl_ht) +	\
-					 sizeof(struct wtbl_vht) +	\
-					 sizeof(struct wtbl_hdr_trans) +\
-					 sizeof(struct wtbl_ba) +	\
-					 sizeof(struct wtbl_smps))
-
-#define MT7921_STA_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
-					 sizeof(struct sta_rec_basic) +	\
-					 sizeof(struct sta_rec_ht) +	\
-					 sizeof(struct sta_rec_he) +	\
-					 sizeof(struct sta_rec_ba) +	\
-					 sizeof(struct sta_rec_vht) +	\
-					 sizeof(struct sta_rec_uapsd) + \
-					 sizeof(struct sta_rec_amsdu) +	\
-					 sizeof(struct tlv) +		\
-					 MT7921_WTBL_UPDATE_MAX_SIZE)
-
-#define MT7921_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_ba))
-
 #define STA_CAP_WMM			BIT(0)
 #define STA_CAP_SGI_20			BIT(4)
 #define STA_CAP_SGI_40			BIT(5)
-- 
cgit v1.2.3


From be2a2872d0b1db26222cfcd829f2d9e08f4c84c9 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 5 Feb 2021 13:13:01 +0800
Subject: mt76: always use WTBL_MAX_SIZE for tlv allocation

Simplify WTBL defines.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    | 24 ----------------------
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   |  2 +-
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  3 ---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |  3 ---
 5 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 3874f45da9eb..411ebb56baef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -362,30 +362,6 @@ enum {
 	BSS_INFO_MAX_NUM
 };
 
-#define MT7615_WTBL_UPDATE_MAX_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_generic) +	\
-					 sizeof(struct wtbl_rx) +	\
-					 sizeof(struct wtbl_ht) +	\
-					 sizeof(struct wtbl_vht) +	\
-					 sizeof(struct wtbl_tx_ps) +	\
-					 sizeof(struct wtbl_hdr_trans) +\
-					 sizeof(struct wtbl_ba) +	\
-					 sizeof(struct wtbl_bf) +	\
-					 sizeof(struct wtbl_smps) +	\
-					 sizeof(struct wtbl_pn) +	\
-					 sizeof(struct wtbl_spe))
-
-#define MT7615_STA_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
-					 sizeof(struct sta_rec_basic) +	\
-					 sizeof(struct sta_rec_ht) +	\
-					 sizeof(struct sta_rec_vht) +	\
-					 sizeof(struct sta_rec_uapsd) + \
-					 sizeof(struct tlv) +	\
-					 MT7615_WTBL_UPDATE_MAX_SIZE)
-
-#define MT7615_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_ba))
-
 enum {
 	CH_SWITCH_NORMAL = 0,
 	CH_SWITCH_SCAN = 3,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 6cbccfb05f8b..e6ad6cbeb803 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -287,7 +287,7 @@ mt76_connac_mcu_alloc_wtbl_req(struct mt76_dev *dev, struct mt76_wcid *wcid,
 				     &hdr.wlan_idx_hi);
 	if (!nskb) {
 		nskb = mt76_mcu_msg_alloc(dev, NULL,
-					  MT76_CONNAC_WTBL_UPDATE_BA_SIZE);
+					  MT76_CONNAC_WTBL_UPDATE_MAX_SIZE);
 		if (!nskb)
 			return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index c1e1df5f7cd7..9c26b90655a7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -304,9 +304,6 @@ struct wtbl_raw {
 					 sizeof(struct tlv) +		\
 					 MT76_CONNAC_WTBL_UPDATE_MAX_SIZE)
 
-#define MT76_CONNAC_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_ba))
-
 enum {
 	STA_REC_BASIC,
 	STA_REC_RA,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 6a8a1aa50756..a9c4a003560d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -592,7 +592,7 @@ mt7915_mcu_alloc_wtbl_req(struct mt7915_dev *dev, struct mt7915_sta *msta,
 
 	if (!nskb) {
 		nskb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
-					  MT7915_WTBL_UPDATE_BA_SIZE);
+					  MT7915_WTBL_UPDATE_MAX_SIZE);
 		if (!nskb)
 			return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 2d584142c27b..2a2f7bf2c94b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -1080,9 +1080,6 @@ enum {
 					 sizeof(struct tlv) +		\
 					 MT7915_WTBL_UPDATE_MAX_SIZE)
 
-#define MT7915_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
-					 sizeof(struct wtbl_ba))
-
 #define MT7915_BSS_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
 					 sizeof(struct bss_info_omac) +	\
 					 sizeof(struct bss_info_basic) +\
-- 
cgit v1.2.3


From 4b36cc6b390f18dbc59a45fb4141f90d7dfe2b23 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sun, 7 Feb 2021 16:09:17 +0100
Subject: mt76: mt76x0: disable GTK offloading

When operating two VAP on a MT7610 with encryption (PSK2, SAE, OWE),
only the first one to be created will transmit properly encrypteded
frames.

All subsequently created VAPs will sent out frames with the payload left
unencrypted, breaking multicast traffic (ICMP6 NDP) and potentially
disclosing information to a third party.

Disable GTK offloading and encrypt these frames in software to
circumvent this issue. THis only seems to be necessary on MT7610 chips,
as MT7612 is not affected from our testing.

Signed-off-by: David Bauer <mail@david-bauer.net>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index ab671e21f882..02db5d66735d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -447,6 +447,10 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	    !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
 		return -EOPNOTSUPP;
 
+	/* MT76x0 GTK offloading does not work with more than one VIF */
+	if (is_mt76x0(dev) && !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+		return -EOPNOTSUPP;
+
 	msta = sta ? (struct mt76x02_sta *)sta->drv_priv : NULL;
 	wcid = msta ? &msta->wcid : &mvif->group_wcid;
 
-- 
cgit v1.2.3


From baa3afb39e94965f4ca5b5d3d274379504b8fa24 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 12 Feb 2021 12:51:05 +0100
Subject: mt76: connac: always check return value from
 mt76_connac_mcu_alloc_wtbl_req

Even if this is not a real bug since mt76_connac_mcu_alloc_wtbl_req routine
can fails just if nskb is NULL , always check return value from
mt76_connac_mcu_alloc_wtbl_req in order to avoid possible future
mistake.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c      | 3 +++
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 631596fc2f36..4ecbd5406e2a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1040,6 +1040,9 @@ mt7615_mcu_sta_ba(struct mt7615_dev *dev,
 
 	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
 						  WTBL_SET, sta_wtbl, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	mt76_connac_mcu_wtbl_ba_tlv(&dev->mt76, skb, params, enable, tx,
 				    sta_wtbl, wtbl_hdr);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index e6ad6cbeb803..ac8d6ad76054 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -833,6 +833,9 @@ int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
 	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(dev, wcid,
 						  WTBL_RESET_AND_SET,
 						  sta_wtbl, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	if (enable) {
 		mt76_connac_mcu_wtbl_generic_tlv(dev, skb, vif, sta, sta_wtbl,
 						 wtbl_hdr);
-- 
cgit v1.2.3


From 45f93e368211fbbd247e1ece254ffb121e20fa10 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 12 Feb 2021 20:30:04 +0100
Subject: mt76: mt7915: always check return value from
 mt7915_mcu_alloc_wtbl_req

As done for mt76_connac_mcu_alloc_wtbl_req, even if this is not a real
bug since mt7915_mcu_alloc_wtbl_req routine can fails just if nskb is NULL,
always check return value from mt7915_mcu_alloc_wtbl_req in order to avoid
possible future mistake.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index a9c4a003560d..6c429a2b15c5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1188,6 +1188,9 @@ mt7915_mcu_sta_ba(struct mt7915_dev *dev,
 
 	wtbl_hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, sta_wtbl,
 					     &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	mt7915_mcu_wtbl_ba_tlv(skb, params, enable, tx, sta_wtbl, wtbl_hdr);
 
 	ret = mt76_mcu_skb_send_msg(&dev->mt76, skb,
@@ -1705,6 +1708,9 @@ int mt7915_mcu_sta_update_hdr_trans(struct mt7915_dev *dev,
 		return -ENOMEM;
 
 	wtbl_hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, NULL, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	mt7915_mcu_wtbl_hdr_trans_tlv(skb, vif, sta, NULL, wtbl_hdr);
 
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD(WTBL_UPDATE),
@@ -1729,6 +1735,9 @@ int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 	wtbl_hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, sta_wtbl,
 					     &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	mt7915_mcu_wtbl_smps_tlv(skb, sta, sta_wtbl, wtbl_hdr);
 
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb,
@@ -2254,6 +2263,9 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 	wtbl_hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_RESET_AND_SET,
 					     sta_wtbl, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
 	if (enable) {
 		mt7915_mcu_wtbl_generic_tlv(skb, vif, sta, sta_wtbl, wtbl_hdr);
 		mt7915_mcu_wtbl_hdr_trans_tlv(skb, vif, sta, sta_wtbl, wtbl_hdr);
-- 
cgit v1.2.3


From 7715a1d54bdf47f89194b0a088e07dcd76820f51 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 2 Feb 2021 21:57:59 +0800
Subject: mt76: use PCI_VENDOR_ID_MEDIATEK to avoid open coded

Use PCI standard defines.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/pci.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c | 6 +++---
 drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 6 +++---
 drivers/net/wireless/mediatek/mt76/mt76x2/pci.c | 6 +++---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/pci.c b/drivers/net/wireless/mediatek/mt76/mt7603/pci.c
index 06fa28f645f2..aa6cb668b012 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/pci.c
@@ -7,7 +7,7 @@
 #include "mt7603.h"
 
 static const struct pci_device_id mt76pci_device_table[] = {
-	{ PCI_DEVICE(0x14c3, 0x7603) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7603) },
 	{ },
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 71487f532f36..11f169cdd603 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -13,9 +13,9 @@
 #include "mcu.h"
 
 static const struct pci_device_id mt7615_pci_device_table[] = {
-	{ PCI_DEVICE(0x14c3, 0x7615) },
-	{ PCI_DEVICE(0x14c3, 0x7663) },
-	{ PCI_DEVICE(0x14c3, 0x7611) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7615) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7663) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7611) },
 	{ },
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 02d0aa0b815e..5847f943e8da 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -221,9 +221,9 @@ mt76x0e_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id mt76x0e_device_table[] = {
-	{ PCI_DEVICE(0x14c3, 0x7610) },
-	{ PCI_DEVICE(0x14c3, 0x7630) },
-	{ PCI_DEVICE(0x14c3, 0x7650) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7610) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7630) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7650) },
 	{ },
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index ecaf85b483ac..adf288e50e21 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -10,9 +10,9 @@
 #include "mt76x2.h"
 
 static const struct pci_device_id mt76x2e_device_table[] = {
-	{ PCI_DEVICE(0x14c3, 0x7662) },
-	{ PCI_DEVICE(0x14c3, 0x7612) },
-	{ PCI_DEVICE(0x14c3, 0x7602) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7662) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7612) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7602) },
 	{ },
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 5570b4a50531..33ed952d7f4c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -13,7 +13,7 @@
 #include "../trace.h"
 
 static const struct pci_device_id mt7921_pci_device_table[] = {
-	{ PCI_DEVICE(0x14c3, 0x7961) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7961) },
 	{ },
 };
 
-- 
cgit v1.2.3


From 5e30931494b4940ba74fa5796ca0b6d7e4c84e88 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:44 +0100
Subject: mt76: mt7921: fix suspend/resume sequence

Any pcie access should happen in pci D0 state and we should give ownership
back to the device at the end of the suspend procedure.

Fixes: 1d8efc741df80 ("mt76: mt7921: introduce Runtime PM support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 33ed952d7f4c..7e040c38c3b7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -209,12 +209,12 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	/* disable interrupt */
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 
-	pci_save_state(pdev);
-	err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	err = mt7921_mcu_fw_pmctrl(dev);
 	if (err)
 		goto restore;
 
-	err = mt7921_mcu_drv_pmctrl(dev);
+	pci_save_state(pdev);
+	err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
 	if (err)
 		goto restore;
 
@@ -237,16 +237,16 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	int i, err;
 
-	err = mt7921_mcu_fw_pmctrl(dev);
-	if (err < 0)
-		return err;
-
 	err = pci_set_power_state(pdev, PCI_D0);
 	if (err)
 		return err;
 
 	pci_restore_state(pdev);
 
+	err = mt7921_mcu_drv_pmctrl(dev);
+	if (err < 0)
+		return err;
+
 	/* enable interrupt */
 	mt7921_l1_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
-- 
cgit v1.2.3


From 782b3e86ea970e899f8e723db9f64708a15ca30e Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:45 +0100
Subject: mt76: mt7921: fix memory leak in mt7921_coredump_work

Fix possible memory leak in mt7921_coredump_work.

Fixes: 1c099ab44727c ("mt76: mt7921: add MCU support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 9468c9c3b9cf..8befa8167cde 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1503,8 +1503,10 @@ void mt7921_coredump_work(struct work_struct *work)
 			break;
 
 		skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
-		if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ)
-			break;
+		if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
+			dev_kfree_skb(skb);
+			continue;
+		}
 
 		memcpy(data, skb->data, skb->len);
 		data += skb->len;
-- 
cgit v1.2.3


From 159f6dd619d953e2a137bfb42cbce7cbed621c76 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:46 +0100
Subject: mt76: mt7921: switch to new api for hardware beacon filter

Current firmware only supports new api for enabling hardware beacon filter.

Fixes: 1d8efc741df80 ("mt76: mt7921: introduce Runtime PM support")
Beacon filter cmd have to rely on the associatied access point's beacon
interval and DTIM information.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 49 ++++++++++++++++--------
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c  |  8 +++-
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 729f6c42cdde..11d85cf9cfb8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -295,15 +295,6 @@ static int mt7921_add_interface(struct ieee80211_hw *hw,
 	if (ret)
 		goto out;
 
-	if (dev->pm.enable) {
-		ret = mt7921_mcu_set_bss_pm(dev, vif, true);
-		if (ret)
-			goto out;
-
-		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
-		mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
-	}
-
 	dev->mt76.vif_mask |= BIT(mvif->mt76.idx);
 	phy->omac_mask |= BIT_ULL(mvif->mt76.omac_idx);
 
@@ -349,13 +340,6 @@ static void mt7921_remove_interface(struct ieee80211_hw *hw,
 		phy->monitor_vif = NULL;
 
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
-
-	if (dev->pm.enable) {
-		mt7921_mcu_set_bss_pm(dev, vif, false);
-		mt76_clear(dev, MT_WF_RFCR(0),
-			   MT_WF_RFCR_DROP_OTHER_BEACON);
-	}
-
 	mt76_connac_mcu_uni_add_dev(&dev->mphy, vif, &mvif->sta.wcid, false);
 
 	rcu_assign_pointer(dev->mt76.wcid[idx], NULL);
@@ -561,6 +545,36 @@ static void mt7921_configure_filter(struct ieee80211_hw *hw,
 	mt7921_mutex_release(dev);
 }
 
+static int
+mt7921_bss_bcnft_apply(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+		       bool assoc)
+{
+	int ret;
+
+	if (!dev->pm.enable)
+		return 0;
+
+	if (assoc) {
+		ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true);
+		if (ret)
+			return ret;
+
+		vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER;
+		mt76_set(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
+
+		return 0;
+	}
+
+	ret = mt7921_mcu_set_bss_pm(dev, vif, false);
+	if (ret)
+		return ret;
+
+	vif->driver_flags &= ~IEEE80211_VIF_BEACON_FILTER;
+	mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
+
+	return 0;
+}
+
 static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 				    struct ieee80211_vif *vif,
 				    struct ieee80211_bss_conf *info,
@@ -587,6 +601,9 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_PS)
 		mt7921_mcu_uni_bss_ps(dev, vif);
 
+	if (changed & BSS_CHANGED_ASSOC)
+		mt7921_bss_bcnft_apply(dev, vif, info->assoc);
+
 	mt7921_mutex_release(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index b5cc72e7e81c..b0f8c29f756b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1292,8 +1292,14 @@ mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 {
 	struct mt7921_phy *phy = priv;
 	struct mt7921_dev *dev = phy->dev;
+	int ret;
+
+	if (dev->pm.enable)
+		ret = mt7921_mcu_uni_bss_bcnft(dev, vif, true);
+	else
+		ret = mt7921_mcu_set_bss_pm(dev, vif, false);
 
-	if (mt7921_mcu_set_bss_pm(dev, vif, dev->pm.enable))
+	if (ret)
 		return;
 
 	if (dev->pm.enable) {
-- 
cgit v1.2.3


From a7e3033fcdb60ad51b03896ae99ad9447389bed0 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:47 +0100
Subject: mt76: connac: fix up the setting for ht40 mode in
 mt76_connac_mcu_uni_add_bss

Use proper value for ht40 mode configuration in mt76_connac_mcu_uni_add_bss
routine and not ht20 one

Fixes: d0e274af2f2e4 ("mt76: mt76_connac: create mcu library")
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index ac8d6ad76054..81515419b5b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1198,6 +1198,7 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 			.center_chan = ieee80211_frequency_to_channel(freq1),
 			.center_chan2 = ieee80211_frequency_to_channel(freq2),
 			.tx_streams = hweight8(phy->antenna_mask),
+			.ht_op_info = 4, /* set HT 40M allowed */
 			.rx_streams = phy->chainmask,
 			.short_st = true,
 		},
@@ -1290,6 +1291,7 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 	case NL80211_CHAN_WIDTH_20:
 	default:
 		rlm_req.rlm.bw = CMD_CBW_20MHZ;
+		rlm_req.rlm.ht_op_info = 0;
 		break;
 	}
 
-- 
cgit v1.2.3


From 0940605a2a70803fc3362eeccba4c31adf4e70e7 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:48 +0100
Subject: mt76: mt7921: fixup rx bitrate statistics

Since the related rx bitrate fields have been moved to group3 in Rxv,
fix rx bitrate statistics in mt7921_mac_fill_rx routine.

Fixes: 163f4d22c118d ("mt76: mt7921: add MAC support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 155 ++++++++++++------------
 drivers/net/wireless/mediatek/mt76/mt7921/mac.h |  10 +-
 2 files changed, 86 insertions(+), 79 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 8befa8167cde..76d5f194706f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -400,7 +400,9 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 
 	/* RXD Group 3 - P-RXV */
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_3) {
-		u32 v0, v1, v2;
+		u8 stbc, gi;
+		u32 v0, v1;
+		bool cck;
 
 		rxv = rxd;
 		rxd += 2;
@@ -409,7 +411,6 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 
 		v0 = le32_to_cpu(rxv[0]);
 		v1 = le32_to_cpu(rxv[1]);
-		v2 = le32_to_cpu(rxv[2]);
 
 		if (v0 & MT_PRXV_HT_AD_CODE)
 			status->enc_flags |= RX_ENC_FLAG_LDPC;
@@ -429,87 +430,87 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 					     status->chain_signal[i]);
 		}
 
-		/* RXD Group 5 - C-RXV */
-		if (rxd1 & MT_RXD1_NORMAL_GROUP_5) {
-			u8 stbc = FIELD_GET(MT_CRXV_HT_STBC, v2);
-			u8 gi = FIELD_GET(MT_CRXV_HT_SHORT_GI, v2);
-			bool cck = false;
+		stbc = FIELD_GET(MT_PRXV_STBC, v0);
+		gi = FIELD_GET(MT_PRXV_SGI, v0);
+		cck = false;
 
-			rxd += 18;
-			if ((u8 *)rxd - skb->data >= skb->len)
-				return -EINVAL;
+		idx = i = FIELD_GET(MT_PRXV_TX_RATE, v0);
+		mode = FIELD_GET(MT_PRXV_TX_MODE, v0);
 
-			idx = i = FIELD_GET(MT_PRXV_TX_RATE, v0);
-			mode = FIELD_GET(MT_CRXV_TX_MODE, v2);
-
-			switch (mode) {
-			case MT_PHY_TYPE_CCK:
-				cck = true;
-				fallthrough;
-			case MT_PHY_TYPE_OFDM:
-				i = mt76_get_rate(&dev->mt76, sband, i, cck);
-				break;
-			case MT_PHY_TYPE_HT_GF:
-			case MT_PHY_TYPE_HT:
-				status->encoding = RX_ENC_HT;
-				if (i > 31)
-					return -EINVAL;
-				break;
-			case MT_PHY_TYPE_VHT:
-				status->nss =
-					FIELD_GET(MT_PRXV_NSTS, v0) + 1;
-				status->encoding = RX_ENC_VHT;
-				if (i > 9)
-					return -EINVAL;
-				break;
-			case MT_PHY_TYPE_HE_MU:
-				status->flag |= RX_FLAG_RADIOTAP_HE_MU;
-				fallthrough;
-			case MT_PHY_TYPE_HE_SU:
-			case MT_PHY_TYPE_HE_EXT_SU:
-			case MT_PHY_TYPE_HE_TB:
-				status->nss =
-					FIELD_GET(MT_PRXV_NSTS, v0) + 1;
-				status->encoding = RX_ENC_HE;
-				status->flag |= RX_FLAG_RADIOTAP_HE;
-				i &= GENMASK(3, 0);
-
-				if (gi <= NL80211_RATE_INFO_HE_GI_3_2)
-					status->he_gi = gi;
-
-				status->he_dcm = !!(idx & MT_PRXV_TX_DCM);
-				break;
-			default:
+		switch (mode) {
+		case MT_PHY_TYPE_CCK:
+			cck = true;
+			fallthrough;
+		case MT_PHY_TYPE_OFDM:
+			i = mt76_get_rate(&dev->mt76, sband, i, cck);
+			break;
+		case MT_PHY_TYPE_HT_GF:
+		case MT_PHY_TYPE_HT:
+			status->encoding = RX_ENC_HT;
+			if (i > 31)
 				return -EINVAL;
-			}
-			status->rate_idx = i;
-
-			switch (FIELD_GET(MT_CRXV_FRAME_MODE, v2)) {
-			case IEEE80211_STA_RX_BW_20:
-				break;
-			case IEEE80211_STA_RX_BW_40:
-				if (mode & MT_PHY_TYPE_HE_EXT_SU &&
-				    (idx & MT_PRXV_TX_ER_SU_106T)) {
-					status->bw = RATE_INFO_BW_HE_RU;
-					status->he_ru =
-						NL80211_RATE_INFO_HE_RU_ALLOC_106;
-				} else {
-					status->bw = RATE_INFO_BW_40;
-				}
-				break;
-			case IEEE80211_STA_RX_BW_80:
-				status->bw = RATE_INFO_BW_80;
-				break;
-			case IEEE80211_STA_RX_BW_160:
-				status->bw = RATE_INFO_BW_160;
-				break;
-			default:
+			break;
+		case MT_PHY_TYPE_VHT:
+			status->nss =
+				FIELD_GET(MT_PRXV_NSTS, v0) + 1;
+			status->encoding = RX_ENC_VHT;
+			if (i > 9)
 				return -EINVAL;
+			break;
+		case MT_PHY_TYPE_HE_MU:
+			status->flag |= RX_FLAG_RADIOTAP_HE_MU;
+			fallthrough;
+		case MT_PHY_TYPE_HE_SU:
+		case MT_PHY_TYPE_HE_EXT_SU:
+		case MT_PHY_TYPE_HE_TB:
+			status->nss =
+				FIELD_GET(MT_PRXV_NSTS, v0) + 1;
+			status->encoding = RX_ENC_HE;
+			status->flag |= RX_FLAG_RADIOTAP_HE;
+			i &= GENMASK(3, 0);
+
+			if (gi <= NL80211_RATE_INFO_HE_GI_3_2)
+				status->he_gi = gi;
+
+			status->he_dcm = !!(idx & MT_PRXV_TX_DCM);
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		status->rate_idx = i;
+
+		switch (FIELD_GET(MT_PRXV_FRAME_MODE, v0)) {
+		case IEEE80211_STA_RX_BW_20:
+			break;
+		case IEEE80211_STA_RX_BW_40:
+			if (mode & MT_PHY_TYPE_HE_EXT_SU &&
+			    (idx & MT_PRXV_TX_ER_SU_106T)) {
+				status->bw = RATE_INFO_BW_HE_RU;
+				status->he_ru =
+					NL80211_RATE_INFO_HE_RU_ALLOC_106;
+			} else {
+				status->bw = RATE_INFO_BW_40;
 			}
+			break;
+		case IEEE80211_STA_RX_BW_80:
+			status->bw = RATE_INFO_BW_80;
+			break;
+		case IEEE80211_STA_RX_BW_160:
+			status->bw = RATE_INFO_BW_160;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		status->enc_flags |= RX_ENC_FLAG_STBC_MASK * stbc;
+		if (mode < MT_PHY_TYPE_HE_SU && gi)
+			status->enc_flags |= RX_ENC_FLAG_SHORT_GI;
 
-			status->enc_flags |= RX_ENC_FLAG_STBC_MASK * stbc;
-			if (mode < MT_PHY_TYPE_HE_SU && gi)
-				status->enc_flags |= RX_ENC_FLAG_SHORT_GI;
+		if (rxd1 & MT_RXD1_NORMAL_GROUP_5) {
+			rxd += 18;
+			if ((u8 *)rxd - skb->data >= skb->len)
+				return -EINVAL;
 		}
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
index a0c1fa0f20e4..109c8849d106 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h
@@ -97,18 +97,24 @@ enum rx_pkt_type {
 #define MT_RXD3_NORMAL_PF_MODE		BIT(29)
 #define MT_RXD3_NORMAL_PF_STS		GENMASK(31, 30)
 
-/* P-RXV */
+/* P-RXV DW0 */
 #define MT_PRXV_TX_RATE			GENMASK(6, 0)
 #define MT_PRXV_TX_DCM			BIT(4)
 #define MT_PRXV_TX_ER_SU_106T		BIT(5)
 #define MT_PRXV_NSTS			GENMASK(9, 7)
 #define MT_PRXV_HT_AD_CODE		BIT(11)
+#define MT_PRXV_FRAME_MODE		GENMASK(14, 12)
+#define MT_PRXV_SGI			GENMASK(16, 15)
+#define MT_PRXV_STBC			GENMASK(23, 22)
+#define MT_PRXV_TX_MODE			GENMASK(27, 24)
 #define MT_PRXV_HE_RU_ALLOC_L		GENMASK(31, 28)
-#define MT_PRXV_HE_RU_ALLOC_H		GENMASK(3, 0)
+
+/* P-RXV DW1 */
 #define MT_PRXV_RCPI3			GENMASK(31, 24)
 #define MT_PRXV_RCPI2			GENMASK(23, 16)
 #define MT_PRXV_RCPI1			GENMASK(15, 8)
 #define MT_PRXV_RCPI0			GENMASK(7, 0)
+#define MT_PRXV_HE_RU_ALLOC_H		GENMASK(3, 0)
 
 /* C-RXV */
 #define MT_CRXV_HT_STBC			GENMASK(1, 0)
-- 
cgit v1.2.3


From 9dfd2785ac730f617b06b572683d76bfc51b67c2 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:49 +0100
Subject: mt76: mt7921: add flush operation

add flush operation to make sure cfg80211_mlme_deauth is able to flush
the deauthtication frame into air immediately.

Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 11d85cf9cfb8..f7552f5201b4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1137,6 +1137,15 @@ static void mt7921_set_rekey_data(struct ieee80211_hw *hw,
 }
 #endif /* CONFIG_PM */
 
+static void mt7921_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
+{
+	struct mt7921_dev *dev = mt7921_hw_dev(hw);
+
+	wait_event_timeout(dev->mt76.tx_wait, !mt76_has_tx_pending(&dev->mphy),
+			   HZ / 2);
+}
+
 const struct ieee80211_ops mt7921_ops = {
 	.tx = mt7921_tx,
 	.start = mt7921_start,
@@ -1175,4 +1184,5 @@ const struct ieee80211_ops mt7921_ops = {
 	.set_wakeup = mt7921_set_wakeup,
 	.set_rekey_data = mt7921_set_rekey_data,
 #endif /* CONFIG_PM */
+	.flush = mt7921_flush,
 };
-- 
cgit v1.2.3


From 7139b5c0c98af1e2a04bd9ff09d2c01c40f1da41 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 19 Feb 2021 18:28:51 +0100
Subject: mt76: connac: update sched_scan cmd usage

Update sched_scan command usage according to the current firmware
submitted into linux-firmware.git.

Fixes: 80fc1e37c0eb ("mt76: mt7921: rely on mt76_connac_mcu module for sched_scan and hw_scan")
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h   |  5 -----
 drivers/net/wireless/mediatek/mt76/mt76_connac.h     |  5 +++++
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c |  7 +++++--
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h | 14 +++++++++++---
 4 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 491841bc6291..5ef19fbee6bd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -408,11 +408,6 @@ static inline bool is_mt7615(struct mt76_dev *dev)
 	return mt76_chip(dev) == 0x7615 || mt76_chip(dev) == 0x7611;
 }
 
-static inline bool is_mt7663(struct mt76_dev *dev)
-{
-	return mt76_chip(dev) == 0x7663;
-}
-
 static inline bool is_mt7611(struct mt76_dev *dev)
 {
 	return mt76_chip(dev) == 0x7611;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 0d58606391b0..b811f3c410a1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -73,6 +73,11 @@ static inline bool is_mt7921(struct mt76_dev *dev)
 	return mt76_chip(dev) == 0x7961;
 }
 
+static inline bool is_mt7663(struct mt76_dev *dev)
+{
+	return mt76_chip(dev) == 0x7663;
+}
+
 int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm);
 void mt76_connac_power_save_sched(struct mt76_phy *phy,
 				  struct mt76_connac_pm *pm);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 81515419b5b6..125e02b34a5f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1438,10 +1438,13 @@ int mt76_connac_mcu_sched_scan_req(struct mt76_phy *phy,
 	req->version = 1;
 	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
 
-	if (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-		get_random_mask_addr(req->random_mac, sreq->mac_addr,
+	if (is_mt7663(phy->dev) &&
+	    (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) {
+		get_random_mask_addr(req->mt7663.random_mac, sreq->mac_addr,
 				     sreq->mac_addr_mask);
 		req->scan_func = 1;
+	} else if (is_mt7921(phy->dev)) {
+		req->mt7921.bss_idx = mvif->idx;
 	}
 
 	req->ssids_num = sreq->n_ssids;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 9c26b90655a7..8478f019f5d3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -756,11 +756,19 @@ struct mt76_connac_sched_scan_req {
 	u8 channel_type;
 	u8 channels_num;
 	u8 intervals_num;
-	u8 scan_func; /* BIT(0) eable random mac address */
+	u8 scan_func; /* MT7663: BIT(0) eable random mac address */
 	struct mt76_connac_mcu_scan_channel channels[64];
 	__le16 intervals[MT76_CONNAC_MAX_SCHED_SCAN_INTERVAL];
-	u8 random_mac[ETH_ALEN]; /* valid when BIT(0) in scan_func is set */
-	u8 pad2[58];
+	union {
+		struct {
+			u8 random_mac[ETH_ALEN];
+			u8 pad2[58];
+		} mt7663;
+		struct {
+			u8 bss_idx;
+			u8 pad2[63];
+		} mt7921;
+	};
 } __packed;
 
 struct mt76_connac_sched_scan_done {
-- 
cgit v1.2.3


From aa40528a5bca990ef8f6a82eb26d23c81b4b3cff Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 26 Jul 2020 13:09:51 +0200
Subject: mt76: use threaded NAPI

With threaded NAPI, the rx handler function is no longer bound to the CPU that
fired the interrupt, which significantly helps to spread the workload over
multiple CPUs, especially when multiple devices are using threaded NAPI at the
same time.
Exclude the tx handler from threaded NAPI by using a separate dummy netdev.
The work is small and short-lived enough that it makes more sense to run it in
softirq instead of creating a dedicated thread

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c          | 6 ++++--
 drivers/net/wireless/mediatek/mt76/mt76.h         | 1 +
 drivers/net/wireless/mediatek/mt76/mt7603/dma.c   | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c   | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/dma.c   | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c   | 2 +-
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 2f27c43ad76d..6273b2cfce4f 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -602,7 +602,6 @@ mt76_dma_rx_poll(struct napi_struct *napi, int budget)
 	dev = container_of(napi->dev, struct mt76_dev, napi_dev);
 	qid = napi - dev->napi;
 
-	local_bh_disable();
 	rcu_read_lock();
 
 	do {
@@ -612,7 +611,6 @@ mt76_dma_rx_poll(struct napi_struct *napi, int budget)
 	} while (cur && done < budget);
 
 	rcu_read_unlock();
-	local_bh_enable();
 
 	if (done < budget && napi_complete(napi))
 		dev->drv->rx_poll_complete(dev, qid);
@@ -626,6 +624,10 @@ mt76_dma_init(struct mt76_dev *dev)
 	int i;
 
 	init_dummy_netdev(&dev->napi_dev);
+	init_dummy_netdev(&dev->tx_napi_dev);
+	snprintf(dev->napi_dev.name, sizeof(dev->napi_dev.name), "%s",
+		 wiphy_name(dev->hw->wiphy));
+	dev->napi_dev.threaded = 1;
 
 	mt76_for_each_q_rx(dev, i) {
 		netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5561f81c64f7..4d71ca228de2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -637,6 +637,7 @@ struct mt76_dev {
 	struct mt76_mcu mcu;
 
 	struct net_device napi_dev;
+	struct net_device tx_napi_dev;
 	spinlock_t rx_lock;
 	struct napi_struct napi[__MT_RXQ_MAX];
 	struct sk_buff_head rx_skb[__MT_RXQ_MAX];
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index 0086f18cb79a..2b6244116842 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -223,7 +223,7 @@ int mt7603_dma_init(struct mt7603_dev *dev)
 	if (ret)
 		return ret;
 
-	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+	netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
 			  mt7603_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 25e3069cf2b1..2e3120eb2da9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -245,7 +245,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 	if (ret < 0)
 		return ret;
 
-	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+	netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
 			  mt7615_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index e7a46ac97f51..fc12824ab74e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -230,7 +230,7 @@ int mt76x02_dma_init(struct mt76x02_dev *dev)
 	if (ret)
 		return ret;
 
-	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+	netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
 			  mt76x02_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index bf51304a770b..3c961bf55e97 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -325,7 +325,7 @@ int mt7915_dma_init(struct mt7915_dev *dev)
 	if (ret < 0)
 		return ret;
 
-	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+	netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
 			  mt7915_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index cd9665610284..60de29a921a8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -299,7 +299,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	if (ret < 0)
 		return ret;
 
-	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+	netif_tx_napi_add(&dev->mt76.tx_napi_dev, &dev->mt76.tx_napi,
 			  mt7921_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
-- 
cgit v1.2.3


From e78d73e01db3309bbc099be87a5157d3c710fcc8 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 18 Feb 2021 09:34:38 +0800
Subject: mt76: mt7615: enable hw rx-amsdu de-aggregation

Enable hw rx-amsdu de-aggregation support.
This is a preliminary patch to enable rx checksum offload.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c |  3 +--
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 15 ++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h  |  3 +++
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h |  1 +
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 571390fa4de7..a97cc723094c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -116,10 +116,9 @@ mt7615_mac_init(struct mt7615_dev *dev)
 	mt76_set(dev, MT_WF_RMAC_MIB_TIME0, MT_WF_RMAC_MIB_RXTIME_EN);
 	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0, MT_WF_RMAC_MIB_RXTIME_EN);
 
-	/* disable hdr translation and hw AMSDU */
 	mt76_wr(dev, MT_DMA_DCR0,
 		FIELD_PREP(MT_DMA_DCR0_MAX_RX_LEN, 3072) |
-		MT_DMA_DCR0_RX_VEC_DROP);
+		MT_DMA_DCR0_RX_VEC_DROP | MT_DMA_DCR0_DAMSDU_EN);
 	/* disable TDLS filtering */
 	mt76_clear(dev, MT_WF_PFCR, MT_WF_PFCR_TDLS_EN);
 	mt76_set(dev, MT_WF_MIB_SCR0, MT_MIB_SCR0_AGG_CNT_RANGE_EN);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 9bae2f66e1ce..928edd158f64 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -238,7 +238,7 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	bool unicast, remove_pad, insert_ccmp_hdr = false;
 	int phy_idx;
 	int i, idx;
-	u8 chfreq;
+	u8 chfreq, amsdu_info;
 
 	memset(status, 0, sizeof(*status));
 
@@ -254,6 +254,9 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	else
 		phy_idx = -1;
 
+	if (rxd2 & MT_RXD2_NORMAL_AMSDU_ERR)
+		return -EINVAL;
+
 	unicast = (rxd1 & MT_RXD1_NORMAL_ADDR_TYPE) == MT_RXD1_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD2_NORMAL_WLAN_IDX, rxd2);
 	status->wcid = mt7615_rx_get_wcid(dev, idx, unicast);
@@ -446,6 +449,16 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	skb_pull(skb, (u8 *)rxd - skb->data + 2 * remove_pad);
 
+	amsdu_info = FIELD_GET(MT_RXD1_NORMAL_PAYLOAD_FORMAT, rxd1);
+	status->amsdu = !!amsdu_info;
+	if (status->amsdu) {
+		status->first_amsdu = amsdu_info == MT_RXD1_FIRST_AMSDU_FRAME;
+		status->last_amsdu = amsdu_info == MT_RXD1_LAST_AMSDU_FRAME;
+		memmove(skb->data + 2, skb->data,
+			ieee80211_get_hdrlen_from_skb(skb));
+		skb_pull(skb, 2);
+	}
+
 	if (insert_ccmp_hdr) {
 		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 169f4e17b5b4..ed009d085a53 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -33,6 +33,9 @@ enum rx_pkt_type {
 
 #define MT_RXD1_NORMAL_BSSID		GENMASK(31, 26)
 #define MT_RXD1_NORMAL_PAYLOAD_FORMAT	GENMASK(25, 24)
+#define MT_RXD1_FIRST_AMSDU_FRAME	GENMASK(1, 0)
+#define MT_RXD1_MID_AMSDU_FRAME		BIT(1)
+#define MT_RXD1_LAST_AMSDU_FRAME	BIT(0)
 #define MT_RXD1_NORMAL_HDR_TRANS	BIT(23)
 #define MT_RXD1_NORMAL_HDR_OFFSET	BIT(22)
 #define MT_RXD1_NORMAL_MAC_HDR_LEN	GENMASK(21, 16)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 6e5db015b32c..4ebffe52cb8e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -368,6 +368,7 @@ enum mt7615_reg_base {
 
 #define MT_DMA_DCR0			MT_WF_DMA(0x000)
 #define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 2)
+#define MT_DMA_DCR0_DAMSDU_EN		BIT(16)
 #define MT_DMA_DCR0_RX_VEC_DROP		BIT(17)
 
 #define MT_DMA_RCFR0(_band)		MT_WF_DMA(0x070 + (_band) * 0x40)
-- 
cgit v1.2.3


From 2122dfbfd0bd951a6bded5fbd9a1e87b37c41caa Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 18 Feb 2021 09:34:39 +0800
Subject: mt76: mt7615: add rx checksum offload support

Set skb->ip_summed to CHECKSUM_UNNECESSARY if the hardware has validated
the IP and TCP/UDP checksum

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index a97cc723094c..fd38737a27bd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -329,6 +329,7 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_rates = 3;
 	hw->max_report_rates = 7;
 	hw->max_rate_tries = 11;
+	hw->netdev_features = NETIF_F_RXCSUM;
 
 	phy->slottime = 9;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 928edd158f64..340beff0e121 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -234,6 +234,7 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	u32 rxd0 = le32_to_cpu(rxd[0]);
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
+	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
 	__le32 rxd12 = rxd[12];
 	bool unicast, remove_pad, insert_ccmp_hdr = false;
 	int phy_idx;
@@ -271,6 +272,9 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 		spin_unlock_bh(&dev->sta_poll_lock);
 	}
 
+	if ((rxd0 & csum_mask) == csum_mask)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
 	if (rxd2 & MT_RXD2_NORMAL_FCS_ERR)
 		status->flag |= RX_FLAG_FAILED_FCS_CRC;
 
-- 
cgit v1.2.3


From d4b98c63d7a772cfc2ed68c646915d5b3988e934 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 18 Feb 2021 09:34:40 +0800
Subject: mt76: mt7615: add support for rx decapsulation offload

For AP and Client mode, the hardware can pass received packets
as 802.3 frames that can be passed to the network stack as-is.

Tested-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  5 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 44 ++++++++++++++++------
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h    |  5 +++
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 17 +++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 39 +++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  4 ++
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   |  1 +
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 19 ++++++++++
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  6 ++-
 9 files changed, 127 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index fd38737a27bd..88a2ee7e7143 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -118,7 +118,8 @@ mt7615_mac_init(struct mt7615_dev *dev)
 
 	mt76_wr(dev, MT_DMA_DCR0,
 		FIELD_PREP(MT_DMA_DCR0_MAX_RX_LEN, 3072) |
-		MT_DMA_DCR0_RX_VEC_DROP | MT_DMA_DCR0_DAMSDU_EN);
+		MT_DMA_DCR0_RX_VEC_DROP | MT_DMA_DCR0_DAMSDU_EN |
+		MT_DMA_DCR0_RX_HDR_TRANS_EN);
 	/* disable TDLS filtering */
 	mt76_clear(dev, MT_WF_PFCR, MT_WF_PFCR_TDLS_EN);
 	mt76_set(dev, MT_WF_MIB_SCR0, MT_MIB_SCR0_AGG_CNT_RANGE_EN);
@@ -128,6 +129,7 @@ mt7615_mac_init(struct mt7615_dev *dev)
 	} else {
 		mt7615_init_mac_chain(dev, 1);
 	}
+	mt7615_mcu_set_rx_hdr_trans_blacklist(dev);
 }
 
 static void
@@ -360,6 +362,7 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
 	ieee80211_hw_set(hw, TX_STATUS_NO_AMPDU_LEN);
 	ieee80211_hw_set(hw, WANT_MONITOR_VIF);
+	ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD);
 
 	if (is_mt7615(&phy->dev->mt76))
 		hw->max_tx_fragments = MT_TXP_MAX_BUF_NUM;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 340beff0e121..f2dd56674acb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -236,10 +236,12 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
 	__le32 rxd12 = rxd[12];
-	bool unicast, remove_pad, insert_ccmp_hdr = false;
+	bool unicast, hdr_trans, remove_pad, insert_ccmp_hdr = false;
 	int phy_idx;
 	int i, idx;
-	u8 chfreq, amsdu_info;
+	u8 chfreq, amsdu_info, qos_ctl = 0;
+	u16 seq_ctrl = 0;
+	__le16 fc = 0;
 
 	memset(status, 0, sizeof(*status));
 
@@ -260,6 +262,7 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	unicast = (rxd1 & MT_RXD1_NORMAL_ADDR_TYPE) == MT_RXD1_NORMAL_U2M;
 	idx = FIELD_GET(MT_RXD2_NORMAL_WLAN_IDX, rxd2);
+	hdr_trans = rxd1 & MT_RXD1_NORMAL_HDR_TRANS;
 	status->wcid = mt7615_rx_get_wcid(dev, idx, unicast);
 
 	if (status->wcid) {
@@ -295,6 +298,13 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	rxd += 4;
 	if (rxd0 & MT_RXD0_NORMAL_GROUP_4) {
+		u32 v0 = le32_to_cpu(rxd[0]);
+		u32 v2 = le32_to_cpu(rxd[2]);
+
+		fc = cpu_to_le16(FIELD_GET(MT_RXD4_FRAME_CONTROL, v0));
+		qos_ctl = FIELD_GET(MT_RXD6_QOS_CTL, v2);
+		seq_ctrl = FIELD_GET(MT_RXD6_SEQ_CTRL, v2);
+
 		rxd += 4;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
@@ -458,25 +468,37 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	if (status->amsdu) {
 		status->first_amsdu = amsdu_info == MT_RXD1_FIRST_AMSDU_FRAME;
 		status->last_amsdu = amsdu_info == MT_RXD1_LAST_AMSDU_FRAME;
-		memmove(skb->data + 2, skb->data,
-			ieee80211_get_hdrlen_from_skb(skb));
-		skb_pull(skb, 2);
+		if (!hdr_trans) {
+			memmove(skb->data + 2, skb->data,
+				ieee80211_get_hdrlen_from_skb(skb));
+			skb_pull(skb, 2);
+		}
 	}
 
-	if (insert_ccmp_hdr) {
+	if (insert_ccmp_hdr && !hdr_trans) {
 		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
 
 		mt76_insert_ccmp_hdr(skb, key_id);
 	}
 
-	hdr = (struct ieee80211_hdr *)skb->data;
-	if (!status->wcid || !ieee80211_is_data_qos(hdr->frame_control))
+	if (!hdr_trans) {
+		hdr = (struct ieee80211_hdr *)skb->data;
+		fc = hdr->frame_control;
+		if (ieee80211_is_data_qos(fc)) {
+			seq_ctrl = le16_to_cpu(hdr->seq_ctrl);
+			qos_ctl = *ieee80211_get_qos_ctl(hdr);
+		}
+	} else {
+		status->flag |= RX_FLAG_8023;
+	}
+
+	if (!status->wcid || !ieee80211_is_data_qos(fc))
 		return 0;
 
 	status->aggr = unicast &&
-		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
-	status->qos_ctl = *ieee80211_get_qos_ctl(hdr);
-	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+		       !ieee80211_is_qos_nullfunc(fc);
+	status->qos_ctl = qos_ctl;
+	status->seqno = IEEE80211_SEQ_TO_SN(seq_ctrl);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index ed009d085a53..6bf9da040196 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -81,6 +81,11 @@ enum rx_pkt_type {
 #define MT_RXD3_NORMAL_TSF_COMPARE_LOSS	BIT(8)
 #define MT_RXD3_NORMAL_RXV_SEQ		GENMASK(7, 0)
 
+#define MT_RXD4_FRAME_CONTROL		GENMASK(15, 0)
+
+#define MT_RXD6_SEQ_CTRL		GENMASK(15, 0)
+#define MT_RXD6_QOS_CTL			GENMASK(31, 16)
+
 #define MT_RXV1_ACID_DET_H		BIT(31)
 #define MT_RXV1_ACID_DET_L		BIT(30)
 #define MT_RXV1_VHTA2_B8_B3		GENMASK(29, 24)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 1aa6928f88fc..1cf40f85eb55 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -1087,6 +1087,22 @@ static int mt7615_cancel_remain_on_channel(struct ieee80211_hw *hw,
 	return 0;
 }
 
+static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_sta *sta,
+				 bool enabled)
+{
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+
+	if (enabled)
+		set_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+	else
+		clear_bit(MT_WCID_FLAG_HDR_TRANS, &msta->wcid.flags);
+
+	mt7615_mcu_sta_update_hdr_trans(dev, vif, sta);
+}
+
 #ifdef CONFIG_PM
 static int mt7615_suspend(struct ieee80211_hw *hw,
 			  struct cfg80211_wowlan *wowlan)
@@ -1187,6 +1203,7 @@ const struct ieee80211_ops mt7615_ops = {
 	.sta_remove = mt7615_sta_remove,
 	.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
 	.set_key = mt7615_set_key,
+	.sta_set_decap_offload = mt7615_sta_set_decap_offload,
 	.ampdu_action = mt7615_ampdu_action,
 	.set_rts_threshold = mt7615_set_rts_threshold,
 	.wake_tx_queue = mt7615_wake_tx_queue,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 4ecbd5406e2a..393ce09d3956 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1097,6 +1097,25 @@ static const struct mt7615_mcu_ops sta_update_ops = {
 	.set_fw_ctrl = mt7615_mcu_fw_pmctrl,
 };
 
+int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
+				    struct ieee80211_vif *vif,
+				    struct ieee80211_sta *sta)
+{
+	struct mt7615_sta *msta = (struct mt7615_sta *)sta->drv_priv;
+	struct wtbl_req_hdr *wtbl_hdr;
+	struct sk_buff *skb = NULL;
+
+	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
+						  WTBL_SET, NULL, &skb);
+	if (IS_ERR(wtbl_hdr))
+		return PTR_ERR(wtbl_hdr);
+
+	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, sta, NULL, wtbl_hdr);
+
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD_WTBL_UPDATE,
+				     true);
+}
+
 static int
 mt7615_mcu_uni_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
 {
@@ -2500,6 +2519,26 @@ out:
 	return ret;
 }
 
+int mt7615_mcu_set_rx_hdr_trans_blacklist(struct mt7615_dev *dev)
+{
+	struct {
+		u8 operation;
+		u8 count;
+		u8 _rsv[2];
+		u8 index;
+		u8 enable;
+		__le16 etype;
+	} req = {
+		.operation = 1,
+		.count = 1,
+		.enable = 1,
+		.etype = cpu_to_le16(ETH_P_PAE),
+	};
+
+	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_RX_HDR_TRANS,
+				 &req, sizeof(req), false);
+}
+
 int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 			  bool enable)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 5ef19fbee6bd..aa6fe65f42a7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -519,6 +519,10 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 void mt7615_mac_work(struct work_struct *work);
 void mt7615_txp_skb_unmap(struct mt76_dev *dev,
 			  struct mt76_txwi_cache *txwi);
+int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
+				    struct ieee80211_vif *vif,
+				    struct ieee80211_sta *sta);
+int mt7615_mcu_set_rx_hdr_trans_blacklist(struct mt7615_dev *dev);
 int mt7615_mcu_set_fcc5_lpn(struct mt7615_dev *dev, int val);
 int mt7615_mcu_set_pulse_th(struct mt7615_dev *dev,
 			    const struct mt7615_dfs_pulse *pulse);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 4ebffe52cb8e..1fd7f183629e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -370,6 +370,7 @@ enum mt7615_reg_base {
 #define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 2)
 #define MT_DMA_DCR0_DAMSDU_EN		BIT(16)
 #define MT_DMA_DCR0_RX_VEC_DROP		BIT(17)
+#define MT_DMA_DCR0_RX_HDR_TRANS_EN	BIT(19)
 
 #define MT_DMA_RCFR0(_band)		MT_WF_DMA(0x070 + (_band) * 0x40)
 #define MT_DMA_RCFR0_MCU_RX_MGMT	BIT(2)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 125e02b34a5f..ab9e48c19508 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -392,6 +392,25 @@ mt76_connac_mcu_sta_uapsd(struct sk_buff *skb, struct ieee80211_vif *vif,
 	uapsd->max_sp = sta->max_sp;
 }
 
+void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
+					struct ieee80211_vif *vif,
+					struct ieee80211_sta *sta,
+					void *sta_wtbl, void *wtbl_tlv)
+{
+	struct mt76_wcid *wcid;
+	struct wtbl_hdr_trans *htr;
+	struct tlv *tlv;
+
+	tlv = mt76_connac_mcu_add_nested_tlv(skb, WTBL_HDR_TRANS,
+					     sizeof(*htr),
+					     wtbl_tlv, sta_wtbl);
+	htr = (struct wtbl_hdr_trans *)tlv;
+
+	wcid = (struct mt76_wcid *)sta->drv_priv;
+	htr->no_rx_trans = !test_bit(MT_WCID_FLAG_HDR_TRANS, &wcid->flags);
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_hdr_trans_tlv);
+
 void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev,
 				      struct sk_buff *skb,
 				      struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 8478f019f5d3..950e68e11ddd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -210,7 +210,7 @@ struct wtbl_hdr_trans {
 	__le16 len;
 	u8 to_ds;
 	u8 from_ds;
-	u8 disable_rx_trans;
+	u8 no_rx_trans;
 	u8 rsv;
 } __packed;
 
@@ -922,6 +922,10 @@ void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				      struct ieee80211_vif *vif,
 				      struct ieee80211_sta *sta, void *sta_wtbl,
 				      void *wtbl_tlv);
+void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
+					struct ieee80211_vif *vif,
+					struct ieee80211_sta *sta,
+					void *sta_wtbl, void *wtbl_tlv);
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
 			     struct ieee80211_vif *vif);
-- 
cgit v1.2.3


From 49cc85059a2cb656f96ff3693f891e8fe8f669a9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 19 Feb 2021 18:22:11 +0100
Subject: mt76: mt7615: fix memory leak in mt7615_coredump_work

Similar to the issue fixed in mt7921_coredump_work, fix a possible memory
leak in mt7615_coredump_work routine.

Fixes: d2bf7959d9c0f ("mt76: mt7663: introduce coredump support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f2dd56674acb..d2ef2b33e2b5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -2352,8 +2352,10 @@ void mt7615_coredump_work(struct work_struct *work)
 			break;
 
 		skb_pull(skb, sizeof(struct mt7615_mcu_rxd));
-		if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ)
-			break;
+		if (data + skb->len - dump > MT76_CONNAC_COREDUMP_SZ) {
+			dev_kfree_skb(skb);
+			continue;
+		}
 
 		memcpy(data, skb->data, skb->len);
 		data += skb->len;
-- 
cgit v1.2.3


From 461e3b7f45766f38eeb24ca7354ff01d993b5b47 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 22 Feb 2021 02:12:09 +0100
Subject: mt76: mt7921: fix aggr length histogram

Fix register definitions for 802.11 aggr length histogram estimation.

Fixes: 474a9f21e2e2 ("mt76: mt7921: add debugfs support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 5 ++---
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h    | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 4bc3dd99bd14..b2e8d698e019 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -44,14 +44,13 @@ mt7921_ampdu_stat_read_phy(struct mt7921_phy *phy,
 		range[i] = mt76_rr(dev, MT_MIB_ARNG(0, i));
 
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
-		bound[i] = MT_MIB_ARNCR_RANGE(range[i / 4], i) + 1;
+		bound[i] = MT_MIB_ARNCR_RANGE(range[i / 4], i % 4) + 1;
 
 	seq_printf(file, "\nPhy0\n");
 
 	seq_printf(file, "Length: %8d | ", bound[0]);
 	for (i = 0; i < ARRAY_SIZE(bound) - 1; i++)
-		seq_printf(file, "%3d -%3d | ",
-			   bound[i] + 1, bound[i + 1]);
+		seq_printf(file, "%3d  %3d | ", bound[i] + 1, bound[i + 1]);
 
 	seq_puts(file, "\nCount:  ");
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index 18980bb32dee..d7e316eb006c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -128,9 +128,9 @@
 #define MT_MIB_MB_SDR2(_band, n)	MT_WF_MIB(_band, 0x108 + ((n) << 4))
 #define MT_MIB_FRAME_RETRIES_COUNT_MASK	GENMASK(15, 0)
 
-#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0x0a8 + ((n) << 2))
-#define MT_TX_AGG_CNT2(_band, n)	MT_WF_MIB(_band, 0x164 + ((n) << 2))
-#define MT_MIB_ARNG(_band, n)		MT_WF_MIB(_band, 0x4b8 + ((n) << 2))
+#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0x7dc + ((n) << 2))
+#define MT_TX_AGG_CNT2(_band, n)	MT_WF_MIB(_band, 0x7ec + ((n) << 2))
+#define MT_MIB_ARNG(_band, n)		MT_WF_MIB(_band, 0x0b0 + ((n) << 2))
 #define MT_MIB_ARNCR_RANGE(val, n)	(((val) >> ((n) << 3)) & GENMASK(7, 0))
 
 #define MT_WTBLON_TOP_BASE		0x34000
-- 
cgit v1.2.3


From 9fb9d755fae20b5ad62ef8b4e9289e5baea2c6fc Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 22 Feb 2021 02:14:36 +0100
Subject: mt76: mt7915: fix aggr len debugfs node

Similar to mt7921, fix 802.11 aggr len debugfs reporting for mt7915 driver.

Fixes: e57b7901469fc ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 77dcd71e49a5..2f706620686e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -124,7 +124,7 @@ mt7915_ampdu_stat_read_phy(struct mt7915_phy *phy,
 		range[i] = mt76_rr(dev, MT_MIB_ARNG(ext_phy, i));
 
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
-		bound[i] = MT_MIB_ARNCR_RANGE(range[i / 4], i) + 1;
+		bound[i] = MT_MIB_ARNCR_RANGE(range[i / 4], i % 4) + 1;
 
 	seq_printf(file, "\nPhy %d\n", ext_phy);
 
-- 
cgit v1.2.3


From 8e84836283f18c8a408c422794a9d4ec1412f3c6 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 22 Feb 2021 17:03:31 +0800
Subject: mt76: mt7921: remove unneeded semicolon

Fix the following coccicheck warnings:

./drivers/net/wireless/mediatek/mt76/mt7921/mac.c:1402:2-3: Unneeded
semicolon.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 76d5f194706f..7faecacd5e07 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1400,7 +1400,7 @@ void mt7921_mac_work(struct work_struct *work)
 	if (++phy->sta_work_count == 10) {
 		phy->sta_work_count = 0;
 		mt7921_mac_sta_stats_work(phy);
-	};
+	}
 
 	mt7921_mutex_release(phy->dev);
 
-- 
cgit v1.2.3


From f76e9019913bffee0e49b096068e6f6b12f9b0e0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 22 Feb 2021 19:17:49 +0100
Subject: mt76: mt7921: fix stats register definitions

Fix register definitions for mac80211 stats reporting.
Move mib counter reset to mt7921_get_stats routine.

Fixes: 163f4d22c118d ("mt76: mt7921: add MAC support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 31 +++++++---------------
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |  6 +++++
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 10 +++----
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h   | 15 +++++++----
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 7faecacd5e07..3e96bcfda45b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1318,31 +1318,20 @@ mt7921_mac_update_mib_stats(struct mt7921_phy *phy)
 	struct mib_stats *mib = &phy->mib;
 	int i, aggr0 = 0, aggr1;
 
-	memset(mib, 0, sizeof(*mib));
-
-	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(0),
-					  MT_MIB_SDR3_FCS_ERR_MASK);
+	mib->fcs_err_cnt += mt76_get_field(dev, MT_MIB_SDR3(0),
+					   MT_MIB_SDR3_FCS_ERR_MASK);
+	mib->ack_fail_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR3(0),
+					    MT_MIB_ACK_FAIL_COUNT_MASK);
+	mib->ba_miss_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR2(0),
+					   MT_MIB_BA_FAIL_COUNT_MASK);
+	mib->rts_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR0(0),
+				       MT_MIB_RTS_COUNT_MASK);
+	mib->rts_retries_cnt += mt76_get_field(dev, MT_MIB_MB_BSDR1(0),
+					       MT_MIB_RTS_FAIL_COUNT_MASK);
 
 	for (i = 0, aggr1 = aggr0 + 4; i < 4; i++) {
 		u32 val, val2;
 
-		val = mt76_rr(dev, MT_MIB_MB_SDR1(0, i));
-
-		val2 = FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
-		if (val2 > mib->ack_fail_cnt)
-			mib->ack_fail_cnt = val2;
-
-		val2 = FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
-		if (val2 > mib->ba_miss_cnt)
-			mib->ba_miss_cnt = val2;
-
-		val = mt76_rr(dev, MT_MIB_MB_SDR0(0, i));
-		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
-		if (val2 > mib->rts_retries_cnt) {
-			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
-			mib->rts_retries_cnt = val2;
-		}
-
 		val = mt76_rr(dev, MT_TX_AGG_CNT(0, i));
 		val2 = mt76_rr(dev, MT_TX_AGG_CNT2(0, i));
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index f7552f5201b4..cdb474ff370a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -831,11 +831,17 @@ mt7921_get_stats(struct ieee80211_hw *hw,
 	struct mt7921_phy *phy = mt7921_hw_phy(hw);
 	struct mib_stats *mib = &phy->mib;
 
+	mt7921_mutex_acquire(phy->dev);
+
 	stats->dot11RTSSuccessCount = mib->rts_cnt;
 	stats->dot11RTSFailureCount = mib->rts_retries_cnt;
 	stats->dot11FCSErrorCount = mib->fcs_err_cnt;
 	stats->dot11ACKFailureCount = mib->ack_fail_cnt;
 
+	memset(mib, 0, sizeof(*mib));
+
+	mt7921_mutex_release(phy->dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 46e6aeec35ae..2979d06ee0ad 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -102,11 +102,11 @@ struct mt7921_vif {
 };
 
 struct mib_stats {
-	u16 ack_fail_cnt;
-	u16 fcs_err_cnt;
-	u16 rts_cnt;
-	u16 rts_retries_cnt;
-	u16 ba_miss_cnt;
+	u32 ack_fail_cnt;
+	u32 fcs_err_cnt;
+	u32 rts_cnt;
+	u32 rts_retries_cnt;
+	u32 ba_miss_cnt;
 };
 
 struct mt7921_phy {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index d7e316eb006c..5cab39c9e7a2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -96,8 +96,8 @@
 #define MT_WF_MIB_BASE(_band)		((_band) ? 0xa4800 : 0x24800)
 #define MT_WF_MIB(_band, ofs)		(MT_WF_MIB_BASE(_band) + (ofs))
 
-#define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x014)
-#define MT_MIB_SDR3_FCS_ERR_MASK	GENMASK(15, 0)
+#define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x698)
+#define MT_MIB_SDR3_FCS_ERR_MASK	GENMASK(31, 16)
 
 #define MT_MIB_SDR9(_band)		MT_WF_MIB(_band, 0x02c)
 #define MT_MIB_SDR9_BUSY_MASK		GENMASK(23, 0)
@@ -121,9 +121,14 @@
 #define MT_MIB_RTS_RETRIES_COUNT_MASK	GENMASK(31, 16)
 #define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
 
-#define MT_MIB_MB_SDR1(_band, n)	MT_WF_MIB(_band, 0x104 + ((n) << 4))
-#define MT_MIB_BA_MISS_COUNT_MASK	GENMASK(15, 0)
-#define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
+#define MT_MIB_MB_BSDR0(_band)		MT_WF_MIB(_band, 0x688)
+#define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
+#define MT_MIB_MB_BSDR1(_band)		MT_WF_MIB(_band, 0x690)
+#define MT_MIB_RTS_FAIL_COUNT_MASK	GENMASK(15, 0)
+#define MT_MIB_MB_BSDR2(_band)		MT_WF_MIB(_band, 0x518)
+#define MT_MIB_BA_FAIL_COUNT_MASK	GENMASK(15, 0)
+#define MT_MIB_MB_BSDR3(_band)		MT_WF_MIB(_band, 0x520)
+#define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(15, 0)
 
 #define MT_MIB_MB_SDR2(_band, n)	MT_WF_MIB(_band, 0x108 + ((n) << 4))
 #define MT_MIB_FRAME_RETRIES_COUNT_MASK	GENMASK(15, 0)
-- 
cgit v1.2.3


From a4a5a430b076860691e95337787bc666c8ab28ff Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 23 Feb 2021 16:43:11 +0800
Subject: mt76: mt7615: fix TSF configuration

The index of TSF counters should follow HWBSSID.

Fixes: d8d59f66d136 ("mt76: mt7615: support 16 interfaces")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c      |  6 +++++-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c     | 16 ++++++++++++++--
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h     |  7 ++++---
 drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c |  6 +++++-
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index d2ef2b33e2b5..0af4a5d1a64f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1005,6 +1005,7 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	struct mt7615_dev *dev = phy->dev;
 	struct mt7615_rate_desc rd;
 	u32 w5, w27, addr;
+	u16 idx = sta->vif->mt76.omac_idx;
 
 	if (!mt76_is_mmio(&dev->mt76)) {
 		mt7615_mac_queue_rate_update(phy, sta, probe_rate, rates);
@@ -1056,7 +1057,10 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 
 	mt76_wr(dev, addr + 27 * 4, w27);
 
-	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
+	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
+	addr = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
+
+	mt76_set(dev, addr, MT_LPON_TCR_MODE); /* TSF read */
 	sta->rate_set_tsf = mt76_rr(dev, MT_LPON_UTTR0) & ~BIT(0);
 	sta->rate_set_tsf |= rd.rateset;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 1cf40f85eb55..68accb37ea28 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -818,15 +818,21 @@ mt7615_get_stats(struct ieee80211_hw *hw,
 static u64
 mt7615_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 {
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	union {
 		u64 t64;
 		u32 t32[2];
 	} tsf;
+	u16 idx = mvif->mt76.omac_idx;
+	u32 reg;
+
+	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
+	reg = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
 
 	mt7615_mutex_acquire(dev);
 
-	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
+	mt76_set(dev, reg, MT_LPON_TCR_MODE); /* TSF read */
 	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0);
 	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1);
 
@@ -839,18 +845,24 @@ static void
 mt7615_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	       u64 timestamp)
 {
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	union {
 		u64 t64;
 		u32 t32[2];
 	} tsf = { .t64 = timestamp, };
+	u16 idx = mvif->mt76.omac_idx;
+	u32 reg;
+
+	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
+	reg = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
 
 	mt7615_mutex_acquire(dev);
 
 	mt76_wr(dev, MT_LPON_UTTR0, tsf.t32[0]);
 	mt76_wr(dev, MT_LPON_UTTR1, tsf.t32[1]);
 	/* TSF software overwrite */
-	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_WRITE);
+	mt76_set(dev, reg, MT_LPON_TCR_WRITE);
 
 	mt7615_mutex_release(dev);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 1fd7f183629e..190a02670795 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -449,9 +449,10 @@ enum mt7615_reg_base {
 
 #define MT_LPON(_n)			((dev)->reg_map[MT_LPON_BASE] + (_n))
 
-#define MT_LPON_T0CR			MT_LPON(0x010)
-#define MT_LPON_T0CR_MODE		GENMASK(1, 0)
-#define MT_LPON_T0CR_WRITE		BIT(0)
+#define MT_LPON_TCR0(_n)		MT_LPON(0x010 + ((_n) * 4))
+#define MT_LPON_TCR2(_n)		MT_LPON(0x0f8 + ((_n) - 2) * 4)
+#define MT_LPON_TCR_MODE		GENMASK(1, 0)
+#define MT_LPON_TCR_WRITE		BIT(0)
 
 #define MT_LPON_UTTR0			MT_LPON(0x018)
 #define MT_LPON_UTTR1			MT_LPON(0x01c)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index 203256862dfd..4a370b9f7a17 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -67,6 +67,7 @@ static int mt7663_usb_sdio_set_rates(struct mt7615_dev *dev,
 	struct mt7615_rate_desc *rate = &wrd->rate;
 	struct mt7615_sta *sta = wrd->sta;
 	u32 w5, w27, addr, val;
+	u16 idx = sta->vif->mt76.omac_idx;
 
 	lockdep_assert_held(&dev->mt76.mutex);
 
@@ -118,7 +119,10 @@ static int mt7663_usb_sdio_set_rates(struct mt7615_dev *dev,
 
 	sta->rate_probe = sta->rateset[rate->rateset].probe_rate.idx != -1;
 
-	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
+	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
+	addr = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
+
+	mt76_set(dev, addr, MT_LPON_TCR_MODE); /* TSF read */
 	val = mt76_rr(dev, MT_LPON_UTTR0);
 	sta->rate_set_tsf = (val & ~BIT(0)) | rate->rateset;
 
-- 
cgit v1.2.3


From d43c7301d3308828220ad733043e53254fb98cd7 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 23 Feb 2021 16:43:12 +0800
Subject: mt76: mt7615: remove hdr->fw_ver check

Only mt7615 uses v1 version. Also, some of released firmware doesn't
have hdr->fw_ver.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 393ce09d3956..65c6ab14fe6f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1449,8 +1449,7 @@ static int mt7615_load_n9(struct mt7615_dev *dev, const char *name)
 		 sizeof(dev->mt76.hw->wiphy->fw_version),
 		 "%.10s-%.15s", hdr->fw_ver, hdr->build_date);
 
-	if (!is_mt7615(&dev->mt76) &&
-	    !strncmp(hdr->fw_ver, "2.0", sizeof(hdr->fw_ver))) {
+	if (!is_mt7615(&dev->mt76)) {
 		dev->fw_ver = MT7615_FIRMWARE_V2;
 		dev->mcu_ops = &sta_update_ops;
 	} else {
-- 
cgit v1.2.3


From 2eb6f6c437745bce46bd7a8f3a22a732d5b9becb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 23 Feb 2021 11:42:49 +0100
Subject: mt76: mt7615: fix mib stats counter reporting to mac80211

In order to properly report MIB counters to mac80211, resets stats in
mt7615_get_stats routine and hold mt76 mutex accessing MIB counters.
Sum up MIB counters in mt7615_mac_update_mib_stats routine.

Fixes: c388d8584bc83 ("mt76: mt7615: add a get_stats() callback")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 26 +++++++---------------
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  6 +++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 10 ++++-----
 3 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 0af4a5d1a64f..d49a3f1c6e16 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1873,10 +1873,8 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 	int i, aggr;
 	u32 val, val2;
 
-	memset(mib, 0, sizeof(*mib));
-
-	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
-					  MT_MIB_SDR3_FCS_ERR_MASK);
+	mib->fcs_err_cnt += mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
+					   MT_MIB_SDR3_FCS_ERR_MASK);
 
 	val = mt76_get_field(dev, MT_MIB_SDR14(ext_phy),
 			     MT_MIB_AMPDU_MPDU_COUNT);
@@ -1889,24 +1887,16 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 	aggr = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < 4; i++) {
 		val = mt76_rr(dev, MT_MIB_MB_SDR1(ext_phy, i));
-
-		val2 = FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
-		if (val2 > mib->ack_fail_cnt)
-			mib->ack_fail_cnt = val2;
-
-		val2 = FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
-		if (val2 > mib->ba_miss_cnt)
-			mib->ba_miss_cnt = val2;
+		mib->ba_miss_cnt += FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
+		mib->ack_fail_cnt += FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK,
+					       val);
 
 		val = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
-		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
-		if (val2 > mib->rts_retries_cnt) {
-			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
-			mib->rts_retries_cnt = val2;
-		}
+		mib->rts_cnt += FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
+		mib->rts_retries_cnt += FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK,
+						  val);
 
 		val = mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
-
 		dev->mt76.aggr_stats[aggr++] += val & 0xffff;
 		dev->mt76.aggr_stats[aggr++] += val >> 16;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 68accb37ea28..2380854ce386 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -807,11 +807,17 @@ mt7615_get_stats(struct ieee80211_hw *hw,
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 	struct mib_stats *mib = &phy->mib;
 
+	mt7615_mutex_acquire(phy->dev);
+
 	stats->dot11RTSSuccessCount = mib->rts_cnt;
 	stats->dot11RTSFailureCount = mib->rts_retries_cnt;
 	stats->dot11FCSErrorCount = mib->fcs_err_cnt;
 	stats->dot11ACKFailureCount = mib->ack_fail_cnt;
 
+	memset(mib, 0, sizeof(*mib));
+
+	mt7615_mutex_release(phy->dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index aa6fe65f42a7..84d06f2b41d4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -133,11 +133,11 @@ struct mt7615_vif {
 };
 
 struct mib_stats {
-	u16 ack_fail_cnt;
-	u16 fcs_err_cnt;
-	u16 rts_cnt;
-	u16 rts_retries_cnt;
-	u16 ba_miss_cnt;
+	u32 ack_fail_cnt;
+	u32 fcs_err_cnt;
+	u32 rts_cnt;
+	u32 rts_retries_cnt;
+	u32 ba_miss_cnt;
 	unsigned long aggr_per;
 };
 
-- 
cgit v1.2.3


From 2b35050a321865859fd2f12a3c18ed7be27858c9 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 23 Feb 2021 22:00:36 +0800
Subject: mt76: mt7915: fix mib stats counter reporting to mac80211

In order to properly report MIB counters to mac80211, resets stats in
mt7915_get_stats routine() and hold mt76 mutex accessing MIB counters.
Sum up MIB counters in mt7915_mac_update_mib_stats routine.

Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 35 ++++++++--------------
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |  6 ++++
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h | 10 +++----
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index b5a41cb7fbda..2a3c7b39d2ca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1678,39 +1678,30 @@ mt7915_mac_update_mib_stats(struct mt7915_phy *phy)
 	bool ext_phy = phy != &dev->phy;
 	int i, aggr0, aggr1;
 
-	memset(mib, 0, sizeof(*mib));
-
-	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
-					  MT_MIB_SDR3_FCS_ERR_MASK);
+	mib->fcs_err_cnt += mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
+					   MT_MIB_SDR3_FCS_ERR_MASK);
 
 	aggr0 = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0, aggr1 = aggr0 + 4; i < 4; i++) {
-		u32 val, val2;
+		u32 val;
 
 		val = mt76_rr(dev, MT_MIB_MB_SDR1(ext_phy, i));
-
-		val2 = FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
-		if (val2 > mib->ack_fail_cnt)
-			mib->ack_fail_cnt = val2;
-
-		val2 = FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
-		if (val2 > mib->ba_miss_cnt)
-			mib->ba_miss_cnt = val2;
+		mib->ba_miss_cnt += FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
+		mib->ack_fail_cnt +=
+			FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
 
 		val = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
-		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
-		if (val2 > mib->rts_retries_cnt) {
-			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
-			mib->rts_retries_cnt = val2;
-		}
+		mib->rts_cnt += FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
+		mib->rts_retries_cnt +=
+			FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
 
 		val = mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
-		val2 = mt76_rr(dev, MT_TX_AGG_CNT2(ext_phy, i));
-
 		dev->mt76.aggr_stats[aggr0++] += val & 0xffff;
 		dev->mt76.aggr_stats[aggr0++] += val >> 16;
-		dev->mt76.aggr_stats[aggr1++] += val2 & 0xffff;
-		dev->mt76.aggr_stats[aggr1++] += val2 >> 16;
+
+		val = mt76_rr(dev, MT_TX_AGG_CNT2(ext_phy, i));
+		dev->mt76.aggr_stats[aggr1++] += val & 0xffff;
+		dev->mt76.aggr_stats[aggr1++] += val >> 16;
 	}
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 2406733f542d..cacc9b6933ca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -717,13 +717,19 @@ mt7915_get_stats(struct ieee80211_hw *hw,
 		 struct ieee80211_low_level_stats *stats)
 {
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
 	struct mib_stats *mib = &phy->mib;
 
+	mutex_lock(&dev->mt76.mutex);
 	stats->dot11RTSSuccessCount = mib->rts_cnt;
 	stats->dot11RTSFailureCount = mib->rts_retries_cnt;
 	stats->dot11FCSErrorCount = mib->fcs_err_cnt;
 	stats->dot11ACKFailureCount = mib->ack_fail_cnt;
 
+	memset(mib, 0, sizeof(*mib));
+
+	mutex_unlock(&dev->mt76.mutex);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 5c7eefdf2013..1160d1bf8a7c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -108,11 +108,11 @@ struct mt7915_vif {
 };
 
 struct mib_stats {
-	u16 ack_fail_cnt;
-	u16 fcs_err_cnt;
-	u16 rts_cnt;
-	u16 rts_retries_cnt;
-	u16 ba_miss_cnt;
+	u32 ack_fail_cnt;
+	u32 fcs_err_cnt;
+	u32 rts_cnt;
+	u32 rts_retries_cnt;
+	u32 ba_miss_cnt;
 };
 
 struct mt7915_hif {
-- 
cgit v1.2.3


From c996f0346e40e3b1ac2ebaf0681df898fb157f60 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 23 Feb 2021 18:28:43 +0100
Subject: mt76: connac: fix kernel warning adding monitor interface

Fix the following kernel warning adding a monitor interface in
mt76_connac_mcu_uni_add_dev routine.

[  507.984882] ------------[ cut here ]------------
[  507.989515] WARNING: CPU: 1 PID: 3017 at mt76_connac_mcu_uni_add_dev+0x178/0x190 [mt76_connac_lib]
[  508.059379] CPU: 1 PID: 3017 Comm: ifconfig Not tainted 5.4.98 #0
[  508.065461] Hardware name: MT7622_MT7531 RFB (DT)
[  508.070156] pstate: 80000005 (Nzcv daif -PAN -UAO)
[  508.074939] pc : mt76_connac_mcu_uni_add_dev+0x178/0x190 [mt76_connac_lib]
[  508.081806] lr : mt7921_eeprom_init+0x1288/0x1cb8 [mt7921e]
[  508.087367] sp : ffffffc013a33930
[  508.090671] x29: ffffffc013a33930 x28: ffffff801e628ac0
[  508.095973] x27: ffffff801c7f1200 x26: ffffff801c7eb008
[  508.101275] x25: ffffff801c7eaef0 x24: ffffff801d025610
[  508.106577] x23: ffffff801d022990 x22: ffffff801d024de8
[  508.111879] x21: ffffff801d0226a0 x20: ffffff801c7eaee8
[  508.117181] x19: ffffff801d0226a0 x18: 000000005d00b000
[  508.122482] x17: 00000000ffffffff x16: 0000000000000000
[  508.127785] x15: 0000000000000080 x14: ffffff801d704000
[  508.133087] x13: 0000000000000040 x12: 0000000000000002
[  508.138389] x11: 000000000000000c x10: 0000000000000000
[  508.143691] x9 : 0000000000000020 x8 : 0000000000000001
[  508.148992] x7 : 0000000000000000 x6 : 0000000000000000
[  508.154294] x5 : ffffff801c7eaee8 x4 : 0000000000000006
[  508.159596] x3 : 0000000000000001 x2 : 0000000000000000
[  508.164898] x1 : ffffff801c7eac08 x0 : ffffff801d0226a0
[  508.170200] Call trace:
[  508.172640]  mt76_connac_mcu_uni_add_dev+0x178/0x190 [mt76_connac_lib]
[  508.179159]  mt7921_eeprom_init+0x1288/0x1cb8 [mt7921e]
[  508.184394]  drv_add_interface+0x34/0x88 [mac80211]
[  508.189271]  ieee80211_add_virtual_monitor+0xe0/0xb48 [mac80211]
[  508.195277]  ieee80211_do_open+0x86c/0x918 [mac80211]
[  508.200328]  ieee80211_do_open+0x900/0x918 [mac80211]
[  508.205372]  __dev_open+0xcc/0x150
[  508.208763]  __dev_change_flags+0x134/0x198
[  508.212937]  dev_change_flags+0x20/0x60
[  508.216764]  devinet_ioctl+0x3e8/0x748
[  508.220503]  inet_ioctl+0x1e4/0x350
[  508.223983]  sock_do_ioctl+0x48/0x2a0
[  508.227635]  sock_ioctl+0x310/0x4f8
[  508.231116]  do_vfs_ioctl+0xa4/0xac0
[  508.234681]  ksys_ioctl+0x44/0x90
[  508.237985]  __arm64_sys_ioctl+0x1c/0x48
[  508.241901]  el0_svc_common.constprop.1+0x7c/0x100
[  508.246681]  el0_svc_handler+0x18/0x20
[  508.250421]  el0_svc+0x8/0x1c8
[  508.253465] ---[ end trace c7b90fee13d72c39 ]---
[  508.261278] ------------[ cut here ]------------

Fixes: d0e274af2f2e4 ("mt76: mt76_connac: create mcu library")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index ab9e48c19508..2cd1677d881e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -968,6 +968,7 @@ int mt76_connac_mcu_uni_add_dev(struct mt76_phy *phy,
 
 	switch (vif->type) {
 	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_AP:
 		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
 		break;
-- 
cgit v1.2.3


From 57b8b57516c5108b0078051a31c68dc9dfcbf68f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Feb 2021 12:29:34 +0100
Subject: mt76: check return value of mt76_txq_send_burst in
 mt76_txq_schedule_list

Since mt76_txq_send_burst routine can report a negative error code,
check the returned value before incrementing the number of transmitted
frames in mt76_txq_schedule_list routine.
Return -EBUSY directly if the device is in reset or in power management.

Fixes: 90fdc1717b186 ("mt76: use mac80211 txq scheduling")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/tx.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index b8fe8adc43a3..451ed60c6296 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -461,11 +461,11 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
 	int ret = 0;
 
 	while (1) {
+		int n_frames = 0;
+
 		if (test_bit(MT76_STATE_PM, &phy->state) ||
-		    test_bit(MT76_RESET, &phy->state)) {
-			ret = -EBUSY;
-			break;
-		}
+		    test_bit(MT76_RESET, &phy->state))
+			return -EBUSY;
 
 		if (dev->queue_ops->tx_cleanup &&
 		    q->queued + 2 * MT_TXQ_FREE_THR >= q->ndesc) {
@@ -497,11 +497,16 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
 		}
 
 		if (!mt76_txq_stopped(q))
-			ret += mt76_txq_send_burst(phy, q, mtxq);
+			n_frames = mt76_txq_send_burst(phy, q, mtxq);
 
 		spin_unlock_bh(&q->lock);
 
 		ieee80211_return_txq(phy->hw, txq, false);
+
+		if (unlikely(n_frames < 0))
+			return n_frames;
+
+		ret += n_frames;
 	}
 
 	return ret;
-- 
cgit v1.2.3


From 3ab0269d75cd4800750f7e3e1b037db144f9d034 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Feb 2021 16:57:18 +0100
Subject: mt76: mt7921: get rid of mt7921_sta_rc_update routine

Since the rate selection is currently managed by the fw, get rid of
empty mt7921_sta_rc_update routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index cdb474ff370a..43e878c69616 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1031,14 +1031,6 @@ mt7921_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	return 0;
 }
 
-static void
-mt7921_sta_rc_update(struct ieee80211_hw *hw,
-		     struct ieee80211_vif *vif,
-		     struct ieee80211_sta *sta,
-		     u32 changed)
-{
-}
-
 static void mt7921_sta_statistics(struct ieee80211_hw *hw,
 				  struct ieee80211_vif *vif,
 				  struct ieee80211_sta *sta,
@@ -1165,7 +1157,6 @@ const struct ieee80211_ops mt7921_ops = {
 	.sta_add = mt7921_sta_add,
 	.sta_remove = mt7921_sta_remove,
 	.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
-	.sta_rc_update = mt7921_sta_rc_update,
 	.set_key = mt7921_set_key,
 	.ampdu_action = mt7921_ampdu_action,
 	.set_rts_threshold = mt7921_set_rts_threshold,
-- 
cgit v1.2.3


From 23c1d2dc9ed5be1d0df7987335f5646e3826a461 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 26 Feb 2021 16:23:26 +0800
Subject: mt76: mt7921: fix the base of PCIe interrupt

Should use 0x10000 as the base to operate PCIe interrupt according
to the vendor reference driver.

Fixes: ffa1bf97425b ("mt76: mt7921: introduce PM support")
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Tested-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c  | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 7e040c38c3b7..e94523bf19be 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -137,7 +137,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
 
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 
-	mt7921_l1_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 
 	ret = devm_request_irq(mdev->dev, pdev->irq, mt7921_irq_handler,
 			       IRQF_SHARED, KBUILD_MODNAME, dev);
@@ -248,7 +248,7 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 		return err;
 
 	/* enable interrupt */
-	mt7921_l1_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
 			  MT_INT_MCU_CMD);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index 5cab39c9e7a2..6754b10f9a6b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -389,7 +389,7 @@
 #define MT_HW_CHIPID			0x70010200
 #define MT_HW_REV			0x70010204
 
-#define MT_PCIE_MAC_BASE		0x74030000
+#define MT_PCIE_MAC_BASE		0x10000
 #define MT_PCIE_MAC(ofs)		(MT_PCIE_MAC_BASE + (ofs))
 #define MT_PCIE_MAC_INT_ENABLE		MT_PCIE_MAC(0x188)
 
-- 
cgit v1.2.3


From 53a8fb4afdc877f8f2d5e1e15cc5ad66155987a6 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 26 Feb 2021 16:23:27 +0800
Subject: mt76: mt7921: fix the base of the dynamic remap

We should change the base for the dynamic remap into another one, because
the current base (0xe0000) have been the one used to operate the device
ownership.

Fixes: 163f4d22c118 ("mt76: mt7921: add MAC support")
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index 6754b10f9a6b..b66e15c8ab70 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -362,11 +362,11 @@
 #define MT_INFRA_CFG_BASE		0xfe000
 #define MT_INFRA(ofs)			(MT_INFRA_CFG_BASE + (ofs))
 
-#define MT_HIF_REMAP_L1			MT_INFRA(0x260)
+#define MT_HIF_REMAP_L1			MT_INFRA(0x24c)
 #define MT_HIF_REMAP_L1_MASK		GENMASK(15, 0)
 #define MT_HIF_REMAP_L1_OFFSET		GENMASK(15, 0)
 #define MT_HIF_REMAP_L1_BASE		GENMASK(31, 16)
-#define MT_HIF_REMAP_BASE_L1		0xe0000
+#define MT_HIF_REMAP_BASE_L1		0x40000
 
 #define MT_SWDEF_BASE			0x41f200
 #define MT_SWDEF(ofs)			(MT_SWDEF_BASE + (ofs))
-- 
cgit v1.2.3


From f92f81d35ac26f8a519866f1b561743fe70e33a5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 26 Feb 2021 12:17:23 +0100
Subject: mt76: mt7921: check mcu returned values in mt7921_start

Properly check returned values from mcu utility routines in
mt7921_start.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 43e878c69616..043dd77bdbe7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -173,22 +173,31 @@ static int mt7921_start(struct ieee80211_hw *hw)
 {
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
 	struct mt7921_phy *phy = mt7921_hw_phy(hw);
+	int err;
 
 	mt7921_mutex_acquire(dev);
 
-	mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, true, false);
-	mt76_connac_mcu_set_channel_domain(phy->mt76);
+	err = mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, true, false);
+	if (err)
+		goto out;
+
+	err = mt76_connac_mcu_set_channel_domain(phy->mt76);
+	if (err)
+		goto out;
+
+	err = mt7921_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
+	if (err)
+		goto out;
 
-	mt7921_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
 	mt7921_mac_reset_counters(phy);
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
 	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work,
 				     MT7921_WATCHDOG_TIME);
-
+out:
 	mt7921_mutex_release(dev);
 
-	return 0;
+	return err;
 }
 
 static void mt7921_stop(struct ieee80211_hw *hw)
-- 
cgit v1.2.3


From e268fcaa67b855af41c5eb4c641288a2bef05b17 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 00:44:27 +0800
Subject: mt76: mt7915: add missing capabilities for DBDC

This improves dbdc performance.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 50 ++++++++++++++----------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index c599b44a728c..165be6abcc96 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -113,6 +113,24 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 	ieee80211_hw_set(hw, WANT_MONITOR_VIF);
 
 	hw->max_tx_fragments = 4;
+
+	if (phy->mt76->cap.has_2ghz)
+		phy->mt76->sband_2g.sband.ht_cap.cap |=
+			IEEE80211_HT_CAP_LDPC_CODING |
+			IEEE80211_HT_CAP_MAX_AMSDU;
+
+	if (phy->mt76->cap.has_5ghz) {
+		phy->mt76->sband_5g.sband.ht_cap.cap |=
+			IEEE80211_HT_CAP_LDPC_CODING |
+			IEEE80211_HT_CAP_MAX_AMSDU;
+		phy->mt76->sband_5g.sband.vht_cap.cap |=
+			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+	}
+
+	mt76_set_stream_caps(phy->mt76, true);
+	mt7915_set_stream_vht_txbf_caps(phy);
+	mt7915_set_stream_he_caps(phy);
 }
 
 static void
@@ -240,22 +258,17 @@ static int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	phy->mt76 = mphy;
 	mphy->chainmask = dev->chainmask & ~dev->mphy.chainmask;
 	mphy->antenna_mask = BIT(hweight8(mphy->chainmask)) - 1;
-	mt7915_init_wiphy(mphy->hw);
 
 	INIT_LIST_HEAD(&phy->stats_list);
 	INIT_DELAYED_WORK(&mphy->mac_work, mt7915_mac_work);
 
 	mt7915_eeprom_parse_band_config(phy);
-	mt7915_set_stream_vht_txbf_caps(phy);
-	mt7915_set_stream_he_caps(phy);
+	mt7915_init_wiphy(mphy->hw);
 
 	memcpy(mphy->macaddr, dev->mt76.eeprom.data + MT_EE_MAC_ADDR2,
 	       ETH_ALEN);
 	mt76_eeprom_override(mphy);
 
-	/* The second interface does not get any packets unless it has a vif */
-	ieee80211_hw_set(mphy->hw, WANT_MONITOR_VIF);
-
 	ret = mt7915_init_tx_queues(phy, MT7915_TXQ_BAND1,
 				    MT7915_TX_RING_SIZE);
 	if (ret)
@@ -332,8 +345,14 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 
 void mt7915_set_stream_vht_txbf_caps(struct mt7915_phy *phy)
 {
-	int nss = hweight8(phy->mt76->chainmask);
-	u32 *cap = &phy->mt76->sband_5g.sband.vht_cap.cap;
+	int nss;
+	u32 *cap;
+
+	if (!phy->mt76->cap.has_5ghz)
+		return;
+
+	nss = hweight8(phy->mt76->chainmask);
+	cap = &phy->mt76->sband_5g.sband.vht_cap.cap;
 
 	*cap |= IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
 		IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE |
@@ -637,25 +656,14 @@ int mt7915_register_device(struct mt7915_dev *dev)
 		return ret;
 
 	mt7915_init_wiphy(hw);
-	dev->mphy.sband_2g.sband.ht_cap.cap |=
-			IEEE80211_HT_CAP_LDPC_CODING |
-			IEEE80211_HT_CAP_MAX_AMSDU;
-	dev->mphy.sband_5g.sband.ht_cap.cap |=
-			IEEE80211_HT_CAP_LDPC_CODING |
-			IEEE80211_HT_CAP_MAX_AMSDU;
-	dev->mphy.sband_5g.sband.vht_cap.cap |=
-			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
-			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+
 	if (!dev->dbdc_support)
 		dev->mphy.sband_5g.sband.vht_cap.cap |=
 			IEEE80211_VHT_CAP_SHORT_GI_160 |
 			IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ;
+
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->mphy.chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->mphy.chainmask;
-
-	mt76_set_stream_caps(&dev->mphy, true);
-	mt7915_set_stream_vht_txbf_caps(&dev->phy);
-	mt7915_set_stream_he_caps(&dev->phy);
 	dev->phy.dfs_state = -1;
 
 #ifdef CONFIG_NL80211_TESTMODE
-- 
cgit v1.2.3


From 402a695b1ae69b1cc2e445b2258841fd39f92780 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 15:39:42 +0800
Subject: mt76: mt7615: fix CSA notification for DBDC

Add CSA notification for second phy.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 25 ++++++++++++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h | 10 +++++++++-
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 65c6ab14fe6f..4ca0d8d4c536 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -372,6 +372,23 @@ mt7615_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
 		ieee80211_csa_finish(vif);
 }
 
+static void
+mt7615_mcu_rx_csa_notify(struct mt7615_dev *dev, struct sk_buff *skb)
+{
+	struct mt7615_phy *ext_phy = mt7615_ext_phy(dev);
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt7615_mcu_csa_notify *c;
+
+	c = (struct mt7615_mcu_csa_notify *)skb->data;
+
+	if (ext_phy && ext_phy->omac_mask & BIT_ULL(c->omac_idx))
+		mphy = dev->mt76.phy2;
+
+	ieee80211_iterate_active_interfaces_atomic(mphy->hw,
+			IEEE80211_IFACE_ITER_RESUME_ALL,
+			mt7615_mcu_csa_finish, mphy->hw);
+}
+
 static void
 mt7615_mcu_rx_radar_detected(struct mt7615_dev *dev, struct sk_buff *skb)
 {
@@ -380,7 +397,7 @@ mt7615_mcu_rx_radar_detected(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	r = (struct mt7615_mcu_rdd_report *)skb->data;
 
-	if (r->idx && dev->mt76.phy2)
+	if (r->band_idx && dev->mt76.phy2)
 		mphy = dev->mt76.phy2;
 
 	ieee80211_radar_detected(mphy->hw);
@@ -419,9 +436,7 @@ mt7615_mcu_rx_ext_event(struct mt7615_dev *dev, struct sk_buff *skb)
 		mt7615_mcu_rx_radar_detected(dev, skb);
 		break;
 	case MCU_EXT_EVENT_CSA_NOTIFY:
-		ieee80211_iterate_active_interfaces_atomic(dev->mt76.hw,
-				IEEE80211_IFACE_ITER_RESUME_ALL,
-				mt7615_mcu_csa_finish, dev);
+		mt7615_mcu_rx_csa_notify(dev, skb);
 		break;
 	case MCU_EXT_EVENT_FW_LOG_2_HOST:
 		mt7615_mcu_rx_log_message(dev, skb);
@@ -2176,7 +2191,7 @@ int mt7615_mcu_set_chan_info(struct mt7615_phy *phy, int cmd)
 		.center_chan2 = ieee80211_frequency_to_channel(freq2),
 	};
 
-	if (dev->mt76.hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
+	if (phy->mt76->hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
 		req.switch_reason = CH_SWITCH_SCAN_BYPASS_DPD;
 	else if ((chandef->chan->flags & IEEE80211_CHAN_RADAR) &&
 		 chandef->chan->dfs_state != NL80211_DFS_AVAILABLE)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 411ebb56baef..98c383e400a1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -176,10 +176,18 @@ struct mt7615_mcu_rxd {
 	u8 s2d_index;
 };
 
+struct mt7615_mcu_csa_notify {
+	struct mt7615_mcu_rxd rxd;
+
+	u8 omac_idx;
+	u8 csa_count;
+	u8 rsv[2];
+} __packed;
+
 struct mt7615_mcu_rdd_report {
 	struct mt7615_mcu_rxd rxd;
 
-	u8 idx;
+	u8 band_idx;
 	u8 long_detected;
 	u8 constant_prf_detected;
 	u8 staggered_prf_detected;
-- 
cgit v1.2.3


From 62da7a38e04131178e1344f080a7d02f76f9c149 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 15:39:43 +0800
Subject: mt76: mt7615: stop ext_phy queue when mac reset happens

Stop Tx burst for ext_phy after mac reset.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index d49a3f1c6e16..59aa8f84645b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -2075,6 +2075,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 	del_timer_sync(&dev->phy.roc_timer);
 	cancel_work_sync(&dev->phy.roc_work);
 	if (phy2) {
+		set_bit(MT76_RESET, &phy2->mt76->state);
 		cancel_delayed_work_sync(&phy2->mt76->mac_work);
 		del_timer_sync(&phy2->roc_timer);
 		cancel_work_sync(&phy2->roc_work);
@@ -2108,6 +2109,8 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
 	clear_bit(MT76_RESET, &dev->mphy.state);
+	if (phy2)
+		clear_bit(MT76_RESET, &phy2->mt76->state);
 
 	mt76_worker_enable(&dev->mt76.tx_worker);
 	napi_enable(&dev->mt76.tx_napi);
-- 
cgit v1.2.3


From b6d20ce433c97fa995d7ec067e642cfdf02c0aac Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 15:39:44 +0800
Subject: mt76: mt7915: fix CSA notification for DBDC

Add CSA notification for second phy.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 24 +++++++++++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 11 ++++++++++-
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 6c429a2b15c5..4abe32357edc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -336,6 +336,22 @@ mt7915_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
 		ieee80211_csa_finish(vif);
 }
 
+static void
+mt7915_mcu_rx_csa_notify(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt7915_mcu_csa_notify *c;
+
+	c = (struct mt7915_mcu_csa_notify *)skb->data;
+
+	if (c->band_idx && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	ieee80211_iterate_active_interfaces_atomic(mphy->hw,
+			IEEE80211_IFACE_ITER_RESUME_ALL,
+			mt7915_mcu_csa_finish, mphy->hw);
+}
+
 static void
 mt7915_mcu_rx_radar_detected(struct mt7915_dev *dev, struct sk_buff *skb)
 {
@@ -344,7 +360,7 @@ mt7915_mcu_rx_radar_detected(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	r = (struct mt7915_mcu_rdd_report *)skb->data;
 
-	if (r->idx && dev->mt76.phy2)
+	if (r->band_idx && dev->mt76.phy2)
 		mphy = dev->mt76.phy2;
 
 	ieee80211_radar_detected(mphy->hw);
@@ -511,9 +527,7 @@ mt7915_mcu_rx_ext_event(struct mt7915_dev *dev, struct sk_buff *skb)
 		mt7915_mcu_rx_radar_detected(dev, skb);
 		break;
 	case MCU_EXT_EVENT_CSA_NOTIFY:
-		ieee80211_iterate_active_interfaces_atomic(dev->mt76.hw,
-				IEEE80211_IFACE_ITER_RESUME_ALL,
-				mt7915_mcu_csa_finish, dev);
+		mt7915_mcu_rx_csa_notify(dev, skb);
 		break;
 	case MCU_EXT_EVENT_RATE_REPORT:
 		mt7915_mcu_tx_rate_report(dev, skb);
@@ -3219,7 +3233,7 @@ int mt7915_mcu_set_chan_info(struct mt7915_phy *phy, int cmd)
 	}
 #endif
 
-	if (dev->mt76.hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
+	if (phy->mt76->hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
 		req.switch_reason = CH_SWITCH_SCAN_BYPASS_DPD;
 	else if ((chandef->chan->flags & IEEE80211_CHAN_RADAR) &&
 		 chandef->chan->dfs_state != NL80211_DFS_AVAILABLE)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 2a2f7bf2c94b..4a932140a7c3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -68,10 +68,19 @@ struct mt7915_mcu_rxd {
 	u8 s2d_index;
 };
 
+struct mt7915_mcu_csa_notify {
+	struct mt7915_mcu_rxd rxd;
+
+	u8 omac_idx;
+	u8 csa_count;
+	u8 band_idx;
+	u8 rsv;
+} __packed;
+
 struct mt7915_mcu_rdd_report {
 	struct mt7915_mcu_rxd rxd;
 
-	u8 idx;
+	u8 band_idx;
 	u8 long_detected;
 	u8 constant_prf_detected;
 	u8 staggered_prf_detected;
-- 
cgit v1.2.3


From 6636539283780a0c34880e43a2f4ada561509f61 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 15:39:45 +0800
Subject: mt76: mt7915: stop ext_phy queue when mac reset happens

Stop Tx burst for ext_phy after mac reset.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 2a3c7b39d2ca..d3a6312c058c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1607,9 +1607,10 @@ void mt7915_mac_reset_work(struct work_struct *work)
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
 	cancel_delayed_work_sync(&dev->mphy.mac_work);
-	if (phy2)
+	if (phy2) {
+		set_bit(MT76_RESET, &phy2->mt76->state);
 		cancel_delayed_work_sync(&phy2->mt76->mac_work);
-
+	}
 	/* lock/unlock all queues to ensure that no tx is pending */
 	mt76_txq_schedule_all(&dev->mphy);
 	if (ext_phy)
@@ -1637,6 +1638,8 @@ void mt7915_mac_reset_work(struct work_struct *work)
 
 	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
 	clear_bit(MT76_RESET, &dev->mphy.state);
+	if (phy2)
+		clear_bit(MT76_RESET, &phy2->mt76->state);
 
 	mt76_worker_enable(&dev->mt76.tx_worker);
 	napi_enable(&dev->mt76.tx_napi);
-- 
cgit v1.2.3


From 1623474167f83252576c90b1df42292c6a99ccb7 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 27 Feb 2021 15:39:46 +0800
Subject: mt76: mt7915: fix PHY mode for DBDC

Fix PHY mode configuration for DBDC.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 40 ++++++++++++-------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 4abe32357edc..e0c03b7f278d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -147,9 +147,10 @@ mt7915_get_he_phy_cap(struct mt7915_phy *phy, struct ieee80211_vif *vif)
 }
 
 static u8
-mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
-		    enum nl80211_band band, struct ieee80211_sta *sta)
+mt7915_get_phy_mode(struct mt76_phy *mphy, struct ieee80211_vif *vif,
+		    struct ieee80211_sta *sta)
 {
+	enum nl80211_band band = mphy->chandef.chan->band;
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
 	const struct ieee80211_sta_he_cap *he_cap;
@@ -161,12 +162,8 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		he_cap = &sta->he_cap;
 	} else {
 		struct ieee80211_supported_band *sband;
-		struct mt7915_phy *phy;
-		struct mt7915_vif *mvif;
 
-		mvif = (struct mt7915_vif *)vif->drv_priv;
-		phy = mvif->band_idx ? mt7915_ext_phy(dev) : &dev->phy;
-		sband = phy->mt76->hw->wiphy->bands[band];
+		sband = mphy->hw->wiphy->bands[band];
 
 		ht_cap = &sband->ht_cap;
 		vht_cap = &sband->vht_cap;
@@ -676,8 +673,6 @@ mt7915_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 			 struct mt7915_phy *phy, bool enable)
 {
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
-	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
-	enum nl80211_band band = chandef->chan->band;
 	struct bss_info_basic *bss;
 	u16 wlan_idx = mvif->sta.wcid.idx;
 	u32 type = NETWORK_INFRA;
@@ -727,7 +722,7 @@ mt7915_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 		memcpy(bss->bssid, vif->bss_conf.bssid, ETH_ALEN);
 		bss->bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int);
 		bss->dtim_period = vif->bss_conf.dtim_period;
-		bss->phy_mode = mt7915_get_phy_mode(phy->dev, vif, band, NULL);
+		bss->phy_mode = mt7915_get_phy_mode(phy->mt76, vif, NULL);
 	} else {
 		memcpy(bss->bssid, phy->mt76->macaddr, ETH_ALEN);
 	}
@@ -2069,25 +2064,30 @@ mt7915_mcu_add_txbf(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 static void
 mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
-			     struct ieee80211_vif *vif,
-			     struct ieee80211_sta *sta)
+			     struct ieee80211_vif *vif, struct ieee80211_sta *sta)
 {
-	struct cfg80211_chan_def *chandef = &dev->mphy.chandef;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt76_phy *mphy = &dev->mphy;
+	enum nl80211_band band;
 	struct sta_rec_ra *ra;
 	struct tlv *tlv;
-	enum nl80211_band band = chandef->chan->band;
-	u32 supp_rate = sta->supp_rates[band];
-	int n_rates = hweight32(supp_rate);
-	u32 cap = sta->wme ? STA_CAP_WMM : 0;
+	u32 supp_rate, n_rates, cap = sta->wme ? STA_CAP_WMM : 0;
 	u8 i, nss = sta->rx_nss, mcs = 0;
 
 	tlv = mt7915_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra));
-
 	ra = (struct sta_rec_ra *)tlv;
+
+	if (msta->wcid.ext_phy && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	band = mphy->chandef.chan->band;
+	supp_rate = sta->supp_rates[band];
+	n_rates = hweight32(supp_rate);
+
 	ra->valid = true;
 	ra->auto_rate = true;
-	ra->phy_mode = mt7915_get_phy_mode(dev, vif, band, sta);
-	ra->channel = chandef->chan->hw_value;
+	ra->phy_mode = mt7915_get_phy_mode(mphy, vif, sta);
+	ra->channel = mphy->chandef.chan->hw_value;
 	ra->bw = sta->bandwidth;
 	ra->rate_len = n_rates;
 	ra->phy.bw = sta->bandwidth;
-- 
cgit v1.2.3


From 51bf9d60fb927b850ec2abdc1f9f72de523098a3 Mon Sep 17 00:00:00 2001
From: "Eric Y.Y. Wong" <eric.wongyeungyan@gmail.com>
Date: Wed, 3 Mar 2021 11:43:52 +0800
Subject: mt76: mt76x0u: Add support for TP-Link T2UHP(UN) v1

There are two variants of TP-Link Archer T2UHP V1, which are US and UN:

https://www.tp-link.com/us/support/download/archer-t2uhp/
https://www.tp-link.com/hk/support/download/archer-t2uhp/v1/

From the driver source code provided by the vendor:

https://www.tp-link.com/hk/support/download/archer-t2uhp/v1/#Driver

eric@eric-VirtualBox:~/src$ grep -n USB_DEVICE.*0x2357 DPA_mt7610u_wifi_sta_v3004_rev1_20140529/NETIF/common/rtusb_dev_id.c
45:	{USB_DEVICE(0x2357,0x0105)}, /* T1Uv1 */
46:	{USB_DEVICE(0x2357,0x010B)}, /* T2UHP_UN_v1 */
47:	{USB_DEVICE(0x2357,0x0123)}, /* T2UHP_US_v1 */

eric@eric-VirtualBox:~/src$ lsusb -d 2357:
Bus 001 Device 003: ID 2357:010b TP-Link WiFi
eric@eric-VirtualBox:~/src$ sudo modprobe mt76x0u
eric@eric-VirtualBox:~/src$ sudo sh -c "echo 2357 010b > /sys/bus/usb/drivers/mt76x0u/new_id"

[snip]

eric@eric-VirtualBox:~/src$ ifconfig -a
enp0s3: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        ether 08:00:27:74:d9:ee  txqueuelen 1000  (Ethernet)
        RX packets 78  bytes 16146 (16.1 KB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 75  bytes 8413 (8.4 KB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

lo: flags=73<UP,LOOPBACK,RUNNING>  mtu 65536
        inet 127.0.0.1  netmask 255.0.0.0
        inet6 ::1  prefixlen 128  scopeid 0x10<host>
        loop  txqueuelen 1000  (Local Loopback)
        RX packets 56  bytes 5496 (5.4 KB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 56  bytes 5496 (5.4 KB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

wlx18d6c71b6e3e: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.1.106  netmask 255.255.255.0  broadcast 192.168.1.255
        inet6 fe80::21c7:2d90:e47d:ee48  prefixlen 64  scopeid 0x20<link>
        ether 18:d6:c7:1b:6e:3e  txqueuelen 1000  (Ethernet)
        RX packets 37  bytes 5345 (5.3 KB)
        RX errors 0  dropped 7  overruns 0  frame 0
        TX packets 77  bytes 10513 (10.5 KB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

eric@eric-VirtualBox:~/src$ dmesg | tail -n 30
               12:51:25.551584 main     Log opened 2021-03-01T12:51:25.551572000Z
[   46.855763] 12:51:25.551995 main     OS Product: Linux
[   46.855838] 12:51:25.552075 main     OS Release: 5.4.0-66-generic
[   46.855897] 12:51:25.552150 main     OS Version: #74-Ubuntu SMP Wed Jan 27 22:54:38 UTC 2021
[   46.855971] 12:51:25.552207 main     Executable: /opt/VBoxGuestAdditions-6.1.16/sbin/VBoxService
               12:51:25.552208 main     Process ID: 898
               12:51:25.552209 main     Package type: LINUX_64BITS_GENERIC
[   46.858699] 12:51:25.554921 main     6.1.16 r140961 started. Verbose level = 0
[   46.863116] 12:51:25.559319 main     vbglR3GuestCtrlDetectPeekGetCancelSupport: Supported (#1)
[  140.687550] usb 1-2: new high-speed USB device number 3 using xhci_hcd
[  140.838279] usb 1-2: New USB device found, idVendor=2357, idProduct=010b, bcdDevice= 1.00
[  140.838281] usb 1-2: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[  140.838283] usb 1-2: Product: WiFi
[  140.838284] usb 1-2: Manufacturer: MediaTek
[  140.838285] usb 1-2: SerialNumber: 1.0
[  178.374187] cfg80211: Loading compiled-in X.509 certificates for regulatory database
[  178.378370] cfg80211: Loaded X.509 cert 'sforshee: 00b28ddf47aef9cea7'
[  178.417507] usbcore: registered new interface driver mt76x0u
[  178.557165] usb 1-2: reset high-speed USB device number 3 using xhci_hcd
[  178.720436] mt76x0u 1-2:1.0: ASIC revision: 76100002 MAC revision: 76502000
[  180.156077] mt76x0u 1-2:1.0: EEPROM ver:02 fae:04
[  181.190558] ieee80211 phy0: Selected rate control algorithm 'minstrel_ht'
[  181.225560] mt76x0u 1-2:1.0 wlx18d6c71b6e3e: renamed from wlan0
[  188.660826] wlx18d6c71b6e3e: authenticate with d0:17:c2:e0:d7:9c
[  189.447781] wlx18d6c71b6e3e: send auth to d0:17:c2:e0:d7:9c (try 1/3)
[  189.618282] wlx18d6c71b6e3e: authenticated
[  189.619008] wlx18d6c71b6e3e: associate with d0:17:c2:e0:d7:9c (try 1/3)
[  189.715268] wlx18d6c71b6e3e: RX AssocResp from d0:17:c2:e0:d7:9c (capab=0x1011 status=0 aid=8)
[  189.734720] wlx18d6c71b6e3e: associated
[  190.233239] IPv6: ADDRCONF(NETDEV_CHANGE): wlx18d6c71b6e3e: link becomes ready

Signed-off-by: Eric Y.Y. Wong <eric.wongyeungyan@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
index a593a7796d23..f2b2fa733845 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c
@@ -32,7 +32,8 @@ static struct usb_device_id mt76x0_device_table[] = {
 	{ USB_DEVICE(0x20f4, 0x806b) },	/* TRENDnet TEW-806UBH  */
 	{ USB_DEVICE(0x7392, 0xc711) }, /* Devolo Wifi ac Stick */
 	{ USB_DEVICE(0x0df6, 0x0079) }, /* Sitecom Europe B.V. ac  Stick */
-	{ USB_DEVICE(0x2357, 0x0123) }, /* TP-LINK T2UHP */
+	{ USB_DEVICE(0x2357, 0x0123) }, /* TP-LINK T2UHP_US_v1 */
+	{ USB_DEVICE(0x2357, 0x010b) }, /* TP-LINK T2UHP_UN_v1 */
 	/* TP-LINK Archer T1U */
 	{ USB_DEVICE(0x2357, 0x0105), .driver_info = 1, },
 	/* MT7630U */
-- 
cgit v1.2.3


From 7883906d22c1e73f1f316bd84fc4a7ff8edd12aa Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 3 Mar 2021 18:37:59 +0800
Subject: mt76: mt7915: fix rxrate reporting

Avoid directly updating sinfo->rxrate from firmware since rate_info might
be overwritten by wrong results even mt7915_mcu_get_rx_rate() fails check.

Add more error handlings accordingly.

Fixes: 11553d88d0b9 ("mt76: mt7915: query station rx rate from firmware")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c |  5 ++-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c  | 47 +++++++++++++-----------
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index cacc9b6933ca..bb49ea9307e0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -839,9 +839,12 @@ static void mt7915_sta_statistics(struct ieee80211_hw *hw,
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
 	struct mt7915_sta_stats *stats = &msta->stats;
+	struct rate_info rxrate = {};
 
-	if (mt7915_mcu_get_rx_rate(phy, vif, sta, &sinfo->rxrate) == 0)
+	if (!mt7915_mcu_get_rx_rate(phy, vif, sta, &rxrate)) {
+		sinfo->rxrate = rxrate;
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
+	}
 
 	if (!stats->tx_rate.legacy && !stats->tx_rate.flags)
 		return;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index e0c03b7f278d..b204c5dd7914 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3552,9 +3552,8 @@ int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 	struct ieee80211_supported_band *sband;
 	struct mt7915_mcu_phy_rx_info *res;
 	struct sk_buff *skb;
-	u16 flags = 0;
 	int ret;
-	int i;
+	bool cck = false;
 
 	ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_EXT_CMD(PHY_STAT_INFO),
 					&req, sizeof(req), true, &skb);
@@ -3568,48 +3567,53 @@ int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 
 	switch (res->mode) {
 	case MT_PHY_TYPE_CCK:
+		cck = true;
+		fallthrough;
 	case MT_PHY_TYPE_OFDM:
 		if (mphy->chandef.chan->band == NL80211_BAND_5GHZ)
 			sband = &mphy->sband_5g.sband;
 		else
 			sband = &mphy->sband_2g.sband;
 
-		for (i = 0; i < sband->n_bitrates; i++) {
-			if (rate->mcs != (sband->bitrates[i].hw_value & 0xf))
-				continue;
-
-			rate->legacy = sband->bitrates[i].bitrate;
-			break;
-		}
+		rate->mcs = mt76_get_rate(&dev->mt76, sband, rate->mcs, cck);
+		rate->legacy = sband->bitrates[rate->mcs].bitrate;
 		break;
 	case MT_PHY_TYPE_HT:
 	case MT_PHY_TYPE_HT_GF:
-		if (rate->mcs > 31)
-			return -EINVAL;
-
-		flags |= RATE_INFO_FLAGS_MCS;
+		if (rate->mcs > 31) {
+			ret = -EINVAL;
+			goto out;
+		}
 
+		rate->flags = RATE_INFO_FLAGS_MCS;
 		if (res->gi)
-			flags |= RATE_INFO_FLAGS_SHORT_GI;
+			rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 		break;
 	case MT_PHY_TYPE_VHT:
-		flags |= RATE_INFO_FLAGS_VHT_MCS;
+		if (rate->mcs > 9) {
+			ret = -EINVAL;
+			goto out;
+		}
 
+		rate->flags = RATE_INFO_FLAGS_VHT_MCS;
 		if (res->gi)
-			flags |= RATE_INFO_FLAGS_SHORT_GI;
+			rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 		break;
 	case MT_PHY_TYPE_HE_SU:
 	case MT_PHY_TYPE_HE_EXT_SU:
 	case MT_PHY_TYPE_HE_TB:
 	case MT_PHY_TYPE_HE_MU:
+		if (res->gi > NL80211_RATE_INFO_HE_GI_3_2 || rate->mcs > 11) {
+			ret = -EINVAL;
+			goto out;
+		}
 		rate->he_gi = res->gi;
-
-		flags |= RATE_INFO_FLAGS_HE_MCS;
+		rate->flags = RATE_INFO_FLAGS_HE_MCS;
 		break;
 	default:
-		break;
+		ret = -EINVAL;
+		goto out;
 	}
-	rate->flags = flags;
 
 	switch (res->bw) {
 	case IEEE80211_STA_RX_BW_160:
@@ -3626,7 +3630,8 @@ int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 		break;
 	}
 
+out:
 	dev_kfree_skb(skb);
 
-	return 0;
+	return ret;
 }
-- 
cgit v1.2.3


From f43b941fd61003659a3f0e039595e5e525917aa8 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 3 Mar 2021 18:38:00 +0800
Subject: mt76: mt7915: fix txrate reporting

Properly check rate_info to fix unexpected reporting.

[ 1215.161863] Call trace:
[ 1215.164307]  cfg80211_calculate_bitrate+0x124/0x200 [cfg80211]
[ 1215.170139]  ieee80211s_update_metric+0x80/0xc0 [mac80211]
[ 1215.175624]  ieee80211_tx_status_ext+0x508/0x838 [mac80211]
[ 1215.181190]  mt7915_mcu_get_rx_rate+0x28c/0x8d0 [mt7915e]
[ 1215.186580]  mt7915_mac_tx_free+0x324/0x7c0 [mt7915e]
[ 1215.191623]  mt7915_queue_rx_skb+0xa8/0xd0 [mt7915e]
[ 1215.196582]  mt76_dma_cleanup+0x7b0/0x11d0 [mt76]
[ 1215.201276]  __napi_poll+0x38/0xf8
[ 1215.204668]  napi_workfn+0x40/0x80
[ 1215.208062]  process_one_work+0x1fc/0x390
[ 1215.212062]  worker_thread+0x48/0x4d0
[ 1215.215715]  kthread+0x120/0x128
[ 1215.218935]  ret_from_fork+0x10/0x1c

Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets")
Fixes: e4c5ead632ff ("mt76: mt7915: rename mt7915_mcu_get_rate_info to mt7915_mcu_get_tx_rate")
Reported-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 38 ++++++++++++++++---------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index b204c5dd7914..29b5039cc286 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -364,54 +364,62 @@ mt7915_mcu_rx_radar_detected(struct mt7915_dev *dev, struct sk_buff *skb)
 	dev->hw_pattern++;
 }
 
-static void
+static int
 mt7915_mcu_tx_rate_parse(struct mt76_phy *mphy, struct mt7915_mcu_ra_info *ra,
 			 struct rate_info *rate, u16 r)
 {
 	struct ieee80211_supported_band *sband;
 	u16 ru_idx = le16_to_cpu(ra->ru_idx);
-	u16 flags = 0;
+	bool cck = false;
 
 	rate->mcs = FIELD_GET(MT_RA_RATE_MCS, r);
 	rate->nss = FIELD_GET(MT_RA_RATE_NSS, r) + 1;
 
 	switch (FIELD_GET(MT_RA_RATE_TX_MODE, r)) {
 	case MT_PHY_TYPE_CCK:
+		cck = true;
+		fallthrough;
 	case MT_PHY_TYPE_OFDM:
 		if (mphy->chandef.chan->band == NL80211_BAND_5GHZ)
 			sband = &mphy->sband_5g.sband;
 		else
 			sband = &mphy->sband_2g.sband;
 
+		rate->mcs = mt76_get_rate(mphy->dev, sband, rate->mcs, cck);
 		rate->legacy = sband->bitrates[rate->mcs].bitrate;
 		break;
 	case MT_PHY_TYPE_HT:
 	case MT_PHY_TYPE_HT_GF:
 		rate->mcs += (rate->nss - 1) * 8;
-		flags |= RATE_INFO_FLAGS_MCS;
+		if (rate->mcs > 31)
+			return -EINVAL;
 
+		rate->flags = RATE_INFO_FLAGS_MCS;
 		if (ra->gi)
-			flags |= RATE_INFO_FLAGS_SHORT_GI;
+			rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 		break;
 	case MT_PHY_TYPE_VHT:
-		flags |= RATE_INFO_FLAGS_VHT_MCS;
+		if (rate->mcs > 9)
+			return -EINVAL;
 
+		rate->flags = RATE_INFO_FLAGS_VHT_MCS;
 		if (ra->gi)
-			flags |= RATE_INFO_FLAGS_SHORT_GI;
+			rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 		break;
 	case MT_PHY_TYPE_HE_SU:
 	case MT_PHY_TYPE_HE_EXT_SU:
 	case MT_PHY_TYPE_HE_TB:
 	case MT_PHY_TYPE_HE_MU:
+		if (ra->gi > NL80211_RATE_INFO_HE_GI_3_2 || rate->mcs > 11)
+			return -EINVAL;
+
 		rate->he_gi = ra->gi;
 		rate->he_dcm = FIELD_GET(MT_RA_RATE_DCM_EN, r);
-
-		flags |= RATE_INFO_FLAGS_HE_MCS;
+		rate->flags = RATE_INFO_FLAGS_HE_MCS;
 		break;
 	default:
-		break;
+		return -EINVAL;
 	}
-	rate->flags = flags;
 
 	if (ru_idx) {
 		switch (ru_idx) {
@@ -448,6 +456,8 @@ mt7915_mcu_tx_rate_parse(struct mt76_phy *mphy, struct mt7915_mcu_ra_info *ra,
 			break;
 		}
 	}
+
+	return 0;
 }
 
 static void
@@ -478,12 +488,12 @@ mt7915_mcu_tx_rate_report(struct mt7915_dev *dev, struct sk_buff *skb)
 		mphy = dev->mt76.phy2;
 
 	/* current rate */
-	mt7915_mcu_tx_rate_parse(mphy, ra, &rate, curr);
-	stats->tx_rate = rate;
+	if (!mt7915_mcu_tx_rate_parse(mphy, ra, &rate, curr))
+		stats->tx_rate = rate;
 
 	/* probing rate */
-	mt7915_mcu_tx_rate_parse(mphy, ra, &prob_rate, probe);
-	stats->prob_rate = prob_rate;
+	if (!mt7915_mcu_tx_rate_parse(mphy, ra, &prob_rate, probe))
+		stats->prob_rate = prob_rate;
 
 	if (attempts) {
 		u16 success = le16_to_cpu(ra->success);
-- 
cgit v1.2.3


From b1bed649ad0378afc01226c89d7e89f0580788ba Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 3 Mar 2021 18:38:01 +0800
Subject: mt76: mt7915: check mcu returned values in mt7915_ops

Properly check returned values from mcu utility routines in
mt7915_ops.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 58 +++++++++++++++++-------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index bb49ea9307e0..255f87aa1830 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -25,6 +25,7 @@ static int mt7915_start(struct ieee80211_hw *hw)
 	struct mt7915_dev *dev = mt7915_hw_dev(hw);
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
 	bool running;
+	int ret;
 
 	flush_work(&dev->init_work);
 
@@ -33,21 +34,44 @@ static int mt7915_start(struct ieee80211_hw *hw)
 	running = mt7915_dev_running(dev);
 
 	if (!running) {
-		mt7915_mcu_set_pm(dev, 0, 0);
-		mt7915_mcu_set_mac(dev, 0, true, true);
-		mt7915_mcu_set_scs(dev, 0, true);
+		ret = mt7915_mcu_set_pm(dev, 0, 0);
+		if (ret)
+			goto out;
+
+		ret = mt7915_mcu_set_mac(dev, 0, true, true);
+		if (ret)
+			goto out;
+
+		ret = mt7915_mcu_set_scs(dev, 0, true);
+		if (ret)
+			goto out;
+
 		mt7915_mac_enable_nf(dev, 0);
 	}
 
 	if (phy != &dev->phy) {
-		mt7915_mcu_set_pm(dev, 1, 0);
-		mt7915_mcu_set_mac(dev, 1, true, true);
-		mt7915_mcu_set_scs(dev, 1, true);
+		ret = mt7915_mcu_set_pm(dev, 1, 0);
+		if (ret)
+			goto out;
+
+		ret = mt7915_mcu_set_mac(dev, 1, true, true);
+		if (ret)
+			goto out;
+
+		ret = mt7915_mcu_set_scs(dev, 1, true);
+		if (ret)
+			goto out;
+
 		mt7915_mac_enable_nf(dev, 1);
 	}
 
-	mt7915_mcu_set_sku_en(phy, true);
-	mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD(SET_RX_PATH));
+	ret = mt7915_mcu_set_sku_en(phy, true);
+	if (ret)
+		goto out;
+
+	ret = mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD(SET_RX_PATH));
+	if (ret)
+		goto out;
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
@@ -58,9 +82,10 @@ static int mt7915_start(struct ieee80211_hw *hw)
 	if (!running)
 		mt7915_mac_reset_counters(phy);
 
+out:
 	mutex_unlock(&dev->mt76.mutex);
 
-	return 0;
+	return ret;
 }
 
 static void mt7915_stop(struct ieee80211_hw *hw)
@@ -631,12 +656,13 @@ static int mt7915_set_rts_threshold(struct ieee80211_hw *hw, u32 val)
 {
 	struct mt7915_dev *dev = mt7915_hw_dev(hw);
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	int ret;
 
 	mutex_lock(&dev->mt76.mutex);
-	mt7915_mcu_set_rts_thresh(phy, val);
+	ret = mt7915_mcu_set_rts_thresh(phy, val);
 	mutex_unlock(&dev->mt76.mutex);
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -663,22 +689,22 @@ mt7915_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	case IEEE80211_AMPDU_RX_START:
 		mt76_rx_aggr_start(&dev->mt76, &msta->wcid, tid, ssn,
 				   params->buf_size);
-		mt7915_mcu_add_rx_ba(dev, params, true);
+		ret = mt7915_mcu_add_rx_ba(dev, params, true);
 		break;
 	case IEEE80211_AMPDU_RX_STOP:
 		mt76_rx_aggr_stop(&dev->mt76, &msta->wcid, tid);
-		mt7915_mcu_add_rx_ba(dev, params, false);
+		ret = mt7915_mcu_add_rx_ba(dev, params, false);
 		break;
 	case IEEE80211_AMPDU_TX_OPERATIONAL:
 		mtxq->aggr = true;
 		mtxq->send_bar = false;
-		mt7915_mcu_add_tx_ba(dev, params, true);
+		ret = mt7915_mcu_add_tx_ba(dev, params, true);
 		break;
 	case IEEE80211_AMPDU_TX_STOP_FLUSH:
 	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
 		mtxq->aggr = false;
 		clear_bit(tid, &msta->ampdu_state);
-		mt7915_mcu_add_tx_ba(dev, params, false);
+		ret = mt7915_mcu_add_tx_ba(dev, params, false);
 		break;
 	case IEEE80211_AMPDU_TX_START:
 		set_bit(tid, &msta->ampdu_state);
@@ -687,7 +713,7 @@ mt7915_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	case IEEE80211_AMPDU_TX_STOP_CONT:
 		mtxq->aggr = false;
 		clear_bit(tid, &msta->ampdu_state);
-		mt7915_mcu_add_tx_ba(dev, params, false);
+		ret = mt7915_mcu_add_tx_ba(dev, params, false);
 		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 		break;
 	}
-- 
cgit v1.2.3


From c3800cc2919721640021c00e5845c32c2e7ad24d Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 5 Mar 2021 23:52:59 +0800
Subject: mt76: mt7615: check mcu returned values in mt7615_ops

Properly check returned values from mcu utility routines in
mt7615_ops.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 90 +++++++++++++++++-------
 1 file changed, 64 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 2380854ce386..4caf2b6b39b1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -29,6 +29,7 @@ static int mt7615_start(struct ieee80211_hw *hw)
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 	bool running;
+	int ret;
 
 	if (!mt7615_wait_for_mcu_init(dev))
 		return -EIO;
@@ -38,21 +39,38 @@ static int mt7615_start(struct ieee80211_hw *hw)
 	running = mt7615_dev_running(dev);
 
 	if (!running) {
-		mt7615_mcu_set_pm(dev, 0, 0);
-		mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, true, false);
+		ret = mt7615_mcu_set_pm(dev, 0, 0);
+		if (ret)
+			goto out;
+
+		ret = mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, true, false);
+		if (ret)
+			goto out;
+
 		mt7615_mac_enable_nf(dev, 0);
 	}
 
 	if (phy != &dev->phy) {
-		mt7615_mcu_set_pm(dev, 1, 0);
-		mt76_connac_mcu_set_mac_enable(&dev->mt76, 1, true, false);
+		ret = mt7615_mcu_set_pm(dev, 1, 0);
+		if (ret)
+			goto out;
+
+		ret = mt76_connac_mcu_set_mac_enable(&dev->mt76, 1, true, false);
+		if (ret)
+			goto out;
+
 		mt7615_mac_enable_nf(dev, 1);
 	}
 
-	if (mt7615_firmware_offload(dev))
-		mt76_connac_mcu_set_channel_domain(phy->mt76);
+	if (mt7615_firmware_offload(dev)) {
+		ret = mt76_connac_mcu_set_channel_domain(phy->mt76);
+		if (ret)
+			goto out;
+	}
 
-	mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
+	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
+	if (ret)
+		goto out;
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
@@ -62,9 +80,10 @@ static int mt7615_start(struct ieee80211_hw *hw)
 	if (!running)
 		mt7615_mac_reset_counters(dev);
 
+out:
 	mt7615_mutex_release(dev);
 
-	return 0;
+	return ret;
 }
 
 static void mt7615_stop(struct ieee80211_hw *hw)
@@ -197,7 +216,9 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
 	dev->omac_mask |= BIT_ULL(mvif->mt76.omac_idx);
 	phy->omac_mask |= BIT_ULL(mvif->mt76.omac_idx);
 
-	mt7615_mcu_set_dbdc(dev);
+	ret = mt7615_mcu_set_dbdc(dev);
+	if (ret)
+		goto out;
 
 	idx = MT7615_WTBL_RESERVED - mvif->mt76.idx;
 
@@ -296,8 +317,13 @@ int mt7615_set_channel(struct mt7615_phy *phy)
 	mt76_set_channel(phy->mt76);
 
 	if (is_mt7615(&dev->mt76) && dev->flash_eeprom) {
-		mt7615_mcu_apply_rx_dcoc(phy);
-		mt7615_mcu_apply_tx_dpd(phy);
+		ret = mt7615_mcu_apply_rx_dcoc(phy);
+		if (ret)
+			goto out;
+
+		ret = mt7615_mcu_apply_tx_dpd(phy);
+		if (ret)
+			goto out;
 	}
 
 	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_CHANNEL_SWITCH);
@@ -306,8 +332,13 @@ int mt7615_set_channel(struct mt7615_phy *phy)
 
 	mt7615_mac_set_timing(phy);
 	ret = mt7615_dfs_init_radar_detector(phy);
+	if (ret)
+		goto out;
+
 	mt7615_mac_cca_stats_reset(phy);
-	mt7615_mcu_set_sku_en(phy, true);
+	ret = mt7615_mcu_set_sku_en(phy, true);
+	if (ret)
+		goto out;
 
 	mt7615_mac_reset_counters(dev);
 	phy->noise = 0;
@@ -587,15 +618,21 @@ int mt7615_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	if (err)
 		return err;
 
-	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls)
-		mt7615_mcu_add_bss_info(phy, vif, sta, true);
+	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) {
+		err = mt7615_mcu_add_bss_info(phy, vif, sta, true);
+		if (err)
+			return err;
+	}
+
 	mt7615_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
-	mt7615_mcu_sta_add(&dev->phy, vif, sta, true);
+	err = mt7615_mcu_sta_add(&dev->phy, vif, sta, true);
+	if (err)
+		return err;
 
 	mt76_connac_power_save_sched(phy->mt76, &dev->pm);
 
-	return 0;
+	return err;
 }
 EXPORT_SYMBOL_GPL(mt7615_mac_sta_add);
 
@@ -715,13 +752,13 @@ static int mt7615_set_rts_threshold(struct ieee80211_hw *hw, u32 val)
 {
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
-	int band = phy != &dev->phy;
+	int err, band = phy != &dev->phy;
 
 	mt7615_mutex_acquire(dev);
-	mt76_connac_mcu_set_rts_thresh(&dev->mt76, val, band);
+	err = mt76_connac_mcu_set_rts_thresh(&dev->mt76, val, band);
 	mt7615_mutex_release(dev);
 
-	return 0;
+	return err;
 }
 
 static int
@@ -749,16 +786,16 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	case IEEE80211_AMPDU_RX_START:
 		mt76_rx_aggr_start(&dev->mt76, &msta->wcid, tid, ssn,
 				   params->buf_size);
-		mt7615_mcu_add_rx_ba(dev, params, true);
+		ret = mt7615_mcu_add_rx_ba(dev, params, true);
 		break;
 	case IEEE80211_AMPDU_RX_STOP:
 		mt76_rx_aggr_stop(&dev->mt76, &msta->wcid, tid);
-		mt7615_mcu_add_rx_ba(dev, params, false);
+		ret = mt7615_mcu_add_rx_ba(dev, params, false);
 		break;
 	case IEEE80211_AMPDU_TX_OPERATIONAL:
 		mtxq->aggr = true;
 		mtxq->send_bar = false;
-		mt7615_mcu_add_tx_ba(dev, params, true);
+		ret = mt7615_mcu_add_tx_ba(dev, params, true);
 		ssn = mt7615_mac_get_sta_tid_sn(dev, msta->wcid.idx, tid);
 		ieee80211_send_bar(vif, sta->addr, tid,
 				   IEEE80211_SN_TO_SEQ(ssn));
@@ -766,7 +803,7 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	case IEEE80211_AMPDU_TX_STOP_FLUSH:
 	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
 		mtxq->aggr = false;
-		mt7615_mcu_add_tx_ba(dev, params, false);
+		ret = mt7615_mcu_add_tx_ba(dev, params, false);
 		break;
 	case IEEE80211_AMPDU_TX_START:
 		ssn = mt7615_mac_get_sta_tid_sn(dev, msta->wcid.idx, tid);
@@ -775,7 +812,7 @@ mt7615_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		break;
 	case IEEE80211_AMPDU_TX_STOP_CONT:
 		mtxq->aggr = false;
-		mt7615_mcu_add_tx_ba(dev, params, false);
+		ret = mt7615_mcu_add_tx_ba(dev, params, false);
 		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 		break;
 	}
@@ -1091,6 +1128,7 @@ static int mt7615_cancel_remain_on_channel(struct ieee80211_hw *hw,
 					   struct ieee80211_vif *vif)
 {
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	int err;
 
 	if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
 		return 0;
@@ -1099,10 +1137,10 @@ static int mt7615_cancel_remain_on_channel(struct ieee80211_hw *hw,
 	cancel_work_sync(&phy->roc_work);
 
 	mt7615_mutex_acquire(phy->dev);
-	mt7615_mcu_set_roc(phy, vif, NULL, 0);
+	err = mt7615_mcu_set_roc(phy, vif, NULL, 0);
 	mt7615_mutex_release(phy->dev);
 
-	return 0;
+	return err;
 }
 
 static void mt7615_sta_set_decap_offload(struct ieee80211_hw *hw,
-- 
cgit v1.2.3


From 4bec61d9fb9629c21e60cd24a97235ea1f6020ec Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 4 Mar 2021 21:25:21 +0800
Subject: mt76: mt7663: fix when beacon filter is being applied

HW beacon filter command is being applied until we're in associated state
because the command would rely on the associated access point's beacon
interval and DTIM information.

Fixes: 7124198ab1a4 ("mt76: mt7615: enable beacon filtering by default for offload fw")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 4caf2b6b39b1..342e2c5d9766 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -238,8 +238,6 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
 	ret = mt7615_mcu_add_dev_info(phy, vif, true);
 	if (ret)
 		goto out;
-
-	mt7615_mac_set_beacon_filter(phy, vif, true);
 out:
 	mt7615_mutex_release(dev);
 
@@ -265,7 +263,6 @@ static void mt7615_remove_interface(struct ieee80211_hw *hw,
 
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
 
-	mt7615_mac_set_beacon_filter(phy, vif, false);
 	mt7615_mcu_add_dev_info(phy, vif, false);
 
 	rcu_assign_pointer(dev->mt76.wcid[idx], NULL);
@@ -579,6 +576,9 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_ARP_FILTER)
 		mt7615_mcu_update_arp_filter(hw, vif, info);
 
+	if (changed & BSS_CHANGED_ASSOC)
+		mt7615_mac_set_beacon_filter(phy, vif, info->assoc);
+
 	mt7615_mutex_release(dev);
 }
 
-- 
cgit v1.2.3


From 455ae5aabcc72fed7e5c803d59d122415500dc08 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 4 Mar 2021 21:25:22 +0800
Subject: mt76: mt7663s: make all of packets 4-bytes aligned in sdio tx
 aggregation

Each packet should be padded with the additional zero to become 4-bytes
alignment in sdio tx aggregation.

Fixes: 1522ff731f84 ("mt76: mt7663s: introduce sdio tx aggregation")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
index 9fb506f2ace6..37fe65ced4fd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
@@ -218,6 +218,7 @@ static int mt7663s_tx_run_queue(struct mt76_dev *dev, struct mt76_queue *q)
 	int qid, err, nframes = 0, len = 0, pse_sz = 0, ple_sz = 0;
 	bool mcu = q == dev->q_mcu[MT_MCUQ_WM];
 	struct mt76_sdio *sdio = &dev->sdio;
+	u8 pad;
 
 	qid = mcu ? ARRAY_SIZE(sdio->xmit_buf) - 1 : q->qid;
 	while (q->first != q->head) {
@@ -234,7 +235,8 @@ static int mt7663s_tx_run_queue(struct mt76_dev *dev, struct mt76_queue *q)
 			goto next;
 		}
 
-		if (len + e->skb->len + 4 > MT76S_XMIT_BUF_SZ)
+		pad = roundup(e->skb->len, 4) - e->skb->len;
+		if (len + e->skb->len + pad + 4 > MT76S_XMIT_BUF_SZ)
 			break;
 
 		if (mt7663s_tx_pick_quota(sdio, mcu, e->buf_sz, &pse_sz,
@@ -252,6 +254,11 @@ static int mt7663s_tx_run_queue(struct mt76_dev *dev, struct mt76_queue *q)
 			len += iter->len;
 			nframes++;
 		}
+
+		if (unlikely(pad)) {
+			memset(sdio->xmit_buf[qid] + len, 0, pad);
+			len += pad;
+		}
 next:
 		q->first = (q->first + 1) % q->ndesc;
 		e->done = true;
-- 
cgit v1.2.3


From 45247a85614b49b07b9dc59a4e6783b17e766ff2 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 4 Mar 2021 21:25:23 +0800
Subject: mt76: mt7663s: fix the possible device hang in high traffic

Use the additional memory barrier to ensure the skb list up-to-date
between the skb producer and consumer to avoid the invalid skb content
written into sdio controller and then cause device hang due to mcu assert
caught by WR_TIMEOUT_INT.

Fixes: 1522ff731f84 ("mt76: mt7663s: introduce sdio tx aggregation")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c | 2 ++
 drivers/net/wireless/mediatek/mt76/sdio.c             | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
index 37fe65ced4fd..4393dd21ebbb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c
@@ -225,6 +225,8 @@ static int mt7663s_tx_run_queue(struct mt76_dev *dev, struct mt76_queue *q)
 		struct mt76_queue_entry *e = &q->entry[q->first];
 		struct sk_buff *iter;
 
+		smp_rmb();
+
 		if (!test_bit(MT76_STATE_MCU_RUNNING, &dev->phy.state)) {
 			__skb_put_zero(e->skb, 4);
 			err = __mt7663s_xmit_queue(dev, e->skb->data,
diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c
index 0b6facb17ff7..a18d2896ee1f 100644
--- a/drivers/net/wireless/mediatek/mt76/sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/sdio.c
@@ -256,6 +256,9 @@ mt76s_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 
 	q->entry[q->head].skb = tx_info.skb;
 	q->entry[q->head].buf_sz = len;
+
+	smp_wmb();
+
 	q->head = (q->head + 1) % q->ndesc;
 	q->queued++;
 
-- 
cgit v1.2.3


From dc0a108bb060e3023090f97149e0ca81a2636e36 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 5 Mar 2021 08:22:14 +0800
Subject: mt76: mt7615: add missing capabilities for DBDC

This improves performance for second phy.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 88a2ee7e7143..c59043195870 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -368,6 +368,11 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 		hw->max_tx_fragments = MT_TXP_MAX_BUF_NUM;
 	else
 		hw->max_tx_fragments = MT_HW_TXP_MAX_BUF_NUM;
+
+	phy->mt76->sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
+	phy->mt76->sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
+	phy->mt76->sband_5g.sband.vht_cap.cap |=
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
 }
 
 static void
@@ -505,10 +510,6 @@ void mt7615_init_device(struct mt7615_dev *dev)
 
 	mt7615_init_wiphy(hw);
 	dev->pm.idle_timeout = MT7615_PM_TIMEOUT;
-	dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
-	dev->mphy.sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
-	dev->mphy.sband_5g.sband.vht_cap.cap |=
-			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
 	mt7615_cap_dbdc_disable(dev);
 	dev->phy.dfs_state = -1;
 
-- 
cgit v1.2.3


From 782018391748f3547d44c76341ef89a2118ffe32 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 5 Mar 2021 16:46:41 +0800
Subject: mt76: mt7915: fix possible deadlock while mt7915_register_ext_phy()

ieee80211_register_hw() is called with rtnl_lock held, and this could be
caused lockdep from a work item that's on a workqueue that is flushed
with the rtnl held.

Move mt7915_register_ext_phy() outside the init_work().

Signed-off-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 7 ++++---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 4 ++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 165be6abcc96..2ffadd22c259 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -181,8 +181,6 @@ static void mt7915_mac_init(struct mt7915_dev *dev)
 				       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 	for (i = 0; i < 2; i++)
 		mt7915_mac_init_band(dev, i);
-
-	mt7915_mcu_set_rts_thresh(&dev->phy, 0x92b);
 }
 
 static int mt7915_txbf_init(struct mt7915_dev *dev)
@@ -295,7 +293,6 @@ static void mt7915_init_work(struct work_struct *work)
 	mt7915_mac_init(dev);
 	mt7915_init_txpower(dev);
 	mt7915_txbf_init(dev);
-	mt7915_register_ext_phy(dev);
 }
 
 static int mt7915_init_hardware(struct mt7915_dev *dev)
@@ -677,6 +674,10 @@ int mt7915_register_device(struct mt7915_dev *dev)
 
 	ieee80211_queue_work(mt76_hw(dev), &dev->init_work);
 
+	ret = mt7915_register_ext_phy(dev);
+	if (ret)
+		return ret;
+
 	return mt7915_init_debugfs(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 255f87aa1830..cde8c1ce3ae0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -65,6 +65,10 @@ static int mt7915_start(struct ieee80211_hw *hw)
 		mt7915_mac_enable_nf(dev, 1);
 	}
 
+	ret = mt7915_mcu_set_rts_thresh(phy, 0x92b);
+	if (ret)
+		goto out;
+
 	ret = mt7915_mcu_set_sku_en(phy, true);
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From a2a6cd54eec4649c7cefd00ddb58f55a98c6d951 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:45 +0100
Subject: mt76: mt7921: reduce mcu timeouts for suspend, offload and hif_ctrl
 msg

Reduce mcu timeout for the following uni mcu commands:
- MCU_UNI_CMD_SUSPEND
- MCU_UNI_CMD_OFFLOAD
- MCU_UNI_CMD_HIF_CTRL

This is a preliminary patch to introduce chip reset support

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index b0f8c29f756b..08da6d0f1f36 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -222,8 +222,16 @@ mt7921_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
 	u32 val;
 	u8 seq;
 
-	/* TODO: make dynamic based on msg type */
-	mdev->mcu.timeout = 20 * HZ;
+	switch (cmd) {
+	case MCU_UNI_CMD_HIF_CTRL:
+	case MCU_UNI_CMD_SUSPEND:
+	case MCU_UNI_CMD_OFFLOAD:
+		mdev->mcu.timeout = HZ / 3;
+		break;
+	default:
+		mdev->mcu.timeout = 3 * HZ;
+		break;
+	}
 
 	seq = ++dev->mt76.mcu.msg_seq & 0xf;
 	if (!seq)
-- 
cgit v1.2.3


From acf337c33de55dbeb25a8c34fa2051d42d77aecc Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:46 +0100
Subject: mt76: introduce mcu_reset function pointer in mt76_mcu_ops structure

Introduce mcu_reset function pointer in mt76_mcu_ops structure in order
to run hw related reset function for the mcu running on the chipset.
This is a preliminary patch to introduce chip reset for mt7921 devices.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mcu.c  | 4 ++++
 drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c
index d3a5e2c4f12a..70624cd07449 100644
--- a/drivers/net/wireless/mediatek/mt76/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mcu.c
@@ -99,6 +99,10 @@ int mt76_mcu_skb_send_and_get_msg(struct mt76_dev *dev, struct sk_buff *skb,
 			dev_kfree_skb(skb);
 	} while (ret == -EAGAIN);
 
+	/* notify driver code to reset the mcu */
+	if (ret == -ETIMEDOUT && dev->mcu_ops->mcu_reset)
+		dev->mcu_ops->mcu_reset(dev);
+
 out:
 	mutex_unlock(&dev->mcu.mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 4d71ca228de2..4945b0430380 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -166,6 +166,7 @@ struct mt76_mcu_ops {
 	int (*mcu_rd_rp)(struct mt76_dev *dev, u32 base,
 			 struct mt76_reg_pair *rp, int len);
 	int (*mcu_restart)(struct mt76_dev *dev);
+	void (*mcu_reset)(struct mt76_dev *dev);
 };
 
 struct mt76_queue_ops {
-- 
cgit v1.2.3


From d32464e68ffc9cbec4960cd06f05bf48b3b3703f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:47 +0100
Subject: mt76: mt7921: introduce mt7921_run_firmware utility routine.

This is a preliminary patch to introduce chip reset for mt7921 devices.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 32 +++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 +
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 08da6d0f1f36..6819c5dd7e94 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -927,6 +927,24 @@ int mt7921_mcu_fw_log_2_host(struct mt7921_dev *dev, u8 ctrl)
 				 sizeof(data), false);
 }
 
+int mt7921_run_firmware(struct mt7921_dev *dev)
+{
+	int err;
+
+	err = mt7921_driver_own(dev);
+	if (err)
+		return err;
+
+	err = mt7921_load_firmware(dev);
+	if (err)
+		return err;
+
+	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
+	mt7921_mcu_fw_log_2_host(dev, 1);
+
+	return 0;
+}
+
 int mt7921_mcu_init(struct mt7921_dev *dev)
 {
 	static const struct mt76_mcu_ops mt7921_mcu_ops = {
@@ -935,22 +953,10 @@ int mt7921_mcu_init(struct mt7921_dev *dev)
 		.mcu_parse_response = mt7921_mcu_parse_response,
 		.mcu_restart = mt7921_mcu_restart,
 	};
-	int ret;
 
 	dev->mt76.mcu_ops = &mt7921_mcu_ops;
 
-	ret = mt7921_driver_own(dev);
-	if (ret)
-		return ret;
-
-	ret = mt7921_load_firmware(dev);
-	if (ret)
-		return ret;
-
-	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
-	mt7921_mcu_fw_log_2_host(dev, 1);
-
-	return 0;
+	return mt7921_run_firmware(dev);
 }
 
 void mt7921_mcu_exit(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 2979d06ee0ad..72dc2ba8fd40 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -220,6 +220,7 @@ void mt7921_eeprom_init_sku(struct mt7921_dev *dev);
 int mt7921_dma_init(struct mt7921_dev *dev);
 void mt7921_dma_prefetch(struct mt7921_dev *dev);
 void mt7921_dma_cleanup(struct mt7921_dev *dev);
+int mt7921_run_firmware(struct mt7921_dev *dev);
 int mt7921_mcu_init(struct mt7921_dev *dev);
 int mt7921_mcu_add_bss_info(struct mt7921_phy *phy,
 			    struct ieee80211_vif *vif, int enable);
-- 
cgit v1.2.3


From 1f7396acfef4691b8cf4a3e631fd3f59d779c0f2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:48 +0100
Subject: mt76: mt7921: introduce __mt7921_start utility routine

This is a preliminary patch to introduce mt7921 chip reset support.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 35 +++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 +
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 043dd77bdbe7..2b9fa4bbb21d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -169,33 +169,40 @@ void mt7921_set_stream_he_caps(struct mt7921_phy *phy)
 	}
 }
 
-static int mt7921_start(struct ieee80211_hw *hw)
+int __mt7921_start(struct mt7921_phy *phy)
 {
-	struct mt7921_dev *dev = mt7921_hw_dev(hw);
-	struct mt7921_phy *phy = mt7921_hw_phy(hw);
+	struct mt76_phy *mphy = phy->mt76;
 	int err;
 
-	mt7921_mutex_acquire(dev);
-
-	err = mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, true, false);
+	err = mt76_connac_mcu_set_mac_enable(mphy->dev, 0, true, false);
 	if (err)
-		goto out;
+		return err;
 
-	err = mt76_connac_mcu_set_channel_domain(phy->mt76);
+	err = mt76_connac_mcu_set_channel_domain(mphy);
 	if (err)
-		goto out;
+		return err;
 
 	err = mt7921_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
 	if (err)
-		goto out;
+		return err;
 
 	mt7921_mac_reset_counters(phy);
-	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+	set_bit(MT76_STATE_RUNNING, &mphy->state);
 
-	ieee80211_queue_delayed_work(hw, &phy->mt76->mac_work,
+	ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
 				     MT7921_WATCHDOG_TIME);
-out:
-	mt7921_mutex_release(dev);
+
+	return 0;
+}
+
+static int mt7921_start(struct ieee80211_hw *hw)
+{
+	struct mt7921_phy *phy = mt7921_hw_phy(hw);
+	int err;
+
+	mt7921_mutex_acquire(phy->dev);
+	err = __mt7921_start(phy);
+	mt7921_mutex_release(phy->dev);
 
 	return err;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 72dc2ba8fd40..c1404a7f9ffb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -209,6 +209,7 @@ extern struct pci_driver mt7921_pci_driver;
 
 u32 mt7921_reg_map(struct mt7921_dev *dev, u32 addr);
 
+int __mt7921_start(struct mt7921_phy *phy);
 int mt7921_register_device(struct mt7921_dev *dev);
 void mt7921_unregister_device(struct mt7921_dev *dev);
 int mt7921_eeprom_init(struct mt7921_dev *dev);
-- 
cgit v1.2.3


From 3990465db6829c91e8ebfde51ba2d98885020249 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:49 +0100
Subject: mt76: dma: introduce mt76_dma_queue_reset routine

Introduce mt76_dma_queue_reset utility routine to reset a given hw
queue. This is a preliminary patch to introduce mt7921 chip reset
support.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c  | 46 +++++++++++++++++++------------
 drivers/net/wireless/mediatek/mt76/mt76.h |  3 ++
 2 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 6273b2cfce4f..8d4a276130cf 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -79,13 +79,38 @@ mt76_free_pending_txwi(struct mt76_dev *dev)
 	local_bh_enable();
 }
 
+static void
+mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
+{
+	writel(q->desc_dma, &q->regs->desc_base);
+	writel(q->ndesc, &q->regs->ring_size);
+	q->head = readl(&q->regs->dma_idx);
+	q->tail = q->head;
+}
+
+static void
+mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
+{
+	int i;
+
+	if (!q)
+		return;
+
+	/* clear descriptors */
+	for (i = 0; i < q->ndesc; i++)
+		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
+
+	writel(0, &q->regs->cpu_idx);
+	writel(0, &q->regs->dma_idx);
+	mt76_dma_sync_idx(dev, q);
+}
+
 static int
 mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
 		     int idx, int n_desc, int bufsize,
 		     u32 ring_base)
 {
 	int size;
-	int i;
 
 	spin_lock_init(&q->lock);
 	spin_lock_init(&q->cleanup_lock);
@@ -105,14 +130,7 @@ mt76_dma_alloc_queue(struct mt76_dev *dev, struct mt76_queue *q,
 	if (!q->entry)
 		return -ENOMEM;
 
-	/* clear descriptors */
-	for (i = 0; i < q->ndesc; i++)
-		q->desc[i].ctrl = cpu_to_le32(MT_DMA_CTL_DMA_DONE);
-
-	writel(q->desc_dma, &q->regs->desc_base);
-	writel(0, &q->regs->cpu_idx);
-	writel(0, &q->regs->dma_idx);
-	writel(q->ndesc, &q->regs->ring_size);
+	mt76_dma_queue_reset(dev, q);
 
 	return 0;
 }
@@ -201,15 +219,6 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
 	memset(e, 0, sizeof(*e));
 }
 
-static void
-mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q)
-{
-	writel(q->desc_dma, &q->regs->desc_base);
-	writel(q->ndesc, &q->regs->ring_size);
-	q->head = readl(&q->regs->dma_idx);
-	q->tail = q->head;
-}
-
 static void
 mt76_dma_kick_queue(struct mt76_dev *dev, struct mt76_queue *q)
 {
@@ -642,6 +651,7 @@ mt76_dma_init(struct mt76_dev *dev)
 static const struct mt76_queue_ops mt76_dma_ops = {
 	.init = mt76_dma_init,
 	.alloc = mt76_dma_alloc_queue,
+	.reset_q = mt76_dma_queue_reset,
 	.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
 	.tx_queue_skb = mt76_dma_tx_queue_skb,
 	.tx_cleanup = mt76_dma_tx_cleanup,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 4945b0430380..b21a72467927 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -192,6 +192,8 @@ struct mt76_queue_ops {
 			   bool flush);
 
 	void (*kick)(struct mt76_dev *dev, struct mt76_queue *q);
+
+	void (*reset_q)(struct mt76_dev *dev, struct mt76_queue *q);
 };
 
 enum mt76_wcid_flags {
@@ -796,6 +798,7 @@ static inline u16 mt76_rev(struct mt76_dev *dev)
 #define mt76_queue_rx_reset(dev, ...)	(dev)->mt76.queue_ops->rx_reset(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_tx_cleanup(dev, ...)        (dev)->mt76.queue_ops->tx_cleanup(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_kick(dev, ...)	(dev)->mt76.queue_ops->kick(&((dev)->mt76), __VA_ARGS__)
+#define mt76_queue_reset(dev, ...)	(dev)->mt76.queue_ops->reset_q(&((dev)->mt76), __VA_ARGS__)
 
 #define mt76_for_each_q_rx(dev, i)	\
 	for (i = 0; i < ARRAY_SIZE((dev)->q_rx) && \
-- 
cgit v1.2.3


From c001df978e4cb88975147ddd2c829c9e12a55076 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:50 +0100
Subject: mt76: dma: export mt76_dma_rx_cleanup routine

Export mt76_dma_rx_cleanup routine in mt76_queue_ops data structure.
This is a preliminary patch to introduce mt7921 chip reset support.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c  | 1 +
 drivers/net/wireless/mediatek/mt76/mt76.h | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 8d4a276130cf..b87b46538b95 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -655,6 +655,7 @@ static const struct mt76_queue_ops mt76_dma_ops = {
 	.tx_queue_skb_raw = mt76_dma_tx_queue_skb_raw,
 	.tx_queue_skb = mt76_dma_tx_queue_skb,
 	.tx_cleanup = mt76_dma_tx_cleanup,
+	.rx_cleanup = mt76_dma_rx_cleanup,
 	.rx_reset = mt76_dma_rx_reset,
 	.kick = mt76_dma_kick_queue,
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index b21a72467927..b09b0f5ffd6d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -191,6 +191,8 @@ struct mt76_queue_ops {
 	void (*tx_cleanup)(struct mt76_dev *dev, struct mt76_queue *q,
 			   bool flush);
 
+	void (*rx_cleanup)(struct mt76_dev *dev, struct mt76_queue *q);
+
 	void (*kick)(struct mt76_dev *dev, struct mt76_queue *q);
 
 	void (*reset_q)(struct mt76_dev *dev, struct mt76_queue *q);
@@ -796,7 +798,8 @@ static inline u16 mt76_rev(struct mt76_dev *dev)
 #define mt76_tx_queue_skb_raw(dev, ...)	(dev)->mt76.queue_ops->tx_queue_skb_raw(&((dev)->mt76), __VA_ARGS__)
 #define mt76_tx_queue_skb(dev, ...)	(dev)->mt76.queue_ops->tx_queue_skb(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_rx_reset(dev, ...)	(dev)->mt76.queue_ops->rx_reset(&((dev)->mt76), __VA_ARGS__)
-#define mt76_queue_tx_cleanup(dev, ...)        (dev)->mt76.queue_ops->tx_cleanup(&((dev)->mt76), __VA_ARGS__)
+#define mt76_queue_tx_cleanup(dev, ...)	(dev)->mt76.queue_ops->tx_cleanup(&((dev)->mt76), __VA_ARGS__)
+#define mt76_queue_rx_cleanup(dev, ...)	(dev)->mt76.queue_ops->rx_cleanup(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_kick(dev, ...)	(dev)->mt76.queue_ops->kick(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_reset(dev, ...)	(dev)->mt76.queue_ops->reset_q(&((dev)->mt76), __VA_ARGS__)
 
-- 
cgit v1.2.3


From 0c1ce988460765ece1ba8eacd00533eefb6e666a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 7 Mar 2021 19:20:51 +0100
Subject: mt76: mt7921: add wifi reset support

Introduce wifi chip reset support for mt7921 device to recover mcu
hangs.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |   3 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 201 +++++++++++++++------
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    |   3 +
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |   4 +-
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h   |   4 +
 5 files changed, 155 insertions(+), 60 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 4070f7156aa5..262c9d150bf0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -143,7 +143,7 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
 	mt76_clear(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_RXD_G5_EN);
 }
 
-static void mt7921_mac_init(struct mt7921_dev *dev)
+void mt7921_mac_init(struct mt7921_dev *dev)
 {
 	int i;
 
@@ -233,7 +233,6 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 
-	init_waitqueue_head(&dev->reset_wait);
 	INIT_WORK(&dev->reset_work, mt7921_mac_reset_work);
 
 	ret = mt7921_init_hardware(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 3e96bcfda45b..05971ea9f8fa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1184,43 +1184,77 @@ void mt7921_update_channel(struct mt76_dev *mdev)
 	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
 }
 
-static bool
-mt7921_wait_reset_state(struct mt7921_dev *dev, u32 state)
+static int
+mt7921_wfsys_reset(struct mt7921_dev *dev)
 {
-	bool ret;
+	mt76_set(dev, 0x70002600, BIT(0));
+	msleep(200);
+	mt76_clear(dev, 0x70002600, BIT(0));
 
-	ret = wait_event_timeout(dev->reset_wait,
-				 (READ_ONCE(dev->reset_state) & state),
-				 MT7921_RESET_TIMEOUT);
-
-	WARN(!ret, "Timeout waiting for MCU reset state %x\n", state);
-	return ret;
+	return __mt76_poll_msec(&dev->mt76, MT_WFSYS_SW_RST_B,
+				WFSYS_SW_INIT_DONE, WFSYS_SW_INIT_DONE, 500);
 }
 
 static void
-mt7921_dma_reset(struct mt7921_phy *phy)
+mt7921_dma_reset(struct mt7921_dev *dev)
 {
-	struct mt7921_dev *dev = phy->dev;
 	int i;
 
+	/* reset */
+	mt76_clear(dev, MT_WFDMA0_RST,
+		   MT_WFDMA0_RST_DMASHDL_ALL_RST | MT_WFDMA0_RST_LOGIC_RST);
+
+	mt76_set(dev, MT_WFDMA0_RST,
+		 MT_WFDMA0_RST_DMASHDL_ALL_RST | MT_WFDMA0_RST_LOGIC_RST);
+
+	/* disable WFDMA0 */
 	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
-		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
+		   MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
+		   MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
+		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
 
-	usleep_range(1000, 2000);
+	mt76_poll(dev, MT_WFDMA0_GLO_CFG,
+		  MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
+		  MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000);
 
-	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WA], true);
+	/* reset hw queues */
 	for (i = 0; i < __MT_TXQ_MAX; i++)
-		mt76_queue_tx_cleanup(dev, phy->mt76->q_tx[i], true);
+		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
 
-	mt76_for_each_q_rx(&dev->mt76, i) {
-		mt76_queue_rx_reset(dev, i);
-	}
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
 
-	/* re-init prefetch settings after reset */
+	/* configure perfetch settings */
 	mt7921_dma_prefetch(dev);
 
+	/* reset dma idx */
+	mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
+
+	/* configure delay interrupt */
+	mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
+
+	mt76_set(dev, MT_WFDMA0_GLO_CFG,
+		 MT_WFDMA0_GLO_CFG_TX_WB_DDONE |
+		 MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN |
+		 MT_WFDMA0_GLO_CFG_CLK_GAT_DIS |
+		 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+		 MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
+		 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+
 	mt76_set(dev, MT_WFDMA0_GLO_CFG,
 		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+
+	mt76_set(dev, 0x54000120, BIT(1));
+
+	/* enable interrupts for TX/RX rings */
+	mt7921_irq_enable(dev,
+			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
+			  MT_INT_MCU_CMD);
 }
 
 void mt7921_tx_token_put(struct mt7921_dev *dev)
@@ -1244,71 +1278,125 @@ void mt7921_tx_token_put(struct mt7921_dev *dev)
 	idr_destroy(&dev->token);
 }
 
-/* system error recovery */
-void mt7921_mac_reset_work(struct work_struct *work)
+static void
+mt7921_vif_connect_iter(void *priv, u8 *mac,
+			struct ieee80211_vif *vif)
 {
-	struct mt7921_dev *dev;
+	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	struct mt7921_dev *dev = mvif->phy->dev;
 
-	dev = container_of(work, struct mt7921_dev, reset_work);
+	ieee80211_disconnect(vif, true);
 
-	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_DMA))
-		return;
+	mt76_connac_mcu_uni_add_dev(&dev->mphy, vif, &mvif->sta.wcid, true);
+	mt7921_mcu_set_tx(dev, vif);
+}
+
+static int
+mt7921_mac_reset(struct mt7921_dev *dev)
+{
+	int i, err;
+
+	mt76_connac_free_pending_tx_skbs(&dev->pm, NULL);
 
-	ieee80211_stop_queues(mt76_hw(dev));
+	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0x0);
 
-	set_bit(MT76_RESET, &dev->mphy.state);
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
-	cancel_delayed_work_sync(&dev->mphy.mac_work);
+	skb_queue_purge(&dev->mt76.mcu.res_q);
 
-	/* lock/unlock all queues to ensure that no tx is pending */
 	mt76_txq_schedule_all(&dev->mphy);
 
 	mt76_worker_disable(&dev->mt76.tx_worker);
-	napi_disable(&dev->mt76.napi[0]);
-	napi_disable(&dev->mt76.napi[1]);
-	napi_disable(&dev->mt76.napi[2]);
+	napi_disable(&dev->mt76.napi[MT_RXQ_MAIN]);
+	napi_disable(&dev->mt76.napi[MT_RXQ_MCU]);
+	napi_disable(&dev->mt76.napi[MT_RXQ_MCU_WA]);
 	napi_disable(&dev->mt76.tx_napi);
 
-	mt7921_mutex_acquire(dev);
-
-	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED);
-
 	mt7921_tx_token_put(dev);
 	idr_init(&dev->token);
 
-	if (mt7921_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
-		mt7921_dma_reset(&dev->phy);
+	/* clean up hw queues */
+	for (i = 0; i < ARRAY_SIZE(dev->mt76.phy.q_tx); i++)
+		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
 
-		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_INIT);
-		mt7921_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
-	}
+	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_mcu); i++)
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
 
-	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
-	clear_bit(MT76_RESET, &dev->mphy.state);
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_rx_cleanup(dev, &dev->mt76.q_rx[i]);
+
+	mt7921_wfsys_reset(dev);
+	mt7921_dma_reset(dev);
+
+	mt76_for_each_q_rx(&dev->mt76, i) {
+		mt76_queue_rx_reset(dev, i);
+		napi_enable(&dev->mt76.napi[i]);
+		napi_schedule(&dev->mt76.napi[i]);
+	}
 
-	mt76_worker_enable(&dev->mt76.tx_worker);
 	napi_enable(&dev->mt76.tx_napi);
 	napi_schedule(&dev->mt76.tx_napi);
+	mt76_worker_enable(&dev->mt76.tx_worker);
 
-	napi_enable(&dev->mt76.napi[0]);
-	napi_schedule(&dev->mt76.napi[0]);
+	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
 
-	napi_enable(&dev->mt76.napi[1]);
-	napi_schedule(&dev->mt76.napi[1]);
+	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+	mt7921_irq_enable(dev,
+			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
+			  MT_INT_MCU_CMD);
 
-	napi_enable(&dev->mt76.napi[2]);
-	napi_schedule(&dev->mt76.napi[2]);
+	err = mt7921_run_firmware(dev);
+	if (err)
+		return err;
 
-	ieee80211_wake_queues(mt76_hw(dev));
+	err = mt7921_mcu_set_eeprom(dev);
+	if (err)
+		return err;
 
-	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
-	mt7921_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
+	mt7921_mac_init(dev);
+	return __mt7921_start(&dev->phy);
+}
 
-	mt7921_mutex_release(dev);
+/* system error recovery */
+void mt7921_mac_reset_work(struct work_struct *work)
+{
+	struct ieee80211_hw *hw;
+	struct mt7921_dev *dev;
+	int i;
 
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mphy.mac_work,
-				     MT7921_WATCHDOG_TIME);
+	dev = container_of(work, struct mt7921_dev, reset_work);
+	hw = mt76_hw(dev);
+
+	dev_err(dev->mt76.dev, "chip reset\n");
+	ieee80211_stop_queues(hw);
+
+	cancel_delayed_work_sync(&dev->mphy.mac_work);
+	cancel_delayed_work_sync(&dev->pm.ps_work);
+	cancel_work_sync(&dev->pm.wake_work);
+
+	mutex_lock(&dev->mt76.mutex);
+	for (i = 0; i < 10; i++) {
+		if (!mt7921_mac_reset(dev))
+			break;
+	}
+	mutex_unlock(&dev->mt76.mutex);
+
+	if (i == 10)
+		dev_err(dev->mt76.dev, "chip reset failed\n");
+
+	ieee80211_wake_queues(hw);
+	ieee80211_iterate_active_interfaces(hw,
+					    IEEE80211_IFACE_ITER_RESUME_ALL,
+					    mt7921_vif_connect_iter, 0);
+}
+
+void mt7921_reset(struct mt76_dev *mdev)
+{
+	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+
+	queue_work(dev->mt76.wq, &dev->reset_work);
 }
 
 static void
@@ -1505,4 +1593,5 @@ void mt7921_coredump_work(struct work_struct *work)
 	}
 	dev_coredumpv(dev->mt76.dev, dump, MT76_CONNAC_COREDUMP_SZ,
 		      GFP_KERNEL);
+	mt7921_reset(&dev->mt76);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 6819c5dd7e94..98b48b26a6fe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -952,6 +952,7 @@ int mt7921_mcu_init(struct mt7921_dev *dev)
 		.mcu_skb_send_msg = mt7921_mcu_send_message,
 		.mcu_parse_response = mt7921_mcu_parse_response,
 		.mcu_restart = mt7921_mcu_restart,
+		.mcu_reset = mt7921_reset,
 	};
 
 	dev->mt76.mcu_ops = &mt7921_mcu_ops;
@@ -1269,6 +1270,7 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 
 	if (i == MT7921_DRV_OWN_RETRY_COUNT) {
 		dev_err(dev->mt76.dev, "driver own failed\n");
+		mt7921_reset(&dev->mt76);
 		return -EIO;
 	}
 
@@ -1295,6 +1297,7 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 
 	if (i == MT7921_DRV_OWN_RETRY_COUNT) {
 		dev_err(dev->mt76.dev, "firmware own failed\n");
+		mt7921_reset(&dev->mt76);
 		return -EIO;
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index c1404a7f9ffb..df3c9730876c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -151,8 +151,6 @@ struct mt7921_dev {
 
 	struct work_struct init_work;
 	struct work_struct reset_work;
-	wait_queue_head_t reset_wait;
-	u32 reset_state;
 
 	struct list_head sta_poll_list;
 	spinlock_t sta_poll_lock;
@@ -283,6 +281,7 @@ mt7921_l1_rmw(struct mt7921_dev *dev, u32 addr, u32 mask, u32 val)
 #define mt7921_l1_set(dev, addr, val)	mt7921_l1_rmw(dev, addr, 0, val)
 #define mt7921_l1_clear(dev, addr, val)	mt7921_l1_rmw(dev, addr, val, 0)
 
+void mt7921_mac_init(struct mt7921_dev *dev);
 bool mt7921_mac_wtbl_update(struct mt7921_dev *dev, int idx, u32 mask);
 void mt7921_mac_reset_counters(struct mt7921_phy *phy);
 void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi,
@@ -298,6 +297,7 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta);
 void mt7921_mac_work(struct work_struct *work);
 void mt7921_mac_reset_work(struct work_struct *work);
+void mt7921_reset(struct mt76_dev *mdev);
 int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index b66e15c8ab70..035fa472fa7d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -418,6 +418,10 @@
 #define PCIE_LPCR_HOST_CLR_OWN		BIT(1)
 #define PCIE_LPCR_HOST_SET_OWN		BIT(0)
 
+#define MT_WFSYS_SW_RST_B		0x18000140
+#define WFSYS_SW_RST_B			BIT(0)
+#define WFSYS_SW_INIT_DONE		BIT(4)
+
 #define MT_CONN_ON_MISC			0x7c0600f0
 #define MT_TOP_MISC2_FW_N9_RDY		GENMASK(1, 0)
 
-- 
cgit v1.2.3


From de29d0afebad775910b428816427b55ba2bea50b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 11 Mar 2021 10:46:21 +0100
Subject: mt76: mt7921: remove leftovers from dbdc configuration

Remove leftovers from dbdc configuration since mt7921
does not support it.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 28 ++++++++++--------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 2b9fa4bbb21d..1c4c26a7b5ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -459,7 +459,6 @@ static int mt7921_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
 	struct mt7921_phy *phy = mt7921_hw_phy(hw);
-	bool band = phy != &dev->phy;
 	int ret;
 
 	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
@@ -480,9 +479,9 @@ static int mt7921_config(struct ieee80211_hw *hw, u32 changed)
 		else
 			phy->rxfilter &= ~MT_WF_RFCR_DROP_OTHER_UC;
 
-		mt76_rmw_field(dev, MT_DMA_DCR0(band), MT_DMA_DCR0_RXD_G5_EN,
+		mt76_rmw_field(dev, MT_DMA_DCR0(0), MT_DMA_DCR0_RXD_G5_EN,
 			       enabled);
-		mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
+		mt76_wr(dev, MT_WF_RFCR(0), phy->rxfilter);
 	}
 
 	mt7921_mutex_release(dev);
@@ -511,7 +510,6 @@ static void mt7921_configure_filter(struct ieee80211_hw *hw,
 {
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
 	struct mt7921_phy *phy = mt7921_hw_phy(hw);
-	bool band = phy != &dev->phy;
 	u32 ctl_flags = MT_WF_RFCR1_DROP_ACK |
 			MT_WF_RFCR1_DROP_BF_POLL |
 			MT_WF_RFCR1_DROP_BA |
@@ -551,12 +549,12 @@ static void mt7921_configure_filter(struct ieee80211_hw *hw,
 			     MT_WF_RFCR_DROP_NDPA);
 
 	*total_flags = flags;
-	mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
+	mt76_wr(dev, MT_WF_RFCR(0), phy->rxfilter);
 
 	if (*total_flags & FIF_CONTROL)
-		mt76_clear(dev, MT_WF_RFCR1(band), ctl_flags);
+		mt76_clear(dev, MT_WF_RFCR1(0), ctl_flags);
 	else
-		mt76_set(dev, MT_WF_RFCR1(band), ctl_flags);
+		mt76_set(dev, MT_WF_RFCR1(0), ctl_flags);
 
 	mt7921_mutex_release(dev);
 }
@@ -866,9 +864,7 @@ mt7921_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 {
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
-	struct mt7921_phy *phy = mt7921_hw_phy(hw);
 	u8 omac_idx = mvif->mt76.omac_idx;
-	bool band = phy != &dev->phy;
 	union {
 		u64 t64;
 		u32 t32[2];
@@ -879,9 +875,9 @@ mt7921_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 
 	n = omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : omac_idx;
 	/* TSF software read */
-	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE);
-	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0(band));
-	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1(band));
+	mt76_set(dev, MT_LPON_TCR(0, n), MT_LPON_TCR_SW_MODE);
+	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0(0));
+	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1(0));
 
 	mt7921_mutex_release(dev);
 
@@ -894,9 +890,7 @@ mt7921_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 {
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
-	struct mt7921_phy *phy = mt7921_hw_phy(hw);
 	u8 omac_idx = mvif->mt76.omac_idx;
-	bool band = phy != &dev->phy;
 	union {
 		u64 t64;
 		u32 t32[2];
@@ -906,10 +900,10 @@ mt7921_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	mt7921_mutex_acquire(dev);
 
 	n = omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : omac_idx;
-	mt76_wr(dev, MT_LPON_UTTR0(band), tsf.t32[0]);
-	mt76_wr(dev, MT_LPON_UTTR1(band), tsf.t32[1]);
+	mt76_wr(dev, MT_LPON_UTTR0(0), tsf.t32[0]);
+	mt76_wr(dev, MT_LPON_UTTR1(0), tsf.t32[1]);
 	/* TSF software overwrite */
-	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_WRITE);
+	mt76_set(dev, MT_LPON_TCR(0, n), MT_LPON_TCR_SW_WRITE);
 
 	mt7921_mutex_release(dev);
 }
-- 
cgit v1.2.3


From 1921b8925c6f2a171af6294bba6af733da2409b9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 12 Mar 2021 17:29:49 +0000
Subject: mt76: mt7921: remove redundant check on type

Currently in the switch statement case where type is
NL80211_IFTYPE_STATION there is a check to see if type
is not NL80211_IFTYPE_STATION.  This check is always false
and is redundant dead code that can be removed.

Addresses-Coverity: ("Logically dead code")
Fixes: e0f9fdda81bd ("mt76: mt7921: add ieee80211_ops")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 1c4c26a7b5ce..dd22561ae800 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -240,9 +240,6 @@ static int get_omac_idx(enum nl80211_iftype type, u64 mask)
 		if (i)
 			return i - 1;
 
-		if (type != NL80211_IFTYPE_STATION)
-			break;
-
 		/* next, try to find a free repeater entry for the sta */
 		i = get_free_idx(mask >> REPEATER_BSSID_START, 0,
 				 REPEATER_BSSID_MAX - REPEATER_BSSID_START);
-- 
cgit v1.2.3


From 12f4be0e6a68650dbc9b73a047748431b2253904 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 15 Mar 2021 09:21:10 +0100
Subject: mt76: mt7921: remove duplicated macros in mcu.h

Remove mcu definitions already available in mt76_connac_mcu.h

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.h | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index fd67838cb565..af8b42983a00 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -177,25 +177,6 @@ enum {
 	MCU_PHY_STATE_OFDMLQ_CNINFO,
 };
 
-#define STA_TYPE_STA			BIT(0)
-#define STA_TYPE_AP			BIT(1)
-#define STA_TYPE_ADHOC			BIT(2)
-#define STA_TYPE_WDS			BIT(4)
-#define STA_TYPE_BC			BIT(5)
-
-#define NETWORK_INFRA			BIT(16)
-#define NETWORK_P2P			BIT(17)
-#define NETWORK_IBSS			BIT(18)
-#define NETWORK_WDS			BIT(21)
-
-#define CONNECTION_INFRA_STA		(STA_TYPE_STA | NETWORK_INFRA)
-#define CONNECTION_INFRA_AP		(STA_TYPE_AP | NETWORK_INFRA)
-#define CONNECTION_P2P_GC		(STA_TYPE_STA | NETWORK_P2P)
-#define CONNECTION_P2P_GO		(STA_TYPE_AP | NETWORK_P2P)
-#define CONNECTION_IBSS_ADHOC		(STA_TYPE_ADHOC | NETWORK_IBSS)
-#define CONNECTION_WDS			(STA_TYPE_WDS | NETWORK_WDS)
-#define CONNECTION_INFRA_BC		(STA_TYPE_BC | NETWORK_INFRA)
-
 struct sec_key {
 	u8 cipher_id;
 	u8 cipher_len;
-- 
cgit v1.2.3


From 1da4fd48d28436f8b690cdc2879603dede6d8355 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 22 Mar 2021 21:39:57 +0100
Subject: mt76: mt7915: fix key set/delete issue

Deleting a key with the previous key index deletes the current key
Rework the code to better keep track of multiple keys and check for the
key index before deleting the current key

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 25 +++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index cde8c1ce3ae0..b32a435688bc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -346,7 +346,9 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	struct mt7915_sta *msta = sta ? (struct mt7915_sta *)sta->drv_priv :
 				  &mvif->sta;
 	struct mt76_wcid *wcid = &msta->wcid;
+	u8 *wcid_keyidx = &wcid->hw_key_idx;
 	int idx = key->keyidx;
+	int err = 0;
 
 	/* The hardware does not support per-STA RX GTK, fallback
 	 * to software mode for these.
@@ -361,6 +363,7 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	/* fall back to sw encryption for unsupported ciphers */
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_AES_CMAC:
+		wcid_keyidx = &wcid->hw_key_idx2;
 		key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
@@ -376,16 +379,24 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		return -EOPNOTSUPP;
 	}
 
-	if (cmd == SET_KEY) {
-		key->hw_key_idx = wcid->idx;
-		wcid->hw_key_idx = idx;
-	} else if (idx == wcid->hw_key_idx) {
-		wcid->hw_key_idx = -1;
-	}
+	mutex_lock(&dev->mt76.mutex);
+
+	if (cmd == SET_KEY)
+		*wcid_keyidx = idx;
+	else if (idx == *wcid_keyidx)
+		*wcid_keyidx = -1;
+	else
+		goto out;
+
 	mt76_wcid_key_setup(&dev->mt76, wcid,
 			    cmd == SET_KEY ? key : NULL);
 
-	return mt7915_mcu_add_key(dev, vif, msta, key, cmd);
+	err = mt7915_mcu_add_key(dev, vif, msta, key, cmd);
+
+out:
+	mutex_unlock(&dev->mt76.mutex);
+
+	return err;
 }
 
 static int mt7915_config(struct ieee80211_hw *hw, u32 changed)
-- 
cgit v1.2.3


From 9add4bf2b81eb32754a6b34f7e12c0ecd2009cfc Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 16 Feb 2021 15:23:13 +0100
Subject: mt76: mt7915: refresh repeater entry MAC address when setting BSSID

When disassociating and clearing the BSSID of a repeater entry used by a client
mode interface, the corresponding MAC address entry can get lost too, causing
the client interface to not be able to associate anymore.
Fix this by also refreshing the MAC address when setting the BSSID

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 29b5039cc286..9d0911ac1d90 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1008,8 +1008,10 @@ int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
 	struct sk_buff *skb;
 
-	if (mvif->omac_idx >= REPEATER_BSSID_START)
+	if (mvif->omac_idx >= REPEATER_BSSID_START) {
+		mt7915_mcu_muar_config(phy, vif, false, enable);
 		mt7915_mcu_muar_config(phy, vif, true, enable);
+	}
 
 	skb = mt7915_mcu_alloc_sta_req(phy->dev, mvif, NULL,
 				       MT7915_BSS_UPDATE_MAX_SIZE);
-- 
cgit v1.2.3


From 14edf1093836e53804c2c6d7699b4ddcbe40c27f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Mar 2021 09:25:22 +0100
Subject: mt76: mt7921: get rid of mt7921_mac_wtbl_lmac_addr

Get rid of mt7921_mac_wtbl_lmac_addr routine since mt7921 wtbl size is
19 entries

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 05971ea9f8fa..8faae260df79 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -51,14 +51,6 @@ bool mt7921_mac_wtbl_update(struct mt7921_dev *dev, int idx, u32 mask)
 			 0, 5000);
 }
 
-static u32 mt7921_mac_wtbl_lmac_addr(struct mt7921_dev *dev, u16 wcid)
-{
-	mt76_wr(dev, MT_WTBLON_TOP_WDUCR,
-		FIELD_PREP(MT_WTBLON_TOP_WDUCR_GROUP, (wcid >> 7)));
-
-	return MT_WTBL_LMAC_OFFS(wcid, 0);
-}
-
 static void mt7921_mac_sta_poll(struct mt7921_dev *dev)
 {
 	static const u8 ac_to_tid[] = {
@@ -95,7 +87,7 @@ static void mt7921_mac_sta_poll(struct mt7921_dev *dev)
 		spin_unlock_bh(&dev->sta_poll_lock);
 
 		idx = msta->wcid.idx;
-		addr = mt7921_mac_wtbl_lmac_addr(dev, idx) + 20 * 4;
+		addr = MT_WTBL_LMAC_OFFS(idx, 0) + 20 * 4;
 
 		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 			u32 tx_last = msta->airtime_ac[i];
-- 
cgit v1.2.3


From 5802106f8bc710cac0e2db6d5d2d219d5f9490b1 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Mar 2021 09:37:36 +0100
Subject: mt76: connac: introduce mt76_sta_cmd_info data structure

Introduce mt76_sta_cmd_info data structure to contain parameters passed
to mt76_sta_cmd_info routine. This is preliminary patch to properly
configure rcpi for mt7921 devices.

Tested-by: Jayden.Kuo <jayden.kuo@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 13 +++++---
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 35 +++++++++++-----------
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   | 19 ++++++++----
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 19 +++++++++---
 4 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 4ca0d8d4c536..1ed06c253816 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -988,7 +988,7 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 
 	mt76_connac_mcu_sta_basic_tlv(sskb, vif, sta, enable);
 	if (enable && sta)
-		mt76_connac_mcu_sta_tlv(phy->mt76, sskb, sta, vif);
+		mt76_connac_mcu_sta_tlv(phy->mt76, sskb, sta, vif, 0);
 
 	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(&dev->mt76, &msta->wcid,
 						  WTBL_RESET_AND_SET, NULL,
@@ -1086,10 +1086,15 @@ __mt7615_mcu_add_sta(struct mt76_phy *phy, struct ieee80211_vif *vif,
 		     struct ieee80211_sta *sta, bool enable, int cmd)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
-	struct mt76_wcid *wcid;
+	struct mt76_sta_cmd_info info = {
+		.sta = sta,
+		.vif = vif,
+		.enable = enable,
+		.cmd = cmd,
+	};
 
-	wcid = sta ? (struct mt76_wcid *)sta->drv_priv : &mvif->sta.wcid;
-	return mt76_connac_mcu_add_sta_cmd(phy, vif, sta, wcid, enable, cmd);
+	info.wcid = sta ? (struct mt76_wcid *)sta->drv_priv : &mvif->sta.wcid;
+	return mt76_connac_mcu_add_sta_cmd(phy, &info);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 2cd1677d881e..bafe95210807 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -674,7 +674,8 @@ mt76_connac_get_phy_mode_v2(struct mt76_phy *mphy, struct ieee80211_vif *vif,
 
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
-			     struct ieee80211_vif *vif)
+			     struct ieee80211_vif *vif,
+			     u8 rcpi)
 {
 	struct cfg80211_chan_def *chandef = &mphy->chandef;
 	enum nl80211_band band = chandef->chan->band;
@@ -723,6 +724,7 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 	phy = (struct sta_rec_phy *)tlv;
 	phy->phy_type = mt76_connac_get_phy_mode_v2(mphy, vif, band, sta);
 	phy->basic_rate = cpu_to_le16((u16)vif->bss_conf.basic_rates);
+	phy->rcpi = rcpi;
 
 	tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra_info));
 	ra_info = (struct sta_rec_ra_info *)tlv;
@@ -827,43 +829,42 @@ void mt76_connac_mcu_wtbl_ht_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_ht_tlv);
 
 int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
-				struct ieee80211_vif *vif,
-				struct ieee80211_sta *sta,
-				struct mt76_wcid *wcid,
-				bool enable, int cmd)
+				struct mt76_sta_cmd_info *info)
 {
-	struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv;
+	struct mt76_vif *mvif = (struct mt76_vif *)info->vif->drv_priv;
 	struct mt76_dev *dev = phy->dev;
 	struct wtbl_req_hdr *wtbl_hdr;
 	struct tlv *sta_wtbl;
 	struct sk_buff *skb;
 
-	skb = mt76_connac_mcu_alloc_sta_req(dev, mvif, wcid);
+	skb = mt76_connac_mcu_alloc_sta_req(dev, mvif, info->wcid);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
-	mt76_connac_mcu_sta_basic_tlv(skb, vif, sta, enable);
-	if (enable && sta)
-		mt76_connac_mcu_sta_tlv(phy, skb, sta, vif);
+	mt76_connac_mcu_sta_basic_tlv(skb, info->vif, info->sta, info->enable);
+	if (info->enable && info->sta)
+		mt76_connac_mcu_sta_tlv(phy, skb, info->sta, info->vif,
+					info->rcpi);
 
 	sta_wtbl = mt76_connac_mcu_add_tlv(skb, STA_REC_WTBL,
 					   sizeof(struct tlv));
 
-	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(dev, wcid,
+	wtbl_hdr = mt76_connac_mcu_alloc_wtbl_req(dev, info->wcid,
 						  WTBL_RESET_AND_SET,
 						  sta_wtbl, &skb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
-	if (enable) {
-		mt76_connac_mcu_wtbl_generic_tlv(dev, skb, vif, sta, sta_wtbl,
+	if (info->enable) {
+		mt76_connac_mcu_wtbl_generic_tlv(dev, skb, info->vif,
+						 info->sta, sta_wtbl,
 						 wtbl_hdr);
-		if (sta)
-			mt76_connac_mcu_wtbl_ht_tlv(dev, skb, sta, sta_wtbl,
-						    wtbl_hdr);
+		if (info->sta)
+			mt76_connac_mcu_wtbl_ht_tlv(dev, skb, info->sta,
+						    sta_wtbl, wtbl_hdr);
 	}
 
-	return mt76_mcu_skb_send_msg(dev, skb, cmd, true);
+	return mt76_mcu_skb_send_msg(dev, skb, info->cmd, true);
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_add_sta_cmd);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 950e68e11ddd..5a3efd744546 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -881,6 +881,17 @@ struct mt76_connac_suspend_tlv {
 	u8 pad[5];
 } __packed;
 
+struct mt76_sta_cmd_info {
+	struct ieee80211_sta *sta;
+	struct mt76_wcid *wcid;
+
+	struct ieee80211_vif *vif;
+
+	bool enable;
+	int cmd;
+	u8 rcpi;
+};
+
 #define to_wcid_lo(id)		FIELD_GET(GENMASK(7, 0), (u16)id)
 #define to_wcid_hi(id)		FIELD_GET(GENMASK(9, 8), (u16)id)
 
@@ -928,7 +939,8 @@ void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
 					void *sta_wtbl, void *wtbl_tlv);
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
-			     struct ieee80211_vif *vif);
+			     struct ieee80211_vif *vif,
+			     u8 rcpi);
 void mt76_connac_mcu_wtbl_ht_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				 struct ieee80211_sta *sta, void *sta_wtbl,
 				 void *wtbl_tlv);
@@ -951,10 +963,7 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
 				struct mt76_wcid *wcid,
 				bool enable);
 int mt76_connac_mcu_add_sta_cmd(struct mt76_phy *phy,
-				struct ieee80211_vif *vif,
-				struct ieee80211_sta *sta,
-				struct mt76_wcid *wcid,
-				bool enable, int cmd);
+				struct mt76_sta_cmd_info *info);
 void mt76_connac_mcu_beacon_loss_iter(void *priv, u8 *mac,
 				      struct ieee80211_vif *vif);
 int mt76_connac_mcu_set_rts_thresh(struct mt76_dev *dev, u32 val, u8 band);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index dd22561ae800..302d8d78d6f7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -624,6 +624,13 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	struct mt76_sta_cmd_info info = {
+		.sta = sta,
+		.vif = vif,
+		.enable = true,
+		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
+		.wcid = &msta->wcid,
+	};
 	int ret, idx;
 
 	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7921_WTBL_STA - 1);
@@ -650,8 +657,7 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7921_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
-	ret = mt76_connac_mcu_add_sta_cmd(&dev->mphy, vif, sta, &msta->wcid,
-					  true, MCU_UNI_CMD_STA_REC_UPDATE);
+	ret = mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
 	if (ret)
 		return ret;
 
@@ -665,12 +671,17 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 {
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
+	struct mt76_sta_cmd_info info = {
+		.sta = sta,
+		.vif = vif,
+		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
+		.wcid = &msta->wcid,
+	};
 
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
 	mt76_connac_pm_wake(&dev->mphy, &dev->pm);
 
-	mt76_connac_mcu_add_sta_cmd(&dev->mphy, vif, sta, &msta->wcid, false,
-				    MCU_UNI_CMD_STA_REC_UPDATE);
+	mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
 
 	mt7921_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
-- 
cgit v1.2.3


From 93c81df520a1ba506757a47c6e4a148f4372a282 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Mar 2021 09:37:37 +0100
Subject: mt76: mt7921: properly configure rcpi adding a sta to the fw

Properly configure rcpi based on association process rssi. rcpi is used
by rate controller embedded into the fw to initialize amsdu size.

Tested-by: Jayden.Kuo <jayden.kuo@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 35 ++++++++++++++++++++--
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 12 ++++++--
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  7 +++++
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 8faae260df79..ee6482124c0a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -9,8 +9,6 @@
 #include "mac.h"
 #include "mcu.h"
 
-#define to_rssi(field, rxv)	((FIELD_GET(field, rxv) - 220) / 2)
-
 #define HE_BITS(f)		cpu_to_le16(IEEE80211_RADIOTAP_HE_##f)
 #define HE_PREP(f, m, v)	le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
 						 IEEE80211_RADIOTAP_HE_##f)
@@ -277,6 +275,37 @@ mt7921_get_status_freq_info(struct mt7921_dev *dev, struct mt76_phy *mphy,
 	status->freq = ieee80211_channel_to_frequency(chfreq, status->band);
 }
 
+static void
+mt7921_mac_rssi_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct sk_buff *skb = priv;
+	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
+	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
+
+	if (status->signal > 0)
+		return;
+
+	if (!ether_addr_equal(vif->addr, hdr->addr1))
+		return;
+
+	ewma_rssi_add(&mvif->rssi, -status->signal);
+}
+
+static void
+mt7921_mac_assoc_rssi(struct mt7921_dev *dev, struct sk_buff *skb)
+{
+	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
+
+	if (!ieee80211_is_assoc_resp(hdr->frame_control) &&
+	    !ieee80211_is_auth(hdr->frame_control))
+		return;
+
+	ieee80211_iterate_active_interfaces_atomic(mt76_hw(dev),
+		IEEE80211_IFACE_ITER_RESUME_ALL,
+		mt7921_mac_rssi_iter, skb);
+}
+
 int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
@@ -514,6 +543,8 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 		mt76_insert_ccmp_hdr(skb, key_id);
 	}
 
+	mt7921_mac_assoc_rssi(dev, skb);
+
 	if (rxv && status->flag & RX_FLAG_RADIOTAP_HE)
 		mt7921_mac_decode_he_radiotap(skb, status, rxv, mode);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 302d8d78d6f7..7b4f6a70f87a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -322,6 +322,8 @@ static int mt7921_add_interface(struct ieee80211_hw *hw,
 	mt7921_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
+	ewma_rssi_init(&mvif->rssi);
+
 	rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
 	if (vif->txq) {
 		mtxq = (struct mt76_txq *)vif->txq->drv_priv;
@@ -624,12 +626,14 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	int rssi = -ewma_rssi_read(&mvif->rssi);
 	struct mt76_sta_cmd_info info = {
 		.sta = sta,
 		.vif = vif,
 		.enable = true,
 		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
 		.wcid = &msta->wcid,
+		.rcpi = to_rcpi(rssi),
 	};
 	int ret, idx;
 
@@ -686,11 +690,13 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7921_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
-	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) {
+	if (vif->type == NL80211_IFTYPE_STATION) {
 		struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
 
-		mt76_connac_mcu_uni_add_bss(&dev->mphy, vif, &mvif->sta.wcid,
-					    false);
+		ewma_rssi_init(&mvif->rssi);
+		if (!sta->tdls)
+			mt76_connac_mcu_uni_add_bss(&dev->mphy, vif,
+						    &mvif->sta.wcid, false);
 	}
 
 	spin_lock_bh(&dev->sta_poll_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index df3c9730876c..5cedefc41416 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -46,6 +46,9 @@
 #define MT7921_SKU_MAX_DELTA_IDX	MT7921_SKU_RATE_NUM
 #define MT7921_SKU_TABLE_SIZE		(MT7921_SKU_RATE_NUM + 1)
 
+#define to_rssi(field, rxv)		((FIELD_GET(field, rxv) - 220) / 2)
+#define to_rcpi(rssi)			(2 * (rssi) + 220)
+
 struct mt7921_vif;
 struct mt7921_sta;
 
@@ -92,12 +95,16 @@ struct mt7921_sta {
 	struct mt7921_sta_key_conf bip;
 };
 
+DECLARE_EWMA(rssi, 10, 8);
+
 struct mt7921_vif {
 	struct mt76_vif mt76; /* must be first */
 
 	struct mt7921_sta sta;
 	struct mt7921_phy *phy;
 
+	struct ewma_rssi rssi;
+
 	struct ieee80211_tx_queue_params queue_params[IEEE80211_NUM_ACS];
 };
 
-- 
cgit v1.2.3


From 6104edf9e8a364c9fc586f88867850887524ca04 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 25 Mar 2021 16:06:01 +0800
Subject: mt76: mt7615: only enable DFS test knobs for mt7615

DFS knobs should not be visible to non-DFS devices.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/debugfs.c    | 29 +++++++++++++---------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 7ae48b4fa564..1b414220521a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -517,18 +517,23 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
 			    &fops_pm_idle_timeout);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "radio", dir,
 				    mt7615_radio_read);
-	debugfs_create_u32("dfs_hw_pattern", 0400, dir, &dev->hw_pattern);
-	/* test pattern knobs */
-	debugfs_create_u8("pattern_len", 0600, dir,
-			  &dev->radar_pattern.n_pulses);
-	debugfs_create_u32("pulse_period", 0600, dir,
-			   &dev->radar_pattern.period);
-	debugfs_create_u16("pulse_width", 0600, dir,
-			   &dev->radar_pattern.width);
-	debugfs_create_u16("pulse_power", 0600, dir,
-			   &dev->radar_pattern.power);
-	debugfs_create_file("radar_trigger", 0200, dir, dev,
-			    &fops_radar_pattern);
+
+	if (is_mt7615(&dev->mt76)) {
+		debugfs_create_u32("dfs_hw_pattern", 0400, dir,
+				   &dev->hw_pattern);
+		/* test pattern knobs */
+		debugfs_create_u8("pattern_len", 0600, dir,
+				  &dev->radar_pattern.n_pulses);
+		debugfs_create_u32("pulse_period", 0600, dir,
+				   &dev->radar_pattern.period);
+		debugfs_create_u16("pulse_width", 0600, dir,
+				   &dev->radar_pattern.width);
+		debugfs_create_u16("pulse_power", 0600, dir,
+				   &dev->radar_pattern.power);
+		debugfs_create_file("radar_trigger", 0200, dir, dev,
+				    &fops_radar_pattern);
+	}
+
 	debugfs_create_file("reset_test", 0200, dir, dev,
 			    &fops_reset_test);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "temperature", dir,
-- 
cgit v1.2.3


From 69e74d7f23d515fb559b2e0bebfdf4c458d9507d Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 25 Mar 2021 16:06:02 +0800
Subject: mt76: mt7615: cleanup mcu tx queue in mt7615_dma_reset()

With this patch, mt7615_mac_reset_work() can recover system back.

Fixes: e637763b606b ("mt76: move mcu queues to mt76_dev q_mcu array")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 59aa8f84645b..17aa3e4d67ca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -2018,15 +2018,17 @@ void mt7615_dma_reset(struct mt7615_dev *dev)
 	mt76_clear(dev, MT_WPDMA_GLO_CFG,
 		   MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
 		   MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
+
 	usleep_range(1000, 2000);
 
-	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], true);
 	for (i = 0; i < __MT_TXQ_MAX; i++)
 		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
 
-	mt76_for_each_q_rx(&dev->mt76, i) {
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
 		mt76_queue_rx_reset(dev, i);
-	}
 
 	mt76_set(dev, MT_WPDMA_GLO_CFG,
 		 MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
-- 
cgit v1.2.3


From ad2a1ea49f631bb739116e9839b6f65231053112 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 25 Mar 2021 16:06:03 +0800
Subject: mt76: mt7622: trigger hif interrupt for system reset

hif interrupt needs to be triggered after MT_MCU_INT_EVENT.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 15 ++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 17aa3e4d67ca..3d9d893dbac5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -2053,6 +2053,15 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt7615_tx_token_put);
 
+static void
+mt7615_hif_int_event_trigger(struct mt7615_dev *dev, u8 event)
+{
+	mt76_wr(dev, MT_MCU_INT_EVENT, event);
+
+	mt7622_trigger_hif_int(dev, true);
+	mt7622_trigger_hif_int(dev, false);
+}
+
 void mt7615_mac_reset_work(struct work_struct *work)
 {
 	struct mt7615_phy *phy2;
@@ -2095,7 +2104,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 	mt7615_mutex_acquire(dev);
 
-	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_PDMA_STOPPED);
+	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_STOPPED);
 
 	mt7615_tx_token_put(dev);
 	idr_init(&dev->token);
@@ -2105,7 +2114,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 		mt76_wr(dev, MT_WPDMA_MEM_RNG_ERR, 0);
 
-		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_PDMA_INIT);
+		mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_INIT);
 		mt7615_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
 	}
 
@@ -2128,7 +2137,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 	if (ext_phy)
 		ieee80211_wake_queues(ext_phy->hw);
 
-	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
+	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_RESET_DONE);
 	mt7615_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
 
 	mt7615_update_beacons(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 1ed06c253816..e565fc770a0f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -274,7 +274,7 @@ int mt7615_rf_wr(struct mt7615_dev *dev, u32 wf, u32 reg, u32 val)
 				 sizeof(req), false);
 }
 
-static void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en)
+void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en)
 {
 	if (!is_mt7622(&dev->mt76))
 		return;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 84d06f2b41d4..484fc16f0c11 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -556,6 +556,8 @@ u32 mt7615_mcu_reg_rr(struct mt76_dev *dev, u32 offset);
 void mt7615_mcu_reg_wr(struct mt76_dev *dev, u32 offset, u32 val);
 void mt7615_coredump_work(struct work_struct *work);
 
+void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en);
+
 /* usb */
 int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 				   enum mt76_txq_id qid, struct mt76_wcid *wcid,
-- 
cgit v1.2.3


From d9852ab2f362fc4fcf501f4350d007b08559ccc1 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 25 Mar 2021 16:06:04 +0800
Subject: mt76: mt7615: keep mcu_add_bss_info enabled till interface removal

BSS_INFO_BASIC is never called alone with inactive state, which always
follows beacon offload disable, so keep it enabled throughout interfaces
life cycle. Inactive state also removes sta record of all connected
stations, thurs causes connection lost which defeats the purpose of CSA.

Lastly, this is especially an unexpected behavior that keeping mt7622
failing beacon buffer recyled when scanning channels.

bss_conf change is updated with active state only, so just overwrite it.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 342e2c5d9766..e30b256784e0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -253,10 +253,11 @@ static void mt7615_remove_interface(struct ieee80211_hw *hw,
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 	int idx = msta->wcid.idx;
 
-	/* TODO: disable beacon for the bss */
-
 	mt7615_mutex_acquire(dev);
 
+	mt7615_mcu_add_bss_info(phy, vif, NULL, false);
+	mt7615_mcu_sta_add(phy, vif, NULL, false);
+
 	mt76_testmode_reset(phy->mt76, true);
 	if (vif == phy->monitor_vif)
 	    phy->monitor_vif = NULL;
@@ -558,11 +559,11 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 		}
 	}
 
-	if (changed & BSS_CHANGED_BEACON_ENABLED) {
-		mt7615_mcu_add_bss_info(phy, vif, NULL, info->enable_beacon);
-		mt7615_mcu_sta_add(phy, vif, NULL, info->enable_beacon);
+	if (changed & BSS_CHANGED_BEACON_ENABLED && info->enable_beacon) {
+		mt7615_mcu_add_bss_info(phy, vif, NULL, true);
+		mt7615_mcu_sta_add(phy, vif, NULL, true);
 
-		if (vif->p2p && info->enable_beacon)
+		if (vif->p2p)
 			mt7615_mcu_set_p2p_oppps(hw, vif);
 	}
 
-- 
cgit v1.2.3


From a9bae3f5361487c9b3859f288e112ad045973a4a Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 25 Mar 2021 16:06:05 +0800
Subject: mt76: mt7915: keep mcu_add_bss_info enabled till interface removal

The same as mt7615. Keep BSS_INFO_BASIC enabled throughout interfaces
life cycle.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index b32a435688bc..2fd87987312e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -256,7 +256,8 @@ static void mt7915_remove_interface(struct ieee80211_hw *hw,
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
 	int idx = msta->wcid.idx;
 
-	/* TODO: disable beacon for the bss */
+	mt7915_mcu_add_bss_info(phy, vif, false);
+	mt7915_mcu_add_sta(dev, vif, NULL, false);
 
 	mutex_lock(&dev->mt76.mutex);
 	mt76_testmode_reset(phy->mt76, true);
@@ -555,9 +556,9 @@ static void mt7915_bss_info_changed(struct ieee80211_hw *hw,
 		}
 	}
 
-	if (changed & BSS_CHANGED_BEACON_ENABLED) {
-		mt7915_mcu_add_bss_info(phy, vif, info->enable_beacon);
-		mt7915_mcu_add_sta(dev, vif, NULL, info->enable_beacon);
+	if (changed & BSS_CHANGED_BEACON_ENABLED && info->enable_beacon) {
+		mt7915_mcu_add_bss_info(phy, vif, true);
+		mt7915_mcu_add_sta(dev, vif, NULL, true);
 	}
 
 	/* ensure that enable txcmd_mode after bss_info */
-- 
cgit v1.2.3


From 1ebea45ef027ee31cd50ed92903071391e792edb Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 26 Mar 2021 02:28:56 +0800
Subject: mt76: mt7915: cleanup mcu tx queue in mt7915_dma_reset()

Cleanup mcu queues in mt7915_mac_reset_work().

Fixes: e637763b606b ("mt76: move mcu queues to mt76_dev q_mcu array")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index d3a6312c058c..0924ae074db2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1515,9 +1515,8 @@ mt7915_update_beacons(struct mt7915_dev *dev)
 }
 
 static void
-mt7915_dma_reset(struct mt7915_phy *phy)
+mt7915_dma_reset(struct mt7915_dev *dev)
 {
-	struct mt7915_dev *dev = phy->dev;
 	struct mt76_phy *mphy_ext = dev->mt76.phy2;
 	u32 hif1_ofs = MT_WFDMA1_PCIE1_BASE - MT_WFDMA1_BASE;
 	int i;
@@ -1534,18 +1533,20 @@ mt7915_dma_reset(struct mt7915_phy *phy)
 			   (MT_WFDMA1_GLO_CFG_TX_DMA_EN |
 			    MT_WFDMA1_GLO_CFG_RX_DMA_EN));
 	}
+
 	usleep_range(1000, 2000);
 
-	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WA], true);
 	for (i = 0; i < __MT_TXQ_MAX; i++) {
-		mt76_queue_tx_cleanup(dev, phy->mt76->q_tx[i], true);
+		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
 		if (mphy_ext)
 			mt76_queue_tx_cleanup(dev, mphy_ext->q_tx[i], true);
 	}
 
-	mt76_for_each_q_rx(&dev->mt76, i) {
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
 		mt76_queue_rx_reset(dev, i);
-	}
 
 	/* re-init prefetch settings after reset */
 	mt7915_dma_prefetch(dev);
@@ -1630,7 +1631,7 @@ void mt7915_mac_reset_work(struct work_struct *work)
 	idr_init(&dev->token);
 
 	if (mt7915_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
-		mt7915_dma_reset(&dev->phy);
+		mt7915_dma_reset(dev);
 
 		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_INIT);
 		mt7915_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
-- 
cgit v1.2.3


From 1cb7ea2acb725bf8752dc765c1c1645c7ecc23bb Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 26 Mar 2021 21:52:11 +0100
Subject: mt76: mt7615: fix chip reset on MT7622 and MT7663e

After chip reset, the DMA scheduler needs to be initialized as well.
Since the code is PCI/SoC specific, move it to pci_mac.c, so that it
can depend on a function in dma.c

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c    |  26 ++--
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   1 -
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 170 ---------------------
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   1 +
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   |   1 +
 .../net/wireless/mediatek/mt76/mt7615/pci_mac.c    | 168 ++++++++++++++++++++
 7 files changed, 187 insertions(+), 181 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 2e3120eb2da9..0ec1c526f583 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -176,6 +176,21 @@ static void mt7663_dma_sched_init(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_SCHED_SET1), 0xedcba987);
 }
 
+void mt7615_dma_start(struct mt7615_dev *dev)
+{
+	/* start dma engine */
+	mt76_set(dev, MT_WPDMA_GLO_CFG,
+		 MT_WPDMA_GLO_CFG_TX_DMA_EN |
+		 MT_WPDMA_GLO_CFG_RX_DMA_EN |
+		 MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
+
+	if (is_mt7622(&dev->mt76))
+		mt7622_dma_sched_init(dev);
+
+	if (is_mt7663(&dev->mt76))
+		mt7663_dma_sched_init(dev);
+}
+
 int mt7615_dma_init(struct mt7615_dev *dev)
 {
 	int rx_ring_size = MT7615_RX_RING_SIZE;
@@ -253,20 +268,11 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 		  MT_WPDMA_GLO_CFG_TX_DMA_BUSY |
 		  MT_WPDMA_GLO_CFG_RX_DMA_BUSY, 0, 1000);
 
-	/* start dma engine */
-	mt76_set(dev, MT_WPDMA_GLO_CFG,
-		 MT_WPDMA_GLO_CFG_TX_DMA_EN |
-		 MT_WPDMA_GLO_CFG_RX_DMA_EN);
-
 	/* enable interrupts for TX/RX rings */
 	mt7615_irq_enable(dev, MT_INT_RX_DONE_ALL | mt7615_tx_mcu_int_mask(dev) |
 			       MT_INT_MCU_CMD);
 
-	if (is_mt7622(&dev->mt76))
-		mt7622_dma_sched_init(dev);
-
-	if (is_mt7663(&dev->mt76))
-		mt7663_dma_sched_init(dev);
+	mt7615_dma_start(dev);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index c59043195870..857fb4918975 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -504,7 +504,6 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	init_waitqueue_head(&dev->reset_wait);
 	init_waitqueue_head(&dev->phy.roc_wait);
 
-	INIT_WORK(&dev->reset_work, mt7615_mac_reset_work);
 	INIT_WORK(&dev->phy.roc_work, mt7615_roc_work);
 	timer_setup(&dev->phy.roc_timer, mt7615_roc_timer, 0);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 3d9d893dbac5..6a1395de3066 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1966,76 +1966,6 @@ void mt7615_mac_work(struct work_struct *work)
 				     MT7615_WATCHDOG_TIME);
 }
 
-static bool
-mt7615_wait_reset_state(struct mt7615_dev *dev, u32 state)
-{
-	bool ret;
-
-	ret = wait_event_timeout(dev->reset_wait,
-				 (READ_ONCE(dev->reset_state) & state),
-				 MT7615_RESET_TIMEOUT);
-	WARN(!ret, "Timeout waiting for MCU reset state %x\n", state);
-	return ret;
-}
-
-static void
-mt7615_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif)
-{
-	struct ieee80211_hw *hw = priv;
-	struct mt7615_dev *dev = mt7615_hw_dev(hw);
-
-	switch (vif->type) {
-	case NL80211_IFTYPE_MESH_POINT:
-	case NL80211_IFTYPE_ADHOC:
-	case NL80211_IFTYPE_AP:
-		mt7615_mcu_add_beacon(dev, hw, vif,
-				      vif->bss_conf.enable_beacon);
-		break;
-	default:
-		break;
-	}
-}
-
-static void
-mt7615_update_beacons(struct mt7615_dev *dev)
-{
-	ieee80211_iterate_active_interfaces(dev->mt76.hw,
-		IEEE80211_IFACE_ITER_RESUME_ALL,
-		mt7615_update_vif_beacon, dev->mt76.hw);
-
-	if (!dev->mt76.phy2)
-		return;
-
-	ieee80211_iterate_active_interfaces(dev->mt76.phy2->hw,
-		IEEE80211_IFACE_ITER_RESUME_ALL,
-		mt7615_update_vif_beacon, dev->mt76.phy2->hw);
-}
-
-void mt7615_dma_reset(struct mt7615_dev *dev)
-{
-	int i;
-
-	mt76_clear(dev, MT_WPDMA_GLO_CFG,
-		   MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
-		   MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
-
-	usleep_range(1000, 2000);
-
-	for (i = 0; i < __MT_TXQ_MAX; i++)
-		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
-
-	for (i = 0; i < __MT_MCUQ_MAX; i++)
-		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
-
-	mt76_for_each_q_rx(&dev->mt76, i)
-		mt76_queue_rx_reset(dev, i);
-
-	mt76_set(dev, MT_WPDMA_GLO_CFG,
-		 MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
-		 MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
-}
-EXPORT_SYMBOL_GPL(mt7615_dma_reset);
-
 void mt7615_tx_token_put(struct mt7615_dev *dev)
 {
 	struct mt76_txwi_cache *txwi;
@@ -2053,106 +1983,6 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt7615_tx_token_put);
 
-static void
-mt7615_hif_int_event_trigger(struct mt7615_dev *dev, u8 event)
-{
-	mt76_wr(dev, MT_MCU_INT_EVENT, event);
-
-	mt7622_trigger_hif_int(dev, true);
-	mt7622_trigger_hif_int(dev, false);
-}
-
-void mt7615_mac_reset_work(struct work_struct *work)
-{
-	struct mt7615_phy *phy2;
-	struct mt76_phy *ext_phy;
-	struct mt7615_dev *dev;
-
-	dev = container_of(work, struct mt7615_dev, reset_work);
-	ext_phy = dev->mt76.phy2;
-	phy2 = ext_phy ? ext_phy->priv : NULL;
-
-	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_PDMA))
-		return;
-
-	ieee80211_stop_queues(mt76_hw(dev));
-	if (ext_phy)
-		ieee80211_stop_queues(ext_phy->hw);
-
-	set_bit(MT76_RESET, &dev->mphy.state);
-	set_bit(MT76_MCU_RESET, &dev->mphy.state);
-	wake_up(&dev->mt76.mcu.wait);
-	cancel_delayed_work_sync(&dev->mphy.mac_work);
-	del_timer_sync(&dev->phy.roc_timer);
-	cancel_work_sync(&dev->phy.roc_work);
-	if (phy2) {
-		set_bit(MT76_RESET, &phy2->mt76->state);
-		cancel_delayed_work_sync(&phy2->mt76->mac_work);
-		del_timer_sync(&phy2->roc_timer);
-		cancel_work_sync(&phy2->roc_work);
-	}
-
-	/* lock/unlock all queues to ensure that no tx is pending */
-	mt76_txq_schedule_all(&dev->mphy);
-	if (ext_phy)
-		mt76_txq_schedule_all(ext_phy);
-
-	mt76_worker_disable(&dev->mt76.tx_worker);
-	napi_disable(&dev->mt76.napi[0]);
-	napi_disable(&dev->mt76.napi[1]);
-	napi_disable(&dev->mt76.tx_napi);
-
-	mt7615_mutex_acquire(dev);
-
-	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_STOPPED);
-
-	mt7615_tx_token_put(dev);
-	idr_init(&dev->token);
-
-	if (mt7615_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
-		mt7615_dma_reset(dev);
-
-		mt76_wr(dev, MT_WPDMA_MEM_RNG_ERR, 0);
-
-		mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_INIT);
-		mt7615_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
-	}
-
-	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
-	clear_bit(MT76_RESET, &dev->mphy.state);
-	if (phy2)
-		clear_bit(MT76_RESET, &phy2->mt76->state);
-
-	mt76_worker_enable(&dev->mt76.tx_worker);
-	napi_enable(&dev->mt76.tx_napi);
-	napi_schedule(&dev->mt76.tx_napi);
-
-	napi_enable(&dev->mt76.napi[0]);
-	napi_schedule(&dev->mt76.napi[0]);
-
-	napi_enable(&dev->mt76.napi[1]);
-	napi_schedule(&dev->mt76.napi[1]);
-
-	ieee80211_wake_queues(mt76_hw(dev));
-	if (ext_phy)
-		ieee80211_wake_queues(ext_phy->hw);
-
-	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_RESET_DONE);
-	mt7615_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
-
-	mt7615_update_beacons(dev);
-
-	mt7615_mutex_release(dev);
-
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mphy.mac_work,
-				     MT7615_WATCHDOG_TIME);
-	if (phy2)
-		ieee80211_queue_delayed_work(ext_phy->hw,
-					     &phy2->mt76->mac_work,
-					     MT7615_WATCHDOG_TIME);
-
-}
-
 static void mt7615_dfs_stop_radar_detector(struct mt7615_phy *phy)
 {
 	struct mt7615_dev *dev = phy->dev;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index e565fc770a0f..3855acbd4a5d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -283,6 +283,7 @@ void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en)
 			   MT_INFRACFG_MISC_AP2CONN_WAKE,
 			   !en * MT_INFRACFG_MISC_AP2CONN_WAKE);
 }
+EXPORT_SYMBOL_GPL(mt7622_trigger_hif_int);
 
 static int mt7615_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 484fc16f0c11..dbabdeef4e0b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -376,6 +376,7 @@ int mt7615_eeprom_get_power_delta_index(struct mt7615_dev *dev,
 					enum nl80211_band band);
 int mt7615_wait_pdma_busy(struct mt7615_dev *dev);
 int mt7615_dma_init(struct mt7615_dev *dev);
+void mt7615_dma_start(struct mt7615_dev *dev);
 void mt7615_dma_cleanup(struct mt7615_dev *dev);
 int mt7615_mcu_init(struct mt7615_dev *dev);
 bool mt7615_wait_for_mcu_init(struct mt7615_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 72395925ddee..a629d9cb3806 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -126,6 +126,7 @@ int mt7615_register_device(struct mt7615_dev *dev)
 	int ret;
 
 	mt7615_init_device(dev);
+	INIT_WORK(&dev->reset_work, mt7615_mac_reset_work);
 
 	/* init led callbacks */
 	if (IS_ENABLED(CONFIG_MT76_LEDS)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index 1b4cb145f38e..1b206ccdadf2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -181,3 +181,171 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 
 	return 0;
 }
+
+void mt7615_dma_reset(struct mt7615_dev *dev)
+{
+	int i;
+
+	mt76_clear(dev, MT_WPDMA_GLO_CFG,
+		   MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
+		   MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
+
+	usleep_range(1000, 2000);
+
+	for (i = 0; i < __MT_TXQ_MAX; i++)
+		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
+
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_rx_reset(dev, i);
+
+	mt7615_dma_start(dev);
+}
+EXPORT_SYMBOL_GPL(mt7615_dma_reset);
+
+static void
+mt7615_hif_int_event_trigger(struct mt7615_dev *dev, u8 event)
+{
+	mt76_wr(dev, MT_MCU_INT_EVENT, event);
+
+	mt7622_trigger_hif_int(dev, true);
+	mt7622_trigger_hif_int(dev, false);
+}
+
+static bool
+mt7615_wait_reset_state(struct mt7615_dev *dev, u32 state)
+{
+	bool ret;
+
+	ret = wait_event_timeout(dev->reset_wait,
+				 (READ_ONCE(dev->reset_state) & state),
+				 MT7615_RESET_TIMEOUT);
+	WARN(!ret, "Timeout waiting for MCU reset state %x\n", state);
+	return ret;
+}
+
+static void
+mt7615_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct ieee80211_hw *hw = priv;
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+
+	switch (vif->type) {
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_AP:
+		mt7615_mcu_add_beacon(dev, hw, vif,
+				      vif->bss_conf.enable_beacon);
+		break;
+	default:
+		break;
+	}
+}
+
+static void
+mt7615_update_beacons(struct mt7615_dev *dev)
+{
+	ieee80211_iterate_active_interfaces(dev->mt76.hw,
+		IEEE80211_IFACE_ITER_RESUME_ALL,
+		mt7615_update_vif_beacon, dev->mt76.hw);
+
+	if (!dev->mt76.phy2)
+		return;
+
+	ieee80211_iterate_active_interfaces(dev->mt76.phy2->hw,
+		IEEE80211_IFACE_ITER_RESUME_ALL,
+		mt7615_update_vif_beacon, dev->mt76.phy2->hw);
+}
+
+void mt7615_mac_reset_work(struct work_struct *work)
+{
+	struct mt7615_phy *phy2;
+	struct mt76_phy *ext_phy;
+	struct mt7615_dev *dev;
+
+	dev = container_of(work, struct mt7615_dev, reset_work);
+	ext_phy = dev->mt76.phy2;
+	phy2 = ext_phy ? ext_phy->priv : NULL;
+
+	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_PDMA))
+		return;
+
+	ieee80211_stop_queues(mt76_hw(dev));
+	if (ext_phy)
+		ieee80211_stop_queues(ext_phy->hw);
+
+	set_bit(MT76_RESET, &dev->mphy.state);
+	set_bit(MT76_MCU_RESET, &dev->mphy.state);
+	wake_up(&dev->mt76.mcu.wait);
+	cancel_delayed_work_sync(&dev->mphy.mac_work);
+	del_timer_sync(&dev->phy.roc_timer);
+	cancel_work_sync(&dev->phy.roc_work);
+	if (phy2) {
+		set_bit(MT76_RESET, &phy2->mt76->state);
+		cancel_delayed_work_sync(&phy2->mt76->mac_work);
+		del_timer_sync(&phy2->roc_timer);
+		cancel_work_sync(&phy2->roc_work);
+	}
+
+	/* lock/unlock all queues to ensure that no tx is pending */
+	mt76_txq_schedule_all(&dev->mphy);
+	if (ext_phy)
+		mt76_txq_schedule_all(ext_phy);
+
+	mt76_worker_disable(&dev->mt76.tx_worker);
+	napi_disable(&dev->mt76.napi[0]);
+	napi_disable(&dev->mt76.napi[1]);
+	napi_disable(&dev->mt76.tx_napi);
+
+	mt7615_mutex_acquire(dev);
+
+	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_STOPPED);
+
+	mt7615_tx_token_put(dev);
+	idr_init(&dev->token);
+
+	if (mt7615_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
+		mt7615_dma_reset(dev);
+
+		mt76_wr(dev, MT_WPDMA_MEM_RNG_ERR, 0);
+
+		mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_INIT);
+		mt7615_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
+	}
+
+	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
+	clear_bit(MT76_RESET, &dev->mphy.state);
+	if (phy2)
+		clear_bit(MT76_RESET, &phy2->mt76->state);
+
+	mt76_worker_enable(&dev->mt76.tx_worker);
+	napi_enable(&dev->mt76.tx_napi);
+	napi_schedule(&dev->mt76.tx_napi);
+
+	napi_enable(&dev->mt76.napi[0]);
+	napi_schedule(&dev->mt76.napi[0]);
+
+	napi_enable(&dev->mt76.napi[1]);
+	napi_schedule(&dev->mt76.napi[1]);
+
+	ieee80211_wake_queues(mt76_hw(dev));
+	if (ext_phy)
+		ieee80211_wake_queues(ext_phy->hw);
+
+	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_RESET_DONE);
+	mt7615_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
+
+	mt7615_update_beacons(dev);
+
+	mt7615_mutex_release(dev);
+
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mphy.mac_work,
+				     MT7615_WATCHDOG_TIME);
+	if (phy2)
+		ieee80211_queue_delayed_work(ext_phy->hw,
+					     &phy2->mt76->mac_work,
+					     MT7615_WATCHDOG_TIME);
+
+}
-- 
cgit v1.2.3


From d76d6c3ba2b0addbf8398641f406b339d91b4fbc Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 26 Mar 2021 10:38:25 +0100
Subject: mt76: mt7615: limit firmware log message printk to buffer length

Avoid including garbage from previous rx data

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 3855acbd4a5d..600c5366c099 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -424,7 +424,8 @@ mt7615_mcu_rx_log_message(struct mt7615_dev *dev, struct sk_buff *skb)
 		break;
 	}
 
-	wiphy_info(mt76_hw(dev)->wiphy, "%s: %s", type, data);
+	wiphy_info(mt76_hw(dev)->wiphy, "%s: %*s", type,
+		   (int)(skb->len - sizeof(*rxd)), data);
 }
 
 static void
-- 
cgit v1.2.3


From 665b2c780d63762349646e9abaea507192c6666a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 26 Mar 2021 10:40:55 +0100
Subject: mt76: mt7915: limit firmware log message printk to buffer length

Avoid including garbage from previous rx data

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 9d0911ac1d90..67b1426e98b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -521,7 +521,8 @@ mt7915_mcu_rx_log_message(struct mt7915_dev *dev, struct sk_buff *skb)
 		break;
 	}
 
-	wiphy_info(mt76_hw(dev)->wiphy, "%s: %s", type, data);
+	wiphy_info(mt76_hw(dev)->wiphy, "%s: %*s", type,
+		   (int)(skb->len - sizeof(*rxd)), data);
 }
 
 static void
-- 
cgit v1.2.3


From 7c82bbb1acc04ef38698582f6ae6c13358563406 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 24 Mar 2021 22:49:59 +0100
Subject: dt-bindings:net:wireless:ieee80211: txt to yaml conversion

Convert generic ieee80211 dts bindings from .txt to .yaml

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../devicetree/bindings/net/wireless/ieee80211.txt | 24 ------------
 .../bindings/net/wireless/ieee80211.yaml           | 45 ++++++++++++++++++++++
 2 files changed, 45 insertions(+), 24 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/wireless/ieee80211.txt
 create mode 100644 Documentation/devicetree/bindings/net/wireless/ieee80211.yaml

diff --git a/Documentation/devicetree/bindings/net/wireless/ieee80211.txt b/Documentation/devicetree/bindings/net/wireless/ieee80211.txt
deleted file mode 100644
index f6442b1397f5..000000000000
--- a/Documentation/devicetree/bindings/net/wireless/ieee80211.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-Common IEEE 802.11 properties
-
-This provides documentation of common properties that are valid for all wireless
-devices.
-
-Optional properties:
- - ieee80211-freq-limit : list of supported frequency ranges in KHz. This can be
-	used for devices that in a given config support less channels than
-	normally. It may happen chipset supports a wide wireless band but it is
-	limited to some part of it due to used antennas or power amplifier.
-	An example case for this can be tri-band wireless router with two
-	identical chipsets used for two different 5 GHz subbands. Using them
-	incorrectly could not work or decrease performance noticeably.
-
-Example:
-
-pcie@0,0 {
-	reg = <0x0000 0 0 0 0>;
-	wifi@0,0 {
-		reg = <0x0000 0 0 0 0>;
-		ieee80211-freq-limit = <2402000 2482000>,
-				       <5170000 5250000>;
-	};
-};
diff --git a/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml
new file mode 100644
index 000000000000..d58e1571df9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/ieee80211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Common IEEE 802.11 Binding
+
+maintainers:
+  - Lorenzo Bianconi <lorenzo@kernel.org>
+
+description: |
+  This provides documentation of common properties that are valid for
+  all wireless devices
+
+properties:
+  ieee80211-freq-limit:
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      minItems: 2
+      maxItems: 2
+    description:
+      List of supported frequency ranges in KHz. This can be used for devices
+      that in a given config support less channels than normally. It may happen
+      chipset supports a wide wireless band but it is limited to some part of
+      it due to used antennas or power amplifier. An example case for this
+      can be tri-band wireless router with two identical chipsets used for two
+      different 5 GHz subbands. Using them incorrectly could not work or
+      decrease performance noticeably
+
+additionalProperties: true
+
+examples:
+  - |
+    pcie0 {
+      #address-cells = <3>;
+      #size-cells = <2>;
+      wifi@0,0 {
+        reg = <0x0000 0 0 0 0>;
+        ieee80211-freq-limit = <2402000 2482000>,
+                               <5170000 5250000>;
+      };
+    };
-- 
cgit v1.2.3


From 1735e44dddc04e27860b6fc5099a64e4866115b7 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 28 Mar 2021 19:37:48 +0200
Subject: dt-bindings:net:wireless:mediatek,mt76: txt to yaml conversion

Convert mt76 dts bindings from .txt to .yaml

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../bindings/net/wireless/mediatek,mt76.txt        |  78 -------------
 .../bindings/net/wireless/mediatek,mt76.yaml       | 121 +++++++++++++++++++++
 2 files changed, 121 insertions(+), 78 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
 create mode 100644 Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml

diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
deleted file mode 100644
index ab7e7a00e534..000000000000
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-* MediaTek mt76xx devices
-
-This node provides properties for configuring the MediaTek mt76xx wireless
-device. The node is expected to be specified as a child node of the PCI
-controller to which the wireless chip is connected.
-
-Alternatively, it can specify the wireless part of the MT7628/MT7688 or
-MT7622 SoC. For SoC, use the following compatible strings:
-
-compatible:
-- "mediatek,mt7628-wmac" for MT7628/MT7688
-- "mediatek,mt7622-wmac" for MT7622
-
-properties:
-- reg: Address and length of the register set for the device.
-- interrupts: Main device interrupt
-
-MT7622 specific properties:
-- power-domains: phandle to the power domain that the WMAC is part of
-- mediatek,infracfg: phandle to the infrastructure bus fabric syscon node
-
-Optional properties:
-
-- ieee80211-freq-limit: See ieee80211.txt
-- mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data
-- big-endian: if the radio eeprom partition is written in big-endian, specify
-  this property
-- mediatek,eeprom-merge-otp: Merge EEPROM data with OTP data. Can be used on
-  boards where the flash calibration data is generic and specific calibration
-  data should be pulled from the OTP ROM
-
-The MAC address can as well be set with corresponding optional properties
-defined in net/ethernet.txt.
-
-Optional nodes:
-- led: Properties for a connected LED
-  Optional properties:
-    - led-sources: See Documentation/devicetree/bindings/leds/common.txt
-
-&pcie {
-	pcie0 {
-		wifi@0,0 {
-			compatible = "mediatek,mt76";
-			reg = <0x0000 0 0 0 0>;
-			ieee80211-freq-limit = <5000000 6000000>;
-			mediatek,mtd-eeprom = <&factory 0x8000>;
-			big-endian;
-
-			led {
-				led-sources = <2>;
-			};
-		};
-	};
-};
-
-MT7628 example:
-
-wmac: wmac@10300000 {
-	compatible = "mediatek,mt7628-wmac";
-	reg = <0x10300000 0x100000>;
-
-	interrupt-parent = <&cpuintc>;
-	interrupts = <6>;
-
-	mediatek,mtd-eeprom = <&factory 0x0000>;
-};
-
-MT7622 example:
-
-wmac: wmac@18000000 {
-	compatible = "mediatek,mt7622-wmac";
-	reg = <0 0x18000000 0 0x100000>;
-	interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_LOW>;
-
-	mediatek,infracfg = <&infracfg>;
-
-	power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>;
-};
diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
new file mode 100644
index 000000000000..d6f835d17d66
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/mediatek,mt76.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek mt76 wireless devices Generic Binding
+
+maintainers:
+  - Felix Fietkau <nbd@nbd.name>
+  - Lorenzo Bianconi <lorenzo@kernel.org>
+  - Ryder Lee <ryder.lee@mediatek.com>
+
+description: |
+  This node provides properties for configuring the MediaTek mt76xx
+  wireless device. The node is expected to be specified as a child
+  node of the PCI controller to which the wireless chip is connected.
+  Alternatively, it can specify the wireless part of the MT7628/MT7688
+  or MT7622 SoC.
+
+allOf:
+  - $ref: ieee80211.yaml#
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt76
+      - mediatek,mt7628-wmac
+      - mediatek,mt7622-wmac
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  mediatek,infracfg:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the infrastructure bus fabric syscon node.
+      This property is MT7622 specific
+
+  ieee80211-freq-limit: true
+
+  mediatek,mtd-eeprom:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      Phandle to a MTD partition + offset containing EEPROM data
+
+  big-endian:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Specify if the radio eeprom partition is written in big-endian
+
+  mediatek,eeprom-merge-otp:
+    type: boolean
+    description:
+      Merge EEPROM data with OTP data. Can be used on boards where the flash
+      calibration data is generic and specific calibration data should be
+      pulled from the OTP ROM
+
+  led:
+    type: object
+    $ref: /schemas/leds/common.yaml#
+    additionalProperties: false
+    properties:
+      led-sources:
+        maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pcie0 {
+      #address-cells = <3>;
+      #size-cells = <2>;
+      wifi@0,0 {
+        compatible = "mediatek,mt76";
+        reg = <0x0000 0 0 0 0>;
+        ieee80211-freq-limit = <5000000 6000000>;
+        mediatek,mtd-eeprom = <&factory 0x8000>;
+        big-endian;
+
+        led {
+          led-sources = <2>;
+        };
+      };
+    };
+
+  - |
+    wifi@10300000 {
+      compatible = "mediatek,mt7628-wmac";
+      reg = <0x10300000 0x100000>;
+
+      interrupt-parent = <&cpuintc>;
+      interrupts = <6>;
+
+      mediatek,mtd-eeprom = <&factory 0x0>;
+    };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    wifi@18000000 {
+      compatible = "mediatek,mt7622-wmac";
+      reg = <0x10300000 0x100000>;
+      interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_LOW>;
+
+      mediatek,infracfg = <&infracfg>;
+
+      power-domains = <&scpsys 3>;
+    };
-- 
cgit v1.2.3


From 2df51a2b7a256b242a91450325c143faae13f234 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Sat, 27 Mar 2021 17:56:18 +0800
Subject: mt76: mt7615: remove redundant dev_err call in mt7622_wmac_probe()

There is a error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/soc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
index 9aa5183c7a56..be9a69fe1b38 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
@@ -40,10 +40,8 @@ static int mt7622_wmac_probe(struct platform_device *pdev)
 		return irq;
 
 	mem_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mem_base)) {
-		dev_err(&pdev->dev, "Failed to get memory resource\n");
+	if (IS_ERR(mem_base))
 		return PTR_ERR(mem_base);
-	}
 
 	return mt7615_mmio_probe(&pdev->dev, mem_base, irq, mt7615e_reg_map);
 }
-- 
cgit v1.2.3


From a226ccd04c479ccd23d6927c64bad1b441707f70 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Thu, 1 Apr 2021 10:31:29 +0800
Subject: mt76: mt7915: fix txpower init for TSSI off chips

Fix incorrect txpower init value for TSSI off chips which causes
too small txpower.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 660398ac53c2..738ecf8f4fa2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -124,7 +124,7 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 				   struct ieee80211_channel *chan,
 				   u8 chain_idx)
 {
-	int index;
+	int index, target_power;
 	bool tssi_on;
 
 	if (chain_idx > 3)
@@ -133,15 +133,22 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 	tssi_on = mt7915_tssi_enabled(dev, chan->band);
 
 	if (chan->band == NL80211_BAND_2GHZ) {
-		index = MT_EE_TX0_POWER_2G + chain_idx * 3 + !tssi_on;
+		index = MT_EE_TX0_POWER_2G + chain_idx * 3;
+		target_power = mt7915_eeprom_read(dev, index);
+
+		if (!tssi_on)
+			target_power += mt7915_eeprom_read(dev, index + 1);
 	} else {
-		int group = tssi_on ?
-			    mt7915_get_channel_group(chan->hw_value) : 8;
+		int group = mt7915_get_channel_group(chan->hw_value);
+
+		index = MT_EE_TX0_POWER_5G + chain_idx * 12;
+		target_power = mt7915_eeprom_read(dev, index + group);
 
-		index = MT_EE_TX0_POWER_5G + chain_idx * 12 + group;
+		if (!tssi_on)
+			target_power += mt7915_eeprom_read(dev, index + 8);
 	}
 
-	return mt7915_eeprom_read(dev, index);
+	return target_power;
 }
 
 static const u8 sku_cck_delta_map[] = {
-- 
cgit v1.2.3


From 60468f7fd7072c804b2613f1cadabace8d77d311 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 27 Mar 2021 10:10:14 +0100
Subject: mt76: mt7921: fix key set/delete issue

Similar to the mt7915 driver, deleting a key with the previous key index
deletes the current key. Rework the code to better keep track of
multiple keys and check for the key index before deleting the current
key

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 25 ++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 7b4f6a70f87a..0be00389c7ed 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -412,7 +412,8 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	struct mt7921_sta *msta = sta ? (struct mt7921_sta *)sta->drv_priv :
 				  &mvif->sta;
 	struct mt76_wcid *wcid = &msta->wcid;
-	int idx = key->keyidx;
+	u8 *wcid_keyidx = &wcid->hw_key_idx;
+	int idx = key->keyidx, err = 0;
 
 	/* The hardware does not support per-STA RX GTK, fallback
 	 * to software mode for these.
@@ -428,6 +429,7 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_AES_CMAC:
 		key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
+		wcid_keyidx = &wcid->hw_key_idx2;
 		break;
 	case WLAN_CIPHER_SUITE_TKIP:
 	case WLAN_CIPHER_SUITE_CCMP:
@@ -442,16 +444,23 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		return -EOPNOTSUPP;
 	}
 
-	if (cmd == SET_KEY) {
-		key->hw_key_idx = wcid->idx;
-		wcid->hw_key_idx = idx;
-	} else if (idx == wcid->hw_key_idx) {
-		wcid->hw_key_idx = -1;
-	}
+	mt7921_mutex_acquire(dev);
+
+	if (cmd == SET_KEY)
+		*wcid_keyidx = idx;
+	else if (idx == *wcid_keyidx)
+		*wcid_keyidx = -1;
+	else
+		goto out;
+
 	mt76_wcid_key_setup(&dev->mt76, wcid,
 			    cmd == SET_KEY ? key : NULL);
 
-	return mt7921_mcu_add_key(dev, vif, msta, key, cmd);
+	err = mt7921_mcu_add_key(dev, vif, msta, key, cmd);
+out:
+	mt7921_mutex_release(dev);
+
+	return err;
 }
 
 static int mt7921_config(struct ieee80211_hw *hw, u32 changed)
-- 
cgit v1.2.3


From 859c85fd19715349ce01539459095fd5fc7e483a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 3 Apr 2021 14:26:39 +0200
Subject: mt76: mt7921: always wake the device in mt7921_remove_interface

Make sure the mcu is not in sleep mode before sending mcu messages in
mt7921_remove_interface routine.

Fixes: 1d8efc741df80 ("mt76: mt7921: introduce Runtime PM support")
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0be00389c7ed..06a47cab494e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -354,12 +354,12 @@ static void mt7921_remove_interface(struct ieee80211_hw *hw,
 	if (vif == phy->monitor_vif)
 		phy->monitor_vif = NULL;
 
+	mt7921_mutex_acquire(dev);
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
 	mt76_connac_mcu_uni_add_dev(&dev->mphy, vif, &mvif->sta.wcid, false);
 
 	rcu_assign_pointer(dev->mt76.wcid[idx], NULL);
 
-	mt7921_mutex_acquire(dev);
 	dev->mt76.vif_mask &= ~BIT(mvif->mt76.idx);
 	phy->omac_mask &= ~BIT_ULL(mvif->mt76.omac_idx);
 	mt7921_mutex_release(dev);
-- 
cgit v1.2.3


From c7cc5ec5730321ea6c86f0f87cb562fc3827b0d3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 3 Apr 2021 14:35:12 +0200
Subject: mt76: mt7921: rework mt7921_mcu_debug_msg_event routine

Rework mt7921_mcu_debug_msg_event routing removing unnecessary
assignments and relying on wiphy_info

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 26 ++++++++++++-------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 98b48b26a6fe..9283f9865ad5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -474,29 +474,27 @@ mt7921_mcu_bss_event(struct mt7921_dev *dev, struct sk_buff *skb)
 static void
 mt7921_mcu_debug_msg_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
-	struct mt7921_mcu_rxd *rxd = (struct mt7921_mcu_rxd *)skb->data;
-	struct debug_msg {
+	struct mt7921_debug_msg {
 		__le16 id;
 		u8 type;
 		u8 flag;
 		__le32 value;
 		__le16 len;
 		u8 content[512];
-	} __packed * debug_msg;
-	u16 cur_len;
-	int i;
-
-	skb_pull(skb, sizeof(*rxd));
-	debug_msg = (struct debug_msg *)skb->data;
+	} __packed * msg;
 
-	cur_len = min_t(u16, le16_to_cpu(debug_msg->len), 512);
+	skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
+	msg = (struct mt7921_debug_msg *)skb->data;
 
-	if (debug_msg->type == 0x3) {
-		for (i = 0 ; i < cur_len; i++)
-			if (!debug_msg->content[i])
-				debug_msg->content[i] = ' ';
+	if (msg->type == 3) { /* fw log */
+		u16 len = min_t(u16, le16_to_cpu(msg->len), 512);
+		int i;
 
-		dev_dbg(dev->mt76.dev, "%s", debug_msg->content);
+		for (i = 0 ; i < len; i++) {
+			if (!msg->content[i])
+				msg->content[i] = ' ';
+		}
+		wiphy_info(mt76_hw(dev)->wiphy, "%*s", len, msg->content);
 	}
 }
 
-- 
cgit v1.2.3


From 8a5a5dbfc18c667832e1f73b436dc324659fbbf8 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sun, 4 Apr 2021 15:48:23 +0800
Subject: mt76: mt7615: fix .add_beacon_offload()

ieee80211_beacon_get_template() returns NULL when beacon state is disabled,
so beacon_offload cannot be disabled for some devices.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 600c5366c099..6ca9e9840399 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -702,6 +702,9 @@ mt7615_mcu_add_beacon_offload(struct mt7615_dev *dev,
 	};
 	struct sk_buff *skb;
 
+	if (!enable)
+		goto out;
+
 	skb = ieee80211_beacon_get_template(hw, vif, &offs);
 	if (!skb)
 		return -EINVAL;
@@ -731,6 +734,7 @@ mt7615_mcu_add_beacon_offload(struct mt7615_dev *dev,
 	}
 	dev_kfree_skb(skb);
 
+out:
 	return mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_BCN_OFFLOAD, &req,
 				 sizeof(req), true);
 }
@@ -1164,8 +1168,8 @@ mt7615_mcu_uni_add_beacon_offload(struct mt7615_dev *dev,
 			__le16 tim_ie_pos;
 			__le16 csa_ie_pos;
 			__le16 bcc_ie_pos;
-			/* 0: enable beacon offload
-			 * 1: disable beacon offload
+			/* 0: disable beacon offload
+			 * 1: enable beacon offload
 			 * 2: update probe respond offload
 			 */
 			u8 enable;
@@ -1188,6 +1192,9 @@ mt7615_mcu_uni_add_beacon_offload(struct mt7615_dev *dev,
 	};
 	struct sk_buff *skb;
 
+	if (!enable)
+		goto out;
+
 	skb = ieee80211_beacon_get_template(mt76_hw(dev), vif, &offs);
 	if (!skb)
 		return -EINVAL;
@@ -1212,6 +1219,7 @@ mt7615_mcu_uni_add_beacon_offload(struct mt7615_dev *dev,
 	}
 	dev_kfree_skb(skb);
 
+out:
 	return mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_BSS_INFO_UPDATE,
 				 &req, sizeof(req), true);
 }
-- 
cgit v1.2.3


From ac15f9b6da3957b360c356a3a6bb74f5727e433e Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sun, 4 Apr 2021 15:48:24 +0800
Subject: mt76: mt7915: fix mt7915_mcu_add_beacon

ieee80211_beacon_get_template() returns NULL when beacon state is disabled.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 67b1426e98b6..3507fd135dfe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2451,6 +2451,17 @@ int mt7915_mcu_add_beacon(struct ieee80211_hw *hw,
 	struct bss_info_bcn *bcn;
 	int len = MT7915_BEACON_UPDATE_SIZE + MAX_BEACON_SIZE;
 
+	rskb = mt7915_mcu_alloc_sta_req(dev, mvif, NULL, len);
+	if (IS_ERR(rskb))
+		return PTR_ERR(rskb);
+
+	tlv = mt7915_mcu_add_tlv(rskb, BSS_INFO_OFFLOAD, sizeof(*bcn));
+	bcn = (struct bss_info_bcn *)tlv;
+	bcn->enable = en;
+
+	if (!en)
+		goto out;
+
 	skb = ieee80211_beacon_get_template(hw, vif, &offs);
 	if (!skb)
 		return -EINVAL;
@@ -2461,16 +2472,6 @@ int mt7915_mcu_add_beacon(struct ieee80211_hw *hw,
 		return -EINVAL;
 	}
 
-	rskb = mt7915_mcu_alloc_sta_req(dev, mvif, NULL, len);
-	if (IS_ERR(rskb)) {
-		dev_kfree_skb(skb);
-		return PTR_ERR(rskb);
-	}
-
-	tlv = mt7915_mcu_add_tlv(rskb, BSS_INFO_OFFLOAD, sizeof(*bcn));
-	bcn = (struct bss_info_bcn *)tlv;
-	bcn->enable = en;
-
 	if (mvif->band_idx) {
 		info = IEEE80211_SKB_CB(skb);
 		info->hw_queue |= MT_TX_HW_QUEUE_EXT_PHY;
@@ -2481,6 +2482,7 @@ int mt7915_mcu_add_beacon(struct ieee80211_hw *hw,
 	mt7915_mcu_beacon_cont(dev, rskb, skb, bcn, &offs);
 	dev_kfree_skb(skb);
 
+out:
 	return mt76_mcu_skb_send_msg(&phy->dev->mt76, rskb,
 				     MCU_EXT_CMD(BSS_INFO_UPDATE), true);
 }
-- 
cgit v1.2.3


From e07419a7dca97dd9bddfe5d099380857c19535f3 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 6 Apr 2021 12:46:09 +0800
Subject: mt76: mt7915: add wifi subsystem reset

Reset wifi subsystem when MCU is already running.
Fixes firmware download failure after soft reboot on systems where the PCIe
reset could not be performed properly.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Co-developed-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 58 +++++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c  | 15 +-----
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h | 13 ++++++
 3 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 2ffadd22c259..261f5ab23721 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -4,6 +4,7 @@
 #include <linux/etherdevice.h>
 #include "mt7915.h"
 #include "mac.h"
+#include "mcu.h"
 #include "eeprom.h"
 
 #define CCK_RATE(_idx, _rate) {						\
@@ -295,9 +296,50 @@ static void mt7915_init_work(struct work_struct *work)
 	mt7915_txbf_init(dev);
 }
 
+static void mt7915_wfsys_reset(struct mt7915_dev *dev)
+{
+	u32 val = MT_TOP_PWR_KEY | MT_TOP_PWR_SW_PWR_ON | MT_TOP_PWR_PWR_ON;
+	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
+
+#define MT_MCU_DUMMY_RANDOM	GENMASK(15, 0)
+#define MT_MCU_DUMMY_DEFAULT	GENMASK(31, 16)
+
+	mt76_wr(dev, MT_MCU_WFDMA0_DUMMY_CR, MT_MCU_DUMMY_RANDOM);
+
+	/* change to software control */
+	val |= MT_TOP_PWR_SW_RST;
+	mt76_wr(dev, MT_TOP_PWR_CTRL, val);
+
+	/* reset wfsys */
+	val &= ~MT_TOP_PWR_SW_RST;
+	mt76_wr(dev, MT_TOP_PWR_CTRL, val);
+
+	/* release wfsys then mcu re-excutes romcode */
+	val |= MT_TOP_PWR_SW_RST;
+	mt76_wr(dev, MT_TOP_PWR_CTRL, val);
+
+	/* switch to hw control */
+	val &= ~MT_TOP_PWR_SW_RST;
+	val |= MT_TOP_PWR_HW_CTRL;
+	mt76_wr(dev, MT_TOP_PWR_CTRL, val);
+
+	/* check whether mcu resets to default */
+	if (!mt76_poll_msec(dev, MT_MCU_WFDMA0_DUMMY_CR, MT_MCU_DUMMY_DEFAULT,
+			    MT_MCU_DUMMY_DEFAULT, 1000)) {
+		dev_err(dev->mt76.dev, "wifi subsystem reset failure\n");
+		return;
+	}
+
+	/* wfsys reset won't clear host registers */
+	mt76_clear(dev, reg, MT_TOP_MISC_FW_STATE);
+
+	msleep(100);
+}
+
 static int mt7915_init_hardware(struct mt7915_dev *dev)
 {
 	int ret, idx;
+	u32 val;
 
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
@@ -307,6 +349,12 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 
 	dev->dbdc_support = !!(mt7915_l1_rr(dev, MT_HW_BOUND) & BIT(5));
 
+	val = mt76_rr(dev, mt7915_reg_map_l1(dev, MT_TOP_MISC));
+
+	/* If MCU was already running, it is likely in a bad state */
+	if (FIELD_GET(MT_TOP_MISC_FW_STATE, val) > FW_STATE_FW_DOWNLOAD)
+		mt7915_wfsys_reset(dev);
+
 	ret = mt7915_dma_init(dev);
 	if (ret)
 		return ret;
@@ -320,8 +368,14 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	mt76_wr(dev, MT_SWDEF_MODE, MT_SWDEF_NORMAL_MODE);
 
 	ret = mt7915_mcu_init(dev);
-	if (ret)
-		return ret;
+	if (ret) {
+		/* Reset and try again */
+		mt7915_wfsys_reset(dev);
+
+		ret = mt7915_mcu_init(dev);
+		if (ret)
+			return ret;
+	}
 
 	ret = mt7915_eeprom_init(dev);
 	if (ret < 0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 3507fd135dfe..639f65d63add 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2784,21 +2784,8 @@ out:
 
 static int mt7915_load_firmware(struct mt7915_dev *dev)
 {
+	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
 	int ret;
-	u32 val, reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
-
-	val = FIELD_PREP(MT_TOP_MISC_FW_STATE, FW_STATE_FW_DOWNLOAD);
-
-	if (!mt76_poll_msec(dev, reg, MT_TOP_MISC_FW_STATE, val, 1000)) {
-		/* restart firmware once */
-		__mt76_mcu_restart(&dev->mt76);
-		if (!mt76_poll_msec(dev, reg, MT_TOP_MISC_FW_STATE,
-				    val, 1000)) {
-			dev_err(dev->mt76.dev,
-				"Firmware is not ready for download\n");
-			return -EIO;
-		}
-	}
 
 	ret = mt7915_load_patch(dev);
 	if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index ed0c9a24bb53..dfb8880657bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -4,6 +4,11 @@
 #ifndef __MT7915_REGS_H
 #define __MT7915_REGS_H
 
+/* MCU WFDMA0 */
+#define MT_MCU_WFDMA0_BASE		0x2000
+#define MT_MCU_WFDMA0(ofs)		(MT_MCU_WFDMA0_BASE + (ofs))
+#define MT_MCU_WFDMA0_DUMMY_CR		MT_MCU_WFDMA0(0x120)
+
 /* MCU WFDMA1 */
 #define MT_MCU_WFDMA1_BASE		0x3000
 #define MT_MCU_WFDMA1(ofs)		(MT_MCU_WFDMA1_BASE + (ofs))
@@ -396,6 +401,14 @@
 #define MT_WFDMA1_PCIE1_BUSY_ENA_TX_FIFO1	BIT(1)
 #define MT_WFDMA1_PCIE1_BUSY_ENA_RX_FIFO	BIT(2)
 
+#define MT_TOP_RGU_BASE				0xf0000
+#define MT_TOP_PWR_CTRL				(MT_TOP_RGU_BASE + (0x0))
+#define MT_TOP_PWR_KEY				(0x5746 << 16)
+#define MT_TOP_PWR_SW_RST			BIT(0)
+#define MT_TOP_PWR_SW_PWR_ON			GENMASK(3, 2)
+#define MT_TOP_PWR_HW_CTRL			BIT(4)
+#define MT_TOP_PWR_PWR_ON			BIT(7)
+
 #define MT_INFRA_CFG_BASE		0xf1000
 #define MT_INFRA(ofs)			(MT_INFRA_CFG_BASE + (ofs))
 
-- 
cgit v1.2.3


From f2d167c7dd2c0bc444e2d5639bcf856d0ea09a71 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 4 Apr 2021 13:36:57 +0200
Subject: mt76: mt7921: introduce MT_WFDMA_DUMMY_CR definition

Introduce MT_WFDMA_DUMMY_CR definition and remove magic numbers

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c  | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h | 6 ++++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 60de29a921a8..992faf82ad09 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -323,7 +323,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	mt76_set(dev, MT_WFDMA0_GLO_CFG,
 		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
 
-	mt76_set(dev, 0x54000120, BIT(1));
+	mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
 
 	/* enable interrupts for TX/RX rings */
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index ee6482124c0a..f3a537c6e3c8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1272,7 +1272,7 @@ mt7921_dma_reset(struct mt7921_dev *dev)
 	mt76_set(dev, MT_WFDMA0_GLO_CFG,
 		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
 
-	mt76_set(dev, 0x54000120, BIT(1));
+	mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
 
 	/* enable interrupts for TX/RX rings */
 	mt7921_irq_enable(dev,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index 035fa472fa7d..5b1bb5f85e5d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -385,6 +385,12 @@
 #define MT_TOP_MISC			MT_TOP(0xf0)
 #define MT_TOP_MISC_FW_STATE		GENMASK(2, 0)
 
+#define MT_MCU_WPDMA0_BASE		0x54000000
+#define MT_MCU_WPDMA0(ofs)		(MT_MCU_WPDMA0_BASE + (ofs))
+
+#define MT_WFDMA_DUMMY_CR		MT_MCU_WPDMA0(0x120)
+#define MT_WFDMA_NEED_REINIT		BIT(1)
+
 #define MT_HW_BOUND			0x70010020
 #define MT_HW_CHIPID			0x70010200
 #define MT_HW_REV			0x70010204
-- 
cgit v1.2.3


From 9c9d83213424679b087267600d53a35acfa0201f Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 6 Apr 2021 11:34:34 +0800
Subject: mt76: mt7921: fix inappropriate WoW setup with the missing ARP
 informaiton

Fix the Wake-on-WoWLAN failure should rely on ARP Information is being
updated in time to the firmware.

Fixes: ffa1bf97425b ("mt76: mt7921: introduce PM support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 44 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  3 ++
 3 files changed, 50 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 06a47cab494e..32b1cd0bca1f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -626,6 +626,9 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_ASSOC)
 		mt7921_bss_bcnft_apply(dev, vif, info->assoc);
 
+	if (changed & BSS_CHANGED_ARP_FILTER)
+		mt7921_mcu_update_arp_filter(hw, vif, info);
+
 	mt7921_mutex_release(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 9283f9865ad5..353877f1c762 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1325,3 +1325,47 @@ mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 		mt76_clear(dev, MT_WF_RFCR(0), MT_WF_RFCR_DROP_OTHER_BEACON);
 	}
 }
+
+int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_bss_conf *info)
+{
+	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	struct mt7921_dev *dev = mt7921_hw_dev(hw);
+	struct sk_buff *skb;
+	int i, len = min_t(int, info->arp_addr_cnt,
+			   IEEE80211_BSS_ARP_ADDR_LIST_LEN);
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt76_connac_arpns_tlv arp;
+	} req_hdr = {
+		.hdr = {
+			.bss_idx = mvif->mt76.idx,
+		},
+		.arp = {
+			.tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_ARP),
+			.len = cpu_to_le16(sizeof(struct mt76_connac_arpns_tlv)),
+			.ips_num = len,
+			.mode = 2,  /* update */
+			.option = 1,
+		},
+	};
+
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+				 sizeof(req_hdr) + len * sizeof(__be32));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, &req_hdr, sizeof(req_hdr));
+	for (i = 0; i < len; i++) {
+		u8 *addr = (u8 *)skb_put(skb, sizeof(__be32));
+
+		memcpy(addr, &info->arp_addr_list[i], sizeof(__be32));
+	}
+
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_UNI_CMD_OFFLOAD,
+				     true);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 5cedefc41416..ebe51017dd55 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -348,4 +348,7 @@ int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy,
 				 bool enable);
 void mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
 void mt7921_coredump_work(struct work_struct *work);
+int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 struct ieee80211_bss_conf *info);
 #endif
-- 
cgit v1.2.3


From 9db419f0cb39a63fb2f645a846cae17b81cd5c96 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 6 Apr 2021 11:34:35 +0800
Subject: mt76: mt7921: fix the dwell time control

dwell time for the scan is not configurable according to the current
firmware submitted into linux-firmware.git, so leave the dwell time 0 to
indicate the dwell time always determined by the firmware.

Fixes: 399090ef9605 ("mt76: mt76_connac: move hw_scan and sched_scan routine in mt76_connac_mcu module")
Suggested-by: Soul Huang <Soul.Huang@mediatek.com>
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index bafe95210807..1bd6294042b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1332,7 +1332,7 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
 {
 	struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv;
 	struct cfg80211_scan_request *sreq = &scan_req->req;
-	int n_ssids = 0, err, i, duration = MT76_CONNAC_SCAN_CHANNEL_TIME;
+	int n_ssids = 0, err, i, duration;
 	int ext_channels_num = max_t(int, sreq->n_channels - 32, 0);
 	struct ieee80211_channel **scan_list = sreq->channels;
 	struct mt76_dev *mdev = phy->dev;
@@ -1369,6 +1369,7 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
 	req->ssid_type_ext = n_ssids ? BIT(0) : 0;
 	req->ssids_num = n_ssids;
 
+	duration = is_mt7921(phy->dev) ? 0 : MT76_CONNAC_SCAN_CHANNEL_TIME;
 	/* increase channel time for passive scan */
 	if (!sreq->n_ssids)
 		duration *= 2;
-- 
cgit v1.2.3


From c781ff12a2f37a9795e13bf328e5053d3e69f9e0 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Fri, 9 Apr 2021 11:06:34 +0300
Subject: ethtool: Allow network drivers to dump arbitrary EEPROM data

Define get_module_eeprom_by_page() ethtool callback and implement
netlink infrastructure.

get_module_eeprom_by_page() allows network drivers to dump a part of
module's EEPROM specified by page and bank numbers along with offset and
length. It is effectively a netlink replacement for get_module_info()
and get_module_eeprom() pair, which is needed due to emergence of
complex non-linear EEPROM layouts.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst |  36 +++++-
 include/linux/ethtool.h                      |  33 +++++-
 include/uapi/linux/ethtool_netlink.h         |  19 +++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/eeprom.c                         | 171 +++++++++++++++++++++++++++
 net/ethtool/netlink.c                        |  11 ++
 net/ethtool/netlink.h                        |   2 +
 7 files changed, 270 insertions(+), 4 deletions(-)
 create mode 100644 net/ethtool/eeprom.c

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index ce4a69f8308f..bbecffc7b11a 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1338,6 +1338,38 @@ in an implementation specific way.
 ``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP
 module parameters. This does not mean autonegotiation.
 
+MODULE_EEPROM
+=============
+
+Fetch module EEPROM data dump.
+This interface is designed to allow dumps of at most 1/2 page at once. This
+means only dumps of 128 (or less) bytes are allowed, without crossing half page
+boundary located at offset 128. For pages other than 0 only high 128 bytes are
+accessible.
+
+Request contents:
+
+  =======================================  ======  ==========================
+  ``ETHTOOL_A_MODULE_EEPROM_HEADER``       nested  request header
+  ``ETHTOOL_A_MODULE_EEPROM_OFFSET``       u32     offset within a page
+  ``ETHTOOL_A_MODULE_EEPROM_LENGTH``       u32     amount of bytes to read
+  ``ETHTOOL_A_MODULE_EEPROM_PAGE``         u8      page number
+  ``ETHTOOL_A_MODULE_EEPROM_BANK``         u8      bank number
+  ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS``  u8      page I2C address
+  =======================================  ======  ==========================
+
+Kernel response contents:
+
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_MODULE_EEPROM_HEADER``          | nested | reply header        |
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_MODULE_EEPROM_DATA``            | nested | array of bytes from |
+ |                                             |        | module EEPROM       |
+ +---------------------------------------------+--------+---------------------+
+
+``ETHTOOL_A_MODULE_EEPROM_DATA`` has an attribute length equal to the amount of
+bytes driver actually read.
+
 Request translation
 ===================
 
@@ -1415,8 +1447,8 @@ are netlink only.
   ``ETHTOOL_GET_DUMP_FLAG``           n/a
   ``ETHTOOL_GET_DUMP_DATA``           n/a
   ``ETHTOOL_GET_TS_INFO``             ``ETHTOOL_MSG_TSINFO_GET``
-  ``ETHTOOL_GMODULEINFO``             n/a
-  ``ETHTOOL_GMODULEEEPROM``           n/a
+  ``ETHTOOL_GMODULEINFO``             ``ETHTOOL_MSG_MODULE_EEPROM_GET``
+  ``ETHTOOL_GMODULEEEPROM``           ``ETHTOOL_MSG_MODULE_EEPROM_GET``
   ``ETHTOOL_GEEE``                    ``ETHTOOL_MSG_EEE_GET``
   ``ETHTOOL_SEEE``                    ``ETHTOOL_MSG_EEE_SET``
   ``ETHTOOL_GRSSH``                   n/a
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 4290e2fa3117..9f6f323af59a 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -81,6 +81,7 @@ enum {
 #define ETH_RSS_HASH_NO_CHANGE	0
 
 struct net_device;
+struct netlink_ext_ack;
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
@@ -262,6 +263,31 @@ struct ethtool_pause_stats {
 	u64 rx_pause_frames;
 };
 
+#define ETH_MODULE_EEPROM_PAGE_LEN	128
+#define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
+
+/**
+ * struct ethtool_module_eeprom - EEPROM dump from specified page
+ * @offset: Offset within the specified EEPROM page to begin read, in bytes.
+ * @length: Number of bytes to read.
+ * @page: Page number to read from.
+ * @bank: Page bank number to read from, if applicable by EEPROM spec.
+ * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most
+ *	EEPROMs use 0x50 or 0x51.
+ * @data: Pointer to buffer with EEPROM data of @length size.
+ *
+ * This can be used to manage pages during EEPROM dump in ethtool and pass
+ * required information to the driver.
+ */
+struct ethtool_module_eeprom {
+	__u32	offset;
+	__u32	length;
+	__u8	page;
+	__u8	bank;
+	__u8	i2c_address;
+	__u8	*data;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @cap_link_lanes_supported: indicates if the driver supports lanes
@@ -414,6 +440,9 @@ struct ethtool_pause_stats {
  *	cannot use the standard PHY library helpers.
  * @get_phy_tunable: Read the value of a PHY tunable.
  * @set_phy_tunable: Set the value of a PHY tunable.
+ * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from
+ *	specified page. Returns a negative error code or the amount of bytes
+ *	read.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -519,6 +548,9 @@ struct ethtool_ops {
 				   const struct ethtool_tunable *, void *);
 	int	(*set_phy_tunable)(struct net_device *,
 				   const struct ethtool_tunable *, const void *);
+	int	(*get_module_eeprom_by_page)(struct net_device *dev,
+					     const struct ethtool_module_eeprom *page,
+					     struct netlink_ext_ack *extack);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
@@ -542,7 +574,6 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 				       const struct ethtool_link_ksettings *cmd,
 				       u32 *dev_speed, u8 *dev_duplex);
 
-struct netlink_ext_ack;
 struct phy_device;
 struct phy_tdr_config;
 
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 7f1bdb5b31ba..9612dcd48a6a 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -44,6 +44,7 @@ enum {
 	ETHTOOL_MSG_TUNNEL_INFO_GET,
 	ETHTOOL_MSG_FEC_GET,
 	ETHTOOL_MSG_FEC_SET,
+	ETHTOOL_MSG_MODULE_EEPROM_GET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -84,6 +85,7 @@ enum {
 	ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY,
 	ETHTOOL_MSG_FEC_GET_REPLY,
 	ETHTOOL_MSG_FEC_NTF,
+	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -646,6 +648,23 @@ enum {
 	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
 };
 
+/* MODULE EEPROM */
+
+enum {
+	ETHTOOL_A_MODULE_EEPROM_UNSPEC,
+	ETHTOOL_A_MODULE_EEPROM_HEADER,			/* nest - _A_HEADER_* */
+
+	ETHTOOL_A_MODULE_EEPROM_OFFSET,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_LENGTH,			/* u32 */
+	ETHTOOL_A_MODULE_EEPROM_PAGE,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_BANK,			/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS,		/* u8 */
+	ETHTOOL_A_MODULE_EEPROM_DATA,			/* nested */
+
+	__ETHTOOL_A_MODULE_EEPROM_CNT,
+	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index c2dc9033a8f7..83842685fd8c 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o fec.o
+		   tunnels.o fec.o eeprom.o
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
new file mode 100644
index 000000000000..8536dd905da5
--- /dev/null
+++ b/net/ethtool/eeprom.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include "netlink.h"
+#include "common.h"
+
+struct eeprom_req_info {
+	struct ethnl_req_info	base;
+	u32			offset;
+	u32			length;
+	u8			page;
+	u8			bank;
+	u8			i2c_address;
+};
+
+struct eeprom_reply_data {
+	struct ethnl_reply_data base;
+	u32			length;
+	u8			*data;
+};
+
+#define MODULE_EEPROM_REQINFO(__req_base) \
+	container_of(__req_base, struct eeprom_req_info, base)
+
+#define MODULE_EEPROM_REPDATA(__reply_base) \
+	container_of(__reply_base, struct eeprom_reply_data, base)
+
+static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
+			       struct ethnl_reply_data *reply_base,
+			       struct genl_info *info)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+	struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
+	struct ethtool_module_eeprom page_data = {0};
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	if (!dev->ethtool_ops->get_module_eeprom_by_page)
+		return -EOPNOTSUPP;
+
+	page_data.offset = request->offset;
+	page_data.length = request->length;
+	page_data.i2c_address = request->i2c_address;
+	page_data.page = request->page;
+	page_data.bank = request->bank;
+	page_data.data = kmalloc(page_data.length, GFP_KERNEL);
+	if (!page_data.data)
+		return -ENOMEM;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret)
+		goto err_free;
+
+	ret = dev->ethtool_ops->get_module_eeprom_by_page(dev, &page_data,
+							  info->extack);
+	if (ret < 0)
+		goto err_ops;
+
+	reply->length = ret;
+	reply->data = page_data.data;
+
+	ethnl_ops_complete(dev);
+	return 0;
+
+err_ops:
+	ethnl_ops_complete(dev);
+err_free:
+	kfree(page_data.data);
+	return ret;
+}
+
+static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info);
+
+	if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] ||
+	    !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] ||
+	    !tb[ETHTOOL_A_MODULE_EEPROM_PAGE] ||
+	    !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS])
+		return -EINVAL;
+
+	request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]);
+	request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
+	request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
+
+	if (!request->length)
+		return -EINVAL;
+
+	/* The following set of conditions limit the API to only dump 1/2
+	 * EEPROM page without crossing low page boundary located at offset 128.
+	 * This means user may only request dumps of length limited to 128 from
+	 * either low 128 bytes or high 128 bytes.
+	 * For pages higher than 0 only high 128 bytes are accessible.
+	 */
+	request->page = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_PAGE]);
+	if (request->page && request->offset < ETH_MODULE_EEPROM_PAGE_LEN) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_PAGE],
+				    "reading from lower half page is allowed for page 0 only");
+		return -EINVAL;
+	}
+
+	if (request->offset < ETH_MODULE_EEPROM_PAGE_LEN &&
+	    request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
+				    "reading cross half page boundary is illegal");
+		return -EINVAL;
+	} else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET],
+				    "offset is out of bounds");
+		return -EINVAL;
+	} else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) {
+		NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
+				    "reading cross page boundary is illegal");
+		return -EINVAL;
+	}
+
+	if (tb[ETHTOOL_A_MODULE_EEPROM_BANK])
+		request->bank = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_BANK]);
+
+	return 0;
+}
+
+static int eeprom_reply_size(const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	const struct eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_base);
+
+	return nla_total_size(sizeof(u8) * request->length); /* _EEPROM_DATA */
+}
+
+static int eeprom_fill_reply(struct sk_buff *skb,
+			     const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+
+	return nla_put(skb, ETHTOOL_A_MODULE_EEPROM_DATA, reply->length, reply->data);
+}
+
+static void eeprom_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+	struct eeprom_reply_data *reply = MODULE_EEPROM_REPDATA(reply_base);
+
+	kfree(reply->data);
+}
+
+const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_MODULE_EEPROM_GET,
+	.reply_cmd		= ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_MODULE_EEPROM_HEADER,
+	.req_info_size		= sizeof(struct eeprom_req_info),
+	.reply_data_size	= sizeof(struct eeprom_reply_data),
+
+	.parse_request		= eeprom_parse_request,
+	.prepare_data		= eeprom_prepare_data,
+	.reply_size		= eeprom_reply_size,
+	.fill_reply		= eeprom_fill_reply,
+	.cleanup_data		= eeprom_cleanup_data,
+};
+
+const struct nla_policy ethnl_module_eeprom_get_policy[] = {
+	[ETHTOOL_A_MODULE_EEPROM_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_MODULE_EEPROM_OFFSET]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_LENGTH]	= { .type = NLA_U32 },
+	[ETHTOOL_A_MODULE_EEPROM_PAGE]		= { .type = NLA_U8 },
+	[ETHTOOL_A_MODULE_EEPROM_BANK]		= { .type = NLA_U8 },
+	[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]	=
+		NLA_POLICY_RANGE(NLA_U8, 0, ETH_MODULE_MAX_I2C_ADDRESS),
+};
+
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 705a4b201564..5f5d7c4b3d4a 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -246,6 +246,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_EEE_GET]		= &ethnl_eee_request_ops,
 	[ETHTOOL_MSG_FEC_GET]		= &ethnl_fec_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
+	[ETHTOOL_MSG_MODULE_EEPROM_GET]	= &ethnl_module_eeprom_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -931,6 +932,16 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_fec_set_policy,
 		.maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_MODULE_EEPROM_GET,
+		.flags  = GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_module_eeprom_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 785f7ee45930..4305ac971bb0 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -345,6 +345,7 @@ extern const struct ethnl_request_ops ethnl_pause_request_ops;
 extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
 extern const struct ethnl_request_ops ethnl_fec_request_ops;
+extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -378,6 +379,7 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T
 extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
 extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
 extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
+extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
-- 
cgit v1.2.3


From e19b0a3474ab9ef90dd110af9f39fc87329755f1 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Fri, 9 Apr 2021 11:06:35 +0300
Subject: net/mlx5: Refactor module EEPROM query

Prepare for ethtool_ops::get_module_eeprom_data() implementation by
extracting common part of mlx5_query_module_eeprom() into a separate
function.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 79 +++++++++++++++-----------
 include/linux/mlx5/port.h                      |  9 +++
 2 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 4bb219565c58..9b9f870d67a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -353,67 +353,78 @@ static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset
 	*offset -= MLX5_EEPROM_PAGE_LENGTH;
 }
 
-int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
-			     u16 offset, u16 size, u8 *data)
+static int mlx5_query_mcia(struct mlx5_core_dev *dev,
+			   struct mlx5_module_eeprom_query_params *params, u8 *data)
 {
-	int module_num, status, err, page_num = 0;
 	u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {};
 	u32 out[MLX5_ST_SZ_DW(mcia_reg)];
-	u16 i2c_addr = 0;
-	u8 module_id;
+	int status, err;
 	void *ptr;
+	u16 size;
+
+	size = min_t(int, params->size, MLX5_EEPROM_MAX_BYTES);
+
+	MLX5_SET(mcia_reg, in, l, 0);
+	MLX5_SET(mcia_reg, in, size, size);
+	MLX5_SET(mcia_reg, in, module, params->module_number);
+	MLX5_SET(mcia_reg, in, device_address, params->offset);
+	MLX5_SET(mcia_reg, in, page_number, params->page);
+	MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address);
 
-	err = mlx5_query_module_num(dev, &module_num);
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_MCIA, 0, 0);
 	if (err)
 		return err;
 
-	err = mlx5_query_module_id(dev, module_num, &module_id);
+	status = MLX5_GET(mcia_reg, out, status);
+	if (status) {
+		mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
+			      status);
+		return -EIO;
+	}
+
+	ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
+	memcpy(data, ptr, size);
+
+	return size;
+}
+
+int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
+			     u16 offset, u16 size, u8 *data)
+{
+	struct mlx5_module_eeprom_query_params query = {0};
+	u8 module_id;
+	int err;
+
+	err = mlx5_query_module_num(dev, &query.module_number);
+	if (err)
+		return err;
+
+	err = mlx5_query_module_id(dev, query.module_number, &module_id);
 	if (err)
 		return err;
 
 	switch (module_id) {
 	case MLX5_MODULE_ID_SFP:
-		mlx5_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
 		break;
 	case MLX5_MODULE_ID_QSFP:
 	case MLX5_MODULE_ID_QSFP_PLUS:
 	case MLX5_MODULE_ID_QSFP28:
-		mlx5_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset);
 		break;
 	default:
 		mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
 		return -EINVAL;
 	}
 
-	if (offset + size > MLX5_EEPROM_PAGE_LENGTH)
+	if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH)
 		/* Cross pages read, read until offset 256 in low page */
 		size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
 
-	size = min_t(int, size, MLX5_EEPROM_MAX_BYTES);
+	query.size = size;
 
-	MLX5_SET(mcia_reg, in, l, 0);
-	MLX5_SET(mcia_reg, in, module, module_num);
-	MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr);
-	MLX5_SET(mcia_reg, in, page_number, page_num);
-	MLX5_SET(mcia_reg, in, device_address, offset);
-	MLX5_SET(mcia_reg, in, size, size);
-
-	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
-				   sizeof(out), MLX5_REG_MCIA, 0, 0);
-	if (err)
-		return err;
-
-	status = MLX5_GET(mcia_reg, out, status);
-	if (status) {
-		mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n",
-			      status);
-		return -EIO;
-	}
-
-	ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0);
-	memcpy(data, ptr, size);
-
-	return size;
+	return mlx5_query_mcia(dev, &query, data);
 }
 EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
 
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 23edd2db4803..90b87aa82db3 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -62,6 +62,15 @@ enum mlx5_an_status {
 #define MLX5_EEPROM_PAGE_LENGTH		256
 #define MLX5_EEPROM_HIGH_PAGE_LENGTH	128
 
+struct mlx5_module_eeprom_query_params {
+	u16 size;
+	u16 offset;
+	u16 i2c_address;
+	u32 page;
+	u32 bank;
+	u32 module_number;
+};
+
 enum mlx5e_link_mode {
 	MLX5E_1000BASE_CX_SGMII	 = 0,
 	MLX5E_1000BASE_KX	 = 1,
-- 
cgit v1.2.3


From e109d2b204daa223e6d3cdaa369071c3ea96dcbf Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Fri, 9 Apr 2021 11:06:36 +0300
Subject: net/mlx5: Implement get_module_eeprom_by_page()

Implement ethtool_ops::get_module_eeprom_by_page() to enable
support of new SFP standards.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 44 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/port.c     | 41 ++++++++++++++++++++
 include/linux/mlx5/port.h                          |  2 +
 3 files changed, 87 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index b185a0452629..c8057a44d5ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1770,6 +1770,49 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev,
 	return 0;
 }
 
+static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev,
+					   const struct ethtool_module_eeprom *page_data,
+					   struct netlink_ext_ack *extack)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_module_eeprom_query_params query;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 *data = page_data->data;
+	int size_read;
+	int i = 0;
+
+	if (!page_data->length)
+		return -EINVAL;
+
+	memset(data, 0, page_data->length);
+
+	query.offset = page_data->offset;
+	query.i2c_address = page_data->i2c_address;
+	query.bank = page_data->bank;
+	query.page = page_data->page;
+	while (i < page_data->length) {
+		query.size = page_data->length - i;
+		size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i);
+
+		/* Done reading, return how many bytes was read */
+		if (!size_read)
+			return i;
+
+		if (size_read == -EINVAL)
+			return -EINVAL;
+		if (size_read < 0) {
+			netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n",
+				   __func__, size_read);
+			return i;
+		}
+
+		i += size_read;
+		query.offset += size_read;
+	}
+
+	return i;
+}
+
 int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
 			       struct ethtool_flash *flash)
 {
@@ -2159,6 +2202,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.set_wol	   = mlx5e_set_wol,
 	.get_module_info   = mlx5e_get_module_info,
 	.get_module_eeprom = mlx5e_get_module_eeprom,
+	.get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page,
 	.flash_device      = mlx5e_flash_device,
 	.get_priv_flags    = mlx5e_get_priv_flags,
 	.set_priv_flags    = mlx5e_set_priv_flags,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 9b9f870d67a4..522a41f8f1e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -428,6 +428,47 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom);
 
+int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
+				     struct mlx5_module_eeprom_query_params *params,
+				     u8 *data)
+{
+	u8 module_id;
+	int err;
+
+	err = mlx5_query_module_num(dev, &params->module_number);
+	if (err)
+		return err;
+
+	err = mlx5_query_module_id(dev, params->module_number, &module_id);
+	if (err)
+		return err;
+
+	switch (module_id) {
+	case MLX5_MODULE_ID_SFP:
+		if (params->page > 0)
+			return -EINVAL;
+		break;
+	case MLX5_MODULE_ID_QSFP:
+	case MLX5_MODULE_ID_QSFP28:
+	case MLX5_MODULE_ID_QSFP_PLUS:
+		if (params->page > 3)
+			return -EINVAL;
+		break;
+	default:
+		mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
+		return -EINVAL;
+	}
+
+	if (params->i2c_address != MLX5_I2C_ADDR_HIGH &&
+	    params->i2c_address != MLX5_I2C_ADDR_LOW) {
+		mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address);
+		return -EINVAL;
+	}
+
+	return mlx5_query_mcia(dev, params, data);
+}
+EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page);
+
 static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
 				int pvlc_size,  u8 local_port)
 {
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 90b87aa82db3..58d56adb9842 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -209,6 +209,8 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 			 bool *enabled);
 int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
 			     u16 offset, u16 size, u8 *data);
+int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
+				     struct mlx5_module_eeprom_query_params *params, u8 *data);
 
 int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out);
 int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in);
-- 
cgit v1.2.3


From 4c88fa412a100f925b8ab1aa952a672895f69d35 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Fri, 9 Apr 2021 11:06:37 +0300
Subject: net/mlx5: Add support for DSFP module EEPROM dumps

Allow the driver to recognise DSFP transceiver module ID and therefore
allow its EEPROM dumps using ethtool.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 2 ++
 include/linux/mlx5/port.h                      | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 522a41f8f1e2..1ef2b6a848c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -454,6 +454,8 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev,
 		if (params->page > 3)
 			return -EINVAL;
 		break;
+	case MLX5_MODULE_ID_DSFP:
+		break;
 	default:
 		mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id);
 		return -EINVAL;
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 58d56adb9842..77ea4f9c5265 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -45,6 +45,7 @@ enum mlx5_module_id {
 	MLX5_MODULE_ID_QSFP             = 0xC,
 	MLX5_MODULE_ID_QSFP_PLUS        = 0xD,
 	MLX5_MODULE_ID_QSFP28           = 0x11,
+	MLX5_MODULE_ID_DSFP		= 0x1B,
 };
 
 enum mlx5_an_status {
-- 
cgit v1.2.3


From 95dfc7effd88b49d66791678e042970824cae838 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 9 Apr 2021 11:06:38 +0300
Subject: net: ethtool: Export helpers for getting EEPROM info

There are two ways to retrieve information from SFP EEPROMs.  Many
devices make use of the common code, and assign the sfp_bus pointer in
the netdev to point to the bus holding the SFP device. Some MAC
drivers directly implement ops in there ethool structure.

Export within net/ethtool the two helpers used to call these methods,
so that they can also be used in the new netlink code.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/common.h |  5 +++++
 net/ethtool/ioctl.c  | 14 +++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index a9d071248698..2dc2b80aea5f 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -47,4 +47,9 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info);
 
 extern const struct ethtool_phy_ops *ethtool_phy_ops;
 
+int ethtool_get_module_info_call(struct net_device *dev,
+				 struct ethtool_modinfo *modinfo);
+int ethtool_get_module_eeprom_call(struct net_device *dev,
+				   struct ethtool_eeprom *ee, u8 *data);
+
 #endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index a9f67574148f..27f1c5224acb 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2188,8 +2188,8 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
-static int __ethtool_get_module_info(struct net_device *dev,
-				     struct ethtool_modinfo *modinfo)
+int ethtool_get_module_info_call(struct net_device *dev,
+				 struct ethtool_modinfo *modinfo)
 {
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	struct phy_device *phydev = dev->phydev;
@@ -2215,7 +2215,7 @@ static int ethtool_get_module_info(struct net_device *dev,
 	if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
 		return -EFAULT;
 
-	ret = __ethtool_get_module_info(dev, &modinfo);
+	ret = ethtool_get_module_info_call(dev, &modinfo);
 	if (ret)
 		return ret;
 
@@ -2225,8 +2225,8 @@ static int ethtool_get_module_info(struct net_device *dev,
 	return 0;
 }
 
-static int __ethtool_get_module_eeprom(struct net_device *dev,
-				       struct ethtool_eeprom *ee, u8 *data)
+int ethtool_get_module_eeprom_call(struct net_device *dev,
+				   struct ethtool_eeprom *ee, u8 *data)
 {
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	struct phy_device *phydev = dev->phydev;
@@ -2249,12 +2249,12 @@ static int ethtool_get_module_eeprom(struct net_device *dev,
 	int ret;
 	struct ethtool_modinfo modinfo;
 
-	ret = __ethtool_get_module_info(dev, &modinfo);
+	ret = ethtool_get_module_info_call(dev, &modinfo);
 	if (ret)
 		return ret;
 
 	return ethtool_get_any_eeprom(dev, useraddr,
-				      __ethtool_get_module_eeprom,
+				      ethtool_get_module_eeprom_call,
 				      modinfo.eeprom_len);
 }
 
-- 
cgit v1.2.3


From 96d971e307cc0e434f96329b42bbd98cfbca07d2 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Fri, 9 Apr 2021 11:06:39 +0300
Subject: ethtool: Add fallback to get_module_eeprom from netlink command

In case netlink get_module_eeprom_by_page() callback is not implemented
by the driver, try to call old get_module_info() and get_module_eeprom()
pair. Recalculate parameters to get_module_eeprom() offset and len using
page number and their sizes. Return error if this can't be done.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/eeprom.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 8536dd905da5..1a49c133d401 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -25,6 +25,66 @@ struct eeprom_reply_data {
 #define MODULE_EEPROM_REPDATA(__reply_base) \
 	container_of(__reply_base, struct eeprom_reply_data, base)
 
+static int fallback_set_params(struct eeprom_req_info *request,
+			       struct ethtool_modinfo *modinfo,
+			       struct ethtool_eeprom *eeprom)
+{
+	u32 offset = request->offset;
+	u32 length = request->length;
+
+	if (request->page)
+		offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
+
+	if (modinfo->type == ETH_MODULE_SFF_8079 &&
+	    request->i2c_address == 0x51)
+		offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
+
+	if (offset >= modinfo->eeprom_len)
+		return -EINVAL;
+
+	eeprom->cmd = ETHTOOL_GMODULEEEPROM;
+	eeprom->len = length;
+	eeprom->offset = offset;
+
+	return 0;
+}
+
+static int eeprom_fallback(struct eeprom_req_info *request,
+			   struct eeprom_reply_data *reply,
+			   struct genl_info *info)
+{
+	struct net_device *dev = reply->base.dev;
+	struct ethtool_modinfo modinfo = {0};
+	struct ethtool_eeprom eeprom = {0};
+	u8 *data;
+	int err;
+
+	modinfo.cmd = ETHTOOL_GMODULEINFO;
+	err = ethtool_get_module_info_call(dev, &modinfo);
+	if (err < 0)
+		return err;
+
+	err = fallback_set_params(request, &modinfo, &eeprom);
+	if (err < 0)
+		return err;
+
+	data = kmalloc(eeprom.len, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	err = ethtool_get_module_eeprom_call(dev, &eeprom, data);
+	if (err < 0)
+		goto err_out;
+
+	reply->data = data;
+	reply->length = eeprom.len;
+
+	return 0;
+
+err_out:
+	kfree(data);
+	return err;
+}
+
 static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
 			       struct ethnl_reply_data *reply_base,
 			       struct genl_info *info)
@@ -36,7 +96,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
 	int ret;
 
 	if (!dev->ethtool_ops->get_module_eeprom_by_page)
-		return -EOPNOTSUPP;
+		return eeprom_fallback(request, reply, info);
 
 	page_data.offset = request->offset;
 	page_data.length = request->length;
-- 
cgit v1.2.3


From d740513f05a24b1a46722325974223980f068728 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 9 Apr 2021 11:06:40 +0300
Subject: phy: sfp: add netlink SFP support to generic SFP code

The new netlink API for reading SFP data requires a new op to be
implemented. The idea of the new netlink SFP code is that userspace is
responsible to parsing the EEPROM data and requesting pages, rather
than have the kernel decide what pages are interesting and returning
them. This allows greater flexibility for newer formats.

Currently the generic SFP code only supports simple SFPs. Allow i2c
address 0x50 and 0x51 to be accessed with page and bank must always be
0. This interface will later be extended when for example QSFP support
is added.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp-bus.c | 20 ++++++++++++++++++++
 drivers/net/phy/sfp.c     | 25 +++++++++++++++++++++++++
 drivers/net/phy/sfp.h     |  3 +++
 include/linux/sfp.h       | 10 ++++++++++
 4 files changed, 58 insertions(+)

diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 2e11176c6b94..e61de66e973b 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -555,6 +555,26 @@ int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
 }
 EXPORT_SYMBOL_GPL(sfp_get_module_eeprom);
 
+/**
+ * sfp_get_module_eeprom_by_page() - Read a page from the SFP module EEPROM
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @page: a &struct ethtool_module_eeprom
+ * @extack: extack for reporting problems
+ *
+ * Read an EEPROM page as specified by the supplied @page. See the
+ * documentation for &struct ethtool_module_eeprom for the page to be read.
+ *
+ * Returns 0 on success or a negative errno number. More error
+ * information might be provided via extack
+ */
+int sfp_get_module_eeprom_by_page(struct sfp_bus *bus,
+				  const struct ethtool_module_eeprom *page,
+				  struct netlink_ext_ack *extack)
+{
+	return bus->socket_ops->module_eeprom_by_page(bus->sfp, page, extack);
+}
+EXPORT_SYMBOL_GPL(sfp_get_module_eeprom_by_page);
+
 /**
  * sfp_upstream_start() - Inform the SFP that the network device is up
  * @bus: a pointer to the &struct sfp_bus structure for the sfp module
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 7998acc689b7..37f722c763d7 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -2330,6 +2330,30 @@ static int sfp_module_eeprom(struct sfp *sfp, struct ethtool_eeprom *ee,
 	return 0;
 }
 
+static int sfp_module_eeprom_by_page(struct sfp *sfp,
+				     const struct ethtool_module_eeprom *page,
+				     struct netlink_ext_ack *extack)
+{
+	if (page->bank) {
+		NL_SET_ERR_MSG(extack, "Banks not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (page->page) {
+		NL_SET_ERR_MSG(extack, "Only page 0 supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (page->i2c_address != 0x50 &&
+	    page->i2c_address != 0x51) {
+		NL_SET_ERR_MSG(extack, "Only address 0x50 and 0x51 supported");
+		return -EOPNOTSUPP;
+	}
+
+	return sfp_read(sfp, page->i2c_address == 0x51, page->offset,
+			page->data, page->length);
+};
+
 static const struct sfp_socket_ops sfp_module_ops = {
 	.attach = sfp_attach,
 	.detach = sfp_detach,
@@ -2337,6 +2361,7 @@ static const struct sfp_socket_ops sfp_module_ops = {
 	.stop = sfp_stop,
 	.module_info = sfp_module_info,
 	.module_eeprom = sfp_module_eeprom,
+	.module_eeprom_by_page = sfp_module_eeprom_by_page,
 };
 
 static void sfp_timeout(struct work_struct *work)
diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h
index b83f70526270..27226535c72b 100644
--- a/drivers/net/phy/sfp.h
+++ b/drivers/net/phy/sfp.h
@@ -14,6 +14,9 @@ struct sfp_socket_ops {
 	int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo);
 	int (*module_eeprom)(struct sfp *sfp, struct ethtool_eeprom *ee,
 			     u8 *data);
+	int (*module_eeprom_by_page)(struct sfp *sfp,
+				     const struct ethtool_module_eeprom *page,
+				     struct netlink_ext_ack *extack);
 };
 
 int sfp_add_phy(struct sfp_bus *bus, struct phy_device *phydev);
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index 38893e4dd0f0..302094b855fb 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -542,6 +542,9 @@ phy_interface_t sfp_select_interface(struct sfp_bus *bus,
 int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
 int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee,
 			  u8 *data);
+int sfp_get_module_eeprom_by_page(struct sfp_bus *bus,
+				  const struct ethtool_module_eeprom *page,
+				  struct netlink_ext_ack *extack);
 void sfp_upstream_start(struct sfp_bus *bus);
 void sfp_upstream_stop(struct sfp_bus *bus);
 void sfp_bus_put(struct sfp_bus *bus);
@@ -587,6 +590,13 @@ static inline int sfp_get_module_eeprom(struct sfp_bus *bus,
 	return -EOPNOTSUPP;
 }
 
+static inline int sfp_get_module_eeprom_by_page(struct sfp_bus *bus,
+						const struct ethtool_module_eeprom *page,
+						struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void sfp_upstream_start(struct sfp_bus *bus)
 {
 }
-- 
cgit v1.2.3


From c97a31f66ebcab54c006878142fb683c6116bed1 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 9 Apr 2021 11:06:41 +0300
Subject: ethtool: wire in generic SFP module access

If the device has a sfp bus attached, call its
sfp_get_module_eeprom_by_page() function, otherwise use the ethtool op
for the device. This follows how the IOCTL works.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/eeprom.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 1a49c133d401..2a6733a6449a 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/ethtool.h>
+#include <linux/sfp.h>
 #include "netlink.h"
 #include "common.h"
 
@@ -85,6 +86,21 @@ err_out:
 	return err;
 }
 
+static int get_module_eeprom_by_page(struct net_device *dev,
+				     struct ethtool_module_eeprom *page_data,
+				     struct netlink_ext_ack *extack)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (dev->sfp_bus)
+		return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
+
+	if (ops->get_module_info)
+		return ops->get_module_eeprom_by_page(dev, page_data, extack);
+
+	return -EOPNOTSUPP;
+}
+
 static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
 			       struct ethnl_reply_data *reply_base,
 			       struct genl_info *info)
@@ -95,9 +111,6 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
 	struct net_device *dev = reply_base->dev;
 	int ret;
 
-	if (!dev->ethtool_ops->get_module_eeprom_by_page)
-		return eeprom_fallback(request, reply, info);
-
 	page_data.offset = request->offset;
 	page_data.length = request->length;
 	page_data.i2c_address = request->i2c_address;
@@ -111,8 +124,7 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base,
 	if (ret)
 		goto err_free;
 
-	ret = dev->ethtool_ops->get_module_eeprom_by_page(dev, &page_data,
-							  info->extack);
+	ret = get_module_eeprom_by_page(dev, &page_data, info->extack);
 	if (ret < 0)
 		goto err_ops;
 
@@ -126,6 +138,9 @@ err_ops:
 	ethnl_ops_complete(dev);
 err_free:
 	kfree(page_data.data);
+
+	if (ret == -EOPNOTSUPP)
+		return eeprom_fallback(request, reply, info);
 	return ret;
 }
 
-- 
cgit v1.2.3


From c75fb320d482a5ce6e522378d137fd2c3bf79225 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Apr 2021 13:04:37 +0200
Subject: veth: use skb_orphan_partial instead of skb_orphan

As described by commit 9c4c325252c5 ("skbuff: preserve sock
reference when scrubbing the skb."), orphaning a skb
in the TX path will cause OoO.

Let's use skb_orphan_partial() instead of skb_orphan(), so
that we keep the sk around for queue's selection sake and we
still avoid the problem fixed with commit 4bf9ffa0fb57 ("veth:
Orphan skb before GRO")

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 9e525646df1d..ce085659d55f 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -686,7 +686,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	int mac_len, delta, off;
 	struct xdp_buff xdp;
 
-	skb_orphan(skb);
+	skb_orphan_partial(skb);
 
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
-- 
cgit v1.2.3


From d3256efd8e8b234a6251e4d4580bd2c3c31fdc4c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Apr 2021 13:04:38 +0200
Subject: veth: allow enabling NAPI even without XDP

Currently the veth device has the GRO feature bit set, even if
no GRO aggregation is possible with the default configuration,
as the veth device does not hook into the GRO engine.

Flipping the GRO feature bit from user-space is a no-op, unless
XDP is enabled. In such scenario GRO could actually take place, but
TSO is forced to off on the peer device.

This change allow user-space to really control the GRO feature, with
no need for an XDP program.

The GRO feature bit is now cleared by default - so that there are no
user-visible behavior changes with the default configuration.

When the GRO bit is set, the per-queue NAPI instances are initialized
and registered. On xmit, when napi instances are available, we try
to use them.

Some additional checks are in place to ensure we initialize/delete NAPIs
only when needed in case of overlapping XDP and GRO configuration
changes.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 116 insertions(+), 13 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ce085659d55f..c95c67aa52d2 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -57,6 +57,7 @@ struct veth_rq_stats {
 
 struct veth_rq {
 	struct napi_struct	xdp_napi;
+	struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
 	struct net_device	*dev;
 	struct bpf_prog __rcu	*xdp_prog;
 	struct xdp_mem_info	xdp_mem;
@@ -299,7 +300,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct veth_rq *rq = NULL;
 	struct net_device *rcv;
 	int length = skb->len;
-	bool rcv_xdp = false;
+	bool use_napi = false;
 	int rxq;
 
 	rcu_read_lock();
@@ -313,20 +314,24 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 	rxq = skb_get_queue_mapping(skb);
 	if (rxq < rcv->real_num_rx_queues) {
 		rq = &rcv_priv->rq[rxq];
-		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
+
+		/* The napi pointer is available when an XDP program is
+		 * attached or when GRO is enabled
+		 */
+		use_napi = rcu_access_pointer(rq->napi);
 		skb_record_rx_queue(skb, rxq);
 	}
 
 	skb_tx_timestamp(skb);
-	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		if (!rcv_xdp)
+	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+		if (!use_napi)
 			dev_lstats_add(dev, length);
 	} else {
 drop:
 		atomic64_inc(&priv->dropped);
 	}
 
-	if (rcv_xdp)
+	if (use_napi)
 		__veth_xdp_flush(rq);
 
 	rcu_read_unlock();
@@ -903,7 +908,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }
 
-static int veth_napi_add(struct net_device *dev)
+static int __veth_napi_enable(struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
@@ -920,6 +925,7 @@ static int veth_napi_add(struct net_device *dev)
 		struct veth_rq *rq = &priv->rq[i];
 
 		napi_enable(&rq->xdp_napi);
+		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
 	}
 
 	return 0;
@@ -938,6 +944,7 @@ static void veth_napi_del(struct net_device *dev)
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
+		rcu_assign_pointer(priv->rq[i].napi, NULL);
 		napi_disable(&rq->xdp_napi);
 		__netif_napi_del(&rq->xdp_napi);
 	}
@@ -951,8 +958,14 @@ static void veth_napi_del(struct net_device *dev)
 	}
 }
 
+static bool veth_gro_requested(const struct net_device *dev)
+{
+	return !!(dev->wanted_features & NETIF_F_GRO);
+}
+
 static int veth_enable_xdp(struct net_device *dev)
 {
+	bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;
 
@@ -960,7 +973,8 @@ static int veth_enable_xdp(struct net_device *dev)
 		for (i = 0; i < dev->real_num_rx_queues; i++) {
 			struct veth_rq *rq = &priv->rq[i];
 
-			netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+			if (!napi_already_on)
+				netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
 			err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
 			if (err < 0)
 				goto err_rxq_reg;
@@ -975,13 +989,25 @@ static int veth_enable_xdp(struct net_device *dev)
 			rq->xdp_mem = rq->xdp_rxq.mem;
 		}
 
-		err = veth_napi_add(dev);
-		if (err)
-			goto err_rxq_reg;
+		if (!napi_already_on) {
+			err = __veth_napi_enable(dev);
+			if (err)
+				goto err_rxq_reg;
+
+			if (!veth_gro_requested(dev)) {
+				/* user-space did not require GRO, but adding XDP
+				 * is supposed to get GRO working
+				 */
+				dev->features |= NETIF_F_GRO;
+				netdev_features_change(dev);
+			}
+		}
 	}
 
-	for (i = 0; i < dev->real_num_rx_queues; i++)
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
+		rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
+	}
 
 	return 0;
 err_reg_mem:
@@ -991,7 +1017,8 @@ err_rxq_reg:
 		struct veth_rq *rq = &priv->rq[i];
 
 		xdp_rxq_info_unreg(&rq->xdp_rxq);
-		netif_napi_del(&rq->xdp_napi);
+		if (!napi_already_on)
+			netif_napi_del(&rq->xdp_napi);
 	}
 
 	return err;
@@ -1004,7 +1031,19 @@ static void veth_disable_xdp(struct net_device *dev)
 
 	for (i = 0; i < dev->real_num_rx_queues; i++)
 		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
-	veth_napi_del(dev);
+
+	if (!netif_running(dev) || !veth_gro_requested(dev)) {
+		veth_napi_del(dev);
+
+		/* if user-space did not require GRO, since adding XDP
+		 * enabled it, clear it now
+		 */
+		if (!veth_gro_requested(dev) && netif_running(dev)) {
+			dev->features &= ~NETIF_F_GRO;
+			netdev_features_change(dev);
+		}
+	}
+
 	for (i = 0; i < dev->real_num_rx_queues; i++) {
 		struct veth_rq *rq = &priv->rq[i];
 
@@ -1013,6 +1052,29 @@ static void veth_disable_xdp(struct net_device *dev)
 	}
 }
 
+static int veth_napi_enable(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	int err, i;
+
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		struct veth_rq *rq = &priv->rq[i];
+
+		netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
+	}
+
+	err = __veth_napi_enable(dev);
+	if (err) {
+		for (i = 0; i < dev->real_num_rx_queues; i++) {
+			struct veth_rq *rq = &priv->rq[i];
+
+			netif_napi_del(&rq->xdp_napi);
+		}
+		return err;
+	}
+	return err;
+}
+
 static int veth_open(struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
@@ -1026,6 +1088,10 @@ static int veth_open(struct net_device *dev)
 		err = veth_enable_xdp(dev);
 		if (err)
 			return err;
+	} else if (veth_gro_requested(dev)) {
+		err = veth_napi_enable(dev);
+		if (err)
+			return err;
 	}
 
 	if (peer->flags & IFF_UP) {
@@ -1047,6 +1113,8 @@ static int veth_close(struct net_device *dev)
 
 	if (priv->_xdp_prog)
 		veth_disable_xdp(dev);
+	else if (veth_gro_requested(dev))
+		veth_napi_del(dev);
 
 	return 0;
 }
@@ -1145,10 +1213,32 @@ static netdev_features_t veth_fix_features(struct net_device *dev,
 		if (peer_priv->_xdp_prog)
 			features &= ~NETIF_F_GSO_SOFTWARE;
 	}
+	if (priv->_xdp_prog)
+		features |= NETIF_F_GRO;
 
 	return features;
 }
 
+static int veth_set_features(struct net_device *dev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = features ^ dev->features;
+	struct veth_priv *priv = netdev_priv(dev);
+	int err;
+
+	if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
+		return 0;
+
+	if (features & NETIF_F_GRO) {
+		err = veth_napi_enable(dev);
+		if (err)
+			return err;
+	} else {
+		veth_napi_del(dev);
+	}
+	return 0;
+}
+
 static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
 {
 	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
@@ -1267,6 +1357,7 @@ static const struct net_device_ops veth_netdev_ops = {
 #endif
 	.ndo_get_iflink		= veth_get_iflink,
 	.ndo_fix_features	= veth_fix_features,
+	.ndo_set_features	= veth_set_features,
 	.ndo_features_check	= passthru_features_check,
 	.ndo_set_rx_headroom	= veth_set_rx_headroom,
 	.ndo_bpf		= veth_xdp,
@@ -1329,6 +1420,13 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
 
 static struct rtnl_link_ops veth_link_ops;
 
+static void veth_disable_gro(struct net_device *dev)
+{
+	dev->features &= ~NETIF_F_GRO;
+	dev->wanted_features &= ~NETIF_F_GRO;
+	netdev_update_features(dev);
+}
+
 static int veth_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[],
 			struct netlink_ext_ack *extack)
@@ -1401,6 +1499,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		goto err_register_peer;
 
+	/* keep GRO disabled by default to be consistent with the established
+	 * veth behavior
+	 */
+	veth_disable_gro(peer);
 	netif_carrier_off(peer);
 
 	err = rtnl_configure_link(peer, ifmp);
@@ -1438,6 +1540,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 	priv = netdev_priv(peer);
 	rcu_assign_pointer(priv->peer, dev);
 
+	veth_disable_gro(dev);
 	return 0;
 
 err_register_dev:
-- 
cgit v1.2.3


From 47e550e0105be9b716a3860545731735a67c6b3c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Apr 2021 13:04:39 +0200
Subject: veth: refine napi usage

After the previous patch, when enabling GRO, locally generated
TCP traffic experiences some measurable overhead, as it traverses
the GRO engine without any chance of aggregation.

This change refine the NAPI receive path admission test, to avoid
unnecessary GRO overhead in most scenarios, when GRO is enabled
on a veth peer.

Only skbs that are eligible for aggregation enter the GRO layer,
the others will go through the traditional receive path.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index c95c67aa52d2..15b2e3923c47 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -294,6 +294,25 @@ static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
 		netif_rx(skb);
 }
 
+/* return true if the specified skb has chances of GRO aggregation
+ * Don't strive for accuracy, but try to avoid GRO overhead in the most
+ * common scenarios.
+ * When XDP is enabled, all traffic is considered eligible, as the xmit
+ * device has TSO off.
+ * When TSO is enabled on the xmit device, we are likely interested only
+ * in UDP aggregation, explicitly check for that if the skb is suspected
+ * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets -
+ * to belong to locally generated UDP traffic.
+ */
+static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
+					 const struct net_device *rcv,
+					 const struct sk_buff *skb)
+{
+	return !(dev->features & NETIF_F_ALL_TSO) ||
+		(skb->destructor == sock_wfree &&
+		 rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD));
+}
+
 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
@@ -317,8 +336,10 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		/* The napi pointer is available when an XDP program is
 		 * attached or when GRO is enabled
+		 * Don't bother with napi/GRO if the skb can't be aggregated
 		 */
-		use_napi = rcu_access_pointer(rq->napi);
+		use_napi = rcu_access_pointer(rq->napi) &&
+			   veth_skb_is_eligible_for_gro(dev, rcv, skb);
 		skb_record_rx_queue(skb, rxq);
 	}
 
-- 
cgit v1.2.3


From 1c3cadbe02420e6c85251c416a78a16f17761231 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 9 Apr 2021 13:04:40 +0200
Subject: self-tests: add veth tests

Add some basic veth tests, that verify the expected flags and
aggregation with different setups (default, xdp, etc...)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile |   1 +
 tools/testing/selftests/net/veth.sh  | 177 +++++++++++++++++++++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100755 tools/testing/selftests/net/veth.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 2d71b283dde3..f4242a961088 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -24,6 +24,7 @@ TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS += bareudp.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += veth.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
new file mode 100755
index 000000000000..2fedc0781ce8
--- /dev/null
+++ b/tools/testing/selftests/net/veth.sh
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
+readonly BASE=`basename $STATS`
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+readonly NPROCS=`nproc`
+ret=0
+
+cleanup() {
+	local ns
+	local -r jobs="$(jobs -p)"
+	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+	rm -f $STATS
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+	local ns
+
+	for ns in $NS_SRC $NS_DST; do
+		ip netns add $ns
+		ip -n $ns link set dev lo up
+	done
+
+	ip link add name veth$SRC type veth peer name veth$DST
+
+	for ns in $SRC $DST; do
+		ip link set dev veth$ns netns $BASE$ns up
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+		ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+	done
+	echo "#kernel" > $BASE
+	chmod go-rw $BASE
+}
+
+__chk_flag() {
+	local msg="$1"
+	local target=$2
+	local expected=$3
+	local flagname=$4
+
+	local flag=`ip netns exec $BASE$target ethtool -k veth$target |\
+		    grep $flagname | awk '{print $2}'`
+
+	printf "%-60s" "$msg"
+	if [ "$flag" = "$expected" ]; then
+		echo " ok "
+	else
+		echo " fail - expected $expected found $flag"
+		ret=1
+	fi
+}
+
+chk_gro_flag() {
+	__chk_flag "$1" $2 $3 generic-receive-offload
+}
+
+chk_tso_flag() {
+	__chk_flag "$1" $2 $3 tcp-segmentation-offload
+}
+
+chk_gro() {
+	local msg="$1"
+	local expected=$2
+
+	ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null
+	NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n
+
+	printf "%-60s" "$msg"
+	ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 &
+	local spid=$!
+	sleep 0.1
+
+	ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST
+	local retc=$?
+	wait $spid
+	local rets=$?
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \
+		    awk '{print $2}' | tail -n 1`
+	if [ "$pkts" = "$expected" ]; then
+		echo " ok "
+	else
+		echo " fail - got $pkts packets, expected $expected "
+		ret=1
+	fi
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+	echo "Missing xdp_dummy helper. Build bpf selftest first"
+	exit -1
+fi
+
+create_ns
+chk_gro_flag "default - gro flag" $SRC off
+chk_gro_flag "        - peer gro flag" $DST off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+chk_gro "        - aggregation" 1
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "        - aggregation with TSO off" 10
+cleanup
+
+create_ns
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro on - gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST down
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro enabled on link down - gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+chk_gro_flag "with xdp attached - gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC off
+chk_tso_flag "        - peer tso flag" $DST on
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro "        - aggregation" 1
+
+
+ip -n $NS_DST link set dev veth$DST down
+ip -n $NS_SRC link set dev veth$SRC down
+chk_gro_flag "        - after dev off, flag" $DST on
+chk_gro_flag "        - peer flag" $SRC off
+
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag "        - after gro on xdp off, gro flag" $DST on
+chk_gro_flag "        - peer gro flag" $SRC off
+chk_tso_flag "        - tso flag" $SRC on
+chk_tso_flag "        - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_SRC link set dev veth$SRC up
+chk_gro "        - aggregation" 1
+
+ip netns exec $NS_DST ethtool -K veth$DST gro off
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "aggregation again with default and TSO off" 10
+
+exit $ret
-- 
cgit v1.2.3


From 95291ced8169d157f42232e2463d27fb16da2efe Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Fri, 9 Apr 2021 19:09:11 +0800
Subject: ehea: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index c2e740475786..ea55314b209d 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -109,6 +109,7 @@ static const struct of_device_id ehea_device_table[] = {
 	},
 	{},
 };
+MODULE_DEVICE_TABLE(of, ehea_device_table);
 
 static struct platform_driver ehea_driver = {
 	.driver = {
-- 
cgit v1.2.3


From c3264fee72e7565ca5df2896a221672678d595b2 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 15:40:21 -0500
Subject: dt-bindings: net: qcom,ipa: add some compatible strings

Add existing supported platform "qcom,sc7180-ipa" to the set of IPA
compatible strings.  Also add newly-supported "qcom,sdx55-ipa",
"qcom,sc7280-ipa".

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 8f86084bf12e..2645a02cf19b 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -43,7 +43,11 @@ description:
 
 properties:
   compatible:
-    const: "qcom,sdm845-ipa"
+    oneOf:
+      - const: "qcom,sc7180-ipa"
+      - const: "qcom,sc7280-ipa"
+      - const: "qcom,sdm845-ipa"
+      - const: "qcom,sdx55-ipa"
 
   reg:
     items:
-- 
cgit v1.2.3


From c88c34fcf8f501d588c0a999aa7e51e18552c5f0 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 15:40:22 -0500
Subject: net: ipa: disable checksum offload for IPA v4.5+

Checksum offload for IPA v4.5+ is implemented differently, using
"inline" offload (which uses a common header format for both upload
and download offload).

The IPA hardware must be programmed to enable MAP checksum offload,
but the RMNet driver is responsible for interpreting checksum
metadata supplied with messages.

Currently, the RMNet driver does not support inline checksum offload.
This support is imminent, but until it is available, do not allow
newer versions of IPA to specify checksum offload for endpoints.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 72751843b2e4..ccc99ad983eb 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -88,6 +88,11 @@ static bool ipa_endpoint_data_valid_one(struct ipa *ipa, u32 count,
 	if (ipa_gsi_endpoint_data_empty(data))
 		return true;
 
+	/* IPA v4.5+ uses checksum offload, not yet supported by RMNet */
+	if (ipa->version >= IPA_VERSION_4_5)
+		if (data->endpoint.config.checksum)
+			return false;
+
 	if (!data->toward_ipa) {
 		if (data->endpoint.filter_support) {
 			dev_err(dev, "filtering not supported for "
@@ -230,6 +235,17 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
 static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count,
 				    const struct ipa_gsi_endpoint_data *data)
 {
+	const struct ipa_gsi_endpoint_data *dp = data;
+	enum ipa_endpoint_name name;
+
+	if (ipa->version < IPA_VERSION_4_5)
+		return true;
+
+	/* IPA v4.5+ uses checksum offload, not yet supported by RMNet */
+	for (name = 0; name < count; name++, dp++)
+		if (data->endpoint.config.checksum)
+			return false;
+
 	return true;
 }
 
-- 
cgit v1.2.3


From fbb763e7e7366c27848cbfb09d983802b6322709 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 15:40:23 -0500
Subject: net: ipa: add IPA v4.5 configuration data

Add support for the SDX55 SoC, which includes IPA version 4.5.

Starting with IPA v4.5, a few of the memory regions have a different
number of "canary" values; update comments in the where the region
identifers are defined to accurately reflect that.

I'll note three differences in SDX55 versus the other two existing
platforms (SDM845 and SC7180):
  - SDX55 uses a 32-bit Linux kernel
  - SDX55 has four interconnects rather than three
  - SDX55 uses IPA v4.5, which uses inline checksum offload

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile        |   3 +-
 drivers/net/ipa/ipa_data-v4.5.c | 437 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h      |   1 +
 drivers/net/ipa/ipa_main.c      |   4 +
 drivers/net/ipa/ipa_mem.h       |   6 +-
 5 files changed, 447 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ipa/ipa_data-v4.5.c

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index 6abd1db9fe33..ccc4924881ac 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -9,4 +9,5 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_endpoint.o ipa_cmd.o ipa_modem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
-ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o
+ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o \
+				ipa_data-v4.5.o
diff --git a/drivers/net/ipa/ipa_data-v4.5.c b/drivers/net/ipa/ipa_data-v4.5.c
new file mode 100644
index 000000000000..5f67a3a909ee
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v4.5.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2021 Linaro Ltd. */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.5 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v4.5 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UNUSED_0		= 0,
+	IPA_RSRC_GROUP_SRC_UL_DL,
+	IPA_RSRC_GROUP_SRC_UNUSED_2,
+	IPA_RSRC_GROUP_SRC_UNUSED_3,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UNUSED_0		= 0,
+	IPA_RSRC_GROUP_DST_UL_DL_DPL,
+	IPA_RSRC_GROUP_DST_UNUSED_2,
+	IPA_RSRC_GROUP_DST_UNUSED_3,
+	IPA_RSRC_GROUP_DST_UC,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v4.5 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes		= 8,
+		.max_reads		= 0,	/* no limit (hardware max) */
+		.max_reads_beats	= 120,
+	},
+	[IPA_QSB_MASTER_PCIE] = {
+		.max_writes		= 8,
+		.max_reads		= 12,
+		/* no outstanding read byte (beat) limit */
+	},
+};
+
+/* Endpoint configuration data for an SoC having IPA v4.5 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 9,
+		.endpoint_id	= 7,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 20,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 10,
+		.endpoint_id	= 16,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 7,
+		.endpoint_id	= 2,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 16,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 1,
+		.endpoint_id	= 14,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 7,
+		.endpoint_id	= 21,
+		.toward_ipa	= false,
+	},
+	[IPA_ENDPOINT_MODEM_DL_NLO_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 2,
+		.endpoint_id	= 8,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v4.5 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 1,	.max = 11,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 1,	.max = 63,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 14,	.max = 14,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 3,	.max = 3,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 18,	.max = 18,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UNUSED_0] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UNUSED_2] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UNUSED_3] = {
+			.min = 0,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 63,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 24,	.max = 24,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v4.5 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 16,	.max = 16,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 2,	.max = 2,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_3] = {
+			.min = 2,	.max = 2,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 2,	.max = 63,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_2] = {
+			.min = 1,	.max = 2,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UNUSED_3] = {
+			.min = 1,	.max = 2,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UC] = {
+			.min = 0,	.max = 2,
+		},
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v4.5 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v4.5 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	[IPA_MEM_UC_SHARED] = {
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_INFO] = {
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_V4_FILTER_HASHED] = {
+		.offset		= 0x0288,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_FILTER] = {
+		.offset		= 0x0308,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER_HASHED] = {
+		.offset		= 0x0388,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER] = {
+		.offset		= 0x0408,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.offset		= 0x0488,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE] = {
+		.offset		= 0x0508,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.offset		= 0x0588,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE] = {
+		.offset		= 0x0608,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_HEADER] = {
+		.offset		= 0x0688,
+		.size		= 0x0240,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_HEADER] = {
+		.offset		= 0x08c8,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM_PROC_CTX] = {
+		.offset		= 0x0ad0,
+		.size		= 0x0b20,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_PROC_CTX] = {
+		.offset		= 0x15f0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_NAT_TABLE] = {
+		.offset		= 0x1800,
+		.size		= 0x0d00,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.offset		= 0x2510,
+		.size		= 0x0030,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_STATS_QUOTA_AP] = {
+		.offset		= 0x2540,
+		.size		= 0x0048,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_TETHERING] = {
+		.offset		= 0x2588,
+		.size		= 0x0238,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_FILTER_ROUTE] = {
+		.offset		= 0x27c0,
+		.size		= 0x0800,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_DROP] = {
+		.offset		= 0x2fc0,
+		.size		= 0x0020,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM] = {
+		.offset		= 0x2fe8,
+		.size		= 0x0800,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_UC_EVENT_RING] = {
+		.offset		= 0x3800,
+		.size		= 0x1000,
+		.canary_count	= 1,
+	},
+	[IPA_MEM_PDN_CONFIG] = {
+		.offset		= 0x4800,
+		.size		= 0x0050,
+		.canary_count	= 0,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v4.5 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x14688000,
+	.imem_size	= 0x00003000,
+	.smem_id	= 497,
+	.smem_size	= 0x00009000,
+};
+
+/* Interconnect rates are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "memory-a",
+		.peak_bandwidth		= 600000,	/* 600 MBps */
+		.average_bandwidth	= 150000,	/* 150 MBps */
+	},
+	{
+		.name			= "memory-b",
+		.peak_bandwidth		= 1804000,	/* 1.804 GBps */
+		.average_bandwidth	= 150000,	/* 150 MBps */
+	},
+	/* Average rate is unused for the next two interconnects */
+	{
+		.name			= "imem",
+		.peak_bandwidth		= 450000,	/* 450 MBps */
+		.average_bandwidth	= 75000,	/* 75 MBps (unused?) */
+	},
+	{
+		.name			= "config",
+		.peak_bandwidth		= 171400,	/* 171.4 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v4.5 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 150 * 1000 * 1000,	/* Hz (150?  60?) */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v4.5 */
+const struct ipa_data ipa_data_v4_5 = {
+	.version	= IPA_VERSION_4_5,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 769f68923527..4bbb978fefdb 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -302,5 +302,6 @@ struct ipa_data {
 
 extern const struct ipa_data ipa_data_v3_5_1;
 extern const struct ipa_data ipa_data_v4_2;
+extern const struct ipa_data ipa_data_v4_5;
 
 #endif /* _IPA_DATA_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index bfed151f5d6d..27725f72ee7c 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -573,6 +573,10 @@ static const struct of_device_id ipa_match[] = {
 		.compatible	= "qcom,sc7180-ipa",
 		.data		= &ipa_data_v4_2,
 	},
+	{
+		.compatible	= "qcom,sdx55-ipa",
+		.data		= &ipa_data_v4_5,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, ipa_match);
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index 9ca8a47bd4af..a422aec69e5d 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -58,8 +58,8 @@ enum ipa_mem_id {
 	IPA_MEM_MODEM_PROC_CTX,		/* 2 canaries */
 	IPA_MEM_AP_PROC_CTX,		/* 0 canaries */
 	IPA_MEM_NAT_TABLE,		/* 4 canaries (IPA v4.5 and above) */
-	IPA_MEM_PDN_CONFIG,		/* 2 canaries (IPA v4.0 and above) */
-	IPA_MEM_STATS_QUOTA_MODEM,	/* 2 canaries (IPA v4.0 and above) */
+	IPA_MEM_PDN_CONFIG,		/* 0/2 canaries (IPA v4.0 and above) */
+	IPA_MEM_STATS_QUOTA_MODEM,	/* 2/4 canaries (IPA v4.0 and above) */
 	IPA_MEM_STATS_QUOTA_AP,		/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_STATS_TETHERING,	/* 0 canaries (IPA v4.0 and above) */
 	IPA_MEM_STATS_V4_FILTER,	/* 0 canaries (IPA v4.0-v4.2) */
@@ -68,7 +68,7 @@ enum ipa_mem_id {
 	IPA_MEM_STATS_V6_ROUTE,		/* 0 canaries (IPA v4.0-v4.2) */
 	IPA_MEM_STATS_FILTER_ROUTE,	/* 0 canaries (IPA v4.5 and above) */
 	IPA_MEM_STATS_DROP,		/* 0 canaries (IPA v4.0 and above) */
-	IPA_MEM_MODEM,			/* 0 canaries */
+	IPA_MEM_MODEM,			/* 0/2 canaries */
 	IPA_MEM_UC_EVENT_RING,		/* 1 canary */
 	IPA_MEM_COUNT,			/* Number of regions (not an index) */
 };
-- 
cgit v1.2.3


From 927c5043459ec613bad281074293f073599d2906 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 9 Apr 2021 15:40:24 -0500
Subject: net: ipa: add IPA v4.11 configuration data

Add support for the SC7280 SoC, which includes IPA version 4.11.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile         |   2 +-
 drivers/net/ipa/ipa_data-v4.11.c | 382 +++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h       |   1 +
 drivers/net/ipa/ipa_main.c       |   4 +
 4 files changed, 388 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ipa/ipa_data-v4.11.c

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index ccc4924881ac..8c0ac8790354 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -10,4 +10,4 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
 ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o \
-				ipa_data-v4.5.o
+				ipa_data-v4.5.o ipa_data-v4.11.o
diff --git a/drivers/net/ipa/ipa_data-v4.11.c b/drivers/net/ipa/ipa_data-v4.11.c
new file mode 100644
index 000000000000..05806ceae8b5
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v4.11.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2021 Linaro Ltd. */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.11 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v4.11 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UL_DL			= 0,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_UNUSED_2,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UL_DL_DPL			= 0,
+	IPA_RSRC_GROUP_DST_UNUSED_1,
+	IPA_RSRC_GROUP_DST_DRB_IP,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v4.11 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes		= 12,
+		.max_reads		= 13,
+		.max_reads_beats	= 120,
+	},
+};
+
+/* Endpoint configuration data for an SoC having IPA v4.11 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 5,
+		.endpoint_id	= 7,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 20,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 14,
+		.endpoint_id	= 9,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 2,
+		.endpoint_id	= 2,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 16,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 7,
+		.endpoint_id	= 16,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 7,
+		.endpoint_id	= 14,
+		.toward_ipa	= false,
+	},
+	[IPA_ENDPOINT_MODEM_DL_NLO_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 2,
+		.endpoint_id	= 8,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v4.11 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 6,	.max = 6,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 18,	.max = 18,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 2,	.max = 2,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 15,	.max = 15,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v4.11 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 3,	.max = 3,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DRB_IP] = {
+			.min = 25,	.max = 25,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 2,	.max = 2,
+		},
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v4.11 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v4.11 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	[IPA_MEM_UC_SHARED] = {
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_INFO] = {
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_V4_FILTER_HASHED] = {
+		.offset		= 0x0288,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_FILTER] = {
+		.offset		= 0x0308,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER_HASHED] = {
+		.offset		= 0x0388,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER] = {
+		.offset		= 0x0408,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.offset		= 0x0488,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE] = {
+		.offset		= 0x0508,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.offset		= 0x0588,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE] = {
+		.offset		= 0x0608,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_HEADER] = {
+		.offset		= 0x0688,
+		.size		= 0x0240,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_HEADER] = {
+		.offset		= 0x08c8,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM_PROC_CTX] = {
+		.offset		= 0x0ad0,
+		.size		= 0x0200,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_PROC_CTX] = {
+		.offset		= 0x0cd0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_NAT_TABLE] = {
+		.offset		= 0x0ee0,
+		.size		= 0x0d00,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_PDN_CONFIG] = {
+		.offset		= 0x1be8,
+		.size		= 0x0050,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.offset		= 0x1c40,
+		.size		= 0x0030,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_STATS_QUOTA_AP] = {
+		.offset		= 0x1c70,
+		.size		= 0x0048,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_TETHERING] = {
+		.offset		= 0x1cb8,
+		.size		= 0x0238,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_DROP] = {
+		.offset		= 0x1ef0,
+		.size		= 0x0020,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM] = {
+		.offset		= 0x1f18,
+		.size		= 0x100c,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_UC_EVENT_RING] = {
+		.offset		= 0x3000,
+		.size		= 0x0000,
+		.canary_count	= 1,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v4.11 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146a8000,
+	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00009000,
+};
+
+/* Interconnect rates are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "memory",
+		.peak_bandwidth		= 465000,	/* 465 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps */
+	},
+	/* Average rate is unused for the next two interconnects */
+	{
+		.name			= "imem",
+		.peak_bandwidth		= 68570,	/* 68.57 MBps */
+		.average_bandwidth	= 80000,	/* 80 MBps (unused?) */
+	},
+	{
+		.name			= "config",
+		.peak_bandwidth		= 30000,	/* 30 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v4.11 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 60 * 1000 * 1000,	/* Hz */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v4.11 */
+const struct ipa_data ipa_data_v4_11 = {
+	.version	= IPA_VERSION_4_11,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 4bbb978fefdb..e3212ea9e3bc 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -303,5 +303,6 @@ struct ipa_data {
 extern const struct ipa_data ipa_data_v3_5_1;
 extern const struct ipa_data ipa_data_v4_2;
 extern const struct ipa_data ipa_data_v4_5;
+extern const struct ipa_data ipa_data_v4_11;
 
 #endif /* _IPA_DATA_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 27725f72ee7c..0d168afcdf04 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -577,6 +577,10 @@ static const struct of_device_id ipa_match[] = {
 		.compatible	= "qcom,sdx55-ipa",
 		.data		= &ipa_data_v4_5,
 	},
+	{
+		.compatible	= "qcom,sc7280-ipa",
+		.data		= &ipa_data_v4_11,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, ipa_match);
-- 
cgit v1.2.3


From f3c45326ee71d1d3ec11e9ddb5afc04bca9ae492 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@cilium.io>
Date: Sat, 10 Apr 2021 10:45:48 -0700
Subject: bpf: Document PROG_TEST_RUN limitations

Per net/bpf/test_run.c, particular prog types have additional
restrictions around the parameters that can be provided, so document
these in the header.

I didn't bother documenting the limitation on duration for raw
tracepoints since that's an output parameter anyway.

Tested with ./tools/testing/selftests/bpf/test_doc_build.sh.

Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Joe Stringer <joe@cilium.io>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210410174549.816482-1-joe@cilium.io
---
 include/uapi/linux/bpf.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 49371eba98ba..e1ee1be7e49b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
  *		*ctx_out*, *data_out* (for example, packet data), result of the
  *		execution *retval*, and *duration* of the test run.
  *
+ *		The sizes of the buffers provided as input and output
+ *		parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ *		be provided in the corresponding variables *ctx_size_in*,
+ *		*ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ *		of these parameters are not provided (ie set to NULL), the
+ *		corresponding size field must be zero.
+ *
+ *		Some program types have particular requirements:
+ *
+ *		**BPF_PROG_TYPE_SK_LOOKUP**
+ *			*data_in* and *data_out* must be NULL.
+ *
+ *		**BPF_PROG_TYPE_XDP**
+ *			*ctx_in* and *ctx_out* must be NULL.
+ *
+ *		**BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ *		**BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ *			*ctx_out*, *data_in* and *data_out* must be NULL.
+ *			*repeat* must be zero.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
-- 
cgit v1.2.3


From cbaa683bb3923df4d3c12481bff6cb6d8fdbc060 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 12 Apr 2021 17:19:00 +0200
Subject: bpf: Sync bpf headers in tooling infrastucture

Synchronize tools/include/uapi/linux/bpf.h which was missing changes
from various commits:

  - f3c45326ee71 ("bpf: Document PROG_TEST_RUN limitations")
  - e5e35e754c28 ("bpf: BPF-helper for MTU checking add length input")

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 69902603012c..e1ee1be7e49b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -312,6 +312,27 @@ union bpf_iter_link_info {
  *		*ctx_out*, *data_out* (for example, packet data), result of the
  *		execution *retval*, and *duration* of the test run.
  *
+ *		The sizes of the buffers provided as input and output
+ *		parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ *		be provided in the corresponding variables *ctx_size_in*,
+ *		*ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ *		of these parameters are not provided (ie set to NULL), the
+ *		corresponding size field must be zero.
+ *
+ *		Some program types have particular requirements:
+ *
+ *		**BPF_PROG_TYPE_SK_LOOKUP**
+ *			*data_in* and *data_out* must be NULL.
+ *
+ *		**BPF_PROG_TYPE_XDP**
+ *			*ctx_in* and *ctx_out* must be NULL.
+ *
+ *		**BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ *		**BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ *			*ctx_out*, *data_in* and *data_out* must be NULL.
+ *			*repeat* must be zero.
+ *
  *	Return
  *		Returns zero on success. On error, -1 is returned and *errno*
  *		is set appropriately.
@@ -4578,7 +4599,7 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
- *		Check ctx packet size against exceeding MTU of net device (based
+ *		Check packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.
  *
@@ -4595,6 +4616,14 @@ union bpf_attr {
  *		against the current net device.  This is practical if this isn't
  *		used prior to redirect.
  *
+ *		On input *mtu_len* must be a valid pointer, else verifier will
+ *		reject BPF program.  If the value *mtu_len* is initialized to
+ *		zero then the ctx packet size is use.  When value *mtu_len* is
+ *		provided as input this specify the L3 length that the MTU check
+ *		is done against. Remember XDP and TC length operate at L2, but
+ *		this value is L3 as this correlate to MTU and IP-header tot_len
+ *		values which are L3 (similar behavior as bpf_fib_lookup).
+ *
  *		The Linux kernel route table can configure MTUs on a more
  *		specific per route level, which is not provided by this helper.
  *		For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -4619,11 +4648,9 @@ union bpf_attr {
  *
  *		On return *mtu_len* pointer contains the MTU value of the net
  *		device.  Remember the net device configured MTU is the L3 size,
- *		which is returned here and XDP and TX length operate at L2.
+ *		which is returned here and XDP and TC length operate at L2.
  *		Helper take this into account for you, but remember when using
- *		MTU value in your BPF-code.  On input *mtu_len* must be a valid
- *		pointer and be initialized (to zero), else verifier will reject
- *		BPF program.
+ *		MTU value in your BPF-code.
  *
  *	Return
  *		* 0 on success, and populate MTU value in *mtu_len* pointer.
-- 
cgit v1.2.3


From 51e0158a54321a48d260e95998393934bb0de52c Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 6 Apr 2021 20:21:11 -0700
Subject: skmsg: Pass psock pointer to ->psock_update_sk_prot()

Using sk_psock() to retrieve psock pointer from sock requires
RCU read lock, but we already get psock pointer before calling
->psock_update_sk_prot() in both cases, so we can just pass it
without bothering sk_psock().

Fixes: 8a59f9d1e3d4 ("sock: Introduce sk->sk_prot->psock_update_sk_prot()")
Reported-by: syzbot+320a3bc8d80f478c37e4@syzkaller.appspotmail.com
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: syzbot+320a3bc8d80f478c37e4@syzkaller.appspotmail.com
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210407032111.33398-1-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h | 5 +++--
 include/net/sock.h    | 5 ++++-
 include/net/tcp.h     | 2 +-
 include/net/udp.h     | 2 +-
 net/core/sock_map.c   | 2 +-
 net/ipv4/tcp_bpf.c    | 3 +--
 net/ipv4/udp_bpf.c    | 3 +--
 7 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index f78e90a04a69..e2fb0a5a101e 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -99,7 +99,8 @@ struct sk_psock {
 	void (*saved_close)(struct sock *sk, long timeout);
 	void (*saved_write_space)(struct sock *sk);
 	void (*saved_data_ready)(struct sock *sk);
-	int  (*psock_update_sk_prot)(struct sock *sk, bool restore);
+	int  (*psock_update_sk_prot)(struct sock *sk, struct sk_psock *psock,
+				     bool restore);
 	struct proto			*sk_proto;
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
@@ -405,7 +406,7 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 {
 	sk->sk_prot->unhash = psock->saved_unhash;
 	if (psock->psock_update_sk_prot)
-		psock->psock_update_sk_prot(sk, true);
+		psock->psock_update_sk_prot(sk, psock, true);
 }
 
 static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/include/net/sock.h b/include/net/sock.h
index 8b4155e756c2..c4bbdcd83f4d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1114,6 +1114,7 @@ struct inet_hashinfo;
 struct raw_hashinfo;
 struct smc_hashinfo;
 struct module;
+struct sk_psock;
 
 /*
  * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes
@@ -1185,7 +1186,9 @@ struct proto {
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
 #ifdef CONFIG_BPF_SYSCALL
-	int			(*psock_update_sk_prot)(struct sock *sk, bool restore);
+	int			(*psock_update_sk_prot)(struct sock *sk,
+							struct sk_psock *psock,
+							bool restore);
 #endif
 
 	/* Keeping track of sockets in use */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index eaea43afcc97..d05193cb0d99 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2215,7 +2215,7 @@ struct sk_psock;
 
 #ifdef CONFIG_BPF_SYSCALL
 struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int tcp_bpf_update_proto(struct sock *sk, bool restore);
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 #endif /* CONFIG_BPF_SYSCALL */
 
diff --git a/include/net/udp.h b/include/net/udp.h
index f55aaeef7e91..360df454356c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -543,7 +543,7 @@ static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
 #ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
-int udp_bpf_update_proto(struct sock *sk, bool restore);
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 #endif
 
 #endif	/* _UDP_H */
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 3d190d22b0d8..f473c51cbc4b 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -188,7 +188,7 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
 	if (!sk->sk_prot->psock_update_sk_prot)
 		return -EINVAL;
 	psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
-	return sk->sk_prot->psock_update_sk_prot(sk, false);
+	return sk->sk_prot->psock_update_sk_prot(sk, psock, false);
 }
 
 static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3d622a0d0753..4930bc8ab47e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -499,9 +499,8 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
 	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
 }
 
-int tcp_bpf_update_proto(struct sock *sk, bool restore)
+int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
-	struct sk_psock *psock = sk_psock(sk);
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
 
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 4a7e38c5d842..954c4591a6fd 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -103,10 +103,9 @@ static int __init udp_bpf_v4_build_proto(void)
 }
 core_initcall(udp_bpf_v4_build_proto);
 
-int udp_bpf_update_proto(struct sock *sk, bool restore)
+int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
 	int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
-	struct sk_psock *psock = sk_psock(sk);
 
 	if (restore) {
 		sk->sk_write_space = psock->saved_write_space;
-- 
cgit v1.2.3


From aadb2bb83ff789de63b48b4edeab7329423a50d3 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Wed, 7 Apr 2021 20:05:56 -0700
Subject: sock_map: Fix a potential use-after-free in sock_map_close()

The last refcnt of the psock can be gone right after
sock_map_remove_links(), so sk_psock_stop() could trigger a UAF.
The reason why I placed sk_psock_stop() there is to avoid RCU read
critical section, and more importantly, some callee of
sock_map_remove_links() is supposed to be called with RCU read lock,
we can not simply get rid of RCU read lock here. Therefore, the only
choice we have is to grab an additional refcnt with sk_psock_get()
and put it back after sk_psock_stop().

Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()")
Reported-by: syzbot+7b6548ae483d6f4c64ae@syzkaller.appspotmail.com
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210408030556.45134-1-xiyou.wangcong@gmail.com
---
 net/core/sock_map.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f473c51cbc4b..6f1b82b8ad49 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1521,7 +1521,7 @@ void sock_map_close(struct sock *sk, long timeout)
 
 	lock_sock(sk);
 	rcu_read_lock();
-	psock = sk_psock(sk);
+	psock = sk_psock_get(sk);
 	if (unlikely(!psock)) {
 		rcu_read_unlock();
 		release_sock(sk);
@@ -1532,6 +1532,7 @@ void sock_map_close(struct sock *sk, long timeout)
 	sock_map_remove_links(sk, psock);
 	rcu_read_unlock();
 	sk_psock_stop(psock, true);
+	sk_psock_put(sk, psock);
 	release_sock(sk);
 	saved_close(sk, timeout);
 }
-- 
cgit v1.2.3


From dd2c79677375c37f8f9f8d663eb4708495d595ef Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Apr 2021 12:08:57 +0100
Subject: cxgb4: Fix unintentional sign extension issues

The shifting of the u8 integers f->fs.nat_lip[] by 24 bits to
the left will be promoted to a 32 bit signed int and then
sign-extended to a u64. In the event that the top bit of the u8
is set then all then all the upper 32 bits of the u64 end up as
also being set because of the sign-extension. Fix this by
casting the u8 values to a u64 before the 24 bit left shift.

Addresses-Coverity: ("Unintended sign extension")
Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index b1cae5a19839..bc581b149b11 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -174,31 +174,31 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
 				      WORD_MASK, f->fs.nat_lip[15] |
 				      f->fs.nat_lip[14] << 8 |
 				      f->fs.nat_lip[13] << 16 |
-				      f->fs.nat_lip[12] << 24, 1);
+				      (u64)f->fs.nat_lip[12] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1,
 				      WORD_MASK, f->fs.nat_lip[11] |
 				      f->fs.nat_lip[10] << 8 |
 				      f->fs.nat_lip[9] << 16 |
-				      f->fs.nat_lip[8] << 24, 1);
+				      (u64)f->fs.nat_lip[8] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2,
 				      WORD_MASK, f->fs.nat_lip[7] |
 				      f->fs.nat_lip[6] << 8 |
 				      f->fs.nat_lip[5] << 16 |
-				      f->fs.nat_lip[4] << 24, 1);
+				      (u64)f->fs.nat_lip[4] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3,
 				      WORD_MASK, f->fs.nat_lip[3] |
 				      f->fs.nat_lip[2] << 8 |
 				      f->fs.nat_lip[1] << 16 |
-				      f->fs.nat_lip[0] << 24, 1);
+				      (u64)f->fs.nat_lip[0] << 24, 1);
 		} else {
 			set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W,
 				      WORD_MASK, f->fs.nat_lip[3] |
 				      f->fs.nat_lip[2] << 8 |
 				      f->fs.nat_lip[1] << 16 |
-				      f->fs.nat_lip[0] << 24, 1);
+				      (u64)f->fs.nat_lip[0] << 25, 1);
 		}
 	}
 
@@ -208,25 +208,25 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
 				      WORD_MASK, f->fs.nat_fip[15] |
 				      f->fs.nat_fip[14] << 8 |
 				      f->fs.nat_fip[13] << 16 |
-				      f->fs.nat_fip[12] << 24, 1);
+				      (u64)f->fs.nat_fip[12] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1,
 				      WORD_MASK, f->fs.nat_fip[11] |
 				      f->fs.nat_fip[10] << 8 |
 				      f->fs.nat_fip[9] << 16 |
-				      f->fs.nat_fip[8] << 24, 1);
+				      (u64)f->fs.nat_fip[8] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2,
 				      WORD_MASK, f->fs.nat_fip[7] |
 				      f->fs.nat_fip[6] << 8 |
 				      f->fs.nat_fip[5] << 16 |
-				      f->fs.nat_fip[4] << 24, 1);
+				      (u64)f->fs.nat_fip[4] << 24, 1);
 
 			set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3,
 				      WORD_MASK, f->fs.nat_fip[3] |
 				      f->fs.nat_fip[2] << 8 |
 				      f->fs.nat_fip[1] << 16 |
-				      f->fs.nat_fip[0] << 24, 1);
+				      (u64)f->fs.nat_fip[0] << 24, 1);
 
 		} else {
 			set_tcb_field(adap, f, tid,
@@ -234,13 +234,13 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
 				      WORD_MASK, f->fs.nat_fip[3] |
 				      f->fs.nat_fip[2] << 8 |
 				      f->fs.nat_fip[1] << 16 |
-				      f->fs.nat_fip[0] << 24, 1);
+				      (u64)f->fs.nat_fip[0] << 24, 1);
 		}
 	}
 
 	set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK,
 		      (dp ? (nat_lp[1] | nat_lp[0] << 8) : 0) |
-		      (sp ? (nat_fp[1] << 16 | nat_fp[0] << 24) : 0),
+		      (sp ? (nat_fp[1] << 16 | (u64)nat_fp[0] << 24) : 0),
 		      1);
 }
 
-- 
cgit v1.2.3


From e701a25840360706fe4cf5de0015913ca19c274b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Apr 2021 14:07:26 +0100
Subject: net: thunderx: Fix unintentional sign extension issue

The shifting of the u8 integers rq->caching by 26 bits to
the left will be promoted to a 32 bit signed int and then
sign-extended to a u64. In the event that rq->caching is
greater than 0x1f then all then all the upper 32 bits of
the u64 end up as also being set because of the int
sign-extension. Fix this by casting the u8 values to a
u64 before the 26 bit left shift.

Addresses-Coverity: ("Unintended sign extension")
Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f782e6af45e9..50bbe79fb93d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -776,7 +776,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
 	mbx.rq.qs_num = qs->vnic_id;
 	mbx.rq.rq_num = qidx;
-	mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) |
+	mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) |
 			  (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
 			  (rq->cont_qs_rbdr_idx << 8) |
 			  (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
-- 
cgit v1.2.3


From d0494135f94c7ab5a9cf7a9094fbb233275c7ba6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Apr 2021 17:37:26 +0100
Subject: net: hns3: Fix potential null pointer defererence of null ae_dev

The reset_prepare and reset_done calls have a null pointer check
on ae_dev however ae_dev is being dereferenced via the call to
ns3_is_phys_func with the ae->pdev argument. Fix this by performing
a null pointer check on ae_dev and hence short-circuiting the
dereference to ae_dev on the call to ns3_is_phys_func.

Addresses-Coverity: ("Dereference before null check")
Fixes: 715c58e94f0d ("net: hns3: add suspend and resume pm_ops")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 25afe5a3348c..c21dd11baed9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2369,9 +2369,9 @@ static int __maybe_unused hns3_suspend(struct device *dev)
 {
 	struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev);
 
-	if (hns3_is_phys_func(ae_dev->pdev)) {
+	if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) {
 		dev_info(dev, "Begin to suspend.\n");
-		if (ae_dev && ae_dev->ops && ae_dev->ops->reset_prepare)
+		if (ae_dev->ops && ae_dev->ops->reset_prepare)
 			ae_dev->ops->reset_prepare(ae_dev, HNAE3_FUNC_RESET);
 	}
 
@@ -2382,9 +2382,9 @@ static int __maybe_unused hns3_resume(struct device *dev)
 {
 	struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev);
 
-	if (hns3_is_phys_func(ae_dev->pdev)) {
+	if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) {
 		dev_info(dev, "Begin to resume.\n");
-		if (ae_dev && ae_dev->ops && ae_dev->ops->reset_done)
+		if (ae_dev->ops && ae_dev->ops->reset_done)
 			ae_dev->ops->reset_done(ae_dev);
 	}
 
-- 
cgit v1.2.3


From 0d770360577895a204634f01b4780e6fbf4adc95 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Sat, 10 Apr 2021 19:46:14 +0200
Subject: net: seg6: trivial fix of a spelling mistake in comment

There is a comment spelling mistake "interfarence" -> "interference" in
function parse_nla_action(). Fix it.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_local.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 8936f48570fc..bd7140885e60 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1475,7 +1475,7 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
 	/* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
 	 * disjoined, this allow us to release acquired resources by optional
 	 * attributes and by required attributes independently from each other
-	 * without any interfarence.
+	 * without any interference.
 	 * In other terms, we are sure that we do not release some the acquired
 	 * resources twice.
 	 *
-- 
cgit v1.2.3


From 17e1be342d46eb0b7c3df4c7e623493483080b63 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 11 Apr 2021 20:18:11 -0400
Subject: bnxt_en: Treat health register value 0 as valid in
 bnxt_try_reover_fw().

The retry loop in bnxt_try_recover_fw() should not abort when the
health register value is 0.  It is a valid value that indicates the
firmware is booting up.

Fixes: 861aae786f2f ("bnxt_en: Enhance retry of the first message to the firmware.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6f13642121c4..3d36f945baf8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9532,8 +9532,8 @@ static int bnxt_try_recover_fw(struct bnxt *bp)
 		do {
 			sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
 			rc = __bnxt_hwrm_ver_get(bp, true);
-			if (!sts || (!BNXT_FW_IS_BOOTING(sts) &&
-				     !BNXT_FW_IS_RECOVERING(sts)))
+			if (!BNXT_FW_IS_BOOTING(sts) &&
+			    !BNXT_FW_IS_RECOVERING(sts))
 				break;
 			retry++;
 		} while (rc == -EBUSY && retry < BNXT_FW_RETRY);
-- 
cgit v1.2.3


From 190eda1a9dbc47409073bec79b81f362e21973b6 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sun, 11 Apr 2021 20:18:12 -0400
Subject: bnxt_en: Invalidate health register mapping at the end of probe.

After probe is successful, interface may not be bought up in all
the cases and health register mapping could be invalid if firmware
undergoes reset. Fix it by invalidating the health register at the
end of probe. It will be remapped during ifup.

Fixes: 43a440c4007b ("bnxt_en: Improve the status_reliable flag in bp->fw_health.")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3d36f945baf8..3f5fbdf61257 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -12972,6 +12972,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 				   rc);
 	}
 
+	bnxt_inv_fw_health_reg(bp);
 	bnxt_dl_register(bp);
 
 	rc = register_netdev(dev);
-- 
cgit v1.2.3


From ea2d37b2b3079a896bc3f44a1962d3f01aa81b7f Mon Sep 17 00:00:00 2001
From: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Date: Sun, 11 Apr 2021 20:18:13 -0400
Subject: bnxt_en: Refactor bnxt_vf_reps_create().

Add a new function bnxt_alloc_vf_rep() to allocate a VF representor.
This function will be needed in subsequent patches to recreate the
VF reps after error recovery.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 40 +++++++++++++++------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 4b5c8fd76a51..b5d6cd63bea7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -350,6 +350,26 @@ void bnxt_vf_reps_destroy(struct bnxt *bp)
 	__bnxt_vf_reps_destroy(bp);
 }
 
+static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+			     u16 *cfa_code_map)
+{
+	/* get cfa handles from FW */
+	if (hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, &vf_rep->tx_cfa_action,
+			       &vf_rep->rx_cfa_code))
+		return -ENOLINK;
+
+	cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx;
+	vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
+	if (!vf_rep->dst)
+		return -ENOMEM;
+
+	/* only cfa_action is needed to mux a packet while TXing */
+	vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action;
+	vf_rep->dst->u.port_info.lower_dev = bp->dev;
+
+	return 0;
+}
+
 /* Use the OUI of the PF's perm addr and report the same mac addr
  * for the same VF-rep each time
  */
@@ -428,25 +448,9 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
 		vf_rep->vf_idx = i;
 		vf_rep->tx_cfa_action = CFA_HANDLE_INVALID;
 
-		/* get cfa handles from FW */
-		rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx,
-					&vf_rep->tx_cfa_action,
-					&vf_rep->rx_cfa_code);
-		if (rc) {
-			rc = -ENOLINK;
-			goto err;
-		}
-		cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx;
-
-		vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
-						 GFP_KERNEL);
-		if (!vf_rep->dst) {
-			rc = -ENOMEM;
+		rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map);
+		if (rc)
 			goto err;
-		}
-		/* only cfa_action is needed to mux a packet while TXing */
-		vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action;
-		vf_rep->dst->u.port_info.lower_dev = bp->dev;
 
 		bnxt_vf_rep_netdev_init(bp, vf_rep, dev);
 		rc = register_netdev(dev);
-- 
cgit v1.2.3


From 90f4fd02968720bdeb38a16deeff96fa770206e4 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 11 Apr 2021 20:18:14 -0400
Subject: bnxt_en: Refactor __bnxt_vf_reps_destroy().

Add a new helper function __bnxt_free_one_vf_rep() to free one VF rep.
We also reintialize the VF rep fields to proper initial values so that
the function can be used without freeing the VF rep data structure.  This
will be used in subsequent patches to free and recreate VF reps after
error recovery.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index b5d6cd63bea7..a4ac11f5b0e5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -288,6 +288,21 @@ void bnxt_vf_reps_open(struct bnxt *bp)
 		bnxt_vf_rep_open(bp->vf_reps[i]->dev);
 }
 
+static void __bnxt_free_one_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep)
+{
+	if (!vf_rep)
+		return;
+
+	if (vf_rep->dst) {
+		dst_release((struct dst_entry *)vf_rep->dst);
+		vf_rep->dst = NULL;
+	}
+	if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) {
+		hwrm_cfa_vfr_free(bp, vf_rep->vf_idx);
+		vf_rep->tx_cfa_action = CFA_HANDLE_INVALID;
+	}
+}
+
 static void __bnxt_vf_reps_destroy(struct bnxt *bp)
 {
 	u16 num_vfs = pci_num_vf(bp->pdev);
@@ -297,11 +312,7 @@ static void __bnxt_vf_reps_destroy(struct bnxt *bp)
 	for (i = 0; i < num_vfs; i++) {
 		vf_rep = bp->vf_reps[i];
 		if (vf_rep) {
-			dst_release((struct dst_entry *)vf_rep->dst);
-
-			if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID)
-				hwrm_cfa_vfr_free(bp, vf_rep->vf_idx);
-
+			__bnxt_free_one_vf_rep(bp, vf_rep);
 			if (vf_rep->dev) {
 				/* if register_netdev failed, then netdev_ops
 				 * would have been set to NULL
-- 
cgit v1.2.3


From ac797ced1fd0faba285c460eb1f64d1296b9cfa4 Mon Sep 17 00:00:00 2001
From: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Date: Sun, 11 Apr 2021 20:18:15 -0400
Subject: bnxt_en: Free and allocate VF-Reps during error recovery.

During firmware recovery, VF-Rep configuration in the firmware is lost.
Fix it by freeing and (re)allocating VF-Reps in FW at relevant points
during the error recovery process.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  3 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 61 ++++++++++++++++++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h | 12 ++++++
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3f5fbdf61257..e15d454e33f0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11081,6 +11081,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp)
 		pci_disable_device(bp->pdev);
 	}
 	__bnxt_close_nic(bp, true, false);
+	bnxt_vf_reps_free(bp);
 	bnxt_clear_int_mode(bp);
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	if (pci_is_enabled(bp->pdev))
@@ -11825,6 +11826,8 @@ static void bnxt_fw_reset_task(struct work_struct *work)
 		bnxt_ulp_start(bp, rc);
 		if (!rc)
 			bnxt_reenable_sriov(bp);
+		bnxt_vf_reps_alloc(bp);
+		bnxt_vf_reps_open(bp);
 		bnxt_dl_health_recovery_done(bp);
 		bnxt_dl_health_status_update(bp, true);
 		rtnl_unlock();
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index a4ac11f5b0e5..dd66302343a2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -284,8 +284,11 @@ void bnxt_vf_reps_open(struct bnxt *bp)
 	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
 		return;
 
-	for (i = 0; i < pci_num_vf(bp->pdev); i++)
-		bnxt_vf_rep_open(bp->vf_reps[i]->dev);
+	for (i = 0; i < pci_num_vf(bp->pdev); i++) {
+		/* Open the VF-Rep only if it is allocated in the FW */
+		if (bp->vf_reps[i]->tx_cfa_action != CFA_HANDLE_INVALID)
+			bnxt_vf_rep_open(bp->vf_reps[i]->dev);
+	}
 }
 
 static void __bnxt_free_one_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep)
@@ -361,6 +364,23 @@ void bnxt_vf_reps_destroy(struct bnxt *bp)
 	__bnxt_vf_reps_destroy(bp);
 }
 
+/* Free the VF-Reps in firmware, during firmware hot-reset processing.
+ * Note that the VF-Rep netdevs are still active (not unregistered) during
+ * this process. As the mode transition from SWITCHDEV to LEGACY happens
+ * under the rtnl_lock() this routine is safe under the rtnl_lock().
+ */
+void bnxt_vf_reps_free(struct bnxt *bp)
+{
+	u16 num_vfs = pci_num_vf(bp->pdev);
+	int i;
+
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return;
+
+	for (i = 0; i < num_vfs; i++)
+		__bnxt_free_one_vf_rep(bp, bp->vf_reps[i]);
+}
+
 static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 			     u16 *cfa_code_map)
 {
@@ -381,6 +401,43 @@ static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 	return 0;
 }
 
+/* Allocate the VF-Reps in firmware, during firmware hot-reset processing.
+ * Note that the VF-Rep netdevs are still active (not unregistered) during
+ * this process. As the mode transition from SWITCHDEV to LEGACY happens
+ * under the rtnl_lock() this routine is safe under the rtnl_lock().
+ */
+int bnxt_vf_reps_alloc(struct bnxt *bp)
+{
+	u16 *cfa_code_map = bp->cfa_code_map, num_vfs = pci_num_vf(bp->pdev);
+	struct bnxt_vf_rep *vf_rep;
+	int rc, i;
+
+	if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+		return 0;
+
+	if (!cfa_code_map)
+		return -EINVAL;
+
+	for (i = 0; i < MAX_CFA_CODE; i++)
+		cfa_code_map[i] = VF_IDX_INVALID;
+
+	for (i = 0; i < num_vfs; i++) {
+		vf_rep = bp->vf_reps[i];
+		vf_rep->vf_idx = i;
+
+		rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map);
+		if (rc)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	netdev_info(bp->dev, "%s error=%d\n", __func__, rc);
+	bnxt_vf_reps_free(bp);
+	return rc;
+}
+
 /* Use the OUI of the PF's perm addr and report the same mac addr
  * for the same VF-rep each time
  */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index d7287651422f..5637a84884d7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -19,6 +19,8 @@ void bnxt_vf_reps_close(struct bnxt *bp);
 void bnxt_vf_reps_open(struct bnxt *bp);
 void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb);
 struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code);
+int bnxt_vf_reps_alloc(struct bnxt *bp);
+void bnxt_vf_reps_free(struct bnxt *bp);
 
 static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
 {
@@ -61,5 +63,15 @@ static inline bool bnxt_dev_is_vf_rep(struct net_device *dev)
 {
 	return false;
 }
+
+static inline int bnxt_vf_reps_alloc(struct bnxt *bp)
+{
+	return 0;
+}
+
+static inline void bnxt_vf_reps_free(struct bnxt *bp)
+{
+}
+
 #endif /* CONFIG_BNXT_SRIOV */
 #endif /* BNXT_VFR_H */
-- 
cgit v1.2.3


From a115d24a636e892ddd1ae58f8e23c78a0390cb68 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Mon, 12 Apr 2021 10:20:06 +0800
Subject: nfc: pn533: remove redundant assignment

In many places,first assign a value to a variable and then return
the variable. which is redundant, we should directly return the value.
in pn533_rf_field funciton,return rc also in the if statement, so we
use return 0 to replace the last return rc.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/pn533/i2c.c   |  8 ++------
 drivers/nfc/pn533/pn533.c | 17 ++++-------------
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/nfc/pn533/i2c.c b/drivers/nfc/pn533/i2c.c
index 0207e66cee21..795da9b85d56 100644
--- a/drivers/nfc/pn533/i2c.c
+++ b/drivers/nfc/pn533/i2c.c
@@ -40,11 +40,8 @@ static int pn533_i2c_send_ack(struct pn533 *dev, gfp_t flags)
 	struct i2c_client *client = phy->i2c_dev;
 	static const u8 ack[6] = {0x00, 0x00, 0xff, 0x00, 0xff, 0x00};
 	/* spec 6.2.1.3:  Preamble, SoPC (2), ACK Code (2), Postamble */
-	int rc;
-
-	rc = i2c_master_send(client, ack, 6);
 
-	return rc;
+	return i2c_master_send(client, ack, 6);
 }
 
 static int pn533_i2c_send_frame(struct pn533 *dev,
@@ -199,8 +196,7 @@ static int pn533_i2c_probe(struct i2c_client *client,
 				&phy->i2c_dev->dev);
 
 	if (IS_ERR(priv)) {
-		r = PTR_ERR(priv);
-		return r;
+		return PTR_ERR(priv);
 	}
 
 	phy->priv = priv;
diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index 3fe5b81eda2d..2c7f9916f206 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -489,12 +489,8 @@ static int pn533_send_data_async(struct pn533 *dev, u8 cmd_code,
 				 pn533_send_async_complete_t complete_cb,
 				 void *complete_cb_context)
 {
-	int rc;
-
-	rc = __pn533_send_async(dev, cmd_code, req, complete_cb,
+	return __pn533_send_async(dev, cmd_code, req, complete_cb,
 				complete_cb_context);
-
-	return rc;
 }
 
 static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code,
@@ -502,12 +498,8 @@ static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code,
 				pn533_send_async_complete_t complete_cb,
 				void *complete_cb_context)
 {
-	int rc;
-
-	rc = __pn533_send_async(dev, cmd_code, req, complete_cb,
+	return __pn533_send_async(dev, cmd_code, req, complete_cb,
 				complete_cb_context);
-
-	return rc;
 }
 
 /*
@@ -2620,7 +2612,7 @@ static int pn533_rf_field(struct nfc_dev *nfc_dev, u8 rf)
 		return rc;
 	}
 
-	return rc;
+	return 0;
 }
 
 static int pn532_sam_configuration(struct nfc_dev *nfc_dev)
@@ -2794,7 +2786,6 @@ struct pn533 *pn53x_common_init(u32 device_type,
 				struct device *dev)
 {
 	struct pn533 *priv;
-	int rc = -ENOMEM;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -2836,7 +2827,7 @@ struct pn533 *pn53x_common_init(u32 device_type,
 
 error:
 	kfree(priv);
-	return ERR_PTR(rc);
+	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL_GPL(pn53x_common_init);
 
-- 
cgit v1.2.3


From 31c4d2f160eb7b17cbead24dc6efed06505a3fee Mon Sep 17 00:00:00 2001
From: Jonathon Reinhart <jonathon.reinhart@gmail.com>
Date: Mon, 12 Apr 2021 00:24:52 -0400
Subject: net: Ensure net namespace isolation of sysctls

This adds an ensure_safe_net_sysctl() check during register_net_sysctl()
to validate that sysctl table entries for a non-init_net netns are
sufficiently isolated. To be netns-safe, an entry must adhere to at
least (and usually exactly) one of these rules:

1. It is marked read-only inside the netns.
2. Its data pointer does not point to kernel/module global data.

An entry which fails both of these checks is indicative of a bug,
whereby a child netns can affect global net sysctl values.

If such an entry is found, this code will issue a warning to the kernel
log, and force the entry to be read-only to prevent a leak.

To test, simply create a new netns:

    $ sudo ip netns add dummy

As it sits now, this patch will WARN for two sysctls which will be
addressed in a subsequent patch:
- /proc/sys/net/netfilter/nf_conntrack_max
- /proc/sys/net/netfilter/nf_conntrack_expect_max

Signed-off-by: Jonathon Reinhart <Jonathon.Reinhart@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sysctl_net.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index d14dab8b6774..f6cb0d4d114c 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -115,9 +115,57 @@ out1:
 	goto out;
 }
 
+/* Verify that sysctls for non-init netns are safe by either:
+ * 1) being read-only, or
+ * 2) having a data pointer which points outside of the global kernel/module
+ *    data segment, and rather into the heap where a per-net object was
+ *    allocated.
+ */
+static void ensure_safe_net_sysctl(struct net *net, const char *path,
+				   struct ctl_table *table)
+{
+	struct ctl_table *ent;
+
+	pr_debug("Registering net sysctl (net %p): %s\n", net, path);
+	for (ent = table; ent->procname; ent++) {
+		unsigned long addr;
+		const char *where;
+
+		pr_debug("  procname=%s mode=%o proc_handler=%ps data=%p\n",
+			 ent->procname, ent->mode, ent->proc_handler, ent->data);
+
+		/* If it's not writable inside the netns, then it can't hurt. */
+		if ((ent->mode & 0222) == 0) {
+			pr_debug("    Not writable by anyone\n");
+			continue;
+		}
+
+		/* Where does data point? */
+		addr = (unsigned long)ent->data;
+		if (is_module_address(addr))
+			where = "module";
+		else if (core_kernel_data(addr))
+			where = "kernel";
+		else
+			continue;
+
+		/* If it is writable and points to kernel/module global
+		 * data, then it's probably a netns leak.
+		 */
+		WARN(1, "sysctl %s/%s: data points to %s global data: %ps\n",
+		     path, ent->procname, where, ent->data);
+
+		/* Make it "safe" by dropping writable perms */
+		ent->mode &= ~0222;
+	}
+}
+
 struct ctl_table_header *register_net_sysctl(struct net *net,
 	const char *path, struct ctl_table *table)
 {
+	if (!net_eq(net, &init_net))
+		ensure_safe_net_sysctl(net, path, table);
+
 	return __register_sysctl_table(&net->sysctls, path, table);
 }
 EXPORT_SYMBOL_GPL(register_net_sysctl);
-- 
cgit v1.2.3


From 2671fa4dc0109d3fb581bc3078fdf17b5d9080f6 Mon Sep 17 00:00:00 2001
From: Jonathon Reinhart <jonathon.reinhart@gmail.com>
Date: Mon, 12 Apr 2021 00:24:53 -0400
Subject: netfilter: conntrack: Make global sysctls readonly in non-init netns

These sysctls point to global variables:
- NF_SYSCTL_CT_MAX (&nf_conntrack_max)
- NF_SYSCTL_CT_EXPECT_MAX (&nf_ct_expect_max)
- NF_SYSCTL_CT_BUCKETS (&nf_conntrack_htable_size_user)

Because their data pointers are not updated to point to per-netns
structures, they must be marked read-only in a non-init_net ns.
Otherwise, changes in any net namespace are reflected in (leaked into)
all other net namespaces. This problem has existed since the
introduction of net namespaces.

The current logic marks them read-only only if the net namespace is
owned by an unprivileged user (other than init_user_ns).

Commit d0febd81ae77 ("netfilter: conntrack: re-visit sysctls in
unprivileged namespaces") "exposes all sysctls even if the namespace is
unpriviliged." Since we need to mark them readonly in any case, we can
forego the unprivileged user check altogether.

Fixes: d0febd81ae77 ("netfilter: conntrack: re-visit sysctls in unprivileged namespaces")
Signed-off-by: Jonathon Reinhart <Jonathon.Reinhart@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_standalone.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3f2cc7b04b20..54d36d3eb905 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1060,16 +1060,10 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	nf_conntrack_standalone_init_dccp_sysctl(net, table);
 	nf_conntrack_standalone_init_gre_sysctl(net, table);
 
-	/* Don't allow unprivileged users to alter certain sysctls */
-	if (net->user_ns != &init_user_ns) {
+	/* Don't allow non-init_net ns to alter global sysctls */
+	if (!net_eq(&init_net, net)) {
 		table[NF_SYSCTL_CT_MAX].mode = 0444;
 		table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444;
-		table[NF_SYSCTL_CT_HELPER].mode = 0444;
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-		table[NF_SYSCTL_CT_EVENTS].mode = 0444;
-#endif
-		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
-	} else if (!net_eq(&init_net, net)) {
 		table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
 	}
 
-- 
cgit v1.2.3


From c82eaa4064f3c59f8b026a6b6e5f8693b5be92da Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Mon, 12 Apr 2021 02:40:59 -0500
Subject: ibmvnic: clean up the remaining debugfs data structures

Commit e704f0434ea6 ("ibmvnic: Remove debugfs support") did not
clean up everything. Remove the remaining code.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.h | 94 --------------------------------------
 1 file changed, 94 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 806aa75a4e86..c1d39a748546 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -412,77 +412,6 @@ struct ibmvnic_control_ip_offload {
 	struct ibmvnic_rc rc;
 } __packed __aligned(8);
 
-struct ibmvnic_request_dump_size {
-	u8 first;
-	u8 cmd;
-	u8 reserved[6];
-	__be32 len;
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
-struct ibmvnic_request_dump {
-	u8 first;
-	u8 cmd;
-	u8 reserved1[2];
-	__be32 ioba;
-	__be32 len;
-	u8 reserved2[4];
-} __packed __aligned(8);
-
-struct ibmvnic_request_dump_rsp {
-	u8 first;
-	u8 cmd;
-	u8 reserved[6];
-	__be32 dumped_len;
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
-struct ibmvnic_request_ras_comp_num {
-	u8 first;
-	u8 cmd;
-	u8 reserved1[2];
-	__be32 num_components;
-	u8 reserved2[4];
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
-struct ibmvnic_request_ras_comps {
-	u8 first;
-	u8 cmd;
-	u8 reserved[2];
-	__be32 ioba;
-	__be32 len;
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
-struct ibmvnic_control_ras {
-	u8 first;
-	u8 cmd;
-	u8 correlator;
-	u8 level;
-	u8 op;
-#define IBMVNIC_TRACE_LEVEL	1
-#define IBMVNIC_ERROR_LEVEL	2
-#define IBMVNIC_TRACE_PAUSE	3
-#define IBMVNIC_TRACE_RESUME	4
-#define IBMVNIC_TRACE_ON		5
-#define IBMVNIC_TRACE_OFF		6
-#define IBMVNIC_CHG_TRACE_BUFF_SZ	7
-	u8 trace_buff_sz[3];
-	u8 reserved[4];
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
-struct ibmvnic_collect_fw_trace {
-	u8 first;
-	u8 cmd;
-	u8 correlator;
-	u8 reserved;
-	__be32 ioba;
-	__be32 len;
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
 struct ibmvnic_request_statistics {
 	u8 first;
 	u8 cmd;
@@ -494,15 +423,6 @@ struct ibmvnic_request_statistics {
 	u8 reserved[4];
 } __packed __aligned(8);
 
-struct ibmvnic_request_debug_stats {
-	u8 first;
-	u8 cmd;
-	u8 reserved[2];
-	__be32 ioba;
-	__be32 len;
-	struct ibmvnic_rc rc;
-} __packed __aligned(8);
-
 struct ibmvnic_error_indication {
 	u8 first;
 	u8 cmd;
@@ -677,22 +597,8 @@ union ibmvnic_crq {
 	struct ibmvnic_query_ip_offload query_ip_offload_rsp;
 	struct ibmvnic_control_ip_offload control_ip_offload;
 	struct ibmvnic_control_ip_offload control_ip_offload_rsp;
-	struct ibmvnic_request_dump_size request_dump_size;
-	struct ibmvnic_request_dump_size request_dump_size_rsp;
-	struct ibmvnic_request_dump request_dump;
-	struct ibmvnic_request_dump_rsp request_dump_rsp;
-	struct ibmvnic_request_ras_comp_num request_ras_comp_num;
-	struct ibmvnic_request_ras_comp_num request_ras_comp_num_rsp;
-	struct ibmvnic_request_ras_comps request_ras_comps;
-	struct ibmvnic_request_ras_comps request_ras_comps_rsp;
-	struct ibmvnic_control_ras control_ras;
-	struct ibmvnic_control_ras control_ras_rsp;
-	struct ibmvnic_collect_fw_trace collect_fw_trace;
-	struct ibmvnic_collect_fw_trace collect_fw_trace_rsp;
 	struct ibmvnic_request_statistics request_statistics;
 	struct ibmvnic_generic_crq request_statistics_rsp;
-	struct ibmvnic_request_debug_stats request_debug_stats;
-	struct ibmvnic_request_debug_stats request_debug_stats_rsp;
 	struct ibmvnic_error_indication error_indication;
 	struct ibmvnic_link_state_indication link_state_indication;
 	struct ibmvnic_change_mac_addr change_mac_addr;
-- 
cgit v1.2.3


From caee7bf5b0a9a1b0956b5910f0c44278ec1a9bb4 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Mon, 12 Apr 2021 02:41:27 -0500
Subject: ibmvnic: print reset reason as a string

The reset reason can be added or deleted over different versions
of the source code. Print a string instead of a number.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 473411542911..5c89dd7fa3de 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1911,6 +1911,26 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	return rc;
 }
 
+static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
+{
+	switch (reason) {
+	case VNIC_RESET_FAILOVER:
+		return "FAILOVER";
+	case VNIC_RESET_MOBILITY:
+		return "MOBILITY";
+	case VNIC_RESET_FATAL:
+		return "FATAL";
+	case VNIC_RESET_NON_FATAL:
+		return "NON_FATAL";
+	case VNIC_RESET_TIMEOUT:
+		return "TIMEOUT";
+	case VNIC_RESET_CHANGE_PARAM:
+		return "CHANGE_PARAM";
+	default:
+		return "UNKNOWN";
+	}
+}
+
 /*
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
@@ -1924,9 +1944,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 	int i, rc;
 
 	netdev_dbg(adapter->netdev,
-		   "[S:%d FOP:%d] Reset reason %d, reset_state %d\n",
+		   "[S:%d FOP:%d] Reset reason: %s, reset_state %d\n",
 		   adapter->state, adapter->failover_pending,
-		   rwi->reset_reason, reset_state);
+		   reset_reason_to_string(rwi->reset_reason), reset_state);
 
 	adapter->reset_reason = rwi->reset_reason;
 	/* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
@@ -2139,8 +2159,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
 	struct net_device *netdev = adapter->netdev;
 	int rc;
 
-	netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n",
-		   rwi->reset_reason);
+	netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n",
+		   reset_reason_to_string(rwi->reset_reason));
 
 	/* read the state and check (again) after getting rtnl */
 	reset_state = adapter->state;
@@ -2363,8 +2383,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	list_for_each(entry, &adapter->rwi_list) {
 		tmp = list_entry(entry, struct ibmvnic_rwi, list);
 		if (tmp->reset_reason == reason) {
-			netdev_dbg(netdev, "Skipping matching reset, reason=%d\n",
-				   reason);
+			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
+				   reset_reason_to_string(reason));
 			ret = EBUSY;
 			goto err;
 		}
@@ -2384,7 +2404,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	}
 	rwi->reset_reason = reason;
 	list_add_tail(&rwi->list, &adapter->rwi_list);
-	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
+	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
+		   reset_reason_to_string(reason));
 	schedule_work(&adapter->ibmvnic_reset);
 
 	ret = 0;
-- 
cgit v1.2.3


From 0666ef7f61ca763897fdcd385d65555dd4764514 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Mon, 12 Apr 2021 02:41:28 -0500
Subject: ibmvnic: print adapter state as a string

The adapter state can be added or deleted over different versions
of the source code. Print a string instead of a number.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 67 ++++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 5c89dd7fa3de..ee9bf18c597f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -827,6 +827,30 @@ static void release_napi(struct ibmvnic_adapter *adapter)
 	adapter->napi_enabled = false;
 }
 
+static const char *adapter_state_to_string(enum vnic_state state)
+{
+	switch (state) {
+	case VNIC_PROBING:
+		return "PROBING";
+	case VNIC_PROBED:
+		return "PROBED";
+	case VNIC_OPENING:
+		return "OPENING";
+	case VNIC_OPEN:
+		return "OPEN";
+	case VNIC_CLOSING:
+		return "CLOSING";
+	case VNIC_CLOSED:
+		return "CLOSED";
+	case VNIC_REMOVING:
+		return "REMOVING";
+	case VNIC_REMOVED:
+		return "REMOVED";
+	default:
+		return "UNKNOWN";
+	}
+}
+
 static int ibmvnic_login(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -905,7 +929,7 @@ static int ibmvnic_login(struct net_device *netdev)
 
 	__ibmvnic_set_mac(netdev, adapter->mac_addr);
 
-	netdev_dbg(netdev, "[S:%d] Login succeeded\n", adapter->state);
+	netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state));
 	return 0;
 }
 
@@ -1185,8 +1209,9 @@ static int ibmvnic_open(struct net_device *netdev)
 	 * honor our setting below.
 	 */
 	if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) {
-		netdev_dbg(netdev, "[S:%d FOP:%d] Resetting, deferring open\n",
-			   adapter->state, adapter->failover_pending);
+		netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n",
+			   adapter_state_to_string(adapter->state),
+			   adapter->failover_pending);
 		adapter->state = VNIC_OPEN;
 		rc = 0;
 		goto out;
@@ -1350,8 +1375,9 @@ static int ibmvnic_close(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int rc;
 
-	netdev_dbg(netdev, "[S:%d FOP:%d FRR:%d] Closing\n",
-		   adapter->state, adapter->failover_pending,
+	netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->failover_pending,
 		   adapter->force_reset_recovery);
 
 	/* If device failover is pending, just set device state and return.
@@ -1944,9 +1970,11 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 	int i, rc;
 
 	netdev_dbg(adapter->netdev,
-		   "[S:%d FOP:%d] Reset reason: %s, reset_state %d\n",
-		   adapter->state, adapter->failover_pending,
-		   reset_reason_to_string(rwi->reset_reason), reset_state);
+		   "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->failover_pending,
+		   reset_reason_to_string(rwi->reset_reason),
+		   adapter_state_to_string(reset_state));
 
 	adapter->reset_reason = rwi->reset_reason;
 	/* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */
@@ -2006,8 +2034,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 				 * from VNIC_CLOSING state.
 				 */
 				netdev_dbg(netdev,
-					   "Open changed state from %d, updating.\n",
-					   reset_state);
+					   "Open changed state from %s, updating.\n",
+					   adapter_state_to_string(reset_state));
 				reset_state = VNIC_OPEN;
 				adapter->state = VNIC_CLOSING;
 			}
@@ -2148,8 +2176,9 @@ out:
 	if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM))
 		rtnl_unlock();
 
-	netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n",
-		   adapter->state, adapter->failover_pending, rc);
+	netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->failover_pending, rc);
 	return rc;
 }
 
@@ -2226,8 +2255,9 @@ out:
 	/* restore adapter state if reset failed */
 	if (rc)
 		adapter->state = reset_state;
-	netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Hard reset done, rc %d\n",
-		   adapter->state, adapter->failover_pending, rc);
+	netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->failover_pending, rc);
 	return rc;
 }
 
@@ -2306,8 +2336,8 @@ static void __ibmvnic_reset(struct work_struct *work)
 			if (rc) {
 				/* give backing device time to settle down */
 				netdev_dbg(adapter->netdev,
-					   "[S:%d] Hard reset failed, waiting 60 secs\n",
-					   adapter->state);
+					   "[S:%s] Hard reset failed, waiting 60 secs\n",
+					   adapter_state_to_string(adapter->state));
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				schedule_timeout(60 * HZ);
 			}
@@ -2335,8 +2365,9 @@ static void __ibmvnic_reset(struct work_struct *work)
 	clear_bit_unlock(0, &adapter->resetting);
 
 	netdev_dbg(adapter->netdev,
-		   "[S:%d FRR:%d WFR:%d] Done processing resets\n",
-		   adapter->state, adapter->force_reset_recovery,
+		   "[S:%s FRR:%d WFR:%d] Done processing resets\n",
+		   adapter_state_to_string(adapter->state),
+		   adapter->force_reset_recovery,
 		   adapter->wait_for_reset);
 }
 
-- 
cgit v1.2.3


From f768e75130159b5444ee691bbdd201bef8e5bb24 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 12 Apr 2021 17:03:26 +0800
Subject: enetc: mark TX timestamp type per skb

Mark TX timestamp type per skb on skb->cb[0], instead of
global variable for all skbs. This is a preparation for
one step timestamp support.

For one-step timestamping enablement, there will be both
one-step and two-step PTP messages to transfer. And a skb
queue is needed for one-step PTP messages making sure
start to send current message only after the last one
completed on hardware. (ENETC single-step register has to
be dynamically configured per message.) So, marking TX
timestamp type per skb is required.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 10 ++++++----
 drivers/net/ethernet/freescale/enetc/enetc.h | 11 +++++++----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 41bfc6e623bf..6e3a5303e2bb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -67,8 +67,7 @@ static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring)
 	enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use);
 }
 
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
-			      int active_offloads)
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 {
 	struct enetc_tx_swbd *tx_swbd;
 	skb_frag_t *frag;
@@ -101,7 +100,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 	count++;
 
 	do_vlan = skb_vlan_tag_present(skb);
-	do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) &&
+	do_tstamp = (skb->cb[0] & ENETC_F_TX_TSTAMP) &&
 		    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP);
 	tx_swbd->do_tstamp = do_tstamp;
 	tx_swbd->check_wb = tx_swbd->do_tstamp;
@@ -221,6 +220,9 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct enetc_bdr *tx_ring;
 	int count;
 
+	/* cb[0] used for TX timestamp type */
+	skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK;
+
 	tx_ring = priv->tx_ring[skb->queue_mapping];
 
 	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
@@ -234,7 +236,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 	}
 
 	enetc_lock_mdio();
-	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+	count = enetc_map_tx_buffs(tx_ring, skb);
 	enetc_unlock_mdio();
 
 	if (unlikely(!count))
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 05474f46b0d9..96889529383e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -271,12 +271,15 @@ struct psfp_cap {
 	u32 max_psfp_meter;
 };
 
+#define ENETC_F_TX_TSTAMP_MASK	0xff
 /* TODO: more hardware offloads */
 enum enetc_active_offloads {
-	ENETC_F_RX_TSTAMP	= BIT(0),
-	ENETC_F_TX_TSTAMP	= BIT(1),
-	ENETC_F_QBV             = BIT(2),
-	ENETC_F_QCI		= BIT(3),
+	/* 8 bits reserved for TX timestamp types (hwtstamp_tx_types) */
+	ENETC_F_TX_TSTAMP	= BIT(0),
+
+	ENETC_F_RX_TSTAMP	= BIT(8),
+	ENETC_F_QBV		= BIT(9),
+	ENETC_F_QCI		= BIT(10),
 };
 
 /* interrupt coalescing modes */
-- 
cgit v1.2.3


From 7294380c5211687aa4d66166984b152ee84caf5f Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 12 Apr 2021 17:03:27 +0800
Subject: enetc: support PTP Sync packet one-step timestamping

This patch is to add support for PTP Sync packet one-step timestamping.
Since ENETC single-step register has to be configured dynamically per
packet for correctionField offeset and UDP checksum update, current
one-step timestamping packet has to be sent only when the last one
completes transmitting on hardware. So, on the TX, this patch handles
one-step timestamping packet as below:

- Trasmit packet immediately if no other one in transfer, or queue to
  skb queue if there is already one in transfer.
  The test_and_set_bit_lock() is used here to lock and check state.
- Start a work when complete transfer on hardware, to release the bit
  lock and to send one skb in skb queue if has.

And the configuration for one-step timestamping on ENETC before
transmitting is,

- Set one-step timestamping flag in extension BD.
- Write 30 bits current timestamp in tstamp field of extension BD.
- Update PTP Sync packet originTimestamp field with current timestamp.
- Configure single-step register for correctionField offeset and UDP
  checksum update.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c       | 191 ++++++++++++++++++---
 drivers/net/ethernet/freescale/enetc/enetc.h       |  20 ++-
 .../net/ethernet/freescale/enetc/enetc_ethtool.c   |   3 +-
 drivers/net/ethernet/freescale/enetc/enetc_hw.h    |   7 +
 4 files changed, 195 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 6e3a5303e2bb..4a0adb0b8bd7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -6,6 +6,7 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/vmalloc.h>
+#include <linux/ptp_classify.h>
 #include <net/pkt_sched.h>
 
 static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
@@ -67,15 +68,52 @@ static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring)
 	enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use);
 }
 
+static int enetc_ptp_parse(struct sk_buff *skb, u8 *udp,
+			   u8 *msgtype, u8 *twostep,
+			   u16 *correction_offset, u16 *body_offset)
+{
+	unsigned int ptp_class;
+	struct ptp_header *hdr;
+	unsigned int type;
+	u8 *base;
+
+	ptp_class = ptp_classify_raw(skb);
+	if (ptp_class == PTP_CLASS_NONE)
+		return -EINVAL;
+
+	hdr = ptp_parse_header(skb, ptp_class);
+	if (!hdr)
+		return -EINVAL;
+
+	type = ptp_class & PTP_CLASS_PMASK;
+	if (type == PTP_CLASS_IPV4 || type == PTP_CLASS_IPV6)
+		*udp = 1;
+	else
+		*udp = 0;
+
+	*msgtype = ptp_get_msgtype(hdr, ptp_class);
+	*twostep = hdr->flag_field[0] & 0x2;
+
+	base = skb_mac_header(skb);
+	*correction_offset = (u8 *)&hdr->correction - base;
+	*body_offset = (u8 *)hdr + sizeof(struct ptp_header) - base;
+
+	return 0;
+}
+
 static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 {
+	bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false;
+	struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
+	struct enetc_hw *hw = &priv->si->hw;
 	struct enetc_tx_swbd *tx_swbd;
-	skb_frag_t *frag;
 	int len = skb_headlen(skb);
 	union enetc_tx_bd temp_bd;
+	u8 msgtype, twostep, udp;
 	union enetc_tx_bd *txbd;
-	bool do_vlan, do_tstamp;
+	u16 offset1, offset2;
 	int i, count = 0;
+	skb_frag_t *frag;
 	unsigned int f;
 	dma_addr_t dma;
 	u8 flags = 0;
@@ -100,12 +138,21 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 	count++;
 
 	do_vlan = skb_vlan_tag_present(skb);
-	do_tstamp = (skb->cb[0] & ENETC_F_TX_TSTAMP) &&
-		    (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP);
-	tx_swbd->do_tstamp = do_tstamp;
-	tx_swbd->check_wb = tx_swbd->do_tstamp;
+	if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+		if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1,
+				    &offset2) ||
+		    msgtype != PTP_MSGTYPE_SYNC || twostep)
+			WARN_ONCE(1, "Bad packet for one-step timestamping\n");
+		else
+			do_onestep_tstamp = true;
+	} else if (skb->cb[0] & ENETC_F_TX_TSTAMP) {
+		do_twostep_tstamp = true;
+	}
+
+	tx_swbd->do_twostep_tstamp = do_twostep_tstamp;
+	tx_swbd->check_wb = tx_swbd->do_twostep_tstamp;
 
-	if (do_vlan || do_tstamp)
+	if (do_vlan || do_onestep_tstamp || do_twostep_tstamp)
 		flags |= ENETC_TXBD_FLAGS_EX;
 
 	if (tx_ring->tsd_enable)
@@ -142,7 +189,40 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 			e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS;
 		}
 
-		if (do_tstamp) {
+		if (do_onestep_tstamp) {
+			u32 lo, hi, val;
+			u64 sec, nsec;
+			u8 *data;
+
+			lo = enetc_rd_hot(hw, ENETC_SICTR0);
+			hi = enetc_rd_hot(hw, ENETC_SICTR1);
+			sec = (u64)hi << 32 | lo;
+			nsec = do_div(sec, 1000000000);
+
+			/* Configure extension BD */
+			temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff);
+			e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP;
+
+			/* Update originTimestamp field of Sync packet
+			 * - 48 bits seconds field
+			 * - 32 bits nanseconds field
+			 */
+			data = skb_mac_header(skb);
+			*(__be16 *)(data + offset2) =
+				htons((sec >> 32) & 0xffff);
+			*(__be32 *)(data + offset2 + 2) =
+				htonl(sec & 0xffffffff);
+			*(__be32 *)(data + offset2 + 6) = htonl(nsec);
+
+			/* Configure single-step register */
+			val = ENETC_PM0_SINGLE_STEP_EN;
+			val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1);
+			if (udp)
+				val |= ENETC_PM0_SINGLE_STEP_CH;
+
+			enetc_port_wr(hw, ENETC_PM0_SINGLE_STEP, val);
+			enetc_port_wr(hw, ENETC_PM1_SINGLE_STEP, val);
+		} else if (do_twostep_tstamp) {
 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 			e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP;
 		}
@@ -214,15 +294,13 @@ dma_err:
 	return 0;
 }
 
-netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
+				    struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	struct enetc_bdr *tx_ring;
 	int count;
 
-	/* cb[0] used for TX timestamp type */
-	skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK;
-
 	tx_ring = priv->tx_ring[skb->queue_mapping];
 
 	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
@@ -252,6 +330,40 @@ drop_packet_err:
 	return NETDEV_TX_OK;
 }
 
+netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	u8 udp, msgtype, twostep;
+	u16 offset1, offset2;
+
+	/* Mark tx timestamp type on skb->cb[0] if requires */
+	if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+	    (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) {
+		skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK;
+	} else {
+		skb->cb[0] = 0;
+	}
+
+	/* Fall back to two-step timestamp if not one-step Sync packet */
+	if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+		if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep,
+				    &offset1, &offset2) ||
+		    msgtype != PTP_MSGTYPE_SYNC || twostep != 0)
+			skb->cb[0] = ENETC_F_TX_TSTAMP;
+	}
+
+	/* Queue one-step Sync packet if already locked */
+	if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+		if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS,
+					  &priv->flags)) {
+			skb_queue_tail(&priv->tx_skbs, skb);
+			return NETDEV_TX_OK;
+		}
+	}
+
+	return enetc_start_xmit(skb, ndev);
+}
+
 static irqreturn_t enetc_msix(int irq, void *data)
 {
 	struct enetc_int_vector	*v = data;
@@ -392,10 +504,11 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
 static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 {
 	struct net_device *ndev = tx_ring->ndev;
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	int tx_frm_cnt = 0, tx_byte_cnt = 0;
 	struct enetc_tx_swbd *tx_swbd;
 	int i, bds_to_clean;
-	bool do_tstamp;
+	bool do_twostep_tstamp;
 	u64 tstamp = 0;
 
 	i = tx_ring->next_to_clean;
@@ -403,7 +516,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
 	bds_to_clean = enetc_bd_ready_count(tx_ring, i);
 
-	do_tstamp = false;
+	do_twostep_tstamp = false;
 
 	while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
 		struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
@@ -417,10 +530,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 			txbd = ENETC_TXBD(*tx_ring, i);
 
 			if (txbd->flags & ENETC_TXBD_FLAGS_W &&
-			    tx_swbd->do_tstamp) {
+			    tx_swbd->do_twostep_tstamp) {
 				enetc_get_tx_tstamp(&priv->si->hw, txbd,
 						    &tstamp);
-				do_tstamp = true;
+				do_twostep_tstamp = true;
 			}
 		}
 
@@ -433,9 +546,16 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 			xdp_return_frame(xdp_frame);
 			tx_swbd->xdp_frame = NULL;
 		} else if (skb) {
-			if (unlikely(do_tstamp)) {
+			if (unlikely(tx_swbd->skb->cb[0] &
+				     ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) {
+				/* Start work to release lock for next one-step
+				 * timestamping packet. And send one skb in
+				 * tx_skbs queue if has.
+				 */
+				queue_work(system_wq, &priv->tx_onestep_tstamp);
+			} else if (unlikely(do_twostep_tstamp)) {
 				enetc_tstamp_tx(skb, tstamp);
-				do_tstamp = false;
+				do_twostep_tstamp = false;
 			}
 			napi_consume_skb(skb, napi_budget);
 			tx_swbd->skb = NULL;
@@ -1864,6 +1984,29 @@ static int enetc_phylink_connect(struct net_device *ndev)
 	return 0;
 }
 
+static void enetc_tx_onestep_tstamp(struct work_struct *work)
+{
+	struct enetc_ndev_priv *priv;
+	struct sk_buff *skb;
+
+	priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp);
+
+	netif_tx_lock(priv->ndev);
+
+	clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags);
+	skb = skb_dequeue(&priv->tx_skbs);
+	if (skb)
+		enetc_start_xmit(skb, priv->ndev);
+
+	netif_tx_unlock(priv->ndev);
+}
+
+static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv)
+{
+	INIT_WORK(&priv->tx_onestep_tstamp, enetc_tx_onestep_tstamp);
+	skb_queue_head_init(&priv->tx_skbs);
+}
+
 void enetc_start(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -1916,6 +2059,7 @@ int enetc_open(struct net_device *ndev)
 	if (err)
 		goto err_set_queues;
 
+	enetc_tx_onestep_tstamp_init(priv);
 	enetc_setup_bdrs(priv);
 	enetc_start(ndev);
 
@@ -2214,11 +2358,16 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr)
 
 	switch (config.tx_type) {
 	case HWTSTAMP_TX_OFF:
-		priv->active_offloads &= ~ENETC_F_TX_TSTAMP;
+		priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK;
 		break;
 	case HWTSTAMP_TX_ON:
+		priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK;
 		priv->active_offloads |= ENETC_F_TX_TSTAMP;
 		break;
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK;
+		priv->active_offloads |= ENETC_F_TX_ONESTEP_SYNC_TSTAMP;
+		break;
 	default:
 		return -ERANGE;
 	}
@@ -2249,7 +2398,9 @@ static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr)
 
 	config.flags = 0;
 
-	if (priv->active_offloads & ENETC_F_TX_TSTAMP)
+	if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)
+		config.tx_type = HWTSTAMP_TX_ONESTEP_SYNC;
+	else if (priv->active_offloads & ENETC_F_TX_TSTAMP)
 		config.tx_type = HWTSTAMP_TX_ON;
 	else
 		config.tx_type = HWTSTAMP_TX_OFF;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 96889529383e..d52717bc73c7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -30,7 +30,7 @@ struct enetc_tx_swbd {
 	enum dma_data_direction dir;
 	u8 is_dma_page:1;
 	u8 check_wb:1;
-	u8 do_tstamp:1;
+	u8 do_twostep_tstamp:1;
 	u8 is_eof:1;
 	u8 is_xdp_tx:1;
 	u8 is_xdp_redirect:1;
@@ -275,11 +275,16 @@ struct psfp_cap {
 /* TODO: more hardware offloads */
 enum enetc_active_offloads {
 	/* 8 bits reserved for TX timestamp types (hwtstamp_tx_types) */
-	ENETC_F_TX_TSTAMP	= BIT(0),
+	ENETC_F_TX_TSTAMP		= BIT(0),
+	ENETC_F_TX_ONESTEP_SYNC_TSTAMP	= BIT(1),
 
-	ENETC_F_RX_TSTAMP	= BIT(8),
-	ENETC_F_QBV		= BIT(9),
-	ENETC_F_QCI		= BIT(10),
+	ENETC_F_RX_TSTAMP		= BIT(8),
+	ENETC_F_QBV			= BIT(9),
+	ENETC_F_QCI			= BIT(10),
+};
+
+enum enetc_flags_bit {
+	ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS = 0,
 };
 
 /* interrupt coalescing modes */
@@ -324,6 +329,11 @@ struct enetc_ndev_priv {
 	u32 tx_ictt;
 
 	struct bpf_prog *xdp_prog;
+
+	unsigned long flags;
+
+	struct work_struct	tx_onestep_tstamp;
+	struct sk_buff_head	tx_skbs;
 };
 
 /* Messaging */
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 7cc81b453bd7..49835e878bbb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -671,7 +671,8 @@ static int enetc_get_ts_info(struct net_device *ndev,
 				SOF_TIMESTAMPING_RAW_HARDWARE;
 
 	info->tx_types = (1 << HWTSTAMP_TX_OFF) |
-			 (1 << HWTSTAMP_TX_ON);
+			 (1 << HWTSTAMP_TX_ON) |
+			 (1 << HWTSTAMP_TX_ONESTEP_SYNC);
 	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
 			   (1 << HWTSTAMP_FILTER_ALL);
 #else
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 00938f7960a4..04ac7fc23ead 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -239,6 +239,12 @@ enum enetc_bdr_type {TX, RX};
 
 #define ENETC_PM_IMDIO_BASE	0x8030
 
+#define ENETC_PM0_SINGLE_STEP		0x80c0
+#define ENETC_PM1_SINGLE_STEP		0x90c0
+#define ENETC_PM0_SINGLE_STEP_CH	BIT(7)
+#define ENETC_PM0_SINGLE_STEP_EN	BIT(31)
+#define ENETC_SET_SINGLE_STEP_OFFSET(v)	(((v) & 0xff) << 8)
+
 #define ENETC_PM0_IF_MODE	0x8300
 #define ENETC_PM0_IFM_RG	BIT(2)
 #define ENETC_PM0_IFM_RLP	(BIT(5) | BIT(11))
@@ -548,6 +554,7 @@ static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd)
 
 /* Extension flags */
 #define ENETC_TXBD_E_FLAGS_VLAN_INS	BIT(0)
+#define ENETC_TXBD_E_FLAGS_ONE_STEP_PTP	BIT(1)
 #define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP	BIT(2)
 
 union enetc_rx_bd {
-- 
cgit v1.2.3


From e230f0c44f011f3270680a506b19b7e84c5e8923 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 6 Apr 2021 11:34:36 +0800
Subject: mt76: mt7921: fix kernel crash when the firmware fails to download

Fix kernel crash when the firmware is missing or fails to download.

[    9.444758] kernel BUG at drivers/pci/msi.c:375!
[    9.449363] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[    9.501033] pstate: a0400009 (NzCv daif +PAN -UAO)
[    9.505814] pc : free_msi_irqs+0x180/0x184
[    9.509897] lr : free_msi_irqs+0x40/0x184
[    9.513893] sp : ffffffc015193870
[    9.517194] x29: ffffffc015193870 x28: 00000000f0e94fa2
[    9.522492] x27: 0000000000000acd x26: 000000000000009a
[    9.527790] x25: ffffffc0152cee58 x24: ffffffdbb383e0d8
[    9.533087] x23: ffffffdbb38628d0 x22: 0000000000040200
[    9.538384] x21: ffffff8cf7de7318 x20: ffffff8cd65a2480
[    9.543681] x19: ffffff8cf7de7000 x18: 0000000000000000
[    9.548979] x17: ffffff8cf9ca03b4 x16: ffffffdc13ad9a34
[    9.554277] x15: 0000000000000000 x14: 0000000000080800
[    9.559575] x13: ffffff8cd65a2980 x12: 0000000000000000
[    9.564873] x11: ffffff8cfa45d820 x10: ffffff8cfa45d6d0
[    9.570171] x9 : 0000000000000040 x8 : ffffff8ccef1b780
[    9.575469] x7 : aaaaaaaaaaaaaaaa x6 : 0000000000000000
[    9.580766] x5 : ffffffdc13824900 x4 : ffffff8ccefe0000
[    9.586063] x3 : 0000000000000000 x2 : 0000000000000000
[    9.591362] x1 : 0000000000000125 x0 : ffffff8ccefe0000
[    9.596660] Call trace:
[    9.599095]  free_msi_irqs+0x180/0x184
[    9.602831]  pci_disable_msi+0x100/0x130
[    9.606740]  pci_free_irq_vectors+0x24/0x30
[    9.610915]  mt7921_pci_probe+0xbc/0x250 [mt7921e]
[    9.615693]  pci_device_probe+0xd4/0x14c
[    9.619604]  really_probe+0x134/0x2ec
[    9.623252]  driver_probe_device+0x64/0xfc
[    9.627335]  device_driver_attach+0x4c/0x6c
[    9.631506]  __driver_attach+0xac/0xc0
[    9.635243]  bus_for_each_dev+0x8c/0xd4
[    9.639066]  driver_attach+0x2c/0x38
[    9.642628]  bus_add_driver+0xfc/0x1d0
[    9.646365]  driver_register+0x64/0xf8
[    9.650101]  __pci_register_driver+0x6c/0x7c
[    9.654360]  init_module+0x28/0xfdc [mt7921e]
[    9.658704]  do_one_initcall+0x13c/0x2d0
[    9.662615]  do_init_module+0x58/0x1e8
[    9.666351]  load_module+0xd80/0xeb4
[    9.669912]  __arm64_sys_finit_module+0xa8/0xe0
[    9.674430]  el0_svc_common+0xa4/0x16c
[    9.678168]  el0_svc_compat_handler+0x2c/0x40
[    9.682511]  el0_svc_compat+0x8/0x10
[    9.686076] Code: a94257f6 f9400bf7 a8c47bfd d65f03c0 (d4210000)
[    9.692155] ---[ end trace 7621f966afbf0a29 ]---
[    9.697385] Kernel panic - not syncing: Fatal exception
[    9.702599] SMP: stopping secondary CPUs
[    9.706549] Kernel Offset: 0x1c03600000 from 0xffffffc010000000
[    9.712456] PHYS_OFFSET: 0xfffffff440000000
[    9.716625] CPU features: 0x080026,2a80aa18
[    9.720795] Memory Limit: none

Fixes: 5c14a5f944b9 ("mt76: mt7921: introduce mt7921e support")
Reported-by: Claire Chang <tientzu@google.com>
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index e94523bf19be..40e2086d075c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -146,10 +146,12 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
 
 	ret = mt7921_register_device(dev);
 	if (ret)
-		goto err_free_dev;
+		goto err_free_irq;
 
 	return 0;
 
+err_free_irq:
+	devm_free_irq(&pdev->dev, pdev->irq, dev);
 err_free_dev:
 	mt76_free_device(&dev->mt76);
 err_free_pci_vec:
-- 
cgit v1.2.3


From 49897c529f85504139a6e54417a65f26a07492d2 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 6 Apr 2021 11:34:37 +0800
Subject: mt76: mt7921: fix the insmod hangs

Fix the second insert module causing the device hangs after remove module.

Fixes: 1c099ab44727 ("mt76: mt7921: add MCU support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  2 --
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    |  3 +--
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 13 +------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 +
 4 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 262c9d150bf0..9046bbfc0690 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -273,8 +273,6 @@ void mt7921_unregister_device(struct mt7921_dev *dev)
 {
 	mt76_unregister_device(&dev->mt76);
 	mt7921_mcu_exit(dev);
-	mt7921_dma_cleanup(dev);
-
 	mt7921_tx_token_put(dev);
 
 	tasklet_disable(&dev->irq_tasklet);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index f3a537c6e3c8..36eaa9d97f64 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1207,8 +1207,7 @@ void mt7921_update_channel(struct mt76_dev *mdev)
 	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
 }
 
-static int
-mt7921_wfsys_reset(struct mt7921_dev *dev)
+int mt7921_wfsys_reset(struct mt7921_dev *dev)
 {
 	mt76_set(dev, 0x70002600, BIT(0));
 	msleep(200);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 353877f1c762..b233b12860ef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -960,18 +960,7 @@ int mt7921_mcu_init(struct mt7921_dev *dev)
 
 void mt7921_mcu_exit(struct mt7921_dev *dev)
 {
-	u32 reg = mt7921_reg_map_l1(dev, MT_TOP_MISC);
-
-	__mt76_mcu_restart(&dev->mt76);
-	if (!mt76_poll_msec(dev, reg, MT_TOP_MISC_FW_STATE,
-			    FIELD_PREP(MT_TOP_MISC_FW_STATE,
-				       FW_STATE_FW_DOWNLOAD), 1000)) {
-		dev_err(dev->mt76.dev, "Failed to exit mcu\n");
-		return;
-	}
-
-	reg = mt7921_reg_map_l1(dev, MT_TOP_LPCR_HOST_BAND0);
-	mt76_wr(dev, reg, MT_TOP_LPCR_HOST_FW_OWN);
+	mt7921_wfsys_reset(dev);
 	skb_queue_purge(&dev->mt76.mcu.res_q);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index ebe51017dd55..e4211b049040 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -351,4 +351,5 @@ void mt7921_coredump_work(struct work_struct *work);
 int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *info);
+int mt7921_wfsys_reset(struct mt7921_dev *dev);
 #endif
-- 
cgit v1.2.3


From 4da64fe086d95daa66d0def40fbd1b02d4f813fd Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 6 Apr 2021 11:34:39 +0800
Subject: mt76: mt7921: reduce the data latency during hw scan

Reduce the data latency during hw_scan by the split scan which would switch
back to operational channel right after scanning each channel done.

Suggested-by: Asda Wen <Asda.Wen@mediatek.com>
Suggested-by: Soul Huang <Soul.Huang@mediatek.com>
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 5 ++++-
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 1bd6294042b6..4892728ad9bb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1395,11 +1395,14 @@ int mt76_connac_mcu_hw_scan(struct mt76_phy *phy, struct ieee80211_vif *vif,
 		req->ies_len = cpu_to_le16(sreq->ie_len);
 	}
 
+	if (is_mt7921(phy->dev))
+		req->scan_func |= SCAN_FUNC_SPLIT_SCAN;
+
 	memcpy(req->bssid, sreq->bssid, ETH_ALEN);
 	if (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
 		get_random_mask_addr(req->random_mac, sreq->mac_addr,
 				     sreq->mac_addr_mask);
-		req->scan_func = 1;
+		req->scan_func |= SCAN_FUNC_RANDOM_MAC;
 	}
 
 	err = mt76_mcu_skb_send_msg(mdev, skb, MCU_CMD_START_HW_SCAN, false);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 5a3efd744546..6f9b7807305a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -362,6 +362,9 @@ enum {
 #define NETWORK_IBSS			BIT(18)
 #define NETWORK_WDS			BIT(21)
 
+#define SCAN_FUNC_RANDOM_MAC		BIT(0)
+#define SCAN_FUNC_SPLIT_SCAN		BIT(5)
+
 #define CONNECTION_INFRA_STA		(STA_TYPE_STA | NETWORK_INFRA)
 #define CONNECTION_INFRA_AP		(STA_TYPE_AP | NETWORK_INFRA)
 #define CONNECTION_P2P_GC		(STA_TYPE_STA | NETWORK_P2P)
-- 
cgit v1.2.3


From b4403cee6400c5f679e9c4a82b91d61aa961eccf Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 23 Mar 2021 22:47:37 +0100
Subject: mt76: fix potential DMA mapping leak

With buf uninitialized in mt76_dma_tx_queue_skb_raw, its field skip_unmap
could potentially inherit a non-zero value from stack garbage.
If this happens, it will cause DMA mappings for MCU command frames to not be
unmapped after completion

Fixes: 27d5c528a7ca ("mt76: fix double DMA unmap of the first buffer on 7615/7915")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index b87b46538b95..6ea58aecca41 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -318,7 +318,7 @@ static int
 mt76_dma_tx_queue_skb_raw(struct mt76_dev *dev, struct mt76_queue *q,
 			  struct sk_buff *skb, u32 tx_info)
 {
-	struct mt76_queue_buf buf;
+	struct mt76_queue_buf buf = {};
 	dma_addr_t addr;
 
 	if (q->queued + 1 >= q->ndesc - 1)
-- 
cgit v1.2.3


From 92e916362ba58e080a8b8e97556d11670ea6ce3d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 11 Apr 2021 12:46:17 +0200
Subject: mt76: mt7921: remove 80+80 MHz support capabilities

This mode is not supported by the hardware

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 3 ---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 4 ----
 2 files changed, 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 9046bbfc0690..b34db6e8d0ae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -250,9 +250,6 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	dev->mphy.sband_5g.sband.vht_cap.cap |=
 			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
 			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
-	dev->mphy.sband_5g.sband.vht_cap.cap |=
-			IEEE80211_VHT_CAP_SHORT_GI_160 |
-			IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ;
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->mphy.chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->mphy.chainmask;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 32b1cd0bca1f..d9ec7d9bb1c4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -125,10 +125,6 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 
 		he_mcs->rx_mcs_80 = cpu_to_le16(mcs_map);
 		he_mcs->tx_mcs_80 = cpu_to_le16(mcs_map);
-		he_mcs->rx_mcs_160 = cpu_to_le16(mcs_map);
-		he_mcs->tx_mcs_160 = cpu_to_le16(mcs_map);
-		he_mcs->rx_mcs_80p80 = cpu_to_le16(mcs_map);
-		he_mcs->tx_mcs_80p80 = cpu_to_le16(mcs_map);
 
 		memset(he_cap->ppe_thres, 0, sizeof(he_cap->ppe_thres));
 		if (he_cap_elem->phy_cap_info[6] &
-- 
cgit v1.2.3


From 0fda6d7bb5a29b241fba8d37b92567c6bec79655 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 30 Mar 2021 01:25:00 +0800
Subject: mt76: report Rx timestamp

Frame reception timestamp (low 32-bits) that indicates the value of the
local TSF timer value at the time the first bit of the MAC header in the
received frame (PPDU unit) arriving at the MAC.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/agg-rx.c        |  6 ++--
 drivers/net/wireless/mediatek/mt76/mac80211.c      |  2 ++
 drivers/net/wireless/mediatek/mt76/mt76.h          |  3 +-
 drivers/net/wireless/mediatek/mt76/mt7603/init.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c    | 31 +++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 32 ++++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 29 +++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 29 +++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  2 +-
 15 files changed, 89 insertions(+), 63 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index 144e8a8910ba..72622220051b 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -76,9 +76,9 @@ mt76_rx_aggr_check_release(struct mt76_rx_tid *tid, struct sk_buff_head *frames)
 
 		nframes--;
 		status = (struct mt76_rx_status *)skb->cb;
-		if (!time_after(jiffies,
-				status->reorder_time +
-				mt76_aggr_tid_to_timeo(tid->num)))
+		if (!time_after32(jiffies,
+				  status->reorder_time +
+				  mt76_aggr_tid_to_timeo(tid->num)))
 			continue;
 
 		mt76_rx_aggr_release_frames(tid, frames, status->seqno);
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index ef31026ac9d7..29ef15ec22fe 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -754,6 +754,8 @@ mt76_rx_convert(struct mt76_dev *dev, struct sk_buff *skb,
 	status->signal = mstat.signal;
 	status->chains = mstat.chains;
 	status->ampdu_reference = mstat.ampdu_ref;
+	status->device_timestamp = mstat.timestamp;
+	status->mactime = mstat.timestamp;
 
 	BUILD_BUG_ON(sizeof(mstat) > sizeof(skb->cb));
 	BUILD_BUG_ON(sizeof(status->chain_signal) !=
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index b09b0f5ffd6d..d121c176c37c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -498,9 +498,10 @@ struct mt76_rx_status {
 		u16 wcid_idx;
 	};
 
-	unsigned long reorder_time;
+	u32 reorder_time;
 
 	u32 ampdu_ref;
+	u32 timestamp;
 
 	u8 iv[6];
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
index f0b879c3eba8..e1b2cfa56074 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
@@ -548,6 +548,9 @@ int mt7603_register_device(struct mt7603_dev *dev)
 	hw->max_report_rates = 7;
 	hw->max_rate_tries = 11;
 
+	hw->radiotap_timestamp.units_pos =
+		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
+
 	hw->sta_data_size = sizeof(struct mt7603_sta);
 	hw->vif_data_size = sizeof(struct mt7603_vif);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index c29b60a87819..e3a9dd6fbd87 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -532,20 +532,6 @@ mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb)
 		status->flag |= RX_FLAG_MMIC_STRIPPED | RX_FLAG_MIC_STRIPPED;
 	}
 
-	if (!(rxd2 & (MT_RXD2_NORMAL_NON_AMPDU_SUB |
-		      MT_RXD2_NORMAL_NON_AMPDU))) {
-		status->flag |= RX_FLAG_AMPDU_DETAILS;
-
-		/* all subframes of an A-MPDU have the same timestamp */
-		if (dev->rx_ampdu_ts != rxd[12]) {
-			if (!++dev->ampdu_ref)
-				dev->ampdu_ref++;
-		}
-		dev->rx_ampdu_ts = rxd[12];
-
-		status->ampdu_ref = dev->ampdu_ref;
-	}
-
 	remove_pad = rxd1 & MT_RXD1_NORMAL_HDR_OFFSET;
 
 	if (rxd2 & MT_RXD2_NORMAL_MAX_LEN_ERROR)
@@ -579,6 +565,23 @@ mt7603_mac_fill_rx(struct mt7603_dev *dev, struct sk_buff *skb)
 			return -EINVAL;
 	}
 	if (rxd0 & MT_RXD0_NORMAL_GROUP_2) {
+		status->timestamp = le32_to_cpu(rxd[0]);
+		status->flag |= RX_FLAG_MACTIME_START;
+
+		if (!(rxd2 & (MT_RXD2_NORMAL_NON_AMPDU_SUB |
+			      MT_RXD2_NORMAL_NON_AMPDU))) {
+			status->flag |= RX_FLAG_AMPDU_DETAILS;
+
+			/* all subframes of an A-MPDU have the same timestamp */
+			if (dev->rx_ampdu_ts != status->timestamp) {
+				if (!++dev->ampdu_ref)
+					dev->ampdu_ref++;
+			}
+			dev->rx_ampdu_ts = status->timestamp;
+
+			status->ampdu_ref = dev->ampdu_ref;
+		}
+
 		rxd += 2;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
index b787c56fd8d6..1df5b9fed2bb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mt7603.h
@@ -120,7 +120,7 @@ struct mt7603_dev {
 	unsigned long last_cca_adj;
 
 	u32 ampdu_ref;
-	__le32 rx_ampdu_ts;
+	u32 rx_ampdu_ts;
 	u8 rssi_offset[3];
 
 	u8 slottime;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 857fb4918975..1e418740c17b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -333,6 +333,9 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_rate_tries = 11;
 	hw->netdev_features = NETIF_F_RXCSUM;
 
+	hw->radiotap_timestamp.units_pos =
+		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
+
 	phy->slottime = 9;
 
 	hw->sta_data_size = sizeof(struct mt7615_sta);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6a1395de3066..005c2829d3df 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -235,7 +235,6 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	u32 rxd1 = le32_to_cpu(rxd[1]);
 	u32 rxd2 = le32_to_cpu(rxd[2]);
 	u32 csum_mask = MT_RXD0_NORMAL_IP_SUM | MT_RXD0_NORMAL_UDP_TCP_SUM;
-	__le32 rxd12 = rxd[12];
 	bool unicast, hdr_trans, remove_pad, insert_ccmp_hdr = false;
 	int phy_idx;
 	int i, idx;
@@ -329,6 +328,23 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	}
 
 	if (rxd0 & MT_RXD0_NORMAL_GROUP_2) {
+		status->timestamp = le32_to_cpu(rxd[0]);
+		status->flag |= RX_FLAG_MACTIME_START;
+
+		if (!(rxd2 & (MT_RXD2_NORMAL_NON_AMPDU_SUB |
+			      MT_RXD2_NORMAL_NON_AMPDU))) {
+			status->flag |= RX_FLAG_AMPDU_DETAILS;
+
+			/* all subframes of an A-MPDU have the same timestamp */
+			if (phy->rx_ampdu_ts != status->timestamp) {
+				if (!++phy->ampdu_ref)
+					phy->ampdu_ref++;
+			}
+			phy->rx_ampdu_ts = status->timestamp;
+
+			status->ampdu_ref = phy->ampdu_ref;
+		}
+
 		rxd += 2;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
@@ -372,20 +388,6 @@ static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 	if (!sband->channels)
 		return -EINVAL;
 
-	if (!(rxd2 & (MT_RXD2_NORMAL_NON_AMPDU_SUB |
-		      MT_RXD2_NORMAL_NON_AMPDU))) {
-		status->flag |= RX_FLAG_AMPDU_DETAILS;
-
-		/* all subframes of an A-MPDU have the same timestamp */
-		if (phy->rx_ampdu_ts != rxd12) {
-			if (!++phy->ampdu_ref)
-				phy->ampdu_ref++;
-		}
-		phy->rx_ampdu_ts = rxd12;
-
-		status->ampdu_ref = phy->ampdu_ref;
-	}
-
 	if (rxd0 & MT_RXD0_NORMAL_GROUP_3) {
 		u32 rxdg0 = le32_to_cpu(rxd[0]);
 		u32 rxdg1 = le32_to_cpu(rxd[1]);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index dbabdeef4e0b..6a50338ec9f5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -168,7 +168,7 @@ struct mt7615_phy {
 	u8 rdd_state;
 	int dfs_state;
 
-	__le32 rx_ampdu_ts;
+	u32 rx_ampdu_ts;
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 261f5ab23721..983ec5276f46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -96,6 +96,9 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	hw->netdev_features = NETIF_F_RXCSUM;
 
+	hw->radiotap_timestamp.units_pos =
+		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
+
 	phy->slottime = 9;
 
 	hw->sta_data_size = sizeof(struct mt7915_sta);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 0924ae074db2..11596e74712a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -389,19 +389,6 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 		status->flag |= RX_FLAG_MMIC_STRIPPED | RX_FLAG_MIC_STRIPPED;
 	}
 
-	if (!(rxd2 & MT_RXD2_NORMAL_NON_AMPDU)) {
-		status->flag |= RX_FLAG_AMPDU_DETAILS;
-
-		/* all subframes of an A-MPDU have the same timestamp */
-		if (phy->rx_ampdu_ts != rxd[14]) {
-			if (!++phy->ampdu_ref)
-				phy->ampdu_ref++;
-		}
-		phy->rx_ampdu_ts = rxd[14];
-
-		status->ampdu_ref = phy->ampdu_ref;
-	}
-
 	remove_pad = FIELD_GET(MT_RXD2_NORMAL_HDR_OFFSET, rxd2);
 
 	if (rxd2 & MT_RXD2_NORMAL_MAX_LEN_ERROR)
@@ -440,6 +427,22 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 	}
 
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_2) {
+		status->timestamp = le32_to_cpu(rxd[0]);
+		status->flag |= RX_FLAG_MACTIME_START;
+
+		if (!(rxd2 & MT_RXD2_NORMAL_NON_AMPDU)) {
+			status->flag |= RX_FLAG_AMPDU_DETAILS;
+
+			/* all subframes of an A-MPDU have the same timestamp */
+			if (phy->rx_ampdu_ts != status->timestamp) {
+				if (!++phy->ampdu_ref)
+					phy->ampdu_ref++;
+			}
+			phy->rx_ampdu_ts = status->timestamp;
+
+			status->ampdu_ref = phy->ampdu_ref;
+		}
+
 		rxd += 2;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 1160d1bf8a7c..455c84cd99c2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -142,7 +142,7 @@ struct mt7915_phy {
 	u8 rdd_state;
 	int dfs_state;
 
-	__le32 rx_ampdu_ts;
+	u32 rx_ampdu_ts;
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index b34db6e8d0ae..5bb0a7b9e9e5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -77,6 +77,9 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 	hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
 
+	hw->radiotap_timestamp.units_pos =
+		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
+
 	phy->slottime = 9;
 
 	hw->sta_data_size = sizeof(struct mt7921_sta);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 36eaa9d97f64..b507f3917830 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -370,19 +370,6 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 		status->flag |= RX_FLAG_MMIC_STRIPPED | RX_FLAG_MIC_STRIPPED;
 	}
 
-	if (!(rxd2 & MT_RXD2_NORMAL_NON_AMPDU)) {
-		status->flag |= RX_FLAG_AMPDU_DETAILS;
-
-		/* all subframes of an A-MPDU have the same timestamp */
-		if (phy->rx_ampdu_ts != rxd[14]) {
-			if (!++phy->ampdu_ref)
-				phy->ampdu_ref++;
-		}
-		phy->rx_ampdu_ts = rxd[14];
-
-		status->ampdu_ref = phy->ampdu_ref;
-	}
-
 	remove_pad = FIELD_GET(MT_RXD2_NORMAL_HDR_OFFSET, rxd2);
 
 	if (rxd2 & MT_RXD2_NORMAL_MAX_LEN_ERROR)
@@ -414,6 +401,22 @@ int mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb)
 	}
 
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_2) {
+		status->timestamp = le32_to_cpu(rxd[0]);
+		status->flag |= RX_FLAG_MACTIME_START;
+
+		if (!(rxd2 & MT_RXD2_NORMAL_NON_AMPDU)) {
+			status->flag |= RX_FLAG_AMPDU_DETAILS;
+
+			/* all subframes of an A-MPDU have the same timestamp */
+			if (phy->rx_ampdu_ts != status->timestamp) {
+				if (!++phy->ampdu_ref)
+					phy->ampdu_ref++;
+			}
+			phy->rx_ampdu_ts = status->timestamp;
+
+			status->ampdu_ref = phy->ampdu_ref;
+		}
+
 		rxd += 2;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index e4211b049040..e3d83d3d954c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -132,7 +132,7 @@ struct mt7921_phy {
 	s16 coverage_class;
 	u8 slottime;
 
-	__le32 rx_ampdu_ts;
+	u32 rx_ampdu_ts;
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
-- 
cgit v1.2.3


From 6d88629e158dc1a3f58a0f9b528fe0057d44e8f6 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 9 Apr 2021 00:36:31 +0800
Subject: mt76: mt7915: add mmio.c

Add mmio.c in order to use mt76_bus_ops throughout the driver.
It will be also shared with the future APSoC revision.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/Makefile |   2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/dma.c    | 108 ---------------
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  11 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    |  16 +--
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |  18 +--
 drivers/net/wireless/mediatek/mt76/mt7915/mmio.c   | 152 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  75 +---------
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c    |  13 +-
 8 files changed, 177 insertions(+), 218 deletions(-)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mmio.c

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/Makefile b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
index cc2054dffa98..40c8061787e9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
@@ -3,6 +3,6 @@
 obj-$(CONFIG_MT7915E) += mt7915e.o
 
 mt7915e-y := pci.o init.o dma.o eeprom.o main.o mcu.o mac.o \
-	     debugfs.o
+	     debugfs.o mmio.o
 
 mt7915e-$(CONFIG_NL80211_TESTMODE) += testmode.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index 3c961bf55e97..9fe09870f8f0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -110,121 +110,13 @@ void mt7915_dma_prefetch(struct mt7915_dev *dev)
 		__mt7915_dma_prefetch(dev, MT_WFDMA1_PCIE1_BASE - MT_WFDMA1_BASE);
 }
 
-static u32 __mt7915_reg_addr(struct mt7915_dev *dev, u32 addr)
-{
-	static const struct {
-		u32 phys;
-		u32 mapped;
-		u32 size;
-	} fixed_map[] = {
-		{ 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */
-		{ 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */
-		{ 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */
-		{ 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */
-		{ 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */
-		{ 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */
-		{ 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */
-		{ 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */
-		{ 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */
-		{ 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */
-		{ 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */
-		{ 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */
-		{ 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */
-		{ 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */
-		{ 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */
-		{ 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */
-		{ 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */
-		{ 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */
-		{ 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */
-		{ 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */
-		{ 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */
-		{ 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */
-		{ 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */
-		{ 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */
-		{ 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */
-		{ 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */
-		{ 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */
-		{ 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */
-		{ 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */
-		{ 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */
-		{ 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */
-		{ 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */
-		{ 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */
-		{ 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */
-		{ 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */
-		{ 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */
-		{ 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */
-		{ 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */
-	};
-	int i;
-
-	if (addr < 0x100000)
-		return addr;
-
-	for (i = 0; i < ARRAY_SIZE(fixed_map); i++) {
-		u32 ofs;
-
-		if (addr < fixed_map[i].phys)
-			continue;
-
-		ofs = addr - fixed_map[i].phys;
-		if (ofs > fixed_map[i].size)
-			continue;
-
-		return fixed_map[i].mapped + ofs;
-	}
-
-	if ((addr >= 0x18000000 && addr < 0x18c00000) ||
-	    (addr >= 0x70000000 && addr < 0x78000000) ||
-	    (addr >= 0x7c000000 && addr < 0x7c400000))
-		return mt7915_reg_map_l1(dev, addr);
-
-	return mt7915_reg_map_l2(dev, addr);
-}
-
-static u32 mt7915_rr(struct mt76_dev *mdev, u32 offset)
-{
-	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
-	u32 addr = __mt7915_reg_addr(dev, offset);
-
-	return dev->bus_ops->rr(mdev, addr);
-}
-
-static void mt7915_wr(struct mt76_dev *mdev, u32 offset, u32 val)
-{
-	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
-	u32 addr = __mt7915_reg_addr(dev, offset);
-
-	dev->bus_ops->wr(mdev, addr, val);
-}
-
-static u32 mt7915_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
-{
-	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
-	u32 addr = __mt7915_reg_addr(dev, offset);
-
-	return dev->bus_ops->rmw(mdev, addr, mask, val);
-}
-
 int mt7915_dma_init(struct mt7915_dev *dev)
 {
 	/* Increase buffer size to receive large VHT/HE MPDUs */
-	struct mt76_bus_ops *bus_ops;
 	int rx_buf_size = MT_RX_BUF_SIZE * 2;
 	u32 hif1_ofs = 0;
 	int ret;
 
-	dev->bus_ops = dev->mt76.bus;
-	bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops),
-			       GFP_KERNEL);
-	if (!bus_ops)
-		return -ENOMEM;
-
-	bus_ops->rr = mt7915_rr;
-	bus_ops->wr = mt7915_wr;
-	bus_ops->rmw = mt7915_rmw;
-	dev->mt76.bus = bus_ops;
-
 	mt76_dma_attach(&dev->mt76);
 
 	if (dev->hif2)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 983ec5276f46..2732899689fd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -302,7 +302,6 @@ static void mt7915_init_work(struct work_struct *work)
 static void mt7915_wfsys_reset(struct mt7915_dev *dev)
 {
 	u32 val = MT_TOP_PWR_KEY | MT_TOP_PWR_SW_PWR_ON | MT_TOP_PWR_PWR_ON;
-	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
 
 #define MT_MCU_DUMMY_RANDOM	GENMASK(15, 0)
 #define MT_MCU_DUMMY_DEFAULT	GENMASK(31, 16)
@@ -334,7 +333,7 @@ static void mt7915_wfsys_reset(struct mt7915_dev *dev)
 	}
 
 	/* wfsys reset won't clear host registers */
-	mt76_clear(dev, reg, MT_TOP_MISC_FW_STATE);
+	mt76_clear(dev, MT_TOP_MISC, MT_TOP_MISC_FW_STATE);
 
 	msleep(100);
 }
@@ -342,7 +341,6 @@ static void mt7915_wfsys_reset(struct mt7915_dev *dev)
 static int mt7915_init_hardware(struct mt7915_dev *dev)
 {
 	int ret, idx;
-	u32 val;
 
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
@@ -350,12 +348,11 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	spin_lock_init(&dev->token_lock);
 	idr_init(&dev->token);
 
-	dev->dbdc_support = !!(mt7915_l1_rr(dev, MT_HW_BOUND) & BIT(5));
-
-	val = mt76_rr(dev, mt7915_reg_map_l1(dev, MT_TOP_MISC));
+	dev->dbdc_support = !!(mt76_rr(dev, MT_HW_BOUND) & BIT(5));
 
 	/* If MCU was already running, it is likely in a bad state */
-	if (FIELD_GET(MT_TOP_MISC_FW_STATE, val) > FW_STATE_FW_DOWNLOAD)
+	if (mt76_get_field(dev, MT_TOP_MISC, MT_TOP_MISC_FW_STATE) >
+	    FW_STATE_FW_DOWNLOAD)
 		mt7915_wfsys_reset(dev);
 
 	ret = mt7915_dma_init(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 11596e74712a..3f3bfead1ce7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1300,8 +1300,8 @@ void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy)
 	bool ext_phy = phy != &dev->phy;
 	u32 reg = MT_WF_PHY_RX_CTRL1(ext_phy);
 
-	mt7915_l2_clear(dev, reg, MT_WF_PHY_RX_CTRL1_STSCNT_EN);
-	mt7915_l2_set(dev, reg, BIT(11) | BIT(9));
+	mt76_clear(dev, reg, MT_WF_PHY_RX_CTRL1_STSCNT_EN);
+	mt76_set(dev, reg, BIT(11) | BIT(9));
 }
 
 void mt7915_mac_reset_counters(struct mt7915_phy *phy)
@@ -1394,12 +1394,12 @@ void mt7915_mac_set_timing(struct mt7915_phy *phy)
 
 void mt7915_mac_enable_nf(struct mt7915_dev *dev, bool ext_phy)
 {
-	mt7915_l2_set(dev, MT_WF_PHY_RXTD12(ext_phy),
-		      MT_WF_PHY_RXTD12_IRPI_SW_CLR_ONLY |
-		      MT_WF_PHY_RXTD12_IRPI_SW_CLR);
+	mt76_set(dev, MT_WF_PHY_RXTD12(ext_phy),
+		 MT_WF_PHY_RXTD12_IRPI_SW_CLR_ONLY |
+		 MT_WF_PHY_RXTD12_IRPI_SW_CLR);
 
-	mt7915_l2_set(dev, MT_WF_PHY_RX_CTRL1(ext_phy),
-		      FIELD_PREP(MT_WF_PHY_RX_CTRL1_IPI_EN, 0x5));
+	mt76_set(dev, MT_WF_PHY_RX_CTRL1(ext_phy),
+		 FIELD_PREP(MT_WF_PHY_RX_CTRL1_IPI_EN, 0x5));
 }
 
 static u8
@@ -1414,7 +1414,7 @@ mt7915_phy_get_nf(struct mt7915_phy *phy, int idx)
 		u32 reg = MT_WF_IRPI(nss + (idx << dev->dbdc_support));
 
 		for (i = 0; i < ARRAY_SIZE(nf_power); i++, reg += 4) {
-			val = mt7915_l2_rr(dev, reg);
+			val = mt76_rr(dev, reg);
 			sum += val * nf_power[i];
 			n += val;
 		}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 639f65d63add..0b739ed8ce33 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2542,11 +2542,9 @@ static int mt7915_mcu_start_patch(struct mt7915_dev *dev)
 
 static int mt7915_driver_own(struct mt7915_dev *dev)
 {
-	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_LPCR_HOST_BAND0);
-
-	mt76_wr(dev, reg, MT_TOP_LPCR_HOST_DRV_OWN);
-	if (!mt76_poll_msec(dev, reg, MT_TOP_LPCR_HOST_FW_OWN,
-			    0, 500)) {
+	mt76_wr(dev, MT_TOP_LPCR_HOST_BAND0, MT_TOP_LPCR_HOST_DRV_OWN);
+	if (!mt76_poll_msec(dev, MT_TOP_LPCR_HOST_BAND0,
+			    MT_TOP_LPCR_HOST_FW_OWN, 0, 500)) {
 		dev_err(dev->mt76.dev, "Timeout for driver own\n");
 		return -EIO;
 	}
@@ -2784,7 +2782,6 @@ out:
 
 static int mt7915_load_firmware(struct mt7915_dev *dev)
 {
-	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
 	int ret;
 
 	ret = mt7915_load_patch(dev);
@@ -2795,7 +2792,7 @@ static int mt7915_load_firmware(struct mt7915_dev *dev)
 	if (ret)
 		return ret;
 
-	if (!mt76_poll_msec(dev, reg, MT_TOP_MISC_FW_STATE,
+	if (!mt76_poll_msec(dev, MT_TOP_MISC, MT_TOP_MISC_FW_STATE,
 			    FIELD_PREP(MT_TOP_MISC_FW_STATE,
 				       FW_STATE_WACPU_RDY), 1000)) {
 		dev_err(dev->mt76.dev, "Timeout for initializing firmware\n");
@@ -2883,18 +2880,15 @@ int mt7915_mcu_init(struct mt7915_dev *dev)
 
 void mt7915_mcu_exit(struct mt7915_dev *dev)
 {
-	u32 reg = mt7915_reg_map_l1(dev, MT_TOP_MISC);
-
 	__mt76_mcu_restart(&dev->mt76);
-	if (!mt76_poll_msec(dev, reg, MT_TOP_MISC_FW_STATE,
+	if (!mt76_poll_msec(dev, MT_TOP_MISC, MT_TOP_MISC_FW_STATE,
 			    FIELD_PREP(MT_TOP_MISC_FW_STATE,
 				       FW_STATE_FW_DOWNLOAD), 1000)) {
 		dev_err(dev->mt76.dev, "Failed to exit mcu\n");
 		return;
 	}
 
-	reg = mt7915_reg_map_l1(dev, MT_TOP_LPCR_HOST_BAND0);
-	mt76_wr(dev, reg, MT_TOP_LPCR_HOST_FW_OWN);
+	mt76_wr(dev, MT_TOP_LPCR_HOST_BAND0, MT_TOP_LPCR_HOST_FW_OWN);
 	skb_queue_purge(&dev->mt76.mcu.res_q);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
new file mode 100644
index 000000000000..af712a936ef6
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include "mt7915.h"
+
+static u32 mt7915_reg_map_l1(struct mt7915_dev *dev, u32 addr)
+{
+	u32 offset = FIELD_GET(MT_HIF_REMAP_L1_OFFSET, addr);
+	u32 base = FIELD_GET(MT_HIF_REMAP_L1_BASE, addr);
+
+	mt76_rmw_field(dev, MT_HIF_REMAP_L1, MT_HIF_REMAP_L1_MASK, base);
+	/* use read to push write */
+	mt76_rr(dev, MT_HIF_REMAP_L1);
+
+	return MT_HIF_REMAP_BASE_L1 + offset;
+}
+
+static u32 mt7915_reg_map_l2(struct mt7915_dev *dev, u32 addr)
+{
+	u32 offset = FIELD_GET(MT_HIF_REMAP_L2_OFFSET, addr);
+	u32 base = FIELD_GET(MT_HIF_REMAP_L2_BASE, addr);
+
+	mt76_rmw_field(dev, MT_HIF_REMAP_L2, MT_HIF_REMAP_L2_MASK, base);
+	/* use read to push write */
+	mt76_rr(dev, MT_HIF_REMAP_L2);
+
+	return MT_HIF_REMAP_BASE_L2 + offset;
+}
+
+static u32 __mt7915_reg_addr(struct mt7915_dev *dev, u32 addr)
+{
+	static const struct {
+		u32 phys;
+		u32 mapped;
+		u32 size;
+	} fixed_map[] = {
+		{ 0x54000000, 0x02000, 0x1000 }, /* WFDMA PCIE0 MCU DMA0 */
+		{ 0x55000000, 0x03000, 0x1000 }, /* WFDMA PCIE0 MCU DMA1 */
+		{ 0x58000000, 0x06000, 0x1000 }, /* WFDMA PCIE1 MCU DMA0 (MEM_DMA) */
+		{ 0x59000000, 0x07000, 0x1000 }, /* WFDMA PCIE1 MCU DMA1 */
+		{ 0x7c000000, 0xf0000, 0x10000 }, /* CONN_INFRA */
+		{ 0x7c020000, 0xd0000, 0x10000 }, /* CONN_INFRA, WFDMA */
+		{ 0x80020000, 0xb0000, 0x10000 }, /* WF_TOP_MISC_OFF */
+		{ 0x81020000, 0xc0000, 0x10000 }, /* WF_TOP_MISC_ON */
+		{ 0x820c0000, 0x08000, 0x4000 }, /* WF_UMAC_TOP (PLE) */
+		{ 0x820c8000, 0x0c000, 0x2000 }, /* WF_UMAC_TOP (PSE) */
+		{ 0x820cc000, 0x0e000, 0x2000 }, /* WF_UMAC_TOP (PP) */
+		{ 0x820ce000, 0x21c00, 0x0200 }, /* WF_LMAC_TOP (WF_SEC) */
+		{ 0x820cf000, 0x22000, 0x1000 }, /* WF_LMAC_TOP (WF_PF) */
+		{ 0x820d0000, 0x30000, 0x10000 }, /* WF_LMAC_TOP (WF_WTBLON) */
+		{ 0x820e0000, 0x20000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_CFG) */
+		{ 0x820e1000, 0x20400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_TRB) */
+		{ 0x820e2000, 0x20800, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_AGG) */
+		{ 0x820e3000, 0x20c00, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_ARB) */
+		{ 0x820e4000, 0x21000, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_TMAC) */
+		{ 0x820e5000, 0x21400, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_RMAC) */
+		{ 0x820e7000, 0x21e00, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_DMA) */
+		{ 0x820e9000, 0x23400, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_WTBLOFF) */
+		{ 0x820ea000, 0x24000, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_ETBF) */
+		{ 0x820eb000, 0x24200, 0x0400 }, /* WF_LMAC_TOP BN0 (WF_LPON) */
+		{ 0x820ec000, 0x24600, 0x0200 }, /* WF_LMAC_TOP BN0 (WF_INT) */
+		{ 0x820ed000, 0x24800, 0x0800 }, /* WF_LMAC_TOP BN0 (WF_MIB) */
+		{ 0x820f0000, 0xa0000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_CFG) */
+		{ 0x820f1000, 0xa0600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_TRB) */
+		{ 0x820f2000, 0xa0800, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_AGG) */
+		{ 0x820f3000, 0xa0c00, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_ARB) */
+		{ 0x820f4000, 0xa1000, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_TMAC) */
+		{ 0x820f5000, 0xa1400, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_RMAC) */
+		{ 0x820f7000, 0xa1e00, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_DMA) */
+		{ 0x820f9000, 0xa3400, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_WTBLOFF) */
+		{ 0x820fa000, 0xa4000, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_ETBF) */
+		{ 0x820fb000, 0xa4200, 0x0400 }, /* WF_LMAC_TOP BN1 (WF_LPON) */
+		{ 0x820fc000, 0xa4600, 0x0200 }, /* WF_LMAC_TOP BN1 (WF_INT) */
+		{ 0x820fd000, 0xa4800, 0x0800 }, /* WF_LMAC_TOP BN1 (WF_MIB) */
+	};
+	int i;
+
+	if (addr < 0x100000)
+		return addr;
+
+	for (i = 0; i < ARRAY_SIZE(fixed_map); i++) {
+		u32 ofs;
+
+		if (addr < fixed_map[i].phys)
+			continue;
+
+		ofs = addr - fixed_map[i].phys;
+		if (ofs > fixed_map[i].size)
+			continue;
+
+		return fixed_map[i].mapped + ofs;
+	}
+
+	if ((addr >= 0x18000000 && addr < 0x18c00000) ||
+	    (addr >= 0x70000000 && addr < 0x78000000) ||
+	    (addr >= 0x7c000000 && addr < 0x7c400000))
+		return mt7915_reg_map_l1(dev, addr);
+
+	return mt7915_reg_map_l2(dev, addr);
+}
+
+static u32 mt7915_rr(struct mt76_dev *mdev, u32 offset)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	u32 addr = __mt7915_reg_addr(dev, offset);
+
+	return dev->bus_ops->rr(mdev, addr);
+}
+
+static void mt7915_wr(struct mt76_dev *mdev, u32 offset, u32 val)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	u32 addr = __mt7915_reg_addr(dev, offset);
+
+	dev->bus_ops->wr(mdev, addr, val);
+}
+
+static u32 mt7915_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	u32 addr = __mt7915_reg_addr(dev, offset);
+
+	return dev->bus_ops->rmw(mdev, addr, mask, val);
+}
+
+int mt7915_mmio_init(struct mt76_dev *mdev, void __iomem *mem_base, int irq)
+{
+	struct mt76_bus_ops *bus_ops;
+	struct mt7915_dev *dev;
+
+	dev = container_of(mdev, struct mt7915_dev, mt76);
+	mt76_mmio_init(&dev->mt76, mem_base);
+
+	dev->bus_ops = dev->mt76.bus;
+	bus_ops = devm_kmemdup(dev->mt76.dev, dev->bus_ops, sizeof(*bus_ops),
+			       GFP_KERNEL);
+	if (!bus_ops)
+		return -ENOMEM;
+
+	bus_ops->rr = mt7915_rr;
+	bus_ops->wr = mt7915_wr;
+	bus_ops->rmw = mt7915_rmw;
+	dev->mt76.bus = bus_ops;
+
+	mdev->rev = (mt76_rr(dev, MT_HW_CHIPID) << 16) |
+		    (mt76_rr(dev, MT_HW_REV) & 0xff);
+	dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
+
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
+
+	return 0;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 455c84cd99c2..dbc70c0d6668 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -394,80 +394,6 @@ static inline void mt7915_irq_disable(struct mt7915_dev *dev, u32 mask)
 		mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, mask, 0);
 }
 
-static inline u32
-mt7915_reg_map_l1(struct mt7915_dev *dev, u32 addr)
-{
-	u32 offset = FIELD_GET(MT_HIF_REMAP_L1_OFFSET, addr);
-	u32 base = FIELD_GET(MT_HIF_REMAP_L1_BASE, addr);
-
-	mt76_rmw_field(dev, MT_HIF_REMAP_L1, MT_HIF_REMAP_L1_MASK, base);
-	/* use read to push write */
-	mt76_rr(dev, MT_HIF_REMAP_L1);
-
-	return MT_HIF_REMAP_BASE_L1 + offset;
-}
-
-static inline u32
-mt7915_l1_rr(struct mt7915_dev *dev, u32 addr)
-{
-	return mt76_rr(dev, mt7915_reg_map_l1(dev, addr));
-}
-
-static inline void
-mt7915_l1_wr(struct mt7915_dev *dev, u32 addr, u32 val)
-{
-	mt76_wr(dev, mt7915_reg_map_l1(dev, addr), val);
-}
-
-static inline u32
-mt7915_l1_rmw(struct mt7915_dev *dev, u32 addr, u32 mask, u32 val)
-{
-	val |= mt7915_l1_rr(dev, addr) & ~mask;
-	mt7915_l1_wr(dev, addr, val);
-
-	return val;
-}
-
-#define mt7915_l1_set(dev, addr, val)	mt7915_l1_rmw(dev, addr, 0, val)
-#define mt7915_l1_clear(dev, addr, val)	mt7915_l1_rmw(dev, addr, val, 0)
-
-static inline u32
-mt7915_reg_map_l2(struct mt7915_dev *dev, u32 addr)
-{
-	u32 offset = FIELD_GET(MT_HIF_REMAP_L2_OFFSET, addr);
-	u32 base = FIELD_GET(MT_HIF_REMAP_L2_BASE, addr);
-
-	mt76_rmw_field(dev, MT_HIF_REMAP_L2, MT_HIF_REMAP_L2_MASK, base);
-	/* use read to push write */
-	mt76_rr(dev, MT_HIF_REMAP_L2);
-
-	return MT_HIF_REMAP_BASE_L2 + offset;
-}
-
-static inline u32
-mt7915_l2_rr(struct mt7915_dev *dev, u32 addr)
-{
-	return mt76_rr(dev, mt7915_reg_map_l2(dev, addr));
-}
-
-static inline void
-mt7915_l2_wr(struct mt7915_dev *dev, u32 addr, u32 val)
-{
-	mt76_wr(dev, mt7915_reg_map_l2(dev, addr), val);
-}
-
-static inline u32
-mt7915_l2_rmw(struct mt7915_dev *dev, u32 addr, u32 mask, u32 val)
-{
-	val |= mt7915_l2_rr(dev, addr) & ~mask;
-	mt7915_l2_wr(dev, addr, val);
-
-	return val;
-}
-
-#define mt7915_l2_set(dev, addr, val)	mt7915_l2_rmw(dev, addr, 0, val)
-#define mt7915_l2_clear(dev, addr, val)	mt7915_l2_rmw(dev, addr, val, 0)
-
 bool mt7915_mac_wtbl_update(struct mt7915_dev *dev, int idx, u32 mask);
 void mt7915_mac_reset_counters(struct mt7915_phy *phy);
 void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy);
@@ -486,6 +412,7 @@ void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 void mt7915_mac_work(struct work_struct *work);
 void mt7915_mac_reset_work(struct work_struct *work);
 void mt7915_mac_sta_rc_work(struct work_struct *work);
+int mt7915_mmio_init(struct mt76_dev *mdev, void __iomem *mem_base, int irq);
 int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index 13880cc9c9e8..75769595d1e1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -201,7 +201,7 @@ static void mt7915_pci_init_hif2(struct mt7915_dev *dev)
 	}
 
 	/* master switch of PCIe tnterrupt enable */
-	mt7915_l1_wr(dev, MT_PCIE1_MAC_INT_ENABLE, 0xff);
+	mt76_wr(dev, MT_PCIE1_MAC_INT_ENABLE, 0xff);
 }
 
 static int mt7915_pci_hif2_probe(struct pci_dev *pdev)
@@ -274,15 +274,12 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		goto error;
 
-	mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]);
-	mdev->rev = (mt7915_l1_rr(dev, MT_HW_CHIPID) << 16) |
-		    (mt7915_l1_rr(dev, MT_HW_REV) & 0xff);
-	dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
-
-	mt76_wr(dev, MT_INT_MASK_CSR, 0);
+	ret = mt7915_mmio_init(mdev, pcim_iomap_table(pdev)[0], pdev->irq);
+	if (ret)
+		goto error;
 
 	/* master switch of PCIe tnterrupt enable */
-	mt7915_l1_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 
 	ret = devm_request_irq(mdev->dev, pdev->irq, mt7915_irq_handler,
 			       IRQF_SHARED, KBUILD_MODNAME, dev);
-- 
cgit v1.2.3


From 30ad36214ab52a5eb3475e012e6bf327f843aa48 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Fri, 9 Apr 2021 00:36:32 +0800
Subject: mt76: mt7615: add missing SPDX tag in mmio.c

Resolve the following checkpatch.pl script warning:

WARNING: Missing or malformed SPDX-License-Identifier tag in line 1

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index a7f92fa0488f..eaa22752e7cd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -1,3 +1,6 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-- 
cgit v1.2.3


From 66978204f71b36c2c0240e50c5789fae2d8b87a6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 11 Apr 2021 16:58:50 +0200
Subject: mt76: mt7615: always add rx header translation tlv when adding
 stations

Ensures that header translation is disabled for interfaces that do not support
it.

Fixes: d4b98c63d7a7 ("mt76: mt7615: add support for rx decapsulation offload")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c      | 4 +++-
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 6 +-----
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h | 3 +--
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 6ca9e9840399..9b9f8d88e9bb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1008,6 +1008,8 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		if (sta)
 			mt76_connac_mcu_wtbl_ht_tlv(&dev->mt76, wskb, sta,
 						    NULL, wtbl_hdr);
+		mt76_connac_mcu_wtbl_hdr_trans_tlv(wskb, &msta->wcid, NULL,
+						   wtbl_hdr);
 	}
 
 	cmd = enable ? MCU_EXT_CMD_WTBL_UPDATE : MCU_EXT_CMD_STA_REC_UPDATE;
@@ -1136,7 +1138,7 @@ int mt7615_mcu_sta_update_hdr_trans(struct mt7615_dev *dev,
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
-	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, vif, sta, NULL, wtbl_hdr);
+	mt76_connac_mcu_wtbl_hdr_trans_tlv(skb, &msta->wcid, NULL, wtbl_hdr);
 
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_EXT_CMD_WTBL_UPDATE,
 				     true);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 4892728ad9bb..443ec0a8bb7d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -393,11 +393,9 @@ mt76_connac_mcu_sta_uapsd(struct sk_buff *skb, struct ieee80211_vif *vif,
 }
 
 void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
-					struct ieee80211_vif *vif,
-					struct ieee80211_sta *sta,
+					struct mt76_wcid *wcid,
 					void *sta_wtbl, void *wtbl_tlv)
 {
-	struct mt76_wcid *wcid;
 	struct wtbl_hdr_trans *htr;
 	struct tlv *tlv;
 
@@ -405,8 +403,6 @@ void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
 					     sizeof(*htr),
 					     wtbl_tlv, sta_wtbl);
 	htr = (struct wtbl_hdr_trans *)tlv;
-
-	wcid = (struct mt76_wcid *)sta->drv_priv;
 	htr->no_rx_trans = !test_bit(MT_WCID_FLAG_HDR_TRANS, &wcid->flags);
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_wtbl_hdr_trans_tlv);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 6f9b7807305a..587097450416 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -937,8 +937,7 @@ void mt76_connac_mcu_wtbl_generic_tlv(struct mt76_dev *dev, struct sk_buff *skb,
 				      struct ieee80211_sta *sta, void *sta_wtbl,
 				      void *wtbl_tlv);
 void mt76_connac_mcu_wtbl_hdr_trans_tlv(struct sk_buff *skb,
-					struct ieee80211_vif *vif,
-					struct ieee80211_sta *sta,
+					struct mt76_wcid *wcid,
 					void *sta_wtbl, void *wtbl_tlv);
 void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 			     struct ieee80211_sta *sta,
-- 
cgit v1.2.3


From 2afd17b4d0fc15cba0144e7a4c26549b0e829ce9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 11 Apr 2021 21:03:45 +0200
Subject: mt76: mt7921: introduce MCU_EVENT_LP_INFO event parsing

Report trace event related to MCU_EVENT_LP_INFO that is sent by the mcu
when it is ready to enter in deep sleep state

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/Makefile |  4 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 19 ++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.h    |  1 +
 .../wireless/mediatek/mt76/mt7921/mt7921_trace.h   | 51 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/trace.c  | 12 +++++
 5 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/mt7921_trace.h
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7921/trace.c

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/Makefile b/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
index 09d1446ad933..e531666f9fb4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/Makefile
@@ -2,4 +2,6 @@
 
 obj-$(CONFIG_MT7921E) += mt7921e.o
 
-mt7921e-y := pci.o mac.o mcu.o dma.o eeprom.o main.o init.o debugfs.o
+CFLAGS_trace.o := -I$(src)
+
+mt7921e-y := pci.o mac.o mcu.o dma.o eeprom.o main.o init.o debugfs.o trace.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index b233b12860ef..7d45e8795e15 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -4,6 +4,7 @@
 #include <linux/firmware.h>
 #include <linux/fs.h>
 #include "mt7921.h"
+#include "mt7921_trace.h"
 #include "mcu.h"
 #include "mac.h"
 
@@ -498,6 +499,20 @@ mt7921_mcu_debug_msg_event(struct mt7921_dev *dev, struct sk_buff *skb)
 	}
 }
 
+static void
+mt7921_mcu_low_power_event(struct mt7921_dev *dev, struct sk_buff *skb)
+{
+	struct mt7921_mcu_lp_event {
+		u8 state;
+		u8 reserved[3];
+	} __packed * event;
+
+	skb_pull(skb, sizeof(struct mt7921_mcu_rxd));
+	event = (struct mt7921_mcu_lp_event *)skb->data;
+
+	trace_lp_event(dev, event->state);
+}
+
 static void
 mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 {
@@ -521,6 +536,9 @@ mt7921_mcu_rx_unsolicited_event(struct mt7921_dev *dev, struct sk_buff *skb)
 		mt76_connac_mcu_coredump_event(&dev->mt76, skb,
 					       &dev->coredump);
 		return;
+	case MCU_EVENT_LP_INFO:
+		mt7921_mcu_low_power_event(dev, skb);
+		break;
 	default:
 		break;
 	}
@@ -543,6 +561,7 @@ void mt7921_mcu_rx_event(struct mt7921_dev *dev, struct sk_buff *skb)
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
 	    rxd->eid == MCU_EVENT_DBG_MSG ||
 	    rxd->eid == MCU_EVENT_COREDUMP ||
+	    rxd->eid == MCU_EVENT_LP_INFO ||
 	    !rxd->seq)
 		mt7921_mcu_rx_unsolicited_event(dev, skb);
 	else
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index af8b42983a00..13e125f32283 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -79,6 +79,7 @@ struct mt7921_uni_txd {
 /* event table */
 enum {
 	MCU_EVENT_REG_ACCESS = 0x05,
+	MCU_EVENT_LP_INFO = 0x07,
 	MCU_EVENT_SCAN_DONE = 0x0d,
 	MCU_EVENT_BSS_ABSENCE  = 0x11,
 	MCU_EVENT_BSS_BEACON_LOSS = 0x13,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921_trace.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921_trace.h
new file mode 100644
index 000000000000..9bc4db67f352
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921_trace.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: ISC */
+/*
+ * Copyright (C) 2021 Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#if !defined(__MT7921_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __MT7921_TRACE_H
+
+#include <linux/tracepoint.h>
+#include "mt7921.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mt7921
+
+#define MAXNAME		32
+#define DEV_ENTRY	__array(char, wiphy_name, 32)
+#define DEV_ASSIGN	strlcpy(__entry->wiphy_name,	\
+				wiphy_name(mt76_hw(dev)->wiphy), MAXNAME)
+#define DEV_PR_FMT	"%s"
+#define DEV_PR_ARG	__entry->wiphy_name
+#define LP_STATE_PR_ARG	__entry->lp_state ? "lp ready" : "lp not ready"
+
+TRACE_EVENT(lp_event,
+	TP_PROTO(struct mt7921_dev *dev, u8 lp_state),
+
+	TP_ARGS(dev, lp_state),
+
+	TP_STRUCT__entry(
+		DEV_ENTRY
+		__field(u8, lp_state)
+	),
+
+	TP_fast_assign(
+		DEV_ASSIGN;
+		__entry->lp_state = lp_state;
+	),
+
+	TP_printk(
+		DEV_PR_FMT " %s",
+		DEV_PR_ARG, LP_STATE_PR_ARG
+	)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mt7921_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/trace.c b/drivers/net/wireless/mediatek/mt76/mt7921/trace.c
new file mode 100644
index 000000000000..4dc3c7b89ebd
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/trace.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (C) 2021 Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "mt7921_trace.h"
+
+#endif
-- 
cgit v1.2.3


From 6f43735b6da64bd46bc1ee2af5edce584a09012d Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Mon, 12 Apr 2021 08:26:18 -0500
Subject: dt-bindings: net: renesas,etheravb: Add additional clocks

The AVB driver assumes there is an external crystal, but it could
be clocked by other means.  In order to enable a programmable
clock, it needs to be added to the clocks list and enabled in the
driver.  Since there currently only one clock, there is no
clock-names list either.

Update bindings to add the additional optional clock, and explicitly
name both of them.

Signed-off-by: Adam Ford <aford173@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
index 91ba96d43c6c..fe72a5598add 100644
--- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml
@@ -50,7 +50,16 @@ properties:
   interrupt-names: true
 
   clocks:
-    maxItems: 1
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: AVB functional clock
+      - description: Optional TXC reference clock
+
+  clock-names:
+    items:
+      - const: fck
+      - const: refclk
 
   iommus:
     maxItems: 1
-- 
cgit v1.2.3


From 8ef7adc6beb2ef0bce83513dc9e4505e7b21e8c2 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Mon, 12 Apr 2021 08:26:19 -0500
Subject: net: ethernet: ravb: Enable optional refclk

For devices that use a programmable clock for the AVB reference clock,
the driver may need to enable them.  Add code to find the optional clock
and enable it when available.

Signed-off-by: Adam Ford <aford173@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb.h      |  1 +
 drivers/net/ethernet/renesas/ravb_main.c | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index cb47e68c1a3e..86a1eb0634e8 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -993,6 +993,7 @@ struct ravb_private {
 	struct platform_device *pdev;
 	void __iomem *addr;
 	struct clk *clk;
+	struct clk *refclk;
 	struct mdiobb_ctrl mdiobb;
 	u32 num_rx_ring[NUM_RX_QUEUE];
 	u32 num_tx_ring[NUM_TX_QUEUE];
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index eb0c03bdb12d..1409ae986aa2 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2148,6 +2148,13 @@ static int ravb_probe(struct platform_device *pdev)
 		goto out_release;
 	}
 
+	priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk");
+	if (IS_ERR(priv->refclk)) {
+		error = PTR_ERR(priv->refclk);
+		goto out_release;
+	}
+	clk_prepare_enable(priv->refclk);
+
 	ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
 	ndev->min_mtu = ETH_MIN_MTU;
 
@@ -2244,6 +2251,7 @@ out_dma_free:
 	if (chip_id != RCAR_GEN2)
 		ravb_ptp_stop(ndev);
 out_release:
+	clk_disable_unprepare(priv->refclk);
 	free_netdev(ndev);
 
 	pm_runtime_put(&pdev->dev);
@@ -2260,6 +2268,8 @@ static int ravb_remove(struct platform_device *pdev)
 	if (priv->chip_id != RCAR_GEN2)
 		ravb_ptp_stop(ndev);
 
+	clk_disable_unprepare(priv->refclk);
+
 	dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
 			  priv->desc_bat_dma);
 	/* Set reset mode */
-- 
cgit v1.2.3


From 481fc927c8289919cc0be58666fcd1b7da187a0c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 12 Apr 2021 14:25:54 +0200
Subject: mt76: mt7921: add rcu section in mt7921_mcu_tx_rate_report

Introduce rcu section in mt7921_mcu_tx_rate_report before dereferencing
wcid pointer otherwise loockdep will report the following issue:

[  115.245740] =============================
[  115.245754] WARNING: suspicious RCU usage
[  115.245771] 5.10.20 #0 Not tainted
[  115.245784] -----------------------------
[  115.245816] other info that might help us debug this:
[  115.245830] rcu_scheduler_active = 2, debug_locks = 1
[  115.245845] 3 locks held by kworker/u4:1/20:
[  115.245858]  #0: ffffff80065ab138 ((wq_completion)phy0){+.+.}-{0:0}, at: process_one_work+0x1f8/0x6b8
[  115.245948]  #1: ffffffc01198bdd8 ((work_completion)(&(&dev->mphy.mac_work)->work)){+.+.}-{0:0}, at: process_one_8
[  115.246027]  #2: ffffff8006543ce8 (&dev->mutex#2){+.+.}-{3:3}, at: mt7921_mac_work+0x60/0x2b0 [mt7921e]
[  115.246125]
[  115.246125] stack backtrace:
[  115.246142] CPU: 1 PID: 20 Comm: kworker/u4:1 Not tainted 5.10.20 #0
[  115.246152] Hardware name: MediaTek MT7622 RFB1 board (DT)
[  115.246168] Workqueue: phy0 mt7921_mac_work [mt7921e]
[  115.246188] Call trace:
[  115.246201]  dump_backtrace+0x0/0x1a8
[  115.246213]  show_stack+0x14/0x30
[  115.246228]  dump_stack+0xec/0x134
[  115.246240]  lockdep_rcu_suspicious+0xcc/0xdc
[  115.246255]  mt7921_get_wtbl_info+0x2a4/0x310 [mt7921e]
[  115.246269]  mt7921_mac_work+0x284/0x2b0 [mt7921e]
[  115.246281]  process_one_work+0x2a0/0x6b8
[  115.246293]  worker_thread+0x40/0x440
[  115.246305]  kthread+0x144/0x148
[  115.246317]  ret_from_fork+0x10/0x18

Fixes: 1c099ab44727c ("mt76: mt7921: add MCU support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 7d45e8795e15..aa55667b6ed7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -413,9 +413,12 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
 
 	if (wlan_idx >= MT76_N_WCIDS)
 		return;
+
+	rcu_read_lock();
+
 	wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
 	if (!wcid)
-		return;
+		goto out;
 
 	msta = container_of(wcid, struct mt7921_sta, wcid);
 	stats = &msta->stats;
@@ -423,6 +426,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
 	/* current rate */
 	mt7921_mcu_tx_rate_parse(mphy, &peer, &rate, curr);
 	stats->tx_rate = rate;
+out:
+	rcu_read_unlock();
 }
 
 static void
-- 
cgit v1.2.3


From 5c507329000e282dce91e6c98ee6ffa61a8a5e49 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Mon, 12 Apr 2021 16:24:32 -0300
Subject: libbpf: Clarify flags in ringbuf helpers

In 'bpf_ringbuf_reserve()' we require the flag to '0' at the moment.

For 'bpf_ringbuf_{discard,submit,output}' a flag of '0' might send a
notification to the process if needed.

Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210412192434.944343-1-pctammela@mojatatu.com
---
 include/uapi/linux/bpf.h       | 16 ++++++++++++++++
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e1ee1be7e49b..85c924bc21b1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4082,12 +4082,20 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		An adaptive notification is a notification sent whenever the user-space
+ * 		process has caught up and consumed all available payloads. In case the user-space
+ * 		process is still processing a previous payload, then no notification is needed
+ * 		as it will process the newly added payload automatically.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
  * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
  * 	Description
  * 		Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * 		*flags* must be 0.
  * 	Return
  * 		Valid pointer with *size* bytes of memory available; NULL,
  * 		otherwise.
@@ -4099,6 +4107,10 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		See 'bpf_ringbuf_output()' for the definition of adaptive notification.
  * 	Return
  * 		Nothing. Always succeeds.
  *
@@ -4109,6 +4121,10 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		See 'bpf_ringbuf_output()' for the definition of adaptive notification.
  * 	Return
  * 		Nothing. Always succeeds.
  *
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e1ee1be7e49b..85c924bc21b1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4082,12 +4082,20 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		An adaptive notification is a notification sent whenever the user-space
+ * 		process has caught up and consumed all available payloads. In case the user-space
+ * 		process is still processing a previous payload, then no notification is needed
+ * 		as it will process the newly added payload automatically.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
  * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
  * 	Description
  * 		Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * 		*flags* must be 0.
  * 	Return
  * 		Valid pointer with *size* bytes of memory available; NULL,
  * 		otherwise.
@@ -4099,6 +4107,10 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		See 'bpf_ringbuf_output()' for the definition of adaptive notification.
  * 	Return
  * 		Nothing. Always succeeds.
  *
@@ -4109,6 +4121,10 @@ union bpf_attr {
  * 		of new data availability is sent.
  * 		If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
  * 		of new data availability is sent unconditionally.
+ * 		If **0** is specified in *flags*, an adaptive notification
+ * 		of new data availability is sent.
+ *
+ * 		See 'bpf_ringbuf_output()' for the definition of adaptive notification.
  * 	Return
  * 		Nothing. Always succeeds.
  *
-- 
cgit v1.2.3


From cfa00070a1a82fce8c0ad3327fad1f182d4c691f Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 9 Apr 2021 09:00:20 +0900
Subject: dt-bindings: net: can: rcar_can: Document r8a77961 support

Document SoC specific bindings for R-Car M3-W+ (r8a77961) SoC.

Also as R8A7796 is now called R8A77960 so that update those
references.

Link: https://lore.kernel.org/r/20210409000020.2317696-1-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/net/can/rcar_can.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 6a5956347816..90ac4fef23f5 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -19,7 +19,8 @@ Required properties:
 	      "renesas,can-r8a7793" if CAN controller is a part of R8A7793 SoC.
 	      "renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC.
 	      "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
-	      "renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC.
+	      "renesas,can-r8a7796" if CAN controller is a part of R8A77960 SoC.
+	      "renesas,can-r8a77961" if CAN controller is a part of R8A77961 SoC.
 	      "renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC.
 	      "renesas,can-r8a77990" if CAN controller is a part of R8A77990 SoC.
 	      "renesas,can-r8a77995" if CAN controller is a part of R8A77995 SoC.
@@ -40,7 +41,7 @@ Required properties:
 - pinctrl-names: must be "default".
 
 Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795,
-R8A7796, R8A77965, R8A77990, and R8A77995:
+R8A77960, R8A77961, R8A77965, R8A77990, and R8A77995:
 For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can
 be used by both CAN and CAN FD controller at the same time. It needs to be
 scaled to maximum frequency if any of these controllers use it. This is done
-- 
cgit v1.2.3


From 8537257874e949a59c834cecfd5a063e11b64b0b Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 10 Apr 2021 18:59:46 +0900
Subject: can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces

This patch adds the core support for various USB CAN interfaces from
ETAS GmbH (https://www.etas.com/en/products/es58x.php). The next
patches add the glue code drivers for the individual interfaces.

Link: https://lore.kernel.org/r/20210410095948.233305-2-mailhol.vincent@wanadoo.fr
Co-developed-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig                 |   10 +
 drivers/net/can/usb/Makefile                |    1 +
 drivers/net/can/usb/etas_es58x/Makefile     |    3 +
 drivers/net/can/usb/etas_es58x/es58x_core.c | 2281 +++++++++++++++++++++++++++
 drivers/net/can/usb/etas_es58x/es58x_core.h |  693 ++++++++
 5 files changed, 2988 insertions(+)
 create mode 100644 drivers/net/can/usb/etas_es58x/Makefile
 create mode 100644 drivers/net/can/usb/etas_es58x/es58x_core.c
 create mode 100644 drivers/net/can/usb/etas_es58x/es58x_core.h

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 538f4d9adb91..3deb9f1cd292 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -20,6 +20,16 @@ config CAN_ESD_USB2
 	  This driver supports the CAN-USB/2 interface
 	  from esd electronic system design gmbh (http://www.esd.eu).
 
+config CAN_ETAS_ES58X
+	tristate "ETAS ES58X CAN/USB interfaces"
+	select CRC16
+	help
+	  This driver supports the ES581.4, ES582.1 and ES584.1 interfaces
+	  from ETAS GmbH (https://www.etas.com/en/products/es58x.php).
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called etas_es58x.
+
 config CAN_GS_USB
 	tristate "Geschwister Schneider UG interfaces"
 	help
diff --git a/drivers/net/can/usb/Makefile b/drivers/net/can/usb/Makefile
index aa0f17c0b2ed..748cf31a0d53 100644
--- a/drivers/net/can/usb/Makefile
+++ b/drivers/net/can/usb/Makefile
@@ -6,6 +6,7 @@
 obj-$(CONFIG_CAN_8DEV_USB) += usb_8dev.o
 obj-$(CONFIG_CAN_EMS_USB) += ems_usb.o
 obj-$(CONFIG_CAN_ESD_USB2) += esd_usb2.o
+obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x/
 obj-$(CONFIG_CAN_GS_USB) += gs_usb.o
 obj-$(CONFIG_CAN_KVASER_USB) += kvaser_usb/
 obj-$(CONFIG_CAN_MCBA_USB) += mcba_usb.o
diff --git a/drivers/net/can/usb/etas_es58x/Makefile b/drivers/net/can/usb/etas_es58x/Makefile
new file mode 100644
index 000000000000..60a1ac935a69
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x.o
+etas_es58x-y = es58x_core.o
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
new file mode 100644
index 000000000000..da7f9d1d1162
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -0,0 +1,2281 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es58x_core.c: Core logic to manage the network devices and the
+ * USB interface.
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+#include <linux/crc16.h>
+#include <asm/unaligned.h>
+
+#include "es58x_core.h"
+
+#define DRV_VERSION "1.00"
+MODULE_AUTHOR("Mailhol Vincent <mailhol.vincent@wanadoo.fr>");
+MODULE_AUTHOR("Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>");
+MODULE_DESCRIPTION("Socket CAN driver for ETAS ES58X USB adapters");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("GPL v2");
+
+#define ES58X_MODULE_NAME "etas_es58x"
+#define ES58X_VENDOR_ID 0x108C
+
+/* Table of devices which work with this driver. */
+static const struct usb_device_id es58x_id_table[] = {
+	{
+		/* Terminating entry */
+	}
+};
+
+MODULE_DEVICE_TABLE(usb, es58x_id_table);
+
+#define es58x_print_hex_dump(buf, len)					\
+	print_hex_dump(KERN_DEBUG,					\
+		       ES58X_MODULE_NAME " " __stringify(buf) ": ",	\
+		       DUMP_PREFIX_NONE, 16, 1, buf, len, false)
+
+#define es58x_print_hex_dump_debug(buf, len)				 \
+	print_hex_dump_debug(ES58X_MODULE_NAME " " __stringify(buf) ": ",\
+			     DUMP_PREFIX_NONE, 16, 1, buf, len, false)
+
+/* The last two bytes of an ES58X command is a CRC16. The first two
+ * bytes (the start of frame) are skipped and the CRC calculation
+ * starts on the third byte.
+ */
+#define ES58X_CRC_CALC_OFFSET 2
+
+/**
+ * es58x_calculate_crc() - Compute the crc16 of a given URB.
+ * @urb_cmd: The URB command for which we want to calculate the CRC.
+ * @urb_len: Length of @urb_cmd. Must be at least bigger than 4
+ *	(ES58X_CRC_CALC_OFFSET + sizeof(crc))
+ *
+ * Return: crc16 value.
+ */
+static u16 es58x_calculate_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len)
+{
+	u16 crc;
+	ssize_t len = urb_len - ES58X_CRC_CALC_OFFSET - sizeof(crc);
+
+	crc = crc16(0, &urb_cmd->raw_cmd[ES58X_CRC_CALC_OFFSET], len);
+	return crc;
+}
+
+/**
+ * es58x_get_crc() - Get the CRC value of a given URB.
+ * @urb_cmd: The URB command for which we want to get the CRC.
+ * @urb_len: Length of @urb_cmd. Must be at least bigger than 4
+ *	(ES58X_CRC_CALC_OFFSET + sizeof(crc))
+ *
+ * Return: crc16 value.
+ */
+static u16 es58x_get_crc(const union es58x_urb_cmd *urb_cmd, u16 urb_len)
+{
+	u16 crc;
+	const __le16 *crc_addr;
+
+	crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)];
+	crc = get_unaligned_le16(crc_addr);
+	return crc;
+}
+
+/**
+ * es58x_set_crc() - Set the CRC value of a given URB.
+ * @urb_cmd: The URB command for which we want to get the CRC.
+ * @urb_len: Length of @urb_cmd. Must be at least bigger than 4
+ *	(ES58X_CRC_CALC_OFFSET + sizeof(crc))
+ */
+static void es58x_set_crc(union es58x_urb_cmd *urb_cmd, u16 urb_len)
+{
+	u16 crc;
+	__le16 *crc_addr;
+
+	crc = es58x_calculate_crc(urb_cmd, urb_len);
+	crc_addr = (__le16 *)&urb_cmd->raw_cmd[urb_len - sizeof(crc)];
+	put_unaligned_le16(crc, crc_addr);
+}
+
+/**
+ * es58x_check_crc() - Validate the CRC value of a given URB.
+ * @es58x_dev: ES58X device.
+ * @urb_cmd: The URB command for which we want to check the CRC.
+ * @urb_len: Length of @urb_cmd. Must be at least bigger than 4
+ *	(ES58X_CRC_CALC_OFFSET + sizeof(crc))
+ *
+ * Return: zero on success, -EBADMSG if the CRC check fails.
+ */
+static int es58x_check_crc(struct es58x_device *es58x_dev,
+			   const union es58x_urb_cmd *urb_cmd, u16 urb_len)
+{
+	u16 calculated_crc = es58x_calculate_crc(urb_cmd, urb_len);
+	u16 expected_crc = es58x_get_crc(urb_cmd, urb_len);
+
+	if (expected_crc != calculated_crc) {
+		dev_err_ratelimited(es58x_dev->dev,
+				    "%s: Bad CRC, urb_len: %d\n",
+				    __func__, urb_len);
+		return -EBADMSG;
+	}
+
+	return 0;
+}
+
+/**
+ * es58x_timestamp_to_ns() - Convert a timestamp value received from a
+ *	ES58X device to nanoseconds.
+ * @timestamp: Timestamp received from a ES58X device.
+ *
+ * The timestamp received from ES58X is expressed in multiples of 0.5
+ * micro seconds. This function converts it in to nanoseconds.
+ *
+ * Return: Timestamp value in nanoseconds.
+ */
+static u64 es58x_timestamp_to_ns(u64 timestamp)
+{
+	const u64 es58x_timestamp_ns_mult_coef = 500ULL;
+
+	return es58x_timestamp_ns_mult_coef * timestamp;
+}
+
+/**
+ * es58x_set_skb_timestamp() - Set the hardware timestamp of an skb.
+ * @netdev: CAN network device.
+ * @skb: socket buffer of a CAN message.
+ * @timestamp: Timestamp received from an ES58X device.
+ *
+ * Used for both received and echo messages.
+ */
+static void es58x_set_skb_timestamp(struct net_device *netdev,
+				    struct sk_buff *skb, u64 timestamp)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	struct skb_shared_hwtstamps *hwts;
+
+	hwts = skb_hwtstamps(skb);
+	/* Ignoring overflow (overflow on 64 bits timestamp with nano
+	 * second precision would occur after more than 500 years).
+	 */
+	hwts->hwtstamp = ns_to_ktime(es58x_timestamp_to_ns(timestamp) +
+				     es58x_dev->realtime_diff_ns);
+}
+
+/**
+ * es58x_rx_timestamp() - Handle a received timestamp.
+ * @es58x_dev: ES58X device.
+ * @timestamp: Timestamp received from a ES58X device.
+ *
+ * Calculate the difference between the ES58X device and the kernel
+ * internal clocks. This difference will be later used as an offset to
+ * convert the timestamps of RX and echo messages to match the kernel
+ * system time (e.g. convert to UNIX time).
+ */
+void es58x_rx_timestamp(struct es58x_device *es58x_dev, u64 timestamp)
+{
+	u64 ktime_real_ns = ktime_get_real_ns();
+	u64 device_timestamp = es58x_timestamp_to_ns(timestamp);
+
+	dev_dbg(es58x_dev->dev, "%s: request round-trip time: %llu ns\n",
+		__func__, ktime_real_ns - es58x_dev->ktime_req_ns);
+
+	es58x_dev->realtime_diff_ns =
+	    (es58x_dev->ktime_req_ns + ktime_real_ns) / 2 - device_timestamp;
+	es58x_dev->ktime_req_ns = 0;
+
+	dev_dbg(es58x_dev->dev,
+		"%s: Device timestamp: %llu, diff with kernel: %llu\n",
+		__func__, device_timestamp, es58x_dev->realtime_diff_ns);
+}
+
+/**
+ * es58x_set_realtime_diff_ns() - Calculate difference between the
+ *	clocks of the ES58X device and the kernel
+ * @es58x_dev: ES58X device.
+ *
+ * Request a timestamp from the ES58X device. Once the answer is
+ * received, the timestamp difference will be set by the callback
+ * function es58x_rx_timestamp().
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_set_realtime_diff_ns(struct es58x_device *es58x_dev)
+{
+	if (es58x_dev->ktime_req_ns) {
+		dev_warn(es58x_dev->dev,
+			 "%s: Previous request to set timestamp has not completed yet\n",
+			 __func__);
+		return -EBUSY;
+	}
+
+	es58x_dev->ktime_req_ns = ktime_get_real_ns();
+	return es58x_dev->ops->get_timestamp(es58x_dev);
+}
+
+/**
+ * es58x_is_can_state_active() - Is the network device in an active
+ *	CAN state?
+ * @netdev: CAN network device.
+ *
+ * The device is considered active if it is able to send or receive
+ * CAN frames, that is to say if it is in any of
+ * CAN_STATE_ERROR_ACTIVE, CAN_STATE_ERROR_WARNING or
+ * CAN_STATE_ERROR_PASSIVE states.
+ *
+ * Caution: when recovering from a bus-off,
+ * net/core/dev.c#can_restart() will call
+ * net/core/dev.c#can_flush_echo_skb() without using any kind of
+ * locks. For this reason, it is critical to guarantee that no TX or
+ * echo operations (i.e. any access to priv->echo_skb[]) can be done
+ * while this function is returning false.
+ *
+ * Return: true if the device is active, else returns false.
+ */
+static bool es58x_is_can_state_active(struct net_device *netdev)
+{
+	return es58x_priv(netdev)->can.state < CAN_STATE_BUS_OFF;
+}
+
+/**
+ * es58x_is_echo_skb_threshold_reached() - Determine the limit of how
+ *	many skb slots can be taken before we should stop the network
+ *	queue.
+ * @priv: ES58X private parameters related to the network device.
+ *
+ * We need to save enough free skb slots in order to be able to do
+ * bulk send. This function can be used to determine when to wake or
+ * stop the network queue in regard to the number of skb slots already
+ * taken if the echo FIFO.
+ *
+ * Return: boolean.
+ */
+static bool es58x_is_echo_skb_threshold_reached(struct es58x_priv *priv)
+{
+	u32 num_echo_skb =  priv->tx_head - priv->tx_tail;
+	u32 threshold = priv->can.echo_skb_max -
+		priv->es58x_dev->param->tx_bulk_max + 1;
+
+	return num_echo_skb >= threshold;
+}
+
+/**
+ * es58x_can_free_echo_skb_tail() - Remove the oldest echo skb of the
+ *	echo FIFO.
+ * @netdev: CAN network device.
+ *
+ * Naming convention: the tail is the beginning of the FIFO, i.e. the
+ * first skb to have entered the FIFO.
+ */
+static void es58x_can_free_echo_skb_tail(struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	u16 fifo_mask = priv->es58x_dev->param->fifo_mask;
+	unsigned int frame_len = 0;
+
+	can_free_echo_skb(netdev, priv->tx_tail & fifo_mask, &frame_len);
+	netdev_completed_queue(netdev, 1, frame_len);
+
+	priv->tx_tail++;
+
+	netdev->stats.tx_dropped++;
+}
+
+/**
+ * es58x_can_get_echo_skb_recovery() - Try to re-sync the echo FIFO.
+ * @netdev: CAN network device.
+ * @rcv_packet_idx: Index
+ *
+ * This function should not be called under normal circumstances. In
+ * the unlikely case that one or several URB packages get dropped by
+ * the device, the index will get out of sync. Try to recover by
+ * dropping the echo skb packets with older indexes.
+ *
+ * Return: zero if recovery was successful, -EINVAL otherwise.
+ */
+static int es58x_can_get_echo_skb_recovery(struct net_device *netdev,
+					   u32 rcv_packet_idx)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	int ret = 0;
+
+	netdev->stats.tx_errors++;
+
+	if (net_ratelimit())
+		netdev_warn(netdev,
+			    "Bad echo packet index: %u. First index: %u, end index %u, num_echo_skb: %02u/%02u\n",
+			    rcv_packet_idx, priv->tx_tail, priv->tx_head,
+			    priv->tx_head - priv->tx_tail,
+			    priv->can.echo_skb_max);
+
+	if ((s32)(rcv_packet_idx - priv->tx_tail) < 0) {
+		if (net_ratelimit())
+			netdev_warn(netdev,
+				    "Received echo index is from the past. Ignoring it\n");
+		ret = -EINVAL;
+	} else if ((s32)(rcv_packet_idx - priv->tx_head) >= 0) {
+		if (net_ratelimit())
+			netdev_err(netdev,
+				   "Received echo index is from the future. Ignoring it\n");
+		ret = -EINVAL;
+	} else {
+		if (net_ratelimit())
+			netdev_warn(netdev,
+				    "Recovery: dropping %u echo skb from index %u to %u\n",
+				    rcv_packet_idx - priv->tx_tail,
+				    priv->tx_tail, rcv_packet_idx - 1);
+		while (priv->tx_tail != rcv_packet_idx) {
+			if (priv->tx_tail == priv->tx_head)
+				return -EINVAL;
+			es58x_can_free_echo_skb_tail(netdev);
+		}
+	}
+	return ret;
+}
+
+/**
+ * es58x_can_get_echo_skb() - Get the skb from the echo FIFO and loop
+ *	it back locally.
+ * @netdev: CAN network device.
+ * @rcv_packet_idx: Index of the first packet received from the device.
+ * @tstamps: Array of hardware timestamps received from a ES58X device.
+ * @pkts: Number of packets (and so, length of @tstamps).
+ *
+ * Callback function for when we receive a self reception
+ * acknowledgment.  Retrieves the skb from the echo FIFO, sets its
+ * hardware timestamp (the actual time it was sent) and loops it back
+ * locally.
+ *
+ * The device has to be active (i.e. network interface UP and not in
+ * bus off state or restarting).
+ *
+ * Packet indexes must be consecutive (i.e. index of first packet is
+ * @rcv_packet_idx, index of second packet is @rcv_packet_idx + 1 and
+ * index of last packet is @rcv_packet_idx + @pkts - 1).
+ *
+ * Return: zero on success.
+ */
+int es58x_can_get_echo_skb(struct net_device *netdev, u32 rcv_packet_idx,
+			   u64 *tstamps, unsigned int pkts)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	unsigned int rx_total_frame_len = 0;
+	unsigned int num_echo_skb = priv->tx_head - priv->tx_tail;
+	int i;
+	u16 fifo_mask = priv->es58x_dev->param->fifo_mask;
+
+	if (!netif_running(netdev)) {
+		if (net_ratelimit())
+			netdev_info(netdev,
+				    "%s: %s is down, dropping %d echo packets\n",
+				    __func__, netdev->name, pkts);
+		netdev->stats.tx_dropped += pkts;
+		return 0;
+	} else if (!es58x_is_can_state_active(netdev)) {
+		if (net_ratelimit())
+			netdev_dbg(netdev,
+				   "Bus is off or device is restarting. Ignoring %u echo packets from index %u\n",
+				   pkts, rcv_packet_idx);
+		/* stats.tx_dropped will be (or was already)
+		 * incremented by
+		 * drivers/net/can/net/dev.c:can_flush_echo_skb().
+		 */
+		return 0;
+	} else if (num_echo_skb == 0) {
+		if (net_ratelimit())
+			netdev_warn(netdev,
+				    "Received %u echo packets from index: %u but echo skb queue is empty.\n",
+				    pkts, rcv_packet_idx);
+		netdev->stats.tx_dropped += pkts;
+		return 0;
+	}
+
+	if (priv->tx_tail != rcv_packet_idx) {
+		if (es58x_can_get_echo_skb_recovery(netdev, rcv_packet_idx) < 0) {
+			if (net_ratelimit())
+				netdev_warn(netdev,
+					    "Could not find echo skb for echo packet index: %u\n",
+					    rcv_packet_idx);
+			return 0;
+		}
+	}
+	if (num_echo_skb < pkts) {
+		int pkts_drop = pkts - num_echo_skb;
+
+		if (net_ratelimit())
+			netdev_err(netdev,
+				   "Received %u echo packets but have only %d echo skb. Dropping %d echo skb\n",
+				   pkts, num_echo_skb, pkts_drop);
+		netdev->stats.tx_dropped += pkts_drop;
+		pkts -= pkts_drop;
+	}
+
+	for (i = 0; i < pkts; i++) {
+		unsigned int skb_idx = priv->tx_tail & fifo_mask;
+		struct sk_buff *skb = priv->can.echo_skb[skb_idx];
+		unsigned int frame_len = 0;
+
+		if (skb)
+			es58x_set_skb_timestamp(netdev, skb, tstamps[i]);
+
+		netdev->stats.tx_bytes += can_get_echo_skb(netdev, skb_idx,
+							   &frame_len);
+		rx_total_frame_len += frame_len;
+
+		priv->tx_tail++;
+	}
+
+	netdev_completed_queue(netdev, pkts, rx_total_frame_len);
+	netdev->stats.tx_packets += pkts;
+
+	priv->err_passive_before_rtx_success = 0;
+	if (!es58x_is_echo_skb_threshold_reached(priv))
+		netif_wake_queue(netdev);
+
+	return 0;
+}
+
+/**
+ * es58x_can_reset_echo_fifo() - Reset the echo FIFO.
+ * @netdev: CAN network device.
+ *
+ * The echo_skb array of struct can_priv will be flushed by
+ * drivers/net/can/dev.c:can_flush_echo_skb(). This function resets
+ * the parameters of the struct es58x_priv of our device and reset the
+ * queue (c.f. BQL).
+ */
+static void es58x_can_reset_echo_fifo(struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+
+	priv->tx_tail = 0;
+	priv->tx_head = 0;
+	priv->tx_urb = NULL;
+	priv->err_passive_before_rtx_success = 0;
+	netdev_reset_queue(netdev);
+}
+
+/**
+ * es58x_flush_pending_tx_msg() - Reset the buffer for transmission messages.
+ * @netdev: CAN network device.
+ *
+ * es58x_start_xmit() will queue up to tx_bulk_max messages in
+ * &tx_urb buffer and do a bulk send of all messages in one single URB
+ * (c.f. xmit_more flag). When the device recovers from a bus off
+ * state or when the device stops, the tx_urb buffer might still have
+ * pending messages in it and thus need to be flushed.
+ */
+static void es58x_flush_pending_tx_msg(struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+
+	if (priv->tx_urb) {
+		netdev_warn(netdev, "%s: dropping %d TX messages\n",
+			    __func__, priv->tx_can_msg_cnt);
+		netdev->stats.tx_dropped += priv->tx_can_msg_cnt;
+		while (priv->tx_can_msg_cnt > 0) {
+			unsigned int frame_len = 0;
+			u16 fifo_mask = priv->es58x_dev->param->fifo_mask;
+
+			priv->tx_head--;
+			priv->tx_can_msg_cnt--;
+			can_free_echo_skb(netdev, priv->tx_head & fifo_mask,
+					  &frame_len);
+			netdev_completed_queue(netdev, 1, frame_len);
+		}
+		usb_anchor_urb(priv->tx_urb, &priv->es58x_dev->tx_urbs_idle);
+		atomic_inc(&es58x_dev->tx_urbs_idle_cnt);
+		usb_free_urb(priv->tx_urb);
+	}
+	priv->tx_urb = NULL;
+}
+
+/**
+ * es58x_tx_ack_msg() - Handle acknowledgment messages.
+ * @netdev: CAN network device.
+ * @tx_free_entries: Number of free entries in the device transmit FIFO.
+ * @rx_cmd_ret_u32: error code as returned by the ES58X device.
+ *
+ * ES58X sends an acknowledgment message after a transmission request
+ * is done. This is mandatory for the ES581.4 but is optional (and
+ * deactivated in this driver) for the ES58X_FD family.
+ *
+ * Under normal circumstances, this function should never throw an
+ * error message.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+int es58x_tx_ack_msg(struct net_device *netdev, u16 tx_free_entries,
+		     enum es58x_ret_u32 rx_cmd_ret_u32)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+
+	if (tx_free_entries <= priv->es58x_dev->param->tx_bulk_max) {
+		if (net_ratelimit())
+			netdev_err(netdev,
+				   "Only %d entries left in device queue, num_echo_skb: %d/%d\n",
+				   tx_free_entries,
+				   priv->tx_head - priv->tx_tail,
+				   priv->can.echo_skb_max);
+		netif_stop_queue(netdev);
+	}
+
+	return es58x_rx_cmd_ret_u32(netdev, ES58X_RET_TYPE_TX_MSG,
+				    rx_cmd_ret_u32);
+}
+
+/**
+ * es58x_rx_can_msg() - Handle a received a CAN message.
+ * @netdev: CAN network device.
+ * @timestamp: Hardware time stamp (only relevant in rx branches).
+ * @data: CAN payload.
+ * @can_id: CAN ID.
+ * @es58x_flags: Please refer to enum es58x_flag.
+ * @dlc: Data Length Code (raw value).
+ *
+ * Fill up a CAN skb and post it.
+ *
+ * This function handles the case where the DLC of a classical CAN
+ * frame is greater than CAN_MAX_DLEN (c.f. the len8_dlc field of
+ * struct can_frame).
+ *
+ * Return: zero on success.
+ */
+int es58x_rx_can_msg(struct net_device *netdev, u64 timestamp, const u8 *data,
+		     canid_t can_id, enum es58x_flag es58x_flags, u8 dlc)
+{
+	struct canfd_frame *cfd;
+	struct can_frame *ccf;
+	struct sk_buff *skb;
+	u8 len;
+	bool is_can_fd = !!(es58x_flags & ES58X_FLAG_FD_DATA);
+
+	if (dlc > CAN_MAX_RAW_DLC) {
+		netdev_err(netdev,
+			   "%s: DLC is %d but maximum should be %d\n",
+			   __func__, dlc, CAN_MAX_RAW_DLC);
+		return -EMSGSIZE;
+	}
+
+	if (is_can_fd) {
+		len = can_fd_dlc2len(dlc);
+		skb = alloc_canfd_skb(netdev, &cfd);
+	} else {
+		len = can_cc_dlc2len(dlc);
+		skb = alloc_can_skb(netdev, &ccf);
+		cfd = (struct canfd_frame *)ccf;
+	}
+	if (!skb) {
+		netdev->stats.rx_dropped++;
+		return 0;
+	}
+
+	cfd->can_id = can_id;
+	if (es58x_flags & ES58X_FLAG_EFF)
+		cfd->can_id |= CAN_EFF_FLAG;
+	if (is_can_fd) {
+		cfd->len = len;
+		if (es58x_flags & ES58X_FLAG_FD_BRS)
+			cfd->flags |= CANFD_BRS;
+		if (es58x_flags & ES58X_FLAG_FD_ESI)
+			cfd->flags |= CANFD_ESI;
+	} else {
+		can_frame_set_cc_len(ccf, dlc, es58x_priv(netdev)->can.ctrlmode);
+		if (es58x_flags & ES58X_FLAG_RTR) {
+			ccf->can_id |= CAN_RTR_FLAG;
+			len = 0;
+		}
+	}
+	memcpy(cfd->data, data, len);
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += len;
+
+	es58x_set_skb_timestamp(netdev, skb, timestamp);
+	netif_rx(skb);
+
+	es58x_priv(netdev)->err_passive_before_rtx_success = 0;
+
+	return 0;
+}
+
+/**
+ * es58x_rx_err_msg() - Handle a received CAN event or error message.
+ * @netdev: CAN network device.
+ * @error: Error code.
+ * @event: Event code.
+ * @timestamp: Timestamp received from a ES58X device.
+ *
+ * Handle the errors and events received by the ES58X device, create
+ * a CAN error skb and post it.
+ *
+ * In some rare cases the devices might get stuck alternating between
+ * CAN_STATE_ERROR_PASSIVE and CAN_STATE_ERROR_WARNING. To prevent
+ * this behavior, we force a bus off state if the device goes in
+ * CAN_STATE_ERROR_WARNING for ES58X_MAX_CONSECUTIVE_WARN consecutive
+ * times with no successful transmission or reception in between.
+ *
+ * Once the device is in bus off state, the only way to restart it is
+ * through the drivers/net/can/dev.c:can_restart() function. The
+ * device is technically capable to recover by itself under certain
+ * circumstances, however, allowing self recovery would create
+ * complex race conditions with drivers/net/can/dev.c:can_restart()
+ * and thus was not implemented. To activate automatic restart, please
+ * set the restart-ms parameter (e.g. ip link set can0 type can
+ * restart-ms 100).
+ *
+ * If the bus is really instable, this function would try to send a
+ * lot of log messages. Those are rate limited (i.e. you will see
+ * messages such as "net_ratelimit: XXX callbacks suppressed" in
+ * dmesg).
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
+		     enum es58x_event event, u64 timestamp)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	struct can_priv *can = netdev_priv(netdev);
+	struct can_device_stats *can_stats = &can->can_stats;
+	struct can_frame *cf = NULL;
+	struct sk_buff *skb;
+	int ret;
+
+	if (!netif_running(netdev)) {
+		if (net_ratelimit())
+			netdev_info(netdev, "%s: %s is down, dropping packet\n",
+				    __func__, netdev->name);
+		netdev->stats.rx_dropped++;
+		return 0;
+	}
+
+	if (error == ES58X_ERR_OK && event == ES58X_EVENT_OK) {
+		netdev_err(netdev, "%s: Both error and event are zero\n",
+			   __func__);
+		return -EINVAL;
+	}
+
+	skb = alloc_can_err_skb(netdev, &cf);
+
+	switch (error) {
+	case ES58X_ERR_OK:	/* 0: No error */
+		break;
+
+	case ES58X_ERR_PROT_STUFF:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error BITSUFF\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_STUFF;
+		break;
+
+	case ES58X_ERR_PROT_FORM:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error FORMAT\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_FORM;
+		break;
+
+	case ES58X_ERR_ACK:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error ACK\n");
+		if (cf)
+			cf->can_id |= CAN_ERR_ACK;
+		break;
+
+	case ES58X_ERR_PROT_BIT:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error BIT\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_BIT;
+		break;
+
+	case ES58X_ERR_PROT_CRC:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error CRC\n");
+		if (cf)
+			cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		break;
+
+	case ES58X_ERR_PROT_BIT1:
+		if (net_ratelimit())
+			netdev_dbg(netdev,
+				   "Error: expected a recessive bit but monitored a dominant one\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_BIT1;
+		break;
+
+	case ES58X_ERR_PROT_BIT0:
+		if (net_ratelimit())
+			netdev_dbg(netdev,
+				   "Error expected a dominant bit but monitored a recessive one\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_BIT0;
+		break;
+
+	case ES58X_ERR_PROT_OVERLOAD:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Error OVERLOAD\n");
+		if (cf)
+			cf->data[2] |= CAN_ERR_PROT_OVERLOAD;
+		break;
+
+	case ES58X_ERR_PROT_UNSPEC:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Unspecified error\n");
+		if (cf)
+			cf->can_id |= CAN_ERR_PROT;
+		break;
+
+	default:
+		if (net_ratelimit())
+			netdev_err(netdev,
+				   "%s: Unspecified error code 0x%04X\n",
+				   __func__, (int)error);
+		if (cf)
+			cf->can_id |= CAN_ERR_PROT;
+		break;
+	}
+
+	switch (event) {
+	case ES58X_EVENT_OK:	/* 0: No event */
+		break;
+
+	case ES58X_EVENT_CRTL_ACTIVE:
+		if (can->state == CAN_STATE_BUS_OFF) {
+			netdev_err(netdev,
+				   "%s: state transition: BUS OFF -> ACTIVE\n",
+				   __func__);
+		}
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Event CAN BUS ACTIVE\n");
+		if (cf)
+			cf->data[1] |= CAN_ERR_CRTL_ACTIVE;
+		can->state = CAN_STATE_ERROR_ACTIVE;
+		break;
+
+	case ES58X_EVENT_CRTL_PASSIVE:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Event CAN BUS PASSIVE\n");
+		/* Either TX or RX error count reached passive state
+		 * but we do not know which. Setting both flags by
+		 * default.
+		 */
+		if (cf) {
+			cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE;
+			cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE;
+		}
+		if (can->state < CAN_STATE_BUS_OFF)
+			can->state = CAN_STATE_ERROR_PASSIVE;
+		can_stats->error_passive++;
+		if (priv->err_passive_before_rtx_success < U8_MAX)
+			priv->err_passive_before_rtx_success++;
+		break;
+
+	case ES58X_EVENT_CRTL_WARNING:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Event CAN BUS WARNING\n");
+		/* Either TX or RX error count reached warning state
+		 * but we do not know which. Setting both flags by
+		 * default.
+		 */
+		if (cf) {
+			cf->data[1] |= CAN_ERR_CRTL_RX_WARNING;
+			cf->data[1] |= CAN_ERR_CRTL_TX_WARNING;
+		}
+		if (can->state < CAN_STATE_BUS_OFF)
+			can->state = CAN_STATE_ERROR_WARNING;
+		can_stats->error_warning++;
+		break;
+
+	case ES58X_EVENT_BUSOFF:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "Event CAN BUS OFF\n");
+		if (cf)
+			cf->can_id |= CAN_ERR_BUSOFF;
+		can_stats->bus_off++;
+		netif_stop_queue(netdev);
+		if (can->state != CAN_STATE_BUS_OFF) {
+			can->state = CAN_STATE_BUS_OFF;
+			can_bus_off(netdev);
+			ret = can->do_set_mode(netdev, CAN_MODE_STOP);
+			if (ret)
+				return ret;
+		}
+		break;
+
+	case ES58X_EVENT_SINGLE_WIRE:
+		if (net_ratelimit())
+			netdev_warn(netdev,
+				    "Lost connection on either CAN high or CAN low\n");
+		/* Lost connection on either CAN high or CAN
+		 * low. Setting both flags by default.
+		 */
+		if (cf) {
+			cf->data[4] |= CAN_ERR_TRX_CANH_NO_WIRE;
+			cf->data[4] |= CAN_ERR_TRX_CANL_NO_WIRE;
+		}
+		break;
+
+	default:
+		if (net_ratelimit())
+			netdev_err(netdev,
+				   "%s: Unspecified event code 0x%04X\n",
+				   __func__, (int)event);
+		if (cf)
+			cf->can_id |= CAN_ERR_CRTL;
+		break;
+	}
+
+	/* driver/net/can/dev.c:can_restart() takes in account error
+	 * messages in the RX stats. Doing the same here for
+	 * consistency.
+	 */
+	netdev->stats.rx_packets++;
+	netdev->stats.rx_bytes += cf->can_dlc;
+
+	if (cf) {
+		if (cf->data[1])
+			cf->can_id |= CAN_ERR_CRTL;
+		if (cf->data[2] || cf->data[3]) {
+			cf->can_id |= CAN_ERR_PROT;
+			can_stats->bus_error++;
+		}
+		if (cf->data[4])
+			cf->can_id |= CAN_ERR_TRX;
+
+		es58x_set_skb_timestamp(netdev, skb, timestamp);
+		netif_rx(skb);
+	}
+
+	if ((event & ES58X_EVENT_CRTL_PASSIVE) &&
+	    priv->err_passive_before_rtx_success == ES58X_CONSECUTIVE_ERR_PASSIVE_MAX) {
+		netdev_info(netdev,
+			    "Got %d consecutive warning events with no successful RX or TX. Forcing bus-off\n",
+			    priv->err_passive_before_rtx_success);
+		return es58x_rx_err_msg(netdev, ES58X_ERR_OK,
+					ES58X_EVENT_BUSOFF, timestamp);
+	}
+
+	return 0;
+}
+
+/**
+ * es58x_cmd_ret_desc() - Convert a command type to a string.
+ * @cmd_ret_type: Type of the command which triggered the return code.
+ *
+ * The final line (return "<unknown>") should not be reached. If this
+ * is the case, there is an implementation bug.
+ *
+ * Return: a readable description of the @cmd_ret_type.
+ */
+static const char *es58x_cmd_ret_desc(enum es58x_ret_type cmd_ret_type)
+{
+	switch (cmd_ret_type) {
+	case ES58X_RET_TYPE_SET_BITTIMING:
+		return "Set bittiming";
+	case ES58X_RET_TYPE_ENABLE_CHANNEL:
+		return "Enable channel";
+	case ES58X_RET_TYPE_DISABLE_CHANNEL:
+		return "Disable channel";
+	case ES58X_RET_TYPE_TX_MSG:
+		return "Transmit message";
+	case ES58X_RET_TYPE_RESET_RX:
+		return "Reset RX";
+	case ES58X_RET_TYPE_RESET_TX:
+		return "Reset TX";
+	case ES58X_RET_TYPE_DEVICE_ERR:
+		return "Device error";
+	}
+
+	return "<unknown>";
+};
+
+/**
+ * es58x_rx_cmd_ret_u8() - Handle the command's return code received
+ *	from the ES58X device.
+ * @dev: Device, only used for the dev_XXX() print functions.
+ * @cmd_ret_type: Type of the command which triggered the return code.
+ * @rx_cmd_ret_u8: Command error code as returned by the ES58X device.
+ *
+ * Handles the 8 bits command return code. Those are specific to the
+ * ES581.4 device. The return value will eventually be used by
+ * es58x_handle_urb_cmd() function which will take proper actions in
+ * case of critical issues such and memory errors or bad CRC values.
+ *
+ * In contrast with es58x_rx_cmd_ret_u32(), the network device is
+ * unknown.
+ *
+ * Return: zero on success, return errno when any error occurs.
+ */
+int es58x_rx_cmd_ret_u8(struct device *dev,
+			enum es58x_ret_type cmd_ret_type,
+			enum es58x_ret_u8 rx_cmd_ret_u8)
+{
+	const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type);
+
+	switch (rx_cmd_ret_u8) {
+	case ES58X_RET_U8_OK:
+		dev_dbg_ratelimited(dev, "%s: OK\n", ret_desc);
+		return 0;
+
+	case ES58X_RET_U8_ERR_UNSPECIFIED_FAILURE:
+		dev_err(dev, "%s: unspecified failure\n", ret_desc);
+		return -EBADMSG;
+
+	case ES58X_RET_U8_ERR_NO_MEM:
+		dev_err(dev, "%s: device ran out of memory\n", ret_desc);
+		return -ENOMEM;
+
+	case ES58X_RET_U8_ERR_BAD_CRC:
+		dev_err(dev, "%s: CRC of previous command is incorrect\n",
+			ret_desc);
+		return -EIO;
+
+	default:
+		dev_err(dev, "%s: returned unknown value: 0x%02X\n",
+			ret_desc, rx_cmd_ret_u8);
+		return -EBADMSG;
+	}
+}
+
+/**
+ * es58x_rx_cmd_ret_u32() - Handle the command return code received
+ *	from the ES58X device.
+ * @netdev: CAN network device.
+ * @cmd_ret_type: Type of the command which triggered the return code.
+ * @rx_cmd_ret_u32: error code as returned by the ES58X device.
+ *
+ * Handles the 32 bits command return code. The return value will
+ * eventually be used by es58x_handle_urb_cmd() function which will
+ * take proper actions in case of critical issues such and memory
+ * errors or bad CRC values.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+int es58x_rx_cmd_ret_u32(struct net_device *netdev,
+			 enum es58x_ret_type cmd_ret_type,
+			 enum es58x_ret_u32 rx_cmd_ret_u32)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	const struct es58x_operators *ops = priv->es58x_dev->ops;
+	const char *ret_desc = es58x_cmd_ret_desc(cmd_ret_type);
+
+	switch (rx_cmd_ret_u32) {
+	case ES58X_RET_U32_OK:
+		switch (cmd_ret_type) {
+		case ES58X_RET_TYPE_ENABLE_CHANNEL:
+			es58x_can_reset_echo_fifo(netdev);
+			priv->can.state = CAN_STATE_ERROR_ACTIVE;
+			netif_wake_queue(netdev);
+			netdev_info(netdev,
+				    "%s: %s (Serial Number %s): CAN%d channel becomes ready\n",
+				    ret_desc, priv->es58x_dev->udev->product,
+				    priv->es58x_dev->udev->serial,
+				    priv->channel_idx + 1);
+			break;
+
+		case ES58X_RET_TYPE_TX_MSG:
+			if (IS_ENABLED(CONFIG_VERBOSE_DEBUG) && net_ratelimit())
+				netdev_vdbg(netdev, "%s: OK\n", ret_desc);
+			break;
+
+		default:
+			netdev_dbg(netdev, "%s: OK\n", ret_desc);
+			break;
+		}
+		return 0;
+
+	case ES58X_RET_U32_ERR_UNSPECIFIED_FAILURE:
+		if (cmd_ret_type == ES58X_RET_TYPE_ENABLE_CHANNEL) {
+			int ret;
+
+			netdev_warn(netdev,
+				    "%s: channel is already opened, closing and re-openning it to reflect new configuration\n",
+				    ret_desc);
+			ret = ops->disable_channel(es58x_priv(netdev));
+			if (ret)
+				return ret;
+			return ops->enable_channel(es58x_priv(netdev));
+		}
+		if (cmd_ret_type == ES58X_RET_TYPE_DISABLE_CHANNEL) {
+			netdev_info(netdev,
+				    "%s: channel is already closed\n", ret_desc);
+			return 0;
+		}
+		netdev_err(netdev,
+			   "%s: unspecified failure\n", ret_desc);
+		return -EBADMSG;
+
+	case ES58X_RET_U32_ERR_NO_MEM:
+		netdev_err(netdev, "%s: device ran out of memory\n", ret_desc);
+		return -ENOMEM;
+
+	case ES58X_RET_U32_WARN_PARAM_ADJUSTED:
+		netdev_warn(netdev,
+			    "%s: some incompatible parameters have been adjusted\n",
+			    ret_desc);
+		return 0;
+
+	case ES58X_RET_U32_WARN_TX_MAYBE_REORDER:
+		netdev_warn(netdev,
+			    "%s: TX messages might have been reordered\n",
+			    ret_desc);
+		return 0;
+
+	case ES58X_RET_U32_ERR_TIMEDOUT:
+		netdev_err(netdev, "%s: command timed out\n", ret_desc);
+		return -ETIMEDOUT;
+
+	case ES58X_RET_U32_ERR_FIFO_FULL:
+		netdev_warn(netdev, "%s: fifo is full\n", ret_desc);
+		return 0;
+
+	case ES58X_RET_U32_ERR_BAD_CONFIG:
+		netdev_err(netdev, "%s: bad configuration\n", ret_desc);
+		return -EINVAL;
+
+	case ES58X_RET_U32_ERR_NO_RESOURCE:
+		netdev_err(netdev, "%s: no resource available\n", ret_desc);
+		return -EBUSY;
+
+	default:
+		netdev_err(netdev, "%s returned unknown value: 0x%08X\n",
+			   ret_desc, rx_cmd_ret_u32);
+		return -EBADMSG;
+	}
+}
+
+/**
+ * es58x_increment_rx_errors() - Increment the network devices' error
+ *	count.
+ * @es58x_dev: ES58X device.
+ *
+ * If an error occurs on the early stages on receiving an URB command,
+ * we might not be able to figure out on which network device the
+ * error occurred. In such case, we arbitrarily increment the error
+ * count of all the network devices attached to our ES58X device.
+ */
+static void es58x_increment_rx_errors(struct es58x_device *es58x_dev)
+{
+	int i;
+
+	for (i = 0; i < es58x_dev->num_can_ch; i++)
+		if (es58x_dev->netdev[i])
+			es58x_dev->netdev[i]->stats.rx_errors++;
+}
+
+/**
+ * es58x_handle_urb_cmd() - Handle the URB command
+ * @es58x_dev: ES58X device.
+ * @urb_cmd: The URB command received from the ES58X device, might not
+ *	be aligned.
+ *
+ * Sends the URB command to the device specific function. Manages the
+ * errors thrown back by those functions.
+ */
+static void es58x_handle_urb_cmd(struct es58x_device *es58x_dev,
+				 const union es58x_urb_cmd *urb_cmd)
+{
+	const struct es58x_operators *ops = es58x_dev->ops;
+	size_t cmd_len;
+	int i, ret;
+
+	ret = ops->handle_urb_cmd(es58x_dev, urb_cmd);
+	switch (ret) {
+	case 0:		/* OK */
+		return;
+
+	case -ENODEV:
+		dev_err_ratelimited(es58x_dev->dev, "Device is not ready\n");
+		break;
+
+	case -EINVAL:
+	case -EMSGSIZE:
+	case -EBADRQC:
+	case -EBADMSG:
+	case -ECHRNG:
+	case -ETIMEDOUT:
+		cmd_len = es58x_get_urb_cmd_len(es58x_dev,
+						ops->get_msg_len(urb_cmd));
+		dev_err(es58x_dev->dev,
+			"ops->handle_urb_cmd() returned error %pe",
+			ERR_PTR(ret));
+		es58x_print_hex_dump(urb_cmd, cmd_len);
+		break;
+
+	case -EFAULT:
+	case -ENOMEM:
+	case -EIO:
+	default:
+		dev_crit(es58x_dev->dev,
+			 "ops->handle_urb_cmd() returned error %pe, detaching all network devices\n",
+			 ERR_PTR(ret));
+		for (i = 0; i < es58x_dev->num_can_ch; i++)
+			if (es58x_dev->netdev[i])
+				netif_device_detach(es58x_dev->netdev[i]);
+		if (es58x_dev->ops->reset_device)
+			es58x_dev->ops->reset_device(es58x_dev);
+		break;
+	}
+
+	/* Because the urb command could not fully be parsed,
+	 * channel_id is not confirmed. Incrementing rx_errors count
+	 * of all channels.
+	 */
+	es58x_increment_rx_errors(es58x_dev);
+}
+
+/**
+ * es58x_check_rx_urb() - Check the length and format of the URB command.
+ * @es58x_dev: ES58X device.
+ * @urb_cmd: The URB command received from the ES58X device, might not
+ *	be aligned.
+ * @urb_actual_len: The actual length of the URB command.
+ *
+ * Check if the first message of the received urb is valid, that is to
+ * say that both the header and the length are coherent.
+ *
+ * Return:
+ * the length of the first message of the URB on success.
+ *
+ * -ENODATA if the URB command is incomplete (in which case, the URB
+ * command should be buffered and combined with the next URB to try to
+ * reconstitute the URB command).
+ *
+ * -EOVERFLOW if the length is bigger than the maximum expected one.
+ *
+ * -EBADRQC if the start of frame does not match the expected value.
+ */
+static signed int es58x_check_rx_urb(struct es58x_device *es58x_dev,
+				     const union es58x_urb_cmd *urb_cmd,
+				     u32 urb_actual_len)
+{
+	const struct device *dev = es58x_dev->dev;
+	const struct es58x_parameters *param = es58x_dev->param;
+	u16 sof, msg_len;
+	signed int urb_cmd_len, ret;
+
+	if (urb_actual_len < param->urb_cmd_header_len) {
+		dev_vdbg(dev,
+			 "%s: Received %d bytes [%*ph]: header incomplete\n",
+			 __func__, urb_actual_len, urb_actual_len,
+			 urb_cmd->raw_cmd);
+		return -ENODATA;
+	}
+
+	sof = get_unaligned_le16(&urb_cmd->sof);
+	if (sof != param->rx_start_of_frame) {
+		dev_err_ratelimited(es58x_dev->dev,
+				    "%s: Expected sequence 0x%04X for start of frame but got 0x%04X.\n",
+				    __func__, param->rx_start_of_frame, sof);
+		return -EBADRQC;
+	}
+
+	msg_len = es58x_dev->ops->get_msg_len(urb_cmd);
+	urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len);
+	if (urb_cmd_len > param->rx_urb_cmd_max_len) {
+		dev_err_ratelimited(es58x_dev->dev,
+				    "%s: Biggest expected size for rx urb_cmd is %u but receive a command of size %d\n",
+				    __func__,
+				    param->rx_urb_cmd_max_len, urb_cmd_len);
+		return -EOVERFLOW;
+	} else if (urb_actual_len < urb_cmd_len) {
+		dev_vdbg(dev, "%s: Received %02d/%02d bytes\n",
+			 __func__, urb_actual_len, urb_cmd_len);
+		return -ENODATA;
+	}
+
+	ret = es58x_check_crc(es58x_dev, urb_cmd, urb_cmd_len);
+	if (ret)
+		return ret;
+
+	return urb_cmd_len;
+}
+
+/**
+ * es58x_copy_to_cmd_buf() - Copy an array to the URB command buffer.
+ * @es58x_dev: ES58X device.
+ * @raw_cmd: the buffer we want to copy.
+ * @raw_cmd_len: length of @raw_cmd.
+ *
+ * Concatenates @raw_cmd_len bytes of @raw_cmd to the end of the URB
+ * command buffer.
+ *
+ * Return: zero on success, -EMSGSIZE if not enough space is available
+ * to do the copy.
+ */
+static int es58x_copy_to_cmd_buf(struct es58x_device *es58x_dev,
+				 u8 *raw_cmd, int raw_cmd_len)
+{
+	if (es58x_dev->rx_cmd_buf_len + raw_cmd_len >
+	    es58x_dev->param->rx_urb_cmd_max_len)
+		return -EMSGSIZE;
+
+	memcpy(&es58x_dev->rx_cmd_buf.raw_cmd[es58x_dev->rx_cmd_buf_len],
+	       raw_cmd, raw_cmd_len);
+	es58x_dev->rx_cmd_buf_len += raw_cmd_len;
+
+	return 0;
+}
+
+/**
+ * es58x_split_urb_try_recovery() - Try to recover bad URB sequences.
+ * @es58x_dev: ES58X device.
+ * @raw_cmd: pointer to the buffer we want to copy.
+ * @raw_cmd_len: length of @raw_cmd.
+ *
+ * Under some rare conditions, we might get incorrect URBs from the
+ * device. From our observations, one of the valid URB gets replaced
+ * by one from the past. The full root cause is not identified.
+ *
+ * This function looks for the next start of frame in the urb buffer
+ * in order to try to recover.
+ *
+ * Such behavior was not observed on the devices of the ES58X FD
+ * family and only seems to impact the ES581.4.
+ *
+ * Return: the number of bytes dropped on success, -EBADMSG if recovery failed.
+ */
+static int es58x_split_urb_try_recovery(struct es58x_device *es58x_dev,
+					u8 *raw_cmd, size_t raw_cmd_len)
+{
+	union es58x_urb_cmd *urb_cmd;
+	signed int urb_cmd_len;
+	u16 sof;
+	int dropped_bytes = 0;
+
+	es58x_increment_rx_errors(es58x_dev);
+
+	while (raw_cmd_len > sizeof(sof)) {
+		urb_cmd = (union es58x_urb_cmd *)raw_cmd;
+		sof = get_unaligned_le16(&urb_cmd->sof);
+
+		if (sof == es58x_dev->param->rx_start_of_frame) {
+			urb_cmd_len = es58x_check_rx_urb(es58x_dev,
+							 urb_cmd, raw_cmd_len);
+			if ((urb_cmd_len == -ENODATA) || urb_cmd_len > 0) {
+				dev_info_ratelimited(es58x_dev->dev,
+						     "Recovery successful! Dropped %d bytes (urb_cmd_len: %d)\n",
+						     dropped_bytes,
+						     urb_cmd_len);
+				return dropped_bytes;
+			}
+		}
+		raw_cmd++;
+		raw_cmd_len--;
+		dropped_bytes++;
+	}
+
+	dev_warn_ratelimited(es58x_dev->dev, "%s: Recovery failed\n", __func__);
+	return -EBADMSG;
+}
+
+/**
+ * es58x_handle_incomplete_cmd() - Reconstitute an URB command from
+ *	different URB pieces.
+ * @es58x_dev: ES58X device.
+ * @urb: last urb buffer received.
+ *
+ * The device might split the URB commands in an arbitrary amount of
+ * pieces. This function concatenates those in an URB buffer until a
+ * full URB command is reconstituted and consume it.
+ *
+ * Return:
+ * number of bytes consumed from @urb if successful.
+ *
+ * -ENODATA if the URB command is still incomplete.
+ *
+ * -EBADMSG if the URB command is incorrect.
+ */
+static signed int es58x_handle_incomplete_cmd(struct es58x_device *es58x_dev,
+					      struct urb *urb)
+{
+	size_t cpy_len;
+	signed int urb_cmd_len, tmp_cmd_buf_len, ret;
+
+	tmp_cmd_buf_len = es58x_dev->rx_cmd_buf_len;
+	cpy_len = min_t(int, es58x_dev->param->rx_urb_cmd_max_len -
+			es58x_dev->rx_cmd_buf_len, urb->actual_length);
+	ret = es58x_copy_to_cmd_buf(es58x_dev, urb->transfer_buffer, cpy_len);
+	if (ret < 0)
+		return ret;
+
+	urb_cmd_len = es58x_check_rx_urb(es58x_dev, &es58x_dev->rx_cmd_buf,
+					 es58x_dev->rx_cmd_buf_len);
+	if (urb_cmd_len == -ENODATA) {
+		return -ENODATA;
+	} else if (urb_cmd_len < 0) {
+		dev_err_ratelimited(es58x_dev->dev,
+				    "Could not reconstitute incomplete command from previous URB, dropping %d bytes\n",
+				    tmp_cmd_buf_len + urb->actual_length);
+		dev_err_ratelimited(es58x_dev->dev,
+				    "Error code: %pe, es58x_dev->rx_cmd_buf_len: %d, urb->actual_length: %u\n",
+				    ERR_PTR(urb_cmd_len),
+				    tmp_cmd_buf_len, urb->actual_length);
+		es58x_print_hex_dump(&es58x_dev->rx_cmd_buf, tmp_cmd_buf_len);
+		es58x_print_hex_dump(urb->transfer_buffer, urb->actual_length);
+		return urb->actual_length;
+	}
+
+	es58x_handle_urb_cmd(es58x_dev, &es58x_dev->rx_cmd_buf);
+	return urb_cmd_len - tmp_cmd_buf_len;	/* consumed length */
+}
+
+/**
+ * es58x_split_urb() - Cut the received URB in individual URB commands.
+ * @es58x_dev: ES58X device.
+ * @urb: last urb buffer received.
+ *
+ * The device might send urb in bulk format (i.e. several URB commands
+ * concatenated together). This function will split all the commands
+ * contained in the urb.
+ *
+ * Return:
+ * number of bytes consumed from @urb if successful.
+ *
+ * -ENODATA if the URB command is incomplete.
+ *
+ * -EBADMSG if the URB command is incorrect.
+ */
+static signed int es58x_split_urb(struct es58x_device *es58x_dev,
+				  struct urb *urb)
+{
+	union es58x_urb_cmd *urb_cmd;
+	u8 *raw_cmd = urb->transfer_buffer;
+	s32 raw_cmd_len = urb->actual_length;
+	int ret;
+
+	if (es58x_dev->rx_cmd_buf_len != 0) {
+		ret = es58x_handle_incomplete_cmd(es58x_dev, urb);
+		if (ret != -ENODATA)
+			es58x_dev->rx_cmd_buf_len = 0;
+		if (ret < 0)
+			return ret;
+
+		raw_cmd += ret;
+		raw_cmd_len -= ret;
+	}
+
+	while (raw_cmd_len > 0) {
+		if (raw_cmd[0] == ES58X_HEARTBEAT) {
+			raw_cmd++;
+			raw_cmd_len--;
+			continue;
+		}
+		urb_cmd = (union es58x_urb_cmd *)raw_cmd;
+		ret = es58x_check_rx_urb(es58x_dev, urb_cmd, raw_cmd_len);
+		if (ret > 0) {
+			es58x_handle_urb_cmd(es58x_dev, urb_cmd);
+		} else if (ret == -ENODATA) {
+			es58x_copy_to_cmd_buf(es58x_dev, raw_cmd, raw_cmd_len);
+			return -ENODATA;
+		} else if (ret < 0) {
+			ret = es58x_split_urb_try_recovery(es58x_dev, raw_cmd,
+							   raw_cmd_len);
+			if (ret < 0)
+				return ret;
+		}
+		raw_cmd += ret;
+		raw_cmd_len -= ret;
+	}
+
+	return 0;
+}
+
+/**
+ * es58x_read_bulk_callback() - Callback for reading data from device.
+ * @urb: last urb buffer received.
+ *
+ * This function gets eventually called each time an URB is received
+ * from the ES58X device.
+ *
+ * Checks urb status, calls read function and resubmits urb read
+ * operation.
+ */
+static void es58x_read_bulk_callback(struct urb *urb)
+{
+	struct es58x_device *es58x_dev = urb->context;
+	const struct device *dev = es58x_dev->dev;
+	int i, ret;
+
+	switch (urb->status) {
+	case 0:		/* success */
+		break;
+
+	case -EOVERFLOW:
+		dev_err_ratelimited(dev, "%s: error %pe\n",
+				    __func__, ERR_PTR(urb->status));
+		es58x_print_hex_dump_debug(urb->transfer_buffer,
+					   urb->transfer_buffer_length);
+		goto resubmit_urb;
+
+	case -EPROTO:
+		dev_warn_ratelimited(dev, "%s: error %pe. Device unplugged?\n",
+				     __func__, ERR_PTR(urb->status));
+		goto free_urb;
+
+	case -ENOENT:
+	case -EPIPE:
+		dev_err_ratelimited(dev, "%s: error %pe\n",
+				    __func__, ERR_PTR(urb->status));
+		goto free_urb;
+
+	case -ESHUTDOWN:
+		dev_dbg_ratelimited(dev, "%s: error %pe\n",
+				    __func__, ERR_PTR(urb->status));
+		goto free_urb;
+
+	default:
+		dev_err_ratelimited(dev, "%s: error %pe\n",
+				    __func__, ERR_PTR(urb->status));
+		goto resubmit_urb;
+	}
+
+	ret = es58x_split_urb(es58x_dev, urb);
+	if ((ret != -ENODATA) && ret < 0) {
+		dev_err(es58x_dev->dev, "es58x_split_urb() returned error %pe",
+			ERR_PTR(ret));
+		es58x_print_hex_dump_debug(urb->transfer_buffer,
+					   urb->actual_length);
+
+		/* Because the urb command could not be parsed,
+		 * channel_id is not confirmed. Incrementing rx_errors
+		 * count of all channels.
+		 */
+		es58x_increment_rx_errors(es58x_dev);
+	}
+
+ resubmit_urb:
+	usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->rx_pipe,
+			  urb->transfer_buffer, urb->transfer_buffer_length,
+			  es58x_read_bulk_callback, es58x_dev);
+
+	ret = usb_submit_urb(urb, GFP_ATOMIC);
+	if (ret == -ENODEV) {
+		for (i = 0; i < es58x_dev->num_can_ch; i++)
+			if (es58x_dev->netdev[i])
+				netif_device_detach(es58x_dev->netdev[i]);
+	} else if (ret)
+		dev_err_ratelimited(dev,
+				    "Failed resubmitting read bulk urb: %pe\n",
+				    ERR_PTR(ret));
+	return;
+
+ free_urb:
+	usb_free_coherent(urb->dev, urb->transfer_buffer_length,
+			  urb->transfer_buffer, urb->transfer_dma);
+}
+
+/**
+ * es58x_write_bulk_callback() - Callback after writing data to the device.
+ * @urb: urb buffer which was previously submitted.
+ *
+ * This function gets eventually called each time an URB was sent to
+ * the ES58X device.
+ *
+ * Puts the @urb back to the urbs idle anchor and tries to restart the
+ * network queue.
+ */
+static void es58x_write_bulk_callback(struct urb *urb)
+{
+	struct net_device *netdev = urb->context;
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+
+	switch (urb->status) {
+	case 0:		/* success */
+		break;
+
+	case -EOVERFLOW:
+		if (net_ratelimit())
+			netdev_err(netdev, "%s: error %pe\n",
+				   __func__, ERR_PTR(urb->status));
+		es58x_print_hex_dump(urb->transfer_buffer,
+				     urb->transfer_buffer_length);
+		break;
+
+	case -ENOENT:
+		if (net_ratelimit())
+			netdev_dbg(netdev, "%s: error %pe\n",
+				   __func__, ERR_PTR(urb->status));
+		usb_free_coherent(urb->dev,
+				  es58x_dev->param->tx_urb_cmd_max_len,
+				  urb->transfer_buffer, urb->transfer_dma);
+		return;
+
+	default:
+		if (net_ratelimit())
+			netdev_info(netdev, "%s: error %pe\n",
+				    __func__, ERR_PTR(urb->status));
+		break;
+	}
+
+	usb_anchor_urb(urb, &es58x_dev->tx_urbs_idle);
+	atomic_inc(&es58x_dev->tx_urbs_idle_cnt);
+}
+
+/**
+ * es58x_alloc_urb() - Allocate memory for an URB and its transfer
+ *	buffer.
+ * @es58x_dev: ES58X device.
+ * @urb: URB to be allocated.
+ * @buf: used to return DMA address of buffer.
+ * @buf_len: requested buffer size.
+ * @mem_flags: affect whether allocation may block.
+ *
+ * Allocates an URB and its @transfer_buffer and set its @transfer_dma
+ * address.
+ *
+ * This function is used at start-up to allocate all RX URBs at once
+ * and during run time for TX URBs.
+ *
+ * Return: zero on success, -ENOMEM if no memory is available.
+ */
+static int es58x_alloc_urb(struct es58x_device *es58x_dev, struct urb **urb,
+			   u8 **buf, size_t buf_len, gfp_t mem_flags)
+{
+	*urb = usb_alloc_urb(0, mem_flags);
+	if (!*urb) {
+		dev_err(es58x_dev->dev, "No memory left for URBs\n");
+		return -ENOMEM;
+	}
+
+	*buf = usb_alloc_coherent(es58x_dev->udev, buf_len,
+				  mem_flags, &(*urb)->transfer_dma);
+	if (!*buf) {
+		dev_err(es58x_dev->dev, "No memory left for USB buffer\n");
+		usb_free_urb(*urb);
+		return -ENOMEM;
+	}
+
+	(*urb)->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+	return 0;
+}
+
+/**
+ * es58x_get_tx_urb() - Get an URB for transmission.
+ * @es58x_dev: ES58X device.
+ *
+ * Gets an URB from the idle urbs anchor or allocate a new one if the
+ * anchor is empty.
+ *
+ * If there are more than ES58X_TX_URBS_MAX in the idle anchor, do
+ * some garbage collection. The garbage collection is done here
+ * instead of within es58x_write_bulk_callback() because
+ * usb_free_coherent() should not be used in IRQ context:
+ * c.f. WARN_ON(irqs_disabled()) in dma_free_attrs().
+ *
+ * Return: a pointer to an URB on success, NULL if no memory is
+ * available.
+ */
+static struct urb *es58x_get_tx_urb(struct es58x_device *es58x_dev)
+{
+	atomic_t *idle_cnt = &es58x_dev->tx_urbs_idle_cnt;
+	struct urb *urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle);
+
+	if (!urb) {
+		size_t tx_buf_len;
+		u8 *buf;
+
+		tx_buf_len = es58x_dev->param->tx_urb_cmd_max_len;
+		if (es58x_alloc_urb(es58x_dev, &urb, &buf, tx_buf_len,
+				    GFP_ATOMIC))
+			return NULL;
+
+		usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->tx_pipe,
+				  buf, tx_buf_len, NULL, NULL);
+		return urb;
+	}
+
+	while (atomic_dec_return(idle_cnt) > ES58X_TX_URBS_MAX) {
+		/* Garbage collector */
+		struct urb *tmp = usb_get_from_anchor(&es58x_dev->tx_urbs_idle);
+
+		if (!tmp)
+			break;
+		usb_free_coherent(tmp->dev,
+				  es58x_dev->param->tx_urb_cmd_max_len,
+				  tmp->transfer_buffer, tmp->transfer_dma);
+		usb_free_urb(tmp);
+	}
+
+	return urb;
+}
+
+/**
+ * es58x_submit_urb() - Send data to the device.
+ * @es58x_dev: ES58X device.
+ * @urb: URB to be sent.
+ * @netdev: CAN network device.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_submit_urb(struct es58x_device *es58x_dev, struct urb *urb,
+			    struct net_device *netdev)
+{
+	int ret;
+
+	es58x_set_crc(urb->transfer_buffer, urb->transfer_buffer_length);
+	usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->tx_pipe,
+			  urb->transfer_buffer, urb->transfer_buffer_length,
+			  es58x_write_bulk_callback, netdev);
+	usb_anchor_urb(urb, &es58x_dev->tx_urbs_busy);
+	ret = usb_submit_urb(urb, GFP_ATOMIC);
+	if (ret) {
+		netdev_err(netdev, "%s: USB send urb failure: %pe\n",
+			   __func__, ERR_PTR(ret));
+		usb_unanchor_urb(urb);
+		usb_free_coherent(urb->dev,
+				  es58x_dev->param->tx_urb_cmd_max_len,
+				  urb->transfer_buffer, urb->transfer_dma);
+	}
+	usb_free_urb(urb);
+
+	return ret;
+}
+
+/**
+ * es58x_send_msg() - Prepare an URB and submit it.
+ * @es58x_dev: ES58X device.
+ * @cmd_type: Command type.
+ * @cmd_id: Command ID.
+ * @msg: ES58X message to be sent.
+ * @msg_len: Length of @msg.
+ * @channel_idx: Index of the network device.
+ *
+ * Creates an URB command from a given message, sets the header and the
+ * CRC and then submits it.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+int es58x_send_msg(struct es58x_device *es58x_dev, u8 cmd_type, u8 cmd_id,
+		   const void *msg, u16 msg_len, int channel_idx)
+{
+	struct net_device *netdev;
+	union es58x_urb_cmd *urb_cmd;
+	struct urb *urb;
+	int urb_cmd_len;
+
+	if (channel_idx == ES58X_CHANNEL_IDX_NA)
+		netdev = es58x_dev->netdev[0];	/* Default to first channel */
+	else
+		netdev = es58x_dev->netdev[channel_idx];
+
+	urb_cmd_len = es58x_get_urb_cmd_len(es58x_dev, msg_len);
+	if (urb_cmd_len > es58x_dev->param->tx_urb_cmd_max_len)
+		return -EOVERFLOW;
+
+	urb = es58x_get_tx_urb(es58x_dev);
+	if (!urb)
+		return -ENOMEM;
+
+	urb_cmd = urb->transfer_buffer;
+	es58x_dev->ops->fill_urb_header(urb_cmd, cmd_type, cmd_id,
+					channel_idx, msg_len);
+	memcpy(&urb_cmd->raw_cmd[es58x_dev->param->urb_cmd_header_len],
+	       msg, msg_len);
+	urb->transfer_buffer_length = urb_cmd_len;
+
+	return es58x_submit_urb(es58x_dev, urb, netdev);
+}
+
+/**
+ * es58x_alloc_rx_urbs() - Allocate RX URBs.
+ * @es58x_dev: ES58X device.
+ *
+ * Allocate URBs for reception and anchor them.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_alloc_rx_urbs(struct es58x_device *es58x_dev)
+{
+	const struct device *dev = es58x_dev->dev;
+	const struct es58x_parameters *param = es58x_dev->param;
+	size_t rx_buf_len = es58x_dev->rx_max_packet_size;
+	struct urb *urb;
+	u8 *buf;
+	int i;
+	int ret = -EINVAL;
+
+	for (i = 0; i < param->rx_urb_max; i++) {
+		ret = es58x_alloc_urb(es58x_dev, &urb, &buf, rx_buf_len,
+				      GFP_KERNEL);
+		if (ret)
+			break;
+
+		usb_fill_bulk_urb(urb, es58x_dev->udev, es58x_dev->rx_pipe,
+				  buf, rx_buf_len, es58x_read_bulk_callback,
+				  es58x_dev);
+		usb_anchor_urb(urb, &es58x_dev->rx_urbs);
+
+		ret = usb_submit_urb(urb, GFP_KERNEL);
+		if (ret) {
+			usb_unanchor_urb(urb);
+			usb_free_coherent(es58x_dev->udev, rx_buf_len,
+					  buf, urb->transfer_dma);
+			usb_free_urb(urb);
+			break;
+		}
+		usb_free_urb(urb);
+	}
+
+	if (i == 0) {
+		dev_err(dev, "%s: Could not setup any rx URBs\n", __func__);
+		return ret;
+	}
+	dev_dbg(dev, "%s: Allocated %d rx URBs each of size %zu\n",
+		__func__, i, rx_buf_len);
+
+	return ret;
+}
+
+/**
+ * es58x_free_urbs() - Free all the TX and RX URBs.
+ * @es58x_dev: ES58X device.
+ */
+static void es58x_free_urbs(struct es58x_device *es58x_dev)
+{
+	struct urb *urb;
+
+	if (!usb_wait_anchor_empty_timeout(&es58x_dev->tx_urbs_busy, 1000)) {
+		dev_err(es58x_dev->dev, "%s: Timeout, some TX urbs still remain\n",
+			__func__);
+		usb_kill_anchored_urbs(&es58x_dev->tx_urbs_busy);
+	}
+
+	while ((urb = usb_get_from_anchor(&es58x_dev->tx_urbs_idle)) != NULL) {
+		usb_free_coherent(urb->dev, es58x_dev->param->tx_urb_cmd_max_len,
+				  urb->transfer_buffer, urb->transfer_dma);
+		usb_free_urb(urb);
+		atomic_dec(&es58x_dev->tx_urbs_idle_cnt);
+	}
+	if (atomic_read(&es58x_dev->tx_urbs_idle_cnt))
+		dev_err(es58x_dev->dev,
+			"All idle urbs were freed but tx_urb_idle_cnt is %d\n",
+			atomic_read(&es58x_dev->tx_urbs_idle_cnt));
+
+	usb_kill_anchored_urbs(&es58x_dev->rx_urbs);
+}
+
+/**
+ * es58x_open() - Enable the network device.
+ * @netdev: CAN network device.
+ *
+ * Called when the network transitions to the up state. Allocate the
+ * URB resources if needed and open the channel.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_open(struct net_device *netdev)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	int ret;
+
+	if (atomic_inc_return(&es58x_dev->opened_channel_cnt) == 1) {
+		ret = es58x_alloc_rx_urbs(es58x_dev);
+		if (ret)
+			return ret;
+
+		ret = es58x_set_realtime_diff_ns(es58x_dev);
+		if (ret)
+			goto free_urbs;
+	}
+
+	ret = open_candev(netdev);
+	if (ret)
+		goto free_urbs;
+
+	ret = es58x_dev->ops->enable_channel(es58x_priv(netdev));
+	if (ret)
+		goto free_urbs;
+
+	netif_start_queue(netdev);
+
+	return ret;
+
+ free_urbs:
+	if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
+		es58x_free_urbs(es58x_dev);
+	netdev_err(netdev, "%s: Could not open the network device: %pe\n",
+		   __func__, ERR_PTR(ret));
+
+	return ret;
+}
+
+/**
+ * es58x_stop() - Disable the network device.
+ * @netdev: CAN network device.
+ *
+ * Called when the network transitions to the down state. If all the
+ * channels of the device are closed, free the URB resources which are
+ * not needed anymore.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_stop(struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	int ret;
+
+	netif_stop_queue(netdev);
+	ret = es58x_dev->ops->disable_channel(priv);
+	if (ret)
+		return ret;
+
+	priv->can.state = CAN_STATE_STOPPED;
+	es58x_can_reset_echo_fifo(netdev);
+	close_candev(netdev);
+
+	es58x_flush_pending_tx_msg(netdev);
+
+	if (atomic_dec_and_test(&es58x_dev->opened_channel_cnt))
+		es58x_free_urbs(es58x_dev);
+
+	return 0;
+}
+
+/**
+ * es58x_xmit_commit() - Send the bulk urb.
+ * @netdev: CAN network device.
+ *
+ * Do the bulk send. This function should be called only once by bulk
+ * transmission.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_xmit_commit(struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	int ret;
+
+	if (!es58x_is_can_state_active(netdev))
+		return -ENETDOWN;
+
+	if (es58x_is_echo_skb_threshold_reached(priv))
+		netif_stop_queue(netdev);
+
+	ret = es58x_submit_urb(priv->es58x_dev, priv->tx_urb, netdev);
+	if (ret == 0)
+		priv->tx_urb = NULL;
+
+	return ret;
+}
+
+/**
+ * es58x_xmit_more() - Can we put more packets?
+ * @priv: ES58X private parameters related to the network device.
+ *
+ * Return: true if we can put more, false if it is time to send.
+ */
+static bool es58x_xmit_more(struct es58x_priv *priv)
+{
+	unsigned int free_slots =
+	    priv->can.echo_skb_max - (priv->tx_head - priv->tx_tail);
+
+	return netdev_xmit_more() && free_slots > 0 &&
+		priv->tx_can_msg_cnt < priv->es58x_dev->param->tx_bulk_max;
+}
+
+/**
+ * es58x_start_xmit() - Transmit an skb.
+ * @skb: socket buffer of a CAN message.
+ * @netdev: CAN network device.
+ *
+ * Called when a packet needs to be transmitted.
+ *
+ * This function relies on Byte Queue Limits (BQL). The main benefit
+ * is to increase the throughput by allowing bulk transfers
+ * (c.f. xmit_more flag).
+ *
+ * Queues up to tx_bulk_max messages in &tx_urb buffer and does
+ * a bulk send of all messages in one single URB.
+ *
+ * Return: NETDEV_TX_OK regardless of if we could transmit the @skb or
+ *	had to drop it.
+ */
+static netdev_tx_t es58x_start_xmit(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	unsigned int frame_len;
+	int ret;
+
+	if (can_dropped_invalid_skb(netdev, skb)) {
+		if (priv->tx_urb)
+			goto xmit_commit;
+		return NETDEV_TX_OK;
+	}
+
+	if (priv->tx_urb && priv->tx_can_msg_is_fd != can_is_canfd_skb(skb)) {
+		/* Can not do bulk send with mixed CAN and CAN FD frames. */
+		ret = es58x_xmit_commit(netdev);
+		if (ret)
+			goto drop_skb;
+	}
+
+	if (!priv->tx_urb) {
+		priv->tx_urb = es58x_get_tx_urb(es58x_dev);
+		if (!priv->tx_urb) {
+			ret = -ENOMEM;
+			goto drop_skb;
+		}
+		priv->tx_can_msg_cnt = 0;
+		priv->tx_can_msg_is_fd = can_is_canfd_skb(skb);
+	}
+
+	ret = es58x_dev->ops->tx_can_msg(priv, skb);
+	if (ret)
+		goto drop_skb;
+
+	frame_len = can_skb_get_frame_len(skb);
+	ret = can_put_echo_skb(skb, netdev,
+			       priv->tx_head & es58x_dev->param->fifo_mask,
+			       frame_len);
+	if (ret)
+		goto xmit_failure;
+	netdev_sent_queue(netdev, frame_len);
+
+	priv->tx_head++;
+	priv->tx_can_msg_cnt++;
+
+ xmit_commit:
+	if (!es58x_xmit_more(priv)) {
+		ret = es58x_xmit_commit(netdev);
+		if (ret)
+			goto xmit_failure;
+	}
+
+	return NETDEV_TX_OK;
+
+ drop_skb:
+	dev_kfree_skb(skb);
+	netdev->stats.tx_dropped++;
+ xmit_failure:
+	netdev_warn(netdev, "%s: send message failure: %pe\n",
+		    __func__, ERR_PTR(ret));
+	netdev->stats.tx_errors++;
+	es58x_flush_pending_tx_msg(netdev);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops es58x_netdev_ops = {
+	.ndo_open = es58x_open,
+	.ndo_stop = es58x_stop,
+	.ndo_start_xmit = es58x_start_xmit
+};
+
+/**
+ * es58x_set_mode() - Change network device mode.
+ * @netdev: CAN network device.
+ * @mode: either %CAN_MODE_START, %CAN_MODE_STOP or %CAN_MODE_SLEEP
+ *
+ * Currently, this function is only used to stop and restart the
+ * channel during a bus off event (c.f. es58x_rx_err_msg() and
+ * drivers/net/can/dev.c:can_restart() which are the two only
+ * callers).
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_set_mode(struct net_device *netdev, enum can_mode mode)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+
+	switch (mode) {
+	case CAN_MODE_START:
+		switch (priv->can.state) {
+		case CAN_STATE_BUS_OFF:
+			return priv->es58x_dev->ops->enable_channel(priv);
+
+		case CAN_STATE_STOPPED:
+			return es58x_open(netdev);
+
+		case CAN_STATE_ERROR_ACTIVE:
+		case CAN_STATE_ERROR_WARNING:
+		case CAN_STATE_ERROR_PASSIVE:
+		default:
+			return 0;
+		}
+
+	case CAN_MODE_STOP:
+		switch (priv->can.state) {
+		case CAN_STATE_STOPPED:
+			return 0;
+
+		case CAN_STATE_ERROR_ACTIVE:
+		case CAN_STATE_ERROR_WARNING:
+		case CAN_STATE_ERROR_PASSIVE:
+		case CAN_STATE_BUS_OFF:
+		default:
+			return priv->es58x_dev->ops->disable_channel(priv);
+		}
+
+	case CAN_MODE_SLEEP:
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * es58x_init_priv() - Initialize private parameters.
+ * @es58x_dev: ES58X device.
+ * @priv: ES58X private parameters related to the network device.
+ * @channel_idx: Index of the network device.
+ */
+static void es58x_init_priv(struct es58x_device *es58x_dev,
+			    struct es58x_priv *priv, int channel_idx)
+{
+	const struct es58x_parameters *param = es58x_dev->param;
+	struct can_priv *can = &priv->can;
+
+	priv->es58x_dev = es58x_dev;
+	priv->channel_idx = channel_idx;
+	priv->tx_urb = NULL;
+	priv->tx_can_msg_cnt = 0;
+
+	can->bittiming_const = param->bittiming_const;
+	if (param->ctrlmode_supported & CAN_CTRLMODE_FD) {
+		can->data_bittiming_const = param->data_bittiming_const;
+		can->tdc_const = param->tdc_const;
+	}
+	can->bitrate_max = param->bitrate_max;
+	can->clock = param->clock;
+	can->state = CAN_STATE_STOPPED;
+	can->ctrlmode_supported = param->ctrlmode_supported;
+	can->do_set_mode = es58x_set_mode;
+}
+
+/**
+ * es58x_init_netdev() - Initialize the network device.
+ * @es58x_dev: ES58X device.
+ * @channel_idx: Index of the network device.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx)
+{
+	struct net_device *netdev;
+	struct device *dev = es58x_dev->dev;
+	int ret;
+
+	netdev = alloc_candev(sizeof(struct es58x_priv),
+			      es58x_dev->param->fifo_mask + 1);
+	if (!netdev) {
+		dev_err(dev, "Could not allocate candev\n");
+		return -ENOMEM;
+	}
+	SET_NETDEV_DEV(netdev, dev);
+	es58x_dev->netdev[channel_idx] = netdev;
+	es58x_init_priv(es58x_dev, es58x_priv(netdev), channel_idx);
+
+	netdev->netdev_ops = &es58x_netdev_ops;
+	netdev->flags |= IFF_ECHO;	/* We support local echo */
+
+	ret = register_candev(netdev);
+	if (ret)
+		return ret;
+
+	netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0),
+				       es58x_dev->param->dql_min_limit);
+
+	return ret;
+}
+
+/**
+ * es58x_get_product_info() - Get the product information and print them.
+ * @es58x_dev: ES58X device.
+ *
+ * Do a synchronous call to get the product information.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_get_product_info(struct es58x_device *es58x_dev)
+{
+	struct usb_device *udev = es58x_dev->udev;
+	const int es58x_prod_info_idx = 6;
+	/* Empirical tests show a prod_info length of maximum 83,
+	 * below should be more than enough.
+	 */
+	const size_t prod_info_len = 127;
+	char *prod_info;
+	int ret;
+
+	prod_info = kmalloc(prod_info_len, GFP_KERNEL);
+	if (!prod_info)
+		return -ENOMEM;
+
+	ret = usb_string(udev, es58x_prod_info_idx, prod_info, prod_info_len);
+	if (ret < 0) {
+		dev_err(es58x_dev->dev,
+			"%s: Could not read the product info: %pe\n",
+			__func__, ERR_PTR(ret));
+		goto out_free;
+	}
+	if (ret >= prod_info_len - 1) {
+		dev_warn(es58x_dev->dev,
+			 "%s: Buffer is too small, result might be truncated\n",
+			 __func__);
+	}
+	dev_info(es58x_dev->dev, "Product info: %s\n", prod_info);
+
+ out_free:
+	kfree(prod_info);
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * es58x_init_es58x_dev() - Initialize the ES58X device.
+ * @intf: USB interface.
+ * @p_es58x_dev: pointer to the address of the ES58X device.
+ * @driver_info: Quirks of the device.
+ *
+ * Return: zero on success, errno when any error occurs.
+ */
+static int es58x_init_es58x_dev(struct usb_interface *intf,
+				struct es58x_device **p_es58x_dev,
+				kernel_ulong_t driver_info)
+{
+	struct device *dev = &intf->dev;
+	struct es58x_device *es58x_dev;
+	const struct es58x_parameters *param;
+	const struct es58x_operators *ops;
+	struct usb_device *udev = interface_to_usbdev(intf);
+	struct usb_endpoint_descriptor *ep_in, *ep_out;
+	int ret;
+
+	dev_info(dev,
+		 "Starting %s %s (Serial Number %s) driver version %s\n",
+		 udev->manufacturer, udev->product, udev->serial, DRV_VERSION);
+
+	ret = usb_find_common_endpoints(intf->cur_altsetting, &ep_in, &ep_out,
+					NULL, NULL);
+	if (ret)
+		return ret;
+
+	if (driver_info & ES58X_FD_FAMILY) {
+		return -ENODEV;
+		/* Place holder for es58x_fd glue code. */
+	} else {
+		return -ENODEV;
+		/* Place holder for es581_4 glue code. */
+	}
+
+	es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL);
+	if (!es58x_dev)
+		return -ENOMEM;
+
+	es58x_dev->param = param;
+	es58x_dev->ops = ops;
+	es58x_dev->dev = dev;
+	es58x_dev->udev = udev;
+
+	if (driver_info & ES58X_DUAL_CHANNEL)
+		es58x_dev->num_can_ch = 2;
+	else
+		es58x_dev->num_can_ch = 1;
+
+	init_usb_anchor(&es58x_dev->rx_urbs);
+	init_usb_anchor(&es58x_dev->tx_urbs_idle);
+	init_usb_anchor(&es58x_dev->tx_urbs_busy);
+	atomic_set(&es58x_dev->tx_urbs_idle_cnt, 0);
+	atomic_set(&es58x_dev->opened_channel_cnt, 0);
+	usb_set_intfdata(intf, es58x_dev);
+
+	es58x_dev->rx_pipe = usb_rcvbulkpipe(es58x_dev->udev,
+					     ep_in->bEndpointAddress);
+	es58x_dev->tx_pipe = usb_sndbulkpipe(es58x_dev->udev,
+					     ep_out->bEndpointAddress);
+	es58x_dev->rx_max_packet_size = le16_to_cpu(ep_in->wMaxPacketSize);
+
+	*p_es58x_dev = es58x_dev;
+
+	return 0;
+}
+
+/**
+ * es58x_probe() - Initialize the USB device.
+ * @intf: USB interface.
+ * @id: USB device ID.
+ *
+ * Return: zero on success, -ENODEV if the interface is not supported
+ * or errno when any other error occurs.
+ */
+static int es58x_probe(struct usb_interface *intf,
+		       const struct usb_device_id *id)
+{
+	struct es58x_device *es58x_dev;
+	int ch_idx, ret;
+
+	ret = es58x_init_es58x_dev(intf, &es58x_dev, id->driver_info);
+	if (ret)
+		return ret;
+
+	ret = es58x_get_product_info(es58x_dev);
+	if (ret)
+		goto cleanup_es58x_dev;
+
+	for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++) {
+		ret = es58x_init_netdev(es58x_dev, ch_idx);
+		if (ret)
+			goto cleanup_candev;
+	}
+
+	return ret;
+
+ cleanup_candev:
+	for (ch_idx = 0; ch_idx < es58x_dev->num_can_ch; ch_idx++)
+		if (es58x_dev->netdev[ch_idx]) {
+			unregister_candev(es58x_dev->netdev[ch_idx]);
+			free_candev(es58x_dev->netdev[ch_idx]);
+		}
+ cleanup_es58x_dev:
+	kfree(es58x_dev);
+
+	return ret;
+}
+
+/**
+ * es58x_disconnect() - Disconnect the USB device.
+ * @intf: USB interface
+ *
+ * Called by the usb core when driver is unloaded or device is
+ * removed.
+ */
+static void es58x_disconnect(struct usb_interface *intf)
+{
+	struct es58x_device *es58x_dev = usb_get_intfdata(intf);
+	struct net_device *netdev;
+	int i;
+
+	dev_info(&intf->dev, "Disconnecting %s %s\n",
+		 es58x_dev->udev->manufacturer, es58x_dev->udev->product);
+
+	for (i = 0; i < es58x_dev->num_can_ch; i++) {
+		netdev = es58x_dev->netdev[i];
+		if (!netdev)
+			continue;
+		unregister_candev(netdev);
+		es58x_dev->netdev[i] = NULL;
+		free_candev(netdev);
+	}
+
+	es58x_free_urbs(es58x_dev);
+
+	kfree(es58x_dev);
+	usb_set_intfdata(intf, NULL);
+}
+
+static struct usb_driver es58x_driver = {
+	.name = ES58X_MODULE_NAME,
+	.probe = es58x_probe,
+	.disconnect = es58x_disconnect,
+	.id_table = es58x_id_table
+};
+
+module_usb_driver(es58x_driver);
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
new file mode 100644
index 000000000000..ccced39d5d81
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -0,0 +1,693 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es58x_core.h: All common definitions and declarations.
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#ifndef __ES58X_COMMON_H__
+#define __ES58X_COMMON_H__
+
+#include <linux/types.h>
+#include <linux/usb.h>
+#include <linux/netdevice.h>
+#include <linux/can.h>
+#include <linux/can/dev.h>
+
+/* Driver constants */
+#define ES58X_RX_URBS_MAX 5	/* Empirical value */
+#define ES58X_TX_URBS_MAX 6	/* Empirical value */
+
+#define ES58X_MAX(param) 0
+#define ES58X_TX_BULK_MAX ES58X_MAX(TX_BULK_MAX)
+#define ES58X_RX_BULK_MAX ES58X_MAX(RX_BULK_MAX)
+#define ES58X_ECHO_BULK_MAX ES58X_MAX(ECHO_BULK_MAX)
+#define ES58X_NUM_CAN_CH_MAX ES58X_MAX(NUM_CAN_CH)
+
+/* Use this when channel index is irrelevant (e.g. device
+ * timestamp).
+ */
+#define ES58X_CHANNEL_IDX_NA 0xFF
+#define ES58X_EMPTY_MSG NULL
+
+/* Threshold on consecutive CAN_STATE_ERROR_PASSIVE. If we receive
+ * ES58X_CONSECUTIVE_ERR_PASSIVE_MAX times the event
+ * ES58X_ERR_CRTL_PASSIVE in a row without any successful RX or TX,
+ * we force the device to switch to CAN_STATE_BUS_OFF state.
+ */
+#define ES58X_CONSECUTIVE_ERR_PASSIVE_MAX 254
+
+/* A magic number sent by the ES581.4 to inform it is alive. */
+#define ES58X_HEARTBEAT 0x11
+
+/**
+ * enum es58x_driver_info - Quirks of the device.
+ * @ES58X_DUAL_CHANNEL: Device has two CAN channels. If this flag is
+ *	not set, it is implied that the device has only one CAN
+ *	channel.
+ * @ES58X_FD_FAMILY: Device is CAN-FD capable. If this flag is not
+ *	set, the device only supports classical CAN.
+ */
+enum es58x_driver_info {
+	ES58X_DUAL_CHANNEL = BIT(0),
+	ES58X_FD_FAMILY = BIT(1)
+};
+
+enum es58x_echo {
+	ES58X_ECHO_OFF = 0,
+	ES58X_ECHO_ON = 1
+};
+
+/**
+ * enum es58x_physical_layer - Type of the physical layer.
+ * @ES58X_PHYSICAL_LAYER_HIGH_SPEED: High-speed CAN (c.f. ISO
+ *	11898-2).
+ *
+ * Some products of the ETAS portfolio also support low-speed CAN
+ * (c.f. ISO 11898-3). However, all the devices in scope of this
+ * driver do not support the option, thus, the enum has only one
+ * member.
+ */
+enum es58x_physical_layer {
+	ES58X_PHYSICAL_LAYER_HIGH_SPEED = 1
+};
+
+enum es58x_samples_per_bit {
+	ES58X_SAMPLES_PER_BIT_ONE = 1,
+	ES58X_SAMPLES_PER_BIT_THREE = 2
+};
+
+/**
+ * enum es58x_sync_edge - Synchronization method.
+ * @ES58X_SYNC_EDGE_SINGLE: ISO CAN specification defines the use of a
+ *	single edge synchronization.  The synchronization should be
+ *	done on recessive to dominant level change.
+ *
+ * For information, ES582.1 and ES584.1 also support a double
+ * synchronization, requiring both recessive to dominant then dominant
+ * to recessive level change. However, this is not supported in
+ * SocketCAN framework, thus, the enum has only one member.
+ */
+enum es58x_sync_edge {
+	ES58X_SYNC_EDGE_SINGLE = 1
+};
+
+/**
+ * enum es58x_flag - CAN flags for RX/TX messages.
+ * @ES58X_FLAG_EFF: Extended Frame Format (EFF).
+ * @ES58X_FLAG_RTR: Remote Transmission Request (RTR).
+ * @ES58X_FLAG_FD_BRS: Bit rate switch (BRS): second bitrate for
+ *	payload data.
+ * @ES58X_FLAG_FD_ESI: Error State Indicator (ESI): tell if the
+ *	transmitting node is in error passive mode.
+ * @ES58X_FLAG_FD_DATA: CAN FD frame.
+ */
+enum es58x_flag {
+	ES58X_FLAG_EFF = BIT(0),
+	ES58X_FLAG_RTR = BIT(1),
+	ES58X_FLAG_FD_BRS = BIT(3),
+	ES58X_FLAG_FD_ESI = BIT(5),
+	ES58X_FLAG_FD_DATA = BIT(6)
+};
+
+/**
+ * enum es58x_err - CAN error detection.
+ * @ES58X_ERR_OK: No errors.
+ * @ES58X_ERR_PROT_STUFF: Bit stuffing error: more than 5 consecutive
+ *	equal bits.
+ * @ES58X_ERR_PROT_FORM: Frame format error.
+ * @ES58X_ERR_ACK: Received no ACK on transmission.
+ * @ES58X_ERR_PROT_BIT: Single bit error.
+ * @ES58X_ERR_PROT_CRC: Incorrect 15, 17 or 21 bits CRC.
+ * @ES58X_ERR_PROT_BIT1: Unable to send recessive bit: tried to send
+ *	recessive bit 1 but monitored dominant bit 0.
+ * @ES58X_ERR_PROT_BIT0: Unable to send dominant bit: tried to send
+ *	dominant bit 0 but monitored recessive bit 1.
+ * @ES58X_ERR_PROT_OVERLOAD: Bus overload.
+ * @ES58X_ERR_PROT_UNSPEC: Unspecified.
+ *
+ * Please refer to ISO 11898-1:2015, section 10.11 "Error detection"
+ * and section 10.13 "Overload signaling" for additional details.
+ */
+enum es58x_err {
+	ES58X_ERR_OK = 0,
+	ES58X_ERR_PROT_STUFF = BIT(0),
+	ES58X_ERR_PROT_FORM = BIT(1),
+	ES58X_ERR_ACK = BIT(2),
+	ES58X_ERR_PROT_BIT = BIT(3),
+	ES58X_ERR_PROT_CRC = BIT(4),
+	ES58X_ERR_PROT_BIT1 = BIT(5),
+	ES58X_ERR_PROT_BIT0 = BIT(6),
+	ES58X_ERR_PROT_OVERLOAD = BIT(7),
+	ES58X_ERR_PROT_UNSPEC = BIT(31)
+};
+
+/**
+ * enum es58x_event - CAN error codes returned by the device.
+ * @ES58X_EVENT_OK: No errors.
+ * @ES58X_EVENT_CRTL_ACTIVE: Active state: both TR and RX error count
+ *	is less than 128.
+ * @ES58X_EVENT_CRTL_PASSIVE: Passive state: either TX or RX error
+ *	count is greater than 127.
+ * @ES58X_EVENT_CRTL_WARNING: Warning state: either TX or RX error
+ *	count is greater than 96.
+ * @ES58X_EVENT_BUSOFF: Bus off.
+ * @ES58X_EVENT_SINGLE_WIRE: Lost connection on either CAN high or CAN
+ *	low.
+ *
+ * Please refer to ISO 11898-1:2015, section 12.1.4 "Rules of fault
+ * confinement" for additional details.
+ */
+enum es58x_event {
+	ES58X_EVENT_OK = 0,
+	ES58X_EVENT_CRTL_ACTIVE = BIT(0),
+	ES58X_EVENT_CRTL_PASSIVE = BIT(1),
+	ES58X_EVENT_CRTL_WARNING = BIT(2),
+	ES58X_EVENT_BUSOFF = BIT(3),
+	ES58X_EVENT_SINGLE_WIRE = BIT(4)
+};
+
+/* enum es58x_ret_u8 - Device return error codes, 8 bit format.
+ *
+ * Specific to ES581.4.
+ */
+enum es58x_ret_u8 {
+	ES58X_RET_U8_OK = 0x00,
+	ES58X_RET_U8_ERR_UNSPECIFIED_FAILURE = 0x80,
+	ES58X_RET_U8_ERR_NO_MEM = 0x81,
+	ES58X_RET_U8_ERR_BAD_CRC = 0x99
+};
+
+/* enum es58x_ret_u32 - Device return error codes, 32 bit format.
+ */
+enum es58x_ret_u32 {
+	ES58X_RET_U32_OK = 0x00000000UL,
+	ES58X_RET_U32_ERR_UNSPECIFIED_FAILURE = 0x80000000UL,
+	ES58X_RET_U32_ERR_NO_MEM = 0x80004001UL,
+	ES58X_RET_U32_WARN_PARAM_ADJUSTED = 0x40004000UL,
+	ES58X_RET_U32_WARN_TX_MAYBE_REORDER = 0x40004001UL,
+	ES58X_RET_U32_ERR_TIMEDOUT = 0x80000008UL,
+	ES58X_RET_U32_ERR_FIFO_FULL = 0x80003002UL,
+	ES58X_RET_U32_ERR_BAD_CONFIG = 0x80004000UL,
+	ES58X_RET_U32_ERR_NO_RESOURCE = 0x80004002UL
+};
+
+/* enum es58x_ret_type - Type of the command returned by the ES58X
+ *	device.
+ */
+enum es58x_ret_type {
+	ES58X_RET_TYPE_SET_BITTIMING,
+	ES58X_RET_TYPE_ENABLE_CHANNEL,
+	ES58X_RET_TYPE_DISABLE_CHANNEL,
+	ES58X_RET_TYPE_TX_MSG,
+	ES58X_RET_TYPE_RESET_RX,
+	ES58X_RET_TYPE_RESET_TX,
+	ES58X_RET_TYPE_DEVICE_ERR
+};
+
+union es58x_urb_cmd {
+	struct {		/* Common header parts of all variants */
+		__le16 sof;
+		u8 cmd_type;
+		u8 cmd_id;
+	} __packed;
+	u8 raw_cmd[0];
+};
+
+/**
+ * struct es58x_priv - All information specific to a CAN channel.
+ * @can: struct can_priv must be the first member (Socket CAN relies
+ *	on the fact that function netdev_priv() returns a pointer to
+ *	a struct can_priv).
+ * @es58x_dev: pointer to the corresponding ES58X device.
+ * @tx_urb: Used as a buffer to concatenate the TX messages and to do
+ *	a bulk send. Please refer to es58x_start_xmit() for more
+ *	details.
+ * @tx_tail: Index of the oldest packet still pending for
+ *	completion. @tx_tail & echo_skb_mask represents the beginning
+ *	of the echo skb FIFO, i.e. index of the first element.
+ * @tx_head: Index of the next packet to be sent to the
+ *	device. @tx_head & echo_skb_mask represents the end of the
+ *	echo skb FIFO plus one, i.e. the first free index.
+ * @tx_can_msg_cnt: Number of messages in @tx_urb.
+ * @tx_can_msg_is_fd: false: all messages in @tx_urb are Classical
+ *	CAN, true: all messages in @tx_urb are CAN FD. Rationale:
+ *	ES58X FD devices do not allow to mix Classical CAN and FD CAN
+ *	frames in one single bulk transmission.
+ * @err_passive_before_rtx_success: The ES58X device might enter in a
+ *	state in which it keeps alternating between error passive
+ *	and active states. This counter keeps track of the number of
+ *	error passive and if it gets bigger than
+ *	ES58X_CONSECUTIVE_ERR_PASSIVE_MAX, es58x_rx_err_msg() will
+ *	force the status to bus-off.
+ * @channel_idx: Channel index, starts at zero.
+ */
+struct es58x_priv {
+	struct can_priv can;
+	struct es58x_device *es58x_dev;
+	struct urb *tx_urb;
+
+	u32 tx_tail;
+	u32 tx_head;
+
+	u8 tx_can_msg_cnt;
+	bool tx_can_msg_is_fd;
+
+	u8 err_passive_before_rtx_success;
+
+	u8 channel_idx;
+};
+
+/**
+ * struct es58x_parameters - Constant parameters of a given hardware
+ *	variant.
+ * @bittiming_const: Nominal bittimming constant parameters.
+ * @data_bittiming_const: Data bittiming constant parameters.
+ * @tdc_const: Transmission Delay Compensation constant parameters.
+ * @bitrate_max: Maximum bitrate supported by the device.
+ * @clock: CAN clock parameters.
+ * @ctrlmode_supported: List of supported modes. Please refer to
+ *	can/netlink.h file for additional details.
+ * @tx_start_of_frame: Magic number at the beginning of each TX URB
+ *	command.
+ * @rx_start_of_frame: Magic number at the beginning of each RX URB
+ *	command.
+ * @tx_urb_cmd_max_len: Maximum length of a TX URB command.
+ * @rx_urb_cmd_max_len: Maximum length of a RX URB command.
+ * @fifo_mask: Bit mask to quickly convert the tx_tail and tx_head
+ *	field of the struct es58x_priv into echo_skb
+ *	indexes. Properties: @fifo_mask = echos_skb_max - 1 where
+ *	echo_skb_max must be a power of two. Also, echo_skb_max must
+ *	not exceed the maximum size of the device internal TX FIFO
+ *	length. This parameter is used to control the network queue
+ *	wake/stop logic.
+ * @dql_min_limit: Dynamic Queue Limits (DQL) absolute minimum limit
+ *	of bytes allowed to be queued on this network device transmit
+ *	queue. Used by the Byte Queue Limits (BQL) to determine how
+ *	frequently the xmit_more flag will be set to true in
+ *	es58x_start_xmit(). Set this value higher to optimize for
+ *	throughput but be aware that it might have a negative impact
+ *	on the latency! This value can also be set dynamically. Please
+ *	refer to Documentation/ABI/testing/sysfs-class-net-queues for
+ *	more details.
+ * @tx_bulk_max: Maximum number of TX messages that can be sent in one
+ *	single URB packet.
+ * @urb_cmd_header_len: Length of the URB command header.
+ * @rx_urb_max: Number of RX URB to be allocated during device probe.
+ * @tx_urb_max: Number of TX URB to be allocated during device probe.
+ */
+struct es58x_parameters {
+	const struct can_bittiming_const *bittiming_const;
+	const struct can_bittiming_const *data_bittiming_const;
+	const struct can_tdc_const *tdc_const;
+	u32 bitrate_max;
+	struct can_clock clock;
+	u32 ctrlmode_supported;
+	u16 tx_start_of_frame;
+	u16 rx_start_of_frame;
+	u16 tx_urb_cmd_max_len;
+	u16 rx_urb_cmd_max_len;
+	u16 fifo_mask;
+	u16 dql_min_limit;
+	u8 tx_bulk_max;
+	u8 urb_cmd_header_len;
+	u8 rx_urb_max;
+	u8 tx_urb_max;
+};
+
+/**
+ * struct es58x_operators - Function pointers used to encode/decode
+ *	the TX/RX messages.
+ * @get_msg_len: Get field msg_len of the urb_cmd. The offset of
+ *	msg_len inside urb_cmd depends of the device model.
+ * @handle_urb_cmd: Decode the URB command received from the device
+ *	and dispatch it to the relevant sub function.
+ * @fill_urb_header: Fill the header of urb_cmd.
+ * @tx_can_msg: Encode a TX CAN message and add it to the bulk buffer
+ *	cmd_buf of es58x_dev.
+ * @enable_channel: Start the CAN channel.
+ * @disable_channel: Stop the CAN channel.
+ * @reset_device: Full reset of the device. N.B: this feature is only
+ *	present on the ES581.4. For ES58X FD devices, this field is
+ *	set to NULL.
+ * @get_timestamp: Request a timestamp from the ES58X device.
+ */
+struct es58x_operators {
+	u16 (*get_msg_len)(const union es58x_urb_cmd *urb_cmd);
+	int (*handle_urb_cmd)(struct es58x_device *es58x_dev,
+			      const union es58x_urb_cmd *urb_cmd);
+	void (*fill_urb_header)(union es58x_urb_cmd *urb_cmd, u8 cmd_type,
+				u8 cmd_id, u8 channel_idx, u16 cmd_len);
+	int (*tx_can_msg)(struct es58x_priv *priv, const struct sk_buff *skb);
+	int (*enable_channel)(struct es58x_priv *priv);
+	int (*disable_channel)(struct es58x_priv *priv);
+	int (*reset_device)(struct es58x_device *es58x_dev);
+	int (*get_timestamp)(struct es58x_device *es58x_dev);
+};
+
+/**
+ * struct es58x_device - All information specific to an ES58X device.
+ * @dev: Device information.
+ * @udev: USB device information.
+ * @netdev: Array of our CAN channels.
+ * @param: The constant parameters.
+ * @ops: Operators.
+ * @rx_pipe: USB reception pipe.
+ * @tx_pipe: USB transmission pipe.
+ * @rx_urbs: Anchor for received URBs.
+ * @tx_urbs_busy: Anchor for TX URBs which were send to the device.
+ * @tx_urbs_idle: Anchor for TX USB which are idle. This driver
+ *	allocates the memory for the URBs during the probe. When a TX
+ *	URB is needed, it can be taken from this anchor. The network
+ *	queue wake/stop logic should prevent this URB from getting
+ *	empty. Please refer to es58x_get_tx_urb() for more details.
+ * @tx_urbs_idle_cnt: number of urbs in @tx_urbs_idle.
+ * @opened_channel_cnt: number of channels opened (c.f. es58x_open()
+ *	and es58x_stop()).
+ * @ktime_req_ns: kernel timestamp when es58x_set_realtime_diff_ns()
+ *	was called.
+ * @realtime_diff_ns: difference in nanoseconds between the clocks of
+ *	the ES58X device and the kernel.
+ * @timestamps: a temporary buffer to store the time stamps before
+ *	feeding them to es58x_can_get_echo_skb(). Can only be used
+ *	in RX branches.
+ * @rx_max_packet_size: Maximum length of bulk-in URB.
+ * @num_can_ch: Number of CAN channel (i.e. number of elements of @netdev).
+ * @rx_cmd_buf_len: Length of @rx_cmd_buf.
+ * @rx_cmd_buf: The device might split the URB commands in an
+ *	arbitrary amount of pieces. This buffer is used to concatenate
+ *	all those pieces. Can only be used in RX branches. This field
+ *	has to be the last one of the structure because it is has a
+ *	flexible size (c.f. es58x_sizeof_es58x_device() function).
+ */
+struct es58x_device {
+	struct device *dev;
+	struct usb_device *udev;
+	struct net_device *netdev[ES58X_NUM_CAN_CH_MAX];
+
+	const struct es58x_parameters *param;
+	const struct es58x_operators *ops;
+
+	int rx_pipe;
+	int tx_pipe;
+
+	struct usb_anchor rx_urbs;
+	struct usb_anchor tx_urbs_busy;
+	struct usb_anchor tx_urbs_idle;
+	atomic_t tx_urbs_idle_cnt;
+	atomic_t opened_channel_cnt;
+
+	u64 ktime_req_ns;
+	s64 realtime_diff_ns;
+
+	u64 timestamps[ES58X_ECHO_BULK_MAX];
+
+	u16 rx_max_packet_size;
+	u8 num_can_ch;
+
+	u16 rx_cmd_buf_len;
+	union es58x_urb_cmd rx_cmd_buf;
+};
+
+/**
+ * es58x_sizeof_es58x_device() - Calculate the maximum length of
+ *	struct es58x_device.
+ * @es58x_dev_param: The constant parameters of the device.
+ *
+ * The length of struct es58x_device depends on the length of its last
+ * field: rx_cmd_buf. This macro allows to optimize the memory
+ * allocation.
+ *
+ * Return: length of struct es58x_device.
+ */
+static inline size_t es58x_sizeof_es58x_device(const struct es58x_parameters
+					       *es58x_dev_param)
+{
+	return offsetof(struct es58x_device, rx_cmd_buf) +
+		es58x_dev_param->rx_urb_cmd_max_len;
+}
+
+static inline int __es58x_check_msg_len(const struct device *dev,
+					const char *stringified_msg,
+					size_t actual_len, size_t expected_len)
+{
+	if (expected_len != actual_len) {
+		dev_err(dev,
+			"Length of %s is %zu but received command is %zu.\n",
+			stringified_msg, expected_len, actual_len);
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/**
+ * es58x_check_msg_len() - Check the size of a received message.
+ * @dev: Device, used to print error messages.
+ * @msg: Received message, must not be a pointer.
+ * @actual_len: Length of the message as advertised in the command header.
+ *
+ * Must be a macro in order to accept the different types of messages
+ * as an input. Can be use with any of the messages which have a fixed
+ * length. Check for an exact match of the size.
+ *
+ * Return: zero on success, -EMSGSIZE if @actual_len differs from the
+ * expected length.
+ */
+#define es58x_check_msg_len(dev, msg, actual_len)			\
+	__es58x_check_msg_len(dev, __stringify(msg),			\
+			      actual_len, sizeof(msg))
+
+static inline int __es58x_check_msg_max_len(const struct device *dev,
+					    const char *stringified_msg,
+					    size_t actual_len,
+					    size_t expected_len)
+{
+	if (actual_len > expected_len) {
+		dev_err(dev,
+			"Maximum length for %s is %zu but received command is %zu.\n",
+			stringified_msg, expected_len, actual_len);
+		return -EOVERFLOW;
+	}
+	return 0;
+}
+
+/**
+ * es58x_check_msg_max_len() - Check the maximum size of a received message.
+ * @dev: Device, used to print error messages.
+ * @msg: Received message, must not be a pointer.
+ * @actual_len: Length of the message as advertised in the command header.
+ *
+ * Must be a macro in order to accept the different types of messages
+ * as an input. To be used with the messages of variable sizes. Only
+ * check that the message is not bigger than the maximum expected
+ * size.
+ *
+ * Return: zero on success, -EOVERFLOW if @actual_len is greater than
+ * the expected length.
+ */
+#define es58x_check_msg_max_len(dev, msg, actual_len)			\
+	__es58x_check_msg_max_len(dev, __stringify(msg),		\
+				  actual_len, sizeof(msg))
+
+static inline int __es58x_msg_num_element(const struct device *dev,
+					  const char *stringified_msg,
+					  size_t actual_len, size_t msg_len,
+					  size_t elem_len)
+{
+	size_t actual_num_elem = actual_len / elem_len;
+	size_t expected_num_elem = msg_len / elem_len;
+
+	if (actual_num_elem == 0) {
+		dev_err(dev,
+			"Minimum length for %s is %zu but received command is %zu.\n",
+			stringified_msg, elem_len, actual_len);
+		return -EMSGSIZE;
+	} else if ((actual_len % elem_len) != 0) {
+		dev_err(dev,
+			"Received command length: %zu is not a multiple of %s[0]: %zu\n",
+			actual_len, stringified_msg, elem_len);
+		return -EMSGSIZE;
+	} else if (actual_num_elem > expected_num_elem) {
+		dev_err(dev,
+			"Array %s is supposed to have %zu elements each of size %zu...\n",
+			stringified_msg, expected_num_elem, elem_len);
+		dev_err(dev,
+			"... But received command has %zu elements (total length %zu).\n",
+			actual_num_elem, actual_len);
+		return -EOVERFLOW;
+	}
+	return actual_num_elem;
+}
+
+/**
+ * es58x_msg_num_element() - Check size and give the number of
+ *	elements in a message of array type.
+ * @dev: Device, used to print error messages.
+ * @msg: Received message, must be an array.
+ * @actual_len: Length of the message as advertised in the command
+ *	header.
+ *
+ * Must be a macro in order to accept the different types of messages
+ * as an input. To be used on message of array type. Array's element
+ * has to be of fixed size (else use es58x_check_msg_max_len()). Check
+ * that the total length is an exact multiple of the length of a
+ * single element.
+ *
+ * Return: number of elements in the array on success, -EOVERFLOW if
+ * @actual_len is greater than the expected length, -EMSGSIZE if
+ * @actual_len is not a multiple of a single element.
+ */
+#define es58x_msg_num_element(dev, msg, actual_len)			\
+({									\
+	size_t __elem_len = sizeof((msg)[0]) + __must_be_array(msg);	\
+	__es58x_msg_num_element(dev, __stringify(msg), actual_len,	\
+				sizeof(msg), __elem_len);		\
+})
+
+/**
+ * es58x_priv() - Get the priv member and cast it to struct es58x_priv.
+ * @netdev: CAN network device.
+ *
+ * Return: ES58X device.
+ */
+static inline struct es58x_priv *es58x_priv(struct net_device *netdev)
+{
+	return (struct es58x_priv *)netdev_priv(netdev);
+}
+
+/**
+ * ES58X_SIZEOF_URB_CMD() - Calculate the maximum length of an urb
+ *	command for a given message field name.
+ * @es58x_urb_cmd_type: type (either "struct es581_4_urb_cmd" or
+ *	"struct es58x_fd_urb_cmd").
+ * @msg_field: name of the message field.
+ *
+ * Must be a macro in order to accept the different command types as
+ * an input.
+ *
+ * Return: length of the urb command.
+ */
+#define ES58X_SIZEOF_URB_CMD(es58x_urb_cmd_type, msg_field)		\
+	(offsetof(es58x_urb_cmd_type, raw_msg)				\
+		+ sizeof_field(es58x_urb_cmd_type, msg_field)		\
+		+ sizeof_field(es58x_urb_cmd_type,			\
+			       reserved_for_crc16_do_not_use))
+
+/**
+ * es58x_get_urb_cmd_len() - Calculate the actual length of an urb
+ *	command for a given message length.
+ * @es58x_dev: ES58X device.
+ * @msg_len: Length of the message.
+ *
+ * Add the header and CRC lengths to the message length.
+ *
+ * Return: length of the urb command.
+ */
+static inline size_t es58x_get_urb_cmd_len(struct es58x_device *es58x_dev,
+					   u16 msg_len)
+{
+	return es58x_dev->param->urb_cmd_header_len + msg_len + sizeof(u16);
+}
+
+/**
+ * es58x_get_netdev() - Get the network device.
+ * @es58x_dev: ES58X device.
+ * @channel_no: The channel number as advertised in the urb command.
+ * @channel_idx_offset: Some of the ES58x starts channel numbering
+ *	from 0 (ES58X FD), others from 1 (ES581.4).
+ * @netdev: CAN network device.
+ *
+ * Do a sanity check on the index provided by the device.
+ *
+ * Return: zero on success, -ECHRNG if the received channel number is
+ *	out of range and -ENODEV if the network device is not yet
+ *	configured.
+ */
+static inline int es58x_get_netdev(struct es58x_device *es58x_dev,
+				   int channel_no, int channel_idx_offset,
+				   struct net_device **netdev)
+{
+	int channel_idx = channel_no - channel_idx_offset;
+
+	*netdev = NULL;
+	if (channel_idx < 0 || channel_idx >= es58x_dev->num_can_ch)
+		return -ECHRNG;
+
+	*netdev = es58x_dev->netdev[channel_idx];
+	if (!netdev || !netif_device_present(*netdev))
+		return -ENODEV;
+
+	return 0;
+}
+
+/**
+ * es58x_get_raw_can_id() - Get the CAN ID.
+ * @cf: CAN frame.
+ *
+ * Mask the CAN ID in order to only keep the significant bits.
+ *
+ * Return: the raw value of the CAN ID.
+ */
+static inline int es58x_get_raw_can_id(const struct can_frame *cf)
+{
+	if (cf->can_id & CAN_EFF_FLAG)
+		return cf->can_id & CAN_EFF_MASK;
+	else
+		return cf->can_id & CAN_SFF_MASK;
+}
+
+/**
+ * es58x_get_flags() - Get the CAN flags.
+ * @skb: socket buffer of a CAN message.
+ *
+ * Return: the CAN flag as an enum es58x_flag.
+ */
+static inline enum es58x_flag es58x_get_flags(const struct sk_buff *skb)
+{
+	struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+	enum es58x_flag es58x_flags = 0;
+
+	if (cf->can_id & CAN_EFF_FLAG)
+		es58x_flags |= ES58X_FLAG_EFF;
+
+	if (can_is_canfd_skb(skb)) {
+		es58x_flags |= ES58X_FLAG_FD_DATA;
+		if (cf->flags & CANFD_BRS)
+			es58x_flags |= ES58X_FLAG_FD_BRS;
+		if (cf->flags & CANFD_ESI)
+			es58x_flags |= ES58X_FLAG_FD_ESI;
+	} else if (cf->can_id & CAN_RTR_FLAG)
+		/* Remote frames are only defined in Classical CAN frames */
+		es58x_flags |= ES58X_FLAG_RTR;
+
+	return es58x_flags;
+}
+
+int es58x_can_get_echo_skb(struct net_device *netdev, u32 packet_idx,
+			   u64 *tstamps, unsigned int pkts);
+int es58x_tx_ack_msg(struct net_device *netdev, u16 tx_free_entries,
+		     enum es58x_ret_u32 rx_cmd_ret_u32);
+int es58x_rx_can_msg(struct net_device *netdev, u64 timestamp, const u8 *data,
+		     canid_t can_id, enum es58x_flag es58x_flags, u8 dlc);
+int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
+		     enum es58x_event event, u64 timestamp);
+void es58x_rx_timestamp(struct es58x_device *es58x_dev, u64 timestamp);
+int es58x_rx_cmd_ret_u8(struct device *dev, enum es58x_ret_type cmd_ret_type,
+			enum es58x_ret_u8 rx_cmd_ret_u8);
+int es58x_rx_cmd_ret_u32(struct net_device *netdev,
+			 enum es58x_ret_type cmd_ret_type,
+			 enum es58x_ret_u32 rx_cmd_ret_u32);
+int es58x_send_msg(struct es58x_device *es58x_dev, u8 cmd_type, u8 cmd_id,
+		   const void *msg, u16 cmd_len, int channel_idx);
+
+extern const struct es58x_parameters es581_4_param;
+extern const struct es58x_operators es581_4_ops;
+
+extern const struct es58x_parameters es58x_fd_param;
+extern const struct es58x_operators es58x_fd_ops;
+
+#endif /* __ES58X_COMMON_H__ */
-- 
cgit v1.2.3


From 1dfb6005a60b13d18aacf190b853bf6f89d31820 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 10 Apr 2021 18:59:47 +0900
Subject: can: etas_es58x: add support for ETAS ES581.4 CAN USB interface

This patch adds support for the ES581.4 interface from ETAS
GmbH (https://www.etas.com/en/products/es58x.php).

Link: https://lore.kernel.org/r/20210410095948.233305-3-mailhol.vincent@wanadoo.fr
Co-developed-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/etas_es58x/Makefile     |   2 +-
 drivers/net/can/usb/etas_es58x/es581_4.c    | 507 ++++++++++++++++++++++++++++
 drivers/net/can/usb/etas_es58x/es581_4.h    | 207 ++++++++++++
 drivers/net/can/usb/etas_es58x/es58x_core.c |   9 +-
 drivers/net/can/usb/etas_es58x/es58x_core.h |   5 +-
 5 files changed, 726 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/can/usb/etas_es58x/es581_4.c
 create mode 100644 drivers/net/can/usb/etas_es58x/es581_4.h

diff --git a/drivers/net/can/usb/etas_es58x/Makefile b/drivers/net/can/usb/etas_es58x/Makefile
index 60a1ac935a69..e4753ec9cb60 100644
--- a/drivers/net/can/usb/etas_es58x/Makefile
+++ b/drivers/net/can/usb/etas_es58x/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x.o
-etas_es58x-y = es58x_core.o
+etas_es58x-y = es58x_core.o es581_4.o
diff --git a/drivers/net/can/usb/etas_es58x/es581_4.c b/drivers/net/can/usb/etas_es58x/es581_4.c
new file mode 100644
index 000000000000..1985f772fc3c
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es581_4.c
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es581_4.c: Adds support to ETAS ES581.4.
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include "es58x_core.h"
+#include "es581_4.h"
+
+/**
+ * es581_4_sizeof_rx_tx_msg() - Calculate the actual length of the
+ *	structure of a rx or tx message.
+ * @msg: message of variable length, must have a dlc field.
+ *
+ * Even if RTR frames have actually no payload, the ES58X devices
+ * still expect it. Must be a macro in order to accept several types
+ * (struct es581_4_tx_can_msg and struct es581_4_rx_can_msg) as an
+ * input.
+ *
+ * Return: length of the message.
+ */
+#define es581_4_sizeof_rx_tx_msg(msg)				\
+	offsetof(typeof(msg), data[can_cc_dlc2len((msg).dlc)])
+
+static u16 es581_4_get_msg_len(const union es58x_urb_cmd *urb_cmd)
+{
+	return get_unaligned_le16(&urb_cmd->es581_4_urb_cmd.msg_len);
+}
+
+static int es581_4_echo_msg(struct es58x_device *es58x_dev,
+			    const struct es581_4_urb_cmd *es581_4_urb_cmd)
+{
+	struct net_device *netdev;
+	const struct es581_4_bulk_echo_msg *bulk_echo_msg;
+	const struct es581_4_echo_msg *echo_msg;
+	u64 *tstamps = es58x_dev->timestamps;
+	u16 msg_len;
+	u32 first_packet_idx, packet_idx;
+	unsigned int dropped = 0;
+	int i, num_element, ret;
+
+	bulk_echo_msg = &es581_4_urb_cmd->bulk_echo_msg;
+	msg_len = get_unaligned_le16(&es581_4_urb_cmd->msg_len) -
+	    sizeof(bulk_echo_msg->channel_no);
+	num_element = es58x_msg_num_element(es58x_dev->dev,
+					    bulk_echo_msg->echo_msg, msg_len);
+	if (num_element <= 0)
+		return num_element;
+
+	ret = es58x_get_netdev(es58x_dev, bulk_echo_msg->channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	echo_msg = &bulk_echo_msg->echo_msg[0];
+	first_packet_idx = get_unaligned_le32(&echo_msg->packet_idx);
+	packet_idx = first_packet_idx;
+	for (i = 0; i < num_element; i++) {
+		u32 tmp_idx;
+
+		echo_msg = &bulk_echo_msg->echo_msg[i];
+		tmp_idx = get_unaligned_le32(&echo_msg->packet_idx);
+		if (tmp_idx == packet_idx - 1) {
+			if (net_ratelimit())
+				netdev_warn(netdev,
+					    "Received echo packet idx %u twice\n",
+					    packet_idx - 1);
+			dropped++;
+			continue;
+		}
+		if (tmp_idx != packet_idx) {
+			netdev_err(netdev, "Echo packet idx jumped from %u to %u\n",
+				   packet_idx - 1, echo_msg->packet_idx);
+			return -EBADMSG;
+		}
+
+		tstamps[i] = get_unaligned_le64(&echo_msg->timestamp);
+		packet_idx++;
+	}
+
+	netdev->stats.tx_dropped += dropped;
+	return es58x_can_get_echo_skb(netdev, first_packet_idx,
+				      tstamps, num_element - dropped);
+}
+
+static int es581_4_rx_can_msg(struct es58x_device *es58x_dev,
+			      const struct es581_4_urb_cmd *es581_4_urb_cmd,
+			      u16 msg_len)
+{
+	const struct device *dev = es58x_dev->dev;
+	struct net_device *netdev;
+	int pkts, num_element, channel_no, ret;
+
+	num_element = es58x_msg_num_element(dev, es581_4_urb_cmd->rx_can_msg,
+					    msg_len);
+	if (num_element <= 0)
+		return num_element;
+
+	channel_no = es581_4_urb_cmd->rx_can_msg[0].channel_no;
+	ret = es58x_get_netdev(es58x_dev, channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	if (!netif_running(netdev)) {
+		if (net_ratelimit())
+			netdev_info(netdev,
+				    "%s: %s is down, dropping %d rx packets\n",
+				    __func__, netdev->name, num_element);
+		netdev->stats.rx_dropped += num_element;
+		return 0;
+	}
+
+	for (pkts = 0; pkts < num_element; pkts++) {
+		const struct es581_4_rx_can_msg *rx_can_msg =
+		    &es581_4_urb_cmd->rx_can_msg[pkts];
+		u64 tstamp = get_unaligned_le64(&rx_can_msg->timestamp);
+		canid_t can_id = get_unaligned_le32(&rx_can_msg->can_id);
+
+		if (channel_no != rx_can_msg->channel_no)
+			return -EBADMSG;
+
+		ret = es58x_rx_can_msg(netdev, tstamp, rx_can_msg->data,
+				       can_id, rx_can_msg->flags,
+				       rx_can_msg->dlc);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static int es581_4_rx_err_msg(struct es58x_device *es58x_dev,
+			      const struct es581_4_rx_err_msg *rx_err_msg)
+{
+	struct net_device *netdev;
+	enum es58x_err error = get_unaligned_le32(&rx_err_msg->error);
+	int ret;
+
+	ret = es58x_get_netdev(es58x_dev, rx_err_msg->channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	return es58x_rx_err_msg(netdev, error, 0,
+				get_unaligned_le64(&rx_err_msg->timestamp));
+}
+
+static int es581_4_rx_event_msg(struct es58x_device *es58x_dev,
+				const struct es581_4_rx_event_msg *rx_event_msg)
+{
+	struct net_device *netdev;
+	enum es58x_event event = get_unaligned_le32(&rx_event_msg->event);
+	int ret;
+
+	ret = es58x_get_netdev(es58x_dev, rx_event_msg->channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	return es58x_rx_err_msg(netdev, 0, event,
+				get_unaligned_le64(&rx_event_msg->timestamp));
+}
+
+static int es581_4_rx_cmd_ret_u32(struct es58x_device *es58x_dev,
+				  const struct es581_4_urb_cmd *es581_4_urb_cmd,
+				  enum es58x_ret_type ret_type)
+{
+	struct net_device *netdev;
+	const struct es581_4_rx_cmd_ret *rx_cmd_ret;
+	u16 msg_len = get_unaligned_le16(&es581_4_urb_cmd->msg_len);
+	int ret;
+
+	ret = es58x_check_msg_len(es58x_dev->dev,
+				  es581_4_urb_cmd->rx_cmd_ret, msg_len);
+	if (ret)
+		return ret;
+
+	rx_cmd_ret = &es581_4_urb_cmd->rx_cmd_ret;
+
+	ret = es58x_get_netdev(es58x_dev, rx_cmd_ret->channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	return es58x_rx_cmd_ret_u32(netdev, ret_type,
+				    get_unaligned_le32(&rx_cmd_ret->rx_cmd_ret_le32));
+}
+
+static int es581_4_tx_ack_msg(struct es58x_device *es58x_dev,
+			      const struct es581_4_urb_cmd *es581_4_urb_cmd)
+{
+	struct net_device *netdev;
+	const struct es581_4_tx_ack_msg *tx_ack_msg;
+	u16 msg_len = get_unaligned_le16(&es581_4_urb_cmd->msg_len);
+	int ret;
+
+	tx_ack_msg = &es581_4_urb_cmd->tx_ack_msg;
+	ret = es58x_check_msg_len(es58x_dev->dev, *tx_ack_msg, msg_len);
+	if (ret)
+		return ret;
+
+	if (tx_ack_msg->rx_cmd_ret_u8 != ES58X_RET_U8_OK)
+		return es58x_rx_cmd_ret_u8(es58x_dev->dev,
+					   ES58X_RET_TYPE_TX_MSG,
+					   tx_ack_msg->rx_cmd_ret_u8);
+
+	ret = es58x_get_netdev(es58x_dev, tx_ack_msg->channel_no,
+			       ES581_4_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	return es58x_tx_ack_msg(netdev,
+				get_unaligned_le16(&tx_ack_msg->tx_free_entries),
+				ES58X_RET_U32_OK);
+}
+
+static int es581_4_dispatch_rx_cmd(struct es58x_device *es58x_dev,
+				   const struct es581_4_urb_cmd *es581_4_urb_cmd)
+{
+	const struct device *dev = es58x_dev->dev;
+	u16 msg_len = get_unaligned_le16(&es581_4_urb_cmd->msg_len);
+	enum es581_4_rx_type rx_type = es581_4_urb_cmd->rx_can_msg[0].rx_type;
+	int ret = 0;
+
+	switch (rx_type) {
+	case ES581_4_RX_TYPE_MESSAGE:
+		return es581_4_rx_can_msg(es58x_dev, es581_4_urb_cmd, msg_len);
+
+	case ES581_4_RX_TYPE_ERROR:
+		ret = es58x_check_msg_len(dev, es581_4_urb_cmd->rx_err_msg,
+					  msg_len);
+		if (ret < 0)
+			return ret;
+		return es581_4_rx_err_msg(es58x_dev,
+					  &es581_4_urb_cmd->rx_err_msg);
+
+	case ES581_4_RX_TYPE_EVENT:
+		ret = es58x_check_msg_len(dev, es581_4_urb_cmd->rx_event_msg,
+					  msg_len);
+		if (ret < 0)
+			return ret;
+		return es581_4_rx_event_msg(es58x_dev,
+					    &es581_4_urb_cmd->rx_event_msg);
+
+	default:
+		dev_err(dev, "%s: Unknown rx_type 0x%02X\n", __func__, rx_type);
+		return -EBADRQC;
+	}
+}
+
+static int es581_4_handle_urb_cmd(struct es58x_device *es58x_dev,
+				  const union es58x_urb_cmd *urb_cmd)
+{
+	const struct es581_4_urb_cmd *es581_4_urb_cmd;
+	struct device *dev = es58x_dev->dev;
+	u16 msg_len = es581_4_get_msg_len(urb_cmd);
+	int ret;
+
+	es581_4_urb_cmd = &urb_cmd->es581_4_urb_cmd;
+
+	if (es581_4_urb_cmd->cmd_type != ES581_4_CAN_COMMAND_TYPE) {
+		dev_err(dev, "%s: Unknown command type (0x%02X)\n",
+			__func__, es581_4_urb_cmd->cmd_type);
+		return -EBADRQC;
+	}
+
+	switch ((enum es581_4_cmd_id)es581_4_urb_cmd->cmd_id) {
+	case ES581_4_CMD_ID_SET_BITTIMING:
+		return es581_4_rx_cmd_ret_u32(es58x_dev, es581_4_urb_cmd,
+					      ES58X_RET_TYPE_SET_BITTIMING);
+
+	case ES581_4_CMD_ID_ENABLE_CHANNEL:
+		return es581_4_rx_cmd_ret_u32(es58x_dev, es581_4_urb_cmd,
+					      ES58X_RET_TYPE_ENABLE_CHANNEL);
+
+	case ES581_4_CMD_ID_TX_MSG:
+		return es581_4_tx_ack_msg(es58x_dev, es581_4_urb_cmd);
+
+	case ES581_4_CMD_ID_RX_MSG:
+		return es581_4_dispatch_rx_cmd(es58x_dev, es581_4_urb_cmd);
+
+	case ES581_4_CMD_ID_RESET_RX:
+		ret = es581_4_rx_cmd_ret_u32(es58x_dev, es581_4_urb_cmd,
+					     ES58X_RET_TYPE_RESET_RX);
+		return ret;
+
+	case ES581_4_CMD_ID_RESET_TX:
+		ret = es581_4_rx_cmd_ret_u32(es58x_dev, es581_4_urb_cmd,
+					     ES58X_RET_TYPE_RESET_TX);
+		return ret;
+
+	case ES581_4_CMD_ID_DISABLE_CHANNEL:
+		return es581_4_rx_cmd_ret_u32(es58x_dev, es581_4_urb_cmd,
+					      ES58X_RET_TYPE_DISABLE_CHANNEL);
+
+	case ES581_4_CMD_ID_TIMESTAMP:
+		ret = es58x_check_msg_len(dev, es581_4_urb_cmd->timestamp,
+					  msg_len);
+		if (ret < 0)
+			return ret;
+		es58x_rx_timestamp(es58x_dev,
+				   get_unaligned_le64(&es581_4_urb_cmd->timestamp));
+		return 0;
+
+	case ES581_4_CMD_ID_ECHO:
+		return es581_4_echo_msg(es58x_dev, es581_4_urb_cmd);
+
+	case ES581_4_CMD_ID_DEVICE_ERR:
+		ret = es58x_check_msg_len(dev, es581_4_urb_cmd->rx_cmd_ret_u8,
+					  msg_len);
+		if (ret)
+			return ret;
+		return es58x_rx_cmd_ret_u8(dev, ES58X_RET_TYPE_DEVICE_ERR,
+					   es581_4_urb_cmd->rx_cmd_ret_u8);
+
+	default:
+		dev_warn(dev, "%s: Unexpected command ID: 0x%02X\n",
+			 __func__, es581_4_urb_cmd->cmd_id);
+		return -EBADRQC;
+	}
+}
+
+static void es581_4_fill_urb_header(union es58x_urb_cmd *urb_cmd, u8 cmd_type,
+				    u8 cmd_id, u8 channel_idx, u16 msg_len)
+{
+	struct es581_4_urb_cmd *es581_4_urb_cmd = &urb_cmd->es581_4_urb_cmd;
+
+	es581_4_urb_cmd->SOF = cpu_to_le16(es581_4_param.tx_start_of_frame);
+	es581_4_urb_cmd->cmd_type = cmd_type;
+	es581_4_urb_cmd->cmd_id = cmd_id;
+	es581_4_urb_cmd->msg_len = cpu_to_le16(msg_len);
+}
+
+static int es581_4_tx_can_msg(struct es58x_priv *priv,
+			      const struct sk_buff *skb)
+{
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	union es58x_urb_cmd *urb_cmd = priv->tx_urb->transfer_buffer;
+	struct es581_4_urb_cmd *es581_4_urb_cmd = &urb_cmd->es581_4_urb_cmd;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	struct es581_4_tx_can_msg *tx_can_msg;
+	u16 msg_len;
+	int ret;
+
+	if (can_is_canfd_skb(skb))
+		return -EMSGSIZE;
+
+	if (priv->tx_can_msg_cnt == 0) {
+		msg_len = 1; /* struct es581_4_bulk_tx_can_msg:num_can_msg */
+		es581_4_fill_urb_header(urb_cmd, ES581_4_CAN_COMMAND_TYPE,
+					ES581_4_CMD_ID_TX_MSG,
+					priv->channel_idx, msg_len);
+		es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg = 0;
+	} else {
+		msg_len = es581_4_get_msg_len(urb_cmd);
+	}
+
+	ret = es58x_check_msg_max_len(es58x_dev->dev,
+				      es581_4_urb_cmd->bulk_tx_can_msg,
+				      msg_len + sizeof(*tx_can_msg));
+	if (ret)
+		return ret;
+
+	/* Fill message contents. */
+	tx_can_msg = (struct es581_4_tx_can_msg *)
+	    &es581_4_urb_cmd->bulk_tx_can_msg.tx_can_msg_buf[msg_len - 1];
+	put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
+	put_unaligned_le32(priv->tx_head, &tx_can_msg->packet_idx);
+	put_unaligned_le16((u16)es58x_get_flags(skb), &tx_can_msg->flags);
+	tx_can_msg->channel_no = priv->channel_idx + ES581_4_CHANNEL_IDX_OFFSET;
+	tx_can_msg->dlc = can_get_cc_dlc(cf, priv->can.ctrlmode);
+
+	memcpy(tx_can_msg->data, cf->data, cf->len);
+
+	/* Calculate new sizes. */
+	es581_4_urb_cmd->bulk_tx_can_msg.num_can_msg++;
+	msg_len += es581_4_sizeof_rx_tx_msg(*tx_can_msg);
+	priv->tx_urb->transfer_buffer_length = es58x_get_urb_cmd_len(es58x_dev,
+								     msg_len);
+	es581_4_urb_cmd->msg_len = cpu_to_le16(msg_len);
+
+	return 0;
+}
+
+static int es581_4_set_bittiming(struct es58x_priv *priv)
+{
+	struct es581_4_tx_conf_msg tx_conf_msg = { 0 };
+	struct can_bittiming *bt = &priv->can.bittiming;
+
+	tx_conf_msg.bitrate = cpu_to_le32(bt->bitrate);
+	/* bt->sample_point is in tenth of percent. Convert it to percent. */
+	tx_conf_msg.sample_point = cpu_to_le32(bt->sample_point / 10U);
+	tx_conf_msg.samples_per_bit = cpu_to_le32(ES58X_SAMPLES_PER_BIT_ONE);
+	tx_conf_msg.bit_time = cpu_to_le32(can_bit_time(bt));
+	tx_conf_msg.sjw = cpu_to_le32(bt->sjw);
+	tx_conf_msg.sync_edge = cpu_to_le32(ES58X_SYNC_EDGE_SINGLE);
+	tx_conf_msg.physical_layer =
+	    cpu_to_le32(ES58X_PHYSICAL_LAYER_HIGH_SPEED);
+	tx_conf_msg.echo_mode = cpu_to_le32(ES58X_ECHO_ON);
+	tx_conf_msg.channel_no = priv->channel_idx + ES581_4_CHANNEL_IDX_OFFSET;
+
+	return es58x_send_msg(priv->es58x_dev, ES581_4_CAN_COMMAND_TYPE,
+			      ES581_4_CMD_ID_SET_BITTIMING, &tx_conf_msg,
+			      sizeof(tx_conf_msg), priv->channel_idx);
+}
+
+static int es581_4_enable_channel(struct es58x_priv *priv)
+{
+	int ret;
+	u8 msg = priv->channel_idx + ES581_4_CHANNEL_IDX_OFFSET;
+
+	ret = es581_4_set_bittiming(priv);
+	if (ret)
+		return ret;
+
+	return es58x_send_msg(priv->es58x_dev, ES581_4_CAN_COMMAND_TYPE,
+			      ES581_4_CMD_ID_ENABLE_CHANNEL, &msg, sizeof(msg),
+			      priv->channel_idx);
+}
+
+static int es581_4_disable_channel(struct es58x_priv *priv)
+{
+	u8 msg = priv->channel_idx + ES581_4_CHANNEL_IDX_OFFSET;
+
+	return es58x_send_msg(priv->es58x_dev, ES581_4_CAN_COMMAND_TYPE,
+			      ES581_4_CMD_ID_DISABLE_CHANNEL, &msg, sizeof(msg),
+			      priv->channel_idx);
+}
+
+static int es581_4_reset_device(struct es58x_device *es58x_dev)
+{
+	return es58x_send_msg(es58x_dev, ES581_4_CAN_COMMAND_TYPE,
+			      ES581_4_CMD_ID_RESET_DEVICE,
+			      ES58X_EMPTY_MSG, 0, ES58X_CHANNEL_IDX_NA);
+}
+
+static int es581_4_get_timestamp(struct es58x_device *es58x_dev)
+{
+	return es58x_send_msg(es58x_dev, ES581_4_CAN_COMMAND_TYPE,
+			      ES581_4_CMD_ID_TIMESTAMP,
+			      ES58X_EMPTY_MSG, 0, ES58X_CHANNEL_IDX_NA);
+}
+
+/* Nominal bittiming constants for ES581.4 as specified in the
+ * microcontroller datasheet: "Stellaris(R) LM3S5B91 Microcontroller"
+ * table 17-4 "CAN Protocol Ranges" from Texas Instruments.
+ */
+static const struct can_bittiming_const es581_4_bittiming_const = {
+	.name = "ES581.4",
+	.tseg1_min = 1,
+	.tseg1_max = 8,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 128,
+	.brp_inc = 1
+};
+
+const struct es58x_parameters es581_4_param = {
+	.bittiming_const = &es581_4_bittiming_const,
+	.data_bittiming_const = NULL,
+	.tdc_const = NULL,
+	.bitrate_max = 1 * CAN_MBPS,
+	.clock = {.freq = 50 * CAN_MHZ},
+	.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC,
+	.tx_start_of_frame = 0xAFAF,
+	.rx_start_of_frame = 0xFAFA,
+	.tx_urb_cmd_max_len = ES581_4_TX_URB_CMD_MAX_LEN,
+	.rx_urb_cmd_max_len = ES581_4_RX_URB_CMD_MAX_LEN,
+	/* Size of internal device TX queue is 330.
+	 *
+	 * However, we witnessed some ES58X_ERR_PROT_CRC errors from
+	 * the device and thus, echo_skb_max was lowered to the
+	 * empirical value of 75 which seems stable and then rounded
+	 * down to become a power of two.
+	 *
+	 * Root cause of those ES58X_ERR_PROT_CRC errors is still
+	 * unclear.
+	 */
+	.fifo_mask = 63, /* echo_skb_max = 64 */
+	.dql_min_limit = CAN_FRAME_LEN_MAX * 50, /* Empirical value. */
+	.tx_bulk_max = ES581_4_TX_BULK_MAX,
+	.urb_cmd_header_len = ES581_4_URB_CMD_HEADER_LEN,
+	.rx_urb_max = ES58X_RX_URBS_MAX,
+	.tx_urb_max = ES58X_TX_URBS_MAX
+};
+
+const struct es58x_operators es581_4_ops = {
+	.get_msg_len = es581_4_get_msg_len,
+	.handle_urb_cmd = es581_4_handle_urb_cmd,
+	.fill_urb_header = es581_4_fill_urb_header,
+	.tx_can_msg = es581_4_tx_can_msg,
+	.enable_channel = es581_4_enable_channel,
+	.disable_channel = es581_4_disable_channel,
+	.reset_device = es581_4_reset_device,
+	.get_timestamp = es581_4_get_timestamp
+};
diff --git a/drivers/net/can/usb/etas_es58x/es581_4.h b/drivers/net/can/usb/etas_es58x/es581_4.h
new file mode 100644
index 000000000000..4bc60a6df697
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es581_4.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es581_4.h: Definitions and declarations specific to ETAS
+ * ES581.4.
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#ifndef __ES581_4_H__
+#define __ES581_4_H__
+
+#include <linux/types.h>
+
+#define ES581_4_NUM_CAN_CH 2
+#define ES581_4_CHANNEL_IDX_OFFSET 1
+
+#define ES581_4_TX_BULK_MAX 25
+#define ES581_4_RX_BULK_MAX 30
+#define ES581_4_ECHO_BULK_MAX 30
+
+enum es581_4_cmd_type {
+	ES581_4_CAN_COMMAND_TYPE = 0x45
+};
+
+enum es581_4_cmd_id {
+	ES581_4_CMD_ID_OPEN_CHANNEL = 0x01,
+	ES581_4_CMD_ID_CLOSE_CHANNEL = 0x02,
+	ES581_4_CMD_ID_SET_BITTIMING = 0x03,
+	ES581_4_CMD_ID_ENABLE_CHANNEL = 0x04,
+	ES581_4_CMD_ID_TX_MSG = 0x05,
+	ES581_4_CMD_ID_RX_MSG = 0x06,
+	ES581_4_CMD_ID_RESET_RX = 0x0A,
+	ES581_4_CMD_ID_RESET_TX = 0x0B,
+	ES581_4_CMD_ID_DISABLE_CHANNEL = 0x0C,
+	ES581_4_CMD_ID_TIMESTAMP = 0x0E,
+	ES581_4_CMD_ID_RESET_DEVICE = 0x28,
+	ES581_4_CMD_ID_ECHO = 0x71,
+	ES581_4_CMD_ID_DEVICE_ERR = 0x72
+};
+
+enum es581_4_rx_type {
+	ES581_4_RX_TYPE_MESSAGE = 1,
+	ES581_4_RX_TYPE_ERROR = 3,
+	ES581_4_RX_TYPE_EVENT = 4
+};
+
+/**
+ * struct es581_4_tx_conf_msg - Channel configuration.
+ * @bitrate: Bitrate.
+ * @sample_point: Sample point is in percent [0..100].
+ * @samples_per_bit: type enum es58x_samples_per_bit.
+ * @bit_time: Number of time quanta in one bit.
+ * @sjw: Synchronization Jump Width.
+ * @sync_edge: type enum es58x_sync_edge.
+ * @physical_layer: type enum es58x_physical_layer.
+ * @echo_mode: type enum es58x_echo_mode.
+ * @channel_no: Channel number, starting from 1. Not to be confused
+ *	with channed_idx of the ES58X FD which starts from 0.
+ */
+struct es581_4_tx_conf_msg {
+	__le32 bitrate;
+	__le32 sample_point;
+	__le32 samples_per_bit;
+	__le32 bit_time;
+	__le32 sjw;
+	__le32 sync_edge;
+	__le32 physical_layer;
+	__le32 echo_mode;
+	u8 channel_no;
+} __packed;
+
+struct es581_4_tx_can_msg {
+	__le32 can_id;
+	__le32 packet_idx;
+	__le16 flags;
+	u8 channel_no;
+	u8 dlc;
+	u8 data[CAN_MAX_DLEN];
+} __packed;
+
+/* The ES581.4 allows bulk transfer.  */
+struct es581_4_bulk_tx_can_msg {
+	u8 num_can_msg;
+	/* Using type "u8[]" instead of "struct es581_4_tx_can_msg[]"
+	 * for tx_msg_buf because each member has a flexible size.
+	 */
+	u8 tx_can_msg_buf[ES581_4_TX_BULK_MAX *
+			  sizeof(struct es581_4_tx_can_msg)];
+} __packed;
+
+struct es581_4_echo_msg {
+	__le64 timestamp;
+	__le32 packet_idx;
+} __packed;
+
+struct es581_4_bulk_echo_msg {
+	u8 channel_no;
+	struct es581_4_echo_msg echo_msg[ES581_4_ECHO_BULK_MAX];
+} __packed;
+
+/* Normal Rx CAN Message */
+struct es581_4_rx_can_msg {
+	__le64 timestamp;
+	u8 rx_type;		/* type enum es581_4_rx_type */
+	u8 flags;		/* type enum es58x_flag */
+	u8 channel_no;
+	u8 dlc;
+	__le32 can_id;
+	u8 data[CAN_MAX_DLEN];
+} __packed;
+
+struct es581_4_rx_err_msg {
+	__le64 timestamp;
+	__le16 rx_type;		/* type enum es581_4_rx_type */
+	__le16 flags;		/* type enum es58x_flag */
+	u8 channel_no;
+	u8 __padding[2];
+	u8 dlc;
+	__le32 tag;		/* Related to the CAN filtering. Unused in this module */
+	__le32 can_id;
+	__le32 error;		/* type enum es58x_error */
+	__le32 destination;	/* Unused in this module */
+} __packed;
+
+struct es581_4_rx_event_msg {
+	__le64 timestamp;
+	__le16 rx_type;		/* type enum es581_4_rx_type */
+	u8 channel_no;
+	u8 __padding;
+	__le32 tag;		/* Related to the CAN filtering. Unused in this module */
+	__le32 event;		/* type enum es58x_event */
+	__le32 destination;	/* Unused in this module */
+} __packed;
+
+struct es581_4_tx_ack_msg {
+	__le16 tx_free_entries;	/* Number of remaining free entries in the device TX queue */
+	u8 channel_no;
+	u8 rx_cmd_ret_u8;	/* type enum es58x_cmd_ret_code_u8 */
+} __packed;
+
+struct es581_4_rx_cmd_ret {
+	__le32 rx_cmd_ret_le32;
+	u8 channel_no;
+	u8 __padding[3];
+} __packed;
+
+/**
+ * struct es581_4_urb_cmd - Commands received from or sent to the
+ *	ES581.4 device.
+ * @SOF: Start of Frame.
+ * @cmd_type: Command Type (type: enum es581_4_cmd_type). The CRC
+ *	calculation starts at this position.
+ * @cmd_id: Command ID (type: enum es581_4_cmd_id).
+ * @msg_len: Length of the message, excluding CRC (i.e. length of the
+ *	union).
+ * @tx_conf_msg: Channel configuration.
+ * @bulk_tx_can_msg: Tx messages.
+ * @rx_can_msg: Array of Rx messages.
+ * @bulk_echo_msg: Tx message being looped back.
+ * @rx_err_msg: Error message.
+ * @rx_event_msg: Event message.
+ * @tx_ack_msg: Tx acknowledgment message.
+ * @rx_cmd_ret: Command return code.
+ * @timestamp: Timestamp reply.
+ * @rx_cmd_ret_u8: Rx 8 bits return code (type: enum
+ *	es58x_cmd_ret_code_u8).
+ * @raw_msg: Message raw payload.
+ * @reserved_for_crc16_do_not_use: The structure ends with a
+ *	CRC16. Because the structures in above union are of variable
+ *	lengths, we can not predict the offset of the CRC in
+ *	advance. Use functions es58x_get_crc() and es58x_set_crc() to
+ *	manipulate it.
+ */
+struct es581_4_urb_cmd {
+	__le16 SOF;
+	u8 cmd_type;
+	u8 cmd_id;
+	__le16 msg_len;
+
+	union {
+		struct es581_4_tx_conf_msg tx_conf_msg;
+		struct es581_4_bulk_tx_can_msg bulk_tx_can_msg;
+		struct es581_4_rx_can_msg rx_can_msg[ES581_4_RX_BULK_MAX];
+		struct es581_4_bulk_echo_msg bulk_echo_msg;
+		struct es581_4_rx_err_msg rx_err_msg;
+		struct es581_4_rx_event_msg rx_event_msg;
+		struct es581_4_tx_ack_msg tx_ack_msg;
+		struct es581_4_rx_cmd_ret rx_cmd_ret;
+		__le64 timestamp;
+		u8 rx_cmd_ret_u8;
+		u8 raw_msg[0];
+	} __packed;
+
+	__le16 reserved_for_crc16_do_not_use;
+} __packed;
+
+#define ES581_4_URB_CMD_HEADER_LEN (offsetof(struct es581_4_urb_cmd, raw_msg))
+#define ES581_4_TX_URB_CMD_MAX_LEN					\
+	ES58X_SIZEOF_URB_CMD(struct es581_4_urb_cmd, bulk_tx_can_msg)
+#define ES581_4_RX_URB_CMD_MAX_LEN					\
+	ES58X_SIZEOF_URB_CMD(struct es581_4_urb_cmd, rx_can_msg)
+
+#endif /* __ES581_4_H__ */
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index da7f9d1d1162..2d79d4e7e0b2 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -27,10 +27,15 @@ MODULE_LICENSE("GPL v2");
 
 #define ES58X_MODULE_NAME "etas_es58x"
 #define ES58X_VENDOR_ID 0x108C
+#define ES581_4_PRODUCT_ID 0x0159
 
 /* Table of devices which work with this driver. */
 static const struct usb_device_id es58x_id_table[] = {
 	{
+		/* ETAS GmbH ES581.4 USB dual-channel CAN Bus Interface module. */
+		USB_DEVICE(ES58X_VENDOR_ID, ES581_4_PRODUCT_ID),
+		.driver_info = ES58X_DUAL_CHANNEL
+	}, {
 		/* Terminating entry */
 	}
 };
@@ -2162,8 +2167,8 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
 		return -ENODEV;
 		/* Place holder for es58x_fd glue code. */
 	} else {
-		return -ENODEV;
-		/* Place holder for es581_4 glue code. */
+		param = &es581_4_param;
+		ops = &es581_4_ops;
 	}
 
 	es58x_dev = kzalloc(es58x_sizeof_es58x_device(param), GFP_KERNEL);
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
index ccced39d5d81..0e7a7dcc48d4 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -18,11 +18,13 @@
 #include <linux/can.h>
 #include <linux/can/dev.h>
 
+#include "es581_4.h"
+
 /* Driver constants */
 #define ES58X_RX_URBS_MAX 5	/* Empirical value */
 #define ES58X_TX_URBS_MAX 6	/* Empirical value */
 
-#define ES58X_MAX(param) 0
+#define ES58X_MAX(param) (ES581_4_##param)
 #define ES58X_TX_BULK_MAX ES58X_MAX(TX_BULK_MAX)
 #define ES58X_RX_BULK_MAX ES58X_MAX(RX_BULK_MAX)
 #define ES58X_ECHO_BULK_MAX ES58X_MAX(ECHO_BULK_MAX)
@@ -210,6 +212,7 @@ enum es58x_ret_type {
 };
 
 union es58x_urb_cmd {
+	struct es581_4_urb_cmd es581_4_urb_cmd;
 	struct {		/* Common header parts of all variants */
 		__le16 sof;
 		u8 cmd_type;
-- 
cgit v1.2.3


From c664e2137a27680922d8aeb64fb10313416b254f Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 10 Apr 2021 18:59:48 +0900
Subject: can: etas_es58x: add support for the ETAS ES58X_FD CAN USB interfaces

This patch add support for the ES582.1 and ES584.1 interfaces from
ETAS GmbH (https://www.etas.com/en/products/es58x.php).

Link: https://lore.kernel.org/r/20210410095948.233305-4-mailhol.vincent@wanadoo.fr
Co-developed-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/etas_es58x/Makefile     |   2 +-
 drivers/net/can/usb/etas_es58x/es58x_core.c |  19 +-
 drivers/net/can/usb/etas_es58x/es58x_core.h |   6 +-
 drivers/net/can/usb/etas_es58x/es58x_fd.c   | 562 ++++++++++++++++++++++++++++
 drivers/net/can/usb/etas_es58x/es58x_fd.h   | 243 ++++++++++++
 5 files changed, 828 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/can/usb/etas_es58x/es58x_fd.c
 create mode 100644 drivers/net/can/usb/etas_es58x/es58x_fd.h

diff --git a/drivers/net/can/usb/etas_es58x/Makefile b/drivers/net/can/usb/etas_es58x/Makefile
index e4753ec9cb60..a129b4aa0215 100644
--- a/drivers/net/can/usb/etas_es58x/Makefile
+++ b/drivers/net/can/usb/etas_es58x/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CAN_ETAS_ES58X) += etas_es58x.o
-etas_es58x-y = es58x_core.o es581_4.o
+etas_es58x-y = es58x_core.o es581_4.o es58x_fd.o
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 2d79d4e7e0b2..7222b3b6ca46 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -28,6 +28,11 @@ MODULE_LICENSE("GPL v2");
 #define ES58X_MODULE_NAME "etas_es58x"
 #define ES58X_VENDOR_ID 0x108C
 #define ES581_4_PRODUCT_ID 0x0159
+#define ES582_1_PRODUCT_ID 0x0168
+#define ES584_1_PRODUCT_ID 0x0169
+
+/* ES58X FD has some interface protocols unsupported by this driver. */
+#define ES58X_FD_INTERFACE_PROTOCOL 0
 
 /* Table of devices which work with this driver. */
 static const struct usb_device_id es58x_id_table[] = {
@@ -35,6 +40,16 @@ static const struct usb_device_id es58x_id_table[] = {
 		/* ETAS GmbH ES581.4 USB dual-channel CAN Bus Interface module. */
 		USB_DEVICE(ES58X_VENDOR_ID, ES581_4_PRODUCT_ID),
 		.driver_info = ES58X_DUAL_CHANNEL
+	}, {
+		/* ETAS GmbH ES582.1 USB dual-channel CAN FD Bus Interface module. */
+		USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES582_1_PRODUCT_ID,
+					      ES58X_FD_INTERFACE_PROTOCOL),
+		.driver_info = ES58X_DUAL_CHANNEL | ES58X_FD_FAMILY
+	}, {
+		/* ETAS GmbH ES584.1 USB single-channel CAN FD Bus Interface module. */
+		USB_DEVICE_INTERFACE_PROTOCOL(ES58X_VENDOR_ID, ES584_1_PRODUCT_ID,
+					      ES58X_FD_INTERFACE_PROTOCOL),
+		.driver_info = ES58X_FD_FAMILY
 	}, {
 		/* Terminating entry */
 	}
@@ -2164,8 +2179,8 @@ static int es58x_init_es58x_dev(struct usb_interface *intf,
 		return ret;
 
 	if (driver_info & ES58X_FD_FAMILY) {
-		return -ENODEV;
-		/* Place holder for es58x_fd glue code. */
+		param = &es58x_fd_param;
+		ops = &es58x_fd_ops;
 	} else {
 		param = &es581_4_param;
 		ops = &es581_4_ops;
diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
index 0e7a7dcc48d4..5f4e7dc5be35 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -19,12 +19,15 @@
 #include <linux/can/dev.h>
 
 #include "es581_4.h"
+#include "es58x_fd.h"
 
 /* Driver constants */
 #define ES58X_RX_URBS_MAX 5	/* Empirical value */
 #define ES58X_TX_URBS_MAX 6	/* Empirical value */
 
-#define ES58X_MAX(param) (ES581_4_##param)
+#define ES58X_MAX(param)				\
+	(ES581_4_##param > ES58X_FD_##param ?		\
+		ES581_4_##param : ES58X_FD_##param)
 #define ES58X_TX_BULK_MAX ES58X_MAX(TX_BULK_MAX)
 #define ES58X_RX_BULK_MAX ES58X_MAX(RX_BULK_MAX)
 #define ES58X_ECHO_BULK_MAX ES58X_MAX(ECHO_BULK_MAX)
@@ -213,6 +216,7 @@ enum es58x_ret_type {
 
 union es58x_urb_cmd {
 	struct es581_4_urb_cmd es581_4_urb_cmd;
+	struct es58x_fd_urb_cmd es58x_fd_urb_cmd;
 	struct {		/* Common header parts of all variants */
 		__le16 sof;
 		u8 cmd_type;
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c
new file mode 100644
index 000000000000..1a2779d383a4
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es58x_fd.c: Adds support to ETAS ES582.1 and ES584.1 (naming
+ * convention: we use the term "ES58X FD" when referring to those two
+ * variants together).
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include "es58x_core.h"
+#include "es58x_fd.h"
+
+/**
+ * es58x_fd_sizeof_rx_tx_msg() - Calculate the actual length of the
+ *	structure of a rx or tx message.
+ * @msg: message of variable length, must have a dlc and a len fields.
+ *
+ * Even if RTR frames have actually no payload, the ES58X devices
+ * still expect it. Must be a macro in order to accept several types
+ * (struct es58x_fd_tx_can_msg and struct es58x_fd_rx_can_msg) as an
+ * input.
+ *
+ * Return: length of the message.
+ */
+#define es58x_fd_sizeof_rx_tx_msg(msg)					\
+({									\
+	typeof(msg) __msg = (msg);					\
+	size_t __msg_len;						\
+									\
+	if (__msg.flags & ES58X_FLAG_FD_DATA)				\
+		__msg_len = canfd_sanitize_len(__msg.len);		\
+	else								\
+		__msg_len = can_cc_dlc2len(__msg.dlc);			\
+									\
+	offsetof(typeof(__msg), data[__msg_len]);			\
+})
+
+static enum es58x_fd_cmd_type es58x_fd_cmd_type(struct net_device *netdev)
+{
+	u32 ctrlmode = es58x_priv(netdev)->can.ctrlmode;
+
+	if (ctrlmode & (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO))
+		return ES58X_FD_CMD_TYPE_CANFD;
+	else
+		return ES58X_FD_CMD_TYPE_CAN;
+}
+
+static u16 es58x_fd_get_msg_len(const union es58x_urb_cmd *urb_cmd)
+{
+	return get_unaligned_le16(&urb_cmd->es58x_fd_urb_cmd.msg_len);
+}
+
+static int es58x_fd_echo_msg(struct net_device *netdev,
+			     const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	struct es58x_priv *priv = es58x_priv(netdev);
+	const struct es58x_fd_echo_msg *echo_msg;
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	u64 *tstamps = es58x_dev->timestamps;
+	u16 msg_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	int i, num_element;
+	u32 rcv_packet_idx;
+
+	const u32 mask = GENMASK(31, sizeof(echo_msg->packet_idx) * 8);
+
+	num_element = es58x_msg_num_element(es58x_dev->dev,
+					    es58x_fd_urb_cmd->echo_msg,
+					    msg_len);
+	if (num_element < 0)
+		return num_element;
+	echo_msg = es58x_fd_urb_cmd->echo_msg;
+
+	rcv_packet_idx = (priv->tx_tail & mask) | echo_msg[0].packet_idx;
+	for (i = 0; i < num_element; i++) {
+		if ((u8)rcv_packet_idx != echo_msg[i].packet_idx) {
+			netdev_err(netdev, "Packet idx jumped from %u to %u\n",
+				   (u8)rcv_packet_idx - 1,
+				   echo_msg[i].packet_idx);
+			return -EBADMSG;
+		}
+
+		tstamps[i] = get_unaligned_le64(&echo_msg[i].timestamp);
+		rcv_packet_idx++;
+	}
+
+	return es58x_can_get_echo_skb(netdev, priv->tx_tail, tstamps, num_element);
+}
+
+static int es58x_fd_rx_can_msg(struct net_device *netdev,
+			       const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	const u8 *rx_can_msg_buf = es58x_fd_urb_cmd->rx_can_msg_buf;
+	u16 rx_can_msg_buf_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	int pkts, ret;
+
+	ret = es58x_check_msg_max_len(es58x_dev->dev,
+				      es58x_fd_urb_cmd->rx_can_msg_buf,
+				      rx_can_msg_buf_len);
+	if (ret)
+		return ret;
+
+	for (pkts = 0; rx_can_msg_buf_len > 0; pkts++) {
+		const struct es58x_fd_rx_can_msg *rx_can_msg =
+		    (const struct es58x_fd_rx_can_msg *)rx_can_msg_buf;
+		bool is_can_fd = !!(rx_can_msg->flags & ES58X_FLAG_FD_DATA);
+		/* rx_can_msg_len is the length of the rx_can_msg
+		 * buffer. Not to be confused with rx_can_msg->len
+		 * which is the length of the CAN payload
+		 * rx_can_msg->data.
+		 */
+		u16 rx_can_msg_len = es58x_fd_sizeof_rx_tx_msg(*rx_can_msg);
+
+		if (rx_can_msg_len > rx_can_msg_buf_len) {
+			netdev_err(netdev,
+				   "%s: Expected a rx_can_msg of size %d but only %d bytes are left in rx_can_msg_buf\n",
+				   __func__,
+				   rx_can_msg_len, rx_can_msg_buf_len);
+			return -EMSGSIZE;
+		}
+		if (rx_can_msg->len > CANFD_MAX_DLEN) {
+			netdev_err(netdev,
+				   "%s: Data length is %d but maximum should be %d\n",
+				   __func__, rx_can_msg->len, CANFD_MAX_DLEN);
+			return -EMSGSIZE;
+		}
+
+		if (netif_running(netdev)) {
+			u64 tstamp = get_unaligned_le64(&rx_can_msg->timestamp);
+			canid_t can_id = get_unaligned_le32(&rx_can_msg->can_id);
+			u8 dlc;
+
+			if (is_can_fd)
+				dlc = can_fd_len2dlc(rx_can_msg->len);
+			else
+				dlc = rx_can_msg->dlc;
+
+			ret = es58x_rx_can_msg(netdev, tstamp, rx_can_msg->data,
+					       can_id, rx_can_msg->flags, dlc);
+			if (ret)
+				break;
+		}
+
+		rx_can_msg_buf_len -= rx_can_msg_len;
+		rx_can_msg_buf += rx_can_msg_len;
+	}
+
+	if (!netif_running(netdev)) {
+		if (net_ratelimit())
+			netdev_info(netdev,
+				    "%s: %s is down, dropping %d rx packets\n",
+				    __func__, netdev->name, pkts);
+		netdev->stats.rx_dropped += pkts;
+	}
+
+	return ret;
+}
+
+static int es58x_fd_rx_event_msg(struct net_device *netdev,
+				 const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	u16 msg_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	const struct es58x_fd_rx_event_msg *rx_event_msg;
+	int ret;
+
+	ret = es58x_check_msg_len(es58x_dev->dev, *rx_event_msg, msg_len);
+	if (ret)
+		return ret;
+
+	rx_event_msg = &es58x_fd_urb_cmd->rx_event_msg;
+
+	return es58x_rx_err_msg(netdev, rx_event_msg->error_code,
+				rx_event_msg->event_code,
+				get_unaligned_le64(&rx_event_msg->timestamp));
+}
+
+static int es58x_fd_rx_cmd_ret_u32(struct net_device *netdev,
+				   const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd,
+				   enum es58x_ret_type cmd_ret_type)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	u16 msg_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	int ret;
+
+	ret = es58x_check_msg_len(es58x_dev->dev,
+				  es58x_fd_urb_cmd->rx_cmd_ret_le32, msg_len);
+	if (ret)
+		return ret;
+
+	return es58x_rx_cmd_ret_u32(netdev, cmd_ret_type,
+				    get_unaligned_le32(&es58x_fd_urb_cmd->rx_cmd_ret_le32));
+}
+
+static int es58x_fd_tx_ack_msg(struct net_device *netdev,
+			       const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	struct es58x_device *es58x_dev = es58x_priv(netdev)->es58x_dev;
+	const struct es58x_fd_tx_ack_msg *tx_ack_msg;
+	u16 msg_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	int ret;
+
+	tx_ack_msg = &es58x_fd_urb_cmd->tx_ack_msg;
+	ret = es58x_check_msg_len(es58x_dev->dev, *tx_ack_msg, msg_len);
+	if (ret)
+		return ret;
+
+	return es58x_tx_ack_msg(netdev,
+				get_unaligned_le16(&tx_ack_msg->tx_free_entries),
+				get_unaligned_le32(&tx_ack_msg->rx_cmd_ret_le32));
+}
+
+static int es58x_fd_can_cmd_id(struct es58x_device *es58x_dev,
+			       const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	struct net_device *netdev;
+	int ret;
+
+	ret = es58x_get_netdev(es58x_dev, es58x_fd_urb_cmd->channel_idx,
+			       ES58X_FD_CHANNEL_IDX_OFFSET, &netdev);
+	if (ret)
+		return ret;
+
+	switch ((enum es58x_fd_can_cmd_id)es58x_fd_urb_cmd->cmd_id) {
+	case ES58X_FD_CAN_CMD_ID_ENABLE_CHANNEL:
+		return es58x_fd_rx_cmd_ret_u32(netdev, es58x_fd_urb_cmd,
+					       ES58X_RET_TYPE_ENABLE_CHANNEL);
+
+	case ES58X_FD_CAN_CMD_ID_DISABLE_CHANNEL:
+		return es58x_fd_rx_cmd_ret_u32(netdev, es58x_fd_urb_cmd,
+					       ES58X_RET_TYPE_DISABLE_CHANNEL);
+
+	case ES58X_FD_CAN_CMD_ID_TX_MSG:
+		return es58x_fd_tx_ack_msg(netdev, es58x_fd_urb_cmd);
+
+	case ES58X_FD_CAN_CMD_ID_ECHO_MSG:
+		return es58x_fd_echo_msg(netdev, es58x_fd_urb_cmd);
+
+	case ES58X_FD_CAN_CMD_ID_RX_MSG:
+		return es58x_fd_rx_can_msg(netdev, es58x_fd_urb_cmd);
+
+	case ES58X_FD_CAN_CMD_ID_RESET_RX:
+		return es58x_fd_rx_cmd_ret_u32(netdev, es58x_fd_urb_cmd,
+					       ES58X_RET_TYPE_RESET_RX);
+
+	case ES58X_FD_CAN_CMD_ID_RESET_TX:
+		return es58x_fd_rx_cmd_ret_u32(netdev, es58x_fd_urb_cmd,
+					       ES58X_RET_TYPE_RESET_TX);
+
+	case ES58X_FD_CAN_CMD_ID_ERROR_OR_EVENT_MSG:
+		return es58x_fd_rx_event_msg(netdev, es58x_fd_urb_cmd);
+
+	default:
+		return -EBADRQC;
+	}
+}
+
+static int es58x_fd_device_cmd_id(struct es58x_device *es58x_dev,
+				  const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd)
+{
+	u16 msg_len = get_unaligned_le16(&es58x_fd_urb_cmd->msg_len);
+	int ret;
+
+	switch ((enum es58x_fd_dev_cmd_id)es58x_fd_urb_cmd->cmd_id) {
+	case ES58X_FD_DEV_CMD_ID_TIMESTAMP:
+		ret = es58x_check_msg_len(es58x_dev->dev,
+					  es58x_fd_urb_cmd->timestamp, msg_len);
+		if (ret)
+			return ret;
+		es58x_rx_timestamp(es58x_dev,
+				   get_unaligned_le64(&es58x_fd_urb_cmd->timestamp));
+		return 0;
+
+	default:
+		return -EBADRQC;
+	}
+}
+
+static int es58x_fd_handle_urb_cmd(struct es58x_device *es58x_dev,
+				   const union es58x_urb_cmd *urb_cmd)
+{
+	const struct es58x_fd_urb_cmd *es58x_fd_urb_cmd;
+	int ret;
+
+	es58x_fd_urb_cmd = &urb_cmd->es58x_fd_urb_cmd;
+
+	switch ((enum es58x_fd_cmd_type)es58x_fd_urb_cmd->cmd_type) {
+	case ES58X_FD_CMD_TYPE_CAN:
+	case ES58X_FD_CMD_TYPE_CANFD:
+		ret = es58x_fd_can_cmd_id(es58x_dev, es58x_fd_urb_cmd);
+		break;
+
+	case ES58X_FD_CMD_TYPE_DEVICE:
+		ret = es58x_fd_device_cmd_id(es58x_dev, es58x_fd_urb_cmd);
+		break;
+
+	default:
+		ret = -EBADRQC;
+		break;
+	}
+
+	if (ret == -EBADRQC)
+		dev_err(es58x_dev->dev,
+			"%s: Unknown command type (0x%02X) and command ID (0x%02X) combination\n",
+			__func__, es58x_fd_urb_cmd->cmd_type,
+			es58x_fd_urb_cmd->cmd_id);
+
+	return ret;
+}
+
+static void es58x_fd_fill_urb_header(union es58x_urb_cmd *urb_cmd, u8 cmd_type,
+				     u8 cmd_id, u8 channel_idx, u16 msg_len)
+{
+	struct es58x_fd_urb_cmd *es58x_fd_urb_cmd = &urb_cmd->es58x_fd_urb_cmd;
+
+	es58x_fd_urb_cmd->SOF = cpu_to_le16(es58x_fd_param.tx_start_of_frame);
+	es58x_fd_urb_cmd->cmd_type = cmd_type;
+	es58x_fd_urb_cmd->cmd_id = cmd_id;
+	es58x_fd_urb_cmd->channel_idx = channel_idx;
+	es58x_fd_urb_cmd->msg_len = cpu_to_le16(msg_len);
+}
+
+static int es58x_fd_tx_can_msg(struct es58x_priv *priv,
+			       const struct sk_buff *skb)
+{
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	union es58x_urb_cmd *urb_cmd = priv->tx_urb->transfer_buffer;
+	struct es58x_fd_urb_cmd *es58x_fd_urb_cmd = &urb_cmd->es58x_fd_urb_cmd;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	struct es58x_fd_tx_can_msg *tx_can_msg;
+	bool is_fd = can_is_canfd_skb(skb);
+	u16 msg_len;
+	int ret;
+
+	if (priv->tx_can_msg_cnt == 0) {
+		msg_len = 0;
+		es58x_fd_fill_urb_header(urb_cmd,
+					 is_fd ? ES58X_FD_CMD_TYPE_CANFD
+					       : ES58X_FD_CMD_TYPE_CAN,
+					 ES58X_FD_CAN_CMD_ID_TX_MSG_NO_ACK,
+					 priv->channel_idx, msg_len);
+	} else {
+		msg_len = es58x_fd_get_msg_len(urb_cmd);
+	}
+
+	ret = es58x_check_msg_max_len(es58x_dev->dev,
+				      es58x_fd_urb_cmd->tx_can_msg_buf,
+				      msg_len + sizeof(*tx_can_msg));
+	if (ret)
+		return ret;
+
+	/* Fill message contents. */
+	tx_can_msg = (struct es58x_fd_tx_can_msg *)
+	    &es58x_fd_urb_cmd->tx_can_msg_buf[msg_len];
+	tx_can_msg->packet_idx = (u8)priv->tx_head;
+	put_unaligned_le32(es58x_get_raw_can_id(cf), &tx_can_msg->can_id);
+	tx_can_msg->flags = (u8)es58x_get_flags(skb);
+	if (is_fd)
+		tx_can_msg->len = cf->len;
+	else
+		tx_can_msg->dlc = can_get_cc_dlc(cf, priv->can.ctrlmode);
+	memcpy(tx_can_msg->data, cf->data, cf->len);
+
+	/* Calculate new sizes */
+	msg_len += es58x_fd_sizeof_rx_tx_msg(*tx_can_msg);
+	priv->tx_urb->transfer_buffer_length = es58x_get_urb_cmd_len(es58x_dev,
+								     msg_len);
+	put_unaligned_le16(msg_len, &es58x_fd_urb_cmd->msg_len);
+
+	return 0;
+}
+
+static void es58x_fd_convert_bittiming(struct es58x_fd_bittiming *es58x_fd_bt,
+				       struct can_bittiming *bt)
+{
+	/* The actual value set in the hardware registers is one less
+	 * than the functional value.
+	 */
+	const int offset = 1;
+
+	es58x_fd_bt->bitrate = cpu_to_le32(bt->bitrate);
+	es58x_fd_bt->tseg1 =
+	    cpu_to_le16(bt->prop_seg + bt->phase_seg1 - offset);
+	es58x_fd_bt->tseg2 = cpu_to_le16(bt->phase_seg2 - offset);
+	es58x_fd_bt->brp = cpu_to_le16(bt->brp - offset);
+	es58x_fd_bt->sjw = cpu_to_le16(bt->sjw - offset);
+}
+
+static int es58x_fd_enable_channel(struct es58x_priv *priv)
+{
+	struct es58x_device *es58x_dev = priv->es58x_dev;
+	struct net_device *netdev = es58x_dev->netdev[priv->channel_idx];
+	struct es58x_fd_tx_conf_msg tx_conf_msg = { 0 };
+	u32 ctrlmode;
+	size_t conf_len = 0;
+
+	es58x_fd_convert_bittiming(&tx_conf_msg.nominal_bittiming,
+				   &priv->can.bittiming);
+	ctrlmode = priv->can.ctrlmode;
+
+	if (ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		tx_conf_msg.samples_per_bit = ES58X_SAMPLES_PER_BIT_THREE;
+	else
+		tx_conf_msg.samples_per_bit = ES58X_SAMPLES_PER_BIT_ONE;
+	tx_conf_msg.sync_edge = ES58X_SYNC_EDGE_SINGLE;
+	tx_conf_msg.physical_layer = ES58X_PHYSICAL_LAYER_HIGH_SPEED;
+	tx_conf_msg.echo_mode = ES58X_ECHO_ON;
+	if (ctrlmode & CAN_CTRLMODE_LISTENONLY)
+		tx_conf_msg.ctrlmode |= ES58X_FD_CTRLMODE_PASSIVE;
+	else
+		tx_conf_msg.ctrlmode |=  ES58X_FD_CTRLMODE_ACTIVE;
+
+	if (ctrlmode & CAN_CTRLMODE_FD_NON_ISO) {
+		tx_conf_msg.ctrlmode |= ES58X_FD_CTRLMODE_FD_NON_ISO;
+		tx_conf_msg.canfd_enabled = 1;
+	} else if (ctrlmode & CAN_CTRLMODE_FD) {
+		tx_conf_msg.ctrlmode |= ES58X_FD_CTRLMODE_FD;
+		tx_conf_msg.canfd_enabled = 1;
+	}
+
+	if (tx_conf_msg.canfd_enabled) {
+		es58x_fd_convert_bittiming(&tx_conf_msg.data_bittiming,
+					   &priv->can.data_bittiming);
+
+		if (priv->can.tdc.tdco) {
+			tx_conf_msg.tdc_enabled = 1;
+			tx_conf_msg.tdco = cpu_to_le16(priv->can.tdc.tdco);
+			tx_conf_msg.tdcf = cpu_to_le16(priv->can.tdc.tdcf);
+		}
+
+		conf_len = ES58X_FD_CANFD_CONF_LEN;
+	} else {
+		conf_len = ES58X_FD_CAN_CONF_LEN;
+	}
+
+	return es58x_send_msg(es58x_dev, es58x_fd_cmd_type(netdev),
+			      ES58X_FD_CAN_CMD_ID_ENABLE_CHANNEL,
+			      &tx_conf_msg, conf_len, priv->channel_idx);
+}
+
+static int es58x_fd_disable_channel(struct es58x_priv *priv)
+{
+	/* The type (ES58X_FD_CMD_TYPE_CAN or ES58X_FD_CMD_TYPE_CANFD) does
+	 * not matter here.
+	 */
+	return es58x_send_msg(priv->es58x_dev, ES58X_FD_CMD_TYPE_CAN,
+			      ES58X_FD_CAN_CMD_ID_DISABLE_CHANNEL,
+			      ES58X_EMPTY_MSG, 0, priv->channel_idx);
+}
+
+static int es58x_fd_get_timestamp(struct es58x_device *es58x_dev)
+{
+	return es58x_send_msg(es58x_dev, ES58X_FD_CMD_TYPE_DEVICE,
+			      ES58X_FD_DEV_CMD_ID_TIMESTAMP, ES58X_EMPTY_MSG,
+			      0, ES58X_CHANNEL_IDX_NA);
+}
+
+/* Nominal bittiming constants for ES582.1 and ES584.1 as specified in
+ * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
+ * section 49.6.8 "MCAN Nominal Bit Timing and Prescaler Register"
+ * from Microchip.
+ *
+ * The values from the specification are the hardware register
+ * values. To convert them to the functional values, all ranges were
+ * incremented by 1 (e.g. range [0..n-1] changed to [1..n]).
+ */
+static const struct can_bittiming_const es58x_fd_nom_bittiming_const = {
+	.name = "ES582.1/ES584.1",
+	.tseg1_min = 2,
+	.tseg1_max = 256,
+	.tseg2_min = 2,
+	.tseg2_max = 128,
+	.sjw_max = 128,
+	.brp_min = 1,
+	.brp_max = 512,
+	.brp_inc = 1
+};
+
+/* Data bittiming constants for ES582.1 and ES584.1 as specified in
+ * the microcontroller datasheet: "SAM E701/S70/V70/V71 Family"
+ * section 49.6.4 "MCAN Data Bit Timing and Prescaler Register" from
+ * Microchip.
+ */
+static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
+	.name = "ES582.1/ES584.1",
+	.tseg1_min = 2,
+	.tseg1_max = 32,
+	.tseg2_min = 1,
+	.tseg2_max = 16,
+	.sjw_max = 8,
+	.brp_min = 1,
+	.brp_max = 32,
+	.brp_inc = 1
+};
+
+/* Transmission Delay Compensation constants for ES582.1 and ES584.1
+ * as specified in the microcontroller datasheet: "SAM
+ * E701/S70/V70/V71 Family" section 49.6.15 "MCAN Transmitter Delay
+ * Compensation Register" from Microchip.
+ */
+static const struct can_tdc_const es58x_tdc_const = {
+	.tdcv_max = 0, /* Manual mode not supported. */
+	.tdco_max = 127,
+	.tdcf_max = 127
+};
+
+const struct es58x_parameters es58x_fd_param = {
+	.bittiming_const = &es58x_fd_nom_bittiming_const,
+	.data_bittiming_const = &es58x_fd_data_bittiming_const,
+	.tdc_const = &es58x_tdc_const,
+	/* The devices use NXP TJA1044G transievers which guarantee
+	 * the timing for data rates up to 5 Mbps. Bitrates up to 8
+	 * Mbps work in an optimal environment but are not recommended
+	 * for production environment.
+	 */
+	.bitrate_max = 8 * CAN_MBPS,
+	.clock = {.freq = 80 * CAN_MHZ},
+	.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY |
+	    CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO |
+	    CAN_CTRLMODE_CC_LEN8_DLC,
+	.tx_start_of_frame = 0xCEFA,	/* FACE in little endian */
+	.rx_start_of_frame = 0xFECA,	/* CAFE in little endian */
+	.tx_urb_cmd_max_len = ES58X_FD_TX_URB_CMD_MAX_LEN,
+	.rx_urb_cmd_max_len = ES58X_FD_RX_URB_CMD_MAX_LEN,
+	/* Size of internal device TX queue is 500.
+	 *
+	 * However, when reaching value around 278, the device's busy
+	 * LED turns on and thus maximum value of 500 is never reached
+	 * in practice. Also, when this value is too high, some error
+	 * on the echo_msg were witnessed when the device is
+	 * recovering from bus off.
+	 *
+	 * For above reasons, a value that would prevent the device
+	 * from becoming busy was chosen. In practice, BQL would
+	 * prevent the value from even getting closer to below
+	 * maximum, so no impact on performance was measured.
+	 */
+	.fifo_mask = 255, /* echo_skb_max = 256 */
+	.dql_min_limit = CAN_FRAME_LEN_MAX * 15, /* Empirical value. */
+	.tx_bulk_max = ES58X_FD_TX_BULK_MAX,
+	.urb_cmd_header_len = ES58X_FD_URB_CMD_HEADER_LEN,
+	.rx_urb_max = ES58X_RX_URBS_MAX,
+	.tx_urb_max = ES58X_TX_URBS_MAX
+};
+
+const struct es58x_operators es58x_fd_ops = {
+	.get_msg_len = es58x_fd_get_msg_len,
+	.handle_urb_cmd = es58x_fd_handle_urb_cmd,
+	.fill_urb_header = es58x_fd_fill_urb_header,
+	.tx_can_msg = es58x_fd_tx_can_msg,
+	.enable_channel = es58x_fd_enable_channel,
+	.disable_channel = es58x_fd_disable_channel,
+	.reset_device = NULL, /* Not implemented in the device firmware. */
+	.get_timestamp = es58x_fd_get_timestamp
+};
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.h b/drivers/net/can/usb/etas_es58x/es58x_fd.h
new file mode 100644
index 000000000000..ee18a87e40c0
--- /dev/null
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.h
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Driver for ETAS GmbH ES58X USB CAN(-FD) Bus Interfaces.
+ *
+ * File es58x_fd.h: Definitions and declarations specific to ETAS
+ * ES582.1 and ES584.1 (naming convention: we use the term "ES58X FD"
+ * when referring to those two variants together).
+ *
+ * Copyright (c) 2019 Robert Bosch Engineering and Business Solutions. All rights reserved.
+ * Copyright (c) 2020 ETAS K.K.. All rights reserved.
+ * Copyright (c) 2020, 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr>
+ */
+
+#ifndef __ES58X_FD_H__
+#define __ES58X_FD_H__
+
+#include <linux/types.h>
+
+#define ES582_1_NUM_CAN_CH 2
+#define ES584_1_NUM_CAN_CH 1
+#define ES58X_FD_NUM_CAN_CH 2
+#define ES58X_FD_CHANNEL_IDX_OFFSET 0
+
+#define ES58X_FD_TX_BULK_MAX 100
+#define ES58X_FD_RX_BULK_MAX 100
+#define ES58X_FD_ECHO_BULK_MAX 100
+
+enum es58x_fd_cmd_type {
+	ES58X_FD_CMD_TYPE_CAN = 0x03,
+	ES58X_FD_CMD_TYPE_CANFD = 0x04,
+	ES58X_FD_CMD_TYPE_DEVICE = 0xFF
+};
+
+/* Command IDs for ES58X_FD_CMD_TYPE_{CAN,CANFD}. */
+enum es58x_fd_can_cmd_id {
+	ES58X_FD_CAN_CMD_ID_ENABLE_CHANNEL = 0x01,
+	ES58X_FD_CAN_CMD_ID_DISABLE_CHANNEL = 0x02,
+	ES58X_FD_CAN_CMD_ID_TX_MSG = 0x05,
+	ES58X_FD_CAN_CMD_ID_ECHO_MSG = 0x07,
+	ES58X_FD_CAN_CMD_ID_RX_MSG = 0x10,
+	ES58X_FD_CAN_CMD_ID_ERROR_OR_EVENT_MSG = 0x11,
+	ES58X_FD_CAN_CMD_ID_RESET_RX = 0x20,
+	ES58X_FD_CAN_CMD_ID_RESET_TX = 0x21,
+	ES58X_FD_CAN_CMD_ID_TX_MSG_NO_ACK = 0x55
+};
+
+/* Command IDs for ES58X_FD_CMD_TYPE_DEVICE. */
+enum es58x_fd_dev_cmd_id {
+	ES58X_FD_DEV_CMD_ID_GETTIMETICKS = 0x01,
+	ES58X_FD_DEV_CMD_ID_TIMESTAMP = 0x02
+};
+
+/**
+ * enum es58x_fd_ctrlmode - Controller mode.
+ * @ES58X_FD_CTRLMODE_ACTIVE: send and receive messages.
+ * @ES58X_FD_CTRLMODE_PASSIVE: only receive messages (monitor). Do not
+ *	send anything, not even the acknowledgment bit.
+ * @ES58X_FD_CTRLMODE_FD: CAN FD according to ISO11898-1.
+ * @ES58X_FD_CTRLMODE_FD_NON_ISO: follow Bosch CAN FD Specification
+ *	V1.0
+ * @ES58X_FD_CTRLMODE_DISABLE_PROTOCOL_EXCEPTION_HANDLING: How to
+ *	behave when CAN FD reserved bit is monitored as
+ *	dominant. (c.f. ISO 11898-1:2015, section 10.4.2.4 "Control
+ *	field", paragraph "r0 bit"). 0 (not disable = enable): send
+ *	error frame. 1 (disable): goes into bus integration mode
+ *	(c.f. below).
+ * @ES58X_FD_CTRLMODE_EDGE_FILTER_DURING_BUS_INTEGRATION: 0: Edge
+ *	filtering is disabled. 1: Edge filtering is enabled. Two
+ *	consecutive dominant bits required to detect an edge for hard
+ *	synchronization.
+ */
+enum es58x_fd_ctrlmode {
+	ES58X_FD_CTRLMODE_ACTIVE = 0,
+	ES58X_FD_CTRLMODE_PASSIVE = BIT(0),
+	ES58X_FD_CTRLMODE_FD = BIT(4),
+	ES58X_FD_CTRLMODE_FD_NON_ISO = BIT(5),
+	ES58X_FD_CTRLMODE_DISABLE_PROTOCOL_EXCEPTION_HANDLING = BIT(6),
+	ES58X_FD_CTRLMODE_EDGE_FILTER_DURING_BUS_INTEGRATION = BIT(7)
+};
+
+struct es58x_fd_bittiming {
+	__le32 bitrate;
+	__le16 tseg1;		/* range: [tseg1_min-1..tseg1_max-1] */
+	__le16 tseg2;		/* range: [tseg2_min-1..tseg2_max-1] */
+	__le16 brp;		/* range: [brp_min-1..brp_max-1] */
+	__le16 sjw;		/* range: [0..sjw_max-1] */
+} __packed;
+
+/**
+ * struct es58x_fd_tx_conf_msg - Channel configuration.
+ * @nominal_bittiming: Nominal bittiming.
+ * @samples_per_bit: type enum es58x_samples_per_bit.
+ * @sync_edge: type enum es58x_sync_edge.
+ * @physical_layer: type enum es58x_physical_layer.
+ * @echo_mode: type enum es58x_echo_mode.
+ * @ctrlmode: type enum es58x_fd_ctrlmode.
+ * @canfd_enabled: boolean (0: Classical CAN, 1: CAN and/or CANFD).
+ * @data_bittiming: Bittiming for flexible data-rate transmission.
+ * @tdc_enabled: Transmitter Delay Compensation switch (0: disabled,
+ *	1: enabled). On very high bitrates, the delay between when the
+ *	bit is sent and received on the CANTX and CANRX pins of the
+ *	transceiver start to be significant enough for errors to occur
+ *	and thus need to be compensated.
+ * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
+ *	quanta, defining the delay between the start of the bit
+ *	reception on the CANRX pin of the transceiver and the SSP
+ *	(Secondary Sample Point). Valid values: 0 to 127.
+ * @tdcf: Transmitter Delay Compensation Filter window. Defines the
+ *	minimum value for the SSP position, in time quanta. The
+ *	feature is enabled when TDCF is configured to a value greater
+ *	than TDCO. Valid values: 0 to 127.
+ *
+ * Please refer to the microcontroller datasheet: "SAM
+ * E701/S70/V70/V71 Family" section 49 "Controller Area Network
+ * (MCAN)" for additional information.
+ */
+struct es58x_fd_tx_conf_msg {
+	struct es58x_fd_bittiming nominal_bittiming;
+	u8 samples_per_bit;
+	u8 sync_edge;
+	u8 physical_layer;
+	u8 echo_mode;
+	u8 ctrlmode;
+	u8 canfd_enabled;
+	struct es58x_fd_bittiming data_bittiming;
+	u8 tdc_enabled;
+	__le16 tdco;
+	__le16 tdcf;
+} __packed;
+
+#define ES58X_FD_CAN_CONF_LEN					\
+	(offsetof(struct es58x_fd_tx_conf_msg, canfd_enabled))
+#define ES58X_FD_CANFD_CONF_LEN (sizeof(struct es58x_fd_tx_conf_msg))
+
+struct es58x_fd_tx_can_msg {
+	u8 packet_idx;
+	__le32 can_id;
+	u8 flags;
+	union {
+		u8 dlc;		/* Only if cmd_id is ES58X_FD_CMD_TYPE_CAN */
+		u8 len;		/* Only if cmd_id is ES58X_FD_CMD_TYPE_CANFD */
+	} __packed;
+	u8 data[CANFD_MAX_DLEN];
+} __packed;
+
+#define ES58X_FD_CAN_TX_LEN						\
+	(offsetof(struct es58x_fd_tx_can_msg, data[CAN_MAX_DLEN]))
+#define ES58X_FD_CANFD_TX_LEN (sizeof(struct es58x_fd_tx_can_msg))
+
+struct es58x_fd_rx_can_msg {
+	__le64 timestamp;
+	__le32 can_id;
+	u8 flags;
+	union {
+		u8 dlc;		/* Only if cmd_id is ES58X_FD_CMD_TYPE_CAN */
+		u8 len;		/* Only if cmd_id is ES58X_FD_CMD_TYPE_CANFD */
+	} __packed;
+	u8 data[CANFD_MAX_DLEN];
+} __packed;
+
+#define ES58X_FD_CAN_RX_LEN						\
+	(offsetof(struct es58x_fd_rx_can_msg, data[CAN_MAX_DLEN]))
+#define ES58X_FD_CANFD_RX_LEN (sizeof(struct es58x_fd_rx_can_msg))
+
+struct es58x_fd_echo_msg {
+	__le64 timestamp;
+	u8 packet_idx;
+} __packed;
+
+struct es58x_fd_rx_event_msg {
+	__le64 timestamp;
+	__le32 can_id;
+	u8 flags;		/* type enum es58x_flag */
+	u8 error_type;		/* 0: event, 1: error */
+	u8 error_code;
+	u8 event_code;
+} __packed;
+
+struct es58x_fd_tx_ack_msg {
+	__le32 rx_cmd_ret_le32;	/* type enum es58x_cmd_ret_code_u32 */
+	__le16 tx_free_entries;	/* Number of remaining free entries in the device TX queue */
+} __packed;
+
+/**
+ * struct es58x_fd_urb_cmd - Commands received from or sent to the
+ *	ES58X FD device.
+ * @SOF: Start of Frame.
+ * @cmd_type: Command Type (type: enum es58x_fd_cmd_type). The CRC
+ *	calculation starts at this position.
+ * @cmd_id: Command ID (type: enum es58x_fd_cmd_id).
+ * @channel_idx: Channel index starting at 0.
+ * @msg_len: Length of the message, excluding CRC (i.e. length of the
+ *	union).
+ * @tx_conf_msg: Channel configuration.
+ * @tx_can_msg_buf: Concatenation of Tx messages. Type is "u8[]"
+ *	instead of "struct es58x_fd_tx_msg[]" because the structure
+ *	has a flexible size.
+ * @rx_can_msg_buf: Concatenation Rx messages. Type is "u8[]" instead
+ *	of "struct es58x_fd_rx_msg[]" because the structure has a
+ *	flexible size.
+ * @echo_msg: Array of echo messages (e.g. Tx messages being looped
+ *	back).
+ * @rx_event_msg: Error or event message.
+ * @tx_ack_msg: Tx acknowledgment message.
+ * @timestamp: Timestamp reply.
+ * @rx_cmd_ret_le32: Rx 32 bits return code (type: enum
+ *	es58x_cmd_ret_code_u32).
+ * @raw_msg: Message raw payload.
+ * @reserved_for_crc16_do_not_use: The structure ends with a
+ *	CRC16. Because the structures in above union are of variable
+ *	lengths, we can not predict the offset of the CRC in
+ *	advance. Use functions es58x_get_crc() and es58x_set_crc() to
+ *	manipulate it.
+ */
+struct es58x_fd_urb_cmd {
+	__le16 SOF;
+	u8 cmd_type;
+	u8 cmd_id;
+	u8 channel_idx;
+	__le16 msg_len;
+
+	union {
+		struct es58x_fd_tx_conf_msg tx_conf_msg;
+		u8 tx_can_msg_buf[ES58X_FD_TX_BULK_MAX * ES58X_FD_CANFD_TX_LEN];
+		u8 rx_can_msg_buf[ES58X_FD_RX_BULK_MAX * ES58X_FD_CANFD_RX_LEN];
+		struct es58x_fd_echo_msg echo_msg[ES58X_FD_ECHO_BULK_MAX];
+		struct es58x_fd_rx_event_msg rx_event_msg;
+		struct es58x_fd_tx_ack_msg tx_ack_msg;
+		__le64 timestamp;
+		__le32 rx_cmd_ret_le32;
+		u8 raw_msg[0];
+	} __packed;
+
+	__le16 reserved_for_crc16_do_not_use;
+} __packed;
+
+#define ES58X_FD_URB_CMD_HEADER_LEN (offsetof(struct es58x_fd_urb_cmd, raw_msg))
+#define ES58X_FD_TX_URB_CMD_MAX_LEN					\
+	ES58X_SIZEOF_URB_CMD(struct es58x_fd_urb_cmd, tx_can_msg_buf)
+#define ES58X_FD_RX_URB_CMD_MAX_LEN					\
+	ES58X_SIZEOF_URB_CMD(struct es58x_fd_urb_cmd, rx_can_msg_buf)
+
+#endif /* __ES58X_FD_H__ */
-- 
cgit v1.2.3


From bc256b95971f50ff5b88c6a9617c514bd72413bb Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 12:34:56 +0200
Subject: can: peak_usb: fix checkpatch warnings

This patch cleans several checkpatch warnings in the peak_usb driver.

Link: https://lore.kernel.org/r/20210406111622.1874957-2-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c      | 4 ++--
 drivers/net/can/usb/peak_usb/pcan_usb_core.c | 1 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h | 2 +-
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c  | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index ba509aed7b4c..e23049c81159 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -401,7 +401,7 @@ static int pcan_usb_update_ts(struct pcan_usb_msg_context *mc)
 {
 	__le16 tmp16;
 
-	if ((mc->ptr+2) > mc->end)
+	if ((mc->ptr + 2) > mc->end)
 		return -EINVAL;
 
 	memcpy(&tmp16, mc->ptr, 2);
@@ -1039,7 +1039,7 @@ const struct peak_usb_adapter pcan_usb = {
 			      CAN_CTRLMODE_BERR_REPORTING |
 			      CAN_CTRLMODE_CC_LEN8_DLC,
 	.clock = {
-		.freq = PCAN_USB_CRYSTAL_HZ / 2 ,
+		.freq = PCAN_USB_CRYSTAL_HZ / 2,
 	},
 	.bittiming_const = &pcan_usb_const,
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index e69b005be068..7dff77a6ef70 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -624,6 +624,7 @@ static int peak_usb_ndo_stop(struct net_device *netdev)
 	/* can set bus off now */
 	if (dev->adapter->dev_set_bus) {
 		int err = dev->adapter->dev_set_bus(dev, 0);
+
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index e15b4c78f309..59afe880a481 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -31,7 +31,7 @@
 /* usb adapters maximum channels per usb interface */
 #define PCAN_USB_MAX_CHANNEL		2
 
-/* maximum length of the usb commands sent to/received from  the devices */
+/* maximum length of the usb commands sent to/received from the devices */
 #define PCAN_USB_MAX_CMD_LEN		32
 
 struct peak_usb_device;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index 2d1b645af76c..ecb08359f719 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -290,7 +290,7 @@ static int pcan_usb_pro_wait_rsp(struct peak_usb_device *dev,
 					   pr->data_type);
 
 			/* check if channel in response corresponds too */
-			else if ((req_channel != 0xff) && \
+			else if ((req_channel != 0xff) &&
 				(pr->bus_act.channel != req_channel))
 				netdev_err(dev->netdev,
 					"got rsp %xh but on chan%u: ignored\n",
-- 
cgit v1.2.3


From fa34e0a18f8a24e87de66d7e3b32c50e9c1ce1d7 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 21:31:52 +0200
Subject: can: peak_usb: pcan_usb_pro.h: remove double space in indention

This patch replaces the double space indention after the u8 with a
single space in pcan_usb_pro.h.

Link: https://lore.kernel.org/r/20210406111622.1874957-3-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_pro.h | 76 ++++++++++++++---------------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
index 6f4504300e23..5d4cf14eb9d9 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.h
@@ -34,11 +34,11 @@
 /* PCAN_USBPRO_INFO_BL vendor request record type */
 struct __packed pcan_usb_pro_blinfo {
 	__le32 ctrl_type;
-	u8  version[4];
-	u8  day;
-	u8  month;
-	u8  year;
-	u8  dummy;
+	u8 version[4];
+	u8 day;
+	u8 month;
+	u8 year;
+	u8 dummy;
 	__le32 serial_num_hi;
 	__le32 serial_num_lo;
 	__le32 hw_type;
@@ -48,11 +48,11 @@ struct __packed pcan_usb_pro_blinfo {
 /* PCAN_USBPRO_INFO_FW vendor request record type */
 struct __packed pcan_usb_pro_fwinfo {
 	__le32 ctrl_type;
-	u8  version[4];
-	u8  day;
-	u8  month;
-	u8  year;
-	u8  dummy;
+	u8 version[4];
+	u8 day;
+	u8 month;
+	u8 year;
+	u8 dummy;
 	__le32 fw_type;
 };
 
@@ -78,39 +78,39 @@ struct __packed pcan_usb_pro_fwinfo {
 
 /* record structures */
 struct __packed pcan_usb_pro_btr {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 dummy;
 	__le32 CCBT;
 };
 
 struct __packed pcan_usb_pro_busact {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 onoff;
 };
 
 struct __packed pcan_usb_pro_silent {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 onoff;
 };
 
 struct __packed pcan_usb_pro_filter {
-	u8  data_type;
-	u8  dummy;
+	u8 data_type;
+	u8 dummy;
 	__le16 filter_mode;
 };
 
 struct __packed pcan_usb_pro_setts {
-	u8  data_type;
-	u8  dummy;
+	u8 data_type;
+	u8 dummy;
 	__le16 mode;
 };
 
 struct __packed pcan_usb_pro_devid {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 dummy;
 	__le32 serial_num;
 };
@@ -122,21 +122,21 @@ struct __packed pcan_usb_pro_devid {
 #define PCAN_USBPRO_LED_OFF		0x04
 
 struct __packed pcan_usb_pro_setled {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 mode;
 	__le32 timeout;
 };
 
 struct __packed pcan_usb_pro_rxmsg {
-	u8  data_type;
-	u8  client;
-	u8  flags;
-	u8  len;
+	u8 data_type;
+	u8 client;
+	u8 flags;
+	u8 len;
 	__le32 ts32;
 	__le32 id;
 
-	u8  data[8];
+	u8 data[8];
 };
 
 #define PCAN_USBPRO_STATUS_ERROR	0x0001
@@ -145,26 +145,26 @@ struct __packed pcan_usb_pro_rxmsg {
 #define PCAN_USBPRO_STATUS_QOVERRUN	0x0008
 
 struct __packed pcan_usb_pro_rxstatus {
-	u8  data_type;
-	u8  channel;
+	u8 data_type;
+	u8 channel;
 	__le16 status;
 	__le32 ts32;
 	__le32 err_frm;
 };
 
 struct __packed pcan_usb_pro_rxts {
-	u8  data_type;
-	u8  dummy[3];
+	u8 data_type;
+	u8 dummy[3];
 	__le32 ts64[2];
 };
 
 struct __packed pcan_usb_pro_txmsg {
-	u8  data_type;
-	u8  client;
-	u8  flags;
-	u8  len;
+	u8 data_type;
+	u8 client;
+	u8 flags;
+	u8 len;
 	__le32 id;
-	u8  data[8];
+	u8 data[8];
 };
 
 union pcan_usb_pro_rec {
-- 
cgit v1.2.3


From c779e1271a9e2beb81bfd3cd36b45f4d26b13c95 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 15:05:28 +0200
Subject: can: peak_usb: remove unused variables from struct peak_usb_device

This patch removes the unused variables struct
peak_usb_device::echo_skb and struct peak_usb_device::bus_load from
the driver.

Link: https://lore.kernel.org/r/20210406111622.1874957-4-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_core.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 59afe880a481..64c4c22bb296 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -114,8 +114,6 @@ struct peak_usb_device {
 	unsigned int ctrl_idx;
 	u32 state;
 
-	struct sk_buff *echo_skb[PCAN_USB_MAX_TX_URBS];
-
 	struct usb_device *udev;
 	struct net_device *netdev;
 
@@ -132,8 +130,6 @@ struct peak_usb_device {
 	u8 ep_msg_in;
 	u8 ep_msg_out;
 
-	u16 bus_load;
-
 	struct peak_usb_device *prev_siblings;
 	struct peak_usb_device *next_siblings;
 };
-- 
cgit v1.2.3


From 1a5a5eedf80772ac0fb3f1ec443711c2eab030a5 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Sat, 3 Apr 2021 14:10:25 +0200
Subject: can: peak_usb: remove write only variable struct
 peak_usb_adapter::ts_period

The variable struct peak_usb_adapter::ts_period is only ever written
to. This patch removes it from the driver.

Link: https://lore.kernel.org/r/20210406111622.1874957-5-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c      | 1 -
 drivers/net/can/usb/peak_usb/pcan_usb_core.h | 1 -
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 4 ----
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c  | 1 -
 4 files changed, 7 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index e23049c81159..38bee69ff48a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -1050,7 +1050,6 @@ const struct peak_usb_adapter pcan_usb = {
 
 	/* timestamps usage */
 	.ts_used_bits = 16,
-	.ts_period = 24575, /* calibration period in ts. */
 	.us_per_ts_scale = PCAN_USB_TS_US_PER_TICK, /* us=(ts*scale) */
 	.us_per_ts_shift = PCAN_USB_TS_DIV_SHIFTER, /*  >> shift     */
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 64c4c22bb296..b00a4811bf61 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -73,7 +73,6 @@ struct peak_usb_adapter {
 	u8 ep_msg_in;
 	u8 ep_msg_out[PCAN_USB_MAX_CHANNEL];
 	u8 ts_used_bits;
-	u32 ts_period;
 	u8 us_per_ts_shift;
 	u32 us_per_ts_scale;
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 6f62b6f51051..b11eabad575b 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1081,7 +1081,6 @@ const struct peak_usb_adapter pcan_usb_fd = {
 
 	/* timestamps usage */
 	.ts_used_bits = 32,
-	.ts_period = 1000000, /* calibration period in ts. */
 	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
 	.us_per_ts_shift = 0,
 
@@ -1156,7 +1155,6 @@ const struct peak_usb_adapter pcan_usb_chip = {
 
 	/* timestamps usage */
 	.ts_used_bits = 32,
-	.ts_period = 1000000, /* calibration period in ts. */
 	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
 	.us_per_ts_shift = 0,
 
@@ -1231,7 +1229,6 @@ const struct peak_usb_adapter pcan_usb_pro_fd = {
 
 	/* timestamps usage */
 	.ts_used_bits = 32,
-	.ts_period = 1000000, /* calibration period in ts. */
 	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
 	.us_per_ts_shift = 0,
 
@@ -1306,7 +1303,6 @@ const struct peak_usb_adapter pcan_usb_x6 = {
 
 	/* timestamps usage */
 	.ts_used_bits = 32,
-	.ts_period = 1000000, /* calibration period in ts. */
 	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
 	.us_per_ts_shift = 0,
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index ecb08359f719..589ba797fb33 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -1058,7 +1058,6 @@ const struct peak_usb_adapter pcan_usb_pro = {
 
 	/* timestamps usage */
 	.ts_used_bits = 32,
-	.ts_period = 1000000, /* calibration period in ts. */
 	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
 	.us_per_ts_shift = 0,
 
-- 
cgit v1.2.3


From 592bf5a09d195e70652bed4187254c540e7cca9e Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 12:34:56 +0200
Subject: can: peak_usb: peak_usb_probe(): make use of driver_info

There's no need to iterate over all supported adapters to find the
struct peak_usb_adapter that describes the currently probed devices's
capabilities. The driver core gives us the information for free, if we
assign it to the struct usb_device_id::driver_info.

This patch assigns the usb_device_id::driver_info and converts
peak_usb_probe() to make use of it. This reduces the driver size by
100 bytes on ARCH=arm.

| add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-124 (-124)
| Function                                     old     new   delta
| peak_usb_adapters_list                        24       -     -24
| peak_usb_probe                               236     136    -100
| Total: Before=25263, After=25139, chg -0.49%

Link: https://lore.kernel.org/r/20210406111622.1874957-6-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_core.c | 57 ++++++++++++----------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 7dff77a6ef70..e8f43ed90b72 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -27,28 +27,32 @@ MODULE_DESCRIPTION("CAN driver for PEAK-System USB adapters");
 MODULE_LICENSE("GPL v2");
 
 /* Table of devices that work with this driver */
-static struct usb_device_id peak_usb_table[] = {
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USB_PRODUCT_ID)},
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
-	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
-	{} /* Terminating entry */
+static const struct usb_device_id peak_usb_table[] = {
+	{
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USB_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb,
+	}, {
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb_pro,
+	}, {
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb_fd,
+	}, {
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb_pro_fd,
+	}, {
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb_chip,
+	}, {
+		USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID),
+		.driver_info = (kernel_ulong_t)&pcan_usb_x6,
+	}, {
+		/* Terminating entry */
+	}
 };
 
 MODULE_DEVICE_TABLE(usb, peak_usb_table);
 
-/* List of supported PCAN-USB adapters (NULL terminated list) */
-static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
-	&pcan_usb,
-	&pcan_usb_pro,
-	&pcan_usb_fd,
-	&pcan_usb_pro_fd,
-	&pcan_usb_chip,
-	&pcan_usb_x6,
-};
-
 /*
  * dump memory
  */
@@ -928,24 +932,11 @@ static void peak_usb_disconnect(struct usb_interface *intf)
 static int peak_usb_probe(struct usb_interface *intf,
 			  const struct usb_device_id *id)
 {
-	struct usb_device *usb_dev = interface_to_usbdev(intf);
-	const u16 usb_id_product = le16_to_cpu(usb_dev->descriptor.idProduct);
-	const struct peak_usb_adapter *peak_usb_adapter = NULL;
+	const struct peak_usb_adapter *peak_usb_adapter;
 	int i, err = -ENOMEM;
 
 	/* get corresponding PCAN-USB adapter */
-	for (i = 0; i < ARRAY_SIZE(peak_usb_adapters_list); i++)
-		if (peak_usb_adapters_list[i]->device_id == usb_id_product) {
-			peak_usb_adapter = peak_usb_adapters_list[i];
-			break;
-		}
-
-	if (!peak_usb_adapter) {
-		/* should never come except device_id bad usage in this file */
-		pr_err("%s: didn't find device id. 0x%x in devices list\n",
-			PCAN_USB_DRIVER_NAME, usb_id_product);
-		return -ENODEV;
-	}
+	peak_usb_adapter = (const struct peak_usb_adapter *)id->driver_info;
 
 	/* got corresponding adapter: check if it handles current interface */
 	if (peak_usb_adapter->intf_probe) {
-- 
cgit v1.2.3


From 426718f3fe0e4315d36edf88f11748d118558acc Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 13:49:45 +0200
Subject: can: peak_usb: pcan_usb_{,pro}_get_device_id(): remove unneeded check
 for device_id

The callback struct peak_usb_adapter::dev_get_device_id, which is
implemented by the functions pcan_usb_{,pro}_get_device_id() is only
ever called with a valid device_id pointer.

This patch removes the unneeded check if the device_id pointer is
valid.

Link: https://lore.kernel.org/r/20210406111622.1874957-7-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c     | 4 ++--
 drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 38bee69ff48a..671d589b48c1 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -388,8 +388,8 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id)
 	err = pcan_usb_wait_rsp(dev, PCAN_USB_CMD_DEVID, PCAN_USB_GET, args);
 	if (err)
 		netdev_err(dev->netdev, "getting device id failure: %d\n", err);
-	else if (device_id)
-		*device_id = args[0];
+
+	*device_id = args[0];
 
 	return err;
 }
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
index 589ba797fb33..858ab22708fc 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c
@@ -439,8 +439,7 @@ static int pcan_usb_pro_get_device_id(struct peak_usb_device *dev,
 		return err;
 
 	pdn = (struct pcan_usb_pro_devid *)pc;
-	if (device_id)
-		*device_id = le32_to_cpu(pdn->serial_num);
+	*device_id = le32_to_cpu(pdn->serial_num);
 
 	return err;
 }
-- 
cgit v1.2.3


From 5e164a4f0aaee1b604e920a8cf99878c06fe3f11 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 19:52:19 +0200
Subject: can: peak_usb: pcan_usb_get_serial(): remove error message from error
 path

The caller of pcan_usb_get_serial() already prints an error message,
so remove this one and return immediately.

Link: https://lore.kernel.org/r/20210406111622.1874957-8-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 671d589b48c1..fd5ea95fd55d 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -365,16 +365,17 @@ static int pcan_usb_get_serial(struct peak_usb_device *dev, u32 *serial_number)
 	int err;
 
 	err = pcan_usb_wait_rsp(dev, PCAN_USB_CMD_SN, PCAN_USB_GET, args);
-	if (err) {
-		netdev_err(dev->netdev, "getting serial failure: %d\n", err);
-	} else if (serial_number) {
+	if (err)
+		return err;
+
+	if (serial_number) {
 		__le32 tmp32;
 
 		memcpy(&tmp32, args, 4);
 		*serial_number = le32_to_cpu(tmp32);
 	}
 
-	return err;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 0a7d6cdf90c0f10fd62b1b2d80fbaeb7e0172c13 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 13:49:45 +0200
Subject: can: peak_usb: pcan_usb_get_serial(): make use of le32_to_cpup()

This patch replaces the memcpy() + le32_to_cpu() by le32_to_cpup().

Link: https://lore.kernel.org/r/20210406111622.1874957-9-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index fd5ea95fd55d..ffb01c3a3827 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -368,12 +368,8 @@ static int pcan_usb_get_serial(struct peak_usb_device *dev, u32 *serial_number)
 	if (err)
 		return err;
 
-	if (serial_number) {
-		__le32 tmp32;
-
-		memcpy(&tmp32, args, 4);
-		*serial_number = le32_to_cpu(tmp32);
-	}
+	if (serial_number)
+		*serial_number = le32_to_cpup((__le32 *)args);
 
 	return 0;
 }
-- 
cgit v1.2.3


From b7a29d35a9849e898232111ec083e382fb1adf25 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 19:52:19 +0200
Subject: can: peak_usb: pcan_usb_get_serial(): unconditionally assign
 serial_number

The function serial_number is only called from one location with a
valid serial_number pointer. Remove not needed NULL pointer check.

Link: https://lore.kernel.org/r/20210406111622.1874957-10-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index ffb01c3a3827..929cc1b05aa2 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -367,9 +367,7 @@ static int pcan_usb_get_serial(struct peak_usb_device *dev, u32 *serial_number)
 	err = pcan_usb_wait_rsp(dev, PCAN_USB_CMD_SN, PCAN_USB_GET, args);
 	if (err)
 		return err;
-
-	if (serial_number)
-		*serial_number = le32_to_cpup((__le32 *)args);
+	*serial_number = le32_to_cpup((__le32 *)args);
 
 	return 0;
 }
-- 
cgit v1.2.3


From bd573ea5720470d1ea70f3e39fb2e2efad219311 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 5 Apr 2021 14:44:15 +0200
Subject: can: peak_usb: pcan_usb: replace open coded endianness conversion of
 unaligned data

This patch replaces the open coded endianness conversion of unaligned
data by the appropriate get/put_unaligned_leXX() variants.

Link: https://lore.kernel.org/r/20210406111622.1874957-11-mkl@pengutronix.de
Acked-by: Stephane Grosjean <s.grosjean@peak-system.com>
Tested-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 37 ++++++++++-----------------------
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 929cc1b05aa2..1d6f77252f01 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -394,14 +394,10 @@ static int pcan_usb_get_device_id(struct peak_usb_device *dev, u32 *device_id)
  */
 static int pcan_usb_update_ts(struct pcan_usb_msg_context *mc)
 {
-	__le16 tmp16;
-
 	if ((mc->ptr + 2) > mc->end)
 		return -EINVAL;
 
-	memcpy(&tmp16, mc->ptr, 2);
-
-	mc->ts16 = le16_to_cpu(tmp16);
+	mc->ts16 = get_unaligned_le16(mc->ptr);
 
 	if (mc->rec_idx > 0)
 		peak_usb_update_ts_now(&mc->pdev->time_ref, mc->ts16);
@@ -418,16 +414,13 @@ static int pcan_usb_decode_ts(struct pcan_usb_msg_context *mc, u8 first_packet)
 {
 	/* only 1st packet supplies a word timestamp */
 	if (first_packet) {
-		__le16 tmp16;
-
 		if ((mc->ptr + 2) > mc->end)
 			return -EINVAL;
 
-		memcpy(&tmp16, mc->ptr, 2);
-		mc->ptr += 2;
-
-		mc->ts16 = le16_to_cpu(tmp16);
+		mc->ts16 = get_unaligned_le16(mc->ptr);
 		mc->prev_ts8 = mc->ts16 & 0x00ff;
+
+		mc->ptr += 2;
 	} else {
 		u8 ts8;
 
@@ -717,25 +710,17 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
 		return -ENOMEM;
 
 	if (status_len & PCAN_USB_STATUSLEN_EXT_ID) {
-		__le32 tmp32;
-
 		if ((mc->ptr + 4) > mc->end)
 			goto decode_failed;
 
-		memcpy(&tmp32, mc->ptr, 4);
+		cf->can_id = get_unaligned_le32(mc->ptr) >> 3 | CAN_EFF_FLAG;
 		mc->ptr += 4;
-
-		cf->can_id = (le32_to_cpu(tmp32) >> 3) | CAN_EFF_FLAG;
 	} else {
-		__le16 tmp16;
-
 		if ((mc->ptr + 2) > mc->end)
 			goto decode_failed;
 
-		memcpy(&tmp16, mc->ptr, 2);
+		cf->can_id = get_unaligned_le16(mc->ptr) >> 5;
 		mc->ptr += 2;
-
-		cf->can_id = le16_to_cpu(tmp16) >> 5;
 	}
 
 	can_frame_set_cc_len(cf, rec_len, mc->pdev->dev.can.ctrlmode);
@@ -849,15 +834,15 @@ static int pcan_usb_encode_msg(struct peak_usb_device *dev, struct sk_buff *skb,
 
 	/* can id */
 	if (cf->can_id & CAN_EFF_FLAG) {
-		__le32 tmp32 = cpu_to_le32((cf->can_id & CAN_ERR_MASK) << 3);
-
 		*pc |= PCAN_USB_STATUSLEN_EXT_ID;
-		memcpy(++pc, &tmp32, 4);
+		pc++;
+
+		put_unaligned_le32((cf->can_id & CAN_ERR_MASK) << 3, pc);
 		pc += 4;
 	} else {
-		__le16 tmp16 = cpu_to_le16((cf->can_id & CAN_ERR_MASK) << 5);
+		pc++;
 
-		memcpy(++pc, &tmp16, 2);
+		put_unaligned_le16((cf->can_id & CAN_ERR_MASK) << 5, pc);
 		pc += 2;
 	}
 
-- 
cgit v1.2.3


From 3e1b0c168f6c8648f217c78ed6a4135af8c9d830 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Sat, 3 Apr 2021 21:59:42 +0800
Subject: netfilter: flowtable: add vlan match offload support

This patch adds support for vlan_id, vlan_priority and vlan_proto match
for flowtable offload.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  2 ++
 net/netfilter/nf_flow_table_offload.c | 37 +++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 583b327d8fc0..d46e422c9d10 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -21,6 +21,8 @@ struct nf_flow_key {
 	struct flow_dissector_key_control		control;
 	struct flow_dissector_key_control		enc_control;
 	struct flow_dissector_key_basic			basic;
+	struct flow_dissector_key_vlan			vlan;
+	struct flow_dissector_key_vlan			cvlan;
 	union {
 		struct flow_dissector_key_ipv4_addrs	ipv4;
 		struct flow_dissector_key_ipv6_addrs	ipv6;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 7d0d128407be..dc1d6b4e35f8 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -78,6 +78,16 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
 	match->dissector.used_keys |= enc_keys;
 }
 
+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
+				    struct flow_dissector_key_vlan *mask,
+				    u16 vlan_id, __be16 proto)
+{
+	key->vlan_id = vlan_id;
+	mask->vlan_id = VLAN_VID_MASK;
+	key->vlan_tpid = proto;
+	mask->vlan_tpid = 0xffff;
+}
+
 static int nf_flow_rule_match(struct nf_flow_match *match,
 			      const struct flow_offload_tuple *tuple,
 			      struct dst_entry *other_dst)
@@ -85,6 +95,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
 	struct nf_flow_key *mask = &match->mask;
 	struct nf_flow_key *key = &match->key;
 	struct ip_tunnel_info *tun_info;
+	bool vlan_encap = false;
 
 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
 	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
@@ -102,6 +113,32 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
 	key->meta.ingress_ifindex = tuple->iifidx;
 	mask->meta.ingress_ifindex = 0xffffffff;
 
+	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
+	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
+		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
+		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+					tuple->encap[0].id,
+					tuple->encap[0].proto);
+		vlan_encap = true;
+	}
+
+	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
+	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
+		if (vlan_encap) {
+			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
+					  cvlan);
+			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
+						tuple->encap[1].id,
+						tuple->encap[1].proto);
+		} else {
+			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
+					  vlan);
+			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+						tuple->encap[1].id,
+						tuple->encap[1].proto);
+		}
+	}
+
 	switch (tuple->l3proto) {
 	case AF_INET:
 		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-- 
cgit v1.2.3


From efce49dfe6a8ec491759ad9eaa85fadbf26654c5 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Sat, 3 Apr 2021 21:59:43 +0800
Subject: netfilter: flowtable: add vlan pop action offload support

This patch adds vlan pop action offload in the flowtable offload.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_offload.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index dc1d6b4e35f8..b87f8c3ee6c2 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -619,6 +619,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
 			  struct nf_flow_rule *flow_rule)
 {
 	const struct flow_offload_tuple *other_tuple;
+	const struct flow_offload_tuple *tuple;
 	int i;
 
 	flow_offload_decap_tunnel(flow, dir, flow_rule);
@@ -628,6 +629,20 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
 	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
 		return -1;
 
+	tuple = &flow->tuplehash[dir].tuple;
+
+	for (i = 0; i < tuple->encap_num; i++) {
+		struct flow_action_entry *entry;
+
+		if (tuple->in_vlan_ingress & BIT(i))
+			continue;
+
+		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
+			entry = flow_action_entry_next(flow_rule);
+			entry->id = FLOW_ACTION_VLAN_POP;
+		}
+	}
+
 	other_tuple = &flow->tuplehash[!dir].tuple;
 
 	for (i = 0; i < other_tuple->encap_num; i++) {
-- 
cgit v1.2.3


From 098b5d3565e2391ca260964807e7324d489dd10b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Apr 2021 21:55:40 +0200
Subject: netfilter: conntrack: move autoassign warning member to net_generic
 data

Not accessed in fast path, place this is generic_net data instead.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 4 ++++
 net/netfilter/nf_conntrack_helper.c  | 9 ++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 86d86c860ede..c532b629db7b 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -44,6 +44,10 @@ union nf_conntrack_expect_proto {
 };
 
 struct nf_conntrack_net {
+	/* only used when new connection is allocated: */
+	bool auto_assign_helper_warned;
+
+	/* only used from work queues, configuration plane, and so on: */
 	unsigned int users4;
 	unsigned int users6;
 	unsigned int users_bridge;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index b055187235f8..ad91964eaa92 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -43,6 +43,8 @@ MODULE_PARM_DESC(nf_conntrack_helper,
 static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
 static struct list_head nf_ct_nat_helpers __read_mostly;
 
+extern unsigned int nf_conntrack_net_id;
+
 /* Stupid hash, but collision free for the default registrations of the
  * helpers currently in the kernel. */
 static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
@@ -212,8 +214,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
 static struct nf_conntrack_helper *
 nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
 	if (!net->ct.sysctl_auto_assign_helper) {
-		if (net->ct.auto_assign_helper_warned)
+		if (cnet->auto_assign_helper_warned)
 			return NULL;
 		if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
 			return NULL;
@@ -221,7 +225,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 			"has been turned off for security reasons and CT-based "
 			"firewall rule not found. Use the iptables CT target "
 			"to attach helpers instead.\n");
-		net->ct.auto_assign_helper_warned = 1;
+		cnet->auto_assign_helper_warned = true;
 		return NULL;
 	}
 
@@ -556,7 +560,6 @@ static const struct nf_ct_ext_type helper_extend = {
 
 void nf_conntrack_helper_pernet_init(struct net *net)
 {
-	net->ct.auto_assign_helper_warned = false;
 	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
 }
 
-- 
cgit v1.2.3


From 67f28216ca04b9ba965cd652fea08f670b99a0c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Apr 2021 21:55:41 +0200
Subject: netfilter: conntrack: move autoassign_helper sysctl to net_generic
 data

While at it, make it an u8, no need to use an integer for a boolean.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h    | 1 +
 net/netfilter/nf_conntrack_helper.c     | 6 ++++--
 net/netfilter/nf_conntrack_standalone.c | 7 +++----
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c532b629db7b..db8f047eb75f 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -45,6 +45,7 @@ union nf_conntrack_expect_proto {
 
 struct nf_conntrack_net {
 	/* only used when new connection is allocated: */
+	u8 sysctl_auto_assign_helper;
 	bool auto_assign_helper_warned;
 
 	/* only used from work queues, configuration plane, and so on: */
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ad91964eaa92..ac396cc8bfae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -216,7 +216,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
 {
 	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 
-	if (!net->ct.sysctl_auto_assign_helper) {
+	if (!cnet->sysctl_auto_assign_helper) {
 		if (cnet->auto_assign_helper_warned)
 			return NULL;
 		if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
@@ -560,7 +560,9 @@ static const struct nf_ct_ext_type helper_extend = {
 
 void nf_conntrack_helper_pernet_init(struct net *net)
 {
-	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+	cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
 }
 
 int nf_conntrack_helper_init(void)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 54d36d3eb905..a7538379cfca 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -662,10 +662,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	[NF_SYSCTL_CT_HELPER] = {
 		.procname	= "nf_conntrack_helper",
-		.data		= &init_net.ct.sysctl_auto_assign_helper,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
@@ -1042,7 +1041,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
 	table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
 	table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
-	table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper;
+	table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
 #endif
-- 
cgit v1.2.3


From f6f2e580d5f7152fb5ab11232edecb7fbeca3759 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Apr 2021 21:55:42 +0200
Subject: netfilter: conntrack: move expect counter to net_generic data

Creation of a new conntrack entry isn't a frequent operation (compared
to 'ct entry already exists').  Creation of a new entry that is also an
expected (related) connection even less so.

Place this counter in net_generic data.

A followup patch will also move the conntrack count -- this will make
netns_ct a read-mostly structure.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  1 +
 net/netfilter/nf_conntrack_core.c    |  6 +++++-
 net/netfilter/nf_conntrack_expect.c  | 22 ++++++++++++++++------
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index db8f047eb75f..0578a905b1df 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -45,6 +45,7 @@ union nf_conntrack_expect_proto {
 
 struct nf_conntrack_net {
 	/* only used when new connection is allocated: */
+	unsigned int expect_count;
 	u8 sysctl_auto_assign_helper;
 	bool auto_assign_helper_warned;
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ace3e8265e0a..5fa68f94ec65 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -55,6 +55,8 @@
 
 #include "nf_internals.h"
 
+extern unsigned int nf_conntrack_net_id;
+
 __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 
@@ -1570,6 +1572,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	const struct nf_conntrack_zone *zone;
 	struct nf_conn_timeout *timeout_ext;
 	struct nf_conntrack_zone tmp;
+	struct nf_conntrack_net *cnet;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
 		pr_debug("Can't invert tuple.\n");
@@ -1603,7 +1606,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			     GFP_ATOMIC);
 
 	local_bh_disable();
-	if (net->ct.expect_count) {
+	cnet = net_generic(net, nf_conntrack_net_id);
+	if (cnet->expect_count) {
 		spin_lock(&nf_conntrack_expect_lock);
 		exp = nf_ct_find_expectation(net, zone, tuple);
 		if (exp) {
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 42557d2b6a90..efdd391b3f72 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -43,18 +43,23 @@ unsigned int nf_ct_expect_max __read_mostly;
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 static unsigned int nf_ct_expect_hashrnd __read_mostly;
 
+extern unsigned int nf_conntrack_net_id;
+
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 				u32 portid, int report)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
+	struct nf_conntrack_net *cnet;
 
 	WARN_ON(!master_help);
 	WARN_ON(timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
-	net->ct.expect_count--;
+
+	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet->expect_count--;
 
 	hlist_del_rcu(&exp->lnode);
 	master_help->expecting[exp->class]--;
@@ -118,10 +123,11 @@ __nf_ct_expect_find(struct net *net,
 		    const struct nf_conntrack_zone *zone,
 		    const struct nf_conntrack_tuple *tuple)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	struct nf_conntrack_expect *i;
 	unsigned int h;
 
-	if (!net->ct.expect_count)
+	if (!cnet->expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(net, tuple);
@@ -158,10 +164,11 @@ nf_ct_find_expectation(struct net *net,
 		       const struct nf_conntrack_zone *zone,
 		       const struct nf_conntrack_tuple *tuple)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	struct nf_conntrack_expect *i, *exp = NULL;
 	unsigned int h;
 
-	if (!net->ct.expect_count)
+	if (!cnet->expect_count)
 		return NULL;
 
 	h = nf_ct_expect_dst_hash(net, tuple);
@@ -368,6 +375,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 
 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
+	struct nf_conntrack_net *cnet;
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(exp);
@@ -389,7 +397,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	master_help->expecting[exp->class]++;
 
 	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
-	net->ct.expect_count++;
+	cnet = net_generic(net, nf_conntrack_net_id);
+	cnet->expect_count++;
 
 	NF_CT_STAT_INC(net, expect_create);
 }
@@ -415,6 +424,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
 {
 	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
+	struct nf_conntrack_net *cnet;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
 	struct nf_conntrack_helper *helper;
@@ -458,7 +468,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
 		}
 	}
 
-	if (net->ct.expect_count >= nf_ct_expect_max) {
+	cnet = net_generic(net, nf_conntrack_net_id);
+	if (cnet->expect_count >= nf_ct_expect_max) {
 		net_warn_ratelimited("nf_conntrack: expectation table full\n");
 		ret = -EMFILE;
 	}
@@ -686,7 +697,6 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
 int nf_conntrack_expect_pernet_init(struct net *net)
 {
-	net->ct.expect_count = 0;
 	return exp_proc_init(net);
 }
 
-- 
cgit v1.2.3


From c53bd0e96662c2f77109e08a9889c9e1ee86c52d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Apr 2021 21:55:43 +0200
Subject: netfilter: conntrack: move ct counter to net_generic data

Its only needed from slowpath (sysctl, ctnetlink, gc worker) and
when a new conntrack object is allocated.

Furthermore, each write dirties the otherwise read-mostly pernet
data in struct net.ct, which are accessed from packet path.

Move it to the net_generic data.  This makes struct netns_ct
read-mostly.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h    |  2 ++
 net/netfilter/nf_conntrack_core.c       | 40 +++++++++++++++++++++++----------
 net/netfilter/nf_conntrack_netlink.c    |  5 +++--
 net/netfilter/nf_conntrack_standalone.c | 17 +++++++++++---
 4 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0578a905b1df..06dc6db70d18 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -45,6 +45,7 @@ union nf_conntrack_expect_proto {
 
 struct nf_conntrack_net {
 	/* only used when new connection is allocated: */
+	atomic_t count;
 	unsigned int expect_count;
 	u8 sysctl_auto_assign_helper;
 	bool auto_assign_helper_warned;
@@ -337,6 +338,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
 void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 u32 nf_ct_get_id(const struct nf_conn *ct);
+u32 nf_conntrack_count(const struct net *net);
 
 static inline void
 nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5fa68f94ec65..e0befcf8113a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -87,6 +87,8 @@ static __read_mostly bool nf_conntrack_locks_all;
 
 static struct conntrack_gc_work conntrack_gc_work;
 
+extern unsigned int nf_conntrack_net_id;
+
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
 	/* 1) Acquire the lock */
@@ -1381,6 +1383,7 @@ static void gc_worker(struct work_struct *work)
 			i = 0;
 
 		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+			struct nf_conntrack_net *cnet;
 			struct net *net;
 
 			tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1401,7 +1404,8 @@ static void gc_worker(struct work_struct *work)
 				continue;
 
 			net = nf_ct_net(tmp);
-			if (atomic_read(&net->ct.count) < nf_conntrack_max95)
+			cnet = net_generic(net, nf_conntrack_net_id);
+			if (atomic_read(&cnet->count) < nf_conntrack_max95)
 				continue;
 
 			/* need to take reference to avoid possible races */
@@ -1480,17 +1484,18 @@ __nf_conntrack_alloc(struct net *net,
 		     const struct nf_conntrack_tuple *repl,
 		     gfp_t gfp, u32 hash)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	unsigned int ct_count;
 	struct nf_conn *ct;
 
 	/* We don't want any race condition at early drop stage */
-	atomic_inc(&net->ct.count);
+	ct_count = atomic_inc_return(&cnet->count);
 
-	if (nf_conntrack_max &&
-	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
+	if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
 		if (!early_drop(net, hash)) {
 			if (!conntrack_gc_work.early_drop)
 				conntrack_gc_work.early_drop = true;
-			atomic_dec(&net->ct.count);
+			atomic_dec(&cnet->count);
 			net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
 			return ERR_PTR(-ENOMEM);
 		}
@@ -1525,7 +1530,7 @@ __nf_conntrack_alloc(struct net *net,
 	atomic_set(&ct->ct_general.use, 0);
 	return ct;
 out:
-	atomic_dec(&net->ct.count);
+	atomic_dec(&cnet->count);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -1542,6 +1547,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 void nf_conntrack_free(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_net *cnet;
 
 	/* A freed object has refcnt == 0, that's
 	 * the golden rule for SLAB_TYPESAFE_BY_RCU
@@ -1550,8 +1556,10 @@ void nf_conntrack_free(struct nf_conn *ct)
 
 	nf_ct_ext_destroy(ct);
 	kmem_cache_free(nf_conntrack_cachep, ct);
+	cnet = net_generic(net, nf_conntrack_net_id);
+
 	smp_mb__before_atomic();
-	atomic_dec(&net->ct.count);
+	atomic_dec(&cnet->count);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
@@ -2309,9 +2317,11 @@ __nf_ct_unconfirmed_destroy(struct net *net)
 
 void nf_ct_unconfirmed_destroy(struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
 	might_sleep();
 
-	if (atomic_read(&net->ct.count) > 0) {
+	if (atomic_read(&cnet->count) > 0) {
 		__nf_ct_unconfirmed_destroy(net);
 		nf_queue_nf_hook_drop(net);
 		synchronize_net();
@@ -2323,11 +2333,12 @@ void nf_ct_iterate_cleanup_net(struct net *net,
 			       int (*iter)(struct nf_conn *i, void *data),
 			       void *data, u32 portid, int report)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	struct iter_data d;
 
 	might_sleep();
 
-	if (atomic_read(&net->ct.count) == 0)
+	if (atomic_read(&cnet->count) == 0)
 		return;
 
 	d.iter = iter;
@@ -2356,7 +2367,9 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
 
 	down_read(&net_rwsem);
 	for_each_net(net) {
-		if (atomic_read(&net->ct.count) == 0)
+		struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+		if (atomic_read(&cnet->count) == 0)
 			continue;
 		__nf_ct_unconfirmed_destroy(net);
 		nf_queue_nf_hook_drop(net);
@@ -2436,8 +2449,10 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
 i_see_dead_people:
 	busy = 0;
 	list_for_each_entry(net, net_exit_list, exit_list) {
+		struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
 		nf_ct_iterate_cleanup(kill_all, net, 0, 0);
-		if (atomic_read(&net->ct.count) != 0)
+		if (atomic_read(&cnet->count) != 0)
 			busy = 1;
 	}
 	if (busy) {
@@ -2718,12 +2733,13 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
 	int ret = -ENOMEM;
 	int cpu;
 
 	BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
 	BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
-	atomic_set(&net->ct.count, 0);
+	atomic_set(&cnet->count, 0);
 
 	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
 	if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c67a6ec22a74..44e3cb80e2e0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2559,9 +2559,9 @@ static int
 ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 			    struct net *net)
 {
-	struct nlmsghdr *nlh;
 	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
-	unsigned int nr_conntracks = atomic_read(&net->ct.count);
+	unsigned int nr_conntracks;
+	struct nlmsghdr *nlh;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
 	nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
@@ -2569,6 +2569,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	if (!nlh)
 		goto nlmsg_failure;
 
+	nr_conntracks = nf_conntrack_count(net);
 	if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
 		goto nla_put_failure;
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index a7538379cfca..fb89f6e5c8bc 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -424,14 +424,16 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_net(seq);
-	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 	const struct ip_conntrack_stat *st = v;
+	unsigned int nr_conntracks;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq, "entries  clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
+	nr_conntracks = nf_conntrack_count(net);
+
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
 			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
@@ -507,6 +509,16 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
 }
 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
 
+u32 nf_conntrack_count(const struct net *net)
+{
+	const struct nf_conntrack_net *cnet;
+
+	cnet = net_generic(net, nf_conntrack_net_id);
+
+	return atomic_read(&cnet->count);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_count);
+
 /* Sysctl support */
 
 #ifdef CONFIG_SYSCTL
@@ -614,7 +626,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	[NF_SYSCTL_CT_COUNT] = {
 		.procname	= "nf_conntrack_count",
-		.data		= &init_net.ct.count,
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
@@ -1037,7 +1048,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 	if (!table)
 		return -ENOMEM;
 
-	table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
+	table[NF_SYSCTL_CT_COUNT].data = &cnet->count;
 	table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
 	table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
 	table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
-- 
cgit v1.2.3


From 9b1a4d0f914b1186248fc88b1cb6ee49e336a2b2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Apr 2021 21:55:44 +0200
Subject: netfilter: conntrack: convert sysctls to u8

log_invalid sysctl allows values of 0 to 255 inclusive so we no longer
need a range check: the min/max values can be removed.

This also removes all member variables that were moved to net_generic
data in previous patches.

This reduces size of netns_ct struct by one cache line.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/conntrack.h           | 23 ++++++++----------
 net/netfilter/nf_conntrack_proto_tcp.c  | 34 +++++++++++++-------------
 net/netfilter/nf_conntrack_standalone.c | 42 ++++++++++++++-------------------
 3 files changed, 45 insertions(+), 54 deletions(-)

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e5f664d69ead..ad0a95c2335e 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -24,9 +24,9 @@ struct nf_generic_net {
 
 struct nf_tcp_net {
 	unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
-	int tcp_loose;
-	int tcp_be_liberal;
-	int tcp_max_retrans;
+	u8 tcp_loose;
+	u8 tcp_be_liberal;
+	u8 tcp_max_retrans;
 };
 
 enum udp_conntrack {
@@ -45,7 +45,7 @@ struct nf_icmp_net {
 
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 struct nf_dccp_net {
-	int dccp_loose;
+	u8 dccp_loose;
 	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
 };
 #endif
@@ -93,18 +93,15 @@ struct ct_pcpu {
 };
 
 struct netns_ct {
-	atomic_t		count;
-	unsigned int		expect_count;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	bool ecache_dwork_pending;
 #endif
-	bool			auto_assign_helper_warned;
-	unsigned int		sysctl_log_invalid; /* Log invalid packets */
-	int			sysctl_events;
-	int			sysctl_acct;
-	int			sysctl_auto_assign_helper;
-	int			sysctl_tstamp;
-	int			sysctl_checksum;
+	u8			sysctl_log_invalid; /* Log invalid packets */
+	u8			sysctl_events;
+	u8			sysctl_acct;
+	u8			sysctl_auto_assign_helper;
+	u8			sysctl_tstamp;
+	u8			sysctl_checksum;
 
 	struct ct_pcpu __percpu *pcpu_lists;
 	struct ip_conntrack_stat __percpu *stat;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ec23330687a5..318b8f723349 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -31,20 +31,6 @@
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-/* "Be conservative in what you do,
-    be liberal in what you accept from others."
-    If it's non-zero, we mark only out of window RST segments as INVALID. */
-static int nf_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
-   connections. */
-static int nf_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
-   ACK from the destination. If this number is reached, a shorter timer
-   will be started. */
-static int nf_ct_tcp_max_retrans __read_mostly = 3;
-
   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
      closely.  They're more complex. --RR */
 
@@ -1436,9 +1422,23 @@ void nf_conntrack_tcp_init_net(struct net *net)
 	 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
 	 */
 	tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
-	tn->tcp_loose = nf_ct_tcp_loose;
-	tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
-	tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
+
+	/* If it is set to zero, we disable picking up already established
+	 * connections.
+	 */
+	tn->tcp_loose = 1;
+
+	/* "Be conservative in what you do,
+	 *  be liberal in what you accept from others."
+	 * If it's non-zero, we mark only out of window RST segments as INVALID.
+	 */
+	tn->tcp_be_liberal = 0;
+
+	/* Max number of the retransmitted packets without receiving an (acceptable)
+	 * ACK from the destination. If this number is reached, a shorter timer
+	 * will be started.
+	 */
+	tn->tcp_max_retrans = 3;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fb89f6e5c8bc..fe99605444be 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -522,10 +522,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_count);
 /* Sysctl support */
 
 #ifdef CONFIG_SYSCTL
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min __read_mostly;
-static int log_invalid_proto_max __read_mostly = 255;
-
 /* size the user *wants to set */
 static unsigned int nf_conntrack_htable_size_user __read_mostly;
 
@@ -640,20 +636,18 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	[NF_SYSCTL_CT_CHECKSUM] = {
 		.procname	= "nf_conntrack_checksum",
 		.data		= &init_net.ct.sysctl_checksum,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
 	[NF_SYSCTL_CT_LOG_INVALID] = {
 		.procname	= "nf_conntrack_log_invalid",
 		.data		= &init_net.ct.sysctl_log_invalid,
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &log_invalid_proto_min,
-		.extra2		= &log_invalid_proto_max,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	[NF_SYSCTL_CT_EXPECT_MAX] = {
 		.procname	= "nf_conntrack_expect_max",
@@ -665,9 +659,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	[NF_SYSCTL_CT_ACCT] = {
 		.procname	= "nf_conntrack_acct",
 		.data		= &init_net.ct.sysctl_acct,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
@@ -683,9 +677,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	[NF_SYSCTL_CT_EVENTS] = {
 		.procname	= "nf_conntrack_events",
 		.data		= &init_net.ct.sysctl_events,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
@@ -694,9 +688,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	[NF_SYSCTL_CT_TIMESTAMP] = {
 		.procname	= "nf_conntrack_timestamp",
 		.data		= &init_net.ct.sysctl_tstamp,
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
@@ -769,25 +763,25 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
 		.procname	= "nf_conntrack_tcp_loose",
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
 	[NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
 		.procname       = "nf_conntrack_tcp_be_liberal",
-		.maxlen         = sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode           = 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
 	[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
 		.procname	= "nf_conntrack_tcp_max_retrans",
-		.maxlen		= sizeof(unsigned int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dou8vec_minmax,
 	},
 	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
 		.procname	= "nf_conntrack_udp_timeout",
@@ -914,9 +908,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	[NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
 		.procname	= "nf_conntrack_dccp_loose",
-		.maxlen		= sizeof(int),
+		.maxlen		= sizeof(u8),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dou8vec_minmax,
 		.extra1 	= SYSCTL_ZERO,
 		.extra2 	= SYSCTL_ONE,
 	},
-- 
cgit v1.2.3


From 78ed0a9bc6db76f8e5f5f4cb0d2b2f0d1bb21b24 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 13 Apr 2021 11:06:05 +0300
Subject: netfilter: flowtable: Add FLOW_OFFLOAD_XMIT_UNSPEC xmit type

It could be xmit type was not set and would default to FLOW_OFFLOAD_XMIT_NEIGH
and in this type the gc expect to have a route info.
Fix that by adding FLOW_OFFLOAD_XMIT_UNSPEC which defaults to 0.

Fixes: 8b9229d15877 ("netfilter: flowtable: dst_check() from garbage collector path")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 3 ++-
 net/netfilter/nf_flow_table_core.c    | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d46e422c9d10..51d8eb99764d 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -92,7 +92,8 @@ enum flow_offload_tuple_dir {
 #define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
 
 enum flow_offload_xmit_type {
-	FLOW_OFFLOAD_XMIT_NEIGH		= 0,
+	FLOW_OFFLOAD_XMIT_UNSPEC	= 0,
+	FLOW_OFFLOAD_XMIT_NEIGH,
 	FLOW_OFFLOAD_XMIT_XFRM,
 	FLOW_OFFLOAD_XMIT_DIRECT,
 };
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 76573bae6664..39c02d1aeedf 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -130,6 +130,9 @@ static int flow_offload_fill_route(struct flow_offload *flow,
 		flow_tuple->dst_cache = dst;
 		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
 		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
 
-- 
cgit v1.2.3


From 40b5d2f15c091fa9c854acde91ad2acb504027d7 Mon Sep 17 00:00:00 2001
From: René van Dorst <opensource@vdorst.com>
Date: Mon, 12 Apr 2021 08:50:31 +0200
Subject: net: dsa: mt7530: Add support for EEE features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds EEE support.

Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mt7530.h | 14 +++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 2bd1bab71497..96f7c9eede35 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2570,6 +2570,17 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port,
 			mcr |= PMCR_RX_FC_EN;
 	}
 
+	if (mode == MLO_AN_PHY && phydev && phy_init_eee(phydev, 0) >= 0) {
+		switch (speed) {
+		case SPEED_1000:
+			mcr |= PMCR_FORCE_EEE1G;
+			break;
+		case SPEED_100:
+			mcr |= PMCR_FORCE_EEE100;
+			break;
+		}
+	}
+
 	mt7530_set(priv, MT7530_PMCR_P(port), mcr);
 }
 
@@ -2800,6 +2811,36 @@ mt753x_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
 	return priv->info->phy_write(ds, port, regnum, val);
 }
 
+static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
+			      struct ethtool_eee *e)
+{
+	struct mt7530_priv *priv = ds->priv;
+	u32 eeecr = mt7530_read(priv, MT7530_PMEEECR_P(port));
+
+	e->tx_lpi_enabled = !(eeecr & LPI_MODE_EN);
+	e->tx_lpi_timer = GET_LPI_THRESH(eeecr);
+
+	return 0;
+}
+
+static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
+			      struct ethtool_eee *e)
+{
+	struct mt7530_priv *priv = ds->priv;
+	u32 set, mask = LPI_THRESH_MASK | LPI_MODE_EN;
+
+	if (e->tx_lpi_timer > 0xFFF)
+		return -EINVAL;
+
+	set = SET_LPI_THRESH(e->tx_lpi_timer);
+	if (!e->tx_lpi_enabled)
+		/* Force LPI Mode without a delay */
+		set |= LPI_MODE_EN;
+	mt7530_rmw(priv, MT7530_PMEEECR_P(port), mask, set);
+
+	return 0;
+}
+
 static const struct dsa_switch_ops mt7530_switch_ops = {
 	.get_tag_protocol	= mtk_get_tag_protocol,
 	.setup			= mt753x_setup,
@@ -2835,6 +2876,8 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
 	.phylink_mac_an_restart	= mt753x_phylink_mac_an_restart,
 	.phylink_mac_link_down	= mt753x_phylink_mac_link_down,
 	.phylink_mac_link_up	= mt753x_phylink_mac_link_up,
+	.get_mac_eee		= mt753x_get_mac_eee,
+	.set_mac_eee		= mt753x_set_mac_eee,
 };
 
 static const struct mt753x_info mt753x_table[] = {
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index ec36ea5dfd57..0204da486f3a 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -257,6 +257,8 @@ enum mt7530_vlan_port_attr {
 #define  PMCR_RX_EN			BIT(13)
 #define  PMCR_BACKOFF_EN		BIT(9)
 #define  PMCR_BACKPR_EN			BIT(8)
+#define  PMCR_FORCE_EEE1G		BIT(7)
+#define  PMCR_FORCE_EEE100		BIT(6)
 #define  PMCR_TX_FC_EN			BIT(5)
 #define  PMCR_RX_FC_EN			BIT(4)
 #define  PMCR_FORCE_SPEED_1000		BIT(3)
@@ -281,7 +283,8 @@ enum mt7530_vlan_port_attr {
 #define  PMCR_LINK_SETTINGS_MASK	(PMCR_TX_EN | PMCR_FORCE_SPEED_1000 | \
 					 PMCR_RX_EN | PMCR_FORCE_SPEED_100 | \
 					 PMCR_TX_FC_EN | PMCR_RX_FC_EN | \
-					 PMCR_FORCE_FDX | PMCR_FORCE_LNK)
+					 PMCR_FORCE_FDX | PMCR_FORCE_LNK | \
+					 PMCR_FORCE_EEE1G | PMCR_FORCE_EEE100)
 #define  PMCR_CPU_PORT_SETTING(id)	(PMCR_FORCE_MODE_ID((id)) | \
 					 PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \
 					 PMCR_BACKOFF_EN | PMCR_BACKPR_EN | \
@@ -290,6 +293,15 @@ enum mt7530_vlan_port_attr {
 					 PMCR_FORCE_SPEED_1000 | \
 					 PMCR_FORCE_FDX | PMCR_FORCE_LNK)
 
+#define MT7530_PMEEECR_P(x)		(0x3004 + (x) * 0x100)
+#define  WAKEUP_TIME_1000(x)		(((x) & 0xFF) << 24)
+#define  WAKEUP_TIME_100(x)		(((x) & 0xFF) << 16)
+#define  LPI_THRESH_MASK		GENMASK(15, 4)
+#define  LPI_THRESH_SHT			4
+#define  SET_LPI_THRESH(x)		(((x) << LPI_THRESH_SHT) & LPI_THRESH_MASK)
+#define  GET_LPI_THRESH(x)		(((x) & LPI_THRESH_MASK) >> LPI_THRESH_SHT)
+#define  LPI_MODE_EN			BIT(0)
+
 #define MT7530_PMSR_P(x)		(0x3008 + (x) * 0x100)
 #define  PMSR_EEE1G			BIT(7)
 #define  PMSR_EEE100M			BIT(6)
-- 
cgit v1.2.3


From db16c1fe92d7ba7d39061faef897842baee2c887 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 13 Apr 2021 21:00:43 +0200
Subject: bpf: Generate BTF_KIND_FLOAT when linking vmlinux

pahole v1.21 supports the --btf_gen_floats flag, which makes it
generate the information about the floating-point types [1].

Adjust link-vmlinux.sh to pass this flag to pahole in case it's
supported, which is determined using a simple version check.

[1] https://lore.kernel.org/dwarves/YHRiXNX1JUF2Az0A@kernel.org/

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413190043.21918-1-iii@linux.ibm.com
---
 scripts/link-vmlinux.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 3b261b0f74f0..667aacb9261c 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -213,6 +213,7 @@ vmlinux_link()
 gen_btf()
 {
 	local pahole_ver
+	local extra_paholeopt=
 
 	if ! [ -x "$(command -v ${PAHOLE})" ]; then
 		echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available"
@@ -227,8 +228,12 @@ gen_btf()
 
 	vmlinux_link ${1}
 
+	if [ "${pahole_ver}" -ge "121" ]; then
+		extra_paholeopt="${extra_paholeopt} --btf_gen_floats"
+	fi
+
 	info "BTF" ${2}
-	LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1}
+	LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1}
 
 	# Create ${2} which contains just .BTF section but no symbols. Add
 	# SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
-- 
cgit v1.2.3


From 83216e3988cd196183542937c9bd58b279f946af Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 12 Apr 2021 19:47:17 +0200
Subject: of: net: pass the dst buffer to of_get_mac_address()

of_get_mac_address() returns a "const void*" pointer to a MAC address.
Lately, support to fetch the MAC address by an NVMEM provider was added.
But this will only work with platform devices. It will not work with
PCI devices (e.g. of an integrated root complex) and esp. not with DSA
ports.

There is an of_* variant of the nvmem binding which works without
devices. The returned data of a nvmem_cell_read() has to be freed after
use. On the other hand the return of_get_mac_address() points to some
static data without a lifetime. The trick for now, was to allocate a
device resource managed buffer which is then returned. This will only
work if we have an actual device.

Change it, so that the caller of of_get_mac_address() has to supply a
buffer where the MAC address is written to. Unfortunately, this will
touch all drivers which use the of_get_mac_address().

Usually the code looks like:

  const char *addr;
  addr = of_get_mac_address(np);
  if (!IS_ERR(addr))
    ether_addr_copy(ndev->dev_addr, addr);

This can then be simply rewritten as:

  of_get_mac_address(np, ndev->dev_addr);

Sometimes is_valid_ether_addr() is used to test the MAC address.
of_get_mac_address() already makes sure, it just returns a valid MAC
address. Thus we can just test its return code. But we have to be
careful if there are still other sources for the MAC address before the
of_get_mac_address(). In this case we have to keep the
is_valid_ether_addr() call.

The following coccinelle patch was used to convert common cases to the
new style. Afterwards, I've manually gone over the drivers and fixed the
return code variable: either used a new one or if one was already
available use that. Mansour Moufid, thanks for that coccinelle patch!

<spml>
@a@
identifier x;
expression y, z;
@@
- x = of_get_mac_address(y);
+ x = of_get_mac_address(y, z);
  <...
- ether_addr_copy(z, x);
  ...>

@@
identifier a.x;
@@
- if (<+... x ...+>) {}

@@
identifier a.x;
@@
  if (<+... x ...+>) {
      ...
  }
- else {}

@@
identifier a.x;
expression e;
@@
- if (<+... x ...+>@e)
-     {}
- else
+ if (!(e))
      {...}

@@
expression x, y, z;
@@
- x = of_get_mac_address(y, z);
+ of_get_mac_address(y, z);
  ... when != x
</spml>

All drivers, except drivers/net/ethernet/aeroflex/greth.c, were
compile-time tested.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/mach-mvebu/kirkwood.c                     |  3 +-
 arch/powerpc/sysdev/tsi108_dev.c                   |  5 +-
 drivers/net/ethernet/aeroflex/greth.c              |  6 +--
 drivers/net/ethernet/allwinner/sun4i-emac.c        | 10 ++--
 drivers/net/ethernet/altera/altera_tse_main.c      |  7 +--
 drivers/net/ethernet/arc/emac_main.c               |  8 +--
 drivers/net/ethernet/atheros/ag71xx.c              |  7 +--
 drivers/net/ethernet/broadcom/bcm4908_enet.c       |  7 +--
 drivers/net/ethernet/broadcom/bcmsysport.c         |  7 +--
 drivers/net/ethernet/broadcom/bgmac-bcma.c         | 10 ++--
 drivers/net/ethernet/broadcom/bgmac-platform.c     | 11 ++--
 drivers/net/ethernet/cadence/macb_main.c           | 11 ++--
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c   |  8 +--
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c  |  5 +-
 drivers/net/ethernet/davicom/dm9000.c              | 10 ++--
 drivers/net/ethernet/ethoc.c                       |  6 +--
 drivers/net/ethernet/ezchip/nps_enet.c             |  7 +--
 drivers/net/ethernet/freescale/fec_main.c          |  7 +--
 drivers/net/ethernet/freescale/fec_mpc52xx.c       |  7 +--
 drivers/net/ethernet/freescale/fman/mac.c          |  9 ++--
 .../net/ethernet/freescale/fs_enet/fs_enet-main.c  |  5 +-
 drivers/net/ethernet/freescale/gianfar.c           |  8 +--
 drivers/net/ethernet/freescale/ucc_geth.c          |  5 +-
 drivers/net/ethernet/hisilicon/hisi_femac.c        |  7 +--
 drivers/net/ethernet/hisilicon/hix5hd2_gmac.c      |  7 +--
 drivers/net/ethernet/lantiq_xrx200.c               |  7 +--
 drivers/net/ethernet/marvell/mv643xx_eth.c         |  5 +-
 drivers/net/ethernet/marvell/mvneta.c              |  6 +--
 .../net/ethernet/marvell/prestera/prestera_main.c  | 11 ++--
 drivers/net/ethernet/marvell/pxa168_eth.c          |  9 +---
 drivers/net/ethernet/marvell/sky2.c                |  8 ++-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c        | 11 ++--
 drivers/net/ethernet/micrel/ks8851_common.c        |  7 ++-
 drivers/net/ethernet/microchip/lan743x_main.c      |  5 +-
 drivers/net/ethernet/nxp/lpc_eth.c                 |  4 +-
 drivers/net/ethernet/qualcomm/qca_spi.c            | 10 ++--
 drivers/net/ethernet/qualcomm/qca_uart.c           |  9 +---
 drivers/net/ethernet/renesas/ravb_main.c           | 12 +++--
 drivers/net/ethernet/renesas/sh_eth.c              |  5 +-
 .../net/ethernet/samsung/sxgbe/sxgbe_platform.c    | 13 ++---
 drivers/net/ethernet/socionext/sni_ave.c           | 10 ++--
 .../net/ethernet/stmicro/stmmac/dwmac-anarion.c    |  2 +-
 .../ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c    |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-generic.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c    |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-intel-plat.c |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-ipq806x.c    |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c    |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-mediatek.c   |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c  |  2 +-
 .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c     |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c  |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c  |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/dwmac-visconti.c   |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  | 14 ++---
 .../net/ethernet/stmicro/stmmac/stmmac_platform.h  |  2 +-
 drivers/net/ethernet/ti/am65-cpsw-nuss.c           | 19 ++++---
 drivers/net/ethernet/ti/cpsw.c                     |  7 +--
 drivers/net/ethernet/ti/cpsw_new.c                 |  7 +--
 drivers/net/ethernet/ti/davinci_emac.c             |  8 +--
 drivers/net/ethernet/ti/netcp_core.c               |  7 +--
 drivers/net/ethernet/wiznet/w5100-spi.c            |  8 ++-
 drivers/net/ethernet/wiznet/w5100.c                |  2 +-
 drivers/net/ethernet/xilinx/ll_temac_main.c        |  8 +--
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c  | 15 +++---
 drivers/net/ethernet/xilinx/xilinx_emaclite.c      |  8 +--
 drivers/net/wireless/ath/ath9k/init.c              |  5 +-
 drivers/net/wireless/mediatek/mt76/eeprom.c        |  9 +---
 drivers/net/wireless/ralink/rt2x00/rt2x00dev.c     |  6 +--
 drivers/of/of_net.c                                | 60 ++++++++++------------
 drivers/staging/octeon/ethernet.c                  | 10 ++--
 drivers/staging/wfx/main.c                         |  7 ++-
 include/linux/of_net.h                             |  6 +--
 include/net/dsa.h                                  |  2 +-
 net/dsa/dsa2.c                                     |  2 +-
 net/dsa/slave.c                                    |  2 +-
 net/ethernet/eth.c                                 | 11 ++--
 85 files changed, 218 insertions(+), 364 deletions(-)

diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
index ceaad6d5927e..06b1706595f4 100644
--- a/arch/arm/mach-mvebu/kirkwood.c
+++ b/arch/arm/mach-mvebu/kirkwood.c
@@ -84,6 +84,7 @@ static void __init kirkwood_dt_eth_fixup(void)
 		struct device_node *pnp = of_get_parent(np);
 		struct clk *clk;
 		struct property *pmac;
+		u8 tmpmac[ETH_ALEN];
 		void __iomem *io;
 		u8 *macaddr;
 		u32 reg;
@@ -93,7 +94,7 @@ static void __init kirkwood_dt_eth_fixup(void)
 
 		/* skip disabled nodes or nodes with valid MAC address*/
 		if (!of_device_is_available(pnp) ||
-		    !IS_ERR(of_get_mac_address(np)))
+		    !of_get_mac_address(np, tmpmac))
 			goto eth_fixup_skip;
 
 		clk = of_clk_get(pnp, 0);
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 0baec82510b9..4c4a6efd5e5f 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -73,7 +73,6 @@ static int __init tsi108_eth_of_init(void)
 		struct device_node *phy, *mdio;
 		hw_info tsi_eth_data;
 		const unsigned int *phy_id;
-		const void *mac_addr;
 		const phandle *ph;
 
 		memset(r, 0, sizeof(r));
@@ -101,9 +100,7 @@ static int __init tsi108_eth_of_init(void)
 			goto err;
 		}
 
-		mac_addr = of_get_mac_address(np);
-		if (!IS_ERR(mac_addr))
-			ether_addr_copy(tsi_eth_data.mac_addr, mac_addr);
+		of_get_mac_address(np, tsi_eth_data.mac_addr);
 
 		ph = of_get_property(np, "mdio-handle", NULL);
 		mdio = of_find_node_by_phandle(*ph);
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index 9c5891bbfe61..d77fafbc1530 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1449,10 +1449,10 @@ static int greth_of_probe(struct platform_device *ofdev)
 			break;
 	}
 	if (i == 6) {
-		const u8 *addr;
+		u8 addr[ETH_ALEN];
 
-		addr = of_get_mac_address(ofdev->dev.of_node);
-		if (!IS_ERR(addr)) {
+		err = of_get_mac_address(ofdev->dev.of_node, addr);
+		if (!err) {
 			for (i = 0; i < 6; i++)
 				macaddr[i] = (unsigned int) addr[i];
 		} else {
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 5ed80d9a6b9f..f99ae317c188 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -790,7 +790,6 @@ static int emac_probe(struct platform_device *pdev)
 	struct emac_board_info *db;
 	struct net_device *ndev;
 	int ret = 0;
-	const char *mac_addr;
 
 	ndev = alloc_etherdev(sizeof(struct emac_board_info));
 	if (!ndev) {
@@ -853,12 +852,9 @@ static int emac_probe(struct platform_device *pdev)
 	}
 
 	/* Read MAC-address from DT */
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-
-	/* Check if the MAC address is valid, if not get a random one */
-	if (!is_valid_ether_addr(ndev->dev_addr)) {
+	ret = of_get_mac_address(np, ndev->dev_addr);
+	if (ret) {
+		/* if the MAC address is invalid get a random one */
 		eth_hw_addr_random(ndev);
 		dev_warn(&pdev->dev, "using random MAC address %pM\n",
 			 ndev->dev_addr);
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 907125abef2c..1c00d719e5d7 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -1351,7 +1351,6 @@ static int altera_tse_probe(struct platform_device *pdev)
 	struct resource *control_port;
 	struct resource *dma_res;
 	struct altera_tse_private *priv;
-	const unsigned char *macaddr;
 	void __iomem *descmap;
 	const struct of_device_id *of_id = NULL;
 
@@ -1525,10 +1524,8 @@ static int altera_tse_probe(struct platform_device *pdev)
 	priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE;
 
 	/* get default MAC address from device tree */
-	macaddr = of_get_mac_address(pdev->dev.of_node);
-	if (!IS_ERR(macaddr))
-		ether_addr_copy(ndev->dev_addr, macaddr);
-	else
+	ret = of_get_mac_address(pdev->dev.of_node, ndev->dev_addr);
+	if (ret)
 		eth_hw_addr_random(ndev);
 
 	/* get phy addr and create mdio */
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index b56a9e2aecd9..67b8113a2b53 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -857,7 +857,6 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 	struct device_node *phy_node;
 	struct phy_device *phydev = NULL;
 	struct arc_emac_priv *priv;
-	const char *mac_addr;
 	unsigned int id, clock_frequency, irq;
 	int err;
 
@@ -942,11 +941,8 @@ int arc_emac_probe(struct net_device *ndev, int interface)
 	}
 
 	/* Get MAC address from device tree */
-	mac_addr = of_get_mac_address(dev->of_node);
-
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-	else
+	err = of_get_mac_address(dev->of_node, ndev->dev_addr);
+	if (err)
 		eth_hw_addr_random(ndev);
 
 	arc_emac_set_address_internal(ndev);
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 7352f98123c7..3a23b92ebfe3 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -1856,7 +1856,6 @@ static int ag71xx_probe(struct platform_device *pdev)
 	const struct ag71xx_dcfg *dcfg;
 	struct net_device *ndev;
 	struct resource *res;
-	const void *mac_addr;
 	int tx_size, err, i;
 	struct ag71xx *ag;
 
@@ -1957,10 +1956,8 @@ static int ag71xx_probe(struct platform_device *pdev)
 	ag->stop_desc->ctrl = 0;
 	ag->stop_desc->next = (u32)ag->stop_desc_dma;
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
-	if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) {
+	err = of_get_mac_address(np, ndev->dev_addr);
+	if (err) {
 		netif_err(ag, probe, ndev, "invalid MAC address, using random address\n");
 		eth_random_addr(ndev->dev_addr);
 	}
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c
index b7afac5c7ca7..60d908507f51 100644
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@ -686,7 +686,6 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct net_device *netdev;
 	struct bcm4908_enet *enet;
-	const u8 *mac;
 	int err;
 
 	netdev = devm_alloc_etherdev(dev, sizeof(*enet));
@@ -716,10 +715,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev)
 		return err;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	mac = of_get_mac_address(dev->of_node);
-	if (!IS_ERR(mac))
-		ether_addr_copy(netdev->dev_addr, mac);
-	else
+	err = of_get_mac_address(dev->of_node, netdev->dev_addr);
+	if (err)
 		eth_hw_addr_random(netdev);
 	netdev->netdev_ops = &bcm4908_enet_netdev_ops;
 	netdev->min_mtu = ETH_ZLEN;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 777bbf6d2586..d9f0f0df8f7b 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2457,7 +2457,6 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	struct bcm_sysport_priv *priv;
 	struct device_node *dn;
 	struct net_device *dev;
-	const void *macaddr;
 	u32 txq, rxq;
 	int ret;
 
@@ -2552,12 +2551,10 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	}
 
 	/* Initialize netdevice members */
-	macaddr = of_get_mac_address(dn);
-	if (IS_ERR(macaddr)) {
+	ret = of_get_mac_address(dn, dev->dev_addr);
+	if (ret) {
 		dev_warn(&pdev->dev, "using random Ethernet MAC\n");
 		eth_hw_addr_random(dev);
-	} else {
-		ether_addr_copy(dev->dev_addr, macaddr);
 	}
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index a5fd161ab5ee..85fa0ab7201c 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -115,7 +115,7 @@ static int bgmac_probe(struct bcma_device *core)
 	struct ssb_sprom *sprom = &core->bus->sprom;
 	struct mii_bus *mii_bus;
 	struct bgmac *bgmac;
-	const u8 *mac = NULL;
+	const u8 *mac;
 	int err;
 
 	bgmac = bgmac_alloc(&core->dev);
@@ -128,11 +128,10 @@ static int bgmac_probe(struct bcma_device *core)
 
 	bcma_set_drvdata(core, bgmac);
 
-	if (bgmac->dev->of_node)
-		mac = of_get_mac_address(bgmac->dev->of_node);
+	err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr);
 
 	/* If no MAC address assigned via device tree, check SPROM */
-	if (IS_ERR_OR_NULL(mac)) {
+	if (err) {
 		switch (core->core_unit) {
 		case 0:
 			mac = sprom->et0mac;
@@ -149,10 +148,9 @@ static int bgmac_probe(struct bcma_device *core)
 			err = -ENOTSUPP;
 			goto err;
 		}
+		ether_addr_copy(bgmac->net_dev->dev_addr, mac);
 	}
 
-	ether_addr_copy(bgmac->net_dev->dev_addr, mac);
-
 	/* On BCM4706 we need common core to access PHY */
 	if (core->id.id == BCMA_CORE_4706_MAC_GBIT &&
 	    !core->bus->drv_gmac_cmn.core) {
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index f37f1c58f368..9834b77cf4b6 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -173,7 +173,7 @@ static int bgmac_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct bgmac *bgmac;
 	struct resource *regs;
-	const u8 *mac_addr;
+	int ret;
 
 	bgmac = bgmac_alloc(&pdev->dev);
 	if (!bgmac)
@@ -192,11 +192,10 @@ static int bgmac_probe(struct platform_device *pdev)
 	bgmac->dev = &pdev->dev;
 	bgmac->dma_dev = &pdev->dev;
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(bgmac->net_dev->dev_addr, mac_addr);
-	else
-		dev_warn(&pdev->dev, "MAC address not present in device tree\n");
+	ret = of_get_mac_address(np, bgmac->net_dev->dev_addr);
+	if (ret)
+		dev_warn(&pdev->dev,
+			 "MAC address not present in device tree\n");
 
 	bgmac->irq = platform_get_irq(pdev, 0);
 	if (bgmac->irq < 0)
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ffd56a23f8b0..d6bde1748a22 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4649,7 +4649,6 @@ static int macb_probe(struct platform_device *pdev)
 	struct net_device *dev;
 	struct resource *regs;
 	void __iomem *mem;
-	const char *mac;
 	struct macb *bp;
 	int err, val;
 
@@ -4764,15 +4763,11 @@ static int macb_probe(struct platform_device *pdev)
 	if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR)
 		bp->rx_intr_mask |= MACB_BIT(RXUBR);
 
-	mac = of_get_mac_address(np);
-	if (PTR_ERR(mac) == -EPROBE_DEFER) {
-		err = -EPROBE_DEFER;
+	err = of_get_mac_address(np, bp->dev->dev_addr);
+	if (err == -EPROBE_DEFER)
 		goto err_out_free_netdev;
-	} else if (!IS_ERR_OR_NULL(mac)) {
-		ether_addr_copy(bp->dev->dev_addr, mac);
-	} else {
+	else if (err)
 		macb_get_hwaddr(bp);
-	}
 
 	err = of_get_phy_mode(np, &interface);
 	if (err)
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index ecffebd513be..48ff6fb0eed9 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1385,7 +1385,6 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
 	struct net_device *netdev;
 	struct octeon_mgmt *p;
 	const __be32 *data;
-	const u8 *mac;
 	struct resource *res_mix;
 	struct resource *res_agl;
 	struct resource *res_agl_prt_ctl;
@@ -1502,11 +1501,8 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
 	netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM;
 	netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN;
 
-	mac = of_get_mac_address(pdev->dev.of_node);
-
-	if (!IS_ERR(mac))
-		ether_addr_copy(netdev->dev_addr, mac);
-	else
+	result = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
+	if (result)
 		eth_hw_addr_random(netdev);
 
 	p->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 8ff28ed04b7f..0c783aadf393 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1474,7 +1474,6 @@ static int bgx_init_of_phy(struct bgx *bgx)
 	device_for_each_child_node(&bgx->pdev->dev, fwn) {
 		struct phy_device *pd;
 		struct device_node *phy_np;
-		const char *mac;
 
 		/* Should always be an OF node.  But if it is not, we
 		 * cannot handle it, so exit the loop.
@@ -1483,9 +1482,7 @@ static int bgx_init_of_phy(struct bgx *bgx)
 		if (!node)
 			break;
 
-		mac = of_get_mac_address(node);
-		if (!IS_ERR(mac))
-			ether_addr_copy(bgx->lmac[lmac].mac, mac);
+		of_get_mac_address(node, bgx->lmac[lmac].mac);
 
 		SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev);
 		bgx->lmac[lmac].lmacid = lmac;
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 252adfa5d837..2374c51bf2b2 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1385,7 +1385,7 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev)
 {
 	struct dm9000_plat_data *pdata;
 	struct device_node *np = dev->of_node;
-	const void *mac_addr;
+	int ret;
 
 	if (!IS_ENABLED(CONFIG_OF) || !np)
 		return ERR_PTR(-ENXIO);
@@ -1399,11 +1399,9 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev)
 	if (of_find_property(np, "davicom,no-eeprom", NULL))
 		pdata->flags |= DM9000_PLATF_NO_EEPROM;
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(pdata->dev_addr, mac_addr);
-	else if (PTR_ERR(mac_addr) == -EPROBE_DEFER)
-		return ERR_CAST(mac_addr);
+	ret = of_get_mac_address(np, pdata->dev_addr);
+	if (ret == -EPROBE_DEFER)
+		return ERR_PTR(ret);
 
 	return pdata;
 }
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 3d9b0b161e24..e1b43b07755b 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1151,11 +1151,7 @@ static int ethoc_probe(struct platform_device *pdev)
 		ether_addr_copy(netdev->dev_addr, pdata->hwaddr);
 		priv->phy_id = pdata->phy_id;
 	} else {
-		const void *mac;
-
-		mac = of_get_mac_address(pdev->dev.of_node);
-		if (!IS_ERR(mac))
-			ether_addr_copy(netdev->dev_addr, mac);
+		of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
 		priv->phy_id = -1;
 	}
 
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 815fb62c4b02..e3954d8835e7 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -575,7 +575,6 @@ static s32 nps_enet_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct nps_enet_priv *priv;
 	s32 err = 0;
-	const char *mac_addr;
 
 	if (!dev->of_node)
 		return -ENODEV;
@@ -602,10 +601,8 @@ static s32 nps_enet_probe(struct platform_device *pdev)
 	dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base);
 
 	/* set kernel MAC address to dev */
-	mac_addr = of_get_mac_address(dev->of_node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-	else
+	err = of_get_mac_address(dev->of_node, ndev->dev_addr);
+	if (err)
 		eth_hw_addr_random(ndev);
 
 	/* Get IRQ number */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 70aea9c274fe..aecc111fbe73 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1665,6 +1665,7 @@ static void fec_get_mac(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	unsigned char *iap, tmpaddr[ETH_ALEN];
+	int ret;
 
 	/*
 	 * try to get mac address in following order:
@@ -1680,9 +1681,9 @@ static void fec_get_mac(struct net_device *ndev)
 	if (!is_valid_ether_addr(iap)) {
 		struct device_node *np = fep->pdev->dev.of_node;
 		if (np) {
-			const char *mac = of_get_mac_address(np);
-			if (!IS_ERR(mac))
-				iap = (unsigned char *) mac;
+			ret = of_get_mac_address(np, tmpaddr);
+			if (!ret)
+				iap = tmpaddr;
 		}
 	}
 
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index b3bad429e03b..02c47658a215 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -813,7 +813,6 @@ static int mpc52xx_fec_probe(struct platform_device *op)
 	const u32 *prop;
 	int prop_size;
 	struct device_node *np = op->dev.of_node;
-	const char *mac_addr;
 
 	phys_addr_t rx_fifo;
 	phys_addr_t tx_fifo;
@@ -891,10 +890,8 @@ static int mpc52xx_fec_probe(struct platform_device *op)
 	 *
 	 * First try to read MAC address from DT
 	 */
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr)) {
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-	} else {
+	rv = of_get_mac_address(np, ndev->dev_addr);
+	if (rv) {
 		struct mpc52xx_fec __iomem *fec = priv->fec;
 
 		/*
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 901749a7a318..46ecb42f2ef8 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -605,7 +605,6 @@ static int mac_probe(struct platform_device *_of_dev)
 	struct platform_device	*of_dev;
 	struct resource		 res;
 	struct mac_priv_s	*priv;
-	const u8		*mac_addr;
 	u32			 val;
 	u8			fman_id;
 	phy_interface_t          phy_if;
@@ -723,11 +722,9 @@ static int mac_probe(struct platform_device *_of_dev)
 	priv->cell_index = (u8)val;
 
 	/* Get the MAC address */
-	mac_addr = of_get_mac_address(mac_node);
-	if (IS_ERR(mac_addr))
+	err = of_get_mac_address(mac_node, mac_dev->addr);
+	if (err)
 		dev_warn(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
-	else
-		ether_addr_copy(mac_dev->addr, mac_addr);
 
 	/* Get the port handles */
 	nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL);
@@ -853,7 +850,7 @@ static int mac_probe(struct platform_device *_of_dev)
 	if (err < 0)
 		dev_err(dev, "fman_set_mac_active_pause() = %d\n", err);
 
-	if (!IS_ERR(mac_addr))
+	if (!is_zero_ether_addr(mac_dev->addr))
 		dev_info(dev, "FMan MAC address: %pM\n", mac_dev->addr);
 
 	priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 78e008b81374..6ee325ad35c5 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -918,7 +918,6 @@ static int fs_enet_probe(struct platform_device *ofdev)
 	const u32 *data;
 	struct clk *clk;
 	int err;
-	const u8 *mac_addr;
 	const char *phy_connection_type;
 	int privsize, len, ret = -ENODEV;
 
@@ -1006,9 +1005,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 	spin_lock_init(&fep->lock);
 	spin_lock_init(&fep->tx_lock);
 
-	mac_addr = of_get_mac_address(ofdev->dev.of_node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
+	of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr);
 
 	ret = fep->ops->allocate_bd(ndev);
 	if (ret)
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 3ec4d9fddd52..339f9567ef9d 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -640,7 +640,6 @@ static phy_interface_t gfar_get_interface(struct net_device *dev)
 static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 {
 	const char *model;
-	const void *mac_addr;
 	int err = 0, i;
 	phy_interface_t interface;
 	struct net_device *dev = NULL;
@@ -782,11 +781,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	if (stash_len || stash_idx)
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING;
 
-	mac_addr = of_get_mac_address(np);
-
-	if (!IS_ERR(mac_addr)) {
-		ether_addr_copy(dev->dev_addr, mac_addr);
-	} else {
+	err = of_get_mac_address(np, dev->dev_addr);
+	if (err) {
 		eth_hw_addr_random(dev);
 		dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr);
 	}
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index ef4e2febeb5b..e0936510fa34 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3562,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	struct resource res;
 	int err, ucc_num, max_speed = 0;
 	const unsigned int *prop;
-	const void *mac_addr;
 	phy_interface_t phy_interface;
 	static const int enet_to_speed[] = {
 		SPEED_10, SPEED_10, SPEED_10,
@@ -3733,9 +3732,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 		goto err_free_netdev;
 	}
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(dev->dev_addr, mac_addr);
+	of_get_mac_address(np, dev->dev_addr);
 
 	ugeth->ug_info = ug_info;
 	ugeth->dev = device;
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 57c3bc4f7089..3c4db4a6b431 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -772,7 +772,6 @@ static int hisi_femac_drv_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct hisi_femac_priv *priv;
 	struct phy_device *phy;
-	const char *mac_addr;
 	int ret;
 
 	ndev = alloc_etherdev(sizeof(*priv));
@@ -842,10 +841,8 @@ static int hisi_femac_drv_probe(struct platform_device *pdev)
 			   (unsigned long)phy->phy_id,
 			   phy_modes(phy->interface));
 
-	mac_addr = of_get_mac_address(node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-	if (!is_valid_ether_addr(ndev->dev_addr)) {
+	ret = of_get_mac_address(node, ndev->dev_addr);
+	if (ret) {
 		eth_hw_addr_random(ndev);
 		dev_warn(dev, "using random MAC address %pM\n",
 			 ndev->dev_addr);
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 8b2bf85039f1..c1aae0fca5e9 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -1098,7 +1098,6 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 	struct net_device *ndev;
 	struct hix5hd2_priv *priv;
 	struct mii_bus *bus;
-	const char *mac_addr;
 	int ret;
 
 	ndev = alloc_etherdev(sizeof(struct hix5hd2_priv));
@@ -1220,10 +1219,8 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 		goto out_phy_node;
 	}
 
-	mac_addr = of_get_mac_address(node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-	if (!is_valid_ether_addr(ndev->dev_addr)) {
+	ret = of_get_mac_address(node, ndev->dev_addr);
+	if (ret) {
 		eth_hw_addr_random(ndev);
 		netdev_warn(ndev, "using random MAC address %pM\n",
 			    ndev->dev_addr);
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 0f8ef8f1232c..41c2ad210bc9 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -435,7 +435,6 @@ static int xrx200_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct xrx200_priv *priv;
 	struct net_device *net_dev;
-	const u8 *mac;
 	int err;
 
 	/* alloc the network device */
@@ -477,10 +476,8 @@ static int xrx200_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->clk);
 	}
 
-	mac = of_get_mac_address(np);
-	if (!IS_ERR(mac))
-		ether_addr_copy(net_dev->dev_addr, mac);
-	else
+	err = of_get_mac_address(np, net_dev->dev_addr);
+	if (err)
 		eth_hw_addr_random(net_dev);
 
 	/* bring up the dma engine and IP core */
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index ca1681aa951a..d207bfcaf31d 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2702,7 +2702,6 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
 	struct platform_device *ppdev;
 	struct mv643xx_eth_platform_data ppd;
 	struct resource res;
-	const char *mac_addr;
 	int ret;
 	int dev_num = 0;
 
@@ -2733,9 +2732,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
-	mac_addr = of_get_mac_address(pnp);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ppd.mac_addr, mac_addr);
+	of_get_mac_address(pnp, ppd.mac_addr);
 
 	mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size);
 	mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index f20dfd1d7a6b..7d5cd9bc6c99 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -5141,7 +5141,6 @@ static int mvneta_probe(struct platform_device *pdev)
 	struct net_device *dev;
 	struct phylink *phylink;
 	struct phy *comphy;
-	const char *dt_mac_addr;
 	char hw_mac_addr[ETH_ALEN];
 	phy_interface_t phy_mode;
 	const char *mac_from;
@@ -5237,10 +5236,9 @@ static int mvneta_probe(struct platform_device *pdev)
 		goto err_free_ports;
 	}
 
-	dt_mac_addr = of_get_mac_address(dn);
-	if (!IS_ERR(dt_mac_addr)) {
+	err = of_get_mac_address(dn, dev->dev_addr);
+	if (!err) {
 		mac_from = "device tree";
-		ether_addr_copy(dev->dev_addr, dt_mac_addr);
 	} else {
 		mvneta_get_mac_addr(pp, hw_mac_addr);
 		if (is_valid_ether_addr(hw_mac_addr)) {
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 25dd903a3e92..f08c420a5803 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -456,20 +456,17 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw)
 {
 	struct device_node *base_mac_np;
 	struct device_node *np;
-	const char *base_mac;
+	int ret;
 
 	np = of_find_compatible_node(NULL, NULL, "marvell,prestera");
 	base_mac_np = of_parse_phandle(np, "base-mac-provider", 0);
 
-	base_mac = of_get_mac_address(base_mac_np);
-	of_node_put(base_mac_np);
-	if (!IS_ERR(base_mac))
-		ether_addr_copy(sw->base_mac, base_mac);
-
-	if (!is_valid_ether_addr(sw->base_mac)) {
+	ret = of_get_mac_address(base_mac_np, sw->base_mac);
+	if (ret) {
 		eth_random_addr(sw->base_mac);
 		dev_info(prestera_dev(sw), "using random base mac address\n");
 	}
+	of_node_put(base_mac_np);
 
 	return prestera_hw_switch_mac_set(sw, sw->base_mac);
 }
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 3712e1786091..e967867828d8 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1392,7 +1392,6 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct clk *clk;
 	struct device_node *np;
-	const unsigned char *mac_addr = NULL;
 	int err;
 
 	printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n");
@@ -1435,12 +1434,8 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 
 	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
 
-	if (pdev->dev.of_node)
-		mac_addr = of_get_mac_address(pdev->dev.of_node);
-
-	if (!IS_ERR_OR_NULL(mac_addr)) {
-		ether_addr_copy(dev->dev_addr, mac_addr);
-	} else {
+	err = of_get_mac_address(pdev->dev.of_node, dev->dev_addr);
+	if (err) {
 		/* try reading the mac address, if set by the bootloader */
 		pxa168_eth_get_mac_address(dev, dev->dev_addr);
 		if (!is_valid_ether_addr(dev->dev_addr)) {
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 68c154d715d6..222c32367b2c 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4728,7 +4728,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 {
 	struct sky2_port *sky2;
 	struct net_device *dev = alloc_etherdev(sizeof(*sky2));
-	const void *iap;
+	int ret;
 
 	if (!dev)
 		return NULL;
@@ -4798,10 +4798,8 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 	 * 1) from device tree data
 	 * 2) from internal registers set by bootloader
 	 */
-	iap = of_get_mac_address(hw->pdev->dev.of_node);
-	if (!IS_ERR(iap))
-		ether_addr_copy(dev->dev_addr, iap);
-	else
+	ret = of_get_mac_address(hw->pdev->dev.of_node, dev->dev_addr);
+	if (ret)
 		memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8,
 			      ETH_ALEN);
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 810def064f11..6b00c12c6c43 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2484,14 +2484,11 @@ static int __init mtk_init(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
-	const char *mac_addr;
-
-	mac_addr = of_get_mac_address(mac->of_node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(dev->dev_addr, mac_addr);
+	int ret;
 
-	/* If the mac address is invalid, use random mac address  */
-	if (!is_valid_ether_addr(dev->dev_addr)) {
+	ret = of_get_mac_address(mac->of_node, dev->dev_addr);
+	if (ret) {
+		/* If the mac address is invalid, use random mac address */
 		eth_hw_addr_random(dev);
 		dev_err(eth->dev, "generated random MAC address %pM\n",
 			dev->dev_addr);
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
index 2feed6ce19d3..13eef6e9bd2d 100644
--- a/drivers/net/ethernet/micrel/ks8851_common.c
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -193,11 +193,10 @@ static void ks8851_read_mac_addr(struct net_device *dev)
 static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
 {
 	struct net_device *dev = ks->netdev;
-	const u8 *mac_addr;
+	int ret;
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr)) {
-		ether_addr_copy(dev->dev_addr, mac_addr);
+	ret = of_get_mac_address(np, dev->dev_addr);
+	if (!ret) {
 		ks8851_write_mac_addr(dev);
 		return;
 	}
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 11a1dc4c436d..dae10328c6cf 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2771,7 +2771,6 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev,
 {
 	struct lan743x_adapter *adapter = NULL;
 	struct net_device *netdev = NULL;
-	const void *mac_addr;
 	int ret = -ENODEV;
 
 	netdev = devm_alloc_etherdev(&pdev->dev,
@@ -2788,9 +2787,7 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev,
 			      NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED;
 	netdev->max_mtu = LAN743X_MAX_FRAME_SIZE;
 
-	mac_addr = of_get_mac_address(pdev->dev.of_node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(adapter->mac_address, mac_addr);
+	of_get_mac_address(pdev->dev.of_node, adapter->mac_address);
 
 	ret = lan743x_pci_init(adapter, pdev);
 	if (ret)
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index e72fd33a214c..64c6842bd452 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1350,9 +1350,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	__lpc_get_mac(pldat, ndev->dev_addr);
 
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
-		const char *macaddr = of_get_mac_address(np);
-		if (!IS_ERR(macaddr))
-			ether_addr_copy(ndev->dev_addr, macaddr);
+		of_get_mac_address(np, ndev->dev_addr);
 	}
 	if (!is_valid_ether_addr(ndev->dev_addr))
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 5a3b65a6eb4f..ab9b02574a15 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -885,7 +885,7 @@ qca_spi_probe(struct spi_device *spi)
 	struct net_device *qcaspi_devs = NULL;
 	u8 legacy_mode = 0;
 	u16 signature;
-	const char *mac;
+	int ret;
 
 	if (!spi->dev.of_node) {
 		dev_err(&spi->dev, "Missing device tree\n");
@@ -962,12 +962,8 @@ qca_spi_probe(struct spi_device *spi)
 
 	spi_set_drvdata(spi, qcaspi_devs);
 
-	mac = of_get_mac_address(spi->dev.of_node);
-
-	if (!IS_ERR(mac))
-		ether_addr_copy(qca->net_dev->dev_addr, mac);
-
-	if (!is_valid_ether_addr(qca->net_dev->dev_addr)) {
+	ret = of_get_mac_address(spi->dev.of_node, qca->net_dev->dev_addr);
+	if (ret) {
 		eth_hw_addr_random(qca->net_dev);
 		dev_info(&spi->dev, "Using random MAC address: %pM\n",
 			 qca->net_dev->dev_addr);
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index 362b4f5c162c..bcdeca7b3366 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -323,7 +323,6 @@ static int qca_uart_probe(struct serdev_device *serdev)
 {
 	struct net_device *qcauart_dev = alloc_etherdev(sizeof(struct qcauart));
 	struct qcauart *qca;
-	const char *mac;
 	u32 speed = 115200;
 	int ret;
 
@@ -348,12 +347,8 @@ static int qca_uart_probe(struct serdev_device *serdev)
 
 	of_property_read_u32(serdev->dev.of_node, "current-speed", &speed);
 
-	mac = of_get_mac_address(serdev->dev.of_node);
-
-	if (!IS_ERR(mac))
-		ether_addr_copy(qca->net_dev->dev_addr, mac);
-
-	if (!is_valid_ether_addr(qca->net_dev->dev_addr)) {
+	ret = of_get_mac_address(serdev->dev.of_node, qca->net_dev->dev_addr);
+	if (ret) {
 		eth_hw_addr_random(qca->net_dev);
 		dev_info(&serdev->dev, "Using random MAC address: %pM\n",
 			 qca->net_dev->dev_addr);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 1409ae986aa2..8c84c40ab9a0 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -109,11 +109,13 @@ static void ravb_set_buffer_align(struct sk_buff *skb)
  * Ethernet AVB device doesn't have ROM for MAC address.
  * This function gets the MAC address that was used by a bootloader.
  */
-static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac)
+static void ravb_read_mac_address(struct device_node *np,
+				  struct net_device *ndev)
 {
-	if (!IS_ERR(mac)) {
-		ether_addr_copy(ndev->dev_addr, mac);
-	} else {
+	int ret;
+
+	ret = of_get_mac_address(np, ndev->dev_addr);
+	if (ret) {
 		u32 mahr = ravb_read(ndev, MAHR);
 		u32 malr = ravb_read(ndev, MALR);
 
@@ -2207,7 +2209,7 @@ static int ravb_probe(struct platform_device *pdev)
 	priv->msg_enable = RAVB_DEF_MSG_ENABLE;
 
 	/* Read and set MAC address */
-	ravb_read_mac_address(ndev, of_get_mac_address(np));
+	ravb_read_mac_address(np, ndev);
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
 		dev_warn(&pdev->dev,
 			 "no valid MAC address supplied, using a random one\n");
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index ebedb1a11132..c5b154868c1f 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -3170,7 +3170,6 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 	struct device_node *np = dev->of_node;
 	struct sh_eth_plat_data *pdata;
 	phy_interface_t interface;
-	const char *mac_addr;
 	int ret;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
@@ -3182,9 +3181,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
 		return NULL;
 	pdata->phy_interface = interface;
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(pdata->mac_addr, mac_addr);
+	of_get_mac_address(np, pdata->mac_addr);
 
 	pdata->no_ether_link =
 		of_property_read_bool(np, "renesas,no-ether-link");
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
index 33f79402850d..4639ed9438a3 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c
@@ -25,8 +25,7 @@
 
 #ifdef CONFIG_OF
 static int sxgbe_probe_config_dt(struct platform_device *pdev,
-				 struct sxgbe_plat_data *plat,
-				 const char **mac)
+				 struct sxgbe_plat_data *plat)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct sxgbe_dma_cfg *dma_cfg;
@@ -35,7 +34,6 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev,
 	if (!np)
 		return -ENODEV;
 
-	*mac = of_get_mac_address(np);
 	err = of_get_phy_mode(np, &plat->interface);
 	if (err && err != -ENODEV)
 		return err;
@@ -63,8 +61,7 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev,
 }
 #else
 static int sxgbe_probe_config_dt(struct platform_device *pdev,
-				 struct sxgbe_plat_data *plat,
-				 const char **mac)
+				 struct sxgbe_plat_data *plat)
 {
 	return -ENOSYS;
 }
@@ -85,7 +82,6 @@ static int sxgbe_platform_probe(struct platform_device *pdev)
 	void __iomem *addr;
 	struct sxgbe_priv_data *priv = NULL;
 	struct sxgbe_plat_data *plat_dat = NULL;
-	const char *mac = NULL;
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct device_node *node = dev->of_node;
 
@@ -101,7 +97,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev)
 		if (!plat_dat)
 			return  -ENOMEM;
 
-		ret = sxgbe_probe_config_dt(pdev, plat_dat, &mac);
+		ret = sxgbe_probe_config_dt(pdev, plat_dat);
 		if (ret) {
 			pr_err("%s: main dt probe failed\n", __func__);
 			return ret;
@@ -122,8 +118,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev)
 	}
 
 	/* Get MAC address if available (DT) */
-	if (!IS_ERR_OR_NULL(mac))
-		ether_addr_copy(priv->dev->dev_addr, mac);
+	of_get_mac_address(node, priv->dev->dev_addr);
 
 	/* Get the TX/RX IRQ numbers */
 	for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) {
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 501b9c7aba56..fcbb4bb31408 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1559,7 +1559,6 @@ static int ave_probe(struct platform_device *pdev)
 	struct ave_private *priv;
 	struct net_device *ndev;
 	struct device_node *np;
-	const void *mac_addr;
 	void __iomem *base;
 	const char *name;
 	int i, irq, ret;
@@ -1600,12 +1599,9 @@ static int ave_probe(struct platform_device *pdev)
 
 	ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN);
 
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(ndev->dev_addr, mac_addr);
-
-	/* if the mac address is invalid, use random mac address */
-	if (!is_valid_ether_addr(ndev->dev_addr)) {
+	ret = of_get_mac_address(np, ndev->dev_addr);
+	if (ret) {
+		/* if the mac address is invalid, use random mac address */
 		eth_hw_addr_random(ndev);
 		dev_warn(dev, "Using random MAC address: %pM\n",
 			 ndev->dev_addr);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 08c76636c164..dfbaea06d108 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
 	if (IS_ERR(gmac))
 		return PTR_ERR(gmac);
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 27254b27d7ed..bc91fd867dcd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -438,7 +438,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
 	if (IS_ERR(stmmac_res.addr))
 		return PTR_ERR(stmmac_res.addr);
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index fad503820e04..fbfda55b4c52 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev)
 		return ret;
 
 	if (pdev->dev.of_node) {
-		plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+		plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 		if (IS_ERR(plat_dat)) {
 			dev_err(&pdev->dev, "dt configuration failed\n");
 			return PTR_ERR(plat_dat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index c1a361305a5a..84651207a1de 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -231,7 +231,7 @@ static int imx_dwmac_probe(struct platform_device *pdev)
 	if (!dwmac)
 		return -ENOMEM;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index 6c19fcc76c6f..06d287f104be 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -85,7 +85,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat)) {
 		dev_err(&pdev->dev, "dt configuration failed\n");
 		return PTR_ERR(plat_dat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 749585fe6fc9..28dd0ed85a82 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -255,7 +255,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	if (val)
 		return val;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index 3d3f43d91b98..9d77c647badd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index 9e4b83832938..58c0feaa8131 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -407,7 +407,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index bbc16b5a410a..16fb66a0ca72 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -52,7 +52,7 @@ static int meson6_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 848e5c37746b..c7a6588d9398 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -398,7 +398,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
index 8551ea878ba5..adfeb8d3293d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -118,7 +118,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index a674b7d6b49a..84382fc5cc4d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -461,7 +461,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat)) {
 		dev_err(&pdev->dev, "dt configuration failed\n");
 		return PTR_ERR(plat_dat);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 6ef30252bfe0..8d28a536e1bb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -1396,7 +1396,7 @@ static int rk_gmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 70d41783329d..85208128f135 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -398,7 +398,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index e1b63df6f96f..710d7435733e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -325,7 +325,7 @@ static int sti_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 5d4df4c5254e..2b38a499a404 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -371,7 +371,7 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 19e7ec30af4c..4422baeed3d8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1221,7 +1221,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return -EINVAL;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 0e1ca2cba3c7..527077c98ebc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -108,7 +108,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
index d23be45a64e5..d046e33b8a29 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -208,7 +208,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac);
 	if (IS_ERR(plat_dat))
 		return PTR_ERR(plat_dat);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index c49debb62b05..8b4ff9c189a1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -26,7 +26,7 @@
 
 struct stmmac_resources {
 	void __iomem *addr;
-	const char *mac;
+	u8 mac[ETH_ALEN];
 	int wol_irq;
 	int lpi_irq;
 	int irq;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 77285646c5fc..328aeb2cd276 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6016,7 +6016,7 @@ int stmmac_dvr_probe(struct device *device,
 	for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
 		priv->tx_irq[i] = res->tx_irq[i];
 
-	if (!IS_ERR_OR_NULL(res->mac))
+	if (!is_zero_ether_addr(res->mac))
 		memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN);
 
 	dev_set_drvdata(device, priv->dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 5a1e018884e6..1e17a23d9118 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -394,7 +394,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np)
  * set some private fields that will be used by the main at runtime.
  */
 struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct plat_stmmacenet_data *plat;
@@ -406,12 +406,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	if (!plat)
 		return ERR_PTR(-ENOMEM);
 
-	*mac = of_get_mac_address(np);
-	if (IS_ERR(*mac)) {
-		if (PTR_ERR(*mac) == -EPROBE_DEFER)
-			return ERR_CAST(*mac);
+	rc = of_get_mac_address(np, mac);
+	if (rc) {
+		if (rc == -EPROBE_DEFER)
+			return ERR_PTR(rc);
 
-		*mac = NULL;
+		eth_zero_addr(mac);
 	}
 
 	plat->phy_interface = device_get_phy_mode(&pdev->dev);
@@ -627,7 +627,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev,
 }
 #else
 struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
 {
 	return ERR_PTR(-EINVAL);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index 3a4663b7b460..3fff3f59d73d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -12,7 +12,7 @@
 #include "stmmac.h"
 
 struct plat_stmmacenet_data *
-stmmac_probe_config_dt(struct platform_device *pdev, const char **mac);
+stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
 void stmmac_remove_config_dt(struct platform_device *pdev,
 			     struct plat_stmmacenet_data *plat);
 
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 638d7b03be4b..6a67b026df0b 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1824,7 +1824,6 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 
 	for_each_child_of_node(node, port_np) {
 		struct am65_cpsw_port *port;
-		const void *mac_addr;
 		u32 port_id;
 
 		/* it is not a slave port node, continue */
@@ -1903,15 +1902,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 			return ret;
 		}
 
-		mac_addr = of_get_mac_address(port_np);
-		if (!IS_ERR(mac_addr)) {
-			ether_addr_copy(port->slave.mac_addr, mac_addr);
-		} else if (am65_cpsw_am654_get_efuse_macid(port_np,
-							   port->port_id,
-							   port->slave.mac_addr) ||
-			   !is_valid_ether_addr(port->slave.mac_addr)) {
-			random_ether_addr(port->slave.mac_addr);
-			dev_err(dev, "Use random MAC address\n");
+		ret = of_get_mac_address(port_np, port->slave.mac_addr);
+		if (ret) {
+			am65_cpsw_am654_get_efuse_macid(port_np,
+							port->port_id,
+							port->slave.mac_addr);
+			if (!is_valid_ether_addr(port->slave.mac_addr)) {
+				random_ether_addr(port->slave.mac_addr);
+				dev_err(dev, "Use random MAC address\n");
+			}
 		}
 	}
 	of_node_put(node);
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 074702af3dc6..c0cd7de88316 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1296,7 +1296,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 
 	for_each_available_child_of_node(node, slave_node) {
 		struct cpsw_slave_data *slave_data = data->slave_data + i;
-		const void *mac_addr = NULL;
 		int lenp;
 		const __be32 *parp;
 
@@ -1368,10 +1367,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 		}
 
 no_phy_slave:
-		mac_addr = of_get_mac_address(slave_node);
-		if (!IS_ERR(mac_addr)) {
-			ether_addr_copy(slave_data->mac_addr, mac_addr);
-		} else {
+		ret = of_get_mac_address(slave_node, slave_data->mac_addr);
+		if (ret) {
 			ret = ti_cm_get_macid(&pdev->dev, i,
 					      slave_data->mac_addr);
 			if (ret)
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 0751f77de2c7..69b7a4e0220a 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1257,7 +1257,6 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
 
 	for_each_child_of_node(tmp_node, port_np) {
 		struct cpsw_slave_data *slave_data;
-		const void *mac_addr;
 		u32 port_id;
 
 		ret = of_property_read_u32(port_np, "reg", &port_id);
@@ -1316,10 +1315,8 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
 			goto err_node_put;
 		}
 
-		mac_addr = of_get_mac_address(port_np);
-		if (!IS_ERR(mac_addr)) {
-			ether_addr_copy(slave_data->mac_addr, mac_addr);
-		} else {
+		ret = of_get_mac_address(port_np, slave_data->mac_addr);
+		if (ret) {
 			ret = ti_cm_get_macid(dev, port_id - 1,
 					      slave_data->mac_addr);
 			if (ret)
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index c7031e1960d4..14e7da7d302f 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1687,7 +1687,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv)
 	const struct of_device_id *match;
 	const struct emac_platform_data *auxdata;
 	struct emac_platform_data *pdata = NULL;
-	const u8 *mac_addr;
 
 	if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
 		return dev_get_platdata(&pdev->dev);
@@ -1699,11 +1698,8 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv)
 	np = pdev->dev.of_node;
 	pdata->version = EMAC_VERSION_2;
 
-	if (!is_valid_ether_addr(pdata->mac_addr)) {
-		mac_addr = of_get_mac_address(np);
-		if (!IS_ERR(mac_addr))
-			ether_addr_copy(pdata->mac_addr, mac_addr);
-	}
+	if (!is_valid_ether_addr(pdata->mac_addr))
+		of_get_mac_address(np, pdata->mac_addr);
 
 	of_property_read_u32(np, "ti,davinci-ctrl-reg-offset",
 			     &pdata->ctrl_reg_offset);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index d7a144b4a09f..9030e619e543 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1966,7 +1966,6 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 	struct resource res;
 	void __iomem *efuse = NULL;
 	u32 efuse_mac = 0;
-	const void *mac_addr;
 	u8 efuse_mac_addr[6];
 	u32 temp[2];
 	int ret = 0;
@@ -2036,10 +2035,8 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 		devm_iounmap(dev, efuse);
 		devm_release_mem_region(dev, res.start, size);
 	} else {
-		mac_addr = of_get_mac_address(node_interface);
-		if (!IS_ERR(mac_addr))
-			ether_addr_copy(ndev->dev_addr, mac_addr);
-		else
+		ret = of_get_mac_address(node_interface, ndev->dev_addr);
+		if (ret)
 			eth_random_addr(ndev->dev_addr);
 	}
 
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 2b4126d2427d..2b84848dc26a 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -423,8 +423,14 @@ static int w5100_spi_probe(struct spi_device *spi)
 	const struct of_device_id *of_id;
 	const struct w5100_ops *ops;
 	kernel_ulong_t driver_data;
+	const void *mac = NULL;
+	u8 tmpmac[ETH_ALEN];
 	int priv_size;
-	const void *mac = of_get_mac_address(spi->dev.of_node);
+	int ret;
+
+	ret = of_get_mac_address(spi->dev.of_node, tmpmac);
+	if (!ret)
+		mac = tmpmac;
 
 	if (spi->dev.of_node) {
 		of_id = of_match_device(w5100_of_match, &spi->dev);
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index c0d181a7f83a..ec5db481c9cd 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1157,7 +1157,7 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops,
 	INIT_WORK(&priv->setrx_work, w5100_setrx_work);
 	INIT_WORK(&priv->restart_work, w5100_restart_work);
 
-	if (!IS_ERR_OR_NULL(mac_addr))
+	if (mac_addr)
 		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
 	else
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 030185301014..a1f5f07f4ca9 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -438,7 +438,7 @@ static void temac_do_set_mac_address(struct net_device *ndev)
 
 static int temac_init_mac_address(struct net_device *ndev, const void *address)
 {
-	ether_addr_copy(ndev->dev_addr, address);
+	memcpy(ndev->dev_addr, address, ETH_ALEN);
 	if (!is_valid_ether_addr(ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 	temac_do_set_mac_address(ndev);
@@ -1351,7 +1351,7 @@ static int temac_probe(struct platform_device *pdev)
 	struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np;
 	struct temac_local *lp;
 	struct net_device *ndev;
-	const void *addr;
+	u8 addr[ETH_ALEN];
 	__be32 *p;
 	bool little_endian;
 	int rc = 0;
@@ -1542,8 +1542,8 @@ static int temac_probe(struct platform_device *pdev)
 
 	if (temac_np) {
 		/* Retrieve the MAC address */
-		addr = of_get_mac_address(temac_np);
-		if (IS_ERR(addr)) {
+		rc = of_get_mac_address(temac_np, addr);
+		if (rc) {
 			dev_err(&pdev->dev, "could not find MAC address\n");
 			return -ENODEV;
 		}
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index feb1aa4ec927..b508c9453f40 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1835,8 +1835,8 @@ static int axienet_probe(struct platform_device *pdev)
 	struct device_node *np;
 	struct axienet_local *lp;
 	struct net_device *ndev;
-	const void *mac_addr;
 	struct resource *ethres;
+	u8 mac_addr[ETH_ALEN];
 	int addr_width = 32;
 	u32 value;
 
@@ -2062,13 +2062,14 @@ static int axienet_probe(struct platform_device *pdev)
 		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
 
 	/* Retrieve the MAC address */
-	mac_addr = of_get_mac_address(pdev->dev.of_node);
-	if (IS_ERR(mac_addr)) {
-		dev_warn(&pdev->dev, "could not find MAC address property: %ld\n",
-			 PTR_ERR(mac_addr));
-		mac_addr = NULL;
+	ret = of_get_mac_address(pdev->dev.of_node, mac_addr);
+	if (!ret) {
+		axienet_set_mac_address(ndev, mac_addr);
+	} else {
+		dev_warn(&pdev->dev, "could not find MAC address property: %d\n",
+			 ret);
+		axienet_set_mac_address(ndev, NULL);
 	}
-	axienet_set_mac_address(ndev, mac_addr);
 
 	lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
 	lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 007840d4a807..d9d58a7dabee 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1115,7 +1115,6 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 	struct net_device *ndev = NULL;
 	struct net_local *lp = NULL;
 	struct device *dev = &ofdev->dev;
-	const void *mac_address;
 
 	int rc = 0;
 
@@ -1157,12 +1156,9 @@ static int xemaclite_of_probe(struct platform_device *ofdev)
 	lp->next_rx_buf_to_use = 0x0;
 	lp->tx_ping_pong = get_bool(ofdev, "xlnx,tx-ping-pong");
 	lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
-	mac_address = of_get_mac_address(ofdev->dev.of_node);
 
-	if (!IS_ERR(mac_address)) {
-		/* Set the MAC address. */
-		ether_addr_copy(ndev->dev_addr, mac_address);
-	} else {
+	rc = of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr);
+	if (rc) {
 		dev_warn(dev, "No MAC address found, using random\n");
 		eth_hw_addr_random(ndev);
 	}
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 01f9c26f9bf3..e9a36dd7144f 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -617,7 +617,6 @@ static int ath9k_of_init(struct ath_softc *sc)
 	struct ath_hw *ah = sc->sc_ah;
 	struct ath_common *common = ath9k_hw_common(ah);
 	enum ath_bus_type bus_type = common->bus_ops->ath_bus_type;
-	const char *mac;
 	char eeprom_name[100];
 	int ret;
 
@@ -640,9 +639,7 @@ static int ath9k_of_init(struct ath_softc *sc)
 		ah->ah_flags |= AH_NO_EEP_SWAP;
 	}
 
-	mac = of_get_mac_address(np);
-	if (!IS_ERR(mac))
-		ether_addr_copy(common->macaddr, mac);
+	of_get_mac_address(np, common->macaddr);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c
index 665b54c5c8ae..6d895738222a 100644
--- a/drivers/net/wireless/mediatek/mt76/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/eeprom.c
@@ -91,16 +91,9 @@ void
 mt76_eeprom_override(struct mt76_phy *phy)
 {
 	struct mt76_dev *dev = phy->dev;
-
-#ifdef CONFIG_OF
 	struct device_node *np = dev->dev->of_node;
-	const u8 *mac = NULL;
 
-	if (np)
-		mac = of_get_mac_address(np);
-	if (!IS_ERR_OR_NULL(mac))
-		ether_addr_copy(phy->macaddr, mac);
-#endif
+	of_get_mac_address(np, phy->macaddr);
 
 	if (!is_valid_ether_addr(phy->macaddr)) {
 		eth_random_addr(phy->macaddr);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
index 61a4f1ad31e2..e95c101c2711 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
@@ -989,11 +989,7 @@ static void rt2x00lib_rate(struct ieee80211_rate *entry,
 
 void rt2x00lib_set_mac_address(struct rt2x00_dev *rt2x00dev, u8 *eeprom_mac_addr)
 {
-	const char *mac_addr;
-
-	mac_addr = of_get_mac_address(rt2x00dev->dev->of_node);
-	if (!IS_ERR(mac_addr))
-		ether_addr_copy(eeprom_mac_addr, mac_addr);
+	of_get_mac_address(rt2x00dev->dev->of_node, eeprom_mac_addr);
 
 	if (!is_valid_ether_addr(eeprom_mac_addr)) {
 		eth_random_addr(eeprom_mac_addr);
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index bc0a27de69d4..cb77b774bf76 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -45,42 +45,35 @@ int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
 }
 EXPORT_SYMBOL_GPL(of_get_phy_mode);
 
-static const void *of_get_mac_addr(struct device_node *np, const char *name)
+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
 {
 	struct property *pp = of_find_property(np, name, NULL);
 
-	if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value))
-		return pp->value;
-	return NULL;
+	if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+		memcpy(addr, pp->value, ETH_ALEN);
+		return 0;
+	}
+	return -ENODEV;
 }
 
-static const void *of_get_mac_addr_nvmem(struct device_node *np)
+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
 {
-	int ret;
-	const void *mac;
-	u8 nvmem_mac[ETH_ALEN];
 	struct platform_device *pdev = of_find_device_by_node(np);
+	int ret;
 
 	if (!pdev)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
-	ret = nvmem_get_mac_address(&pdev->dev, &nvmem_mac);
-	if (ret) {
-		put_device(&pdev->dev);
-		return ERR_PTR(ret);
-	}
-
-	mac = devm_kmemdup(&pdev->dev, nvmem_mac, ETH_ALEN, GFP_KERNEL);
+	ret = nvmem_get_mac_address(&pdev->dev, addr);
 	put_device(&pdev->dev);
-	if (!mac)
-		return ERR_PTR(-ENOMEM);
 
-	return mac;
+	return ret;
 }
 
 /**
  * of_get_mac_address()
  * @np:		Caller's Device Node
+ * @addr:	Pointer to a six-byte array for the result
  *
  * Search the device tree for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
@@ -101,24 +94,27 @@ static const void *of_get_mac_addr_nvmem(struct device_node *np)
  * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
  * but is all zeros.
  *
- * Return: Will be a valid pointer on success and ERR_PTR in case of error.
+ * Return: 0 on success and errno in case of error.
 */
-const void *of_get_mac_address(struct device_node *np)
+int of_get_mac_address(struct device_node *np, u8 *addr)
 {
-	const void *addr;
+	int ret;
+
+	if (!np)
+		return -ENODEV;
 
-	addr = of_get_mac_addr(np, "mac-address");
-	if (addr)
-		return addr;
+	ret = of_get_mac_addr(np, "mac-address", addr);
+	if (!ret)
+		return 0;
 
-	addr = of_get_mac_addr(np, "local-mac-address");
-	if (addr)
-		return addr;
+	ret = of_get_mac_addr(np, "local-mac-address", addr);
+	if (!ret)
+		return 0;
 
-	addr = of_get_mac_addr(np, "address");
-	if (addr)
-		return addr;
+	ret = of_get_mac_addr(np, "address", addr);
+	if (!ret)
+		return 0;
 
-	return of_get_mac_addr_nvmem(np);
+	return of_get_mac_addr_nvmem(np, addr);
 }
 EXPORT_SYMBOL(of_get_mac_address);
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 5dea6e96ec90..da7c2cd8ebb8 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -407,14 +407,10 @@ static int cvm_oct_common_set_mac_address(struct net_device *dev, void *addr)
 int cvm_oct_common_init(struct net_device *dev)
 {
 	struct octeon_ethernet *priv = netdev_priv(dev);
-	const u8 *mac = NULL;
+	int ret;
 
-	if (priv->of_node)
-		mac = of_get_mac_address(priv->of_node);
-
-	if (!IS_ERR_OR_NULL(mac))
-		ether_addr_copy(dev->dev_addr, mac);
-	else
+	ret = of_get_mac_address(priv->of_node, dev->dev_addr);
+	if (ret)
 		eth_hw_addr_random(dev);
 
 	/*
diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c
index e7bc1988124a..4b9fdf99981b 100644
--- a/drivers/staging/wfx/main.c
+++ b/drivers/staging/wfx/main.c
@@ -334,7 +334,6 @@ int wfx_probe(struct wfx_dev *wdev)
 {
 	int i;
 	int err;
-	const void *macaddr;
 	struct gpio_desc *gpio_saved;
 
 	// During first part of boot, gpio_wakeup cannot yet been used. So
@@ -423,9 +422,9 @@ int wfx_probe(struct wfx_dev *wdev)
 
 	for (i = 0; i < ARRAY_SIZE(wdev->addresses); i++) {
 		eth_zero_addr(wdev->addresses[i].addr);
-		macaddr = of_get_mac_address(wdev->dev->of_node);
-		if (!IS_ERR_OR_NULL(macaddr)) {
-			ether_addr_copy(wdev->addresses[i].addr, macaddr);
+		err = of_get_mac_address(wdev->dev->of_node,
+					 wdev->addresses[i].addr);
+		if (!err) {
 			wdev->addresses[i].addr[ETH_ALEN - 1] += i;
 		} else {
 			ether_addr_copy(wdev->addresses[i].addr,
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 71bbfcf3adcd..daef3b0d9270 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -13,7 +13,7 @@
 
 struct net_device;
 extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface);
-extern const void *of_get_mac_address(struct device_node *np);
+extern int of_get_mac_address(struct device_node *np, u8 *mac);
 extern struct net_device *of_find_net_device_by_node(struct device_node *np);
 #else
 static inline int of_get_phy_mode(struct device_node *np,
@@ -22,9 +22,9 @@ static inline int of_get_phy_mode(struct device_node *np,
 	return -ENODEV;
 }
 
-static inline const void *of_get_mac_address(struct device_node *np)
+static inline int of_get_mac_address(struct device_node *np, u8 *mac)
 {
-	return ERR_PTR(-ENODEV);
+	return -ENODEV;
 }
 
 static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 57b2c49f72f4..1259b0f40684 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -260,7 +260,7 @@ struct dsa_port {
 	unsigned int		index;
 	const char		*name;
 	struct dsa_port		*cpu_dp;
-	const char		*mac;
+	u8			mac[ETH_ALEN];
 	struct device_node	*dn;
 	unsigned int		ageing_time;
 	bool			vlan_filtering;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3c3e56a1f34d..d7c22e3a1fbf 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -392,7 +392,7 @@ static int dsa_port_setup(struct dsa_port *dp)
 
 		break;
 	case DSA_PORT_TYPE_USER:
-		dp->mac = of_get_mac_address(dp->dn);
+		of_get_mac_address(dp->dn, dp->mac);
 		err = dsa_slave_create(dp);
 		if (err)
 			break;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 995e0e16f295..9300cb66e500 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1896,7 +1896,7 @@ int dsa_slave_create(struct dsa_port *port)
 	slave_dev->hw_features |= NETIF_F_HW_TC;
 	slave_dev->features |= NETIF_F_LLTX;
 	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
-	if (!IS_ERR_OR_NULL(port->mac))
+	if (!is_zero_ether_addr(port->mac))
 		ether_addr_copy(slave_dev->dev_addr, port->mac);
 	else
 		eth_hw_addr_inherit(slave_dev, master);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 933b427122be..9cce612e8976 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -511,13 +511,14 @@ unsigned char * __weak arch_get_platform_mac_address(void)
 
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
 {
-	const unsigned char *addr = NULL;
+	unsigned char *addr;
+	int ret;
 
-	if (dev->of_node)
-		addr = of_get_mac_address(dev->of_node);
-	if (IS_ERR_OR_NULL(addr))
-		addr = arch_get_platform_mac_address();
+	ret = of_get_mac_address(dev->of_node, mac_addr);
+	if (!ret)
+		return 0;
 
+	addr = arch_get_platform_mac_address();
 	if (!addr)
 		return -ENODEV;
 
-- 
cgit v1.2.3


From f10843e04a075202dbb39dfcee047e3a2fdf5a8d Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 12 Apr 2021 19:47:18 +0200
Subject: of: net: fix of_get_mac_addr_nvmem() for non-platform devices

of_get_mac_address() already supports fetching the MAC address by an
nvmem provider. But until now, it was just working for platform devices.
Esp. it was not working for DSA ports and PCI devices. It gets more
common that PCI devices have a device tree binding since SoCs contain
integrated root complexes.

Use the nvmem of_* binding to fetch the nvmem cells by a struct
device_node. We still have to try to read the cell by device first
because there might be a nvmem_cell_lookup associated with that device.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_net.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index cb77b774bf76..dbac3a172a11 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -11,6 +11,7 @@
 #include <linux/phy.h>
 #include <linux/export.h>
 #include <linux/device.h>
+#include <linux/nvmem-consumer.h>
 
 /**
  * of_get_phy_mode - Get phy mode for given device_node
@@ -59,15 +60,39 @@ static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
 static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
 {
 	struct platform_device *pdev = of_find_device_by_node(np);
+	struct nvmem_cell *cell;
+	const void *mac;
+	size_t len;
 	int ret;
 
-	if (!pdev)
-		return -ENODEV;
+	/* Try lookup by device first, there might be a nvmem_cell_lookup
+	 * associated with a given device.
+	 */
+	if (pdev) {
+		ret = nvmem_get_mac_address(&pdev->dev, addr);
+		put_device(&pdev->dev);
+		return ret;
+	}
+
+	cell = of_nvmem_cell_get(np, "mac-address");
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	mac = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
+
+	if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+		kfree(mac);
+		return -EINVAL;
+	}
 
-	ret = nvmem_get_mac_address(&pdev->dev, addr);
-	put_device(&pdev->dev);
+	memcpy(addr, mac, ETH_ALEN);
+	kfree(mac);
 
-	return ret;
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 314332023b1f095fb24e230e60b50aff981943c3 Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Mon, 12 Apr 2021 16:23:56 -0500
Subject: icmp: ICMPV6: pass RFC 8335 reply messages to ping_rcv

The current icmp_rcv function drops all unknown ICMP types, including
ICMP_EXT_ECHOREPLY (type 43). In order to parse Extended Echo Reply messages, we have
to pass these packets to the ping_rcv function, which does not do any
other filtering and passes the packet to the designated socket.

Pass incoming RFC 8335 ICMP Extended Echo Reply packets to the ping_rcv
handler instead of discarding the packet.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 5 +++++
 net/ipv6/icmp.c | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 76990e13a2f9..8bd988fbcb31 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1196,6 +1196,11 @@ int icmp_rcv(struct sk_buff *skb)
 		goto success_check;
 	}
 
+	if (icmph->type == ICMP_EXT_ECHOREPLY) {
+		success = ping_rcv(skb);
+		goto success_check;
+	}
+
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1bca2b09d77e..e8398ffb5e35 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -916,6 +916,10 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		success = ping_rcv(skb);
 		break;
 
+	case ICMPV6_EXT_ECHO_REPLY:
+		success = ping_rcv(skb);
+		break;
+
 	case ICMPV6_PKT_TOOBIG:
 		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
 		   standard destination cache. Seems, only "advanced"
-- 
cgit v1.2.3


From 334c4241472916851d97aae209aedf1927ec84e3 Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Tue, 13 Apr 2021 03:31:44 -0500
Subject: ibmvnic: improve failover sysfs entry

The current implementation relies on H_IOCTL call to issue a
H_SESSION_ERR_DETECTED command to let the hypervisor to send a failover
signal. However, it may not work if there is no backup device or if
the vnic is already in error state,
e.g., "ibmvnic 30000003 env3: rx buffer returned with rc 6".
Add a last resort, that is to schedule a failover reset via CRQ command.

Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ee9bf18c597f..0961d36833d5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -5503,7 +5503,7 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
 	if (rc) {
 		netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n",
 			   rc);
-		return -EINVAL;
+		goto last_resort;
 	}
 
 	session_token = (__be64)retbuf[0];
@@ -5511,15 +5511,17 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
 		   be64_to_cpu(session_token));
 	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
 				H_SESSION_ERR_DETECTED, session_token, 0, 0);
-	if (rc) {
-		netdev_err(netdev, "Client initiated failover failed, rc %ld\n",
+	if (rc)
+		netdev_err(netdev,
+			   "H_VIOCTL initiated failover failed, rc %ld\n",
 			   rc);
-		return -EINVAL;
-	}
+
+last_resort:
+	netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
+	ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
 
 	return count;
 }
-
 static DEVICE_ATTR_WO(failover);
 
 static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev)
-- 
cgit v1.2.3


From eba43fac8dfafc07141315e8d7715e317d5cfe95 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Tue, 13 Apr 2021 17:45:30 +0800
Subject: nfc: st-nci: remove unnecessary label

in st_nci_spi_write function, first assign a value to a variable then
goto exit label. return statement just follow the label and exit label
just used once, so we should directly return and remove exit label.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/st-nci/spi.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 8db323adebf0..09df6ea65840 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -95,17 +95,14 @@ static int st_nci_spi_write(void *phy_id, struct sk_buff *skb)
 	 */
 	if (!r) {
 		skb_rx = alloc_skb(skb->len, GFP_KERNEL);
-		if (!skb_rx) {
-			r = -ENOMEM;
-			goto exit;
-		}
+		if (!skb_rx)
+			return -ENOMEM;
 
 		skb_put(skb_rx, skb->len);
 		memcpy(skb_rx->data, buf, skb->len);
 		ndlc_recv(phy->ndlc, skb_rx);
 	}
 
-exit:
 	return r;
 }
 
-- 
cgit v1.2.3


From 46568170036d0b627d92c99fc9ad95209fdc4159 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 13 Apr 2021 17:46:12 +0800
Subject: rsi: remove unused including <linux/version.h>

Fix the following versioncheck warning:
./drivers/net/wireless/rsi/rsi_91x_ps.c: 19 linux/version.h not needed.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/rsi/rsi_91x_ps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c
index fdaa5a7260dd..a02904921729 100644
--- a/drivers/net/wireless/rsi/rsi_91x_ps.c
+++ b/drivers/net/wireless/rsi/rsi_91x_ps.c
@@ -16,7 +16,6 @@
 
 #include <linux/etherdevice.h>
 #include <linux/if.h>
-#include <linux/version.h>
 #include "rsi_debugfs.h"
 #include "rsi_mgmt.h"
 #include "rsi_common.h"
-- 
cgit v1.2.3


From 870e04ae45ea2e569d1ca2780439b16e988da08d Mon Sep 17 00:00:00 2001
From: Lijun Pan <lijunp213@gmail.com>
Date: Tue, 13 Apr 2021 14:33:39 -0500
Subject: ibmvnic: queue reset work in system_long_wq

The reset process for ibmvnic commonly takes multiple seconds, clearly
making it inappropriate for schedule_work/system_wq. The reason to make
this change is that ibmvnic's use of the default system-wide workqueue
for a relatively long-running work item can negatively affect other
workqueue users. So, queue the relatively slow reset job to the
system_long_wq.

Suggested-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Lijun Pan <lijunp213@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0961d36833d5..b72159ccca3a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2292,8 +2292,9 @@ static void __ibmvnic_reset(struct work_struct *work)
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 
 	if (test_and_set_bit_lock(0, &adapter->resetting)) {
-		schedule_delayed_work(&adapter->ibmvnic_delayed_reset,
-				      IBMVNIC_RESET_DELAY);
+		queue_delayed_work(system_long_wq,
+				   &adapter->ibmvnic_delayed_reset,
+				   IBMVNIC_RESET_DELAY);
 		return;
 	}
 
@@ -2437,7 +2438,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	list_add_tail(&rwi->list, &adapter->rwi_list);
 	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
 		   reset_reason_to_string(reason));
-	schedule_work(&adapter->ibmvnic_reset);
+	queue_work(system_long_wq, &adapter->ibmvnic_reset);
 
 	ret = 0;
 err:
-- 
cgit v1.2.3


From 1da41aa110df51bfe1f2a060af8c834cc796455e Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 13 Apr 2021 10:22:16 -0700
Subject: ionic: git_ts_info bit shifters

All the uses of HWTSTAMP_FILTER_* values need to be
bit shifters, not straight values.

v2: fixed subject and added Cc Dan and SoB Allen

Fixes: f8ba81da73fc ("ionic: add ethtool support for PTP")
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/pensando/ionic/ionic_ethtool.c    | 26 +++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 71db1e2c7d8a..6583be570e45 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -888,55 +888,55 @@ static int ionic_get_ts_info(struct net_device *netdev,
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_NTP_ALL);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_NTP_ALL;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_NTP_ALL);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_SYNC);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_SYNC;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_DREQ);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_ALL);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_SYNC);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_SYNC;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_DREQ);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_ALL);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_SYNC);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_SYNC;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_DREQ);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_ALL);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_SYNC);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_SYNC;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_SYNC);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_DREQ);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_DELAY_REQ;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
 
 	mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_ALL);
 	if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask)
-		info->rx_filters |= HWTSTAMP_FILTER_PTP_V2_EVENT;
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 15c88e185eb98b86157bb887e7d851c366348be5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 13 Apr 2021 11:38:25 -0500
Subject: dt-bindings: net: qcom,ipa: add support for SM8350

Add support for "qcom,sm8350-ipa", which uses IPA v4.9.

Use "enum" rather than "oneOf/const ..." to specify compatible
strings, as suggested by Rob Herring.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 2645a02cf19b..da5212e693e9 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -43,11 +43,12 @@ description:
 
 properties:
   compatible:
-    oneOf:
-      - const: "qcom,sc7180-ipa"
-      - const: "qcom,sc7280-ipa"
-      - const: "qcom,sdm845-ipa"
-      - const: "qcom,sdx55-ipa"
+    enum:
+      - qcom,sc7180-ipa
+      - qcom,sc7280-ipa
+      - qcom,sdm845-ipa
+      - qcom,sdx55-ipa
+      - qcom,sm8350-ipa
 
   reg:
     items:
-- 
cgit v1.2.3


From e557dc82418d24a5fb035815dc57131b68239801 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 13 Apr 2021 11:38:26 -0500
Subject: net: ipa: add IPA v4.9 configuration data

Add support for the SM8350 SoC, which includes IPA version 4.9.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/Makefile        |   3 +-
 drivers/net/ipa/ipa_data-v4.9.c | 430 ++++++++++++++++++++++++++++++++++++++++
 drivers/net/ipa/ipa_data.h      |   1 +
 drivers/net/ipa/ipa_main.c      |   4 +
 4 files changed, 437 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ipa/ipa_data-v4.9.c

diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile
index 8c0ac8790354..1efe1a88104b 100644
--- a/drivers/net/ipa/Makefile
+++ b/drivers/net/ipa/Makefile
@@ -10,4 +10,5 @@ ipa-y			:=	ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \
 				ipa_resource.o ipa_qmi.o ipa_qmi_msg.o
 
 ipa-y			+=	ipa_data-v3.5.1.o ipa_data-v4.2.o \
-				ipa_data-v4.5.o ipa_data-v4.11.o
+				ipa_data-v4.5.o ipa_data-v4.9.o \
+				ipa_data-v4.11.o
diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c
new file mode 100644
index 000000000000..e41be790f45e
--- /dev/null
+++ b/drivers/net/ipa/ipa_data-v4.9.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (C) 2021 Linaro Ltd. */
+
+#include <linux/log2.h>
+
+#include "gsi.h"
+#include "ipa_data.h"
+#include "ipa_endpoint.h"
+#include "ipa_mem.h"
+
+/** enum ipa_resource_type - IPA resource types for an SoC having IPA v4.9 */
+enum ipa_resource_type {
+	/* Source resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS		= 0,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS,
+	IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF,
+	IPA_RESOURCE_TYPE_SRC_HPS_DMARS,
+	IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES,
+
+	/* Destination resource types; first must have value 0 */
+	IPA_RESOURCE_TYPE_DST_DATA_SECTORS		= 0,
+	IPA_RESOURCE_TYPE_DST_DPS_DMARS,
+};
+
+/* Resource groups used for an SoC having IPA v4.9 */
+enum ipa_rsrc_group_id {
+	/* Source resource group identifiers */
+	IPA_RSRC_GROUP_SRC_UL_DL			= 0,
+	IPA_RSRC_GROUP_SRC_DMA,
+	IPA_RSRC_GROUP_SRC_UC_RX_Q,
+	IPA_RSRC_GROUP_SRC_COUNT,	/* Last in set; not a source group */
+
+	/* Destination resource group identifiers */
+	IPA_RSRC_GROUP_DST_UL_DL_DPL			= 0,
+	IPA_RSRC_GROUP_DST_DMA,
+	IPA_RSRC_GROUP_DST_UC,
+	IPA_RSRC_GROUP_DST_DRB_IP,
+	IPA_RSRC_GROUP_DST_COUNT,	/* Last; not a destination group */
+};
+
+/* QSB configuration data for an SoC having IPA v4.9 */
+static const struct ipa_qsb_data ipa_qsb_data[] = {
+	[IPA_QSB_MASTER_DDR] = {
+		.max_writes		= 8,
+		.max_reads		= 0,	/* no limit (hardware max) */
+		.max_reads_beats	= 120,
+	},
+};
+
+/* Endpoint configuration data for an SoC having IPA v4.9 */
+static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
+	[IPA_ENDPOINT_AP_COMMAND_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 6,
+		.endpoint_id	= 7,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 20,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.dma_mode	= true,
+				.dma_endpoint	= IPA_ENDPOINT_AP_LAN_RX,
+				.tx = {
+					.seq_type = IPA_SEQ_DMA,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_LAN_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 7,
+		.endpoint_id	= 11,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.aggregation	= true,
+				.status_enable	= true,
+				.rx = {
+					.pad_align	= ilog2(sizeof(u32)),
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_TX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 2,
+		.endpoint_id	= 2,
+		.toward_ipa	= true,
+		.channel = {
+			.tre_count	= 512,
+			.event_count	= 512,
+			.tlv_count	= 16,
+		},
+		.endpoint = {
+			.filter_support	= true,
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_SRC_UL_DL,
+				.qmap		= true,
+				.status_enable	= true,
+				.tx = {
+					.seq_type = IPA_SEQ_2_PASS_SKIP_LAST_UC,
+					.status_endpoint =
+						IPA_ENDPOINT_MODEM_AP_RX,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_AP_MODEM_RX] = {
+		.ee_id		= GSI_EE_AP,
+		.channel_id	= 12,
+		.endpoint_id	= 20,
+		.toward_ipa	= false,
+		.channel = {
+			.tre_count	= 256,
+			.event_count	= 256,
+			.tlv_count	= 9,
+		},
+		.endpoint = {
+			.config = {
+				.resource_group	= IPA_RSRC_GROUP_DST_UL_DL_DPL,
+				.qmap		= true,
+				.aggregation	= true,
+				.rx = {
+					.aggr_close_eof	= true,
+				},
+			},
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 0,
+		.endpoint_id	= 5,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+	[IPA_ENDPOINT_MODEM_AP_RX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 7,
+		.endpoint_id	= 16,
+		.toward_ipa	= false,
+	},
+	[IPA_ENDPOINT_MODEM_DL_NLO_TX] = {
+		.ee_id		= GSI_EE_MODEM,
+		.channel_id	= 2,
+		.endpoint_id	= 8,
+		.toward_ipa	= true,
+		.endpoint = {
+			.filter_support	= true,
+		},
+	},
+};
+
+/* Source resource configuration data for an SoC having IPA v4.9 */
+static const struct ipa_resource ipa_resource_src[] = {
+	[IPA_RESOURCE_TYPE_SRC_PKT_CONTEXTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 1,	.max = 12,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 1,	.max = 12,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_LISTS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 20,	.max = 20,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 2,	.max = 2,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 3,	.max = 3,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_DESCRIPTOR_BUFF] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 38,	.max = 38,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 4,	.max = 4,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_HPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 0,	.max = 4,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 0,	.max = 4,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 0,	.max = 4,
+		},
+	},
+	[IPA_RESOURCE_TYPE_SRC_ACK_ENTRIES] = {
+		.limits[IPA_RSRC_GROUP_SRC_UL_DL] = {
+			.min = 30,	.max = 30,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_DMA] = {
+			.min = 8,	.max = 8,
+		},
+		.limits[IPA_RSRC_GROUP_SRC_UC_RX_Q] = {
+			.min = 8,	.max = 8,
+		},
+	},
+};
+
+/* Destination resource configuration data for an SoC having IPA v4.9 */
+static const struct ipa_resource ipa_resource_dst[] = {
+	[IPA_RESOURCE_TYPE_DST_DATA_SECTORS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 9,	.max = 9,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DMA] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UC] = {
+			.min = 1,	.max = 1,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DRB_IP] = {
+			.min = 39,	.max = 39,
+		},
+	},
+	[IPA_RESOURCE_TYPE_DST_DPS_DMARS] = {
+		.limits[IPA_RSRC_GROUP_DST_UL_DL_DPL] = {
+			.min = 2,	.max = 3,
+		},
+		.limits[IPA_RSRC_GROUP_DST_DMA] = {
+			.min = 1,	.max = 2,
+		},
+		.limits[IPA_RSRC_GROUP_DST_UC] = {
+			.min = 0,	.max = 2,
+		},
+	},
+};
+
+/* Resource configuration data for an SoC having IPA v4.9 */
+static const struct ipa_resource_data ipa_resource_data = {
+	.rsrc_group_dst_count	= IPA_RSRC_GROUP_DST_COUNT,
+	.rsrc_group_src_count	= IPA_RSRC_GROUP_SRC_COUNT,
+	.resource_src_count	= ARRAY_SIZE(ipa_resource_src),
+	.resource_src		= ipa_resource_src,
+	.resource_dst_count	= ARRAY_SIZE(ipa_resource_dst),
+	.resource_dst		= ipa_resource_dst,
+};
+
+/* IPA-resident memory region data for an SoC having IPA v4.9 */
+static const struct ipa_mem ipa_mem_local_data[] = {
+	[IPA_MEM_UC_SHARED] = {
+		.offset		= 0x0000,
+		.size		= 0x0080,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_UC_INFO] = {
+		.offset		= 0x0080,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_V4_FILTER_HASHED] = { .offset		= 0x0288,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_FILTER] = {
+		.offset		= 0x0308,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER_HASHED] = {
+		.offset		= 0x0388,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_FILTER] = {
+		.offset		= 0x0408,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE_HASHED] = {
+		.offset		= 0x0488,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V4_ROUTE] = {
+		.offset		= 0x0508,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE_HASHED] = {
+		.offset		= 0x0588,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_V6_ROUTE] = {
+		.offset		= 0x0608,
+		.size		= 0x0078,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_MODEM_HEADER] = {
+		.offset		= 0x0688,
+		.size		= 0x0240,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_HEADER] = {
+		.offset		= 0x08c8,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM_PROC_CTX] = {
+		.offset		= 0x0ad0,
+		.size		= 0x0b20,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_AP_PROC_CTX] = {
+		.offset		= 0x15f0,
+		.size		= 0x0200,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_NAT_TABLE] = {
+		.offset		= 0x1800,
+		.size		= 0x0d00,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_STATS_QUOTA_MODEM] = {
+		.offset		= 0x2510,
+		.size		= 0x0030,
+		.canary_count	= 4,
+	},
+	[IPA_MEM_STATS_QUOTA_AP] = {
+		.offset		= 0x2540,
+		.size		= 0x0048,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_TETHERING] = {
+		.offset		= 0x2588,
+		.size		= 0x0238,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_FILTER_ROUTE] = {
+		.offset		= 0x27c0,
+		.size		= 0x0800,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_STATS_DROP] = {
+		.offset		= 0x2fc0,
+		.size		= 0x0020,
+		.canary_count	= 0,
+	},
+	[IPA_MEM_MODEM] = {
+		.offset		= 0x2fe8,
+		.size		= 0x0800,
+		.canary_count	= 2,
+	},
+	[IPA_MEM_UC_EVENT_RING] = {
+		.offset		= 0x3800,
+		.size		= 0x1000,
+		.canary_count	= 1,
+	},
+	[IPA_MEM_PDN_CONFIG] = {
+		.offset		= 0x4800,
+		.size		= 0x0050,
+		.canary_count	= 0,
+	},
+};
+
+/* Memory configuration data for an SoC having IPA v4.9 */
+static const struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146bd000,
+	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00009000,
+};
+
+/* Interconnect rates are in 1000 byte/second units */
+static const struct ipa_interconnect_data ipa_interconnect_data[] = {
+	{
+		.name			= "ipa_to_llcc",
+		.peak_bandwidth		= 600000,	/* 600 MBps */
+		.average_bandwidth	= 150000,	/* 150 MBps */
+	},
+	{
+		.name			= "llcc_to_ebi1",
+		.peak_bandwidth		= 1804000,	/* 1.804 GBps */
+		.average_bandwidth	= 150000,	/* 150 MBps */
+	},
+	/* Average rate is unused for the next interconnect */
+	{
+		.name			= "appss_to_ipa",
+		.peak_bandwidth		= 74000,	/* 74 MBps */
+		.average_bandwidth	= 0,		/* unused */
+	},
+
+};
+
+/* Clock and interconnect configuration data for an SoC having IPA v4.9 */
+static const struct ipa_clock_data ipa_clock_data = {
+	.core_clock_rate	= 60 * 1000 * 1000,	/* Hz */
+	.interconnect_count	= ARRAY_SIZE(ipa_interconnect_data),
+	.interconnect_data	= ipa_interconnect_data,
+};
+
+/* Configuration data for an SoC having IPA v4.9. */
+const struct ipa_data ipa_data_v4_9 = {
+	.version	= IPA_VERSION_4_9,
+	.qsb_count	= ARRAY_SIZE(ipa_qsb_data),
+	.qsb_data	= ipa_qsb_data,
+	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
+	.endpoint_data	= ipa_gsi_endpoint_data,
+	.resource_data	= &ipa_resource_data,
+	.mem_data	= &ipa_mem_data,
+	.clock_data	= &ipa_clock_data,
+};
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index e3212ea9e3bc..5c4c8d72d7d8 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -303,6 +303,7 @@ struct ipa_data {
 extern const struct ipa_data ipa_data_v3_5_1;
 extern const struct ipa_data ipa_data_v4_2;
 extern const struct ipa_data ipa_data_v4_5;
+extern const struct ipa_data ipa_data_v4_9;
 extern const struct ipa_data ipa_data_v4_11;
 
 #endif /* _IPA_DATA_H_ */
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 0d168afcdf04..aad915e2ce52 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -577,6 +577,10 @@ static const struct of_device_id ipa_match[] = {
 		.compatible	= "qcom,sdx55-ipa",
 		.data		= &ipa_data_v4_5,
 	},
+	{
+		.compatible	= "qcom,sm8350-ipa",
+		.data		= &ipa_data_v4_9,
+	},
 	{
 		.compatible	= "qcom,sc7280-ipa",
 		.data		= &ipa_data_v4_11,
-- 
cgit v1.2.3


From 4298255f26fa91c698048e5cc79e371f3bfaf1da Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:20 +0800
Subject: net: stmmac: rearrange RX buffer allocation and free functions

This patch restructures the per RX queue buffer allocation from page_pool
to stmmac_alloc_rx_buffers().

We also rearrange dma_free_rx_skbufs() so that it can be used in
init_dma_rx_desc_rings() during freeing of RX buffer in the event of
page_pool allocation failure to replace the more efficient method earlier.
The replacement is needed to make the RX buffer alloc and free method
scalable to XDP ZC xsk_pool alloc and free later.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 84 +++++++++++++----------
 1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 328aeb2cd276..0a043240de9b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1475,6 +1475,43 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 	tx_q->tx_skbuff_dma[i].map_as_page = false;
 }
 
+/**
+ * dma_free_rx_skbufs - free RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
+{
+	int i;
+
+	for (i = 0; i < priv->dma_rx_size; i++)
+		stmmac_free_rx_buffer(priv, queue, i);
+}
+
+static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue,
+				   gfp_t flags)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	int i;
+
+	for (i = 0; i < priv->dma_rx_size; i++) {
+		struct dma_desc *p;
+		int ret;
+
+		if (priv->extend_desc)
+			p = &((rx_q->dma_erx + i)->basic);
+		else
+			p = rx_q->dma_rx + i;
+
+		ret = stmmac_init_rx_buffers(priv, p, i, flags,
+					     queue);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer.
  * @priv: driver private structure
@@ -1547,15 +1584,14 @@ static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
 	return;
 
 err_reinit_rx_buffers:
-	do {
-		while (--i >= 0)
-			stmmac_free_rx_buffer(priv, queue, i);
+	while (queue >= 0) {
+		dma_free_rx_skbufs(priv, queue);
 
 		if (queue == 0)
 			break;
 
-		i = priv->dma_rx_size;
-	} while (queue-- > 0);
+		queue--;
+	}
 }
 
 /**
@@ -1572,7 +1608,6 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	int ret = -ENOMEM;
 	int queue;
-	int i;
 
 	/* RX INITIALIZATION */
 	netif_dbg(priv, probe, priv->dev,
@@ -1580,7 +1615,7 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-		int ret;
+
 
 		netif_dbg(priv, probe, priv->dev,
 			  "(%s) dma_rx_phy=0x%08x\n", __func__,
@@ -1596,22 +1631,12 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 			    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
 			    rx_q->queue_index);
 
-		for (i = 0; i < priv->dma_rx_size; i++) {
-			struct dma_desc *p;
-
-			if (priv->extend_desc)
-				p = &((rx_q->dma_erx + i)->basic);
-			else
-				p = rx_q->dma_rx + i;
-
-			ret = stmmac_init_rx_buffers(priv, p, i, flags,
-						     queue);
-			if (ret)
-				goto err_init_rx_buffers;
-		}
+		ret = stmmac_alloc_rx_buffers(priv, queue, flags);
+		if (ret < 0)
+			goto err_init_rx_buffers;
 
 		rx_q->cur_rx = 0;
-		rx_q->dirty_rx = (unsigned int)(i - priv->dma_rx_size);
+		rx_q->dirty_rx = 0;
 
 		/* Setup the chained descriptor addresses */
 		if (priv->mode == STMMAC_CHAIN_MODE) {
@@ -1630,13 +1655,11 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 
 err_init_rx_buffers:
 	while (queue >= 0) {
-		while (--i >= 0)
-			stmmac_free_rx_buffer(priv, queue, i);
+		dma_free_rx_skbufs(priv, queue);
 
 		if (queue == 0)
 			break;
 
-		i = priv->dma_rx_size;
 		queue--;
 	}
 
@@ -1731,19 +1754,6 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 	return ret;
 }
 
-/**
- * dma_free_rx_skbufs - free RX dma buffers
- * @priv: private structure
- * @queue: RX queue index
- */
-static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue)
-{
-	int i;
-
-	for (i = 0; i < priv->dma_rx_size; i++)
-		stmmac_free_rx_buffer(priv, queue, i);
-}
-
 /**
  * dma_free_tx_skbufs - free TX dma buffers
  * @priv: private structure
-- 
cgit v1.2.3


From 80f573c995fc8e385d4aebc0ffce88f6b32bb183 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:21 +0800
Subject: net: stmmac: introduce dma_recycle_rx_skbufs for
 stmmac_reinit_rx_buffers

Rearrange RX buffer page_pool recycling logics into dma_recycle_rx_skbufs,
so that we prepare stmmac_reinit_rx_buffers() for XSK pool expansion.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 44 ++++++++++++++---------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0a043240de9b..f48d18352b8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1512,6 +1512,31 @@ static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue,
 	return 0;
 }
 
+/**
+ * dma_recycle_rx_skbufs - recycle RX dma buffers
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_recycle_rx_skbufs(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	int i;
+
+	for (i = 0; i < priv->dma_rx_size; i++) {
+		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+
+		if (buf->page) {
+			page_pool_recycle_direct(rx_q->page_pool, buf->page);
+			buf->page = NULL;
+		}
+
+		if (priv->sph && buf->sec_page) {
+			page_pool_recycle_direct(rx_q->page_pool, buf->sec_page);
+			buf->sec_page = NULL;
+		}
+	}
+}
+
 /**
  * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer.
  * @priv: driver private structure
@@ -1524,23 +1549,8 @@ static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
 	u32 queue;
 	int i;
 
-	for (queue = 0; queue < rx_count; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-
-		for (i = 0; i < priv->dma_rx_size; i++) {
-			struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
-
-			if (buf->page) {
-				page_pool_recycle_direct(rx_q->page_pool, buf->page);
-				buf->page = NULL;
-			}
-
-			if (priv->sph && buf->sec_page) {
-				page_pool_recycle_direct(rx_q->page_pool, buf->sec_page);
-				buf->sec_page = NULL;
-			}
-		}
-	}
+	for (queue = 0; queue < rx_count; queue++)
+		dma_recycle_rx_skbufs(priv, queue);
 
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-- 
cgit v1.2.3


From da5ec7f22a0f1a10a5a9c3063421e73ae054649d Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:22 +0800
Subject: net: stmmac: refactor stmmac_init_rx_buffers for
 stmmac_reinit_rx_buffers

The per-queue RX buffer allocation in stmmac_reinit_rx_buffers() can be
made to use stmmac_alloc_rx_buffers() by merging the page_pool alloc
checks for "buf->page" and "buf->sec_page" in stmmac_init_rx_buffers().

This is in preparation for XSK pool allocation later.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 378 +++++++++++-----------
 1 file changed, 189 insertions(+), 189 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f48d18352b8f..f410c8ff70df 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1388,12 +1388,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 	struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
 
-	buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
-	if (!buf->page)
-		return -ENOMEM;
-	buf->page_offset = stmmac_rx_offset(priv);
+	if (!buf->page) {
+		buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+		if (!buf->page)
+			return -ENOMEM;
+		buf->page_offset = stmmac_rx_offset(priv);
+	}
 
-	if (priv->sph) {
+	if (priv->sph && !buf->sec_page) {
 		buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
 		if (!buf->sec_page)
 			return -ENOMEM;
@@ -1547,48 +1549,16 @@ static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
 {
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	u32 queue;
-	int i;
 
 	for (queue = 0; queue < rx_count; queue++)
 		dma_recycle_rx_skbufs(priv, queue);
 
 	for (queue = 0; queue < rx_count; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-
-		for (i = 0; i < priv->dma_rx_size; i++) {
-			struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
-			struct dma_desc *p;
-
-			if (priv->extend_desc)
-				p = &((rx_q->dma_erx + i)->basic);
-			else
-				p = rx_q->dma_rx + i;
-
-			if (!buf->page) {
-				buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
-				if (!buf->page)
-					goto err_reinit_rx_buffers;
-
-				buf->addr = page_pool_get_dma_addr(buf->page) +
-					    buf->page_offset;
-			}
-
-			if (priv->sph && !buf->sec_page) {
-				buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
-				if (!buf->sec_page)
-					goto err_reinit_rx_buffers;
-
-				buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
-			}
+		int ret;
 
-			stmmac_set_desc_addr(priv, p, buf->addr);
-			if (priv->sph)
-				stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
-			else
-				stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
-			if (priv->dma_buf_sz == BUF_SIZE_16KiB)
-				stmmac_init_desc3(priv, p);
-		}
+		ret = stmmac_alloc_rx_buffers(priv, queue, GFP_KERNEL);
+		if (ret < 0)
+			goto err_reinit_rx_buffers;
 	}
 
 	return;
@@ -1791,153 +1761,173 @@ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv)
 }
 
 /**
- * free_dma_rx_desc_resources - free RX dma desc resources
+ * __free_dma_rx_desc_resources - free RX dma desc resources (per queue)
  * @priv: private structure
+ * @queue: RX queue index
  */
-static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	u32 queue;
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
-	/* Free RX queue resources */
-	for (queue = 0; queue < rx_count; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	/* Release the DMA RX socket buffers */
+	dma_free_rx_skbufs(priv, queue);
 
-		/* Release the DMA RX socket buffers */
-		dma_free_rx_skbufs(priv, queue);
+	/* Free DMA regions of consistent memory previously allocated */
+	if (!priv->extend_desc)
+		dma_free_coherent(priv->device, priv->dma_rx_size *
+				  sizeof(struct dma_desc),
+				  rx_q->dma_rx, rx_q->dma_rx_phy);
+	else
+		dma_free_coherent(priv->device, priv->dma_rx_size *
+				  sizeof(struct dma_extended_desc),
+				  rx_q->dma_erx, rx_q->dma_rx_phy);
 
-		/* Free DMA regions of consistent memory previously allocated */
-		if (!priv->extend_desc)
-			dma_free_coherent(priv->device, priv->dma_rx_size *
-					  sizeof(struct dma_desc),
-					  rx_q->dma_rx, rx_q->dma_rx_phy);
-		else
-			dma_free_coherent(priv->device, priv->dma_rx_size *
-					  sizeof(struct dma_extended_desc),
-					  rx_q->dma_erx, rx_q->dma_rx_phy);
+	if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq))
+		xdp_rxq_info_unreg(&rx_q->xdp_rxq);
 
-		if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq))
-			xdp_rxq_info_unreg(&rx_q->xdp_rxq);
+	kfree(rx_q->buf_pool);
+	if (rx_q->page_pool)
+		page_pool_destroy(rx_q->page_pool);
+}
 
-		kfree(rx_q->buf_pool);
-		if (rx_q->page_pool)
-			page_pool_destroy(rx_q->page_pool);
-	}
+static void free_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+
+	/* Free RX queue resources */
+	for (queue = 0; queue < rx_count; queue++)
+		__free_dma_rx_desc_resources(priv, queue);
 }
 
 /**
- * free_dma_tx_desc_resources - free TX dma desc resources
+ * __free_dma_tx_desc_resources - free TX dma desc resources (per queue)
  * @priv: private structure
+ * @queue: TX queue index
  */
-static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	size_t size;
+	void *addr;
 
-	/* Free TX queue resources */
-	for (queue = 0; queue < tx_count; queue++) {
-		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-		size_t size;
-		void *addr;
+	/* Release the DMA TX socket buffers */
+	dma_free_tx_skbufs(priv, queue);
+
+	if (priv->extend_desc) {
+		size = sizeof(struct dma_extended_desc);
+		addr = tx_q->dma_etx;
+	} else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
+		size = sizeof(struct dma_edesc);
+		addr = tx_q->dma_entx;
+	} else {
+		size = sizeof(struct dma_desc);
+		addr = tx_q->dma_tx;
+	}
 
-		/* Release the DMA TX socket buffers */
-		dma_free_tx_skbufs(priv, queue);
+	size *= priv->dma_tx_size;
 
-		if (priv->extend_desc) {
-			size = sizeof(struct dma_extended_desc);
-			addr = tx_q->dma_etx;
-		} else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
-			size = sizeof(struct dma_edesc);
-			addr = tx_q->dma_entx;
-		} else {
-			size = sizeof(struct dma_desc);
-			addr = tx_q->dma_tx;
-		}
+	dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy);
 
-		size *= priv->dma_tx_size;
+	kfree(tx_q->tx_skbuff_dma);
+	kfree(tx_q->tx_skbuff);
+}
 
-		dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy);
+static void free_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+	u32 tx_count = priv->plat->tx_queues_to_use;
+	u32 queue;
 
-		kfree(tx_q->tx_skbuff_dma);
-		kfree(tx_q->tx_skbuff);
-	}
+	/* Free TX queue resources */
+	for (queue = 0; queue < tx_count; queue++)
+		__free_dma_tx_desc_resources(priv, queue);
 }
 
 /**
- * alloc_dma_rx_desc_resources - alloc RX resources.
+ * __alloc_dma_rx_desc_resources - alloc RX resources (per queue).
  * @priv: private structure
+ * @queue: RX queue index
  * Description: according to which descriptor can be used (extend or basic)
  * this function allocates the resources for TX and RX paths. In case of
  * reception, for example, it pre-allocated the RX socket buffer in order to
  * allow zero-copy mechanism.
  */
-static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
+static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_channel *ch = &priv->channel[queue];
 	bool xdp_prog = stmmac_xdp_is_enabled(priv);
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	int ret = -ENOMEM;
-	u32 queue;
+	struct page_pool_params pp_params = { 0 };
+	unsigned int num_pages;
+	int ret;
 
-	/* RX queues buffers and DMA */
-	for (queue = 0; queue < rx_count; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-		struct stmmac_channel *ch = &priv->channel[queue];
-		struct page_pool_params pp_params = { 0 };
-		unsigned int num_pages;
-		int ret;
+	rx_q->queue_index = queue;
+	rx_q->priv_data = priv;
+
+	pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+	pp_params.pool_size = priv->dma_rx_size;
+	num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
+	pp_params.order = ilog2(num_pages);
+	pp_params.nid = dev_to_node(priv->device);
+	pp_params.dev = priv->device;
+	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+	pp_params.offset = stmmac_rx_offset(priv);
+	pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages);
+
+	rx_q->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rx_q->page_pool)) {
+		ret = PTR_ERR(rx_q->page_pool);
+		rx_q->page_pool = NULL;
+		return ret;
+	}
 
-		rx_q->queue_index = queue;
-		rx_q->priv_data = priv;
-
-		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
-		pp_params.pool_size = priv->dma_rx_size;
-		num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
-		pp_params.order = ilog2(num_pages);
-		pp_params.nid = dev_to_node(priv->device);
-		pp_params.dev = priv->device;
-		pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-		pp_params.offset = stmmac_rx_offset(priv);
-		pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages);
-
-		rx_q->page_pool = page_pool_create(&pp_params);
-		if (IS_ERR(rx_q->page_pool)) {
-			ret = PTR_ERR(rx_q->page_pool);
-			rx_q->page_pool = NULL;
-			goto err_dma;
-		}
+	rx_q->buf_pool = kcalloc(priv->dma_rx_size,
+				 sizeof(*rx_q->buf_pool),
+				 GFP_KERNEL);
+	if (!rx_q->buf_pool)
+		return -ENOMEM;
 
-		rx_q->buf_pool = kcalloc(priv->dma_rx_size,
-					 sizeof(*rx_q->buf_pool),
-					 GFP_KERNEL);
-		if (!rx_q->buf_pool)
-			goto err_dma;
+	if (priv->extend_desc) {
+		rx_q->dma_erx = dma_alloc_coherent(priv->device,
+						   priv->dma_rx_size *
+						   sizeof(struct dma_extended_desc),
+						   &rx_q->dma_rx_phy,
+						   GFP_KERNEL);
+		if (!rx_q->dma_erx)
+			return -ENOMEM;
 
-		if (priv->extend_desc) {
-			rx_q->dma_erx = dma_alloc_coherent(priv->device,
-							   priv->dma_rx_size *
-							   sizeof(struct dma_extended_desc),
-							   &rx_q->dma_rx_phy,
-							   GFP_KERNEL);
-			if (!rx_q->dma_erx)
-				goto err_dma;
+	} else {
+		rx_q->dma_rx = dma_alloc_coherent(priv->device,
+						  priv->dma_rx_size *
+						  sizeof(struct dma_desc),
+						  &rx_q->dma_rx_phy,
+						  GFP_KERNEL);
+		if (!rx_q->dma_rx)
+			return -ENOMEM;
+	}
 
-		} else {
-			rx_q->dma_rx = dma_alloc_coherent(priv->device,
-							  priv->dma_rx_size *
-							  sizeof(struct dma_desc),
-							  &rx_q->dma_rx_phy,
-							  GFP_KERNEL);
-			if (!rx_q->dma_rx)
-				goto err_dma;
-		}
+	ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev,
+			       rx_q->queue_index,
+			       ch->rx_napi.napi_id);
+	if (ret) {
+		netdev_err(priv->dev, "Failed to register xdp rxq info\n");
+		return -EINVAL;
+	}
 
-		ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev,
-				       rx_q->queue_index,
-				       ch->rx_napi.napi_id);
-		if (ret) {
-			netdev_err(priv->dev, "Failed to register xdp rxq info\n");
+	return 0;
+}
+
+static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv)
+{
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+	int ret;
+
+	/* RX queues buffers and DMA */
+	for (queue = 0; queue < rx_count; queue++) {
+		ret = __alloc_dma_rx_desc_resources(priv, queue);
+		if (ret)
 			goto err_dma;
-		}
 	}
 
 	return 0;
@@ -1949,60 +1939,70 @@ err_dma:
 }
 
 /**
- * alloc_dma_tx_desc_resources - alloc TX resources.
+ * __alloc_dma_tx_desc_resources - alloc TX resources (per queue).
  * @priv: private structure
+ * @queue: TX queue index
  * Description: according to which descriptor can be used (extend or basic)
  * this function allocates the resources for TX and RX paths. In case of
  * reception, for example, it pre-allocated the RX socket buffer in order to
  * allow zero-copy mechanism.
  */
-static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue)
 {
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	int ret = -ENOMEM;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	size_t size;
+	void *addr;
 
-	/* TX queues buffers and DMA */
-	for (queue = 0; queue < tx_count; queue++) {
-		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-		size_t size;
-		void *addr;
+	tx_q->queue_index = queue;
+	tx_q->priv_data = priv;
 
-		tx_q->queue_index = queue;
-		tx_q->priv_data = priv;
+	tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size,
+				      sizeof(*tx_q->tx_skbuff_dma),
+				      GFP_KERNEL);
+	if (!tx_q->tx_skbuff_dma)
+		return -ENOMEM;
 
-		tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size,
-					      sizeof(*tx_q->tx_skbuff_dma),
-					      GFP_KERNEL);
-		if (!tx_q->tx_skbuff_dma)
-			goto err_dma;
+	tx_q->tx_skbuff = kcalloc(priv->dma_tx_size,
+				  sizeof(struct sk_buff *),
+				  GFP_KERNEL);
+	if (!tx_q->tx_skbuff)
+		return -ENOMEM;
 
-		tx_q->tx_skbuff = kcalloc(priv->dma_tx_size,
-					  sizeof(struct sk_buff *),
-					  GFP_KERNEL);
-		if (!tx_q->tx_skbuff)
-			goto err_dma;
+	if (priv->extend_desc)
+		size = sizeof(struct dma_extended_desc);
+	else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+		size = sizeof(struct dma_edesc);
+	else
+		size = sizeof(struct dma_desc);
 
-		if (priv->extend_desc)
-			size = sizeof(struct dma_extended_desc);
-		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-			size = sizeof(struct dma_edesc);
-		else
-			size = sizeof(struct dma_desc);
+	size *= priv->dma_tx_size;
 
-		size *= priv->dma_tx_size;
+	addr = dma_alloc_coherent(priv->device, size,
+				  &tx_q->dma_tx_phy, GFP_KERNEL);
+	if (!addr)
+		return -ENOMEM;
 
-		addr = dma_alloc_coherent(priv->device, size,
-					  &tx_q->dma_tx_phy, GFP_KERNEL);
-		if (!addr)
-			goto err_dma;
+	if (priv->extend_desc)
+		tx_q->dma_etx = addr;
+	else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+		tx_q->dma_entx = addr;
+	else
+		tx_q->dma_tx = addr;
 
-		if (priv->extend_desc)
-			tx_q->dma_etx = addr;
-		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-			tx_q->dma_entx = addr;
-		else
-			tx_q->dma_tx = addr;
+	return 0;
+}
+
+static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv)
+{
+	u32 tx_count = priv->plat->tx_queues_to_use;
+	u32 queue;
+	int ret;
+
+	/* TX queues buffers and DMA */
+	for (queue = 0; queue < tx_count; queue++) {
+		ret = __alloc_dma_tx_desc_resources(priv, queue);
+		if (ret)
+			goto err_dma;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From de0b90e52a116a951ca8b13c924c359d5fc39fa0 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:23 +0800
Subject: net: stmmac: rearrange RX and TX desc init into per-queue basis

Below functions are made to be per-queue in preparation of XDP ZC:

 __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags)
 __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue)

The original functions below are stay maintained for all queue usage:

 init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 init_dma_tx_desc_rings(struct net_device *dev)

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 180 ++++++++++++----------
 1 file changed, 100 insertions(+), 80 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f410c8ff70df..790e1af3fe8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1575,60 +1575,70 @@ err_reinit_rx_buffers:
 }
 
 /**
- * init_dma_rx_desc_rings - init the RX descriptor rings
- * @dev: net device structure
+ * __init_dma_rx_desc_rings - init the RX descriptor ring (per queue)
+ * @priv: driver private structure
+ * @queue: RX queue index
  * @flags: gfp flag.
  * Description: this function initializes the DMA RX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
+static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags)
 {
-	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 rx_count = priv->plat->rx_queues_to_use;
-	int ret = -ENOMEM;
-	int queue;
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	int ret;
 
-	/* RX INITIALIZATION */
 	netif_dbg(priv, probe, priv->dev,
-		  "SKB addresses:\nskb\t\tskb data\tdma data\n");
+		  "(%s) dma_rx_phy=0x%08x\n", __func__,
+		  (u32)rx_q->dma_rx_phy);
 
-	for (queue = 0; queue < rx_count; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	stmmac_clear_rx_descriptors(priv, queue);
 
+	WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+					   MEM_TYPE_PAGE_POOL,
+					   rx_q->page_pool));
 
-		netif_dbg(priv, probe, priv->dev,
-			  "(%s) dma_rx_phy=0x%08x\n", __func__,
-			  (u32)rx_q->dma_rx_phy);
+	netdev_info(priv->dev,
+		    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
+		    rx_q->queue_index);
 
-		stmmac_clear_rx_descriptors(priv, queue);
+	ret = stmmac_alloc_rx_buffers(priv, queue, flags);
+	if (ret < 0)
+		return -ENOMEM;
 
-		WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
-						   MEM_TYPE_PAGE_POOL,
-						   rx_q->page_pool));
+	rx_q->cur_rx = 0;
+	rx_q->dirty_rx = 0;
 
-		netdev_info(priv->dev,
-			    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
-			    rx_q->queue_index);
+	/* Setup the chained descriptor addresses */
+	if (priv->mode == STMMAC_CHAIN_MODE) {
+		if (priv->extend_desc)
+			stmmac_mode_init(priv, rx_q->dma_erx,
+					 rx_q->dma_rx_phy,
+					 priv->dma_rx_size, 1);
+		else
+			stmmac_mode_init(priv, rx_q->dma_rx,
+					 rx_q->dma_rx_phy,
+					 priv->dma_rx_size, 0);
+	}
 
-		ret = stmmac_alloc_rx_buffers(priv, queue, flags);
-		if (ret < 0)
-			goto err_init_rx_buffers;
+	return 0;
+}
 
-		rx_q->cur_rx = 0;
-		rx_q->dirty_rx = 0;
+static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 rx_count = priv->plat->rx_queues_to_use;
+	u32 queue;
+	int ret;
 
-		/* Setup the chained descriptor addresses */
-		if (priv->mode == STMMAC_CHAIN_MODE) {
-			if (priv->extend_desc)
-				stmmac_mode_init(priv, rx_q->dma_erx,
-						 rx_q->dma_rx_phy,
-						 priv->dma_rx_size, 1);
-			else
-				stmmac_mode_init(priv, rx_q->dma_rx,
-						 rx_q->dma_rx_phy,
-						 priv->dma_rx_size, 0);
-		}
+	/* RX INITIALIZATION */
+	netif_dbg(priv, probe, priv->dev,
+		  "SKB addresses:\nskb\t\tskb data\tdma data\n");
+
+	for (queue = 0; queue < rx_count; queue++) {
+		ret = __init_dma_rx_desc_rings(priv, queue, flags);
+		if (ret)
+			goto err_init_rx_buffers;
 	}
 
 	return 0;
@@ -1647,63 +1657,73 @@ err_init_rx_buffers:
 }
 
 /**
- * init_dma_tx_desc_rings - init the TX descriptor rings
- * @dev: net device structure.
+ * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue)
+ * @priv: driver private structure
+ * @queue : TX queue index
  * Description: this function initializes the DMA TX descriptors
  * and allocates the socket buffers. It supports the chained and ring
  * modes.
  */
-static int init_dma_tx_desc_rings(struct net_device *dev)
+static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue)
 {
-	struct stmmac_priv *priv = netdev_priv(dev);
-	u32 tx_queue_cnt = priv->plat->tx_queues_to_use;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	int i;
 
-	for (queue = 0; queue < tx_queue_cnt; queue++) {
-		struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
-
-		netif_dbg(priv, probe, priv->dev,
-			  "(%s) dma_tx_phy=0x%08x\n", __func__,
-			 (u32)tx_q->dma_tx_phy);
-
-		/* Setup the chained descriptor addresses */
-		if (priv->mode == STMMAC_CHAIN_MODE) {
-			if (priv->extend_desc)
-				stmmac_mode_init(priv, tx_q->dma_etx,
-						 tx_q->dma_tx_phy,
-						 priv->dma_tx_size, 1);
-			else if (!(tx_q->tbs & STMMAC_TBS_AVAIL))
-				stmmac_mode_init(priv, tx_q->dma_tx,
-						 tx_q->dma_tx_phy,
-						 priv->dma_tx_size, 0);
-		}
+	netif_dbg(priv, probe, priv->dev,
+		  "(%s) dma_tx_phy=0x%08x\n", __func__,
+		  (u32)tx_q->dma_tx_phy);
 
-		for (i = 0; i < priv->dma_tx_size; i++) {
-			struct dma_desc *p;
-			if (priv->extend_desc)
-				p = &((tx_q->dma_etx + i)->basic);
-			else if (tx_q->tbs & STMMAC_TBS_AVAIL)
-				p = &((tx_q->dma_entx + i)->basic);
-			else
-				p = tx_q->dma_tx + i;
+	/* Setup the chained descriptor addresses */
+	if (priv->mode == STMMAC_CHAIN_MODE) {
+		if (priv->extend_desc)
+			stmmac_mode_init(priv, tx_q->dma_etx,
+					 tx_q->dma_tx_phy,
+					 priv->dma_tx_size, 1);
+		else if (!(tx_q->tbs & STMMAC_TBS_AVAIL))
+			stmmac_mode_init(priv, tx_q->dma_tx,
+					 tx_q->dma_tx_phy,
+					 priv->dma_tx_size, 0);
+	}
 
-			stmmac_clear_desc(priv, p);
+	for (i = 0; i < priv->dma_tx_size; i++) {
+		struct dma_desc *p;
 
-			tx_q->tx_skbuff_dma[i].buf = 0;
-			tx_q->tx_skbuff_dma[i].map_as_page = false;
-			tx_q->tx_skbuff_dma[i].len = 0;
-			tx_q->tx_skbuff_dma[i].last_segment = false;
-			tx_q->tx_skbuff[i] = NULL;
-		}
+		if (priv->extend_desc)
+			p = &((tx_q->dma_etx + i)->basic);
+		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+			p = &((tx_q->dma_entx + i)->basic);
+		else
+			p = tx_q->dma_tx + i;
 
-		tx_q->dirty_tx = 0;
-		tx_q->cur_tx = 0;
-		tx_q->mss = 0;
+		stmmac_clear_desc(priv, p);
 
-		netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
+		tx_q->tx_skbuff_dma[i].buf = 0;
+		tx_q->tx_skbuff_dma[i].map_as_page = false;
+		tx_q->tx_skbuff_dma[i].len = 0;
+		tx_q->tx_skbuff_dma[i].last_segment = false;
+		tx_q->tx_skbuff[i] = NULL;
 	}
 
+	tx_q->dirty_tx = 0;
+	tx_q->cur_tx = 0;
+	tx_q->mss = 0;
+
+	netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
+
+	return 0;
+}
+
+static int init_dma_tx_desc_rings(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 tx_queue_cnt;
+	u32 queue;
+
+	tx_queue_cnt = priv->plat->tx_queues_to_use;
+
+	for (queue = 0; queue < tx_queue_cnt; queue++)
+		__init_dma_tx_desc_rings(priv, queue);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From bba71cac680f30de93486c989ee283cccedc5f77 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:24 +0800
Subject: net: stmmac: Refactor __stmmac_xdp_run_prog for XDP ZC

Prepare stmmac_xdp_run_prog() for AF_XDP zero-copy support which will be
added by upcoming patches by splitting out the XDP verdict processing
into __stmmac_xdp_run_prog() and it callable for XDP ZC path which does
not need to verify bpf_prog is not NULL.

The stmmac_xdp_run_prog() is used for regular XDP Rx path which requires
bpf_prog to be verified.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 35 +++++++++++++++--------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 790e1af3fe8f..43c6dbcc423d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4408,20 +4408,13 @@ static int stmmac_xdp_xmit_back(struct stmmac_priv *priv,
 	return res;
 }
 
-static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
-					   struct xdp_buff *xdp)
+/* This function assumes rcu_read_lock() is held by the caller. */
+static int __stmmac_xdp_run_prog(struct stmmac_priv *priv,
+				 struct bpf_prog *prog,
+				 struct xdp_buff *xdp)
 {
-	struct bpf_prog *prog;
-	int res;
 	u32 act;
-
-	rcu_read_lock();
-
-	prog = READ_ONCE(priv->xdp_prog);
-	if (!prog) {
-		res = STMMAC_XDP_PASS;
-		goto unlock;
-	}
+	int res;
 
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
@@ -4448,6 +4441,24 @@ static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
 		break;
 	}
 
+	return res;
+}
+
+static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv,
+					   struct xdp_buff *xdp)
+{
+	struct bpf_prog *prog;
+	int res;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(priv->xdp_prog);
+	if (!prog) {
+		res = STMMAC_XDP_PASS;
+		goto unlock;
+	}
+
+	res = __stmmac_xdp_run_prog(priv, prog, xdp);
 unlock:
 	rcu_read_unlock();
 	return ERR_PTR(-res);
-- 
cgit v1.2.3


From bba2556efad66e7eaa56fece13f7708caa1187f8 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:25 +0800
Subject: net: stmmac: Enable RX via AF_XDP zero-copy

This patch adds the support for receiving packet via AF_XDP zero-copy
mechanism.

XDP ZC uses 1:1 mapping of XDP buffer to receive packet, therefore the
use of split header is not used currently. The 'xdp_buff' is declared as
union together with a struct that contains 'page', 'addr' and
'page_offset' that are associated with primary buffer.

RX buffers are now allocated either via page_pool or xsk pool. For RX
buffers from xsk_pool they are allocated and deallocated using below
functions:

 * stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, u32 queue)
 * dma_free_rx_xskbufs(struct stmmac_priv *priv, u32 queue)

With above functions now available, we then extend the following driver
functions to support XDP ZC:
 * stmmac_reinit_rx_buffers()
 * __init_dma_rx_desc_rings()
 * init_dma_rx_desc_rings()
 * __free_dma_rx_desc_resources()

Note: stmmac_alloc_rx_buffers_zc() may return -ENOMEM due to RX XDP
buffer pool is not allocated (e.g. samples/bpf/xdpsock TX-only). But,
it is still ok to let TX XDP ZC to continue, therefore, the -ENOMEM
is silently ignored to let the driver succcessfully transition to XDP
ZC mode for the said RX and TX queue.

As XDP ZC buffer size is different, the DMA buffer size is required
to be reprogrammed accordingly for RX DMA/Queue that is populated with
XDP buffer from XSK pool.

Next, to add or remove per-queue XSK pool, stmmac_xdp_setup_pool()
will call stmmac_xdp_enable_pool() or stmmac_xdp_disable_pool()
that in-turn coordinates the tearing down and setting up RX ring via
RX buffers and descriptors removal and reallocation through
stmmac_disable_rx_queue() and stmmac_enable_rx_queue(). In addition,
stmmac_xsk_wakeup() is added to initiate XDP RX buffer replenishing
by signalling user application to add available XDP frames back to
FILL queue.

For RX processing using XDP zero-copy buffer, stmmac_rx_zc() is
introduced which is implemented with the assumption that RX split
header is disabled. For XDP verdict is XDP_PASS, the XDP buffer is
copied into a sk_buff allocated through stmmac_construct_skb_zc()
and sent to Linux network GRO inside stmmac_dispatch_skb_zc(). Free RX
buffers are then replenished using stmmac_rx_refill_zc()

v2: introduce __stmmac_disable_all_queues() to contain the original code
    that does napi_disable() and then make stmmac_setup_tc_block_cb()
    to use it. Move synchronize_rcu() into stmmac_disable_all_queues()
    that eventually calls __stmmac_disable_all_queues(). Then,
    make both stmmac_release() and stmmac_suspend() to use
    stmmac_disable_all_queues(). Thanks David Miller for spotting the
    synchronize_rcu() issue in v1 patch.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  18 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 600 +++++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c  |  87 ++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h  |   3 +
 4 files changed, 679 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 8b4ff9c189a1..d7d67d94ab3f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -77,9 +77,14 @@ struct stmmac_tx_queue {
 };
 
 struct stmmac_rx_buffer {
-	struct page *page;
-	dma_addr_t addr;
-	__u32 page_offset;
+	union {
+		struct {
+			struct page *page;
+			dma_addr_t addr;
+			__u32 page_offset;
+		};
+		struct xdp_buff *xdp;
+	};
 	struct page *sec_page;
 	dma_addr_t sec_addr;
 };
@@ -88,6 +93,7 @@ struct stmmac_rx_queue {
 	u32 rx_count_frames;
 	u32 queue_index;
 	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
 	struct page_pool *page_pool;
 	struct stmmac_rx_buffer *buf_pool;
 	struct stmmac_priv *priv_data;
@@ -95,6 +101,7 @@ struct stmmac_rx_queue {
 	struct dma_desc *dma_rx ____cacheline_aligned_in_smp;
 	unsigned int cur_rx;
 	unsigned int dirty_rx;
+	unsigned int buf_alloc_num;
 	u32 rx_zeroc_thresh;
 	dma_addr_t dma_rx_phy;
 	u32 rx_tail_addr;
@@ -283,6 +290,7 @@ struct stmmac_priv {
 	struct stmmac_rss rss;
 
 	/* XDP BPF Program */
+	unsigned long *af_xdp_zc_qps;
 	struct bpf_prog *xdp_prog;
 };
 
@@ -328,6 +336,10 @@ static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
 	return 0;
 }
 
+void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue);
+void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue);
+int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags);
+
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
 void stmmac_selftest_run(struct net_device *dev,
 			 struct ethtool_test *etest, u64 *buf);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 43c6dbcc423d..d4813956efe3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -40,6 +40,7 @@
 #include <linux/udp.h>
 #include <linux/bpf_trace.h>
 #include <net/pkt_cls.h>
+#include <net/xdp_sock_drv.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
 #include "stmmac_xdp.h"
@@ -69,6 +70,8 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define STMMAC_TX_THRESH(x)	((x)->dma_tx_size / 4)
 #define STMMAC_RX_THRESH(x)	((x)->dma_rx_size / 4)
 
+#define STMMAC_RX_FILL_BATCH		16
+
 #define STMMAC_XDP_PASS		0
 #define STMMAC_XDP_CONSUMED	BIT(0)
 #define STMMAC_XDP_TX		BIT(1)
@@ -179,11 +182,7 @@ static void stmmac_verify_args(void)
 		eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 }
 
-/**
- * stmmac_disable_all_queues - Disable all queues
- * @priv: driver private structure
- */
-static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
 	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
 	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
@@ -200,6 +199,28 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 	}
 }
 
+/**
+ * stmmac_disable_all_queues - Disable all queues
+ * @priv: driver private structure
+ */
+static void stmmac_disable_all_queues(struct stmmac_priv *priv)
+{
+	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	struct stmmac_rx_queue *rx_q;
+	u32 queue;
+
+	/* synchronize_rcu() needed for pending XDP buffers to drain */
+	for (queue = 0; queue < rx_queues_cnt; queue++) {
+		rx_q = &priv->rx_queue[queue];
+		if (rx_q->xsk_pool) {
+			synchronize_rcu();
+			break;
+		}
+	}
+
+	__stmmac_disable_all_queues(priv);
+}
+
 /**
  * stmmac_enable_all_queues - Enable all queues
  * @priv: driver private structure
@@ -1509,6 +1530,8 @@ static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue,
 					     queue);
 		if (ret)
 			return ret;
+
+		rx_q->buf_alloc_num++;
 	}
 
 	return 0;
@@ -1539,6 +1562,56 @@ static void dma_recycle_rx_skbufs(struct stmmac_priv *priv, u32 queue)
 	}
 }
 
+/**
+ * dma_free_rx_xskbufs - free RX dma buffers from XSK pool
+ * @priv: private structure
+ * @queue: RX queue index
+ */
+static void dma_free_rx_xskbufs(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	int i;
+
+	for (i = 0; i < priv->dma_rx_size; i++) {
+		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+
+		if (!buf->xdp)
+			continue;
+
+		xsk_buff_free(buf->xdp);
+		buf->xdp = NULL;
+	}
+}
+
+static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	int i;
+
+	for (i = 0; i < priv->dma_rx_size; i++) {
+		struct stmmac_rx_buffer *buf;
+		dma_addr_t dma_addr;
+		struct dma_desc *p;
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *)(rx_q->dma_erx + i);
+		else
+			p = rx_q->dma_rx + i;
+
+		buf = &rx_q->buf_pool[i];
+
+		buf->xdp = xsk_buff_alloc(rx_q->xsk_pool);
+		if (!buf->xdp)
+			return -ENOMEM;
+
+		dma_addr = xsk_buff_xdp_get_dma(buf->xdp);
+		stmmac_set_desc_addr(priv, p, dma_addr);
+		rx_q->buf_alloc_num++;
+	}
+
+	return 0;
+}
+
 /**
  * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer.
  * @priv: driver private structure
@@ -1550,15 +1623,31 @@ static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
 	u32 rx_count = priv->plat->rx_queues_to_use;
 	u32 queue;
 
-	for (queue = 0; queue < rx_count; queue++)
-		dma_recycle_rx_skbufs(priv, queue);
+	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		if (rx_q->xsk_pool)
+			dma_free_rx_xskbufs(priv, queue);
+		else
+			dma_recycle_rx_skbufs(priv, queue);
+
+		rx_q->buf_alloc_num = 0;
+	}
 
 	for (queue = 0; queue < rx_count; queue++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 		int ret;
 
-		ret = stmmac_alloc_rx_buffers(priv, queue, GFP_KERNEL);
-		if (ret < 0)
-			goto err_reinit_rx_buffers;
+		if (rx_q->xsk_pool) {
+			/* RX XDP ZC buffer pool may not be populated, e.g.
+			 * xdpsock TX-only.
+			 */
+			stmmac_alloc_rx_buffers_zc(priv, queue);
+		} else {
+			ret = stmmac_alloc_rx_buffers(priv, queue, GFP_KERNEL);
+			if (ret < 0)
+				goto err_reinit_rx_buffers;
+		}
 	}
 
 	return;
@@ -1574,6 +1663,14 @@ err_reinit_rx_buffers:
 	}
 }
 
+static struct xsk_buff_pool *stmmac_get_xsk_pool(struct stmmac_priv *priv, u32 queue)
+{
+	if (!stmmac_xdp_is_enabled(priv) || !test_bit(queue, priv->af_xdp_zc_qps))
+		return NULL;
+
+	return xsk_get_pool_from_qid(priv->dev, queue);
+}
+
 /**
  * __init_dma_rx_desc_rings - init the RX descriptor ring (per queue)
  * @priv: driver private structure
@@ -1594,17 +1691,37 @@ static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t f
 
 	stmmac_clear_rx_descriptors(priv, queue);
 
-	WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
-					   MEM_TYPE_PAGE_POOL,
-					   rx_q->page_pool));
+	xdp_rxq_info_unreg_mem_model(&rx_q->xdp_rxq);
 
-	netdev_info(priv->dev,
-		    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
-		    rx_q->queue_index);
+	rx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue);
 
-	ret = stmmac_alloc_rx_buffers(priv, queue, flags);
-	if (ret < 0)
-		return -ENOMEM;
+	if (rx_q->xsk_pool) {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		netdev_info(priv->dev,
+			    "Register MEM_TYPE_XSK_BUFF_POOL RxQ-%d\n",
+			    rx_q->queue_index);
+		xsk_pool_set_rxq_info(rx_q->xsk_pool, &rx_q->xdp_rxq);
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq,
+						   MEM_TYPE_PAGE_POOL,
+						   rx_q->page_pool));
+		netdev_info(priv->dev,
+			    "Register MEM_TYPE_PAGE_POOL RxQ-%d\n",
+			    rx_q->queue_index);
+	}
+
+	if (rx_q->xsk_pool) {
+		/* RX XDP ZC buffer pool may not be populated, e.g.
+		 * xdpsock TX-only.
+		 */
+		stmmac_alloc_rx_buffers_zc(priv, queue);
+	} else {
+		ret = stmmac_alloc_rx_buffers(priv, queue, flags);
+		if (ret < 0)
+			return -ENOMEM;
+	}
 
 	rx_q->cur_rx = 0;
 	rx_q->dirty_rx = 0;
@@ -1645,7 +1762,15 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
 
 err_init_rx_buffers:
 	while (queue >= 0) {
-		dma_free_rx_skbufs(priv, queue);
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+		if (rx_q->xsk_pool)
+			dma_free_rx_xskbufs(priv, queue);
+		else
+			dma_free_rx_skbufs(priv, queue);
+
+		rx_q->buf_alloc_num = 0;
+		rx_q->xsk_pool = NULL;
 
 		if (queue == 0)
 			break;
@@ -1790,7 +1915,13 @@ static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
 	/* Release the DMA RX socket buffers */
-	dma_free_rx_skbufs(priv, queue);
+	if (rx_q->xsk_pool)
+		dma_free_rx_xskbufs(priv, queue);
+	else
+		dma_free_rx_skbufs(priv, queue);
+
+	rx_q->buf_alloc_num = 0;
+	rx_q->xsk_pool = NULL;
 
 	/* Free DMA regions of consistent memory previously allocated */
 	if (!priv->extend_desc)
@@ -2222,12 +2353,24 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 
 	/* configure all channels */
 	for (chan = 0; chan < rx_channels_count; chan++) {
+		struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+		u32 buf_size;
+
 		qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
 
 		stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
 				rxfifosz, qmode);
-		stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
-				chan);
+
+		if (rx_q->xsk_pool) {
+			buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool);
+			stmmac_set_dma_bfsize(priv, priv->ioaddr,
+					      buf_size,
+					      chan);
+		} else {
+			stmmac_set_dma_bfsize(priv, priv->ioaddr,
+					      priv->dma_buf_sz,
+					      chan);
+		}
 	}
 
 	for (chan = 0; chan < tx_channels_count; chan++) {
@@ -2638,7 +2781,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 				    rx_q->dma_rx_phy, chan);
 
 		rx_q->rx_tail_addr = rx_q->dma_rx_phy +
-				     (priv->dma_rx_size *
+				     (rx_q->buf_alloc_num *
 				      sizeof(struct dma_desc));
 		stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
 				       rx_q->rx_tail_addr, chan);
@@ -4479,6 +4622,302 @@ static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv,
 		xdp_do_flush();
 }
 
+static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch,
+					       struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int datasize = xdp->data_end - xdp->data;
+	struct sk_buff *skb;
+
+	skb = __napi_alloc_skb(&ch->rx_napi,
+			       xdp->data_end - xdp->data_hard_start,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	return skb;
+}
+
+static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
+				   struct dma_desc *p, struct dma_desc *np,
+				   struct xdp_buff *xdp)
+{
+	struct stmmac_channel *ch = &priv->channel[queue];
+	unsigned int len = xdp->data_end - xdp->data;
+	enum pkt_hash_types hash_type;
+	int coe = priv->hw->rx_csum;
+	struct sk_buff *skb;
+	u32 hash;
+
+	skb = stmmac_construct_skb_zc(ch, xdp);
+	if (!skb) {
+		priv->dev->stats.rx_dropped++;
+		return;
+	}
+
+	stmmac_get_rx_hwtstamp(priv, p, np, skb);
+	stmmac_rx_vlan(priv->dev, skb);
+	skb->protocol = eth_type_trans(skb, priv->dev);
+
+	if (unlikely(!coe))
+		skb_checksum_none_assert(skb);
+	else
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+		skb_set_hash(skb, hash, hash_type);
+
+	skb_record_rx_queue(skb, queue);
+	napi_gro_receive(&ch->rx_napi, skb);
+
+	priv->dev->stats.rx_packets++;
+	priv->dev->stats.rx_bytes += len;
+}
+
+static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	unsigned int entry = rx_q->dirty_rx;
+	struct dma_desc *rx_desc = NULL;
+	bool ret = true;
+
+	budget = min(budget, stmmac_rx_dirty(priv, queue));
+
+	while (budget-- > 0 && entry != rx_q->cur_rx) {
+		struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry];
+		dma_addr_t dma_addr;
+		bool use_rx_wd;
+
+		if (!buf->xdp) {
+			buf->xdp = xsk_buff_alloc(rx_q->xsk_pool);
+			if (!buf->xdp) {
+				ret = false;
+				break;
+			}
+		}
+
+		if (priv->extend_desc)
+			rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry);
+		else
+			rx_desc = rx_q->dma_rx + entry;
+
+		dma_addr = xsk_buff_xdp_get_dma(buf->xdp);
+		stmmac_set_desc_addr(priv, rx_desc, dma_addr);
+		stmmac_set_desc_sec_addr(priv, rx_desc, 0, false);
+		stmmac_refill_desc3(priv, rx_q, rx_desc);
+
+		rx_q->rx_count_frames++;
+		rx_q->rx_count_frames += priv->rx_coal_frames[queue];
+		if (rx_q->rx_count_frames > priv->rx_coal_frames[queue])
+			rx_q->rx_count_frames = 0;
+
+		use_rx_wd = !priv->rx_coal_frames[queue];
+		use_rx_wd |= rx_q->rx_count_frames > 0;
+		if (!priv->use_riwt)
+			use_rx_wd = false;
+
+		dma_wmb();
+		stmmac_set_rx_owner(priv, rx_desc, use_rx_wd);
+
+		entry = STMMAC_GET_ENTRY(entry, priv->dma_rx_size);
+	}
+
+	if (rx_desc) {
+		rx_q->dirty_rx = entry;
+		rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+				     (rx_q->dirty_rx * sizeof(struct dma_desc));
+		stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue);
+	}
+
+	return ret;
+}
+
+static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	unsigned int count = 0, error = 0, len = 0;
+	int dirty = stmmac_rx_dirty(priv, queue);
+	unsigned int next_entry = rx_q->cur_rx;
+	unsigned int desc_size;
+	struct bpf_prog *prog;
+	bool failure = false;
+	int xdp_status = 0;
+	int status = 0;
+
+	if (netif_msg_rx_status(priv)) {
+		void *rx_head;
+
+		netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
+		if (priv->extend_desc) {
+			rx_head = (void *)rx_q->dma_erx;
+			desc_size = sizeof(struct dma_extended_desc);
+		} else {
+			rx_head = (void *)rx_q->dma_rx;
+			desc_size = sizeof(struct dma_desc);
+		}
+
+		stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
+				    rx_q->dma_rx_phy, desc_size);
+	}
+	while (count < limit) {
+		struct stmmac_rx_buffer *buf;
+		unsigned int buf1_len = 0;
+		struct dma_desc *np, *p;
+		int entry;
+		int res;
+
+		if (!count && rx_q->state_saved) {
+			error = rx_q->state.error;
+			len = rx_q->state.len;
+		} else {
+			rx_q->state_saved = false;
+			error = 0;
+			len = 0;
+		}
+
+		if (count >= limit)
+			break;
+
+read_again:
+		buf1_len = 0;
+		entry = next_entry;
+		buf = &rx_q->buf_pool[entry];
+
+		if (dirty >= STMMAC_RX_FILL_BATCH) {
+			failure = failure ||
+				  !stmmac_rx_refill_zc(priv, queue, dirty);
+			dirty = 0;
+		}
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *)(rx_q->dma_erx + entry);
+		else
+			p = rx_q->dma_rx + entry;
+
+		/* read the status of the incoming frame */
+		status = stmmac_rx_status(priv, &priv->dev->stats,
+					  &priv->xstats, p);
+		/* check if managed by the DMA otherwise go ahead */
+		if (unlikely(status & dma_own))
+			break;
+
+		/* Prefetch the next RX descriptor */
+		rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx,
+						priv->dma_rx_size);
+		next_entry = rx_q->cur_rx;
+
+		if (priv->extend_desc)
+			np = (struct dma_desc *)(rx_q->dma_erx + next_entry);
+		else
+			np = rx_q->dma_rx + next_entry;
+
+		prefetch(np);
+
+		if (priv->extend_desc)
+			stmmac_rx_extended_status(priv, &priv->dev->stats,
+						  &priv->xstats,
+						  rx_q->dma_erx + entry);
+		if (unlikely(status == discard_frame)) {
+			xsk_buff_free(buf->xdp);
+			buf->xdp = NULL;
+			dirty++;
+			error = 1;
+			if (!priv->hwts_rx_en)
+				priv->dev->stats.rx_errors++;
+		}
+
+		if (unlikely(error && (status & rx_not_ls)))
+			goto read_again;
+		if (unlikely(error)) {
+			count++;
+			continue;
+		}
+
+		/* Ensure a valid XSK buffer before proceed */
+		if (!buf->xdp)
+			break;
+
+		/* XSK pool expects RX frame 1:1 mapped to XSK buffer */
+		if (likely(status & rx_not_ls)) {
+			xsk_buff_free(buf->xdp);
+			buf->xdp = NULL;
+			dirty++;
+			count++;
+			goto read_again;
+		}
+
+		/* XDP ZC Frame only support primary buffers for now */
+		buf1_len = stmmac_rx_buf1_len(priv, p, status, len);
+		len += buf1_len;
+
+		/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
+		 * Type frames (LLC/LLC-SNAP)
+		 *
+		 * llc_snap is never checked in GMAC >= 4, so this ACS
+		 * feature is always disabled and packets need to be
+		 * stripped manually.
+		 */
+		if (likely(!(status & rx_not_ls)) &&
+		    (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
+		     unlikely(status != llc_snap))) {
+			buf1_len -= ETH_FCS_LEN;
+			len -= ETH_FCS_LEN;
+		}
+
+		/* RX buffer is good and fit into a XSK pool buffer */
+		buf->xdp->data_end = buf->xdp->data + buf1_len;
+		xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool);
+
+		rcu_read_lock();
+		prog = READ_ONCE(priv->xdp_prog);
+		res = __stmmac_xdp_run_prog(priv, prog, buf->xdp);
+		rcu_read_unlock();
+
+		switch (res) {
+		case STMMAC_XDP_PASS:
+			stmmac_dispatch_skb_zc(priv, queue, p, np, buf->xdp);
+			xsk_buff_free(buf->xdp);
+			break;
+		case STMMAC_XDP_CONSUMED:
+			xsk_buff_free(buf->xdp);
+			priv->dev->stats.rx_dropped++;
+			break;
+		case STMMAC_XDP_TX:
+		case STMMAC_XDP_REDIRECT:
+			xdp_status |= res;
+			break;
+		}
+
+		buf->xdp = NULL;
+		dirty++;
+		count++;
+	}
+
+	if (status & rx_not_ls) {
+		rx_q->state_saved = true;
+		rx_q->state.error = error;
+		rx_q->state.len = len;
+	}
+
+	stmmac_finalize_xdp_rx(priv, xdp_status);
+
+	if (xsk_uses_need_wakeup(rx_q->xsk_pool)) {
+		if (failure || stmmac_rx_dirty(priv, queue) > 0)
+			xsk_set_rx_need_wakeup(rx_q->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx_q->xsk_pool);
+
+		return (int)count;
+	}
+
+	return failure ? limit : (int)count;
+}
+
 /**
  * stmmac_rx - manage the receive process
  * @priv: driver private structure
@@ -4766,12 +5205,17 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, rx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
+	struct stmmac_rx_queue *rx_q;
 	u32 chan = ch->index;
 	int work_done;
 
 	priv->xstats.napi_poll++;
 
-	work_done = stmmac_rx(priv, budget, chan);
+	rx_q = &priv->rx_queue[chan];
+
+	work_done = rx_q->xsk_pool ?
+		    stmmac_rx_zc(priv, budget, chan) :
+		    stmmac_rx(priv, budget, chan);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		unsigned long flags;
 
@@ -5254,7 +5698,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 	if (!tc_cls_can_offload_and_chain0(priv->dev, type_data))
 		return ret;
 
-	stmmac_disable_all_queues(priv);
+	__stmmac_disable_all_queues(priv);
 
 	switch (type) {
 	case TC_SETUP_CLSU32:
@@ -5675,6 +6119,9 @@ static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 	switch (bpf->command) {
 	case XDP_SETUP_PROG:
 		return stmmac_xdp_set_prog(priv, bpf->prog, bpf->extack);
+	case XDP_SETUP_XSK_POOL:
+		return stmmac_xdp_setup_pool(priv, bpf->xsk.pool,
+					     bpf->xsk.queue_id);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -5722,6 +6169,102 @@ static int stmmac_xdp_xmit(struct net_device *dev, int num_frames,
 	return nxmit;
 }
 
+void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_channel *ch = &priv->channel[queue];
+	unsigned long flags;
+
+	spin_lock_irqsave(&ch->lock, flags);
+	stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 1, 0);
+	spin_unlock_irqrestore(&ch->lock, flags);
+
+	stmmac_stop_rx_dma(priv, queue);
+	__free_dma_rx_desc_resources(priv, queue);
+}
+
+void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_channel *ch = &priv->channel[queue];
+	unsigned long flags;
+	u32 buf_size;
+	int ret;
+
+	ret = __alloc_dma_rx_desc_resources(priv, queue);
+	if (ret) {
+		netdev_err(priv->dev, "Failed to alloc RX desc.\n");
+		return;
+	}
+
+	ret = __init_dma_rx_desc_rings(priv, queue, GFP_KERNEL);
+	if (ret) {
+		__free_dma_rx_desc_resources(priv, queue);
+		netdev_err(priv->dev, "Failed to init RX desc.\n");
+		return;
+	}
+
+	stmmac_clear_rx_descriptors(priv, queue);
+
+	stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+			    rx_q->dma_rx_phy, rx_q->queue_index);
+
+	rx_q->rx_tail_addr = rx_q->dma_rx_phy + (rx_q->buf_alloc_num *
+			     sizeof(struct dma_desc));
+	stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
+			       rx_q->rx_tail_addr, rx_q->queue_index);
+
+	if (rx_q->xsk_pool && rx_q->buf_alloc_num) {
+		buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool);
+		stmmac_set_dma_bfsize(priv, priv->ioaddr,
+				      buf_size,
+				      rx_q->queue_index);
+	} else {
+		stmmac_set_dma_bfsize(priv, priv->ioaddr,
+				      priv->dma_buf_sz,
+				      rx_q->queue_index);
+	}
+
+	stmmac_start_rx_dma(priv, queue);
+
+	spin_lock_irqsave(&ch->lock, flags);
+	stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 1, 0);
+	spin_unlock_irqrestore(&ch->lock, flags);
+}
+
+int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	struct stmmac_rx_queue *rx_q;
+	struct stmmac_channel *ch;
+
+	if (test_bit(STMMAC_DOWN, &priv->state) ||
+	    !netif_carrier_ok(priv->dev))
+		return -ENETDOWN;
+
+	if (!stmmac_xdp_is_enabled(priv))
+		return -ENXIO;
+
+	if (queue >= priv->plat->rx_queues_to_use)
+		return -EINVAL;
+
+	rx_q = &priv->rx_queue[queue];
+	ch = &priv->channel[queue];
+
+	if (!rx_q->xsk_pool)
+		return -ENXIO;
+
+	if (flags & XDP_WAKEUP_RX &&
+	    !napi_if_scheduled_mark_missed(&ch->rx_napi)) {
+		/* EQoS does not have per-DMA channel SW interrupt,
+		 * so we schedule RX Napi straight-away.
+		 */
+		if (likely(napi_schedule_prep(&ch->rx_napi)))
+			__napi_schedule(&ch->rx_napi);
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_open = stmmac_open,
 	.ndo_start_xmit = stmmac_xmit,
@@ -5742,6 +6285,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
 	.ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid,
 	.ndo_bpf = stmmac_bpf,
 	.ndo_xdp_xmit = stmmac_xdp_xmit,
+	.ndo_xsk_wakeup = stmmac_xsk_wakeup,
 };
 
 static void stmmac_reset_subtask(struct stmmac_priv *priv)
@@ -6075,6 +6619,10 @@ int stmmac_dvr_probe(struct device *device,
 	/* Verify driver arguments */
 	stmmac_verify_args();
 
+	priv->af_xdp_zc_qps = bitmap_zalloc(MTL_MAX_TX_QUEUES, GFP_KERNEL);
+	if (!priv->af_xdp_zc_qps)
+		return -ENOMEM;
+
 	/* Allocate workqueue */
 	priv->wq = create_singlethread_workqueue("stmmac_wq");
 	if (!priv->wq) {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
index bf38d231860b..caff0dfc6f4b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
@@ -1,9 +1,96 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021, Intel Corporation. */
 
+#include <net/xdp_sock_drv.h>
+
 #include "stmmac.h"
 #include "stmmac_xdp.h"
 
+static int stmmac_xdp_enable_pool(struct stmmac_priv *priv,
+				  struct xsk_buff_pool *pool, u16 queue)
+{
+	struct stmmac_channel *ch = &priv->channel[queue];
+	bool need_update;
+	u32 frame_size;
+	int err;
+
+	if (queue >= priv->plat->rx_queues_to_use)
+		return -EINVAL;
+
+	frame_size = xsk_pool_get_rx_frame_size(pool);
+	/* XDP ZC does not span multiple frame, make sure XSK pool buffer
+	 * size can at least store Q-in-Q frame.
+	 */
+	if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2)
+		return -EOPNOTSUPP;
+
+	err = xsk_pool_dma_map(pool, priv->device, STMMAC_RX_DMA_ATTR);
+	if (err) {
+		netdev_err(priv->dev, "Failed to map xsk pool\n");
+		return err;
+	}
+
+	need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv);
+
+	if (need_update) {
+		stmmac_disable_rx_queue(priv, queue);
+		napi_disable(&ch->rx_napi);
+	}
+
+	set_bit(queue, priv->af_xdp_zc_qps);
+
+	if (need_update) {
+		napi_enable(&ch->rx_napi);
+		stmmac_enable_rx_queue(priv, queue);
+
+		err = stmmac_xsk_wakeup(priv->dev, queue, XDP_WAKEUP_RX);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
+{
+	struct stmmac_channel *ch = &priv->channel[queue];
+	struct xsk_buff_pool *pool;
+	bool need_update;
+
+	if (queue >= priv->plat->rx_queues_to_use)
+		return -EINVAL;
+
+	pool = xsk_get_pool_from_qid(priv->dev, queue);
+	if (!pool)
+		return -EINVAL;
+
+	need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv);
+
+	if (need_update) {
+		stmmac_disable_rx_queue(priv, queue);
+		synchronize_rcu();
+		napi_disable(&ch->rx_napi);
+	}
+
+	xsk_pool_dma_unmap(pool, STMMAC_RX_DMA_ATTR);
+
+	clear_bit(queue, priv->af_xdp_zc_qps);
+
+	if (need_update) {
+		napi_enable(&ch->rx_napi);
+		stmmac_enable_rx_queue(priv, queue);
+	}
+
+	return 0;
+}
+
+int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool,
+			  u16 queue)
+{
+	return pool ? stmmac_xdp_enable_pool(priv, pool, queue) :
+		      stmmac_xdp_disable_pool(priv, queue);
+}
+
 int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
 			struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h
index 93948569d92a..896dc987d4ef 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h
@@ -5,7 +5,10 @@
 #define _STMMAC_XDP_H_
 
 #define STMMAC_MAX_RX_BUF_SIZE(num)	(((num) * PAGE_SIZE) - XDP_PACKET_HEADROOM)
+#define STMMAC_RX_DMA_ATTR	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
 
+int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool,
+			  u16 queue);
 int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog,
 			struct netlink_ext_ack *extack);
 
-- 
cgit v1.2.3


From 132c32ee5bc09b1d2fa518f3b38a01f4b859b6df Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Tue, 13 Apr 2021 17:36:26 +0800
Subject: net: stmmac: Add TX via XDP zero-copy socket

We add the support of XDP ZC TX submission and cleaning into
stmmac_tx_clean(). The function is made to clean as many TX complete
frames as possible, i.e. limit by priv->dma_tx_size instead of NAPI
budget. For TX ring that is associated with XSK pool, the function
stmmac_xdp_xmit_zc() is introduced to TX frame buffers from XSK pool by
using xsk_tx_peek_desc(). To make stmmac_tx_clean() support the cleaning
of XSK TX frames, STMMAC_TXBUF_T_XSK_TX TX buffer type is introduced.

As stmmac_tx_clean() uses the return value to cue whether NAPI function
should continue to poll, we augment the caller of stmmac_tx_clean() to
pass NAPI budget instead of priv->dma_tx_size through 'budget' input and
made stmmac_tx_clean() to always clean up-to the TX ring size instead.
This allows us to use the return boolean status of stmmac_xdp_xmit_zc()
to decide if XSK TX work is done or not: If true, set 'xmits' to return
'budget - 1' so that NAPI poll may exit. Else, set 'xmits' to return
'budget' to make NAPI poll continue to poll since XSK TX work is not
done. Finally, at the end of stmmac_tx_clean(), the function now take
a maximum value between 'count' and 'xmits' so that status from both
TX cleaning and XSK TX (only for XDP ZC) is considered.

This patch adds a new NAPI poll called stmmac_napi_poll_rxtx() that is
meant to be enabled/disabled for RX and TX ring that are bound to XSK
pool. This NAPI poll function starts with cleaning TX ring, then submits
XSK TX frames to TX ring before proceed to perform RX operations, i.e.
, receiving RX frames and replenishing RX ring with RX free buffers
obtained from XSK pool. Therefore, during XSK RX and TX setup, the driver
enables stmmac_napi_poll_rxtx() for RX and TX operations, then during
XSK RX and TX pool tear-down, the driver reenables the exisiting
independent NAPI poll functions accordingly: stmmac_napi_poll_rx() and
stmmac_napi_poll_tx().

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |   6 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 317 ++++++++++++++++++++--
 drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c  |  16 +-
 3 files changed, 310 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index d7d67d94ab3f..b8a42260066d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -40,6 +40,7 @@ enum stmmac_txbuf_type {
 	STMMAC_TXBUF_T_SKB,
 	STMMAC_TXBUF_T_XDP_TX,
 	STMMAC_TXBUF_T_XDP_NDO,
+	STMMAC_TXBUF_T_XSK_TX,
 };
 
 struct stmmac_tx_info {
@@ -69,6 +70,8 @@ struct stmmac_tx_queue {
 		struct xdp_frame **xdpf;
 	};
 	struct stmmac_tx_info *tx_skbuff_dma;
+	struct xsk_buff_pool *xsk_pool;
+	u32 xsk_frames_done;
 	unsigned int cur_tx;
 	unsigned int dirty_tx;
 	dma_addr_t dma_tx_phy;
@@ -116,6 +119,7 @@ struct stmmac_rx_queue {
 struct stmmac_channel {
 	struct napi_struct rx_napi ____cacheline_aligned_in_smp;
 	struct napi_struct tx_napi ____cacheline_aligned_in_smp;
+	struct napi_struct rxtx_napi ____cacheline_aligned_in_smp;
 	struct stmmac_priv *priv_data;
 	spinlock_t lock;
 	u32 index;
@@ -338,6 +342,8 @@ static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
 
 void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue);
 void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue);
+void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue);
+void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue);
 int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags);
 
 #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d4813956efe3..e3e22200a4fd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -70,6 +70,9 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define STMMAC_TX_THRESH(x)	((x)->dma_tx_size / 4)
 #define STMMAC_RX_THRESH(x)	((x)->dma_rx_size / 4)
 
+/* Limit to make sure XDP TX and slow path can coexist */
+#define STMMAC_XSK_TX_BUDGET_MAX	256
+#define STMMAC_TX_XSK_AVAIL		16
 #define STMMAC_RX_FILL_BATCH		16
 
 #define STMMAC_XDP_PASS		0
@@ -120,6 +123,8 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id);
 static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id);
 static irqreturn_t stmmac_msi_intr_tx(int irq, void *data);
 static irqreturn_t stmmac_msi_intr_rx(int irq, void *data);
+static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue);
+static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue);
 
 #ifdef CONFIG_DEBUG_FS
 static const struct net_device_ops stmmac_netdev_ops;
@@ -192,6 +197,12 @@ static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
 
+		if (stmmac_xdp_is_enabled(priv) &&
+		    test_bit(queue, priv->af_xdp_zc_qps)) {
+			napi_disable(&ch->rxtx_napi);
+			continue;
+		}
+
 		if (queue < rx_queues_cnt)
 			napi_disable(&ch->rx_napi);
 		if (queue < tx_queues_cnt)
@@ -235,6 +246,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 	for (queue = 0; queue < maxq; queue++) {
 		struct stmmac_channel *ch = &priv->channel[queue];
 
+		if (stmmac_xdp_is_enabled(priv) &&
+		    test_bit(queue, priv->af_xdp_zc_qps)) {
+			napi_enable(&ch->rxtx_napi);
+			continue;
+		}
+
 		if (queue < rx_queues_cnt)
 			napi_enable(&ch->rx_napi);
 		if (queue < tx_queues_cnt)
@@ -1488,6 +1505,9 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 		tx_q->xdpf[i] = NULL;
 	}
 
+	if (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XSK_TX)
+		tx_q->xsk_frames_done++;
+
 	if (tx_q->tx_skbuff[i] &&
 	    tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) {
 		dev_kfree_skb_any(tx_q->tx_skbuff[i]);
@@ -1810,6 +1830,8 @@ static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue)
 					 priv->dma_tx_size, 0);
 	}
 
+	tx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue);
+
 	for (i = 0; i < priv->dma_tx_size; i++) {
 		struct dma_desc *p;
 
@@ -1886,10 +1908,19 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
  */
 static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue)
 {
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	int i;
 
+	tx_q->xsk_frames_done = 0;
+
 	for (i = 0; i < priv->dma_tx_size; i++)
 		stmmac_free_tx_buffer(priv, queue, i);
+
+	if (tx_q->xsk_pool && tx_q->xsk_frames_done) {
+		xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done);
+		tx_q->xsk_frames_done = 0;
+		tx_q->xsk_pool = NULL;
+	}
 }
 
 /**
@@ -2010,6 +2041,7 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 	bool xdp_prog = stmmac_xdp_is_enabled(priv);
 	struct page_pool_params pp_params = { 0 };
 	unsigned int num_pages;
+	unsigned int napi_id;
 	int ret;
 
 	rx_q->queue_index = queue;
@@ -2057,9 +2089,15 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue)
 			return -ENOMEM;
 	}
 
+	if (stmmac_xdp_is_enabled(priv) &&
+	    test_bit(queue, priv->af_xdp_zc_qps))
+		napi_id = ch->rxtx_napi.napi_id;
+	else
+		napi_id = ch->rx_napi.napi_id;
+
 	ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev,
 			       rx_q->queue_index,
-			       ch->rx_napi.napi_id);
+			       napi_id);
 	if (ret) {
 		netdev_err(priv->dev, "Failed to register xdp rxq info\n");
 		return -EINVAL;
@@ -2381,6 +2419,101 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 	}
 }
 
+static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
+{
+	struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue);
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	struct xsk_buff_pool *pool = tx_q->xsk_pool;
+	unsigned int entry = tx_q->cur_tx;
+	struct dma_desc *tx_desc = NULL;
+	struct xdp_desc xdp_desc;
+	bool work_done = true;
+
+	/* Avoids TX time-out as we are sharing with slow path */
+	nq->trans_start = jiffies;
+
+	budget = min(budget, stmmac_tx_avail(priv, queue));
+
+	while (budget-- > 0) {
+		dma_addr_t dma_addr;
+		bool set_ic;
+
+		/* We are sharing with slow path and stop XSK TX desc submission when
+		 * available TX ring is less than threshold.
+		 */
+		if (unlikely(stmmac_tx_avail(priv, queue) < STMMAC_TX_XSK_AVAIL) ||
+		    !netif_carrier_ok(priv->dev)) {
+			work_done = false;
+			break;
+		}
+
+		if (!xsk_tx_peek_desc(pool, &xdp_desc))
+			break;
+
+		if (likely(priv->extend_desc))
+			tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry);
+		else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+			tx_desc = &tx_q->dma_entx[entry].basic;
+		else
+			tx_desc = tx_q->dma_tx + entry;
+
+		dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
+		xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len);
+
+		tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX;
+
+		/* To return XDP buffer to XSK pool, we simple call
+		 * xsk_tx_completed(), so we don't need to fill up
+		 * 'buf' and 'xdpf'.
+		 */
+		tx_q->tx_skbuff_dma[entry].buf = 0;
+		tx_q->xdpf[entry] = NULL;
+
+		tx_q->tx_skbuff_dma[entry].map_as_page = false;
+		tx_q->tx_skbuff_dma[entry].len = xdp_desc.len;
+		tx_q->tx_skbuff_dma[entry].last_segment = true;
+		tx_q->tx_skbuff_dma[entry].is_jumbo = false;
+
+		stmmac_set_desc_addr(priv, tx_desc, dma_addr);
+
+		tx_q->tx_count_frames++;
+
+		if (!priv->tx_coal_frames[queue])
+			set_ic = false;
+		else if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0)
+			set_ic = true;
+		else
+			set_ic = false;
+
+		if (set_ic) {
+			tx_q->tx_count_frames = 0;
+			stmmac_set_tx_ic(priv, tx_desc);
+			priv->xstats.tx_set_ic_bit++;
+		}
+
+		stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len,
+				       true, priv->mode, true, true,
+				       xdp_desc.len);
+
+		stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
+		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size);
+		entry = tx_q->cur_tx;
+	}
+
+	if (tx_desc) {
+		stmmac_flush_tx_descriptors(priv, queue);
+		xsk_tx_release(pool);
+	}
+
+	/* Return true if all of the 3 conditions are met
+	 *  a) TX Budget is still available
+	 *  b) work_done = true when XSK TX desc peek is empty (no more
+	 *     pending XSK TX for transmission)
+	 */
+	return !!budget && work_done;
+}
+
 /**
  * stmmac_tx_clean - to manage the transmission completion
  * @priv: driver private structure
@@ -2392,14 +2525,18 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
-	unsigned int entry, count = 0;
+	unsigned int entry, xmits = 0, count = 0;
 
 	__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
 
 	priv->xstats.tx_clean++;
 
+	tx_q->xsk_frames_done = 0;
+
 	entry = tx_q->dirty_tx;
-	while ((entry != tx_q->cur_tx) && (count < budget)) {
+
+	/* Try to clean all TX complete frame in 1 shot */
+	while ((entry != tx_q->cur_tx) && count < priv->dma_tx_size) {
 		struct xdp_frame *xdpf;
 		struct sk_buff *skb;
 		struct dma_desc *p;
@@ -2484,6 +2621,9 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 			tx_q->xdpf[entry] = NULL;
 		}
 
+		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XSK_TX)
+			tx_q->xsk_frames_done++;
+
 		if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) {
 			if (likely(skb)) {
 				pkts_compl++;
@@ -2511,6 +2651,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 		netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
+	if (tx_q->xsk_pool) {
+		bool work_done;
+
+		if (tx_q->xsk_frames_done)
+			xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done);
+
+		if (xsk_uses_need_wakeup(tx_q->xsk_pool))
+			xsk_set_tx_need_wakeup(tx_q->xsk_pool);
+
+		/* For XSK TX, we try to send as many as possible.
+		 * If XSK work done (XSK TX desc empty and budget still
+		 * available), return "budget - 1" to reenable TX IRQ.
+		 * Else, return "budget" to make NAPI continue polling.
+		 */
+		work_done = stmmac_xdp_xmit_zc(priv, queue,
+					       STMMAC_XSK_TX_BUDGET_MAX);
+		if (work_done)
+			xmits = budget - 1;
+		else
+			xmits = budget;
+	}
+
 	if (priv->eee_enabled && !priv->tx_path_in_lpi_mode &&
 	    priv->eee_sw_timer_en) {
 		stmmac_enable_eee_mode(priv);
@@ -2525,7 +2687,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
 
-	return count;
+	/* Combine decisions from TX clean and XSK TX */
+	return max(count, xmits);
 }
 
 /**
@@ -2607,24 +2770,31 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir)
 {
 	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
 						 &priv->xstats, chan, dir);
+	struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 	struct stmmac_channel *ch = &priv->channel[chan];
+	struct napi_struct *rx_napi;
+	struct napi_struct *tx_napi;
 	unsigned long flags;
 
+	rx_napi = rx_q->xsk_pool ? &ch->rxtx_napi : &ch->rx_napi;
+	tx_napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
+
 	if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
-		if (napi_schedule_prep(&ch->rx_napi)) {
+		if (napi_schedule_prep(rx_napi)) {
 			spin_lock_irqsave(&ch->lock, flags);
 			stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0);
 			spin_unlock_irqrestore(&ch->lock, flags);
-			__napi_schedule(&ch->rx_napi);
+			__napi_schedule(rx_napi);
 		}
 	}
 
 	if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
-		if (napi_schedule_prep(&ch->tx_napi)) {
+		if (napi_schedule_prep(tx_napi)) {
 			spin_lock_irqsave(&ch->lock, flags);
 			stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1);
 			spin_unlock_irqrestore(&ch->lock, flags);
-			__napi_schedule(&ch->tx_napi);
+			__napi_schedule(tx_napi);
 		}
 	}
 
@@ -2822,16 +2992,18 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t)
 	struct stmmac_tx_queue *tx_q = container_of(t, struct stmmac_tx_queue, txtimer);
 	struct stmmac_priv *priv = tx_q->priv_data;
 	struct stmmac_channel *ch;
+	struct napi_struct *napi;
 
 	ch = &priv->channel[tx_q->queue_index];
+	napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
 
-	if (likely(napi_schedule_prep(&ch->tx_napi))) {
+	if (likely(napi_schedule_prep(napi))) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&ch->lock, flags);
 		stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1);
 		spin_unlock_irqrestore(&ch->lock, flags);
-		__napi_schedule(&ch->tx_napi);
+		__napi_schedule(napi);
 	}
 
 	return HRTIMER_NORESTART;
@@ -4629,7 +4801,7 @@ static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch,
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct sk_buff *skb;
 
-	skb = __napi_alloc_skb(&ch->rx_napi,
+	skb = __napi_alloc_skb(&ch->rxtx_napi,
 			       xdp->data_end - xdp->data_hard_start,
 			       GFP_ATOMIC | __GFP_NOWARN);
 	if (unlikely(!skb))
@@ -4673,7 +4845,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
 		skb_set_hash(skb, hash, hash_type);
 
 	skb_record_rx_queue(skb, queue);
-	napi_gro_receive(&ch->rx_napi, skb);
+	napi_gro_receive(&ch->rxtx_napi, skb);
 
 	priv->dev->stats.rx_packets++;
 	priv->dev->stats.rx_bytes += len;
@@ -5205,17 +5377,12 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, rx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	struct stmmac_rx_queue *rx_q;
 	u32 chan = ch->index;
 	int work_done;
 
 	priv->xstats.napi_poll++;
 
-	rx_q = &priv->rx_queue[chan];
-
-	work_done = rx_q->xsk_pool ?
-		    stmmac_rx_zc(priv, budget, chan) :
-		    stmmac_rx(priv, budget, chan);
+	work_done = stmmac_rx(priv, budget, chan);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
 		unsigned long flags;
 
@@ -5237,7 +5404,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 
 	priv->xstats.napi_poll++;
 
-	work_done = stmmac_tx_clean(priv, priv->dma_tx_size, chan);
+	work_done = stmmac_tx_clean(priv, budget, chan);
 	work_done = min(work_done, budget);
 
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5251,6 +5418,42 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
+{
+	struct stmmac_channel *ch =
+		container_of(napi, struct stmmac_channel, rxtx_napi);
+	struct stmmac_priv *priv = ch->priv_data;
+	int rx_done, tx_done;
+	u32 chan = ch->index;
+
+	priv->xstats.napi_poll++;
+
+	tx_done = stmmac_tx_clean(priv, budget, chan);
+	tx_done = min(tx_done, budget);
+
+	rx_done = stmmac_rx_zc(priv, budget, chan);
+
+	/* If either TX or RX work is not complete, return budget
+	 * and keep pooling
+	 */
+	if (tx_done >= budget || rx_done >= budget)
+		return budget;
+
+	/* all work done, exit the polling mode */
+	if (napi_complete_done(napi, rx_done)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ch->lock, flags);
+		/* Both RX and TX work done are compelte,
+		 * so enable both RX & TX IRQs.
+		 */
+		stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1);
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	return min(rx_done, budget - 1);
+}
+
 /**
  *  stmmac_tx_timeout
  *  @dev : Pointer to net device structure
@@ -6231,10 +6434,63 @@ void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue)
 	spin_unlock_irqrestore(&ch->lock, flags);
 }
 
+void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_channel *ch = &priv->channel[queue];
+	unsigned long flags;
+
+	spin_lock_irqsave(&ch->lock, flags);
+	stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 0, 1);
+	spin_unlock_irqrestore(&ch->lock, flags);
+
+	stmmac_stop_tx_dma(priv, queue);
+	__free_dma_tx_desc_resources(priv, queue);
+}
+
+void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+	struct stmmac_channel *ch = &priv->channel[queue];
+	unsigned long flags;
+	int ret;
+
+	ret = __alloc_dma_tx_desc_resources(priv, queue);
+	if (ret) {
+		netdev_err(priv->dev, "Failed to alloc TX desc.\n");
+		return;
+	}
+
+	ret = __init_dma_tx_desc_rings(priv, queue);
+	if (ret) {
+		__free_dma_tx_desc_resources(priv, queue);
+		netdev_err(priv->dev, "Failed to init TX desc.\n");
+		return;
+	}
+
+	stmmac_clear_tx_descriptors(priv, queue);
+
+	stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg,
+			    tx_q->dma_tx_phy, tx_q->queue_index);
+
+	if (tx_q->tbs & STMMAC_TBS_AVAIL)
+		stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index);
+
+	tx_q->tx_tail_addr = tx_q->dma_tx_phy;
+	stmmac_set_tx_tail_ptr(priv, priv->ioaddr,
+			       tx_q->tx_tail_addr, tx_q->queue_index);
+
+	stmmac_start_tx_dma(priv, queue);
+
+	spin_lock_irqsave(&ch->lock, flags);
+	stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 0, 1);
+	spin_unlock_irqrestore(&ch->lock, flags);
+}
+
 int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct stmmac_rx_queue *rx_q;
+	struct stmmac_tx_queue *tx_q;
 	struct stmmac_channel *ch;
 
 	if (test_bit(STMMAC_DOWN, &priv->state) ||
@@ -6244,22 +6500,23 @@ int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags)
 	if (!stmmac_xdp_is_enabled(priv))
 		return -ENXIO;
 
-	if (queue >= priv->plat->rx_queues_to_use)
+	if (queue >= priv->plat->rx_queues_to_use ||
+	    queue >= priv->plat->tx_queues_to_use)
 		return -EINVAL;
 
 	rx_q = &priv->rx_queue[queue];
+	tx_q = &priv->tx_queue[queue];
 	ch = &priv->channel[queue];
 
-	if (!rx_q->xsk_pool)
+	if (!rx_q->xsk_pool && !tx_q->xsk_pool)
 		return -ENXIO;
 
-	if (flags & XDP_WAKEUP_RX &&
-	    !napi_if_scheduled_mark_missed(&ch->rx_napi)) {
+	if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) {
 		/* EQoS does not have per-DMA channel SW interrupt,
 		 * so we schedule RX Napi straight-away.
 		 */
-		if (likely(napi_schedule_prep(&ch->rx_napi)))
-			__napi_schedule(&ch->rx_napi);
+		if (likely(napi_schedule_prep(&ch->rxtx_napi)))
+			__napi_schedule(&ch->rxtx_napi);
 	}
 
 	return 0;
@@ -6444,6 +6701,12 @@ static void stmmac_napi_add(struct net_device *dev)
 					  stmmac_napi_poll_tx,
 					  NAPI_POLL_WEIGHT);
 		}
+		if (queue < priv->plat->rx_queues_to_use &&
+		    queue < priv->plat->tx_queues_to_use) {
+			netif_napi_add(dev, &ch->rxtx_napi,
+				       stmmac_napi_poll_rxtx,
+				       NAPI_POLL_WEIGHT);
+		}
 	}
 }
 
@@ -6461,6 +6724,10 @@ static void stmmac_napi_del(struct net_device *dev)
 			netif_napi_del(&ch->rx_napi);
 		if (queue < priv->plat->tx_queues_to_use)
 			netif_napi_del(&ch->tx_napi);
+		if (queue < priv->plat->rx_queues_to_use &&
+		    queue < priv->plat->tx_queues_to_use) {
+			netif_napi_del(&ch->rxtx_napi);
+		}
 	}
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
index caff0dfc6f4b..105821b53020 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c
@@ -14,7 +14,8 @@ static int stmmac_xdp_enable_pool(struct stmmac_priv *priv,
 	u32 frame_size;
 	int err;
 
-	if (queue >= priv->plat->rx_queues_to_use)
+	if (queue >= priv->plat->rx_queues_to_use ||
+	    queue >= priv->plat->tx_queues_to_use)
 		return -EINVAL;
 
 	frame_size = xsk_pool_get_rx_frame_size(pool);
@@ -34,14 +35,17 @@ static int stmmac_xdp_enable_pool(struct stmmac_priv *priv,
 
 	if (need_update) {
 		stmmac_disable_rx_queue(priv, queue);
+		stmmac_disable_tx_queue(priv, queue);
 		napi_disable(&ch->rx_napi);
+		napi_disable(&ch->tx_napi);
 	}
 
 	set_bit(queue, priv->af_xdp_zc_qps);
 
 	if (need_update) {
-		napi_enable(&ch->rx_napi);
+		napi_enable(&ch->rxtx_napi);
 		stmmac_enable_rx_queue(priv, queue);
+		stmmac_enable_tx_queue(priv, queue);
 
 		err = stmmac_xsk_wakeup(priv->dev, queue, XDP_WAKEUP_RX);
 		if (err)
@@ -57,7 +61,8 @@ static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
 	struct xsk_buff_pool *pool;
 	bool need_update;
 
-	if (queue >= priv->plat->rx_queues_to_use)
+	if (queue >= priv->plat->rx_queues_to_use ||
+	    queue >= priv->plat->tx_queues_to_use)
 		return -EINVAL;
 
 	pool = xsk_get_pool_from_qid(priv->dev, queue);
@@ -68,8 +73,9 @@ static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
 
 	if (need_update) {
 		stmmac_disable_rx_queue(priv, queue);
+		stmmac_disable_tx_queue(priv, queue);
 		synchronize_rcu();
-		napi_disable(&ch->rx_napi);
+		napi_disable(&ch->rxtx_napi);
 	}
 
 	xsk_pool_dma_unmap(pool, STMMAC_RX_DMA_ATTR);
@@ -78,7 +84,9 @@ static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue)
 
 	if (need_update) {
 		napi_enable(&ch->rx_napi);
+		napi_enable(&ch->tx_napi);
 		stmmac_enable_rx_queue(priv, queue);
+		stmmac_enable_tx_queue(priv, queue);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 5871d0c6b8ea805916c3135d0c53b095315bc674 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 13 Apr 2021 13:47:59 +0300
Subject: ionic: return -EFAULT if copy_to_user() fails

The copy_to_user() function returns the number of bytes that it wasn't
able to copy.  We want to return -EFAULT to the user.

Fixes: fee6efce565d ("ionic: add hw timestamp support files")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_phc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
index 177dbf89affd..a87c87e86aef 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c
@@ -225,7 +225,9 @@ int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
 	memcpy(&config, &lif->phc->ts_config, sizeof(config));
 	mutex_unlock(&lif->phc->config_lock);
 
-	return copy_to_user(ifr->ifr_data, &config, sizeof(config));
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+	return 0;
 }
 
 static u64 ionic_hwstamp_read(struct ionic *ionic,
-- 
cgit v1.2.3


From 1b0f14b6c218238de8e1e3ebb06a4efad58043b5 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 13 Apr 2021 16:24:44 +0300
Subject: dpaa2-switch: create a central dpaa2_switch_acl_tbl structure

Introduce a new structure - dpaa2_switch_acl_tbl - to hold all data
related to an ACL table: number of rules added, ACL table id, etc.
This will be used more in the next patches when adding support for
sharing an ACL table between ports.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 44 ++++++++++++++++++----
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    | 10 ++++-
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 80efc8116963..351ee8f7461c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -40,6 +40,17 @@ static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *e
 	return NULL;
 }
 
+static struct dpaa2_switch_acl_tbl *
+dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->acls[i].in_use)
+			return &ethsw->acls[i];
+	return NULL;
+}
+
 static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
 				     struct net_device *bridge_dev)
 {
@@ -2689,7 +2700,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 	acl_h = &acl_key.match;
 	acl_m = &acl_key.mask;
 
-	if (port_priv->acl_num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) {
+	if (port_priv->acl_tbl->num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) {
 		netdev_err(netdev, "ACL full\n");
 		return -ENOMEM;
 	}
@@ -2707,7 +2718,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 	dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff);
 
 	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
-	acl_entry_cfg.precedence = port_priv->acl_num_rules;
+	acl_entry_cfg.precedence = port_priv->acl_tbl->num_rules;
 	acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
 	acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff,
 						DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
@@ -2719,7 +2730,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 
 	err = dpsw_acl_add_entry(port_priv->ethsw_data->mc_io, 0,
 				 port_priv->ethsw_data->dpsw_handle,
-				 port_priv->acl_tbl, &acl_entry_cfg);
+				 port_priv->acl_tbl->id, &acl_entry_cfg);
 
 	dma_unmap_single(dev, acl_entry_cfg.key_iova, sizeof(cmd_buff),
 			 DMA_TO_DEVICE);
@@ -2728,7 +2739,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 		return err;
 	}
 
-	port_priv->acl_num_rules++;
+	port_priv->acl_tbl->num_rules++;
 
 	return 0;
 }
@@ -2743,12 +2754,13 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	};
 	struct net_device *netdev = port_priv->netdev;
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpaa2_switch_acl_tbl *acl_tbl;
 	struct dpsw_fdb_cfg fdb_cfg = {0};
 	struct dpsw_acl_if_cfg acl_if_cfg;
 	struct dpsw_if_attr dpsw_if_attr;
 	struct dpaa2_switch_fdb *fdb;
 	struct dpsw_acl_cfg acl_cfg;
-	u16 fdb_id;
+	u16 fdb_id, acl_tbl_id;
 	int err;
 
 	/* Get the Tx queue for this specific port */
@@ -2792,7 +2804,7 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	/* Create an ACL table to be used by this switch port */
 	acl_cfg.max_entries = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES;
 	err = dpsw_acl_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			   &port_priv->acl_tbl, &acl_cfg);
+			   &acl_tbl_id, &acl_cfg);
 	if (err) {
 		netdev_err(netdev, "dpsw_acl_add err %d\n", err);
 		return err;
@@ -2801,13 +2813,19 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	acl_if_cfg.if_id[0] = port_priv->idx;
 	acl_if_cfg.num_ifs = 1;
 	err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      port_priv->acl_tbl, &acl_if_cfg);
+			      acl_tbl_id, &acl_if_cfg);
 	if (err) {
 		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
 		dpsw_acl_remove(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				port_priv->acl_tbl);
+				acl_tbl_id);
 	}
 
+	acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
+	acl_tbl->id = acl_tbl_id;
+	acl_tbl->in_use = true;
+	acl_tbl->num_rules = 0;
+	port_priv->acl_tbl = acl_tbl;
+
 	err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
 	if (err)
 		return err;
@@ -2858,6 +2876,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
 	}
 
 	kfree(ethsw->fdbs);
+	kfree(ethsw->acls);
 	kfree(ethsw->ports);
 
 	dpaa2_switch_takedown(sw_dev);
@@ -2983,6 +3002,13 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
 		goto err_free_ports;
 	}
 
+	ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls),
+			      GFP_KERNEL);
+	if (!ethsw->acls) {
+		err = -ENOMEM;
+		goto err_free_fdbs;
+	}
+
 	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
 		err = dpaa2_switch_probe_port(ethsw, i);
 		if (err)
@@ -3031,6 +3057,8 @@ err_stop:
 err_free_netdev:
 	for (i--; i >= 0; i--)
 		free_netdev(ethsw->ports[i]->netdev);
+	kfree(ethsw->acls);
+err_free_fdbs:
 	kfree(ethsw->fdbs);
 err_free_ports:
 	kfree(ethsw->ports);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 0ae1d27c811e..a2c0ff23c7e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -101,6 +101,12 @@ struct dpaa2_switch_fdb {
 	bool			in_use;
 };
 
+struct dpaa2_switch_acl_tbl {
+	u16			id;
+	u8			num_rules;
+	bool			in_use;
+};
+
 /* Per port private data */
 struct ethsw_port_priv {
 	struct net_device	*netdev;
@@ -118,8 +124,7 @@ struct ethsw_port_priv {
 	bool			ucast_flood;
 	bool			learn_ena;
 
-	u16			acl_tbl;
-	u8			acl_num_rules;
+	struct dpaa2_switch_acl_tbl *acl_tbl;
 };
 
 /* Switch data */
@@ -145,6 +150,7 @@ struct ethsw_core {
 	int				napi_users;
 
 	struct dpaa2_switch_fdb		*fdbs;
+	struct dpaa2_switch_acl_tbl	*acls;
 };
 
 static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
-- 
cgit v1.2.3


From 2bf90ba5100efa2a06eeecda52fec72a2b024d50 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 13 Apr 2021 16:24:45 +0300
Subject: dpaa2-switch: install default STP trap rule with the highest priority

Change the default ACL trap rule for STP frames to have the highest
priority.

In the same ACL table will reside both default rules added by the driver
for its internal use as well as rules added with tc flower.  In this
case, the default rules such as the STP one that we already have should
have the highest priority.

Also, remove the check for a full ACL table since we already know that
it's sized so that we don't hit this case.  The last thing changes is
that default trap filters will not be counted in the acl_tbl's num_rules
variable since their number doesn't change.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 351ee8f7461c..fc9e2eb0ad11 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2700,11 +2700,6 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 	acl_h = &acl_key.match;
 	acl_m = &acl_key.mask;
 
-	if (port_priv->acl_tbl->num_rules >= DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) {
-		netdev_err(netdev, "ACL full\n");
-		return -ENOMEM;
-	}
-
 	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
 	memset(&acl_key, 0, sizeof(acl_key));
 
@@ -2718,7 +2713,7 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 	dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff);
 
 	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
-	acl_entry_cfg.precedence = port_priv->acl_tbl->num_rules;
+	acl_entry_cfg.precedence = 0;
 	acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
 	acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff,
 						DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
@@ -2739,8 +2734,6 @@ static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 		return err;
 	}
 
-	port_priv->acl_tbl->num_rules++;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 1110318d83e8011c4dfcb2f7dd343bcfb1623c5f Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 13 Apr 2021 16:24:46 +0300
Subject: dpaa2-switch: add tc flower hardware offload on ingress traffic

This patch adds support for tc flower hardware offload on the ingress
path. Shared filter blocks are supported by sharing a single ACL table
between multiple ports.

The following flow keys are supported:
 - Ethernet: dst_mac/src_mac
 - IPv4: dst_ip/src_ip/ip_proto/tos
 - VLAN: vlan_id/vlan_prio/vlan_tpid/vlan_dei
 - L4: dst_port/src_port

As per flow actions, the following are supported:
 - drop
 - mirred egress redirect
 - trap
Each ACL entry (filter) can be setup with only one of the listed
actions.

A sorted single linked list is used to keep the ACL entries by their
order of priority. When adding a new filter, this enables us to quickly
ascertain if the new entry has the highest priority of the entire block
or if we should make some space in the ACL table by increasing the
priority of the filters already in the table.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/Makefile      |   2 +-
 .../ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 436 +++++++++++++++++++++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 261 +++++++++++-
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  44 +++
 drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h    |   1 +
 drivers/net/ethernet/freescale/dpaa2/dpsw.c        |  35 ++
 drivers/net/ethernet/freescale/dpaa2/dpsw.h        |   3 +
 7 files changed, 768 insertions(+), 14 deletions(-)
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c

diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index 644ef9ae02a3..c2ef74052ef8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs	:= dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o
+fsl-dpaa2-switch-objs	:= dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
new file mode 100644
index 000000000000..ee987fa02f0d
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -0,0 +1,436 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DPAA2 Ethernet Switch flower support
+ *
+ * Copyright 2021 NXP
+ *
+ */
+
+#include "dpaa2-switch.h"
+
+static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls,
+					 struct dpsw_acl_key *acl_key)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct dpsw_acl_fields *acl_h, *acl_m;
+
+	if (dissector->used_keys &
+	    ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_IP) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Unsupported keys used");
+		return -EOPNOTSUPP;
+	}
+
+	acl_h = &acl_key->match;
+	acl_m = &acl_key->mask;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		acl_h->l3_protocol = match.key->ip_proto;
+		acl_h->l2_ether_type = be16_to_cpu(match.key->n_proto);
+		acl_m->l3_protocol = match.mask->ip_proto;
+		acl_m->l2_ether_type = be16_to_cpu(match.mask->n_proto);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+		ether_addr_copy(acl_h->l2_dest_mac, &match.key->dst[0]);
+		ether_addr_copy(acl_h->l2_source_mac, &match.key->src[0]);
+		ether_addr_copy(acl_m->l2_dest_mac, &match.mask->dst[0]);
+		ether_addr_copy(acl_m->l2_source_mac, &match.mask->src[0]);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		acl_h->l2_vlan_id = match.key->vlan_id;
+		acl_h->l2_tpid = be16_to_cpu(match.key->vlan_tpid);
+		acl_h->l2_pcp_dei = match.key->vlan_priority << 1 |
+				    match.key->vlan_dei;
+
+		acl_m->l2_vlan_id = match.mask->vlan_id;
+		acl_m->l2_tpid = be16_to_cpu(match.mask->vlan_tpid);
+		acl_m->l2_pcp_dei = match.mask->vlan_priority << 1 |
+				    match.mask->vlan_dei;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		acl_h->l3_source_ip = be32_to_cpu(match.key->src);
+		acl_h->l3_dest_ip = be32_to_cpu(match.key->dst);
+		acl_m->l3_source_ip = be32_to_cpu(match.mask->src);
+		acl_m->l3_dest_ip = be32_to_cpu(match.mask->dst);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		acl_h->l4_source_port = be16_to_cpu(match.key->src);
+		acl_h->l4_dest_port = be16_to_cpu(match.key->dst);
+		acl_m->l4_source_port = be16_to_cpu(match.mask->src);
+		acl_m->l4_dest_port = be16_to_cpu(match.mask->dst);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_ip(rule, &match);
+		if (match.mask->ttl != 0) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Matching on TTL not supported");
+			return -EOPNOTSUPP;
+		}
+
+		if ((match.mask->tos & 0x3) != 0) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Matching on ECN not supported, only DSCP");
+			return -EOPNOTSUPP;
+		}
+
+		acl_h->l3_dscp = match.key->tos >> 2;
+		acl_m->l3_dscp = match.mask->tos >> 2;
+	}
+
+	return 0;
+}
+
+static int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
+	struct ethsw_core *ethsw = acl_tbl->ethsw;
+	struct dpsw_acl_key *acl_key = &entry->key;
+	struct device *dev = ethsw->dev;
+	u8 *cmd_buff;
+	int err;
+
+	cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL);
+	if (!cmd_buff)
+		return -ENOMEM;
+
+	dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff);
+
+	acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff,
+						 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+						 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+		dev_err(dev, "DMA mapping failed\n");
+		return -EFAULT;
+	}
+
+	err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				 acl_tbl->id, acl_entry_cfg);
+
+	dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
+			 DMA_TO_DEVICE);
+	if (err) {
+		dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+		return err;
+	}
+
+	kfree(cmd_buff);
+
+	return 0;
+}
+
+static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
+					 struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
+	struct dpsw_acl_key *acl_key = &entry->key;
+	struct ethsw_core *ethsw = acl_tbl->ethsw;
+	struct device *dev = ethsw->dev;
+	u8 *cmd_buff;
+	int err;
+
+	cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL);
+	if (!cmd_buff)
+		return -ENOMEM;
+
+	dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff);
+
+	acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff,
+						 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+						 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+		dev_err(dev, "DMA mapping failed\n");
+		return -EFAULT;
+	}
+
+	err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    acl_tbl->id, acl_entry_cfg);
+
+	dma_unmap_single(dev, acl_entry_cfg->key_iova, sizeof(cmd_buff),
+			 DMA_TO_DEVICE);
+	if (err) {
+		dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+		return err;
+	}
+
+	kfree(cmd_buff);
+
+	return 0;
+}
+
+static int
+dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
+				   struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	struct list_head *pos, *n;
+	int index = 0;
+
+	if (list_empty(&acl_tbl->entries)) {
+		list_add(&entry->list, &acl_tbl->entries);
+		return index;
+	}
+
+	list_for_each_safe(pos, n, &acl_tbl->entries) {
+		tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list);
+		if (entry->prio < tmp->prio)
+			break;
+		index++;
+	}
+	list_add(&entry->list, pos->prev);
+	return index;
+}
+
+static struct dpaa2_switch_acl_entry*
+dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    int index)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	int i = 0;
+
+	list_for_each_entry(tmp, &acl_tbl->entries, list) {
+		if (i == index)
+			return tmp;
+		++i;
+	}
+
+	return NULL;
+}
+
+static int
+dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct dpaa2_switch_acl_entry *entry,
+				      int precedence)
+{
+	int err;
+
+	err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+	if (err)
+		return err;
+
+	entry->cfg.precedence = precedence;
+	return dpaa2_switch_acl_entry_add(acl_tbl, entry);
+}
+
+static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
+					  struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	int index, i, precedence, err;
+
+	/* Add the new ACL entry to the linked list and get its index */
+	index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry);
+
+	/* Move up in priority the ACL entries to make space
+	 * for the new filter.
+	 */
+	precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1;
+	for (i = 0; i < index; i++) {
+		tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
+
+		err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+							    precedence);
+		if (err)
+			return err;
+
+		precedence++;
+	}
+
+	/* Add the new entry to hardware */
+	entry->cfg.precedence = precedence;
+	err = dpaa2_switch_acl_entry_add(acl_tbl, entry);
+	acl_tbl->num_rules++;
+
+	return err;
+}
+
+static struct dpaa2_switch_acl_entry *
+dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl,
+					  unsigned long cookie)
+{
+	struct dpaa2_switch_acl_entry *tmp, *n;
+
+	list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+		if (tmp->cookie == cookie)
+			return tmp;
+	}
+	return NULL;
+}
+
+static int
+dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+				 struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp, *n;
+	int index = 0;
+
+	list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) {
+		if (tmp->cookie == entry->cookie)
+			return index;
+		index++;
+	}
+	return -ENOENT;
+}
+
+static int
+dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
+				  struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	int index, i, precedence, err;
+
+	index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry);
+
+	/* Remove from hardware the ACL entry */
+	err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+	if (err)
+		return err;
+
+	acl_tbl->num_rules--;
+
+	/* Remove it from the list also */
+	list_del(&entry->list);
+
+	/* Move down in priority the entries over the deleted one */
+	precedence = entry->cfg.precedence;
+	for (i = index - 1; i >= 0; i--) {
+		tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i);
+		err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp,
+							    precedence);
+		if (err)
+			return err;
+
+		precedence--;
+	}
+
+	kfree(entry);
+
+	return 0;
+}
+
+static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw,
+					struct flow_action_entry *cls_act,
+					struct dpsw_acl_result *dpsw_act,
+					struct netlink_ext_ack *extack)
+{
+	int err = 0;
+
+	switch (cls_act->id) {
+	case FLOW_ACTION_TRAP:
+		dpsw_act->action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
+		break;
+	case FLOW_ACTION_REDIRECT:
+		if (!dpaa2_switch_port_dev_check(cls_act->dev)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Destination not a DPAA2 switch port");
+			return -EOPNOTSUPP;
+		}
+
+		dpsw_act->if_id = dpaa2_switch_get_index(ethsw, cls_act->dev);
+		dpsw_act->action = DPSW_ACL_ACTION_REDIRECT;
+		break;
+	case FLOW_ACTION_DROP:
+		dpsw_act->action = DPSW_ACL_ACTION_DROP;
+		break;
+	default:
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Action not supported");
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+out:
+	return err;
+}
+
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    struct flow_cls_offload *cls)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct ethsw_core *ethsw = acl_tbl->ethsw;
+	struct dpaa2_switch_acl_entry *acl_entry;
+	struct flow_action_entry *act;
+	int err;
+
+	if (!flow_offload_has_one_action(&rule->action)) {
+		NL_SET_ERR_MSG(extack, "Only singular actions are supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+		NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
+		return -ENOMEM;
+	}
+
+	acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL);
+	if (!acl_entry)
+		return -ENOMEM;
+
+	err = dpaa2_switch_flower_parse_key(cls, &acl_entry->key);
+	if (err)
+		goto free_acl_entry;
+
+	act = &rule->action.entries[0];
+	err = dpaa2_switch_tc_parse_action(ethsw, act,
+					   &acl_entry->cfg.result, extack);
+	if (err)
+		goto free_acl_entry;
+
+	acl_entry->prio = cls->common.prio;
+	acl_entry->cookie = cls->cookie;
+
+	err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+	if (err)
+		goto free_acl_entry;
+
+	return 0;
+
+free_acl_entry:
+	kfree(acl_entry);
+
+	return err;
+}
+
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    struct flow_cls_offload *cls)
+{
+	struct dpaa2_switch_acl_entry *entry;
+
+	entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
+	if (!entry)
+		return 0;
+
+	return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index fc9e2eb0ad11..5080788c692b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -14,6 +14,7 @@
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <linux/iommu.h>
+#include <net/pkt_cls.h>
 
 #include <linux/fsl/mc.h>
 
@@ -1125,6 +1126,243 @@ err_exit:
 	return NETDEV_TX_OK;
 }
 
+static int
+dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl,
+				 struct flow_cls_offload *f)
+{
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return dpaa2_switch_cls_flower_replace(acl_tbl, f);
+	case FLOW_CLS_DESTROY:
+		return dpaa2_switch_cls_flower_destroy(acl_tbl, f);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
+						  void *type_data,
+						  void *cb_priv)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return dpaa2_switch_setup_tc_cls_flower(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(dpaa2_switch_block_cb_list);
+
+static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
+					  struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_acl_if_cfg acl_if_cfg;
+	int err;
+
+	if (port_priv->acl_tbl)
+		return -EINVAL;
+
+	acl_if_cfg.if_id[0] = port_priv->idx;
+	acl_if_cfg.num_ifs = 1;
+	err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      acl_tbl->id, &acl_if_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
+		return err;
+	}
+
+	acl_tbl->ports |= BIT(port_priv->idx);
+	port_priv->acl_tbl = acl_tbl;
+
+	return 0;
+}
+
+static int
+dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv,
+				 struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_acl_if_cfg acl_if_cfg;
+	int err;
+
+	if (port_priv->acl_tbl != acl_tbl)
+		return -EINVAL;
+
+	acl_if_cfg.if_id[0] = port_priv->idx;
+	acl_if_cfg.num_ifs = 1;
+	err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				 acl_tbl->id, &acl_if_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
+		return err;
+	}
+
+	acl_tbl->ports &= ~BIT(port_priv->idx);
+	port_priv->acl_tbl = NULL;
+	return 0;
+}
+
+static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv,
+					struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl;
+	int err;
+
+	/* If the port is already bound to this ACL table then do nothing. This
+	 * can happen when this port is the first one to join a tc block
+	 */
+	if (port_priv->acl_tbl == acl_tbl)
+		return 0;
+
+	err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl);
+	if (err)
+		return err;
+
+	/* Mark the previous ACL table as being unused if this was the last
+	 * port that was using it.
+	 */
+	if (old_acl_tbl->ports == 0)
+		old_acl_tbl->in_use = false;
+
+	return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+}
+
+static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv,
+					  struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpaa2_switch_acl_tbl *new_acl_tbl;
+	int err;
+
+	/* We are the last port that leaves a block (an ACL table).
+	 * We'll continue to use this table.
+	 */
+	if (acl_tbl->ports == BIT(port_priv->idx))
+		return 0;
+
+	err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl);
+	if (err)
+		return err;
+
+	if (acl_tbl->ports == 0)
+		acl_tbl->in_use = false;
+
+	new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
+	new_acl_tbl->in_use = true;
+	return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl);
+}
+
+static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev,
+					    struct flow_block_offload *f)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpaa2_switch_acl_tbl *acl_tbl;
+	struct flow_block_cb *block_cb;
+	bool register_block = false;
+	int err;
+
+	block_cb = flow_block_cb_lookup(f->block,
+					dpaa2_switch_port_setup_tc_block_cb_ig,
+					ethsw);
+
+	if (!block_cb) {
+		/* If the ACL table is not already known, then this port must
+		 * be the first to join it. In this case, we can just continue
+		 * to use our private table
+		 */
+		acl_tbl = port_priv->acl_tbl;
+
+		block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig,
+					       ethsw, acl_tbl, NULL);
+		if (IS_ERR(block_cb))
+			return PTR_ERR(block_cb);
+
+		register_block = true;
+	} else {
+		acl_tbl = flow_block_cb_priv(block_cb);
+	}
+
+	flow_block_cb_incref(block_cb);
+	err = dpaa2_switch_port_block_bind(port_priv, acl_tbl);
+	if (err)
+		goto err_block_bind;
+
+	if (register_block) {
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list,
+			      &dpaa2_switch_block_cb_list);
+	}
+
+	return 0;
+
+err_block_bind:
+	if (!flow_block_cb_decref(block_cb))
+		flow_block_cb_free(block_cb);
+	return err;
+}
+
+static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev,
+					       struct flow_block_offload *f)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpaa2_switch_acl_tbl *acl_tbl;
+	struct flow_block_cb *block_cb;
+	int err;
+
+	block_cb = flow_block_cb_lookup(f->block,
+					dpaa2_switch_port_setup_tc_block_cb_ig,
+					ethsw);
+	if (!block_cb)
+		return;
+
+	acl_tbl = flow_block_cb_priv(block_cb);
+	err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl);
+	if (!err && !flow_block_cb_decref(block_cb)) {
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+	}
+}
+
+static int dpaa2_switch_setup_tc_block(struct net_device *netdev,
+				       struct flow_block_offload *f)
+{
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	f->driver_block_list = &dpaa2_switch_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		return dpaa2_switch_setup_tc_block_bind(netdev, f);
+	case FLOW_BLOCK_UNBIND:
+		dpaa2_switch_setup_tc_block_unbind(netdev, f);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dpaa2_switch_port_setup_tc(struct net_device *netdev,
+				      enum tc_setup_type type,
+				      void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_BLOCK: {
+		return dpaa2_switch_setup_tc_block(netdev, type_data);
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_open		= dpaa2_switch_port_open,
 	.ndo_stop		= dpaa2_switch_port_stop,
@@ -1141,6 +1379,7 @@ static const struct net_device_ops dpaa2_switch_port_ops = {
 	.ndo_start_xmit		= dpaa2_switch_port_tx,
 	.ndo_get_port_parent_id	= dpaa2_switch_port_parent_id,
 	.ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name,
+	.ndo_setup_tc		= dpaa2_switch_port_setup_tc,
 };
 
 bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
@@ -2749,7 +2988,6 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 	struct ethsw_core *ethsw = port_priv->ethsw_data;
 	struct dpaa2_switch_acl_tbl *acl_tbl;
 	struct dpsw_fdb_cfg fdb_cfg = {0};
-	struct dpsw_acl_if_cfg acl_if_cfg;
 	struct dpsw_if_attr dpsw_if_attr;
 	struct dpaa2_switch_fdb *fdb;
 	struct dpsw_acl_cfg acl_cfg;
@@ -2803,21 +3041,16 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
 		return err;
 	}
 
-	acl_if_cfg.if_id[0] = port_priv->idx;
-	acl_if_cfg.num_ifs = 1;
-	err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
-			      acl_tbl_id, &acl_if_cfg);
-	if (err) {
-		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
-		dpsw_acl_remove(ethsw->mc_io, 0, ethsw->dpsw_handle,
-				acl_tbl_id);
-	}
-
 	acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
+	acl_tbl->ethsw = ethsw;
 	acl_tbl->id = acl_tbl_id;
 	acl_tbl->in_use = true;
 	acl_tbl->num_rules = 0;
-	port_priv->acl_tbl = acl_tbl;
+	INIT_LIST_HEAD(&acl_tbl->entries);
+
+	err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+	if (err)
+		return err;
 
 	err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
 	if (err)
@@ -2927,7 +3160,9 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
 	/* The DPAA2 switch's ingress path depends on the VLAN table,
 	 * thus we are not able to disable VLAN filtering.
 	 */
-	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER;
+	port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER |
+				NETIF_F_HW_VLAN_STAG_FILTER |
+				NETIF_F_HW_TC;
 
 	err = dpaa2_switch_port_init(port_priv, port_idx);
 	if (err)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index a2c0ff23c7e9..629186208b58 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -80,6 +80,8 @@
 	(DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN)
 
 #define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES	16
+#define DPAA2_ETHSW_PORT_DEFAULT_TRAPS		1
+
 #define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE	256
 
 extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops;
@@ -101,12 +103,34 @@ struct dpaa2_switch_fdb {
 	bool			in_use;
 };
 
+struct dpaa2_switch_acl_entry {
+	struct list_head	list;
+	u16			prio;
+	unsigned long		cookie;
+
+	struct dpsw_acl_entry_cfg cfg;
+	struct dpsw_acl_key	key;
+};
+
 struct dpaa2_switch_acl_tbl {
+	struct list_head	entries;
+	struct ethsw_core	*ethsw;
+	u64			ports;
+
 	u16			id;
 	u8			num_rules;
 	bool			in_use;
 };
 
+static inline bool
+dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >=
+	    DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES)
+		return true;
+	return false;
+}
+
 /* Per port private data */
 struct ethsw_port_priv {
 	struct net_device	*netdev;
@@ -153,6 +177,18 @@ struct ethsw_core {
 	struct dpaa2_switch_acl_tbl	*acls;
 };
 
+static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw,
+					 struct net_device *netdev)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (ethsw->ports[i]->netdev == netdev)
+			return ethsw->ports[i]->idx;
+
+	return -EINVAL;
+}
+
 static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
 {
 	if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) {
@@ -189,4 +225,12 @@ int dpaa2_switch_port_vlans_del(struct net_device *netdev,
 typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv,
 				  struct fdb_dump_entry *fdb_entry,
 				  void *data);
+
+/* TC offload */
+
+int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    struct flow_cls_offload *cls);
+
+int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    struct flow_cls_offload *cls);
 #endif	/* __ETHSW_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
index 1747cee19a72..cb13e740f72b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h
@@ -77,6 +77,7 @@
 #define DPSW_CMDID_ACL_ADD                  DPSW_CMD_ID(0x090)
 #define DPSW_CMDID_ACL_REMOVE               DPSW_CMD_ID(0x091)
 #define DPSW_CMDID_ACL_ADD_ENTRY            DPSW_CMD_ID(0x092)
+#define DPSW_CMDID_ACL_REMOVE_ENTRY         DPSW_CMD_ID(0x093)
 #define DPSW_CMDID_ACL_ADD_IF               DPSW_CMD_ID(0x094)
 #define DPSW_CMDID_ACL_REMOVE_IF            DPSW_CMD_ID(0x095)
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
index 6704efe89bc1..6352d6d1ecba 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c
@@ -1544,3 +1544,38 @@ int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpsw_acl_remove_entry() - Removes an entry from ACL.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @acl_id:	ACL ID
+ * @cfg:	Entry configuration
+ *
+ * warning: This function has to be called after dpsw_acl_set_entry_cfg()
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u16 acl_id, const struct dpsw_acl_entry_cfg *cfg)
+{
+	struct dpsw_cmd_acl_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_ENTRY,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id);
+	cmd_params->precedence = cpu_to_le32(cfg->precedence);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	dpsw_set_field(cmd_params->result_action,
+		       RESULT_ACTION,
+		       cfg->result.action);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
index 08e37c475ae8..5ef221a25b02 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -749,4 +749,7 @@ void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key,
 
 int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
 		       u16 acl_id, const struct dpsw_acl_entry_cfg *cfg);
+
+int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u16 acl_id, const struct dpsw_acl_entry_cfg *cfg);
 #endif /* __FSL_DPSW_H */
-- 
cgit v1.2.3


From 4ba28c1a1aff053e6471151cffee860668ead786 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 13 Apr 2021 16:24:47 +0300
Subject: dpaa2-switch: add tc matchall filter support

Add support TC_SETUP_CLSMATCHALL by using the same ACL table entries
framework as for tc flower. Adding a matchall rule is done by installing
an entry which has a mask of all zeroes, thus matching on any packet.

This can be used as a catch-all type of rule if used correctly, ie the
priority of the matchall filter should be kept as the lowest one in the
entire filter block.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 56 ++++++++++++++++++++++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 16 +++++++
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  7 +++
 3 files changed, 79 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index ee987fa02f0d..b4807ddc2011 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -434,3 +434,59 @@ int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
 
 	return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
 }
+
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct tc_cls_matchall_offload *cls)
+{
+	struct netlink_ext_ack *extack = cls->common.extack;
+	struct ethsw_core *ethsw = acl_tbl->ethsw;
+	struct dpaa2_switch_acl_entry *acl_entry;
+	struct flow_action_entry *act;
+	int err;
+
+	if (!flow_offload_has_one_action(&cls->rule->action)) {
+		NL_SET_ERR_MSG(extack, "Only singular actions are supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) {
+		NL_SET_ERR_MSG(extack, "Maximum filter capacity reached");
+		return -ENOMEM;
+	}
+
+	acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL);
+	if (!acl_entry)
+		return -ENOMEM;
+
+	act = &cls->rule->action.entries[0];
+	err = dpaa2_switch_tc_parse_action(ethsw, act,
+					   &acl_entry->cfg.result, extack);
+	if (err)
+		goto free_acl_entry;
+
+	acl_entry->prio = cls->common.prio;
+	acl_entry->cookie = cls->cookie;
+
+	err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry);
+	if (err)
+		goto free_acl_entry;
+
+	return 0;
+
+free_acl_entry:
+	kfree(acl_entry);
+
+	return err;
+}
+
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct tc_cls_matchall_offload *cls)
+{
+	struct dpaa2_switch_acl_entry *entry;
+
+	entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie);
+	if (!entry)
+		return 0;
+
+	return  dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 5080788c692b..adf9e5880d89 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -1140,6 +1140,20 @@ dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl,
 	}
 }
 
+static int
+dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl,
+				   struct tc_cls_matchall_offload *f)
+{
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return dpaa2_switch_cls_matchall_replace(acl_tbl, f);
+	case TC_CLSMATCHALL_DESTROY:
+		return dpaa2_switch_cls_matchall_destroy(acl_tbl, f);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
 						  void *type_data,
 						  void *cb_priv)
@@ -1147,6 +1161,8 @@ static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
 	switch (type) {
 	case TC_SETUP_CLSFLOWER:
 		return dpaa2_switch_setup_tc_cls_flower(cb_priv, type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return dpaa2_switch_setup_tc_cls_matchall(cb_priv, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 629186208b58..8575eed02d15 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -18,6 +18,7 @@
 #include <net/switchdev.h>
 #include <linux/if_bridge.h>
 #include <linux/fsl/mc.h>
+#include <net/pkt_cls.h>
 #include <soc/fsl/dpaa2-io.h>
 
 #include "dpsw.h"
@@ -233,4 +234,10 @@ int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
 
 int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
 				    struct flow_cls_offload *cls);
+
+int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct tc_cls_matchall_offload *cls);
+
+int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct tc_cls_matchall_offload *cls);
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 166179542e805a793a0337010f95168cfa143a6f Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 13 Apr 2021 16:24:48 +0300
Subject: dpaa2-switch: reuse dpaa2_switch_acl_entry_add() for STP frames trap

Since we added the dpaa2_switch_acl_entry_add() function in the previous
patches to hide all the details of actually adding the ACL entry by
issuing a firmware command, let's use it also for adding a CPU trap for
the STP frames.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/freescale/dpaa2/dpaa2-switch-flower.c |  4 +-
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.c    | 51 +++-------------------
 .../net/ethernet/freescale/dpaa2/dpaa2-switch.h    |  3 ++
 3 files changed, 12 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
index b4807ddc2011..f9451ec5f2cb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c
@@ -111,8 +111,8 @@ static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls,
 	return 0;
 }
 
-static int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
-				      struct dpaa2_switch_acl_entry *entry)
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+			       struct dpaa2_switch_acl_entry *entry)
 {
 	struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
 	struct ethsw_core *ethsw = acl_tbl->ethsw;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index adf9e5880d89..5250d51d783c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2942,54 +2942,17 @@ err_close:
 static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv,
 					   const char *mac)
 {
-	struct net_device *netdev = port_priv->netdev;
-	struct dpsw_acl_entry_cfg acl_entry_cfg;
-	struct dpsw_acl_fields *acl_h;
-	struct dpsw_acl_fields *acl_m;
-	struct dpsw_acl_key acl_key;
-	struct device *dev;
-	u8 *cmd_buff;
-	int err;
-
-	dev = port_priv->netdev->dev.parent;
-	acl_h = &acl_key.match;
-	acl_m = &acl_key.mask;
-
-	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
-	memset(&acl_key, 0, sizeof(acl_key));
+	struct dpaa2_switch_acl_entry acl_entry = {0};
 
 	/* Match on the destination MAC address */
-	ether_addr_copy(acl_h->l2_dest_mac, mac);
-	eth_broadcast_addr(acl_m->l2_dest_mac);
-
-	cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL);
-	if (!cmd_buff)
-		return -ENOMEM;
-	dpsw_acl_prepare_entry_cfg(&acl_key, cmd_buff);
+	ether_addr_copy(acl_entry.key.match.l2_dest_mac, mac);
+	eth_broadcast_addr(acl_entry.key.mask.l2_dest_mac);
 
-	memset(&acl_entry_cfg, 0, sizeof(acl_entry_cfg));
-	acl_entry_cfg.precedence = 0;
-	acl_entry_cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
-	acl_entry_cfg.key_iova = dma_map_single(dev, cmd_buff,
-						DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
-						DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(dev, acl_entry_cfg.key_iova))) {
-		netdev_err(netdev, "DMA mapping failed\n");
-		return -EFAULT;
-	}
+	/* Trap to CPU */
+	acl_entry.cfg.precedence = 0;
+	acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF;
 
-	err = dpsw_acl_add_entry(port_priv->ethsw_data->mc_io, 0,
-				 port_priv->ethsw_data->dpsw_handle,
-				 port_priv->acl_tbl->id, &acl_entry_cfg);
-
-	dma_unmap_single(dev, acl_entry_cfg.key_iova, sizeof(cmd_buff),
-			 DMA_TO_DEVICE);
-	if (err) {
-		netdev_err(netdev, "dpsw_acl_add_entry() failed %d\n", err);
-		return err;
-	}
-
-	return 0;
+	return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry);
 }
 
 static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
index 8575eed02d15..bdef71f234cb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
@@ -240,4 +240,7 @@ int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl,
 
 int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl,
 				      struct tc_cls_matchall_offload *cls);
+
+int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl,
+			       struct dpaa2_switch_acl_entry *entry);
 #endif	/* __ETHSW_H */
-- 
cgit v1.2.3


From 87b7e5c05c98c5f3947345d7fba68472e43cf611 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 13 Apr 2021 16:16:17 +0200
Subject: net: Space: remove hp100 probe

The driver was removed last year, but the static initialization got left
behind by accident.

Fixes: a10079c66290 ("staging: remove hp100 driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Space.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/Space.c b/drivers/net/Space.c
index 890c86e11bcc..df79e7370bcc 100644
--- a/drivers/net/Space.c
+++ b/drivers/net/Space.c
@@ -59,9 +59,6 @@ static int __init probe_list2(int unit, struct devprobe2 *p, int autoprobe)
  * look for EISA/PCI cards in addition to ISA cards).
  */
 static struct devprobe2 isa_probes[] __initdata = {
-#if defined(CONFIG_HP100) && defined(CONFIG_ISA)	/* ISA, EISA */
-	{hp100_probe, 0},
-#endif
 #ifdef CONFIG_3C515
 	{tc515_probe, 0},
 #endif
-- 
cgit v1.2.3


From 441e8c66b23e027c00ccebd70df9fd933918eefe Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 13 Apr 2021 11:16:06 +0200
Subject: bpf: Return target info when a tracing bpf_link is queried
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is currently no way to discover the target of a tracing program
attachment after the fact. Add this information to bpf_link_info and return
it when querying the bpf_link fd.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413091607.58945-1-toke@redhat.com
---
 include/linux/bpf_verifier.h   | 9 +++++++++
 include/uapi/linux/bpf.h       | 2 ++
 kernel/bpf/syscall.c           | 3 +++
 tools/include/uapi/linux/bpf.h | 2 ++
 4 files changed, 16 insertions(+)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 51c2ffa3d901..6023a1367853 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -487,6 +487,15 @@ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
 		return ((u64)btf_obj_id(btf) << 32) | 0x80000000 | btf_id;
 }
 
+/* unpack the IDs from the key as constructed above */
+static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
+{
+	if (obj_id)
+		*obj_id = key >> 32;
+	if (btf_id)
+		*btf_id = key & 0x7FFFFFFF;
+}
+
 int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    const struct bpf_prog *prog,
 			    const struct bpf_prog *tgt_prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 85c924bc21b1..df164a44bb41 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5416,6 +5416,8 @@ struct bpf_link_info {
 		} raw_tracepoint;
 		struct {
 			__u32 attach_type;
+			__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+			__u32 target_btf_id; /* BTF type id inside the object */
 		} tracing;
 		struct {
 			__u64 cgroup_id;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6428634da57e..fd495190115e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2551,6 +2551,9 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
 		container_of(link, struct bpf_tracing_link, link);
 
 	info->tracing.attach_type = tr_link->attach_type;
+	bpf_trampoline_unpack_key(tr_link->trampoline->key,
+				  &info->tracing.target_obj_id,
+				  &info->tracing.target_btf_id);
 
 	return 0;
 }
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 85c924bc21b1..df164a44bb41 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5416,6 +5416,8 @@ struct bpf_link_info {
 		} raw_tracepoint;
 		struct {
 			__u32 attach_type;
+			__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+			__u32 target_btf_id; /* BTF type id inside the object */
 		} tracing;
 		struct {
 			__u64 cgroup_id;
-- 
cgit v1.2.3


From 463c2149ede72b696c42b0d6c5a03c061600d04c Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 13 Apr 2021 11:16:07 +0200
Subject: selftests/bpf: Add tests for target information in bpf_link info
 queries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the fexit_bpf2bpf test to check that the info for the bpf_link
returned by the kernel matches the expected values.

While we're updating the test, change existing uses of CHEC() to use the
much easier to read ASSERT_*() macros.

v2:
- Convert last CHECK() call and get rid of 'duration' var
- Split ASSERT_OK_PTR() checks to two separate if statements

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413091607.58945-2-toke@redhat.com
---
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c       | 58 ++++++++++++++++------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 5c0448910426..63990842d20f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 				      test_cb cb)
 {
 	struct bpf_object *obj = NULL, *tgt_obj;
+	__u32 retval, tgt_prog_id, info_len;
+	struct bpf_prog_info prog_info = {};
 	struct bpf_program **prog = NULL;
 	struct bpf_link **link = NULL;
-	__u32 duration = 0, retval;
 	int err, tgt_fd, i;
+	struct btf *btf;
 
 	err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
 			    &tgt_obj, &tgt_fd);
-	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
-		  target_obj_file, err, errno))
+	if (!ASSERT_OK(err, "tgt_prog_load"))
 		return;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 			    .attach_prog_fd = tgt_fd,
 			   );
 
+	info_len = sizeof(prog_info);
+	err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+	if (!ASSERT_OK(err, "tgt_fd_get_info"))
+		goto close_prog;
+
+	tgt_prog_id = prog_info.id;
+	btf = bpf_object__btf(tgt_obj);
+
 	link = calloc(sizeof(struct bpf_link *), prog_cnt);
+	if (!ASSERT_OK_PTR(link, "link_ptr"))
+		goto close_prog;
+
 	prog = calloc(sizeof(struct bpf_program *), prog_cnt);
-	if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
+	if (!ASSERT_OK_PTR(prog, "prog_ptr"))
 		goto close_prog;
 
 	obj = bpf_object__open_file(obj_file, &opts);
-	if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
-		  "failed to open %s: %ld\n", obj_file,
-		  PTR_ERR(obj)))
+	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
 	err = bpf_object__load(obj);
-	if (CHECK(err, "obj_load", "err %d\n", err))
+	if (!ASSERT_OK(err, "obj_load"))
 		goto close_prog;
 
 	for (i = 0; i < prog_cnt; i++) {
+		struct bpf_link_info link_info;
+		char *tgt_name;
+		__s32 btf_id;
+
+		tgt_name = strstr(prog_name[i], "/");
+		if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
+			goto close_prog;
+		btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
 		prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
-		if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+		if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
 			goto close_prog;
+
 		link[i] = bpf_program__attach_trace(prog[i]);
-		if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+		if (!ASSERT_OK_PTR(link[i], "attach_trace"))
 			goto close_prog;
+
+		info_len = sizeof(link_info);
+		memset(&link_info, 0, sizeof(link_info));
+		err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]),
+					     &link_info, &info_len);
+		ASSERT_OK(err, "link_fd_get_info");
+		ASSERT_EQ(link_info.tracing.attach_type,
+			  bpf_program__get_expected_attach_type(prog[i]),
+			  "link_attach_type");
+		ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+		ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
 	}
 
 	if (cb) {
@@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 		goto close_prog;
 
 	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "ipv6",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+				NULL, NULL, &retval, NULL);
+	ASSERT_OK(err, "prog_run");
+	ASSERT_EQ(retval, 0, "prog_run_ret");
 
 	if (check_data_map(obj, prog_cnt, false))
 		goto close_prog;
-- 
cgit v1.2.3


From 47222864c14bc10c7769378f7601e2a45bd52026 Mon Sep 17 00:00:00 2001
From: Jostar Yang <jostar_yang@accton.com>
Date: Thu, 12 Nov 2020 21:33:56 +0100
Subject: ixgbe: Support external GBE SerDes PHY BCM54616s

The Broadcom PHY is used in switches, so add the ID, and hook it up.

This upstreams the Linux kernel patch from the network operating system
SONiC from February 2020 [1].

[1]: https://github.com/Azure/sonic-linux-kernel/pull/122

Signed-off-by: Jostar Yang <jostar_yang@accton.com>
Signed-off-by: Guohan Lu <lguohan@gmail.com>
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c  | 3 +++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 73bc170d1ae9..24aa97f993ca 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -380,6 +380,9 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
 	case X557_PHY_ID2:
 		phy_type = ixgbe_phy_x550em_ext_t;
 		break;
+	case BCM54616S_E_PHY_ID:
+		phy_type = ixgbe_phy_ext_1g_t;
+		break;
 	default:
 		phy_type = ixgbe_phy_unknown;
 		break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 2be1c4c72435..4c317b0dbfd4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1407,6 +1407,7 @@ struct ixgbe_nvm_version {
 #define QT2022_PHY_ID    0x0043A400
 #define ATH_PHY_ID       0x03429050
 #define AQ_FW_REV        0x20
+#define BCM54616S_E_PHY_ID 0x03625D10
 
 /* Special PHY Init Routine */
 #define IXGBE_PHY_INIT_OFFSET_NL 0x002B
-- 
cgit v1.2.3


From 7eceea90c5427c8a4deed38e67c1250c31df8363 Mon Sep 17 00:00:00 2001
From: Chen Lin <chen.lin5@zte.com.cn>
Date: Mon, 15 Feb 2021 20:04:58 +0800
Subject: net: intel: Remove unused function pointer typedef ixgbe_mc_addr_itr

Remove the 'ixgbe_mc_addr_itr' typedef as it is not used.

Signed-off-by: Chen Lin <chen.lin5@zte.com.cn>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 4 ----
 drivers/net/ethernet/intel/ixgbevf/vf.h       | 3 ---
 2 files changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 4c317b0dbfd4..2647937f7f4d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3384,10 +3384,6 @@ struct ixgbe_hw_stats {
 /* forward declaration */
 struct ixgbe_hw;
 
-/* iterator type for walking multicast address lists */
-typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
-				  u32 *vmdq);
-
 /* Function pointer table */
 struct ixgbe_eeprom_operations {
 	s32 (*init_params)(struct ixgbe_hw *);
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index d1e9e306653b..1d8209df4162 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -16,9 +16,6 @@
 
 struct ixgbe_hw;
 
-/* iterator type for walking multicast address lists */
-typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr,
-				  u32 *vmdq);
 struct ixgbe_mac_operations {
 	s32 (*init_hw)(struct ixgbe_hw *);
 	s32 (*reset_hw)(struct ixgbe_hw *);
-- 
cgit v1.2.3


From ce2cb12dccab57179fc97d2c89f872333ff369f0 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Wed, 17 Mar 2021 15:30:01 +0530
Subject: net: ethernet: intel: Fix a typo in the file ixgbe_dcb_nl.c

s/Reprogam/Reprogram/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index c00332d2e02a..72e6ebffea33 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -361,7 +361,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
 	}
 
 #ifdef IXGBE_FCOE
-	/* Reprogam FCoE hardware offloads when the traffic class
+	/* Reprogram FCoE hardware offloads when the traffic class
 	 * FCoE is using changes. This happens if the APP info
 	 * changes or the up2tc mapping is updated.
 	 */
-- 
cgit v1.2.3


From e2b1e4b532abdd39bfb7313146153815e370d60c Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 13 Apr 2021 20:42:42 +0900
Subject: can: etas_es58x: fix null pointer dereference when handling error
 frames

During the handling of CAN bus errors, a CAN error SKB is allocated
using alloc_can_err_skb(). Even if the allocation of the SKB fails,
the function continues in order to do the stats handling.

All access to the can_frame pointer (cf) should be guarded by an if
statement:
	if (cf)

However, the increment of the rx_bytes stats:
	netdev->stats.rx_bytes += cf->can_dlc;
dereferences the cf pointer and was not guarded by an if condition
leading to a NULL pointer dereference if the can_err_skb() function
failed.

Replacing the cf->can_dlc by the macro CAN_ERR_DLC (which is the
length of any CAN error frames) solves this NULL pointer dereference.

Fixes: 8537257874e9 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces")
Link: https://lore.kernel.org/r/20210413114242.2760-1-mailhol.vincent@wanadoo.fr
Reported-by: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/etas_es58x/es58x_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 7222b3b6ca46..57e5f94468e9 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -856,7 +856,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
 	 * consistency.
 	 */
 	netdev->stats.rx_packets++;
-	netdev->stats.rx_bytes += cf->can_dlc;
+	netdev->stats.rx_bytes += CAN_ERR_DLC;
 
 	if (cf) {
 		if (cf->data[1])
-- 
cgit v1.2.3


From 7686fd52b8757aaedb4cf3b38bf02d1911674545 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 30 Mar 2021 16:24:49 +0300
Subject: iwlwifi: mvm: enable TX on new CSA channel before disconnecting

When moving to the new channel, we block TX until we hear the
first beacon. if it is not heard, we proceed to disconnect.
Since TX is blocked (without mac80211 being aware of it) the frame
is stuck, resulting with queue hang.

Instead, reenable TX before reporting on the connection loss.
As we are on the new channel, there is no problem with that,
even if the original CSA had quiet mode.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.eb4f2ff1b863.Ib16238106b33d58b2b7688dc6297018b915ecef4@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 0b012f8c9eb2..1418a6438635 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -151,6 +151,16 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
 	if (errmsg)
 		IWL_ERR(mvm, "%s\n", errmsg);
 
+	if (mvmvif->csa_bcn_pending) {
+		struct iwl_mvm_sta *mvmsta;
+
+		rcu_read_lock();
+		mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, mvmvif->ap_sta_id);
+		if (!WARN_ON(!mvmsta))
+			iwl_mvm_sta_modify_disable_tx(mvm, mvmsta, false);
+		rcu_read_unlock();
+	}
+
 	iwl_mvm_connection_loss(mvm, vif, errmsg);
 	return true;
 }
-- 
cgit v1.2.3


From 72bc934cb393d9aa0a3a73026c020da36e817fa1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Mar 2021 16:24:50 +0300
Subject: iwlwifi: pcie: avoid unnecessarily taking spinlock

Most devices don't set the apmg_wake_up_wa flag, so we don't do
anything for them. Avoid taking the spinlock for every command
unless the device needs this workaround.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.1ab60af3f318.I51cc202f68a2a953223e70c3e8610343412961b6@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 54 +++++++++++++---------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 7ae32491b5da..5ce61fb469cd 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -181,16 +181,20 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	lockdep_assert_held(&trans_pcie->reg_lock);
-
 	if (!trans->trans_cfg->base_params->apmg_wake_up_wa)
 		return;
-	if (WARN_ON(!trans_pcie->cmd_hold_nic_awake))
+
+	spin_lock(&trans_pcie->reg_lock);
+
+	if (WARN_ON(!trans_pcie->cmd_hold_nic_awake)) {
+		spin_unlock(&trans_pcie->reg_lock);
 		return;
+	}
 
 	trans_pcie->cmd_hold_nic_awake = false;
 	__iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
 				   CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+	spin_unlock(&trans_pcie->reg_lock);
 }
 
 /*
@@ -198,7 +202,6 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
  */
 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 
 	if (!txq) {
@@ -222,12 +225,9 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 		iwl_txq_free_tfd(trans, txq);
 		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
 
-		if (txq->read_ptr == txq->write_ptr) {
-			spin_lock(&trans_pcie->reg_lock);
-			if (txq_id == trans->txqs.cmd.q_id)
-				iwl_pcie_clear_cmd_in_flight(trans);
-			spin_unlock(&trans_pcie->reg_lock);
-		}
+		if (txq->read_ptr == txq->write_ptr &&
+		    txq_id == trans->txqs.cmd.q_id)
+			iwl_pcie_clear_cmd_in_flight(trans);
 	}
 
 	while (!skb_queue_empty(&txq->overflow_q)) {
@@ -629,22 +629,23 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 				      const struct iwl_host_cmd *cmd)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int ret;
-
-	lockdep_assert_held(&trans_pcie->reg_lock);
+	int ret = 0;
 
 	/* Make sure the NIC is still alive in the bus */
 	if (test_bit(STATUS_TRANS_DEAD, &trans->status))
 		return -ENODEV;
 
+	if (!trans->trans_cfg->base_params->apmg_wake_up_wa)
+		return 0;
+
+	spin_lock(&trans_pcie->reg_lock);
 	/*
 	 * wake up the NIC to make sure that the firmware will see the host
 	 * command - we will let the NIC sleep once all the host commands
 	 * returned. This needs to be done only on NICs that have
-	 * apmg_wake_up_wa set.
+	 * apmg_wake_up_wa set (see above.)
 	 */
-	if (trans->trans_cfg->base_params->apmg_wake_up_wa &&
-	    !trans_pcie->cmd_hold_nic_awake) {
+	if (!trans_pcie->cmd_hold_nic_awake) {
 		__iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
 					 CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
 
@@ -657,12 +658,14 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 			__iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
 					CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
 			IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
-			return -EIO;
+			ret = -EIO;
+		} else {
+			trans_pcie->cmd_hold_nic_awake = true;
 		}
-		trans_pcie->cmd_hold_nic_awake = true;
 	}
+	spin_unlock(&trans_pcie->reg_lock);
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -674,7 +677,6 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
  */
 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	int nfreed = 0;
 	u16 r;
@@ -705,12 +707,8 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 		}
 	}
 
-	if (txq->read_ptr == txq->write_ptr) {
-		/* BHs are also disabled due to txq->lock */
-		spin_lock(&trans_pcie->reg_lock);
+	if (txq->read_ptr == txq->write_ptr)
 		iwl_pcie_clear_cmd_in_flight(trans);
-		spin_unlock(&trans_pcie->reg_lock);
-	}
 
 	iwl_txq_progress(txq);
 }
@@ -914,7 +912,6 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 			  struct iwl_host_cmd *cmd)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	struct iwl_device_cmd *out_cmd;
 	struct iwl_cmd_meta *out_meta;
@@ -1161,19 +1158,16 @@ int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
 
-	spin_lock(&trans_pcie->reg_lock);
 	ret = iwl_pcie_set_cmd_in_flight(trans, cmd);
 	if (ret < 0) {
 		idx = ret;
-		goto unlock_reg;
+		goto out;
 	}
 
 	/* Increment and update queue's write index */
 	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
 	iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
- unlock_reg:
-	spin_unlock(&trans_pcie->reg_lock);
  out:
 	spin_unlock_irqrestore(&txq->lock, flags);
  free_dup_buf:
-- 
cgit v1.2.3


From 416dde0f83a87f35fa8ca44e0837a55903b46ffe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Mar 2021 16:24:51 +0300
Subject: iwlwifi: pcie: normally grab NIC access for inflight-hcmd

We currently have a special, separate, code path to acquire NIC
access for the in-flight host-command workaround on 7000 series
hardware. However, the normal code path here has grown a number
of additional workarounds/semantics over time, such as reprobing
the device if things fail.

Rather than try to replicate any of this logic, call the normal
grab_nic_access logic for the workaround.

This changes the spinlock to _bh, but that's OK since it's just
redundant, we already have soft-IRQs disabled when we get here,
and so didn't (have to) do it again. Since it's only for commands
there's however no point in making the code more complex just to
not use _bh here.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.d196fc6ffb23.Idc1ce3ce9fed9178beee7e5409bc669f79b06a0d@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 33 ++++++++++------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 5ce61fb469cd..b07194b74d97 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -629,7 +629,6 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 				      const struct iwl_host_cmd *cmd)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int ret = 0;
 
 	/* Make sure the NIC is still alive in the bus */
 	if (test_bit(STATUS_TRANS_DEAD, &trans->status))
@@ -638,34 +637,24 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 	if (!trans->trans_cfg->base_params->apmg_wake_up_wa)
 		return 0;
 
-	spin_lock(&trans_pcie->reg_lock);
 	/*
 	 * wake up the NIC to make sure that the firmware will see the host
 	 * command - we will let the NIC sleep once all the host commands
 	 * returned. This needs to be done only on NICs that have
 	 * apmg_wake_up_wa set (see above.)
 	 */
-	if (!trans_pcie->cmd_hold_nic_awake) {
-		__iwl_trans_pcie_set_bit(trans, CSR_GP_CNTRL,
-					 CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-
-		ret = iwl_poll_bit(trans, CSR_GP_CNTRL,
-				   CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
-				   (CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
-				    CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP),
-				   15000);
-		if (ret < 0) {
-			__iwl_trans_pcie_clear_bit(trans, CSR_GP_CNTRL,
-					CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-			IWL_ERR(trans, "Failed to wake NIC for hcmd\n");
-			ret = -EIO;
-		} else {
-			trans_pcie->cmd_hold_nic_awake = true;
-		}
-	}
-	spin_unlock(&trans_pcie->reg_lock);
+	if (!iwl_trans_grab_nic_access(trans))
+		return -EIO;
 
-	return ret;
+	/*
+	 * In iwl_trans_grab_nic_access(), we've acquired the reg_lock.
+	 * There, we also returned immediately if cmd_hold_nic_awake is
+	 * already true, so it's OK to unconditionally set it to true.
+	 */
+	trans_pcie->cmd_hold_nic_awake = true;
+	spin_unlock_bh(&trans_pcie->reg_lock);
+
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 2360acbd5e22d95d1a8fcd096478198c572d9b85 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 30 Mar 2021 16:24:52 +0300
Subject: iwlwifi: mvm: don't allow CSA if we haven't been fully associated

"Fully associated" means that we heard a beacon with the DTIM
information and the firmware is configured to track the beacons.
Since the firmware needs to track the beacons for the CSA, we
can't configure the firmware for CSA before it knows when the
beacons are expected otherwise we'd get ASSERT 301D.
If we are required to start CSA before we told the firmware
when the beacons are expected to arrive, just report a
failure and let mac80211 disconnect.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.9adaedeb59e4.Idaad6aaf3f9759d023b8e00b10064915c0db9aa3@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index baf7404c137d..9ed6e2783cc4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4610,6 +4610,16 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
 
 		break;
 	case NL80211_IFTYPE_STATION:
+		/*
+		 * We haven't configured the firmware to be associated yet since
+		 * we don't know the dtim period. In this case, the firmware can't
+		 * track the beacons.
+		 */
+		if (!vif->bss_conf.assoc || !vif->bss_conf.dtim_period) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+
 		if (chsw->delay > IWL_MAX_CSA_BLOCK_TX)
 			schedule_delayed_work(&mvmvif->csa_work, 0);
 
-- 
cgit v1.2.3


From 2be05dfd9c3f86c66ebcfd7b2a01d07c7e0158dd Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Tue, 30 Mar 2021 16:24:53 +0300
Subject: iwlwifi: pcie: Add support for Bz Family

Add support for different combinations of Bz
and CRFs.

Note: As of now we do not know the exact values
for ltr_delay and xtal_latency, so for now use the
worst case scenario values until the actual values
are clarified.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.caac8d996532.I6a22d6decb106cd50d7954b19236b69d685dcc39@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 58 +++++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  6 +++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 24 ++++++++++
 3 files changed, 88 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 0a0e25a3c681..d4f3eee81db7 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -48,6 +48,10 @@
 #define IWL_MA_A_GF4_A_FW_PRE		"iwlwifi-ma-a0-gf4-a0-"
 #define IWL_MA_A_MR_A_FW_PRE		"iwlwifi-ma-a0-mr-a0-"
 #define IWL_SNJ_A_MR_A_FW_PRE		"iwlwifi-SoSnj-a0-mr-a0-"
+#define IWL_BZ_A_HR_B_FW_PRE		"iwlwifi-bz-a0-hr-b0-"
+#define IWL_BZ_A_GF_A_FW_PRE		"iwlwifi-bz-a0-gf-a0-"
+#define IWL_BZ_A_GF4_A_FW_PRE		"iwlwifi-bz-a0-gf4-a0-"
+#define IWL_BZ_A_MR_A_FW_PRE		"iwlwifi-bz-a0-mr-a0-"
 
 #define IWL_QU_B_HR_B_MODULE_FIRMWARE(api) \
 	IWL_QU_B_HR_B_FW_PRE __stringify(api) ".ucode"
@@ -91,6 +95,14 @@
 	IWL_MA_A_MR_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_SNJ_A_MR_A_MODULE_FIRMWARE(api) \
 	IWL_SNJ_A_MR_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_BZ_A_HR_B_MODULE_FIRMWARE(api) \
+	IWL_BZ_A_HR_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_BZ_A_GF_A_MODULE_FIRMWARE(api) \
+	IWL_BZ_A_GF_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_BZ_A_GF4_A_MODULE_FIRMWARE(api) \
+	IWL_BZ_A_GF4_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_BZ_A_MR_A_MODULE_FIRMWARE(api) \
+	IWL_BZ_A_MR_A_FW_PRE __stringify(api) ".ucode"
 
 static const struct iwl_base_params iwl_22000_base_params = {
 	.eeprom_size = OTP_LOW_IMAGE_SIZE_32K,
@@ -357,6 +369,20 @@ const struct iwl_cfg_trans_params iwl_ma_trans_cfg = {
 	.umac_prph_offset = 0x300000
 };
 
+const struct iwl_cfg_trans_params iwl_bz_trans_cfg = {
+	.device_family = IWL_DEVICE_FAMILY_AX210,
+	.base_params = &iwl_ax210_base_params,
+	.mq_rx_supported = true,
+	.use_tfh = true,
+	.rf_id = true,
+	.gen2 = true,
+	.integrated = true,
+	.umac_prph_offset = 0x300000,
+	.xtal_latency = 12000,
+	.low_latency_xtal = true,
+	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US,
+};
+
 const char iwl_ax101_name[] = "Intel(R) Wi-Fi 6 AX101";
 const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz";
 const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
@@ -719,6 +745,34 @@ const struct iwl_cfg iwl_cfg_quz_a0_hr_b0 = {
 	.num_rbds = IWL_NUM_RBDS_22000_HE,
 };
 
+const struct iwl_cfg iwl_cfg_bz_a0_hr_b0 = {
+	.fw_name_pre = IWL_BZ_A_HR_B_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
+const struct iwl_cfg iwl_cfg_bz_a0_gf_a0 = {
+	.fw_name_pre = IWL_BZ_A_GF_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
+const struct iwl_cfg iwl_cfg_bz_a0_gf4_a0 = {
+	.fw_name_pre = IWL_BZ_A_GF4_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
+const struct iwl_cfg iwl_cfg_bz_a0_mr_a0 = {
+	.fw_name_pre = IWL_BZ_A_MR_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
 MODULE_FIRMWARE(IWL_QU_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QNJ_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
@@ -740,3 +794,7 @@ MODULE_FIRMWARE(IWL_MA_A_GF_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_MA_A_GF4_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_MA_A_MR_A_FW_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_SNJ_A_MR_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_BZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_BZ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_BZ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_BZ_A_MR_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index c4f5da76f1c0..98335325e6b4 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -416,6 +416,7 @@ struct iwl_cfg {
 #define IWL_CFG_MAC_TYPE_SNJ		0x42
 #define IWL_CFG_MAC_TYPE_SOF		0x43
 #define IWL_CFG_MAC_TYPE_MA		0x44
+#define IWL_CFG_MAC_TYPE_BZ		0x46
 
 #define IWL_CFG_RF_TYPE_TH		0x105
 #define IWL_CFG_RF_TYPE_TH1		0x108
@@ -477,6 +478,7 @@ extern const struct iwl_cfg_trans_params iwl_snj_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_so_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_so_long_latency_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_ma_trans_cfg;
+extern const struct iwl_cfg_trans_params iwl_bz_trans_cfg;
 extern const char iwl9162_name[];
 extern const char iwl9260_name[];
 extern const char iwl9260_1_name[];
@@ -612,6 +614,10 @@ extern const struct iwl_cfg iwl_cfg_ma_a0_mr_a0;
 extern const struct iwl_cfg iwl_cfg_snj_a0_mr_a0;
 extern const struct iwl_cfg iwl_cfg_so_a0_hr_a0;
 extern const struct iwl_cfg iwl_cfg_quz_a0_hr_b0;
+extern const struct iwl_cfg iwl_cfg_bz_a0_hr_b0;
+extern const struct iwl_cfg iwl_cfg_bz_a0_gf_a0;
+extern const struct iwl_cfg iwl_cfg_bz_a0_gf4_a0;
+extern const struct iwl_cfg iwl_cfg_bz_a0_mr_a0;
 #endif /* CONFIG_IWLMVM */
 
 #endif /* __IWL_CONFIG_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 558a0b2ef0fc..d7d91457f421 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -490,6 +490,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2729, PCI_ANY_ID, iwl_ma_trans_cfg)},
 	{IWL_PCI_DEVICE(0x7E40, PCI_ANY_ID, iwl_ma_trans_cfg)},
 
+/* Bz devices */
+	{IWL_PCI_DEVICE(0x2727, PCI_ANY_ID, iwl_bz_trans_cfg)},
 #endif /* CONFIG_IWLMVM */
 
 	{0}
@@ -1067,6 +1069,28 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB,
 		      iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name),
 
+/* Bz */
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+		      iwl_cfg_bz_a0_hr_b0, iwl_ax201_name),
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+		      iwl_cfg_bz_a0_gf_a0, iwl_ax211_name),
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_CDB,
+		      iwl_cfg_bz_a0_gf4_a0, iwl_ax211_name),
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+		      iwl_cfg_bz_a0_mr_a0, iwl_ma_name),
+
 #endif /* CONFIG_IWLMVM */
 };
 
-- 
cgit v1.2.3


From 39ab22c127aabc0634ea6fa268ffb9b938d31d9d Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Tue, 30 Mar 2021 16:24:54 +0300
Subject: iwlwifi: change step in so-gf struct

change the step of iwlax210_2ax_cfg_so_jf_a0 to
iwlax210_2ax_cfg_so_jf_b0 as it is on the wcd_fw-dev
repository.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.e9a9d1da76bc.Ie964f37872bbb88d1a02094134f9a2c38faad884@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +-
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index d4f3eee81db7..8f98edcff572 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -604,7 +604,7 @@ const struct iwl_cfg iwl_qnj_b0_hr_b0_cfg = {
 	.num_rbds = IWL_NUM_RBDS_22000_HE,
 };
 
-const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0 = {
+const struct iwl_cfg iwlax210_2ax_cfg_so_jf_b0 = {
 	.name = "Intel(R) Wireless-AC 9560 160MHz",
 	.fw_name_pre = IWL_SO_A_JF_B_FW_PRE,
 	IWL_DEVICE_AX210,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 98335325e6b4..f041f7f2160b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -596,7 +596,7 @@ extern const struct iwl_cfg killer1650i_2ax_cfg_qu_c0_hr_b0;
 extern const struct iwl_cfg killer1650x_2ax_cfg;
 extern const struct iwl_cfg killer1650w_2ax_cfg;
 extern const struct iwl_cfg iwl_qnj_b0_hr_b0_cfg;
-extern const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0;
+extern const struct iwl_cfg iwlax210_2ax_cfg_so_jf_b0;
 extern const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0;
 extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0;
 extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0_long;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index d7d91457f421..7ae089234503 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1189,7 +1189,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			iwl_trans->cfg = &iwlax210_2ax_cfg_ty_gf_a0;
 		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
 			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_JF)) {
-			iwl_trans->cfg = &iwlax210_2ax_cfg_so_jf_a0;
+			iwl_trans->cfg = &iwlax210_2ax_cfg_so_jf_b0;
 		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
 			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF)) {
 			iwl_trans->cfg = &iwlax211_2ax_cfg_so_gf_a0;
-- 
cgit v1.2.3


From 20d04296b3cf1be81d5399a0ca12a38c8623dfe9 Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Tue, 30 Mar 2021 16:24:55 +0300
Subject: iwlwifi: change name to AX 211 and 411 family

change the name of the ax211 and ax411.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.fc3218805052.I203f1a802338f59955bd511c90217f63b918390b@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 8f98edcff572..bea3d5f3014a 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -387,8 +387,8 @@ const char iwl_ax101_name[] = "Intel(R) Wi-Fi 6 AX101";
 const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz";
 const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
 const char iwl_ax203_name[] = "Intel(R) Wi-Fi 6 AX203";
-const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6 AX211 160MHz";
-const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6 AX411 160MHz";
+const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6E AX211 160MHz";
+const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6E AX411 160MHz";
 const char iwl_ma_name[] = "Intel(R) Wi-Fi 6";
 
 const char iwl_ax200_killer_1650w_name[] =
-- 
cgit v1.2.3


From a7ff1899a7bb97f2e396100c1be373d17a0ae836 Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Tue, 30 Mar 2021 16:24:56 +0300
Subject: iwlwifi: add 160Mhz to killer 1550 name

change the name of 1550 killer device to 160Mhz.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.b87618a26ff8.Icf1d9c150ec108f30ce0e72c18b9350da6ae5087@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
index c4164bf508e5..df1297358379 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c
@@ -168,7 +168,7 @@ const char iwl9462_160_name[] = "Intel(R) Wireless-AC 9462 160MHz";
 const char iwl9560_160_name[] = "Intel(R) Wireless-AC 9560 160MHz";
 
 const char iwl9260_killer_1550_name[] =
-	"Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW)";
+	"Killer (R) Wireless-AC 1550 Wireless Network Adapter (9260NGW) 160MHz";
 const char iwl9560_killer_1550i_name[] =
 	"Killer (R) Wireless-AC 1550i Wireless Network Adapter (9560NGW)";
 const char iwl9560_killer_1550s_name[] =
-- 
cgit v1.2.3


From d4626f91739b76633dbb3013e843e178c4a26618 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Tue, 30 Mar 2021 16:24:57 +0300
Subject: iwlwifi: pcie: clear only FH bits handle in the interrupt

For simplicity we assume that msix has 2 IRQ lines one used for rx data
called msix_non_share, and another used for one bit flags messages
(alive, hw error, sw error, rx data flag) called msix_share.

Every time the FW has data to send it puts it on the RX queue and HW
turns on the flags in msix_share (inta_fw) indicating about rx data,
and HW sends an interrupt a bit later to the msix_non_share _unless_
the msix_shared RX data bit was cleared.

Currently in the code every time we get an msix_shared we clear all bits
including rx data queue bits.

So we can have a race

----------------------------------------------------
DRIVER		       |   HW          	     |   FW
----------------------------------------------------
- send host cmd to FW  |		     |
		       |		     | - handle message
		       |		     |   and put a response
		       |		     |   on the RX queue
		       | - RX flag on        |
		       |	     	     | - send alive msix
		       | - alive flag on     |
		       | - interrupt         |
		       |   msix_share driver |
- handle msix_shared   |		     |
  and clear all flags  |		     |
  bits		       |		     |
		       | - don't send an     |
		       |   interrupt on	     |
		       |   msix_non_shared   |
		       |   (driver cleared)  |
- driver timeout on    |		     |
  waiting for host cmd |		     |
  respond	       |		     |
		       |		     |
----------------------------------------------------

The change is to clear only the msi_shared flags that are handled in
the msix_shared flow, which will cause the hardware to send an interrupt
on the msix_non_share line as well, when it has data.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.a1cdda2fa270.I02a82312679f4541f30bb8db8747a797dbb70ee7@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 3 +++
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index 6ccde7e30211..db312abd2e09 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -578,6 +578,9 @@ enum msix_fh_int_causes {
 	MSIX_FH_INT_CAUSES_FH_ERR		= BIT(21),
 };
 
+/* The low 16 bits are for rx data queue indication */
+#define MSIX_FH_INT_CAUSES_DATA_QUEUE 0xffff
+
 /*
  * Causes for the HW register interrupts
  */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 2bec97133119..0cbc79949982 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -2194,9 +2194,16 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 	struct iwl_trans_pcie *trans_pcie = iwl_pcie_get_trans_pcie(entry);
 	struct iwl_trans *trans = trans_pcie->trans;
 	struct isr_statistics *isr_stats = &trans_pcie->isr_stats;
+	u32 inta_fh_msk = ~MSIX_FH_INT_CAUSES_DATA_QUEUE;
 	u32 inta_fh, inta_hw;
 	bool polling = false;
 
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX)
+		inta_fh_msk |= MSIX_FH_INT_CAUSES_Q0;
+
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS)
+		inta_fh_msk |= MSIX_FH_INT_CAUSES_Q1;
+
 	lock_map_acquire(&trans->sync_cmd_lockdep_map);
 
 	spin_lock_bh(&trans_pcie->irq_lock);
@@ -2205,7 +2212,7 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 	/*
 	 * Clear causes registers to avoid being handling the same cause.
 	 */
-	iwl_write32(trans, CSR_MSIX_FH_INT_CAUSES_AD, inta_fh);
+	iwl_write32(trans, CSR_MSIX_FH_INT_CAUSES_AD, inta_fh & inta_fh_msk);
 	iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD, inta_hw);
 	spin_unlock_bh(&trans_pcie->irq_lock);
 
-- 
cgit v1.2.3


From 48a5494d6a4cb5812f0640d9515f1876ffc7a013 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Mar 2021 16:24:58 +0300
Subject: iwlwifi: pcie: make cfg vs. trans_cfg more robust

If we (for example) have a trans_cfg entry in the PCI IDs table,
but then don't find a full cfg entry for it in the info table,
we fall through to the code that treats the PCI ID table entry
as a full cfg entry. This obviously causes crashes later, e.g.
when trying to build the firmware name string.

Avoid such crashes by using the low bit of the pointer as a tag
for trans_cfg entries (automatically using a macro that checks
the type when assigning) and then checking that before trying to
use the data as a full entry - if it's just a partial entry at
that point, fail.

Since we're adding some macro magic, also check that the type is
in fact either struct iwl_cfg_trans_params or struct iwl_cfg,
failing compilation ("initializer element is not constant") if
it isn't.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.6f69fe6e4128.I921d4ae20ef5276716baeeeda0b001cf25b9b968@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 35 +++++++++++++++++++++------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 7ae089234503..d7c4625583a8 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -17,10 +17,20 @@
 #include "iwl-prph.h"
 #include "internal.h"
 
+#define TRANS_CFG_MARKER BIT(0)
+#define _IS_A(cfg, _struct) __builtin_types_compatible_p(typeof(cfg),	\
+							 struct _struct)
+extern int _invalid_type;
+#define _TRANS_CFG_MARKER(cfg)						\
+	(__builtin_choose_expr(_IS_A(cfg, iwl_cfg_trans_params),	\
+			       TRANS_CFG_MARKER,			\
+	 __builtin_choose_expr(_IS_A(cfg, iwl_cfg), 0, _invalid_type)))
+#define _ASSIGN_CFG(cfg) (_TRANS_CFG_MARKER(cfg) + (kernel_ulong_t)&(cfg))
+
 #define IWL_PCI_DEVICE(dev, subdev, cfg) \
 	.vendor = PCI_VENDOR_ID_INTEL,  .device = (dev), \
 	.subvendor = PCI_ANY_ID, .subdevice = (subdev), \
-	.driver_data = (kernel_ulong_t)&(cfg)
+	.driver_data = _ASSIGN_CFG(cfg)
 
 /* Hardware specific file defines the PCI IDs table for that hardware module */
 static const struct pci_device_id iwl_hw_card_ids[] = {
@@ -1099,19 +1109,22 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 
 static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	const struct iwl_cfg_trans_params *trans =
-		(struct iwl_cfg_trans_params *)(ent->driver_data);
+	const struct iwl_cfg_trans_params *trans;
 	const struct iwl_cfg *cfg_7265d __maybe_unused = NULL;
 	struct iwl_trans *iwl_trans;
 	struct iwl_trans_pcie *trans_pcie;
 	int i, ret;
+	const struct iwl_cfg *cfg;
+
+	trans = (void *)(ent->driver_data & ~TRANS_CFG_MARKER);
+
 	/*
 	 * This is needed for backwards compatibility with the old
 	 * tables, so we don't need to change all the config structs
 	 * at the same time.  The cfg is used to compare with the old
 	 * full cfg structs.
 	 */
-	const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data);
+	cfg = (void *)(ent->driver_data & ~TRANS_CFG_MARKER);
 
 	/* make sure trans is the first element in iwl_cfg */
 	BUILD_BUG_ON(offsetof(struct iwl_cfg, trans));
@@ -1226,11 +1239,19 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 #endif
 	/*
-	 * If we didn't set the cfg yet, assume the trans is actually
-	 * a full cfg from the old tables.
+	 * If we didn't set the cfg yet, the PCI ID table entry should have
+	 * been a full config - if yes, use it, otherwise fail.
 	 */
-	if (!iwl_trans->cfg)
+	if (!iwl_trans->cfg) {
+		if (ent->driver_data & TRANS_CFG_MARKER) {
+			pr_err("No config found for PCI dev %04x/%04x, rev=0x%x, rfid=0x%x\n",
+			       pdev->device, pdev->subsystem_device,
+			       iwl_trans->hw_rev, iwl_trans->hw_rf_id);
+			ret = -EINVAL;
+			goto out_free_trans;
+		}
 		iwl_trans->cfg = cfg;
+	}
 
 	/* if we don't have a name yet, copy name from the old cfg */
 	if (!iwl_trans->name)
-- 
cgit v1.2.3


From 9896b0b904455ef5371327e0fa8de823d192c5a1 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Tue, 30 Mar 2021 16:24:59 +0300
Subject: iwlwifi: mvm: support range request command version 12

Version 12 adds configuration of NDP ranging parameters:
- max number of LTF repetitions
- max number of spatial streams
- max total LTFs

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.2c30c376c5cf.I01460f7277594ee7c98a8e1fe5447c59e70bf073@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/location.h   | 106 ++++++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |   7 ++
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c |  76 ++++++++++++++-
 3 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index ceeef8749765..16af37f124ee 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -422,10 +422,12 @@ struct iwl_tof_range_req_ap_entry_v2 {
  *	driver.
  * @IWL_INITIATOR_AP_FLAGS_NON_TB: Use non trigger based flow
  * @IWL_INITIATOR_AP_FLAGS_TB: Use trigger based flow
- * @IWL_INITIATOR_AP_FLAGS_SECURED: request secured measurement
+ * @IWL_INITIATOR_AP_FLAGS_SECURED: request secure LTF measurement
  * @IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK: Send LMR feedback
  * @IWL_INITIATOR_AP_FLAGS_USE_CALIB: Use calibration values from the request
  *      instead of fw internal values.
+ * @IWL_INITIATOR_AP_FLAGS_PMF: request to protect the negotiation and LMR
+ *      frames with protected management frames.
  */
 enum iwl_initiator_ap_flags {
 	IWL_INITIATOR_AP_FLAGS_ASAP = BIT(1),
@@ -440,6 +442,7 @@ enum iwl_initiator_ap_flags {
 	IWL_INITIATOR_AP_FLAGS_SECURED = BIT(11),
 	IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK = BIT(12),
 	IWL_INITIATOR_AP_FLAGS_USE_CALIB = BIT(13),
+	IWL_INITIATOR_AP_FLAGS_PMF = BIT(14),
 };
 
 /**
@@ -657,6 +660,79 @@ struct iwl_tof_range_req_ap_entry_v7 {
 	u8 tx_pn[IEEE80211_CCMP_PN_LEN];
 } __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_7 */
 
+#define IWL_LOCATION_MAX_STS_POS	3
+
+/**
+ * struct iwl_tof_range_req_ap_entry_v8 - AP configuration parameters
+ * @initiator_ap_flags: see &enum iwl_initiator_ap_flags.
+ * @channel_num: AP Channel number
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ *             bits 4 - 7: &enum iwl_location_bw.
+ * @ctrl_ch_position: Coding of the control channel position relative to the
+ *	center frequency, see iwl_mvm_get_ctrl_pos().
+ * @ftmr_max_retries: Max number of retries to send the FTMR in case of no
+ *	reply from the AP.
+ * @bssid: AP's BSSID
+ * @burst_period: Recommended value to be sent to the AP. Measurement
+ *	periodicity In units of 100ms. ignored if num_of_bursts_exp = 0
+ * @samples_per_burst: the number of FTMs pairs in single Burst (1-31);
+ * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of
+ *	the number of measurement iterations (min 2^0 = 1, max 2^14)
+ * @sta_id: the station id of the AP. Only relevant when associated to the AP,
+ *	otherwise should be set to &IWL_MVM_INVALID_STA.
+ * @cipher: pairwise cipher suite for secured measurement.
+ *          &enum iwl_location_cipher.
+ * @hltk: HLTK to be used for secured 11az measurement
+ * @tk: TK to be used for secured 11az measurement
+ * @calib: An array of calibration values per FTM rx bandwidth.
+ *         If &IWL_INITIATOR_AP_FLAGS_USE_CALIB is set, the fw will use the
+ *         calibration value that corresponds to the rx bandwidth of the FTM
+ *         frame.
+ * @beacon_interval: beacon interval of the AP in TUs. Only required if
+ *	&IWL_INITIATOR_AP_FLAGS_TB is set.
+ * @rx_pn: the next expected PN for protected management frames Rx. LE byte
+ *	order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ *	is set to &IWL_MVM_INVALID_STA.
+ * @tx_pn: the next PN to use for protected management frames Tx. LE byte
+ *	order. Only valid if &IWL_INITIATOR_AP_FLAGS_SECURED is set and sta_id
+ *	is set to &IWL_MVM_INVALID_STA.
+ * @r2i_ndp_params: parameters for R2I NDP ranging negotiation.
+ *      bits 0 - 2: max LTF repetitions
+ *      bits 3 - 5: max number of spatial streams
+ *      bits 6 - 7: reserved
+ * @i2r_ndp_params: parameters for I2R NDP ranging negotiation.
+ *      bits 0 - 2: max LTF repetitions
+ *      bits 3 - 5: max number of spatial streams (supported values are < 2)
+ *      bits 6 - 7: reserved
+ * @r2i_max_total_ltf: R2I Max Total LTFs for NDP ranging negotiation.
+ *      One of &enum ieee80211_range_params_max_total_ltf.
+ * @i2r_max_total_ltf: I2R Max Total LTFs for NDP ranging negotiation.
+ *      One of &enum ieee80211_range_params_max_total_ltf.
+ */
+struct iwl_tof_range_req_ap_entry_v8 {
+	__le32 initiator_ap_flags;
+	u8 channel_num;
+	u8 format_bw;
+	u8 ctrl_ch_position;
+	u8 ftmr_max_retries;
+	u8 bssid[ETH_ALEN];
+	__le16 burst_period;
+	u8 samples_per_burst;
+	u8 num_of_bursts;
+	u8 sta_id;
+	u8 cipher;
+	u8 hltk[HLTK_11AZ_LEN];
+	u8 tk[TK_11AZ_LEN];
+	__le16 calib[IWL_TOF_BW_NUM];
+	__le16 beacon_interval;
+	u8 rx_pn[IEEE80211_CCMP_PN_LEN];
+	u8 tx_pn[IEEE80211_CCMP_PN_LEN];
+	u8 r2i_ndp_params;
+	u8 i2r_ndp_params;
+	u8 r2i_max_total_ltf;
+	u8 i2r_max_total_ltf;
+} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_8 */
+
 /**
  * enum iwl_tof_response_mode
  * @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as
@@ -878,6 +954,34 @@ struct iwl_tof_range_req_cmd_v11 {
 	struct iwl_tof_range_req_ap_entry_v7 ap[IWL_MVM_TOF_MAX_APS];
 } __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_11 */
 
+/**
+ * struct iwl_tof_range_req_cmd_v12 - start measurement cmd
+ * @initiator_flags: see flags @ iwl_tof_initiator_flags
+ * @request_id: A Token incremented per request. The same Token will be
+ *		sent back in the range response
+ * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS)
+ * @range_req_bssid: ranging request BSSID
+ * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template.
+ *		  Bits set to 1 shall be randomized by the UMAC
+ * @macaddr_template: MAC address template to use for non-randomized bits
+ * @req_timeout_ms: Requested timeout of the response in units of milliseconds.
+ *	This is the session time for completing the measurement.
+ * @tsf_mac_id: report the measurement start time for each ap in terms of the
+ *	TSF of this mac id. 0xff to disable TSF reporting.
+ * @ap: per-AP request data, see &struct iwl_tof_range_req_ap_entry_v2.
+ */
+struct iwl_tof_range_req_cmd_v12 {
+	__le32 initiator_flags;
+	u8 request_id;
+	u8 num_of_ap;
+	u8 range_req_bssid[ETH_ALEN];
+	u8 macaddr_mask[ETH_ALEN];
+	u8 macaddr_template[ETH_ALEN];
+	__le32 req_timeout_ms;
+	__le32 tsf_mac_id;
+	struct iwl_tof_range_req_ap_entry_v8 ap[IWL_MVM_TOF_MAX_APS];
+} __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_12 */
+
 /*
  * enum iwl_tof_range_request_status - status of the sent request
  * @IWL_TOF_RANGE_REQUEST_STATUS_SUCCESSFUL - FW successfully received the
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 617b41ee5801..2622debbb877 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -93,6 +93,13 @@
 #define IWL_MVM_ENABLE_EBS			1
 #define IWL_MVM_FTM_INITIATOR_ALGO		IWL_TOF_ALGO_TYPE_MAX_LIKE
 #define IWL_MVM_FTM_INITIATOR_DYNACK		true
+#define IWL_MVM_FTM_R2I_MAX_REP			7
+#define IWL_MVM_FTM_I2R_MAX_REP			7
+#define IWL_MVM_FTM_R2I_MAX_STS			1
+#define IWL_MVM_FTM_I2R_MAX_STS			1
+#define IWL_MVM_FTM_R2I_MAX_TOTAL_LTF		3
+#define IWL_MVM_FTM_I2R_MAX_TOTAL_LTF		3
+#define IWL_MVM_FTM_INITIATOR_SECURE_LTF	false
 #define IWL_MVM_D3_DEBUG			false
 #define IWL_MVM_USE_TWT				true
 #define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA	10
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index a4fd0bf9ba19..24ca2934bea0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -684,6 +684,19 @@ iwl_mvm_ftm_set_secured_ranging(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	}
 }
 
+static int
+iwl_mvm_ftm_put_target_v7(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+			  struct cfg80211_pmsr_request_peer *peer,
+			  struct iwl_tof_range_req_ap_entry_v7 *target)
+{
+	int err = iwl_mvm_ftm_put_target(mvm, vif, peer, (void *)target);
+	if (err)
+		return err;
+
+	iwl_mvm_ftm_set_secured_ranging(mvm, vif, target);
+	return err;
+}
+
 static int iwl_mvm_ftm_start_v11(struct iwl_mvm *mvm,
 				 struct ieee80211_vif *vif,
 				 struct cfg80211_pmsr_request *req)
@@ -704,11 +717,67 @@ static int iwl_mvm_ftm_start_v11(struct iwl_mvm *mvm,
 		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
 		struct iwl_tof_range_req_ap_entry_v7 *target = &cmd.ap[i];
 
-		err = iwl_mvm_ftm_put_target(mvm, vif, peer, (void *)target);
+		err = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, target);
 		if (err)
 			return err;
+	}
 
-		iwl_mvm_ftm_set_secured_ranging(mvm, vif, target);
+	return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
+}
+
+static void
+iwl_mvm_ftm_set_ndp_params(struct iwl_mvm *mvm,
+			   struct iwl_tof_range_req_ap_entry_v8 *target)
+{
+	/* Only 2 STS are supported on Tx */
+	u32 i2r_max_sts = IWL_MVM_FTM_I2R_MAX_STS > 1 ? 1 :
+		IWL_MVM_FTM_I2R_MAX_STS;
+
+	target->r2i_ndp_params = IWL_MVM_FTM_R2I_MAX_REP |
+		(IWL_MVM_FTM_R2I_MAX_STS << IWL_LOCATION_MAX_STS_POS);
+	target->i2r_ndp_params = IWL_MVM_FTM_I2R_MAX_REP |
+		(i2r_max_sts << IWL_LOCATION_MAX_STS_POS);
+	target->r2i_max_total_ltf = IWL_MVM_FTM_R2I_MAX_TOTAL_LTF;
+	target->i2r_max_total_ltf = IWL_MVM_FTM_I2R_MAX_TOTAL_LTF;
+}
+
+static int iwl_mvm_ftm_start_v12(struct iwl_mvm *mvm,
+				 struct ieee80211_vif *vif,
+				 struct cfg80211_pmsr_request *req)
+{
+	struct iwl_tof_range_req_cmd_v12 cmd;
+	struct iwl_host_cmd hcmd = {
+		.id = iwl_cmd_id(TOF_RANGE_REQ_CMD, LOCATION_GROUP, 0),
+		.dataflags[0] = IWL_HCMD_DFL_DUP,
+		.data[0] = &cmd,
+		.len[0] = sizeof(cmd),
+	};
+	u8 i;
+	int err;
+
+	iwl_mvm_ftm_cmd_common(mvm, vif, (void *)&cmd, req);
+
+	for (i = 0; i < cmd.num_of_ap; i++) {
+		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+		struct iwl_tof_range_req_ap_entry_v8 *target = &cmd.ap[i];
+		u32 flags;
+
+		err = iwl_mvm_ftm_put_target_v7(mvm, vif, peer, (void *)target);
+		if (err)
+			return err;
+
+		iwl_mvm_ftm_set_ndp_params(mvm, target);
+
+		/*
+		 * If secure LTF is turned off, replace the flag with PMF only
+		 */
+		flags = le32_to_cpu(target->initiator_ap_flags);
+		if ((flags & IWL_INITIATOR_AP_FLAGS_SECURED) &&
+		    !IWL_MVM_FTM_INITIATOR_SECURE_LTF) {
+			flags &= ~IWL_INITIATOR_AP_FLAGS_SECURED;
+			flags |= IWL_INITIATOR_AP_FLAGS_PMF;
+			target->initiator_ap_flags = cpu_to_le32(flags);
+		}
 	}
 
 	return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
@@ -732,6 +801,9 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 						   IWL_FW_CMD_VER_UNKNOWN);
 
 		switch (cmd_ver) {
+		case 12:
+			err = iwl_mvm_ftm_start_v12(mvm, vif, req);
+			break;
 		case 11:
 			err = iwl_mvm_ftm_start_v11(mvm, vif, req);
 			break;
-- 
cgit v1.2.3


From 20578872d503d5c211bdf334185ccd8afe4ae697 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Tue, 30 Mar 2021 16:25:00 +0300
Subject: iwlwifi: mvm: responder: support responder config command version 8

Version 8 add NDP ranging parameters configuration, as well as
enable/disable NDP ranging and LMR feedback.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210330162204.ce9570d755d3.Ic81cb8da9aecbbc9edff468cb4ffbb741418cc73@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/location.h   | 67 +++++++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |  2 +
 .../net/wireless/intel/iwlwifi/mvm/ftm-responder.c | 27 ++++++++-
 3 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index 16af37f124ee..0e38eb1cd75d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -147,6 +147,10 @@ enum iwl_tof_mcsi_enable {
  * @IWL_TOF_RESPONDER_CMD_VALID_RETRY_ON_ALGO_FAIL: retry on algorithm failure
  *	is valid
  * @IWL_TOF_RESPONDER_CMD_VALID_STA_ID: station ID is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_NDP_SUPPORT: enable/disable NDP ranging support
+ *	is valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS: NDP parameters are valid
+ * @IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK: LMR feedback support is valid
  */
 enum iwl_tof_responder_cmd_valid_field {
 	IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO = BIT(0),
@@ -162,6 +166,9 @@ enum iwl_tof_responder_cmd_valid_field {
 	IWL_TOF_RESPONDER_CMD_VALID_FAST_ALGO_SUPPORT = BIT(10),
 	IWL_TOF_RESPONDER_CMD_VALID_RETRY_ON_ALGO_FAIL = BIT(11),
 	IWL_TOF_RESPONDER_CMD_VALID_STA_ID = BIT(12),
+	IWL_TOF_RESPONDER_CMD_VALID_NDP_SUPPORT = BIT(22),
+	IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS = BIT(23),
+	IWL_TOF_RESPONDER_CMD_VALID_LMR_FEEDBACK = BIT(24),
 };
 
 /**
@@ -176,6 +183,9 @@ enum iwl_tof_responder_cmd_valid_field {
  * @IWL_TOF_RESPONDER_FLAGS_FAST_ALGO_SUPPORT: fast algorithm support
  * @IWL_TOF_RESPONDER_FLAGS_RETRY_ON_ALGO_FAIL: retry on algorithm fail
  * @IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT: TX antenna mask
+ * @IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT: support NDP ranging
+ * @IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK: request for LMR feedback if the
+ *	initiator supports it
  */
 enum iwl_tof_responder_cfg_flags {
 	IWL_TOF_RESPONDER_FLAGS_NON_ASAP_SUPPORT = BIT(0),
@@ -188,6 +198,8 @@ enum iwl_tof_responder_cfg_flags {
 	IWL_TOF_RESPONDER_FLAGS_FAST_ALGO_SUPPORT = BIT(9),
 	IWL_TOF_RESPONDER_FLAGS_RETRY_ON_ALGO_FAIL = BIT(10),
 	IWL_TOF_RESPONDER_FLAGS_FTM_TX_ANT = RATE_MCS_ANT_ABC_MSK,
+	IWL_TOF_RESPONDER_FLAGS_NDP_SUPPORT = BIT(24),
+	IWL_TOF_RESPONDER_FLAGS_LMR_FEEDBACK = BIT(25),
 };
 
 /**
@@ -226,7 +238,7 @@ struct iwl_tof_responder_config_cmd_v6 {
 } __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_6 */
 
 /**
- * struct iwl_tof_responder_config_cmd - ToF AP mode (for debug)
+ * struct iwl_tof_responder_config_cmd_v7 - ToF AP mode (for debug)
  * @cmd_valid_fields: &iwl_tof_responder_cmd_valid_field
  * @responder_cfg_flags: &iwl_tof_responder_cfg_flags
  * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
@@ -245,7 +257,7 @@ struct iwl_tof_responder_config_cmd_v6 {
  * @bssid: Current AP BSSID
  * @reserved2: reserved
  */
-struct iwl_tof_responder_config_cmd {
+struct iwl_tof_responder_config_cmd_v7 {
 	__le32 cmd_valid_fields;
 	__le32 responder_cfg_flags;
 	u8 format_bw;
@@ -259,7 +271,56 @@ struct iwl_tof_responder_config_cmd {
 	__le16 specific_calib;
 	u8 bssid[ETH_ALEN];
 	__le16 reserved2;
-} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_6 */
+} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_7 */
+
+#define IWL_RESPONDER_STS_POS	3
+#define IWL_RESPONDER_TOTAL_LTF_POS	6
+
+/**
+ * struct iwl_tof_responder_config_cmd_v8 - ToF AP mode (for debug)
+ * @cmd_valid_fields: &iwl_tof_responder_cmd_valid_field
+ * @responder_cfg_flags: &iwl_tof_responder_cfg_flags
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ *             bits 4 - 7: &enum iwl_location_bw.
+ * @rate: current AP rate
+ * @channel_num: current AP Channel
+ * @ctrl_ch_position: coding of the control channel position relative to
+ *	the center frequency, see iwl_mvm_get_ctrl_pos()
+ * @sta_id: index of the AP STA when in AP mode
+ * @reserved1: reserved
+ * @toa_offset: Artificial addition [pSec] for the ToA - to be used for debug
+ *	purposes, simulating station movement by adding various values
+ *	to this field
+ * @common_calib: XVT: common calibration value
+ * @specific_calib: XVT: specific calibration value
+ * @bssid: Current AP BSSID
+ * @r2i_ndp_params: parameters for R2I NDP.
+ *	bits 0 - 2: max number of LTF repetitions
+ *	bits 3 - 5: max number of spatial streams (supported values are < 2)
+ *	bits 6 - 7: max number of total LTFs
+ *		    (&enum ieee80211_range_params_max_total_ltf)
+ * @i2r_ndp_params: parameters for I2R NDP.
+ *	bits 0 - 2: max number of LTF repetitions
+ *	bits 3 - 5: max number of spatial streams
+ *	bits 6 - 7: max number of total LTFs
+ *		    (&enum ieee80211_range_params_max_total_ltf)
+ */
+struct iwl_tof_responder_config_cmd_v8 {
+	__le32 cmd_valid_fields;
+	__le32 responder_cfg_flags;
+	u8 format_bw;
+	u8 rate;
+	u8 channel_num;
+	u8 ctrl_ch_position;
+	u8 sta_id;
+	u8 reserved1;
+	__le16 toa_offset;
+	__le16 common_calib;
+	__le16 specific_calib;
+	u8 bssid[ETH_ALEN];
+	u8 r2i_ndp_params;
+	u8 i2r_ndp_params;
+} __packed; /* TOF_RESPONDER_CONFIG_CMD_API_S_VER_8 */
 
 #define IWL_LCI_CIVIC_IE_MAX_SIZE	400
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 2622debbb877..45634302801f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -100,6 +100,8 @@
 #define IWL_MVM_FTM_R2I_MAX_TOTAL_LTF		3
 #define IWL_MVM_FTM_I2R_MAX_TOTAL_LTF		3
 #define IWL_MVM_FTM_INITIATOR_SECURE_LTF	false
+#define IWL_MVM_FTM_RESP_NDP_SUPPORT		true
+#define IWL_MVM_FTM_RESP_LMR_FEEDBACK_SUPPORT	true
 #define IWL_MVM_D3_DEBUG			false
 #define IWL_MVM_USE_TWT				true
 #define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA	10
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
index 996f45c19f10..5a249ea97eb2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
@@ -75,6 +75,24 @@ static int iwl_mvm_ftm_responder_set_bw_v2(struct cfg80211_chan_def *chandef,
 	return 0;
 }
 
+static void
+iwl_mvm_ftm_responder_set_ndp(struct iwl_mvm *mvm,
+			      struct iwl_tof_responder_config_cmd_v8 *cmd)
+{
+	/* Up to 2 R2I STS are allowed on the responder */
+	u32 r2i_max_sts = IWL_MVM_FTM_R2I_MAX_STS < 2 ?
+		IWL_MVM_FTM_R2I_MAX_STS : 1;
+
+	cmd->r2i_ndp_params = IWL_MVM_FTM_R2I_MAX_REP |
+		(r2i_max_sts << IWL_RESPONDER_STS_POS) |
+		(IWL_MVM_FTM_R2I_MAX_TOTAL_LTF << IWL_RESPONDER_TOTAL_LTF_POS);
+	cmd->i2r_ndp_params = IWL_MVM_FTM_I2R_MAX_REP |
+		(IWL_MVM_FTM_I2R_MAX_STS << IWL_RESPONDER_STS_POS) |
+		(IWL_MVM_FTM_I2R_MAX_TOTAL_LTF << IWL_RESPONDER_TOTAL_LTF_POS);
+	cmd->cmd_valid_fields |=
+		cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_NDP_PARAMS);
+}
+
 static int
 iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
 			  struct ieee80211_vif *vif,
@@ -82,11 +100,11 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	/*
-	 * The command structure is the same for versions 6 and 7, (only the
+	 * The command structure is the same for versions 6, 7 and 8 (only the
 	 * field interpretation is different), so the same struct can be use
 	 * for all cases.
 	 */
-	struct iwl_tof_responder_config_cmd cmd = {
+	struct iwl_tof_responder_config_cmd_v8 cmd = {
 		.channel_num = chandef->chan->hw_value,
 		.cmd_valid_fields =
 			cpu_to_le32(IWL_TOF_RESPONDER_CMD_VALID_CHAN_INFO |
@@ -100,7 +118,10 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
 
 	lockdep_assert_held(&mvm->mutex);
 
-	if (cmd_ver == 7)
+if (cmd_ver == 8)
+		iwl_mvm_ftm_responder_set_ndp(mvm, &cmd);
+
+	if (cmd_ver >= 7)
 		err = iwl_mvm_ftm_responder_set_bw_v2(chandef, &cmd.format_bw,
 						      &cmd.ctrl_ch_position);
 	else
-- 
cgit v1.2.3


From 5f8a3561ea8bf75ad52cb16dafe69dd550fa542e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 31 Mar 2021 12:14:41 +0300
Subject: iwlwifi: mvm: write queue_sync_state only for sync

We use mvm->queue_sync_state to wait for synchronous queue sync
messages, but if an async one happens inbetween we shouldn't
clear mvm->queue_sync_state after sending the async one, that
can run concurrently (at least from the CPU POV) with another
synchronous queue sync.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.d11c9bcdb4aa.I0772171dbaec87433a11513e9586d98b5d920b5f@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 9ed6e2783cc4..fbd37a19b6e0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -5177,9 +5177,10 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
 	}
 
 out:
-	mvm->queue_sync_state = 0;
-	if (notif->sync)
+	if (notif->sync) {
+		mvm->queue_sync_state = 0;
 		mvm->queue_sync_cookie++;
+	}
 }
 
 static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw)
-- 
cgit v1.2.3


From 5e1688ce914d46f37cce4695f6aebb1ecb9943e0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 31 Mar 2021 12:14:42 +0300
Subject: iwlwifi: mvm: clean up queue sync implementation

When we do queue sync, it's confusing that we have the structures
declared in the FW API header files that aren't really firmware,
and the union is also confusing - especially now in the code that
checks the size on the return.

So rework this: change the type of sync and whether to do it in a
synchronous fashion to arguments, and build the data structure in
the function, so we don't need the union.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.f62833fd9893.I612d7ac1c655ec4880329360e15d207698c750bc@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/api/rx.h    | 30 ------------
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 44 ++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h      |  8 ++--
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c     | 58 +++++------------------
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c      | 10 ++--
 drivers/net/wireless/intel/iwlwifi/mvm/sta.h      | 38 +++++++++++----
 6 files changed, 80 insertions(+), 108 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
index 2c74db823778..3f13b572915a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
@@ -779,36 +779,6 @@ struct iwl_rxq_sync_notification {
 	u8 payload[];
 } __packed; /* MULTI_QUEUE_DRV_SYNC_HDR_CMD_API_S_VER_1 */
 
-/**
- * enum iwl_mvm_rxq_notif_type - Internal message identifier
- *
- * @IWL_MVM_RXQ_EMPTY: empty sync notification
- * @IWL_MVM_RXQ_NOTIF_DEL_BA: notify RSS queues of delBA
- * @IWL_MVM_RXQ_NSSN_SYNC: notify all the RSS queues with the new NSSN
- */
-enum iwl_mvm_rxq_notif_type {
-	IWL_MVM_RXQ_EMPTY,
-	IWL_MVM_RXQ_NOTIF_DEL_BA,
-	IWL_MVM_RXQ_NSSN_SYNC,
-};
-
-/**
- * struct iwl_mvm_internal_rxq_notif - Internal representation of the data sent
- * in &iwl_rxq_sync_cmd. Should be DWORD aligned.
- * FW is agnostic to the payload, so there are no endianity requirements.
- *
- * @type: value from &iwl_mvm_rxq_notif_type
- * @sync: ctrl path is waiting for all notifications to be received
- * @cookie: internal cookie to identify old notifications
- * @data: payload
- */
-struct iwl_mvm_internal_rxq_notif {
-	u16 type;
-	u16 sync;
-	u32 cookie;
-	u8 data[];
-} __packed;
-
 /**
  * enum iwl_mvm_pm_event - type of station PM event
  * @IWL_MVM_PM_EVENT_AWAKE: station woke up
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index fbd37a19b6e0..fd35dd31613e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -5144,28 +5144,50 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
 }
 
 void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
-				     struct iwl_mvm_internal_rxq_notif *notif,
-				     u32 size)
+				     enum iwl_mvm_rxq_notif_type type,
+				     bool sync,
+				     const void *data, u32 size)
 {
-	u32 qmask = BIT(mvm->trans->num_rx_queues) - 1;
+	struct {
+		struct iwl_rxq_sync_cmd cmd;
+		struct iwl_mvm_internal_rxq_notif notif;
+	} __packed cmd = {
+		.cmd.rxq_mask = cpu_to_le32(BIT(mvm->trans->num_rx_queues) - 1),
+		.cmd.count =
+			cpu_to_le32(sizeof(struct iwl_mvm_internal_rxq_notif) +
+				    size),
+		.notif.type = type,
+		.notif.sync = sync,
+	};
+	struct iwl_host_cmd hcmd = {
+		.id = WIDE_ID(DATA_PATH_GROUP, TRIGGER_RX_QUEUES_NOTIF_CMD),
+		.data[0] = &cmd,
+		.len[0] = sizeof(cmd),
+		.data[1] = data,
+		.len[1] = size,
+		.flags = sync ? 0 : CMD_ASYNC,
+	};
 	int ret;
 
+	/* size must be a multiple of DWORD */
+	if (WARN_ON(cmd.cmd.count & cpu_to_le32(3)))
+		return;
 
 	if (!iwl_mvm_has_new_rx_api(mvm))
 		return;
 
-	if (notif->sync) {
-		notif->cookie = mvm->queue_sync_cookie;
+	if (sync) {
+		cmd.notif.cookie = mvm->queue_sync_cookie;
 		mvm->queue_sync_state = (1 << mvm->trans->num_rx_queues) - 1;
 	}
 
-	ret = iwl_mvm_notify_rx_queue(mvm, qmask, notif, size, !notif->sync);
+	ret = iwl_mvm_send_cmd(mvm, &hcmd);
 	if (ret) {
 		IWL_ERR(mvm, "Failed to trigger RX queues sync (%d)\n", ret);
 		goto out;
 	}
 
-	if (notif->sync) {
+	if (sync) {
 		lockdep_assert_held(&mvm->mutex);
 		ret = wait_event_timeout(mvm->rx_sync_waitq,
 					 READ_ONCE(mvm->queue_sync_state) == 0 ||
@@ -5177,7 +5199,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
 	}
 
 out:
-	if (notif->sync) {
+	if (sync) {
 		mvm->queue_sync_state = 0;
 		mvm->queue_sync_cookie++;
 	}
@@ -5186,13 +5208,9 @@ out:
 static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw)
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
-	struct iwl_mvm_internal_rxq_notif data = {
-		.type = IWL_MVM_RXQ_EMPTY,
-		.sync = 1,
-	};
 
 	mutex_lock(&mvm->mutex);
-	iwl_mvm_sync_rx_queues_internal(mvm, &data, sizeof(data));
+	iwl_mvm_sync_rx_queues_internal(mvm, IWL_MVM_RXQ_EMPTY, true, NULL, 0);
 	mutex_unlock(&mvm->mutex);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 0a963d01b825..e607ad713f88 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1570,9 +1570,6 @@ void iwl_mvm_rx_frame_release(struct iwl_mvm *mvm, struct napi_struct *napi,
 			      struct iwl_rx_cmd_buffer *rxb, int queue);
 void iwl_mvm_rx_bar_frame_release(struct iwl_mvm *mvm, struct napi_struct *napi,
 				  struct iwl_rx_cmd_buffer *rxb, int queue);
-int iwl_mvm_notify_rx_queue(struct iwl_mvm *mvm, u32 rxq_mask,
-			    const struct iwl_mvm_internal_rxq_notif *notif,
-			    u32 notif_size, bool async);
 void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct napi_struct *napi,
 			    struct iwl_rx_cmd_buffer *rxb, int queue);
 void iwl_mvm_rx_tx_cmd(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb);
@@ -2001,8 +1998,9 @@ void iwl_mvm_rx_tdls_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb);
 void iwl_mvm_tdls_ch_switch_work(struct work_struct *work);
 
 void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
-				     struct iwl_mvm_internal_rxq_notif *notif,
-				     u32 size);
+				     enum iwl_mvm_rxq_notif_type type,
+				     bool sync,
+				     const void *data, u32 size);
 void iwl_mvm_reorder_timer_expired(struct timer_list *t);
 struct ieee80211_vif *iwl_mvm_get_bss_vif(struct iwl_mvm *mvm);
 struct ieee80211_vif *iwl_mvm_get_vif_by_macid(struct iwl_mvm *mvm, u32 macid);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index af5a6dd81c41..8e26422ca326 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -527,37 +527,6 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue,
 	return false;
 }
 
-int iwl_mvm_notify_rx_queue(struct iwl_mvm *mvm, u32 rxq_mask,
-			    const struct iwl_mvm_internal_rxq_notif *notif,
-			    u32 notif_size, bool async)
-{
-	u8 buf[sizeof(struct iwl_rxq_sync_cmd) +
-	       sizeof(struct iwl_mvm_rss_sync_notif)];
-	struct iwl_rxq_sync_cmd *cmd = (void *)buf;
-	u32 data_size = sizeof(*cmd) + notif_size;
-	int ret;
-
-	/*
-	 * size must be a multiple of DWORD
-	 * Ensure we don't overflow buf
-	 */
-	if (WARN_ON(notif_size & 3 ||
-		    notif_size > sizeof(struct iwl_mvm_rss_sync_notif)))
-		return -EINVAL;
-
-	cmd->rxq_mask = cpu_to_le32(rxq_mask);
-	cmd->count =  cpu_to_le32(notif_size);
-	cmd->flags = 0;
-	memcpy(cmd->payload, notif, notif_size);
-
-	ret = iwl_mvm_send_cmd_pdu(mvm,
-				   WIDE_ID(DATA_PATH_GROUP,
-					   TRIGGER_RX_QUEUES_NOTIF_CMD),
-				   async ? CMD_ASYNC : 0, data_size, cmd);
-
-	return ret;
-}
-
 /*
  * Returns true if sn2 - buffer_size < sn1 < sn2.
  * To be used only in order to compare reorder buffer head with NSSN.
@@ -573,15 +542,13 @@ static bool iwl_mvm_is_sn_less(u16 sn1, u16 sn2, u16 buffer_size)
 static void iwl_mvm_sync_nssn(struct iwl_mvm *mvm, u8 baid, u16 nssn)
 {
 	if (IWL_MVM_USE_NSSN_SYNC) {
-		struct iwl_mvm_rss_sync_notif notif = {
-			.metadata.type = IWL_MVM_RXQ_NSSN_SYNC,
-			.metadata.sync = 0,
-			.nssn_sync.baid = baid,
-			.nssn_sync.nssn = nssn,
+		struct iwl_mvm_nssn_sync_data notif = {
+			.baid = baid,
+			.nssn = nssn,
 		};
 
-		iwl_mvm_sync_rx_queues_internal(mvm, (void *)&notif,
-						sizeof(notif));
+		iwl_mvm_sync_rx_queues_internal(mvm, IWL_MVM_RXQ_NSSN_SYNC, false,
+						&notif, sizeof(notif));
 	}
 }
 
@@ -830,8 +797,7 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct napi_struct *napi,
 		      "invalid notification size %d (%d)",
 		      len, (int)(sizeof(*notif) + sizeof(*internal_notif))))
 		return;
-	/* remove only the firmware header, we want all of our payload below */
-	len -= sizeof(*notif);
+	len -= sizeof(*notif) + sizeof(*internal_notif);
 
 	if (internal_notif->sync &&
 	    mvm->queue_sync_cookie != internal_notif->cookie) {
@@ -841,21 +807,19 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct napi_struct *napi,
 
 	switch (internal_notif->type) {
 	case IWL_MVM_RXQ_EMPTY:
-		WARN_ONCE(len != sizeof(*internal_notif),
-			  "invalid empty notification size %d (%d)",
-			  len, (int)sizeof(*internal_notif));
+		WARN_ONCE(len, "invalid empty notification size %d", len);
 		break;
 	case IWL_MVM_RXQ_NOTIF_DEL_BA:
-		if (WARN_ONCE(len != sizeof(struct iwl_mvm_rss_sync_notif),
+		if (WARN_ONCE(len != sizeof(struct iwl_mvm_delba_data),
 			      "invalid delba notification size %d (%d)",
-			      len, (int)sizeof(struct iwl_mvm_rss_sync_notif)))
+			      len, (int)sizeof(struct iwl_mvm_delba_data)))
 			break;
 		iwl_mvm_del_ba(mvm, queue, (void *)internal_notif->data);
 		break;
 	case IWL_MVM_RXQ_NSSN_SYNC:
-		if (WARN_ONCE(len != sizeof(struct iwl_mvm_rss_sync_notif),
+		if (WARN_ONCE(len != sizeof(struct iwl_mvm_nssn_sync_data),
 			      "invalid nssn sync notification size %d (%d)",
-			      len, (int)sizeof(struct iwl_mvm_rss_sync_notif)))
+			      len, (int)sizeof(struct iwl_mvm_nssn_sync_data)))
 			break;
 		iwl_mvm_nssn_sync(mvm, napi, queue,
 				  (void *)internal_notif->data);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 3a411bbda5fd..f618368eda83 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2441,12 +2441,12 @@ int iwl_mvm_rm_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 
 static void iwl_mvm_sync_rxq_del_ba(struct iwl_mvm *mvm, u8 baid)
 {
-	struct iwl_mvm_rss_sync_notif notif = {
-		.metadata.type = IWL_MVM_RXQ_NOTIF_DEL_BA,
-		.metadata.sync = 1,
-		.delba.baid = baid,
+	struct iwl_mvm_delba_data notif = {
+		.baid = baid,
 	};
-	iwl_mvm_sync_rx_queues_internal(mvm, (void *)&notif, sizeof(notif));
+
+	iwl_mvm_sync_rx_queues_internal(mvm, IWL_MVM_RXQ_NOTIF_DEL_BA, true,
+					&notif, sizeof(notif));
 };
 
 static void iwl_mvm_free_reorder(struct iwl_mvm *mvm,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 35a18b96aac5..32b4d1935788 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -281,6 +281,36 @@ struct iwl_mvm_key_pn {
 	} ____cacheline_aligned_in_smp q[];
 };
 
+/**
+ * enum iwl_mvm_rxq_notif_type - Internal message identifier
+ *
+ * @IWL_MVM_RXQ_EMPTY: empty sync notification
+ * @IWL_MVM_RXQ_NOTIF_DEL_BA: notify RSS queues of delBA
+ * @IWL_MVM_RXQ_NSSN_SYNC: notify all the RSS queues with the new NSSN
+ */
+enum iwl_mvm_rxq_notif_type {
+	IWL_MVM_RXQ_EMPTY,
+	IWL_MVM_RXQ_NOTIF_DEL_BA,
+	IWL_MVM_RXQ_NSSN_SYNC,
+};
+
+/**
+ * struct iwl_mvm_internal_rxq_notif - Internal representation of the data sent
+ * in &iwl_rxq_sync_cmd. Should be DWORD aligned.
+ * FW is agnostic to the payload, so there are no endianity requirements.
+ *
+ * @type: value from &iwl_mvm_rxq_notif_type
+ * @sync: ctrl path is waiting for all notifications to be received
+ * @cookie: internal cookie to identify old notifications
+ * @data: payload
+ */
+struct iwl_mvm_internal_rxq_notif {
+	u16 type;
+	u16 sync;
+	u32 cookie;
+	u8 data[];
+} __packed;
+
 struct iwl_mvm_delba_data {
 	u32 baid;
 } __packed;
@@ -290,14 +320,6 @@ struct iwl_mvm_nssn_sync_data {
 	u32 nssn;
 } __packed;
 
-struct iwl_mvm_rss_sync_notif {
-	struct iwl_mvm_internal_rxq_notif metadata;
-	union {
-		struct iwl_mvm_delba_data delba;
-		struct iwl_mvm_nssn_sync_data nssn_sync;
-	};
-} __packed;
-
 /**
  * struct iwl_mvm_rxq_dup_data - per station per rx queue data
  * @last_seq: last sequence per tid for duplicate packet detection
-- 
cgit v1.2.3


From 517a5eb9fab2bf1d078ee0d3f95abf6a7aa092f8 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Wed, 31 Mar 2021 12:14:43 +0300
Subject: iwlwifi: mvm: when associated with PMF, use protected NDP ranging
 negotiation

When associated to the resonder with PMF, request to protect the NDP
ranging negotiation with PMF.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.e7982c72e12b.Ib6db362d01a31132c638e194d49476cd8e3ff430@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 24ca2934bea0..a456b8a0ae58 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -490,6 +490,15 @@ iwl_mvm_ftm_put_target(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	if (vif->bss_conf.assoc &&
 	    !memcmp(peer->addr, vif->bss_conf.bssid, ETH_ALEN)) {
 		struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+		struct ieee80211_sta *sta;
+
+		rcu_read_lock();
+
+		sta = rcu_dereference(mvm->fw_id_to_mac_id[mvmvif->ap_sta_id]);
+		if (sta->mfp)
+			FTM_PUT_FLAG(PMF);
+
+		rcu_read_unlock();
 
 		target->sta_id = mvmvif->ap_sta_id;
 	} else {
-- 
cgit v1.2.3


From 85b5fd94d7d9d7e775024cd0ba0f31c432bea79b Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Wed, 31 Mar 2021 12:14:44 +0300
Subject: iwlwifi: add ax201 killer device

add new killer devices configurations.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.54967363d26d.I5d1a3d810cf6abace51ebb2630d62d891e9fd302@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 4 ++++
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 ++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index bea3d5f3014a..d775cd9c1394 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -399,6 +399,10 @@ const char iwl_ax201_killer_1650s_name[] =
 	"Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)";
 const char iwl_ax201_killer_1650i_name[] =
 	"Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)";
+const char iwl_ax210_killer_1675w_name[] =
+	"Killer(R) Wi-Fi 6E AX1675w 160MHz Wireless Network Adapter (210D2W)";
+const char iwl_ax210_killer_1675x_name[] =
+	"Killer(R) Wi-Fi 6E AX1675x 160MHz Wireless Network Adapter (210NGW)";
 
 const struct iwl_cfg iwl_qu_b0_hr1_b0 = {
 	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index f041f7f2160b..bdf352e97f51 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -503,6 +503,8 @@ extern const char iwl_ax200_killer_1650w_name[];
 extern const char iwl_ax200_killer_1650x_name[];
 extern const char iwl_ax201_killer_1650s_name[];
 extern const char iwl_ax201_killer_1650i_name[];
+extern const char iwl_ax210_killer_1675w_name[];
+extern const char iwl_ax210_killer_1675x_name[];
 extern const char iwl_ma_name[];
 extern const char iwl_ax211_name[];
 extern const char iwl_ax411_name[];
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index d7c4625583a8..dd7e295397c8 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -619,6 +619,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x2725, 0x4020, iwlax210_2ax_cfg_ty_gf_a0, NULL),
 	IWL_DEV_INFO(0x2725, 0x6020, iwlax210_2ax_cfg_ty_gf_a0, NULL),
 	IWL_DEV_INFO(0x2725, 0x6024, iwlax210_2ax_cfg_ty_gf_a0, NULL),
+	IWL_DEV_INFO(0x2725, 0x1673, iwlax210_2ax_cfg_ty_gf_a0, iwl_ax210_killer_1675w_name),
+	IWL_DEV_INFO(0x2725, 0x1674, iwlax210_2ax_cfg_ty_gf_a0, iwl_ax210_killer_1675x_name),
 	IWL_DEV_INFO(0x7A70, 0x0090, iwlax211_2ax_cfg_so_gf_a0_long, NULL),
 	IWL_DEV_INFO(0x7A70, 0x0098, iwlax211_2ax_cfg_so_gf_a0_long, NULL),
 	IWL_DEV_INFO(0x7A70, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0_long, NULL),
-- 
cgit v1.2.3


From e8fe3b41c3a36c7a7aa88bdfec112b91530577e4 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Wed, 31 Mar 2021 12:14:46 +0300
Subject: iwlwifi: mvm: Add support for 6GHz passive scan

When doing scan while 6GHz channels are not enabled, the 6GHz band
is not scanned. Thus, if there are no APs on the 2GHz and 5GHz bands
(that will allow discovery of geographic location etc. that would
allow enabling the 6GHz channels) but there are non collocated APs
on 6GHz PSC channels these would never be discovered.

To overcome this, FW added support for performing passive UHB scan
in case no APs were discovered during scan on the 2GHz and 5GHz
channels.

Add support for enabling such scan when the following conditions are
met:

- 6GHz channels are supported but not enabled by regulatory.
- Station interface is not associated or less than a defined time
  interval passed from the last resume or HW reset flows.
- At least 4 channels are included in the scan request
- The scan request includes the widlcard SSID.
- At least 50 minutes passed from the last 6GHz passive scan.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.7c7bd00e0aeb.Ib226ad57e416b43a710c36a78a617d4243458b99@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/api/scan.h   |   8 ++
 drivers/net/wireless/intel/iwlwifi/fw/file.h       |   3 +
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |   2 +
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c        |   2 +
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c  |   2 +
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h       |   3 +
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c      | 128 +++++++++++++++++++++
 7 files changed, 148 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 6b8ca35cec1a..b2605aefc290 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -634,6 +634,12 @@ enum iwl_umac_scan_general_flags2 {
  * @IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN: at the end of 2.4GHz and
  *		5.2Ghz bands scan, trigger scan on 6GHz band to discover
  *		the reported collocated APs
+ * @IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN: at the end of 2.4GHz and 5GHz
+ *      bands scan, if not APs were discovered, allow scan to conitnue and scan
+ *      6GHz PSC channels in order to discover country information.
+ * @IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN_FILTER_IN: in case
+ *      &IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN is enabled and scan is
+ *      activated over 6GHz PSC channels, filter in beacons and probe responses.
  */
 enum iwl_umac_scan_general_flags_v2 {
 	IWL_UMAC_SCAN_GEN_FLAGS_V2_PERIODIC             = BIT(0),
@@ -649,6 +655,8 @@ enum iwl_umac_scan_general_flags_v2 {
 	IWL_UMAC_SCAN_GEN_FLAGS_V2_MULTI_SSID           = BIT(10),
 	IWL_UMAC_SCAN_GEN_FLAGS_V2_FORCE_PASSIVE        = BIT(11),
 	IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN     = BIT(12),
+	IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN    = BIT(13),
+	IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN_FILTER_IN = BIT(14),
 };
 
 /**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 35dffcaf5aba..f9c5cf538ad1 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -362,6 +362,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t;
  * @IWL_UCODE_TLV_CAPA_PROTECTED_TWT: Supports protection of TWT action frames
  * @IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE: Supports the firmware handshake in
  *	reset flow
+ * @IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN: Support for passive scan on 6GHz PSC
+ *      channels even when these are not enabled.
  *
  * @NUM_IWL_UCODE_TLV_CAPA: number of bits used
  */
@@ -408,6 +410,7 @@ enum iwl_ucode_tlv_capa {
 	IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD		= (__force iwl_ucode_tlv_capa_t)54,
 	IWL_UCODE_TLV_CAPA_PROTECTED_TWT		= (__force iwl_ucode_tlv_capa_t)56,
 	IWL_UCODE_TLV_CAPA_FW_RESET_HANDSHAKE		= (__force iwl_ucode_tlv_capa_t)57,
+	IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN		= (__force iwl_ucode_tlv_capa_t)58,
 
 	/* set 2 */
 	IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE		= (__force iwl_ucode_tlv_capa_t)64,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 45634302801f..1343f25f1090 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -117,5 +117,7 @@
 #define IWL_MVM_FTM_INITIATOR_SMOOTH_OVERSHOOT  20016
 #define IWL_MVM_FTM_INITIATOR_SMOOTH_AGE_SEC    2
 #define IWL_MVM_DISABLE_AP_FILS			false
+#define IWL_MVM_6GHZ_PASSIVE_SCAN_TIMEOUT       3000 /* in seconds */
+#define IWL_MVM_6GHZ_PASSIVE_SCAN_ASSOC_TIMEOUT 60   /* in seconds */
 
 #endif /* __MVM_CONSTANTS_H */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index a7dc85c704a9..2e28cf299ef4 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -2028,6 +2028,8 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
 
 	mutex_lock(&mvm->mutex);
 
+	mvm->last_reset_or_resume_time_jiffies = jiffies;
+
 	/* get the BSS vif pointer again */
 	vif = iwl_mvm_get_bss_vif(mvm);
 	if (IS_ERR_OR_NULL(vif))
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index fd35dd31613e..607d5d564928 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1099,6 +1099,8 @@ int __iwl_mvm_mac_start(struct iwl_mvm *mvm)
 	iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_PERIODIC,
 			       NULL);
 
+	mvm->last_reset_or_resume_time_jiffies = jiffies;
+
 	if (ret && test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
 		/* Something went wrong - we need to finish some cleanup
 		 * that normally iwl_mvm_mac_restart_complete() below
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index e607ad713f88..e2a37ac7c4b1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1096,6 +1096,9 @@ struct iwl_mvm {
 	/* sniffer data to include in radiotap */
 	__le16 cur_aid;
 	u8 cur_bssid[ETH_ALEN];
+
+	unsigned long last_6ghz_passive_scan_jiffies;
+	unsigned long last_reset_or_resume_time_jiffies;
 };
 
 /* Extract MVM priv from op_mode and _hw */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index caf87f320094..5a0696c44f6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -43,6 +43,9 @@
 /* adaptive dwell number of APs override for social channels */
 #define IWL_SCAN_ADWELL_N_APS_SOCIAL_CHS 2
 
+/* minimal number of 2GHz and 5GHz channels in the regular scan request */
+#define IWL_MVM_6GHZ_PASSIVE_SCAN_MIN_CHANS 4
+
 struct iwl_mvm_scan_timing_params {
 	u32 suspend_time;
 	u32 max_out_time;
@@ -94,6 +97,7 @@ struct iwl_mvm_scan_params {
 	struct cfg80211_scan_6ghz_params *scan_6ghz_params;
 	u32 n_6ghz_params;
 	bool scan_6ghz;
+	bool enable_6ghz_passive;
 };
 
 static inline void *iwl_mvm_get_scan_req_umac_data(struct iwl_mvm *mvm)
@@ -1873,6 +1877,98 @@ static u8 iwl_mvm_scan_umac_chan_flags_v2(struct iwl_mvm *mvm,
 	return flags;
 }
 
+static void iwl_mvm_scan_6ghz_passive_scan(struct iwl_mvm *mvm,
+					   struct iwl_mvm_scan_params *params,
+					   struct ieee80211_vif *vif)
+{
+	struct ieee80211_supported_band *sband =
+		&mvm->nvm_data->bands[NL80211_BAND_6GHZ];
+	u32 n_disabled, i;
+
+	params->enable_6ghz_passive = false;
+
+	if (params->scan_6ghz)
+		return;
+
+	if (!fw_has_capa(&mvm->fw->ucode_capa,
+			 IWL_UCODE_TLV_CAPA_PASSIVE_6GHZ_SCAN)) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: Not supported by FW\n");
+		return;
+	}
+
+	/* 6GHz passive scan allowed only on station interface  */
+	if (vif->type != NL80211_IFTYPE_STATION) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: not station interface\n");
+		return;
+	}
+
+	/*
+	 * 6GHz passive scan is allowed while associated in a defined time
+	 * interval following HW reset or resume flow
+	 */
+	if (vif->bss_conf.assoc &&
+	    (time_before(mvm->last_reset_or_resume_time_jiffies +
+			 (IWL_MVM_6GHZ_PASSIVE_SCAN_ASSOC_TIMEOUT * HZ),
+			 jiffies))) {
+		IWL_DEBUG_SCAN(mvm, "6GHz passive scan: associated\n");
+		return;
+	}
+
+	/* No need for 6GHz passive scan if not enough time elapsed */
+	if (time_after(mvm->last_6ghz_passive_scan_jiffies +
+		       (IWL_MVM_6GHZ_PASSIVE_SCAN_TIMEOUT * HZ), jiffies)) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: timeout did not expire\n");
+		return;
+	}
+
+	/* not enough channels in the regular scan request */
+	if (params->n_channels < IWL_MVM_6GHZ_PASSIVE_SCAN_MIN_CHANS) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: not enough channels\n");
+		return;
+	}
+
+	for (i = 0; i < params->n_ssids; i++) {
+		if (!params->ssids[i].ssid_len)
+			break;
+	}
+
+	/* not a wildcard scan, so cannot enable passive 6GHz scan */
+	if (i == params->n_ssids) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: no wildcard SSID\n");
+		return;
+	}
+
+	if (!sband || !sband->n_channels) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: no 6GHz channels\n");
+		return;
+	}
+
+	for (i = 0, n_disabled = 0; i < sband->n_channels; i++) {
+		if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED))
+			n_disabled++;
+	}
+
+	/*
+	 * Not all the 6GHz channels are disabled, so no need for 6GHz passive
+	 * scan
+	 */
+	if (n_disabled != sband->n_channels) {
+		IWL_DEBUG_SCAN(mvm,
+			       "6GHz passive scan: 6GHz channels enabled\n");
+		return;
+	}
+
+	/* all conditions to enable 6ghz passive scan are satisfied */
+	IWL_DEBUG_SCAN(mvm, "6GHz passive scan: can be enabled\n");
+	params->enable_6ghz_passive = true;
+}
+
 static u16 iwl_mvm_scan_umac_flags_v2(struct iwl_mvm *mvm,
 				      struct iwl_mvm_scan_params *params,
 				      struct ieee80211_vif *vif,
@@ -1911,6 +2007,9 @@ static u16 iwl_mvm_scan_umac_flags_v2(struct iwl_mvm *mvm,
 	    params->flags & NL80211_SCAN_FLAG_COLOCATED_6GHZ)
 		flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_TRIGGER_UHB_SCAN;
 
+	if (params->enable_6ghz_passive)
+		flags |= IWL_UMAC_SCAN_GEN_FLAGS_V2_6GHZ_PASSIVE_SCAN;
+
 	return flags;
 }
 
@@ -2183,6 +2282,30 @@ iwl_mvm_scan_umac_fill_ch_p_v6(struct iwl_mvm *mvm,
 					  params->n_channels,
 					  channel_cfg_flags,
 					  vif->type);
+
+	if (params->enable_6ghz_passive) {
+		struct ieee80211_supported_band *sband =
+			&mvm->nvm_data->bands[NL80211_BAND_6GHZ];
+		u32 i;
+
+		for (i = 0; i < sband->n_channels; i++) {
+			struct ieee80211_channel *channel =
+				&sband->channels[i];
+
+			struct iwl_scan_channel_cfg_umac *cfg =
+				&cp->channel_config[cp->count];
+
+			if (!cfg80211_channel_is_psc(channel))
+				continue;
+
+			cfg->flags = 0;
+			cfg->v2.channel_num = channel->hw_value;
+			cfg->v2.band = PHY_BAND_6;
+			cfg->v2.iter_count = 1;
+			cfg->v2.iter_interval = 0;
+			cp->count++;
+		}
+	}
 }
 
 static int iwl_mvm_scan_umac_v12(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
@@ -2500,6 +2623,8 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 	iwl_mvm_build_scan_probe(mvm, vif, ies, &params);
 
+	iwl_mvm_scan_6ghz_passive_scan(mvm, &params, vif);
+
 	uid = iwl_mvm_build_scan_cmd(mvm, vif, &hcmd, &params,
 				     IWL_MVM_SCAN_REGULAR);
 
@@ -2524,6 +2649,9 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	mvm->scan_status |= IWL_MVM_SCAN_REGULAR;
 	mvm->scan_vif = iwl_mvm_vif_from_mac80211(vif);
 
+	if (params.enable_6ghz_passive)
+		mvm->last_6ghz_passive_scan_jiffies = jiffies;
+
 	schedule_delayed_work(&mvm->scan_timeout_dwork,
 			      msecs_to_jiffies(SCAN_TIMEOUT));
 
-- 
cgit v1.2.3


From e12cfc7bbfd31c1be6567b296ea058e35fba91ab Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Wed, 31 Mar 2021 12:14:47 +0300
Subject: iwlwifi: mvm: enable PPAG in China

Add support for ppag in China by reading revision 2 of the ppag table
from ACPI, and passing the data to the FW.
This is needed to enable OEMs to control ppag enablement
in China.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.69af388d0dce.I8cfddf9e6837bf394b00390181b4b774ded19acd@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c      |  9 ++-
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h      |  4 +-
 drivers/net/wireless/intel/iwlwifi/fw/api/power.h | 22 ++++---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c       | 76 ++++++++++++++---------
 4 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 82a4f7e8ba54..56c4d05c9cc2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -181,14 +181,13 @@ union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 	/*
 	 * We need at least two packages, one for the revision and one
 	 * for the data itself.  Also check that the revision is valid
-	 * (i.e. it is an integer smaller than 2, as we currently support only
-	 * 2 revisions).
+	 * (i.e. it is an integer (each caller has to check by itself
+	 * if the returned revision is supported)).
 	 */
 	if (data->type != ACPI_TYPE_PACKAGE ||
 	    data->package.count < 2 ||
-	    data->package.elements[0].type != ACPI_TYPE_INTEGER ||
-	    data->package.elements[0].integer.value > 1) {
-		IWL_DEBUG_DEV_RADIO(dev, "Unsupported packages structure\n");
+	    data->package.elements[0].type != ACPI_TYPE_INTEGER) {
+		IWL_DEBUG_DEV_RADIO(dev, "Invalid packages structure\n");
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 030c50082568..f8ea588e4fbb 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -53,8 +53,8 @@
 
 #define ACPI_WGDS_TABLE_SIZE		3
 
-#define ACPI_PPAG_WIFI_DATA_SIZE	((IWL_NUM_CHAIN_LIMITS * \
-					  IWL_NUM_SUB_BANDS) + 2)
+#define ACPI_PPAG_WIFI_DATA_SIZE_V1	((IWL_NUM_CHAIN_LIMITS * \
+					  IWL_NUM_SUB_BANDS_V1) + 2)
 #define ACPI_PPAG_WIFI_DATA_SIZE_V2	((IWL_NUM_CHAIN_LIMITS * \
 					  IWL_NUM_SUB_BANDS_V2) + 2)
 
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
index 798417182d54..86445385f072 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
@@ -274,7 +274,7 @@ enum iwl_dev_tx_power_cmd_mode {
 #define IWL_NUM_CHAIN_TABLES	1
 #define IWL_NUM_CHAIN_TABLES_V2	2
 #define IWL_NUM_CHAIN_LIMITS	2
-#define IWL_NUM_SUB_BANDS	5
+#define IWL_NUM_SUB_BANDS_V1	5
 #define IWL_NUM_SUB_BANDS_V2	11
 
 /**
@@ -300,7 +300,7 @@ struct iwl_dev_tx_power_common {
  * @per_chain: per chain restrictions
  */
 struct iwl_dev_tx_power_cmd_v3 {
-	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS];
+	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V1];
 } __packed; /* TX_REDUCED_POWER_API_S_VER_3 */
 
 #define IWL_DEV_MAX_TX_POWER 0x7FFF
@@ -313,7 +313,7 @@ struct iwl_dev_tx_power_cmd_v3 {
  * @reserved: reserved (padding)
  */
 struct iwl_dev_tx_power_cmd_v4 {
-	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS];
+	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V1];
 	u8 enable_ack_reduction;
 	u8 reserved[3];
 } __packed; /* TX_REDUCED_POWER_API_S_VER_4 */
@@ -332,7 +332,7 @@ struct iwl_dev_tx_power_cmd_v4 {
  *	BIOS values. relevant if setMode is IWL_TX_POWER_MODE_SET_SAR_TIMER
  */
 struct iwl_dev_tx_power_cmd_v5 {
-	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS];
+	__le16 per_chain[IWL_NUM_CHAIN_TABLES][IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V1];
 	u8 enable_ack_reduction;
 	u8 per_chain_restriction_changed;
 	u8 reserved[2];
@@ -454,21 +454,23 @@ struct iwl_geo_tx_power_profiles_resp {
 
 /**
  * union iwl_ppag_table_cmd - union for all versions of PPAG command
- * @v1: version 1, table revision = 0
- * @v2: version 2, table revision = 1
+ * @v1: version 1
+ * @v2: version 2
  *
- * @enabled: 1 if PPAG is enabled, 0 otherwise
+ * @flags: bit 0 - indicates enablement of PPAG for ETSI
+ *         bit 1 - indicates enablement of PPAG for CHINA BIOS
+ *         bit 1 can be used only in v3 (identical to v2)
  * @gain: table of antenna gain values per chain and sub-band
  * @reserved: reserved
  */
 union iwl_ppag_table_cmd {
 	struct {
-		__le32 enabled;
-		s8 gain[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS];
+		__le32 flags;
+		s8 gain[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V1];
 		s8 reserved[2];
 	} v1;
 	struct {
-		__le32 enabled;
+		__le32 flags;
 		s8 gain[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS_V2];
 		s8 reserved[2];
 	} v2;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 5ee64f7f3c85..4f297edd8e18 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -29,6 +29,9 @@
 
 #define UCODE_VALID_OK	cpu_to_le32(0x1)
 
+#define IWL_PPAG_MASK 3
+#define IWL_PPAG_ETSI_MASK BIT(0)
+
 struct iwl_mvm_alive_data {
 	bool valid;
 	u32 scd_base_addr;
@@ -773,16 +776,16 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
 	} else if (fw_has_api(&mvm->fw->ucode_capa,
 			      IWL_UCODE_TLV_API_REDUCE_TX_POWER)) {
 		len = sizeof(cmd.v5);
-		n_subbands = IWL_NUM_SUB_BANDS;
+		n_subbands = IWL_NUM_SUB_BANDS_V1;
 		per_chain = cmd.v5.per_chain[0][0];
 	} else if (fw_has_capa(&mvm->fw->ucode_capa,
 			       IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) {
 		len = sizeof(cmd.v4);
-		n_subbands = IWL_NUM_SUB_BANDS;
+		n_subbands = IWL_NUM_SUB_BANDS_V1;
 		per_chain = cmd.v4.per_chain[0][0];
 	} else {
 		len = sizeof(cmd.v3);
-		n_subbands = IWL_NUM_SUB_BANDS;
+		n_subbands = IWL_NUM_SUB_BANDS_V1;
 		per_chain = cmd.v3.per_chain[0][0];
 	}
 
@@ -909,46 +912,50 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
 
 static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm)
 {
-	union acpi_object *wifi_pkg, *data, *enabled;
+	union acpi_object *wifi_pkg, *data, *flags;
 	int i, j, ret, tbl_rev, num_sub_bands;
 	int idx = 2;
 	s8 *gain;
 
 	/*
-	 * The 'enabled' field is the same in v1 and v2 so we can just
+	 * The 'flags' field is the same in v1 and in v2 so we can just
 	 * use v1 to access it.
 	 */
-	mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(0);
+	mvm->fwrt.ppag_table.v1.flags = cpu_to_le32(0);
+
 	data = iwl_acpi_get_object(mvm->dev, ACPI_PPAG_METHOD);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	/* try to read ppag table revision 1 */
+	/* try to read ppag table rev 2 or 1 (both have the same data size) */
 	wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
 					 ACPI_PPAG_WIFI_DATA_SIZE_V2, &tbl_rev);
 	if (!IS_ERR(wifi_pkg)) {
-		if (tbl_rev != 1) {
+		if (tbl_rev == 1 || tbl_rev == 2) {
+			num_sub_bands = IWL_NUM_SUB_BANDS_V2;
+			gain = mvm->fwrt.ppag_table.v2.gain[0];
+			mvm->fwrt.ppag_ver = tbl_rev;
+			IWL_DEBUG_RADIO(mvm,
+					"Reading PPAG table v2 (tbl_rev=%d)\n",
+					tbl_rev);
+			goto read_table;
+		} else {
 			ret = -EINVAL;
 			goto out_free;
 		}
-		num_sub_bands = IWL_NUM_SUB_BANDS_V2;
-		gain = mvm->fwrt.ppag_table.v2.gain[0];
-		mvm->fwrt.ppag_ver = 2;
-		IWL_DEBUG_RADIO(mvm, "Reading PPAG table v2 (tbl_rev=1)\n");
-		goto read_table;
 	}
 
 	/* try to read ppag table revision 0 */
 	wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
-					 ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev);
+					 ACPI_PPAG_WIFI_DATA_SIZE_V1, &tbl_rev);
 	if (!IS_ERR(wifi_pkg)) {
 		if (tbl_rev != 0) {
 			ret = -EINVAL;
 			goto out_free;
 		}
-		num_sub_bands = IWL_NUM_SUB_BANDS;
+		num_sub_bands = IWL_NUM_SUB_BANDS_V1;
 		gain = mvm->fwrt.ppag_table.v1.gain[0];
-		mvm->fwrt.ppag_ver = 1;
+		mvm->fwrt.ppag_ver = 0;
 		IWL_DEBUG_RADIO(mvm, "Reading PPAG table v1 (tbl_rev=0)\n");
 		goto read_table;
 	}
@@ -956,15 +963,17 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm)
 	goto out_free;
 
 read_table:
-	enabled = &wifi_pkg->package.elements[1];
-	if (enabled->type != ACPI_TYPE_INTEGER ||
-	    (enabled->integer.value != 0 && enabled->integer.value != 1)) {
+	flags = &wifi_pkg->package.elements[1];
+
+	if (flags->type != ACPI_TYPE_INTEGER) {
 		ret = -EINVAL;
 		goto out_free;
 	}
 
-	mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(enabled->integer.value);
-	if (!mvm->fwrt.ppag_table.v1.enabled) {
+	mvm->fwrt.ppag_table.v1.flags = cpu_to_le32(flags->integer.value &
+						    IWL_PPAG_MASK);
+
+	if (!mvm->fwrt.ppag_table.v1.flags) {
 		ret = 0;
 		goto out_free;
 	}
@@ -992,12 +1001,13 @@ read_table:
 			    (j != 0 &&
 			     (gain[i * num_sub_bands + j] > ACPI_PPAG_MAX_HB ||
 			      gain[i * num_sub_bands + j] < ACPI_PPAG_MIN_HB))) {
-				mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(0);
+				mvm->fwrt.ppag_table.v1.flags = cpu_to_le32(0);
 				ret = -EINVAL;
 				goto out_free;
 			}
 		}
 	}
+
 	ret = 0;
 out_free:
 	kfree(data);
@@ -1015,7 +1025,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm)
 				"PPAG capability not supported by FW, command not sent.\n");
 		return 0;
 	}
-	if (!mvm->fwrt.ppag_table.v1.enabled) {
+	if (!mvm->fwrt.ppag_table.v1.flags) {
 		IWL_DEBUG_RADIO(mvm, "PPAG not enabled, command not sent.\n");
 		return 0;
 	}
@@ -1024,20 +1034,28 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm)
 					PER_PLATFORM_ANT_GAIN_CMD,
 					IWL_FW_CMD_VER_UNKNOWN);
 	if (cmd_ver == 1) {
-		num_sub_bands = IWL_NUM_SUB_BANDS;
+		num_sub_bands = IWL_NUM_SUB_BANDS_V1;
 		gain = mvm->fwrt.ppag_table.v1.gain[0];
 		cmd_size = sizeof(mvm->fwrt.ppag_table.v1);
-		if (mvm->fwrt.ppag_ver == 2) {
+		if (mvm->fwrt.ppag_ver == 1 || mvm->fwrt.ppag_ver == 2) {
 			IWL_DEBUG_RADIO(mvm,
-					"PPAG table is v2 but FW supports v1, sending truncated table\n");
+					"PPAG table rev is %d but FW supports v1, sending truncated table\n",
+					mvm->fwrt.ppag_ver);
+			mvm->fwrt.ppag_table.v1.flags &=
+				cpu_to_le32(IWL_PPAG_ETSI_MASK);
 		}
-	} else if (cmd_ver == 2) {
+	} else if (cmd_ver == 2 || cmd_ver == 3) {
 		num_sub_bands = IWL_NUM_SUB_BANDS_V2;
 		gain = mvm->fwrt.ppag_table.v2.gain[0];
 		cmd_size = sizeof(mvm->fwrt.ppag_table.v2);
-		if (mvm->fwrt.ppag_ver == 1) {
+		if (mvm->fwrt.ppag_ver == 0) {
 			IWL_DEBUG_RADIO(mvm,
 					"PPAG table is v1 but FW supports v2, sending padded table\n");
+		} else if (cmd_ver == 2 && mvm->fwrt.ppag_ver == 2) {
+			IWL_DEBUG_RADIO(mvm,
+					"PPAG table is v3 but FW supports v2, sending partial bitmap.\n");
+			mvm->fwrt.ppag_table.v1.flags &=
+				cpu_to_le32(IWL_PPAG_ETSI_MASK);
 		}
 	} else {
 		IWL_DEBUG_RADIO(mvm, "Unsupported PPAG command version\n");
@@ -1102,7 +1120,7 @@ static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
 		IWL_DEBUG_RADIO(mvm,
 				"System vendor '%s' is not in the approved list, disabling PPAG.\n",
 				dmi_get_system_info(DMI_SYS_VENDOR));
-		mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(0);
+		mvm->fwrt.ppag_table.v1.flags = cpu_to_le32(0);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 9a0f28d8628ecf86841682784d4d34aac9b1d336 Mon Sep 17 00:00:00 2001
From: ybaruch <yaara.baruch@intel.com>
Date: Wed, 31 Mar 2021 12:14:48 +0300
Subject: iwlwifi: add new so-gf device

add new so-gf device to the driver.

Signed-off-by: ybaruch <yaara.baruch@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.d6b0c1f85a7e.I2098ca066607edc48336021ea2e5afdbf8196acf@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index dd7e295397c8..729e5503f17a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -1103,6 +1103,13 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
 		      iwl_cfg_bz_a0_mr_a0, iwl_ma_name),
 
+/* So with GF */
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY,
+		      IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB,
+		      iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name)
+
 #endif /* CONFIG_IWLMVM */
 };
 
-- 
cgit v1.2.3


From 9cd243f24ec1960403de5f24f45155af24d94b13 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Wed, 31 Mar 2021 12:14:49 +0300
Subject: iwlwifi: move iwl_configure_rxq to be used by other op_modes

All the op_modes need to send this command as well. Instead of
duplicating the code from mvm, put the code in a common place.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.deb71fce883a.I5eef009512f180e5974f3f491ff56c763cdcc878@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/init.c    | 59 ++++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h |  1 +
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c     | 61 ++-----------------------
 3 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c
index 986913f2fbd5..2ecec00db9da 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/init.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c
@@ -10,6 +10,8 @@
 
 #include "fw/api/soc.h"
 #include "fw/api/commands.h"
+#include "fw/api/rx.h"
+#include "fw/api/datapath.h"
 
 void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
 			const struct iwl_fw *fw,
@@ -95,3 +97,60 @@ int iwl_set_soc_latency(struct iwl_fw_runtime *fwrt)
 	return ret;
 }
 IWL_EXPORT_SYMBOL(iwl_set_soc_latency);
+
+int iwl_configure_rxq(struct iwl_fw_runtime *fwrt)
+{
+	int i, num_queues, size, ret;
+	struct iwl_rfh_queue_config *cmd;
+	struct iwl_host_cmd hcmd = {
+		.id = WIDE_ID(DATA_PATH_GROUP, RFH_QUEUE_CONFIG_CMD),
+		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
+	};
+
+	/*
+	 * The default queue is configured via context info, so if we
+	 * have a single queue, there's nothing to do here.
+	 */
+	if (fwrt->trans->num_rx_queues == 1)
+		return 0;
+
+	if (fwrt->trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_22000)
+		return 0;
+
+	/* skip the default queue */
+	num_queues = fwrt->trans->num_rx_queues - 1;
+
+	size = struct_size(cmd, data, num_queues);
+
+	cmd = kzalloc(size, GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->num_queues = num_queues;
+
+	for (i = 0; i < num_queues; i++) {
+		struct iwl_trans_rxq_dma_data data;
+
+		cmd->data[i].q_num = i + 1;
+		iwl_trans_get_rxq_dma_data(fwrt->trans, i + 1, &data);
+
+		cmd->data[i].fr_bd_cb = cpu_to_le64(data.fr_bd_cb);
+		cmd->data[i].urbd_stts_wrptr =
+			cpu_to_le64(data.urbd_stts_wrptr);
+		cmd->data[i].ur_bd_cb = cpu_to_le64(data.ur_bd_cb);
+		cmd->data[i].fr_bd_wid = cpu_to_le32(data.fr_bd_wid);
+	}
+
+	hcmd.data[0] = cmd;
+	hcmd.len[0] = size;
+
+	ret = iwl_trans_send_cmd(fwrt->trans, &hcmd);
+
+	kfree(cmd);
+
+	if (ret)
+		IWL_ERR(fwrt, "Failed to configure RX queues: %d\n", ret);
+
+	return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_configure_rxq);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index 0dba5444f2db..35af85a5430b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -190,5 +190,6 @@ void iwl_free_fw_paging(struct iwl_fw_runtime *fwrt);
 
 void iwl_get_shared_mem_conf(struct iwl_fw_runtime *fwrt);
 int iwl_set_soc_latency(struct iwl_fw_runtime *fwrt);
+int iwl_configure_rxq(struct iwl_fw_runtime *fwrt);
 
 #endif /* __iwl_fw_runtime_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 4f297edd8e18..b8a25943a57d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -73,56 +73,6 @@ static int iwl_send_rss_cfg_cmd(struct iwl_mvm *mvm)
 	return iwl_mvm_send_cmd_pdu(mvm, RSS_CONFIG_CMD, 0, sizeof(cmd), &cmd);
 }
 
-static int iwl_configure_rxq(struct iwl_mvm *mvm)
-{
-	int i, num_queues, size, ret;
-	struct iwl_rfh_queue_config *cmd;
-	struct iwl_host_cmd hcmd = {
-		.id = WIDE_ID(DATA_PATH_GROUP, RFH_QUEUE_CONFIG_CMD),
-		.dataflags[0] = IWL_HCMD_DFL_NOCOPY,
-	};
-
-	/*
-	 * The default queue is configured via context info, so if we
-	 * have a single queue, there's nothing to do here.
-	 */
-	if (mvm->trans->num_rx_queues == 1)
-		return 0;
-
-	/* skip the default queue */
-	num_queues = mvm->trans->num_rx_queues - 1;
-
-	size = struct_size(cmd, data, num_queues);
-
-	cmd = kzalloc(size, GFP_KERNEL);
-	if (!cmd)
-		return -ENOMEM;
-
-	cmd->num_queues = num_queues;
-
-	for (i = 0; i < num_queues; i++) {
-		struct iwl_trans_rxq_dma_data data;
-
-		cmd->data[i].q_num = i + 1;
-		iwl_trans_get_rxq_dma_data(mvm->trans, i + 1, &data);
-
-		cmd->data[i].fr_bd_cb = cpu_to_le64(data.fr_bd_cb);
-		cmd->data[i].urbd_stts_wrptr =
-			cpu_to_le64(data.urbd_stts_wrptr);
-		cmd->data[i].ur_bd_cb = cpu_to_le64(data.ur_bd_cb);
-		cmd->data[i].fr_bd_wid = cpu_to_le32(data.fr_bd_wid);
-	}
-
-	hcmd.data[0] = cmd;
-	hcmd.len[0] = size;
-
-	ret = iwl_mvm_send_cmd(mvm, &hcmd);
-
-	kfree(cmd);
-
-	return ret;
-}
-
 static int iwl_mvm_send_dqa_cmd(struct iwl_mvm *mvm)
 {
 	struct iwl_dqa_enable_cmd dqa_cmd = {
@@ -1503,14 +1453,9 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 	}
 
 	/* Init RSS configuration */
-	if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_22000) {
-		ret = iwl_configure_rxq(mvm);
-		if (ret) {
-			IWL_ERR(mvm, "Failed to configure RX queues: %d\n",
-				ret);
-			goto error;
-		}
-	}
+	ret = iwl_configure_rxq(&mvm->fwrt);
+	if (ret)
+		goto error;
 
 	if (iwl_mvm_has_new_rx_api(mvm)) {
 		ret = iwl_send_rss_cfg_cmd(mvm);
-- 
cgit v1.2.3


From d2bfda8addf9d78146c1ae3a39f4df3c845ea0d5 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Wed, 31 Mar 2021 12:14:50 +0300
Subject: iwlwifi: mvm: support BIOS enable/disable for 11ax in Ukraine

Read the new BIOS DSM and Pass to FW if to disable\enable
11ax for Ukraine according to the BIOS key. this is
needed to enable OEMs to control enable/disable 11ax in Ukraine

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.055654e7707e.If7eaf9839cd5d59729a235df07e31668b408e740@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c       | 29 ++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h       |  9 +++++++
 .../net/wireless/intel/iwlwifi/fw/api/nvm-reg.h    |  3 +++
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        |  4 +++
 4 files changed, 45 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 56c4d05c9cc2..874cc110d97f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -695,3 +695,32 @@ int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 	return 0;
 }
 IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
+
+u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev)
+{
+	union acpi_object *obj;
+	u32 ret;
+
+	obj = iwl_acpi_get_dsm_object(dev, 0,
+				      DSM_FUNC_11AX_ENABLEMENT, NULL,
+				      &iwl_guid);
+	if (IS_ERR(obj))
+		return 0;
+
+	if (obj->type != ACPI_TYPE_INTEGER) {
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: DSM method did not return a valid object, type=%d\n",
+				    obj->type);
+		ret = 0;
+		goto out;
+	}
+
+	ret = obj->integer.value;
+	IWL_DEBUG_DEV_RADIO(dev,
+			    "ACPI: DSM method evaluated: func=DSM_FUNC_11AX_ENABLEMENT, ret=%d\n",
+			    ret);
+out:
+	ACPI_FREE(obj);
+	return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_eval_dsm_11ax_enablement);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index f8ea588e4fbb..b7389f41a009 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -77,6 +77,7 @@ enum iwl_dsm_funcs_rev_0 {
 	DSM_FUNC_QUERY = 0,
 	DSM_FUNC_DISABLE_SRD = 1,
 	DSM_FUNC_ENABLE_INDONESIA_5G2 = 2,
+	DSM_FUNC_11AX_ENABLEMENT = 6,
 };
 
 enum iwl_dsm_values_srd {
@@ -160,6 +161,8 @@ int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt, __le32 *block_list_array,
 		     int *block_list_size);
 
+u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev);
+
 #else /* CONFIG_ACPI */
 
 static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
@@ -235,5 +238,11 @@ static inline int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
 {
 	return -ENOENT;
 }
+
+static inline u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_ACPI */
 #endif /* __iwl_fw_acpi__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index fbca9dd872e7..3a358bbdde8e 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -414,6 +414,9 @@ enum iwl_lari_config_masks {
 	LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK		= BIT(3),
 };
 
+#define IWL_11AX_UKRAINE_MASK 3
+#define IWL_11AX_UKRAINE_SHIFT 8
+
 /**
  * struct iwl_lari_config_change_cmd_v1 - change LARI configuration
  * @config_bitmap: bit map of the config commands. each bit will trigger a
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index b8a25943a57d..ff3967f7c66b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1213,6 +1213,10 @@ static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 		cmd.config_bitmap |=
 			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
 
+	ret = iwl_acpi_eval_dsm_11ax_enablement((&mvm->fwrt)->dev);
+	cmd.config_bitmap |=
+		cpu_to_le32((ret &= IWL_11AX_UKRAINE_MASK) << IWL_11AX_UKRAINE_SHIFT);
+
 	/* apply more config masks here */
 
 	if (cmd.config_bitmap) {
-- 
cgit v1.2.3


From f21afabae70394aa2cf4bdee24f34884cdb93be1 Mon Sep 17 00:00:00 2001
From: Harish Mitty <harish.mitty@intel.com>
Date: Wed, 31 Mar 2021 12:14:51 +0300
Subject: iwlwifi: mvm: refactor ACPI DSM evaluation function

Instead of implementing the DSM evaluation function in the MVM code,
refactor it so it can be generalized and part of the common ACPI
implementation.

Signed-off-by: Harish Mitty <harish.mitty@intel.com>
[reworded subject and commit message]
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.a24af3551aac.I8e6bd5eb05f853b6331fa4823750f7ba8ffe46e6@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 58 ++++++++++++++++++---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h |  4 +-
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c  | 76 +---------------------------
 3 files changed, 55 insertions(+), 83 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 874cc110d97f..eecc76a6e408 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -696,20 +696,27 @@ int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 }
 IWL_EXPORT_SYMBOL(iwl_sar_geo_init);
 
-u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev)
+static u32 iwl_acpi_eval_dsm_func(struct device *dev, enum iwl_dsm_funcs_rev_0 eval_func)
 {
 	union acpi_object *obj;
 	u32 ret;
 
 	obj = iwl_acpi_get_dsm_object(dev, 0,
-				      DSM_FUNC_11AX_ENABLEMENT, NULL,
+				      eval_func, NULL,
 				      &iwl_guid);
-	if (IS_ERR(obj))
+
+	if (IS_ERR(obj)) {
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: DSM func '%d': Got Error in obj = %ld\n",
+				    eval_func,
+				    PTR_ERR(obj));
 		return 0;
+	}
 
 	if (obj->type != ACPI_TYPE_INTEGER) {
 		IWL_DEBUG_DEV_RADIO(dev,
-				    "ACPI: DSM method did not return a valid object, type=%d\n",
+				    "ACPI: DSM func '%d' did not return a valid object, type=%d\n",
+				    eval_func,
 				    obj->type);
 		ret = 0;
 		goto out;
@@ -717,10 +724,49 @@ u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev)
 
 	ret = obj->integer.value;
 	IWL_DEBUG_DEV_RADIO(dev,
-			    "ACPI: DSM method evaluated: func=DSM_FUNC_11AX_ENABLEMENT, ret=%d\n",
+			    "ACPI: DSM method evaluated: func='%d', ret=%d\n",
+			    eval_func,
 			    ret);
 out:
 	ACPI_FREE(obj);
 	return ret;
 }
-IWL_EXPORT_SYMBOL(iwl_acpi_eval_dsm_11ax_enablement);
+
+__le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
+{
+	u32 ret;
+	__le32 config_bitmap = 0;
+
+	/*
+	 ** Evaluate func 'DSM_FUNC_ENABLE_INDONESIA_5G2'
+	 */
+	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_ENABLE_INDONESIA_5G2);
+
+	if (ret == DSM_VALUE_INDONESIA_ENABLE)
+		config_bitmap |=
+			cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
+
+	/*
+	 ** Evaluate func 'DSM_FUNC_DISABLE_SRD'
+	 */
+	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_DISABLE_SRD);
+
+	if (ret == DSM_VALUE_SRD_PASSIVE)
+		config_bitmap |=
+			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
+
+	else if (ret == DSM_VALUE_SRD_DISABLE)
+		config_bitmap |=
+			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
+
+	/*
+	 ** Evaluate func 'DSM_FUNC_11AX_ENABLEMENT'
+	 */
+	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_11AX_ENABLEMENT);
+
+	config_bitmap |=
+		cpu_to_le32((ret &= IWL_11AX_UKRAINE_MASK) << IWL_11AX_UKRAINE_SHIFT);
+
+	return config_bitmap;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_lari_config_bitmap);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index b7389f41a009..d16e6ec08c9f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -161,7 +161,7 @@ int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt, __le32 *block_list_array,
 		     int *block_list_size);
 
-u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev);
+__le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt);
 
 #else /* CONFIG_ACPI */
 
@@ -239,7 +239,7 @@ static inline int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
 	return -ENOENT;
 }
 
-static inline u32 iwl_acpi_eval_dsm_11ax_enablement(struct device *dev)
+static inline __le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
 {
 	return 0;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index ff3967f7c66b..be831dd2c05c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1112,33 +1112,6 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
 		IWL_DEBUG_RADIO(mvm, "failed to send TAS_CONFIG (%d)\n", ret);
 }
 
-static u8 iwl_mvm_eval_dsm_indonesia_5g2(struct iwl_mvm *mvm)
-{
-	u8 value;
-
-	int ret = iwl_acpi_get_dsm_u8((&mvm->fwrt)->dev, 0,
-				      DSM_FUNC_ENABLE_INDONESIA_5G2,
-				      &iwl_guid, &value);
-
-	if (ret < 0)
-		IWL_DEBUG_RADIO(mvm,
-				"Failed to evaluate DSM function ENABLE_INDONESIA_5G2, ret=%d\n",
-				ret);
-
-	else if (value >= DSM_VALUE_INDONESIA_MAX)
-		IWL_DEBUG_RADIO(mvm,
-				"DSM function ENABLE_INDONESIA_5G2 return invalid value, value=%d\n",
-				value);
-
-	else if (value == DSM_VALUE_INDONESIA_ENABLE) {
-		IWL_DEBUG_RADIO(mvm,
-				"Evaluated DSM function ENABLE_INDONESIA_5G2: Enabling 5g2\n");
-		return DSM_VALUE_INDONESIA_ENABLE;
-	}
-	/* default behaviour is disabled */
-	return DSM_VALUE_INDONESIA_DISABLE;
-}
-
 static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
 {
 	u8 value;
@@ -1163,59 +1136,12 @@ static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
 	return DSM_VALUE_RFI_DISABLE;
 }
 
-static u8 iwl_mvm_eval_dsm_disable_srd(struct iwl_mvm *mvm)
-{
-	u8 value;
-	int ret = iwl_acpi_get_dsm_u8((&mvm->fwrt)->dev, 0,
-				      DSM_FUNC_DISABLE_SRD,
-				      &iwl_guid, &value);
-
-	if (ret < 0)
-		IWL_DEBUG_RADIO(mvm,
-				"Failed to evaluate DSM function DISABLE_SRD, ret=%d\n",
-				ret);
-
-	else if (value >= DSM_VALUE_SRD_MAX)
-		IWL_DEBUG_RADIO(mvm,
-				"DSM function DISABLE_SRD return invalid value, value=%d\n",
-				value);
-
-	else if (value == DSM_VALUE_SRD_PASSIVE) {
-		IWL_DEBUG_RADIO(mvm,
-				"Evaluated DSM function DISABLE_SRD: setting SRD to passive\n");
-		return DSM_VALUE_SRD_PASSIVE;
-
-	} else if (value == DSM_VALUE_SRD_DISABLE) {
-		IWL_DEBUG_RADIO(mvm,
-				"Evaluated DSM function DISABLE_SRD: disabling SRD\n");
-		return DSM_VALUE_SRD_DISABLE;
-	}
-	/* default behaviour is active */
-	return DSM_VALUE_SRD_ACTIVE;
-}
-
 static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 {
-	u8 ret;
 	int cmd_ret;
 	struct iwl_lari_config_change_cmd_v2 cmd = {};
 
-	if (iwl_mvm_eval_dsm_indonesia_5g2(mvm) == DSM_VALUE_INDONESIA_ENABLE)
-		cmd.config_bitmap |=
-			cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
-
-	ret = iwl_mvm_eval_dsm_disable_srd(mvm);
-	if (ret == DSM_VALUE_SRD_PASSIVE)
-		cmd.config_bitmap |=
-			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK);
-
-	else if (ret == DSM_VALUE_SRD_DISABLE)
-		cmd.config_bitmap |=
-			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
-
-	ret = iwl_acpi_eval_dsm_11ax_enablement((&mvm->fwrt)->dev);
-	cmd.config_bitmap |=
-		cpu_to_le32((ret &= IWL_11AX_UKRAINE_MASK) << IWL_11AX_UKRAINE_SHIFT);
+	cmd.config_bitmap = iwl_acpi_get_lari_config_bitmap(&mvm->fwrt);
 
 	/* apply more config masks here */
 
-- 
cgit v1.2.3


From 098f1ea54e7ce80c39ecdf1b61a0939380674c1b Mon Sep 17 00:00:00 2001
From: Ravi Darsi <ravi.darsi@intel.com>
Date: Wed, 31 Mar 2021 12:14:52 +0300
Subject: iwlwifi: mvm: Use IWL_INFO in fw_reset_handshake()

Debug message "firmware didn't ACK the reset - continue anyway\n"
in fw_reset_handshake() is classified as error, however this is not
an error as it is ignored. So, change it to info message for proper
classification of debug messages.

Signed-off-by: Ravi Darsi <ravi.darsi@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210331121101.449b3092c330.I515edcc41913ca7fbe4a4de923671d120d5618c6@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 94ffc1ae484d..9d94c47952d1 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -108,8 +108,8 @@ static void iwl_trans_pcie_fw_reset_handshake(struct iwl_trans *trans)
 	ret = wait_event_timeout(trans_pcie->fw_reset_waitq,
 				 trans_pcie->fw_reset_done, FW_RESET_TIMEOUT);
 	if (!ret)
-		IWL_ERR(trans,
-			"firmware didn't ACK the reset - continue anyway\n");
+		IWL_INFO(trans,
+			 "firmware didn't ACK the reset - continue anyway\n");
 }
 
 void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
-- 
cgit v1.2.3


From 0c73f47b430d125163ae42d7f271df1050cb993f Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 11 Apr 2021 12:46:19 +0300
Subject: iwlwifi: remove TCM events

Nobody uses that in the user space.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.b6560a5de0cd.I5dac9c60faed7f48b06d352aa2d65bcf8142c2dc@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h   |  1 -
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 18 ++----------------
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index e2a37ac7c4b1..4d9d4d6892fc 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -591,7 +591,6 @@ struct iwl_mvm_tcm {
 		enum iwl_mvm_traffic_load global_load;
 		bool low_latency[NUM_MAC_INDEX_DRIVER];
 		bool change[NUM_MAC_INDEX_DRIVER];
-		bool global_change;
 	} result;
 };
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index b6b481ff1518..c566be99a4c7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1030,15 +1030,9 @@ iwl_mvm_tcm_load(struct iwl_mvm *mvm, u32 airtime, unsigned long elapsed)
 	return IWL_MVM_TRAFFIC_LOW;
 }
 
-struct iwl_mvm_tcm_iter_data {
-	struct iwl_mvm *mvm;
-	bool any_sent;
-};
-
 static void iwl_mvm_tcm_iter(void *_data, u8 *mac, struct ieee80211_vif *vif)
 {
-	struct iwl_mvm_tcm_iter_data *data = _data;
-	struct iwl_mvm *mvm = data->mvm;
+	struct iwl_mvm *mvm = _data;
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	bool low_latency, prev = mvmvif->low_latency & LOW_LATENCY_TRAFFIC;
 
@@ -1060,22 +1054,15 @@ static void iwl_mvm_tcm_iter(void *_data, u8 *mac, struct ieee80211_vif *vif)
 	} else {
 		iwl_mvm_update_quotas(mvm, false, NULL);
 	}
-
-	data->any_sent = true;
 }
 
 static void iwl_mvm_tcm_results(struct iwl_mvm *mvm)
 {
-	struct iwl_mvm_tcm_iter_data data = {
-		.mvm = mvm,
-		.any_sent = false,
-	};
-
 	mutex_lock(&mvm->mutex);
 
 	ieee80211_iterate_active_interfaces(
 		mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
-		iwl_mvm_tcm_iter, &data);
+		iwl_mvm_tcm_iter, mvm);
 
 	if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN))
 		iwl_mvm_config_scan(mvm);
@@ -1257,7 +1244,6 @@ static unsigned long iwl_mvm_calc_tcm_stats(struct iwl_mvm *mvm,
 	}
 
 	load = iwl_mvm_tcm_load(mvm, total_airtime, elapsed);
-	mvm->tcm.result.global_change = load != mvm->tcm.result.global_load;
 	mvm->tcm.result.global_load = load;
 
 	for (i = 0; i < NUM_NL80211_BANDS; i++) {
-- 
cgit v1.2.3


From fb54b86339adb6915a512d955d49d0d7f2aa4ba2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 11 Apr 2021 12:46:20 +0300
Subject: iwlwifi: remove remaining software checksum code

After the removal of the software checksum code for the
A-MSDU path that we had for testing, the csum_skb variable
stuck around. Remove it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.280f268ae679.Iad455b6c91e427c9f74963bbd3eb0ce743aaac53@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c  | 11 +++--------
 drivers/net/wireless/intel/iwlwifi/queue/tx.c | 11 +++--------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index b07194b74d97..0346505351f5 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2003-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2003-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -1350,7 +1350,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 		/* this is the data left for this subframe */
 		unsigned int data_left =
 			min_t(unsigned int, mss, total_len);
-		struct sk_buff *csum_skb = NULL;
 		unsigned int hdr_tb_len;
 		dma_addr_t hdr_tb_phys;
 		u8 *subf_hdrs_start = hdr_page->pos;
@@ -1381,10 +1380,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 		hdr_tb_len = hdr_page->pos - start_hdr;
 		hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
 					     hdr_tb_len, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys))) {
-			dev_kfree_skb(csum_skb);
+		if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys)))
 			return -EINVAL;
-		}
 		iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
 				       hdr_tb_len, false);
 		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
@@ -1403,10 +1400,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 
 			tb_phys = dma_map_single(trans->dev, tso.data,
 						 size, DMA_TO_DEVICE);
-			if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
-				dev_kfree_skb(csum_skb);
+			if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
 				return -EINVAL;
-			}
 
 			iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
 					       size, false);
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
index 833f43d1ca7a..494f9dbf92d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2020 Intel Corporation
+ * Copyright (C) 2020-2021 Intel Corporation
  */
 #include <net/tso.h>
 #include <linux/tcp.h>
@@ -399,7 +399,6 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
 	while (total_len) {
 		/* this is the data left for this subframe */
 		unsigned int data_left = min_t(unsigned int, mss, total_len);
-		struct sk_buff *csum_skb = NULL;
 		unsigned int tb_len;
 		dma_addr_t tb_phys;
 		u8 *subf_hdrs_start = hdr_page->pos;
@@ -430,10 +429,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
 		tb_len = hdr_page->pos - start_hdr;
 		tb_phys = dma_map_single(trans->dev, start_hdr,
 					 tb_len, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
-			dev_kfree_skb(csum_skb);
+		if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
 			goto out_err;
-		}
 		/*
 		 * No need for _with_wa, this is from the TSO page and
 		 * we leave some space at the end of it so can't hit
@@ -458,10 +455,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
 			ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
 							  tb_phys, tso.data,
 							  tb_len, NULL);
-			if (ret) {
-				dev_kfree_skb(csum_skb);
+			if (ret)
 				goto out_err;
-			}
 
 			data_left -= tb_len;
 			tso_build_data(skb, &tso, tb_len);
-- 
cgit v1.2.3


From 2b84e6328e65e263f62bae602894712449f41bc9 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 11 Apr 2021 12:46:21 +0300
Subject: iwlwifi: don't warn if we can't wait for empty tx queues

If the firmware is dead, the Tx queues won't drain, but
leaving a print in the log is enough, no need to WARN.
If the firmware is dead, we must already have printed enough
information in the log anyway.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.9a438b2320a9.I4aa897178df82acefd80173d76dd6849ad1bcdc1@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 4a5822c1be13..6abb7385dae1 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -1267,7 +1267,8 @@ static inline int iwl_trans_wait_tx_queues_empty(struct iwl_trans *trans,
 	if (WARN_ON_ONCE(!trans->ops->wait_tx_queues_empty))
 		return -ENOTSUPP;
 
-	if (WARN_ON_ONCE(trans->state != IWL_TRANS_FW_ALIVE)) {
+	/* No need to wait if the firmware is not alive */
+	if (trans->state != IWL_TRANS_FW_ALIVE) {
 		IWL_ERR(trans, "%s bad state = %d\n", __func__, trans->state);
 		return -EIO;
 	}
-- 
cgit v1.2.3


From 4cf2f5904d971a461f67825434ae3c31900ff84b Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Apr 2021 12:46:22 +0300
Subject: iwlwifi: queue: avoid memory leak in reset flow

In case the device is stopped any usage of hw queues needs to be
reallocated in fw due to fw reset after device stop, so all driver
internal queue should also be freed, and if we don't free the next usage
would leak the old memory and get in recover flows
"iwlwifi 0000:00:03.0: dma_pool_destroy iwlwifi:bc" warning.

Also warn about trying to reuse an internal allocated queue.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.c72d2f0355c4.Ia3baff633b9b9109f88ab379ef0303aa152c16bf@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/pcie/trans-gen2.c   |  4 +--
 drivers/net/wireless/intel/iwlwifi/queue/tx.c      | 30 +++++-----------------
 drivers/net/wireless/intel/iwlwifi/queue/tx.h      |  3 +--
 3 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 9d94c47952d1..1bcd36e9e008 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include "iwl-trans.h"
 #include "iwl-prph.h"
@@ -143,7 +143,7 @@ void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans)
 	if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) {
 		IWL_DEBUG_INFO(trans,
 			       "DEVICE_ENABLED bit was set and is now cleared\n");
-		iwl_txq_gen2_tx_stop(trans);
+		iwl_txq_gen2_tx_free(trans);
 		iwl_pcie_rx_stop(trans);
 	}
 
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
index 494f9dbf92d6..451b06069350 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c
@@ -13,30 +13,6 @@
 #include "iwl-scd.h"
 #include <linux/dmapool.h>
 
-/*
- * iwl_txq_gen2_tx_stop - Stop all Tx DMA channels
- */
-void iwl_txq_gen2_tx_stop(struct iwl_trans *trans)
-{
-	int txq_id;
-
-	/*
-	 * This function can be called before the op_mode disabled the
-	 * queues. This happens when we have an rfkill interrupt.
-	 * Since we stop Tx altogether - mark the queues as stopped.
-	 */
-	memset(trans->txqs.queue_stopped, 0,
-	       sizeof(trans->txqs.queue_stopped));
-	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
-
-	/* Unmap DMA from host system and free skb's */
-	for (txq_id = 0; txq_id < ARRAY_SIZE(trans->txqs.txq); txq_id++) {
-		if (!trans->txqs.txq[txq_id])
-			continue;
-		iwl_txq_gen2_unmap(trans, txq_id);
-	}
-}
-
 /*
  * iwl_txq_update_byte_tbl - Set up entry in Tx byte-count array
  */
@@ -1184,6 +1160,12 @@ static int iwl_txq_alloc_response(struct iwl_trans *trans, struct iwl_txq *txq,
 		goto error_free_resp;
 	}
 
+	if (WARN_ONCE(trans->txqs.txq[qid],
+		      "queue %d already allocated\n", qid)) {
+		ret = -EIO;
+		goto error_free_resp;
+	}
+
 	txq->id = qid;
 	trans->txqs.txq[qid] = txq;
 	wr_ptr &= (trans->trans_cfg->base_params->max_tfd_queue_size - 1);
diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.h b/drivers/net/wireless/intel/iwlwifi/queue/tx.h
index af1dbdf5617a..20efc62acf13 100644
--- a/drivers/net/wireless/intel/iwlwifi/queue/tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2020 Intel Corporation
+ * Copyright (C) 2020-2021 Intel Corporation
  */
 #ifndef __iwl_trans_queue_tx_h__
 #define __iwl_trans_queue_tx_h__
@@ -123,7 +123,6 @@ int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
 void iwl_txq_dyn_free(struct iwl_trans *trans, int queue);
 void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq);
 void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq);
-void iwl_txq_gen2_tx_stop(struct iwl_trans *trans);
 void iwl_txq_gen2_tx_free(struct iwl_trans *trans);
 int iwl_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num,
 		 bool cmd_queue);
-- 
cgit v1.2.3


From 4f7411d648939dfac596a7af8f28fc6eeb9833e8 Mon Sep 17 00:00:00 2001
From: Roee Goldfiner <roee.h.goldfiner@intel.com>
Date: Sun, 11 Apr 2021 12:46:23 +0300
Subject: iwlwifi: mvm: umac error table mismatch

umac_error_table In TLV address was read using mask on MSB
but on the same table in alive message it was without which
caused mismatch in devices with different memory region MSB

Signed-off-by: Roee Goldfiner <roee.h.goldfiner@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.179fee442115.Ib6eabe86cfda0b6044f07c07448c366b6e07e53d@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/img.h  | 5 ++++-
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 4 +---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c  | 5 +++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h
index 1dee4714e505..153a3529e77a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/img.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016 Intel Deutschland GmbH
  */
@@ -116,6 +116,9 @@ struct fw_img {
 #define PAGING_CMD_NUM_OF_PAGES_IN_LAST_GRP_POS	0
 #define PAGING_TLV_SECURE_MASK 1
 
+/* FW MSB Mask for regions/cache_control */
+#define FW_ADDR_CACHE_CONTROL 0xC0000000UL
+
 /**
  * struct iwl_fw_paging
  * @fw_paging_phys: page phy pointer
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index eb168dc535d4..884750bf7840 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -550,8 +550,6 @@ static int iwl_parse_v1_v2_firmware(struct iwl_drv *drv,
 	return 0;
 }
 
-#define FW_ADDR_CACHE_CONTROL 0xC0000000
-
 static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
 				const struct firmware *ucode_raw,
 				struct iwl_firmware_pieces *pieces,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index be831dd2c05c..9a3981f43050 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -186,7 +186,8 @@ static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait,
 		mvm->trans->dbg.lmac_error_event_table[1] =
 			le32_to_cpu(lmac2->dbg_ptrs.error_event_table_ptr);
 
-	umac_error_table = le32_to_cpu(umac->dbg_ptrs.error_info_addr);
+	umac_error_table = le32_to_cpu(umac->dbg_ptrs.error_info_addr) &
+							~FW_ADDR_CACHE_CONTROL;
 
 	if (umac_error_table) {
 		if (umac_error_table >=
-- 
cgit v1.2.3


From aa1540ca7616b996327abb8cb6f19ee91b34dbe0 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Apr 2021 12:46:24 +0300
Subject: iwlwifi: mvm: remove PS from lower rates.

Power save (PS) should only be enabled when we reach the max phy rate.
Before we reach it (MCS_9) for VHT, we should keep trying to improve the
throughput.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.2a2fb9f9c25e.I7c7bbcfbdc1d35d2c3338778fb397dd5b08ea0e8@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index 91b6541d579f..b97708cb869d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /******************************************************************************
  *
- * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
+ * Copyright(c) 2005 - 2014, 2018 - 2021 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
  *
@@ -1926,9 +1926,7 @@ static bool rs_tpc_allowed(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	if (is_ht(rate))
 		return index == IWL_RATE_MCS_7_INDEX;
 	if (is_vht(rate))
-		return index == IWL_RATE_MCS_7_INDEX ||
-		       index == IWL_RATE_MCS_8_INDEX ||
-		       index == IWL_RATE_MCS_9_INDEX;
+		return index == IWL_RATE_MCS_9_INDEX;
 
 	WARN_ON_ONCE(1);
 	return false;
-- 
cgit v1.2.3


From 6da29d3b42ea8b23251f8312e083a8da2e5ed6eb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 11 Apr 2021 12:46:25 +0300
Subject: iwlwifi: mvm: don't lock mutex in RCU critical section

We cannot lock a mutex while we're in an RCU critical section. At
the same time, we're accessing data structures that are protected
by the mvm->mutex anyway, so just move the entire locking here to
use only that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.4d27bd36e10e.I1fd8e8fe442c41a5deaa560452b598ed7a60ada5@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 34ddef97b099..63d65018d098 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -1210,10 +1210,10 @@ static int _iwl_dbgfs_inject_beacon_ie(struct iwl_mvm *mvm, char *bin, int len)
 			IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE))
 		return -EINVAL;
 
-	rcu_read_lock();
+	mutex_lock(&mvm->mutex);
 
 	for (i = 0; i < NUM_MAC_INDEX_DRIVER; i++) {
-		vif = iwl_mvm_rcu_dereference_vif_id(mvm, i, true);
+		vif = iwl_mvm_rcu_dereference_vif_id(mvm, i, false);
 		if (!vif)
 			continue;
 
@@ -1253,18 +1253,16 @@ static int _iwl_dbgfs_inject_beacon_ie(struct iwl_mvm *mvm, char *bin, int len)
 				 &beacon_cmd.tim_size,
 				 beacon->data, beacon->len);
 
-	mutex_lock(&mvm->mutex);
 	iwl_mvm_mac_ctxt_send_beacon_cmd(mvm, beacon, &beacon_cmd,
 					 sizeof(beacon_cmd));
 	mutex_unlock(&mvm->mutex);
 
 	dev_kfree_skb(beacon);
 
-	rcu_read_unlock();
 	return 0;
 
 out_err:
-	rcu_read_unlock();
+	mutex_unlock(&mvm->mutex);
 	return -EINVAL;
 }
 
-- 
cgit v1.2.3


From 2b6166664d2b5553953a8a5dee0843f3b1ca18e2 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Apr 2021 12:46:26 +0300
Subject: iwlwifi: pcie: merge napi_poll_msix functions

The only difference between iwl_pcie_napi_poll_msix_shared() and
iwl_pcie_napi_poll_msix() is when we have a shared queue and nothing
in the rx queue.  This case doesn't affect CPU performance, so we can
merge the two functions.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.9d1b61ef53a5.I60b33d5379cf7c12f1de30fc3fd4cefc38220141@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 43 +++++++---------------------
 1 file changed, 10 insertions(+), 33 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 0cbc79949982..e4607f943ac3 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2003-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2003-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -1046,33 +1046,19 @@ static int iwl_pcie_napi_poll_msix(struct napi_struct *napi, int budget)
 	trans = trans_pcie->trans;
 
 	ret = iwl_pcie_rx_handle(trans, rxq->id, budget);
+	IWL_DEBUG_ISR(trans, "[%d] handled %d, budget %d\n", rxq->id, ret,
+		      budget);
 
 	if (ret < budget) {
-		spin_lock(&trans_pcie->irq_lock);
-		iwl_pcie_clear_irq(trans, rxq->id);
-		spin_unlock(&trans_pcie->irq_lock);
-
-		napi_complete_done(&rxq->napi, ret);
-	}
-
-	return ret;
-}
-
-static int iwl_pcie_napi_poll_msix_shared(struct napi_struct *napi, int budget)
-{
-	struct iwl_rxq *rxq = container_of(napi, struct iwl_rxq, napi);
-	struct iwl_trans_pcie *trans_pcie;
-	struct iwl_trans *trans;
-	int ret;
+		int irq_line = rxq->id;
 
-	trans_pcie = container_of(napi->dev, struct iwl_trans_pcie, napi_dev);
-	trans = trans_pcie->trans;
+		/* FIRST_RSS is shared with line 0 */
+		if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS &&
+		    rxq->id == 1)
+			irq_line = 0;
 
-	ret = iwl_pcie_rx_handle(trans, rxq->id, budget);
-
-	if (ret < budget) {
 		spin_lock(&trans_pcie->irq_lock);
-		iwl_pcie_clear_irq(trans, 0);
+		iwl_pcie_clear_irq(trans, irq_line);
 		spin_unlock(&trans_pcie->irq_lock);
 
 		napi_complete_done(&rxq->napi, ret);
@@ -1134,18 +1120,9 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
 		if (!rxq->napi.poll) {
 			int (*poll)(struct napi_struct *, int) = iwl_pcie_napi_poll;
 
-			if (trans_pcie->msix_enabled) {
+			if (trans_pcie->msix_enabled)
 				poll = iwl_pcie_napi_poll_msix;
 
-				if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX &&
-				    i == 0)
-					poll = iwl_pcie_napi_poll_msix_shared;
-
-				if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS &&
-				    i == 1)
-					poll = iwl_pcie_napi_poll_msix_shared;
-			}
-
 			netif_napi_add(&trans_pcie->napi_dev, &rxq->napi,
 				       poll, NAPI_POLL_WEIGHT);
 			napi_enable(&rxq->napi);
-- 
cgit v1.2.3


From 9d401222db54dac353fecad26f2610c12c33ef47 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Apr 2021 12:46:27 +0300
Subject: iwlwifi: pcie: add ISR debug info for msix debug

The debug prints help in case we get timeout on waiting for
hw.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124417.306e2e56d3e8.I72e2977abbb1fddf23b8476bedf6a183fe969ff5@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c    | 16 +++++++++++-----
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c |  5 +++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index e4607f943ac3..fb8491412be4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1023,6 +1023,9 @@ static int iwl_pcie_napi_poll(struct napi_struct *napi, int budget)
 
 	ret = iwl_pcie_rx_handle(trans, rxq->id, budget);
 
+	IWL_DEBUG_ISR(trans, "[%d] handled %d, budget %d\n",
+		      rxq->id, ret, budget);
+
 	if (ret < budget) {
 		spin_lock(&trans_pcie->irq_lock);
 		if (test_bit(STATUS_INT_ENABLED, &trans->status))
@@ -1636,10 +1639,13 @@ irqreturn_t iwl_pcie_irq_rx_msix_handler(int irq, void *dev_id)
 	if (WARN_ON(entry->entry >= trans->num_rx_queues))
 		return IRQ_NONE;
 
-	if (WARN_ONCE(!rxq, "Got MSI-X interrupt before we have Rx queues"))
+	if (WARN_ONCE(!rxq,
+		      "[%d] Got MSI-X interrupt before we have Rx queues",
+		      entry->entry))
 		return IRQ_NONE;
 
 	lock_map_acquire(&trans->sync_cmd_lockdep_map);
+	IWL_DEBUG_ISR(trans, "[%d] Got interrupt\n", entry->entry);
 
 	local_bh_disable();
 	if (napi_schedule_prep(&rxq->napi))
@@ -2203,8 +2209,8 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 
 	if (iwl_have_debug_level(IWL_DL_ISR)) {
 		IWL_DEBUG_ISR(trans,
-			      "ISR inta_fh 0x%08x, enabled (sw) 0x%08x (hw) 0x%08x\n",
-			      inta_fh, trans_pcie->fh_mask,
+			      "ISR[%d] inta_fh 0x%08x, enabled (sw) 0x%08x (hw) 0x%08x\n",
+			      entry->entry, inta_fh, trans_pcie->fh_mask,
 			      iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD));
 		if (inta_fh & ~trans_pcie->fh_mask)
 			IWL_DEBUG_ISR(trans,
@@ -2259,8 +2265,8 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id)
 	/* After checking FH register check HW register */
 	if (iwl_have_debug_level(IWL_DL_ISR)) {
 		IWL_DEBUG_ISR(trans,
-			      "ISR inta_hw 0x%08x, enabled (sw) 0x%08x (hw) 0x%08x\n",
-			      inta_hw, trans_pcie->hw_mask,
+			      "ISR[%d] inta_hw 0x%08x, enabled (sw) 0x%08x (hw) 0x%08x\n",
+			      entry->entry, inta_hw, trans_pcie->hw_mask,
 			      iwl_read32(trans, CSR_MSIX_HW_INT_MASK_AD));
 		if (inta_hw & ~trans_pcie->hw_mask)
 			IWL_DEBUG_ISR(trans,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 1bf4c37fe960..861dbc03d183 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1604,6 +1604,11 @@ iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 	} else {
 		trans_pcie->trans->num_rx_queues = num_irqs - 1;
 	}
+
+	IWL_DEBUG_INFO(trans,
+		       "MSI-X enabled with rx queues %d, vec mask 0x%x\n",
+		       trans_pcie->trans->num_rx_queues, trans_pcie->shared_vec_mask);
+
 	WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES);
 
 	trans_pcie->alloc_vecs = num_irqs;
-- 
cgit v1.2.3


From 3c21990b0ccecf0e3679f82cd1931999bd78b261 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Sun, 11 Apr 2021 12:46:28 +0300
Subject: iwlwifi: mvm: add support for version 3 of LARI_CONFIG_CHANGE
 command.

Add support for version 3 of the LARI_CONFIG_CHANGE command.
This is needed to support FW API change which is needed
to support 11ax enablement in Russia.

Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124418.7e68856c8a95.I83acdbe39b63c363cabc04ad42d1d0b9ce98901c@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c        | 10 +---------
 drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h | 17 ++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c         | 18 ++++++++++++------
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index eecc76a6e408..e31bba836c6f 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
  */
 #include <linux/uuid.h>
 #include "iwl-drv.h"
@@ -759,14 +759,6 @@ __le32 iwl_acpi_get_lari_config_bitmap(struct iwl_fw_runtime *fwrt)
 		config_bitmap |=
 			cpu_to_le32(LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK);
 
-	/*
-	 ** Evaluate func 'DSM_FUNC_11AX_ENABLEMENT'
-	 */
-	ret = iwl_acpi_eval_dsm_func(fwrt->dev, DSM_FUNC_11AX_ENABLEMENT);
-
-	config_bitmap |=
-		cpu_to_le32((ret &= IWL_11AX_UKRAINE_MASK) << IWL_11AX_UKRAINE_SHIFT);
-
 	return config_bitmap;
 }
 IWL_EXPORT_SYMBOL(iwl_acpi_get_lari_config_bitmap);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 3a358bbdde8e..dc8f2777e944 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
  */
@@ -437,6 +437,21 @@ struct iwl_lari_config_change_cmd_v2 {
 	__le32 oem_uhb_allow_bitmap;
 } __packed; /* LARI_CHANGE_CONF_CMD_S_VER_2 */
 
+/**
+ * struct iwl_lari_config_change_cmd_v3 - change LARI configuration
+ * @config_bitmap: bit map of the config commands. each bit will trigger a
+ * different predefined FW config operation
+ * @oem_uhb_allow_bitmap: bitmap of UHB enabled MCC sets
+ * @oem_11ax_allow_bitmap: bitmap of 11ax allowed MCCs.
+ * For each supported country, a pair of regulatory override bit and 11ax mode exist
+ * in the bit field.
+ */
+struct iwl_lari_config_change_cmd_v3 {
+	__le32 config_bitmap;
+	__le32 oem_uhb_allow_bitmap;
+	__le32 oem_11ax_allow_bitmap;
+} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_3 */
+
 /**
  * struct iwl_pnvm_init_complete_ntfy - PNVM initialization complete
  * @status: PNVM image loading status
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 9a3981f43050..8aa5f1a2c58c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -1140,18 +1140,24 @@ static u8 iwl_mvm_eval_dsm_rfi(struct iwl_mvm *mvm)
 static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
 {
 	int cmd_ret;
-	struct iwl_lari_config_change_cmd_v2 cmd = {};
+	struct iwl_lari_config_change_cmd_v3 cmd = {};
 
 	cmd.config_bitmap = iwl_acpi_get_lari_config_bitmap(&mvm->fwrt);
 
 	/* apply more config masks here */
 
 	if (cmd.config_bitmap) {
-		size_t cmd_size = iwl_fw_lookup_cmd_ver(mvm->fw,
-							REGULATORY_AND_NVM_GROUP,
-							LARI_CONFIG_CHANGE, 1) == 2 ?
-			sizeof(struct iwl_lari_config_change_cmd_v2) :
-			sizeof(struct iwl_lari_config_change_cmd_v1);
+		size_t cmd_size;
+		u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw,
+						   REGULATORY_AND_NVM_GROUP,
+						   LARI_CONFIG_CHANGE, 1);
+		if (cmd_ver == 3)
+			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v3);
+		else if (cmd_ver == 2)
+			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v2);
+		else
+			cmd_size = sizeof(struct iwl_lari_config_change_cmd_v1);
+
 		IWL_DEBUG_RADIO(mvm,
 				"sending LARI_CONFIG_CHANGE, config_bitmap=0x%x\n",
 				le32_to_cpu(cmd.config_bitmap));
-- 
cgit v1.2.3


From 70c9101d0a3e499b418e119d475d0e4533409e08 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 11 Apr 2021 12:46:29 +0300
Subject: iwlwifi: warn on SKB free w/o op-mode

If this ever happens, and it looks like some code in PCIe is
a bit broken and might lead to this, we want to know without
crashing, so add a WARN_ON_ONCE().

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124418.a18b7443dc55.Ia29836738acf14a55af5504aba90c6fea9fff785@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
index e6d2e0994317..cf9c64090014 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 Intel Deutschland GmbH
  */
@@ -176,6 +176,8 @@ iwl_op_mode_hw_rf_kill(struct iwl_op_mode *op_mode, bool state)
 static inline void iwl_op_mode_free_skb(struct iwl_op_mode *op_mode,
 					struct sk_buff *skb)
 {
+	if (WARN_ON_ONCE(!op_mode))
+		return;
 	op_mode->ops->free_skb(op_mode, skb);
 }
 
-- 
cgit v1.2.3


From a9174578262b86f15cb1882f35e53b1fae0649fd Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sun, 11 Apr 2021 12:46:30 +0300
Subject: iwlwifi: rs-fw: don't support stbc for HE 160

Our HE doesn't support it so never set HE 160 stbc

Fixes: 3e467b8e4cf4 ("iwlwifi: rs-fw: enable STBC in he correctly")
Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411124418.550fd1903eb7.I8ddbc2f87044a5ef78d916c9c59be797811a1b7f@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index 8772b65c9dab..2d58cb969918 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
  * Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include "rs.h"
 #include "fw-api.h"
@@ -72,19 +72,15 @@ static u16 rs_fw_get_config_flags(struct iwl_mvm *mvm,
 	bool vht_ena = vht_cap->vht_supported;
 	u16 flags = 0;
 
+	/* get STBC flags */
 	if (mvm->cfg->ht_params->stbc &&
 	    (num_of_ant(iwl_mvm_get_valid_tx_ant(mvm)) > 1)) {
-		if (he_cap->has_he) {
-			if (he_cap->he_cap_elem.phy_cap_info[2] &
-			    IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ)
-				flags |= IWL_TLC_MNG_CFG_FLAGS_STBC_MSK;
-
-			if (he_cap->he_cap_elem.phy_cap_info[7] &
-			    IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ)
-				flags |= IWL_TLC_MNG_CFG_FLAGS_HE_STBC_160MHZ_MSK;
-		} else if ((ht_cap->cap & IEEE80211_HT_CAP_RX_STBC) ||
-			   (vht_ena &&
-			    (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK)))
+		if (he_cap->has_he && he_cap->he_cap_elem.phy_cap_info[2] &
+				      IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ)
+			flags |= IWL_TLC_MNG_CFG_FLAGS_STBC_MSK;
+		else if (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK)
+			flags |= IWL_TLC_MNG_CFG_FLAGS_STBC_MSK;
+		else if (ht_cap->cap & IEEE80211_HT_CAP_RX_STBC)
 			flags |= IWL_TLC_MNG_CFG_FLAGS_STBC_MSK;
 	}
 
-- 
cgit v1.2.3


From cc61d3ced2aac08230f20809378c5e3134f6702d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 11 Apr 2021 13:25:38 +0300
Subject: iwlwifi: mvm: don't disconnect immediately if we don't hear beacons
 after CSA

When we switch channel, we may miss a few beacons on the
new channel. Don't disconnect if the time event for the
switch ends before we hear the beacons.

Note that this is relevant only for old devices that still
use the TIME_EVENT firmware API for channel switch.

The check that we hear a beacon before the time event
ends was meant to be used for the association time event
and not for the channel switch time event.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.3d710091a0bd.I37a161ffdfb099a10080fbdc3b70a4deb76952e2@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 1418a6438635..76c36f5cf9a3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2012-2014, 2018-2020 Intel Corporation
+ * Copyright (C) 2012-2014, 2018-2021 Intel Corporation
  * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
  * Copyright (C) 2017 Intel Deutschland GmbH
  */
@@ -294,6 +294,17 @@ static void iwl_mvm_te_handle_notif(struct iwl_mvm *mvm,
 			iwl_mvm_roc_finished(mvm);
 			break;
 		case NL80211_IFTYPE_STATION:
+			/*
+			 * If we are switching channel, don't disconnect
+			 * if the time event is already done. Beacons can
+			 * be delayed a bit after the switch.
+			 */
+			if (te_data->id == TE_CHANNEL_SWITCH_PERIOD) {
+				IWL_DEBUG_TE(mvm,
+					     "No beacon heard and the CS time event is over, don't disconnect\n");
+				break;
+			}
+
 			/*
 			 * By now, we should have finished association
 			 * and know the dtim period.
-- 
cgit v1.2.3


From 997254a9169c9a8fab1c8408db4ef64a4081f4ce Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 11 Apr 2021 13:25:39 +0300
Subject: iwlwifi: mvm: don't WARN if we can't remove a time event

It is not very useful to WARN if we can't send a host command
The firmware is likely in a bad situation and the fact that
we didn't send the host command has an impact on the firmware
only, not on the driver. The driver could clean up all its
state.

Don't WARN in this case, but just leave a smaller note.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.0324abc169c8.I4f9b769bc38d68f8ed43f77d2cd75e8f1993e964@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
index 76c36f5cf9a3..83342a6a6d5b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c
@@ -734,8 +734,8 @@ void iwl_mvm_remove_time_event(struct iwl_mvm *mvm,
 	IWL_DEBUG_TE(mvm, "Removing TE 0x%x\n", le32_to_cpu(time_cmd.id));
 	ret = iwl_mvm_send_cmd_pdu(mvm, TIME_EVENT_CMD, 0,
 				   sizeof(time_cmd), &time_cmd);
-	if (WARN_ON(ret))
-		return;
+	if (ret)
+		IWL_ERR(mvm, "Couldn't remove the time event\n");
 }
 
 /*
-- 
cgit v1.2.3


From 7db67f68392c1c5a6373c1caa7dd951bf94e550b Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sun, 11 Apr 2021 13:25:40 +0300
Subject: iwlwifi: bump FW API to 63 for AX devices

Start supporting API version 63 for AX devices.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.a588a9dacd98.Ie4f96b8988c2cbd5f096ee64d0eb0f4829d55aee@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index d775cd9c1394..44735a6d1d39 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -9,7 +9,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	62
+#define IWL_22000_UCODE_API_MAX	63
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	39
-- 
cgit v1.2.3


From d12455fdbfe9430affd88bfbfee51777356667a0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 11 Apr 2021 13:25:41 +0300
Subject: iwlwifi: trans/pcie: defer transport initialisation

In a few PCIe devices we may have to swap out the configuration
after we allocate/initialise some parts of the device because
we only know the correct one after reading some registers. This
causes some things such as the byte-count table allocations to
be incorrect, since the configuration is swapped for one with a
bigger queue size.

Fix this by initialising most of the transport much later, only
after the configuration has finally been determined.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.8f5db97db1e4.Ic622da559b586a04ca536a0ec49ed5ecf03a9354@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 91 +++++++++++++++-----------
 drivers/net/wireless/intel/iwlwifi/iwl-trans.h |  1 +
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c  |  4 ++
 3 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
index 60e0db4a5e20..9236f9106826 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (C) 2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2019-2020 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/bsearch.h>
@@ -21,7 +21,6 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 				  const struct iwl_cfg_trans_params *cfg_trans)
 {
 	struct iwl_trans *trans;
-	int txcmd_size, txcmd_align;
 #ifdef CONFIG_LOCKDEP
 	static struct lock_class_key __key;
 #endif
@@ -31,10 +30,40 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 		return NULL;
 
 	trans->trans_cfg = cfg_trans;
-	if (!cfg_trans->gen2) {
+
+#ifdef CONFIG_LOCKDEP
+	lockdep_init_map(&trans->sync_cmd_lockdep_map, "sync_cmd_lockdep_map",
+			 &__key, 0);
+#endif
+
+	trans->dev = dev;
+	trans->ops = ops;
+	trans->num_rx_queues = 1;
+
+	WARN_ON(!ops->wait_txq_empty && !ops->wait_tx_queues_empty);
+
+	if (trans->trans_cfg->use_tfh) {
+		trans->txqs.tfd.addr_size = 64;
+		trans->txqs.tfd.max_tbs = IWL_TFH_NUM_TBS;
+		trans->txqs.tfd.size = sizeof(struct iwl_tfh_tfd);
+	} else {
+		trans->txqs.tfd.addr_size = 36;
+		trans->txqs.tfd.max_tbs = IWL_NUM_OF_TBS;
+		trans->txqs.tfd.size = sizeof(struct iwl_tfd);
+	}
+	trans->max_skb_frags = IWL_TRANS_MAX_FRAGS(trans);
+
+	return trans;
+}
+
+int iwl_trans_init(struct iwl_trans *trans)
+{
+	int txcmd_size, txcmd_align;
+
+	if (!trans->trans_cfg->gen2) {
 		txcmd_size = sizeof(struct iwl_tx_cmd);
 		txcmd_align = sizeof(void *);
-	} else if (cfg_trans->device_family < IWL_DEVICE_FAMILY_AX210) {
+	} else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210) {
 		txcmd_size = sizeof(struct iwl_tx_cmd_gen2);
 		txcmd_align = 64;
 	} else {
@@ -46,17 +75,8 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 	txcmd_size += 36; /* biggest possible 802.11 header */
 
 	/* Ensure device TX cmd cannot reach/cross a page boundary in gen2 */
-	if (WARN_ON(cfg_trans->gen2 && txcmd_size >= txcmd_align))
-		return ERR_PTR(-EINVAL);
-
-#ifdef CONFIG_LOCKDEP
-	lockdep_init_map(&trans->sync_cmd_lockdep_map, "sync_cmd_lockdep_map",
-			 &__key, 0);
-#endif
-
-	trans->dev = dev;
-	trans->ops = ops;
-	trans->num_rx_queues = 1;
+	if (WARN_ON(trans->trans_cfg->gen2 && txcmd_size >= txcmd_align))
+		return -EINVAL;
 
 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
 		trans->txqs.bc_tbl_size = sizeof(struct iwl_gen3_bc_tbl);
@@ -68,23 +88,16 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 	 * allocate here.
 	 */
 	if (trans->trans_cfg->gen2) {
-		trans->txqs.bc_pool = dmam_pool_create("iwlwifi:bc", dev,
+		trans->txqs.bc_pool = dmam_pool_create("iwlwifi:bc", trans->dev,
 						       trans->txqs.bc_tbl_size,
 						       256, 0);
 		if (!trans->txqs.bc_pool)
-			return NULL;
+			return -ENOMEM;
 	}
 
-	if (trans->trans_cfg->use_tfh) {
-		trans->txqs.tfd.addr_size = 64;
-		trans->txqs.tfd.max_tbs = IWL_TFH_NUM_TBS;
-		trans->txqs.tfd.size = sizeof(struct iwl_tfh_tfd);
-	} else {
-		trans->txqs.tfd.addr_size = 36;
-		trans->txqs.tfd.max_tbs = IWL_NUM_OF_TBS;
-		trans->txqs.tfd.size = sizeof(struct iwl_tfd);
-	}
-	trans->max_skb_frags = IWL_TRANS_MAX_FRAGS(trans);
+	/* Some things must not change even if the config does */
+	WARN_ON(trans->txqs.tfd.addr_size !=
+		(trans->trans_cfg->use_tfh ? 64 : 36));
 
 	snprintf(trans->dev_cmd_pool_name, sizeof(trans->dev_cmd_pool_name),
 		 "iwl_cmd_pool:%s", dev_name(trans->dev));
@@ -93,35 +106,35 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 				  txcmd_size, txcmd_align,
 				  SLAB_HWCACHE_ALIGN, NULL);
 	if (!trans->dev_cmd_pool)
-		return NULL;
-
-	WARN_ON(!ops->wait_txq_empty && !ops->wait_tx_queues_empty);
+		return -ENOMEM;
 
 	trans->txqs.tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page);
 	if (!trans->txqs.tso_hdr_page) {
 		kmem_cache_destroy(trans->dev_cmd_pool);
-		return NULL;
+		return -ENOMEM;
 	}
 
 	/* Initialize the wait queue for commands */
 	init_waitqueue_head(&trans->wait_command_queue);
 
-	return trans;
+	return 0;
 }
 
 void iwl_trans_free(struct iwl_trans *trans)
 {
 	int i;
 
-	for_each_possible_cpu(i) {
-		struct iwl_tso_hdr_page *p =
-			per_cpu_ptr(trans->txqs.tso_hdr_page, i);
+	if (trans->txqs.tso_hdr_page) {
+		for_each_possible_cpu(i) {
+			struct iwl_tso_hdr_page *p =
+				per_cpu_ptr(trans->txqs.tso_hdr_page, i);
 
-		if (p->page)
-			__free_page(p->page);
-	}
+			if (p && p->page)
+				__free_page(p->page);
+		}
 
-	free_percpu(trans->txqs.tso_hdr_page);
+		free_percpu(trans->txqs.tso_hdr_page);
+	}
 
 	kmem_cache_destroy(trans->dev_cmd_pool);
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 6abb7385dae1..bf569f856ad8 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -1439,6 +1439,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 			  struct device *dev,
 			  const struct iwl_trans_ops *ops,
 			  const struct iwl_cfg_trans_params *cfg_trans);
+int iwl_trans_init(struct iwl_trans *trans);
 void iwl_trans_free(struct iwl_trans *trans);
 
 /*****************************************************
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 729e5503f17a..e33e559f847a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1276,6 +1276,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		trans_pcie->num_rx_bufs = RX_QUEUE_SIZE;
 	}
 
+	ret = iwl_trans_init(iwl_trans);
+	if (ret)
+		goto out_free_trans;
+
 	pci_set_drvdata(pdev, iwl_trans);
 	iwl_trans->drv = iwl_drv_start(iwl_trans);
 
-- 
cgit v1.2.3


From 378c7be65ccd173762242805f4591618249b91ae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 11 Apr 2021 13:25:42 +0300
Subject: iwlwifi: fw: print out trigger delay when collecting data

It can be confusing to see "Collecting data: ..." followed by
that not actually happening immediately so print out the delay
in that message.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.5bd095dc579a.Id1f3b746ac61497951638ba7ce70fc4b63dd87d1@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 504729663c35..cc4e18ca9566 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2559,7 +2559,9 @@ int iwl_fw_dbg_ini_collect(struct iwl_fw_runtime *fwrt,
 
 	fwrt->dump.wks[idx].dump_data = *dump_data;
 
-	IWL_WARN(fwrt, "WRT: Collecting data: ini trigger %d fired.\n", tp_id);
+	IWL_WARN(fwrt,
+		 "WRT: Collecting data: ini trigger %d fired (delay=%dms).\n",
+		 tp_id, (u32)(delay / USEC_PER_MSEC));
 
 	schedule_delayed_work(&fwrt->dump.wks[idx].wk, usecs_to_jiffies(delay));
 
-- 
cgit v1.2.3


From 8932abef6ea433228aea0e39eb237b244ae2b091 Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Sun, 11 Apr 2021 13:25:43 +0300
Subject: iwlwifi: pcie: Change ma product string name

Change ma product string name to the correct name,
and to reflect the CRF and not the CNV.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.c05b4c55540f.I8dd0361b033f63658999ba53640949701b048f17@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 2 +-
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 44735a6d1d39..c2315dea9a23 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -388,8 +388,8 @@ const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz";
 const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
 const char iwl_ax203_name[] = "Intel(R) Wi-Fi 6 AX203";
 const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6E AX211 160MHz";
+const char iwl_ax221_name[] = "Intel(R) Wi-Fi 6E AX221 160MHz";
 const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6E AX411 160MHz";
-const char iwl_ma_name[] = "Intel(R) Wi-Fi 6";
 
 const char iwl_ax200_killer_1650w_name[] =
 	"Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)";
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index bdf352e97f51..b35ffdfdf14b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -505,8 +505,8 @@ extern const char iwl_ax201_killer_1650s_name[];
 extern const char iwl_ax201_killer_1650i_name[];
 extern const char iwl_ax210_killer_1675w_name[];
 extern const char iwl_ax210_killer_1675x_name[];
-extern const char iwl_ma_name[];
 extern const char iwl_ax211_name[];
+extern const char iwl_ax221_name[];
 extern const char iwl_ax411_name[];
 #if IS_ENABLED(CONFIG_IWLDVM)
 extern const struct iwl_cfg iwl5300_agn_cfg;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index e33e559f847a..d94bd8d732e9 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1028,12 +1028,12 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_MAC_TYPE_MA, IWL_CFG_ANY,
 		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
 		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-		      iwl_cfg_ma_a0_mr_a0, iwl_ma_name),
+		      iwl_cfg_ma_a0_mr_a0, iwl_ax221_name),
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
 		      IWL_CFG_MAC_TYPE_SNJ, IWL_CFG_ANY,
 		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
 		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-		      iwl_cfg_snj_a0_mr_a0, iwl_ma_name),
+		      iwl_cfg_snj_a0_mr_a0, iwl_ax221_name),
 
 /* So with Hr */
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
@@ -1101,7 +1101,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_MAC_TYPE_BZ, IWL_CFG_ANY,
 		      IWL_CFG_RF_TYPE_MR, IWL_CFG_ANY,
 		      IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB,
-		      iwl_cfg_bz_a0_mr_a0, iwl_ma_name),
+		      iwl_cfg_bz_a0_mr_a0, iwl_ax211_name),
 
 /* So with GF */
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
-- 
cgit v1.2.3


From 7c81a025054cd0aeeeaf17aba2e9757f0a6a38a1 Mon Sep 17 00:00:00 2001
From: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
Date: Sun, 11 Apr 2021 13:25:44 +0300
Subject: iwlwifi: dbg: disable ini debug in 9000 family and below

Yoyo based debug is not applicable to old devices.  As init debug is
enabled by default in the driver, it needs to be disabled to work the
old debug mechanism in old devices.

Signed-off-by: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
Fixes: b0d8d2c27007 ("iwlwifi: yoyo: enable yoyo by default")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210411132130.805401a1b8ec.I30db38184a418cfc1c5ca1a305cc14a52501d415@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 579bc81cc0ae..4cd8c39cc3e9 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/firmware.h>
 #include "iwl-drv.h"
@@ -426,7 +426,8 @@ void iwl_dbg_tlv_load_bin(struct device *dev, struct iwl_trans *trans)
 	const struct firmware *fw;
 	int res;
 
-	if (!iwlwifi_mod_params.enable_ini)
+	if (!iwlwifi_mod_params.enable_ini ||
+	    trans->trans_cfg->device_family <= IWL_DEVICE_FAMILY_9000)
 		return;
 
 	res = firmware_request_nowarn(&fw, "iwl-debug-yoyo.bin", dev);
-- 
cgit v1.2.3


From 7bf481d7e75a474e2a0f564b4de6bc2f1eeafc9a Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 30 Oct 2020 22:44:16 +0200
Subject: net/mlx5: E-Switch, let user to enable disable metadata

Currently each packet inserted in eswitch is tagged with a internal
metadata to indicate source vport. Metadata tagging is not always
needed. Metadata insertion is needed for multi-port RoCE, failover
between representors and stacked devices. In many other cases,
metadata enablement is not needed.

Metadata insertion slows down the packet processing rate of the E-switch
when it is in switchdev mode.

Below table show performance gain with metadata disabled for VXLAN
offload rules in both SMFS and DMFS steering mode on ConnectX-5 device.

----------------------------------------------
| steering | metadata | pkt size | rx pps    |
| mode     |          |          | (million) |
----------------------------------------------
| smfs     | disabled | 128Bytes | 42        |
----------------------------------------------
| smfs     | enabled  | 128Bytes | 36        |
----------------------------------------------
| dmfs     | disabled | 128Bytes | 42        |
----------------------------------------------
| dmfs     | enabled  | 128Bytes | 36        |
----------------------------------------------

Hence, allow user to disable metadata using driver specific devlink
parameter. Metadata setting of the eswitch is applicable only for the
switchdev mode.

Example to show and disable metadata before changing eswitch mode:
$ devlink dev param show pci/0000:06:00.0 name esw_port_metadata
pci/0000:06:00.0:
  name esw_port_metadata type driver-specific
    values:
      cmode runtime value true

$ devlink dev param set pci/0000:06:00.0 \
	  name esw_port_metadata value false cmode runtime

$ devlink dev eswitch set pci/0000:06:00.0 mode switchdev

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
changelog:
v1->v2:
 - added performance numbers in commit log
 - updated commit log and documentation for switchdev mode
 - added explicit note on when user can disable metadata in
   documentation
---
 .../device_drivers/ethernet/mellanox/mlx5.rst      | 34 ++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c  | 62 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  2 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 30 ++++++++---
 5 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index 1b7e32d8a61b..936a10f1942c 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -183,6 +183,40 @@ User command examples:
       values:
          cmode driverinit value true
 
+esw_port_metadata: Eswitch port metadata state
+----------------------------------------------
+When applicable, disabling Eswitch metadata can increase packet rate
+up to 20% depending on the use case and packet sizes.
+
+Eswitch port metadata state controls whether to internally tag packets with
+metadata. Metadata tagging must be enabled for multi-port RoCE, failover
+between representors and stacked devices.
+By default metadata is enabled on the supported devices in E-switch.
+Metadata is applicable only for E-switch in switchdev mode and
+users may disable it when NONE of the below use cases will be in use:
+1. HCA is in Dual/multi-port RoCE mode.
+2. VF/SF representor bonding (Usually used for Live migration)
+3. Stacked devices
+
+When metadata is disabled, the above use cases will fail to initialize if
+users try to enable them.
+
+- Show eswitch port metadata::
+
+    $ devlink dev param show pci/0000:06:00.0 name esw_port_metadata
+      pci/0000:06:00.0:
+        name esw_port_metadata type driver-specific
+          values:
+            cmode runtime value true
+
+- Disable eswitch port metadata::
+
+    $ devlink dev param set pci/0000:06:00.0 name esw_port_metadata value false cmode runtime
+
+- Change eswitch mode to switchdev mode where after choosing the metadata value::
+
+    $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
+
 mlx5 subfunction
 ================
 mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 38c7c44fe883..ee0f5355f120 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -456,6 +456,50 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id
 
 	return 0;
 }
+
+static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id,
+					      struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+	if (!MLX5_ESWITCH_MANAGER(dev))
+		return -EOPNOTSUPP;
+
+	return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool);
+}
+
+static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id,
+					      struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+	if (!MLX5_ESWITCH_MANAGER(dev))
+		return -EOPNOTSUPP;
+
+	ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch);
+	return 0;
+}
+
+static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id,
+						   union devlink_param_value val,
+						   struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u8 esw_mode;
+
+	if (!MLX5_ESWITCH_MANAGER(dev)) {
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported");
+		return -EOPNOTSUPP;
+	}
+	esw_mode = mlx5_eswitch_mode(dev);
+	if (esw_mode == MLX5_ESWITCH_OFFLOADS) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "E-Switch must either disabled or non switchdev mode");
+		return -EBUSY;
+	}
+	return 0;
+}
+
 #endif
 
 static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id,
@@ -490,6 +534,12 @@ static const struct devlink_param mlx5_devlink_params[] = {
 			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			     NULL, NULL,
 			     mlx5_devlink_large_group_num_validate),
+	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+			     "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL,
+			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			     mlx5_devlink_esw_port_metadata_get,
+			     mlx5_devlink_esw_port_metadata_set,
+			     mlx5_devlink_esw_port_metadata_validate),
 #endif
 	DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
 			      mlx5_devlink_enable_remote_dev_reset_get,
@@ -519,6 +569,18 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
 	devlink_param_driverinit_value_set(devlink,
 					   MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
 					   value);
+
+	if (MLX5_ESWITCH_MANAGER(dev)) {
+		if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) {
+			dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+			value.vbool = true;
+		} else {
+			value.vbool = false;
+		}
+		devlink_param_driverinit_value_set(devlink,
+						   MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+						   value);
+	}
 #endif
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index eff107dad922..7318d44b774b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -10,6 +10,7 @@ enum mlx5_devlink_param_id {
 	MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 	MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
 	MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
+	MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
 };
 
 struct mlx5_trap_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index deafb0e03787..c7a73dbd64b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -314,6 +314,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
 u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
 void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ab32f685cbb7..1f58e84bdfc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2351,8 +2351,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
 	mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 }
 
-static bool
-esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
+bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
 {
 	if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl))
 		return false;
@@ -2452,6 +2451,28 @@ metadata_err:
 	return err;
 }
 
+int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable)
+{
+	int err = 0;
+
+	down_write(&esw->mode_lock);
+	if (esw->mode != MLX5_ESWITCH_NONE) {
+		err = -EBUSY;
+		goto done;
+	}
+	if (!mlx5_esw_vport_match_metadata_supported(esw)) {
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+	if (enable)
+		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+	else
+		esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
+done:
+	up_write(&esw->mode_lock);
+	return err;
+}
+
 int
 esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
 				     struct mlx5_vport *vport)
@@ -2673,9 +2694,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 	if (err)
 		goto err_metadata;
 
-	if (esw_check_vport_match_metadata_supported(esw))
-		esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
 	err = esw_offloads_metadata_init(esw);
 	if (err)
 		goto err_metadata;
@@ -2725,7 +2743,6 @@ err_pool:
 err_vport_metadata:
 	esw_offloads_metadata_uninit(esw);
 err_metadata:
-	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
 	mlx5_rdma_disable_roce(esw->dev);
 	mutex_destroy(&esw->offloads.termtbl_mutex);
 	return err;
@@ -2761,7 +2778,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
 	esw_offloads_steering_cleanup(esw);
 	mapping_destroy(esw->offloads.reg_c0_obj_pool);
 	esw_offloads_metadata_uninit(esw);
-	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
 	mlx5_rdma_disable_roce(esw->dev);
 	mutex_destroy(&esw->offloads.termtbl_mutex);
 	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-- 
cgit v1.2.3


From 7d5ae47891929235c4a269b91996ab951cbf3c20 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Mon, 8 Mar 2021 11:50:40 +0200
Subject: net/mlx5: E-Switch, Skip querying SF enabled bits

With vhca events, SF state is queried through the VHCA events. Device no
longer expects SF bitmap in the query eswitch functions command.

Hence, remove it to simplify the code.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6cf04a366f99..b3bc82e419b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1390,15 +1390,9 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
 {
 	int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out);
 	u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {};
-	u16 max_sf_vports;
 	u32 *out;
 	int err;
 
-	max_sf_vports = mlx5_sf_max_functions(dev);
-	/* Device interface is array of 64-bits */
-	if (max_sf_vports)
-		outlen += DIV_ROUND_UP(max_sf_vports, BITS_PER_TYPE(__be64)) * sizeof(__be64);
-
 	out = kvzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return ERR_PTR(-ENOMEM);
-- 
cgit v1.2.3


From 6308a5f06be08f3ea1f1a895a9ef54c7b65c4c35 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 2 Mar 2021 21:27:47 +0200
Subject: net/mlx5: E-Switch, Make vport number u16

Vport number is 16-bit field in hardware. Make it u16.

Move location of vport in the structure so that it reduces a hole
in the structure.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++-
 include/linux/mlx5/eswitch.h                               | 3 +--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c7a73dbd64b4..a4b9f78bf4d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -152,7 +152,6 @@ enum mlx5_eswitch_vport_event {
 
 struct mlx5_vport {
 	struct mlx5_core_dev    *dev;
-	int                     vport;
 	struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
 	struct hlist_head       mc_list[MLX5_L2_ADDR_HASH_SIZE];
 	struct mlx5_flow_handle *promisc_rule;
@@ -174,6 +173,7 @@ struct mlx5_vport {
 		u32 max_rate;
 	} qos;
 
+	u16 vport;
 	bool                    enabled;
 	enum mlx5_eswitch_vport_event enabled_events;
 	struct devlink_port *dl_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1f58e84bdfc6..bbb707117296 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -986,12 +986,13 @@ static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
 static int
 mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
 {
-	int num_vfs, vport_num, rule_idx = 0, err = 0;
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act flow_act = {0};
+	int num_vfs, rule_idx = 0, err = 0;
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_handle **flows;
 	struct mlx5_flow_spec *spec;
+	u16 vport_num;
 
 	num_vfs = esw->esw_funcs.num_vfs;
 	flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 429a710c5a99..9cf1da2883c6 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -152,8 +152,7 @@ mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
 };
 
 static inline u32
-mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
-					  int vport_num)
+mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	return 0;
 };
-- 
cgit v1.2.3


From 13795553a84dd3411d0870e161d40fee9c4ebe5c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 2 Mar 2021 21:39:01 +0200
Subject: net/mlx5: E-Switch Make cleanup sequence mirror of init

Make cleanup sequence mirror of init sequence for cleaning up reps
and freeing vports.

Also when reps initialization fails, there is no need to perform reps
cleanup.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b3bc82e419b6..9009574372fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1856,7 +1856,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 abort:
 	if (esw->work_queue)
 		destroy_workqueue(esw->work_queue);
-	esw_offloads_cleanup_reps(esw);
 	kfree(esw->vports);
 	kfree(esw);
 	return err;
@@ -1871,7 +1870,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
-	esw_offloads_cleanup_reps(esw);
 	mutex_destroy(&esw->state_lock);
 	WARN_ON(!xa_empty(&esw->offloads.vhca_map));
 	xa_destroy(&esw->offloads.vhca_map);
@@ -1879,6 +1877,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr);
 	mutex_destroy(&esw->offloads.encap_tbl_lock);
 	mutex_destroy(&esw->offloads.decap_tbl_lock);
+	esw_offloads_cleanup_reps(esw);
 	kfree(esw->vports);
 	kfree(esw);
 }
-- 
cgit v1.2.3


From b16f2bb6b6ca926e303273913288461091ba8cd8 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 26 Feb 2021 14:36:22 +0200
Subject: net/mlx5: E-Switch, Convert a macro to a helper routine

Convert ESW_ALLOWED macro to a helper routine so that it can be used in
other eswitch files.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  5 ++++
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9009574372fc..6a70e385beb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1443,8 +1443,6 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
 }
 
 /* Public E-Switch API */
-#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
-
 int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
 			    enum mlx5_eswitch_vport_event enabled_events)
 {
@@ -1713,7 +1711,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 {
 	int ret;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return 0;
 
 	down_write(&esw->mode_lock);
@@ -1773,7 +1771,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
 
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 {
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return;
 
 	down_write(&esw->mode_lock);
@@ -2023,7 +2021,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 	int other_vport = 1;
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
@@ -2111,7 +2109,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 	u8 set_flags = 0;
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 
 	if (vlan || qos)
@@ -2140,7 +2138,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 	bool pschk;
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
@@ -2248,7 +2246,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
 	if (!esw)
 		return -EOPNOTSUPP;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 
 	mutex_lock(&esw->state_lock);
@@ -2269,7 +2267,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
 	if (!esw)
 		return -EOPNOTSUPP;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 
 	if (esw->mode != MLX5_ESWITCH_LEGACY)
@@ -2285,7 +2283,7 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
@@ -2369,7 +2367,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 	bool max_rate_supported;
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
@@ -2534,7 +2532,7 @@ u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
-	return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE;
+	return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
 
@@ -2544,7 +2542,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
 	struct mlx5_eswitch *esw;
 
 	esw = dev->priv.eswitch;
-	return ESW_ALLOWED(esw) ? esw->offloads.encap :
+	return mlx5_esw_allowed(esw) ? esw->offloads.encap :
 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
@@ -2590,7 +2588,7 @@ bool mlx5_esw_hold(struct mlx5_core_dev *mdev)
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
 	/* e.g. VF doesn't have eswitch so nothing to do */
-	if (!ESW_ALLOWED(esw))
+	if (!mlx5_esw_allowed(esw))
 		return true;
 
 	if (down_read_trylock(&esw->mode_lock) != 0)
@@ -2607,7 +2605,7 @@ void mlx5_esw_release(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
-	if (ESW_ALLOWED(esw))
+	if (mlx5_esw_allowed(esw))
 		up_read(&esw->mode_lock);
 }
 
@@ -2619,7 +2617,7 @@ void mlx5_esw_get(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
-	if (ESW_ALLOWED(esw))
+	if (mlx5_esw_allowed(esw))
 		atomic64_inc(&esw->user_count);
 }
 
@@ -2631,7 +2629,7 @@ void mlx5_esw_put(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 
-	if (ESW_ALLOWED(esw))
+	if (mlx5_esw_allowed(esw))
 		atomic64_dec_if_positive(&esw->user_count);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a4b9f78bf4d6..a61a33c36fa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -521,6 +521,11 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev);
 #define esw_debug(dev, format, ...)				\
 	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
 
+static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw)
+{
+	return esw && MLX5_ESWITCH_MANAGER(esw->dev);
+}
+
 /* The returned number is valid only when the dev is eswitch manager. */
 static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
 {
-- 
cgit v1.2.3


From b55b35382e51c84bcddc45bd87cf8fa16621452b Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 26 Feb 2021 14:30:50 +0200
Subject: net/mlx5: E-Switch, Move legacy code to a individual file

Currently eswitch offers two modes. Legacy and offloads.
Offloads code is already in its own file eswitch_offloads.c

However eswitch.c contains the eswitch legacy code and common
infrastructure  code.

To enable future extensions and to better manage generic common eswitch
infrastructure code, move the legacy code to its own legacy.c file.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/esw/legacy.c   | 509 ++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/esw/legacy.h   |  22 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 511 +--------------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   2 +
 5 files changed, 539 insertions(+), 507 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8bde58379ac6..a1223e904190 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -50,7 +50,7 @@ mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 # Core extra
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
-				      ecpf.o rdma.o
+				      ecpf.o rdma.o esw/legacy.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
 				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
new file mode 100644
index 000000000000..8ab1224653a4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#include <linux/etherdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "fs_core.h"
+
+enum {
+	LEGACY_VEPA_PRIO = 0,
+	LEGACY_FDB_PRIO,
+};
+
+static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb;
+	int err;
+
+	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* num FTE 2, num FG 2 */
+	ft_attr.prio = LEGACY_VEPA_PRIO;
+	ft_attr.max_fte = 2;
+	ft_attr.autogroup.max_num_groups = 2;
+	fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
+		return err;
+	}
+	esw->fdb_table.legacy.vepa_fdb = fdb;
+
+	return 0;
+}
+
+static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+	esw_debug(esw->dev, "Destroy FDB Table\n");
+	if (!esw->fdb_table.legacy.fdb)
+		return;
+
+	if (esw->fdb_table.legacy.promisc_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
+	if (esw->fdb_table.legacy.allmulti_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
+	if (esw->fdb_table.legacy.addr_grp)
+		mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
+	mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
+
+	esw->fdb_table.legacy.fdb = NULL;
+	esw->fdb_table.legacy.addr_grp = NULL;
+	esw->fdb_table.legacy.allmulti_grp = NULL;
+	esw->fdb_table.legacy.promisc_grp = NULL;
+	atomic64_set(&esw->user_count, 0);
+}
+
+static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *fdb;
+	struct mlx5_flow_group *g;
+	void *match_criteria;
+	int table_size;
+	u32 *flow_group_in;
+	u8 *dmac;
+	int err = 0;
+
+	esw_debug(dev, "Create FDB log_max_size(%d)\n",
+		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+
+	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get FDB flow namespace\n");
+		return -EOPNOTSUPP;
+	}
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+	ft_attr.max_fte = table_size;
+	ft_attr.prio = LEGACY_FDB_PRIO;
+	fdb = mlx5_create_flow_table(root_ns, &ft_attr);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
+		goto out;
+	}
+	esw->fdb_table.legacy.fdb = fdb;
+
+	/* Addresses group : Full match unicast/multicast addresses */
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	/* Preserve 2 entries for allmulti and promisc rules*/
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
+	eth_broadcast_addr(dmac);
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
+		goto out;
+	}
+	esw->fdb_table.legacy.addr_grp = g;
+
+	/* Allmulti group : One rule that forwards any mcast traffic */
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
+	eth_zero_addr(dmac);
+	dmac[0] = 0x01;
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
+		goto out;
+	}
+	esw->fdb_table.legacy.allmulti_grp = g;
+
+	/* Promiscuous group :
+	 * One rule that forward all unmatched traffic from previous groups
+	 */
+	eth_zero_addr(dmac);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
+	g = mlx5_create_flow_group(fdb, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
+		goto out;
+	}
+	esw->fdb_table.legacy.promisc_grp = g;
+
+out:
+	if (err)
+		esw_destroy_legacy_fdb_table(esw);
+
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
+{
+	esw_debug(esw->dev, "Destroy VEPA Table\n");
+	if (!esw->fdb_table.legacy.vepa_fdb)
+		return;
+
+	mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
+	esw->fdb_table.legacy.vepa_fdb = NULL;
+}
+
+static int esw_create_legacy_table(struct mlx5_eswitch *esw)
+{
+	int err;
+
+	memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
+	atomic64_set(&esw->user_count, 0);
+
+	err = esw_create_legacy_vepa_table(esw);
+	if (err)
+		return err;
+
+	err = esw_create_legacy_fdb_table(esw);
+	if (err)
+		esw_destroy_legacy_vepa_table(esw);
+
+	return err;
+}
+
+static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
+{
+	if (esw->fdb_table.legacy.vepa_uplink_rule)
+		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
+
+	if (esw->fdb_table.legacy.vepa_star_rule)
+		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
+
+	esw->fdb_table.legacy.vepa_uplink_rule = NULL;
+	esw->fdb_table.legacy.vepa_star_rule = NULL;
+}
+
+static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
+{
+	esw_cleanup_vepa_rules(esw);
+	esw_destroy_legacy_fdb_table(esw);
+	esw_destroy_legacy_vepa_table(esw);
+}
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+					MLX5_VPORT_MC_ADDR_CHANGE | \
+					MLX5_VPORT_PROMISC_CHANGE)
+
+int esw_legacy_enable(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	int ret, i;
+
+	ret = esw_create_legacy_table(esw);
+	if (ret)
+		return ret;
+
+	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+
+	ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
+	if (ret)
+		esw_destroy_legacy_table(esw);
+	return ret;
+}
+
+void esw_legacy_disable(struct mlx5_eswitch *esw)
+{
+	struct esw_mc_addr *mc_promisc;
+
+	mlx5_eswitch_disable_pf_vf_vports(esw);
+
+	mc_promisc = &esw->mc_promisc;
+	if (mc_promisc->uplink_rule)
+		mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+	esw_destroy_legacy_table(esw);
+}
+
+static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
+					 u8 setting)
+{
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *flow_rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+	void *misc;
+
+	if (!setting) {
+		esw_cleanup_vepa_rules(esw);
+		return 0;
+	}
+
+	if (esw->fdb_table.legacy.vepa_uplink_rule)
+		return 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	/* Uplink rule forward uplink traffic to FDB */
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	dest.ft = esw->fdb_table.legacy.fdb;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
+					&flow_act, &dest, 1);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		goto out;
+	} else {
+		esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
+	}
+
+	/* Star rule to forward all traffic to uplink vport */
+	memset(&dest, 0, sizeof(dest));
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	dest.vport.num = MLX5_VPORT_UPLINK;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL,
+					&flow_act, &dest, 1);
+	if (IS_ERR(flow_rule)) {
+		err = PTR_ERR(flow_rule);
+		goto out;
+	} else {
+		esw->fdb_table.legacy.vepa_star_rule = flow_rule;
+	}
+
+out:
+	kvfree(spec);
+	if (err)
+		esw_cleanup_vepa_rules(esw);
+	return err;
+}
+
+int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
+{
+	int err = 0;
+
+	if (!esw)
+		return -EOPNOTSUPP;
+
+	if (!mlx5_esw_allowed(esw))
+		return -EPERM;
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = _mlx5_eswitch_set_vepa_locked(esw, setting);
+
+out:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
+{
+	if (!esw)
+		return -EOPNOTSUPP;
+
+	if (!mlx5_esw_allowed(esw))
+		return -EPERM;
+
+	if (esw->mode != MLX5_ESWITCH_LEGACY)
+		return -EOPNOTSUPP;
+
+	*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
+	return 0;
+}
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	int ret;
+
+	/* Only non manager vports need ACL in legacy mode */
+	if (mlx5_esw_is_manager_vport(esw, vport->vport))
+		return 0;
+
+	ret = esw_acl_ingress_lgcy_setup(esw, vport);
+	if (ret)
+		goto ingress_err;
+
+	ret = esw_acl_egress_lgcy_setup(esw, vport);
+	if (ret)
+		goto egress_err;
+
+	return 0;
+
+egress_err:
+	esw_acl_ingress_lgcy_cleanup(esw, vport);
+ingress_err:
+	return ret;
+}
+
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	if (mlx5_esw_is_manager_vport(esw, vport->vport))
+		return;
+
+	esw_acl_egress_lgcy_cleanup(esw, vport);
+	esw_acl_ingress_lgcy_cleanup(esw, vport);
+}
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+				    struct mlx5_vport *vport,
+				    struct mlx5_vport_drop_stats *stats)
+{
+	u64 rx_discard_vport_down, tx_discard_vport_down;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	u64 bytes = 0;
+	int err = 0;
+
+	if (esw->mode != MLX5_ESWITCH_LEGACY)
+		return 0;
+
+	mutex_lock(&esw->state_lock);
+	if (!vport->enabled)
+		goto unlock;
+
+	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
+		mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
+			      &stats->rx_dropped, &bytes);
+
+	if (vport->ingress.legacy.drop_counter)
+		mlx5_fc_query(dev, vport->ingress.legacy.drop_counter,
+			      &stats->tx_dropped, &bytes);
+
+	if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
+	    !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+		goto unlock;
+
+	err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
+					  &rx_discard_vport_down,
+					  &tx_discard_vport_down);
+	if (err)
+		goto unlock;
+
+	if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
+		stats->rx_dropped += rx_discard_vport_down;
+	if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
+		stats->tx_dropped += tx_discard_vport_down;
+
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				u16 vport, u16 vlan, u8 qos)
+{
+	u8 set_flags = 0;
+	int err = 0;
+
+	if (!mlx5_esw_allowed(esw))
+		return -EPERM;
+
+	if (vlan || qos)
+		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		if (!vlan)
+			goto unlock; /* compatibility with libvirt */
+
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+
+	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
+				    u16 vport, bool spoofchk)
+{
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	bool pschk;
+	int err = 0;
+
+	if (!mlx5_esw_allowed(esw))
+		return -EPERM;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+	pschk = evport->info.spoofchk;
+	evport->info.spoofchk = spoofchk;
+	if (pschk && !is_valid_ether_addr(evport->info.mac))
+		mlx5_core_warn(esw->dev,
+			       "Spoofchk in set while MAC is invalid, vport(%d)\n",
+			       evport->vport);
+	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
+		err = esw_acl_ingress_lgcy_setup(esw, evport);
+	if (err)
+		evport->info.spoofchk = pschk;
+
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
+
+int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
+				 u16 vport, bool setting)
+{
+	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	int err = 0;
+
+	if (!mlx5_esw_allowed(esw))
+		return -EPERM;
+	if (IS_ERR(evport))
+		return PTR_ERR(evport);
+
+	mutex_lock(&esw->state_lock);
+	if (esw->mode != MLX5_ESWITCH_LEGACY) {
+		err = -EOPNOTSUPP;
+		goto unlock;
+	}
+	evport->info.trusted = setting;
+	if (evport->enabled)
+		esw_vport_change_handle_locked(evport);
+
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
new file mode 100644
index 000000000000..e0820bb72b57
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies Ltd */
+
+#ifndef __MLX5_ESW_LEGACY_H__
+#define __MLX5_ESW_LEGACY_H__
+
+#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
+					MLX5_VPORT_MC_ADDR_CHANGE | \
+					MLX5_VPORT_PROMISC_CHANGE)
+
+struct mlx5_eswitch;
+
+int esw_legacy_enable(struct mlx5_eswitch *esw);
+void esw_legacy_disable(struct mlx5_eswitch *esw);
+
+int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev,
+				    struct mlx5_vport *vport,
+				    struct mlx5_vport_drop_stats *stats);
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6a70e385beb8..3dfcd4ec1556 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "esw/acl/lgcy.h"
+#include "esw/legacy.h"
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "eswitch.h"
@@ -61,9 +62,6 @@ struct vport_addr {
 	bool mc_promisc;
 };
 
-static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw);
-static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw);
-
 static int mlx5_eswitch_check(const struct mlx5_core_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
@@ -278,226 +276,6 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v);
 }
 
-enum {
-	LEGACY_VEPA_PRIO = 0,
-	LEGACY_FDB_PRIO,
-};
-
-static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw)
-{
-	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *root_ns;
-	struct mlx5_flow_table *fdb;
-	int err;
-
-	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
-	if (!root_ns) {
-		esw_warn(dev, "Failed to get FDB flow namespace\n");
-		return -EOPNOTSUPP;
-	}
-
-	/* num FTE 2, num FG 2 */
-	ft_attr.prio = LEGACY_VEPA_PRIO;
-	ft_attr.max_fte = 2;
-	ft_attr.autogroup.max_num_groups = 2;
-	fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
-	if (IS_ERR(fdb)) {
-		err = PTR_ERR(fdb);
-		esw_warn(dev, "Failed to create VEPA FDB err %d\n", err);
-		return err;
-	}
-	esw->fdb_table.legacy.vepa_fdb = fdb;
-
-	return 0;
-}
-
-static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
-{
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *root_ns;
-	struct mlx5_flow_table *fdb;
-	struct mlx5_flow_group *g;
-	void *match_criteria;
-	int table_size;
-	u32 *flow_group_in;
-	u8 *dmac;
-	int err = 0;
-
-	esw_debug(dev, "Create FDB log_max_size(%d)\n",
-		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-
-	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
-	if (!root_ns) {
-		esw_warn(dev, "Failed to get FDB flow namespace\n");
-		return -EOPNOTSUPP;
-	}
-
-	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!flow_group_in)
-		return -ENOMEM;
-
-	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-	ft_attr.max_fte = table_size;
-	ft_attr.prio = LEGACY_FDB_PRIO;
-	fdb = mlx5_create_flow_table(root_ns, &ft_attr);
-	if (IS_ERR(fdb)) {
-		err = PTR_ERR(fdb);
-		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
-		goto out;
-	}
-	esw->fdb_table.legacy.fdb = fdb;
-
-	/* Addresses group : Full match unicast/multicast addresses */
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-		 MLX5_MATCH_OUTER_HEADERS);
-	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-	/* Preserve 2 entries for allmulti and promisc rules*/
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
-	eth_broadcast_addr(dmac);
-	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
-		goto out;
-	}
-	esw->fdb_table.legacy.addr_grp = g;
-
-	/* Allmulti group : One rule that forwards any mcast traffic */
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-		 MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2);
-	eth_zero_addr(dmac);
-	dmac[0] = 0x01;
-	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
-		goto out;
-	}
-	esw->fdb_table.legacy.allmulti_grp = g;
-
-	/* Promiscuous group :
-	 * One rule that forward all unmatched traffic from previous groups
-	 */
-	eth_zero_addr(dmac);
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
-		 MLX5_MATCH_MISC_PARAMETERS);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
-	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
-		goto out;
-	}
-	esw->fdb_table.legacy.promisc_grp = g;
-
-out:
-	if (err)
-		esw_destroy_legacy_fdb_table(esw);
-
-	kvfree(flow_group_in);
-	return err;
-}
-
-static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw)
-{
-	esw_debug(esw->dev, "Destroy VEPA Table\n");
-	if (!esw->fdb_table.legacy.vepa_fdb)
-		return;
-
-	mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb);
-	esw->fdb_table.legacy.vepa_fdb = NULL;
-}
-
-static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw)
-{
-	esw_debug(esw->dev, "Destroy FDB Table\n");
-	if (!esw->fdb_table.legacy.fdb)
-		return;
-
-	if (esw->fdb_table.legacy.promisc_grp)
-		mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp);
-	if (esw->fdb_table.legacy.allmulti_grp)
-		mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp);
-	if (esw->fdb_table.legacy.addr_grp)
-		mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp);
-	mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb);
-
-	esw->fdb_table.legacy.fdb = NULL;
-	esw->fdb_table.legacy.addr_grp = NULL;
-	esw->fdb_table.legacy.allmulti_grp = NULL;
-	esw->fdb_table.legacy.promisc_grp = NULL;
-	atomic64_set(&esw->user_count, 0);
-}
-
-static int esw_create_legacy_table(struct mlx5_eswitch *esw)
-{
-	int err;
-
-	memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb));
-	atomic64_set(&esw->user_count, 0);
-
-	err = esw_create_legacy_vepa_table(esw);
-	if (err)
-		return err;
-
-	err = esw_create_legacy_fdb_table(esw);
-	if (err)
-		esw_destroy_legacy_vepa_table(esw);
-
-	return err;
-}
-
-static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
-{
-	esw_cleanup_vepa_rules(esw);
-	esw_destroy_legacy_fdb_table(esw);
-	esw_destroy_legacy_vepa_table(esw);
-}
-
-#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \
-					MLX5_VPORT_MC_ADDR_CHANGE | \
-					MLX5_VPORT_PROMISC_CHANGE)
-
-static int esw_legacy_enable(struct mlx5_eswitch *esw)
-{
-	struct mlx5_vport *vport;
-	int ret, i;
-
-	ret = esw_create_legacy_table(esw);
-	if (ret)
-		return ret;
-
-	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
-		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
-
-	ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS);
-	if (ret)
-		esw_destroy_legacy_table(esw);
-	return ret;
-}
-
-static void esw_legacy_disable(struct mlx5_eswitch *esw)
-{
-	struct esw_mc_addr *mc_promisc;
-
-	mlx5_eswitch_disable_pf_vf_vports(esw);
-
-	mc_promisc = &esw->mc_promisc;
-	if (mc_promisc->uplink_rule)
-		mlx5_del_flow_rules(mc_promisc->uplink_rule);
-
-	esw_destroy_legacy_table(esw);
-}
-
 /* E-Switch vport UC/MC lists management */
 typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
 				 struct vport_addr *vaddr);
@@ -919,7 +697,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw,
 				(promisc_all || promisc_mc));
 }
 
-static void esw_vport_change_handle_locked(struct mlx5_vport *vport)
+void esw_vport_change_handle_locked(struct mlx5_vport *vport)
 {
 	struct mlx5_core_dev *dev = vport->dev;
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -1170,56 +948,20 @@ static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
 	((u8 *)node_guid)[0] = mac[5];
 }
 
-static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
-					      struct mlx5_vport *vport)
-{
-	int ret;
-
-	/* Only non manager vports need ACL in legacy mode */
-	if (mlx5_esw_is_manager_vport(esw, vport->vport))
-		return 0;
-
-	ret = esw_acl_ingress_lgcy_setup(esw, vport);
-	if (ret)
-		goto ingress_err;
-
-	ret = esw_acl_egress_lgcy_setup(esw, vport);
-	if (ret)
-		goto egress_err;
-
-	return 0;
-
-egress_err:
-	esw_acl_ingress_lgcy_cleanup(esw, vport);
-ingress_err:
-	return ret;
-}
-
 static int esw_vport_setup_acl(struct mlx5_eswitch *esw,
 			       struct mlx5_vport *vport)
 {
 	if (esw->mode == MLX5_ESWITCH_LEGACY)
-		return esw_vport_create_legacy_acl_tables(esw, vport);
+		return esw_legacy_vport_acl_setup(esw, vport);
 	else
 		return esw_vport_create_offloads_acl_tables(esw, vport);
 }
 
-static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw,
-						struct mlx5_vport *vport)
-
-{
-	if (mlx5_esw_is_manager_vport(esw, vport->vport))
-		return;
-
-	esw_acl_egress_lgcy_cleanup(esw, vport);
-	esw_acl_ingress_lgcy_cleanup(esw, vport);
-}
-
 static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
 				  struct mlx5_vport *vport)
 {
 	if (esw->mode == MLX5_ESWITCH_LEGACY)
-		esw_vport_destroy_legacy_acl_tables(esw, vport);
+		esw_legacy_vport_acl_cleanup(esw, vport);
 	else
 		esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
@@ -2103,205 +1845,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 	return err;
 }
 
-int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				u16 vport, u16 vlan, u8 qos)
-{
-	u8 set_flags = 0;
-	int err = 0;
-
-	if (!mlx5_esw_allowed(esw))
-		return -EPERM;
-
-	if (vlan || qos)
-		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
-
-	mutex_lock(&esw->state_lock);
-	if (esw->mode != MLX5_ESWITCH_LEGACY) {
-		if (!vlan)
-			goto unlock; /* compatibility with libvirt */
-
-		err = -EOPNOTSUPP;
-		goto unlock;
-	}
-
-	err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
-
-unlock:
-	mutex_unlock(&esw->state_lock);
-	return err;
-}
-
-int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
-				    u16 vport, bool spoofchk)
-{
-	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
-	bool pschk;
-	int err = 0;
-
-	if (!mlx5_esw_allowed(esw))
-		return -EPERM;
-	if (IS_ERR(evport))
-		return PTR_ERR(evport);
-
-	mutex_lock(&esw->state_lock);
-	if (esw->mode != MLX5_ESWITCH_LEGACY) {
-		err = -EOPNOTSUPP;
-		goto unlock;
-	}
-	pschk = evport->info.spoofchk;
-	evport->info.spoofchk = spoofchk;
-	if (pschk && !is_valid_ether_addr(evport->info.mac))
-		mlx5_core_warn(esw->dev,
-			       "Spoofchk in set while MAC is invalid, vport(%d)\n",
-			       evport->vport);
-	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
-		err = esw_acl_ingress_lgcy_setup(esw, evport);
-	if (err)
-		evport->info.spoofchk = pschk;
-
-unlock:
-	mutex_unlock(&esw->state_lock);
-	return err;
-}
-
-static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw)
-{
-	if (esw->fdb_table.legacy.vepa_uplink_rule)
-		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule);
-
-	if (esw->fdb_table.legacy.vepa_star_rule)
-		mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule);
-
-	esw->fdb_table.legacy.vepa_uplink_rule = NULL;
-	esw->fdb_table.legacy.vepa_star_rule = NULL;
-}
-
-static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw,
-					 u8 setting)
-{
-	struct mlx5_flow_destination dest = {};
-	struct mlx5_flow_act flow_act = {};
-	struct mlx5_flow_handle *flow_rule;
-	struct mlx5_flow_spec *spec;
-	int err = 0;
-	void *misc;
-
-	if (!setting) {
-		esw_cleanup_vepa_rules(esw);
-		return 0;
-	}
-
-	if (esw->fdb_table.legacy.vepa_uplink_rule)
-		return 0;
-
-	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-	if (!spec)
-		return -ENOMEM;
-
-	/* Uplink rule forward uplink traffic to FDB */
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-	MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
-
-	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
-	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-
-	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-	dest.ft = esw->fdb_table.legacy.fdb;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec,
-					&flow_act, &dest, 1);
-	if (IS_ERR(flow_rule)) {
-		err = PTR_ERR(flow_rule);
-		goto out;
-	} else {
-		esw->fdb_table.legacy.vepa_uplink_rule = flow_rule;
-	}
-
-	/* Star rule to forward all traffic to uplink vport */
-	memset(&dest, 0, sizeof(dest));
-	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-	dest.vport.num = MLX5_VPORT_UPLINK;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL,
-					&flow_act, &dest, 1);
-	if (IS_ERR(flow_rule)) {
-		err = PTR_ERR(flow_rule);
-		goto out;
-	} else {
-		esw->fdb_table.legacy.vepa_star_rule = flow_rule;
-	}
-
-out:
-	kvfree(spec);
-	if (err)
-		esw_cleanup_vepa_rules(esw);
-	return err;
-}
-
-int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
-{
-	int err = 0;
-
-	if (!esw)
-		return -EOPNOTSUPP;
-
-	if (!mlx5_esw_allowed(esw))
-		return -EPERM;
-
-	mutex_lock(&esw->state_lock);
-	if (esw->mode != MLX5_ESWITCH_LEGACY) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	err = _mlx5_eswitch_set_vepa_locked(esw, setting);
-
-out:
-	mutex_unlock(&esw->state_lock);
-	return err;
-}
-
-int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
-{
-	if (!esw)
-		return -EOPNOTSUPP;
-
-	if (!mlx5_esw_allowed(esw))
-		return -EPERM;
-
-	if (esw->mode != MLX5_ESWITCH_LEGACY)
-		return -EOPNOTSUPP;
-
-	*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
-	return 0;
-}
-
-int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
-				 u16 vport, bool setting)
-{
-	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
-	int err = 0;
-
-	if (!mlx5_esw_allowed(esw))
-		return -EPERM;
-	if (IS_ERR(evport))
-		return PTR_ERR(evport);
-
-	mutex_lock(&esw->state_lock);
-	if (esw->mode != MLX5_ESWITCH_LEGACY) {
-		err = -EOPNOTSUPP;
-		goto unlock;
-	}
-	evport->info.trusted = setting;
-	if (evport->enabled)
-		esw_vport_change_handle_locked(evport);
-
-unlock:
-	mutex_unlock(&esw->state_lock);
-	return err;
-}
-
 static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
 {
 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
@@ -2406,50 +1949,6 @@ unlock:
 	return err;
 }
 
-static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
-					       struct mlx5_vport *vport,
-					       struct mlx5_vport_drop_stats *stats)
-{
-	struct mlx5_eswitch *esw = dev->priv.eswitch;
-	u64 rx_discard_vport_down, tx_discard_vport_down;
-	u64 bytes = 0;
-	int err = 0;
-
-	if (esw->mode != MLX5_ESWITCH_LEGACY)
-		return 0;
-
-	mutex_lock(&esw->state_lock);
-	if (!vport->enabled)
-		goto unlock;
-
-	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
-		mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
-			      &stats->rx_dropped, &bytes);
-
-	if (vport->ingress.legacy.drop_counter)
-		mlx5_fc_query(dev, vport->ingress.legacy.drop_counter,
-			      &stats->tx_dropped, &bytes);
-
-	if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) &&
-	    !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
-		goto unlock;
-
-	err = mlx5_query_vport_down_stats(dev, vport->vport, 1,
-					  &rx_discard_vport_down,
-					  &tx_discard_vport_down);
-	if (err)
-		goto unlock;
-
-	if (MLX5_CAP_GEN(dev, receive_discard_vport_down))
-		stats->rx_dropped += rx_discard_vport_down;
-	if (MLX5_CAP_GEN(dev, transmit_discard_vport_down))
-		stats->tx_dropped += tx_discard_vport_down;
-
-unlock:
-	mutex_unlock(&esw->state_lock);
-	return err;
-}
-
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 				 u16 vport_num,
 				 struct ifla_vf_stats *vf_stats)
@@ -2517,7 +2016,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 	vf_stats->broadcast =
 		MLX5_GET_CTR(out, received_eth_broadcast.packets);
 
-	err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats);
+	err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats);
 	if (err)
 		goto free_out;
 	vf_stats->rx_dropped = stats.rx_dropped;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a61a33c36fa8..b289d756a7e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -814,6 +814,8 @@ void mlx5_esw_put(struct mlx5_core_dev *dev);
 int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
 void mlx5_esw_unlock(struct mlx5_eswitch *esw);
 
+void esw_vport_change_handle_locked(struct mlx5_vport *vport);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
-- 
cgit v1.2.3


From 57b92bdd9e14f969de85d25cefc10abc3f73defd Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Mon, 1 Mar 2021 14:12:13 +0200
Subject: net/mlx5: E-Switch, Initialize eswitch acls ns when eswitch is
 enabled

Currently eswitch flow steering (FS) namespace of vport's ingress and
egress ACL are enabled when FS layer is initialized. This is done even
when eswitch is diabled. This demands that total eswitch ports to be
known to FS layer without eswitch in use.

Given the FS core is not dependent on eswitch, make namespace init and
cleanup routines as helper routines to be invoked only when eswitch is
needed.

With this change, ingress and egress ACL namespaces are created only
when eswitch legacy/offloads mode is enabled.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 53 ++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 88 ++++++++++-------------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  7 ++
 3 files changed, 93 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 3dfcd4ec1556..1bb229ecd43b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1367,6 +1367,47 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode)
 	blocking_notifier_call_chain(&esw->n_head, 0, &info);
 }
 
+static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	int total_vports;
+	int err;
+
+	total_vports = mlx5_eswitch_get_total_vports(dev);
+
+	if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+		err = mlx5_fs_egress_acls_init(dev, total_vports);
+		if (err)
+			return err;
+	} else {
+		esw_warn(dev, "engress ACL is not supported by FW\n");
+	}
+
+	if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+		err = mlx5_fs_ingress_acls_init(dev, total_vports);
+		if (err)
+			goto err;
+	} else {
+		esw_warn(dev, "ingress ACL is not supported by FW\n");
+	}
+	return 0;
+
+err:
+	if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+		mlx5_fs_egress_acls_cleanup(dev);
+	return err;
+}
+
+static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+
+	if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+		mlx5_fs_ingress_acls_cleanup(dev);
+	if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+		mlx5_fs_egress_acls_cleanup(dev);
+}
+
 /**
  * mlx5_eswitch_enable_locked - Enable eswitch
  * @esw:	Pointer to eswitch
@@ -1395,14 +1436,12 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 		return -EOPNOTSUPP;
 	}
 
-	if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
-		esw_warn(esw->dev, "ingress ACL is not supported by FW\n");
-
-	if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
-		esw_warn(esw->dev, "engress ACL is not supported by FW\n");
-
 	mlx5_eswitch_get_devlink_param(esw);
 
+	err = mlx5_esw_acls_ns_init(esw);
+	if (err)
+		return err;
+
 	mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
 
 	esw_create_tsar(esw);
@@ -1438,6 +1477,7 @@ abort:
 		mlx5_rescan_drivers(esw->dev);
 
 	esw_destroy_tsar(esw);
+	mlx5_esw_acls_ns_cleanup(esw);
 	return err;
 }
 
@@ -1506,6 +1546,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
 		mlx5_rescan_drivers(esw->dev);
 
 	esw_destroy_tsar(esw);
+	mlx5_esw_acls_ns_cleanup(esw);
 
 	if (clear_vf)
 		mlx5_eswitch_clear_vf_vports_info(esw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 0216bd63a42d..f74d2c834037 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2229,17 +2229,21 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 
-	if (!steering || vport >= mlx5_eswitch_get_total_vports(dev))
+	if (!steering)
 		return NULL;
 
 	switch (type) {
 	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+		if (vport >= steering->esw_egress_acl_vports)
+			return NULL;
 		if (steering->esw_egress_root_ns &&
 		    steering->esw_egress_root_ns[vport])
 			return &steering->esw_egress_root_ns[vport]->ns;
 		else
 			return NULL;
 	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+		if (vport >= steering->esw_ingress_acl_vports)
+			return NULL;
 		if (steering->esw_ingress_root_ns &&
 		    steering->esw_ingress_root_ns[vport])
 			return &steering->esw_ingress_root_ns[vport]->ns;
@@ -2571,43 +2575,11 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
 	clean_tree(&root_ns->ns.node);
 }
 
-static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev)
-{
-	struct mlx5_flow_steering *steering = dev->priv.steering;
-	int i;
-
-	if (!steering->esw_egress_root_ns)
-		return;
-
-	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
-		cleanup_root_ns(steering->esw_egress_root_ns[i]);
-
-	kfree(steering->esw_egress_root_ns);
-	steering->esw_egress_root_ns = NULL;
-}
-
-static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev)
-{
-	struct mlx5_flow_steering *steering = dev->priv.steering;
-	int i;
-
-	if (!steering->esw_ingress_root_ns)
-		return;
-
-	for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++)
-		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
-
-	kfree(steering->esw_ingress_root_ns);
-	steering->esw_ingress_root_ns = NULL;
-}
-
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 
 	cleanup_root_ns(steering->root_ns);
-	cleanup_egress_acls_root_ns(dev);
-	cleanup_ingress_acls_root_ns(dev);
 	cleanup_root_ns(steering->fdb_root_ns);
 	steering->fdb_root_ns = NULL;
 	kfree(steering->fdb_sub_ns);
@@ -2852,10 +2824,9 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo
 	return PTR_ERR_OR_ZERO(prio);
 }
 
-static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
-	int total_vports = mlx5_eswitch_get_total_vports(dev);
 	int err;
 	int i;
 
@@ -2871,7 +2842,7 @@ static int init_egress_acls_root_ns(struct mlx5_core_dev *dev)
 		if (err)
 			goto cleanup_root_ns;
 	}
-
+	steering->esw_egress_acl_vports = total_vports;
 	return 0;
 
 cleanup_root_ns:
@@ -2882,10 +2853,24 @@ cleanup_root_ns:
 	return err;
 }
 
-static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int i;
+
+	if (!steering->esw_egress_root_ns)
+		return;
+
+	for (i = 0; i < steering->esw_egress_acl_vports; i++)
+		cleanup_root_ns(steering->esw_egress_root_ns[i]);
+
+	kfree(steering->esw_egress_root_ns);
+	steering->esw_egress_root_ns = NULL;
+}
+
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports)
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
-	int total_vports = mlx5_eswitch_get_total_vports(dev);
 	int err;
 	int i;
 
@@ -2901,7 +2886,7 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev)
 		if (err)
 			goto cleanup_root_ns;
 	}
-
+	steering->esw_ingress_acl_vports = total_vports;
 	return 0;
 
 cleanup_root_ns:
@@ -2912,6 +2897,21 @@ cleanup_root_ns:
 	return err;
 }
 
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+	int i;
+
+	if (!steering->esw_ingress_root_ns)
+		return;
+
+	for (i = 0; i < steering->esw_ingress_acl_vports; i++)
+		cleanup_root_ns(steering->esw_ingress_root_ns[i]);
+
+	kfree(steering->esw_ingress_root_ns);
+	steering->esw_ingress_root_ns = NULL;
+}
+
 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
 {
 	int err;
@@ -2974,16 +2974,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			if (err)
 				goto err;
 		}
-		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
-			err = init_egress_acls_root_ns(dev);
-			if (err)
-				goto err;
-		}
-		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
-			err = init_ingress_acls_root_ns(dev);
-			if (err)
-				goto err;
-		}
 	}
 
 	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index b24a9849c45e..e577a2c424af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -129,6 +129,8 @@ struct mlx5_flow_steering {
 	struct mlx5_flow_root_namespace	*rdma_rx_root_ns;
 	struct mlx5_flow_root_namespace	*rdma_tx_root_ns;
 	struct mlx5_flow_root_namespace	*egress_root_ns;
+	int esw_egress_acl_vports;
+	int esw_ingress_acl_vports;
 };
 
 struct fs_node {
@@ -287,6 +289,11 @@ int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
 int mlx5_init_fs(struct mlx5_core_dev *dev);
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
 
+int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
+int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
+void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+
 #define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
 
 #define fs_list_for_each_entry(pos, root)		\
-- 
cgit v1.2.3


From 6e74e6ea1b64a7684a52d6e593255c8ef84b8252 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 5 Mar 2021 10:11:21 +0200
Subject: net/mlx5: SF, Use device pointer directly

At many places in the code, device pointer is directly available. Make
use of it, instead of accessing it from the table.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index c9bddde04047..ec53c11c8344 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -67,8 +67,8 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 		goto exist_err;
 	}
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id);
-	err = mlx5_cmd_alloc_sf(table->dev, hw_fn_id);
+	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sw_id);
+	err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
 	if (err)
 		goto err;
 
@@ -80,7 +80,7 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 	return sw_id;
 
 vhca_err:
-	mlx5_cmd_dealloc_sf(table->dev, hw_fn_id);
+	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
 err:
 	table->sfs[i].allocated = false;
 exist_err:
@@ -93,8 +93,8 @@ static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id)
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u16 hw_fn_id;
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, id);
-	mlx5_cmd_dealloc_sf(table->dev, hw_fn_id);
+	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
 	table->sfs[id].allocated = false;
 	table->sfs[id].pending_delete = false;
 }
@@ -123,7 +123,7 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
 		goto err;
 	state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
 	if (state == MLX5_VHCA_STATE_ALLOCATED) {
-		mlx5_cmd_dealloc_sf(table->dev, hw_fn_id);
+		mlx5_cmd_dealloc_sf(dev, hw_fn_id);
 		table->sfs[id].allocated = false;
 	} else {
 		table->sfs[id].pending_delete = true;
@@ -216,7 +216,7 @@ int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev)
 		return 0;
 
 	table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event;
-	return mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb);
+	return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb);
 }
 
 void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
@@ -226,7 +226,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
 	if (!table)
 		return;
 
-	mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb);
+	mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
 	/* Dealloc SFs whose firmware event has been missed. */
 	mlx5_sf_hw_dealloc_all(table);
 }
-- 
cgit v1.2.3


From a74ed24c437eddda3299904d870f0df5ef23e475 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 5 Mar 2021 07:38:02 +0200
Subject: net/mlx5: SF, Reuse stored hardware function id

SF's hardware function id is already stored in mlx5_sf. Reuse it,
instead of querying the hw table.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 60a6328a9ca0..52226d9b9a6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -270,15 +270,14 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	struct mlx5_sf *sf;
-	u16 hw_fn_id;
 	int err;
 
 	sf = mlx5_sf_alloc(table, new_attr->sfnum, extack);
 	if (IS_ERR(sf))
 		return PTR_ERR(sf);
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id);
-	err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, hw_fn_id, new_attr->sfnum);
+	err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
+						new_attr->sfnum);
 	if (err)
 		goto esw_err;
 	*new_port_index = sf->port_index;
-- 
cgit v1.2.3


From 9dac2966c531365d9cd1fb33d8d3a70cec349e35 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Thu, 12 Nov 2020 01:32:52 +0000
Subject: net/mlx5: DR, Use variably sized data structures for different
 actions

mlx5dr_action is a generally used data structure, and there is an
union for different types of actions in it. The size of mlx5dr_action
is about 72 bytes, but for those actions with fewer fields, most of
the allocated memory is wasted.
Remove this union, and mlx5dr_action becomes a generic action header.
Then actions are dynamically allocated with needed memory, the data
for each action is stored right after the header.

Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_action.c        | 242 +++++++++++----------
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c |   8 +-
 .../mellanox/mlx5/core/steering/dr_table.c         |   4 +-
 .../mellanox/mlx5/core/steering/dr_types.h         | 104 +++++----
 4 files changed, 199 insertions(+), 159 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 28a7971cac6a..949879cf2092 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -313,8 +313,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
 		 * table, since there is an *assumption* that in such case FW
 		 * will recalculate the CS.
 		 */
-		if (dest_action->dest_tbl.is_fw_tbl) {
-			*final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr;
+		if (dest_action->dest_tbl->is_fw_tbl) {
+			*final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr;
 		} else {
 			mlx5dr_dbg(dmn,
 				   "Destination FT should be terminating when modify TTL is used\n");
@@ -326,8 +326,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn,
 		/* If destination is vport we will get the FW flow table
 		 * that recalculates the CS and forwards to the vport.
 		 */
-		ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn,
-								dest_action->vport.caps->num,
+		ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport->dmn,
+								dest_action->vport->caps->num,
 								final_icm_addr);
 		if (ret) {
 			mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n");
@@ -369,6 +369,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 	action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type);
 
 	for (i = 0; i < num_actions; i++) {
+		struct mlx5dr_action_dest_tbl *dest_tbl;
 		struct mlx5dr_action *action;
 		int max_actions_type = 1;
 		u32 action_type;
@@ -382,37 +383,38 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 			break;
 		case DR_ACTION_TYP_FT:
 			dest_action = action;
-			if (!action->dest_tbl.is_fw_tbl) {
-				if (action->dest_tbl.tbl->dmn != dmn) {
+			dest_tbl = action->dest_tbl;
+			if (!dest_tbl->is_fw_tbl) {
+				if (dest_tbl->tbl->dmn != dmn) {
 					mlx5dr_err(dmn,
 						   "Destination table belongs to a different domain\n");
 					goto out_invalid_arg;
 				}
-				if (action->dest_tbl.tbl->level <= matcher->tbl->level) {
+				if (dest_tbl->tbl->level <= matcher->tbl->level) {
 					mlx5_core_warn_once(dmn->mdev,
 							    "Connecting table to a lower/same level destination table\n");
 					mlx5dr_dbg(dmn,
 						   "Connecting table at level %d to a destination table at level %d\n",
 						   matcher->tbl->level,
-						   action->dest_tbl.tbl->level);
+						   dest_tbl->tbl->level);
 				}
 				attr.final_icm_addr = rx_rule ?
-					action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
-					action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr;
+					dest_tbl->tbl->rx.s_anchor->chunk->icm_addr :
+					dest_tbl->tbl->tx.s_anchor->chunk->icm_addr;
 			} else {
 				struct mlx5dr_cmd_query_flow_table_details output;
 				int ret;
 
 				/* get the relevant addresses */
-				if (!action->dest_tbl.fw_tbl.rx_icm_addr) {
+				if (!action->dest_tbl->fw_tbl.rx_icm_addr) {
 					ret = mlx5dr_cmd_query_flow_table(dmn->mdev,
-									  action->dest_tbl.fw_tbl.type,
-									  action->dest_tbl.fw_tbl.id,
+									  dest_tbl->fw_tbl.type,
+									  dest_tbl->fw_tbl.id,
 									  &output);
 					if (!ret) {
-						action->dest_tbl.fw_tbl.tx_icm_addr =
+						dest_tbl->fw_tbl.tx_icm_addr =
 							output.sw_owner_icm_root_1;
-						action->dest_tbl.fw_tbl.rx_icm_addr =
+						dest_tbl->fw_tbl.rx_icm_addr =
 							output.sw_owner_icm_root_0;
 					} else {
 						mlx5dr_err(dmn,
@@ -422,50 +424,50 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 					}
 				}
 				attr.final_icm_addr = rx_rule ?
-					action->dest_tbl.fw_tbl.rx_icm_addr :
-					action->dest_tbl.fw_tbl.tx_icm_addr;
+					dest_tbl->fw_tbl.rx_icm_addr :
+					dest_tbl->fw_tbl.tx_icm_addr;
 			}
 			break;
 		case DR_ACTION_TYP_QP:
 			mlx5dr_info(dmn, "Domain doesn't support QP\n");
 			goto out_invalid_arg;
 		case DR_ACTION_TYP_CTR:
-			attr.ctr_id = action->ctr.ctr_id +
-				action->ctr.offeset;
+			attr.ctr_id = action->ctr->ctr_id +
+				action->ctr->offeset;
 			break;
 		case DR_ACTION_TYP_TAG:
-			attr.flow_tag = action->flow_tag;
+			attr.flow_tag = action->flow_tag->flow_tag;
 			break;
 		case DR_ACTION_TYP_TNL_L2_TO_L2:
 			break;
 		case DR_ACTION_TYP_TNL_L3_TO_L2:
-			attr.decap_index = action->rewrite.index;
-			attr.decap_actions = action->rewrite.num_of_actions;
+			attr.decap_index = action->rewrite->index;
+			attr.decap_actions = action->rewrite->num_of_actions;
 			attr.decap_with_vlan =
 				attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS;
 			break;
 		case DR_ACTION_TYP_MODIFY_HDR:
-			attr.modify_index = action->rewrite.index;
-			attr.modify_actions = action->rewrite.num_of_actions;
-			recalc_cs_required = action->rewrite.modify_ttl &&
+			attr.modify_index = action->rewrite->index;
+			attr.modify_actions = action->rewrite->num_of_actions;
+			recalc_cs_required = action->rewrite->modify_ttl &&
 					     !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps);
 			break;
 		case DR_ACTION_TYP_L2_TO_TNL_L2:
 		case DR_ACTION_TYP_L2_TO_TNL_L3:
-			attr.reformat_size = action->reformat.reformat_size;
-			attr.reformat_id = action->reformat.reformat_id;
+			attr.reformat_size = action->reformat->reformat_size;
+			attr.reformat_id = action->reformat->reformat_id;
 			break;
 		case DR_ACTION_TYP_VPORT:
-			attr.hit_gvmi = action->vport.caps->vhca_gvmi;
+			attr.hit_gvmi = action->vport->caps->vhca_gvmi;
 			dest_action = action;
 			if (rx_rule) {
 				/* Loopback on WIRE vport is not supported */
-				if (action->vport.caps->num == WIRE_PORT)
+				if (action->vport->caps->num == WIRE_PORT)
 					goto out_invalid_arg;
 
-				attr.final_icm_addr = action->vport.caps->icm_address_rx;
+				attr.final_icm_addr = action->vport->caps->icm_address_rx;
 			} else {
-				attr.final_icm_addr = action->vport.caps->icm_address_tx;
+				attr.final_icm_addr = action->vport->caps->icm_address_tx;
 			}
 			break;
 		case DR_ACTION_TYP_POP_VLAN:
@@ -477,7 +479,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
 			if (attr.vlans.count == MLX5DR_MAX_VLANS)
 				return -EINVAL;
 
-			attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr;
+			attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr;
 			break;
 		default:
 			goto out_invalid_arg;
@@ -530,17 +532,37 @@ out_invalid_arg:
 	return -EINVAL;
 }
 
+static unsigned int action_size[DR_ACTION_TYP_MAX] = {
+	[DR_ACTION_TYP_TNL_L2_TO_L2] = sizeof(struct mlx5dr_action_reformat),
+	[DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat),
+	[DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite),
+	[DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat),
+	[DR_ACTION_TYP_FT]           = sizeof(struct mlx5dr_action_dest_tbl),
+	[DR_ACTION_TYP_CTR]          = sizeof(struct mlx5dr_action_ctr),
+	[DR_ACTION_TYP_TAG]          = sizeof(struct mlx5dr_action_flow_tag),
+	[DR_ACTION_TYP_MODIFY_HDR]   = sizeof(struct mlx5dr_action_rewrite),
+	[DR_ACTION_TYP_VPORT]        = sizeof(struct mlx5dr_action_vport),
+	[DR_ACTION_TYP_PUSH_VLAN]    = sizeof(struct mlx5dr_action_push_vlan),
+};
+
 static struct mlx5dr_action *
 dr_action_create_generic(enum mlx5dr_action_type action_type)
 {
 	struct mlx5dr_action *action;
+	int extra_size;
+
+	if (action_type < DR_ACTION_TYP_MAX)
+		extra_size = action_size[action_type];
+	else
+		return NULL;
 
-	action = kzalloc(sizeof(*action), GFP_KERNEL);
+	action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL);
 	if (!action)
 		return NULL;
 
 	action->action_type = action_type;
 	refcount_set(&action->refcount, 1);
+	action->data = action + 1;
 
 	return action;
 }
@@ -559,10 +581,10 @@ mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num)
 	if (!action)
 		return NULL;
 
-	action->dest_tbl.is_fw_tbl = true;
-	action->dest_tbl.fw_tbl.dmn = dmn;
-	action->dest_tbl.fw_tbl.id = table_num;
-	action->dest_tbl.fw_tbl.type = FS_FT_FDB;
+	action->dest_tbl->is_fw_tbl = true;
+	action->dest_tbl->fw_tbl.dmn = dmn;
+	action->dest_tbl->fw_tbl.id = table_num;
+	action->dest_tbl->fw_tbl.type = FS_FT_FDB;
 	refcount_inc(&dmn->refcount);
 
 	return action;
@@ -579,7 +601,7 @@ mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl)
 	if (!action)
 		goto dec_ref;
 
-	action->dest_tbl.tbl = tbl;
+	action->dest_tbl->tbl = tbl;
 
 	return action;
 
@@ -624,12 +646,12 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
 		case DR_ACTION_TYP_VPORT:
 			hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
 			hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-			hw_dests[i].vport.num = dest_action->vport.caps->num;
-			hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi;
+			hw_dests[i].vport.num = dest_action->vport->caps->num;
+			hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi;
 			if (reformat_action) {
 				reformat_req = true;
 				hw_dests[i].vport.reformat_id =
-					reformat_action->reformat.reformat_id;
+					reformat_action->reformat->reformat_id;
 				ref_actions[num_of_ref++] = reformat_action;
 				hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
 			}
@@ -637,10 +659,10 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
 
 		case DR_ACTION_TYP_FT:
 			hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-			if (dest_action->dest_tbl.is_fw_tbl)
-				hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id;
+			if (dest_action->dest_tbl->is_fw_tbl)
+				hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id;
 			else
-				hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id;
+				hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id;
 			break;
 
 		default:
@@ -657,8 +679,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
 				      hw_dests,
 				      num_of_dests,
 				      reformat_req,
-				      &action->dest_tbl.fw_tbl.id,
-				      &action->dest_tbl.fw_tbl.group_id);
+				      &action->dest_tbl->fw_tbl.id,
+				      &action->dest_tbl->fw_tbl.group_id);
 	if (ret)
 		goto free_action;
 
@@ -667,11 +689,11 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
 	for (i = 0; i < num_of_ref; i++)
 		refcount_inc(&ref_actions[i]->refcount);
 
-	action->dest_tbl.is_fw_tbl = true;
-	action->dest_tbl.fw_tbl.dmn = dmn;
-	action->dest_tbl.fw_tbl.type = FS_FT_FDB;
-	action->dest_tbl.fw_tbl.ref_actions = ref_actions;
-	action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref;
+	action->dest_tbl->is_fw_tbl = true;
+	action->dest_tbl->fw_tbl.dmn = dmn;
+	action->dest_tbl->fw_tbl.type = FS_FT_FDB;
+	action->dest_tbl->fw_tbl.ref_actions = ref_actions;
+	action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref;
 
 	kfree(hw_dests);
 
@@ -696,10 +718,10 @@ mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn,
 	if (!action)
 		return NULL;
 
-	action->dest_tbl.is_fw_tbl = 1;
-	action->dest_tbl.fw_tbl.type = ft->type;
-	action->dest_tbl.fw_tbl.id = ft->id;
-	action->dest_tbl.fw_tbl.dmn = dmn;
+	action->dest_tbl->is_fw_tbl = 1;
+	action->dest_tbl->fw_tbl.type = ft->type;
+	action->dest_tbl->fw_tbl.id = ft->id;
+	action->dest_tbl->fw_tbl.dmn = dmn;
 
 	refcount_inc(&dmn->refcount);
 
@@ -715,7 +737,7 @@ mlx5dr_action_create_flow_counter(u32 counter_id)
 	if (!action)
 		return NULL;
 
-	action->ctr.ctr_id = counter_id;
+	action->ctr->ctr_id = counter_id;
 
 	return action;
 }
@@ -728,7 +750,7 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value)
 	if (!action)
 		return NULL;
 
-	action->flow_tag = tag_value & 0xffffff;
+	action->flow_tag->flow_tag = tag_value & 0xffffff;
 
 	return action;
 }
@@ -794,8 +816,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
 		if (ret)
 			return ret;
 
-		action->reformat.reformat_id = reformat_id;
-		action->reformat.reformat_size = data_sz;
+		action->reformat->reformat_id = reformat_id;
+		action->reformat->reformat_size = data_sz;
 		return 0;
 	}
 	case DR_ACTION_TYP_TNL_L2_TO_L2:
@@ -811,28 +833,28 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn,
 							  data, data_sz,
 							  hw_actions,
 							  ACTION_CACHE_LINE_SIZE,
-							  &action->rewrite.num_of_actions);
+							  &action->rewrite->num_of_actions);
 		if (ret) {
 			mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n");
 			return ret;
 		}
 
-		action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
-							       DR_CHUNK_SIZE_8);
-		if (!action->rewrite.chunk) {
+		action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool,
+								DR_CHUNK_SIZE_8);
+		if (!action->rewrite->chunk) {
 			mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n");
 			return -ENOMEM;
 		}
 
-		action->rewrite.data = (void *)hw_actions;
-		action->rewrite.index = (action->rewrite.chunk->icm_addr -
+		action->rewrite->data = (void *)hw_actions;
+		action->rewrite->index = (action->rewrite->chunk->icm_addr -
 					 dmn->info.caps.hdr_modify_icm_addr) /
 					 ACTION_CACHE_LINE_SIZE;
 
 		ret = mlx5dr_send_postsend_action(dmn, action);
 		if (ret) {
 			mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n");
-			mlx5dr_icm_free_chunk(action->rewrite.chunk);
+			mlx5dr_icm_free_chunk(action->rewrite->chunk);
 			return ret;
 		}
 		return 0;
@@ -867,7 +889,7 @@ struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn,
 	if (!action)
 		return NULL;
 
-	action->push_vlan.vlan_hdr = vlan_hdr_h;
+	action->push_vlan->vlan_hdr = vlan_hdr_h;
 	return action;
 }
 
@@ -898,7 +920,7 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn,
 	if (!action)
 		goto dec_ref;
 
-	action->reformat.dmn = dmn;
+	action->reformat->dmn = dmn;
 
 	ret = dr_action_create_reformat_action(dmn,
 					       data_sz,
@@ -1104,17 +1126,17 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action,
 					    const __be64 *sw_action)
 {
 	u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
-	struct mlx5dr_domain *dmn = action->rewrite.dmn;
+	struct mlx5dr_domain *dmn = action->rewrite->dmn;
 
 	if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
-		action->rewrite.allow_rx = 0;
+		action->rewrite->allow_rx = 0;
 		if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
 			mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
 				   sw_field);
 			return -EINVAL;
 		}
 	} else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
-		action->rewrite.allow_tx = 0;
+		action->rewrite->allow_tx = 0;
 		if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
 			mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
 				   sw_field);
@@ -1122,7 +1144,7 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action,
 		}
 	}
 
-	if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) {
+	if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
 		mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n");
 		return -EINVAL;
 	}
@@ -1135,7 +1157,7 @@ dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action,
 					    const __be64 *sw_action)
 {
 	u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
-	struct mlx5dr_domain *dmn = action->rewrite.dmn;
+	struct mlx5dr_domain *dmn = action->rewrite->dmn;
 
 	if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL &&
 	    sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT &&
@@ -1153,7 +1175,7 @@ static int
 dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
 					     const __be64 *sw_action)
 {
-	struct mlx5dr_domain *dmn = action->rewrite.dmn;
+	struct mlx5dr_domain *dmn = action->rewrite->dmn;
 	u16 sw_fields[2];
 	int i;
 
@@ -1162,14 +1184,14 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
 
 	for (i = 0; i < 2; i++) {
 		if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) {
-			action->rewrite.allow_rx = 0;
+			action->rewrite->allow_rx = 0;
 			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) {
 				mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n",
 					   sw_fields[i]);
 				return -EINVAL;
 			}
 		} else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) {
-			action->rewrite.allow_tx = 0;
+			action->rewrite->allow_tx = 0;
 			if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) {
 				mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n",
 					   sw_fields[i]);
@@ -1178,7 +1200,7 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action,
 		}
 	}
 
-	if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) {
+	if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) {
 		mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n");
 		return -EINVAL;
 	}
@@ -1190,7 +1212,7 @@ static int
 dr_action_modify_check_field_limitation(struct mlx5dr_action *action,
 					const __be64 *sw_action)
 {
-	struct mlx5dr_domain *dmn = action->rewrite.dmn;
+	struct mlx5dr_domain *dmn = action->rewrite->dmn;
 	u8 action_type;
 	int ret;
 
@@ -1239,7 +1261,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
 {
 	const struct mlx5dr_ste_action_modify_field *hw_dst_action_info;
 	const struct mlx5dr_ste_action_modify_field *hw_src_action_info;
-	struct mlx5dr_domain *dmn = action->rewrite.dmn;
+	struct mlx5dr_domain *dmn = action->rewrite->dmn;
 	int ret, i, hw_idx = 0;
 	__be64 *sw_action;
 	__be64 hw_action;
@@ -1249,8 +1271,8 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action,
 
 	*modify_ttl = false;
 
-	action->rewrite.allow_rx = 1;
-	action->rewrite.allow_tx = 1;
+	action->rewrite->allow_rx = 1;
+	action->rewrite->allow_tx = 1;
 
 	for (i = 0; i < num_sw_actions; i++) {
 		sw_action = &sw_actions[i];
@@ -1358,13 +1380,13 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn,
 	if (ret)
 		goto free_hw_actions;
 
-	action->rewrite.chunk = chunk;
-	action->rewrite.modify_ttl = modify_ttl;
-	action->rewrite.data = (u8 *)hw_actions;
-	action->rewrite.num_of_actions = num_hw_actions;
-	action->rewrite.index = (chunk->icm_addr -
-				 dmn->info.caps.hdr_modify_icm_addr) /
-				 ACTION_CACHE_LINE_SIZE;
+	action->rewrite->chunk = chunk;
+	action->rewrite->modify_ttl = modify_ttl;
+	action->rewrite->data = (u8 *)hw_actions;
+	action->rewrite->num_of_actions = num_hw_actions;
+	action->rewrite->index = (chunk->icm_addr -
+				  dmn->info.caps.hdr_modify_icm_addr) /
+				  ACTION_CACHE_LINE_SIZE;
 
 	ret = mlx5dr_send_postsend_action(dmn, action);
 	if (ret)
@@ -1399,7 +1421,7 @@ mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn,
 	if (!action)
 		goto dec_ref;
 
-	action->rewrite.dmn = dmn;
+	action->rewrite->dmn = dmn;
 
 	ret = dr_action_create_modify_action(dmn,
 					     actions_sz,
@@ -1451,8 +1473,8 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
 	if (!action)
 		return NULL;
 
-	action->vport.dmn = vport_dmn;
-	action->vport.caps = vport_cap;
+	action->vport->dmn = vport_dmn;
+	action->vport->caps = vport_cap;
 
 	return action;
 }
@@ -1464,44 +1486,44 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
 
 	switch (action->action_type) {
 	case DR_ACTION_TYP_FT:
-		if (action->dest_tbl.is_fw_tbl)
-			refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount);
+		if (action->dest_tbl->is_fw_tbl)
+			refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount);
 		else
-			refcount_dec(&action->dest_tbl.tbl->refcount);
+			refcount_dec(&action->dest_tbl->tbl->refcount);
 
-		if (action->dest_tbl.is_fw_tbl &&
-		    action->dest_tbl.fw_tbl.num_of_ref_actions) {
+		if (action->dest_tbl->is_fw_tbl &&
+		    action->dest_tbl->fw_tbl.num_of_ref_actions) {
 			struct mlx5dr_action **ref_actions;
 			int i;
 
-			ref_actions = action->dest_tbl.fw_tbl.ref_actions;
-			for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++)
+			ref_actions = action->dest_tbl->fw_tbl.ref_actions;
+			for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++)
 				refcount_dec(&ref_actions[i]->refcount);
 
 			kfree(ref_actions);
 
-			mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn,
-						 action->dest_tbl.fw_tbl.id,
-						 action->dest_tbl.fw_tbl.group_id);
+			mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn,
+						 action->dest_tbl->fw_tbl.id,
+						 action->dest_tbl->fw_tbl.group_id);
 		}
 		break;
 	case DR_ACTION_TYP_TNL_L2_TO_L2:
-		refcount_dec(&action->reformat.dmn->refcount);
+		refcount_dec(&action->reformat->dmn->refcount);
 		break;
 	case DR_ACTION_TYP_TNL_L3_TO_L2:
-		mlx5dr_icm_free_chunk(action->rewrite.chunk);
-		refcount_dec(&action->reformat.dmn->refcount);
+		mlx5dr_icm_free_chunk(action->rewrite->chunk);
+		refcount_dec(&action->rewrite->dmn->refcount);
 		break;
 	case DR_ACTION_TYP_L2_TO_TNL_L2:
 	case DR_ACTION_TYP_L2_TO_TNL_L3:
-		mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev,
-						action->reformat.reformat_id);
-		refcount_dec(&action->reformat.dmn->refcount);
+		mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev,
+						action->reformat->reformat_id);
+		refcount_dec(&action->reformat->dmn->refcount);
 		break;
 	case DR_ACTION_TYP_MODIFY_HDR:
-		mlx5dr_icm_free_chunk(action->rewrite.chunk);
-		kfree(action->rewrite.data);
-		refcount_dec(&action->rewrite.dmn->refcount);
+		mlx5dr_icm_free_chunk(action->rewrite->chunk);
+		kfree(action->rewrite->data);
+		refcount_dec(&action->rewrite->dmn->refcount);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 8a6a56f9dc4e..24acced415d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -572,12 +572,12 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
 	struct postsend_info send_info = {};
 	int ret;
 
-	send_info.write.addr = (uintptr_t)action->rewrite.data;
-	send_info.write.length = action->rewrite.num_of_actions *
+	send_info.write.addr = (uintptr_t)action->rewrite->data;
+	send_info.write.length = action->rewrite->num_of_actions *
 				 DR_MODIFY_ACTION_SIZE;
 	send_info.write.lkey = 0;
-	send_info.remote_addr = action->rewrite.chunk->mr_addr;
-	send_info.rkey = action->rewrite.chunk->rkey;
+	send_info.remote_addr = action->rewrite->chunk->mr_addr;
+	send_info.rkey = action->rewrite->chunk->rkey;
 
 	ret = dr_postsend_icm_data(dmn, &send_info);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index b599b6beb5b9..30ae3cda6d2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -29,7 +29,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
 			last_htbl = tbl->rx.s_anchor;
 
 		tbl->rx.default_icm_addr = action ?
-			action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr :
+			action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr :
 			tbl->rx.nic_dmn->default_icm_addr;
 
 		info.type = CONNECT_MISS;
@@ -53,7 +53,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
 			last_htbl = tbl->tx.s_anchor;
 
 		tbl->tx.default_icm_addr = action ?
-			action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr :
+			action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr :
 			tbl->tx.nic_dmn->default_icm_addr;
 
 		info.type = CONNECT_MISS;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 4af0e4e6a13c..462673947f3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -806,53 +806,71 @@ struct mlx5dr_ste_action_modify_field {
 	u8 l4_type;
 };
 
+struct mlx5dr_action_rewrite {
+	struct mlx5dr_domain *dmn;
+	struct mlx5dr_icm_chunk *chunk;
+	u8 *data;
+	u16 num_of_actions;
+	u32 index;
+	u8 allow_rx:1;
+	u8 allow_tx:1;
+	u8 modify_ttl:1;
+};
+
+struct mlx5dr_action_reformat {
+	struct mlx5dr_domain *dmn;
+	u32 reformat_id;
+	u32 reformat_size;
+};
+
+struct mlx5dr_action_dest_tbl {
+	u8 is_fw_tbl:1;
+	union {
+		struct mlx5dr_table *tbl;
+		struct {
+			struct mlx5dr_domain *dmn;
+			u32 id;
+			u32 group_id;
+			enum fs_flow_table_type type;
+			u64 rx_icm_addr;
+			u64 tx_icm_addr;
+			struct mlx5dr_action **ref_actions;
+			u32 num_of_ref_actions;
+		} fw_tbl;
+	};
+};
+
+struct mlx5dr_action_ctr {
+	u32 ctr_id;
+	u32 offeset;
+};
+
+struct mlx5dr_action_vport {
+	struct mlx5dr_domain *dmn;
+	struct mlx5dr_cmd_vport_cap *caps;
+};
+
+struct mlx5dr_action_push_vlan {
+	u32 vlan_hdr; /* tpid_pcp_dei_vid */
+};
+
+struct mlx5dr_action_flow_tag {
+	u32 flow_tag;
+};
+
 struct mlx5dr_action {
 	enum mlx5dr_action_type action_type;
 	refcount_t refcount;
+
 	union {
-		struct {
-			struct mlx5dr_domain *dmn;
-			struct mlx5dr_icm_chunk *chunk;
-			u8 *data;
-			u16 num_of_actions;
-			u32 index;
-			u8 allow_rx:1;
-			u8 allow_tx:1;
-			u8 modify_ttl:1;
-		} rewrite;
-		struct {
-			struct mlx5dr_domain *dmn;
-			u32 reformat_id;
-			u32 reformat_size;
-		} reformat;
-		struct {
-			u8 is_fw_tbl:1;
-			union {
-				struct mlx5dr_table *tbl;
-				struct {
-					struct mlx5dr_domain *dmn;
-					u32 id;
-					u32 group_id;
-					enum fs_flow_table_type type;
-					u64 rx_icm_addr;
-					u64 tx_icm_addr;
-					struct mlx5dr_action **ref_actions;
-					u32 num_of_ref_actions;
-				} fw_tbl;
-			};
-		} dest_tbl;
-		struct {
-			u32 ctr_id;
-			u32 offeset;
-		} ctr;
-		struct {
-			struct mlx5dr_domain *dmn;
-			struct mlx5dr_cmd_vport_cap *caps;
-		} vport;
-		struct {
-			u32 vlan_hdr; /* tpid_pcp_dei_vid */
-		} push_vlan;
-		u32 flow_tag;
+		void *data;
+		struct mlx5dr_action_rewrite *rewrite;
+		struct mlx5dr_action_reformat *reformat;
+		struct mlx5dr_action_dest_tbl *dest_tbl;
+		struct mlx5dr_action_ctr *ctr;
+		struct mlx5dr_action_vport *vport;
+		struct mlx5dr_action_push_vlan *push_vlan;
+		struct mlx5dr_action_flow_tag *flow_tag;
 	};
 };
 
-- 
cgit v1.2.3


From b7f86258a26402e5dd6b492b1d523e1b1bde34ec Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 15 Mar 2021 10:22:51 +0200
Subject: net/mlx5: DR, Alloc cmd buffer with kvzalloc() instead of kzalloc()

The cmd size is 8K so use kvzalloc().

Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c  | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 30b0136b5bc7..461473d31e2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -287,7 +287,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
 	u32 *in;
 	int err;
 
-	in = kzalloc(inlen, GFP_KERNEL);
+	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
@@ -302,7 +302,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
 	*group_id = MLX5_GET(create_flow_group_out, out, group_id);
 
 out:
-	kfree(in);
+	kvfree(in);
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 24acced415d3..c1926d927008 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -406,7 +406,7 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
 		alloc_size = *num_stes * DR_STE_SIZE;
 	}
 
-	*data = kzalloc(alloc_size, GFP_KERNEL);
+	*data = kvzalloc(alloc_size, GFP_KERNEL);
 	if (!*data)
 		return -ENOMEM;
 
@@ -505,7 +505,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
 	}
 
 out_free:
-	kfree(data);
+	kvfree(data);
 	return ret;
 }
 
@@ -562,7 +562,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
 	}
 
 out_free:
-	kfree(data);
+	kvfree(data);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 82c3ba31c370b6001cbf90689e98da1fb6f26aef Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 6 Apr 2021 17:53:46 +0100
Subject: net/mlx5: Fix bit-wise and with zero

The bit-wise and of the action field with MLX5_ACCEL_ESP_ACTION_DECRYPT
is incorrect as MLX5_ACCEL_ESP_ACTION_DECRYPT is zero and not intended
to be a bit-flag. Fix this by using the == operator as was originally
intended.

Addresses-Coverity: ("Logically dead code")
Fixes: 7dfee4b1d79e ("net/mlx5: IPsec, Refactor SA handle creation and destruction")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index d43a05e77f67..0b19293cdd74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -850,7 +850,7 @@ mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
 		return;
 	}
 
-	if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action &
+	if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action ==
 	    MLX5_ACCEL_ESP_ACTION_DECRYPT)
 		ida_free(&fipsec->halloc, sa_ctx->sa_handle);
 
-- 
cgit v1.2.3


From 02f47c04c36cdc0e002fbe7cc12fbb7c1d8fc56f Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Thu, 1 Apr 2021 21:07:41 +0800
Subject: net/mlx5: Add a blank line after declarations

There should be a blank lines after declarations.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c    | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index d5b1eb74d5e5..5cd466ec6492 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -392,11 +392,11 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 {
 	struct arfs_rule *arfs_rule;
 	struct hlist_node *htmp;
+	HLIST_HEAD(del_list);
 	int quota = 0;
 	int i;
 	int j;
 
-	HLIST_HEAD(del_list);
 	spin_lock_bh(&priv->fs.arfs->arfs_lock);
 	mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
 		if (!work_pending(&arfs_rule->arfs_work) &&
@@ -422,10 +422,10 @@ static void arfs_del_rules(struct mlx5e_priv *priv)
 {
 	struct hlist_node *htmp;
 	struct arfs_rule *rule;
+	HLIST_HEAD(del_list);
 	int i;
 	int j;
 
-	HLIST_HEAD(del_list);
 	spin_lock_bh(&priv->fs.arfs->arfs_lock);
 	mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) {
 		hlist_del_init(&rule->hlist);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 0b19293cdd74..0bba92cf5dc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -1085,6 +1085,7 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns,
 	rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress);
 	if (IS_ERR(rule->ctx)) {
 		int err = PTR_ERR(rule->ctx);
+
 		kfree(rule);
 		return err;
 	}
-- 
cgit v1.2.3


From 9dee115bc1478b6a51f664defbc5b091985a3fd3 Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Thu, 1 Apr 2021 21:07:42 +0800
Subject: net/mlx5: Remove return statement exist at the end of void function

void function return statements are not generally useful.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c     | 2 --
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/rdma.c    | 1 -
 3 files changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 127bb92da150..b8748390335f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -603,8 +603,6 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
 	if (err)
 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
 			      err);
-
-	return;
 }
 
 /* Must be called with intf_mutex held */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 19e3e978267e..1f907df5b3a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -167,7 +167,6 @@ static void irq_set_name(char *name, int vecidx)
 
 	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
 		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
-	return;
 }
 
 static int request_irqs(struct mlx5_core_dev *dev, int nvec)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 8e0dddc6383f..441b5453acae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -180,5 +180,4 @@ del_roce_addr:
 	mlx5_rdma_del_roce_addr(dev);
 disable_roce:
 	mlx5_nic_vport_disable_roce(dev);
-	return;
 }
-- 
cgit v1.2.3


From 31450b435fe660c4d5ed25efcde5783952942f0d Mon Sep 17 00:00:00 2001
From: Wenpeng Liang <liangwenpeng@huawei.com>
Date: Thu, 1 Apr 2021 21:07:43 +0800
Subject: net/mlx5: Replace spaces with tab at the start of a line

There should be no spaces at the start of the line.

Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index e58ef8c713e4..34eb1118670f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -52,7 +52,7 @@
 #include "diag/en_rep_tracepoint.h"
 
 #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
-        max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
+	max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
 #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1
 
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
-- 
cgit v1.2.3


From 5b232ea94c90aa6196321820740e2969ae64e9cb Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 7 Apr 2021 14:09:05 +0300
Subject: net/mlx5e: Fix RQ creation flow for queues which doesn't support XDP

Allow to create an RQ which is not registered as an XDP RQ. For example:
the trap-RQ doesn't register as an XDP RQ.

Fixes: 869c5f926247 ("net/mlx5e: Generalize open RQ")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 2f47608bb9b9..6847e7b909a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -510,8 +510,9 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 			rq->page_pool = NULL;
 			goto err_free_by_rq_type;
 		}
-		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-						 MEM_TYPE_PAGE_POOL, rq->page_pool);
+		if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
+			err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
+							 MEM_TYPE_PAGE_POOL, rq->page_pool);
 	}
 	if (err)
 		goto err_free_by_rq_type;
-- 
cgit v1.2.3


From 58581478a734b07eb27032299fcb36caa4f9988b Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Tue, 13 Apr 2021 23:54:50 +0300
Subject: net: phy: marvell-88x2222: check that link is operational

Some SFP modules uses RX_LOS for link indication. In such cases link
will be always up, even without cable connected. RX_LOS changes will
trigger link_up()/link_down() upstream operations. Thus, check that SFP
link is operational before actual read link status.

If there is no SFP cage connected to the tranciever, check only PMD
Recieve Signal Detect register.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell-88x2222.c | 43 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index eca8c2f20684..28fe520228a4 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -32,6 +32,10 @@
 #define	MV_HOST_RST_SW	BIT(7)
 #define	MV_PORT_RST_SW	(MV_LINE_RST_SW | MV_HOST_RST_SW)
 
+/* PMD Receive Signal Detect */
+#define	MV_RX_SIGNAL_DETECT		0x000A
+#define	MV_RX_SIGNAL_DETECT_GLOBAL	BIT(0)
+
 /* 1000Base-X/SGMII Control Register */
 #define	MV_1GBX_CTRL		(0x2000 + MII_BMCR)
 
@@ -51,6 +55,7 @@
 struct mv2222_data {
 	phy_interface_t line_interface;
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
+	bool sfp_link;
 };
 
 /* SFI PMA transmit enable */
@@ -139,6 +144,21 @@ static int mv2222_read_status_1g(struct phy_device *phydev)
 	return link;
 }
 
+static bool mv2222_link_is_operational(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_RX_SIGNAL_DETECT);
+	if (val < 0 || !(val & MV_RX_SIGNAL_DETECT_GLOBAL))
+		return false;
+
+	if (phydev->sfp_bus && !priv->sfp_link)
+		return false;
+
+	return true;
+}
+
 static int mv2222_read_status(struct phy_device *phydev)
 {
 	struct mv2222_data *priv = phydev->priv;
@@ -148,6 +168,9 @@ static int mv2222_read_status(struct phy_device *phydev)
 	phydev->speed = SPEED_UNKNOWN;
 	phydev->duplex = DUPLEX_UNKNOWN;
 
+	if (!mv2222_link_is_operational(phydev))
+		return 0;
+
 	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
 		link = mv2222_read_status_10g(phydev);
 	else
@@ -446,9 +469,29 @@ static void mv2222_sfp_remove(void *upstream)
 	linkmode_zero(priv->supported);
 }
 
+static void mv2222_sfp_link_up(void *upstream)
+{
+	struct phy_device *phydev = upstream;
+	struct mv2222_data *priv;
+
+	priv = phydev->priv;
+	priv->sfp_link = true;
+}
+
+static void mv2222_sfp_link_down(void *upstream)
+{
+	struct phy_device *phydev = upstream;
+	struct mv2222_data *priv;
+
+	priv = phydev->priv;
+	priv->sfp_link = false;
+}
+
 static const struct sfp_upstream_ops sfp_phy_ops = {
 	.module_insert = mv2222_sfp_insert,
 	.module_remove = mv2222_sfp_remove,
+	.link_up = mv2222_sfp_link_up,
+	.link_down = mv2222_sfp_link_down,
 	.attach = phy_sfp_attach,
 	.detach = phy_sfp_detach,
 };
-- 
cgit v1.2.3


From 473960a7b4434dbda6f628eb9e29e989f730343f Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Tue, 13 Apr 2021 23:54:51 +0300
Subject: net: phy: marvell-88x2222: move read_status after config_aneg

No functional changes, just move read link status routines below
autonegotiation configuration to make future functional changes more
distinct.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell-88x2222.c | 196 +++++++++++++++++++-------------------
 1 file changed, 98 insertions(+), 98 deletions(-)

diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index 28fe520228a4..640b133f1371 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -86,104 +86,6 @@ static int mv2222_soft_reset(struct phy_device *phydev)
 					 5000, 1000000, true);
 }
 
-/* Returns negative on error, 0 if link is down, 1 if link is up */
-static int mv2222_read_status_10g(struct phy_device *phydev)
-{
-	int val, link = 0;
-
-	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
-	if (val < 0)
-		return val;
-
-	if (val & MDIO_STAT1_LSTATUS) {
-		link = 1;
-
-		/* 10GBASE-R do not support auto-negotiation */
-		phydev->autoneg = AUTONEG_DISABLE;
-		phydev->speed = SPEED_10000;
-		phydev->duplex = DUPLEX_FULL;
-	}
-
-	return link;
-}
-
-/* Returns negative on error, 0 if link is down, 1 if link is up */
-static int mv2222_read_status_1g(struct phy_device *phydev)
-{
-	int val, link = 0;
-
-	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
-	if (val < 0)
-		return val;
-
-	if (!(val & BMSR_LSTATUS) ||
-	    (phydev->autoneg == AUTONEG_ENABLE &&
-	     !(val & BMSR_ANEGCOMPLETE)))
-		return 0;
-
-	link = 1;
-
-	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_PHY_STAT);
-	if (val < 0)
-		return val;
-
-	if (val & MV_1GBX_PHY_STAT_AN_RESOLVED) {
-		if (val & MV_1GBX_PHY_STAT_DUPLEX)
-			phydev->duplex = DUPLEX_FULL;
-		else
-			phydev->duplex = DUPLEX_HALF;
-
-		if (val & MV_1GBX_PHY_STAT_SPEED1000)
-			phydev->speed = SPEED_1000;
-		else if (val & MV_1GBX_PHY_STAT_SPEED100)
-			phydev->speed = SPEED_100;
-		else
-			phydev->speed = SPEED_10;
-	}
-
-	return link;
-}
-
-static bool mv2222_link_is_operational(struct phy_device *phydev)
-{
-	struct mv2222_data *priv = phydev->priv;
-	int val;
-
-	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_RX_SIGNAL_DETECT);
-	if (val < 0 || !(val & MV_RX_SIGNAL_DETECT_GLOBAL))
-		return false;
-
-	if (phydev->sfp_bus && !priv->sfp_link)
-		return false;
-
-	return true;
-}
-
-static int mv2222_read_status(struct phy_device *phydev)
-{
-	struct mv2222_data *priv = phydev->priv;
-	int link;
-
-	phydev->link = 0;
-	phydev->speed = SPEED_UNKNOWN;
-	phydev->duplex = DUPLEX_UNKNOWN;
-
-	if (!mv2222_link_is_operational(phydev))
-		return 0;
-
-	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
-		link = mv2222_read_status_10g(phydev);
-	else
-		link = mv2222_read_status_1g(phydev);
-
-	if (link < 0)
-		return link;
-
-	phydev->link = link;
-
-	return 0;
-}
-
 static int mv2222_disable_aneg(struct phy_device *phydev)
 {
 	int ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_CTRL,
@@ -386,6 +288,104 @@ static int mv2222_aneg_done(struct phy_device *phydev)
 	return (ret & BMSR_ANEGCOMPLETE);
 }
 
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_10g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
+	if (val < 0)
+		return val;
+
+	if (val & MDIO_STAT1_LSTATUS) {
+		link = 1;
+
+		/* 10GBASE-R do not support auto-negotiation */
+		phydev->autoneg = AUTONEG_DISABLE;
+		phydev->speed = SPEED_10000;
+		phydev->duplex = DUPLEX_FULL;
+	}
+
+	return link;
+}
+
+/* Returns negative on error, 0 if link is down, 1 if link is up */
+static int mv2222_read_status_1g(struct phy_device *phydev)
+{
+	int val, link = 0;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
+	if (val < 0)
+		return val;
+
+	if (!(val & BMSR_LSTATUS) ||
+	    (phydev->autoneg == AUTONEG_ENABLE &&
+	     !(val & BMSR_ANEGCOMPLETE)))
+		return 0;
+
+	link = 1;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_PHY_STAT);
+	if (val < 0)
+		return val;
+
+	if (val & MV_1GBX_PHY_STAT_AN_RESOLVED) {
+		if (val & MV_1GBX_PHY_STAT_DUPLEX)
+			phydev->duplex = DUPLEX_FULL;
+		else
+			phydev->duplex = DUPLEX_HALF;
+
+		if (val & MV_1GBX_PHY_STAT_SPEED1000)
+			phydev->speed = SPEED_1000;
+		else if (val & MV_1GBX_PHY_STAT_SPEED100)
+			phydev->speed = SPEED_100;
+		else
+			phydev->speed = SPEED_10;
+	}
+
+	return link;
+}
+
+static bool mv2222_link_is_operational(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MV_RX_SIGNAL_DETECT);
+	if (val < 0 || !(val & MV_RX_SIGNAL_DETECT_GLOBAL))
+		return false;
+
+	if (phydev->sfp_bus && !priv->sfp_link)
+		return false;
+
+	return true;
+}
+
+static int mv2222_read_status(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int link;
+
+	phydev->link = 0;
+	phydev->speed = SPEED_UNKNOWN;
+	phydev->duplex = DUPLEX_UNKNOWN;
+
+	if (!mv2222_link_is_operational(phydev))
+		return 0;
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
+		link = mv2222_read_status_10g(phydev);
+	else
+		link = mv2222_read_status_1g(phydev);
+
+	if (link < 0)
+		return link;
+
+	phydev->link = link;
+
+	return 0;
+}
+
 static int mv2222_resume(struct phy_device *phydev)
 {
 	return mv2222_tx_enable(phydev);
-- 
cgit v1.2.3


From d7029f55cc46066d833cbf7f532b1ae8d6835859 Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Tue, 13 Apr 2021 23:54:52 +0300
Subject: net: phy: marvell-88x2222: swap 1G/10G modes on autoneg

Setting 10G without autonegotiation is invalid according to
phy_ethtool_ksettings_set(). Thus, we need to set it during
autonegotiation.

If 1G autonegotiation can't complete for quite a time, but there is
signal in line, switch line interface type to 10GBase-R, if supported,
in hope for link to be established.

And vice versa. If 10GBase-R link can't be established for quite a time,
and autonegotiation is enabled, and there is signal in line, switch line
interface type to appropriate 1G mode, i.e. 1000Base-X or SGMII, if
supported.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell-88x2222.c | 117 +++++++++++++++++++++++++++++---------
 1 file changed, 89 insertions(+), 28 deletions(-)

diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index 640b133f1371..9b9ac3ef735d 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -52,6 +52,8 @@
 #define	MV_1GBX_PHY_STAT_SPEED100	BIT(14)
 #define	MV_1GBX_PHY_STAT_SPEED1000	BIT(15)
 
+#define	AUTONEG_TIMEOUT	3
+
 struct mv2222_data {
 	phy_interface_t line_interface;
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
@@ -173,6 +175,24 @@ static bool mv2222_is_1gbx_capable(struct phy_device *phydev)
 				 priv->supported);
 }
 
+static bool mv2222_is_sgmii_capable(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+
+	return (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				  priv->supported) ||
+		linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				  priv->supported));
+}
+
 static int mv2222_config_line(struct phy_device *phydev)
 {
 	struct mv2222_data *priv = phydev->priv;
@@ -192,7 +212,8 @@ static int mv2222_config_line(struct phy_device *phydev)
 	}
 }
 
-static int mv2222_setup_forced(struct phy_device *phydev)
+/* Switch between 1G (1000Base-X/SGMII) and 10G (10GBase-R) modes */
+static int mv2222_swap_line_type(struct phy_device *phydev)
 {
 	struct mv2222_data *priv = phydev->priv;
 	bool changed = false;
@@ -200,25 +221,23 @@ static int mv2222_setup_forced(struct phy_device *phydev)
 
 	switch (priv->line_interface) {
 	case PHY_INTERFACE_MODE_10GBASER:
-		if (phydev->speed == SPEED_1000 &&
-		    mv2222_is_1gbx_capable(phydev)) {
+		if (mv2222_is_1gbx_capable(phydev)) {
 			priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
 			changed = true;
 		}
 
-		break;
-	case PHY_INTERFACE_MODE_1000BASEX:
-		if (phydev->speed == SPEED_10000 &&
-		    mv2222_is_10g_capable(phydev)) {
-			priv->line_interface = PHY_INTERFACE_MODE_10GBASER;
+		if (mv2222_is_sgmii_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_SGMII;
 			changed = true;
 		}
 
 		break;
+	case PHY_INTERFACE_MODE_1000BASEX:
 	case PHY_INTERFACE_MODE_SGMII:
-		ret = mv2222_set_sgmii_speed(phydev);
-		if (ret < 0)
-			return ret;
+		if (mv2222_is_10g_capable(phydev)) {
+			priv->line_interface = PHY_INTERFACE_MODE_10GBASER;
+			changed = true;
+		}
 
 		break;
 	default:
@@ -231,6 +250,29 @@ static int mv2222_setup_forced(struct phy_device *phydev)
 			return ret;
 	}
 
+	return 0;
+}
+
+static int mv2222_setup_forced(struct phy_device *phydev)
+{
+	struct mv2222_data *priv = phydev->priv;
+	int ret;
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER) {
+		if (phydev->speed < SPEED_10000 &&
+		    phydev->speed != SPEED_UNKNOWN) {
+			ret = mv2222_swap_line_type(phydev);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	if (priv->line_interface == PHY_INTERFACE_MODE_SGMII) {
+		ret = mv2222_set_sgmii_speed(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
 	return mv2222_disable_aneg(phydev);
 }
 
@@ -244,17 +286,9 @@ static int mv2222_config_aneg(struct phy_device *phydev)
 		return 0;
 
 	if (phydev->autoneg == AUTONEG_DISABLE ||
-	    phydev->speed == SPEED_10000)
+	    priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
 		return mv2222_setup_forced(phydev);
 
-	if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER &&
-	    mv2222_is_1gbx_capable(phydev)) {
-		priv->line_interface = PHY_INTERFACE_MODE_1000BASEX;
-		ret = mv2222_config_line(phydev);
-		if (ret < 0)
-			return ret;
-	}
-
 	adv = linkmode_adv_to_mii_adv_x(priv->supported,
 					ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
 
@@ -291,6 +325,7 @@ static int mv2222_aneg_done(struct phy_device *phydev)
 /* Returns negative on error, 0 if link is down, 1 if link is up */
 static int mv2222_read_status_10g(struct phy_device *phydev)
 {
+	static int timeout;
 	int val, link = 0;
 
 	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_STAT1);
@@ -304,6 +339,20 @@ static int mv2222_read_status_10g(struct phy_device *phydev)
 		phydev->autoneg = AUTONEG_DISABLE;
 		phydev->speed = SPEED_10000;
 		phydev->duplex = DUPLEX_FULL;
+	} else {
+		if (phydev->autoneg == AUTONEG_ENABLE) {
+			timeout++;
+
+			if (timeout > AUTONEG_TIMEOUT) {
+				timeout = 0;
+
+				val = mv2222_swap_line_type(phydev);
+				if (val < 0)
+					return val;
+
+				return mv2222_config_aneg(phydev);
+			}
+		}
 	}
 
 	return link;
@@ -312,15 +361,31 @@ static int mv2222_read_status_10g(struct phy_device *phydev)
 /* Returns negative on error, 0 if link is down, 1 if link is up */
 static int mv2222_read_status_1g(struct phy_device *phydev)
 {
+	static int timeout;
 	int val, link = 0;
 
 	val = phy_read_mmd(phydev, MDIO_MMD_PCS, MV_1GBX_STAT);
 	if (val < 0)
 		return val;
 
-	if (!(val & BMSR_LSTATUS) ||
-	    (phydev->autoneg == AUTONEG_ENABLE &&
-	     !(val & BMSR_ANEGCOMPLETE)))
+	if (phydev->autoneg == AUTONEG_ENABLE &&
+	    !(val & BMSR_ANEGCOMPLETE)) {
+		timeout++;
+
+		if (timeout > AUTONEG_TIMEOUT) {
+			timeout = 0;
+
+			val = mv2222_swap_line_type(phydev);
+			if (val < 0)
+				return val;
+
+			return mv2222_config_aneg(phydev);
+		}
+
+		return 0;
+	}
+
+	if (!(val & BMSR_LSTATUS))
 		return 0;
 
 	link = 1;
@@ -447,11 +512,7 @@ static int mv2222_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
 		return ret;
 
 	if (mutex_trylock(&phydev->lock)) {
-		if (priv->line_interface == PHY_INTERFACE_MODE_10GBASER)
-			ret = mv2222_setup_forced(phydev);
-		else
-			ret = mv2222_config_aneg(phydev);
-
+		ret = mv2222_config_aneg(phydev);
 		mutex_unlock(&phydev->lock);
 	}
 
-- 
cgit v1.2.3


From f4da56529da602010979e8497d1f02eaf5df8883 Mon Sep 17 00:00:00 2001
From: Tan Tee Min <tee.min.tan@intel.com>
Date: Wed, 14 Apr 2021 08:16:17 +0800
Subject: net: stmmac: Add support for external trigger timestamping

The Synopsis MAC controller supports auxiliary snapshot feature that
allows user to store a snapshot of the system time based on an external
event.

This patch add supports to the above mentioned feature. Users will be
able to triggered capturing the time snapshot from user-space using
application such as testptp or any other applications that uses the
PTP_EXTTS_REQUEST ioctl request.

Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Tan Tee Min <tee.min.tan@intel.com>
Co-developed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c  | 10 ++++++
 drivers/net/ethernet/stmicro/stmmac/hwif.h         |  5 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |  3 ++
 .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c  | 39 +++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  2 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c   | 40 +++++++++++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h   |  1 +
 include/linux/stmmac.h                             |  2 ++
 8 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 60566598d644..ec140fc4a0f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -296,6 +296,13 @@ static int intel_crosststamp(ktime_t *device,
 
 	intel_priv = priv->plat->bsp_priv;
 
+	/* Both internal crosstimestamping and external triggered event
+	 * timestamping cannot be run concurrently.
+	 */
+	if (priv->plat->ext_snapshot_en)
+		return -EBUSY;
+
+	mutex_lock(&priv->aux_ts_lock);
 	/* Enable Internal snapshot trigger */
 	acr_value = readl(ptpaddr + PTP_ACR);
 	acr_value &= ~PTP_ACR_MASK;
@@ -321,6 +328,8 @@ static int intel_crosststamp(ktime_t *device,
 	acr_value = readl(ptpaddr + PTP_ACR);
 	acr_value |= PTP_ACR_ATSFC;
 	writel(acr_value, ptpaddr + PTP_ACR);
+	/* Release the mutex */
+	mutex_unlock(&priv->aux_ts_lock);
 
 	/* Trigger Internal snapshot signal
 	 * Create a rising edge by just toggle the GPO1 to low
@@ -520,6 +529,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR;
 
 	plat->int_snapshot_num = AUX_SNAPSHOT1;
+	plat->ext_snapshot_num = AUX_SNAPSHOT0;
 
 	plat->has_crossts = true;
 	plat->crosststamp = intel_crosststamp;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 2b5022ef1e52..2cc91759b91f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -504,6 +504,8 @@ struct stmmac_ops {
 #define stmmac_fpe_irq_status(__priv, __args...) \
 	stmmac_do_callback(__priv, mac, fpe_irq_status, __args)
 
+struct stmmac_priv;
+
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
 	void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
@@ -515,6 +517,7 @@ struct stmmac_hwtimestamp {
 			       int add_sub, int gmac4);
 	void (*get_systime) (void __iomem *ioaddr, u64 *systime);
 	void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time);
+	void (*timestamp_interrupt)(struct stmmac_priv *priv);
 };
 
 #define stmmac_config_hw_tstamping(__priv, __args...) \
@@ -531,6 +534,8 @@ struct stmmac_hwtimestamp {
 	stmmac_do_void_callback(__priv, ptp, get_systime, __args)
 #define stmmac_get_ptptime(__priv, __args...) \
 	stmmac_do_void_callback(__priv, ptp, get_ptptime, __args)
+#define stmmac_timestamp_interrupt(__priv, __args...) \
+	stmmac_do_void_callback(__priv, ptp, timestamp_interrupt, __args)
 
 /* Helpers to manage the descriptors for chain and ring modes */
 struct stmmac_mode_ops {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index b8a42260066d..b6cd43eda7ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -250,6 +250,9 @@ struct stmmac_priv {
 	int use_riwt;
 	int irq_wake;
 	spinlock_t ptp_lock;
+	/* Protects auxiliary snapshot registers from concurrent access. */
+	struct mutex aux_ts_lock;
+
 	void __iomem *mmcaddr;
 	void __iomem *ptpaddr;
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
index 113c51bcc0b5..074e2cdfb0fa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
@@ -12,8 +12,11 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/delay.h>
+#include <linux/ptp_clock_kernel.h>
 #include "common.h"
 #include "stmmac_ptp.h"
+#include "dwmac4.h"
+#include "stmmac.h"
 
 static void config_hw_tstamping(void __iomem *ioaddr, u32 data)
 {
@@ -163,6 +166,41 @@ static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time)
 	*ptp_time = ns;
 }
 
+static void timestamp_interrupt(struct stmmac_priv *priv)
+{
+	u32 num_snapshot, ts_status, tsync_int;
+	struct ptp_clock_event event;
+	unsigned long flags;
+	u64 ptp_time;
+	int i;
+
+	tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
+
+	if (!tsync_int)
+		return;
+
+	/* Read timestamp status to clear interrupt from either external
+	 * timestamp or start/end of PPS.
+	 */
+	ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS);
+
+	if (!priv->plat->ext_snapshot_en)
+		return;
+
+	num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >>
+		       GMAC_TIMESTAMP_ATSNS_SHIFT;
+
+	for (i = 0; i < num_snapshot; i++) {
+		spin_lock_irqsave(&priv->ptp_lock, flags);
+		get_ptptime(priv->ptpaddr, &ptp_time);
+		spin_unlock_irqrestore(&priv->ptp_lock, flags);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 0;
+		event.timestamp = ptp_time;
+		ptp_clock_event(priv->ptp_clock, &event);
+	}
+}
+
 const struct stmmac_hwtimestamp stmmac_ptp = {
 	.config_hw_tstamping = config_hw_tstamping,
 	.init_systime = init_systime,
@@ -171,4 +209,5 @@ const struct stmmac_hwtimestamp stmmac_ptp = {
 	.adjust_systime = adjust_systime,
 	.get_systime = get_systime,
 	.get_ptptime = get_ptptime,
+	.timestamp_interrupt = timestamp_interrupt,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index e3e22200a4fd..3a5ca5833ce1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5687,6 +5687,8 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv)
 			else
 				netif_carrier_off(priv->dev);
 		}
+
+		stmmac_timestamp_interrupt(priv, priv);
 	}
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index b164ae22e35f..4e86cdf2bc9f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -135,7 +135,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 {
 	struct stmmac_priv *priv =
 	    container_of(ptp, struct stmmac_priv, ptp_clock_ops);
+	void __iomem *ptpaddr = priv->ptpaddr;
+	void __iomem *ioaddr = priv->hw->pcsr;
 	struct stmmac_pps_cfg *cfg;
+	u32 intr_value, acr_value;
 	int ret = -EOPNOTSUPP;
 	unsigned long flags;
 
@@ -159,6 +162,37 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 					     priv->systime_flags);
 		spin_unlock_irqrestore(&priv->ptp_lock, flags);
 		break;
+	case PTP_CLK_REQ_EXTTS:
+		priv->plat->ext_snapshot_en = on;
+		mutex_lock(&priv->aux_ts_lock);
+		acr_value = readl(ptpaddr + PTP_ACR);
+		acr_value &= ~PTP_ACR_MASK;
+		if (on) {
+			/* Enable External snapshot trigger */
+			acr_value |= priv->plat->ext_snapshot_num;
+			acr_value |= PTP_ACR_ATSFC;
+			netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
+				   priv->plat->ext_snapshot_num >>
+				   PTP_ACR_ATSEN_SHIFT);
+			/* Enable Timestamp Interrupt */
+			intr_value = readl(ioaddr + GMAC_INT_EN);
+			intr_value |= GMAC_INT_TSIE;
+			writel(intr_value, ioaddr + GMAC_INT_EN);
+
+		} else {
+			netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
+				   priv->plat->ext_snapshot_num >>
+				   PTP_ACR_ATSEN_SHIFT);
+			/* Disable Timestamp Interrupt */
+			intr_value = readl(ioaddr + GMAC_INT_EN);
+			intr_value &= ~GMAC_INT_TSIE;
+			writel(intr_value, ioaddr + GMAC_INT_EN);
+		}
+		writel(acr_value, ptpaddr + PTP_ACR);
+		mutex_unlock(&priv->aux_ts_lock);
+		ret = 0;
+		break;
+
 	default:
 		break;
 	}
@@ -202,7 +236,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = {
 	.name = "stmmac ptp",
 	.max_adj = 62500000,
 	.n_alarm = 0,
-	.n_ext_ts = 0,
+	.n_ext_ts = 0, /* will be overwritten in stmmac_ptp_register */
 	.n_per_out = 0, /* will be overwritten in stmmac_ptp_register */
 	.n_pins = 0,
 	.pps = 0,
@@ -237,8 +271,10 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
 		stmmac_ptp_clock_ops.max_adj = priv->plat->ptp_max_adj;
 
 	stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num;
+	stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n;
 
 	spin_lock_init(&priv->ptp_lock);
+	mutex_init(&priv->aux_ts_lock);
 	priv->ptp_clock_ops = stmmac_ptp_clock_ops;
 
 	priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops,
@@ -264,4 +300,6 @@ void stmmac_ptp_unregister(struct stmmac_priv *priv)
 		pr_debug("Removed PTP HW clock successfully on %s\n",
 			 priv->dev->name);
 	}
+
+	mutex_destroy(&priv->aux_ts_lock);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
index f88727ce4d30..53172a439810 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
@@ -73,6 +73,7 @@
 #define	PTP_ACR_ATSEN1		BIT(5)	/* Auxiliary Snapshot 1 Enable */
 #define	PTP_ACR_ATSEN2		BIT(6)	/* Auxiliary Snapshot 2 Enable */
 #define	PTP_ACR_ATSEN3		BIT(7)	/* Auxiliary Snapshot 3 Enable */
+#define	PTP_ACR_ATSEN_SHIFT	5	/* Auxiliary Snapshot shift */
 #define	PTP_ACR_MASK		GENMASK(7, 4)	/* Aux Snapshot Mask */
 #define	PMC_ART_VALUE0		0x01	/* PMC_ART[15:0] timer value */
 #define	PMC_ART_VALUE1		0x02	/* PMC_ART[31:16] timer value */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index e338ef7abc00..97edb31d6310 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -238,6 +238,8 @@ struct plat_stmmacenet_data {
 	struct pci_dev *pdev;
 	bool has_crossts;
 	int int_snapshot_num;
+	int ext_snapshot_num;
+	bool ext_snapshot_en;
 	bool multi_msi_en;
 	int msi_mac_vec;
 	int msi_wol_vec;
-- 
cgit v1.2.3


From 216f78ea8cf6fae5140aeb55657ebdab71a05502 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 14 Apr 2021 08:23:15 +0200
Subject: r8169: add support for pause ethtool ops

This adds support for the [g|s]et_pauseparam ethtool ops. It considers
that the chip doesn't support pause frame use in jumbo mode.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 85031b4721fa..98567731e2ba 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1910,6 +1910,32 @@ static void rtl8169_get_ringparam(struct net_device *dev,
 	data->tx_pending = NUM_TX_DESC;
 }
 
+static void rtl8169_get_pauseparam(struct net_device *dev,
+				   struct ethtool_pauseparam *data)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+	bool tx_pause, rx_pause;
+
+	phy_get_pause(tp->phydev, &tx_pause, &rx_pause);
+
+	data->autoneg = tp->phydev->autoneg;
+	data->tx_pause = tx_pause ? 1 : 0;
+	data->rx_pause = rx_pause ? 1 : 0;
+}
+
+static int rtl8169_set_pauseparam(struct net_device *dev,
+				  struct ethtool_pauseparam *data)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	if (dev->mtu > ETH_DATA_LEN)
+		return -EOPNOTSUPP;
+
+	phy_set_asym_pause(tp->phydev, data->rx_pause, data->tx_pause);
+
+	return 0;
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES,
@@ -1931,6 +1957,8 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 	.get_ringparam		= rtl8169_get_ringparam,
+	.get_pauseparam		= rtl8169_get_pauseparam,
+	.set_pauseparam		= rtl8169_set_pauseparam,
 };
 
 static void rtl_enable_eee(struct rtl8169_private *tp)
-- 
cgit v1.2.3


From 73d7de66aa3c76d90092649766278296042ba836 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 14 Apr 2021 15:13:39 +0800
Subject: atm: idt77252: remove unused function

Fix the following clang warning:

drivers/atm/idt77252.c:1787:1: warning: unused function
'idt77252_fbq_level' [-Wunused-function].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/idt77252.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 0c13cac903de..9e4bd751db79 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1783,12 +1783,6 @@ set_tct(struct idt77252_dev *card, struct vc_map *vc)
 /*                                                                           */
 /*****************************************************************************/
 
-static __inline__ int
-idt77252_fbq_level(struct idt77252_dev *card, int queue)
-{
-	return (readl(SAR_REG_STAT) >> (16 + (queue << 2))) & 0x0f;
-}
-
 static __inline__ int
 idt77252_fbq_full(struct idt77252_dev *card, int queue)
 {
-- 
cgit v1.2.3


From 17c3df7078e3742bd9e907f3006a9e3469383007 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 14 Apr 2021 12:48:48 +0200
Subject: skbuff: revert "skbuff: remove some unnecessary operation in
 skb_segment_list()"

the commit 1ddc3229ad3c ("skbuff: remove some unnecessary operation
in skb_segment_list()") introduces an issue very similar to the
one already fixed by commit 53475c5dd856 ("net: fix use-after-free when
UDP GRO with shared fraglist").

If the GSO skb goes though skb_clone() and pskb_expand_head() before
entering skb_segment_list(), the latter  will unshare the frag_list
skbs and will release the old list. With the reverted commit in place,
when skb_segment_list() completes, skb->next points to the just
released list, and later on the kernel will hit UaF.

Note that since commit e0e3070a9bc9 ("udp: properly complete L4 GRO
over UDP tunnel packet") the critical scenario can be reproduced also
receiving UDP over vxlan traffic with:

NIC (NETIF_F_GRO_FRAGLIST enabled) -> vxlan -> UDP sink

Attaching a packet socket to the NIC will cause skb_clone() and the
tunnel decapsulation will call pskb_expand_head().

Fixes: 1ddc3229ad3c ("skbuff: remove some unnecessary operation in skb_segment_list()")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ad9e8425ab2..14010c0eec48 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3773,13 +3773,13 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 	unsigned int tnl_hlen = skb_tnl_header_len(skb);
 	unsigned int delta_truesize = 0;
 	unsigned int delta_len = 0;
+	struct sk_buff *tail = NULL;
 	struct sk_buff *nskb, *tmp;
 	int err;
 
 	skb_push(skb, -skb_network_offset(skb) + offset);
 
 	skb_shinfo(skb)->frag_list = NULL;
-	skb->next = list_skb;
 
 	do {
 		nskb = list_skb;
@@ -3797,8 +3797,17 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 			}
 		}
 
-		if (unlikely(err))
+		if (!tail)
+			skb->next = nskb;
+		else
+			tail->next = nskb;
+
+		if (unlikely(err)) {
+			nskb->next = list_skb;
 			goto err_linearize;
+		}
+
+		tail = nskb;
 
 		delta_len += nskb->len;
 		delta_truesize += nskb->truesize;
@@ -3825,7 +3834,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 
 	skb_gso_reset(skb);
 
-	skb->prev = nskb;
+	skb->prev = tail;
 
 	if (skb_needs_linearize(skb, features) &&
 	    __skb_linearize(skb))
-- 
cgit v1.2.3


From ace8d281aa71ef785d7b58e9c6b0fcc198103606 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Wed, 14 Apr 2021 19:06:45 +0800
Subject: sfc: Remove duplicate argument

Fix the following coccicheck warning:

./drivers/net/ethernet/sfc/enum.h:80:7-28: duplicated argument to |

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/enum.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index 3332cdf2918a..cd590e0685e5 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -78,7 +78,6 @@ enum efx_loopback_mode {
 			    (1 << LOOPBACK_XAUI) |		\
 			    (1 << LOOPBACK_GMII) |		\
 			    (1 << LOOPBACK_SGMII) |		\
-			    (1 << LOOPBACK_SGMII) |		\
 			    (1 << LOOPBACK_XGBR) |		\
 			    (1 << LOOPBACK_XFI) |		\
 			    (1 << LOOPBACK_XAUI_FAR) |		\
-- 
cgit v1.2.3


From 652d3be21dc838f526c01837cbc837894f9c7bc1 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 14 Apr 2021 16:48:14 +0200
Subject: net: enetc: fetch MAC address from device tree

Normally, the bootloader will already initialize the MAC address
registers of the ENETC and the driver will just use them or generate a
random one, if it is not initialized.

Add a new way to provide the MAC address: via device tree. Besides the
usual 'mac-address' property, there is also the possibility to fetch it
via a NVMEM provider. The sl28 board stores the MAC address in the SPI
NOR flash OTP region. Having this will allow linux to fetch the MAC
address from there without being dependent on the bootloader.

No in-tree boards have the device tree properties set, thus for these,
this is a no-op.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 65 +++++++++++++++++++------
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index f61fedf462e5..30b22b71630e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -390,23 +390,54 @@ static int enetc_pf_set_vf_spoofchk(struct net_device *ndev, int vf, bool en)
 	return 0;
 }
 
-static void enetc_port_setup_primary_mac_address(struct enetc_si *si)
+static int enetc_setup_mac_address(struct device_node *np, struct enetc_pf *pf,
+				   int si)
 {
-	unsigned char mac_addr[MAX_ADDR_LEN];
-	struct enetc_pf *pf = enetc_si_priv(si);
-	struct enetc_hw *hw = &si->hw;
-	int i;
+	struct device *dev = &pf->si->pdev->dev;
+	struct enetc_hw *hw = &pf->si->hw;
+	u8 mac_addr[ETH_ALEN] = { 0 };
+	int err;
 
-	/* check MAC addresses for PF and all VFs, if any is 0 set it ro rand */
-	for (i = 0; i < pf->total_vfs + 1; i++) {
-		enetc_pf_get_primary_mac_addr(hw, i, mac_addr);
-		if (!is_zero_ether_addr(mac_addr))
-			continue;
+	/* (1) try to get the MAC address from the device tree */
+	if (np) {
+		err = of_get_mac_address(np, mac_addr);
+		if (err == -EPROBE_DEFER)
+			return err;
+	}
+
+	/* (2) bootloader supplied MAC address */
+	if (is_zero_ether_addr(mac_addr))
+		enetc_pf_get_primary_mac_addr(hw, si, mac_addr);
+
+	/* (3) choose a random one */
+	if (is_zero_ether_addr(mac_addr)) {
 		eth_random_addr(mac_addr);
-		dev_info(&si->pdev->dev, "no MAC address specified for SI%d, using %pM\n",
-			 i, mac_addr);
-		enetc_pf_set_primary_mac_addr(hw, i, mac_addr);
+		dev_info(dev, "no MAC address specified for SI%d, using %pM\n",
+			 si, mac_addr);
 	}
+
+	enetc_pf_set_primary_mac_addr(hw, si, mac_addr);
+
+	return 0;
+}
+
+static int enetc_setup_mac_addresses(struct device_node *np,
+				     struct enetc_pf *pf)
+{
+	int err, i;
+
+	/* The PF might take its MAC from the device tree */
+	err = enetc_setup_mac_address(np, pf, 0);
+	if (err)
+		return err;
+
+	for (i = 0; i < pf->total_vfs; i++) {
+		err = enetc_setup_mac_address(NULL, pf, i + 1);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 static void enetc_port_assign_rfs_entries(struct enetc_si *si)
@@ -562,9 +593,6 @@ static void enetc_configure_port(struct enetc_pf *pf)
 	/* split up RFS entries */
 	enetc_port_assign_rfs_entries(pf->si);
 
-	/* fix-up primary MAC addresses, if not set already */
-	enetc_port_setup_primary_mac_address(pf->si);
-
 	/* enforce VLAN promisc mode for all SIs */
 	pf->vlan_promisc_simap = ENETC_VLAN_PROMISC_MAP_ALL;
 	enetc_set_vlan_promisc(hw, pf->vlan_promisc_simap);
@@ -1137,6 +1165,10 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	pf->si = si;
 	pf->total_vfs = pci_sriov_get_totalvfs(pdev);
 
+	err = enetc_setup_mac_addresses(node, pf);
+	if (err)
+		goto err_setup_mac_addresses;
+
 	enetc_configure_port(pf);
 
 	enetc_get_si_caps(si);
@@ -1204,6 +1236,7 @@ err_alloc_netdev:
 err_init_port_rss:
 err_init_port_rfs:
 err_device_disabled:
+err_setup_mac_addresses:
 	enetc_teardown_cbdr(&si->cbd_ring);
 err_setup_cbdr:
 err_map_pf_space:
-- 
cgit v1.2.3


From ae1ea84b33dab45c7b6c1754231ebda5959b504c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 14 Apr 2021 22:22:57 +0300
Subject: net: bridge: propagate error code and extack from
 br_mc_disabled_update

Some Ethernet switches might only be able to support disabling multicast
snooping globally, which is an issue for example when several bridges
span the same physical device and request contradictory settings.

Propagate the return value of br_mc_disabled_update() such that this
limitation is transmitted correctly to user-space.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 28 +++++++++++++++++++++-------
 net/bridge/br_netlink.c   |  4 +++-
 net/bridge/br_private.h   |  3 ++-
 net/bridge/br_sysfs_br.c  |  8 +-------
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9d265447d654..4daa95c913d0 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1593,7 +1593,8 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
-static void br_mc_disabled_update(struct net_device *dev, bool value)
+static int br_mc_disabled_update(struct net_device *dev, bool value,
+				 struct netlink_ext_ack *extack)
 {
 	struct switchdev_attr attr = {
 		.orig_dev = dev,
@@ -1602,11 +1603,13 @@ static void br_mc_disabled_update(struct net_device *dev, bool value)
 		.u.mc_disabled = !value,
 	};
 
-	switchdev_port_attr_set(dev, &attr, NULL);
+	return switchdev_port_attr_set(dev, &attr, extack);
 }
 
 int br_multicast_add_port(struct net_bridge_port *port)
 {
+	int err;
+
 	port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 	port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
 
@@ -1618,8 +1621,12 @@ int br_multicast_add_port(struct net_bridge_port *port)
 	timer_setup(&port->ip6_own_query.timer,
 		    br_ip6_multicast_port_query_expired, 0);
 #endif
-	br_mc_disabled_update(port->dev,
-			      br_opt_get(port->br, BROPT_MULTICAST_ENABLED));
+	err = br_mc_disabled_update(port->dev,
+				    br_opt_get(port->br,
+					       BROPT_MULTICAST_ENABLED),
+				    NULL);
+	if (err)
+		return err;
 
 	port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
 	if (!port->mcast_stats)
@@ -3560,16 +3567,23 @@ static void br_multicast_start_querier(struct net_bridge *br,
 	rcu_read_unlock();
 }
 
-int br_multicast_toggle(struct net_bridge *br, unsigned long val)
+int br_multicast_toggle(struct net_bridge *br, unsigned long val,
+			struct netlink_ext_ack *extack)
 {
 	struct net_bridge_port *port;
 	bool change_snoopers = false;
+	int err = 0;
 
 	spin_lock_bh(&br->multicast_lock);
 	if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
 		goto unlock;
 
-	br_mc_disabled_update(br->dev, val);
+	err = br_mc_disabled_update(br->dev, val, extack);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	if (err)
+		goto unlock;
+
 	br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
 	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
 		change_snoopers = true;
@@ -3607,7 +3621,7 @@ unlock:
 			br_multicast_leave_snoopers(br);
 	}
 
-	return 0;
+	return err;
 }
 
 bool br_multicast_enabled(const struct net_device *dev)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f2b1343f8332..0456593aceec 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1293,7 +1293,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 	if (data[IFLA_BR_MCAST_SNOOPING]) {
 		u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
 
-		br_multicast_toggle(br, mcast_snooping);
+		err = br_multicast_toggle(br, mcast_snooping, extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 50747990188e..7ce8a77cc6b6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -810,7 +810,8 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 			struct sk_buff *skb, bool local_rcv, bool local_orig);
 int br_multicast_set_router(struct net_bridge *br, unsigned long val);
 int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
-int br_multicast_toggle(struct net_bridge *br, unsigned long val);
+int br_multicast_toggle(struct net_bridge *br, unsigned long val,
+			struct netlink_ext_ack *extack);
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
 int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 072e29840082..381467b691d5 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -409,17 +409,11 @@ static ssize_t multicast_snooping_show(struct device *d,
 	return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_ENABLED));
 }
 
-static int toggle_multicast(struct net_bridge *br, unsigned long val,
-			    struct netlink_ext_ack *extack)
-{
-	return br_multicast_toggle(br, val);
-}
-
 static ssize_t multicast_snooping_store(struct device *d,
 					struct device_attribute *attr,
 					const char *buf, size_t len)
 {
-	return store_bridge_parm(d, buf, len, toggle_multicast);
+	return store_bridge_parm(d, buf, len, br_multicast_toggle);
 }
 static DEVICE_ATTR_RW(multicast_snooping);
 
-- 
cgit v1.2.3


From 94f633ea8ade8418634d152ad0931133338226f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 14 Apr 2021 12:36:44 -0700
Subject: net/packet: remove data races in fanout operations

af_packet fanout uses RCU rules to ensure f->arr elements
are not dismantled before RCU grace period.

However, it lacks rcu accessors to make sure KCSAN and other tools
wont detect data races. Stupid compilers could also play games.

Fixes: dc99f600698d ("packet: Add fanout support.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 15 +++++++++------
 net/packet/internal.h  |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 118d585337d7..ba96db1880ea 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1359,7 +1359,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
 	struct packet_sock *po, *po_next, *po_skip = NULL;
 	unsigned int i, j, room = ROOM_NONE;
 
-	po = pkt_sk(f->arr[idx]);
+	po = pkt_sk(rcu_dereference(f->arr[idx]));
 
 	if (try_self) {
 		room = packet_rcv_has_room(po, skb);
@@ -1371,7 +1371,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
 
 	i = j = min_t(int, po->rollover->sock, num - 1);
 	do {
-		po_next = pkt_sk(f->arr[i]);
+		po_next = pkt_sk(rcu_dereference(f->arr[i]));
 		if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
 		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
 			if (i != j)
@@ -1466,7 +1466,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
 		idx = fanout_demux_rollover(f, skb, idx, true, num);
 
-	po = pkt_sk(f->arr[idx]);
+	po = pkt_sk(rcu_dereference(f->arr[idx]));
 	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
 }
 
@@ -1480,7 +1480,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po)
 	struct packet_fanout *f = po->fanout;
 
 	spin_lock(&f->lock);
-	f->arr[f->num_members] = sk;
+	rcu_assign_pointer(f->arr[f->num_members], sk);
 	smp_wmb();
 	f->num_members++;
 	if (f->num_members == 1)
@@ -1495,11 +1495,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 
 	spin_lock(&f->lock);
 	for (i = 0; i < f->num_members; i++) {
-		if (f->arr[i] == sk)
+		if (rcu_dereference_protected(f->arr[i],
+					      lockdep_is_held(&f->lock)) == sk)
 			break;
 	}
 	BUG_ON(i >= f->num_members);
-	f->arr[i] = f->arr[f->num_members - 1];
+	rcu_assign_pointer(f->arr[i],
+			   rcu_dereference_protected(f->arr[f->num_members - 1],
+						     lockdep_is_held(&f->lock)));
 	f->num_members--;
 	if (f->num_members == 0)
 		__dev_remove_pack(&f->prot_hook);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 5f61e59ebbff..48af35b1aed2 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -94,7 +94,7 @@ struct packet_fanout {
 	spinlock_t		lock;
 	refcount_t		sk_ref;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
-	struct sock		*arr[];
+	struct sock	__rcu	*arr[];
 };
 
 struct packet_rollover {
-- 
cgit v1.2.3


From d41f26b5ef8fb4d5ae6f9b51526eefa62ec53348 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Tue, 2 Mar 2021 10:12:06 -0800
Subject: ice: use kernel definitions for IANA protocol ports and ether-types

The well-known IANA protocol port 3260 (iscsi-target 0x0cbc) and the
ether-types 0x8906 (ETH_P_FCOE) and 0x8914 (ETH_P_FIP) are already defined
in kernel header files.  Use those definitions instead of open-coding the
same.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h         | 3 +++
 drivers/net/ethernet/intel/ice/ice_dcb.c     | 8 ++++----
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 2 +-
 drivers/net/ethernet/intel/ice/ice_type.h    | 3 ---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 07777ac4f098..dd2e75d15558 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -44,6 +44,9 @@
 #include <net/gre.h>
 #include <net/udp_tunnel.h>
 #include <net/vxlan.h>
+#if IS_ENABLED(CONFIG_DCB)
+#include <scsi/iscsi_proto.h>
+#endif /* CONFIG_DCB */
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 43c6af42de8a..34c1aba050b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -804,7 +804,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
 			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
 			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
 			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
-			ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE;
+			ice_app_prot_id_type = ETH_P_FCOE;
 		} else if (i == 1) {
 			/* iSCSI APP */
 			ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
@@ -812,14 +812,14 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
 			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
 			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
 			ice_app_sel_type = ICE_APP_SEL_TCPIP;
-			ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI;
+			ice_app_prot_id_type = ISCSI_LISTEN_PORT;
 
 			for (j = 0; j < cmp_dcbcfg->numapps; j++) {
 				u16 prot_id = cmp_dcbcfg->app[j].prot_id;
 				u8 sel = cmp_dcbcfg->app[j].selector;
 
 				if  (sel == ICE_APP_SEL_TCPIP &&
-				     (prot_id == ICE_APP_PROT_ID_ISCSI ||
+				     (prot_id == ISCSI_LISTEN_PORT ||
 				      prot_id == ICE_APP_PROT_ID_ISCSI_860)) {
 					ice_app_prot_id_type = prot_id;
 					break;
@@ -832,7 +832,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
 			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
 			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
 			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
-			ice_app_prot_id_type = ICE_APP_PROT_ID_FIP;
+			ice_app_prot_id_type = ETH_P_FIP;
 		}
 
 		status = (tlv_status & ice_aqc_cee_status_mask) >>
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 1e8f71ffc8ce..df02cffdf209 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -563,7 +563,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
 	dcbcfg->numapps = 1;
 	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
 	dcbcfg->app[0].priority = 3;
-	dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
+	dcbcfg->app[0].prot_id = ETH_P_FCOE;
 
 	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
 	kfree(dcbcfg);
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 7ead1c13f16f..9b80962ff92f 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -551,10 +551,7 @@ struct ice_dcb_app_priority_table {
 #define ICE_TLV_STATUS_OPER	0x1
 #define ICE_TLV_STATUS_SYNC	0x2
 #define ICE_TLV_STATUS_ERR	0x4
-#define ICE_APP_PROT_ID_FCOE	0x8906
-#define ICE_APP_PROT_ID_ISCSI	0x0cbc
 #define ICE_APP_PROT_ID_ISCSI_860 0x035c
-#define ICE_APP_PROT_ID_FIP	0x8914
 #define ICE_APP_SEL_ETHTYPE	0x1
 #define ICE_APP_SEL_TCPIP	0x2
 #define ICE_CEE_APP_SEL_ETHTYPE	0x0
-- 
cgit v1.2.3


From 7e408e07b42dceba4bc6630ff9ce9a55fcb043e0 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 2 Mar 2021 10:15:38 -0800
Subject: ice: Drop leading underscores in enum ice_pf_state

Remove the leading underscores in enum ice_pf_state. This is not really
communicating anything and is unnecessary. No functional change.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h               |  68 +++---
 drivers/net/ethernet/intel/ice/ice_ethtool.c       |  18 +-
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c  |   2 +-
 drivers/net/ethernet/intel/ice/ice_lib.c           |  18 +-
 drivers/net/ethernet/intel/ice/ice_main.c          | 234 ++++++++++-----------
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c |   6 +-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c   |  22 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c           |   6 +-
 8 files changed, 187 insertions(+), 187 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index dd2e75d15558..2cb09af3148c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -197,45 +197,45 @@ struct ice_sw {
 };
 
 enum ice_pf_state {
-	__ICE_TESTING,
-	__ICE_DOWN,
-	__ICE_NEEDS_RESTART,
-	__ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
-	__ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
-	__ICE_PFR_REQ,			/* set by driver and peers */
-	__ICE_CORER_REQ,		/* set by driver and peers */
-	__ICE_GLOBR_REQ,		/* set by driver and peers */
-	__ICE_CORER_RECV,		/* set by OICR handler */
-	__ICE_GLOBR_RECV,		/* set by OICR handler */
-	__ICE_EMPR_RECV,		/* set by OICR handler */
-	__ICE_SUSPENDED,		/* set on module remove path */
-	__ICE_RESET_FAILED,		/* set by reset/rebuild */
+	ICE_TESTING,
+	ICE_DOWN,
+	ICE_NEEDS_RESTART,
+	ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
+	ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
+	ICE_PFR_REQ,			/* set by driver and peers */
+	ICE_CORER_REQ,		/* set by driver and peers */
+	ICE_GLOBR_REQ,		/* set by driver and peers */
+	ICE_CORER_RECV,		/* set by OICR handler */
+	ICE_GLOBR_RECV,		/* set by OICR handler */
+	ICE_EMPR_RECV,		/* set by OICR handler */
+	ICE_SUSPENDED,		/* set on module remove path */
+	ICE_RESET_FAILED,		/* set by reset/rebuild */
 	/* When checking for the PF to be in a nominal operating state, the
 	 * bits that are grouped at the beginning of the list need to be
-	 * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will
+	 * checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will
 	 * be checked. If you need to add a bit into consideration for nominal
 	 * operating state, it must be added before
-	 * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
+	 * ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
 	 * without appropriate consideration.
 	 */
-	__ICE_STATE_NOMINAL_CHECK_BITS,
-	__ICE_ADMINQ_EVENT_PENDING,
-	__ICE_MAILBOXQ_EVENT_PENDING,
-	__ICE_MDD_EVENT_PENDING,
-	__ICE_VFLR_EVENT_PENDING,
-	__ICE_FLTR_OVERFLOW_PROMISC,
-	__ICE_VF_DIS,
-	__ICE_CFG_BUSY,
-	__ICE_SERVICE_SCHED,
-	__ICE_SERVICE_DIS,
-	__ICE_FD_FLUSH_REQ,
-	__ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
-	__ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
-	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
-	__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
-	__ICE_PHY_INIT_COMPLETE,
-	__ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
-	__ICE_STATE_NBITS		/* must be last */
+	ICE_STATE_NOMINAL_CHECK_BITS,
+	ICE_ADMINQ_EVENT_PENDING,
+	ICE_MAILBOXQ_EVENT_PENDING,
+	ICE_MDD_EVENT_PENDING,
+	ICE_VFLR_EVENT_PENDING,
+	ICE_FLTR_OVERFLOW_PROMISC,
+	ICE_VF_DIS,
+	ICE_CFG_BUSY,
+	ICE_SERVICE_SCHED,
+	ICE_SERVICE_DIS,
+	ICE_FD_FLUSH_REQ,
+	ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
+	ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
+	ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
+	ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+	ICE_PHY_INIT_COMPLETE,
+	ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
+	ICE_STATE_NBITS		/* must be last */
 };
 
 enum ice_vsi_state {
@@ -421,7 +421,7 @@ struct ice_pf {
 	u16 num_msix_per_vf;
 	/* used to ratelimit the MDD event logging */
 	unsigned long last_printed_mdd_jiffies;
-	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
+	DECLARE_BITMAP(state, ICE_STATE_NBITS);
 	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
 	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
 	unsigned long *avail_rxqs;	/* bitmap to track PF Rx queue usage */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index a39e890100d9..f2bc8f1e86cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -806,7 +806,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
 		netdev_info(netdev, "offline testing starting\n");
 
-		set_bit(__ICE_TESTING, pf->state);
+		set_bit(ICE_TESTING, pf->state);
 
 		if (ice_active_vfs(pf)) {
 			dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
@@ -816,7 +816,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 			data[ICE_ETH_TEST_LOOP] = 1;
 			data[ICE_ETH_TEST_LINK] = 1;
 			eth_test->flags |= ETH_TEST_FL_FAILED;
-			clear_bit(__ICE_TESTING, pf->state);
+			clear_bit(ICE_TESTING, pf->state);
 			goto skip_ol_tests;
 		}
 		/* If the device is online then take it offline */
@@ -837,7 +837,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
 		    data[ICE_ETH_TEST_REG])
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
-		clear_bit(__ICE_TESTING, pf->state);
+		clear_bit(ICE_TESTING, pf->state);
 
 		if (if_running) {
 			int status = ice_open(netdev);
@@ -1097,7 +1097,7 @@ static int ice_nway_reset(struct net_device *netdev)
 	int err;
 
 	/* If VSI state is up, then restart autoneg with link up */
-	if (!test_bit(__ICE_DOWN, vsi->back->state))
+	if (!test_bit(ICE_DOWN, vsi->back->state))
 		err = ice_set_link(vsi, true);
 	else
 		err = ice_set_link(vsi, false);
@@ -2282,7 +2282,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 		goto done;
 	}
 
-	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout) {
 			err = -EBUSY;
@@ -2392,7 +2392,7 @@ ice_set_link_ksettings(struct net_device *netdev,
 	pi->phy.curr_user_speed_req = adv_link_speed;
 done:
 	kfree(phy_caps);
-	clear_bit(__ICE_CFG_BUSY, pf->state);
+	clear_bit(ICE_CFG_BUSY, pf->state);
 
 	return err;
 }
@@ -2748,7 +2748,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 	if (ice_xsk_any_rx_ring_ena(vsi))
 		return -EBUSY;
 
-	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout)
 			return -EBUSY;
@@ -2927,7 +2927,7 @@ free_tx:
 	}
 
 done:
-	clear_bit(__ICE_CFG_BUSY, pf->state);
+	clear_bit(ICE_CFG_BUSY, pf->state);
 	return err;
 }
 
@@ -3046,7 +3046,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 	}
 
 	/* If we have link and don't have autoneg */
-	if (!test_bit(__ICE_DOWN, pf->state) &&
+	if (!test_bit(ICE_DOWN, pf->state) &&
 	    !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
 		/* Send message that it might not necessarily work*/
 		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 440964defa4a..16de603b280c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1452,7 +1452,7 @@ int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 		return -EBUSY;
 	}
 
-	if (test_bit(__ICE_FD_FLUSH_REQ, pf->state))
+	if (test_bit(ICE_FD_FLUSH_REQ, pf->state))
 		return -EBUSY;
 
 	mutex_lock(&hw->fdir_fltr_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 16d0ee5b48a5..162f01b42147 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1502,13 +1502,13 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
  */
 bool ice_pf_state_is_nominal(struct ice_pf *pf)
 {
-	DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 };
+	DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 };
 
 	if (!pf)
 		return false;
 
-	bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS);
-	if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS))
+	bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS);
+	if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS))
 		return false;
 
 	return true;
@@ -2811,7 +2811,7 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	ice_vsi_delete(vsi);
 	ice_vsi_free_q_vectors(vsi);
 
-	/* make sure unregister_netdev() was called by checking __ICE_DOWN */
+	/* make sure unregister_netdev() was called by checking ICE_DOWN */
 	if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) {
 		free_netdev(vsi->netdev);
 		vsi->netdev = NULL;
@@ -3140,7 +3140,7 @@ err_rings:
 	}
 err_vsi:
 	ice_vsi_clear(vsi);
-	set_bit(__ICE_RESET_FAILED, pf->state);
+	set_bit(ICE_RESET_FAILED, pf->state);
 	kfree(coalesce);
 	return ret;
 }
@@ -3151,10 +3151,10 @@ err_vsi:
  */
 bool ice_is_reset_in_progress(unsigned long *state)
 {
-	return test_bit(__ICE_RESET_OICR_RECV, state) ||
-	       test_bit(__ICE_PFR_REQ, state) ||
-	       test_bit(__ICE_CORER_REQ, state) ||
-	       test_bit(__ICE_GLOBR_REQ, state);
+	return test_bit(ICE_RESET_OICR_RECV, state) ||
+	       test_bit(ICE_PFR_REQ, state) ||
+	       test_bit(ICE_CORER_REQ, state) ||
+	       test_bit(ICE_GLOBR_REQ, state);
 }
 
 #ifdef CONFIG_DCB
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1b2f1e258e5c..032101680e09 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -257,7 +257,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	if (!vsi->netdev)
 		return -EINVAL;
 
-	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state))
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
 		usleep_range(1000, 2000);
 
 	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
@@ -307,7 +307,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 		 * space reserved for promiscuous filters.
 		 */
 		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
-		    !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC,
+		    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
 				      vsi->state)) {
 			promisc_forced_on = true;
 			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
@@ -391,7 +391,7 @@ out:
 	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 exit:
-	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	clear_bit(ICE_CFG_BUSY, vsi->state);
 	return err;
 }
 
@@ -451,7 +451,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
 	unsigned int i;
 
 	/* already prepared for reset */
-	if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
+	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
 		return;
 
 	/* Notify VFs of impending reset */
@@ -472,7 +472,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
 
 	ice_shutdown_all_ctrlq(hw);
 
-	set_bit(__ICE_PREPARED_FOR_RESET, pf->state);
+	set_bit(ICE_PREPARED_FOR_RESET, pf->state);
 }
 
 /**
@@ -493,12 +493,12 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 	/* trigger the reset */
 	if (ice_reset(hw, reset_type)) {
 		dev_err(dev, "reset %d failed\n", reset_type);
-		set_bit(__ICE_RESET_FAILED, pf->state);
-		clear_bit(__ICE_RESET_OICR_RECV, pf->state);
-		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
-		clear_bit(__ICE_PFR_REQ, pf->state);
-		clear_bit(__ICE_CORER_REQ, pf->state);
-		clear_bit(__ICE_GLOBR_REQ, pf->state);
+		set_bit(ICE_RESET_FAILED, pf->state);
+		clear_bit(ICE_RESET_OICR_RECV, pf->state);
+		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(ICE_PFR_REQ, pf->state);
+		clear_bit(ICE_CORER_REQ, pf->state);
+		clear_bit(ICE_GLOBR_REQ, pf->state);
 		return;
 	}
 
@@ -509,8 +509,8 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
 	if (reset_type == ICE_RESET_PFR) {
 		pf->pfr_count++;
 		ice_rebuild(pf, reset_type);
-		clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
-		clear_bit(__ICE_PFR_REQ, pf->state);
+		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(ICE_PFR_REQ, pf->state);
 		ice_reset_all_vfs(pf, true);
 	}
 }
@@ -526,20 +526,20 @@ static void ice_reset_subtask(struct ice_pf *pf)
 	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
 	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
 	 * of reset is pending and sets bits in pf->state indicating the reset
-	 * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set
+	 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
 	 * prepare for pending reset if not already (for PF software-initiated
 	 * global resets the software should already be prepared for it as
-	 * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated
+	 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
 	 * by firmware or software on other PFs, that bit is not set so prepare
 	 * for the reset now), poll for reset done, rebuild and return.
 	 */
-	if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) {
+	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
 		/* Perform the largest reset requested */
-		if (test_and_clear_bit(__ICE_CORER_RECV, pf->state))
+		if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
 			reset_type = ICE_RESET_CORER;
-		if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state))
+		if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
 			reset_type = ICE_RESET_GLOBR;
-		if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state))
+		if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
 			reset_type = ICE_RESET_EMPR;
 		/* return if no valid reset type requested */
 		if (reset_type == ICE_RESET_INVAL)
@@ -548,7 +548,7 @@ static void ice_reset_subtask(struct ice_pf *pf)
 
 		/* make sure we are ready to rebuild */
 		if (ice_check_reset(&pf->hw)) {
-			set_bit(__ICE_RESET_FAILED, pf->state);
+			set_bit(ICE_RESET_FAILED, pf->state);
 		} else {
 			/* done with reset. start rebuild */
 			pf->hw.reset_ongoing = false;
@@ -556,11 +556,11 @@ static void ice_reset_subtask(struct ice_pf *pf)
 			/* clear bit to resume normal operations, but
 			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
 			 */
-			clear_bit(__ICE_RESET_OICR_RECV, pf->state);
-			clear_bit(__ICE_PREPARED_FOR_RESET, pf->state);
-			clear_bit(__ICE_PFR_REQ, pf->state);
-			clear_bit(__ICE_CORER_REQ, pf->state);
-			clear_bit(__ICE_GLOBR_REQ, pf->state);
+			clear_bit(ICE_RESET_OICR_RECV, pf->state);
+			clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+			clear_bit(ICE_PFR_REQ, pf->state);
+			clear_bit(ICE_CORER_REQ, pf->state);
+			clear_bit(ICE_GLOBR_REQ, pf->state);
 			ice_reset_all_vfs(pf, true);
 		}
 
@@ -568,19 +568,19 @@ static void ice_reset_subtask(struct ice_pf *pf)
 	}
 
 	/* No pending resets to finish processing. Check for new resets */
-	if (test_bit(__ICE_PFR_REQ, pf->state))
+	if (test_bit(ICE_PFR_REQ, pf->state))
 		reset_type = ICE_RESET_PFR;
-	if (test_bit(__ICE_CORER_REQ, pf->state))
+	if (test_bit(ICE_CORER_REQ, pf->state))
 		reset_type = ICE_RESET_CORER;
-	if (test_bit(__ICE_GLOBR_REQ, pf->state))
+	if (test_bit(ICE_GLOBR_REQ, pf->state))
 		reset_type = ICE_RESET_GLOBR;
 	/* If no valid reset type requested just return */
 	if (reset_type == ICE_RESET_INVAL)
 		return;
 
 	/* reset if not already down or busy */
-	if (!test_bit(__ICE_DOWN, pf->state) &&
-	    !test_bit(__ICE_CFG_BUSY, pf->state)) {
+	if (!test_bit(ICE_DOWN, pf->state) &&
+	    !test_bit(ICE_CFG_BUSY, pf->state)) {
 		ice_do_reset(pf, reset_type);
 	}
 }
@@ -937,8 +937,8 @@ static void ice_watchdog_subtask(struct ice_pf *pf)
 	int i;
 
 	/* if interface is down do nothing */
-	if (test_bit(__ICE_DOWN, pf->state) ||
-	    test_bit(__ICE_CFG_BUSY, pf->state))
+	if (test_bit(ICE_DOWN, pf->state) ||
+	    test_bit(ICE_CFG_BUSY, pf->state))
 		return;
 
 	/* make sure we don't do these things too often */
@@ -1182,7 +1182,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 	u32 oldval, val;
 
 	/* Do not clean control queue if/when PF reset fails */
-	if (test_bit(__ICE_RESET_FAILED, pf->state))
+	if (test_bit(ICE_RESET_FAILED, pf->state))
 		return 0;
 
 	switch (q_type) {
@@ -1317,13 +1317,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
 
-	if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
 		return;
 
 	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
 		return;
 
-	clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
+	clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
 
 	/* There might be a situation where new messages arrive to a control
 	 * queue between processing the last message and clearing the
@@ -1344,13 +1344,13 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
 
-	if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state))
+	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
 		return;
 
 	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
 		return;
 
-	clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+	clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
 
 	if (ice_ctrlq_pending(hw, &hw->mailboxq))
 		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
@@ -1366,9 +1366,9 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
  */
 void ice_service_task_schedule(struct ice_pf *pf)
 {
-	if (!test_bit(__ICE_SERVICE_DIS, pf->state) &&
-	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) &&
-	    !test_bit(__ICE_NEEDS_RESTART, pf->state))
+	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
+	    !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
+	    !test_bit(ICE_NEEDS_RESTART, pf->state))
 		queue_work(ice_wq, &pf->serv_task);
 }
 
@@ -1378,32 +1378,32 @@ void ice_service_task_schedule(struct ice_pf *pf)
  */
 static void ice_service_task_complete(struct ice_pf *pf)
 {
-	WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state));
+	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
 
 	/* force memory (pf->state) to sync before next service task */
 	smp_mb__before_atomic();
-	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
 }
 
 /**
  * ice_service_task_stop - stop service task and cancel works
  * @pf: board private structure
  *
- * Return 0 if the __ICE_SERVICE_DIS bit was not already set,
+ * Return 0 if the ICE_SERVICE_DIS bit was not already set,
  * 1 otherwise.
  */
 static int ice_service_task_stop(struct ice_pf *pf)
 {
 	int ret;
 
-	ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state);
+	ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
 
 	if (pf->serv_tmr.function)
 		del_timer_sync(&pf->serv_tmr);
 	if (pf->serv_task.func)
 		cancel_work_sync(&pf->serv_task);
 
-	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
 	return ret;
 }
 
@@ -1415,7 +1415,7 @@ static int ice_service_task_stop(struct ice_pf *pf)
  */
 static void ice_service_task_restart(struct ice_pf *pf)
 {
-	clear_bit(__ICE_SERVICE_DIS, pf->state);
+	clear_bit(ICE_SERVICE_DIS, pf->state);
 	ice_service_task_schedule(pf);
 }
 
@@ -1448,7 +1448,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 	unsigned int i;
 	u32 reg;
 
-	if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) {
+	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
 		/* Since the VF MDD event logging is rate limited, check if
 		 * there are pending MDD events.
 		 */
@@ -1540,7 +1540,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_PQM_VALID_M) {
 			wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
 			vf->mdd_tx_events.count++;
-			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
 			if (netif_msg_tx_err(pf))
 				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
 					 i);
@@ -1550,7 +1550,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
 			wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
 			vf->mdd_tx_events.count++;
-			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
 			if (netif_msg_tx_err(pf))
 				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
 					 i);
@@ -1560,7 +1560,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_TX_TDPU_VALID_M) {
 			wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
 			vf->mdd_tx_events.count++;
-			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
 			if (netif_msg_tx_err(pf))
 				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
 					 i);
@@ -1570,7 +1570,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 		if (reg & VP_MDET_RX_VALID_M) {
 			wr32(hw, VP_MDET_RX(i), 0xFFFF);
 			vf->mdd_rx_events.count++;
-			set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state);
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
 			if (netif_msg_rx_err(pf))
 				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
 					 i);
@@ -1735,7 +1735,7 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi)
  * settings using the default override mask from the NVM.
  *
  * The PHY should only be configured with the default override settings the
- * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state
+ * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
  * is used to indicate that the user PHY cfg default override is initialized
  * and the PHY has not been configured with the default override settings. The
  * state is set here, and cleared in ice_configure_phy the first time the PHY is
@@ -1767,7 +1767,7 @@ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
 	cfg->link_fec_opt = ldo->fec_options;
 	phy->curr_user_fec_req = ICE_FEC_AUTO;
 
-	set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
+	set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
 }
 
 /**
@@ -1839,7 +1839,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi)
 
 out:
 	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
-	set_bit(__ICE_PHY_INIT_COMPLETE, pf->state);
+	set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
 err_out:
 	kfree(pcaps);
 	return err;
@@ -1923,7 +1923,7 @@ static int ice_configure_phy(struct ice_vsi *vsi)
 	/* Speed - If default override pending, use curr_user_phy_cfg set in
 	 * ice_init_phy_user_cfg_ldo.
 	 */
-	if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+	if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
 			       vsi->back->state)) {
 		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
 		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
@@ -2002,7 +2002,7 @@ static void ice_check_media_subtask(struct ice_pf *pf)
 		return;
 
 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
-		if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state))
+		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
 			ice_init_phy_user_cfg(pi);
 
 		/* PHY settings are reset on media insertion, reconfigure
@@ -2038,8 +2038,8 @@ static void ice_service_task(struct work_struct *work)
 
 	/* bail if a reset/recovery cycle is pending or rebuild failed */
 	if (ice_is_reset_in_progress(pf->state) ||
-	    test_bit(__ICE_SUSPENDED, pf->state) ||
-	    test_bit(__ICE_NEEDS_RESTART, pf->state)) {
+	    test_bit(ICE_SUSPENDED, pf->state) ||
+	    test_bit(ICE_NEEDS_RESTART, pf->state)) {
 		ice_service_task_complete(pf);
 		return;
 	}
@@ -2060,7 +2060,8 @@ static void ice_service_task(struct work_struct *work)
 	ice_clean_mailboxq_subtask(pf);
 	ice_sync_arfs_fltrs(pf);
 	ice_flush_fdir_ctx(pf);
-	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
+
+	/* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
 	ice_service_task_complete(pf);
 
 	/* If the tasks have taken longer than one service timer period
@@ -2068,11 +2069,11 @@ static void ice_service_task(struct work_struct *work)
 	 * schedule the service task now.
 	 */
 	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
-	    test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
-	    test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
-	    test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
-	    test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) ||
-	    test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
+	    test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
+	    test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
 		mod_timer(&pf->serv_tmr, jiffies);
 }
 
@@ -2102,7 +2103,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
 	struct device *dev = ice_pf_to_dev(pf);
 
 	/* bail out if earlier reset has failed */
-	if (test_bit(__ICE_RESET_FAILED, pf->state)) {
+	if (test_bit(ICE_RESET_FAILED, pf->state)) {
 		dev_dbg(dev, "earlier reset has failed\n");
 		return -EIO;
 	}
@@ -2114,13 +2115,13 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
 
 	switch (reset) {
 	case ICE_RESET_PFR:
-		set_bit(__ICE_PFR_REQ, pf->state);
+		set_bit(ICE_PFR_REQ, pf->state);
 		break;
 	case ICE_RESET_CORER:
-		set_bit(__ICE_CORER_REQ, pf->state);
+		set_bit(ICE_CORER_REQ, pf->state);
 		break;
 	case ICE_RESET_GLOBR:
-		set_bit(__ICE_GLOBR_REQ, pf->state);
+		set_bit(ICE_GLOBR_REQ, pf->state);
 		break;
 	default:
 		return -EINVAL;
@@ -2625,8 +2626,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	u32 oicr, ena_mask;
 
 	dev = ice_pf_to_dev(pf);
-	set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state);
-	set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+	set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
+	set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
 
 	oicr = rd32(hw, PFINT_OICR);
 	ena_mask = rd32(hw, PFINT_OICR_ENA);
@@ -2638,18 +2639,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 
 	if (oicr & PFINT_OICR_MAL_DETECT_M) {
 		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
-		set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
+		set_bit(ICE_MDD_EVENT_PENDING, pf->state);
 	}
 	if (oicr & PFINT_OICR_VFLR_M) {
 		/* disable any further VFLR event notifications */
-		if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
+		if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
 			u32 reg = rd32(hw, PFINT_OICR_ENA);
 
 			reg &= ~PFINT_OICR_VFLR_M;
 			wr32(hw, PFINT_OICR_ENA, reg);
 		} else {
 			ena_mask &= ~PFINT_OICR_VFLR_M;
-			set_bit(__ICE_VFLR_EVENT_PENDING, pf->state);
+			set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
 		}
 	}
 
@@ -2675,13 +2676,13 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		 * We also make note of which reset happened so that peer
 		 * devices/drivers can be informed.
 		 */
-		if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) {
+		if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
 			if (reset == ICE_RESET_CORER)
-				set_bit(__ICE_CORER_RECV, pf->state);
+				set_bit(ICE_CORER_RECV, pf->state);
 			else if (reset == ICE_RESET_GLOBR)
-				set_bit(__ICE_GLOBR_RECV, pf->state);
+				set_bit(ICE_GLOBR_RECV, pf->state);
 			else
-				set_bit(__ICE_EMPR_RECV, pf->state);
+				set_bit(ICE_EMPR_RECV, pf->state);
 
 			/* There are couple of different bits at play here.
 			 * hw->reset_ongoing indicates whether the hardware is
@@ -2689,7 +2690,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 			 * is received and set back to false after the driver
 			 * has determined that the hardware is out of reset.
 			 *
-			 * __ICE_RESET_OICR_RECV in pf->state indicates
+			 * ICE_RESET_OICR_RECV in pf->state indicates
 			 * that a post reset rebuild is required before the
 			 * driver is operational again. This is set above.
 			 *
@@ -2717,7 +2718,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 		if (oicr & (PFINT_OICR_PE_CRITERR_M |
 			    PFINT_OICR_PCI_EXCEPTION_M |
 			    PFINT_OICR_ECC_ERR_M)) {
-			set_bit(__ICE_PFR_REQ, pf->state);
+			set_bit(ICE_PFR_REQ, pf->state);
 			ice_service_task_schedule(pf);
 		}
 	}
@@ -3321,7 +3322,7 @@ static int ice_init_pf(struct ice_pf *pf)
 	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
 	pf->serv_tmr_period = HZ;
 	INIT_WORK(&pf->serv_task, ice_service_task);
-	clear_bit(__ICE_SERVICE_SCHED, pf->state);
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
 
 	mutex_init(&pf->avail_q_mutex);
 	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
@@ -3530,7 +3531,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
 	if (!new_rx && !new_tx)
 		return -EINVAL;
 
-	while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
 		timeout--;
 		if (!timeout)
 			return -EBUSY;
@@ -3554,7 +3555,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
 	ice_pf_dcb_recfg(pf);
 	ice_vsi_open(vsi);
 done:
-	clear_bit(__ICE_CFG_BUSY, pf->state);
+	clear_bit(ICE_CFG_BUSY, pf->state);
 	return err;
 }
 
@@ -4020,9 +4021,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	pf->pdev = pdev;
 	pci_set_drvdata(pdev, pf);
-	set_bit(__ICE_DOWN, pf->state);
+	set_bit(ICE_DOWN, pf->state);
 	/* Disable service task until DOWN bit is cleared */
-	set_bit(__ICE_SERVICE_DIS, pf->state);
+	set_bit(ICE_SERVICE_DIS, pf->state);
 
 	hw = &pf->hw;
 	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
@@ -4162,7 +4163,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		goto err_alloc_sw_unroll;
 	}
 
-	clear_bit(__ICE_SERVICE_DIS, pf->state);
+	clear_bit(ICE_SERVICE_DIS, pf->state);
 
 	/* tell the firmware we are up */
 	err = ice_send_version(pf);
@@ -4256,16 +4257,15 @@ probe_done:
 		goto err_netdev_reg;
 
 	/* ready to go, so clear down state bit */
-	clear_bit(__ICE_DOWN, pf->state);
-
+	clear_bit(ICE_DOWN, pf->state);
 	return 0;
 
 err_netdev_reg:
 err_send_version_unroll:
 	ice_vsi_release_all(pf);
 err_alloc_sw_unroll:
-	set_bit(__ICE_SERVICE_DIS, pf->state);
-	set_bit(__ICE_DOWN, pf->state);
+	set_bit(ICE_SERVICE_DIS, pf->state);
+	set_bit(ICE_DOWN, pf->state);
 	devm_kfree(dev, pf->first_sw);
 err_msix_misc_unroll:
 	ice_free_irq_msix_misc(pf);
@@ -4365,11 +4365,11 @@ static void ice_remove(struct pci_dev *pdev)
 	}
 
 	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
-		set_bit(__ICE_VF_RESETS_DISABLED, pf->state);
+		set_bit(ICE_VF_RESETS_DISABLED, pf->state);
 		ice_free_vfs(pf);
 	}
 
-	set_bit(__ICE_DOWN, pf->state);
+	set_bit(ICE_DOWN, pf->state);
 	ice_service_task_stop(pf);
 
 	ice_aq_cancel_waiting_tasks(pf);
@@ -4529,13 +4529,13 @@ static int __maybe_unused ice_suspend(struct device *dev)
 	disabled = ice_service_task_stop(pf);
 
 	/* Already suspended?, then there is nothing to do */
-	if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) {
+	if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
 		if (!disabled)
 			ice_service_task_restart(pf);
 		return 0;
 	}
 
-	if (test_bit(__ICE_DOWN, pf->state) ||
+	if (test_bit(ICE_DOWN, pf->state) ||
 	    ice_is_reset_in_progress(pf->state)) {
 		dev_err(dev, "can't suspend device in reset or already down\n");
 		if (!disabled)
@@ -4607,16 +4607,16 @@ static int __maybe_unused ice_resume(struct device *dev)
 	if (ret)
 		dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
 
-	clear_bit(__ICE_DOWN, pf->state);
+	clear_bit(ICE_DOWN, pf->state);
 	/* Now perform PF reset and rebuild */
 	reset_type = ICE_RESET_PFR;
 	/* re-enable service task for reset, but allow reset to schedule it */
-	clear_bit(__ICE_SERVICE_DIS, pf->state);
+	clear_bit(ICE_SERVICE_DIS, pf->state);
 
 	if (ice_schedule_reset(pf, reset_type))
 		dev_err(dev, "Reset during resume failed.\n");
 
-	clear_bit(__ICE_SUSPENDED, pf->state);
+	clear_bit(ICE_SUSPENDED, pf->state);
 	ice_service_task_restart(pf);
 
 	/* Restart the service task */
@@ -4645,11 +4645,11 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 
-	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+	if (!test_bit(ICE_SUSPENDED, pf->state)) {
 		ice_service_task_stop(pf);
 
-		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
-			set_bit(__ICE_PFR_REQ, pf->state);
+		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(ICE_PFR_REQ, pf->state);
 			ice_prepare_for_reset(pf);
 		}
 	}
@@ -4716,7 +4716,7 @@ static void ice_pci_err_resume(struct pci_dev *pdev)
 		return;
 	}
 
-	if (test_bit(__ICE_SUSPENDED, pf->state)) {
+	if (test_bit(ICE_SUSPENDED, pf->state)) {
 		dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
 			__func__);
 		return;
@@ -4737,11 +4737,11 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
 
-	if (!test_bit(__ICE_SUSPENDED, pf->state)) {
+	if (!test_bit(ICE_SUSPENDED, pf->state)) {
 		ice_service_task_stop(pf);
 
-		if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) {
-			set_bit(__ICE_PFR_REQ, pf->state);
+		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(ICE_PFR_REQ, pf->state);
 			ice_prepare_for_reset(pf);
 		}
 	}
@@ -4888,7 +4888,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
 		return 0;
 	}
 
-	if (test_bit(__ICE_DOWN, pf->state) ||
+	if (test_bit(ICE_DOWN, pf->state) ||
 	    ice_is_reset_in_progress(pf->state)) {
 		netdev_err(netdev, "can't set mac %pM. device not ready\n",
 			   mac);
@@ -5373,7 +5373,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi)
 	struct ice_pf *pf = vsi->back;
 
 	if (test_bit(ICE_VSI_DOWN, vsi->state) ||
-	    test_bit(__ICE_CFG_BUSY, pf->state))
+	    test_bit(ICE_CFG_BUSY, pf->state))
 		return;
 
 	/* get stats as recorded by Tx/Rx rings */
@@ -5625,7 +5625,7 @@ int ice_down(struct ice_vsi *vsi)
 	int i, tx_err, rx_err, link_err = 0;
 
 	/* Caller of this function is expected to set the
-	 * vsi->state __ICE_DOWN bit
+	 * vsi->state ICE_DOWN bit
 	 */
 	if (vsi->netdev) {
 		netif_carrier_off(vsi->netdev);
@@ -5973,7 +5973,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 	enum ice_status ret;
 	int err;
 
-	if (test_bit(__ICE_DOWN, pf->state))
+	if (test_bit(ICE_DOWN, pf->state))
 		goto clear_recovery;
 
 	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
@@ -6089,7 +6089,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 	ice_replay_post(hw);
 
 	/* if we get here, reset flow is successful */
-	clear_bit(__ICE_RESET_FAILED, pf->state);
+	clear_bit(ICE_RESET_FAILED, pf->state);
 	return;
 
 err_vsi_rebuild:
@@ -6097,10 +6097,10 @@ err_sched_init_port:
 	ice_sched_cleanup_all(hw);
 err_init_ctrlq:
 	ice_shutdown_all_ctrlq(hw);
-	set_bit(__ICE_RESET_FAILED, pf->state);
+	set_bit(ICE_RESET_FAILED, pf->state);
 clear_recovery:
 	/* set this bit in PF state to control service task scheduling */
-	set_bit(__ICE_NEEDS_RESTART, pf->state);
+	set_bit(ICE_NEEDS_RESTART, pf->state);
 	dev_err(dev, "Rebuild failed, unload and reload driver\n");
 }
 
@@ -6622,19 +6622,19 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 
 	switch (pf->tx_timeout_recovery_level) {
 	case 1:
-		set_bit(__ICE_PFR_REQ, pf->state);
+		set_bit(ICE_PFR_REQ, pf->state);
 		break;
 	case 2:
-		set_bit(__ICE_CORER_REQ, pf->state);
+		set_bit(ICE_CORER_REQ, pf->state);
 		break;
 	case 3:
-		set_bit(__ICE_GLOBR_REQ, pf->state);
+		set_bit(ICE_GLOBR_REQ, pf->state);
 		break;
 	default:
 		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
-		set_bit(__ICE_DOWN, pf->state);
+		set_bit(ICE_DOWN, pf->state);
 		set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
-		set_bit(__ICE_SERVICE_DIS, pf->state);
+		set_bit(ICE_SERVICE_DIS, pf->state);
 		break;
 	}
 
@@ -6685,7 +6685,7 @@ int ice_open_internal(struct net_device *netdev)
 	enum ice_status status;
 	int err;
 
-	if (test_bit(__ICE_NEEDS_RESTART, pf->state)) {
+	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
 		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
 		return -EIO;
 	}
@@ -6703,7 +6703,7 @@ int ice_open_internal(struct net_device *netdev)
 	/* Set PHY if there is media, otherwise, turn off PHY */
 	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
 		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
-		if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) {
+		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
 			err = ice_init_phy_user_cfg(pi);
 			if (err) {
 				netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 1f4ba38b1599..eee180d8c024 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -1548,7 +1548,7 @@ static void ice_vf_fdir_timer(struct timer_list *t)
 	ctx_done->v_opcode = ctx_irq->v_opcode;
 	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
 
-	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
 	ice_service_task_schedule(pf);
 }
 
@@ -1596,7 +1596,7 @@ ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
 	if (!ret)
 		dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id);
 
-	set_bit(__ICE_FD_VF_FLUSH_CTX, pf->state);
+	set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
 	ice_service_task_schedule(pf);
 }
 
@@ -1847,7 +1847,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf)
 {
 	int i;
 
-	if (!test_and_clear_bit(__ICE_FD_VF_FLUSH_CTX, pf->state))
+	if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
 		return;
 
 	ice_for_each_vf(pf, i) {
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index e68d52a6b11d..a3d8d06b1e3f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -371,7 +371,7 @@ void ice_free_vfs(struct ice_pf *pf)
 	if (!pf->vf)
 		return;
 
-	while (test_and_set_bit(__ICE_VF_DIS, pf->state))
+	while (test_and_set_bit(ICE_VF_DIS, pf->state))
 		usleep_range(1000, 2000);
 
 	/* Disable IOV before freeing resources. This lets any VF drivers
@@ -424,7 +424,7 @@ void ice_free_vfs(struct ice_pf *pf)
 			wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
 		}
 	}
-	clear_bit(__ICE_VF_DIS, pf->state);
+	clear_bit(ICE_VF_DIS, pf->state);
 	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
 }
 
@@ -1258,7 +1258,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 		return false;
 
 	/* If VFs have been disabled, there is no need to reset */
-	if (test_and_set_bit(__ICE_VF_DIS, pf->state))
+	if (test_and_set_bit(ICE_VF_DIS, pf->state))
 		return false;
 
 	/* Begin reset on all VFs at once */
@@ -1314,7 +1314,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	}
 
 	ice_flush(hw);
-	clear_bit(__ICE_VF_DIS, pf->state);
+	clear_bit(ICE_VF_DIS, pf->state);
 
 	return true;
 }
@@ -1334,7 +1334,7 @@ static bool ice_is_vf_disabled(struct ice_vf *vf)
 	 * means something else is resetting the VF, so we shouldn't continue.
 	 * Otherwise, set disable VF state bit for actual reset, and continue.
 	 */
-	return (test_bit(__ICE_VF_DIS, pf->state) ||
+	return (test_bit(ICE_VF_DIS, pf->state) ||
 		test_bit(ICE_VF_STATE_DIS, vf->vf_states));
 }
 
@@ -1359,7 +1359,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 
 	dev = ice_pf_to_dev(pf);
 
-	if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
+	if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
 		dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n",
 			vf->vf_id);
 		return true;
@@ -1651,7 +1651,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
 	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
 	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
 	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
-	set_bit(__ICE_OICR_INTR_DIS, pf->state);
+	set_bit(ICE_OICR_INTR_DIS, pf->state);
 	ice_flush(hw);
 
 	ret = pci_enable_sriov(pf->pdev, num_vfs);
@@ -1679,7 +1679,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
 		goto err_unroll_sriov;
 	}
 
-	clear_bit(__ICE_VF_DIS, pf->state);
+	clear_bit(ICE_VF_DIS, pf->state);
 	return 0;
 
 err_unroll_sriov:
@@ -1691,7 +1691,7 @@ err_pci_disable_sriov:
 err_unroll_intr:
 	/* rearm interrupts here */
 	ice_irq_dynamic_ena(hw, NULL, NULL);
-	clear_bit(__ICE_OICR_INTR_DIS, pf->state);
+	clear_bit(ICE_OICR_INTR_DIS, pf->state);
 	return ret;
 }
 
@@ -1809,7 +1809,7 @@ void ice_process_vflr_event(struct ice_pf *pf)
 	unsigned int vf_id;
 	u32 reg;
 
-	if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
+	if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
 	    !pf->num_alloc_vfs)
 		return;
 
@@ -4194,7 +4194,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
 	int i;
 
 	/* check that there are pending MDD events to print */
-	if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state))
+	if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state))
 		return;
 
 	/* VF MDD event logs are rate limited to one second intervals */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 17ab8ef024ad..b881b650af98 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -159,7 +159,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
 	rx_ring = vsi->rx_rings[q_idx];
 	q_vector = rx_ring->q_vector;
 
-	while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) {
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
 		timeout--;
 		if (!timeout)
 			return -EBUSY;
@@ -249,7 +249,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
 	if (err)
 		goto free_buf;
 
-	clear_bit(__ICE_CFG_BUSY, vsi->state);
+	clear_bit(ICE_CFG_BUSY, vsi->state);
 	ice_qvec_toggle_napi(vsi, q_vector, true);
 	ice_qvec_ena_irq(vsi, q_vector);
 
@@ -758,7 +758,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_ring *ring;
 
-	if (test_bit(__ICE_DOWN, vsi->state))
+	if (test_bit(ICE_DOWN, vsi->state))
 		return -ENETDOWN;
 
 	if (!ice_is_xdp_ena_vsi(vsi))
-- 
cgit v1.2.3


From a476d72abe6cdd2cccc3dbf5a844286cfe9684ed Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Tue, 2 Mar 2021 10:15:41 -0800
Subject: ice: Add new VSI states to track netdev alloc/registration

Add two new VSI states, one to track if a netdev for the VSI has been
allocated and the other to track if the netdev has been registered.
Call unregister_netdev/free_netdev only when the corresponding state
bits are set.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h      |  2 ++
 drivers/net/ethernet/intel/ice/ice_lib.c  | 21 +++++++++++++++------
 drivers/net/ethernet/intel/ice/ice_main.c |  5 +++++
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 2cb09af3148c..5e1a680e8a8e 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -241,6 +241,8 @@ enum ice_pf_state {
 enum ice_vsi_state {
 	ICE_VSI_DOWN,
 	ICE_VSI_NEEDS_RESTART,
+	ICE_VSI_NETDEV_ALLOCD,
+	ICE_VSI_NETDEV_REGISTERED,
 	ICE_VSI_UMAC_FLTR_CHANGED,
 	ICE_VSI_MMAC_FLTR_CHANGED,
 	ICE_VSI_VLAN_FLTR_CHANGED,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 162f01b42147..40a9b034d73b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2752,11 +2752,14 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	 * PF that is running the work queue items currently. This is done to
 	 * avoid check_flush_dependency() warning on this wq
 	 */
-	if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) {
+	if (vsi->netdev && !ice_is_reset_in_progress(pf->state) &&
+	    (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) {
 		unregister_netdev(vsi->netdev);
-		ice_devlink_destroy_port(vsi);
+		clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
 	}
 
+	ice_devlink_destroy_port(vsi);
+
 	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
 		ice_rss_clean(vsi);
 
@@ -2811,10 +2814,16 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	ice_vsi_delete(vsi);
 	ice_vsi_free_q_vectors(vsi);
 
-	/* make sure unregister_netdev() was called by checking ICE_DOWN */
-	if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) {
-		free_netdev(vsi->netdev);
-		vsi->netdev = NULL;
+	if (vsi->netdev) {
+		if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) {
+			unregister_netdev(vsi->netdev);
+			clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+		}
+		if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) {
+			free_netdev(vsi->netdev);
+			vsi->netdev = NULL;
+			clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+		}
 	}
 
 	if (vsi->type == ICE_VSI_VF &&
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 032101680e09..035c593bce61 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2977,6 +2977,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	if (!netdev)
 		return -ENOMEM;
 
+	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
 	vsi->netdev = netdev;
 	np = netdev_priv(netdev);
 	np->vsi = vsi;
@@ -3189,6 +3190,7 @@ unroll_napi_add:
 	if (vsi) {
 		ice_napi_del(vsi);
 		if (vsi->netdev) {
+			clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
 			free_netdev(vsi->netdev);
 			vsi->netdev = NULL;
 		}
@@ -3958,6 +3960,7 @@ static int ice_register_netdev(struct ice_pf *pf)
 	if (err)
 		goto err_register_netdev;
 
+	set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
 	netif_carrier_off(vsi->netdev);
 	netif_tx_stop_all_queues(vsi->netdev);
 	err = ice_devlink_create_port(vsi);
@@ -3969,9 +3972,11 @@ static int ice_register_netdev(struct ice_pf *pf)
 	return 0;
 err_devlink_create:
 	unregister_netdev(vsi->netdev);
+	clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
 err_register_netdev:
 	free_netdev(vsi->netdev);
 	vsi->netdev = NULL;
+	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
 	return err;
 }
 
-- 
cgit v1.2.3


From b8b4772377dd8a916479796c8a8c5425f937fcaf Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:16:56 -0700
Subject: ice: refactor interrupt moderation writes

Introduce several new helpers for writing ITR and GLINT_RATE
registers, and refactor the code calling them.  This resulted
in removal of several duplicate functions and rolled a bunch
of simple code back into the calling routines.

In particular this removes some code that was doing both
a store and a set in a helper function, which seems better
done as separate tasks in the caller (and generally takes
less lines of code even with a tiny bit of repetition).

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c    |  22 +---
 drivers/net/ethernet/intel/ice/ice_ethtool.c |  17 +--
 drivers/net/ethernet/intel/ice/ice_lib.c     | 171 +++++++++++++++------------
 drivers/net/ethernet/intel/ice/ice_lib.h     |   3 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c     |   3 -
 5 files changed, 112 insertions(+), 104 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index be26775a7dfe..21eb5d447d31 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -740,25 +740,13 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
 {
 	ice_cfg_itr_gran(hw);
 
-	if (q_vector->num_ring_rx) {
-		struct ice_ring_container *rc = &q_vector->rx;
-
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
+	if (q_vector->num_ring_rx)
+		ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting);
 
-	if (q_vector->num_ring_tx) {
-		struct ice_ring_container *rc = &q_vector->tx;
+	if (q_vector->num_ring_tx)
+		ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting);
 
-		rc->target_itr = ITR_TO_REG(rc->itr_setting);
-		rc->next_update = jiffies + 1;
-		rc->current_itr = rc->target_itr;
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
-	}
+	ice_write_intrl(q_vector, q_vector->intrl);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index f2bc8f1e86cc..e273560d9e6e 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3636,9 +3636,8 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 		}
 		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
 			rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
-			wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
-			     ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
-						   pf->hw.intrl_gran));
+			ice_write_intrl(rc->ring->q_vector,
+					ec->rx_coalesce_usecs_high);
 		}
 
 		use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
@@ -3672,10 +3671,15 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 	if (use_adaptive_coalesce) {
 		rc->itr_setting |= ICE_ITR_DYNAMIC;
 	} else {
-		/* save the user set usecs */
+		/* store user facing value how it was set */
 		rc->itr_setting = coalesce_usecs;
-		/* device ITR granularity is in 2 usec increments */
-		rc->target_itr = ITR_REG_ALIGN(rc->itr_setting);
+		/* write the change to the register */
+		ice_write_itr(rc, coalesce_usecs);
+		/* force writes to take effect immediately, the flush shouldn't
+		 * be done in the functions above because the intent is for
+		 * them to do lazy writes.
+		 */
+		ice_flush(&pf->hw);
 	}
 
 	return 0;
@@ -3793,7 +3797,6 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
 		return -EINVAL;
 
 set_complete:
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 40a9b034d73b..bb47376386f4 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1773,7 +1773,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
  * This function converts a decimal interrupt rate limit in usecs to the format
  * expected by firmware.
  */
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
+static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 {
 	u32 val = intrl / gran;
 
@@ -1782,6 +1782,58 @@ u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
 	return 0;
 }
 
+/**
+ * ice_write_intrl - write throttle rate limit to interrupt specific register
+ * @q_vector: pointer to interrupt specific structure
+ * @intrl: throttle rate limit in microseconds to write
+ */
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	wr32(hw, GLINT_RATE(q_vector->reg_idx),
+	     ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25));
+}
+
+/**
+ * __ice_write_itr - write throttle rate to register
+ * @q_vector: pointer to interrupt data structure
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ */
+static void __ice_write_itr(struct ice_q_vector *q_vector,
+			    struct ice_ring_container *rc, u16 itr)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+	     ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S);
+}
+
+/**
+ * ice_write_itr - write throttle rate to queue specific register
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ *
+ * This function is resilient to having the 0x8000 bit set which
+ * is indicating that an ITR value is "DYNAMIC", and will write
+ * the correct value to the register.
+ */
+void ice_write_itr(struct ice_ring_container *rc, u16 itr)
+{
+	struct ice_q_vector *q_vector;
+
+	if (!rc->ring)
+		return;
+
+	q_vector = rc->ring->q_vector;
+
+	/* clear the "DYNAMIC" bit */
+	itr = ITR_TO_REG(itr);
+
+	__ice_write_itr(q_vector, rc, itr);
+}
+
 /**
  * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
  * @vsi: the VSI being configured
@@ -1802,9 +1854,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
 
 		ice_cfg_itr(hw, q_vector);
 
-		wr32(hw, GLINT_RATE(reg_idx),
-		     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
-
 		/* Both Transmit Queue Interrupt Cause Control register
 		 * and Receive Queue Interrupt Cause control register
 		 * expects MSIX_INDX field to be the vector index
@@ -2492,11 +2541,10 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 
 	for (i = 0; i < vsi->num_q_vectors; i++) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[i];
-		u16 reg_idx = q_vector->reg_idx;
 
-		wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0);
-		wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
+		ice_write_intrl(q_vector, 0);
 		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			ice_write_itr(&q_vector->tx, 0);
 			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
 			if (ice_is_xdp_ena_vsi(vsi)) {
 				u32 xdp_txq = txq + vsi->num_xdp_txq;
@@ -2507,6 +2555,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
 		}
 
 		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			ice_write_itr(&q_vector->rx, 0);
 			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
 			rxq++;
 		}
@@ -2843,47 +2892,6 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	return 0;
 }
 
-/**
- * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector
- * @q_vector: pointer to q_vector which is being updated
- * @stored_intrl_setting: original INTRL setting
- *
- * Set coalesce param in q_vector and update these parameters in HW.
- */
-static void
-ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector,
-				      u16 stored_intrl_setting)
-{
-	struct ice_hw *hw = &q_vector->vsi->back->hw;
-
-	q_vector->intrl = stored_intrl_setting;
-	wr32(hw, GLINT_RATE(q_vector->reg_idx),
-	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
-}
-
-/**
- * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector
- * @q_vector: pointer to q_vector which is being updated
- * @rc: pointer to ring container
- * @stored_itr_setting: original ITR setting
- *
- * Set coalesce param in q_vector and update these parameters in HW.
- */
-static void
-ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector,
-				    struct ice_ring_container *rc,
-				    u16 stored_itr_setting)
-{
-	struct ice_hw *hw = &q_vector->vsi->back->hw;
-
-	rc->itr_setting = stored_itr_setting;
-
-	/* dynamic ITR values will be updated during Tx/Rx */
-	if (!ITR_IS_DYNAMIC(rc->itr_setting))
-		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
-		     ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S);
-}
-
 /**
  * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
  * @vsi: VSI connected with q_vectors
@@ -2927,6 +2935,7 @@ static void
 ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
 			     struct ice_coalesce_stored *coalesce, int size)
 {
+	struct ice_ring_container *rc;
 	int i;
 
 	if ((size && !coalesce) || !vsi)
@@ -2949,41 +2958,51 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
 		 *   rings is less than are allocated (this means the number of
 		 *   rings increased from previously), then write out the
 		 *   values in the first element
+		 *
+		 *   Also, always write the ITR, even if in ITR_IS_DYNAMIC
+		 *   as there is no harm because the dynamic algorithm
+		 *   will just overwrite.
 		 */
-		if (i < vsi->alloc_rxq && coalesce[i].rx_valid)
-			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-							    &vsi->q_vectors[i]->rx,
-							    coalesce[i].itr_rx);
-		else if (i < vsi->alloc_rxq)
-			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-							    &vsi->q_vectors[i]->rx,
-							    coalesce[0].itr_rx);
-
-		if (i < vsi->alloc_txq && coalesce[i].tx_valid)
-			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-							    &vsi->q_vectors[i]->tx,
-							    coalesce[i].itr_tx);
-		else if (i < vsi->alloc_txq)
-			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-							    &vsi->q_vectors[i]->tx,
-							    coalesce[0].itr_tx);
-
-		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
-						      coalesce[i].intrl);
+		if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
+			rc = &vsi->q_vectors[i]->rx;
+			rc->itr_setting = coalesce[i].itr_rx;
+			ice_write_itr(rc, rc->itr_setting);
+		} else if (i < vsi->alloc_rxq) {
+			rc = &vsi->q_vectors[i]->rx;
+			rc->itr_setting = coalesce[0].itr_rx;
+			ice_write_itr(rc, rc->itr_setting);
+		}
+
+		if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
+			rc = &vsi->q_vectors[i]->tx;
+			rc->itr_setting = coalesce[i].itr_tx;
+			ice_write_itr(rc, rc->itr_setting);
+		} else if (i < vsi->alloc_txq) {
+			rc = &vsi->q_vectors[i]->tx;
+			rc->itr_setting = coalesce[0].itr_tx;
+			ice_write_itr(rc, rc->itr_setting);
+		}
+
+		vsi->q_vectors[i]->intrl = coalesce[i].intrl;
+		ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl);
 	}
 
 	/* the number of queue vectors increased so write whatever is in
 	 * the first element
 	 */
 	for (; i < vsi->num_q_vectors; i++) {
-		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-						    &vsi->q_vectors[i]->tx,
-						    coalesce[0].itr_tx);
-		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
-						    &vsi->q_vectors[i]->rx,
-						    coalesce[0].itr_rx);
-		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
-						      coalesce[0].intrl);
+		/* transmit */
+		rc = &vsi->q_vectors[i]->tx;
+		rc->itr_setting = coalesce[0].itr_tx;
+		ice_write_itr(rc, rc->itr_setting);
+
+		/* receive */
+		rc = &vsi->q_vectors[i]->rx;
+		rc->itr_setting = coalesce[0].itr_rx;
+		ice_write_itr(rc, rc->itr_setting);
+
+		vsi->q_vectors[i]->intrl = coalesce[0].intrl;
+		ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 462c3ab7abad..f48c5ccb8036 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -95,7 +95,8 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
 
 int ice_status_to_errno(enum ice_status err);
 
-u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl);
+void ice_write_itr(struct ice_ring_container *rc, u16 itr);
 
 enum ice_status
 ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index b881b650af98..faa7b8d96adb 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -108,9 +108,6 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
 
 	ice_cfg_itr(hw, q_vector);
 
-	wr32(hw, GLINT_RATE(reg_idx),
-	     ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
-
 	ice_for_each_ring(ring, q_vector->tx)
 		ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx,
 				      q_vector->tx.itr_idx);
-- 
cgit v1.2.3


From cdf1f1f169179659621bb540575b3a9d1cd38072 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 31 Mar 2021 14:16:57 -0700
Subject: ice: replace custom AIM algorithm with kernel's DIM library

The ice driver has support for adaptive interrupt moderation, an
algorithm for tuning the interrupt rate dynamically. This algorithm
is based on various assumptions about ring size, socket buffer size,
link speed, SKB overhead, ethernet frame overhead and more.

The Linux kernel has support for a dynamic interrupt moderation
algorithm known as "dimlib". Replace the custom driver-specific
implementation of dynamic interrupt moderation with the kernel's
algorithm.

The Intel hardware has a different hardware implementation than the
originators of the dimlib code had to work with, which requires the
driver to use a slightly different set of inputs for the actual
moderation values, while getting all the advice from dimlib of
better/worse, shift left or right.

The change made for this implementation is to use a pair of values
for each of the 5 "slots" that the dimlib moderation expects, and
the driver will program those pairs when dimlib recommends a slot to
use. The currently implementation uses two tables, one for receive
and one for transmit, and the pairs of values in each slot set the
maximum delay of an interrupt and a maximum number of interrupts per
second (both expressed in microseconds).

There are two separate kinds of bugs fixed by using DIMLIB, one is
UDP single stream send was too slow, and the other is that 8K
ping-pong was going to the most aggressive moderation and has much
too high latency.

The overall result of using DIMLIB is that we meet or exceed our
performance expectations set based on the old algorithm.

Co-developed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/Kconfig           |   1 +
 drivers/net/ethernet/intel/ice/ice.h         |   5 +-
 drivers/net/ethernet/intel/ice/ice_ethtool.c |   6 +
 drivers/net/ethernet/intel/ice/ice_lib.c     |  13 +-
 drivers/net/ethernet/intel/ice/ice_main.c    | 108 ++++++++++
 drivers/net/ethernet/intel/ice/ice_txrx.c    | 303 +++++----------------------
 drivers/net/ethernet/intel/ice/ice_txrx.h    |   6 +-
 7 files changed, 175 insertions(+), 267 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 5aa86318ed3e..c1d155690341 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -294,6 +294,7 @@ config ICE
 	tristate "Intel(R) Ethernet Connection E800 Series Support"
 	default n
 	depends on PCI_MSI
+	select DIMLIB
 	select NET_DEVLINK
 	select PLDMFW
 	help
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 5e1a680e8a8e..7ae10fd87265 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -36,6 +36,7 @@
 #include <linux/bpf.h>
 #include <linux/avf/virtchnl.h>
 #include <linux/cpu_rmap.h>
+#include <linux/dim.h>
 #include <net/devlink.h>
 #include <net/ipv6.h>
 #include <net/xdp_sock.h>
@@ -351,7 +352,7 @@ struct ice_q_vector {
 	u16 reg_idx;
 	u8 num_ring_rx;			/* total number of Rx rings in vector */
 	u8 num_ring_tx;			/* total number of Tx rings in vector */
-	u8 itr_countdown;		/* when 0 should adjust adaptive ITR */
+	u8 wb_on_itr:1;			/* if true, WB on ITR is enabled */
 	/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
 	 * value to the device
 	 */
@@ -366,6 +367,8 @@ struct ice_q_vector {
 	struct irq_affinity_notify affinity_notify;
 
 	char name[ICE_INT_NAME_STR_LEN];
+
+	u16 total_events;	/* net_dim(): number of interrupts processed */
 } ____cacheline_internodealigned_in_smp;
 
 enum ice_pf_flags {
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e273560d9e6e..6c057eb6cc45 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3634,6 +3634,12 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 				    ICE_MAX_INTRL);
 			return -EINVAL;
 		}
+		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl &&
+		    (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) {
+			netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n",
+				    c_type_str);
+			return -EINVAL;
+		}
 		if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
 			rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
 			ice_write_intrl(rc->ring->q_vector,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index bb47376386f4..a080cf7bc2d1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -385,6 +385,8 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
 	if (!q_vector->tx.ring && !q_vector->rx.ring)
 		return IRQ_HANDLED;
 
+	q_vector->total_events++;
+
 	napi_schedule(&q_vector->napi);
 
 	return IRQ_HANDLED;
@@ -3270,20 +3272,15 @@ out:
 /**
  * ice_update_ring_stats - Update ring statistics
  * @ring: ring to update
- * @cont: used to increment per-vector counters
  * @pkts: number of processed packets
  * @bytes: number of processed bytes
  *
  * This function assumes that caller has acquired a u64_stats_sync lock.
  */
-static void
-ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
-		      u64 pkts, u64 bytes)
+static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes)
 {
 	ring->stats.bytes += bytes;
 	ring->stats.pkts += pkts;
-	cont->total_bytes += bytes;
-	cont->total_pkts += pkts;
 }
 
 /**
@@ -3295,7 +3292,7 @@ ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
 void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
 {
 	u64_stats_update_begin(&tx_ring->syncp);
-	ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes);
+	ice_update_ring_stats(tx_ring, pkts, bytes);
 	u64_stats_update_end(&tx_ring->syncp);
 }
 
@@ -3308,7 +3305,7 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
 void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
 {
 	u64_stats_update_begin(&rx_ring->syncp);
-	ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes);
+	ice_update_ring_stats(rx_ring, pkts, bytes);
 	u64_stats_update_end(&rx_ring->syncp);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 035c593bce61..1e023d163447 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5196,6 +5196,105 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
 	return err;
 }
 
+/* THEORY OF MODERATION:
+ * The below code creates custom DIM profiles for use by this driver, because
+ * the ice driver hardware works differently than the hardware that DIMLIB was
+ * originally made for. ice hardware doesn't have packet count limits that
+ * can trigger an interrupt, but it *does* have interrupt rate limit support,
+ * and this code adds that capability to be used by the driver when it's using
+ * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver
+ * for how to "respond" to traffic and interrupts, so this driver uses a
+ * slightly different set of moderation parameters to get best performance.
+ */
+struct ice_dim {
+	/* the throttle rate for interrupts, basically worst case delay before
+	 * an initial interrupt fires, value is stored in microseconds.
+	 */
+	u16 itr;
+	/* the rate limit for interrupts, which can cap a delay from a small
+	 * ITR at a certain amount of interrupts per second. f.e. a 2us ITR
+	 * could yield as much as 500,000 interrupts per second, but with a
+	 * 10us rate limit, it limits to 100,000 interrupts per second. Value
+	 * is stored in microseconds.
+	 */
+	u16 intrl;
+};
+
+/* Make a different profile for Rx that doesn't allow quite so aggressive
+ * moderation at the high end (it maxes out at 128us or about 8k interrupts a
+ * second. The INTRL/rate parameters here are only useful to cap small ITR
+ * values, which is why for larger ITR's - like 128, which can only generate
+ * 8k interrupts per second, there is no point to rate limit and the values
+ * are set to zero. The rate limit values do affect latency, and so must
+ * be reasonably small so to not impact latency sensitive tests.
+ */
+static const struct ice_dim rx_profile[] = {
+	{2, 10},
+	{8, 16},
+	{32, 0},
+	{96, 0},
+	{128, 0}
+};
+
+/* The transmit profile, which has the same sorts of values
+ * as the previous struct
+ */
+static const struct ice_dim tx_profile[] = {
+	{2, 10},
+	{8, 16},
+	{64, 0},
+	{128, 0},
+	{256, 0}
+};
+
+static void ice_tx_dim_work(struct work_struct *work)
+{
+	struct ice_ring_container *rc;
+	struct ice_q_vector *q_vector;
+	struct dim *dim;
+	u16 itr, intrl;
+
+	dim = container_of(work, struct dim, work);
+	rc = container_of(dim, struct ice_ring_container, dim);
+	q_vector = container_of(rc, struct ice_q_vector, tx);
+
+	if (dim->profile_ix >= ARRAY_SIZE(tx_profile))
+		dim->profile_ix = ARRAY_SIZE(tx_profile) - 1;
+
+	/* look up the values in our local table */
+	itr = tx_profile[dim->profile_ix].itr;
+	intrl = tx_profile[dim->profile_ix].intrl;
+
+	ice_write_itr(rc, itr);
+	ice_write_intrl(q_vector, intrl);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+static void ice_rx_dim_work(struct work_struct *work)
+{
+	struct ice_ring_container *rc;
+	struct ice_q_vector *q_vector;
+	struct dim *dim;
+	u16 itr, intrl;
+
+	dim = container_of(work, struct dim, work);
+	rc = container_of(dim, struct ice_ring_container, dim);
+	q_vector = container_of(rc, struct ice_q_vector, rx);
+
+	if (dim->profile_ix >= ARRAY_SIZE(rx_profile))
+		dim->profile_ix = ARRAY_SIZE(rx_profile) - 1;
+
+	/* look up the values in our local table */
+	itr = rx_profile[dim->profile_ix].itr;
+	intrl = rx_profile[dim->profile_ix].intrl;
+
+	ice_write_itr(rc, itr);
+	ice_write_intrl(q_vector, intrl);
+
+	dim->state = DIM_START_MEASURE;
+}
+
 /**
  * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
  * @vsi: the VSI being configured
@@ -5210,6 +5309,12 @@ static void ice_napi_enable_all(struct ice_vsi *vsi)
 	ice_for_each_q_vector(vsi, q_idx) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
 
+		INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work);
+		q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+
+		INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work);
+		q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+
 		if (q_vector->rx.ring || q_vector->tx.ring)
 			napi_enable(&q_vector->napi);
 	}
@@ -5618,6 +5723,9 @@ static void ice_napi_disable_all(struct ice_vsi *vsi)
 
 		if (q_vector->rx.ring || q_vector->tx.ring)
 			napi_disable(&q_vector->napi);
+
+		cancel_work_sync(&q_vector->tx.dim.work);
+		cancel_work_sync(&q_vector->rx.dim.work);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index dfdf2c1fa9d3..97c3efb932dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1223,216 +1223,50 @@ construct_skb:
 }
 
 /**
- * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
- * @port_info: port_info structure containing the current link speed
- * @avg_pkt_size: average size of Tx or Rx packets based on clean routine
- * @itr: ITR value to update
+ * ice_net_dim - Update net DIM algorithm
+ * @q_vector: the vector associated with the interrupt
  *
- * Calculate how big of an increment should be applied to the ITR value passed
- * in based on wmem_default, SKB overhead, ethernet overhead, and the current
- * link speed.
+ * Create a DIM sample and notify net_dim() so that it can possibly decide
+ * a new ITR value based on incoming packets, bytes, and interrupts.
  *
- * The following is a calculation derived from:
- *  wmem_default / (size + overhead) = desired_pkts_per_int
- *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
- *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
- *
- * Assuming wmem_default is 212992 and overhead is 640 bytes per
- * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
- * formula down to:
- *
- *	 wmem_default * bits_per_byte * usecs_per_sec   pkt_size + 24
- * ITR = -------------------------------------------- * --------------
- *			     rate			pkt_size + 640
+ * This function is a no-op if the ring is not configured to dynamic ITR.
  */
-static unsigned int
-ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info,
-				 unsigned int avg_pkt_size,
-				 unsigned int itr)
+static void ice_net_dim(struct ice_q_vector *q_vector)
 {
-	switch (port_info->phy.link_info.link_speed) {
-	case ICE_AQ_LINK_SPEED_100GB:
-		itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	case ICE_AQ_LINK_SPEED_50GB:
-		itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	case ICE_AQ_LINK_SPEED_40GB:
-		itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	case ICE_AQ_LINK_SPEED_25GB:
-		itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	case ICE_AQ_LINK_SPEED_20GB:
-		itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	case ICE_AQ_LINK_SPEED_10GB:
-	default:
-		itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24),
-				    avg_pkt_size + 640);
-		break;
-	}
-
-	if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
-		itr &= ICE_ITR_ADAPTIVE_LATENCY;
-		itr += ICE_ITR_ADAPTIVE_MAX_USECS;
-	}
+	struct ice_ring_container *tx = &q_vector->tx;
+	struct ice_ring_container *rx = &q_vector->rx;
 
-	return itr;
-}
+	if (ITR_IS_DYNAMIC(tx->itr_setting)) {
+		struct dim_sample dim_sample = {};
+		u64 packets = 0, bytes = 0;
+		struct ice_ring *ring;
 
-/**
- * ice_update_itr - update the adaptive ITR value based on statistics
- * @q_vector: structure containing interrupt and ring information
- * @rc: structure containing ring performance data
- *
- * Stores a new ITR value based on packets and byte
- * counts during the last interrupt.  The advantage of per interrupt
- * computation is faster updates and more accurate ITR for the current
- * traffic pattern.  Constants in this function were computed
- * based on theoretical maximum wire speed and thresholds were set based
- * on testing data as well as attempting to minimize response time
- * while increasing bulk throughput.
- */
-static void
-ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
-{
-	unsigned long next_update = jiffies;
-	unsigned int packets, bytes, itr;
-	bool container_is_rx;
+		ice_for_each_ring(ring, q_vector->tx) {
+			packets += ring->stats.pkts;
+			bytes += ring->stats.bytes;
+		}
 
-	if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting))
-		return;
+		dim_update_sample(q_vector->total_events, packets, bytes,
+				  &dim_sample);
 
-	/* If itr_countdown is set it means we programmed an ITR within
-	 * the last 4 interrupt cycles. This has a side effect of us
-	 * potentially firing an early interrupt. In order to work around
-	 * this we need to throw out any data received for a few
-	 * interrupts following the update.
-	 */
-	if (q_vector->itr_countdown) {
-		itr = rc->target_itr;
-		goto clear_counts;
+		net_dim(&tx->dim, dim_sample);
 	}
 
-	container_is_rx = (&q_vector->rx == rc);
-	/* For Rx we want to push the delay up and default to low latency.
-	 * for Tx we want to pull the delay down and default to high latency.
-	 */
-	itr = container_is_rx ?
-		ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY :
-		ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY;
-
-	/* If we didn't update within up to 1 - 2 jiffies we can assume
-	 * that either packets are coming in so slow there hasn't been
-	 * any work, or that there is so much work that NAPI is dealing
-	 * with interrupt moderation and we don't need to do anything.
-	 */
-	if (time_after(next_update, rc->next_update))
-		goto clear_counts;
-
-	prefetch(q_vector->vsi->port_info);
-
-	packets = rc->total_pkts;
-	bytes = rc->total_bytes;
-
-	if (container_is_rx) {
-		/* If Rx there are 1 to 4 packets and bytes are less than
-		 * 9000 assume insufficient data to use bulk rate limiting
-		 * approach unless Tx is already in bulk rate limiting. We
-		 * are likely latency driven.
-		 */
-		if (packets && packets < 4 && bytes < 9000 &&
-		    (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) {
-			itr = ICE_ITR_ADAPTIVE_LATENCY;
-			goto adjust_by_size_and_speed;
-		}
-	} else if (packets < 4) {
-		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
-		 * bulk mode and we are receiving 4 or fewer packets just
-		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
-		 * that the Rx can relax.
-		 */
-		if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS &&
-		    (q_vector->rx.target_itr & ICE_ITR_MASK) ==
-		    ICE_ITR_ADAPTIVE_MAX_USECS)
-			goto clear_counts;
-	} else if (packets > 32) {
-		/* If we have processed over 32 packets in a single interrupt
-		 * for Tx assume we need to switch over to "bulk" mode.
-		 */
-		rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY;
-	}
+	if (ITR_IS_DYNAMIC(rx->itr_setting)) {
+		struct dim_sample dim_sample = {};
+		u64 packets = 0, bytes = 0;
+		struct ice_ring *ring;
 
-	/* We have no packets to actually measure against. This means
-	 * either one of the other queues on this vector is active or
-	 * we are a Tx queue doing TSO with too high of an interrupt rate.
-	 *
-	 * Between 4 and 56 we can assume that our current interrupt delay
-	 * is only slightly too low. As such we should increase it by a small
-	 * fixed amount.
-	 */
-	if (packets < 56) {
-		itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC;
-		if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) {
-			itr &= ICE_ITR_ADAPTIVE_LATENCY;
-			itr += ICE_ITR_ADAPTIVE_MAX_USECS;
+		ice_for_each_ring(ring, q_vector->rx) {
+			packets += ring->stats.pkts;
+			bytes += ring->stats.bytes;
 		}
-		goto clear_counts;
-	}
-
-	if (packets <= 256) {
-		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
-		itr &= ICE_ITR_MASK;
-
-		/* Between 56 and 112 is our "goldilocks" zone where we are
-		 * working out "just right". Just report that our current
-		 * ITR is good for us.
-		 */
-		if (packets <= 112)
-			goto clear_counts;
 
-		/* If packet count is 128 or greater we are likely looking
-		 * at a slight overrun of the delay we want. Try halving
-		 * our delay to see if that will cut the number of packets
-		 * in half per interrupt.
-		 */
-		itr >>= 1;
-		itr &= ICE_ITR_MASK;
-		if (itr < ICE_ITR_ADAPTIVE_MIN_USECS)
-			itr = ICE_ITR_ADAPTIVE_MIN_USECS;
+		dim_update_sample(q_vector->total_events, packets, bytes,
+				  &dim_sample);
 
-		goto clear_counts;
+		net_dim(&rx->dim, dim_sample);
 	}
-
-	/* The paths below assume we are dealing with a bulk ITR since
-	 * number of packets is greater than 256. We are just going to have
-	 * to compute a value and try to bring the count under control,
-	 * though for smaller packet sizes there isn't much we can do as
-	 * NAPI polling will likely be kicking in sooner rather than later.
-	 */
-	itr = ICE_ITR_ADAPTIVE_BULK;
-
-adjust_by_size_and_speed:
-
-	/* based on checks above packets cannot be 0 so division is safe */
-	itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info,
-					       bytes / packets, itr);
-
-clear_counts:
-	/* write back value */
-	rc->target_itr = itr;
-
-	/* next update should occur within next jiffy */
-	rc->next_update = next_update + 1;
-
-	rc->total_bytes = 0;
-	rc->total_pkts = 0;
 }
 
 /**
@@ -1456,72 +1290,35 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
 		(itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
 }
 
-/* The act of updating the ITR will cause it to immediately trigger. In order
- * to prevent this from throwing off adaptive update statistics we defer the
- * update so that it can only happen so often. So after either Tx or Rx are
- * updated we make the adaptive scheme wait until either the ITR completely
- * expires via the next_update expiration or we have been through at least
- * 3 interrupts.
- */
-#define ITR_COUNTDOWN_START 3
-
 /**
- * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
- * @q_vector: q_vector for which ITR is being updated and interrupt enabled
+ * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt
+ * @q_vector: the vector associated with the interrupt to enable
+ *
+ * Update the net_dim() algorithm and re-enable the interrupt associated with
+ * this vector.
+ *
+ * If the VSI is down, the interrupt will not be re-enabled.
  */
 static void ice_update_ena_itr(struct ice_q_vector *q_vector)
 {
-	struct ice_ring_container *tx = &q_vector->tx;
-	struct ice_ring_container *rx = &q_vector->rx;
 	struct ice_vsi *vsi = q_vector->vsi;
 	u32 itr_val;
 
-	/* when exiting WB_ON_ITR just reset the countdown and let ITR
-	 * resume it's normal "interrupts-enabled" path
-	 */
-	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
-		q_vector->itr_countdown = 0;
-
-	/* This will do nothing if dynamic updates are not enabled */
-	ice_update_itr(q_vector, tx);
-	ice_update_itr(q_vector, rx);
+	if (test_bit(ICE_DOWN, vsi->state))
+		return;
 
-	/* This block of logic allows us to get away with only updating
-	 * one ITR value with each interrupt. The idea is to perform a
-	 * pseudo-lazy update with the following criteria.
-	 *
-	 * 1. Rx is given higher priority than Tx if both are in same state
-	 * 2. If we must reduce an ITR that is given highest priority.
-	 * 3. We then give priority to increasing ITR based on amount.
+	/* When exiting WB_ON_ITR, let ITR resume its normal
+	 * interrupts-enabled path.
 	 */
-	if (rx->target_itr < rx->current_itr) {
-		/* Rx ITR needs to be reduced, this is highest priority */
-		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
-		rx->current_itr = rx->target_itr;
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	} else if ((tx->target_itr < tx->current_itr) ||
-		   ((rx->target_itr - rx->current_itr) <
-		    (tx->target_itr - tx->current_itr))) {
-		/* Tx ITR needs to be reduced, this is second priority
-		 * Tx ITR needs to be increased more than Rx, fourth priority
-		 */
-		itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr);
-		tx->current_itr = tx->target_itr;
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	} else if (rx->current_itr != rx->target_itr) {
-		/* Rx ITR needs to be increased, third priority */
-		itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr);
-		rx->current_itr = rx->target_itr;
-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
-	} else {
-		/* Still have to re-enable the interrupts */
-		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
-		if (q_vector->itr_countdown)
-			q_vector->itr_countdown--;
-	}
+	if (q_vector->wb_on_itr)
+		q_vector->wb_on_itr = false;
+
+	/* This will do nothing if dynamic updates are not enabled. */
+	ice_net_dim(q_vector);
 
-	if (!test_bit(ICE_VSI_DOWN, vsi->state))
-		wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+	/* net_dim() updates ITR out-of-band using a work item */
+	itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+	wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
 }
 
 /**
@@ -1543,7 +1340,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
 	struct ice_vsi *vsi = q_vector->vsi;
 
 	/* already in wb_on_itr mode no need to change it */
-	if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
+	if (q_vector->wb_on_itr)
 		return;
 
 	/* use previously set ITR values for all of the ITR indices by
@@ -1555,7 +1352,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
 	      GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
 	     GLINT_DYN_CTL_WB_ON_ITR_M);
 
-	q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
+	q_vector->wb_on_itr = true;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 701552d88bea..ae12b4e6453b 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -339,12 +339,8 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring)
 struct ice_ring_container {
 	/* head of linked-list of rings */
 	struct ice_ring *ring;
-	unsigned long next_update;	/* jiffies value of next queue update */
-	unsigned int total_bytes;	/* total bytes processed this int */
-	unsigned int total_pkts;	/* total packets processed this int */
+	struct dim dim;		/* data for net_dim algorithm */
 	u16 itr_idx;		/* index in the interrupt vector */
-	u16 target_itr;		/* value in usecs divided by the hw->itr_gran */
-	u16 current_itr;	/* value in usecs divided by the hw->itr_gran */
 	/* high bit set means dynamic ITR, rest is used to store user
 	 * readable ITR value in usecs and must be converted before programming
 	 * to a register.
-- 
cgit v1.2.3


From b7306b42beaf6abdbcb49849b5254ad06321abd1 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:16:58 -0700
Subject: ice: manage interrupts during poll exit

The driver would occasionally miss that there were outstanding
descriptors to clean when exiting busy/napi poll. This issue has
been in the code since the introduction of the ice driver.

Attempt to "catch" any remaining work by triggering a software
interrupt when exiting napi poll or busy-poll. This will not
cause extra interrupts in the case of normal execution.

This issue was found when running sfnt-pingpong, with busy
poll enabled, and typically with larger I/O sizes like > 8192,
the program would occasionally report > 1 second maximums
to complete a ping pong.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  1 +
 drivers/net/ethernet/intel/ice/ice_txrx.c       | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 67b5b9b9d009..de38a0fc9665 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -130,6 +130,7 @@
 #define GLINT_DYN_CTL_ITR_INDX_M		ICE_M(0x3, 3)
 #define GLINT_DYN_CTL_INTERVAL_S		5
 #define GLINT_DYN_CTL_INTERVAL_M		ICE_M(0xFFF, 5)
+#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M		BIT(24)
 #define GLINT_DYN_CTL_SW_ITR_INDX_M		ICE_M(0x3, 25)
 #define GLINT_DYN_CTL_WB_ON_ITR_M		BIT(30)
 #define GLINT_DYN_CTL_INTENA_MSK_M		BIT(31)
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 97c3efb932dc..1ce9ad02477d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1302,6 +1302,7 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
 static void ice_update_ena_itr(struct ice_q_vector *q_vector)
 {
 	struct ice_vsi *vsi = q_vector->vsi;
+	bool wb_en = q_vector->wb_on_itr;
 	u32 itr_val;
 
 	if (test_bit(ICE_DOWN, vsi->state))
@@ -1310,7 +1311,7 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
 	/* When exiting WB_ON_ITR, let ITR resume its normal
 	 * interrupts-enabled path.
 	 */
-	if (q_vector->wb_on_itr)
+	if (wb_en)
 		q_vector->wb_on_itr = false;
 
 	/* This will do nothing if dynamic updates are not enabled. */
@@ -1318,6 +1319,16 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
 
 	/* net_dim() updates ITR out-of-band using a work item */
 	itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+	/* trigger an immediate software interrupt when exiting
+	 * busy poll, to make sure to catch any pending cleanups
+	 * that might have been missed due to interrupt state
+	 * transition.
+	 */
+	if (wb_en) {
+		itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
+			   GLINT_DYN_CTL_SW_ITR_INDX_M |
+			   GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
+	}
 	wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
 }
 
-- 
cgit v1.2.3


From d59684a07e37b06295e314301c9d0c04915a52f7 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:16:59 -0700
Subject: ice: refactor ITR data structures

Use a dedicated bitfield in order to both increase
the amount of checking around the length of ITR writes
as well as simplify the checks of dynamic mode.

Basically unpack the "high bit means dynamic" logic
into bitfields.

Also, remove some unused ITR defines.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c    |  3 +++
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 15 +++++++-------
 drivers/net/ethernet/intel/ice/ice_lib.c     |  7 -------
 drivers/net/ethernet/intel/ice/ice_txrx.c    |  4 ++--
 drivers/net/ethernet/intel/ice/ice_txrx.h    | 30 +++++++++++++---------------
 5 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 21eb5d447d31..5985a7e5ca8a 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -113,6 +113,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
 	q_vector->v_idx = v_idx;
 	q_vector->tx.itr_setting = ICE_DFLT_TX_ITR;
 	q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
+	q_vector->tx.itr_mode = ITR_DYNAMIC;
+	q_vector->rx.itr_mode = ITR_DYNAMIC;
+
 	if (vsi->type == ICE_VSI_VF)
 		goto out;
 	/* only set affinity_mask if the CPU is online */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 6c057eb6cc45..68c8ad8d157e 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3510,13 +3510,13 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type,
 
 	switch (c_type) {
 	case ICE_RX_CONTAINER:
-		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
-		ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc);
+		ec->rx_coalesce_usecs = rc->itr_setting;
 		ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl;
 		break;
 	case ICE_TX_CONTAINER:
-		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting);
-		ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc);
+		ec->tx_coalesce_usecs = rc->itr_setting;
 		break;
 	default:
 		dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type);
@@ -3661,7 +3661,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 		return -EINVAL;
 	}
 
-	itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC;
+	itr_setting = rc->itr_setting;
 	if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
 		netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
 			    c_type_str, c_type_str);
@@ -3675,8 +3675,9 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
 	}
 
 	if (use_adaptive_coalesce) {
-		rc->itr_setting |= ICE_ITR_DYNAMIC;
+		rc->itr_mode = ITR_DYNAMIC;
 	} else {
+		rc->itr_mode = ITR_STATIC;
 		/* store user facing value how it was set */
 		rc->itr_setting = coalesce_usecs;
 		/* write the change to the register */
@@ -3747,8 +3748,6 @@ ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
 	if (use_adaptive_coalesce)
 		return;
 
-	itr_setting = ITR_TO_REG(itr_setting);
-
 	if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
 		netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
 			    c_type_str, coalesce_usecs, c_type_str,
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index a080cf7bc2d1..45837e9eee86 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1816,10 +1816,6 @@ static void __ice_write_itr(struct ice_q_vector *q_vector,
  * ice_write_itr - write throttle rate to queue specific register
  * @rc: pointer to ring container
  * @itr: throttle rate in microseconds to write
- *
- * This function is resilient to having the 0x8000 bit set which
- * is indicating that an ITR value is "DYNAMIC", and will write
- * the correct value to the register.
  */
 void ice_write_itr(struct ice_ring_container *rc, u16 itr)
 {
@@ -1830,9 +1826,6 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr)
 
 	q_vector = rc->ring->q_vector;
 
-	/* clear the "DYNAMIC" bit */
-	itr = ITR_TO_REG(itr);
-
 	__ice_write_itr(q_vector, rc, itr);
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 1ce9ad02477d..e2b4b29ea207 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1236,7 +1236,7 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
 	struct ice_ring_container *tx = &q_vector->tx;
 	struct ice_ring_container *rx = &q_vector->rx;
 
-	if (ITR_IS_DYNAMIC(tx->itr_setting)) {
+	if (ITR_IS_DYNAMIC(tx)) {
 		struct dim_sample dim_sample = {};
 		u64 packets = 0, bytes = 0;
 		struct ice_ring *ring;
@@ -1252,7 +1252,7 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
 		net_dim(&tx->dim, dim_sample);
 	}
 
-	if (ITR_IS_DYNAMIC(rx->itr_setting)) {
+	if (ITR_IS_DYNAMIC(rx)) {
 		struct dim_sample dim_sample = {};
 		u64 packets = 0, bytes = 0;
 		struct ice_ring *ring;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index ae12b4e6453b..c5a92ac787d6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -223,23 +223,20 @@ enum ice_rx_dtype {
 #define ICE_TX_ITR	ICE_IDX_ITR1
 #define ICE_ITR_8K	124
 #define ICE_ITR_20K	50
-#define ICE_ITR_MAX	8160
-#define ICE_DFLT_TX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
-#define ICE_DFLT_RX_ITR	(ICE_ITR_20K | ICE_ITR_DYNAMIC)
-#define ICE_ITR_DYNAMIC	0x8000  /* used as flag for itr_setting */
-#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC))
-#define ITR_TO_REG(setting)	((setting) & ~ICE_ITR_DYNAMIC)
+#define ICE_ITR_MAX	8160 /* 0x1FE0 */
+#define ICE_DFLT_TX_ITR	ICE_ITR_20K
+#define ICE_DFLT_RX_ITR	ICE_ITR_20K
+enum ice_dynamic_itr {
+	ITR_STATIC = 0,
+	ITR_DYNAMIC = 1
+};
+
+#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC)
 #define ICE_ITR_GRAN_S		1	/* ITR granularity is always 2us */
 #define ICE_ITR_GRAN_US		BIT(ICE_ITR_GRAN_S)
 #define ICE_ITR_MASK		0x1FFE	/* ITR register value alignment mask */
 #define ITR_REG_ALIGN(setting)	((setting) & ICE_ITR_MASK)
 
-#define ICE_ITR_ADAPTIVE_MIN_INC	0x0002
-#define ICE_ITR_ADAPTIVE_MIN_USECS	0x0002
-#define ICE_ITR_ADAPTIVE_MAX_USECS	0x00FA
-#define ICE_ITR_ADAPTIVE_LATENCY	0x8000
-#define ICE_ITR_ADAPTIVE_BULK		0x0000
-
 #define ICE_DFLT_INTRL	0
 #define ICE_MAX_INTRL	236
 
@@ -341,11 +338,12 @@ struct ice_ring_container {
 	struct ice_ring *ring;
 	struct dim dim;		/* data for net_dim algorithm */
 	u16 itr_idx;		/* index in the interrupt vector */
-	/* high bit set means dynamic ITR, rest is used to store user
-	 * readable ITR value in usecs and must be converted before programming
-	 * to a register.
+	/* this matches the maximum number of ITR bits, but in usec
+	 * values, so it is shifted left one bit (bit zero is ignored)
 	 */
-	u16 itr_setting;
+	u16 itr_setting:13;
+	u16 itr_reserved:2;
+	u16 itr_mode:1;
 };
 
 struct ice_coalesce_stored {
-- 
cgit v1.2.3


From e9c9692c8a81aacf0854f68ab54dc182f8be38e8 Mon Sep 17 00:00:00 2001
From: Scott W Taylor <scott.w.taylor@intel.com>
Date: Wed, 31 Mar 2021 14:17:00 -0700
Subject: ice: Reimplement module reads used by ethtool

There was an excessive increment of the QSFP page, which is
now fixed. Additionally, this new update now reads 8 bytes
at a time and will retry each request if the module/bus is
busy.

Also, prevent reading from upper pages if module does not
support those pages.

Signed-off-by: Scott W Taylor <scott.w.taylor@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 49 ++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 68c8ad8d157e..d9ddd0bcf65f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3914,30 +3914,33 @@ ice_get_module_eeprom(struct net_device *netdev,
 		      struct ethtool_eeprom *ee, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
+#define SFF_READ_BLOCK_SIZE 8
+	u8 value[SFF_READ_BLOCK_SIZE] = { 0 };
 	u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status status;
 	bool is_sfp = false;
-	unsigned int i;
+	unsigned int i, j;
 	u16 offset = 0;
-	u8 value = 0;
 	u8 page = 0;
 
 	if (!ee || !ee->len || !data)
 		return -EINVAL;
 
-	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0,
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0,
 				   NULL);
 	if (status)
 		return -EIO;
 
-	if (value == ICE_MODULE_TYPE_SFP)
+	if (value[0] == ICE_MODULE_TYPE_SFP)
 		is_sfp = true;
 
-	for (i = 0; i < ee->len; i++) {
+	memset(data, 0, ee->len);
+	for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) {
 		offset = i + ee->offset;
+		page = 0;
 
 		/* Check if we need to access the other memory page */
 		if (is_sfp) {
@@ -3953,11 +3956,37 @@ ice_get_module_eeprom(struct net_device *netdev,
 			}
 		}
 
-		status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp,
-					   &value, 1, 0, NULL);
-		if (status)
-			value = 0;
-		data[i] = value;
+		/* Bit 2 of EEPROM address 0x02 declares upper
+		 * pages are disabled on QSFP modules.
+		 * SFP modules only ever use page 0.
+		 */
+		if (page == 0 || !(data[0x2] & 0x4)) {
+			/* If i2c bus is busy due to slow page change or
+			 * link management access, call can fail. This is normal.
+			 * So we retry this a few times.
+			 */
+			for (j = 0; j < 4; j++) {
+				status = ice_aq_sff_eeprom(hw, 0, addr, offset, page,
+							   !is_sfp, value,
+							   SFF_READ_BLOCK_SIZE,
+							   0, NULL);
+				netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n",
+					   addr, offset, page, is_sfp,
+					   value[0], value[1], value[2], value[3],
+					   value[4], value[5], value[6], value[7],
+					   status);
+				if (status) {
+					usleep_range(1500, 2500);
+					memset(value, 0, SFF_READ_BLOCK_SIZE);
+					continue;
+				}
+				break;
+			}
+
+			/* Make sure we have enough room for the new block */
+			if ((i + SFF_READ_BLOCK_SIZE) < ee->len)
+				memcpy(data + i, value, SFF_READ_BLOCK_SIZE);
+		}
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 80ad6dde61894dd880c3690b33eebbbc813e0276 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:17:01 -0700
Subject: ice: print name in /proc/iomem

The driver previously printed it's PCI address in
the name field for the pci resource, which when displayed
via /proc/iomem, would print the same thing twice.

It's more useful for debugging to see the driver name, as
most other modules do.

Here's a diff of before and after this change:
     99100000-991fffff : 0000:3b:00.1
   9a000000-a04fffff : PCI Bus 0000:3b
     9a000000-9bffffff : 0000:3b:00.1
-      9a000000-9bffffff : 0000:3b:00.1
+      9a000000-9bffffff : ice
     9c000000-9dffffff : 0000:3b:00.0
-      9c000000-9dffffff : 0000:3b:00.0
+      9c000000-9dffffff : ice
     9e000000-9effffff : 0000:3b:00.1
     9f000000-9fffffff : 0000:3b:00.0
     a0000000-a000ffff : 0000:3b:00.1

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1e023d163447..02672be04f78 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4002,7 +4002,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	if (err)
 		return err;
 
-	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev));
+	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
 	if (err) {
 		dev_err(dev, "BAR0 I/O map error %d\n", err);
 		return err;
-- 
cgit v1.2.3


From 58623c52b4278de6ed462e6f25402457ffbdd63f Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:17:02 -0700
Subject: ice: use local for consistency

Do a minor refactor on ice_vsi_rebuild to use a local
variable to store vsi->type.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 45837e9eee86..0443338f9eaf 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -3014,6 +3014,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 	struct ice_coalesce_stored *coalesce;
 	int prev_num_q_vectors = 0;
 	struct ice_vf *vf = NULL;
+	enum ice_vsi_type vtype;
 	enum ice_status status;
 	struct ice_pf *pf;
 	int ret, i;
@@ -3022,7 +3023,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 		return -EINVAL;
 
 	pf = vsi->back;
-	if (vsi->type == ICE_VSI_VF)
+	vtype = vsi->type;
+	if (vtype == ICE_VSI_VF)
 		vf = &pf->vf[vsi->vf_id];
 
 	coalesce = kcalloc(vsi->num_q_vectors,
@@ -3040,7 +3042,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 	 * many interrupts each VF needs. SR-IOV MSIX resources are also
 	 * cleared in the same manner.
 	 */
-	if (vsi->type != ICE_VSI_VF) {
+	if (vtype != ICE_VSI_VF) {
 		/* reclaim SW interrupts back to the common pool */
 		ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
 		pf->num_avail_sw_msix += vsi->num_q_vectors;
@@ -3055,7 +3057,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 	ice_vsi_put_qs(vsi);
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_arrays(vsi);
-	if (vsi->type == ICE_VSI_VF)
+	if (vtype == ICE_VSI_VF)
 		ice_vsi_set_num_qs(vsi, vf->vf_id);
 	else
 		ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID);
@@ -3074,7 +3076,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 	if (ret < 0)
 		goto err_vsi;
 
-	switch (vsi->type) {
+	switch (vtype) {
 	case ICE_VSI_CTRL:
 	case ICE_VSI_PF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
@@ -3101,7 +3103,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 				goto err_vectors;
 		}
 		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
-		if (vsi->type != ICE_VSI_CTRL)
+		if (vtype != ICE_VSI_CTRL)
 			/* Do not exit if configuring RSS had an issue, at
 			 * least receive traffic on first queue. Hence no
 			 * need to capture return value
-- 
cgit v1.2.3


From 1cdea9a7eae3a976adc2735bc7ce62ac07cafcdb Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Wed, 31 Mar 2021 14:17:03 -0700
Subject: ice: remove unused struct member

The only time you can ever have a rq_last_status is if
a firmware event was somehow reporting a status on the receive
queue, which are generally firmware initiated events or
mailbox messages from a VF.  Mostly this struct member was unused.

Fix this problem by still printing the value of the field in a debug
print, but don't store the value forever in a struct, potentially
creating opportunities for callers to use the wrong struct member.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_controlq.c | 6 +++---
 drivers/net/ethernet/intel/ice/ice_controlq.h | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 0f207a42ea77..87b33bdd4960 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -1097,6 +1097,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 		  struct ice_rq_event_info *e, u16 *pending)
 {
 	u16 ntc = cq->rq.next_to_clean;
+	enum ice_aq_err rq_last_status;
 	enum ice_status ret_code = 0;
 	struct ice_aq_desc *desc;
 	struct ice_dma_mem *bi;
@@ -1130,13 +1131,12 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	desc = ICE_CTL_Q_DESC(cq->rq, ntc);
 	desc_idx = ntc;
 
-	cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+	rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
 	flags = le16_to_cpu(desc->flags);
 	if (flags & ICE_AQ_FLAG_ERR) {
 		ret_code = ICE_ERR_AQ_ERROR;
 		ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n",
-			  le16_to_cpu(desc->opcode),
-			  cq->rq_last_status);
+			  le16_to_cpu(desc->opcode), rq_last_status);
 	}
 	memcpy(&e->desc, desc, sizeof(e->desc));
 	datalen = le16_to_cpu(desc->datalen);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 77c2307d4fb8..fe75871e48ca 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -83,7 +83,6 @@ struct ice_rq_event_info {
 /* Control Queue information */
 struct ice_ctl_q_info {
 	enum ice_ctl_q qtype;
-	enum ice_aq_err rq_last_status;	/* last status on receive queue */
 	struct ice_ctl_q_ring rq;	/* receive queue */
 	struct ice_ctl_q_ring sq;	/* send queue */
 	u32 sq_cmd_timeout;		/* send queue cmd write back timeout */
-- 
cgit v1.2.3


From c931c782d8465c0408e14bf031e951134c30b059 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Wed, 31 Mar 2021 14:17:04 -0700
Subject: ice: Set vsi->vf_id as ICE_INVAL_VFID for non VF VSI types

Currently the vsi->vf_id is set only for ICE_VSI_VF and it's left as 0
for all other VSI types. This is confusing and could be problematic
since 0 is a valid vf_id. Fix this by always setting non VF VSI types to
ICE_INVAL_VFID.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0443338f9eaf..eb6e352d3387 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -158,6 +158,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 
 	if (vsi->type == ICE_VSI_VF)
 		vsi->vf_id = vf_id;
+	else
+		vsi->vf_id = ICE_INVAL_VFID;
 
 	switch (vsi->type) {
 	case ICE_VSI_PF:
-- 
cgit v1.2.3


From b370245b4b95a07433a03e06eb3d32a01ded2c5d Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Wed, 31 Mar 2021 14:17:05 -0700
Subject: ice: suppress false cppcheck issues

Silence false errors, warnings and style issues reported by cppcheck.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 3 +++
 drivers/net/ethernet/intel/ice/ice_nvm.c       | 1 +
 drivers/net/ethernet/intel/ice/ice_sched.c     | 1 +
 3 files changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 4b83960876f4..06ac9badee77 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -334,6 +334,7 @@ ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset)
 	if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM)
 		return NULL;
 
+	/* cppcheck-suppress nullPointer */
 	if (index > ICE_MAX_BST_TCAMS_IN_BUF)
 		return NULL;
 
@@ -404,6 +405,7 @@ ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index,
 	if (!section)
 		return NULL;
 
+	/* cppcheck-suppress nullPointer */
 	if (index > ICE_MAX_LABELS_IN_BUF)
 		return NULL;
 
@@ -2067,6 +2069,7 @@ ice_match_prop_lst(struct list_head *list1, struct list_head *list2)
 		count++;
 	list_for_each_entry(tmp2, list2, list)
 		chk_count++;
+	/* cppcheck-suppress knownConditionTrueFalse */
 	if (!count || count != chk_count)
 		return false;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 75ccbfc07f99..fee37a5844cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -644,6 +644,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank,
 
 		/* Verify that the simple checksum is zero */
 		for (i = 0; i < sizeof(tmp); i++)
+			/* cppcheck-suppress objectIndex */
 			sum += ((u8 *)&tmp)[i];
 
 		if (sum) {
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index 97562051fe14..2f097637e405 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -988,6 +988,7 @@ ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
 	*num_nodes_added = 0;
 	while (*num_nodes_added < num_nodes) {
 		u16 max_child_nodes, num_added = 0;
+		/* cppcheck-suppress unusedVariable */
 		u32 temp;
 
 		status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent,
-- 
cgit v1.2.3


From 4fe36226943b9ca99cf51573297b39644a1946d6 Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Wed, 31 Mar 2021 14:17:07 -0700
Subject: ice: remove return variable

We were saving the return value from ice_vsi_manage_rss_lut(), but
the errors from that function are not critical so change it to
return void and remove the code that saved the value.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 8 +++-----
 drivers/net/ethernet/intel/ice/ice_lib.h  | 2 +-
 drivers/net/ethernet/intel/ice/ice_main.c | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index eb6e352d3387..82e2ce23df3d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1313,14 +1313,13 @@ err_out:
  * LUT, while in the event of enable request for RSS, it will reconfigure RSS
  * LUT.
  */
-int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
 {
-	int err = 0;
 	u8 *lut;
 
 	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
-		return -ENOMEM;
+		return;
 
 	if (ena) {
 		if (vsi->rss_lut_user)
@@ -1330,9 +1329,8 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
 					 vsi->rss_size);
 	}
 
-	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
 	kfree(lut);
-	return err;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index f48c5ccb8036..511c2316c40c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -85,7 +85,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
 
 void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
 
-int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
 
 void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 02672be04f78..6dbaa9099fdf 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5112,10 +5112,10 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 	 * separate if/else statements to guarantee each feature is checked
 	 */
 	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
-		ret = ice_vsi_manage_rss_lut(vsi, true);
+		ice_vsi_manage_rss_lut(vsi, true);
 	else if (!(features & NETIF_F_RXHASH) &&
 		 netdev->features & NETIF_F_RXHASH)
-		ret = ice_vsi_manage_rss_lut(vsi, false);
+		ice_vsi_manage_rss_lut(vsi, false);
 
 	if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX))
-- 
cgit v1.2.3


From 4c26f69d0cf966044ef0c31a87c2da68fc6d066a Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Wed, 31 Mar 2021 14:17:08 -0700
Subject: ice: reduce scope of variable

The scope of this variable can be reduced so do that.

Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a3d8d06b1e3f..e38d4adc5b8d 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -4234,7 +4234,6 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
  */
 void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
 {
-	struct pci_dev *vfdev;
 	u16 vf_id;
 	int pos;
 
@@ -4243,6 +4242,8 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
 
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
 	if (pos) {
+		struct pci_dev *vfdev;
+
 		pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID,
 				     &vf_id);
 		vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
-- 
cgit v1.2.3


From 1969b3c60db675040ec0d1b09698807647aac7ed Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Wed, 14 Apr 2021 17:56:32 +0200
Subject: selftests/bpf: Fix the ASSERT_ERR_PTR macro

It is just missing a ';'. This macro is not used by any test yet.

Fixes: 22ba36351631 ("selftests/bpf: Move and extend ASSERT_xxx() testing macros")
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210414155632.737866-1-revest@chromium.org
---
 tools/testing/selftests/bpf/test_progs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index e87c8546230e..ee7e3b45182a 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -210,7 +210,7 @@ extern int test__join_cgroup(const char *path);
 #define ASSERT_ERR_PTR(ptr, name) ({					\
 	static int duration = 0;					\
 	const void *___res = (ptr);					\
-	bool ___ok = IS_ERR(___res)					\
+	bool ___ok = IS_ERR(___res);					\
 	CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);	\
 	___ok;								\
 })
-- 
cgit v1.2.3


From 069904ce318e0e15dc67f3c2829303237c5e912b Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Wed, 14 Apr 2021 22:16:39 +0800
Subject: tools/testing: Remove unused variable

Remove unused variable "ret2".

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414141639.1446-1-zuoqilin1@163.com
---
 tools/testing/selftests/bpf/progs/test_tunnel_kern.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ba6eadfec565..e7b673117436 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
 SEC("geneve_set_tunnel")
 int _geneve_set_tunnel(struct __sk_buff *skb)
 {
-	int ret, ret2;
+	int ret;
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
 
-- 
cgit v1.2.3


From d3d93e34bd98e4dbb002310fed08630f4b549a08 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 15 Apr 2021 07:18:17 -0700
Subject: libbpf: Remove unused field.

relo->processed is set, but not used. Remove it.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210415141817.53136-1-alexei.starovoitov@gmail.com
---
 tools/lib/bpf/libbpf.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ed5586cce227..9cc2d45b0080 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -195,7 +195,6 @@ struct reloc_desc {
 	int insn_idx;
 	int map_idx;
 	int sym_off;
-	bool processed;
 };
 
 struct bpf_sec_def;
@@ -3499,8 +3498,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 	const char *sym_sec_name;
 	struct bpf_map *map;
 
-	reloc_desc->processed = false;
-
 	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
 		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 			prog->name, sym_name, insn_idx, insn->code);
@@ -6314,13 +6311,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 		case RELO_LD64:
 			insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 			insn[0].imm = obj->maps[relo->map_idx].fd;
-			relo->processed = true;
 			break;
 		case RELO_DATA:
 			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 			insn[1].imm = insn[0].imm + relo->sym_off;
 			insn[0].imm = obj->maps[relo->map_idx].fd;
-			relo->processed = true;
 			break;
 		case RELO_EXTERN_VAR:
 			ext = &obj->externs[relo->sym_off];
@@ -6338,13 +6333,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 					insn[1].imm = ext->ksym.addr >> 32;
 				}
 			}
-			relo->processed = true;
 			break;
 		case RELO_EXTERN_FUNC:
 			ext = &obj->externs[relo->sym_off];
 			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
 			insn[0].imm = ext->ksym.kernel_btf_id;
-			relo->processed = true;
 			break;
 		case RELO_SUBPROG_ADDR:
 			insn[0].src_reg = BPF_PSEUDO_FUNC;
@@ -6630,9 +6623,6 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
 		 * different main programs */
 		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
 
-		if (relo)
-			relo->processed = true;
-
 		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
 			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
 	}
@@ -6725,7 +6715,7 @@ static int
 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 {
 	struct bpf_program *subprog;
-	int i, j, err;
+	int i, err;
 
 	/* mark all subprogs as not relocated (yet) within the context of
 	 * current main program
@@ -6736,9 +6726,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 			continue;
 
 		subprog->sub_insn_off = 0;
-		for (j = 0; j < subprog->nr_reloc; j++)
-			if (subprog->reloc_desc[j].type == RELO_CALL)
-				subprog->reloc_desc[j].processed = false;
 	}
 
 	err = bpf_object__reloc_code(obj, prog, prog);
-- 
cgit v1.2.3


From c329e5afb42ff0a88285eb4d8a391a18793e4777 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Thu, 15 Apr 2021 03:26:50 +0200
Subject: net: phy: at803x: select correct page on config init

The Atheros AR8031 and AR8033 expose different registers for SGMII/Fiber
as well as the copper side of the PHY depending on the BT_BX_REG_SEL bit
in the chip configure register.

The driver assumes the copper side is selected on probe, but this might
not be the case depending which page was last selected by the
bootloader. Notably, Ubiquiti UniFi bootloaders show this behavior.

Select the copper page when probing to circumvent this.

Signed-off-by: David Bauer <mail@david-bauer.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index d7799beb811c..e0f56850edc5 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -144,6 +144,9 @@
 #define ATH8035_PHY_ID 0x004dd072
 #define AT8030_PHY_ID_MASK			0xffffffef
 
+#define AT803X_PAGE_FIBER		0
+#define AT803X_PAGE_COPPER		1
+
 MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
 MODULE_LICENSE("GPL");
@@ -198,6 +201,35 @@ static int at803x_debug_reg_mask(struct phy_device *phydev, u16 reg,
 	return phy_write(phydev, AT803X_DEBUG_DATA, val);
 }
 
+static int at803x_write_page(struct phy_device *phydev, int page)
+{
+	int mask;
+	int set;
+
+	if (page == AT803X_PAGE_COPPER) {
+		set = AT803X_BT_BX_REG_SEL;
+		mask = 0;
+	} else {
+		set = 0;
+		mask = AT803X_BT_BX_REG_SEL;
+	}
+
+	return __phy_modify(phydev, AT803X_REG_CHIP_CONFIG, mask, set);
+}
+
+static int at803x_read_page(struct phy_device *phydev)
+{
+	int ccr = __phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+
+	if (ccr < 0)
+		return ccr;
+
+	if (ccr & AT803X_BT_BX_REG_SEL)
+		return AT803X_PAGE_COPPER;
+
+	return AT803X_PAGE_FIBER;
+}
+
 static int at803x_enable_rx_delay(struct phy_device *phydev)
 {
 	return at803x_debug_reg_mask(phydev, AT803X_DEBUG_REG_0, 0,
@@ -535,6 +567,7 @@ static int at803x_probe(struct phy_device *phydev)
 {
 	struct device *dev = &phydev->mdio.dev;
 	struct at803x_priv *priv;
+	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -542,7 +575,20 @@ static int at803x_probe(struct phy_device *phydev)
 
 	phydev->priv = priv;
 
-	return at803x_parse_dt(phydev);
+	ret = at803x_parse_dt(phydev);
+	if (ret)
+		return ret;
+
+	/* Some bootloaders leave the fiber page selected.
+	 * Switch to the copper page, as otherwise we read
+	 * the PHY capabilities from the fiber side.
+	 */
+	if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
+		ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
+		ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
+	}
+
+	return ret;
 }
 
 static void at803x_remove(struct phy_device *phydev)
@@ -1166,6 +1212,8 @@ static struct phy_driver at803x_driver[] = {
 	.get_wol		= at803x_get_wol,
 	.suspend		= at803x_suspend,
 	.resume			= at803x_resume,
+	.read_page		= at803x_read_page,
+	.write_page		= at803x_write_page,
 	/* PHY_GBIT_FEATURES */
 	.read_status		= at803x_read_status,
 	.config_intr		= &at803x_config_intr,
-- 
cgit v1.2.3


From 26e6dd1072763cd5696b75994c03982dde952ad9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Apr 2021 08:34:13 -0700
Subject: selftests: Set CC to clang in lib.mk if LLVM is set

selftests/bpf/Makefile includes lib.mk. With the following command
  make -j60 LLVM=1 LLVM_IAS=1  <=== compile kernel
  make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1
some files are still compiled with gcc. This patch
fixed lib.mk issue which sets CC to gcc in all cases.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413153413.3027426-1-yhs@fb.com
---
 tools/testing/selftests/lib.mk | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a5ce26d548e4..9a41d8bb9ff1 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,10 @@
 # This mimics the top-level Makefile. We do it explicitly here so that this
 # Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
 CC := $(CROSS_COMPILE)gcc
+endif
 
 ifeq (0,$(MAKELEVEL))
     ifeq ($(OUTPUT),)
-- 
cgit v1.2.3


From f62700ce63a315b4607cc9e97aa15ea409a677b9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Apr 2021 08:34:19 -0700
Subject: tools: Allow proper CC/CXX/... override with LLVM=1 in
 Makefile.include

selftests/bpf/Makefile includes tools/scripts/Makefile.include.
With the following command
  make -j60 LLVM=1 LLVM_IAS=1  <=== compile kernel
  make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1 V=1
some files are still compiled with gcc. This patch
fixed the case if CC/AR/LD/CXX/STRIP is allowed to be
overridden, it will be written to clang/llvm-ar/..., instead of
gcc binaries. The definition of CC_NO_CLANG is also relocated
to the place after the above CC is defined.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210413153419.3028165-1-yhs@fb.com
---
 tools/scripts/Makefile.include | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a402f32a145c..91130648d8e6 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -39,8 +39,6 @@ EXTRA_WARNINGS += -Wundef
 EXTRA_WARNINGS += -Wwrite-strings
 EXTRA_WARNINGS += -Wformat
 
-CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
-
 # Makefiles suck: This macro sets a default value of $(2) for the
 # variable named by $(1), unless the variable has been set by
 # environment or command line. This is necessary for CC and AR
@@ -52,12 +50,22 @@ define allow-override
     $(eval $(1) = $(2)))
 endef
 
+ifneq ($(LLVM),)
+$(call allow-override,CC,clang)
+$(call allow-override,AR,llvm-ar)
+$(call allow-override,LD,ld.lld)
+$(call allow-override,CXX,clang++)
+$(call allow-override,STRIP,llvm-strip)
+else
 # Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+endif
+
+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
 
 ifneq ($(LLVM),)
 HOSTAR  ?= llvm-ar
-- 
cgit v1.2.3


From a22c0c81da644223d911466746ef414a786cb1c8 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Apr 2021 08:34:24 -0700
Subject: selftests/bpf: Fix test_cpp compilation failure with clang

With clang compiler:
  make -j60 LLVM=1 LLVM_IAS=1  <=== compile kernel
  make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1
the test_cpp build failed due to the failure:
  warning: treating 'c-header' input as 'c++-header' when in C++ mode, this behavior is deprecated [-Wdeprecated]
  clang-13: error: cannot specify -o when generating multiple output files

test_cpp compilation flag looks like:
  clang++ -g -Og -rdynamic -Wall -I<...> ... \
  -Dbpf_prog_load=bpf_prog_test_load -Dbpf_load_program=bpf_test_load_program \
  test_cpp.cpp <...>/test_core_extern.skel.h <...>/libbpf.a <...>/test_stub.o \
  -lcap -lelf -lz -lrt -lpthread -o <...>/test_cpp

The clang++ compiler complains the header file in the command line and
also failed the compilation due to this.
Let us remove the header file from the command line which is not intended
any way, and this fixed the compilation problem.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413153424.3028986-1-yhs@fb.com
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6448c626498f..dcc2dc1f2a86 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -481,7 +481,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
 # Make sure we are able to include and link libbpf against c++.
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
-	$(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+	$(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
 
 # Benchmark runner
 $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
-- 
cgit v1.2.3


From ef9985893caf905b769fae8984780847e8527065 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Apr 2021 08:34:29 -0700
Subject: selftests/bpf: Silence clang compilation warnings

With clang compiler:
  make -j60 LLVM=1 LLVM_IAS=1  <=== compile kernel
  make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1
Some linker flags are not used/effective for some binaries and
we have warnings like:
  warning: -lelf: 'linker' input unused [-Wunused-command-line-argument]

We also have warnings like:
  .../selftests/bpf/prog_tests/ns_current_pid_tgid.c:74:57: note: treat the string as an argument to avoid this
        if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
                                                               ^
                                                               "%s",
  .../selftests/bpf/test_progs.h:129:35: note: expanded from macro 'CHECK'
        _CHECK(condition, tag, duration, format)
                                         ^
  .../selftests/bpf/test_progs.h:108:21: note: expanded from macro '_CHECK'
                fprintf(stdout, ##format);                              \
                                  ^
The first warning can be silenced with clang option -Wno-unused-command-line-argument.
For the second warning, source codes are modified as suggested by the compiler
to silence the warning. Since gcc does not support the option
-Wno-unused-command-line-argument and the warning only happens with clang
compiler, the option -Wno-unused-command-line-argument is enabled only when
clang compiler is used.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413153429.3029377-1-yhs@fb.com
---
 tools/testing/selftests/bpf/Makefile                         | 5 +++++
 tools/testing/selftests/bpf/prog_tests/fexit_sleep.c         | 4 ++--
 tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c | 4 ++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index dcc2dc1f2a86..c45ae13b88a0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -28,6 +28,11 @@ CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)		\
 	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lz -lrt -lpthread
 
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_verifier_log test_dev_cgroup \
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index 6c4d42a2386f..ccc7e8a34ab6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -39,7 +39,7 @@ void test_fexit_sleep(void)
 		goto cleanup;
 
 	cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
-	if (CHECK(cpid == -1, "clone", strerror(errno)))
+	if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
 		goto cleanup;
 
 	/* wait until first sys_nanosleep ends and second sys_nanosleep starts */
@@ -65,7 +65,7 @@ void test_fexit_sleep(void)
 	/* kill the thread to unwind sys_nanosleep stack through the trampoline */
 	kill(cpid, 9);
 
-	if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+	if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
 		goto cleanup;
 	if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 31a3114906e2..2535788e135f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void)
 	cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
 		     CLONE_NEWPID | SIGCHLD, NULL);
 
-	if (CHECK(cpid == -1, "clone", strerror(errno)))
+	if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
 		return;
 
-	if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+	if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
 		return;
 
 	if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
-- 
cgit v1.2.3


From 8af50142763c6e70d426e45278b23d7103e5b7a7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 13 Apr 2021 08:34:35 -0700
Subject: bpftool: Fix a clang compilation warning

With clang compiler:
  make -j60 LLVM=1 LLVM_IAS=1  <=== compile kernel
  # build selftests/bpf or bpftool
  make -j60 -C tools/testing/selftests/bpf LLVM=1 LLVM_IAS=1
  make -j60 -C tools/bpf/bpftool LLVM=1 LLVM_IAS=1
the following compilation warning showed up,
  net.c:160:37: warning: comparison of integers of different signs: '__u32' (aka 'unsigned int') and 'int' [-Wsign-compare]
                for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
                                                  ^~~~~~~~~~~~~~~~~
  .../tools/include/uapi/linux/netlink.h:99:24: note: expanded from macro 'NLMSG_OK'
                           (nlh)->nlmsg_len <= (len))
                           ~~~~~~~~~~~~~~~~ ^   ~~~

In this particular case, "len" is defined as "int" and (nlh)->nlmsg_len is "unsigned int".
The macro NLMSG_OK is defined as below in uapi/linux/netlink.h.
  #define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
                             (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
                             (nlh)->nlmsg_len <= (len))

The clang compiler complains the comparision "(nlh)->nlmsg_len <= (len))",
but in bpftool/net.c, it is already ensured that "len > 0" must be true.
So theoretically the compiler could deduce that comparison of
"(nlh)->nlmsg_len" and "len" is okay, but this really depends on compiler
internals. Let us add an explicit type conversion (from "int" to "unsigned int")
for "len" in NLMSG_OK to silence this warning right now.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210413153435.3029635-1-yhs@fb.com
---
 tools/bpf/bpftool/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index ff3aa0cf3997..f836d115d7d6 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -157,7 +157,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
 		if (len == 0)
 			break;
 
-		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+		for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
 		     nh = NLMSG_NEXT(nh, len)) {
 			if (nh->nlmsg_pid != nl_pid) {
 				ret = -LIBBPF_ERRNO__WRNGPID;
-- 
cgit v1.2.3


From 18b6e31f8bf4ac7af7b057228f38a5a530378e4e Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 15 Apr 2021 10:20:38 +0800
Subject: net: hns3: PF add support for pushing link status to VFs

Previously, VF updates its link status every second by send query command
to PF in periodic service task. If link stats of PF is changed, VF may
need at most one second to update its link status.

To reduce delay of link status between PF and VFs, PF actively push its
link status to VFs when its link status is updated. And to let VF know
PF supports this new feature, the link status changed mailbox command
adds one bit to indicate it.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h    |  3 ++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 35 +++++++++++++++++++++-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 12 ++++----
 4 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 33defa4c180a..a2c17af57fde 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -172,4 +172,7 @@ struct hclgevf_mbx_arq_ring {
 		(arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
 #define hclge_mbx_head_ptr_move_arq(arq) \
 		(arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM)
+
+/* PF immediately push link status to VFs when link status changed */
+#define HCLGE_MBX_PUSH_LINK_STATUS_EN			BIT(0)
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 1c17fdc780e9..dc06986055e4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2880,6 +2880,28 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status)
 	return hclge_get_mac_link_status(hdev, link_status);
 }
 
+static void hclge_push_link_status(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport;
+	int ret;
+	u16 i;
+
+	for (i = 0; i < pci_num_vf(hdev->pdev); i++) {
+		vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM];
+
+		if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) ||
+		    vport->vf_info.link_state != IFLA_VF_LINK_STATE_AUTO)
+			continue;
+
+		ret = hclge_push_vf_link_status(vport);
+		if (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to push link status to vf%u, ret = %d\n",
+				i, ret);
+		}
+	}
+}
+
 static void hclge_update_link_status(struct hclge_dev *hdev)
 {
 	struct hnae3_handle *rhandle = &hdev->vport[0].roce;
@@ -2908,6 +2930,7 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 			rclient->ops->link_status_change(rhandle, state);
 
 		hdev->hw.mac.link = state;
+		hclge_push_link_status(hdev);
 	}
 
 	clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state);
@@ -3246,14 +3269,24 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	int link_state_old;
+	int ret;
 
 	vport = hclge_get_vf_vport(hdev, vf);
 	if (!vport)
 		return -EINVAL;
 
+	link_state_old = vport->vf_info.link_state;
 	vport->vf_info.link_state = link_state;
 
-	return 0;
+	ret = hclge_push_vf_link_status(vport);
+	if (ret) {
+		vport->vf_info.link_state = link_state_old;
+		dev_err(&hdev->pdev->dev,
+			"failed to push vf%d link status, ret = %d\n", vf, ret);
+	}
+
+	return ret;
 }
 
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index c1aaf7c534c9..ff1d47308c2d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1089,4 +1089,5 @@ void hclge_report_hw_error(struct hclge_dev *hdev,
 			   enum hnae3_hw_error_type type);
 void hclge_inform_vf_promisc_info(struct hclge_vport *vport);
 void hclge_dbg_dump_rst_info(struct hclge_dev *hdev);
+int hclge_push_vf_link_status(struct hclge_vport *vport);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index c88607bdda59..5512ffe0a149 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -490,16 +490,14 @@ static void hclge_get_vf_media_type(struct hclge_vport *vport,
 	resp_msg->len = HCLGE_VF_MEDIA_TYPE_LENGTH;
 }
 
-static int hclge_get_link_info(struct hclge_vport *vport,
-			       struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+int hclge_push_vf_link_status(struct hclge_vport *vport)
 {
 #define HCLGE_VF_LINK_STATE_UP		1U
 #define HCLGE_VF_LINK_STATE_DOWN	0U
 
 	struct hclge_dev *hdev = vport->back;
 	u16 link_status;
-	u8 msg_data[8];
-	u8 dest_vfid;
+	u8 msg_data[9];
 	u16 duplex;
 
 	/* mac.link can only be 0 or 1 */
@@ -520,11 +518,11 @@ static int hclge_get_link_info(struct hclge_vport *vport,
 	memcpy(&msg_data[0], &link_status, sizeof(u16));
 	memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32));
 	memcpy(&msg_data[6], &duplex, sizeof(u16));
-	dest_vfid = mbx_req->mbx_src_vfid;
+	msg_data[8] = HCLGE_MBX_PUSH_LINK_STATUS_EN;
 
 	/* send this requested info to VF */
 	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
-				  HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid);
+				  HCLGE_MBX_LINK_STAT_CHANGE, vport->vport_id);
 }
 
 static void hclge_get_link_mode(struct hclge_vport *vport,
@@ -794,7 +792,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 			hclge_get_vf_tcinfo(vport, &resp_msg);
 			break;
 		case HCLGE_MBX_GET_LINK_STATUS:
-			ret = hclge_get_link_info(vport, req);
+			ret = hclge_push_vf_link_status(vport);
 			if (ret)
 				dev_err(&hdev->pdev->dev,
 					"failed to inform link stat to VF, ret = %d\n",
-- 
cgit v1.2.3


From 01305e16ebe7791e7a1b97485f83999f48cd79d0 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Thu, 15 Apr 2021 10:20:39 +0800
Subject: net: hns3: VF not request link status when PF support push link
 status feature

To reduce the processing of unnecessary mailbox command when PF supports
actively push its link status to VFs, VFs stop sending request link
status command in periodic service task in this case.

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 8 +++++---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c  | 6 ++++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 07aa26ba0966..07066c416d42 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2340,10 +2340,11 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
 	if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL))
 		hclgevf_tqps_update_stats(handle);
 
-	/* request the link status from the PF. PF would be able to tell VF
-	 * about such updates in future so we might remove this later
+	/* VF does not need to request link status when this bit is set, because
+	 * PF will push its link status to VFs when link status changed.
 	 */
-	hclgevf_request_link_info(hdev);
+	if (!test_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state))
+		hclgevf_request_link_info(hdev);
 
 	hclgevf_update_link_mode(hdev);
 
@@ -2657,6 +2658,7 @@ static int hclgevf_ae_start(struct hnae3_handle *handle)
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
 	clear_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+	clear_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state);
 
 	hclgevf_reset_tqp_stats(handle);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index ade6e7f5be5b..956095b89a1b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -152,6 +152,7 @@ enum hclgevf_states {
 	HCLGEVF_STATE_LINK_UPDATING,
 	HCLGEVF_STATE_PROMISC_CHANGED,
 	HCLGEVF_STATE_RST_FAIL,
+	HCLGEVF_STATE_PF_PUSH_LINK_STATUS,
 };
 
 struct hclgevf_mac {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 5b2dcd97c107..9b17735b9f4c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -276,6 +276,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 	u8 duplex;
 	u32 speed;
 	u32 tail;
+	u8 flag;
 	u8 idx;
 
 	/* we can safely clear it now as we are at start of the async message
@@ -300,11 +301,16 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 			link_status = msg_q[1];
 			memcpy(&speed, &msg_q[2], sizeof(speed));
 			duplex = (u8)msg_q[4];
+			flag = (u8)msg_q[5];
 
 			/* update upper layer with new link link status */
 			hclgevf_update_link_status(hdev, link_status);
 			hclgevf_update_speed_duplex(hdev, speed, duplex);
 
+			if (flag & HCLGE_MBX_PUSH_LINK_STATUS_EN)
+				set_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS,
+					&hdev->state);
+
 			break;
 		case HCLGE_MBX_LINK_STAT_MODE:
 			idx = (u8)msg_q[1];
-- 
cgit v1.2.3


From b6faf160d0dac3c8654aa911e6e281541628d304 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Thu, 15 Apr 2021 13:34:55 +0800
Subject: enetc: convert to schedule_work()

Convert system_wq queue_work() to schedule_work() which is
a wrapper around it, since the former is a rare construct.

Fixes: 7294380c5211 ("enetc: support PTP Sync packet one-step timestamping")
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 4a0adb0b8bd7..9a726085841d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -552,7 +552,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 				 * timestamping packet. And send one skb in
 				 * tx_skbs queue if has.
 				 */
-				queue_work(system_wq, &priv->tx_onestep_tstamp);
+				schedule_work(&priv->tx_onestep_tstamp);
 			} else if (unlikely(do_twostep_tstamp)) {
 				enetc_tstamp_tx(skb, tstamp);
 				do_twostep_tstamp = false;
-- 
cgit v1.2.3


From 38ebcf5096a86762b82262e96b2c8b170fe79040 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 15 Apr 2021 10:37:53 -0700
Subject: scm: optimize put_cmsg()

Calling two copy_to_user() for very small regions has very high overhead.

Switch to inlined unsafe_put_user() to save one stac/clac sequence,
and avoid copy_to_user().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index 8156d4fb8a39..bd96c922041d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -228,14 +228,16 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 
 	if (msg->msg_control_is_user) {
 		struct cmsghdr __user *cm = msg->msg_control_user;
-		struct cmsghdr cmhdr;
-
-		cmhdr.cmsg_level = level;
-		cmhdr.cmsg_type = type;
-		cmhdr.cmsg_len = cmlen;
-		if (copy_to_user(cm, &cmhdr, sizeof cmhdr) ||
-		    copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm)))
-			return -EFAULT;
+
+		if (!user_write_access_begin(cm, cmlen))
+			goto efault;
+
+		unsafe_put_user(len, &cm->cmsg_len, efault_end);
+		unsafe_put_user(level, &cm->cmsg_level, efault_end);
+		unsafe_put_user(type, &cm->cmsg_type, efault_end);
+		unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
+				    cmlen - sizeof(*cm), efault_end);
+		user_write_access_end();
 	} else {
 		struct cmsghdr *cm = msg->msg_control;
 
@@ -249,6 +251,11 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 	msg->msg_control += cmlen;
 	msg->msg_controllen -= cmlen;
 	return 0;
+
+efault_end:
+	user_write_access_end();
+efault:
+	return -EFAULT;
 }
 EXPORT_SYMBOL(put_cmsg);
 
-- 
cgit v1.2.3


From c5797f8a64158f724238d13fa5a4b351b03fe42d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:13 -0700
Subject: ethtool: move ethtool_stats_init

We'll need it for FEC stats as well.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 6 ++++++
 net/ethtool/pause.c     | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 9f6f323af59a..069100b252bd 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -244,6 +244,12 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 
 #define ETHTOOL_STAT_NOT_SET	(~0ULL)
 
+static inline void ethtool_stats_init(u64 *stats, unsigned int n)
+{
+	while (n--)
+		stats[n] = ETHTOOL_STAT_NOT_SET;
+}
+
 /**
  * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames
  * @tx_pause_frames: transmitted pause frame count. Reported to user space
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 09998dc5c185..f1967c121278 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -21,12 +21,6 @@ const struct nla_policy ethnl_pause_get_policy[] = {
 		NLA_POLICY_NESTED(ethnl_header_policy_stats),
 };
 
-static void ethtool_stats_init(u64 *stats, unsigned int n)
-{
-	while (n--)
-		stats[n] = ETHTOOL_STAT_NOT_SET;
-}
-
 static int pause_prepare_data(const struct ethnl_req_info *req_base,
 			      struct ethnl_reply_data *reply_base,
 			      struct genl_info *info)
-- 
cgit v1.2.3


From 3d7cc109ecf76afc74f40eb71d5c9baa03c167a3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:14 -0700
Subject: ethtool: fec_prepare_data() - jump to error handling

Refactor fec_prepare_data() a little bit to skip the body
of the function and exit on error. Currently the code
depends on the fact that we only have one call which
may fail between ethnl_ops_begin() and ethnl_ops_complete()
and simply saves the error code. This will get hairy with
the stats also being queried.

No functional changes.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/fec.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 31454b9188bd..3e7d091ee7aa 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -80,9 +80,8 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base,
 	if (ret < 0)
 		return ret;
 	ret = dev->ethtool_ops->get_fecparam(dev, &fec);
-	ethnl_ops_complete(dev);
 	if (ret)
-		return ret;
+		goto out_complete;
 
 	WARN_ON_ONCE(fec.reserved);
 
@@ -98,7 +97,9 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base,
 	if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS)
 		data->active_fec = 0;
 
-	return 0;
+out_complete:
+	ethnl_ops_complete(dev);
+	return ret;
 }
 
 static int fec_reply_size(const struct ethnl_req_info *req_base,
-- 
cgit v1.2.3


From be85dbfeb37c8c4d4344da2ee594d78034b82489 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:15 -0700
Subject: ethtool: add FEC statistics

Similarly to pause statistics add stats for FEC.

The IEEE standard mandates two sets of counters:
 - 30.5.1.1.17 aFECCorrectedBlocks
 - 30.5.1.1.18 aFECUncorrectableBlocks
where block is a block of bits FEC operates on.
Each of these counters is defined per lane (PCS instance).

Multiple vendors provide number of corrected _bits_ rather
than/as well as blocks.

This set adds the 2 standard-based block counters and a extra
one for corrected bits.

Counters are exposed to user space via netlink in new attributes.
Each attribute carries an array of u64s, first element is
the total count, and the following ones are a per-lane break down.

Much like with pause stats the operation will not fail when driver
does not implement the get_fec_stats callback (nor can the driver
fail the operation by returning an error). If stats can't be
reported the relevant attributes will be empty.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 21 ++++++++
 Documentation/networking/statistics.rst      |  2 +
 include/linux/ethtool.h                      | 40 +++++++++++++++
 include/uapi/linux/ethtool_netlink.h         | 14 ++++++
 net/ethtool/fec.c                            | 73 +++++++++++++++++++++++++++-
 5 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index bbecffc7b11a..f8219e2f489e 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1302,6 +1302,7 @@ Kernel response contents:
   ``ETHTOOL_A_FEC_MODES``                bitset  configured modes
   ``ETHTOOL_A_FEC_AUTO``                 bool    FEC mode auto selection
   ``ETHTOOL_A_FEC_ACTIVE``               u32     index of active FEC mode
+  ``ETHTOOL_A_FEC_STATS``                nested  FEC statistics
   =====================================  ======  ==========================
 
 ``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently
@@ -1315,6 +1316,26 @@ This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface.
 ``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode
 bits (rather than old ``ETHTOOL_FEC_*`` bits).
 
+``ETHTOOL_A_FEC_STATS`` are reported if ``ETHTOOL_FLAG_STATS`` was set in
+``ETHTOOL_A_HEADER_FLAGS``.
+Each attribute carries an array of 64bit statistics. First entry in the array
+contains the total number of events on the port, while the following entries
+are counters corresponding to lanes/PCS instances. The number of entries in
+the array will be:
+
++--------------+---------------------------------------------+
+| `0`          | device does not support FEC statistics      |
++--------------+---------------------------------------------+
+| `1`          | device does not support per-lane break down |
++--------------+---------------------------------------------+
+| `1 + #lanes` | device has full support for FEC stats       |
++--------------+---------------------------------------------+
+
+Drivers fill in the statistics in the following structure:
+
+.. kernel-doc:: include/linux/ethtool.h
+    :identifiers: ethtool_fec_stats
+
 FEC_SET
 =======
 
diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst
index 234abedc29b2..b748fe44ee02 100644
--- a/Documentation/networking/statistics.rst
+++ b/Documentation/networking/statistics.rst
@@ -130,6 +130,7 @@ the `ETHTOOL_FLAG_STATS` flag in `ETHTOOL_A_HEADER_FLAGS`. Currently
 statistics are supported in the following commands:
 
   - `ETHTOOL_MSG_PAUSE_GET`
+  - `ETHTOOL_MSG_FEC_GET`
 
 debugfs
 -------
@@ -176,3 +177,4 @@ translated to netlink attributes when dumped. Drivers must not overwrite
 the statistics they don't report with 0.
 
 - ethtool_pause_stats()
+- ethtool_fec_stats()
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 069100b252bd..112a85b57f1f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -269,6 +269,39 @@ struct ethtool_pause_stats {
 	u64 rx_pause_frames;
 };
 
+#define ETHTOOL_MAX_LANES	8
+
+/**
+ * struct ethtool_fec_stats - statistics for IEEE 802.3 FEC
+ * @corrected_blocks: number of received blocks corrected by FEC
+ *	Reported to user space as %ETHTOOL_A_FEC_STAT_CORRECTED.
+ *
+ *	Equivalent to `30.5.1.1.17 aFECCorrectedBlocks` from the standard.
+ *
+ * @uncorrectable_blocks: number of received blocks FEC was not able to correct
+ *	Reported to user space as %ETHTOOL_A_FEC_STAT_UNCORR.
+ *
+ *	Equivalent to `30.5.1.1.18 aFECUncorrectableBlocks` from the standard.
+ *
+ * @corrected_bits: number of bits corrected by FEC
+ *	Similar to @corrected_blocks but counts individual bit changes,
+ *	not entire FEC data blocks. This is a non-standard statistic.
+ *	Reported to user space as %ETHTOOL_A_FEC_STAT_CORR_BITS.
+ *
+ * @lane: per-lane/PCS-instance counts as defined by the standard
+ * @total: error counts for the entire port, for drivers incapable of reporting
+ *	per-lane stats
+ *
+ * Drivers should fill in either only total or per-lane statistics, core
+ * will take care of adding lane values up to produce the total.
+ */
+struct ethtool_fec_stats {
+	struct ethtool_fec_stat {
+		u64 total;
+		u64 lanes[ETHTOOL_MAX_LANES];
+	} corrected_blocks, uncorrectable_blocks, corrected_bits;
+};
+
 #define ETH_MODULE_EEPROM_PAGE_LEN	128
 #define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
 
@@ -439,6 +472,11 @@ struct ethtool_module_eeprom {
  *	ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter),
  *	any change to them will be overwritten by kernel. Returns a negative
  *	error code or zero.
+ * @get_fec_stats: Report FEC statistics.
+ *	Core will sum up per-lane stats to get the total.
+ *	Drivers must not zero statistics which they don't report. The stats
+ *	structure is initialized to ETHTOOL_STAT_NOT_SET indicating driver does
+ *	not report statistics.
  * @get_fecparam: Get the network device Forward Error Correction parameters.
  * @set_fecparam: Set the network device Forward Error Correction parameters.
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
@@ -544,6 +582,8 @@ struct ethtool_ops {
 				      struct ethtool_link_ksettings *);
 	int	(*set_link_ksettings)(struct net_device *,
 				      const struct ethtool_link_ksettings *);
+	void	(*get_fec_stats)(struct net_device *dev,
+				 struct ethtool_fec_stats *fec_stats);
 	int	(*get_fecparam)(struct net_device *,
 				      struct ethtool_fecparam *);
 	int	(*set_fecparam)(struct net_device *,
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 9612dcd48a6a..3a2b31ccbc5b 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -643,11 +643,25 @@ enum {
 	ETHTOOL_A_FEC_MODES,				/* bitset */
 	ETHTOOL_A_FEC_AUTO,				/* u8 */
 	ETHTOOL_A_FEC_ACTIVE,				/* u32 */
+	ETHTOOL_A_FEC_STATS,				/* nest - _A_FEC_STAT */
 
 	__ETHTOOL_A_FEC_CNT,
 	ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1)
 };
 
+enum {
+	ETHTOOL_A_FEC_STAT_UNSPEC,
+	ETHTOOL_A_FEC_STAT_PAD,
+
+	ETHTOOL_A_FEC_STAT_CORRECTED,			/* array, u64 */
+	ETHTOOL_A_FEC_STAT_UNCORR,			/* array, u64 */
+	ETHTOOL_A_FEC_STAT_CORR_BITS,			/* array, u64 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_FEC_STAT_CNT,
+	ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1)
+};
+
 /* MODULE EEPROM */
 
 enum {
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 3e7d091ee7aa..8738dafd5417 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -13,6 +13,10 @@ struct fec_reply_data {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes);
 	u32 active_fec;
 	u8 fec_auto;
+	struct fec_stat_grp {
+		u64 stats[1 + ETHTOOL_MAX_LANES];
+		u8 cnt;
+	} corr, uncorr, corr_bits;
 };
 
 #define FEC_REPDATA(__reply_base) \
@@ -21,7 +25,7 @@ struct fec_reply_data {
 #define ETHTOOL_FEC_MASK	((ETHTOOL_FEC_LLRS << 1) - 1)
 
 const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = {
-	[ETHTOOL_A_FEC_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_FEC_HEADER]	= NLA_POLICY_NESTED(ethnl_header_policy_stats),
 };
 
 static void
@@ -64,6 +68,28 @@ ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec,
 	return 0;
 }
 
+static void
+fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats)
+{
+	int i;
+
+	if (stats->lanes[0] == ETHTOOL_STAT_NOT_SET) {
+		grp->stats[0] = stats->total;
+		grp->cnt = stats->total != ETHTOOL_STAT_NOT_SET;
+		return;
+	}
+
+	grp->cnt = 1;
+	grp->stats[0] = 0;
+	for (i = 0; i < ETHTOOL_MAX_LANES; i++) {
+		if (stats->lanes[i] == ETHTOOL_STAT_NOT_SET)
+			break;
+
+		grp->stats[0] += stats->lanes[i];
+		grp->stats[grp->cnt++] = stats->lanes[i];
+	}
+}
+
 static int fec_prepare_data(const struct ethnl_req_info *req_base,
 			    struct ethnl_reply_data *reply_base,
 			    struct genl_info *info)
@@ -82,6 +108,17 @@ static int fec_prepare_data(const struct ethnl_req_info *req_base,
 	ret = dev->ethtool_ops->get_fecparam(dev, &fec);
 	if (ret)
 		goto out_complete;
+	if (req_base->flags & ETHTOOL_FLAG_STATS &&
+	    dev->ethtool_ops->get_fec_stats) {
+		struct ethtool_fec_stats stats;
+
+		ethtool_stats_init((u64 *)&stats, sizeof(stats) / 8);
+		dev->ethtool_ops->get_fec_stats(dev, &stats);
+
+		fec_stats_recalc(&data->corr, &stats.corrected_blocks);
+		fec_stats_recalc(&data->uncorr, &stats.uncorrectable_blocks);
+		fec_stats_recalc(&data->corr_bits, &stats.corrected_bits);
+	}
 
 	WARN_ON_ONCE(fec.reserved);
 
@@ -120,9 +157,40 @@ static int fec_reply_size(const struct ethnl_req_info *req_base,
 	len += nla_total_size(sizeof(u8)) +	/* _FEC_AUTO */
 	       nla_total_size(sizeof(u32));	/* _FEC_ACTIVE */
 
+	if (req_base->flags & ETHTOOL_FLAG_STATS)
+		len += 3 * nla_total_size_64bit(sizeof(u64) *
+						(1 + ETHTOOL_MAX_LANES));
+
 	return len;
 }
 
+static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, ETHTOOL_A_FEC_STATS);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORRECTED,
+			  sizeof(u64) * data->corr.cnt,
+			  data->corr.stats, ETHTOOL_A_FEC_STAT_PAD) ||
+	    nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_UNCORR,
+			  sizeof(u64) * data->uncorr.cnt,
+			  data->uncorr.stats, ETHTOOL_A_FEC_STAT_PAD) ||
+	    nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORR_BITS,
+			  sizeof(u64) * data->corr_bits.cnt,
+			  data->corr_bits.stats, ETHTOOL_A_FEC_STAT_PAD))
+		goto err_cancel;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+err_cancel:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
 static int fec_fill_reply(struct sk_buff *skb,
 			  const struct ethnl_req_info *req_base,
 			  const struct ethnl_reply_data *reply_base)
@@ -143,6 +211,9 @@ static int fec_fill_reply(struct sk_buff *skb,
 	     nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec)))
 		return -EMSGSIZE;
 
+	if (req_base->flags & ETHTOOL_FLAG_STATS && fec_put_stats(skb, data))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From c9ca5c3aabafcaa934731b8a841f28f8df990b7f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:16 -0700
Subject: bnxt: implement ethtool::get_fec_stats

Report corrected bits.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 2f8b193a772d..7b90357daba1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1930,6 +1930,20 @@ static int bnxt_get_fecparam(struct net_device *dev,
 	return 0;
 }
 
+static void bnxt_get_fec_stats(struct net_device *dev,
+			       struct ethtool_fec_stats *fec_stats)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u64 *rx;
+
+	if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
+		return;
+
+	rx = bp->rx_port_stats_ext.sw_stats;
+	fec_stats->corrected_bits.total =
+		*(rx + BNXT_RX_STATS_EXT_OFFSET(rx_corrected_bits));
+}
+
 static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info,
 					 u32 fec)
 {
@@ -3991,6 +4005,7 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 				     ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
 	.get_link_ksettings	= bnxt_get_link_ksettings,
 	.set_link_ksettings	= bnxt_set_link_ksettings,
+	.get_fec_stats		= bnxt_get_fec_stats,
 	.get_fecparam		= bnxt_get_fecparam,
 	.set_fecparam		= bnxt_set_fecparam,
 	.get_pause_stats	= bnxt_get_pause_stats,
-- 
cgit v1.2.3


From cab351be53c263c857563eb56b2bbfe67e8a12a0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:17 -0700
Subject: sfc: ef10: implement ethtool::get_fec_stats

Report what appears to be the standard block counts:
 - 30.5.1.1.17 aFECCorrectedBlocks
 - 30.5.1.1.18 aFECUncorrectableBlocks

Don't report the per-lane symbol counts, if those really
count symbols they are not what the standard calls for
(even if symbols seem like the most useful thing to count.)

Fingers crossed that fec_corrected_errors is not in symbols.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 17 +++++++++++++++++
 drivers/net/ethernet/sfc/ethtool.c    | 10 ++++++++++
 drivers/net/ethernet/sfc/net_driver.h |  3 +++
 3 files changed, 30 insertions(+)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index da6886dcac37..c873f961d5a5 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1747,6 +1747,22 @@ static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names)
 				      mask, names);
 }
 
+static void efx_ef10_get_fec_stats(struct efx_nic *efx,
+				   struct ethtool_fec_stats *fec_stats)
+{
+	DECLARE_BITMAP(mask, EF10_STAT_COUNT);
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	u64 *stats = nic_data->stats;
+
+	efx_ef10_get_stat_mask(efx, mask);
+	if (test_bit(EF10_STAT_fec_corrected_errors, mask))
+		fec_stats->corrected_blocks.total =
+			stats[EF10_STAT_fec_corrected_errors];
+	if (test_bit(EF10_STAT_fec_uncorrected_errors, mask))
+		fec_stats->uncorrectable_blocks.total =
+			stats[EF10_STAT_fec_uncorrected_errors];
+}
+
 static size_t efx_ef10_update_stats_common(struct efx_nic *efx, u64 *full_stats,
 					   struct rtnl_link_stats64 *core_stats)
 {
@@ -4122,6 +4138,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.get_wol = efx_ef10_get_wol,
 	.set_wol = efx_ef10_set_wol,
 	.resume_wol = efx_port_dummy_op_void,
+	.get_fec_stats = efx_ef10_get_fec_stats,
 	.test_chip = efx_ef10_test_chip,
 	.test_nvram = efx_mcdi_nvram_test_all,
 	.mcdi_request = efx_ef10_mcdi_request,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 12a91c559aa2..058d9fe41d99 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -206,6 +206,15 @@ static int efx_ethtool_set_wol(struct net_device *net_dev,
 	return efx->type->set_wol(efx, wol->wolopts);
 }
 
+static void efx_ethtool_get_fec_stats(struct net_device *net_dev,
+				      struct ethtool_fec_stats *fec_stats)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
+
+	if (efx->type->get_fec_stats)
+		efx->type->get_fec_stats(efx, fec_stats);
+}
+
 static int efx_ethtool_get_ts_info(struct net_device *net_dev,
 				   struct ethtool_ts_info *ts_info)
 {
@@ -257,6 +266,7 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
 	.get_link_ksettings	= efx_ethtool_get_link_ksettings,
 	.set_link_ksettings	= efx_ethtool_set_link_ksettings,
+	.get_fec_stats		= efx_ethtool_get_fec_stats,
 	.get_fecparam		= efx_ethtool_get_fecparam,
 	.set_fecparam		= efx_ethtool_set_fecparam,
 };
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9f7dfdf708cf..9b4b25704271 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1187,6 +1187,7 @@ struct efx_udp_tunnel {
  * @get_wol: Get WoL configuration from driver state
  * @set_wol: Push WoL configuration to the NIC
  * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume)
+ * @get_fec_stats: Get standard FEC statistics.
  * @test_chip: Test registers.  May use efx_farch_test_registers(), and is
  *	expected to reset the NIC.
  * @test_nvram: Test validity of NVRAM contents
@@ -1332,6 +1333,8 @@ struct efx_nic_type {
 	void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol);
 	int (*set_wol)(struct efx_nic *efx, u32 type);
 	void (*resume_wol)(struct efx_nic *efx);
+	void (*get_fec_stats)(struct efx_nic *efx,
+			      struct ethtool_fec_stats *fec_stats);
 	unsigned int (*check_caps)(const struct efx_nic *efx,
 				   u8 flag,
 				   u32 offset);
-- 
cgit v1.2.3


From 1703bb50df0e4248b8def88fc6dd43a505479c66 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Apr 2021 15:53:18 -0700
Subject: mlx5: implement ethtool::get_fec_stats

Report corrected bits.

v2: catch reg access errors (Saeed)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  9 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 29 ++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  2 ++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c8057a44d5ab..f17690cbeeea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1602,6 +1602,14 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	return mlx5_set_port_wol(mdev, mlx5_wol_mode);
 }
 
+static void mlx5e_get_fec_stats(struct net_device *netdev,
+				struct ethtool_fec_stats *fec_stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_stats_fec_get(priv, fec_stats);
+}
+
 static int mlx5e_get_fecparam(struct net_device *netdev,
 			      struct ethtool_fecparam *fecparam)
 {
@@ -2209,6 +2217,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.self_test         = mlx5e_self_test,
 	.get_msglevel      = mlx5e_get_msglevel,
 	.set_msglevel      = mlx5e_set_msglevel,
+	.get_fec_stats     = mlx5e_get_fec_stats,
 	.get_fecparam      = mlx5e_get_fecparam,
 	.set_fecparam      = mlx5e_set_fecparam,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index ae0570ea08bf..353513bd0d5e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -768,10 +768,10 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3)
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 }
 
-#define MLX5E_READ_CTR64_BE_F(ptr, c)			\
+#define MLX5E_READ_CTR64_BE_F(ptr, set, c)		\
 	be64_to_cpu(*(__be64 *)((char *)ptr +		\
 		MLX5_BYTE_OFF(ppcnt_reg,		\
-			counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)))
+			      counter_set.set.c##_high)))
 
 void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
 			   struct ethtool_pause_stats *pause_stats)
@@ -791,9 +791,11 @@ void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
 
 	pause_stats->tx_pause_frames =
 		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
 				      a_pause_mac_ctrl_frames_transmitted);
 	pause_stats->rx_pause_frames =
 		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
 				      a_pause_mac_ctrl_frames_received);
 }
 
@@ -1015,6 +1017,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 }
 
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+			 struct ethtool_fec_stats *fec_stats)
+{
+	u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+	if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
+		return;
+
+	MLX5_SET(ppcnt_reg, in, local_port, 1);
+	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP);
+	if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical,
+				 sz, MLX5_REG_PPCNT, 0, 0))
+		return;
+
+	fec_stats->corrected_bits.total =
+		MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical,
+				      phys_layer_statistical_cntrs,
+				      phy_corrected_bits);
+}
+
 #define PPORT_ETH_EXT_OFF(c) \
 	MLX5_BYTE_OFF(ppcnt_reg, \
 		      counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 21d3b8747f93..3f0789e51eed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -114,6 +114,8 @@ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv);
 
 void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
 			   struct ethtool_pause_stats *pause_stats);
+void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
+			 struct ethtool_fec_stats *fec_stats);
 
 /* Concrete NIC Stats */
 
-- 
cgit v1.2.3


From 26b67f5a1e067605d5db3062090618ca542115b8 Mon Sep 17 00:00:00 2001
From: Ederson de Souza <ederson.desouza@intel.com>
Date: Wed, 3 Feb 2021 17:50:25 -0800
Subject: igb: Redistribute memory for transmit packet buffers when in Qav mode

i210 has a total of 24KB of transmit packet buffer. When in Qav mode,
this buffer is divided into four pieces, one for each Tx queue.
Currently, 8KB are given to each of the two SR queues and 4KB are given
to each of the two SP queues.

However, it was noticed that such distribution can make best effort
traffic (which would usually go to the SP queues when Qav is enabled, as
the SR queues would be used by ETF or CBS qdiscs for TSN-aware traffic)
perform poorly. Using iperf3 to measure, one could see the performance
of best effort traffic drop by nearly a third (from 935Mbps to 578Mbps),
with no TSN traffic competing.

This patch redistributes the 24KB to each queue equally: 6KB each. On
tests, there was no notable performance reduction of best effort traffic
performance when there was no TSN traffic competing.

Below, more details about the data collected:

All experiments were run using the following qdisc setup:

qdisc taprio 100: root refcnt 9 tc 4 map 3 3 3 2 3 0 0 3 3 3 3 3 3 3 3 3
    queues offset 0 count 1 offset 1 count 1 offset 2 count 1 offset 3 count 1
    clockid TAI base-time 0 cycle-time 10000000 cycle-time-extension 0
    index 0 cmd S gatemask 0xf interval 10000000

qdisc etf 8045: parent 100:1 clockid TAI delta 1000000 offload on
    deadline_mode off skip_sock_check off

TSN traffic, when enabled, had this characteristics:
 Packet size: 1500 bytes
 Transmission interval: 125us

----------------------------------
Without this patch:
----------------------------------
- TCP data:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.35 GBytes   578 Mbits/sec    0

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.07 GBytes   460 Mbits/sec    1

- TCP data limiting iperf3 buffer size to 4K:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.35 GBytes   579 Mbits/sec    0

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.08 GBytes   462 Mbits/sec    0

- TCP data limiting iperf3 buffer size to 192 bytes (smallest size without
 serious performance degradation):
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.34 GBytes   577 Mbits/sec    0

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.07 GBytes   461 Mbits/sec    1

- UDP data at 1000Mbit/sec:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
        [  5]   0.00-20.00  sec  1.36 GBytes   586 Mbits/sec  0.000 ms  0/1011407 (0%)

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
        [  5]   0.00-20.00  sec  1.05 GBytes   451 Mbits/sec  0.000 ms  0/778672 (0%)

----------------------------------
With this patch:
----------------------------------

- TCP data:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  2.17 GBytes   932 Mbits/sec    0

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.50 GBytes   646 Mbits/sec    1

- TCP data limiting iperf3 buffer size to 4K:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  2.17 GBytes   931 Mbits/sec    0

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.50 GBytes   645 Mbits/sec    0

- TCP data limiting iperf3 buffer size to 192 bytes (smallest size without
 serious performance degradation):
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  2.17 GBytes   932 Mbits/sec    1

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Retr
        [  5]   0.00-20.00  sec  1.50 GBytes   645 Mbits/sec    0

- UDP data at 1000Mbit/sec:
    - No TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
        [  5]   0.00-20.00  sec  2.23 GBytes   956 Mbits/sec  0.000 ms  0/1650226 (0%)

    - With TSN traffic:
        [ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
        [  5]   0.00-20.00  sec  1.51 GBytes   649 Mbits/sec  0.000 ms  0/1120264 (0%)

Signed-off-by: Ederson de Souza <ederson.desouza@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_defines.h | 8 ++++----
 drivers/net/ethernet/intel/igb/igb_main.c      | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index d2e2c50ce257..ca5429774994 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -340,10 +340,10 @@
 #define I210_RXPBSIZE_PB_32KB		0x00000020
 #define I210_TXPBSIZE_DEFAULT		0x04000014 /* TXPBSIZE default */
 #define I210_TXPBSIZE_MASK		0xC0FFFFFF
-#define I210_TXPBSIZE_PB0_8KB		(8 << 0)
-#define I210_TXPBSIZE_PB1_8KB		(8 << 6)
-#define I210_TXPBSIZE_PB2_4KB		(4 << 12)
-#define I210_TXPBSIZE_PB3_4KB		(4 << 18)
+#define I210_TXPBSIZE_PB0_6KB		(6 << 0)
+#define I210_TXPBSIZE_PB1_6KB		(6 << 6)
+#define I210_TXPBSIZE_PB2_6KB		(6 << 12)
+#define I210_TXPBSIZE_PB3_6KB		(6 << 18)
 
 #define I210_DTXMXPKTSZ_DEFAULT		0x00000098
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c9e8c65a3cfe..038a9fd1af44 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1921,8 +1921,8 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter)
 		 */
 		val = rd32(E1000_TXPBS);
 		val &= ~I210_TXPBSIZE_MASK;
-		val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB |
-			I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB;
+		val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB |
+			I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB;
 		wr32(E1000_TXPBS, val);
 
 		val = rd32(E1000_RXPBS);
-- 
cgit v1.2.3


From e7ad33fa7bc5f788cdb14eea68c65c4da0f06edf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 16 Apr 2021 11:35:38 -0700
Subject: scm: fix a typo in put_cmsg()

We need to store cmlen instead of len in cm->cmsg_len.

Fixes: 38ebcf5096a8 ("scm: optimize put_cmsg()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index bd96c922041d..ae3085d9aae8 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -232,7 +232,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 		if (!user_write_access_begin(cm, cmlen))
 			goto efault;
 
-		unsafe_put_user(len, &cm->cmsg_len, efault_end);
+		unsafe_put_user(cmlen, &cm->cmsg_len, efault_end);
 		unsafe_put_user(level, &cm->cmsg_level, efault_end);
 		unsafe_put_user(type, &cm->cmsg_type, efault_end);
 		unsafe_copy_to_user(CMSG_USER_DATA(cm), data,
-- 
cgit v1.2.3


From 2f014f4016db44103864d295ba427eb1e0086551 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Tue, 9 Mar 2021 16:30:10 +0200
Subject: net/mlx5e: Remove non-essential TLS SQ state bit

Maintaining an SQ state bit to indicate TLS support
has no real need, a simple and fast test [1] for the SKB is
almost equally good.

[1] !skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h                | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c           | 2 --
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e1c51eabe8fe..cb4e7aaa4f8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -325,7 +325,6 @@ enum {
 	MLX5E_SQ_STATE_RECOVERING,
 	MLX5E_SQ_STATE_IPSEC,
 	MLX5E_SQ_STATE_AM,
-	MLX5E_SQ_STATE_TLS,
 	MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
 	MLX5E_SQ_STATE_PENDING_XSK_TX,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index cc0efac7b812..cc2851ecd512 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -123,11 +123,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
 		mlx5e_udp_gso_handle_tx_skb(skb);
 
 #ifdef CONFIG_MLX5_EN_TLS
-	if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
-		/* May send SKBs and WQEs. */
-		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
-			return false;
-	}
+	/* May send SKBs and WQEs. */
+	if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
+		return false;
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6847e7b909a5..64d6c0fd92bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1134,8 +1134,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-	if (mlx5_accel_is_tls_device(c->priv->mdev))
-		set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
 	if (param->is_mpw)
 		set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
 	sq->stop_room = param->stop_room;
-- 
cgit v1.2.3


From 8668587a33b99492b73fd4dbec432f793853376d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 31 Mar 2021 11:02:09 +0300
Subject: net/mlx5e: Cleanup unused function parameter

Socket parameter is not used in accel_rule_init(), remove it.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 8c0f78c09215..76fd4b230003 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -119,8 +119,7 @@ out:
 	complete(&priv_rx->add_ctx);
 }
 
-static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv,
-			    struct sock *sk)
+static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv)
 {
 	INIT_WORK(&rule->work, accel_rule_handle_work);
 	rule->priv = priv;
@@ -618,7 +617,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
 
 	init_completion(&priv_rx->add_ctx);
 
-	accel_rule_init(&priv_rx->rule, priv, sk);
+	accel_rule_init(&priv_rx->rule, priv);
 	resync = &priv_rx->resync;
 	resync_init(resync, priv);
 	tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core;
-- 
cgit v1.2.3


From b6b3ad2175c852557c06fb52ed533e35a143fb72 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Tue, 9 Mar 2021 16:37:42 +0200
Subject: net/mlx5e: TX, Inline TLS skb check

When TLS is supported and enabled, every transmitted packet is tested
to identify if TLS offload is required.

Take the early-return condition into an inline function, to save
the overhead of a function call for non-TLS packets.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c | 3 ---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h | 6 ++++++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index cc2851ecd512..043c86c52798 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -124,8 +124,9 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
 
 #ifdef CONFIG_MLX5_EN_TLS
 	/* May send SKBs and WQEs. */
-	if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
-		return false;
+	if (mlx5e_tls_skb_offloaded(skb))
+		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
+			return false;
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 2b51d3222ca1..97cbea7ed048 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -263,9 +263,6 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 	int datalen;
 	u32 skb_seq;
 
-	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
-		return true;
-
 	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
 	if (!datalen)
 		return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 9923132c9440..5c3443200fd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -47,6 +47,12 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
 
 bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 			     struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state);
+
+static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb)
+{
+	return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
+}
+
 void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
 			     struct mlx5e_accel_tx_tls_state *state);
 
-- 
cgit v1.2.3


From 72f6f2f8d6aa213a85b69a0f0ca2c6f4f80aa85e Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Tue, 16 Feb 2021 12:27:57 +0200
Subject: net/mlx5e: TX, Inline function mlx5e_tls_handle_tx_wqe()

When TLS is supported, WQE ctrl segment of every transmitted packet
is updated with the (possibly empty, for non-TLS packets) TISN field.

Take this one-liner function into the header file and inline it,
to save the overhead of a function call per packet.

While here, remove unused function parameter.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c | 6 ------
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h | 8 ++++++--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 043c86c52798..00af0b831a28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -185,7 +185,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
 					 struct mlx5_wqe_inline_seg *inlseg)
 {
 #ifdef CONFIG_MLX5_EN_TLS
-	mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
+	mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls);
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 97cbea7ed048..82dc09aaa7fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -298,12 +298,6 @@ err_out:
 	return false;
 }
 
-void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
-			     struct mlx5e_accel_tx_tls_state *state)
-{
-	cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
-}
-
 static int tls_update_resync_sn(struct net_device *netdev,
 				struct sk_buff *skb,
 				struct mlx5e_tls_metadata *mdata)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 5c3443200fd6..0ca0a023fb8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -53,8 +53,12 @@ static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb)
 	return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
 }
 
-void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
-			     struct mlx5e_accel_tx_tls_state *state);
+static inline void
+mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg,
+			struct mlx5e_accel_tx_tls_state *state)
+{
+	cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8);
+}
 
 void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb,
 				      u32 *cqe_bcnt);
-- 
cgit v1.2.3


From e9ce991bce5bacf71641bd0f72f4b7c589529f40 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 24 Feb 2021 16:26:34 +0200
Subject: net/mlx5e: kTLS, Add resiliency to RX resync failures

When the TLS logic finds a tcp seq match for a kTLS RX resync
request, it calls the driver callback function mlx5e_ktls_resync()
to handle it and communicate it to the device.

Errors might occur during mlx5e_ktls_resync(), however, they are not
reported to the stack. Moreover, there is no error handling in the
stack for these errors.

In this patch, the driver obtains responsibility on errors handling,
adding queue and retry mechanisms to these resyncs.

We maintain a linked list of resync matches, and try posting them
to the async ICOSQ in the NAPI context.

Only possible failure that demands driver handling is ICOSQ being full.
By relying on the NAPI mechanism, we make sure that the entries in list
will be handled when ICOSQ completions arrive and make some room
available.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   4 +
 .../net/ethernet/mellanox/mlx5/core/en/params.c    |   3 +
 .../net/ethernet/mellanox/mlx5/core/en/params.h    |   1 +
 .../ethernet/mellanox/mlx5/core/en_accel/ktls.h    |  11 ++
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c | 124 ++++++++++++++++++---
 .../mellanox/mlx5/core/en_accel/ktls_txrx.h        |  20 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  11 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c  |   5 +
 10 files changed, 166 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cb4e7aaa4f8a..28a68eef8ae6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -327,6 +327,7 @@ enum {
 	MLX5E_SQ_STATE_AM,
 	MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE,
 	MLX5E_SQ_STATE_PENDING_XSK_TX,
+	MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC,
 };
 
 struct mlx5e_tx_mpwqe {
@@ -499,6 +500,8 @@ struct mlx5e_xdpsq {
 	struct mlx5e_channel      *channel;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_ktls_resync_resp;
+
 struct mlx5e_icosq {
 	/* data path */
 	u16                        cc;
@@ -518,6 +521,7 @@ struct mlx5e_icosq {
 	u32                        sqn;
 	u16                        reserved_room;
 	unsigned long              state;
+	struct mlx5e_ktls_resync_resp *ktls_resync;
 
 	/* control path */
 	struct mlx5_wq_ctrl        wq_ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 7b2b52e75222..f6ba568e00be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -621,6 +621,9 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
 
 	mlx5e_build_sq_param_common(mdev, param);
 	param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
+	param->is_tls = mlx5_accel_is_ktls_rx(mdev);
+	if (param->is_tls)
+		param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
 	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
 	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
 	mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 602e41a2bddd..fcc51ec6084e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -30,6 +30,7 @@ struct mlx5e_sq_param {
 	u32                        sqc[MLX5_ST_SZ_DW(sqc)];
 	struct mlx5_wq_param       wq;
 	bool                       is_mpw;
+	bool                       is_tls;
 	u16                        stop_room;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index baa58b62e8df..aaa579bf9a39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -12,6 +12,9 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
 int mlx5e_ktls_init_rx(struct mlx5e_priv *priv);
 void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv);
 int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable);
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void);
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list);
 #else
 
 static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
@@ -33,6 +36,14 @@ static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enab
 	return -EOPNOTSUPP;
 }
 
+static inline struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {}
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 76fd4b230003..4e58fade7a60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -56,6 +56,7 @@ struct mlx5e_ktls_offload_context_rx {
 
 	/* resync */
 	struct mlx5e_ktls_rx_resync_ctx resync;
+	struct list_head list;
 };
 
 static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
@@ -72,6 +73,32 @@ static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx
 	refcount_inc(&priv_rx->resync.refcnt);
 }
 
+struct mlx5e_ktls_resync_resp {
+	/* protects list changes */
+	spinlock_t lock;
+	struct list_head list;
+};
+
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list)
+{
+	kvfree(resp_list);
+}
+
+struct mlx5e_ktls_resync_resp *
+mlx5e_ktls_rx_resync_create_resp_list(void)
+{
+	struct mlx5e_ktls_resync_resp *resp_list;
+
+	resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL);
+	if (!resp_list)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&resp_list->list);
+	spin_lock_init(&resp_list->lock);
+
+	return resp_list;
+}
+
 static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
 {
 	int err, inlen;
@@ -358,33 +385,32 @@ static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
 /* Function can be called with the refcount being either elevated or not.
  * It does not affect the refcount.
  */
-static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx,
-				   struct mlx5e_channel *c)
+static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx,
+				    struct mlx5e_channel *c)
 {
 	struct tls12_crypto_info_aes_gcm_128 *info = &priv_rx->crypto_info;
-	struct mlx5_wqe_ctrl_seg *cseg;
+	struct mlx5e_ktls_resync_resp *ktls_resync;
 	struct mlx5e_icosq *sq;
-	int err;
+	bool trigger_poll;
 
 	memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
-	err = 0;
 
 	sq = &c->async_icosq;
-	spin_lock_bh(&c->async_icosq_lock);
+	ktls_resync = sq->ktls_resync;
 
-	cseg = post_static_params(sq, priv_rx);
-	if (IS_ERR(cseg)) {
-		priv_rx->rq_stats->tls_resync_res_skip++;
-		err = PTR_ERR(cseg);
-		goto unlock;
-	}
-	/* Do not increment priv_rx refcnt, CQE handling is empty */
-	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
-	priv_rx->rq_stats->tls_resync_res_ok++;
-unlock:
-	spin_unlock_bh(&c->async_icosq_lock);
+	spin_lock_bh(&ktls_resync->lock);
+	list_add_tail(&priv_rx->list, &ktls_resync->list);
+	trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+	spin_unlock_bh(&ktls_resync->lock);
 
-	return err;
+	if (!trigger_poll)
+		return;
+
+	if (!napi_if_scheduled_mark_missed(&c->napi)) {
+		spin_lock_bh(&c->async_icosq_lock);
+		mlx5e_trigger_irq(sq);
+		spin_unlock_bh(&c->async_icosq_lock);
+	}
 }
 
 /* Function can be called with the refcount being either elevated or not.
@@ -675,3 +701,65 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
 	 */
 	mlx5e_ktls_priv_rx_put(priv_rx);
 }
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+	struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp;
+	struct mlx5e_ktls_resync_resp *ktls_resync;
+	struct mlx5_wqe_ctrl_seg *db_cseg;
+	struct mlx5e_icosq *sq;
+	LIST_HEAD(local_list);
+	int i, j;
+
+	sq = &c->async_icosq;
+
+	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
+		return false;
+
+	ktls_resync = sq->ktls_resync;
+	db_cseg = NULL;
+	i = 0;
+
+	spin_lock(&ktls_resync->lock);
+	list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) {
+		list_move(&priv_rx->list, &local_list);
+		if (++i == budget)
+			break;
+	}
+	if (list_empty(&ktls_resync->list))
+		clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+	spin_unlock(&ktls_resync->lock);
+
+	spin_lock(&c->async_icosq_lock);
+	for (j = 0; j < i; j++) {
+		struct mlx5_wqe_ctrl_seg *cseg;
+
+		priv_rx = list_first_entry(&local_list,
+					   struct mlx5e_ktls_offload_context_rx,
+					   list);
+		cseg = post_static_params(sq, priv_rx);
+		if (IS_ERR(cseg))
+			break;
+		list_del(&priv_rx->list);
+		db_cseg = cseg;
+	}
+	if (db_cseg)
+		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg);
+	spin_unlock(&c->async_icosq_lock);
+
+	priv_rx->rq_stats->tls_resync_res_ok += j;
+
+	if (!list_empty(&local_list)) {
+		/* This happens only if ICOSQ is full.
+		 * There is no need to mark busy or explicitly ask for a NAPI cycle,
+		 * it will be triggered by the outstanding ICOSQ completions.
+		 */
+		spin_lock(&ktls_resync->lock);
+		list_splice(&local_list, &ktls_resync->list);
+		set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+		spin_unlock(&ktls_resync->lock);
+		priv_rx->rq_stats->tls_resync_res_retry++;
+	}
+
+	return i == budget;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index ee04e916fa21..8f79335057dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -40,6 +40,14 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 	}
 	return false;
 }
+
+bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget);
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+	return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state);
+}
 #else
 static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
@@ -49,6 +57,18 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 	return false;
 }
 
+static inline bool
+mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
+{
+	return false;
+}
+
+static inline bool
+mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget)
+{
+	return false;
+}
+
 #endif /* CONFIG_MLX5_EN_TLS */
 
 #endif /* __MLX5E_TLS_TXRX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 64d6c0fd92bf..df4959e46f27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1409,8 +1409,17 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
 	if (err)
 		goto err_free_icosq;
 
+	if (param->is_tls) {
+		sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
+		if (IS_ERR(sq->ktls_resync)) {
+			err = PTR_ERR(sq->ktls_resync);
+			goto err_destroy_icosq;
+		}
+	}
 	return 0;
 
+err_destroy_icosq:
+	mlx5e_destroy_sq(c->mdev, sq->sqn);
 err_free_icosq:
 	mlx5e_free_icosq(sq);
 
@@ -1432,6 +1441,8 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 
+	if (sq->ktls_resync)
+		mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
 	mlx5e_destroy_sq(c->mdev, sq->sqn);
 	mlx5e_free_icosq_descs(sq);
 	mlx5e_free_icosq(sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 353513bd0d5e..5146aa200de9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -184,6 +184,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) },
 #endif
@@ -344,6 +345,7 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
 	s->rx_tls_resync_req_end      += rq_stats->tls_resync_req_end;
 	s->rx_tls_resync_req_skip     += rq_stats->tls_resync_req_skip;
 	s->rx_tls_resync_res_ok       += rq_stats->tls_resync_res_ok;
+	s->rx_tls_resync_res_retry    += rq_stats->tls_resync_res_retry;
 	s->rx_tls_resync_res_skip     += rq_stats->tls_resync_res_skip;
 	s->rx_tls_err                 += rq_stats->tls_err;
 #endif
@@ -1654,6 +1656,7 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_skip) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) },
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 3f0789e51eed..9614de49b7ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -209,6 +209,7 @@ struct mlx5e_sw_stats {
 	u64 rx_tls_resync_req_end;
 	u64 rx_tls_resync_req_skip;
 	u64 rx_tls_resync_res_ok;
+	u64 rx_tls_resync_res_retry;
 	u64 rx_tls_resync_res_skip;
 	u64 rx_tls_err;
 #endif
@@ -339,6 +340,7 @@ struct mlx5e_rq_stats {
 	u64 tls_resync_req_end;
 	u64 tls_resync_req_skip;
 	u64 tls_resync_res_ok;
+	u64 tls_resync_res_retry;
 	u64 tls_resync_res_skip;
 	u64 tls_err;
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index d54da3797c30..833be29170a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -36,6 +36,7 @@
 #include "en/xdp.h"
 #include "en/xsk/rx.h"
 #include "en/xsk/tx.h"
+#include "en_accel/ktls_txrx.h"
 
 static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
 {
@@ -171,6 +172,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 		 */
 		clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state);
 
+	/* Keep after async ICOSQ CQ poll */
+	if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
+		busy |= mlx5e_ktls_rx_handle_resync_list(c, budget);
+
 	busy |= INDIRECT_CALL_2(rq->post_wqes,
 				mlx5e_post_rx_mpwqes,
 				mlx5e_post_rx_wqes,
-- 
cgit v1.2.3


From 6cad120d9e621a4eeff1d45dca41416a1e0b77d5 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Wed, 27 Jan 2021 18:28:03 +0200
Subject: net/mlx5e: Allow mlx5e_safe_switch_channels to work with channels
 closed

mlx5e_safe_switch_channels is used to modify channel parameters and/or
hardware configuration in a safe way, so that if anything goes wrong,
everything reverts to the old configuration and remains in a consistent
state.

However, this function only works when the channels are open. When the
caller needs to modify some parameters, first it has to check that the
channels are open, otherwise it has to assign parameters directly, and
such boilerplate repeats in many different places.

This commit prepares for the refactoring of such places by allowing
mlx5e_safe_switch_channels to work when the channels are closed. In this
case it will assign the new parameters and run the preactivate hook.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 40 +++++++++++++++--------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index df4959e46f27..cb88d7239db6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2852,12 +2852,16 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 	struct net_device *netdev = priv->netdev;
 	struct mlx5e_channels old_chs;
 	int carrier_ok;
+	bool opened;
 	int err = 0;
 
-	carrier_ok = netif_carrier_ok(netdev);
-	netif_carrier_off(netdev);
+	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+	if (opened) {
+		carrier_ok = netif_carrier_ok(netdev);
+		netif_carrier_off(netdev);
 
-	mlx5e_deactivate_priv_channels(priv);
+		mlx5e_deactivate_priv_channels(priv);
+	}
 
 	old_chs = priv->channels;
 	priv->channels = *new_chs;
@@ -2873,15 +2877,19 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		}
 	}
 
-	mlx5e_close_channels(&old_chs);
-	priv->profile->update_rx(priv);
+	if (opened) {
+		mlx5e_close_channels(&old_chs);
+		priv->profile->update_rx(priv);
+	}
 
 out:
-	mlx5e_activate_priv_channels(priv);
+	if (opened) {
+		mlx5e_activate_priv_channels(priv);
 
-	/* return carrier back if needed */
-	if (carrier_ok)
-		netif_carrier_on(netdev);
+		/* return carrier back if needed */
+		if (carrier_ok)
+			netif_carrier_on(netdev);
+	}
 
 	return err;
 }
@@ -2891,11 +2899,16 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
 			       mlx5e_fp_preactivate preactivate,
 			       void *context)
 {
+	bool opened;
 	int err;
 
-	err = mlx5e_open_channels(priv, new_chs);
-	if (err)
-		return err;
+	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+	if (opened) {
+		err = mlx5e_open_channels(priv, new_chs);
+		if (err)
+			return err;
+	}
 
 	err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
 	if (err)
@@ -2904,7 +2917,8 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
 	return 0;
 
 err_close:
-	mlx5e_close_channels(new_chs);
+	if (opened)
+		mlx5e_close_channels(new_chs);
 
 	return err;
 }
-- 
cgit v1.2.3


From 69cc4185dcbaaedc52d5d8d13b4aac2a3836a874 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 28 Jan 2021 15:36:21 +0200
Subject: net/mlx5e: Use mlx5e_safe_switch_channels when channels are closed

This commit uses new functionality of mlx5e_safe_switch_channels
introduced by the previous commit to reduce the amount of repeating
similar code all over the driver.

It's very common in mlx5e to call mlx5e_safe_switch_channels when the
channels are open, but assign parameters and run hardware commands
manually when the channels are closed.

After the previous commit it's no longer needed to do such manual things
every time, so this commit removes unneeded code and relies on the new
functionality of mlx5e_safe_switch_channels. Some of the places are
refactored and simplified, where more complex flows are used to change
configuration on the fly, without recreating the channels (the logic is
rewritten in a more robust way, with a reset required by default and a
list of exceptions).

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |  3 +
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 23 +++-----
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 65 +++-------------------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 65 +++++++---------------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 11 +---
 5 files changed, 40 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 72e7dd6d78c0..d907c1acd4d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -792,6 +792,9 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
 	if (!priv->profile->rx_ptp_support)
 		return 0;
 
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return 0;
+
 	if (set) {
 		if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) {
 			netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index f23c67575073..9d4d83159603 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1150,8 +1150,6 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
 static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
 {
 	struct mlx5e_channels new_channels = {};
-	bool reset_channels = true;
-	bool opened;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
@@ -1160,25 +1158,18 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
 	mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params,
 						   trust_state);
 
-	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-	if (!opened)
-		reset_channels = false;
-
 	/* Skip if tx_min_inline is the same */
 	if (new_channels.params.tx_min_inline_mode ==
-	    priv->channels.params.tx_min_inline_mode)
-		reset_channels = false;
-
-	if (reset_channels) {
-		err = mlx5e_safe_switch_channels(priv, &new_channels,
-						 mlx5e_update_trust_state_hw,
-						 &trust_state);
-	} else {
+	    priv->channels.params.tx_min_inline_mode) {
 		err = mlx5e_update_trust_state_hw(priv, &trust_state);
-		if (!err && !opened)
-			priv->channels.params = new_channels.params;
+		goto out;
 	}
 
+	err = mlx5e_safe_switch_channels(priv, &new_channels,
+					 mlx5e_update_trust_state_hw,
+					 &trust_state);
+
+out:
 	mutex_unlock(&priv->state_lock);
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index f17690cbeeea..6a15666f106f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -373,11 +373,6 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 	if (err)
 		goto unlock;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		goto unlock;
-	}
-
 	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 
 unlock:
@@ -425,6 +420,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 	unsigned int count = ch->combined_count;
 	struct mlx5e_channels new_channels = {};
 	bool arfs_enabled;
+	bool opened;
 	int err = 0;
 
 	if (!count) {
@@ -462,19 +458,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 	new_channels.params = *cur_params;
 	new_channels.params.num_channels = count;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		struct mlx5e_params old_params;
-
-		old_params = *cur_params;
-		*cur_params = new_channels.params;
-		err = mlx5e_num_channels_changed(priv);
-		if (err)
-			*cur_params = old_params;
+	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 
-		goto out;
-	}
-
-	arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE;
+	arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE);
 	if (arfs_enabled)
 		mlx5e_arfs_disable(priv);
 
@@ -625,12 +611,10 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 		mlx5e_reset_tx_moderation(&new_channels.params, mode);
 	}
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		goto out;
-	}
-
-	if (!reset_rx && !reset_tx) {
+	/* If DIM state hasn't changed, it's possible to modify interrupt
+	 * moderation parameters on the fly, even if the channels are open.
+	 */
+	if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		if (!coal->use_adaptive_rx_coalesce)
 			mlx5e_set_priv_channels_rx_coalesce(priv, coal);
 		if (!coal->use_adaptive_tx_coalesce)
@@ -1885,11 +1869,6 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 	else
 		mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		return 0;
-	}
-
 	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 }
 
@@ -1920,10 +1899,6 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
 		new_channels.params.ptp_rx = new_val;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		return 0;
-	}
 
 	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
 		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
@@ -1980,11 +1955,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
 	mlx5e_set_rq_type(mdev, &new_channels.params);
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		return 0;
-	}
-
 	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 }
 
@@ -2015,7 +1985,6 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
-	int err;
 
 	if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
 		return -EOPNOTSUPP;
@@ -2024,13 +1993,7 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e
 
 	MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		return 0;
-	}
-
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
-	return err;
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 }
 
 static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
@@ -2070,20 +2033,8 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
 	 * has the same log_sq_size.
 	 */
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		struct mlx5e_params old_params;
-
-		old_params = priv->channels.params;
-		priv->channels.params = new_channels.params;
-		err = mlx5e_num_channels_changed(priv);
-		if (err)
-			priv->channels.params = old_params;
-		goto out;
-	}
-
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
 					 mlx5e_num_channels_changed_ctx, NULL);
-out:
 	if (!err)
 		priv->tx_ptp_opened = true;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cb88d7239db6..0e5539afc3a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3407,18 +3407,6 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 	new_channels.params = priv->channels.params;
 	new_channels.params.num_tc = tc ? tc : 1;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		struct mlx5e_params old_params;
-
-		old_params = priv->channels.params;
-		priv->channels.params = new_channels.params;
-		err = mlx5e_num_channels_changed(priv);
-		if (err)
-			priv->channels.params = old_params;
-
-		goto out;
-	}
-
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
 					 mlx5e_num_channels_changed_ctx, NULL);
 
@@ -3647,8 +3635,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
 	struct mlx5e_params *cur_params;
+	bool skip_reset = false;
 	int err = 0;
-	bool reset;
 
 	mutex_lock(&priv->state_lock);
 
@@ -3666,18 +3654,16 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 		goto out;
 	}
 
-	reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
-
 	new_channels.params = *cur_params;
 	new_channels.params.lro_en = enable;
 
-	if (cur_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
+	if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
 		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
-			reset = false;
+			skip_reset = true;
 	}
 
-	if (!reset) {
+	if (skip_reset) {
 		struct mlx5e_params old_params;
 
 		old_params = *cur_params;
@@ -3920,16 +3906,13 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_channels new_channels = {};
 	struct mlx5e_params *params;
+	bool skip_reset = false;
 	int err = 0;
-	bool reset;
 
 	mutex_lock(&priv->state_lock);
 
 	params = &priv->channels.params;
 
-	reset = !params->lro_en;
-	reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state);
-
 	new_channels.params = *params;
 	new_channels.params.sw_mtu = new_mtu;
 	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
@@ -3951,21 +3934,26 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		goto out;
 	}
 
+	if (params->lro_en)
+		skip_reset = true;
+
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-		bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
-							      &new_channels.params,
-							      NULL);
+		bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
+		bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
+								  &new_channels.params, NULL);
 		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
 		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
 
-		/* If XSK is active, XSK RQs are linear. */
-		is_linear |= priv->xsk.refcnt;
-
-		/* Always reset in linear mode - hw_mtu is used in data path. */
-		reset = reset && (is_linear || (ppw_old != ppw_new));
+		/* Always reset in linear mode - hw_mtu is used in data path.
+		 * Check that the mode was non-linear and didn't change.
+		 * If XSK is active, XSK RQs are linear.
+		 */
+		if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
+		    ppw_old == ppw_new)
+			skip_reset = true;
 	}
 
-	if (!reset) {
+	if (skip_reset) {
 		unsigned int old_mtu = params->sw_mtu;
 
 		params->sw_mtu = new_mtu;
@@ -3976,17 +3964,13 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 				goto out;
 			}
 		}
-		netdev->mtu = params->sw_mtu;
 		goto out;
 	}
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL);
-	if (err)
-		goto out;
-
-	netdev->mtu = new_channels.params.sw_mtu;
 
 out:
+	netdev->mtu = params->sw_mtu;
 	mutex_unlock(&priv->state_lock);
 	return err;
 }
@@ -4061,10 +4045,6 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
 		goto out;
 
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		priv->channels.params = new_channels.params;
-		goto out;
-	}
 	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
 					 &new_channels.params.ptp_rx);
 	if (err) {
@@ -4449,7 +4429,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 		 */
 		bpf_prog_add(prog, priv->channels.num);
 
-	if (was_opened && reset) {
+	if (reset) {
 		struct mlx5e_channels new_channels = {};
 
 		new_channels.params = priv->channels.params;
@@ -4470,9 +4450,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
-		mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
-
 	if (!was_opened || reset)
 		goto unlock;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index b65b0cefc5b3..9555127ce7e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -482,20 +482,11 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
 	struct mlx5e_channels new_channels = {};
-	struct mlx5e_params *params;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
 
-	params = &priv->channels.params;
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		params->sw_mtu = new_mtu;
-		netdev->mtu = params->sw_mtu;
-		goto out;
-	}
-
-	new_channels.params = *params;
+	new_channels.params = priv->channels.params;
 	new_channels.params.sw_mtu = new_mtu;
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
-- 
cgit v1.2.3


From b3b886cf965d5f8d8e51f9481ce60ee8f9548580 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 4 Feb 2021 12:21:16 +0200
Subject: net/mlx5e: Refactor on-the-fly configuration changes

This commit extends mlx5e_safe_switch_channels() to support on-the-fly
configuration changes, when the channels are open, but don't need to be
recreated. Such flows exist when a parameter being changed doesn't
affect how the queues are created, or when the queues can be modified
while remaining active.

Before this commit, such flows were handled as special cases on the
caller site. This commit adds this functionality to
mlx5e_safe_switch_channels(), allowing the caller to pass a boolean
indicating whether it's required to recreate the channels or it's
allowed to skip it. The logic of switching channel parameters is now
completely encapsulated into mlx5e_safe_switch_channels().

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c |  12 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  24 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 151 ++++++++-------------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |   2 +-
 5 files changed, 78 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 28a68eef8ae6..d9ed20a4db53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1021,7 +1021,7 @@ int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
 int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
 			       struct mlx5e_channels *new_chs,
 			       mlx5e_fp_preactivate preactivate,
-			       void *context);
+			       void *context, bool reset);
 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 9d4d83159603..0b0273fac6e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1150,7 +1150,8 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
 static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
 {
 	struct mlx5e_channels new_channels = {};
-	int err = 0;
+	bool reset = true;
+	int err;
 
 	mutex_lock(&priv->state_lock);
 
@@ -1160,16 +1161,13 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
 
 	/* Skip if tx_min_inline is the same */
 	if (new_channels.params.tx_min_inline_mode ==
-	    priv->channels.params.tx_min_inline_mode) {
-		err = mlx5e_update_trust_state_hw(priv, &trust_state);
-		goto out;
-	}
+	    priv->channels.params.tx_min_inline_mode)
+		reset = false;
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
 					 mlx5e_update_trust_state_hw,
-					 &trust_state);
+					 &trust_state, reset);
 
-out:
 	mutex_unlock(&priv->state_lock);
 
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 6a15666f106f..82994175aded 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -373,7 +373,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 	if (err)
 		goto unlock;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 
 unlock:
 	mutex_unlock(&priv->state_lock);
@@ -466,7 +466,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 
 	/* Switch to new channels, set new parameters and close old ones */
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL);
+					 mlx5e_num_channels_changed_ctx, NULL, true);
 
 	if (arfs_enabled) {
 		int err2 = mlx5e_arfs_enable(priv);
@@ -563,6 +563,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
 	bool reset_rx, reset_tx;
+	bool reset = true;
 	int err = 0;
 
 	if (!MLX5_CAP_GEN(mdev, cq_moderation))
@@ -619,13 +620,11 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 			mlx5e_set_priv_channels_rx_coalesce(priv, coal);
 		if (!coal->use_adaptive_tx_coalesce)
 			mlx5e_set_priv_channels_tx_coalesce(priv, coal);
-		priv->channels.params = new_channels.params;
-		goto out;
+		reset = false;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, reset);
 
-out:
 	mutex_unlock(&priv->state_lock);
 	return err;
 }
@@ -1869,7 +1868,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 	else
 		mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 }
 
 static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
@@ -1899,12 +1898,11 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
 		new_channels.params.ptp_rx = new_val;
 
-
 	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
-		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 	else
 		err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
-						 &new_channels.params.ptp_rx);
+						 &new_channels.params.ptp_rx, true);
 	if (err)
 		return err;
 
@@ -1955,7 +1953,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
 	mlx5e_set_rq_type(mdev, &new_channels.params);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 }
 
 static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
@@ -1993,7 +1991,7 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e
 
 	MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 }
 
 static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
@@ -2034,7 +2032,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
 	 */
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL);
+					 mlx5e_num_channels_changed_ctx, NULL, true);
 	if (!err)
 		priv->tx_ptp_opened = true;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e5539afc3a0..7686d4997696 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2844,6 +2844,29 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 	mlx5e_deactivate_channels(&priv->channels);
 }
 
+static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
+				    struct mlx5e_params *new_params,
+				    mlx5e_fp_preactivate preactivate,
+				    void *context)
+{
+	struct mlx5e_params old_params;
+
+	old_params = priv->channels.params;
+	priv->channels.params = *new_params;
+
+	if (preactivate) {
+		int err;
+
+		err = preactivate(priv, context);
+		if (err) {
+			priv->channels.params = old_params;
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 				      struct mlx5e_channels *new_chs,
 				      mlx5e_fp_preactivate preactivate,
@@ -2852,16 +2875,12 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 	struct net_device *netdev = priv->netdev;
 	struct mlx5e_channels old_chs;
 	int carrier_ok;
-	bool opened;
 	int err = 0;
 
-	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-	if (opened) {
-		carrier_ok = netif_carrier_ok(netdev);
-		netif_carrier_off(netdev);
+	carrier_ok = netif_carrier_ok(netdev);
+	netif_carrier_off(netdev);
 
-		mlx5e_deactivate_priv_channels(priv);
-	}
+	mlx5e_deactivate_priv_channels(priv);
 
 	old_chs = priv->channels;
 	priv->channels = *new_chs;
@@ -2877,19 +2896,15 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		}
 	}
 
-	if (opened) {
-		mlx5e_close_channels(&old_chs);
-		priv->profile->update_rx(priv);
-	}
+	mlx5e_close_channels(&old_chs);
+	priv->profile->update_rx(priv);
 
 out:
-	if (opened) {
-		mlx5e_activate_priv_channels(priv);
+	mlx5e_activate_priv_channels(priv);
 
-		/* return carrier back if needed */
-		if (carrier_ok)
-			netif_carrier_on(netdev);
-	}
+	/* return carrier back if needed */
+	if (carrier_ok)
+		netif_carrier_on(netdev);
 
 	return err;
 }
@@ -2897,27 +2912,19 @@ out:
 int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
 			       struct mlx5e_channels *new_chs,
 			       mlx5e_fp_preactivate preactivate,
-			       void *context)
+			       void *context, bool reset)
 {
-	bool opened;
 	int err;
 
-	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-
-	if (opened) {
-		err = mlx5e_open_channels(priv, new_chs);
-		if (err)
-			return err;
-	}
+	reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
+	if (!reset)
+		return mlx5e_switch_priv_params(priv, &new_chs->params, preactivate, context);
 
+	err = mlx5e_open_channels(priv, new_chs);
+	if (err)
+		return err;
 	err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
 	if (err)
-		goto err_close;
-
-	return 0;
-
-err_close:
-	if (opened)
 		mlx5e_close_channels(new_chs);
 
 	return err;
@@ -2928,7 +2935,7 @@ int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
 	struct mlx5e_channels new_channels = {};
 
 	new_channels.params = priv->channels.params;
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 }
 
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
@@ -3408,7 +3415,7 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 	new_channels.params.num_tc = tc ? tc : 1;
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL);
+					 mlx5e_num_channels_changed_ctx, NULL, true);
 
 out:
 	priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
@@ -3635,7 +3642,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_channels new_channels = {};
 	struct mlx5e_params *cur_params;
-	bool skip_reset = false;
+	bool reset = true;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
@@ -3660,22 +3667,11 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 	if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
 		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
-			skip_reset = true;
-	}
-
-	if (skip_reset) {
-		struct mlx5e_params old_params;
-
-		old_params = *cur_params;
-		*cur_params = new_channels.params;
-		err = mlx5e_modify_tirs_lro(priv);
-		if (err)
-			*cur_params = old_params;
-		goto out;
+			reset = false;
 	}
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_modify_tirs_lro_ctx, NULL);
+					 mlx5e_modify_tirs_lro_ctx, NULL, reset);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -3906,7 +3902,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_channels new_channels = {};
 	struct mlx5e_params *params;
-	bool skip_reset = false;
+	bool reset = true;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
@@ -3935,7 +3931,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	}
 
 	if (params->lro_en)
-		skip_reset = true;
+		reset = false;
 
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
@@ -3950,24 +3946,10 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		 */
 		if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
 		    ppw_old == ppw_new)
-			skip_reset = true;
-	}
-
-	if (skip_reset) {
-		unsigned int old_mtu = params->sw_mtu;
-
-		params->sw_mtu = new_mtu;
-		if (preactivate) {
-			err = preactivate(priv, NULL);
-			if (err) {
-				params->sw_mtu = old_mtu;
-				goto out;
-			}
-		}
-		goto out;
+			reset = false;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL, reset);
 
 out:
 	netdev->mtu = params->sw_mtu;
@@ -4046,7 +4028,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 		goto out;
 
 	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
-					 &new_channels.params.ptp_rx);
+					 &new_channels.params.ptp_rx, true);
 	if (err) {
 		mutex_unlock(&priv->state_lock);
 		return err;
@@ -4406,9 +4388,10 @@ static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_channels new_channels = {};
 	struct bpf_prog *old_prog;
-	bool reset, was_opened;
 	int err = 0;
+	bool reset;
 	int i;
 
 	mutex_lock(&priv->state_lock);
@@ -4419,43 +4402,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 			goto unlock;
 	}
 
-	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
 
-	if (was_opened && !reset)
-		/* num_channels is invariant here, so we can take the
-		 * batched reference right upfront.
-		 */
-		bpf_prog_add(prog, priv->channels.num);
-
-	if (reset) {
-		struct mlx5e_channels new_channels = {};
-
-		new_channels.params = priv->channels.params;
-		new_channels.params.xdp_prog = prog;
+	new_channels.params = priv->channels.params;
+	new_channels.params.xdp_prog = prog;
+	if (reset)
 		mlx5e_set_rq_type(priv->mdev, &new_channels.params);
-		old_prog = priv->channels.params.xdp_prog;
+	old_prog = priv->channels.params.xdp_prog;
 
-		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
-		if (err)
-			goto unlock;
-	} else {
-		/* exchange programs, extra prog reference we got from caller
-		 * as long as we don't fail from this point onwards.
-		 */
-		old_prog = xchg(&priv->channels.params.xdp_prog, prog);
-	}
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, reset);
+	if (err)
+		goto unlock;
 
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (!was_opened || reset)
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
 		goto unlock;
 
 	/* exchanging programs w/o reset, we update ref counts on behalf
 	 * of the channels RQs here.
 	 */
+	bpf_prog_add(prog, priv->channels.num);
 	for (i = 0; i < priv->channels.num; i++) {
 		struct mlx5e_channel *c = priv->channels.c[i];
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 9555127ce7e7..df409111ef3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -489,7 +489,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
 	new_channels.params = priv->channels.params;
 	new_channels.params.sw_mtu = new_mtu;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
+	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From 94872d4ef9c09cb0938595b473c68f4a5fb138f6 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Tue, 16 Mar 2021 15:47:58 +0200
Subject: net/mlx5e: Cleanup safe switch channels API by passing params

mlx5e_safe_switch_channels accepts new_chs as a parameter and opens new
channels in place, then copying them to priv->channels. It requires all
the callers to allocate space for this temporary storage of the new
channels.

This commit cleans up the API by replacing new_chs with new_params, a
meaningful subset of new_chs to be filled by the caller. The temporary
space for the new channels is allocated inside mlx5e_safe_switch_params
(a new name for mlx5e_safe_switch_channels). An extra copy of params is
made, but since it's control flow, it's not critical.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 15 ++--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 98 ++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 99 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 10 +--
 5 files changed, 114 insertions(+), 116 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d9ed20a4db53..b636d63358d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1018,10 +1018,10 @@ int fn##_ctx(struct mlx5e_priv *priv, void *context) \
 	return fn(priv); \
 }
 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv);
-int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
-			       struct mlx5e_channels *new_chs,
-			       mlx5e_fp_preactivate preactivate,
-			       void *context, bool reset);
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+			     struct mlx5e_params *new_params,
+			     mlx5e_fp_preactivate preactivate,
+			     void *context, bool reset);
 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
 int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 0b0273fac6e3..a4c8d8d00d5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -1149,24 +1149,23 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
 
 static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state)
 {
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	bool reset = true;
 	int err;
 
 	mutex_lock(&priv->state_lock);
 
-	new_channels.params = priv->channels.params;
-	mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params,
+	new_params = priv->channels.params;
+	mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params,
 						   trust_state);
 
 	/* Skip if tx_min_inline is the same */
-	if (new_channels.params.tx_min_inline_mode ==
-	    priv->channels.params.tx_min_inline_mode)
+	if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode)
 		reset = false;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_update_trust_state_hw,
-					 &trust_state, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params,
+				       mlx5e_update_trust_state_hw,
+				       &trust_state, reset);
 
 	mutex_unlock(&priv->state_lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 82994175aded..ef4a330c4cfa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -326,7 +326,7 @@ static void mlx5e_get_ringparam(struct net_device *dev,
 int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 				struct ethtool_ringparam *param)
 {
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	u8 log_rq_size;
 	u8 log_sq_size;
 	int err = 0;
@@ -365,15 +365,15 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv,
 
 	mutex_lock(&priv->state_lock);
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.log_rq_mtu_frames = log_rq_size;
-	new_channels.params.log_sq_size = log_sq_size;
+	new_params = priv->channels.params;
+	new_params.log_rq_mtu_frames = log_rq_size;
+	new_params.log_sq_size = log_sq_size;
 
-	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
+	err = mlx5e_validate_params(priv->mdev, &new_params);
 	if (err)
 		goto unlock;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 
 unlock:
 	mutex_unlock(&priv->state_lock);
@@ -418,7 +418,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 {
 	struct mlx5e_params *cur_params = &priv->channels.params;
 	unsigned int count = ch->combined_count;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	bool arfs_enabled;
 	bool opened;
 	int err = 0;
@@ -455,8 +455,8 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	new_channels.params = *cur_params;
-	new_channels.params.num_channels = count;
+	new_params = *cur_params;
+	new_params.num_channels = count;
 
 	opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 
@@ -465,8 +465,8 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 		mlx5e_arfs_disable(priv);
 
 	/* Switch to new channels, set new parameters and close old ones */
-	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params,
+				       mlx5e_num_channels_changed_ctx, NULL, true);
 
 	if (arfs_enabled) {
 		int err2 = mlx5e_arfs_enable(priv);
@@ -561,7 +561,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 {
 	struct dim_cq_moder *rx_moder, *tx_moder;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	bool reset_rx, reset_tx;
 	bool reset = true;
 	int err = 0;
@@ -584,32 +584,32 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 	}
 
 	mutex_lock(&priv->state_lock);
-	new_channels.params = priv->channels.params;
+	new_params = priv->channels.params;
 
-	rx_moder          = &new_channels.params.rx_cq_moderation;
+	rx_moder          = &new_params.rx_cq_moderation;
 	rx_moder->usec    = coal->rx_coalesce_usecs;
 	rx_moder->pkts    = coal->rx_max_coalesced_frames;
-	new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
+	new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce;
 
-	tx_moder          = &new_channels.params.tx_cq_moderation;
+	tx_moder          = &new_params.tx_cq_moderation;
 	tx_moder->usec    = coal->tx_coalesce_usecs;
 	tx_moder->pkts    = coal->tx_max_coalesced_frames;
-	new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
+	new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
 
 	reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
 	reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
 
 	if (reset_rx) {
-		u8 mode = MLX5E_GET_PFLAG(&new_channels.params,
+		u8 mode = MLX5E_GET_PFLAG(&new_params,
 					  MLX5E_PFLAG_RX_CQE_BASED_MODER);
 
-		mlx5e_reset_rx_moderation(&new_channels.params, mode);
+		mlx5e_reset_rx_moderation(&new_params, mode);
 	}
 	if (reset_tx) {
-		u8 mode = MLX5E_GET_PFLAG(&new_channels.params,
+		u8 mode = MLX5E_GET_PFLAG(&new_params,
 					  MLX5E_PFLAG_TX_CQE_BASED_MODER);
 
-		mlx5e_reset_tx_moderation(&new_channels.params, mode);
+		mlx5e_reset_tx_moderation(&new_params, mode);
 	}
 
 	/* If DIM state hasn't changed, it's possible to modify interrupt
@@ -623,7 +623,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
 		reset = false;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
 
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -1843,7 +1843,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	bool mode_changed;
 	u8 cq_period_mode, current_cq_period_mode;
 
@@ -1862,13 +1862,13 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
 	if (!mode_changed)
 		return 0;
 
-	new_channels.params = priv->channels.params;
+	new_params = priv->channels.params;
 	if (is_rx_cq)
-		mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode);
+		mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode);
 	else
-		mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode);
+		mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 }
 
 static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable)
@@ -1884,7 +1884,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
 int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val)
 {
 	bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS);
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	int err = 0;
 
 	if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
@@ -1893,16 +1893,16 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	if (curr_val == new_val)
 		return 0;
 
-	new_channels.params = priv->channels.params;
-	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
+	new_params = priv->channels.params;
+	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
-		new_channels.params.ptp_rx = new_val;
+		new_params.ptp_rx = new_val;
 
-	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
-		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	if (new_params.ptp_rx == priv->channels.params.ptp_rx)
+		err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 	else
-		err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
-						 &new_channels.params.ptp_rx, true);
+		err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+					       &new_params.ptp_rx, true);
 	if (err)
 		return err;
 
@@ -1936,7 +1936,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 
 	if (enable) {
 		if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
@@ -1948,12 +1948,12 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 		return -EINVAL;
 	}
 
-	new_channels.params = priv->channels.params;
+	new_params = priv->channels.params;
 
-	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
-	mlx5e_set_rq_type(mdev, &new_channels.params);
+	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
+	mlx5e_set_rq_type(mdev, &new_params);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 }
 
 static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
@@ -1982,16 +1982,16 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 
 	if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
 		return -EOPNOTSUPP;
 
-	new_channels.params = priv->channels.params;
+	new_params = priv->channels.params;
 
-	MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
+	MLX5E_SET_PFLAG(&new_params, flag, enable);
 
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 }
 
 static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
@@ -2008,7 +2008,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	int err;
 
 	if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
@@ -2024,15 +2024,15 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
 		return -EINVAL;
 	}
 
-	new_channels.params = priv->channels.params;
-	MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable);
+	new_params = priv->channels.params;
+	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable);
 	/* No need to verify SQ stop room as
 	 * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both
 	 * has the same log_sq_size.
 	 */
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params,
+				       mlx5e_num_channels_changed_ctx, NULL, true);
 	if (!err)
 		priv->tx_ptp_opened = true;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7686d4997696..feb347e81448 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2909,33 +2909,32 @@ out:
 	return err;
 }
 
-int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
-			       struct mlx5e_channels *new_chs,
-			       mlx5e_fp_preactivate preactivate,
-			       void *context, bool reset)
+int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
+			     struct mlx5e_params *params,
+			     mlx5e_fp_preactivate preactivate,
+			     void *context, bool reset)
 {
+	struct mlx5e_channels new_chs = {};
 	int err;
 
 	reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
 	if (!reset)
-		return mlx5e_switch_priv_params(priv, &new_chs->params, preactivate, context);
+		return mlx5e_switch_priv_params(priv, params, preactivate, context);
 
-	err = mlx5e_open_channels(priv, new_chs);
+	new_chs.params = *params;
+	err = mlx5e_open_channels(priv, &new_chs);
 	if (err)
 		return err;
-	err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
+	err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
 	if (err)
-		mlx5e_close_channels(new_chs);
+		mlx5e_close_channels(&new_chs);
 
 	return err;
 }
 
 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
 {
-	struct mlx5e_channels new_channels = {};
-
-	new_channels.params = priv->channels.params;
-	return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
 }
 
 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
@@ -3392,7 +3391,7 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 				 struct tc_mqprio_qopt *mqprio)
 {
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	u8 tc = mqprio->num_tc;
 	int err = 0;
 
@@ -3411,11 +3410,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.num_tc = tc ? tc : 1;
+	new_params = priv->channels.params;
+	new_params.num_tc = tc ? tc : 1;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_num_channels_changed_ctx, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params,
+				       mlx5e_num_channels_changed_ctx, NULL, true);
 
 out:
 	priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
@@ -3640,8 +3639,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_channels new_channels = {};
 	struct mlx5e_params *cur_params;
+	struct mlx5e_params new_params;
 	bool reset = true;
 	int err = 0;
 
@@ -3661,17 +3660,17 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 		goto out;
 	}
 
-	new_channels.params = *cur_params;
-	new_channels.params.lro_en = enable;
+	new_params = *cur_params;
+	new_params.lro_en = enable;
 
 	if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
-		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL))
+		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
 			reset = false;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels,
-					 mlx5e_modify_tirs_lro_ctx, NULL, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params,
+				       mlx5e_modify_tirs_lro_ctx, NULL, reset);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -3900,7 +3899,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		     mlx5e_fp_preactivate preactivate)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	struct mlx5e_params *params;
 	bool reset = true;
 	int err = 0;
@@ -3909,14 +3908,14 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 
 	params = &priv->channels.params;
 
-	new_channels.params = *params;
-	new_channels.params.sw_mtu = new_mtu;
-	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
+	new_params = *params;
+	new_params.sw_mtu = new_mtu;
+	err = mlx5e_validate_params(priv->mdev, &new_params);
 	if (err)
 		goto out;
 
 	if (params->xdp_prog &&
-	    !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
+	    !mlx5e_rx_is_linear_skb(&new_params, NULL)) {
 		netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
 			   new_mtu, mlx5e_xdp_max_mtu(params, NULL));
 		err = -EINVAL;
@@ -3925,7 +3924,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 
 	if (priv->xsk.refcnt &&
 	    !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
-				    &new_channels.params, priv->mdev)) {
+				    &new_params, priv->mdev)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -3936,9 +3935,9 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
 		bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
-								  &new_channels.params, NULL);
+								  &new_params, NULL);
 		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
-		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL);
+		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL);
 
 		/* Always reset in linear mode - hw_mtu is used in data path.
 		 * Check that the mode was non-linear and didn't change.
@@ -3949,7 +3948,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 			reset = false;
 	}
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
 
 out:
 	netdev->mtu = params->sw_mtu;
@@ -3971,7 +3970,7 @@ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
 
 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 {
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	struct hwtstamp_config config;
 	bool rx_cqe_compress_def;
 	int err;
@@ -3993,13 +3992,13 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	}
 
 	mutex_lock(&priv->state_lock);
-	new_channels.params = priv->channels.params;
+	new_params = priv->channels.params;
 	rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
 
 	/* RX HW timestamp */
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		new_channels.params.ptp_rx = false;
+		new_params.ptp_rx = false;
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_SOME:
@@ -4016,7 +4015,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 	case HWTSTAMP_FILTER_NTP_ALL:
-		new_channels.params.ptp_rx = rx_cqe_compress_def;
+		new_params.ptp_rx = rx_cqe_compress_def;
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
 		break;
 	default:
@@ -4024,11 +4023,11 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 		return -ERANGE;
 	}
 
-	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
+	if (new_params.ptp_rx == priv->channels.params.ptp_rx)
 		goto out;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
-					 &new_channels.params.ptp_rx, true);
+	err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+				       &new_params.ptp_rx, true);
 	if (err) {
 		mutex_unlock(&priv->state_lock);
 		return err;
@@ -4346,7 +4345,7 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
 static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
 {
 	struct net_device *netdev = priv->netdev;
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 
 	if (priv->channels.params.lro_en) {
 		netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
@@ -4359,16 +4358,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
 		return -EINVAL;
 	}
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.xdp_prog = prog;
+	new_params = priv->channels.params;
+	new_params.xdp_prog = prog;
 
 	/* No XSK params: AF_XDP can't be enabled yet at the point of setting
 	 * the XDP program.
 	 */
-	if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) {
+	if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) {
 		netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
-			    new_channels.params.sw_mtu,
-			    mlx5e_xdp_max_mtu(&new_channels.params, NULL));
+			    new_params.sw_mtu,
+			    mlx5e_xdp_max_mtu(&new_params, NULL));
 		return -EINVAL;
 	}
 
@@ -4388,7 +4387,7 @@ static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog
 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	struct bpf_prog *old_prog;
 	int err = 0;
 	bool reset;
@@ -4405,13 +4404,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.xdp_prog = prog;
+	new_params = priv->channels.params;
+	new_params.xdp_prog = prog;
 	if (reset)
-		mlx5e_set_rq_type(priv->mdev, &new_channels.params);
+		mlx5e_set_rq_type(priv->mdev, &new_params);
 	old_prog = priv->channels.params.xdp_prog;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, reset);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
 	if (err)
 		goto unlock;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index df409111ef3a..612a7f69366d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -481,19 +481,19 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
 static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
-	struct mlx5e_channels new_channels = {};
+	struct mlx5e_params new_params;
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
 
-	new_channels.params = priv->channels.params;
-	new_channels.params.sw_mtu = new_mtu;
+	new_params = priv->channels.params;
+	new_params.sw_mtu = new_mtu;
 
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 	if (err)
 		goto out;
 
-	netdev->mtu = new_channels.params.sw_mtu;
+	netdev->mtu = new_params.sw_mtu;
 
 out:
 	mutex_unlock(&priv->state_lock);
-- 
cgit v1.2.3


From 5cec6de0ae09457bdab2308e9fbac962dd7be3db Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Sun, 11 Apr 2021 11:50:18 +0300
Subject: net/mlx5: Allocate FC bulk structs with kvzalloc() instead of
 kzalloc()

The bulk size is larger than 16K so use kvzalloc().
The bulk bitmask upper size limit is 16K so use kvcalloc().

Signed-off-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index f43caefd07a1..18e5aec14641 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -497,13 +497,13 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
 	alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc);
 	bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1;
 
-	bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc),
-		       GFP_KERNEL);
+	bulk = kvzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc),
+			GFP_KERNEL);
 	if (!bulk)
 		goto err_alloc_bulk;
 
-	bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
-				GFP_KERNEL);
+	bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long),
+				 GFP_KERNEL);
 	if (!bulk->bitmask)
 		goto err_alloc_bitmask;
 
@@ -521,9 +521,9 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
 	return bulk;
 
 err_mlx5_cmd_bulk_alloc:
-	kfree(bulk->bitmask);
+	kvfree(bulk->bitmask);
 err_alloc_bitmask:
-	kfree(bulk);
+	kvfree(bulk);
 err_alloc_bulk:
 	return ERR_PTR(err);
 }
@@ -537,8 +537,8 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
 	}
 
 	mlx5_cmd_fc_free(dev, bulk->base_id);
-	kfree(bulk->bitmask);
-	kfree(bulk);
+	kvfree(bulk->bitmask);
+	kvfree(bulk);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 36830159acbeb9896d7684b5f52db7b22efa197f Mon Sep 17 00:00:00 2001
From: Moshe Tal <moshet@nvidia.com>
Date: Mon, 15 Feb 2021 16:13:02 +0200
Subject: net/mlx5: Add register layout to support extended link state

Add needed structure layouts and defines for pddr register
(Port Diagnostics Database Register) and the troublshooting page.

This will be used to get extended link state from the monitor opcode
bits.

Signed-off-by: Moshe Tal <moshet@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 50 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2da953ad02ed..4e531c2aab52 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -127,6 +127,7 @@ enum {
 	MLX5_REG_PELC		 = 0x500e,
 	MLX5_REG_PVLC		 = 0x500f,
 	MLX5_REG_PCMR		 = 0x5041,
+	MLX5_REG_PDDR		 = 0x5031,
 	MLX5_REG_PMLP		 = 0x5002,
 	MLX5_REG_PPLM		 = 0x5023,
 	MLX5_REG_PCAM		 = 0x507f,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1599deee0456..f2c51d6833c6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9956,6 +9956,53 @@ struct mlx5_ifc_mirc_reg_bits {
 	u8         reserved_at_20[0x20];
 };
 
+struct mlx5_ifc_pddr_monitor_opcode_bits {
+	u8         reserved_at_0[0x10];
+	u8         monitor_opcode[0x10];
+};
+
+union mlx5_ifc_pddr_troubleshooting_page_status_opcode_auto_bits {
+	struct mlx5_ifc_pddr_monitor_opcode_bits pddr_monitor_opcode;
+	u8         reserved_at_0[0x20];
+};
+
+enum {
+	/* Monitor opcodes */
+	MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR = 0x0,
+};
+
+struct mlx5_ifc_pddr_troubleshooting_page_bits {
+	u8         reserved_at_0[0x10];
+	u8         group_opcode[0x10];
+
+	union mlx5_ifc_pddr_troubleshooting_page_status_opcode_auto_bits status_opcode;
+
+	u8         reserved_at_40[0x20];
+
+	u8         status_message[59][0x20];
+};
+
+union mlx5_ifc_pddr_reg_page_data_auto_bits {
+	struct mlx5_ifc_pddr_troubleshooting_page_bits pddr_troubleshooting_page;
+	u8         reserved_at_0[0x7c0];
+};
+
+enum {
+	MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE      = 0x1,
+};
+
+struct mlx5_ifc_pddr_reg_bits {
+	u8         reserved_at_0[0x8];
+	u8         local_port[0x8];
+	u8         pnat[0x2];
+	u8         reserved_at_12[0xe];
+
+	u8         reserved_at_20[0x18];
+	u8         page_select[0x8];
+
+	union mlx5_ifc_pddr_reg_page_data_auto_bits page_data;
+};
+
 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -9970,6 +10017,9 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_pamp_reg_bits pamp_reg;
 	struct mlx5_ifc_paos_reg_bits paos_reg;
 	struct mlx5_ifc_pcap_reg_bits pcap_reg;
+	struct mlx5_ifc_pddr_monitor_opcode_bits pddr_monitor_opcode;
+	struct mlx5_ifc_pddr_reg_bits pddr_reg;
+	struct mlx5_ifc_pddr_troubleshooting_page_bits pddr_troubleshooting_page;
 	struct mlx5_ifc_peir_reg_bits peir_reg;
 	struct mlx5_ifc_pelc_reg_bits pelc_reg;
 	struct mlx5_ifc_pfcc_reg_bits pfcc_reg;
-- 
cgit v1.2.3


From b3446acb2b9ae6128587cd2d311214950adfb68b Mon Sep 17 00:00:00 2001
From: Moshe Tal <moshet@nvidia.com>
Date: Tue, 23 Feb 2021 13:42:17 +0200
Subject: net/mlx5e: Add ethtool extended link state

In case the interface was set up but cannot establish the link, ethtool
will print more information to help the user troubleshoot the state.

For example, no link due to missing cable:
$ ethtool eth1
...
Link detected: no (No cable)

Beside the general extended state, drivers can pass additional
information about the link state using the sub-state field. For example:

$ ethtool eth1
...
Link detected: no (Autoneg, No partner detected)

The extended state is available only for specific cases, in other cases
ethtool with print only "Link detected: no" as before

Signed-off-by: Moshe Tal <moshet@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   | 171 +++++++++++++++++++++
 1 file changed, 171 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index ef4a330c4cfa..e1d3cd1f1f11 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -2125,12 +2125,183 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 	return mlx5e_ethtool_set_rxnfc(dev, cmd);
 }
 
+static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode)
+{
+	struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page;
+	u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {};
+	u32 out[MLX5_ST_SZ_DW(pddr_reg)];
+	int err;
+
+	MLX5_SET(pddr_reg, in, local_port, 1);
+	MLX5_SET(pddr_reg, in, page_select,
+		 MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE);
+
+	pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data);
+	MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+		 group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR);
+	err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_PDDR, 0, 0);
+	if (err)
+		return err;
+
+	pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data);
+	*status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page,
+				  status_opcode);
+	return 0;
+}
+
+struct mlx5e_ethtool_link_ext_state_opcode_mapping {
+	u32 status_opcode;
+	enum ethtool_link_ext_state link_ext_state;
+	u8 link_ext_substate;
+};
+
+static const struct mlx5e_ethtool_link_ext_state_opcode_mapping
+mlx5e_link_ext_state_opcode_map[] = {
+	/* States relating to the autonegotiation or issues therein */
+	{2, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED},
+	{3, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED},
+	{4, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED},
+	{36, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE},
+	{38, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE},
+	{39, ETHTOOL_LINK_EXT_STATE_AUTONEG,
+		ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD},
+
+	/* Failure during link training */
+	{5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+		ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED},
+	{6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+		ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT},
+	{7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+		ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY},
+	{8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0},
+	{14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
+		ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT},
+
+	/* Logical mismatch in physical coding sublayer or forward error correction sublayer */
+	{9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+		ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK},
+	{10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+		ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK},
+	{11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+		ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS},
+	{12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+		ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED},
+	{13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH,
+		ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED},
+
+	/* Signal integrity issues */
+	{15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0},
+	{17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+		ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS},
+	{42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY,
+		ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE},
+
+	/* No cable connected */
+	{1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0},
+
+	/* Failure is related to cable, e.g., unsupported cable */
+	{16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+		ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+	{20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+		ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+	{29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+		ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+	{1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+		ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+	{1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE,
+		ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE},
+	{1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0},
+
+	/* Failure is related to EEPROM, e.g., failure during reading or parsing the data */
+	{1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0},
+
+	/* Failure during calibration algorithm */
+	{23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0},
+
+	/* The hardware is not able to provide the power required from cable or module */
+	{1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0},
+
+	/* The module is overheated */
+	{1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0},
+};
+
+static void
+mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping
+			 link_ext_state_mapping,
+			 struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+	switch (link_ext_state_mapping.link_ext_state) {
+	case ETHTOOL_LINK_EXT_STATE_AUTONEG:
+		link_ext_state_info->autoneg =
+			link_ext_state_mapping.link_ext_substate;
+		break;
+	case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE:
+		link_ext_state_info->link_training =
+			link_ext_state_mapping.link_ext_substate;
+		break;
+	case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH:
+		link_ext_state_info->link_logical_mismatch =
+			link_ext_state_mapping.link_ext_substate;
+		break;
+	case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY:
+		link_ext_state_info->bad_signal_integrity =
+			link_ext_state_mapping.link_ext_substate;
+		break;
+	case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE:
+		link_ext_state_info->cable_issue =
+			link_ext_state_mapping.link_ext_substate;
+		break;
+	default:
+		break;
+	}
+
+	link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state;
+}
+
+static int
+mlx5e_get_link_ext_state(struct net_device *dev,
+			 struct ethtool_link_ext_state_info *link_ext_state_info)
+{
+	struct mlx5e_ethtool_link_ext_state_opcode_mapping link_ext_state_mapping;
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	u32 status_opcode = 0;
+	int i;
+
+	/* Exit without data if the interface state is OK, since no extended data is
+	 * available in such case
+	 */
+	if (netif_carrier_ok(dev))
+		return -ENODATA;
+
+	if (query_port_status_opcode(priv->mdev, &status_opcode) ||
+	    !status_opcode)
+		return -ENODATA;
+
+	for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) {
+		link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i];
+		if (link_ext_state_mapping.status_opcode == status_opcode) {
+			mlx5e_set_link_ext_state(link_ext_state_mapping,
+						 link_ext_state_info);
+			return 0;
+		}
+	}
+
+	return -ENODATA;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES |
 				     ETHTOOL_COALESCE_USE_ADAPTIVE,
 	.get_drvinfo       = mlx5e_get_drvinfo,
 	.get_link          = ethtool_op_get_link,
+	.get_link_ext_state  = mlx5e_get_link_ext_state,
 	.get_strings       = mlx5e_get_strings,
 	.get_sset_count    = mlx5e_get_sset_count,
 	.get_ethtool_stats = mlx5e_get_ethtool_stats,
-- 
cgit v1.2.3


From 302522e67c70c57bd35c2793d419dba457871ca0 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Mar 2021 12:43:24 +0200
Subject: net/mlx5: Add helper to initialize 1PPS

Wrap 1PPS initialization in a helper for a cleaner init flow.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/lib/clock.c    | 25 +++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 1e7f26b240de..ce696d523493 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -645,16 +645,19 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin)
 	return PTP_PF_NONE;
 }
 
-static int mlx5_init_pin_config(struct mlx5_clock *clock)
+static void mlx5_init_pin_config(struct mlx5_clock *clock)
 {
 	int i;
 
+	if (!clock->ptp_info.n_pins)
+		return;
+
 	clock->ptp_info.pin_config =
 			kcalloc(clock->ptp_info.n_pins,
 				sizeof(*clock->ptp_info.pin_config),
 				GFP_KERNEL);
 	if (!clock->ptp_info.pin_config)
-		return -ENOMEM;
+		return;
 	clock->ptp_info.enable = mlx5_ptp_enable;
 	clock->ptp_info.verify = mlx5_ptp_verify;
 	clock->ptp_info.pps = 1;
@@ -667,8 +670,6 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock)
 		clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i);
 		clock->ptp_info.pin_config[i].chan = 0;
 	}
-
-	return 0;
 }
 
 static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
@@ -859,6 +860,17 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev)
 	}
 }
 
+static void mlx5_init_pps(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_clock *clock = &mdev->clock;
+
+	if (!MLX5_PPS_CAP(mdev))
+		return;
+
+	mlx5_get_pps_caps(mdev);
+	mlx5_init_pin_config(clock);
+}
+
 void mlx5_init_clock(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_clock *clock = &mdev->clock;
@@ -876,10 +888,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
 	clock->ptp_info = mlx5_ptp_clock_info;
 
 	/* Initialize 1PPS data structures */
-	if (MLX5_PPS_CAP(mdev))
-		mlx5_get_pps_caps(mdev);
-	if (clock->ptp_info.n_pins)
-		mlx5_init_pin_config(clock);
+	mlx5_init_pps(mdev);
 
 	clock->ptp = ptp_clock_register(&clock->ptp_info,
 					&mdev->pdev->dev);
-- 
cgit v1.2.3


From 95742c1cc59d0a6aa2ca9e75bd21f2a8721f5129 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 8 Mar 2021 12:26:35 +0200
Subject: net/mlx5: Enhance diagnostics info for TX/RX reporters

Add ts_format to 'Common Config' section of the TX/RX devlink reporters
diagnostics info. Possible values for ts_format: 'RT' or 'FRC'
which stands for: Real Time and Free Running Counters correspondingly.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index f9fdf3606bbd..0eb125316fe2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -323,10 +323,12 @@ static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
 	struct mlx5e_priv *priv = rq->priv;
 	struct mlx5e_params *params;
 	u32 rq_stride, rq_sz;
+	bool real_time;
 	int err;
 
 	params = &priv->channels.params;
 	rq_sz = mlx5e_rqwq_get_size(rq);
+	real_time =  mlx5_is_real_time_rq(priv->mdev);
 	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
 
 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
@@ -345,6 +347,10 @@ static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
 	if (err)
 		return err;
 
+	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+	if (err)
+		return err;
+
 	err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg);
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 1a0505bd1e9a..9d361efd5ff7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -257,12 +257,14 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
 					 struct mlx5e_txqsq *txqsq)
 {
 	u32 sq_stride, sq_sz;
+	bool real_time;
 	int err;
 
 	err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ");
 	if (err)
 		return err;
 
+	real_time =  mlx5_is_real_time_sq(txqsq->mdev);
 	sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq);
 	sq_stride = MLX5_SEND_WQE_BB;
 
@@ -274,6 +276,10 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg,
 	if (err)
 		return err;
 
+	err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC");
+	if (err)
+		return err;
+
 	err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg);
 	if (err)
 		return err;
-- 
cgit v1.2.3


From 1d3cb90cb0101bb44254d295a421a89f3b73f6e8 Mon Sep 17 00:00:00 2001
From: Grzegorz Siwik <grzegorz.siwik@intel.com>
Date: Tue, 23 Feb 2021 15:15:27 +0100
Subject: igb: Add double-check MTA_REGISTER for i210 and i211

Add new function which checks MTA_REGISTER if its filled correctly.
If not then writes again to same register.
There is possibility that i210 and i211 could not accept
MTA_REGISTER settings, specially when you add and remove
many of multicast addresses in short time.
Without this patch there is possibility that multicast settings will be
not always set correctly in hardware.

Signed-off-by: Grzegorz Siwik <grzegorz.siwik@intel.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_mac.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index fd8eb2f9ab9d..e63ee3cca5ea 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -483,6 +483,31 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
 	return hash_value;
 }
 
+/**
+ * igb_i21x_hw_doublecheck - double checks potential HW issue in i21X
+ * @hw: pointer to the HW structure
+ *
+ * Checks if multicast array is wrote correctly
+ * If not then rewrites again to register
+ **/
+static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
+{
+	bool is_failed;
+	int i;
+
+	do {
+		is_failed = false;
+		for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) {
+			if (array_rd32(E1000_MTA, i) != hw->mac.mta_shadow[i]) {
+				is_failed = true;
+				array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+				wrfl();
+				break;
+			}
+		}
+	} while (is_failed);
+}
+
 /**
  *  igb_update_mc_addr_list - Update Multicast addresses
  *  @hw: pointer to the HW structure
@@ -516,6 +541,8 @@ void igb_update_mc_addr_list(struct e1000_hw *hw,
 	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
 		array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
 	wrfl();
+	if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+		igb_i21x_hw_doublecheck(hw);
 }
 
 /**
-- 
cgit v1.2.3


From 64433e5bf40abf893c7edbc60899bdcdd7c70b76 Mon Sep 17 00:00:00 2001
From: Ederson de Souza <ederson.desouza@intel.com>
Date: Thu, 18 Feb 2021 17:31:03 -0800
Subject: igc: Enable internal i225 PPS

The i225 device can produce one interrupt on the full second, much
like i210 - from where this patch is inspired.

This patch sets up the full second interruption on the i225 and when
receiving it, it sends a PPS event to PTP (Precision Time Protocol)
kernel subsystem.

The PTP subsystem exposes the PPS events via ioctl and sysfs, and one
can use the `testptp` tool (tools/testing/selftests/ptp) to check that
the events are being generated.

Signed-off-by: Ederson de Souza <ederson.desouza@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |  2 ++
 drivers/net/ethernet/intel/igc/igc_main.c |  8 ++++++++
 drivers/net/ethernet/intel/igc/igc_ptp.c  | 28 +++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 91493a73355d..7c404c2daa47 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -223,6 +223,8 @@ struct igc_adapter {
 	char fw_version[32];
 
 	struct bpf_prog *xdp_prog;
+
+	bool pps_sys_wrap_on;
 };
 
 void igc_up(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 10765491e357..ac93c0e1b618 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4251,9 +4251,17 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
 static void igc_tsync_interrupt(struct igc_adapter *adapter)
 {
 	struct igc_hw *hw = &adapter->hw;
+	struct ptp_clock_event event;
 	u32 tsicr = rd32(IGC_TSICR);
 	u32 ack = 0;
 
+	if (tsicr & IGC_TSICR_SYS_WRAP) {
+		event.type = PTP_CLOCK_PPS;
+		if (adapter->ptp_caps.pps)
+			ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_SYS_WRAP;
+	}
+
 	if (tsicr & IGC_TSICR_TXTS) {
 		/* retrieve hardware timestamp */
 		schedule_work(&adapter->ptp_tx_work);
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index dfa3b247fcd8..8d6fbf609761 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -123,6 +123,29 @@ static int igc_ptp_settime_i225(struct ptp_clock_info *ptp,
 static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
 				       struct ptp_clock_request *rq, int on)
 {
+	struct igc_adapter *igc =
+		container_of(ptp, struct igc_adapter, ptp_caps);
+	struct igc_hw *hw = &igc->hw;
+	unsigned long flags;
+	u32 tsim;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PPS:
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsim = rd32(IGC_TSIM);
+		if (on)
+			tsim |= IGC_TSICR_SYS_WRAP;
+		else
+			tsim &= ~IGC_TSICR_SYS_WRAP;
+		igc->pps_sys_wrap_on = on;
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
+	default:
+		break;
+	}
+
 	return -EOPNOTSUPP;
 }
 
@@ -497,6 +520,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
 		adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225;
 		adapter->ptp_caps.settime64 = igc_ptp_settime_i225;
 		adapter->ptp_caps.enable = igc_ptp_feature_enable_i225;
+		adapter->ptp_caps.pps = 1;
 		break;
 	default:
 		adapter->ptp_clock = NULL;
@@ -598,7 +622,9 @@ void igc_ptp_reset(struct igc_adapter *adapter)
 	case igc_i225:
 		wr32(IGC_TSAUXC, 0x0);
 		wr32(IGC_TSSDP, 0x0);
-		wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS);
+		wr32(IGC_TSIM,
+		     IGC_TSICR_INTERRUPTS |
+		     (adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0));
 		wr32(IGC_IMS, IGC_IMS_TS);
 		break;
 	default:
-- 
cgit v1.2.3


From 87938851b6efb6d5b44ae93c83226c6f991d5cc1 Mon Sep 17 00:00:00 2001
From: Ederson de Souza <ederson.desouza@intel.com>
Date: Thu, 18 Feb 2021 17:31:04 -0800
Subject: igc: enable auxiliary PHC functions for the i225

The i225 device offers a number of special PTP Hardware Clock features on
the Software Defined Pins (SDPs) - much like i210, which is used as
inspiration for this patch. It enables two possible functions, namely
time stamping external events and periodic output signals.

The assignment of PHC functions to the four SDP can be freely chosen by
the user.

For the external events time stamping, when the SDP (configured as input
by user) level changes, an interrupt is generated and the kernel
Precision Time Protocol (PTP) is informed.

For the periodic output signals, the i225 is configured to generate them
(so the SDP level will change periodically) and the driver also has to
keep updating the time of the next level change. However, this work is
not necessary for some frequencies as the i225 takes care of them
(namely, anything with a half-cycle of 500ms, 250ms, 125ms or < 70ms).

While i225 allows up to four timers to be used to source the time used
on the external events or output signals, this patch uses only one of
those timers. Main reason is to keep it simple, as it's not clear how
these extra timers would be exposed to users. Note that currently a NIC
can expose a single PTP device.

Signed-off-by: Ederson de Souza <ederson.desouza@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  11 ++
 drivers/net/ethernet/intel/igc/igc_defines.h |  63 +++++++
 drivers/net/ethernet/intel/igc/igc_main.c    |  55 +++++-
 drivers/net/ethernet/intel/igc/igc_ptp.c     | 269 ++++++++++++++++++++++++++-
 drivers/net/ethernet/intel/igc/igc_regs.h    |  10 +
 5 files changed, 405 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 7c404c2daa47..25871351730b 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -28,6 +28,11 @@ void igc_ethtool_set_ops(struct net_device *);
 #define MAX_ETYPE_FILTER		8
 #define IGC_RETA_SIZE			128
 
+/* SDP support */
+#define IGC_N_EXTTS	2
+#define IGC_N_PEROUT	2
+#define IGC_N_SDP	4
+
 enum igc_mac_filter_type {
 	IGC_MAC_FILTER_TYPE_DST = 0,
 	IGC_MAC_FILTER_TYPE_SRC
@@ -225,6 +230,12 @@ struct igc_adapter {
 	struct bpf_prog *xdp_prog;
 
 	bool pps_sys_wrap_on;
+
+	struct ptp_pin_desc sdp_config[IGC_N_SDP];
+	struct {
+		struct timespec64 start;
+		struct timespec64 period;
+	} perout[IGC_N_PEROUT];
 };
 
 void igc_up(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 35ed997af075..0103dda32f39 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -8,6 +8,8 @@
 #define REQ_TX_DESCRIPTOR_MULTIPLE	8
 #define REQ_RX_DESCRIPTOR_MULTIPLE	8
 
+#define IGC_CTRL_EXT_SDP2_DIR	0x00000400 /* SDP2 Data direction */
+#define IGC_CTRL_EXT_SDP3_DIR	0x00000800 /* SDP3 Data direction */
 #define IGC_CTRL_EXT_DRV_LOAD	0x10000000 /* Drv loaded bit for FW */
 
 /* Definitions for power management and wakeup registers */
@@ -96,6 +98,9 @@
 #define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
 #define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
 
+#define IGC_CTRL_SDP0_DIR 0x00400000	/* SDP0 Data direction */
+#define IGC_CTRL_SDP1_DIR 0x00800000	/* SDP1 Data direction */
+
 /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
 #define MAX_JUMBO_FRAME_SIZE	0x2600
 
@@ -403,6 +408,64 @@
 #define IGC_TSYNCTXCTL_START_SYNC		0x80000000  /* initiate sync */
 #define IGC_TSYNCTXCTL_TXSYNSIG			0x00000020  /* Sample TX tstamp in PHY sop */
 
+/* Timer selection bits */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM0	(0u << 30) /* Select SYSTIM0 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM1	(1u << 30) /* Select SYSTIM1 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM2	(2u << 30) /* Select SYSTIM2 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM3	(3u << 30) /* Select SYSTIM3 for auxiliary time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM0	(0u << 30) /* Select SYSTIM0 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM1	(1u << 30) /* Select SYSTIM1 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM2	(2u << 30) /* Select SYSTIM2 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM3	(3u << 30) /* Select SYSTIM3 for target time stamp */
+
+/* TSAUXC Configuration Bits */
+#define IGC_TSAUXC_EN_TT0	BIT(0)  /* Enable target time 0. */
+#define IGC_TSAUXC_EN_TT1	BIT(1)  /* Enable target time 1. */
+#define IGC_TSAUXC_EN_CLK0	BIT(2)  /* Enable Configurable Frequency Clock 0. */
+#define IGC_TSAUXC_EN_CLK1	BIT(5)  /* Enable Configurable Frequency Clock 1. */
+#define IGC_TSAUXC_EN_TS0	BIT(8)  /* Enable hardware timestamp 0. */
+#define IGC_TSAUXC_AUTT0	BIT(9)  /* Auxiliary Timestamp Taken. */
+#define IGC_TSAUXC_EN_TS1	BIT(10) /* Enable hardware timestamp 0. */
+#define IGC_TSAUXC_AUTT1	BIT(11) /* Auxiliary Timestamp Taken. */
+#define IGC_TSAUXC_PLSG		BIT(17) /* Generate a pulse. */
+#define IGC_TSAUXC_DISABLE1	BIT(27) /* Disable SYSTIM0 Count Operation. */
+#define IGC_TSAUXC_DISABLE2	BIT(28) /* Disable SYSTIM1 Count Operation. */
+#define IGC_TSAUXC_DISABLE3	BIT(29) /* Disable SYSTIM2 Count Operation. */
+#define IGC_TSAUXC_DIS_TS_CLEAR	BIT(30) /* Disable EN_TT0/1 auto clear. */
+#define IGC_TSAUXC_DISABLE0	BIT(31) /* Disable SYSTIM0 Count Operation. */
+
+/* SDP Configuration Bits */
+#define IGC_AUX0_SEL_SDP0	(0u << 0)  /* Assign SDP0 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP1	(1u << 0)  /* Assign SDP1 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP2	(2u << 0)  /* Assign SDP2 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP3	(3u << 0)  /* Assign SDP3 to auxiliary time stamp 0. */
+#define IGC_AUX0_TS_SDP_EN	(1u << 2)  /* Enable auxiliary time stamp trigger 0. */
+#define IGC_AUX1_SEL_SDP0	(0u << 3)  /* Assign SDP0 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP1	(1u << 3)  /* Assign SDP1 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP2	(2u << 3)  /* Assign SDP2 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP3	(3u << 3)  /* Assign SDP3 to auxiliary time stamp 1. */
+#define IGC_AUX1_TS_SDP_EN	(1u << 5)  /* Enable auxiliary time stamp trigger 1. */
+#define IGC_TS_SDP0_SEL_TT0	(0u << 6)  /* Target time 0 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_TT1	(1u << 6)  /* Target time 1 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_FC0	(2u << 6)  /* Freq clock  0 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_FC1	(3u << 6)  /* Freq clock  1 is output on SDP0. */
+#define IGC_TS_SDP0_EN		(1u << 8)  /* SDP0 is assigned to Tsync. */
+#define IGC_TS_SDP1_SEL_TT0	(0u << 9)  /* Target time 0 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_TT1	(1u << 9)  /* Target time 1 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_FC0	(2u << 9)  /* Freq clock  0 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_FC1	(3u << 9)  /* Freq clock  1 is output on SDP1. */
+#define IGC_TS_SDP1_EN		(1u << 11) /* SDP1 is assigned to Tsync. */
+#define IGC_TS_SDP2_SEL_TT0	(0u << 12) /* Target time 0 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_TT1	(1u << 12) /* Target time 1 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_FC0	(2u << 12) /* Freq clock  0 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_FC1	(3u << 12) /* Freq clock  1 is output on SDP2. */
+#define IGC_TS_SDP2_EN		(1u << 14) /* SDP2 is assigned to Tsync. */
+#define IGC_TS_SDP3_SEL_TT0	(0u << 15) /* Target time 0 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_TT1	(1u << 15) /* Target time 1 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_FC0	(2u << 15) /* Freq clock  0 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_FC1	(3u << 15) /* Freq clock  1 is output on SDP3. */
+#define IGC_TS_SDP3_EN		(1u << 17) /* SDP3 is assigned to Tsync. */
+
 /* Transmit Scheduling */
 #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
 #define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ac93c0e1b618..069471b7ffb0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4250,10 +4250,13 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
 
 static void igc_tsync_interrupt(struct igc_adapter *adapter)
 {
+	u32 ack, tsauxc, sec, nsec, tsicr;
 	struct igc_hw *hw = &adapter->hw;
 	struct ptp_clock_event event;
-	u32 tsicr = rd32(IGC_TSICR);
-	u32 ack = 0;
+	struct timespec64 ts;
+
+	tsicr = rd32(IGC_TSICR);
+	ack = 0;
 
 	if (tsicr & IGC_TSICR_SYS_WRAP) {
 		event.type = PTP_CLOCK_PPS;
@@ -4268,6 +4271,54 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
 		ack |= IGC_TSICR_TXTS;
 	}
 
+	if (tsicr & IGC_TSICR_TT0) {
+		spin_lock(&adapter->tmreg_lock);
+		ts = timespec64_add(adapter->perout[0].start,
+				    adapter->perout[0].period);
+		wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+		wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsauxc |= IGC_TSAUXC_EN_TT0;
+		wr32(IGC_TSAUXC, tsauxc);
+		adapter->perout[0].start = ts;
+		spin_unlock(&adapter->tmreg_lock);
+		ack |= IGC_TSICR_TT0;
+	}
+
+	if (tsicr & IGC_TSICR_TT1) {
+		spin_lock(&adapter->tmreg_lock);
+		ts = timespec64_add(adapter->perout[1].start,
+				    adapter->perout[1].period);
+		wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+		wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsauxc |= IGC_TSAUXC_EN_TT1;
+		wr32(IGC_TSAUXC, tsauxc);
+		adapter->perout[1].start = ts;
+		spin_unlock(&adapter->tmreg_lock);
+		ack |= IGC_TSICR_TT1;
+	}
+
+	if (tsicr & IGC_TSICR_AUTT0) {
+		nsec = rd32(IGC_AUXSTMPL0);
+		sec  = rd32(IGC_AUXSTMPH0);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 0;
+		event.timestamp = sec * NSEC_PER_SEC + nsec;
+		ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_AUTT0;
+	}
+
+	if (tsicr & IGC_TSICR_AUTT1) {
+		nsec = rd32(IGC_AUXSTMPL1);
+		sec  = rd32(IGC_AUXSTMPH1);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 1;
+		event.timestamp = sec * NSEC_PER_SEC + nsec;
+		ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_AUTT1;
+	}
+
 	/* acknowledge the interrupts */
 	wr32(IGC_TSICR, ack);
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 8d6fbf609761..69617d2c1be2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -120,6 +120,124 @@ static int igc_ptp_settime_i225(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+static void igc_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext)
+{
+	u32 *ptr = pin < 2 ? ctrl : ctrl_ext;
+	static const u32 mask[IGC_N_SDP] = {
+		IGC_CTRL_SDP0_DIR,
+		IGC_CTRL_SDP1_DIR,
+		IGC_CTRL_EXT_SDP2_DIR,
+		IGC_CTRL_EXT_SDP3_DIR,
+	};
+
+	if (input)
+		*ptr &= ~mask[pin];
+	else
+		*ptr |= mask[pin];
+}
+
+static void igc_pin_perout(struct igc_adapter *igc, int chan, int pin, int freq)
+{
+	static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3,
+	};
+	static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3,
+	};
+	static const u32 igc_ts_sdp_en[IGC_N_SDP] = {
+		IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN,
+	};
+	static const u32 igc_ts_sdp_sel_tt0[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_TT0, IGC_TS_SDP1_SEL_TT0,
+		IGC_TS_SDP2_SEL_TT0, IGC_TS_SDP3_SEL_TT0,
+	};
+	static const u32 igc_ts_sdp_sel_tt1[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_TT1, IGC_TS_SDP1_SEL_TT1,
+		IGC_TS_SDP2_SEL_TT1, IGC_TS_SDP3_SEL_TT1,
+	};
+	static const u32 igc_ts_sdp_sel_fc0[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC0, IGC_TS_SDP1_SEL_FC0,
+		IGC_TS_SDP2_SEL_FC0, IGC_TS_SDP3_SEL_FC0,
+	};
+	static const u32 igc_ts_sdp_sel_fc1[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1,
+		IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1,
+	};
+	static const u32 igc_ts_sdp_sel_clr[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1,
+		IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1,
+	};
+	struct igc_hw *hw = &igc->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	tssdp = rd32(IGC_TSSDP);
+
+	igc_pin_direction(pin, 0, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an input. */
+	if ((tssdp & IGC_AUX0_SEL_SDP3) == igc_aux0_sel_sdp[pin])
+		tssdp &= ~IGC_AUX0_TS_SDP_EN;
+
+	if ((tssdp & IGC_AUX1_SEL_SDP3) == igc_aux1_sel_sdp[pin])
+		tssdp &= ~IGC_AUX1_TS_SDP_EN;
+
+	tssdp &= ~igc_ts_sdp_sel_clr[pin];
+	if (freq) {
+		if (chan == 1)
+			tssdp |= igc_ts_sdp_sel_fc1[pin];
+		else
+			tssdp |= igc_ts_sdp_sel_fc0[pin];
+	} else {
+		if (chan == 1)
+			tssdp |= igc_ts_sdp_sel_tt1[pin];
+		else
+			tssdp |= igc_ts_sdp_sel_tt0[pin];
+	}
+	tssdp |= igc_ts_sdp_en[pin];
+
+	wr32(IGC_TSSDP, tssdp);
+	wr32(IGC_CTRL, ctrl);
+	wr32(IGC_CTRL_EXT, ctrl_ext);
+}
+
+static void igc_pin_extts(struct igc_adapter *igc, int chan, int pin)
+{
+	static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3,
+	};
+	static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3,
+	};
+	static const u32 igc_ts_sdp_en[IGC_N_SDP] = {
+		IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN,
+	};
+	struct igc_hw *hw = &igc->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	tssdp = rd32(IGC_TSSDP);
+
+	igc_pin_direction(pin, 1, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an output. */
+	tssdp &= ~igc_ts_sdp_en[pin];
+
+	if (chan == 1) {
+		tssdp &= ~IGC_AUX1_SEL_SDP3;
+		tssdp |= igc_aux1_sel_sdp[pin] | IGC_AUX1_TS_SDP_EN;
+	} else {
+		tssdp &= ~IGC_AUX0_SEL_SDP3;
+		tssdp |= igc_aux0_sel_sdp[pin] | IGC_AUX0_TS_SDP_EN;
+	}
+
+	wr32(IGC_TSSDP, tssdp);
+	wr32(IGC_CTRL, ctrl);
+	wr32(IGC_CTRL_EXT, ctrl_ext);
+}
+
 static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
 				       struct ptp_clock_request *rq, int on)
 {
@@ -127,9 +245,131 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
 		container_of(ptp, struct igc_adapter, ptp_caps);
 	struct igc_hw *hw = &igc->hw;
 	unsigned long flags;
-	u32 tsim;
+	struct timespec64 ts;
+	int use_freq = 0, pin = -1;
+	u32 tsim, tsauxc, tsauxc_mask, tsim_mask, trgttiml, trgttimh, freqout;
+	s64 ns;
 
 	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		/* Reject requests with unsupported flags */
+		if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+					PTP_RISING_EDGE |
+					PTP_FALLING_EDGE |
+					PTP_STRICT_FLAGS))
+			return -EOPNOTSUPP;
+
+		/* Reject requests failing to enable both edges. */
+		if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+		    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+		    (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igc->ptp_clock, PTP_PF_EXTTS,
+					   rq->extts.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		if (rq->extts.index == 1) {
+			tsauxc_mask = IGC_TSAUXC_EN_TS1;
+			tsim_mask = IGC_TSICR_AUTT1;
+		} else {
+			tsauxc_mask = IGC_TSAUXC_EN_TS0;
+			tsim_mask = IGC_TSICR_AUTT0;
+		}
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsim = rd32(IGC_TSIM);
+		if (on) {
+			igc_pin_extts(igc, rq->extts.index, pin);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		} else {
+			tsauxc &= ~tsauxc_mask;
+			tsim &= ~tsim_mask;
+		}
+		wr32(IGC_TSAUXC, tsauxc);
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT,
+					   rq->perout.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+		ns = ns >> 1;
+		if (on && (ns <= 70000000LL || ns == 125000000LL ||
+			   ns == 250000000LL || ns == 500000000LL)) {
+			if (ns < 8LL)
+				return -EINVAL;
+			use_freq = 1;
+		}
+		ts = ns_to_timespec64(ns);
+		if (rq->perout.index == 1) {
+			if (use_freq) {
+				tsauxc_mask = IGC_TSAUXC_EN_CLK1;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = IGC_TSAUXC_EN_TT1;
+				tsim_mask = IGC_TSICR_TT1;
+			}
+			trgttiml = IGC_TRGTTIML1;
+			trgttimh = IGC_TRGTTIMH1;
+			freqout = IGC_FREQOUT1;
+		} else {
+			if (use_freq) {
+				tsauxc_mask = IGC_TSAUXC_EN_CLK0;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = IGC_TSAUXC_EN_TT0;
+				tsim_mask = IGC_TSICR_TT0;
+			}
+			trgttiml = IGC_TRGTTIML0;
+			trgttimh = IGC_TRGTTIMH0;
+			freqout = IGC_FREQOUT0;
+		}
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsim = rd32(IGC_TSIM);
+		if (rq->perout.index == 1) {
+			tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1);
+			tsim &= ~IGC_TSICR_TT1;
+		} else {
+			tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0);
+			tsim &= ~IGC_TSICR_TT0;
+		}
+		if (on) {
+			int i = rq->perout.index;
+
+			igc_pin_perout(igc, i, pin, use_freq);
+			igc->perout[i].start.tv_sec = rq->perout.start.sec;
+			igc->perout[i].start.tv_nsec = rq->perout.start.nsec;
+			igc->perout[i].period.tv_sec = ts.tv_sec;
+			igc->perout[i].period.tv_nsec = ts.tv_nsec;
+			wr32(trgttimh, rq->perout.start.sec);
+			/* For now, always select timer 0 as source. */
+			wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+			if (use_freq)
+				wr32(freqout, ns);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		}
+		wr32(IGC_TSAUXC, tsauxc);
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
 	case PTP_CLK_REQ_PPS:
 		spin_lock_irqsave(&igc->tmreg_lock, flags);
 		tsim = rd32(IGC_TSIM);
@@ -149,6 +389,20 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
 	return -EOPNOTSUPP;
 }
 
+static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
+			      enum ptp_pin_function func, unsigned int chan)
+{
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_EXTTS:
+	case PTP_PF_PEROUT:
+		break;
+	case PTP_PF_PHYSYNC:
+		return -1;
+	}
+	return 0;
+}
+
 /**
  * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp
  * @adapter: board private structure
@@ -509,9 +763,17 @@ void igc_ptp_init(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
+	int i;
 
 	switch (hw->mac.type) {
 	case igc_i225:
+		for (i = 0; i < IGC_N_SDP; i++) {
+			struct ptp_pin_desc *ppd = &adapter->sdp_config[i];
+
+			snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i);
+			ppd->index = i;
+			ppd->func = PTP_PF_NONE;
+		}
 		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
 		adapter->ptp_caps.owner = THIS_MODULE;
 		adapter->ptp_caps.max_adj = 62499999;
@@ -521,6 +783,11 @@ void igc_ptp_init(struct igc_adapter *adapter)
 		adapter->ptp_caps.settime64 = igc_ptp_settime_i225;
 		adapter->ptp_caps.enable = igc_ptp_feature_enable_i225;
 		adapter->ptp_caps.pps = 1;
+		adapter->ptp_caps.pin_config = adapter->sdp_config;
+		adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS;
+		adapter->ptp_caps.n_per_out = IGC_N_PEROUT;
+		adapter->ptp_caps.n_pins = IGC_N_SDP;
+		adapter->ptp_caps.verify = igc_ptp_verify_pin;
 		break;
 	default:
 		adapter->ptp_clock = NULL;
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 3e5cb7aef9da..cc174853554b 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -192,6 +192,16 @@
 #define IGC_TSYNCTXCTL	0x0B614  /* Tx Time Sync Control register - RW */
 #define IGC_TSYNCRXCFG	0x05F50  /* Time Sync Rx Configuration - RW */
 #define IGC_TSSDP	0x0003C  /* Time Sync SDP Configuration Register - RW */
+#define IGC_TRGTTIML0	0x0B644 /* Target Time Register 0 Low  - RW */
+#define IGC_TRGTTIMH0	0x0B648 /* Target Time Register 0 High - RW */
+#define IGC_TRGTTIML1	0x0B64C /* Target Time Register 1 Low  - RW */
+#define IGC_TRGTTIMH1	0x0B650 /* Target Time Register 1 High - RW */
+#define IGC_FREQOUT0	0x0B654 /* Frequency Out 0 Control Register - RW */
+#define IGC_FREQOUT1	0x0B658 /* Frequency Out 1 Control Register - RW */
+#define IGC_AUXSTMPL0	0x0B65C /* Auxiliary Time Stamp 0 Register Low  - RO */
+#define IGC_AUXSTMPH0	0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */
+#define IGC_AUXSTMPL1	0x0B664 /* Auxiliary Time Stamp 1 Register Low  - RO */
+#define IGC_AUXSTMPH1	0x0B668 /* Auxiliary Time Stamp 1 Register High - RO */
 
 #define IGC_IMIR(_i)	(0x05A80 + ((_i) * 4))  /* Immediate Interrupt */
 #define IGC_IMIREXT(_i)	(0x05AA0 + ((_i) * 4))  /* Immediate INTR Ext*/
-- 
cgit v1.2.3


From b3d4f405620a7c9f3f601329e9a55e6133b15aca Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 24 Mar 2021 10:05:31 +0200
Subject: igc: Fix overwrites return value

drivers/net/ethernet/intel/igc/igc_i225.c:235 igc_write_nvm_srwr()
warn: loop overwrites return value 'ret_val'

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_i225.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
index cc83bb5c15e8..b2ef9fde97b3 100644
--- a/drivers/net/ethernet/intel/igc/igc_i225.c
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -229,10 +229,11 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
 	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
 	    words == 0) {
 		hw_dbg("nvm parameter(s) out of bounds\n");
-		goto out;
+		return ret_val;
 	}
 
 	for (i = 0; i < words; i++) {
+		ret_val = -IGC_ERR_NVM;
 		eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
 			(data[i] << IGC_NVM_RW_REG_DATA) |
 			IGC_NVM_RW_REG_START;
@@ -254,7 +255,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
 		}
 	}
 
-out:
 	return ret_val;
 }
 
-- 
cgit v1.2.3


From 1feaf60ff26086d4ae212c0fd61ff5755e1fd10c Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 24 Mar 2021 15:15:25 +0200
Subject: igc: Expose LPI counters

Expose EEE Tx and Rx low power idle counters via ethtool
A EEE TX or RX LPI event occurs when the transmitter or the receiver
enters EEE (IEEE802.3az) LPI state.
ethtool --statistics <iface>

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 8722294ab90c..9722449d7633 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -65,6 +65,8 @@ static const struct igc_stats igc_gstrings_stats[] = {
 	IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
 	IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
 	IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	IGC_STAT("tx_lpi_counter", stats.tlpic),
+	IGC_STAT("rx_lpi_counter", stats.rlpic),
 };
 
 #define IGC_NETDEV_STAT(_net_stat) { \
-- 
cgit v1.2.3


From e5b4b8988b7a1348790caf6f7e593a4541eafb2c Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 14 Apr 2021 19:52:55 +0300
Subject: net: bridge: switchdev: refactor br_switchdev_fdb_notify

Instead of having to add more and more arguments to
br_switchdev_fdb_call_notifiers, get rid of it and build the info
struct directly in br_switchdev_fdb_notify.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_switchdev.c | 41 +++++++++++------------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 1e24d9a2c9a7..c390f84adea2 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -107,25 +107,16 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 	return 0;
 }
 
-static void
-br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
-				u16 vid, struct net_device *dev,
-				bool added_by_user, bool offloaded)
-{
-	struct switchdev_notifier_fdb_info info;
-	unsigned long notifier_type;
-
-	info.addr = mac;
-	info.vid = vid;
-	info.added_by_user = added_by_user;
-	info.offloaded = offloaded;
-	notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
-	call_switchdev_notifiers(notifier_type, dev, &info.info, NULL);
-}
-
 void
 br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
+	struct switchdev_notifier_fdb_info info = {
+		.addr = fdb->key.addr.addr,
+		.vid = fdb->key.vlan_id,
+		.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags),
+		.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
+	};
+
 	if (!fdb->dst)
 		return;
 	if (test_bit(BR_FDB_LOCAL, &fdb->flags))
@@ -133,22 +124,12 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 
 	switch (type) {
 	case RTM_DELNEIGH:
-		br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
-						fdb->key.vlan_id,
-						fdb->dst->dev,
-						test_bit(BR_FDB_ADDED_BY_USER,
-							 &fdb->flags),
-						test_bit(BR_FDB_OFFLOADED,
-							 &fdb->flags));
+		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
+					 fdb->dst->dev, &info.info, NULL);
 		break;
 	case RTM_NEWNEIGH:
-		br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
-						fdb->key.vlan_id,
-						fdb->dst->dev,
-						test_bit(BR_FDB_ADDED_BY_USER,
-							 &fdb->flags),
-						test_bit(BR_FDB_OFFLOADED,
-							 &fdb->flags));
+		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
+					 fdb->dst->dev, &info.info, NULL);
 		break;
 	}
 }
-- 
cgit v1.2.3


From 2c4eca3ef7161f6632959c00c8eae182f4398901 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 14 Apr 2021 19:52:56 +0300
Subject: net: bridge: switchdev: include local flag in FDB notifications

As explained in bugfix commit 6ab4c3117aec ("net: bridge: don't notify
switchdev for local FDB addresses") as well as in this discussion:
https://lore.kernel.org/netdev/20210117193009.io3nungdwuzmo5f7@skbuf/

the switchdev notifiers for FDB entries managed to have a zero-day bug,
which was that drivers would not know what to do with local FDB entries,
because they were not told that they are local. The bug fix was to
simply not notify them of those addresses.

Let us now add the 'is_local' bit to bridge FDB entries, and make all
drivers ignore these entries by their own choice.

Co-developed-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c        | 4 ++--
 drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 5 +++--
 drivers/net/ethernet/rocker/rocker_main.c                  | 4 ++--
 drivers/net/ethernet/ti/am65-cpsw-switchdev.c              | 4 ++--
 drivers/net/ethernet/ti/cpsw_switchdev.c                   | 4 ++--
 include/net/switchdev.h                                    | 1 +
 net/bridge/br_switchdev.c                                  | 3 +--
 net/dsa/slave.c                                            | 2 +-
 9 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
index 5250d51d783c..05de37c3b64c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
@@ -2098,7 +2098,7 @@ static void dpaa2_switch_event_work(struct work_struct *work)
 
 	switch (switchdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 		if (is_unicast_ether_addr(fdb_info->addr))
 			err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev),
@@ -2113,7 +2113,7 @@ static void dpaa2_switch_event_work(struct work_struct *work)
 					 &fdb_info->info, NULL);
 		break;
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 		if (is_unicast_ether_addr(fdb_info->addr))
 			dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
index 49e052273f30..cb564890a3dc 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
@@ -798,7 +798,7 @@ static void prestera_fdb_event_work(struct work_struct *work)
 	switch (swdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
 		fdb_info = &swdev_work->fdb_info;
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 
 		err = prestera_port_fdb_set(port, fdb_info, true);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c1f05c17557d..eeccd586e781 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2916,7 +2916,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
 		return;
 
 	if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE &&
-	    !switchdev_work->fdb_info.added_by_user)
+	    (!switchdev_work->fdb_info.added_by_user ||
+	     switchdev_work->fdb_info.is_local))
 		return;
 
 	if (!netif_running(dev))
@@ -2971,7 +2972,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
 	switch (switchdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
 		fdb_info = &switchdev_work->fdb_info;
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 		err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true);
 		if (err)
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 3473d296b2e2..a46633606cae 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2736,7 +2736,7 @@ static void rocker_switchdev_event_work(struct work_struct *work)
 	switch (switchdev_work->event) {
 	case SWITCHDEV_FDB_ADD_TO_DEVICE:
 		fdb_info = &switchdev_work->fdb_info;
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 		err = rocker_world_port_fdb_add(rocker_port, fdb_info);
 		if (err) {
@@ -2747,7 +2747,7 @@ static void rocker_switchdev_event_work(struct work_struct *work)
 		break;
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
 		fdb_info = &switchdev_work->fdb_info;
-		if (!fdb_info->added_by_user)
+		if (!fdb_info->added_by_user || fdb_info->is_local)
 			break;
 		err = rocker_world_port_fdb_del(rocker_port, fdb_info);
 		if (err)
diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
index d93ffd8a08b0..23cfb91e9c4d 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c
@@ -385,7 +385,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work)
 			   fdb->addr, fdb->vid, fdb->added_by_user,
 			   fdb->offloaded, port_id);
 
-		if (!fdb->added_by_user)
+		if (!fdb->added_by_user || fdb->is_local)
 			break;
 		if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
 			port_id = HOST_PORT_NUM;
@@ -401,7 +401,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work)
 			   fdb->addr, fdb->vid, fdb->added_by_user,
 			   fdb->offloaded, port_id);
 
-		if (!fdb->added_by_user)
+		if (!fdb->added_by_user || fdb->is_local)
 			break;
 		if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
 			port_id = HOST_PORT_NUM;
diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c
index a72bb570756f..05a64fb7a04f 100644
--- a/drivers/net/ethernet/ti/cpsw_switchdev.c
+++ b/drivers/net/ethernet/ti/cpsw_switchdev.c
@@ -395,7 +395,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work)
 			fdb->addr, fdb->vid, fdb->added_by_user,
 			fdb->offloaded, port);
 
-		if (!fdb->added_by_user)
+		if (!fdb->added_by_user || fdb->is_local)
 			break;
 		if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
 			port = HOST_PORT_NUM;
@@ -411,7 +411,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work)
 			fdb->addr, fdb->vid, fdb->added_by_user,
 			fdb->offloaded, port);
 
-		if (!fdb->added_by_user)
+		if (!fdb->added_by_user || fdb->is_local)
 			break;
 		if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0)
 			port = HOST_PORT_NUM;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 8c3218177136..f1a5a9a3634d 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -209,6 +209,7 @@ struct switchdev_notifier_fdb_info {
 	const unsigned char *addr;
 	u16 vid;
 	u8 added_by_user:1,
+	   is_local:1,
 	   offloaded:1;
 };
 
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index c390f84adea2..a5e601e41cb9 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -114,13 +114,12 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 		.addr = fdb->key.addr.addr,
 		.vid = fdb->key.vlan_id,
 		.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags),
+		.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
 		.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
 	};
 
 	if (!fdb->dst)
 		return;
-	if (test_bit(BR_FDB_LOCAL, &fdb->flags))
-		return;
 
 	switch (type) {
 	case RTM_DELNEIGH:
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9300cb66e500..3ae67202fda2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2329,7 +2329,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
 		fdb_info = ptr;
 
 		if (dsa_slave_dev_check(dev)) {
-			if (!fdb_info->added_by_user)
+			if (!fdb_info->added_by_user || fdb_info->is_local)
 				return NOTIFY_OK;
 
 			dp = dsa_slave_to_port(dev);
-- 
cgit v1.2.3


From a1150a04b7e8caee235e38996e042e1bcb1a6574 Mon Sep 17 00:00:00 2001
From: Gatis Peisenieks <gatis@mikrotik.com>
Date: Wed, 14 Apr 2021 22:09:20 +0300
Subject: atl1c: move tx cleanup processing out of interrupt

Tx queue cleanup happens in interrupt handler on same core as rx queue
processing. Both can take considerable amount of processing in high
packet-per-second scenarios.

Sending big amounts of packets can stall the rx processing which is
unfair and also can lead to out-of-memory condition since
__dev_kfree_skb_irq queues the skbs for later kfree in softirq which
is not allowed to happen with heavy load in interrupt handler.

This puts tx cleanup in its own napi and enables threaded napi to
allow the rx/tx queue processing to happen on different cores.

The ability to sustain equal amounts of tx/rx traffic increased:
from 280Kpps to 1130Kpps on Threadripper 3960X with upcoming
Mikrotik 10/25G NIC,
from 520Kpps to 850Kpps on Intel i3-3320 with Mikrotik RB44Ge adapter.

Signed-off-by: Gatis Peisenieks <gatis@mikrotik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c.h      |  2 ++
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 44 +++++++++++++++++++------
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index a0562a90fb6d..28ae5c16831e 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -367,6 +367,7 @@ struct atl1c_hw {
 	u16 phy_id1;
 	u16 phy_id2;
 
+	spinlock_t intr_mask_lock;	/* protect the intr_mask */
 	u32 intr_mask;
 
 	u8 preamble_len;
@@ -506,6 +507,7 @@ struct atl1c_adapter {
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
 	struct napi_struct  napi;
+	struct napi_struct  tx_napi;
 	struct page         *rx_page;
 	unsigned int	    rx_page_offset;
 	unsigned int	    rx_frag_size;
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index d54375b255dc..1d17c24e6d75 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -813,6 +813,7 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter)
 	atl1c_set_rxbufsize(adapter, adapter->netdev);
 	atomic_set(&adapter->irq_sem, 1);
 	spin_lock_init(&adapter->mdio_lock);
+	spin_lock_init(&adapter->hw.intr_mask_lock);
 	set_bit(__AT_DOWN, &adapter->flags);
 
 	return 0;
@@ -1530,20 +1531,19 @@ static inline void atl1c_clear_phy_int(struct atl1c_adapter *adapter)
 	spin_unlock(&adapter->mdio_lock);
 }
 
-static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter,
-				enum atl1c_trans_queue type)
+static int atl1c_clean_tx(struct napi_struct *napi, int budget)
 {
-	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
+	struct atl1c_adapter *adapter =
+		container_of(napi, struct atl1c_adapter, tx_napi);
+	struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[atl1c_trans_normal];
 	struct atl1c_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
 	u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean);
 	u16 hw_next_to_clean;
-	u16 reg;
 	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned long flags;
 
-	reg = type == atl1c_trans_high ? REG_TPD_PRI1_CIDX : REG_TPD_PRI0_CIDX;
-
-	AT_READ_REGW(&adapter->hw, reg, &hw_next_to_clean);
+	AT_READ_REGW(&adapter->hw, REG_TPD_PRI0_CIDX, &hw_next_to_clean);
 
 	while (next_to_clean != hw_next_to_clean) {
 		buffer_info = &tpd_ring->buffer_info[next_to_clean];
@@ -1564,7 +1564,15 @@ static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter,
 		netif_wake_queue(adapter->netdev);
 	}
 
-	return true;
+	if (total_packets < budget) {
+		napi_complete_done(napi, total_packets);
+		spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags);
+		adapter->hw.intr_mask |= ISR_TX_PKT;
+		AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask);
+		spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags);
+		return total_packets;
+	}
+	return budget;
 }
 
 /**
@@ -1599,13 +1607,22 @@ static irqreturn_t atl1c_intr(int irq, void *data)
 		AT_WRITE_REG(hw, REG_ISR, status | ISR_DIS_INT);
 		if (status & ISR_RX_PKT) {
 			if (likely(napi_schedule_prep(&adapter->napi))) {
+				spin_lock(&hw->intr_mask_lock);
 				hw->intr_mask &= ~ISR_RX_PKT;
 				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
+				spin_unlock(&hw->intr_mask_lock);
 				__napi_schedule(&adapter->napi);
 			}
 		}
-		if (status & ISR_TX_PKT)
-			atl1c_clean_tx_irq(adapter, atl1c_trans_normal);
+		if (status & ISR_TX_PKT) {
+			if (napi_schedule_prep(&adapter->tx_napi)) {
+				spin_lock(&hw->intr_mask_lock);
+				hw->intr_mask &= ~ISR_TX_PKT;
+				AT_WRITE_REG(hw, REG_IMR, hw->intr_mask);
+				spin_unlock(&hw->intr_mask_lock);
+				__napi_schedule(&adapter->tx_napi);
+			}
+		}
 
 		handled = IRQ_HANDLED;
 		/* check if PCIE PHY Link down */
@@ -1876,6 +1893,7 @@ static int atl1c_clean(struct napi_struct *napi, int budget)
 	struct atl1c_adapter *adapter =
 			container_of(napi, struct atl1c_adapter, napi);
 	int work_done = 0;
+	unsigned long flags;
 
 	/* Keep link state information with original netdev */
 	if (!netif_carrier_ok(adapter->netdev))
@@ -1886,8 +1904,10 @@ static int atl1c_clean(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 quit_polling:
 		napi_complete_done(napi, work_done);
+		spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags);
 		adapter->hw.intr_mask |= ISR_RX_PKT;
 		AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask);
+		spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags);
 	}
 	return work_done;
 }
@@ -2325,6 +2345,7 @@ static int atl1c_up(struct atl1c_adapter *adapter)
 	atl1c_check_link_status(adapter);
 	clear_bit(__AT_DOWN, &adapter->flags);
 	napi_enable(&adapter->napi);
+	napi_enable(&adapter->tx_napi);
 	atl1c_irq_enable(adapter);
 	netif_start_queue(netdev);
 	return err;
@@ -2345,6 +2366,7 @@ static void atl1c_down(struct atl1c_adapter *adapter)
 	set_bit(__AT_DOWN, &adapter->flags);
 	netif_carrier_off(netdev);
 	napi_disable(&adapter->napi);
+	napi_disable(&adapter->tx_napi);
 	atl1c_irq_disable(adapter);
 	atl1c_free_irq(adapter);
 	/* disable ASPM if device inactive */
@@ -2593,7 +2615,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->mii.mdio_write = atl1c_mdio_write;
 	adapter->mii.phy_id_mask = 0x1f;
 	adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK;
+	dev_set_threaded(netdev, true);
 	netif_napi_add(netdev, &adapter->napi, atl1c_clean, 64);
+	netif_napi_add(netdev, &adapter->tx_napi, atl1c_clean_tx, 64);
 	timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0);
 	/* setup the private structure */
 	err = atl1c_sw_init(adapter);
-- 
cgit v1.2.3


From bd005f53862b9e840977907e14e28cbcc10c6d51 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 15 Apr 2021 16:44:50 -0700
Subject: mptcp: revert "mptcp: forbit mcast-related sockopt on MPTCP sockets"

This change reverts commit 86581852d771 ("mptcp: forbit mcast-related sockopt on MPTCP sockets").

As announced in the cover letter of the mentioned patch above, the
following commits introduce a larger MPTCP sockopt implementation
refactor.

This time, we switch from a blocklist to an allowlist. This is safer for
the future where new sockoptions could be added while not being fully
supported with MPTCP sockets and thus causing unstabilities.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 45 ---------------------------------------------
 1 file changed, 45 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 8009b3f8e4c1..2fcdc611c122 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2869,48 +2869,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 	return ret;
 }
 
-static bool mptcp_unsupported(int level, int optname)
-{
-	if (level == SOL_IP) {
-		switch (optname) {
-		case IP_ADD_MEMBERSHIP:
-		case IP_ADD_SOURCE_MEMBERSHIP:
-		case IP_DROP_MEMBERSHIP:
-		case IP_DROP_SOURCE_MEMBERSHIP:
-		case IP_BLOCK_SOURCE:
-		case IP_UNBLOCK_SOURCE:
-		case MCAST_JOIN_GROUP:
-		case MCAST_LEAVE_GROUP:
-		case MCAST_JOIN_SOURCE_GROUP:
-		case MCAST_LEAVE_SOURCE_GROUP:
-		case MCAST_BLOCK_SOURCE:
-		case MCAST_UNBLOCK_SOURCE:
-		case MCAST_MSFILTER:
-			return true;
-		}
-		return false;
-	}
-	if (level == SOL_IPV6) {
-		switch (optname) {
-		case IPV6_ADDRFORM:
-		case IPV6_ADD_MEMBERSHIP:
-		case IPV6_DROP_MEMBERSHIP:
-		case IPV6_JOIN_ANYCAST:
-		case IPV6_LEAVE_ANYCAST:
-		case MCAST_JOIN_GROUP:
-		case MCAST_LEAVE_GROUP:
-		case MCAST_JOIN_SOURCE_GROUP:
-		case MCAST_LEAVE_SOURCE_GROUP:
-		case MCAST_BLOCK_SOURCE:
-		case MCAST_UNBLOCK_SOURCE:
-		case MCAST_MSFILTER:
-			return true;
-		}
-		return false;
-	}
-	return false;
-}
-
 static int mptcp_setsockopt(struct sock *sk, int level, int optname,
 			    sockptr_t optval, unsigned int optlen)
 {
@@ -2919,9 +2877,6 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname,
 
 	pr_debug("msk=%p", msk);
 
-	if (mptcp_unsupported(level, optname))
-		return -ENOPROTOOPT;
-
 	if (level == SOL_SOCKET)
 		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 
-- 
cgit v1.2.3


From 0abdde82b163600dcafb80da6e155dbf60c331bc Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 15 Apr 2021 16:44:51 -0700
Subject: mptcp: move sockopt function into a new file

The MPTCP sockopt implementation is going to be much
more big and complex soon. Let's move it to a different
source file.

No functional change intended.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/Makefile   |   2 +-
 net/mptcp/protocol.c | 120 ---------------------------------------------
 net/mptcp/protocol.h |   5 ++
 net/mptcp/sockopt.c  | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+), 121 deletions(-)
 create mode 100644 net/mptcp/sockopt.c

diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index a611968be4d7..d2642c012a6a 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
 obj-$(CONFIG_MPTCP) += mptcp.o
 
 mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
-	   mib.o pm_netlink.o
+	   mib.o pm_netlink.o sockopt.o
 
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
 obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2fcdc611c122..e0b381ae99af 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -90,16 +90,6 @@ static bool mptcp_is_tcpsk(struct sock *sk)
 	return false;
 }
 
-static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
-{
-	sock_owned_by_me((const struct sock *)msk);
-
-	if (likely(!__mptcp_check_fallback(msk)))
-		return NULL;
-
-	return msk->first;
-}
-
 static int __mptcp_socket_create(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -2811,116 +2801,6 @@ static void mptcp_destroy(struct sock *sk)
 	sk_sockets_allocated_dec(sk);
 }
 
-static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
-				       sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = (struct sock *)msk;
-	struct socket *ssock;
-	int ret;
-
-	switch (optname) {
-	case SO_REUSEPORT:
-	case SO_REUSEADDR:
-		lock_sock(sk);
-		ssock = __mptcp_nmpc_socket(msk);
-		if (!ssock) {
-			release_sock(sk);
-			return -EINVAL;
-		}
-
-		ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
-		if (ret == 0) {
-			if (optname == SO_REUSEPORT)
-				sk->sk_reuseport = ssock->sk->sk_reuseport;
-			else if (optname == SO_REUSEADDR)
-				sk->sk_reuse = ssock->sk->sk_reuse;
-		}
-		release_sock(sk);
-		return ret;
-	}
-
-	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
-}
-
-static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
-			       sockptr_t optval, unsigned int optlen)
-{
-	struct sock *sk = (struct sock *)msk;
-	int ret = -EOPNOTSUPP;
-	struct socket *ssock;
-
-	switch (optname) {
-	case IPV6_V6ONLY:
-		lock_sock(sk);
-		ssock = __mptcp_nmpc_socket(msk);
-		if (!ssock) {
-			release_sock(sk);
-			return -EINVAL;
-		}
-
-		ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
-		if (ret == 0)
-			sk->sk_ipv6only = ssock->sk->sk_ipv6only;
-
-		release_sock(sk);
-		break;
-	}
-
-	return ret;
-}
-
-static int mptcp_setsockopt(struct sock *sk, int level, int optname,
-			    sockptr_t optval, unsigned int optlen)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct sock *ssk;
-
-	pr_debug("msk=%p", msk);
-
-	if (level == SOL_SOCKET)
-		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
-
-	/* @@ the meaning of setsockopt() when the socket is connected and
-	 * there are multiple subflows is not yet defined. It is up to the
-	 * MPTCP-level socket to configure the subflows until the subflow
-	 * is in TCP fallback, when TCP socket options are passed through
-	 * to the one remaining subflow.
-	 */
-	lock_sock(sk);
-	ssk = __mptcp_tcp_fallback(msk);
-	release_sock(sk);
-	if (ssk)
-		return tcp_setsockopt(ssk, level, optname, optval, optlen);
-
-	if (level == SOL_IPV6)
-		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
-
-	return -EOPNOTSUPP;
-}
-
-static int mptcp_getsockopt(struct sock *sk, int level, int optname,
-			    char __user *optval, int __user *option)
-{
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	struct sock *ssk;
-
-	pr_debug("msk=%p", msk);
-
-	/* @@ the meaning of setsockopt() when the socket is connected and
-	 * there are multiple subflows is not yet defined. It is up to the
-	 * MPTCP-level socket to configure the subflows until the subflow
-	 * is in TCP fallback, when socket options are passed through
-	 * to the one remaining subflow.
-	 */
-	lock_sock(sk);
-	ssk = __mptcp_tcp_fallback(msk);
-	release_sock(sk);
-	if (ssk)
-		return tcp_getsockopt(ssk, level, optname, optval, option);
-
-	return -EOPNOTSUPP;
-}
-
 void __mptcp_data_acked(struct sock *sk)
 {
 	if (!sock_owned_by_user(sk))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d8de1e961ab0..14f0114be17a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -571,6 +571,11 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
 void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 bool mptcp_finish_join(struct sock *sk);
 bool mptcp_schedule_work(struct sock *sk);
+int mptcp_setsockopt(struct sock *sk, int level, int optname,
+		     sockptr_t optval, unsigned int optlen);
+int mptcp_getsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, int __user *option);
+
 void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 void __mptcp_data_acked(struct sock *sk);
 void __mptcp_error_report(struct sock *sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
new file mode 100644
index 000000000000..479f75653969
--- /dev/null
+++ b/net/mptcp/sockopt.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2021, Red Hat.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/protocol.h>
+#include <net/tcp.h>
+#include <net/mptcp.h>
+#include "protocol.h"
+
+static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
+{
+	sock_owned_by_me((const struct sock *)msk);
+
+	if (likely(!__mptcp_check_fallback(msk)))
+		return NULL;
+
+	return msk->first;
+}
+
+static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
+				       sockptr_t optval, unsigned int optlen)
+{
+	struct sock *sk = (struct sock *)msk;
+	struct socket *ssock;
+	int ret;
+
+	switch (optname) {
+	case SO_REUSEPORT:
+	case SO_REUSEADDR:
+		lock_sock(sk);
+		ssock = __mptcp_nmpc_socket(msk);
+		if (!ssock) {
+			release_sock(sk);
+			return -EINVAL;
+		}
+
+		ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
+		if (ret == 0) {
+			if (optname == SO_REUSEPORT)
+				sk->sk_reuseport = ssock->sk->sk_reuseport;
+			else if (optname == SO_REUSEADDR)
+				sk->sk_reuse = ssock->sk->sk_reuse;
+		}
+		release_sock(sk);
+		return ret;
+	}
+
+	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
+}
+
+static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
+			       sockptr_t optval, unsigned int optlen)
+{
+	struct sock *sk = (struct sock *)msk;
+	int ret = -EOPNOTSUPP;
+	struct socket *ssock;
+
+	switch (optname) {
+	case IPV6_V6ONLY:
+		lock_sock(sk);
+		ssock = __mptcp_nmpc_socket(msk);
+		if (!ssock) {
+			release_sock(sk);
+			return -EINVAL;
+		}
+
+		ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
+		if (ret == 0)
+			sk->sk_ipv6only = ssock->sk->sk_ipv6only;
+
+		release_sock(sk);
+		break;
+	}
+
+	return ret;
+}
+
+int mptcp_setsockopt(struct sock *sk, int level, int optname,
+		     sockptr_t optval, unsigned int optlen)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct sock *ssk;
+
+	pr_debug("msk=%p", msk);
+
+	if (level == SOL_SOCKET)
+		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
+
+	/* @@ the meaning of setsockopt() when the socket is connected and
+	 * there are multiple subflows is not yet defined. It is up to the
+	 * MPTCP-level socket to configure the subflows until the subflow
+	 * is in TCP fallback, when TCP socket options are passed through
+	 * to the one remaining subflow.
+	 */
+	lock_sock(sk);
+	ssk = __mptcp_tcp_fallback(msk);
+	release_sock(sk);
+	if (ssk)
+		return tcp_setsockopt(ssk, level, optname, optval, optlen);
+
+	if (level == SOL_IPV6)
+		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
+
+	return -EOPNOTSUPP;
+}
+
+int mptcp_getsockopt(struct sock *sk, int level, int optname,
+		     char __user *optval, int __user *option)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct sock *ssk;
+
+	pr_debug("msk=%p", msk);
+
+	/* @@ the meaning of setsockopt() when the socket is connected and
+	 * there are multiple subflows is not yet defined. It is up to the
+	 * MPTCP-level socket to configure the subflows until the subflow
+	 * is in TCP fallback, when socket options are passed through
+	 * to the one remaining subflow.
+	 */
+	lock_sock(sk);
+	ssk = __mptcp_tcp_fallback(msk);
+	release_sock(sk);
+	if (ssk)
+		return tcp_getsockopt(ssk, level, optname, optval, option);
+
+	return -EOPNOTSUPP;
+}
+
-- 
cgit v1.2.3


From d9e4c129181004ec94b315b0c9db5eeb09da75e6 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 15 Apr 2021 16:44:52 -0700
Subject: mptcp: only admit explicitly supported sockopt

Unrolling mcast state at msk dismantel time is bug prone, as
syzkaller reported:

======================================================
WARNING: possible circular locking dependency detected
5.11.0-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor905/8822 is trying to acquire lock:
ffffffff8d678fe8 (rtnl_mutex){+.+.}-{3:3}, at: ipv6_sock_mc_close+0xd7/0x110 net/ipv6/mcast.c:323

but task is already holding lock:
ffff888024390120 (sk_lock-AF_INET6){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1600 [inline]
ffff888024390120 (sk_lock-AF_INET6){+.+.}-{0:0}, at: mptcp6_release+0x57/0x130 net/mptcp/protocol.c:3507

which lock already depends on the new lock.

Instead we can simply forbid any mcast-related setsockopt.
Let's do the same with all other non supported sockopts.

Fixes: 717e79c867ca5 ("mptcp: Add setsockopt()/getsockopt() socket operations")
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 216 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 479f75653969..fb98fab252df 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -82,6 +82,219 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
 	return ret;
 }
 
+static bool mptcp_supported_sockopt(int level, int optname)
+{
+	if (level == SOL_SOCKET) {
+		switch (optname) {
+		case SO_DEBUG:
+		case SO_REUSEPORT:
+		case SO_REUSEADDR:
+
+		/* the following ones need a better implementation,
+		 * but are quite common we want to preserve them
+		 */
+		case SO_BINDTODEVICE:
+		case SO_SNDBUF:
+		case SO_SNDBUFFORCE:
+		case SO_RCVBUF:
+		case SO_RCVBUFFORCE:
+		case SO_KEEPALIVE:
+		case SO_PRIORITY:
+		case SO_LINGER:
+		case SO_TIMESTAMP_OLD:
+		case SO_TIMESTAMP_NEW:
+		case SO_TIMESTAMPNS_OLD:
+		case SO_TIMESTAMPNS_NEW:
+		case SO_TIMESTAMPING_OLD:
+		case SO_TIMESTAMPING_NEW:
+		case SO_RCVLOWAT:
+		case SO_RCVTIMEO_OLD:
+		case SO_RCVTIMEO_NEW:
+		case SO_SNDTIMEO_OLD:
+		case SO_SNDTIMEO_NEW:
+		case SO_MARK:
+		case SO_INCOMING_CPU:
+		case SO_BINDTOIFINDEX:
+		case SO_BUSY_POLL:
+		case SO_PREFER_BUSY_POLL:
+		case SO_BUSY_POLL_BUDGET:
+
+		/* next ones are no-op for plain TCP */
+		case SO_NO_CHECK:
+		case SO_DONTROUTE:
+		case SO_BROADCAST:
+		case SO_BSDCOMPAT:
+		case SO_PASSCRED:
+		case SO_PASSSEC:
+		case SO_RXQ_OVFL:
+		case SO_WIFI_STATUS:
+		case SO_NOFCS:
+		case SO_SELECT_ERR_QUEUE:
+			return true;
+		}
+
+		/* SO_OOBINLINE is not supported, let's avoid the related mess */
+		/* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
+		 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
+		 * we must be careful with subflows
+		 */
+		/* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
+		 * explicitly the sk_protocol field
+		 */
+		/* SO_PEEK_OFF is unsupported, as it is for plain TCP */
+		/* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */
+		/* SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
+		 * but likely needs careful design
+		 */
+		/* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */
+		/* SO_TXTIME is currently unsupported */
+		return false;
+	}
+	if (level == SOL_IP) {
+		switch (optname) {
+		/* should work fine */
+		case IP_FREEBIND:
+		case IP_TRANSPARENT:
+
+		/* the following are control cmsg related */
+		case IP_PKTINFO:
+		case IP_RECVTTL:
+		case IP_RECVTOS:
+		case IP_RECVOPTS:
+		case IP_RETOPTS:
+		case IP_PASSSEC:
+		case IP_RECVORIGDSTADDR:
+		case IP_CHECKSUM:
+		case IP_RECVFRAGSIZE:
+
+		/* common stuff that need some love */
+		case IP_TOS:
+		case IP_TTL:
+		case IP_BIND_ADDRESS_NO_PORT:
+		case IP_MTU_DISCOVER:
+		case IP_RECVERR:
+
+		/* possibly less common may deserve some love */
+		case IP_MINTTL:
+
+		/* the following is apparently a no-op for plain TCP */
+		case IP_RECVERR_RFC4884:
+			return true;
+		}
+
+		/* IP_OPTIONS is not supported, needs subflow care */
+		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
+		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
+		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
+		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
+		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP,
+		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
+		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal
+		 * with mcast stuff
+		 */
+		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */
+		return false;
+	}
+	if (level == SOL_IPV6) {
+		switch (optname) {
+		case IPV6_V6ONLY:
+
+		/* the following are control cmsg related */
+		case IPV6_RECVPKTINFO:
+		case IPV6_2292PKTINFO:
+		case IPV6_RECVHOPLIMIT:
+		case IPV6_2292HOPLIMIT:
+		case IPV6_RECVRTHDR:
+		case IPV6_2292RTHDR:
+		case IPV6_RECVHOPOPTS:
+		case IPV6_2292HOPOPTS:
+		case IPV6_RECVDSTOPTS:
+		case IPV6_2292DSTOPTS:
+		case IPV6_RECVTCLASS:
+		case IPV6_FLOWINFO:
+		case IPV6_RECVPATHMTU:
+		case IPV6_RECVORIGDSTADDR:
+		case IPV6_RECVFRAGSIZE:
+
+		/* the following ones need some love but are quite common */
+		case IPV6_TCLASS:
+		case IPV6_TRANSPARENT:
+		case IPV6_FREEBIND:
+		case IPV6_PKTINFO:
+		case IPV6_2292PKTOPTIONS:
+		case IPV6_UNICAST_HOPS:
+		case IPV6_MTU_DISCOVER:
+		case IPV6_MTU:
+		case IPV6_RECVERR:
+		case IPV6_FLOWINFO_SEND:
+		case IPV6_FLOWLABEL_MGR:
+		case IPV6_MINHOPCOUNT:
+		case IPV6_DONTFRAG:
+		case IPV6_AUTOFLOWLABEL:
+
+		/* the following one is a no-op for plain TCP */
+		case IPV6_RECVERR_RFC4884:
+			return true;
+		}
+
+		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
+		 * not supported
+		 */
+		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
+		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
+		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
+		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP,
+		 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP,
+		 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER
+		 * are not supported better not deal with mcast
+		 */
+		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */
+
+		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
+		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */
+		return false;
+	}
+	if (level == SOL_TCP) {
+		switch (optname) {
+		/* the following are no-op or should work just fine */
+		case TCP_THIN_DUPACK:
+		case TCP_DEFER_ACCEPT:
+
+		/* the following need some love */
+		case TCP_MAXSEG:
+		case TCP_NODELAY:
+		case TCP_THIN_LINEAR_TIMEOUTS:
+		case TCP_CONGESTION:
+		case TCP_ULP:
+		case TCP_CORK:
+		case TCP_KEEPIDLE:
+		case TCP_KEEPINTVL:
+		case TCP_KEEPCNT:
+		case TCP_SYNCNT:
+		case TCP_SAVE_SYN:
+		case TCP_LINGER2:
+		case TCP_WINDOW_CLAMP:
+		case TCP_QUICKACK:
+		case TCP_USER_TIMEOUT:
+		case TCP_TIMESTAMP:
+		case TCP_NOTSENT_LOWAT:
+		case TCP_TX_DELAY:
+			return true;
+		}
+
+		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */
+
+		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
+		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
+		 */
+		/* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE,
+		 * are not supported fastopen is currently unsupported
+		 */
+		/* TCP_INQ is currently unsupported, needs some recvmsg work */
+	}
+	return false;
+}
+
 int mptcp_setsockopt(struct sock *sk, int level, int optname,
 		     sockptr_t optval, unsigned int optlen)
 {
@@ -90,6 +303,9 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
 
 	pr_debug("msk=%p", msk);
 
+	if (!mptcp_supported_sockopt(level, optname))
+		return -ENOPROTOOPT;
+
 	if (level == SOL_SOCKET)
 		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
 
-- 
cgit v1.2.3


From 7896248983ef4eec18c8bd301a81d8672dbc9955 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:53 -0700
Subject: mptcp: add skeleton to sync msk socket options to subflows

Handle following cases:
1. setsockopt is called with multiple subflows.
   Change might have to be mirrored to all of them.
   This is done directly in process context/setsockopt call.
2. Outgoing subflow is created after one or several setsockopt()
   calls have been made.  Old setsockopt changes should be
   synced to the new socket.
3. Incoming subflow, after setsockopt call(s).

Cases 2 and 3 are handled right after the join list is spliced to the conn
list.

Not all sockopt values can be just be copied by value, some require
helper calls.  Those can acquire socket lock (which can sleep).

If the join->conn list splicing is done from preemptible context,
synchronization can be done right away, otherwise its deferred to work
queue.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 41 +++++++++++++++++++++++++++++++++--------
 net/mptcp/protocol.h |  7 +++++++
 net/mptcp/sockopt.c  | 19 +++++++++++++++++++
 net/mptcp/subflow.c  |  1 +
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e0b381ae99af..1399d301d47f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -730,18 +730,42 @@ wake:
 		sk->sk_data_ready(sk);
 }
 
-void __mptcp_flush_join_list(struct mptcp_sock *msk)
+static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
 
 	if (likely(list_empty(&msk->join_list)))
-		return;
+		return false;
 
 	spin_lock_bh(&msk->join_list_lock);
 	list_for_each_entry(subflow, &msk->join_list, node)
 		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+
 	list_splice_tail_init(&msk->join_list, &msk->conn_list);
 	spin_unlock_bh(&msk->join_list_lock);
+
+	return true;
+}
+
+void __mptcp_flush_join_list(struct mptcp_sock *msk)
+{
+	if (likely(!mptcp_do_flush_join_list(msk)))
+		return;
+
+	if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
+		mptcp_schedule_work((struct sock *)msk);
+}
+
+static void mptcp_flush_join_list(struct mptcp_sock *msk)
+{
+	bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
+
+	might_sleep();
+
+	if (!mptcp_do_flush_join_list(msk) && !sync_needed)
+		return;
+
+	mptcp_sockopt_sync_all(msk);
 }
 
 static bool mptcp_timer_pending(struct sock *sk)
@@ -1457,7 +1481,7 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			__mptcp_flush_join_list(msk);
+			mptcp_flush_join_list(msk);
 			ssk = mptcp_subflow_get_send(msk);
 
 			/* try to keep the subflow socket lock across
@@ -1883,7 +1907,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 	unsigned int moved = 0;
 	bool ret, done;
 
-	__mptcp_flush_join_list(msk);
+	mptcp_flush_join_list(msk);
 	do {
 		struct sock *ssk = mptcp_subflow_recv_lookup(msk);
 		bool slowpath;
@@ -2307,7 +2331,7 @@ static void mptcp_worker(struct work_struct *work)
 		goto unlock;
 
 	mptcp_check_data_fin_ack(sk);
-	__mptcp_flush_join_list(msk);
+	mptcp_flush_join_list(msk);
 
 	mptcp_check_fastclose(msk);
 
@@ -2507,7 +2531,7 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
 		}
 	}
 
-	__mptcp_flush_join_list(msk);
+	mptcp_flush_join_list(msk);
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
 
@@ -2644,7 +2668,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	struct mptcp_subflow_context *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	__mptcp_flush_join_list(msk);
+	mptcp_do_flush_join_list(msk);
+
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
@@ -3210,7 +3235,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
 		 * This is needed so NOSPACE flag can be set from tcp stack.
 		 */
-		__mptcp_flush_join_list(msk);
+		mptcp_flush_join_list(msk);
 		mptcp_for_each_subflow(msk, subflow) {
 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 14f0114be17a..0186aad3108a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -108,6 +108,7 @@
 #define MPTCP_CLEAN_UNA		7
 #define MPTCP_ERROR_REPORT	8
 #define MPTCP_RETRANSMIT	9
+#define MPTCP_WORK_SYNC_SETSOCKOPT 10
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
@@ -735,6 +736,12 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
 
+int mptcp_setsockopt(struct sock *sk, int level, int optname,
+		     sockptr_t optval, unsigned int optlen);
+
+void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
+void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
+
 static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
 {
 	return (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index fb98fab252df..4fdc0ad6acf7 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -350,3 +350,22 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return -EOPNOTSUPP;
 }
 
+void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+{
+	msk_owned_by_me(msk);
+}
+
+void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
+{
+	struct mptcp_subflow_context *subflow;
+
+	msk_owned_by_me(msk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+		mptcp_sockopt_sync(msk, ssk);
+
+		cond_resched();
+	}
+}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 3c19a5265a0f..350c51c6bf9d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1317,6 +1317,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
 
 	mptcp_add_pending_subflow(msk, subflow);
+	mptcp_sockopt_sync(msk, ssk);
 	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
 	if (err && err != -EINPROGRESS)
 		goto failed_unlink;
-- 
cgit v1.2.3


From df00b087da24c0b5341178bbd5353101c7cef98f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:54 -0700
Subject: mptcp: tag sequence_seq with socket state

Paolo Abeni suggested to avoid re-syncing new subflows because
they inherit options from listener. In case options were set on
listener but are not set on mptcp-socket there is no need to
do any synchronisation for new subflows.

This change sets sockopt_seq of new mptcp sockets to the seq of
the mptcp listener sock.

Subflow sequence is set to the embedded tcp listener sk.
Add a comment explaing why sk_state is involved in sockopt_seq
generation.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 12 +++++++++---
 net/mptcp/protocol.h |  4 ++++
 net/mptcp/sockopt.c  | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 net/mptcp/subflow.c  |  4 ++++
 4 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1399d301d47f..5cba90948a7e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -733,18 +733,23 @@ wake:
 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
+	bool ret = false;
 
 	if (likely(list_empty(&msk->join_list)))
 		return false;
 
 	spin_lock_bh(&msk->join_list_lock);
-	list_for_each_entry(subflow, &msk->join_list, node)
-		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+	list_for_each_entry(subflow, &msk->join_list, node) {
+		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 
+		mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
+		if (READ_ONCE(msk->setsockopt_seq) != sseq)
+			ret = true;
+	}
 	list_splice_tail_init(&msk->join_list, &msk->conn_list);
 	spin_unlock_bh(&msk->join_list_lock);
 
-	return true;
+	return ret;
 }
 
 void __mptcp_flush_join_list(struct mptcp_sock *msk)
@@ -2718,6 +2723,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->snd_nxt = msk->write_seq;
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
 
 	if (mp_opt->mp_capable) {
 		msk->can_ack = true;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0186aad3108a..df269c26f145 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -256,6 +256,8 @@ struct mptcp_sock {
 		u64	time;	/* start time of measurement window */
 		u64	rtt_us; /* last maximum rtt of subflows */
 	} rcvq_space;
+
+	u32 setsockopt_seq;
 };
 
 #define mptcp_lock_sock(___sk, cb) do {					\
@@ -414,6 +416,8 @@ struct mptcp_subflow_context {
 	long	delegated_status;
 	struct	list_head delegated_node;   /* link into delegated_action, protected by local BH */
 
+	u32 setsockopt_seq;
+
 	struct	sock *tcp_sock;	    /* tcp sk backpointer */
 	struct	sock *conn;	    /* parent mptcp_sock */
 	const	struct inet_connection_sock_af_ops *icsk_af_ops;
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 4fdc0ad6acf7..27b49543fc58 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -24,6 +24,27 @@ static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
 	return msk->first;
 }
 
+static u32 sockopt_seq_reset(const struct sock *sk)
+{
+	sock_owned_by_me(sk);
+
+	/* Highbits contain state.  Allows to distinguish sockopt_seq
+	 * of listener and established:
+	 * s0 = new_listener()
+	 * sockopt(s0) - seq is 1
+	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
+	 * sockopt(s0) - seq increments to 2 on s0
+	 * sockopt(s1) // seq increments to 2 on s1 (different option)
+	 * new ssk completes join, inherits options from s0 // seq 2
+	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
+	 *
+	 * Set High order bits to sk_state so ssk->seq == msk->seq test
+	 * will fail.
+	 */
+
+	return (u32)sk->sk_state << 24u;
+}
+
 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 				       sockptr_t optval, unsigned int optlen)
 {
@@ -350,22 +371,44 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return -EOPNOTSUPP;
 }
 
+static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
+{
+}
+
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 {
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
 	msk_owned_by_me(msk);
+
+	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+		__mptcp_sockopt_sync(msk, ssk);
+
+		subflow->setsockopt_seq = msk->setsockopt_seq;
+	}
 }
 
 void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	u32 seq;
 
-	msk_owned_by_me(msk);
+	seq = sockopt_seq_reset(sk);
 
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		u32 sseq = READ_ONCE(subflow->setsockopt_seq);
 
-		mptcp_sockopt_sync(msk, ssk);
+		if (sseq != msk->setsockopt_seq) {
+			__mptcp_sockopt_sync(msk, ssk);
+			WRITE_ONCE(subflow->setsockopt_seq, seq);
+		} else if (sseq != seq) {
+			WRITE_ONCE(subflow->setsockopt_seq, seq);
+		}
 
 		cond_resched();
 	}
+
+	msk->setsockopt_seq = seq;
 }
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 350c51c6bf9d..c3da84576b3c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -679,6 +679,9 @@ create_child:
 			goto out;
 		}
 
+		/* ssk inherits options of listener sk */
+		ctx->setsockopt_seq = listener->setsockopt_seq;
+
 		if (ctx->mp_capable) {
 			/* this can't race with mptcp_close(), as the msk is
 			 * not yet exposted to user-space
@@ -694,6 +697,7 @@ create_child:
 			 * created mptcp socket
 			 */
 			new_msk->sk_destruct = mptcp_sock_destruct;
+			mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
 			mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
 			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
 			ctx->conn = new_msk;
-- 
cgit v1.2.3


From 1b3e7ede1365a24db1b4fd837e58a595f52fa4ad Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:55 -0700
Subject: mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY

start with something simple: both take an integer value, both
need to be mirrored to all subflows.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 27b49543fc58..9be4c94ff4d4 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -45,6 +45,90 @@ static u32 sockopt_seq_reset(const struct sock *sk)
 	return (u32)sk->sk_state << 24u;
 }
 
+static void sockopt_seq_inc(struct mptcp_sock *msk)
+{
+	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;
+
+	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
+}
+
+static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
+				unsigned int optlen, int *val)
+{
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (copy_from_sockptr(val, optval, sizeof(*val)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow = lock_sock_fast(ssk);
+
+		switch (optname) {
+		case SO_KEEPALIVE:
+			if (ssk->sk_prot->keepalive)
+				ssk->sk_prot->keepalive(ssk, !!val);
+			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
+			break;
+		case SO_PRIORITY:
+			ssk->sk_priority = val;
+			break;
+		}
+
+		subflow->setsockopt_seq = msk->setsockopt_seq;
+		unlock_sock_fast(ssk, slow);
+	}
+
+	release_sock(sk);
+}
+
+static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
+{
+	sockptr_t optval = KERNEL_SOCKPTR(&val);
+	struct sock *sk = (struct sock *)msk;
+	int ret;
+
+	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+			      optval, sizeof(val));
+	if (ret)
+		return ret;
+
+	mptcp_sol_socket_sync_intval(msk, optname, val);
+	return 0;
+}
+
+static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
+					   sockptr_t optval, unsigned int optlen)
+{
+	int val, ret;
+
+	ret = mptcp_get_int_option(msk, optval, optlen, &val);
+	if (ret)
+		return ret;
+
+	switch (optname) {
+	case SO_KEEPALIVE:
+		mptcp_sol_socket_sync_intval(msk, optname, val);
+		return 0;
+	case SO_PRIORITY:
+		return mptcp_sol_socket_intval(msk, optname, val);
+	}
+
+	return -ENOPROTOOPT;
+}
+
 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 				       sockptr_t optval, unsigned int optlen)
 {
@@ -71,6 +155,9 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 		}
 		release_sock(sk);
 		return ret;
+	case SO_KEEPALIVE:
+	case SO_PRIORITY:
+		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	}
 
 	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
@@ -371,8 +458,27 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	return -EOPNOTSUPP;
 }
 
+static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
+{
+	struct sock *sk = (struct sock *)msk;
+
+	if (ssk->sk_prot->keepalive) {
+		if (sock_flag(sk, SOCK_KEEPOPEN))
+			ssk->sk_prot->keepalive(ssk, 1);
+		else
+			ssk->sk_prot->keepalive(ssk, 0);
+	}
+
+	ssk->sk_priority = sk->sk_priority;
+}
+
 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
 {
+	bool slow = lock_sock_fast(ssk);
+
+	sync_socket_options(msk, ssk);
+
+	unlock_sock_fast(ssk, slow);
 }
 
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-- 
cgit v1.2.3


From 5d0a6bc82d38d773c20b44aa1b9f312c4294b594 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:56 -0700
Subject: mptcp: setsockopt: handle receive/send buffer and device bind

Similar to previous patch: needs to be mirrored to all subflows.

Device bind is simpler: it is only done on the initial (listener) sk.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 9be4c94ff4d4..bfb9db04d26b 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -85,6 +85,16 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
 		case SO_PRIORITY:
 			ssk->sk_priority = val;
 			break;
+		case SO_SNDBUF:
+		case SO_SNDBUFFORCE:
+			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+			break;
+		case SO_RCVBUF:
+		case SO_RCVBUFFORCE:
+			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+			break;
 		}
 
 		subflow->setsockopt_seq = msk->setsockopt_seq;
@@ -123,6 +133,10 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 		mptcp_sol_socket_sync_intval(msk, optname, val);
 		return 0;
 	case SO_PRIORITY:
+	case SO_SNDBUF:
+	case SO_SNDBUFFORCE:
+	case SO_RCVBUF:
+	case SO_RCVBUFFORCE:
 		return mptcp_sol_socket_intval(msk, optname, val);
 	}
 
@@ -139,6 +153,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	switch (optname) {
 	case SO_REUSEPORT:
 	case SO_REUSEADDR:
+	case SO_BINDTODEVICE:
+	case SO_BINDTOIFINDEX:
 		lock_sock(sk);
 		ssock = __mptcp_nmpc_socket(msk);
 		if (!ssock) {
@@ -152,11 +168,19 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 				sk->sk_reuseport = ssock->sk->sk_reuseport;
 			else if (optname == SO_REUSEADDR)
 				sk->sk_reuse = ssock->sk->sk_reuse;
+			else if (optname == SO_BINDTODEVICE)
+				sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
+			else if (optname == SO_BINDTOIFINDEX)
+				sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
 		}
 		release_sock(sk);
 		return ret;
 	case SO_KEEPALIVE:
 	case SO_PRIORITY:
+	case SO_SNDBUF:
+	case SO_SNDBUFFORCE:
+	case SO_RCVBUF:
+	case SO_RCVBUFFORCE:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	}
 
@@ -460,6 +484,7 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 
 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 {
+	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
 	struct sock *sk = (struct sock *)msk;
 
 	if (ssk->sk_prot->keepalive) {
@@ -470,6 +495,33 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 	}
 
 	ssk->sk_priority = sk->sk_priority;
+	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
+	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+
+	if (sk->sk_userlocks & tx_rx_locks) {
+		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
+		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
+	}
+
+	if (sock_flag(sk, SOCK_LINGER)) {
+		ssk->sk_lingertime = sk->sk_lingertime;
+		sock_set_flag(ssk, SOCK_LINGER);
+	} else {
+		sock_reset_flag(ssk, SOCK_LINGER);
+	}
+
+	if (sk->sk_mark != ssk->sk_mark) {
+		ssk->sk_mark = sk->sk_mark;
+		sk_dst_reset(ssk);
+	}
+
+	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
+
+	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
+		tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true);
 }
 
 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-- 
cgit v1.2.3


From 268b1238746086f3608daa20b068182ddc2b0128 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:57 -0700
Subject: mptcp: setsockopt: support SO_LINGER

Similar to PRIORITY/KEEPALIVE: needs to be mirrored to all subflows.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index bfb9db04d26b..ee5d58747ce7 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -143,6 +143,47 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 	return -ENOPROTOOPT;
 }
 
+static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
+					      unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	struct linger ling;
+	sockptr_t kopt;
+	int ret;
+
+	if (optlen < sizeof(ling))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
+		return -EFAULT;
+
+	kopt = KERNEL_SOCKPTR(&ling);
+	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
+	if (ret)
+		return ret;
+
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		bool slow = lock_sock_fast(ssk);
+
+		if (!ling.l_onoff) {
+			sock_reset_flag(ssk, SOCK_LINGER);
+		} else {
+			ssk->sk_lingertime = sk->sk_lingertime;
+			sock_set_flag(ssk, SOCK_LINGER);
+		}
+
+		subflow->setsockopt_seq = msk->setsockopt_seq;
+		unlock_sock_fast(ssk, slow);
+	}
+
+	release_sock(sk);
+	return 0;
+}
+
 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 				       sockptr_t optval, unsigned int optlen)
 {
@@ -182,6 +223,8 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	case SO_RCVBUF:
 	case SO_RCVBUFFORCE:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
+	case SO_LINGER:
+		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
 	}
 
 	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
-- 
cgit v1.2.3


From 36704413db79127f6716ea402f85f85465fba165 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:58 -0700
Subject: mptcp: setsockopt: add SO_MARK support

Value is synced to all subflows.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index ee5d58747ce7..1ad6092811e5 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -95,6 +95,12 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
 			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
 			break;
+		case SO_MARK:
+			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
+				ssk->sk_mark = sk->sk_mark;
+				sk_dst_reset(ssk);
+			}
+			break;
 		}
 
 		subflow->setsockopt_seq = msk->setsockopt_seq;
@@ -132,6 +138,7 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 	case SO_KEEPALIVE:
 		mptcp_sol_socket_sync_intval(msk, optname, val);
 		return 0;
+	case SO_MARK:
 	case SO_PRIORITY:
 	case SO_SNDBUF:
 	case SO_SNDBUFFORCE:
@@ -222,6 +229,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	case SO_SNDBUFFORCE:
 	case SO_RCVBUF:
 	case SO_RCVBUFFORCE:
+	case SO_MARK:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	case SO_LINGER:
 		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
-- 
cgit v1.2.3


From 6f0d7198084c4096794ae58b9cf5d30c79eea222 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:44:59 -0700
Subject: mptcp: setsockopt: add SO_INCOMING_CPU

Replicate to all subflows.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 1ad6092811e5..7eb637488dc2 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -101,6 +101,9 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
 				sk_dst_reset(ssk);
 			}
 			break;
+		case SO_INCOMING_CPU:
+			WRITE_ONCE(ssk->sk_incoming_cpu, val);
+			break;
 		}
 
 		subflow->setsockopt_seq = msk->setsockopt_seq;
@@ -125,6 +128,15 @@ static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
 	return 0;
 }
 
+static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
+{
+	struct sock *sk = (struct sock *)msk;
+
+	WRITE_ONCE(sk->sk_incoming_cpu, val);
+
+	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
+}
+
 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 					   sockptr_t optval, unsigned int optlen)
 {
@@ -145,6 +157,9 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 	case SO_RCVBUF:
 	case SO_RCVBUFFORCE:
 		return mptcp_sol_socket_intval(msk, optname, val);
+	case SO_INCOMING_CPU:
+		mptcp_so_incoming_cpu(msk, val);
+		return 0;
 	}
 
 	return -ENOPROTOOPT;
@@ -230,6 +245,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	case SO_RCVBUF:
 	case SO_RCVBUFFORCE:
 	case SO_MARK:
+	case SO_INCOMING_CPU:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	case SO_LINGER:
 		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
-- 
cgit v1.2.3


From a03c99b253c232d7d305c9dd476b5b120841dff7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:45:00 -0700
Subject: mptcp: setsockopt: SO_DEBUG and no-op options

Handle SO_DEBUG and set it on all subflows.
Ignore those values not implemented on TCP sockets.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/sockopt.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 7eb637488dc2..390433b7f324 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -77,6 +77,9 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
 		bool slow = lock_sock_fast(ssk);
 
 		switch (optname) {
+		case SO_DEBUG:
+			sock_valbool_flag(ssk, SOCK_DBG, !!val);
+			break;
 		case SO_KEEPALIVE:
 			if (ssk->sk_prot->keepalive)
 				ssk->sk_prot->keepalive(ssk, !!val);
@@ -150,6 +153,7 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
 	case SO_KEEPALIVE:
 		mptcp_sol_socket_sync_intval(msk, optname, val);
 		return 0;
+	case SO_DEBUG:
 	case SO_MARK:
 	case SO_PRIORITY:
 	case SO_SNDBUF:
@@ -246,9 +250,21 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
 	case SO_RCVBUFFORCE:
 	case SO_MARK:
 	case SO_INCOMING_CPU:
+	case SO_DEBUG:
 		return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
 	case SO_LINGER:
 		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
+	case SO_NO_CHECK:
+	case SO_DONTROUTE:
+	case SO_BROADCAST:
+	case SO_BSDCOMPAT:
+	case SO_PASSCRED:
+	case SO_PASSSEC:
+	case SO_RXQ_OVFL:
+	case SO_WIFI_STATUS:
+	case SO_NOFCS:
+	case SO_SELECT_ERR_QUEUE:
+		return 0;
 	}
 
 	return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
-- 
cgit v1.2.3


From aa1fbd94e5c7d3a356058b4ee4a455d952dd48aa Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:45:01 -0700
Subject: mptcp: sockopt: add TCP_CONGESTION and TCP_INFO

TCP_CONGESTION is set for all subflows.
The mptcp socket gains icsk_ca_ops too so it can be used to keep the
authoritative state that should be set on new/future subflows.

TCP_INFO will return first subflow only.
The out-of-tree kernel has a MPTCP_INFO getsockopt, this could be added
later on.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c |   5 +++
 net/mptcp/sockopt.c  | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5cba90948a7e..073e20078ed0 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2399,6 +2399,9 @@ static int __mptcp_init_sock(struct sock *sk)
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
 	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+
+	tcp_assign_congestion_control(sk);
+
 	return 0;
 }
 
@@ -2592,6 +2595,8 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	WARN_ON_ONCE(msk->rmem_released);
 	sk_stream_kill_queues(sk);
 	xfrm_sk_free_policy(sk);
+
+	tcp_cleanup_congestion_control(sk);
 	sk_refcnt_debug_release(sk);
 	mptcp_dispose_initial_subflow(msk);
 	sock_put(sk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 390433b7f324..00d941b66c1e 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -510,6 +510,62 @@ static bool mptcp_supported_sockopt(int level, int optname)
 	return false;
 }
 
+static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
+					       unsigned int optlen)
+{
+	struct mptcp_subflow_context *subflow;
+	struct sock *sk = (struct sock *)msk;
+	char name[TCP_CA_NAME_MAX];
+	bool cap_net_admin;
+	int ret;
+
+	if (optlen < 1)
+		return -EINVAL;
+
+	ret = strncpy_from_sockptr(name, optval,
+				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
+	if (ret < 0)
+		return -EFAULT;
+
+	name[ret] = 0;
+
+	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);
+
+	ret = 0;
+	lock_sock(sk);
+	sockopt_seq_inc(msk);
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		int err;
+
+		lock_sock(ssk);
+		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
+		if (err < 0 && ret == 0)
+			ret = err;
+		subflow->setsockopt_seq = msk->setsockopt_seq;
+		release_sock(ssk);
+	}
+
+	if (ret == 0)
+		tcp_set_congestion_control(sk, name, false, cap_net_admin);
+
+	release_sock(sk);
+	return ret;
+}
+
+static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
+				    sockptr_t optval, unsigned int optlen)
+{
+	switch (optname) {
+	case TCP_ULP:
+		return -EOPNOTSUPP;
+	case TCP_CONGESTION:
+		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 int mptcp_setsockopt(struct sock *sk, int level, int optname,
 		     sockptr_t optval, unsigned int optlen)
 {
@@ -539,6 +595,49 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
 	if (level == SOL_IPV6)
 		return mptcp_setsockopt_v6(msk, optname, optval, optlen);
 
+	if (level == SOL_TCP)
+		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);
+
+	return -EOPNOTSUPP;
+}
+
+static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
+					  char __user *optval, int __user *optlen)
+{
+	struct sock *sk = (struct sock *)msk;
+	struct socket *ssock;
+	int ret = -EINVAL;
+	struct sock *ssk;
+
+	lock_sock(sk);
+	ssk = msk->first;
+	if (ssk) {
+		ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
+		goto out;
+	}
+
+	ssock = __mptcp_nmpc_socket(msk);
+	if (!ssock)
+		goto out;
+
+	ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);
+
+out:
+	release_sock(sk);
+	return ret;
+}
+
+static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
+				    char __user *optval, int __user *optlen)
+{
+	switch (optname) {
+	case TCP_ULP:
+	case TCP_CONGESTION:
+	case TCP_INFO:
+	case TCP_CC_INFO:
+		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
+						      optval, optlen);
+	}
 	return -EOPNOTSUPP;
 }
 
@@ -562,6 +661,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 	if (ssk)
 		return tcp_getsockopt(ssk, level, optname, optval, option);
 
+	if (level == SOL_TCP)
+		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
 	return -EOPNOTSUPP;
 }
 
-- 
cgit v1.2.3


From dc65fe82fb07e610e03a9b05bd445f46f93175f5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Apr 2021 16:45:02 -0700
Subject: selftests: mptcp: add packet mark test case

Extend mptcp_connect tool with SO_MARK support (-M <value>) and
add a test case that checks that the packet mark gets copied to all
subflows.

This is done by only allowing packets with either skb->mark 1 or 2
via iptables.

DROP rule packet counter is checked; if its not zero, print an error
message and fail the test case.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/Makefile         |   2 +-
 tools/testing/selftests/net/mptcp/mptcp_connect.c  |  23 +-
 tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 276 +++++++++++++++++++++
 3 files changed, 299 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/net/mptcp/mptcp_sockopt.sh

diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 00bb158b4a5d..f1464f09b080 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
 
 TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
-	      simult_flows.sh
+	      simult_flows.sh mptcp_sockopt.sh
 
 TEST_GEN_FILES = mptcp_connect pm_nl_ctl
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 69d89b5d666f..2f207cf33661 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -57,6 +57,7 @@ static bool cfg_join;
 static bool cfg_remove;
 static unsigned int cfg_do_w;
 static int cfg_wait;
+static uint32_t cfg_mark;
 
 static void die_usage(void)
 {
@@ -69,6 +70,7 @@ static void die_usage(void)
 	fprintf(stderr, "\t-p num -- use port num\n");
 	fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
 	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
 	fprintf(stderr, "\t-u -- check mptcp ulp\n");
 	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
 	exit(1);
@@ -140,6 +142,17 @@ static void set_sndbuf(int fd, unsigned int size)
 	}
 }
 
+static void set_mark(int fd, uint32_t mark)
+{
+	int err;
+
+	err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+	if (err) {
+		perror("set SO_MARK");
+		exit(1);
+	}
+}
+
 static int sock_listen_mptcp(const char * const listenaddr,
 			     const char * const port)
 {
@@ -248,6 +261,9 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			continue;
 		}
 
+		if (cfg_mark)
+			set_mark(sock, cfg_mark);
+
 		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
 			break; /* success */
 
@@ -830,7 +846,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:")) != -1) {
+	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
@@ -880,6 +896,9 @@ static void parse_opts(int argc, char **argv)
 		case 'w':
 			cfg_wait = atoi(optarg)*1000000;
 			break;
+		case 'M':
+			cfg_mark = strtol(optarg, NULL, 0);
+			break;
 		}
 	}
 
@@ -911,6 +930,8 @@ int main(int argc, char *argv[])
 			set_rcvbuf(fd, cfg_rcvbuf);
 		if (cfg_sndbuf)
 			set_sndbuf(fd, cfg_sndbuf);
+		if (cfg_mark)
+			set_mark(fd, cfg_mark);
 
 		return main_loop_s(fd);
 	}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
new file mode 100755
index 000000000000..2fa13946ac04
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+mptcp_connect=""
+do_all_tests=1
+
+add_mark_rules()
+{
+	local ns=$1
+	local m=$2
+
+	for t in iptables ip6tables; do
+		# just to debug: check we have multiple subflows connection requests
+		ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
+
+		# RST packets might be handled by a internal dummy socket
+		ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT
+
+		ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT
+		ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP
+	done
+}
+
+init()
+{
+	rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+
+	ns1="ns1-$rndh"
+	ns2="ns2-$rndh"
+
+	for netns in "$ns1" "$ns2";do
+		ip netns add $netns || exit $ksft_skip
+		ip -net $netns link set lo up
+		ip netns exec $netns sysctl -q net.mptcp.enabled=1
+		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
+		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+	done
+
+	for i in `seq 1 4`; do
+		ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+		ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+		ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+		ip -net "$ns1" link set ns1eth$i up
+
+		ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+		ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+		ip -net "$ns2" link set ns2eth$i up
+
+		# let $ns2 reach any $ns1 address from any interface
+		ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+
+		ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
+		ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+
+		ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
+		ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+	done
+
+	ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+	ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+
+	add_mark_rules $ns1 1
+	add_mark_rules $ns2 2
+}
+
+cleanup()
+{
+	for netns in "$ns1" "$ns2"; do
+		ip netns del $netns
+	done
+	rm -f "$cin" "$cout"
+	rm -f "$sin" "$sout"
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+iptables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without iptables tool"
+	exit $ksft_skip
+fi
+
+ip6tables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without ip6tables tool"
+	exit $ksft_skip
+fi
+
+check_mark()
+{
+	local ns=$1
+	local af=$2
+
+	tables=iptables
+
+	if [ $af -eq 6 ];then
+		tables=ip6tables
+	fi
+
+	counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)
+	values=${counters%DROP*}
+
+	for v in $values; do
+		if [ $v -ne 0 ]; then
+			echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+print_file_err()
+{
+	ls -l "$1" 1>&2
+	echo "Trailing bytes are: "
+	tail -c 27 "$1"
+}
+
+check_transfer()
+{
+	in=$1
+	out=$2
+	what=$3
+
+	cmp "$in" "$out" > /dev/null 2>&1
+	if [ $? -ne 0 ] ;then
+		echo "[ FAIL ] $what does not match (in, out):"
+		print_file_err "$in"
+		print_file_err "$out"
+		ret=1
+
+		return 1
+	fi
+
+	return 0
+}
+
+# $1: IP address
+is_v6()
+{
+	[ -z "${1##*:*}" ]
+}
+
+do_transfer()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	cl_proto="$3"
+	srv_proto="$4"
+	connect_addr="$5"
+
+	port=12001
+
+	:> "$cout"
+	:> "$sout"
+
+	mptcp_connect="./mptcp_connect -r 20"
+
+	local local_addr
+	if is_v6 "${connect_addr}"; then
+		local_addr="::"
+	else
+		local_addr="0.0.0.0"
+	fi
+
+	timeout ${timeout_test} \
+		ip netns exec ${listener_ns} \
+			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+				${local_addr} < "$sin" > "$sout" &
+	spid=$!
+
+	sleep 1
+
+	timeout ${timeout_test} \
+		ip netns exec ${connector_ns} \
+			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+				$connect_addr < "$cin" > "$cout" &
+
+	cpid=$!
+
+	wait $cpid
+	retc=$?
+	wait $spid
+	rets=$?
+
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+		echo " client exit code $retc, server $rets" 1>&2
+		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+
+		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+
+		ret=1
+		return 1
+	fi
+
+	if [ $local_addr = "::" ];then
+		check_mark $listener_ns 6
+		check_mark $connector_ns 6
+	else
+		check_mark $listener_ns 4
+		check_mark $connector_ns 4
+	fi
+
+	check_transfer $cin $sout "file received by server"
+
+	rets=$?
+
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+		return 0
+	fi
+
+	return 1
+}
+
+make_file()
+{
+	name=$1
+	who=$2
+	size=$3
+
+	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+	echo "Created $name (size $size KB) containing data sent by $who"
+}
+
+run_tests()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	connect_addr="$3"
+	lret=0
+
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+
+	lret=$?
+
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		return
+	fi
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+run_tests $ns1 $ns2 dead:beef:1::1
+
+
+if [ $ret -eq 0 ];then
+	echo "PASS: all packets had packet mark set"
+fi
+
+exit $ret
-- 
cgit v1.2.3


From c5d66587b8900201e1530b7c18d41e87bd5812f4 Mon Sep 17 00:00:00 2001
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Date: Thu, 15 Apr 2021 17:37:48 -0700
Subject: net: ethernet: mediatek: ppe: fix busy wait loop

The intention is for the loop to timeout if the body does not succeed.
The current logic calls time_is_before_jiffies(timeout) which is false
until after the timeout, so the loop body never executes.

Fix by using readl_poll_timeout as a more standard and less error-prone
solution.

Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE")
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: Felix Fietkau <nbd@nbd.name>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe.c | 20 +++++++++-----------
 drivers/net/ethernet/mediatek/mtk_ppe.h |  1 +
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
index 71e1ccea6e72..3ad10c793308 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -2,9 +2,8 @@
 /* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
 
 #include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include "mtk_ppe.h"
@@ -44,18 +43,17 @@ static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
 
 static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
 {
-	unsigned long timeout = jiffies + HZ;
-
-	while (time_is_before_jiffies(timeout)) {
-		if (!(ppe_r32(ppe, MTK_PPE_GLO_CFG) & MTK_PPE_GLO_CFG_BUSY))
-			return 0;
+	int ret;
+	u32 val;
 
-		usleep_range(10, 20);
-	}
+	ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
+				 !(val & MTK_PPE_GLO_CFG_BUSY),
+				 20, MTK_PPE_WAIT_TIMEOUT_US);
 
-	dev_err(ppe->dev, "PPE table busy");
+	if (ret)
+		dev_err(ppe->dev, "PPE table busy");
 
-	return -ETIMEDOUT;
+	return ret;
 }
 
 static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
index 51bd5e75bbbd..242fb8f2ae65 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -12,6 +12,7 @@
 #define MTK_PPE_ENTRIES_SHIFT		3
 #define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
 #define MTK_PPE_HASH_MASK		(MTK_PPE_ENTRIES - 1)
+#define MTK_PPE_WAIT_TIMEOUT_US		1000000
 
 #define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
 #define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
-- 
cgit v1.2.3


From 5133bcc7481528e36fff0a3b056601efb704fb32 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:32 +0800
Subject: r8152: set inter fram gap time depending on speed

Set the maximum inter frame gap time (144ns) for speed 10M/half and
100M/half. It improves the performance for those speeds. And, there
is no effect for the other speeds.

For 10M/half and 100M/half, the fast inter frame gap time let the
device couldn't use the feature of the aggregation effectively,
because the transfer would be completed fastly. Therefore, use the
maximum value to improve the effect of the aggregation. However, you
may not feel the improvement for fast CPUs, because they compensate
for the effect of the aggregation.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 20fb5638ac65..10db48f4ed77 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -249,6 +249,9 @@
 
 /* PLA_TCR1 */
 #define VERSION_MASK		0x7cf0
+#define IFG_MASK		(BIT(3) | BIT(9) | BIT(8))
+#define IFG_144NS		BIT(9)
+#define IFG_96NS		(BIT(9) | BIT(8))
 
 /* PLA_MTPS */
 #define MTPS_JUMBO		(12 * 1024 / 64)
@@ -2747,6 +2750,29 @@ static int rtl_stop_rx(struct r8152 *tp)
 	return 0;
 }
 
+static void rtl_set_ifg(struct r8152 *tp, u16 speed)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR1);
+	ocp_data &= ~IFG_MASK;
+	if ((speed & (_10bps | _100bps)) && !(speed & FULL_DUP)) {
+		ocp_data |= IFG_144NS;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR1, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+		ocp_data &= ~TX10MIDLE_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+	} else {
+		ocp_data |= IFG_96NS;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR1, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+		ocp_data |= TX10MIDLE_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+	}
+}
+
 static inline void r8153b_rx_agg_chg_indicate(struct r8152 *tp)
 {
 	ocp_write_byte(tp, MCU_TYPE_USB, USB_UPT_RXDMA_OWN,
@@ -2850,6 +2876,8 @@ static int rtl8153_enable(struct r8152 *tp)
 	r8153_set_rx_early_timeout(tp);
 	r8153_set_rx_early_size(tp);
 
+	rtl_set_ifg(tp, rtl8152_get_speed(tp));
+
 	if (tp->version == RTL_VER_09) {
 		u32 ocp_data;
 
-- 
cgit v1.2.3


From a8a7be178e81a3d4b6972cbeb0ccd091ca2f9f89 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:33 +0800
Subject: r8152: adjust rtl8152_check_firmware function

Use bits operations to record and check the firmware.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 51 ++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 10db48f4ed77..28c9b4dc1a60 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -874,6 +874,14 @@ struct fw_header {
 	struct fw_block blocks[];
 } __packed;
 
+enum rtl8152_fw_flags {
+	FW_FLAGS_USB = 0,
+	FW_FLAGS_PLA,
+	FW_FLAGS_START,
+	FW_FLAGS_STOP,
+	FW_FLAGS_NC,
+};
+
 /**
  * struct fw_mac - a firmware block used by RTL_FW_PLA and RTL_FW_USB.
  *	The layout of the firmware block is:
@@ -3800,10 +3808,7 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 {
 	const struct firmware *fw = rtl_fw->fw;
 	struct fw_header *fw_hdr = (struct fw_header *)fw->data;
-	struct fw_mac *pla = NULL, *usb = NULL;
-	struct fw_phy_patch_key *start = NULL;
-	struct fw_phy_nc *phy_nc = NULL;
-	struct fw_block *stop = NULL;
+	unsigned long fw_flags = 0;
 	long ret = -EFAULT;
 	int i;
 
@@ -3832,50 +3837,52 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 				goto fail;
 			goto fw_end;
 		case RTL_FW_PLA:
-			if (pla) {
+			if (test_bit(FW_FLAGS_PLA, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"multiple PLA firmware encountered");
 				goto fail;
 			}
 
-			pla = (struct fw_mac *)block;
-			if (!rtl8152_is_fw_mac_ok(tp, pla)) {
+			if (!rtl8152_is_fw_mac_ok(tp, (struct fw_mac *)block)) {
 				dev_err(&tp->intf->dev,
 					"check PLA firmware failed\n");
 				goto fail;
 			}
+			__set_bit(FW_FLAGS_PLA, &fw_flags);
 			break;
 		case RTL_FW_USB:
-			if (usb) {
+			if (test_bit(FW_FLAGS_USB, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"multiple USB firmware encountered");
 				goto fail;
 			}
 
-			usb = (struct fw_mac *)block;
-			if (!rtl8152_is_fw_mac_ok(tp, usb)) {
+			if (!rtl8152_is_fw_mac_ok(tp, (struct fw_mac *)block)) {
 				dev_err(&tp->intf->dev,
 					"check USB firmware failed\n");
 				goto fail;
 			}
+			__set_bit(FW_FLAGS_USB, &fw_flags);
 			break;
 		case RTL_FW_PHY_START:
-			if (start || phy_nc || stop) {
+			if (test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"check PHY_START fail\n");
 				goto fail;
 			}
 
-			if (__le32_to_cpu(block->length) != sizeof(*start)) {
+			if (__le32_to_cpu(block->length) != sizeof(struct fw_phy_patch_key)) {
 				dev_err(&tp->intf->dev,
 					"Invalid length for PHY_START\n");
 				goto fail;
 			}
-
-			start = (struct fw_phy_patch_key *)block;
+			__set_bit(FW_FLAGS_START, &fw_flags);
 			break;
 		case RTL_FW_PHY_STOP:
-			if (stop || !start) {
+			if (test_bit(FW_FLAGS_STOP, &fw_flags) ||
+			    !test_bit(FW_FLAGS_START, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"Check PHY_STOP fail\n");
 				goto fail;
@@ -3886,28 +3893,28 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 					"Invalid length for PHY_STOP\n");
 				goto fail;
 			}
-
-			stop = block;
+			__set_bit(FW_FLAGS_STOP, &fw_flags);
 			break;
 		case RTL_FW_PHY_NC:
-			if (!start || stop) {
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"check PHY_NC fail\n");
 				goto fail;
 			}
 
-			if (phy_nc) {
+			if (test_bit(FW_FLAGS_NC, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"multiple PHY NC encountered\n");
 				goto fail;
 			}
 
-			phy_nc = (struct fw_phy_nc *)block;
-			if (!rtl8152_is_fw_phy_nc_ok(tp, phy_nc)) {
+			if (!rtl8152_is_fw_phy_nc_ok(tp, (struct fw_phy_nc *)block)) {
 				dev_err(&tp->intf->dev,
 					"check PHY NC firmware failed\n");
 				goto fail;
 			}
+			__set_bit(FW_FLAGS_NC, &fw_flags);
 
 			break;
 		default:
@@ -3921,7 +3928,7 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 	}
 
 fw_end:
-	if ((phy_nc || start) && !stop) {
+	if (test_bit(FW_FLAGS_START, &fw_flags) && !test_bit(FW_FLAGS_STOP, &fw_flags)) {
 		dev_err(&tp->intf->dev, "without PHY_STOP\n");
 		goto fail;
 	}
-- 
cgit v1.2.3


From 67ce1a806f164e59a074fea8809725d3411eaa20 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:34 +0800
Subject: r8152: add help function to change mtu

The different chips may have different requests when changing mtu.
Therefore, add a new help function of rtl_ops to change mtu. Besides,
reset the tx/rx after changing mtu.

Additionally, add mtu_to_size() and size_to_mtu() macros to simplify
the code.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 53 +++++++++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 28c9b4dc1a60..3f465242a4f0 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -657,15 +657,13 @@ enum rtl_register_content {
 
 #define INTR_LINK		0x0004
 
-#define RTL8153_MAX_PACKET	9216 /* 9K */
-#define RTL8153_MAX_MTU		(RTL8153_MAX_PACKET - VLAN_ETH_HLEN - \
-				 ETH_FCS_LEN)
 #define RTL8152_RMS		(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 #define RTL8153_RMS		RTL8153_MAX_PACKET
 #define RTL8152_TX_TIMEOUT	(5 * HZ)
 #define RTL8152_NAPI_WEIGHT	64
-#define rx_reserved_size(x)	((x) + VLAN_ETH_HLEN + ETH_FCS_LEN + \
-				 sizeof(struct rx_desc) + RX_ALIGN)
+#define mtu_to_size(m)		((m) + VLAN_ETH_HLEN + ETH_FCS_LEN)
+#define size_to_mtu(s)		((s) - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define rx_reserved_size(x)	(mtu_to_size(x) + sizeof(struct rx_desc) + RX_ALIGN)
 
 /* rtl8152 flags */
 enum rtl8152_flags {
@@ -795,6 +793,7 @@ struct r8152 {
 		bool (*in_nway)(struct r8152 *tp);
 		void (*hw_phy_cfg)(struct r8152 *tp);
 		void (*autosuspend_en)(struct r8152 *tp, bool enable);
+		void (*change_mtu)(struct r8152 *tp);
 	} rtl_ops;
 
 	struct ups_info {
@@ -1021,8 +1020,7 @@ enum tx_csum_stat {
 static const int multicast_filter_limit = 32;
 static unsigned int agg_buf_sz = 16384;
 
-#define RTL_LIMITED_TSO_SIZE	(agg_buf_sz - sizeof(struct tx_desc) - \
-				 VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define RTL_LIMITED_TSO_SIZE	(size_to_mtu(agg_buf_sz) - sizeof(struct tx_desc))
 
 static
 int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
@@ -2632,10 +2630,7 @@ static void rtl8152_nic_reset(struct r8152 *tp)
 
 static void set_tx_qlen(struct r8152 *tp)
 {
-	struct net_device *netdev = tp->netdev;
-
-	tp->tx_qlen = agg_buf_sz / (netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN +
-				    sizeof(struct tx_desc));
+	tp->tx_qlen = agg_buf_sz / (mtu_to_size(tp->netdev->mtu) + sizeof(struct tx_desc));
 }
 
 static inline u8 rtl8152_get_speed(struct r8152 *tp)
@@ -4724,6 +4719,12 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp)
 	set_bit(PHY_RESET, &tp->flags);
 }
 
+static void rtl8153_change_mtu(struct r8152 *tp)
+{
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
+}
+
 static void r8153_first_init(struct r8152 *tp)
 {
 	u32 ocp_data;
@@ -4756,9 +4757,7 @@ static void r8153_first_init(struct r8152 *tp)
 
 	rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
 
-	ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
-	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
+	rtl8153_change_mtu(tp);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
 	ocp_data |= TCR0_AUTO_FIFO;
@@ -4793,8 +4792,7 @@ static void r8153_enter_oob(struct r8152 *tp)
 
 	wait_oob_link_list_ready(tp);
 
-	ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
 
 	switch (tp->version) {
 	case RTL_VER_03:
@@ -6494,12 +6492,21 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
 	dev->mtu = new_mtu;
 
 	if (netif_running(dev)) {
-		u32 rms = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
-
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms);
+		if (tp->rtl_ops.change_mtu)
+			tp->rtl_ops.change_mtu(tp);
 
-		if (netif_carrier_ok(dev))
-			r8153_set_rx_early_size(tp);
+		if (netif_carrier_ok(dev)) {
+			netif_stop_queue(dev);
+			napi_disable(&tp->napi);
+			tasklet_disable(&tp->tx_tl);
+			tp->rtl_ops.disable(tp);
+			tp->rtl_ops.enable(tp);
+			rtl_start_rx(tp);
+			tasklet_enable(&tp->tx_tl);
+			napi_enable(&tp->napi);
+			rtl8152_set_rx_mode(dev);
+			netif_wake_queue(dev);
+		}
 	}
 
 	mutex_unlock(&tp->control);
@@ -6588,6 +6595,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->in_nway		= rtl8153_in_nway;
 		ops->hw_phy_cfg		= r8153_hw_phy_cfg;
 		ops->autosuspend_en	= rtl8153_runtime_enable;
+		ops->change_mtu		= rtl8153_change_mtu;
 		if (tp->udev->speed < USB_SPEED_SUPER)
 			tp->rx_buf_sz	= 16 * 1024;
 		else
@@ -6609,6 +6617,7 @@ static int rtl_ops_init(struct r8152 *tp)
 		ops->in_nway		= rtl8153_in_nway;
 		ops->hw_phy_cfg		= r8153b_hw_phy_cfg;
 		ops->autosuspend_en	= rtl8153b_runtime_enable;
+		ops->change_mtu		= rtl8153_change_mtu;
 		tp->rx_buf_sz		= 32 * 1024;
 		tp->eee_en		= true;
 		tp->eee_adv		= MDIO_EEE_1000T | MDIO_EEE_100TX;
@@ -6829,7 +6838,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 		netdev->max_mtu = ETH_DATA_LEN;
 		break;
 	default:
-		netdev->max_mtu = RTL8153_MAX_MTU;
+		netdev->max_mtu = size_to_mtu(9 * 1024);
 		break;
 	}
 
-- 
cgit v1.2.3


From 195aae321c829dd1945900d75561e6aa79cce208 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:35 +0800
Subject: r8152: support new chips

Support RTL8153C, RTL8153D, RTL8156A, and RTL8156B. The RTL8156A
and RTL8156B are the 2.5G ethernet.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 2634 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 2359 insertions(+), 275 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3f465242a4f0..72b8ef0ad5a1 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -43,10 +43,14 @@
 
 #define PLA_IDR			0xc000
 #define PLA_RCR			0xc010
+#define PLA_RCR1		0xc012
 #define PLA_RMS			0xc016
 #define PLA_RXFIFO_CTRL0	0xc0a0
+#define PLA_RXFIFO_FULL		0xc0a2
 #define PLA_RXFIFO_CTRL1	0xc0a4
+#define PLA_RX_FIFO_FULL	0xc0a6
 #define PLA_RXFIFO_CTRL2	0xc0a8
+#define PLA_RX_FIFO_EMPTY	0xc0aa
 #define PLA_DMY_REG0		0xc0b0
 #define PLA_FMC			0xc0b4
 #define PLA_CFG_WOL		0xc0b6
@@ -63,6 +67,8 @@
 #define PLA_MACDBG_PRE		0xd38c	/* RTL_VER_04 only */
 #define PLA_MACDBG_POST		0xd38e	/* RTL_VER_04 only */
 #define PLA_EXTRA_STATUS	0xd398
+#define PLA_GPHY_CTRL		0xd3ae
+#define PLA_POL_GPIO_CTRL	0xdc6a
 #define PLA_EFUSE_DATA		0xdd00
 #define PLA_EFUSE_CMD		0xdd02
 #define PLA_LEDSEL		0xdd90
@@ -72,6 +78,8 @@
 #define PLA_LWAKE_CTRL_REG	0xe007
 #define PLA_GPHY_INTR_IMR	0xe022
 #define PLA_EEE_CR		0xe040
+#define PLA_EEE_TXTWSYS		0xe04c
+#define PLA_EEE_TXTWSYS_2P5G	0xe058
 #define PLA_EEEP_CR		0xe080
 #define PLA_MAC_PWR_CTRL	0xe0c0
 #define PLA_MAC_PWR_CTRL2	0xe0ca
@@ -82,6 +90,7 @@
 #define PLA_TCR1		0xe612
 #define PLA_MTPS		0xe615
 #define PLA_TXFIFO_CTRL		0xe618
+#define PLA_TXFIFO_FULL		0xe61a
 #define PLA_RSTTALLY		0xe800
 #define PLA_CR			0xe813
 #define PLA_CRWECR		0xe81c
@@ -98,6 +107,7 @@
 #define PLA_SFF_STS_7		0xe8de
 #define PLA_PHYSTATUS		0xe908
 #define PLA_CONFIG6		0xe90a /* CONFIG6 */
+#define PLA_USB_CFG		0xe952
 #define PLA_BP_BA		0xfc26
 #define PLA_BP_0		0xfc28
 #define PLA_BP_1		0xfc2a
@@ -112,6 +122,7 @@
 #define USB_USB2PHY		0xb41e
 #define USB_SSPHYLINK1		0xb426
 #define USB_SSPHYLINK2		0xb428
+#define USB_L1_CTRL		0xb45e
 #define USB_U2P3_CTRL		0xb460
 #define USB_CSR_DUMMY1		0xb464
 #define USB_CSR_DUMMY2		0xb466
@@ -122,7 +133,12 @@
 #define USB_FW_FIX_EN0		0xcfca
 #define USB_FW_FIX_EN1		0xcfcc
 #define USB_LPM_CONFIG		0xcfd8
+#define USB_ECM_OPTION		0xcfee
 #define USB_CSTMR		0xcfef	/* RTL8153A */
+#define USB_MISC_2		0xcfff
+#define USB_ECM_OP		0xd26b
+#define USB_GPHY_CTRL		0xd284
+#define USB_SPEED_OPTION	0xd32a
 #define USB_FW_CTRL		0xd334	/* RTL8153B */
 #define USB_FC_TIMER		0xd340
 #define USB_USB_CTRL		0xd406
@@ -136,16 +152,20 @@
 #define USB_RX_EXTRA_AGGR_TMR	0xd432	/* RTL8153B */
 #define USB_TX_DMA		0xd434
 #define USB_UPT_RXDMA_OWN	0xd437
+#define USB_UPHY3_MDCMDIO	0xd480
 #define USB_TOLERANCE		0xd490
 #define USB_LPM_CTRL		0xd41a
 #define USB_BMU_RESET		0xd4b0
+#define USB_BMU_CONFIG		0xd4b4
 #define USB_U1U2_TIMER		0xd4da
 #define USB_FW_TASK		0xd4e8	/* RTL8153B */
+#define USB_RX_AGGR_NUM		0xd4ee
 #define USB_UPS_CTRL		0xd800
 #define USB_POWER_CUT		0xd80a
 #define USB_MISC_0		0xd81a
 #define USB_MISC_1		0xd81f
 #define USB_AFE_CTRL2		0xd824
+#define USB_UPHY_XTAL		0xd826
 #define USB_UPS_CFG		0xd842
 #define USB_UPS_FLAGS		0xd848
 #define USB_WDT1_CTRL		0xe404
@@ -188,6 +208,9 @@
 #define OCP_EEE_ABLE		0xa5c4
 #define OCP_EEE_ADV		0xa5d0
 #define OCP_EEE_LPABLE		0xa5d2
+#define OCP_10GBT_CTRL		0xa5d4
+#define OCP_10GBT_STAT		0xa5d6
+#define OCP_EEE_ADV2		0xa6d4
 #define OCP_PHY_STATE		0xa708		/* nway state for 8153 */
 #define OCP_PHY_PATCH_STAT	0xb800
 #define OCP_PHY_PATCH_CMD	0xb820
@@ -199,6 +222,7 @@
 /* SRAM Register */
 #define SRAM_GREEN_CFG		0x8011
 #define SRAM_LPF_CFG		0x8012
+#define SRAM_GPHY_FW_VER	0x801e
 #define SRAM_10M_AMP1		0x8080
 #define SRAM_10M_AMP2		0x8082
 #define SRAM_IMPEDANCE		0x8084
@@ -210,11 +234,19 @@
 #define RCR_AM			0x00000004
 #define RCR_AB			0x00000008
 #define RCR_ACPT_ALL		(RCR_AAP | RCR_APM | RCR_AM | RCR_AB)
+#define SLOT_EN			BIT(11)
+
+/* PLA_RCR1 */
+#define OUTER_VLAN		BIT(7)
+#define INNER_VLAN		BIT(6)
 
 /* PLA_RXFIFO_CTRL0 */
 #define RXFIFO_THR1_NORMAL	0x00080002
 #define RXFIFO_THR1_OOB		0x01800003
 
+/* PLA_RXFIFO_FULL */
+#define RXFIFO_FULL_MASK	0xfff
+
 /* PLA_RXFIFO_CTRL1 */
 #define RXFIFO_THR2_FULL	0x00000060
 #define RXFIFO_THR2_HIGH	0x00000038
@@ -285,6 +317,7 @@
 #define MCU_BORW_EN		0x4000
 
 /* PLA_CPCR */
+#define FLOW_CTRL_EN		BIT(0)
 #define CPCR_RX_VLAN		0x0040
 
 /* PLA_CFG_WOL */
@@ -310,6 +343,10 @@
 /* PLA_CONFIG6 */
 #define LANWAKE_CLR_EN		BIT(0)
 
+/* PLA_USB_CFG */
+#define EN_XG_LIP		BIT(1)
+#define EN_G_LIP		BIT(2)
+
 /* PLA_CONFIG5 */
 #define BWF_EN			0x0040
 #define MWF_EN			0x0020
@@ -333,6 +370,7 @@
 /* PLA_MAC_PWR_CTRL2 */
 #define EEE_SPDWN_RATIO		0x8007
 #define MAC_CLK_SPDWN_EN	BIT(15)
+#define EEE_SPDWN_RATIO_MASK	0xff
 
 /* PLA_MAC_PWR_CTRL3 */
 #define PLA_MCU_SPDWN_EN	BIT(14)
@@ -345,6 +383,7 @@
 #define PWRSAVE_SPDWN_EN	0x1000
 #define RXDV_SPDWN_EN		0x0800
 #define TX10MIDLE_EN		0x0100
+#define IDLE_SPDWN_EN		BIT(6)
 #define TP100_SPDWN_EN		0x0020
 #define TP500_SPDWN_EN		0x0010
 #define TP1000_SPDWN_EN		0x0008
@@ -385,6 +424,13 @@
 #define LINK_CHANGE_FLAG	BIT(8)
 #define POLL_LINK_CHG		BIT(0)
 
+/* PLA_GPHY_CTRL */
+#define GPHY_FLASH		BIT(1)
+
+/* PLA_POL_GPIO_CTRL */
+#define DACK_DET_EN		BIT(15)
+#define POL_GPHY_PATCH		BIT(4)
+
 /* USB_USB2PHY */
 #define USB2PHY_SUSPEND		0x0001
 #define USB2PHY_L1		0x0002
@@ -433,6 +479,9 @@
 #define BMU_RESET_EP_IN		0x01
 #define BMU_RESET_EP_OUT	0x02
 
+/* USB_BMU_CONFIG */
+#define ACT_ODMA		BIT(1)
+
 /* USB_UPT_RXDMA_OWN */
 #define OWN_UPDATE		BIT(0)
 #define OWN_CLEAR		BIT(1)
@@ -440,27 +489,52 @@
 /* USB_FW_TASK */
 #define FC_PATCH_TASK		BIT(1)
 
+/* USB_RX_AGGR_NUM */
+#define RX_AGGR_NUM_MASK	0x1ff
+
 /* USB_UPS_CTRL */
 #define POWER_CUT		0x0100
 
 /* USB_PM_CTRL_STATUS */
 #define RESUME_INDICATE		0x0001
 
+/* USB_ECM_OPTION */
+#define BYPASS_MAC_RESET	BIT(5)
+
 /* USB_CSTMR */
 #define FORCE_SUPER		BIT(0)
 
+/* USB_MISC_2 */
+#define UPS_FORCE_PWR_DOWN	BIT(0)
+
+/* USB_ECM_OP */
+#define	EN_ALL_SPEED		BIT(0)
+
+/* USB_GPHY_CTRL */
+#define GPHY_PATCH_DONE		BIT(2)
+#define BYPASS_FLASH		BIT(5)
+#define BACKUP_RESTRORE		BIT(6)
+
+/* USB_SPEED_OPTION */
+#define RG_PWRDN_EN		BIT(8)
+#define ALL_SPEED_OFF		BIT(9)
+
 /* USB_FW_CTRL */
 #define FLOW_CTRL_PATCH_OPT	BIT(1)
+#define AUTO_SPEEDUP		BIT(3)
+#define FLOW_CTRL_PATCH_2	BIT(8)
 
 /* USB_FC_TIMER */
 #define CTRL_TIMER_EN		BIT(15)
 
 /* USB_USB_CTRL */
+#define CDC_ECM_EN		BIT(3)
 #define RX_AGG_DISABLE		0x0010
 #define RX_ZERO_EN		0x0080
 
 /* USB_U2P3_CTRL */
 #define U2P3_ENABLE		0x0001
+#define RX_DETECT8		BIT(3)
 
 /* USB_POWER_CUT */
 #define PWR_EN			0x0001
@@ -496,8 +570,12 @@
 #define SEN_VAL_NORMAL		0xa000
 #define SEL_RXIDLE		0x0100
 
+/* USB_UPHY_XTAL */
+#define OOBS_POLLING		BIT(8)
+
 /* USB_UPS_CFG */
 #define SAW_CNT_1MS_MASK	0x0fff
+#define MID_REVERSE		BIT(5)	/* RTL8156A */
 
 /* USB_UPS_FLAGS */
 #define UPS_FLAGS_R_TUNE		BIT(0)
@@ -505,6 +583,7 @@
 #define UPS_FLAGS_250M_CKDIV		BIT(2)
 #define UPS_FLAGS_EN_ALDPS		BIT(3)
 #define UPS_FLAGS_CTAP_SHORT_DIS	BIT(4)
+#define UPS_FLAGS_SPEED_MASK		(0xf << 16)
 #define ups_flags_speed(x)		((x) << 16)
 #define UPS_FLAGS_EN_EEE		BIT(20)
 #define UPS_FLAGS_EN_500M_EEE		BIT(21)
@@ -525,6 +604,8 @@ enum spd_duplex {
 	FORCE_10M_FULL,
 	FORCE_100M_HALF,
 	FORCE_100M_FULL,
+	FORCE_1000M_FULL,
+	NWAY_2500M_FULL,
 };
 
 /* OCP_ALDPS_CONFIG */
@@ -589,6 +670,9 @@ enum spd_duplex {
 #define EN_10M_CLKDIV		BIT(11)
 #define EN_10M_BGOFF		0x0080
 
+/* OCP_10GBT_CTRL */
+#define RTL_ADV2_5G_F_R		BIT(5)	/* Advertise 2.5GBASE-T fast-retrain */
+
 /* OCP_PHY_STATE */
 #define TXDIS_STATE		0x01
 #define ABD_STATE		0x02
@@ -608,7 +692,8 @@ enum spd_duplex {
 #define EN_EMI_L		0x0040
 
 /* OCP_SYSCLK_CFG */
-#define clk_div_expo(x)		(min(x, 5) << 8)
+#define sysclk_div_expo(x)	(min(x, 5) << 8)
+#define clk_div_expo(x)		(min(x, 5) << 4)
 
 /* SRAM_GREEN_CFG */
 #define GREEN_ETH_EN		BIT(15)
@@ -639,6 +724,11 @@ enum spd_duplex {
 #define BP4_SUPER_ONLY		0x1578	/* RTL_VER_04 only */
 
 enum rtl_register_content {
+	_2500bps	= BIT(10),
+	_1250bps	= BIT(9),
+	_500bps		= BIT(8),
+	_tx_flow	= BIT(6),
+	_rx_flow	= BIT(5),
 	_1000bps	= 0x10,
 	_100bps		= 0x08,
 	_10bps		= 0x04,
@@ -646,6 +736,9 @@ enum rtl_register_content {
 	FULL_DUP	= 0x01,
 };
 
+#define is_speed_2500(_speed)	(((_speed) & (_2500bps | LINK_STATUS)) == (_2500bps | LINK_STATUS))
+#define is_flow_control(_speed)	(((_speed) & (_tx_flow | _rx_flow)) == (_tx_flow | _rx_flow))
+
 #define RTL8152_MAX_TX		4
 #define RTL8152_MAX_RX		10
 #define INTBUFSIZE		2
@@ -660,7 +753,6 @@ enum rtl_register_content {
 #define RTL8152_RMS		(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 #define RTL8153_RMS		RTL8153_MAX_PACKET
 #define RTL8152_TX_TIMEOUT	(5 * HZ)
-#define RTL8152_NAPI_WEIGHT	64
 #define mtu_to_size(m)		((m) + VLAN_ETH_HLEN + ETH_FCS_LEN)
 #define size_to_mtu(s)		((s) - VLAN_ETH_HLEN - ETH_FCS_LEN)
 #define rx_reserved_size(x)	(mtu_to_size(x) + sizeof(struct rx_desc) + RX_ALIGN)
@@ -797,6 +889,7 @@ struct r8152 {
 	} rtl_ops;
 
 	struct ups_info {
+		u32 r_tune:1;
 		u32 _10m_ckdiv:1;
 		u32 _250m_ckdiv:1;
 		u32 aldps:1;
@@ -838,7 +931,9 @@ struct r8152 {
 	u32 rx_buf_sz;
 	u32 rx_copybreak;
 	u32 rx_pending;
+	u32 fc_pause_on, fc_pause_off;
 
+	u32 support_2500full:1;
 	u16 ocp_base;
 	u16 speed;
 	u16 eee_adv;
@@ -998,6 +1093,15 @@ enum rtl_version {
 	RTL_VER_07,
 	RTL_VER_08,
 	RTL_VER_09,
+
+	RTL_TEST_01,
+	RTL_VER_10,
+	RTL_VER_11,
+	RTL_VER_12,
+	RTL_VER_13,
+	RTL_VER_14,
+	RTL_VER_15,
+
 	RTL_VER_MAX
 };
 
@@ -1013,6 +1117,7 @@ enum tx_csum_stat {
 #define RTL_ADVERTISED_100_FULL			BIT(3)
 #define RTL_ADVERTISED_1000_HALF		BIT(4)
 #define RTL_ADVERTISED_1000_FULL		BIT(5)
+#define RTL_ADVERTISED_2500_FULL		BIT(6)
 
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
  * The RTL chips use a 64 element hash table based on the Ethernet CRC.
@@ -2606,7 +2711,7 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
 
 static void r8152b_reset_packet_filter(struct r8152 *tp)
 {
-	u32	ocp_data;
+	u32 ocp_data;
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_FMC);
 	ocp_data &= ~FMC_FCR_MCU_EN;
@@ -2617,14 +2722,47 @@ static void r8152b_reset_packet_filter(struct r8152 *tp)
 
 static void rtl8152_nic_reset(struct r8152 *tp)
 {
-	int	i;
+	u32 ocp_data;
+	int i;
 
-	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST);
+	switch (tp->version) {
+	case RTL_TEST_01:
+	case RTL_VER_10:
+	case RTL_VER_11:
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR);
+		ocp_data &= ~CR_TE;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_RESET);
+		ocp_data &= ~BMU_RESET_EP_IN;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
+		ocp_data |= CDC_ECM_EN;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR);
+		ocp_data &= ~CR_RE;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_RESET);
+		ocp_data |= BMU_RESET_EP_IN;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
+		ocp_data &= ~CDC_ECM_EN;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+		break;
 
-	for (i = 0; i < 1000; i++) {
-		if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST))
-			break;
-		usleep_range(100, 400);
+	default:
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST);
+
+		for (i = 0; i < 1000; i++) {
+			if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST))
+				break;
+			usleep_range(100, 400);
+		}
+		break;
 	}
 }
 
@@ -2633,9 +2771,9 @@ static void set_tx_qlen(struct r8152 *tp)
 	tp->tx_qlen = agg_buf_sz / (mtu_to_size(tp->netdev->mtu) + sizeof(struct tx_desc));
 }
 
-static inline u8 rtl8152_get_speed(struct r8152 *tp)
+static inline u16 rtl8152_get_speed(struct r8152 *tp)
 {
-	return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS);
+	return ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHYSTATUS);
 }
 
 static void rtl_eee_plus_en(struct r8152 *tp, bool enable)
@@ -2795,6 +2933,7 @@ static int rtl_enable(struct r8152 *tp)
 	switch (tp->version) {
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_14:
 		r8153b_rx_agg_chg_indicate(tp);
 		break;
 	default:
@@ -2832,6 +2971,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_14:
 		/* The RTL8153B uses USB_RX_EXTRA_AGGR_TMR for rx timeout
 		 * primarily. For USB_RX_EARLY_TIMEOUT, we fix it to 128ns.
 		 */
@@ -2841,6 +2981,18 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 			       ocp_data);
 		break;
 
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_TIMEOUT,
+			       640 / 8);
+		ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EXTRA_AGGR_TMR,
+			       ocp_data);
+		r8153b_rx_agg_chg_indicate(tp);
+		break;
+
 	default:
 		break;
 	}
@@ -2860,8 +3012,19 @@ static void r8153_set_rx_early_size(struct r8152 *tp)
 		break;
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_14:
+		ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE,
+			       ocp_data / 8);
+		break;
+	case RTL_TEST_01:
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
 		ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE,
 			       ocp_data / 8);
+		r8153b_rx_agg_chg_indicate(tp);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -2871,6 +3034,8 @@ static void r8153_set_rx_early_size(struct r8152 *tp)
 
 static int rtl8153_enable(struct r8152 *tp)
 {
+	u32 ocp_data;
+
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return -ENODEV;
 
@@ -2881,15 +3046,18 @@ static int rtl8153_enable(struct r8152 *tp)
 
 	rtl_set_ifg(tp, rtl8152_get_speed(tp));
 
-	if (tp->version == RTL_VER_09) {
-		u32 ocp_data;
-
+	switch (tp->version) {
+	case RTL_VER_09:
+	case RTL_VER_14:
 		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
 		ocp_data &= ~FC_PATCH_TASK;
 		ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
 		usleep_range(1000, 2000);
 		ocp_data |= FC_PATCH_TASK;
 		ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+		break;
+	default:
+		break;
 	}
 
 	return rtl_enable(tp);
@@ -2954,12 +3122,40 @@ static void rtl_rx_vlan_en(struct r8152 *tp, bool enable)
 {
 	u32 ocp_data;
 
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR);
-	if (enable)
-		ocp_data |= CPCR_RX_VLAN;
-	else
-		ocp_data &= ~CPCR_RX_VLAN;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data);
+	switch (tp->version) {
+	case RTL_VER_01:
+	case RTL_VER_02:
+	case RTL_VER_03:
+	case RTL_VER_04:
+	case RTL_VER_05:
+	case RTL_VER_06:
+	case RTL_VER_07:
+	case RTL_VER_08:
+	case RTL_VER_09:
+	case RTL_VER_14:
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR);
+		if (enable)
+			ocp_data |= CPCR_RX_VLAN;
+		else
+			ocp_data &= ~CPCR_RX_VLAN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data);
+		break;
+
+	case RTL_TEST_01:
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+	default:
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RCR1);
+		if (enable)
+			ocp_data |= OUTER_VLAN | INNER_VLAN;
+		else
+			ocp_data &= ~(OUTER_VLAN | INNER_VLAN);
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RCR1, ocp_data);
+		break;
+	}
 }
 
 static int rtl8152_set_features(struct net_device *dev,
@@ -3052,6 +3248,40 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
 		device_set_wakeup_enable(&tp->udev->dev, false);
 }
 
+static void r8153_mac_clk_speed_down(struct r8152 *tp, bool enable)
+{
+	u32 ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2);
+
+	/* MAC clock speed down */
+	if (enable)
+		ocp_data |= MAC_CLK_SPDWN_EN;
+	else
+		ocp_data &= ~MAC_CLK_SPDWN_EN;
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data);
+}
+
+static void r8156_mac_clk_spd(struct r8152 *tp, bool enable)
+{
+	u32 ocp_data;
+
+	/* MAC clock speed down */
+	if (enable) {
+		/* aldps_spdwn_ratio, tp10_spdwn_ratio */
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL,
+			       0x0403);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2);
+		ocp_data &= ~EEE_SPDWN_RATIO_MASK;
+		ocp_data |= MAC_CLK_SPDWN_EN | 0x03; /* eee_spdwn_ratio */
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data);
+	} else {
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2);
+		ocp_data &= ~MAC_CLK_SPDWN_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data);
+	}
+}
+
 static void r8153_u1u2en(struct r8152 *tp, bool enable)
 {
 	u8 u1u2[8];
@@ -3111,6 +3341,9 @@ static void r8153b_ups_flags(struct r8152 *tp)
 	if (tp->ups_info.eee_cmod_lv)
 		ups_flags |= UPS_FLAGS_EEE_CMOD_LV_EN;
 
+	if (tp->ups_info.r_tune)
+		ups_flags |= UPS_FLAGS_R_TUNE;
+
 	if (tp->ups_info._10m_ckdiv)
 		ups_flags |= UPS_FLAGS_EN_10M_CKDIV;
 
@@ -3161,6 +3394,88 @@ static void r8153b_ups_flags(struct r8152 *tp)
 	ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags);
 }
 
+static void r8156_ups_flags(struct r8152 *tp)
+{
+	u32 ups_flags = 0;
+
+	if (tp->ups_info.green)
+		ups_flags |= UPS_FLAGS_EN_GREEN;
+
+	if (tp->ups_info.aldps)
+		ups_flags |= UPS_FLAGS_EN_ALDPS;
+
+	if (tp->ups_info.eee)
+		ups_flags |= UPS_FLAGS_EN_EEE;
+
+	if (tp->ups_info.flow_control)
+		ups_flags |= UPS_FLAGS_EN_FLOW_CTR;
+
+	if (tp->ups_info.eee_ckdiv)
+		ups_flags |= UPS_FLAGS_EN_EEE_CKDIV;
+
+	if (tp->ups_info._10m_ckdiv)
+		ups_flags |= UPS_FLAGS_EN_10M_CKDIV;
+
+	if (tp->ups_info.eee_plloff_100)
+		ups_flags |= UPS_FLAGS_EEE_PLLOFF_100;
+
+	if (tp->ups_info.eee_plloff_giga)
+		ups_flags |= UPS_FLAGS_EEE_PLLOFF_GIGA;
+
+	if (tp->ups_info._250m_ckdiv)
+		ups_flags |= UPS_FLAGS_250M_CKDIV;
+
+	switch (tp->ups_info.speed_duplex) {
+	case FORCE_10M_HALF:
+		ups_flags |= ups_flags_speed(0);
+		break;
+	case FORCE_10M_FULL:
+		ups_flags |= ups_flags_speed(1);
+		break;
+	case FORCE_100M_HALF:
+		ups_flags |= ups_flags_speed(2);
+		break;
+	case FORCE_100M_FULL:
+		ups_flags |= ups_flags_speed(3);
+		break;
+	case NWAY_10M_HALF:
+		ups_flags |= ups_flags_speed(4);
+		break;
+	case NWAY_10M_FULL:
+		ups_flags |= ups_flags_speed(5);
+		break;
+	case NWAY_100M_HALF:
+		ups_flags |= ups_flags_speed(6);
+		break;
+	case NWAY_100M_FULL:
+		ups_flags |= ups_flags_speed(7);
+		break;
+	case NWAY_1000M_FULL:
+		ups_flags |= ups_flags_speed(8);
+		break;
+	case NWAY_2500M_FULL:
+		ups_flags |= ups_flags_speed(9);
+		break;
+	default:
+		break;
+	}
+
+	switch (tp->ups_info.lite_mode) {
+	case 1:
+		ups_flags |= 0 << 5;
+		break;
+	case 2:
+		ups_flags |= 2 << 5;
+		break;
+	case 0:
+	default:
+		ups_flags |= 1 << 5;
+		break;
+	}
+
+	ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags);
+}
+
 static void rtl_green_en(struct r8152 *tp, bool enable)
 {
 	u16 data;
@@ -3224,16 +3539,56 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable)
 		ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN;
 		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
 
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, 0xcfff);
-		ocp_data |= BIT(0);
-		ocp_write_byte(tp, MCU_TYPE_USB, 0xcfff, ocp_data);
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data |= UPS_FORCE_PWR_DOWN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
+	} else {
+		ocp_data &= ~(UPS_EN | USP_PREWAKE);
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data &= ~UPS_FORCE_PWR_DOWN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
+
+		if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) {
+			int i;
+
+			for (i = 0; i < 500; i++) {
+				if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
+				    AUTOLOAD_DONE)
+					break;
+				msleep(20);
+			}
+
+			tp->rtl_ops.hw_phy_cfg(tp);
+
+			rtl8152_set_speed(tp, tp->autoneg, tp->speed,
+					  tp->duplex, tp->advertising);
+		}
+	}
+}
+
+static void r8153c_ups_en(struct r8152 *tp, bool enable)
+{
+	u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT);
+
+	if (enable) {
+		r8153b_ups_flags(tp);
+
+		ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data |= UPS_FORCE_PWR_DOWN;
+		ocp_data &= ~BIT(7);
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
 	} else {
 		ocp_data &= ~(UPS_EN | USP_PREWAKE);
 		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
 
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, 0xcfff);
-		ocp_data &= ~BIT(0);
-		ocp_write_byte(tp, MCU_TYPE_USB, 0xcfff, ocp_data);
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data &= ~UPS_FORCE_PWR_DOWN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
 
 		if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) {
 			int i;
@@ -3250,6 +3605,55 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable)
 			rtl8152_set_speed(tp, tp->autoneg, tp->speed,
 					  tp->duplex, tp->advertising);
 		}
+
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
+		ocp_data |= BIT(8);
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
+
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
+	}
+}
+
+static void r8156_ups_en(struct r8152 *tp, bool enable)
+{
+	u32 ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_POWER_CUT);
+
+	if (enable) {
+		r8156_ups_flags(tp);
+
+		ocp_data |= UPS_EN | USP_PREWAKE | PHASE2_EN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data |= UPS_FORCE_PWR_DOWN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
+
+		switch (tp->version) {
+		case RTL_VER_13:
+		case RTL_VER_15:
+			ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPHY_XTAL);
+			ocp_data &= ~OOBS_POLLING;
+			ocp_write_byte(tp, MCU_TYPE_USB, USB_UPHY_XTAL, ocp_data);
+			break;
+		default:
+			break;
+		}
+	} else {
+		ocp_data &= ~(UPS_EN | USP_PREWAKE);
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+		ocp_data &= ~UPS_FORCE_PWR_DOWN;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
+
+		if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) {
+			tp->rtl_ops.hw_phy_cfg(tp);
+
+			rtl8152_set_speed(tp, tp->autoneg, tp->speed,
+					  tp->duplex, tp->advertising);
+		}
 	}
 }
 
@@ -3382,34 +3786,71 @@ static void rtl8153b_runtime_enable(struct r8152 *tp, bool enable)
 	}
 }
 
-static void r8153_teredo_off(struct r8152 *tp)
+static void rtl8153c_runtime_enable(struct r8152 *tp, bool enable)
 {
-	u32 ocp_data;
+	if (enable) {
+		r8153_queue_wake(tp, true);
+		r8153b_u1u2en(tp, false);
+		r8153_u2p3en(tp, false);
+		rtl_runtime_suspend_enable(tp, true);
+		r8153c_ups_en(tp, true);
+	} else {
+		r8153c_ups_en(tp, false);
+		r8153_queue_wake(tp, false);
+		rtl_runtime_suspend_enable(tp, false);
+		r8153b_u1u2en(tp, true);
+	}
+}
 
-	switch (tp->version) {
-	case RTL_VER_01:
-	case RTL_VER_02:
-	case RTL_VER_03:
-	case RTL_VER_04:
-	case RTL_VER_05:
-	case RTL_VER_06:
-	case RTL_VER_07:
-		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
-		ocp_data &= ~(TEREDO_SEL | TEREDO_RS_EVENT_MASK |
-			      OOB_TEREDO_EN);
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data);
-		break;
+static void rtl8156_runtime_enable(struct r8152 *tp, bool enable)
+{
+	if (enable) {
+		r8153_queue_wake(tp, true);
+		r8153b_u1u2en(tp, false);
+		r8153_u2p3en(tp, false);
+		rtl_runtime_suspend_enable(tp, true);
+	} else {
+		r8153_queue_wake(tp, false);
+		rtl_runtime_suspend_enable(tp, false);
+		r8153_u2p3en(tp, true);
+		if (tp->udev->speed >= USB_SPEED_SUPER)
+			r8153b_u1u2en(tp, true);
+	}
+}
+
+static void r8153_teredo_off(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	switch (tp->version) {
+	case RTL_VER_01:
+	case RTL_VER_02:
+	case RTL_VER_03:
+	case RTL_VER_04:
+	case RTL_VER_05:
+	case RTL_VER_06:
+	case RTL_VER_07:
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
+		ocp_data &= ~(TEREDO_SEL | TEREDO_RS_EVENT_MASK |
+			      OOB_TEREDO_EN);
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data);
+		break;
 
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_TEST_01:
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_14:
+	case RTL_VER_15:
+	default:
 		/* The bit 0 ~ 7 are relative with teredo settings. They are
 		 * W1C (write 1 to clear), so set all 1 to disable it.
 		 */
 		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, 0xff);
 		break;
-
-	default:
-		break;
 	}
 
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_WDT6_CTRL, WDT6_SET_MODE);
@@ -3444,6 +3885,12 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type)
 		break;
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_14:
+	case RTL_VER_15:
 	default:
 		if (type == MCU_TYPE_USB) {
 			ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0);
@@ -3653,6 +4100,11 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac)
 		case RTL_VER_06:
 		case RTL_VER_08:
 		case RTL_VER_09:
+		case RTL_VER_11:
+		case RTL_VER_12:
+		case RTL_VER_13:
+		case RTL_VER_14:
+		case RTL_VER_15:
 			fw_reg = 0xf800;
 			bp_ba_addr = PLA_BP_BA;
 			bp_en_addr = PLA_BP_EN;
@@ -3676,6 +4128,11 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac)
 			break;
 		case RTL_VER_08:
 		case RTL_VER_09:
+		case RTL_VER_11:
+		case RTL_VER_12:
+		case RTL_VER_13:
+		case RTL_VER_14:
+		case RTL_VER_15:
 			fw_reg = 0xe600;
 			bp_ba_addr = USB_BP_BA;
 			bp_en_addr = USB_BP2_EN;
@@ -4215,6 +4672,22 @@ static void r8153_eee_en(struct r8152 *tp, bool enable)
 	tp->ups_info.eee = enable;
 }
 
+static void r8156_eee_en(struct r8152 *tp, bool enable)
+{
+	u16 config;
+
+	r8153_eee_en(tp, enable);
+
+	config = ocp_reg_read(tp, OCP_EEE_ADV2);
+
+	if (enable)
+		config |= MDIO_EEE_2_5GT;
+	else
+		config &= ~MDIO_EEE_2_5GT;
+
+	ocp_reg_write(tp, OCP_EEE_ADV2, config);
+}
+
 static void rtl_eee_enable(struct r8152 *tp, bool enable)
 {
 	switch (tp->version) {
@@ -4236,6 +4709,7 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable)
 	case RTL_VER_06:
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_14:
 		if (enable) {
 			r8153_eee_en(tp, true);
 			ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv);
@@ -4244,6 +4718,19 @@ static void rtl_eee_enable(struct r8152 *tp, bool enable)
 			ocp_reg_write(tp, OCP_EEE_ADV, 0);
 		}
 		break;
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		if (enable) {
+			r8156_eee_en(tp, true);
+			ocp_reg_write(tp, OCP_EEE_ADV, tp->eee_adv);
+		} else {
+			r8156_eee_en(tp, false);
+			ocp_reg_write(tp, OCP_EEE_ADV, 0);
+		}
+		break;
 	default:
 		break;
 	}
@@ -4290,6 +4777,20 @@ static void wait_oob_link_list_ready(struct r8152 *tp)
 	}
 }
 
+static void r8156b_wait_loading_flash(struct r8152 *tp)
+{
+	if ((ocp_read_word(tp, MCU_TYPE_PLA, PLA_GPHY_CTRL) & GPHY_FLASH) &&
+	    !(ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & BYPASS_FLASH)) {
+		int i;
+
+		for (i = 0; i < 100; i++) {
+			if (ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL) & GPHY_PATCH_DONE)
+				break;
+			usleep_range(1000, 2000);
+		}
+	}
+}
+
 static void r8152b_exit_oob(struct r8152 *tp)
 {
 	u32 ocp_data;
@@ -4340,7 +4841,7 @@ static void r8152b_exit_oob(struct r8152 *tp)
 	}
 
 	/* TX share fifo free credit full threshold */
-	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, TXFIFO_THR_NORMAL);
+	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, TXFIFO_THR_NORMAL2);
 
 	ocp_write_byte(tp, MCU_TYPE_USB, USB_TX_AGG, TX_AGG_MAX_THRESHOLD);
 	ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_HIGH);
@@ -4517,6 +5018,21 @@ static int r8153b_post_firmware_1(struct r8152 *tp)
 	return 0;
 }
 
+static int r8153c_post_firmware_1(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL);
+	ocp_data |= FLOW_CTRL_PATCH_2;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
+	ocp_data |= FC_PATCH_TASK;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+
+	return 0;
+}
+
 static void r8153_aldps_en(struct r8152 *tp, bool enable)
 {
 	u16 data;
@@ -4719,6 +5235,13 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp)
 	set_bit(PHY_RESET, &tp->flags);
 }
 
+static void r8153c_hw_phy_cfg(struct r8152 *tp)
+{
+	r8153b_hw_phy_cfg(tp);
+
+	tp->ups_info.r_tune = true;
+}
+
 static void rtl8153_change_mtu(struct r8152 *tp)
 {
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
@@ -4806,6 +5329,7 @@ static void r8153_enter_oob(struct r8152 *tp)
 
 	case RTL_VER_08:
 	case RTL_VER_09:
+	case RTL_VER_14:
 		/* Clear teredo wake event. bit[15:8] is the teredo wakeup
 		 * type. Set it to zero. bits[7:0] are the W1C bits about
 		 * the events. Set them to all 1 to clear them.
@@ -4842,6 +5366,96 @@ static void rtl8153_disable(struct r8152 *tp)
 	r8153_aldps_en(tp, true);
 }
 
+static int rtl8156_enable(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 speed;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return -ENODEV;
+
+	set_tx_qlen(tp);
+	rtl_set_eee_plus(tp);
+	r8153_set_rx_early_timeout(tp);
+	r8153_set_rx_early_size(tp);
+
+	speed = rtl8152_get_speed(tp);
+	rtl_set_ifg(tp, speed);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+	if (speed & _2500bps)
+		ocp_data &= ~IDLE_SPDWN_EN;
+	else
+		ocp_data |= IDLE_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+
+	if (speed & _1000bps)
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x11);
+	else if (speed & _500bps)
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS, 0x3d);
+
+	if (tp->udev->speed == USB_SPEED_HIGH) {
+		/* USB 0xb45e[3:0] l1_nyet_hird */
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_L1_CTRL);
+		ocp_data &= ~0xf;
+		if (is_flow_control(speed))
+			ocp_data |= 0xf;
+		else
+			ocp_data |= 0x1;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data);
+	}
+
+	return rtl_enable(tp);
+}
+
+static int rtl8156b_enable(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 speed;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return -ENODEV;
+
+	set_tx_qlen(tp);
+	rtl_set_eee_plus(tp);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM);
+	ocp_data &= ~RX_AGGR_NUM_MASK;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_RX_AGGR_NUM, ocp_data);
+
+	r8153_set_rx_early_timeout(tp);
+	r8153_set_rx_early_size(tp);
+
+	speed = rtl8152_get_speed(tp);
+	rtl_set_ifg(tp, speed);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+	if (speed & _2500bps)
+		ocp_data &= ~IDLE_SPDWN_EN;
+	else
+		ocp_data |= IDLE_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+
+	if (tp->udev->speed == USB_SPEED_HIGH) {
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_L1_CTRL);
+		ocp_data &= ~0xf;
+		if (is_flow_control(speed))
+			ocp_data |= 0xf;
+		else
+			ocp_data |= 0x1;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_L1_CTRL, ocp_data);
+	}
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
+	ocp_data &= ~FC_PATCH_TASK;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+	usleep_range(1000, 2000);
+	ocp_data |= FC_PATCH_TASK;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+
+	return rtl_enable(tp);
+}
+
 static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex,
 			     u32 advertising)
 {
@@ -4890,58 +5504,73 @@ static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex,
 
 		tp->mii.force_media = 1;
 	} else {
-		u16 anar, tmp1;
+		u16 orig, new1;
 		u32 support;
 
 		support = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL |
 			  RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL;
 
-		if (tp->mii.supports_gmii)
+		if (tp->mii.supports_gmii) {
 			support |= RTL_ADVERTISED_1000_FULL;
 
+			if (tp->support_2500full)
+				support |= RTL_ADVERTISED_2500_FULL;
+		}
+
 		if (!(advertising & support))
 			return -EINVAL;
 
-		anar = r8152_mdio_read(tp, MII_ADVERTISE);
-		tmp1 = anar & ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
+		orig = r8152_mdio_read(tp, MII_ADVERTISE);
+		new1 = orig & ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
 				ADVERTISE_100HALF | ADVERTISE_100FULL);
 		if (advertising & RTL_ADVERTISED_10_HALF) {
-			tmp1 |= ADVERTISE_10HALF;
+			new1 |= ADVERTISE_10HALF;
 			tp->ups_info.speed_duplex = NWAY_10M_HALF;
 		}
 		if (advertising & RTL_ADVERTISED_10_FULL) {
-			tmp1 |= ADVERTISE_10FULL;
+			new1 |= ADVERTISE_10FULL;
 			tp->ups_info.speed_duplex = NWAY_10M_FULL;
 		}
 
 		if (advertising & RTL_ADVERTISED_100_HALF) {
-			tmp1 |= ADVERTISE_100HALF;
+			new1 |= ADVERTISE_100HALF;
 			tp->ups_info.speed_duplex = NWAY_100M_HALF;
 		}
 		if (advertising & RTL_ADVERTISED_100_FULL) {
-			tmp1 |= ADVERTISE_100FULL;
+			new1 |= ADVERTISE_100FULL;
 			tp->ups_info.speed_duplex = NWAY_100M_FULL;
 		}
 
-		if (anar != tmp1) {
-			r8152_mdio_write(tp, MII_ADVERTISE, tmp1);
-			tp->mii.advertising = tmp1;
+		if (orig != new1) {
+			r8152_mdio_write(tp, MII_ADVERTISE, new1);
+			tp->mii.advertising = new1;
 		}
 
 		if (tp->mii.supports_gmii) {
-			u16 gbcr;
-
-			gbcr = r8152_mdio_read(tp, MII_CTRL1000);
-			tmp1 = gbcr & ~(ADVERTISE_1000FULL |
+			orig = r8152_mdio_read(tp, MII_CTRL1000);
+			new1 = orig & ~(ADVERTISE_1000FULL |
 					ADVERTISE_1000HALF);
 
 			if (advertising & RTL_ADVERTISED_1000_FULL) {
-				tmp1 |= ADVERTISE_1000FULL;
+				new1 |= ADVERTISE_1000FULL;
 				tp->ups_info.speed_duplex = NWAY_1000M_FULL;
 			}
 
-			if (gbcr != tmp1)
-				r8152_mdio_write(tp, MII_CTRL1000, tmp1);
+			if (orig != new1)
+				r8152_mdio_write(tp, MII_CTRL1000, new1);
+		}
+
+		if (tp->support_2500full) {
+			orig = ocp_reg_read(tp, OCP_10GBT_CTRL);
+			new1 = orig & ~MDIO_AN_10GBT_CTRL_ADV2_5G;
+
+			if (advertising & RTL_ADVERTISED_2500_FULL) {
+				new1 |= MDIO_AN_10GBT_CTRL_ADV2_5G;
+				tp->ups_info.speed_duplex = NWAY_2500M_FULL;
+			}
+
+			if (orig != new1)
+				ocp_reg_write(tp, OCP_10GBT_CTRL, new1);
 		}
 
 		bmcr = BMCR_ANENABLE | BMCR_ANRESTART;
@@ -5097,116 +5726,363 @@ static void rtl8153b_down(struct r8152 *tp)
 	r8153_aldps_en(tp, true);
 }
 
-static bool rtl8152_in_nway(struct r8152 *tp)
+static void rtl8153c_change_mtu(struct r8152 *tp)
 {
-	u16 nway_state;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, 10 * 1024 / 64);
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, 0x2000);
-	tp->ocp_base = 0x2000;
-	ocp_write_byte(tp, MCU_TYPE_PLA, 0xb014, 0x4c);		/* phy state */
-	nway_state = ocp_read_word(tp, MCU_TYPE_PLA, 0xb01a);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, 512 / 64);
 
-	/* bit 15: TXDIS_STATE, bit 14: ABD_STATE */
-	if (nway_state & 0xc000)
-		return false;
+	/* Adjust the tx fifo free credit full threshold, otherwise
+	 * the fifo would be too small to send a jumbo frame packet.
+	 */
+	if (tp->netdev->mtu < 8000)
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_FULL, 2048 / 8);
 	else
-		return true;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_FULL, 900 / 8);
 }
 
-static bool rtl8153_in_nway(struct r8152 *tp)
+static void rtl8153c_up(struct r8152 *tp)
 {
-	u16 phy_state = ocp_reg_read(tp, OCP_PHY_STATE) & 0xff;
-
-	if (phy_state == TXDIS_STATE || phy_state == ABD_STATE)
-		return false;
-	else
-		return true;
-}
+	u32 ocp_data;
 
-static void set_carrier(struct r8152 *tp)
-{
-	struct net_device *netdev = tp->netdev;
-	struct napi_struct *napi = &tp->napi;
-	u8 speed;
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
 
-	speed = rtl8152_get_speed(tp);
+	r8153b_u1u2en(tp, false);
+	r8153_u2p3en(tp, false);
+	r8153_aldps_en(tp, false);
 
-	if (speed & LINK_STATUS) {
-		if (!netif_carrier_ok(netdev)) {
-			tp->rtl_ops.enable(tp);
-			netif_stop_queue(netdev);
-			napi_disable(napi);
-			netif_carrier_on(netdev);
-			rtl_start_rx(tp);
-			clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
-			_rtl8152_set_rx_mode(netdev);
-			napi_enable(&tp->napi);
-			netif_wake_queue(netdev);
-			netif_info(tp, link, netdev, "carrier on\n");
-		} else if (netif_queue_stopped(netdev) &&
-			   skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
-			netif_wake_queue(netdev);
-		}
-	} else {
-		if (netif_carrier_ok(netdev)) {
-			netif_carrier_off(netdev);
-			tasklet_disable(&tp->tx_tl);
-			napi_disable(napi);
-			tp->rtl_ops.disable(tp);
-			napi_enable(napi);
-			tasklet_enable(&tp->tx_tl);
-			netif_info(tp, link, netdev, "carrier off\n");
-		}
-	}
-}
+	rxdy_gated_en(tp, true);
+	r8153_teredo_off(tp);
 
-static void rtl_work_func_t(struct work_struct *work)
-{
-	struct r8152 *tp = container_of(work, struct r8152, schedule.work);
+	ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+	ocp_data &= ~RCR_ACPT_ALL;
+	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 
-	/* If the device is unplugged or !netif_running(), the workqueue
-	 * doesn't need to wake the device, and could return directly.
-	 */
-	if (test_bit(RTL8152_UNPLUG, &tp->flags) || !netif_running(tp->netdev))
-		return;
+	rtl8152_nic_reset(tp);
+	rtl_reset_bmu(tp);
 
-	if (usb_autopm_get_interface(tp->intf) < 0)
-		return;
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
+	ocp_data &= ~NOW_IS_OOB;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
-	if (!test_bit(WORK_ENABLE, &tp->flags))
-		goto out1;
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+	ocp_data &= ~MCU_BORW_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-	if (!mutex_trylock(&tp->control)) {
-		schedule_delayed_work(&tp->schedule, 0);
-		goto out1;
-	}
+	wait_oob_link_list_ready(tp);
 
-	if (test_and_clear_bit(RTL8152_LINK_CHG, &tp->flags))
-		set_carrier(tp);
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+	ocp_data |= RE_INIT_LL;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
 
-	if (test_and_clear_bit(RTL8152_SET_RX_MODE, &tp->flags))
-		_rtl8152_set_rx_mode(tp->netdev);
+	wait_oob_link_list_ready(tp);
 
-	/* don't schedule tasket before linking */
-	if (test_and_clear_bit(SCHEDULE_TASKLET, &tp->flags) &&
-	    netif_carrier_ok(tp->netdev))
-		tasklet_schedule(&tp->tx_tl);
+	rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
 
-	mutex_unlock(&tp->control);
+	rtl8153c_change_mtu(tp);
 
-out1:
-	usb_autopm_put_interface(tp->intf);
-}
+	rtl8152_nic_reset(tp);
 
-static void rtl_hw_phy_work_func_t(struct work_struct *work)
-{
-	struct r8152 *tp = container_of(work, struct r8152, hw_phy_work.work);
+	/* rx share fifo credit full threshold */
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, 0x02);
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 0x08);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_NORMAL);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_NORMAL);
 
-	if (test_bit(RTL8152_UNPLUG, &tp->flags))
-		return;
+	ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_B);
 
-	if (usb_autopm_get_interface(tp->intf) < 0)
-		return;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
+	ocp_data |= BIT(8);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
+
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
+	ocp_data &= ~PLA_MCU_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
+
+	r8153_aldps_en(tp, true);
+	r8153b_u1u2en(tp, true);
+}
+
+static inline u32 fc_pause_on_auto(struct r8152 *tp)
+{
+	return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 6 * 1024);
+}
+
+static inline u32 fc_pause_off_auto(struct r8152 *tp)
+{
+	return (ALIGN(mtu_to_size(tp->netdev->mtu), 1024) + 14 * 1024);
+}
+
+static void r8156_fc_parameter(struct r8152 *tp)
+{
+	u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp);
+	u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp);
+
+	switch (tp->version) {
+	case RTL_VER_10:
+	case RTL_VER_11:
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8);
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8);
+		break;
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
+		break;
+	default:
+		break;
+	}
+}
+
+static void rtl8156_change_mtu(struct r8152 *tp)
+{
+	u32 rx_max_size = mtu_to_size(tp->netdev->mtu);
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rx_max_size);
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
+	r8156_fc_parameter(tp);
+
+	/* TX share fifo free credit full threshold */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, 512 / 64);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_TXFIFO_FULL,
+		       ALIGN(rx_max_size + sizeof(struct tx_desc), 1024) / 16);
+}
+
+static void rtl8156_up(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
+
+	r8153b_u1u2en(tp, false);
+	r8153_u2p3en(tp, false);
+	r8153_aldps_en(tp, false);
+
+	rxdy_gated_en(tp, true);
+	r8153_teredo_off(tp);
+
+	ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+	ocp_data &= ~RCR_ACPT_ALL;
+	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
+
+	rtl8152_nic_reset(tp);
+	rtl_reset_bmu(tp);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
+	ocp_data &= ~NOW_IS_OOB;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+	ocp_data &= ~MCU_BORW_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
+	rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
+
+	rtl8156_change_mtu(tp);
+
+	switch (tp->version) {
+	case RTL_TEST_01:
+	case RTL_VER_10:
+	case RTL_VER_11:
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_BMU_CONFIG);
+		ocp_data |= ACT_ODMA;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data);
+		break;
+	default:
+		break;
+	}
+
+	/* share FIFO settings */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL);
+	ocp_data &= ~RXFIFO_FULL_MASK;
+	ocp_data |= 0x08;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
+	ocp_data &= ~PLA_MCU_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION);
+	ocp_data &= ~(RG_PWRDN_EN | ALL_SPEED_OFF);
+	ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, ocp_data);
+
+	ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, 0x00600400);
+
+	if (tp->saved_wolopts != __rtl_get_wol(tp)) {
+		netif_warn(tp, ifup, tp->netdev, "wol setting is changed\n");
+		__rtl_set_wol(tp, tp->saved_wolopts);
+	}
+
+	r8153_aldps_en(tp, true);
+	r8153_u2p3en(tp, true);
+
+	if (tp->udev->speed >= USB_SPEED_SUPER)
+		r8153b_u1u2en(tp, true);
+}
+
+static void rtl8156_down(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags)) {
+		rtl_drop_queued_tx(tp);
+		return;
+	}
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
+	ocp_data |= PLA_MCU_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
+
+	r8153b_u1u2en(tp, false);
+	r8153_u2p3en(tp, false);
+	r8153b_power_cut_en(tp, false);
+	r8153_aldps_en(tp, false);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
+	ocp_data &= ~NOW_IS_OOB;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
+	rtl_disable(tp);
+	rtl_reset_bmu(tp);
+
+	/* Clear teredo wake event. bit[15:8] is the teredo wakeup
+	 * type. Set it to zero. bits[7:0] are the W1C bits about
+	 * the events. Set them to all 1 to clear them.
+	 */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_WAKE_BASE, 0x00ff);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
+	ocp_data |= NOW_IS_OOB;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+
+	rtl_rx_vlan_en(tp, true);
+	rxdy_gated_en(tp, false);
+
+	ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
+	ocp_data |= RCR_APM | RCR_AM | RCR_AB;
+	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
+
+	r8153_aldps_en(tp, true);
+}
+
+static bool rtl8152_in_nway(struct r8152 *tp)
+{
+	u16 nway_state;
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, 0x2000);
+	tp->ocp_base = 0x2000;
+	ocp_write_byte(tp, MCU_TYPE_PLA, 0xb014, 0x4c);		/* phy state */
+	nway_state = ocp_read_word(tp, MCU_TYPE_PLA, 0xb01a);
+
+	/* bit 15: TXDIS_STATE, bit 14: ABD_STATE */
+	if (nway_state & 0xc000)
+		return false;
+	else
+		return true;
+}
+
+static bool rtl8153_in_nway(struct r8152 *tp)
+{
+	u16 phy_state = ocp_reg_read(tp, OCP_PHY_STATE) & 0xff;
+
+	if (phy_state == TXDIS_STATE || phy_state == ABD_STATE)
+		return false;
+	else
+		return true;
+}
+
+static void set_carrier(struct r8152 *tp)
+{
+	struct net_device *netdev = tp->netdev;
+	struct napi_struct *napi = &tp->napi;
+	u16 speed;
+
+	speed = rtl8152_get_speed(tp);
+
+	if (speed & LINK_STATUS) {
+		if (!netif_carrier_ok(netdev)) {
+			tp->rtl_ops.enable(tp);
+			netif_stop_queue(netdev);
+			napi_disable(napi);
+			netif_carrier_on(netdev);
+			rtl_start_rx(tp);
+			clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
+			_rtl8152_set_rx_mode(netdev);
+			napi_enable(napi);
+			netif_wake_queue(netdev);
+			netif_info(tp, link, netdev, "carrier on\n");
+		} else if (netif_queue_stopped(netdev) &&
+			   skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+			netif_wake_queue(netdev);
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			netif_carrier_off(netdev);
+			tasklet_disable(&tp->tx_tl);
+			napi_disable(napi);
+			tp->rtl_ops.disable(tp);
+			napi_enable(napi);
+			tasklet_enable(&tp->tx_tl);
+			netif_info(tp, link, netdev, "carrier off\n");
+		}
+	}
+}
+
+static void rtl_work_func_t(struct work_struct *work)
+{
+	struct r8152 *tp = container_of(work, struct r8152, schedule.work);
+
+	/* If the device is unplugged or !netif_running(), the workqueue
+	 * doesn't need to wake the device, and could return directly.
+	 */
+	if (test_bit(RTL8152_UNPLUG, &tp->flags) || !netif_running(tp->netdev))
+		return;
+
+	if (usb_autopm_get_interface(tp->intf) < 0)
+		return;
+
+	if (!test_bit(WORK_ENABLE, &tp->flags))
+		goto out1;
+
+	if (!mutex_trylock(&tp->control)) {
+		schedule_delayed_work(&tp->schedule, 0);
+		goto out1;
+	}
+
+	if (test_and_clear_bit(RTL8152_LINK_CHG, &tp->flags))
+		set_carrier(tp);
+
+	if (test_and_clear_bit(RTL8152_SET_RX_MODE, &tp->flags))
+		_rtl8152_set_rx_mode(tp->netdev);
+
+	/* don't schedule tasket before linking */
+	if (test_and_clear_bit(SCHEDULE_TASKLET, &tp->flags) &&
+	    netif_carrier_ok(tp->netdev))
+		tasklet_schedule(&tp->tx_tl);
+
+	mutex_unlock(&tp->control);
+
+out1:
+	usb_autopm_put_interface(tp->intf);
+}
+
+static void rtl_hw_phy_work_func_t(struct work_struct *work)
+{
+	struct r8152 *tp = container_of(work, struct r8152, hw_phy_work.work);
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
+
+	if (usb_autopm_get_interface(tp->intf) < 0)
+		return;
 
 	mutex_lock(&tp->control);
 
@@ -5399,7 +6275,1068 @@ static void r8152b_init(struct r8152 *tp)
 	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
 }
 
-static void r8153_init(struct r8152 *tp)
+static void r8153_init(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 data;
+	int i;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
+
+	r8153_u1u2en(tp, false);
+
+	for (i = 0; i < 500; i++) {
+		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
+		    AUTOLOAD_DONE)
+			break;
+
+		msleep(20);
+		if (test_bit(RTL8152_UNPLUG, &tp->flags))
+			break;
+	}
+
+	data = r8153_phy_status(tp, 0);
+
+	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
+	    tp->version == RTL_VER_05)
+		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
+	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+
+	r8153_u2p3en(tp, false);
+
+	if (tp->version == RTL_VER_04) {
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2);
+		ocp_data &= ~pwd_dn_scale_mask;
+		ocp_data |= pwd_dn_scale(96);
+		ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY);
+		ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data);
+	} else if (tp->version == RTL_VER_05) {
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0);
+		ocp_data &= ~ECM_ALDPS;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0, ocp_data);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
+		if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
+			ocp_data &= ~DYNAMIC_BURST;
+		else
+			ocp_data |= DYNAMIC_BURST;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
+	} else if (tp->version == RTL_VER_06) {
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
+		if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
+			ocp_data &= ~DYNAMIC_BURST;
+		else
+			ocp_data |= DYNAMIC_BURST;
+		ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
+
+		r8153_queue_wake(tp, false);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
+		if (rtl8152_get_speed(tp) & LINK_STATUS)
+			ocp_data |= CUR_LINK_OK;
+		else
+			ocp_data &= ~CUR_LINK_OK;
+		ocp_data |= POLL_LINK_CHG;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
+	}
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2);
+	ocp_data |= EP4_FULL_FC;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL);
+	ocp_data &= ~TIMER11_EN;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE);
+	ocp_data &= ~LED_MODE_MASK;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data);
+
+	ocp_data = FIFO_EMPTY_1FB | ROK_EXIT_LPM;
+	if (tp->version == RTL_VER_04 && tp->udev->speed < USB_SPEED_SUPER)
+		ocp_data |= LPM_TIMER_500MS;
+	else
+		ocp_data |= LPM_TIMER_500US;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_LPM_CTRL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2);
+	ocp_data &= ~SEN_VAL_MASK;
+	ocp_data |= SEN_VAL_NORMAL | SEL_RXIDLE;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2, ocp_data);
+
+	ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001);
+
+	r8153_power_cut_en(tp, false);
+	rtl_runtime_suspend_enable(tp, false);
+	r8153_mac_clk_speed_down(tp, false);
+	r8153_u1u2en(tp, true);
+	usb_enable_lpm(tp->udev);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6);
+	ocp_data |= LANWAKE_CLR_EN;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG);
+	ocp_data &= ~LANWAKE_PIN;
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data);
+
+	/* rx aggregation */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
+	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+	if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+		ocp_data |= RX_AGG_DISABLE;
+
+	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
+	rtl_tally_reset(tp);
+
+	switch (tp->udev->speed) {
+	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
+		tp->coalesce = COALESCE_SUPER;
+		break;
+	case USB_SPEED_HIGH:
+		tp->coalesce = COALESCE_HIGH;
+		break;
+	default:
+		tp->coalesce = COALESCE_SLOW;
+		break;
+	}
+}
+
+static void r8153b_init(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 data;
+	int i;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
+
+	r8153b_u1u2en(tp, false);
+
+	for (i = 0; i < 500; i++) {
+		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
+		    AUTOLOAD_DONE)
+			break;
+
+		msleep(20);
+		if (test_bit(RTL8152_UNPLUG, &tp->flags))
+			break;
+	}
+
+	data = r8153_phy_status(tp, 0);
+
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
+	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+
+	r8153_u2p3en(tp, false);
+
+	/* MSC timer = 0xfff * 8ms = 32760 ms */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff);
+
+	r8153b_power_cut_en(tp, false);
+	r8153b_ups_en(tp, false);
+	r8153_queue_wake(tp, false);
+	rtl_runtime_suspend_enable(tp, false);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
+	if (rtl8152_get_speed(tp) & LINK_STATUS)
+		ocp_data |= CUR_LINK_OK;
+	else
+		ocp_data &= ~CUR_LINK_OK;
+	ocp_data |= POLL_LINK_CHG;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
+
+	if (tp->udev->speed >= USB_SPEED_SUPER)
+		r8153b_u1u2en(tp, true);
+
+	usb_enable_lpm(tp->udev);
+
+	/* MAC clock speed down */
+	r8153_mac_clk_speed_down(tp, true);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
+	ocp_data &= ~PLA_MCU_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
+
+	if (tp->version == RTL_VER_09) {
+		/* Disable Test IO for 32QFN */
+		if (ocp_read_byte(tp, MCU_TYPE_PLA, 0xdc00) & BIT(5)) {
+			ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
+			ocp_data |= TEST_IO_OFF;
+			ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
+		}
+	}
+
+	set_bit(GREEN_ETHERNET, &tp->flags);
+
+	/* rx aggregation */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
+	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
+	rtl_tally_reset(tp);
+
+	tp->coalesce = 15000;	/* 15 us */
+}
+
+static void r8153c_init(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 data;
+	int i;
+
+	if (test_bit(RTL8152_UNPLUG, &tp->flags))
+		return;
+
+	r8153b_u1u2en(tp, false);
+
+	/* Disable spi_en */
+	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5);
+	ocp_data &= ~BIT(3);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data);
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, 0xcbf0);
+	ocp_data |= BIT(1);
+	ocp_write_word(tp, MCU_TYPE_USB, 0xcbf0, ocp_data);
+
+	for (i = 0; i < 500; i++) {
+		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
+		    AUTOLOAD_DONE)
+			break;
+
+		msleep(20);
+		if (test_bit(RTL8152_UNPLUG, &tp->flags))
+			return;
+	}
+
+	data = r8153_phy_status(tp, 0);
+
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
+	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+
+	r8153_u2p3en(tp, false);
+
+	/* MSC timer = 0xfff * 8ms = 32760 ms */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff);
+
+	r8153b_power_cut_en(tp, false);
+	r8153c_ups_en(tp, false);
+	r8153_queue_wake(tp, false);
+	rtl_runtime_suspend_enable(tp, false);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
+	if (rtl8152_get_speed(tp) & LINK_STATUS)
+		ocp_data |= CUR_LINK_OK;
+	else
+		ocp_data &= ~CUR_LINK_OK;
+
+	ocp_data |= POLL_LINK_CHG;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
+
+	r8153b_u1u2en(tp, true);
+
+	usb_enable_lpm(tp->udev);
+
+	/* MAC clock speed down */
+	r8153_mac_clk_speed_down(tp, true);
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_2);
+	ocp_data &= ~BIT(7);
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_MISC_2, ocp_data);
+
+	set_bit(GREEN_ETHERNET, &tp->flags);
+
+	/* rx aggregation */
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
+	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
+
+	rtl_tally_reset(tp);
+
+	tp->coalesce = 15000;	/* 15 us */
+}
+
+static void r8156_hw_phy_cfg(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 data;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
+	if (ocp_data & PCUT_STATUS) {
+		ocp_data &= ~PCUT_STATUS;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data);
+	}
+
+	data = r8153_phy_status(tp, 0);
+	switch (data) {
+	case PHY_STAT_EXT_INIT:
+		rtl8152_apply_firmware(tp, true);
+
+		data = ocp_reg_read(tp, 0xa468);
+		data &= ~(BIT(3) | BIT(1));
+		ocp_reg_write(tp, 0xa468, data);
+		break;
+	case PHY_STAT_LAN_ON:
+	case PHY_STAT_PWRDN:
+	default:
+		rtl8152_apply_firmware(tp, false);
+		break;
+	}
+
+	/* disable ALDPS before updating the PHY parameters */
+	r8153_aldps_en(tp, false);
+
+	/* disable EEE before updating the PHY parameters */
+	rtl_eee_enable(tp, false);
+
+	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+	WARN_ON_ONCE(data != PHY_STAT_LAN_ON);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
+	ocp_data |= PFM_PWM_SWITCH;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
+
+	switch (tp->version) {
+	case RTL_VER_10:
+		data = ocp_reg_read(tp, 0xad40);
+		data &= ~0x3ff;
+		data |= BIT(7) | BIT(2);
+		ocp_reg_write(tp, 0xad40, data);
+
+		data = ocp_reg_read(tp, 0xad4e);
+		data |= BIT(4);
+		ocp_reg_write(tp, 0xad4e, data);
+		data = ocp_reg_read(tp, 0xad16);
+		data &= ~0x3ff;
+		data |= 0x6;
+		ocp_reg_write(tp, 0xad16, data);
+		data = ocp_reg_read(tp, 0xad32);
+		data &= ~0x3f;
+		data |= 6;
+		ocp_reg_write(tp, 0xad32, data);
+		data = ocp_reg_read(tp, 0xac08);
+		data &= ~(BIT(12) | BIT(8));
+		ocp_reg_write(tp, 0xac08, data);
+		data = ocp_reg_read(tp, 0xac8a);
+		data |= BIT(12) | BIT(13) | BIT(14);
+		data &= ~BIT(15);
+		ocp_reg_write(tp, 0xac8a, data);
+		data = ocp_reg_read(tp, 0xad18);
+		data |= BIT(10);
+		ocp_reg_write(tp, 0xad18, data);
+		data = ocp_reg_read(tp, 0xad1a);
+		data |= 0x3ff;
+		ocp_reg_write(tp, 0xad1a, data);
+		data = ocp_reg_read(tp, 0xad1c);
+		data |= 0x3ff;
+		ocp_reg_write(tp, 0xad1c, data);
+
+		data = sram_read(tp, 0x80ea);
+		data &= ~0xff00;
+		data |= 0xc400;
+		sram_write(tp, 0x80ea, data);
+		data = sram_read(tp, 0x80eb);
+		data &= ~0x0700;
+		data |= 0x0300;
+		sram_write(tp, 0x80eb, data);
+		data = sram_read(tp, 0x80f8);
+		data &= ~0xff00;
+		data |= 0x1c00;
+		sram_write(tp, 0x80f8, data);
+		data = sram_read(tp, 0x80f1);
+		data &= ~0xff00;
+		data |= 0x3000;
+		sram_write(tp, 0x80f1, data);
+
+		data = sram_read(tp, 0x80fe);
+		data &= ~0xff00;
+		data |= 0xa500;
+		sram_write(tp, 0x80fe, data);
+		data = sram_read(tp, 0x8102);
+		data &= ~0xff00;
+		data |= 0x5000;
+		sram_write(tp, 0x8102, data);
+		data = sram_read(tp, 0x8015);
+		data &= ~0xff00;
+		data |= 0x3300;
+		sram_write(tp, 0x8015, data);
+		data = sram_read(tp, 0x8100);
+		data &= ~0xff00;
+		data |= 0x7000;
+		sram_write(tp, 0x8100, data);
+		data = sram_read(tp, 0x8014);
+		data &= ~0xff00;
+		data |= 0xf000;
+		sram_write(tp, 0x8014, data);
+		data = sram_read(tp, 0x8016);
+		data &= ~0xff00;
+		data |= 0x6500;
+		sram_write(tp, 0x8016, data);
+		data = sram_read(tp, 0x80dc);
+		data &= ~0xff00;
+		data |= 0xed00;
+		sram_write(tp, 0x80dc, data);
+		data = sram_read(tp, 0x80df);
+		data |= BIT(8);
+		sram_write(tp, 0x80df, data);
+		data = sram_read(tp, 0x80e1);
+		data &= ~BIT(8);
+		sram_write(tp, 0x80e1, data);
+
+		data = ocp_reg_read(tp, 0xbf06);
+		data &= ~0x003f;
+		data |= 0x0038;
+		ocp_reg_write(tp, 0xbf06, data);
+
+		sram_write(tp, 0x819f, 0xddb6);
+
+		ocp_reg_write(tp, 0xbc34, 0x5555);
+		data = ocp_reg_read(tp, 0xbf0a);
+		data &= ~0x0e00;
+		data |= 0x0a00;
+		ocp_reg_write(tp, 0xbf0a, data);
+
+		data = ocp_reg_read(tp, 0xbd2c);
+		data &= ~BIT(13);
+		ocp_reg_write(tp, 0xbd2c, data);
+		break;
+	case RTL_VER_11:
+		data = ocp_reg_read(tp, 0xad16);
+		data |= 0x3ff;
+		ocp_reg_write(tp, 0xad16, data);
+		data = ocp_reg_read(tp, 0xad32);
+		data &= ~0x3f;
+		data |= 6;
+		ocp_reg_write(tp, 0xad32, data);
+		data = ocp_reg_read(tp, 0xac08);
+		data &= ~(BIT(12) | BIT(8));
+		ocp_reg_write(tp, 0xac08, data);
+		data = ocp_reg_read(tp, 0xacc0);
+		data &= ~0x3;
+		data |= BIT(1);
+		ocp_reg_write(tp, 0xacc0, data);
+		data = ocp_reg_read(tp, 0xad40);
+		data &= ~0xe7;
+		data |= BIT(6) | BIT(2);
+		ocp_reg_write(tp, 0xad40, data);
+		data = ocp_reg_read(tp, 0xac14);
+		data &= ~BIT(7);
+		ocp_reg_write(tp, 0xac14, data);
+		data = ocp_reg_read(tp, 0xac80);
+		data &= ~(BIT(8) | BIT(9));
+		ocp_reg_write(tp, 0xac80, data);
+		data = ocp_reg_read(tp, 0xac5e);
+		data &= ~0x7;
+		data |= BIT(1);
+		ocp_reg_write(tp, 0xac5e, data);
+		ocp_reg_write(tp, 0xad4c, 0x00a8);
+		ocp_reg_write(tp, 0xac5c, 0x01ff);
+		data = ocp_reg_read(tp, 0xac8a);
+		data &= ~0xf0;
+		data |= BIT(4) | BIT(5);
+		ocp_reg_write(tp, 0xac8a, data);
+		ocp_reg_write(tp, 0xb87c, 0x8157);
+		data = ocp_reg_read(tp, 0xb87e);
+		data &= ~0xff00;
+		data |= 0x0500;
+		ocp_reg_write(tp, 0xb87e, data);
+		ocp_reg_write(tp, 0xb87c, 0x8159);
+		data = ocp_reg_read(tp, 0xb87e);
+		data &= ~0xff00;
+		data |= 0x0700;
+		ocp_reg_write(tp, 0xb87e, data);
+
+		/* AAGC */
+		ocp_reg_write(tp, 0xb87c, 0x80a2);
+		ocp_reg_write(tp, 0xb87e, 0x0153);
+		ocp_reg_write(tp, 0xb87c, 0x809c);
+		ocp_reg_write(tp, 0xb87e, 0x0153);
+
+		/* EEE parameter */
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_TXTWSYS_2P5G, 0x0056);
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_USB_CFG);
+		ocp_data |= EN_XG_LIP | EN_G_LIP;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_USB_CFG, ocp_data);
+
+		sram_write(tp, 0x8257, 0x020f); /*  XG PLL */
+		sram_write(tp, 0x80ea, 0x7843); /* GIGA Master */
+
+		if (rtl_phy_patch_request(tp, true, true))
+			return;
+
+		/* Advance EEE */
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+		ocp_data |= EEE_SPDWN_EN;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+
+		data = ocp_reg_read(tp, OCP_DOWN_SPEED);
+		data &= ~(EN_EEE_100 | EN_EEE_1000);
+		data |= EN_10M_CLKDIV;
+		ocp_reg_write(tp, OCP_DOWN_SPEED, data);
+		tp->ups_info._10m_ckdiv = true;
+		tp->ups_info.eee_plloff_100 = false;
+		tp->ups_info.eee_plloff_giga = false;
+
+		data = ocp_reg_read(tp, OCP_POWER_CFG);
+		data &= ~EEE_CLKDIV_EN;
+		ocp_reg_write(tp, OCP_POWER_CFG, data);
+		tp->ups_info.eee_ckdiv = false;
+
+		ocp_reg_write(tp, OCP_SYSCLK_CFG, 0);
+		ocp_reg_write(tp, OCP_SYSCLK_CFG, sysclk_div_expo(5));
+		tp->ups_info._250m_ckdiv = false;
+
+		rtl_phy_patch_request(tp, false, true);
+
+		/* enable ADC Ibias Cal */
+		data = ocp_reg_read(tp, 0xd068);
+		data |= BIT(13);
+		ocp_reg_write(tp, 0xd068, data);
+
+		/* enable Thermal Sensor */
+		data = sram_read(tp, 0x81a2);
+		data &= ~BIT(8);
+		sram_write(tp, 0x81a2, data);
+		data = ocp_reg_read(tp, 0xb54c);
+		data &= ~0xff00;
+		data |= 0xdb00;
+		ocp_reg_write(tp, 0xb54c, data);
+
+		/* Nway 2.5G Lite */
+		data = ocp_reg_read(tp, 0xa454);
+		data &= ~BIT(0);
+		ocp_reg_write(tp, 0xa454, data);
+
+		/* CS DSP solution */
+		data = ocp_reg_read(tp, OCP_10GBT_CTRL);
+		data |= RTL_ADV2_5G_F_R;
+		ocp_reg_write(tp, OCP_10GBT_CTRL, data);
+		data = ocp_reg_read(tp, 0xad4e);
+		data &= ~BIT(4);
+		ocp_reg_write(tp, 0xad4e, data);
+		data = ocp_reg_read(tp, 0xa86a);
+		data &= ~BIT(0);
+		ocp_reg_write(tp, 0xa86a, data);
+
+		/* MDI SWAP */
+		if ((ocp_read_word(tp, MCU_TYPE_USB, USB_UPS_CFG) & MID_REVERSE) &&
+		    (ocp_reg_read(tp, 0xd068) & BIT(1))) {
+			u16 swap_a, swap_b;
+
+			data = ocp_reg_read(tp, 0xd068);
+			data &= ~0x1f;
+			data |= 0x1; /* p0 */
+			ocp_reg_write(tp, 0xd068, data);
+			swap_a = ocp_reg_read(tp, 0xd06a);
+			data &= ~0x18;
+			data |= 0x18; /* p3 */
+			ocp_reg_write(tp, 0xd068, data);
+			swap_b = ocp_reg_read(tp, 0xd06a);
+			data &= ~0x18; /* p0 */
+			ocp_reg_write(tp, 0xd068, data);
+			ocp_reg_write(tp, 0xd06a,
+				      (swap_a & ~0x7ff) | (swap_b & 0x7ff));
+			data |= 0x18; /* p3 */
+			ocp_reg_write(tp, 0xd068, data);
+			ocp_reg_write(tp, 0xd06a,
+				      (swap_b & ~0x7ff) | (swap_a & 0x7ff));
+			data &= ~0x18;
+			data |= 0x08; /* p1 */
+			ocp_reg_write(tp, 0xd068, data);
+			swap_a = ocp_reg_read(tp, 0xd06a);
+			data &= ~0x18;
+			data |= 0x10; /* p2 */
+			ocp_reg_write(tp, 0xd068, data);
+			swap_b = ocp_reg_read(tp, 0xd06a);
+			data &= ~0x18;
+			data |= 0x08; /* p1 */
+			ocp_reg_write(tp, 0xd068, data);
+			ocp_reg_write(tp, 0xd06a,
+				      (swap_a & ~0x7ff) | (swap_b & 0x7ff));
+			data &= ~0x18;
+			data |= 0x10; /* p2 */
+			ocp_reg_write(tp, 0xd068, data);
+			ocp_reg_write(tp, 0xd06a,
+				      (swap_b & ~0x7ff) | (swap_a & 0x7ff));
+			swap_a = ocp_reg_read(tp, 0xbd5a);
+			swap_b = ocp_reg_read(tp, 0xbd5c);
+			ocp_reg_write(tp, 0xbd5a, (swap_a & ~0x1f1f) |
+				      ((swap_b & 0x1f) << 8) |
+				      ((swap_b >> 8) & 0x1f));
+			ocp_reg_write(tp, 0xbd5c, (swap_b & ~0x1f1f) |
+				      ((swap_a & 0x1f) << 8) |
+				      ((swap_a >> 8) & 0x1f));
+			swap_a = ocp_reg_read(tp, 0xbc18);
+			swap_b = ocp_reg_read(tp, 0xbc1a);
+			ocp_reg_write(tp, 0xbc18, (swap_a & ~0x1f1f) |
+				      ((swap_b & 0x1f) << 8) |
+				      ((swap_b >> 8) & 0x1f));
+			ocp_reg_write(tp, 0xbc1a, (swap_b & ~0x1f1f) |
+				      ((swap_a & 0x1f) << 8) |
+				      ((swap_a >> 8) & 0x1f));
+		}
+		break;
+	default:
+		break;
+	}
+
+	rtl_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags));
+
+	data = ocp_reg_read(tp, 0xa428);
+	data &= ~BIT(9);
+	ocp_reg_write(tp, 0xa428, data);
+	data = ocp_reg_read(tp, 0xa5ea);
+	data &= ~BIT(0);
+	ocp_reg_write(tp, 0xa5ea, data);
+	tp->ups_info.lite_mode = 0;
+
+	if (tp->eee_en)
+		rtl_eee_enable(tp, true);
+
+	r8153_aldps_en(tp, true);
+	r8152b_enable_fc(tp);
+	r8153_u2p3en(tp, true);
+
+	set_bit(PHY_RESET, &tp->flags);
+}
+
+static void r8156b_hw_phy_cfg(struct r8152 *tp)
+{
+	u32 ocp_data;
+	u16 data;
+
+	switch (tp->version) {
+	case RTL_VER_12:
+		ocp_reg_write(tp, 0xbf86, 0x9000);
+		data = ocp_reg_read(tp, 0xc402);
+		data |= BIT(10);
+		ocp_reg_write(tp, 0xc402, data);
+		data &= ~BIT(10);
+		ocp_reg_write(tp, 0xc402, data);
+		ocp_reg_write(tp, 0xbd86, 0x1010);
+		ocp_reg_write(tp, 0xbd88, 0x1010);
+		data = ocp_reg_read(tp, 0xbd4e);
+		data &= ~(BIT(10) | BIT(11));
+		data |= BIT(11);
+		ocp_reg_write(tp, 0xbd4e, data);
+		data = ocp_reg_read(tp, 0xbf46);
+		data &= ~0xf00;
+		data |= 0x700;
+		ocp_reg_write(tp, 0xbf46, data);
+		break;
+	case RTL_VER_13:
+	case RTL_VER_15:
+		r8156b_wait_loading_flash(tp);
+		break;
+	default:
+		break;
+	}
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
+	if (ocp_data & PCUT_STATUS) {
+		ocp_data &= ~PCUT_STATUS;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data);
+	}
+
+	data = r8153_phy_status(tp, 0);
+	switch (data) {
+	case PHY_STAT_EXT_INIT:
+		rtl8152_apply_firmware(tp, true);
+
+		data = ocp_reg_read(tp, 0xa466);
+		data &= ~BIT(0);
+		ocp_reg_write(tp, 0xa466, data);
+
+		data = ocp_reg_read(tp, 0xa468);
+		data &= ~(BIT(3) | BIT(1));
+		ocp_reg_write(tp, 0xa468, data);
+		break;
+	case PHY_STAT_LAN_ON:
+	case PHY_STAT_PWRDN:
+	default:
+		rtl8152_apply_firmware(tp, false);
+		break;
+	}
+
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
+	/* disable ALDPS before updating the PHY parameters */
+	r8153_aldps_en(tp, false);
+
+	/* disable EEE before updating the PHY parameters */
+	rtl_eee_enable(tp, false);
+
+	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+	WARN_ON_ONCE(data != PHY_STAT_LAN_ON);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
+	ocp_data |= PFM_PWM_SWITCH;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
+
+	switch (tp->version) {
+	case RTL_VER_12:
+		data = ocp_reg_read(tp, 0xbc08);
+		data |= BIT(3) | BIT(2);
+		ocp_reg_write(tp, 0xbc08, data);
+
+		data = sram_read(tp, 0x8fff);
+		data &= ~0xff00;
+		data |= 0x0400;
+		sram_write(tp, 0x8fff, data);
+
+		data = ocp_reg_read(tp, 0xacda);
+		data |= 0xff00;
+		ocp_reg_write(tp, 0xacda, data);
+		data = ocp_reg_read(tp, 0xacde);
+		data |= 0xf000;
+		ocp_reg_write(tp, 0xacde, data);
+		ocp_reg_write(tp, 0xac8c, 0x0ffc);
+		ocp_reg_write(tp, 0xac46, 0xb7b4);
+		ocp_reg_write(tp, 0xac50, 0x0fbc);
+		ocp_reg_write(tp, 0xac3c, 0x9240);
+		ocp_reg_write(tp, 0xac4e, 0x0db4);
+		ocp_reg_write(tp, 0xacc6, 0x0707);
+		ocp_reg_write(tp, 0xacc8, 0xa0d3);
+		ocp_reg_write(tp, 0xad08, 0x0007);
+
+		ocp_reg_write(tp, 0xb87c, 0x8560);
+		ocp_reg_write(tp, 0xb87e, 0x19cc);
+		ocp_reg_write(tp, 0xb87c, 0x8562);
+		ocp_reg_write(tp, 0xb87e, 0x19cc);
+		ocp_reg_write(tp, 0xb87c, 0x8564);
+		ocp_reg_write(tp, 0xb87e, 0x19cc);
+		ocp_reg_write(tp, 0xb87c, 0x8566);
+		ocp_reg_write(tp, 0xb87e, 0x147d);
+		ocp_reg_write(tp, 0xb87c, 0x8568);
+		ocp_reg_write(tp, 0xb87e, 0x147d);
+		ocp_reg_write(tp, 0xb87c, 0x856a);
+		ocp_reg_write(tp, 0xb87e, 0x147d);
+		ocp_reg_write(tp, 0xb87c, 0x8ffe);
+		ocp_reg_write(tp, 0xb87e, 0x0907);
+		ocp_reg_write(tp, 0xb87c, 0x80d6);
+		ocp_reg_write(tp, 0xb87e, 0x2801);
+		ocp_reg_write(tp, 0xb87c, 0x80f2);
+		ocp_reg_write(tp, 0xb87e, 0x2801);
+		ocp_reg_write(tp, 0xb87c, 0x80f4);
+		ocp_reg_write(tp, 0xb87e, 0x6077);
+		ocp_reg_write(tp, 0xb506, 0x01e7);
+
+		ocp_reg_write(tp, 0xb87c, 0x8013);
+		ocp_reg_write(tp, 0xb87e, 0x0700);
+		ocp_reg_write(tp, 0xb87c, 0x8fb9);
+		ocp_reg_write(tp, 0xb87e, 0x2801);
+		ocp_reg_write(tp, 0xb87c, 0x8fba);
+		ocp_reg_write(tp, 0xb87e, 0x0100);
+		ocp_reg_write(tp, 0xb87c, 0x8fbc);
+		ocp_reg_write(tp, 0xb87e, 0x1900);
+		ocp_reg_write(tp, 0xb87c, 0x8fbe);
+		ocp_reg_write(tp, 0xb87e, 0xe100);
+		ocp_reg_write(tp, 0xb87c, 0x8fc0);
+		ocp_reg_write(tp, 0xb87e, 0x0800);
+		ocp_reg_write(tp, 0xb87c, 0x8fc2);
+		ocp_reg_write(tp, 0xb87e, 0xe500);
+		ocp_reg_write(tp, 0xb87c, 0x8fc4);
+		ocp_reg_write(tp, 0xb87e, 0x0f00);
+		ocp_reg_write(tp, 0xb87c, 0x8fc6);
+		ocp_reg_write(tp, 0xb87e, 0xf100);
+		ocp_reg_write(tp, 0xb87c, 0x8fc8);
+		ocp_reg_write(tp, 0xb87e, 0x0400);
+		ocp_reg_write(tp, 0xb87c, 0x8fca);
+		ocp_reg_write(tp, 0xb87e, 0xf300);
+		ocp_reg_write(tp, 0xb87c, 0x8fcc);
+		ocp_reg_write(tp, 0xb87e, 0xfd00);
+		ocp_reg_write(tp, 0xb87c, 0x8fce);
+		ocp_reg_write(tp, 0xb87e, 0xff00);
+		ocp_reg_write(tp, 0xb87c, 0x8fd0);
+		ocp_reg_write(tp, 0xb87e, 0xfb00);
+		ocp_reg_write(tp, 0xb87c, 0x8fd2);
+		ocp_reg_write(tp, 0xb87e, 0x0100);
+		ocp_reg_write(tp, 0xb87c, 0x8fd4);
+		ocp_reg_write(tp, 0xb87e, 0xf400);
+		ocp_reg_write(tp, 0xb87c, 0x8fd6);
+		ocp_reg_write(tp, 0xb87e, 0xff00);
+		ocp_reg_write(tp, 0xb87c, 0x8fd8);
+		ocp_reg_write(tp, 0xb87e, 0xf600);
+
+		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_USB_CFG);
+		ocp_data |= EN_XG_LIP | EN_G_LIP;
+		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_USB_CFG, ocp_data);
+		ocp_reg_write(tp, 0xb87c, 0x813d);
+		ocp_reg_write(tp, 0xb87e, 0x390e);
+		ocp_reg_write(tp, 0xb87c, 0x814f);
+		ocp_reg_write(tp, 0xb87e, 0x790e);
+		ocp_reg_write(tp, 0xb87c, 0x80b0);
+		ocp_reg_write(tp, 0xb87e, 0x0f31);
+		data = ocp_reg_read(tp, 0xbf4c);
+		data |= BIT(1);
+		ocp_reg_write(tp, 0xbf4c, data);
+		data = ocp_reg_read(tp, 0xbcca);
+		data |= BIT(9) | BIT(8);
+		ocp_reg_write(tp, 0xbcca, data);
+		ocp_reg_write(tp, 0xb87c, 0x8141);
+		ocp_reg_write(tp, 0xb87e, 0x320e);
+		ocp_reg_write(tp, 0xb87c, 0x8153);
+		ocp_reg_write(tp, 0xb87e, 0x720e);
+		ocp_reg_write(tp, 0xb87c, 0x8529);
+		ocp_reg_write(tp, 0xb87e, 0x050e);
+		data = ocp_reg_read(tp, OCP_EEE_CFG);
+		data &= ~CTAP_SHORT_EN;
+		ocp_reg_write(tp, OCP_EEE_CFG, data);
+
+		sram_write(tp, 0x816c, 0xc4a0);
+		sram_write(tp, 0x8170, 0xc4a0);
+		sram_write(tp, 0x8174, 0x04a0);
+		sram_write(tp, 0x8178, 0x04a0);
+		sram_write(tp, 0x817c, 0x0719);
+		sram_write(tp, 0x8ff4, 0x0400);
+		sram_write(tp, 0x8ff1, 0x0404);
+
+		ocp_reg_write(tp, 0xbf4a, 0x001b);
+		ocp_reg_write(tp, 0xb87c, 0x8033);
+		ocp_reg_write(tp, 0xb87e, 0x7c13);
+		ocp_reg_write(tp, 0xb87c, 0x8037);
+		ocp_reg_write(tp, 0xb87e, 0x7c13);
+		ocp_reg_write(tp, 0xb87c, 0x803b);
+		ocp_reg_write(tp, 0xb87e, 0xfc32);
+		ocp_reg_write(tp, 0xb87c, 0x803f);
+		ocp_reg_write(tp, 0xb87e, 0x7c13);
+		ocp_reg_write(tp, 0xb87c, 0x8043);
+		ocp_reg_write(tp, 0xb87e, 0x7c13);
+		ocp_reg_write(tp, 0xb87c, 0x8047);
+		ocp_reg_write(tp, 0xb87e, 0x7c13);
+
+		ocp_reg_write(tp, 0xb87c, 0x8145);
+		ocp_reg_write(tp, 0xb87e, 0x370e);
+		ocp_reg_write(tp, 0xb87c, 0x8157);
+		ocp_reg_write(tp, 0xb87e, 0x770e);
+		ocp_reg_write(tp, 0xb87c, 0x8169);
+		ocp_reg_write(tp, 0xb87e, 0x0d0a);
+		ocp_reg_write(tp, 0xb87c, 0x817b);
+		ocp_reg_write(tp, 0xb87e, 0x1d0a);
+
+		data = sram_read(tp, 0x8217);
+		data &= ~0xff00;
+		data |= 0x5000;
+		sram_write(tp, 0x8217, data);
+		data = sram_read(tp, 0x821a);
+		data &= ~0xff00;
+		data |= 0x5000;
+		sram_write(tp, 0x821a, data);
+		sram_write(tp, 0x80da, 0x0403);
+		data = sram_read(tp, 0x80dc);
+		data &= ~0xff00;
+		data |= 0x1000;
+		sram_write(tp, 0x80dc, data);
+		sram_write(tp, 0x80b3, 0x0384);
+		sram_write(tp, 0x80b7, 0x2007);
+		data = sram_read(tp, 0x80ba);
+		data &= ~0xff00;
+		data |= 0x6c00;
+		sram_write(tp, 0x80ba, data);
+		sram_write(tp, 0x80b5, 0xf009);
+		data = sram_read(tp, 0x80bd);
+		data &= ~0xff00;
+		data |= 0x9f00;
+		sram_write(tp, 0x80bd, data);
+		sram_write(tp, 0x80c7, 0xf083);
+		sram_write(tp, 0x80dd, 0x03f0);
+		data = sram_read(tp, 0x80df);
+		data &= ~0xff00;
+		data |= 0x1000;
+		sram_write(tp, 0x80df, data);
+		sram_write(tp, 0x80cb, 0x2007);
+		data = sram_read(tp, 0x80ce);
+		data &= ~0xff00;
+		data |= 0x6c00;
+		sram_write(tp, 0x80ce, data);
+		sram_write(tp, 0x80c9, 0x8009);
+		data = sram_read(tp, 0x80d1);
+		data &= ~0xff00;
+		data |= 0x8000;
+		sram_write(tp, 0x80d1, data);
+		sram_write(tp, 0x80a3, 0x200a);
+		sram_write(tp, 0x80a5, 0xf0ad);
+		sram_write(tp, 0x809f, 0x6073);
+		sram_write(tp, 0x80a1, 0x000b);
+		data = sram_read(tp, 0x80a9);
+		data &= ~0xff00;
+		data |= 0xc000;
+		sram_write(tp, 0x80a9, data);
+
+		if (rtl_phy_patch_request(tp, true, true))
+			return;
+
+		data = ocp_reg_read(tp, 0xb896);
+		data &= ~BIT(0);
+		ocp_reg_write(tp, 0xb896, data);
+		data = ocp_reg_read(tp, 0xb892);
+		data &= ~0xff00;
+		ocp_reg_write(tp, 0xb892, data);
+		ocp_reg_write(tp, 0xb88e, 0xc23e);
+		ocp_reg_write(tp, 0xb890, 0x0000);
+		ocp_reg_write(tp, 0xb88e, 0xc240);
+		ocp_reg_write(tp, 0xb890, 0x0103);
+		ocp_reg_write(tp, 0xb88e, 0xc242);
+		ocp_reg_write(tp, 0xb890, 0x0507);
+		ocp_reg_write(tp, 0xb88e, 0xc244);
+		ocp_reg_write(tp, 0xb890, 0x090b);
+		ocp_reg_write(tp, 0xb88e, 0xc246);
+		ocp_reg_write(tp, 0xb890, 0x0c0e);
+		ocp_reg_write(tp, 0xb88e, 0xc248);
+		ocp_reg_write(tp, 0xb890, 0x1012);
+		ocp_reg_write(tp, 0xb88e, 0xc24a);
+		ocp_reg_write(tp, 0xb890, 0x1416);
+		data = ocp_reg_read(tp, 0xb896);
+		data |= BIT(0);
+		ocp_reg_write(tp, 0xb896, data);
+
+		rtl_phy_patch_request(tp, false, true);
+
+		data = ocp_reg_read(tp, 0xa86a);
+		data |= BIT(0);
+		ocp_reg_write(tp, 0xa86a, data);
+		data = ocp_reg_read(tp, 0xa6f0);
+		data |= BIT(0);
+		ocp_reg_write(tp, 0xa6f0, data);
+
+		ocp_reg_write(tp, 0xbfa0, 0xd70d);
+		ocp_reg_write(tp, 0xbfa2, 0x4100);
+		ocp_reg_write(tp, 0xbfa4, 0xe868);
+		ocp_reg_write(tp, 0xbfa6, 0xdc59);
+		ocp_reg_write(tp, 0xb54c, 0x3c18);
+		data = ocp_reg_read(tp, 0xbfa4);
+		data &= ~BIT(5);
+		ocp_reg_write(tp, 0xbfa4, data);
+		data = sram_read(tp, 0x817d);
+		data |= BIT(12);
+		sram_write(tp, 0x817d, data);
+		break;
+	case RTL_VER_13:
+		/* 2.5G INRX */
+		data = ocp_reg_read(tp, 0xac46);
+		data &= ~0x00f0;
+		data |= 0x0090;
+		ocp_reg_write(tp, 0xac46, data);
+		data = ocp_reg_read(tp, 0xad30);
+		data &= ~0x0003;
+		data |= 0x0001;
+		ocp_reg_write(tp, 0xad30, data);
+		fallthrough;
+	case RTL_VER_15:
+		/* EEE parameter */
+		ocp_reg_write(tp, 0xb87c, 0x80f5);
+		ocp_reg_write(tp, 0xb87e, 0x760e);
+		ocp_reg_write(tp, 0xb87c, 0x8107);
+		ocp_reg_write(tp, 0xb87e, 0x360e);
+		ocp_reg_write(tp, 0xb87c, 0x8551);
+		data = ocp_reg_read(tp, 0xb87e);
+		data &= ~0xff00;
+		data |= 0x0800;
+		ocp_reg_write(tp, 0xb87e, data);
+
+		/* ADC_PGA parameter */
+		data = ocp_reg_read(tp, 0xbf00);
+		data &= ~0xe000;
+		data |= 0xa000;
+		ocp_reg_write(tp, 0xbf00, data);
+		data = ocp_reg_read(tp, 0xbf46);
+		data &= ~0x0f00;
+		data |= 0x0300;
+		ocp_reg_write(tp, 0xbf46, data);
+
+		/* Green Table-PGA, 1G full viterbi */
+		sram_write(tp, 0x8044, 0x2417);
+		sram_write(tp, 0x804a, 0x2417);
+		sram_write(tp, 0x8050, 0x2417);
+		sram_write(tp, 0x8056, 0x2417);
+		sram_write(tp, 0x805c, 0x2417);
+		sram_write(tp, 0x8062, 0x2417);
+		sram_write(tp, 0x8068, 0x2417);
+		sram_write(tp, 0x806e, 0x2417);
+		sram_write(tp, 0x8074, 0x2417);
+		sram_write(tp, 0x807a, 0x2417);
+
+		/* XG PLL */
+		data = ocp_reg_read(tp, 0xbf84);
+		data &= ~0xe000;
+		data |= 0xa000;
+		ocp_reg_write(tp, 0xbf84, data);
+		break;
+	default:
+		break;
+	}
+
+	if (rtl_phy_patch_request(tp, true, true))
+		return;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4);
+	ocp_data |= EEE_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, ocp_data);
+
+	data = ocp_reg_read(tp, OCP_DOWN_SPEED);
+	data &= ~(EN_EEE_100 | EN_EEE_1000);
+	data |= EN_10M_CLKDIV;
+	ocp_reg_write(tp, OCP_DOWN_SPEED, data);
+	tp->ups_info._10m_ckdiv = true;
+	tp->ups_info.eee_plloff_100 = false;
+	tp->ups_info.eee_plloff_giga = false;
+
+	data = ocp_reg_read(tp, OCP_POWER_CFG);
+	data &= ~EEE_CLKDIV_EN;
+	ocp_reg_write(tp, OCP_POWER_CFG, data);
+	tp->ups_info.eee_ckdiv = false;
+
+	rtl_phy_patch_request(tp, false, true);
+
+	rtl_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags));
+
+	data = ocp_reg_read(tp, 0xa428);
+	data &= ~BIT(9);
+	ocp_reg_write(tp, 0xa428, data);
+	data = ocp_reg_read(tp, 0xa5ea);
+	data &= ~BIT(0);
+	ocp_reg_write(tp, 0xa5ea, data);
+	tp->ups_info.lite_mode = 0;
+
+	if (tp->eee_en)
+		rtl_eee_enable(tp, true);
+
+	r8153_aldps_en(tp, true);
+	r8152b_enable_fc(tp);
+	r8153_u2p3en(tp, true);
+
+	set_bit(PHY_RESET, &tp->flags);
+}
+
+static void r8156_init(struct r8152 *tp)
 {
 	u32 ocp_data;
 	u16 data;
@@ -5408,7 +7345,17 @@ static void r8153_init(struct r8152 *tp)
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return;
 
-	r8153_u1u2en(tp, false);
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP);
+	ocp_data &= ~EN_ALL_SPEED;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_ECM_OP, ocp_data);
+
+	ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_ECM_OPTION);
+	ocp_data |= BYPASS_MAC_RESET;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_ECM_OPTION, ocp_data);
+
+	r8153b_u1u2en(tp, false);
 
 	for (i = 0; i < 500; i++) {
 		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
@@ -5417,14 +7364,15 @@ static void r8153_init(struct r8152 *tp)
 
 		msleep(20);
 		if (test_bit(RTL8152_UNPLUG, &tp->flags))
-			break;
+			return;
 	}
 
 	data = r8153_phy_status(tp, 0);
-
-	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
-	    tp->version == RTL_VER_05)
-		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+	if (data == PHY_STAT_EXT_INIT) {
+		data = ocp_reg_read(tp, 0xa468);
+		data &= ~(BIT(3) | BIT(1));
+		ocp_reg_write(tp, 0xa468, data);
+	}
 
 	data = r8152_mdio_read(tp, MII_BMCR);
 	if (data & BMCR_PDOWN) {
@@ -5433,118 +7381,57 @@ static void r8153_init(struct r8152 *tp)
 	}
 
 	data = r8153_phy_status(tp, PHY_STAT_LAN_ON);
+	WARN_ON_ONCE(data != PHY_STAT_LAN_ON);
 
 	r8153_u2p3en(tp, false);
 
-	if (tp->version == RTL_VER_04) {
-		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2);
-		ocp_data &= ~pwd_dn_scale_mask;
-		ocp_data |= pwd_dn_scale(96);
-		ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2, ocp_data);
-
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY);
-		ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND;
-		ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data);
-	} else if (tp->version == RTL_VER_05) {
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0);
-		ocp_data &= ~ECM_ALDPS;
-		ocp_write_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0, ocp_data);
+	/* MSC timer = 0xfff * 8ms = 32760 ms */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff);
 
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
-		if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
-			ocp_data &= ~DYNAMIC_BURST;
-		else
-			ocp_data |= DYNAMIC_BURST;
-		ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
-	} else if (tp->version == RTL_VER_06) {
-		ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
-		if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
-			ocp_data &= ~DYNAMIC_BURST;
-		else
-			ocp_data |= DYNAMIC_BURST;
-		ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
+	/* U1/U2/L1 idle timer. 500 us */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500);
 
-		r8153_queue_wake(tp, false);
+	r8153b_power_cut_en(tp, false);
+	r8156_ups_en(tp, false);
+	r8153_queue_wake(tp, false);
+	rtl_runtime_suspend_enable(tp, false);
 
-		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
-		if (rtl8152_get_speed(tp) & LINK_STATUS)
-			ocp_data |= CUR_LINK_OK;
-		else
-			ocp_data &= ~CUR_LINK_OK;
-		ocp_data |= POLL_LINK_CHG;
-		ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
-	}
+	if (tp->udev->speed >= USB_SPEED_SUPER)
+		r8153b_u1u2en(tp, true);
 
-	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2);
-	ocp_data |= EP4_FULL_FC;
-	ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2, ocp_data);
+	usb_enable_lpm(tp->udev);
 
-	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL);
-	ocp_data &= ~TIMER11_EN;
-	ocp_write_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL, ocp_data);
+	r8156_mac_clk_spd(tp, true);
 
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE);
-	ocp_data &= ~LED_MODE_MASK;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data);
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
+	ocp_data &= ~PLA_MCU_SPDWN_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
 
-	ocp_data = FIFO_EMPTY_1FB | ROK_EXIT_LPM;
-	if (tp->version == RTL_VER_04 && tp->udev->speed < USB_SPEED_SUPER)
-		ocp_data |= LPM_TIMER_500MS;
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
+	if (rtl8152_get_speed(tp) & LINK_STATUS)
+		ocp_data |= CUR_LINK_OK;
 	else
-		ocp_data |= LPM_TIMER_500US;
-	ocp_write_byte(tp, MCU_TYPE_USB, USB_LPM_CTRL, ocp_data);
-
-	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2);
-	ocp_data &= ~SEN_VAL_MASK;
-	ocp_data |= SEN_VAL_NORMAL | SEL_RXIDLE;
-	ocp_write_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2, ocp_data);
-
-	ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001);
-
-	/* MAC clock speed down */
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
-
-	r8153_power_cut_en(tp, false);
-	rtl_runtime_suspend_enable(tp, false);
-	r8153_u1u2en(tp, true);
-	usb_enable_lpm(tp->udev);
-
-	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6);
-	ocp_data |= LANWAKE_CLR_EN;
-	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6, ocp_data);
+		ocp_data &= ~CUR_LINK_OK;
+	ocp_data |= POLL_LINK_CHG;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
 
-	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG);
-	ocp_data &= ~LANWAKE_PIN;
-	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_LWAKE_CTRL_REG, ocp_data);
+	set_bit(GREEN_ETHERNET, &tp->flags);
 
 	/* rx aggregation */
 	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
 	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
-	if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
-		ocp_data |= RX_AGG_DISABLE;
-
 	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
 
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG);
+	ocp_data |= ACT_ODMA;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data);
+
 	rtl_tally_reset(tp);
 
-	switch (tp->udev->speed) {
-	case USB_SPEED_SUPER:
-	case USB_SPEED_SUPER_PLUS:
-		tp->coalesce = COALESCE_SUPER;
-		break;
-	case USB_SPEED_HIGH:
-		tp->coalesce = COALESCE_HIGH;
-		break;
-	default:
-		tp->coalesce = COALESCE_SLOW;
-		break;
-	}
+	tp->coalesce = 15000;	/* 15 us */
 }
 
-static void r8153b_init(struct r8152 *tp)
+static void r8156b_init(struct r8152 *tp)
 {
 	u32 ocp_data;
 	u16 data;
@@ -5553,8 +7440,31 @@ static void r8153b_init(struct r8152 *tp)
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return;
 
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_ECM_OP);
+	ocp_data &= ~EN_ALL_SPEED;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_ECM_OP, ocp_data);
+
+	ocp_write_word(tp, MCU_TYPE_USB, USB_SPEED_OPTION, 0);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_ECM_OPTION);
+	ocp_data |= BYPASS_MAC_RESET;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_ECM_OPTION, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL);
+	ocp_data |= RX_DETECT8;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data);
+
 	r8153b_u1u2en(tp, false);
 
+	switch (tp->version) {
+	case RTL_VER_13:
+	case RTL_VER_15:
+		r8156b_wait_loading_flash(tp);
+		break;
+	default:
+		break;
+	}
+
 	for (i = 0; i < 500; i++) {
 		if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
 		    AUTOLOAD_DONE)
@@ -5562,10 +7472,19 @@ static void r8153b_init(struct r8152 *tp)
 
 		msleep(20);
 		if (test_bit(RTL8152_UNPLUG, &tp->flags))
-			break;
+			return;
 	}
 
 	data = r8153_phy_status(tp, 0);
+	if (data == PHY_STAT_EXT_INIT) {
+		data = ocp_reg_read(tp, 0xa468);
+		data &= ~(BIT(3) | BIT(1));
+		ocp_reg_write(tp, 0xa468, data);
+
+		data = ocp_reg_read(tp, 0xa466);
+		data &= ~BIT(0);
+		ocp_reg_write(tp, 0xa466, data);
+	}
 
 	data = r8152_mdio_read(tp, MII_BMCR);
 	if (data & BMCR_PDOWN) {
@@ -5580,41 +7499,54 @@ static void r8153b_init(struct r8152 *tp)
 	/* MSC timer = 0xfff * 8ms = 32760 ms */
 	ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff);
 
+	/* U1/U2/L1 idle timer. 500 us */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500);
+
 	r8153b_power_cut_en(tp, false);
-	r8153b_ups_en(tp, false);
+	r8156_ups_en(tp, false);
 	r8153_queue_wake(tp, false);
 	rtl_runtime_suspend_enable(tp, false);
 
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
-	if (rtl8152_get_speed(tp) & LINK_STATUS)
-		ocp_data |= CUR_LINK_OK;
-	else
-		ocp_data &= ~CUR_LINK_OK;
-	ocp_data |= POLL_LINK_CHG;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
-
 	if (tp->udev->speed >= USB_SPEED_SUPER)
 		r8153b_u1u2en(tp, true);
 
 	usb_enable_lpm(tp->udev);
 
-	/* MAC clock speed down */
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2);
-	ocp_data |= MAC_CLK_SPDWN_EN;
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, ocp_data);
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RCR);
+	ocp_data &= ~SLOT_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR);
+	ocp_data |= FLOW_CTRL_EN;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data);
+
+	/* enable fc timer and set timer to 600 ms. */
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FC_TIMER,
+		       CTRL_TIMER_EN | (600 / 8));
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_CTRL);
+	if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & DACK_DET_EN))
+		ocp_data |= FLOW_CTRL_PATCH_2;
+	ocp_data &= ~AUTO_SPEEDUP;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_CTRL, ocp_data);
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_TASK);
+	ocp_data |= FC_PATCH_TASK;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_TASK, ocp_data);
+
+	r8156_mac_clk_spd(tp, true);
 
 	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3);
 	ocp_data &= ~PLA_MCU_SPDWN_EN;
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, ocp_data);
 
-	if (tp->version == RTL_VER_09) {
-		/* Disable Test IO for 32QFN */
-		if (ocp_read_byte(tp, MCU_TYPE_PLA, 0xdc00) & BIT(5)) {
-			ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
-			ocp_data |= TEST_IO_OFF;
-			ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
-		}
-	}
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS);
+	if (rtl8152_get_speed(tp) & LINK_STATUS)
+		ocp_data |= CUR_LINK_OK;
+	else
+		ocp_data &= ~CUR_LINK_OK;
+	ocp_data |= POLL_LINK_CHG;
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
 
 	set_bit(GREEN_ETHERNET, &tp->flags);
 
@@ -5991,6 +7923,22 @@ int rtl8152_get_link_ksettings(struct net_device *netdev,
 
 	mii_ethtool_get_link_ksettings(&tp->mii, cmd);
 
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 cmd->link_modes.supported, tp->support_2500full);
+
+	if (tp->support_2500full) {
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+				 cmd->link_modes.advertising,
+				 ocp_reg_read(tp, OCP_10GBT_CTRL) & MDIO_AN_10GBT_CTRL_ADV2_5G);
+
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+				 cmd->link_modes.lp_advertising,
+				 ocp_reg_read(tp, OCP_10GBT_STAT) & MDIO_AN_10GBT_STAT_LP2_5G);
+
+		if (is_speed_2500(rtl8152_get_speed(tp)))
+			cmd->base.speed = SPEED_2500;
+	}
+
 	mutex_unlock(&tp->control);
 
 	usb_autopm_put_interface(tp->intf);
@@ -6034,6 +7982,10 @@ static int rtl8152_set_link_ksettings(struct net_device *dev,
 		     cmd->link_modes.advertising))
 		advertising |= RTL_ADVERTISED_1000_FULL;
 
+	if (test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+		     cmd->link_modes.advertising))
+		advertising |= RTL_ADVERTISED_2500_FULL;
+
 	mutex_lock(&tp->control);
 
 	ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed,
@@ -6623,6 +8575,67 @@ static int rtl_ops_init(struct r8152 *tp)
 		tp->eee_adv		= MDIO_EEE_1000T | MDIO_EEE_100TX;
 		break;
 
+	case RTL_VER_11:
+		tp->eee_en		= true;
+		tp->eee_adv		= MDIO_EEE_1000T | MDIO_EEE_100TX;
+		fallthrough;
+	case RTL_VER_10:
+		ops->init		= r8156_init;
+		ops->enable		= rtl8156_enable;
+		ops->disable		= rtl8153_disable;
+		ops->up			= rtl8156_up;
+		ops->down		= rtl8156_down;
+		ops->unload		= rtl8153_unload;
+		ops->eee_get		= r8153_get_eee;
+		ops->eee_set		= r8152_set_eee;
+		ops->in_nway		= rtl8153_in_nway;
+		ops->hw_phy_cfg		= r8156_hw_phy_cfg;
+		ops->autosuspend_en	= rtl8156_runtime_enable;
+		ops->change_mtu		= rtl8156_change_mtu;
+		tp->rx_buf_sz		= 48 * 1024;
+		tp->support_2500full	= 1;
+		break;
+
+	case RTL_VER_12:
+	case RTL_VER_13:
+		tp->support_2500full	= 1;
+		fallthrough;
+	case RTL_VER_15:
+		tp->eee_en		= true;
+		tp->eee_adv		= MDIO_EEE_1000T | MDIO_EEE_100TX;
+		ops->init		= r8156b_init;
+		ops->enable		= rtl8156b_enable;
+		ops->disable		= rtl8153_disable;
+		ops->up			= rtl8156_up;
+		ops->down		= rtl8156_down;
+		ops->unload		= rtl8153_unload;
+		ops->eee_get		= r8153_get_eee;
+		ops->eee_set		= r8152_set_eee;
+		ops->in_nway		= rtl8153_in_nway;
+		ops->hw_phy_cfg		= r8156b_hw_phy_cfg;
+		ops->autosuspend_en	= rtl8156_runtime_enable;
+		ops->change_mtu		= rtl8156_change_mtu;
+		tp->rx_buf_sz		= 48 * 1024;
+		break;
+
+	case RTL_VER_14:
+		ops->init		= r8153c_init;
+		ops->enable		= rtl8153_enable;
+		ops->disable		= rtl8153_disable;
+		ops->up			= rtl8153c_up;
+		ops->down		= rtl8153b_down;
+		ops->unload		= rtl8153_unload;
+		ops->eee_get		= r8153_get_eee;
+		ops->eee_set		= r8152_set_eee;
+		ops->in_nway		= rtl8153_in_nway;
+		ops->hw_phy_cfg		= r8153c_hw_phy_cfg;
+		ops->autosuspend_en	= rtl8153c_runtime_enable;
+		ops->change_mtu		= rtl8153c_change_mtu;
+		tp->rx_buf_sz		= 32 * 1024;
+		tp->eee_en		= true;
+		tp->eee_adv		= MDIO_EEE_1000T | MDIO_EEE_100TX;
+		break;
+
 	default:
 		ret = -ENODEV;
 		dev_err(&tp->intf->dev, "Unknown Device\n");
@@ -6636,11 +8649,13 @@ static int rtl_ops_init(struct r8152 *tp)
 #define FIRMWARE_8153A_3	"rtl_nic/rtl8153a-3.fw"
 #define FIRMWARE_8153A_4	"rtl_nic/rtl8153a-4.fw"
 #define FIRMWARE_8153B_2	"rtl_nic/rtl8153b-2.fw"
+#define FIRMWARE_8153C_1	"rtl_nic/rtl8153c-1.fw"
 
 MODULE_FIRMWARE(FIRMWARE_8153A_2);
 MODULE_FIRMWARE(FIRMWARE_8153A_3);
 MODULE_FIRMWARE(FIRMWARE_8153A_4);
 MODULE_FIRMWARE(FIRMWARE_8153B_2);
+MODULE_FIRMWARE(FIRMWARE_8153C_1);
 
 static int rtl_fw_init(struct r8152 *tp)
 {
@@ -6666,6 +8681,11 @@ static int rtl_fw_init(struct r8152 *tp)
 		rtl_fw->pre_fw		= r8153b_pre_firmware_1;
 		rtl_fw->post_fw		= r8153b_post_firmware_1;
 		break;
+	case RTL_VER_14:
+		rtl_fw->fw_name		= FIRMWARE_8153C_1;
+		rtl_fw->pre_fw		= r8153b_pre_firmware_1;
+		rtl_fw->post_fw		= r8153c_post_firmware_1;
+		break;
 	default:
 		break;
 	}
@@ -6721,6 +8741,27 @@ u8 rtl8152_get_version(struct usb_interface *intf)
 	case 0x6010:
 		version = RTL_VER_09;
 		break;
+	case 0x7010:
+		version = RTL_TEST_01;
+		break;
+	case 0x7020:
+		version = RTL_VER_10;
+		break;
+	case 0x7030:
+		version = RTL_VER_11;
+		break;
+	case 0x7400:
+		version = RTL_VER_12;
+		break;
+	case 0x7410:
+		version = RTL_VER_13;
+		break;
+	case 0x6400:
+		version = RTL_VER_14;
+		break;
+	case 0x7420:
+		version = RTL_VER_15;
+		break;
 	default:
 		version = RTL_VER_UNKNOWN;
 		dev_info(&intf->dev, "Unknown version 0x%04x\n", ocp_data);
@@ -6833,12 +8874,29 @@ static int rtl8152_probe(struct usb_interface *intf,
 	/* MTU range: 68 - 1500 or 9194 */
 	netdev->min_mtu = ETH_MIN_MTU;
 	switch (tp->version) {
+	case RTL_VER_03:
+	case RTL_VER_04:
+	case RTL_VER_05:
+	case RTL_VER_06:
+	case RTL_VER_08:
+	case RTL_VER_09:
+	case RTL_VER_14:
+		netdev->max_mtu = size_to_mtu(9 * 1024);
+		break;
+	case RTL_VER_10:
+	case RTL_VER_11:
+		netdev->max_mtu = size_to_mtu(15 * 1024);
+		break;
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		netdev->max_mtu = size_to_mtu(16 * 1024);
+		break;
 	case RTL_VER_01:
 	case RTL_VER_02:
-		netdev->max_mtu = ETH_DATA_LEN;
-		break;
+	case RTL_VER_07:
 	default:
-		netdev->max_mtu = size_to_mtu(9 * 1024);
+		netdev->max_mtu = ETH_DATA_LEN;
 		break;
 	}
 
@@ -6854,7 +8912,13 @@ static int rtl8152_probe(struct usb_interface *intf,
 	tp->advertising = RTL_ADVERTISED_10_HALF | RTL_ADVERTISED_10_FULL |
 			  RTL_ADVERTISED_100_HALF | RTL_ADVERTISED_100_FULL;
 	if (tp->mii.supports_gmii) {
-		tp->speed = SPEED_1000;
+		if (tp->support_2500full &&
+		    tp->udev->speed >= USB_SPEED_SUPER) {
+			tp->speed = SPEED_2500;
+			tp->advertising |= RTL_ADVERTISED_2500_FULL;
+		} else {
+			tp->speed = SPEED_1000;
+		}
 		tp->advertising |= RTL_ADVERTISED_1000_FULL;
 	}
 	tp->duplex = DUPLEX_FULL;
@@ -6878,7 +8942,11 @@ static int rtl8152_probe(struct usb_interface *intf,
 	set_ethernet_addr(tp);
 
 	usb_set_intfdata(intf, tp);
-	netif_napi_add(netdev, &tp->napi, r8152_poll, RTL8152_NAPI_WEIGHT);
+
+	if (tp->support_2500full)
+		netif_napi_add(netdev, &tp->napi, r8152_poll, 256);
+	else
+		netif_napi_add(netdev, &tp->napi, r8152_poll, 64);
 
 	ret = register_netdev(netdev);
 	if (ret != 0) {
@@ -6914,7 +8982,8 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 		unregister_netdev(tp->netdev);
 		tasklet_kill(&tp->tx_tl);
 		cancel_delayed_work_sync(&tp->hw_phy_work);
-		tp->rtl_ops.unload(tp);
+		if (tp->rtl_ops.unload)
+			tp->rtl_ops.unload(tp);
 		rtl8152_release_firmware(tp);
 		free_netdev(tp->netdev);
 	}
@@ -6934,13 +9003,28 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 	.idProduct = (prod), \
 	.bInterfaceClass = USB_CLASS_COMM, \
 	.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
+	.bInterfaceProtocol = USB_CDC_PROTO_NONE \
+}, \
+{ \
+	.match_flags = USB_DEVICE_ID_MATCH_INT_INFO | \
+		       USB_DEVICE_ID_MATCH_DEVICE, \
+	.idVendor = (vend), \
+	.idProduct = (prod), \
+	.bInterfaceClass = USB_CLASS_COMM, \
+	.bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, \
 	.bInterfaceProtocol = USB_CDC_PROTO_NONE
 
 /* table of devices that work with this driver */
 static const struct usb_device_id rtl8152_table[] = {
+	/* Realtek */
 	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8050)},
+	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8053)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
+	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8155)},
+	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8156)},
+
+	/* Microsoft */
 	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927)},
-- 
cgit v1.2.3


From 4a51b0e8a0143b0e83d51d9c58c6416c3818a9f2 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:36 +0800
Subject: r8152: support PHY firmware for RTL8156 series

Support new firmware type and method for RTL8156 series.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 563 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 561 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 72b8ef0ad5a1..34c1ee61af01 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -974,8 +974,60 @@ enum rtl8152_fw_flags {
 	FW_FLAGS_START,
 	FW_FLAGS_STOP,
 	FW_FLAGS_NC,
+	FW_FLAGS_NC1,
+	FW_FLAGS_NC2,
+	FW_FLAGS_UC2,
+	FW_FLAGS_UC,
+	FW_FLAGS_SPEED_UP,
+	FW_FLAGS_VER,
 };
 
+enum rtl8152_fw_fixup_cmd {
+	FW_FIXUP_AND = 0,
+	FW_FIXUP_OR,
+	FW_FIXUP_NOT,
+	FW_FIXUP_XOR,
+};
+
+struct fw_phy_set {
+	__le16 addr;
+	__le16 data;
+} __packed;
+
+struct fw_phy_speed_up {
+	struct fw_block blk_hdr;
+	__le16 fw_offset;
+	__le16 version;
+	__le16 fw_reg;
+	__le16 reserved;
+	char info[];
+} __packed;
+
+struct fw_phy_ver {
+	struct fw_block blk_hdr;
+	struct fw_phy_set ver;
+	__le32 reserved;
+} __packed;
+
+struct fw_phy_fixup {
+	struct fw_block blk_hdr;
+	struct fw_phy_set setting;
+	__le16 bit_cmd;
+	__le16 reserved;
+} __packed;
+
+struct fw_phy_union {
+	struct fw_block blk_hdr;
+	__le16 fw_offset;
+	__le16 fw_reg;
+	struct fw_phy_set pre_set[2];
+	struct fw_phy_set bp[8];
+	struct fw_phy_set bp_en;
+	u8 pre_num;
+	u8 bp_num;
+	char info[];
+} __packed;
+
 /**
  * struct fw_mac - a firmware block used by RTL_FW_PLA and RTL_FW_USB.
  *	The layout of the firmware block is:
@@ -1080,6 +1132,15 @@ enum rtl_fw_type {
 	RTL_FW_PHY_START,
 	RTL_FW_PHY_STOP,
 	RTL_FW_PHY_NC,
+	RTL_FW_PHY_FIXUP,
+	RTL_FW_PHY_UNION_NC,
+	RTL_FW_PHY_UNION_NC1,
+	RTL_FW_PHY_UNION_NC2,
+	RTL_FW_PHY_UNION_UC2,
+	RTL_FW_PHY_UNION_UC,
+	RTL_FW_PHY_UNION_MISC,
+	RTL_FW_PHY_SPEED_UP,
+	RTL_FW_PHY_VER,
 };
 
 enum rtl_version {
@@ -3999,6 +4060,162 @@ static int rtl_post_ram_code(struct r8152 *tp, u16 key_addr, bool wait)
 	return 0;
 }
 
+static bool rtl8152_is_fw_phy_speed_up_ok(struct r8152 *tp, struct fw_phy_speed_up *phy)
+{
+	u16 fw_offset;
+	u32 length;
+	bool rc = false;
+
+	switch (tp->version) {
+	case RTL_VER_01:
+	case RTL_VER_02:
+	case RTL_VER_03:
+	case RTL_VER_04:
+	case RTL_VER_05:
+	case RTL_VER_06:
+	case RTL_VER_07:
+	case RTL_VER_08:
+	case RTL_VER_09:
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_14:
+		goto out;
+	case RTL_VER_13:
+	case RTL_VER_15:
+	default:
+		break;
+	}
+
+	fw_offset = __le16_to_cpu(phy->fw_offset);
+	length = __le32_to_cpu(phy->blk_hdr.length);
+	if (fw_offset < sizeof(*phy) || length <= fw_offset) {
+		dev_err(&tp->intf->dev, "invalid fw_offset\n");
+		goto out;
+	}
+
+	length -= fw_offset;
+	if (length & 3) {
+		dev_err(&tp->intf->dev, "invalid block length\n");
+		goto out;
+	}
+
+	if (__le16_to_cpu(phy->fw_reg) != 0x9A00) {
+		dev_err(&tp->intf->dev, "invalid register to load firmware\n");
+		goto out;
+	}
+
+	rc = true;
+out:
+	return rc;
+}
+
+static bool rtl8152_is_fw_phy_ver_ok(struct r8152 *tp, struct fw_phy_ver *ver)
+{
+	bool rc = false;
+
+	switch (tp->version) {
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		break;
+	default:
+		goto out;
+	}
+
+	if (__le32_to_cpu(ver->blk_hdr.length) != sizeof(*ver)) {
+		dev_err(&tp->intf->dev, "invalid block length\n");
+		goto out;
+	}
+
+	if (__le16_to_cpu(ver->ver.addr) != SRAM_GPHY_FW_VER) {
+		dev_err(&tp->intf->dev, "invalid phy ver addr\n");
+		goto out;
+	}
+
+	rc = true;
+out:
+	return rc;
+}
+
+static bool rtl8152_is_fw_phy_fixup_ok(struct r8152 *tp, struct fw_phy_fixup *fix)
+{
+	bool rc = false;
+
+	switch (tp->version) {
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		break;
+	default:
+		goto out;
+	}
+
+	if (__le32_to_cpu(fix->blk_hdr.length) != sizeof(*fix)) {
+		dev_err(&tp->intf->dev, "invalid block length\n");
+		goto out;
+	}
+
+	if (__le16_to_cpu(fix->setting.addr) != OCP_PHY_PATCH_CMD ||
+	    __le16_to_cpu(fix->setting.data) != BIT(7)) {
+		dev_err(&tp->intf->dev, "invalid phy fixup\n");
+		goto out;
+	}
+
+	rc = true;
+out:
+	return rc;
+}
+
+static bool rtl8152_is_fw_phy_union_ok(struct r8152 *tp, struct fw_phy_union *phy)
+{
+	u16 fw_offset;
+	u32 length;
+	bool rc = false;
+
+	switch (tp->version) {
+	case RTL_VER_10:
+	case RTL_VER_11:
+	case RTL_VER_12:
+	case RTL_VER_13:
+	case RTL_VER_15:
+		break;
+	default:
+		goto out;
+	}
+
+	fw_offset = __le16_to_cpu(phy->fw_offset);
+	length = __le32_to_cpu(phy->blk_hdr.length);
+	if (fw_offset < sizeof(*phy) || length <= fw_offset) {
+		dev_err(&tp->intf->dev, "invalid fw_offset\n");
+		goto out;
+	}
+
+	length -= fw_offset;
+	if (length & 1) {
+		dev_err(&tp->intf->dev, "invalid block length\n");
+		goto out;
+	}
+
+	if (phy->pre_num > 2) {
+		dev_err(&tp->intf->dev, "invalid pre_num %d\n", phy->pre_num);
+		goto out;
+	}
+
+	if (phy->bp_num > 8) {
+		dev_err(&tp->intf->dev, "invalid bp_num %d\n", phy->bp_num);
+		goto out;
+	}
+
+	rc = true;
+out:
+	return rc;
+}
+
 static bool rtl8152_is_fw_phy_nc_ok(struct r8152 *tp, struct fw_phy_nc *phy)
 {
 	u32 length;
@@ -4319,6 +4536,10 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 		case RTL_FW_PHY_START:
 			if (test_bit(FW_FLAGS_START, &fw_flags) ||
 			    test_bit(FW_FLAGS_NC, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC1, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
 			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
 				dev_err(&tp->intf->dev,
 					"check PHY_START fail\n");
@@ -4367,7 +4588,153 @@ static long rtl8152_check_firmware(struct r8152 *tp, struct rtl_fw *rtl_fw)
 				goto fail;
 			}
 			__set_bit(FW_FLAGS_NC, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_NC:
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC1, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "PHY_UNION_NC out of order\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_NC, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY_UNION_NC encountered\n");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check PHY_UNION_NC failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_NC, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_NC1:
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "PHY_UNION_NC1 out of order\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_NC1, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY NC1 encountered\n");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check PHY_UNION_NC1 failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_NC1, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_NC2:
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "PHY_UNION_NC2 out of order\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_NC2, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY NC2 encountered\n");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check PHY_UNION_NC2 failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_NC2, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_UC2:
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "PHY_UNION_UC2 out of order\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_UC2, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY UC2 encountered\n");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check PHY_UNION_UC2 failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_UC2, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_UC:
+			if (!test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "PHY_UNION_UC out of order\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_UC, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY UC encountered\n");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check PHY_UNION_UC failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_UC, &fw_flags);
+			break;
+		case RTL_FW_PHY_UNION_MISC:
+			if (!rtl8152_is_fw_phy_union_ok(tp, (struct fw_phy_union *)block)) {
+				dev_err(&tp->intf->dev, "check RTL_FW_PHY_UNION_MISC failed\n");
+				goto fail;
+			}
+			break;
+		case RTL_FW_PHY_FIXUP:
+			if (!rtl8152_is_fw_phy_fixup_ok(tp, (struct fw_phy_fixup *)block)) {
+				dev_err(&tp->intf->dev, "check PHY fixup failed\n");
+				goto fail;
+			}
+			break;
+		case RTL_FW_PHY_SPEED_UP:
+			if (test_bit(FW_FLAGS_SPEED_UP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY firmware encountered");
+				goto fail;
+			}
 
+			if (!rtl8152_is_fw_phy_speed_up_ok(tp, (struct fw_phy_speed_up *)block)) {
+				dev_err(&tp->intf->dev, "check PHY speed up failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_SPEED_UP, &fw_flags);
+			break;
+		case RTL_FW_PHY_VER:
+			if (test_bit(FW_FLAGS_START, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC1, &fw_flags) ||
+			    test_bit(FW_FLAGS_NC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC2, &fw_flags) ||
+			    test_bit(FW_FLAGS_UC, &fw_flags) ||
+			    test_bit(FW_FLAGS_STOP, &fw_flags)) {
+				dev_err(&tp->intf->dev, "Invalid order to set PHY version\n");
+				goto fail;
+			}
+
+			if (test_bit(FW_FLAGS_VER, &fw_flags)) {
+				dev_err(&tp->intf->dev, "multiple PHY version encountered");
+				goto fail;
+			}
+
+			if (!rtl8152_is_fw_phy_ver_ok(tp, (struct fw_phy_ver *)block)) {
+				dev_err(&tp->intf->dev, "check PHY version failed\n");
+				goto fail;
+			}
+			__set_bit(FW_FLAGS_VER, &fw_flags);
 			break;
 		default:
 			dev_warn(&tp->intf->dev, "Unknown type %u is found\n",
@@ -4390,6 +4757,143 @@ fail:
 	return ret;
 }
 
+static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, bool wait)
+{
+	u32 len;
+	u8 *data;
+
+	if (sram_read(tp, SRAM_GPHY_FW_VER) >= __le16_to_cpu(phy->version)) {
+		dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n");
+		return;
+	}
+
+	len = __le32_to_cpu(phy->blk_hdr.length);
+	len -= __le16_to_cpu(phy->fw_offset);
+	data = (u8 *)phy + __le16_to_cpu(phy->fw_offset);
+
+	if (rtl_phy_patch_request(tp, true, wait))
+		return;
+
+	while (len) {
+		u32 ocp_data, size;
+		int i;
+
+		if (len < 2048)
+			size = len;
+		else
+			size = 2048;
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL);
+		ocp_data |= GPHY_PATCH_DONE | BACKUP_RESTRORE;
+		ocp_write_word(tp, MCU_TYPE_USB, USB_GPHY_CTRL, ocp_data);
+
+		generic_ocp_write(tp, __le16_to_cpu(phy->fw_reg), 0xff, size, data, MCU_TYPE_USB);
+
+		data += size;
+		len -= size;
+
+		ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL);
+		ocp_data |= POL_GPHY_PATCH;
+		ocp_write_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL, ocp_data);
+
+		for (i = 0; i < 1000; i++) {
+			if (!(ocp_read_word(tp, MCU_TYPE_PLA, PLA_POL_GPIO_CTRL) & POL_GPHY_PATCH))
+				break;
+		}
+
+		if (i == 1000) {
+			dev_err(&tp->intf->dev, "ram code speedup mode timeout\n");
+			return;
+		}
+	}
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base);
+	rtl_phy_patch_request(tp, false, wait);
+
+	if (sram_read(tp, SRAM_GPHY_FW_VER) == __le16_to_cpu(phy->version))
+		dev_dbg(&tp->intf->dev, "successfully applied %s\n", phy->info);
+	else
+		dev_err(&tp->intf->dev, "ram code speedup mode fail\n");
+}
+
+static int rtl8152_fw_phy_ver(struct r8152 *tp, struct fw_phy_ver *phy_ver)
+{
+	u16 ver_addr, ver;
+
+	ver_addr = __le16_to_cpu(phy_ver->ver.addr);
+	ver = __le16_to_cpu(phy_ver->ver.data);
+
+	if (sram_read(tp, ver_addr) >= ver) {
+		dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n");
+		return 0;
+	}
+
+	sram_write(tp, ver_addr, ver);
+
+	dev_dbg(&tp->intf->dev, "PHY firmware version %x\n", ver);
+
+	return ver;
+}
+
+static void rtl8152_fw_phy_fixup(struct r8152 *tp, struct fw_phy_fixup *fix)
+{
+	u16 addr, data;
+
+	addr = __le16_to_cpu(fix->setting.addr);
+	data = ocp_reg_read(tp, addr);
+
+	switch (__le16_to_cpu(fix->bit_cmd)) {
+	case FW_FIXUP_AND:
+		data &= __le16_to_cpu(fix->setting.data);
+		break;
+	case FW_FIXUP_OR:
+		data |= __le16_to_cpu(fix->setting.data);
+		break;
+	case FW_FIXUP_NOT:
+		data &= ~__le16_to_cpu(fix->setting.data);
+		break;
+	case FW_FIXUP_XOR:
+		data ^= __le16_to_cpu(fix->setting.data);
+		break;
+	default:
+		return;
+	}
+
+	ocp_reg_write(tp, addr, data);
+
+	dev_dbg(&tp->intf->dev, "applied ocp %x %x\n", addr, data);
+}
+
+static void rtl8152_fw_phy_union_apply(struct r8152 *tp, struct fw_phy_union *phy)
+{
+	__le16 *data;
+	u32 length;
+	int i, num;
+
+	num = phy->pre_num;
+	for (i = 0; i < num; i++)
+		sram_write(tp, __le16_to_cpu(phy->pre_set[i].addr),
+			   __le16_to_cpu(phy->pre_set[i].data));
+
+	length = __le32_to_cpu(phy->blk_hdr.length);
+	length -= __le16_to_cpu(phy->fw_offset);
+	num = length / 2;
+	data = (__le16 *)((u8 *)phy + __le16_to_cpu(phy->fw_offset));
+
+	ocp_reg_write(tp, OCP_SRAM_ADDR, __le16_to_cpu(phy->fw_reg));
+	for (i = 0; i < num; i++)
+		ocp_reg_write(tp, OCP_SRAM_DATA, __le16_to_cpu(data[i]));
+
+	num = phy->bp_num;
+	for (i = 0; i < num; i++)
+		sram_write(tp, __le16_to_cpu(phy->bp[i].addr), __le16_to_cpu(phy->bp[i].data));
+
+	if (phy->bp_num && phy->bp_en.addr)
+		sram_write(tp, __le16_to_cpu(phy->bp_en.addr), __le16_to_cpu(phy->bp_en.data));
+
+	dev_dbg(&tp->intf->dev, "successfully applied %s\n", phy->info);
+}
+
 static void rtl8152_fw_phy_nc_apply(struct r8152 *tp, struct fw_phy_nc *phy)
 {
 	u16 mode_reg, bp_index;
@@ -4443,6 +4947,12 @@ static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac)
 		return;
 	}
 
+	fw_ver_reg = __le16_to_cpu(mac->fw_ver_reg);
+	if (fw_ver_reg && ocp_read_byte(tp, MCU_TYPE_USB, fw_ver_reg) >= mac->fw_ver_data) {
+		dev_dbg(&tp->intf->dev, "%s firmware has been the newest\n", type ? "PLA" : "USB");
+		return;
+	}
+
 	rtl_clear_bp(tp, type);
 
 	/* Enable backup/restore of MACDBG. This is required after clearing PLA
@@ -4478,7 +4988,6 @@ static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac)
 		ocp_write_word(tp, type, bp_en_addr,
 			       __le16_to_cpu(mac->bp_en_value));
 
-	fw_ver_reg = __le16_to_cpu(mac->fw_ver_reg);
 	if (fw_ver_reg)
 		ocp_write_byte(tp, MCU_TYPE_USB, fw_ver_reg,
 			       mac->fw_ver_data);
@@ -4493,7 +5002,7 @@ static void rtl8152_apply_firmware(struct r8152 *tp, bool power_cut)
 	struct fw_header *fw_hdr;
 	struct fw_phy_patch_key *key;
 	u16 key_addr = 0;
-	int i;
+	int i, patch_phy = 1;
 
 	if (IS_ERR_OR_NULL(rtl_fw->fw))
 		return;
@@ -4515,17 +5024,40 @@ static void rtl8152_apply_firmware(struct r8152 *tp, bool power_cut)
 			rtl8152_fw_mac_apply(tp, (struct fw_mac *)block);
 			break;
 		case RTL_FW_PHY_START:
+			if (!patch_phy)
+				break;
 			key = (struct fw_phy_patch_key *)block;
 			key_addr = __le16_to_cpu(key->key_reg);
 			rtl_pre_ram_code(tp, key_addr, __le16_to_cpu(key->key_data), !power_cut);
 			break;
 		case RTL_FW_PHY_STOP:
+			if (!patch_phy)
+				break;
 			WARN_ON(!key_addr);
 			rtl_post_ram_code(tp, key_addr, !power_cut);
 			break;
 		case RTL_FW_PHY_NC:
 			rtl8152_fw_phy_nc_apply(tp, (struct fw_phy_nc *)block);
 			break;
+		case RTL_FW_PHY_VER:
+			patch_phy = rtl8152_fw_phy_ver(tp, (struct fw_phy_ver *)block);
+			break;
+		case RTL_FW_PHY_UNION_NC:
+		case RTL_FW_PHY_UNION_NC1:
+		case RTL_FW_PHY_UNION_NC2:
+		case RTL_FW_PHY_UNION_UC2:
+		case RTL_FW_PHY_UNION_UC:
+		case RTL_FW_PHY_UNION_MISC:
+			if (patch_phy)
+				rtl8152_fw_phy_union_apply(tp, (struct fw_phy_union *)block);
+			break;
+		case RTL_FW_PHY_FIXUP:
+			if (patch_phy)
+				rtl8152_fw_phy_fixup(tp, (struct fw_phy_fixup *)block);
+			break;
+		case RTL_FW_PHY_SPEED_UP:
+			rtl_ram_code_speed_up(tp, (struct fw_phy_speed_up *)block, !power_cut);
+			break;
 		default:
 			break;
 		}
@@ -5033,6 +5565,21 @@ static int r8153c_post_firmware_1(struct r8152 *tp)
 	return 0;
 }
 
+static int r8156a_post_firmware_1(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1);
+	ocp_data |= FW_IP_RESET_EN;
+	ocp_write_word(tp, MCU_TYPE_USB, USB_FW_FIX_EN1, ocp_data);
+
+	/* Modify U3PHY parameter for compatibility issue */
+	ocp_write_dword(tp, MCU_TYPE_USB, USB_UPHY3_MDCMDIO, 0x4026840e);
+	ocp_write_dword(tp, MCU_TYPE_USB, USB_UPHY3_MDCMDIO, 0x4001acc9);
+
+	return 0;
+}
+
 static void r8153_aldps_en(struct r8152 *tp, bool enable)
 {
 	u16 data;
@@ -8650,12 +9197,16 @@ static int rtl_ops_init(struct r8152 *tp)
 #define FIRMWARE_8153A_4	"rtl_nic/rtl8153a-4.fw"
 #define FIRMWARE_8153B_2	"rtl_nic/rtl8153b-2.fw"
 #define FIRMWARE_8153C_1	"rtl_nic/rtl8153c-1.fw"
+#define FIRMWARE_8156A_2	"rtl_nic/rtl8156a-2.fw"
+#define FIRMWARE_8156B_2	"rtl_nic/rtl8156b-2.fw"
 
 MODULE_FIRMWARE(FIRMWARE_8153A_2);
 MODULE_FIRMWARE(FIRMWARE_8153A_3);
 MODULE_FIRMWARE(FIRMWARE_8153A_4);
 MODULE_FIRMWARE(FIRMWARE_8153B_2);
 MODULE_FIRMWARE(FIRMWARE_8153C_1);
+MODULE_FIRMWARE(FIRMWARE_8156A_2);
+MODULE_FIRMWARE(FIRMWARE_8156B_2);
 
 static int rtl_fw_init(struct r8152 *tp)
 {
@@ -8681,6 +9232,14 @@ static int rtl_fw_init(struct r8152 *tp)
 		rtl_fw->pre_fw		= r8153b_pre_firmware_1;
 		rtl_fw->post_fw		= r8153b_post_firmware_1;
 		break;
+	case RTL_VER_11:
+		rtl_fw->fw_name		= FIRMWARE_8156A_2;
+		rtl_fw->post_fw		= r8156a_post_firmware_1;
+		break;
+	case RTL_VER_13:
+	case RTL_VER_15:
+		rtl_fw->fw_name		= FIRMWARE_8156B_2;
+		break;
 	case RTL_VER_14:
 		rtl_fw->fw_name		= FIRMWARE_8153C_1;
 		rtl_fw->pre_fw		= r8153b_pre_firmware_1;
-- 
cgit v1.2.3


From c2198943e33b100ed21dfb636c8fa6baef841e9d Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 16 Apr 2021 16:04:37 +0800
Subject: r8152: search the configuration of vendor mode

The vendor mode is not always at config #1, so it is necessary to
set the correct configuration number.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 34c1ee61af01..9119a860e9bd 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -29,7 +29,7 @@
 #include <linux/usb/r8152.h>
 
 /* Information for net-next */
-#define NETNEXT_VERSION		"11"
+#define NETNEXT_VERSION		"12"
 
 /* Information for net */
 #define NET_VERSION		"11"
@@ -8107,6 +8107,39 @@ static void r8156b_init(struct r8152 *tp)
 	tp->coalesce = 15000;	/* 15 us */
 }
 
+static bool rtl_vendor_mode(struct usb_interface *intf)
+{
+	struct usb_host_interface *alt = intf->cur_altsetting;
+	struct usb_device *udev;
+	struct usb_host_config *c;
+	int i, num_configs;
+
+	if (alt->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC)
+		return true;
+
+	/* The vendor mode is not always config #1, so to find it out. */
+	udev = interface_to_usbdev(intf);
+	c = udev->config;
+	num_configs = udev->descriptor.bNumConfigurations;
+	for (i = 0; i < num_configs; (i++, c++)) {
+		struct usb_interface_descriptor	*desc = NULL;
+
+		if (c->desc.bNumInterfaces > 0)
+			desc = &c->intf_cache[0]->altsetting->desc;
+		else
+			continue;
+
+		if (desc->bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
+			usb_driver_set_configuration(udev, c->desc.bConfigurationValue);
+			break;
+		}
+	}
+
+	WARN_ON_ONCE(i == num_configs);
+
+	return false;
+}
+
 static int rtl8152_pre_reset(struct usb_interface *intf)
 {
 	struct r8152 *tp = usb_get_intfdata(intf);
@@ -9345,10 +9378,8 @@ static int rtl8152_probe(struct usb_interface *intf,
 	if (version == RTL_VER_UNKNOWN)
 		return -ENODEV;
 
-	if (udev->actconfig->desc.bConfigurationValue != 1) {
-		usb_driver_set_configuration(udev, 1);
+	if (!rtl_vendor_mode(intf))
 		return -ENODEV;
-	}
 
 	if (intf->cur_altsetting->desc.bNumEndpoints < 3)
 		return -ENODEV;
-- 
cgit v1.2.3


From 4ad29b1a484e0c58acfffdcd87172ed17f35c1dd Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Fri, 16 Apr 2021 11:15:17 +0300
Subject: net: mvpp2: Add parsing support for different IPv4 IHL values

Add parser entries for different IPv4 IHL values.
Each entry will set the L4 header offset according to the IPv4 IHL field.
L3 header offset will set during the parsing of the IPv4 protocol.

Because of missed parser support for IP header length > 20, RX IPv4 checksum HW offload fails
and skb->ip_summed set to CHECKSUM_NONE(checksum done by Network stack).
This patch adds RX IPv4 checksum HW offload capability for frames with IP header length > 20.

v1 --> v2
- Improve commit message.

Suggested-by: Dana Vardi <danat@marvell.com>
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 107 ++++++++++---------------
 drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h |   3 +-
 2 files changed, 43 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index 4812cdb4609e..7cc7d72d761e 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -918,9 +918,8 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto,
 	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
 	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
 
-	/* Set L4 offset */
-	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
-				  sizeof(struct iphdr) - 4,
+	/* Set L3 offset */
+	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4,
 				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
 	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT);
 	mvpp2_prs_sram_ri_update(&pe, ri, ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK);
@@ -1335,7 +1334,7 @@ static void mvpp2_prs_vid_init(struct mvpp2 *priv)
 static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 {
 	struct mvpp2_prs_entry pe;
-	int tid;
+	int tid, ihl;
 
 	/* Ethertype: PPPoE */
 	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
@@ -1427,67 +1426,43 @@ static int mvpp2_prs_etype_init(struct mvpp2 *priv)
 				MVPP2_PRS_RI_UDF3_MASK);
 	mvpp2_prs_hw_write(priv, &pe);
 
-	/* Ethertype: IPv4 without options */
-	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
-					MVPP2_PE_LAST_FREE_TID);
-	if (tid < 0)
-		return tid;
-
-	memset(&pe, 0, sizeof(pe));
-	mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2);
-	pe.index = tid;
-
-	mvpp2_prs_match_etype(&pe, 0, ETH_P_IP);
-	mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
-				     MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL,
-				     MVPP2_PRS_IPV4_HEAD_MASK |
-				     MVPP2_PRS_IPV4_IHL_MASK);
-
-	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
-	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
-				 MVPP2_PRS_RI_L3_PROTO_MASK);
-	/* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */
-	mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
-				 sizeof(struct iphdr) - 4,
-				 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
-	/* Set L3 offset */
-	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3,
-				  MVPP2_ETH_TYPE_LEN,
-				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
-
-	/* Update shadow table and hw entry */
-	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2);
-	priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF;
-	priv->prs_shadow[pe.index].finish = false;
-	mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4,
-				MVPP2_PRS_RI_L3_PROTO_MASK);
-	mvpp2_prs_hw_write(priv, &pe);
-
-	/* Ethertype: IPv4 with options */
-	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
-					MVPP2_PE_LAST_FREE_TID);
-	if (tid < 0)
-		return tid;
-
-	pe.index = tid;
+	/* Ethertype: IPv4 with header length >= 5 */
+	for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) {
+		tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
+						MVPP2_PE_LAST_FREE_TID);
+		if (tid < 0)
+			return tid;
 
-	mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
-				     MVPP2_PRS_IPV4_HEAD,
-				     MVPP2_PRS_IPV4_HEAD_MASK);
+		memset(&pe, 0, sizeof(pe));
+		mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2);
+		pe.index = tid;
 
-	/* Clear ri before updating */
-	pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0;
-	pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0;
-	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT,
-				 MVPP2_PRS_RI_L3_PROTO_MASK);
+		mvpp2_prs_match_etype(&pe, 0, ETH_P_IP);
+		mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
+					     MVPP2_PRS_IPV4_HEAD | ihl,
+					     MVPP2_PRS_IPV4_HEAD_MASK |
+					     MVPP2_PRS_IPV4_IHL_MASK);
+
+		mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4);
+		mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4,
+					 MVPP2_PRS_RI_L3_PROTO_MASK);
+		/* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */
+		mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN +
+					 sizeof(struct iphdr) - 4,
+					 MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD);
+		/* Set L4 offset */
+		mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
+					  MVPP2_ETH_TYPE_LEN + (ihl * 4),
+					  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
 
-	/* Update shadow table and hw entry */
-	mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2);
-	priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF;
-	priv->prs_shadow[pe.index].finish = false;
-	mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4_OPT,
-				MVPP2_PRS_RI_L3_PROTO_MASK);
-	mvpp2_prs_hw_write(priv, &pe);
+		/* Update shadow table and hw entry */
+		mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2);
+		priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF;
+		priv->prs_shadow[pe.index].finish = false;
+		mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4,
+					MVPP2_PRS_RI_L3_PROTO_MASK);
+		mvpp2_prs_hw_write(priv, &pe);
+	}
 
 	/* Ethertype: IPv6 without options */
 	tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID,
@@ -1674,7 +1649,8 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv)
 	pe.index = tid;
 
 	mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN,
-				     MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL,
+				     MVPP2_PRS_IPV4_HEAD |
+				     MVPP2_PRS_IPV4_IHL_MIN,
 				     MVPP2_PRS_IPV4_HEAD_MASK |
 				     MVPP2_PRS_IPV4_IHL_MASK);
 
@@ -1788,9 +1764,8 @@ static int mvpp2_prs_ip4_init(struct mvpp2 *priv)
 	mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS);
 	mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1);
 
-	/* Set L4 offset */
-	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4,
-				  sizeof(struct iphdr) - 4,
+	/* Set L3 offset */
+	mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4,
 				  MVPP2_PRS_SRAM_OP_SEL_UDF_ADD);
 	mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT);
 	mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER,
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
index c16e5b9947bd..5ce5907be591 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h
@@ -28,7 +28,8 @@
 #define MVPP2_PRS_IPV4_MC		0xe0
 #define MVPP2_PRS_IPV4_MC_MASK		0xf0
 #define MVPP2_PRS_IPV4_BC_MASK		0xff
-#define MVPP2_PRS_IPV4_IHL		0x5
+#define MVPP2_PRS_IPV4_IHL_MIN		0x5
+#define MVPP2_PRS_IPV4_IHL_MAX		0xf
 #define MVPP2_PRS_IPV4_IHL_MASK		0xf
 #define MVPP2_PRS_IPV6_MC		0xff
 #define MVPP2_PRS_IPV6_MC_MASK		0xff
-- 
cgit v1.2.3


From 9a44c1cc63887627284ae232a9626a9f1cd066fc Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Fri, 16 Apr 2021 10:36:33 +0200
Subject: net: Add a WWAN subsystem

This change introduces initial support for a WWAN framework. Given the
complexity and heterogeneity of existing WWAN hardwares and interfaces,
there is no strict definition of what a WWAN device is and how it should
be represented. It's often a collection of multiple devices that perform
the global WWAN feature (netdev, tty, chardev, etc).

One usual way to expose modem controls and configuration is via high
level protocols such as the well known AT command protocol, MBIM or
QMI. The USB modems started to expose them as character devices, and
user daemons such as ModemManager learnt to use them.

This initial version adds the concept of WWAN port, which is a logical
pipe to a modem control protocol. The protocols are rawly exposed to
user via character device, allowing straigthforward support in existing
tools (ModemManager, ofono...). The WWAN core takes care of the generic
part, including character device management, and relies on port driver
operations to receive/submit protocol data.

Since the different devices exposing protocols for a same WWAN hardware
do not necessarily know about each others (e.g. two different USB
interfaces, PCI/MHI channel devices...) and can be created/removed in
different orders, the WWAN core ensures that all WAN ports contributing
to the 'whole' WWAN feature are grouped under the same virtual WWAN
device, relying on the provided parent device (e.g. mhi controller,
USB device). It's a 'trick' I copied from Johannes's earlier WWAN
subsystem proposal.

This initial version is purposely minimalist, it's essentially moving
the generic part of the previously proposed mhi_wwan_ctrl driver inside
a common WWAN framework, but the implementation is open and flexible
enough to allow extension for further drivers.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig          |   2 +
 drivers/net/Makefile         |   1 +
 drivers/net/wwan/Kconfig     |  23 ++
 drivers/net/wwan/Makefile    |   7 +
 drivers/net/wwan/wwan_core.c | 552 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/wwan.h         | 111 +++++++++
 6 files changed, 696 insertions(+)
 create mode 100644 drivers/net/wwan/Kconfig
 create mode 100644 drivers/net/wwan/Makefile
 create mode 100644 drivers/net/wwan/wwan_core.c
 create mode 100644 include/linux/wwan.h

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 5895905b6aa1..74dc8e249faa 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -502,6 +502,8 @@ source "drivers/net/wan/Kconfig"
 
 source "drivers/net/ieee802154/Kconfig"
 
+source "drivers/net/wwan/Kconfig"
+
 config XEN_NETDEV_FRONTEND
 	tristate "Xen network device frontend driver"
 	depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 040e20b81317..7ffd2d03efaf 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_SUNGEM_PHY) += sungem_phy.o
 obj-$(CONFIG_WAN) += wan/
 obj-$(CONFIG_WLAN) += wireless/
 obj-$(CONFIG_IEEE802154) += ieee802154/
+obj-$(CONFIG_WWAN) += wwan/
 
 obj-$(CONFIG_VMXNET3) += vmxnet3/
 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
new file mode 100644
index 000000000000..fc3f3a1c80ee
--- /dev/null
+++ b/drivers/net/wwan/Kconfig
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Wireless WAN device configuration
+#
+
+menuconfig WWAN
+	bool "Wireless WAN"
+	help
+	  This section contains Wireless WAN configuration for WWAN framework
+	  and drivers.
+
+if WWAN
+
+config WWAN_CORE
+	tristate "WWAN Driver Core"
+	help
+	  Say Y here if you want to use the WWAN driver core. This driver
+	  provides a common framework for WWAN drivers.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called wwan.
+
+endif # WWAN
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
new file mode 100644
index 000000000000..934590b9e47d
--- /dev/null
+++ b/drivers/net/wwan/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Linux WWAN device drivers.
+#
+
+obj-$(CONFIG_WWAN_CORE) += wwan.o
+wwan-objs += wwan_core.o
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
new file mode 100644
index 000000000000..b618b7937846
--- /dev/null
+++ b/drivers/net/wwan/wwan_core.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/wwan.h>
+
+#define WWAN_MAX_MINORS 256 /* 256 minors allowed with register_chrdev() */
+
+static DEFINE_MUTEX(wwan_register_lock); /* WWAN device create|remove lock */
+static DEFINE_IDA(minors); /* minors for WWAN port chardevs */
+static DEFINE_IDA(wwan_dev_ids); /* for unique WWAN device IDs */
+static struct class *wwan_class;
+static int wwan_major;
+
+#define to_wwan_dev(d) container_of(d, struct wwan_device, dev)
+#define to_wwan_port(d) container_of(d, struct wwan_port, dev)
+
+/* WWAN port flags */
+#define WWAN_PORT_TX_OFF	BIT(0)
+
+/**
+ * struct wwan_device - The structure that defines a WWAN device
+ *
+ * @id: WWAN device unique ID.
+ * @dev: Underlying device.
+ * @port_id: Current available port ID to pick.
+ */
+struct wwan_device {
+	unsigned int id;
+	struct device dev;
+	atomic_t port_id;
+};
+
+/**
+ * struct wwan_port - The structure that defines a WWAN port
+ * @type: Port type
+ * @start_count: Port start counter
+ * @flags: Store port state and capabilities
+ * @ops: Pointer to WWAN port operations
+ * @ops_lock: Protect port ops
+ * @dev: Underlying device
+ * @rxq: Buffer inbound queue
+ * @waitqueue: The waitqueue for port fops (read/write/poll)
+ */
+struct wwan_port {
+	enum wwan_port_type type;
+	unsigned int start_count;
+	unsigned long flags;
+	const struct wwan_port_ops *ops;
+	struct mutex ops_lock; /* Serialize ops + protect against removal */
+	struct device dev;
+	struct sk_buff_head rxq;
+	wait_queue_head_t waitqueue;
+};
+
+static void wwan_dev_destroy(struct device *dev)
+{
+	struct wwan_device *wwandev = to_wwan_dev(dev);
+
+	ida_free(&wwan_dev_ids, wwandev->id);
+	kfree(wwandev);
+}
+
+static const struct device_type wwan_dev_type = {
+	.name    = "wwan_dev",
+	.release = wwan_dev_destroy,
+};
+
+static int wwan_dev_parent_match(struct device *dev, const void *parent)
+{
+	return (dev->type == &wwan_dev_type && dev->parent == parent);
+}
+
+static struct wwan_device *wwan_dev_get_by_parent(struct device *parent)
+{
+	struct device *dev;
+
+	dev = class_find_device(wwan_class, NULL, parent, wwan_dev_parent_match);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	return to_wwan_dev(dev);
+}
+
+/* This function allocates and registers a new WWAN device OR if a WWAN device
+ * already exist for the given parent, it gets a reference and return it.
+ * This function is not exported (for now), it is called indirectly via
+ * wwan_create_port().
+ */
+static struct wwan_device *wwan_create_dev(struct device *parent)
+{
+	struct wwan_device *wwandev;
+	int err, id;
+
+	/* The 'find-alloc-register' operation must be protected against
+	 * concurrent execution, a WWAN device is possibly shared between
+	 * multiple callers or concurrently unregistered from wwan_remove_dev().
+	 */
+	mutex_lock(&wwan_register_lock);
+
+	/* If wwandev already exists, return it */
+	wwandev = wwan_dev_get_by_parent(parent);
+	if (!IS_ERR(wwandev))
+		goto done_unlock;
+
+	id = ida_alloc(&wwan_dev_ids, GFP_KERNEL);
+	if (id < 0)
+		goto done_unlock;
+
+	wwandev = kzalloc(sizeof(*wwandev), GFP_KERNEL);
+	if (!wwandev) {
+		ida_free(&wwan_dev_ids, id);
+		goto done_unlock;
+	}
+
+	wwandev->dev.parent = parent;
+	wwandev->dev.class = wwan_class;
+	wwandev->dev.type = &wwan_dev_type;
+	wwandev->id = id;
+	dev_set_name(&wwandev->dev, "wwan%d", wwandev->id);
+
+	err = device_register(&wwandev->dev);
+	if (err) {
+		put_device(&wwandev->dev);
+		wwandev = NULL;
+	}
+
+done_unlock:
+	mutex_unlock(&wwan_register_lock);
+
+	return wwandev;
+}
+
+static int is_wwan_child(struct device *dev, void *data)
+{
+	return dev->class == wwan_class;
+}
+
+static void wwan_remove_dev(struct wwan_device *wwandev)
+{
+	int ret;
+
+	/* Prevent concurrent picking from wwan_create_dev */
+	mutex_lock(&wwan_register_lock);
+
+	/* WWAN device is created and registered (get+add) along with its first
+	 * child port, and subsequent port registrations only grab a reference
+	 * (get). The WWAN device must then be unregistered (del+put) along with
+	 * its latest port, and reference simply dropped (put) otherwise.
+	 */
+	ret = device_for_each_child(&wwandev->dev, NULL, is_wwan_child);
+	if (!ret)
+		device_unregister(&wwandev->dev);
+	else
+		put_device(&wwandev->dev);
+
+	mutex_unlock(&wwan_register_lock);
+}
+
+/* ------- WWAN port management ------- */
+
+static void wwan_port_destroy(struct device *dev)
+{
+	struct wwan_port *port = to_wwan_port(dev);
+
+	ida_free(&minors, MINOR(port->dev.devt));
+	skb_queue_purge(&port->rxq);
+	mutex_destroy(&port->ops_lock);
+	kfree(port);
+}
+
+static const struct device_type wwan_port_dev_type = {
+	.name = "wwan_port",
+	.release = wwan_port_destroy,
+};
+
+static int wwan_port_minor_match(struct device *dev, const void *minor)
+{
+	return (dev->type == &wwan_port_dev_type &&
+		MINOR(dev->devt) == *(unsigned int *)minor);
+}
+
+static struct wwan_port *wwan_port_get_by_minor(unsigned int minor)
+{
+	struct device *dev;
+
+	dev = class_find_device(wwan_class, NULL, &minor, wwan_port_minor_match);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	return to_wwan_port(dev);
+}
+
+/* Keep aligned with wwan_port_type enum */
+static const char * const wwan_port_type_str[] = {
+	"AT",
+	"MBIM",
+	"QMI",
+	"QCDM",
+	"FIREHOSE"
+};
+
+struct wwan_port *wwan_create_port(struct device *parent,
+				   enum wwan_port_type type,
+				   const struct wwan_port_ops *ops,
+				   void *drvdata)
+{
+	struct wwan_device *wwandev;
+	struct wwan_port *port;
+	int minor, err = -ENOMEM;
+
+	if (type >= WWAN_PORT_MAX || !ops)
+		return ERR_PTR(-EINVAL);
+
+	/* A port is always a child of a WWAN device, retrieve (allocate or
+	 * pick) the WWAN device based on the provided parent device.
+	 */
+	wwandev = wwan_create_dev(parent);
+	if (IS_ERR(wwandev))
+		return ERR_CAST(wwandev);
+
+	/* A port is exposed as character device, get a minor */
+	minor = ida_alloc_range(&minors, 0, WWAN_MAX_MINORS - 1, GFP_KERNEL);
+	if (minor < 0)
+		goto error_wwandev_remove;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		ida_free(&minors, minor);
+		goto error_wwandev_remove;
+	}
+
+	port->type = type;
+	port->ops = ops;
+	mutex_init(&port->ops_lock);
+	skb_queue_head_init(&port->rxq);
+	init_waitqueue_head(&port->waitqueue);
+
+	port->dev.parent = &wwandev->dev;
+	port->dev.class = wwan_class;
+	port->dev.type = &wwan_port_dev_type;
+	port->dev.devt = MKDEV(wwan_major, minor);
+	dev_set_drvdata(&port->dev, drvdata);
+
+	/* create unique name based on wwan device id, port index and type */
+	dev_set_name(&port->dev, "wwan%up%u%s", wwandev->id,
+		     atomic_inc_return(&wwandev->port_id),
+		     wwan_port_type_str[port->type]);
+
+	err = device_register(&port->dev);
+	if (err)
+		goto error_put_device;
+
+	return port;
+
+error_put_device:
+	put_device(&port->dev);
+error_wwandev_remove:
+	wwan_remove_dev(wwandev);
+
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(wwan_create_port);
+
+void wwan_remove_port(struct wwan_port *port)
+{
+	struct wwan_device *wwandev = to_wwan_dev(port->dev.parent);
+
+	mutex_lock(&port->ops_lock);
+	if (port->start_count)
+		port->ops->stop(port);
+	port->ops = NULL; /* Prevent any new port operations (e.g. from fops) */
+	mutex_unlock(&port->ops_lock);
+
+	wake_up_interruptible(&port->waitqueue);
+
+	skb_queue_purge(&port->rxq);
+	dev_set_drvdata(&port->dev, NULL);
+	device_unregister(&port->dev);
+
+	/* Release related wwan device */
+	wwan_remove_dev(wwandev);
+}
+EXPORT_SYMBOL_GPL(wwan_remove_port);
+
+void wwan_port_rx(struct wwan_port *port, struct sk_buff *skb)
+{
+	skb_queue_tail(&port->rxq, skb);
+	wake_up_interruptible(&port->waitqueue);
+}
+EXPORT_SYMBOL_GPL(wwan_port_rx);
+
+void wwan_port_txon(struct wwan_port *port)
+{
+	clear_bit(WWAN_PORT_TX_OFF, &port->flags);
+	wake_up_interruptible(&port->waitqueue);
+}
+EXPORT_SYMBOL_GPL(wwan_port_txon);
+
+void wwan_port_txoff(struct wwan_port *port)
+{
+	set_bit(WWAN_PORT_TX_OFF, &port->flags);
+}
+EXPORT_SYMBOL_GPL(wwan_port_txoff);
+
+void *wwan_port_get_drvdata(struct wwan_port *port)
+{
+	return dev_get_drvdata(&port->dev);
+}
+EXPORT_SYMBOL_GPL(wwan_port_get_drvdata);
+
+static int wwan_port_op_start(struct wwan_port *port)
+{
+	int ret = 0;
+
+	mutex_lock(&port->ops_lock);
+	if (!port->ops) { /* Port got unplugged */
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	/* If port is already started, don't start again */
+	if (!port->start_count)
+		ret = port->ops->start(port);
+
+	if (!ret)
+		port->start_count++;
+
+out_unlock:
+	mutex_unlock(&port->ops_lock);
+
+	return ret;
+}
+
+static void wwan_port_op_stop(struct wwan_port *port)
+{
+	mutex_lock(&port->ops_lock);
+	port->start_count--;
+	if (port->ops && !port->start_count)
+		port->ops->stop(port);
+	mutex_unlock(&port->ops_lock);
+}
+
+static int wwan_port_op_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+	int ret;
+
+	mutex_lock(&port->ops_lock);
+	if (!port->ops) { /* Port got unplugged */
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = port->ops->tx(port, skb);
+
+out_unlock:
+	mutex_unlock(&port->ops_lock);
+
+	return ret;
+}
+
+static bool is_read_blocked(struct wwan_port *port)
+{
+	return skb_queue_empty(&port->rxq) && port->ops;
+}
+
+static bool is_write_blocked(struct wwan_port *port)
+{
+	return test_bit(WWAN_PORT_TX_OFF, &port->flags) && port->ops;
+}
+
+static int wwan_wait_rx(struct wwan_port *port, bool nonblock)
+{
+	if (!is_read_blocked(port))
+		return 0;
+
+	if (nonblock)
+		return -EAGAIN;
+
+	if (wait_event_interruptible(port->waitqueue, !is_read_blocked(port)))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+static int wwan_wait_tx(struct wwan_port *port, bool nonblock)
+{
+	if (!is_write_blocked(port))
+		return 0;
+
+	if (nonblock)
+		return -EAGAIN;
+
+	if (wait_event_interruptible(port->waitqueue, !is_write_blocked(port)))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+static int wwan_port_fops_open(struct inode *inode, struct file *file)
+{
+	struct wwan_port *port;
+	int err = 0;
+
+	port = wwan_port_get_by_minor(iminor(inode));
+	if (IS_ERR(port))
+		return PTR_ERR(port);
+
+	file->private_data = port;
+	stream_open(inode, file);
+
+	err = wwan_port_op_start(port);
+	if (err)
+		put_device(&port->dev);
+
+	return err;
+}
+
+static int wwan_port_fops_release(struct inode *inode, struct file *filp)
+{
+	struct wwan_port *port = filp->private_data;
+
+	wwan_port_op_stop(port);
+	put_device(&port->dev);
+
+	return 0;
+}
+
+static ssize_t wwan_port_fops_read(struct file *filp, char __user *buf,
+				   size_t count, loff_t *ppos)
+{
+	struct wwan_port *port = filp->private_data;
+	struct sk_buff *skb;
+	size_t copied;
+	int ret;
+
+	ret = wwan_wait_rx(port, !!(filp->f_flags & O_NONBLOCK));
+	if (ret)
+		return ret;
+
+	skb = skb_dequeue(&port->rxq);
+	if (!skb)
+		return -EIO;
+
+	copied = min_t(size_t, count, skb->len);
+	if (copy_to_user(buf, skb->data, copied)) {
+		kfree_skb(skb);
+		return -EFAULT;
+	}
+	skb_pull(skb, copied);
+
+	/* skb is not fully consumed, keep it in the queue */
+	if (skb->len)
+		skb_queue_head(&port->rxq, skb);
+	else
+		consume_skb(skb);
+
+	return copied;
+}
+
+static ssize_t wwan_port_fops_write(struct file *filp, const char __user *buf,
+				    size_t count, loff_t *offp)
+{
+	struct wwan_port *port = filp->private_data;
+	struct sk_buff *skb;
+	int ret;
+
+	ret = wwan_wait_tx(port, !!(filp->f_flags & O_NONBLOCK));
+	if (ret)
+		return ret;
+
+	skb = alloc_skb(count, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	if (copy_from_user(skb_put(skb, count), buf, count)) {
+		kfree_skb(skb);
+		return -EFAULT;
+	}
+
+	ret = wwan_port_op_tx(port, skb);
+	if (ret) {
+		kfree_skb(skb);
+		return ret;
+	}
+
+	return count;
+}
+
+static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait)
+{
+	struct wwan_port *port = filp->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(filp, &port->waitqueue, wait);
+
+	if (!is_write_blocked(port))
+		mask |= EPOLLOUT | EPOLLWRNORM;
+	if (!is_read_blocked(port))
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	return mask;
+}
+
+static const struct file_operations wwan_port_fops = {
+	.owner = THIS_MODULE,
+	.open = wwan_port_fops_open,
+	.release = wwan_port_fops_release,
+	.read = wwan_port_fops_read,
+	.write = wwan_port_fops_write,
+	.poll = wwan_port_fops_poll,
+	.llseek = noop_llseek,
+};
+
+static int __init wwan_init(void)
+{
+	wwan_class = class_create(THIS_MODULE, "wwan");
+	if (IS_ERR(wwan_class))
+		return PTR_ERR(wwan_class);
+
+	/* chrdev used for wwan ports */
+	wwan_major = register_chrdev(0, "wwan_port", &wwan_port_fops);
+	if (wwan_major < 0) {
+		class_destroy(wwan_class);
+		return wwan_major;
+	}
+
+	return 0;
+}
+
+static void __exit wwan_exit(void)
+{
+	unregister_chrdev(wwan_major, "wwan_port");
+	class_destroy(wwan_class);
+}
+
+module_init(wwan_init);
+module_exit(wwan_exit);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("WWAN core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/wwan.h b/include/linux/wwan.h
new file mode 100644
index 000000000000..aa05a253dcf9
--- /dev/null
+++ b/include/linux/wwan.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */
+
+#ifndef __WWAN_H
+#define __WWAN_H
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+/**
+ * enum wwan_port_type - WWAN port types
+ * @WWAN_PORT_AT: AT commands
+ * @WWAN_PORT_MBIM: Mobile Broadband Interface Model control
+ * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control
+ * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface
+ * @WWAN_PORT_FIREHOSE: XML based command protocol
+ * @WWAN_PORT_MAX: Number of supported port types
+ */
+enum wwan_port_type {
+	WWAN_PORT_AT,
+	WWAN_PORT_MBIM,
+	WWAN_PORT_QMI,
+	WWAN_PORT_QCDM,
+	WWAN_PORT_FIREHOSE,
+	WWAN_PORT_MAX,
+};
+
+struct wwan_port;
+
+/** struct wwan_port_ops - The WWAN port operations
+ * @start: The routine for starting the WWAN port device.
+ * @stop: The routine for stopping the WWAN port device.
+ * @tx: The routine that sends WWAN port protocol data to the device.
+ *
+ * The wwan_port_ops structure contains a list of low-level operations
+ * that control a WWAN port device. All functions are mandatory.
+ */
+struct wwan_port_ops {
+	int (*start)(struct wwan_port *port);
+	void (*stop)(struct wwan_port *port);
+	int (*tx)(struct wwan_port *port, struct sk_buff *skb);
+};
+
+/**
+ * wwan_create_port - Add a new WWAN port
+ * @parent: Device to use as parent and shared by all WWAN ports
+ * @type: WWAN port type
+ * @ops: WWAN port operations
+ * @drvdata: Pointer to caller driver data
+ *
+ * Allocate and register a new WWAN port. The port will be automatically exposed
+ * to user as a character device and attached to the right virtual WWAN device,
+ * based on the parent pointer. The parent pointer is the device shared by all
+ * components of a same WWAN modem (e.g. USB dev, PCI dev, MHI controller...).
+ *
+ * drvdata will be placed in the WWAN port device driver data and can be
+ * retrieved with wwan_port_get_drvdata().
+ *
+ * This function must be balanced with a call to wwan_remove_port().
+ *
+ * Returns a valid pointer to wwan_port on success or PTR_ERR on failure
+ */
+struct wwan_port *wwan_create_port(struct device *parent,
+				   enum wwan_port_type type,
+				   const struct wwan_port_ops *ops,
+				   void *drvdata);
+
+/**
+ * wwan_remove_port - Remove a WWAN port
+ * @port: WWAN port to remove
+ *
+ * Remove a previously created port.
+ */
+void wwan_remove_port(struct wwan_port *port);
+
+/**
+ * wwan_port_rx - Receive data from the WWAN port
+ * @port: WWAN port for which data is received
+ * @skb: Pointer to the rx buffer
+ *
+ * A port driver calls this function upon data reception (MBIM, AT...).
+ */
+void wwan_port_rx(struct wwan_port *port, struct sk_buff *skb);
+
+/**
+ * wwan_port_txoff - Stop TX on WWAN port
+ * @port: WWAN port for which TX must be stopped
+ *
+ * Used for TX flow control, a port driver calls this function to indicate TX
+ * is temporary unavailable (e.g. due to ring buffer fullness).
+ */
+void wwan_port_txoff(struct wwan_port *port);
+
+
+/**
+ * wwan_port_txon - Restart TX on WWAN port
+ * @port: WWAN port for which TX must be restarted
+ *
+ * Used for TX flow control, a port driver calls this function to indicate TX
+ * is available again.
+ */
+void wwan_port_txon(struct wwan_port *port);
+
+/**
+ * wwan_port_get_drvdata - Retrieve driver data from a WWAN port
+ * @port: Related WWAN port
+ */
+void *wwan_port_get_drvdata(struct wwan_port *port);
+
+#endif /* __WWAN_H */
-- 
cgit v1.2.3


From fa588eba632df14d296436995e6bbea0c146ae77 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Fri, 16 Apr 2021 10:36:34 +0200
Subject: net: Add Qcom WWAN control driver

The MHI WWWAN control driver allows MHI QCOM-based modems to expose
different modem control protocols/ports via the WWAN framework, so that
userspace modem tools or daemon (e.g. ModemManager) can control WWAN
config and state (APN config, SMS, provider selection...). A QCOM-based
modem can expose one or several of the following protocols:
- AT: Well known AT commands interactive protocol (microcom, minicom...)
- MBIM: Mobile Broadband Interface Model (libmbim, mbimcli)
- QMI: QCOM MSM/Modem Interface (libqmi, qmicli)
- QCDM: QCOM Modem diagnostic interface (libqcdm)
- FIREHOSE: XML-based protocol for Modem firmware management
        (qmi-firmware-update)

Note that this patch is mostly a rework of the earlier MHI UCI
tentative that was a generic interface for accessing MHI bus from
userspace. As suggested, this new version is WWAN specific and is
dedicated to only expose channels used for controlling a modem, and
for which related opensource userpace support exist.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/Kconfig         |  14 ++
 drivers/net/wwan/Makefile        |   2 +
 drivers/net/wwan/mhi_wwan_ctrl.c | 282 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 298 insertions(+)
 create mode 100644 drivers/net/wwan/mhi_wwan_ctrl.c

diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig
index fc3f3a1c80ee..7ad1920120bc 100644
--- a/drivers/net/wwan/Kconfig
+++ b/drivers/net/wwan/Kconfig
@@ -20,4 +20,18 @@ config WWAN_CORE
 	  To compile this driver as a module, choose M here: the module will be
 	  called wwan.
 
+config MHI_WWAN_CTRL
+	tristate "MHI WWAN control driver for QCOM-based PCIe modems"
+	select WWAN_CORE
+	depends on MHI_BUS
+	help
+	  MHI WWAN CTRL allows QCOM-based PCIe modems to expose different modem
+	  control protocols/ports to userspace, including AT, MBIM, QMI, DIAG
+	  and FIREHOSE. These protocols can be accessed directly from userspace
+	  (e.g. AT commands) or via libraries/tools (e.g. libmbim, libqmi,
+	  libqcdm...).
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called mhi_wwan_ctrl.
+
 endif # WWAN
diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile
index 934590b9e47d..556cd90958ca 100644
--- a/drivers/net/wwan/Makefile
+++ b/drivers/net/wwan/Makefile
@@ -5,3 +5,5 @@
 
 obj-$(CONFIG_WWAN_CORE) += wwan.o
 wwan-objs += wwan_core.o
+
+obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c
new file mode 100644
index 000000000000..11475ade4be5
--- /dev/null
+++ b/drivers/net/wwan/mhi_wwan_ctrl.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/wwan.h>
+
+/* MHI wwan flags */
+#define MHI_WWAN_DL_CAP		BIT(0)
+#define MHI_WWAN_UL_CAP		BIT(1)
+#define MHI_WWAN_RX_REFILL	BIT(2)
+
+#define MHI_WWAN_MAX_MTU	0x8000
+
+struct mhi_wwan_dev {
+	/* Lower level is a mhi dev, upper level is a wwan port */
+	struct mhi_device *mhi_dev;
+	struct wwan_port *wwan_port;
+
+	/* State and capabilities */
+	unsigned long flags;
+	size_t mtu;
+
+	/* Protect against concurrent TX and TX-completion (bh) */
+	spinlock_t tx_lock;
+
+	/* Protect RX budget and rx_refill scheduling */
+	spinlock_t rx_lock;
+	struct work_struct rx_refill;
+
+	/* RX budget is initially set to the size of the MHI RX queue and is
+	 * used to limit the number of allocated and queued packets. It is
+	 * decremented on data queueing and incremented on data release.
+	 */
+	unsigned int rx_budget;
+};
+
+/* Increment RX budget and schedule RX refill if necessary */
+static void mhi_wwan_rx_budget_inc(struct mhi_wwan_dev *mhiwwan)
+{
+	spin_lock(&mhiwwan->rx_lock);
+
+	mhiwwan->rx_budget++;
+
+	if (test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags))
+		schedule_work(&mhiwwan->rx_refill);
+
+	spin_unlock(&mhiwwan->rx_lock);
+}
+
+/* Decrement RX budget if non-zero and return true on success */
+static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan)
+{
+	bool ret = false;
+
+	spin_lock(&mhiwwan->rx_lock);
+
+	if (mhiwwan->rx_budget)
+		mhiwwan->rx_budget--;
+
+	if (mhiwwan->rx_budget && test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags))
+		ret = true;
+
+	spin_unlock(&mhiwwan->rx_lock);
+
+	return ret;
+}
+
+static void __mhi_skb_destructor(struct sk_buff *skb)
+{
+	/* RX buffer has been consumed, increase the allowed budget */
+	mhi_wwan_rx_budget_inc(skb_shinfo(skb)->destructor_arg);
+}
+
+static void mhi_wwan_ctrl_refill_work(struct work_struct *work)
+{
+	struct mhi_wwan_dev *mhiwwan = container_of(work, struct mhi_wwan_dev, rx_refill);
+	struct mhi_device *mhi_dev = mhiwwan->mhi_dev;
+
+	while (mhi_wwan_rx_budget_dec(mhiwwan)) {
+		struct sk_buff *skb;
+
+		skb = alloc_skb(mhiwwan->mtu, GFP_KERNEL);
+		if (!skb) {
+			mhi_wwan_rx_budget_inc(mhiwwan);
+			break;
+		}
+
+		/* To prevent unlimited buffer allocation if nothing consumes
+		 * the RX buffers (passed to WWAN core), track their lifespan
+		 * to not allocate more than allowed budget.
+		 */
+		skb->destructor = __mhi_skb_destructor;
+		skb_shinfo(skb)->destructor_arg = mhiwwan;
+
+		if (mhi_queue_skb(mhi_dev, DMA_FROM_DEVICE, skb, mhiwwan->mtu, MHI_EOT)) {
+			dev_err(&mhi_dev->dev, "Failed to queue buffer\n");
+			kfree_skb(skb);
+			break;
+		}
+	}
+}
+
+static int mhi_wwan_ctrl_start(struct wwan_port *port)
+{
+	struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port);
+	int ret;
+
+	/* Start mhi device's channel(s) */
+	ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
+	if (ret)
+		return ret;
+
+	/* Don't allocate more buffers than MHI channel queue size */
+	mhiwwan->rx_budget = mhi_get_free_desc_count(mhiwwan->mhi_dev, DMA_FROM_DEVICE);
+
+	/* Add buffers to the MHI inbound queue */
+	if (test_bit(MHI_WWAN_DL_CAP, &mhiwwan->flags)) {
+		set_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags);
+		mhi_wwan_ctrl_refill_work(&mhiwwan->rx_refill);
+	}
+
+	return 0;
+}
+
+static void mhi_wwan_ctrl_stop(struct wwan_port *port)
+{
+	struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port);
+
+	spin_lock(&mhiwwan->rx_lock);
+	clear_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags);
+	spin_unlock(&mhiwwan->rx_lock);
+
+	cancel_work_sync(&mhiwwan->rx_refill);
+
+	mhi_unprepare_from_transfer(mhiwwan->mhi_dev);
+}
+
+static int mhi_wwan_ctrl_tx(struct wwan_port *port, struct sk_buff *skb)
+{
+	struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port);
+	int ret;
+
+	if (skb->len > mhiwwan->mtu)
+		return -EMSGSIZE;
+
+	if (!test_bit(MHI_WWAN_UL_CAP, &mhiwwan->flags))
+		return -EOPNOTSUPP;
+
+	/* Queue the packet for MHI transfer and check fullness of the queue */
+	spin_lock_bh(&mhiwwan->tx_lock);
+	ret = mhi_queue_skb(mhiwwan->mhi_dev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+	if (mhi_queue_is_full(mhiwwan->mhi_dev, DMA_TO_DEVICE))
+		wwan_port_txoff(port);
+	spin_unlock_bh(&mhiwwan->tx_lock);
+
+	return ret;
+}
+
+static const struct wwan_port_ops wwan_pops = {
+	.start = mhi_wwan_ctrl_start,
+	.stop = mhi_wwan_ctrl_stop,
+	.tx = mhi_wwan_ctrl_tx,
+};
+
+static void mhi_ul_xfer_cb(struct mhi_device *mhi_dev,
+			   struct mhi_result *mhi_result)
+{
+	struct mhi_wwan_dev *mhiwwan = dev_get_drvdata(&mhi_dev->dev);
+	struct wwan_port *port = mhiwwan->wwan_port;
+	struct sk_buff *skb = mhi_result->buf_addr;
+
+	dev_dbg(&mhi_dev->dev, "%s: status: %d xfer_len: %zu\n", __func__,
+		mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+	/* MHI core has done with the buffer, release it */
+	consume_skb(skb);
+
+	/* There is likely new slot available in the MHI queue, re-allow TX */
+	spin_lock_bh(&mhiwwan->tx_lock);
+	if (!mhi_queue_is_full(mhiwwan->mhi_dev, DMA_TO_DEVICE))
+		wwan_port_txon(port);
+	spin_unlock_bh(&mhiwwan->tx_lock);
+}
+
+static void mhi_dl_xfer_cb(struct mhi_device *mhi_dev,
+			   struct mhi_result *mhi_result)
+{
+	struct mhi_wwan_dev *mhiwwan = dev_get_drvdata(&mhi_dev->dev);
+	struct wwan_port *port = mhiwwan->wwan_port;
+	struct sk_buff *skb = mhi_result->buf_addr;
+
+	dev_dbg(&mhi_dev->dev, "%s: status: %d receive_len: %zu\n", __func__,
+		mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+	if (mhi_result->transaction_status &&
+	    mhi_result->transaction_status != -EOVERFLOW) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* MHI core does not update skb->len, do it before forward */
+	skb_put(skb, mhi_result->bytes_xferd);
+	wwan_port_rx(port, skb);
+
+	/* Do not increment rx budget nor refill RX buffers now, wait for the
+	 * buffer to be consumed. Done from __mhi_skb_destructor().
+	 */
+}
+
+static int mhi_wwan_ctrl_probe(struct mhi_device *mhi_dev,
+			       const struct mhi_device_id *id)
+{
+	struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+	struct mhi_wwan_dev *mhiwwan;
+	struct wwan_port *port;
+
+	mhiwwan = kzalloc(sizeof(*mhiwwan), GFP_KERNEL);
+	if (!mhiwwan)
+		return -ENOMEM;
+
+	mhiwwan->mhi_dev = mhi_dev;
+	mhiwwan->mtu = MHI_WWAN_MAX_MTU;
+	INIT_WORK(&mhiwwan->rx_refill, mhi_wwan_ctrl_refill_work);
+	spin_lock_init(&mhiwwan->tx_lock);
+	spin_lock_init(&mhiwwan->rx_lock);
+
+	if (mhi_dev->dl_chan)
+		set_bit(MHI_WWAN_DL_CAP, &mhiwwan->flags);
+	if (mhi_dev->ul_chan)
+		set_bit(MHI_WWAN_UL_CAP, &mhiwwan->flags);
+
+	dev_set_drvdata(&mhi_dev->dev, mhiwwan);
+
+	/* Register as a wwan port, id->driver_data contains wwan port type */
+	port = wwan_create_port(&cntrl->mhi_dev->dev, id->driver_data,
+				&wwan_pops, mhiwwan);
+	if (IS_ERR(port)) {
+		kfree(mhiwwan);
+		return PTR_ERR(port);
+	}
+
+	mhiwwan->wwan_port = port;
+
+	return 0;
+};
+
+static void mhi_wwan_ctrl_remove(struct mhi_device *mhi_dev)
+{
+	struct mhi_wwan_dev *mhiwwan = dev_get_drvdata(&mhi_dev->dev);
+
+	wwan_remove_port(mhiwwan->wwan_port);
+	kfree(mhiwwan);
+}
+
+static const struct mhi_device_id mhi_wwan_ctrl_match_table[] = {
+	{ .chan = "DUN", .driver_data = WWAN_PORT_AT },
+	{ .chan = "MBIM", .driver_data = WWAN_PORT_MBIM },
+	{ .chan = "QMI", .driver_data = WWAN_PORT_QMI },
+	{ .chan = "DIAG", .driver_data = WWAN_PORT_QCDM },
+	{ .chan = "FIREHOSE", .driver_data = WWAN_PORT_FIREHOSE },
+	{},
+};
+MODULE_DEVICE_TABLE(mhi, mhi_wwan_ctrl_match_table);
+
+static struct mhi_driver mhi_wwan_ctrl_driver = {
+	.id_table = mhi_wwan_ctrl_match_table,
+	.remove = mhi_wwan_ctrl_remove,
+	.probe = mhi_wwan_ctrl_probe,
+	.ul_xfer_cb = mhi_ul_xfer_cb,
+	.dl_xfer_cb = mhi_dl_xfer_cb,
+	.driver = {
+		.name = "mhi_wwan_ctrl",
+	},
+};
+
+module_mhi_driver(mhi_wwan_ctrl_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MHI WWAN CTRL Driver");
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
-- 
cgit v1.2.3


From fb32856b16ad9d5bcd75b76a274e2c515ac7b9d7 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Fri, 16 Apr 2021 17:16:15 +0800
Subject: virtio-net: page_to_skb() use build_skb when there's sufficient
 tailroom

In page_to_skb(), if we have enough tailroom to save skb_shared_info, we
can use build_skb to create skb directly. No need to alloc for
additional space. And it can save a 'frags slot', which is very friendly
to GRO.

Here, if the payload of the received package is too small (less than
GOOD_COPY_LEN), we still choose to copy it directly to the space got by
napi_alloc_skb. So we can reuse these pages.

Testing Machine:
    The four queues of the network card are bound to the cpu1.

Test command:
    for ((i=0;i<5;++i)); do sockperf tp --ip 192.168.122.64 -m 1000 -t 150& done

The size of the udp package is 1000, so in the case of this patch, there
will always be enough tailroom to use build_skb. The sent udp packet
will be discarded because there is no port to receive it. The irqsoftd
of the machine is 100%, we observe the received quantity displayed by
sar -n DEV 1:

no build_skb:  956864.00 rxpck/s
build_skb:    1158465.00 rxpck/s

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 21 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 101659cd4b87..8cd76037c724 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -379,21 +379,17 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid, unsigned int metasize)
+				   bool hdr_valid, unsigned int metasize,
+				   unsigned int headroom)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int copy, hdr_len, hdr_padded_len;
-	char *p;
+	int tailroom, shinfo_size;
+	char *p, *hdr_p;
 
 	p = page_address(page) + offset;
-
-	/* copy small packet so we can reuse these pages for small data */
-	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
-	if (unlikely(!skb))
-		return NULL;
-
-	hdr = skb_vnet_hdr(skb);
+	hdr_p = p;
 
 	hdr_len = vi->hdr_len;
 	if (vi->mergeable_rx_bufs)
@@ -401,14 +397,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	/* hdr_valid means no XDP, so we can copy the vnet header */
-	if (hdr_valid)
-		memcpy(hdr, p, hdr_len);
+	/* If headroom is not 0, there is an offset between the beginning of the
+	 * data and the allocated space, otherwise the data and the allocated
+	 * space are aligned.
+	 */
+	if (headroom) {
+		/* The actual allocated space size is PAGE_SIZE. */
+		truesize = PAGE_SIZE;
+		tailroom = truesize - len - offset;
+	} else {
+		tailroom = truesize - len;
+	}
 
 	len -= hdr_len;
 	offset += hdr_padded_len;
 	p += hdr_padded_len;
 
+	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	if (len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
+		skb = build_skb(p, truesize);
+		if (unlikely(!skb))
+			return NULL;
+
+		skb_put(skb, len);
+		goto ok;
+	}
+
+	/* copy small packet so we can reuse these pages for small data */
+	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
+	if (unlikely(!skb))
+		return NULL;
+
 	/* Copy all frame if it fits skb->head, otherwise
 	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
 	 */
@@ -418,11 +438,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		copy = ETH_HLEN + metasize;
 	skb_put_data(skb, p, copy);
 
-	if (metasize) {
-		__skb_pull(skb, metasize);
-		skb_metadata_set(skb, metasize);
-	}
-
 	len -= copy;
 	offset += copy;
 
@@ -431,7 +446,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
 		else
 			put_page(page);
-		return skb;
+		goto ok;
 	}
 
 	/*
@@ -458,6 +473,18 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	if (page)
 		give_pages(rq, page);
 
+ok:
+	/* hdr_valid means no XDP, so we can copy the vnet header */
+	if (hdr_valid) {
+		hdr = skb_vnet_hdr(skb);
+		memcpy(hdr, hdr_p, hdr_len);
+	}
+
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
 	return skb;
 }
 
@@ -808,7 +835,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
 	struct page *page = buf;
 	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -922,7 +949,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
 						       len, PAGE_SIZE, false,
-						       metasize);
+						       metasize, headroom);
 				return head_skb;
 			}
 			break;
@@ -980,7 +1007,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-			       metasize);
+			       metasize, headroom);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
-- 
cgit v1.2.3


From d8604b209e9b3762280b8321162f0f64219d51c9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 16 Apr 2021 08:08:49 -0500
Subject: dt-bindings: net: qcom,ipa: add firmware-name property

Add a new optional firmware-name property to the IPA DT node.  It
is used only if the modem is not doing early initialization (i.e.,
if the modem-init property is not present).  Its value is the name
of the firmware file to use; if it's not specified, a default name
("ipa_fws.mdt") is used.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index da5212e693e9..7443490d4cc6 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -125,6 +125,14 @@ properties:
       the firmware passed to Trust Zone for authentication.  Required
       when Trust Zone (not the modem) performs early initialization.
 
+  firmware-name:
+    $ref: /schemas/types.yaml#/definitions/string
+    description:
+      If present, name (or relative path) of the file within the
+      firmware search path containing the firmware image used when
+      initializing IPA hardware.  Optional, and only used when
+      Trust Zone performs early initialization.
+
 required:
   - compatible
   - iommus
@@ -134,12 +142,23 @@ required:
   - interconnects
   - qcom,smem-states
 
+# Either modem-init is present, or memory-region must be present.
 oneOf:
   - required:
       - modem-init
   - required:
       - memory-region
 
+# If memory-region is present, firmware-name may optionally be present.
+# But if modem-init is present, firmware-name must not be present.
+if:
+  required:
+    - modem-init
+then:
+  not:
+    required:
+      - firmware-name
+
 additionalProperties: false
 
 examples:
-- 
cgit v1.2.3


From 9ce062ba6a8d0a22e873e6b8cf068bf278adb5e7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 16 Apr 2021 08:08:50 -0500
Subject: net: ipa: optionally define firmware name via DT

IPA initialization includes loading some firmware.  This step is
done either by the modem or by the AP under Trust Zone.  If the
AP loads firmware, the name of the firmware file is currently
hard-coded ("ipa_fws.mdt").

Add the ability to specify the relative path of the firmware file to
use in a property in the Device Tree IPA node.  If the property is
not found (or if any other error occurs attempting to get it), fall
back to using a default relative path.

Use the "old" fixed name as the default.  Rename the symbol that
represents this default to emphasize its purpose.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index aad915e2ce52..9915603ed10b 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -67,7 +67,7 @@
  */
 
 /* The name of the GSI firmware file relative to /lib/firmware */
-#define IPA_FWS_PATH		"ipa_fws.mdt"
+#define IPA_FW_PATH_DEFAULT	"ipa_fws.mdt"
 #define IPA_PAS_ID		15
 
 /* Shift of 19.2 MHz timestamp to achieve lower resolution timestamps */
@@ -517,6 +517,7 @@ static int ipa_firmware_load(struct device *dev)
 	struct device_node *node;
 	struct resource res;
 	phys_addr_t phys;
+	const char *path;
 	ssize_t size;
 	void *virt;
 	int ret;
@@ -534,9 +535,17 @@ static int ipa_firmware_load(struct device *dev)
 		return ret;
 	}
 
-	ret = request_firmware(&fw, IPA_FWS_PATH, dev);
+	/* Use name from DTB if specified; use default for *any* error */
+	ret = of_property_read_string(dev->of_node, "firmware-name", &path);
 	if (ret) {
-		dev_err(dev, "error %d requesting \"%s\"\n", ret, IPA_FWS_PATH);
+		dev_dbg(dev, "error %d getting \"firmware-name\" resource\n",
+			ret);
+		path = IPA_FW_PATH_DEFAULT;
+	}
+
+	ret = request_firmware(&fw, path, dev);
+	if (ret) {
+		dev_err(dev, "error %d requesting \"%s\"\n", ret, path);
 		return ret;
 	}
 
@@ -549,13 +558,11 @@ static int ipa_firmware_load(struct device *dev)
 		goto out_release_firmware;
 	}
 
-	ret = qcom_mdt_load(dev, fw, IPA_FWS_PATH, IPA_PAS_ID,
-			    virt, phys, size, NULL);
+	ret = qcom_mdt_load(dev, fw, path, IPA_PAS_ID, virt, phys, size, NULL);
 	if (ret)
-		dev_err(dev, "error %d loading \"%s\"\n", ret, IPA_FWS_PATH);
+		dev_err(dev, "error %d loading \"%s\"\n", ret, path);
 	else if ((ret = qcom_scm_pas_auth_and_reset(IPA_PAS_ID)))
-		dev_err(dev, "error %d authenticating \"%s\"\n", ret,
-			IPA_FWS_PATH);
+		dev_err(dev, "error %d authenticating \"%s\"\n", ret, path);
 
 	memunmap(virt);
 out_release_firmware:
-- 
cgit v1.2.3


From aa8caa767e319bad34a82bfce7da1ed2b9c0ed6f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 16 Apr 2021 14:16:06 +0000
Subject: mld: fix suspicious RCU usage in __ipv6_dev_mc_dec()

__ipv6_dev_mc_dec() internally uses sleepable functions so that caller
must not acquire atomic locks. But caller, which is addrconf_verify_rtnl()
acquires rcu_read_lock_bh().
So this warning occurs in the __ipv6_dev_mc_dec().

Test commands:
    ip netns add A
    ip link add veth0 type veth peer name veth1
    ip link set veth1 netns A
    ip link set veth0 up
    ip netns exec A ip link set veth1 up
    ip a a 2001:db8::1/64 dev veth0 valid_lft 2 preferred_lft 1

Splat looks like:
============================
WARNING: suspicious RCU usage
5.12.0-rc6+ #515 Not tainted
-----------------------------
kernel/sched/core.c:8294 Illegal context switch in RCU-bh read-side
critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
4 locks held by kworker/4:0/1997:
 #0: ffff88810bd72d48 ((wq_completion)ipv6_addrconf){+.+.}-{0:0}, at:
process_one_work+0x761/0x1440
 #1: ffff888105c8fe00 ((addr_chk_work).work){+.+.}-{0:0}, at:
process_one_work+0x795/0x1440
 #2: ffffffffb9279fb0 (rtnl_mutex){+.+.}-{3:3}, at:
addrconf_verify_work+0xa/0x20
 #3: ffffffffb8e30860 (rcu_read_lock_bh){....}-{1:2}, at:
addrconf_verify_rtnl+0x23/0xc60

stack backtrace:
CPU: 4 PID: 1997 Comm: kworker/4:0 Not tainted 5.12.0-rc6+ #515
Workqueue: ipv6_addrconf addrconf_verify_work
Call Trace:
 dump_stack+0xa4/0xe5
 ___might_sleep+0x27d/0x2b0
 __mutex_lock+0xc8/0x13f0
 ? lock_downgrade+0x690/0x690
 ? __ipv6_dev_mc_dec+0x49/0x2a0
 ? mark_held_locks+0xb7/0x120
 ? mutex_lock_io_nested+0x1270/0x1270
 ? lockdep_hardirqs_on_prepare+0x12c/0x3e0
 ? _raw_spin_unlock_irqrestore+0x47/0x50
 ? trace_hardirqs_on+0x41/0x120
 ? __wake_up_common_lock+0xc9/0x100
 ? __wake_up_common+0x620/0x620
 ? memset+0x1f/0x40
 ? netlink_broadcast_filtered+0x2c4/0xa70
 ? __ipv6_dev_mc_dec+0x49/0x2a0
 __ipv6_dev_mc_dec+0x49/0x2a0
 ? netlink_broadcast_filtered+0x2f6/0xa70
 addrconf_leave_solict.part.64+0xad/0xf0
 ? addrconf_join_solict.part.63+0xf0/0xf0
 ? nlmsg_notify+0x63/0x1b0
 __ipv6_ifa_notify+0x22c/0x9c0
 ? inet6_fill_ifaddr+0xbe0/0xbe0
 ? lockdep_hardirqs_on_prepare+0x12c/0x3e0
 ? __local_bh_enable_ip+0xa5/0xf0
 ? ipv6_del_addr+0x347/0x870
 ipv6_del_addr+0x3b1/0x870
 ? addrconf_ifdown+0xfe0/0xfe0
 ? rcu_read_lock_any_held.part.27+0x20/0x20
 addrconf_verify_rtnl+0x8a9/0xc60
 addrconf_verify_work+0xf/0x20
 process_one_work+0x84c/0x1440

In order to avoid this problem, it uses rcu_read_unlock_bh() for
a short time. RCU is used for avoiding freeing
ifp(struct *inet6_ifaddr) while ifp is being used. But this will
not be released even if rcu_read_unlock_bh() is used.
Because before rcu_read_unlock_bh(), it uses in6_ifa_hold(ifp).
So this is safe.

Fixes: 63ed8de4be81 ("mld: add mc_lock for protecting per-interface mld data")
Suggested-by: Eric Dumazet <edumazet@google.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dbb5bb9269bb..b0ef65eb9bd2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4485,7 +4485,9 @@ restart:
 			    age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
 				in6_ifa_hold(ifp);
+				rcu_read_unlock_bh();
 				ipv6_del_addr(ifp);
+				rcu_read_lock_bh();
 				goto restart;
 			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
 				spin_unlock(&ifp->lock);
-- 
cgit v1.2.3


From 0e672f306a28ddd55d2fb2ab89afdc615b5324a4 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 16 Apr 2021 17:47:45 +0200
Subject: veth: check for NAPI instead of xdp_prog before xmit of XDP frame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recent patch that tied enabling of veth NAPI to the GRO flag also has
the nice side effect that a veth device can be the target of an
XDP_REDIRECT without an XDP program needing to be loaded on the peer
device. However, the patch adding this extra NAPI mode didn't actually
change the check in veth_xdp_xmit() to also look at the new NAPI pointer,
so let's fix that.

Fixes: 6788fa154546 ("veth: allow enabling NAPI even without XDP")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 15b2e3923c47..bdb7ce3cb054 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -486,11 +486,10 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
 
 	rcv_priv = netdev_priv(rcv);
 	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
-	/* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
-	 * side. This means an XDP program is loaded on the peer and the peer
-	 * device is up.
+	/* The napi pointer is set if NAPI is enabled, which ensures that
+	 * xdp_ring is initialized on receive side and the peer device is up.
 	 */
-	if (!rcu_access_pointer(rq->xdp_prog))
+	if (!rcu_access_pointer(rq->napi))
 		goto out;
 
 	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;
-- 
cgit v1.2.3


From 8eda54c5e6c4eb3f3a9b70fdea278f4e0f8496b2 Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Fri, 16 Apr 2021 20:11:22 +0300
Subject: gianfar: Drop GFAR_MQ_POLLING support

Gianfar used to enable all 8 Rx queues (DMA rings) per
ethernet device, even though the controller can only
support 2 interrupt lines at most.  This meant that
multiple Rx queues would have to be grouped per NAPI poll
routine, and the CPU would have to split the budget and
service them in a round robin manner.  The overhead of
this scheme proved to outweight the potential benefits.
The alternative was to introduce the "Single Queue" polling
mode, supporting one Rx queue per NAPI, which became the
default packet processing option and helped improve the
performance of the driver.
MQ_POLLING also relies on undocumeted device tree properties
to specify how to map the 8 Rx and Tx queues to a given
interrupt line (aka "interrupt group").  Using module parameters
to enable this mode wasn't an option either.  Long story short,
MQ_POLLING became obsolete, now it is just dead code, and no
one asked for it so far.
For the Tx queues, multi-queue support (more than 1 Tx queue
per CPU) could be revisited by adding tc MQPRIO support, but
again, one has to consider that there are only 2 interrupt lines.
So the NAPI poll routine would have to service multiple Tx rings.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar.c | 170 ++-----------------------------
 drivers/net/ethernet/freescale/gianfar.h |  17 ----
 2 files changed, 11 insertions(+), 176 deletions(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 339f9567ef9d..f2945abdb041 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -175,10 +175,7 @@ static void gfar_mac_rx_config(struct gfar_private *priv)
 	if (priv->rx_filer_enable) {
 		rctrl |= RCTRL_FILREN | RCTRL_PRSDEP_INIT;
 		/* Program the RIR0 reg with the required distribution */
-		if (priv->poll_mode == GFAR_SQ_POLLING)
-			gfar_write(&regs->rir0, DEFAULT_2RXQ_RIR0);
-		else /* GFAR_MQ_POLLING */
-			gfar_write(&regs->rir0, DEFAULT_8RXQ_RIR0);
+		gfar_write(&regs->rir0, DEFAULT_2RXQ_RIR0);
 	}
 
 	/* Restore PROMISC mode */
@@ -521,29 +518,9 @@ static int gfar_parse_group(struct device_node *np,
 	grp->priv = priv;
 	spin_lock_init(&grp->grplock);
 	if (priv->mode == MQ_MG_MODE) {
-		u32 rxq_mask, txq_mask;
-		int ret;
-
+		/* One Q per interrupt group: Q0 to G0, Q1 to G1 */
 		grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
 		grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
-
-		ret = of_property_read_u32(np, "fsl,rx-bit-map", &rxq_mask);
-		if (!ret) {
-			grp->rx_bit_map = rxq_mask ?
-			rxq_mask : (DEFAULT_MAPPING >> priv->num_grps);
-		}
-
-		ret = of_property_read_u32(np, "fsl,tx-bit-map", &txq_mask);
-		if (!ret) {
-			grp->tx_bit_map = txq_mask ?
-			txq_mask : (DEFAULT_MAPPING >> priv->num_grps);
-		}
-
-		if (priv->poll_mode == GFAR_SQ_POLLING) {
-			/* One Q per interrupt group: Q0 to G0, Q1 to G1 */
-			grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
-			grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps);
-		}
 	} else {
 		grp->rx_bit_map = 0xFF;
 		grp->tx_bit_map = 0xFF;
@@ -649,18 +626,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	u32 stash_len = 0;
 	u32 stash_idx = 0;
 	unsigned int num_tx_qs, num_rx_qs;
-	unsigned short mode, poll_mode;
+	unsigned short mode;
 
 	if (!np)
 		return -ENODEV;
 
-	if (of_device_is_compatible(np, "fsl,etsec2")) {
+	if (of_device_is_compatible(np, "fsl,etsec2"))
 		mode = MQ_MG_MODE;
-		poll_mode = GFAR_SQ_POLLING;
-	} else {
+	else
 		mode = SQ_SG_MODE;
-		poll_mode = GFAR_SQ_POLLING;
-	}
 
 	if (mode == SQ_SG_MODE) {
 		num_tx_qs = 1;
@@ -676,22 +650,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 			return -EINVAL;
 		}
 
-		if (poll_mode == GFAR_SQ_POLLING) {
-			num_tx_qs = num_grps; /* one txq per int group */
-			num_rx_qs = num_grps; /* one rxq per int group */
-		} else { /* GFAR_MQ_POLLING */
-			u32 tx_queues, rx_queues;
-			int ret;
-
-			/* parse the num of HW tx and rx queues */
-			ret = of_property_read_u32(np, "fsl,num_tx_queues",
-						   &tx_queues);
-			num_tx_qs = ret ? 1 : tx_queues;
-
-			ret = of_property_read_u32(np, "fsl,num_rx_queues",
-						   &rx_queues);
-			num_rx_qs = ret ? 1 : rx_queues;
-		}
+		num_tx_qs = num_grps; /* one txq per int group */
+		num_rx_qs = num_grps; /* one rxq per int group */
 	}
 
 	if (num_tx_qs > MAX_TX_QS) {
@@ -717,7 +677,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	priv->ndev = dev;
 
 	priv->mode = mode;
-	priv->poll_mode = poll_mode;
 
 	priv->num_tx_queues = num_tx_qs;
 	netif_set_real_num_rx_queues(dev, num_rx_qs);
@@ -2691,106 +2650,6 @@ static int gfar_poll_tx_sq(struct napi_struct *napi, int budget)
 	return 0;
 }
 
-static int gfar_poll_rx(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_rx);
-	struct gfar_private *priv = gfargrp->priv;
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_rx_q *rx_queue = NULL;
-	int work_done = 0, work_done_per_q = 0;
-	int i, budget_per_q = 0;
-	unsigned long rstat_rxf;
-	int num_act_queues;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_RX_MASK);
-
-	rstat_rxf = gfar_read(&regs->rstat) & RSTAT_RXF_MASK;
-
-	num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS);
-	if (num_act_queues)
-		budget_per_q = budget/num_act_queues;
-
-	for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
-		/* skip queue if not active */
-		if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i)))
-			continue;
-
-		rx_queue = priv->rx_queue[i];
-		work_done_per_q =
-			gfar_clean_rx_ring(rx_queue, budget_per_q);
-		work_done += work_done_per_q;
-
-		/* finished processing this queue */
-		if (work_done_per_q < budget_per_q) {
-			/* clear active queue hw indication */
-			gfar_write(&regs->rstat,
-				   RSTAT_CLEAR_RXF0 >> i);
-			num_act_queues--;
-
-			if (!num_act_queues)
-				break;
-		}
-	}
-
-	if (!num_act_queues) {
-		u32 imask;
-		napi_complete_done(napi, work_done);
-
-		/* Clear the halt bit in RSTAT */
-		gfar_write(&regs->rstat, gfargrp->rstat);
-
-		spin_lock_irq(&gfargrp->grplock);
-		imask = gfar_read(&regs->imask);
-		imask |= IMASK_RX_DEFAULT;
-		gfar_write(&regs->imask, imask);
-		spin_unlock_irq(&gfargrp->grplock);
-	}
-
-	return work_done;
-}
-
-static int gfar_poll_tx(struct napi_struct *napi, int budget)
-{
-	struct gfar_priv_grp *gfargrp =
-		container_of(napi, struct gfar_priv_grp, napi_tx);
-	struct gfar_private *priv = gfargrp->priv;
-	struct gfar __iomem *regs = gfargrp->regs;
-	struct gfar_priv_tx_q *tx_queue = NULL;
-	int has_tx_work = 0;
-	int i;
-
-	/* Clear IEVENT, so interrupts aren't called again
-	 * because of the packets that have already arrived
-	 */
-	gfar_write(&regs->ievent, IEVENT_TX_MASK);
-
-	for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) {
-		tx_queue = priv->tx_queue[i];
-		/* run Tx cleanup to completion */
-		if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) {
-			gfar_clean_tx_ring(tx_queue);
-			has_tx_work = 1;
-		}
-	}
-
-	if (!has_tx_work) {
-		u32 imask;
-		napi_complete(napi);
-
-		spin_lock_irq(&gfargrp->grplock);
-		imask = gfar_read(&regs->imask);
-		imask |= IMASK_TX_DEFAULT;
-		gfar_write(&regs->imask, imask);
-		spin_unlock_irq(&gfargrp->grplock);
-	}
-
-	return 0;
-}
-
 /* GFAR error interrupt handler */
 static irqreturn_t gfar_error(int irq, void *grp_id)
 {
@@ -3348,17 +3207,10 @@ static int gfar_probe(struct platform_device *ofdev)
 
 	/* Register for napi ...We are registering NAPI for each grp */
 	for (i = 0; i < priv->num_grps; i++) {
-		if (priv->poll_mode == GFAR_SQ_POLLING) {
-			netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
-				       gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
-			netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
-				       gfar_poll_tx_sq, 2);
-		} else {
-			netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
-				       gfar_poll_rx, GFAR_DEV_WEIGHT);
-			netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
-				       gfar_poll_tx, 2);
-		}
+		netif_napi_add(dev, &priv->gfargrp[i].napi_rx,
+			       gfar_poll_rx_sq, GFAR_DEV_WEIGHT);
+		netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx,
+				  gfar_poll_tx_sq, 2);
 	}
 
 	if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 8ced783f5302..5ea47df93e5e 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -909,22 +909,6 @@ enum {
 	MQ_MG_MODE
 };
 
-/* GFAR_SQ_POLLING: Single Queue NAPI polling mode
- *	The driver supports a single pair of RX/Tx queues
- *	per interrupt group (Rx/Tx int line). MQ_MG mode
- *	devices have 2 interrupt groups, so the device will
- *	have a total of 2 Tx and 2 Rx queues in this case.
- * GFAR_MQ_POLLING: Multi Queue NAPI polling mode
- *	The driver supports all the 8 Rx and Tx HW queues
- *	each queue mapped by the Device Tree to one of
- *	the 2 interrupt groups. This mode implies significant
- *	processing overhead (CPU and controller level).
- */
-enum gfar_poll_mode {
-	GFAR_SQ_POLLING = 0,
-	GFAR_MQ_POLLING
-};
-
 /*
  * Per TX queue stats
  */
@@ -1105,7 +1089,6 @@ struct gfar_private {
 	unsigned long state;
 
 	unsigned short mode;
-	unsigned short poll_mode;
 	unsigned int num_tx_queues;
 	unsigned int num_rx_queues;
 	unsigned int num_grps;
-- 
cgit v1.2.3


From 221e8c126b7810c1d8d90286cb0c279071be6843 Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Fri, 16 Apr 2021 20:11:23 +0300
Subject: powerpc: dts: fsl: Drop obsolete fsl,rx-bit-map and fsl,tx-bit-map
 properties

These are very old properties that were used by the "gianfar" ethernet
driver.  They don't have documented bindings and are obsolete.

Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi |  4 ----
 arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi |  4 ----
 arch/powerpc/boot/dts/fsl/c293si-post.dtsi    |  4 ----
 arch/powerpc/boot/dts/fsl/p1010si-post.dtsi   | 21 ---------------------
 4 files changed, 33 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
index 0c0efa94cfb4..2a677fd323eb 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
@@ -170,8 +170,6 @@ timer@41100 {
 /include/ "pq3-etsec2-0.dtsi"
 enet0: ethernet@b0000 {
 	queue-group@b0000 {
-		fsl,rx-bit-map = <0xff>;
-		fsl,tx-bit-map = <0xff>;
 		interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;
 	};
 };
@@ -179,8 +177,6 @@ enet0: ethernet@b0000 {
 /include/ "pq3-etsec2-1.dtsi"
 enet1: ethernet@b1000 {
 	queue-group@b1000 {
-		fsl,rx-bit-map = <0xff>;
-		fsl,tx-bit-map = <0xff>;
 		interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
index b5f071574e83..b8e0edd1ac69 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
@@ -190,8 +190,6 @@ crypto@30000 {
 /include/ "pq3-etsec2-0.dtsi"
 enet0: ethernet@b0000 {
 	queue-group@b0000 {
-		fsl,rx-bit-map = <0xff>;
-		fsl,tx-bit-map = <0xff>;
 		interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;
 	};
 };
@@ -199,8 +197,6 @@ enet0: ethernet@b0000 {
 /include/ "pq3-etsec2-1.dtsi"
 enet1: ethernet@b1000 {
 	queue-group@b1000 {
-		fsl,rx-bit-map = <0xff>;
-		fsl,tx-bit-map = <0xff>;
 		interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;
 	};
 };
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
index bd208320bff5..bec0fc36849d 100644
--- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -171,8 +171,6 @@
 	enet0: ethernet@b0000 {
 		queue-group@b0000 {
 			reg = <0x10000 0x1000>;
-			fsl,rx-bit-map = <0xff>;
-			fsl,tx-bit-map = <0xff>;
 		};
 	};
 
@@ -180,8 +178,6 @@
 	enet1: ethernet@b1000 {
 		queue-group@b1000 {
 			reg = <0x11000 0x1000>;
-			fsl,rx-bit-map = <0xff>;
-			fsl,tx-bit-map = <0xff>;
 		};
 	};
 
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index 1b4aafc1f6a2..c2717f31925a 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -172,29 +172,8 @@
 /include/ "pq3-mpic-timer-B.dtsi"
 
 /include/ "pq3-etsec2-0.dtsi"
-	enet0: ethernet@b0000 {
-		queue-group@b0000 {
-			fsl,rx-bit-map = <0xff>;
-			fsl,tx-bit-map = <0xff>;
-		};
-	};
-
 /include/ "pq3-etsec2-1.dtsi"
-	enet1: ethernet@b1000 {
-		queue-group@b1000 {
-			fsl,rx-bit-map = <0xff>;
-			fsl,tx-bit-map = <0xff>;
-		};
-	};
-
 /include/ "pq3-etsec2-2.dtsi"
-	enet2: ethernet@b2000 {
-		queue-group@b2000 {
-			fsl,rx-bit-map = <0xff>;
-			fsl,tx-bit-map = <0xff>;
-		};
-
-	};
 
 	global-utilities@e0000 {
 		compatible = "fsl,p1010-guts";
-- 
cgit v1.2.3


From e5272ad4aab347dde5610c0aedb786219e3ff793 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 16 Apr 2021 14:12:36 -0500
Subject: sctp: Fix out-of-bounds warning in sctp_process_asconf_param()

Fix the following out-of-bounds warning:

net/sctp/sm_make_chunk.c:3150:4: warning: 'memcpy' offset [17, 28] from the object at 'addr' is out of the bounds of referenced subobject 'v4' with type 'struct sockaddr_in' at offset 0 [-Warray-bounds]

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 54e6a708d06e..5f9a7c028274 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3147,7 +3147,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
 		 * primary.
 		 */
 		if (af->is_any(&addr))
-			memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+			memcpy(&addr, sctp_source(asconf), sizeof(addr));
 
 		if (security_sctp_bind_connect(asoc->ep->base.sk,
 					       SCTP_PARAM_SET_PRIMARY,
-- 
cgit v1.2.3


From f117c48c0dc88a50dc2f761fd3df25bd6f9b6c6f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:37 -0700
Subject: docs: networking: extend the statistics documentation

Make the lack of expectations for switching NICs explicit,
describe the new stats.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/statistics.rst | 44 +++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/statistics.rst b/Documentation/networking/statistics.rst
index b748fe44ee02..c9aeb70dafa2 100644
--- a/Documentation/networking/statistics.rst
+++ b/Documentation/networking/statistics.rst
@@ -44,8 +44,27 @@ If `-s` is specified once the detailed errors won't be shown.
 Protocol-specific statistics
 ----------------------------
 
-Some of the interfaces used for configuring devices are also able
-to report related statistics. For example ethtool interface used
+Protocol-specific statistics are exposed via relevant interfaces,
+the same interfaces as are used to configure them.
+
+ethtool
+~~~~~~~
+
+Ethtool exposes common low-level statistics.
+All the standard statistics are expected to be maintained
+by the device, not the driver (as opposed to driver-defined stats
+described in the next section which mix software and hardware stats).
+For devices which contain unmanaged
+switches (e.g. legacy SR-IOV or multi-host NICs) the events counted
+may not pertain exclusively to the packets destined to
+the local host interface. In other words the events may
+be counted at the network port (MAC/PHY blocks) without separation
+for different host side (PCIe) devices. Such ambiguity must not
+be present when internal switch is managed by Linux (so called
+switchdev mode for NICs).
+
+Standard ethtool statistics can be accessed via the interfaces used
+for configuration. For example ethtool interface used
 to configure pause frames can report corresponding hardware counters::
 
   $ ethtool --include-statistics -a eth0
@@ -57,6 +76,27 @@ to configure pause frames can report corresponding hardware counters::
     tx_pause_frames: 1
     rx_pause_frames: 1
 
+General Ethernet statistics not associated with any particular
+functionality are exposed via ``ethtool -S $ifc`` by specifying
+the ``--groups`` parameter::
+
+  $ ethtool -S eth0 --groups eth-phy eth-mac eth-ctrl rmon
+  Stats for eth0:
+  eth-phy-SymbolErrorDuringCarrier: 0
+  eth-mac-FramesTransmittedOK: 1
+  eth-mac-FrameTooLongErrors: 1
+  eth-ctrl-MACControlFramesTransmitted: 1
+  eth-ctrl-MACControlFramesReceived: 0
+  eth-ctrl-UnsupportedOpcodesReceived: 1
+  rmon-etherStatsUndersizePkts: 1
+  rmon-etherStatsJabbers: 0
+  rmon-rx-etherStatsPkts64Octets: 1
+  rmon-rx-etherStatsPkts65to127Octets: 0
+  rmon-rx-etherStatsPkts128to255Octets: 0
+  rmon-tx-etherStatsPkts64Octets: 2
+  rmon-tx-etherStatsPkts65to127Octets: 3
+  rmon-tx-etherStatsPkts128to255Octets: 0
+
 Driver-defined statistics
 -------------------------
 
-- 
cgit v1.2.3


From ddc78b3621242dd691ef4c234a80e316422c2876 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:38 -0700
Subject: docs: ethtool: document standard statistics

Add documentation for ETHTOOL_MSG_STATS_GET.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 82 ++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index f8219e2f489e..48cad2fce147 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -210,6 +210,7 @@ Userspace to kernel:
   ``ETHTOOL_MSG_TUNNEL_INFO_GET``       get tunnel offload info
   ``ETHTOOL_MSG_FEC_GET``               get FEC settings
   ``ETHTOOL_MSG_FEC_SET``               set FEC settings
+  ``ETHTOOL_MSG_STATS_GET``             get standard statistics
   ===================================== ================================
 
 Kernel to userspace:
@@ -246,6 +247,7 @@ Kernel to userspace:
   ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info
   ``ETHTOOL_MSG_FEC_GET_REPLY``         FEC settings
   ``ETHTOOL_MSG_FEC_NTF``               FEC settings
+  ``ETHTOOL_MSG_STATS_GET_REPLY``       standard statistics
   ===================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -1391,6 +1393,86 @@ Kernel response contents:
 ``ETHTOOL_A_MODULE_EEPROM_DATA`` has an attribute length equal to the amount of
 bytes driver actually read.
 
+STATS_GET
+=========
+
+Get standard statistics for the interface. Note that this is not
+a re-implementation of ``ETHTOOL_GSTATS`` which exposed driver-defined
+stats.
+
+Request contents:
+
+  =======================================  ======  ==========================
+  ``ETHTOOL_A_STATS_HEADER``               nested  request header
+  ``ETHTOOL_A_STATS_GROUPS``               bitset  requested groups of stats
+  =======================================  ======  ==========================
+
+Kernel response contents:
+
+ +-----------------------------------+--------+--------------------------------+
+ | ``ETHTOOL_A_STATS_HEADER``        | nested | reply header                   |
+ +-----------------------------------+--------+--------------------------------+
+ | ``ETHTOOL_A_STATS_GRP``           | nested | one or more group of stats     |
+ +-+---------------------------------+--------+--------------------------------+
+ | | ``ETHTOOL_A_STATS_GRP_ID``      | u32    | group ID - ``ETHTOOL_STATS_*`` |
+ +-+---------------------------------+--------+--------------------------------+
+ | | ``ETHTOOL_A_STATS_GRP_SS_ID``   | u32    | string set ID for names        |
+ +-+---------------------------------+--------+--------------------------------+
+ | | ``ETHTOOL_A_STATS_GRP_STAT``    | nested | nest containing a statistic    |
+ +-+---------------------------------+--------+--------------------------------+
+ | | ``ETHTOOL_A_STATS_GRP_HIST_RX`` | nested | histogram statistic (Rx)       |
+ +-+---------------------------------+--------+--------------------------------+
+ | | ``ETHTOOL_A_STATS_GRP_HIST_TX`` | nested | histogram statistic (Tx)       |
+ +-+---------------------------------+--------+--------------------------------+
+
+Users specify which groups of statistics they are requesting via
+the ``ETHTOOL_A_STATS_GROUPS`` bitset. Currently defined values are:
+
+ ====================== ======== ===============================================
+ ETHTOOL_STATS_ETH_MAC  eth-mac  Basic IEEE 802.3 MAC statistics (30.3.1.1.*)
+ ETHTOOL_STATS_ETH_PHY  eth-phy  Basic IEEE 802.3 PHY statistics (30.3.2.1.*)
+ ETHTOOL_STATS_ETH_CTRL eth-ctrl Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*)
+ ETHTOOL_STATS_RMON     rmon     RMON (RFC 2819) statistics
+ ====================== ======== ===============================================
+
+Each group should have a corresponding ``ETHTOOL_A_STATS_GRP`` in the reply.
+``ETHTOOL_A_STATS_GRP_ID`` identifies which group's statistics nest contains.
+``ETHTOOL_A_STATS_GRP_SS_ID`` identifies the string set ID for the names of
+the statistics in the group, if available.
+
+Statistics are added to the ``ETHTOOL_A_STATS_GRP`` nest under
+``ETHTOOL_A_STATS_GRP_STAT``. ``ETHTOOL_A_STATS_GRP_STAT`` should contain
+single 8 byte (u64) attribute inside - the type of that attribute is
+the statistic ID and the value is the value of the statistic.
+Each group has its own interpretation of statistic IDs.
+Attribute IDs correspond to strings from the string set identified
+by ``ETHTOOL_A_STATS_GRP_SS_ID``. Complex statistics (such as RMON histogram
+entries) are also listed inside ``ETHTOOL_A_STATS_GRP`` and do not have
+a string defined in the string set.
+
+RMON "histogram" counters count number of packets within given size range.
+Because RFC does not specify the ranges beyond the standard 1518 MTU devices
+differ in definition of buckets. For this reason the definition of packet ranges
+is left to each driver.
+
+``ETHTOOL_A_STATS_GRP_HIST_RX`` and ``ETHTOOL_A_STATS_GRP_HIST_TX`` nests
+contain the following attributes:
+
+ ================================= ====== ===================================
+ ETHTOOL_A_STATS_RMON_HIST_BKT_LOW u32    low bound of the packet size bucket
+ ETHTOOL_A_STATS_RMON_HIST_BKT_HI  u32    high bound of the bucket
+ ETHTOOL_A_STATS_RMON_HIST_VAL     u64    packet counter
+ ================================= ====== ===================================
+
+Low and high bounds are inclusive, for example:
+
+ ============================= ==== ====
+ RFC statistic                 low  high
+ ============================= ==== ====
+ etherStatsPkts64Octets          0    64
+ etherStatsPkts512to1023Octets 512  1023
+ ============================= ==== ====
+
 Request translation
 ===================
 
-- 
cgit v1.2.3


From f09ea6fb12723d6726293d68de00b6307368bd76 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:39 -0700
Subject: ethtool: add a new command for reading standard stats

Add an interface for reading standard stats, including
stats which don't have a corresponding control interface.

Start with IEEE 802.3 PHY stats. There seems to be only
one stat to expose there.

Define API to not require user space changes when new
stats or groups are added. Groups are based on bitset,
stats have a string set associated.

v1: wrap stats in a nest

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              |  10 ++
 include/uapi/linux/ethtool.h         |   4 +
 include/uapi/linux/ethtool_netlink.h |  47 ++++++++
 net/ethtool/Makefile                 |   2 +-
 net/ethtool/netlink.c                |  10 ++
 net/ethtool/netlink.h                |   5 +
 net/ethtool/stats.c                  | 200 +++++++++++++++++++++++++++++++++++
 net/ethtool/strset.c                 |  10 ++
 8 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 net/ethtool/stats.c

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 112a85b57f1f..2d5455eedbf4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -250,6 +250,13 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n)
 		stats[n] = ETHTOOL_STAT_NOT_SET;
 }
 
+/* Basic IEEE 802.3 PHY statistics (30.3.2.1.*), not otherwise exposed
+ * via a more targeted API.
+ */
+struct ethtool_eth_phy_stats {
+	u64 SymbolErrorDuringCarrier;
+};
+
 /**
  * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames
  * @tx_pause_frames: transmitted pause frame count. Reported to user space
@@ -487,6 +494,7 @@ struct ethtool_module_eeprom {
  * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from
  *	specified page. Returns a negative error code or the amount of bytes
  *	read.
+ * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -597,6 +605,8 @@ struct ethtool_ops {
 	int	(*get_module_eeprom_by_page)(struct net_device *dev,
 					     const struct ethtool_module_eeprom *page,
 					     struct netlink_ext_ack *extack);
+	void	(*get_eth_phy_stats)(struct net_device *dev,
+				     struct ethtool_eth_phy_stats *phy_stats);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index f91e079e3108..190ae6e03918 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -669,6 +669,8 @@ enum ethtool_link_ext_substate_cable_issue {
  * @ETH_SS_TS_TX_TYPES: timestamping Tx types
  * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
  * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+ * @ETH_SS_STATS_STD: standardized stats
+ * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -689,6 +691,8 @@ enum ethtool_stringset {
 	ETH_SS_TS_TX_TYPES,
 	ETH_SS_TS_RX_FILTERS,
 	ETH_SS_UDP_TUNNEL_TYPES,
+	ETH_SS_STATS_STD,
+	ETH_SS_STATS_ETH_PHY,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 3a2b31ccbc5b..a54cfe625f34 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -45,6 +45,7 @@ enum {
 	ETHTOOL_MSG_FEC_GET,
 	ETHTOOL_MSG_FEC_SET,
 	ETHTOOL_MSG_MODULE_EEPROM_GET,
+	ETHTOOL_MSG_STATS_GET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -86,6 +87,7 @@ enum {
 	ETHTOOL_MSG_FEC_GET_REPLY,
 	ETHTOOL_MSG_FEC_NTF,
 	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
+	ETHTOOL_MSG_STATS_GET_REPLY,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -679,6 +681,51 @@ enum {
 	ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1)
 };
 
+/* STATS */
+
+enum {
+	ETHTOOL_A_STATS_UNSPEC,
+	ETHTOOL_A_STATS_PAD,
+	ETHTOOL_A_STATS_HEADER,			/* nest - _A_HEADER_* */
+	ETHTOOL_A_STATS_GROUPS,			/* bitset */
+
+	ETHTOOL_A_STATS_GRP,			/* nest - _A_STATS_GRP_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_CNT,
+	ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1)
+};
+
+enum {
+	ETHTOOL_STATS_ETH_PHY,
+
+	/* add new constants above here */
+	__ETHTOOL_STATS_CNT
+};
+
+enum {
+	ETHTOOL_A_STATS_GRP_UNSPEC,
+	ETHTOOL_A_STATS_GRP_PAD,
+
+	ETHTOOL_A_STATS_GRP_ID,			/* u32 */
+	ETHTOOL_A_STATS_GRP_SS_ID,		/* u32 */
+
+	ETHTOOL_A_STATS_GRP_STAT,		/* nest */
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_GRP_CNT,
+	ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1)
+};
+
+enum {
+	/* 30.3.2.1.5 aSymbolErrorDuringCarrier */
+	ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR,
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_ETH_PHY_CNT,
+	ETHTOOL_A_STATS_ETH_PHY_MAX = (__ETHTOOL_A_STATS_ETH_PHY_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 83842685fd8c..723c9a8a8cdf 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o fec.o eeprom.o
+		   tunnels.o fec.o eeprom.o stats.o
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 5f5d7c4b3d4a..290012d0d11d 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -247,6 +247,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_FEC_GET]		= &ethnl_fec_request_ops,
 	[ETHTOOL_MSG_TSINFO_GET]	= &ethnl_tsinfo_request_ops,
 	[ETHTOOL_MSG_MODULE_EEPROM_GET]	= &ethnl_module_eeprom_request_ops,
+	[ETHTOOL_MSG_STATS_GET]		= &ethnl_stats_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -942,6 +943,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_module_eeprom_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_module_eeprom_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_STATS_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_stats_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 4305ac971bb0..9d88983b6597 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -346,6 +346,7 @@ extern const struct ethnl_request_ops ethnl_eee_request_ops;
 extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
 extern const struct ethnl_request_ops ethnl_fec_request_ops;
 extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
+extern const struct ethnl_request_ops ethnl_stats_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -380,6 +381,7 @@ extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INF
 extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
 extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
 extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
+extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -399,4 +401,7 @@ int ethnl_tunnel_info_start(struct netlink_callback *cb);
 int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
 int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
 
+extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
+extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
+
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
new file mode 100644
index 000000000000..fd8f47178c06
--- /dev/null
+++ b/net/ethtool/stats.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct stats_req_info {
+	struct ethnl_req_info		base;
+	DECLARE_BITMAP(stat_mask, __ETHTOOL_STATS_CNT);
+};
+
+#define STATS_REQINFO(__req_base) \
+	container_of(__req_base, struct stats_req_info, base)
+
+struct stats_reply_data {
+	struct ethnl_reply_data		base;
+	struct ethtool_eth_phy_stats	phy_stats;
+};
+
+#define STATS_REPDATA(__reply_base) \
+	container_of(__reply_base, struct stats_reply_data, base)
+
+const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_STATS_ETH_PHY]			= "eth-phy",
+};
+
+const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR]	= "SymbolErrorDuringCarrier",
+};
+
+const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
+	[ETHTOOL_A_STATS_HEADER]	=
+		NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_STATS_GROUPS]	= { .type = NLA_NESTED },
+};
+
+static int stats_parse_request(struct ethnl_req_info *req_base,
+			       struct nlattr **tb,
+			       struct netlink_ext_ack *extack)
+{
+	struct stats_req_info *req_info = STATS_REQINFO(req_base);
+	bool mod = false;
+	int err;
+
+	err = ethnl_update_bitset(req_info->stat_mask, __ETHTOOL_STATS_CNT,
+				  tb[ETHTOOL_A_STATS_GROUPS], stats_std_names,
+				  extack, &mod);
+	if (err)
+		return err;
+
+	if (!mod) {
+		NL_SET_ERR_MSG(extack, "no stats requested");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int stats_prepare_data(const struct ethnl_req_info *req_base,
+			      struct ethnl_reply_data *reply_base,
+			      struct genl_info *info)
+{
+	const struct stats_req_info *req_info = STATS_REQINFO(req_base);
+	struct stats_reply_data *data = STATS_REPDATA(reply_base);
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
+
+	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
+	    dev->ethtool_ops->get_eth_phy_stats)
+		dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
+
+	ethnl_ops_complete(dev);
+	return 0;
+}
+
+static int stats_reply_size(const struct ethnl_req_info *req_base,
+			    const struct ethnl_reply_data *reply_base)
+{
+	const struct stats_req_info *req_info = STATS_REQINFO(req_base);
+	unsigned int n_grps = 0, n_stats = 0;
+	int len = 0;
+
+	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) {
+		n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64);
+		n_grps++;
+	}
+
+	len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
+			 nla_total_size(4) + /* _A_STATS_GRP_ID */
+			 nla_total_size(4)); /* _A_STATS_GRP_SS_ID */
+	len += n_stats * (nla_total_size(0) + /* _A_STATS_GRP_STAT */
+			  nla_total_size_64bit(sizeof(u64)));
+
+	return len;
+}
+
+static int stat_put(struct sk_buff *skb, u16 attrtype, u64 val)
+{
+	struct nlattr *nest;
+	int ret;
+
+	if (val == ETHTOOL_STAT_NOT_SET)
+		return 0;
+
+	/* We want to start stats attr types from 0, so we don't have a type
+	 * for pad inside ETHTOOL_A_STATS_GRP_STAT. Pad things on the outside
+	 * of ETHTOOL_A_STATS_GRP_STAT. Since we're one nest away from the
+	 * actual attr we're 4B off - nla_need_padding_for_64bit() & co.
+	 * can't be used.
+	 */
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (!IS_ALIGNED((unsigned long)skb_tail_pointer(skb), 8))
+		if (!nla_reserve(skb, ETHTOOL_A_STATS_GRP_PAD, 0))
+			return -EMSGSIZE;
+#endif
+
+	nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP_STAT);
+	if (!nest)
+		return -EMSGSIZE;
+
+	ret = nla_put_u64_64bit(skb, attrtype, val, -1 /* not used */);
+	if (ret) {
+		nla_nest_cancel(skb, nest);
+		return ret;
+	}
+
+	nla_nest_end(skb, nest);
+	return 0;
+}
+
+static int stats_put_phy_stats(struct sk_buff *skb,
+			       const struct stats_reply_data *data)
+{
+	if (stat_put(skb, ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR,
+		     data->phy_stats.SymbolErrorDuringCarrier))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int stats_put_stats(struct sk_buff *skb,
+			   const struct stats_reply_data *data,
+			   u32 id, u32 ss_id,
+			   int (*cb)(struct sk_buff *skb,
+				     const struct stats_reply_data *data))
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, ETHTOOL_A_STATS_GRP);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_ID, id) ||
+	    nla_put_u32(skb, ETHTOOL_A_STATS_GRP_SS_ID, ss_id))
+		goto err_cancel;
+
+	if (cb(skb, data))
+		goto err_cancel;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+err_cancel:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int stats_fill_reply(struct sk_buff *skb,
+			    const struct ethnl_req_info *req_base,
+			    const struct ethnl_reply_data *reply_base)
+{
+	const struct stats_req_info *req_info = STATS_REQINFO(req_base);
+	const struct stats_reply_data *data = STATS_REPDATA(reply_base);
+	int ret = 0;
+
+	if (!ret && test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask))
+		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY,
+				      ETH_SS_STATS_ETH_PHY,
+				      stats_put_phy_stats);
+
+	return ret;
+}
+
+const struct ethnl_request_ops ethnl_stats_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_STATS_GET,
+	.reply_cmd		= ETHTOOL_MSG_STATS_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_STATS_HEADER,
+	.req_info_size		= sizeof(struct stats_req_info),
+	.reply_data_size	= sizeof(struct stats_reply_data),
+
+	.parse_request		= stats_parse_request,
+	.prepare_data		= stats_prepare_data,
+	.reply_size		= stats_reply_size,
+	.fill_reply		= stats_fill_reply,
+};
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index c3a5489964cd..5f3c73587ff4 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -80,6 +80,16 @@ static const struct strset_info info_template[] = {
 		.count		= __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
 		.strings	= udp_tunnel_type_names,
 	},
+	[ETH_SS_STATS_STD] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_STATS_CNT,
+		.strings	= stats_std_names,
+	},
+	[ETH_SS_STATS_ETH_PHY] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_A_STATS_ETH_PHY_CNT,
+		.strings	= stats_eth_phy_names,
+	},
 };
 
 struct strset_req_info {
-- 
cgit v1.2.3


From ca2244547ec7505d1cf61d43f5e76e3ffd99cf77 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:40 -0700
Subject: ethtool: add interface to read standard MAC stats

Most of the MAC statistics are included in
struct rtnl_link_stats64, but some fields
are aggregated. Besides it's good to expose
these clearly hardware stats separately.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              | 31 +++++++++++++
 include/uapi/linux/ethtool.h         |  2 +
 include/uapi/linux/ethtool_netlink.h | 53 +++++++++++++++++++++
 net/ethtool/netlink.h                |  1 +
 net/ethtool/stats.c                  | 90 ++++++++++++++++++++++++++++++++++++
 net/ethtool/strset.c                 |  5 ++
 6 files changed, 182 insertions(+)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2d5455eedbf4..3c689a13e679 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -250,6 +250,34 @@ static inline void ethtool_stats_init(u64 *stats, unsigned int n)
 		stats[n] = ETHTOOL_STAT_NOT_SET;
 }
 
+/* Basic IEEE 802.3 MAC statistics (30.3.1.1.*), not otherwise exposed
+ * via a more targeted API.
+ */
+struct ethtool_eth_mac_stats {
+	u64 FramesTransmittedOK;
+	u64 SingleCollisionFrames;
+	u64 MultipleCollisionFrames;
+	u64 FramesReceivedOK;
+	u64 FrameCheckSequenceErrors;
+	u64 AlignmentErrors;
+	u64 OctetsTransmittedOK;
+	u64 FramesWithDeferredXmissions;
+	u64 LateCollisions;
+	u64 FramesAbortedDueToXSColls;
+	u64 FramesLostDueToIntMACXmitError;
+	u64 CarrierSenseErrors;
+	u64 OctetsReceivedOK;
+	u64 FramesLostDueToIntMACRcvError;
+	u64 MulticastFramesXmittedOK;
+	u64 BroadcastFramesXmittedOK;
+	u64 FramesWithExcessiveDeferral;
+	u64 MulticastFramesReceivedOK;
+	u64 BroadcastFramesReceivedOK;
+	u64 InRangeLengthErrors;
+	u64 OutOfRangeLengthField;
+	u64 FrameTooLongErrors;
+};
+
 /* Basic IEEE 802.3 PHY statistics (30.3.2.1.*), not otherwise exposed
  * via a more targeted API.
  */
@@ -495,6 +523,7 @@ struct ethtool_module_eeprom {
  *	specified page. Returns a negative error code or the amount of bytes
  *	read.
  * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
+ * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -607,6 +636,8 @@ struct ethtool_ops {
 					     struct netlink_ext_ack *extack);
 	void	(*get_eth_phy_stats)(struct net_device *dev,
 				     struct ethtool_eth_phy_stats *phy_stats);
+	void	(*get_eth_mac_stats)(struct net_device *dev,
+				     struct ethtool_eth_mac_stats *mac_stats);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 190ae6e03918..c227376d811a 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -671,6 +671,7 @@ enum ethtool_link_ext_substate_cable_issue {
  * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
  * @ETH_SS_STATS_STD: standardized stats
  * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
+ * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -693,6 +694,7 @@ enum ethtool_stringset {
 	ETH_SS_UDP_TUNNEL_TYPES,
 	ETH_SS_STATS_STD,
 	ETH_SS_STATS_ETH_PHY,
+	ETH_SS_STATS_ETH_MAC,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index a54cfe625f34..f0fbe8f4eb1b 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -698,6 +698,7 @@ enum {
 
 enum {
 	ETHTOOL_STATS_ETH_PHY,
+	ETHTOOL_STATS_ETH_MAC,
 
 	/* add new constants above here */
 	__ETHTOOL_STATS_CNT
@@ -726,6 +727,58 @@ enum {
 	ETHTOOL_A_STATS_ETH_PHY_MAX = (__ETHTOOL_A_STATS_ETH_PHY_CNT - 1)
 };
 
+enum {
+	/* 30.3.1.1.2 aFramesTransmittedOK */
+	ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT,
+	/* 30.3.1.1.3 aSingleCollisionFrames */
+	ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL,
+	/* 30.3.1.1.4 aMultipleCollisionFrames */
+	ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL,
+	/* 30.3.1.1.5 aFramesReceivedOK */
+	ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT,
+	/* 30.3.1.1.6 aFrameCheckSequenceErrors */
+	ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR,
+	/* 30.3.1.1.7 aAlignmentErrors */
+	ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR,
+	/* 30.3.1.1.8 aOctetsTransmittedOK */
+	ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES,
+	/* 30.3.1.1.9 aFramesWithDeferredXmissions */
+	ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER,
+	/* 30.3.1.1.10 aLateCollisions */
+	ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL,
+	/* 30.3.1.1.11 aFramesAbortedDueToXSColls */
+	ETHTOOL_A_STATS_ETH_MAC_11_XS_COL,
+	/* 30.3.1.1.12 aFramesLostDueToIntMACXmitError */
+	ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR,
+	/* 30.3.1.1.13 aCarrierSenseErrors */
+	ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR,
+	/* 30.3.1.1.14 aOctetsReceivedOK */
+	ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES,
+	/* 30.3.1.1.15 aFramesLostDueToIntMACRcvError */
+	ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR,
+
+	/* 30.3.1.1.18 aMulticastFramesXmittedOK */
+	ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST,
+	/* 30.3.1.1.19 aBroadcastFramesXmittedOK */
+	ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST,
+	/* 30.3.1.1.20 aFramesWithExcessiveDeferral */
+	ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER,
+	/* 30.3.1.1.21 aMulticastFramesReceivedOK */
+	ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST,
+	/* 30.3.1.1.22 aBroadcastFramesReceivedOK */
+	ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST,
+	/* 30.3.1.1.23 aInRangeLengthErrors */
+	ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR,
+	/* 30.3.1.1.24 aOutOfRangeLengthField */
+	ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN,
+	/* 30.3.1.1.25 aFrameTooLongErrors */
+	ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR,
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_ETH_MAC_CNT,
+	ETHTOOL_A_STATS_ETH_MAC_MAX = (__ETHTOOL_A_STATS_ETH_MAC_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 9d88983b6597..c70bac5329af 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -403,5 +403,6 @@ int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
 
 extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
+extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN];
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index fd8f47178c06..e80175872226 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -15,6 +15,7 @@ struct stats_req_info {
 struct stats_reply_data {
 	struct ethnl_reply_data		base;
 	struct ethtool_eth_phy_stats	phy_stats;
+	struct ethtool_eth_mac_stats	mac_stats;
 };
 
 #define STATS_REPDATA(__reply_base) \
@@ -22,12 +23,38 @@ struct stats_reply_data {
 
 const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_STATS_ETH_PHY]			= "eth-phy",
+	[ETHTOOL_STATS_ETH_MAC]			= "eth-mac",
 };
 
 const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR]	= "SymbolErrorDuringCarrier",
 };
 
+const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT]	= "FramesTransmittedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL]	= "SingleCollisionFrames",
+	[ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL]	= "MultipleCollisionFrames",
+	[ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT]	= "FramesReceivedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR]	= "FrameCheckSequenceErrors",
+	[ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR]	= "AlignmentErrors",
+	[ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES]	= "OctetsTransmittedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER]	= "FramesWithDeferredXmissions",
+	[ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL]	= "LateCollisions",
+	[ETHTOOL_A_STATS_ETH_MAC_11_XS_COL]	= "FramesAbortedDueToXSColls",
+	[ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR]	= "FramesLostDueToIntMACXmitError",
+	[ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR]	= "CarrierSenseErrors",
+	[ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES]	= "OctetsReceivedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR]	= "FramesLostDueToIntMACRcvError",
+	[ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST]	= "MulticastFramesXmittedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST]	= "BroadcastFramesXmittedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER]	= "FramesWithExcessiveDeferral",
+	[ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST]	= "MulticastFramesReceivedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST]	= "BroadcastFramesReceivedOK",
+	[ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR]	= "InRangeLengthErrors",
+	[ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN]	= "OutOfRangeLengthField",
+	[ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR]	= "FrameTooLongErrors",
+};
+
 const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
 	[ETHTOOL_A_STATS_HEADER]	=
 		NLA_POLICY_NESTED(ethnl_header_policy),
@@ -70,10 +97,14 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 		return ret;
 
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
+	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
 
 	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
 	    dev->ethtool_ops->get_eth_phy_stats)
 		dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
+	if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask) &&
+	    dev->ethtool_ops->get_eth_mac_stats)
+		dev->ethtool_ops->get_eth_mac_stats(dev, &data->mac_stats);
 
 	ethnl_ops_complete(dev);
 	return 0;
@@ -90,6 +121,10 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
 		n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64);
 		n_grps++;
 	}
+	if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask)) {
+		n_stats += sizeof(struct ethtool_eth_mac_stats) / sizeof(u64);
+		n_grps++;
+	}
 
 	len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
 			 nla_total_size(4) + /* _A_STATS_GRP_ID */
@@ -143,6 +178,57 @@ static int stats_put_phy_stats(struct sk_buff *skb,
 	return 0;
 }
 
+static int stats_put_mac_stats(struct sk_buff *skb,
+			       const struct stats_reply_data *data)
+{
+	if (stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_2_TX_PKT,
+		     data->mac_stats.FramesTransmittedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_3_SINGLE_COL,
+		     data->mac_stats.SingleCollisionFrames) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_4_MULTI_COL,
+		     data->mac_stats.MultipleCollisionFrames) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_5_RX_PKT,
+		     data->mac_stats.FramesReceivedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_6_FCS_ERR,
+		     data->mac_stats.FrameCheckSequenceErrors) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_7_ALIGN_ERR,
+		     data->mac_stats.AlignmentErrors) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_8_TX_BYTES,
+		     data->mac_stats.OctetsTransmittedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_9_TX_DEFER,
+		     data->mac_stats.FramesWithDeferredXmissions) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_10_LATE_COL,
+		     data->mac_stats.LateCollisions) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_11_XS_COL,
+		     data->mac_stats.FramesAbortedDueToXSColls) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_12_TX_INT_ERR,
+		     data->mac_stats.FramesLostDueToIntMACXmitError) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_13_CS_ERR,
+		     data->mac_stats.CarrierSenseErrors) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_14_RX_BYTES,
+		     data->mac_stats.OctetsReceivedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_15_RX_INT_ERR,
+		     data->mac_stats.FramesLostDueToIntMACRcvError) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_18_TX_MCAST,
+		     data->mac_stats.MulticastFramesXmittedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_19_TX_BCAST,
+		     data->mac_stats.BroadcastFramesXmittedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_20_XS_DEFER,
+		     data->mac_stats.FramesWithExcessiveDeferral) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_21_RX_MCAST,
+		     data->mac_stats.MulticastFramesReceivedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_22_RX_BCAST,
+		     data->mac_stats.BroadcastFramesReceivedOK) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_23_IR_LEN_ERR,
+		     data->mac_stats.InRangeLengthErrors) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_24_OOR_LEN,
+		     data->mac_stats.OutOfRangeLengthField) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR,
+		     data->mac_stats.FrameTooLongErrors))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int stats_put_stats(struct sk_buff *skb,
 			   const struct stats_reply_data *data,
 			   u32 id, u32 ss_id,
@@ -182,6 +268,10 @@ static int stats_fill_reply(struct sk_buff *skb,
 		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY,
 				      ETH_SS_STATS_ETH_PHY,
 				      stats_put_phy_stats);
+	if (!ret && test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask))
+		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_MAC,
+				      ETH_SS_STATS_ETH_MAC,
+				      stats_put_mac_stats);
 
 	return ret;
 }
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index 5f3c73587ff4..a8aac7bcfcc9 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -90,6 +90,11 @@ static const struct strset_info info_template[] = {
 		.count		= __ETHTOOL_A_STATS_ETH_PHY_CNT,
 		.strings	= stats_eth_phy_names,
 	},
+	[ETH_SS_STATS_ETH_MAC] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_A_STATS_ETH_MAC_CNT,
+		.strings	= stats_eth_mac_names,
+	},
 };
 
 struct strset_req_info {
-- 
cgit v1.2.3


From bfad2b979ddcc330c08bb071eb3c3f7b3411a681 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:41 -0700
Subject: ethtool: add interface to read standard MAC Ctrl stats

Number of devices maintains the standard-based MAC control
counters for control frames. Add a API for those.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              | 12 ++++++++++++
 include/uapi/linux/ethtool.h         |  2 ++
 include/uapi/linux/ethtool_netlink.h | 14 ++++++++++++++
 net/ethtool/netlink.h                |  1 +
 net/ethtool/stats.c                  | 33 +++++++++++++++++++++++++++++++++
 net/ethtool/strset.c                 |  5 +++++
 6 files changed, 67 insertions(+)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3c689a13e679..1ca6b836f9fe 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -285,6 +285,15 @@ struct ethtool_eth_phy_stats {
 	u64 SymbolErrorDuringCarrier;
 };
 
+/* Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*), not otherwise exposed
+ * via a more targeted API.
+ */
+struct ethtool_eth_ctrl_stats {
+	u64 MACControlFramesTransmitted;
+	u64 MACControlFramesReceived;
+	u64 UnsupportedOpcodesReceived;
+};
+
 /**
  * struct ethtool_pause_stats - statistics for IEEE 802.3x pause frames
  * @tx_pause_frames: transmitted pause frame count. Reported to user space
@@ -524,6 +533,7 @@ struct ethtool_module_eeprom {
  *	read.
  * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
  * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics.
+ * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -638,6 +648,8 @@ struct ethtool_ops {
 				     struct ethtool_eth_phy_stats *phy_stats);
 	void	(*get_eth_mac_stats)(struct net_device *dev,
 				     struct ethtool_eth_mac_stats *mac_stats);
+	void	(*get_eth_ctrl_stats)(struct net_device *dev,
+				      struct ethtool_eth_ctrl_stats *ctrl_stats);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index c227376d811a..9cb8df89d4f2 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -672,6 +672,7 @@ enum ethtool_link_ext_substate_cable_issue {
  * @ETH_SS_STATS_STD: standardized stats
  * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
  * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
+ * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -695,6 +696,7 @@ enum ethtool_stringset {
 	ETH_SS_STATS_STD,
 	ETH_SS_STATS_ETH_PHY,
 	ETH_SS_STATS_ETH_MAC,
+	ETH_SS_STATS_ETH_CTRL,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index f0fbe8f4eb1b..2ea5f049df6a 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -699,6 +699,7 @@ enum {
 enum {
 	ETHTOOL_STATS_ETH_PHY,
 	ETHTOOL_STATS_ETH_MAC,
+	ETHTOOL_STATS_ETH_CTRL,
 
 	/* add new constants above here */
 	__ETHTOOL_STATS_CNT
@@ -779,6 +780,19 @@ enum {
 	ETHTOOL_A_STATS_ETH_MAC_MAX = (__ETHTOOL_A_STATS_ETH_MAC_CNT - 1)
 };
 
+enum {
+	/* 30.3.3.3 aMACControlFramesTransmitted */
+	ETHTOOL_A_STATS_ETH_CTRL_3_TX,
+	/* 30.3.3.4 aMACControlFramesReceived */
+	ETHTOOL_A_STATS_ETH_CTRL_4_RX,
+	/* 30.3.3.5 aUnsupportedOpcodesReceived */
+	ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP,
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_ETH_CTRL_CNT,
+	ETHTOOL_A_STATS_ETH_CTRL_MAX = (__ETHTOOL_A_STATS_ETH_CTRL_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index c70bac5329af..febfa61e52e2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -404,5 +404,6 @@ int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
 extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN];
+extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN];
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index e80175872226..f4fded66731c 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -16,6 +16,7 @@ struct stats_reply_data {
 	struct ethnl_reply_data		base;
 	struct ethtool_eth_phy_stats	phy_stats;
 	struct ethtool_eth_mac_stats	mac_stats;
+	struct ethtool_eth_ctrl_stats	ctrl_stats;
 };
 
 #define STATS_REPDATA(__reply_base) \
@@ -24,6 +25,7 @@ struct stats_reply_data {
 const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_STATS_ETH_PHY]			= "eth-phy",
 	[ETHTOOL_STATS_ETH_MAC]			= "eth-mac",
+	[ETHTOOL_STATS_ETH_CTRL]		= "eth-ctrl",
 };
 
 const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
@@ -55,6 +57,12 @@ const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN] =
 	[ETHTOOL_A_STATS_ETH_MAC_25_TOO_LONG_ERR]	= "FrameTooLongErrors",
 };
 
+const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_A_STATS_ETH_CTRL_3_TX]		= "MACControlFramesTransmitted",
+	[ETHTOOL_A_STATS_ETH_CTRL_4_RX]		= "MACControlFramesReceived",
+	[ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP]	= "UnsupportedOpcodesReceived",
+};
+
 const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
 	[ETHTOOL_A_STATS_HEADER]	=
 		NLA_POLICY_NESTED(ethnl_header_policy),
@@ -98,6 +106,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
 	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
+	memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
 
 	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
 	    dev->ethtool_ops->get_eth_phy_stats)
@@ -105,6 +114,9 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	if (test_bit(ETHTOOL_STATS_ETH_MAC, req_info->stat_mask) &&
 	    dev->ethtool_ops->get_eth_mac_stats)
 		dev->ethtool_ops->get_eth_mac_stats(dev, &data->mac_stats);
+	if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask) &&
+	    dev->ethtool_ops->get_eth_ctrl_stats)
+		dev->ethtool_ops->get_eth_ctrl_stats(dev, &data->ctrl_stats);
 
 	ethnl_ops_complete(dev);
 	return 0;
@@ -125,6 +137,10 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
 		n_stats += sizeof(struct ethtool_eth_mac_stats) / sizeof(u64);
 		n_grps++;
 	}
+	if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask)) {
+		n_stats += sizeof(struct ethtool_eth_ctrl_stats) / sizeof(u64);
+		n_grps++;
+	}
 
 	len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
 			 nla_total_size(4) + /* _A_STATS_GRP_ID */
@@ -229,6 +245,19 @@ static int stats_put_mac_stats(struct sk_buff *skb,
 	return 0;
 }
 
+static int stats_put_ctrl_stats(struct sk_buff *skb,
+				const struct stats_reply_data *data)
+{
+	if (stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_3_TX,
+		     data->ctrl_stats.MACControlFramesTransmitted) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_4_RX,
+		     data->ctrl_stats.MACControlFramesReceived) ||
+	    stat_put(skb, ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP,
+		     data->ctrl_stats.UnsupportedOpcodesReceived))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int stats_put_stats(struct sk_buff *skb,
 			   const struct stats_reply_data *data,
 			   u32 id, u32 ss_id,
@@ -272,6 +301,10 @@ static int stats_fill_reply(struct sk_buff *skb,
 		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_MAC,
 				      ETH_SS_STATS_ETH_MAC,
 				      stats_put_mac_stats);
+	if (!ret && test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask))
+		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_CTRL,
+				      ETH_SS_STATS_ETH_CTRL,
+				      stats_put_ctrl_stats);
 
 	return ret;
 }
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index a8aac7bcfcc9..a33c603a7a02 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -95,6 +95,11 @@ static const struct strset_info info_template[] = {
 		.count		= __ETHTOOL_A_STATS_ETH_MAC_CNT,
 		.strings	= stats_eth_mac_names,
 	},
+	[ETH_SS_STATS_ETH_CTRL] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_A_STATS_ETH_CTRL_CNT,
+		.strings	= stats_eth_ctrl_names,
+	},
 };
 
 struct strset_req_info {
-- 
cgit v1.2.3


From a8b06e9d40d8b18c41c8ce060e8dc004fa59e708 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:42 -0700
Subject: ethtool: add interface to read RMON stats

Most devices maintain RMON (RFC 2819) stats - particularly
the "histogram" of packets received by size. Unlike other
RFCs which duplicate IEEE stats, the short/oversized frame
counters in RMON don't seem to match IEEE stats 1-to-1 either,
so expose those, too. Do not expose basic packet, CRC errors
etc - those are already otherwise covered.

Because standard defines packet ranges only up to 1518, and
everything above that should theoretically be "oversized"
- devices often create their own ranges.

Going beyond what the RFC defines - expose the "histogram"
in the Tx direction (assume for now that the ranges will
be the same).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h              | 43 ++++++++++++++++++
 include/uapi/linux/ethtool.h         |  2 +
 include/uapi/linux/ethtool_netlink.h | 23 ++++++++++
 net/ethtool/netlink.h                |  1 +
 net/ethtool/stats.c                  | 87 ++++++++++++++++++++++++++++++++++++
 net/ethtool/strset.c                 |  5 +++
 6 files changed, 161 insertions(+)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 1ca6b836f9fe..e030f7510cd3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -346,6 +346,44 @@ struct ethtool_fec_stats {
 	} corrected_blocks, uncorrectable_blocks, corrected_bits;
 };
 
+/**
+ * struct ethtool_rmon_hist_range - byte range for histogram statistics
+ * @low: low bound of the bucket (inclusive)
+ * @high: high bound of the bucket (inclusive)
+ */
+struct ethtool_rmon_hist_range {
+	u16 low;
+	u16 high;
+};
+
+#define ETHTOOL_RMON_HIST_MAX	10
+
+/**
+ * struct ethtool_rmon_stats - selected RMON (RFC 2819) statistics
+ * @undersize_pkts: Equivalent to `etherStatsUndersizePkts` from the RFC.
+ * @oversize_pkts: Equivalent to `etherStatsOversizePkts` from the RFC.
+ * @fragments: Equivalent to `etherStatsFragments` from the RFC.
+ * @jabbers: Equivalent to `etherStatsJabbers` from the RFC.
+ * @hist: Packet counter for packet length buckets (e.g.
+ *	`etherStatsPkts128to255Octets` from the RFC).
+ * @hist_tx: Tx counters in similar form to @hist, not defined in the RFC.
+ *
+ * Selection of RMON (RFC 2819) statistics which are not exposed via different
+ * APIs, primarily the packet-length-based counters.
+ * Unfortunately different designs choose different buckets beyond
+ * the 1024B mark (jumbo frame teritory), so the definition of the bucket
+ * ranges is left to the driver.
+ */
+struct ethtool_rmon_stats {
+	u64 undersize_pkts;
+	u64 oversize_pkts;
+	u64 fragments;
+	u64 jabbers;
+
+	u64 hist[ETHTOOL_RMON_HIST_MAX];
+	u64 hist_tx[ETHTOOL_RMON_HIST_MAX];
+};
+
 #define ETH_MODULE_EEPROM_PAGE_LEN	128
 #define ETH_MODULE_MAX_I2C_ADDRESS	0x7f
 
@@ -534,6 +572,8 @@ struct ethtool_module_eeprom {
  * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics.
  * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics.
  * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics.
+ * @get_rmon_stats: Query some of the RMON (RFC 2819) statistics.
+ *	Set %ranges to a pointer to zero-terminated array of byte ranges.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -650,6 +690,9 @@ struct ethtool_ops {
 				     struct ethtool_eth_mac_stats *mac_stats);
 	void	(*get_eth_ctrl_stats)(struct net_device *dev,
 				      struct ethtool_eth_ctrl_stats *ctrl_stats);
+	void	(*get_rmon_stats)(struct net_device *dev,
+				  struct ethtool_rmon_stats *rmon_stats,
+				  const struct ethtool_rmon_hist_range **ranges);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9cb8df89d4f2..cfef6b08169a 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -673,6 +673,7 @@ enum ethtool_link_ext_substate_cable_issue {
  * @ETH_SS_STATS_ETH_PHY: names of IEEE 802.3 PHY statistics
  * @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
  * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
+ * @ETH_SS_STATS_RMON: names of RMON statistics
  *
  * @ETH_SS_COUNT: number of defined string sets
  */
@@ -697,6 +698,7 @@ enum ethtool_stringset {
 	ETH_SS_STATS_ETH_PHY,
 	ETH_SS_STATS_ETH_MAC,
 	ETH_SS_STATS_ETH_CTRL,
+	ETH_SS_STATS_RMON,
 
 	/* add new constants above here */
 	ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 2ea5f049df6a..825cfda1c5d5 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -700,6 +700,7 @@ enum {
 	ETHTOOL_STATS_ETH_PHY,
 	ETHTOOL_STATS_ETH_MAC,
 	ETHTOOL_STATS_ETH_CTRL,
+	ETHTOOL_STATS_RMON,
 
 	/* add new constants above here */
 	__ETHTOOL_STATS_CNT
@@ -714,6 +715,13 @@ enum {
 
 	ETHTOOL_A_STATS_GRP_STAT,		/* nest */
 
+	ETHTOOL_A_STATS_GRP_HIST_RX,		/* nest */
+	ETHTOOL_A_STATS_GRP_HIST_TX,		/* nest */
+
+	ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,	/* u32 */
+	ETHTOOL_A_STATS_GRP_HIST_BKT_HI,	/* u32 */
+	ETHTOOL_A_STATS_GRP_HIST_VAL,		/* u64 */
+
 	/* add new constants above here */
 	__ETHTOOL_A_STATS_GRP_CNT,
 	ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1)
@@ -793,6 +801,21 @@ enum {
 	ETHTOOL_A_STATS_ETH_CTRL_MAX = (__ETHTOOL_A_STATS_ETH_CTRL_CNT - 1)
 };
 
+enum {
+	/* etherStatsUndersizePkts */
+	ETHTOOL_A_STATS_RMON_UNDERSIZE,
+	/* etherStatsOversizePkts */
+	ETHTOOL_A_STATS_RMON_OVERSIZE,
+	/* etherStatsFragments */
+	ETHTOOL_A_STATS_RMON_FRAG,
+	/* etherStatsJabbers */
+	ETHTOOL_A_STATS_RMON_JABBER,
+
+	/* add new constants above here */
+	__ETHTOOL_A_STATS_RMON_CNT,
+	ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index febfa61e52e2..bed3afdf3656 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -405,5 +405,6 @@ extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN];
+extern const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN];
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index f4fded66731c..acb2b080c358 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -17,6 +17,8 @@ struct stats_reply_data {
 	struct ethtool_eth_phy_stats	phy_stats;
 	struct ethtool_eth_mac_stats	mac_stats;
 	struct ethtool_eth_ctrl_stats	ctrl_stats;
+	struct ethtool_rmon_stats	rmon_stats;
+	const struct ethtool_rmon_hist_range	*rmon_ranges;
 };
 
 #define STATS_REPDATA(__reply_base) \
@@ -26,6 +28,7 @@ const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_STATS_ETH_PHY]			= "eth-phy",
 	[ETHTOOL_STATS_ETH_MAC]			= "eth-mac",
 	[ETHTOOL_STATS_ETH_CTRL]		= "eth-ctrl",
+	[ETHTOOL_STATS_RMON]			= "rmon",
 };
 
 const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
@@ -63,6 +66,13 @@ const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN]
 	[ETHTOOL_A_STATS_ETH_CTRL_5_RX_UNSUP]	= "UnsupportedOpcodesReceived",
 };
 
+const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_A_STATS_RMON_UNDERSIZE]	= "etherStatsUndersizePkts",
+	[ETHTOOL_A_STATS_RMON_OVERSIZE]		= "etherStatsOversizePkts",
+	[ETHTOOL_A_STATS_RMON_FRAG]		= "etherStatsFragments",
+	[ETHTOOL_A_STATS_RMON_JABBER]		= "etherStatsJabbers",
+};
+
 const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
 	[ETHTOOL_A_STATS_HEADER]	=
 		NLA_POLICY_NESTED(ethnl_header_policy),
@@ -107,6 +117,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
 	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
 	memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
+	memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats));
 
 	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
 	    dev->ethtool_ops->get_eth_phy_stats)
@@ -117,6 +128,10 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	if (test_bit(ETHTOOL_STATS_ETH_CTRL, req_info->stat_mask) &&
 	    dev->ethtool_ops->get_eth_ctrl_stats)
 		dev->ethtool_ops->get_eth_ctrl_stats(dev, &data->ctrl_stats);
+	if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask) &&
+	    dev->ethtool_ops->get_rmon_stats)
+		dev->ethtool_ops->get_rmon_stats(dev, &data->rmon_stats,
+						 &data->rmon_ranges);
 
 	ethnl_ops_complete(dev);
 	return 0;
@@ -141,6 +156,16 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
 		n_stats += sizeof(struct ethtool_eth_ctrl_stats) / sizeof(u64);
 		n_grps++;
 	}
+	if (test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask)) {
+		n_stats += sizeof(struct ethtool_rmon_stats) / sizeof(u64);
+		n_grps++;
+		/* Above includes the space for _A_STATS_GRP_HIST_VALs */
+
+		len += (nla_total_size(0) +	/* _A_STATS_GRP_HIST */
+			nla_total_size(4) +	/* _A_STATS_GRP_HIST_BKT_LOW */
+			nla_total_size(4)) *	/* _A_STATS_GRP_HIST_BKT_HI */
+			ETHTOOL_RMON_HIST_MAX * 2;
+	}
 
 	len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
 			 nla_total_size(4) + /* _A_STATS_GRP_ID */
@@ -258,6 +283,65 @@ static int stats_put_ctrl_stats(struct sk_buff *skb,
 	return 0;
 }
 
+static int stats_put_rmon_hist(struct sk_buff *skb, u32 attr, const u64 *hist,
+			       const struct ethtool_rmon_hist_range *ranges)
+{
+	struct nlattr *nest;
+	int i;
+
+	if (!ranges)
+		return 0;
+
+	for (i = 0; i <	ETHTOOL_RMON_HIST_MAX; i++) {
+		if (!ranges[i].low && !ranges[i].high)
+			break;
+		if (hist[i] == ETHTOOL_STAT_NOT_SET)
+			continue;
+
+		nest = nla_nest_start(skb, attr);
+		if (!nest)
+			return -EMSGSIZE;
+
+		if (nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_LOW,
+				ranges[i].low) ||
+		    nla_put_u32(skb, ETHTOOL_A_STATS_GRP_HIST_BKT_HI,
+				ranges[i].high) ||
+		    nla_put_u64_64bit(skb, ETHTOOL_A_STATS_GRP_HIST_VAL,
+				      hist[i], ETHTOOL_A_STATS_GRP_PAD))
+			goto err_cancel_hist;
+
+		nla_nest_end(skb, nest);
+	}
+
+	return 0;
+
+err_cancel_hist:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int stats_put_rmon_stats(struct sk_buff *skb,
+				const struct stats_reply_data *data)
+{
+	if (stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_RX,
+				data->rmon_stats.hist, data->rmon_ranges) ||
+	    stats_put_rmon_hist(skb, ETHTOOL_A_STATS_GRP_HIST_TX,
+				data->rmon_stats.hist_tx, data->rmon_ranges))
+		return -EMSGSIZE;
+
+	if (stat_put(skb, ETHTOOL_A_STATS_RMON_UNDERSIZE,
+		     data->rmon_stats.undersize_pkts) ||
+	    stat_put(skb, ETHTOOL_A_STATS_RMON_OVERSIZE,
+		     data->rmon_stats.oversize_pkts) ||
+	    stat_put(skb, ETHTOOL_A_STATS_RMON_FRAG,
+		     data->rmon_stats.fragments) ||
+	    stat_put(skb, ETHTOOL_A_STATS_RMON_JABBER,
+		     data->rmon_stats.jabbers))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int stats_put_stats(struct sk_buff *skb,
 			   const struct stats_reply_data *data,
 			   u32 id, u32 ss_id,
@@ -305,6 +389,9 @@ static int stats_fill_reply(struct sk_buff *skb,
 		ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_CTRL,
 				      ETH_SS_STATS_ETH_CTRL,
 				      stats_put_ctrl_stats);
+	if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask))
+		ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON,
+				      ETH_SS_STATS_RMON, stats_put_rmon_stats);
 
 	return ret;
 }
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index a33c603a7a02..b3029fff715d 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -100,6 +100,11 @@ static const struct strset_info info_template[] = {
 		.count		= __ETHTOOL_A_STATS_ETH_CTRL_CNT,
 		.strings	= stats_eth_ctrl_names,
 	},
+	[ETH_SS_STATS_RMON] = {
+		.per_dev	= false,
+		.count		= __ETHTOOL_A_STATS_RMON_CNT,
+		.strings	= stats_rmon_names,
+	},
 };
 
 struct strset_req_info {
-- 
cgit v1.2.3


From c1912ab0eeba6ba7fbf601bad9c2992d5f528672 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:43 -0700
Subject: mlxsw: implement ethtool standard stats

mlxsw has nicely grouped stats, add support for standard uAPI.
I'm guessing the register access part. Compile tested only.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 129 +++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 078601d31cde..c8061beed6db 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -1059,6 +1059,131 @@ mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
 	return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info);
 }
 
+static void
+mlxsw_sp_get_eth_phy_stats(struct net_device *dev,
+			   struct ethtool_eth_phy_stats *phy_stats)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+
+	if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT,
+					0, ppcnt_pl))
+		return;
+
+	phy_stats->SymbolErrorDuringCarrier =
+		mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get(ppcnt_pl);
+}
+
+static void
+mlxsw_sp_get_eth_mac_stats(struct net_device *dev,
+			   struct ethtool_eth_mac_stats *mac_stats)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+
+	if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT,
+					0, ppcnt_pl))
+		return;
+
+	mac_stats->FramesTransmittedOK =
+		mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl);
+	mac_stats->FramesReceivedOK =
+		mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl);
+	mac_stats->FrameCheckSequenceErrors =
+		mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl);
+	mac_stats->AlignmentErrors =
+		mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl);
+	mac_stats->OctetsTransmittedOK =
+		mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl);
+	mac_stats->OctetsReceivedOK =
+		mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl);
+	mac_stats->MulticastFramesXmittedOK =
+		mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get(ppcnt_pl);
+	mac_stats->BroadcastFramesXmittedOK =
+		mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get(ppcnt_pl);
+	mac_stats->MulticastFramesReceivedOK =
+		mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl);
+	mac_stats->BroadcastFramesReceivedOK =
+		mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get(ppcnt_pl);
+	mac_stats->InRangeLengthErrors =
+		mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl);
+	mac_stats->OutOfRangeLengthField =
+		mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl);
+	mac_stats->FrameTooLongErrors =
+		mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl);
+}
+
+static void
+mlxsw_sp_get_eth_ctrl_stats(struct net_device *dev,
+			    struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+
+	if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT,
+					0, ppcnt_pl))
+		return;
+
+	ctrl_stats->MACControlFramesTransmitted =
+		mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get(ppcnt_pl);
+	ctrl_stats->MACControlFramesReceived =
+		mlxsw_reg_ppcnt_a_mac_control_frames_received_get(ppcnt_pl);
+	ctrl_stats->UnsupportedOpcodesReceived =
+		mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get(ppcnt_pl);
+}
+
+static const struct ethtool_rmon_hist_range mlxsw_rmon_ranges[] = {
+	{    0,    64 },
+	{   65,   127 },
+	{  128,   255 },
+	{  256,   511 },
+	{  512,  1023 },
+	{ 1024,  1518 },
+	{ 1519,  2047 },
+	{ 2048,  4095 },
+	{ 4096,  8191 },
+	{ 8192, 10239 },
+	{}
+};
+
+static void
+mlxsw_sp_get_rmon_stats(struct net_device *dev,
+			struct ethtool_rmon_stats *rmon,
+			const struct ethtool_rmon_hist_range **ranges)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+
+	if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_RFC_2819_CNT,
+					0, ppcnt_pl))
+		return;
+
+	rmon->undersize_pkts =
+		mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get(ppcnt_pl);
+	rmon->oversize_pkts =
+		mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get(ppcnt_pl);
+	rmon->fragments =
+		mlxsw_reg_ppcnt_ether_stats_fragments_get(ppcnt_pl);
+
+	rmon->hist[0] = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get(ppcnt_pl);
+	rmon->hist[1] =
+		mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get(ppcnt_pl);
+	rmon->hist[2] =
+		mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get(ppcnt_pl);
+	rmon->hist[3] =
+		mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get(ppcnt_pl);
+	rmon->hist[4] =
+		mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get(ppcnt_pl);
+	rmon->hist[5] =
+		mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get(ppcnt_pl);
+	rmon->hist[6] =
+		mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get(ppcnt_pl);
+	rmon->hist[7] =
+		mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get(ppcnt_pl);
+	rmon->hist[8] =
+		mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get(ppcnt_pl);
+	rmon->hist[9] =
+		mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get(ppcnt_pl);
+
+	*ranges = mlxsw_rmon_ranges;
+}
+
 const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 	.cap_link_lanes_supported	= true,
 	.get_drvinfo			= mlxsw_sp_port_get_drvinfo,
@@ -1075,6 +1200,10 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
 	.get_module_info		= mlxsw_sp_get_module_info,
 	.get_module_eeprom		= mlxsw_sp_get_module_eeprom,
 	.get_ts_info			= mlxsw_sp_get_ts_info,
+	.get_eth_phy_stats		= mlxsw_sp_get_eth_phy_stats,
+	.get_eth_mac_stats		= mlxsw_sp_get_eth_mac_stats,
+	.get_eth_ctrl_stats		= mlxsw_sp_get_eth_ctrl_stats,
+	.get_rmon_stats			= mlxsw_sp_get_rmon_stats,
 };
 
 struct mlxsw_sp1_port_link_mode {
-- 
cgit v1.2.3


From 782bc00affcd63dacaa34e9ab6da588605423312 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:44 -0700
Subject: bnxt: implement ethtool standard stats

Most of the names seem to strongly correlate with names from
the standard and RFC. Whether ..+good_frames are indeed Frames..OK
I'm the least sure of.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 125 ++++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 7b90357daba1..832252313b18 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3990,6 +3990,127 @@ ethtool_init_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 }
 
+static void bnxt_get_eth_phy_stats(struct net_device *dev,
+				   struct ethtool_eth_phy_stats *phy_stats)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u64 *rx;
+
+	if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
+		return;
+
+	rx = bp->rx_port_stats_ext.sw_stats;
+	phy_stats->SymbolErrorDuringCarrier =
+		*(rx + BNXT_RX_STATS_EXT_OFFSET(rx_pcs_symbol_err));
+}
+
+static void bnxt_get_eth_mac_stats(struct net_device *dev,
+				   struct ethtool_eth_mac_stats *mac_stats)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u64 *rx, *tx;
+
+	if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS))
+		return;
+
+	rx = bp->port_stats.sw_stats;
+	tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8;
+
+	mac_stats->FramesReceivedOK =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_good_frames);
+	mac_stats->FramesTransmittedOK =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_good_frames);
+}
+
+static void bnxt_get_eth_ctrl_stats(struct net_device *dev,
+				    struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u64 *rx;
+
+	if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS))
+		return;
+
+	rx = bp->port_stats.sw_stats;
+	ctrl_stats->MACControlFramesReceived =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_ctrl_frames);
+}
+
+static const struct ethtool_rmon_hist_range bnxt_rmon_ranges[] = {
+	{    0,    64 },
+	{   65,   127 },
+	{  128,   255 },
+	{  256,   511 },
+	{  512,  1023 },
+	{ 1024,  1518 },
+	{ 1519,  2047 },
+	{ 2048,  4095 },
+	{ 4096,  9216 },
+	{ 9217, 16383 },
+	{}
+};
+
+static void bnxt_get_rmon_stats(struct net_device *dev,
+				struct ethtool_rmon_stats *rmon_stats,
+				const struct ethtool_rmon_hist_range **ranges)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u64 *rx, *tx;
+
+	if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS))
+		return;
+
+	rx = bp->port_stats.sw_stats;
+	tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8;
+
+	rmon_stats->jabbers =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_jbr_frames);
+	rmon_stats->oversize_pkts =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames);
+	rmon_stats->undersize_pkts =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_undrsz_frames);
+
+	rmon_stats->hist[0] = BNXT_GET_RX_PORT_STATS64(rx, rx_64b_frames);
+	rmon_stats->hist[1] = BNXT_GET_RX_PORT_STATS64(rx, rx_65b_127b_frames);
+	rmon_stats->hist[2] = BNXT_GET_RX_PORT_STATS64(rx, rx_128b_255b_frames);
+	rmon_stats->hist[3] = BNXT_GET_RX_PORT_STATS64(rx, rx_256b_511b_frames);
+	rmon_stats->hist[4] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_512b_1023b_frames);
+	rmon_stats->hist[5] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_1024b_1518b_frames);
+	rmon_stats->hist[6] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_1519b_2047b_frames);
+	rmon_stats->hist[7] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_2048b_4095b_frames);
+	rmon_stats->hist[8] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_4096b_9216b_frames);
+	rmon_stats->hist[9] =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_9217b_16383b_frames);
+
+	rmon_stats->hist_tx[0] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_64b_frames);
+	rmon_stats->hist_tx[1] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_65b_127b_frames);
+	rmon_stats->hist_tx[2] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_128b_255b_frames);
+	rmon_stats->hist_tx[3] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_256b_511b_frames);
+	rmon_stats->hist_tx[4] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_512b_1023b_frames);
+	rmon_stats->hist_tx[5] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_1024b_1518b_frames);
+	rmon_stats->hist_tx[6] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_1519b_2047b_frames);
+	rmon_stats->hist_tx[7] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_2048b_4095b_frames);
+	rmon_stats->hist_tx[8] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_4096b_9216b_frames);
+	rmon_stats->hist_tx[9] =
+		BNXT_GET_TX_PORT_STATS64(tx, tx_9217b_16383b_frames);
+
+	*ranges = bnxt_rmon_ranges;
+}
+
 void bnxt_ethtool_free(struct bnxt *bp)
 {
 	kfree(bp->test_info);
@@ -4049,4 +4170,8 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 	.set_dump		= bnxt_set_dump,
 	.get_dump_flag		= bnxt_get_dump_flag,
 	.get_dump_data		= bnxt_get_dump_data,
+	.get_eth_phy_stats	= bnxt_get_eth_phy_stats,
+	.get_eth_mac_stats	= bnxt_get_eth_mac_stats,
+	.get_eth_ctrl_stats	= bnxt_get_eth_ctrl_stats,
+	.get_rmon_stats		= bnxt_get_rmon_stats,
 };
-- 
cgit v1.2.3


From b572ec9ff087eb71a857d5af277480818f6a1c59 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 16 Apr 2021 12:27:45 -0700
Subject: mlx5: implement ethtool standard stats

Add support for PHY/MAC/Ctrl/RMON stats.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  37 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 142 ++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  10 ++
 3 files changed, 182 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index e1d3cd1f1f11..8360289813f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -2295,6 +2295,39 @@ mlx5e_get_link_ext_state(struct net_device *dev,
 	return -ENODATA;
 }
 
+static void mlx5e_get_eth_phy_stats(struct net_device *netdev,
+				    struct ethtool_eth_phy_stats *phy_stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_stats_eth_phy_get(priv, phy_stats);
+}
+
+static void mlx5e_get_eth_mac_stats(struct net_device *netdev,
+				    struct ethtool_eth_mac_stats *mac_stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_stats_eth_mac_get(priv, mac_stats);
+}
+
+static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev,
+				     struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_stats_eth_ctrl_get(priv, ctrl_stats);
+}
+
+static void mlx5e_get_rmon_stats(struct net_device *netdev,
+				 struct ethtool_rmon_stats *rmon_stats,
+				 const struct ethtool_rmon_hist_range **ranges)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	mlx5e_stats_rmon_get(priv, rmon_stats, ranges);
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 				     ETHTOOL_COALESCE_MAX_FRAMES |
@@ -2340,4 +2373,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_fec_stats     = mlx5e_get_fec_stats,
 	.get_fecparam      = mlx5e_get_fecparam,
 	.set_fecparam      = mlx5e_set_fecparam,
+	.get_eth_phy_stats = mlx5e_get_eth_phy_stats,
+	.get_eth_mac_stats = mlx5e_get_eth_mac_stats,
+	.get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats,
+	.get_rmon_stats    = mlx5e_get_rmon_stats,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 5146aa200de9..e4f5b6395148 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -775,21 +775,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3)
 		MLX5_BYTE_OFF(ppcnt_reg,		\
 			      counter_set.set.c##_high)))
 
-void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
-			   struct ethtool_pause_stats *pause_stats)
+static int mlx5e_stats_get_ieee(struct mlx5_core_dev *mdev,
+				u32 *ppcnt_ieee_802_3)
 {
-	u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
-	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
 	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 
 	if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev))
-		return;
+		return -EOPNOTSUPP;
 
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
-	mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3,
-			     sz, MLX5_REG_PPCNT, 0, 0);
+	return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3,
+				    sz, MLX5_REG_PPCNT, 0, 0);
+}
+
+void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
+			   struct ethtool_pause_stats *pause_stats)
+{
+	u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+		return;
 
 	pause_stats->tx_pause_frames =
 		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
@@ -801,6 +809,73 @@ void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
 				      a_pause_mac_ctrl_frames_received);
 }
 
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+			     struct ethtool_eth_phy_stats *phy_stats)
+{
+	u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+		return;
+
+	phy_stats->SymbolErrorDuringCarrier =
+		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
+				      a_symbol_error_during_carrier);
+}
+
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+			     struct ethtool_eth_mac_stats *mac_stats)
+{
+	u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+		return;
+
+#define RD(name)							\
+	MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,				\
+			      eth_802_3_cntrs_grp_data_layout,		\
+			      name)
+
+	mac_stats->FramesTransmittedOK	= RD(a_frames_transmitted_ok);
+	mac_stats->FramesReceivedOK	= RD(a_frames_received_ok);
+	mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors);
+	mac_stats->OctetsTransmittedOK	= RD(a_octets_transmitted_ok);
+	mac_stats->OctetsReceivedOK	= RD(a_octets_received_ok);
+	mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok);
+	mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok);
+	mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok);
+	mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok);
+	mac_stats->InRangeLengthErrors	= RD(a_in_range_length_errors);
+	mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field);
+	mac_stats->FrameTooLongErrors	= RD(a_frame_too_long_errors);
+#undef RD
+}
+
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+			      struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+	u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3))
+		return;
+
+	ctrl_stats->MACControlFramesTransmitted =
+		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
+				      a_mac_control_frames_transmitted);
+	ctrl_stats->MACControlFramesReceived =
+		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
+				      a_mac_control_frames_received);
+	ctrl_stats->UnsupportedOpcodesReceived =
+		MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3,
+				      eth_802_3_cntrs_grp_data_layout,
+				      a_unsupported_opcodes_received);
+}
+
 #define PPORT_2863_OFF(c) \
 	MLX5_BYTE_OFF(ppcnt_reg, \
 		      counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
@@ -912,6 +987,59 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819)
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 }
 
+static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = {
+	{    0,    64 },
+	{   65,   127 },
+	{  128,   255 },
+	{  256,   511 },
+	{  512,  1023 },
+	{ 1024,  1518 },
+	{ 1519,  2047 },
+	{ 2048,  4095 },
+	{ 4096,  8191 },
+	{ 8192, 10239 },
+	{}
+};
+
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+			  struct ethtool_rmon_stats *rmon,
+			  const struct ethtool_rmon_hist_range **ranges)
+{
+	u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)];
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+	MLX5_SET(ppcnt_reg, in, local_port, 1);
+	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+	if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters,
+				 sz, MLX5_REG_PPCNT, 0, 0))
+		return;
+
+#define RD(name)						\
+	MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters,		\
+			      eth_2819_cntrs_grp_data_layout,	\
+			      name)
+
+	rmon->undersize_pkts	= RD(ether_stats_undersize_pkts);
+	rmon->fragments		= RD(ether_stats_fragments);
+	rmon->jabbers		= RD(ether_stats_jabbers);
+
+	rmon->hist[0]		= RD(ether_stats_pkts64octets);
+	rmon->hist[1]		= RD(ether_stats_pkts65to127octets);
+	rmon->hist[2]		= RD(ether_stats_pkts128to255octets);
+	rmon->hist[3]		= RD(ether_stats_pkts256to511octets);
+	rmon->hist[4]		= RD(ether_stats_pkts512to1023octets);
+	rmon->hist[5]		= RD(ether_stats_pkts1024to1518octets);
+	rmon->hist[6]		= RD(ether_stats_pkts1519to2047octets);
+	rmon->hist[7]		= RD(ether_stats_pkts2048to4095octets);
+	rmon->hist[8]		= RD(ether_stats_pkts4096to8191octets);
+	rmon->hist[9]		= RD(ether_stats_pkts8192to10239octets);
+#undef RD
+
+	*ranges = mlx5e_rmon_ranges;
+}
+
 #define PPORT_PHY_STATISTICAL_OFF(c) \
 	MLX5_BYTE_OFF(ppcnt_reg, \
 		      counter_set.phys_layer_statistical_cntrs.c##_high)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 9614de49b7ac..139e59f30db0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -117,6 +117,16 @@ void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
 void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
 			 struct ethtool_fec_stats *fec_stats);
 
+void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv,
+			     struct ethtool_eth_phy_stats *phy_stats);
+void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv,
+			     struct ethtool_eth_mac_stats *mac_stats);
+void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv,
+			      struct ethtool_eth_ctrl_stats *ctrl_stats);
+void mlx5e_stats_rmon_get(struct mlx5e_priv *priv,
+			  struct ethtool_rmon_stats *rmon,
+			  const struct ethtool_rmon_hist_range **ranges);
+
 /* Concrete NIC Stats */
 
 struct mlx5e_sw_stats {
-- 
cgit v1.2.3


From 1e3d976dbb23b3fce544752b434bdc32ce64aabc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 16 Apr 2021 14:31:51 -0500
Subject: flow_dissector: Fix out-of-bounds warning in
 __skb_flow_bpf_to_target()

Fix the following out-of-bounds warning:

net/core/flow_dissector.c:835:3: warning: 'memcpy' offset [33, 48] from the object at 'flow_keys' is out of the bounds of referenced subobject 'ipv6_src' with type '__u32[4]' {aka 'unsigned int[4]'} at offset 16 [-Warray-bounds]

The problem is that the original code is trying to copy data into a
couple of struct members adjacent to each other in a single call to
memcpy().  So, the compiler legitimately complains about it. As these
are just a couple of members, fix this by copying each one of them in
separate calls to memcpy().

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5985029e43d4..3ed7c98a98e1 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -832,8 +832,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 		key_addrs = skb_flow_dissector_target(flow_dissector,
 						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
 						      target_container);
-		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
-		       sizeof(key_addrs->v6addrs));
+		memcpy(&key_addrs->v6addrs.src, &flow_keys->ipv6_src,
+		       sizeof(key_addrs->v6addrs.src));
+		memcpy(&key_addrs->v6addrs.dst, &flow_keys->ipv6_dst,
+		       sizeof(key_addrs->v6addrs.dst));
 		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 	}
 
-- 
cgit v1.2.3


From e9e49ae88ec86c370aae56700c9ff7421dcf91fe Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:16 +0300
Subject: net: enetc: remove redundant clearing of skb/xdp_frame pointer in TX
 conf path

Later in enetc_clean_tx_ring we have:

		/* Scrub the swbd here so we don't have to do that
		 * when we reuse it during xmit
		 */
		memset(tx_swbd, 0, sizeof(*tx_swbd));

So these assignments are unnecessary.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 9a726085841d..c7f3c6e691a1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -544,7 +544,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
 		if (xdp_frame) {
 			xdp_return_frame(xdp_frame);
-			tx_swbd->xdp_frame = NULL;
 		} else if (skb) {
 			if (unlikely(tx_swbd->skb->cb[0] &
 				     ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) {
@@ -558,7 +557,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 				do_twostep_tstamp = false;
 			}
 			napi_consume_skb(skb, napi_budget);
-			tx_swbd->skb = NULL;
 		}
 
 		tx_byte_cnt += tx_swbd->len;
-- 
cgit v1.2.3


From 6b04830d5e0d7cbe137310527e9ad114686edef7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:17 +0300
Subject: net: enetc: rename the buffer reuse helpers

enetc_put_xdp_buff has nothing to do with XDP, frankly, it is just a
helper to populate the recycle end of the shadow RX BD ring
(next_to_alloc) with a given buffer.

On the other hand, enetc_put_rx_buff plays more tricks than its name
would suggest.

So let's rename enetc_put_rx_buff into enetc_flip_rx_buff to reflect the
half-page buffer reuse tricks that it employs, and enetc_put_xdp_buff
into enetc_put_rx_buff which suggests a more garden-variety operation.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 54 +++++++++++++---------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index c7f3c6e691a1..c4ff090f29ec 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -751,27 +751,35 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
 	return rx_swbd;
 }
 
+/* Reuse the current page without performing half-page buffer flipping */
 static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
 			      struct enetc_rx_swbd *rx_swbd)
 {
-	if (likely(enetc_page_reusable(rx_swbd->page))) {
-		size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;
+	size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;
+
+	enetc_reuse_page(rx_ring, rx_swbd);
 
+	dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
+					 rx_swbd->page_offset,
+					 buffer_size, rx_swbd->dir);
+
+	rx_swbd->page = NULL;
+}
+
+/* Reuse the current page by performing half-page buffer flipping */
+static void enetc_flip_rx_buff(struct enetc_bdr *rx_ring,
+			       struct enetc_rx_swbd *rx_swbd)
+{
+	if (likely(enetc_page_reusable(rx_swbd->page))) {
 		rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE;
 		page_ref_inc(rx_swbd->page);
 
-		enetc_reuse_page(rx_ring, rx_swbd);
-
-		/* sync for use by the device */
-		dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
-						 rx_swbd->page_offset,
-						 buffer_size, rx_swbd->dir);
+		enetc_put_rx_buff(rx_ring, rx_swbd);
 	} else {
 		dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
 			       rx_swbd->dir);
+		rx_swbd->page = NULL;
 	}
-
-	rx_swbd->page = NULL;
 }
 
 static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
@@ -791,7 +799,7 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
 	skb_reserve(skb, rx_ring->buffer_offset);
 	__skb_put(skb, size);
 
-	enetc_put_rx_buff(rx_ring, rx_swbd);
+	enetc_flip_rx_buff(rx_ring, rx_swbd);
 
 	return skb;
 }
@@ -804,7 +812,7 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page,
 			rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE);
 
-	enetc_put_rx_buff(rx_ring, rx_swbd);
+	enetc_flip_rx_buff(rx_ring, rx_swbd);
 }
 
 static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
@@ -1142,20 +1150,6 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
 	}
 }
 
-/* Reuse the current page without performing half-page buffer flipping */
-static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
-			       struct enetc_rx_swbd *rx_swbd)
-{
-	enetc_reuse_page(rx_ring, rx_swbd);
-
-	dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
-					 rx_swbd->page_offset,
-					 ENETC_RXB_DMA_SIZE_XDP,
-					 rx_swbd->dir);
-
-	rx_swbd->page = NULL;
-}
-
 /* Convert RX buffer descriptors to TX buffer descriptors. These will be
  * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
  * RX software BDs because the ownership of the buffer no longer belongs to the
@@ -1194,8 +1188,8 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
 			   int rx_ring_last)
 {
 	while (rx_ring_first != rx_ring_last) {
-		enetc_put_xdp_buff(rx_ring,
-				   &rx_ring->rx_swbd[rx_ring_first]);
+		enetc_put_rx_buff(rx_ring,
+				  &rx_ring->rx_swbd[rx_ring_first]);
 		enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
 	}
 	rx_ring->stats.xdp_drops++;
@@ -1316,8 +1310,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 			tmp_orig_i = orig_i;
 
 			while (orig_i != i) {
-				enetc_put_rx_buff(rx_ring,
-						  &rx_ring->rx_swbd[orig_i]);
+				enetc_flip_rx_buff(rx_ring,
+						   &rx_ring->rx_swbd[orig_i]);
 				enetc_bdr_idx_inc(rx_ring, &orig_i);
 			}
 
-- 
cgit v1.2.3


From 672f9a21989e56c8233d1d8daab3e5eecf76c59e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:18 +0300
Subject: net: enetc: recycle buffers for frames with RX errors

When receiving a frame with errors, currently we do nothing with it (we
don't construct an skb or an xdp_buff), we just exit the NAPI poll loop.

Let's put the buffer back into the RX ring (similar to XDP_DROP).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index c4ff090f29ec..c6f984473337 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -822,12 +822,14 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
 	if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
 		return false;
 
+	enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
 	enetc_rxbd_next(rx_ring, rxbd, i);
 
 	while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
 		dma_rmb();
 		bd_status = le32_to_cpu((*rxbd)->r.lstatus);
 
+		enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
 		enetc_rxbd_next(rx_ring, rxbd, i);
 	}
 
-- 
cgit v1.2.3


From 8f50d8bb3f1c173492d1d224bce99486fd6ccd32 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:19 +0300
Subject: net: enetc: stop XDP NAPI processing when build_skb() fails

When the code path below fails:

enetc_clean_rx_ring_xdp // XDP_PASS
-> enetc_build_skb
   -> enetc_map_rx_buff_to_skb
      -> build_skb

enetc_clean_rx_ring_xdp will 'break', but that 'break' instruction isn't
strong enough to actually break the NAPI poll loop, just the switch/case
statement for XDP actions. So we increment rx_frm_cnt and go to the next
frames minding our own business.

Instead let's do what the skb NAPI poll function does, and break the
loop now, waiting for the memory pressure to go away. Otherwise the next
calls to build_skb() are likely to fail too.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index c6f984473337..469170076efa 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1275,8 +1275,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 					      &i, &cleaned_cnt,
 					      ENETC_RXB_DMA_SIZE_XDP);
 			if (unlikely(!skb))
-				/* Exit the switch/case, not the loop */
-				break;
+				goto out;
 
 			napi_gro_receive(napi, skb);
 			break;
@@ -1338,6 +1337,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 		rx_frm_cnt++;
 	}
 
+out:
 	rx_ring->next_to_clean = i;
 
 	rx_ring->stats.packets += rx_frm_cnt;
-- 
cgit v1.2.3


From a6369fe6e07d7e45aa5e73eccc6d426e92525e5c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:20 +0300
Subject: net: enetc: remove unneeded xdp_do_flush_map()

xdp_do_redirect already contains:
-> dev_map_enqueue
   -> __xdp_enqueue
      -> bq_enqueue
         -> bq_xmit_all // if we have more than 16 frames

So the logic from enetc will never be hit, because ENETC_DEFAULT_TX_WORK
is 128.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 469170076efa..c7b940979314 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1324,11 +1324,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				rx_ring->stats.xdp_redirect++;
 			}
 
-			if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) {
-				xdp_do_flush_map();
-				xdp_redirect_frm_cnt = 0;
-			}
-
 			break;
 		default:
 			bpf_warn_invalid_xdp_action(xdp_act);
-- 
cgit v1.2.3


From ee3e875f10fca68fb7478c23c75b553e56da319c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:21 +0300
Subject: net: enetc: increase TX ring size

Now that commit d6a2829e82cf ("net: enetc: increase RX ring default
size") has increased the RX ring size, it is quite easy to congest the
TX rings when the traffic is predominantly XDP_TX, as the RX ring is
quite a bit larger than the TX one.

Since we bit the bullet and did the expensive thing already (larger RX
rings consume more memory pages), it seems quite foolish to keep the TX
rings small. So make them equally sized with TX.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index d52717bc73c7..6f818e33e03b 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -79,7 +79,7 @@ struct enetc_xdp_data {
 };
 
 #define ENETC_RX_RING_DEFAULT_SIZE	2048
-#define ENETC_TX_RING_DEFAULT_SIZE	256
+#define ENETC_TX_RING_DEFAULT_SIZE	2048
 #define ENETC_DEFAULT_TX_WORK		(ENETC_TX_RING_DEFAULT_SIZE / 2)
 
 struct enetc_bdr {
-- 
cgit v1.2.3


From 7eab503b11ee1c4bb4a28866a6b029b0bbbeadfe Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:22 +0300
Subject: net: enetc: use dedicated TX rings for XDP

It is possible for one CPU to perform TX hashing (see netdev_pick_tx)
between the 8 ENETC TX rings, and the TX hashing to select TX queue 1.

At the same time, it is possible for the other CPU to already use TX
ring 1 for XDP (either XDP_TX or XDP_REDIRECT). Since there is no mutual
exclusion between XDP and the network stack, we run into an issue
because the ENETC TX procedure is not reentrant.

The obvious approach would be to just make XDP take the lock of the
network stack's TX queue corresponding to the ring it's about to enqueue
in.

For XDP_REDIRECT, this is quite straightforward, a lock at the beginning
and end of enetc_xdp_xmit() should do the trick.

But for XDP_TX, it's a bit more complicated. For one, we do TX batching
all by ourselves for frames with the XDP_TX verdict. This is something
we would like to keep the way it is, for performance reasons. But
batching means that the network stack's lock should be kept from the
first enqueued XDP_TX frame and until we ring the doorbell. That is
mostly fine, except for cases when in the same NAPI loop we have mixed
XDP_TX and XDP_REDIRECT frames. So if enetc_xdp_xmit() gets called while
we are holding the lock from the RX NAPI, then bam, deadlock. The naive
answer could be 'just flush the XDP_TX frames first, then release the
network stack's TX queue lock, then call xdp_do_flush_map()'. But even
xdp_do_redirect() is capable of flushing the batched XDP_REDIRECT
frames, so unless we unlock/relock the TX queue around xdp_do_redirect(),
there simply isn't any clean way to protect XDP_TX from concurrent
network stack .ndo_start_xmit() on another CPU.

So we need to take a different approach, and that is to reserve two
rings for the sole use of XDP. We leave TX rings
0..ndev->real_num_tx_queues-1 to be handled by the network stack, and we
pick them from the end of the priv->tx_ring array.

We make an effort to keep the mapping done by enetc_alloc_msix() which
decides which CPU handles the TX completions of which TX ring in its
NAPI poll. So the XDP TX ring of CPU 0 is handled by TX ring 6, and the
XDP TX ring of CPU 1 is handled by TX ring 7.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 46 +++++++++++++++++++++++-----
 drivers/net/ethernet/freescale/enetc/enetc.h |  1 +
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index c7b940979314..56190d861bb9 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -9,6 +9,26 @@
 #include <linux/ptp_classify.h>
 #include <net/pkt_sched.h>
 
+static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv)
+{
+	int num_tx_rings = priv->num_tx_rings;
+	int i;
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		if (priv->rx_ring[i]->xdp.prog)
+			return num_tx_rings - num_possible_cpus();
+
+	return num_tx_rings;
+}
+
+static struct enetc_bdr *enetc_rx_ring_from_xdp_tx_ring(struct enetc_ndev_priv *priv,
+							struct enetc_bdr *tx_ring)
+{
+	int index = &priv->tx_ring[tx_ring->index] - priv->xdp_tx_ring;
+
+	return priv->rx_ring[index];
+}
+
 static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
 {
 	if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect)
@@ -468,7 +488,6 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
 				      struct enetc_tx_swbd *tx_swbd)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
-	struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index];
 	struct enetc_rx_swbd rx_swbd = {
 		.dma = tx_swbd->dma,
 		.page = tx_swbd->page,
@@ -476,6 +495,9 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
 		.dir = tx_swbd->dir,
 		.len = tx_swbd->len,
 	};
+	struct enetc_bdr *rx_ring;
+
+	rx_ring = enetc_rx_ring_from_xdp_tx_ring(priv, tx_ring);
 
 	if (likely(enetc_swbd_unused(rx_ring))) {
 		enetc_reuse_page(rx_ring, &rx_swbd);
@@ -1059,7 +1081,7 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
 	int xdp_tx_bd_cnt, i, k;
 	int xdp_tx_frm_cnt = 0;
 
-	tx_ring = priv->tx_ring[smp_processor_id()];
+	tx_ring = priv->xdp_tx_ring[smp_processor_id()];
 
 	prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
 
@@ -1221,8 +1243,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 	int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0;
 	struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
 	struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
-	struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
 	int rx_frm_cnt = 0, rx_byte_cnt = 0;
+	struct enetc_bdr *tx_ring;
 	int cleaned_cnt, i;
 	u32 xdp_act;
 
@@ -1280,6 +1302,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 			napi_gro_receive(napi, skb);
 			break;
 		case XDP_TX:
+			tx_ring = priv->xdp_tx_ring[rx_ring->index];
 			xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr,
 								     rx_ring,
 								     orig_i, i);
@@ -2022,6 +2045,7 @@ void enetc_start(struct net_device *ndev)
 int enetc_open(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int num_stack_tx_queues;
 	int err;
 
 	err = enetc_setup_irqs(priv);
@@ -2040,7 +2064,9 @@ int enetc_open(struct net_device *ndev)
 	if (err)
 		goto err_alloc_rx;
 
-	err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+	num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
+
+	err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
 	if (err)
 		goto err_set_queues;
 
@@ -2113,15 +2139,17 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	struct tc_mqprio_qopt *mqprio = type_data;
 	struct enetc_bdr *tx_ring;
+	int num_stack_tx_queues;
 	u8 num_tc;
 	int i;
 
+	num_stack_tx_queues = enetc_num_stack_tx_queues(priv);
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 	num_tc = mqprio->num_tc;
 
 	if (!num_tc) {
 		netdev_reset_tc(ndev);
-		netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+		netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
 
 		/* Reset all ring priorities to 0 */
 		for (i = 0; i < priv->num_tx_rings; i++) {
@@ -2133,7 +2161,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data)
 	}
 
 	/* Check if we have enough BD rings available to accommodate all TCs */
-	if (num_tc > priv->num_tx_rings) {
+	if (num_tc > num_stack_tx_queues) {
 		netdev_err(ndev, "Max %d traffic classes supported\n",
 			   priv->num_tx_rings);
 		return -EINVAL;
@@ -2421,8 +2449,9 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 {
 	struct pci_dev *pdev = priv->si->pdev;
-	int v_tx_rings;
+	int first_xdp_tx_ring;
 	int i, n, err, nvec;
+	int v_tx_rings;
 
 	nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num;
 	/* allocate MSIX for both messaging and Rx/Tx interrupts */
@@ -2497,6 +2526,9 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
 		}
 	}
 
+	first_xdp_tx_ring = priv->num_tx_rings - num_possible_cpus();
+	priv->xdp_tx_ring = &priv->tx_ring[first_xdp_tx_ring];
+
 	return 0;
 
 fail:
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 6f818e33e03b..3de71669e317 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -317,6 +317,7 @@ struct enetc_ndev_priv {
 
 	u32 speed; /* store speed for compare update pspeed */
 
+	struct enetc_bdr **xdp_tx_ring;
 	struct enetc_bdr *tx_ring[16];
 	struct enetc_bdr *rx_ring[16];
 
-- 
cgit v1.2.3


From 975acc833c9f34d784075815a8374760d1c6358b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:23 +0300
Subject: net: enetc: handle the invalid XDP action the same way as XDP_DROP

When the XDP program returns an invalid action, we should free the RX
buffer.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 56190d861bb9..0b84d4a74889 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1282,6 +1282,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 		xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
 
 		switch (xdp_act) {
+		default:
+			bpf_warn_invalid_xdp_action(xdp_act);
+			fallthrough;
 		case XDP_ABORTED:
 			trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
 			fallthrough;
@@ -1346,10 +1349,6 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				xdp_redirect_frm_cnt++;
 				rx_ring->stats.xdp_redirect++;
 			}
-
-			break;
-		default:
-			bpf_warn_invalid_xdp_action(xdp_act);
 		}
 
 		rx_frm_cnt++;
-- 
cgit v1.2.3


From 92ff9a6e578dc32950567efaf987328c32fefdc6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:24 +0300
Subject: net: enetc: fix buffer leaks with XDP_TX enqueue rejections

If the TX ring is congested, enetc_xdp_tx() returns false for the
current XDP frame (represented as an array of software BDs).

This array of software TX BDs is constructed in enetc_rx_swbd_to_xdp_tx_swbd
from software BDs freshly cleaned from the RX ring. The issue is that we
scrub the RX software BDs too soon, more precisely before we know that
we can enqueue the TX BDs successfully into the TX ring.

If we can't enqueue them (and enetc_xdp_tx returns false), we call
enetc_xdp_drop which attempts to recycle the buffers held by the RX
software BDs. But because we scrubbed those RX BDs already, two things
happen:

(a) we leak their memory
(b) we populate the RX software BD ring with an all-zero rx_swbd
    structure, which makes the buffer refill path allocate more memory.

enetc_refill_rx_ring
-> if (unlikely(!rx_swbd->page))
   -> enetc_new_page

That is a recipe for fast OOM.

Fixes: 7ed2bc80074e ("net: enetc: add support for XDP_TX")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 0b84d4a74889..f0ba612d5ce3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1175,9 +1175,7 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
 }
 
 /* Convert RX buffer descriptors to TX buffer descriptors. These will be
- * recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
- * RX software BDs because the ownership of the buffer no longer belongs to the
- * RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page.
+ * recycled back into the RX ring in enetc_clean_tx_ring.
  */
 static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
 					struct enetc_bdr *rx_ring,
@@ -1199,7 +1197,6 @@ static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
 		tx_swbd->is_dma_page = true;
 		tx_swbd->is_xdp_tx = true;
 		tx_swbd->is_eof = false;
-		memset(rx_swbd, 0, sizeof(*rx_swbd));
 	}
 
 	/* We rely on caller providing an rx_ring_last > rx_ring_first */
@@ -1317,6 +1314,17 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				tx_ring->stats.xdp_tx += xdp_tx_bd_cnt;
 				rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
 				xdp_tx_frm_cnt++;
+				/* The XDP_TX enqueue was successful, so we
+				 * need to scrub the RX software BDs because
+				 * the ownership of the buffers no longer
+				 * belongs to the RX ring, and we must prevent
+				 * enetc_refill_rx_ring() from reusing
+				 * rx_swbd->page.
+				 */
+				while (orig_i != i) {
+					rx_ring->rx_swbd[orig_i].page = NULL;
+					enetc_bdr_idx_inc(rx_ring, &orig_i);
+				}
 			}
 			break;
 		case XDP_REDIRECT:
-- 
cgit v1.2.3


From 24e393097171719e3a64abb8f4cc7bf8fbce2ac4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 00:22:25 +0300
Subject: net: enetc: apply the MDIO workaround for XDP_REDIRECT too

Described in fd5736bf9f23 ("enetc: Workaround for MDIO register access
issue") is a workaround for a hardware bug that requires a register
access of the MDIO controller to never happen concurrently with a
register access of a port PF. To avoid that, a mutual exclusion scheme
with rwlocks was implemented - the port PF accessors are the 'read'
side, and the MDIO accessors are the 'write' side.

When we do XDP_REDIRECT between two ENETC interfaces, all is fine
because the MDIO lock is already taken from the NAPI poll loop.

But when the ingress interface is not ENETC, just the egress is, the
MDIO lock is not taken, so we might access the port PF registers
concurrently with MDIO, which will make the link flap due to wrong
values returned from the PHY.

To avoid this, let's just slap an enetc_lock_mdio/enetc_unlock_mdio at
the beginning and ending of enetc_xdp_xmit. The fact that the MDIO lock
is designed as a rwlock is important here, because the read side is
reentrant (that is one of the main reasons why we chose it). Usually,
the way we benefit of its reentrancy is by running the data path
concurrently on both CPUs, but in this case, we benefit from the
reentrancy by taking the lock even when the lock is already taken
(and that's the situation where ENETC is both the ingress and the egress
interface for XDP_REDIRECT, which was fine before and still is fine now).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index f0ba612d5ce3..4f23829e7317 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1081,6 +1081,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
 	int xdp_tx_bd_cnt, i, k;
 	int xdp_tx_frm_cnt = 0;
 
+	enetc_lock_mdio();
+
 	tx_ring = priv->xdp_tx_ring[smp_processor_id()];
 
 	prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
@@ -1109,6 +1111,8 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
 
 	tx_ring->stats.xdp_tx += xdp_tx_frm_cnt;
 
+	enetc_unlock_mdio();
+
 	return xdp_tx_frm_cnt;
 }
 
-- 
cgit v1.2.3


From 3fcc8a25e39171a48b8025835942571b84455fd9 Mon Sep 17 00:00:00 2001
From: Nico Pache <npache@redhat.com>
Date: Fri, 16 Apr 2021 15:38:01 -0700
Subject: kunit: mptcp: adhere to KUNIT formatting standard

Drop 'S' from end of CONFIG_MPTCP_KUNIT_TESTS in order to adhere to the
KUNIT *_KUNIT_TEST config name format.

Fixes: a00a582203db (mptcp: move crypto test to KUNIT)
Reviewed-by: David Gow <davidgow@google.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Nico Pache <npache@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/Kconfig  | 2 +-
 net/mptcp/Makefile | 2 +-
 net/mptcp/crypto.c | 2 +-
 net/mptcp/token.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig
index a014149aa323..20328920f6ed 100644
--- a/net/mptcp/Kconfig
+++ b/net/mptcp/Kconfig
@@ -22,7 +22,7 @@ config MPTCP_IPV6
 	depends on IPV6=y
 	default y
 
-config MPTCP_KUNIT_TESTS
+config MPTCP_KUNIT_TEST
 	tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS
 	depends on KUNIT
 	default KUNIT_ALL_TESTS
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index d2642c012a6a..e54daceac58b 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -9,4 +9,4 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
 
 mptcp_crypto_test-objs := crypto_test.o
 mptcp_token_test-objs := token_test.o
-obj-$(CONFIG_MPTCP_KUNIT_TESTS) += mptcp_crypto_test.o mptcp_token_test.o
+obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o
diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c
index b472dc149856..a8931349933c 100644
--- a/net/mptcp/crypto.c
+++ b/net/mptcp/crypto.c
@@ -78,6 +78,6 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac)
 	sha256(input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE, hmac);
 }
 
-#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
 EXPORT_SYMBOL_GPL(mptcp_crypto_hmac_sha);
 #endif
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index feb4b9ffd462..8f0270a780ce 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -402,7 +402,7 @@ void __init mptcp_token_init(void)
 	}
 }
 
-#if IS_MODULE(CONFIG_MPTCP_KUNIT_TESTS)
+#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
 EXPORT_SYMBOL_GPL(mptcp_token_new_request);
 EXPORT_SYMBOL_GPL(mptcp_token_new_connect);
 EXPORT_SYMBOL_GPL(mptcp_token_accept);
-- 
cgit v1.2.3


From e4b6135134a75f530bd634ea7c168efaf0f9dff3 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:02 -0700
Subject: mptcp: fix format specifiers for unsigned int

Some of the sequence numbers are printed as the negative ones in the debug
log:

[   46.250932] MPTCP: DSS
[   46.250940] MPTCP: data_fin=0 dsn64=0 use_map=0 ack64=1 use_ack=1
[   46.250948] MPTCP: data_ack=2344892449471675613
[   46.251012] MPTCP: msk=000000006e157e3f status=10
[   46.251023] MPTCP: msk=000000006e157e3f snd_data_fin_enable=0 pending=0 snd_nxt=2344892449471700189 write_seq=2344892449471700189
[   46.251343] MPTCP: msk=00000000ec44a129 ssk=00000000f7abd481 sending dfrag at seq=-1658937016627538668 len=100 already sent=0
[   46.251360] MPTCP: data_seq=16787807057082012948 subflow_seq=1 data_len=100 dsn64=1

This patch used the format specifier %u instead of %d for the unsigned int
values to fix it.

Fixes: d9ca1de8c0cd ("mptcp: move page frag allocation in mptcp_sendmsg()")
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 073e20078ed0..9d0b9f76ab3c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1294,7 +1294,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	int avail_size;
 	size_t ret = 0;
 
-	pr_debug("msk=%p ssk=%p sending dfrag at seq=%lld len=%d already sent=%d",
+	pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
 		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
 
 	/* compute send limit */
@@ -1712,7 +1712,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			if (!msk->first_pending)
 				WRITE_ONCE(msk->first_pending, dfrag);
 		}
-		pr_debug("msk=%p dfrag at seq=%lld len=%d sent=%d new=%d", msk,
+		pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d", msk,
 			 dfrag->data_seq, dfrag->data_len, dfrag->already_sent,
 			 !dfrag_collapsed);
 
-- 
cgit v1.2.3


From 43f1140b9678e0fd9dcddd96faee8fad86a70061 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:03 -0700
Subject: mptcp: export mptcp_subflow_active

This patch moved the static function mptcp_subflow_active to protocol.h
as an inline one.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 12 ------------
 net/mptcp/protocol.h | 12 ++++++++++++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9d0b9f76ab3c..5a05c6ca943c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -399,18 +399,6 @@ static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
 	mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
 }
 
-static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
-{
-	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
-	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
-	if (subflow->request_join && !subflow->fully_established)
-		return false;
-
-	/* only send if our side has not closed yet */
-	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
-}
-
 static bool tcp_can_send_ack(const struct sock *ssk)
 {
 	return !((1 << inet_sk_state_load(ssk)) &
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index df269c26f145..edc0128730df 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -544,6 +544,18 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
 			 struct sockaddr_storage *addr,
 			 unsigned short family);
 
+static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+	/* can't send if JOIN hasn't completed yet (i.e. is usable for mptcp) */
+	if (subflow->request_join && !subflow->fully_established)
+		return false;
+
+	/* only send if our side has not closed yet */
+	return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
+}
+
 static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 					      struct mptcp_subflow_context *ctx)
 {
-- 
cgit v1.2.3


From e10a9892097672b62be4ea265a9eb48f698ca3b8 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:04 -0700
Subject: mptcp: add tracepoint in mptcp_subflow_get_send

This patch added a tracepoint in the packet scheduler function
mptcp_subflow_get_send().

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                  |  1 +
 include/trace/events/mptcp.h | 60 ++++++++++++++++++++++++++++++++++++++++++++
 net/mptcp/protocol.c         |  8 +++---
 3 files changed, 65 insertions(+), 4 deletions(-)
 create mode 100644 include/trace/events/mptcp.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 795b9941c151..0f82854cc430 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12546,6 +12546,7 @@ W:	https://github.com/multipath-tcp/mptcp_net-next/wiki
 B:	https://github.com/multipath-tcp/mptcp_net-next/issues
 F:	Documentation/networking/mptcp-sysctl.rst
 F:	include/net/mptcp.h
+F:	include/trace/events/mptcp.h
 F:	include/uapi/linux/mptcp.h
 F:	net/mptcp/
 F:	tools/testing/selftests/net/mptcp/
diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
new file mode 100644
index 000000000000..b1617a0162da
--- /dev/null
+++ b/include/trace/events/mptcp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mptcp
+
+#if !defined(_TRACE_MPTCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MPTCP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mptcp_subflow_get_send,
+
+	TP_PROTO(struct mptcp_subflow_context *subflow),
+
+	TP_ARGS(subflow),
+
+	TP_STRUCT__entry(
+		__field(bool, active)
+		__field(bool, free)
+		__field(u32, snd_wnd)
+		__field(u32, pace)
+		__field(u8, backup)
+		__field(u64, ratio)
+	),
+
+	TP_fast_assign(
+		struct sock *ssk;
+
+		__entry->active = mptcp_subflow_active(subflow);
+		__entry->backup = subflow->backup;
+
+		if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock))
+			__entry->free = sk_stream_memory_free(subflow->tcp_sock);
+		else
+			__entry->free = 0;
+
+		ssk = mptcp_subflow_tcp_sock(subflow);
+		if (ssk && sk_fullsock(ssk)) {
+			__entry->snd_wnd = tcp_sk(ssk)->snd_wnd;
+			__entry->pace = ssk->sk_pacing_rate;
+		} else {
+			__entry->snd_wnd = 0;
+			__entry->pace = 0;
+		}
+
+		if (ssk && sk_fullsock(ssk) && __entry->pace)
+			__entry->ratio = div_u64((u64)ssk->sk_wmem_queued << 32, __entry->pace);
+		else
+			__entry->ratio = 0;
+	),
+
+	TP_printk("active=%d free=%d snd_wnd=%u pace=%u backup=%u ratio=%llu",
+		  __entry->active, __entry->free,
+		  __entry->snd_wnd, __entry->pace,
+		  __entry->backup, __entry->ratio)
+);
+
+#endif /* _TRACE_MPTCP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5a05c6ca943c..e26ea143754d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -25,6 +25,9 @@
 #include "protocol.h"
 #include "mib.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mptcp.h>
+
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 struct mptcp6_sock {
 	struct mptcp_sock msk;
@@ -1410,6 +1413,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 		send_info[i].ratio = -1;
 	}
 	mptcp_for_each_subflow(msk, subflow) {
+		trace_mptcp_subflow_get_send(subflow);
 		ssk =  mptcp_subflow_tcp_sock(subflow);
 		if (!mptcp_subflow_active(subflow))
 			continue;
@@ -1430,10 +1434,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 		}
 	}
 
-	pr_debug("msk=%p nr_active=%d ssk=%p:%lld backup=%p:%lld",
-		 msk, nr_active, send_info[0].ssk, send_info[0].ratio,
-		 send_info[1].ssk, send_info[1].ratio);
-
 	/* pick the best backup if no other subflow is active */
 	if (!nr_active)
 		send_info[0].ssk = send_info[1].ssk;
-- 
cgit v1.2.3


From 0918e34b85c7e125f531caaf3d2918baf2b1a5f9 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:05 -0700
Subject: mptcp: add tracepoint in get_mapping_status

This patch added a tracepoint in the mapping status function
get_mapping_status() to dump every mpext field.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/mptcp.h | 52 ++++++++++++++++++++++++++++++++++++++++++++
 net/mptcp/subflow.c          |  6 ++---
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index b1617a0162da..ec20350d82eb 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -54,6 +54,58 @@ TRACE_EVENT(mptcp_subflow_get_send,
 		  __entry->backup, __entry->ratio)
 );
 
+DECLARE_EVENT_CLASS(mptcp_dump_mpext,
+
+	TP_PROTO(struct mptcp_ext *mpext),
+
+	TP_ARGS(mpext),
+
+	TP_STRUCT__entry(
+		__field(u64, data_ack)
+		__field(u64, data_seq)
+		__field(u32, subflow_seq)
+		__field(u16, data_len)
+		__field(u8, use_map)
+		__field(u8, dsn64)
+		__field(u8, data_fin)
+		__field(u8, use_ack)
+		__field(u8, ack64)
+		__field(u8, mpc_map)
+		__field(u8, frozen)
+		__field(u8, reset_transient)
+		__field(u8, reset_reason)
+	),
+
+	TP_fast_assign(
+		__entry->data_ack = mpext->ack64 ? mpext->data_ack : mpext->data_ack32;
+		__entry->data_seq = mpext->data_seq;
+		__entry->subflow_seq = mpext->subflow_seq;
+		__entry->data_len = mpext->data_len;
+		__entry->use_map = mpext->use_map;
+		__entry->dsn64 = mpext->dsn64;
+		__entry->data_fin = mpext->data_fin;
+		__entry->use_ack = mpext->use_ack;
+		__entry->ack64 = mpext->ack64;
+		__entry->mpc_map = mpext->mpc_map;
+		__entry->frozen = mpext->frozen;
+		__entry->reset_transient = mpext->reset_transient;
+		__entry->reset_reason = mpext->reset_reason;
+	),
+
+	TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u",
+		  __entry->data_ack, __entry->data_seq,
+		  __entry->subflow_seq, __entry->data_len,
+		  __entry->use_map, __entry->dsn64,
+		  __entry->data_fin, __entry->use_ack,
+		  __entry->ack64, __entry->mpc_map,
+		  __entry->frozen, __entry->reset_transient,
+		  __entry->reset_reason)
+);
+
+DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,
+	TP_PROTO(struct mptcp_ext *mpext),
+	TP_ARGS(mpext));
+
 #endif /* _TRACE_MPTCP_H */
 
 /* This part must be outside protection */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index c3da84576b3c..d8a2a55ae916 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -25,6 +25,8 @@
 #include "protocol.h"
 #include "mib.h"
 
+#include <trace/events/mptcp.h>
+
 static void mptcp_subflow_ops_undo_override(struct sock *ssk);
 
 static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
@@ -862,9 +864,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		goto validate_seq;
 	}
 
-	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
-		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
-		 mpext->data_len, mpext->data_fin);
+	trace_get_mapping_status(mpext);
 
 	data_len = mpext->data_len;
 	if (data_len == 0) {
-- 
cgit v1.2.3


From ed66bfb4ce34a94174bb755eeaca85d1661d36ad Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:06 -0700
Subject: mptcp: add tracepoint in ack_update_msk

This patch added a tracepoint in ack_update_msk() to track the
incoming data_ack and window/snd_una updates.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/mptcp.h | 32 ++++++++++++++++++++++++++++++++
 net/mptcp/options.c          |  6 ++++++
 2 files changed, 38 insertions(+)

diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index ec20350d82eb..b90bfe45d995 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -106,6 +106,38 @@ DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status,
 	TP_PROTO(struct mptcp_ext *mpext),
 	TP_ARGS(mpext));
 
+TRACE_EVENT(ack_update_msk,
+
+	TP_PROTO(u64 data_ack, u64 old_snd_una,
+		 u64 new_snd_una, u64 new_wnd_end,
+		 u64 msk_wnd_end),
+
+	TP_ARGS(data_ack, old_snd_una,
+		new_snd_una, new_wnd_end,
+		msk_wnd_end),
+
+	TP_STRUCT__entry(
+		__field(u64, data_ack)
+		__field(u64, old_snd_una)
+		__field(u64, new_snd_una)
+		__field(u64, new_wnd_end)
+		__field(u64, msk_wnd_end)
+	),
+
+	TP_fast_assign(
+		__entry->data_ack = data_ack;
+		__entry->old_snd_una = old_snd_una;
+		__entry->new_snd_una = new_snd_una;
+		__entry->new_wnd_end = new_wnd_end;
+		__entry->msk_wnd_end = msk_wnd_end;
+	),
+
+	TP_printk("data_ack=%llu old_snd_una=%llu new_snd_una=%llu new_wnd_end=%llu msk_wnd_end=%llu",
+		  __entry->data_ack, __entry->old_snd_una,
+		  __entry->new_snd_una, __entry->new_wnd_end,
+		  __entry->msk_wnd_end)
+);
+
 #endif /* _TRACE_MPTCP_H */
 
 /* This part must be outside protection */
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index d51c3ad54d9a..99fc21406168 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -13,6 +13,8 @@
 #include "protocol.h"
 #include "mib.h"
 
+#include <trace/events/mptcp.h>
+
 static bool mptcp_cap_flag_sha256(u8 flags)
 {
 	return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
@@ -943,6 +945,10 @@ static void ack_update_msk(struct mptcp_sock *msk,
 		__mptcp_data_acked(sk);
 	}
 	mptcp_data_unlock(sk);
+
+	trace_ack_update_msk(mp_opt->data_ack,
+			     old_snd_una, new_snd_una,
+			     new_wnd_end, msk->wnd_end);
 }
 
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
-- 
cgit v1.2.3


From d96a838a7ce2772ed181f89becd79b72d267f93a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:07 -0700
Subject: mptcp: add tracepoint in subflow_check_data_avail

This patch added a tracepoint in subflow_check_data_avail() to show the
mapping status.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/mptcp.h | 29 +++++++++++++++++++++++++++++
 net/mptcp/subflow.c          |  4 +---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h
index b90bfe45d995..775a46d0b0f0 100644
--- a/include/trace/events/mptcp.h
+++ b/include/trace/events/mptcp.h
@@ -7,6 +7,14 @@
 
 #include <linux/tracepoint.h>
 
+#define show_mapping_status(status)					\
+	__print_symbolic(status,					\
+		{ 0, "MAPPING_OK" },					\
+		{ 1, "MAPPING_INVALID" },				\
+		{ 2, "MAPPING_EMPTY" },					\
+		{ 3, "MAPPING_DATA_FIN" },				\
+		{ 4, "MAPPING_DUMMY" })
+
 TRACE_EVENT(mptcp_subflow_get_send,
 
 	TP_PROTO(struct mptcp_subflow_context *subflow),
@@ -138,6 +146,27 @@ TRACE_EVENT(ack_update_msk,
 		  __entry->msk_wnd_end)
 );
 
+TRACE_EVENT(subflow_check_data_avail,
+
+	TP_PROTO(__u8 status, struct sk_buff *skb),
+
+	TP_ARGS(status, skb),
+
+	TP_STRUCT__entry(
+		__field(u8, status)
+		__field(const void *, skb)
+	),
+
+	TP_fast_assign(
+		__entry->status = status;
+		__entry->skb = skb;
+	),
+
+	TP_printk("mapping_status=%s, skb=%p",
+		  show_mapping_status(__entry->status),
+		  __entry->skb)
+);
+
 #endif /* _TRACE_MPTCP_H */
 
 /* This part must be outside protection */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d8a2a55ae916..82e91b00ad39 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1002,8 +1002,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	struct mptcp_sock *msk;
 	struct sk_buff *skb;
 
-	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
-		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
 	if (!skb_peek(&ssk->sk_receive_queue))
 		subflow->data_avail = 0;
 	if (subflow->data_avail)
@@ -1015,7 +1013,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		u64 old_ack;
 
 		status = get_mapping_status(ssk, msk);
-		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
+		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
 		if (status == MAPPING_INVALID) {
 			ssk->sk_err = EBADMSG;
 			goto fatal;
-- 
cgit v1.2.3


From 442279154c73bc681e5346bdd1270a628dfdfdc7 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 16 Apr 2021 15:38:08 -0700
Subject: mptcp: use mptcp_for_each_subflow in mptcp_close

This patch used the macro helper mptcp_for_each_subflow() instead of
list_for_each_entry() in mptcp_close.

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e26ea143754d..c14ac2975736 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2611,7 +2611,7 @@ static void mptcp_close(struct sock *sk, long timeout)
 cleanup:
 	/* orphan all the subflows */
 	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
-	list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) {
+	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		bool slow = lock_sock_fast(ssk);
 
-- 
cgit v1.2.3


From 1c22233a745e46dbc11db30d1f65aaae01e26eb5 Mon Sep 17 00:00:00 2001
From: Chen Lin <chen.lin5@zte.com.cn>
Date: Tue, 16 Feb 2021 11:41:58 +0800
Subject: cw1200: Remove unused function pointer typedef cw1200_wsm_handler

Remove the 'cw1200_wsm_handler' typedef as it is not used.

Signed-off-by: Chen Lin <chen.lin5@zte.com.cn>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613446918-4532-1-git-send-email-chen45464546@163.com
---
 drivers/net/wireless/st/cw1200/bh.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/st/cw1200/bh.c b/drivers/net/wireless/st/cw1200/bh.c
index c364a3987618..8bade5d89f12 100644
--- a/drivers/net/wireless/st/cw1200/bh.c
+++ b/drivers/net/wireless/st/cw1200/bh.c
@@ -42,9 +42,6 @@ enum cw1200_bh_pm_state {
 	CW1200_BH_RESUME,
 };
 
-typedef int (*cw1200_wsm_handler)(struct cw1200_common *priv,
-	u8 *data, size_t size);
-
 static void cw1200_bh_work(struct work_struct *work)
 {
 	struct cw1200_common *priv =
-- 
cgit v1.2.3


From 9dc5fdc8c4f889bd9ea5b6aa8b9d47ff9acef47e Mon Sep 17 00:00:00 2001
From: Chen Lin <chen.lin5@zte.com.cn>
Date: Tue, 16 Feb 2021 12:30:33 +0800
Subject: cw1200: Remove unused function pointer typedef wsm_*

Remove the 'wsm_*' typedef as it is not used.

Signed-off-by: Chen Lin <chen.lin5@zte.com.cn>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1613449833-4910-1-git-send-email-chen45464546@163.com
---
 drivers/net/wireless/st/cw1200/wsm.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/wireless/st/cw1200/wsm.h b/drivers/net/wireless/st/cw1200/wsm.h
index 1ffa47994bb9..89fdc9115e9d 100644
--- a/drivers/net/wireless/st/cw1200/wsm.h
+++ b/drivers/net/wireless/st/cw1200/wsm.h
@@ -785,8 +785,6 @@ struct wsm_tx_confirm {
 };
 
 /* 3.15 */
-typedef void (*wsm_tx_confirm_cb) (struct cw1200_common *priv,
-				   struct wsm_tx_confirm *arg);
 
 /* Note that ideology of wsm_tx struct is different against the rest of
  * WSM API. wsm_hdr is /not/ a caller-adapted struct to be used as an input
@@ -862,9 +860,6 @@ struct wsm_rx {
 /* = sizeof(generic hi hdr) + sizeof(wsm hdr) */
 #define WSM_RX_EXTRA_HEADROOM (16)
 
-typedef void (*wsm_rx_cb) (struct cw1200_common *priv, struct wsm_rx *arg,
-			   struct sk_buff **skb_p);
-
 /* 3.17 */
 struct wsm_event {
 	/* WSM_STATUS_... */
@@ -1180,8 +1175,6 @@ struct wsm_switch_channel {
 int wsm_switch_channel(struct cw1200_common *priv,
 		       const struct wsm_switch_channel *arg);
 
-typedef void (*wsm_channel_switch_cb) (struct cw1200_common *priv);
-
 #define WSM_START_REQ_ID 0x0017
 #define WSM_START_RESP_ID 0x0417
 
@@ -1240,8 +1233,6 @@ int wsm_start_find(struct cw1200_common *priv);
 
 int wsm_stop_find(struct cw1200_common *priv);
 
-typedef void (*wsm_find_complete_cb) (struct cw1200_common *priv, u32 status);
-
 struct wsm_suspend_resume {
 	/* See 3.52 */
 	/* Link ID */
@@ -1256,9 +1247,6 @@ struct wsm_suspend_resume {
 	/* [out] */ int queue;
 };
 
-typedef void (*wsm_suspend_resume_cb) (struct cw1200_common *priv,
-				       struct wsm_suspend_resume *arg);
-
 /* 3.54 Update-IE request. */
 struct wsm_update_ie {
 	/* WSM_UPDATE_IE_... */
-- 
cgit v1.2.3


From 18fb0bedb5fc2fddc057dbe48b7360a6ffda34b3 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 19 Feb 2021 13:26:07 +0800
Subject: rtlwifi: 8821ae: upgrade PHY and RF parameters

The signal strength of 5G is quite low, so user can't connect to an AP far
away. New parameters with new format and its parser are updated by the commit
84d26fda52e2 ("rtlwifi: Update 8821ae new phy parameters and its parser."), but
some parameters are missing. Use this commit to update to the novel parameters
that use new format.

Fixes: 84d26fda52e2 ("rtlwifi: Update 8821ae new phy parameters and its parser")
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210219052607.7323-1-pkshih@realtek.com
---
 .../net/wireless/realtek/rtlwifi/rtl8821ae/table.c | 500 +++++++++++++++------
 1 file changed, 370 insertions(+), 130 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/table.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/table.c
index 27c8a5d96520..fcaaf664cbec 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/table.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/table.c
@@ -249,7 +249,7 @@ u32 RTL8821AE_PHY_REG_ARRAY[] = {
 	0x824, 0x00030FE0,
 	0x828, 0x00000000,
 	0x82C, 0x002081DD,
-	0x830, 0x2AAA8E24,
+	0x830, 0x2AAAEEC8,
 	0x834, 0x0037A706,
 	0x838, 0x06489B44,
 	0x83C, 0x0000095B,
@@ -324,10 +324,10 @@ u32 RTL8821AE_PHY_REG_ARRAY[] = {
 	0x9D8, 0x00000000,
 	0x9DC, 0x00000000,
 	0x9E0, 0x00005D00,
-	0x9E4, 0x00000002,
+	0x9E4, 0x00000003,
 	0x9E8, 0x00000001,
 	0xA00, 0x00D047C8,
-	0xA04, 0x01FF000C,
+	0xA04, 0x01FF800C,
 	0xA08, 0x8C8A8300,
 	0xA0C, 0x2E68000F,
 	0xA10, 0x9500BB78,
@@ -1320,7 +1320,11 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x083, 0x00021800,
 		0x084, 0x00028000,
 		0x085, 0x00048000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
+		0x086, 0x0009483A,
+	0xA0000000,	0x00000000,
 		0x086, 0x00094838,
+	0xB0000000,	0x00000000,
 		0x087, 0x00044980,
 		0x088, 0x00048000,
 		0x089, 0x0000D480,
@@ -1409,36 +1413,32 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x03C, 0x000CA000,
 		0x0EF, 0x00000000,
 		0x0EF, 0x00001100,
-	0xFF0F0104, 0xABCD,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0004ADF3,
 		0x034, 0x00049DF0,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0004ADF3,
 		0x034, 0x00049DF0,
-	0xFF0F0404, 0xCDEF,
-		0x034, 0x0004ADF3,
-		0x034, 0x00049DF0,
-	0xFF0F0200, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0004ADF5,
 		0x034, 0x00049DF2,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0004A0F3,
+		0x034, 0x000490B1,
+		0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0004A0F3,
 		0x034, 0x000490B1,
-	0xCDCDCDCD, 0xCDCD,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0004ADF5,
+		0x034, 0x00049DF2,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0004ADF3,
+		0x034, 0x00049DF0,
+	0xA0000000,	0x00000000,
 		0x034, 0x0004ADF7,
 		0x034, 0x00049DF3,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
-		0x034, 0x00048DED,
-		0x034, 0x00047DEA,
-		0x034, 0x00046DE7,
-		0x034, 0x00045CE9,
-		0x034, 0x00044CE6,
-		0x034, 0x000438C6,
-		0x034, 0x00042886,
-		0x034, 0x00041486,
-		0x034, 0x00040447,
-	0xFF0F0204, 0xCDEF,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00048DED,
 		0x034, 0x00047DEA,
 		0x034, 0x00046DE7,
@@ -1448,7 +1448,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00042886,
 		0x034, 0x00041486,
 		0x034, 0x00040447,
-	0xFF0F0404, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00048DED,
 		0x034, 0x00047DEA,
 		0x034, 0x00046DE7,
@@ -1458,7 +1458,17 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00042886,
 		0x034, 0x00041486,
 		0x034, 0x00040447,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x000480AE,
+		0x034, 0x000470AB,
+		0x034, 0x0004608B,
+		0x034, 0x00045069,
+		0x034, 0x00044048,
+		0x034, 0x00043045,
+		0x034, 0x00042026,
+		0x034, 0x00041023,
+		0x034, 0x00040002,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x000480AE,
 		0x034, 0x000470AB,
 		0x034, 0x0004608B,
@@ -1468,7 +1478,17 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00042026,
 		0x034, 0x00041023,
 		0x034, 0x00040002,
-	0xCDCDCDCD, 0xCDCD,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x00048DED,
+		0x034, 0x00047DEA,
+		0x034, 0x00046DE7,
+		0x034, 0x00045CE9,
+		0x034, 0x00044CE6,
+		0x034, 0x000438C6,
+		0x034, 0x00042886,
+		0x034, 0x00041486,
+		0x034, 0x00040447,
+	0xA0000000,	0x00000000,
 		0x034, 0x00048DEF,
 		0x034, 0x00047DEC,
 		0x034, 0x00046DE9,
@@ -1478,38 +1498,36 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x0004248A,
 		0x034, 0x0004108D,
 		0x034, 0x0004008A,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0200, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000210,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0002ADF4,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0002A0F3,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0002A0F3,
-	0xCDCDCDCD, 0xCDCD,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0002ADF4,
+	0xA0000000,	0x00000000,
 		0x034, 0x0002ADF7,
-	0xFF0F0200, 0xDEAD,
-	0xFF0F0104, 0xABCD,
-		0x034, 0x00029DF4,
-	0xFF0F0204, 0xCDEF,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00029DF4,
-	0xFF0F0404, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00029DF4,
-	0xFF0F0200, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00029DF1,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x000290F0,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x000290F0,
-	0xCDCDCDCD, 0xCDCD,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x00029DF1,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x00029DF4,
+	0xA0000000,	0x00000000,
 		0x034, 0x00029DF2,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
-		0x034, 0x00028DF1,
-		0x034, 0x00027DEE,
-		0x034, 0x00026DEB,
-		0x034, 0x00025CEC,
-		0x034, 0x00024CE9,
-		0x034, 0x000238CA,
-		0x034, 0x00022889,
-		0x034, 0x00021489,
-		0x034, 0x0002044A,
-	0xFF0F0204, 0xCDEF,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00028DF1,
 		0x034, 0x00027DEE,
 		0x034, 0x00026DEB,
@@ -1519,7 +1537,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00022889,
 		0x034, 0x00021489,
 		0x034, 0x0002044A,
-	0xFF0F0404, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00028DF1,
 		0x034, 0x00027DEE,
 		0x034, 0x00026DEB,
@@ -1529,7 +1547,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00022889,
 		0x034, 0x00021489,
 		0x034, 0x0002044A,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x000280AF,
 		0x034, 0x000270AC,
 		0x034, 0x0002608B,
@@ -1539,7 +1557,27 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00022026,
 		0x034, 0x00021023,
 		0x034, 0x00020002,
-	0xCDCDCDCD, 0xCDCD,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x000280AF,
+		0x034, 0x000270AC,
+		0x034, 0x0002608B,
+		0x034, 0x00025069,
+		0x034, 0x00024048,
+		0x034, 0x00023045,
+		0x034, 0x00022026,
+		0x034, 0x00021023,
+		0x034, 0x00020002,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x00028DF1,
+		0x034, 0x00027DEE,
+		0x034, 0x00026DEB,
+		0x034, 0x00025CEC,
+		0x034, 0x00024CE9,
+		0x034, 0x000238CA,
+		0x034, 0x00022889,
+		0x034, 0x00021489,
+		0x034, 0x0002044A,
+	0xA0000000,	0x00000000,
 		0x034, 0x00028DEE,
 		0x034, 0x00027DEB,
 		0x034, 0x00026CCD,
@@ -1549,27 +1587,24 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00022849,
 		0x034, 0x00021449,
 		0x034, 0x0002004D,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F02C0, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x8000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0000A0D7,
+		0x034, 0x000090D3,
+		0x034, 0x000080B1,
+		0x034, 0x000070AE,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0000A0D7,
 		0x034, 0x000090D3,
 		0x034, 0x000080B1,
 		0x034, 0x000070AE,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x034, 0x0000ADF7,
 		0x034, 0x00009DF4,
 		0x034, 0x00008DF1,
 		0x034, 0x00007DEE,
-	0xFF0F02C0, 0xDEAD,
-	0xFF0F0104, 0xABCD,
-		0x034, 0x00006DEB,
-		0x034, 0x00005CEC,
-		0x034, 0x00004CE9,
-		0x034, 0x000038CA,
-		0x034, 0x00002889,
-		0x034, 0x00001489,
-		0x034, 0x0000044A,
-	0xFF0F0204, 0xCDEF,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00006DEB,
 		0x034, 0x00005CEC,
 		0x034, 0x00004CE9,
@@ -1577,7 +1612,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00002889,
 		0x034, 0x00001489,
 		0x034, 0x0000044A,
-	0xFF0F0404, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x00006DEB,
 		0x034, 0x00005CEC,
 		0x034, 0x00004CE9,
@@ -1585,7 +1620,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00002889,
 		0x034, 0x00001489,
 		0x034, 0x0000044A,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
 		0x034, 0x0000608D,
 		0x034, 0x0000506B,
 		0x034, 0x0000404A,
@@ -1593,7 +1628,23 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00002044,
 		0x034, 0x00001025,
 		0x034, 0x00000004,
-	0xCDCDCDCD, 0xCDCD,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x0000608D,
+		0x034, 0x0000506B,
+		0x034, 0x0000404A,
+		0x034, 0x00003047,
+		0x034, 0x00002044,
+		0x034, 0x00001025,
+		0x034, 0x00000004,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x034, 0x00006DEB,
+		0x034, 0x00005CEC,
+		0x034, 0x00004CE9,
+		0x034, 0x000038CA,
+		0x034, 0x00002889,
+		0x034, 0x00001489,
+		0x034, 0x0000044A,
+	0xA0000000,	0x00000000,
 		0x034, 0x00006DCD,
 		0x034, 0x00005CCD,
 		0x034, 0x00004CCA,
@@ -1601,11 +1652,11 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x034, 0x00002888,
 		0x034, 0x00001488,
 		0x034, 0x00000486,
-	0xFF0F0104, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x018, 0x0001712A,
 		0x0EF, 0x00000040,
-	0xFF0F0104, 0xABCD,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x035, 0x00000187,
 		0x035, 0x00008187,
 		0x035, 0x00010187,
@@ -1615,7 +1666,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x035, 0x00040188,
 		0x035, 0x00048188,
 		0x035, 0x00050188,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x035, 0x00000187,
 		0x035, 0x00008187,
 		0x035, 0x00010187,
@@ -1625,7 +1676,37 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x035, 0x00040188,
 		0x035, 0x00048188,
 		0x035, 0x00050188,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x035, 0x00000128,
+		0x035, 0x00008128,
+		0x035, 0x00010128,
+		0x035, 0x000201C8,
+		0x035, 0x000281C8,
+		0x035, 0x000301C8,
+		0x035, 0x000401C8,
+		0x035, 0x000481C8,
+		0x035, 0x000501C8,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x035, 0x00000145,
+		0x035, 0x00008145,
+		0x035, 0x00010145,
+		0x035, 0x00020196,
+		0x035, 0x00028196,
+		0x035, 0x00030196,
+		0x035, 0x000401C7,
+		0x035, 0x000481C7,
+		0x035, 0x000501C7,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x035, 0x00000128,
+		0x035, 0x00008128,
+		0x035, 0x00010128,
+		0x035, 0x000201C8,
+		0x035, 0x000281C8,
+		0x035, 0x000301C8,
+		0x035, 0x000401C8,
+		0x035, 0x000481C8,
+		0x035, 0x000501C8,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x035, 0x00000187,
 		0x035, 0x00008187,
 		0x035, 0x00010187,
@@ -1635,7 +1716,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x035, 0x00040188,
 		0x035, 0x00048188,
 		0x035, 0x00050188,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x035, 0x00000145,
 		0x035, 0x00008145,
 		0x035, 0x00010145,
@@ -1645,11 +1726,11 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x035, 0x000401C7,
 		0x035, 0x000481C7,
 		0x035, 0x000501C7,
-	0xFF0F0104, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x018, 0x0001712A,
 		0x0EF, 0x00000010,
-	0xFF0F0104, 0xABCD,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x036, 0x00085733,
 		0x036, 0x0008D733,
 		0x036, 0x00095733,
@@ -1662,7 +1743,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x036, 0x000CE4B4,
 		0x036, 0x000D64B4,
 		0x036, 0x000DE4B4,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x036, 0x00085733,
 		0x036, 0x0008D733,
 		0x036, 0x00095733,
@@ -1675,7 +1756,46 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x036, 0x000CE4B4,
 		0x036, 0x000D64B4,
 		0x036, 0x000DE4B4,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x036, 0x000063B5,
+		0x036, 0x0000E3B5,
+		0x036, 0x000163B5,
+		0x036, 0x0001E3B5,
+		0x036, 0x000263B5,
+		0x036, 0x0002E3B5,
+		0x036, 0x000363B5,
+		0x036, 0x0003E3B5,
+		0x036, 0x000463B5,
+		0x036, 0x0004E3B5,
+		0x036, 0x000563B5,
+		0x036, 0x0005E3B5,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x036, 0x000056B3,
+		0x036, 0x0000D6B3,
+		0x036, 0x000156B3,
+		0x036, 0x0001D6B3,
+		0x036, 0x00026634,
+		0x036, 0x0002E634,
+		0x036, 0x00036634,
+		0x036, 0x0003E634,
+		0x036, 0x000467B4,
+		0x036, 0x0004E7B4,
+		0x036, 0x000567B4,
+		0x036, 0x0005E7B4,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x036, 0x000063B5,
+		0x036, 0x0000E3B5,
+		0x036, 0x000163B5,
+		0x036, 0x0001E3B5,
+		0x036, 0x000263B5,
+		0x036, 0x0002E3B5,
+		0x036, 0x000363B5,
+		0x036, 0x0003E3B5,
+		0x036, 0x000463B5,
+		0x036, 0x0004E3B5,
+		0x036, 0x000563B5,
+		0x036, 0x0005E3B5,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x036, 0x00085733,
 		0x036, 0x0008D733,
 		0x036, 0x00095733,
@@ -1688,7 +1808,7 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x036, 0x000CE4B4,
 		0x036, 0x000D64B4,
 		0x036, 0x000DE4B4,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x036, 0x000056B3,
 		0x036, 0x0000D6B3,
 		0x036, 0x000156B3,
@@ -1701,103 +1821,162 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x036, 0x0004E7B4,
 		0x036, 0x000567B4,
 		0x036, 0x0005E7B4,
-	0xFF0F0104, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x0EF, 0x00000008,
-	0xFF0F0104, 0xABCD,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x000001C8,
 		0x03C, 0x00000492,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x000001C8,
 		0x03C, 0x00000492,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x000001B6,
+		0x03C, 0x00000492,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x0000022A,
+		0x03C, 0x00000594,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x000001B6,
+		0x03C, 0x00000492,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x000001C8,
 		0x03C, 0x00000492,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x03C, 0x0000022A,
 		0x03C, 0x00000594,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x00000800,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x00000800,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x00000800,
-	0xFF0F02C0, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
 		0x03C, 0x00000820,
-	0xCDCDCDCD, 0xCDCD,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x00000820,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x00000800,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x03C, 0x00000800,
+	0xA0000000,	0x00000000,
 		0x03C, 0x00000900,
-	0xFF0F0104, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x018, 0x0001712A,
 		0x0EF, 0x00000002,
-	0xFF0F0104, 0xABCD,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x008, 0x0004E400,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x008, 0x0004E400,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x008, 0x00002000,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x008, 0x00002000,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x008, 0x00002000,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x008, 0x00002000,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x008, 0x0004E400,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x008, 0x00002000,
-	0xFF0F0104, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x0DF, 0x000000C0,
-		0x01F, 0x00040064,
-	0xFF0F0104, 0xABCD,
+		0x01F, 0x00000064,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x058, 0x000A7284,
 		0x059, 0x000600EC,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x058, 0x000A7284,
 		0x059, 0x000600EC,
-	0xFF0F0404, 0xCDEF,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x058, 0x00081184,
+		0x059, 0x0006016C,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x058, 0x00081184,
+		0x059, 0x0006016C,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x058, 0x00081184,
+		0x059, 0x0006016C,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x058, 0x000A7284,
 		0x059, 0x000600EC,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x058, 0x00081184,
 		0x059, 0x0006016C,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x061, 0x000E8D73,
 		0x062, 0x00093FC5,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x061, 0x000E8D73,
 		0x062, 0x00093FC5,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x061, 0x000EFD83,
+		0x062, 0x00093FCC,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x061, 0x000EAD53,
+		0x062, 0x00093BC4,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x061, 0x000EFD83,
+		0x062, 0x00093FCC,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x061, 0x000E8D73,
 		0x062, 0x00093FC5,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x061, 0x000EAD53,
 		0x062, 0x00093BC4,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x000110E9,
-	0xFF0F0204, 0xCDEF,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x000110E9,
-	0xFF0F0404, 0xCDEF,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x000110EB,
+	0x9000020c,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x000110E9,
-	0xFF0F0200, 0xCDEF,
-		0x063, 0x000710E9,
-	0xFF0F02C0, 0xCDEF,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x000110E9,
-	0xCDCDCDCD, 0xCDCD,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x000110EB,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x000110E9,
+	0xA0000000,	0x00000000,
 		0x063, 0x000714E9,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0104, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
+		0x064, 0x0001C27C,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
+		0x064, 0x0001C27C,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
 		0x064, 0x0001C27C,
-	0xFF0F0204, 0xCDEF,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x064, 0x0001C67C,
+	0x90000200,	0x00000000,	0x40000000,	0x00000000,
 		0x064, 0x0001C27C,
-	0xFF0F0404, 0xCDEF,
+	0x90000410,	0x00000000,	0x40000000,	0x00000000,
 		0x064, 0x0001C27C,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x064, 0x0001C67C,
-	0xFF0F0104, 0xDEAD,
-	0xFF0F0200, 0xABCD,
+	0xB0000000,	0x00000000,
+	0x80000111,	0x00000000,	0x40000000,	0x00000000,
+		0x065, 0x00091016,
+	0x90000110,	0x00000000,	0x40000000,	0x00000000,
+		0x065, 0x00091016,
+	0x90000210,	0x00000000,	0x40000000,	0x00000000,
 		0x065, 0x00093016,
-	0xFF0F02C0, 0xCDEF,
+		0x9000020c,	0x00000000,	0x40000000,	0x00000000,
 		0x065, 0x00093015,
-	0xCDCDCDCD, 0xCDCD,
+		0x9000040c,	0x00000000,	0x40000000,	0x00000000,
+		0x065, 0x00093015,
+		0x90000200,	0x00000000,	0x40000000,	0x00000000,
+		0x065, 0x00093016,
+		0xA0000000,	0x00000000,
 		0x065, 0x00091016,
-	0xFF0F0200, 0xDEAD,
+		0xB0000000,	0x00000000,
 		0x018, 0x00000006,
 		0x0EF, 0x00002000,
 		0x03B, 0x0003824B,
@@ -1895,9 +2074,10 @@ u32 RTL8821AE_RADIOA_ARRAY[] = {
 		0x0B4, 0x0001214C,
 		0x0B7, 0x0003000C,
 		0x01C, 0x000539D2,
+		0x0C4, 0x000AFE00,
 		0x018, 0x0001F12A,
-		0x0FE, 0x00000000,
-		0x0FE, 0x00000000,
+		0xFFE, 0x00000000,
+		0xFFE, 0x00000000,
 		0x018, 0x0001712A,
 
 };
@@ -2017,6 +2197,7 @@ u32 RTL8812AE_MAC_REG_ARRAY[] = {
 u32 RTL8812AE_MAC_1T_ARRAYLEN = ARRAY_SIZE(RTL8812AE_MAC_REG_ARRAY);
 
 u32 RTL8821AE_MAC_REG_ARRAY[] = {
+		0x421, 0x0000000F,
 		0x428, 0x0000000A,
 		0x429, 0x00000010,
 		0x430, 0x00000000,
@@ -2485,7 +2666,7 @@ u32 RTL8821AE_AGC_TAB_ARRAY[] = {
 		0x81C, 0xA6360001,
 		0x81C, 0xA5380001,
 		0x81C, 0xA43A0001,
-		0x81C, 0xA33C0001,
+		0x81C, 0x683C0001,
 		0x81C, 0x673E0001,
 		0x81C, 0x66400001,
 		0x81C, 0x65420001,
@@ -2519,7 +2700,66 @@ u32 RTL8821AE_AGC_TAB_ARRAY[] = {
 		0x81C, 0x017A0001,
 		0x81C, 0x017C0001,
 		0x81C, 0x017E0001,
-	0xFF0F02C0, 0xABCD,
+	0x8000020c,	0x00000000,	0x40000000,	0x00000000,
+		0x81C, 0xFB000101,
+		0x81C, 0xFA020101,
+		0x81C, 0xF9040101,
+		0x81C, 0xF8060101,
+		0x81C, 0xF7080101,
+		0x81C, 0xF60A0101,
+		0x81C, 0xF50C0101,
+		0x81C, 0xF40E0101,
+		0x81C, 0xF3100101,
+		0x81C, 0xF2120101,
+		0x81C, 0xF1140101,
+		0x81C, 0xF0160101,
+		0x81C, 0xEF180101,
+		0x81C, 0xEE1A0101,
+		0x81C, 0xED1C0101,
+		0x81C, 0xEC1E0101,
+		0x81C, 0xEB200101,
+		0x81C, 0xEA220101,
+		0x81C, 0xE9240101,
+		0x81C, 0xE8260101,
+		0x81C, 0xE7280101,
+		0x81C, 0xE62A0101,
+		0x81C, 0xE52C0101,
+		0x81C, 0xE42E0101,
+		0x81C, 0xE3300101,
+		0x81C, 0xA5320101,
+		0x81C, 0xA4340101,
+		0x81C, 0xA3360101,
+		0x81C, 0x87380101,
+		0x81C, 0x863A0101,
+		0x81C, 0x853C0101,
+		0x81C, 0x843E0101,
+		0x81C, 0x69400101,
+		0x81C, 0x68420101,
+		0x81C, 0x67440101,
+		0x81C, 0x66460101,
+		0x81C, 0x49480101,
+		0x81C, 0x484A0101,
+		0x81C, 0x474C0101,
+		0x81C, 0x2A4E0101,
+		0x81C, 0x29500101,
+		0x81C, 0x28520101,
+		0x81C, 0x27540101,
+		0x81C, 0x26560101,
+		0x81C, 0x25580101,
+		0x81C, 0x245A0101,
+		0x81C, 0x235C0101,
+		0x81C, 0x055E0101,
+		0x81C, 0x04600101,
+		0x81C, 0x03620101,
+		0x81C, 0x02640101,
+		0x81C, 0x01660101,
+		0x81C, 0x01680101,
+		0x81C, 0x016A0101,
+		0x81C, 0x016C0101,
+		0x81C, 0x016E0101,
+		0x81C, 0x01700101,
+		0x81C, 0x01720101,
+	0x9000040c,	0x00000000,	0x40000000,	0x00000000,
 		0x81C, 0xFB000101,
 		0x81C, 0xFA020101,
 		0x81C, 0xF9040101,
@@ -2578,7 +2818,7 @@ u32 RTL8821AE_AGC_TAB_ARRAY[] = {
 		0x81C, 0x016E0101,
 		0x81C, 0x01700101,
 		0x81C, 0x01720101,
-	0xCDCDCDCD, 0xCDCD,
+	0xA0000000,	0x00000000,
 		0x81C, 0xFF000101,
 		0x81C, 0xFF020101,
 		0x81C, 0xFE040101,
@@ -2637,7 +2877,7 @@ u32 RTL8821AE_AGC_TAB_ARRAY[] = {
 		0x81C, 0x046E0101,
 		0x81C, 0x03700101,
 		0x81C, 0x02720101,
-	0xFF0F02C0, 0xDEAD,
+	0xB0000000,	0x00000000,
 		0x81C, 0x01740101,
 		0x81C, 0x01760101,
 		0x81C, 0x01780101,
-- 
cgit v1.2.3


From a221d0afbf39fcaadd481acf3551cd7269b647c2 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Tue, 23 Feb 2021 14:57:54 +0800
Subject: qtnfmac: remove meaningless labels

some function's label meaningless, the return statement follows
the goto statement, so just remove it.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Reviewed-by: Sergey Matyukevich <geomatsi@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210223065754.34392-1-samirweng1979@163.com
---
 drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 27 +++++------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
index 504b4d0b98c4..84b15a655eab 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
@@ -680,13 +680,10 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev,
 		eth_zero_addr(vif->bssid);
 
 	ret = qtnf_cmd_send_connect(vif, sme);
-	if (ret) {
+	if (ret)
 		pr_err("VIF%u.%u: failed to connect\n",
 		       vif->mac->macid, vif->vifid);
-		goto out;
-	}
 
-out:
 	return ret;
 }
 
@@ -702,13 +699,10 @@ qtnf_external_auth(struct wiphy *wiphy, struct net_device *dev,
 		pr_warn("unexpected bssid: %pM", auth->bssid);
 
 	ret = qtnf_cmd_send_external_auth(vif, auth);
-	if (ret) {
+	if (ret)
 		pr_err("VIF%u.%u: failed to report external auth\n",
 		       vif->mac->macid, vif->vifid);
-		goto out;
-	}
 
-out:
 	return ret;
 }
 
@@ -727,8 +721,7 @@ qtnf_disconnect(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	if (vif->wdev.iftype != NL80211_IFTYPE_STATION) {
-		ret = -EOPNOTSUPP;
-		goto out;
+		return -EOPNOTSUPP;
 	}
 
 	ret = qtnf_cmd_send_disconnect(vif, reason_code);
@@ -742,7 +735,6 @@ qtnf_disconnect(struct wiphy *wiphy, struct net_device *dev,
 				      NULL, 0, true, GFP_KERNEL);
 	}
 
-out:
 	return ret;
 }
 
@@ -935,13 +927,10 @@ static int qtnf_update_owe_info(struct wiphy *wiphy, struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	ret = qtnf_cmd_send_update_owe(vif, owe_info);
-	if (ret) {
+	if (ret)
 		pr_err("VIF%u.%u: failed to update owe info\n",
 		       vif->mac->macid, vif->vifid);
-		goto out;
-	}
 
-out:
 	return ret;
 }
 
@@ -987,18 +976,14 @@ static int qtnf_resume(struct wiphy *wiphy)
 	vif = qtnf_mac_get_base_vif(mac);
 	if (!vif) {
 		pr_err("MAC%u: primary VIF is not configured\n", mac->macid);
-		ret = -EFAULT;
-		goto exit;
+		return -EFAULT;
 	}
 
 	ret = qtnf_cmd_send_wowlan_set(vif, NULL);
-	if (ret) {
+	if (ret)
 		pr_err("MAC%u: failed to reset WoWLAN triggers\n",
 		       mac->macid);
-		goto exit;
-	}
 
-exit:
 	return ret;
 }
 
-- 
cgit v1.2.3


From 2377b1c49d4831c09af269cdbe21791e8945cf97 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Sun, 21 Mar 2021 01:14:26 +0530
Subject: rtlwifi: Few mundane typo fixes

s/resovle/resolve/
s/broadcase/broadcast/
s/sytem/system/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210320194426.21621-1-unixbhaskar@gmail.com
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 965bd9589045..c9b6ee81dcb9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -564,7 +564,7 @@ static int rtl_op_resume(struct ieee80211_hw *hw)
 	rtlhal->enter_pnp_sleep = false;
 	rtlhal->wake_from_pnp_sleep = true;
 
-	/* to resovle s4 can not wake up*/
+	/* to resolve s4 can not wake up*/
 	now = ktime_get_real_seconds();
 	if (now - rtlhal->last_suspend_sec < 5)
 		return -1;
@@ -806,7 +806,7 @@ static void rtl_op_configure_filter(struct ieee80211_hw *hw,
 	if (0 == changed_flags)
 		return;
 
-	/*TODO: we disable broadcase now, so enable here */
+	/*TODO: we disable broadcast now, so enable here */
 	if (changed_flags & FIF_ALLMULTI) {
 		if (*new_flags & FIF_ALLMULTI) {
 			mac->rx_conf |= rtlpriv->cfg->maps[MAC_RCR_AM] |
@@ -1796,7 +1796,7 @@ bool rtl_hal_pwrseqcmdparsing(struct rtl_priv *rtlpriv, u8 cut_version,
 				value |= (GET_PWR_CFG_VALUE(cfg_cmd) &
 					  GET_PWR_CFG_MASK(cfg_cmd));
 
-				/*Write the value back to sytem register*/
+				/*Write the value back to system register*/
 				rtl_write_byte(rtlpriv, offset, value);
 				break;
 			case PWR_CMD_POLLING:
-- 
cgit v1.2.3


From 87431bc1f0f67aa2d23ca1b9682fe54f68549d42 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 27 Mar 2021 23:00:14 +0000
Subject: rtlwifi: remove redundant assignment to variable err

Variable err is assigned -ENODEV followed by an error return path
via label error_out that does not access the variable and returns
with the -ENODEV error return code. The assignment to err is
redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210327230014.25554-1-colin.king@canonical.com
---
 drivers/net/wireless/realtek/rtlwifi/usb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 6c5e242b1bc5..37a9a03123f3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -1070,7 +1070,6 @@ int rtl_usb_probe(struct usb_interface *intf,
 	err = ieee80211_register_hw(hw);
 	if (err) {
 		pr_err("Can't register mac80211 hw.\n");
-		err = -ENODEV;
 		goto error_out;
 	}
 	rtlpriv->mac80211.mac80211_registered = 1;
-- 
cgit v1.2.3


From 8e04a06530c613b6a4c7806c1078d8305a788086 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 31 Mar 2021 17:13:43 +0800
Subject: rtlwifi: rtl8188ee: remove redundant assignment of variable
 rtlpriv->btcoexist.reg_bt_sco

Assigning value "3" to "rtlpriv->btcoexist.reg_bt_sco" here, but that
stored value is overwritten before it can be used.

Coverity reports this problem as
CWE563: A value assigned to a variable is never used.
drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c:
rtl8188ee_bt_reg_init

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1617182023-110950-1-git-send-email-yang.lee@linux.alibaba.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
index 861cc663ca93..bf686a916acb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
@@ -2466,8 +2466,6 @@ void rtl8188ee_bt_reg_init(struct ieee80211_hw *hw)
 
 	/* 0:Low, 1:High, 2:From Efuse. */
 	rtlpriv->btcoexist.reg_bt_iso = 2;
-	/* 0:Idle, 1:None-SCO, 2:SCO, 3:From Counter. */
-	rtlpriv->btcoexist.reg_bt_sco = 3;
 	/* 0:Disable BT control A-MPDU, 1:Enable BT control A-MPDU. */
 	rtlpriv->btcoexist.reg_bt_sco = 0;
 }
-- 
cgit v1.2.3


From 987e9bcdd0b76a9d35f0b82013294429b401f7bb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 2 Apr 2021 19:09:35 +0200
Subject: rtlwifi: remove rtl_get_tid_h

'rtl_get_tid_h()' is the same as 'ieee80211_get_tid()'.
So this function can be removed to save a line of code.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/db340a67a95c119e4f9ba8fa99aea1c73d0dcfc9.1617383263.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/realtek/rtlwifi/wifi.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index fdccfd29fd61..9119144bb5a3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -3086,14 +3086,9 @@ static inline __le16 rtl_get_fc(struct sk_buff *skb)
 	return rtl_get_hdr(skb)->frame_control;
 }
 
-static inline u16 rtl_get_tid_h(struct ieee80211_hdr *hdr)
-{
-	return (ieee80211_get_qos_ctl(hdr))[0] & IEEE80211_QOS_CTL_TID_MASK;
-}
-
 static inline u16 rtl_get_tid(struct sk_buff *skb)
 {
-	return rtl_get_tid_h(rtl_get_hdr(skb));
+	return ieee80211_get_tid(rtl_get_hdr(skb));
 }
 
 static inline struct ieee80211_sta *get_sta(struct ieee80211_hw *hw,
-- 
cgit v1.2.3


From 1186006adee97ebb5b16db0e4ae64e8efb2c8f76 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 5 Apr 2021 09:57:14 +0200
Subject: rtlwifi: Simplify locking of a skb list accesses

The 'c2hcmd_lock' spinlock is only used to protect some __skb_queue_tail()
and __skb_dequeue() calls.
Use the lock provided in the skb itself and call skb_queue_tail() and
skb_dequeue(). These functions already include the correct locking.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/99cf8894fd52202cb7ce2ec6e3200eef400bc071.1617609346.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 15 ++-------------
 drivers/net/wireless/realtek/rtlwifi/wifi.h |  1 -
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 6e8bd99e8911..2a7ee90a3f54 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -551,7 +551,6 @@ int rtl_init_core(struct ieee80211_hw *hw)
 	spin_lock_init(&rtlpriv->locks.rf_lock);
 	spin_lock_init(&rtlpriv->locks.waitq_lock);
 	spin_lock_init(&rtlpriv->locks.entry_list_lock);
-	spin_lock_init(&rtlpriv->locks.c2hcmd_lock);
 	spin_lock_init(&rtlpriv->locks.scan_list_lock);
 	spin_lock_init(&rtlpriv->locks.cck_and_rw_pagea_lock);
 	spin_lock_init(&rtlpriv->locks.fw_ps_lock);
@@ -2269,7 +2268,6 @@ static bool rtl_c2h_fast_cmd(struct ieee80211_hw *hw, struct sk_buff *skb)
 void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	unsigned long flags;
 
 	if (rtl_c2h_fast_cmd(hw, skb)) {
 		rtl_c2h_content_parsing(hw, skb);
@@ -2278,11 +2276,7 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, struct sk_buff *skb)
 	}
 
 	/* enqueue */
-	spin_lock_irqsave(&rtlpriv->locks.c2hcmd_lock, flags);
-
-	__skb_queue_tail(&rtlpriv->c2hcmd_queue, skb);
-
-	spin_unlock_irqrestore(&rtlpriv->locks.c2hcmd_lock, flags);
+	skb_queue_tail(&rtlpriv->c2hcmd_queue, skb);
 
 	/* wake up wq */
 	queue_delayed_work(rtlpriv->works.rtl_wq, &rtlpriv->works.c2hcmd_wq, 0);
@@ -2340,16 +2334,11 @@ void rtl_c2hcmd_launcher(struct ieee80211_hw *hw, int exec)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct sk_buff *skb;
-	unsigned long flags;
 	int i;
 
 	for (i = 0; i < 200; i++) {
 		/* dequeue a task */
-		spin_lock_irqsave(&rtlpriv->locks.c2hcmd_lock, flags);
-
-		skb = __skb_dequeue(&rtlpriv->c2hcmd_queue);
-
-		spin_unlock_irqrestore(&rtlpriv->locks.c2hcmd_lock, flags);
+		skb = skb_dequeue(&rtlpriv->c2hcmd_queue);
 
 		/* do it */
 		if (!skb)
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 9119144bb5a3..877ed6a1589f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -2450,7 +2450,6 @@ struct rtl_locks {
 	spinlock_t waitq_lock;
 	spinlock_t entry_list_lock;
 	spinlock_t usb_lock;
-	spinlock_t c2hcmd_lock;
 	spinlock_t scan_list_lock; /* lock for the scan list */
 
 	/*FW clock change */
-- 
cgit v1.2.3


From fb98734f79365d5aa0737a6fec0a8c84c05c8eaa Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Tue, 6 Apr 2021 10:52:06 +0800
Subject: qtnfmac: remove meaningless goto statement and labels

some function's label meaningless, the label statement follows
the goto statement, no other statements, so just remove it.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210406025206.4924-1-samirweng1979@163.com
---
 drivers/net/wireless/quantenna/qtnfmac/commands.c | 67 -----------------------
 1 file changed, 67 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c
index f3ccbd2b1084..c68563c83098 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/commands.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c
@@ -379,10 +379,6 @@ int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif)
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -407,10 +403,7 @@ int qtnf_cmd_send_register_mgmt(struct qtnf_vif *vif, u16 frame_type, bool reg)
 	cmd->do_register = reg;
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -446,10 +439,7 @@ int qtnf_cmd_send_frame(struct qtnf_vif *vif, u32 cookie, u16 flags,
 		qtnf_cmd_skb_put_buffer(cmd_skb, buf, len);
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -477,10 +467,6 @@ int qtnf_cmd_send_mgmt_set_appie(struct qtnf_vif *vif, u8 frame_type,
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -1677,10 +1663,7 @@ int qtnf_cmd_send_update_phy_params(struct qtnf_wmac *mac, u32 changed)
 					 wiphy->retry_short);
 
 	ret = qtnf_cmd_send(mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(mac->bus);
 
 	return ret;
@@ -1772,10 +1755,7 @@ int qtnf_cmd_send_add_key(struct qtnf_vif *vif, u8 key_index, bool pairwise,
 					 params->seq_len);
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -1807,10 +1787,7 @@ int qtnf_cmd_send_del_key(struct qtnf_vif *vif, u8 key_index, bool pairwise,
 	cmd->pairwise = pairwise;
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -1837,10 +1814,7 @@ int qtnf_cmd_send_set_default_key(struct qtnf_vif *vif, u8 key_index,
 	cmd->multicast = multicast;
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -1864,10 +1838,7 @@ int qtnf_cmd_send_set_default_mgmt_key(struct qtnf_vif *vif, u8 key_index)
 	cmd->key_index = key_index;
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -1931,8 +1902,6 @@ int qtnf_cmd_send_change_sta(struct qtnf_vif *vif, const u8 *mac,
 	}
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
 out:
 	qtnf_bus_unlock(vif->mac->bus);
@@ -1966,10 +1935,7 @@ int qtnf_cmd_send_del_sta(struct qtnf_vif *vif,
 	cmd->reason_code = cpu_to_le16(params->reason_code);
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -2189,10 +2155,6 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif,
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -2218,10 +2180,6 @@ int qtnf_cmd_send_external_auth(struct qtnf_vif *vif,
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -2245,10 +2203,7 @@ int qtnf_cmd_send_disconnect(struct qtnf_vif *vif, u16 reason_code)
 	cmd->reason = cpu_to_le16(reason_code);
 
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -2271,10 +2226,6 @@ int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif, bool up)
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
@@ -2580,10 +2531,6 @@ int qtnf_cmd_start_cac(const struct qtnf_vif *vif,
 
 	qtnf_bus_lock(bus);
 	ret = qtnf_cmd_send(bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(bus);
 
 	return ret;
@@ -2611,10 +2558,6 @@ int qtnf_cmd_set_mac_acl(const struct qtnf_vif *vif,
 
 	qtnf_bus_lock(bus);
 	ret = qtnf_cmd_send(bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(bus);
 
 	return ret;
@@ -2639,10 +2582,7 @@ int qtnf_cmd_send_pm_set(const struct qtnf_vif *vif, u8 pm_mode, int timeout)
 	qtnf_bus_lock(bus);
 
 	ret = qtnf_cmd_send(bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(bus);
 
 	return ret;
@@ -2754,10 +2694,7 @@ int qtnf_cmd_send_wowlan_set(const struct qtnf_vif *vif,
 	cmd->triggers = cpu_to_le32(triggers);
 
 	ret = qtnf_cmd_send(bus, cmd_skb);
-	if (ret)
-		goto out;
 
-out:
 	qtnf_bus_unlock(bus);
 	return ret;
 }
@@ -2821,10 +2758,6 @@ int qtnf_cmd_send_update_owe(struct qtnf_vif *vif,
 
 	qtnf_bus_lock(vif->mac->bus);
 	ret = qtnf_cmd_send(vif->mac->bus, cmd_skb);
-	if (ret)
-		goto out;
-
-out:
 	qtnf_bus_unlock(vif->mac->bus);
 
 	return ret;
-- 
cgit v1.2.3


From e9642be26a372013e47801e1dd98e8f4dcf78f50 Mon Sep 17 00:00:00 2001
From: Guobin Huang <huangguobin4@huawei.com>
Date: Tue, 6 Apr 2021 20:16:46 +0800
Subject: rtlwifi: rtl8192de: Use DEFINE_SPINLOCK() for spinlock

spinlock can be initialized automatically with DEFINE_SPINLOCK()
rather than explicitly calling spin_lock_init().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Guobin Huang <huangguobin4@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1617711406-49649-1-git-send-email-huangguobin4@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
index 1dbdddce0823..a74724c971b9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
@@ -372,18 +372,14 @@ static struct pci_driver rtl92de_driver = {
 
 /* add global spin lock to solve the problem that
  * Dul mac register operation on the same time */
-spinlock_t globalmutex_power;
-spinlock_t globalmutex_for_fwdownload;
-spinlock_t globalmutex_for_power_and_efuse;
+DEFINE_SPINLOCK(globalmutex_power);
+DEFINE_SPINLOCK(globalmutex_for_fwdownload);
+DEFINE_SPINLOCK(globalmutex_for_power_and_efuse);
 
 static int __init rtl92de_module_init(void)
 {
 	int ret = 0;
 
-	spin_lock_init(&globalmutex_power);
-	spin_lock_init(&globalmutex_for_fwdownload);
-	spin_lock_init(&globalmutex_for_power_and_efuse);
-
 	ret = pci_register_driver(&rtl92de_driver);
 	if (ret)
 		WARN_ONCE(true, "rtl8192de: No device found\n");
-- 
cgit v1.2.3


From 260a9ad9446723d4063ed802989758852809714d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 14 Apr 2021 11:29:55 +0300
Subject: ipw2x00: potential buffer overflow in libipw_wx_set_encodeext()

The "ext->key_len" is a u16 that comes from the user.  If it's over
SCM_KEY_LEN (32) that could lead to memory corruption.

Fixes: e0d369d1d969 ("[PATCH] ieee82011: Added WE-18 support to default wireless extension handler")
Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Stanislav Yakovlev <stas.yakovlev@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/YHaoA1i+8uT4ir4h@mwanda
---
 drivers/net/wireless/intel/ipw2x00/libipw_wx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/libipw_wx.c b/drivers/net/wireless/intel/ipw2x00/libipw_wx.c
index a0cf78c418ac..903de34028ef 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw_wx.c
+++ b/drivers/net/wireless/intel/ipw2x00/libipw_wx.c
@@ -633,8 +633,10 @@ int libipw_wx_set_encodeext(struct libipw_device *ieee,
 	}
 
 	if (ext->alg != IW_ENCODE_ALG_NONE) {
-		memcpy(sec.keys[idx], ext->key, ext->key_len);
-		sec.key_sizes[idx] = ext->key_len;
+		int key_len = clamp_val(ext->key_len, 0, SCM_KEY_LEN);
+
+		memcpy(sec.keys[idx], ext->key, key_len);
+		sec.key_sizes[idx] = key_len;
 		sec.flags |= (1 << idx);
 		if (ext->alg == IW_ENCODE_ALG_WEP) {
 			sec.encode_alg[idx] = SEC_ALG_WEP;
-- 
cgit v1.2.3


From d23a962203531ab281f233fa6140cf66ed4fb69f Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 24 Feb 2021 18:44:54 -0800
Subject: mwifiex: don't print SSID to logs

There are a few reasons not to dump SSIDs as-is in kernel logs:

1) they're not guaranteed to be any particular text encoding (UTF-8,
   ASCII, ...) in general
2) it's somewhat redundant; the BSSID should be enough to uniquely
   identify the AP/STA to which we're connecting
3) BSSIDs have an easily-recognized format, whereas SSIDs do not (they
   are free-form)
4) other common drivers (e.g., everything based on mac80211) get along
   just fine by only including BSSIDs when logging state transitions

Additional notes on reason #3: this is important for the
privacy-conscious, especially when providing tools that convey
kernel logs on behalf of a user -- e.g., when reporting bugs. So for
example, it's easy to automatically filter logs for MAC addresses, but
it's much harder to filter SSIDs out of unstructured text.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210225024454.4106485-1-briannorris@chromium.org
---
 drivers/net/wireless/marvell/mwifiex/cfg80211.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index a2ed268ce0da..0961f4a5e415 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -2300,8 +2300,7 @@ done:
 			is_scanning_required = 1;
 		} else {
 			mwifiex_dbg(priv->adapter, MSG,
-				    "info: trying to associate to '%.*s' bssid %pM\n",
-				    req_ssid.ssid_len, (char *)req_ssid.ssid,
+				    "info: trying to associate to bssid %pM\n",
 				    bss->bssid);
 			memcpy(&priv->cfg_bssid, bss->bssid, ETH_ALEN);
 			break;
@@ -2378,8 +2377,7 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	mwifiex_dbg(adapter, INFO,
-		    "info: Trying to associate to %.*s and bssid %pM\n",
-		    (int)sme->ssid_len, (char *)sme->ssid, sme->bssid);
+		    "info: Trying to associate to bssid %pM\n", sme->bssid);
 
 	if (!mwifiex_stop_bg_scan(priv))
 		cfg80211_sched_scan_stopped_locked(priv->wdev.wiphy, 0);
@@ -2512,9 +2510,8 @@ mwifiex_cfg80211_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 		goto done;
 	}
 
-	mwifiex_dbg(priv->adapter, MSG,
-		    "info: trying to join to %.*s and bssid %pM\n",
-		    params->ssid_len, (char *)params->ssid, params->bssid);
+	mwifiex_dbg(priv->adapter, MSG, "info: trying to join to bssid %pM\n",
+		    params->bssid);
 
 	mwifiex_set_ibss_params(priv, params);
 
-- 
cgit v1.2.3


From f2131fa516b883841a593fc877dede57edd1ab0e Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Sat, 27 Feb 2021 17:29:14 +0000
Subject: wilc1000: Make SPI transfers work at 48MHz

For CMD_SINGLE_READ and CMD_INTERNAL_READ, WILC may insert one or more
zero bytes between the command response and the DATA Start tag (0xf3).
This behavior appears to be undocumented in "ATWILC1000 USER GUIDE"
(https://tinyurl.com/4hhshdts) but we have observed 1-4 zero bytes
when the SPI bus operates at 48MHz and none when it operates at 1MHz.

This code is derived from the equivalent code of the wilc driver in
the linux-at91 repository.

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210227172818.1711071-1-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/spi.c | 42 ++++++++++++++++++---------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index be732929322c..d11e365eeee2 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -11,6 +11,16 @@
 #include "netdev.h"
 #include "cfg80211.h"
 
+/*
+ * For CMD_SINGLE_READ and CMD_INTERNAL_READ, WILC may insert one or
+ * more zero bytes between the command response and the DATA Start tag
+ * (0xf3).  This behavior appears to be undocumented in "ATWILC1000
+ * USER GUIDE" (https://tinyurl.com/4hhshdts) but we have observed 1-4
+ * zero bytes when the SPI bus operates at 48MHz and none when it
+ * operates at 1MHz.
+ */
+#define WILC_SPI_RSP_HDR_EXTRA_DATA	8
+
 struct wilc_spi {
 	int crc_off;
 };
@@ -79,16 +89,15 @@ struct wilc_spi_cmd {
 } __packed;
 
 struct wilc_spi_read_rsp_data {
-	u8 rsp_cmd_type;
-	u8 status;
-	u8 resp_header;
-	u8 resp_data[4];
+	u8 header;
+	u8 data[4];
 	u8 crc[];
 } __packed;
 
 struct wilc_spi_rsp_data {
 	u8 rsp_cmd_type;
 	u8 status;
+	u8 data[];
 } __packed;
 
 static int wilc_bus_probe(struct spi_device *spi)
@@ -359,10 +368,11 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 	struct spi_device *spi = to_spi_device(wilc->dev);
 	struct wilc_spi *spi_priv = wilc->bus_data;
 	u8 wb[32], rb[32];
-	int cmd_len, resp_len;
 	u8 crc[2];
+	int cmd_len, resp_len, i;
 	struct wilc_spi_cmd *c;
-	struct wilc_spi_read_rsp_data *r;
+	struct wilc_spi_read_rsp_data *r_data;
+	struct wilc_spi_rsp_data *r;
 
 	memset(wb, 0x0, sizeof(wb));
 	memset(rb, 0x0, sizeof(rb));
@@ -384,7 +394,8 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 	}
 
 	cmd_len = offsetof(struct wilc_spi_cmd, u.simple_cmd.crc);
-	resp_len = sizeof(*r);
+	resp_len = sizeof(*r) + sizeof(*r_data) + WILC_SPI_RSP_HDR_EXTRA_DATA;
+
 	if (!spi_priv->crc_off) {
 		c->u.simple_cmd.crc[0] = wilc_get_crc7(wb, cmd_len);
 		cmd_len += 1;
@@ -403,7 +414,7 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 		return -EINVAL;
 	}
 
-	r = (struct wilc_spi_read_rsp_data *)&rb[cmd_len];
+	r = (struct wilc_spi_rsp_data *)&rb[cmd_len];
 	if (r->rsp_cmd_type != cmd) {
 		dev_err(&spi->dev,
 			"Failed cmd response, cmd (%02x), resp (%02x)\n",
@@ -417,17 +428,22 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 		return -EINVAL;
 	}
 
-	if (WILC_GET_RESP_HDR_START(r->resp_header) != 0xf) {
-		dev_err(&spi->dev, "Error, data read response (%02x)\n",
-			r->resp_header);
+	for (i = 0; i < WILC_SPI_RSP_HDR_EXTRA_DATA; ++i)
+		if (WILC_GET_RESP_HDR_START(r->data[i]) == 0xf)
+			break;
+
+	if (i >= WILC_SPI_RSP_HDR_EXTRA_DATA) {
+		dev_err(&spi->dev, "Error, data start missing\n");
 		return -EINVAL;
 	}
 
+	r_data = (struct wilc_spi_read_rsp_data *)&r->data[i];
+
 	if (b)
-		memcpy(b, r->resp_data, 4);
+		memcpy(b, r_data->data, 4);
 
 	if (!spi_priv->crc_off)
-		memcpy(crc, r->crc, 2);
+		memcpy(crc, r_data->crc, 2);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 5ee2d9dd73fc7b371d208bea0971f81585b5ad3d Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Sat, 27 Feb 2021 17:29:22 +0000
Subject: wilc1000: Introduce symbolic names for SPI protocol register

The WILC1000 protocol control register has bits for enabling the CRCs
(CRC7 for commands and CRC16 for data) and to set the data packet
size.  Define symbolic names for those so the code is more easily
understood.

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210227172818.1711071-2-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/spi.c | 38 ++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index d11e365eeee2..fca34d1999ec 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -46,12 +46,25 @@ static const struct wilc_hif_func wilc_hif_spi;
 #define CMD_RESET				0xcf
 
 #define SPI_ENABLE_VMM_RETRY_LIMIT		2
-#define DATA_PKT_SZ_256				256
-#define DATA_PKT_SZ_512				512
-#define DATA_PKT_SZ_1K				1024
-#define DATA_PKT_SZ_4K				(4 * 1024)
-#define DATA_PKT_SZ_8K				(8 * 1024)
-#define DATA_PKT_SZ				DATA_PKT_SZ_8K
+
+#define PROTOCOL_REG_PKT_SZ_MASK		GENMASK(6, 4)
+#define PROTOCOL_REG_CRC16_MASK			GENMASK(3, 3)
+#define PROTOCOL_REG_CRC7_MASK			GENMASK(2, 2)
+
+/*
+ * The SPI data packet size may be any integer power of two in the
+ * range from 256 to 8192 bytes.
+ */
+#define DATA_PKT_LOG_SZ_MIN			8	/* 256 B */
+#define DATA_PKT_LOG_SZ_MAX			13	/* 8 KiB */
+
+/*
+ * Select the data packet size (log2 of number of bytes): Use the
+ * maximum data packet size.  We only retransmit complete packets, so
+ * there is no benefit from using smaller data packets.
+ */
+#define DATA_PKT_LOG_SZ				DATA_PKT_LOG_SZ_MAX
+#define DATA_PKT_SZ				(1 << DATA_PKT_LOG_SZ)
 
 #define USE_SPI_DMA				0
 
@@ -827,9 +840,16 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
 		}
 	}
 	if (spi_priv->crc_off == 0) {
-		reg &= ~0xc; /* disable crc checking */
-		reg &= ~0x70;
-		reg |= (0x5 << 4);
+		/* disable crc checking: */
+		reg &= ~(PROTOCOL_REG_CRC7_MASK | PROTOCOL_REG_CRC16_MASK);
+
+		/* set the data packet size: */
+		BUILD_BUG_ON(DATA_PKT_LOG_SZ < DATA_PKT_LOG_SZ_MIN
+			     || DATA_PKT_LOG_SZ > DATA_PKT_LOG_SZ_MAX);
+		reg &= ~PROTOCOL_REG_PKT_SZ_MASK;
+		reg |= FIELD_PREP(PROTOCOL_REG_PKT_SZ_MASK,
+				  DATA_PKT_LOG_SZ - DATA_PKT_LOG_SZ_MIN);
+
 		ret = spi_internal_write(wilc, WILC_SPI_PROTOCOL_OFFSET, reg);
 		if (ret) {
 			dev_err(&spi->dev,
-- 
cgit v1.2.3


From ce3b933832b6286d181c30f646449d6ccc2a2c8c Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Sat, 27 Feb 2021 17:29:46 +0000
Subject: wilc1000: Check for errors at end of DMA write

After a DMA write to the WILC chip, check for and report any errors.

This is based on code from the wilc driver in the linux-at91
repository.

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210227172818.1711071-3-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/spi.c | 62 ++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index fca34d1999ec..0be93eabad69 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -47,6 +47,17 @@ static const struct wilc_hif_func wilc_hif_spi;
 
 #define SPI_ENABLE_VMM_RETRY_LIMIT		2
 
+/* SPI response fields (section 11.1.2 in ATWILC1000 User Guide): */
+#define RSP_START_FIELD				GENMASK(7, 4)
+#define RSP_TYPE_FIELD				GENMASK(3, 0)
+
+/* SPI response values for the response fields: */
+#define RSP_START_TAG				0xc
+#define RSP_TYPE_FIRST_PACKET			0x1
+#define RSP_TYPE_INNER_PACKET			0x2
+#define RSP_TYPE_LAST_PACKET			0x3
+#define RSP_STATE_NO_ERROR			0x00
+
 #define PROTOCOL_REG_PKT_SZ_MASK		GENMASK(6, 4)
 #define PROTOCOL_REG_CRC16_MASK			GENMASK(3, 3)
 #define PROTOCOL_REG_CRC7_MASK			GENMASK(2, 2)
@@ -750,6 +761,52 @@ static int wilc_spi_write_reg(struct wilc *wilc, u32 addr, u32 data)
 	return 0;
 }
 
+static int spi_data_rsp(struct wilc *wilc, u8 cmd)
+{
+	struct spi_device *spi = to_spi_device(wilc->dev);
+	int result, i;
+	u8 rsp[4];
+
+	/*
+	 * The response to data packets is two bytes long.  For
+	 * efficiency's sake, wilc_spi_write() wisely ignores the
+	 * responses for all packets but the final one.  The downside
+	 * of that optimization is that when the final data packet is
+	 * short, we may receive (part of) the response to the
+	 * second-to-last packet before the one for the final packet.
+	 * To handle this, we always read 4 bytes and then search for
+	 * the last byte that contains the "Response Start" code (0xc
+	 * in the top 4 bits).  We then know that this byte is the
+	 * first response byte of the final data packet.
+	 */
+	result = wilc_spi_rx(wilc, rsp, sizeof(rsp));
+	if (result) {
+		dev_err(&spi->dev, "Failed bus error...\n");
+		return result;
+	}
+
+	for (i = sizeof(rsp) - 2; i >= 0; --i)
+		if (FIELD_GET(RSP_START_FIELD, rsp[i]) == RSP_START_TAG)
+			break;
+
+	if (i < 0) {
+		dev_err(&spi->dev,
+			"Data packet response missing (%02x %02x %02x %02x)\n",
+			rsp[0], rsp[1], rsp[2], rsp[3]);
+		return -1;
+	}
+
+	/* rsp[i] is the last response start byte */
+
+	if (FIELD_GET(RSP_TYPE_FIELD, rsp[i]) != RSP_TYPE_LAST_PACKET
+	    || rsp[i + 1] != RSP_STATE_NO_ERROR) {
+		dev_err(&spi->dev, "Data response error (%02x %02x)\n",
+			rsp[i], rsp[i + 1]);
+		return -1;
+	}
+	return 0;
+}
+
 static int wilc_spi_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size)
 {
 	struct spi_device *spi = to_spi_device(wilc->dev);
@@ -777,7 +834,10 @@ static int wilc_spi_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size)
 		return result;
 	}
 
-	return 0;
+	/*
+	 * Data response
+	 */
+	return spi_data_rsp(wilc, CMD_DMA_EXT_WRITE);
 }
 
 /********************************************
-- 
cgit v1.2.3


From c872e7ae056f16e27311fb30d637032cc3b1cb46 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Sat, 27 Feb 2021 17:31:38 +0000
Subject: wilc1000: Add support for enabling CRC

The driver so far has always disabled CRC protection.  This means any
data corruption that occurrs during the SPI transfers could go
undetected.  This patch adds module parameters enable_crc7 and
enable_crc16 to selectively turn on CRC7 (for command transfers) and
CRC16 (for data transfers), respectively.

The default configuration remains unchanged, with both CRC7 and CRC16
off.

The performance impact of CRC was measured by running ttcp -t four
times in a row on a SAMA5 device:

 CRC7 CRC16 Throughput: Standard deviation:
 ---- ----- ----------- -------------------
  off   off 1720 	+/- 48 KB/s
   on   off 1658 	+/- 58 KB/s
   on    on 1579 	+/- 84 KB/s

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210227172818.1711071-4-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/Kconfig |   1 +
 drivers/net/wireless/microchip/wilc1000/spi.c   | 178 +++++++++++++++---------
 2 files changed, 115 insertions(+), 64 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/Kconfig b/drivers/net/wireless/microchip/wilc1000/Kconfig
index 7f15e42602dd..62cfcdc9aacc 100644
--- a/drivers/net/wireless/microchip/wilc1000/Kconfig
+++ b/drivers/net/wireless/microchip/wilc1000/Kconfig
@@ -27,6 +27,7 @@ config WILC1000_SPI
 	depends on CFG80211 && INET && SPI
 	select WILC1000
 	select CRC7
+	select CRC_ITU_T
 	help
 	  This module adds support for the SPI interface of adapters using
 	  WILC1000 chipset. The Atmel WILC1000 has a Serial Peripheral
diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c
index 0be93eabad69..1472e9843896 100644
--- a/drivers/net/wireless/microchip/wilc1000/spi.c
+++ b/drivers/net/wireless/microchip/wilc1000/spi.c
@@ -7,10 +7,27 @@
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
 #include <linux/crc7.h>
+#include <linux/crc-itu-t.h>
 
 #include "netdev.h"
 #include "cfg80211.h"
 
+static bool enable_crc7;	/* protect SPI commands with CRC7 */
+module_param(enable_crc7, bool, 0644);
+MODULE_PARM_DESC(enable_crc7,
+		 "Enable CRC7 checksum to protect command transfers\n"
+		 "\t\t\tagainst corruption during the SPI transfer.\n"
+		 "\t\t\tCommand transfers are short and the CPU-cycle cost\n"
+		 "\t\t\tof enabling this is small.");
+
+static bool enable_crc16;	/* protect SPI data with CRC16 */
+module_param(enable_crc16, bool, 0644);
+MODULE_PARM_DESC(enable_crc16,
+		 "Enable CRC16 checksum to protect data transfers\n"
+		 "\t\t\tagainst corruption during the SPI transfer.\n"
+		 "\t\t\tData transfers can be large and the CPU-cycle cost\n"
+		 "\t\t\tof enabling this may be substantial.");
+
 /*
  * For CMD_SINGLE_READ and CMD_INTERNAL_READ, WILC may insert one or
  * more zero bytes between the command response and the DATA Start tag
@@ -22,7 +39,9 @@
 #define WILC_SPI_RSP_HDR_EXTRA_DATA	8
 
 struct wilc_spi {
-	int crc_off;
+	bool probing_crc;	/* true if we're probing chip's CRC config */
+	bool crc7_enabled;	/* true if crc7 is currently enabled */
+	bool crc16_enabled;	/* true if crc16 is currently enabled */
 };
 
 static const struct wilc_hif_func wilc_hif_spi;
@@ -314,7 +333,8 @@ static int spi_data_write(struct wilc *wilc, u8 *b, u32 sz)
 	struct wilc_spi *spi_priv = wilc->bus_data;
 	int ix, nbytes;
 	int result = 0;
-	u8 cmd, order, crc[2] = {0};
+	u8 cmd, order, crc[2];
+	u16 crc_calc;
 
 	/*
 	 * Data
@@ -356,9 +376,12 @@ static int spi_data_write(struct wilc *wilc, u8 *b, u32 sz)
 		}
 
 		/*
-		 * Write Crc
+		 * Write CRC
 		 */
-		if (!spi_priv->crc_off) {
+		if (spi_priv->crc16_enabled) {
+			crc_calc = crc_itu_t(0xffff, &b[ix], nbytes);
+			crc[0] = crc_calc >> 8;
+			crc[1] = crc_calc;
 			if (wilc_spi_tx(wilc, crc, 2)) {
 				dev_err(&spi->dev, "Failed data block crc write, bus error...\n");
 				result = -EINVAL;
@@ -392,11 +415,11 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 	struct spi_device *spi = to_spi_device(wilc->dev);
 	struct wilc_spi *spi_priv = wilc->bus_data;
 	u8 wb[32], rb[32];
-	u8 crc[2];
 	int cmd_len, resp_len, i;
+	u16 crc_calc, crc_recv;
 	struct wilc_spi_cmd *c;
-	struct wilc_spi_read_rsp_data *r_data;
 	struct wilc_spi_rsp_data *r;
+	struct wilc_spi_read_rsp_data *r_data;
 
 	memset(wb, 0x0, sizeof(wb));
 	memset(rb, 0x0, sizeof(rb));
@@ -420,7 +443,7 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 	cmd_len = offsetof(struct wilc_spi_cmd, u.simple_cmd.crc);
 	resp_len = sizeof(*r) + sizeof(*r_data) + WILC_SPI_RSP_HDR_EXTRA_DATA;
 
-	if (!spi_priv->crc_off) {
+	if (spi_priv->crc7_enabled) {
 		c->u.simple_cmd.crc[0] = wilc_get_crc7(wb, cmd_len);
 		cmd_len += 1;
 		resp_len += 2;
@@ -440,9 +463,10 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 
 	r = (struct wilc_spi_rsp_data *)&rb[cmd_len];
 	if (r->rsp_cmd_type != cmd) {
-		dev_err(&spi->dev,
-			"Failed cmd response, cmd (%02x), resp (%02x)\n",
-			cmd, r->rsp_cmd_type);
+		if (!spi_priv->probing_crc)
+			dev_err(&spi->dev,
+				"Failed cmd, cmd (%02x), resp (%02x)\n",
+				cmd, r->rsp_cmd_type);
 		return -EINVAL;
 	}
 
@@ -466,8 +490,16 @@ static int wilc_spi_single_read(struct wilc *wilc, u8 cmd, u32 adr, void *b,
 	if (b)
 		memcpy(b, r_data->data, 4);
 
-	if (!spi_priv->crc_off)
-		memcpy(crc, r_data->crc, 2);
+	if (!clockless && spi_priv->crc16_enabled) {
+		crc_recv = (r_data->crc[0] << 8) | r_data->crc[1];
+		crc_calc = crc_itu_t(0xffff, r_data->data, 4);
+		if (crc_recv != crc_calc) {
+			dev_err(&spi->dev, "%s: bad CRC 0x%04x "
+				"(calculated 0x%04x)\n", __func__,
+				crc_recv, crc_calc);
+			return -EINVAL;
+		}
+	}
 
 	return 0;
 }
@@ -494,7 +526,7 @@ static int wilc_spi_write_cmd(struct wilc *wilc, u8 cmd, u32 adr, u32 data,
 		c->u.internal_w_cmd.addr[1] = adr;
 		c->u.internal_w_cmd.data = cpu_to_be32(data);
 		cmd_len = offsetof(struct wilc_spi_cmd, u.internal_w_cmd.crc);
-		if (!spi_priv->crc_off)
+		if (spi_priv->crc7_enabled)
 			c->u.internal_w_cmd.crc[0] = wilc_get_crc7(wb, cmd_len);
 	} else if (cmd == CMD_SINGLE_WRITE) {
 		c->u.w_cmd.addr[0] = adr >> 16;
@@ -502,14 +534,14 @@ static int wilc_spi_write_cmd(struct wilc *wilc, u8 cmd, u32 adr, u32 data,
 		c->u.w_cmd.addr[2] = adr;
 		c->u.w_cmd.data = cpu_to_be32(data);
 		cmd_len = offsetof(struct wilc_spi_cmd, u.w_cmd.crc);
-		if (!spi_priv->crc_off)
+		if (spi_priv->crc7_enabled)
 			c->u.w_cmd.crc[0] = wilc_get_crc7(wb, cmd_len);
 	} else {
 		dev_err(&spi->dev, "write cmd [%x] not supported\n", cmd);
 		return -EINVAL;
 	}
 
-	if (!spi_priv->crc_off)
+	if (spi_priv->crc7_enabled)
 		cmd_len += 1;
 
 	resp_len = sizeof(*r);
@@ -547,6 +579,7 @@ static int wilc_spi_dma_rw(struct wilc *wilc, u8 cmd, u32 adr, u8 *b, u32 sz)
 {
 	struct spi_device *spi = to_spi_device(wilc->dev);
 	struct wilc_spi *spi_priv = wilc->bus_data;
+	u16 crc_recv, crc_calc;
 	u8 wb[32], rb[32];
 	int cmd_len, resp_len;
 	int retry, ix = 0;
@@ -565,7 +598,7 @@ static int wilc_spi_dma_rw(struct wilc *wilc, u8 cmd, u32 adr, u8 *b, u32 sz)
 		c->u.dma_cmd.size[0] = sz >> 8;
 		c->u.dma_cmd.size[1] = sz;
 		cmd_len = offsetof(struct wilc_spi_cmd, u.dma_cmd.crc);
-		if (!spi_priv->crc_off)
+		if (spi_priv->crc7_enabled)
 			c->u.dma_cmd.crc[0] = wilc_get_crc7(wb, cmd_len);
 	} else if (cmd == CMD_DMA_EXT_WRITE || cmd == CMD_DMA_EXT_READ) {
 		c->u.dma_cmd_ext.addr[0] = adr >> 16;
@@ -575,14 +608,14 @@ static int wilc_spi_dma_rw(struct wilc *wilc, u8 cmd, u32 adr, u8 *b, u32 sz)
 		c->u.dma_cmd_ext.size[1] = sz >> 8;
 		c->u.dma_cmd_ext.size[2] = sz;
 		cmd_len = offsetof(struct wilc_spi_cmd, u.dma_cmd_ext.crc);
-		if (!spi_priv->crc_off)
+		if (spi_priv->crc7_enabled)
 			c->u.dma_cmd_ext.crc[0] = wilc_get_crc7(wb, cmd_len);
 	} else {
 		dev_err(&spi->dev, "dma read write cmd [%x] not supported\n",
 			cmd);
 		return -EINVAL;
 	}
-	if (!spi_priv->crc_off)
+	if (spi_priv->crc7_enabled)
 		cmd_len += 1;
 
 	resp_len = sizeof(*r);
@@ -648,12 +681,22 @@ static int wilc_spi_dma_rw(struct wilc *wilc, u8 cmd, u32 adr, u8 *b, u32 sz)
 		}
 
 		/*
-		 * Read Crc
+		 * Read CRC
 		 */
-		if (!spi_priv->crc_off && wilc_spi_rx(wilc, crc, 2)) {
-			dev_err(&spi->dev,
-				"Failed block crc read, bus err\n");
-			return -EINVAL;
+		if (spi_priv->crc16_enabled) {
+			if (wilc_spi_rx(wilc, crc, 2)) {
+				dev_err(&spi->dev,
+					"Failed block CRC read, bus err\n");
+				return -EINVAL;
+			}
+			crc_recv = (crc[0] << 8) | crc[1];
+			crc_calc = crc_itu_t(0xffff, &b[ix], nbytes);
+			if (crc_recv != crc_calc) {
+				dev_err(&spi->dev, "%s: bad CRC 0x%04x "
+					"(calculated 0x%04x)\n", __func__,
+					crc_recv, crc_calc);
+				return -EINVAL;
+			}
 		}
 
 		ix += nbytes;
@@ -720,11 +763,13 @@ static int spi_internal_write(struct wilc *wilc, u32 adr, u32 dat)
 static int spi_internal_read(struct wilc *wilc, u32 adr, u32 *data)
 {
 	struct spi_device *spi = to_spi_device(wilc->dev);
+	struct wilc_spi *spi_priv = wilc->bus_data;
 	int result;
 
 	result = wilc_spi_single_read(wilc, CMD_INTERNAL_READ, adr, data, 0);
 	if (result) {
-		dev_err(&spi->dev, "Failed internal read cmd...\n");
+		if (!spi_priv->probing_crc)
+			dev_err(&spi->dev, "Failed internal read cmd...\n");
 		return result;
 	}
 
@@ -861,7 +906,7 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
 	u32 reg;
 	u32 chipid;
 	static int isinit;
-	int ret;
+	int ret, i;
 
 	if (isinit) {
 		ret = wilc_spi_read_reg(wilc, WILC_CHIPID, &chipid);
@@ -876,49 +921,54 @@ static int wilc_spi_init(struct wilc *wilc, bool resume)
 	 */
 
 	/*
-	 * TODO: We can remove the CRC trials if there is a definite
-	 * way to reset
+	 * Infer the CRC settings that are currently in effect.  This
+	 * is necessary because we can't be sure that the chip has
+	 * been RESET (e.g, after module unload and reload).
 	 */
-	/* the SPI to it's initial value. */
-	ret = spi_internal_read(wilc, WILC_SPI_PROTOCOL_OFFSET, &reg);
-	if (ret) {
-		/*
-		 * Read failed. Try with CRC off. This might happen when module
-		 * is removed but chip isn't reset
-		 */
-		spi_priv->crc_off = 1;
-		dev_err(&spi->dev,
-			"Failed read with CRC on, retrying with CRC off\n");
+	spi_priv->probing_crc = true;
+	spi_priv->crc7_enabled = enable_crc7;
+	spi_priv->crc16_enabled = false; /* don't check CRC16 during probing */
+	for (i = 0; i < 2; ++i) {
 		ret = spi_internal_read(wilc, WILC_SPI_PROTOCOL_OFFSET, &reg);
-		if (ret) {
-			/*
-			 * Read failed with both CRC on and off,
-			 * something went bad
-			 */
-			dev_err(&spi->dev, "Failed internal read protocol\n");
-			return ret;
-		}
+		if (ret == 0)
+			break;
+		spi_priv->crc7_enabled = !enable_crc7;
 	}
-	if (spi_priv->crc_off == 0) {
-		/* disable crc checking: */
-		reg &= ~(PROTOCOL_REG_CRC7_MASK | PROTOCOL_REG_CRC16_MASK);
-
-		/* set the data packet size: */
-		BUILD_BUG_ON(DATA_PKT_LOG_SZ < DATA_PKT_LOG_SZ_MIN
-			     || DATA_PKT_LOG_SZ > DATA_PKT_LOG_SZ_MAX);
-		reg &= ~PROTOCOL_REG_PKT_SZ_MASK;
-		reg |= FIELD_PREP(PROTOCOL_REG_PKT_SZ_MASK,
-				  DATA_PKT_LOG_SZ - DATA_PKT_LOG_SZ_MIN);
-
-		ret = spi_internal_write(wilc, WILC_SPI_PROTOCOL_OFFSET, reg);
-		if (ret) {
-			dev_err(&spi->dev,
-				"[wilc spi %d]: Failed internal write reg\n",
-				__LINE__);
-			return ret;
-		}
-		spi_priv->crc_off = 1;
+	if (ret) {
+		dev_err(&spi->dev, "Failed with CRC7 on and off.\n");
+		return ret;
+	}
+
+	/* set up the desired CRC configuration: */
+	reg &= ~(PROTOCOL_REG_CRC7_MASK | PROTOCOL_REG_CRC16_MASK);
+	if (enable_crc7)
+		reg |= PROTOCOL_REG_CRC7_MASK;
+	if (enable_crc16)
+		reg |= PROTOCOL_REG_CRC16_MASK;
+
+	/* set up the data packet size: */
+	BUILD_BUG_ON(DATA_PKT_LOG_SZ < DATA_PKT_LOG_SZ_MIN
+		     || DATA_PKT_LOG_SZ > DATA_PKT_LOG_SZ_MAX);
+	reg &= ~PROTOCOL_REG_PKT_SZ_MASK;
+	reg |= FIELD_PREP(PROTOCOL_REG_PKT_SZ_MASK,
+			  DATA_PKT_LOG_SZ - DATA_PKT_LOG_SZ_MIN);
+
+	/* establish the new setup: */
+	ret = spi_internal_write(wilc, WILC_SPI_PROTOCOL_OFFSET, reg);
+	if (ret) {
+		dev_err(&spi->dev,
+			"[wilc spi %d]: Failed internal write reg\n",
+			__LINE__);
+		return ret;
 	}
+	/* update our state to match new protocol settings: */
+	spi_priv->crc7_enabled = enable_crc7;
+	spi_priv->crc16_enabled = enable_crc16;
+
+	/* re-read to make sure new settings are in effect: */
+	spi_internal_read(wilc, WILC_SPI_PROTOCOL_OFFSET, &reg);
+
+	spi_priv->probing_crc = false;
 
 	/*
 	 * make sure can read back chip id correctly
-- 
cgit v1.2.3


From a381b78a1598dde34a6e40dae2842024308a6ef2 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Wed, 3 Mar 2021 19:50:07 +0000
Subject: wilc1000: Bring MAC address setting in line with typical Linux
 behavior

Linux network drivers normally disallow changing the MAC address when
the interface is up.  This driver has been different in that it allows
to change the MAC address *only* when it's up.  This patch brings
wilc1000 behavior more in line with other network drivers.  We could
have replaced wilc_set_mac_addr() with eth_mac_addr() but that would
break existing documentation on how to change the MAC address.
Likewise, return -EADDRNOTAVAIL (not -EINVAL) when the specified MAC
address is invalid or unavailable.

Signed-off-by: David Mosberger-Tang <davidm@egauge.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210303194846.1823596-1-davidm@egauge.net
---
 drivers/net/wireless/microchip/wilc1000/netdev.c | 25 +++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
index 9928e91c8ffa..7e4d9235251c 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -571,7 +571,6 @@ static int wilc_mac_open(struct net_device *ndev)
 {
 	struct wilc_vif *vif = netdev_priv(ndev);
 	struct wilc *wl = vif->wilc;
-	unsigned char mac_add[ETH_ALEN] = {0};
 	int ret = 0;
 	struct mgmt_frame_regs mgmt_regs = {};
 
@@ -594,9 +593,12 @@ static int wilc_mac_open(struct net_device *ndev)
 
 	wilc_set_operation_mode(vif, wilc_get_vif_idx(vif), vif->iftype,
 				vif->idx);
-	wilc_get_mac_address(vif, mac_add);
-	netdev_dbg(ndev, "Mac address: %pM\n", mac_add);
-	ether_addr_copy(ndev->dev_addr, mac_add);
+
+	if (is_valid_ether_addr(ndev->dev_addr))
+		wilc_set_mac_address(vif, ndev->dev_addr);
+	else
+		wilc_get_mac_address(vif, ndev->dev_addr);
+	netdev_dbg(ndev, "Mac address: %pM\n", ndev->dev_addr);
 
 	if (!is_valid_ether_addr(ndev->dev_addr)) {
 		netdev_err(ndev, "Wrong MAC address\n");
@@ -635,7 +637,14 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
 	int srcu_idx;
 
 	if (!is_valid_ether_addr(addr->sa_data))
-		return -EINVAL;
+		return -EADDRNOTAVAIL;
+
+	if (!vif->mac_opened) {
+		eth_commit_mac_addr_change(dev, p);
+		return 0;
+	}
+
+	/* Verify MAC Address is not already in use: */
 
 	srcu_idx = srcu_read_lock(&wilc->srcu);
 	list_for_each_entry_rcu(tmp_vif, &wilc->vif_list, list) {
@@ -643,7 +652,7 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
 		if (ether_addr_equal(addr->sa_data, mac_addr)) {
 			if (vif != tmp_vif) {
 				srcu_read_unlock(&wilc->srcu, srcu_idx);
-				return -EINVAL;
+				return -EADDRNOTAVAIL;
 			}
 			srcu_read_unlock(&wilc->srcu, srcu_idx);
 			return 0;
@@ -655,9 +664,7 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
 	if (result)
 		return result;
 
-	ether_addr_copy(vif->bssid, addr->sa_data);
-	ether_addr_copy(vif->ndev->dev_addr, addr->sa_data);
-
+	eth_commit_mac_addr_change(dev, p);
 	return result;
 }
 
-- 
cgit v1.2.3


From bf3365a856a19ac6b96973dbf17069e0e5013e28 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 5 Mar 2021 03:48:50 -0600
Subject: rtl8xxxu: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix
multiple warnings by replacing /* fall through */ comments with
the new pseudo-keyword macro fallthrough; instead of letting the
code fall through to the next case.

Notice that Clang doesn't recognize /* fall through */ comments as
implicit fall-through markings.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210305094850.GA141221@embeddedor
---
 drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 5cd7ef3625c5..afc97958fa4d 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -1145,7 +1145,7 @@ void rtl8xxxu_gen1_config_channel(struct ieee80211_hw *hw)
 	switch (hw->conf.chandef.width) {
 	case NL80211_CHAN_WIDTH_20_NOHT:
 		ht = false;
-		/* fall through */
+		fallthrough;
 	case NL80211_CHAN_WIDTH_20:
 		opmode |= BW_OPMODE_20MHZ;
 		rtl8xxxu_write8(priv, REG_BW_OPMODE, opmode);
@@ -1272,7 +1272,7 @@ void rtl8xxxu_gen2_config_channel(struct ieee80211_hw *hw)
 	switch (hw->conf.chandef.width) {
 	case NL80211_CHAN_WIDTH_20_NOHT:
 		ht = false;
-		/* fall through */
+		fallthrough;
 	case NL80211_CHAN_WIDTH_20:
 		rf_mode_bw |= WMAC_TRXPTCL_CTL_BW_20;
 		subchannel = 0;
@@ -1741,11 +1741,11 @@ static int rtl8xxxu_identify_chip(struct rtl8xxxu_priv *priv)
 		case 3:
 			priv->ep_tx_low_queue = 1;
 			priv->ep_tx_count++;
-			/* fall through */
+			fallthrough;
 		case 2:
 			priv->ep_tx_normal_queue = 1;
 			priv->ep_tx_count++;
-			/* fall through */
+			fallthrough;
 		case 1:
 			priv->ep_tx_high_queue = 1;
 			priv->ep_tx_count++;
-- 
cgit v1.2.3


From c81852a48e137c61e8c85863b5a5104acc1a8270 Mon Sep 17 00:00:00 2001
From: zuoqilin <zuoqilin@yulong.com>
Date: Wed, 17 Mar 2021 14:33:53 +0800
Subject: mwifiex: Remove unneeded variable: "ret"

Remove unneeded variable: "ret"

Signed-off-by: zuoqilin <zuoqilin@yulong.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210317063353.1055-1-zuoqilin1@163.com
---
 drivers/net/wireless/marvell/mwifiex/scan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index c2a685f63e95..0b877f3f6b97 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -1211,7 +1211,6 @@ mwifiex_ret_802_11_scan_get_tlv_ptrs(struct mwifiex_adapter *adapter,
 int mwifiex_update_bss_desc_with_ie(struct mwifiex_adapter *adapter,
 				    struct mwifiex_bssdescriptor *bss_entry)
 {
-	int ret = 0;
 	u8 element_id;
 	struct ieee_types_fh_param_set *fh_param_set;
 	struct ieee_types_ds_param_set *ds_param_set;
@@ -1464,7 +1463,7 @@ int mwifiex_update_bss_desc_with_ie(struct mwifiex_adapter *adapter,
 		bytes_left -= total_ie_len;
 
 	}	/* while (bytes_left > 2) */
-	return ret;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 2f51061edab942988b1a3c057d21228e938603db Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 19 Mar 2021 17:47:31 +0300
Subject: wilc1000: fix a loop timeout condition

If the loop fails, the "while(trials--) {" loop will exit with "trials"
set to -1.  The test for that expects it to end with "trials" set to 0
so the warning message will not be printed.

Fix this by changing from a post-op to a pre-op.  This does mean that
we only make 99 attempts instead of 100 but that's okay.

Fixes: f135a1571a05 ("wilc1000: Support chip sleep over SPI")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Ajay Singh <ajay.kathat@microchip.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/YFS5gx/gi70zlIaO@mwanda
---
 drivers/net/wireless/microchip/wilc1000/wlan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index d4a90c490084..2030fc7f53ca 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -575,7 +575,7 @@ void chip_allow_sleep(struct wilc *wilc)
 		to_host_from_fw_bit = WILC_SPI_FW_TO_HOST_BIT;
 	}
 
-	while (trials--) {
+	while (--trials) {
 		ret = hif_func->hif_read_reg(wilc, to_host_from_fw_reg, &reg);
 		if (ret)
 			return;
-- 
cgit v1.2.3


From 431eb49e87ed8de7728b879e7288d85fb87f83ff Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 5 Apr 2021 11:09:14 +0200
Subject: rtl8xxxu: Simplify locking of a skb list accesses

The 'c2hcmd_lock' spinlock is only used to protect some __skb_queue_tail()
and __skb_dequeue() calls.
Use the lock provided in the skb itself and call skb_queue_tail() and
skb_dequeue(). These functions already include the correct locking.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/8bcec6429615aeb498482dc7e1955ce09b456585.1617613700.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h      |  1 -
 drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 11 ++---------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index d6d1be4169e5..d1a566cc0c9e 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -1391,7 +1391,6 @@ struct rtl8xxxu_priv {
 	struct delayed_work ra_watchdog;
 	struct work_struct c2hcmd_work;
 	struct sk_buff_head c2hcmd_queue;
-	spinlock_t c2hcmd_lock;
 	struct rtl8xxxu_btcoex bt_coex;
 	struct rtl8xxxu_ra_report ra_report;
 };
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index afc97958fa4d..9ff09cf7eb62 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -5423,7 +5423,6 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
 	struct rtl8xxxu_priv *priv;
 	struct rtl8723bu_c2h *c2h;
 	struct sk_buff *skb = NULL;
-	unsigned long flags;
 	u8 bt_info = 0;
 	struct rtl8xxxu_btcoex *btcoex;
 	struct rtl8xxxu_ra_report *rarpt;
@@ -5439,9 +5438,7 @@ static void rtl8xxxu_c2hcmd_callback(struct work_struct *work)
 		goto out;
 
 	while (!skb_queue_empty(&priv->c2hcmd_queue)) {
-		spin_lock_irqsave(&priv->c2hcmd_lock, flags);
-		skb = __skb_dequeue(&priv->c2hcmd_queue);
-		spin_unlock_irqrestore(&priv->c2hcmd_lock, flags);
+		skb = skb_dequeue(&priv->c2hcmd_queue);
 
 		c2h = (struct rtl8723bu_c2h *)skb->data;
 
@@ -5499,7 +5496,6 @@ static void rtl8723bu_handle_c2h(struct rtl8xxxu_priv *priv,
 	struct rtl8723bu_c2h *c2h = (struct rtl8723bu_c2h *)skb->data;
 	struct device *dev = &priv->udev->dev;
 	int len;
-	unsigned long flags;
 
 	len = skb->len - 2;
 
@@ -5538,9 +5534,7 @@ static void rtl8723bu_handle_c2h(struct rtl8xxxu_priv *priv,
 		break;
 	}
 
-	spin_lock_irqsave(&priv->c2hcmd_lock, flags);
-	__skb_queue_tail(&priv->c2hcmd_queue, skb);
-	spin_unlock_irqrestore(&priv->c2hcmd_lock, flags);
+	skb_queue_tail(&priv->c2hcmd_queue, skb);
 
 	schedule_work(&priv->c2hcmd_work);
 }
@@ -6606,7 +6600,6 @@ static int rtl8xxxu_probe(struct usb_interface *interface,
 	spin_lock_init(&priv->rx_urb_lock);
 	INIT_WORK(&priv->rx_urb_wq, rtl8xxxu_rx_urb_work);
 	INIT_DELAYED_WORK(&priv->ra_watchdog, rtl8xxxu_watchdog_callback);
-	spin_lock_init(&priv->c2hcmd_lock);
 	INIT_WORK(&priv->c2hcmd_work, rtl8xxxu_c2hcmd_callback);
 	skb_queue_head_init(&priv->c2hcmd_queue);
 
-- 
cgit v1.2.3


From 01414f8882f944fe106a52a6d4c2ae8869822195 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Mar 2021 11:43:34 +0100
Subject: libertas: avoid -Wempty-body warning

Building without mesh supports shows a couple of warnings with
'make W=1':

drivers/net/wireless/marvell/libertas/main.c: In function 'lbs_start_card':
drivers/net/wireless/marvell/libertas/main.c:1068:37: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body]
 1068 |                 lbs_start_mesh(priv);

Change the macros to use the usual "do { } while (0)" instead to shut up
the warnings and make the code a litte more robust.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210322104343.948660-4-arnd@kernel.org
---
 drivers/net/wireless/marvell/libertas/mesh.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.h b/drivers/net/wireless/marvell/libertas/mesh.h
index d49717b20c09..44c4cd0230a8 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.h
+++ b/drivers/net/wireless/marvell/libertas/mesh.h
@@ -60,13 +60,13 @@ void lbs_mesh_ethtool_get_strings(struct net_device *dev,
 
 #else
 
-#define lbs_init_mesh(priv)
-#define lbs_deinit_mesh(priv)
-#define lbs_start_mesh(priv)
-#define lbs_add_mesh(priv)
-#define lbs_remove_mesh(priv)
+#define lbs_init_mesh(priv)	do { } while (0)
+#define lbs_deinit_mesh(priv)	do { } while (0)
+#define lbs_start_mesh(priv)	do { } while (0)
+#define lbs_add_mesh(priv)	do { } while (0)
+#define lbs_remove_mesh(priv)	do { } while (0)
 #define lbs_mesh_set_dev(priv, dev, rxpd) (dev)
-#define lbs_mesh_set_txpd(priv, dev, txpd)
+#define lbs_mesh_set_txpd(priv, dev, txpd) do { } while (0)
 #define lbs_mesh_set_channel(priv, channel) (0)
 #define lbs_mesh_activated(priv) (false)
 
-- 
cgit v1.2.3


From 7b0e2c4f6be3ec68bf807c84e985e81c21404cd1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 23 Mar 2021 13:57:14 +0100
Subject: wlcore: fix overlapping snprintf arguments in debugfs

gcc complains about undefined behavior in calling snprintf()
with the same buffer as input and output:

drivers/net/wireless/ti/wl18xx/debugfs.c: In function 'diversity_num_of_packets_per_ant_read':
drivers/net/wireless/ti/wl18xx/../wlcore/debugfs.h:86:3: error: 'snprintf' argument 4 overlaps destination object 'buf' [-Werror=restrict]
   86 |   snprintf(buf, sizeof(buf), "%s[%d] = %d\n",  \
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   87 |     buf, i, stats->sub.name[i]);   \
      |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/ti/wl18xx/debugfs.c:24:2: note: in expansion of macro 'DEBUGFS_FWSTATS_FILE_ARRAY'
   24 |  DEBUGFS_FWSTATS_FILE_ARRAY(a, b, c, wl18xx_acx_statistics)
      |  ^~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/ti/wl18xx/debugfs.c:159:1: note: in expansion of macro 'WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY'
  159 | WL18XX_DEBUGFS_FWSTATS_FILE_ARRAY(diversity, num_of_packets_per_ant,

There are probably other ways of handling the debugfs file, without
using on-stack buffers, but a simple workaround here is to remember the
current position in the buffer and just keep printing in there.

Fixes: bcca1bbdd412 ("wlcore: add debugfs macro to help print fw statistics arrays")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210323125723.1961432-1-arnd@kernel.org
---
 drivers/net/wireless/ti/wlcore/boot.c    | 13 ++++++++-----
 drivers/net/wireless/ti/wlcore/debugfs.h |  7 ++++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/boot.c b/drivers/net/wireless/ti/wlcore/boot.c
index e14d88e558f0..85abd0a2d1c9 100644
--- a/drivers/net/wireless/ti/wlcore/boot.c
+++ b/drivers/net/wireless/ti/wlcore/boot.c
@@ -72,6 +72,7 @@ static int wlcore_validate_fw_ver(struct wl1271 *wl)
 	unsigned int *min_ver = (wl->fw_type == WL12XX_FW_TYPE_MULTI) ?
 		wl->min_mr_fw_ver : wl->min_sr_fw_ver;
 	char min_fw_str[32] = "";
+	int off = 0;
 	int i;
 
 	/* the chip must be exactly equal */
@@ -105,13 +106,15 @@ static int wlcore_validate_fw_ver(struct wl1271 *wl)
 	return 0;
 
 fail:
-	for (i = 0; i < NUM_FW_VER; i++)
+	for (i = 0; i < NUM_FW_VER && off < sizeof(min_fw_str); i++)
 		if (min_ver[i] == WLCORE_FW_VER_IGNORE)
-			snprintf(min_fw_str, sizeof(min_fw_str),
-				  "%s*.", min_fw_str);
+			off += snprintf(min_fw_str + off,
+					sizeof(min_fw_str) - off,
+					"*.");
 		else
-			snprintf(min_fw_str, sizeof(min_fw_str),
-				  "%s%u.", min_fw_str, min_ver[i]);
+			off += snprintf(min_fw_str + off,
+					sizeof(min_fw_str) - off,
+					"%u.", min_ver[i]);
 
 	wl1271_error("Your WiFi FW version (%u.%u.%u.%u.%u) is invalid.\n"
 		     "Please use at least FW %s\n"
diff --git a/drivers/net/wireless/ti/wlcore/debugfs.h b/drivers/net/wireless/ti/wlcore/debugfs.h
index b143293e694f..715edfa5f89f 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.h
+++ b/drivers/net/wireless/ti/wlcore/debugfs.h
@@ -78,13 +78,14 @@ static ssize_t sub## _ ##name## _read(struct file *file,		\
 	struct wl1271 *wl = file->private_data;				\
 	struct struct_type *stats = wl->stats.fw_stats;			\
 	char buf[DEBUGFS_FORMAT_BUFFER_SIZE] = "";			\
+	int pos = 0;							\
 	int i;								\
 									\
 	wl1271_debugfs_update_stats(wl);				\
 									\
-	for (i = 0; i < len; i++)					\
-		snprintf(buf, sizeof(buf), "%s[%d] = %d\n",		\
-			 buf, i, stats->sub.name[i]);			\
+	for (i = 0; i < len && pos < sizeof(buf); i++)			\
+		pos += snprintf(buf + pos, sizeof(buf),			\
+			 "[%d] = %d\n", i, stats->sub.name[i]);		\
 									\
 	return wl1271_format_buffer(userbuf, count, ppos, "%s", buf);	\
 }									\
-- 
cgit v1.2.3


From 7909a590eba6d021f104958857cbc4f0089daceb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 23 Mar 2021 14:16:28 +0100
Subject: airo: work around stack usage warning

gcc-11 with KASAN on 32-bit arm produces a warning about a function
that needs a lot of stack space:

drivers/net/wireless/cisco/airo.c: In function 'setup_card.constprop':
drivers/net/wireless/cisco/airo.c:3960:1: error: the frame size of 1512 bytes is larger than 1400 bytes [-Werror=frame-larger-than=]

Most of this is from a single large structure that could be dynamically
allocated or moved into the per-device structure.  However, as the callers
all seem to have a fairly well bounded call chain, the easiest change
is to pull out the part of the function that needs the large variables
into a separate function and mark that as noinline_for_stack. This does
not reduce the total stack usage, but it gets rid of the warning and
requires minimal changes otherwise.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210323131634.2669455-1-arnd@kernel.org
---
 drivers/net/wireless/cisco/airo.c | 117 +++++++++++++++++++++-----------------
 1 file changed, 65 insertions(+), 52 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 60db38c38960..fd37d4d2983b 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -3817,6 +3817,68 @@ static inline void set_auth_type(struct airo_info *local, int auth_type)
 		local->last_auth = auth_type;
 }
 
+static int noinline_for_stack airo_readconfig(struct airo_info *ai, u8 *mac, int lock)
+{
+	int i, status;
+	/* large variables, so don't inline this function,
+	 * maybe change to kmalloc
+	 */
+	tdsRssiRid rssi_rid;
+	CapabilityRid cap_rid;
+
+	kfree(ai->SSID);
+	ai->SSID = NULL;
+	// general configuration (read/modify/write)
+	status = readConfigRid(ai, lock);
+	if (status != SUCCESS) return ERROR;
+
+	status = readCapabilityRid(ai, &cap_rid, lock);
+	if (status != SUCCESS) return ERROR;
+
+	status = PC4500_readrid(ai, RID_RSSI, &rssi_rid, sizeof(rssi_rid), lock);
+	if (status == SUCCESS) {
+		if (ai->rssi || (ai->rssi = kmalloc(512, GFP_KERNEL)) != NULL)
+			memcpy(ai->rssi, (u8*)&rssi_rid + 2, 512); /* Skip RID length member */
+	}
+	else {
+		kfree(ai->rssi);
+		ai->rssi = NULL;
+		if (cap_rid.softCap & cpu_to_le16(8))
+			ai->config.rmode |= RXMODE_NORMALIZED_RSSI;
+		else
+			airo_print_warn(ai->dev->name, "unknown received signal "
+					"level scale");
+	}
+	ai->config.opmode = adhoc ? MODE_STA_IBSS : MODE_STA_ESS;
+	set_auth_type(ai, AUTH_OPEN);
+	ai->config.modulation = MOD_CCK;
+
+	if (le16_to_cpu(cap_rid.len) >= sizeof(cap_rid) &&
+	    (cap_rid.extSoftCap & cpu_to_le16(1)) &&
+	    micsetup(ai) == SUCCESS) {
+		ai->config.opmode |= MODE_MIC;
+		set_bit(FLAG_MIC_CAPABLE, &ai->flags);
+	}
+
+	/* Save off the MAC */
+	for (i = 0; i < ETH_ALEN; i++) {
+		mac[i] = ai->config.macAddr[i];
+	}
+
+	/* Check to see if there are any insmod configured
+	   rates to add */
+	if (rates[0]) {
+		memset(ai->config.rates, 0, sizeof(ai->config.rates));
+		for (i = 0; i < 8 && rates[i]; i++) {
+			ai->config.rates[i] = rates[i];
+		}
+	}
+	set_bit (FLAG_COMMIT, &ai->flags);
+
+	return SUCCESS;
+}
+
+
 static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 {
 	Cmd cmd;
@@ -3863,58 +3925,9 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 	if (lock)
 		up(&ai->sem);
 	if (ai->config.len == 0) {
-		int i;
-		tdsRssiRid rssi_rid;
-		CapabilityRid cap_rid;
-
-		kfree(ai->SSID);
-		ai->SSID = NULL;
-		// general configuration (read/modify/write)
-		status = readConfigRid(ai, lock);
-		if (status != SUCCESS) return ERROR;
-
-		status = readCapabilityRid(ai, &cap_rid, lock);
-		if (status != SUCCESS) return ERROR;
-
-		status = PC4500_readrid(ai, RID_RSSI,&rssi_rid, sizeof(rssi_rid), lock);
-		if (status == SUCCESS) {
-			if (ai->rssi || (ai->rssi = kmalloc(512, GFP_KERNEL)) != NULL)
-				memcpy(ai->rssi, (u8*)&rssi_rid + 2, 512); /* Skip RID length member */
-		}
-		else {
-			kfree(ai->rssi);
-			ai->rssi = NULL;
-			if (cap_rid.softCap & cpu_to_le16(8))
-				ai->config.rmode |= RXMODE_NORMALIZED_RSSI;
-			else
-				airo_print_warn(ai->dev->name, "unknown received signal "
-						"level scale");
-		}
-		ai->config.opmode = adhoc ? MODE_STA_IBSS : MODE_STA_ESS;
-		set_auth_type(ai, AUTH_OPEN);
-		ai->config.modulation = MOD_CCK;
-
-		if (le16_to_cpu(cap_rid.len) >= sizeof(cap_rid) &&
-		    (cap_rid.extSoftCap & cpu_to_le16(1)) &&
-		    micsetup(ai) == SUCCESS) {
-			ai->config.opmode |= MODE_MIC;
-			set_bit(FLAG_MIC_CAPABLE, &ai->flags);
-		}
-
-		/* Save off the MAC */
-		for (i = 0; i < ETH_ALEN; i++) {
-			mac[i] = ai->config.macAddr[i];
-		}
-
-		/* Check to see if there are any insmod configured
-		   rates to add */
-		if (rates[0]) {
-			memset(ai->config.rates, 0, sizeof(ai->config.rates));
-			for (i = 0; i < 8 && rates[i]; i++) {
-				ai->config.rates[i] = rates[i];
-			}
-		}
-		set_bit (FLAG_COMMIT, &ai->flags);
+		status = airo_readconfig(ai, mac, lock);
+		if (status != SUCCESS)
+			return ERROR;
 	}
 
 	/* Setup the SSIDs if present */
-- 
cgit v1.2.3


From 11ac4e668a449af328379f9fb2e1712357d2eca5 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 15 Apr 2021 20:59:47 +0200
Subject: r8169: keep pause settings on interface down/up cycle

Currently, if the user changes the pause settings, the default settings
will be restored after an interface down/up cycle, and also when
resuming from suspend. This doesn't seem to provide the best user
experience. Change this to keep user settings, and just ensure that in
jumbo mode pause is disabled.
Small drawback: When switching back mtu from jumbo to non-jumbo then
pause remains disabled (but user can enable it using ethtool).
I think that's a not too common scenario and acceptable.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 3d5460133f7c..3e86fbe21431 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2388,11 +2388,13 @@ static void rtl_jumbo_config(struct rtl8169_private *tp)
 		pcie_set_readrq(tp->pci_dev, readrq);
 
 	/* Chip doesn't support pause in jumbo mode */
-	linkmode_mod_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-			 tp->phydev->advertising, !jumbo);
-	linkmode_mod_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-			 tp->phydev->advertising, !jumbo);
-	phy_start_aneg(tp->phydev);
+	if (jumbo) {
+		linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+				   tp->phydev->advertising);
+		linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+				   tp->phydev->advertising);
+		phy_start_aneg(tp->phydev);
+	}
 }
 
 DECLARE_RTL_COND(rtl_chipcmd_cond)
@@ -5121,6 +5123,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 
 	tp->phydev->mac_managed_pm = 1;
 
+	phy_support_asym_pause(tp->phydev);
+
 	/* PHY will be woken up in rtl_open() */
 	phy_suspend(tp->phydev);
 
-- 
cgit v1.2.3


From 3051946056c3b634605b275a223e48bcb796f49b Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Mon, 15 Mar 2021 23:02:59 +0530
Subject: rsi: fix comment syntax in file headers

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
There are some files in drivers/net/wireless/rsi which follow this syntax
in their file headers, i.e. start with '/**' like comments, which causes
unexpected warnings from kernel-doc.

E.g., running scripts/kernel-doc -none on drivers/net/wireless/rsi/rsi_coex.h
causes this warning:
"warning: wrong kernel-doc identifier on line:
 * Copyright (c) 2018 Redpine Signals Inc."

Similarly for other files too.

Provide a simple fix by replacing such occurrences with general comment
format, i.e., "/*", to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210315173259.8757-1-yashsri421@gmail.com
---
 drivers/net/wireless/rsi/rsi_boot_params.h | 2 +-
 drivers/net/wireless/rsi/rsi_coex.h        | 2 +-
 drivers/net/wireless/rsi/rsi_common.h      | 2 +-
 drivers/net/wireless/rsi/rsi_debugfs.h     | 2 +-
 drivers/net/wireless/rsi/rsi_hal.h         | 2 +-
 drivers/net/wireless/rsi/rsi_main.h        | 2 +-
 drivers/net/wireless/rsi/rsi_mgmt.h        | 2 +-
 drivers/net/wireless/rsi/rsi_ps.h          | 2 +-
 drivers/net/wireless/rsi/rsi_sdio.h        | 2 +-
 drivers/net/wireless/rsi/rsi_usb.h         | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_boot_params.h b/drivers/net/wireless/rsi/rsi_boot_params.h
index c1cf19d1e376..30e03aa6a529 100644
--- a/drivers/net/wireless/rsi/rsi_boot_params.h
+++ b/drivers/net/wireless/rsi/rsi_boot_params.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2014 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_coex.h b/drivers/net/wireless/rsi/rsi_coex.h
index 0fdc67f37a56..2c14e4c651b9 100644
--- a/drivers/net/wireless/rsi/rsi_coex.h
+++ b/drivers/net/wireless/rsi/rsi_coex.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2018 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index 60f1f286b030..7aa5124575cf 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2014 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_debugfs.h b/drivers/net/wireless/rsi/rsi_debugfs.h
index 580ad3b3f710..a6a28640ad40 100644
--- a/drivers/net/wireless/rsi/rsi_debugfs.h
+++ b/drivers/net/wireless/rsi/rsi_debugfs.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2014 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index 46e36df9e8e3..d044a440fa08 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2017 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 73a19e43106b..a1065e5a92b4 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2014 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index 2ce2dcf57441..236b21482f38 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2014 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_ps.h b/drivers/net/wireless/rsi/rsi_ps.h
index 98ff6a4ced57..0be2f1e201e5 100644
--- a/drivers/net/wireless/rsi/rsi_ps.h
+++ b/drivers/net/wireless/rsi/rsi_ps.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2017 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index 1c756263cf15..7c91b126b350 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -1,4 +1,4 @@
-/**
+/*
  * @section LICENSE
  * Copyright (c) 2014 Redpine Signals Inc.
  *
diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h
index 8702f434b569..254d19b66412 100644
--- a/drivers/net/wireless/rsi/rsi_usb.h
+++ b/drivers/net/wireless/rsi/rsi_usb.h
@@ -1,4 +1,4 @@
-/**
+/*
  * @section LICENSE
  * Copyright (c) 2014 Redpine Signals Inc.
  *
-- 
cgit v1.2.3


From 705b5cfab183c78618c4757262ef741a8d2db7e9 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Tue, 23 Mar 2021 10:06:57 +0530
Subject: brcmfmac: A typo fix

s/revsion/revision/

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210323043657.1466296-1-unixbhaskar@gmail.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.h
index ee273e3bb101..e000ef78928c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.h
@@ -28,7 +28,7 @@ struct brcmf_usbdev {
 	int ntxq, nrxq, rxsize;
 	u32 bus_mtu;
 	int devid;
-	int chiprev; /* chip revsion number */
+	int chiprev; /* chip revision number */
 };
 
 /* IO Request Block (IRB) */
-- 
cgit v1.2.3


From d3240418a6623f0f308d013d621928f4ddf2b8d8 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 25 Mar 2021 14:41:51 +0800
Subject: libertas: struct lbs_private is declared duplicately

struct lbs_private has been declared at 22nd line.
Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210325064154.854245-1-wanjiabing@vivo.com
---
 drivers/net/wireless/marvell/libertas/decl.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas/decl.h b/drivers/net/wireless/marvell/libertas/decl.h
index 5d1e30e0c5db..c1e0388ef01d 100644
--- a/drivers/net/wireless/marvell/libertas/decl.h
+++ b/drivers/net/wireless/marvell/libertas/decl.h
@@ -23,7 +23,6 @@ struct lbs_private;
 typedef void (*lbs_fw_cb)(struct lbs_private *priv, int ret,
 		const struct firmware *helper, const struct firmware *mainfw);
 
-struct lbs_private;
 struct sk_buff;
 struct net_device;
 struct cmd_ds_command;
-- 
cgit v1.2.3


From ec7480ed0801dbecd431ae5b7e5c1debcf173b63 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Fri, 26 Mar 2021 17:21:47 +0800
Subject: rtw88: update statistics to fw for fine-tuning performance

Since firmware can't have proper statistics, driver update the statistics
periodically to firmware to assist in tuning performance.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210326092147.30252-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c  | 15 +++++++++++++++
 drivers/net/wireless/realtek/rtw88/fw.h  | 13 +++++++++++++
 drivers/net/wireless/realtek/rtw88/phy.c |  1 +
 3 files changed, 29 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 6649b84f6b1e..c46b34fbe8bf 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -500,6 +500,21 @@ void rtw_fw_media_status_report(struct rtw_dev *rtwdev, u8 mac_id, bool connect)
 	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
 }
 
+void rtw_fw_update_wl_phy_info(struct rtw_dev *rtwdev)
+{
+	struct rtw_traffic_stats *stats = &rtwdev->stats;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
+
+	SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_WL_PHY_INFO);
+	SET_WL_PHY_INFO_TX_TP(h2c_pkt, stats->tx_throughput);
+	SET_WL_PHY_INFO_RX_TP(h2c_pkt, stats->rx_throughput);
+	SET_WL_PHY_INFO_TX_RATE_DESC(h2c_pkt, dm_info->tx_rate);
+	SET_WL_PHY_INFO_RX_RATE_DESC(h2c_pkt, dm_info->curr_rx_rate);
+	SET_WL_PHY_INFO_RX_EVM(h2c_pkt, dm_info->rx_evm_dbm[RF_PATH_A]);
+	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+}
+
 void rtw_fw_set_pwr_mode(struct rtw_dev *rtwdev)
 {
 	struct rtw_lps_conf *conf = &rtwdev->lps_conf;
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 39c905c1b1d8..5c89a54475dd 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -345,6 +345,7 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define H2C_CMD_LPS_PG_INFO		0x2b
 #define H2C_CMD_RA_INFO			0x40
 #define H2C_CMD_RSSI_MONITOR		0x42
+#define H2C_CMD_WL_PHY_INFO		0x58
 
 #define H2C_CMD_COEX_TDMA_TYPE		0x60
 #define H2C_CMD_QUERY_BT_INFO		0x61
@@ -369,6 +370,17 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define MEDIA_STATUS_RPT_SET_MACID(h2c_pkt, value)                             \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(23, 16))
 
+#define SET_WL_PHY_INFO_TX_TP(h2c_pkt, value)				       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(17, 8))
+#define SET_WL_PHY_INFO_RX_TP(h2c_pkt, value)				       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(27, 18))
+#define SET_WL_PHY_INFO_TX_RATE_DESC(h2c_pkt, value)			       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(7, 0))
+#define SET_WL_PHY_INFO_RX_RATE_DESC(h2c_pkt, value)			       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(15, 8))
+#define SET_WL_PHY_INFO_RX_EVM(h2c_pkt, value)				       \
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x01, value, GENMASK(23, 16))
+
 #define SET_PWR_MODE_SET_MODE(h2c_pkt, value)                                  \
 	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, GENMASK(14, 8))
 #define SET_PWR_MODE_SET_RLBM(h2c_pkt, value)                                  \
@@ -559,6 +571,7 @@ void rtw_fw_bt_wifi_control(struct rtw_dev *rtwdev, u8 op_code, u8 *data);
 void rtw_fw_send_rssi_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si);
 void rtw_fw_send_ra_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si);
 void rtw_fw_media_status_report(struct rtw_dev *rtwdev, u8 mac_id, bool conn);
+void rtw_fw_update_wl_phy_info(struct rtw_dev *rtwdev);
 int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr,
 				u8 *buf, u32 size);
 void rtw_remove_rsvd_page(struct rtw_dev *rtwdev,
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index b3c0a38771a7..ae401a7bb91a 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -618,6 +618,7 @@ static void rtw_phy_pwr_track(struct rtw_dev *rtwdev)
 
 static void rtw_phy_ra_track(struct rtw_dev *rtwdev)
 {
+	rtw_fw_update_wl_phy_info(rtwdev);
 	rtw_phy_ra_info_update(rtwdev);
 	rtw_phy_rrsr_update(rtwdev);
 }
-- 
cgit v1.2.3


From c434e5e48dc4e626364491455f97e2db0aa137b1 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 28 Mar 2021 00:59:32 +0100
Subject: rsi: Use resume_noirq for SDIO

The rsi_resume() does access the bus to enable interrupts on the RSI
SDIO WiFi card, however when calling sdio_claim_host() in the resume
path, it is possible the bus is already claimed and sdio_claim_host()
spins indefinitelly. Enable the SDIO card interrupts in resume_noirq
instead to prevent anything else from claiming the SDIO bus first.

Fixes: 20db07332736 ("rsi: sdio suspend and resume support")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Amitkumar Karwar <amit.karwar@redpinesignals.com>
Cc: Angus Ainslie <angus@akkea.ca>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Karun Eagalapati <karun256@gmail.com>
Cc: Martin Kepplinger <martink@posteo.de>
Cc: Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
Cc: Siva Rebbagondla <siva8118@gmail.com>
Cc: netdev@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210327235932.175896-1-marex@denx.de
---
 drivers/net/wireless/rsi/rsi_91x_sdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index fe0287b22a25..e0c502bc4270 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -1513,7 +1513,7 @@ static int rsi_restore(struct device *dev)
 }
 static const struct dev_pm_ops rsi_pm_ops = {
 	.suspend = rsi_suspend,
-	.resume = rsi_resume,
+	.resume_noirq = rsi_resume,
 	.freeze = rsi_freeze,
 	.thaw = rsi_thaw,
 	.restore = rsi_restore,
-- 
cgit v1.2.3


From 7f50ddc5d4fe2384fab1f185f0c548dd9da48328 Mon Sep 17 00:00:00 2001
From: Eric Lin <dslin1010@gmail.com>
Date: Wed, 31 Mar 2021 09:04:18 +0800
Subject: wl3501: fix typo of 'Networks' in comment

Signed-off-by: Eric Lin <dslin1010@gmail.com>
Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210331010418.1632816-2-dslin1010@gmail.com
---
 drivers/net/wireless/wl3501.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/wl3501.h b/drivers/net/wireless/wl3501.h
index e98e04ee9a2c..5779ffbe5d0f 100644
--- a/drivers/net/wireless/wl3501.h
+++ b/drivers/net/wireless/wl3501.h
@@ -240,7 +240,7 @@ struct iw_mgmt_essid_pset {
 } __packed;
 
 /*
- * According to 802.11 Wireless Netowors, the definitive guide - O'Reilly
+ * According to 802.11 Wireless Networks, the definitive guide - O'Reilly
  * Pg 75
  */ 
 #define IW_DATA_RATE_MAX_LABELS 8
-- 
cgit v1.2.3


From d663bc3317c9faf04c555331135a083b7648e939 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Wed, 31 Mar 2021 10:35:50 +0800
Subject: brcmfmac: Remove duplicate struct declaration

struct brcmf_bus is declared twice. One has been declared
at 37th line. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210331023557.2804128-2-wanjiabing@vivo.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
index 4146faeed344..44ba6f389fa9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
@@ -112,7 +112,6 @@ do {									\
 
 extern int brcmf_msg_level;
 
-struct brcmf_bus;
 struct brcmf_pub;
 #ifdef DEBUG
 struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr);
-- 
cgit v1.2.3


From 444a9af68b5cc2ca33d073605f747e0f00d6e7b9 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Wed, 31 Mar 2021 10:35:51 +0800
Subject: wilc1000: Remove duplicate struct declaration

struct wilc is declared twice. One has been declared
at 352nd line. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210331023557.2804128-3-wanjiabing@vivo.com
---
 drivers/net/wireless/microchip/wilc1000/wlan.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.h b/drivers/net/wireless/microchip/wilc1000/wlan.h
index 6479acc12b95..771c25fa849b 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.h
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.h
@@ -398,7 +398,6 @@ struct wilc_cfg_rsp {
 	u8 seq_no;
 };
 
-struct wilc;
 struct wilc_vif;
 
 int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer,
-- 
cgit v1.2.3


From 2ff25985ea9ccc6c9af2c77b0b49045adcc62e0e Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Thu, 1 Apr 2021 14:27:17 -0500
Subject: rtw88: Fix array overrun in rtw_get_tx_power_params()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using a kernel with the Undefined Behaviour Sanity Checker (UBSAN) enabled, the
following array overrun is logged:

================================================================================
UBSAN: array-index-out-of-bounds in /home/finger/wireless-drivers-next/drivers/net/wireless/realtek/rtw88/phy.c:1789:34
index 5 is out of range for type 'u8 [5]'
CPU: 2 PID: 84 Comm: kworker/u16:3 Tainted: G           O      5.12.0-rc5-00086-gd88bba47038e-dirty #651
Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.50   09/29/2014
Workqueue: phy0 ieee80211_scan_work [mac80211]
Call Trace:
 dump_stack+0x64/0x7c
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold+0x43/0x48
 rtw_get_tx_power_params+0x83a/drivers/net/wireless/realtek/rtw88/0xad0 [rtw_core]
 ? rtw_pci_read16+0x20/0x20 [rtw_pci]
 ? check_hw_ready+0x50/0x90 [rtw_core]
 rtw_phy_get_tx_power_index+0x4d/0xd0 [rtw_core]
 rtw_phy_set_tx_power_level+0xee/0x1b0 [rtw_core]
 rtw_set_channel+0xab/0x110 [rtw_core]
 rtw_ops_config+0x87/0xc0 [rtw_core]
 ieee80211_hw_config+0x9d/0x130 [mac80211]
 ieee80211_scan_state_set_channel+0x81/0x170 [mac80211]
 ieee80211_scan_work+0x19f/0x2a0 [mac80211]
 process_one_work+0x1dd/0x3a0
 worker_thread+0x49/0x330
 ? rescuer_thread+0x3a0/0x3a0
 kthread+0x134/0x150
 ? kthread_create_worker_on_cpu+0x70/0x70
 ret_from_fork+0x22/0x30
================================================================================

The statement where an array is being overrun is shown in the following snippet:

	if (rate <= DESC_RATE11M)
		tx_power = pwr_idx_2g->cck_base[group];
	else
====>		tx_power = pwr_idx_2g->bw40_base[group];

The associated arrays are defined in main.h as follows:

struct rtw_2g_txpwr_idx {
	u8 cck_base[6];
	u8 bw40_base[5];
	struct rtw_2g_1s_pwr_idx_diff ht_1s_diff;
	struct rtw_2g_ns_pwr_idx_diff ht_2s_diff;
	struct rtw_2g_ns_pwr_idx_diff ht_3s_diff;
	struct rtw_2g_ns_pwr_idx_diff ht_4s_diff;
};

The problem arises because the value of group is 5 for channel 14. The trivial
increase in the dimension of bw40_base fails as this struct must match the layout of
efuse. The fix is to add the rate as an argument to rtw_get_channel_group() and set
the group for channel 14 to 4 if rate <= DESC_RATE11M.

This patch fixes commit fa6dfe6bff24 ("rtw88: resolve order of tx power setting routines")

Fixes: fa6dfe6bff24 ("rtw88: resolve order of tx power setting routines")
Reported-by: Богдан Пилипенко <bogdan.pylypenko107@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210401192717.28927-1-Larry.Finger@lwfinger.net
---
 drivers/net/wireless/realtek/rtw88/phy.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index ae401a7bb91a..fdd5da0bbfd8 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -1586,7 +1586,7 @@ void rtw_phy_load_tables(struct rtw_dev *rtwdev)
 }
 EXPORT_SYMBOL(rtw_phy_load_tables);
 
-static u8 rtw_get_channel_group(u8 channel)
+static u8 rtw_get_channel_group(u8 channel, u8 rate)
 {
 	switch (channel) {
 	default:
@@ -1630,6 +1630,7 @@ static u8 rtw_get_channel_group(u8 channel)
 	case 106:
 		return 4;
 	case 14:
+		return rate <= DESC_RATE11M ? 5 : 4;
 	case 108:
 	case 110:
 	case 112:
@@ -1881,7 +1882,7 @@ void rtw_get_tx_power_params(struct rtw_dev *rtwdev, u8 path, u8 rate, u8 bw,
 	s8 *remnant = &pwr_param->pwr_remnant;
 
 	pwr_idx = &rtwdev->efuse.txpwr_idx_table[path];
-	group = rtw_get_channel_group(ch);
+	group = rtw_get_channel_group(ch, rate);
 
 	/* base power index for 2.4G/5G */
 	if (IS_CH_2G_BAND(ch)) {
-- 
cgit v1.2.3


From a8e083ee8e2a6c94c29733835adae8bf5b832748 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Fri, 2 Apr 2021 11:26:27 -0700
Subject: mwl8k: Fix a double Free in mwl8k_probe_hw

In mwl8k_probe_hw, hw->priv->txq is freed at the first time by
dma_free_coherent() in the call chain:
if(!priv->ap_fw)->mwl8k_init_txqs(hw)->mwl8k_txq_init(hw, i).

Then in err_free_queues of mwl8k_probe_hw, hw->priv->txq is freed
at the second time by mwl8k_txq_deinit(hw, i)->dma_free_coherent().

My patch set txq->txd to NULL after the first free to avoid the
double free.

Fixes: a66098daacee2 ("mwl8k: Marvell TOPDOG wireless driver")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210402182627.4256-1-lyl2019@mail.ustc.edu.cn
---
 drivers/net/wireless/marvell/mwl8k.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
index c9f8c056aa51..84b32a5f01ee 100644
--- a/drivers/net/wireless/marvell/mwl8k.c
+++ b/drivers/net/wireless/marvell/mwl8k.c
@@ -1473,6 +1473,7 @@ static int mwl8k_txq_init(struct ieee80211_hw *hw, int index)
 	if (txq->skb == NULL) {
 		dma_free_coherent(&priv->pdev->dev, size, txq->txd,
 				  txq->txd_dma);
+		txq->txd = NULL;
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From cf366b15470484f41a69fd634e4fc638bcce81ae Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 2 Apr 2021 23:31:00 +0200
Subject: carl9170: remove get_tid_h

'get_tid_h()' is the same as 'ieee80211_get_tid()'.
So this function can be removed to save a few lines of code.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/68efad7a597159e22771d37fc8b4a8a613866d60.1617399010.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/ath/carl9170/carl9170.h | 7 +------
 drivers/net/wireless/ath/carl9170/tx.c       | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 0d38100d6e4f..84a8ce0784b1 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -631,14 +631,9 @@ static inline u16 carl9170_get_seq(struct sk_buff *skb)
 	return get_seq_h(carl9170_get_hdr(skb));
 }
 
-static inline u16 get_tid_h(struct ieee80211_hdr *hdr)
-{
-	return (ieee80211_get_qos_ctl(hdr))[0] & IEEE80211_QOS_CTL_TID_MASK;
-}
-
 static inline u16 carl9170_get_tid(struct sk_buff *skb)
 {
-	return get_tid_h(carl9170_get_hdr(skb));
+	return ieee80211_get_tid(carl9170_get_hdr(skb));
 }
 
 static inline struct ieee80211_vif *
diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c
index 6b8446ff48c8..88444fe6d1c6 100644
--- a/drivers/net/wireless/ath/carl9170/tx.c
+++ b/drivers/net/wireless/ath/carl9170/tx.c
@@ -394,7 +394,7 @@ static void carl9170_tx_status_process_ampdu(struct ar9170 *ar,
 	if (unlikely(!sta))
 		goto out_rcu;
 
-	tid = get_tid_h(hdr);
+	tid = ieee80211_get_tid(hdr);
 
 	sta_info = (void *) sta->drv_priv;
 	tid_info = rcu_dereference(sta_info->agg[tid]);
-- 
cgit v1.2.3


From 5e6087559e85470fbf96b78c3deb79465cefcbfd Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 7 Apr 2021 17:46:06 +0800
Subject: wil6210: wmi: Remove useless code

Fix the following whitescan warning:

An unsigned value can never be less than 0.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1617788766-91433-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/net/wireless/ath/wil6210/wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 823ec6e78a22..02ad44997e87 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -1456,7 +1456,7 @@ static void wil_link_stats_store_basic(struct wil6210_vif *vif,
 	u8 cid = basic->cid;
 	struct wil_sta_info *sta;
 
-	if (cid < 0 || cid >= wil->max_assoc_sta) {
+	if (cid >= wil->max_assoc_sta) {
 		wil_err(wil, "invalid cid %d\n", cid);
 		return;
 	}
-- 
cgit v1.2.3


From fa84df705260513a6f690275d4fd2c0b054849b0 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 14 Apr 2021 14:39:14 +0800
Subject: bcma: remove unused function

Fix the following clang warning:

drivers/bcma/driver_mips.c:55:20: warning: unused function
'mips_write32' [-Wunused-function].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1618382354-866-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/bcma/driver_mips.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 87760aa60446..12aca34e8db0 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -52,13 +52,6 @@ static inline u32 mips_read32(struct bcma_drv_mips *mcore,
 	return bcma_read32(mcore->core, offset);
 }
 
-static inline void mips_write32(struct bcma_drv_mips *mcore,
-				u16 offset,
-				u32 value)
-{
-	bcma_write32(mcore->core, offset, value);
-}
-
 static u32 bcma_core_mips_irqflag(struct bcma_device *dev)
 {
 	u32 flag;
-- 
cgit v1.2.3


From c544d89b0d67d9b714846035913c270375c0ce00 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 15 Apr 2021 16:48:46 +0300
Subject: iwlwifi: pcie: don't enable BHs with IRQs disabled

After the fix from Jiri that disabled local IRQs instead of
just BHs (necessary to fix an issue with submitting a command
with IRQs already disabled), there was still a situation in
which we could deep in there enable BHs, if the device config
sets the apmg_wake_up_wa configuration, which is true on all
7000 series devices.

To fix that, but not require reverting commit 1ed08f6fb5ae
("iwlwifi: remove flags argument for nic_access"), split up
nic access into a version with BH manipulation to use most
of the time, and without it for this specific case where the
local IRQs are already disabled.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/iwlwifi.20210415164821.d0f2edda1651.I75f762e0bed38914d1300ea198b86dd449b4b206@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h |  5 +++++
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c    | 24 +++++++++++++++++++---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c       |  4 ++--
 3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index d9688c7bed07..76a512cd2e5c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -447,6 +447,11 @@ struct iwl_trans
 		      const struct iwl_cfg_trans_params *cfg_trans);
 void iwl_trans_pcie_free(struct iwl_trans *trans);
 
+bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans);
+#define _iwl_trans_pcie_grab_nic_access(trans)			\
+	__cond_lock(nic_access_nobh,				\
+		    likely(__iwl_trans_pcie_grab_nic_access(trans)))
+
 /*****************************************************
 * RX
 ******************************************************/
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 861dbc03d183..239bc177a3e5 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1978,12 +1978,16 @@ static void iwl_trans_pcie_removal_wk(struct work_struct *wk)
 	module_put(THIS_MODULE);
 }
 
-static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
+/*
+ * This version doesn't disable BHs but rather assumes they're
+ * already disabled.
+ */
+bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
 {
 	int ret;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	spin_lock_bh(&trans_pcie->reg_lock);
+	spin_lock(&trans_pcie->reg_lock);
 
 	if (trans_pcie->cmd_hold_nic_awake)
 		goto out;
@@ -2068,7 +2072,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
 		}
 
 err:
-		spin_unlock_bh(&trans_pcie->reg_lock);
+		spin_unlock(&trans_pcie->reg_lock);
 		return false;
 	}
 
@@ -2081,6 +2085,20 @@ out:
 	return true;
 }
 
+static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans)
+{
+	bool ret;
+
+	local_bh_disable();
+	ret = __iwl_trans_pcie_grab_nic_access(trans);
+	if (ret) {
+		/* keep BHs disabled until iwl_trans_pcie_release_nic_access */
+		return ret;
+	}
+	local_bh_enable();
+	return false;
+}
+
 static void iwl_trans_pcie_release_nic_access(struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 0346505351f5..4f6c187eed69 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -643,7 +643,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 	 * returned. This needs to be done only on NICs that have
 	 * apmg_wake_up_wa set (see above.)
 	 */
-	if (!iwl_trans_grab_nic_access(trans))
+	if (!_iwl_trans_pcie_grab_nic_access(trans))
 		return -EIO;
 
 	/*
@@ -652,7 +652,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 	 * already true, so it's OK to unconditionally set it to true.
 	 */
 	trans_pcie->cmd_hold_nic_awake = true;
-	spin_unlock_bh(&trans_pcie->reg_lock);
+	spin_unlock(&trans_pcie->reg_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3


From fb8517f4fade44fa5e42e29ca4d6e4a7ed50b512 Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Fri, 16 Apr 2021 11:09:01 +0800
Subject: rtw88: 8822c: add CFO tracking

Add CFO tracking, which stands for central frequency offset tracking, to
adjust oscillator to align central frequency of connected AP. Then, it can
yield better performance.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210416030901.7099-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.h     |   8 ++
 drivers/net/wireless/realtek/rtw88/debug.h    |   1 +
 drivers/net/wireless/realtek/rtw88/main.h     |  13 +++
 drivers/net/wireless/realtek/rtw88/phy.c      |  66 +++++++++++++
 drivers/net/wireless/realtek/rtw88/phy.h      |   2 +
 drivers/net/wireless/realtek/rtw88/reg.h      |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 129 +++++++++++++++++++++++++-
 drivers/net/wireless/realtek/rtw88/rtw8822c.h |   5 +
 8 files changed, 224 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.h b/drivers/net/wireless/realtek/rtw88/coex.h
index 57018700ac39..fc61a0cab3e4 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.h
+++ b/drivers/net/wireless/realtek/rtw88/coex.h
@@ -406,4 +406,12 @@ void rtw_coex_switchband_notify(struct rtw_dev *rtwdev, u8 type);
 void rtw_coex_wl_status_change_notify(struct rtw_dev *rtwdev, u32 type);
 void rtw_coex_display_coex_info(struct rtw_dev *rtwdev, struct seq_file *m);
 
+static inline bool rtw_coex_disabled(struct rtw_dev *rtwdev)
+{
+	struct rtw_coex *coex = &rtwdev->coex;
+	struct rtw_coex_stat *coex_stat = &coex->stat;
+
+	return coex_stat->bt_disabled;
+}
+
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/debug.h b/drivers/net/wireless/realtek/rtw88/debug.h
index e16e0da26e77..c8efd1900a34 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.h
+++ b/drivers/net/wireless/realtek/rtw88/debug.h
@@ -19,6 +19,7 @@ enum rtw_debug_mask {
 	RTW_DBG_PS		= 0x00000400,
 	RTW_DBG_BF		= 0x00000800,
 	RTW_DBG_WOW		= 0x00001000,
+	RTW_DBG_CFO		= 0x00002000,
 
 	RTW_DBG_ALL		= 0xffffffff
 };
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 98b18daae1fb..56a19b5a00fc 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -625,6 +625,7 @@ struct rtw_rx_pkt_stat {
 
 	struct rtw_sta_info *si;
 	struct ieee80211_vif *vif;
+	struct ieee80211_hdr *hdr;
 };
 
 DECLARE_EWMA(tp, 10, 2);
@@ -838,6 +839,8 @@ struct rtw_chip_ops {
 			      struct ieee80211_bss_conf *conf);
 	void (*cfg_csi_rate)(struct rtw_dev *rtwdev, u8 rssi, u8 cur_rate,
 			     u8 fixrate_en, u8 *new_rate);
+	void (*cfo_init)(struct rtw_dev *rtwdev);
+	void (*cfo_track)(struct rtw_dev *rtwdev);
 
 	/* for coex */
 	void (*coex_set_init)(struct rtw_dev *rtwdev);
@@ -1499,6 +1502,15 @@ struct rtw_iqk_info {
 	} result;
 };
 
+struct rtw_cfo_track {
+	bool is_adjust;
+	u8 crystal_cap;
+	s32 cfo_tail[RTW_RF_PATH_MAX];
+	s32 cfo_cnt[RTW_RF_PATH_MAX];
+	u32 packet_count;
+	u32 packet_count_pre;
+};
+
 #define RRSR_INIT_2G 0x15f
 #define RRSR_INIT_5G 0x150
 
@@ -1552,6 +1564,7 @@ struct rtw_dm_info {
 	u8 dack_dck[RTW_RF_PATH_MAX][2][DACK_DCK_BACKUP_NUM];
 
 	struct rtw_dpk_info dpk_info;
+	struct rtw_cfo_track cfo_track;
 
 	/* [bandwidth 0:20M/1:40M][number of path] */
 	u8 cck_pd_lv[2][RTW_RF_PATH_MAX];
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index fdd5da0bbfd8..8146acaf1893 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -119,6 +119,14 @@ static void rtw_phy_cck_pd_init(struct rtw_dev *rtwdev)
 	dm_info->cck_fa_avg = CCK_FA_AVG_RESET;
 }
 
+static void rtw_phy_cfo_init(struct rtw_dev *rtwdev)
+{
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	if (chip->ops->cfo_init)
+		chip->ops->cfo_init(rtwdev);
+}
+
 void rtw_phy_init(struct rtw_dev *rtwdev)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
@@ -140,6 +148,7 @@ void rtw_phy_init(struct rtw_dev *rtwdev)
 	rtw_phy_cck_pd_init(rtwdev);
 
 	dm_info->iqk.done = false;
+	rtw_phy_cfo_init(rtwdev);
 }
 EXPORT_SYMBOL(rtw_phy_init);
 
@@ -528,6 +537,62 @@ static void rtw_phy_dpk_track(struct rtw_dev *rtwdev)
 		chip->ops->dpk_track(rtwdev);
 }
 
+struct rtw_rx_addr_match_data {
+	struct rtw_dev *rtwdev;
+	struct ieee80211_hdr *hdr;
+	struct rtw_rx_pkt_stat *pkt_stat;
+	u8 *bssid;
+};
+
+static void rtw_phy_parsing_cfo_iter(void *data, u8 *mac,
+				     struct ieee80211_vif *vif)
+{
+	struct rtw_rx_addr_match_data *iter_data = data;
+	struct rtw_dev *rtwdev = iter_data->rtwdev;
+	struct rtw_rx_pkt_stat *pkt_stat = iter_data->pkt_stat;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+	u8 *bssid = iter_data->bssid;
+	u8 i;
+
+	if (!ether_addr_equal(vif->bss_conf.bssid, bssid))
+		return;
+
+	for (i = 0; i < rtwdev->hal.rf_path_num; i++) {
+		cfo->cfo_tail[i] += pkt_stat->cfo_tail[i];
+		cfo->cfo_cnt[i]++;
+	}
+
+	cfo->packet_count++;
+}
+
+void rtw_phy_parsing_cfo(struct rtw_dev *rtwdev,
+			 struct rtw_rx_pkt_stat *pkt_stat)
+{
+	struct ieee80211_hdr *hdr = pkt_stat->hdr;
+	struct rtw_rx_addr_match_data data = {};
+
+	if (pkt_stat->crc_err || pkt_stat->icv_err || !pkt_stat->phy_status ||
+	    ieee80211_is_ctl(hdr->frame_control))
+		return;
+
+	data.rtwdev = rtwdev;
+	data.hdr = hdr;
+	data.pkt_stat = pkt_stat;
+	data.bssid = get_hdr_bssid(hdr);
+
+	rtw_iterate_vifs_atomic(rtwdev, rtw_phy_parsing_cfo_iter, &data);
+}
+EXPORT_SYMBOL(rtw_phy_parsing_cfo);
+
+static void rtw_phy_cfo_track(struct rtw_dev *rtwdev)
+{
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	if (chip->ops->cfo_track)
+		chip->ops->cfo_track(rtwdev);
+}
+
 #define CCK_PD_FA_LV1_MIN	1000
 #define CCK_PD_FA_LV0_MAX	500
 
@@ -630,6 +695,7 @@ void rtw_phy_dynamic_mechanism(struct rtw_dev *rtwdev)
 	rtw_phy_dig(rtwdev);
 	rtw_phy_cck_pd(rtwdev);
 	rtw_phy_ra_track(rtwdev);
+	rtw_phy_cfo_track(rtwdev);
 	rtw_phy_dpk_track(rtwdev);
 	rtw_phy_pwr_track(rtwdev);
 }
diff --git a/drivers/net/wireless/realtek/rtw88/phy.h b/drivers/net/wireless/realtek/rtw88/phy.h
index a0742a69446d..0b6f2fc8193c 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.h
+++ b/drivers/net/wireless/realtek/rtw88/phy.h
@@ -59,6 +59,8 @@ bool rtw_phy_pwrtrack_need_lck(struct rtw_dev *rtwdev);
 bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev);
 void rtw_phy_config_swing_table(struct rtw_dev *rtwdev,
 				struct rtw_swing_table *swing_table);
+void rtw_phy_parsing_cfo(struct rtw_dev *rtwdev,
+			 struct rtw_rx_pkt_stat *pkt_stat);
 
 struct rtw_txpwr_lmt_cfg_pair {
 	u8 regd;
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index a85fe29f13f7..95d29f522e27 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -516,6 +516,7 @@
 #define BIT_RFE_BUF_EN		BIT(3)
 
 #define REG_ANAPAR_XTAL_0	0x1040
+#define BIT_XCAP_0		GENMASK(23, 10)
 #define REG_CPU_DMEM_CON	0x1080
 #define BIT_WL_PLATFORM_RST	BIT(16)
 #define BIT_WL_SECURITY_CLK	BIT(15)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 07ff0d442e5b..9f05c60c8a03 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -17,6 +17,7 @@
 #include "util.h"
 #include "bf.h"
 #include "efuse.h"
+#include "coex.h"
 
 #define IQK_DONE_8822C 0xaa
 
@@ -39,7 +40,7 @@ static int rtw8822c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
 
 	efuse->rfe_option = map->rfe_option;
 	efuse->rf_board_option = map->rf_board_option;
-	efuse->crystal_cap = map->xtal_k;
+	efuse->crystal_cap = map->xtal_k & XCAP_MASK;
 	efuse->channel_plan = map->channel_plan;
 	efuse->country_code[0] = map->country_code[0];
 	efuse->country_code[1] = map->country_code[1];
@@ -1866,6 +1867,7 @@ static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
 		}
 		dm_info->rx_evm_dbm[path] = evm_dbm;
 	}
+	rtw_phy_parsing_cfo(rtwdev, pkt_stat);
 }
 
 static void query_phy_status(struct rtw_dev *rtwdev, u8 *phy_status,
@@ -1921,6 +1923,7 @@ static void rtw8822c_query_rx_desc(struct rtw_dev *rtwdev, u8 *rx_desc,
 
 	hdr = (struct ieee80211_hdr *)(rx_desc + desc_sz + pkt_stat->shift +
 				       pkt_stat->drv_info_sz);
+	pkt_stat->hdr = hdr;
 	if (pkt_stat->phy_status) {
 		phy_status = rx_desc + desc_sz + pkt_stat->shift;
 		query_phy_status(rtwdev, phy_status, pkt_stat);
@@ -3436,6 +3439,128 @@ static void rtw8822c_dpk_track(struct rtw_dev *rtwdev)
 	}
 }
 
+#define XCAP_EXTEND(val) ({typeof(val) _v = (val); _v | _v << 7; })
+static void rtw8822c_set_crystal_cap_reg(struct rtw_dev *rtwdev, u8 crystal_cap)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+	u32 val = 0;
+
+	val = XCAP_EXTEND(crystal_cap);
+	cfo->crystal_cap = crystal_cap;
+	rtw_write32_mask(rtwdev, REG_ANAPAR_XTAL_0, BIT_XCAP_0, val);
+}
+
+static void rtw8822c_set_crystal_cap(struct rtw_dev *rtwdev, u8 crystal_cap)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+
+	if (cfo->crystal_cap == crystal_cap)
+		return;
+
+	rtw8822c_set_crystal_cap_reg(rtwdev, crystal_cap);
+}
+
+static void rtw8822c_cfo_tracking_reset(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+
+	cfo->is_adjust = true;
+
+	if (cfo->crystal_cap > rtwdev->efuse.crystal_cap)
+		rtw8822c_set_crystal_cap(rtwdev, cfo->crystal_cap - 1);
+	else if (cfo->crystal_cap < rtwdev->efuse.crystal_cap)
+		rtw8822c_set_crystal_cap(rtwdev, cfo->crystal_cap + 1);
+}
+
+static void rtw8822c_cfo_init(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+
+	cfo->crystal_cap = rtwdev->efuse.crystal_cap;
+	cfo->is_adjust = true;
+}
+
+#define REPORT_TO_KHZ(val) ({typeof(val) _v = (val); (_v << 1) + (_v >> 1); })
+static s32 rtw8822c_cfo_calc_avg(struct rtw_dev *rtwdev, u8 path_num)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+	s32 cfo_avg, cfo_path_sum = 0, cfo_rpt_sum;
+	u8 i;
+
+	for (i = 0; i < path_num; i++) {
+		cfo_rpt_sum = REPORT_TO_KHZ(cfo->cfo_tail[i]);
+
+		if (cfo->cfo_cnt[i])
+			cfo_avg = cfo_rpt_sum / cfo->cfo_cnt[i];
+		else
+			cfo_avg = 0;
+
+		cfo_path_sum += cfo_avg;
+	}
+
+	for (i = 0; i < path_num; i++) {
+		cfo->cfo_tail[i] = 0;
+		cfo->cfo_cnt[i] = 0;
+	}
+
+	return cfo_path_sum / path_num;
+}
+
+static void rtw8822c_cfo_need_adjust(struct rtw_dev *rtwdev, s32 cfo_avg)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+
+	if (!cfo->is_adjust) {
+		if (abs(cfo_avg) > CFO_TRK_ENABLE_TH)
+			cfo->is_adjust = true;
+	} else {
+		if (abs(cfo_avg) <= CFO_TRK_STOP_TH)
+			cfo->is_adjust = false;
+	}
+
+	if (!rtw_coex_disabled(rtwdev)) {
+		cfo->is_adjust = false;
+		rtw8822c_set_crystal_cap(rtwdev, rtwdev->efuse.crystal_cap);
+	}
+}
+
+static void rtw8822c_cfo_track(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_cfo_track *cfo = &dm_info->cfo_track;
+	u8 path_num = rtwdev->hal.rf_path_num;
+	s8 crystal_cap = cfo->crystal_cap;
+	s32 cfo_avg = 0;
+
+	if (rtwdev->sta_cnt != 1) {
+		rtw8822c_cfo_tracking_reset(rtwdev);
+		return;
+	}
+
+	if (cfo->packet_count == cfo->packet_count_pre)
+		return;
+
+	cfo->packet_count_pre = cfo->packet_count;
+	cfo_avg = rtw8822c_cfo_calc_avg(rtwdev, path_num);
+	rtw8822c_cfo_need_adjust(rtwdev, cfo_avg);
+
+	if (cfo->is_adjust) {
+		if (cfo_avg > CFO_TRK_ADJ_TH)
+			crystal_cap++;
+		else if (cfo_avg < -CFO_TRK_ADJ_TH)
+			crystal_cap--;
+
+		crystal_cap = clamp_t(s8, crystal_cap, 0, XCAP_MASK);
+		rtw8822c_set_crystal_cap(rtwdev, (u8)crystal_cap);
+	}
+}
+
 static const struct rtw_phy_cck_pd_reg
 rtw8822c_cck_pd_reg[RTW_CHANNEL_WIDTH_40 + 1][RTW_RF_PATH_MAX] = {
 	{
@@ -4016,6 +4141,8 @@ static struct rtw_chip_ops rtw8822c_ops = {
 	.config_bfee		= rtw8822c_bf_config_bfee,
 	.set_gid_table		= rtw_bf_set_gid_table,
 	.cfg_csi_rate		= rtw_bf_cfg_csi_rate,
+	.cfo_init		= rtw8822c_cfo_init,
+	.cfo_track		= rtw8822c_cfo_track,
 
 	.coex_set_init		= rtw8822c_coex_cfg_init,
 	.coex_set_ant_switch	= NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
index bb2495b8609e..e2b134ce0b3f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
@@ -165,6 +165,11 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_ANAPARLDO_POW_MAC	0x0029
 #define BIT_LDOE25_PON		BIT(0)
 
+#define XCAP_MASK		GENMASK(6, 0)
+#define CFO_TRK_ENABLE_TH	20
+#define CFO_TRK_STOP_TH		10
+#define CFO_TRK_ADJ_TH		10
+
 #define REG_TXDFIR0	0x808
 #define REG_DFIRBW	0x810
 #define REG_ANTMAP0	0x820
-- 
cgit v1.2.3


From 14c20643ef9457679cc6934d77adc24296505214 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 12 Apr 2021 14:11:39 +0200
Subject: netfilter: nft_payload: fix C-VLAN offload support

- add another struct flow_dissector_key_vlan for C-VLAN
- update layer 3 dependency to allow to match on IPv4/IPv6

Fixes: 89d8fd44abfb ("netfilter: nft_payload: add C-VLAN offload support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h | 1 +
 net/netfilter/nft_payload.c               | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 1d34fe154fe0..b4d080061399 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -45,6 +45,7 @@ struct nft_flow_key {
 	struct flow_dissector_key_ports			tp;
 	struct flow_dissector_key_ip			ip;
 	struct flow_dissector_key_vlan			vlan;
+	struct flow_dissector_key_vlan			cvlan;
 	struct flow_dissector_key_eth_addrs		eth_addrs;
 	struct flow_dissector_key_meta			meta;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index cb1c8c231880..a990f37e0a60 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -241,7 +241,7 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 		if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
 			return -EOPNOTSUPP;
 
-		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
 				  vlan_tci, sizeof(__be16), reg);
 		break;
 	case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
@@ -249,8 +249,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 		if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
 			return -EOPNOTSUPP;
 
-		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
 				  vlan_tpid, sizeof(__be16), reg);
+		nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From ff4d90a89d3d4d9814e0a2696509a7d495be4163 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 12 Apr 2021 14:20:15 +0200
Subject: netfilter: nftables_offload: VLAN id needs host byteorder in flow
 dissector

The flow dissector representation expects the VLAN id in host byteorder.
Add the NFT_OFFLOAD_F_NETWORK2HOST flag to swap the bytes from nft_cmp.

Fixes: a82055af5959 ("netfilter: nft_payload: add VLAN offload support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h | 11 ++++++++-
 net/netfilter/nft_cmp.c                   | 41 +++++++++++++++++++++++++++++--
 net/netfilter/nft_payload.c               | 10 +++++---
 3 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index b4d080061399..434a6158852f 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -4,11 +4,16 @@
 #include <net/flow_offload.h>
 #include <net/netfilter/nf_tables.h>
 
+enum nft_offload_reg_flags {
+	NFT_OFFLOAD_F_NETWORK2HOST	= (1 << 0),
+};
+
 struct nft_offload_reg {
 	u32		key;
 	u32		len;
 	u32		base_offset;
 	u32		offset;
+	u32		flags;
 	struct nft_data data;
 	struct nft_data	mask;
 };
@@ -72,13 +77,17 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rul
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
 
-#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
+#define NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, __flags)	\
 	(__reg)->base_offset	=					\
 		offsetof(struct nft_flow_key, __base);			\
 	(__reg)->offset		=					\
 		offsetof(struct nft_flow_key, __base.__field);		\
 	(__reg)->len		= __len;				\
 	(__reg)->key		= __key;				\
+	(__reg)->flags		= __flags;
+
+#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
+	NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, 0)
 
 #define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg)	\
 	NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index eb6a43a180bb..47b6d05f1ae6 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -114,19 +114,56 @@ nla_put_failure:
 	return -1;
 }
 
+union nft_cmp_offload_data {
+	u16	val16;
+	u32	val32;
+	u64	val64;
+};
+
+static void nft_payload_n2h(union nft_cmp_offload_data *data,
+			    const u8 *val, u32 len)
+{
+	switch (len) {
+	case 2:
+		data->val16 = ntohs(*((u16 *)val));
+		break;
+	case 4:
+		data->val32 = ntohl(*((u32 *)val));
+		break;
+	case 8:
+		data->val64 = be64_to_cpu(*((u64 *)val));
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+}
+
 static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
 			     struct nft_flow_rule *flow,
 			     const struct nft_cmp_expr *priv)
 {
 	struct nft_offload_reg *reg = &ctx->regs[priv->sreg];
+	union nft_cmp_offload_data _data, _datamask;
 	u8 *mask = (u8 *)&flow->match.mask;
 	u8 *key = (u8 *)&flow->match.key;
+	u8 *data, *datamask;
 
 	if (priv->op != NFT_CMP_EQ || priv->len > reg->len)
 		return -EOPNOTSUPP;
 
-	memcpy(key + reg->offset, &priv->data, reg->len);
-	memcpy(mask + reg->offset, &reg->mask, reg->len);
+	if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) {
+		nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len);
+		nft_payload_n2h(&_datamask, (u8 *)&reg->mask, reg->len);
+		data = (u8 *)&_data;
+		datamask = (u8 *)&_datamask;
+	} else {
+		data = (u8 *)&priv->data;
+		datamask = (u8 *)&reg->mask;
+	}
+
+	memcpy(key + reg->offset, data, reg->len);
+	memcpy(mask + reg->offset, datamask, reg->len);
 
 	flow->match.dissector.used_keys |= BIT(reg->key);
 	flow->match.dissector.offset[reg->key] = reg->base_offset;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a990f37e0a60..501c5b24cc39 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -226,8 +226,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 		if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
 			return -EOPNOTSUPP;
 
-		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
-				  vlan_tci, sizeof(__be16), reg);
+		NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_VLAN, vlan,
+					vlan_tci, sizeof(__be16), reg,
+					NFT_OFFLOAD_F_NETWORK2HOST);
 		break;
 	case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
 		if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
@@ -241,8 +242,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
 		if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
 			return -EOPNOTSUPP;
 
-		NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
-				  vlan_tci, sizeof(__be16), reg);
+		NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
+					vlan_tci, sizeof(__be16), reg,
+					NFT_OFFLOAD_F_NETWORK2HOST);
 		break;
 	case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
 							sizeof(struct vlan_hdr):
-- 
cgit v1.2.3


From 783003f3bb8a565326e89d18bbd948ad8ffc816a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 12 Apr 2021 14:20:55 +0200
Subject: netfilter: nftables_offload: special ethertype handling for VLAN

The nftables offload parser sets FLOW_DISSECTOR_KEY_BASIC .n_proto to the
ethertype field in the ethertype frame. However:

- FLOW_DISSECTOR_KEY_BASIC .n_proto field always stores either IPv4 or IPv6
  ethertypes.
- FLOW_DISSECTOR_KEY_VLAN .vlan_tpid stores either the 802.1q and 802.1ad
  ethertypes. Same as for FLOW_DISSECTOR_KEY_CVLAN.

This function adjusts the flow dissector to handle two scenarios:

1) FLOW_DISSECTOR_KEY_VLAN .vlan_tpid is set to 802.1q or 802.1ad.
   Then, transfer:
   - the .n_proto field to FLOW_DISSECTOR_KEY_VLAN .tpid.
   - the original FLOW_DISSECTOR_KEY_VLAN .tpid to the
     FLOW_DISSECTOR_KEY_CVLAN .tpid
   - the original FLOW_DISSECTOR_KEY_CVLAN .tpid to the .n_proto field.

2) .n_proto is set to 802.1q or 802.1ad. Then, transfer:
   - the .n_proto field to FLOW_DISSECTOR_KEY_VLAN .tpid.
   - the original FLOW_DISSECTOR_KEY_VLAN .tpid to the .n_proto field.

Fixes: a82055af5959 ("netfilter: nft_payload: add VLAN offload support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_offload.c | 44 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 43b56eff3b04..1d428792018f 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -47,6 +47,48 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
 		offsetof(struct nft_flow_key, control);
 }
 
+struct nft_offload_ethertype {
+	__be16 value;
+	__be16 mask;
+};
+
+static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
+					struct nft_flow_rule *flow)
+{
+	struct nft_flow_match *match = &flow->match;
+	struct nft_offload_ethertype ethertype;
+
+	if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) &&
+	    match->key.basic.n_proto != htons(ETH_P_8021Q) &&
+	    match->key.basic.n_proto != htons(ETH_P_8021AD))
+		return;
+
+	ethertype.value = match->key.basic.n_proto;
+	ethertype.mask = match->mask.basic.n_proto;
+
+	if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+	    (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
+	     match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
+		match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
+		match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid;
+		match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid;
+		match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid;
+		match->key.vlan.vlan_tpid = ethertype.value;
+		match->mask.vlan.vlan_tpid = ethertype.mask;
+		match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
+			offsetof(struct nft_flow_key, cvlan);
+		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
+	} else {
+		match->key.basic.n_proto = match->key.vlan.vlan_tpid;
+		match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
+		match->key.vlan.vlan_tpid = ethertype.value;
+		match->mask.vlan.vlan_tpid = ethertype.mask;
+		match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+			offsetof(struct nft_flow_key, vlan);
+		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+	}
+}
+
 struct nft_flow_rule *nft_flow_rule_create(struct net *net,
 					   const struct nft_rule *rule)
 {
@@ -91,6 +133,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
 
 		expr = nft_expr_next(expr);
 	}
+	nft_flow_rule_transfer_vlan(ctx, flow);
+
 	flow->proto = ctx->dep.l3num;
 	kfree(ctx);
 
-- 
cgit v1.2.3


From 812fa71f0d967dea7616810f27e98135d410b27e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 14 Apr 2021 11:20:32 +0300
Subject: netfilter: Dissect flow after packet mangling

Netfilter tries to reroute mangled packets as a different route might
need to be used following the mangling. When this happens, netfilter
does not populate the IP protocol, the source port and the destination
port in the flow key. Therefore, FIB rules that match on these fields
are ignored and packets can be misrouted.

Solve this by dissecting the outer flow and populating the flow key
before rerouting the packet. Note that flow dissection only happens when
FIB rules that match on these fields are installed, so in the common
case there should not be a penalty.

Reported-by: Michal Soltys <msoltyspl@yandex.pl>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter.c | 2 ++
 net/ipv6/netfilter.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 7c841037c533..aff707988e23 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -25,6 +25,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
 	__be32 saddr = iph->saddr;
 	__u8 flags;
 	struct net_device *dev = skb_dst(skb)->dev;
+	struct flow_keys flkeys;
 	unsigned int hh_len;
 
 	sk = sk_to_full_sk(sk);
@@ -48,6 +49,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
 		fl4.flowi4_oif = l3mdev_master_ifindex(dev);
 	fl4.flowi4_mark = skb->mark;
 	fl4.flowi4_flags = flags;
+	fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index ab9a279dd6d4..6ab710b5a1a8 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,6 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct sock *sk = sk_to_full_sk(sk_partial);
+	struct flow_keys flkeys;
 	unsigned int hh_len;
 	struct dst_entry *dst;
 	int strict = (ipv6_addr_type(&iph->daddr) &
@@ -38,6 +39,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
 	};
 	int err;
 
+	fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
 	dst = ip6_route_output(net, sk, &fl6);
 	err = dst->error;
 	if (err) {
-- 
cgit v1.2.3


From 8826218215de1aae9d89a6ea8d3786f224711334 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 14 Apr 2021 11:20:33 +0300
Subject: selftests: fib_tests: Add test cases for interaction with mangling

Test that packets are correctly routed when netfilter mangling rules are
present.

Without previous patch:

 # ./fib_tests.sh -t ipv4_mangle

 IPv4 mangling tests
     TEST:     Connection with correct parameters                        [ OK ]
     TEST:     Connection with incorrect parameters                      [ OK ]
     TEST:     Connection with correct parameters - mangling             [FAIL]
     TEST:     Connection with correct parameters - no mangling          [ OK ]
     TEST:     Connection check - server side                            [FAIL]

 Tests passed:   3
 Tests failed:   2

 # ./fib_tests.sh -t ipv6_mangle

 IPv6 mangling tests
     TEST:     Connection with correct parameters                        [ OK ]
     TEST:     Connection with incorrect parameters                      [ OK ]
     TEST:     Connection with correct parameters - mangling             [FAIL]
     TEST:     Connection with correct parameters - no mangling          [ OK ]
     TEST:     Connection check - server side                            [FAIL]

 Tests passed:   3
 Tests failed:   2

With previous patch:

 # ./fib_tests.sh -t ipv4_mangle

 IPv4 mangling tests
     TEST:     Connection with correct parameters                        [ OK ]
     TEST:     Connection with incorrect parameters                      [ OK ]
     TEST:     Connection with correct parameters - mangling             [ OK ]
     TEST:     Connection with correct parameters - no mangling          [ OK ]
     TEST:     Connection check - server side                            [ OK ]

 Tests passed:   5
 Tests failed:   0

 # ./fib_tests.sh -t ipv6_mangle

 IPv6 mangling tests
     TEST:     Connection with correct parameters                        [ OK ]
     TEST:     Connection with incorrect parameters                      [ OK ]
     TEST:     Connection with correct parameters - mangling             [ OK ]
     TEST:     Connection with correct parameters - no mangling          [ OK ]
     TEST:     Connection check - server side                            [ OK ]

 Tests passed:   5
 Tests failed:   0

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/net/fib_tests.sh | 152 ++++++++++++++++++++++++++++++-
 1 file changed, 151 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 2b5707738609..76d9487fb03c 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -1653,6 +1653,154 @@ ipv4_route_v6_gw_test()
 	route_cleanup
 }
 
+socat_check()
+{
+	if [ ! -x "$(command -v socat)" ]; then
+		echo "socat command not found. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+iptables_check()
+{
+	iptables -t mangle -L OUTPUT &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "iptables configuration not supported. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ip6tables_check()
+{
+	ip6tables -t mangle -L OUTPUT &> /dev/null
+	if [ $? -ne 0 ]; then
+		echo "ip6tables configuration not supported. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv4_mangle_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 mangling tests"
+
+	socat_check || return 1
+	iptables_check || return 1
+
+	route_setup
+	sleep 2
+
+	local tmp_file=$(mktemp)
+	ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+
+	# Add a FIB rule and a route that will direct our connection to the
+	# listening server.
+	$IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+	$IP route add table 123 172.16.101.0/24 dev veth1
+
+	# Add an unreachable route to the main table that will block our
+	# connection in case the FIB rule is not hit.
+	$IP route add unreachable 172.16.101.2/32
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters"
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
+	log_test $? 1 "    Connection with incorrect parameters"
+
+	# Add a mangling rule and make sure connection is still successful.
+	$NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - mangling"
+
+	# Delete the mangling rule and make sure connection is still
+	# successful.
+	$NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - no mangling"
+
+	# Verify connections were indeed successful on server side.
+	[[ $(cat $tmp_file | wc -l) -eq 3 ]]
+	log_test $? 0 "    Connection check - server side"
+
+	$IP route del unreachable 172.16.101.2/32
+	$IP route del table 123 172.16.101.0/24 dev veth1
+	$IP rule del pref 100
+
+	{ kill %% && wait %%; } 2>/dev/null
+	rm $tmp_file
+
+	route_cleanup
+}
+
+ipv6_mangle_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 mangling tests"
+
+	socat_check || return 1
+	ip6tables_check || return 1
+
+	route_setup
+	sleep 2
+
+	local tmp_file=$(mktemp)
+	ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+
+	# Add a FIB rule and a route that will direct our connection to the
+	# listening server.
+	$IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+	$IP -6 route add table 123 2001:db8:101::/64 dev veth1
+
+	# Add an unreachable route to the main table that will block our
+	# connection in case the FIB rule is not hit.
+	$IP -6 route add unreachable 2001:db8:101::2/128
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters"
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
+	log_test $? 1 "    Connection with incorrect parameters"
+
+	# Add a mangling rule and make sure connection is still successful.
+	$NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - mangling"
+
+	# Delete the mangling rule and make sure connection is still
+	# successful.
+	$NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+	log_test $? 0 "    Connection with correct parameters - no mangling"
+
+	# Verify connections were indeed successful on server side.
+	[[ $(cat $tmp_file | wc -l) -eq 3 ]]
+	log_test $? 0 "    Connection check - server side"
+
+	$IP -6 route del unreachable 2001:db8:101::2/128
+	$IP -6 route del table 123 2001:db8:101::/64 dev veth1
+	$IP -6 rule del pref 100
+
+	{ kill %% && wait %%; } 2>/dev/null
+	rm $tmp_file
+
+	route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -1725,6 +1873,8 @@ do
 	ipv6_route_metrics)		ipv6_route_metrics_test;;
 	ipv4_route_metrics)		ipv4_route_metrics_test;;
 	ipv4_route_v6_gw)		ipv4_route_v6_gw_test;;
+	ipv4_mangle)			ipv4_mangle_test;;
+	ipv6_mangle)			ipv6_mangle_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
cgit v1.2.3


From b72920f6e4a9d6607b723d69b7f412c829769c75 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 15 Apr 2021 20:10:18 +0200
Subject: netfilter: nftables: counter hardware offload support

This patch adds the .offload_stats operation to synchronize hardware
stats with the expression data. Update the counter expression to use
this new interface. The hardware stats are retrieved from the netlink
dump path via FLOW_CLS_STATS command to the driver.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h         |  2 ++
 include/net/netfilter/nf_tables_offload.h |  1 +
 net/netfilter/nf_tables_api.c             |  3 +++
 net/netfilter/nf_tables_offload.c         | 44 ++++++++++++++++++++++++++-----
 net/netfilter/nft_counter.c               | 29 ++++++++++++++++++++
 5 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f0f7a3c5da6a..4a75da2a2e1d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -867,6 +867,8 @@ struct nft_expr_ops {
 	int				(*offload)(struct nft_offload_ctx *ctx,
 						   struct nft_flow_rule *flow,
 						   const struct nft_expr *expr);
+	void				(*offload_stats)(struct nft_expr *expr,
+							 const struct flow_stats *stats);
 	u32				offload_flags;
 	const struct nft_expr_type	*type;
 	void				*data;
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 434a6158852f..f9d95ff82df8 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -74,6 +74,7 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
 
 struct nft_rule;
 struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
+int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1b881a84bd01..37e9accd9aeb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2878,6 +2878,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
 			goto nla_put_failure;
 	}
 
+	if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+		nft_flow_rule_stats(chain, rule);
+
 	list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
 	if (list == NULL)
 		goto nla_put_failure;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 1d428792018f..19215e81dd66 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -243,26 +243,56 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
 		cls_flow->rule = flow->rule;
 }
 
-static int nft_flow_offload_rule(struct nft_chain *chain,
-				 struct nft_rule *rule,
-				 struct nft_flow_rule *flow,
-				 enum flow_cls_command command)
+static int nft_flow_offload_cmd(const struct nft_chain *chain,
+				const struct nft_rule *rule,
+				struct nft_flow_rule *flow,
+				enum flow_cls_command command,
+				struct flow_cls_offload *cls_flow)
 {
 	struct netlink_ext_ack extack = {};
-	struct flow_cls_offload cls_flow;
 	struct nft_base_chain *basechain;
 
 	if (!nft_is_base_chain(chain))
 		return -EOPNOTSUPP;
 
 	basechain = nft_base_chain(chain);
-	nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack,
+	nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack,
 				   command);
 
-	return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+	return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow,
 				 &basechain->flow_block.cb_list);
 }
 
+static int nft_flow_offload_rule(const struct nft_chain *chain,
+				 struct nft_rule *rule,
+				 struct nft_flow_rule *flow,
+				 enum flow_cls_command command)
+{
+	struct flow_cls_offload cls_flow;
+
+	return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow);
+}
+
+int nft_flow_rule_stats(const struct nft_chain *chain,
+			const struct nft_rule *rule)
+{
+	struct flow_cls_offload cls_flow = {};
+	struct nft_expr *expr, *next;
+	int err;
+
+	err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS,
+				   &cls_flow);
+	if (err < 0)
+		return err;
+
+	nft_rule_for_each_expr(expr, next, rule) {
+		if (expr->ops->offload_stats)
+			expr->ops->offload_stats(expr, &cls_flow.stats);
+	}
+
+	return 0;
+}
+
 static int nft_flow_offload_bind(struct flow_block_offload *bo,
 				 struct nft_base_chain *basechain)
 {
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 85ed461ec24e..8edd3b3c173d 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -13,6 +13,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 
 struct nft_counter {
 	s64		bytes;
@@ -248,6 +249,32 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
 	return 0;
 }
 
+static int nft_counter_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       const struct nft_expr *expr)
+{
+	/* No specific offload action is needed, but report success. */
+	return 0;
+}
+
+static void nft_counter_offload_stats(struct nft_expr *expr,
+				      const struct flow_stats *stats)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+	struct nft_counter *this_cpu;
+	seqcount_t *myseq;
+
+	preempt_disable();
+	this_cpu = this_cpu_ptr(priv->counter);
+	myseq = this_cpu_ptr(&nft_counter_seq);
+
+	write_seqcount_begin(myseq);
+	this_cpu->packets += stats->pkts;
+	this_cpu->bytes += stats->bytes;
+	write_seqcount_end(myseq);
+	preempt_enable();
+}
+
 static struct nft_expr_type nft_counter_type;
 static const struct nft_expr_ops nft_counter_ops = {
 	.type		= &nft_counter_type,
@@ -258,6 +285,8 @@ static const struct nft_expr_ops nft_counter_ops = {
 	.destroy_clone	= nft_counter_destroy,
 	.dump		= nft_counter_dump,
 	.clone		= nft_counter_clone,
+	.offload	= nft_counter_offload,
+	.offload_stats	= nft_counter_offload_stats,
 };
 
 static struct nft_expr_type nft_counter_type __read_mostly = {
-- 
cgit v1.2.3


From 810344ed07d9ea55e42e99d87034e234e7e6a4a5 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 13 Apr 2021 14:38:50 +0300
Subject: cfg80211: fix an htmldoc warning

The htmldoc produces this warning which was introduced
bu the commit below.

include/net/cfg80211.h:6643: warning: expecting prototype for wiphy_rfkill_set_hw_state().
Prototype was for wiphy_rfkill_set_hw_state_reason() instead

Fixes: 6f779a66dc84 ("cfg80211: allow specifying a reason for hw_rfkill")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20210413113850.59098-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3b296f2b7a2c..c6134220dd8f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6634,7 +6634,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
  */
 
 /**
- * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state
+ * wiphy_rfkill_set_hw_state_reason - notify cfg80211 about hw block state
  * @wiphy: the wiphy
  * @blocked: block status
  * @reason: one of reasons in &enum rfkill_hard_block_reasons
-- 
cgit v1.2.3


From 5d9c358d05f62aa01ff5d63dae70a897498b0bae Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 16 Apr 2021 23:01:42 -0700
Subject: cfg80211: fix a few kernel-doc warnings

Fix multiple kernel-doc warnings in cfg80211.h.

cfg80211.h:363: warning: missing initial short description on line:
 * struct ieee80211_sband_iftype_data
cfg80211.h:6743: warning: missing initial short description on line:
 * cfg80211_vendor_cmd_get_sender

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20210417060142.1648-1-rdunlap@infradead.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c6134220dd8f..528bea585bee 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -360,7 +360,7 @@ struct ieee80211_sta_he_cap {
 };
 
 /**
- * struct ieee80211_sband_iftype_data
+ * struct ieee80211_sband_iftype_data - sband data per interface type
  *
  * This structure encapsulates sband data that is relevant for the
  * interface types defined in @types_mask.  Each type in the
@@ -6740,7 +6740,7 @@ cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
 int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
 
 /**
- * cfg80211_vendor_cmd_get_sender
+ * cfg80211_vendor_cmd_get_sender - get the current sender netlink ID
  * @wiphy: the wiphy
  *
  * Return the current netlink port ID in a vendor command handler.
-- 
cgit v1.2.3


From 623b988f2dcbecf3e638ecfaec97cc56a95eaa6a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 18 Apr 2021 01:39:53 -0700
Subject: cfg80211: constify ieee80211_get_response_rate return

It's not modified so make it const with the eventual goal of moving
data to text for various static struct ieee80211_rate arrays.

Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/8b210b5f5972e39eded269b35a1297cf824c4181.camel@perches.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/broadcom/b43/main.c       | 2 +-
 drivers/net/wireless/broadcom/b43legacy/main.c | 2 +-
 include/net/cfg80211.h                         | 2 +-
 net/wireless/util.c                            | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c
index 150a366e8f62..17bcec5f3ff7 100644
--- a/drivers/net/wireless/broadcom/b43/main.c
+++ b/drivers/net/wireless/broadcom/b43/main.c
@@ -4053,7 +4053,7 @@ static void b43_update_basic_rates(struct b43_wldev *dev, u32 brates)
 {
 	struct ieee80211_supported_band *sband =
 		dev->wl->hw->wiphy->bands[b43_current_band(dev->wl)];
-	struct ieee80211_rate *rate;
+	const struct ieee80211_rate *rate;
 	int i;
 	u16 basic, direct, offset, basic_offset, rateptr;
 
diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
index 7692a2618c97..f64ebff68308 100644
--- a/drivers/net/wireless/broadcom/b43legacy/main.c
+++ b/drivers/net/wireless/broadcom/b43legacy/main.c
@@ -2762,7 +2762,7 @@ static void b43legacy_update_basic_rates(struct b43legacy_wldev *dev, u32 brates
 {
 	struct ieee80211_supported_band *sband =
 		dev->wl->hw->wiphy->bands[NL80211_BAND_2GHZ];
-	struct ieee80211_rate *rate;
+	const struct ieee80211_rate *rate;
 	int i;
 	u16 basic, direct, offset, basic_offset, rateptr;
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 528bea585bee..73b17ea89248 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5607,7 +5607,7 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
  * which is, for this function, given as a bitmap of indices of
  * rates in the band's bitrate table.
  */
-struct ieee80211_rate *
+const struct ieee80211_rate *
 ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
 			    u32 basic_rates, int bitrate);
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 1bf0200f562a..382c5262d997 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -24,7 +24,7 @@
 #include "rdev-ops.h"
 
 
-struct ieee80211_rate *
+const struct ieee80211_rate *
 ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
 			    u32 basic_rates, int bitrate)
 {
-- 
cgit v1.2.3


From ca47b46294eabc5b9e85bc2ec9de0bf097a39af6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Apr 2021 13:47:04 +0200
Subject: mac80211: properly process TXQ management frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

My previous commit to not apply flow control to management frames
that are going over TXQs (which is currently only the case for
iwlwifi, I think) broke things, with iwlwifi firmware crashing on
certain frames. As it turns out, that was due to the frame being
too short: space for the MIC wasn't added at the end of encrypted
management frames.

Clearly, this is due to using the 'frags' queue - this is meant
only for frames that have already been processed for TX, and the
code in ieee80211_tx_dequeue() just returns them. This caused all
management frames to now not get any TX processing.

To fix this, use IEEE80211_TX_INTCFL_NEED_TXPROCESSING (which is
currently used only in other circumstances) to indicate that the
frames need processing, and clear it immediately after so that,
at least in theory, MMPDUs can be fragmented.

Fixes: 73bc9e0af594 ("mac80211: don't apply flow control on management frames")
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20210416134702.ef8486a64293.If0a9025b39c71bb91b11dd6ac45547aba682df34@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3d6c5b8ec0c2..8aeb60182f10 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1394,11 +1394,14 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
 	 * want to simplify the driver API by having them on the
 	 * txqi.
 	 */
-	if (unlikely(txqi->txq.tid == IEEE80211_NUM_TIDS))
+	if (unlikely(txqi->txq.tid == IEEE80211_NUM_TIDS)) {
+		IEEE80211_SKB_CB(skb)->control.flags |=
+			IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
 		__skb_queue_tail(&txqi->frags, skb);
-	else
+	} else {
 		fq_tin_enqueue(fq, tin, flow_idx, skb,
 			       fq_skb_free_func);
+	}
 	spin_unlock_bh(&fq->lock);
 }
 
@@ -3587,10 +3590,16 @@ begin:
 
 	/* Make sure fragments stay together. */
 	skb = __skb_dequeue(&txqi->frags);
-	if (skb)
-		goto out;
+	if (unlikely(skb)) {
+		if (!(IEEE80211_SKB_CB(skb)->control.flags &
+				IEEE80211_TX_INTCFL_NEED_TXPROCESSING))
+			goto out;
+		IEEE80211_SKB_CB(skb)->control.flags &=
+			~IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
+	} else {
+		skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
+	}
 
-	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
 	if (!skb)
 		goto out;
 
-- 
cgit v1.2.3


From bab7f5ca81de4a2a5693a837bffe1124744028fe Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 16 Apr 2021 10:51:37 +0100
Subject: mac80211: minstrel_ht: remove extraneous indentation on if statement

The increment of idx is indented one level too deeply, clean up the
code by removing the extraneous tab.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210416095137.2033469-1-colin.king@canonical.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index f21c85eb906a..6487b05da6fa 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -370,7 +370,7 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 		/* short preamble */
 		if ((mi->supported[group] & BIT(idx + 4)) &&
 		    (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE))
-				idx += 4;
+			idx += 4;
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 8de8570489d109d969649d67d5f49318128c1fda Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 9 Apr 2021 12:40:28 +0300
Subject: mac80211: aes_cmac: check crypto_shash_setkey() return value

As crypto_shash_setkey() can fail, we should check the return value.

Addresses-Coverity-ID: 1401813 ("Unchecked return value")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.533ff7acf1d2.I034bafa201c4a6823333f8410aeaa60cca5ee9e0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/aes_cmac.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index b31f1021ad9c..48c04f89de20 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -2,6 +2,7 @@
 /*
  * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP
  * Copyright 2008, Jouni Malinen <j@w1.fi>
+ * Copyright (C) 2020 Intel Corporation
  */
 
 #include <linux/kernel.h>
@@ -73,8 +74,14 @@ struct crypto_shash *ieee80211_aes_cmac_key_setup(const u8 key[],
 	struct crypto_shash *tfm;
 
 	tfm = crypto_alloc_shash("cmac(aes)", 0, 0);
-	if (!IS_ERR(tfm))
-		crypto_shash_setkey(tfm, key, key_len);
+	if (!IS_ERR(tfm)) {
+		int err = crypto_shash_setkey(tfm, key, key_len);
+
+		if (err) {
+			crypto_free_shash(tfm);
+			return ERR_PTR(err);
+		}
+	}
 
 	return tfm;
 }
-- 
cgit v1.2.3


From efce5b50bad8b63d07719318c34a664ccdb56b70 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 9 Apr 2021 12:40:26 +0300
Subject: ieee80211: add the values of ranging parameters max LTF total field

Add an enum with the values of the ranging parameters max LTF total
field, as defined in IEEE802.11az_D2.6, table Table 9-322h23fc.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.d2588ebb1974.I9424c8ade13c4c938cb9999d8ce99d0d4c1cc198@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 72ff75fb1971..25fc7bee868a 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3861,4 +3861,11 @@ struct ieee80211_neighbor_ap_info {
        u8 channel;
 } __packed;
 
+enum ieee80211_range_params_max_total_ltf {
+	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_4 = 0,
+	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_8,
+	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_16,
+	IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED,
+};
+
 #endif /* LINUX_IEEE80211_H */
-- 
cgit v1.2.3


From 73807523f9a6612106582ab19217f280ed128f24 Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 9 Apr 2021 12:40:25 +0300
Subject: nl80211/cfg80211: add a flag to negotiate for LMR feedback in NDP
 ranging

Add a flag that indicates that the ISTA shall indicate support for
LMR feedback in NDP ranging negotiation.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.eff546283504.I2606161e700ac24d94d0b50c8edcdedd4c0395c2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  5 ++++-
 include/uapi/linux/nl80211.h |  4 ++++
 net/wireless/nl80211.c       |  1 +
 net/wireless/pmsr.c          | 12 +++++++++++-
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 73b17ea89248..5224f885a99a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3521,6 +3521,8 @@ struct cfg80211_pmsr_result {
  * @non_trigger_based: use non trigger based ranging for the measurement
  *		 If neither @trigger_based nor @non_trigger_based is set,
  *		 EDCA based ranging will be used.
+ * @lmr_feedback: negotiate for I2R LMR feedback. Only valid if either
+ *	@trigger_based or @non_trigger_based is set.
  *
  * See also nl80211 for the respective attribute documentation.
  */
@@ -3532,7 +3534,8 @@ struct cfg80211_pmsr_ftm_request_peer {
 	   request_lci:1,
 	   request_civicloc:1,
 	   trigger_based:1,
-	   non_trigger_based:1;
+	   non_trigger_based:1,
+	   lmr_feedback:1;
 	u8 num_bursts_exp;
 	u8 burst_duration;
 	u8 ftms_per_burst;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 18dfe744bcb5..00f696d177e6 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -6896,6 +6896,9 @@ enum nl80211_peer_measurement_ftm_capa {
  *      if neither %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED nor
  *	%NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set, EDCA based
  *	ranging will be used.
+ * @NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK: negotiate for LMR feedback. Only
+ *	valid if either %NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED or
+ *	%NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED is set.
  *
  * @NUM_NL80211_PMSR_FTM_REQ_ATTR: internal
  * @NL80211_PMSR_FTM_REQ_ATTR_MAX: highest attribute number
@@ -6914,6 +6917,7 @@ enum nl80211_peer_measurement_ftm_req {
 	NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC,
 	NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED,
 	NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED,
+	NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK,
 
 	/* keep last */
 	NUM_NL80211_PMSR_FTM_REQ_ATTR,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index adfa07c67b44..aad19348bb46 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -309,6 +309,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
 	[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG },
 	[NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG },
 	[NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG },
+	[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG },
 };
 
 static const struct nla_policy
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index a95c79d18349..6bdd96408022 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  */
 #ifndef __PMSR_H
 #define __PMSR_H
@@ -158,6 +158,16 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 		return -EINVAL;
 	}
 
+	out->ftm.lmr_feedback =
+		!!tb[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK];
+	if (!out->ftm.trigger_based && !out->ftm.non_trigger_based &&
+	    out->ftm.lmr_feedback) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK],
+				    "FTM: LMR feedback set for EDCA based ranging");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From f30386a85f695aced2fa5b124d65ce5a5f3dc3ac Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 9 Apr 2021 12:40:19 +0300
Subject: mac80211: make ieee80211_vif_to_wdev work when the vif isn't in the
 driver

This will allow the low level driver to get the wdev during
the add_interface flow.
In order to do that, remove a few checks from there and do
not return NULL for vifs that were not yet added to the
driver. Note that all the current callers of this helper
function assume that the vif already exists:
 - The callers from the drivers already have a vif pointer.
 Before this change, ieee80211_vif_to_wdev would return NULL
 in some cases, but those callers don't even check they
 get a non-NULL pointer from ieee80211_vif_to_wdev.
 - The callers from net/mac80211/cfg.c assume the vif is
 already added to the driver as well.

So, this change has no impact on existing callers of this
helper function.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.6078d3517095.I1907a45f267a62dab052bcc44428aa7a2005ffc9@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  5 +----
 net/mac80211/util.c    | 10 +---------
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c21a0e27b35e..445b66c6eb7e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1768,10 +1768,7 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
  *
  * This can be used by mac80211 drivers with direct cfg80211 APIs
  * (like the vendor commands) that needs to get the wdev for a vif.
- *
- * Note that this function may return %NULL if the given wdev isn't
- * associated with a vif that the driver knows about (e.g. monitor
- * or AP_VLAN interfaces.)
+ * This can also be useful to get the netdev associated to a vif.
  */
 struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c0fa526a45b4..0a0481f5af48 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -888,18 +888,10 @@ EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif);
 
 struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif)
 {
-	struct ieee80211_sub_if_data *sdata;
-
 	if (!vif)
 		return NULL;
 
-	sdata = vif_to_sdata(vif);
-
-	if (!ieee80211_sdata_running(sdata) ||
-	    !(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
-		return NULL;
-
-	return &sdata->wdev;
+	return &vif_to_sdata(vif)->wdev;
 }
 EXPORT_SYMBOL_GPL(ieee80211_vif_to_wdev);
 
-- 
cgit v1.2.3


From 253907ab8bc0818639af382f6398810fa1f022b3 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 9 Apr 2021 12:40:16 +0300
Subject: mac80211: properly drop the connection in case of invalid CSA IE

In case the frequency is invalid, ieee80211_parse_ch_switch_ie
will fail and we may not even reach the check in
ieee80211_sta_process_chanswitch. Drop the connection
in case ieee80211_parse_ch_switch_ie failed, but still
take into account the CSA mode to remember not to send
a deauth frame in case if it is forbidden to.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.34712ef96a0a.I75d7ad7f1d654e8b0aa01cd7189ff00a510512b3@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5f89aae9ea23..36070a991df0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1405,11 +1405,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		ch_switch.delay = csa_ie.max_switch_time;
 	}
 
-	if (res < 0) {
-		ieee80211_queue_work(&local->hw,
-				     &ifmgd->csa_connection_drop_work);
-		return;
-	}
+	if (res < 0)
+		goto lock_and_drop_connection;
 
 	if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) {
 		if (res)
-- 
cgit v1.2.3


From f12ce9f607ffa5c617cd86cb7a7a0aaefe58f127 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 9 Apr 2021 12:40:15 +0300
Subject: nl80211: Add new RSNXE related nl80211 extended features

Draft P802.11ax_D2.5 defines the following capabilities that
can be negotiated using RSNXE capabilities:

- Secure LTF measurement exchange protocol.
- Secure RTT measurement exchange protocol.
- Management frame protection for all management frames exchanged
  during the negotiation and range measurement procedure.

Extend the nl80211 API to allow drivers to declare support for
these new capabilities as part of extended feature.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.8280e31d8091.Ifcb29f84f432290338f80c8378aa5c9e0a390c93@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 00f696d177e6..f962c06e9818 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5940,6 +5940,16 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_BEACON_RATE_HE: Driver supports beacon rate
  *	configuration (AP/mesh) with HE rates.
  *
+ * @NL80211_EXT_FEATURE_SECURE_LTF: Device supports secure LTF measurement
+ *      exchange protocol.
+ *
+ * @NL80211_EXT_FEATURE_SECURE_RTT: Device supports secure RTT measurement
+ *      exchange protocol.
+ *
+ * @NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE: Device supports management
+ *      frame protection for all management frames exchanged during the
+ *      negotiation and range measurement procedure.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -6001,6 +6011,9 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_FILS_DISCOVERY,
 	NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP,
 	NL80211_EXT_FEATURE_BEACON_RATE_HE,
+	NL80211_EXT_FEATURE_SECURE_LTF,
+	NL80211_EXT_FEATURE_SECURE_RTT,
+	NL80211_EXT_FEATURE_PROT_RANGE_NEGO_AND_MEASURE,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
-- 
cgit v1.2.3


From 7dd231eb9ca607d93d00be39df1f01377e2b7d1f Mon Sep 17 00:00:00 2001
From: Naftali Goldstein <naftali.goldstein@intel.com>
Date: Fri, 9 Apr 2021 12:40:14 +0300
Subject: mac80211: drop the connection if firmware crashed while in CSA

Don't bother keeping the link in that case. It is way
too complicated to keep the connection.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Naftali Goldstein <naftali.goldstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.a126c8833398.I677bdac314dd50d90474a90593902c17f9410cc4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 2 ++
 net/mac80211/main.c        | 9 ++++++++-
 net/mac80211/mlme.c        | 4 ++--
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ecda126a7026..8fcbaa1eedf3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1660,6 +1660,8 @@ void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata,
 void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
+				   u8 *bssid, u8 reason, bool tx);
 
 /* IBSS code */
 void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0331f3a3c40e..62145e5f9628 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -5,7 +5,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2017     Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <net/mac80211.h>
@@ -282,6 +282,13 @@ static void ieee80211_restart_work(struct work_struct *work)
 			 * Then we can have a race...
 			 */
 			cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
+			if (sdata->vif.csa_active) {
+				sdata_lock(sdata);
+				ieee80211_sta_connection_lost(sdata,
+							      sdata->u.mgd.associated->bssid,
+							      WLAN_REASON_UNSPECIFIED, false);
+				sdata_unlock(sdata);
+			}
 		}
 		flush_delayed_work(&sdata->dec_tailroom_needed_wk);
 	}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 36070a991df0..2d88ff76247f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4384,8 +4384,8 @@ static void ieee80211_sta_timer(struct timer_list *t)
 	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
 
-static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
-					  u8 *bssid, u8 reason, bool tx)
+void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
+				   u8 *bssid, u8 reason, bool tx)
 {
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
-- 
cgit v1.2.3


From b07dd26f07af294ceed9715fd11e312ff8de6138 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 14 Apr 2021 18:12:51 +0200
Subject: flow: remove spi key from flowi struct

xfrm session decode ipv4 path (but not ipv6) sets this, but there are no
consumers.  Remove it.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/flow.h     |  3 ---
 net/xfrm/xfrm_policy.c | 39 ---------------------------------------
 2 files changed, 42 deletions(-)

diff --git a/include/net/flow.h b/include/net/flow.h
index 39d0cedcddee..6f5e70240071 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -59,7 +59,6 @@ union flowi_uli {
 		__le16	sport;
 	} dnports;
 
-	__be32		spi;
 	__be32		gre_key;
 
 	struct {
@@ -90,7 +89,6 @@ struct flowi4 {
 #define fl4_dport		uli.ports.dport
 #define fl4_icmp_type		uli.icmpt.type
 #define fl4_icmp_code		uli.icmpt.code
-#define fl4_ipsec_spi		uli.spi
 #define fl4_mh_type		uli.mht.type
 #define fl4_gre_key		uli.gre_key
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
@@ -150,7 +148,6 @@ struct flowi6 {
 #define fl6_dport		uli.ports.dport
 #define fl6_icmp_type		uli.icmpt.type
 #define fl6_icmp_code		uli.icmpt.code
-#define fl6_ipsec_spi		uli.spi
 #define fl6_mh_type		uli.mht.type
 #define fl6_gre_key		uli.gre_key
 	__u32			mp_hash;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 156347fd7e2e..cc6e02eb76c2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3326,39 +3326,6 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
 				fl4->fl4_icmp_code = icmp[1];
 			}
 			break;
-		case IPPROTO_ESP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be32 *ehdr;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				ehdr = (__be32 *)xprth;
-
-				fl4->fl4_ipsec_spi = ehdr[0];
-			}
-			break;
-		case IPPROTO_AH:
-			if (xprth + 8 < skb->data ||
-			    pskb_may_pull(skb, xprth + 8 - skb->data)) {
-				__be32 *ah_hdr;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				ah_hdr = (__be32 *)xprth;
-
-				fl4->fl4_ipsec_spi = ah_hdr[1];
-			}
-			break;
-		case IPPROTO_COMP:
-			if (xprth + 4 < skb->data ||
-			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ipcomp_hdr;
-
-				xprth = skb_network_header(skb) + ihl * 4;
-				ipcomp_hdr = (__be16 *)xprth;
-
-				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
-			}
-			break;
 		case IPPROTO_GRE:
 			if (xprth + 12 < skb->data ||
 			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
@@ -3377,7 +3344,6 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
 			}
 			break;
 		default:
-			fl4->fl4_ipsec_spi = 0;
 			break;
 		}
 	}
@@ -3470,12 +3436,7 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
 			fl6->flowi6_proto = nexthdr;
 			return;
 #endif
-		/* XXX Why are there these headers? */
-		case IPPROTO_AH:
-		case IPPROTO_ESP:
-		case IPPROTO_COMP:
 		default:
-			fl6->fl6_ipsec_spi = 0;
 			fl6->flowi6_proto = nexthdr;
 			return;
 		}
-- 
cgit v1.2.3


From 7baf867fef7cc65d666792e9d1b911beffe74ad7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 14 Apr 2021 18:12:52 +0200
Subject: xfrm: remove stray synchronize_rcu from xfrm_init

This function is called during boot, from ipv4 stack, there is no need
to set the pointer to NULL (static storage duration, so already NULL).

No need for the synchronize_rcu either.  Remove both.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cc6e02eb76c2..ce500f847b99 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4134,9 +4134,6 @@ void __init xfrm_init(void)
 #ifdef CONFIG_XFRM_ESPINTCP
 	espintcp_init();
 #endif
-
-	RCU_INIT_POINTER(xfrm_if_cb, NULL);
-	synchronize_rcu();
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
cgit v1.2.3


From 6218fe186109b93a2fa2343e13981e016e9961ab Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 14 Apr 2021 18:12:53 +0200
Subject: xfrm: avoid synchronize_rcu during netns destruction

Use the new exit_pre hook to NULL the netlink socket.
The net namespace core will do a synchronize_rcu() between the exit_pre
and exit/exit_batch handlers.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index df8bc8fc724c..f0aecee4d539 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -3480,18 +3480,22 @@ static int __net_init xfrm_user_net_init(struct net *net)
 	return 0;
 }
 
+static void __net_exit xfrm_user_net_pre_exit(struct net *net)
+{
+	RCU_INIT_POINTER(net->xfrm.nlsk, NULL);
+}
+
 static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list)
 {
 	struct net *net;
-	list_for_each_entry(net, net_exit_list, exit_list)
-		RCU_INIT_POINTER(net->xfrm.nlsk, NULL);
-	synchronize_net();
+
 	list_for_each_entry(net, net_exit_list, exit_list)
 		netlink_kernel_release(net->xfrm.nlsk_stash);
 }
 
 static struct pernet_operations xfrm_user_net_ops = {
 	.init	    = xfrm_user_net_init,
+	.pre_exit   = xfrm_user_net_pre_exit,
 	.exit_batch = xfrm_user_net_exit,
 };
 
-- 
cgit v1.2.3


From 747b67088f8d34b3ec64d31447a1044be92dd348 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 16 Apr 2021 17:11:46 +0200
Subject: xfrm: ipcomp: remove unnecessary get_cpu()

While testing ipcomp on a realtime kernel, Xiumei reported a "sleeping
in atomic" bug, caused by a memory allocation while preemption is
disabled (ipcomp_decompress -> alloc_page -> ... get_page_from_freelist).

As Sebastian noted [1], this get_cpu() isn't actually needed, since
ipcomp_decompress() is called in napi context anyway, so BH is already
disabled.

This patch replaces get_cpu + per_cpu_ptr with this_cpu_ptr, then
simplifies the error returns, since there isn't any common operation
left.

[1] https://lore.kernel.org/lkml/20190820082810.ixkmi56fp7u7eyn2@linutronix.de/

Cc: Juri Lelli <jlelli@redhat.com>
Reported-by: Xiumei Mu <xmu@redhat.com>
Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_ipcomp.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 4d422447aadc..2e8afe078d61 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -41,19 +41,16 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	const int plen = skb->len;
 	int dlen = IPCOMP_SCRATCH_SIZE;
 	const u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	u8 *scratch = *this_cpu_ptr(ipcomp_scratches);
+	struct crypto_comp *tfm = *this_cpu_ptr(ipcd->tfms);
 	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 	int len;
 
 	if (err)
-		goto out;
+		return err;
 
-	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (dlen < (plen + sizeof(struct ip_comp_hdr)))
+		return -EINVAL;
 
 	len = dlen - plen;
 	if (len > skb_tailroom(skb))
@@ -68,16 +65,14 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		skb_frag_t *frag;
 		struct page *page;
 
-		err = -EMSGSIZE;
 		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
-			goto out;
+			return -EMSGSIZE;
 
 		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
 		page = alloc_page(GFP_ATOMIC);
 
-		err = -ENOMEM;
 		if (!page)
-			goto out;
+			return -ENOMEM;
 
 		__skb_frag_set_page(frag, page);
 
@@ -96,11 +91,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 		skb_shinfo(skb)->nr_frags++;
 	}
 
-	err = 0;
-
-out:
-	put_cpu();
-	return err;
+	return 0;
 }
 
 int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
-- 
cgit v1.2.3


From 76cf42213307f0908e010ac4c2bdcb77113202dd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 9 Apr 2021 12:40:17 +0300
Subject: wireless: align some HE capabilities with the spec

Some names were changed, align that with the spec as of
802.11ax-D6.1.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.b1e5fbab0d8c.I3eb6076cb0714ec6aec6b8f9dee613ce4a05d825@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/mac.c              | 10 +++++-----
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c |  6 +++---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   | 10 +++++-----
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |  4 ++--
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |  2 +-
 include/linux/ieee80211.h                          | 14 +++++++-------
 net/mac80211/debugfs_sta.c                         | 19 ++++++++++---------
 7 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index faa2e678e63e..343768afedc4 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3807,7 +3807,7 @@ ath11k_mac_filter_he_cap_mesh(struct ieee80211_he_cap_elem *he_cap_elem)
 	    IEEE80211_HE_MAC_CAP4_BQR;
 	he_cap_elem->mac_cap_info[4] &= ~m;
 
-	m = IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION |
+	m = IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECTIVE_TRANSMISSION |
 	    IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU |
 	    IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING |
 	    IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX;
@@ -3817,7 +3817,7 @@ ath11k_mac_filter_he_cap_mesh(struct ieee80211_he_cap_elem *he_cap_elem)
 	    IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO;
 	he_cap_elem->phy_cap_info[2] &= ~m;
 
-	m = IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA |
+	m = IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU |
 	    IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK |
 	    IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK;
 	he_cap_elem->phy_cap_info[3] &= ~m;
@@ -3829,13 +3829,13 @@ ath11k_mac_filter_he_cap_mesh(struct ieee80211_he_cap_elem *he_cap_elem)
 	he_cap_elem->phy_cap_info[5] &= ~m;
 
 	m = IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
-	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB |
+	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB |
 	    IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB |
 	    IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO;
 	he_cap_elem->phy_cap_info[6] &= ~m;
 
-	m = IEEE80211_HE_PHY_CAP7_SRP_BASED_SR |
-	    IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
+	m = IEEE80211_HE_PHY_CAP7_PSR_BASED_SR |
+	    IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
 	    IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ |
 	    IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ;
 	he_cap_elem->phy_cap_info[7] &= ~m;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index af684f80b0cc..632f20d4027d 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -583,11 +583,11 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
 					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
 				.phy_cap_info[6] =
-					IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
-					IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB |
+					IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
+					IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB |
 					IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 				.phy_cap_info[7] =
-					IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
+					IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
 					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
 					IEEE80211_HE_PHY_CAP7_MAX_NC_1,
 				.phy_cap_info[8] =
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index ad4e5b95158b..d9a0587f234b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -370,8 +370,8 @@ mt7915_set_stream_he_txbf_caps(struct ieee80211_sta_he_cap *he_cap,
 	    IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK;
 	elem->phy_cap_info[5] &= ~c;
 
-	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
-	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB;
+	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
+	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB;
 	elem->phy_cap_info[6] &= ~c;
 
 	elem->phy_cap_info[7] &= ~IEEE80211_HE_PHY_CAP7_MAX_NC_MASK;
@@ -408,8 +408,8 @@ mt7915_set_stream_he_txbf_caps(struct ieee80211_sta_he_cap *he_cap,
 	c = (nss - 1) | (max_t(int, le16_to_cpu(mcs->tx_mcs_160), 1) << 3);
 	elem->phy_cap_info[5] |= c;
 
-	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
-	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB;
+	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB |
+	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB;
 	elem->phy_cap_info[6] |= c;
 
 	/* the maximum cap is 4 x 3, (Nr, Nc) = (3, 2) */
@@ -535,7 +535,7 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 				IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE |
 				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT;
 			he_cap_elem->phy_cap_info[7] |=
-				IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
+				IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
 				IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI;
 			he_cap_elem->phy_cap_info[8] |=
 				IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 195929242b72..97ef0265c516 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1821,9 +1821,9 @@ mt7915_mcu_sta_bfer_he(struct ieee80211_sta *sta, struct ieee80211_vif *vif,
 
 	bf->tx_mode = MT_PHY_TYPE_HE_SU;
 	mt7915_mcu_sta_sounding_rate(bf);
-	bf->trigger_su = HE_PHY(CAP6_TRIG_SU_BEAMFORMER_FB,
+	bf->trigger_su = HE_PHY(CAP6_TRIG_SU_BEAMFORMING_FB,
 				pe->phy_cap_info[6]);
-	bf->trigger_mu = HE_PHY(CAP6_TRIG_MU_BEAMFORMER_FB,
+	bf->trigger_mu = HE_PHY(CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB,
 				pe->phy_cap_info[6]);
 	bfer_nr = HE_PHY(CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK,
 			 ve->phy_cap_info[5]);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 729f6c42cdde..fdd93926b516 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -108,7 +108,7 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 				IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE |
 				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT;
 			he_cap_elem->phy_cap_info[7] |=
-				IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
+				IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP |
 				IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI;
 			he_cap_elem->phy_cap_info[8] |=
 				IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 25fc7bee868a..687db25eb85f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2020,7 +2020,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
 #define IEEE80211_HE_MAC_CAP4_QTP				0x02
 #define IEEE80211_HE_MAC_CAP4_BQR				0x04
-#define IEEE80211_HE_MAC_CAP4_SRP_RESP				0x08
+#define IEEE80211_HE_MAC_CAP4_PSR_RESP				0x08
 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
 #define IEEE80211_HE_MAC_CAP4_OPS				0x20
 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
@@ -2031,7 +2031,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 
 #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40		0x01
 #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41		0x02
-#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION	0x04
+#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECTIVE_TRANSMISSION	0x04
 #define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU			0x08
 #define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX		0x10
 #define IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS			0x20
@@ -2089,7 +2089,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK			0x18
 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1				0x00
 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2				0x20
-#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA		0x40
+#define IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU		0x40
 #define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER				0x80
 
 #define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE				0x01
@@ -2136,15 +2136,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 
 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU			0x01
 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU			0x02
-#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB			0x04
-#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB			0x08
+#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB			0x04
+#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB		0x08
 #define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB				0x10
 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE			0x20
 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO		0x40
 #define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT			0x80
 
-#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR				0x01
-#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR			0x02
+#define IEEE80211_HE_PHY_CAP7_PSR_BASED_SR				0x01
+#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP			0x02
 #define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI		0x04
 #define IEEE80211_HE_PHY_CAP7_MAX_NC_1					0x08
 #define IEEE80211_HE_PHY_CAP7_MAX_NC_2					0x10
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 5a27c61a7b38..d350224d45e8 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -732,15 +732,15 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(MAC, 4, BSRP_BQRP_A_MPDU_AGG, "BSRP-BQRP-A-MPDU-AGG");
 	PFLAG(MAC, 4, QTP, "QTP");
 	PFLAG(MAC, 4, BQR, "BQR");
-	PFLAG(MAC, 4, SRP_RESP, "SRP-RESP");
+	PFLAG(MAC, 4, PSR_RESP, "PSR-RESP");
 	PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP");
 	PFLAG(MAC, 4, OPS, "OPS");
 	PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU");
 
 	PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7);
 
-	PFLAG(MAC, 5, SUBCHAN_SELECVITE_TRANSMISSION,
-	      "SUBCHAN-SELECVITE-TRANSMISSION");
+	PFLAG(MAC, 5, SUBCHAN_SELECTIVE_TRANSMISSION,
+	      "SUBCHAN-SELECTIVE-TRANSMISSION");
 	PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU");
 	PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX");
 	PFLAG(MAC, 5, HE_DYNAMIC_SM_PS, "HE-DYNAMIC-SM-PS");
@@ -832,8 +832,8 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 
 	PFLAG(PHY, 3, DCM_MAX_RX_NSS_1, "DCM-MAX-RX-NSS-1");
 	PFLAG(PHY, 3, DCM_MAX_RX_NSS_2, "DCM-MAX-RX-NSS-2");
-	PFLAG(PHY, 3, RX_HE_MU_PPDU_FROM_NON_AP_STA,
-	      "RX-HE-MU-PPDU-FROM-NON-AP-STA");
+	PFLAG(PHY, 3, RX_PARTIAL_BW_SU_IN_20MHZ_MU,
+	      "RX-PARTIAL-BW-SU-IN-20MHZ-MU");
 	PFLAG(PHY, 3, SU_BEAMFORMER, "SU-BEAMFORMER");
 
 	PFLAG(PHY, 4, SU_BEAMFORMEE, "SU-BEAMFORMEE");
@@ -853,16 +853,17 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 
 	PFLAG(PHY, 6, CODEBOOK_SIZE_42_SU, "CODEBOOK-SIZE-42-SU");
 	PFLAG(PHY, 6, CODEBOOK_SIZE_75_MU, "CODEBOOK-SIZE-75-MU");
-	PFLAG(PHY, 6, TRIG_SU_BEAMFORMER_FB, "TRIG-SU-BEAMFORMER-FB");
-	PFLAG(PHY, 6, TRIG_MU_BEAMFORMER_FB, "TRIG-MU-BEAMFORMER-FB");
+	PFLAG(PHY, 6, TRIG_SU_BEAMFORMING_FB, "TRIG-SU-BEAMFORMING-FB");
+	PFLAG(PHY, 6, TRIG_MU_BEAMFORMING_PARTIAL_BW_FB,
+	      "MU-BEAMFORMING-PARTIAL-BW-FB");
 	PFLAG(PHY, 6, TRIG_CQI_FB, "TRIG-CQI-FB");
 	PFLAG(PHY, 6, PARTIAL_BW_EXT_RANGE, "PARTIAL-BW-EXT-RANGE");
 	PFLAG(PHY, 6, PARTIAL_BANDWIDTH_DL_MUMIMO,
 	      "PARTIAL-BANDWIDTH-DL-MUMIMO");
 	PFLAG(PHY, 6, PPE_THRESHOLD_PRESENT, "PPE-THRESHOLD-PRESENT");
 
-	PFLAG(PHY, 7, SRP_BASED_SR, "SRP-BASED-SR");
-	PFLAG(PHY, 7, POWER_BOOST_FACTOR_AR, "POWER-BOOST-FACTOR-AR");
+	PFLAG(PHY, 7, PSR_BASED_SR, "PSR-BASED-SR");
+	PFLAG(PHY, 7, POWER_BOOST_FACTOR_SUPP, "POWER-BOOST-FACTOR-SUPP");
 	PFLAG(PHY, 7, HE_SU_MU_PPDU_4XLTF_AND_08_US_GI,
 	      "HE-SU-MU-PPDU-4XLTF-AND-08-US-GI");
 	PFLAG_RANGE(PHY, 7, MAX_NC, 0, 1, 1, "MAX-NC-%d");
-- 
cgit v1.2.3


From 1f851b8dfd76a0e91560247802dd25a4754753c7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 9 Apr 2021 12:40:20 +0300
Subject: wireless: align HE capabilities A-MPDU Length Exponent Extension

The A-MPDU length exponent extension is defined differently in
802.11ax D6.1, align with that.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.c2a257d3e2df.I3455245d388c52c61dace7e7958dbed7e807cfb6@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/mac.c              |  5 ++---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c |  4 ++--
 drivers/net/wireless/mac80211_hwsim.c              |  8 ++++----
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |  2 +-
 include/linux/ieee80211.h                          | 10 ++++------
 net/mac80211/debugfs_sta.c                         | 16 ++++++++--------
 7 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 343768afedc4..a44ca32e9f72 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -1265,9 +1265,8 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar,
 	 * request, then use MAX_AMPDU_LEN_FACTOR as 16 to calculate max_ampdu
 	 * length.
 	 */
-	ampdu_factor = (he_cap->he_cap_elem.mac_cap_info[3] &
-			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) >>
-			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_SHIFT;
+	ampdu_factor = u8_get_bits(he_cap->he_cap_elem.mac_cap_info[3],
+				   IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK);
 
 	if (ampdu_factor) {
 		if (sta->vht_cap.vht_supported)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 632f20d4027d..d2058cdcb0d8 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -550,7 +550,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
 				.mac_cap_info[4] =
 					IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU |
 					IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
@@ -636,7 +636,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP2_BSR,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
 				.mac_cap_info[4] =
 					IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 				.mac_cap_info[5] =
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index d56d2095a0d4..9630324c535e 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2795,7 +2795,7 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = {
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
 				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
@@ -2839,7 +2839,7 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = {
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
 				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
@@ -2885,7 +2885,7 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz[] = {
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
 				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
@@ -2933,7 +2933,7 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz[] = {
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
 				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index d9a0587f234b..82b9e15dc6e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -476,7 +476,7 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 			IEEE80211_HE_MAC_CAP0_HTC_HE;
 		he_cap_elem->mac_cap_info[3] =
 			IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED;
+			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3;
 		he_cap_elem->mac_cap_info[4] =
 			IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index fdd93926b516..23149fcdf413 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -65,7 +65,7 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 			IEEE80211_HE_MAC_CAP0_HTC_HE;
 		he_cap_elem->mac_cap_info[3] =
 			IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED;
+			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3;
 		he_cap_elem->mac_cap_info[4] =
 			IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU;
 
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 687db25eb85f..c74033aca726 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2006,17 +2006,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
  * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the
  * same field in the HE capabilities.
  */
-#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT	0x00
-#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1		0x08
-#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2		0x10
-#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED	0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0		0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1		0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2		0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3		0x18
 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK		0x18
 #define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG			0x20
 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
 
-#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_SHIFT		3
-
 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
 #define IEEE80211_HE_MAC_CAP4_QTP				0x02
 #define IEEE80211_HE_MAC_CAP4_BQR				0x04
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d350224d45e8..25b3d4822aed 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -711,17 +711,17 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(MAC, 3, OFDMA_RA, "OFDMA-RA");
 
 	switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) {
-	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT:
-		PRINT("MAX-AMPDU-LEN-EXP-USE-VHT");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0:
+		PRINT("MAX-AMPDU-LEN-EXP-USE-EXT-0");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1:
-		PRINT("MAX-AMPDU-LEN-EXP-VHT-1");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-1");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2:
-		PRINT("MAX-AMPDU-LEN-EXP-VHT-2");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-2");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED:
-		PRINT("MAX-AMPDU-LEN-EXP-RESERVED");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-EXT-3");
 		break;
 	}
 
-- 
cgit v1.2.3


From 2f5164447cdab6419edddde3a214f93a53aa4e60 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 9 Apr 2021 12:40:24 +0300
Subject: wireless: fix spelling of A-MSDU in HE capabilities

In the HE capabilities, spell A-MSDU correctly, not "A-MDSU".

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210409123755.9e6ff1af1181.If6868bc6902ccd9a95c74c78f716c4b41473ef14@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c   | 4 ++--
 drivers/net/wireless/mac80211_hwsim.c                | 8 ++++----
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/init.c     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c      | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/main.c     | 2 +-
 include/linux/ieee80211.h                            | 2 +-
 net/mac80211/debugfs_sta.c                           | 2 +-
 8 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index d2058cdcb0d8..4e7da2e32354 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -552,7 +552,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
 				.mac_cap_info[4] =
-					IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU |
+					IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU |
 					IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
 				.mac_cap_info[5] =
 					IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 |
@@ -638,7 +638,7 @@ static const struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2,
 				.mac_cap_info[4] =
-					IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+					IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
 				.mac_cap_info[5] =
 					IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU,
 				.phy_cap_info[0] =
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9630324c535e..51ce767eaf88 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2796,7 +2796,7 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = {
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
-				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 					IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
@@ -2840,7 +2840,7 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz[] = {
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
-				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 					IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
@@ -2886,7 +2886,7 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz[] = {
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
-				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
@@ -2934,7 +2934,7 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz[] = {
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3,
-				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 6cbccfb05f8b..e7c23f9f0ea5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -496,7 +496,7 @@ mt76_connac_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 	if (elem->mac_cap_info[3] & IEEE80211_HE_MAC_CAP3_OMI_CONTROL)
 		cap |= STA_REC_HE_CAP_OM;
 
-	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU)
+	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU)
 		cap |= STA_REC_HE_CAP_AMSDU_IN_AMPDU;
 
 	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_BQR)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 82b9e15dc6e3..152ac7192163 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -478,7 +478,7 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 			IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3;
 		he_cap_elem->mac_cap_info[4] =
-			IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU;
+			IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU;
 
 		if (band == NL80211_BAND_2GHZ)
 			he_cap_elem->phy_cap_info[0] =
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 97ef0265c516..c44091754a98 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1330,7 +1330,7 @@ mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
 	if (elem->mac_cap_info[3] & IEEE80211_HE_MAC_CAP3_OMI_CONTROL)
 		cap |= STA_REC_HE_CAP_OM;
 
-	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU)
+	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU)
 		cap |= STA_REC_HE_CAP_AMSDU_IN_AMPDU;
 
 	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_BQR)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 23149fcdf413..07141e98a077 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -67,7 +67,7 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 			IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3;
 		he_cap_elem->mac_cap_info[4] =
-			IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU;
+			IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU;
 
 		if (band == NL80211_BAND_2GHZ)
 			he_cap_elem->phy_cap_info[0] =
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c74033aca726..2967437f1b11 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2021,7 +2021,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP4_PSR_RESP				0x08
 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
 #define IEEE80211_HE_MAC_CAP4_OPS				0x20
-#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
+#define IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU			0x40
 /* Multi TID agg TX is split between byte #4 and #5
  * The value is a combination of B39,B40,B41
  */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 25b3d4822aed..936c9dfa86c8 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -735,7 +735,7 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(MAC, 4, PSR_RESP, "PSR-RESP");
 	PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP");
 	PFLAG(MAC, 4, OPS, "OPS");
-	PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU");
+	PFLAG(MAC, 4, AMSDU_IN_AMPDU, "AMSDU-IN-AMPDU");
 
 	PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7);
 
-- 
cgit v1.2.3


From 010bfbe768f7ecc876ffba92db30432de4997e2a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 16 Apr 2021 09:42:14 +0200
Subject: cfg80211: scan: drop entry from hidden_list on overflow

If we overflow the maximum number of BSS entries and free the
new entry, drop it from any hidden_list that it may have been
added to in the code above or in cfg80211_combine_bsses().

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20210416094212.5de7d1676ad7.Ied283b0bc5f504845e7d6ab90626bdfa68bb3dc0@changeid
Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index c3b51efff5c6..b21058698a9f 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1751,6 +1751,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 
 		if (rdev->bss_entries >= bss_entries_limit &&
 		    !cfg80211_bss_expire_oldest(rdev)) {
+			if (!list_empty(&new->hidden_list))
+				list_del(&new->hidden_list);
 			kfree(new);
 			goto drop;
 		}
-- 
cgit v1.2.3


From e7020bb068d8be50a92f48e36b236a1a1ef9282e Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 17 Apr 2021 11:13:39 +0200
Subject: iwlwifi: Fix softirq/hardirq disabling in
 iwl_pcie_gen2_enqueue_hcmd()

Analogically to what we did in 2800aadc18a6 ("iwlwifi: Fix softirq/hardirq
disabling in iwl_pcie_enqueue_hcmd()"), we must apply the same fix to
iwl_pcie_gen2_enqueue_hcmd(), as it's being called from exactly the same
contexts.

Reported-by: Heiner Kallweit <hkallweit1@gmail.com
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2104171112390.18270@cbobk.fhfr.pm
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 4456abb9a074..34bde8c87324 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -40,6 +40,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
 	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
 	struct iwl_tfh_tfd *tfd;
+	unsigned long flags;
 
 	copy_size = sizeof(struct iwl_cmd_header_wide);
 	cmd_size = sizeof(struct iwl_cmd_header_wide);
@@ -108,14 +109,14 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 		goto free_dup_buf;
 	}
 
-	spin_lock_bh(&txq->lock);
+	spin_lock_irqsave(&txq->lock, flags);
 
 	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
 	tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
 	memset(tfd, 0, sizeof(*tfd));
 
 	if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
-		spin_unlock_bh(&txq->lock);
+		spin_unlock_irqrestore(&txq->lock, flags);
 
 		IWL_ERR(trans, "No space in command queue\n");
 		iwl_op_mode_cmd_queue_full(trans->op_mode);
@@ -250,7 +251,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 	spin_unlock(&trans_pcie->reg_lock);
 
 out:
-	spin_unlock_bh(&txq->lock);
+	spin_unlock_irqrestore(&txq->lock, flags);
 free_dup_buf:
 	if (idx < 0)
 		kfree(dup_buf);
-- 
cgit v1.2.3


From e16edc99d658cd41c60a44cc14d170697aa3271f Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 16 Apr 2021 12:44:16 +0200
Subject: vsock/vmci: log once the failed queue pair allocation

VMCI feature is not supported in conjunction with the vSphere Fault
Tolerance (FT) feature.

VMware Tools can repeatedly try to create a vsock connection. If FT is
enabled the kernel logs is flooded with the following messages:

    qp_alloc_hypercall result = -20
    Could not attach to queue pair with -20

"qp_alloc_hypercall result = -20" was hidden by commit e8266c4c3307
("VMCI: Stop log spew when qp allocation isn't possible"), but "Could
not attach to queue pair with -20" is still there flooding the log.

Since the error message can be useful in some cases, print it only once.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/vmci_transport.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8b65323207db..1c9ecb18b8e6 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -568,8 +568,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
 			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
 out:
 	if (err < 0) {
-		pr_err("Could not attach to queue pair with %d\n",
-		       err);
+		pr_err_once("Could not attach to queue pair with %d\n", err);
 		err = vmci_transport_error_to_vsock_error(err);
 	}
 
-- 
cgit v1.2.3


From 9e46fb656fdb40baec33a8942743d81a40f30fd3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 16 Apr 2021 18:55:34 +0300
Subject: nexthop: Restart nexthop dump based on last dumped nexthop identifier

Currently, a multi-part nexthop dump is restarted based on the number of
nexthops that have been dumped so far. This can result in a lot of
nexthops not being dumped when nexthops are simultaneously deleted:

 # ip nexthop | wc -l
 65536
 # ip nexthop flush
 Dump was interrupted and may be inconsistent.
 Flushed 36040 nexthops
 # ip nexthop | wc -l
 29496

Instead, restart the dump based on the nexthop identifier (fixed number)
of the last successfully dumped nexthop:

 # ip nexthop | wc -l
 65536
 # ip nexthop flush
 Dump was interrupted and may be inconsistent.
 Flushed 65536 nexthops
 # ip nexthop | wc -l
 0

Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
Tested-by: Maksym Yaremchuk <maksymy@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 5a2fc8798d20..4075230b14c6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3140,26 +3140,24 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb,
 				  void *data)
 {
 	struct rb_node *node;
-	int idx = 0, s_idx;
+	int s_idx;
 	int err;
 
 	s_idx = ctx->idx;
 	for (node = rb_first(root); node; node = rb_next(node)) {
 		struct nexthop *nh;
 
-		if (idx < s_idx)
-			goto cont;
-
 		nh = rb_entry(node, struct nexthop, rb_node);
-		ctx->idx = idx;
+		if (nh->id < s_idx)
+			continue;
+
+		ctx->idx = nh->id;
 		err = nh_cb(skb, cb, nh, data);
 		if (err)
 			return err;
-cont:
-		idx++;
 	}
 
-	ctx->idx = idx;
+	ctx->idx++;
 	return 0;
 }
 
-- 
cgit v1.2.3


From bf5eb67dc80a75e0756269084b087c06f0360b78 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Fri, 16 Apr 2021 18:55:35 +0300
Subject: selftests: fib_nexthops: Test large scale nexthop flushing

Test that all the nexthops are flushed when a multi-part nexthop dump is
required for the flushing.

Without previous patch:

 # ./fib_nexthops.sh
 TEST: Large scale nexthop flushing                                  [FAIL]

With previous patch:

 # ./fib_nexthops.sh
 TEST: Large scale nexthop flushing                                  [ OK ]

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 56dd0c6f2e96..49774a8a7736 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1933,6 +1933,21 @@ basic()
 	log_test $? 2 "Nexthop group and blackhole"
 
 	$IP nexthop flush >/dev/null 2>&1
+
+	# Test to ensure that flushing with a multi-part nexthop dump works as
+	# expected.
+	local batch_file=$(mktemp)
+
+	for i in $(seq 1 $((64 * 1024))); do
+		echo "nexthop add id $i blackhole" >> $batch_file
+	done
+
+	$IP -b $batch_file
+	$IP nexthop flush >/dev/null 2>&1
+	[[ $($IP nexthop | wc -l) -eq 0 ]]
+	log_test $? 0 "Large scale nexthop flushing"
+
+	rm $batch_file
 }
 
 check_nexthop_buckets_balance()
-- 
cgit v1.2.3


From 83c1ca257aca5ecf776858d8a917fcd18623b708 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 16 Apr 2021 17:41:48 +0000
Subject: mld: remove unnecessary prototypes

Some prototypes are unnecessary, so delete it.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ff536a158b85..0d59efb6b49e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -75,9 +75,6 @@ static void igmp6_leave_group(struct ifmcaddr6 *ma);
 static void mld_mca_work(struct work_struct *work);
 
 static void mld_ifc_event(struct inet6_dev *idev);
-static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_clear_delrec(struct inet6_dev *idev);
 static bool mld_in_v1_mode(const struct inet6_dev *idev);
 static int sf_setstate(struct ifmcaddr6 *pmc);
 static void sf_markstate(struct ifmcaddr6 *pmc);
-- 
cgit v1.2.3


From ca9c54d2d6a5ab2430c4eda364c77125d62e5e0f Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 16 Apr 2021 13:11:59 -0700
Subject: net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)

Add a VF driver for Microsoft Azure Network Adapter (MANA) that will be
available in the future.

Co-developed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Co-developed-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                                        |    4 +-
 drivers/net/ethernet/Kconfig                       |    1 +
 drivers/net/ethernet/Makefile                      |    1 +
 drivers/net/ethernet/microsoft/Kconfig             |   29 +
 drivers/net/ethernet/microsoft/Makefile            |    5 +
 drivers/net/ethernet/microsoft/mana/Makefile       |    6 +
 drivers/net/ethernet/microsoft/mana/gdma.h         |  673 +++++++
 drivers/net/ethernet/microsoft/mana/gdma_main.c    | 1415 +++++++++++++++
 drivers/net/ethernet/microsoft/mana/hw_channel.c   |  843 +++++++++
 drivers/net/ethernet/microsoft/mana/hw_channel.h   |  190 ++
 drivers/net/ethernet/microsoft/mana/mana.h         |  533 ++++++
 drivers/net/ethernet/microsoft/mana/mana_en.c      | 1895 ++++++++++++++++++++
 drivers/net/ethernet/microsoft/mana/mana_ethtool.c |  250 +++
 drivers/net/ethernet/microsoft/mana/shm_channel.c  |  291 +++
 drivers/net/ethernet/microsoft/mana/shm_channel.h  |   21 +
 15 files changed, 6156 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/microsoft/Kconfig
 create mode 100644 drivers/net/ethernet/microsoft/Makefile
 create mode 100644 drivers/net/ethernet/microsoft/mana/Makefile
 create mode 100644 drivers/net/ethernet/microsoft/mana/gdma.h
 create mode 100644 drivers/net/ethernet/microsoft/mana/gdma_main.c
 create mode 100644 drivers/net/ethernet/microsoft/mana/hw_channel.c
 create mode 100644 drivers/net/ethernet/microsoft/mana/hw_channel.h
 create mode 100644 drivers/net/ethernet/microsoft/mana/mana.h
 create mode 100644 drivers/net/ethernet/microsoft/mana/mana_en.c
 create mode 100644 drivers/net/ethernet/microsoft/mana/mana_ethtool.c
 create mode 100644 drivers/net/ethernet/microsoft/mana/shm_channel.c
 create mode 100644 drivers/net/ethernet/microsoft/mana/shm_channel.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c204b0cf2f3f..50bf0d61a7b0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8267,11 +8267,12 @@ S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/i2c/hi556.c
 
-Hyper-V CORE AND DRIVERS
+Hyper-V/Azure CORE AND DRIVERS
 M:	"K. Y. Srinivasan" <kys@microsoft.com>
 M:	Haiyang Zhang <haiyangz@microsoft.com>
 M:	Stephen Hemminger <sthemmin@microsoft.com>
 M:	Wei Liu <wei.liu@kernel.org>
+M:	Dexuan Cui <decui@microsoft.com>
 L:	linux-hyperv@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
@@ -8288,6 +8289,7 @@ F:	drivers/hid/hid-hyperv.c
 F:	drivers/hv/
 F:	drivers/input/serio/hyperv-keyboard.c
 F:	drivers/iommu/hyperv-iommu.c
+F:	drivers/net/ethernet/microsoft/
 F:	drivers/net/hyperv/
 F:	drivers/pci/controller/pci-hyperv-intf.c
 F:	drivers/pci/controller/pci-hyperv.c
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 4b85f2b74872..d46460c5b44d 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -82,6 +82,7 @@ source "drivers/net/ethernet/huawei/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/ibm/Kconfig"
 source "drivers/net/ethernet/intel/Kconfig"
+source "drivers/net/ethernet/microsoft/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
 
 config JME
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 9394493e8187..cb3f9084a21b 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/
 obj-$(CONFIG_NET_VENDOR_IBM) += ibm/
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
 obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
+obj-$(CONFIG_NET_VENDOR_MICROSOFT) += microsoft/
 obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/
 obj-$(CONFIG_JME) += jme.o
 obj-$(CONFIG_KORINA) += korina.o
diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig
new file mode 100644
index 000000000000..e1ac0a5d808d
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/Kconfig
@@ -0,0 +1,29 @@
+#
+# Microsoft Azure network device configuration
+#
+
+config NET_VENDOR_MICROSOFT
+	bool "Microsoft Network Devices"
+	default y
+	help
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip the
+	  question about Microsoft network devices. If you say Y, you will be
+	  asked for your specific device in the following question.
+
+if NET_VENDOR_MICROSOFT
+
+config MICROSOFT_MANA
+	tristate "Microsoft Azure Network Adapter (MANA) support"
+	depends on PCI_MSI && X86_64
+	select PCI_HYPERV
+	help
+	  This driver supports Microsoft Azure Network Adapter (MANA).
+	  So far, the driver is only supported on X86_64.
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called mana.
+
+endif #NET_VENDOR_MICROSOFT
diff --git a/drivers/net/ethernet/microsoft/Makefile b/drivers/net/ethernet/microsoft/Makefile
new file mode 100644
index 000000000000..d2ddc218135f
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Microsoft Azure network device driver.
+#
+
+obj-$(CONFIG_MICROSOFT_MANA) += mana/
diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile
new file mode 100644
index 000000000000..0edd5bb685f3
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#
+# Makefile for the Microsoft Azure Network Adapter driver
+
+obj-$(CONFIG_MICROSOFT_MANA) += mana.o
+mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o
diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h
new file mode 100644
index 000000000000..33e53d32e891
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/gdma.h
@@ -0,0 +1,673 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _GDMA_H
+#define _GDMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+
+#include "shm_channel.h"
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+enum gdma_request_type {
+	GDMA_VERIFY_VF_DRIVER_VERSION	= 1,
+	GDMA_QUERY_MAX_RESOURCES	= 2,
+	GDMA_LIST_DEVICES		= 3,
+	GDMA_REGISTER_DEVICE		= 4,
+	GDMA_DEREGISTER_DEVICE		= 5,
+	GDMA_GENERATE_TEST_EQE		= 10,
+	GDMA_CREATE_QUEUE		= 12,
+	GDMA_DISABLE_QUEUE		= 13,
+	GDMA_CREATE_DMA_REGION		= 25,
+	GDMA_DMA_REGION_ADD_PAGES	= 26,
+	GDMA_DESTROY_DMA_REGION		= 27,
+};
+
+enum gdma_queue_type {
+	GDMA_INVALID_QUEUE,
+	GDMA_SQ,
+	GDMA_RQ,
+	GDMA_CQ,
+	GDMA_EQ,
+};
+
+enum gdma_work_request_flags {
+	GDMA_WR_NONE			= 0,
+	GDMA_WR_OOB_IN_SGL		= BIT(0),
+	GDMA_WR_PAD_BY_SGE0		= BIT(1),
+};
+
+enum gdma_eqe_type {
+	GDMA_EQE_COMPLETION		= 3,
+	GDMA_EQE_TEST_EVENT		= 64,
+	GDMA_EQE_HWC_INIT_EQ_ID_DB	= 129,
+	GDMA_EQE_HWC_INIT_DATA		= 130,
+	GDMA_EQE_HWC_INIT_DONE		= 131,
+};
+
+enum {
+	GDMA_DEVICE_NONE	= 0,
+	GDMA_DEVICE_HWC		= 1,
+	GDMA_DEVICE_MANA	= 2,
+};
+
+struct gdma_resource {
+	/* Protect the bitmap */
+	spinlock_t lock;
+
+	/* The bitmap size in bits. */
+	u32 size;
+
+	/* The bitmap tracks the resources. */
+	unsigned long *map;
+};
+
+union gdma_doorbell_entry {
+	u64	as_uint64;
+
+	struct {
+		u64 id		: 24;
+		u64 reserved	: 8;
+		u64 tail_ptr	: 31;
+		u64 arm		: 1;
+	} cq;
+
+	struct {
+		u64 id		: 24;
+		u64 wqe_cnt	: 8;
+		u64 tail_ptr	: 32;
+	} rq;
+
+	struct {
+		u64 id		: 24;
+		u64 reserved	: 8;
+		u64 tail_ptr	: 32;
+	} sq;
+
+	struct {
+		u64 id		: 16;
+		u64 reserved	: 16;
+		u64 tail_ptr	: 31;
+		u64 arm		: 1;
+	} eq;
+}; /* HW DATA */
+
+struct gdma_msg_hdr {
+	u32 hdr_type;
+	u32 msg_type;
+	u16 msg_version;
+	u16 hwc_msg_id;
+	u32 msg_size;
+}; /* HW DATA */
+
+struct gdma_dev_id {
+	union {
+		struct {
+			u16 type;
+			u16 instance;
+		};
+
+		u32 as_uint32;
+	};
+}; /* HW DATA */
+
+struct gdma_req_hdr {
+	struct gdma_msg_hdr req;
+	struct gdma_msg_hdr resp; /* The expected response */
+	struct gdma_dev_id dev_id;
+	u32 activity_id;
+}; /* HW DATA */
+
+struct gdma_resp_hdr {
+	struct gdma_msg_hdr response;
+	struct gdma_dev_id dev_id;
+	u32 activity_id;
+	u32 status;
+	u32 reserved;
+}; /* HW DATA */
+
+struct gdma_general_req {
+	struct gdma_req_hdr hdr;
+}; /* HW DATA */
+
+#define GDMA_MESSAGE_V1 1
+
+struct gdma_general_resp {
+	struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+#define GDMA_STANDARD_HEADER_TYPE 0
+
+static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code,
+					u32 req_size, u32 resp_size)
+{
+	hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+	hdr->req.msg_type = code;
+	hdr->req.msg_version = GDMA_MESSAGE_V1;
+	hdr->req.msg_size = req_size;
+
+	hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+	hdr->resp.msg_type = code;
+	hdr->resp.msg_version = GDMA_MESSAGE_V1;
+	hdr->resp.msg_size = resp_size;
+}
+
+/* The 16-byte struct is part of the GDMA work queue entry (WQE). */
+struct gdma_sge {
+	u64 address;
+	u32 mem_key;
+	u32 size;
+}; /* HW DATA */
+
+struct gdma_wqe_request {
+	struct gdma_sge *sgl;
+	u32 num_sge;
+
+	u32 inline_oob_size;
+	const void *inline_oob_data;
+
+	u32 flags;
+	u32 client_data_unit;
+};
+
+enum gdma_page_type {
+	GDMA_PAGE_TYPE_4K,
+};
+
+#define GDMA_INVALID_DMA_REGION 0
+
+struct gdma_mem_info {
+	struct device *dev;
+
+	dma_addr_t dma_handle;
+	void *virt_addr;
+	u64 length;
+
+	/* Allocated by the PF driver */
+	u64 gdma_region;
+};
+
+#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
+
+struct gdma_dev {
+	struct gdma_context *gdma_context;
+
+	struct gdma_dev_id dev_id;
+
+	u32 pdid;
+	u32 doorbell;
+	u32 gpa_mkey;
+
+	/* GDMA driver specific pointer */
+	void *driver_data;
+};
+
+#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE
+
+#define GDMA_CQE_SIZE 64
+#define GDMA_EQE_SIZE 16
+#define GDMA_MAX_SQE_SIZE 512
+#define GDMA_MAX_RQE_SIZE 256
+
+#define GDMA_COMP_DATA_SIZE 0x3C
+
+#define GDMA_EVENT_DATA_SIZE 0xC
+
+/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */
+#define GDMA_WQE_BU_SIZE 32
+
+#define INVALID_PDID		UINT_MAX
+#define INVALID_DOORBELL	UINT_MAX
+#define INVALID_MEM_KEY		UINT_MAX
+#define INVALID_QUEUE_ID	UINT_MAX
+#define INVALID_PCI_MSIX_INDEX  UINT_MAX
+
+struct gdma_comp {
+	u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
+	u32 wq_num;
+	bool is_sq;
+};
+
+struct gdma_event {
+	u32 details[GDMA_EVENT_DATA_SIZE / 4];
+	u8  type;
+};
+
+struct gdma_queue;
+
+#define CQE_POLLING_BUFFER 512
+struct mana_eq {
+	struct gdma_queue *eq;
+	struct gdma_comp cqe_poll[CQE_POLLING_BUFFER];
+};
+
+typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
+			      struct gdma_event *e);
+
+typedef void gdma_cq_callback(void *context, struct gdma_queue *q);
+
+/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE
+ * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the
+ * driver increases the 'head' in BUs rather than in bytes, and notifies
+ * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track
+ * the HW head, and increases the 'head' by 1 for every processed EQE/CQE.
+ *
+ * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is
+ * processed, the driver increases the 'tail' to indicate that WQEs have
+ * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ.
+ *
+ * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures
+ * that the EQ/CQ is big enough so they can't overflow, and the driver uses
+ * the owner bits mechanism to detect if the queue has become empty.
+ */
+struct gdma_queue {
+	struct gdma_dev *gdma_dev;
+
+	enum gdma_queue_type type;
+	u32 id;
+
+	struct gdma_mem_info mem_info;
+
+	void *queue_mem_ptr;
+	u32 queue_size;
+
+	bool monitor_avl_buf;
+
+	u32 head;
+	u32 tail;
+
+	/* Extra fields specific to EQ/CQ. */
+	union {
+		struct {
+			bool disable_needed;
+
+			gdma_eq_callback *callback;
+			void *context;
+
+			unsigned int msix_index;
+
+			u32 log2_throttle_limit;
+
+			/* NAPI data */
+			struct napi_struct napi;
+			int work_done;
+			int budget;
+		} eq;
+
+		struct {
+			gdma_cq_callback *callback;
+			void *context;
+
+			struct gdma_queue *parent; /* For CQ/EQ relationship */
+		} cq;
+	};
+};
+
+struct gdma_queue_spec {
+	enum gdma_queue_type type;
+	bool monitor_avl_buf;
+	unsigned int queue_size;
+
+	/* Extra fields specific to EQ/CQ. */
+	union {
+		struct {
+			gdma_eq_callback *callback;
+			void *context;
+
+			unsigned long log2_throttle_limit;
+
+			/* Only used by the MANA device. */
+			struct net_device *ndev;
+		} eq;
+
+		struct {
+			gdma_cq_callback *callback;
+			void *context;
+
+			struct gdma_queue *parent_eq;
+
+		} cq;
+	};
+};
+
+struct gdma_irq_context {
+	void (*handler)(void *arg);
+	void *arg;
+};
+
+struct gdma_context {
+	struct device		*dev;
+
+	/* Per-vPort max number of queues */
+	unsigned int		max_num_queues;
+	unsigned int		max_num_msix;
+	unsigned int		num_msix_usable;
+	struct gdma_resource	msix_resource;
+	struct gdma_irq_context	*irq_contexts;
+
+	/* This maps a CQ index to the queue structure. */
+	unsigned int		max_num_cqs;
+	struct gdma_queue	**cq_table;
+
+	/* Protect eq_test_event and test_event_eq_id  */
+	struct mutex		eq_test_event_mutex;
+	struct completion	eq_test_event;
+	u32			test_event_eq_id;
+
+	void __iomem		*bar0_va;
+	void __iomem		*shm_base;
+	void __iomem		*db_page_base;
+	u32 db_page_size;
+
+	/* Shared memory chanenl (used to bootstrap HWC) */
+	struct shm_channel	shm_channel;
+
+	/* Hardware communication channel (HWC) */
+	struct gdma_dev		hwc;
+
+	/* Azure network adapter */
+	struct gdma_dev		mana;
+};
+
+#define MAX_NUM_GDMA_DEVICES	4
+
+static inline bool mana_gd_is_mana(struct gdma_dev *gd)
+{
+	return gd->dev_id.type == GDMA_DEVICE_MANA;
+}
+
+static inline bool mana_gd_is_hwc(struct gdma_dev *gd)
+{
+	return gd->dev_id.type == GDMA_DEVICE_HWC;
+}
+
+u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset);
+u32 mana_gd_wq_avail_space(struct gdma_queue *wq);
+
+int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq);
+
+int mana_gd_create_hwc_queue(struct gdma_dev *gd,
+			     const struct gdma_queue_spec *spec,
+			     struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_eq(struct gdma_dev *gd,
+			   const struct gdma_queue_spec *spec,
+			   struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
+			      const struct gdma_queue_spec *spec,
+			      struct gdma_queue **queue_ptr);
+
+void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
+
+int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
+
+void mana_gd_arm_cq(struct gdma_queue *cq);
+
+struct gdma_wqe {
+	u32 reserved	:24;
+	u32 last_vbytes	:8;
+
+	union {
+		u32 flags;
+
+		struct {
+			u32 num_sge		:8;
+			u32 inline_oob_size_div4:3;
+			u32 client_oob_in_sgl	:1;
+			u32 reserved1		:4;
+			u32 client_data_unit	:14;
+			u32 reserved2		:2;
+		};
+	};
+}; /* HW DATA */
+
+#define INLINE_OOB_SMALL_SIZE 8
+#define INLINE_OOB_LARGE_SIZE 24
+
+#define MAX_TX_WQE_SIZE 512
+#define MAX_RX_WQE_SIZE 256
+
+struct gdma_cqe {
+	u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
+
+	union {
+		u32 as_uint32;
+
+		struct {
+			u32 wq_num	: 24;
+			u32 is_sq	: 1;
+			u32 reserved	: 4;
+			u32 owner_bits	: 3;
+		};
+	} cqe_info;
+}; /* HW DATA */
+
+#define GDMA_CQE_OWNER_BITS 3
+
+#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1)
+
+#define SET_ARM_BIT 1
+
+#define GDMA_EQE_OWNER_BITS 3
+
+union gdma_eqe_info {
+	u32 as_uint32;
+
+	struct {
+		u32 type	: 8;
+		u32 reserved1	: 8;
+		u32 client_id	: 2;
+		u32 reserved2	: 11;
+		u32 owner_bits	: 3;
+	};
+}; /* HW DATA */
+
+#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1)
+#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries))
+
+struct gdma_eqe {
+	u32 details[GDMA_EVENT_DATA_SIZE / 4];
+	u32 eqe_info;
+}; /* HW DATA */
+
+#define GDMA_REG_DB_PAGE_OFFSET	8
+#define GDMA_REG_DB_PAGE_SIZE	0x10
+#define GDMA_REG_SHM_OFFSET	0x18
+
+struct gdma_posted_wqe_info {
+	u32 wqe_size_in_bu;
+};
+
+/* GDMA_GENERATE_TEST_EQE */
+struct gdma_generate_test_event_req {
+	struct gdma_req_hdr hdr;
+	u32 queue_index;
+}; /* HW DATA */
+
+/* GDMA_VERIFY_VF_DRIVER_VERSION */
+enum {
+	GDMA_PROTOCOL_V1	= 1,
+	GDMA_PROTOCOL_FIRST	= GDMA_PROTOCOL_V1,
+	GDMA_PROTOCOL_LAST	= GDMA_PROTOCOL_V1,
+};
+
+struct gdma_verify_ver_req {
+	struct gdma_req_hdr hdr;
+
+	/* Mandatory fields required for protocol establishment */
+	u64 protocol_ver_min;
+	u64 protocol_ver_max;
+	u64 drv_cap_flags1;
+	u64 drv_cap_flags2;
+	u64 drv_cap_flags3;
+	u64 drv_cap_flags4;
+
+	/* Advisory fields */
+	u64 drv_ver;
+	u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */
+	u32 reserved;
+	u32 os_ver_major;
+	u32 os_ver_minor;
+	u32 os_ver_build;
+	u32 os_ver_platform;
+	u64 reserved_2;
+	u8 os_ver_str1[128];
+	u8 os_ver_str2[128];
+	u8 os_ver_str3[128];
+	u8 os_ver_str4[128];
+}; /* HW DATA */
+
+struct gdma_verify_ver_resp {
+	struct gdma_resp_hdr hdr;
+	u64 gdma_protocol_ver;
+	u64 pf_cap_flags1;
+	u64 pf_cap_flags2;
+	u64 pf_cap_flags3;
+	u64 pf_cap_flags4;
+}; /* HW DATA */
+
+/* GDMA_QUERY_MAX_RESOURCES */
+struct gdma_query_max_resources_resp {
+	struct gdma_resp_hdr hdr;
+	u32 status;
+	u32 max_sq;
+	u32 max_rq;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_db;
+	u32 max_mst;
+	u32 max_cq_mod_ctx;
+	u32 max_mod_cq;
+	u32 max_msix;
+}; /* HW DATA */
+
+/* GDMA_LIST_DEVICES */
+struct gdma_list_devices_resp {
+	struct gdma_resp_hdr hdr;
+	u32 num_of_devs;
+	u32 reserved;
+	struct gdma_dev_id devs[64];
+}; /* HW DATA */
+
+/* GDMA_REGISTER_DEVICE */
+struct gdma_register_device_resp {
+	struct gdma_resp_hdr hdr;
+	u32 pdid;
+	u32 gpa_mkey;
+	u32 db_id;
+}; /* HW DATA */
+
+/* GDMA_CREATE_QUEUE */
+struct gdma_create_queue_req {
+	struct gdma_req_hdr hdr;
+	u32 type;
+	u32 reserved1;
+	u32 pdid;
+	u32 doolbell_id;
+	u64 gdma_region;
+	u32 reserved2;
+	u32 queue_size;
+	u32 log2_throttle_limit;
+	u32 eq_pci_msix_index;
+	u32 cq_mod_ctx_id;
+	u32 cq_parent_eq_id;
+	u8  rq_drop_on_overrun;
+	u8  rq_err_on_wqe_overflow;
+	u8  rq_chain_rec_wqes;
+	u8  sq_hw_db;
+	u32 reserved3;
+}; /* HW DATA */
+
+struct gdma_create_queue_resp {
+	struct gdma_resp_hdr hdr;
+	u32 queue_index;
+}; /* HW DATA */
+
+/* GDMA_DISABLE_QUEUE */
+struct gdma_disable_queue_req {
+	struct gdma_req_hdr hdr;
+	u32 type;
+	u32 queue_index;
+	u32 alloc_res_id_on_creation;
+}; /* HW DATA */
+
+/* GDMA_CREATE_DMA_REGION */
+struct gdma_create_dma_region_req {
+	struct gdma_req_hdr hdr;
+
+	/* The total size of the DMA region */
+	u64 length;
+
+	/* The offset in the first page */
+	u32 offset_in_page;
+
+	/* enum gdma_page_type */
+	u32 gdma_page_type;
+
+	/* The total number of pages */
+	u32 page_count;
+
+	/* If page_addr_list_len is smaller than page_count,
+	 * the remaining page addresses will be added via the
+	 * message GDMA_DMA_REGION_ADD_PAGES.
+	 */
+	u32 page_addr_list_len;
+	u64 page_addr_list[];
+}; /* HW DATA */
+
+struct gdma_create_dma_region_resp {
+	struct gdma_resp_hdr hdr;
+	u64 gdma_region;
+}; /* HW DATA */
+
+/* GDMA_DMA_REGION_ADD_PAGES */
+struct gdma_dma_region_add_pages_req {
+	struct gdma_req_hdr hdr;
+
+	u64 gdma_region;
+
+	u32 page_addr_list_len;
+	u32 reserved3;
+
+	u64 page_addr_list[];
+}; /* HW DATA */
+
+/* GDMA_DESTROY_DMA_REGION */
+struct gdma_destroy_dma_region_req {
+	struct gdma_req_hdr hdr;
+
+	u64 gdma_region;
+}; /* HW DATA */
+
+int mana_gd_verify_vf_version(struct pci_dev *pdev);
+
+int mana_gd_register_device(struct gdma_dev *gd);
+int mana_gd_deregister_device(struct gdma_dev *gd);
+
+int mana_gd_post_work_request(struct gdma_queue *wq,
+			      const struct gdma_wqe_request *wqe_req,
+			      struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_post_and_ring(struct gdma_queue *queue,
+			  const struct gdma_wqe_request *wqe,
+			  struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r);
+void mana_gd_free_res_map(struct gdma_resource *r);
+
+void mana_gd_wq_ring_doorbell(struct gdma_context *gc,
+			      struct gdma_queue *queue);
+
+int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
+			 struct gdma_mem_info *gmi);
+
+void mana_gd_free_memory(struct gdma_mem_info *gmi);
+
+int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
+			 u32 resp_len, void *resp);
+#endif /* _GDMA_H */
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
new file mode 100644
index 000000000000..2f87bf90f8ec
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -0,0 +1,1415 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "mana.h"
+
+static u32 mana_gd_r32(struct gdma_context *g, u64 offset)
+{
+	return readl(g->bar0_va + offset);
+}
+
+static u64 mana_gd_r64(struct gdma_context *g, u64 offset)
+{
+	return readq(g->bar0_va + offset);
+}
+
+static void mana_gd_init_registers(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+
+	gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF;
+
+	gc->db_page_base = gc->bar0_va +
+				mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET);
+
+	gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET);
+}
+
+static int mana_gd_query_max_resources(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct gdma_query_max_resources_resp resp = {};
+	struct gdma_general_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+			     sizeof(req), sizeof(resp));
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n",
+			err, resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	if (gc->num_msix_usable > resp.max_msix)
+		gc->num_msix_usable = resp.max_msix;
+
+	if (gc->num_msix_usable <= 1)
+		return -ENOSPC;
+
+	gc->max_num_queues = num_online_cpus();
+	if (gc->max_num_queues > MANA_MAX_NUM_QUEUES)
+		gc->max_num_queues = MANA_MAX_NUM_QUEUES;
+
+	if (gc->max_num_queues > resp.max_eq)
+		gc->max_num_queues = resp.max_eq;
+
+	if (gc->max_num_queues > resp.max_cq)
+		gc->max_num_queues = resp.max_cq;
+
+	if (gc->max_num_queues > resp.max_sq)
+		gc->max_num_queues = resp.max_sq;
+
+	if (gc->max_num_queues > resp.max_rq)
+		gc->max_num_queues = resp.max_rq;
+
+	return 0;
+}
+
+static int mana_gd_detect_devices(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct gdma_list_devices_resp resp = {};
+	struct gdma_general_req req = {};
+	struct gdma_dev_id dev;
+	u32 i, max_num_devs;
+	u16 dev_type;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req),
+			     sizeof(resp));
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err,
+			resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs);
+
+	for (i = 0; i < max_num_devs; i++) {
+		dev = resp.devs[i];
+		dev_type = dev.type;
+
+		/* HWC is already detected in mana_hwc_create_channel(). */
+		if (dev_type == GDMA_DEVICE_HWC)
+			continue;
+
+		if (dev_type == GDMA_DEVICE_MANA) {
+			gc->mana.gdma_context = gc;
+			gc->mana.dev_id = dev;
+		}
+	}
+
+	return gc->mana.dev_id.type == 0 ? -ENODEV : 0;
+}
+
+int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
+			 u32 resp_len, void *resp)
+{
+	struct hw_channel_context *hwc = gc->hwc.driver_data;
+
+	return mana_hwc_send_request(hwc, req_len, req, resp_len, resp);
+}
+
+int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
+			 struct gdma_mem_info *gmi)
+{
+	dma_addr_t dma_handle;
+	void *buf;
+
+	if (length < PAGE_SIZE || !is_power_of_2(length))
+		return -EINVAL;
+
+	gmi->dev = gc->dev;
+	buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	gmi->dma_handle = dma_handle;
+	gmi->virt_addr = buf;
+	gmi->length = length;
+
+	return 0;
+}
+
+void mana_gd_free_memory(struct gdma_mem_info *gmi)
+{
+	dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr,
+			  gmi->dma_handle);
+}
+
+static int mana_gd_create_hw_eq(struct gdma_context *gc,
+				struct gdma_queue *queue)
+{
+	struct gdma_create_queue_resp resp = {};
+	struct gdma_create_queue_req req = {};
+	int err;
+
+	if (queue->type != GDMA_EQ)
+		return -EINVAL;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE,
+			     sizeof(req), sizeof(resp));
+
+	req.hdr.dev_id = queue->gdma_dev->dev_id;
+	req.type = queue->type;
+	req.pdid = queue->gdma_dev->pdid;
+	req.doolbell_id = queue->gdma_dev->doorbell;
+	req.gdma_region = queue->mem_info.gdma_region;
+	req.queue_size = queue->queue_size;
+	req.log2_throttle_limit = queue->eq.log2_throttle_limit;
+	req.eq_pci_msix_index = queue->eq.msix_index;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err,
+			resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	queue->id = resp.queue_index;
+	queue->eq.disable_needed = true;
+	queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+	return 0;
+}
+
+static int mana_gd_disable_queue(struct gdma_queue *queue)
+{
+	struct gdma_context *gc = queue->gdma_dev->gdma_context;
+	struct gdma_disable_queue_req req = {};
+	struct gdma_general_resp resp = {};
+	int err;
+
+	WARN_ON(queue->type != GDMA_EQ);
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE,
+			     sizeof(req), sizeof(resp));
+
+	req.hdr.dev_id = queue->gdma_dev->dev_id;
+	req.type = queue->type;
+	req.queue_index =  queue->id;
+	req.alloc_res_id_on_creation = 1;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err,
+			resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	return 0;
+}
+
+#define DOORBELL_OFFSET_SQ	0x0
+#define DOORBELL_OFFSET_RQ	0x400
+#define DOORBELL_OFFSET_CQ	0x800
+#define DOORBELL_OFFSET_EQ	0xFF8
+
+static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index,
+				  enum gdma_queue_type q_type, u32 qid,
+				  u32 tail_ptr, u8 num_req)
+{
+	void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index;
+	union gdma_doorbell_entry e = {};
+
+	switch (q_type) {
+	case GDMA_EQ:
+		e.eq.id = qid;
+		e.eq.tail_ptr = tail_ptr;
+		e.eq.arm = num_req;
+
+		addr += DOORBELL_OFFSET_EQ;
+		break;
+
+	case GDMA_CQ:
+		e.cq.id = qid;
+		e.cq.tail_ptr = tail_ptr;
+		e.cq.arm = num_req;
+
+		addr += DOORBELL_OFFSET_CQ;
+		break;
+
+	case GDMA_RQ:
+		e.rq.id = qid;
+		e.rq.tail_ptr = tail_ptr;
+		e.rq.wqe_cnt = num_req;
+
+		addr += DOORBELL_OFFSET_RQ;
+		break;
+
+	case GDMA_SQ:
+		e.sq.id = qid;
+		e.sq.tail_ptr = tail_ptr;
+
+		addr += DOORBELL_OFFSET_SQ;
+		break;
+
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	/* Ensure all writes are done before ring doorbell */
+	wmb();
+
+	writeq(e.as_uint64, addr);
+}
+
+void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
+{
+	mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type,
+			      queue->id, queue->head * GDMA_WQE_BU_SIZE, 1);
+}
+
+void mana_gd_arm_cq(struct gdma_queue *cq)
+{
+	struct gdma_context *gc = cq->gdma_dev->gdma_context;
+
+	u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE;
+
+	u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS);
+
+	mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id,
+			      head, SET_ARM_BIT);
+}
+
+static void mana_gd_process_eqe(struct gdma_queue *eq)
+{
+	u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE);
+	struct gdma_context *gc = eq->gdma_dev->gdma_context;
+	struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr;
+	union gdma_eqe_info eqe_info;
+	enum gdma_eqe_type type;
+	struct gdma_event event;
+	struct gdma_queue *cq;
+	struct gdma_eqe *eqe;
+	u32 cq_id;
+
+	eqe = &eq_eqe_ptr[head];
+	eqe_info.as_uint32 = eqe->eqe_info;
+	type = eqe_info.type;
+
+	switch (type) {
+	case GDMA_EQE_COMPLETION:
+		cq_id = eqe->details[0] & 0xFFFFFF;
+		if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs))
+			break;
+
+		cq = gc->cq_table[cq_id];
+		if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id))
+			break;
+
+		if (cq->cq.callback)
+			cq->cq.callback(cq->cq.context, cq);
+
+		break;
+
+	case GDMA_EQE_TEST_EVENT:
+		gc->test_event_eq_id = eq->id;
+		complete(&gc->eq_test_event);
+		break;
+
+	case GDMA_EQE_HWC_INIT_EQ_ID_DB:
+	case GDMA_EQE_HWC_INIT_DATA:
+	case GDMA_EQE_HWC_INIT_DONE:
+		if (!eq->eq.callback)
+			break;
+
+		event.type = type;
+		memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE);
+		eq->eq.callback(eq->eq.context, eq, &event);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void mana_gd_process_eq_events(void *arg)
+{
+	u32 owner_bits, new_bits, old_bits;
+	union gdma_eqe_info eqe_info;
+	struct gdma_eqe *eq_eqe_ptr;
+	struct gdma_queue *eq = arg;
+	struct gdma_context *gc;
+	struct gdma_eqe *eqe;
+	unsigned int arm_bit;
+	u32 head, num_eqe;
+	int i;
+
+	gc = eq->gdma_dev->gdma_context;
+
+	num_eqe = eq->queue_size / GDMA_EQE_SIZE;
+	eq_eqe_ptr = eq->queue_mem_ptr;
+
+	/* Process up to 5 EQEs at a time, and update the HW head. */
+	for (i = 0; i < 5; i++) {
+		eqe = &eq_eqe_ptr[eq->head % num_eqe];
+		eqe_info.as_uint32 = eqe->eqe_info;
+		owner_bits = eqe_info.owner_bits;
+
+		old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
+		/* No more entries */
+		if (owner_bits == old_bits)
+			break;
+
+		new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
+		if (owner_bits != new_bits) {
+			dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id);
+			break;
+		}
+
+		mana_gd_process_eqe(eq);
+
+		eq->head++;
+	}
+
+	/* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */
+	if (mana_gd_is_hwc(eq->gdma_dev)) {
+		arm_bit = SET_ARM_BIT;
+	} else if (eq->eq.work_done < eq->eq.budget &&
+		   napi_complete_done(&eq->eq.napi, eq->eq.work_done)) {
+		arm_bit = SET_ARM_BIT;
+	} else {
+		arm_bit = 0;
+	}
+
+	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
+
+	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
+			      head, arm_bit);
+}
+
+static int mana_poll(struct napi_struct *napi, int budget)
+{
+	struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi);
+
+	eq->eq.work_done = 0;
+	eq->eq.budget = budget;
+
+	mana_gd_process_eq_events(eq);
+
+	return min(eq->eq.work_done, budget);
+}
+
+static void mana_gd_schedule_napi(void *arg)
+{
+	struct gdma_queue *eq = arg;
+	struct napi_struct *napi;
+
+	napi = &eq->eq.napi;
+	napi_schedule_irqoff(napi);
+}
+
+static int mana_gd_register_irq(struct gdma_queue *queue,
+				const struct gdma_queue_spec *spec)
+{
+	struct gdma_dev *gd = queue->gdma_dev;
+	bool is_mana = mana_gd_is_mana(gd);
+	struct gdma_irq_context *gic;
+	struct gdma_context *gc;
+	struct gdma_resource *r;
+	unsigned int msi_index;
+	unsigned long flags;
+	int err;
+
+	gc = gd->gdma_context;
+	r = &gc->msix_resource;
+
+	spin_lock_irqsave(&r->lock, flags);
+
+	msi_index = find_first_zero_bit(r->map, r->size);
+	if (msi_index >= r->size) {
+		err = -ENOSPC;
+	} else {
+		bitmap_set(r->map, msi_index, 1);
+		queue->eq.msix_index = msi_index;
+		err = 0;
+	}
+
+	spin_unlock_irqrestore(&r->lock, flags);
+
+	if (err)
+		return err;
+
+	WARN_ON(msi_index >= gc->num_msix_usable);
+
+	gic = &gc->irq_contexts[msi_index];
+
+	if (is_mana) {
+		netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll,
+			       NAPI_POLL_WEIGHT);
+		napi_enable(&queue->eq.napi);
+	}
+
+	WARN_ON(gic->handler || gic->arg);
+
+	gic->arg = queue;
+
+	if (is_mana)
+		gic->handler = mana_gd_schedule_napi;
+	else
+		gic->handler = mana_gd_process_eq_events;
+
+	return 0;
+}
+
+static void mana_gd_deregiser_irq(struct gdma_queue *queue)
+{
+	struct gdma_dev *gd = queue->gdma_dev;
+	struct gdma_irq_context *gic;
+	struct gdma_context *gc;
+	struct gdma_resource *r;
+	unsigned int msix_index;
+	unsigned long flags;
+
+	gc = gd->gdma_context;
+	r = &gc->msix_resource;
+
+	/* At most num_online_cpus() + 1 interrupts are used. */
+	msix_index = queue->eq.msix_index;
+	if (WARN_ON(msix_index >= gc->num_msix_usable))
+		return;
+
+	gic = &gc->irq_contexts[msix_index];
+	gic->handler = NULL;
+	gic->arg = NULL;
+
+	spin_lock_irqsave(&r->lock, flags);
+	bitmap_clear(r->map, msix_index, 1);
+	spin_unlock_irqrestore(&r->lock, flags);
+
+	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+}
+
+int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq)
+{
+	struct gdma_generate_test_event_req req = {};
+	struct gdma_general_resp resp = {};
+	struct device *dev = gc->dev;
+	int err;
+
+	mutex_lock(&gc->eq_test_event_mutex);
+
+	init_completion(&gc->eq_test_event);
+	gc->test_event_eq_id = INVALID_QUEUE_ID;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE,
+			     sizeof(req), sizeof(resp));
+
+	req.hdr.dev_id = eq->gdma_dev->dev_id;
+	req.queue_index = eq->id;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err) {
+		dev_err(dev, "test_eq failed: %d\n", err);
+		goto out;
+	}
+
+	err = -EPROTO;
+
+	if (resp.hdr.status) {
+		dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status);
+		goto out;
+	}
+
+	if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) {
+		dev_err(dev, "test_eq timed out on queue %d\n", eq->id);
+		goto out;
+	}
+
+	if (eq->id != gc->test_event_eq_id) {
+		dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n",
+			gc->test_event_eq_id, eq->id);
+		goto out;
+	}
+
+	err = 0;
+out:
+	mutex_unlock(&gc->eq_test_event_mutex);
+	return err;
+}
+
+static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
+			       struct gdma_queue *queue)
+{
+	int err;
+
+	if (flush_evenets) {
+		err = mana_gd_test_eq(gc, queue);
+		if (err)
+			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
+	}
+
+	mana_gd_deregiser_irq(queue);
+
+	if (mana_gd_is_mana(queue->gdma_dev)) {
+		napi_disable(&queue->eq.napi);
+		netif_napi_del(&queue->eq.napi);
+	}
+
+	if (queue->eq.disable_needed)
+		mana_gd_disable_queue(queue);
+}
+
+static int mana_gd_create_eq(struct gdma_dev *gd,
+			     const struct gdma_queue_spec *spec,
+			     bool create_hwq, struct gdma_queue *queue)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct device *dev = gc->dev;
+	u32 log2_num_entries;
+	int err;
+
+	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+
+	log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
+
+	if (spec->eq.log2_throttle_limit > log2_num_entries) {
+		dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n",
+			spec->eq.log2_throttle_limit, log2_num_entries);
+		return -EINVAL;
+	}
+
+	err = mana_gd_register_irq(queue, spec);
+	if (err) {
+		dev_err(dev, "Failed to register irq: %d\n", err);
+		return err;
+	}
+
+	queue->eq.callback = spec->eq.callback;
+	queue->eq.context = spec->eq.context;
+	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
+	queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1;
+
+	if (create_hwq) {
+		err = mana_gd_create_hw_eq(gc, queue);
+		if (err)
+			goto out;
+
+		err = mana_gd_test_eq(gc, queue);
+		if (err)
+			goto out;
+	}
+
+	return 0;
+out:
+	dev_err(dev, "Failed to create EQ: %d\n", err);
+	mana_gd_destroy_eq(gc, false, queue);
+	return err;
+}
+
+static void mana_gd_create_cq(const struct gdma_queue_spec *spec,
+			      struct gdma_queue *queue)
+{
+	u32 log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE);
+
+	queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries);
+	queue->cq.parent = spec->cq.parent_eq;
+	queue->cq.context = spec->cq.context;
+	queue->cq.callback = spec->cq.callback;
+}
+
+static void mana_gd_destroy_cq(struct gdma_context *gc,
+			       struct gdma_queue *queue)
+{
+	u32 id = queue->id;
+
+	if (id >= gc->max_num_cqs)
+		return;
+
+	if (!gc->cq_table[id])
+		return;
+
+	gc->cq_table[id] = NULL;
+}
+
+int mana_gd_create_hwc_queue(struct gdma_dev *gd,
+			     const struct gdma_queue_spec *spec,
+			     struct gdma_queue **queue_ptr)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct gdma_mem_info *gmi;
+	struct gdma_queue *queue;
+	int err;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -ENOMEM;
+
+	gmi = &queue->mem_info;
+	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
+	if (err)
+		goto free_q;
+
+	queue->head = 0;
+	queue->tail = 0;
+	queue->queue_mem_ptr = gmi->virt_addr;
+	queue->queue_size = spec->queue_size;
+	queue->monitor_avl_buf = spec->monitor_avl_buf;
+	queue->type = spec->type;
+	queue->gdma_dev = gd;
+
+	if (spec->type == GDMA_EQ)
+		err = mana_gd_create_eq(gd, spec, false, queue);
+	else if (spec->type == GDMA_CQ)
+		mana_gd_create_cq(spec, queue);
+
+	if (err)
+		goto out;
+
+	*queue_ptr = queue;
+	return 0;
+out:
+	mana_gd_free_memory(gmi);
+free_q:
+	kfree(queue);
+	return err;
+}
+
+static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region)
+{
+	struct gdma_destroy_dma_region_req req = {};
+	struct gdma_general_resp resp = {};
+	int err;
+
+	if (gdma_region == GDMA_INVALID_DMA_REGION)
+		return;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req),
+			     sizeof(resp));
+	req.gdma_region = gdma_region;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status)
+		dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n",
+			err, resp.hdr.status);
+}
+
+static int mana_gd_create_dma_region(struct gdma_dev *gd,
+				     struct gdma_mem_info *gmi)
+{
+	unsigned int num_page = gmi->length / PAGE_SIZE;
+	struct gdma_create_dma_region_req *req = NULL;
+	struct gdma_create_dma_region_resp resp = {};
+	struct gdma_context *gc = gd->gdma_context;
+	struct hw_channel_context *hwc;
+	u32 length = gmi->length;
+	u32 req_msg_size;
+	int err;
+	int i;
+
+	if (length < PAGE_SIZE || !is_power_of_2(length))
+		return -EINVAL;
+
+	if (offset_in_page(gmi->virt_addr) != 0)
+		return -EINVAL;
+
+	hwc = gc->hwc.driver_data;
+	req_msg_size = sizeof(*req) + num_page * sizeof(u64);
+	if (req_msg_size > hwc->max_req_msg_size)
+		return -EINVAL;
+
+	req = kzalloc(req_msg_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION,
+			     req_msg_size, sizeof(resp));
+	req->length = length;
+	req->offset_in_page = 0;
+	req->gdma_page_type = GDMA_PAGE_TYPE_4K;
+	req->page_count = num_page;
+	req->page_addr_list_len = num_page;
+
+	for (i = 0; i < num_page; i++)
+		req->page_addr_list[i] = gmi->dma_handle +  i * PAGE_SIZE;
+
+	err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp);
+	if (err)
+		goto out;
+
+	if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) {
+		dev_err(gc->dev, "Failed to create DMA region: 0x%x\n",
+			resp.hdr.status);
+		err = -EPROTO;
+		goto out;
+	}
+
+	gmi->gdma_region = resp.gdma_region;
+out:
+	kfree(req);
+	return err;
+}
+
+int mana_gd_create_mana_eq(struct gdma_dev *gd,
+			   const struct gdma_queue_spec *spec,
+			   struct gdma_queue **queue_ptr)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct gdma_mem_info *gmi;
+	struct gdma_queue *queue;
+	int err;
+
+	if (spec->type != GDMA_EQ)
+		return -EINVAL;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -ENOMEM;
+
+	gmi = &queue->mem_info;
+	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
+	if (err)
+		goto free_q;
+
+	err = mana_gd_create_dma_region(gd, gmi);
+	if (err)
+		goto out;
+
+	queue->head = 0;
+	queue->tail = 0;
+	queue->queue_mem_ptr = gmi->virt_addr;
+	queue->queue_size = spec->queue_size;
+	queue->monitor_avl_buf = spec->monitor_avl_buf;
+	queue->type = spec->type;
+	queue->gdma_dev = gd;
+
+	err = mana_gd_create_eq(gd, spec, true, queue);
+	if (err)
+		goto out;
+
+	*queue_ptr = queue;
+	return 0;
+out:
+	mana_gd_free_memory(gmi);
+free_q:
+	kfree(queue);
+	return err;
+}
+
+int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
+			      const struct gdma_queue_spec *spec,
+			      struct gdma_queue **queue_ptr)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct gdma_mem_info *gmi;
+	struct gdma_queue *queue;
+	int err;
+
+	if (spec->type != GDMA_CQ && spec->type != GDMA_SQ &&
+	    spec->type != GDMA_RQ)
+		return -EINVAL;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -ENOMEM;
+
+	gmi = &queue->mem_info;
+	err = mana_gd_alloc_memory(gc, spec->queue_size, gmi);
+	if (err)
+		goto free_q;
+
+	err = mana_gd_create_dma_region(gd, gmi);
+	if (err)
+		goto out;
+
+	queue->head = 0;
+	queue->tail = 0;
+	queue->queue_mem_ptr = gmi->virt_addr;
+	queue->queue_size = spec->queue_size;
+	queue->monitor_avl_buf = spec->monitor_avl_buf;
+	queue->type = spec->type;
+	queue->gdma_dev = gd;
+
+	if (spec->type == GDMA_CQ)
+		mana_gd_create_cq(spec, queue);
+
+	*queue_ptr = queue;
+	return 0;
+out:
+	mana_gd_free_memory(gmi);
+free_q:
+	kfree(queue);
+	return err;
+}
+
+void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
+{
+	struct gdma_mem_info *gmi = &queue->mem_info;
+
+	switch (queue->type) {
+	case GDMA_EQ:
+		mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue);
+		break;
+
+	case GDMA_CQ:
+		mana_gd_destroy_cq(gc, queue);
+		break;
+
+	case GDMA_RQ:
+		break;
+
+	case GDMA_SQ:
+		break;
+
+	default:
+		dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n",
+			queue->type);
+		return;
+	}
+
+	mana_gd_destroy_dma_region(gc, gmi->gdma_region);
+	mana_gd_free_memory(gmi);
+	kfree(queue);
+}
+
+int mana_gd_verify_vf_version(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct gdma_verify_ver_resp resp = {};
+	struct gdma_verify_ver_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION,
+			     sizeof(req), sizeof(resp));
+
+	req.protocol_ver_min = GDMA_PROTOCOL_FIRST;
+	req.protocol_ver_max = GDMA_PROTOCOL_LAST;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n",
+			err, resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	return 0;
+}
+
+int mana_gd_register_device(struct gdma_dev *gd)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct gdma_register_device_resp resp = {};
+	struct gdma_general_req req = {};
+	int err;
+
+	gd->pdid = INVALID_PDID;
+	gd->doorbell = INVALID_DOORBELL;
+	gd->gpa_mkey = INVALID_MEM_KEY;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req),
+			     sizeof(resp));
+
+	req.hdr.dev_id = gd->dev_id;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n",
+			err, resp.hdr.status);
+		return err ? err : -EPROTO;
+	}
+
+	gd->pdid = resp.pdid;
+	gd->gpa_mkey = resp.gpa_mkey;
+	gd->doorbell = resp.db_id;
+
+	return 0;
+}
+
+int mana_gd_deregister_device(struct gdma_dev *gd)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct gdma_general_resp resp = {};
+	struct gdma_general_req req = {};
+	int err;
+
+	if (gd->pdid == INVALID_PDID)
+		return -EINVAL;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req),
+			     sizeof(resp));
+
+	req.hdr.dev_id = gd->dev_id;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n",
+			err, resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+	}
+
+	gd->pdid = INVALID_PDID;
+	gd->doorbell = INVALID_DOORBELL;
+	gd->gpa_mkey = INVALID_MEM_KEY;
+
+	return err;
+}
+
+u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
+{
+	u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE;
+	u32 wq_size = wq->queue_size;
+
+	WARN_ON_ONCE(used_space > wq_size);
+
+	return wq_size - used_space;
+}
+
+u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset)
+{
+	u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1);
+
+	WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size);
+
+	return wq->queue_mem_ptr + offset;
+}
+
+static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req,
+				    enum gdma_queue_type q_type,
+				    u32 client_oob_size, u32 sgl_data_size,
+				    u8 *wqe_ptr)
+{
+	bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL);
+	bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0);
+	struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr;
+	u8 *ptr;
+
+	memset(header, 0, sizeof(struct gdma_wqe));
+	header->num_sge = wqe_req->num_sge;
+	header->inline_oob_size_div4 = client_oob_size / sizeof(u32);
+
+	if (oob_in_sgl) {
+		WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2);
+
+		header->client_oob_in_sgl = 1;
+
+		if (pad_data)
+			header->last_vbytes = wqe_req->sgl[0].size;
+	}
+
+	if (q_type == GDMA_SQ)
+		header->client_data_unit = wqe_req->client_data_unit;
+
+	/* The size of gdma_wqe + client_oob_size must be less than or equal
+	 * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond
+	 * the queue memory buffer boundary.
+	 */
+	ptr = wqe_ptr + sizeof(header);
+
+	if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) {
+		memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size);
+
+		if (client_oob_size > wqe_req->inline_oob_size)
+			memset(ptr + wqe_req->inline_oob_size, 0,
+			       client_oob_size - wqe_req->inline_oob_size);
+	}
+
+	return sizeof(header) + client_oob_size;
+}
+
+static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr,
+			      const struct gdma_wqe_request *wqe_req)
+{
+	u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge;
+	const u8 *address = (u8 *)wqe_req->sgl;
+	u8 *base_ptr, *end_ptr;
+	u32 size_to_end;
+
+	base_ptr = wq->queue_mem_ptr;
+	end_ptr = base_ptr + wq->queue_size;
+	size_to_end = (u32)(end_ptr - wqe_ptr);
+
+	if (size_to_end < sgl_size) {
+		memcpy(wqe_ptr, address, size_to_end);
+
+		wqe_ptr = base_ptr;
+		address += size_to_end;
+		sgl_size -= size_to_end;
+	}
+
+	memcpy(wqe_ptr, address, sgl_size);
+}
+
+int mana_gd_post_work_request(struct gdma_queue *wq,
+			      const struct gdma_wqe_request *wqe_req,
+			      struct gdma_posted_wqe_info *wqe_info)
+{
+	u32 client_oob_size = wqe_req->inline_oob_size;
+	struct gdma_context *gc;
+	u32 sgl_data_size;
+	u32 max_wqe_size;
+	u32 wqe_size;
+	u8 *wqe_ptr;
+
+	if (wqe_req->num_sge == 0)
+		return -EINVAL;
+
+	if (wq->type == GDMA_RQ) {
+		if (client_oob_size != 0)
+			return -EINVAL;
+
+		client_oob_size = INLINE_OOB_SMALL_SIZE;
+
+		max_wqe_size = GDMA_MAX_RQE_SIZE;
+	} else {
+		if (client_oob_size != INLINE_OOB_SMALL_SIZE &&
+		    client_oob_size != INLINE_OOB_LARGE_SIZE)
+			return -EINVAL;
+
+		max_wqe_size = GDMA_MAX_SQE_SIZE;
+	}
+
+	sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge;
+	wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size +
+			 sgl_data_size, GDMA_WQE_BU_SIZE);
+	if (wqe_size > max_wqe_size)
+		return -EINVAL;
+
+	if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) {
+		gc = wq->gdma_dev->gdma_context;
+		dev_err(gc->dev, "unsuccessful flow control!\n");
+		return -ENOSPC;
+	}
+
+	if (wqe_info)
+		wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE;
+
+	wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head);
+	wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size,
+					    sgl_data_size, wqe_ptr);
+	if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size)
+		wqe_ptr -= wq->queue_size;
+
+	mana_gd_write_sgl(wq, wqe_ptr, wqe_req);
+
+	wq->head += wqe_size / GDMA_WQE_BU_SIZE;
+
+	return 0;
+}
+
+int mana_gd_post_and_ring(struct gdma_queue *queue,
+			  const struct gdma_wqe_request *wqe_req,
+			  struct gdma_posted_wqe_info *wqe_info)
+{
+	struct gdma_context *gc = queue->gdma_dev->gdma_context;
+	int err;
+
+	err = mana_gd_post_work_request(queue, wqe_req, wqe_info);
+	if (err)
+		return err;
+
+	mana_gd_wq_ring_doorbell(gc, queue);
+
+	return 0;
+}
+
+static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp)
+{
+	unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe);
+	struct gdma_cqe *cq_cqe = cq->queue_mem_ptr;
+	u32 owner_bits, new_bits, old_bits;
+	struct gdma_cqe *cqe;
+
+	cqe = &cq_cqe[cq->head % num_cqe];
+	owner_bits = cqe->cqe_info.owner_bits;
+
+	old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK;
+	/* Return 0 if no more entries. */
+	if (owner_bits == old_bits)
+		return 0;
+
+	new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK;
+	/* Return -1 if overflow detected. */
+	if (owner_bits != new_bits)
+		return -1;
+
+	comp->wq_num = cqe->cqe_info.wq_num;
+	comp->is_sq = cqe->cqe_info.is_sq;
+	memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE);
+
+	return 1;
+}
+
+int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe)
+{
+	int cqe_idx;
+	int ret;
+
+	for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) {
+		ret = mana_gd_read_cqe(cq, &comp[cqe_idx]);
+
+		if (ret < 0) {
+			cq->head -= cqe_idx;
+			return ret;
+		}
+
+		if (ret == 0)
+			break;
+
+		cq->head++;
+	}
+
+	return cqe_idx;
+}
+
+static irqreturn_t mana_gd_intr(int irq, void *arg)
+{
+	struct gdma_irq_context *gic = arg;
+
+	if (gic->handler)
+		gic->handler(gic->arg);
+
+	return IRQ_HANDLED;
+}
+
+int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r)
+{
+	r->map = bitmap_zalloc(res_avail, GFP_KERNEL);
+	if (!r->map)
+		return -ENOMEM;
+
+	r->size = res_avail;
+	spin_lock_init(&r->lock);
+
+	return 0;
+}
+
+void mana_gd_free_res_map(struct gdma_resource *r)
+{
+	bitmap_free(r->map);
+	r->map = NULL;
+	r->size = 0;
+}
+
+static int mana_gd_setup_irqs(struct pci_dev *pdev)
+{
+	unsigned int max_queues_per_port = num_online_cpus();
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct gdma_irq_context *gic;
+	unsigned int max_irqs;
+	int nvec, irq;
+	int err, i, j;
+
+	if (max_queues_per_port > MANA_MAX_NUM_QUEUES)
+		max_queues_per_port = MANA_MAX_NUM_QUEUES;
+
+	max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV;
+
+	/* Need 1 interrupt for the Hardware communication Channel (HWC) */
+	max_irqs++;
+
+	nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX);
+	if (nvec < 0)
+		return nvec;
+
+	gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context),
+				   GFP_KERNEL);
+	if (!gc->irq_contexts) {
+		err = -ENOMEM;
+		goto free_irq_vector;
+	}
+
+	for (i = 0; i < nvec; i++) {
+		gic = &gc->irq_contexts[i];
+		gic->handler = NULL;
+		gic->arg = NULL;
+
+		irq = pci_irq_vector(pdev, i);
+		if (irq < 0) {
+			err = irq;
+			goto free_irq;
+		}
+
+		err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic);
+		if (err)
+			goto free_irq;
+	}
+
+	err = mana_gd_alloc_res_map(nvec, &gc->msix_resource);
+	if (err)
+		goto free_irq;
+
+	gc->max_num_msix = nvec;
+	gc->num_msix_usable = nvec;
+
+	return 0;
+
+free_irq:
+	for (j = i - 1; j >= 0; j--) {
+		irq = pci_irq_vector(pdev, j);
+		gic = &gc->irq_contexts[j];
+		free_irq(irq, gic);
+	}
+
+	kfree(gc->irq_contexts);
+	gc->irq_contexts = NULL;
+free_irq_vector:
+	pci_free_irq_vectors(pdev);
+	return err;
+}
+
+static void mana_gd_remove_irqs(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+	struct gdma_irq_context *gic;
+	int irq, i;
+
+	if (gc->max_num_msix < 1)
+		return;
+
+	mana_gd_free_res_map(&gc->msix_resource);
+
+	for (i = 0; i < gc->max_num_msix; i++) {
+		irq = pci_irq_vector(pdev, i);
+		if (irq < 0)
+			continue;
+
+		gic = &gc->irq_contexts[i];
+		free_irq(irq, gic);
+	}
+
+	pci_free_irq_vectors(pdev);
+
+	gc->max_num_msix = 0;
+	gc->num_msix_usable = 0;
+	kfree(gc->irq_contexts);
+	gc->irq_contexts = NULL;
+}
+
+static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct gdma_context *gc;
+	void __iomem *bar0_va;
+	int bar = 0;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return -ENXIO;
+
+	pci_set_master(pdev);
+
+	err = pci_request_regions(pdev, "mana");
+	if (err)
+		goto disable_dev;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err)
+		goto release_region;
+
+	err = -ENOMEM;
+	gc = vzalloc(sizeof(*gc));
+	if (!gc)
+		goto release_region;
+
+	bar0_va = pci_iomap(pdev, bar, 0);
+	if (!bar0_va)
+		goto free_gc;
+
+	gc->bar0_va = bar0_va;
+	gc->dev = &pdev->dev;
+
+	pci_set_drvdata(pdev, gc);
+
+	mana_gd_init_registers(pdev);
+
+	mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
+
+	err = mana_gd_setup_irqs(pdev);
+	if (err)
+		goto unmap_bar;
+
+	mutex_init(&gc->eq_test_event_mutex);
+
+	err = mana_hwc_create_channel(gc);
+	if (err)
+		goto remove_irq;
+
+	err = mana_gd_verify_vf_version(pdev);
+	if (err)
+		goto remove_irq;
+
+	err = mana_gd_query_max_resources(pdev);
+	if (err)
+		goto remove_irq;
+
+	err = mana_gd_detect_devices(pdev);
+	if (err)
+		goto remove_irq;
+
+	err = mana_probe(&gc->mana);
+	if (err)
+		goto clean_up_gdma;
+
+	return 0;
+
+clean_up_gdma:
+	mana_hwc_destroy_channel(gc);
+	vfree(gc->cq_table);
+	gc->cq_table = NULL;
+remove_irq:
+	mana_gd_remove_irqs(pdev);
+unmap_bar:
+	pci_iounmap(pdev, bar0_va);
+free_gc:
+	vfree(gc);
+release_region:
+	pci_release_regions(pdev);
+disable_dev:
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+	dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err);
+	return err;
+}
+
+static void mana_gd_remove(struct pci_dev *pdev)
+{
+	struct gdma_context *gc = pci_get_drvdata(pdev);
+
+	mana_remove(&gc->mana);
+
+	mana_hwc_destroy_channel(gc);
+	vfree(gc->cq_table);
+	gc->cq_table = NULL;
+
+	mana_gd_remove_irqs(pdev);
+
+	pci_iounmap(pdev, gc->bar0_va);
+
+	vfree(gc);
+
+	pci_release_regions(pdev);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
+#ifndef PCI_VENDOR_ID_MICROSOFT
+#define PCI_VENDOR_ID_MICROSOFT 0x1414
+#endif
+
+static const struct pci_device_id mana_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) },
+	{ }
+};
+
+static struct pci_driver mana_driver = {
+	.name		= "mana",
+	.id_table	= mana_id_table,
+	.probe		= mana_gd_probe,
+	.remove		= mana_gd_remove,
+};
+
+module_pci_driver(mana_driver);
+
+MODULE_DEVICE_TABLE(pci, mana_id_table);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver");
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
new file mode 100644
index 000000000000..462bc577692a
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include "gdma.h"
+#include "hw_channel.h"
+
+static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id)
+{
+	struct gdma_resource *r = &hwc->inflight_msg_res;
+	unsigned long flags;
+	u32 index;
+
+	down(&hwc->sema);
+
+	spin_lock_irqsave(&r->lock, flags);
+
+	index = find_first_zero_bit(hwc->inflight_msg_res.map,
+				    hwc->inflight_msg_res.size);
+
+	bitmap_set(hwc->inflight_msg_res.map, index, 1);
+
+	spin_unlock_irqrestore(&r->lock, flags);
+
+	*msg_id = index;
+
+	return 0;
+}
+
+static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id)
+{
+	struct gdma_resource *r = &hwc->inflight_msg_res;
+	unsigned long flags;
+
+	spin_lock_irqsave(&r->lock, flags);
+	bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1);
+	spin_unlock_irqrestore(&r->lock, flags);
+
+	up(&hwc->sema);
+}
+
+static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx,
+				    const struct gdma_resp_hdr *resp_msg,
+				    u32 resp_len)
+{
+	if (resp_len < sizeof(*resp_msg))
+		return -EPROTO;
+
+	if (resp_len > caller_ctx->output_buflen)
+		return -EPROTO;
+
+	return 0;
+}
+
+static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
+				 const struct gdma_resp_hdr *resp_msg)
+{
+	struct hwc_caller_ctx *ctx;
+	int err = -EPROTO;
+
+	if (!test_bit(resp_msg->response.hwc_msg_id,
+		      hwc->inflight_msg_res.map)) {
+		dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n",
+			resp_msg->response.hwc_msg_id);
+		return;
+	}
+
+	ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id;
+	err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len);
+	if (err)
+		goto out;
+
+	ctx->status_code = resp_msg->status;
+
+	memcpy(ctx->output_buf, resp_msg, resp_len);
+out:
+	ctx->error = err;
+	complete(&ctx->comp_event);
+}
+
+static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq,
+				struct hwc_work_request *req)
+{
+	struct device *dev = hwc_rxq->hwc->dev;
+	struct gdma_sge *sge;
+	int err;
+
+	sge = &req->sge;
+	sge->address = (u64)req->buf_sge_addr;
+	sge->mem_key = hwc_rxq->msg_buf->gpa_mkey;
+	sge->size = req->buf_len;
+
+	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
+	req->wqe_req.sgl = sge;
+	req->wqe_req.num_sge = 1;
+	req->wqe_req.client_data_unit = 0;
+
+	err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL);
+	if (err)
+		dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err);
+	return err;
+}
+
+static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self,
+					struct gdma_event *event)
+{
+	struct hw_channel_context *hwc = ctx;
+	struct gdma_dev *gd = hwc->gdma_dev;
+	union hwc_init_type_data type_data;
+	union hwc_init_eq_id_db eq_db;
+	u32 type, val;
+
+	switch (event->type) {
+	case GDMA_EQE_HWC_INIT_EQ_ID_DB:
+		eq_db.as_uint32 = event->details[0];
+		hwc->cq->gdma_eq->id = eq_db.eq_id;
+		gd->doorbell = eq_db.doorbell;
+		break;
+
+	case GDMA_EQE_HWC_INIT_DATA:
+		type_data.as_uint32 = event->details[0];
+		type = type_data.type;
+		val = type_data.value;
+
+		switch (type) {
+		case HWC_INIT_DATA_CQID:
+			hwc->cq->gdma_cq->id = val;
+			break;
+
+		case HWC_INIT_DATA_RQID:
+			hwc->rxq->gdma_wq->id = val;
+			break;
+
+		case HWC_INIT_DATA_SQID:
+			hwc->txq->gdma_wq->id = val;
+			break;
+
+		case HWC_INIT_DATA_QUEUE_DEPTH:
+			hwc->hwc_init_q_depth_max = (u16)val;
+			break;
+
+		case HWC_INIT_DATA_MAX_REQUEST:
+			hwc->hwc_init_max_req_msg_size = val;
+			break;
+
+		case HWC_INIT_DATA_MAX_RESPONSE:
+			hwc->hwc_init_max_resp_msg_size = val;
+			break;
+
+		case HWC_INIT_DATA_MAX_NUM_CQS:
+			gd->gdma_context->max_num_cqs = val;
+			break;
+
+		case HWC_INIT_DATA_PDID:
+			hwc->gdma_dev->pdid = val;
+			break;
+
+		case HWC_INIT_DATA_GPA_MKEY:
+			hwc->rxq->msg_buf->gpa_mkey = val;
+			hwc->txq->msg_buf->gpa_mkey = val;
+			break;
+		}
+
+		break;
+
+	case GDMA_EQE_HWC_INIT_DONE:
+		complete(&hwc->hwc_init_eqe_comp);
+		break;
+
+	default:
+		/* Ignore unknown events, which should never happen. */
+		break;
+	}
+}
+
+static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id,
+				      const struct hwc_rx_oob *rx_oob)
+{
+	struct hw_channel_context *hwc = ctx;
+	struct hwc_wq *hwc_rxq = hwc->rxq;
+	struct hwc_work_request *rx_req;
+	struct gdma_resp_hdr *resp;
+	struct gdma_wqe *dma_oob;
+	struct gdma_queue *rq;
+	struct gdma_sge *sge;
+	u64 rq_base_addr;
+	u64 rx_req_idx;
+	u8 *wqe;
+
+	if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id))
+		return;
+
+	rq = hwc_rxq->gdma_wq;
+	wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE);
+	dma_oob = (struct gdma_wqe *)wqe;
+
+	sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4);
+
+	/* Select the RX work request for virtual address and for reposting. */
+	rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle;
+	rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size;
+
+	rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx];
+	resp = (struct gdma_resp_hdr *)rx_req->buf_va;
+
+	if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) {
+		dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n",
+			resp->response.hwc_msg_id);
+		return;
+	}
+
+	mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp);
+
+	/* Do no longer use 'resp', because the buffer is posted to the HW
+	 * in the below mana_hwc_post_rx_wqe().
+	 */
+	resp = NULL;
+
+	mana_hwc_post_rx_wqe(hwc_rxq, rx_req);
+}
+
+static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id,
+				      const struct hwc_rx_oob *rx_oob)
+{
+	struct hw_channel_context *hwc = ctx;
+	struct hwc_wq *hwc_txq = hwc->txq;
+
+	WARN_ON_ONCE(!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id);
+}
+
+static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc,
+				   enum gdma_queue_type type, u64 queue_size,
+				   struct gdma_queue **queue)
+{
+	struct gdma_queue_spec spec = {};
+
+	if (type != GDMA_SQ && type != GDMA_RQ)
+		return -EINVAL;
+
+	spec.type = type;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = queue_size;
+
+	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
+}
+
+static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc,
+				   u64 queue_size,
+				   void *ctx, gdma_cq_callback *cb,
+				   struct gdma_queue *parent_eq,
+				   struct gdma_queue **queue)
+{
+	struct gdma_queue_spec spec = {};
+
+	spec.type = GDMA_CQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = queue_size;
+	spec.cq.context = ctx;
+	spec.cq.callback = cb;
+	spec.cq.parent_eq = parent_eq;
+
+	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
+}
+
+static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc,
+				   u64 queue_size,
+				   void *ctx, gdma_eq_callback *cb,
+				   struct gdma_queue **queue)
+{
+	struct gdma_queue_spec spec = {};
+
+	spec.type = GDMA_EQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = queue_size;
+	spec.eq.context = ctx;
+	spec.eq.callback = cb;
+	spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ;
+
+	return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue);
+}
+
+static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
+{
+	struct hwc_rx_oob comp_data = {};
+	struct gdma_comp *completions;
+	struct hwc_cq *hwc_cq = ctx;
+	u32 comp_read, i;
+
+	WARN_ON_ONCE(hwc_cq->gdma_cq != q_self);
+
+	completions = hwc_cq->comp_buf;
+	comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth);
+	WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth);
+
+	for (i = 0; i < comp_read; ++i) {
+		comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data;
+
+		if (completions[i].is_sq)
+			hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx,
+						completions[i].wq_num,
+						&comp_data);
+		else
+			hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx,
+						completions[i].wq_num,
+						&comp_data);
+	}
+
+	mana_gd_arm_cq(q_self);
+}
+
+static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
+{
+	if (!hwc_cq)
+		return;
+
+	kfree(hwc_cq->comp_buf);
+
+	if (hwc_cq->gdma_cq)
+		mana_gd_destroy_queue(gc, hwc_cq->gdma_cq);
+
+	if (hwc_cq->gdma_eq)
+		mana_gd_destroy_queue(gc, hwc_cq->gdma_eq);
+
+	kfree(hwc_cq);
+}
+
+static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth,
+			      gdma_eq_callback *callback, void *ctx,
+			      hwc_rx_event_handler_t *rx_ev_hdlr,
+			      void *rx_ev_ctx,
+			      hwc_tx_event_handler_t *tx_ev_hdlr,
+			      void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr)
+{
+	struct gdma_queue *eq, *cq;
+	struct gdma_comp *comp_buf;
+	struct hwc_cq *hwc_cq;
+	u32 eq_size, cq_size;
+	int err;
+
+	eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth);
+	if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE)
+		eq_size = MINIMUM_SUPPORTED_PAGE_SIZE;
+
+	cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth);
+	if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE)
+		cq_size = MINIMUM_SUPPORTED_PAGE_SIZE;
+
+	hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL);
+	if (!hwc_cq)
+		return -ENOMEM;
+
+	err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq);
+	if (err) {
+		dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err);
+		goto out;
+	}
+	hwc_cq->gdma_eq = eq;
+
+	err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event,
+				      eq, &cq);
+	if (err) {
+		dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err);
+		goto out;
+	}
+	hwc_cq->gdma_cq = cq;
+
+	comp_buf = kcalloc(q_depth, sizeof(struct gdma_comp), GFP_KERNEL);
+	if (!comp_buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hwc_cq->hwc = hwc;
+	hwc_cq->comp_buf = comp_buf;
+	hwc_cq->queue_depth = q_depth;
+	hwc_cq->rx_event_handler = rx_ev_hdlr;
+	hwc_cq->rx_event_ctx = rx_ev_ctx;
+	hwc_cq->tx_event_handler = tx_ev_hdlr;
+	hwc_cq->tx_event_ctx = tx_ev_ctx;
+
+	*hwc_cq_ptr = hwc_cq;
+	return 0;
+out:
+	mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq);
+	return err;
+}
+
+static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth,
+				  u32 max_msg_size,
+				  struct hwc_dma_buf **dma_buf_ptr)
+{
+	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
+	struct hwc_work_request *hwc_wr;
+	struct hwc_dma_buf *dma_buf;
+	struct gdma_mem_info *gmi;
+	void *virt_addr;
+	u32 buf_size;
+	u8 *base_pa;
+	int err;
+	u16 i;
+
+	dma_buf = kzalloc(sizeof(*dma_buf) +
+			  q_depth * sizeof(struct hwc_work_request),
+			  GFP_KERNEL);
+	if (!dma_buf)
+		return -ENOMEM;
+
+	dma_buf->num_reqs = q_depth;
+
+	buf_size = PAGE_ALIGN(q_depth * max_msg_size);
+
+	gmi = &dma_buf->mem_info;
+	err = mana_gd_alloc_memory(gc, buf_size, gmi);
+	if (err) {
+		dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err);
+		goto out;
+	}
+
+	virt_addr = dma_buf->mem_info.virt_addr;
+	base_pa = (u8 *)dma_buf->mem_info.dma_handle;
+
+	for (i = 0; i < q_depth; i++) {
+		hwc_wr = &dma_buf->reqs[i];
+
+		hwc_wr->buf_va = virt_addr + i * max_msg_size;
+		hwc_wr->buf_sge_addr = base_pa + i * max_msg_size;
+
+		hwc_wr->buf_len = max_msg_size;
+	}
+
+	*dma_buf_ptr = dma_buf;
+	return 0;
+out:
+	kfree(dma_buf);
+	return err;
+}
+
+static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc,
+				     struct hwc_dma_buf *dma_buf)
+{
+	if (!dma_buf)
+		return;
+
+	mana_gd_free_memory(&dma_buf->mem_info);
+
+	kfree(dma_buf);
+}
+
+static void mana_hwc_destroy_wq(struct hw_channel_context *hwc,
+				struct hwc_wq *hwc_wq)
+{
+	if (!hwc_wq)
+		return;
+
+	mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf);
+
+	if (hwc_wq->gdma_wq)
+		mana_gd_destroy_queue(hwc->gdma_dev->gdma_context,
+				      hwc_wq->gdma_wq);
+
+	kfree(hwc_wq);
+}
+
+static int mana_hwc_create_wq(struct hw_channel_context *hwc,
+			      enum gdma_queue_type q_type, u16 q_depth,
+			      u32 max_msg_size, struct hwc_cq *hwc_cq,
+			      struct hwc_wq **hwc_wq_ptr)
+{
+	struct gdma_queue *queue;
+	struct hwc_wq *hwc_wq;
+	u32 queue_size;
+	int err;
+
+	WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ);
+
+	if (q_type == GDMA_RQ)
+		queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth);
+	else
+		queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth);
+
+	if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE)
+		queue_size = MINIMUM_SUPPORTED_PAGE_SIZE;
+
+	hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL);
+	if (!hwc_wq)
+		return -ENOMEM;
+
+	err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue);
+	if (err)
+		goto out;
+
+	err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size,
+				     &hwc_wq->msg_buf);
+	if (err)
+		goto out;
+
+	hwc_wq->hwc = hwc;
+	hwc_wq->gdma_wq = queue;
+	hwc_wq->queue_depth = q_depth;
+	hwc_wq->hwc_cq = hwc_cq;
+
+	*hwc_wq_ptr = hwc_wq;
+	return 0;
+out:
+	if (err)
+		mana_hwc_destroy_wq(hwc, hwc_wq);
+	return err;
+}
+
+static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq,
+				struct hwc_work_request *req,
+				u32 dest_virt_rq_id, u32 dest_virt_rcq_id,
+				bool dest_pf)
+{
+	struct device *dev = hwc_txq->hwc->dev;
+	struct hwc_tx_oob *tx_oob;
+	struct gdma_sge *sge;
+	int err;
+
+	if (req->msg_size == 0 || req->msg_size > req->buf_len) {
+		dev_err(dev, "wrong msg_size: %u, buf_len: %u\n",
+			req->msg_size, req->buf_len);
+		return -EINVAL;
+	}
+
+	tx_oob = &req->tx_oob;
+
+	tx_oob->vrq_id = dest_virt_rq_id;
+	tx_oob->dest_vfid = 0;
+	tx_oob->vrcq_id = dest_virt_rcq_id;
+	tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id;
+	tx_oob->loopback = false;
+	tx_oob->lso_override = false;
+	tx_oob->dest_pf = dest_pf;
+	tx_oob->vsq_id = hwc_txq->gdma_wq->id;
+
+	sge = &req->sge;
+	sge->address = (u64)req->buf_sge_addr;
+	sge->mem_key = hwc_txq->msg_buf->gpa_mkey;
+	sge->size = req->msg_size;
+
+	memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request));
+	req->wqe_req.sgl = sge;
+	req->wqe_req.num_sge = 1;
+	req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob);
+	req->wqe_req.inline_oob_data = tx_oob;
+	req->wqe_req.client_data_unit = 0;
+
+	err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL);
+	if (err)
+		dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", err);
+	return err;
+}
+
+static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc,
+				      u16 num_msg)
+{
+	int err;
+
+	sema_init(&hwc->sema, num_msg);
+
+	err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res);
+	if (err)
+		dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", err);
+	return err;
+}
+
+static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth,
+				 u32 max_req_msg_size, u32 max_resp_msg_size)
+{
+	struct gdma_context *gc = hwc->gdma_dev->gdma_context;
+	struct hwc_wq *hwc_rxq = hwc->rxq;
+	struct hwc_work_request *req;
+	struct hwc_caller_ctx *ctx;
+	int err;
+	int i;
+
+	/* Post all WQEs on the RQ */
+	for (i = 0; i < q_depth; i++) {
+		req = &hwc_rxq->msg_buf->reqs[i];
+		err = mana_hwc_post_rx_wqe(hwc_rxq, req);
+		if (err)
+			return err;
+	}
+
+	ctx = kzalloc(q_depth * sizeof(struct hwc_caller_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	for (i = 0; i < q_depth; ++i)
+		init_completion(&ctx[i].comp_event);
+
+	hwc->caller_ctx = ctx;
+
+	return mana_gd_test_eq(gc, hwc->cq->gdma_eq);
+}
+
+static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth,
+				      u32 *max_req_msg_size,
+				      u32 *max_resp_msg_size)
+{
+	struct hw_channel_context *hwc = gc->hwc.driver_data;
+	struct gdma_queue *rq = hwc->rxq->gdma_wq;
+	struct gdma_queue *sq = hwc->txq->gdma_wq;
+	struct gdma_queue *eq = hwc->cq->gdma_eq;
+	struct gdma_queue *cq = hwc->cq->gdma_cq;
+	int err;
+
+	init_completion(&hwc->hwc_init_eqe_comp);
+
+	err = mana_smc_setup_hwc(&gc->shm_channel, false,
+				 eq->mem_info.dma_handle,
+				 cq->mem_info.dma_handle,
+				 rq->mem_info.dma_handle,
+				 sq->mem_info.dma_handle,
+				 eq->eq.msix_index);
+	if (err)
+		return err;
+
+	if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ))
+		return -ETIMEDOUT;
+
+	*q_depth = hwc->hwc_init_q_depth_max;
+	*max_req_msg_size = hwc->hwc_init_max_req_msg_size;
+	*max_resp_msg_size = hwc->hwc_init_max_resp_msg_size;
+
+	if (WARN_ON(cq->id >= gc->max_num_cqs))
+		return -EPROTO;
+
+	gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *));
+	if (!gc->cq_table)
+		return -ENOMEM;
+
+	gc->cq_table[cq->id] = cq;
+
+	return 0;
+}
+
+static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth,
+				u32 max_req_msg_size, u32 max_resp_msg_size)
+{
+	struct hwc_wq *hwc_rxq = NULL;
+	struct hwc_wq *hwc_txq = NULL;
+	struct hwc_cq *hwc_cq = NULL;
+	int err;
+
+	err = mana_hwc_init_inflight_msg(hwc, q_depth);
+	if (err)
+		return err;
+
+	/* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ
+	 * queue depth and RQ queue depth.
+	 */
+	err = mana_hwc_create_cq(hwc, q_depth * 2,
+				 mana_hwc_init_event_handler, hwc,
+				 mana_hwc_rx_event_handler, hwc,
+				 mana_hwc_tx_event_handler, hwc, &hwc_cq);
+	if (err) {
+		dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err);
+		goto out;
+	}
+	hwc->cq = hwc_cq;
+
+	err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size,
+				 hwc_cq, &hwc_rxq);
+	if (err) {
+		dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err);
+		goto out;
+	}
+	hwc->rxq = hwc_rxq;
+
+	err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size,
+				 hwc_cq, &hwc_txq);
+	if (err) {
+		dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err);
+		goto out;
+	}
+	hwc->txq = hwc_txq;
+
+	hwc->num_inflight_msg = q_depth;
+	hwc->max_req_msg_size = max_req_msg_size;
+
+	return 0;
+out:
+	if (hwc_txq)
+		mana_hwc_destroy_wq(hwc, hwc_txq);
+
+	if (hwc_rxq)
+		mana_hwc_destroy_wq(hwc, hwc_rxq);
+
+	if (hwc_cq)
+		mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq);
+
+	mana_gd_free_res_map(&hwc->inflight_msg_res);
+	return err;
+}
+
+int mana_hwc_create_channel(struct gdma_context *gc)
+{
+	u32 max_req_msg_size, max_resp_msg_size;
+	struct gdma_dev *gd = &gc->hwc;
+	struct hw_channel_context *hwc;
+	u16 q_depth_max;
+	int err;
+
+	hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
+	if (!hwc)
+		return -ENOMEM;
+
+	gd->gdma_context = gc;
+	gd->driver_data = hwc;
+	hwc->gdma_dev = gd;
+	hwc->dev = gc->dev;
+
+	/* HWC's instance number is always 0. */
+	gd->dev_id.as_uint32 = 0;
+	gd->dev_id.type = GDMA_DEVICE_HWC;
+
+	gd->pdid = INVALID_PDID;
+	gd->doorbell = INVALID_DOORBELL;
+
+	err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
+				   HW_CHANNEL_MAX_REQUEST_SIZE,
+				   HW_CHANNEL_MAX_RESPONSE_SIZE);
+	if (err) {
+		dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err);
+		goto out;
+	}
+
+	err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size,
+					 &max_resp_msg_size);
+	if (err) {
+		dev_err(hwc->dev, "Failed to establish HWC: %d\n", err);
+		goto out;
+	}
+
+	err = mana_hwc_test_channel(gc->hwc.driver_data,
+				    HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
+				    max_req_msg_size, max_resp_msg_size);
+	if (err) {
+		dev_err(hwc->dev, "Failed to test HWC: %d\n", err);
+		goto out;
+	}
+
+	return 0;
+out:
+	kfree(hwc);
+	return err;
+}
+
+void mana_hwc_destroy_channel(struct gdma_context *gc)
+{
+	struct hw_channel_context *hwc = gc->hwc.driver_data;
+	struct hwc_caller_ctx *ctx;
+
+	mana_smc_teardown_hwc(&gc->shm_channel, false);
+
+	ctx = hwc->caller_ctx;
+	kfree(ctx);
+	hwc->caller_ctx = NULL;
+
+	mana_hwc_destroy_wq(hwc, hwc->txq);
+	hwc->txq = NULL;
+
+	mana_hwc_destroy_wq(hwc, hwc->rxq);
+	hwc->rxq = NULL;
+
+	mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
+	hwc->cq = NULL;
+
+	mana_gd_free_res_map(&hwc->inflight_msg_res);
+
+	hwc->num_inflight_msg = 0;
+
+	if (hwc->gdma_dev->pdid != INVALID_PDID) {
+		hwc->gdma_dev->doorbell = INVALID_DOORBELL;
+		hwc->gdma_dev->pdid = INVALID_PDID;
+	}
+
+	kfree(hwc);
+	gc->hwc.driver_data = NULL;
+	gc->hwc.gdma_context = NULL;
+}
+
+int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
+			  const void *req, u32 resp_len, void *resp)
+{
+	struct hwc_work_request *tx_wr;
+	struct hwc_wq *txq = hwc->txq;
+	struct gdma_req_hdr *req_msg;
+	struct hwc_caller_ctx *ctx;
+	u16 msg_id;
+	int err;
+
+	mana_hwc_get_msg_index(hwc, &msg_id);
+
+	tx_wr = &txq->msg_buf->reqs[msg_id];
+
+	if (req_len > tx_wr->buf_len) {
+		dev_err(hwc->dev, "HWC: req msg size: %d > %d\n", req_len,
+			tx_wr->buf_len);
+		err = -EINVAL;
+		goto out;
+	}
+
+	ctx = hwc->caller_ctx + msg_id;
+	ctx->output_buf = resp;
+	ctx->output_buflen = resp_len;
+
+	req_msg = (struct gdma_req_hdr *)tx_wr->buf_va;
+	if (req)
+		memcpy(req_msg, req, req_len);
+
+	req_msg->req.hwc_msg_id = msg_id;
+
+	tx_wr->msg_size = req_len;
+
+	err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false);
+	if (err) {
+		dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err);
+		goto out;
+	}
+
+	if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) {
+		dev_err(hwc->dev, "HWC: Request timed out!\n");
+		err = -ETIMEDOUT;
+		goto out;
+	}
+
+	if (ctx->error) {
+		err = ctx->error;
+		goto out;
+	}
+
+	if (ctx->status_code) {
+		dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n",
+			ctx->status_code);
+		err = -EPROTO;
+		goto out;
+	}
+out:
+	mana_hwc_put_msg_index(hwc, msg_id);
+	return err;
+}
diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h
new file mode 100644
index 000000000000..31c6e83c454a
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _HW_CHANNEL_H
+#define _HW_CHANNEL_H
+
+#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ  4
+
+#define HW_CHANNEL_MAX_REQUEST_SIZE  0x1000
+#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000
+
+#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1
+
+#define HWC_INIT_DATA_CQID		1
+#define HWC_INIT_DATA_RQID		2
+#define HWC_INIT_DATA_SQID		3
+#define HWC_INIT_DATA_QUEUE_DEPTH	4
+#define HWC_INIT_DATA_MAX_REQUEST	5
+#define HWC_INIT_DATA_MAX_RESPONSE	6
+#define HWC_INIT_DATA_MAX_NUM_CQS	7
+#define HWC_INIT_DATA_PDID		8
+#define HWC_INIT_DATA_GPA_MKEY		9
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+union hwc_init_eq_id_db {
+	u32 as_uint32;
+
+	struct {
+		u32 eq_id	: 16;
+		u32 doorbell	: 16;
+	};
+}; /* HW DATA */
+
+union hwc_init_type_data {
+	u32 as_uint32;
+
+	struct {
+		u32 value	: 24;
+		u32 type	:  8;
+	};
+}; /* HW DATA */
+
+struct hwc_rx_oob {
+	u32 type	: 6;
+	u32 eom		: 1;
+	u32 som		: 1;
+	u32 vendor_err	: 8;
+	u32 reserved1	: 16;
+
+	u32 src_virt_wq	: 24;
+	u32 src_vfid	: 8;
+
+	u32 reserved2;
+
+	union {
+		u32 wqe_addr_low;
+		u32 wqe_offset;
+	};
+
+	u32 wqe_addr_high;
+
+	u32 client_data_unit	: 14;
+	u32 reserved3		: 18;
+
+	u32 tx_oob_data_size;
+
+	u32 chunk_offset	: 21;
+	u32 reserved4		: 11;
+}; /* HW DATA */
+
+struct hwc_tx_oob {
+	u32 reserved1;
+
+	u32 reserved2;
+
+	u32 vrq_id	: 24;
+	u32 dest_vfid	: 8;
+
+	u32 vrcq_id	: 24;
+	u32 reserved3	: 8;
+
+	u32 vscq_id	: 24;
+	u32 loopback	: 1;
+	u32 lso_override: 1;
+	u32 dest_pf	: 1;
+	u32 reserved4	: 5;
+
+	u32 vsq_id	: 24;
+	u32 reserved5	: 8;
+}; /* HW DATA */
+
+struct hwc_work_request {
+	void *buf_va;
+	void *buf_sge_addr;
+	u32 buf_len;
+	u32 msg_size;
+
+	struct gdma_wqe_request wqe_req;
+	struct hwc_tx_oob tx_oob;
+
+	struct gdma_sge sge;
+};
+
+/* hwc_dma_buf represents the array of in-flight WQEs.
+ * mem_info as know as the GDMA mapped memory is partitioned and used by
+ * in-flight WQEs.
+ * The number of WQEs is determined by the number of in-flight messages.
+ */
+struct hwc_dma_buf {
+	struct gdma_mem_info mem_info;
+
+	u32 gpa_mkey;
+
+	u32 num_reqs;
+	struct hwc_work_request reqs[];
+};
+
+typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id,
+				    const struct hwc_rx_oob *rx_oob);
+
+typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id,
+				    const struct hwc_rx_oob *rx_oob);
+
+struct hwc_cq {
+	struct hw_channel_context *hwc;
+
+	struct gdma_queue *gdma_cq;
+	struct gdma_queue *gdma_eq;
+	struct gdma_comp *comp_buf;
+	u16 queue_depth;
+
+	hwc_rx_event_handler_t *rx_event_handler;
+	void *rx_event_ctx;
+
+	hwc_tx_event_handler_t *tx_event_handler;
+	void *tx_event_ctx;
+};
+
+struct hwc_wq {
+	struct hw_channel_context *hwc;
+
+	struct gdma_queue *gdma_wq;
+	struct hwc_dma_buf *msg_buf;
+	u16 queue_depth;
+
+	struct hwc_cq *hwc_cq;
+};
+
+struct hwc_caller_ctx {
+	struct completion comp_event;
+	void *output_buf;
+	u32 output_buflen;
+
+	u32 error; /* Linux error code */
+	u32 status_code;
+};
+
+struct hw_channel_context {
+	struct gdma_dev *gdma_dev;
+	struct device *dev;
+
+	u16 num_inflight_msg;
+	u32 max_req_msg_size;
+
+	u16 hwc_init_q_depth_max;
+	u32 hwc_init_max_req_msg_size;
+	u32 hwc_init_max_resp_msg_size;
+
+	struct completion hwc_init_eqe_comp;
+
+	struct hwc_wq *rxq;
+	struct hwc_wq *txq;
+	struct hwc_cq *cq;
+
+	struct semaphore sema;
+	struct gdma_resource inflight_msg_res;
+
+	struct hwc_caller_ctx *caller_ctx;
+};
+
+int mana_hwc_create_channel(struct gdma_context *gc);
+void mana_hwc_destroy_channel(struct gdma_context *gc);
+
+int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
+			  const void *req, u32 resp_len, void *resp);
+
+#endif /* _HW_CHANNEL_H */
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
new file mode 100644
index 000000000000..a2c3f826f022
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -0,0 +1,533 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _MANA_H
+#define _MANA_H
+
+#include "gdma.h"
+#include "hw_channel.h"
+
+/* Microsoft Azure Network Adapter (MANA)'s definitions
+ *
+ * Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+/* MANA protocol version */
+#define MANA_MAJOR_VERSION	0
+#define MANA_MINOR_VERSION	1
+#define MANA_MICRO_VERSION	1
+
+typedef u64 mana_handle_t;
+#define INVALID_MANA_HANDLE ((mana_handle_t)-1)
+
+enum TRI_STATE {
+	TRI_STATE_UNKNOWN = -1,
+	TRI_STATE_FALSE = 0,
+	TRI_STATE_TRUE = 1
+};
+
+/* Number of entries for hardware indirection table must be in power of 2 */
+#define MANA_INDIRECT_TABLE_SIZE 64
+#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+
+/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
+#define MANA_HASH_KEY_SIZE 40
+
+#define COMP_ENTRY_SIZE 64
+
+#define ADAPTER_MTU_SIZE 1500
+#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
+
+#define RX_BUFFERS_PER_QUEUE 512
+
+#define MAX_SEND_BUFFERS_PER_QUEUE 256
+
+#define EQ_SIZE (8 * PAGE_SIZE)
+#define LOG2_EQ_THROTTLE 3
+
+#define MAX_PORTS_IN_MANA_DEV 16
+
+struct mana_stats {
+	u64 packets;
+	u64 bytes;
+	struct u64_stats_sync syncp;
+};
+
+struct mana_txq {
+	struct gdma_queue *gdma_sq;
+
+	union {
+		u32 gdma_txq_id;
+		struct {
+			u32 reserved1	: 10;
+			u32 vsq_frame	: 14;
+			u32 reserved2	: 8;
+		};
+	};
+
+	u16 vp_offset;
+
+	struct net_device *ndev;
+
+	/* The SKBs are sent to the HW and we are waiting for the CQEs. */
+	struct sk_buff_head pending_skbs;
+	struct netdev_queue *net_txq;
+
+	atomic_t pending_sends;
+
+	struct mana_stats stats;
+};
+
+/* skb data and frags dma mappings */
+struct mana_skb_head {
+	dma_addr_t dma_handle[MAX_SKB_FRAGS + 1];
+
+	u32 size[MAX_SKB_FRAGS + 1];
+};
+
+#define MANA_HEADROOM sizeof(struct mana_skb_head)
+
+enum mana_tx_pkt_format {
+	MANA_SHORT_PKT_FMT	= 0,
+	MANA_LONG_PKT_FMT	= 1,
+};
+
+struct mana_tx_short_oob {
+	u32 pkt_fmt		: 2;
+	u32 is_outer_ipv4	: 1;
+	u32 is_outer_ipv6	: 1;
+	u32 comp_iphdr_csum	: 1;
+	u32 comp_tcp_csum	: 1;
+	u32 comp_udp_csum	: 1;
+	u32 supress_txcqe_gen	: 1;
+	u32 vcq_num		: 24;
+
+	u32 trans_off		: 10; /* Transport header offset */
+	u32 vsq_frame		: 14;
+	u32 short_vp_offset	: 8;
+}; /* HW DATA */
+
+struct mana_tx_long_oob {
+	u32 is_encap		: 1;
+	u32 inner_is_ipv6	: 1;
+	u32 inner_tcp_opt	: 1;
+	u32 inject_vlan_pri_tag : 1;
+	u32 reserved1		: 12;
+	u32 pcp			: 3;  /* 802.1Q */
+	u32 dei			: 1;  /* 802.1Q */
+	u32 vlan_id		: 12; /* 802.1Q */
+
+	u32 inner_frame_offset	: 10;
+	u32 inner_ip_rel_offset : 6;
+	u32 long_vp_offset	: 12;
+	u32 reserved2		: 4;
+
+	u32 reserved3;
+	u32 reserved4;
+}; /* HW DATA */
+
+struct mana_tx_oob {
+	struct mana_tx_short_oob s_oob;
+	struct mana_tx_long_oob l_oob;
+}; /* HW DATA */
+
+enum mana_cq_type {
+	MANA_CQ_TYPE_RX,
+	MANA_CQ_TYPE_TX,
+};
+
+enum mana_cqe_type {
+	CQE_INVALID			= 0,
+	CQE_RX_OKAY			= 1,
+	CQE_RX_COALESCED_4		= 2,
+	CQE_RX_OBJECT_FENCE		= 3,
+	CQE_RX_TRUNCATED		= 4,
+
+	CQE_TX_OKAY			= 32,
+	CQE_TX_SA_DROP			= 33,
+	CQE_TX_MTU_DROP			= 34,
+	CQE_TX_INVALID_OOB		= 35,
+	CQE_TX_INVALID_ETH_TYPE		= 36,
+	CQE_TX_HDR_PROCESSING_ERROR	= 37,
+	CQE_TX_VF_DISABLED		= 38,
+	CQE_TX_VPORT_IDX_OUT_OF_RANGE	= 39,
+	CQE_TX_VPORT_DISABLED		= 40,
+	CQE_TX_VLAN_TAGGING_VIOLATION	= 41,
+};
+
+#define MANA_CQE_COMPLETION 1
+
+struct mana_cqe_header {
+	u32 cqe_type	: 6;
+	u32 client_type	: 2;
+	u32 vendor_err	: 24;
+}; /* HW DATA */
+
+/* NDIS HASH Types */
+#define NDIS_HASH_IPV4		BIT(0)
+#define NDIS_HASH_TCP_IPV4	BIT(1)
+#define NDIS_HASH_UDP_IPV4	BIT(2)
+#define NDIS_HASH_IPV6		BIT(3)
+#define NDIS_HASH_TCP_IPV6	BIT(4)
+#define NDIS_HASH_UDP_IPV6	BIT(5)
+#define NDIS_HASH_IPV6_EX	BIT(6)
+#define NDIS_HASH_TCP_IPV6_EX	BIT(7)
+#define NDIS_HASH_UDP_IPV6_EX	BIT(8)
+
+#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
+#define MANA_HASH_L4                                                         \
+	(NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 |      \
+	 NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
+
+struct mana_rxcomp_perpkt_info {
+	u32 pkt_len	: 16;
+	u32 reserved1	: 16;
+	u32 reserved2;
+	u32 pkt_hash;
+}; /* HW DATA */
+
+#define MANA_RXCOMP_OOB_NUM_PPI 4
+
+/* Receive completion OOB */
+struct mana_rxcomp_oob {
+	struct mana_cqe_header cqe_hdr;
+
+	u32 rx_vlan_id			: 12;
+	u32 rx_vlantag_present		: 1;
+	u32 rx_outer_iphdr_csum_succeed	: 1;
+	u32 rx_outer_iphdr_csum_fail	: 1;
+	u32 reserved1			: 1;
+	u32 rx_hashtype			: 9;
+	u32 rx_iphdr_csum_succeed	: 1;
+	u32 rx_iphdr_csum_fail		: 1;
+	u32 rx_tcp_csum_succeed		: 1;
+	u32 rx_tcp_csum_fail		: 1;
+	u32 rx_udp_csum_succeed		: 1;
+	u32 rx_udp_csum_fail		: 1;
+	u32 reserved2			: 1;
+
+	struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI];
+
+	u32 rx_wqe_offset;
+}; /* HW DATA */
+
+struct mana_tx_comp_oob {
+	struct mana_cqe_header cqe_hdr;
+
+	u32 tx_data_offset;
+
+	u32 tx_sgl_offset	: 5;
+	u32 tx_wqe_offset	: 27;
+
+	u32 reserved[12];
+}; /* HW DATA */
+
+struct mana_rxq;
+
+struct mana_cq {
+	struct gdma_queue *gdma_cq;
+
+	/* Cache the CQ id (used to verify if each CQE comes to the right CQ. */
+	u32 gdma_id;
+
+	/* Type of the CQ: TX or RX */
+	enum mana_cq_type type;
+
+	/* Pointer to the mana_rxq that is pushing RX CQEs to the queue.
+	 * Only and must be non-NULL if type is MANA_CQ_TYPE_RX.
+	 */
+	struct mana_rxq *rxq;
+
+	/* Pointer to the mana_txq that is pushing TX CQEs to the queue.
+	 * Only and must be non-NULL if type is MANA_CQ_TYPE_TX.
+	 */
+	struct mana_txq *txq;
+
+	/* Pointer to a buffer which the CQ handler can copy the CQE's into. */
+	struct gdma_comp *gdma_comp_buf;
+};
+
+#define GDMA_MAX_RQE_SGES 15
+
+struct mana_recv_buf_oob {
+	/* A valid GDMA work request representing the data buffer. */
+	struct gdma_wqe_request wqe_req;
+
+	void *buf_va;
+	dma_addr_t buf_dma_addr;
+
+	/* SGL of the buffer going to be sent has part of the work request. */
+	u32 num_sge;
+	struct gdma_sge sgl[GDMA_MAX_RQE_SGES];
+
+	/* Required to store the result of mana_gd_post_work_request.
+	 * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the
+	 * work queue when the WQE is consumed.
+	 */
+	struct gdma_posted_wqe_info wqe_inf;
+};
+
+struct mana_rxq {
+	struct gdma_queue *gdma_rq;
+	/* Cache the gdma receive queue id */
+	u32 gdma_id;
+
+	/* Index of RQ in the vPort, not gdma receive queue id */
+	u32 rxq_idx;
+
+	u32 datasize;
+
+	mana_handle_t rxobj;
+
+	struct mana_cq rx_cq;
+
+	struct net_device *ndev;
+
+	/* Total number of receive buffers to be allocated */
+	u32 num_rx_buf;
+
+	u32 buf_index;
+
+	struct mana_stats stats;
+
+	/* MUST BE THE LAST MEMBER:
+	 * Each receive buffer has an associated mana_recv_buf_oob.
+	 */
+	struct mana_recv_buf_oob rx_oobs[];
+};
+
+struct mana_tx_qp {
+	struct mana_txq txq;
+
+	struct mana_cq tx_cq;
+
+	mana_handle_t tx_object;
+};
+
+struct mana_ethtool_stats {
+	u64 stop_queue;
+	u64 wake_queue;
+};
+
+struct mana_context {
+	struct gdma_dev *gdma_dev;
+
+	u16 num_ports;
+
+	struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
+};
+
+struct mana_port_context {
+	struct mana_context *ac;
+	struct net_device *ndev;
+
+	u8 mac_addr[ETH_ALEN];
+
+	struct mana_eq *eqs;
+
+	enum TRI_STATE rss_state;
+
+	mana_handle_t default_rxobj;
+	bool tx_shortform_allowed;
+	u16 tx_vp_offset;
+
+	struct mana_tx_qp *tx_qp;
+
+	/* Indirection Table for RX & TX. The values are queue indexes */
+	u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+
+	/* Indirection table containing RxObject Handles */
+	mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+
+	/*  Hash key used by the NIC */
+	u8 hashkey[MANA_HASH_KEY_SIZE];
+
+	/* This points to an array of num_queues of RQ pointers. */
+	struct mana_rxq **rxqs;
+
+	/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
+	unsigned int max_queues;
+	unsigned int num_queues;
+
+	mana_handle_t port_handle;
+
+	u16 port_idx;
+
+	bool port_is_up;
+	bool port_st_save; /* Saved port state */
+
+	struct mana_ethtool_stats eth_stats;
+};
+
+int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx,
+		    bool update_hash, bool update_tab);
+
+int mana_alloc_queues(struct net_device *ndev);
+int mana_attach(struct net_device *ndev);
+int mana_detach(struct net_device *ndev, bool from_close);
+
+int mana_probe(struct gdma_dev *gd);
+void mana_remove(struct gdma_dev *gd);
+
+extern const struct ethtool_ops mana_ethtool_ops;
+
+struct mana_obj_spec {
+	u32 queue_index;
+	u64 gdma_region;
+	u32 queue_size;
+	u32 attached_eq;
+	u32 modr_ctx_id;
+};
+
+enum mana_command_code {
+	MANA_QUERY_DEV_CONFIG	= 0x20001,
+	MANA_QUERY_GF_STAT	= 0x20002,
+	MANA_CONFIG_VPORT_TX	= 0x20003,
+	MANA_CREATE_WQ_OBJ	= 0x20004,
+	MANA_DESTROY_WQ_OBJ	= 0x20005,
+	MANA_FENCE_RQ		= 0x20006,
+	MANA_CONFIG_VPORT_RX	= 0x20007,
+	MANA_QUERY_VPORT_CONFIG	= 0x20008,
+};
+
+/* Query Device Configuration */
+struct mana_query_device_cfg_req {
+	struct gdma_req_hdr hdr;
+
+	/* Driver Capability flags */
+	u64 drv_cap_flags1;
+	u64 drv_cap_flags2;
+	u64 drv_cap_flags3;
+	u64 drv_cap_flags4;
+
+	u32 proto_major_ver;
+	u32 proto_minor_ver;
+	u32 proto_micro_ver;
+
+	u32 reserved;
+}; /* HW DATA */
+
+struct mana_query_device_cfg_resp {
+	struct gdma_resp_hdr hdr;
+
+	u64 pf_cap_flags1;
+	u64 pf_cap_flags2;
+	u64 pf_cap_flags3;
+	u64 pf_cap_flags4;
+
+	u16 max_num_vports;
+	u16 reserved;
+	u32 max_num_eqs;
+}; /* HW DATA */
+
+/* Query vPort Configuration */
+struct mana_query_vport_cfg_req {
+	struct gdma_req_hdr hdr;
+	u32 vport_index;
+}; /* HW DATA */
+
+struct mana_query_vport_cfg_resp {
+	struct gdma_resp_hdr hdr;
+	u32 max_num_sq;
+	u32 max_num_rq;
+	u32 num_indirection_ent;
+	u32 reserved1;
+	u8 mac_addr[6];
+	u8 reserved2[2];
+	mana_handle_t vport;
+}; /* HW DATA */
+
+/* Configure vPort */
+struct mana_config_vport_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t vport;
+	u32 pdid;
+	u32 doorbell_pageid;
+}; /* HW DATA */
+
+struct mana_config_vport_resp {
+	struct gdma_resp_hdr hdr;
+	u16 tx_vport_offset;
+	u8 short_form_allowed;
+	u8 reserved;
+}; /* HW DATA */
+
+/* Create WQ Object */
+struct mana_create_wqobj_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t vport;
+	u32 wq_type;
+	u32 reserved;
+	u64 wq_gdma_region;
+	u64 cq_gdma_region;
+	u32 wq_size;
+	u32 cq_size;
+	u32 cq_moderation_ctx_id;
+	u32 cq_parent_qid;
+}; /* HW DATA */
+
+struct mana_create_wqobj_resp {
+	struct gdma_resp_hdr hdr;
+	u32 wq_id;
+	u32 cq_id;
+	mana_handle_t wq_obj;
+}; /* HW DATA */
+
+/* Destroy WQ Object */
+struct mana_destroy_wqobj_req {
+	struct gdma_req_hdr hdr;
+	u32 wq_type;
+	u32 reserved;
+	mana_handle_t wq_obj_handle;
+}; /* HW DATA */
+
+struct mana_destroy_wqobj_resp {
+	struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Fence RQ */
+struct mana_fence_rq_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t wq_obj_handle;
+}; /* HW DATA */
+
+struct mana_fence_rq_resp {
+	struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Configure vPort Rx Steering */
+struct mana_cfg_rx_steer_req {
+	struct gdma_req_hdr hdr;
+	mana_handle_t vport;
+	u16 num_indir_entries;
+	u16 indir_tab_offset;
+	u32 rx_enable;
+	u32 rss_enable;
+	u8 update_default_rxobj;
+	u8 update_hashkey;
+	u8 update_indir_tab;
+	u8 reserved;
+	mana_handle_t default_rxobj;
+	u8 hashkey[MANA_HASH_KEY_SIZE];
+}; /* HW DATA */
+
+struct mana_cfg_rx_steer_resp {
+	struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+#define MANA_MAX_NUM_QUEUES 16
+
+#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
+
+struct mana_tx_package {
+	struct gdma_wqe_request wqe_req;
+	struct gdma_sge sgl_array[5];
+	struct gdma_sge *sgl_ptr;
+
+	struct mana_tx_oob tx_oob;
+
+	struct gdma_posted_wqe_info wqe_info;
+};
+
+#endif /* _MANA_H */
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
new file mode 100644
index 000000000000..a744ca0b6c19
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -0,0 +1,1895 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mm.h>
+
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+
+#include "mana.h"
+
+/* Microsoft Azure Network Adapter (MANA) functions */
+
+static int mana_open(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	int err;
+
+	err = mana_alloc_queues(ndev);
+	if (err)
+		return err;
+
+	apc->port_is_up = true;
+
+	/* Ensure port state updated before txq state */
+	smp_wmb();
+
+	netif_carrier_on(ndev);
+	netif_tx_wake_all_queues(ndev);
+
+	return 0;
+}
+
+static int mana_close(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+
+	if (!apc->port_is_up)
+		return 0;
+
+	return mana_detach(ndev, true);
+}
+
+static bool mana_can_tx(struct gdma_queue *wq)
+{
+	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
+}
+
+static unsigned int mana_checksum_info(struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *ip = ip_hdr(skb);
+
+		if (ip->protocol == IPPROTO_TCP)
+			return IPPROTO_TCP;
+
+		if (ip->protocol == IPPROTO_UDP)
+			return IPPROTO_UDP;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6 = ipv6_hdr(skb);
+
+		if (ip6->nexthdr == IPPROTO_TCP)
+			return IPPROTO_TCP;
+
+		if (ip6->nexthdr == IPPROTO_UDP)
+			return IPPROTO_UDP;
+	}
+
+	/* No csum offloading */
+	return 0;
+}
+
+static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
+			struct mana_tx_package *tp)
+{
+	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct gdma_context *gc;
+	struct device *dev;
+	skb_frag_t *frag;
+	dma_addr_t da;
+	int i;
+
+	gc = gd->gdma_context;
+	dev = gc->dev;
+	da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
+
+	if (dma_mapping_error(dev, da))
+		return -ENOMEM;
+
+	ash->dma_handle[0] = da;
+	ash->size[0] = skb_headlen(skb);
+
+	tp->wqe_req.sgl[0].address = ash->dma_handle[0];
+	tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
+	tp->wqe_req.sgl[0].size = ash->size[0];
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
+				      DMA_TO_DEVICE);
+
+		if (dma_mapping_error(dev, da))
+			goto frag_err;
+
+		ash->dma_handle[i + 1] = da;
+		ash->size[i + 1] = skb_frag_size(frag);
+
+		tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
+		tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
+		tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
+	}
+
+	return 0;
+
+frag_err:
+	for (i = i - 1; i >= 0; i--)
+		dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
+			       DMA_TO_DEVICE);
+
+	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
+	struct mana_port_context *apc = netdev_priv(ndev);
+	u16 txq_idx = skb_get_queue_mapping(skb);
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	bool ipv4 = false, ipv6 = false;
+	struct mana_tx_package pkg = {};
+	struct netdev_queue *net_txq;
+	struct mana_stats *tx_stats;
+	struct gdma_queue *gdma_sq;
+	unsigned int csum_type;
+	struct mana_txq *txq;
+	struct mana_cq *cq;
+	int err, len;
+
+	if (unlikely(!apc->port_is_up))
+		goto tx_drop;
+
+	if (skb_cow_head(skb, MANA_HEADROOM))
+		goto tx_drop_count;
+
+	txq = &apc->tx_qp[txq_idx].txq;
+	gdma_sq = txq->gdma_sq;
+	cq = &apc->tx_qp[txq_idx].tx_cq;
+
+	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
+	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;
+
+	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
+		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
+		pkt_fmt = MANA_LONG_PKT_FMT;
+	} else {
+		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
+	}
+
+	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;
+
+	if (pkt_fmt == MANA_SHORT_PKT_FMT)
+		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
+	else
+		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
+
+	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
+	pkg.wqe_req.flags = 0;
+	pkg.wqe_req.client_data_unit = 0;
+
+	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
+	WARN_ON_ONCE(pkg.wqe_req.num_sge > 30);
+
+	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
+		pkg.wqe_req.sgl = pkg.sgl_array;
+	} else {
+		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
+					    sizeof(struct gdma_sge),
+					    GFP_ATOMIC);
+		if (!pkg.sgl_ptr)
+			goto tx_drop_count;
+
+		pkg.wqe_req.sgl = pkg.sgl_ptr;
+	}
+
+	if (skb->protocol == htons(ETH_P_IP))
+		ipv4 = true;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		ipv6 = true;
+
+	if (skb_is_gso(skb)) {
+		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
+		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
+
+		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
+		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
+		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
+
+		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
+		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
+		if (ipv4) {
+			ip_hdr(skb)->tot_len = 0;
+			ip_hdr(skb)->check = 0;
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr, 0,
+						   IPPROTO_TCP, 0);
+		} else {
+			ipv6_hdr(skb)->payload_len = 0;
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr, 0,
+						 IPPROTO_TCP, 0);
+		}
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		csum_type = mana_checksum_info(skb);
+
+		if (csum_type == IPPROTO_TCP) {
+			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
+			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
+
+			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
+			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
+
+		} else if (csum_type == IPPROTO_UDP) {
+			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
+			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;
+
+			pkg.tx_oob.s_oob.comp_udp_csum = 1;
+		} else {
+			/* Can't do offload of this type of checksum */
+			if (skb_checksum_help(skb))
+				goto free_sgl_ptr;
+		}
+	}
+
+	if (mana_map_skb(skb, apc, &pkg))
+		goto free_sgl_ptr;
+
+	skb_queue_tail(&txq->pending_skbs, skb);
+
+	len = skb->len;
+	net_txq = netdev_get_tx_queue(ndev, txq_idx);
+
+	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
+					(struct gdma_posted_wqe_info *)skb->cb);
+	if (!mana_can_tx(gdma_sq)) {
+		netif_tx_stop_queue(net_txq);
+		apc->eth_stats.stop_queue++;
+	}
+
+	if (err) {
+		(void)skb_dequeue_tail(&txq->pending_skbs);
+		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
+		err = NETDEV_TX_BUSY;
+		goto tx_busy;
+	}
+
+	err = NETDEV_TX_OK;
+	atomic_inc(&txq->pending_sends);
+
+	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);
+
+	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
+	skb = NULL;
+
+	tx_stats = &txq->stats;
+	u64_stats_update_begin(&tx_stats->syncp);
+	tx_stats->packets++;
+	tx_stats->bytes += len;
+	u64_stats_update_end(&tx_stats->syncp);
+
+tx_busy:
+	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
+		netif_tx_wake_queue(net_txq);
+		apc->eth_stats.wake_queue++;
+	}
+
+	kfree(pkg.sgl_ptr);
+	return err;
+
+free_sgl_ptr:
+	kfree(pkg.sgl_ptr);
+tx_drop_count:
+	ndev->stats.tx_dropped++;
+tx_drop:
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+static void mana_get_stats64(struct net_device *ndev,
+			     struct rtnl_link_stats64 *st)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned int num_queues = apc->num_queues;
+	struct mana_stats *stats;
+	unsigned int start;
+	u64 packets, bytes;
+	int q;
+
+	if (!apc->port_is_up)
+		return;
+
+	netdev_stats_to_stats64(st, &ndev->stats);
+
+	for (q = 0; q < num_queues; q++) {
+		stats = &apc->rxqs[q]->stats;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		st->rx_packets += packets;
+		st->rx_bytes += bytes;
+	}
+
+	for (q = 0; q < num_queues; q++) {
+		stats = &apc->tx_qp[q].txq.stats;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		st->tx_packets += packets;
+		st->tx_bytes += bytes;
+	}
+}
+
+static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
+			     int old_q)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	u32 hash = skb_get_hash(skb);
+	struct sock *sk = skb->sk;
+	int txq;
+
+	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
+
+	if (txq != old_q && sk && sk_fullsock(sk) &&
+	    rcu_access_pointer(sk->sk_dst_cache))
+		sk_tx_queue_set(sk, txq);
+
+	return txq;
+}
+
+static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
+			     struct net_device *sb_dev)
+{
+	int txq;
+
+	if (ndev->real_num_tx_queues == 1)
+		return 0;
+
+	txq = sk_tx_queue_get(skb->sk);
+
+	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
+		if (skb_rx_queue_recorded(skb))
+			txq = skb_get_rx_queue(skb);
+		else
+			txq = mana_get_tx_queue(ndev, skb, txq);
+	}
+
+	return txq;
+}
+
+static const struct net_device_ops mana_devops = {
+	.ndo_open		= mana_open,
+	.ndo_stop		= mana_close,
+	.ndo_select_queue	= mana_select_queue,
+	.ndo_start_xmit		= mana_start_xmit,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_get_stats64	= mana_get_stats64,
+};
+
+static void mana_cleanup_port_context(struct mana_port_context *apc)
+{
+	kfree(apc->rxqs);
+	apc->rxqs = NULL;
+}
+
+static int mana_init_port_context(struct mana_port_context *apc)
+{
+	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
+			    GFP_KERNEL);
+
+	return !apc->rxqs ? -ENOMEM : 0;
+}
+
+static int mana_send_request(struct mana_context *ac, void *in_buf,
+			     u32 in_len, void *out_buf, u32 out_len)
+{
+	struct gdma_context *gc = ac->gdma_dev->gdma_context;
+	struct gdma_resp_hdr *resp = out_buf;
+	struct gdma_req_hdr *req = in_buf;
+	struct device *dev = gc->dev;
+	static atomic_t activity_id;
+	int err;
+
+	req->dev_id = gc->mana.dev_id;
+	req->activity_id = atomic_inc_return(&activity_id);
+
+	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
+				   out_buf);
+	if (err || resp->status) {
+		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
+			err, resp->status);
+		return err ? err : -EPROTO;
+	}
+
+	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
+	    req->activity_id != resp->activity_id) {
+		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
+			req->dev_id.as_uint32, resp->dev_id.as_uint32,
+			req->activity_id, resp->activity_id);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
+				const enum mana_command_code expected_code,
+				const u32 min_size)
+{
+	if (resp_hdr->response.msg_type != expected_code)
+		return -EPROTO;
+
+	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
+		return -EPROTO;
+
+	if (resp_hdr->response.msg_size < min_size)
+		return -EPROTO;
+
+	return 0;
+}
+
+static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
+				 u32 proto_minor_ver, u32 proto_micro_ver,
+				 u16 *max_num_vports)
+{
+	struct gdma_context *gc = ac->gdma_dev->gdma_context;
+	struct mana_query_device_cfg_resp resp = {};
+	struct mana_query_device_cfg_req req = {};
+	struct device *dev = gc->dev;
+	int err = 0;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
+			     sizeof(req), sizeof(resp));
+	req.proto_major_ver = proto_major_ver;
+	req.proto_minor_ver = proto_minor_ver;
+	req.proto_micro_ver = proto_micro_ver;
+
+	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
+	if (err) {
+		dev_err(dev, "Failed to query config: %d", err);
+		return err;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
+			resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+		return err;
+	}
+
+	*max_num_vports = resp.max_num_vports;
+
+	return 0;
+}
+
+static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
+				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
+{
+	struct mana_query_vport_cfg_resp resp = {};
+	struct mana_query_vport_cfg_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
+			     sizeof(req), sizeof(resp));
+
+	req.vport_index = vport_index;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err)
+		return err;
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
+				   sizeof(resp));
+	if (err)
+		return err;
+
+	if (resp.hdr.status)
+		return -EPROTO;
+
+	*max_sq = resp.max_num_sq;
+	*max_rq = resp.max_num_rq;
+	*num_indir_entry = resp.num_indirection_ent;
+
+	apc->port_handle = resp.vport;
+	ether_addr_copy(apc->mac_addr, resp.mac_addr);
+
+	return 0;
+}
+
+static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
+			  u32 doorbell_pg_id)
+{
+	struct mana_config_vport_resp resp = {};
+	struct mana_config_vport_req req = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
+			     sizeof(req), sizeof(resp));
+	req.vport = apc->port_handle;
+	req.pdid = protection_dom_id;
+	req.doorbell_pageid = doorbell_pg_id;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
+		goto out;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
+			   err, resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		goto out;
+	}
+
+	apc->tx_shortform_allowed = resp.short_form_allowed;
+	apc->tx_vp_offset = resp.tx_vport_offset;
+out:
+	return err;
+}
+
+static int mana_cfg_vport_steering(struct mana_port_context *apc,
+				   enum TRI_STATE rx,
+				   bool update_default_rxobj, bool update_key,
+				   bool update_tab)
+{
+	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
+	struct mana_cfg_rx_steer_req *req = NULL;
+	struct mana_cfg_rx_steer_resp resp = {};
+	struct net_device *ndev = apc->ndev;
+	mana_handle_t *req_indir_tab;
+	u32 req_buf_size;
+	int err;
+
+	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
+	req = kzalloc(req_buf_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+			     sizeof(resp));
+
+	req->vport = apc->port_handle;
+	req->num_indir_entries = num_entries;
+	req->indir_tab_offset = sizeof(*req);
+	req->rx_enable = rx;
+	req->rss_enable = apc->rss_state;
+	req->update_default_rxobj = update_default_rxobj;
+	req->update_hashkey = update_key;
+	req->update_indir_tab = update_tab;
+	req->default_rxobj = apc->default_rxobj;
+
+	if (update_key)
+		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);
+
+	if (update_tab) {
+		req_indir_tab = (mana_handle_t *)(req + 1);
+		memcpy(req_indir_tab, apc->rxobj_table,
+		       req->num_indir_entries * sizeof(mana_handle_t));
+	}
+
+	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+		goto out;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
+				   sizeof(resp));
+	if (err) {
+		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
+		goto out;
+	}
+
+	if (resp.hdr.status) {
+		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+			   resp.hdr.status);
+		err = -EPROTO;
+	}
+out:
+	kfree(req);
+	return err;
+}
+
+static int mana_create_wq_obj(struct mana_port_context *apc,
+			      mana_handle_t vport,
+			      u32 wq_type, struct mana_obj_spec *wq_spec,
+			      struct mana_obj_spec *cq_spec,
+			      mana_handle_t *wq_obj)
+{
+	struct mana_create_wqobj_resp resp = {};
+	struct mana_create_wqobj_req req = {};
+	struct net_device *ndev = apc->ndev;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
+			     sizeof(req), sizeof(resp));
+	req.vport = vport;
+	req.wq_type = wq_type;
+	req.wq_gdma_region = wq_spec->gdma_region;
+	req.cq_gdma_region = cq_spec->gdma_region;
+	req.wq_size = wq_spec->queue_size;
+	req.cq_size = cq_spec->queue_size;
+	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
+	req.cq_parent_qid = cq_spec->attached_eq;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
+		goto out;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
+				   sizeof(resp));
+	if (err || resp.hdr.status) {
+		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
+			   resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+		goto out;
+	}
+
+	if (resp.wq_obj == INVALID_MANA_HANDLE) {
+		netdev_err(ndev, "Got an invalid WQ object handle\n");
+		err = -EPROTO;
+		goto out;
+	}
+
+	*wq_obj = resp.wq_obj;
+	wq_spec->queue_index = resp.wq_id;
+	cq_spec->queue_index = resp.cq_id;
+
+	return 0;
+out:
+	return err;
+}
+
+static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
+				mana_handle_t wq_obj)
+{
+	struct mana_destroy_wqobj_resp resp = {};
+	struct mana_destroy_wqobj_req req = {};
+	struct net_device *ndev = apc->ndev;
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
+			     sizeof(req), sizeof(resp));
+	req.wq_type = wq_type;
+	req.wq_obj_handle = wq_obj;
+
+	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
+				sizeof(resp));
+	if (err) {
+		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
+		return;
+	}
+
+	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
+				   sizeof(resp));
+	if (err || resp.hdr.status)
+		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
+			   resp.hdr.status);
+}
+
+static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf)
+{
+	int i;
+
+	for (i = 0; i < CQE_POLLING_BUFFER; i++)
+		memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp));
+}
+
+static void mana_destroy_eq(struct gdma_context *gc,
+			    struct mana_port_context *apc)
+{
+	struct gdma_queue *eq;
+	int i;
+
+	if (!apc->eqs)
+		return;
+
+	for (i = 0; i < apc->num_queues; i++) {
+		eq = apc->eqs[i].eq;
+		if (!eq)
+			continue;
+
+		mana_gd_destroy_queue(gc, eq);
+	}
+
+	kfree(apc->eqs);
+	apc->eqs = NULL;
+}
+
+static int mana_create_eq(struct mana_port_context *apc)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct gdma_queue_spec spec = {};
+	int err;
+	int i;
+
+	apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq),
+			   GFP_KERNEL);
+	if (!apc->eqs)
+		return -ENOMEM;
+
+	spec.type = GDMA_EQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = EQ_SIZE;
+	spec.eq.callback = NULL;
+	spec.eq.context = apc->eqs;
+	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.ndev = apc->ndev;
+
+	for (i = 0; i < apc->num_queues; i++) {
+		mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll);
+
+		err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq);
+		if (err)
+			goto out;
+	}
+
+	return 0;
+out:
+	mana_destroy_eq(gd->gdma_context, apc);
+	return err;
+}
+
+static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
+{
+	u32 used_space_old;
+	u32 used_space_new;
+
+	used_space_old = wq->head - wq->tail;
+	used_space_new = wq->head - (wq->tail + num_units);
+
+	if (WARN_ON_ONCE(used_space_new > used_space_old))
+		return -ERANGE;
+
+	wq->tail += num_units;
+	return 0;
+}
+
+static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
+{
+	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
+	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
+	struct device *dev = gc->dev;
+	int i;
+
+	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);
+
+	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
+		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
+			       DMA_TO_DEVICE);
+}
+
+static void mana_poll_tx_cq(struct mana_cq *cq)
+{
+	struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent;
+	struct gdma_comp *completions = cq->gdma_comp_buf;
+	struct gdma_posted_wqe_info *wqe_info;
+	unsigned int pkt_transmitted = 0;
+	unsigned int wqe_unit_cnt = 0;
+	struct mana_txq *txq = cq->txq;
+	struct mana_port_context *apc;
+	struct netdev_queue *net_txq;
+	struct gdma_queue *gdma_wq;
+	unsigned int avail_space;
+	struct net_device *ndev;
+	struct sk_buff *skb;
+	bool txq_stopped;
+	int comp_read;
+	int i;
+
+	ndev = txq->ndev;
+	apc = netdev_priv(ndev);
+
+	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
+				    CQE_POLLING_BUFFER);
+
+	for (i = 0; i < comp_read; i++) {
+		struct mana_tx_comp_oob *cqe_oob;
+
+		if (WARN_ON_ONCE(!completions[i].is_sq))
+			return;
+
+		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
+		if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type !=
+				 MANA_CQE_COMPLETION))
+			return;
+
+		switch (cqe_oob->cqe_hdr.cqe_type) {
+		case CQE_TX_OKAY:
+			break;
+
+		case CQE_TX_SA_DROP:
+		case CQE_TX_MTU_DROP:
+		case CQE_TX_INVALID_OOB:
+		case CQE_TX_INVALID_ETH_TYPE:
+		case CQE_TX_HDR_PROCESSING_ERROR:
+		case CQE_TX_VF_DISABLED:
+		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
+		case CQE_TX_VPORT_DISABLED:
+		case CQE_TX_VLAN_TAGGING_VIOLATION:
+			WARN_ONCE(1, "TX: CQE error %d: ignored.\n",
+				  cqe_oob->cqe_hdr.cqe_type);
+			break;
+
+		default:
+			/* If the CQE type is unexpected, log an error, assert,
+			 * and go through the error path.
+			 */
+			WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n",
+				  cqe_oob->cqe_hdr.cqe_type);
+			return;
+		}
+
+		if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
+			return;
+
+		skb = skb_dequeue(&txq->pending_skbs);
+		if (WARN_ON_ONCE(!skb))
+			return;
+
+		wqe_info = (struct gdma_posted_wqe_info *)skb->cb;
+		wqe_unit_cnt += wqe_info->wqe_size_in_bu;
+
+		mana_unmap_skb(skb, apc);
+
+		napi_consume_skb(skb, gdma_eq->eq.budget);
+
+		pkt_transmitted++;
+	}
+
+	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
+		return;
+
+	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);
+
+	gdma_wq = txq->gdma_sq;
+	avail_space = mana_gd_wq_avail_space(gdma_wq);
+
+	/* Ensure tail updated before checking q stop */
+	smp_mb();
+
+	net_txq = txq->net_txq;
+	txq_stopped = netif_tx_queue_stopped(net_txq);
+
+	/* Ensure checking txq_stopped before apc->port_is_up. */
+	smp_rmb();
+
+	if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
+		netif_tx_wake_queue(net_txq);
+		apc->eth_stats.wake_queue++;
+	}
+
+	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
+		WARN_ON_ONCE(1);
+}
+
+static void mana_post_pkt_rxq(struct mana_rxq *rxq)
+{
+	struct mana_recv_buf_oob *recv_buf_oob;
+	u32 curr_index;
+	int err;
+
+	curr_index = rxq->buf_index++;
+	if (rxq->buf_index == rxq->num_rx_buf)
+		rxq->buf_index = 0;
+
+	recv_buf_oob = &rxq->rx_oobs[curr_index];
+
+	err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req,
+				    &recv_buf_oob->wqe_inf);
+	if (WARN_ON_ONCE(err))
+		return;
+
+	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
+}
+
+static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
+			struct mana_rxq *rxq)
+{
+	struct mana_stats *rx_stats = &rxq->stats;
+	struct net_device *ndev = rxq->ndev;
+	uint pkt_len = cqe->ppi[0].pkt_len;
+	struct mana_port_context *apc;
+	u16 rxq_idx = rxq->rxq_idx;
+	struct napi_struct *napi;
+	struct gdma_queue *eq;
+	struct sk_buff *skb;
+	u32 hash_value;
+
+	apc = netdev_priv(ndev);
+	eq = apc->eqs[rxq_idx].eq;
+	eq->eq.work_done++;
+	napi = &eq->eq.napi;
+
+	if (!buf_va) {
+		++ndev->stats.rx_dropped;
+		return;
+	}
+
+	skb = build_skb(buf_va, PAGE_SIZE);
+
+	if (!skb) {
+		free_page((unsigned long)buf_va);
+		++ndev->stats.rx_dropped;
+		return;
+	}
+
+	skb_put(skb, pkt_len);
+	skb->dev = napi->dev;
+
+	skb->protocol = eth_type_trans(skb, ndev);
+	skb_checksum_none_assert(skb);
+	skb_record_rx_queue(skb, rxq_idx);
+
+	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
+		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
+		hash_value = cqe->ppi[0].pkt_hash;
+
+		if (cqe->rx_hashtype & MANA_HASH_L4)
+			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
+		else
+			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
+	}
+
+	napi_gro_receive(napi, skb);
+
+	u64_stats_update_begin(&rx_stats->syncp);
+	rx_stats->packets++;
+	rx_stats->bytes += pkt_len;
+	u64_stats_update_end(&rx_stats->syncp);
+}
+
+static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
+				struct gdma_comp *cqe)
+{
+	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
+	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
+	struct net_device *ndev = rxq->ndev;
+	struct mana_recv_buf_oob *rxbuf_oob;
+	struct device *dev = gc->dev;
+	void *new_buf, *old_buf;
+	struct page *new_page;
+	u32 curr, pktlen;
+	dma_addr_t da;
+
+	switch (oob->cqe_hdr.cqe_type) {
+	case CQE_RX_OKAY:
+		break;
+
+	case CQE_RX_TRUNCATED:
+		netdev_err(ndev, "Dropped a truncated packet\n");
+		return;
+
+	case CQE_RX_COALESCED_4:
+		netdev_err(ndev, "RX coalescing is unsupported\n");
+		return;
+
+	case CQE_RX_OBJECT_FENCE:
+		netdev_err(ndev, "RX Fencing is unsupported\n");
+		return;
+
+	default:
+		netdev_err(ndev, "Unknown RX CQE type = %d\n",
+			   oob->cqe_hdr.cqe_type);
+		return;
+	}
+
+	if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY)
+		return;
+
+	pktlen = oob->ppi[0].pkt_len;
+
+	if (pktlen == 0) {
+		/* data packets should never have packetlength of zero */
+		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
+			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
+		return;
+	}
+
+	curr = rxq->buf_index;
+	rxbuf_oob = &rxq->rx_oobs[curr];
+	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
+
+	new_page = alloc_page(GFP_ATOMIC);
+
+	if (new_page) {
+		da = dma_map_page(dev, new_page, 0, rxq->datasize,
+				  DMA_FROM_DEVICE);
+
+		if (dma_mapping_error(dev, da)) {
+			__free_page(new_page);
+			new_page = NULL;
+		}
+	}
+
+	new_buf = new_page ? page_to_virt(new_page) : NULL;
+
+	if (new_buf) {
+		dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
+			       DMA_FROM_DEVICE);
+
+		old_buf = rxbuf_oob->buf_va;
+
+		/* refresh the rxbuf_oob with the new page */
+		rxbuf_oob->buf_va = new_buf;
+		rxbuf_oob->buf_dma_addr = da;
+		rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
+	} else {
+		old_buf = NULL; /* drop the packet if no memory */
+	}
+
+	mana_rx_skb(old_buf, oob, rxq);
+
+	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
+
+	mana_post_pkt_rxq(rxq);
+}
+
+static void mana_poll_rx_cq(struct mana_cq *cq)
+{
+	struct gdma_comp *comp = cq->gdma_comp_buf;
+	u32 comp_read, i;
+
+	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
+	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
+
+	for (i = 0; i < comp_read; i++) {
+		if (WARN_ON_ONCE(comp[i].is_sq))
+			return;
+
+		/* verify recv cqe references the right rxq */
+		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
+			return;
+
+		mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
+	}
+}
+
+static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
+{
+	struct mana_cq *cq = context;
+
+	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);
+
+	if (cq->type == MANA_CQ_TYPE_RX)
+		mana_poll_rx_cq(cq);
+	else
+		mana_poll_tx_cq(cq);
+
+	mana_gd_arm_cq(gdma_queue);
+}
+
+static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+
+	if (!cq->gdma_cq)
+		return;
+
+	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
+}
+
+static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+
+	if (!txq->gdma_sq)
+		return;
+
+	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
+}
+
+static void mana_destroy_txq(struct mana_port_context *apc)
+{
+	int i;
+
+	if (!apc->tx_qp)
+		return;
+
+	for (i = 0; i < apc->num_queues; i++) {
+		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);
+
+		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);
+
+		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
+	}
+
+	kfree(apc->tx_qp);
+	apc->tx_qp = NULL;
+}
+
+static int mana_create_txq(struct mana_port_context *apc,
+			   struct net_device *net)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct mana_obj_spec wq_spec;
+	struct mana_obj_spec cq_spec;
+	struct gdma_queue_spec spec;
+	struct gdma_context *gc;
+	struct mana_txq *txq;
+	struct mana_cq *cq;
+	u32 txq_size;
+	u32 cq_size;
+	int err;
+	int i;
+
+	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
+			     GFP_KERNEL);
+	if (!apc->tx_qp)
+		return -ENOMEM;
+
+	/*  The minimum size of the WQE is 32 bytes, hence
+	 *  MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
+	 *  the SQ can store. This value is then used to size other queues
+	 *  to prevent overflow.
+	 */
+	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
+	BUILD_BUG_ON(!PAGE_ALIGNED(txq_size));
+
+	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
+	cq_size = PAGE_ALIGN(cq_size);
+
+	gc = gd->gdma_context;
+
+	for (i = 0; i < apc->num_queues; i++) {
+		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;
+
+		/* Create SQ */
+		txq = &apc->tx_qp[i].txq;
+
+		u64_stats_init(&txq->stats.syncp);
+		txq->ndev = net;
+		txq->net_txq = netdev_get_tx_queue(net, i);
+		txq->vp_offset = apc->tx_vp_offset;
+		skb_queue_head_init(&txq->pending_skbs);
+
+		memset(&spec, 0, sizeof(spec));
+		spec.type = GDMA_SQ;
+		spec.monitor_avl_buf = true;
+		spec.queue_size = txq_size;
+		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
+		if (err)
+			goto out;
+
+		/* Create SQ's CQ */
+		cq = &apc->tx_qp[i].tx_cq;
+		cq->gdma_comp_buf = apc->eqs[i].cqe_poll;
+		cq->type = MANA_CQ_TYPE_TX;
+
+		cq->txq = txq;
+
+		memset(&spec, 0, sizeof(spec));
+		spec.type = GDMA_CQ;
+		spec.monitor_avl_buf = false;
+		spec.queue_size = cq_size;
+		spec.cq.callback = mana_cq_handler;
+		spec.cq.parent_eq = apc->eqs[i].eq;
+		spec.cq.context = cq;
+		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
+		if (err)
+			goto out;
+
+		memset(&wq_spec, 0, sizeof(wq_spec));
+		memset(&cq_spec, 0, sizeof(cq_spec));
+
+		wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region;
+		wq_spec.queue_size = txq->gdma_sq->queue_size;
+
+		cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
+		cq_spec.queue_size = cq->gdma_cq->queue_size;
+		cq_spec.modr_ctx_id = 0;
+		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
+
+		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
+					 &wq_spec, &cq_spec,
+					 &apc->tx_qp[i].tx_object);
+
+		if (err)
+			goto out;
+
+		txq->gdma_sq->id = wq_spec.queue_index;
+		cq->gdma_cq->id = cq_spec.queue_index;
+
+		txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+		cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+
+		txq->gdma_txq_id = txq->gdma_sq->id;
+
+		cq->gdma_id = cq->gdma_cq->id;
+
+		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs))
+			return -EINVAL;
+
+		gc->cq_table[cq->gdma_id] = cq->gdma_cq;
+
+		mana_gd_arm_cq(cq->gdma_cq);
+	}
+
+	return 0;
+out:
+	mana_destroy_txq(apc);
+	return err;
+}
+
+static void mana_napi_sync_for_rx(struct mana_rxq *rxq)
+{
+	struct net_device *ndev = rxq->ndev;
+	struct mana_port_context *apc;
+	u16 rxq_idx = rxq->rxq_idx;
+	struct napi_struct *napi;
+	struct gdma_queue *eq;
+
+	apc = netdev_priv(ndev);
+	eq = apc->eqs[rxq_idx].eq;
+	napi = &eq->eq.napi;
+
+	napi_synchronize(napi);
+}
+
+static void mana_destroy_rxq(struct mana_port_context *apc,
+			     struct mana_rxq *rxq, bool validate_state)
+
+{
+	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
+	struct mana_recv_buf_oob *rx_oob;
+	struct device *dev = gc->dev;
+	int i;
+
+	if (!rxq)
+		return;
+
+	if (validate_state)
+		mana_napi_sync_for_rx(rxq);
+
+	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);
+
+	mana_deinit_cq(apc, &rxq->rx_cq);
+
+	for (i = 0; i < rxq->num_rx_buf; i++) {
+		rx_oob = &rxq->rx_oobs[i];
+
+		if (!rx_oob->buf_va)
+			continue;
+
+		dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
+			       DMA_FROM_DEVICE);
+
+		free_page((unsigned long)rx_oob->buf_va);
+		rx_oob->buf_va = NULL;
+	}
+
+	if (rxq->gdma_rq)
+		mana_gd_destroy_queue(gc, rxq->gdma_rq);
+
+	kfree(rxq);
+}
+
+#define MANA_WQE_HEADER_SIZE 16
+#define MANA_WQE_SGE_SIZE 16
+
+static int mana_alloc_rx_wqe(struct mana_port_context *apc,
+			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
+{
+	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
+	struct mana_recv_buf_oob *rx_oob;
+	struct device *dev = gc->dev;
+	struct page *page;
+	dma_addr_t da;
+	u32 buf_idx;
+
+	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+
+	*rxq_size = 0;
+	*cq_size = 0;
+
+	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
+		rx_oob = &rxq->rx_oobs[buf_idx];
+		memset(rx_oob, 0, sizeof(*rx_oob));
+
+		page = alloc_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE);
+
+		if (dma_mapping_error(dev, da)) {
+			__free_page(page);
+			return -ENOMEM;
+		}
+
+		rx_oob->buf_va = page_to_virt(page);
+		rx_oob->buf_dma_addr = da;
+
+		rx_oob->num_sge = 1;
+		rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
+		rx_oob->sgl[0].size = rxq->datasize;
+		rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
+
+		rx_oob->wqe_req.sgl = rx_oob->sgl;
+		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
+		rx_oob->wqe_req.inline_oob_size = 0;
+		rx_oob->wqe_req.inline_oob_data = NULL;
+		rx_oob->wqe_req.flags = 0;
+		rx_oob->wqe_req.client_data_unit = 0;
+
+		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
+				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
+		*cq_size += COMP_ENTRY_SIZE;
+	}
+
+	return 0;
+}
+
+static int mana_push_wqe(struct mana_rxq *rxq)
+{
+	struct mana_recv_buf_oob *rx_oob;
+	u32 buf_idx;
+	int err;
+
+	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
+		rx_oob = &rxq->rx_oobs[buf_idx];
+
+		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
+					    &rx_oob->wqe_inf);
+		if (err)
+			return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
+					u32 rxq_idx, struct mana_eq *eq,
+					struct net_device *ndev)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	struct mana_obj_spec wq_spec;
+	struct mana_obj_spec cq_spec;
+	struct gdma_queue_spec spec;
+	struct mana_cq *cq = NULL;
+	struct gdma_context *gc;
+	u32 cq_size, rq_size;
+	struct mana_rxq *rxq;
+	int err;
+
+	gc = gd->gdma_context;
+
+	rxq = kzalloc(sizeof(*rxq) +
+		      RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob),
+		      GFP_KERNEL);
+	if (!rxq)
+		return NULL;
+
+	rxq->ndev = ndev;
+	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
+	rxq->rxq_idx = rxq_idx;
+	rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
+	rxq->rxobj = INVALID_MANA_HANDLE;
+
+	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
+	if (err)
+		goto out;
+
+	rq_size = PAGE_ALIGN(rq_size);
+	cq_size = PAGE_ALIGN(cq_size);
+
+	/* Create RQ */
+	memset(&spec, 0, sizeof(spec));
+	spec.type = GDMA_RQ;
+	spec.monitor_avl_buf = true;
+	spec.queue_size = rq_size;
+	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
+	if (err)
+		goto out;
+
+	/* Create RQ's CQ */
+	cq = &rxq->rx_cq;
+	cq->gdma_comp_buf = eq->cqe_poll;
+	cq->type = MANA_CQ_TYPE_RX;
+	cq->rxq = rxq;
+
+	memset(&spec, 0, sizeof(spec));
+	spec.type = GDMA_CQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = cq_size;
+	spec.cq.callback = mana_cq_handler;
+	spec.cq.parent_eq = eq->eq;
+	spec.cq.context = cq;
+	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
+	if (err)
+		goto out;
+
+	memset(&wq_spec, 0, sizeof(wq_spec));
+	memset(&cq_spec, 0, sizeof(cq_spec));
+	wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region;
+	wq_spec.queue_size = rxq->gdma_rq->queue_size;
+
+	cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region;
+	cq_spec.queue_size = cq->gdma_cq->queue_size;
+	cq_spec.modr_ctx_id = 0;
+	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;
+
+	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
+				 &wq_spec, &cq_spec, &rxq->rxobj);
+	if (err)
+		goto out;
+
+	rxq->gdma_rq->id = wq_spec.queue_index;
+	cq->gdma_cq->id = cq_spec.queue_index;
+
+	rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+	cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION;
+
+	rxq->gdma_id = rxq->gdma_rq->id;
+	cq->gdma_id = cq->gdma_cq->id;
+
+	err = mana_push_wqe(rxq);
+	if (err)
+		goto out;
+
+	if (cq->gdma_id >= gc->max_num_cqs)
+		goto out;
+
+	gc->cq_table[cq->gdma_id] = cq->gdma_cq;
+
+	mana_gd_arm_cq(cq->gdma_cq);
+out:
+	if (!err)
+		return rxq;
+
+	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);
+
+	mana_destroy_rxq(apc, rxq, false);
+
+	if (cq)
+		mana_deinit_cq(apc, cq);
+
+	return NULL;
+}
+
+static int mana_add_rx_queues(struct mana_port_context *apc,
+			      struct net_device *ndev)
+{
+	struct mana_rxq *rxq;
+	int err = 0;
+	int i;
+
+	for (i = 0; i < apc->num_queues; i++) {
+		rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev);
+		if (!rxq) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		u64_stats_init(&rxq->stats.syncp);
+
+		apc->rxqs[i] = rxq;
+	}
+
+	apc->default_rxobj = apc->rxqs[0]->rxobj;
+out:
+	return err;
+}
+
+static void mana_destroy_vport(struct mana_port_context *apc)
+{
+	struct mana_rxq *rxq;
+	u32 rxq_idx;
+
+	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
+		rxq = apc->rxqs[rxq_idx];
+		if (!rxq)
+			continue;
+
+		mana_destroy_rxq(apc, rxq, true);
+		apc->rxqs[rxq_idx] = NULL;
+	}
+
+	mana_destroy_txq(apc);
+}
+
+static int mana_create_vport(struct mana_port_context *apc,
+			     struct net_device *net)
+{
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	int err;
+
+	apc->default_rxobj = INVALID_MANA_HANDLE;
+
+	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
+	if (err)
+		return err;
+
+	return mana_create_txq(apc, net);
+}
+
+static void mana_rss_table_init(struct mana_port_context *apc)
+{
+	int i;
+
+	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+		apc->indir_table[i] =
+			ethtool_rxfh_indir_default(i, apc->num_queues);
+}
+
+int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
+		    bool update_hash, bool update_tab)
+{
+	u32 queue_idx;
+	int i;
+
+	if (update_tab) {
+		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+			queue_idx = apc->indir_table[i];
+			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
+		}
+	}
+
+	return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
+}
+
+static int mana_init_port(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	u32 max_txq, max_rxq, max_queues;
+	int port_idx = apc->port_idx;
+	u32 num_indirect_entries;
+	int err;
+
+	err = mana_init_port_context(apc);
+	if (err)
+		return err;
+
+	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
+				   &num_indirect_entries);
+	if (err) {
+		netdev_err(ndev, "Failed to query info for vPort 0\n");
+		goto reset_apc;
+	}
+
+	max_queues = min_t(u32, max_txq, max_rxq);
+	if (apc->max_queues > max_queues)
+		apc->max_queues = max_queues;
+
+	if (apc->num_queues > apc->max_queues)
+		apc->num_queues = apc->max_queues;
+
+	ether_addr_copy(ndev->dev_addr, apc->mac_addr);
+
+	return 0;
+
+reset_apc:
+	kfree(apc->rxqs);
+	apc->rxqs = NULL;
+	return err;
+}
+
+int mana_alloc_queues(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	struct gdma_dev *gd = apc->ac->gdma_dev;
+	int err;
+
+	err = mana_create_eq(apc);
+	if (err)
+		return err;
+
+	err = mana_create_vport(apc, ndev);
+	if (err)
+		goto destroy_eq;
+
+	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
+	if (err)
+		goto destroy_vport;
+
+	err = mana_add_rx_queues(apc, ndev);
+	if (err)
+		goto destroy_vport;
+
+	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;
+
+	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
+	if (err)
+		goto destroy_vport;
+
+	mana_rss_table_init(apc);
+
+	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
+	if (err)
+		goto destroy_vport;
+
+	return 0;
+
+destroy_vport:
+	mana_destroy_vport(apc);
+destroy_eq:
+	mana_destroy_eq(gd->gdma_context, apc);
+	return err;
+}
+
+int mana_attach(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	int err;
+
+	ASSERT_RTNL();
+
+	err = mana_init_port(ndev);
+	if (err)
+		return err;
+
+	err = mana_alloc_queues(ndev);
+	if (err) {
+		kfree(apc->rxqs);
+		apc->rxqs = NULL;
+		return err;
+	}
+
+	netif_device_attach(ndev);
+
+	apc->port_is_up = apc->port_st_save;
+
+	/* Ensure port state updated before txq state */
+	smp_wmb();
+
+	if (apc->port_is_up) {
+		netif_carrier_on(ndev);
+		netif_tx_wake_all_queues(ndev);
+	}
+
+	return 0;
+}
+
+static int mana_dealloc_queues(struct net_device *ndev)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	struct mana_txq *txq;
+	int i, err;
+
+	if (apc->port_is_up)
+		return -EINVAL;
+
+	/* No packet can be transmitted now since apc->port_is_up is false.
+	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
+	 * a txq because it may not timely see apc->port_is_up being cleared
+	 * to false, but it doesn't matter since mana_start_xmit() drops any
+	 * new packets due to apc->port_is_up being false.
+	 *
+	 * Drain all the in-flight TX packets
+	 */
+	for (i = 0; i < apc->num_queues; i++) {
+		txq = &apc->tx_qp[i].txq;
+
+		while (atomic_read(&txq->pending_sends) > 0)
+			usleep_range(1000, 2000);
+	}
+
+	/* We're 100% sure the queues can no longer be woken up, because
+	 * we're sure now mana_poll_tx_cq() can't be running.
+	 */
+
+	apc->rss_state = TRI_STATE_FALSE;
+	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
+	if (err) {
+		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
+		return err;
+	}
+
+	/* TODO: Implement RX fencing */
+	ssleep(1);
+
+	mana_destroy_vport(apc);
+
+	mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc);
+
+	return 0;
+}
+
+int mana_detach(struct net_device *ndev, bool from_close)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	int err;
+
+	ASSERT_RTNL();
+
+	apc->port_st_save = apc->port_is_up;
+	apc->port_is_up = false;
+
+	/* Ensure port state updated before txq state */
+	smp_wmb();
+
+	netif_tx_disable(ndev);
+	netif_carrier_off(ndev);
+
+	if (apc->port_st_save) {
+		err = mana_dealloc_queues(ndev);
+		if (err)
+			return err;
+	}
+
+	if (!from_close) {
+		netif_device_detach(ndev);
+		mana_cleanup_port_context(apc);
+	}
+
+	return 0;
+}
+
+static int mana_probe_port(struct mana_context *ac, int port_idx,
+			   struct net_device **ndev_storage)
+{
+	struct gdma_context *gc = ac->gdma_dev->gdma_context;
+	struct mana_port_context *apc;
+	struct net_device *ndev;
+	int err;
+
+	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
+				 gc->max_num_queues);
+	if (!ndev)
+		return -ENOMEM;
+
+	*ndev_storage = ndev;
+
+	apc = netdev_priv(ndev);
+	apc->ac = ac;
+	apc->ndev = ndev;
+	apc->max_queues = gc->max_num_queues;
+	apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES);
+	apc->port_handle = INVALID_MANA_HANDLE;
+	apc->port_idx = port_idx;
+
+	ndev->netdev_ops = &mana_devops;
+	ndev->ethtool_ops = &mana_ethtool_ops;
+	ndev->mtu = ETH_DATA_LEN;
+	ndev->max_mtu = ndev->mtu;
+	ndev->min_mtu = ndev->mtu;
+	ndev->needed_headroom = MANA_HEADROOM;
+	SET_NETDEV_DEV(ndev, gc->dev);
+
+	netif_carrier_off(ndev);
+
+	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);
+
+	err = mana_init_port(ndev);
+	if (err)
+		goto free_net;
+
+	netdev_lockdep_set_classes(ndev);
+
+	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	ndev->hw_features |= NETIF_F_RXCSUM;
+	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
+	ndev->hw_features |= NETIF_F_RXHASH;
+	ndev->features = ndev->hw_features;
+	ndev->vlan_features = 0;
+
+	err = register_netdev(ndev);
+	if (err) {
+		netdev_err(ndev, "Unable to register netdev.\n");
+		goto reset_apc;
+	}
+
+	return 0;
+
+reset_apc:
+	kfree(apc->rxqs);
+	apc->rxqs = NULL;
+free_net:
+	*ndev_storage = NULL;
+	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
+	free_netdev(ndev);
+	return err;
+}
+
+int mana_probe(struct gdma_dev *gd)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct device *dev = gc->dev;
+	struct mana_context *ac;
+	int err;
+	int i;
+
+	dev_info(dev,
+		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
+		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);
+
+	err = mana_gd_register_device(gd);
+	if (err)
+		return err;
+
+	ac = kzalloc(sizeof(*ac), GFP_KERNEL);
+	if (!ac)
+		return -ENOMEM;
+
+	ac->gdma_dev = gd;
+	ac->num_ports = 1;
+	gd->driver_data = ac;
+
+	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
+				    MANA_MICRO_VERSION, &ac->num_ports);
+	if (err)
+		goto out;
+
+	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
+		ac->num_ports = MAX_PORTS_IN_MANA_DEV;
+
+	for (i = 0; i < ac->num_ports; i++) {
+		err = mana_probe_port(ac, i, &ac->ports[i]);
+		if (err)
+			break;
+	}
+out:
+	if (err)
+		mana_remove(gd);
+
+	return err;
+}
+
+void mana_remove(struct gdma_dev *gd)
+{
+	struct gdma_context *gc = gd->gdma_context;
+	struct mana_context *ac = gd->driver_data;
+	struct device *dev = gc->dev;
+	struct net_device *ndev;
+	int i;
+
+	for (i = 0; i < ac->num_ports; i++) {
+		ndev = ac->ports[i];
+		if (!ndev) {
+			if (i == 0)
+				dev_err(dev, "No net device to remove\n");
+			goto out;
+		}
+
+		/* All cleanup actions should stay after rtnl_lock(), otherwise
+		 * other functions may access partially cleaned up data.
+		 */
+		rtnl_lock();
+
+		mana_detach(ndev, false);
+
+		unregister_netdevice(ndev);
+
+		rtnl_unlock();
+
+		free_netdev(ndev);
+	}
+out:
+	mana_gd_deregister_device(gd);
+	gd->driver_data = NULL;
+	gd->gdma_context = NULL;
+	kfree(ac);
+}
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
new file mode 100644
index 000000000000..7e74339f39ae
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+
+#include "mana.h"
+
+static const struct {
+	char name[ETH_GSTRING_LEN];
+	u16 offset;
+} mana_eth_stats[] = {
+	{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
+	{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
+};
+
+static int mana_get_sset_count(struct net_device *ndev, int stringset)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned int num_queues = apc->num_queues;
+
+	if (stringset != ETH_SS_STATS)
+		return -EINVAL;
+
+	return ARRAY_SIZE(mana_eth_stats) + num_queues * 4;
+}
+
+static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned int num_queues = apc->num_queues;
+	u8 *p = data;
+	int i;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) {
+		memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < num_queues; i++) {
+		sprintf(p, "rx_%d_packets", i);
+		p += ETH_GSTRING_LEN;
+		sprintf(p, "rx_%d_bytes", i);
+		p += ETH_GSTRING_LEN;
+	}
+
+	for (i = 0; i < num_queues; i++) {
+		sprintf(p, "tx_%d_packets", i);
+		p += ETH_GSTRING_LEN;
+		sprintf(p, "tx_%d_bytes", i);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+static void mana_get_ethtool_stats(struct net_device *ndev,
+				   struct ethtool_stats *e_stats, u64 *data)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned int num_queues = apc->num_queues;
+	void *eth_stats = &apc->eth_stats;
+	struct mana_stats *stats;
+	unsigned int start;
+	u64 packets, bytes;
+	int q, i = 0;
+
+	if (!apc->port_is_up)
+		return;
+
+	for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++)
+		data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset);
+
+	for (q = 0; q < num_queues; q++) {
+		stats = &apc->rxqs[q]->stats;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		data[i++] = packets;
+		data[i++] = bytes;
+	}
+
+	for (q = 0; q < num_queues; q++) {
+		stats = &apc->tx_qp[q].txq.stats;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			packets = stats->packets;
+			bytes = stats->bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		data[i++] = packets;
+		data[i++] = bytes;
+	}
+}
+
+static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd,
+			  u32 *rules)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = apc->num_queues;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static u32 mana_get_rxfh_key_size(struct net_device *ndev)
+{
+	return MANA_HASH_KEY_SIZE;
+}
+
+static u32 mana_rss_indir_size(struct net_device *ndev)
+{
+	return MANA_INDIRECT_TABLE_SIZE;
+}
+
+static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	int i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
+
+	if (indir) {
+		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+			indir[i] = apc->indir_table[i];
+	}
+
+	if (key)
+		memcpy(key, apc->hashkey, MANA_HASH_KEY_SIZE);
+
+	return 0;
+}
+
+static int mana_set_rxfh(struct net_device *ndev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	bool update_hash = false, update_table = false;
+	u32 save_table[MANA_INDIRECT_TABLE_SIZE];
+	u8 save_key[MANA_HASH_KEY_SIZE];
+	int i, err;
+
+	if (!apc->port_is_up)
+		return -EOPNOTSUPP;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (indir) {
+		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+			if (indir[i] >= apc->num_queues)
+				return -EINVAL;
+
+		update_table = true;
+		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+			save_table[i] = apc->indir_table[i];
+			apc->indir_table[i] = indir[i];
+		}
+	}
+
+	if (key) {
+		update_hash = true;
+		memcpy(save_key, apc->hashkey, MANA_HASH_KEY_SIZE);
+		memcpy(apc->hashkey, key, MANA_HASH_KEY_SIZE);
+	}
+
+	err = mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
+
+	if (err) { /* recover to original values */
+		if (update_table) {
+			for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+				apc->indir_table[i] = save_table[i];
+		}
+
+		if (update_hash)
+			memcpy(apc->hashkey, save_key, MANA_HASH_KEY_SIZE);
+
+		mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
+	}
+
+	return err;
+}
+
+static void mana_get_channels(struct net_device *ndev,
+			      struct ethtool_channels *channel)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+
+	channel->max_combined = apc->max_queues;
+	channel->combined_count = apc->num_queues;
+}
+
+static int mana_set_channels(struct net_device *ndev,
+			     struct ethtool_channels *channels)
+{
+	struct mana_port_context *apc = netdev_priv(ndev);
+	unsigned int new_count = channels->combined_count;
+	unsigned int old_count = apc->num_queues;
+	int err, err2;
+
+	if (!apc->port_is_up)
+		return -EOPNOTSUPP;
+
+	err = mana_detach(ndev, false);
+	if (err) {
+		netdev_err(ndev, "mana_detach failed: %d\n", err);
+		return err;
+	}
+
+	apc->num_queues = new_count;
+	err = mana_attach(ndev);
+	if (!err)
+		return 0;
+
+	netdev_err(ndev, "mana_attach failed: %d\n", err);
+
+	/* Try to roll it back to the old configuration. */
+	apc->num_queues = old_count;
+	err2 = mana_attach(ndev);
+	if (err2)
+		netdev_err(ndev, "mana re-attach failed: %d\n", err2);
+
+	return err;
+}
+
+const struct ethtool_ops mana_ethtool_ops = {
+	.get_ethtool_stats	= mana_get_ethtool_stats,
+	.get_sset_count		= mana_get_sset_count,
+	.get_strings		= mana_get_strings,
+	.get_rxnfc		= mana_get_rxnfc,
+	.get_rxfh_key_size	= mana_get_rxfh_key_size,
+	.get_rxfh_indir_size	= mana_rss_indir_size,
+	.get_rxfh		= mana_get_rxfh,
+	.set_rxfh		= mana_set_rxfh,
+	.get_channels		= mana_get_channels,
+	.set_channels		= mana_set_channels,
+};
diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c
new file mode 100644
index 000000000000..da255da62176
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#include "shm_channel.h"
+
+#define PAGE_FRAME_L48_WIDTH_BYTES 6
+#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8)
+#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF
+#define PAGE_FRAME_H4_WIDTH_BITS 4
+#define VECTOR_MASK 0xFFFF
+#define SHMEM_VF_RESET_STATE ((u32)-1)
+
+#define SMC_MSG_TYPE_ESTABLISH_HWC 1
+#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0
+
+#define SMC_MSG_TYPE_DESTROY_HWC 2
+#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0
+
+#define SMC_MSG_DIRECTION_REQUEST 0
+#define SMC_MSG_DIRECTION_RESPONSE 1
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+/* Shared memory channel protocol header
+ *
+ * msg_type: set on request and response; response matches request.
+ * msg_version: newer PF writes back older response (matching request)
+ *  older PF acts on latest version known and sets that version in result
+ *  (less than request).
+ * direction: 0 for request, VF->PF; 1 for response, PF->VF.
+ * status: 0 on request,
+ *   operation result on response (success = 0, failure = 1 or greater).
+ * reset_vf: If set on either establish or destroy request, indicates perform
+ *  FLR before/after the operation.
+ * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned.
+ */
+union smc_proto_hdr {
+	u32 as_uint32;
+
+	struct {
+		u8 msg_type	: 3;
+		u8 msg_version	: 3;
+		u8 reserved_1	: 1;
+		u8 direction	: 1;
+
+		u8 status;
+
+		u8 reserved_2;
+
+		u8 reset_vf	: 1;
+		u8 reserved_3	: 6;
+		u8 owner_is_pf	: 1;
+	};
+}; /* HW DATA */
+
+#define SMC_APERTURE_BITS 256
+#define SMC_BASIC_UNIT (sizeof(u32))
+#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8))
+#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1)
+
+static int mana_smc_poll_register(void __iomem *base, bool reset)
+{
+	void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT;
+	u32 last_dword;
+	int i;
+
+	/* Poll the hardware for the ownership bit. This should be pretty fast,
+	 * but let's do it in a loop just in case the hardware or the PF
+	 * driver are temporarily busy.
+	 */
+	for (i = 0; i < 20 * 1000; i++)  {
+		last_dword = readl(ptr);
+
+		/* shmem reads as 0xFFFFFFFF in the reset case */
+		if (reset && last_dword == SHMEM_VF_RESET_STATE)
+			return 0;
+
+		/* If bit_31 is set, the PF currently owns the SMC. */
+		if (!(last_dword & BIT(31)))
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type,
+				  u32 msg_version, bool reset_vf)
+{
+	void __iomem *base = sc->base;
+	union smc_proto_hdr hdr;
+	int err;
+
+	/* Wait for PF to respond. */
+	err = mana_smc_poll_register(base, reset_vf);
+	if (err)
+		return err;
+
+	hdr.as_uint32 = readl(base + SMC_LAST_DWORD * SMC_BASIC_UNIT);
+
+	if (reset_vf && hdr.as_uint32 == SHMEM_VF_RESET_STATE)
+		return 0;
+
+	/* Validate protocol fields from the PF driver */
+	if (hdr.msg_type != msg_type || hdr.msg_version > msg_version ||
+	    hdr.direction != SMC_MSG_DIRECTION_RESPONSE) {
+		dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n",
+			hdr.as_uint32, msg_type, msg_version);
+		return -EPROTO;
+	}
+
+	/* Validate the operation result */
+	if (hdr.status != 0) {
+		dev_err(sc->dev, "SMC operation failed: 0x%x\n", hdr.status);
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+void mana_smc_init(struct shm_channel *sc, struct device *dev,
+		   void __iomem *base)
+{
+	sc->dev = dev;
+	sc->base = base;
+}
+
+int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
+		       u64 cq_addr, u64 rq_addr, u64 sq_addr,
+		       u32 eq_msix_index)
+{
+	union smc_proto_hdr *hdr;
+	u16 all_addr_h4bits = 0;
+	u16 frame_addr_seq = 0;
+	u64 frame_addr = 0;
+	u8 shm_buf[32];
+	u64 *shmem;
+	u32 *dword;
+	u8 *ptr;
+	int err;
+	int i;
+
+	/* Ensure VF already has possession of shared memory */
+	err = mana_smc_poll_register(sc->base, false);
+	if (err) {
+		dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err);
+		return err;
+	}
+
+	if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) ||
+	    !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr))
+		return -EINVAL;
+
+	if ((eq_msix_index & VECTOR_MASK) != eq_msix_index)
+		return -EINVAL;
+
+	/* Scheme for packing four addresses and extra info into 256 bits.
+	 *
+	 * Addresses must be page frame aligned, so only frame address bits
+	 * are transferred.
+	 *
+	 * 52-bit frame addresses are split into the lower 48 bits and upper
+	 * 4 bits. Lower 48 bits of 4 address are written sequentially from
+	 * the start of the 256-bit shared memory region followed by 16 bits
+	 * containing the upper 4 bits of the 4 addresses in sequence.
+	 *
+	 * A 16 bit EQ vector number fills out the next-to-last 32-bit dword.
+	 *
+	 * The final 32-bit dword is used for protocol control information as
+	 * defined in smc_proto_hdr.
+	 */
+
+	memset(shm_buf, 0, sizeof(shm_buf));
+	ptr = shm_buf;
+
+	/* EQ addr: low 48 bits of frame address */
+	shmem = (u64 *)ptr;
+	frame_addr = PHYS_PFN(eq_addr);
+	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
+	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
+		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
+	ptr += PAGE_FRAME_L48_WIDTH_BYTES;
+
+	/* CQ addr: low 48 bits of frame address */
+	shmem = (u64 *)ptr;
+	frame_addr = PHYS_PFN(cq_addr);
+	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
+	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
+		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
+	ptr += PAGE_FRAME_L48_WIDTH_BYTES;
+
+	/* RQ addr: low 48 bits of frame address */
+	shmem = (u64 *)ptr;
+	frame_addr = PHYS_PFN(rq_addr);
+	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
+	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
+		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
+	ptr += PAGE_FRAME_L48_WIDTH_BYTES;
+
+	/* SQ addr: low 48 bits of frame address */
+	shmem = (u64 *)ptr;
+	frame_addr = PHYS_PFN(sq_addr);
+	*shmem = frame_addr & PAGE_FRAME_L48_MASK;
+	all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) <<
+		(frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS);
+	ptr += PAGE_FRAME_L48_WIDTH_BYTES;
+
+	/* High 4 bits of the four frame addresses */
+	*((u16 *)ptr) = all_addr_h4bits;
+	ptr += sizeof(u16);
+
+	/* EQ MSIX vector number */
+	*((u16 *)ptr) = (u16)eq_msix_index;
+	ptr += sizeof(u16);
+
+	/* 32-bit protocol header in final dword */
+	*((u32 *)ptr) = 0;
+
+	hdr = (union smc_proto_hdr *)ptr;
+	hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC;
+	hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION;
+	hdr->direction = SMC_MSG_DIRECTION_REQUEST;
+	hdr->reset_vf = reset_vf;
+
+	/* Write 256-message buffer to shared memory (final 32-bit write
+	 * triggers HW to set possession bit to PF).
+	 */
+	dword = (u32 *)shm_buf;
+	for (i = 0; i < SMC_APERTURE_DWORDS; i++)
+		writel(*dword++, sc->base + i * SMC_BASIC_UNIT);
+
+	/* Read shmem response (polling for VF possession) and validate.
+	 * For setup, waiting for response on shared memory is not strictly
+	 * necessary, since wait occurs later for results to appear in EQE's.
+	 */
+	err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC,
+				     SMC_MSG_TYPE_ESTABLISH_HWC_VERSION,
+				     reset_vf);
+	if (err) {
+		dev_err(sc->dev, "Error when setting up HWC: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf)
+{
+	union smc_proto_hdr hdr = {};
+	int err;
+
+	/* Ensure already has possession of shared memory */
+	err = mana_smc_poll_register(sc->base, false);
+	if (err) {
+		dev_err(sc->dev, "Timeout when tearing down HWC\n");
+		return err;
+	}
+
+	/* Set up protocol header for HWC destroy message */
+	hdr.msg_type = SMC_MSG_TYPE_DESTROY_HWC;
+	hdr.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION;
+	hdr.direction = SMC_MSG_DIRECTION_REQUEST;
+	hdr.reset_vf = reset_vf;
+
+	/* Write message in high 32 bits of 256-bit shared memory, causing HW
+	 * to set possession bit to PF.
+	 */
+	writel(hdr.as_uint32, sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT);
+
+	/* Read shmem response (polling for VF possession) and validate.
+	 * For teardown, waiting for response is required to ensure hardware
+	 * invalidates MST entries before software frees memory.
+	 */
+	err = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC,
+				     SMC_MSG_TYPE_DESTROY_HWC_VERSION,
+				     reset_vf);
+	if (err) {
+		dev_err(sc->dev, "Error when tearing down HWC: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.h b/drivers/net/ethernet/microsoft/mana/shm_channel.h
new file mode 100644
index 000000000000..5199b41497ff
--- /dev/null
+++ b/drivers/net/ethernet/microsoft/mana/shm_channel.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _SHM_CHANNEL_H
+#define _SHM_CHANNEL_H
+
+struct shm_channel {
+	struct device *dev;
+	void __iomem *base;
+};
+
+void mana_smc_init(struct shm_channel *sc, struct device *dev,
+		   void __iomem *base);
+
+int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
+		       u64 cq_addr, u64 rq_addr, u64 sq_addr,
+		       u32 eq_msix_index);
+
+int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf);
+
+#endif /* _SHM_CHANNEL_H */
-- 
cgit v1.2.3


From d9c9e4db186ab4d81f84e6f22b225d333b9424e3 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:38 +0200
Subject: bpf: Factorize bpf_trace_printk and bpf_seq_printf

Two helpers (trace_printk and seq_printf) have very similar
implementations of format string parsing and a third one is coming
(snprintf). To avoid code duplication and make the code easier to
maintain, this moves the operations associated with format string
parsing (validation and argument sanitization) into one generic
function.

The implementation of the two existing helpers already drifted quite a
bit so unifying them entailed a lot of changes:

- bpf_trace_printk always expected fmt[fmt_size] to be the terminating
  NULL character, this is no longer true, the first 0 is terminating.
- bpf_trace_printk now supports %% (which produces the percentage char).
- bpf_trace_printk now skips width formating fields.
- bpf_trace_printk now supports the X modifier (capital hexadecimal).
- bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
- argument casting on 32 bit has been simplified into one macro and
  using an enum instead of obscure int increments.

- bpf_seq_printf now uses bpf_trace_copy_string instead of
  strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
- bpf_seq_printf now prints longs correctly on 32 bit architectures.

- both were changed to use a global per-cpu tmp buffer instead of one
  stack buffer for trace_printk and 6 small buffers for seq_printf.
- to avoid per-cpu buffer usage conflict, these helpers disable
  preemption while the per-cpu buffer is in use.
- both helpers now support the %ps and %pS specifiers to print symbols.

The implementation is also moved from bpf_trace.c to helpers.c because
the upcoming bpf_snprintf helper will be made available to all BPF
programs and will need it.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-2-revest@chromium.org
---
 include/linux/bpf.h      |  20 +++
 kernel/bpf/helpers.c     | 256 ++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c | 371 +++++------------------------------------------
 3 files changed, 313 insertions(+), 334 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ff8cd68c01b3..77d1d8c65b81 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2077,4 +2077,24 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
+enum bpf_printf_mod_type {
+	BPF_PRINTF_INT,
+	BPF_PRINTF_LONG,
+	BPF_PRINTF_LONG_LONG,
+};
+
+/* Workaround for getting va_list handling working with different argument type
+ * combinations generically for 32 and 64 bit archs.
+ */
+#define BPF_CAST_FMT_ARG(arg_nb, args, mod)				\
+	(mod[arg_nb] == BPF_PRINTF_LONG_LONG ||				\
+	 (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)	\
+	  ? (u64)args[arg_nb]						\
+	  : (u32)args[arg_nb])
+
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+		       u64 *final_args, enum bpf_printf_mod_type *mod,
+		       u32 num_args);
+void bpf_printf_cleanup(void);
+
 #endif /* _LINUX_BPF_H */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index f306611c4ddf..9ca57eb1fc0d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -669,6 +669,262 @@ const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
 	.arg1_type	= ARG_PTR_TO_PERCPU_BTF_ID,
 };
 
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+		size_t bufsz)
+{
+	void __user *user_ptr = (__force void __user *)unsafe_ptr;
+
+	buf[0] = 0;
+
+	switch (fmt_ptype) {
+	case 's':
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+		if ((unsigned long)unsafe_ptr < TASK_SIZE)
+			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+		fallthrough;
+#endif
+	case 'k':
+		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
+	case 'u':
+		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
+	}
+
+	return -EINVAL;
+}
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+#define MAX_PRINTF_BUF_LEN	512
+
+struct bpf_printf_buf {
+	char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+
+static int try_get_fmt_tmp_buf(char **tmp_buf)
+{
+	struct bpf_printf_buf *bufs;
+	int used;
+
+	if (*tmp_buf)
+		return 0;
+
+	preempt_disable();
+	used = this_cpu_inc_return(bpf_printf_buf_used);
+	if (WARN_ON_ONCE(used > 1)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		preempt_enable();
+		return -EBUSY;
+	}
+	bufs = this_cpu_ptr(&bpf_printf_buf);
+	*tmp_buf = bufs->tmp_buf;
+
+	return 0;
+}
+
+void bpf_printf_cleanup(void)
+{
+	if (this_cpu_read(bpf_printf_buf_used)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		preempt_enable();
+	}
+}
+
+/*
+ * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ *   final_args a copy of raw_args where pointers from BPF have been sanitized
+ *   into pointers safe to use by snprintf. This also writes in the mod array
+ *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_printf_cleanup should be called to free them after use.
+ */
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u64 *final_args, enum bpf_printf_mod_type *mod,
+			u32 num_args)
+{
+	char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end;
+	size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
+	int err, i, num_spec = 0, copy_size;
+	enum bpf_printf_mod_type cur_mod;
+	u64 cur_arg;
+	char fmt_ptype;
+
+	if (!!final_args != !!mod)
+		return -EINVAL;
+
+	fmt_end = strnchr(fmt, fmt_size, 0);
+	if (!fmt_end)
+		return -EINVAL;
+	fmt_size = fmt_end - fmt;
+
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (num_spec >= num_args) {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* The string is zero-terminated so if fmt[i] != 0, we can
+		 * always access fmt[i + 1], in the worst case it will be a 0
+		 */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formatting field */
+		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 'p') {
+			cur_mod = BPF_PRINTF_LONG;
+
+			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
+			    fmt[i + 2] == 's') {
+				fmt_ptype = fmt[i + 1];
+				i += 2;
+				goto fmt_str;
+			}
+
+			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+			    fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+				/* just kernel pointers */
+				if (final_args)
+					cur_arg = raw_args[num_spec];
+				goto fmt_next;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			i += 2;
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+			if (tmp_buf_len < copy_size) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+						       copy_size);
+			if (err < 0)
+				memset(tmp_buf, 0, copy_size);
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += copy_size;
+			tmp_buf_len -= copy_size;
+
+			goto fmt_next;
+		} else if (fmt[i] == 's') {
+			cur_mod = BPF_PRINTF_LONG;
+			fmt_ptype = fmt[i];
+fmt_str:
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1])) {
+				err = -EINVAL;
+				goto cleanup;
+			}
+
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			if (!tmp_buf_len) {
+				err = -ENOSPC;
+				goto cleanup;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[num_spec];
+			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+						    fmt_ptype, tmp_buf_len);
+			if (err < 0) {
+				tmp_buf[0] = '\0';
+				err = 1;
+			}
+
+			cur_arg = (u64)(long)tmp_buf;
+			tmp_buf += err;
+			tmp_buf_len -= err;
+
+			goto fmt_next;
+		}
+
+		cur_mod = BPF_PRINTF_INT;
+
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG;
+			i++;
+		}
+		if (fmt[i] == 'l') {
+			cur_mod = BPF_PRINTF_LONG_LONG;
+			i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+		    fmt[i] != 'x' && fmt[i] != 'X') {
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		if (final_args)
+			cur_arg = raw_args[num_spec];
+fmt_next:
+		if (final_args) {
+			mod[num_spec] = cur_mod;
+			final_args[num_spec] = cur_arg;
+		}
+		num_spec++;
+	}
+
+	err = 0;
+cleanup:
+	if (err)
+		bpf_printf_cleanup();
+out:
+	return err;
+}
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0d23755c2747..a13f8644b357 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -372,188 +372,38 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 	return &bpf_probe_write_user_proto;
 }
 
-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
-		size_t bufsz)
-{
-	void __user *user_ptr = (__force void __user *)unsafe_ptr;
-
-	buf[0] = 0;
-
-	switch (fmt_ptype) {
-	case 's':
-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
-		if ((unsigned long)unsafe_ptr < TASK_SIZE) {
-			strncpy_from_user_nofault(buf, user_ptr, bufsz);
-			break;
-		}
-		fallthrough;
-#endif
-	case 'k':
-		strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
-		break;
-	case 'u':
-		strncpy_from_user_nofault(buf, user_ptr, bufsz);
-		break;
-	}
-}
-
 static DEFINE_RAW_SPINLOCK(trace_printk_lock);
 
-#define BPF_TRACE_PRINTK_SIZE   1024
+#define MAX_TRACE_PRINTK_VARARGS	3
+#define BPF_TRACE_PRINTK_SIZE		1024
 
-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+	   u64, arg2, u64, arg3)
 {
+	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+	enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
 	static char buf[BPF_TRACE_PRINTK_SIZE];
 	unsigned long flags;
-	va_list ap;
 	int ret;
 
-	raw_spin_lock_irqsave(&trace_printk_lock, flags);
-	va_start(ap, fmt);
-	ret = vsnprintf(buf, sizeof(buf), fmt, ap);
-	va_end(ap);
-	/* vsnprintf() will not append null for zero-length strings */
+	ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
+				 MAX_TRACE_PRINTK_VARARGS);
+	if (ret < 0)
+		return ret;
+
+	ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
+	/* snprintf() will not append null for zero-length strings */
 	if (ret == 0)
 		buf[0] = '\0';
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
 	trace_bpf_trace_printk(buf);
 	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
 
-	return ret;
-}
-
-/*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
- */
-BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
-	   u64, arg2, u64, arg3)
-{
-	int i, mod[3] = {}, fmt_cnt = 0;
-	char buf[64], fmt_ptype;
-	void *unsafe_ptr = NULL;
-	bool str_seen = false;
+	bpf_printf_cleanup();
 
-	/*
-	 * bpf_check()->check_func_arg()->check_stack_boundary()
-	 * guarantees that fmt points to bpf program stack,
-	 * fmt_size bytes of it were initialized and fmt_size > 0
-	 */
-	if (fmt[--fmt_size] != 0)
-		return -EINVAL;
-
-	/* check format string for allowed specifiers */
-	for (i = 0; i < fmt_size; i++) {
-		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
-			return -EINVAL;
-
-		if (fmt[i] != '%')
-			continue;
-
-		if (fmt_cnt >= 3)
-			return -EINVAL;
-
-		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
-		i++;
-		if (fmt[i] == 'l') {
-			mod[fmt_cnt]++;
-			i++;
-		} else if (fmt[i] == 'p') {
-			mod[fmt_cnt]++;
-			if ((fmt[i + 1] == 'k' ||
-			     fmt[i + 1] == 'u') &&
-			    fmt[i + 2] == 's') {
-				fmt_ptype = fmt[i + 1];
-				i += 2;
-				goto fmt_str;
-			}
-
-			if (fmt[i + 1] == 'B') {
-				i++;
-				goto fmt_next;
-			}
-
-			/* disallow any further format extensions */
-			if (fmt[i + 1] != 0 &&
-			    !isspace(fmt[i + 1]) &&
-			    !ispunct(fmt[i + 1]))
-				return -EINVAL;
-
-			goto fmt_next;
-		} else if (fmt[i] == 's') {
-			mod[fmt_cnt]++;
-			fmt_ptype = fmt[i];
-fmt_str:
-			if (str_seen)
-				/* allow only one '%s' per fmt string */
-				return -EINVAL;
-			str_seen = true;
-
-			if (fmt[i + 1] != 0 &&
-			    !isspace(fmt[i + 1]) &&
-			    !ispunct(fmt[i + 1]))
-				return -EINVAL;
-
-			switch (fmt_cnt) {
-			case 0:
-				unsafe_ptr = (void *)(long)arg1;
-				arg1 = (long)buf;
-				break;
-			case 1:
-				unsafe_ptr = (void *)(long)arg2;
-				arg2 = (long)buf;
-				break;
-			case 2:
-				unsafe_ptr = (void *)(long)arg3;
-				arg3 = (long)buf;
-				break;
-			}
-
-			bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
-					sizeof(buf));
-			goto fmt_next;
-		}
-
-		if (fmt[i] == 'l') {
-			mod[fmt_cnt]++;
-			i++;
-		}
-
-		if (fmt[i] != 'i' && fmt[i] != 'd' &&
-		    fmt[i] != 'u' && fmt[i] != 'x')
-			return -EINVAL;
-fmt_next:
-		fmt_cnt++;
-	}
-
-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
-#define __BPF_TP(...)							\
-	bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...)						\
-	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
-	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
-	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...)						\
-	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
-	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
-	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...)						\
-	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
-	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
-	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
-	return __BPF_TP_EMIT();
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -581,184 +431,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 }
 
 #define MAX_SEQ_PRINTF_VARARGS		12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY	6
-#define MAX_SEQ_PRINTF_STR_LEN		128
-
-struct bpf_seq_printf_buf {
-	char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
 
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
-	int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
-	int i, buf_used, copy_size, num_args;
-	u64 params[MAX_SEQ_PRINTF_VARARGS];
-	struct bpf_seq_printf_buf *bufs;
-	const u64 *args = data;
-
-	buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
-	if (WARN_ON_ONCE(buf_used > 1)) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
-	/*
-	 * bpf_check()->check_func_arg()->check_stack_boundary()
-	 * guarantees that fmt points to bpf program stack,
-	 * fmt_size bytes of it were initialized and fmt_size > 0
-	 */
-	if (fmt[--fmt_size] != 0)
-		goto out;
-
-	if (data_len & 7)
-		goto out;
-
-	for (i = 0; i < fmt_size; i++) {
-		if (fmt[i] == '%') {
-			if (fmt[i + 1] == '%')
-				i++;
-			else if (!data || !data_len)
-				goto out;
-		}
-	}
+	enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
+	u64 args[MAX_SEQ_PRINTF_VARARGS];
+	int err, num_args;
 
+	if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
 	num_args = data_len / 8;
 
-	/* check format string for allowed specifiers */
-	for (i = 0; i < fmt_size; i++) {
-		/* only printable ascii for now. */
-		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (fmt[i] != '%')
-			continue;
-
-		if (fmt[i + 1] == '%') {
-			i++;
-			continue;
-		}
-
-		if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
-			err = -E2BIG;
-			goto out;
-		}
-
-		if (fmt_cnt >= num_args) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
-		i++;
-
-		/* skip optional "[0 +-][num]" width formating field */
-		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
-		       fmt[i] == ' ')
-			i++;
-		if (fmt[i] >= '1' && fmt[i] <= '9') {
-			i++;
-			while (fmt[i] >= '0' && fmt[i] <= '9')
-				i++;
-		}
-
-		if (fmt[i] == 's') {
-			void *unsafe_ptr;
-
-			/* try our best to copy */
-			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
-				err = -E2BIG;
-				goto out;
-			}
-
-			unsafe_ptr = (void *)(long)args[fmt_cnt];
-			err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
-					unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
-			if (err < 0)
-				bufs->buf[memcpy_cnt][0] = '\0';
-			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
-			fmt_cnt++;
-			memcpy_cnt++;
-			continue;
-		}
-
-		if (fmt[i] == 'p') {
-			if (fmt[i + 1] == 0 ||
-			    fmt[i + 1] == 'K' ||
-			    fmt[i + 1] == 'x' ||
-			    fmt[i + 1] == 'B') {
-				/* just kernel pointers */
-				params[fmt_cnt] = args[fmt_cnt];
-				fmt_cnt++;
-				continue;
-			}
-
-			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
-			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
-				err = -EINVAL;
-				goto out;
-			}
-			if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
-				err = -EINVAL;
-				goto out;
-			}
-
-			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
-				err = -E2BIG;
-				goto out;
-			}
-
-
-			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
-			err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
-						(void *) (long) args[fmt_cnt],
-						copy_size);
-			if (err < 0)
-				memset(bufs->buf[memcpy_cnt], 0, copy_size);
-			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
-			i += 2;
-			fmt_cnt++;
-			memcpy_cnt++;
-			continue;
-		}
-
-		if (fmt[i] == 'l') {
-			i++;
-			if (fmt[i] == 'l')
-				i++;
-		}
-
-		if (fmt[i] != 'i' && fmt[i] != 'd' &&
-		    fmt[i] != 'u' && fmt[i] != 'x' &&
-		    fmt[i] != 'X') {
-			err = -EINVAL;
-			goto out;
-		}
-
-		params[fmt_cnt] = args[fmt_cnt];
-		fmt_cnt++;
-	}
+	err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+	if (err < 0)
+		return err;
 
 	/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
 	 * all of them to seq_printf().
 	 */
-	seq_printf(m, fmt, params[0], params[1], params[2], params[3],
-		   params[4], params[5], params[6], params[7], params[8],
-		   params[9], params[10], params[11]);
+	seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+		BPF_CAST_FMT_ARG(11, args, mod));
 
-	err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
-	this_cpu_dec(bpf_seq_printf_buf_used);
-	return err;
+	bpf_printf_cleanup();
+
+	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
 }
 
 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
-- 
cgit v1.2.3


From fff13c4bb646ef849fd74ced87eef54340d28c21 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:39 +0200
Subject: bpf: Add a ARG_PTR_TO_CONST_STR argument type

This type provides the guarantee that an argument is going to be a const
pointer to somewhere in a read-only map value. It also checks that this
pointer is followed by a zero character before the end of the map value.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-3-revest@chromium.org
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/verifier.c | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 77d1d8c65b81..c160526fc8bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -309,6 +309,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
 	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
+	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	__BPF_ARG_TYPE_MAX,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 852541a435ef..5f46dd6f3383 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4787,6 +4787,7 @@ static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALU
 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
+static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4817,6 +4818,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
 	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
+	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5067,6 +5069,45 @@ skip_type_check:
 		if (err)
 			return err;
 		err = check_ptr_alignment(env, reg, 0, size, true);
+	} else if (arg_type == ARG_PTR_TO_CONST_STR) {
+		struct bpf_map *map = reg->map_ptr;
+		int map_off;
+		u64 map_addr;
+		char *str_ptr;
+
+		if (reg->type != PTR_TO_MAP_VALUE || !map ||
+		    !bpf_map_is_rdonly(map)) {
+			verbose(env, "R%d does not point to a readonly map'\n", regno);
+			return -EACCES;
+		}
+
+		if (!tnum_is_const(reg->var_off)) {
+			verbose(env, "R%d is not a constant address'\n", regno);
+			return -EACCES;
+		}
+
+		if (!map->ops->map_direct_value_addr) {
+			verbose(env, "no direct value access support for this map type\n");
+			return -EACCES;
+		}
+
+		err = check_map_access(env, regno, reg->off,
+				       map->value_size - reg->off, false);
+		if (err)
+			return err;
+
+		map_off = reg->off + reg->var_off.value;
+		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
+		if (err) {
+			verbose(env, "direct value access on string failed\n");
+			return err;
+		}
+
+		str_ptr = (char *)(long)(map_addr);
+		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
+			verbose(env, "string is not zero-terminated\n");
+			return -EINVAL;
+		}
 	}
 
 	return err;
-- 
cgit v1.2.3


From 7b15523a989b63927c2bb08e9b5b0bbc10b58bef Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:40 +0200
Subject: bpf: Add a bpf_snprintf helper

The implementation takes inspiration from the existing bpf_trace_printk
helper but there are a few differences:

To allow for a large number of format-specifiers, parameters are
provided in an array, like in bpf_seq_printf.

Because the output string takes two arguments and the array of
parameters also takes two arguments, the format string needs to fit in
one argument. Thankfully, ARG_PTR_TO_CONST_STR is guaranteed to point to
a zero-terminated read-only map so we don't need a format string length
arg.

Because the format-string is known at verification time, we also do
a first pass of format string validation in the verifier logic. This
makes debugging easier.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-4-revest@chromium.org
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 28 +++++++++++++++++++++++
 kernel/bpf/helpers.c           | 50 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c          | 41 ++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 28 +++++++++++++++++++++++
 6 files changed, 150 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c160526fc8bf..f8a45f109e96 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1953,6 +1953,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
+extern const struct bpf_func_proto bpf_snprintf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index df164a44bb41..ec6d85a81744 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4708,6 +4708,33 @@ union bpf_attr {
  *	Return
  *		The number of traversed map elements for success, **-EINVAL** for
  *		invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ *	Description
+ *		Outputs a string into the **str** buffer of size **str_size**
+ *		based on a format string stored in a read-only map pointed by
+ *		**fmt**.
+ *
+ *		Each format specifier in **fmt** corresponds to one u64 element
+ *		in the **data** array. For strings and pointers where pointees
+ *		are accessed, only the pointer values are stored in the *data*
+ *		array. The *data_len* is the size of *data* in bytes.
+ *
+ *		Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ *		memory. Reading kernel memory may fail due to either invalid
+ *		address or valid address but requiring a major memory fault. If
+ *		reading kernel memory fails, the string for **%s** will be an
+ *		empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ *		Not returning error to bpf program is consistent with what
+ *		**bpf_trace_printk**\ () does for now.
+ *
+ *	Return
+ *		The strictly positive length of the formatted string, including
+ *		the trailing zero character. If the return value is greater than
+ *		**str_size**, **str** contains a truncated string, guaranteed to
+ *		be zero-terminated except when **str_size** is 0.
+ *
+ *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4875,6 +4902,7 @@ union bpf_attr {
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
+	FN(snprintf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9ca57eb1fc0d..85b26ca5aacd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -925,6 +925,54 @@ out:
 	return err;
 }
 
+#define MAX_SNPRINTF_VARARGS		12
+
+BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
+	   const void *, data, u32, data_len)
+{
+	enum bpf_printf_mod_type mod[MAX_SNPRINTF_VARARGS];
+	u64 args[MAX_SNPRINTF_VARARGS];
+	int err, num_args;
+
+	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
+	num_args = data_len / 8;
+
+	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
+	 * can safely give an unbounded size.
+	 */
+	err = bpf_printf_prepare(fmt, UINT_MAX, data, args, mod, num_args);
+	if (err < 0)
+		return err;
+
+	/* Maximumly we can have MAX_SNPRINTF_VARARGS parameters, just give
+	 * all of them to snprintf().
+	 */
+	err = snprintf(str, str_size, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+		BPF_CAST_FMT_ARG(11, args, mod));
+
+	bpf_printf_cleanup();
+
+	return err + 1;
+}
+
+const struct bpf_func_proto bpf_snprintf_proto = {
+	.func		= bpf_snprintf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_PTR_TO_CONST_STR,
+	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -1013,6 +1061,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_probe_read_kernel_str_proto;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
+	case BPF_FUNC_snprintf:
+		return &bpf_snprintf_proto;
 	default:
 		return NULL;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5f46dd6f3383..994ef36c5f60 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5918,6 +5918,41 @@ static int check_reference_leak(struct bpf_verifier_env *env)
 	return state->acquired_refs ? -EINVAL : 0;
 }
 
+static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
+				   struct bpf_reg_state *regs)
+{
+	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
+	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
+	struct bpf_map *fmt_map = fmt_reg->map_ptr;
+	int err, fmt_map_off, num_args;
+	u64 fmt_addr;
+	char *fmt;
+
+	/* data must be an array of u64 */
+	if (data_len_reg->var_off.value % 8)
+		return -EINVAL;
+	num_args = data_len_reg->var_off.value / 8;
+
+	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
+	 * and map_direct_value_addr is set.
+	 */
+	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
+	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
+						  fmt_map_off);
+	if (err)
+		return err;
+	fmt = (char *)(long)fmt_addr + fmt_map_off;
+
+	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
+	 * can focus on validating the format specifiers.
+	 */
+	err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
+	if (err < 0)
+		verbose(env, "Invalid format string\n");
+
+	return err;
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			     int *insn_idx_p)
 {
@@ -6032,6 +6067,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return -EINVAL;
 	}
 
+	if (func_id == BPF_FUNC_snprintf) {
+		err = check_bpf_snprintf_call(env, regs);
+		if (err < 0)
+			return err;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a13f8644b357..2a8bcdc927c7 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1076,6 +1076,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_for_each_map_elem:
 		return &bpf_for_each_map_elem_proto;
+	case BPF_FUNC_snprintf:
+		return &bpf_snprintf_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index df164a44bb41..ec6d85a81744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4708,6 +4708,33 @@ union bpf_attr {
  *	Return
  *		The number of traversed map elements for success, **-EINVAL** for
  *		invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ *	Description
+ *		Outputs a string into the **str** buffer of size **str_size**
+ *		based on a format string stored in a read-only map pointed by
+ *		**fmt**.
+ *
+ *		Each format specifier in **fmt** corresponds to one u64 element
+ *		in the **data** array. For strings and pointers where pointees
+ *		are accessed, only the pointer values are stored in the *data*
+ *		array. The *data_len* is the size of *data* in bytes.
+ *
+ *		Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ *		memory. Reading kernel memory may fail due to either invalid
+ *		address or valid address but requiring a major memory fault. If
+ *		reading kernel memory fails, the string for **%s** will be an
+ *		empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ *		Not returning error to bpf program is consistent with what
+ *		**bpf_trace_printk**\ () does for now.
+ *
+ *	Return
+ *		The strictly positive length of the formatted string, including
+ *		the trailing zero character. If the return value is greater than
+ *		**str_size**, **str** contains a truncated string, guaranteed to
+ *		be zero-terminated except when **str_size** is 0.
+ *
+ *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4875,6 +4902,7 @@ union bpf_attr {
 	FN(sock_from_file),		\
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
+	FN(snprintf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 83cd92b46484aa8f64cdc0bff8ac6940d1f78519 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:41 +0200
Subject: libbpf: Initialize the bpf_seq_printf parameters array field by field

When initializing the __param array with a one liner, if all args are
const, the initial array value will be placed in the rodata section but
because libbpf does not support relocation in the rodata section, any
pointer in this array will stay NULL.

Fixes: c09add2fbc5a ("tools/libbpf: Add bpf_iter support")
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-5-revest@chromium.org
---
 tools/lib/bpf/bpf_tracing.h | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f9ef37707888..1c2e91ee041d 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,20 +413,38 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+	___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
 /*
  * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
  * in a structure.
  */
-#define BPF_SEQ_PRINTF(seq, fmt, args...)				    \
-	({								    \
-		_Pragma("GCC diagnostic push")				    \
-		_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	    \
-		static const char ___fmt[] = fmt;			    \
-		unsigned long long ___param[] = { args };		    \
-		_Pragma("GCC diagnostic pop")				    \
-		int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt),    \
-					    ___param, sizeof(___param));    \
-		___ret;							    \
-	})
+#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
+({								\
+	static const char ___fmt[] = fmt;			\
+	unsigned long long ___param[___bpf_narg(args)];		\
+								\
+	_Pragma("GCC diagnostic push")				\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
+	___bpf_fill(___param, args);				\
+	_Pragma("GCC diagnostic pop")				\
+								\
+	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
+		       ___param, sizeof(___param));		\
+})
 
 #endif
-- 
cgit v1.2.3


From 58c2b1f5e0121efd698b6ec8e45e47e58ca9caee Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:42 +0200
Subject: libbpf: Introduce a BPF_SNPRINTF helper macro

Similarly to BPF_SEQ_PRINTF, this macro turns variadic arguments into an
array of u64, making it more natural to call the bpf_snprintf helper.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-6-revest@chromium.org
---
 tools/lib/bpf/bpf_tracing.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 1c2e91ee041d..8c954ebc0c7c 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -447,4 +447,22 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 		       ___param, sizeof(___param));		\
 })
 
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...)		\
+({								\
+	static const char ___fmt[] = fmt;			\
+	unsigned long long ___param[___bpf_narg(args)];		\
+								\
+	_Pragma("GCC diagnostic push")				\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
+	___bpf_fill(___param, args);				\
+	_Pragma("GCC diagnostic pop")				\
+								\
+	bpf_snprintf(out, out_size, ___fmt,			\
+		     ___param, sizeof(___param));		\
+})
+
 #endif
-- 
cgit v1.2.3


From c2e39c6bdc7eb48459ec1d34d4f27eb82299f4b7 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 19 Apr 2021 17:52:43 +0200
Subject: selftests/bpf: Add a series of tests for bpf_snprintf

The "positive" part tests all format specifiers when things go well.

The "negative" part makes sure that incorrect format strings fail at
load time.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210419155243.1632274-7-revest@chromium.org
---
 tools/testing/selftests/bpf/prog_tests/snprintf.c  | 125 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_snprintf.c  |  73 ++++++++++++
 .../selftests/bpf/progs/test_snprintf_single.c     |  20 ++++
 3 files changed, 218 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/snprintf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_snprintf_single.c

diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..a958c22aec75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT  "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET  sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT   "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET   sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT  "schedule schedule+0x0/"
+#define MIN_SYM_RET  sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT  "str1 longstr"
+#define EXP_STR_RET  sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT "    4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+void test_snprintf_positive(void)
+{
+	char exp_addr_out[] = EXP_ADDR_OUT;
+	char exp_sym_out[]  = EXP_SYM_OUT;
+	struct test_snprintf *skel;
+
+	skel = test_snprintf__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+	ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+	ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+	ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+	ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+			 sizeof(exp_sym_out) - 1), "sym_out");
+	ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+	ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+			 sizeof(exp_addr_out) - 1), "addr_out");
+	ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+	ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+	ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+	ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+	ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+	ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+	ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+	ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+	ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+	ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+	test_snprintf__destroy(skel);
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+	struct test_snprintf_single *skel;
+	int ret;
+
+	skel = test_snprintf_single__open();
+	if (!skel)
+		return -EINVAL;
+
+	memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+
+	ret = test_snprintf_single__load(skel);
+	test_snprintf_single__destroy(skel);
+
+	return ret;
+}
+
+void test_snprintf_negative(void)
+{
+	ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+	ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+	ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+	ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+	ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+	ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+	ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+	ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+	ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+	ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+	if (test__start_subtest("snprintf_positive"))
+		test_snprintf_positive();
+	if (test__start_subtest("snprintf_negative"))
+		test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..951a0301c553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	/* Convenient values to pretty-print */
+	const __u8 ex_ipv4[] = {127, 0, 0, 1};
+	const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+	static const char str1[] = "str1";
+	static const char longstr[] = "longstr";
+
+	/* Integer types */
+	num_ret  = BPF_SNPRINTF(num_out, sizeof(num_out),
+				"%d %u %x %li %llu %lX",
+				-8, 9, 150, -424242, 1337, 0xDABBAD00);
+	/* IP addresses */
+	ip_ret   = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+				&ex_ipv4, &ex_ipv6);
+	/* Symbol lookup formatting */
+	sym_ret  = BPF_SNPRINTF(sym_out,  sizeof(sym_out), "%ps %pS %pB",
+				&schedule, &schedule, &schedule);
+	/* Kernel pointers */
+	addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+				0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+	/* Strings embedding */
+	str_ret  = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
+				str1, longstr);
+	/* Overflow */
+	over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+	/* Padding of fixed width numbers */
+	pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+	/* No args */
+	noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+	/* No buffer */
+	nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..402adaf344f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+static const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	unsigned long long arg = 42;
+
+	bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From c1d9e34e11281a8ba1a1c54e4db554232a461488 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 16 Apr 2021 15:15:40 -0500
Subject: ethtool: ioctl: Fix out-of-bounds warning in
 store_link_ksettings_for_user()

Fix the following out-of-bounds warning:

net/ethtool/ioctl.c:492:2: warning: 'memcpy' offset [49, 84] from the object at 'link_usettings' is out of the bounds of referenced subobject 'base' with type 'struct ethtool_link_settings' at offset 0 [-Warray-bounds]

The problem is that the original code is trying to copy data into a
some struct members adjacent to each other in a single call to
memcpy(). This causes a legitimate compiler warning because memcpy()
overruns the length of &link_usettings.base. Fix this by directly
using &link_usettings and _from_ as destination and source addresses,
instead.

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 27f1c5224acb..3fa7a394eabf 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -489,7 +489,7 @@ store_link_ksettings_for_user(void __user *to,
 {
 	struct ethtool_link_usettings link_usettings;
 
-	memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
+	memcpy(&link_usettings, from, sizeof(link_usettings));
 	bitmap_to_arr32(link_usettings.link_modes.supported,
 			from->link_modes.supported,
 			__ETHTOOL_LINK_MODE_MASK_NBITS);
-- 
cgit v1.2.3


From ed8157f1ebf1ae81a8fa2653e3f20d2076fad1c9 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Sat, 17 Apr 2021 07:30:46 +0800
Subject: net: sched: tapr: prevent cycle_time == 0 in parse_taprio_schedule

There is a reproducible sequence from the userland that will trigger a WARN_ON()
condition in taprio_get_start_time, which causes kernel to panic if configured
as "panic_on_warn". Catch this condition in parse_taprio_schedule to
prevent this condition.

Reported as bug on syzkaller:
https://syzkaller.appspot.com/bug?extid=d50710fd0873a9c6b40c

Reported-by: syzbot+d50710fd0873a9c6b40c@syzkaller.appspotmail.com
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..909c798b7403 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -901,6 +901,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 
 		list_for_each_entry(entry, &new->entries, list)
 			cycle = ktime_add_ns(cycle, entry->interval);
+
+		if (!cycle) {
+			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+			return -EINVAL;
+		}
+
 		new->cycle_time = cycle;
 	}
 
-- 
cgit v1.2.3


From 87614b931c24d9dfc934ef9deaaf55d1cbdc2ac2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 02:42:21 +0300
Subject: net: enetc: create a common enetc_pf_to_port helper

Even though ENETC interfaces are exposed as individual PCIe PFs with
their own driver instances, the ENETC is still fundamentally a
multi-port Ethernet controller, and some parts of the IP take a port
number (as can be seen in the PSFP implementation).

Create a common helper that can be used outside of the TSN code for
retrieving the ENETC port number based on the PF number. This is only
correct for LS1028A, the only Linux-capable instantiation of ENETC thus
far.

Note that ENETC port 3 is PF 6. The TSN code did not care about this
because ENETC port 3 does not support TSN, so the wrong mapping done by
enetc_get_port for PF 6 could have never been hit.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.h     | 16 ++++++++++++++++
 drivers/net/ethernet/freescale/enetc/enetc_qos.c | 16 ++++++----------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 3de71669e317..08b283347d9c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -237,6 +237,22 @@ static inline bool enetc_si_is_pf(struct enetc_si *si)
 	return !!(si->hw.port);
 }
 
+static inline int enetc_pf_to_port(struct pci_dev *pf_pdev)
+{
+	switch (pf_pdev->devfn) {
+	case 0:
+		return 0;
+	case 1:
+		return 1;
+	case 2:
+		return 2;
+	case 6:
+		return 3;
+	default:
+		return -1;
+	}
+}
+
 #define ENETC_MAX_NUM_TXQS	8
 #define ENETC_INT_NAME_MAX	(IFNAMSIZ + 8)
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index cb7fa4bceaf2..af699f2ad095 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -455,11 +455,6 @@ static struct enetc_psfp epsfp = {
 
 static LIST_HEAD(enetc_block_cb_list);
 
-static inline int enetc_get_port(struct enetc_ndev_priv *priv)
-{
-	return priv->si->pdev->devfn & 0x7;
-}
-
 /* Stream Identity Entry Set Descriptor */
 static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 				 struct enetc_streamid *sid,
@@ -504,7 +499,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 
 	si_conf = &cbd.sid_set;
 	/* Only one port supported for one entry, set itself */
-	si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv));
+	si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
 	si_conf->id_type = 1;
 	si_conf->oui[2] = 0x0;
 	si_conf->oui[1] = 0x80;
@@ -529,7 +524,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
 
 	si_conf->en = 0x80;
 	si_conf->stream_handle = cpu_to_le32(sid->handle);
-	si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv));
+	si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
 	si_conf->id_type = sid->filtertype;
 	si_conf->oui[2] = 0x0;
 	si_conf->oui[1] = 0x80;
@@ -591,7 +586,8 @@ static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv,
 	}
 
 	sfi_config->sg_inst_table_index = cpu_to_le16(sfi->gate_id);
-	sfi_config->input_ports = cpu_to_le32(1 << enetc_get_port(priv));
+	sfi_config->input_ports =
+		cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev));
 
 	/* The priority value which may be matched against the
 	 * frame’s priority value to determine a match for this entry.
@@ -1562,10 +1558,10 @@ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data)
 
 	switch (f->command) {
 	case FLOW_BLOCK_BIND:
-		set_bit(enetc_get_port(priv), &epsfp.dev_bitmap);
+		set_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap);
 		break;
 	case FLOW_BLOCK_UNBIND:
-		clear_bit(enetc_get_port(priv), &epsfp.dev_bitmap);
+		clear_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap);
 		if (!epsfp.dev_bitmap)
 			clean_psfp_all();
 		break;
-- 
cgit v1.2.3


From 4ac7acc67f29927975e2493a9f4ede0c631bb87a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 02:42:22 +0300
Subject: dt-bindings: net: fsl: enetc: add the IERB documentation

Mention the required compatible string and base address for the
Integrated Endpoint Register Block node.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/fsl-enetc.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/fsl-enetc.txt b/Documentation/devicetree/bindings/net/fsl-enetc.txt
index b7034ccbc1bd..9b9a3f197e2d 100644
--- a/Documentation/devicetree/bindings/net/fsl-enetc.txt
+++ b/Documentation/devicetree/bindings/net/fsl-enetc.txt
@@ -102,3 +102,18 @@ Example:
 			full-duplex;
 		};
 	};
+
+* Integrated Endpoint Register Block bindings
+
+Optionally, the fsl_enetc driver can probe on the Integrated Endpoint Register
+Block, which preconfigures the FIFO limits for the ENETC ports. This is a node
+with the following properties:
+
+- reg		: Specifies the address in the SoC memory space.
+- compatible	: Must be "fsl,ls1028a-enetc-ierb".
+
+Example:
+	ierb@1f0800000 {
+		compatible = "fsl,ls1028a-enetc-ierb";
+		reg = <0x01 0xf0800000 0x0 0x10000>;
+	};
-- 
cgit v1.2.3


From e7d48e5fbf30f85c89d83683c3d2dbdaa8884103 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 02:42:23 +0300
Subject: net: enetc: add a mini driver for the Integrated Endpoint Register
 Block

The NXP ENETC is a 4-port Ethernet controller which 'smells' to
operating systems like 4 distinct PCIe PFs with SR-IOV, each PF having
its own driver instance, but in fact there are some hardware resources
which are shared between all ports, like for example the 256 KB SRAM
FIFO between the MACs and the Host Transfer Agent which DMAs frames to
DRAM.

To hide the stuff that cannot be neatly exposed per port, the hardware
designers came up with this idea of having a dedicated register block
which is supposed to be populated by the bootloader, and contains
everything configuration-related: MAC addresses, FIFO partitioning, etc.

When a port is reset using PCIe Function Level Reset, its defaults are
transferred from the IERB configuration. Most of the time, the settings
made through the IERB are read-only in the port's memory space (if they
are even visible), so they cannot be modified at runtime.

Linux doesn't have any advanced FIFO partitioning requirements at all,
but when reading through the hardware manual, it became clear that, even
though there are many good 'recommendations' for default values, many of
them were not actually put in practice on LS1028A. So we end up with a
default configuration that:

(a) does not have enough TX and RX byte credits to support the max MTU
    of 9600 (which the Linux driver claims already) properly (at full speed)
(b) allows the FIFO to be overrun with RX traffic, potentially
    overwriting internal data structures.

The last part sounds a bit catastrophic, but it isn't. Frames are
supposed to transit the FIFO for a very short time, but they can
actually accumulate there under 2 conditions:

(a) there is very severe congestion on DRAM memory, or
(b) the RX rings visible to the operating system were configured for
    lossless operation, and they just ran out of free buffers to copy
    the frame to. This is what is used to put backpressure onto the MAC
    with flow control.

So since ENETC has not supported flow control thus far, RX FIFO overruns
were never seen with Linux. But with the addition of flow control, we
should configure some registers to prevent this from happening. What we
are trying to protect against are bad actors which continue to send us
traffic despite the fact that we have signaled a PAUSE condition. Of
course we can't be lossless in that case, but it is best to configure
the FIFO to do tail dropping rather than letting it overrun.

So in a nutshell, this driver is a fixup for all the IERB default values
that should have been but aren't.

The IERB configuration needs to be done _before_ the PFs are enabled.
So every PF searches for the presence of the "fsl,ls1028a-enetc-ierb"
node in the device tree, and if it finds it, it "registers" with the
IERB, which means that it requests the IERB to fix up its default
values. This is done through -EPROBE_DEFER. The IERB driver is part of
the fsl_enetc module, but is technically a platform driver, since the
IERB is a good old fashioned MMIO region, as opposed to ENETC ports
which pretend to be PCIe devices.

The driver was already configuring ENETC_PTXMBAR (FIFO allocation for
TX) because due to an omission, TXMBAR is a read/write register in the
PF memory space. But the manual is quite clear that the formula for this
should depend upon the TX byte credits (TXBCR). In turn, the TX byte
credits are only readable/writable through the IERB. So if we want to
ensure that the TXBCR register also has a value that is correct and in
line with TXMBAR, there is simply no way this can be done from the PF
driver, access to the IERB is needed.

I could have modified U-Boot to fix up the IERB values, but that is
quite undesirable, as old U-Boot versions are likely to be floating
around for quite some time from now.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/Kconfig      |   9 ++
 drivers/net/ethernet/freescale/enetc/Makefile     |   3 +
 drivers/net/ethernet/freescale/enetc/enetc_ierb.c | 155 ++++++++++++++++++++++
 drivers/net/ethernet/freescale/enetc/enetc_ierb.h |  20 +++
 drivers/net/ethernet/freescale/enetc/enetc_pf.c   |  35 ++++-
 5 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_ierb.c
 create mode 100644 drivers/net/ethernet/freescale/enetc/enetc_ierb.h

diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index ab92382c399a..d88f60c2bb82 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -2,6 +2,7 @@
 config FSL_ENETC
 	tristate "ENETC PF driver"
 	depends on PCI && PCI_MSI
+	depends on FSL_ENETC_IERB || FSL_ENETC_IERB=n
 	select FSL_ENETC_MDIO
 	select PHYLINK
 	select PCS_LYNX
@@ -25,6 +26,14 @@ config FSL_ENETC_VF
 
 	  If compiled as module (M), the module name is fsl-enetc-vf.
 
+config FSL_ENETC_IERB
+	tristate "ENETC IERB driver"
+	help
+	  This driver configures the Integrated Endpoint Register Block on NXP
+	  LS1028A.
+
+	  If compiled as module (M), the module name is fsl-enetc-ierb.
+
 config FSL_ENETC_MDIO
 	tristate "ENETC MDIO driver"
 	depends on PCI && MDIO_DEVRES && MDIO_BUS
diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
index 74f7ac253b8b..a139f2e9d59f 100644
--- a/drivers/net/ethernet/freescale/enetc/Makefile
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -11,6 +11,9 @@ obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o
 fsl-enetc-vf-y := enetc_vf.o $(common-objs)
 fsl-enetc-vf-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o
 
+obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o
+fsl-enetc-ierb-y := enetc_ierb.o
+
 obj-$(CONFIG_FSL_ENETC_MDIO) += fsl-enetc-mdio.o
 fsl-enetc-mdio-y := enetc_pci_mdio.o enetc_mdio.o
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
new file mode 100644
index 000000000000..8b356c485507
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2021 NXP Semiconductors
+ *
+ * The Integrated Endpoint Register Block (IERB) is configured by pre-boot
+ * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe
+ * card. Upon FLR, values from the IERB are transferred to the ENETC PFs, and
+ * are read-only in the PF memory space.
+ *
+ * This driver fixes up the power-on reset values for the ENETC shared FIFO,
+ * such that the TX and RX allocations are sufficient for jumbo frames, and
+ * that intelligent FIFO dropping is enabled before the internal data
+ * structures are corrupted.
+ *
+ * Even though not all ports might be used on a given board, we are not
+ * concerned with partitioning the FIFO, because the default values configure
+ * no strict reservations, so the entire FIFO can be used by the RX of a single
+ * port, or the TX of a single port.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include "enetc.h"
+#include "enetc_ierb.h"
+
+/* IERB registers */
+#define ENETC_IERB_TXMBAR(port)			(((port) * 0x100) + 0x8080)
+#define ENETC_IERB_RXMBER(port)			(((port) * 0x100) + 0x8090)
+#define ENETC_IERB_RXMBLR(port)			(((port) * 0x100) + 0x8094)
+#define ENETC_IERB_RXBCR(port)			(((port) * 0x100) + 0x80a0)
+#define ENETC_IERB_TXBCR(port)			(((port) * 0x100) + 0x80a8)
+#define ENETC_IERB_FMBDTR			0xa000
+
+#define ENETC_RESERVED_FOR_ICM			1024
+
+struct enetc_ierb {
+	void __iomem *regs;
+};
+
+static void enetc_ierb_write(struct enetc_ierb *ierb, u32 offset, u32 val)
+{
+	iowrite32(val, ierb->regs + offset);
+}
+
+int enetc_ierb_register_pf(struct platform_device *pdev,
+			   struct pci_dev *pf_pdev)
+{
+	struct enetc_ierb *ierb = platform_get_drvdata(pdev);
+	int port = enetc_pf_to_port(pf_pdev);
+	u16 tx_credit, rx_credit, tx_alloc;
+
+	if (port < 0)
+		return -ENODEV;
+
+	if (!ierb)
+		return -EPROBE_DEFER;
+
+	/* By default, it is recommended to set the Host Transfer Agent
+	 * per port transmit byte credit to "1000 + max_frame_size/2".
+	 * The power-on reset value (1800 bytes) is rounded up to the nearest
+	 * 100 assuming a maximum frame size of 1536 bytes.
+	 */
+	tx_credit = roundup(1000 + ENETC_MAC_MAXFRM_SIZE / 2, 100);
+
+	/* Internal memory allocated for transmit buffering is guaranteed but
+	 * not reserved; i.e. if the total transmit allocation is not used,
+	 * then the unused portion is not left idle, it can be used for receive
+	 * buffering but it will be reclaimed, if required, from receive by
+	 * intelligently dropping already stored receive frames in the internal
+	 * memory to ensure that the transmit allocation is respected.
+	 *
+	 * PaTXMBAR must be set to a value larger than
+	 *     PaTXBCR + 2 * max_frame_size + 32
+	 * if frame preemption is not enabled, or to
+	 *     2 * PaTXBCR + 2 * p_max_frame_size (pMAC maximum frame size) +
+	 *     2 * np_max_frame_size (eMAC maximum frame size) + 64
+	 * if frame preemption is enabled.
+	 */
+	tx_alloc = roundup(2 * tx_credit + 4 * ENETC_MAC_MAXFRM_SIZE + 64, 16);
+
+	/* Initial credits, in units of 8 bytes, to the Ingress Congestion
+	 * Manager for the maximum amount of bytes the port is allocated for
+	 * pending traffic.
+	 * It is recommended to set the initial credits to 2 times the maximum
+	 * frame size (2 frames of maximum size).
+	 */
+	rx_credit = DIV_ROUND_UP(ENETC_MAC_MAXFRM_SIZE * 2, 8);
+
+	enetc_ierb_write(ierb, ENETC_IERB_TXBCR(port), tx_credit);
+	enetc_ierb_write(ierb, ENETC_IERB_TXMBAR(port), tx_alloc);
+	enetc_ierb_write(ierb, ENETC_IERB_RXBCR(port), rx_credit);
+
+	return 0;
+}
+EXPORT_SYMBOL(enetc_ierb_register_pf);
+
+static int enetc_ierb_probe(struct platform_device *pdev)
+{
+	struct enetc_ierb *ierb;
+	struct resource *res;
+	void __iomem *regs;
+
+	ierb = devm_kzalloc(&pdev->dev, sizeof(*ierb), GFP_KERNEL);
+	if (!ierb)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	ierb->regs = regs;
+
+	/* Free buffer depletion threshold in bytes.
+	 * This sets the minimum amount of free buffer memory that should be
+	 * maintained in the datapath sub system, and when the amount of free
+	 * buffer memory falls below this threshold, a depletion indication is
+	 * asserted, which may trigger "intelligent drop" frame releases from
+	 * the ingress queues in the ICM.
+	 * It is recommended to set the free buffer depletion threshold to 1024
+	 * bytes, since the ICM needs some FIFO memory for its own use.
+	 */
+	enetc_ierb_write(ierb, ENETC_IERB_FMBDTR, ENETC_RESERVED_FOR_ICM);
+
+	platform_set_drvdata(pdev, ierb);
+
+	return 0;
+}
+
+static int enetc_ierb_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static const struct of_device_id enetc_ierb_match[] = {
+	{ .compatible = "fsl,ls1028a-enetc-ierb", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, enetc_ierb_match);
+
+static struct platform_driver enetc_ierb_driver = {
+	.driver = {
+		.name = "fsl-enetc-ierb",
+		.of_match_table = enetc_ierb_match,
+	},
+	.probe = enetc_ierb_probe,
+	.remove = enetc_ierb_remove,
+};
+
+module_platform_driver(enetc_ierb_driver);
+
+MODULE_DESCRIPTION("NXP ENETC IERB");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
new file mode 100644
index 000000000000..b3b774e0998a
--- /dev/null
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2021 NXP Semiconductors */
+
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#if IS_ENABLED(CONFIG_FSL_ENETC_IERB)
+
+int enetc_ierb_register_pf(struct platform_device *pdev,
+			   struct pci_dev *pf_pdev);
+
+#else
+
+static inline int enetc_ierb_register_pf(struct platform_device *pdev,
+					 struct pci_dev *pf_pdev)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 30b22b71630e..7ca17bb024eb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -4,8 +4,10 @@
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/fsl/enetc_mdio.h>
+#include <linux/of_platform.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
+#include "enetc_ierb.h"
 #include "enetc_pf.h"
 
 #define ENETC_DRV_NAME_STR "ENETC PF driver"
@@ -518,7 +520,6 @@ static void enetc_configure_port_mac(struct enetc_hw *hw)
 		      ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE));
 
 	enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE);
-	enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE);
 
 	enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN |
 		      ENETC_PM0_CMD_TXP	| ENETC_PM0_PROMISC);
@@ -1115,6 +1116,30 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 	return err;
 }
 
+static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct platform_device *ierb_pdev;
+	struct device_node *ierb_node;
+
+	/* Don't register with the IERB if the PF itself is disabled */
+	if (!node || !of_device_is_available(node))
+		return 0;
+
+	ierb_node = of_find_compatible_node(NULL, NULL,
+					    "fsl,ls1028a-enetc-ierb");
+	if (!ierb_node || !of_device_is_available(ierb_node))
+		return -ENODEV;
+
+	ierb_pdev = of_find_device_by_node(ierb_node);
+	of_node_put(ierb_node);
+
+	if (!ierb_pdev)
+		return -EPROBE_DEFER;
+
+	return enetc_ierb_register_pf(ierb_pdev, pdev);
+}
+
 static int enetc_pf_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent)
 {
@@ -1125,6 +1150,14 @@ static int enetc_pf_probe(struct pci_dev *pdev,
 	struct enetc_pf *pf;
 	int err;
 
+	err = enetc_pf_register_with_ierb(pdev);
+	if (err == -EPROBE_DEFER)
+		return err;
+	if (err)
+		dev_warn(&pdev->dev,
+			 "Could not register with IERB driver: %pe, please update the device tree\n",
+			 ERR_PTR(err));
+
 	err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
 	if (err) {
 		dev_err(&pdev->dev, "PCI probing failed\n");
-- 
cgit v1.2.3


From b764dc6cc1ba8b82d844bbcfe97e1d432a2dca5b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 02:42:24 +0300
Subject: arm64: dts: ls1028a: declare the Integrated Endpoint Register Block
 node

Add a node describing the address in the SoC memory space for the IERB.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 262fbad8f0ec..79e7c4909a6a 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -1116,6 +1116,12 @@
 			};
 		};
 
+		/* Integrated Endpoint Register Block */
+		ierb@1f0800000 {
+			compatible = "fsl,ls1028a-enetc-ierb";
+			reg = <0x01 0xf0800000 0x0 0x10000>;
+		};
+
 		rcpm: power-controller@1e34040 {
 			compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+";
 			reg = <0x0 0x1e34040 0x0 0x1c>;
-- 
cgit v1.2.3


From a8648887880f90137f0893aeb1a0abef30858c01 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 17 Apr 2021 02:42:25 +0300
Subject: net: enetc: add support for flow control

In the ENETC receive path, a frame received by the MAC is first stored
in a 256KB 'FIFO' memory, then transferred to DRAM when enqueuing it to
the RX ring. The FIFO is a shared resource for all ENETC ports, but
every port keeps track of its own memory utilization, on RX and on TX.

There is a setting for RX rings through which they can either operate in
'lossy' mode (where the lack of a free buffer causes an immediate
discard of the frame) or in 'lossless' mode (where the lack of a free
buffer in the ring makes the frame stay longer in the FIFO).

In turn, when the memory utilization of the FIFO exceeds a certain
margin, the MAC can be configured to emit PAUSE frames.

There is enough FIFO memory to buffer up to 3 MTU-sized frames per RX
port while not jeopardizing the other use cases (jumbo frames), and
also not consume bytes from the port TX allocations. Also, 3 MTU-sized
frames worth of memory is enough to ensure zero loss for 64 byte packets
at 1G line rate.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/enetc/enetc_ethtool.c   | 18 +++++++
 drivers/net/ethernet/freescale/enetc/enetc_hw.h    |  9 ++++
 drivers/net/ethernet/freescale/enetc/enetc_pf.c    | 60 +++++++++++++++++++++-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 49835e878bbb..ebccaf02411c 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -708,6 +708,22 @@ static int enetc_set_wol(struct net_device *dev,
 	return ret;
 }
 
+static void enetc_get_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(dev);
+
+	phylink_ethtool_get_pauseparam(priv->phylink, pause);
+}
+
+static int enetc_set_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *pause)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(dev);
+
+	return phylink_ethtool_set_pauseparam(priv->phylink, pause);
+}
+
 static int enetc_get_link_ksettings(struct net_device *dev,
 				    struct ethtool_link_ksettings *cmd)
 {
@@ -754,6 +770,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
 	.get_ts_info = enetc_get_ts_info,
 	.get_wol = enetc_get_wol,
 	.set_wol = enetc_set_wol,
+	.get_pauseparam = enetc_get_pauseparam,
+	.set_pauseparam = enetc_set_pauseparam,
 };
 
 static const struct ethtool_ops enetc_vf_ethtool_ops = {
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 04ac7fc23ead..0f5f081a5baf 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -109,6 +109,7 @@ enum enetc_bdr_type {TX, RX};
 /* RX BDR reg offsets */
 #define ENETC_RBMR	0
 #define ENETC_RBMR_BDS	BIT(2)
+#define ENETC_RBMR_CM	BIT(4)
 #define ENETC_RBMR_VTE	BIT(5)
 #define ENETC_RBMR_EN	BIT(31)
 #define ENETC_RBSR	0x4
@@ -180,6 +181,8 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PSIVLANR(n)	(0x0240 + (n) * 4) /* n = SI index */
 #define ENETC_PSIVLAN_EN	BIT(31)
 #define ENETC_PSIVLAN_SET_QOS(val)	((u32)(val) << 12)
+#define ENETC_PPAUONTR		0x0410
+#define ENETC_PPAUOFFTR		0x0414
 #define ENETC_PTXMBAR		0x0608
 #define ENETC_PCAPR0		0x0900
 #define ENETC_PCAPR0_RXBDR(val)	((val) >> 24)
@@ -227,6 +230,7 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_TX_EN		BIT(0)
 #define ENETC_PM0_RX_EN		BIT(1)
 #define ENETC_PM0_PROMISC	BIT(4)
+#define ENETC_PM0_PAUSE_IGN	BIT(8)
 #define ENETC_PM0_CMD_XGLP	BIT(10)
 #define ENETC_PM0_CMD_TXP	BIT(11)
 #define ENETC_PM0_CMD_PHY_TX_EN	BIT(15)
@@ -239,6 +243,11 @@ enum enetc_bdr_type {TX, RX};
 
 #define ENETC_PM_IMDIO_BASE	0x8030
 
+#define ENETC_PM0_PAUSE_QUANTA	0x8054
+#define ENETC_PM0_PAUSE_THRESH	0x8064
+#define ENETC_PM1_PAUSE_QUANTA	0x9054
+#define ENETC_PM1_PAUSE_THRESH	0x9064
+
 #define ENETC_PM0_SINGLE_STEP		0x80c0
 #define ENETC_PM1_SINGLE_STEP		0x90c0
 #define ENETC_PM0_SINGLE_STEP_CH	BIT(7)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 7ca17bb024eb..31274325159a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1014,7 +1014,12 @@ static void enetc_pl_mac_link_up(struct phylink_config *config,
 				 int duplex, bool tx_pause, bool rx_pause)
 {
 	struct enetc_pf *pf = phylink_to_enetc_pf(config);
+	u32 pause_off_thresh = 0, pause_on_thresh = 0;
+	u32 init_quanta = 0, refresh_quanta = 0;
+	struct enetc_hw *hw = &pf->si->hw;
 	struct enetc_ndev_priv *priv;
+	u32 rbmr, cmd_cfg;
+	int idx;
 
 	priv = netdev_priv(pf->si->ndev);
 	if (priv->active_offloads & ENETC_F_QBV)
@@ -1022,9 +1027,60 @@ static void enetc_pl_mac_link_up(struct phylink_config *config,
 
 	if (!phylink_autoneg_inband(mode) &&
 	    phy_interface_mode_is_rgmii(interface))
-		enetc_force_rgmii_mac(&pf->si->hw, speed, duplex);
+		enetc_force_rgmii_mac(hw, speed, duplex);
+
+	/* Flow control */
+	for (idx = 0; idx < priv->num_rx_rings; idx++) {
+		rbmr = enetc_rxbdr_rd(hw, idx, ENETC_RBMR);
+
+		if (tx_pause)
+			rbmr |= ENETC_RBMR_CM;
+		else
+			rbmr &= ~ENETC_RBMR_CM;
+
+		enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
+	}
+
+	if (tx_pause) {
+		/* When the port first enters congestion, send a PAUSE request
+		 * with the maximum number of quanta. When the port exits
+		 * congestion, it will automatically send a PAUSE frame with
+		 * zero quanta.
+		 */
+		init_quanta = 0xffff;
+
+		/* Also, set up the refresh timer to send follow-up PAUSE
+		 * frames at half the quanta value, in case the congestion
+		 * condition persists.
+		 */
+		refresh_quanta = 0xffff / 2;
+
+		/* Start emitting PAUSE frames when 3 large frames (or more
+		 * smaller frames) have accumulated in the FIFO waiting to be
+		 * DMAed to the RX ring.
+		 */
+		pause_on_thresh = 3 * ENETC_MAC_MAXFRM_SIZE;
+		pause_off_thresh = 1 * ENETC_MAC_MAXFRM_SIZE;
+	}
+
+	enetc_port_wr(hw, ENETC_PM0_PAUSE_QUANTA, init_quanta);
+	enetc_port_wr(hw, ENETC_PM1_PAUSE_QUANTA, init_quanta);
+	enetc_port_wr(hw, ENETC_PM0_PAUSE_THRESH, refresh_quanta);
+	enetc_port_wr(hw, ENETC_PM1_PAUSE_THRESH, refresh_quanta);
+	enetc_port_wr(hw, ENETC_PPAUONTR, pause_on_thresh);
+	enetc_port_wr(hw, ENETC_PPAUOFFTR, pause_off_thresh);
+
+	cmd_cfg = enetc_port_rd(hw, ENETC_PM0_CMD_CFG);
+
+	if (rx_pause)
+		cmd_cfg &= ~ENETC_PM0_PAUSE_IGN;
+	else
+		cmd_cfg |= ENETC_PM0_PAUSE_IGN;
+
+	enetc_port_wr(hw, ENETC_PM0_CMD_CFG, cmd_cfg);
+	enetc_port_wr(hw, ENETC_PM1_CMD_CFG, cmd_cfg);
 
-	enetc_mac_enable(&pf->si->hw, true);
+	enetc_mac_enable(hw, true);
 }
 
 static void enetc_pl_mac_link_down(struct phylink_config *config,
-- 
cgit v1.2.3


From 46fd4471615c1bff9d87c411140807762c25667a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 16 Apr 2021 23:55:54 -0700
Subject: net: xilinx: drivers need/depend on HAS_IOMEM

kernel test robot reports build errors in 3 Xilinx ethernet drivers.
They all use ioremap functions that are only available when HAS_IOMEM
is set/enabled. If it is not enabled, they all have build errors,
so make these 3 drivers depend on HAS_IOMEM.

ld: drivers/net/ethernet/xilinx/xilinx_emaclite.o: in function `xemaclite_of_probe':
xilinx_emaclite.c:(.text+0x9fc): undefined reference to `devm_ioremap_resource'

ld: drivers/net/ethernet/xilinx/xilinx_axienet_main.o: in function `axienet_probe':
xilinx_axienet_main.c:(.text+0x942): undefined reference to `devm_ioremap_resource'

ld: drivers/net/ethernet/xilinx/ll_temac_main.o: in function `temac_probe':
ll_temac_main.c:(.text+0x1283): undefined reference to `devm_platform_ioremap_resource_byname'
ld: ll_temac_main.c:(.text+0x13ad): undefined reference to `devm_of_iomap'
ld: ll_temac_main.c:(.text+0x162e): undefined reference to `devm_platform_ioremap_resource'

Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Cc: Gary Guo <gary@garyguo.net>
Cc: Zhang Changzhong <zhangchangzhong@huawei.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: stable@vger.kernel.org
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index c6eb7f2368aa..911b5ef9e680 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -18,12 +18,14 @@ if NET_VENDOR_XILINX
 
 config XILINX_EMACLITE
 	tristate "Xilinx 10/100 Ethernet Lite support"
+	depends on HAS_IOMEM
 	select PHYLIB
 	help
 	  This driver supports the 10/100 Ethernet Lite from Xilinx.
 
 config XILINX_AXI_EMAC
 	tristate "Xilinx 10/100/1000 AXI Ethernet support"
+	depends on HAS_IOMEM
 	select PHYLINK
 	help
 	  This driver supports the 10/100/1000 Ethernet from Xilinx for the
@@ -31,6 +33,7 @@ config XILINX_AXI_EMAC
 
 config XILINX_LL_TEMAC
 	tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver"
+	depends on HAS_IOMEM
 	select PHYLIB
 	help
 	  This driver supports the Xilinx 10/100/1000 LocalLink TEMAC
-- 
cgit v1.2.3


From 1c5a2ba67989c01b8aeda81969b7a4a3702c51b5 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 17 Apr 2021 15:09:22 +0800
Subject: net: hns3: remove a duplicate pf reset counting

When enter suspend mode the counter of pf reset will be increased
twice, since both hclge_prepare_general() and hclge_prepare_wait()
increase this counter. So remove the duplicate counting in
hclge_prepare_general().

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dc06986055e4..c296ab64fb0a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11126,8 +11126,6 @@ retry:
 
 	if (hdev->reset_type == HNAE3_FLR_RESET)
 		hdev->rst_stats.flr_rst_cnt++;
-	else if (hdev->reset_type == HNAE3_FUNC_RESET)
-		hdev->rst_stats.pf_rst_cnt++;
 }
 
 static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
-- 
cgit v1.2.3


From 8ed64dbe0bdf39479772896b2b4e5cbbdf89f086 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 17 Apr 2021 15:09:23 +0800
Subject: net: hns3: cleanup inappropriate spaces in struct hlcgevf_tqp_stats

Modify some inappropriate spaces in comments of struct
hlcgevf_tqp_stats.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 956095b89a1b..265c9b0b4728 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -177,9 +177,9 @@ struct hclgevf_hw {
 
 /* TQP stats */
 struct hlcgevf_tqp_stats {
-	/* query_tqp_tx_queue_statistics ,opcode id:  0x0B03 */
+	/* query_tqp_tx_queue_statistics, opcode id: 0x0B03 */
 	u64 rcb_tx_ring_pktnum_rcd; /* 32bit */
-	/* query_tqp_rx_queue_statistics ,opcode id:  0x0B13 */
+	/* query_tqp_rx_queue_statistics, opcode id: 0x0B13 */
 	u64 rcb_rx_ring_pktnum_rcd; /* 32bit */
 };
 
-- 
cgit v1.2.3


From e407efdd94cde88c8b84588cdb5ab31dc97589b0 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 17 Apr 2021 15:09:24 +0800
Subject: net: hns3: change the value of the SEPARATOR_VALUE macro in
 hclgevf_main.c

The SEPARATOR_VALUE macro is used as separator when getting
the register value, but the value of this macro is different
between pf and vf, it is a bit confusing for the user, so
synchronize the value of vf with pf.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 07066c416d42..0db51ef15ef6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3646,7 +3646,7 @@ static void hclgevf_get_link_mode(struct hnae3_handle *handle,
 }
 
 #define MAX_SEPARATE_NUM	4
-#define SEPARATOR_VALUE		0xFFFFFFFF
+#define SEPARATOR_VALUE		0xFDFCFBFA
 #define REG_NUM_PER_LINE	4
 #define REG_LEN_PER_LINE	(REG_NUM_PER_LINE * sizeof(u32))
 
-- 
cgit v1.2.3


From 6ecaf81d4ac6365f9284f9d68d74f7c209e74f98 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Sat, 17 Apr 2021 15:29:04 +0800
Subject: net: ethernet: mediatek: fix a typo bug in flow offloading

Issue was traffic problems after a while with increased ping times if
flow offload is active. It turns out that key_offset with cookie is
needed in rhashtable_params but was re-assigned to head_offset.
Fix the assignment.

Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Tested-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index 4975106fbc42..f47f319f3ae0 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -43,7 +43,7 @@ struct mtk_flow_entry {
 
 static const struct rhashtable_params mtk_flow_ht_params = {
 	.head_offset = offsetof(struct mtk_flow_entry, node),
-	.head_offset = offsetof(struct mtk_flow_entry, cookie),
+	.key_offset = offsetof(struct mtk_flow_entry, cookie),
 	.key_len = sizeof(unsigned long),
 	.automatic_shrinking = true,
 };
-- 
cgit v1.2.3


From c6400e3fc3fa821a26a58cf867331e0877a4c56b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 17 Apr 2021 14:38:07 +0300
Subject: netlink: simplify nl_set_extack_cookie_u64(),
 nl_set_extack_cookie_u32()

Taking address of a function argument directly works just fine.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0bcf98098c5a..61b1c7fcc401 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -129,23 +129,19 @@ struct netlink_ext_ack {
 static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 					    u64 cookie)
 {
-	u64 __cookie = cookie;
-
 	if (!extack)
 		return;
-	memcpy(extack->cookie, &__cookie, sizeof(__cookie));
-	extack->cookie_len = sizeof(__cookie);
+	memcpy(extack->cookie, &cookie, sizeof(cookie));
+	extack->cookie_len = sizeof(cookie);
 }
 
 static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack,
 					    u32 cookie)
 {
-	u32 __cookie = cookie;
-
 	if (!extack)
 		return;
-	memcpy(extack->cookie, &__cookie, sizeof(__cookie));
-	extack->cookie_len = sizeof(__cookie);
+	memcpy(extack->cookie, &cookie, sizeof(cookie));
+	extack->cookie_len = sizeof(cookie);
 }
 
 void netlink_kernel_release(struct sock *sk);
-- 
cgit v1.2.3


From 8d892d60941b00c86d2029c8a99db24ab4979673 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 18 Apr 2021 20:28:53 +0200
Subject: net: ethernet: ixp4xx: Set the DMA masks explicitly

The former fix only papered over the actual problem: the
ethernet core expects the netdev .dev member to have the
proper DMA masks set, or there will be BUG_ON() triggered
in kernel/dma/mapping.c.

Fix this by simply copying dma_mask and dma_mask_coherent
from the parent device.

Fixes: e45d0fad4a5f ("net: ethernet: ixp4xx: Use parent dev for DMA pool")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xscale/ixp4xx_eth.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 0152f1e70783..9defaa21a1a9 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1085,7 +1085,7 @@ static int init_queues(struct port *port)
 	int i;
 
 	if (!ports_open) {
-		dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent,
+		dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev,
 					   POOL_ALLOC_SIZE, 32, 0);
 		if (!dma_pool)
 			return -ENOMEM;
@@ -1435,6 +1435,9 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	ndev->netdev_ops = &ixp4xx_netdev_ops;
 	ndev->ethtool_ops = &ixp4xx_ethtool_ops;
 	ndev->tx_queue_len = 100;
+	/* Inherit the DMA masks from the platform device */
+	ndev->dev.dma_mask = dev->dma_mask;
+	ndev->dev.coherent_dma_mask = dev->coherent_dma_mask;
 
 	netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT);
 
-- 
cgit v1.2.3


From 0e389028ad75412ff624b304913bba14f8d46ec4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 18 Apr 2021 23:11:43 +0200
Subject: net: ethernet: mtk_eth_soc: fix undefined reference to
 `dsa_port_from_netdev'

Caused by:

 CONFIG_NET_DSA=m
 CONFIG_NET_MEDIATEK_SOC=y

mtk_ppe_offload.c:undefined reference to `dsa_port_from_netdev'

Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 3362b148de23..08c2e446d3d5 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -9,6 +9,7 @@ if NET_VENDOR_MEDIATEK
 
 config NET_MEDIATEK_SOC
 	tristate "MediaTek SoC Gigabit Ethernet support"
+	depends on NET_DSA || !NET_DSA
 	select PHYLINK
 	help
 	  This driver supports the gigabit ethernet MACs in the
-- 
cgit v1.2.3


From 014d029876b23f5963c792c4622eb0eaea930f19 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 18 Apr 2021 23:11:44 +0200
Subject: net: ethernet: mtk_eth_soc: missing mutex

Patch 2ed37183abb7 ("netfilter: flowtable: separate replace, destroy and
stats to different workqueues") splits the workqueue per event type. Add
a mutex to serialize updates.

Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support")
Reported-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index f47f319f3ae0..b35b60adc033 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -391,6 +391,8 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
 	return 0;
 }
 
+static DEFINE_MUTEX(mtk_flow_offload_mutex);
+
 static int
 mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
 {
@@ -398,6 +400,7 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri
 	struct net_device *dev = cb_priv;
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
+	int err;
 
 	if (!tc_can_offload(dev))
 		return -EOPNOTSUPP;
@@ -405,18 +408,24 @@ mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri
 	if (type != TC_SETUP_CLSFLOWER)
 		return -EOPNOTSUPP;
 
+	mutex_lock(&mtk_flow_offload_mutex);
 	switch (cls->command) {
 	case FLOW_CLS_REPLACE:
-		return mtk_flow_offload_replace(eth, cls);
+		err = mtk_flow_offload_replace(eth, cls);
+		break;
 	case FLOW_CLS_DESTROY:
-		return mtk_flow_offload_destroy(eth, cls);
+		err = mtk_flow_offload_destroy(eth, cls);
+		break;
 	case FLOW_CLS_STATS:
-		return mtk_flow_offload_stats(eth, cls);
+		err = mtk_flow_offload_stats(eth, cls);
+		break;
 	default:
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		break;
 	}
+	mutex_unlock(&mtk_flow_offload_mutex);
 
-	return 0;
+	return err;
 }
 
 static int
-- 
cgit v1.2.3


From f5c2cb583abe8a5049a32c7b093e2852f344a3e9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 18 Apr 2021 23:11:45 +0200
Subject: net: ethernet: mtk_eth_soc: handle VLAN pop action

Do not hit EOPNOTSUPP when flowtable offload provides a VLAN pop action.

Fixes: efce49dfe6a8 ("netfilter: flowtable: add vlan pop action offload support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_ppe_offload.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
index b35b60adc033..b5f68f66d42a 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -232,6 +232,8 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
 			data.vlan.proto = act->vlan.proto;
 			data.vlan.num++;
 			break;
+		case FLOW_ACTION_VLAN_POP:
+			break;
 		case FLOW_ACTION_PPPOE_PUSH:
 			if (data.pppoe.num == 1)
 				return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 89f9d5400b53bedbeb8f1d7854abe6f7412251da Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:39 +0200
Subject: net: korina: Fix MDIO functions

Fixed MDIO functions to work reliable and not just by accident.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/Kconfig  |  1 +
 drivers/net/ethernet/korina.c | 56 ++++++++++++++++++++++++++++---------------
 2 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index d46460c5b44d..172cae5dee4a 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -100,6 +100,7 @@ config JME
 config KORINA
 	tristate "Korina (IDT RC32434) Ethernet support"
 	depends on MIKROTIK_RB532
+	select MII
 	help
 	  If you have a Mikrotik RouterBoard 500 or IDT RC32434
 	  based system say Y. Otherwise say N.
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 925161959b9b..1b7e1c75ed9e 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -41,6 +41,7 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
+#include <linux/iopoll.h>
 #include <linux/in.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -137,7 +138,6 @@ struct korina_private {
 	struct mii_if_info mii_if;
 	struct work_struct restart_task;
 	struct net_device *dev;
-	int phy_addr;
 };
 
 extern unsigned int idt_cpu_freq;
@@ -292,32 +292,48 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static int mdio_read(struct net_device *dev, int mii_id, int reg)
+static int korina_mdio_wait(struct korina_private *lp)
+{
+	u32 value;
+
+	return readl_poll_timeout_atomic(&lp->eth_regs->miimind,
+					 value, value & ETH_MII_IND_BSY,
+					 1, 1000);
+}
+
+static int korina_mdio_read(struct net_device *dev, int phy, int reg)
 {
 	struct korina_private *lp = netdev_priv(dev);
 	int ret;
 
-	mii_id = ((lp->rx_irq == 0x2c ? 1 : 0) << 8);
+	ret = korina_mdio_wait(lp);
+	if (ret < 0)
+		return ret;
 
-	writel(0, &lp->eth_regs->miimcfg);
-	writel(0, &lp->eth_regs->miimcmd);
-	writel(mii_id | reg, &lp->eth_regs->miimaddr);
-	writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd);
+	writel(phy << 8 | reg, &lp->eth_regs->miimaddr);
+	writel(1, &lp->eth_regs->miimcmd);
 
-	ret = (int)(readl(&lp->eth_regs->miimrdd));
+	ret = korina_mdio_wait(lp);
+	if (ret < 0)
+		return ret;
+
+	if (readl(&lp->eth_regs->miimind) & ETH_MII_IND_NV)
+		return -EINVAL;
+
+	ret = readl(&lp->eth_regs->miimrdd);
+	writel(0, &lp->eth_regs->miimcmd);
 	return ret;
 }
 
-static void mdio_write(struct net_device *dev, int mii_id, int reg, int val)
+static void korina_mdio_write(struct net_device *dev, int phy, int reg, int val)
 {
 	struct korina_private *lp = netdev_priv(dev);
 
-	mii_id = ((lp->rx_irq == 0x2c ? 1 : 0) << 8);
+	if (korina_mdio_wait(lp))
+		return;
 
-	writel(0, &lp->eth_regs->miimcfg);
-	writel(1, &lp->eth_regs->miimcmd);
-	writel(mii_id | reg, &lp->eth_regs->miimaddr);
-	writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd);
+	writel(0, &lp->eth_regs->miimcmd);
+	writel(phy << 8 | reg, &lp->eth_regs->miimaddr);
 	writel(val, &lp->eth_regs->miimwtd);
 }
 
@@ -643,7 +659,7 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media)
 {
 	struct korina_private *lp = netdev_priv(dev);
 
-	mii_check_media(&lp->mii_if, 0, init_media);
+	mii_check_media(&lp->mii_if, 1, init_media);
 
 	if (lp->mii_if.full_duplex)
 		writel(readl(&lp->eth_regs->ethmac2) | ETH_MAC2_FD,
@@ -869,12 +885,15 @@ static int korina_init(struct net_device *dev)
 	 * Clock independent setting */
 	writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1,
 			&lp->eth_regs->ethmcp);
+	writel(0, &lp->eth_regs->miimcfg);
 
 	/* don't transmit until fifo contains 48b */
 	writel(48, &lp->eth_regs->ethfifott);
 
 	writel(ETH_MAC1_RE, &lp->eth_regs->ethmac1);
 
+	korina_check_media(dev, 1);
+
 	napi_enable(&lp->napi);
 	netif_start_queue(dev);
 
@@ -1089,11 +1108,10 @@ static int korina_probe(struct platform_device *pdev)
 	dev->watchdog_timeo = TX_TIMEOUT;
 	netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT);
 
-	lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05);
 	lp->mii_if.dev = dev;
-	lp->mii_if.mdio_read = mdio_read;
-	lp->mii_if.mdio_write = mdio_write;
-	lp->mii_if.phy_id = lp->phy_addr;
+	lp->mii_if.mdio_read = korina_mdio_read;
+	lp->mii_if.mdio_write = korina_mdio_write;
+	lp->mii_if.phy_id = 1;
 	lp->mii_if.phy_id_mask = 0x1f;
 	lp->mii_if.reg_num_mask = 0x1f;
 
-- 
cgit v1.2.3


From b4cd249a8cc017e0f7910baf6b7cdc282fe8c4e5 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:40 +0200
Subject: net: korina: Use devres functions

Simplify probe/remove code by using devm_ functions.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 64 ++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 1b7e1c75ed9e..b56de01f6bb8 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -105,9 +105,9 @@ enum chain_status {
 
 /* Information that need to be kept for each board. */
 struct korina_private {
-	struct eth_regs *eth_regs;
-	struct dma_reg *rx_dma_regs;
-	struct dma_reg *tx_dma_regs;
+	struct eth_regs __iomem *eth_regs;
+	struct dma_reg __iomem *rx_dma_regs;
+	struct dma_reg __iomem *tx_dma_regs;
 	struct dma_desc *td_ring; /* transmit descriptor ring */
 	struct dma_desc *rd_ring; /* receive descriptor ring  */
 
@@ -1044,10 +1044,10 @@ static int korina_probe(struct platform_device *pdev)
 	struct korina_device *bif = platform_get_drvdata(pdev);
 	struct korina_private *lp;
 	struct net_device *dev;
-	struct resource *r;
+	void __iomem *p;
 	int rc;
 
-	dev = alloc_etherdev(sizeof(struct korina_private));
+	dev = devm_alloc_etherdev(&pdev->dev, sizeof(struct korina_private));
 	if (!dev)
 		return -ENOMEM;
 
@@ -1060,36 +1060,30 @@ static int korina_probe(struct platform_device *pdev)
 	lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx");
 	lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx");
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs");
-	dev->base_addr = r->start;
-	lp->eth_regs = ioremap(r->start, resource_size(r));
-	if (!lp->eth_regs) {
+	p = devm_platform_ioremap_resource_byname(pdev, "korina_regs");
+	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap registers\n");
-		rc = -ENXIO;
-		goto probe_err_out;
+		return -ENOMEM;
 	}
+	lp->eth_regs = p;
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_rx");
-	lp->rx_dma_regs = ioremap(r->start, resource_size(r));
-	if (!lp->rx_dma_regs) {
+	p = devm_platform_ioremap_resource_byname(pdev, "korina_dma_rx");
+	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n");
-		rc = -ENXIO;
-		goto probe_err_dma_rx;
+		return -ENOMEM;
 	}
+	lp->rx_dma_regs = p;
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_tx");
-	lp->tx_dma_regs = ioremap(r->start, resource_size(r));
-	if (!lp->tx_dma_regs) {
+	p = devm_platform_ioremap_resource_byname(pdev, "korina_dma_tx");
+	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n");
-		rc = -ENXIO;
-		goto probe_err_dma_tx;
+		return -ENOMEM;
 	}
+	lp->tx_dma_regs = p;
 
 	lp->td_ring = kmalloc(TD_RING_SIZE + RD_RING_SIZE, GFP_KERNEL);
-	if (!lp->td_ring) {
-		rc = -ENXIO;
-		goto probe_err_td_ring;
-	}
+	if (!lp->td_ring)
+		return -ENOMEM;
 
 	dma_cache_inv((unsigned long)(lp->td_ring),
 			TD_RING_SIZE + RD_RING_SIZE);
@@ -1119,7 +1113,8 @@ static int korina_probe(struct platform_device *pdev)
 	if (rc < 0) {
 		printk(KERN_ERR DRV_NAME
 			": cannot register net device: %d\n", rc);
-		goto probe_err_register;
+		kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
+		return rc;
 	}
 	timer_setup(&lp->media_check_timer, korina_poll_media, 0);
 
@@ -1127,20 +1122,7 @@ static int korina_probe(struct platform_device *pdev)
 
 	printk(KERN_INFO "%s: " DRV_NAME "-" DRV_VERSION " " DRV_RELDATE "\n",
 			dev->name);
-out:
 	return rc;
-
-probe_err_register:
-	kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
-probe_err_td_ring:
-	iounmap(lp->tx_dma_regs);
-probe_err_dma_tx:
-	iounmap(lp->rx_dma_regs);
-probe_err_dma_rx:
-	iounmap(lp->eth_regs);
-probe_err_out:
-	free_netdev(dev);
-	goto out;
 }
 
 static int korina_remove(struct platform_device *pdev)
@@ -1148,13 +1130,9 @@ static int korina_remove(struct platform_device *pdev)
 	struct korina_device *bif = platform_get_drvdata(pdev);
 	struct korina_private *lp = netdev_priv(bif->dev);
 
-	iounmap(lp->eth_regs);
-	iounmap(lp->rx_dma_regs);
-	iounmap(lp->tx_dma_regs);
 	kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
 
 	unregister_netdev(bif->dev);
-	free_netdev(bif->dev);
 
 	return 0;
 }
-- 
cgit v1.2.3


From e42f10533d7c8a512399571804c59b0dc862eefe Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:41 +0200
Subject: net: korina: Remove not needed cache flushes

Descriptors are mapped uncached so there is no need to do any cache
handling for them.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index b56de01f6bb8..c7abb4a8dd37 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -231,7 +231,6 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 	dma_cache_wback((u32)skb->data, skb->len);
 
 	/* Setup the transmit descriptor. */
-	dma_cache_inv((u32) td, sizeof(*td));
 	td->ca = CPHYSADDR(skb->data);
 	chain_prev = (lp->tx_chain_tail - 1) & KORINA_TDS_MASK;
 	chain_next = (lp->tx_chain_tail + 1) & KORINA_TDS_MASK;
@@ -284,7 +283,6 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 			lp->tx_chain_tail = chain_next;
 		}
 	}
-	dma_cache_wback((u32) td, sizeof(*td));
 
 	netif_trans_update(dev);
 	spin_unlock_irqrestore(&lp->lock, flags);
@@ -373,8 +371,6 @@ static int korina_rx(struct net_device *dev, int limit)
 	u32 devcs, pkt_len, dmas;
 	int count;
 
-	dma_cache_inv((u32)rd, sizeof(*rd));
-
 	for (count = 0; count < limit; count++) {
 		skb = lp->rx_skb[lp->rx_next_done];
 		skb_new = NULL;
@@ -453,7 +449,6 @@ next:
 			~DMA_DESC_COD;
 
 		lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK;
-		dma_cache_wback((u32)rd, sizeof(*rd));
 		rd = &lp->rd_ring[lp->rx_next_done];
 		writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas);
 	}
@@ -468,7 +463,6 @@ next:
 		rd->devcs = 0;
 		skb = lp->rx_skb[lp->rx_next_done];
 		rd->ca = CPHYSADDR(skb->data);
-		dma_cache_wback((u32)rd, sizeof(*rd));
 		korina_chain_rx(lp, rd);
 	}
 
-- 
cgit v1.2.3


From 0fe632471aeb0b0b6cc9150c5c43780d58975988 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:42 +0200
Subject: net: korina: Remove nested helpers

Remove helpers, which are only used in one call site.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index c7abb4a8dd37..955fe3d3da06 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -142,12 +142,6 @@ struct korina_private {
 
 extern unsigned int idt_cpu_freq;
 
-static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr)
-{
-	writel(0, &ch->dmandptr);
-	writel(dma_addr, &ch->dmadptr);
-}
-
 static inline void korina_abort_dma(struct net_device *dev,
 					struct dma_reg *ch)
 {
@@ -164,11 +158,6 @@ static inline void korina_abort_dma(struct net_device *dev,
 	writel(0, &ch->dmandptr);
 }
 
-static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr)
-{
-	writel(dma_addr, &ch->dmandptr);
-}
-
 static void korina_abort_tx(struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
@@ -183,18 +172,6 @@ static void korina_abort_rx(struct net_device *dev)
 	korina_abort_dma(dev, lp->rx_dma_regs);
 }
 
-static void korina_start_rx(struct korina_private *lp,
-					struct dma_desc *rd)
-{
-	korina_start_dma(lp->rx_dma_regs, CPHYSADDR(rd));
-}
-
-static void korina_chain_rx(struct korina_private *lp,
-					struct dma_desc *rd)
-{
-	korina_chain_dma(lp->rx_dma_regs, CPHYSADDR(rd));
-}
-
 /* transmit packet */
 static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
@@ -463,7 +440,7 @@ next:
 		rd->devcs = 0;
 		skb = lp->rx_skb[lp->rx_next_done];
 		rd->ca = CPHYSADDR(skb->data);
-		korina_chain_rx(lp, rd);
+		writel(CPHYSADDR(rd), &lp->rx_dma_regs->dmandptr);
 	}
 
 	return count;
@@ -840,7 +817,8 @@ static int korina_init(struct net_device *dev)
 
 	writel(0, &lp->rx_dma_regs->dmas);
 	/* Start Rx DMA */
-	korina_start_rx(lp, &lp->rd_ring[0]);
+	writel(0, &lp->rx_dma_regs->dmandptr);
+	writel(CPHYSADDR(&lp->rd_ring[0]), &lp->rx_dma_regs->dmadptr);
 
 	writel(readl(&lp->tx_dma_regs->dmasm) &
 			~(DMA_STAT_FINI | DMA_STAT_ERR),
-- 
cgit v1.2.3


From 0fc96939a97ffd4929b19a3e2d1b3858ab9efa8b Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:43 +0200
Subject: net: korina: Use DMA API

Instead of messing with MIPS specific macros use DMA API for mapping
descriptors and skbs.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 158 ++++++++++++++++++++++++++----------------
 1 file changed, 98 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 955fe3d3da06..44fad9e924ca 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -110,10 +110,15 @@ struct korina_private {
 	struct dma_reg __iomem *tx_dma_regs;
 	struct dma_desc *td_ring; /* transmit descriptor ring */
 	struct dma_desc *rd_ring; /* receive descriptor ring  */
+	dma_addr_t td_dma;
+	dma_addr_t rd_dma;
 
 	struct sk_buff *tx_skb[KORINA_NUM_TDS];
 	struct sk_buff *rx_skb[KORINA_NUM_RDS];
 
+	dma_addr_t rx_skb_dma[KORINA_NUM_RDS];
+	dma_addr_t tx_skb_dma[KORINA_NUM_TDS];
+
 	int rx_next_done;
 	int rx_chain_head;
 	int rx_chain_tail;
@@ -138,10 +143,21 @@ struct korina_private {
 	struct mii_if_info mii_if;
 	struct work_struct restart_task;
 	struct net_device *dev;
+	struct device *dmadev;
 };
 
 extern unsigned int idt_cpu_freq;
 
+static dma_addr_t korina_tx_dma(struct korina_private *lp, int idx)
+{
+	return lp->td_dma + (idx * sizeof(struct dma_desc));
+}
+
+static dma_addr_t korina_rx_dma(struct korina_private *lp, int idx)
+{
+	return lp->rd_dma + (idx * sizeof(struct dma_desc));
+}
+
 static inline void korina_abort_dma(struct net_device *dev,
 					struct dma_reg *ch)
 {
@@ -176,14 +192,17 @@ static void korina_abort_rx(struct net_device *dev)
 static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
-	unsigned long flags;
-	u32 length;
 	u32 chain_prev, chain_next;
+	unsigned long flags;
 	struct dma_desc *td;
+	dma_addr_t ca;
+	u32 length;
+	int idx;
 
 	spin_lock_irqsave(&lp->lock, flags);
 
-	td = &lp->td_ring[lp->tx_chain_tail];
+	idx = lp->tx_chain_tail;
+	td = &lp->td_ring[idx];
 
 	/* stop queue when full, drop pkts if queue already full */
 	if (lp->tx_count >= (KORINA_NUM_TDS - 2)) {
@@ -191,26 +210,26 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 		if (lp->tx_count == (KORINA_NUM_TDS - 2))
 			netif_stop_queue(dev);
-		else {
-			dev->stats.tx_dropped++;
-			dev_kfree_skb_any(skb);
-			spin_unlock_irqrestore(&lp->lock, flags);
-
-			return NETDEV_TX_OK;
-		}
+		else
+			goto drop_packet;
 	}
 
 	lp->tx_count++;
 
-	lp->tx_skb[lp->tx_chain_tail] = skb;
+	lp->tx_skb[idx] = skb;
 
 	length = skb->len;
-	dma_cache_wback((u32)skb->data, skb->len);
 
 	/* Setup the transmit descriptor. */
-	td->ca = CPHYSADDR(skb->data);
-	chain_prev = (lp->tx_chain_tail - 1) & KORINA_TDS_MASK;
-	chain_next = (lp->tx_chain_tail + 1) & KORINA_TDS_MASK;
+	ca = dma_map_single(lp->dmadev, skb->data, length, DMA_TO_DEVICE);
+	if (dma_mapping_error(lp->dmadev, ca))
+		goto drop_packet;
+
+	lp->tx_skb_dma[idx] = ca;
+	td->ca = ca;
+
+	chain_prev = (idx - 1) & KORINA_TDS_MASK;
+	chain_next = (idx + 1) & KORINA_TDS_MASK;
 
 	if (readl(&(lp->tx_dma_regs->dmandptr)) == 0) {
 		if (lp->tx_chain_status == desc_empty) {
@@ -220,8 +239,8 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 			/* Move tail */
 			lp->tx_chain_tail = chain_next;
 			/* Write to NDPTR */
-			writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]),
-					&lp->tx_dma_regs->dmandptr);
+			writel(korina_tx_dma(lp, lp->tx_chain_head),
+			       &lp->tx_dma_regs->dmandptr);
 			/* Move head to tail */
 			lp->tx_chain_head = lp->tx_chain_tail;
 		} else {
@@ -232,12 +251,12 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 			lp->td_ring[chain_prev].control &=
 					~DMA_DESC_COF;
 			/* Link to prev */
-			lp->td_ring[chain_prev].link =  CPHYSADDR(td);
+			lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx);
 			/* Move tail */
 			lp->tx_chain_tail = chain_next;
 			/* Write to NDPTR */
-			writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]),
-					&(lp->tx_dma_regs->dmandptr));
+			writel(korina_tx_dma(lp, lp->tx_chain_head),
+			       &lp->tx_dma_regs->dmandptr);
 			/* Move head to tail */
 			lp->tx_chain_head = lp->tx_chain_tail;
 			lp->tx_chain_status = desc_empty;
@@ -256,7 +275,7 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 					DMA_DESC_COF | DMA_DESC_IOF;
 			lp->td_ring[chain_prev].control &=
 					~DMA_DESC_COF;
-			lp->td_ring[chain_prev].link =  CPHYSADDR(td);
+			lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx);
 			lp->tx_chain_tail = chain_next;
 		}
 	}
@@ -264,6 +283,13 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 	netif_trans_update(dev);
 	spin_unlock_irqrestore(&lp->lock, flags);
 
+	return NETDEV_TX_OK;
+
+drop_packet:
+	dev->stats.tx_dropped++;
+	dev_kfree_skb_any(skb);
+	spin_unlock_irqrestore(&lp->lock, flags);
+
 	return NETDEV_TX_OK;
 }
 
@@ -344,8 +370,8 @@ static int korina_rx(struct net_device *dev, int limit)
 	struct korina_private *lp = netdev_priv(dev);
 	struct dma_desc *rd = &lp->rd_ring[lp->rx_next_done];
 	struct sk_buff *skb, *skb_new;
-	u8 *pkt_buf;
 	u32 devcs, pkt_len, dmas;
+	dma_addr_t ca;
 	int count;
 
 	for (count = 0; count < limit; count++) {
@@ -381,20 +407,22 @@ static int korina_rx(struct net_device *dev, int limit)
 			goto next;
 		}
 
-		pkt_len = RCVPKT_LENGTH(devcs);
-
-		/* must be the (first and) last
-		 * descriptor then */
-		pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data;
-
-		/* invalidate the cache */
-		dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4);
-
 		/* Malloc up new buffer. */
 		skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE);
-
 		if (!skb_new)
 			break;
+
+		ca = dma_map_single(lp->dmadev, skb_new->data, KORINA_RBSIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(lp->dmadev, ca)) {
+			dev_kfree_skb_any(skb_new);
+			break;
+		}
+
+		pkt_len = RCVPKT_LENGTH(devcs);
+		dma_unmap_single(lp->dmadev, lp->rx_skb_dma[lp->rx_next_done],
+				 pkt_len, DMA_FROM_DEVICE);
+
 		/* Do not count the CRC */
 		skb_put(skb, pkt_len - 4);
 		skb->protocol = eth_type_trans(skb, dev);
@@ -409,15 +437,13 @@ static int korina_rx(struct net_device *dev, int limit)
 			dev->stats.multicast++;
 
 		lp->rx_skb[lp->rx_next_done] = skb_new;
+		lp->rx_skb_dma[lp->rx_next_done] = ca;
 
 next:
 		rd->devcs = 0;
 
 		/* Restore descriptor's curr_addr */
-		if (skb_new)
-			rd->ca = CPHYSADDR(skb_new->data);
-		else
-			rd->ca = CPHYSADDR(skb->data);
+		rd->ca = lp->rx_skb_dma[lp->rx_next_done];
 
 		rd->control = DMA_COUNT(KORINA_RBSIZE) |
 			DMA_DESC_COD | DMA_DESC_IOD;
@@ -438,9 +464,9 @@ next:
 
 		lp->dma_halt_cnt++;
 		rd->devcs = 0;
-		skb = lp->rx_skb[lp->rx_next_done];
-		rd->ca = CPHYSADDR(skb->data);
-		writel(CPHYSADDR(rd), &lp->rx_dma_regs->dmandptr);
+		rd->ca = lp->rx_skb_dma[lp->rx_next_done];
+		writel(korina_rx_dma(lp, rd - lp->rd_ring),
+		       &lp->rx_dma_regs->dmandptr);
 	}
 
 	return count;
@@ -563,6 +589,10 @@ static void korina_tx(struct net_device *dev)
 
 		/* We must always free the original skb */
 		if (lp->tx_skb[lp->tx_next_done]) {
+			dma_unmap_single(lp->dmadev,
+					 lp->tx_skb_dma[lp->tx_next_done],
+					 lp->tx_skb[lp->tx_next_done]->len,
+					 DMA_TO_DEVICE);
 			dev_kfree_skb_any(lp->tx_skb[lp->tx_next_done]);
 			lp->tx_skb[lp->tx_next_done] = NULL;
 		}
@@ -609,8 +639,8 @@ korina_tx_dma_interrupt(int irq, void *dev_id)
 
 		if (lp->tx_chain_status == desc_filled &&
 			(readl(&(lp->tx_dma_regs->dmandptr)) == 0)) {
-			writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]),
-				&(lp->tx_dma_regs->dmandptr));
+			writel(korina_tx_dma(lp, lp->tx_chain_head),
+			       &lp->tx_dma_regs->dmandptr);
 			lp->tx_chain_status = desc_empty;
 			lp->tx_chain_head = lp->tx_chain_tail;
 			netif_trans_update(dev);
@@ -730,6 +760,7 @@ static int korina_alloc_ring(struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
 	struct sk_buff *skb;
+	dma_addr_t ca;
 	int i;
 
 	/* Initialize the transmit descriptors */
@@ -752,13 +783,18 @@ static int korina_alloc_ring(struct net_device *dev)
 		lp->rd_ring[i].control = DMA_DESC_IOD |
 				DMA_COUNT(KORINA_RBSIZE);
 		lp->rd_ring[i].devcs = 0;
-		lp->rd_ring[i].ca = CPHYSADDR(skb->data);
-		lp->rd_ring[i].link = CPHYSADDR(&lp->rd_ring[i+1]);
+		ca = dma_map_single(lp->dmadev, skb->data, KORINA_RBSIZE,
+				    DMA_FROM_DEVICE);
+		if (dma_mapping_error(lp->dmadev, ca))
+			return -ENOMEM;
+		lp->rd_ring[i].ca = ca;
+		lp->rx_skb_dma[i] = ca;
+		lp->rd_ring[i].link = korina_rx_dma(lp, i + 1);
 	}
 
 	/* loop back receive descriptors, so the last
 	 * descriptor points to the first one */
-	lp->rd_ring[i - 1].link = CPHYSADDR(&lp->rd_ring[0]);
+	lp->rd_ring[i - 1].link = lp->rd_dma;
 	lp->rd_ring[i - 1].control |= DMA_DESC_COD;
 
 	lp->rx_next_done  = 0;
@@ -776,16 +812,22 @@ static void korina_free_ring(struct net_device *dev)
 
 	for (i = 0; i < KORINA_NUM_RDS; i++) {
 		lp->rd_ring[i].control = 0;
-		if (lp->rx_skb[i])
+		if (lp->rx_skb[i]) {
+			dma_unmap_single(lp->dmadev, lp->rx_skb_dma[i],
+					 KORINA_RBSIZE, DMA_FROM_DEVICE);
 			dev_kfree_skb_any(lp->rx_skb[i]);
-		lp->rx_skb[i] = NULL;
+			lp->rx_skb[i] = NULL;
+		}
 	}
 
 	for (i = 0; i < KORINA_NUM_TDS; i++) {
 		lp->td_ring[i].control = 0;
-		if (lp->tx_skb[i])
+		if (lp->tx_skb[i]) {
+			dma_unmap_single(lp->dmadev, lp->tx_skb_dma[i],
+					 lp->tx_skb[i]->len, DMA_TO_DEVICE);
 			dev_kfree_skb_any(lp->tx_skb[i]);
-		lp->tx_skb[i] = NULL;
+			lp->tx_skb[i] = NULL;
+		}
 	}
 }
 
@@ -818,7 +860,7 @@ static int korina_init(struct net_device *dev)
 	writel(0, &lp->rx_dma_regs->dmas);
 	/* Start Rx DMA */
 	writel(0, &lp->rx_dma_regs->dmandptr);
-	writel(CPHYSADDR(&lp->rd_ring[0]), &lp->rx_dma_regs->dmadptr);
+	writel(korina_rx_dma(lp, 0), &lp->rx_dma_regs->dmadptr);
 
 	writel(readl(&lp->tx_dma_regs->dmasm) &
 			~(DMA_STAT_FINI | DMA_STAT_ERR),
@@ -1053,21 +1095,21 @@ static int korina_probe(struct platform_device *pdev)
 	}
 	lp->tx_dma_regs = p;
 
-	lp->td_ring = kmalloc(TD_RING_SIZE + RD_RING_SIZE, GFP_KERNEL);
+	lp->td_ring = dmam_alloc_coherent(&pdev->dev, TD_RING_SIZE,
+					  &lp->td_dma, GFP_KERNEL);
 	if (!lp->td_ring)
 		return -ENOMEM;
 
-	dma_cache_inv((unsigned long)(lp->td_ring),
-			TD_RING_SIZE + RD_RING_SIZE);
-
-	/* now convert TD_RING pointer to KSEG1 */
-	lp->td_ring = (struct dma_desc *)KSEG1ADDR(lp->td_ring);
-	lp->rd_ring = &lp->td_ring[KORINA_NUM_TDS];
+	lp->rd_ring = dmam_alloc_coherent(&pdev->dev, RD_RING_SIZE,
+					  &lp->rd_dma, GFP_KERNEL);
+	if (!lp->rd_ring)
+		return -ENOMEM;
 
 	spin_lock_init(&lp->lock);
 	/* just use the rx dma irq */
 	dev->irq = lp->rx_irq;
 	lp->dev = dev;
+	lp->dmadev = &pdev->dev;
 
 	dev->netdev_ops = &korina_netdev_ops;
 	dev->ethtool_ops = &netdev_ethtool_ops;
@@ -1085,7 +1127,6 @@ static int korina_probe(struct platform_device *pdev)
 	if (rc < 0) {
 		printk(KERN_ERR DRV_NAME
 			": cannot register net device: %d\n", rc);
-		kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
 		return rc;
 	}
 	timer_setup(&lp->media_check_timer, korina_poll_media, 0);
@@ -1100,9 +1141,6 @@ static int korina_probe(struct platform_device *pdev)
 static int korina_remove(struct platform_device *pdev)
 {
 	struct korina_device *bif = platform_get_drvdata(pdev);
-	struct korina_private *lp = netdev_priv(bif->dev);
-
-	kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring));
 
 	unregister_netdev(bif->dev);
 
-- 
cgit v1.2.3


From af80425e05b23e937e4a3490442f37eedb5242f6 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:44 +0200
Subject: net: korina: Only pass mac address via platform data

Get rid of access to struct korina_device by just passing the mac
address via platform data and use drvdata for passing netdev to remove
function.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/rb532/devices.c     |  5 +++--
 drivers/net/ethernet/korina.c | 11 ++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index dd34f1b32b79..5fc3c8ee4f31 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -105,6 +105,9 @@ static struct platform_device korina_dev0 = {
 	.name = "korina",
 	.resource = korina_dev0_res,
 	.num_resources = ARRAY_SIZE(korina_dev0_res),
+	.dev = {
+		.platform_data = &korina_dev0_data.mac,
+	}
 };
 
 static struct resource cf_slot0_res[] = {
@@ -299,8 +302,6 @@ static int __init plat_setup_devices(void)
 	/* set the uart clock to the current cpu frequency */
 	rb532_uart_res[0].uartclk = idt_cpu_freq;
 
-	dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data);
-
 	gpiod_add_lookup_table(&cf_slot0_gpio_table);
 	return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
 }
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 44fad9e924ca..d6dbbdd43d7c 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1055,7 +1055,7 @@ static const struct net_device_ops korina_netdev_ops = {
 
 static int korina_probe(struct platform_device *pdev)
 {
-	struct korina_device *bif = platform_get_drvdata(pdev);
+	u8 *mac_addr = dev_get_platdata(&pdev->dev);
 	struct korina_private *lp;
 	struct net_device *dev;
 	void __iomem *p;
@@ -1068,8 +1068,7 @@ static int korina_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	lp = netdev_priv(dev);
 
-	bif->dev = dev;
-	memcpy(dev->dev_addr, bif->mac, ETH_ALEN);
+	memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
 
 	lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx");
 	lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx");
@@ -1123,6 +1122,8 @@ static int korina_probe(struct platform_device *pdev)
 	lp->mii_if.phy_id_mask = 0x1f;
 	lp->mii_if.reg_num_mask = 0x1f;
 
+	platform_set_drvdata(pdev, dev);
+
 	rc = register_netdev(dev);
 	if (rc < 0) {
 		printk(KERN_ERR DRV_NAME
@@ -1140,9 +1141,9 @@ static int korina_probe(struct platform_device *pdev)
 
 static int korina_remove(struct platform_device *pdev)
 {
-	struct korina_device *bif = platform_get_drvdata(pdev);
+	struct net_device *dev = platform_get_drvdata(pdev);
 
-	unregister_netdev(bif->dev);
+	unregister_netdev(dev);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 10b26f0781511dc5c1b29303ee431cad08aa9944 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:45 +0200
Subject: net: korina: Add support for device tree

If there is no mac address passed via platform data try to get it via
device tree and fall back to a random mac address, if all fail.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/rb532/devices.c     | 20 +++++---------------
 drivers/net/ethernet/korina.c | 32 +++++++++++++++++++++++++-------
 2 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 5fc3c8ee4f31..04684990e28e 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -58,37 +58,27 @@ EXPORT_SYMBOL(get_latch_u5);
 
 static struct resource korina_dev0_res[] = {
 	{
-		.name = "korina_regs",
+		.name = "emac",
 		.start = ETH0_BASE_ADDR,
 		.end = ETH0_BASE_ADDR + sizeof(struct eth_regs),
 		.flags = IORESOURCE_MEM,
 	 }, {
-		.name = "korina_rx",
+		.name = "rx",
 		.start = ETH0_DMA_RX_IRQ,
 		.end = ETH0_DMA_RX_IRQ,
 		.flags = IORESOURCE_IRQ
 	}, {
-		.name = "korina_tx",
+		.name = "tx",
 		.start = ETH0_DMA_TX_IRQ,
 		.end = ETH0_DMA_TX_IRQ,
 		.flags = IORESOURCE_IRQ
 	}, {
-		.name = "korina_ovr",
-		.start = ETH0_RX_OVR_IRQ,
-		.end = ETH0_RX_OVR_IRQ,
-		.flags = IORESOURCE_IRQ
-	}, {
-		.name = "korina_und",
-		.start = ETH0_TX_UND_IRQ,
-		.end = ETH0_TX_UND_IRQ,
-		.flags = IORESOURCE_IRQ
-	}, {
-		.name = "korina_dma_rx",
+		.name = "dma_rx",
 		.start = ETH0_RX_DMA_ADDR,
 		.end = ETH0_RX_DMA_ADDR + DMA_CHAN_OFFSET - 1,
 		.flags = IORESOURCE_MEM,
 	 }, {
-		.name = "korina_dma_tx",
+		.name = "dma_tx",
 		.start = ETH0_TX_DMA_ADDR,
 		.end = ETH0_TX_DMA_ADDR + DMA_CHAN_OFFSET - 1,
 		.flags = IORESOURCE_MEM,
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index d6dbbdd43d7c..a1f53d7753ae 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -43,6 +43,8 @@
 #include <linux/ioport.h>
 #include <linux/iopoll.h>
 #include <linux/in.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/delay.h>
@@ -1068,26 +1070,29 @@ static int korina_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	lp = netdev_priv(dev);
 
-	memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
+	if (mac_addr)
+		ether_addr_copy(dev->dev_addr, mac_addr);
+	else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0)
+		eth_hw_addr_random(dev);
 
-	lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx");
-	lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx");
+	lp->rx_irq = platform_get_irq_byname(pdev, "rx");
+	lp->tx_irq = platform_get_irq_byname(pdev, "tx");
 
-	p = devm_platform_ioremap_resource_byname(pdev, "korina_regs");
+	p = devm_platform_ioremap_resource_byname(pdev, "emac");
 	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap registers\n");
 		return -ENOMEM;
 	}
 	lp->eth_regs = p;
 
-	p = devm_platform_ioremap_resource_byname(pdev, "korina_dma_rx");
+	p = devm_platform_ioremap_resource_byname(pdev, "dma_rx");
 	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n");
 		return -ENOMEM;
 	}
 	lp->rx_dma_regs = p;
 
-	p = devm_platform_ioremap_resource_byname(pdev, "korina_dma_tx");
+	p = devm_platform_ioremap_resource_byname(pdev, "dma_tx");
 	if (!p) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n");
 		return -ENOMEM;
@@ -1148,8 +1153,21 @@ static int korina_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id korina_match[] = {
+	{
+		.compatible = "idt,3243x-emac",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, korina_match);
+#endif
+
 static struct platform_driver korina_driver = {
-	.driver.name = "korina",
+	.driver = {
+		.name = "korina",
+		.of_match_table = of_match_ptr(korina_match),
+	},
 	.probe = korina_probe,
 	.remove = korina_remove,
 };
-- 
cgit v1.2.3


From e4cd854ec487fde631fe57049f588d2396da281c Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:46 +0200
Subject: net: korina: Get mdio input clock via common clock framework

With device tree clock is provided via CCF. For non device tree
use a maximum clock value to not overclock the PHY. The non device
tree usage will go away after platform is converted to DT.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index a1f53d7753ae..19f226428f89 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -57,14 +57,13 @@
 #include <linux/ethtool.h>
 #include <linux/crc32.h>
 #include <linux/pgtable.h>
+#include <linux/clk.h>
 
 #include <asm/bootinfo.h>
 #include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 
-#include <asm/mach-rc32434/rb.h>
-#include <asm/mach-rc32434/rc32434.h>
 #include <asm/mach-rc32434/eth.h>
 #include <asm/mach-rc32434/dma_v.h>
 
@@ -146,10 +145,9 @@ struct korina_private {
 	struct work_struct restart_task;
 	struct net_device *dev;
 	struct device *dmadev;
+	int mii_clock_freq;
 };
 
-extern unsigned int idt_cpu_freq;
-
 static dma_addr_t korina_tx_dma(struct korina_private *lp, int idx)
 {
 	return lp->td_dma + (idx * sizeof(struct dma_desc));
@@ -899,8 +897,8 @@ static int korina_init(struct net_device *dev)
 
 	/* Management Clock Prescaler Divisor
 	 * Clock independent setting */
-	writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1,
-			&lp->eth_regs->ethmcp);
+	writel(((lp->mii_clock_freq) / MII_CLOCK + 1) & ~1,
+	       &lp->eth_regs->ethmcp);
 	writel(0, &lp->eth_regs->miimcfg);
 
 	/* don't transmit until fifo contains 48b */
@@ -1060,6 +1058,7 @@ static int korina_probe(struct platform_device *pdev)
 	u8 *mac_addr = dev_get_platdata(&pdev->dev);
 	struct korina_private *lp;
 	struct net_device *dev;
+	struct clk *clk;
 	void __iomem *p;
 	int rc;
 
@@ -1075,6 +1074,16 @@ static int korina_probe(struct platform_device *pdev)
 	else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0)
 		eth_hw_addr_random(dev);
 
+	clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+	if (clk) {
+		clk_prepare_enable(clk);
+		lp->mii_clock_freq = clk_get_rate(clk);
+	} else {
+		lp->mii_clock_freq = 200000000; /* max possible input clk */
+	}
+
 	lp->rx_irq = platform_get_irq_byname(pdev, "rx");
 	lp->tx_irq = platform_get_irq_byname(pdev, "tx");
 
-- 
cgit v1.2.3


From 6ef92063bf94cd8a6fa9fea3a82596955eb25424 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:47 +0200
Subject: net: korina: Make driver COMPILE_TESTable

Move structs/defines for ethernet/dma register into driver, since they
are only used for this driver and remove any MIPS specific includes.
This makes it possible to COMPILE_TEST the driver.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/Kconfig  |   2 +-
 drivers/net/ethernet/korina.c | 249 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 239 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 172cae5dee4a..1cdff1dca790 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -99,7 +99,7 @@ config JME
 
 config KORINA
 	tristate "Korina (IDT RC32434) Ethernet support"
-	depends on MIKROTIK_RB532
+	depends on MIKROTIK_RB532 || COMPILE_TEST
 	select MII
 	help
 	  If you have a Mikrotik RouterBoard 500 or IDT RC32434
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 19f226428f89..4878e527e3c8 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -59,18 +59,244 @@
 #include <linux/pgtable.h>
 #include <linux/clk.h>
 
-#include <asm/bootinfo.h>
-#include <asm/bitops.h>
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <asm/mach-rc32434/eth.h>
-#include <asm/mach-rc32434/dma_v.h>
-
 #define DRV_NAME	"korina"
 #define DRV_VERSION	"0.20"
 #define DRV_RELDATE	"15Sep2017"
 
+struct eth_regs {
+	u32 ethintfc;
+	u32 ethfifott;
+	u32 etharc;
+	u32 ethhash0;
+	u32 ethhash1;
+	u32 ethu0[4];		/* Reserved. */
+	u32 ethpfs;
+	u32 ethmcp;
+	u32 eth_u1[10];		/* Reserved. */
+	u32 ethspare;
+	u32 eth_u2[42];		/* Reserved. */
+	u32 ethsal0;
+	u32 ethsah0;
+	u32 ethsal1;
+	u32 ethsah1;
+	u32 ethsal2;
+	u32 ethsah2;
+	u32 ethsal3;
+	u32 ethsah3;
+	u32 ethrbc;
+	u32 ethrpc;
+	u32 ethrupc;
+	u32 ethrfc;
+	u32 ethtbc;
+	u32 ethgpf;
+	u32 eth_u9[50];		/* Reserved. */
+	u32 ethmac1;
+	u32 ethmac2;
+	u32 ethipgt;
+	u32 ethipgr;
+	u32 ethclrt;
+	u32 ethmaxf;
+	u32 eth_u10;		/* Reserved. */
+	u32 ethmtest;
+	u32 miimcfg;
+	u32 miimcmd;
+	u32 miimaddr;
+	u32 miimwtd;
+	u32 miimrdd;
+	u32 miimind;
+	u32 eth_u11;		/* Reserved. */
+	u32 eth_u12;		/* Reserved. */
+	u32 ethcfsa0;
+	u32 ethcfsa1;
+	u32 ethcfsa2;
+};
+
+/* Ethernet interrupt registers */
+#define ETH_INT_FC_EN		BIT(0)
+#define ETH_INT_FC_ITS		BIT(1)
+#define ETH_INT_FC_RIP		BIT(2)
+#define ETH_INT_FC_JAM		BIT(3)
+#define ETH_INT_FC_OVR		BIT(4)
+#define ETH_INT_FC_UND		BIT(5)
+#define ETH_INT_FC_IOC		0x000000c0
+
+/* Ethernet FIFO registers */
+#define ETH_FIFI_TT_TTH_BIT	0
+#define ETH_FIFO_TT_TTH		0x0000007f
+
+/* Ethernet ARC/multicast registers */
+#define ETH_ARC_PRO		BIT(0)
+#define ETH_ARC_AM		BIT(1)
+#define ETH_ARC_AFM		BIT(2)
+#define ETH_ARC_AB		BIT(3)
+
+/* Ethernet SAL registers */
+#define ETH_SAL_BYTE_5		0x000000ff
+#define ETH_SAL_BYTE_4		0x0000ff00
+#define ETH_SAL_BYTE_3		0x00ff0000
+#define ETH_SAL_BYTE_2		0xff000000
+
+/* Ethernet SAH registers */
+#define ETH_SAH_BYTE1		0x000000ff
+#define ETH_SAH_BYTE0		0x0000ff00
+
+/* Ethernet GPF register */
+#define ETH_GPF_PTV		0x0000ffff
+
+/* Ethernet PFG register */
+#define ETH_PFS_PFD		BIT(0)
+
+/* Ethernet CFSA[0-3] registers */
+#define ETH_CFSA0_CFSA4		0x000000ff
+#define ETH_CFSA0_CFSA5		0x0000ff00
+#define ETH_CFSA1_CFSA2		0x000000ff
+#define ETH_CFSA1_CFSA3		0x0000ff00
+#define ETH_CFSA1_CFSA0		0x000000ff
+#define ETH_CFSA1_CFSA1		0x0000ff00
+
+/* Ethernet MAC1 registers */
+#define ETH_MAC1_RE		BIT(0)
+#define ETH_MAC1_PAF		BIT(1)
+#define ETH_MAC1_RFC		BIT(2)
+#define ETH_MAC1_TFC		BIT(3)
+#define ETH_MAC1_LB		BIT(4)
+#define ETH_MAC1_MR		BIT(31)
+
+/* Ethernet MAC2 registers */
+#define ETH_MAC2_FD		BIT(0)
+#define ETH_MAC2_FLC		BIT(1)
+#define ETH_MAC2_HFE		BIT(2)
+#define ETH_MAC2_DC		BIT(3)
+#define ETH_MAC2_CEN		BIT(4)
+#define ETH_MAC2_PE		BIT(5)
+#define ETH_MAC2_VPE		BIT(6)
+#define ETH_MAC2_APE		BIT(7)
+#define ETH_MAC2_PPE		BIT(8)
+#define ETH_MAC2_LPE		BIT(9)
+#define ETH_MAC2_NB		BIT(12)
+#define ETH_MAC2_BP		BIT(13)
+#define ETH_MAC2_ED		BIT(14)
+
+/* Ethernet IPGT register */
+#define ETH_IPGT		0x0000007f
+
+/* Ethernet IPGR registers */
+#define ETH_IPGR_IPGR2		0x0000007f
+#define ETH_IPGR_IPGR1		0x00007f00
+
+/* Ethernet CLRT registers */
+#define ETH_CLRT_MAX_RET	0x0000000f
+#define ETH_CLRT_COL_WIN	0x00003f00
+
+/* Ethernet MAXF register */
+#define ETH_MAXF		0x0000ffff
+
+/* Ethernet test registers */
+#define ETH_TEST_REG		BIT(2)
+#define ETH_MCP_DIV		0x000000ff
+
+/* MII registers */
+#define ETH_MII_CFG_RSVD	0x0000000c
+#define ETH_MII_CMD_RD		BIT(0)
+#define ETH_MII_CMD_SCN		BIT(1)
+#define ETH_MII_REG_ADDR	0x0000001f
+#define ETH_MII_PHY_ADDR	0x00001f00
+#define ETH_MII_WTD_DATA	0x0000ffff
+#define ETH_MII_RDD_DATA	0x0000ffff
+#define ETH_MII_IND_BSY		BIT(0)
+#define ETH_MII_IND_SCN		BIT(1)
+#define ETH_MII_IND_NV		BIT(2)
+
+/* Values for the DEVCS field of the Ethernet DMA Rx and Tx descriptors. */
+#define ETH_RX_FD		BIT(0)
+#define ETH_RX_LD		BIT(1)
+#define ETH_RX_ROK		BIT(2)
+#define ETH_RX_FM		BIT(3)
+#define ETH_RX_MP		BIT(4)
+#define ETH_RX_BP		BIT(5)
+#define ETH_RX_VLT		BIT(6)
+#define ETH_RX_CF		BIT(7)
+#define ETH_RX_OVR		BIT(8)
+#define ETH_RX_CRC		BIT(9)
+#define ETH_RX_CV		BIT(10)
+#define ETH_RX_DB		BIT(11)
+#define ETH_RX_LE		BIT(12)
+#define ETH_RX_LOR		BIT(13)
+#define ETH_RX_CES		BIT(14)
+#define ETH_RX_LEN_BIT		16
+#define ETH_RX_LEN		0xffff0000
+
+#define ETH_TX_FD		BIT(0)
+#define ETH_TX_LD		BIT(1)
+#define ETH_TX_OEN		BIT(2)
+#define ETH_TX_PEN		BIT(3)
+#define ETH_TX_CEN		BIT(4)
+#define ETH_TX_HEN		BIT(5)
+#define ETH_TX_TOK		BIT(6)
+#define ETH_TX_MP		BIT(7)
+#define ETH_TX_BP		BIT(8)
+#define ETH_TX_UND		BIT(9)
+#define ETH_TX_OF		BIT(10)
+#define ETH_TX_ED		BIT(11)
+#define ETH_TX_EC		BIT(12)
+#define ETH_TX_LC		BIT(13)
+#define ETH_TX_TD		BIT(14)
+#define ETH_TX_CRC		BIT(15)
+#define ETH_TX_LE		BIT(16)
+#define ETH_TX_CC		0x001E0000
+
+/* DMA descriptor (in physical memory). */
+struct dma_desc {
+	u32 control;			/* Control. use DMAD_* */
+	u32 ca;				/* Current Address. */
+	u32 devcs;			/* Device control and status. */
+	u32 link;			/* Next descriptor in chain. */
+};
+
+#define DMA_DESC_COUNT_BIT		0
+#define DMA_DESC_COUNT_MSK		0x0003ffff
+#define DMA_DESC_DS_BIT			20
+#define DMA_DESC_DS_MSK			0x00300000
+
+#define DMA_DESC_DEV_CMD_BIT		22
+#define DMA_DESC_DEV_CMD_MSK		0x01c00000
+
+/* DMA descriptors interrupts */
+#define DMA_DESC_COF			BIT(25) /* Chain on finished */
+#define DMA_DESC_COD			BIT(26) /* Chain on done */
+#define DMA_DESC_IOF			BIT(27) /* Interrupt on finished */
+#define DMA_DESC_IOD			BIT(28) /* Interrupt on done */
+#define DMA_DESC_TERM			BIT(29) /* Terminated */
+#define DMA_DESC_DONE			BIT(30) /* Done */
+#define DMA_DESC_FINI			BIT(31) /* Finished */
+
+/* DMA register (within Internal Register Map).  */
+struct dma_reg {
+	u32 dmac;		/* Control. */
+	u32 dmas;		/* Status. */
+	u32 dmasm;		/* Mask. */
+	u32 dmadptr;		/* Descriptor pointer. */
+	u32 dmandptr;		/* Next descriptor pointer. */
+};
+
+/* DMA channels specific registers */
+#define DMA_CHAN_RUN_BIT		BIT(0)
+#define DMA_CHAN_DONE_BIT		BIT(1)
+#define DMA_CHAN_MODE_BIT		BIT(2)
+#define DMA_CHAN_MODE_MSK		0x0000000c
+#define	 DMA_CHAN_MODE_AUTO		0
+#define	 DMA_CHAN_MODE_BURST		1
+#define	 DMA_CHAN_MODE_XFRT		2
+#define	 DMA_CHAN_MODE_RSVD		3
+#define DMA_CHAN_ACT_BIT		BIT(4)
+
+/* DMA status registers */
+#define DMA_STAT_FINI			BIT(0)
+#define DMA_STAT_DONE			BIT(1)
+#define DMA_STAT_CHAIN			BIT(2)
+#define DMA_STAT_ERR			BIT(3)
+#define DMA_STAT_HALT			BIT(4)
+
 #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \
 				   ((dev)->dev_addr[1]))
 #define STATION_ADDRESS_LOW(dev)  (((dev)->dev_addr[2] << 24) | \
@@ -100,6 +326,7 @@ enum chain_status {
 	desc_empty
 };
 
+#define DMA_COUNT(count)	((count) & DMA_DESC_COUNT_MSK)
 #define IS_DMA_FINISHED(X)	(((X) & (DMA_DESC_FINI)) != 0)
 #define IS_DMA_DONE(X)		(((X) & (DMA_DESC_DONE)) != 0)
 #define RCVPKT_LENGTH(X)	(((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT)
@@ -453,14 +680,14 @@ next:
 
 		lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK;
 		rd = &lp->rd_ring[lp->rx_next_done];
-		writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas);
+		writel((u32)~DMA_STAT_DONE, &lp->rx_dma_regs->dmas);
 	}
 
 	dmas = readl(&lp->rx_dma_regs->dmas);
 
 	if (dmas & DMA_STAT_HALT) {
-		writel(~(DMA_STAT_HALT | DMA_STAT_ERR),
-				&lp->rx_dma_regs->dmas);
+		writel((u32)~(DMA_STAT_HALT | DMA_STAT_ERR),
+		       &lp->rx_dma_regs->dmas);
 
 		lp->dma_halt_cnt++;
 		rd->devcs = 0;
-- 
cgit v1.2.3


From d1a2c2315cc90f7220da8975b79349cf727334ec Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 19 Apr 2021 00:19:48 +0200
Subject: dt-bindings: net: korina: Add DT bindings for IDT 79RC3243x SoCs

Add device tree bindings for ethernet controller integrated into
IDT 79RC3243x SoCs.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/idt,3243x-emac.yaml    | 73 ++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/idt,3243x-emac.yaml

diff --git a/Documentation/devicetree/bindings/net/idt,3243x-emac.yaml b/Documentation/devicetree/bindings/net/idt,3243x-emac.yaml
new file mode 100644
index 000000000000..11ffc306dd54
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/idt,3243x-emac.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/idt,3243x-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: IDT 79rc3243x Ethernet controller
+
+description: Ethernet controller integrated into IDT 79RC3243x family SoCs
+
+maintainers:
+  - Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+properties:
+  compatible:
+    const: idt,3243x-emac
+
+  reg:
+    maxItems: 3
+
+  reg-names:
+    items:
+      - const: emac
+      - const: dma_rx
+      - const: dma_tx
+
+  interrupts:
+    items:
+      - description: RX interrupt
+      - description: TX interrupt
+
+  interrupt-names:
+    items:
+      - const: rx
+      - const: tx
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: mdioclk
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+
+additionalProperties: false
+
+examples:
+  - |
+
+    ethernet@60000 {
+        compatible = "idt,3243x-emac";
+
+        reg = <0x60000 0x10000>,
+              <0x40000 0x14>,
+              <0x40014 0x14>;
+        reg-names = "emac", "dma_rx", "dma_tx";
+
+        interrupt-parent = <&rcpic3>;
+        interrupts = <0>, <1>;
+        interrupt-names = "rx", "tx";
+
+        clocks = <&iclk>;
+        clock-names = "mdioclk";
+    };
-- 
cgit v1.2.3


From d7f576dc98364fcb076234f76b54d07cdeca54eb Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Mon, 19 Apr 2021 19:25:30 +0800
Subject: net: stmmac: fix memory leak during driver probe

On driver probe, kmemleak reported the following memory leak which was
due to allocated bitmap that was not being freed in stmmac_dvr_probe().

unreferenced object 0xffff9276014b13c0 (size 8):
  comm "systemd-udevd", pid 2143, jiffies 4294681112 (age 116.720s)
  hex dump (first 8 bytes):
    00 00 00 00 00 00 00 00                          ........
  backtrace:
    [<00000000c51e34b2>] stmmac_dvr_probe+0x1c0/0x440 [stmmac]
    [<00000000b530eb41>] intel_eth_pci_probe.cold+0x2b/0x14e [dwmac_intel]
    [<00000000b10f8929>] pci_device_probe+0xd2/0x150
    [<00000000fb254c74>] really_probe+0xf8/0x410
    [<0000000034128a59>] driver_probe_device+0x5d/0x150
    [<00000000016104d5>] device_driver_attach+0x53/0x60
    [<00000000cb18cd07>] __driver_attach+0x96/0x140
    [<00000000da9ffd5c>] bus_for_each_dev+0x7a/0xc0
    [<00000000af061a88>] bus_add_driver+0x184/0x1f0
    [<000000008be5c1c5>] driver_register+0x6c/0xc0
    [<0000000052b18a9e>] do_one_initcall+0x4d/0x210
    [<00000000154d4f07>] do_init_module+0x5c/0x230
    [<000000009b648d09>] load_module+0x2a5a/0x2d40
    [<000000000d86b76d>] __do_sys_finit_module+0xb5/0x120
    [<000000002b0cef95>] do_syscall_64+0x33/0x40
    [<0000000067b45bbb>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: bba2556efad6 ("net: stmmac: Enable RX via AF_XDP zero-copy")
Cc: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9f396648d76f..d1ca07c846e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7035,6 +7035,7 @@ error_mdio_register:
 error_hw_init:
 	destroy_workqueue(priv->wq);
 	stmmac_bus_clks_config(priv, false);
+	bitmap_free(priv->af_xdp_zc_qps);
 
 	return ret;
 }
@@ -7077,6 +7078,7 @@ int stmmac_dvr_remove(struct device *dev)
 		stmmac_mdio_unregister(ndev);
 	destroy_workqueue(priv->wq);
 	mutex_destroy(&priv->lock);
+	bitmap_free(priv->af_xdp_zc_qps);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 7ad18ff6449cbd6beb26b53128ddf56d2685aa93 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Mon, 19 Apr 2021 12:53:06 +0000
Subject: gro: fix napi_gro_frags() Fast GRO breakage due to IP alignment check

Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
did the right thing, but missed the fact that napi_gro_frags() logics
calls for skb_gro_reset_offset() *before* pulling Ethernet header
to the skb linear space.
That said, the introduced check for frag0 address being aligned to 4
always fails for it as Ethernet header is obviously 14 bytes long,
and in case with NET_IP_ALIGN its start is not aligned to 4.

Fix this by adding @nhoff argument to skb_gro_reset_offset() which
tells if an IP header is placed right at the start of frag0 or not.
This restores Fast GRO for napi_gro_frags() that became very slow
after the mentioned commit, and preserves the introduced check to
avoid silent unaligned accesses.

From v1 [0]:
 - inline tiny skb_gro_reset_offset() to let the code be optimized
   more efficively (esp. for the !NET_IP_ALIGN case) (Eric);
 - pull in Reviewed-by from Eric.

[0] https://lore.kernel.org/netdev/20210418114200.5839-1-alobakin@pm.me

Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1f79b9aa9a3f..15fe36332fb8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 	return head;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
 {
 	const struct skb_shared_info *pinfo = skb_shinfo(skb);
 	const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 
 	if (!skb_headlen(skb) && pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0)) &&
-	    (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+	    (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
 		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
 						    skb_frag_size(frag0),
@@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);
 
-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, 0);
 
 	ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
 	trace_napi_gro_receive_exit(ret);
@@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 	napi->skb = NULL;
 
 	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, hlen);
 
 	if (unlikely(skb_gro_header_hard(skb, hlen))) {
 		eth = skb_gro_header_slow(skb, hlen, 0);
-- 
cgit v1.2.3


From c1102e9d49eb36c0be18cb3e16f6e46ffb717964 Mon Sep 17 00:00:00 2001
From: Di Zhu <zhudi21@huawei.com>
Date: Mon, 19 Apr 2021 21:56:41 +0800
Subject: net: fix a data race when get vlan device

We encountered a crash: in the packet receiving process, we got an
illegal VLAN device address, but the VLAN device address saved in vmcore
is correct. After checking the code, we found a possible data
competition:
CPU 0:                             CPU 1:
    (RCU read lock)                  (RTNL lock)
    vlan_do_receive()		       register_vlan_dev()
      vlan_find_dev()

        ->__vlan_group_get_device()	 ->vlan_group_prealloc_vid()

In vlan_group_prealloc_vid(), We need to make sure that memset()
in kzalloc() is executed before assigning  value to vlan devices array:
=================================
kzalloc()
    ->memset(object, 0, size)

smp_wmb()

vg->vlan_devices_arrays[pidx][vidx] = array;
==================================

Because __vlan_group_get_device() function depends on this order.
otherwise we may get a wrong address from the hardware cache on
another cpu.

So fix it by adding memory barrier instruction to ensure the order
of memory operations.

Signed-off-by: Di Zhu <zhudi21@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 3 +++
 net/8021q/vlan.h | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8b644113715e..fb3d3262dc1a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -71,6 +71,9 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
 	if (array == NULL)
 		return -ENOBUFS;
 
+	/* paired with smp_rmb() in __vlan_group_get_device() */
+	smp_wmb();
+
 	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 953405362795..fa3ad3d4d58c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -57,6 +57,10 @@ static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
 
 	array = vg->vlan_devices_arrays[pidx]
 				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+	/* paired with smp_wmb() in vlan_group_prealloc_vid() */
+	smp_rmb();
+
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
-- 
cgit v1.2.3


From ced97eea3974e6eaa33e771d8790f4c0ada0d226 Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Sat, 17 Apr 2021 02:17:51 +0300
Subject: net: marvell: prestera: add support for AC3X 98DX3265 device

Add PCI match for AC3X 98DX3265 device which is supported by the current
driver and firmware.

Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/prestera/prestera_pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index be5677623455..298110119272 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -756,6 +756,7 @@ static void prestera_pci_remove(struct pci_dev *pdev)
 
 static const struct pci_device_id prestera_pci_devices[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC80C) },
 	{ }
 };
 MODULE_DEVICE_TABLE(pci, prestera_pci_devices);
-- 
cgit v1.2.3


From da702f34e3cc4b6b87ed2d63c17d65d841fa81c6 Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Mon, 19 Apr 2021 19:13:59 +0300
Subject: net: phy: add genphy_c45_pma_suspend/resume

Add generic PMA suspend and resume callback functions for C45 PHYs.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h       |  2 ++
 2 files changed, 45 insertions(+)

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 91e3acb9e397..f4816b7d31b3 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -8,6 +8,49 @@
 #include <linux/mii.h>
 #include <linux/phy.h>
 
+/**
+ * genphy_c45_pma_can_sleep - checks if the PMA have sleep support
+ * @phydev: target phy_device struct
+ */
+static bool genphy_c45_pma_can_sleep(struct phy_device *phydev)
+{
+	int stat1;
+
+	stat1 = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT1);
+	if (stat1 < 0)
+		return false;
+
+	return !!(stat1 & MDIO_STAT1_LPOWERABLE);
+}
+
+/**
+ * genphy_c45_pma_resume - wakes up the PMA module
+ * @phydev: target phy_device struct
+ */
+int genphy_c45_pma_resume(struct phy_device *phydev)
+{
+	if (!genphy_c45_pma_can_sleep(phydev))
+		return -EOPNOTSUPP;
+
+	return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+				  MDIO_CTRL1_LPOWER);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_resume);
+
+/**
+ * genphy_c45_pma_suspend - suspends the PMA module
+ * @phydev: target phy_device struct
+ */
+int genphy_c45_pma_suspend(struct phy_device *phydev)
+{
+	if (!genphy_c45_pma_can_sleep(phydev))
+		return -EOPNOTSUPP;
+
+	return phy_set_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+				MDIO_CTRL1_LPOWER);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_suspend);
+
 /**
  * genphy_c45_pma_setup_forced - configures a forced speed
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 98fb441dd72e..e3d4d583463b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1535,6 +1535,8 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 int genphy_c45_read_status(struct phy_device *phydev);
 int genphy_c45_config_aneg(struct phy_device *phydev);
 int genphy_c45_loopback(struct phy_device *phydev, bool enable);
+int genphy_c45_pma_resume(struct phy_device *phydev);
+int genphy_c45_pma_suspend(struct phy_device *phydev);
 
 /* Generic C45 PHY driver */
 extern struct phy_driver genphy_c45_driver;
-- 
cgit v1.2.3


From b050f2f15e04f0416118f689f740fda466a47752 Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Mon, 19 Apr 2021 19:14:00 +0300
Subject: phy: nxp-c45: add driver for tja1103

Add driver for tja1103 driver and for future NXP C45 PHYs.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                       |   6 +
 drivers/net/phy/Kconfig           |   6 +
 drivers/net/phy/Makefile          |   1 +
 drivers/net/phy/nxp-c45-tja11xx.c | 588 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 601 insertions(+)
 create mode 100644 drivers/net/phy/nxp-c45-tja11xx.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 50bf0d61a7b0..c3c8fa572580 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12840,6 +12840,12 @@ F:	drivers/nvmem/
 F:	include/linux/nvmem-consumer.h
 F:	include/linux/nvmem-provider.h
 
+NXP C45 TJA11XX PHY DRIVER
+M:	Radu Pirea <radu-nicolae.pirea@oss.nxp.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/phy/nxp-c45-tja11xx.c
+
 NXP FSPI DRIVER
 M:	Ashish Kumar <ashish.kumar@nxp.com>
 R:	Yogesh Gaur <yogeshgaur.83@gmail.com>
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index a615b3660b05..288bf405ebdb 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -234,6 +234,12 @@ config NATIONAL_PHY
 	help
 	  Currently supports the DP83865 PHY.
 
+config NXP_C45_TJA11XX_PHY
+	tristate "NXP C45 TJA11XX PHYs"
+	help
+	  Enable support for NXP C45 TJA11XX PHYs.
+	  Currently supports only the TJA1103 PHY.
+
 config NXP_TJA11XX_PHY
 	tristate "NXP TJA11xx PHYs support"
 	depends on HWMON
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index de683e3abe63..bcda7ed2455d 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
 obj-$(CONFIG_MICROCHIP_T1_PHY)	+= microchip_t1.o
 obj-$(CONFIG_MICROSEMI_PHY)	+= mscc/
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o
+obj-$(CONFIG_NXP_C45_TJA11XX_PHY)	+= nxp-c45-tja11xx.o
 obj-$(CONFIG_NXP_TJA11XX_PHY)	+= nxp-tja11xx.o
 obj-$(CONFIG_QSEMI_PHY)		+= qsemi.o
 obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
new file mode 100644
index 000000000000..47cacda8836f
--- /dev/null
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/* NXP C45 PHY driver
+ * Copyright (C) 2021 NXP
+ * Author: Radu Pirea <radu-nicolae.pirea@oss.nxp.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/processor.h>
+#include <linux/property.h>
+
+#define PHY_ID_TJA_1103			0x001BB010
+
+#define PMAPMD_B100T1_PMAPMD_CTL	0x0834
+#define B100T1_PMAPMD_CONFIG_EN		BIT(15)
+#define B100T1_PMAPMD_MASTER		BIT(14)
+#define MASTER_MODE			(B100T1_PMAPMD_CONFIG_EN | \
+					 B100T1_PMAPMD_MASTER)
+#define SLAVE_MODE			(B100T1_PMAPMD_CONFIG_EN)
+
+#define VEND1_DEVICE_CONTROL		0x0040
+#define DEVICE_CONTROL_RESET		BIT(15)
+#define DEVICE_CONTROL_CONFIG_GLOBAL_EN	BIT(14)
+#define DEVICE_CONTROL_CONFIG_ALL_EN	BIT(13)
+
+#define VEND1_PHY_CONTROL		0x8100
+#define PHY_CONFIG_EN			BIT(14)
+#define PHY_START_OP			BIT(0)
+
+#define VEND1_PHY_CONFIG		0x8108
+#define PHY_CONFIG_AUTO			BIT(0)
+
+#define VEND1_SIGNAL_QUALITY		0x8320
+#define SQI_VALID			BIT(14)
+#define SQI_MASK			GENMASK(2, 0)
+#define MAX_SQI				SQI_MASK
+
+#define VEND1_CABLE_TEST		0x8330
+#define CABLE_TEST_ENABLE		BIT(15)
+#define CABLE_TEST_START		BIT(14)
+#define CABLE_TEST_VALID		BIT(13)
+#define CABLE_TEST_OK			0x00
+#define CABLE_TEST_SHORTED		0x01
+#define CABLE_TEST_OPEN			0x02
+#define CABLE_TEST_UNKNOWN		0x07
+
+#define VEND1_PORT_CONTROL		0x8040
+#define PORT_CONTROL_EN			BIT(14)
+
+#define VEND1_PORT_INFRA_CONTROL	0xAC00
+#define PORT_INFRA_CONTROL_EN		BIT(14)
+
+#define VEND1_RXID			0xAFCC
+#define VEND1_TXID			0xAFCD
+#define ID_ENABLE			BIT(15)
+
+#define VEND1_ABILITIES			0xAFC4
+#define RGMII_ID_ABILITY		BIT(15)
+#define RGMII_ABILITY			BIT(14)
+#define RMII_ABILITY			BIT(10)
+#define REVMII_ABILITY			BIT(9)
+#define MII_ABILITY			BIT(8)
+#define SGMII_ABILITY			BIT(0)
+
+#define VEND1_MII_BASIC_CONFIG		0xAFC6
+#define MII_BASIC_CONFIG_REV		BIT(8)
+#define MII_BASIC_CONFIG_SGMII		0x9
+#define MII_BASIC_CONFIG_RGMII		0x7
+#define MII_BASIC_CONFIG_RMII		0x5
+#define MII_BASIC_CONFIG_MII		0x4
+
+#define VEND1_SYMBOL_ERROR_COUNTER	0x8350
+#define VEND1_LINK_DROP_COUNTER		0x8352
+#define VEND1_LINK_LOSSES_AND_FAILURES	0x8353
+#define VEND1_R_GOOD_FRAME_CNT		0xA950
+#define VEND1_R_BAD_FRAME_CNT		0xA952
+#define VEND1_R_RXER_FRAME_CNT		0xA954
+#define VEND1_RX_PREAMBLE_COUNT		0xAFCE
+#define VEND1_TX_PREAMBLE_COUNT		0xAFCF
+#define VEND1_RX_IPG_LENGTH		0xAFD0
+#define VEND1_TX_IPG_LENGTH		0xAFD1
+#define COUNTER_EN			BIT(15)
+
+#define RGMII_PERIOD_PS			8000U
+#define PS_PER_DEGREE			div_u64(RGMII_PERIOD_PS, 360)
+#define MIN_ID_PS			1644U
+#define MAX_ID_PS			2260U
+#define DEFAULT_ID_PS			2000U
+
+struct nxp_c45_phy {
+	u32 tx_delay;
+	u32 rx_delay;
+};
+
+struct nxp_c45_phy_stats {
+	const char	*name;
+	u8		mmd;
+	u16		reg;
+	u8		off;
+	u16		mask;
+};
+
+static const struct nxp_c45_phy_stats nxp_c45_hw_stats[] = {
+	{ "phy_symbol_error_cnt", MDIO_MMD_VEND1,
+		VEND1_SYMBOL_ERROR_COUNTER, 0, GENMASK(15, 0) },
+	{ "phy_link_status_drop_cnt", MDIO_MMD_VEND1,
+		VEND1_LINK_DROP_COUNTER, 8, GENMASK(13, 8) },
+	{ "phy_link_availability_drop_cnt", MDIO_MMD_VEND1,
+		VEND1_LINK_DROP_COUNTER, 0, GENMASK(5, 0) },
+	{ "phy_link_loss_cnt", MDIO_MMD_VEND1,
+		VEND1_LINK_LOSSES_AND_FAILURES, 10, GENMASK(15, 10) },
+	{ "phy_link_failure_cnt", MDIO_MMD_VEND1,
+		VEND1_LINK_LOSSES_AND_FAILURES, 0, GENMASK(9, 0) },
+	{ "r_good_frame_cnt", MDIO_MMD_VEND1,
+		VEND1_R_GOOD_FRAME_CNT, 0, GENMASK(15, 0) },
+	{ "r_bad_frame_cnt", MDIO_MMD_VEND1,
+		VEND1_R_BAD_FRAME_CNT, 0, GENMASK(15, 0) },
+	{ "r_rxer_frame_cnt", MDIO_MMD_VEND1,
+		VEND1_R_RXER_FRAME_CNT, 0, GENMASK(15, 0) },
+	{ "rx_preamble_count", MDIO_MMD_VEND1,
+		VEND1_RX_PREAMBLE_COUNT, 0, GENMASK(5, 0) },
+	{ "tx_preamble_count", MDIO_MMD_VEND1,
+		VEND1_TX_PREAMBLE_COUNT, 0, GENMASK(5, 0) },
+	{ "rx_ipg_length", MDIO_MMD_VEND1,
+		VEND1_RX_IPG_LENGTH, 0, GENMASK(8, 0) },
+	{ "tx_ipg_length", MDIO_MMD_VEND1,
+		VEND1_TX_IPG_LENGTH, 0, GENMASK(8, 0) },
+};
+
+static int nxp_c45_get_sset_count(struct phy_device *phydev)
+{
+	return ARRAY_SIZE(nxp_c45_hw_stats);
+}
+
+static void nxp_c45_get_strings(struct phy_device *phydev, u8 *data)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(nxp_c45_hw_stats); i++) {
+		strncpy(data + i * ETH_GSTRING_LEN,
+			nxp_c45_hw_stats[i].name, ETH_GSTRING_LEN);
+	}
+}
+
+static void nxp_c45_get_stats(struct phy_device *phydev,
+			      struct ethtool_stats *stats, u64 *data)
+{
+	size_t i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(nxp_c45_hw_stats); i++) {
+		ret = phy_read_mmd(phydev, nxp_c45_hw_stats[i].mmd,
+				   nxp_c45_hw_stats[i].reg);
+		if (ret < 0) {
+			data[i] = U64_MAX;
+		} else {
+			data[i] = ret & nxp_c45_hw_stats[i].mask;
+			data[i] >>= nxp_c45_hw_stats[i].off;
+		}
+	}
+}
+
+static int nxp_c45_config_enable(struct phy_device *phydev)
+{
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_CONTROL,
+		      DEVICE_CONTROL_CONFIG_GLOBAL_EN |
+		      DEVICE_CONTROL_CONFIG_ALL_EN);
+	usleep_range(400, 450);
+
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_CONTROL,
+		      PORT_CONTROL_EN);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONTROL,
+		      PHY_CONFIG_EN);
+	phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_PORT_INFRA_CONTROL,
+		      PORT_INFRA_CONTROL_EN);
+
+	return 0;
+}
+
+static int nxp_c45_start_op(struct phy_device *phydev)
+{
+	return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONTROL,
+				PHY_START_OP);
+}
+
+static int nxp_c45_soft_reset(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_DEVICE_CONTROL,
+			    DEVICE_CONTROL_RESET);
+	if (ret)
+		return ret;
+
+	return phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1,
+					 VEND1_DEVICE_CONTROL, ret,
+					 !(ret & DEVICE_CONTROL_RESET), 20000,
+					 240000, false);
+}
+
+static int nxp_c45_cable_test_start(struct phy_device *phydev)
+{
+	return phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_CABLE_TEST,
+			     CABLE_TEST_ENABLE | CABLE_TEST_START);
+}
+
+static int nxp_c45_cable_test_get_status(struct phy_device *phydev,
+					 bool *finished)
+{
+	int ret;
+	u8 cable_test_result;
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_CABLE_TEST);
+	if (!(ret & CABLE_TEST_VALID)) {
+		*finished = false;
+		return 0;
+	}
+
+	*finished = true;
+	cable_test_result = ret & GENMASK(2, 0);
+
+	switch (cable_test_result) {
+	case CABLE_TEST_OK:
+		ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+					ETHTOOL_A_CABLE_RESULT_CODE_OK);
+		break;
+	case CABLE_TEST_SHORTED:
+		ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+					ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT);
+		break;
+	case CABLE_TEST_OPEN:
+		ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+					ETHTOOL_A_CABLE_RESULT_CODE_OPEN);
+		break;
+	default:
+		ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+					ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC);
+	}
+
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_CABLE_TEST,
+			   CABLE_TEST_ENABLE);
+
+	return nxp_c45_start_op(phydev);
+}
+
+static int nxp_c45_setup_master_slave(struct phy_device *phydev)
+{
+	switch (phydev->master_slave_set) {
+	case MASTER_SLAVE_CFG_MASTER_FORCE:
+	case MASTER_SLAVE_CFG_MASTER_PREFERRED:
+		phy_write_mmd(phydev, MDIO_MMD_PMAPMD, PMAPMD_B100T1_PMAPMD_CTL,
+			      MASTER_MODE);
+		break;
+	case MASTER_SLAVE_CFG_SLAVE_PREFERRED:
+	case MASTER_SLAVE_CFG_SLAVE_FORCE:
+		phy_write_mmd(phydev, MDIO_MMD_PMAPMD, PMAPMD_B100T1_PMAPMD_CTL,
+			      SLAVE_MODE);
+		break;
+	case MASTER_SLAVE_CFG_UNKNOWN:
+	case MASTER_SLAVE_CFG_UNSUPPORTED:
+		return 0;
+	default:
+		phydev_warn(phydev, "Unsupported Master/Slave mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int nxp_c45_read_master_slave(struct phy_device *phydev)
+{
+	int reg;
+
+	phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN;
+	phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN;
+
+	reg = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, PMAPMD_B100T1_PMAPMD_CTL);
+	if (reg < 0)
+		return reg;
+
+	if (reg & B100T1_PMAPMD_MASTER) {
+		phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE;
+		phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER;
+	} else {
+		phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE;
+		phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE;
+	}
+
+	return 0;
+}
+
+static int nxp_c45_config_aneg(struct phy_device *phydev)
+{
+	return nxp_c45_setup_master_slave(phydev);
+}
+
+static int nxp_c45_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_c45_read_status(phydev);
+	if (ret)
+		return ret;
+
+	ret = nxp_c45_read_master_slave(phydev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int nxp_c45_get_sqi(struct phy_device *phydev)
+{
+	int reg;
+
+	reg = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_SIGNAL_QUALITY);
+	if (!(reg & SQI_VALID))
+		return -EINVAL;
+
+	reg &= SQI_MASK;
+
+	return reg;
+}
+
+static int nxp_c45_get_sqi_max(struct phy_device *phydev)
+{
+	return MAX_SQI;
+}
+
+static int nxp_c45_check_delay(struct phy_device *phydev, u32 delay)
+{
+	if (delay < MIN_ID_PS) {
+		phydev_err(phydev, "delay value smaller than %u\n", MIN_ID_PS);
+		return -EINVAL;
+	}
+
+	if (delay > MAX_ID_PS) {
+		phydev_err(phydev, "delay value higher than %u\n", MAX_ID_PS);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u64 nxp_c45_get_phase_shift(u64 phase_offset_raw)
+{
+	/* The delay in degree phase is 73.8 + phase_offset_raw * 0.9.
+	 * To avoid floating point operations we'll multiply by 10
+	 * and get 1 decimal point precision.
+	 */
+	phase_offset_raw *= 10;
+	phase_offset_raw -= phase_offset_raw;
+	return div_u64(phase_offset_raw, 9);
+}
+
+static void nxp_c45_disable_delays(struct phy_device *phydev)
+{
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_TXID, ID_ENABLE);
+	phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_RXID, ID_ENABLE);
+}
+
+static void nxp_c45_set_delays(struct phy_device *phydev)
+{
+	struct nxp_c45_phy *priv = phydev->priv;
+	u64 tx_delay = priv->tx_delay;
+	u64 rx_delay = priv->rx_delay;
+	u64 degree;
+
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+		degree = div_u64(tx_delay, PS_PER_DEGREE);
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_TXID,
+			      ID_ENABLE | nxp_c45_get_phase_shift(degree));
+	} else {
+		phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_TXID,
+				   ID_ENABLE);
+	}
+
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+		degree = div_u64(rx_delay, PS_PER_DEGREE);
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_RXID,
+			      ID_ENABLE | nxp_c45_get_phase_shift(degree));
+	} else {
+		phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_RXID,
+				   ID_ENABLE);
+	}
+}
+
+static int nxp_c45_get_delays(struct phy_device *phydev)
+{
+	struct nxp_c45_phy *priv = phydev->priv;
+	int ret;
+
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+		ret = device_property_read_u32(&phydev->mdio.dev,
+					       "tx-internal-delay-ps",
+					       &priv->tx_delay);
+		if (ret)
+			priv->tx_delay = DEFAULT_ID_PS;
+
+		ret = nxp_c45_check_delay(phydev, priv->tx_delay);
+		if (ret) {
+			phydev_err(phydev,
+				   "tx-internal-delay-ps invalid value\n");
+			return ret;
+		}
+	}
+
+	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+	    phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+		ret = device_property_read_u32(&phydev->mdio.dev,
+					       "rx-internal-delay-ps",
+					       &priv->rx_delay);
+		if (ret)
+			priv->rx_delay = DEFAULT_ID_PS;
+
+		ret = nxp_c45_check_delay(phydev, priv->rx_delay);
+		if (ret) {
+			phydev_err(phydev,
+				   "rx-internal-delay-ps invalid value\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int nxp_c45_set_phy_mode(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_ABILITIES);
+	phydev_dbg(phydev, "Clause 45 managed PHY abilities 0x%x\n", ret);
+
+	switch (phydev->interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+		if (!(ret & RGMII_ABILITY)) {
+			phydev_err(phydev, "rgmii mode not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_RGMII);
+		nxp_c45_disable_delays(phydev);
+		break;
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		if (!(ret & RGMII_ID_ABILITY)) {
+			phydev_err(phydev, "rgmii-id, rgmii-txid, rgmii-rxid modes are not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_RGMII);
+		ret = nxp_c45_get_delays(phydev);
+		if (ret)
+			return ret;
+
+		nxp_c45_set_delays(phydev);
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		if (!(ret & MII_ABILITY)) {
+			phydev_err(phydev, "mii mode not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_MII);
+		break;
+	case PHY_INTERFACE_MODE_REVMII:
+		if (!(ret & REVMII_ABILITY)) {
+			phydev_err(phydev, "rev-mii mode not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_MII | MII_BASIC_CONFIG_REV);
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		if (!(ret & RMII_ABILITY)) {
+			phydev_err(phydev, "rmii mode not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_RMII);
+		break;
+	case PHY_INTERFACE_MODE_SGMII:
+		if (!(ret & SGMII_ABILITY)) {
+			phydev_err(phydev, "sgmii mode not supported\n");
+			return -EINVAL;
+		}
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_MII_BASIC_CONFIG,
+			      MII_BASIC_CONFIG_SGMII);
+		break;
+	case PHY_INTERFACE_MODE_INTERNAL:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nxp_c45_config_init(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = nxp_c45_config_enable(phydev);
+	if (ret) {
+		phydev_err(phydev, "Failed to enable config\n");
+		return ret;
+	}
+
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_CONFIG,
+			 PHY_CONFIG_AUTO);
+
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_LINK_DROP_COUNTER,
+			 COUNTER_EN);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_RX_PREAMBLE_COUNT,
+			 COUNTER_EN);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_TX_PREAMBLE_COUNT,
+			 COUNTER_EN);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_RX_IPG_LENGTH,
+			 COUNTER_EN);
+	phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_TX_IPG_LENGTH,
+			 COUNTER_EN);
+
+	ret = nxp_c45_set_phy_mode(phydev);
+	if (ret)
+		return ret;
+
+	phydev->autoneg = AUTONEG_DISABLE;
+
+	return nxp_c45_start_op(phydev);
+}
+
+static int nxp_c45_probe(struct phy_device *phydev)
+{
+	struct nxp_c45_phy *priv;
+
+	priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	phydev->priv = priv;
+
+	return 0;
+}
+
+static struct phy_driver nxp_c45_driver[] = {
+	{
+		PHY_ID_MATCH_MODEL(PHY_ID_TJA_1103),
+		.name			= "NXP C45 TJA1103",
+		.features		= PHY_BASIC_T1_FEATURES,
+		.probe			= nxp_c45_probe,
+		.soft_reset		= nxp_c45_soft_reset,
+		.config_aneg		= nxp_c45_config_aneg,
+		.config_init		= nxp_c45_config_init,
+		.read_status		= nxp_c45_read_status,
+		.suspend		= genphy_c45_pma_suspend,
+		.resume			= genphy_c45_pma_resume,
+		.get_sset_count		= nxp_c45_get_sset_count,
+		.get_strings		= nxp_c45_get_strings,
+		.get_stats		= nxp_c45_get_stats,
+		.cable_test_start	= nxp_c45_cable_test_start,
+		.cable_test_get_status	= nxp_c45_cable_test_get_status,
+		.set_loopback		= genphy_c45_loopback,
+		.get_sqi		= nxp_c45_get_sqi,
+		.get_sqi_max		= nxp_c45_get_sqi_max,
+	},
+};
+
+module_phy_driver(nxp_c45_driver);
+
+static struct mdio_device_id __maybe_unused nxp_c45_tbl[] = {
+	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA_1103) },
+	{ /*sentinel*/ },
+};
+
+MODULE_DEVICE_TABLE(mdio, nxp_c45_tbl);
+
+MODULE_AUTHOR("Radu Pirea <radu-nicolae.pirea@oss.nxp.com>");
+MODULE_DESCRIPTION("NXP C45 PHY driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 37434782d63f89de5b9c383a449b6a82dc3fa4fb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 19 Apr 2021 13:02:42 -0700
Subject: bnxt: add more ethtool standard stats

Michael suggest a few more stats we can expose.

$ ethtool -S eth0 --groups eth-mac
Standard stats for eth0:
eth-mac-FramesTransmittedOK: 902623288966
eth-mac-FramesReceivedOK: 28727667047
eth-mac-FrameCheckSequenceErrors: 1
eth-mac-AlignmentErrors: 0
eth-mac-OutOfRangeLengthField: 0
$ ethtool -S eth0 | grep '\(fcs\|align\|oor\)'
     rx_fcs_err_frames: 1
     rx_align_err_frames: 0
     tx_fcs_err_frames: 0

Suggested-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 832252313b18..3b66e300c962 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -4020,6 +4020,12 @@ static void bnxt_get_eth_mac_stats(struct net_device *dev,
 		BNXT_GET_RX_PORT_STATS64(rx, rx_good_frames);
 	mac_stats->FramesTransmittedOK =
 		BNXT_GET_TX_PORT_STATS64(tx, tx_good_frames);
+	mac_stats->FrameCheckSequenceErrors =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_fcs_err_frames);
+	mac_stats->AlignmentErrors =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_align_err_frames);
+	mac_stats->OutOfRangeLengthField =
+		BNXT_GET_RX_PORT_STATS64(rx, rx_oor_len_frames);
 }
 
 static void bnxt_get_eth_ctrl_stats(struct net_device *dev,
-- 
cgit v1.2.3


From d1f0a5e1fb4e2f2f603bec8df79ca51768f2bdae Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 19 Apr 2021 13:03:45 -0700
Subject: ethtool: stats: clarify the initialization to ETHTOOL_STAT_NOT_SET

Ido suggests we add a comment about the init of stats to -1.
This is unlikely to be clear to first time readers.

Suggested-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/stats.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index acb2b080c358..b7642dc96d50 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -114,6 +114,9 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	if (ret < 0)
 		return ret;
 
+	/* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them
+	 * from being reported to user space in case driver did not set them.
+	 */
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
 	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
 	memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
-- 
cgit v1.2.3


From 4acd47644ef1e1c8f8f5bc40b7cf1c5b9bcbbc4e Mon Sep 17 00:00:00 2001
From: Lijun Pan <ljp@linux.ibm.com>
Date: Mon, 19 Apr 2021 16:25:25 -0500
Subject: MAINTAINERS: update

I am making this change again since I received the following instruction.

"As an IBM employee, you are not allowed to use your gmail account to work
in any way on VNIC. You are not allowed to use your personal email account
as a "hobby". You are an IBM employee 100% of the time.
Please remove yourself completely from the maintainers file.
I grant you a 1 time exception on contributions to VNIC to make this
change."

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 95e6766718b0..933a6f3c2369 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8526,7 +8526,6 @@ IBM Power SRIOV Virtual NIC Device Driver
 M:	Dany Madden <drt@linux.ibm.com>
 M:	Sukadev Bhattiprolu <sukadev@linux.ibm.com>
 R:	Thomas Falcon <tlfalcon@linux.ibm.com>
-R:	Lijun Pan <lijunp213@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/ibm/ibmvnic.*
-- 
cgit v1.2.3


From e9377a911d772d27ef2810c241154ba479bad368 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 19 Apr 2021 14:52:35 -0700
Subject: ethtool: add missing EEPROM to list of messages

ETHTOOL_MSG_MODULE_EEPROM_GET is missing from the list of messages.
ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY is sadly a rather long name
so we need to adjust column length.

v2: use spaces (Andrew)

Fixes: c781ff12a2f3 ("ethtool: Allow network drivers to dump arbitrary EEPROM data")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 70 ++++++++++++++--------------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 48cad2fce147..25131df3c2bd 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -210,45 +210,47 @@ Userspace to kernel:
   ``ETHTOOL_MSG_TUNNEL_INFO_GET``       get tunnel offload info
   ``ETHTOOL_MSG_FEC_GET``               get FEC settings
   ``ETHTOOL_MSG_FEC_SET``               set FEC settings
+  ``ETHTOOL_MSG_MODULE_EEPROM_GET``     read SFP module EEPROM
   ``ETHTOOL_MSG_STATS_GET``             get standard statistics
   ===================================== ================================
 
 Kernel to userspace:
 
-  ===================================== =================================
-  ``ETHTOOL_MSG_STRSET_GET_REPLY``      string set contents
-  ``ETHTOOL_MSG_LINKINFO_GET_REPLY``    link settings
-  ``ETHTOOL_MSG_LINKINFO_NTF``          link settings notification
-  ``ETHTOOL_MSG_LINKMODES_GET_REPLY``   link modes info
-  ``ETHTOOL_MSG_LINKMODES_NTF``         link modes notification
-  ``ETHTOOL_MSG_LINKSTATE_GET_REPLY``   link state info
-  ``ETHTOOL_MSG_DEBUG_GET_REPLY``       debugging settings
-  ``ETHTOOL_MSG_DEBUG_NTF``             debugging settings notification
-  ``ETHTOOL_MSG_WOL_GET_REPLY``         wake-on-lan settings
-  ``ETHTOOL_MSG_WOL_NTF``               wake-on-lan settings notification
-  ``ETHTOOL_MSG_FEATURES_GET_REPLY``    device features
-  ``ETHTOOL_MSG_FEATURES_SET_REPLY``    optional reply to FEATURES_SET
-  ``ETHTOOL_MSG_FEATURES_NTF``          netdev features notification
-  ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY``   private flags
-  ``ETHTOOL_MSG_PRIVFLAGS_NTF``         private flags
-  ``ETHTOOL_MSG_RINGS_GET_REPLY``       ring sizes
-  ``ETHTOOL_MSG_RINGS_NTF``             ring sizes
-  ``ETHTOOL_MSG_CHANNELS_GET_REPLY``    channel counts
-  ``ETHTOOL_MSG_CHANNELS_NTF``          channel counts
-  ``ETHTOOL_MSG_COALESCE_GET_REPLY``    coalescing parameters
-  ``ETHTOOL_MSG_COALESCE_NTF``          coalescing parameters
-  ``ETHTOOL_MSG_PAUSE_GET_REPLY``       pause parameters
-  ``ETHTOOL_MSG_PAUSE_NTF``             pause parameters
-  ``ETHTOOL_MSG_EEE_GET_REPLY``         EEE settings
-  ``ETHTOOL_MSG_EEE_NTF``               EEE settings
-  ``ETHTOOL_MSG_TSINFO_GET_REPLY``	timestamping info
-  ``ETHTOOL_MSG_CABLE_TEST_NTF``        Cable test results
-  ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF``    Cable test TDR results
-  ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info
-  ``ETHTOOL_MSG_FEC_GET_REPLY``         FEC settings
-  ``ETHTOOL_MSG_FEC_NTF``               FEC settings
-  ``ETHTOOL_MSG_STATS_GET_REPLY``       standard statistics
-  ===================================== =================================
+  ======================================== =================================
+  ``ETHTOOL_MSG_STRSET_GET_REPLY``         string set contents
+  ``ETHTOOL_MSG_LINKINFO_GET_REPLY``       link settings
+  ``ETHTOOL_MSG_LINKINFO_NTF``             link settings notification
+  ``ETHTOOL_MSG_LINKMODES_GET_REPLY``      link modes info
+  ``ETHTOOL_MSG_LINKMODES_NTF``            link modes notification
+  ``ETHTOOL_MSG_LINKSTATE_GET_REPLY``      link state info
+  ``ETHTOOL_MSG_DEBUG_GET_REPLY``          debugging settings
+  ``ETHTOOL_MSG_DEBUG_NTF``                debugging settings notification
+  ``ETHTOOL_MSG_WOL_GET_REPLY``            wake-on-lan settings
+  ``ETHTOOL_MSG_WOL_NTF``                  wake-on-lan settings notification
+  ``ETHTOOL_MSG_FEATURES_GET_REPLY``       device features
+  ``ETHTOOL_MSG_FEATURES_SET_REPLY``       optional reply to FEATURES_SET
+  ``ETHTOOL_MSG_FEATURES_NTF``             netdev features notification
+  ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY``      private flags
+  ``ETHTOOL_MSG_PRIVFLAGS_NTF``            private flags
+  ``ETHTOOL_MSG_RINGS_GET_REPLY``          ring sizes
+  ``ETHTOOL_MSG_RINGS_NTF``                ring sizes
+  ``ETHTOOL_MSG_CHANNELS_GET_REPLY``       channel counts
+  ``ETHTOOL_MSG_CHANNELS_NTF``             channel counts
+  ``ETHTOOL_MSG_COALESCE_GET_REPLY``       coalescing parameters
+  ``ETHTOOL_MSG_COALESCE_NTF``             coalescing parameters
+  ``ETHTOOL_MSG_PAUSE_GET_REPLY``          pause parameters
+  ``ETHTOOL_MSG_PAUSE_NTF``                pause parameters
+  ``ETHTOOL_MSG_EEE_GET_REPLY``            EEE settings
+  ``ETHTOOL_MSG_EEE_NTF``                  EEE settings
+  ``ETHTOOL_MSG_TSINFO_GET_REPLY``         timestamping info
+  ``ETHTOOL_MSG_CABLE_TEST_NTF``           Cable test results
+  ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF``       Cable test TDR results
+  ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY``    tunnel offload info
+  ``ETHTOOL_MSG_FEC_GET_REPLY``            FEC settings
+  ``ETHTOOL_MSG_FEC_NTF``                  FEC settings
+  ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY``  read SFP module EEPROM
+  ``ETHTOOL_MSG_STATS_GET_REPLY``          standard statistics
+  ======================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
 information. They usually do not contain any message specific attributes.
-- 
cgit v1.2.3


From 137733d08f4ab14a354dacaa9a8fc35217747605 Mon Sep 17 00:00:00 2001
From: Yaqi Chen <chendotjs@gmail.com>
Date: Fri, 16 Apr 2021 23:48:03 +0800
Subject: samples/bpf: Fix broken tracex1 due to kprobe argument change

>From commit c0bbbdc32feb ("__netif_receive_skb_core: pass skb by
reference"), the first argument passed into __netif_receive_skb_core
has changed to reference of a skb pointer.

This commit fixes by using bpf_probe_read_kernel.

Signed-off-by: Yaqi Chen <chendotjs@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210416154803.37157-1-chendotjs@gmail.com
---
 samples/bpf/tracex1_kern.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/bpf/tracex1_kern.c b/samples/bpf/tracex1_kern.c
index 3f4599c9a202..ef30d2b353b0 100644
--- a/samples/bpf/tracex1_kern.c
+++ b/samples/bpf/tracex1_kern.c
@@ -26,7 +26,7 @@
 SEC("kprobe/__netif_receive_skb_core")
 int bpf_prog1(struct pt_regs *ctx)
 {
-	/* attaches to kprobe netif_receive_skb,
+	/* attaches to kprobe __netif_receive_skb_core,
 	 * looks for packets on loobpack device and prints them
 	 */
 	char devname[IFNAMSIZ];
@@ -35,7 +35,7 @@ int bpf_prog1(struct pt_regs *ctx)
 	int len;
 
 	/* non-portable! works for the given kernel only */
-	skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
+	bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx));
 	dev = _(skb->dev);
 	len = _(skb->len);
 
-- 
cgit v1.2.3


From fd0b88f73f5372c08ceff5cc7ddd8ceac502679c Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 16 Apr 2021 13:47:02 -0700
Subject: bpf: Refine retval for bpf_get_task_stack helper

Verifier can constrain the min/max bounds of bpf_get_task_stack's return
value more tightly than the default tnum_unknown. Like bpf_get_stack,
return value is num bytes written into a caller-supplied buf, or error,
so do_refine_retval_range will work.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210416204704.2816874-2-davemarchevsky@fb.com
---
 kernel/bpf/verifier.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 994ef36c5f60..58730872f7e5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5808,6 +5808,7 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 
 	if (ret_type != RET_INTEGER ||
 	    (func_id != BPF_FUNC_get_stack &&
+	     func_id != BPF_FUNC_get_task_stack &&
 	     func_id != BPF_FUNC_probe_read_str &&
 	     func_id != BPF_FUNC_probe_read_kernel_str &&
 	     func_id != BPF_FUNC_probe_read_user_str))
-- 
cgit v1.2.3


From bdc4e369454fcae108e18feb0fcbb6f06815f94b Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 16 Apr 2021 13:47:03 -0700
Subject: bpf/selftests: Add bpf_get_task_stack retval bounds verifier test

Add a bpf_iter test which feeds bpf_get_task_stack's return value into
seq_write after confirming it's positive. No attempt to bound the value
from above is made.

Load will fail if verifier does not refine retval range based on
buf sz input to bpf_get_task_stack.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210416204704.2816874-3-davemarchevsky@fb.com
---
 .../testing/selftests/bpf/verifier/bpf_get_stack.c | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index 69b048cf46d9..3e024c891178 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -42,3 +42,46 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
+{
+	"bpf_get_task_stack return R0 range is refined",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq
+	BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task
+	BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write
+	BPF_MOV64_IMM(BPF_REG_3, 48),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_EMIT_CALL(BPF_FUNC_get_task_stack),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_seq_write),
+
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_ITER,
+	.kfunc = "task",
+	.runs = -1, // Don't run, just load
+	.fixup_map_array_48b = { 3 },
+},
-- 
cgit v1.2.3


From c77cec5c207b68a3cbc2af2f81070ec428f41145 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 16 Apr 2021 13:47:04 -0700
Subject: bpf/selftests: Add bpf_get_task_stack retval bounds test_prog

Add a libbpf test prog which feeds bpf_get_task_stack's return value
into seq_write after confirming it's positive. No attempt to bound the
value from above is made.

Load will fail if verifier does not refine retval range based on buf sz
input to bpf_get_task_stack.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210416204704.2816874-4-davemarchevsky@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  |  1 +
 .../selftests/bpf/progs/bpf_iter_task_stack.c      | 27 ++++++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 74c45d557a2b..2d3590cfb5e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -147,6 +147,7 @@ static void test_task_stack(void)
 		return;
 
 	do_dummy_read(skel->progs.dump_task_stack);
+	do_dummy_read(skel->progs.get_task_user_stacks);
 
 	bpf_iter_task_stack__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 50e59a2e142e..43c36f5f7649 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,3 +35,30 @@ int dump_task_stack(struct bpf_iter__task *ctx)
 
 	return 0;
 }
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	uint64_t buf_sz = 0;
+	int64_t res;
+
+	if (task == (void *)0)
+		return 0;
+
+	res = bpf_get_task_stack(task, entries,
+			MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+	if (res <= 0)
+		return 0;
+
+	buf_sz += res;
+
+	/* If the verifier doesn't refine bpf_get_task_stack res, and instead
+	 * assumes res is entirely unknown, this program will fail to load as
+	 * the verifier will believe that max buf_sz value allows reading
+	 * past the end of entries in bpf_seq_write call
+	 */
+	bpf_seq_write(seq, &entries, buf_sz);
+	return 0;
+}
-- 
cgit v1.2.3


From d408c01caef41d8ab1b43203164c8b6cbf18d084 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 14 Apr 2021 20:43:33 +0300
Subject: net/mlx5e: Fix lost changes during code movements

The changes done in commit [1] were missed by the code movements
done in [2], as they were developed in ~parallel.
Here we re-apply them.

[1] commit e4484d9df500 ("net/mlx5e: Enable striding RQ for Connect-X IPsec capable devices")
[2] commit b3a131c2a160 ("net/mlx5e: Move params logic into its dedicated file")

Fixes: b3a131c2a160 ("net/mlx5e: Move params logic into its dedicated file")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index f6ba568e00be..69f1f41b2b83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -6,6 +6,7 @@
 #include "en/port.h"
 #include "en_accel/en_accel.h"
 #include "accel/ipsec.h"
+#include "fpga/ipsec.h"
 
 static bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
 			    struct mlx5e_xsk_param *xsk)
@@ -282,7 +283,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
 		return false;
 
-	if (MLX5_IPSEC_DEV(mdev))
+	if (mlx5_fpga_is_ipsec_device(mdev))
 		return false;
 
 	if (params->xdp_prog) {
@@ -364,7 +365,7 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 	u32 buf_size = 0;
 	int i;
 
-	if (MLX5_IPSEC_DEV(mdev))
+	if (mlx5_fpga_is_ipsec_device(mdev))
 		byte_count += MLX5E_METADATA_ETHER_LEN;
 
 	if (mlx5e_rx_is_linear_skb(params, xsk)) {
-- 
cgit v1.2.3


From 6a5689ba0259acded00f69856fc364b158c54c2e Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Tue, 13 Apr 2021 11:15:49 +0300
Subject: net/mlx5e: Fix possible non-initialized struct usage

If mlx5e_devlink_port_register() fails, driver may try to register
devlink health TX and RX reporters on non-registered devlink port.

Instead, create health reporters only if mlx5e_devlink_port_register()
does not fail. And destroy reporters only if devlink_port is registered.

Also, change mlx5e_get_devlink_port() behavior and return NULL in case
port is not registered to replicate devlink's wrapper when ndo is not
implemented.

Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c |  9 +++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    | 10 ++++++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index 765f3064689d..0dd7615e5931 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -55,12 +55,17 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
 {
 	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
 
-	devlink_port_unregister(dl_port);
+	if (dl_port->registered)
+		devlink_port_unregister(dl_port);
 }
 
 struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct devlink_port *port;
 
-	return mlx5e_devlink_get_dl_port(priv);
+	port = mlx5e_devlink_get_dl_port(priv);
+	if (port->registered)
+		return port;
+	return NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index feb347e81448..bc2d37d2806f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4886,6 +4886,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 			  struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct devlink_port *dl_port;
 	int err;
 
 	mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
@@ -4901,14 +4902,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	if (err)
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
-	mlx5e_health_create_reporters(priv);
+	dl_port = mlx5e_devlink_get_dl_port(priv);
+	if (dl_port->registered)
+		mlx5e_health_create_reporters(priv);
 
 	return 0;
 }
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
-	mlx5e_health_destroy_reporters(priv);
+	struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+
+	if (dl_port->registered)
+		mlx5e_health_destroy_reporters(priv);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 }
-- 
cgit v1.2.3


From 6980ffa0c5a8e65d53ff803d2cafdba3e2022714 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Sun, 21 Jun 2020 21:35:34 +0300
Subject: net/mlx5e: RX, Add checks for calculated Striding RQ attributes

Striding RQ attributes below are mutually dependent. An unaware
change to one might take the others out of the valid range derived
by the HW caps:
- The MPWQE size in bytes
- The number of strides in a MPWQE
- The stride size

Add checks to verify they are valid and comply to the HW spec
and SW assumptions/requirements.
This is not a fix, no particular issue exists today.

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/params.c    | 86 ++++++++++++++--------
 .../net/ethernet/mellanox/mlx5/core/en/params.h    | 20 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  5 +-
 include/linux/mlx5/device.h                        |  7 +-
 4 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 69f1f41b2b83..f410c1268422 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -90,30 +90,39 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
 	return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
 }
 
-#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
-					  MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
-bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
-				  struct mlx5e_params *params,
-				  struct mlx5e_xsk_param *xsk)
+bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+				   u8 log_stride_sz, u8 log_num_strides)
 {
-	u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
-	s8 signed_log_num_strides_param;
-	u8 log_num_strides;
+	if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ)
+		return false;
 
-	if (!mlx5e_rx_is_linear_skb(params, xsk))
+	if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
+	    log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX)
 		return false;
 
-	if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+	if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX)
 		return false;
 
 	if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
-		return true;
+		return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE;
 
-	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
-	signed_log_num_strides_param =
-		(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+	return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
+}
 
-	return signed_log_num_strides_param >= 0;
+bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
+				  struct mlx5e_params *params,
+				  struct mlx5e_xsk_param *xsk)
+{
+	s8 log_num_strides;
+	u8 log_stride_sz;
+
+	if (!mlx5e_rx_is_linear_skb(params, xsk))
+		return false;
+
+	log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
+	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
+
+	return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides);
 }
 
 u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
@@ -462,26 +471,36 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
-void mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
-			  struct mlx5e_params *params,
-			  struct mlx5e_xsk_param *xsk,
-			  u16 q_counter,
-			  struct mlx5e_rq_param *param)
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+			 struct mlx5e_params *params,
+			 struct mlx5e_xsk_param *xsk,
+			 u16 q_counter,
+			 struct mlx5e_rq_param *param)
 {
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	int ndsegs = 1;
 
 	switch (params->rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
+		u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
+		u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+
+		if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
+						   log_wqe_num_of_strides)) {
+			mlx5_core_err(mdev,
+				      "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n",
+				      log_wqe_stride_size, log_wqe_num_of_strides);
+			return -EINVAL;
+		}
+
 		MLX5_SET(wq, wq, log_wqe_num_of_strides,
-			 mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
-			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
+			 log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
 		MLX5_SET(wq, wq, log_wqe_stride_size,
-			 mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
-			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
+			 log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
 		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
 		break;
+	}
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
 		mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
@@ -499,6 +518,8 @@ void mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
 
 	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 	mlx5e_build_rx_cq_param(mdev, params, xsk, &param->cqp);
+
+	return 0;
 }
 
 void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
@@ -643,14 +664,17 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
 	mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
 }
 
-void mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params,
-			       u16 q_counter,
-			       struct mlx5e_channel_param *cparam)
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+			      struct mlx5e_params *params,
+			      u16 q_counter,
+			      struct mlx5e_channel_param *cparam)
 {
 	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
+	int err;
 
-	mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+	err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq);
+	if (err)
+		return err;
 
 	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
 	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
@@ -659,4 +683,6 @@ void mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
 	mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
 	mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
 	mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
+
+	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index fcc51ec6084e..e9593f5f0661 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -96,6 +96,8 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *para
 void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 
+bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+				   u8 log_stride_sz, u8 log_num_strides);
 u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
 				 struct mlx5e_xsk_param *xsk);
 u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
@@ -122,11 +124,11 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 /* Build queue parameters */
 
 void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c);
-void mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
-			  struct mlx5e_params *params,
-			  struct mlx5e_xsk_param *xsk,
-			  u16 q_counter,
-			  struct mlx5e_rq_param *param);
+int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
+			 struct mlx5e_params *params,
+			 struct mlx5e_xsk_param *xsk,
+			 u16 q_counter,
+			 struct mlx5e_rq_param *param);
 void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev,
 			       u16 q_counter,
 			       struct mlx5e_rq_param *param);
@@ -141,10 +143,10 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
 void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
 			     struct mlx5e_params *params,
 			     struct mlx5e_sq_param *param);
-void mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
-			       struct mlx5e_params *params,
-			       u16 q_counter,
-			       struct mlx5e_channel_param *cparam);
+int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
+			      struct mlx5e_params *params,
+			      u16 q_counter,
+			      struct mlx5e_channel_param *cparam);
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bc2d37d2806f..bca832cdc4cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2086,7 +2086,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
 	if (!chs->c || !cparam)
 		goto err_free;
 
-	mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
+	err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
+	if (err)
+		goto err_free;
+
 	for (i = 0; i < chs->num; i++) {
 		struct xsk_buff_pool *xsk_pool = NULL;
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 92a029a800a0..578c4ccae91c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -911,8 +911,11 @@ static inline u16 get_cqe_flow_tag(struct mlx5_cqe64 *cqe)
 	return be32_to_cpu(cqe->sop_drop_qpn) & 0xFFF;
 }
 
-#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE	(9)
-#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE	(6)
+#define MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE	3
+#define MLX5_MPWQE_LOG_NUM_STRIDES_BASE		9
+#define MLX5_MPWQE_LOG_NUM_STRIDES_MAX		16
+#define MLX5_MPWQE_LOG_STRIDE_SZ_BASE		6
+#define MLX5_MPWQE_LOG_STRIDE_SZ_MAX		13
 
 struct mpwrq_cqe_bc {
 	__be16	filler_consumed_strides;
-- 
cgit v1.2.3


From 7d22ad732d15a35d49b66756cfa396c43562a7f8 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Thu, 24 Sep 2020 20:58:16 +0300
Subject: net/mlx5: DR, Rename an argument in dr_rdma_segments

Rename the argument to better reflect that the meaning is
not number of records, but wheather or not we should
ring the dorbell.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index c1926d927008..1f2e9fee96bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -223,7 +223,7 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
 
 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 			     u32 rkey, struct dr_data_seg *data_seg,
-			     u32 opcode, int nreq)
+			     u32 opcode, bool notify_hw)
 {
 	struct mlx5_wqe_raddr_seg *wq_raddr;
 	struct mlx5_wqe_ctrl_seg *wq_ctrl;
@@ -255,16 +255,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 
 	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
 
-	if (nreq)
+	if (notify_hw)
 		dr_cmd_notify_hw(dr_qp, wq_ctrl);
 }
 
 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
 {
 	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
-			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
+			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
 	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
-			 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
+			 &send_info->read, MLX5_OPCODE_RDMA_READ, true);
 }
 
 /**
-- 
cgit v1.2.3


From ff1925bb0de4c6e657e40e2c0d5ecf0fabbfbdd3 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sat, 6 Feb 2021 22:44:41 +0200
Subject: net/mlx5: DR, Fix SQ/RQ in doorbell bitmask

QP doorbell size is 16 bits.
Fixing sw steering's QP doorbel bitmask, which had 20 bits.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 1f2e9fee96bc..37377d668057 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -213,7 +213,7 @@ static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
 {
 	dma_wmb();
-	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
+	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
 
 	/* After wmb() the hw aware of new work */
 	wmb();
-- 
cgit v1.2.3


From 25cb317680422f199ec6ac6ba359eb98f2748429 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sat, 6 Feb 2021 15:44:53 +0200
Subject: net/mlx5: E-Switch, Improve error messages in term table creation

Add error code to the error messages and removed duplicated message:
if termination table creation failed, we already get an error message
in mlx5_eswitch_termtbl_create, so no need for the additional error print
in the calling function.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index ec679560a95d..a81ece94f599 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -83,14 +83,16 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 	ft_attr.autogroup.max_num_groups = 1;
 	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(tt->termtbl)) {
-		esw_warn(dev, "Failed to create termination table\n");
+		esw_warn(dev, "Failed to create termination table (error %d)\n",
+			 IS_ERR(tt->termtbl));
 		return -EOPNOTSUPP;
 	}
 
 	tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act,
 				       &tt->dest, 1);
 	if (IS_ERR(tt->rule)) {
-		esw_warn(dev, "Failed to create termination table rule\n");
+		esw_warn(dev, "Failed to create termination table rule (error %d)\n",
+			 IS_ERR(tt->rule));
 		goto add_flow_err;
 	}
 	return 0;
@@ -140,10 +142,9 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw,
 	memcpy(&tt->flow_act, flow_act, sizeof(*flow_act));
 
 	err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act);
-	if (err) {
-		esw_warn(esw->dev, "Failed to create termination table\n");
+	if (err)
 		goto tt_create_err;
-	}
+
 	hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key);
 tt_add_ref:
 	tt->ref_count++;
@@ -282,7 +283,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
 						     &dest[i], attr);
 		if (IS_ERR(tt)) {
-			esw_warn(esw->dev, "Failed to create termination table\n");
+			esw_warn(esw->dev, "Failed to get termination table (error %d)\n",
+				 IS_ERR(tt));
 			goto revert_changes;
 		}
 		attr->dests[num_vport_dests].termtbl = tt;
-- 
cgit v1.2.3


From 704cfecdd03d7b84403ed96ba0009ea07270e74e Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 28 Feb 2021 23:48:27 +0200
Subject: net/mlx5: mlx5_ifc updates for flex parser

Added the required definitions for supporting more protocols by flex parsers
(GTP-U, Geneve TLV options), and for using the right flex parser that was
configured for this protocol.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f2c51d6833c6..aa6effe1dd6d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -622,7 +622,19 @@ struct mlx5_ifc_fte_match_set_misc3_bits {
 
 	u8         geneve_tlv_option_0_data[0x20];
 
-	u8         reserved_at_140[0xc0];
+	u8	   gtpu_teid[0x20];
+
+	u8	   gtpu_msg_type[0x8];
+	u8	   gtpu_msg_flags[0x8];
+	u8	   reserved_at_170[0x10];
+
+	u8	   gtpu_dw_2[0x20];
+
+	u8	   gtpu_first_ext_dw_0[0x20];
+
+	u8	   gtpu_dw_0[0x20];
+
+	u8	   reserved_at_1e0[0x20];
 };
 
 struct mlx5_ifc_fte_match_set_misc4_bits {
@@ -1237,9 +1249,17 @@ enum {
 
 enum {
 	MLX5_FLEX_PARSER_GENEVE_ENABLED		= 1 << 3,
+	MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED	= 1 << 4,
+	mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED	= 1 << 5,
 	MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED	= 1 << 7,
 	MLX5_FLEX_PARSER_ICMP_V4_ENABLED	= 1 << 8,
 	MLX5_FLEX_PARSER_ICMP_V6_ENABLED	= 1 << 9,
+	MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED = 1 << 10,
+	MLX5_FLEX_PARSER_GTPU_ENABLED		= 1 << 11,
+	MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED	= 1 << 16,
+	MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED = 1 << 17,
+	MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED	= 1 << 18,
+	MLX5_FLEX_PARSER_GTPU_TEID_ENABLED	= 1 << 19,
 };
 
 enum {
@@ -1637,7 +1657,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         cqe_compression_timeout[0x10];
 	u8         cqe_compression_max_num[0x10];
 
-	u8         reserved_at_5e0[0x10];
+	u8         reserved_at_5e0[0x8];
+	u8         flex_parser_id_gtpu_dw_0[0x4];
+	u8         reserved_at_5ec[0x4];
 	u8         tag_matching[0x1];
 	u8         rndv_offload_rc[0x1];
 	u8         rndv_offload_dc[0x1];
@@ -1648,7 +1670,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   affiliate_nic_vport_criteria[0x8];
 	u8	   native_port_num[0x8];
 	u8	   num_vhca_ports[0x8];
-	u8	   reserved_at_618[0x6];
+	u8         flex_parser_id_gtpu_teid[0x4];
+	u8         reserved_at_61c[0x2];
 	u8	   sw_owner_id[0x1];
 	u8         reserved_at_61f[0x1];
 
@@ -1683,7 +1706,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   reserved_at_6e0[0x10];
 	u8	   sf_base_id[0x10];
 
-	u8	   reserved_at_700[0x8];
+	u8         flex_parser_id_gtpu_dw_2[0x4];
+	u8         flex_parser_id_gtpu_first_ext_dw_0[0x4];
 	u8	   num_total_dynamic_vf_msix[0x18];
 	u8	   reserved_at_720[0x14];
 	u8	   dynamic_msix_table_size[0xc];
-- 
cgit v1.2.3


From 323b91acc1898281da8c5cec32a50aa272ef5f5a Mon Sep 17 00:00:00 2001
From: Muhammad Sammar <muhammads@nvidia.com>
Date: Wed, 21 Oct 2020 08:29:49 +0300
Subject: net/mlx5: DR, Remove protocol-specific flex_parser_3 definitions

Remove MPLS specific fields from flex parser 3 layout.
Flex parser can be used for multiple protocols and should
not be hardcoded to a specific type.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  7 ++++
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        | 41 ++++++++++------------
 .../mellanox/mlx5/core/steering/mlx5_ifc_dr.h      |  5 +--
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 06bcb0ee8f96..eb0384150675 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -86,6 +86,13 @@ enum dr_ste_action_modify_type_l4 {
 	DR_STE_ACTION_MDFY_TYPE_L4_UDP	= 0x2,
 };
 
+enum {
+	HDR_MPLS_OFFSET_LABEL	= 12,
+	HDR_MPLS_OFFSET_EXP	= 9,
+	HDR_MPLS_OFFSET_S_BOS	= 8,
+	HDR_MPLS_OFFSET_TTL	= 0,
+};
+
 u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask);
 
 #define DR_STE_CTX_BUILDER(fname) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index c5f62d2a058f..d9ac57d94609 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1248,32 +1248,29 @@ dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value,
 			     u8 *tag)
 {
 	struct mlx5dr_match_misc2 *misc_2 = &value->misc2;
+	u32 mpls_hdr;
 
 	if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) {
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
-			       misc_2, outer_first_mpls_over_gre_label);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
-			       misc_2, outer_first_mpls_over_gre_exp);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
-			       misc_2, outer_first_mpls_over_gre_s_bos);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
-			       misc_2, outer_first_mpls_over_gre_ttl);
+		mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+		misc_2->outer_first_mpls_over_gre_label = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+		misc_2->outer_first_mpls_over_gre_exp = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+		misc_2->outer_first_mpls_over_gre_s_bos = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+		misc_2->outer_first_mpls_over_gre_ttl = 0;
 	} else {
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label,
-			       misc_2, outer_first_mpls_over_udp_label);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp,
-			       misc_2, outer_first_mpls_over_udp_exp);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos,
-			       misc_2, outer_first_mpls_over_udp_s_bos);
-
-		DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl,
-			       misc_2, outer_first_mpls_over_udp_ttl);
+		mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+		misc_2->outer_first_mpls_over_udp_label = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+		misc_2->outer_first_mpls_over_udp_exp = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+		misc_2->outer_first_mpls_over_udp_s_bos = 0;
+		mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+		misc_2->outer_first_mpls_over_udp_ttl = 0;
 	}
+
+	MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
index 83df6df6b459..601bbb3a107c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -434,10 +434,7 @@ struct mlx5_ifc_ste_gre_bits {
 };
 
 struct mlx5_ifc_ste_flex_parser_0_bits {
-	u8         parser_3_label[0x14];
-	u8         parser_3_exp[0x3];
-	u8         parser_3_s_bos[0x1];
-	u8         parser_3_ttl[0x8];
+	u8         flex_parser_3[0x20];
 
 	u8         flex_parser_2[0x20];
 
-- 
cgit v1.2.3


From 160e9cb37a8496edfe4ce74abe33ade103f59db2 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 24 Nov 2020 19:48:09 +0200
Subject: net/mlx5: DR, Add support for dynamic flex parser

Flex parser is a HW parser that can support protocols that are not
natively supported by the HCA, such as Geneve (TLV options) and GTP-U.
There are 8 such parsers, and each of them can be assigned to parse a
specific set of protocols.
This patch adds misc4 match params which allows using a correct flex parser
that was programmed to the required protocol.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 50 +++++++++++++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 11 ++++
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.c  | 54 ++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  9 +++
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        | 65 ++++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_ste_v1.c        | 65 ++++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_types.h         | 26 ++++++++-
 7 files changed, 279 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 15673cd10039..3a7576125404 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -199,6 +199,42 @@ static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc)
 	return (misc->source_sqn || misc->source_port);
 }
 
+static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id,
+					      u32 flex_parser_value)
+{
+	if (flex_parser_id)
+		return flex_parser_id <= DR_STE_MAX_FLEX_0_ID;
+
+	/* Using flex_parser 0 means that id is zero, thus value must be set. */
+	return flex_parser_value;
+}
+
+static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4)
+{
+	return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0,
+						  misc4->prog_sample_field_value_0) ||
+		dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1,
+						  misc4->prog_sample_field_value_1) ||
+		dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2,
+						  misc4->prog_sample_field_value_2) ||
+		dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3,
+						  misc4->prog_sample_field_value_3));
+}
+
+static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id)
+{
+	return flex_parser_id > DR_STE_MAX_FLEX_0_ID &&
+	       flex_parser_id <= DR_STE_MAX_FLEX_1_ID;
+}
+
+static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4)
+{
+	return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) ||
+		dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) ||
+		dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) ||
+		dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3));
+}
+
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 				   struct mlx5dr_matcher_rx_tx *nic_matcher,
 				   enum mlx5dr_ipv outer_ipv,
@@ -251,6 +287,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3)
 		mask.misc3 = matcher->mask.misc3;
 
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4)
+		mask.misc4 = matcher->mask.misc4;
+
 	ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria,
 					 &matcher->mask, NULL);
 	if (ret)
@@ -408,6 +447,17 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 			mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++],
 						  &mask, inner, rx);
 	}
+
+	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+		if (dr_mask_is_flex_parser_0_3_set(&mask.misc4))
+			mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++],
+						       &mask, false, rx);
+
+		if (dr_mask_is_flex_parser_4_7_set(&mask.misc4))
+			mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++],
+						       &mask, false, rx);
+	}
+
 	/* Empty matcher, takes all */
 	if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY)
 		mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index b337d6626bff..43356fad53de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -952,6 +952,17 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
 			return false;
 		}
 	}
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+		s_idx = offsetof(struct mlx5dr_match_param, misc4);
+		e_idx = min(s_idx + sizeof(param->misc4), value_size);
+
+		if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) {
+			mlx5dr_err(matcher->tbl->dmn,
+				   "Rule misc4 parameters contains a value not specified by mask\n");
+			return false;
+		}
+	}
 	return true;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index f49abc7a4b9b..445481f01a46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -854,6 +854,26 @@ static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
 	spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
 }
 
+static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec)
+{
+	spec->prog_sample_field_id_0 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_0);
+	spec->prog_sample_field_value_0 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_0);
+	spec->prog_sample_field_id_1 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_1);
+	spec->prog_sample_field_value_1 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_1);
+	spec->prog_sample_field_id_2 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_2);
+	spec->prog_sample_field_value_2 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_2);
+	spec->prog_sample_field_id_3 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_3);
+	spec->prog_sample_field_value_3 =
+		MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_3);
+}
+
 void mlx5dr_ste_copy_param(u8 match_criteria,
 			   struct mlx5dr_match_param *set_param,
 			   struct mlx5dr_match_parameters *mask)
@@ -925,6 +945,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria,
 		}
 		dr_ste_copy_mask_misc3(buff, &set_param->misc3);
 	}
+
+	param_location += sizeof(struct mlx5dr_match_misc3);
+
+	if (match_criteria & DR_MATCHER_CRITERIA_MISC4) {
+		if (mask->match_sz < param_location +
+		    sizeof(struct mlx5dr_match_misc4)) {
+			memcpy(tail_param, data + param_location,
+			       mask->match_sz - param_location);
+			buff = tail_param;
+		} else {
+			buff = data + param_location;
+		}
+		dr_ste_copy_mask_misc4(buff, &set_param->misc4);
+	}
 }
 
 void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx,
@@ -1148,6 +1182,26 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
 	ste_ctx->build_src_gvmi_qpn_init(sb, mask);
 }
 
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+				    struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->inner = inner;
+	ste_ctx->build_flex_parser_0_init(sb, mask);
+}
+
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+				    struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->inner = inner;
+	ste_ctx->build_flex_parser_1_init(sb, mask);
+}
+
 static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = {
 	[MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0,
 	[MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index eb0384150675..5900f177d865 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -95,6 +95,13 @@ enum {
 
 u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask);
 
+static inline u8 *
+dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id)
+{
+	/* Calculate tag byte offset based on flex parser id */
+	return tag + 4 * (3 - (parser_id % 4));
+}
+
 #define DR_STE_CTX_BUILDER(fname) \
 	((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \
 				 struct mlx5dr_match_param *mask))
@@ -121,6 +128,8 @@ struct mlx5dr_ste_ctx {
 	void DR_STE_CTX_BUILDER(register_0);
 	void DR_STE_CTX_BUILDER(register_1);
 	void DR_STE_CTX_BUILDER(src_gvmi_qpn);
+	void DR_STE_CTX_BUILDER(flex_parser_0);
+	void DR_STE_CTX_BUILDER(flex_parser_1);
 
 	/* Getters and Setters */
 	void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index d9ac57d94609..db19d99366ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1592,6 +1592,69 @@ dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag;
 }
 
+static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id,
+				      u32 *misc4_field_value,
+				      bool *parser_is_used,
+				      u8 *tag)
+{
+	u32 id = *misc4_field_id;
+	u8 *parser_ptr;
+
+	if (parser_is_used[id])
+		return;
+
+	parser_is_used[id] = true;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+	*(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+	*misc4_field_id = 0;
+	*misc4_field_value = 0;
+}
+
+static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   u8 *tag)
+{
+	struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+	bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+	dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+				  &misc_4_mask->prog_sample_field_value_0,
+				  parser_is_used, tag);
+
+	dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+				  &misc_4_mask->prog_sample_field_value_1,
+				  parser_is_used, tag);
+
+	dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+				  &misc_4_mask->prog_sample_field_value_2,
+				  parser_is_used, tag);
+
+	dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+				  &misc_4_mask->prog_sample_field_value_3,
+				  parser_is_used, tag);
+
+	return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+					       struct mlx5dr_match_param *mask)
+{
+	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+	dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
+static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+					       struct mlx5dr_match_param *mask)
+{
+	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+	dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask);
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v0_build_eth_l2_src_dst_init,
@@ -1614,6 +1677,8 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	.build_register_0_init		= &dr_ste_v0_build_register_0_init,
 	.build_register_1_init		= &dr_ste_v0_build_register_1_init,
 	.build_src_gvmi_qpn_init	= &dr_ste_v0_build_src_gvmi_qpn_init,
+	.build_flex_parser_0_init	= &dr_ste_v0_build_flex_parser_0_init,
+	.build_flex_parser_1_init	= &dr_ste_v0_build_flex_parser_1_init,
 
 	/* Getters and Setters */
 	.ste_init			= &dr_ste_v0_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 616ebc38381a..1c8c08bc2d38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1571,6 +1571,69 @@ static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag;
 }
 
+static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id,
+				      u32 *misc4_field_value,
+				      bool *parser_is_used,
+				      u8 *tag)
+{
+	u32 id = *misc4_field_id;
+	u8 *parser_ptr;
+
+	if (parser_is_used[id])
+		return;
+
+	parser_is_used[id] = true;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, id);
+
+	*(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value);
+	*misc4_field_id = 0;
+	*misc4_field_value = 0;
+}
+
+static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   u8 *tag)
+{
+	struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4;
+	bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {};
+
+	dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0,
+				  &misc_4_mask->prog_sample_field_value_0,
+				  parser_is_used, tag);
+
+	dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1,
+				  &misc_4_mask->prog_sample_field_value_1,
+				  parser_is_used, tag);
+
+	dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2,
+				  &misc_4_mask->prog_sample_field_value_2,
+				  parser_is_used, tag);
+
+	dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3,
+				  &misc_4_mask->prog_sample_field_value_3,
+				  parser_is_used, tag);
+
+	return 0;
+}
+
+static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+					       struct mlx5dr_match_param *mask)
+{
+	sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+	dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
+}
+
+static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+					       struct mlx5dr_match_param *mask)
+{
+	sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+	dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask);
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v1_build_eth_l2_src_dst_init,
@@ -1593,6 +1656,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	.build_register_0_init		= &dr_ste_v1_build_register_0_init,
 	.build_register_1_init		= &dr_ste_v1_build_register_1_init,
 	.build_src_gvmi_qpn_init	= &dr_ste_v1_build_src_gvmi_qpn_init,
+	.build_flex_parser_0_init	= &dr_ste_v1_build_flex_parser_0_init,
+	.build_flex_parser_1_init	= &dr_ste_v1_build_flex_parser_1_init,
 	/* Getters and Setters */
 	.ste_init			= &dr_ste_v1_init,
 	.set_next_lu_type		= &dr_ste_v1_set_next_lu_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 462673947f3c..3ccdb806bf68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -18,6 +18,9 @@
 #define DR_STE_SVLAN 0x1
 #define DR_STE_CVLAN 0x2
 #define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4)
+#define DR_NUM_OF_FLEX_PARSERS 8
+#define DR_STE_MAX_FLEX_0_ID 3
+#define DR_STE_MAX_FLEX_1_ID 7
 
 #define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg)
 #define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
@@ -87,7 +90,8 @@ enum mlx5dr_matcher_criteria {
 	DR_MATCHER_CRITERIA_INNER = 1 << 2,
 	DR_MATCHER_CRITERIA_MISC2 = 1 << 3,
 	DR_MATCHER_CRITERIA_MISC3 = 1 << 4,
-	DR_MATCHER_CRITERIA_MAX = 1 << 5,
+	DR_MATCHER_CRITERIA_MISC4 = 1 << 5,
+	DR_MATCHER_CRITERIA_MAX = 1 << 6,
 };
 
 enum mlx5dr_action_type {
@@ -419,6 +423,14 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx,
 				   struct mlx5dr_match_param *mask,
 				   struct mlx5dr_domain *dmn,
 				   bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+				    struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx);
+void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+				    struct mlx5dr_ste_build *sb,
+				    struct mlx5dr_match_param *mask,
+				    bool inner, bool rx);
 void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx);
 
 /* Actions utils */
@@ -649,12 +661,24 @@ struct mlx5dr_match_misc3 {
 	u8 reserved_auto3[0x1c];
 };
 
+struct mlx5dr_match_misc4 {
+	u32 prog_sample_field_value_0;
+	u32 prog_sample_field_id_0;
+	u32 prog_sample_field_value_1;
+	u32 prog_sample_field_id_1;
+	u32 prog_sample_field_value_2;
+	u32 prog_sample_field_id_2;
+	u32 prog_sample_field_value_3;
+	u32 prog_sample_field_id_3;
+};
+
 struct mlx5dr_match_param {
 	struct mlx5dr_match_spec outer;
 	struct mlx5dr_match_misc misc;
 	struct mlx5dr_match_spec inner;
 	struct mlx5dr_match_misc2 misc2;
 	struct mlx5dr_match_misc3 misc3;
+	struct mlx5dr_match_misc4 misc4;
 };
 
 #define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \
-- 
cgit v1.2.3


From 4923938d2fb589e6684e484c2e6031fae7048b02 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 7 Feb 2021 04:19:27 +0200
Subject: net/mlx5: DR, Set STEv0 ICMP flex parser dynamically

Set the flex parser ID dynamicly for ICMP instead of relying
on hardcoded values.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 12 ++---
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.c  | 12 ++---
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  2 +-
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        | 53 ++++++++++------------
 .../mellanox/mlx5/core/steering/dr_ste_v1.c        |  6 +--
 .../mellanox/mlx5/core/steering/dr_types.h         | 10 ++--
 6 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 3a7576125404..f83fea98cc46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -376,13 +376,11 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 			mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++],
 						  &mask, inner, rx);
 
-		if (dr_mask_is_icmp(&mask, dmn)) {
-			ret = mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++],
-						    &mask, &dmn->info.caps,
-						    inner, rx);
-			if (ret)
-				return ret;
-		}
+		if (dr_mask_is_icmp(&mask, dmn))
+			mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++],
+					      &mask, &dmn->info.caps,
+					      inner, rx);
+
 		if (dr_mask_is_tnl_gre_set(&mask.misc))
 			mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++],
 						 &mask, inner, rx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 445481f01a46..7ae718bb41eb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -1095,16 +1095,16 @@ void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
 	ste_ctx->build_tnl_mpls_init(sb, mask);
 }
 
-int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
-			  struct mlx5dr_ste_build *sb,
-			  struct mlx5dr_match_param *mask,
-			  struct mlx5dr_cmd_caps *caps,
-			  bool inner, bool rx)
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+			   struct mlx5dr_ste_build *sb,
+			   struct mlx5dr_match_param *mask,
+			   struct mlx5dr_cmd_caps *caps,
+			   bool inner, bool rx)
 {
 	sb->rx = rx;
 	sb->inner = inner;
 	sb->caps = caps;
-	return ste_ctx->build_icmp_init(sb, mask);
+	ste_ctx->build_icmp_init(sb, mask);
 }
 
 void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 5900f177d865..a00dab3b6944 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -120,7 +120,7 @@ struct mlx5dr_ste_ctx {
 	void DR_STE_CTX_BUILDER(mpls);
 	void DR_STE_CTX_BUILDER(tnl_gre);
 	void DR_STE_CTX_BUILDER(tnl_mpls);
-	int  DR_STE_CTX_BUILDER(icmp);
+	void DR_STE_CTX_BUILDER(icmp);
 	void DR_STE_CTX_BUILDER(general_purpose);
 	void DR_STE_CTX_BUILDER(eth_l4_misc);
 	void DR_STE_CTX_BUILDER(tnl_vxlan_gpe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index db19d99366ba..62421c33a9ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1297,9 +1297,11 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value,
 	u32 *icmp_header_data;
 	int dw0_location;
 	int dw1_location;
+	u8 *parser_ptr;
 	u8 *icmp_type;
 	u8 *icmp_code;
 	bool is_ipv4;
+	u32 icmp_hdr;
 
 	is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3);
 	if (is_ipv4) {
@@ -1316,47 +1318,40 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value,
 		dw1_location		= sb->caps->flex_parser_id_icmpv6_dw1;
 	}
 
-	switch (dw0_location) {
-	case 4:
-		MLX5_SET(ste_flex_parser_1, tag, flex_parser_4,
-			 (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) |
-			 (*icmp_code << ICMP_TYPE_OFFSET_FIRST_DW));
-
-		*icmp_type = 0;
-		*icmp_code = 0;
-		break;
-	default:
-		return -EINVAL;
-	}
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location);
+	icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) |
+		   (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW);
+	*(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr);
+	*icmp_code = 0;
+	*icmp_type = 0;
 
-	switch (dw1_location) {
-	case 5:
-		MLX5_SET(ste_flex_parser_1, tag, flex_parser_5,
-			 *icmp_header_data);
-		*icmp_header_data = 0;
-		break;
-	default:
-		return -EINVAL;
-	}
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location);
+	*(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data);
+	*icmp_header_data = 0;
 
 	return 0;
 }
 
-static int
+static void
 dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb,
 			  struct mlx5dr_match_param *mask)
 {
-	int ret;
+	u8 parser_id;
+	bool is_ipv4;
 
-	ret = dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask);
-	if (ret)
-		return ret;
+	dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask);
 
-	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3);
+	parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 :
+		    sb->caps->flex_parser_id_icmpv6_dw0;
+	sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ?
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
 	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
 	sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag;
-
-	return 0;
 }
 
 static int
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 1c8c08bc2d38..f77b1e9103ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1337,16 +1337,14 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value,
 	return 0;
 }
 
-static int dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
-				     struct mlx5dr_match_param *mask)
+static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb,
+				      struct mlx5dr_match_param *mask)
 {
 	dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask);
 
 	sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O;
 	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
 	sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag;
-
-	return 0;
 }
 
 static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 3ccdb806bf68..ee2ea215c4b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -393,11 +393,11 @@ void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
 			       struct mlx5dr_ste_build *sb,
 			       struct mlx5dr_match_param *mask,
 			       bool inner, bool rx);
-int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
-			  struct mlx5dr_ste_build *sb,
-			  struct mlx5dr_match_param *mask,
-			  struct mlx5dr_cmd_caps *caps,
-			  bool inner, bool rx);
+void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
+			   struct mlx5dr_ste_build *sb,
+			   struct mlx5dr_match_param *mask,
+			   struct mlx5dr_cmd_caps *caps,
+			   bool inner, bool rx);
 void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx,
 				    struct mlx5dr_ste_build *sb,
 				    struct mlx5dr_match_param *mask,
-- 
cgit v1.2.3


From 3442e0335e70f348728c17bca924ec507ad6358a Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 7 Feb 2021 04:27:48 +0200
Subject: net/mlx5: DR, Add support for matching on geneve TLV option

Enable matching on tunnel geneve TLV option using the flex parser.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c  |  4 +++
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 13 +++++++--
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.c  | 14 +++++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  1 +
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        | 34 ++++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_ste_v1.c        | 34 ++++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_types.h         | 11 +++++--
 7 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 461473d31e2e..0561df8616c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -106,6 +106,10 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 			MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1);
 	}
 
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED)
+		caps->flex_parser_id_geneve_tlv_option_0 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0);
+
 	caps->nic_rx_drop_address =
 		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
 	caps->nic_tx_drop_address =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index f83fea98cc46..0aa4a994fe77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -133,6 +133,11 @@ static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc)
 	       misc->geneve_opt_len;
 }
 
+static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3)
+{
+	return misc3->geneve_tlv_option_0_data;
+}
+
 static bool
 dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps)
 {
@@ -360,10 +365,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 		if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn))
 			mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++],
 						       &mask, inner, rx);
-		else if (dr_mask_is_tnl_geneve(&mask, dmn))
+		else if (dr_mask_is_tnl_geneve(&mask, dmn)) {
 			mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++],
 						    &mask, inner, rx);
-
+			if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3))
+				mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++],
+								    &mask, &dmn->info.caps,
+								    inner, rx);
+		}
 		if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
 			mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
 						     &mask, inner, rx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 7ae718bb41eb..8d98341802e3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -852,6 +852,8 @@ static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
 	spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code);
 	spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type);
 	spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
+	spec->geneve_tlv_option_0_data =
+		MLX5_GET(fte_match_set_misc3, mask, geneve_tlv_option_0_data);
 }
 
 static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec)
@@ -1147,6 +1149,18 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
 	ste_ctx->build_tnl_geneve_init(sb, mask);
 }
 
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+					 struct mlx5dr_ste_build *sb,
+					 struct mlx5dr_match_param *mask,
+					 struct mlx5dr_cmd_caps *caps,
+					 bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->caps = caps;
+	sb->inner = inner;
+	ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask);
+}
+
 void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
 				 struct mlx5dr_ste_build *sb,
 				 struct mlx5dr_match_param *mask,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index a00dab3b6944..5fa268a6c7df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -125,6 +125,7 @@ struct mlx5dr_ste_ctx {
 	void DR_STE_CTX_BUILDER(eth_l4_misc);
 	void DR_STE_CTX_BUILDER(tnl_vxlan_gpe);
 	void DR_STE_CTX_BUILDER(tnl_geneve);
+	void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt);
 	void DR_STE_CTX_BUILDER(register_0);
 	void DR_STE_CTX_BUILDER(register_1);
 	void DR_STE_CTX_BUILDER(src_gvmi_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 62421c33a9ca..cf923a7e9b3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1650,6 +1650,39 @@ static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag;
 }
 
+static int
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+						   struct mlx5dr_ste_build *sb,
+						   u8 *tag)
+{
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+	u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+	u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+	MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+		 misc3->geneve_tlv_option_0_data);
+	misc3->geneve_tlv_option_0_data = 0;
+
+	return 0;
+}
+
+static void
+dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+						    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ?
+		DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+		DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v0_build_eth_l2_src_dst_init,
@@ -1669,6 +1702,7 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	.build_eth_l4_misc_init		= &dr_ste_v0_build_eth_l4_misc_init,
 	.build_tnl_vxlan_gpe_init	= &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init,
 	.build_tnl_geneve_init		= &dr_ste_v0_build_flex_parser_tnl_geneve_init,
+	.build_tnl_geneve_tlv_opt_init	= &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init,
 	.build_register_0_init		= &dr_ste_v0_build_register_0_init,
 	.build_register_1_init		= &dr_ste_v0_build_register_1_init,
 	.build_src_gvmi_qpn_init	= &dr_ste_v0_build_src_gvmi_qpn_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index f77b1e9103ce..f15a15da0acb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1632,6 +1632,39 @@ static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag;
 }
 
+static int
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value,
+						   struct mlx5dr_ste_build *sb,
+						   u8 *tag)
+{
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+	u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0;
+	u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+
+	MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3,
+		 misc3->geneve_tlv_option_0_data);
+	misc3->geneve_tlv_option_0_data = 0;
+
+	return 0;
+}
+
+static void
+dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
+						    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask);
+
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ?
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v1_build_eth_l2_src_dst_init,
@@ -1651,6 +1684,7 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	.build_eth_l4_misc_init		= &dr_ste_v1_build_eth_l4_misc_init,
 	.build_tnl_vxlan_gpe_init	= &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init,
 	.build_tnl_geneve_init		= &dr_ste_v1_build_flex_parser_tnl_geneve_init,
+	.build_tnl_geneve_tlv_opt_init	= &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init,
 	.build_register_0_init		= &dr_ste_v1_build_register_0_init,
 	.build_register_1_init		= &dr_ste_v1_build_register_1_init,
 	.build_src_gvmi_qpn_init	= &dr_ste_v1_build_src_gvmi_qpn_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index ee2ea215c4b9..3c07ec61c8be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -12,7 +12,7 @@
 #include "mlx5_ifc_dr.h"
 #include "mlx5dr.h"
 
-#define DR_RULE_MAX_STES 17
+#define DR_RULE_MAX_STES 18
 #define DR_ACTION_MAX_STES 5
 #define WIRE_PORT 0xFFFF
 #define DR_STE_SVLAN 0x1
@@ -406,6 +406,11 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx,
 				 struct mlx5dr_ste_build *sb,
 				 struct mlx5dr_match_param *mask,
 				 bool inner, bool rx);
+void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
+					 struct mlx5dr_ste_build *sb,
+					 struct mlx5dr_match_param *mask,
+					 struct mlx5dr_cmd_caps *caps,
+					 bool inner, bool rx);
 void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
 				      struct mlx5dr_ste_build *sb,
 				      struct mlx5dr_match_param *mask,
@@ -658,7 +663,8 @@ struct mlx5dr_match_misc3 {
 	u8 icmpv6_type;
 	u8 icmpv4_code;
 	u8 icmpv4_type;
-	u8 reserved_auto3[0x1c];
+	u32 geneve_tlv_option_0_data;
+	u8 reserved_auto3[0x18];
 };
 
 struct mlx5dr_match_misc4 {
@@ -716,6 +722,7 @@ struct mlx5dr_cmd_caps {
 	u8 flex_parser_id_icmp_dw1;
 	u8 flex_parser_id_icmpv6_dw0;
 	u8 flex_parser_id_icmpv6_dw1;
+	u8 flex_parser_id_geneve_tlv_option_0;
 	u8 max_ft_level;
 	u16 roce_min_src_udp;
 	u8 num_esw_ports;
-- 
cgit v1.2.3


From 35ba005d820b541d69c188fd415f0d41fe4919e6 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 7 Feb 2021 04:37:56 +0200
Subject: net/mlx5: DR, Set flex parser for TNL_MPLS dynamically

Query the flex_parser id that's intended for TNL_MPLS
and use an appropriate flex parser for MPLS over UDP/GRE.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c  |  8 ++
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 65 ++++++++++++----
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.c  | 24 ++++--
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  2 +
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        | 87 ++++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_ste_v1.c        | 84 +++++++++++++++++++++
 .../mellanox/mlx5/core/steering/dr_types.h         | 12 +++
 7 files changed, 262 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 0561df8616c7..dfb1e955409e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -110,6 +110,14 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 		caps->flex_parser_id_geneve_tlv_option_0 =
 			MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0);
 
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED)
+		caps->flex_parser_id_mpls_over_gre =
+			MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre);
+
+	if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED)
+		caps->flex_parser_id_mpls_over_udp =
+			MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label);
+
 	caps->nic_rx_drop_address =
 		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
 	caps->nic_tx_drop_address =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 0aa4a994fe77..531eb69eeea1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -92,15 +92,17 @@ static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc)
 		misc->gre_k_present || misc->gre_s_present);
 }
 
-#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \
-	(_misc2).outer_first_mpls_over_##gre_udp##_label || \
-	(_misc2).outer_first_mpls_over_##gre_udp##_exp || \
-	(_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \
-	(_misc2).outer_first_mpls_over_##gre_udp##_ttl)
-
-#define DR_MASK_IS_TNL_MPLS_SET(_misc2) ( \
-	DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \
-	DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp))
+#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
+	(_misc)->outer_first_mpls_over_gre_label || \
+	(_misc)->outer_first_mpls_over_gre_exp || \
+	(_misc)->outer_first_mpls_over_gre_s_bos || \
+	(_misc)->outer_first_mpls_over_gre_ttl)
+
+#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\
+	(_misc)->outer_first_mpls_over_udp_label || \
+	(_misc)->outer_first_mpls_over_udp_exp || \
+	(_misc)->outer_first_mpls_over_udp_s_bos || \
+	(_misc)->outer_first_mpls_over_udp_ttl)
 
 static bool
 dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3)
@@ -240,6 +242,29 @@ static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4)
 		dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3));
 }
 
+static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask,
+					 struct mlx5dr_domain *dmn)
+{
+	return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) &&
+	       dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED;
+}
+
+static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask,
+					 struct mlx5dr_domain *dmn)
+{
+	return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) &&
+	       dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps);
+}
 int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
 				   struct mlx5dr_matcher_rx_tx *nic_matcher,
 				   enum mlx5dr_ipv outer_ipv,
@@ -381,9 +406,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 			mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
 					      &mask, inner, rx);
 
-		if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2))
-			mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++],
-						  &mask, inner, rx);
+		if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+			mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+							   &mask, &dmn->info.caps,
+							   inner, rx);
+		else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+			mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+							   &mask, &dmn->info.caps,
+							   inner, rx);
 
 		if (dr_mask_is_icmp(&mask, dmn))
 			mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++],
@@ -450,9 +480,14 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 			mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++],
 					      &mask, inner, rx);
 
-		if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2))
-			mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++],
-						  &mask, inner, rx);
+		if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn))
+			mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++],
+							   &mask, &dmn->info.caps,
+							   inner, rx);
+		else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn))
+			mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++],
+							   &mask, &dmn->info.caps,
+							   inner, rx);
 	}
 
 	if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 8d98341802e3..6ae839b1ec8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -1087,14 +1087,28 @@ void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx,
 	ste_ctx->build_tnl_gre_init(sb, mask);
 }
 
-void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
-			       struct mlx5dr_ste_build *sb,
-			       struct mlx5dr_match_param *mask,
-			       bool inner, bool rx)
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+					struct mlx5dr_ste_build *sb,
+					struct mlx5dr_match_param *mask,
+					struct mlx5dr_cmd_caps *caps,
+					bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->inner = inner;
+	sb->caps = caps;
+	return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+					struct mlx5dr_ste_build *sb,
+					struct mlx5dr_match_param *mask,
+					struct mlx5dr_cmd_caps *caps,
+					bool inner, bool rx)
 {
 	sb->rx = rx;
 	sb->inner = inner;
-	ste_ctx->build_tnl_mpls_init(sb, mask);
+	sb->caps = caps;
+	return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask);
 }
 
 void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 5fa268a6c7df..2d2dc680ad13 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -120,6 +120,8 @@ struct mlx5dr_ste_ctx {
 	void DR_STE_CTX_BUILDER(mpls);
 	void DR_STE_CTX_BUILDER(tnl_gre);
 	void DR_STE_CTX_BUILDER(tnl_mpls);
+	void DR_STE_CTX_BUILDER(tnl_mpls_over_gre);
+	void DR_STE_CTX_BUILDER(tnl_mpls_over_udp);
 	void DR_STE_CTX_BUILDER(icmp);
 	void DR_STE_CTX_BUILDER(general_purpose);
 	void DR_STE_CTX_BUILDER(eth_l4_misc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index cf923a7e9b3b..c62f64d48213 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1285,6 +1285,91 @@ dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag;
 }
 
+static int
+dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+				      struct mlx5dr_ste_build *sb,
+				      u8 *tag)
+{
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *parser_ptr;
+	u8 parser_id;
+	u32 mpls_hdr;
+
+	mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+	misc2->outer_first_mpls_over_udp_label = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+	misc2->outer_first_mpls_over_udp_exp = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+	misc2->outer_first_mpls_over_udp_s_bos = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+	misc2->outer_first_mpls_over_udp_ttl = 0;
+
+	parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+	*(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+	return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+				       struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+				      struct mlx5dr_ste_build *sb,
+				      u8 *tag)
+{
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *parser_ptr;
+	u8 parser_id;
+	u32 mpls_hdr;
+
+	mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+	misc2->outer_first_mpls_over_gre_label = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+	misc2->outer_first_mpls_over_gre_exp = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+	misc2->outer_first_mpls_over_gre_s_bos = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+	misc2->outer_first_mpls_over_gre_ttl = 0;
+
+	parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+	*(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+	return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+				       struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag;
+}
+
 #define ICMP_TYPE_OFFSET_FIRST_DW	24
 #define ICMP_CODE_OFFSET_FIRST_DW	16
 
@@ -1697,6 +1782,8 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	.build_mpls_init		= &dr_ste_v0_build_mpls_init,
 	.build_tnl_gre_init		= &dr_ste_v0_build_tnl_gre_init,
 	.build_tnl_mpls_init		= &dr_ste_v0_build_tnl_mpls_init,
+	.build_tnl_mpls_over_udp_init	= &dr_ste_v0_build_tnl_mpls_over_udp_init,
+	.build_tnl_mpls_over_gre_init	= &dr_ste_v0_build_tnl_mpls_over_gre_init,
 	.build_icmp_init		= &dr_ste_v0_build_icmp_init,
 	.build_general_purpose_init	= &dr_ste_v0_build_general_purpose_init,
 	.build_eth_l4_misc_init		= &dr_ste_v0_build_eth_l4_misc_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index f15a15da0acb..ad062bfeef2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1306,6 +1306,88 @@ static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag;
 }
 
+static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value,
+						 struct mlx5dr_ste_build *sb,
+						 u8 *tag)
+{
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *parser_ptr;
+	u8 parser_id;
+	u32 mpls_hdr;
+
+	mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL;
+	misc2->outer_first_mpls_over_udp_label = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP;
+	misc2->outer_first_mpls_over_udp_exp = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS;
+	misc2->outer_first_mpls_over_udp_s_bos = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL;
+	misc2->outer_first_mpls_over_udp_ttl = 0;
+
+	parser_id = sb->caps->flex_parser_id_mpls_over_udp;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+	*(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+	return 0;
+}
+
+static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb,
+						   struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask);
+
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ?
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag;
+}
+
+static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value,
+						 struct mlx5dr_ste_build *sb,
+						 u8 *tag)
+{
+	struct mlx5dr_match_misc2 *misc2 = &value->misc2;
+	u8 *parser_ptr;
+	u8 parser_id;
+	u32 mpls_hdr;
+
+	mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL;
+	misc2->outer_first_mpls_over_gre_label = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP;
+	misc2->outer_first_mpls_over_gre_exp = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS;
+	misc2->outer_first_mpls_over_gre_s_bos = 0;
+	mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL;
+	misc2->outer_first_mpls_over_gre_ttl = 0;
+
+	parser_id = sb->caps->flex_parser_id_mpls_over_gre;
+	parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id);
+	*(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr);
+
+	return 0;
+}
+
+static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb,
+						   struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask);
+
+	/* STEs with lookup type FLEX_PARSER_{0/1} includes
+	 * flex parsers_{0-3}/{4-7} respectively.
+	 */
+	sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ?
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_1 :
+		      DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag;
+}
+
 static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value,
 				    struct mlx5dr_ste_build *sb,
 				    u8 *tag)
@@ -1679,6 +1761,8 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	.build_mpls_init		= &dr_ste_v1_build_mpls_init,
 	.build_tnl_gre_init		= &dr_ste_v1_build_tnl_gre_init,
 	.build_tnl_mpls_init		= &dr_ste_v1_build_tnl_mpls_init,
+	.build_tnl_mpls_over_udp_init	= &dr_ste_v1_build_tnl_mpls_over_udp_init,
+	.build_tnl_mpls_over_gre_init	= &dr_ste_v1_build_tnl_mpls_over_gre_init,
 	.build_icmp_init		= &dr_ste_v1_build_icmp_init,
 	.build_general_purpose_init	= &dr_ste_v1_build_general_purpose_init,
 	.build_eth_l4_misc_init		= &dr_ste_v1_build_eth_l4_misc_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 3c07ec61c8be..8eed8ce5a9d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -393,6 +393,16 @@ void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx,
 			       struct mlx5dr_ste_build *sb,
 			       struct mlx5dr_match_param *mask,
 			       bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx,
+					struct mlx5dr_ste_build *sb,
+					struct mlx5dr_match_param *mask,
+					struct mlx5dr_cmd_caps *caps,
+					bool inner, bool rx);
+void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx,
+					struct mlx5dr_ste_build *sb,
+					struct mlx5dr_match_param *mask,
+					struct mlx5dr_cmd_caps *caps,
+					bool inner, bool rx);
 void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx,
 			   struct mlx5dr_ste_build *sb,
 			   struct mlx5dr_match_param *mask,
@@ -723,6 +733,8 @@ struct mlx5dr_cmd_caps {
 	u8 flex_parser_id_icmpv6_dw0;
 	u8 flex_parser_id_icmpv6_dw1;
 	u8 flex_parser_id_geneve_tlv_option_0;
+	u8 flex_parser_id_mpls_over_gre;
+	u8 flex_parser_id_mpls_over_udp;
 	u8 max_ft_level;
 	u16 roce_min_src_udp;
 	u8 num_esw_ports;
-- 
cgit v1.2.3


From df9dd15ae118e4c95fba6fe2d870ae23a99c3de2 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Sun, 7 Feb 2021 04:40:16 +0200
Subject: net/mlx5: DR, Add support for matching tunnel GTP-U

Enable matching on tunnel GTP-U and GTP-U first extension
header using dynamic flex parser.

Signed-off-by: Muhammad Sammar <muhammads@nvidia.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c  |  16 +++
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 118 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.c  |  41 +++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_ste.h  |  10 ++
 .../mellanox/mlx5/core/steering/dr_ste_v0.c        |  86 +++++++++++++++
 .../mellanox/mlx5/core/steering/dr_ste_v1.c        |  81 ++++++++++++++
 .../mellanox/mlx5/core/steering/dr_types.h         |  35 +++++-
 .../mellanox/mlx5/core/steering/mlx5_ifc_dr.h      |  11 ++
 8 files changed, 397 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index dfb1e955409e..6f9d7aa9fb4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -118,6 +118,22 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 		caps->flex_parser_id_mpls_over_udp =
 			MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label);
 
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED)
+		caps->flex_parser_id_gtpu_dw_0 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0);
+
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED)
+		caps->flex_parser_id_gtpu_teid =
+			MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid);
+
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED)
+		caps->flex_parser_id_gtpu_dw_2 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2);
+
+	if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED)
+		caps->flex_parser_id_gtpu_first_ext_dw_0 =
+			MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0);
+
 	caps->nic_rx_drop_address =
 		MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address);
 	caps->nic_tx_drop_address =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index 531eb69eeea1..6f6191d1d5a6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -155,6 +155,109 @@ dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask,
 	       dr_matcher_supp_tnl_geneve(&dmn->info.caps);
 }
 
+static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3)
+{
+	return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid;
+}
+
+static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask,
+				struct mlx5dr_domain *dmn)
+{
+	return dr_mask_is_tnl_gtpu_set(&mask->misc3) &&
+	       dr_matcher_supp_tnl_gtpu(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask,
+				     struct mlx5dr_domain *dmn)
+{
+	return mask->misc3.gtpu_dw_0 &&
+	       dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask,
+				     struct mlx5dr_domain *dmn)
+{
+	return mask->misc3.gtpu_teid &&
+	       dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask,
+				     struct mlx5dr_domain *dmn)
+{
+	return mask->misc3.gtpu_dw_2 &&
+	       dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps);
+}
+
+static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps)
+{
+	return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED;
+}
+
+static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask,
+					  struct mlx5dr_domain *dmn)
+{
+	return mask->misc3.gtpu_first_ext_dw_0 &&
+	       dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps);
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask,
+					      struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+	return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) &&
+		dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+	       (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) &&
+		dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+	       (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) &&
+		dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+	       (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+		dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask,
+					      struct mlx5dr_domain *dmn)
+{
+	struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+	return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) &&
+		dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) ||
+	       (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) &&
+		dr_mask_is_tnl_gtpu_teid(mask, dmn)) ||
+	       (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) &&
+		dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) ||
+	       (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) &&
+		dr_mask_is_tnl_gtpu_first_ext(mask, dmn));
+}
+
+static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask,
+				    struct mlx5dr_domain *dmn)
+{
+	return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) ||
+	       dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) ||
+	       dr_mask_is_tnl_gtpu(mask, dmn);
+}
+
 static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps)
 {
 	return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) ||
@@ -397,7 +500,22 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher,
 				mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++],
 								    &mask, &dmn->info.caps,
 								    inner, rx);
+		} else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) {
+			if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn))
+				mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++],
+									&mask, &dmn->info.caps,
+									inner, rx);
+
+			if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn))
+				mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++],
+									&mask, &dmn->info.caps,
+									inner, rx);
+
+			if (dr_mask_is_tnl_gtpu(&mask, dmn))
+				mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++],
+							  &mask, inner, rx);
 		}
+
 		if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer))
 			mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++],
 						     &mask, inner, rx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 6ae839b1ec8a..9b1529137cba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -854,6 +854,13 @@ static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
 	spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code);
 	spec->geneve_tlv_option_0_data =
 		MLX5_GET(fte_match_set_misc3, mask, geneve_tlv_option_0_data);
+	spec->gtpu_msg_flags = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_flags);
+	spec->gtpu_msg_type = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_type);
+	spec->gtpu_teid = MLX5_GET(fte_match_set_misc3, mask, gtpu_teid);
+	spec->gtpu_dw_0 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_0);
+	spec->gtpu_dw_2 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_2);
+	spec->gtpu_first_ext_dw_0 =
+		MLX5_GET(fte_match_set_misc3, mask, gtpu_first_ext_dw_0);
 }
 
 static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec)
@@ -1175,6 +1182,40 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
 	ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask);
 }
 
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+			       struct mlx5dr_ste_build *sb,
+			       struct mlx5dr_match_param *mask,
+			       bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->inner = inner;
+	ste_ctx->build_tnl_gtpu_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+					     struct mlx5dr_ste_build *sb,
+					     struct mlx5dr_match_param *mask,
+					     struct mlx5dr_cmd_caps *caps,
+					     bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->caps = caps;
+	sb->inner = inner;
+	ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask);
+}
+
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+					     struct mlx5dr_ste_build *sb,
+					     struct mlx5dr_match_param *mask,
+					     struct mlx5dr_cmd_caps *caps,
+					     bool inner, bool rx)
+{
+	sb->rx = rx;
+	sb->caps = caps;
+	sb->inner = inner;
+	ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask);
+}
+
 void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx,
 				 struct mlx5dr_ste_build *sb,
 				 struct mlx5dr_match_param *mask,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
index 2d2dc680ad13..992b591bf0c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h
@@ -62,6 +62,13 @@
 		       in_out##_first_mpls_ttl); \
 } while (0)
 
+#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \
+	u8 parser_id = (caps)->flex_parser_id_##fname; \
+	u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \
+	*(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\
+	(spec)->fname = 0;\
+} while (0)
+
 #define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\
 	(_misc)->outer_first_mpls_over_gre_label || \
 	(_misc)->outer_first_mpls_over_gre_exp || \
@@ -133,6 +140,9 @@ struct mlx5dr_ste_ctx {
 	void DR_STE_CTX_BUILDER(src_gvmi_qpn);
 	void DR_STE_CTX_BUILDER(flex_parser_0);
 	void DR_STE_CTX_BUILDER(flex_parser_1);
+	void DR_STE_CTX_BUILDER(tnl_gtpu);
+	void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0);
+	void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1);
 
 	/* Getters and Setters */
 	void (*ste_init)(u8 *hw_ste_p, u16 lu_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index c62f64d48213..0757a4e8540e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1768,6 +1768,89 @@ dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag;
 }
 
+static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+						    struct mlx5dr_ste_build *sb,
+						    uint8_t *tag)
+{
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+		       gtpu_msg_flags, misc3,
+		       gtpu_msg_flags);
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+		       gtpu_msg_type, misc3,
+		       gtpu_msg_type);
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag,
+		       gtpu_teid, misc3,
+		       gtpu_teid);
+
+	return 0;
+}
+
+static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+						      struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   uint8_t *tag)
+{
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+	return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+					    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   uint8_t *tag)
+{
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+	return 0;
+}
+
+static void
+dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+					    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v0_build_eth_l2_src_dst_init,
@@ -1795,6 +1878,9 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = {
 	.build_src_gvmi_qpn_init	= &dr_ste_v0_build_src_gvmi_qpn_init,
 	.build_flex_parser_0_init	= &dr_ste_v0_build_flex_parser_0_init,
 	.build_flex_parser_1_init	= &dr_ste_v0_build_flex_parser_1_init,
+	.build_tnl_gtpu_init		= &dr_ste_v0_build_flex_parser_tnl_gtpu_init,
+	.build_tnl_gtpu_flex_parser_0_init   = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init,
+	.build_tnl_gtpu_flex_parser_1_init   = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init,
 
 	/* Getters and Setters */
 	.ste_init			= &dr_ste_v0_init,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index ad062bfeef2b..054c2e2b6554 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1747,6 +1747,83 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb,
 	sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag;
 }
 
+static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value,
+						    struct mlx5dr_ste_build *sb,
+						    uint8_t *tag)
+{
+	struct mlx5dr_match_misc3 *misc3 = &value->misc3;
+
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags);
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type);
+	DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid);
+
+	return 0;
+}
+
+static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb,
+						      struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag;
+}
+
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   uint8_t *tag)
+{
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+	return 0;
+}
+
+static void
+dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb,
+					    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag;
+}
+
+static int
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value,
+					   struct mlx5dr_ste_build *sb,
+					   uint8_t *tag)
+{
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3);
+	if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0))
+		DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3);
+	return 0;
+}
+
+static void
+dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb,
+					    struct mlx5dr_match_param *mask)
+{
+	dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask);
+
+	sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1;
+	sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask);
+	sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag;
+}
+
 struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	/* Builders */
 	.build_eth_l2_src_dst_init	= &dr_ste_v1_build_eth_l2_src_dst_init,
@@ -1774,6 +1851,10 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = {
 	.build_src_gvmi_qpn_init	= &dr_ste_v1_build_src_gvmi_qpn_init,
 	.build_flex_parser_0_init	= &dr_ste_v1_build_flex_parser_0_init,
 	.build_flex_parser_1_init	= &dr_ste_v1_build_flex_parser_1_init,
+	.build_tnl_gtpu_init		= &dr_ste_v1_build_flex_parser_tnl_gtpu_init,
+	.build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init,
+	.build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init,
+
 	/* Getters and Setters */
 	.ste_init			= &dr_ste_v1_init,
 	.set_next_lu_type		= &dr_ste_v1_set_next_lu_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 8eed8ce5a9d9..7c1ab0b6417e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -26,6 +26,16 @@
 #define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg)
 #define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg)
 
+static inline bool dr_is_flex_parser_0_id(u8 parser_id)
+{
+	return parser_id <= DR_STE_MAX_FLEX_0_ID;
+}
+
+static inline bool dr_is_flex_parser_1_id(u8 parser_id)
+{
+	return parser_id > DR_STE_MAX_FLEX_0_ID;
+}
+
 enum mlx5dr_icm_chunk_size {
 	DR_CHUNK_SIZE_1,
 	DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */
@@ -421,6 +431,20 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx,
 					 struct mlx5dr_match_param *mask,
 					 struct mlx5dr_cmd_caps *caps,
 					 bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx,
+			       struct mlx5dr_ste_build *sb,
+			       struct mlx5dr_match_param *mask,
+			       bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx,
+					     struct mlx5dr_ste_build *sb,
+					     struct mlx5dr_match_param *mask,
+					     struct mlx5dr_cmd_caps *caps,
+					     bool inner, bool rx);
+void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx,
+					     struct mlx5dr_ste_build *sb,
+					     struct mlx5dr_match_param *mask,
+					     struct mlx5dr_cmd_caps *caps,
+					     bool inner, bool rx);
 void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx,
 				      struct mlx5dr_ste_build *sb,
 				      struct mlx5dr_match_param *mask,
@@ -674,7 +698,12 @@ struct mlx5dr_match_misc3 {
 	u8 icmpv4_code;
 	u8 icmpv4_type;
 	u32 geneve_tlv_option_0_data;
-	u8 reserved_auto3[0x18];
+	u8 gtpu_msg_flags;
+	u8 gtpu_msg_type;
+	u32 gtpu_teid;
+	u32 gtpu_dw_2;
+	u32 gtpu_first_ext_dw_0;
+	u32 gtpu_dw_0;
 };
 
 struct mlx5dr_match_misc4 {
@@ -735,6 +764,10 @@ struct mlx5dr_cmd_caps {
 	u8 flex_parser_id_geneve_tlv_option_0;
 	u8 flex_parser_id_mpls_over_gre;
 	u8 flex_parser_id_mpls_over_udp;
+	u8 flex_parser_id_gtpu_dw_0;
+	u8 flex_parser_id_gtpu_teid;
+	u8 flex_parser_id_gtpu_dw_2;
+	u8 flex_parser_id_gtpu_first_ext_dw_0;
 	u8 max_ft_level;
 	u16 roce_min_src_udp;
 	u8 num_esw_ports;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
index 601bbb3a107c..9643ee647f57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h
@@ -485,6 +485,17 @@ struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits {
+	u8	   reserved_at_0[0x5];
+	u8	   gtpu_msg_flags[0x3];
+	u8	   gtpu_msg_type[0x8];
+	u8	   reserved_at_10[0x10];
+
+	u8	   gtpu_teid[0x20];
+
+	u8	   reserved_at_40[0x40];
+};
+
 struct mlx5_ifc_ste_general_purpose_bits {
 	u8         general_purpose_lookup_field[0x20];
 
-- 
cgit v1.2.3


From 7304d603a57a1edecfecfbcc26f85edcda4cae81 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Mon, 2 Nov 2020 23:57:13 +0200
Subject: net/mlx5: DR, Add support for force-loopback QP

When supported by the device, SW steering RoCE RC QP that is used to
write/read to/from ICM will be created with force-loopback attribute.
Such QP doesn't require GID index upon creation.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c  | 36 ++++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 34 +++++++++++++++++---
 .../mellanox/mlx5/core/steering/dr_types.h         |  7 +++++
 include/linux/mlx5/mlx5_ifc.h                      |  7 +++--
 4 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 6f9d7aa9fb4c..68d898e144fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -85,15 +85,51 @@ int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev,
 	return 0;
 }
 
+static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev,
+					  u16 vport, bool *roce_en)
+{
+	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
+	int err;
+
+	MLX5_SET(query_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+	MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport);
+
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	*roce_en = MLX5_GET(query_nic_vport_context_out, out,
+			    nic_vport_context.roce_en);
+	return 0;
+}
+
 int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 			    struct mlx5dr_cmd_caps *caps)
 {
+	bool roce_en;
+	int err;
+
 	caps->prio_tag_required	= MLX5_CAP_GEN(mdev, prio_tag_required);
 	caps->eswitch_manager	= MLX5_CAP_GEN(mdev, eswitch_manager);
 	caps->gvmi		= MLX5_CAP_GEN(mdev, vhca_id);
 	caps->flex_protocols	= MLX5_CAP_GEN(mdev, flex_parser_protocols);
 	caps->sw_format_ver	= MLX5_CAP_GEN(mdev, steering_format_version);
 
+	if (MLX5_CAP_GEN(mdev, roce)) {
+		err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
+		if (err)
+			return err;
+
+		caps->roce_caps.roce_en = roce_en;
+		caps->roce_caps.fl_rc_qp_when_roce_disabled =
+			MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
+		caps->roce_caps.fl_rc_qp_when_roce_enabled =
+			MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
+	}
+
 	if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) {
 		caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
 		caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 37377d668057..69d623bedefe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -32,6 +32,7 @@ struct dr_qp_rtr_attr {
 	u8 min_rnr_timer;
 	u8 sgid_index;
 	u16 udp_src_port;
+	u8 fl:1;
 };
 
 struct dr_qp_rts_attr {
@@ -650,6 +651,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 			 attr->udp_src_port);
 
 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
+	MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 
 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
@@ -658,6 +660,19 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 }
 
+static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
+{
+	/* Check whether RC RoCE QP creation with force loopback is allowed.
+	 * There are two separate capability bits for this:
+	 *  - force loopback when RoCE is enabled
+	 *  - force loopback when RoCE is disabled
+	 */
+	return ((caps->roce_caps.roce_en &&
+		 caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
+		(!caps->roce_caps.roce_en &&
+		 caps->roce_caps.fl_rc_qp_when_roce_disabled));
+}
+
 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 {
 	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
@@ -676,17 +691,26 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 	}
 
 	/* RTR */
-	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
-	if (ret)
-		return ret;
-
 	rtr_attr.mtu		= mtu;
 	rtr_attr.qp_num		= dr_qp->qpn;
 	rtr_attr.min_rnr_timer	= 12;
 	rtr_attr.port_num	= port;
-	rtr_attr.sgid_index	= gid_index;
 	rtr_attr.udp_src_port	= dmn->info.caps.roce_min_src_udp;
 
+	/* If QP creation with force loopback is allowed, then there
+	 * is no need for GID index when creating the QP.
+	 * Otherwise we query GID attributes and use GID index.
+	 */
+	rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
+	if (!rtr_attr.fl) {
+		ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
+					   &rtr_attr.dgid_attr);
+		if (ret)
+			return ret;
+
+		rtr_attr.sgid_index = gid_index;
+	}
+
 	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
 	if (ret) {
 		mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 7c1ab0b6417e..8de70566f85b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -747,6 +747,12 @@ struct mlx5dr_cmd_vport_cap {
 	u32 num;
 };
 
+struct mlx5dr_roce_cap {
+	u8 roce_en:1;
+	u8 fl_rc_qp_when_roce_disabled:1;
+	u8 fl_rc_qp_when_roce_enabled:1;
+};
+
 struct mlx5dr_cmd_caps {
 	u16 gvmi;
 	u64 nic_rx_drop_address;
@@ -783,6 +789,7 @@ struct mlx5dr_cmd_caps {
 	struct mlx5dr_esw_caps esw_caps;
 	struct mlx5dr_cmd_vport_cap *vports_caps;
 	bool prio_tag_required;
+	struct mlx5dr_roce_cap roce_caps;
 };
 
 struct mlx5dr_domain_rx_tx {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index aa6effe1dd6d..4d9569c4b96c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -961,7 +961,9 @@ struct mlx5_ifc_roce_cap_bits {
 	u8         roce_apm[0x1];
 	u8         reserved_at_1[0x3];
 	u8         sw_r_roce_src_udp_port[0x1];
-	u8         reserved_at_5[0x19];
+	u8         fl_rc_qp_when_roce_disabled[0x1];
+	u8         fl_rc_qp_when_roce_enabled[0x1];
+	u8         reserved_at_7[0x17];
 	u8	   qp_ts_format[0x2];
 
 	u8         reserved_at_20[0x60];
@@ -2942,7 +2944,8 @@ struct mlx5_ifc_qpc_bits {
 	u8         state[0x4];
 	u8         lag_tx_port_affinity[0x4];
 	u8         st[0x8];
-	u8         reserved_at_10[0x3];
+	u8         reserved_at_10[0x2];
+	u8	   isolate_vl_tc[0x1];
 	u8         pm_state[0x2];
 	u8         reserved_at_15[0x1];
 	u8         req_e2e_credit_mode[0x2];
-- 
cgit v1.2.3


From aeacb52a8de7046be5399ba311f49bce96e1b269 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Tue, 3 Nov 2020 01:31:53 +0200
Subject: net/mlx5: DR, Add support for isolate_vl_tc QP

When using SW steering, rule insertion rate depends on the RDMA RC QP
performance used for writing to the ICM. During stress this QP is competing
on the HW resources with all the other QPs that are used to send data.
To protect SW steering QP's performance in such cases, we set this QP to
use isolated VL. The VL number is reserved by FW and is not exposed to the
driver.
Support for this QP on isolated VL exists only when both force-loopback and
isolate_vl_tc capabilities are set.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c   | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c  | 7 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 2 ++
 include/linux/mlx5/mlx5_ifc.h                               | 4 +++-
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 68d898e144fb..5970cb8fc0c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -130,6 +130,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 			MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
 	}
 
+	caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new);
+
 	if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) {
 		caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0);
 		caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 69d623bedefe..12cf323a5943 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -46,6 +46,7 @@ struct dr_qp_init_attr {
 	u32 pdn;
 	u32 max_send_wr;
 	struct mlx5_uars_page *uar;
+	u8 isolate_vl_tc:1;
 };
 
 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
@@ -158,6 +159,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+	MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
 	MLX5_SET(qpc, qpc, pd, attr->pdn);
 	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
 	MLX5_SET(qpc, qpc, log_page_size,
@@ -924,6 +926,11 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 	init_attr.pdn = dmn->pdn;
 	init_attr.uar = dmn->uar;
 	init_attr.max_send_wr = QUEUE_SIZE;
+
+	/* Isolated VL is applicable only if force loopback is supported */
+	if (dr_send_allow_fl(&dmn->info.caps))
+		init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
+
 	spin_lock_init(&dmn->send_ring->lock);
 
 	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 8de70566f85b..67460c42a99b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -790,6 +790,7 @@ struct mlx5dr_cmd_caps {
 	struct mlx5dr_cmd_vport_cap *vports_caps;
 	bool prio_tag_required;
 	struct mlx5dr_roce_cap roce_caps;
+	u8 isolate_vl_tc:1;
 };
 
 struct mlx5dr_domain_rx_tx {
@@ -1164,6 +1165,7 @@ struct mlx5dr_cmd_qp_create_attr {
 	u32 sq_wqe_cnt;
 	u32 rq_wqe_cnt;
 	u32 rq_wqe_shift;
+	u8 isolate_vl_tc:1;
 };
 
 int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4d9569c4b96c..52b7cabcde08 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1319,7 +1319,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_srq_sz[0x8];
 	u8         log_max_qp_sz[0x8];
 	u8         event_cap[0x1];
-	u8         reserved_at_91[0x7];
+	u8         reserved_at_91[0x2];
+	u8         isolate_vl_tc_new[0x1];
+	u8         reserved_at_94[0x4];
 	u8         prio_tag_required[0x1];
 	u8         reserved_at_99[0x2];
 	u8         log_max_qp[0x5];
-- 
cgit v1.2.3


From d044d9fc1380b66917dcb418ef4ec7e59dd6e597 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 20 Apr 2021 15:24:28 +0200
Subject: selftests/bpf: Add docs target as all dependency

Currently docs target is make dependency for TEST_GEN_FILES,
which makes tests to be rebuilt every time you run make.

Adding docs as all target dependency, so when running make
on top of built selftests it will show just:

  $ make
  make[1]: Nothing to be done for 'docs'.

After cleaning docs, only docs is rebuilt:

  $ make docs-clean
  CLEAN    eBPF_helpers-manpage
  CLEAN    eBPF_syscall-manpage
  $ make
  GEN      ...selftests/bpf/bpf-helpers.rst
  GEN      ...selftests/bpf/bpf-helpers.7
  GEN      ...selftests/bpf/bpf-syscall.rst
  GEN      ...selftests/bpf/bpf-syscall.2
  $ make
  make[1]: Nothing to be done for 'docs'.

Fixes: a01d935b2e09 ("tools/bpf: Remove bpf-helpers from bpftool docs")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210420132428.15710-1-jolsa@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c45ae13b88a0..c5bcdb3d4b12 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -187,7 +187,6 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
 		    cp $(SCRATCH_DIR)/runqslower $@
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-$(TEST_GEN_FILES): docs
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -210,6 +209,8 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
 
+all: docs
+
 docs:
 	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	\
 	            -f Makefile.docs					\
-- 
cgit v1.2.3


From f4f86d8d2c04bc0c90f8d944a1fcc30349ba01b3 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:01 +0200
Subject: net: phy: execute genphy_loopback() per default on all PHYs

The generic loopback is really generic and is defined by the 802.3
standard, we should just mandate that drivers implement a custom
loopback if the generic one cannot work.

Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 73d29fd5e03d..320a3e5cd10a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1777,6 +1777,9 @@ int phy_loopback(struct phy_device *phydev, bool enable)
 	struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver);
 	int ret = 0;
 
+	if (!phydrv)
+		return -ENODEV;
+
 	mutex_lock(&phydev->lock);
 
 	if (enable && phydev->loopback_enabled) {
@@ -1789,10 +1792,10 @@ int phy_loopback(struct phy_device *phydev, bool enable)
 		goto out;
 	}
 
-	if (phydev->drv && phydrv->set_loopback)
+	if (phydrv->set_loopback)
 		ret = phydrv->set_loopback(phydev, enable);
 	else
-		ret = -EOPNOTSUPP;
+		ret = genphy_loopback(phydev, enable);
 
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From 014068dcb5b17dae110354c4de241833124edba1 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:02 +0200
Subject: net: phy: genphy_loopback: add link speed configuration

In case of loopback, in most cases we need to disable autoneg support
and force some speed configuration. Otherwise, depending on currently
active auto negotiated link speed, the loopback may or may not work.

This patch was tested with following PHYs: TJA1102, KSZ8081, KSZ9031,
AT8035, AR9331.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        |  3 ++-
 drivers/net/phy/phy_device.c | 28 ++++++++++++++++++++++++++--
 include/linux/phy.h          |  1 +
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index fc2e7cb5b2e5..1f0512e39c65 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -701,7 +701,7 @@ out:
 }
 EXPORT_SYMBOL(phy_start_cable_test_tdr);
 
-static int phy_config_aneg(struct phy_device *phydev)
+int phy_config_aneg(struct phy_device *phydev)
 {
 	if (phydev->drv->config_aneg)
 		return phydev->drv->config_aneg(phydev);
@@ -714,6 +714,7 @@ static int phy_config_aneg(struct phy_device *phydev)
 
 	return genphy_config_aneg(phydev);
 }
+EXPORT_SYMBOL(phy_config_aneg);
 
 /**
  * phy_check_link_status - check link status and set state accordingly
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 320a3e5cd10a..0a2d8bedf73d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2565,8 +2565,32 @@ EXPORT_SYMBOL(genphy_resume);
 
 int genphy_loopback(struct phy_device *phydev, bool enable)
 {
-	return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK,
-			  enable ? BMCR_LOOPBACK : 0);
+	if (enable) {
+		u16 val, ctl = BMCR_LOOPBACK;
+		int ret;
+
+		if (phydev->speed == SPEED_1000)
+			ctl |= BMCR_SPEED1000;
+		else if (phydev->speed == SPEED_100)
+			ctl |= BMCR_SPEED100;
+
+		if (phydev->duplex == DUPLEX_FULL)
+			ctl |= BMCR_FULLDPLX;
+
+		phy_modify(phydev, MII_BMCR, ~0, ctl);
+
+		ret = phy_read_poll_timeout(phydev, MII_BMSR, val,
+					    val & BMSR_LSTATUS,
+				    5000, 500000, true);
+		if (ret)
+			return ret;
+	} else {
+		phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0);
+
+		phy_config_aneg(phydev);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(genphy_loopback);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e3d4d583463b..60d2b26026a2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1410,6 +1410,7 @@ void phy_disconnect(struct phy_device *phydev);
 void phy_detach(struct phy_device *phydev);
 void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
+int phy_config_aneg(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
 int phy_speed_down(struct phy_device *phydev, bool sync);
-- 
cgit v1.2.3


From 3e1e58d64c3d0a6789f9d865936c4ce46b20f3f5 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:03 +0200
Subject: net: add generic selftest support

Port some parts of the stmmac selftest and reuse it as basic generic selftest
library. This patch was tested with following combinations:
- iMX6DL FEC -> AT8035
- iMX6DL FEC -> SJA1105Q switch -> KSZ8081
- iMX6DL FEC -> SJA1105Q switch -> KSZ9031
- AR9331 ag71xx -> AR9331 PHY
- AR9331 ag71xx -> AR9331 switch -> AR9331 PHY

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/selftests.h |  12 ++
 net/Kconfig             |   4 +
 net/core/Makefile       |   1 +
 net/core/selftests.c    | 400 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 417 insertions(+)
 create mode 100644 include/net/selftests.h
 create mode 100644 net/core/selftests.c

diff --git a/include/net/selftests.h b/include/net/selftests.h
new file mode 100644
index 000000000000..9993b9498cf3
--- /dev/null
+++ b/include/net/selftests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_SELFTESTS
+#define _NET_SELFTESTS
+
+#include <linux/ethtool.h>
+
+void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
+		  u64 *buf);
+int net_selftest_get_count(void);
+void net_selftest_get_strings(u8 *data);
+
+#endif /* _NET_SELFTESTS */
diff --git a/net/Kconfig b/net/Kconfig
index 9c456acc379e..8d955195c069 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -429,6 +429,10 @@ config GRO_CELLS
 config SOCK_VALIDATE_XMIT
 	bool
 
+config NET_SELFTESTS
+	def_tristate PHYLIB
+	depends on PHYLIB
+
 config NET_SOCK_MSG
 	bool
 	default n
diff --git a/net/core/Makefile b/net/core/Makefile
index 0c2233c826fd..1a6168d8f23b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
 ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_NET_SELFTESTS) += selftests.o
 obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
 endif
diff --git a/net/core/selftests.c b/net/core/selftests.c
new file mode 100644
index 000000000000..ba7b0171974c
--- /dev/null
+++ b/net/core/selftests.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates.
+ * stmmac Selftests Support
+ *
+ * Author: Jose Abreu <joabreu@synopsys.com>
+ *
+ * Ported from stmmac by:
+ * Copyright (C) 2021 Oleksij Rempel <o.rempel@pengutronix.de>
+ */
+
+#include <linux/phy.h>
+#include <net/selftests.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+struct net_packet_attrs {
+	unsigned char *src;
+	unsigned char *dst;
+	u32 ip_src;
+	u32 ip_dst;
+	bool tcp;
+	u16 sport;
+	u16 dport;
+	int timeout;
+	int size;
+	int max_size;
+	u8 id;
+	u16 queue_mapping;
+};
+
+struct net_test_priv {
+	struct net_packet_attrs *packet;
+	struct packet_type pt;
+	struct completion comp;
+	int double_vlan;
+	int vlan_id;
+	int ok;
+};
+
+struct netsfhdr {
+	__be32 version;
+	__be64 magic;
+	u8 id;
+} __packed;
+
+static u8 net_test_next_id;
+
+#define NET_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+			   sizeof(struct netsfhdr))
+#define NET_TEST_PKT_MAGIC	0xdeadcafecafedeadULL
+#define NET_LB_TIMEOUT		msecs_to_jiffies(200)
+
+static struct sk_buff *net_test_get_skb(struct net_device *ndev,
+					struct net_packet_attrs *attr)
+{
+	struct sk_buff *skb = NULL;
+	struct udphdr *uhdr = NULL;
+	struct tcphdr *thdr = NULL;
+	struct netsfhdr *shdr;
+	struct ethhdr *ehdr;
+	struct iphdr *ihdr;
+	int iplen, size;
+
+	size = attr->size + NET_TEST_PKT_SIZE;
+
+	if (attr->tcp)
+		size += sizeof(struct tcphdr);
+	else
+		size += sizeof(struct udphdr);
+
+	if (attr->max_size && attr->max_size > size)
+		size = attr->max_size;
+
+	skb = netdev_alloc_skb(ndev, size);
+	if (!skb)
+		return NULL;
+
+	prefetchw(skb->data);
+
+	ehdr = skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	skb_set_network_header(skb, skb->len);
+	ihdr = skb_put(skb, sizeof(*ihdr));
+
+	skb_set_transport_header(skb, skb->len);
+	if (attr->tcp)
+		thdr = skb_put(skb, sizeof(*thdr));
+	else
+		uhdr = skb_put(skb, sizeof(*uhdr));
+
+	eth_zero_addr(ehdr->h_dest);
+
+	if (attr->src)
+		ether_addr_copy(ehdr->h_source, attr->src);
+	if (attr->dst)
+		ether_addr_copy(ehdr->h_dest, attr->dst);
+
+	ehdr->h_proto = htons(ETH_P_IP);
+
+	if (attr->tcp) {
+		thdr->source = htons(attr->sport);
+		thdr->dest = htons(attr->dport);
+		thdr->doff = sizeof(struct tcphdr) / 4;
+		thdr->check = 0;
+	} else {
+		uhdr->source = htons(attr->sport);
+		uhdr->dest = htons(attr->dport);
+		uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size);
+		if (attr->max_size)
+			uhdr->len = htons(attr->max_size -
+					  (sizeof(*ihdr) + sizeof(*ehdr)));
+		uhdr->check = 0;
+	}
+
+	ihdr->ihl = 5;
+	ihdr->ttl = 32;
+	ihdr->version = 4;
+	if (attr->tcp)
+		ihdr->protocol = IPPROTO_TCP;
+	else
+		ihdr->protocol = IPPROTO_UDP;
+	iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size;
+	if (attr->tcp)
+		iplen += sizeof(*thdr);
+	else
+		iplen += sizeof(*uhdr);
+
+	if (attr->max_size)
+		iplen = attr->max_size - sizeof(*ehdr);
+
+	ihdr->tot_len = htons(iplen);
+	ihdr->frag_off = 0;
+	ihdr->saddr = htonl(attr->ip_src);
+	ihdr->daddr = htonl(attr->ip_dst);
+	ihdr->tos = 0;
+	ihdr->id = 0;
+	ip_send_check(ihdr);
+
+	shdr = skb_put(skb, sizeof(*shdr));
+	shdr->version = 0;
+	shdr->magic = cpu_to_be64(NET_TEST_PKT_MAGIC);
+	attr->id = net_test_next_id;
+	shdr->id = net_test_next_id++;
+
+	if (attr->size)
+		skb_put(skb, attr->size);
+	if (attr->max_size && attr->max_size > skb->len)
+		skb_put(skb, attr->max_size - skb->len);
+
+	skb->csum = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	if (attr->tcp) {
+		thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr,
+					    ihdr->daddr, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct tcphdr, check);
+	} else {
+		udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
+	}
+
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = ndev;
+
+	return skb;
+}
+
+static int net_test_loopback_validate(struct sk_buff *skb,
+				      struct net_device *ndev,
+				      struct packet_type *pt,
+				      struct net_device *orig_ndev)
+{
+	struct net_test_priv *tpriv = pt->af_packet_priv;
+	unsigned char *src = tpriv->packet->src;
+	unsigned char *dst = tpriv->packet->dst;
+	struct netsfhdr *shdr;
+	struct ethhdr *ehdr;
+	struct udphdr *uhdr;
+	struct tcphdr *thdr;
+	struct iphdr *ihdr;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (skb_linearize(skb))
+		goto out;
+	if (skb_headlen(skb) < (NET_TEST_PKT_SIZE - ETH_HLEN))
+		goto out;
+
+	ehdr = (struct ethhdr *)skb_mac_header(skb);
+	if (dst) {
+		if (!ether_addr_equal_unaligned(ehdr->h_dest, dst))
+			goto out;
+	}
+
+	if (src) {
+		if (!ether_addr_equal_unaligned(ehdr->h_source, src))
+			goto out;
+	}
+
+	ihdr = ip_hdr(skb);
+	if (tpriv->double_vlan)
+		ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+
+	if (tpriv->packet->tcp) {
+		if (ihdr->protocol != IPPROTO_TCP)
+			goto out;
+
+		thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (thdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct netsfhdr *)((u8 *)thdr + sizeof(*thdr));
+	} else {
+		if (ihdr->protocol != IPPROTO_UDP)
+			goto out;
+
+		uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+		if (uhdr->dest != htons(tpriv->packet->dport))
+			goto out;
+
+		shdr = (struct netsfhdr *)((u8 *)uhdr + sizeof(*uhdr));
+	}
+
+	if (shdr->magic != cpu_to_be64(NET_TEST_PKT_MAGIC))
+		goto out;
+	if (tpriv->packet->id != shdr->id)
+		goto out;
+
+	tpriv->ok = true;
+	complete(&tpriv->comp);
+out:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int __net_test_loopback(struct net_device *ndev,
+			       struct net_packet_attrs *attr)
+{
+	struct net_test_priv *tpriv;
+	struct sk_buff *skb = NULL;
+	int ret = 0;
+
+	tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+	if (!tpriv)
+		return -ENOMEM;
+
+	tpriv->ok = false;
+	init_completion(&tpriv->comp);
+
+	tpriv->pt.type = htons(ETH_P_IP);
+	tpriv->pt.func = net_test_loopback_validate;
+	tpriv->pt.dev = ndev;
+	tpriv->pt.af_packet_priv = tpriv;
+	tpriv->packet = attr;
+	dev_add_pack(&tpriv->pt);
+
+	skb = net_test_get_skb(ndev, attr);
+	if (!skb) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = dev_direct_xmit(skb, attr->queue_mapping);
+	if (ret < 0) {
+		goto cleanup;
+	} else if (ret > 0) {
+		ret = -ENETUNREACH;
+		goto cleanup;
+	}
+
+	if (!attr->timeout)
+		attr->timeout = NET_LB_TIMEOUT;
+
+	wait_for_completion_timeout(&tpriv->comp, attr->timeout);
+	ret = tpriv->ok ? 0 : -ETIMEDOUT;
+
+cleanup:
+	dev_remove_pack(&tpriv->pt);
+	kfree(tpriv);
+	return ret;
+}
+
+static int net_test_netif_carrier(struct net_device *ndev)
+{
+	return netif_carrier_ok(ndev) ? 0 : -ENOLINK;
+}
+
+static int net_test_phy_phydev(struct net_device *ndev)
+{
+	return ndev->phydev ? 0 : -EOPNOTSUPP;
+}
+
+static int net_test_phy_loopback_enable(struct net_device *ndev)
+{
+	if (!ndev->phydev)
+		return -EOPNOTSUPP;
+
+	return phy_loopback(ndev->phydev, true);
+}
+
+static int net_test_phy_loopback_disable(struct net_device *ndev)
+{
+	if (!ndev->phydev)
+		return -EOPNOTSUPP;
+
+	return phy_loopback(ndev->phydev, false);
+}
+
+static int net_test_phy_loopback_udp(struct net_device *ndev)
+{
+	struct net_packet_attrs attr = { };
+
+	attr.dst = ndev->dev_addr;
+	return __net_test_loopback(ndev, &attr);
+}
+
+static int net_test_phy_loopback_tcp(struct net_device *ndev)
+{
+	struct net_packet_attrs attr = { };
+
+	attr.dst = ndev->dev_addr;
+	attr.tcp = true;
+	return __net_test_loopback(ndev, &attr);
+}
+
+static const struct net_test {
+	char name[ETH_GSTRING_LEN];
+	int (*fn)(struct net_device *ndev);
+} net_selftests[] = {
+	{
+		.name = "Carrier                       ",
+		.fn = net_test_netif_carrier,
+	}, {
+		.name = "PHY dev is present            ",
+		.fn = net_test_phy_phydev,
+	}, {
+		/* This test should be done before all PHY loopback test */
+		.name = "PHY internal loopback, enable ",
+		.fn = net_test_phy_loopback_enable,
+	}, {
+		.name = "PHY internal loopback, UDP    ",
+		.fn = net_test_phy_loopback_udp,
+	}, {
+		.name = "PHY internal loopback, TCP    ",
+		.fn = net_test_phy_loopback_tcp,
+	}, {
+		/* This test should be done after all PHY loopback test */
+		.name = "PHY internal loopback, disable",
+		.fn = net_test_phy_loopback_disable,
+	},
+};
+
+void net_selftest(struct net_device *ndev, struct ethtool_test *etest, u64 *buf)
+{
+	int count = net_selftest_get_count();
+	int i;
+
+	memset(buf, 0, sizeof(*buf) * count);
+	net_test_next_id = 0;
+
+	if (etest->flags != ETH_TEST_FL_OFFLINE) {
+		netdev_err(ndev, "Only offline tests are supported\n");
+		etest->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+
+
+	for (i = 0; i < count; i++) {
+		buf[i] = net_selftests[i].fn(ndev);
+		if (buf[i] && (buf[i] != -EOPNOTSUPP))
+			etest->flags |= ETH_TEST_FL_FAILED;
+	}
+}
+EXPORT_SYMBOL_GPL(net_selftest);
+
+int net_selftest_get_count(void)
+{
+	return ARRAY_SIZE(net_selftests);
+}
+EXPORT_SYMBOL_GPL(net_selftest_get_count);
+
+void net_selftest_get_strings(u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	for (i = 0; i < net_selftest_get_count(); i++) {
+		snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1,
+			 net_selftests[i].name);
+		p += ETH_GSTRING_LEN;
+	}
+}
+EXPORT_SYMBOL_GPL(net_selftest_get_strings);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>");
-- 
cgit v1.2.3


From 6016ba345f97d9da485efc5d274d9185fe4e787b Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:04 +0200
Subject: net: fec: make use of generic NET_SELFTESTS library

With this patch FEC on iMX will able to run generic net selftests

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/Kconfig    | 1 +
 drivers/net/ethernet/freescale/fec_main.c | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 3f9175bdce77..3d937b4650b2 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -26,6 +26,7 @@ config FEC
 		   ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
 	default ARCH_MXC || SOC_IMX28 if ARM
 	select CRC32
+	select NET_SELFTESTS
 	select PHYLIB
 	imply PTP_1588_CLOCK
 	help
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index aecc111fbe73..f2065f9d02e6 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -38,6 +38,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
+#include <net/selftests.h>
 #include <net/tso.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
@@ -2482,6 +2483,9 @@ static void fec_enet_get_strings(struct net_device *netdev,
 			memcpy(data + i * ETH_GSTRING_LEN,
 				fec_stats[i].name, ETH_GSTRING_LEN);
 		break;
+	case ETH_SS_TEST:
+		net_selftest_get_strings(data);
+		break;
 	}
 }
 
@@ -2490,6 +2494,8 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset)
 	switch (sset) {
 	case ETH_SS_STATS:
 		return ARRAY_SIZE(fec_stats);
+	case ETH_SS_TEST:
+		return net_selftest_get_count();
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2741,6 +2747,7 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
 	.set_wol		= fec_enet_set_wol,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+	.self_test		= net_selftest,
 };
 
 static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
-- 
cgit v1.2.3


From b62a12fc047d5382f1904c29de4f27dfde48ca28 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:05 +0200
Subject: net: ag71xx: make use of generic NET_SELFTESTS library

With this patch the ag71xx on Atheros AR9331 will able to run generic net
selftests.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/Kconfig  |  1 +
 drivers/net/ethernet/atheros/ag71xx.c | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig
index fb803bf92ded..6842b74b0696 100644
--- a/drivers/net/ethernet/atheros/Kconfig
+++ b/drivers/net/ethernet/atheros/Kconfig
@@ -20,6 +20,7 @@ if NET_VENDOR_ATHEROS
 config AG71XX
 	tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support"
 	depends on ATH79
+	select NET_SELFTESTS
 	select PHYLINK
 	help
 	  If you wish to compile a kernel for AR7XXX/91XXX and enable
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 3a23b92ebfe3..1ba81b1eb6fd 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -37,6 +37,7 @@
 #include <linux/reset.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <net/selftests.h>
 
 /* For our NAPI weight bigger does *NOT* mean better - it means more
  * D-cache misses and lots more wasted cycles than we'll ever
@@ -497,12 +498,17 @@ static int ag71xx_ethtool_set_pauseparam(struct net_device *ndev,
 static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset,
 				       u8 *data)
 {
-	if (sset == ETH_SS_STATS) {
-		int i;
+	int i;
 
+	switch (sset) {
+	case ETH_SS_STATS:
 		for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++)
 			memcpy(data + i * ETH_GSTRING_LEN,
 			       ag71xx_statistics[i].name, ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_TEST:
+		net_selftest_get_strings(data);
+		break;
 	}
 }
 
@@ -519,9 +525,14 @@ static void ag71xx_ethtool_get_stats(struct net_device *ndev,
 
 static int ag71xx_ethtool_get_sset_count(struct net_device *ndev, int sset)
 {
-	if (sset == ETH_SS_STATS)
+	switch (sset) {
+	case ETH_SS_STATS:
 		return ARRAY_SIZE(ag71xx_statistics);
-	return -EOPNOTSUPP;
+	case ETH_SS_TEST:
+		return net_selftest_get_count();
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
 static const struct ethtool_ops ag71xx_ethtool_ops = {
@@ -536,6 +547,7 @@ static const struct ethtool_ops ag71xx_ethtool_ops = {
 	.get_strings			= ag71xx_ethtool_get_strings,
 	.get_ethtool_stats		= ag71xx_ethtool_get_stats,
 	.get_sset_count			= ag71xx_ethtool_get_sset_count,
+	.self_test			= net_selftest,
 };
 
 static int ag71xx_mdio_wait_busy(struct ag71xx *ag)
-- 
cgit v1.2.3


From a71acad90a3f079685efcb068e2251b912083d68 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 19 Apr 2021 15:01:06 +0200
Subject: net: dsa: enable selftest support for all switches by default

Most of generic selftest should be able to work with probably all ethernet
controllers. The DSA switches are not exception, so enable it by default at
least for DSA.

This patch was tested with SJA1105 and AR9331.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  2 ++
 net/dsa/Kconfig   |  1 +
 net/dsa/slave.c   | 21 +++++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 1259b0f40684..b52e9b057be4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -577,6 +577,8 @@ struct dsa_switch_ops {
 					 int port, uint64_t *data);
 	void	(*get_stats64)(struct dsa_switch *ds, int port,
 				   struct rtnl_link_stats64 *s);
+	void	(*self_test)(struct dsa_switch *ds, int port,
+			     struct ethtool_test *etest, u64 *data);
 
 	/*
 	 * ethtool Wake-on-LAN
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 8746b07668ae..cbc2bd643ab2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -9,6 +9,7 @@ menuconfig NET_DSA
 	select NET_SWITCHDEV
 	select PHYLINK
 	select NET_DEVLINK
+	select NET_SELFTESTS
 	help
 	  Say Y if you want to enable support for the hardware switches supported
 	  by the Distributed Switch Architecture.
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3ae67202fda2..77b33bd161b8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -15,6 +15,7 @@
 #include <linux/mdio.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
+#include <net/selftests.h>
 #include <net/tc_act/tc_mirred.h>
 #include <linux/if_bridge.h>
 #include <linux/if_hsr.h>
@@ -748,7 +749,10 @@ static void dsa_slave_get_strings(struct net_device *dev,
 		if (ds->ops->get_strings)
 			ds->ops->get_strings(ds, dp->index, stringset,
 					     data + 4 * len);
+	} else if (stringset ==  ETH_SS_TEST) {
+		net_selftest_get_strings(data);
 	}
+
 }
 
 static void dsa_slave_get_ethtool_stats(struct net_device *dev,
@@ -794,11 +798,27 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 			count += ds->ops->get_sset_count(ds, dp->index, sset);
 
 		return count;
+	} else if (sset ==  ETH_SS_TEST) {
+		return net_selftest_get_count();
 	}
 
 	return -EOPNOTSUPP;
 }
 
+static void dsa_slave_net_selftest(struct net_device *ndev,
+				   struct ethtool_test *etest, u64 *buf)
+{
+	struct dsa_port *dp = dsa_slave_to_port(ndev);
+	struct dsa_switch *ds = dp->ds;
+
+	if (ds->ops->self_test) {
+		ds->ops->self_test(ds, dp->index, etest, buf);
+		return;
+	}
+
+	net_selftest(ndev, etest, buf);
+}
+
 static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -1630,6 +1650,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 	.get_rxnfc		= dsa_slave_get_rxnfc,
 	.set_rxnfc		= dsa_slave_set_rxnfc,
 	.get_ts_info		= dsa_slave_get_ts_info,
+	.self_test		= dsa_slave_net_selftest,
 };
 
 /* legacy way, bypassing the bridge *****************************************/
-- 
cgit v1.2.3


From 316bcffe44798d37144e908dea96ad7f8093114c Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Mon, 19 Apr 2021 18:25:30 +0800
Subject: net: dsa: felix: disable always guard band bit for TAS config

ALWAYS_GUARD_BAND_SCH_Q bit in TAS config register is descripted as
this:
	0: Guard band is implemented for nonschedule queues to schedule
	   queues transition.
	1: Guard band is implemented for any queue to schedule queue
	   transition.

The driver set guard band be implemented for any queue to schedule queue
transition before, which will make each GCL time slot reserve a guard
band time that can pass the max SDU frame. Because guard band time could
not be set in tc-taprio now, it will use about 12000ns to pass 1500B max
SDU. This limits each GCL time interval to be more than 12000ns.

This patch change the guard band to be only implemented for nonschedule
queues to schedule queues transition, so that there is no need to reserve
guard band on each GCL. Users can manually add guard band time for each
schedule queues in their configuration if they want.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 789fe08cae50..2473bebe48e6 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1227,8 +1227,12 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 	if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX)
 		return -ERANGE;
 
-	ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) |
-		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
+	/* Set port num and disable ALWAYS_GUARD_BAND_SCH_Q, which means set
+	 * guard band to be implemented for nonschedule queues to schedule
+	 * queues transition.
+	 */
+	ocelot_rmw(ocelot,
+		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port),
 		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M |
 		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
 		   QSYS_TAS_PARAM_CFG_CTRL);
-- 
cgit v1.2.3


From 56e2e5de441a3a6590c94e70d071a6c1790c6124 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 20 Apr 2021 16:24:26 -0700
Subject: korina: Fix conflict with global symbol desc_empty on x86.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 4878e527e3c8..4613bc21130b 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1,4 +1,4 @@
-/*
+>/*
  *  Driver for the IDT RC32434 (Korina) on-chip ethernet controller.
  *
  *  Copyright 2004 IDT Inc. (rischelp@idt.com)
@@ -323,7 +323,7 @@ struct dma_reg {
 
 enum chain_status {
 	desc_filled,
-	desc_empty
+	desc_is_empty
 };
 
 #define DMA_COUNT(count)	((count) & DMA_DESC_COUNT_MSK)
@@ -459,7 +459,7 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 	chain_next = (idx + 1) & KORINA_TDS_MASK;
 
 	if (readl(&(lp->tx_dma_regs->dmandptr)) == 0) {
-		if (lp->tx_chain_status == desc_empty) {
+		if (lp->tx_chain_status == desc_is_empty) {
 			/* Update tail */
 			td->control = DMA_COUNT(length) |
 					DMA_DESC_COF | DMA_DESC_IOF;
@@ -486,10 +486,10 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
 			       &lp->tx_dma_regs->dmandptr);
 			/* Move head to tail */
 			lp->tx_chain_head = lp->tx_chain_tail;
-			lp->tx_chain_status = desc_empty;
+			lp->tx_chain_status = desc_is_empty;
 		}
 	} else {
-		if (lp->tx_chain_status == desc_empty) {
+		if (lp->tx_chain_status == desc_is_empty) {
 			/* Update tail */
 			td->control = DMA_COUNT(length) |
 					DMA_DESC_COF | DMA_DESC_IOF;
@@ -868,7 +868,7 @@ korina_tx_dma_interrupt(int irq, void *dev_id)
 			(readl(&(lp->tx_dma_regs->dmandptr)) == 0)) {
 			writel(korina_tx_dma(lp, lp->tx_chain_head),
 			       &lp->tx_dma_regs->dmandptr);
-			lp->tx_chain_status = desc_empty;
+			lp->tx_chain_status = desc_is_empty;
 			lp->tx_chain_head = lp->tx_chain_tail;
 			netif_trans_update(dev);
 		}
@@ -999,7 +999,7 @@ static int korina_alloc_ring(struct net_device *dev)
 	}
 	lp->tx_next_done = lp->tx_chain_head = lp->tx_chain_tail =
 			lp->tx_full = lp->tx_count = 0;
-	lp->tx_chain_status = desc_empty;
+	lp->tx_chain_status = desc_is_empty;
 
 	/* Initialize the receive descriptors */
 	for (i = 0; i < KORINA_NUM_RDS; i++) {
@@ -1027,7 +1027,7 @@ static int korina_alloc_ring(struct net_device *dev)
 	lp->rx_next_done  = 0;
 	lp->rx_chain_head = 0;
 	lp->rx_chain_tail = 0;
-	lp->rx_chain_status = desc_empty;
+	lp->rx_chain_status = desc_is_empty;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 41d26bf4aba070dfd2ab48866cc27a48ee6228c7 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Tue, 20 Apr 2021 09:53:59 +0200
Subject: net: phy: marvell: refactor HWMON OOP style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a structure of Marvell PHY specific HWMON methods to reduce code
duplication. Store a pointer to this structure into the PHY driver's
driver_data member.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 369 ++++++++++++++++------------------------------
 1 file changed, 125 insertions(+), 244 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8018ddf7f316..70cfcfcdd8f2 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2216,6 +2216,19 @@ static int marvell_vct7_cable_test_get_status(struct phy_device *phydev,
 }
 
 #ifdef CONFIG_HWMON
+struct marvell_hwmon_ops {
+	int (*get_temp)(struct phy_device *phydev, long *temp);
+	int (*get_temp_critical)(struct phy_device *phydev, long *temp);
+	int (*set_temp_critical)(struct phy_device *phydev, long temp);
+	int (*get_temp_alarm)(struct phy_device *phydev, long *alarm);
+};
+
+static const struct marvell_hwmon_ops *
+to_marvell_hwmon_ops(const struct phy_device *phydev)
+{
+	return phydev->drv->driver_data;
+}
+
 static int m88e1121_get_temp(struct phy_device *phydev, long *temp)
 {
 	int oldpage;
@@ -2259,75 +2272,6 @@ error:
 	return phy_restore_page(phydev, oldpage, ret);
 }
 
-static int m88e1121_hwmon_read(struct device *dev,
-			       enum hwmon_sensor_types type,
-			       u32 attr, int channel, long *temp)
-{
-	struct phy_device *phydev = dev_get_drvdata(dev);
-	int err;
-
-	switch (attr) {
-	case hwmon_temp_input:
-		err = m88e1121_get_temp(phydev, temp);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return err;
-}
-
-static umode_t m88e1121_hwmon_is_visible(const void *data,
-					 enum hwmon_sensor_types type,
-					 u32 attr, int channel)
-{
-	if (type != hwmon_temp)
-		return 0;
-
-	switch (attr) {
-	case hwmon_temp_input:
-		return 0444;
-	default:
-		return 0;
-	}
-}
-
-static u32 m88e1121_hwmon_chip_config[] = {
-	HWMON_C_REGISTER_TZ,
-	0
-};
-
-static const struct hwmon_channel_info m88e1121_hwmon_chip = {
-	.type = hwmon_chip,
-	.config = m88e1121_hwmon_chip_config,
-};
-
-static u32 m88e1121_hwmon_temp_config[] = {
-	HWMON_T_INPUT,
-	0
-};
-
-static const struct hwmon_channel_info m88e1121_hwmon_temp = {
-	.type = hwmon_temp,
-	.config = m88e1121_hwmon_temp_config,
-};
-
-static const struct hwmon_channel_info *m88e1121_hwmon_info[] = {
-	&m88e1121_hwmon_chip,
-	&m88e1121_hwmon_temp,
-	NULL
-};
-
-static const struct hwmon_ops m88e1121_hwmon_hwmon_ops = {
-	.is_visible = m88e1121_hwmon_is_visible,
-	.read = m88e1121_hwmon_read,
-};
-
-static const struct hwmon_chip_info m88e1121_hwmon_chip_info = {
-	.ops = &m88e1121_hwmon_hwmon_ops,
-	.info = m88e1121_hwmon_info,
-};
-
 static int m88e1510_get_temp(struct phy_device *phydev, long *temp)
 {
 	int ret;
@@ -2390,92 +2334,6 @@ static int m88e1510_get_temp_alarm(struct phy_device *phydev, long *alarm)
 	return 0;
 }
 
-static int m88e1510_hwmon_read(struct device *dev,
-			       enum hwmon_sensor_types type,
-			       u32 attr, int channel, long *temp)
-{
-	struct phy_device *phydev = dev_get_drvdata(dev);
-	int err;
-
-	switch (attr) {
-	case hwmon_temp_input:
-		err = m88e1510_get_temp(phydev, temp);
-		break;
-	case hwmon_temp_crit:
-		err = m88e1510_get_temp_critical(phydev, temp);
-		break;
-	case hwmon_temp_max_alarm:
-		err = m88e1510_get_temp_alarm(phydev, temp);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return err;
-}
-
-static int m88e1510_hwmon_write(struct device *dev,
-				enum hwmon_sensor_types type,
-				u32 attr, int channel, long temp)
-{
-	struct phy_device *phydev = dev_get_drvdata(dev);
-	int err;
-
-	switch (attr) {
-	case hwmon_temp_crit:
-		err = m88e1510_set_temp_critical(phydev, temp);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-	return err;
-}
-
-static umode_t m88e1510_hwmon_is_visible(const void *data,
-					 enum hwmon_sensor_types type,
-					 u32 attr, int channel)
-{
-	if (type != hwmon_temp)
-		return 0;
-
-	switch (attr) {
-	case hwmon_temp_input:
-	case hwmon_temp_max_alarm:
-		return 0444;
-	case hwmon_temp_crit:
-		return 0644;
-	default:
-		return 0;
-	}
-}
-
-static u32 m88e1510_hwmon_temp_config[] = {
-	HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM,
-	0
-};
-
-static const struct hwmon_channel_info m88e1510_hwmon_temp = {
-	.type = hwmon_temp,
-	.config = m88e1510_hwmon_temp_config,
-};
-
-static const struct hwmon_channel_info *m88e1510_hwmon_info[] = {
-	&m88e1121_hwmon_chip,
-	&m88e1510_hwmon_temp,
-	NULL
-};
-
-static const struct hwmon_ops m88e1510_hwmon_hwmon_ops = {
-	.is_visible = m88e1510_hwmon_is_visible,
-	.read = m88e1510_hwmon_read,
-	.write = m88e1510_hwmon_write,
-};
-
-static const struct hwmon_chip_info m88e1510_hwmon_chip_info = {
-	.ops = &m88e1510_hwmon_hwmon_ops,
-	.info = m88e1510_hwmon_info,
-};
-
 static int m88e6390_get_temp(struct phy_device *phydev, long *temp)
 {
 	int sum = 0;
@@ -2534,63 +2392,112 @@ error:
 	return ret;
 }
 
-static int m88e6390_hwmon_read(struct device *dev,
-			       enum hwmon_sensor_types type,
-			       u32 attr, int channel, long *temp)
+static int marvell_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			      u32 attr, int channel, long *temp)
 {
 	struct phy_device *phydev = dev_get_drvdata(dev);
-	int err;
+	const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
+	int err = -EOPNOTSUPP;
 
 	switch (attr) {
 	case hwmon_temp_input:
-		err = m88e6390_get_temp(phydev, temp);
+		if (ops->get_temp)
+			err = ops->get_temp(phydev, temp);
+		break;
+	case hwmon_temp_crit:
+		if (ops->get_temp_critical)
+			err = ops->get_temp_critical(phydev, temp);
+		break;
+	case hwmon_temp_max_alarm:
+		if (ops->get_temp_alarm)
+			err = ops->get_temp_alarm(phydev, temp);
+		break;
+	}
+
+	return err;
+}
+
+static int marvell_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+			       u32 attr, int channel, long temp)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
+	int err = -EOPNOTSUPP;
+
+	switch (attr) {
+	case hwmon_temp_crit:
+		if (ops->set_temp_critical)
+			err = ops->set_temp_critical(phydev, temp);
 		break;
 	default:
-		return -EOPNOTSUPP;
+		fallthrough;
 	}
 
 	return err;
 }
 
-static umode_t m88e6390_hwmon_is_visible(const void *data,
-					 enum hwmon_sensor_types type,
-					 u32 attr, int channel)
+static umode_t marvell_hwmon_is_visible(const void *data,
+					enum hwmon_sensor_types type,
+					u32 attr, int channel)
 {
+	const struct phy_device *phydev = data;
+	const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
+
 	if (type != hwmon_temp)
 		return 0;
 
 	switch (attr) {
 	case hwmon_temp_input:
-		return 0444;
+		return ops->get_temp ? 0444 : 0;
+	case hwmon_temp_max_alarm:
+		return ops->get_temp_alarm ? 0444 : 0;
+	case hwmon_temp_crit:
+		return (ops->get_temp_critical ? 0444 : 0) |
+		       (ops->set_temp_critical ? 0200 : 0);
 	default:
 		return 0;
 	}
 }
 
-static u32 m88e6390_hwmon_temp_config[] = {
-	HWMON_T_INPUT,
+static u32 marvell_hwmon_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
 	0
 };
 
-static const struct hwmon_channel_info m88e6390_hwmon_temp = {
+static const struct hwmon_channel_info marvell_hwmon_chip = {
+	.type = hwmon_chip,
+	.config = marvell_hwmon_chip_config,
+};
+
+/* we can define HWMON_T_CRIT and HWMON_T_MAX_ALARM even though these are not
+ * defined for all PHYs, because the hwmon code checks whether the attributes
+ * exists via the .is_visible method
+ */
+static u32 marvell_hwmon_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_MAX_ALARM,
+	0
+};
+
+static const struct hwmon_channel_info marvell_hwmon_temp = {
 	.type = hwmon_temp,
-	.config = m88e6390_hwmon_temp_config,
+	.config = marvell_hwmon_temp_config,
 };
 
-static const struct hwmon_channel_info *m88e6390_hwmon_info[] = {
-	&m88e1121_hwmon_chip,
-	&m88e6390_hwmon_temp,
+static const struct hwmon_channel_info *marvell_hwmon_info[] = {
+	&marvell_hwmon_chip,
+	&marvell_hwmon_temp,
 	NULL
 };
 
-static const struct hwmon_ops m88e6390_hwmon_hwmon_ops = {
-	.is_visible = m88e6390_hwmon_is_visible,
-	.read = m88e6390_hwmon_read,
+static const struct hwmon_ops marvell_hwmon_hwmon_ops = {
+	.is_visible = marvell_hwmon_is_visible,
+	.read = marvell_hwmon_read,
+	.write = marvell_hwmon_write,
 };
 
-static const struct hwmon_chip_info m88e6390_hwmon_chip_info = {
-	.ops = &m88e6390_hwmon_hwmon_ops,
-	.info = m88e6390_hwmon_info,
+static const struct hwmon_chip_info marvell_hwmon_chip_info = {
+	.ops = &marvell_hwmon_hwmon_ops,
+	.info = marvell_hwmon_info,
 };
 
 static int marvell_hwmon_name(struct phy_device *phydev)
@@ -2613,49 +2520,48 @@ static int marvell_hwmon_name(struct phy_device *phydev)
 	return 0;
 }
 
-static int marvell_hwmon_probe(struct phy_device *phydev,
-			       const struct hwmon_chip_info *chip)
+static int marvell_hwmon_probe(struct phy_device *phydev)
 {
+	const struct marvell_hwmon_ops *ops = to_marvell_hwmon_ops(phydev);
 	struct marvell_priv *priv = phydev->priv;
 	struct device *dev = &phydev->mdio.dev;
 	int err;
 
+	if (!ops)
+		return 0;
+
 	err = marvell_hwmon_name(phydev);
 	if (err)
 		return err;
 
 	priv->hwmon_dev = devm_hwmon_device_register_with_info(
-		dev, priv->hwmon_name, phydev, chip, NULL);
+		dev, priv->hwmon_name, phydev, &marvell_hwmon_chip_info, NULL);
 
 	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
 }
 
-static int m88e1121_hwmon_probe(struct phy_device *phydev)
-{
-	return marvell_hwmon_probe(phydev, &m88e1121_hwmon_chip_info);
-}
+static const struct marvell_hwmon_ops m88e1121_hwmon_ops = {
+	.get_temp = m88e1121_get_temp,
+};
 
-static int m88e1510_hwmon_probe(struct phy_device *phydev)
-{
-	return marvell_hwmon_probe(phydev, &m88e1510_hwmon_chip_info);
-}
+static const struct marvell_hwmon_ops m88e1510_hwmon_ops = {
+	.get_temp = m88e1510_get_temp,
+	.get_temp_critical = m88e1510_get_temp_critical,
+	.set_temp_critical = m88e1510_set_temp_critical,
+	.get_temp_alarm = m88e1510_get_temp_alarm,
+};
+
+static const struct marvell_hwmon_ops m88e6390_hwmon_ops = {
+	.get_temp = m88e6390_get_temp,
+};
+
+#define DEF_MARVELL_HWMON_OPS(s) (&(s))
 
-static int m88e6390_hwmon_probe(struct phy_device *phydev)
-{
-	return marvell_hwmon_probe(phydev, &m88e6390_hwmon_chip_info);
-}
 #else
-static int m88e1121_hwmon_probe(struct phy_device *phydev)
-{
-	return 0;
-}
 
-static int m88e1510_hwmon_probe(struct phy_device *phydev)
-{
-	return 0;
-}
+#define DEF_MARVELL_HWMON_OPS(s) NULL
 
-static int m88e6390_hwmon_probe(struct phy_device *phydev)
+static int marvell_hwmon_probe(struct phy_device *phydev)
 {
 	return 0;
 }
@@ -2671,40 +2577,7 @@ static int marvell_probe(struct phy_device *phydev)
 
 	phydev->priv = priv;
 
-	return 0;
-}
-
-static int m88e1121_probe(struct phy_device *phydev)
-{
-	int err;
-
-	err = marvell_probe(phydev);
-	if (err)
-		return err;
-
-	return m88e1121_hwmon_probe(phydev);
-}
-
-static int m88e1510_probe(struct phy_device *phydev)
-{
-	int err;
-
-	err = marvell_probe(phydev);
-	if (err)
-		return err;
-
-	return m88e1510_hwmon_probe(phydev);
-}
-
-static int m88e6390_probe(struct phy_device *phydev)
-{
-	int err;
-
-	err = marvell_probe(phydev);
-	if (err)
-		return err;
-
-	return m88e6390_hwmon_probe(phydev);
+	return marvell_hwmon_probe(phydev);
 }
 
 static struct phy_driver marvell_drivers[] = {
@@ -2810,8 +2683,9 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1121R,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1121R",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1121_hwmon_ops),
 		/* PHY_GBIT_FEATURES */
-		.probe = m88e1121_probe,
+		.probe = marvell_probe,
 		.config_init = marvell_config_init,
 		.config_aneg = m88e1121_config_aneg,
 		.read_status = marvell_read_status,
@@ -2927,9 +2801,10 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1510,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1510",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
 		.features = PHY_GBIT_FIBRE_FEATURES,
 		.flags = PHY_POLL_CABLE_TEST,
-		.probe = m88e1510_probe,
+		.probe = marvell_probe,
 		.config_init = m88e1510_config_init,
 		.config_aneg = m88e1510_config_aneg,
 		.read_status = marvell_read_status,
@@ -2955,9 +2830,10 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1540,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1540",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
 		/* PHY_GBIT_FEATURES */
 		.flags = PHY_POLL_CABLE_TEST,
-		.probe = m88e1510_probe,
+		.probe = marvell_probe,
 		.config_init = marvell_config_init,
 		.config_aneg = m88e1510_config_aneg,
 		.read_status = marvell_read_status,
@@ -2980,7 +2856,8 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1545,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1545",
-		.probe = m88e1510_probe,
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
+		.probe = marvell_probe,
 		/* PHY_GBIT_FEATURES */
 		.flags = PHY_POLL_CABLE_TEST,
 		.config_init = marvell_config_init,
@@ -3024,9 +2901,10 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E6341_FAMILY,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E6341 Family",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
 		/* PHY_GBIT_FEATURES */
 		.flags = PHY_POLL_CABLE_TEST,
-		.probe = m88e1510_probe,
+		.probe = marvell_probe,
 		.config_init = marvell_config_init,
 		.config_aneg = m88e6390_config_aneg,
 		.read_status = marvell_read_status,
@@ -3049,9 +2927,10 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E6390_FAMILY,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E6390 Family",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e6390_hwmon_ops),
 		/* PHY_GBIT_FEATURES */
 		.flags = PHY_POLL_CABLE_TEST,
-		.probe = m88e6390_probe,
+		.probe = marvell_probe,
 		.config_init = marvell_config_init,
 		.config_aneg = m88e6390_config_aneg,
 		.read_status = marvell_read_status,
@@ -3074,7 +2953,8 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1340S,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1340S",
-		.probe = m88e1510_probe,
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
+		.probe = marvell_probe,
 		/* PHY_GBIT_FEATURES */
 		.config_init = marvell_config_init,
 		.config_aneg = m88e1510_config_aneg,
@@ -3095,7 +2975,8 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1548P,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1548P",
-		.probe = m88e1510_probe,
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
+		.probe = marvell_probe,
 		.features = PHY_GBIT_FIBRE_FEATURES,
 		.config_init = marvell_config_init,
 		.config_aneg = m88e1510_config_aneg,
-- 
cgit v1.2.3


From 4f920c299d4c5226f5bc37df8bf288a55bce01a8 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Tue, 20 Apr 2021 09:54:00 +0200
Subject: net: phy: marvell: fix HWMON enable register for 6390
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Register 27_6.15:14 has the following description in 88E6393X
documentation:
  Temperature Sensor Enable
    0x0 - Sample every 1s
    0x1 - Sense rate decided by bits 10:8 of this register
    0x2 - Use 26_6.5 (One shot Temperature Sample) to enable
    0x3 - Disable

This is compatible with how the 6390 code uses this register currently,
but the 6390 code handles it as two 1-bit registers (somewhat), instead
of one register with 4 possible values.

(A newer version of the 6390 documentation removed temperature sensor
 section completely. In an older version, the above mentioned register
 is reserved, although it is R/W. Since the code works, I think we can
 assume that it is correct.)

Rename this register and define all 4 values according to 6393X
documentation.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 70cfcfcdd8f2..9529aaa3bed3 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -113,11 +113,11 @@
 #define MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN		BIT(9)
 
 #define MII_88E6390_MISC_TEST		0x1b
-#define MII_88E6390_MISC_TEST_SAMPLE_1S		0
-#define MII_88E6390_MISC_TEST_SAMPLE_10MS	BIT(14)
-#define MII_88E6390_MISC_TEST_SAMPLE_DISABLE	BIT(15)
-#define MII_88E6390_MISC_TEST_SAMPLE_ENABLE	0
-#define MII_88E6390_MISC_TEST_SAMPLE_MASK	(0x3 << 14)
+#define MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE_SAMPLE_1S	(0x0 << 14)
+#define MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE		(0x1 << 14)
+#define MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE_ONESHOT	(0x2 << 14)
+#define MII_88E6390_MISC_TEST_TEMP_SENSOR_DISABLE		(0x3 << 14)
+#define MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK			(0x3 << 14)
 
 #define MII_88E6390_TEMP_SENSOR		0x1c
 #define MII_88E6390_TEMP_SENSOR_MASK	0xff
@@ -2352,9 +2352,8 @@ static int m88e6390_get_temp(struct phy_device *phydev, long *temp)
 	if (ret < 0)
 		goto error;
 
-	ret = ret & ~MII_88E6390_MISC_TEST_SAMPLE_MASK;
-	ret |= MII_88E6390_MISC_TEST_SAMPLE_ENABLE |
-		MII_88E6390_MISC_TEST_SAMPLE_1S;
+	ret = ret & ~MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK;
+	ret |= MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE_SAMPLE_1S;
 
 	ret = __phy_write(phydev, MII_88E6390_MISC_TEST, ret);
 	if (ret < 0)
@@ -2381,8 +2380,8 @@ static int m88e6390_get_temp(struct phy_device *phydev, long *temp)
 	if (ret < 0)
 		goto error;
 
-	ret = ret & ~MII_88E6390_MISC_TEST_SAMPLE_MASK;
-	ret |= MII_88E6390_MISC_TEST_SAMPLE_DISABLE;
+	ret = ret & ~MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK;
+	ret |= MII_88E6390_MISC_TEST_TEMP_SENSOR_DISABLE;
 
 	ret = __phy_write(phydev, MII_88E6390_MISC_TEST, ret);
 
-- 
cgit v1.2.3


From 002181735184a2660fd081abea2ac560896f874b Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Tue, 20 Apr 2021 09:54:01 +0200
Subject: net: phy: marvell: use assignment by bitwise AND operator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the &= operator instead of
  ret = ret & ...

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 9529aaa3bed3..e505060d0743 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2352,7 +2352,7 @@ static int m88e6390_get_temp(struct phy_device *phydev, long *temp)
 	if (ret < 0)
 		goto error;
 
-	ret = ret & ~MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK;
+	ret &= ~MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK;
 	ret |= MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE_SAMPLE_1S;
 
 	ret = __phy_write(phydev, MII_88E6390_MISC_TEST, ret);
-- 
cgit v1.2.3


From c5d015b0e09700bfd8ec120cc8ebe25f6fc5c32e Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Tue, 20 Apr 2021 09:54:02 +0200
Subject: net: dsa: mv88e6xxx: simulate Amethyst PHY model number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Amethyst internal PHYs also report empty model number in MII_PHYSID2.

Fill in switch product number, as is done for Topaz and Peridot.

Signed-off-by: Marek Behún <kabel@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 860dca41cf4b..9c4f8517c34b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3165,6 +3165,7 @@ out_resources:
 static const u16 family_prod_id_table[] = {
 	[MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341,
 	[MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390,
+	[MV88E6XXX_FAMILY_6393] = MV88E6XXX_PORT_SWITCH_ID_PROD_6393X,
 };
 
 static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
-- 
cgit v1.2.3


From a978f7c479ea68d68a6267a37cbd44362bdd9811 Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Tue, 20 Apr 2021 09:54:03 +0200
Subject: net: phy: marvell: add support for Amethyst internal PHY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for Amethyst internal PHY.

The only difference from Peridot is HWMON.

Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c   | 117 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/marvell_phy.h |   1 +
 2 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e505060d0743..1cce86b280af 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -118,10 +118,21 @@
 #define MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE_ONESHOT	(0x2 << 14)
 #define MII_88E6390_MISC_TEST_TEMP_SENSOR_DISABLE		(0x3 << 14)
 #define MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK			(0x3 << 14)
+#define MII_88E6393_MISC_TEST_SAMPLES_2048	(0x0 << 11)
+#define MII_88E6393_MISC_TEST_SAMPLES_4096	(0x1 << 11)
+#define MII_88E6393_MISC_TEST_SAMPLES_8192	(0x2 << 11)
+#define MII_88E6393_MISC_TEST_SAMPLES_16384	(0x3 << 11)
+#define MII_88E6393_MISC_TEST_SAMPLES_MASK	(0x3 << 11)
+#define MII_88E6393_MISC_TEST_RATE_2_3MS	(0x5 << 8)
+#define MII_88E6393_MISC_TEST_RATE_6_4MS	(0x6 << 8)
+#define MII_88E6393_MISC_TEST_RATE_11_9MS	(0x7 << 8)
+#define MII_88E6393_MISC_TEST_RATE_MASK		(0x7 << 8)
 
 #define MII_88E6390_TEMP_SENSOR		0x1c
-#define MII_88E6390_TEMP_SENSOR_MASK	0xff
-#define MII_88E6390_TEMP_SENSOR_SAMPLES 10
+#define MII_88E6393_TEMP_SENSOR_THRESHOLD_MASK	0xff00
+#define MII_88E6393_TEMP_SENSOR_THRESHOLD_SHIFT	8
+#define MII_88E6390_TEMP_SENSOR_MASK		0xff
+#define MII_88E6390_TEMP_SENSOR_SAMPLES		10
 
 #define MII_88E1318S_PHY_MSCR1_REG	16
 #define MII_88E1318S_PHY_MSCR1_PAD_ODD	BIT(6)
@@ -2217,6 +2228,7 @@ static int marvell_vct7_cable_test_get_status(struct phy_device *phydev,
 
 #ifdef CONFIG_HWMON
 struct marvell_hwmon_ops {
+	int (*config)(struct phy_device *phydev);
 	int (*get_temp)(struct phy_device *phydev, long *temp);
 	int (*get_temp_critical)(struct phy_device *phydev, long *temp);
 	int (*set_temp_critical)(struct phy_device *phydev, long temp);
@@ -2391,6 +2403,65 @@ error:
 	return ret;
 }
 
+static int m88e6393_get_temp(struct phy_device *phydev, long *temp)
+{
+	int err;
+
+	err = m88e1510_get_temp(phydev, temp);
+
+	/* 88E1510 measures T + 25, while the PHY on 88E6393X switch
+	 * T + 75, so we have to subtract another 50
+	 */
+	*temp -= 50000;
+
+	return err;
+}
+
+static int m88e6393_get_temp_critical(struct phy_device *phydev, long *temp)
+{
+	int ret;
+
+	*temp = 0;
+
+	ret = phy_read_paged(phydev, MII_MARVELL_MISC_TEST_PAGE,
+			     MII_88E6390_TEMP_SENSOR);
+	if (ret < 0)
+		return ret;
+
+	*temp = (((ret & MII_88E6393_TEMP_SENSOR_THRESHOLD_MASK) >>
+		  MII_88E6393_TEMP_SENSOR_THRESHOLD_SHIFT) - 75) * 1000;
+
+	return 0;
+}
+
+static int m88e6393_set_temp_critical(struct phy_device *phydev, long temp)
+{
+	temp = (temp / 1000) + 75;
+
+	return phy_modify_paged(phydev, MII_MARVELL_MISC_TEST_PAGE,
+				MII_88E6390_TEMP_SENSOR,
+				MII_88E6393_TEMP_SENSOR_THRESHOLD_MASK,
+				temp << MII_88E6393_TEMP_SENSOR_THRESHOLD_SHIFT);
+}
+
+static int m88e6393_hwmon_config(struct phy_device *phydev)
+{
+	int err;
+
+	err = m88e6393_set_temp_critical(phydev, 100000);
+	if (err)
+		return err;
+
+	return phy_modify_paged(phydev, MII_MARVELL_MISC_TEST_PAGE,
+				MII_88E6390_MISC_TEST,
+				MII_88E6390_MISC_TEST_TEMP_SENSOR_MASK |
+				MII_88E6393_MISC_TEST_SAMPLES_MASK |
+				MII_88E6393_MISC_TEST_RATE_MASK,
+				MII_88E6390_MISC_TEST_TEMP_SENSOR_ENABLE |
+				MII_88E6393_MISC_TEST_SAMPLES_2048 |
+				MII_88E6393_MISC_TEST_RATE_2_3MS);
+}
+
 static int marvell_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 			      u32 attr, int channel, long *temp)
 {
@@ -2535,8 +2606,13 @@ static int marvell_hwmon_probe(struct phy_device *phydev)
 
 	priv->hwmon_dev = devm_hwmon_device_register_with_info(
 		dev, priv->hwmon_name, phydev, &marvell_hwmon_chip_info, NULL);
+	if (IS_ERR(priv->hwmon_dev))
+		return PTR_ERR(priv->hwmon_dev);
 
-	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
+	if (ops->config)
+		err = ops->config(phydev);
+
+	return err;
 }
 
 static const struct marvell_hwmon_ops m88e1121_hwmon_ops = {
@@ -2554,6 +2630,14 @@ static const struct marvell_hwmon_ops m88e6390_hwmon_ops = {
 	.get_temp = m88e6390_get_temp,
 };
 
+static const struct marvell_hwmon_ops m88e6393_hwmon_ops = {
+	.config = m88e6393_hwmon_config,
+	.get_temp = m88e6393_get_temp,
+	.get_temp_critical = m88e6393_get_temp_critical,
+	.set_temp_critical = m88e6393_set_temp_critical,
+	.get_temp_alarm = m88e1510_get_temp_alarm,
+};
+
 #define DEF_MARVELL_HWMON_OPS(s) (&(s))
 
 #else
@@ -2948,6 +3032,32 @@ static struct phy_driver marvell_drivers[] = {
 		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
 		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
+	{
+		.phy_id = MARVELL_PHY_ID_88E6393_FAMILY,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88E6393 Family",
+		.driver_data = DEF_MARVELL_HWMON_OPS(m88e6393_hwmon_ops),
+		/* PHY_GBIT_FEATURES */
+		.flags = PHY_POLL_CABLE_TEST,
+		.probe = marvell_probe,
+		.config_init = marvell_config_init,
+		.config_aneg = m88e1510_config_aneg,
+		.read_status = marvell_read_status,
+		.config_intr = marvell_config_intr,
+		.handle_interrupt = marvell_handle_interrupt,
+		.resume = genphy_resume,
+		.suspend = genphy_suspend,
+		.read_page = marvell_read_page,
+		.write_page = marvell_write_page,
+		.get_sset_count = marvell_get_sset_count,
+		.get_strings = marvell_get_strings,
+		.get_stats = marvell_get_stats,
+		.get_tunable = m88e1540_get_tunable,
+		.set_tunable = m88e1540_set_tunable,
+		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
+		.cable_test_get_status = marvell_vct7_cable_test_get_status,
+	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1340S,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
@@ -3014,6 +3124,7 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
 	{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88E6393_FAMILY, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK },
 	{ }
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index f61d82c53f30..acee44b9db26 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -39,6 +39,7 @@
  */
 #define MARVELL_PHY_ID_88E6341_FAMILY	0x01410f41
 #define MARVELL_PHY_ID_88E6390_FAMILY	0x01410f90
+#define MARVELL_PHY_ID_88E6393_FAMILY	0x002b0b9b
 
 #define MARVELL_PHY_FAMILY_ID(id)	((id) >> 4)
 
-- 
cgit v1.2.3


From 790aad0eccd206643f134bcf724b7078a04f63e0 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 20 Apr 2021 16:40:08 -0700
Subject: korina: Fix build.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 4613bc21130b..6f987a7ffcb3 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1,4 +1,4 @@
->/*
+/*
  *  Driver for the IDT RC32434 (Korina) on-chip ethernet controller.
  *
  *  Copyright 2004 IDT Inc. (rischelp@idt.com)
-- 
cgit v1.2.3


From 17c0e6d1757f01ce54c7dee19396053d721ca006 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:39 +0200
Subject: mlxsw: spectrum_qdisc: Drop one argument from check_params callback

The mlxsw_sp_qdisc argument is not used in any of the actual callbacks.
Drop it.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index baf17c0b2702..644ffc021abe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -29,7 +29,6 @@ struct mlxsw_sp_qdisc;
 struct mlxsw_sp_qdisc_ops {
 	enum mlxsw_sp_qdisc_type type;
 	int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port,
-			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 			    void *params);
 	int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
@@ -198,7 +197,7 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 			goto err_hdroom_configure;
 	}
 
-	err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params);
+	err = ops->check_params(mlxsw_sp_port, params);
 	if (err)
 		goto err_bad_param;
 
@@ -434,7 +433,6 @@ mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
-				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 				void *params)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -678,7 +676,6 @@ mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p)
 
 static int
 mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
-				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 				void *params)
 {
 	struct tc_tbf_qopt_offload_replace_params *p = params;
@@ -813,7 +810,6 @@ mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
-				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 				 void *params)
 {
 	return 0;
@@ -948,7 +944,6 @@ __mlxsw_sp_qdisc_ets_check_params(unsigned int nbands)
 
 static int
 mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
-				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 				 void *params)
 {
 	struct tc_prio_qopt_offload_params *p = params;
@@ -1124,7 +1119,6 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
 
 static int
 mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
-				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 				void *params)
 {
 	struct tc_ets_qopt_offload_replace_params *p = params;
-- 
cgit v1.2.3


From 290fe2c595fbf9b9099a93528d6f85d3d7d21fa4 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:40 +0200
Subject: mlxsw: spectrum_qdisc: Simplify mlxsw_sp_qdisc_compare()

The purpose of this function is to filter out events that are related to
qdiscs that are not offloaded, or are not offloaded anymore. But the
function is unnecessarily thorough:

- mlxsw_sp_qdisc pointer is never NULL in the context where it is called
- Two qdiscs with the same handle will never have different types. Even
  when replacing one qdisc with another in the same class, Linux will not
  permit handle reuse unless the qdisc type also matches.

Simplify the function by omitting these two unnecessary conditions.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 644ffc021abe..013398ecd15b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -87,12 +87,9 @@ struct mlxsw_sp_qdisc_state {
 };
 
 static bool
-mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
-		       enum mlxsw_sp_qdisc_type type)
+mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle)
 {
-	return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
-	       mlxsw_sp_qdisc->ops->type == type &&
-	       mlxsw_sp_qdisc->handle == handle;
+	return mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->handle == handle;
 }
 
 static struct mlxsw_sp_qdisc *
@@ -579,8 +576,7 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 					      &mlxsw_sp_qdisc_ops_red,
 					      &p->set);
 
-	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
-				    MLXSW_SP_QDISC_RED))
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle))
 		return -EOPNOTSUPP;
 
 	switch (p->command) {
@@ -780,8 +776,7 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
 					      &mlxsw_sp_qdisc_ops_tbf,
 					      &p->replace_params);
 
-	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
-				    MLXSW_SP_QDISC_TBF))
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle))
 		return -EOPNOTSUPP;
 
 	switch (p->command) {
@@ -886,8 +881,7 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 					      &mlxsw_sp_qdisc_ops_fifo, NULL);
 	}
 
-	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
-				    MLXSW_SP_QDISC_FIFO))
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle))
 		return -EOPNOTSUPP;
 
 	switch (p->command) {
@@ -1247,8 +1241,7 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 					      &mlxsw_sp_qdisc_ops_prio,
 					      &p->replace_params);
 
-	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
-				    MLXSW_SP_QDISC_PRIO))
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle))
 		return -EOPNOTSUPP;
 
 	switch (p->command) {
@@ -1280,8 +1273,7 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
 					      &mlxsw_sp_qdisc_ops_ets,
 					      &p->replace_params);
 
-	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
-				    MLXSW_SP_QDISC_ETS))
+	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle))
 		return -EOPNOTSUPP;
 
 	switch (p->command) {
-- 
cgit v1.2.3


From 549f2aae84ddf574dda36b0bf7e3b4c0872c6675 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:41 +0200
Subject: mlxsw: spectrum_qdisc: Drop an always-true condition

The function mlxsw_sp_qdisc_compare() is invoked a couple lines above this
check, which will bounce any requests where this condition does not hold.
Therefore drop it.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 013398ecd15b..f1d32bfc4bed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -886,10 +886,7 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	switch (p->command) {
 	case TC_FIFO_DESTROY:
-		if (p->handle == mlxsw_sp_qdisc->handle)
-			return mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
-						      mlxsw_sp_qdisc);
-		return 0;
+		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 	case TC_FIFO_STATS:
 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
 						&p->stats);
-- 
cgit v1.2.3


From 017a131cdec6d3dd375d353fd25293a21896346d Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:42 +0200
Subject: mlxsw: spectrum_qdisc: Track tclass_num as int, not u8

tclass_num is just a number, a value that would be ordinarily passed around
as an int. (Which is unlike a u8 prio_bitmap.) In several places,
tclass_num already is an int. Convert the remaining instances.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index f1d32bfc4bed..da1f6314df60 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -51,7 +51,7 @@ struct mlxsw_sp_qdisc_ops {
 
 struct mlxsw_sp_qdisc {
 	u32 handle;
-	u8 tclass_num;
+	int tclass_num;
 	u8 prio_bitmap;
 	union {
 		struct red_stats red;
@@ -291,7 +291,7 @@ mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 				u64 *p_tx_bytes, u64 *p_tx_packets,
 				u64 *p_drops, u64 *p_backlog)
 {
-	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+	int tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_port_xstats *xstats;
 	u64 tx_bytes, tx_packets;
 
@@ -391,7 +391,7 @@ static void
 mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 					struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+	int tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_qdisc_stats *stats_base;
 	struct mlxsw_sp_port_xstats *xstats;
 	struct red_stats *red_base;
@@ -462,7 +462,7 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct tc_red_qopt_offload_params *p = params;
-	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+	int tclass_num = mlxsw_sp_qdisc->tclass_num;
 	u32 min, max;
 	u64 prob;
 
@@ -507,7 +507,7 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
 			      void *xstats_ptr)
 {
 	struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red;
-	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+	int tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_port_xstats *xstats;
 	struct red_stats *res = xstats_ptr;
 	int early_drops, pdrops;
@@ -531,7 +531,7 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 			     struct tc_qopt_offload_stats *stats_ptr)
 {
-	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
+	int tclass_num = mlxsw_sp_qdisc->tclass_num;
 	struct mlxsw_sp_qdisc_stats *stats_base;
 	struct mlxsw_sp_port_xstats *xstats;
 	u64 overlimits;
-- 
cgit v1.2.3


From b21832b56807aa513efcb3b06c5e3e5550d28de1 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:43 +0200
Subject: mlxsw: spectrum_qdisc: Promote backlog reduction to
 mlxsw_sp_qdisc_destroy()

When a qdisc is removed, it is necessary to update the backlog value at its
parent--unless the qdisc is at root position. RED, TBF and FIFO all do
that, each separately. Since all of them need to do this, just promote the
operation directly to mlxsw_sp_qdisc_destroy(), instead of deferring it to
individual destructors. Since FIFO dtor thus becomes trivial, remove it.

Add struct mlxsw_sp_qdisc.parent to point at the parent qdisc. This will be
handy later as deeper structures are offloaded. Use the parent qdisc to
find the chain of parents whose backlog value needs to be updated.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 48 ++++++++--------------
 1 file changed, 18 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index da1f6314df60..a8a7e9c88a4d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -65,6 +65,7 @@ struct mlxsw_sp_qdisc {
 	} stats_base;
 
 	struct mlxsw_sp_qdisc_ops *ops;
+	struct mlxsw_sp_qdisc *parent;
 };
 
 struct mlxsw_sp_qdisc_state {
@@ -132,6 +133,15 @@ mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
 	return NULL;
 }
 
+static void
+mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+	struct mlxsw_sp_qdisc *tmp;
+
+	for (tmp = mlxsw_sp_qdisc->parent; tmp; tmp = tmp->parent)
+		tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog;
+}
+
 static int
 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -153,7 +163,11 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 		err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom);
 	}
 
-	if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy)
+	if (!mlxsw_sp_qdisc->ops)
+		return 0;
+
+	mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc);
+	if (mlxsw_sp_qdisc->ops->destroy)
 		err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port,
 						   mlxsw_sp_qdisc);
 
@@ -417,13 +431,6 @@ static int
 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
-
-	if (root_qdisc != mlxsw_sp_qdisc)
-		root_qdisc->stats_base.backlog -=
-					mlxsw_sp_qdisc->stats_base.backlog;
-
 	return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
 						  mlxsw_sp_qdisc->tclass_num);
 }
@@ -616,13 +623,6 @@ static int
 mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
-
-	if (root_qdisc != mlxsw_sp_qdisc)
-		root_qdisc->stats_base.backlog -=
-					mlxsw_sp_qdisc->stats_base.backlog;
-
 	return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
 					     MLXSW_REG_QEEC_HR_SUBGROUP,
 					     mlxsw_sp_qdisc->tclass_num, 0,
@@ -790,19 +790,6 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
-static int
-mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
-			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
-{
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
-
-	if (root_qdisc != mlxsw_sp_qdisc)
-		root_qdisc->stats_base.backlog -=
-					mlxsw_sp_qdisc->stats_base.backlog;
-	return 0;
-}
-
 static int
 mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
 				 void *params)
@@ -832,7 +819,6 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = {
 	.type = MLXSW_SP_QDISC_FIFO,
 	.check_params = mlxsw_sp_qdisc_fifo_check_params,
 	.replace = mlxsw_sp_qdisc_fifo_replace,
-	.destroy = mlxsw_sp_qdisc_fifo_destroy,
 	.get_stats = mlxsw_sp_qdisc_get_fifo_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
 };
@@ -1825,8 +1811,10 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 	qdisc_state->root_qdisc.prio_bitmap = 0xff;
 	qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		qdisc_state->tclass_qdiscs[i].tclass_num = i;
+		qdisc_state->tclass_qdiscs[i].parent = &qdisc_state->root_qdisc;
+	}
 
 	mlxsw_sp_port->qdisc = qdisc_state;
 	return 0;
-- 
cgit v1.2.3


From 51d52ed955509d34d1a57c50efdce1300047f865 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:44 +0200
Subject: mlxsw: spectrum_qdisc: Track children per qdisc

mlxsw currently allows a two-level structure of qdiscs: the root and
possibly a number of children. In order to support offloading more general
qdisc trees, introduce to struct mlxsw_sp_qdisc a pointer to child qdiscs.
Refer to the child qdiscs through this pointer, instead of going through
the tclass_qdiscs in qdisc_state. Additionally introduce a field
num_classes, which holds number of given qdisc's children.

Also introduce a generic function for walking qdisc trees. Rewrite
mlxsw_sp_qdisc_find() and _find_by_handle() to use the generic walker.

For now, keep the qdisc_state.tclass_qdisc, and just point root_qdiscs's
children to this array. Following patches will make the allocation dynamic.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 164 +++++++++++++++------
 1 file changed, 118 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index a8a7e9c88a4d..f42ea958919b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -47,6 +47,8 @@ struct mlxsw_sp_qdisc_ops {
 	 */
 	void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port,
 			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
+	struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+					     u32 parent);
 };
 
 struct mlxsw_sp_qdisc {
@@ -66,6 +68,8 @@ struct mlxsw_sp_qdisc {
 
 	struct mlxsw_sp_qdisc_ops *ops;
 	struct mlxsw_sp_qdisc *parent;
+	struct mlxsw_sp_qdisc *qdiscs;
+	unsigned int num_classes;
 };
 
 struct mlxsw_sp_qdisc_state {
@@ -93,44 +97,84 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle)
 	return mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->handle == handle;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_walk(struct mlxsw_sp_qdisc *qdisc,
+		    struct mlxsw_sp_qdisc *(*pre)(struct mlxsw_sp_qdisc *,
+						  void *),
+		    void *data)
+{
+	struct mlxsw_sp_qdisc *tmp;
+	unsigned int i;
+
+	if (pre) {
+		tmp = pre(qdisc, data);
+		if (tmp)
+			return tmp;
+	}
+
+	if (qdisc->ops) {
+		for (i = 0; i < qdisc->num_classes; i++) {
+			tmp = &qdisc->qdiscs[i];
+			if (qdisc->ops) {
+				tmp = mlxsw_sp_qdisc_walk(tmp, pre, data);
+				if (tmp)
+					return tmp;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data)
+{
+	u32 parent = *(u32 *)data;
+
+	if (qdisc->ops && TC_H_MAJ(qdisc->handle) == TC_H_MAJ(parent)) {
+		if (qdisc->ops->find_class)
+			return qdisc->ops->find_class(qdisc, parent);
+	}
+
+	return NULL;
+}
+
 static struct mlxsw_sp_qdisc *
 mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
 		    bool root_only)
 {
 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	int tclass, child_index;
 
+	if (!qdisc_state)
+		return NULL;
 	if (parent == TC_H_ROOT)
 		return &qdisc_state->root_qdisc;
-
-	if (root_only || !qdisc_state ||
-	    !qdisc_state->root_qdisc.ops ||
-	    TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle ||
-	    TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
+	if (root_only)
 		return NULL;
+	return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc,
+				   mlxsw_sp_qdisc_walk_cb_find, &parent);
+}
 
-	child_index = TC_H_MIN(parent);
-	tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
-	return &qdisc_state->tclass_qdiscs[tclass];
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_walk_cb_find_by_handle(struct mlxsw_sp_qdisc *qdisc, void *data)
+{
+	u32 handle = *(u32 *)data;
+
+	if (qdisc->ops && qdisc->handle == handle)
+		return qdisc;
+	return NULL;
 }
 
 static struct mlxsw_sp_qdisc *
 mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
 {
 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	int i;
-
-	if (qdisc_state->root_qdisc.handle == handle)
-		return &qdisc_state->root_qdisc;
 
-	if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC)
+	if (!qdisc_state)
 		return NULL;
-
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-		if (qdisc_state->tclass_qdiscs[i].handle == handle)
-			return &qdisc_state->tclass_qdiscs[i];
-
-	return NULL;
+	return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc,
+				   mlxsw_sp_qdisc_walk_cb_find_by_handle,
+				   &handle);
 }
 
 static void
@@ -555,6 +599,13 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			       u32 parent)
+{
+	return NULL;
+}
+
 #define MLXSW_SP_PORT_DEFAULT_TCLASS 0
 
 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
@@ -566,6 +617,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
 	.get_stats = mlxsw_sp_qdisc_get_red_stats,
 	.get_xstats = mlxsw_sp_qdisc_get_red_xstats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats,
+	.find_class = mlxsw_sp_qdisc_leaf_find_class,
 };
 
 int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -759,6 +811,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = {
 	.destroy = mlxsw_sp_qdisc_tbf_destroy,
 	.get_stats = mlxsw_sp_qdisc_get_tbf_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
+	.find_class = mlxsw_sp_qdisc_leaf_find_class,
 };
 
 int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -883,21 +936,20 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 	return -EOPNOTSUPP;
 }
 
-static int
-__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+					struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
 	int i;
 
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+	for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) {
 		mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
 					  MLXSW_SP_PORT_DEFAULT_TCLASS);
 		mlxsw_sp_port_ets_set(mlxsw_sp_port,
 				      MLXSW_REG_QEEC_HR_SUBGROUP,
 				      i, 0, false, 0);
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
-				       &qdisc_state->tclass_qdiscs[i]);
-		qdisc_state->tclass_qdiscs[i].prio_bitmap = 0;
+				       &mlxsw_sp_qdisc->qdiscs[i]);
+		mlxsw_sp_qdisc->qdiscs[i].prio_bitmap = 0;
 	}
 
 	return 0;
@@ -907,7 +959,7 @@ static int
 mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);
+	return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 }
 
 static int
@@ -929,8 +981,9 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int
-__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
-			     unsigned int nbands,
+__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			     u32 handle, unsigned int nbands,
 			     const unsigned int *quanta,
 			     const unsigned int *weights,
 			     const u8 *priomap)
@@ -943,7 +996,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 
 	for (band = 0; band < nbands; band++) {
 		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
-		child_qdisc = &qdisc_state->tclass_qdiscs[tclass];
+		child_qdisc = &mlxsw_sp_qdisc->qdiscs[band];
 		old_priomap = child_qdisc->prio_bitmap;
 		child_qdisc->prio_bitmap = 0;
 
@@ -985,7 +1038,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 	}
 	for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
 		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
-		child_qdisc = &qdisc_state->tclass_qdiscs[tclass];
+		child_qdisc = &mlxsw_sp_qdisc->qdiscs[band];
 		child_qdisc->prio_bitmap = 0;
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
 		mlxsw_sp_port_ets_set(mlxsw_sp_port,
@@ -1006,8 +1059,9 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 	struct tc_prio_qopt_offload_params *p = params;
 	unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0};
 
-	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands,
-					    zeroes, zeroes, p->priomap);
+	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc,
+					    handle, p->bands, zeroes,
+					    zeroes, p->priomap);
 }
 
 static void
@@ -1038,7 +1092,6 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 			      struct tc_qopt_offload_stats *stats_ptr)
 {
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
 	struct mlxsw_sp_qdisc *tc_qdisc;
 	u64 tx_packets = 0;
 	u64 tx_bytes = 0;
@@ -1046,8 +1099,8 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	u64 drops = 0;
 	int i;
 
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		tc_qdisc = &qdisc_state->tclass_qdiscs[i];
+	for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) {
+		tc_qdisc = &mlxsw_sp_qdisc->qdiscs[i];
 		mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc,
 						&tx_bytes, &tx_packets,
 						&drops, &backlog);
@@ -1084,6 +1137,18 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 	mlxsw_sp_qdisc->stats_base.backlog = 0;
 }
 
+static struct mlxsw_sp_qdisc *
+mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+			       u32 parent)
+{
+	int child_index = TC_H_MIN(parent);
+	int band = child_index - 1;
+
+	if (band < 0 || band >= mlxsw_sp_qdisc->num_classes)
+		return NULL;
+	return &mlxsw_sp_qdisc->qdiscs[band];
+}
+
 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
 	.type = MLXSW_SP_QDISC_PRIO,
 	.check_params = mlxsw_sp_qdisc_prio_check_params,
@@ -1092,6 +1157,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
 	.destroy = mlxsw_sp_qdisc_prio_destroy,
 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
+	.find_class = mlxsw_sp_qdisc_prio_find_class,
 };
 
 static int
@@ -1110,8 +1176,9 @@ mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 {
 	struct tc_ets_qopt_offload_replace_params *p = params;
 
-	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands,
-					    p->quanta, p->weights, p->priomap);
+	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc,
+					    handle, p->bands, p->quanta,
+					    p->weights, p->priomap);
 }
 
 static void
@@ -1129,7 +1196,7 @@ static int
 mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
-	return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);
+	return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 }
 
 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
@@ -1140,6 +1207,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
 	.destroy = mlxsw_sp_qdisc_ets_destroy,
 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
+	.find_class = mlxsw_sp_qdisc_prio_find_class,
 };
 
 /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting
@@ -1172,12 +1240,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 			   u8 band, u32 child_handle)
 {
-	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
-	int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
 	struct mlxsw_sp_qdisc *old_qdisc;
 
-	if (band < IEEE_8021QAZ_MAX_TCS &&
-	    qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle)
+	if (band < mlxsw_sp_qdisc->num_classes &&
+	    mlxsw_sp_qdisc->qdiscs[band].handle == child_handle)
 		return 0;
 
 	if (!child_handle) {
@@ -1195,8 +1261,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (old_qdisc)
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
 
-	mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
-			       &qdisc_state->tclass_qdiscs[tclass_num]);
+	mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band);
+	if (!WARN_ON(!mlxsw_sp_qdisc))
+		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+
 	return -EOPNOTSUPP;
 }
 
@@ -1811,8 +1879,12 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 	qdisc_state->root_qdisc.prio_bitmap = 0xff;
 	qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
+	qdisc_state->root_qdisc.qdiscs = qdisc_state->tclass_qdiscs;
+	qdisc_state->root_qdisc.num_classes = IEEE_8021QAZ_MAX_TCS;
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		qdisc_state->tclass_qdiscs[i].tclass_num = i;
+		int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(i);
+
+		qdisc_state->tclass_qdiscs[i].tclass_num = tclass_num;
 		qdisc_state->tclass_qdiscs[i].parent = &qdisc_state->root_qdisc;
 	}
 
-- 
cgit v1.2.3


From cff99e204553c500459712b523d06002a7aae18e Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:45 +0200
Subject: mlxsw: spectrum_qdisc: Guard all qdisc accesses with a lock

The FIFO handler currently guards accesses to the future FIFO tracking by
asserting RTNL. In the future, the changes to the qdisc state will be more
thorough, so other qdiscs will need this guarding is as well. In order
to not further the RTNL infestation, instead convert to a custom lock that
will guard accesses to the qdisc state.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 89 ++++++++++++++++++----
 1 file changed, 73 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index f42ea958919b..9e7f1a0188e8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -89,6 +89,7 @@ struct mlxsw_sp_qdisc_state {
 	 */
 	u32 future_handle;
 	bool future_fifos[IEEE_8021QAZ_MAX_TCS];
+	struct mutex lock; /* Protects qdisc state. */
 };
 
 static bool
@@ -620,8 +621,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
 	.find_class = mlxsw_sp_qdisc_leaf_find_class,
 };
 
-int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
-			  struct tc_red_qopt_offload *p)
+static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct tc_red_qopt_offload *p)
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
@@ -652,6 +653,18 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_red_qopt_offload *p)
+{
+	int err;
+
+	mutex_lock(&mlxsw_sp_port->qdisc->lock);
+	err = __mlxsw_sp_setup_tc_red(mlxsw_sp_port, p);
+	mutex_unlock(&mlxsw_sp_port->qdisc->lock);
+
+	return err;
+}
+
 static void
 mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
@@ -814,8 +827,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = {
 	.find_class = mlxsw_sp_qdisc_leaf_find_class,
 };
 
-int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
-			  struct tc_tbf_qopt_offload *p)
+static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct tc_tbf_qopt_offload *p)
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
@@ -843,6 +856,18 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_tbf_qopt_offload *p)
+{
+	int err;
+
+	mutex_lock(&mlxsw_sp_port->qdisc->lock);
+	err = __mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, p);
+	mutex_unlock(&mlxsw_sp_port->qdisc->lock);
+
+	return err;
+}
+
 static int
 mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
 				 void *params)
@@ -876,20 +901,14 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = {
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
 };
 
-int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
-			   struct tc_fifo_qopt_offload *p)
+static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
+				    struct tc_fifo_qopt_offload *p)
 {
 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 	int tclass, child_index;
 	u32 parent_handle;
 
-	/* Invisible FIFOs are tracked in future_handle and future_fifos. Make
-	 * sure that not more than one qdisc is created for a port at a time.
-	 * RTNL is a simple proxy for that.
-	 */
-	ASSERT_RTNL();
-
 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
 	if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) {
 		parent_handle = TC_H_MAJ(p->parent);
@@ -936,6 +955,18 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 	return -EOPNOTSUPP;
 }
 
+int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_fifo_qopt_offload *p)
+{
+	int err;
+
+	mutex_lock(&mlxsw_sp_port->qdisc->lock);
+	err = __mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, p);
+	mutex_unlock(&mlxsw_sp_port->qdisc->lock);
+
+	return err;
+}
+
 static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 					struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
 {
@@ -1277,8 +1308,8 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
 					  p->band, p->child_handle);
 }
 
-int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
-			   struct tc_prio_qopt_offload *p)
+static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+				    struct tc_prio_qopt_offload *p)
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
@@ -1309,8 +1340,20 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
-int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
-			  struct tc_ets_qopt_offload *p)
+int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_prio_qopt_offload *p)
+{
+	int err;
+
+	mutex_lock(&mlxsw_sp_port->qdisc->lock);
+	err = __mlxsw_sp_setup_tc_prio(mlxsw_sp_port, p);
+	mutex_unlock(&mlxsw_sp_port->qdisc->lock);
+
+	return err;
+}
+
+static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
+				   struct tc_ets_qopt_offload *p)
 {
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
 
@@ -1342,6 +1385,18 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_ets_qopt_offload *p)
+{
+	int err;
+
+	mutex_lock(&mlxsw_sp_port->qdisc->lock);
+	err = __mlxsw_sp_setup_tc_ets(mlxsw_sp_port, p);
+	mutex_unlock(&mlxsw_sp_port->qdisc->lock);
+
+	return err;
+}
+
 struct mlxsw_sp_qevent_block {
 	struct list_head binding_list;
 	struct list_head mall_entry_list;
@@ -1877,6 +1932,7 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	if (!qdisc_state)
 		return -ENOMEM;
 
+	mutex_init(&qdisc_state->lock);
 	qdisc_state->root_qdisc.prio_bitmap = 0xff;
 	qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
 	qdisc_state->root_qdisc.qdiscs = qdisc_state->tclass_qdiscs;
@@ -1894,5 +1950,6 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 
 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
+	mutex_destroy(&mlxsw_sp_port->qdisc->lock);
 	kfree(mlxsw_sp_port->qdisc);
 }
-- 
cgit v1.2.3


From 5cbd96025330c4dacdf8c0c49203a6ef0aea21a2 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:46 +0200
Subject: mlxsw: spectrum_qdisc: Allocate child qdiscs dynamically

Instead of keeping qdiscs in globally-preallocated arrays, introduce a
per-qdisc-kind value num_classes, and then allocate the necessary child
qdiscs (if any) based on that value. Since now dynamic allocation is
involved, mlxsw_sp_qdisc_replace() gets messy enough that it is worth it to
split it to two cases: a new qdisc allocation and a change of existing
qdisc. (Note that the change also includes what TC formally calls replace,
if the qdisc kind is the same.)

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_qdisc.c   | 115 +++++++++++++++------
 1 file changed, 83 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 9e7f1a0188e8..03c131027fa7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -49,6 +49,7 @@ struct mlxsw_sp_qdisc_ops {
 			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
 	struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
 					     u32 parent);
+	unsigned int num_classes;
 };
 
 struct mlxsw_sp_qdisc {
@@ -74,7 +75,6 @@ struct mlxsw_sp_qdisc {
 
 struct mlxsw_sp_qdisc_state {
 	struct mlxsw_sp_qdisc root_qdisc;
-	struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS];
 
 	/* When a PRIO or ETS are added, the invisible FIFOs in their bands are
 	 * created first. When notifications for these FIFOs arrive, it is not
@@ -215,29 +215,41 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (mlxsw_sp_qdisc->ops->destroy)
 		err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port,
 						   mlxsw_sp_qdisc);
+	if (mlxsw_sp_qdisc->ops->clean_stats)
+		mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc);
 
 	mlxsw_sp_qdisc->handle = TC_H_UNSPEC;
 	mlxsw_sp_qdisc->ops = NULL;
-
+	mlxsw_sp_qdisc->num_classes = 0;
+	kfree(mlxsw_sp_qdisc->qdiscs);
+	mlxsw_sp_qdisc->qdiscs = NULL;
 	return err_hdroom ?: err;
 }
 
-static int
-mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
-		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
-		       struct mlxsw_sp_qdisc_ops *ops, void *params)
+static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port,
+				 u32 handle,
+				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+				 struct mlxsw_sp_qdisc_ops *ops, void *params)
 {
 	struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc;
 	struct mlxsw_sp_hdroom orig_hdroom;
+	unsigned int i;
 	int err;
 
-	if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type)
-		/* In case this location contained a different qdisc of the
-		 * same type we can override the old qdisc configuration.
-		 * Otherwise, we need to remove the old qdisc before setting the
-		 * new one.
-		 */
-		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+	err = ops->check_params(mlxsw_sp_port, params);
+	if (err)
+		return err;
+
+	if (ops->num_classes) {
+		mlxsw_sp_qdisc->qdiscs = kcalloc(ops->num_classes,
+						 sizeof(*mlxsw_sp_qdisc->qdiscs),
+						 GFP_KERNEL);
+		if (!mlxsw_sp_qdisc->qdiscs)
+			return -ENOMEM;
+
+		for (i = 0; i < ops->num_classes; i++)
+			mlxsw_sp_qdisc->qdiscs[i].parent = mlxsw_sp_qdisc;
+	}
 
 	orig_hdroom = *mlxsw_sp_port->hdroom;
 	if (root_qdisc == mlxsw_sp_qdisc) {
@@ -253,20 +265,46 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 			goto err_hdroom_configure;
 	}
 
+	mlxsw_sp_qdisc->num_classes = ops->num_classes;
+	mlxsw_sp_qdisc->ops = ops;
+	mlxsw_sp_qdisc->handle = handle;
+	err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params);
+	if (err)
+		goto err_replace;
+
+	return 0;
+
+err_replace:
+	mlxsw_sp_qdisc->handle = TC_H_UNSPEC;
+	mlxsw_sp_qdisc->ops = NULL;
+	mlxsw_sp_qdisc->num_classes = 0;
+	mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom);
+err_hdroom_configure:
+	kfree(mlxsw_sp_qdisc->qdiscs);
+	mlxsw_sp_qdisc->qdiscs = NULL;
+	return err;
+}
+
+static int
+mlxsw_sp_qdisc_change(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+		      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params)
+{
+	struct mlxsw_sp_qdisc_ops *ops = mlxsw_sp_qdisc->ops;
+	int err;
+
 	err = ops->check_params(mlxsw_sp_port, params);
 	if (err)
-		goto err_bad_param;
+		goto unoffload;
 
 	err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params);
 	if (err)
-		goto err_config;
+		goto unoffload;
 
 	/* Check if the Qdisc changed. That includes a situation where an
 	 * invisible Qdisc replaces another one, or is being added for the
 	 * first time.
 	 */
-	if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) {
-		mlxsw_sp_qdisc->ops = ops;
+	if (mlxsw_sp_qdisc->handle != handle) {
 		if (ops->clean_stats)
 			ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc);
 	}
@@ -274,17 +312,35 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
 	mlxsw_sp_qdisc->handle = handle;
 	return 0;
 
-err_bad_param:
-err_config:
-	mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom);
-err_hdroom_configure:
-	if (mlxsw_sp_qdisc->handle == handle && ops->unoffload)
+unoffload:
+	if (ops->unoffload)
 		ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params);
 
 	mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 	return err;
 }
 
+static int
+mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
+		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+		       struct mlxsw_sp_qdisc_ops *ops, void *params)
+{
+	if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type)
+		/* In case this location contained a different qdisc of the
+		 * same type we can override the old qdisc configuration.
+		 * Otherwise, we need to remove the old qdisc before setting the
+		 * new one.
+		 */
+		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+
+	if (!mlxsw_sp_qdisc->ops)
+		return mlxsw_sp_qdisc_create(mlxsw_sp_port, handle,
+					     mlxsw_sp_qdisc, ops, params);
+	else
+		return mlxsw_sp_qdisc_change(mlxsw_sp_port, handle,
+					     mlxsw_sp_qdisc, params);
+}
+
 static int
 mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
 			 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
@@ -1049,6 +1105,9 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 					return err;
 			}
 		}
+
+		child_qdisc->tclass_num = tclass;
+
 		if (old_priomap != child_qdisc->prio_bitmap &&
 		    child_qdisc->ops && child_qdisc->ops->clean_stats) {
 			backlog = child_qdisc->stats_base.backlog;
@@ -1189,6 +1248,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 	.find_class = mlxsw_sp_qdisc_prio_find_class,
+	.num_classes = IEEE_8021QAZ_MAX_TCS,
 };
 
 static int
@@ -1239,6 +1299,7 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
 	.find_class = mlxsw_sp_qdisc_prio_find_class,
+	.num_classes = IEEE_8021QAZ_MAX_TCS,
 };
 
 /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting
@@ -1926,7 +1987,6 @@ int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_por
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	struct mlxsw_sp_qdisc_state *qdisc_state;
-	int i;
 
 	qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL);
 	if (!qdisc_state)
@@ -1935,15 +1995,6 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	mutex_init(&qdisc_state->lock);
 	qdisc_state->root_qdisc.prio_bitmap = 0xff;
 	qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
-	qdisc_state->root_qdisc.qdiscs = qdisc_state->tclass_qdiscs;
-	qdisc_state->root_qdisc.num_classes = IEEE_8021QAZ_MAX_TCS;
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(i);
-
-		qdisc_state->tclass_qdiscs[i].tclass_num = tclass_num;
-		qdisc_state->tclass_qdiscs[i].parent = &qdisc_state->root_qdisc;
-	}
-
 	mlxsw_sp_port->qdisc = qdisc_state;
 	return 0;
 }
-- 
cgit v1.2.3


From 7de85b0431cd46db24f800a67011616ce0aa6120 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:47 +0200
Subject: mlxsw: spectrum_qdisc: Index future FIFOs by band number

mlxsw used to hold an array of qdiscs indexed by the TC number. In the
previous patch, it was changed to allocate child qdiscs dynamically, and
they are now indexed by band number. Follow suit with the array of future
FIFOs.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 03c131027fa7..04672eb5c7f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -962,7 +962,7 @@ static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
-	int tclass, child_index;
+	unsigned int band;
 	u32 parent_handle;
 
 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
@@ -977,13 +977,12 @@ static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
 			qdisc_state->future_handle = parent_handle;
 		}
 
-		child_index = TC_H_MIN(p->parent);
-		tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
-		if (tclass < IEEE_8021QAZ_MAX_TCS) {
+		band = TC_H_MIN(p->parent) - 1;
+		if (band < IEEE_8021QAZ_MAX_TCS) {
 			if (p->command == TC_FIFO_REPLACE)
-				qdisc_state->future_fifos[tclass] = true;
+				qdisc_state->future_fifos[band] = true;
 			else if (p->command == TC_FIFO_DESTROY)
-				qdisc_state->future_fifos[tclass] = false;
+				qdisc_state->future_fifos[band] = false;
 		}
 	}
 	if (!mlxsw_sp_qdisc)
@@ -1117,7 +1116,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 		}
 
 		if (handle == qdisc_state->future_handle &&
-		    qdisc_state->future_fifos[tclass]) {
+		    qdisc_state->future_fifos[band]) {
 			err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC,
 						     child_qdisc,
 						     &mlxsw_sp_qdisc_ops_fifo,
-- 
cgit v1.2.3


From 0a4d0cb1a326cf0070a625036e19871f544f2d25 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 20 Apr 2021 16:53:48 +0200
Subject: selftests: mlxsw: sch_red_ets: Test proper counter cleaning in ETS

There was a bug introduced during the rework which cause non-zero backlog
being stuck at ETS. Introduce a selftest that would have caught the issue
earlier.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 3f007c5f8361..f3ef3274f9b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -67,6 +67,13 @@ red_test()
 {
 	install_qdisc
 
+	# Make sure that we get the non-zero value if there is any.
+	local cur=$(busywait 1100 until_counter_is "> 0" \
+			    qdisc_stats_get $swp3 10: .backlog)
+	(( cur == 0 ))
+	check_err $? "backlog of $cur observed on non-busy qdisc"
+	log_test "$QDISC backlog properly cleaned"
+
 	do_red_test 10 $BACKLOG1
 	do_red_test 11 $BACKLOG2
 
-- 
cgit v1.2.3


From 670bb80f8196ab2189e7f51473da236450dca1aa Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 20 Apr 2021 20:53:07 +0200
Subject: net: dsa: mv88e6xxx: Mark chips with undocumented EDSA tag support

All devices are capable of using regular DSA tags. Support for
Ethertyped DSA tags sort into three categories:

1. No support. Older chips fall into this category.

2. Full support. Datasheet explicitly supports configuring the CPU
   port to receive FORWARDs with a DSA tag.

3. Undocumented support. Datasheet lists the configuration from
   category 2 as "reserved for future use", but does empirically
   behave like a category 2 device.

So, instead of listing the one true protocol that should be used by a
particular chip, specify the level of support for EDSA (support for
regular DSA is implicit on all chips). As before, we use EDSA for all
chips that fully supports it.

In upcoming changes, we will use this information to support
dynamically changing the tag protocol.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 60 +++++++++++++++++-----------------------
 drivers/net/dsa/mv88e6xxx/chip.h | 21 +++++++++++++-
 2 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9c4f8517c34b..6fbc1eed05c1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2531,10 +2531,10 @@ static int mv88e6xxx_setup_port_mode(struct mv88e6xxx_chip *chip, int port)
 		return mv88e6xxx_set_port_mode_normal(chip, port);
 
 	/* Setup CPU port mode depending on its supported tag format */
-	if (chip->info->tag_protocol == DSA_TAG_PROTO_DSA)
+	if (chip->tag_protocol == DSA_TAG_PROTO_DSA)
 		return mv88e6xxx_set_port_mode_dsa(chip, port);
 
-	if (chip->info->tag_protocol == DSA_TAG_PROTO_EDSA)
+	if (chip->tag_protocol == DSA_TAG_PROTO_EDSA)
 		return mv88e6xxx_set_port_mode_edsa(chip, port);
 
 	return -EINVAL;
@@ -4786,7 +4786,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ops = &mv88e6085_ops,
 	},
 
@@ -4807,7 +4806,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ops = &mv88e6095_ops,
 	},
 
@@ -4830,7 +4828,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6097_ops,
 	},
 
@@ -4853,7 +4851,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6123_ops,
 	},
 
@@ -4874,7 +4872,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g1_irqs = 9,
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ops = &mv88e6131_ops,
 	},
 
@@ -4898,7 +4895,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g2_irqs = 10,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6141_ops,
 	},
 
@@ -4921,7 +4918,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6161_ops,
 	},
@@ -4945,7 +4942,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6165_ops,
 	},
@@ -4969,7 +4965,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6171_ops,
 	},
 
@@ -4993,7 +4989,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6172_ops,
 	},
 
@@ -5016,7 +5012,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6175_ops,
 	},
 
@@ -5040,7 +5036,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6176_ops,
 	},
 
@@ -5061,7 +5057,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g1_irqs = 8,
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6185_ops,
 	},
 
@@ -5079,7 +5075,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.phy_base_addr = 0x0,
 		.global1_addr = 0x1b,
 		.global2_addr = 0x1c,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.age_time_coeff = 3750,
 		.g1_irqs = 9,
 		.g2_irqs = 14,
@@ -5109,7 +5104,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ops = &mv88e6190x_ops,
 	},
 
@@ -5132,7 +5126,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6191_ops,
 	},
@@ -5155,7 +5148,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6393x_ops,
 	},
@@ -5178,7 +5170,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6393x_ops,
 	},
@@ -5205,7 +5196,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.dual_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6250_ops,
 	},
@@ -5230,7 +5220,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6240_ops,
 	},
@@ -5252,7 +5242,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.dual_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6250_ops,
 	},
@@ -5276,7 +5265,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6290_ops,
 	},
@@ -5301,7 +5289,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6320_ops,
 	},
@@ -5325,7 +5313,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g2_irqs = 10,
 		.atu_move_port_mask = 0xf,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6321_ops,
 	},
@@ -5350,7 +5338,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.g2_irqs = 10,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6341_ops,
 	},
@@ -5374,7 +5362,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6350_ops,
 	},
 
@@ -5397,7 +5385,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ops = &mv88e6351_ops,
 	},
 
@@ -5421,7 +5409,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0xf,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_EDSA,
+		.edsa_support = MV88E6XXX_EDSA_SUPPORTED,
 		.ptp_support = true,
 		.ops = &mv88e6352_ops,
 	},
@@ -5445,7 +5433,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.edsa_support = MV88E6XXX_EDSA_UNDOCUMENTED,
 		.ptp_support = true,
 		.ops = &mv88e6390_ops,
 	},
@@ -5469,7 +5457,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
+		.edsa_support = MV88E6XXX_EDSA_UNDOCUMENTED,
 		.ptp_support = true,
 		.ops = &mv88e6390x_ops,
 	},
@@ -5492,7 +5480,6 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 		.atu_move_port_mask = 0x1f,
 		.pvt = true,
 		.multi_chip = true,
-		.tag_protocol = DSA_TAG_PROTO_DSA,
 		.ptp_support = true,
 		.ops = &mv88e6393x_ops,
 	},
@@ -5561,7 +5548,7 @@ static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds,
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
-	return chip->info->tag_protocol;
+	return chip->tag_protocol;
 }
 
 static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
@@ -6206,6 +6193,11 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 	if (err)
 		goto out;
 
+	if (chip->info->edsa_support == MV88E6XXX_EDSA_SUPPORTED)
+		chip->tag_protocol = DSA_TAG_PROTO_EDSA;
+	else
+		chip->tag_protocol = DSA_TAG_PROTO_DSA;
+
 	mv88e6xxx_phy_init(chip);
 
 	if (chip->info->ops->get_eeprom) {
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index bce6e0dc8535..4f116f73a74b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -96,6 +96,22 @@ enum mv88e6xxx_family {
 	MV88E6XXX_FAMILY_6393,	/* 6191X 6193X 6393X */
 };
 
+/**
+ * enum mv88e6xxx_edsa_support - Ethertype DSA tag support level
+ * @MV88E6XXX_EDSA_UNSUPPORTED:  Device has no support for EDSA tags
+ * @MV88E6XXX_EDSA_UNDOCUMENTED: Documentation indicates that
+ *                               egressing FORWARD frames with an EDSA
+ *                               tag is reserved for future use, but
+ *                               empirical data shows that this mode
+ *                               is supported.
+ * @MV88E6XXX_EDSA_SUPPORTED:    EDSA tags are fully supported.
+ */
+enum mv88e6xxx_edsa_support {
+	MV88E6XXX_EDSA_UNSUPPORTED = 0,
+	MV88E6XXX_EDSA_UNDOCUMENTED,
+	MV88E6XXX_EDSA_SUPPORTED,
+};
+
 struct mv88e6xxx_ops;
 
 struct mv88e6xxx_info {
@@ -133,7 +149,7 @@ struct mv88e6xxx_info {
 	 */
 	bool dual_chip;
 
-	enum dsa_tag_protocol tag_protocol;
+	enum mv88e6xxx_edsa_support edsa_support;
 
 	/* Mask for FromPort and ToPort value of PortVec used in ATU Move
 	 * operation. 0 means that the ATU Move operation is not supported.
@@ -261,6 +277,9 @@ struct mv88e6xxx_region_priv {
 struct mv88e6xxx_chip {
 	const struct mv88e6xxx_info *info;
 
+	/* Currently configured tagging protocol */
+	enum dsa_tag_protocol tag_protocol;
+
 	/* The dsa_switch this private structure is related to */
 	struct dsa_switch *ds;
 
-- 
cgit v1.2.3


From 9a99bef5f87f2fb025e9a51ff4ad820f7b8a9ffb Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 20 Apr 2021 20:53:08 +0200
Subject: net: dsa: mv88e6xxx: Allow dynamic reconfiguration of tag protocol

For devices that supports both regular and Ethertyped DSA tags, allow
the user to change the protocol.

Additionally, because there are ethernet controllers that do not
handle regular DSA tags in all cases, also allow the protocol to be
changed on devices with undocumented support for EDSA. But, in those
cases, make sure to log the fact that an undocumented feature has been
enabled.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 6fbc1eed05c1..9ff1a10993b1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -5551,6 +5551,44 @@ static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds,
 	return chip->tag_protocol;
 }
 
+static int mv88e6xxx_change_tag_protocol(struct dsa_switch *ds, int port,
+					 enum dsa_tag_protocol proto)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	enum dsa_tag_protocol old_protocol;
+	int err;
+
+	switch (proto) {
+	case DSA_TAG_PROTO_EDSA:
+		switch (chip->info->edsa_support) {
+		case MV88E6XXX_EDSA_UNSUPPORTED:
+			return -EPROTONOSUPPORT;
+		case MV88E6XXX_EDSA_UNDOCUMENTED:
+			dev_warn(chip->dev, "Relying on undocumented EDSA tagging behavior\n");
+			fallthrough;
+		case MV88E6XXX_EDSA_SUPPORTED:
+			break;
+		}
+		break;
+	case DSA_TAG_PROTO_DSA:
+		break;
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	old_protocol = chip->tag_protocol;
+	chip->tag_protocol = proto;
+
+	mv88e6xxx_reg_lock(chip);
+	err = mv88e6xxx_setup_port_mode(chip, port);
+	mv88e6xxx_reg_unlock(chip);
+
+	if (err)
+		chip->tag_protocol = old_protocol;
+
+	return err;
+}
+
 static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
 				  const struct switchdev_obj_port_mdb *mdb)
 {
@@ -6013,6 +6051,7 @@ static int mv88e6xxx_crosschip_lag_leave(struct dsa_switch *ds, int sw_index,
 
 static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.get_tag_protocol	= mv88e6xxx_get_tag_protocol,
+	.change_tag_protocol	= mv88e6xxx_change_tag_protocol,
 	.setup			= mv88e6xxx_setup,
 	.teardown		= mv88e6xxx_teardown,
 	.phylink_validate	= mv88e6xxx_validate,
-- 
cgit v1.2.3


From 21e0b508c8d1fd7f1a4b91794391d1978431e083 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 20 Apr 2021 20:53:09 +0200
Subject: net: dsa: Only notify CPU ports of changes to the tag protocol

Previously DSA ports were also included, on the assumption that the
protocol used by the CPU port had to the matched throughout the entire
tree.

As there is not yet any consumer in need of this, drop the call.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/switch.c | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 32963276452f..9bf8e20ecdf3 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -323,15 +323,6 @@ static int dsa_switch_vlan_del(struct dsa_switch *ds,
 	return 0;
 }
 
-static bool dsa_switch_tag_proto_match(struct dsa_switch *ds, int port,
-				       struct dsa_notifier_tag_proto_info *info)
-{
-	if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-		return true;
-
-	return false;
-}
-
 static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
 				       struct dsa_notifier_tag_proto_info *info)
 {
@@ -344,16 +335,14 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
 	ASSERT_RTNL();
 
 	for (port = 0; port < ds->num_ports; port++) {
-		if (dsa_switch_tag_proto_match(ds, port, info)) {
-			err = ds->ops->change_tag_protocol(ds, port,
-							   tag_ops->proto);
-			if (err)
-				return err;
+		if (!dsa_is_cpu_port(ds, port))
+			continue;
 
-			if (dsa_is_cpu_port(ds, port))
-				dsa_port_set_tag_protocol(dsa_to_port(ds, port),
-							  tag_ops);
-		}
+		err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+		if (err)
+			return err;
+
+		dsa_port_set_tag_protocol(dsa_to_port(ds, port), tag_ops);
 	}
 
 	/* Now that changing the tag protocol can no longer fail, let's update
-- 
cgit v1.2.3


From deff710703d80c942c9c85a3f00a053025cfb1e4 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 20 Apr 2021 20:53:10 +0200
Subject: net: dsa: Allow default tag protocol to be overridden from DT

Some combinations of tag protocols and Ethernet controllers are
incompatible, and it is hard for the driver to keep track of these.

Therefore, allow the device tree author (typically the board vendor)
to inform the driver of this fact by selecting an alternate protocol
that is known to work.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |   5 +++
 net/dsa/dsa2.c    | 103 +++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b52e9b057be4..507082959aa4 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -149,6 +149,11 @@ struct dsa_switch_tree {
 	/* Tagging protocol operations */
 	const struct dsa_device_ops *tag_ops;
 
+	/* Default tagging protocol preferred by the switches in this
+	 * tree.
+	 */
+	enum dsa_tag_protocol default_proto;
+
 	/*
 	 * Configuration data for the platform device that owns
 	 * this dsa switch tree instance.
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index d7c22e3a1fbf..b71e87909f0e 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -668,6 +668,30 @@ static const struct devlink_ops dsa_devlink_ops = {
 	.sb_occ_tc_port_bind_get	= dsa_devlink_sb_occ_tc_port_bind_get,
 };
 
+static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
+{
+	const struct dsa_device_ops *tag_ops = ds->dst->tag_ops;
+	struct dsa_switch_tree *dst = ds->dst;
+	int port, err;
+
+	if (tag_ops->proto == dst->default_proto)
+		return 0;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!dsa_is_cpu_port(ds, port))
+			continue;
+
+		err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+		if (err) {
+			dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
+				tag_ops->name, ERR_PTR(err));
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 static int dsa_switch_setup(struct dsa_switch *ds)
 {
 	struct dsa_devlink_priv *dl_priv;
@@ -718,6 +742,10 @@ static int dsa_switch_setup(struct dsa_switch *ds)
 	if (err < 0)
 		goto unregister_notifier;
 
+	err = dsa_switch_setup_tag_protocol(ds);
+	if (err)
+		goto teardown;
+
 	devlink_params_publish(ds->devlink);
 
 	if (!ds->slave_mii_bus && ds->ops->phy_read) {
@@ -1068,34 +1096,60 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
 	return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
 }
 
-static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master,
+			      const char *user_protocol)
 {
 	struct dsa_switch *ds = dp->ds;
 	struct dsa_switch_tree *dst = ds->dst;
 	const struct dsa_device_ops *tag_ops;
-	enum dsa_tag_protocol tag_protocol;
+	enum dsa_tag_protocol default_proto;
+
+	/* Find out which protocol the switch would prefer. */
+	default_proto = dsa_get_tag_protocol(dp, master);
+	if (dst->default_proto) {
+		if (dst->default_proto != default_proto) {
+			dev_err(ds->dev,
+				"A DSA switch tree can have only one tagging protocol\n");
+			return -EINVAL;
+		}
+	} else {
+		dst->default_proto = default_proto;
+	}
+
+	/* See if the user wants to override that preference. */
+	if (user_protocol) {
+		if (!ds->ops->change_tag_protocol) {
+			dev_err(ds->dev, "Tag protocol cannot be modified\n");
+			return -EINVAL;
+		}
+
+		tag_ops = dsa_find_tagger_by_name(user_protocol);
+	} else {
+		tag_ops = dsa_tag_driver_get(default_proto);
+	}
+
+	if (IS_ERR(tag_ops)) {
+		if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
+			return -EPROBE_DEFER;
+
+		dev_warn(ds->dev, "No tagger for this switch\n");
+		return PTR_ERR(tag_ops);
+	}
 
-	tag_protocol = dsa_get_tag_protocol(dp, master);
 	if (dst->tag_ops) {
-		if (dst->tag_ops->proto != tag_protocol) {
+		if (dst->tag_ops != tag_ops) {
 			dev_err(ds->dev,
 				"A DSA switch tree can have only one tagging protocol\n");
+
+			dsa_tag_driver_put(tag_ops);
 			return -EINVAL;
 		}
+
 		/* In the case of multiple CPU ports per switch, the tagging
-		 * protocol is still reference-counted only per switch tree, so
-		 * nothing to do here.
+		 * protocol is still reference-counted only per switch tree.
 		 */
+		dsa_tag_driver_put(tag_ops);
 	} else {
-		tag_ops = dsa_tag_driver_get(tag_protocol);
-		if (IS_ERR(tag_ops)) {
-			if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
-				return -EPROBE_DEFER;
-			dev_warn(ds->dev, "No tagger for this switch\n");
-			dp->master = NULL;
-			return PTR_ERR(tag_ops);
-		}
-
 		dst->tag_ops = tag_ops;
 	}
 
@@ -1104,6 +1158,19 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
 	dsa_port_set_tag_protocol(dp, dst->tag_ops);
 	dp->dst = dst;
 
+	/* At this point, the tree may be configured to use a different
+	 * tagger than the one chosen by the switch driver during
+	 * .setup, in the case when a user selects a custom protocol
+	 * through the DT.
+	 *
+	 * This is resolved by syncing the driver with the tree in
+	 * dsa_switch_setup_tag_protocol once .setup has run and the
+	 * driver is ready to accept calls to .change_tag_protocol. If
+	 * the driver does not support the custom protocol at that
+	 * point, the tree is wholly rejected, thereby ensuring that the
+	 * tree and driver are always in agreement on the protocol to
+	 * use.
+	 */
 	return 0;
 }
 
@@ -1117,12 +1184,14 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
 
 	if (ethernet) {
 		struct net_device *master;
+		const char *user_protocol;
 
 		master = of_find_net_device_by_node(ethernet);
 		if (!master)
 			return -EPROBE_DEFER;
 
-		return dsa_port_parse_cpu(dp, master);
+		user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL);
+		return dsa_port_parse_cpu(dp, master, user_protocol);
 	}
 
 	if (link)
@@ -1234,7 +1303,7 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name,
 
 		dev_put(master);
 
-		return dsa_port_parse_cpu(dp, master);
+		return dsa_port_parse_cpu(dp, master, NULL);
 	}
 
 	if (!strcmp(name, "dsa"))
-- 
cgit v1.2.3


From eb78cacebaf2ff76e787c8b8a0d70eacf6c4fa4e Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 20 Apr 2021 20:53:11 +0200
Subject: dt-bindings: net: dsa: Document dsa-tag-protocol property

The 'dsa-tag-protocol' is used to force a switch tree to use a
particular tag protocol, typically because the Ethernet controller
that it is connected to is not compatible with the default one.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/dsa.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
index 8a3494db4d8d..16aa192c118e 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
@@ -70,6 +70,15 @@ patternProperties:
               device is what the switch port is connected to
             $ref: /schemas/types.yaml#/definitions/phandle
 
+          dsa-tag-protocol:
+            description:
+              Instead of the default, the switch will use this tag protocol if
+              possible. Useful when a device supports multiple protcols and
+              the default is incompatible with the Ethernet device.
+            enum:
+              - dsa
+              - edsa
+
           phy-handle: true
 
           phy-mode: true
-- 
cgit v1.2.3


From b8c55ce266dee09b0e359ff9af885eb94e11480a Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 20 Apr 2021 21:09:57 +0200
Subject: net: wwan: Fix bit ops double shift

bit operation helpers such as test_bit, clear_bit, etc take bit
position as parameter and not value. Current usage causes double
shift => BIT(BIT(0)). Fix that in wwan_core and mhi_wwan_ctrl.

Fixes: 9a44c1cc6388 ("net: Add a WWAN subsystem")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/mhi_wwan_ctrl.c | 8 +++++---
 drivers/net/wwan/wwan_core.c     | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c
index 11475ade4be5..416ae6abafb7 100644
--- a/drivers/net/wwan/mhi_wwan_ctrl.c
+++ b/drivers/net/wwan/mhi_wwan_ctrl.c
@@ -7,9 +7,11 @@
 #include <linux/wwan.h>
 
 /* MHI wwan flags */
-#define MHI_WWAN_DL_CAP		BIT(0)
-#define MHI_WWAN_UL_CAP		BIT(1)
-#define MHI_WWAN_RX_REFILL	BIT(2)
+enum mhi_wwan_flags {
+	MHI_WWAN_DL_CAP,
+	MHI_WWAN_UL_CAP,
+	MHI_WWAN_RX_REFILL,
+};
 
 #define MHI_WWAN_MAX_MTU	0x8000
 
diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index b618b7937846..5be5e1e3534c 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -26,7 +26,7 @@ static int wwan_major;
 #define to_wwan_port(d) container_of(d, struct wwan_port, dev)
 
 /* WWAN port flags */
-#define WWAN_PORT_TX_OFF	BIT(0)
+#define WWAN_PORT_TX_OFF	0
 
 /**
  * struct wwan_device - The structure that defines a WWAN device
-- 
cgit v1.2.3


From f5d7872a8b8a3176e65dc6f7f0705ce7e9a699e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Apr 2021 13:01:44 -0700
Subject: virtio-net: restrict build_skb() use to some arches

build_skb() is supposed to be followed by
skb_reserve(skb, NET_IP_ALIGN), so that IP headers are word-aligned.
(Best practice is to reserve NET_IP_ALIGN+NET_SKB_PAD, but the NET_SKB_PAD
part is only a performance optimization if tunnel encaps are added.)

Unfortunately virtio_net has not provisioned this reserve.
We can only use build_skb() for arches where NET_IP_ALIGN == 0

We might refine this later, with enough testing.

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8cd76037c724..0c84b9f2c5e9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -415,7 +415,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	if (len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
+	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
 		skb = build_skb(p, truesize);
 		if (unlikely(!skb))
 			return NULL;
-- 
cgit v1.2.3


From 1b8caefaf4f063fdc43e4078384d38ce96147b35 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 20 Apr 2021 16:28:21 +0200
Subject: net: enetc: automatically select IERB module

Now that enetc supports flow control we have to make sure the settings in
the IERB are correct. Therefore, we actually depend on the enetc-ierb
module. Previously it was possible that this module was disabled while the
enetc was enabled. Fix it by automatically select the enetc-ierb module.

Fixes: e7d48e5fbf30 ("net: enetc: add a mini driver for the Integrated Endpoint Register Block")
Signed-off-by: Michael Walle <michael@walle.cc>
Acked-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index d88f60c2bb82..cdc0ff89388a 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -2,7 +2,7 @@
 config FSL_ENETC
 	tristate "ENETC PF driver"
 	depends on PCI && PCI_MSI
-	depends on FSL_ENETC_IERB || FSL_ENETC_IERB=n
+	select FSL_ENETC_IERB
 	select FSL_ENETC_MDIO
 	select PHYLINK
 	select PCS_LYNX
-- 
cgit v1.2.3


From 6b3a63100dedfa1f0887eb316110d5d7b0c51ed4 Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Tue, 20 Apr 2021 16:11:33 +0300
Subject: phy: nxp-c45-tja11xx: fix phase offset calculation

Fix phase offset calculation.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 47cacda8836f..95307097ebff 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -354,7 +354,7 @@ static u64 nxp_c45_get_phase_shift(u64 phase_offset_raw)
 	 * and get 1 decimal point precision.
 	 */
 	phase_offset_raw *= 10;
-	phase_offset_raw -= phase_offset_raw;
+	phase_offset_raw -= 738;
 	return div_u64(phase_offset_raw, 9);
 }
 
-- 
cgit v1.2.3


From af39c8f72301b268ad8b04bae646b6025918b82b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Apr 2021 02:43:41 -0700
Subject: virtio-net: fix use-after-free in page_to_skb()

KASAN/syzbot had 4 reports, one of them being:

BUG: KASAN: slab-out-of-bounds in memcpy include/linux/fortify-string.h:191 [inline]
BUG: KASAN: slab-out-of-bounds in page_to_skb+0x5cf/0xb70 drivers/net/virtio_net.c:480
Read of size 12 at addr ffff888014a5f800 by task systemd-udevd/8445

CPU: 0 PID: 8445 Comm: systemd-udevd Not tainted 5.12.0-rc8-next-20210419-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x141/0x1d7 lib/dump_stack.c:120
 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:233
 __kasan_report mm/kasan/report.c:419 [inline]
 kasan_report.cold+0x7c/0xd8 mm/kasan/report.c:436
 check_region_inline mm/kasan/generic.c:180 [inline]
 kasan_check_range+0x13d/0x180 mm/kasan/generic.c:186
 memcpy+0x20/0x60 mm/kasan/shadow.c:65
 memcpy include/linux/fortify-string.h:191 [inline]
 page_to_skb+0x5cf/0xb70 drivers/net/virtio_net.c:480
 receive_mergeable drivers/net/virtio_net.c:1009 [inline]
 receive_buf+0x2bc0/0x6250 drivers/net/virtio_net.c:1119
 virtnet_receive drivers/net/virtio_net.c:1411 [inline]
 virtnet_poll+0x568/0x10b0 drivers/net/virtio_net.c:1516
 __napi_poll+0xaf/0x440 net/core/dev.c:6962
 napi_poll net/core/dev.c:7029 [inline]
 net_rx_action+0x801/0xb40 net/core/dev.c:7116
 __do_softirq+0x29b/0x9fe kernel/softirq.c:559
 invoke_softirq kernel/softirq.c:433 [inline]
 __irq_exit_rcu+0x136/0x200 kernel/softirq.c:637
 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649
 common_interrupt+0xa4/0xd0 arch/x86/kernel/irq.c:240

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Cc: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0c84b9f2c5e9..74d2d49264f3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -385,6 +385,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int copy, hdr_len, hdr_padded_len;
+	struct page *page_to_free = NULL;
 	int tailroom, shinfo_size;
 	char *p, *hdr_p;
 
@@ -445,7 +446,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		if (len)
 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
 		else
-			put_page(page);
+			page_to_free = page;
 		goto ok;
 	}
 
@@ -479,6 +480,8 @@ ok:
 		hdr = skb_vnet_hdr(skb);
 		memcpy(hdr, hdr_p, hdr_len);
 	}
+	if (page_to_free)
+		put_page(page_to_free);
 
 	if (metasize) {
 		__skb_pull(skb, metasize);
-- 
cgit v1.2.3


From 5b1faa92289b53cad654123ed2bc8e10f6ddd4ac Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Tue, 20 Apr 2021 13:27:22 +0100
Subject: sfc: farch: fix TX queue lookup in TX flush done handling

We're starting from a TXQ instance number ('qid'), not a TXQ type, so
 efx_get_tx_queue() is inappropriate (and could return NULL, leading
 to panics).

Fixes: 12804793b17c ("sfc: decouple TXQ type from label")
Reported-by: Trevor Hemsley <themsley@voiceflex.com>
Cc: stable@vger.kernel.org
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/farch.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index d75cf5ff5686..f89ebe0073dd 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1081,16 +1081,16 @@ static void
 efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
 {
 	struct efx_tx_queue *tx_queue;
+	struct efx_channel *channel;
 	int qid;
 
 	qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
 	if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
-		tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
-					    qid % EFX_MAX_TXQ_PER_CHANNEL);
-		if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
+		channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
+		tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
+		if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
 			efx_farch_magic_event(tx_queue->channel,
 					      EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
-		}
 	}
 }
 
-- 
cgit v1.2.3


From 83b09a1807415608b387c7bc748d329fefc5617e Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Tue, 20 Apr 2021 13:28:28 +0100
Subject: sfc: farch: fix TX queue lookup in TX event handling

We're starting from a TXQ label, not a TXQ type, so
 efx_channel_get_tx_queue() is inappropriate (and could return NULL,
 leading to panics).

Fixes: 12804793b17c ("sfc: decouple TXQ type from label")
Cc: stable@vger.kernel.org
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/farch.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index f89ebe0073dd..49df02ecee91 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -835,14 +835,14 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
 		/* Transmit completion */
 		tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
-		tx_queue = efx_channel_get_tx_queue(
-			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+		tx_queue = channel->tx_queue +
+				(tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 		efx_xmit_done(tx_queue, tx_ev_desc_ptr);
 	} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
 		/* Rewrite the FIFO write pointer */
 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
-		tx_queue = efx_channel_get_tx_queue(
-			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+		tx_queue = channel->tx_queue +
+				(tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 
 		netif_tx_lock(efx->net_dev);
 		efx_farch_notify_tx_desc(tx_queue);
-- 
cgit v1.2.3


From 172e269edfce34bac7c61c15551816bda4b0f140 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Tue, 20 Apr 2021 13:29:35 +0100
Subject: sfc: ef10: fix TX queue lookup in TX event handling

We're starting from a TXQ label, not a TXQ type, so
 efx_channel_get_tx_queue() is inappropriate.  This worked by chance,
 because labels and types currently match on EF10, but we shouldn't
 rely on that.

Fixes: 12804793b17c ("sfc: decouple TXQ type from label")
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index da6886dcac37..4fa72b573c17 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2928,8 +2928,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
 
 	/* Get the transmit queue */
 	tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
-	tx_queue = efx_channel_get_tx_queue(channel,
-					    tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+	tx_queue = channel->tx_queue + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
 
 	if (!tx_queue->timestamping) {
 		/* Transmit completion */
-- 
cgit v1.2.3


From 55cdc26a91ac270887583945aef2bd460a2805f7 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 20 Apr 2021 13:27:30 +0100
Subject: net: mana: remove redundant initialization of variable err

The variable err is being initialized with a value that is
never read and it is being updated later with a new value.  The
initialization is redundant and can be removed

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 462bc577692a..0cf0322702ed 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -55,7 +55,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len,
 				 const struct gdma_resp_hdr *resp_msg)
 {
 	struct hwc_caller_ctx *ctx;
-	int err = -EPROTO;
+	int err;
 
 	if (!test_bit(resp_msg->response.hwc_msg_id,
 		      hwc->inflight_msg_res.map)) {
-- 
cgit v1.2.3


From 8432b8114957235f42e070a16118a7f750de9d39 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 20 Apr 2021 13:07:27 +0200
Subject: vsock/virtio: free queued packets when closing socket

As reported by syzbot [1], there is a memory leak while closing the
socket. We partially solved this issue with commit ac03046ece2b
("vsock/virtio: free packets during the socket release"), but we
forgot to drain the RX queue when the socket is definitely closed by
the scheduled work.

To avoid future issues, let's use the new virtio_transport_remove_sock()
to drain the RX queue before removing the socket from the af_vsock lists
calling vsock_remove_sock().

[1] https://syzkaller.appspot.com/bug?extid=24452624fc4c571eedd9

Fixes: ac03046ece2b ("vsock/virtio: free packets during the socket release")
Reported-and-tested-by: syzbot+24452624fc4c571eedd9@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4370b1b7494..902cb6dd710b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -733,6 +733,23 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
 	return t->send_pkt(reply);
 }
 
+/* This function should be called with sk_lock held and SOCK_DONE set */
+static void virtio_transport_remove_sock(struct vsock_sock *vsk)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct virtio_vsock_pkt *pkt, *tmp;
+
+	/* We don't need to take rx_lock, as the socket is closing and we are
+	 * removing it.
+	 */
+	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+
+	vsock_remove_sock(vsk);
+}
+
 static void virtio_transport_wait_close(struct sock *sk, long timeout)
 {
 	if (timeout) {
@@ -765,7 +782,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
 	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
 		vsk->close_work_scheduled = false;
 
-		vsock_remove_sock(vsk);
+		virtio_transport_remove_sock(vsk);
 
 		/* Release refcnt obtained when we scheduled the timeout */
 		sock_put(sk);
@@ -828,22 +845,15 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
 
 void virtio_transport_release(struct vsock_sock *vsk)
 {
-	struct virtio_vsock_sock *vvs = vsk->trans;
-	struct virtio_vsock_pkt *pkt, *tmp;
 	struct sock *sk = &vsk->sk;
 	bool remove_sock = true;
 
 	if (sk->sk_type == SOCK_STREAM)
 		remove_sock = virtio_transport_close(vsk);
 
-	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
-		list_del(&pkt->list);
-		virtio_transport_free_pkt(pkt);
-	}
-
 	if (remove_sock) {
 		sock_set_flag(sk, SOCK_DONE);
-		vsock_remove_sock(vsk);
+		virtio_transport_remove_sock(vsk);
 	}
 }
 EXPORT_SYMBOL_GPL(virtio_transport_release);
-- 
cgit v1.2.3


From 8f7e876273e294b732b42af2e5e6bba91d798954 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 20 Apr 2021 12:29:29 +0200
Subject: net: phy: at803x: fix probe error if copper page is selected

The commit c329e5afb42f ("net: phy: at803x: select correct page on
config init") selects the copper page during probe. This fails if the
copper page was already selected. In this case, the value of the copper
page (which is 1) is propagated through phy_restore_page() and is
finally returned for at803x_probe(). Fix it, by just using the
at803x_page_write() directly.

Also in case of an error, the regulator is not disabled and leads to a
WARN_ON() when the probe fails. This couldn't happen before, because
at803x_parse_dt() was the last call in at803x_probe(). It is hard to
see, that the parse_dt() actually enables the regulator. Thus move the
regulator_enable() to the probe function and undo it in case of an
error.

Fixes: c329e5afb42f ("net: phy: at803x: select correct page on config init")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: David Bauer <mail@david-bauer.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index e0f56850edc5..32af52dd5aed 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -554,10 +554,6 @@ static int at803x_parse_dt(struct phy_device *phydev)
 			phydev_err(phydev, "failed to get VDDIO regulator\n");
 			return PTR_ERR(priv->vddio);
 		}
-
-		ret = regulator_enable(priv->vddio);
-		if (ret < 0)
-			return ret;
 	}
 
 	return 0;
@@ -579,15 +575,30 @@ static int at803x_probe(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
+	if (priv->vddio) {
+		ret = regulator_enable(priv->vddio);
+		if (ret < 0)
+			return ret;
+	}
+
 	/* Some bootloaders leave the fiber page selected.
 	 * Switch to the copper page, as otherwise we read
 	 * the PHY capabilities from the fiber side.
 	 */
 	if (at803x_match_phy_id(phydev, ATH8031_PHY_ID)) {
-		ret = phy_select_page(phydev, AT803X_PAGE_COPPER);
-		ret = phy_restore_page(phydev, AT803X_PAGE_COPPER, ret);
+		phy_lock_mdio_bus(phydev);
+		ret = at803x_write_page(phydev, AT803X_PAGE_COPPER);
+		phy_unlock_mdio_bus(phydev);
+		if (ret)
+			goto err;
 	}
 
+	return 0;
+
+err:
+	if (priv->vddio)
+		regulator_disable(priv->vddio);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 333980481b99edb24ebd5d1a53af70a15d9146de Mon Sep 17 00:00:00 2001
From: Vadym Kochan <vkochan@marvell.com>
Date: Tue, 20 Apr 2021 16:31:51 +0300
Subject: net: marvell: prestera: fix port event handling on init

For some reason there might be a crash during ports creation if port
events are handling at the same time  because fw may send initial
port event with down state.

The crash points to cancel_delayed_work() which is called when port went
is down.  Currently I did not find out the real cause of the issue, so
fixed it by cancel port stats work only if previous port's state was up
& runnig.

The following is the crash which can be triggered:

[   28.311104] Unable to handle kernel paging request at virtual address
000071775f776600
[   28.319097] Mem abort info:
[   28.321914]   ESR = 0x96000004
[   28.324996]   EC = 0x25: DABT (current EL), IL = 32 bits
[   28.330350]   SET = 0, FnV = 0
[   28.333430]   EA = 0, S1PTW = 0
[   28.336597] Data abort info:
[   28.339499]   ISV = 0, ISS = 0x00000004
[   28.343362]   CM = 0, WnR = 0
[   28.346354] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000100bf7000
[   28.352842] [000071775f776600] pgd=0000000000000000,
p4d=0000000000000000
[   28.359695] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[   28.365310] Modules linked in: prestera_pci(+) prestera
uio_pdrv_genirq
[   28.372005] CPU: 0 PID: 1291 Comm: kworker/0:1H Not tainted
5.11.0-rc4 #1
[   28.378846] Hardware name: DNI AmazonGo1 A7040 board (DT)
[   28.384283] Workqueue: prestera_fw_wq prestera_fw_evt_work_fn
[prestera_pci]
[   28.391413] pstate: 60000085 (nZCv daIf -PAN -UAO -TCO BTYPE=--)
[   28.397468] pc : get_work_pool+0x48/0x60
[   28.401442] lr : try_to_grab_pending+0x6c/0x1b0
[   28.406018] sp : ffff80001391bc60
[   28.409358] x29: ffff80001391bc60 x28: 0000000000000000
[   28.414725] x27: ffff000104fc8b40 x26: ffff80001127de88
[   28.420089] x25: 0000000000000000 x24: ffff000106119760
[   28.425452] x23: ffff00010775dd60 x22: ffff00010567e000
[   28.430814] x21: 0000000000000000 x20: ffff80001391bcb0
[   28.436175] x19: ffff00010775deb8 x18: 00000000000000c0
[   28.441537] x17: 0000000000000000 x16: 000000008d9b0e88
[   28.446898] x15: 0000000000000001 x14: 00000000000002ba
[   28.452261] x13: 80a3002c00000002 x12: 00000000000005f4
[   28.457622] x11: 0000000000000030 x10: 000000000000000c
[   28.462985] x9 : 000000000000000c x8 : 0000000000000030
[   28.468346] x7 : ffff800014400000 x6 : ffff000106119758
[   28.473708] x5 : 0000000000000003 x4 : ffff00010775dc60
[   28.479068] x3 : 0000000000000000 x2 : 0000000000000060
[   28.484429] x1 : 000071775f776600 x0 : ffff00010775deb8
[   28.489791] Call trace:
[   28.492259]  get_work_pool+0x48/0x60
[   28.495874]  cancel_delayed_work+0x38/0xb0
[   28.500011]  prestera_port_handle_event+0x90/0xa0 [prestera]
[   28.505743]  prestera_evt_recv+0x98/0xe0 [prestera]
[   28.510683]  prestera_fw_evt_work_fn+0x180/0x228 [prestera_pci]
[   28.516660]  process_one_work+0x1e8/0x360
[   28.520710]  worker_thread+0x44/0x480
[   28.524412]  kthread+0x154/0x160
[   28.527670]  ret_from_fork+0x10/0x38
[   28.531290] Code: a8c17bfd d50323bf d65f03c0 9278dc21 (f9400020)
[   28.537429] ---[ end trace 5eced933df3a080b ]---

Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices")
Signed-off-by: Vadym Kochan <vkochan@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/prestera/prestera_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 25dd903a3e92..d849b0f65de2 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -431,7 +431,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw,
 			netif_carrier_on(port->dev);
 			if (!delayed_work_pending(caching_dw))
 				queue_delayed_work(prestera_wq, caching_dw, 0);
-		} else {
+		} else if (netif_running(port->dev) &&
+			   netif_carrier_ok(port->dev)) {
 			netif_carrier_off(port->dev);
 			if (delayed_work_pending(caching_dw))
 				cancel_delayed_work(caching_dw);
-- 
cgit v1.2.3


From d83b8aa5207d81f9f6daec9888390f079cc5db3f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 20 Apr 2021 18:16:14 +0100
Subject: net: davinci_emac: Fix incorrect masking of tx and rx error channel

The bit-masks used for the TXERRCH and RXERRCH (tx and rx error channels)
are incorrect and always lead to a zero result. The mask values are
currently the incorrect post-right shifted values, fix this by setting
them to the currect values.

(I double checked these against the TMS320TCI6482 data sheet, section
5.30, page 127 to ensure I had the correct mask values for the TXERRCH
and RXERRCH fields in the MACSTATUS register).

Addresses-Coverity: ("Operands don't affect result")
Fixes: a6286ee630f6 ("net: Add TI DaVinci EMAC driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/davinci_emac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index c7031e1960d4..03055c96f076 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -169,11 +169,11 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1";
 /* EMAC mac_status register */
 #define EMAC_MACSTATUS_TXERRCODE_MASK	(0xF00000)
 #define EMAC_MACSTATUS_TXERRCODE_SHIFT	(20)
-#define EMAC_MACSTATUS_TXERRCH_MASK	(0x7)
+#define EMAC_MACSTATUS_TXERRCH_MASK	(0x70000)
 #define EMAC_MACSTATUS_TXERRCH_SHIFT	(16)
 #define EMAC_MACSTATUS_RXERRCODE_MASK	(0xF000)
 #define EMAC_MACSTATUS_RXERRCODE_SHIFT	(12)
-#define EMAC_MACSTATUS_RXERRCH_MASK	(0x7)
+#define EMAC_MACSTATUS_RXERRCH_MASK	(0x700)
 #define EMAC_MACSTATUS_RXERRCH_SHIFT	(8)
 
 /* EMAC RX register masks */
-- 
cgit v1.2.3


From a926c025d56bb1acd8a192fca0e307331ee91b30 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 20 Apr 2021 11:36:22 +0200
Subject: net: wwan: mhi_wwan_ctrl: Fix RX buffer starvation

The mhi_wwan_rx_budget_dec function is supposed to return true if
RX buffer budget has been successfully decremented, allowing to queue
a new RX buffer for transfer. However the current implementation is
broken when RX budget is '1', in which case budget is decremented but
false is returned, preventing to requeue one buffer, and leading to
RX buffer starvation.

Fixes: fa588eba632d ("net: Add Qcom WWAN control driver")
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/mhi_wwan_ctrl.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c
index 416ae6abafb7..1bc6b69aa530 100644
--- a/drivers/net/wwan/mhi_wwan_ctrl.c
+++ b/drivers/net/wwan/mhi_wwan_ctrl.c
@@ -58,11 +58,11 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan)
 
 	spin_lock(&mhiwwan->rx_lock);
 
-	if (mhiwwan->rx_budget)
+	if (mhiwwan->rx_budget) {
 		mhiwwan->rx_budget--;
-
-	if (mhiwwan->rx_budget && test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags))
-		ret = true;
+		if (test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags))
+			ret = true;
+	}
 
 	spin_unlock(&mhiwwan->rx_lock);
 
-- 
cgit v1.2.3


From f98bf9ee63122d91ab93bfd549c5dd73bf0fd55e Mon Sep 17 00:00:00 2001
From: Guo-Feng Fan <vincent_fann@realtek.com>
Date: Mon, 19 Apr 2021 08:37:46 +0800
Subject: rtw88: 8822c: reorder macro position according to the register number

This patch doesn't change logic at all, just a refactor patch.

1. Move BIT MASK and BIT definition along with the register definition
2. Remove redundant definition
3. Align macros with Tab key

Signed-off-by: Guo-Feng Fan <vincent_fann@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419003748.3224-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c |  16 +-
 drivers/net/wireless/realtek/rtw88/rtw8822c.h | 275 +++++++++++++-------------
 2 files changed, 148 insertions(+), 143 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 9f05c60c8a03..39232a4aa65f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1095,13 +1095,13 @@ static void rtw8822c_pa_bias(struct rtw_dev *rtwdev)
 		if (pg_pa_bias == EFUSE_READ_FAIL)
 			return;
 		pg_pa_bias = FIELD_GET(PPG_PABIAS_MASK, pg_pa_bias);
-		rtw_write_rf(rtwdev, path, 0x60, RF_PABIAS_2G_MASK, pg_pa_bias);
+		rtw_write_rf(rtwdev, path, RF_PA, RF_PABIAS_2G_MASK, pg_pa_bias);
 	}
 	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
 		rtw_read8_physical_efuse(rtwdev, rf_efuse_5g[path],
 					 &pg_pa_bias);
 		pg_pa_bias = FIELD_GET(PPG_PABIAS_MASK, pg_pa_bias);
-		rtw_write_rf(rtwdev, path, 0x60, RF_PABIAS_5G_MASK, pg_pa_bias);
+		rtw_write_rf(rtwdev, path, RF_PA, RF_PABIAS_5G_MASK, pg_pa_bias);
 	}
 }
 
@@ -2546,9 +2546,9 @@ static void rtw8822c_dpk_pre_setting(struct rtw_dev *rtwdev)
 		rtw_write_rf(rtwdev, path, RF_RXAGC_OFFSET, RFREG_MASK, 0x0);
 		rtw_write32(rtwdev, REG_NCTL0, 0x8 | (path << 1));
 		if (rtwdev->dm_info.dpk_info.dpk_band == RTW_BAND_2G)
-			rtw_write32(rtwdev, REG_DPD_LUT3, 0x1f100000);
+			rtw_write32(rtwdev, REG_DPD_CTL1_S1, 0x1f100000);
 		else
-			rtw_write32(rtwdev, REG_DPD_LUT3, 0x1f0d0000);
+			rtw_write32(rtwdev, REG_DPD_CTL1_S1, 0x1f0d0000);
 		rtw_write32_mask(rtwdev, REG_DPD_LUT0, BIT_GLOSS_DB, 0x4);
 		rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_TX_CFIR, 0x3);
 	}
@@ -2566,11 +2566,11 @@ static u32 rtw8822c_dpk_rf_setting(struct rtw_dev *rtwdev, u8 path)
 
 	rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TX_GAIN, 0x1);
 	rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_PWR_TRIM, 0x1);
-	rtw_write_rf(rtwdev, path, RF_TX_GAIN_OFFSET, BIT_TX_OFFSET_VAL, 0x0);
+	rtw_write_rf(rtwdev, path, RF_TX_GAIN_OFFSET, BIT_BB_GAIN, 0x0);
 	rtw_write_rf(rtwdev, path, RF_TX_GAIN, RFREG_MASK, ori_txbb);
 
 	if (rtwdev->dm_info.dpk_info.dpk_band == RTW_BAND_2G) {
-		rtw_write_rf(rtwdev, path, RF_TX_GAIN_OFFSET, BIT_LB_ATT, 0x1);
+		rtw_write_rf(rtwdev, path, RF_TX_GAIN_OFFSET, BIT_RF_GAIN, 0x1);
 		rtw_write_rf(rtwdev, path, RF_RXG_GAIN, BIT_RXG_GAIN, 0x0);
 	} else {
 		rtw_write_rf(rtwdev, path, RF_TXA_LB_SW, BIT_TXA_LB_ATT, 0x0);
@@ -3317,9 +3317,9 @@ static void rtw8822c_dpk_reload_data(struct rtw_dev *rtwdev)
 		rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SUBPAGE,
 				 0x8 | (path << 1));
 		if (dpk_info->dpk_band == RTW_BAND_2G)
-			rtw_write32(rtwdev, REG_DPD_LUT3, 0x1f100000);
+			rtw_write32(rtwdev, REG_DPD_CTL1_S1, 0x1f100000);
 		else
-			rtw_write32(rtwdev, REG_DPD_LUT3, 0x1f0d0000);
+			rtw_write32(rtwdev, REG_DPD_CTL1_S1, 0x1f0d0000);
 
 		rtw_write8(rtwdev, REG_DPD_AGC, dpk_info->dpk_txagc[path]);
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
index e2b134ce0b3f..04ec1d7ab8f6 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
@@ -164,180 +164,185 @@ const struct rtw_table name ## _tbl = {			\
 
 #define REG_ANAPARLDO_POW_MAC	0x0029
 #define BIT_LDOE25_PON		BIT(0)
-
 #define XCAP_MASK		GENMASK(6, 0)
 #define CFO_TRK_ENABLE_TH	20
 #define CFO_TRK_STOP_TH		10
 #define CFO_TRK_ADJ_TH		10
 
-#define REG_TXDFIR0	0x808
-#define REG_DFIRBW	0x810
-#define REG_ANTMAP0	0x820
-#define REG_ANTMAP	0x824
-#define REG_DYMPRITH	0x86c
-#define REG_DYMENTH0	0x870
-#define REG_DYMENTH	0x874
-#define REG_SBD		0x88c
+#define REG_TXDFIR0		0x808
+#define REG_DFIRBW		0x810
+#define REG_ANTMAP0		0x820
+#define REG_ANTMAP		0x824
+#define REG_DYMPRITH		0x86c
+#define REG_DYMENTH0		0x870
+#define REG_DYMENTH		0x874
+#define REG_SBD			0x88c
 #define BITS_SUBTUNE		GENMASK(15, 12)
-#define REG_DYMTHMIN	0x8a4
-#define REG_TXBWCTL	0x9b0
-#define REG_TXCLK	0x9b4
-#define REG_SCOTRK	0xc30
-#define REG_MRCM	0xc38
-#define REG_AGCSWSH	0xc44
-#define REG_ANTWTPD	0xc54
-#define REG_PT_CHSMO	0xcbc
+#define REG_DYMTHMIN		0x8a4
+
+#define REG_TXBWCTL		0x9b0
+#define REG_TXCLK		0x9b4
+
+#define REG_SCOTRK		0xc30
+#define REG_MRCM		0xc38
+#define REG_AGCSWSH		0xc44
+#define REG_ANTWTPD		0xc54
+#define REG_PT_CHSMO		0xcbc
 #define BIT_PT_OPT		BIT(21)
-#define REG_ORITXCODE	0x1800
-#define REG_3WIRE	0x180c
+
+#define REG_ORITXCODE		0x1800
+#define REG_3WIRE		0x180c
 #define BIT_3WIRE_TX_EN		BIT(0)
 #define BIT_3WIRE_RX_EN		BIT(1)
 #define BIT_3WIRE_PI_ON		BIT(28)
-#define REG_ANAPAR_A	0x1830
+#define REG_ANAPAR_A		0x1830
 #define BIT_ANAPAR_UPDATE	BIT(29)
-#define REG_RXAGCCTL0	0x18ac
+#define REG_RXAGCCTL0		0x18ac
 #define BITS_RXAGC_CCK		GENMASK(15, 12)
 #define BITS_RXAGC_OFDM		GENMASK(8, 4)
-#define REG_DCKA_I_0	0x18bc
-#define REG_DCKA_I_1	0x18c0
-#define REG_DCKA_Q_0	0x18d8
-#define REG_DCKA_Q_1	0x18dc
-#define REG_CCKSB	0x1a00
-#define REG_RXCCKSEL	0x1a04
-#define REG_BGCTRL	0x1a14
+#define REG_DCKA_I_0		0x18bc
+#define REG_DCKA_I_1		0x18c0
+#define REG_DCKA_Q_0		0x18d8
+#define REG_DCKA_Q_1		0x18dc
+
+#define REG_CCKSB		0x1a00
+#define REG_RXCCKSEL		0x1a04
+#define REG_BGCTRL		0x1a14
 #define BITS_RX_IQ_WEIGHT	(BIT(8) | BIT(9))
-#define REG_TXF0	0x1a20
-#define REG_TXF1	0x1a24
-#define REG_TXF2	0x1a28
-#define REG_CCANRX	0x1a2c
+#define REG_TXF0		0x1a20
+#define REG_TXF1		0x1a24
+#define REG_TXF2		0x1a28
+#define REG_CCANRX		0x1a2c
 #define BIT_CCK_FA_RST		(BIT(14) | BIT(15))
 #define BIT_OFDM_FA_RST		(BIT(12) | BIT(13))
-#define REG_CCK_FACNT	0x1a5c
-#define REG_CCKTXONLY	0x1a80
+#define REG_CCK_FACNT		0x1a5c
+#define REG_CCKTXONLY		0x1a80
 #define BIT_BB_CCK_CHECK_EN	BIT(18)
-#define REG_TXF3	0x1a98
-#define REG_TXF4	0x1a9c
-#define REG_TXF5	0x1aa0
-#define REG_TXF6	0x1aac
-#define REG_TXF7	0x1ab0
-#define REG_CCK_SOURCE	0x1abc
+#define REG_TXF3		0x1a98
+#define REG_TXF4		0x1a9c
+#define REG_TXF5		0x1aa0
+#define REG_TXF6		0x1aac
+#define REG_TXF7		0x1ab0
+#define REG_CCK_SOURCE		0x1abc
 #define BIT_NBI_EN		BIT(30)
-#define REG_IQKSTAT	0x1b10
-#define REG_TXANT	0x1c28
-#define REG_ENCCK	0x1c3c
-#define BIT_CCK_BLK_EN		BIT(1)
-#define BIT_CCK_OFDM_BLK_EN	(BIT(0) | BIT(1))
-#define REG_CCAMSK	0x1c80
-#define REG_RSTB	0x1c90
-#define BIT_RSTB_3WIRE		BIT(8)
-#define REG_RX_BREAK	0x1d2c
-#define BIT_COM_RX_GCK_EN	BIT(31)
-#define REG_RXFNCTL	0x1d30
-#define REG_RXIGI	0x1d70
-#define REG_ENFN	0x1e24
-#define REG_TXANTSEG	0x1e28
-#define REG_TXLGMAP	0x1e2c
-#define REG_CCKPATH	0x1e5c
-#define REG_CNT_CTRL	0x1eb4
-#define BIT_ALL_CNT_RST		BIT(25)
-#define REG_OFDM_FACNT	0x2d00
-#define REG_OFDM_FACNT1	0x2d04
-#define REG_OFDM_FACNT2	0x2d08
-#define REG_OFDM_FACNT3	0x2d0c
-#define REG_OFDM_FACNT4	0x2d10
-#define REG_OFDM_FACNT5	0x2d20
-#define REG_RPT_CIP	0x2d9c
-#define REG_OFDM_TXCNT	0x2de0
-#define REG_ORITXCODE2	0x4100
-#define REG_3WIRE2	0x410c
-#define REG_ANAPAR_B	0x4130
-#define REG_RXAGCCTL	0x41ac
-#define REG_DCKB_I_0	0x41bc
-#define REG_DCKB_I_1	0x41c0
-#define REG_DCKB_Q_0	0x41d8
-#define REG_DCKB_Q_1	0x41dc
-
-#define RF_MODE_TRXAGC		0x00
-#define RF_RXAGC_OFFSET		0x19
-#define RF_BW_TRXBB		0x1a
-#define RF_TX_GAIN_OFFSET	0x55
-#define RF_TX_GAIN		0x56
-#define RF_TXA_LB_SW		0x63
-#define RF_RXG_GAIN		0x87
-#define RF_RXA_MIX_GAIN		0x8a
-#define RF_EXT_TIA_BW		0x8f
-#define RF_DEBUG		0xde
 
 #define REG_NCTL0		0x1b00
+#define BIT_SUBPAGE		GENMASK(3, 0)
 #define REG_DPD_CTL0_S0		0x1b04
+#define BIT_GS_PWSF		GENMASK(27, 0)
 #define REG_DPD_CTL1_S0		0x1b08
+#define BIT_DPD_EN		BIT(31)
+#define REG_IQKSTAT		0x1b10
 #define REG_IQK_CTL1		0x1b20
+#define BIT_BYPASS_DPD		BIT(25)
+#define BIT_TX_CFIR		GENMASK(31, 30)
 #define REG_DPD_LUT0		0x1b44
+#define BIT_GLOSS_DB		GENMASK(14, 12)
 #define REG_DPD_CTL0_S1		0x1b5c
-#define REG_DPD_LUT3		0x1b60
 #define REG_DPD_CTL1_S1		0x1b60
 #define REG_DPD_AGC		0x1b67
 #define REG_DPD_CTL0		0x1bb4
 #define REG_R_CONFIG		0x1bcc
+#define BIT_INNER_LB		BIT(21)
+#define BIT_IQ_SWITCH		GENMASK(5, 0)
 #define REG_RXSRAM_CTL		0x1bd4
+#define BIT_RPT_SEL		GENMASK(20, 16)
+#define BIT_DPD_CLK		GENMASK(7, 4)
 #define REG_DPD_CTL11		0x1be4
 #define REG_DPD_CTL12		0x1be8
 #define REG_DPD_CTL15		0x1bf4
 #define REG_DPD_CTL16		0x1bf8
 #define REG_STAT_RPT		0x1bfc
+#define BIT_RPT_DGAIN		GENMASK(27, 16)
 
-#define BIT_EXT_TIA_BW		BIT(1)
-#define BIT_DE_TRXBW		BIT(2)
-#define BIT_DE_TX_GAIN		BIT(16)
-#define BIT_RXG_GAIN		BIT(18)
-#define BIT_DE_PWR_TRIM		BIT(19)
-#define BIT_INNER_LB		BIT(21)
-#define BIT_BYPASS_DPD		BIT(25)
-#define BIT_DPD_EN		BIT(31)
-#define BIT_SUBPAGE		GENMASK(3, 0)
+#define REG_TXANT		0x1c28
+#define REG_ENCCK		0x1c3c
+#define BIT_CCK_BLK_EN		BIT(1)
+#define BIT_CCK_OFDM_BLK_EN	(BIT(0) | BIT(1))
+#define REG_CCAMSK		0x1c80
+#define REG_RSTB		0x1c90
+#define BIT_RSTB_3WIRE		BIT(8)
+
+#define REG_RX_BREAK		0x1d2c
+#define BIT_COM_RX_GCK_EN	BIT(31)
+#define REG_RXFNCTL		0x1d30
+#define REG_RXIGI		0x1d70
+#define REG_ENFN		0x1e24
+#define REG_TXANTSEG		0x1e28
+#define REG_TXLGMAP		0x1e2c
+#define REG_CCKPATH		0x1e5c
+#define REG_CNT_CTRL		0x1eb4
+#define BIT_ALL_CNT_RST		BIT(25)
+
+#define REG_OFDM_FACNT		0x2d00
+#define REG_OFDM_FACNT1		0x2d04
+#define REG_OFDM_FACNT2		0x2d08
+#define REG_OFDM_FACNT3		0x2d0c
+#define REG_OFDM_FACNT4		0x2d10
+#define REG_OFDM_FACNT5		0x2d20
+#define REG_RPT_CIP		0x2d9c
+#define REG_OFDM_TXCNT		0x2de0
+
+#define REG_ORITXCODE2		0x4100
+#define REG_3WIRE2		0x410c
+#define REG_ANAPAR_B		0x4130
+#define REG_RXAGCCTL		0x41ac
+#define REG_DCKB_I_0		0x41bc
+#define REG_DCKB_I_1		0x41c0
+#define REG_DCKB_Q_0		0x41d8
+#define REG_DCKB_Q_1		0x41dc
+
+#define RF_MODE_TRXAGC		0x00
+#define BIT_RXAGC		GENMASK(9, 5)
 #define BIT_TXAGC		GENMASK(4, 0)
+#define RF_RXAGC_OFFSET		0x19
+#define RF_BW_TRXBB		0x1a
+#define BIT_BW_TXBB		GENMASK(14, 12)
+#define BIT_BW_RXBB		GENMASK(11, 10)
+#define RF_TX_GAIN_OFFSET	0x55
+#define BIT_BB_GAIN		GENMASK(18, 14)
+#define BIT_RF_GAIN		GENMASK(4, 2)
+#define RF_TX_GAIN		0x56
 #define BIT_GAIN_TXBB		GENMASK(4, 0)
+#define RF_PA			0x60
+#define RF_PABIAS_2G_MASK	GENMASK(15, 12)
+#define RF_PABIAS_5G_MASK	GENMASK(19, 16)
+#define RF_TXA_LB_SW		0x63
+#define BIT_TXA_LB_ATT		GENMASK(15, 14)
+#define BIT_LB_SW		GENMASK(13, 12)
 #define BIT_LB_ATT		GENMASK(4, 2)
+#define RF_RXG_GAIN		0x87
+#define BIT_RXG_GAIN		BIT(18)
+#define RF_RXA_MIX_GAIN		0x8a
 #define BIT_RXA_MIX_GAIN	GENMASK(4, 3)
-#define BIT_IQ_SWITCH		GENMASK(5, 0)
-#define BIT_DPD_CLK		GENMASK(7, 4)
-#define BIT_RXAGC		GENMASK(9, 5)
-#define BIT_BW_RXBB		GENMASK(11, 10)
-#define BIT_LB_SW		GENMASK(13, 12)
-#define BIT_BW_TXBB		GENMASK(14, 12)
-#define BIT_GLOSS_DB		GENMASK(14, 12)
-#define BIT_TXA_LB_ATT		GENMASK(15, 14)
-#define BIT_TX_OFFSET_VAL	GENMASK(18, 14)
-#define BIT_RPT_SEL		GENMASK(20, 16)
-#define BIT_GS_PWSF		GENMASK(27, 0)
-#define BIT_RPT_DGAIN		GENMASK(27, 16)
-#define BIT_TX_CFIR		GENMASK(31, 30)
-
-#define PPG_THERMAL_A 0x1ef
-#define PPG_THERMAL_B 0x1b0
-#define RF_THEMAL_MASK GENMASK(19, 16)
-#define PPG_2GL_TXAB 0x1d4
-#define PPG_2GM_TXAB 0x1ee
-#define PPG_2GH_TXAB 0x1d2
-#define PPG_2G_A_MASK GENMASK(3, 0)
-#define PPG_2G_B_MASK GENMASK(7, 4)
-#define PPG_5GL1_TXA 0x1ec
-#define PPG_5GL2_TXA 0x1e8
-#define PPG_5GM1_TXA 0x1e4
-#define PPG_5GM2_TXA 0x1e0
-#define PPG_5GH1_TXA 0x1dc
-#define PPG_5GL1_TXB 0x1eb
-#define PPG_5GL2_TXB 0x1e7
-#define PPG_5GM1_TXB 0x1e3
-#define PPG_5GM2_TXB 0x1df
-#define PPG_5GH1_TXB 0x1db
-#define PPG_5G_MASK GENMASK(4, 0)
-#define PPG_PABIAS_2GA 0x1d6
-#define PPG_PABIAS_2GB 0x1d5
-#define PPG_PABIAS_5GA 0x1d8
-#define PPG_PABIAS_5GB 0x1d7
-#define PPG_PABIAS_MASK GENMASK(3, 0)
-#define RF_PABIAS_2G_MASK GENMASK(15, 12)
-#define RF_PABIAS_5G_MASK GENMASK(19, 16)
+#define RF_EXT_TIA_BW		0x8f
+#define RF_DEBUG		0xde
+#define BIT_DE_PWR_TRIM		BIT(19)
+#define BIT_DE_TX_GAIN		BIT(16)
+#define BIT_DE_TRXBW		BIT(2)
 
+#define PPG_THERMAL_B		0x1b0
+#define RF_THEMAL_MASK		GENMASK(19, 16)
+#define PPG_2GH_TXAB		0x1d2
+#define PPG_2G_A_MASK		GENMASK(3, 0)
+#define PPG_2G_B_MASK		GENMASK(7, 4)
+#define PPG_2GL_TXAB		0x1d4
+#define PPG_PABIAS_2GB		0x1d5
+#define PPG_PABIAS_2GA		0x1d6
+#define PPG_PABIAS_MASK		GENMASK(3, 0)
+#define PPG_PABIAS_5GB		0x1d7
+#define PPG_PABIAS_5GA		0x1d8
+#define PPG_5G_MASK		GENMASK(4, 0)
+#define PPG_5GH1_TXB		0x1db
+#define PPG_5GH1_TXA		0x1dc
+#define PPG_5GM2_TXB		0x1df
+#define PPG_5GM2_TXA		0x1e0
+#define PPG_5GM1_TXB		0x1e3
+#define PPG_5GM1_TXA		0x1e4
+#define PPG_5GL2_TXB		0x1e7
+#define PPG_5GL2_TXA		0x1e8
+#define PPG_5GL1_TXB		0x1eb
+#define PPG_5GL1_TXA		0x1ec
+#define PPG_2GM_TXAB		0x1ee
+#define PPG_THERMAL_A		0x1ef
 #endif
-- 
cgit v1.2.3


From 056b239f867274b573f7da15c43ff8afb523e35c Mon Sep 17 00:00:00 2001
From: Guo-Feng Fan <vincent_fann@realtek.com>
Date: Mon, 19 Apr 2021 08:37:47 +0800
Subject: rtw88: 8822c: Add gap-k calibration to improve long range performance

gap-k is a calibration mechanism to eliminate power gaps between
two nearly rate groups.

This mechanism improves performance in long range test by applying
proper power value to those rate groups which have nonlinear power gap.

Signed-off-by: Guo-Feng Fan <vincent_fann@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419003748.3224-3-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c       |  12 +
 drivers/net/wireless/realtek/rtw88/fw.h       |   5 +
 drivers/net/wireless/realtek/rtw88/main.h     |  23 +
 drivers/net/wireless/realtek/rtw88/reg.h      |   8 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 697 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.h |  65 ++-
 6 files changed, 809 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index c46b34fbe8bf..ea2cd4db1d3c 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -350,6 +350,18 @@ void rtw_fw_do_iqk(struct rtw_dev *rtwdev, struct rtw_iqk_para *para)
 }
 EXPORT_SYMBOL(rtw_fw_do_iqk);
 
+void rtw_fw_inform_rfk_status(struct rtw_dev *rtwdev, bool start)
+{
+	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
+
+	SET_H2C_CMD_ID_CLASS(h2c_pkt, H2C_CMD_WIFI_CALIBRATION);
+
+	RFK_SET_INFORM_START(h2c_pkt, start);
+
+	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
+}
+EXPORT_SYMBOL(rtw_fw_inform_rfk_status);
+
 void rtw_fw_query_bt_info(struct rtw_dev *rtwdev)
 {
 	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 5c89a54475dd..7c5b1d75e26f 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -354,6 +354,7 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 #define H2C_CMD_WL_CH_INFO		0x66
 #define H2C_CMD_QUERY_BT_MP_INFO	0x67
 #define H2C_CMD_BT_WIFI_CONTROL		0x69
+#define H2C_CMD_WIFI_CALIBRATION	0x6d
 
 #define H2C_CMD_KEEP_ALIVE		0x03
 #define H2C_CMD_DISCONNECT_DECISION	0x04
@@ -542,6 +543,9 @@ static inline void rtw_h2c_pkt_set_header(u8 *h2c_pkt, u8 sub_id)
 	le32_get_bits(*((__le32 *)(_header) + 0x01), GENMASK(31, 16))
 #define GET_FW_DUMP_TLV_VAL(_header)					\
 	le32_get_bits(*((__le32 *)(_header) + 0x02), GENMASK(31, 0))
+
+#define RFK_SET_INFORM_START(h2c_pkt, value)				\
+	le32p_replace_bits((__le32 *)(h2c_pkt) + 0x00, value, BIT(8))
 static inline struct rtw_c2h_cmd *get_c2h_from_skb(struct sk_buff *skb)
 {
 	u32 pkt_offset;
@@ -557,6 +561,7 @@ void rtw_fw_send_general_info(struct rtw_dev *rtwdev);
 void rtw_fw_send_phydm_info(struct rtw_dev *rtwdev);
 
 void rtw_fw_do_iqk(struct rtw_dev *rtwdev, struct rtw_iqk_para *para);
+void rtw_fw_inform_rfk_status(struct rtw_dev *rtwdev, bool start);
 void rtw_fw_set_pwr_mode(struct rtw_dev *rtwdev);
 void rtw_fw_set_pg_info(struct rtw_dev *rtwdev);
 void rtw_fw_query_bt_info(struct rtw_dev *rtwdev);
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 56a19b5a00fc..1e14dc4ef012 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1502,6 +1502,27 @@ struct rtw_iqk_info {
 	} result;
 };
 
+enum rtw_rf_band {
+	RF_BAND_2G_CCK,
+	RF_BAND_2G_OFDM,
+	RF_BAND_5G_L,
+	RF_BAND_5G_M,
+	RF_BAND_5G_H,
+	RF_BAND_MAX
+};
+
+#define RF_GAIN_NUM 11
+#define RF_HW_OFFSET_NUM 10
+
+struct rtw_gapk_info {
+	u32 rf3f_bp[RF_BAND_MAX][RF_GAIN_NUM][RTW_RF_PATH_MAX];
+	bool txgapk_bp_done;
+	s8 offset[RF_GAIN_NUM][RTW_RF_PATH_MAX];
+	s8 fianl_offset[RF_GAIN_NUM][RTW_RF_PATH_MAX];
+	u8 read_txgain;
+	u8 channel;
+};
+
 struct rtw_cfo_track {
 	bool is_adjust;
 	u8 crystal_cap;
@@ -1583,6 +1604,8 @@ struct rtw_dm_info {
 	struct ewma_snr ewma_snr[RTW_SNR_NUM];
 
 	struct rtw_iqk_info iqk;
+	struct rtw_gapk_info gapk;
+	bool is_bt_iqk_timeout;
 };
 
 struct rtw_efuse {
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 95d29f522e27..f5ce75095e90 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -129,6 +129,9 @@
 #define REG_MCU_TST_CFG		0x84
 #define VAL_FW_TRIGGER		0x1
 
+#define REG_PMC_DBG_CTRL1	0xa8
+#define BITS_PMC_BT_IQK_STS	GENMASK(22, 21)
+
 #define REG_EFUSE_ACCESS	0x00CF
 #define EFUSE_ACCESS_ON		0x69
 #define EFUSE_ACCESS_OFF	0x00
@@ -360,6 +363,7 @@
 #define REG_TX_PTCL_CTRL	0x0520
 #define BIT_SIFS_BK_EN		BIT(12)
 #define REG_TXPAUSE		0x0522
+#define BIT_AC_QUEUE		GENMASK(7, 0)
 #define REG_RD_CTRL		0x0524
 #define BIT_DIS_TXOP_CFE	BIT(10)
 #define BIT_DIS_LSIG_CFE	BIT(9)
@@ -644,10 +648,13 @@
 #define RF_WLSEL	0x02
 #define RF_DTXLOK	0x08
 #define RF_CFGCH	0x18
+#define BIT_BAND	GENMASK(18, 16)
 #define RF_RCK		0x1d
 #define RF_LUTWA	0x33
 #define RF_LUTWD1	0x3e
 #define RF_LUTWD0	0x3f
+#define BIT_GAIN_EXT	BIT(12)
+#define BIT_DATA_L	GENMASK(11, 0)
 #define RF_T_METER	0x42
 #define RF_BSPAD	0x54
 #define RF_GAINTX	0x56
@@ -664,6 +671,7 @@
 #define RF_RCKD		0xde
 #define RF_TXADBG	0xde
 #define RF_LUTDBG	0xdf
+#define BIT_TXA_TANK	BIT(4)
 #define RF_LUTWE2	0xee
 #define RF_LUTWE	0xef
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 39232a4aa65f..daa5150de87b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1105,6 +1105,700 @@ static void rtw8822c_pa_bias(struct rtw_dev *rtwdev)
 	}
 }
 
+static void rtw8822c_rfk_handshake(struct rtw_dev *rtwdev, bool is_before_k)
+{
+	struct rtw_dm_info *dm = &rtwdev->dm_info;
+	u8 u1b_tmp;
+	u8 u4b_tmp;
+	int ret;
+
+	if (is_before_k) {
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[RFK] WiFi / BT RFK handshake start!!\n");
+
+		if (!dm->is_bt_iqk_timeout) {
+			ret = read_poll_timeout(rtw_read32_mask, u4b_tmp,
+						u4b_tmp == 0, 20, 600000, false,
+						rtwdev, REG_PMC_DBG_CTRL1,
+						BITS_PMC_BT_IQK_STS);
+			if (ret) {
+				rtw_dbg(rtwdev, RTW_DBG_RFK,
+					"[RFK] Wait BT IQK finish timeout!!\n");
+				dm->is_bt_iqk_timeout = true;
+			}
+		}
+
+		rtw_fw_inform_rfk_status(rtwdev, true);
+
+		ret = read_poll_timeout(rtw_read8_mask, u1b_tmp,
+					u1b_tmp == 1, 20, 100000, false,
+					rtwdev, REG_ARFR4, BIT_WL_RFK);
+		if (ret)
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[RFK] Send WiFi RFK start H2C cmd FAIL!!\n");
+	} else {
+		rtw_fw_inform_rfk_status(rtwdev, false);
+		ret = read_poll_timeout(rtw_read8_mask, u1b_tmp,
+					u1b_tmp == 1, 20, 100000, false,
+					rtwdev, REG_ARFR4,
+					BIT_WL_RFK);
+		if (ret)
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[RFK] Send WiFi RFK finish H2C cmd FAIL!!\n");
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[RFK] WiFi / BT RFK handshake finish!!\n");
+	}
+}
+
+static void rtw8822c_rfk_power_save(struct rtw_dev *rtwdev,
+				    bool is_power_save)
+{
+	u8 path;
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+		rtw_write32_mask(rtwdev, REG_DPD_CTL1_S0, BIT_PS_EN,
+				 is_power_save ? 0 : 1);
+	}
+}
+
+static void rtw8822c_txgapk_backup_bb_reg(struct rtw_dev *rtwdev, const u32 reg[],
+					  u32 reg_backup[], u32 reg_num)
+{
+	u32 i;
+
+	for (i = 0; i < reg_num; i++) {
+		reg_backup[i] = rtw_read32(rtwdev, reg[i]);
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] Backup BB 0x%x = 0x%x\n",
+			reg[i], reg_backup[i]);
+	}
+}
+
+static void rtw8822c_txgapk_reload_bb_reg(struct rtw_dev *rtwdev,
+					  const u32 reg[], u32 reg_backup[],
+					  u32 reg_num)
+{
+	u32 i;
+
+	for (i = 0; i < reg_num; i++) {
+		rtw_write32(rtwdev, reg[i], reg_backup[i]);
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] Reload BB 0x%x = 0x%x\n",
+			reg[i], reg_backup[i]);
+	}
+}
+
+static bool check_rf_status(struct rtw_dev *rtwdev, u8 status)
+{
+	u8 reg_rf0_a, reg_rf0_b;
+
+	reg_rf0_a = (u8)rtw_read_rf(rtwdev, RF_PATH_A,
+				    RF_MODE_TRXAGC, BIT_RF_MODE);
+	reg_rf0_b = (u8)rtw_read_rf(rtwdev, RF_PATH_B,
+				    RF_MODE_TRXAGC, BIT_RF_MODE);
+
+	if (reg_rf0_a == status || reg_rf0_b == status)
+		return false;
+
+	return true;
+}
+
+static void rtw8822c_txgapk_tx_pause(struct rtw_dev *rtwdev)
+{
+	bool status;
+	int ret;
+
+	rtw_write8(rtwdev, REG_TXPAUSE, BIT_AC_QUEUE);
+	rtw_write32_mask(rtwdev, REG_TX_FIFO, BIT_STOP_TX, 0x2);
+
+	ret = read_poll_timeout_atomic(check_rf_status, status, status,
+				       2, 5000, false, rtwdev, 2);
+	if (ret)
+		rtw_warn(rtwdev, "failed to pause TX\n");
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] Tx pause!!\n");
+}
+
+static void rtw8822c_txgapk_bb_dpk(struct rtw_dev *rtwdev, u8 path)
+{
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	rtw_write32_mask(rtwdev, REG_ENFN, BIT_IQK_DPK_EN, 0x1);
+	rtw_write32_mask(rtwdev, REG_CH_DELAY_EXTR2,
+			 BIT_IQK_DPK_CLOCK_SRC, 0x1);
+	rtw_write32_mask(rtwdev, REG_CH_DELAY_EXTR2,
+			 BIT_IQK_DPK_RESET_SRC, 0x1);
+	rtw_write32_mask(rtwdev, REG_CH_DELAY_EXTR2, BIT_EN_IOQ_IQK_DPK, 0x1);
+	rtw_write32_mask(rtwdev, REG_CH_DELAY_EXTR2, BIT_TST_IQK2SET_SRC, 0x0);
+	rtw_write32_mask(rtwdev, REG_CCA_OFF, BIT_CCA_ON_BY_PW, 0x1ff);
+
+	if (path == RF_PATH_A) {
+		rtw_write32_mask(rtwdev, REG_RFTXEN_GCK_A,
+				 BIT_RFTXEN_GCK_FORCE_ON, 0x1);
+		rtw_write32_mask(rtwdev, REG_3WIRE, BIT_DIS_SHARERX_TXGAT, 0x1);
+		rtw_write32_mask(rtwdev, REG_DIS_SHARE_RX_A,
+				 BIT_TX_SCALE_0DB, 0x1);
+		rtw_write32_mask(rtwdev, REG_3WIRE, BIT_3WIRE_EN, 0x0);
+	} else if (path == RF_PATH_B) {
+		rtw_write32_mask(rtwdev, REG_RFTXEN_GCK_B,
+				 BIT_RFTXEN_GCK_FORCE_ON, 0x1);
+		rtw_write32_mask(rtwdev, REG_3WIRE2,
+				 BIT_DIS_SHARERX_TXGAT, 0x1);
+		rtw_write32_mask(rtwdev, REG_DIS_SHARE_RX_B,
+				 BIT_TX_SCALE_0DB, 0x1);
+		rtw_write32_mask(rtwdev, REG_3WIRE2, BIT_3WIRE_EN, 0x0);
+	}
+	rtw_write32_mask(rtwdev, REG_CCKSB, BIT_BBMODE, 0x2);
+}
+
+static void rtw8822c_txgapk_afe_dpk(struct rtw_dev *rtwdev, u8 path)
+{
+	u32 reg;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (path == RF_PATH_A) {
+		reg = REG_ANAPAR_A;
+	} else if (path == RF_PATH_B) {
+		reg = REG_ANAPAR_B;
+	} else {
+		rtw_err(rtwdev, "[TXGAPK] unknown path %d!!\n", path);
+		return;
+	}
+
+	rtw_write32_mask(rtwdev, REG_IQK_CTRL, MASKDWORD, MASKDWORD);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x700f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x700f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x701f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x702f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x703f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x704f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x705f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x706f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x707f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x708f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x709f0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70af0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70bf0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70cf0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70df0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70ef0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70ff0001);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70ff0001);
+}
+
+static void rtw8822c_txgapk_afe_dpk_restore(struct rtw_dev *rtwdev, u8 path)
+{
+	u32 reg;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (path == RF_PATH_A) {
+		reg = REG_ANAPAR_A;
+	} else if (path == RF_PATH_B) {
+		reg = REG_ANAPAR_B;
+	} else {
+		rtw_err(rtwdev, "[TXGAPK] unknown path %d!!\n", path);
+		return;
+	}
+	rtw_write32_mask(rtwdev, REG_IQK_CTRL, MASKDWORD, 0xffa1005e);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x700b8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70144041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70244041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70344041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70444041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x705b8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70644041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x707b8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x708b8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x709b8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70ab8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70bb8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70cb8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70db8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70eb8041);
+	rtw_write32_mask(rtwdev, reg, MASKDWORD, 0x70fb8041);
+}
+
+static void rtw8822c_txgapk_bb_dpk_restore(struct rtw_dev *rtwdev, u8 path)
+{
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TX_GAIN, 0x0);
+	rtw_write_rf(rtwdev, path, RF_DIS_BYPASS_TXBB, BIT_TIA_BYPASS, 0x0);
+	rtw_write_rf(rtwdev, path, RF_DIS_BYPASS_TXBB, BIT_TXBB, 0x0);
+
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, 0x0);
+	rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_TX_CFIR, 0x0);
+	rtw_write32_mask(rtwdev, REG_SINGLE_TONE_SW, BIT_IRQ_TEST_MODE, 0x0);
+	rtw_write32_mask(rtwdev, REG_R_CONFIG, MASKBYTE0, 0x00);
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, 0x1);
+	rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_TX_CFIR, 0x0);
+	rtw_write32_mask(rtwdev, REG_SINGLE_TONE_SW, BIT_IRQ_TEST_MODE, 0x0);
+	rtw_write32_mask(rtwdev, REG_R_CONFIG, MASKBYTE0, 0x00);
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, 0x0);
+	rtw_write32_mask(rtwdev, REG_CCA_OFF, BIT_CCA_ON_BY_PW, 0x0);
+
+	if (path == RF_PATH_A) {
+		rtw_write32_mask(rtwdev, REG_RFTXEN_GCK_A,
+				 BIT_RFTXEN_GCK_FORCE_ON, 0x0);
+		rtw_write32_mask(rtwdev, REG_3WIRE, BIT_DIS_SHARERX_TXGAT, 0x0);
+		rtw_write32_mask(rtwdev, REG_DIS_SHARE_RX_A,
+				 BIT_TX_SCALE_0DB, 0x0);
+		rtw_write32_mask(rtwdev, REG_3WIRE, BIT_3WIRE_EN, 0x3);
+	} else if (path == RF_PATH_B) {
+		rtw_write32_mask(rtwdev, REG_RFTXEN_GCK_B,
+				 BIT_RFTXEN_GCK_FORCE_ON, 0x0);
+		rtw_write32_mask(rtwdev, REG_3WIRE2,
+				 BIT_DIS_SHARERX_TXGAT, 0x0);
+		rtw_write32_mask(rtwdev, REG_DIS_SHARE_RX_B,
+				 BIT_TX_SCALE_0DB, 0x0);
+		rtw_write32_mask(rtwdev, REG_3WIRE2, BIT_3WIRE_EN, 0x3);
+	}
+
+	rtw_write32_mask(rtwdev, REG_CCKSB, BIT_BBMODE, 0x0);
+	rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_CFIR_EN, 0x5);
+}
+
+static bool _rtw8822c_txgapk_gain_valid(struct rtw_dev *rtwdev, u32 gain)
+{
+	if ((FIELD_GET(BIT_GAIN_TX_PAD_H, gain) >= 0xc) &&
+	    (FIELD_GET(BIT_GAIN_TX_PAD_L, gain) >= 0xe))
+		return true;
+
+	return false;
+}
+
+static void _rtw8822c_txgapk_write_gain_bb_table(struct rtw_dev *rtwdev,
+						 u8 band, u8 path)
+{
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	u32 v, tmp_3f = 0;
+	u8 gain, check_txgain;
+
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+
+	switch (band) {
+	case RF_BAND_2G_OFDM:
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x0);
+		break;
+	case RF_BAND_5G_L:
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x2);
+		break;
+	case RF_BAND_5G_M:
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x3);
+		break;
+	case RF_BAND_5G_H:
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x4);
+		break;
+	default:
+		break;
+	}
+
+	rtw_write32_mask(rtwdev, REG_TX_GAIN_SET, MASKBYTE0, 0x88);
+
+	check_txgain = 0;
+	for (gain = 0; gain < RF_GAIN_NUM; gain++) {
+		v = txgapk->rf3f_bp[band][gain][path];
+		if (_rtw8822c_txgapk_gain_valid(rtwdev, v)) {
+			if (!check_txgain) {
+				tmp_3f = txgapk->rf3f_bp[band][gain][path];
+				check_txgain = 1;
+			}
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[TXGAPK] tx_gain=0x%03X >= 0xCEX\n",
+				txgapk->rf3f_bp[band][gain][path]);
+		} else {
+			tmp_3f = txgapk->rf3f_bp[band][gain][path];
+		}
+
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN, tmp_3f);
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_I_GAIN, gain);
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_GAIN_RST, 0x1);
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_GAIN_RST, 0x0);
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] Band=%d 0x1b98[11:0]=0x%03X path=%d\n",
+			band, tmp_3f, path);
+	}
+}
+
+static void rtw8822c_txgapk_write_gain_bb_table(struct rtw_dev *rtwdev)
+{
+	u8 path, band;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s channel=%d\n",
+		__func__, rtwdev->dm_info.gapk.channel);
+
+	for (band = 0; band < RF_BAND_MAX; band++) {
+		for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+			_rtw8822c_txgapk_write_gain_bb_table(rtwdev,
+							     band, path);
+		}
+	}
+}
+
+static void rtw8822c_txgapk_read_offset(struct rtw_dev *rtwdev, u8 path)
+{
+	static const u32 cfg1_1b00[2] = {0x00000d18, 0x00000d2a};
+	static const u32 cfg2_1b00[2] = {0x00000d19, 0x00000d2b};
+	static const u32 set_pi[2] = {REG_RSV_CTRL, REG_WLRF1};
+	static const u32 path_setting[2] = {REG_ORITXCODE, REG_ORITXCODE2};
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	u8 channel = txgapk->channel;
+	u32 val;
+	int i;
+
+	if (path >= ARRAY_SIZE(cfg1_1b00) ||
+	    path >= ARRAY_SIZE(cfg2_1b00) ||
+	    path >= ARRAY_SIZE(set_pi) ||
+	    path >= ARRAY_SIZE(path_setting)) {
+		rtw_warn(rtwdev, "[TXGAPK] wrong path %d\n", path);
+		return;
+	}
+
+	rtw_write32_mask(rtwdev, REG_ANTMAP0, BIT_ANT_PATH, path + 1);
+	rtw_write32_mask(rtwdev, REG_TXLGMAP, MASKDWORD, 0xe4e40000);
+	rtw_write32_mask(rtwdev, REG_TXANTSEG, BIT_ANTSEG, 0x3);
+	rtw_write32_mask(rtwdev, path_setting[path], MASK20BITS, 0x33312);
+	rtw_write32_mask(rtwdev, path_setting[path], BIT_PATH_EN, 0x1);
+	rtw_write32_mask(rtwdev, set_pi[path], BITS_RFC_DIRECT, 0x0);
+	rtw_write_rf(rtwdev, path, RF_LUTDBG, BIT_TXA_TANK, 0x1);
+	rtw_write_rf(rtwdev, path, RF_IDAC, BIT_TX_MODE, 0x820);
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+	rtw_write32_mask(rtwdev, REG_IQKSTAT, MASKBYTE0, 0x0);
+
+	rtw_write32_mask(rtwdev, REG_TX_TONE_IDX, MASKBYTE0, 0x018);
+	fsleep(1000);
+	if (channel >= 1 && channel <= 14)
+		rtw_write32_mask(rtwdev, REG_R_CONFIG, MASKBYTE0, BIT_2G_SWING);
+	else
+		rtw_write32_mask(rtwdev, REG_R_CONFIG, MASKBYTE0, BIT_5G_SWING);
+	fsleep(1000);
+
+	rtw_write32_mask(rtwdev, REG_NCTL0, MASKDWORD, cfg1_1b00[path]);
+	rtw_write32_mask(rtwdev, REG_NCTL0, MASKDWORD, cfg2_1b00[path]);
+
+	read_poll_timeout(rtw_read32_mask, val,
+			  val == 0x55, 1000, 100000, false,
+			  rtwdev, REG_RPT_CIP, BIT_RPT_CIP_STATUS);
+
+	rtw_write32_mask(rtwdev, set_pi[path], BITS_RFC_DIRECT, 0x2);
+	rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+	rtw_write32_mask(rtwdev, REG_RXSRAM_CTL, BIT_RPT_EN, 0x1);
+	rtw_write32_mask(rtwdev, REG_RXSRAM_CTL, BIT_RPT_SEL, 0x12);
+	rtw_write32_mask(rtwdev, REG_TX_GAIN_SET, BIT_GAPK_RPT_IDX, 0x3);
+	val = rtw_read32(rtwdev, REG_STAT_RPT);
+
+	txgapk->offset[0][path] = (s8)FIELD_GET(BIT_GAPK_RPT0, val);
+	txgapk->offset[1][path] = (s8)FIELD_GET(BIT_GAPK_RPT1, val);
+	txgapk->offset[2][path] = (s8)FIELD_GET(BIT_GAPK_RPT2, val);
+	txgapk->offset[3][path] = (s8)FIELD_GET(BIT_GAPK_RPT3, val);
+	txgapk->offset[4][path] = (s8)FIELD_GET(BIT_GAPK_RPT4, val);
+	txgapk->offset[5][path] = (s8)FIELD_GET(BIT_GAPK_RPT5, val);
+	txgapk->offset[6][path] = (s8)FIELD_GET(BIT_GAPK_RPT6, val);
+	txgapk->offset[7][path] = (s8)FIELD_GET(BIT_GAPK_RPT7, val);
+
+	rtw_write32_mask(rtwdev, REG_TX_GAIN_SET, BIT_GAPK_RPT_IDX, 0x4);
+	val = rtw_read32(rtwdev, REG_STAT_RPT);
+
+	txgapk->offset[8][path] = (s8)FIELD_GET(BIT_GAPK_RPT0, val);
+	txgapk->offset[9][path] = (s8)FIELD_GET(BIT_GAPK_RPT1, val);
+
+	for (i = 0; i < RF_HW_OFFSET_NUM; i++)
+		if (txgapk->offset[i][path] & BIT(3))
+			txgapk->offset[i][path] = txgapk->offset[i][path] |
+						  0xf0;
+	for (i = 0; i < RF_HW_OFFSET_NUM; i++)
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] offset %d %d path=%d\n",
+			txgapk->offset[i][path], i, path);
+}
+
+static void rtw8822c_txgapk_calculate_offset(struct rtw_dev *rtwdev, u8 path)
+{
+	static const u32 bb_reg[] = {REG_ANTMAP0, REG_TXLGMAP, REG_TXANTSEG,
+				     REG_ORITXCODE, REG_ORITXCODE2};
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	u8 channel = txgapk->channel;
+	u32 reg_backup[ARRAY_SIZE(bb_reg)] = {0};
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s channel=%d\n",
+		__func__, channel);
+
+	rtw8822c_txgapk_backup_bb_reg(rtwdev, bb_reg,
+				      reg_backup, ARRAY_SIZE(bb_reg));
+
+	if (channel >= 1 && channel <= 14) {
+		rtw_write32_mask(rtwdev,
+				 REG_SINGLE_TONE_SW, BIT_IRQ_TEST_MODE, 0x0);
+		rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+		rtw_write32_mask(rtwdev, REG_R_CONFIG, BIT_IQ_SWITCH, 0x3f);
+		rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_TX_CFIR, 0x0);
+		rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TX_GAIN, 0x1);
+		rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, RFREG_MASK, 0x5000f);
+		rtw_write_rf(rtwdev, path, RF_TX_GAIN_OFFSET, BIT_RF_GAIN, 0x0);
+		rtw_write_rf(rtwdev, path, RF_RXG_GAIN, BIT_RXG_GAIN, 0x1);
+		rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, BIT_RXAGC, 0x0f);
+		rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TRXBW, 0x1);
+		rtw_write_rf(rtwdev, path, RF_BW_TRXBB, BIT_BW_TXBB, 0x1);
+		rtw_write_rf(rtwdev, path, RF_BW_TRXBB, BIT_BW_RXBB, 0x0);
+		rtw_write_rf(rtwdev, path, RF_EXT_TIA_BW, BIT_PW_EXT_TIA, 0x1);
+
+		rtw_write32_mask(rtwdev, REG_IQKSTAT, MASKBYTE0, 0x00);
+		rtw_write32_mask(rtwdev, REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x0);
+
+		rtw8822c_txgapk_read_offset(rtwdev, path);
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "=============================\n");
+
+	} else {
+		rtw_write32_mask(rtwdev,
+				 REG_SINGLE_TONE_SW, BIT_IRQ_TEST_MODE, 0x0);
+		rtw_write32_mask(rtwdev, REG_NCTL0, BIT_SEL_PATH, path);
+		rtw_write32_mask(rtwdev, REG_R_CONFIG, BIT_IQ_SWITCH, 0x3f);
+		rtw_write32_mask(rtwdev, REG_IQK_CTL1, BIT_TX_CFIR, 0x0);
+		rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TX_GAIN, 0x1);
+		rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, RFREG_MASK, 0x50011);
+		rtw_write_rf(rtwdev, path, RF_TXA_LB_SW, BIT_TXA_LB_ATT, 0x3);
+		rtw_write_rf(rtwdev, path, RF_TXA_LB_SW, BIT_LB_ATT, 0x3);
+		rtw_write_rf(rtwdev, path, RF_TXA_LB_SW, BIT_LB_SW, 0x1);
+		rtw_write_rf(rtwdev, path,
+			     RF_RXA_MIX_GAIN, BIT_RXA_MIX_GAIN, 0x2);
+		rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, BIT_RXAGC, 0x12);
+		rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TRXBW, 0x1);
+		rtw_write_rf(rtwdev, path, RF_BW_TRXBB, BIT_BW_RXBB, 0x0);
+		rtw_write_rf(rtwdev, path, RF_EXT_TIA_BW, BIT_PW_EXT_TIA, 0x1);
+		rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, BIT_RF_MODE, 0x5);
+
+		rtw_write32_mask(rtwdev, REG_IQKSTAT, MASKBYTE0, 0x0);
+
+		if (channel >= 36 && channel <= 64)
+			rtw_write32_mask(rtwdev,
+					 REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x2);
+		else if (channel >= 100 && channel <= 144)
+			rtw_write32_mask(rtwdev,
+					 REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x3);
+		else if (channel >= 149 && channel <= 177)
+			rtw_write32_mask(rtwdev,
+					 REG_TABLE_SEL, BIT_Q_GAIN_SEL, 0x4);
+
+		rtw8822c_txgapk_read_offset(rtwdev, path);
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "=============================\n");
+	}
+	rtw8822c_txgapk_reload_bb_reg(rtwdev, bb_reg,
+				      reg_backup, ARRAY_SIZE(bb_reg));
+}
+
+static void rtw8822c_txgapk_rf_restore(struct rtw_dev *rtwdev, u8 path)
+{
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (path >= rtwdev->hal.rf_path_num)
+		return;
+
+	rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC, BIT_RF_MODE, 0x3);
+	rtw_write_rf(rtwdev, path, RF_DEBUG, BIT_DE_TRXBW, 0x0);
+	rtw_write_rf(rtwdev, path, RF_EXT_TIA_BW, BIT_PW_EXT_TIA, 0x0);
+}
+
+static u32 rtw8822c_txgapk_cal_gain(struct rtw_dev *rtwdev, u32 gain, s8 offset)
+{
+	u32 gain_x2, new_gain;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (_rtw8822c_txgapk_gain_valid(rtwdev, gain)) {
+		new_gain = gain;
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] gain=0x%03X(>=0xCEX) offset=%d new_gain=0x%03X\n",
+			gain, offset, new_gain);
+		return new_gain;
+	}
+
+	gain_x2 = (gain << 1) + offset;
+	new_gain = (gain_x2 >> 1) | (gain_x2 & BIT(0) ? BIT_GAIN_EXT : 0);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[TXGAPK] gain=0x%X offset=%d new_gain=0x%X\n",
+		gain, offset, new_gain);
+
+	return new_gain;
+}
+
+static void rtw8822c_txgapk_write_tx_gain(struct rtw_dev *rtwdev)
+{
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	u32 i, j, tmp = 0x20, tmp_3f, v;
+	s8 offset_tmp[RF_GAIN_NUM] = {0};
+	u8 path, band = RF_BAND_2G_OFDM, channel = txgapk->channel;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (channel >= 1 && channel <= 14) {
+		tmp = 0x20;
+		band = RF_BAND_2G_OFDM;
+	} else if (channel >= 36 && channel <= 64) {
+		tmp = 0x200;
+		band = RF_BAND_5G_L;
+	} else if (channel >= 100 && channel <= 144) {
+		tmp = 0x280;
+		band = RF_BAND_5G_M;
+	} else if (channel >= 149 && channel <= 177) {
+		tmp = 0x300;
+		band = RF_BAND_5G_H;
+	} else {
+		rtw_err(rtwdev, "[TXGAPK] unknown channel %d!!\n", channel);
+		return;
+	}
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		for (i = 0; i < RF_GAIN_NUM; i++) {
+			offset_tmp[i] = 0;
+			for (j = i; j < RF_GAIN_NUM; j++) {
+				v = txgapk->rf3f_bp[band][j][path];
+				if (_rtw8822c_txgapk_gain_valid(rtwdev, v))
+					continue;
+
+				offset_tmp[i] += txgapk->offset[j][path];
+				txgapk->fianl_offset[i][path] = offset_tmp[i];
+			}
+
+			v = txgapk->rf3f_bp[band][i][path];
+			if (_rtw8822c_txgapk_gain_valid(rtwdev, v))
+				rtw_dbg(rtwdev, RTW_DBG_RFK,
+					"[TXGAPK] tx_gain=0x%03X >= 0xCEX\n",
+					txgapk->rf3f_bp[band][i][path]);
+			else
+				rtw_dbg(rtwdev, RTW_DBG_RFK,
+					"[TXGAPK] offset %d %d\n",
+					offset_tmp[i], i);
+		}
+
+		rtw_write_rf(rtwdev, path, RF_LUTWE2, RFREG_MASK, 0x10000);
+		for (i = 0; i < RF_GAIN_NUM; i++) {
+			rtw_write_rf(rtwdev, path,
+				     RF_LUTWA, RFREG_MASK, tmp + i);
+
+			tmp_3f = rtw8822c_txgapk_cal_gain(rtwdev,
+							  txgapk->rf3f_bp[band][i][path],
+							  offset_tmp[i]);
+			rtw_write_rf(rtwdev, path, RF_LUTWD0,
+				     BIT_GAIN_EXT | BIT_DATA_L, tmp_3f);
+
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[TXGAPK] 0x33=0x%05X 0x3f=0x%04X\n",
+				tmp + i, tmp_3f);
+		}
+		rtw_write_rf(rtwdev, path, RF_LUTWE2, RFREG_MASK, 0x0);
+	}
+}
+
+static void rtw8822c_txgapk_save_all_tx_gain_table(struct rtw_dev *rtwdev)
+{
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	static const u32 three_wire[2] = {REG_3WIRE, REG_3WIRE2};
+	static const u8 ch_num[RF_BAND_MAX] = {1, 1, 36, 100, 149};
+	static const u8 band_num[RF_BAND_MAX] = {0x0, 0x0, 0x1, 0x3, 0x5};
+	static const u8 cck[RF_BAND_MAX] = {0x1, 0x0, 0x0, 0x0, 0x0};
+	u8 path, band, gain, rf0_idx;
+	u32 rf18, v;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	if (txgapk->read_txgain == 1) {
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] Already Read txgapk->read_txgain return!!!\n");
+		rtw8822c_txgapk_write_gain_bb_table(rtwdev);
+		return;
+	}
+
+	for (band = 0; band < RF_BAND_MAX; band++) {
+		for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+			rf18 = rtw_read_rf(rtwdev, path, RF_CFGCH, RFREG_MASK);
+
+			rtw_write32_mask(rtwdev,
+					 three_wire[path], BIT_3WIRE_EN, 0x0);
+			rtw_write_rf(rtwdev, path,
+				     RF_CFGCH, MASKBYTE0, ch_num[band]);
+			rtw_write_rf(rtwdev, path,
+				     RF_CFGCH, BIT_BAND, band_num[band]);
+			rtw_write_rf(rtwdev, path,
+				     RF_BW_TRXBB, BIT_DBG_CCK_CCA, cck[band]);
+			rtw_write_rf(rtwdev, path,
+				     RF_BW_TRXBB, BIT_TX_CCK_IND, cck[band]);
+			gain = 0;
+			for (rf0_idx = 1; rf0_idx < 32; rf0_idx += 3) {
+				rtw_write_rf(rtwdev, path, RF_MODE_TRXAGC,
+					     MASKBYTE0, rf0_idx);
+				v = rtw_read_rf(rtwdev, path,
+						RF_TX_RESULT, RFREG_MASK);
+				txgapk->rf3f_bp[band][gain][path] = v & BIT_DATA_L;
+
+				rtw_dbg(rtwdev, RTW_DBG_RFK,
+					"[TXGAPK] 0x5f=0x%03X band=%d path=%d\n",
+					txgapk->rf3f_bp[band][gain][path],
+					band, path);
+				gain++;
+			}
+			rtw_write_rf(rtwdev, path, RF_CFGCH, RFREG_MASK, rf18);
+			rtw_write32_mask(rtwdev,
+					 three_wire[path], BIT_3WIRE_EN, 0x3);
+		}
+	}
+	rtw8822c_txgapk_write_gain_bb_table(rtwdev);
+	txgapk->read_txgain = 1;
+}
+
+static void rtw8822c_txgapk(struct rtw_dev *rtwdev)
+{
+	static const u32 bb_reg[2] = {REG_TX_PTCL_CTRL, REG_TX_FIFO};
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	u32 bb_reg_backup[2];
+	u8 path;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
+
+	rtw8822c_txgapk_save_all_tx_gain_table(rtwdev);
+
+	if (txgapk->read_txgain == 0) {
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] txgapk->read_txgain == 0 return!!!\n");
+		return;
+	}
+
+	if (rtwdev->efuse.power_track_type >= 4 &&
+	    rtwdev->efuse.power_track_type <= 7) {
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[TXGAPK] Normal Mode in TSSI mode. return!!!\n");
+		return;
+	}
+
+	rtw8822c_txgapk_backup_bb_reg(rtwdev, bb_reg,
+				      bb_reg_backup, ARRAY_SIZE(bb_reg));
+	rtw8822c_txgapk_tx_pause(rtwdev);
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		txgapk->channel = rtw_read_rf(rtwdev, path,
+					      RF_CFGCH, RFREG_MASK) & MASKBYTE0;
+		rtw8822c_txgapk_bb_dpk(rtwdev, path);
+		rtw8822c_txgapk_afe_dpk(rtwdev, path);
+		rtw8822c_txgapk_calculate_offset(rtwdev, path);
+		rtw8822c_txgapk_rf_restore(rtwdev, path);
+		rtw8822c_txgapk_afe_dpk_restore(rtwdev, path);
+		rtw8822c_txgapk_bb_dpk_restore(rtwdev, path);
+	}
+	rtw8822c_txgapk_write_tx_gain(rtwdev);
+	rtw8822c_txgapk_reload_bb_reg(rtwdev, bb_reg,
+				      bb_reg_backup, ARRAY_SIZE(bb_reg));
+}
+
+static void rtw8822c_do_gapk(struct rtw_dev *rtwdev)
+{
+	rtw8822c_rfk_handshake(rtwdev, true);
+	rtw8822c_txgapk(rtwdev);
+	rtw8822c_rfk_handshake(rtwdev, false);
+}
+
 static void rtw8822c_rf_init(struct rtw_dev *rtwdev)
 {
 	rtw8822c_rf_dac_cal(rtwdev);
@@ -3403,8 +4097,11 @@ static void rtw8822c_do_dpk(struct rtw_dev *rtwdev)
 
 static void rtw8822c_phy_calibration(struct rtw_dev *rtwdev)
 {
+	rtw8822c_rfk_power_save(rtwdev, false);
+	rtw8822c_do_gapk(rtwdev);
 	rtw8822c_do_iqk(rtwdev);
 	rtw8822c_do_dpk(rtwdev);
+	rtw8822c_rfk_power_save(rtwdev, true);
 }
 
 static void rtw8822c_dpk_track(struct rtw_dev *rtwdev)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
index 04ec1d7ab8f6..364afc6d851b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
@@ -172,6 +172,7 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_TXDFIR0		0x808
 #define REG_DFIRBW		0x810
 #define REG_ANTMAP0		0x820
+#define BIT_ANT_PATH		GENMASK(1, 0)
 #define REG_ANTMAP		0x824
 #define REG_DYMPRITH		0x86c
 #define REG_DYMENTH0		0x870
@@ -191,12 +192,19 @@ const struct rtw_table name ## _tbl = {			\
 #define BIT_PT_OPT		BIT(21)
 
 #define REG_ORITXCODE		0x1800
+#define BIT_PATH_EN		BIT(31)
 #define REG_3WIRE		0x180c
+#define BIT_DIS_SHARERX_TXGAT	BIT(27)
 #define BIT_3WIRE_TX_EN		BIT(0)
 #define BIT_3WIRE_RX_EN		BIT(1)
+#define BIT_3WIRE_EN		GENMASK(1, 0)
 #define BIT_3WIRE_PI_ON		BIT(28)
 #define REG_ANAPAR_A		0x1830
 #define BIT_ANAPAR_UPDATE	BIT(29)
+#define REG_RFTXEN_GCK_A	0x1864
+#define BIT_RFTXEN_GCK_FORCE_ON	BIT(31)
+#define REG_DIS_SHARE_RX_A	0x186c
+#define BIT_TX_SCALE_0DB	BIT(7)
 #define REG_RXAGCCTL0		0x18ac
 #define BITS_RXAGC_CCK		GENMASK(15, 12)
 #define BITS_RXAGC_OFDM		GENMASK(8, 4)
@@ -206,6 +214,7 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_DCKA_Q_1		0x18dc
 
 #define REG_CCKSB		0x1a00
+#define BIT_BBMODE		GENMASK(2, 1)
 #define REG_RXCCKSEL		0x1a04
 #define REG_BGCTRL		0x1a14
 #define BITS_RX_IQ_WEIGHT	(BIT(8) | BIT(9))
@@ -227,25 +236,42 @@ const struct rtw_table name ## _tbl = {			\
 #define BIT_NBI_EN		BIT(30)
 
 #define REG_NCTL0		0x1b00
+#define BIT_SEL_PATH		GENMASK(2, 1)
 #define BIT_SUBPAGE		GENMASK(3, 0)
 #define REG_DPD_CTL0_S0		0x1b04
 #define BIT_GS_PWSF		GENMASK(27, 0)
 #define REG_DPD_CTL1_S0		0x1b08
 #define BIT_DPD_EN		BIT(31)
+#define BIT_PS_EN		BIT(7)
 #define REG_IQKSTAT		0x1b10
 #define REG_IQK_CTL1		0x1b20
-#define BIT_BYPASS_DPD		BIT(25)
 #define BIT_TX_CFIR		GENMASK(31, 30)
+#define BIT_CFIR_EN		GENMASK(26, 24)
+#define BIT_BYPASS_DPD		BIT(25)
+
+#define REG_TX_TONE_IDX		0x1b2c
 #define REG_DPD_LUT0		0x1b44
 #define BIT_GLOSS_DB		GENMASK(14, 12)
 #define REG_DPD_CTL0_S1		0x1b5c
 #define REG_DPD_CTL1_S1		0x1b60
 #define REG_DPD_AGC		0x1b67
+#define REG_TABLE_SEL		0x1b98
+#define BIT_I_GAIN		GENMASK(19, 16)
+#define BIT_GAIN_RST		BIT(15)
+#define BIT_Q_GAIN_SEL		GENMASK(14, 12)
+#define BIT_Q_GAIN		GENMASK(11, 0)
+#define REG_TX_GAIN_SET		0x1b9c
+#define BIT_GAPK_RPT_IDX	GENMASK(11, 8)
 #define REG_DPD_CTL0		0x1bb4
+#define REG_SINGLE_TONE_SW	0x1bb8
+#define BIT_IRQ_TEST_MODE	BIT(20)
 #define REG_R_CONFIG		0x1bcc
 #define BIT_INNER_LB		BIT(21)
 #define BIT_IQ_SWITCH		GENMASK(5, 0)
+#define BIT_2G_SWING		0x2d
+#define BIT_5G_SWING		0x36
 #define REG_RXSRAM_CTL		0x1bd4
+#define BIT_RPT_EN		BIT(21)
 #define BIT_RPT_SEL		GENMASK(20, 16)
 #define BIT_DPD_CLK		GENMASK(7, 4)
 #define REG_DPD_CTL11		0x1be4
@@ -254,23 +280,44 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_DPD_CTL16		0x1bf8
 #define REG_STAT_RPT		0x1bfc
 #define BIT_RPT_DGAIN		GENMASK(27, 16)
+#define BIT_GAPK_RPT0		GENMASK(3, 0)
+#define BIT_GAPK_RPT1		GENMASK(7, 4)
+#define BIT_GAPK_RPT2		GENMASK(11, 8)
+#define BIT_GAPK_RPT3		GENMASK(15, 12)
+#define BIT_GAPK_RPT4		GENMASK(19, 16)
+#define BIT_GAPK_RPT5		GENMASK(23, 20)
+#define BIT_GAPK_RPT6		GENMASK(27, 24)
+#define BIT_GAPK_RPT7		GENMASK(31, 28)
 
 #define REG_TXANT		0x1c28
+#define REG_IQK_CTRL		0x1c38
 #define REG_ENCCK		0x1c3c
 #define BIT_CCK_BLK_EN		BIT(1)
 #define BIT_CCK_OFDM_BLK_EN	(BIT(0) | BIT(1))
 #define REG_CCAMSK		0x1c80
 #define REG_RSTB		0x1c90
 #define BIT_RSTB_3WIRE		BIT(8)
+#define REG_CH_DELAY_EXTR2	0x1cd0
+#define BIT_TST_IQK2SET_SRC	BIT(31)
+#define BIT_EN_IOQ_IQK_DPK	BIT(30)
+#define BIT_IQK_DPK_RESET_SRC	BIT(29)
+#define BIT_IQK_DPK_CLOCK_SRC	BIT(28)
 
 #define REG_RX_BREAK		0x1d2c
 #define BIT_COM_RX_GCK_EN	BIT(31)
 #define REG_RXFNCTL		0x1d30
+#define REG_CCA_OFF		0x1d58
+#define BIT_CCA_ON_BY_PW	GENMASK(11, 3)
 #define REG_RXIGI		0x1d70
+
 #define REG_ENFN		0x1e24
+#define BIT_IQK_DPK_EN		BIT(17)
 #define REG_TXANTSEG		0x1e28
+#define BIT_ANTSEG		GENMASK(3, 0)
 #define REG_TXLGMAP		0x1e2c
 #define REG_CCKPATH		0x1e5c
+#define REG_TX_FIFO		0x1e70
+#define BIT_STOP_TX		GENMASK(3, 0)
 #define REG_CNT_CTRL		0x1eb4
 #define BIT_ALL_CNT_RST		BIT(25)
 
@@ -281,11 +328,15 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_OFDM_FACNT4		0x2d10
 #define REG_OFDM_FACNT5		0x2d20
 #define REG_RPT_CIP		0x2d9c
+#define BIT_RPT_CIP_STATUS	GENMASK(7, 0)
 #define REG_OFDM_TXCNT		0x2de0
 
 #define REG_ORITXCODE2		0x4100
 #define REG_3WIRE2		0x410c
 #define REG_ANAPAR_B		0x4130
+#define REG_RFTXEN_GCK_B	0x4164
+#define REG_DIS_SHARE_RX_B	0x416c
+#define BIT_EXT_TIA_BW		BIT(1)
 #define REG_RXAGCCTL		0x41ac
 #define REG_DCKB_I_0		0x41bc
 #define REG_DCKB_I_1		0x41c0
@@ -293,17 +344,25 @@ const struct rtw_table name ## _tbl = {			\
 #define REG_DCKB_Q_1		0x41dc
 
 #define RF_MODE_TRXAGC		0x00
+#define BIT_RF_MODE		GENMASK(19, 16)
 #define BIT_RXAGC		GENMASK(9, 5)
 #define BIT_TXAGC		GENMASK(4, 0)
 #define RF_RXAGC_OFFSET		0x19
 #define RF_BW_TRXBB		0x1a
+#define BIT_TX_CCK_IND		BIT(16)
 #define BIT_BW_TXBB		GENMASK(14, 12)
 #define BIT_BW_RXBB		GENMASK(11, 10)
+#define BIT_DBG_CCK_CCA		BIT(1)
 #define RF_TX_GAIN_OFFSET	0x55
 #define BIT_BB_GAIN		GENMASK(18, 14)
 #define BIT_RF_GAIN		GENMASK(4, 2)
 #define RF_TX_GAIN		0x56
 #define BIT_GAIN_TXBB		GENMASK(4, 0)
+#define RF_IDAC			0x58
+#define BIT_TX_MODE		GENMASK(19, 8)
+#define RF_TX_RESULT		0x5f
+#define BIT_GAIN_TX_PAD_H	GENMASK(11, 8)
+#define BIT_GAIN_TX_PAD_L	GENMASK(7, 4)
 #define RF_PA			0x60
 #define RF_PABIAS_2G_MASK	GENMASK(15, 12)
 #define RF_PABIAS_5G_MASK	GENMASK(19, 16)
@@ -316,6 +375,10 @@ const struct rtw_table name ## _tbl = {			\
 #define RF_RXA_MIX_GAIN		0x8a
 #define BIT_RXA_MIX_GAIN	GENMASK(4, 3)
 #define RF_EXT_TIA_BW		0x8f
+#define BIT_PW_EXT_TIA		BIT(1)
+#define RF_DIS_BYPASS_TXBB	0x9e
+#define BIT_TXBB		BIT(10)
+#define BIT_TIA_BYPASS		BIT(5)
 #define RF_DEBUG		0xde
 #define BIT_DE_PWR_TRIM		BIT(19)
 #define BIT_DE_TX_GAIN		BIT(16)
-- 
cgit v1.2.3


From 3b25bac893534858a8cd0ff79afa929beb8e3b20 Mon Sep 17 00:00:00 2001
From: Guo-Feng Fan <vincent_fann@realtek.com>
Date: Mon, 19 Apr 2021 08:37:48 +0800
Subject: rtw88: 8822c: debug: allow debugfs to enable/disable TXGAPK

Use "cat dm_cap" to show all features; where, prefix +/- means feature is
enabled/disabled:

$ cat dm_cap
DM capability 0x00000002
( 1) +TXGAPK

To control dm_cap:
use "echo +1 > dm_cap" to enable TXGAPK
use "echo -1 > dm_cap" to disable TXGAPK

Below is an example to disable TXGAPK.

$ echo -1 > dm_cap
$ cat dm_cap
DM capability 0x00000000
( 1) -TXGAPK

Below is an example to show TXGAPK status
$ echo 1 > dm_cap; cat dm_cap

( 1) +TXGAPK

path 0:
0x56 = 0x88c89
[TXGAPK] offset 1 0
[TXGAPK] offset 1 1
[TXGAPK] offset 1 2
[TXGAPK] offset 1 3
[TXGAPK] offset 0 4
[TXGAPK] offset 0 5
[TXGAPK] offset 0 6
[TXGAPK] offset 0 7
[TXGAPK] offset 0 8
[TXGAPK] offset 0 9

path 1:
0x56 = 0x89c89
[TXGAPK] offset 1 0
[TXGAPK] offset 1 1
[TXGAPK] offset 1 2
[TXGAPK] offset 1 3
[TXGAPK] offset 0 4
[TXGAPK] offset 0 5
[TXGAPK] offset 0 6
[TXGAPK] offset 0 7
[TXGAPK] offset 0 8
[TXGAPK] offset 0 9

Signed-off-by: Guo-Feng Fan <vincent_fann@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419003748.3224-4-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/debug.c    | 91 +++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/main.h     |  8 +++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 15 ++++-
 3 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index 5c44fa87ed2e..18ab472ea46c 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -35,9 +35,17 @@ struct rtw_debugfs_priv {
 			u32 addr;
 			u32 len;
 		} read_reg;
+		struct {
+			u8 bit;
+		} dm_cap;
 	};
 };
 
+static const char * const rtw_dm_cap_strs[] = {
+	[RTW_DM_CAP_NA] = "NA",
+	[RTW_DM_CAP_TXGAPK] = "TXGAPK",
+};
+
 static int rtw_debugfs_single_show(struct seq_file *m, void *v)
 {
 	struct rtw_debugfs_priv *debugfs_priv = m->private;
@@ -853,6 +861,83 @@ static int rtw_debugfs_get_fw_crash(struct seq_file *m, void *v)
 	return 0;
 }
 
+static ssize_t rtw_debugfs_set_dm_cap(struct file *filp,
+				      const char __user *buffer,
+				      size_t count, loff_t *loff)
+{
+	struct seq_file *seqpriv = (struct seq_file *)filp->private_data;
+	struct rtw_debugfs_priv *debugfs_priv = seqpriv->private;
+	struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	int bit;
+	bool en;
+
+	if (kstrtoint_from_user(buffer, count, 10, &bit))
+		return -EINVAL;
+
+	en = bit > 0;
+	bit = abs(bit);
+
+	if (bit >= RTW_DM_CAP_NUM) {
+		rtw_warn(rtwdev, "unknown DM CAP %d\n", bit);
+		return -EINVAL;
+	}
+
+	if (en)
+		dm_info->dm_flags &= ~BIT(bit);
+	else
+		dm_info->dm_flags |= BIT(bit);
+
+	debugfs_priv->dm_cap.bit = bit;
+
+	return count;
+}
+
+static void dump_gapk_status(struct rtw_dev *rtwdev, struct seq_file *m)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_gapk_info *txgapk = &rtwdev->dm_info.gapk;
+	int i, path;
+	u32 val;
+
+	seq_printf(m, "\n(%2d) %c%s\n\n", RTW_DM_CAP_TXGAPK,
+		   dm_info->dm_flags & BIT(RTW_DM_CAP_TXGAPK) ? '-' : '+',
+		   rtw_dm_cap_strs[RTW_DM_CAP_TXGAPK]);
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		val = rtw_read_rf(rtwdev, path, RF_GAINTX, RFREG_MASK);
+		seq_printf(m, "path %d:\n0x%x = 0x%x\n", path, RF_GAINTX, val);
+
+		for (i = 0; i < RF_HW_OFFSET_NUM; i++)
+			seq_printf(m, "[TXGAPK] offset %d %d\n",
+				   txgapk->rf3f_fs[path][i], i);
+		seq_puts(m, "\n");
+	}
+}
+
+static int rtw_debugfs_get_dm_cap(struct seq_file *m, void *v)
+{
+	struct rtw_debugfs_priv *debugfs_priv = m->private;
+	struct rtw_dev *rtwdev = debugfs_priv->rtwdev;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	int i;
+
+	switch (debugfs_priv->dm_cap.bit) {
+	case RTW_DM_CAP_TXGAPK:
+		dump_gapk_status(rtwdev, m);
+		break;
+	default:
+		for (i = 1; i < RTW_DM_CAP_NUM; i++) {
+			seq_printf(m, "(%2d) %c%s\n", i,
+				   dm_info->dm_flags & BIT(i) ? '-' : '+',
+				   rtw_dm_cap_strs[i]);
+		}
+		break;
+	}
+	debugfs_priv->dm_cap.bit = RTW_DM_CAP_NA;
+	return 0;
+}
+
 #define rtw_debug_impl_mac(page, addr)				\
 static struct rtw_debugfs_priv rtw_debug_priv_mac_ ##page = {	\
 	.cb_read = rtw_debug_get_mac_page,			\
@@ -961,6 +1046,11 @@ static struct rtw_debugfs_priv rtw_debug_priv_fw_crash = {
 	.cb_read = rtw_debugfs_get_fw_crash,
 };
 
+static struct rtw_debugfs_priv rtw_debug_priv_dm_cap = {
+	.cb_write = rtw_debugfs_set_dm_cap,
+	.cb_read = rtw_debugfs_get_dm_cap,
+};
+
 #define rtw_debugfs_add_core(name, mode, fopname, parent)		\
 	do {								\
 		rtw_debug_priv_ ##name.rtwdev = rtwdev;			\
@@ -1035,6 +1125,7 @@ void rtw_debugfs_init(struct rtw_dev *rtwdev)
 	rtw_debugfs_add_r(rf_dump);
 	rtw_debugfs_add_r(tx_pwr_tbl);
 	rtw_debugfs_add_rw(fw_crash);
+	rtw_debugfs_add_rw(dm_cap);
 }
 
 #endif /* CONFIG_RTW88_DEBUGFS */
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 1e14dc4ef012..dc3744847ba9 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1516,6 +1516,7 @@ enum rtw_rf_band {
 
 struct rtw_gapk_info {
 	u32 rf3f_bp[RF_BAND_MAX][RF_GAIN_NUM][RTW_RF_PATH_MAX];
+	u32 rf3f_fs[RTW_RF_PATH_MAX][RF_GAIN_NUM];
 	bool txgapk_bp_done;
 	s8 offset[RF_GAIN_NUM][RTW_RF_PATH_MAX];
 	s8 fianl_offset[RF_GAIN_NUM][RTW_RF_PATH_MAX];
@@ -1535,6 +1536,12 @@ struct rtw_cfo_track {
 #define RRSR_INIT_2G 0x15f
 #define RRSR_INIT_5G 0x150
 
+enum rtw_dm_cap {
+	RTW_DM_CAP_NA,
+	RTW_DM_CAP_TXGAPK,
+	RTW_DM_CAP_NUM
+};
+
 struct rtw_dm_info {
 	u32 cck_fa_cnt;
 	u32 ofdm_fa_cnt;
@@ -1603,6 +1610,7 @@ struct rtw_dm_info {
 	struct ewma_evm ewma_evm[RTW_EVM_NUM];
 	struct ewma_snr ewma_snr[RTW_SNR_NUM];
 
+	u32 dm_flags; /* enum rtw_dm_cap */
 	struct rtw_iqk_info iqk;
 	struct rtw_gapk_info gapk;
 	bool is_bt_iqk_timeout;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index daa5150de87b..6cb593cc33c2 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1665,14 +1665,16 @@ static void rtw8822c_txgapk_write_tx_gain(struct rtw_dev *rtwdev)
 			}
 
 			v = txgapk->rf3f_bp[band][i][path];
-			if (_rtw8822c_txgapk_gain_valid(rtwdev, v))
+			if (_rtw8822c_txgapk_gain_valid(rtwdev, v)) {
 				rtw_dbg(rtwdev, RTW_DBG_RFK,
 					"[TXGAPK] tx_gain=0x%03X >= 0xCEX\n",
 					txgapk->rf3f_bp[band][i][path]);
-			else
+			} else {
+				txgapk->rf3f_fs[path][i] = offset_tmp[i];
 				rtw_dbg(rtwdev, RTW_DBG_RFK,
 					"[TXGAPK] offset %d %d\n",
 					offset_tmp[i], i);
+			}
 		}
 
 		rtw_write_rf(rtwdev, path, RF_LUTWE2, RFREG_MASK, 0x10000);
@@ -1704,6 +1706,9 @@ static void rtw8822c_txgapk_save_all_tx_gain_table(struct rtw_dev *rtwdev)
 	u8 path, band, gain, rf0_idx;
 	u32 rf18, v;
 
+	if (rtwdev->dm_info.dm_flags & BIT(RTW_DM_CAP_TXGAPK))
+		return;
+
 	rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] ======>%s\n", __func__);
 
 	if (txgapk->read_txgain == 1) {
@@ -1794,6 +1799,12 @@ static void rtw8822c_txgapk(struct rtw_dev *rtwdev)
 
 static void rtw8822c_do_gapk(struct rtw_dev *rtwdev)
 {
+	struct rtw_dm_info *dm = &rtwdev->dm_info;
+
+	if (dm->dm_flags & BIT(RTW_DM_CAP_TXGAPK)) {
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[TXGAPK] feature disable!!!\n");
+		return;
+	}
 	rtw8822c_rfk_handshake(rtwdev, true);
 	rtw8822c_txgapk(rtwdev);
 	rtw8822c_rfk_handshake(rtwdev, false);
-- 
cgit v1.2.3


From 559f6cb318375e9deb01d7d0e957d0d90a2db63d Mon Sep 17 00:00:00 2001
From: Guo-Feng Fan <vincent_fann@realtek.com>
Date: Thu, 15 Apr 2021 16:47:01 +0800
Subject: rtw88: 8821c: Don't set RX_FLAG_DECRYPTED if packet has no encryption

The value of GET_RX_DESC_SWDEC() indicates that if this RX
packet requires software decryption or not. And software
decryption is required when the packet was encrypted and the
hardware failed to decrypt it.

So, GET_RX_DESC_SWDEC() is negative does not mean that this
packet is decrypted, it might just have no encryption at all.
To actually see if the packet is decrypted, driver needs to
further check if the hardware has successfully decrypted it,
with a specific type of encryption algorithm.

Signed-off-by: Guo-Feng Fan <vincent_fann@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210415084703.27255-2-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8821c.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index 33c6cf1206c8..785b8181513f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -581,7 +581,8 @@ static void rtw8821c_query_rx_desc(struct rtw_dev *rtwdev, u8 *rx_desc,
 	pkt_stat->phy_status = GET_RX_DESC_PHYST(rx_desc);
 	pkt_stat->icv_err = GET_RX_DESC_ICV_ERR(rx_desc);
 	pkt_stat->crc_err = GET_RX_DESC_CRC32(rx_desc);
-	pkt_stat->decrypted = !GET_RX_DESC_SWDEC(rx_desc);
+	pkt_stat->decrypted = !GET_RX_DESC_SWDEC(rx_desc) &&
+			      GET_RX_DESC_ENC_TYPE(rx_desc) != RX_DESC_ENC_NONE;
 	pkt_stat->is_c2h = GET_RX_DESC_C2H(rx_desc);
 	pkt_stat->pkt_len = GET_RX_DESC_PKT_LEN(rx_desc);
 	pkt_stat->drv_info_sz = GET_RX_DESC_DRV_INFO_SIZE(rx_desc);
-- 
cgit v1.2.3


From a548909d7ad7e334c6c923a71f0b694d60980232 Mon Sep 17 00:00:00 2001
From: Yu-Yen Ting <steventing@realtek.com>
Date: Thu, 15 Apr 2021 16:47:02 +0800
Subject: rtw88: Fix potential unrecoverable tx queue stop

If there are lots of packets to be transmitted, the driver would check
whether the available descriptors are sufficient according the read/write
point of tx queue. Once the available descriptor is not enough,
ieee80211_stop_queue is called.

TX ISR, meanwhile, is releasing the tx resources after the packets are
transmitted. This routine may call ieee80211_wake_queue by checking the
available descriptor.

The potential queue stop problem would occur when the tx queue is
stopped due to the heavy traffic. Then thare is no chance to wake the
queue up because the read point is not updated immediately, as a result,
no more packets coulde be transmitted in this queue.

This patch makes sure the ieee80211_wake_queue could be called properly
and avoids the race condition when ring->r.rp, ring->queue_stopped are
updated.

Signed-off-by: Yu-Yen Ting <steventing@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210415084703.27255-3-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index b8115b31839e..adea3c7551ab 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -950,10 +950,12 @@ static int rtw_pci_tx_write(struct rtw_dev *rtwdev,
 		return ret;
 
 	ring = &rtwpci->tx_rings[queue];
+	spin_lock_bh(&rtwpci->irq_lock);
 	if (avail_desc(ring->r.wp, ring->r.rp, ring->r.len) < 2) {
 		ieee80211_stop_queue(rtwdev->hw, skb_get_queue_mapping(skb));
 		ring->queue_stopped = true;
 	}
+	spin_unlock_bh(&rtwpci->irq_lock);
 
 	return 0;
 }
@@ -968,7 +970,7 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	struct sk_buff *skb;
 	u32 count;
 	u32 bd_idx_addr;
-	u32 bd_idx, cur_rp;
+	u32 bd_idx, cur_rp, rp_idx;
 	u16 q_map;
 
 	ring = &rtwpci->tx_rings[hw_queue];
@@ -977,6 +979,7 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	bd_idx = rtw_read32(rtwdev, bd_idx_addr);
 	cur_rp = bd_idx >> 16;
 	cur_rp &= TRX_BD_IDX_MASK;
+	rp_idx = ring->r.rp;
 	if (cur_rp >= ring->r.rp)
 		count = cur_rp - ring->r.rp;
 	else
@@ -1000,12 +1003,15 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 		}
 
 		if (ring->queue_stopped &&
-		    avail_desc(ring->r.wp, ring->r.rp, ring->r.len) > 4) {
+		    avail_desc(ring->r.wp, rp_idx, ring->r.len) > 4) {
 			q_map = skb_get_queue_mapping(skb);
 			ieee80211_wake_queue(hw, q_map);
 			ring->queue_stopped = false;
 		}
 
+		if (++rp_idx >= ring->r.len)
+			rp_idx = 0;
+
 		skb_pull(skb, rtwdev->chip->tx_pkt_desc_sz);
 
 		info = IEEE80211_SKB_CB(skb);
-- 
cgit v1.2.3


From 7bd3760c71f7a18485d2c10ea0887e1d41519f4e Mon Sep 17 00:00:00 2001
From: Po-Hao Huang <phhuang@realtek.com>
Date: Thu, 15 Apr 2021 16:47:03 +0800
Subject: rtw88: refine napi deinit flow

We used to stop napi before disabling irqs. And it turns out
to cause some problem when we try to stop device while interrupt arrives.

To safely stop pci, we do three steps:
1. disable interrupt
2. synchronize_irq
3. stop_napi
Since step 2 and 3 may not finish as expected when interrupt is enabled,
use rtwpci->running to decide whether interrupt should be re-enabled at
the time.

Fixes: 9e2fd29864c5 ("rtw88: add napi support")
Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210415084703.27255-4-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 19 ++++++++++++++-----
 drivers/net/wireless/realtek/rtw88/pci.h |  1 +
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index adea3c7551ab..f59a4c462e3b 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -581,23 +581,30 @@ static int rtw_pci_start(struct rtw_dev *rtwdev)
 {
 	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
 
+	rtw_pci_napi_start(rtwdev);
+
 	spin_lock_bh(&rtwpci->irq_lock);
+	rtwpci->running = true;
 	rtw_pci_enable_interrupt(rtwdev, rtwpci, false);
 	spin_unlock_bh(&rtwpci->irq_lock);
 
-	rtw_pci_napi_start(rtwdev);
-
 	return 0;
 }
 
 static void rtw_pci_stop(struct rtw_dev *rtwdev)
 {
 	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
+	struct pci_dev *pdev = rtwpci->pdev;
 
+	spin_lock_bh(&rtwpci->irq_lock);
+	rtwpci->running = false;
+	rtw_pci_disable_interrupt(rtwdev, rtwpci);
+	spin_unlock_bh(&rtwpci->irq_lock);
+
+	synchronize_irq(pdev->irq);
 	rtw_pci_napi_stop(rtwdev);
 
 	spin_lock_bh(&rtwpci->irq_lock);
-	rtw_pci_disable_interrupt(rtwdev, rtwpci);
 	rtw_pci_dma_release(rtwdev, rtwpci);
 	spin_unlock_bh(&rtwpci->irq_lock);
 }
@@ -1212,7 +1219,8 @@ static irqreturn_t rtw_pci_interrupt_threadfn(int irq, void *dev)
 		rtw_fw_c2h_cmd_isr(rtwdev);
 
 	/* all of the jobs for this interrupt have been done */
-	rtw_pci_enable_interrupt(rtwdev, rtwpci, rx);
+	if (rtwpci->running)
+		rtw_pci_enable_interrupt(rtwdev, rtwpci, rx);
 	spin_unlock_bh(&rtwpci->irq_lock);
 
 	return IRQ_HANDLED;
@@ -1633,7 +1641,8 @@ static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
 	if (work_done < budget) {
 		napi_complete_done(napi, work_done);
 		spin_lock_bh(&rtwpci->irq_lock);
-		rtw_pci_enable_interrupt(rtwdev, rtwpci, false);
+		if (rtwpci->running)
+			rtw_pci_enable_interrupt(rtwdev, rtwpci, false);
 		spin_unlock_bh(&rtwpci->irq_lock);
 		/* When ISR happens during polling and before napi_complete
 		 * while no further data is received. Data on the dma_ring will
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index e76fc549a788..0ffae887527a 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -211,6 +211,7 @@ struct rtw_pci {
 	spinlock_t irq_lock;
 	u32 irq_mask[4];
 	bool irq_enabled;
+	bool running;
 
 	/* napi structure */
 	struct net_device netdev;
-- 
cgit v1.2.3


From ff0224e97d5d1f8bd52c7d4dd31d38089503a6d8 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Wed, 31 Mar 2021 10:35:52 +0800
Subject: libertas_tf: Remove duplicate struct declaration

struct lbtf_private is declared twice. One has been declared
at 157th line. Remove the duplicate.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210331023557.2804128-4-wanjiabing@vivo.com
---
 drivers/net/wireless/marvell/libertas_tf/libertas_tf.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/libertas_tf.h b/drivers/net/wireless/marvell/libertas_tf/libertas_tf.h
index 67bbb6a8f113..5d726545d987 100644
--- a/drivers/net/wireless/marvell/libertas_tf/libertas_tf.h
+++ b/drivers/net/wireless/marvell/libertas_tf/libertas_tf.h
@@ -453,7 +453,6 @@ struct cmd_ds_802_11_beacon_set {
 	u8 beacon[MRVL_MAX_BCN_SIZE];
 };
 
-struct lbtf_private;
 struct cmd_ctrl_node;
 
 /** Function Prototype Declaration */
-- 
cgit v1.2.3


From afda33499bea154dc792338c5c51a2038a206785 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 19 Apr 2021 14:59:56 +0800
Subject: rtlwifi: implement set_tim by update beacon content

Once beacon content is changed, we update the content to wifi card by
send_beacon_frame(). Then, STA with PS can wake up properly to receive its
packets.

Since we update beacon content to PCI wifi devices every beacon interval,
the only one usb device, 8192CU, needs to update beacon content when
mac80211 calling set_tim.

Reported-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Tested-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419065956.6085-1-pkshih@realtek.com
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 32 +++++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtlwifi/core.h |  1 +
 drivers/net/wireless/realtek/rtlwifi/usb.c  |  3 +++
 drivers/net/wireless/realtek/rtlwifi/wifi.h |  1 +
 4 files changed, 37 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index c9b6ee81dcb9..8efe2f5e5b9f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1018,6 +1018,25 @@ static void send_beacon_frame(struct ieee80211_hw *hw,
 	}
 }
 
+void rtl_update_beacon_work_callback(struct work_struct *work)
+{
+	struct rtl_works *rtlworks =
+	    container_of(work, struct rtl_works, update_beacon_work);
+	struct ieee80211_hw *hw = rtlworks->hw;
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct ieee80211_vif *vif = rtlpriv->mac80211.vif;
+
+	if (!vif) {
+		WARN_ONCE(true, "no vif to update beacon\n");
+		return;
+	}
+
+	mutex_lock(&rtlpriv->locks.conf_mutex);
+	send_beacon_frame(hw, vif);
+	mutex_unlock(&rtlpriv->locks.conf_mutex);
+}
+EXPORT_SYMBOL_GPL(rtl_update_beacon_work_callback);
+
 static void rtl_op_bss_info_changed(struct ieee80211_hw *hw,
 				    struct ieee80211_vif *vif,
 				    struct ieee80211_bss_conf *bss_conf,
@@ -1747,6 +1766,18 @@ static void rtl_op_flush(struct ieee80211_hw *hw,
 		rtlpriv->intf_ops->flush(hw, queues, drop);
 }
 
+static int rtl_op_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
+			  bool set)
+{
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+
+	if (rtlhal->hw_type == HARDWARE_TYPE_RTL8192CU)
+		schedule_work(&rtlpriv->works.update_beacon_work);
+
+	return 0;
+}
+
 /*	Description:
  *		This routine deals with the Power Configuration CMD
  *		 parsing for RTL8723/RTL8188E Series IC.
@@ -1903,6 +1934,7 @@ const struct ieee80211_ops rtl_ops = {
 	.sta_add = rtl_op_sta_add,
 	.sta_remove = rtl_op_sta_remove,
 	.flush = rtl_op_flush,
+	.set_tim = rtl_op_set_tim,
 };
 EXPORT_SYMBOL_GPL(rtl_ops);
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.h b/drivers/net/wireless/realtek/rtlwifi/core.h
index 7447ff456710..345161b47442 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.h
+++ b/drivers/net/wireless/realtek/rtlwifi/core.h
@@ -60,5 +60,6 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data);
 bool rtl_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb);
 bool rtl_btc_status_false(void);
 void rtl_dm_diginit(struct ieee80211_hw *hw, u32 cur_igval);
+void rtl_update_beacon_work_callback(struct work_struct *work);
 
 #endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 37a9a03123f3..86a236873254 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -805,6 +805,7 @@ static void rtl_usb_stop(struct ieee80211_hw *hw)
 
 	tasklet_kill(&rtlusb->rx_work_tasklet);
 	cancel_work_sync(&rtlpriv->works.lps_change_work);
+	cancel_work_sync(&rtlpriv->works.update_beacon_work);
 
 	flush_workqueue(rtlpriv->works.rtl_wq);
 
@@ -1031,6 +1032,8 @@ int rtl_usb_probe(struct usb_interface *intf,
 		  rtl_fill_h2c_cmd_work_callback);
 	INIT_WORK(&rtlpriv->works.lps_change_work,
 		  rtl_lps_change_work_callback);
+	INIT_WORK(&rtlpriv->works.update_beacon_work,
+		  rtl_update_beacon_work_callback);
 
 	rtlpriv->usb_data_index = 0;
 	init_completion(&rtlpriv->firmware_loading_complete);
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 877ed6a1589f..aa07856411b1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -2486,6 +2486,7 @@ struct rtl_works {
 
 	struct work_struct lps_change_work;
 	struct work_struct fill_h2c_cmd;
+	struct work_struct update_beacon_work;
 };
 
 struct rtl_debug {
-- 
cgit v1.2.3


From 2601dda8faa7685bab921d63c86f04e9e356f9ac Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Mon, 12 Apr 2021 13:39:52 +0800
Subject: mt76: testmode: add support to send larger packet

Add support to send larger packet in testmode to meet requirements
of some test cases.
The limit of max packet size is determined based on tx rate mode setting.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h     |   3 +-
 drivers/net/wireless/mediatek/mt76/testmode.c | 159 ++++++++++++++++++++------
 drivers/net/wireless/mediatek/mt76/testmode.h |   2 +-
 drivers/net/wireless/mediatek/mt76/tx.c       |   2 +-
 4 files changed, 131 insertions(+), 35 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index d121c176c37c..d60a1dd8f314 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -538,7 +538,7 @@ struct mt76_testmode_data {
 	struct sk_buff *tx_skb;
 
 	u32 tx_count;
-	u16 tx_msdu_len;
+	u16 tx_mpdu_len;
 
 	u8 tx_rate_mode;
 	u8 tx_rate_idx;
@@ -1074,6 +1074,7 @@ int mt76_testmode_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 int mt76_testmode_dump(struct ieee80211_hw *hw, struct sk_buff *skb,
 		       struct netlink_callback *cb, void *data, int len);
 int mt76_testmode_set_state(struct mt76_phy *phy, enum mt76_testmode_state state);
+int mt76_testmode_alloc_skb(struct mt76_phy *phy, u32 len);
 
 static inline void mt76_testmode_reset(struct mt76_phy *phy, bool disable)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/testmode.c b/drivers/net/wireless/mediatek/mt76/testmode.c
index cc769645afa5..001d0ba5f73e 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/testmode.c
@@ -62,36 +62,83 @@ void mt76_testmode_tx_pending(struct mt76_phy *phy)
 	spin_unlock_bh(&q->lock);
 }
 
+static u32
+mt76_testmode_max_mpdu_len(struct mt76_phy *phy, u8 tx_rate_mode)
+{
+	switch (tx_rate_mode) {
+	case MT76_TM_TX_MODE_HT:
+		return IEEE80211_MAX_MPDU_LEN_HT_7935;
+	case MT76_TM_TX_MODE_VHT:
+	case MT76_TM_TX_MODE_HE_SU:
+	case MT76_TM_TX_MODE_HE_EXT_SU:
+	case MT76_TM_TX_MODE_HE_TB:
+	case MT76_TM_TX_MODE_HE_MU:
+		if (phy->sband_5g.sband.vht_cap.cap &
+		    IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991)
+			return IEEE80211_MAX_MPDU_LEN_VHT_7991;
+		return IEEE80211_MAX_MPDU_LEN_VHT_11454;
+	case MT76_TM_TX_MODE_CCK:
+	case MT76_TM_TX_MODE_OFDM:
+	default:
+		return IEEE80211_MAX_FRAME_LEN;
+	}
+}
 
-static int
-mt76_testmode_tx_init(struct mt76_phy *phy)
+static void
+mt76_testmode_free_skb(struct mt76_phy *phy)
 {
 	struct mt76_testmode_data *td = &phy->test;
-	struct ieee80211_tx_info *info;
-	struct ieee80211_hdr *hdr;
-	struct sk_buff *skb;
+	struct sk_buff *skb = td->tx_skb;
+
+	if (!skb)
+		return;
+
+	if (skb_has_frag_list(skb)) {
+		kfree_skb_list(skb_shinfo(skb)->frag_list);
+		skb_shinfo(skb)->frag_list = NULL;
+	}
+
+	dev_kfree_skb(skb);
+	td->tx_skb = NULL;
+}
+
+int mt76_testmode_alloc_skb(struct mt76_phy *phy, u32 len)
+{
+#define MT_TXP_MAX_LEN	4095
 	u16 fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
 		 IEEE80211_FCTL_FROMDS;
-	struct ieee80211_tx_rate *rate;
-	u8 max_nss = hweight8(phy->antenna_mask);
+	struct mt76_testmode_data *td = &phy->test;
 	bool ext_phy = phy != &phy->dev->phy;
+	struct sk_buff **frag_tail, *head;
+	struct ieee80211_tx_info *info;
+	struct ieee80211_hdr *hdr;
+	u32 max_len, head_len;
+	int nfrags, i;
 
-	if (td->tx_antenna_mask)
-		max_nss = min_t(u8, max_nss, hweight8(td->tx_antenna_mask));
+	max_len = mt76_testmode_max_mpdu_len(phy, td->tx_rate_mode);
+	if (len > max_len)
+		len = max_len;
+	else if (len < sizeof(struct ieee80211_hdr))
+		len = sizeof(struct ieee80211_hdr);
 
-	skb = alloc_skb(td->tx_msdu_len, GFP_KERNEL);
-	if (!skb)
+	nfrags = len / MT_TXP_MAX_LEN;
+	head_len = nfrags ? MT_TXP_MAX_LEN : len;
+
+	if (len > IEEE80211_MAX_FRAME_LEN)
+		fc |= IEEE80211_STYPE_QOS_DATA;
+
+	head = alloc_skb(head_len, GFP_KERNEL);
+	if (!head)
 		return -ENOMEM;
 
-	dev_kfree_skb(td->tx_skb);
-	td->tx_skb = skb;
-	hdr = __skb_put_zero(skb, td->tx_msdu_len);
+	hdr = __skb_put_zero(head, head_len);
 	hdr->frame_control = cpu_to_le16(fc);
 	memcpy(hdr->addr1, phy->macaddr, sizeof(phy->macaddr));
 	memcpy(hdr->addr2, phy->macaddr, sizeof(phy->macaddr));
 	memcpy(hdr->addr3, phy->macaddr, sizeof(phy->macaddr));
+	skb_set_queue_mapping(head, IEEE80211_AC_BE);
 
-	info = IEEE80211_SKB_CB(skb);
+	info = IEEE80211_SKB_CB(head);
 	info->flags = IEEE80211_TX_CTL_INJECTED |
 		      IEEE80211_TX_CTL_NO_ACK |
 		      IEEE80211_TX_CTL_NO_PS_BUFFER;
@@ -99,9 +146,60 @@ mt76_testmode_tx_init(struct mt76_phy *phy)
 	if (ext_phy)
 		info->hw_queue |= MT_TX_HW_QUEUE_EXT_PHY;
 
+	frag_tail = &skb_shinfo(head)->frag_list;
+
+	for (i = 0; i < nfrags; i++) {
+		struct sk_buff *frag;
+		u16 frag_len;
+
+		if (i == nfrags - 1)
+			frag_len = len % MT_TXP_MAX_LEN;
+		else
+			frag_len = MT_TXP_MAX_LEN;
+
+		frag = alloc_skb(frag_len, GFP_KERNEL);
+		if (!frag)
+			return -ENOMEM;
+
+		__skb_put_zero(frag, frag_len);
+		head->len += frag->len;
+		head->data_len += frag->len;
+
+		if (*frag_tail) {
+			(*frag_tail)->next = frag;
+			frag_tail = &frag;
+		} else {
+			*frag_tail = frag;
+		}
+	}
+
+	mt76_testmode_free_skb(phy);
+	td->tx_skb = head;
+
+	return 0;
+}
+EXPORT_SYMBOL(mt76_testmode_alloc_skb);
+
+static int
+mt76_testmode_tx_init(struct mt76_phy *phy)
+{
+	struct mt76_testmode_data *td = &phy->test;
+	struct ieee80211_tx_info *info;
+	struct ieee80211_tx_rate *rate;
+	u8 max_nss = hweight8(phy->antenna_mask);
+	int ret;
+
+	ret = mt76_testmode_alloc_skb(phy, td->tx_mpdu_len);
+	if (ret)
+		return ret;
+
 	if (td->tx_rate_mode > MT76_TM_TX_MODE_VHT)
 		goto out;
 
+	if (td->tx_antenna_mask)
+		max_nss = min_t(u8, max_nss, hweight8(td->tx_antenna_mask));
+
+	info = IEEE80211_SKB_CB(td->tx_skb);
 	rate = &info->control.rates[0];
 	rate->count = 1;
 	rate->idx = td->tx_rate_idx;
@@ -171,8 +269,6 @@ mt76_testmode_tx_init(struct mt76_phy *phy)
 		}
 	}
 out:
-	skb_set_queue_mapping(skb, IEEE80211_AC_BE);
-
 	return 0;
 }
 
@@ -203,8 +299,7 @@ mt76_testmode_tx_stop(struct mt76_phy *phy)
 	wait_event_timeout(dev->tx_wait, td->tx_done == td->tx_queued,
 			   MT76_TM_TIMEOUT * HZ);
 
-	dev_kfree_skb(td->tx_skb);
-	td->tx_skb = NULL;
+	mt76_testmode_free_skb(phy);
 }
 
 static inline void
@@ -224,10 +319,10 @@ mt76_testmode_init_defaults(struct mt76_phy *phy)
 {
 	struct mt76_testmode_data *td = &phy->test;
 
-	if (td->tx_msdu_len > 0)
+	if (td->tx_mpdu_len > 0)
 		return;
 
-	td->tx_msdu_len = 1024;
+	td->tx_mpdu_len = 1024;
 	td->tx_count = 1;
 	td->tx_rate_mode = MT76_TM_TX_MODE_OFDM;
 	td->tx_rate_nss = 1;
@@ -345,16 +440,6 @@ int mt76_testmode_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	if (tb[MT76_TM_ATTR_TX_COUNT])
 		td->tx_count = nla_get_u32(tb[MT76_TM_ATTR_TX_COUNT]);
 
-	if (tb[MT76_TM_ATTR_TX_LENGTH]) {
-		u32 val = nla_get_u32(tb[MT76_TM_ATTR_TX_LENGTH]);
-
-		if (val > IEEE80211_MAX_FRAME_LEN ||
-		    val < sizeof(struct ieee80211_hdr))
-			goto out;
-
-		td->tx_msdu_len = val;
-	}
-
 	if (tb[MT76_TM_ATTR_TX_RATE_IDX])
 		td->tx_rate_idx = nla_get_u8(tb[MT76_TM_ATTR_TX_RATE_IDX]);
 
@@ -375,6 +460,16 @@ int mt76_testmode_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			   &td->tx_power_control, 0, 1))
 		goto out;
 
+	if (tb[MT76_TM_ATTR_TX_LENGTH]) {
+		u32 val = nla_get_u32(tb[MT76_TM_ATTR_TX_LENGTH]);
+
+		if (val > mt76_testmode_max_mpdu_len(phy, td->tx_rate_mode) ||
+		    val < sizeof(struct ieee80211_hdr))
+			goto out;
+
+		td->tx_mpdu_len = val;
+	}
+
 	if (tb[MT76_TM_ATTR_TX_IPG])
 		td->tx_ipg = nla_get_u32(tb[MT76_TM_ATTR_TX_IPG]);
 
@@ -506,7 +601,7 @@ int mt76_testmode_dump(struct ieee80211_hw *hw, struct sk_buff *msg,
 		goto out;
 
 	if (nla_put_u32(msg, MT76_TM_ATTR_TX_COUNT, td->tx_count) ||
-	    nla_put_u32(msg, MT76_TM_ATTR_TX_LENGTH, td->tx_msdu_len) ||
+	    nla_put_u32(msg, MT76_TM_ATTR_TX_LENGTH, td->tx_mpdu_len) ||
 	    nla_put_u8(msg, MT76_TM_ATTR_TX_RATE_MODE, td->tx_rate_mode) ||
 	    nla_put_u8(msg, MT76_TM_ATTR_TX_RATE_NSS, td->tx_rate_nss) ||
 	    nla_put_u8(msg, MT76_TM_ATTR_TX_RATE_IDX, td->tx_rate_idx) ||
diff --git a/drivers/net/wireless/mediatek/mt76/testmode.h b/drivers/net/wireless/mediatek/mt76/testmode.h
index e0c706ce9b42..d32a7654c47e 100644
--- a/drivers/net/wireless/mediatek/mt76/testmode.h
+++ b/drivers/net/wireless/mediatek/mt76/testmode.h
@@ -21,7 +21,7 @@
  * @MT76_TM_ATTR_TX_COUNT: configured number of frames to send when setting
  *	state to MT76_TM_STATE_TX_FRAMES (u32)
  * @MT76_TM_ATTR_TX_PENDING: pending frames during MT76_TM_STATE_TX_FRAMES (u32)
- * @MT76_TM_ATTR_TX_LENGTH: packet tx msdu length (u32)
+ * @MT76_TM_ATTR_TX_LENGTH: packet tx mpdu length (u32)
  * @MT76_TM_ATTR_TX_RATE_MODE: packet tx mode (u8, see &enum mt76_testmode_tx_mode)
  * @MT76_TM_ATTR_TX_RATE_NSS: packet tx number of spatial streams (u8)
  * @MT76_TM_ATTR_TX_RATE_IDX: packet tx rate/MCS index (u8)
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 451ed60c6296..04e47259ea5f 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -213,7 +213,7 @@ void mt76_tx_complete_skb(struct mt76_dev *dev, u16 wcid_idx, struct sk_buff *sk
 		if (phy->test.tx_queued == phy->test.tx_done)
 			wake_up(&dev->tx_wait);
 
-		ieee80211_free_txskb(hw, skb);
+		dev_kfree_skb_any(skb);
 		return;
 	}
 #endif
-- 
cgit v1.2.3


From e6678f9dc59ab1535ba29d8b28e80fad212156a0 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Mon, 12 Apr 2021 13:39:53 +0800
Subject: mt76: mt7915: rework mt7915_tm_set_tx_len()

Rework mt7915_tm_set_tx_len() with mt76_testmode_alloc_skb() to support
larger packet based on a longer tx_time.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/testmode.c   | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
index bd798df748ba..f9d81e36ef09 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
@@ -257,13 +257,13 @@ mt7915_tm_set_tx_len(struct mt7915_phy *phy, u32 tx_time)
 {
 	struct mt76_phy *mphy = phy->mt76;
 	struct mt76_testmode_data *td = &mphy->test;
-	struct sk_buff *old = td->tx_skb, *new;
 	struct ieee80211_supported_band *sband;
 	struct rate_info rate = {};
 	u16 flags = 0, tx_len;
 	u32 bitrate;
+	int ret;
 
-	if (!tx_time || !old)
+	if (!tx_time)
 		return 0;
 
 	rate.mcs = td->tx_rate_idx;
@@ -323,21 +323,9 @@ mt7915_tm_set_tx_len(struct mt7915_phy *phy, u32 tx_time)
 	bitrate = cfg80211_calculate_bitrate(&rate);
 	tx_len = bitrate * tx_time / 10 / 8;
 
-	if (tx_len < sizeof(struct ieee80211_hdr))
-		tx_len = sizeof(struct ieee80211_hdr);
-	else if (tx_len > IEEE80211_MAX_FRAME_LEN)
-		tx_len = IEEE80211_MAX_FRAME_LEN;
-
-	new = alloc_skb(tx_len, GFP_KERNEL);
-	if (!new)
-		return -ENOMEM;
-
-	skb_copy_header(new, old);
-	__skb_put_zero(new, tx_len);
-	memcpy(new->data, old->data, sizeof(struct ieee80211_hdr));
-
-	dev_kfree_skb(old);
-	td->tx_skb = new;
+	ret = mt76_testmode_alloc_skb(phy->mt76, tx_len);
+	if (ret)
+		return ret;
 
 	return 0;
 }
-- 
cgit v1.2.3


From cc91747be98f2a3fc305cf3efc8f3a9b7f6a9f3b Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Mon, 12 Apr 2021 13:39:54 +0800
Subject: mt76: mt7915: fix rate setting of tx descriptor in testmode

Fix ofdm rate index and ldpc setting in rate setting field of tx
descriptor.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 3f3bfead1ce7..f99c2690d73c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -661,19 +661,18 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
 {
 #ifdef CONFIG_NL80211_TESTMODE
 	struct mt76_testmode_data *td = &phy->mt76->test;
+	const struct ieee80211_rate *r;
+	u8 bw, mode, nss = td->tx_rate_nss;
 	u8 rate_idx = td->tx_rate_idx;
-	u8 nss = td->tx_rate_nss;
-	u8 bw, mode;
 	u16 rateval = 0;
 	u32 val;
+	bool cck = false;
+	int band;
 
 	if (skb != phy->mt76->test.tx_skb)
 		return;
 
 	switch (td->tx_rate_mode) {
-	case MT76_TM_TX_MODE_CCK:
-		mode = MT_PHY_TYPE_CCK;
-		break;
 	case MT76_TM_TX_MODE_HT:
 		nss = 1 + (rate_idx >> 3);
 		mode = MT_PHY_TYPE_HT;
@@ -693,7 +692,20 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
 	case MT76_TM_TX_MODE_HE_MU:
 		mode = MT_PHY_TYPE_HE_MU;
 		break;
+	case MT76_TM_TX_MODE_CCK:
+		cck = true;
+		fallthrough;
 	case MT76_TM_TX_MODE_OFDM:
+		band = phy->mt76->chandef.chan->band;
+		if (band == NL80211_BAND_2GHZ && !cck)
+			rate_idx += 4;
+
+		r = &phy->mt76->hw->wiphy->bands[band]->bitrates[rate_idx];
+		val = cck ? r->hw_value_short : r->hw_value;
+
+		mode = val >> 8;
+		rate_idx = val & 0xff;
+		break;
 	default:
 		mode = MT_PHY_TYPE_OFDM;
 		break;
@@ -748,9 +760,10 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi,
 	if (mode >= MT_PHY_TYPE_HE_SU)
 		val |= FIELD_PREP(MT_TXD6_HELTF, td->tx_ltf);
 
-	if (td->tx_rate_ldpc || bw > 0)
+	if (td->tx_rate_ldpc || (bw > 0 && mode >= MT_PHY_TYPE_HE_SU))
 		val |= MT_TXD6_LDPC;
 
+	txwi[3] &= ~cpu_to_le32(MT_TXD3_SN_VALID);
 	txwi[6] |= cpu_to_le32(val);
 	txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX,
 					  phy->test.spe_idx));
-- 
cgit v1.2.3


From 8ab31da7b89f71c4c2defcca989fab7b42f87d71 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 13 Apr 2021 13:34:56 +0800
Subject: mt76: mt7615: fix memleak when mt7615_unregister_device()

mt7615_tx_token_put() should get call before mt76_free_pending_txwi().

Fixes: a6275e934605 ("mt76: mt7615: reset token when mac_reset happens")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index a629d9cb3806..a03484e34bb4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -164,10 +164,9 @@ void mt7615_unregister_device(struct mt7615_dev *dev)
 	mt76_unregister_device(&dev->mt76);
 	if (mcu_running)
 		mt7615_mcu_exit(dev);
-	mt7615_dma_cleanup(dev);
 
 	mt7615_tx_token_put(dev);
-
+	mt7615_dma_cleanup(dev);
 	tasklet_disable(&dev->irq_tasklet);
 
 	mt76_free_device(&dev->mt76);
-- 
cgit v1.2.3


From e9d32af478cfc3744a45245c0b126738af4b3ac4 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 13 Apr 2021 13:34:57 +0800
Subject: mt76: mt7915: fix memleak when mt7915_unregister_device()

mt7915_tx_token_put() should get call before mt76_free_pending_txwi().

Fixes: f285dfb98562 ("mt76: mt7915: reset token when mac_reset happens")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 32c371055596..d242e186424d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -740,9 +740,8 @@ void mt7915_unregister_device(struct mt7915_dev *dev)
 	mt7915_unregister_ext_phy(dev);
 	mt76_unregister_device(&dev->mt76);
 	mt7915_mcu_exit(dev);
-	mt7915_dma_cleanup(dev);
-
 	mt7915_tx_token_put(dev);
+	mt7915_dma_cleanup(dev);
 
 	mt76_free_device(&dev->mt76);
 }
-- 
cgit v1.2.3


From 6362dd16596e8a694f895089726fac103b7f47ef Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 13 Apr 2021 15:26:20 +0800
Subject: mt76: mt7915: only free skbs after mt7915_dma_reset() when reset
 happens

In mt7915_mac_reset_work(), make sure freeing skbs after mt7915_dma_reset().

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index f99c2690d73c..35ab4bf011eb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1643,12 +1643,12 @@ void mt7915_mac_reset_work(struct work_struct *work)
 
 	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED);
 
-	mt7915_tx_token_put(dev);
-	idr_init(&dev->token);
-
 	if (mt7915_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
 		mt7915_dma_reset(dev);
 
+		mt7915_tx_token_put(dev);
+		idr_init(&dev->token);
+
 		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_INIT);
 		mt7915_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
 	}
-- 
cgit v1.2.3


From 91577ccae6461506a06889849dd944d9bdec09dd Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 13 Apr 2021 16:00:06 +0800
Subject: mt76: mt7615: only free skbs after mt7615_dma_reset() when reset
 happens

In mt7615_mac_reset_work(), make sure freeing skbs after mt7615_dma_reset().

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index 1b206ccdadf2..9ac4bdabc0ef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -303,12 +303,12 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 	mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_STOPPED);
 
-	mt7615_tx_token_put(dev);
-	idr_init(&dev->token);
-
 	if (mt7615_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
 		mt7615_dma_reset(dev);
 
+		mt7615_tx_token_put(dev);
+		idr_init(&dev->token);
+
 		mt76_wr(dev, MT_WPDMA_MEM_RNG_ERR, 0);
 
 		mt7615_hif_int_event_trigger(dev, MT_MCU_INT_EVENT_PDMA_INIT);
-- 
cgit v1.2.3


From 06991d1f73a9bdbc5f234ee96737b9102705b89c Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 13 Apr 2021 16:00:07 +0800
Subject: mt76: mt7615: use ieee80211_free_txskb() in mt7615_tx_token_put()

We should use ieee80211_free_txskb() to report skb status avoid wrong
aql accounting after reset.

Cc: stable@vger.kernel.org
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 005c2829d3df..0b386030dbd8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1976,8 +1976,12 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
 	spin_lock_bh(&dev->token_lock);
 	idr_for_each_entry(&dev->token, txwi, id) {
 		mt7615_txp_skb_unmap(&dev->mt76, txwi);
-		if (txwi->skb)
-			dev_kfree_skb_any(txwi->skb);
+		if (txwi->skb) {
+			struct ieee80211_hw *hw;
+
+			hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb);
+			ieee80211_free_txskb(hw, txwi->skb);
+		}
 		mt76_put_txwi(&dev->mt76, txwi);
 	}
 	spin_unlock_bh(&dev->token_lock);
-- 
cgit v1.2.3


From 2b9ea5a8cf1bdc82f494da5a90191aa8b042980d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 13 Apr 2021 14:13:54 +0200
Subject: mt76: mt7921: add mt7921_dma_cleanup in mt7921_unregister_device

In order to avoid memory leaks, clean the dma engine unloading the
module

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 5bb0a7b9e9e5..94fe2eadf285 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -272,8 +272,9 @@ int mt7921_register_device(struct mt7921_dev *dev)
 void mt7921_unregister_device(struct mt7921_dev *dev)
 {
 	mt76_unregister_device(&dev->mt76);
-	mt7921_mcu_exit(dev);
 	mt7921_tx_token_put(dev);
+	mt7921_dma_cleanup(dev);
+	mt7921_mcu_exit(dev);
 
 	tasklet_disable(&dev->irq_tasklet);
 	mt76_free_device(&dev->mt76);
-- 
cgit v1.2.3


From 6929d1d747b3934df3b0b2bb8af31b3f1f539ae4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 13 Apr 2021 20:09:33 +0200
Subject: mt76: flush tx status queue on DMA reset

After DMA reset, tx status information for queued frames will never arrive.
Flush the queue to free skbs immediately instead of waiting for a timeout

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c     | 2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c | 2 ++
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c   | 2 ++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c     | 2 ++
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c     | 2 ++
 5 files changed, 10 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index e3a9dd6fbd87..fbceb07c5f37 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1445,6 +1445,8 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev)
 		mt76_queue_rx_reset(dev, i);
 	}
 
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
 	mt7603_dma_sched_reset(dev);
 
 	mt7603_mac_dma_start(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index 9ac4bdabc0ef..8cd79e849045 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -201,6 +201,8 @@ void mt7615_dma_reset(struct mt7615_dev *dev)
 	mt76_for_each_q_rx(&dev->mt76, i)
 		mt76_queue_rx_reset(dev, i);
 
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
 	mt7615_dma_start(dev);
 }
 EXPORT_SYMBOL_GPL(mt7615_dma_reset);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index fc12824ab74e..ce1e9ad23fec 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -472,6 +472,8 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
 		mt76_queue_rx_reset(dev, i);
 	}
 
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
 	mt76x02_mac_start(dev);
 
 	if (dev->ed_monitor)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 35ab4bf011eb..6a4b57509751 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1564,6 +1564,8 @@ mt7915_dma_reset(struct mt7915_dev *dev)
 	mt76_for_each_q_rx(&dev->mt76, i)
 		mt76_queue_rx_reset(dev, i);
 
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
 	/* re-init prefetch settings after reset */
 	mt7915_dma_prefetch(dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index b507f3917830..572bab82315a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1254,6 +1254,8 @@ mt7921_dma_reset(struct mt7921_dev *dev)
 	mt76_for_each_q_rx(&dev->mt76, i)
 		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
 
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
 	/* configure perfetch settings */
 	mt7921_dma_prefetch(dev);
 
-- 
cgit v1.2.3


From 2de6ccebe0e778b80b4092eff33918a752c48804 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 13 Apr 2021 11:08:35 +0200
Subject: dt-bindings:net:wireless:mediatek,mt76: introduce power-limits node

Introduce power-limits node in mt76 binding in order to specify
per-rate power limit values for each 802.11n/802.11ac rate

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../bindings/net/wireless/mediatek,mt76.yaml       | 107 +++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
index d6f835d17d66..3e2c2e43175e 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
@@ -72,6 +72,90 @@ properties:
       led-sources:
         maxItems: 1
 
+  power-limits:
+    type: object
+    additionalProperties: false
+    patternProperties:
+      "^r[0-9]+":
+        type: object
+        additionalProperties: false
+        properties:
+          regdomain:
+            $ref: /schemas/types.yaml#/definitions/string
+            description:
+              Regdomain refers to a legal regulatory region. Different
+              countries define different levels of allowable transmitter
+              power, time that a channel can be occupied, and different
+              available channels
+            enum:
+              - FCC
+              - ETSI
+              - JP
+
+        patternProperties:
+          "^txpower-[256]g$":
+            type: object
+            additionalProperties: false
+            patternProperties:
+              "^b[0-9]+$":
+                type: object
+                additionalProperties: false
+                properties:
+                  channels:
+                    $ref: /schemas/types.yaml#/definitions/uint32-array
+                    minItems: 2
+                    maxItems: 2
+                    description:
+                      Pairs of first and last channel number of the selected
+                      band
+
+                  rates-cck:
+                    $ref: /schemas/types.yaml#/definitions/uint8-array
+                    minItems: 4
+                    maxItems: 4
+                    description:
+                      4 half-dBm per-rate power limit values
+
+                  rates-ofdm:
+                    $ref: /schemas/types.yaml#/definitions/uint8-array
+                    minItems: 8
+                    maxItems: 8
+                    description:
+                      8 half-dBm per-rate power limit values
+
+                  rates-mcs:
+                    $ref: /schemas/types.yaml#/definitions/uint8-matrix
+                    description:
+                      Sets of per-rate power limit values for 802.11n/802.11ac
+                      rates for multiple channel bandwidth settings.
+                      Each set starts with the number of channel bandwidth
+                      settings for which the rate set applies, followed by
+                      either 8 or 10 power limit values. The order of the
+                      channel bandwidth settings is 20, 40, 80 and 160 MHz.
+                    maxItems: 4
+                    items:
+                      minItems: 9
+                      maxItems: 11
+
+                  rates-ru:
+                    $ref: /schemas/types.yaml#/definitions/uint8-matrix
+                    description:
+                      Sets of per-rate power limit values for 802.11ax rates
+                      for multiple channel bandwidth or resource unit settings.
+                      Each set starts with the number of channel bandwidth or
+                      resource unit settings for which the rate set applies,
+                      followed by 12 power limit values. The order of the
+                      channel resource unit settings is RU26, RU52, RU106,
+                      RU242/SU20, RU484/SU40, RU996/SU80 and RU2x996/SU160.
+                    items:
+                      minItems: 13
+                      maxItems: 13
+
+                  txs-delta:
+                    $ref: /schemas/types.yaml#/definitions/uint32-array
+                    description:
+                      Half-dBm power delta for different numbers of antennas
+
 required:
   - compatible
   - reg
@@ -93,6 +177,29 @@ examples:
         led {
           led-sources = <2>;
         };
+
+        power-limits {
+          r0 {
+            regdomain = "FCC";
+            txpower-5g {
+               b0 {
+                   channels = <36 48>;
+                   rates-ofdm = /bits/ 8 <23 23 23 23 23 23 23 23>;
+                   rates-mcs = /bits/ 8 <1 23 23 23 23 23 23 23 23 23 23>,
+                                        <3 22 22 22 22 22 22 22 22 22 22>;
+                   rates-ru = /bits/ 8 <3 22 22 22 22 22 22 22 22 22 22 22 22>,
+                                       <4 20 20 20 20 20 20 20 20 20 20 20 20>;
+               };
+               b1 {
+                   channels = <100 181>;
+                   rates-ofdm = /bits/ 8 <14 14 14 14 14 14 14 14>;
+                   rates-mcs = /bits/ 8  <4 14 14 14 14 14 14 14 14 14 14>;
+                   txs-delta = <12 9 6>;
+                   rates-ru = /bits/ 8  <7 14 14 14 14 14 14 14 14 14 14 14 14>;
+               };
+             };
+          };
+        };
       };
     };
 
-- 
cgit v1.2.3


From 22b980badc0fc746431b81b9d402cf0612f59a7a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 13 Apr 2021 11:08:36 +0200
Subject: mt76: add functions for parsing rate power limits from DT

This subnode can be used to set per-rate tx power limits either per
country code / regdomain or globally.
These limits are typically provided by the device manufacturers and are
used to limit sideband emissions and stay within regulatory limits

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/eeprom.c | 204 ++++++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt76.h   |  12 ++
 2 files changed, 216 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c
index 6d895738222a..2dbc371654cd 100644
--- a/drivers/net/wireless/mediatek/mt76/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/eeprom.c
@@ -104,6 +104,210 @@ mt76_eeprom_override(struct mt76_phy *phy)
 }
 EXPORT_SYMBOL_GPL(mt76_eeprom_override);
 
+static bool mt76_string_prop_find(struct property *prop, const char *str)
+{
+	const char *cp = NULL;
+
+	if (!prop || !str || !str[0])
+		return false;
+
+	while ((cp = of_prop_next_string(prop, cp)) != NULL)
+		if (!strcasecmp(cp, str))
+			return true;
+
+	return false;
+}
+
+static struct device_node *
+mt76_find_power_limits_node(struct mt76_dev *dev)
+{
+	struct device_node *np = dev->dev->of_node;
+	const char *const region_names[] = {
+		[NL80211_DFS_ETSI] = "etsi",
+		[NL80211_DFS_FCC] = "fcc",
+		[NL80211_DFS_JP] = "jp",
+	};
+	struct device_node *cur, *fallback = NULL;
+	const char *region_name = NULL;
+
+	if (dev->region < ARRAY_SIZE(region_names))
+		region_name = region_names[dev->region];
+
+	np = of_get_child_by_name(np, "power-limits");
+	if (!np)
+		return NULL;
+
+	for_each_child_of_node(np, cur) {
+		struct property *country = of_find_property(cur, "country", NULL);
+		struct property *regd = of_find_property(cur, "regdomain", NULL);
+
+		if (!country && !regd) {
+			fallback = cur;
+			continue;
+		}
+
+		if (mt76_string_prop_find(country, dev->alpha2) ||
+		    mt76_string_prop_find(regd, region_name))
+			return cur;
+	}
+
+	return fallback;
+}
+
+static const __be32 *
+mt76_get_of_array(struct device_node *np, char *name, size_t *len, int min)
+{
+	struct property *prop = of_find_property(np, name, NULL);
+
+	if (!prop || !prop->value || prop->length < min * 4)
+		return NULL;
+
+	*len = prop->length;
+
+	return prop->value;
+}
+
+static struct device_node *
+mt76_find_channel_node(struct device_node *np, struct ieee80211_channel *chan)
+{
+	struct device_node *cur;
+	const __be32 *val;
+	size_t len;
+
+	for_each_child_of_node(np, cur) {
+		val = mt76_get_of_array(cur, "channels", &len, 2);
+		if (!val)
+			continue;
+
+		while (len >= 2 * sizeof(*val)) {
+			if (chan->hw_value >= be32_to_cpu(val[0]) &&
+			    chan->hw_value <= be32_to_cpu(val[1]))
+				return cur;
+
+			val += 2;
+			len -= 2 * sizeof(*val);
+		}
+	}
+
+	return NULL;
+}
+
+static s8
+mt76_get_txs_delta(struct device_node *np, u8 nss)
+{
+	const __be32 *val;
+	size_t len;
+
+	val = mt76_get_of_array(np, "txs-delta", &len, nss);
+	if (!val)
+		return 0;
+
+	return be32_to_cpu(val[nss - 1]);
+}
+
+static void
+mt76_apply_array_limit(s8 *pwr, size_t pwr_len, const __be32 *data,
+		       s8 target_power, s8 nss_delta, s8 *max_power)
+{
+	int i;
+
+	if (!data)
+		return;
+
+	for (i = 0; i < pwr_len; i++) {
+		pwr[i] = min_t(s8, target_power,
+			       be32_to_cpu(data[i]) + nss_delta);
+		*max_power = max(*max_power, pwr[i]);
+	}
+}
+
+s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
+			      struct ieee80211_channel *chan,
+			      struct mt76_power_limits *dest,
+			      s8 target_power)
+{
+	struct mt76_dev *dev = phy->dev;
+	struct device_node *np;
+	const __be32 *val;
+	char name[16];
+	u32 mcs_rates = dev->drv->mcs_rates;
+	char band;
+	size_t len;
+	int i, cur;
+	s8 max_power = 0;
+	s8 txs_delta;
+
+	if (!mcs_rates)
+		mcs_rates = 10;
+
+	memset(dest, target_power, sizeof(*dest));
+
+	if (!IS_ENABLED(CONFIG_OF))
+		return target_power;
+
+	np = mt76_find_power_limits_node(dev);
+	if (!np)
+		return target_power;
+
+	switch (chan->band) {
+	case NL80211_BAND_2GHZ:
+		band = '2';
+		break;
+	case NL80211_BAND_5GHZ:
+		band = '5';
+		break;
+	default:
+		return target_power;
+	}
+
+	snprintf(name, sizeof(name), "txpower-%cg", band);
+	np = of_get_child_by_name(np, name);
+	if (!np)
+		return target_power;
+
+	np = mt76_find_channel_node(np, chan);
+	if (!np)
+		return target_power;
+
+	txs_delta = mt76_get_txs_delta(np, hweight8(phy->antenna_mask));
+
+	val = mt76_get_of_array(np, "rates-cck", &len, ARRAY_SIZE(dest->cck));
+	mt76_apply_array_limit(dest->cck, ARRAY_SIZE(dest->cck), val,
+			       target_power, txs_delta, &max_power);
+
+	val = mt76_get_of_array(np, "rates-ofdm",
+				&len, ARRAY_SIZE(dest->ofdm));
+	mt76_apply_array_limit(dest->ofdm, ARRAY_SIZE(dest->ofdm), val,
+			       target_power, txs_delta, &max_power);
+
+	val = mt76_get_of_array(np, "rates-mcs", &len, mcs_rates + 1);
+	if (!val)
+		return max_power;
+
+	len /= 4;
+	cur = be32_to_cpu(val[0]);
+	for (i = 0; i < ARRAY_SIZE(dest->mcs); i++) {
+		if (len < mcs_rates + 1)
+			break;
+
+		mt76_apply_array_limit(dest->mcs[i], ARRAY_SIZE(dest->mcs[i]),
+				       val + 1, target_power, txs_delta,
+				       &max_power);
+		if (--cur > 0)
+			continue;
+
+		val += mcs_rates + 1;
+		len -= mcs_rates + 1;
+		if (!len)
+			break;
+
+		cur = be32_to_cpu(val[0]);
+	}
+
+	return max_power;
+}
+EXPORT_SYMBOL_GPL(mt76_get_rate_power_limits);
+
 int
 mt76_eeprom_init(struct mt76_dev *dev, int len)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index d60a1dd8f314..122a58c87f46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -331,6 +331,7 @@ struct mt76_driver_ops {
 	u32 drv_flags;
 	u32 survey_flags;
 	u16 txwi_size;
+	u8 mcs_rates;
 
 	void (*update_survey)(struct mt76_dev *dev);
 
@@ -711,6 +712,12 @@ struct mt76_dev {
 	};
 };
 
+struct mt76_power_limits {
+	s8 cck[4];
+	s8 ofdm[8];
+	s8 mcs[4][10];
+};
+
 enum mt76_phy_type {
 	MT_PHY_TYPE_CCK,
 	MT_PHY_TYPE_OFDM,
@@ -1195,4 +1202,9 @@ mt76_mcu_skb_send_msg(struct mt76_dev *dev, struct sk_buff *skb, int cmd,
 
 void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr, u32 clear, u32 set);
 
+s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
+			      struct ieee80211_channel *chan,
+			      struct mt76_power_limits *dest,
+			      s8 target_power);
+
 #endif
-- 
cgit v1.2.3


From a9627d992b5e3aa18315094b501eba0f4d883419 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Tue, 13 Apr 2021 11:08:37 +0200
Subject: mt76: extend DT rate power limits to support 11ax devices

Enable parsing per-rate txpower limits from DT for 11ax chipsets.

Co-developed-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/eeprom.c | 62 ++++++++++++++++++-----------
 drivers/net/wireless/mediatek/mt76/mt76.h   |  1 +
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c
index 2dbc371654cd..6f75e949e4fd 100644
--- a/drivers/net/wireless/mediatek/mt76/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/eeprom.c
@@ -221,6 +221,36 @@ mt76_apply_array_limit(s8 *pwr, size_t pwr_len, const __be32 *data,
 	}
 }
 
+static void
+mt76_apply_multi_array_limit(s8 *pwr, size_t pwr_len, s8 pwr_num,
+			     const __be32 *data, size_t len, s8 target_power,
+			     s8 nss_delta, s8 *max_power)
+{
+	int i, cur;
+
+	if (!data)
+		return;
+
+	len /= 4;
+	cur = be32_to_cpu(data[0]);
+	for (i = 0; i < pwr_num; i++) {
+		if (len < pwr_len + 1)
+			break;
+
+		mt76_apply_array_limit(pwr + pwr_len * i, pwr_len, data + 1,
+				       target_power, nss_delta, max_power);
+		if (--cur > 0)
+			continue;
+
+		data += pwr_len + 1;
+		len -= pwr_len + 1;
+		if (!len)
+			break;
+
+		cur = be32_to_cpu(data[0]);
+	}
+}
+
 s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
 			      struct ieee80211_channel *chan,
 			      struct mt76_power_limits *dest,
@@ -231,9 +261,9 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
 	const __be32 *val;
 	char name[16];
 	u32 mcs_rates = dev->drv->mcs_rates;
+	u32 ru_rates = ARRAY_SIZE(dest->ru[0]);
 	char band;
 	size_t len;
-	int i, cur;
 	s8 max_power = 0;
 	s8 txs_delta;
 
@@ -281,28 +311,14 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
 			       target_power, txs_delta, &max_power);
 
 	val = mt76_get_of_array(np, "rates-mcs", &len, mcs_rates + 1);
-	if (!val)
-		return max_power;
-
-	len /= 4;
-	cur = be32_to_cpu(val[0]);
-	for (i = 0; i < ARRAY_SIZE(dest->mcs); i++) {
-		if (len < mcs_rates + 1)
-			break;
-
-		mt76_apply_array_limit(dest->mcs[i], ARRAY_SIZE(dest->mcs[i]),
-				       val + 1, target_power, txs_delta,
-				       &max_power);
-		if (--cur > 0)
-			continue;
-
-		val += mcs_rates + 1;
-		len -= mcs_rates + 1;
-		if (!len)
-			break;
-
-		cur = be32_to_cpu(val[0]);
-	}
+	mt76_apply_multi_array_limit(dest->mcs[0], ARRAY_SIZE(dest->mcs[0]),
+				     ARRAY_SIZE(dest->mcs), val, len,
+				     target_power, txs_delta, &max_power);
+
+	val = mt76_get_of_array(np, "rates-ru", &len, ru_rates + 1);
+	mt76_apply_multi_array_limit(dest->ru[0], ARRAY_SIZE(dest->ru[0]),
+				     ARRAY_SIZE(dest->ru), val, len,
+				     target_power, txs_delta, &max_power);
 
 	return max_power;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 122a58c87f46..bf049fb4c412 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -716,6 +716,7 @@ struct mt76_power_limits {
 	s8 cck[4];
 	s8 ofdm[8];
 	s8 mcs[4][10];
+	s8 ru[7][12];
 };
 
 enum mt76_phy_type {
-- 
cgit v1.2.3


From fb0d90540b66523069d15ac05acab4ceb8e01055 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 13 Apr 2021 11:08:38 +0200
Subject: mt76: mt7615: implement support for using DT rate power limits

Limits are used to update the channel max_power settings and also passed
to the firmware on channel changes

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 10 +++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 61 +++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 1e418740c17b..e58f46d6a85b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -252,6 +252,7 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 	int delta_idx, delta = mt76_tx_power_nss_delta(n_chains);
 	u8 *eep = (u8 *)dev->mt76.eeprom.data;
 	enum nl80211_band band = sband->band;
+	struct mt76_power_limits limits;
 	u8 rate_val;
 
 	delta_idx = mt7615_eeprom_get_power_delta_index(dev, band);
@@ -280,7 +281,11 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 			target_power = max(target_power, eep[index]);
 		}
 
-		target_power = DIV_ROUND_UP(target_power + delta, 2);
+		target_power = mt76_get_rate_power_limits(&dev->mphy, chan,
+							  &limits,
+							  target_power);
+		target_power += delta;
+		target_power = DIV_ROUND_UP(target_power, 2);
 		chan->max_power = min_t(int, chan->max_reg_power,
 					target_power);
 		chan->orig_mpwr = target_power;
@@ -311,6 +316,9 @@ mt7615_regd_notifier(struct wiphy *wiphy,
 	memcpy(dev->mt76.alpha2, request->alpha2, sizeof(dev->mt76.alpha2));
 	dev->mt76.region = request->dfs_region;
 
+	mt7615_init_txpower(dev, &mphy->sband_2g.sband);
+	mt7615_init_txpower(dev, &mphy->sband_5g.sband);
+
 	mt7615_mutex_acquire(dev);
 
 	if (chandef->chan->flags & IEEE80211_CHAN_RADAR)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 9b9f8d88e9bb..cb325b0efe9b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2137,16 +2137,75 @@ static void mt7615_mcu_set_txpower_sku(struct mt7615_phy *phy, u8 *sku)
 {
 	struct mt76_phy *mphy = phy->mt76;
 	struct ieee80211_hw *hw = mphy->hw;
+	struct mt76_power_limits limits;
+	s8 *limits_array = (s8 *)&limits;
 	int n_chains = hweight8(mphy->antenna_mask);
 	int tx_power;
 	int i;
+	static const u8 sku_mapping[] = {
+#define SKU_FIELD(_type, _field) \
+		[MT_SKU_##_type] = offsetof(struct mt76_power_limits, _field)
+		SKU_FIELD(CCK_1_2, cck[0]),
+		SKU_FIELD(CCK_55_11, cck[2]),
+		SKU_FIELD(OFDM_6_9, ofdm[0]),
+		SKU_FIELD(OFDM_12_18, ofdm[2]),
+		SKU_FIELD(OFDM_24_36, ofdm[4]),
+		SKU_FIELD(OFDM_48, ofdm[6]),
+		SKU_FIELD(OFDM_54, ofdm[7]),
+		SKU_FIELD(HT20_0_8, mcs[0][0]),
+		SKU_FIELD(HT20_32, ofdm[0]),
+		SKU_FIELD(HT20_1_2_9_10, mcs[0][1]),
+		SKU_FIELD(HT20_3_4_11_12, mcs[0][3]),
+		SKU_FIELD(HT20_5_13, mcs[0][5]),
+		SKU_FIELD(HT20_6_14, mcs[0][6]),
+		SKU_FIELD(HT20_7_15, mcs[0][7]),
+		SKU_FIELD(HT40_0_8, mcs[1][0]),
+		SKU_FIELD(HT40_32, ofdm[0]),
+		SKU_FIELD(HT40_1_2_9_10, mcs[1][1]),
+		SKU_FIELD(HT40_3_4_11_12, mcs[1][3]),
+		SKU_FIELD(HT40_5_13, mcs[1][5]),
+		SKU_FIELD(HT40_6_14, mcs[1][6]),
+		SKU_FIELD(HT40_7_15, mcs[1][7]),
+		SKU_FIELD(VHT20_0, mcs[0][0]),
+		SKU_FIELD(VHT20_1_2, mcs[0][1]),
+		SKU_FIELD(VHT20_3_4, mcs[0][3]),
+		SKU_FIELD(VHT20_5_6, mcs[0][5]),
+		SKU_FIELD(VHT20_7, mcs[0][7]),
+		SKU_FIELD(VHT20_8, mcs[0][8]),
+		SKU_FIELD(VHT20_9, mcs[0][9]),
+		SKU_FIELD(VHT40_0, mcs[1][0]),
+		SKU_FIELD(VHT40_1_2, mcs[1][1]),
+		SKU_FIELD(VHT40_3_4, mcs[1][3]),
+		SKU_FIELD(VHT40_5_6, mcs[1][5]),
+		SKU_FIELD(VHT40_7, mcs[1][7]),
+		SKU_FIELD(VHT40_8, mcs[1][8]),
+		SKU_FIELD(VHT40_9, mcs[1][9]),
+		SKU_FIELD(VHT80_0, mcs[2][0]),
+		SKU_FIELD(VHT80_1_2, mcs[2][1]),
+		SKU_FIELD(VHT80_3_4, mcs[2][3]),
+		SKU_FIELD(VHT80_5_6, mcs[2][5]),
+		SKU_FIELD(VHT80_7, mcs[2][7]),
+		SKU_FIELD(VHT80_8, mcs[2][8]),
+		SKU_FIELD(VHT80_9, mcs[2][9]),
+		SKU_FIELD(VHT160_0, mcs[3][0]),
+		SKU_FIELD(VHT160_1_2, mcs[3][1]),
+		SKU_FIELD(VHT160_3_4, mcs[3][3]),
+		SKU_FIELD(VHT160_5_6, mcs[3][5]),
+		SKU_FIELD(VHT160_7, mcs[3][7]),
+		SKU_FIELD(VHT160_8, mcs[3][8]),
+		SKU_FIELD(VHT160_9, mcs[3][9]),
+#undef SKU_FIELD
+	};
 
 	tx_power = hw->conf.power_level * 2 -
 		   mt76_tx_power_nss_delta(n_chains);
+
+	tx_power = mt76_get_rate_power_limits(mphy, mphy->chandef.chan,
+					      &limits, tx_power);
 	mphy->txpower_cur = tx_power;
 
 	for (i = 0; i < MT_SKU_1SS_DELTA; i++)
-		sku[i] = tx_power;
+		sku[i] = limits_array[sku_mapping[i]];
 
 	for (i = 0; i < 4; i++) {
 		int delta = 0;
-- 
cgit v1.2.3


From 729d3dbd3bf23d03b8259e692c5505d6a647726a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 13 Apr 2021 11:08:39 +0200
Subject: mt76: mt7615: do not use mt7615 single-sku values for mt7663

mt7663 mcu relies on different APIs to configure APIs per-rate power limit
respect to mt7615 driver.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index cb325b0efe9b..eab490d07255 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2204,6 +2204,11 @@ static void mt7615_mcu_set_txpower_sku(struct mt7615_phy *phy, u8 *sku)
 					      &limits, tx_power);
 	mphy->txpower_cur = tx_power;
 
+	if (is_mt7663(mphy->dev)) {
+		memset(sku, tx_power, MT_SKU_4SS_DELTA + 1);
+		return;
+	}
+
 	for (i = 0; i < MT_SKU_1SS_DELTA; i++)
 		sku[i] = limits_array[sku_mapping[i]];
 
-- 
cgit v1.2.3


From 18369a4f9d73bf0ccd43d8df691d394281ee3ed4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 13 Apr 2021 11:08:40 +0200
Subject: mt76: introduce single-sku support for mt7663/mt7921

Introduce support for rate-txpower compensation for mt7663/mt7921 chipsets.
Rate-txpower limit is specified through dts

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   5 +-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |   4 +
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 133 +++++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  23 ++++
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |   2 +
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |   4 +
 6 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index e58f46d6a85b..d84662fb0304 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -323,8 +323,11 @@ mt7615_regd_notifier(struct wiphy *wiphy,
 
 	if (chandef->chan->flags & IEEE80211_CHAN_RADAR)
 		mt7615_dfs_init_radar_detector(phy);
-	if (mt7615_firmware_offload(phy->dev))
+
+	if (mt7615_firmware_offload(phy->dev)) {
 		mt76_connac_mcu_set_channel_domain(mphy);
+		mt76_connac_mcu_set_rate_txpower(mphy);
+	}
 
 	mt7615_mutex_release(dev);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index e30b256784e0..62d9df47a1f6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -66,6 +66,10 @@ static int mt7615_start(struct ieee80211_hw *hw)
 		ret = mt76_connac_mcu_set_channel_domain(phy->mt76);
 		if (ret)
 			goto out;
+
+		ret = mt76_connac_mcu_set_rate_txpower(phy->mt76);
+		if (ret)
+			goto out;
 	}
 
 	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index cc842e3e0027..e057347398bb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1560,6 +1560,139 @@ void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_coredump_event);
 
+static void
+mt76_connac_mcu_build_sku(struct mt76_dev *dev, s8 *sku,
+			  struct mt76_power_limits *limits,
+			  enum nl80211_band band)
+{
+	int max_power = is_mt7921(dev) ? 127 : 63;
+	int i, offset = sizeof(limits->cck);
+
+	memset(sku, max_power, MT_SKU_POWER_LIMIT);
+
+	if (band == NL80211_BAND_2GHZ) {
+		/* cck */
+		memcpy(sku, limits->cck, sizeof(limits->cck));
+	}
+
+	/* ofdm */
+	memcpy(&sku[offset], limits->ofdm, sizeof(limits->ofdm));
+	offset += sizeof(limits->ofdm);
+
+	/* ht */
+	for (i = 0; i < 2; i++) {
+		memcpy(&sku[offset], limits->mcs[i], 8);
+		offset += 8;
+	}
+	sku[offset++] = limits->mcs[0][0];
+
+	/* vht */
+	for (i = 0; i < ARRAY_SIZE(limits->mcs); i++) {
+		memcpy(&sku[offset], limits->mcs[i],
+		       ARRAY_SIZE(limits->mcs[i]));
+		offset += 12;
+	}
+
+	if (!is_mt7921(dev))
+		return;
+
+	/* he */
+	for (i = 0; i < ARRAY_SIZE(limits->ru); i++) {
+		memcpy(&sku[offset], limits->ru[i], ARRAY_SIZE(limits->ru[i]));
+		offset += ARRAY_SIZE(limits->ru[i]);
+	}
+}
+
+static int
+mt76_connac_mcu_rate_txpower_band(struct mt76_phy *phy,
+				  enum nl80211_band band)
+{
+	struct mt76_dev *dev = phy->dev;
+	int sku_len, batch_len = is_mt7921(dev) ? 8 : 16;
+	static const u8 chan_list_2ghz[] = {
+		1, 2,  3,  4,  5,  6,  7,
+		8, 9, 10, 11, 12, 13, 14
+	};
+	static const u8 chan_list_5ghz[] = {
+		 36,  38,  40,  42,  44,  46,  48,
+		 50,  52,  54,  56,  58,  60,  62,
+		 64, 100, 102, 104, 106, 108, 110,
+		112, 114, 116, 118, 120, 122, 124,
+		126, 128, 132, 134, 136, 138, 140,
+		142, 144, 149, 151, 153, 155, 157,
+		159, 161, 165
+	};
+	struct mt76_connac_sku_tlv sku_tlbv;
+	int i, n_chan, batch_size, idx = 0;
+	struct mt76_power_limits limits;
+	const u8 *ch_list;
+
+	sku_len = is_mt7921(dev) ? sizeof(sku_tlbv) : sizeof(sku_tlbv) - 92;
+
+	if (band == NL80211_BAND_2GHZ) {
+		n_chan = ARRAY_SIZE(chan_list_2ghz);
+		ch_list = chan_list_2ghz;
+	} else {
+		n_chan = ARRAY_SIZE(chan_list_5ghz);
+		ch_list = chan_list_5ghz;
+	}
+	batch_size = DIV_ROUND_UP(n_chan, batch_len);
+
+	for (i = 0; i < batch_size; i++) {
+		bool last_msg = i == batch_size - 1;
+		int num_ch = last_msg ? n_chan % batch_len : batch_len;
+		struct mt76_connac_tx_power_limit_tlv tx_power_tlv = {
+			.band = band == NL80211_BAND_2GHZ ? 1 : 2,
+			.n_chan = num_ch,
+			.last_msg = last_msg,
+		};
+		struct sk_buff *skb;
+		int j, err, msg_len;
+
+		msg_len = sizeof(tx_power_tlv) + num_ch * sizeof(sku_tlbv);
+		skb = mt76_mcu_msg_alloc(dev, NULL, msg_len);
+		if (!skb)
+			return -ENOMEM;
+
+		BUILD_BUG_ON(sizeof(dev->alpha2) > sizeof(tx_power_tlv.alpha2));
+		memcpy(tx_power_tlv.alpha2, dev->alpha2, sizeof(dev->alpha2));
+
+		skb_put_data(skb, &tx_power_tlv, sizeof(tx_power_tlv));
+		for (j = 0; j < num_ch; j++, idx++) {
+			struct ieee80211_channel chan = {
+				.hw_value = ch_list[idx],
+				.band = band,
+			};
+
+			mt76_get_rate_power_limits(phy, &chan, &limits, 127);
+
+			sku_tlbv.channel = ch_list[idx];
+			mt76_connac_mcu_build_sku(dev, sku_tlbv.pwr_limit,
+						  &limits, band);
+			skb_put_data(skb, &sku_tlbv, sku_len);
+		}
+
+		err = mt76_mcu_skb_send_msg(dev, skb,
+					    MCU_CMD_SET_RATE_TX_POWER, false);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+int mt76_connac_mcu_set_rate_txpower(struct mt76_phy *phy)
+{
+	int err;
+
+	err = mt76_connac_mcu_rate_txpower_band(phy, NL80211_BAND_2GHZ);
+	if (err < 0)
+		return err;
+
+	return mt76_connac_mcu_rate_txpower_band(phy, NL80211_BAND_5GHZ);
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_set_rate_txpower);
+
 #ifdef CONFIG_PM
 
 const struct wiphy_wowlan_support mt76_connac_wowlan_support = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 587097450416..8d1b8e06af00 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -895,6 +895,28 @@ struct mt76_sta_cmd_info {
 	u8 rcpi;
 };
 
+#define MT_SKU_POWER_LIMIT	161
+
+struct mt76_connac_sku_tlv {
+	u8 channel;
+	s8 pwr_limit[MT_SKU_POWER_LIMIT];
+} __packed;
+
+struct mt76_connac_tx_power_limit_tlv {
+	/* DW0 - common info*/
+	u8 ver;
+	u8 pad0;
+	__le16 len;
+	/* DW1 - cmd hint */
+	u8 n_chan; /* # channel */
+	u8 band; /* 2.4GHz - 5GHz */
+	u8 last_msg;
+	u8 pad1;
+	/* DW3 */
+	u8 alpha2[4]; /* regulatory_request.alpha2 */
+	u8 pad2[32];
+} __packed;
+
 #define to_wcid_lo(id)		FIELD_GET(GENMASK(7, 0), (u16)id)
 #define to_wcid_hi(id)		FIELD_GET(GENMASK(9, 8), (u16)id)
 
@@ -996,4 +1018,5 @@ void mt76_connac_mcu_set_suspend_iter(void *priv, u8 *mac,
 int mt76_connac_mcu_chip_config(struct mt76_dev *dev);
 void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 				    struct mt76_connac_coredump *coredump);
+int mt76_connac_mcu_set_rate_txpower(struct mt76_phy *phy);
 #endif /* __MT76_CONNAC_MCU_H */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 94fe2eadf285..0aedddb90858 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -58,12 +58,14 @@ mt7921_regd_notifier(struct wiphy *wiphy,
 {
 	struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
 	struct mt7921_dev *dev = mt7921_hw_dev(hw);
+	struct mt7921_phy *phy = mt7921_hw_phy(hw);
 
 	memcpy(dev->mt76.alpha2, request->alpha2, sizeof(dev->mt76.alpha2));
 	dev->mt76.region = request->dfs_region;
 
 	mt7921_mutex_acquire(dev);
 	mt76_connac_mcu_set_channel_domain(hw->priv);
+	mt76_connac_mcu_set_rate_txpower(phy->mt76);
 	mt7921_mutex_release(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index c888e8249e2f..13910ac78df1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -182,6 +182,10 @@ int __mt7921_start(struct mt7921_phy *phy)
 	if (err)
 		return err;
 
+	err = mt76_connac_mcu_set_rate_txpower(phy->mt76);
+	if (err)
+		return err;
+
 	mt7921_mac_reset_counters(phy);
 	set_bit(MT76_STATE_RUNNING, &mphy->state);
 
-- 
cgit v1.2.3


From ea29acc97c555bc4c295cd0ad78083a33b0272a2 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 13 Apr 2021 11:08:41 +0200
Subject: mt76: mt7921: add dumping Tx power table

Dump the tx power table saved in offload firmware.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  1 +
 .../net/wireless/mediatek/mt76/mt7921/debugfs.c    | 79 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 23 +++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.h    | 17 +++++
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 31 +++++++++
 5 files changed, 151 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index 8d1b8e06af00..ff9fca52f344 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -564,6 +564,7 @@ enum {
 	MCU_CMD_CHIP_CONFIG = MCU_CE_PREFIX | 0xca,
 	MCU_CMD_FWLOG_2_HOST = MCU_CE_PREFIX | 0xc5,
 	MCU_CMD_GET_WTBL = MCU_CE_PREFIX | 0xcd,
+	MCU_CMD_GET_TXPWR = MCU_CE_PREFIX | 0xd0,
 };
 
 enum {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 94f0320ab2d4..5207ad3157e8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -146,6 +146,83 @@ mt7921_queues_read(struct seq_file *s, void *data)
 	return 0;
 }
 
+static void
+mt7921_seq_puts_array(struct seq_file *file, const char *str,
+		      s8 *val, int len)
+{
+	int i;
+
+	seq_printf(file, "%-16s:", str);
+	for (i = 0; i < len; i++)
+		if (val[i] == 127)
+			seq_printf(file, " %6s", "N.A");
+		else
+			seq_printf(file, " %6d", val[i]);
+	seq_puts(file, "\n");
+}
+
+#define mt7921_print_txpwr_entry(prefix, rate)				\
+({									\
+	mt7921_seq_puts_array(s, #prefix " (user)",			\
+			      txpwr.data[TXPWR_USER].rate,		\
+			      ARRAY_SIZE(txpwr.data[TXPWR_USER].rate)); \
+	mt7921_seq_puts_array(s, #prefix " (eeprom)",			\
+			      txpwr.data[TXPWR_EEPROM].rate,		\
+			      ARRAY_SIZE(txpwr.data[TXPWR_EEPROM].rate)); \
+	mt7921_seq_puts_array(s, #prefix " (tmac)",			\
+			      txpwr.data[TXPWR_MAC].rate,		\
+			      ARRAY_SIZE(txpwr.data[TXPWR_MAC].rate));	\
+})
+
+static int
+mt7921_txpwr(struct seq_file *s, void *data)
+{
+	struct mt7921_dev *dev = dev_get_drvdata(s->private);
+	struct mt7921_txpwr txpwr;
+	int ret;
+
+	ret = mt7921_get_txpwr_info(dev, &txpwr);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "Tx power table (channel %d)\n", txpwr.ch);
+	seq_printf(s, "%-16s  %6s %6s %6s %6s\n",
+		   " ", "1m", "2m", "5m", "11m");
+	mt7921_print_txpwr_entry(CCK, cck);
+
+	seq_printf(s, "%-16s  %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   " ", "6m", "9m", "12m", "18m", "24m", "36m",
+		   "48m", "54m");
+	mt7921_print_txpwr_entry(OFDM, ofdm);
+
+	seq_printf(s, "%-16s  %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   " ", "mcs0", "mcs1", "mcs2", "mcs3", "mcs4", "mcs5",
+		   "mcs6", "mcs7");
+	mt7921_print_txpwr_entry(HT20, ht20);
+
+	seq_printf(s, "%-16s  %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   " ", "mcs0", "mcs1", "mcs2", "mcs3", "mcs4", "mcs5",
+		   "mcs6", "mcs7", "mcs32");
+	mt7921_print_txpwr_entry(HT40, ht40);
+
+	seq_printf(s, "%-16s  %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
+		   " ", "mcs0", "mcs1", "mcs2", "mcs3", "mcs4", "mcs5",
+		   "mcs6", "mcs7", "mcs8", "mcs9", "mcs10", "mcs11");
+	mt7921_print_txpwr_entry(VHT20, vht20);
+	mt7921_print_txpwr_entry(VHT40, vht40);
+	mt7921_print_txpwr_entry(VHT80, vht80);
+	mt7921_print_txpwr_entry(VHT160, vht160);
+	mt7921_print_txpwr_entry(HE26, he26);
+	mt7921_print_txpwr_entry(HE52, he52);
+	mt7921_print_txpwr_entry(HE106, he106);
+	mt7921_print_txpwr_entry(HE242, he242);
+	mt7921_print_txpwr_entry(HE484, he484);
+	mt7921_print_txpwr_entry(HE996, he996);
+	mt7921_print_txpwr_entry(HE996x2, he996x2);
+
+	return 0;
+}
+
 static int
 mt7921_pm_set(void *data, u64 val)
 {
@@ -225,6 +302,8 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
 				    mt7921_queues_read);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir,
 				    mt7921_queues_acq);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "txpower_sku", dir,
+				    mt7921_txpwr);
 	debugfs_create_file("tx_stats", 0400, dir, dev, &mt7921_tx_stats_fops);
 	debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug);
 	debugfs_create_file("runtime-pm", 0600, dir, dev, &fops_pm);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index aa55667b6ed7..1f231088f287 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1382,3 +1382,26 @@ int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
 	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_UNI_CMD_OFFLOAD,
 				     true);
 }
+
+int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr)
+{
+	struct mt7921_txpwr_event *event;
+	struct mt7921_txpwr_req req = {
+		.dbdc_idx = 0,
+	};
+	struct sk_buff *skb;
+	int ret;
+
+	ret = mt76_mcu_send_and_get_msg(&dev->mt76, MCU_CMD_GET_TXPWR,
+					&req, sizeof(req), true, &skb);
+	if (ret)
+		return ret;
+
+	event = (struct mt7921_txpwr_event *)skb->data;
+	WARN_ON(skb->len != le16_to_cpu(event->len));
+	memcpy(txpwr, &event->txpwr, sizeof(event->txpwr));
+
+	dev_kfree_skb(skb);
+
+	return 0;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
index 13e125f32283..49823d0a3d0a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h
@@ -86,6 +86,7 @@ enum {
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
 	MCU_EVENT_SCHED_SCAN_DONE = 0x23,
 	MCU_EVENT_DBG_MSG = 0x27,
+	MCU_EVENT_TXPWR = 0xd0,
 	MCU_EVENT_COREDUMP = 0xf0,
 };
 
@@ -390,4 +391,20 @@ struct mt7921_mcu_wlan_info {
 	__le32 wlan_idx;
 	struct mt7921_mcu_wlan_info_event event;
 } __packed;
+
+struct mt7921_txpwr_req {
+	u8 ver;
+	u8 action;
+	__le16 len;
+	u8 dbdc_idx;
+	u8 rsv[3];
+} __packed;
+
+struct mt7921_txpwr_event {
+	u8 ver;
+	u8 action;
+	__le16 len;
+	struct mt7921_txpwr txpwr;
+} __packed;
+
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index e3d83d3d954c..5cc01efee989 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -172,6 +172,36 @@ struct mt7921_dev {
 	struct mt76_connac_coredump coredump;
 };
 
+enum {
+	TXPWR_USER,
+	TXPWR_EEPROM,
+	TXPWR_MAC,
+	TXPWR_MAX_NUM,
+};
+
+struct mt7921_txpwr {
+	u8 ch;
+	u8 rsv[3];
+	struct {
+		u8 ch;
+		u8 cck[4];
+		u8 ofdm[8];
+		u8 ht20[8];
+		u8 ht40[9];
+		u8 vht20[12];
+		u8 vht40[12];
+		u8 vht80[12];
+		u8 vht160[12];
+		u8 he26[12];
+		u8 he52[12];
+		u8 he106[12];
+		u8 he242[12];
+		u8 he484[12];
+		u8 he996[12];
+		u8 he996x2[12];
+	} data[TXPWR_MAX_NUM];
+};
+
 enum {
 	MT_LMAC_AC00,
 	MT_LMAC_AC01,
@@ -352,4 +382,5 @@ int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_bss_conf *info);
 int mt7921_wfsys_reset(struct mt7921_dev *dev);
+int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr);
 #endif
-- 
cgit v1.2.3


From 453873637b85b413456fb6257df336940b1d598a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 15 Apr 2021 20:57:42 +0200
Subject: mt76: mt7615: fix hardware error recovery for mt7663

MT7663 uses different bits for communicating reset commands/status between MCU
and host. Also add an extra initial reset command.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c    | 17 +++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   | 23 +++++++++++++++-------
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   |  6 ++++++
 .../net/wireless/mediatek/mt76/mt7615/pci_mac.c    |  7 ++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   | 11 +++++++++++
 5 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 0ec1c526f583..642b4eab0d8b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -187,14 +187,19 @@ void mt7615_dma_start(struct mt7615_dev *dev)
 	if (is_mt7622(&dev->mt76))
 		mt7622_dma_sched_init(dev);
 
-	if (is_mt7663(&dev->mt76))
+	if (is_mt7663(&dev->mt76)) {
 		mt7663_dma_sched_init(dev);
+
+		mt76_wr(dev, MT_MCU2HOST_INT_ENABLE, MT7663_MCU_CMD_ERROR_MASK);
+	}
+
 }
 
 int mt7615_dma_init(struct mt7615_dev *dev)
 {
 	int rx_ring_size = MT7615_RX_RING_SIZE;
 	int rx_buf_size = MT_RX_BUF_SIZE;
+	u32 mask;
 	int ret;
 
 	/* Increase buffer size to receive large VHT MPDUs */
@@ -269,8 +274,14 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 		  MT_WPDMA_GLO_CFG_RX_DMA_BUSY, 0, 1000);
 
 	/* enable interrupts for TX/RX rings */
-	mt7615_irq_enable(dev, MT_INT_RX_DONE_ALL | mt7615_tx_mcu_int_mask(dev) |
-			       MT_INT_MCU_CMD);
+
+	mask = MT_INT_RX_DONE_ALL | mt7615_tx_mcu_int_mask(dev);
+	if (is_mt7663(&dev->mt76))
+	    mask |= MT7663_INT_MCU_CMD;
+	else
+	    mask |= MT_INT_MCU_CMD;
+
+	mt7615_irq_enable(dev, mask);
 
 	mt7615_dma_start(dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index eaa22752e7cd..be93e7ad1279 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -105,6 +105,7 @@ static void mt7615_irq_tasklet(struct tasklet_struct *t)
 {
 	struct mt7615_dev *dev = from_tasklet(dev, t, irq_tasklet);
 	u32 intr, mask = 0, tx_mcu_mask = mt7615_tx_mcu_int_mask(dev);
+	u32 mcu_int;
 
 	mt76_wr(dev, MT_INT_MASK_CSR, 0);
 
@@ -128,15 +129,23 @@ static void mt7615_irq_tasklet(struct tasklet_struct *t)
 	if (intr & MT_INT_RX_DONE(1))
 		napi_schedule(&dev->mt76.napi[1]);
 
-	if (intr & MT_INT_MCU_CMD) {
-		u32 val = mt76_rr(dev, MT_MCU_CMD);
+	if (!(intr & (MT_INT_MCU_CMD | MT7663_INT_MCU_CMD)))
+		return;
 
-		if (val & MT_MCU_CMD_ERROR_MASK) {
-			dev->reset_state = val;
-			ieee80211_queue_work(mt76_hw(dev), &dev->reset_work);
-			wake_up(&dev->reset_wait);
-		}
+	if (is_mt7663(&dev->mt76)) {
+		mcu_int = mt76_rr(dev, MT_MCU2HOST_INT_STATUS);
+		mcu_int &= MT7663_MCU_CMD_ERROR_MASK;
+	} else {
+		mcu_int = mt76_rr(dev, MT_MCU_CMD);
+		mcu_int &= MT_MCU_CMD_ERROR_MASK;
 	}
+
+	if (!mcu_int)
+		return;
+
+	dev->reset_state = mcu_int;
+	ieee80211_queue_work(mt76_hw(dev), &dev->reset_work);
+	wake_up(&dev->reset_wait);
 }
 
 static u32 __mt7615_reg_addr(struct mt7615_dev *dev, u32 addr)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index a03484e34bb4..49540b00519d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -47,6 +47,12 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	if (ret < 0)
 		return ret;
 
+	if (is_mt7663(&dev->mt76)) {
+		/* Reset RGU */
+		mt76_clear(dev, MT_MCU_CIRQ_IRQ_SEL(4), BIT(1));
+		mt76_set(dev, MT_MCU_CIRQ_IRQ_SEL(4), BIT(1));
+	}
+
 	ret = mt7615_dma_init(dev);
 	if (ret)
 		return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index 8cd79e849045..d20962cdecc8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -210,7 +210,12 @@ EXPORT_SYMBOL_GPL(mt7615_dma_reset);
 static void
 mt7615_hif_int_event_trigger(struct mt7615_dev *dev, u8 event)
 {
-	mt76_wr(dev, MT_MCU_INT_EVENT, event);
+	u32 reg = MT_MCU_INT_EVENT;
+
+	if (is_mt7663(&dev->mt76))
+		reg = MT7663_MCU_INT_EVENT;
+
+	mt76_wr(dev, reg, event);
 
 	mt7622_trigger_hif_int(dev, true);
 	mt7622_trigger_hif_int(dev, false);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 190a02670795..63c081bb04d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -61,6 +61,11 @@ enum mt7615_reg_base {
 #define MT_MCU_PCIE_REMAP_2_BASE	GENMASK(31, 19)
 #define MT_PCIE_REMAP_BASE_2		((dev)->reg_map[MT_PCIE_REMAP_BASE2])
 
+#define MT_MCU_CIRQ_BASE		0xc0000
+#define MT_MCU_CIRQ(ofs)		(MT_MCU_CIRQ_BASE + (ofs))
+
+#define MT_MCU_CIRQ_IRQ_SEL(n)		MT_MCU_CIRQ((n) << 2)
+
 #define MT_HIF(ofs)			((dev)->reg_map[MT_HIF_BASE] + (ofs))
 #define MT_HIF_RST			MT_HIF(0x100)
 #define MT_HIF_LOGIC_RST_N		BIT(4)
@@ -88,6 +93,10 @@ enum mt7615_reg_base {
 #define MT_CFG_LPCR_HOST_FW_OWN		BIT(0)
 #define MT_CFG_LPCR_HOST_DRV_OWN	BIT(1)
 
+#define MT_MCU2HOST_INT_STATUS		MT_HIF(0x1f0)
+#define MT_MCU2HOST_INT_ENABLE		MT_HIF(0x1f4)
+
+#define MT7663_MCU_INT_EVENT		MT_HIF(0x108)
 #define MT_MCU_INT_EVENT		MT_HIF(0x1f8)
 #define MT_MCU_INT_EVENT_PDMA_STOPPED	BIT(0)
 #define MT_MCU_INT_EVENT_PDMA_INIT	BIT(1)
@@ -102,6 +111,7 @@ enum mt7615_reg_base {
 #define MT_INT_RX_DONE_ALL		GENMASK(1, 0)
 #define MT_INT_TX_DONE_ALL		GENMASK(19, 4)
 #define MT_INT_TX_DONE(_n)		BIT((_n) + 4)
+#define MT7663_INT_MCU_CMD		BIT(29)
 #define MT_INT_MCU_CMD			BIT(30)
 
 #define MT_WPDMA_GLO_CFG		MT_HIF(0x208)
@@ -138,6 +148,7 @@ enum mt7615_reg_base {
 #define MT_MCU_CMD_PDMA_ERROR		BIT(27)
 #define MT_MCU_CMD_PCIE_ERROR		BIT(28)
 #define MT_MCU_CMD_ERROR_MASK		(GENMASK(5, 1) | GENMASK(28, 24))
+#define MT7663_MCU_CMD_ERROR_MASK	GENMASK(5, 2)
 
 #define MT_TX_RING_BASE			MT_HIF(0x300)
 #define MT_RX_RING_BASE			MT_HIF(0x400)
-- 
cgit v1.2.3


From 5c7d374444afdeb9dd534a37c4f6c13af032da0c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 15 Apr 2021 21:07:53 +0200
Subject: mt76: mt7615: fix entering driver-own state on mt7663

Fixes hardware wakeup issues

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index eab490d07255..5f37255476e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -292,12 +292,20 @@ static int mt7615_mcu_drv_pmctrl(struct mt7615_dev *dev)
 	u32 addr;
 	int err;
 
-	addr = is_mt7663(mdev) ? MT_PCIE_DOORBELL_PUSH : MT_CFG_LPCR_HOST;
+	if (is_mt7663(mdev)) {
+		/* Clear firmware own via N9 eint */
+		mt76_wr(dev, MT_PCIE_DOORBELL_PUSH, MT_CFG_LPCR_HOST_DRV_OWN);
+		mt76_poll(dev, MT_CONN_ON_MISC, MT_CFG_LPCR_HOST_FW_OWN, 0, 3000);
+
+		addr = MT_CONN_HIF_ON_LPCTL;
+	} else {
+		addr = MT_CFG_LPCR_HOST;
+	}
+
 	mt76_wr(dev, addr, MT_CFG_LPCR_HOST_DRV_OWN);
 
 	mt7622_trigger_hif_int(dev, true);
 
-	addr = is_mt7663(mdev) ? MT_CONN_HIF_ON_LPCTL : MT_CFG_LPCR_HOST;
 	err = !mt76_poll_msec(dev, addr, MT_CFG_LPCR_HOST_FW_OWN, 0, 3000);
 
 	mt7622_trigger_hif_int(dev, false);
-- 
cgit v1.2.3


From 4efcfd5c36bd0d7c0f62713216a2291562eccfaa Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 16 Apr 2021 10:24:46 +0200
Subject: mt76: mt7615: load ROM patch before checking patch semaphore status

For MT7663, the availability of the patch files is used to detect, which
corresponding firmware is going to be used (AP firmware or STA offload
firmware). If the ROM patch was already applied, it could attempt to
load the wrong firmware (without considering the alternative).

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 30 +++++++++++++------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 5f37255476e3..2e113fd431f5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1341,25 +1341,26 @@ static int mt7615_load_patch(struct mt7615_dev *dev, u32 addr, const char *name)
 	const struct firmware *fw = NULL;
 	int len, ret, sem;
 
+	ret = firmware_request_nowarn(&fw, name, dev->mt76.dev);
+	if (ret)
+		return ret;
+
+	if (!fw || !fw->data || fw->size < sizeof(*hdr)) {
+		dev_err(dev->mt76.dev, "Invalid firmware\n");
+		ret = -EINVAL;
+		goto release_fw;
+	}
+
 	sem = mt76_connac_mcu_patch_sem_ctrl(&dev->mt76, true);
 	switch (sem) {
 	case PATCH_IS_DL:
-		return 0;
+		goto release_fw;
 	case PATCH_NOT_DL_SEM_SUCCESS:
 		break;
 	default:
 		dev_err(dev->mt76.dev, "Failed to get patch semaphore\n");
-		return -EAGAIN;
-	}
-
-	ret = firmware_request_nowarn(&fw, name, dev->mt76.dev);
-	if (ret)
-		goto out;
-
-	if (!fw || !fw->data || fw->size < sizeof(*hdr)) {
-		dev_err(dev->mt76.dev, "Invalid firmware\n");
-		ret = -EINVAL;
-		goto out;
+		ret = -EAGAIN;
+		goto release_fw;
 	}
 
 	hdr = (const struct mt7615_patch_hdr *)(fw->data);
@@ -1388,8 +1389,6 @@ static int mt7615_load_patch(struct mt7615_dev *dev, u32 addr, const char *name)
 		dev_err(dev->mt76.dev, "Failed to start patch\n");
 
 out:
-	release_firmware(fw);
-
 	sem = mt76_connac_mcu_patch_sem_ctrl(&dev->mt76, false);
 	switch (sem) {
 	case PATCH_REL_SEM_SUCCESS:
@@ -1400,6 +1399,9 @@ out:
 		break;
 	}
 
+release_fw:
+	release_firmware(fw);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 495184ac91bb866ad7d794ad6ceb064e191319d4 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 15 Apr 2021 00:45:49 +0800
Subject: mt76: mt7915: add support for applying pre-calibration data

When the EEPROM data is read from flash, it can contain pre-calibration
data, which can save calibration time.

Note that group_cal can save 30% bootup calibration time, and dpd_cal can
save 75% channel switching time.

Tested-by: Bo Jiao <bo.jiao@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/eeprom.c        |  11 +-
 drivers/net/wireless/mediatek/mt76/mt76.h          |   1 +
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c |  25 +++-
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h |  11 +-
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |   7 +
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |   6 +
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 142 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |   2 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |   4 +
 9 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c
index 6f75e949e4fd..3b47e85e95e7 100644
--- a/drivers/net/wireless/mediatek/mt76/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/eeprom.c
@@ -9,8 +9,7 @@
 #include <linux/etherdevice.h>
 #include "mt76.h"
 
-static int
-mt76_get_of_eeprom(struct mt76_dev *dev, int len)
+int mt76_get_of_eeprom(struct mt76_dev *dev, void *eep, int offset, int len)
 {
 #if defined(CONFIG_OF) && defined(CONFIG_MTD)
 	struct device_node *np = dev->dev->of_node;
@@ -18,7 +17,6 @@ mt76_get_of_eeprom(struct mt76_dev *dev, int len)
 	const __be32 *list;
 	const char *part;
 	phandle phandle;
-	int offset = 0;
 	int size;
 	size_t retlen;
 	int ret;
@@ -54,7 +52,7 @@ mt76_get_of_eeprom(struct mt76_dev *dev, int len)
 	}
 
 	offset = be32_to_cpup(list);
-	ret = mtd_read(mtd, offset, len, &retlen, dev->eeprom.data);
+	ret = mtd_read(mtd, offset, len, &retlen, eep);
 	put_mtd_device(mtd);
 	if (ret)
 		goto out_put_node;
@@ -65,7 +63,7 @@ mt76_get_of_eeprom(struct mt76_dev *dev, int len)
 	}
 
 	if (of_property_read_bool(dev->dev->of_node, "big-endian")) {
-		u8 *data = (u8 *)dev->eeprom.data;
+		u8 *data = (u8 *)eep;
 		int i;
 
 		/* convert eeprom data in Little Endian */
@@ -86,6 +84,7 @@ out_put_node:
 	return -ENOENT;
 #endif
 }
+EXPORT_SYMBOL_GPL(mt76_get_of_eeprom);
 
 void
 mt76_eeprom_override(struct mt76_phy *phy)
@@ -332,6 +331,6 @@ mt76_eeprom_init(struct mt76_dev *dev, int len)
 	if (!dev->eeprom.data)
 		return -ENOMEM;
 
-	return !mt76_get_of_eeprom(dev, len);
+	return !mt76_get_of_eeprom(dev, dev->eeprom.data, 0, len);
 }
 EXPORT_SYMBOL_GPL(mt76_eeprom_init);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index bf049fb4c412..289f195a7951 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -837,6 +837,7 @@ void mt76_seq_puts_array(struct seq_file *file, const char *str,
 
 int mt76_eeprom_init(struct mt76_dev *dev, int len);
 void mt76_eeprom_override(struct mt76_phy *phy);
+int mt76_get_of_eeprom(struct mt76_dev *dev, void *data, int offset, int len);
 
 struct mt76_queue *
 mt76_init_queue(struct mt76_dev *dev, int qid, int idx, int n_desc,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 738ecf8f4fa2..353f8d8497d5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -14,6 +14,23 @@ static u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset)
 	return data[offset];
 }
 
+static int mt7915_eeprom_load_precal(struct mt7915_dev *dev)
+{
+	struct mt76_dev *mdev = &dev->mt76;
+	u32 val;
+
+	val = mt7915_eeprom_read(dev, MT_EE_DO_PRE_CAL);
+	if (val != (MT_EE_WIFI_CAL_DPD | MT_EE_WIFI_CAL_GROUP))
+		return 0;
+
+	val = MT_EE_CAL_GROUP_SIZE + MT_EE_CAL_DPD_SIZE;
+	dev->cal = devm_kzalloc(mdev->dev, val, GFP_KERNEL);
+	if (!dev->cal)
+		return -ENOMEM;
+
+	return mt76_get_of_eeprom(mdev, dev->cal, MT_EE_PRECAL, val);
+}
+
 static int mt7915_eeprom_load(struct mt7915_dev *dev)
 {
 	int ret;
@@ -22,12 +39,14 @@ static int mt7915_eeprom_load(struct mt7915_dev *dev)
 	if (ret < 0)
 		return ret;
 
-	if (ret)
+	if (ret) {
 		dev->flash_mode = true;
-	else
+		ret = mt7915_eeprom_load_precal(dev);
+	} else {
 		memset(dev->mt76.eeprom.data, -1, MT7915_EEPROM_SIZE);
+	}
 
-	return 0;
+	return ret;
 }
 
 static int mt7915_check_eeprom(struct mt7915_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
index 3ee8c27bb61b..8ef5ebfad706 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -17,14 +17,23 @@ enum mt7915_eeprom_field {
 	MT_EE_MAC_ADDR =	0x004,
 	MT_EE_MAC_ADDR2 =	0x00a,
 	MT_EE_DDIE_FT_VERSION =	0x050,
+	MT_EE_DO_PRE_CAL =	0x062,
 	MT_EE_WIFI_CONF =	0x190,
 	MT_EE_TX0_POWER_2G =	0x2fc,
 	MT_EE_TX0_POWER_5G =	0x34b,
 	MT_EE_ADIE_FT_VERSION =	0x9a0,
 
-	__MT_EE_MAX =		0xe00
+	__MT_EE_MAX =		0xe00,
+	/* 0xe10 ~ 0x5780 used to save group cal data */
+	MT_EE_PRECAL =		0xe10
 };
 
+#define MT_EE_WIFI_CAL_GROUP			BIT(0)
+#define MT_EE_WIFI_CAL_DPD			GENMASK(2, 1)
+#define MT_EE_CAL_UNIT				1024
+#define MT_EE_CAL_GROUP_SIZE			(44 * MT_EE_CAL_UNIT)
+#define MT_EE_CAL_DPD_SIZE			(54 * MT_EE_CAL_UNIT)
+
 #define MT_EE_WIFI_CONF0_TX_PATH		GENMASK(2, 0)
 #define MT_EE_WIFI_CONF0_BAND_SEL		GENMASK(7, 6)
 #define MT_EE_WIFI_CONF1_BAND_SEL		GENMASK(7, 6)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index d242e186424d..19fa9ad7d5d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -381,6 +381,13 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	if (ret < 0)
 		return ret;
 
+
+	if (dev->flash_mode) {
+		ret = mt7915_mcu_apply_group_cal(dev);
+		if (ret)
+			return ret;
+	}
+
 	/* Beacon and mgmt frames should occupy wcid 0 */
 	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA - 1);
 	if (idx)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 2fd87987312e..413abbca3246 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -313,6 +313,12 @@ int mt7915_set_channel(struct mt7915_phy *phy)
 	mt7915_init_dfs_state(phy);
 	mt76_set_channel(phy->mt76);
 
+	if (dev->flash_mode) {
+		ret = mt7915_mcu_apply_tx_dpd(phy);
+		if (ret)
+			goto out;
+	}
+
 	ret = mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD(CHANNEL_SWITCH));
 	if (ret)
 		goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 908e74a6b8e6..bcf6aed074de 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3327,6 +3327,148 @@ int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset)
 	return 0;
 }
 
+static int mt7915_mcu_set_pre_cal(struct mt7915_dev *dev, u8 idx,
+				  u8 *data, u32 len, int cmd)
+{
+	struct {
+		u8 dir;
+		u8 valid;
+		__le16 bitmap;
+		s8 precal;
+		u8 action;
+		u8 band;
+		u8 idx;
+		u8 rsv[4];
+		__le32 len;
+	} req;
+	struct sk_buff *skb;
+
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, sizeof(req) + len);
+	if (!skb)
+		return -ENOMEM;
+
+	req.idx = idx;
+	req.len = cpu_to_le32(len);
+	skb_put_data(skb, &req, sizeof(req));
+	skb_put_data(skb, data, len);
+
+	return mt76_mcu_skb_send_msg(&dev->mt76, skb, cmd, false);
+}
+
+int mt7915_mcu_apply_group_cal(struct mt7915_dev *dev)
+{
+	u8 idx = 0, *cal = dev->cal, *eep = dev->mt76.eeprom.data;
+	u32 total = MT_EE_CAL_GROUP_SIZE;
+
+	if (!(eep[MT_EE_DO_PRE_CAL] & MT_EE_WIFI_CAL_GROUP))
+		return 0;
+
+	/*
+	 * Items: Rx DCOC, RSSI DCOC, Tx TSSI DCOC, Tx LPFG
+	 * Tx FDIQ, Tx DCIQ, Rx FDIQ, Rx FIIQ, ADCDCOC
+	 */
+	while (total > 0) {
+		int ret, len;
+
+		len = min_t(u32, total, MT_EE_CAL_UNIT);
+
+		ret = mt7915_mcu_set_pre_cal(dev, idx, cal, len,
+					     MCU_EXT_CMD(GROUP_PRE_CAL_INFO));
+		if (ret)
+			return ret;
+
+		total -= len;
+		cal += len;
+		idx++;
+	}
+
+	return 0;
+}
+
+static int mt7915_find_freq_idx(const u16 *freqs, int n_freqs, u16 cur)
+{
+	int i;
+
+	for (i = 0; i < n_freqs; i++)
+		if (cur == freqs[i])
+			return i;
+
+	return -1;
+}
+
+static int mt7915_dpd_freq_idx(u16 freq, u8 bw)
+{
+	static const u16 freq_list[] = {
+		5180, 5200, 5220, 5240,
+		5260, 5280, 5300, 5320,
+		5500, 5520, 5540, 5560,
+		5580, 5600, 5620, 5640,
+		5660, 5680, 5700, 5745,
+		5765, 5785, 5805, 5825
+	};
+	int offset_2g = ARRAY_SIZE(freq_list);
+	int idx;
+
+	if (freq < 4000) {
+		if (freq < 2432)
+			return offset_2g;
+		if (freq < 2457)
+			return offset_2g + 1;
+
+		return offset_2g + 2;
+	}
+
+	if (bw == NL80211_CHAN_WIDTH_80P80 || bw == NL80211_CHAN_WIDTH_160)
+		return -1;
+
+	if (bw != NL80211_CHAN_WIDTH_20) {
+		idx = mt7915_find_freq_idx(freq_list, ARRAY_SIZE(freq_list),
+					   freq + 10);
+		if (idx >= 0)
+			return idx;
+
+		idx = mt7915_find_freq_idx(freq_list, ARRAY_SIZE(freq_list),
+					   freq - 10);
+		if (idx >= 0)
+			return idx;
+	}
+
+	return mt7915_find_freq_idx(freq_list, ARRAY_SIZE(freq_list), freq);
+}
+
+int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy)
+{
+	struct mt7915_dev *dev = phy->dev;
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	u16 total = 2, idx, center_freq = chandef->center_freq1;
+	u8 *cal = dev->cal, *eep = dev->mt76.eeprom.data;
+
+	if (!(eep[MT_EE_DO_PRE_CAL] & MT_EE_WIFI_CAL_DPD))
+		return 0;
+
+	idx = mt7915_dpd_freq_idx(center_freq, chandef->width);
+	if (idx < 0)
+		return -EINVAL;
+
+	/* Items: Tx DPD, Tx Flatness */
+	idx = idx * 2;
+	cal += MT_EE_CAL_GROUP_SIZE;
+
+	while (total--) {
+		int ret;
+
+		cal += (idx * MT_EE_CAL_UNIT);
+		ret = mt7915_mcu_set_pre_cal(dev, idx, cal, MT_EE_CAL_UNIT,
+					     MCU_EXT_CMD(DPD_PRE_CAL_INFO));
+		if (ret)
+			return ret;
+
+		idx++;
+	}
+
+	return 0;
+}
+
 int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index)
 {
 	struct {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 4a932140a7c3..42582a66e42d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -284,6 +284,8 @@ enum {
 	MCU_EXT_CMD_FW_DBG_CTRL = 0x95,
 	MCU_EXT_CMD_SET_RDD_TH = 0x9d,
 	MCU_EXT_CMD_SET_SPR = 0xa8,
+	MCU_EXT_CMD_GROUP_PRE_CAL_INFO = 0xab,
+	MCU_EXT_CMD_DPD_PRE_CAL_INFO = 0xac,
 	MCU_EXT_CMD_PHY_STAT_INFO = 0xad,
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index dbc70c0d6668..456daf3ad853 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -201,6 +201,8 @@ struct mt7915_dev {
 	bool flash_mode;
 	bool fw_debug;
 	bool ibf;
+
+	void *cal;
 };
 
 enum {
@@ -359,6 +361,8 @@ int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
 			    const struct mt7915_dfs_pulse *pulse);
 int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
 			    const struct mt7915_dfs_pattern *pattern);
+int mt7915_mcu_apply_group_cal(struct mt7915_dev *dev);
+int mt7915_mcu_apply_tx_dpd(struct mt7915_phy *phy);
 int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index);
 int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
 int mt7915_mcu_get_rx_rate(struct mt7915_phy *phy, struct ieee80211_vif *vif,
-- 
cgit v1.2.3


From a8333801d69d98f0b9def7c5370939100ae3160d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 15 Apr 2021 11:03:58 +0200
Subject: mt76: mt7921: move hw configuration in mt7921_register_device

Get rid of init work since firmware loading is already performed in
mt7921_init_hardware

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   | 18 ++++++------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 -
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 0aedddb90858..eab6e2dcdb96 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -166,20 +166,10 @@ void mt7921_mac_init(struct mt7921_dev *dev)
 	mt76_connac_mcu_set_rts_thresh(&dev->mt76, 0x92b, 0);
 }
 
-static void mt7921_init_work(struct work_struct *work)
-{
-	struct mt7921_dev *dev = container_of(work, struct mt7921_dev,
-				 init_work);
-
-	mt7921_mcu_set_eeprom(dev);
-	mt7921_mac_init(dev);
-}
-
 static int mt7921_init_hardware(struct mt7921_dev *dev)
 {
 	int ret, idx;
 
-	INIT_WORK(&dev->init_work, mt7921_init_work);
 	spin_lock_init(&dev->token_lock);
 	idr_init(&dev->token);
 
@@ -202,6 +192,10 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 	if (ret < 0)
 		return ret;
 
+	ret = mt7921_mcu_set_eeprom(dev);
+	if (ret)
+		return ret;
+
 	/* Beacon and mgmt frames should occupy wcid 0 */
 	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7921_WTBL_STA - 1);
 	if (idx)
@@ -212,6 +206,8 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 	dev->mt76.global_wcid.tx_info |= MT_WCID_TX_INFO_SET;
 	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
 
+	mt7921_mac_init(dev);
+
 	return 0;
 }
 
@@ -266,8 +262,6 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	ieee80211_queue_work(mt76_hw(dev), &dev->init_work);
-
 	return mt7921_init_debugfs(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 5cc01efee989..c34cf3e3a26b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -156,7 +156,6 @@ struct mt7921_dev {
 
 	u16 chainmask;
 
-	struct work_struct init_work;
 	struct work_struct reset_work;
 
 	struct list_head sta_poll_list;
-- 
cgit v1.2.3


From 53d35b1aa0bd8a781a0252680b4495fd0193cc2c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 15 Apr 2021 17:50:02 +0200
Subject: mt76: improve mcu error logging

Dump mcu command code in hex and related prefix to help debugging

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mcu.c  | 5 ++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c | 5 ++---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c  | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c  | 2 +-
 5 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
index 96b6c8916730..6abfe6b19afa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
@@ -21,9 +21,8 @@ mt7603_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	struct mt7603_mcu_rxd *rxd;
 
 	if (!skb) {
-		dev_err(mdev->dev,
-			"MCU message %d (seq %d) timed out\n",
-			cmd, seq);
+		dev_err(mdev->dev, "MCU message %02x (seq %d) timed out\n",
+			abs(cmd), seq);
 		dev->mcu_hang = MT7603_WATCHDOG_TIMEOUT;
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 2e113fd431f5..be976fe97290 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -175,8 +175,8 @@ int mt7615_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	int ret = 0;
 
 	if (!skb) {
-		dev_err(mdev->dev, "Message %ld (seq %d) timeout\n",
-			cmd & MCU_CMD_MASK, seq);
+		dev_err(mdev->dev, "Message %08x (seq %d) timeout\n",
+			cmd, seq);
 		return -ETIMEDOUT;
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
index 4aa5c36afeaf..75978820a260 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
@@ -17,9 +17,8 @@ int mt76x02_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	u32 *rxfce;
 
 	if (!skb) {
-		dev_err(mdev->dev,
-			"MCU message %d (seq %d) timed out\n", cmd,
-			seq);
+		dev_err(mdev->dev, "MCU message %02x (seq %d) timed out\n",
+			abs(cmd), seq);
 		dev->mcu_timeout = 1;
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index bcf6aed074de..1a7c36cb435b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -217,7 +217,7 @@ mt7915_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	int ret = 0;
 
 	if (!skb) {
-		dev_err(mdev->dev, "Message %d (seq %d) timeout\n",
+		dev_err(mdev->dev, "Message %08x (seq %d) timeout\n",
 			cmd, seq);
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 1f231088f287..a360929983ea 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -160,7 +160,7 @@ mt7921_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	int ret = 0;
 
 	if (!skb) {
-		dev_err(mdev->dev, "Message %d (seq %d) timeout\n",
+		dev_err(mdev->dev, "Message %08x (seq %d) timeout\n",
 			cmd, seq);
 		return -ETIMEDOUT;
 	}
-- 
cgit v1.2.3


From 987c8fb4de437344f19a23d074c06faf67520a11 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 16 Apr 2021 00:21:54 +0200
Subject: mt76: mt7921: run mt7921_mcu_fw_log_2_host holding mt76 mutex

Wake the chip before configuring the mcu log level

Fixes: 1d8efc741df8 ("mt76: mt7921: introduce Runtime PM support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 5207ad3157e8..1a378387b0a9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -9,10 +9,13 @@ mt7921_fw_debug_set(void *data, u64 val)
 {
 	struct mt7921_dev *dev = data;
 
-	dev->fw_debug = (u8)val;
+	mt7921_mutex_acquire(dev);
 
+	dev->fw_debug = (u8)val;
 	mt7921_mcu_fw_log_2_host(dev, dev->fw_debug);
 
+	mt7921_mutex_release(dev);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From f1ae92bbc43b68521bc0e866327dc896f10c11ee Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 16 Apr 2021 23:30:35 +0800
Subject: mt76: mt7921: add wifisys reset support in debugfs

Introduce chip_reset knob in mt7921 debugfs to export a way to users
able to trigger wifi reset, and group the similar operations
previously defined in chip_config in the same knob.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7921/debugfs.c    | 25 +++++++++++++++-------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 1a378387b0a9..5a54cd8d2ce4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -279,19 +279,28 @@ mt7921_pm_idle_timeout_get(void *data, u64 *val)
 DEFINE_DEBUGFS_ATTRIBUTE(fops_pm_idle_timeout, mt7921_pm_idle_timeout_get,
 			 mt7921_pm_idle_timeout_set, "%lld\n");
 
-static int mt7921_config(void *data, u64 val)
+static int mt7921_chip_reset(void *data, u64 val)
 {
 	struct mt7921_dev *dev = data;
-	int ret;
-
-	mt7921_mutex_acquire(dev);
-	ret = mt76_connac_mcu_chip_config(&dev->mt76);
-	mt7921_mutex_release(dev);
+	int ret = 0;
+
+	switch (val) {
+	case 1:
+		/* Reset wifisys directly. */
+		mt7921_reset(&dev->mt76);
+		break;
+	default:
+		/* Collect the core dump before reset wifisys. */
+		mt7921_mutex_acquire(dev);
+		ret = mt76_connac_mcu_chip_config(&dev->mt76);
+		mt7921_mutex_release(dev);
+		break;
+	}
 
 	return ret;
 }
 
-DEFINE_DEBUGFS_ATTRIBUTE(fops_config, NULL, mt7921_config, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_reset, NULL, mt7921_chip_reset, "%lld\n");
 
 int mt7921_init_debugfs(struct mt7921_dev *dev)
 {
@@ -312,7 +321,7 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
 	debugfs_create_file("runtime-pm", 0600, dir, dev, &fops_pm);
 	debugfs_create_file("idle-timeout", 0600, dir, dev,
 			    &fops_pm_idle_timeout);
-	debugfs_create_file("chip_config", 0600, dir, dev, &fops_config);
+	debugfs_create_file("chip_reset", 0600, dir, dev, &fops_reset);
 
 	return 0;
 }
-- 
cgit v1.2.3


From e513ae49088bbb0d00299a9f996f88f08cca7dc6 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 16 Apr 2021 23:30:36 +0800
Subject: mt76: mt7921: abort uncompleted scan by wifi reset

Scan abort should be required for the uncompleted hardware scan
interrupted by wifi reset. Otherwise, it is possible that the scan
request after wifi reset gets error code -EBUSY from mac80211 and
then blocks the reconnectting to the access point.

Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 572bab82315a..3145880df6e7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1413,6 +1413,14 @@ void mt7921_mac_reset_work(struct work_struct *work)
 	if (i == 10)
 		dev_err(dev->mt76.dev, "chip reset failed\n");
 
+	if (test_and_clear_bit(MT76_HW_SCANNING, &dev->mphy.state)) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		ieee80211_scan_completed(dev->mphy.hw, &info);
+	}
+
 	ieee80211_wake_queues(hw);
 	ieee80211_iterate_active_interfaces(hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
-- 
cgit v1.2.3


From 790d228a68745624c266c27aded0d7f46a0d5af4 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Sat, 17 Apr 2021 06:16:17 +0800
Subject: mt76: mt7915: add support for DT rate power limits

Enable to limit per-rate max txpower from DT.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 44 +++++++++++++++++++++----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 1a7c36cb435b..74c375910eb7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3517,15 +3517,47 @@ int mt7915_mcu_set_sku(struct mt7915_phy *phy)
 		.format_id = 4,
 		.dbdc_idx = phy != &dev->phy,
 	};
-	int i;
-	s8 *delta;
+	struct mt76_power_limits limits_array;
+	s8 *delta, *la = (s8 *)&limits_array;
+	int i, idx, n_chains = hweight8(mphy->antenna_mask);
+	int tx_power;
 
 	delta = dev->rate_power[mphy->chandef.chan->band];
-	mphy->txpower_cur = hw->conf.power_level * 2 +
-			    delta[MT7915_SKU_MAX_DELTA_IDX];
+	tx_power = hw->conf.power_level * 2 -
+		   mt76_tx_power_nss_delta(n_chains);
+
+	tx_power = mt76_get_rate_power_limits(mphy, mphy->chandef.chan,
+					      &limits_array, tx_power);
+	mphy->txpower_cur = tx_power;
+
+	for (i = 0, idx = 0; i < MAX_SKU_RATE_GROUP_NUM; i++) {
+		const struct sku_group *sku = &mt7915_sku_groups[i];
+		u32 offset = sku->offset[mphy->chandef.chan->band];
+		u8 mcs_num = sku->len;
+		int j;
+
+		if (i >= SKU_HT_BW20 && i <= SKU_VHT_BW160) {
+			mcs_num = 10;
+
+			if (i == SKU_HT_BW20 || i == SKU_VHT_BW20)
+				la = (s8 *)&limits_array + 12;
+		}
 
-	for (i = 0; i < MT7915_SKU_RATE_NUM; i++)
-		req.val[i] = hw->conf.power_level * 2 + delta[i];
+		if (!offset) {
+			idx += sku->len;
+			la += mcs_num;
+			continue;
+		}
+
+		for (j = 0; j < min_t(u8, mcs_num, sku->len); j++) {
+			s8 rate_power;
+
+			rate_power = hw->conf.power_level * 2 + delta[idx + j];
+			req.val[idx + j] = min_t(s8, la[j], rate_power);
+		}
+		la += mcs_num;
+		idx += sku->len;
+	}
 
 	return mt76_mcu_send_msg(&dev->mt76,
 				 MCU_EXT_CMD(TX_POWER_FEATURE_CTRL), &req,
-- 
cgit v1.2.3


From ecb187a74e1846156fac7c14a60650130cbe3c22 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Sat, 17 Apr 2021 06:16:18 +0800
Subject: mt76: mt7915: rework the flow of txpower setting

Clean up the flow of per-rate txpower limit setting to get rid of
duplicate work since it has already been handled by firmware, and set
proper max_power based on different channels and regdomains.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    |  34 ++---
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c | 138 +++++----------------
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h |  40 ++----
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  72 ++++++-----
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |   2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |  29 ++---
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  10 +-
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c    |  25 ----
 8 files changed, 99 insertions(+), 251 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 587d55d240a1..44e7127d4495 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -299,8 +299,7 @@ mt7915_queues_read(struct seq_file *s, void *data)
 }
 
 static void
-mt7915_puts_rate_txpower(struct seq_file *s, s8 *delta,
-			 s8 txpower_cur, int band)
+mt7915_puts_rate_txpower(struct seq_file *s, s8 txpower_cur, int band)
 {
 	static const char * const sku_group_name[] = {
 		"CCK", "OFDM", "HT20", "HT40",
@@ -308,24 +307,18 @@ mt7915_puts_rate_txpower(struct seq_file *s, s8 *delta,
 		"RU26", "RU52", "RU106", "RU242/SU20",
 		"RU484/SU40", "RU996/SU80", "RU2x996/SU160"
 	};
-	s8 txpower[MT7915_SKU_RATE_NUM];
+	s8 txpower[161];
 	int i, idx = 0;
 
-	for (i = 0; i < MT7915_SKU_RATE_NUM; i++)
-		txpower[i] = DIV_ROUND_UP(txpower_cur + delta[i], 2);
+	for (i = 0; i < ARRAY_SIZE(txpower); i++)
+		txpower[i] = DIV_ROUND_UP(txpower_cur, 2);
 
-	for (i = 0; i < MAX_SKU_RATE_GROUP_NUM; i++) {
-		const struct sku_group *sku = &mt7915_sku_groups[i];
-		u32 offset = sku->offset[band];
-
-		if (!offset) {
-			idx += sku->len;
-			continue;
-		}
+	for (i = 0; i < ARRAY_SIZE(mt7915_sku_group_len); i++) {
+		u8 len = mt7915_sku_group_len[i];
 
 		mt76_seq_puts_array(s, sku_group_name[i],
-				    txpower + idx, sku->len);
-		idx += sku->len;
+				    txpower + idx, len);
+		idx += len;
 	}
 }
 
@@ -335,21 +328,18 @@ mt7915_read_rate_txpower(struct seq_file *s, void *data)
 	struct mt7915_dev *dev = dev_get_drvdata(s->private);
 	struct mt76_phy *mphy = &dev->mphy;
 	enum nl80211_band band = mphy->chandef.chan->band;
-	s8 *delta = dev->rate_power[band];
-	s8 txpower_base = mphy->txpower_cur - delta[MT7915_SKU_MAX_DELTA_IDX];
+	s8 txpower = mphy->txpower_cur;
 
 	seq_puts(s, "Band 0:\n");
-	mt7915_puts_rate_txpower(s, delta, txpower_base, band);
+	mt7915_puts_rate_txpower(s, txpower, band);
 
 	if (dev->mt76.phy2) {
 		mphy = dev->mt76.phy2;
 		band = mphy->chandef.chan->band;
-		delta = dev->rate_power[band];
-		txpower_base = mphy->txpower_cur -
-			       delta[MT7915_SKU_MAX_DELTA_IDX];
+		txpower = mphy->txpower_cur;
 
 		seq_puts(s, "Band 1:\n");
-		mt7915_puts_rate_txpower(s, delta, txpower_base, band);
+		mt7915_puts_rate_txpower(s, txpower, band);
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 353f8d8497d5..677c913c51b0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -170,120 +170,38 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 	return target_power;
 }
 
-static const u8 sku_cck_delta_map[] = {
-	SKU_CCK_GROUP0,
-	SKU_CCK_GROUP0,
-	SKU_CCK_GROUP1,
-	SKU_CCK_GROUP1,
-};
-
-static const u8 sku_ofdm_delta_map[] = {
-	SKU_OFDM_GROUP0,
-	SKU_OFDM_GROUP0,
-	SKU_OFDM_GROUP1,
-	SKU_OFDM_GROUP1,
-	SKU_OFDM_GROUP2,
-	SKU_OFDM_GROUP2,
-	SKU_OFDM_GROUP3,
-	SKU_OFDM_GROUP4,
-};
-
-static const u8 sku_mcs_delta_map[] = {
-	SKU_MCS_GROUP0,
-	SKU_MCS_GROUP1,
-	SKU_MCS_GROUP1,
-	SKU_MCS_GROUP2,
-	SKU_MCS_GROUP2,
-	SKU_MCS_GROUP3,
-	SKU_MCS_GROUP4,
-	SKU_MCS_GROUP5,
-	SKU_MCS_GROUP6,
-	SKU_MCS_GROUP7,
-	SKU_MCS_GROUP8,
-	SKU_MCS_GROUP9,
-};
-
-#define SKU_GROUP(_mode, _len, _ofs_2g, _ofs_5g, _map)	\
-	[_mode] = {					\
-	.len = _len,					\
-	.offset = {					\
-		_ofs_2g,				\
-		_ofs_5g,				\
-	},						\
-	.delta_map = _map				\
-}
-
-const struct sku_group mt7915_sku_groups[] = {
-	SKU_GROUP(SKU_CCK, 4, 0x252, 0, sku_cck_delta_map),
-	SKU_GROUP(SKU_OFDM, 8, 0x254, 0x29d, sku_ofdm_delta_map),
-
-	SKU_GROUP(SKU_HT_BW20, 8, 0x259, 0x2a2, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HT_BW40, 9, 0x262, 0x2ab, sku_mcs_delta_map),
-	SKU_GROUP(SKU_VHT_BW20, 12, 0x259, 0x2a2, sku_mcs_delta_map),
-	SKU_GROUP(SKU_VHT_BW40, 12, 0x262, 0x2ab, sku_mcs_delta_map),
-	SKU_GROUP(SKU_VHT_BW80, 12, 0, 0x2b4, sku_mcs_delta_map),
-	SKU_GROUP(SKU_VHT_BW160, 12, 0, 0, sku_mcs_delta_map),
-
-	SKU_GROUP(SKU_HE_RU26, 12, 0x27f, 0x2dd, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU52, 12, 0x289, 0x2e7, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU106, 12, 0x293, 0x2f1, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU242, 12, 0x26b, 0x2bf, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU484, 12, 0x275, 0x2c9, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU996, 12, 0, 0x2d3, sku_mcs_delta_map),
-	SKU_GROUP(SKU_HE_RU2x996, 12, 0, 0, sku_mcs_delta_map),
-};
-
-static s8
-mt7915_get_sku_delta(struct mt7915_dev *dev, u32 addr)
+s8 mt7915_eeprom_get_power_delta(struct mt7915_dev *dev, int band)
 {
-	u32 val = mt7915_eeprom_read(dev, addr);
-	s8 delta = FIELD_GET(SKU_DELTA_VAL, val);
+	u32 val;
+	s8 delta;
+
+	if (band == NL80211_BAND_2GHZ)
+		val = mt7915_eeprom_read(dev, MT_EE_RATE_DELTA_2G);
+	else
+		val = mt7915_eeprom_read(dev, MT_EE_RATE_DELTA_5G);
 
-	if (!(val & SKU_DELTA_EN))
+	if (!(val & MT_EE_RATE_DELTA_EN))
 		return 0;
 
-	return val & SKU_DELTA_ADD ? delta : -delta;
-}
+	delta = FIELD_GET(MT_EE_RATE_DELTA_MASK, val);
 
-static void
-mt7915_eeprom_init_sku_band(struct mt7915_dev *dev,
-			    struct ieee80211_supported_band *sband)
-{
-	int i, band = sband->band;
-	s8 *rate_power = dev->rate_power[band], max_delta = 0;
-	u8 idx = 0;
-
-	for (i = 0; i < ARRAY_SIZE(mt7915_sku_groups); i++) {
-		const struct sku_group *sku = &mt7915_sku_groups[i];
-		u32 offset = sku->offset[band];
-		int j;
-
-		if (!offset) {
-			idx += sku->len;
-			continue;
-		}
-
-		rate_power[idx++] = mt7915_get_sku_delta(dev, offset);
-		if (rate_power[idx - 1] > max_delta)
-			max_delta = rate_power[idx - 1];
-
-		if (i == SKU_HT_BW20 || i == SKU_VHT_BW20)
-			offset += 1;
-
-		for (j = 1; j < sku->len; j++) {
-			u32 addr = offset + sku->delta_map[j];
-
-			rate_power[idx++] = mt7915_get_sku_delta(dev, addr);
-			if (rate_power[idx - 1] > max_delta)
-				max_delta = rate_power[idx - 1];
-		}
-	}
-
-	rate_power[idx] = max_delta;
+	return val & MT_EE_RATE_DELTA_SIGN ? delta : -delta;
 }
 
-void mt7915_eeprom_init_sku(struct mt7915_dev *dev)
-{
-	mt7915_eeprom_init_sku_band(dev, &dev->mphy.sband_2g.sband);
-	mt7915_eeprom_init_sku_band(dev, &dev->mphy.sband_5g.sband);
-}
+const u8 mt7915_sku_group_len[] = {
+	[SKU_CCK] = 4,
+	[SKU_OFDM] = 8,
+	[SKU_HT_BW20] = 8,
+	[SKU_HT_BW40] = 9,
+	[SKU_VHT_BW20] = 12,
+	[SKU_VHT_BW40] = 12,
+	[SKU_VHT_BW80] = 12,
+	[SKU_VHT_BW160] = 12,
+	[SKU_HE_RU26] = 12,
+	[SKU_HE_RU52] = 12,
+	[SKU_HE_RU106] = 12,
+	[SKU_HE_RU242] = 12,
+	[SKU_HE_RU484] = 12,
+	[SKU_HE_RU996] = 12,
+	[SKU_HE_RU2x996] = 12
+};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
index 8ef5ebfad706..033fb592bdf0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -19,6 +19,8 @@ enum mt7915_eeprom_field {
 	MT_EE_DDIE_FT_VERSION =	0x050,
 	MT_EE_DO_PRE_CAL =	0x062,
 	MT_EE_WIFI_CONF =	0x190,
+	MT_EE_RATE_DELTA_2G =	0x252,
+	MT_EE_RATE_DELTA_5G =	0x29d,
 	MT_EE_TX0_POWER_2G =	0x2fc,
 	MT_EE_TX0_POWER_5G =	0x34b,
 	MT_EE_ADIE_FT_VERSION =	0x9a0,
@@ -43,6 +45,10 @@ enum mt7915_eeprom_field {
 #define MT_EE_WIFI_CONF7_TSSI0_5G		BIT(2)
 #define MT_EE_WIFI_CONF7_TSSI1_5G		BIT(4)
 
+#define MT_EE_RATE_DELTA_MASK			GENMASK(5, 0)
+#define MT_EE_RATE_DELTA_SIGN			BIT(6)
+#define MT_EE_RATE_DELTA_EN			BIT(7)
+
 enum mt7915_eeprom_band {
 	MT_EE_BAND_SEL_DEFAULT,
 	MT_EE_BAND_SEL_5GHZ,
@@ -50,32 +56,6 @@ enum mt7915_eeprom_band {
 	MT_EE_BAND_SEL_DUAL,
 };
 
-#define SKU_DELTA_VAL		GENMASK(5, 0)
-#define SKU_DELTA_ADD		BIT(6)
-#define SKU_DELTA_EN		BIT(7)
-
-enum mt7915_sku_delta_group {
-	SKU_CCK_GROUP0,
-	SKU_CCK_GROUP1,
-
-	SKU_OFDM_GROUP0 = 0,
-	SKU_OFDM_GROUP1,
-	SKU_OFDM_GROUP2,
-	SKU_OFDM_GROUP3,
-	SKU_OFDM_GROUP4,
-
-	SKU_MCS_GROUP0 = 0,
-	SKU_MCS_GROUP1,
-	SKU_MCS_GROUP2,
-	SKU_MCS_GROUP3,
-	SKU_MCS_GROUP4,
-	SKU_MCS_GROUP5,
-	SKU_MCS_GROUP6,
-	SKU_MCS_GROUP7,
-	SKU_MCS_GROUP8,
-	SKU_MCS_GROUP9,
-};
-
 enum mt7915_sku_rate_group {
 	SKU_CCK,
 	SKU_OFDM,
@@ -95,12 +75,6 @@ enum mt7915_sku_rate_group {
 	MAX_SKU_RATE_GROUP_NUM,
 };
 
-struct sku_group {
-	u8 len;
-	u16 offset[2];
-	const u8 *delta_map;
-};
-
 static inline int
 mt7915_get_channel_group(int channel)
 {
@@ -133,6 +107,6 @@ mt7915_tssi_enabled(struct mt7915_dev *dev, enum nl80211_band band)
 		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF7_TSSI0_2G;
 }
 
-extern const struct sku_group mt7915_sku_groups[];
+extern const u8 mt7915_sku_group_len[MAX_SKU_RATE_GROUP_NUM];
 
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 19fa9ad7d5d7..2d6b38d39356 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -67,6 +67,39 @@ static const struct ieee80211_iface_combination if_comb[] = {
 	}
 };
 
+static void
+mt7915_init_txpower(struct mt7915_dev *dev,
+		    struct ieee80211_supported_band *sband)
+{
+	int i, n_chains = hweight8(dev->mphy.antenna_mask);
+	int nss_delta = mt76_tx_power_nss_delta(n_chains);
+	int pwr_delta = mt7915_eeprom_get_power_delta(dev, sband->band);
+	struct mt76_power_limits limits;
+
+	for (i = 0; i < sband->n_channels; i++) {
+		struct ieee80211_channel *chan = &sband->channels[i];
+		u32 target_power = 0;
+		int j;
+
+		for (j = 0; j < n_chains; j++) {
+			u32 val;
+
+			val = mt7915_eeprom_get_target_power(dev, chan, j);
+			target_power = max(target_power, val);
+		}
+
+		target_power += pwr_delta;
+		target_power = mt76_get_rate_power_limits(&dev->mphy, chan,
+							  &limits,
+							  target_power);
+		target_power += nss_delta;
+		target_power = DIV_ROUND_UP(target_power, 2);
+		chan->max_power = min_t(int, chan->max_reg_power,
+					target_power);
+		chan->orig_mpwr = target_power;
+	}
+}
+
 static void
 mt7915_regd_notifier(struct wiphy *wiphy,
 		     struct regulatory_request *request)
@@ -77,8 +110,12 @@ mt7915_regd_notifier(struct wiphy *wiphy,
 	struct mt7915_phy *phy = mphy->priv;
 	struct cfg80211_chan_def *chandef = &mphy->chandef;
 
+	memcpy(dev->mt76.alpha2, request->alpha2, sizeof(dev->mt76.alpha2));
 	dev->mt76.region = request->dfs_region;
 
+	mt7915_init_txpower(dev, &mphy->sband_2g.sband);
+	mt7915_init_txpower(dev, &mphy->sband_5g.sband);
+
 	if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR))
 		return;
 
@@ -207,38 +244,6 @@ static int mt7915_txbf_init(struct mt7915_dev *dev)
 	return mt7915_mcu_set_txbf_type(dev);
 }
 
-static void
-mt7915_init_txpower_band(struct mt7915_dev *dev,
-			 struct ieee80211_supported_band *sband)
-{
-	int i, n_chains = hweight8(dev->mphy.antenna_mask);
-
-	for (i = 0; i < sband->n_channels; i++) {
-		struct ieee80211_channel *chan = &sband->channels[i];
-		u32 target_power = 0;
-		int j;
-
-		for (j = 0; j < n_chains; j++) {
-			u32 val;
-
-			val = mt7915_eeprom_get_target_power(dev, chan, j);
-			target_power = max(target_power, val);
-		}
-
-		chan->max_power = min_t(int, chan->max_reg_power,
-					target_power / 2);
-		chan->orig_mpwr = target_power / 2;
-	}
-}
-
-static void mt7915_init_txpower(struct mt7915_dev *dev)
-{
-	mt7915_init_txpower_band(dev, &dev->mphy.sband_2g.sband);
-	mt7915_init_txpower_band(dev, &dev->mphy.sband_5g.sband);
-
-	mt7915_eeprom_init_sku(dev);
-}
-
 static int mt7915_register_ext_phy(struct mt7915_dev *dev)
 {
 	struct mt7915_phy *phy = mt7915_ext_phy(dev);
@@ -295,7 +300,8 @@ static void mt7915_init_work(struct work_struct *work)
 
 	mt7915_mcu_set_eeprom(dev);
 	mt7915_mac_init(dev);
-	mt7915_init_txpower(dev);
+	mt7915_init_txpower(dev, &dev->mphy.sband_2g.sband);
+	mt7915_init_txpower(dev, &dev->mphy.sband_5g.sband);
 	mt7915_txbf_init(dev);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 413abbca3246..e5bd687546b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -429,7 +429,7 @@ static int mt7915_config(struct ieee80211_hw *hw, u32 changed)
 	}
 
 	if (changed & IEEE80211_CONF_CHANGE_POWER) {
-		ret = mt7915_mcu_set_sku(phy);
+		ret = mt7915_mcu_set_txpower_sku(phy);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 74c375910eb7..92e0c782bf40 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3503,8 +3503,9 @@ int mt7915_mcu_get_tx_rate(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx)
 				 sizeof(req), false);
 }
 
-int mt7915_mcu_set_sku(struct mt7915_phy *phy)
+int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy)
 {
+#define MT7915_SKU_RATE_NUM		161
 	struct mt7915_dev *dev = phy->dev;
 	struct mt76_phy *mphy = phy->mt76;
 	struct ieee80211_hw *hw = mphy->hw;
@@ -3518,11 +3519,10 @@ int mt7915_mcu_set_sku(struct mt7915_phy *phy)
 		.dbdc_idx = phy != &dev->phy,
 	};
 	struct mt76_power_limits limits_array;
-	s8 *delta, *la = (s8 *)&limits_array;
+	s8 *la = (s8 *)&limits_array;
 	int i, idx, n_chains = hweight8(mphy->antenna_mask);
 	int tx_power;
 
-	delta = dev->rate_power[mphy->chandef.chan->band];
 	tx_power = hw->conf.power_level * 2 -
 		   mt76_tx_power_nss_delta(n_chains);
 
@@ -3530,10 +3530,8 @@ int mt7915_mcu_set_sku(struct mt7915_phy *phy)
 					      &limits_array, tx_power);
 	mphy->txpower_cur = tx_power;
 
-	for (i = 0, idx = 0; i < MAX_SKU_RATE_GROUP_NUM; i++) {
-		const struct sku_group *sku = &mt7915_sku_groups[i];
-		u32 offset = sku->offset[mphy->chandef.chan->band];
-		u8 mcs_num = sku->len;
+	for (i = 0, idx = 0; i < ARRAY_SIZE(mt7915_sku_group_len); i++) {
+		u8 mcs_num, len = mt7915_sku_group_len[i];
 		int j;
 
 		if (i >= SKU_HT_BW20 && i <= SKU_VHT_BW160) {
@@ -3541,22 +3539,15 @@ int mt7915_mcu_set_sku(struct mt7915_phy *phy)
 
 			if (i == SKU_HT_BW20 || i == SKU_VHT_BW20)
 				la = (s8 *)&limits_array + 12;
+		} else {
+			mcs_num = len;
 		}
 
-		if (!offset) {
-			idx += sku->len;
-			la += mcs_num;
-			continue;
-		}
-
-		for (j = 0; j < min_t(u8, mcs_num, sku->len); j++) {
-			s8 rate_power;
+		for (j = 0; j < min_t(u8, mcs_num, len); j++)
+			req.val[idx + j] = la[j];
 
-			rate_power = hw->conf.power_level * 2 + delta[idx + j];
-			req.val[idx + j] = min_t(s8, la[j], rate_power);
-		}
 		la += mcs_num;
-		idx += sku->len;
+		idx += len;
 	}
 
 	return mt76_mcu_send_msg(&dev->mt76,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 456daf3ad853..80eb35231a1a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -39,10 +39,6 @@
 #define MT7915_5G_RATE_DEFAULT		0x4b	/* OFDM 6M */
 #define MT7915_2G_RATE_DEFAULT		0x0	/* CCK 1M */
 
-#define MT7915_SKU_RATE_NUM		161
-#define MT7915_SKU_MAX_DELTA_IDX	MT7915_SKU_RATE_NUM
-#define MT7915_SKU_TABLE_SIZE		(MT7915_SKU_RATE_NUM + 1)
-
 struct mt7915_vif;
 struct mt7915_sta;
 struct mt7915_dfs_pulse;
@@ -195,8 +191,6 @@ struct mt7915_dev {
 	int token_count;
 	struct idr token;
 
-	s8 **rate_power; /* TODO: use mt76_rate_power */
-
 	bool dbdc_support;
 	bool flash_mode;
 	bool fw_debug;
@@ -302,7 +296,7 @@ void mt7915_eeprom_parse_band_config(struct mt7915_phy *phy);
 int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 				   struct ieee80211_channel *chan,
 				   u8 chain_idx);
-void mt7915_eeprom_init_sku(struct mt7915_dev *dev);
+s8 mt7915_eeprom_get_power_delta(struct mt7915_dev *dev, int band);
 int mt7915_dma_init(struct mt7915_dev *dev);
 void mt7915_dma_prefetch(struct mt7915_dev *dev);
 void mt7915_dma_cleanup(struct mt7915_dev *dev);
@@ -352,7 +346,7 @@ int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band);
 int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val);
 int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
 int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable);
-int mt7915_mcu_set_sku(struct mt7915_phy *phy);
+int mt7915_mcu_set_txpower_sku(struct mt7915_phy *phy);
 int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev);
 int mt7915_mcu_set_txbf_module(struct mt7915_dev *dev);
 int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index 75769595d1e1..ebfc4c15fef2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -154,28 +154,6 @@ static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
 	return IRQ_HANDLED;
 }
 
-static int
-mt7915_alloc_device(struct pci_dev *pdev, struct mt7915_dev *dev)
-{
-#define NUM_BANDS	2
-	int i;
-	s8 **sku;
-
-	sku = devm_kzalloc(&pdev->dev, NUM_BANDS * sizeof(*sku), GFP_KERNEL);
-	if (!sku)
-		return -ENOMEM;
-
-	for (i = 0; i < NUM_BANDS; i++) {
-		sku[i] = devm_kzalloc(&pdev->dev, MT7915_SKU_TABLE_SIZE *
-				      sizeof(**sku), GFP_KERNEL);
-		if (!sku[i])
-			return -ENOMEM;
-	}
-	dev->rate_power = sku;
-
-	return 0;
-}
-
 static void mt7915_pci_init_hif2(struct mt7915_dev *dev)
 {
 	struct mt7915_hif *hif;
@@ -270,9 +248,6 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	dev = container_of(mdev, struct mt7915_dev, mt76);
-	ret = mt7915_alloc_device(pdev, dev);
-	if (ret)
-		goto error;
 
 	ret = mt7915_mmio_init(mdev, pcim_iomap_table(pdev)[0], pdev->irq);
 	if (ret)
-- 
cgit v1.2.3


From 5352efaed0812dc23308498a2e700630f603579f Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Sat, 17 Apr 2021 06:16:19 +0800
Subject: mt76: mt7915: directly read per-rate tx power from registers

Since driver no longer handler per-rate tx power setting, we need to
read the power values directly from registers.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 68 +++++++++++++++-------
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h   |  5 ++
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 44e7127d4495..6a8ddeeecbe9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -299,7 +299,7 @@ mt7915_queues_read(struct seq_file *s, void *data)
 }
 
 static void
-mt7915_puts_rate_txpower(struct seq_file *s, s8 txpower_cur, int band)
+mt7915_puts_rate_txpower(struct seq_file *s, struct mt7915_phy *phy)
 {
 	static const char * const sku_group_name[] = {
 		"CCK", "OFDM", "HT20", "HT40",
@@ -307,18 +307,54 @@ mt7915_puts_rate_txpower(struct seq_file *s, s8 txpower_cur, int band)
 		"RU26", "RU52", "RU106", "RU242/SU20",
 		"RU484/SU40", "RU996/SU80", "RU2x996/SU160"
 	};
-	s8 txpower[161];
+	struct mt7915_dev *dev = dev_get_drvdata(s->private);
+	bool ext_phy = phy != &dev->phy;
+	u32 reg_base;
 	int i, idx = 0;
 
-	for (i = 0; i < ARRAY_SIZE(txpower); i++)
-		txpower[i] = DIV_ROUND_UP(txpower_cur, 2);
+	if (!phy)
+		return;
+
+	reg_base = MT_TMAC_FP0R0(ext_phy);
+	seq_printf(s, "\nBand %d\n", ext_phy);
 
 	for (i = 0; i < ARRAY_SIZE(mt7915_sku_group_len); i++) {
-		u8 len = mt7915_sku_group_len[i];
+		u8 cnt, mcs_num = mt7915_sku_group_len[i];
+		s8 txpower[12];
+		int j;
+
+		if (i == SKU_HT_BW20 || i == SKU_HT_BW40) {
+			mcs_num = 8;
+		} else if (i >= SKU_VHT_BW20 && i <= SKU_VHT_BW160) {
+			mcs_num = 10;
+		} else if (i == SKU_HE_RU26) {
+			reg_base = MT_TMAC_FP0R18(ext_phy);
+			idx = 0;
+		}
+
+		for (j = 0, cnt = 0; j < DIV_ROUND_UP(mcs_num, 4); j++) {
+			u32 val;
+
+			if (i == SKU_VHT_BW160 && idx == 60) {
+				reg_base = MT_TMAC_FP0R15(ext_phy);
+				idx = 0;
+			}
 
-		mt76_seq_puts_array(s, sku_group_name[i],
-				    txpower + idx, len);
-		idx += len;
+			val = mt76_rr(dev, reg_base + (idx / 4) * 4);
+
+			if (idx && idx % 4)
+				val >>= (idx % 4) * 8;
+
+			while (val > 0 && cnt < mcs_num) {
+				s8 pwr = FIELD_GET(MT_TMAC_FP_MASK, val);
+
+				txpower[cnt++] = pwr;
+				val >>= 8;
+				idx++;
+			}
+		}
+
+		mt76_seq_puts_array(s, sku_group_name[i], txpower, mcs_num);
 	}
 }
 
@@ -326,21 +362,9 @@ static int
 mt7915_read_rate_txpower(struct seq_file *s, void *data)
 {
 	struct mt7915_dev *dev = dev_get_drvdata(s->private);
-	struct mt76_phy *mphy = &dev->mphy;
-	enum nl80211_band band = mphy->chandef.chan->band;
-	s8 txpower = mphy->txpower_cur;
-
-	seq_puts(s, "Band 0:\n");
-	mt7915_puts_rate_txpower(s, txpower, band);
 
-	if (dev->mt76.phy2) {
-		mphy = dev->mt76.phy2;
-		band = mphy->chandef.chan->band;
-		txpower = mphy->txpower_cur;
-
-		seq_puts(s, "Band 1:\n");
-		mt7915_puts_rate_txpower(s, txpower, band);
-	}
+	mt7915_puts_rate_txpower(s, &dev->phy);
+	mt7915_puts_rate_txpower(s, mt7915_ext_phy(dev));
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index dfb8880657bf..efe0f2904c66 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -82,6 +82,11 @@
 #define MT_TMAC_CTCR0_INS_DDLMT_EN		BIT(17)
 #define MT_TMAC_CTCR0_INS_DDLMT_VHT_SMPDU_EN	BIT(18)
 
+#define MT_TMAC_FP0R0(_band)		MT_WF_TMAC(_band, 0x020)
+#define MT_TMAC_FP0R15(_band)		MT_WF_TMAC(_band, 0x080)
+#define MT_TMAC_FP0R18(_band)		MT_WF_TMAC(_band, 0x270)
+#define MT_TMAC_FP_MASK			GENMASK(7, 0)
+
 #define MT_TMAC_TFCR0(_band)		MT_WF_TMAC(_band, 0x1e0)
 
 #define MT_WF_DMA_BASE(_band)		((_band) ? 0xa1e00 : 0x21e00)
-- 
cgit v1.2.3


From 367518858e78b80ef09a0075b637a6d8e0b88dfb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 17 Apr 2021 12:27:06 +0200
Subject: mt76: mt7921: do not use 0 as NULL pointer

Fix the following sparse warning:
drivers/net/wireless/mediatek/mt76/mt7921/mac.c:1425:70:
  warning: Using plain integer as NULL pointer

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 3145880df6e7..c8819e78cea3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1424,7 +1424,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
 	ieee80211_wake_queues(hw);
 	ieee80211_iterate_active_interfaces(hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
-					    mt7921_vif_connect_iter, 0);
+					    mt7921_vif_connect_iter, NULL);
 }
 
 void mt7921_reset(struct mt76_dev *mdev)
-- 
cgit v1.2.3


From e4bbc5c53a8f6b9235d8f1292377705cf7bcf59b Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Wed, 21 Apr 2021 14:53:00 +0530
Subject: octeontx2-af: cn10k: Mailbox changes for CN10K CPT

Adds changes to existing CPT mailbox messages to support
CN10K CPT block. This patch also adds new register defines
for CN10K CPT.

Signed-off-by: Vidya Sagar Velumuri <vvelumuri@marvell.com>
Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c | 11 ++++++++++-
 drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h | 21 +++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index 0945c3a3b180..42c474957b69 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -9,6 +9,10 @@
 
 /* CPT PF device id */
 #define	PCI_DEVID_OTX2_CPT_PF	0xA0FD
+#define	PCI_DEVID_OTX2_CPT10K_PF 0xA0F2
+
+/* Length of initial context fetch in 128 byte words */
+#define CPT_CTX_ILEN    2
 
 static int get_cpt_pf_num(struct rvu *rvu)
 {
@@ -21,7 +25,8 @@ static int get_cpt_pf_num(struct rvu *rvu)
 		if (!pdev)
 			continue;
 
-		if (pdev->device == PCI_DEVID_OTX2_CPT_PF) {
+		if (pdev->device == PCI_DEVID_OTX2_CPT_PF ||
+		    pdev->device == PCI_DEVID_OTX2_CPT10K_PF) {
 			cpt_pf_num = i;
 			put_device(&pdev->dev);
 			break;
@@ -103,6 +108,9 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu,
 
 		/* Set CPT LF group and priority */
 		val = (u64)req->eng_grpmsk << 48 | 1;
+		if (!is_rvu_otx2(rvu))
+			val |= (CPT_CTX_ILEN << 17);
+
 		rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), val);
 
 		/* Set CPT LF NIX_PF_FUNC and SSO_PF_FUNC */
@@ -192,6 +200,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
 		case CPT_AF_PF_FUNC:
 		case CPT_AF_BLK_RST:
 		case CPT_AF_CONSTANTS1:
+		case CPT_AF_CTX_FLUSH_TIMER:
 			return true;
 		}
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 3e401fd8ac63..ac71c0f2f960 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -494,6 +494,27 @@
 #define CPT_AF_RAS_INT_W1S              (0x47028)
 #define CPT_AF_RAS_INT_ENA_W1S          (0x47030)
 #define CPT_AF_RAS_INT_ENA_W1C          (0x47038)
+#define CPT_AF_CTX_FLUSH_TIMER          (0x48000ull)
+#define CPT_AF_CTX_ERR                  (0x48008ull)
+#define CPT_AF_CTX_ENC_ID               (0x48010ull)
+#define CPT_AF_CTX_MIS_PC		(0x49400ull)
+#define CPT_AF_CTX_HIT_PC		(0x49408ull)
+#define CPT_AF_CTX_AOP_PC		(0x49410ull)
+#define CPT_AF_CTX_AOP_LATENCY_PC       (0x49418ull)
+#define CPT_AF_CTX_IFETCH_PC            (0x49420ull)
+#define CPT_AF_CTX_IFETCH_LATENCY_PC    (0x49428ull)
+#define CPT_AF_CTX_FFETCH_PC            (0x49430ull)
+#define CPT_AF_CTX_FFETCH_LATENCY_PC    (0x49438ull)
+#define CPT_AF_CTX_WBACK_PC             (0x49440ull)
+#define CPT_AF_CTX_WBACK_LATENCY_PC     (0x49448ull)
+#define CPT_AF_CTX_PSH_PC               (0x49450ull)
+#define CPT_AF_CTX_PSH_LATENCY_PC       (0x49458ull)
+#define CPT_AF_RXC_TIME                 (0x50010ull)
+#define CPT_AF_RXC_TIME_CFG             (0x50018ull)
+#define CPT_AF_RXC_DFRG                 (0x50020ull)
+#define CPT_AF_RXC_ACTIVE_STS           (0x50028ull)
+#define CPT_AF_RXC_ZOMBIE_STS           (0x50030ull)
+#define CPT_AF_X2PX_LINK_CFG(a)         (0x51000ull | (u64)(a) << 3)
 
 #define AF_BAR2_ALIASX(a, b)            (0x9100000ull | (a) << 12 | (b))
 #define CPT_AF_BAR2_SEL                 0x9000000
-- 
cgit v1.2.3


From ecad2ce8c48fcaa23c6efd07e8d1467319a7bf8a Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Wed, 21 Apr 2021 14:53:01 +0530
Subject: octeontx2-af: cn10k: Add mailbox to configure reassembly timeout

CN10K CPT coprocessor includes a component named RXC which
is responsible for reassembly of inner IP packets. RXC has
the feature to evict oldest entries based on age/threshold.
This patch adds a new mailbox to configure reassembly age
or threshold.

Signed-off-by: Jerin Jacob Kollanukkaran <jerinj@marvell.com>
Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h   | 13 ++++
 .../net/ethernet/marvell/octeontx2/af/rvu_cpt.c    | 69 +++++++++++++++++++---
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 55629c66586e..84e4178e8a13 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -177,6 +177,8 @@ M(CPT_LF_ALLOC,		0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg,	\
 M(CPT_LF_FREE,		0xA01, cpt_lf_free, msg_req, msg_rsp)		\
 M(CPT_RD_WR_REGISTER,	0xA02, cpt_rd_wr_register,  cpt_rd_wr_reg_msg,	\
 			       cpt_rd_wr_reg_msg)			\
+M(CPT_RXC_TIME_CFG,     0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req,  \
+			       msg_rsp)                                 \
 /* NPC mbox IDs (range 0x6000 - 0x7FFF) */				\
 M(NPC_MCAM_ALLOC_ENTRY,	0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\
 				npc_mcam_alloc_entry_rsp)		\
@@ -1255,4 +1257,15 @@ struct cpt_lf_alloc_req_msg {
 	int blkaddr;
 };
 
+/* Mailbox message request format to configure reassembly timeout. */
+struct cpt_rxc_time_cfg_req {
+	struct mbox_msghdr hdr;
+	int blkaddr;
+	u32 step;
+	u16 zombie_thres;
+	u16 zombie_limit;
+	u16 active_thres;
+	u16 active_limit;
+};
+
 #endif /* MBOX_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index 42c474957b69..2824175dceaa 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2020 Marvell. */
 
+#include <linux/bitfield.h>
 #include <linux/pci.h>
 #include "rvu_struct.h"
 #include "rvu_reg.h"
@@ -60,6 +61,17 @@ static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc)
 	return true;
 }
 
+static int validate_and_get_cpt_blkaddr(int req_blkaddr)
+{
+	int blkaddr;
+
+	blkaddr = req_blkaddr ? req_blkaddr : BLKADDR_CPT0;
+	if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1)
+		return -EINVAL;
+
+	return blkaddr;
+}
+
 int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu,
 				  struct cpt_lf_alloc_req_msg *req,
 				  struct msg_rsp *rsp)
@@ -70,9 +82,9 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu,
 	int num_lfs, slot;
 	u64 val;
 
-	blkaddr = req->blkaddr ? req->blkaddr : BLKADDR_CPT0;
-	if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1)
-		return -ENODEV;
+	blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+	if (blkaddr < 0)
+		return blkaddr;
 
 	if (req->eng_grpmsk == 0x0)
 		return CPT_AF_ERR_GRP_INVALID;
@@ -170,7 +182,9 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
 	struct rvu_block *block;
 	struct rvu_pfvf *pfvf;
 
-	blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0);
+	blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+	if (blkaddr < 0)
+		return blkaddr;
 
 	/* Registers that can be accessed from PF/VF */
 	if ((offset & 0xFF000) ==  CPT_AF_LFX_CTL(0) ||
@@ -226,9 +240,9 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
 {
 	int blkaddr;
 
-	blkaddr = req->blkaddr ? req->blkaddr : BLKADDR_CPT0;
-	if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1)
-		return -ENODEV;
+	blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+	if (blkaddr < 0)
+		return blkaddr;
 
 	/* This message is accepted only if sent from CPT PF/VF */
 	if (!is_cpt_pf(rvu, req->hdr.pcifunc) &&
@@ -250,6 +264,47 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
 	return 0;
 }
 
+#define RXC_ZOMBIE_THRES  GENMASK_ULL(59, 48)
+#define RXC_ZOMBIE_LIMIT  GENMASK_ULL(43, 32)
+#define RXC_ACTIVE_THRES  GENMASK_ULL(27, 16)
+#define RXC_ACTIVE_LIMIT  GENMASK_ULL(11, 0)
+#define RXC_ACTIVE_COUNT  GENMASK_ULL(60, 48)
+#define RXC_ZOMBIE_COUNT  GENMASK_ULL(60, 48)
+
+static void cpt_rxc_time_cfg(struct rvu *rvu, struct cpt_rxc_time_cfg_req *req,
+			     int blkaddr)
+{
+	u64 dfrg_reg;
+
+	dfrg_reg = FIELD_PREP(RXC_ZOMBIE_THRES, req->zombie_thres);
+	dfrg_reg |= FIELD_PREP(RXC_ZOMBIE_LIMIT, req->zombie_limit);
+	dfrg_reg |= FIELD_PREP(RXC_ACTIVE_THRES, req->active_thres);
+	dfrg_reg |= FIELD_PREP(RXC_ACTIVE_LIMIT, req->active_limit);
+
+	rvu_write64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG, req->step);
+	rvu_write64(rvu, blkaddr, CPT_AF_RXC_DFRG, dfrg_reg);
+}
+
+int rvu_mbox_handler_cpt_rxc_time_cfg(struct rvu *rvu,
+				      struct cpt_rxc_time_cfg_req *req,
+				      struct msg_rsp *rsp)
+{
+	int blkaddr;
+
+	blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+	if (blkaddr < 0)
+		return blkaddr;
+
+	/* This message is accepted only if sent from CPT PF/VF */
+	if (!is_cpt_pf(rvu, req->hdr.pcifunc) &&
+	    !is_cpt_vf(rvu, req->hdr.pcifunc))
+		return CPT_AF_ERR_ACCESS_DENIED;
+
+	cpt_rxc_time_cfg(rvu, req, blkaddr);
+
+	return 0;
+}
+
 #define INPROG_INFLIGHT(reg)    ((reg) & 0x1FF)
 #define INPROG_GRB_PARTIAL(reg) ((reg) & BIT_ULL(31))
 #define INPROG_GRB(reg)         (((reg) >> 32) & 0xFF)
-- 
cgit v1.2.3


From 2e2ee4cd0ab546859b5b5b2874b973b6caf855b3 Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Wed, 21 Apr 2021 14:53:02 +0530
Subject: octeontx2-af: Add mailbox for CPT stats

Adds a new mailbox to get CPT stats, includes performance
counters, CPT engines status and RXC status.

Signed-off-by: Narayana Prasad Raju Atherya <pathreya@marvell.com>
Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h   |  48 +++++++++
 .../net/ethernet/marvell/octeontx2/af/rvu_cpt.c    | 112 +++++++++++++++++++++
 2 files changed, 160 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 84e4178e8a13..cedb2616c509 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -177,6 +177,7 @@ M(CPT_LF_ALLOC,		0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg,	\
 M(CPT_LF_FREE,		0xA01, cpt_lf_free, msg_req, msg_rsp)		\
 M(CPT_RD_WR_REGISTER,	0xA02, cpt_rd_wr_register,  cpt_rd_wr_reg_msg,	\
 			       cpt_rd_wr_reg_msg)			\
+M(CPT_STATS,            0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp)	\
 M(CPT_RXC_TIME_CFG,     0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req,  \
 			       msg_rsp)                                 \
 /* NPC mbox IDs (range 0x6000 - 0x7FFF) */				\
@@ -1257,6 +1258,53 @@ struct cpt_lf_alloc_req_msg {
 	int blkaddr;
 };
 
+/* Mailbox message request and response format for CPT stats. */
+struct cpt_sts_req {
+	struct mbox_msghdr hdr;
+	u8 blkaddr;
+};
+
+struct cpt_sts_rsp {
+	struct mbox_msghdr hdr;
+	u64 inst_req_pc;
+	u64 inst_lat_pc;
+	u64 rd_req_pc;
+	u64 rd_lat_pc;
+	u64 rd_uc_pc;
+	u64 active_cycles_pc;
+	u64 ctx_mis_pc;
+	u64 ctx_hit_pc;
+	u64 ctx_aop_pc;
+	u64 ctx_aop_lat_pc;
+	u64 ctx_ifetch_pc;
+	u64 ctx_ifetch_lat_pc;
+	u64 ctx_ffetch_pc;
+	u64 ctx_ffetch_lat_pc;
+	u64 ctx_wback_pc;
+	u64 ctx_wback_lat_pc;
+	u64 ctx_psh_pc;
+	u64 ctx_psh_lat_pc;
+	u64 ctx_err;
+	u64 ctx_enc_id;
+	u64 ctx_flush_timer;
+	u64 rxc_time;
+	u64 rxc_time_cfg;
+	u64 rxc_active_sts;
+	u64 rxc_zombie_sts;
+	u64 busy_sts_ae;
+	u64 free_sts_ae;
+	u64 busy_sts_se;
+	u64 free_sts_se;
+	u64 busy_sts_ie;
+	u64 free_sts_ie;
+	u64 exe_err_info;
+	u64 cptclk_cnt;
+	u64 diag;
+	u64 rxc_dfrg;
+	u64 x2p_link_cfg0;
+	u64 x2p_link_cfg1;
+};
+
 /* Mailbox message request format to configure reassembly timeout. */
 struct cpt_rxc_time_cfg_req {
 	struct mbox_msghdr hdr;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index 2824175dceaa..89253f7bdadb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -15,6 +15,24 @@
 /* Length of initial context fetch in 128 byte words */
 #define CPT_CTX_ILEN    2
 
+#define cpt_get_eng_sts(e_min, e_max, rsp, etype)                   \
+({                                                                  \
+	u64 free_sts = 0, busy_sts = 0;                             \
+	typeof(rsp) _rsp = rsp;                                     \
+	u32 e, i;                                                   \
+								    \
+	for (e = (e_min), i = 0; e < (e_max); e++, i++) {           \
+		reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(e)); \
+		if (reg & 0x1)                                      \
+			busy_sts |= 1ULL << i;                      \
+								    \
+		if (reg & 0x2)                                      \
+			free_sts |= 1ULL << i;                      \
+	}                                                           \
+	(_rsp)->busy_sts_##etype = busy_sts;                        \
+	(_rsp)->free_sts_##etype = free_sts;                        \
+})
+
 static int get_cpt_pf_num(struct rvu *rvu)
 {
 	int i, domain_nr, cpt_pf_num = -1;
@@ -264,6 +282,100 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
 	return 0;
 }
 
+static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr)
+{
+	if (is_rvu_otx2(rvu))
+		return;
+
+	rsp->ctx_mis_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_MIS_PC);
+	rsp->ctx_hit_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_HIT_PC);
+	rsp->ctx_aop_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_AOP_PC);
+	rsp->ctx_aop_lat_pc = rvu_read64(rvu, blkaddr,
+					 CPT_AF_CTX_AOP_LATENCY_PC);
+	rsp->ctx_ifetch_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_IFETCH_PC);
+	rsp->ctx_ifetch_lat_pc = rvu_read64(rvu, blkaddr,
+					    CPT_AF_CTX_IFETCH_LATENCY_PC);
+	rsp->ctx_ffetch_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC);
+	rsp->ctx_ffetch_lat_pc = rvu_read64(rvu, blkaddr,
+					    CPT_AF_CTX_FFETCH_LATENCY_PC);
+	rsp->ctx_wback_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC);
+	rsp->ctx_wback_lat_pc = rvu_read64(rvu, blkaddr,
+					   CPT_AF_CTX_FFETCH_LATENCY_PC);
+	rsp->ctx_psh_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC);
+	rsp->ctx_psh_lat_pc = rvu_read64(rvu, blkaddr,
+					 CPT_AF_CTX_FFETCH_LATENCY_PC);
+	rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR);
+	rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID);
+	rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER);
+
+	rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME);
+	rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG);
+	rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS);
+	rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS);
+	rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG);
+	rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0));
+	rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1));
+}
+
+static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr)
+{
+	u16 max_ses, max_ies, max_aes;
+	u32 e_min = 0, e_max = 0;
+	u64 reg;
+
+	reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1);
+	max_ses = reg & 0xffff;
+	max_ies = (reg >> 16) & 0xffff;
+	max_aes = (reg >> 32) & 0xffff;
+
+	/* Get AE status */
+	e_min = max_ses + max_ies;
+	e_max = max_ses + max_ies + max_aes;
+	cpt_get_eng_sts(e_min, e_max, rsp, ae);
+	/* Get SE status */
+	e_min = 0;
+	e_max = max_ses;
+	cpt_get_eng_sts(e_min, e_max, rsp, se);
+	/* Get IE status */
+	e_min = max_ses;
+	e_max = max_ses + max_ies;
+	cpt_get_eng_sts(e_min, e_max, rsp, ie);
+}
+
+int rvu_mbox_handler_cpt_sts(struct rvu *rvu, struct cpt_sts_req *req,
+			     struct cpt_sts_rsp *rsp)
+{
+	int blkaddr;
+
+	blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr);
+	if (blkaddr < 0)
+		return blkaddr;
+
+	/* This message is accepted only if sent from CPT PF/VF */
+	if (!is_cpt_pf(rvu, req->hdr.pcifunc) &&
+	    !is_cpt_vf(rvu, req->hdr.pcifunc))
+		return CPT_AF_ERR_ACCESS_DENIED;
+
+	get_ctx_pc(rvu, rsp, blkaddr);
+
+	/* Get CPT engines status */
+	get_eng_sts(rvu, rsp, blkaddr);
+
+	/* Read CPT instruction PC registers */
+	rsp->inst_req_pc = rvu_read64(rvu, blkaddr, CPT_AF_INST_REQ_PC);
+	rsp->inst_lat_pc = rvu_read64(rvu, blkaddr, CPT_AF_INST_LATENCY_PC);
+	rsp->rd_req_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_REQ_PC);
+	rsp->rd_lat_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_LATENCY_PC);
+	rsp->rd_uc_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_UC_PC);
+	rsp->active_cycles_pc = rvu_read64(rvu, blkaddr,
+					   CPT_AF_ACTIVE_CYCLES_PC);
+	rsp->exe_err_info = rvu_read64(rvu, blkaddr, CPT_AF_EXE_ERR_INFO);
+	rsp->cptclk_cnt = rvu_read64(rvu, blkaddr, CPT_AF_CPTCLK_CNT);
+	rsp->diag = rvu_read64(rvu, blkaddr, CPT_AF_DIAG);
+
+	return 0;
+}
+
 #define RXC_ZOMBIE_THRES  GENMASK_ULL(59, 48)
 #define RXC_ZOMBIE_LIMIT  GENMASK_ULL(43, 32)
 #define RXC_ACTIVE_THRES  GENMASK_ULL(27, 16)
-- 
cgit v1.2.3


From 78e70dbcfd0334c0eaf61c09e2083107f4762506 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 21 Apr 2021 14:04:52 +0200
Subject: net: dsa: mv88e6xxx: Correct spelling of define "ADRR" -> "ADDR"

Because ADRR is not a thing.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    | 2 +-
 drivers/net/dsa/mv88e6xxx/global2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9ff1a10993b1..eca285aaf72f 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1440,7 +1440,7 @@ static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port)
 			 * the special "LAG device" in the PVT, using
 			 * the LAG ID as the port number.
 			 */
-			dev = MV88E6XXX_G2_PVT_ADRR_DEV_TRUNK;
+			dev = MV88E6XXX_G2_PVT_ADDR_DEV_TRUNK;
 			port = dsa_lag_id(dst, dp->lag_dev);
 		}
 	}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index c78769cdbb59..8f85c23ec9c7 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -109,7 +109,7 @@
 #define MV88E6XXX_G2_PVT_ADDR_OP_WRITE_PVLAN	0x3000
 #define MV88E6XXX_G2_PVT_ADDR_OP_READ		0x4000
 #define MV88E6XXX_G2_PVT_ADDR_PTR_MASK		0x01ff
-#define MV88E6XXX_G2_PVT_ADRR_DEV_TRUNK		0x1f
+#define MV88E6XXX_G2_PVT_ADDR_DEV_TRUNK		0x1f
 
 /* Offset 0x0C: Cross-chip Port VLAN Data Register */
 #define MV88E6XXX_G2_PVT_DATA		0x0c
-- 
cgit v1.2.3


From 281140a0a2ce4febf2c0ce5d29d0e7d961a826b1 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 21 Apr 2021 14:04:53 +0200
Subject: net: dsa: mv88e6xxx: Fix off-by-one in VTU devlink region size

In the unlikely event of the VTU being loaded to the brim with 4k
entries, the last one was placed in the buffer, but the size reported
to devlink was off-by-one. Make sure that the final entry is available
to the caller.

Fixes: ca4d632aef03 ("net: dsa: mv88e6xxx: Export VTU as devlink region")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/devlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c
index 21953d6d484c..ada7a38d4d31 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -678,7 +678,7 @@ static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds,
 				sizeof(struct mv88e6xxx_devlink_atu_entry);
 			break;
 		case MV88E6XXX_REGION_VTU:
-			size = mv88e6xxx_max_vid(chip) *
+			size = (mv88e6xxx_max_vid(chip) + 1) *
 				sizeof(struct mv88e6xxx_devlink_vtu_entry);
 			break;
 		}
-- 
cgit v1.2.3


From 836021a2d0e0e4c90b895a35bd9c0342071855fb Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Wed, 21 Apr 2021 14:04:54 +0200
Subject: net: dsa: mv88e6xxx: Export cross-chip PVT as devlink region

Export the raw PVT data in a devlink region so that it can be
inspected from userspace and compared to the current bridge
configuration.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.h    |  3 ++
 drivers/net/dsa/mv88e6xxx/devlink.c | 56 +++++++++++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/global2.c | 17 +++++++++++
 drivers/net/dsa/mv88e6xxx/global2.h |  2 ++
 4 files changed, 78 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 4f116f73a74b..675b1f3e43b7 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -23,6 +23,8 @@
 /* PVT limits for 4-bit port and 5-bit switch */
 #define MV88E6XXX_MAX_PVT_SWITCHES	32
 #define MV88E6XXX_MAX_PVT_PORTS		16
+#define MV88E6XXX_MAX_PVT_ENTRIES	\
+	(MV88E6XXX_MAX_PVT_SWITCHES * MV88E6XXX_MAX_PVT_PORTS)
 
 #define MV88E6XXX_MAX_GPIO	16
 
@@ -266,6 +268,7 @@ enum mv88e6xxx_region_id {
 	MV88E6XXX_REGION_GLOBAL2,
 	MV88E6XXX_REGION_ATU,
 	MV88E6XXX_REGION_VTU,
+	MV88E6XXX_REGION_PVT,
 
 	_MV88E6XXX_REGION_MAX,
 };
diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c
index ada7a38d4d31..0c0f5ea6680c 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -503,6 +503,44 @@ static int mv88e6xxx_region_vtu_snapshot(struct devlink *dl,
 	return 0;
 }
 
+static int mv88e6xxx_region_pvt_snapshot(struct devlink *dl,
+					 const struct devlink_region_ops *ops,
+					 struct netlink_ext_ack *extack,
+					 u8 **data)
+{
+	struct dsa_switch *ds = dsa_devlink_to_ds(dl);
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int dev, port, err;
+	u16 *pvt, *cur;
+
+	pvt = kcalloc(MV88E6XXX_MAX_PVT_ENTRIES, sizeof(*pvt), GFP_KERNEL);
+	if (!pvt)
+		return -ENOMEM;
+
+	mv88e6xxx_reg_lock(chip);
+
+	cur = pvt;
+	for (dev = 0; dev < MV88E6XXX_MAX_PVT_SWITCHES; dev++) {
+		for (port = 0; port < MV88E6XXX_MAX_PVT_PORTS; port++) {
+			err = mv88e6xxx_g2_pvt_read(chip, dev, port, cur);
+			if (err)
+				break;
+
+			cur++;
+		}
+	}
+
+	mv88e6xxx_reg_unlock(chip);
+
+	if (err) {
+		kfree(pvt);
+		return err;
+	}
+
+	*data = (u8 *)pvt;
+	return 0;
+}
+
 static int mv88e6xxx_region_port_snapshot(struct devlink_port *devlink_port,
 					  const struct devlink_port_region_ops *ops,
 					  struct netlink_ext_ack *extack,
@@ -567,6 +605,12 @@ static struct devlink_region_ops mv88e6xxx_region_vtu_ops = {
 	.destructor = kfree,
 };
 
+static struct devlink_region_ops mv88e6xxx_region_pvt_ops = {
+	.name = "pvt",
+	.snapshot = mv88e6xxx_region_pvt_snapshot,
+	.destructor = kfree,
+};
+
 static const struct devlink_port_region_ops mv88e6xxx_region_port_ops = {
 	.name = "port",
 	.snapshot = mv88e6xxx_region_port_snapshot,
@@ -576,6 +620,8 @@ static const struct devlink_port_region_ops mv88e6xxx_region_port_ops = {
 struct mv88e6xxx_region {
 	struct devlink_region_ops *ops;
 	u64 size;
+
+	bool (*cond)(struct mv88e6xxx_chip *chip);
 };
 
 static struct mv88e6xxx_region mv88e6xxx_regions[] = {
@@ -594,6 +640,11 @@ static struct mv88e6xxx_region mv88e6xxx_regions[] = {
 		.ops = &mv88e6xxx_region_vtu_ops
 	  /* calculated at runtime */
 	},
+	[MV88E6XXX_REGION_PVT] = {
+		.ops = &mv88e6xxx_region_pvt_ops,
+		.size = MV88E6XXX_MAX_PVT_ENTRIES * sizeof(u16),
+		.cond = mv88e6xxx_has_pvt,
+	},
 };
 
 static void
@@ -663,6 +714,7 @@ out:
 static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds,
 						  struct mv88e6xxx_chip *chip)
 {
+	bool (*cond)(struct mv88e6xxx_chip *chip);
 	struct devlink_region_ops *ops;
 	struct devlink_region *region;
 	u64 size;
@@ -671,6 +723,10 @@ static int mv88e6xxx_setup_devlink_regions_global(struct dsa_switch *ds,
 	for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) {
 		ops = mv88e6xxx_regions[i].ops;
 		size = mv88e6xxx_regions[i].size;
+		cond = mv88e6xxx_regions[i].cond;
+
+		if (cond && !cond(chip))
+			continue;
 
 		switch (i) {
 		case MV88E6XXX_REGION_ATU:
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index da8bac8813e1..fa65ecd9cb85 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -239,6 +239,23 @@ static int mv88e6xxx_g2_pvt_op(struct mv88e6xxx_chip *chip, int src_dev,
 	return mv88e6xxx_g2_pvt_op_wait(chip);
 }
 
+int mv88e6xxx_g2_pvt_read(struct mv88e6xxx_chip *chip, int src_dev,
+			  int src_port, u16 *data)
+{
+	int err;
+
+	err = mv88e6xxx_g2_pvt_op_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_pvt_op(chip, src_dev, src_port,
+				  MV88E6XXX_G2_PVT_ADDR_OP_READ);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_read(chip, MV88E6XXX_G2_PVT_DATA, data);
+}
+
 int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
 			   int src_port, u16 data)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index 8f85c23ec9c7..f3e27573a386 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -330,6 +330,8 @@ int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
 int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
 			      struct ethtool_eeprom *eeprom, u8 *data);
 
+int mv88e6xxx_g2_pvt_read(struct mv88e6xxx_chip *chip, int src_dev,
+			  int src_port, u16 *data);
 int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, int src_dev,
 			   int src_port, u16 data);
 int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip);
-- 
cgit v1.2.3


From 90b669d65d99a3ee6965275269967cdee4da106e Mon Sep 17 00:00:00 2001
From: Yinjun Zhang <yinjun.zhang@corigine.com>
Date: Wed, 21 Apr 2021 11:24:15 +0200
Subject: nfp: devlink: initialize the devlink port attribute "lanes"

The number of lanes of devlink port should be correctly initialized
when registering the port, so that the input check when running
"devlink port split <port> count <N>" can pass.

Fixes: a21cf0a8330b ("devlink: Add a new devlink port lanes attribute and pass to netlink")
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 713ee3041d49..bea978df7713 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -364,6 +364,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port)
 
 	attrs.split = eth_port.is_split;
 	attrs.splittable = !attrs.split;
+	attrs.lanes = eth_port.port_lanes;
 	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
 	attrs.phys.port_number = eth_port.label_port;
 	attrs.phys.split_subport_number = eth_port.label_subport;
-- 
cgit v1.2.3


From 53e35ebb9a17fd953d9b8fe059aaf4282fa524f2 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 21 Apr 2021 16:22:50 +0300
Subject: stmmac: intel: unlock on error path in intel_crosststamp()

We recently added some new locking to this function but one error path
was overlooked.  We need to drop the lock before returning.

Fixes: f4da56529da6 ("net: stmmac: Add support for external trigger timestamping")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index ec140fc4a0f5..bd662aaf664a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -320,6 +320,7 @@ static int intel_crosststamp(ktime_t *device,
 		acr_value |= PTP_ACR_ATSEN3;
 		break;
 	default:
+		mutex_unlock(&priv->aux_ts_lock);
 		return -EINVAL;
 	}
 	writel(acr_value, ptpaddr + PTP_ACR);
-- 
cgit v1.2.3


From 70a7c484c7c3eaa17b679db2c74ec8ecbe8dc0e8 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 21 Apr 2021 15:05:40 +0200
Subject: net: dsa: fix bridge support for drivers without port_bridge_flags
 callback

Starting with patch:
a8b659e7ff75 ("net: dsa: act as passthrough for bridge port flags")

drivers without "port_bridge_flags" callback will fail to join the bridge.
Looking at the code, -EOPNOTSUPP seems to be the proper return value,
which makes at least microchip and atheros switches work again.

Fixes: 5961d6a12c13 ("net: dsa: inherit the actual bridge port flags at join time")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/port.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/port.c b/net/dsa/port.c
index 01e30264b25b..6379d66a6bb3 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -550,7 +550,7 @@ int dsa_port_bridge_flags(const struct dsa_port *dp,
 	struct dsa_switch *ds = dp->ds;
 
 	if (!ds->ops->port_bridge_flags)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
 }
-- 
cgit v1.2.3


From 17cb00704c217d88a93791c914a01904e685b499 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Wed, 21 Apr 2021 16:46:06 +0800
Subject: stmmac: intel: set TSO/TBS TX Queues default settings

TSO and TBS cannot coexist, for now we set Intel mGbE controller to use
below TX Queue mapping: TxQ0 uses TSO and the rest of TXQs supports TBS.

Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index bd662aaf664a..4bd038acb1b9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -447,6 +447,9 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 
 		/* Disable Priority config by default */
 		plat->tx_queues_cfg[i].use_prio = false;
+		/* Default TX Q0 to use TSO and rest TXQ for TBS */
+		if (i > 0)
+			plat->tx_queues_cfg[i].tbs_en = 1;
 	}
 
 	/* FIFO size is 4096 bytes for 1 tx/rx queue */
-- 
cgit v1.2.3


From 5e6038b88a5718910dd74b949946d9d9cee9a041 Mon Sep 17 00:00:00 2001
From: Ong Boon Leong <boon.leong.ong@intel.com>
Date: Wed, 21 Apr 2021 17:11:49 +0800
Subject: net: stmmac: fix TSO and TBS feature enabling during driver open

TSO and TBS cannot co-exist and current implementation requires two
fixes:

 1) stmmac_open() does not need to call stmmac_enable_tbs() because
    the MAC is reset in stmmac_init_dma_engine() anyway.
 2) Inside stmmac_hw_setup(), we should call stmmac_enable_tso() for
    TX Q that is _not_ configured for TBS.

Fixes: 579a25a854d4 ("net: stmmac: Initial support for TBS")
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 4749bd0af160..c6f24abf6432 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2757,8 +2757,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 
 	/* Enable TSO */
 	if (priv->tso) {
-		for (chan = 0; chan < tx_cnt; chan++)
+		for (chan = 0; chan < tx_cnt; chan++) {
+			struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+			/* TSO and TBS cannot co-exist */
+			if (tx_q->tbs & STMMAC_TBS_AVAIL)
+				continue;
+
 			stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
+		}
 	}
 
 	/* Enable Split Header */
@@ -2850,9 +2857,8 @@ static int stmmac_open(struct net_device *dev)
 		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
 		int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en;
 
+		/* Setup per-TXQ tbs flag before TX descriptor alloc */
 		tx_q->tbs |= tbs_en ? STMMAC_TBS_AVAIL : 0;
-		if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan))
-			tx_q->tbs &= ~STMMAC_TBS_AVAIL;
 	}
 
 	ret = alloc_dma_desc_resources(priv);
-- 
cgit v1.2.3


From 5718458b092bf6bf4482c5df32affba3c3259517 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 21 Apr 2021 13:52:46 +0900
Subject: net: renesas: ravb: Fix a stuck issue when a lot of frames are
 received

When a lot of frames were received in the short term, the driver
caused a stuck of receiving until a new frame was received. For example,
the following command from other device could cause this issue.

    $ sudo ping -f -l 1000 -c 1000 <this driver's ipaddress>

The previous code always cleared the interrupt flag of RX but checks
the interrupt flags in ravb_poll(). So, ravb_poll() could not call
ravb_rx() in the next time until a new RX frame was received if
ravb_rx() returned true. To fix the issue, always calls ravb_rx()
regardless the interrupt flags condition.

Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 35 +++++++++++---------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index eb0c03bdb12d..cad57d58d764 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -911,31 +911,20 @@ static int ravb_poll(struct napi_struct *napi, int budget)
 	int q = napi - priv->napi;
 	int mask = BIT(q);
 	int quota = budget;
-	u32 ris0, tis;
 
-	for (;;) {
-		tis = ravb_read(ndev, TIS);
-		ris0 = ravb_read(ndev, RIS0);
-		if (!((ris0 & mask) || (tis & mask)))
-			break;
+	/* Processing RX Descriptor Ring */
+	/* Clear RX interrupt */
+	ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
+	if (ravb_rx(ndev, &quota, q))
+		goto out;
 
-		/* Processing RX Descriptor Ring */
-		if (ris0 & mask) {
-			/* Clear RX interrupt */
-			ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
-			if (ravb_rx(ndev, &quota, q))
-				goto out;
-		}
-		/* Processing TX Descriptor Ring */
-		if (tis & mask) {
-			spin_lock_irqsave(&priv->lock, flags);
-			/* Clear TX interrupt */
-			ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
-			ravb_tx_free(ndev, q, true);
-			netif_wake_subqueue(ndev, q);
-			spin_unlock_irqrestore(&priv->lock, flags);
-		}
-	}
+	/* Processing RX Descriptor Ring */
+	spin_lock_irqsave(&priv->lock, flags);
+	/* Clear TX interrupt */
+	ravb_write(ndev, ~(mask | TIS_RESERVED), TIS);
+	ravb_tx_free(ndev, q, true);
+	netif_wake_subqueue(ndev, q);
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	napi_complete(napi);
 
-- 
cgit v1.2.3


From 357a07c26697a770d39d28b6b111f978deb4017d Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Wed, 21 Apr 2021 07:50:47 +0200
Subject: net: phy: intel-xway: enable integrated led functions

The Intel xway phys offer the possibility to deactivate the integrated
LED function and to control the LEDs manually.
If this was set by the bootloader, it must be ensured that the
integrated LED function is enabled for all LEDs when loading the driver.

Before commit 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
the LEDs were enabled by a soft-reset of the PHY (using
genphy_soft_reset). Initialize the XWAY_MDIO_LED with it's default
value (which is applied during a soft reset) instead of adding back
the soft reset. This brings back the default LED configuration while
still preventing an excessive amount of soft resets.

Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/intel-xway.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/net/phy/intel-xway.c b/drivers/net/phy/intel-xway.c
index 6eac50d4b42f..d453ec016168 100644
--- a/drivers/net/phy/intel-xway.c
+++ b/drivers/net/phy/intel-xway.c
@@ -11,6 +11,18 @@
 
 #define XWAY_MDIO_IMASK			0x19	/* interrupt mask */
 #define XWAY_MDIO_ISTAT			0x1A	/* interrupt status */
+#define XWAY_MDIO_LED			0x1B	/* led control */
+
+/* bit 15:12 are reserved */
+#define XWAY_MDIO_LED_LED3_EN		BIT(11)	/* Enable the integrated function of LED3 */
+#define XWAY_MDIO_LED_LED2_EN		BIT(10)	/* Enable the integrated function of LED2 */
+#define XWAY_MDIO_LED_LED1_EN		BIT(9)	/* Enable the integrated function of LED1 */
+#define XWAY_MDIO_LED_LED0_EN		BIT(8)	/* Enable the integrated function of LED0 */
+/* bit 7:4 are reserved */
+#define XWAY_MDIO_LED_LED3_DA		BIT(3)	/* Direct Access to LED3 */
+#define XWAY_MDIO_LED_LED2_DA		BIT(2)	/* Direct Access to LED2 */
+#define XWAY_MDIO_LED_LED1_DA		BIT(1)	/* Direct Access to LED1 */
+#define XWAY_MDIO_LED_LED0_DA		BIT(0)	/* Direct Access to LED0 */
 
 #define XWAY_MDIO_INIT_WOL		BIT(15)	/* Wake-On-LAN */
 #define XWAY_MDIO_INIT_MSRE		BIT(14)
@@ -159,6 +171,15 @@ static int xway_gphy_config_init(struct phy_device *phydev)
 	/* Clear all pending interrupts */
 	phy_read(phydev, XWAY_MDIO_ISTAT);
 
+	/* Ensure that integrated led function is enabled for all leds */
+	err = phy_write(phydev, XWAY_MDIO_LED,
+			XWAY_MDIO_LED_LED0_EN |
+			XWAY_MDIO_LED_LED1_EN |
+			XWAY_MDIO_LED_LED2_EN |
+			XWAY_MDIO_LED_LED3_EN);
+	if (err)
+		return err;
+
 	phy_write_mmd(phydev, MDIO_MMD_VEND2, XWAY_MMD_LEDCH,
 		      XWAY_MMD_LEDCH_NACS_NONE |
 		      XWAY_MMD_LEDCH_SBF_F02HZ |
-- 
cgit v1.2.3


From 47a017f33943278570c072bc71681809b2567b3a Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 21 Apr 2021 10:40:07 -0700
Subject: net: qrtr: Avoid potential use after free in MHI send

It is possible that the MHI ul_callback will be invoked immediately
following the queueing of the skb for transmission, leading to the
callback decrementing the refcount of the associated sk and freeing the
skb.

As such the dereference of skb and the increment of the sk refcount must
happen before the skb is queued, to avoid the skb to be used after free
and potentially the sk to drop its last refcount..

Fixes: 6e728f321393 ("net: qrtr: Add MHI transport layer")
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/mhi.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 2bf2b1943e61..fa611678af05 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -50,6 +50,9 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
 	int rc;
 
+	if (skb->sk)
+		sock_hold(skb->sk);
+
 	rc = skb_linearize(skb);
 	if (rc)
 		goto free_skb;
@@ -59,12 +62,11 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 	if (rc)
 		goto free_skb;
 
-	if (skb->sk)
-		sock_hold(skb->sk);
-
 	return rc;
 
 free_skb:
+	if (skb->sk)
+		sock_put(skb->sk);
 	kfree_skb(skb);
 
 	return rc;
-- 
cgit v1.2.3


From 36e69da892f1224dabc4a5d0a5948764c318b117 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Wed, 21 Apr 2021 09:05:05 -0500
Subject: net: ethernet: ravb: Fix release of refclk

The call to clk_disable_unprepare() can happen before priv is
initialized. This means moving clk_disable_unprepare out of
out_release into a new label.

Fixes: 8ef7adc6beb2 ("net: ethernet: ravb: Enable optional refclk")
Signed-off-by: Adam Ford <aford173@gmail.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 8c84c40ab9a0..9e5dad41cdc9 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2173,7 +2173,7 @@ static int ravb_probe(struct platform_device *pdev)
 	/* Set GTI value */
 	error = ravb_set_gti(ndev);
 	if (error)
-		goto out_release;
+		goto out_disable_refclk;
 
 	/* Request GTI loading */
 	ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
@@ -2192,7 +2192,7 @@ static int ravb_probe(struct platform_device *pdev)
 			"Cannot allocate desc base address table (size %d bytes)\n",
 			priv->desc_bat_size);
 		error = -ENOMEM;
-		goto out_release;
+		goto out_disable_refclk;
 	}
 	for (q = RAVB_BE; q < DBAT_ENTRY_NUM; q++)
 		priv->desc_bat[q].die_dt = DT_EOS;
@@ -2252,8 +2252,9 @@ out_dma_free:
 	/* Stop PTP Clock driver */
 	if (chip_id != RCAR_GEN2)
 		ravb_ptp_stop(ndev);
-out_release:
+out_disable_refclk:
 	clk_disable_unprepare(priv->refclk);
+out_release:
 	free_netdev(ndev);
 
 	pm_runtime_put(&pdev->dev);
-- 
cgit v1.2.3


From f4f4089eb145d18af93977aebdcb899d8eaa890a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 17 Apr 2021 12:28:49 +0200
Subject: mt76: connac: move mcu_update_arp_filter in mt76_connac module

Move mt76_connac_mcu_update_arp_filter in mt76_connac module since the
code is shared between mt7615 and mt7921

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  9 ++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 47 ----------------------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  3 --
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 42 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   |  8 +++-
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 44 --------------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  6 ---
 8 files changed, 58 insertions(+), 104 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 62d9df47a1f6..8313bf468db2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -578,8 +578,13 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_PS)
 		mt76_connac_mcu_set_vif_ps(&dev->mt76, vif);
 
-	if (changed & BSS_CHANGED_ARP_FILTER)
-		mt7615_mcu_update_arp_filter(hw, vif, info);
+	if ((changed & BSS_CHANGED_ARP_FILTER) &&
+	    mt7615_firmware_offload(dev)) {
+		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+
+		mt76_connac_mcu_update_arp_filter(&dev->mt76, &mvif->mt76,
+						  info);
+	}
 
 	if (changed & BSS_CHANGED_ASSOC)
 		mt7615_mac_set_beacon_filter(phy, vif, info->assoc);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index be976fe97290..364daef4b0be 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2704,53 +2704,6 @@ int mt7615_mcu_set_roc(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 				 sizeof(req), false);
 }
 
-int mt7615_mcu_update_arp_filter(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_bss_conf *info)
-{
-	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
-	struct mt7615_dev *dev = mt7615_hw_dev(hw);
-	struct sk_buff *skb;
-	int i, len = min_t(int, info->arp_addr_cnt,
-			   IEEE80211_BSS_ARP_ADDR_LIST_LEN);
-	struct {
-		struct {
-			u8 bss_idx;
-			u8 pad[3];
-		} __packed hdr;
-		struct mt76_connac_arpns_tlv arp;
-	} req_hdr = {
-		.hdr = {
-			.bss_idx = mvif->mt76.idx,
-		},
-		.arp = {
-			.tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_ARP),
-			.len = cpu_to_le16(sizeof(struct mt76_connac_arpns_tlv)),
-			.ips_num = len,
-			.mode = 2,  /* update */
-			.option = 1,
-		},
-	};
-
-	if (!mt7615_firmware_offload(dev))
-		return 0;
-
-	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
-				 sizeof(req_hdr) + len * sizeof(__be32));
-	if (!skb)
-		return -ENOMEM;
-
-	skb_put_data(skb, &req_hdr, sizeof(req_hdr));
-	for (i = 0; i < len; i++) {
-		u8 *addr = (u8 *)skb_put(skb, sizeof(__be32));
-
-		memcpy(addr, &info->arp_addr_list[i], sizeof(__be32));
-	}
-
-	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_UNI_CMD_OFFLOAD,
-				     true);
-}
-
 int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 6a50338ec9f5..5262b84a28c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -549,9 +549,6 @@ int mt7615_mac_set_beacon_filter(struct mt7615_phy *phy,
 				 bool enable);
 int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 			  bool enable);
-int mt7615_mcu_update_arp_filter(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_bss_conf *info);
 int __mt7663_load_firmware(struct mt7615_dev *dev);
 u32 mt7615_mcu_reg_rr(struct mt76_dev *dev, u32 offset);
 void mt7615_mcu_reg_wr(struct mt76_dev *dev, u32 offset, u32 val);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index e057347398bb..79626e60a6ff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1693,6 +1693,48 @@ int mt76_connac_mcu_set_rate_txpower(struct mt76_phy *phy)
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_set_rate_txpower);
 
+int mt76_connac_mcu_update_arp_filter(struct mt76_dev *dev,
+				      struct mt76_vif *vif,
+				      struct ieee80211_bss_conf *info)
+{
+	struct sk_buff *skb;
+	int i, len = min_t(int, info->arp_addr_cnt,
+			   IEEE80211_BSS_ARP_ADDR_LIST_LEN);
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt76_connac_arpns_tlv arp;
+	} req_hdr = {
+		.hdr = {
+			.bss_idx = vif->idx,
+		},
+		.arp = {
+			.tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_ARP),
+			.len = cpu_to_le16(sizeof(struct mt76_connac_arpns_tlv)),
+			.ips_num = len,
+			.mode = 2,  /* update */
+			.option = 1,
+		},
+	};
+
+	skb = mt76_mcu_msg_alloc(dev, NULL,
+				 sizeof(req_hdr) + len * sizeof(__be32));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, &req_hdr, sizeof(req_hdr));
+	for (i = 0; i < len; i++) {
+		u8 *addr = (u8 *)skb_put(skb, sizeof(__be32));
+
+		memcpy(addr, &info->arp_addr_list[i], sizeof(__be32));
+	}
+
+	return mt76_mcu_skb_send_msg(dev, skb, MCU_UNI_CMD_OFFLOAD, true);
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_update_arp_filter);
+
 #ifdef CONFIG_PM
 
 const struct wiphy_wowlan_support mt76_connac_wowlan_support = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index ff9fca52f344..abefd9d3e5ea 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -1010,6 +1010,9 @@ int mt76_connac_mcu_sched_scan_req(struct mt76_phy *phy,
 int mt76_connac_mcu_sched_scan_enable(struct mt76_phy *phy,
 				      struct ieee80211_vif *vif,
 				      bool enable);
+int mt76_connac_mcu_update_arp_filter(struct mt76_dev *dev,
+				      struct mt76_vif *vif,
+				      struct ieee80211_bss_conf *info);
 int mt76_connac_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif,
 				     struct cfg80211_gtk_rekey_data *key);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 13910ac78df1..0ded32732004 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -626,8 +626,12 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_ASSOC)
 		mt7921_bss_bcnft_apply(dev, vif, info->assoc);
 
-	if (changed & BSS_CHANGED_ARP_FILTER)
-		mt7921_mcu_update_arp_filter(hw, vif, info);
+	if (changed & BSS_CHANGED_ARP_FILTER) {
+		struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+
+		mt76_connac_mcu_update_arp_filter(&dev->mt76, &mvif->mt76,
+						  info);
+	}
 
 	mt7921_mutex_release(dev);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index a360929983ea..14ba856de0b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1339,50 +1339,6 @@ mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 	}
 }
 
-int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_bss_conf *info)
-{
-	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
-	struct mt7921_dev *dev = mt7921_hw_dev(hw);
-	struct sk_buff *skb;
-	int i, len = min_t(int, info->arp_addr_cnt,
-			   IEEE80211_BSS_ARP_ADDR_LIST_LEN);
-	struct {
-		struct {
-			u8 bss_idx;
-			u8 pad[3];
-		} __packed hdr;
-		struct mt76_connac_arpns_tlv arp;
-	} req_hdr = {
-		.hdr = {
-			.bss_idx = mvif->mt76.idx,
-		},
-		.arp = {
-			.tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_ARP),
-			.len = cpu_to_le16(sizeof(struct mt76_connac_arpns_tlv)),
-			.ips_num = len,
-			.mode = 2,  /* update */
-			.option = 1,
-		},
-	};
-
-	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
-				 sizeof(req_hdr) + len * sizeof(__be32));
-	if (!skb)
-		return -ENOMEM;
-
-	skb_put_data(skb, &req_hdr, sizeof(req_hdr));
-	for (i = 0; i < len; i++) {
-		u8 *addr = (u8 *)skb_put(skb, sizeof(__be32));
-
-		memcpy(addr, &info->arp_addr_list[i], sizeof(__be32));
-	}
-
-	return mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_UNI_CMD_OFFLOAD,
-				     true);
-}
-
 int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr)
 {
 	struct mt7921_txpwr_event *event;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index c34cf3e3a26b..3982e074ff50 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -364,9 +364,6 @@ int mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 			     bool enable);
 int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 			  bool enable);
-int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_bss_conf *info);
 int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev);
 int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev);
 void mt7921_pm_wake_work(struct work_struct *work);
@@ -377,9 +374,6 @@ int mt7921_mac_set_beacon_filter(struct mt7921_phy *phy,
 				 bool enable);
 void mt7921_pm_interface_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
 void mt7921_coredump_work(struct work_struct *work);
-int mt7921_mcu_update_arp_filter(struct ieee80211_hw *hw,
-				 struct ieee80211_vif *vif,
-				 struct ieee80211_bss_conf *info);
 int mt7921_wfsys_reset(struct mt7921_dev *dev);
 int mt7921_get_txpwr_info(struct mt7921_dev *dev, struct mt7921_txpwr *txpwr);
 #endif
-- 
cgit v1.2.3


From d5a2abb0db9ea05f24d1e48d3e4787247e0c5248 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 17 Apr 2021 18:51:37 +0200
Subject: mt76: mt7921: remove leftover function declaration

Get rid of leftover mt7921_mcu_add_bss_info routine declaration

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 3982e074ff50..3780a7f4cb16 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -257,8 +257,6 @@ void mt7921_dma_prefetch(struct mt7921_dev *dev);
 void mt7921_dma_cleanup(struct mt7921_dev *dev);
 int mt7921_run_firmware(struct mt7921_dev *dev);
 int mt7921_mcu_init(struct mt7921_dev *dev);
-int mt7921_mcu_add_bss_info(struct mt7921_phy *phy,
-			    struct ieee80211_vif *vif, int enable);
 int mt7921_mcu_add_key(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 		       struct mt7921_sta *msta, struct ieee80211_key_conf *key,
 		       enum set_key_cmd cmd);
-- 
cgit v1.2.3


From fad90e43eac0434108af18e326e179d1b5153135 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:27 +0200
Subject: mt76: mt7921: fix a race between mt7921_mcu_drv_pmctrl and
 mt7921_mcu_fw_pmctrl

Introduce a mutex in order to avoid a race between mt7921_mcu_drv_pmctrl and
mt7921_mcu_fw_pmctrl routines since they are run two independent works

Fixes: 1d8efc741df8 ("mt76: mt7921: introduce Runtime PM support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/init.c |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c  | 34 +++++++++++++++++-------
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index b811f3c410a1..2b31c9794e92 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -54,6 +54,7 @@ struct mt76_connac_pm {
 
 	struct work_struct wake_work;
 	struct completion wake_cmpl;
+	struct mutex mutex;
 
 	struct delayed_work ps_work;
 	unsigned long last_activity;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index eab6e2dcdb96..0b8a5a7f4362 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -222,6 +222,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
 
 	INIT_DELAYED_WORK(&dev->pm.ps_work, mt7921_pm_power_save_work);
 	INIT_WORK(&dev->pm.wake_work, mt7921_pm_wake_work);
+	mutex_init(&dev->pm.mutex);
 	init_completion(&dev->pm.wake_cmpl);
 	spin_lock_init(&dev->pm.txq_lock);
 	set_bit(MT76_STATE_PM, &dev->mphy.state);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 14ba856de0b6..ea00f6b6af56 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1267,9 +1267,11 @@ int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
-	int i;
+	int i, err = 0;
 
-	if (!test_and_clear_bit(MT76_STATE_PM, &mphy->state))
+	mutex_lock(&dev->pm.mutex);
+
+	if (!test_bit(MT76_STATE_PM, &mphy->state))
 		goto out;
 
 	for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
@@ -1281,23 +1283,30 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 
 	if (i == MT7921_DRV_OWN_RETRY_COUNT) {
 		dev_err(dev->mt76.dev, "driver own failed\n");
-		mt7921_reset(&dev->mt76);
-		return -EIO;
+		err = -EIO;
+		goto out;
 	}
+	clear_bit(MT76_STATE_PM, &mphy->state);
 
 out:
 	dev->pm.last_activity = jiffies;
+	mutex_unlock(&dev->pm.mutex);
 
-	return 0;
+	if (err)
+		mt7921_reset(&dev->mt76);
+
+	return err;
 }
 
 int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
-	int i;
+	int i, err = 0;
+
+	mutex_lock(&dev->pm.mutex);
 
 	if (test_and_set_bit(MT76_STATE_PM, &mphy->state))
-		return 0;
+		goto out;
 
 	for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
 		mt76_wr(dev, MT_CONN_ON_LPCTL, PCIE_LPCR_HOST_SET_OWN);
@@ -1308,11 +1317,16 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 
 	if (i == MT7921_DRV_OWN_RETRY_COUNT) {
 		dev_err(dev->mt76.dev, "firmware own failed\n");
-		mt7921_reset(&dev->mt76);
-		return -EIO;
+		clear_bit(MT76_STATE_PM, &mphy->state);
+		err = -EIO;
 	}
+out:
+	mutex_unlock(&dev->pm.mutex);
 
-	return 0;
+	if (err)
+		mt7921_reset(&dev->mt76);
+
+	return err;
 }
 
 void
-- 
cgit v1.2.3


From 7cd740f0e499d9bfd672ff1f3f6512503141abbe Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:28 +0200
Subject: mt76: mt7663: fix a race between mt7615_mcu_drv_pmctrl and
 mt7615_mcu_fw_pmctrl

Introduce a mutex in order to avoid a race between mt7615_mcu_lp_drv_pmctrl and
mt7615_mcu_fw_pmctrl routines since they are run two independent works

Fixes: 1f549009b5b2 ("mt76: mt7615: do not request {driver,fw}_own if already granted")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 20 ++++++++++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index d84662fb0304..5429536e8d43 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -505,6 +505,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 
 	INIT_DELAYED_WORK(&dev->pm.ps_work, mt7615_pm_power_save_work);
 	INIT_WORK(&dev->pm.wake_work, mt7615_pm_wake_work);
+	mutex_init(&dev->pm.mutex);
 	init_completion(&dev->pm.wake_cmpl);
 	spin_lock_init(&dev->pm.txq_lock);
 	set_bit(MT76_STATE_PM, &dev->mphy.state);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 364daef4b0be..7081bb4a28bd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -323,9 +323,11 @@ static int mt7615_mcu_drv_pmctrl(struct mt7615_dev *dev)
 static int mt7615_mcu_lp_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
-	int i;
+	int i, err = 0;
+
+	mutex_lock(&dev->pm.mutex);
 
-	if (!test_and_clear_bit(MT76_STATE_PM, &mphy->state))
+	if (!test_bit(MT76_STATE_PM, &mphy->state))
 		goto out;
 
 	for (i = 0; i < MT7615_DRV_OWN_RETRY_COUNT; i++) {
@@ -337,14 +339,16 @@ static int mt7615_mcu_lp_drv_pmctrl(struct mt7615_dev *dev)
 
 	if (i == MT7615_DRV_OWN_RETRY_COUNT) {
 		dev_err(dev->mt76.dev, "driver own failed\n");
-		set_bit(MT76_STATE_PM, &mphy->state);
-		return -EIO;
+		err = -EIO;
+		goto out;
 	}
+	clear_bit(MT76_STATE_PM, &mphy->state);
 
 out:
 	dev->pm.last_activity = jiffies;
+	mutex_unlock(&dev->pm.mutex);
 
-	return 0;
+	return err;
 }
 
 static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
@@ -353,8 +357,10 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
 	int err = 0;
 	u32 addr;
 
+	mutex_lock(&dev->pm.mutex);
+
 	if (test_and_set_bit(MT76_STATE_PM, &mphy->state))
-		return 0;
+		goto out;
 
 	mt7622_trigger_hif_int(dev, true);
 
@@ -370,6 +376,8 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
 	}
 
 	mt7622_trigger_hif_int(dev, false);
+out:
+	mutex_unlock(&dev->pm.mutex);
 
 	return err;
 }
-- 
cgit v1.2.3


From 7f2bc8ba11a0e82d474f0047933c3baeebf4406c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:29 +0200
Subject: mt76: connac: introduce wake counter for fw_pmctrl synchronization

Introduce wake counter and related spinlock in order to synchronize
tx/rx path and fw_pmctrl request.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h | 31 ++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 2b31c9794e92..85846eab8d7d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -54,6 +54,11 @@ struct mt76_connac_pm {
 
 	struct work_struct wake_work;
 	struct completion wake_cmpl;
+
+	struct {
+		spinlock_t lock;
+		u32 count;
+	} wake;
 	struct mutex mutex;
 
 	struct delayed_work ps_work;
@@ -85,6 +90,32 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 void mt76_connac_free_pending_tx_skbs(struct mt76_connac_pm *pm,
 				      struct mt76_wcid *wcid);
 
+static inline bool
+mt76_connac_pm_ref(struct mt76_phy *phy, struct mt76_connac_pm *pm)
+{
+	bool ret = false;
+
+	spin_lock_bh(&pm->wake.lock);
+	if (test_bit(MT76_STATE_PM, &phy->state))
+		goto out;
+
+	pm->wake.count++;
+	ret = true;
+out:
+	spin_unlock_bh(&pm->wake.lock);
+
+	return ret;
+}
+
+static inline void
+mt76_connac_pm_unref(struct mt76_connac_pm *pm)
+{
+	spin_lock_bh(&pm->wake.lock);
+	pm->wake.count--;
+	pm->last_activity = jiffies;
+	spin_unlock_bh(&pm->wake.lock);
+}
+
 static inline void
 mt76_connac_mutex_acquire(struct mt76_dev *dev, struct mt76_connac_pm *pm)
 	__acquires(&dev->mutex)
-- 
cgit v1.2.3


From 9800462ddc58ace3d96a006156ba6764824992f2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:30 +0200
Subject: mt76: mt7921: rely on mt76_connac_pm_ref/mt76_connac_pm_unref in tx
 path

Introduce mt7921_tx_worker routine as mt76 tx worker callback for
mt7921.
Rely on mt76_connac_pm_ref/mt76_connac_pm_unref to check PM state and
increment/decrement wake counter

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 23 +++++++++-------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  2 ++
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 0b8a5a7f4362..ce0e231ab772 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -219,9 +219,11 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	dev->phy.dev = dev;
 	dev->phy.mt76 = &dev->mt76.phy;
 	dev->mt76.phy.priv = &dev->phy;
+	dev->mt76.tx_worker.fn = mt7921_tx_worker;
 
 	INIT_DELAYED_WORK(&dev->pm.ps_work, mt7921_pm_power_save_work);
 	INIT_WORK(&dev->pm.wake_work, mt7921_pm_wake_work);
+	spin_lock_init(&dev->pm.wake.lock);
 	mutex_init(&dev->pm.mutex);
 	init_completion(&dev->pm.wake_cmpl);
 	spin_lock_init(&dev->pm.txq_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 0ded32732004..f53a1326d8d8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -725,23 +725,18 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
 }
 
-static void
-mt7921_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+void mt7921_tx_worker(struct mt76_worker *w)
 {
-	struct mt7921_dev *dev = mt7921_hw_dev(hw);
-	struct mt7921_phy *phy = mt7921_hw_phy(hw);
-	struct mt76_phy *mphy = phy->mt76;
-
-	if (!test_bit(MT76_STATE_RUNNING, &mphy->state))
-		return;
+	struct mt7921_dev *dev = container_of(w, struct mt7921_dev,
+					      mt76.tx_worker);
 
-	if (test_bit(MT76_STATE_PM, &mphy->state)) {
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
 		queue_work(dev->mt76.wq, &dev->pm.wake_work);
 		return;
 	}
 
-	dev->pm.last_activity = jiffies;
-	mt76_worker_schedule(&dev->mt76.tx_worker);
+	mt76_txq_schedule_all(&dev->mphy);
+	mt76_connac_pm_unref(&dev->pm);
 }
 
 static void mt7921_tx(struct ieee80211_hw *hw,
@@ -769,9 +764,9 @@ static void mt7921_tx(struct ieee80211_hw *hw,
 		wcid = &mvif->sta.wcid;
 	}
 
-	if (!test_bit(MT76_STATE_PM, &mphy->state)) {
-		dev->pm.last_activity = jiffies;
+	if (mt76_connac_pm_ref(mphy, &dev->pm)) {
 		mt76_tx(mphy, control->sta, wcid, skb);
+		mt76_connac_pm_unref(&dev->pm);
 		return;
 	}
 
@@ -1200,7 +1195,7 @@ const struct ieee80211_ops mt7921_ops = {
 	.set_key = mt7921_set_key,
 	.ampdu_action = mt7921_ampdu_action,
 	.set_rts_threshold = mt7921_set_rts_threshold,
-	.wake_tx_queue = mt7921_wake_tx_queue,
+	.wake_tx_queue = mt76_wake_tx_queue,
 	.release_buffered_frames = mt76_release_buffered_frames,
 	.get_txpower = mt76_get_txpower,
 	.get_stats = mt7921_get_stats,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 3780a7f4cb16..a794c074867e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -336,6 +336,8 @@ int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
 			  struct mt76_tx_info *tx_info);
+
+void mt7921_tx_worker(struct mt76_worker *w);
 void mt7921_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e);
 int mt7921_init_tx_queues(struct mt7921_phy *phy, int idx, int n_desc);
 void mt7921_tx_token_put(struct mt7921_dev *dev);
-- 
cgit v1.2.3


From 335e97ace24ade90aa5d5e8713bc448d2c276322 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:31 +0200
Subject: mt76: mt7663: rely on mt76_connac_pm_ref/mt76_connac_pm_unref in tx
 path

Introduce mt7615_tx_worker routine as mt76 tx worker callback for
mt7663.
Rely on mt76_connac_pm_ref/mt76_connac_pm_unref to check PM state and
increment/decrement wake counter

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 27 ++++++++++------------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  1 +
 drivers/net/wireless/mediatek/mt76/tx.c            | 12 +++++++---
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 289f195a7951..0660b708156d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1015,6 +1015,7 @@ void mt76_stop_tx_queues(struct mt76_phy *phy, struct ieee80211_sta *sta,
 void mt76_tx_check_agg_ssn(struct ieee80211_sta *sta, struct sk_buff *skb);
 void mt76_txq_schedule(struct mt76_phy *phy, enum mt76_txq_id qid);
 void mt76_txq_schedule_all(struct mt76_phy *phy);
+void mt76_tx_worker_run(struct mt76_dev *dev);
 void mt76_tx_worker(struct mt76_worker *w);
 void mt76_release_buffered_frames(struct ieee80211_hw *hw,
 				  struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 5429536e8d43..3ef6bcdf38c4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -502,9 +502,11 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	dev->phy.dev = dev;
 	dev->phy.mt76 = &dev->mt76.phy;
 	dev->mt76.phy.priv = &dev->phy;
+	dev->mt76.tx_worker.fn = mt7615_tx_worker;
 
 	INIT_DELAYED_WORK(&dev->pm.ps_work, mt7615_pm_power_save_work);
 	INIT_WORK(&dev->pm.wake_work, mt7615_pm_wake_work);
+	spin_lock_init(&dev->pm.wake.lock);
 	mutex_init(&dev->pm.mutex);
 	init_completion(&dev->pm.wake_cmpl);
 	spin_lock_init(&dev->pm.txq_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 8313bf468db2..a3e53d3aec02 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -694,28 +694,25 @@ static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw,
 			break;
 	}
 	msta->n_rates = i;
-	if (!test_bit(MT76_STATE_PM, &phy->mt76->state))
+	if (mt76_connac_pm_ref(phy->mt76, &dev->pm)) {
 		mt7615_mac_set_rates(phy, msta, NULL, msta->rates);
+		mt76_connac_pm_unref(&dev->pm);
+	}
 	spin_unlock_bh(&dev->mt76.lock);
 }
 
-static void
-mt7615_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+void mt7615_tx_worker(struct mt76_worker *w)
 {
-	struct mt7615_dev *dev = mt7615_hw_dev(hw);
-	struct mt7615_phy *phy = mt7615_hw_phy(hw);
-	struct mt76_phy *mphy = phy->mt76;
-
-	if (!test_bit(MT76_STATE_RUNNING, &mphy->state))
-		return;
+	struct mt7615_dev *dev = container_of(w, struct mt7615_dev,
+					      mt76.tx_worker);
 
-	if (test_bit(MT76_STATE_PM, &mphy->state)) {
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
 		queue_work(dev->mt76.wq, &dev->pm.wake_work);
 		return;
 	}
 
-	dev->pm.last_activity = jiffies;
-	mt76_worker_schedule(&dev->mt76.tx_worker);
+	mt76_tx_worker_run(&dev->mt76);
+	mt76_connac_pm_unref(&dev->pm);
 }
 
 static void mt7615_tx(struct ieee80211_hw *hw,
@@ -743,9 +740,9 @@ static void mt7615_tx(struct ieee80211_hw *hw,
 		wcid = &msta->wcid;
 	}
 
-	if (!test_bit(MT76_STATE_PM, &mphy->state)) {
-		dev->pm.last_activity = jiffies;
+	if (mt76_connac_pm_ref(mphy, &dev->pm)) {
 		mt76_tx(mphy, control->sta, wcid, skb);
+		mt76_connac_pm_unref(&dev->pm);
 		return;
 	}
 
@@ -1272,7 +1269,7 @@ const struct ieee80211_ops mt7615_ops = {
 	.sta_set_decap_offload = mt7615_sta_set_decap_offload,
 	.ampdu_action = mt7615_ampdu_action,
 	.set_rts_threshold = mt7615_set_rts_threshold,
-	.wake_tx_queue = mt7615_wake_tx_queue,
+	.wake_tx_queue = mt76_wake_tx_queue,
 	.sta_rate_tbl_update = mt7615_sta_rate_tbl_update,
 	.sw_scan_start = mt76_sw_scan,
 	.sw_scan_complete = mt76_sw_scan_complete,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 5262b84a28c7..4c533b8ffa47 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -508,6 +508,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  struct ieee80211_sta *sta,
 			  struct mt76_tx_info *tx_info);
 
+void mt7615_tx_worker(struct mt76_worker *w);
 void mt7615_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e);
 void mt7615_tx_token_put(struct mt7615_dev *dev);
 void mt7615_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 04e47259ea5f..cfc7229aa7b0 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -540,10 +540,8 @@ void mt76_txq_schedule_all(struct mt76_phy *phy)
 }
 EXPORT_SYMBOL_GPL(mt76_txq_schedule_all);
 
-void mt76_tx_worker(struct mt76_worker *w)
+void mt76_tx_worker_run(struct mt76_dev *dev)
 {
-	struct mt76_dev *dev = container_of(w, struct mt76_dev, tx_worker);
-
 	mt76_txq_schedule_all(&dev->phy);
 	if (dev->phy2)
 		mt76_txq_schedule_all(dev->phy2);
@@ -555,6 +553,14 @@ void mt76_tx_worker(struct mt76_worker *w)
 		mt76_testmode_tx_pending(dev->phy2);
 #endif
 }
+EXPORT_SYMBOL_GPL(mt76_tx_worker_run);
+
+void mt76_tx_worker(struct mt76_worker *w)
+{
+	struct mt76_dev *dev = container_of(w, struct mt76_dev, tx_worker);
+
+	mt76_tx_worker_run(dev);
+}
 
 void mt76_stop_tx_queues(struct mt76_phy *phy, struct ieee80211_sta *sta,
 			 bool send_bar)
-- 
cgit v1.2.3


From cb8ed33d4b3f4ef8cbff2d164bffeca678427f5a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:32 +0200
Subject: mt76: dma: add the capability to define a custom rx napi poll routine

Add the capability to define a custom rx napi callback for each driver.
This is a preliminary patch to properly support runtime-pm on rx side

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/dma.c          | 10 +++++-----
 drivers/net/wireless/mediatek/mt76/dma.h          |  1 +
 drivers/net/wireless/mediatek/mt76/mt76.h         |  5 +++--
 drivers/net/wireless/mediatek/mt76/mt7603/dma.c   |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c   |  2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/dma.c   |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c   |  2 +-
 8 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 6ea58aecca41..72b1cc0ecfda 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -602,8 +602,7 @@ mt76_dma_rx_process(struct mt76_dev *dev, struct mt76_queue *q, int budget)
 	return done;
 }
 
-static int
-mt76_dma_rx_poll(struct napi_struct *napi, int budget)
+int mt76_dma_rx_poll(struct napi_struct *napi, int budget)
 {
 	struct mt76_dev *dev;
 	int qid, done = 0, cur;
@@ -626,9 +625,11 @@ mt76_dma_rx_poll(struct napi_struct *napi, int budget)
 
 	return done;
 }
+EXPORT_SYMBOL_GPL(mt76_dma_rx_poll);
 
 static int
-mt76_dma_init(struct mt76_dev *dev)
+mt76_dma_init(struct mt76_dev *dev,
+	      int (*poll)(struct napi_struct *napi, int budget))
 {
 	int i;
 
@@ -639,8 +640,7 @@ mt76_dma_init(struct mt76_dev *dev)
 	dev->napi_dev.threaded = 1;
 
 	mt76_for_each_q_rx(dev, i) {
-		netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
-			       64);
+		netif_napi_add(&dev->napi_dev, &dev->napi[i], poll, 64);
 		mt76_dma_rx_fill(dev, &dev->q_rx[i]);
 		napi_enable(&dev->napi[i]);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/dma.h b/drivers/net/wireless/mediatek/mt76/dma.h
index e7c27697ef04..fdf786f975ea 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.h
+++ b/drivers/net/wireless/mediatek/mt76/dma.h
@@ -45,6 +45,7 @@ enum mt76_mcu_evt_type {
 	EVT_EVENT_DFS_DETECT_RSP,
 };
 
+int mt76_dma_rx_poll(struct napi_struct *napi, int budget);
 void mt76_dma_attach(struct mt76_dev *dev);
 void mt76_dma_cleanup(struct mt76_dev *dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 0660b708156d..49f3e5985422 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -170,7 +170,8 @@ struct mt76_mcu_ops {
 };
 
 struct mt76_queue_ops {
-	int (*init)(struct mt76_dev *dev);
+	int (*init)(struct mt76_dev *dev,
+		    int (*poll)(struct napi_struct *napi, int budget));
 
 	int (*alloc)(struct mt76_dev *dev, struct mt76_queue *q,
 		     int idx, int n_desc, int bufsize,
@@ -802,7 +803,7 @@ static inline u16 mt76_rev(struct mt76_dev *dev)
 #define mt76xx_chip(dev) mt76_chip(&((dev)->mt76))
 #define mt76xx_rev(dev) mt76_rev(&((dev)->mt76))
 
-#define mt76_init_queues(dev)		(dev)->mt76.queue_ops->init(&((dev)->mt76))
+#define mt76_init_queues(dev, ...)		(dev)->mt76.queue_ops->init(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_alloc(dev, ...)	(dev)->mt76.queue_ops->alloc(&((dev)->mt76), __VA_ARGS__)
 #define mt76_tx_queue_skb_raw(dev, ...)	(dev)->mt76.queue_ops->tx_queue_skb_raw(&((dev)->mt76), __VA_ARGS__)
 #define mt76_tx_queue_skb(dev, ...)	(dev)->mt76.queue_ops->tx_queue_skb(&((dev)->mt76), __VA_ARGS__)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
index 2b6244116842..415ea17b9be6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c
@@ -219,7 +219,7 @@ int mt7603_dma_init(struct mt7603_dev *dev)
 		return ret;
 
 	mt76_wr(dev, MT_DELAY_INT_CFG, 0);
-	ret = mt76_init_queues(dev);
+	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 642b4eab0d8b..d658555f5965 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -261,7 +261,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 
 	mt76_wr(dev, MT_DELAY_INT_CFG, 0);
 
-	ret = mt76_init_queues(dev);
+	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index ce1e9ad23fec..b50084bbe83d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -226,7 +226,7 @@ int mt76x02_dma_init(struct mt76x02_dev *dev)
 	if (ret)
 		return ret;
 
-	ret = mt76_init_queues(dev);
+	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
index 9fe09870f8f0..11d0b760abd7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -213,7 +213,7 @@ int mt7915_dma_init(struct mt7915_dev *dev)
 			return ret;
 	}
 
-	ret = mt76_init_queues(dev);
+	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 992faf82ad09..b056ac15f09a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -295,7 +295,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	ret = mt76_init_queues(dev);
+	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From 917dccb6eebcafd2a5ff73d75d2b0c5c7251e5f5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:33 +0200
Subject: mt76: mt7921: rely on mt76_connac_pm_ref/mt76_connac_pm_unref in
 tx/rx napi

Introduce mt7921_poll_rx rx napi callback for mt7921.
Do not access device registers in tx/rx napi if the device is not awake.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c    | 33 ++++++++++++++++++----
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 11 ++++++--
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 +
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index b056ac15f09a..c26979614113 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -53,8 +53,7 @@ void mt7921_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 	}
 }
 
-static void
-mt7921_tx_cleanup(struct mt7921_dev *dev)
+void mt7921_tx_cleanup(struct mt7921_dev *dev)
 {
 	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
 	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WA], false);
@@ -66,14 +65,38 @@ static int mt7921_poll_tx(struct napi_struct *napi, int budget)
 
 	dev = container_of(napi, struct mt7921_dev, mt76.tx_napi);
 
-	mt7921_tx_cleanup(dev);
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
+		napi_complete(napi);
+		queue_work(dev->mt76.wq, &dev->pm.wake_work);
+		return 0;
+	}
 
-	if (napi_complete_done(napi, 0))
+	mt7921_tx_cleanup(dev);
+	if (napi_complete(napi))
 		mt7921_irq_enable(dev, MT_INT_TX_DONE_ALL);
+	mt76_connac_pm_unref(&dev->pm);
 
 	return 0;
 }
 
+static int mt7921_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct mt7921_dev *dev;
+	int done;
+
+	dev = container_of(napi->dev, struct mt7921_dev, mt76.napi_dev);
+
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
+		napi_complete(napi);
+		queue_work(dev->mt76.wq, &dev->pm.wake_work);
+		return 0;
+	}
+	done = mt76_dma_rx_poll(napi, budget);
+	mt76_connac_pm_unref(&dev->pm);
+
+	return done;
+}
+
 void mt7921_dma_prefetch(struct mt7921_dev *dev)
 {
 #define PREFETCH(base, depth)	((base) << 16 | (depth))
@@ -295,7 +318,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	if (ret)
 		return ret;
 
-	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
+	ret = mt76_init_queues(dev, mt7921_poll_rx);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index c8819e78cea3..def00b255495 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1367,6 +1367,7 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	mt76_worker_enable(&dev->mt76.tx_worker);
 
 	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
+	clear_bit(MT76_STATE_PM, &dev->mphy.state);
 
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
@@ -1530,10 +1531,14 @@ void mt7921_pm_wake_work(struct work_struct *work)
 						pm.wake_work);
 	mphy = dev->phy.mt76;
 
-	if (!mt7921_mcu_drv_pmctrl(dev))
+	if (!mt7921_mcu_drv_pmctrl(dev)) {
+		int i;
+
+		mt76_for_each_q_rx(&dev->mt76, i)
+			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
-	else
-		dev_err(mphy->dev->dev, "failed to wake device\n");
+		mt7921_tx_cleanup(dev);
+	}
 
 	ieee80211_wake_queues(mphy->hw);
 	complete_all(&dev->pm.wake_cmpl);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index a794c074867e..c9687c57cbe7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -332,6 +332,7 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 void mt7921_mac_work(struct work_struct *work);
 void mt7921_mac_reset_work(struct work_struct *work);
 void mt7921_reset(struct mt76_dev *mdev);
+void mt7921_tx_cleanup(struct mt7921_dev *dev);
 int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
-- 
cgit v1.2.3


From db928f1ab9789f99a0e57b35f3c8d652ad5350f8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:34 +0200
Subject: mt76: mt7663: rely on mt76_connac_pm_ref/mt76_connac_pm_unref in
 tx/rx napi

Introduce mt7615_poll_rx rx napi callback for mt7663.
Do not access device registers in tx/rx napi if the device is not awake.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c | 30 ++++++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 10 ++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index d658555f5965..8004ae5c16a9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -71,15 +71,39 @@ static int mt7615_poll_tx(struct napi_struct *napi, int budget)
 	struct mt7615_dev *dev;
 
 	dev = container_of(napi, struct mt7615_dev, mt76.tx_napi);
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
+		napi_complete(napi);
+		queue_work(dev->mt76.wq, &dev->pm.wake_work);
+		return 0;
+	}
 
 	mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
-
-	if (napi_complete_done(napi, 0))
+	if (napi_complete(napi))
 		mt7615_irq_enable(dev, mt7615_tx_mcu_int_mask(dev));
 
+	mt76_connac_pm_unref(&dev->pm);
+
 	return 0;
 }
 
+static int mt7615_poll_rx(struct napi_struct *napi, int budget)
+{
+	struct mt7615_dev *dev;
+	int done;
+
+	dev = container_of(napi->dev, struct mt7615_dev, mt76.napi_dev);
+
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm)) {
+		napi_complete(napi);
+		queue_work(dev->mt76.wq, &dev->pm.wake_work);
+		return 0;
+	}
+	done = mt76_dma_rx_poll(napi, budget);
+	mt76_connac_pm_unref(&dev->pm);
+
+	return done;
+}
+
 int mt7615_wait_pdma_busy(struct mt7615_dev *dev)
 {
 	struct mt76_dev *mdev = &dev->mt76;
@@ -261,7 +285,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 
 	mt76_wr(dev, MT_DELAY_INT_CFG, 0);
 
-	ret = mt76_init_queues(dev, mt76_dma_rx_poll);
+	ret = mt76_init_queues(dev, mt7615_poll_rx);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 0b386030dbd8..e81c7d322811 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1913,10 +1913,14 @@ void mt7615_pm_wake_work(struct work_struct *work)
 						pm.wake_work);
 	mphy = dev->phy.mt76;
 
-	if (!mt7615_mcu_set_drv_ctrl(dev))
+	if (!mt7615_mcu_set_drv_ctrl(dev)) {
+		int i;
+
+		mt76_for_each_q_rx(&dev->mt76, i)
+			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
-	else
-		dev_err(mphy->dev->dev, "failed to wake device\n");
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
+	}
 
 	ieee80211_wake_queues(mphy->hw);
 	complete_all(&dev->pm.wake_cmpl);
-- 
cgit v1.2.3


From 4f9b3aeb837a9df029b56179be7b0505de4400de Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:35 +0200
Subject: mt76: connac: unschedule ps_work in mt76_connac_pm_wake

In order to avoid synchronization issues between wake and ps works,
cancel ps_work in mt76_connac_pm_wake routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c      | 1 -
 drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 1 +
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c      | 1 -
 3 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 7081bb4a28bd..5890fee98d97 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -345,7 +345,6 @@ static int mt7615_mcu_lp_drv_pmctrl(struct mt7615_dev *dev)
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
 out:
-	dev->pm.last_activity = jiffies;
 	mutex_unlock(&dev->pm.mutex);
 
 	return err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index c5f5037f5757..32d664ac1e35 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -13,6 +13,7 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 	if (!mt76_is_mmio(dev))
 		return 0;
 
+	cancel_delayed_work_sync(&pm->ps_work);
 	if (!test_bit(MT76_STATE_PM, &phy->state))
 		return 0;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index ea00f6b6af56..44f02cbf9cc7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1289,7 +1289,6 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
 out:
-	dev->pm.last_activity = jiffies;
 	mutex_unlock(&dev->pm.mutex);
 
 	if (err)
-- 
cgit v1.2.3


From ec7bd7b4a9c0e7e90d23b4f6a7dca2c713fe93ab Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:36 +0200
Subject: mt76: connac: check wake refcount in mcu_fw_pmctrl

In order to avoid synchronization races between tx and rx path, rely on
mt76_connac_skip_fw_pmctrl putting the chip in sleep mode for mt7921 and
mt7663 devices

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  |  2 +-
 drivers/net/wireless/mediatek/mt76/mt76_connac.h | 12 ++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c  |  2 +-
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 5890fee98d97..45c6fb5832b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -358,7 +358,7 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
 
 	mutex_lock(&dev->pm.mutex);
 
-	if (test_and_set_bit(MT76_STATE_PM, &mphy->state))
+	if (mt76_connac_skip_fw_pmctrl(mphy, &dev->pm))
 		goto out;
 
 	mt7622_trigger_hif_int(dev, true);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 85846eab8d7d..116d800c9f9d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -116,6 +116,18 @@ mt76_connac_pm_unref(struct mt76_connac_pm *pm)
 	spin_unlock_bh(&pm->wake.lock);
 }
 
+static inline bool
+mt76_connac_skip_fw_pmctrl(struct mt76_phy *phy, struct mt76_connac_pm *pm)
+{
+	bool ret;
+
+	spin_lock_bh(&pm->wake.lock);
+	ret = pm->wake.count || test_and_set_bit(MT76_STATE_PM, &phy->state);
+	spin_unlock_bh(&pm->wake.lock);
+
+	return ret;
+}
+
 static inline void
 mt76_connac_mutex_acquire(struct mt76_dev *dev, struct mt76_connac_pm *pm)
 	__acquires(&dev->mutex)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 44f02cbf9cc7..1204f5d324f8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1304,7 +1304,7 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 
 	mutex_lock(&dev->pm.mutex);
 
-	if (test_and_set_bit(MT76_STATE_PM, &mphy->state))
+	if (mt76_connac_skip_fw_pmctrl(mphy, &dev->pm))
 		goto out;
 
 	for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
-- 
cgit v1.2.3


From efe9ec5cec38181bf4faa871c73b63c4d25efef0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:37 +0200
Subject: mt76: connac: remove MT76_STATE_PM in mac_tx_free

Get rid of MT76_STATE_PM chec in mt7615_mac_tx_free and
mt7921_mac_tx_free since we already rely on mt76_connac_pm_unref in the
NAPI callback.
Remove mt76_connac_power_save_sched calls in mt7615_mac_tx_free and
mt7921_mac_tx_free

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 4 ----
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 6 ------
 2 files changed, 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e81c7d322811..ad1e236727cb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1514,14 +1514,10 @@ static void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb)
 
 	dev_kfree_skb(skb);
 
-	if (test_bit(MT76_STATE_PM, &dev->phy.mt76->state))
-		return;
-
 	rcu_read_lock();
 	mt7615_mac_sta_poll(dev);
 	rcu_read_unlock();
 
-	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
 	mt76_worker_schedule(&dev->mt76.tx_worker);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index def00b255495..a8247a6d5bc7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1043,13 +1043,7 @@ void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb)
 		napi_consume_skb(skb, 1);
 	}
 
-	if (test_bit(MT76_STATE_PM, &dev->phy.mt76->state))
-		return;
-
 	mt7921_mac_sta_poll(dev);
-
-	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
-
 	mt76_worker_schedule(&dev->mt76.tx_worker);
 }
 
-- 
cgit v1.2.3


From 1d4f5c68a0ed1838383013b3aca69a124b2dc9ec Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:38 +0200
Subject: mt76: mt7921: get rid of useless MT76_STATE_PM in mt7921_mac_work

Remove useless MT76_STATE_PM check in mt7921_mac_work since
mt7921_mutex_acquire will wake up the device if necessary

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index a8247a6d5bc7..5dcb574a2768 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1493,9 +1493,6 @@ void mt7921_mac_work(struct work_struct *work)
 					       mac_work.work);
 	phy = mphy->priv;
 
-	if (test_bit(MT76_STATE_PM, &mphy->state))
-		goto out;
-
 	mt7921_mutex_acquire(phy->dev);
 
 	mt76_update_survey(mphy->dev);
@@ -1510,8 +1507,6 @@ void mt7921_mac_work(struct work_struct *work)
 	}
 
 	mt7921_mutex_release(phy->dev);
-
-out:
 	ieee80211_queue_delayed_work(phy->mt76->hw, &mphy->mac_work,
 				     MT7921_WATCHDOG_TIME);
 }
-- 
cgit v1.2.3


From a61826203ba8806b4cdffd36bafdce3e9ad35c24 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:39 +0200
Subject: mt76: connac: alaways wake the device before scanning

move scanning check from mt76_connac_power_save_sched routine
to mt7921_pm_power_save_work/mt7615_pm_power_save_work ones

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c      | 4 ++++
 drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 8 --------
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c      | 4 ++++
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index ad1e236727cb..adbc726149a9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1931,6 +1931,10 @@ void mt7615_pm_power_save_work(struct work_struct *work)
 						pm.ps_work.work);
 
 	delta = dev->pm.idle_timeout;
+	if (test_bit(MT76_HW_SCANNING, &dev->mphy.state) ||
+	    test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state))
+		goto out;
+
 	if (time_is_after_jiffies(dev->pm.last_activity + delta)) {
 		delta = dev->pm.last_activity + delta - jiffies;
 		goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 32d664ac1e35..a263921d9f42 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -17,10 +17,6 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 	if (!test_bit(MT76_STATE_PM, &phy->state))
 		return 0;
 
-	if (test_bit(MT76_HW_SCANNING, &phy->state) ||
-	    test_bit(MT76_HW_SCHED_SCANNING, &phy->state))
-		return 0;
-
 	if (queue_work(dev->wq, &pm->wake_work))
 		reinit_completion(&pm->wake_cmpl);
 
@@ -46,10 +42,6 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 
 	pm->last_activity = jiffies;
 
-	if (test_bit(MT76_HW_SCANNING, &phy->state) ||
-	    test_bit(MT76_HW_SCHED_SCANNING, &phy->state))
-		return;
-
 	if (!test_bit(MT76_STATE_PM, &phy->state))
 		queue_delayed_work(dev->wq, &pm->ps_work, pm->idle_timeout);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 5dcb574a2768..90ede75cfba8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1542,6 +1542,10 @@ void mt7921_pm_power_save_work(struct work_struct *work)
 						pm.ps_work.work);
 
 	delta = dev->pm.idle_timeout;
+	if (test_bit(MT76_HW_SCANNING, &dev->mphy.state) ||
+	    test_bit(MT76_HW_SCHED_SCANNING, &dev->mphy.state))
+		goto out;
+
 	if (time_is_after_jiffies(dev->pm.last_activity + delta)) {
 		delta = dev->pm.last_activity + delta - jiffies;
 		goto out;
-- 
cgit v1.2.3


From 75e83c2035debe419ba25f6dc66fcd11d0dc0bcd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:40 +0200
Subject: mt76: mt7615: rely on pm refcounting in mt7615_led_set_config

Rely on mt76_connac_pm_ref/mt76_connac_pm_unref utility routines in
mt7615_led_set_config

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 49540b00519d..736d19699a03 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -82,7 +82,7 @@ mt7615_led_set_config(struct led_classdev *led_cdev,
 	mt76 = container_of(led_cdev, struct mt76_dev, led_cdev);
 	dev = container_of(mt76, struct mt7615_dev, mt76);
 
-	if (test_bit(MT76_STATE_PM, &mt76->phy.state))
+	if (!mt76_connac_pm_ref(&dev->mphy, &dev->pm))
 		return;
 
 	val = FIELD_PREP(MT_LED_STATUS_DURATION, 0xffff) |
@@ -100,6 +100,8 @@ mt7615_led_set_config(struct led_classdev *led_cdev,
 		val |= MT_LED_CTRL_POLARITY(mt76->led_pin);
 	addr = mt7615_reg_map(dev, MT_LED_CTRL);
 	mt76_wr(dev, addr, val);
+
+	mt76_connac_pm_unref(&dev->pm);
 }
 
 static int
-- 
cgit v1.2.3


From 310718ba6a13a5d0d65ea1ea338ea9f9f992dacf Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:41 +0200
Subject: mt76: connac: do not run mt76_txq_schedule_all directly

In order to not break runtime-pm, do run mt76_txq_schedule_all in
mt7615_set_channel/mt7921_set_channel but rely on mt76_worker_schedule

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 3 +--
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index a3e53d3aec02..39733b351ac4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -351,8 +351,7 @@ out:
 
 	mt7615_mutex_release(dev);
 
-	mt76_txq_schedule_all(phy->mt76);
-
+	mt76_worker_schedule(&dev->mt76.tx_worker);
 	if (!mt76_testmode_enabled(phy->mt76))
 		ieee80211_queue_delayed_work(phy->mt76->hw,
 					     &phy->mt76->mac_work,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index f53a1326d8d8..3bbe7317ea3f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -395,8 +395,7 @@ out:
 	clear_bit(MT76_RESET, &phy->mt76->state);
 	mt7921_mutex_release(dev);
 
-	mt76_txq_schedule_all(phy->mt76);
-
+	mt76_worker_schedule(&dev->mt76.tx_worker);
 	ieee80211_queue_delayed_work(phy->mt76->hw, &phy->mt76->mac_work,
 				     MT7921_WATCHDOG_TIME);
 
-- 
cgit v1.2.3


From e5f35576c8a986c6456f7d0c7d0f1ff34ccaa165 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:42 +0200
Subject: mt76: connac: use waitqueue for runtime-pm

Simplify the code using a wait_queue_head_t instead of a completion to
wait the chip is fully awake in mt76_connac_pm_wake routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c      | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76_connac.h     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 8 ++++----
 drivers/net/wireless/mediatek/mt76/mt7921/init.c     | 2 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c      | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 3ef6bcdf38c4..894b2588e075 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -508,7 +508,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	INIT_WORK(&dev->pm.wake_work, mt7615_pm_wake_work);
 	spin_lock_init(&dev->pm.wake.lock);
 	mutex_init(&dev->pm.mutex);
-	init_completion(&dev->pm.wake_cmpl);
+	init_waitqueue_head(&dev->pm.wait);
 	spin_lock_init(&dev->pm.txq_lock);
 	set_bit(MT76_STATE_PM, &dev->mphy.state);
 	INIT_DELAYED_WORK(&dev->mphy.mac_work, mt7615_mac_work);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index adbc726149a9..747bf90f9d8a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1919,7 +1919,7 @@ void mt7615_pm_wake_work(struct work_struct *work)
 	}
 
 	ieee80211_wake_queues(mphy->hw);
-	complete_all(&dev->pm.wake_cmpl);
+	wake_up(&dev->pm.wait);
 }
 
 void mt7615_pm_power_save_work(struct work_struct *work)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 116d800c9f9d..e3937f6d3640 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -53,7 +53,7 @@ struct mt76_connac_pm {
 	} tx_q[IEEE80211_NUM_ACS];
 
 	struct work_struct wake_work;
-	struct completion wake_cmpl;
+	wait_queue_head_t wait;
 
 	struct {
 		spinlock_t lock;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index a263921d9f42..66f1667481e6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -17,10 +17,10 @@ int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm)
 	if (!test_bit(MT76_STATE_PM, &phy->state))
 		return 0;
 
-	if (queue_work(dev->wq, &pm->wake_work))
-		reinit_completion(&pm->wake_cmpl);
-
-	if (!wait_for_completion_timeout(&pm->wake_cmpl, 3 * HZ)) {
+	queue_work(dev->wq, &pm->wake_work);
+	if (!wait_event_timeout(pm->wait,
+				!test_bit(MT76_STATE_PM, &phy->state),
+				3 * HZ)) {
 		ieee80211_wake_queues(phy->hw);
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index ce0e231ab772..2d8dba000d0b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -225,7 +225,7 @@ int mt7921_register_device(struct mt7921_dev *dev)
 	INIT_WORK(&dev->pm.wake_work, mt7921_pm_wake_work);
 	spin_lock_init(&dev->pm.wake.lock);
 	mutex_init(&dev->pm.mutex);
-	init_completion(&dev->pm.wake_cmpl);
+	init_waitqueue_head(&dev->pm.wait);
 	spin_lock_init(&dev->pm.txq_lock);
 	set_bit(MT76_STATE_PM, &dev->mphy.state);
 	INIT_LIST_HEAD(&dev->phy.stats_list);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 90ede75cfba8..7b5323181fac 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1530,7 +1530,7 @@ void mt7921_pm_wake_work(struct work_struct *work)
 	}
 
 	ieee80211_wake_queues(mphy->hw);
-	complete_all(&dev->pm.wake_cmpl);
+	wake_up(&dev->pm.wait);
 }
 
 void mt7921_pm_power_save_work(struct work_struct *work)
-- 
cgit v1.2.3


From 37a8648889f6aa398be67e254834372f5d5f8a78 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:43 +0200
Subject: mt76: remove MT76_STATE_PM in tx path

since tx/rx path is now relying pm ref counting, get rid of MT76_STATE_PM
check in the tx path

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/tx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index cfc7229aa7b0..236eaa351f53 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -422,8 +422,7 @@ mt76_txq_send_burst(struct mt76_phy *phy, struct mt76_queue *q,
 		return idx;
 
 	do {
-		if (test_bit(MT76_STATE_PM, &phy->state) ||
-		    test_bit(MT76_RESET, &phy->state))
+		if (test_bit(MT76_RESET, &phy->state))
 			return -EBUSY;
 
 		if (stop || mt76_txq_stopped(q))
@@ -463,8 +462,7 @@ mt76_txq_schedule_list(struct mt76_phy *phy, enum mt76_txq_id qid)
 	while (1) {
 		int n_frames = 0;
 
-		if (test_bit(MT76_STATE_PM, &phy->state) ||
-		    test_bit(MT76_RESET, &phy->state))
+		if (test_bit(MT76_RESET, &phy->state))
 			return -EBUSY;
 
 		if (dev->queue_ops->tx_cleanup &&
-- 
cgit v1.2.3


From 36873246f78a2d82eb8c43f74af52f199757dcff Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:44 +0200
Subject: mt76: mt7921: add awake and doze time accounting

Introduce awake and doze time accounting for runtime pm.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h   |  6 ++++
 .../net/wireless/mediatek/mt76/mt7921/debugfs.c    | 32 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  5 +++-
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 19 +++++++++----
 4 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index e3937f6d3640..9e61c107c640 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -64,6 +64,12 @@ struct mt76_connac_pm {
 	struct delayed_work ps_work;
 	unsigned long last_activity;
 	unsigned long idle_timeout;
+	struct {
+		unsigned long last_wake_event;
+		unsigned long awake_time;
+		unsigned long last_doze_event;
+		unsigned long doze_time;
+	} stats;
 };
 
 struct mt76_connac_coredump {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index 5a54cd8d2ce4..f3982578cc56 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -230,11 +230,19 @@ static int
 mt7921_pm_set(void *data, u64 val)
 {
 	struct mt7921_dev *dev = data;
+	struct mt76_connac_pm *pm = &dev->pm;
 	struct mt76_phy *mphy = dev->phy.mt76;
 
+	if (val == pm->enable)
+		return 0;
+
 	mt7921_mutex_acquire(dev);
 
-	dev->pm.enable = val;
+	if (!pm->enable) {
+		pm->stats.last_wake_event = jiffies;
+		pm->stats.last_doze_event = jiffies;
+	}
+	pm->enable = val;
 
 	ieee80211_iterate_active_interfaces(mphy->hw,
 					    IEEE80211_IFACE_ITER_RESUME_ALL,
@@ -256,6 +264,26 @@ mt7921_pm_get(void *data, u64 *val)
 
 DEFINE_DEBUGFS_ATTRIBUTE(fops_pm, mt7921_pm_get, mt7921_pm_set, "%lld\n");
 
+static int
+mt7921_pm_stats(struct seq_file *s, void *data)
+{
+	struct mt7921_dev *dev = dev_get_drvdata(s->private);
+	struct mt76_connac_pm *pm = &dev->pm;
+	unsigned long awake_time = pm->stats.awake_time;
+	unsigned long doze_time = pm->stats.doze_time;
+
+	if (!test_bit(MT76_STATE_PM, &dev->mphy.state))
+		awake_time += jiffies - pm->stats.last_wake_event;
+	else
+		doze_time += jiffies - pm->stats.last_doze_event;
+
+	seq_printf(s, "awake time: %14u\ndoze time: %15u\n",
+		   jiffies_to_msecs(awake_time),
+		   jiffies_to_msecs(doze_time));
+
+	return 0;
+}
+
 static int
 mt7921_pm_idle_timeout_set(void *data, u64 val)
 {
@@ -322,6 +350,8 @@ int mt7921_init_debugfs(struct mt7921_dev *dev)
 	debugfs_create_file("idle-timeout", 0600, dir, dev,
 			    &fops_pm_idle_timeout);
 	debugfs_create_file("chip_reset", 0600, dir, dev, &fops_reset);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "runtime_pm_stats", dir,
+				    mt7921_pm_stats);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 2d8dba000d0b..cec17a249a8c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -239,12 +239,15 @@ int mt7921_register_device(struct mt7921_dev *dev)
 
 	INIT_WORK(&dev->reset_work, mt7921_mac_reset_work);
 
+	dev->pm.idle_timeout = MT7921_PM_TIMEOUT;
+	dev->pm.stats.last_wake_event = jiffies;
+	dev->pm.stats.last_doze_event = jiffies;
+
 	ret = mt7921_init_hardware(dev);
 	if (ret)
 		return ret;
 
 	mt7921_init_wiphy(hw);
-	dev->pm.idle_timeout = MT7921_PM_TIMEOUT;
 	dev->mphy.sband_2g.sband.ht_cap.cap |=
 			IEEE80211_HT_CAP_LDPC_CODING |
 			IEEE80211_HT_CAP_MAX_AMSDU;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 1204f5d324f8..c00295b63ba8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1267,9 +1267,10 @@ int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	int i, err = 0;
 
-	mutex_lock(&dev->pm.mutex);
+	mutex_lock(&pm->mutex);
 
 	if (!test_bit(MT76_STATE_PM, &mphy->state))
 		goto out;
@@ -1288,8 +1289,11 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 	}
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
+	pm->stats.last_wake_event = jiffies;
+	pm->stats.doze_time += pm->stats.last_wake_event -
+			       pm->stats.last_doze_event;
 out:
-	mutex_unlock(&dev->pm.mutex);
+	mutex_unlock(&pm->mutex);
 
 	if (err)
 		mt7921_reset(&dev->mt76);
@@ -1300,11 +1304,12 @@ out:
 int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	int i, err = 0;
 
-	mutex_lock(&dev->pm.mutex);
+	mutex_lock(&pm->mutex);
 
-	if (mt76_connac_skip_fw_pmctrl(mphy, &dev->pm))
+	if (mt76_connac_skip_fw_pmctrl(mphy, pm))
 		goto out;
 
 	for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) {
@@ -1319,8 +1324,12 @@ int mt7921_mcu_fw_pmctrl(struct mt7921_dev *dev)
 		clear_bit(MT76_STATE_PM, &mphy->state);
 		err = -EIO;
 	}
+
+	pm->stats.last_doze_event = jiffies;
+	pm->stats.awake_time += pm->stats.last_doze_event -
+				pm->stats.last_wake_event;
 out:
-	mutex_unlock(&dev->pm.mutex);
+	mutex_unlock(&pm->mutex);
 
 	if (err)
 		mt7921_reset(&dev->mt76);
-- 
cgit v1.2.3


From dc5d5f9d3fe4d0c26b4e4beb25d056ffcc5fbf02 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 18 Apr 2021 18:45:45 +0200
Subject: mt76: mt7921: enable sw interrupts

Enable sw interrupts in order to wake the device from deep sleep
receiving packets

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c  |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c  |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c  | 13 +++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/regs.h | 17 ++++++++++-------
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index c26979614113..7ada8339b74f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -351,6 +351,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 	/* enable interrupts for TX/RX rings */
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
 			  MT_INT_MCU_CMD);
+	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 7b5323181fac..8770e0d93f45 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1276,6 +1276,7 @@ mt7921_dma_reset(struct mt7921_dev *dev)
 	mt7921_irq_enable(dev,
 			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
 			  MT_INT_MCU_CMD);
+	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
 }
 
 void mt7921_tx_token_put(struct mt7921_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 40e2086d075c..d5da98d36f63 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -61,6 +61,18 @@ static void mt7921_irq_tasklet(unsigned long data)
 	if (intr & MT_INT_TX_DONE_MCU)
 		mask |= MT_INT_TX_DONE_MCU;
 
+	if (intr & MT_INT_MCU_CMD) {
+		u32 intr_sw;
+
+		intr_sw = mt76_rr(dev, MT_MCU_CMD);
+		/* ack MCU2HOST_SW_INT_STA */
+		mt76_wr(dev, MT_MCU_CMD, intr_sw);
+		if (intr_sw & MT_MCU_CMD_WAKE_RX_PCIE) {
+			mask |= MT_INT_RX_DONE_DATA;
+			intr |= MT_INT_RX_DONE_DATA;
+		}
+	}
+
 	mt76_set_irq_mask(&dev->mt76, MT_WFDMA0_HOST_INT_ENA, mask, 0);
 
 	if (intr & MT_INT_TX_DONE_ALL)
@@ -253,6 +265,7 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
 			  MT_INT_MCU_CMD);
+	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
 
 	/* put dma enabled */
 	mt76_set(dev, MT_WFDMA0_GLO_CFG,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
index 76ecfea21dce..b6944c867a57 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/regs.h
@@ -251,13 +251,16 @@
 #define MT_WFDMA0_BUSY_ENA_TX_FIFO1	BIT(1)
 #define MT_WFDMA0_BUSY_ENA_RX_FIFO	BIT(2)
 
-#define MT_MCU_CMD                     MT_WFDMA0(0x1f0)
-#define MT_MCU_CMD_STOP_DMA_FW_RELOAD  BIT(1)
-#define MT_MCU_CMD_STOP_DMA            BIT(2)
-#define MT_MCU_CMD_RESET_DONE          BIT(3)
-#define MT_MCU_CMD_RECOVERY_DONE       BIT(4)
-#define MT_MCU_CMD_NORMAL_STATE	       BIT(5)
-#define MT_MCU_CMD_ERROR_MASK          GENMASK(5, 1)
+#define MT_MCU_CMD			MT_WFDMA0(0x1f0)
+#define MT_MCU_CMD_WAKE_RX_PCIE		BIT(0)
+#define MT_MCU_CMD_STOP_DMA_FW_RELOAD	BIT(1)
+#define MT_MCU_CMD_STOP_DMA		BIT(2)
+#define MT_MCU_CMD_RESET_DONE		BIT(3)
+#define MT_MCU_CMD_RECOVERY_DONE	BIT(4)
+#define MT_MCU_CMD_NORMAL_STATE		BIT(5)
+#define MT_MCU_CMD_ERROR_MASK		GENMASK(5, 1)
+
+#define MT_MCU2HOST_SW_INT_ENA		MT_WFDMA0(0x1f4)
 
 #define MT_WFDMA0_HOST_INT_STA		MT_WFDMA0(0x200)
 #define HOST_RX_DONE_INT_STS0		BIT(0)	/* Rx mcu */
-- 
cgit v1.2.3


From 4a52d6abb193aea0f2923a2c917502bd2d718630 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 19 Apr 2021 14:20:32 +0100
Subject: mt76: mt7615: Fix a dereference of pointer sta before it is null
 checked

Currently the assignment of idx dereferences pointer sta before
sta is null checked, leading to a potential null pointer dereference.
Fix this by assigning idx when it is required after the null check on
pointer sta.

Addresses-Coverity: ("Dereference before null check")
Fixes: a4a5a430b076 ("mt76: mt7615: fix TSF configuration")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
index 4a370b9f7a17..f8d3673c2cae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
@@ -67,7 +67,7 @@ static int mt7663_usb_sdio_set_rates(struct mt7615_dev *dev,
 	struct mt7615_rate_desc *rate = &wrd->rate;
 	struct mt7615_sta *sta = wrd->sta;
 	u32 w5, w27, addr, val;
-	u16 idx = sta->vif->mt76.omac_idx;
+	u16 idx;
 
 	lockdep_assert_held(&dev->mt76.mutex);
 
@@ -119,6 +119,7 @@ static int mt7663_usb_sdio_set_rates(struct mt7615_dev *dev,
 
 	sta->rate_probe = sta->rateset[rate->rateset].probe_rate.idx != -1;
 
+	idx = sta->vif->mt76.omac_idx;
 	idx = idx > HW_BSSID_MAX ? HW_BSSID_0 : idx;
 	addr = idx > 1 ? MT_LPON_TCR2(idx): MT_LPON_TCR0(idx);
 
-- 
cgit v1.2.3


From 0a1059d0f06023a7d045d05055c9d2ebad3b9c9d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 19 Apr 2021 22:20:54 +0800
Subject: mt76: mt7921: move mt7921_dma_reset in dma.c

Move mt7921_dma_reset routine in dma.c and make mt7921_dma_prefetch
static. Moreover add force parameter to mt7921_dma_reset signature.
This is a preliminary patch to reset dma mt7921_mcu_drv_pmctrl.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c    | 73 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 67 +-------------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  2 +-
 3 files changed, 74 insertions(+), 68 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 7ada8339b74f..0c577a5bac13 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -97,7 +97,7 @@ static int mt7921_poll_rx(struct napi_struct *napi, int budget)
 	return done;
 }
 
-void mt7921_dma_prefetch(struct mt7921_dev *dev)
+static void mt7921_dma_prefetch(struct mt7921_dev *dev)
 {
 #define PREFETCH(base, depth)	((base) << 16 | (depth))
 
@@ -229,6 +229,77 @@ static int mt7921_dmashdl_disabled(struct mt7921_dev *dev)
 	return 0;
 }
 
+int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
+{
+	int i;
+
+	if (force) {
+		/* reset */
+		mt76_clear(dev, MT_WFDMA0_RST,
+			   MT_WFDMA0_RST_DMASHDL_ALL_RST |
+			   MT_WFDMA0_RST_LOGIC_RST);
+
+		mt76_set(dev, MT_WFDMA0_RST,
+			 MT_WFDMA0_RST_DMASHDL_ALL_RST |
+			 MT_WFDMA0_RST_LOGIC_RST);
+	}
+
+	/* disable WFDMA0 */
+	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
+		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
+		   MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
+		   MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
+		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+
+	if (!mt76_poll(dev, MT_WFDMA0_GLO_CFG,
+		       MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
+		       MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000))
+		return -ETIMEDOUT;
+
+	/* reset hw queues */
+	for (i = 0; i < __MT_TXQ_MAX; i++)
+		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
+
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
+
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
+	/* configure perfetch settings */
+	mt7921_dma_prefetch(dev);
+
+	/* reset dma idx */
+	mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
+
+	/* configure delay interrupt */
+	mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
+
+	mt76_set(dev, MT_WFDMA0_GLO_CFG,
+		 MT_WFDMA0_GLO_CFG_TX_WB_DDONE |
+		 MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN |
+		 MT_WFDMA0_GLO_CFG_CLK_GAT_DIS |
+		 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
+		 MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
+		 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
+
+	mt76_set(dev, MT_WFDMA0_GLO_CFG,
+		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+
+	mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
+
+	/* enable interrupts for TX/RX rings */
+	mt7921_irq_enable(dev,
+			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
+			  MT_INT_MCU_CMD);
+	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
+
+	return 0;
+}
+
 int mt7921_dma_init(struct mt7921_dev *dev)
 {
 	/* Increase buffer size to receive large VHT/HE MPDUs */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 8770e0d93f45..8b4fbb7f304c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1214,71 +1214,6 @@ int mt7921_wfsys_reset(struct mt7921_dev *dev)
 				WFSYS_SW_INIT_DONE, WFSYS_SW_INIT_DONE, 500);
 }
 
-static void
-mt7921_dma_reset(struct mt7921_dev *dev)
-{
-	int i;
-
-	/* reset */
-	mt76_clear(dev, MT_WFDMA0_RST,
-		   MT_WFDMA0_RST_DMASHDL_ALL_RST | MT_WFDMA0_RST_LOGIC_RST);
-
-	mt76_set(dev, MT_WFDMA0_RST,
-		 MT_WFDMA0_RST_DMASHDL_ALL_RST | MT_WFDMA0_RST_LOGIC_RST);
-
-	/* disable WFDMA0 */
-	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
-		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
-		   MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
-		   MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
-		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
-		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
-
-	mt76_poll(dev, MT_WFDMA0_GLO_CFG,
-		  MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
-		  MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000);
-
-	/* reset hw queues */
-	for (i = 0; i < __MT_TXQ_MAX; i++)
-		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
-
-	for (i = 0; i < __MT_MCUQ_MAX; i++)
-		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
-
-	mt76_for_each_q_rx(&dev->mt76, i)
-		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
-
-	mt76_tx_status_check(&dev->mt76, NULL, true);
-
-	/* configure perfetch settings */
-	mt7921_dma_prefetch(dev);
-
-	/* reset dma idx */
-	mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
-
-	/* configure delay interrupt */
-	mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
-
-	mt76_set(dev, MT_WFDMA0_GLO_CFG,
-		 MT_WFDMA0_GLO_CFG_TX_WB_DDONE |
-		 MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN |
-		 MT_WFDMA0_GLO_CFG_CLK_GAT_DIS |
-		 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
-		 MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
-		 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
-
-	mt76_set(dev, MT_WFDMA0_GLO_CFG,
-		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
-
-	mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
-
-	/* enable interrupts for TX/RX rings */
-	mt7921_irq_enable(dev,
-			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
-			  MT_INT_MCU_CMD);
-	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
-}
-
 void mt7921_tx_token_put(struct mt7921_dev *dev)
 {
 	struct mt76_txwi_cache *txwi;
@@ -1349,7 +1284,7 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 		mt76_queue_rx_cleanup(dev, &dev->mt76.q_rx[i]);
 
 	mt7921_wfsys_reset(dev);
-	mt7921_dma_reset(dev);
+	mt7921_dma_reset(dev, true);
 
 	mt76_for_each_q_rx(&dev->mt76, i) {
 		mt76_queue_rx_reset(dev, i);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index c9687c57cbe7..53f9bb8c8281 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -253,7 +253,7 @@ int mt7921_eeprom_get_target_power(struct mt7921_dev *dev,
 				   u8 chain_idx);
 void mt7921_eeprom_init_sku(struct mt7921_dev *dev);
 int mt7921_dma_init(struct mt7921_dev *dev);
-void mt7921_dma_prefetch(struct mt7921_dev *dev);
+int mt7921_dma_reset(struct mt7921_dev *dev, bool force);
 void mt7921_dma_cleanup(struct mt7921_dev *dev);
 int mt7921_run_firmware(struct mt7921_dev *dev);
 int mt7921_mcu_init(struct mt7921_dev *dev);
-- 
cgit v1.2.3


From fcad15d52ef52002e069ed9a091a0c0a54691c27 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 19 Apr 2021 22:20:55 +0800
Subject: mt76: mt7921: introduce mt7921_wpdma_reset utility routine

Introduce mt7921_wpdma_reset routine to reset wpdma during chip reset
or driver_own request.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c    | 44 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 29 ++------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  2 +-
 3 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index 0c577a5bac13..af4b6cf38929 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -229,7 +229,7 @@ static int mt7921_dmashdl_disabled(struct mt7921_dev *dev)
 	return 0;
 }
 
-int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
+static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 {
 	int i;
 
@@ -300,6 +300,48 @@ int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 	return 0;
 }
 
+int mt7921_wfsys_reset(struct mt7921_dev *dev)
+{
+	mt76_set(dev, 0x70002600, BIT(0));
+	msleep(200);
+	mt76_clear(dev, 0x70002600, BIT(0));
+
+	if (!__mt76_poll_msec(&dev->mt76, MT_WFSYS_SW_RST_B,
+			      WFSYS_SW_INIT_DONE, WFSYS_SW_INIT_DONE, 500))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force)
+{
+	int i, err;
+
+	/* clean up hw queues */
+	for (i = 0; i < ARRAY_SIZE(dev->mt76.phy.q_tx); i++)
+		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
+
+	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_mcu); i++)
+		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_rx_cleanup(dev, &dev->mt76.q_rx[i]);
+
+	if (force) {
+		err = mt7921_wfsys_reset(dev);
+		if (err)
+			return err;
+	}
+	err = mt7921_dma_reset(dev, force);
+	if (err)
+		return err;
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_rx_reset(dev, i);
+
+	return 0;
+}
+
 int mt7921_dma_init(struct mt7921_dev *dev)
 {
 	/* Increase buffer size to receive large VHT/HE MPDUs */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 8b4fbb7f304c..f1efe9eaf791 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1204,16 +1204,6 @@ void mt7921_update_channel(struct mt76_dev *mdev)
 	mt76_connac_power_save_sched(&dev->mphy, &dev->pm);
 }
 
-int mt7921_wfsys_reset(struct mt7921_dev *dev)
-{
-	mt76_set(dev, 0x70002600, BIT(0));
-	msleep(200);
-	mt76_clear(dev, 0x70002600, BIT(0));
-
-	return __mt76_poll_msec(&dev->mt76, MT_WFSYS_SW_RST_B,
-				WFSYS_SW_INIT_DONE, WFSYS_SW_INIT_DONE, 500);
-}
-
 void mt7921_tx_token_put(struct mt7921_dev *dev)
 {
 	struct mt76_txwi_cache *txwi;
@@ -1273,21 +1263,11 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	mt7921_tx_token_put(dev);
 	idr_init(&dev->token);
 
-	/* clean up hw queues */
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.phy.q_tx); i++)
-		mt76_queue_tx_cleanup(dev, dev->mphy.q_tx[i], true);
-
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_mcu); i++)
-		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[i], true);
-
-	mt76_for_each_q_rx(&dev->mt76, i)
-		mt76_queue_rx_cleanup(dev, &dev->mt76.q_rx[i]);
-
-	mt7921_wfsys_reset(dev);
-	mt7921_dma_reset(dev, true);
+	err = mt7921_wpdma_reset(dev, true);
+	if (err)
+		return err;
 
 	mt76_for_each_q_rx(&dev->mt76, i) {
-		mt76_queue_rx_reset(dev, i);
 		napi_enable(&dev->mt76.napi[i]);
 		napi_schedule(&dev->mt76.napi[i]);
 	}
@@ -1301,9 +1281,6 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
-	mt7921_irq_enable(dev,
-			  MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
-			  MT_INT_MCU_CMD);
 
 	err = mt7921_run_firmware(dev);
 	if (err)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 53f9bb8c8281..67a2571aa470 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -253,7 +253,7 @@ int mt7921_eeprom_get_target_power(struct mt7921_dev *dev,
 				   u8 chain_idx);
 void mt7921_eeprom_init_sku(struct mt7921_dev *dev);
 int mt7921_dma_init(struct mt7921_dev *dev);
-int mt7921_dma_reset(struct mt7921_dev *dev, bool force);
+int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force);
 void mt7921_dma_cleanup(struct mt7921_dev *dev);
 int mt7921_run_firmware(struct mt7921_dev *dev);
 int mt7921_mcu_init(struct mt7921_dev *dev);
-- 
cgit v1.2.3


From 5536e7354aa8abf0e27a1bc58f4b4653b4884bdf Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 19 Apr 2021 22:20:56 +0800
Subject: mt76: mt7921: introduce mt7921_dma_{enable,disable} utilities

Introduce mt7921_dma_enable and mt7921_dma_disable utilities routine in
order for code reusing between mt7921_dma_reset and mt7921_dma_init.
This is a preliminary patch to reset dma during device driver_own
request.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c | 108 ++++++++----------------
 1 file changed, 35 insertions(+), 73 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index af4b6cf38929..fb7f98d92377 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -221,18 +221,8 @@ static u32 mt7921_rmw(struct mt76_dev *mdev, u32 offset, u32 mask, u32 val)
 	return dev->bus_ops->rmw(mdev, addr, mask, val);
 }
 
-static int mt7921_dmashdl_disabled(struct mt7921_dev *dev)
+static int mt7921_dma_disable(struct mt7921_dev *dev, bool force)
 {
-	mt76_clear(dev, MT_WFDMA0_GLO_CFG_EXT0, MT_WFDMA0_CSR_TX_DMASHDL_ENABLE);
-	mt76_set(dev, MT_DMASHDL_SW_CONTROL, MT_DMASHDL_DMASHDL_BYPASS);
-
-	return 0;
-}
-
-static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
-{
-	int i;
-
 	if (force) {
 		/* reset */
 		mt76_clear(dev, MT_WFDMA0_RST,
@@ -244,6 +234,11 @@ static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 			 MT_WFDMA0_RST_LOGIC_RST);
 	}
 
+	/* disable dmashdl */
+	mt76_clear(dev, MT_WFDMA0_GLO_CFG_EXT0,
+		   MT_WFDMA0_CSR_TX_DMASHDL_ENABLE);
+	mt76_set(dev, MT_DMASHDL_SW_CONTROL, MT_DMASHDL_DMASHDL_BYPASS);
+
 	/* disable WFDMA0 */
 	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
 		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN |
@@ -257,18 +252,11 @@ static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 		       MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000))
 		return -ETIMEDOUT;
 
-	/* reset hw queues */
-	for (i = 0; i < __MT_TXQ_MAX; i++)
-		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
-
-	for (i = 0; i < __MT_MCUQ_MAX; i++)
-		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
-
-	mt76_for_each_q_rx(&dev->mt76, i)
-		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
-
-	mt76_tx_status_check(&dev->mt76, NULL, true);
+	return 0;
+}
 
+static int mt7921_dma_enable(struct mt7921_dev *dev)
+{
 	/* configure perfetch settings */
 	mt7921_dma_prefetch(dev);
 
@@ -300,6 +288,29 @@ static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
 	return 0;
 }
 
+static int mt7921_dma_reset(struct mt7921_dev *dev, bool force)
+{
+	int i, err;
+
+	err = mt7921_dma_disable(dev, force);
+	if (err)
+		return err;
+
+	/* reset hw queues */
+	for (i = 0; i < __MT_TXQ_MAX; i++)
+		mt76_queue_reset(dev, dev->mphy.q_tx[i]);
+
+	for (i = 0; i < __MT_MCUQ_MAX; i++)
+		mt76_queue_reset(dev, dev->mt76.q_mcu[i]);
+
+	mt76_for_each_q_rx(&dev->mt76, i)
+		mt76_queue_reset(dev, &dev->mt76.q_rx[i]);
+
+	mt76_tx_status_check(&dev->mt76, NULL, true);
+
+	return mt7921_dma_enable(dev);
+}
+
 int mt7921_wfsys_reset(struct mt7921_dev *dev)
 {
 	mt76_set(dev, 0x70002600, BIT(0));
@@ -362,32 +373,10 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 
 	mt76_dma_attach(&dev->mt76);
 
-	/* reset */
-	mt76_clear(dev, MT_WFDMA0_RST,
-		   MT_WFDMA0_RST_DMASHDL_ALL_RST |
-		   MT_WFDMA0_RST_LOGIC_RST);
-
-	mt76_set(dev, MT_WFDMA0_RST,
-		 MT_WFDMA0_RST_DMASHDL_ALL_RST |
-		 MT_WFDMA0_RST_LOGIC_RST);
-
-	ret = mt7921_dmashdl_disabled(dev);
+	ret = mt7921_dma_disable(dev, true);
 	if (ret)
 		return ret;
 
-	/* disable WFDMA0 */
-	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
-		   MT_WFDMA0_GLO_CFG_TX_DMA_EN |
-		   MT_WFDMA0_GLO_CFG_RX_DMA_EN |
-		   MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
-		   MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
-		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO |
-		   MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
-
-	mt76_poll(dev, MT_WFDMA0_GLO_CFG,
-		  MT_WFDMA0_GLO_CFG_TX_DMA_BUSY |
-		  MT_WFDMA0_GLO_CFG_RX_DMA_BUSY, 0, 1000);
-
 	/* init tx queue */
 	ret = mt7921_init_tx_queues(&dev->phy, MT7921_TXQ_BAND0,
 				    MT7921_TX_RING_SIZE);
@@ -439,34 +428,7 @@ int mt7921_dma_init(struct mt7921_dev *dev)
 			  mt7921_poll_tx, NAPI_POLL_WEIGHT);
 	napi_enable(&dev->mt76.tx_napi);
 
-	/* configure perfetch settings */
-	mt7921_dma_prefetch(dev);
-
-	/* reset dma idx */
-	mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
-
-	/* configure delay interrupt */
-	mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
-
-	mt76_set(dev, MT_WFDMA0_GLO_CFG,
-		 MT_WFDMA0_GLO_CFG_TX_WB_DDONE |
-		 MT_WFDMA0_GLO_CFG_FIFO_LITTLE_ENDIAN |
-		 MT_WFDMA0_GLO_CFG_CLK_GAT_DIS |
-		 MT_WFDMA0_GLO_CFG_OMIT_TX_INFO |
-		 MT_WFDMA0_GLO_CFG_CSR_DISP_BASE_PTR_CHAIN_EN |
-		 MT_WFDMA0_GLO_CFG_OMIT_RX_INFO_PFET2);
-
-	mt76_set(dev, MT_WFDMA0_GLO_CFG,
-		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
-
-	mt76_set(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
-
-	/* enable interrupts for TX/RX rings */
-	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
-			  MT_INT_MCU_CMD);
-	mt76_set(dev, MT_MCU2HOST_SW_INT_ENA, MT_MCU_CMD_WAKE_RX_PCIE);
-
-	return 0;
+	return mt7921_dma_enable(dev);
 }
 
 void mt7921_dma_cleanup(struct mt7921_dev *dev)
-- 
cgit v1.2.3


From 77ba349101ac22bae2d4e635245b60173d49de2b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 19 Apr 2021 22:20:57 +0800
Subject: mt76: mt7921: introduce mt7921_wpdma_reinit_cond utility routine

Add mt7921_wpdma_reinit_cond to check dummy reg if driver needs to
reinitialized WPDMA after driver_own operation

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac.h   |  2 ++
 .../net/wireless/mediatek/mt76/mt7921/debugfs.c    |  3 +++
 drivers/net/wireless/mediatek/mt76/mt7921/dma.c    | 25 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  6 ++++++
 4 files changed, 36 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 9e61c107c640..6c889b90fd12 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -64,11 +64,13 @@ struct mt76_connac_pm {
 	struct delayed_work ps_work;
 	unsigned long last_activity;
 	unsigned long idle_timeout;
+
 	struct {
 		unsigned long last_wake_event;
 		unsigned long awake_time;
 		unsigned long last_doze_event;
 		unsigned long doze_time;
+		unsigned int lp_wake;
 	} stats;
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
index f3982578cc56..6ee423dd4027 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/debugfs.c
@@ -269,6 +269,7 @@ mt7921_pm_stats(struct seq_file *s, void *data)
 {
 	struct mt7921_dev *dev = dev_get_drvdata(s->private);
 	struct mt76_connac_pm *pm = &dev->pm;
+
 	unsigned long awake_time = pm->stats.awake_time;
 	unsigned long doze_time = pm->stats.doze_time;
 
@@ -281,6 +282,8 @@ mt7921_pm_stats(struct seq_file *s, void *data)
 		   jiffies_to_msecs(awake_time),
 		   jiffies_to_msecs(doze_time));
 
+	seq_printf(s, "low power wakes: %9d\n", pm->stats.lp_wake);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
index fb7f98d92377..71e664ee7652 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/dma.c
@@ -353,6 +353,31 @@ int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force)
 	return 0;
 }
 
+int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev)
+{
+	struct mt76_connac_pm *pm = &dev->pm;
+	int err;
+
+	/* check if the wpdma must be reinitialized */
+	if (mt7921_dma_need_reinit(dev)) {
+		/* disable interrutpts */
+		mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
+		mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0x0);
+
+		err = mt7921_wpdma_reset(dev, false);
+		if (err) {
+			dev_err(dev->mt76.dev, "wpdma reset failed\n");
+			return err;
+		}
+
+		/* enable interrutpts */
+		mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+		pm->stats.lp_wake++;
+	}
+
+	return 0;
+}
+
 int mt7921_dma_init(struct mt7921_dev *dev)
 {
 	/* Increase buffer size to receive large VHT/HE MPDUs */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 67a2571aa470..5bcbf1bc0746 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -254,6 +254,7 @@ int mt7921_eeprom_get_target_power(struct mt7921_dev *dev,
 void mt7921_eeprom_init_sku(struct mt7921_dev *dev);
 int mt7921_dma_init(struct mt7921_dev *dev);
 int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force);
+int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev);
 void mt7921_dma_cleanup(struct mt7921_dev *dev);
 int mt7921_run_firmware(struct mt7921_dev *dev);
 int mt7921_mcu_init(struct mt7921_dev *dev);
@@ -315,6 +316,11 @@ mt7921_l1_rmw(struct mt7921_dev *dev, u32 addr, u32 mask, u32 val)
 #define mt7921_l1_set(dev, addr, val)	mt7921_l1_rmw(dev, addr, 0, val)
 #define mt7921_l1_clear(dev, addr, val)	mt7921_l1_rmw(dev, addr, val, 0)
 
+static inline bool mt7921_dma_need_reinit(struct mt7921_dev *dev)
+{
+	return !mt76_get_field(dev, MT_WFDMA_DUMMY_CR, MT_WFDMA_NEED_REINIT);
+}
+
 void mt7921_mac_init(struct mt7921_dev *dev);
 bool mt7921_mac_wtbl_update(struct mt7921_dev *dev, int idx, u32 mask);
 void mt7921_mac_reset_counters(struct mt7921_phy *phy);
-- 
cgit v1.2.3


From c0b21255de9be39498b39e0f15e7598f3991e2ea Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 19 Apr 2021 22:20:58 +0800
Subject: mt76: connac: introduce mt76_connac_mcu_set_deep_sleep utility

Introduce mt76_connac_mcu_set_deep_sleep to enable deep sleep mode
and will be activated immediately when the host returns the ownership
to the device.

Co-developed-by: Leon Yen <leon.yen@mediatek.com>
Signed-off-by: Leon Yen <leon.yen@mediatek.com>
Co-developed-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: YN Chen <YN.Chen@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.c   | 22 ++++++++++++++--------
 .../net/wireless/mediatek/mt76/mt76_connac_mcu.h   | 10 ++++++++++
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index 79626e60a6ff..fe0ab5e5ff81 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1528,14 +1528,7 @@ EXPORT_SYMBOL_GPL(mt76_connac_mcu_sched_scan_enable);
 
 int mt76_connac_mcu_chip_config(struct mt76_dev *dev)
 {
-	struct {
-		__le16 id;
-		u8 type;
-		u8 resp_type;
-		__le16 data_size;
-		__le16 resv;
-		u8 data[320];
-	} req = {
+	struct mt76_connac_config req = {
 		.resp_type = 0,
 	};
 
@@ -1546,6 +1539,19 @@ int mt76_connac_mcu_chip_config(struct mt76_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt76_connac_mcu_chip_config);
 
+int mt76_connac_mcu_set_deep_sleep(struct mt76_dev *dev, bool enable)
+{
+	struct mt76_connac_config req = {
+		.resp_type = 0,
+	};
+
+	snprintf(req.data, sizeof(req.data), "KeepFullPwr %d", !enable);
+
+	return mt76_mcu_send_msg(dev, MCU_CMD_CHIP_CONFIG, &req, sizeof(req),
+				 false);
+}
+EXPORT_SYMBOL_GPL(mt76_connac_mcu_set_deep_sleep);
+
 void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 				    struct mt76_connac_coredump *coredump)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
index abefd9d3e5ea..a1096861d04a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.h
@@ -918,6 +918,15 @@ struct mt76_connac_tx_power_limit_tlv {
 	u8 pad2[32];
 } __packed;
 
+struct mt76_connac_config {
+	__le16 id;
+	u8 type;
+	u8 resp_type;
+	__le16 data_size;
+	__le16 resv;
+	u8 data[320];
+} __packed;
+
 #define to_wcid_lo(id)		FIELD_GET(GENMASK(7, 0), (u16)id)
 #define to_wcid_hi(id)		FIELD_GET(GENMASK(9, 8), (u16)id)
 
@@ -1020,6 +1029,7 @@ int mt76_connac_mcu_set_hif_suspend(struct mt76_dev *dev, bool suspend);
 void mt76_connac_mcu_set_suspend_iter(void *priv, u8 *mac,
 				      struct ieee80211_vif *vif);
 int mt76_connac_mcu_chip_config(struct mt76_dev *dev);
+int mt76_connac_mcu_set_deep_sleep(struct mt76_dev *dev, bool enable);
 void mt76_connac_mcu_coredump_event(struct mt76_dev *dev, struct sk_buff *skb,
 				    struct mt76_connac_coredump *coredump);
 int mt76_connac_mcu_set_rate_txpower(struct mt76_phy *phy);
-- 
cgit v1.2.3


From 1792eb0ecdc51282d37c7ad43167d088e2bf71df Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 19 Apr 2021 22:20:59 +0800
Subject: mt76: mt7921: enable deep sleep when the device suspends

Enable the deep sleep mode in suspend handler to reduce the power
consumption further.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index d5da98d36f63..25f27e9d379b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -201,6 +201,9 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 			return err;
 	}
 
+	if (!dev->pm.enable)
+		mt76_connac_mcu_set_deep_sleep(&dev->mt76, true);
+
 	napi_disable(&mdev->tx_napi);
 	mt76_worker_disable(&mdev->tx_worker);
 
@@ -239,6 +242,10 @@ restore:
 		napi_enable(&mdev->napi[i]);
 	}
 	napi_enable(&mdev->tx_napi);
+
+	if (!dev->pm.enable)
+		mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
+
 	if (hif_suspend)
 		mt76_connac_mcu_set_hif_suspend(mdev, false);
 
@@ -261,6 +268,8 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	if (err < 0)
 		return err;
 
+	mt7921_wpdma_reinit_cond(dev);
+
 	/* enable interrupt */
 	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
 	mt7921_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
@@ -279,6 +288,9 @@ static int mt7921_pci_resume(struct pci_dev *pdev)
 	napi_enable(&mdev->tx_napi);
 	napi_schedule(&mdev->tx_napi);
 
+	if (!dev->pm.enable)
+		mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
+
 	if (!test_bit(MT76_STATE_SUSPEND, &dev->mphy.state))
 		err = mt76_connac_mcu_set_hif_suspend(mdev, false);
 
-- 
cgit v1.2.3


From fe3fccde8870764ba3e60610774bd7bc9f8faeff Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Mon, 19 Apr 2021 23:58:05 +0800
Subject: mt76: mt7921: fix possible invalid register access

Disable the interrupt and synchronze for the pending irq handlers to ensure
the irq tasklet is not being scheduled after the suspend to avoid the
possible invalid register access acts when the host pcie controller is
suspended.

[17932.910534] mt7921e 0000:01:00.0: pci_pm_suspend+0x0/0x22c returned 0 after 21375 usecs
[17932.910590] pcieport 0000:00:00.0: calling pci_pm_suspend+0x0/0x22c @ 18565, parent: pci0000:00
[17932.910602] pcieport 0000:00:00.0: pci_pm_suspend+0x0/0x22c returned 0 after 8 usecs
[17932.910671] mtk-pcie 11230000.pcie: calling platform_pm_suspend+0x0/0x60 @ 22783, parent: soc
[17932.910674] mtk-pcie 11230000.pcie: platform_pm_suspend+0x0/0x60 returned 0 after 0 usecs

...

17933.615352] x1 : 00000000000d4200 x0 : ffffff8269ca2300
[17933.620666] Call trace:
[17933.623127]  mt76_mmio_rr+0x28/0xf0 [mt76]
[17933.627234]  mt7921_rr+0x38/0x44 [mt7921e]
[17933.631339]  mt7921_irq_tasklet+0x54/0x1d8 [mt7921e]
[17933.636309]  tasklet_action_common+0x12c/0x16c
[17933.640754]  tasklet_action+0x24/0x2c
[17933.644418]  __do_softirq+0x16c/0x344
[17933.648082]  irq_exit+0xa8/0xac
[17933.651224]  scheduler_ipi+0xd4/0x148
[17933.654890]  handle_IPI+0x164/0x2d4
[17933.658379]  gic_handle_irq+0x140/0x178
[17933.662216]  el1_irq+0xb8/0x180
[17933.665361]  cpuidle_enter_state+0xf8/0x204
[17933.669544]  cpuidle_enter+0x38/0x4c
[17933.673122]  do_idle+0x1a4/0x2a8
[17933.676352]  cpu_startup_entry+0x24/0x28
[17933.680276]  rest_init+0xd4/0xe0
[17933.683508]  arch_call_rest_init+0x10/0x18
[17933.687606]  start_kernel+0x340/0x3b4
[17933.691279] Code: aa0003f5 d503201f f953eaa8 8b344108 (b9400113)
[17933.697373] ---[ end trace a24b8e26ffbda3c5 ]---
[17933.767846] Kernel panic - not syncing: Fatal exception in interrupt

Fixes: ffa1bf97425b ("mt76: mt7921: introduce PM support")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 25f27e9d379b..c91c02e8f183 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -210,7 +210,6 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	mt76_for_each_q_rx(mdev, i) {
 		napi_disable(&mdev->napi[i]);
 	}
-	tasklet_kill(&dev->irq_tasklet);
 
 	pci_enable_wake(pdev, pci_choose_state(pdev, state), true);
 
@@ -225,6 +224,9 @@ static int mt7921_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	/* disable interrupt */
 	mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
+	mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0x0);
+	synchronize_irq(pdev->irq);
+	tasklet_kill(&dev->irq_tasklet);
 
 	err = mt7921_mcu_fw_pmctrl(dev);
 	if (err)
-- 
cgit v1.2.3


From b17aff3368916136ba2a87669bb3c319e5c6d0b2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 20 Apr 2021 23:05:31 +0200
Subject: mt76: move token_lock, token and token_count in mt76_dev

Move token_lock, token and token_count data structures in mt76_dev.
This is a preliminary patch to move token management in mt76 common
module since it is shared by mt7615, mt7915 and mt7921 drivers.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |  4 +++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 14 ++++----
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  3 --
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   |  4 +--
 .../net/wireless/mediatek/mt76/mt7615/pci_mac.c    | 14 ++++----
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  4 +--
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 42 +++++++++++-----------
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  4 ---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  4 +--
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 42 +++++++++++-----------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  4 ---
 11 files changed, 66 insertions(+), 73 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 49f3e5985422..54adc4921d03 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -659,6 +659,10 @@ struct mt76_dev {
 	struct mt76_worker tx_worker;
 	struct napi_struct tx_napi;
 
+	spinlock_t token_lock;
+	struct idr token;
+	int token_count;
+
 	wait_queue_head_t tx_wait;
 	struct sk_buff_head status_list;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 747bf90f9d8a..e3b727c17129 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1466,9 +1466,9 @@ mt7615_mac_tx_free_token(struct mt7615_dev *dev, u16 token)
 
 	trace_mac_tx_free(dev, token);
 
-	spin_lock_bh(&dev->token_lock);
-	txwi = idr_remove(&dev->token, token);
-	spin_unlock_bh(&dev->token_lock);
+	spin_lock_bh(&mdev->token_lock);
+	txwi = idr_remove(&mdev->token, token);
+	spin_unlock_bh(&mdev->token_lock);
 
 	if (!txwi)
 		return;
@@ -1977,8 +1977,8 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
 	struct mt76_txwi_cache *txwi;
 	int id;
 
-	spin_lock_bh(&dev->token_lock);
-	idr_for_each_entry(&dev->token, txwi, id) {
+	spin_lock_bh(&dev->mt76.token_lock);
+	idr_for_each_entry(&dev->mt76.token, txwi, id) {
 		mt7615_txp_skb_unmap(&dev->mt76, txwi);
 		if (txwi->skb) {
 			struct ieee80211_hw *hw;
@@ -1988,8 +1988,8 @@ void mt7615_tx_token_put(struct mt7615_dev *dev)
 		}
 		mt76_put_txwi(&dev->mt76, txwi);
 	}
-	spin_unlock_bh(&dev->token_lock);
-	idr_destroy(&dev->token);
+	spin_unlock_bh(&dev->mt76.token_lock);
+	idr_destroy(&dev->mt76.token);
 }
 EXPORT_SYMBOL_GPL(mt7615_tx_token_put);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 4c533b8ffa47..989f05ed4377 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -263,9 +263,6 @@ struct mt7615_dev {
 	bool flash_eeprom;
 	bool dbdc_support;
 
-	spinlock_t token_lock;
-	struct idr token;
-
 	u8 fw_ver;
 
 	struct work_struct rate_work;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 736d19699a03..89475d5931a6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -40,8 +40,8 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->mcu_work, mt7615_pci_init_work);
-	spin_lock_init(&dev->token_lock);
-	idr_init(&dev->token);
+	spin_lock_init(&dev->mt76.token_lock);
+	idr_init(&dev->mt76.token);
 
 	ret = mt7615_eeprom_init(dev, addr);
 	if (ret < 0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index d20962cdecc8..d5aff409132d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -37,9 +37,9 @@ void mt7615_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 			token = le16_to_cpu(txp->hw.msdu_id[0]) &
 				~MT_MSDU_ID_VALID;
 
-		spin_lock_bh(&dev->token_lock);
-		t = idr_remove(&dev->token, token);
-		spin_unlock_bh(&dev->token_lock);
+		spin_lock_bh(&mdev->token_lock);
+		t = idr_remove(&mdev->token, token);
+		spin_unlock_bh(&mdev->token_lock);
 		e->skb = t ? t->skb : NULL;
 	}
 
@@ -161,9 +161,9 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&dev->token_lock);
-	id = idr_alloc(&dev->token, t, 0, MT7615_TOKEN_SIZE, GFP_ATOMIC);
-	spin_unlock_bh(&dev->token_lock);
+	spin_lock_bh(&mdev->token_lock);
+	id = idr_alloc(&mdev->token, t, 0, MT7615_TOKEN_SIZE, GFP_ATOMIC);
+	spin_unlock_bh(&mdev->token_lock);
 	if (id < 0)
 		return id;
 
@@ -314,7 +314,7 @@ void mt7615_mac_reset_work(struct work_struct *work)
 		mt7615_dma_reset(dev);
 
 		mt7615_tx_token_put(dev);
-		idr_init(&dev->token);
+		idr_init(&dev->mt76.token);
 
 		mt76_wr(dev, MT_WPDMA_MEM_RNG_ERR, 0);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 2d6b38d39356..b97834fae0ba 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -351,8 +351,8 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->init_work, mt7915_init_work);
-	spin_lock_init(&dev->token_lock);
-	idr_init(&dev->token);
+	spin_lock_init(&dev->mt76.token_lock);
+	idr_init(&dev->mt76.token);
 
 	dev->dbdc_support = !!(mt76_rr(dev, MT_HW_BOUND) & BIT(5));
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 6a4b57509751..a2d60be00f9f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1046,14 +1046,14 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&dev->token_lock);
-	id = idr_alloc(&dev->token, t, 0, MT7915_TOKEN_SIZE, GFP_ATOMIC);
+	spin_lock_bh(&mdev->token_lock);
+	id = idr_alloc(&mdev->token, t, 0, MT7915_TOKEN_SIZE, GFP_ATOMIC);
 	if (id >= 0)
-		dev->token_count++;
+		mdev->token_count++;
 
-	if (dev->token_count >= MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR)
+	if (mdev->token_count >= MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR)
 		mt7915_set_tx_blocked(dev, true);
-	spin_unlock_bh(&dev->token_lock);
+	spin_unlock_bh(&mdev->token_lock);
 
 	if (id < 0)
 		return id;
@@ -1218,14 +1218,14 @@ void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 		msdu = FIELD_GET(MT_TX_FREE_MSDU_ID, info);
 		stat = FIELD_GET(MT_TX_FREE_STATUS, info);
 
-		spin_lock_bh(&dev->token_lock);
-		txwi = idr_remove(&dev->token, msdu);
+		spin_lock_bh(&mdev->token_lock);
+		txwi = idr_remove(&mdev->token, msdu);
 		if (txwi)
-			dev->token_count--;
-		if (dev->token_count < MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR &&
+			mdev->token_count--;
+		if (mdev->token_count < MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR &&
 		    dev->mphy.q_tx[0]->blocked)
 			wake = true;
-		spin_unlock_bh(&dev->token_lock);
+		spin_unlock_bh(&mdev->token_lock);
 
 		if (!txwi)
 			continue;
@@ -1257,9 +1257,9 @@ void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 	mt7915_mac_sta_poll(dev);
 
 	if (wake) {
-		spin_lock_bh(&dev->token_lock);
+		spin_lock_bh(&mdev->token_lock);
 		mt7915_set_tx_blocked(dev, false);
-		spin_unlock_bh(&dev->token_lock);
+		spin_unlock_bh(&mdev->token_lock);
 	}
 
 	mt76_worker_schedule(&dev->mt76.tx_worker);
@@ -1290,9 +1290,9 @@ void mt7915_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 
 		txp = mt7915_txwi_to_txp(mdev, e->txwi);
 
-		spin_lock_bh(&dev->token_lock);
-		t = idr_remove(&dev->token, le16_to_cpu(txp->token));
-		spin_unlock_bh(&dev->token_lock);
+		spin_lock_bh(&mdev->token_lock);
+		t = idr_remove(&mdev->token, le16_to_cpu(txp->token));
+		spin_unlock_bh(&mdev->token_lock);
 		e->skb = t ? t->skb : NULL;
 	}
 
@@ -1588,8 +1588,8 @@ void mt7915_tx_token_put(struct mt7915_dev *dev)
 	struct mt76_txwi_cache *txwi;
 	int id;
 
-	spin_lock_bh(&dev->token_lock);
-	idr_for_each_entry(&dev->token, txwi, id) {
+	spin_lock_bh(&dev->mt76.token_lock);
+	idr_for_each_entry(&dev->mt76.token, txwi, id) {
 		mt7915_txp_skb_unmap(&dev->mt76, txwi);
 		if (txwi->skb) {
 			struct ieee80211_hw *hw;
@@ -1598,10 +1598,10 @@ void mt7915_tx_token_put(struct mt7915_dev *dev)
 			ieee80211_free_txskb(hw, txwi->skb);
 		}
 		mt76_put_txwi(&dev->mt76, txwi);
-		dev->token_count--;
+		dev->mt76.token_count--;
 	}
-	spin_unlock_bh(&dev->token_lock);
-	idr_destroy(&dev->token);
+	spin_unlock_bh(&dev->mt76.token_lock);
+	idr_destroy(&dev->mt76.token);
 }
 
 /* system error recovery */
@@ -1649,7 +1649,7 @@ void mt7915_mac_reset_work(struct work_struct *work)
 		mt7915_dma_reset(dev);
 
 		mt7915_tx_token_put(dev);
-		idr_init(&dev->token);
+		idr_init(&dev->mt76.token);
 
 		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_INIT);
 		mt7915_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 80eb35231a1a..582c2bfdf070 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -187,10 +187,6 @@ struct mt7915_dev {
 
 	u32 hw_pattern;
 
-	spinlock_t token_lock;
-	int token_count;
-	struct idr token;
-
 	bool dbdc_support;
 	bool flash_mode;
 	bool fw_debug;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index cec17a249a8c..a35fdd1b34f9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -170,8 +170,8 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 {
 	int ret, idx;
 
-	spin_lock_init(&dev->token_lock);
-	idr_init(&dev->token);
+	spin_lock_init(&dev->mt76.token_lock);
+	idr_init(&dev->mt76.token);
 
 	ret = mt7921_dma_init(dev);
 	if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index f1efe9eaf791..d1ae4c51c14b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -824,14 +824,14 @@ int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&dev->token_lock);
-	id = idr_alloc(&dev->token, t, 0, MT7921_TOKEN_SIZE, GFP_ATOMIC);
+	spin_lock_bh(&mdev->token_lock);
+	id = idr_alloc(&mdev->token, t, 0, MT7921_TOKEN_SIZE, GFP_ATOMIC);
 	if (id >= 0)
-		dev->token_count++;
+		mdev->token_count++;
 
-	if (dev->token_count >= MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR)
+	if (mdev->token_count >= MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR)
 		mt7921_set_tx_blocked(dev, true);
-	spin_unlock_bh(&dev->token_lock);
+	spin_unlock_bh(&mdev->token_lock);
 
 	if (id < 0)
 		return id;
@@ -994,14 +994,14 @@ void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb)
 		msdu = FIELD_GET(MT_TX_FREE_MSDU_ID, info);
 		stat = FIELD_GET(MT_TX_FREE_STATUS, info);
 
-		spin_lock_bh(&dev->token_lock);
-		txwi = idr_remove(&dev->token, msdu);
+		spin_lock_bh(&mdev->token_lock);
+		txwi = idr_remove(&mdev->token, msdu);
 		if (txwi)
-			dev->token_count--;
-		if (dev->token_count < MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR &&
+			mdev->token_count--;
+		if (mdev->token_count < MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR &&
 		    dev->mphy.q_tx[0]->blocked)
 			wake = true;
-		spin_unlock_bh(&dev->token_lock);
+		spin_unlock_bh(&mdev->token_lock);
 
 		if (!txwi)
 			continue;
@@ -1031,9 +1031,9 @@ void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb)
 	}
 
 	if (wake) {
-		spin_lock_bh(&dev->token_lock);
+		spin_lock_bh(&mdev->token_lock);
 		mt7921_set_tx_blocked(dev, false);
-		spin_unlock_bh(&dev->token_lock);
+		spin_unlock_bh(&mdev->token_lock);
 	}
 
 	napi_consume_skb(skb, 1);
@@ -1067,9 +1067,9 @@ void mt7921_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 		txp = mt7921_txwi_to_txp(mdev, e->txwi);
 
 		token = le16_to_cpu(txp->hw.msdu_id[0]) & ~MT_MSDU_ID_VALID;
-		spin_lock_bh(&dev->token_lock);
-		t = idr_remove(&dev->token, token);
-		spin_unlock_bh(&dev->token_lock);
+		spin_lock_bh(&mdev->token_lock);
+		t = idr_remove(&mdev->token, token);
+		spin_unlock_bh(&mdev->token_lock);
 		e->skb = t ? t->skb : NULL;
 	}
 
@@ -1209,8 +1209,8 @@ void mt7921_tx_token_put(struct mt7921_dev *dev)
 	struct mt76_txwi_cache *txwi;
 	int id;
 
-	spin_lock_bh(&dev->token_lock);
-	idr_for_each_entry(&dev->token, txwi, id) {
+	spin_lock_bh(&dev->mt76.token_lock);
+	idr_for_each_entry(&dev->mt76.token, txwi, id) {
 		mt7921_txp_skb_unmap(&dev->mt76, txwi);
 		if (txwi->skb) {
 			struct ieee80211_hw *hw;
@@ -1219,10 +1219,10 @@ void mt7921_tx_token_put(struct mt7921_dev *dev)
 			ieee80211_free_txskb(hw, txwi->skb);
 		}
 		mt76_put_txwi(&dev->mt76, txwi);
-		dev->token_count--;
+		dev->mt76.token_count--;
 	}
-	spin_unlock_bh(&dev->token_lock);
-	idr_destroy(&dev->token);
+	spin_unlock_bh(&dev->mt76.token_lock);
+	idr_destroy(&dev->mt76.token);
 }
 
 static void
@@ -1261,7 +1261,7 @@ mt7921_mac_reset(struct mt7921_dev *dev)
 	napi_disable(&dev->mt76.tx_napi);
 
 	mt7921_tx_token_put(dev);
-	idr_init(&dev->token);
+	idr_init(&dev->mt76.token);
 
 	err = mt7921_wpdma_reset(dev, true);
 	if (err)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 5bcbf1bc0746..f209069ab1b2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -161,10 +161,6 @@ struct mt7921_dev {
 	struct list_head sta_poll_list;
 	spinlock_t sta_poll_lock;
 
-	spinlock_t token_lock;
-	int token_count;
-	struct idr token;
-
 	u8 fw_debug;
 
 	struct mt76_connac_pm pm;
-- 
cgit v1.2.3


From d089692bc7938a1030db98d493497cda9afe4b43 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 20 Apr 2021 23:05:32 +0200
Subject: mt76: move token utilities in mt76 common module

Move token management in mt76 common module since it is shared between
mt7615, mt7915 and mt7921 drivers

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          | 50 ++++++++++++++--
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  6 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   |  1 +
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   |  3 +-
 .../net/wireless/mediatek/mt76/mt7615/pci_mac.c    |  8 +--
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  3 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 52 ++---------------
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  1 -
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c    |  1 +
 drivers/net/wireless/mediatek/mt76/mt7921/init.c   |  4 +-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 46 ++-------------
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  1 -
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c    |  1 +
 drivers/net/wireless/mediatek/mt76/tx.c            | 68 ++++++++++++++++++++++
 14 files changed, 132 insertions(+), 113 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 54adc4921d03..cc5d95aeebbd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -17,12 +17,14 @@
 #include "util.h"
 #include "testmode.h"
 
-#define MT_MCU_RING_SIZE    32
-#define MT_RX_BUF_SIZE      2048
-#define MT_SKB_HEAD_LEN     128
+#define MT_MCU_RING_SIZE	32
+#define MT_RX_BUF_SIZE		2048
+#define MT_SKB_HEAD_LEN		128
 
-#define MT_MAX_NON_AQL_PKT  16
-#define MT_TXQ_FREE_THR     32
+#define MT_MAX_NON_AQL_PKT	16
+#define MT_TXQ_FREE_THR		32
+
+#define MT76_TOKEN_FREE_THR	64
 
 struct mt76_dev;
 struct mt76_phy;
@@ -332,6 +334,7 @@ struct mt76_driver_ops {
 	u32 drv_flags;
 	u32 survey_flags;
 	u16 txwi_size;
+	u16 token_size;
 	u8 mcs_rates;
 
 	void (*update_survey)(struct mt76_dev *dev);
@@ -1215,4 +1218,41 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
 			      struct mt76_power_limits *dest,
 			      s8 target_power);
 
+struct mt76_txwi_cache *
+mt76_token_release(struct mt76_dev *dev, int token, bool *wake);
+int mt76_token_consume(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi);
+void mt76_token_init(struct mt76_dev *dev);
+void __mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked);
+
+static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
+{
+	spin_lock_bh(&dev->token_lock);
+	__mt76_set_tx_blocked(dev, blocked);
+	spin_unlock_bh(&dev->token_lock);
+}
+
+static inline int
+mt76_token_get(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi)
+{
+	int token;
+
+	spin_lock_bh(&dev->token_lock);
+	token = idr_alloc(&dev->token, *ptxwi, 0, dev->drv->token_size,
+			  GFP_ATOMIC);
+	spin_unlock_bh(&dev->token_lock);
+
+	return token;
+}
+
+static inline struct mt76_txwi_cache *
+mt76_token_put(struct mt76_dev *dev, int token)
+{
+	struct mt76_txwi_cache *txwi;
+
+	spin_lock_bh(&dev->token_lock);
+	txwi = idr_remove(&dev->token, token);
+	spin_unlock_bh(&dev->token_lock);
+
+	return txwi;
+}
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e3b727c17129..d11375661875 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1465,11 +1465,7 @@ mt7615_mac_tx_free_token(struct mt7615_dev *dev, u16 token)
 	u8 wcid;
 
 	trace_mac_tx_free(dev, token);
-
-	spin_lock_bh(&mdev->token_lock);
-	txwi = idr_remove(&mdev->token, token);
-	spin_unlock_bh(&mdev->token_lock);
-
+	txwi = mt76_token_put(mdev, token);
 	if (!txwi)
 		return;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index be93e7ad1279..202ea235415e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -190,6 +190,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 		.survey_flags = SURVEY_INFO_TIME_TX |
 				SURVEY_INFO_TIME_RX |
 				SURVEY_INFO_TIME_BSS_RX,
+		.token_size = MT7615_TOKEN_SIZE,
 		.tx_prepare_skb = mt7615_tx_prepare_skb,
 		.tx_complete_skb = mt7615_tx_complete_skb,
 		.rx_skb = mt7615_queue_rx_skb,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 89475d5931a6..5d174aa7f88d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -40,8 +40,7 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->mcu_work, mt7615_pci_init_work);
-	spin_lock_init(&dev->mt76.token_lock);
-	idr_init(&dev->mt76.token);
+	mt76_token_init(&dev->mt76);
 
 	ret = mt7615_eeprom_init(dev, addr);
 	if (ret < 0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
index d5aff409132d..d7cbef752f9f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -37,9 +37,7 @@ void mt7615_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 			token = le16_to_cpu(txp->hw.msdu_id[0]) &
 				~MT_MSDU_ID_VALID;
 
-		spin_lock_bh(&mdev->token_lock);
-		t = idr_remove(&mdev->token, token);
-		spin_unlock_bh(&mdev->token_lock);
+		t = mt76_token_put(mdev, token);
 		e->skb = t ? t->skb : NULL;
 	}
 
@@ -161,9 +159,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&mdev->token_lock);
-	id = idr_alloc(&mdev->token, t, 0, MT7615_TOKEN_SIZE, GFP_ATOMIC);
-	spin_unlock_bh(&mdev->token_lock);
+	id = mt76_token_get(mdev, &t);
 	if (id < 0)
 		return id;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index b97834fae0ba..c7e9808f228f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -351,8 +351,7 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->init_work, mt7915_init_work);
-	spin_lock_init(&dev->mt76.token_lock);
-	idr_init(&dev->mt76.token);
+	mt76_token_init(&dev->mt76);
 
 	dev->dbdc_support = !!(mt76_rr(dev, MT_HW_BOUND) & BIT(5));
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index a2d60be00f9f..7a9759fb79d8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -974,26 +974,6 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
 		mt7915_mac_write_txwi_tm(mphy->priv, txwi, skb);
 }
 
-static void
-mt7915_set_tx_blocked(struct mt7915_dev *dev, bool blocked)
-{
-	struct mt76_phy *mphy = &dev->mphy, *mphy2 = dev->mt76.phy2;
-	struct mt76_queue *q, *q2 = NULL;
-
-	q = mphy->q_tx[0];
-	if (blocked == q->blocked)
-		return;
-
-	q->blocked = blocked;
-	if (mphy2) {
-		q2 = mphy2->q_tx[0];
-		q2->blocked = blocked;
-	}
-
-	if (!blocked)
-		mt76_worker_schedule(&dev->mt76.tx_worker);
-}
-
 int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
@@ -1046,15 +1026,7 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&mdev->token_lock);
-	id = idr_alloc(&mdev->token, t, 0, MT7915_TOKEN_SIZE, GFP_ATOMIC);
-	if (id >= 0)
-		mdev->token_count++;
-
-	if (mdev->token_count >= MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR)
-		mt7915_set_tx_blocked(dev, true);
-	spin_unlock_bh(&mdev->token_lock);
-
+	id = mt76_token_consume(mdev, &t);
 	if (id < 0)
 		return id;
 
@@ -1218,15 +1190,7 @@ void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 		msdu = FIELD_GET(MT_TX_FREE_MSDU_ID, info);
 		stat = FIELD_GET(MT_TX_FREE_STATUS, info);
 
-		spin_lock_bh(&mdev->token_lock);
-		txwi = idr_remove(&mdev->token, msdu);
-		if (txwi)
-			mdev->token_count--;
-		if (mdev->token_count < MT7915_TOKEN_SIZE - MT7915_TOKEN_FREE_THR &&
-		    dev->mphy.q_tx[0]->blocked)
-			wake = true;
-		spin_unlock_bh(&mdev->token_lock);
-
+		txwi = mt76_token_release(mdev, msdu, &wake);
 		if (!txwi)
 			continue;
 
@@ -1256,11 +1220,8 @@ void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	mt7915_mac_sta_poll(dev);
 
-	if (wake) {
-		spin_lock_bh(&mdev->token_lock);
-		mt7915_set_tx_blocked(dev, false);
-		spin_unlock_bh(&mdev->token_lock);
-	}
+	if (wake)
+		mt76_set_tx_blocked(&dev->mt76, false);
 
 	mt76_worker_schedule(&dev->mt76.tx_worker);
 
@@ -1289,10 +1250,7 @@ void mt7915_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 		struct mt7915_txp *txp;
 
 		txp = mt7915_txwi_to_txp(mdev, e->txwi);
-
-		spin_lock_bh(&mdev->token_lock);
-		t = idr_remove(&mdev->token, le16_to_cpu(txp->token));
-		spin_unlock_bh(&mdev->token_lock);
+		t = mt76_token_put(mdev, le16_to_cpu(txp->token));
 		e->skb = t ? t->skb : NULL;
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 582c2bfdf070..4ea8972d4e2f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -32,7 +32,6 @@
 
 #define MT7915_EEPROM_SIZE		3584
 #define MT7915_TOKEN_SIZE		8192
-#define MT7915_TOKEN_FREE_THR		64
 
 #define MT7915_CFEND_RATE_DEFAULT	0x49	/* OFDM 24M */
 #define MT7915_CFEND_RATE_11B		0x03	/* 11B LP, 11M */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index ebfc4c15fef2..643f171884cf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -212,6 +212,7 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 		.survey_flags = SURVEY_INFO_TIME_TX |
 				SURVEY_INFO_TIME_RX |
 				SURVEY_INFO_TIME_BSS_RX,
+		.token_size = MT7915_TOKEN_SIZE,
 		.tx_prepare_skb = mt7915_tx_prepare_skb,
 		.tx_complete_skb = mt7915_tx_complete_skb,
 		.rx_skb = mt7915_queue_rx_skb,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index a35fdd1b34f9..c32fea6378af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -170,9 +170,7 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 {
 	int ret, idx;
 
-	spin_lock_init(&dev->mt76.token_lock);
-	idr_init(&dev->mt76.token);
-
+	mt76_token_init(&dev->mt76);
 	ret = mt7921_dma_init(dev);
 	if (ret)
 		return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index d1ae4c51c14b..256151bf6bf8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -785,20 +785,6 @@ mt7921_write_hw_txp(struct mt7921_dev *dev, struct mt76_tx_info *tx_info,
 	}
 }
 
-static void mt7921_set_tx_blocked(struct mt7921_dev *dev, bool blocked)
-{
-	struct mt76_phy *mphy = &dev->mphy;
-	struct mt76_queue *q;
-
-	q = mphy->q_tx[0];
-	if (blocked == q->blocked)
-		return;
-
-	q->blocked = blocked;
-	if (!blocked)
-		mt76_worker_schedule(&dev->mt76.tx_worker);
-}
-
 int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
 			  struct ieee80211_sta *sta,
@@ -824,15 +810,7 @@ int mt7921_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
 	t->skb = tx_info->skb;
 
-	spin_lock_bh(&mdev->token_lock);
-	id = idr_alloc(&mdev->token, t, 0, MT7921_TOKEN_SIZE, GFP_ATOMIC);
-	if (id >= 0)
-		mdev->token_count++;
-
-	if (mdev->token_count >= MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR)
-		mt7921_set_tx_blocked(dev, true);
-	spin_unlock_bh(&mdev->token_lock);
-
+	id = mt76_token_consume(mdev, &t);
 	if (id < 0)
 		return id;
 
@@ -994,15 +972,7 @@ void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb)
 		msdu = FIELD_GET(MT_TX_FREE_MSDU_ID, info);
 		stat = FIELD_GET(MT_TX_FREE_STATUS, info);
 
-		spin_lock_bh(&mdev->token_lock);
-		txwi = idr_remove(&mdev->token, msdu);
-		if (txwi)
-			mdev->token_count--;
-		if (mdev->token_count < MT7921_TOKEN_SIZE - MT7921_TOKEN_FREE_THR &&
-		    dev->mphy.q_tx[0]->blocked)
-			wake = true;
-		spin_unlock_bh(&mdev->token_lock);
-
+		txwi = mt76_token_release(mdev, msdu, &wake);
 		if (!txwi)
 			continue;
 
@@ -1030,11 +1000,8 @@ void mt7921_mac_tx_free(struct mt7921_dev *dev, struct sk_buff *skb)
 		mt76_put_txwi(mdev, txwi);
 	}
 
-	if (wake) {
-		spin_lock_bh(&mdev->token_lock);
-		mt7921_set_tx_blocked(dev, false);
-		spin_unlock_bh(&mdev->token_lock);
-	}
+	if (wake)
+		mt76_set_tx_blocked(&dev->mt76, false);
 
 	napi_consume_skb(skb, 1);
 
@@ -1065,11 +1032,8 @@ void mt7921_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
 		u16 token;
 
 		txp = mt7921_txwi_to_txp(mdev, e->txwi);
-
 		token = le16_to_cpu(txp->hw.msdu_id[0]) & ~MT_MSDU_ID_VALID;
-		spin_lock_bh(&mdev->token_lock);
-		t = idr_remove(&mdev->token, token);
-		spin_unlock_bh(&mdev->token_lock);
+		t = mt76_token_put(mdev, token);
 		e->skb = t ? t->skb : NULL;
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index f209069ab1b2..7997e34ac54b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -35,7 +35,6 @@
 
 #define MT7921_EEPROM_SIZE		3584
 #define MT7921_TOKEN_SIZE		8192
-#define MT7921_TOKEN_FREE_THR		64
 
 #define MT7921_CFEND_RATE_DEFAULT	0x49	/* OFDM 24M */
 #define MT7921_CFEND_RATE_11B		0x03	/* 11B LP, 11M */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index c91c02e8f183..fa02d934f0bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -99,6 +99,7 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
 		.survey_flags = SURVEY_INFO_TIME_TX |
 				SURVEY_INFO_TIME_RX |
 				SURVEY_INFO_TIME_BSS_RX,
+		.token_size = MT7921_TOKEN_SIZE,
 		.tx_prepare_skb = mt7921_tx_prepare_skb,
 		.tx_complete_skb = mt7921_tx_complete_skb,
 		.rx_skb = mt7921_queue_rx_skb,
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 236eaa351f53..29a56f304aae 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -648,3 +648,71 @@ void mt76_queue_tx_complete(struct mt76_dev *dev, struct mt76_queue *q,
 	spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(mt76_queue_tx_complete);
+
+void mt76_token_init(struct mt76_dev *dev)
+{
+	spin_lock_init(&dev->token_lock);
+	idr_init(&dev->token);
+}
+EXPORT_SYMBOL_GPL(mt76_token_init);
+
+void __mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
+{
+	struct mt76_phy *phy = &dev->phy, *phy2 = dev->phy2;
+	struct mt76_queue *q, *q2 = NULL;
+
+	q = phy->q_tx[0];
+	if (blocked == q->blocked)
+		return;
+
+	q->blocked = blocked;
+	if (phy2) {
+		q2 = phy2->q_tx[0];
+		q2->blocked = blocked;
+	}
+
+	if (!blocked)
+		mt76_worker_schedule(&dev->tx_worker);
+}
+EXPORT_SYMBOL_GPL(__mt76_set_tx_blocked);
+
+int mt76_token_consume(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi)
+{
+	int token;
+
+	spin_lock_bh(&dev->token_lock);
+
+	token = idr_alloc(&dev->token, *ptxwi, 0, dev->drv->token_size,
+			  GFP_ATOMIC);
+	if (token >= 0)
+		dev->token_count++;
+
+	if (dev->token_count >= dev->drv->token_size - MT76_TOKEN_FREE_THR)
+		__mt76_set_tx_blocked(dev, true);
+
+	spin_unlock_bh(&dev->token_lock);
+
+	return token;
+}
+EXPORT_SYMBOL_GPL(mt76_token_consume);
+
+struct mt76_txwi_cache *
+mt76_token_release(struct mt76_dev *dev, int token, bool *wake)
+{
+	struct mt76_txwi_cache *txwi;
+
+	spin_lock_bh(&dev->token_lock);
+
+	txwi = idr_remove(&dev->token, token);
+	if (txwi)
+		dev->token_count--;
+
+	if (dev->token_count < dev->drv->token_size - MT76_TOKEN_FREE_THR &&
+	    dev->phy.q_tx[0]->blocked)
+		*wake = true;
+
+	spin_unlock_bh(&dev->token_lock);
+
+	return txwi;
+}
+EXPORT_SYMBOL_GPL(mt76_token_release);
-- 
cgit v1.2.3


From 422f351193401428d62035c3f5a933ed46967517 Mon Sep 17 00:00:00 2001
From: Shayne Chen <shayne.chen@mediatek.com>
Date: Wed, 21 Apr 2021 16:39:45 +0800
Subject: mt76: mt7915: do not read rf value from efuse in flash mode

Do not read rf value from efuse when driver is configured to
use flash mode.

Tested-by: Bruce Chuang <bruce-ss.chuang@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 677c913c51b0..8ededf2e5279 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -8,7 +8,7 @@ static u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset)
 {
 	u8 *data = dev->mt76.eeprom.data;
 
-	if (data[offset] == 0xff)
+	if (data[offset] == 0xff && !dev->flash_mode)
 		mt7915_mcu_get_eeprom(dev, offset);
 
 	return data[offset];
-- 
cgit v1.2.3


From d43b3257621dfe57c71d875afd3f624b9a042fc5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 12:28:33 +0200
Subject: mt76: mt7921: get rid of mcu_reset function pointer

since mcu_reset it used only by mt7921, move the reset callback to
mt7921_mcu_parse_response routine and get rid of the function pointer.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mcu.c        | 4 ----
 drivers/net/wireless/mediatek/mt76/mt76.h       | 1 -
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 3 ++-
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c
index 70624cd07449..d3a5e2c4f12a 100644
--- a/drivers/net/wireless/mediatek/mt76/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mcu.c
@@ -99,10 +99,6 @@ int mt76_mcu_skb_send_and_get_msg(struct mt76_dev *dev, struct sk_buff *skb,
 			dev_kfree_skb(skb);
 	} while (ret == -EAGAIN);
 
-	/* notify driver code to reset the mcu */
-	if (ret == -ETIMEDOUT && dev->mcu_ops->mcu_reset)
-		dev->mcu_ops->mcu_reset(dev);
-
 out:
 	mutex_unlock(&dev->mcu.mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index cc5d95aeebbd..3b09ea8176a3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -168,7 +168,6 @@ struct mt76_mcu_ops {
 	int (*mcu_rd_rp)(struct mt76_dev *dev, u32 base,
 			 struct mt76_reg_pair *rp, int len);
 	int (*mcu_restart)(struct mt76_dev *dev);
-	void (*mcu_reset)(struct mt76_dev *dev);
 };
 
 struct mt76_queue_ops {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index c00295b63ba8..35aaba7deb5d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -162,6 +162,8 @@ mt7921_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	if (!skb) {
 		dev_err(mdev->dev, "Message %08x (seq %d) timeout\n",
 			cmd, seq);
+		mt7921_reset(mdev);
+
 		return -ETIMEDOUT;
 	}
 
@@ -974,7 +976,6 @@ int mt7921_mcu_init(struct mt7921_dev *dev)
 		.mcu_skb_send_msg = mt7921_mcu_send_message,
 		.mcu_parse_response = mt7921_mcu_parse_response,
 		.mcu_restart = mt7921_mcu_restart,
-		.mcu_reset = mt7921_reset,
 	};
 
 	dev->mt76.mcu_ops = &mt7921_mcu_ops;
-- 
cgit v1.2.3


From c18ba14c4bc953250aa497d03855592bd133ccde Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 12:43:48 +0200
Subject: mt76: mt7921: improve doze opportunity

Increase mt7921 mac work timeout in oder to have move sleep
opportunities

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c    | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 256151bf6bf8..cd5661ec1f1f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1373,12 +1373,12 @@ void mt7921_mac_work(struct work_struct *work)
 	mt7921_mutex_acquire(phy->dev);
 
 	mt76_update_survey(mphy->dev);
-	if (++mphy->mac_work_count == 5) {
+	if (++mphy->mac_work_count == 2) {
 		mphy->mac_work_count = 0;
 
 		mt7921_mac_update_mib_stats(phy);
 	}
-	if (++phy->sta_work_count == 10) {
+	if (++phy->sta_work_count == 4) {
 		phy->sta_work_count = 0;
 		mt7921_mac_sta_stats_work(phy);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 7997e34ac54b..aa3459723029 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -18,7 +18,7 @@
 
 #define MT7921_PM_TIMEOUT		(HZ / 12)
 #define MT7921_HW_SCAN_TIMEOUT		(HZ / 10)
-#define MT7921_WATCHDOG_TIME		(HZ / 10)
+#define MT7921_WATCHDOG_TIME		(HZ / 4)
 #define MT7921_RESET_TIMEOUT		(30 * HZ)
 
 #define MT7921_TX_RING_SIZE		2048
-- 
cgit v1.2.3


From abe912ae3cd42f95beeff8eb67acbe0ca8b8aedd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 12:43:49 +0200
Subject: mt76: mt7663: add awake and doze time accounting

Similar to mt7921, introduce awake and doze time accounting
for runtime pm.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/debugfs.c    | 32 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 24 ++++++++++++----
 3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 1b414220521a..676bb22726d6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -69,6 +69,7 @@ static int
 mt7615_pm_set(void *data, u64 val)
 {
 	struct mt7615_dev *dev = data;
+	struct mt76_connac_pm *pm = &dev->pm;
 	int ret = 0;
 
 	if (!mt7615_wait_for_mcu_init(dev))
@@ -77,6 +78,9 @@ mt7615_pm_set(void *data, u64 val)
 	if (!mt7615_firmware_offload(dev) || !mt76_is_mmio(&dev->mt76))
 		return -EOPNOTSUPP;
 
+	if (val == pm->enable)
+		return 0;
+
 	mt7615_mutex_acquire(dev);
 
 	if (dev->phy.n_beacon_vif) {
@@ -84,7 +88,11 @@ mt7615_pm_set(void *data, u64 val)
 		goto out;
 	}
 
-	dev->pm.enable = val;
+	if (!pm->enable) {
+		pm->stats.last_wake_event = jiffies;
+		pm->stats.last_doze_event = jiffies;
+	}
+	pm->enable = val;
 out:
 	mt7615_mutex_release(dev);
 
@@ -103,6 +111,26 @@ mt7615_pm_get(void *data, u64 *val)
 
 DEFINE_DEBUGFS_ATTRIBUTE(fops_pm, mt7615_pm_get, mt7615_pm_set, "%lld\n");
 
+static int
+mt7615_pm_stats(struct seq_file *s, void *data)
+{
+	struct mt7615_dev *dev = dev_get_drvdata(s->private);
+	struct mt76_connac_pm *pm = &dev->pm;
+	unsigned long awake_time = pm->stats.awake_time;
+	unsigned long doze_time = pm->stats.doze_time;
+
+	if (!test_bit(MT76_STATE_PM, &dev->mphy.state))
+		awake_time += jiffies - pm->stats.last_wake_event;
+	else
+		doze_time += jiffies - pm->stats.last_doze_event;
+
+	seq_printf(s, "awake time: %14u\ndoze time: %15u\n",
+		   jiffies_to_msecs(awake_time),
+		   jiffies_to_msecs(doze_time));
+
+	return 0;
+}
+
 static int
 mt7615_pm_idle_timeout_set(void *data, u64 val)
 {
@@ -515,6 +543,8 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
 	debugfs_create_file("runtime-pm", 0600, dir, dev, &fops_pm);
 	debugfs_create_file("idle-timeout", 0600, dir, dev,
 			    &fops_pm_idle_timeout);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "runtime_pm_stats", dir,
+				    mt7615_pm_stats);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "radio", dir,
 				    mt7615_radio_read);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 894b2588e075..86341d1f82f3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -526,6 +526,8 @@ void mt7615_init_device(struct mt7615_dev *dev)
 
 	mt7615_init_wiphy(hw);
 	dev->pm.idle_timeout = MT7615_PM_TIMEOUT;
+	dev->pm.stats.last_wake_event = jiffies;
+	dev->pm.stats.last_doze_event = jiffies;
 	mt7615_cap_dbdc_disable(dev);
 	dev->phy.dfs_state = -1;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 45c6fb5832b8..3c1528ed2110 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -288,6 +288,7 @@ EXPORT_SYMBOL_GPL(mt7622_trigger_hif_int);
 static int mt7615_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	struct mt76_dev *mdev = &dev->mt76;
 	u32 addr;
 	int err;
@@ -317,15 +318,20 @@ static int mt7615_mcu_drv_pmctrl(struct mt7615_dev *dev)
 
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
+	pm->stats.last_wake_event = jiffies;
+	pm->stats.doze_time += pm->stats.last_wake_event -
+			       pm->stats.last_doze_event;
+
 	return 0;
 }
 
 static int mt7615_mcu_lp_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	int i, err = 0;
 
-	mutex_lock(&dev->pm.mutex);
+	mutex_lock(&pm->mutex);
 
 	if (!test_bit(MT76_STATE_PM, &mphy->state))
 		goto out;
@@ -344,8 +350,11 @@ static int mt7615_mcu_lp_drv_pmctrl(struct mt7615_dev *dev)
 	}
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
+	pm->stats.last_wake_event = jiffies;
+	pm->stats.doze_time += pm->stats.last_wake_event -
+			       pm->stats.last_doze_event;
 out:
-	mutex_unlock(&dev->pm.mutex);
+	mutex_unlock(&pm->mutex);
 
 	return err;
 }
@@ -353,12 +362,13 @@ out:
 static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt76_connac_pm *pm = &dev->pm;
 	int err = 0;
 	u32 addr;
 
-	mutex_lock(&dev->pm.mutex);
+	mutex_lock(&pm->mutex);
 
-	if (mt76_connac_skip_fw_pmctrl(mphy, &dev->pm))
+	if (mt76_connac_skip_fw_pmctrl(mphy, pm))
 		goto out;
 
 	mt7622_trigger_hif_int(dev, true);
@@ -375,8 +385,12 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev)
 	}
 
 	mt7622_trigger_hif_int(dev, false);
+
+	pm->stats.last_doze_event = jiffies;
+	pm->stats.awake_time += pm->stats.last_doze_event -
+				pm->stats.last_wake_event;
 out:
-	mutex_unlock(&dev->pm.mutex);
+	mutex_unlock(&pm->mutex);
 
 	return err;
 }
-- 
cgit v1.2.3


From b1bd7bb8121d89518b2248357a070d4bf8defd3e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 12:43:50 +0200
Subject: mt76: connac: unschedule mac_work before going to sleep

In order to wake the device less frequently and so reduce power
consumpation, unschedule mac_work before going to sleep

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c      | 2 ++
 drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 4 +++-
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c      | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index d11375661875..f81a17d56008 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1912,6 +1912,8 @@ void mt7615_pm_wake_work(struct work_struct *work)
 			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
 		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
+		ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+					     MT7615_WATCHDOG_TIME);
 	}
 
 	ieee80211_wake_queues(mphy->hw);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 66f1667481e6..b32f26c1f8b3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -42,8 +42,10 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 
 	pm->last_activity = jiffies;
 
-	if (!test_bit(MT76_STATE_PM, &phy->state))
+	if (!test_bit(MT76_STATE_PM, &phy->state)) {
+		cancel_delayed_work(&phy->mac_work);
 		queue_delayed_work(dev->wq, &pm->ps_work, pm->idle_timeout);
+	}
 }
 EXPORT_SYMBOL_GPL(mt76_connac_power_save_sched);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index cd5661ec1f1f..214bd1859792 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1404,6 +1404,8 @@ void mt7921_pm_wake_work(struct work_struct *work)
 			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
 		mt7921_tx_cleanup(dev);
+		ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+					     MT7921_WATCHDOG_TIME);
 	}
 
 	ieee80211_wake_queues(mphy->hw);
-- 
cgit v1.2.3


From 081b37aea5085fd1535651150c5742e19ccfea82 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Wed, 21 Apr 2021 12:43:51 +0200
Subject: mt76: mt7921: mt7921_stop should put device in fw_own state

mt7921_stop should put device in fw_own state to reduce
power consumption.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index b32f26c1f8b3..6f180c92d413 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -37,7 +37,7 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 	if (!mt76_is_mmio(dev))
 		return;
 
-	if (!pm->enable || !test_bit(MT76_STATE_RUNNING, &phy->state))
+	if (!pm->enable)
 		return;
 
 	pm->last_activity = jiffies;
-- 
cgit v1.2.3


From 36fcc8cff592ed4c6c308f23390e481885b136fc Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 19 Apr 2021 14:03:00 +0200
Subject: mt76: mt7921: introduce mt7921_mcu_sta_add routine

mt7921_mcu_sta_add will be used to add and remove wtbl entries.
Create broadcast wtbl entry after AP association

Co-developed-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Deren Wu <deren.wu@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c   | 24 +++++-----------------
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c    | 20 ++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h |  2 ++
 3 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 3bbe7317ea3f..f4c27aa41048 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -622,8 +622,10 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_PS)
 		mt7921_mcu_uni_bss_ps(dev, vif);
 
-	if (changed & BSS_CHANGED_ASSOC)
+	if (changed & BSS_CHANGED_ASSOC) {
+		mt7921_mcu_sta_add(dev, NULL, vif, true);
 		mt7921_bss_bcnft_apply(dev, vif, info->assoc);
+	}
 
 	if (changed & BSS_CHANGED_ARP_FILTER) {
 		struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
@@ -641,15 +643,6 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
 	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
-	int rssi = -ewma_rssi_read(&mvif->rssi);
-	struct mt76_sta_cmd_info info = {
-		.sta = sta,
-		.vif = vif,
-		.enable = true,
-		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
-		.wcid = &msta->wcid,
-		.rcpi = to_rcpi(rssi),
-	};
 	int ret, idx;
 
 	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7921_WTBL_STA - 1);
@@ -676,7 +669,7 @@ int mt7921_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7921_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
-	ret = mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
+	ret = mt7921_mcu_sta_add(dev, sta, vif, true);
 	if (ret)
 		return ret;
 
@@ -690,18 +683,11 @@ void mt7921_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 {
 	struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
 	struct mt7921_sta *msta = (struct mt7921_sta *)sta->drv_priv;
-	struct mt76_sta_cmd_info info = {
-		.sta = sta,
-		.vif = vif,
-		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
-		.wcid = &msta->wcid,
-	};
 
 	mt76_connac_free_pending_tx_skbs(&dev->pm, &msta->wcid);
 	mt76_connac_pm_wake(&dev->mphy, &dev->pm);
 
-	mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
-
+	mt7921_mcu_sta_add(dev, sta, vif, false);
 	mt7921_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 35aaba7deb5d..43e3bf895b60 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1265,6 +1265,26 @@ int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 				 sizeof(req), false);
 }
 
+int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
+		       struct ieee80211_vif *vif, bool enable)
+{
+	struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+	int rssi = -ewma_rssi_read(&mvif->rssi);
+	struct mt76_sta_cmd_info info = {
+		.sta = sta,
+		.vif = vif,
+		.enable = enable,
+		.cmd = MCU_UNI_CMD_STA_REC_UPDATE,
+		.rcpi = to_rcpi(rssi),
+	};
+	struct mt7921_sta *msta;
+
+	msta = sta ? (struct mt7921_sta *)sta->drv_priv : NULL;
+	info.wcid = msta ? &msta->wcid : &mvif->sta.wcid;
+
+	return mt76_connac_mcu_add_sta_cmd(&dev->mphy, &info);
+}
+
 int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 {
 	struct mt76_phy *mphy = &dev->mt76.phy;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index aa3459723029..59862ea4951c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -257,6 +257,8 @@ int mt7921_mcu_add_key(struct mt7921_dev *dev, struct ieee80211_vif *vif,
 		       struct mt7921_sta *msta, struct ieee80211_key_conf *key,
 		       enum set_key_cmd cmd);
 int mt7921_set_channel(struct mt7921_phy *phy);
+int mt7921_mcu_sta_add(struct mt7921_dev *dev, struct ieee80211_sta *sta,
+		       struct ieee80211_vif *vif, bool enable);
 int mt7921_mcu_set_chan_info(struct mt7921_phy *phy, int cmd);
 int mt7921_mcu_set_tx(struct mt7921_dev *dev, struct ieee80211_vif *vif);
 int mt7921_mcu_set_eeprom(struct mt7921_dev *dev);
-- 
cgit v1.2.3


From fdc088a7f4b0fe5204995b9c236e338c200cc44c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 15 Mar 2021 22:49:15 +0100
Subject: mt76: debugfs: introduce napi_threaded node

Introduce napi_threaded debugfs knob in order to enable/disable NAPI
threaded support

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/debugfs.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/debugfs.c b/drivers/net/wireless/mediatek/mt76/debugfs.c
index d4a6b8108971..fa48cc3a7a8f 100644
--- a/drivers/net/wireless/mediatek/mt76/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/debugfs.c
@@ -25,6 +25,32 @@ mt76_reg_get(void *data, u64 *val)
 DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mt76_reg_get, mt76_reg_set,
 			 "0x%08llx\n");
 
+static int
+mt76_napi_threaded_set(void *data, u64 val)
+{
+	struct mt76_dev *dev = data;
+
+	if (!mt76_is_mmio(dev))
+		return -EOPNOTSUPP;
+
+	if (dev->napi_dev.threaded != val)
+		return dev_set_threaded(&dev->napi_dev, val);
+
+	return 0;
+}
+
+static int
+mt76_napi_threaded_get(void *data, u64 *val)
+{
+	struct mt76_dev *dev = data;
+
+	*val = dev->napi_dev.threaded;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_napi_threaded, mt76_napi_threaded_get,
+			 mt76_napi_threaded_set, "%llu\n");
+
 int mt76_queues_read(struct seq_file *s, void *data)
 {
 	struct mt76_dev *dev = dev_get_drvdata(s->private);
@@ -102,6 +128,8 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
 	debugfs_create_u32("regidx", 0600, dir, &dev->debugfs_reg);
 	debugfs_create_file_unsafe("regval", 0600, dir, dev,
 				   &fops_regval);
+	debugfs_create_file_unsafe("napi_threaded", 0600, dir, dev,
+				   &fops_napi_threaded);
 	debugfs_create_blob("eeprom", 0400, dir, &dev->eeprom);
 	if (dev->otp.data)
 		debugfs_create_blob("otp", 0400, dir, &dev->otp);
-- 
cgit v1.2.3


From c8131dc32be24d4413e7ed534f53e8b0cc5d3c36 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 21 Apr 2021 16:14:40 +0300
Subject: mt76: mt7615: fix a precision vs width bug in printk

Precision "%.*s" was intended instead of width "%*s".  The original code
will print garbage from beyond the end of the skb->data.

Fixes: d76d6c3ba2b0 ("mt76: mt7615: limit firmware log message printk to buffer length")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 3c1528ed2110..aa42af9ebfd6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -453,7 +453,7 @@ mt7615_mcu_rx_log_message(struct mt7615_dev *dev, struct sk_buff *skb)
 		break;
 	}
 
-	wiphy_info(mt76_hw(dev)->wiphy, "%s: %*s", type,
+	wiphy_info(mt76_hw(dev)->wiphy, "%s: %.*s", type,
 		   (int)(skb->len - sizeof(*rxd)), data);
 }
 
-- 
cgit v1.2.3


From b2bcc6d2a5874b0265aeeb926618e2d265f96b50 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 21 Apr 2021 16:40:40 +0300
Subject: mt76: mt7915: fix a precision vs width bug in printk

Precision %.*s was intended instead of width %*s.  The original code
will still print unintended data from beyond the end of skb->data.

Fixes: 665b2c780d63 ("mt76: mt7915: limit firmware log message printk to buffer length")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 92e0c782bf40..b3f14ff67c5a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -521,7 +521,7 @@ mt7915_mcu_rx_log_message(struct mt7915_dev *dev, struct sk_buff *skb)
 		break;
 	}
 
-	wiphy_info(mt76_hw(dev)->wiphy, "%s: %*s", type,
+	wiphy_info(mt76_hw(dev)->wiphy, "%s: %.*s", type,
 		   (int)(skb->len - sizeof(*rxd)), data);
 }
 
-- 
cgit v1.2.3


From 2bf301bc81df81907ceabbfd7bf57743696899bb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 21 Apr 2021 16:16:06 +0300
Subject: mt76: mt7921: fix a precision vs width bug in printk

Precision %.*s was intended instead of width %*s.  The original code
is potentially an information leak.

Fixes: c7cc5ec57303 ("mt76: mt7921: rework mt7921_mcu_debug_msg_event routine")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 43e3bf895b60..78bd965d8009 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -502,7 +502,7 @@ mt7921_mcu_debug_msg_event(struct mt7921_dev *dev, struct sk_buff *skb)
 			if (!msg->content[i])
 				msg->content[i] = ' ';
 		}
-		wiphy_info(mt76_hw(dev)->wiphy, "%*s", len, msg->content);
+		wiphy_info(mt76_hw(dev)->wiphy, "%.*s", len, msg->content);
 	}
 }
 
-- 
cgit v1.2.3


From 51252cc56ec9aaac71445e849c75b40b17277d7e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 15:21:31 +0200
Subject: mt76: move mt76_token_init in mt76_alloc_device

In order to remove duplicated code, move mt76_token_init in
mt76_alloc_device routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c        | 3 +++
 drivers/net/wireless/mediatek/mt76/mt76.h            | 1 -
 drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c | 2 --
 drivers/net/wireless/mediatek/mt76/mt7915/init.c     | 2 --
 drivers/net/wireless/mediatek/mt76/mt7921/init.c     | 1 -
 drivers/net/wireless/mediatek/mt76/tx.c              | 7 -------
 6 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 29ef15ec22fe..977acab0360a 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -428,6 +428,9 @@ mt76_alloc_device(struct device *pdev, unsigned int size,
 	mutex_init(&dev->mcu.mutex);
 	dev->tx_worker.fn = mt76_tx_worker;
 
+	spin_lock_init(&dev->token_lock);
+	idr_init(&dev->token);
+
 	INIT_LIST_HEAD(&dev->txwi_cache);
 
 	for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 3b09ea8176a3..36ede65919f8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -1220,7 +1220,6 @@ s8 mt76_get_rate_power_limits(struct mt76_phy *phy,
 struct mt76_txwi_cache *
 mt76_token_release(struct mt76_dev *dev, int token, bool *wake);
 int mt76_token_consume(struct mt76_dev *dev, struct mt76_txwi_cache **ptxwi);
-void mt76_token_init(struct mt76_dev *dev);
 void __mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked);
 
 static inline void mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 5d174aa7f88d..ec8ec1a2033f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -40,8 +40,6 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->mcu_work, mt7615_pci_init_work);
-	mt76_token_init(&dev->mt76);
-
 	ret = mt7615_eeprom_init(dev, addr);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index c7e9808f228f..822f3aa6bb8b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -351,8 +351,6 @@ static int mt7915_init_hardware(struct mt7915_dev *dev)
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
 
 	INIT_WORK(&dev->init_work, mt7915_init_work);
-	mt76_token_init(&dev->mt76);
-
 	dev->dbdc_support = !!(mt76_rr(dev, MT_HW_BOUND) & BIT(5));
 
 	/* If MCU was already running, it is likely in a bad state */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index c32fea6378af..fe28bf4050c4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -170,7 +170,6 @@ static int mt7921_init_hardware(struct mt7921_dev *dev)
 {
 	int ret, idx;
 
-	mt76_token_init(&dev->mt76);
 	ret = mt7921_dma_init(dev);
 	if (ret)
 		return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index 29a56f304aae..53ea8de82df0 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -649,13 +649,6 @@ void mt76_queue_tx_complete(struct mt76_dev *dev, struct mt76_queue *q,
 }
 EXPORT_SYMBOL_GPL(mt76_queue_tx_complete);
 
-void mt76_token_init(struct mt76_dev *dev)
-{
-	spin_lock_init(&dev->token_lock);
-	idr_init(&dev->token);
-}
-EXPORT_SYMBOL_GPL(mt76_token_init);
-
 void __mt76_set_tx_blocked(struct mt76_dev *dev, bool blocked)
 {
 	struct mt76_phy *phy = &dev->phy, *phy2 = dev->phy2;
-- 
cgit v1.2.3


From 3df932141e4fa3a39f8e0839af9ee7bdedb1da0c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Apr 2021 15:37:21 +0200
Subject: mt76: mt7921: reinit wpdma during drv_own if necessary

Check dummy reg to reinitialized WPDMA during driver_own operation

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 78bd965d8009..5f3d56d570a5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1308,6 +1308,8 @@ int mt7921_mcu_drv_pmctrl(struct mt7921_dev *dev)
 		err = -EIO;
 		goto out;
 	}
+
+	mt7921_wpdma_reinit_cond(dev);
 	clear_bit(MT76_STATE_PM, &mphy->state);
 
 	pm->stats.last_wake_event = jiffies;
-- 
cgit v1.2.3


From 68f5c12abbc9b6f8c5eea16c62f8b7be70793163 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 21 Apr 2021 21:44:20 +0300
Subject: net: bridge: fix error in br_multicast_add_port when
 CONFIG_NET_SWITCHDEV=n

When CONFIG_NET_SWITCHDEV is disabled, the shim for switchdev_port_attr_set
inside br_mc_disabled_update returns -EOPNOTSUPP. This is not caught,
and propagated to the caller of br_multicast_add_port, preventing ports
from joining the bridge.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: ae1ea84b33da ("net: bridge: propagate error code and extack from br_mc_disabled_update")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4daa95c913d0..2883601d5c8b 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1625,7 +1625,7 @@ int br_multicast_add_port(struct net_bridge_port *port)
 				    br_opt_get(port->br,
 					       BROPT_MULTICAST_ENABLED),
 				    NULL);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		return err;
 
 	port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats);
-- 
cgit v1.2.3


From 83d686a6822322c4981b745dc1d7185f1f40811b Mon Sep 17 00:00:00 2001
From: jinyiting <jinyiting@huawei.com>
Date: Wed, 21 Apr 2021 16:38:21 +0800
Subject: bonding: 3ad: Fix the conflict between bond_update_slave_arr and the
 state machine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bond works in mode 4, and performs down/up operations on the bond
that is normally negotiated. The probability of bond-> slave_arr is NULL

Test commands:
   ifconfig bond1 down
   ifconfig bond1 up

The conflict occurs in the following process：

__dev_open (CPU A)
--bond_open
  --queue_delayed_work(bond->wq,&bond->ad_work,0);
  --bond_update_slave_arr
    --bond_3ad_get_active_agg_info

ad_work(CPU B)
--bond_3ad_state_machine_handler
  --ad_agg_selection_logic

ad_work runs on cpu B. In the function ad_agg_selection_logic, all
agg->is_active will be cleared. Before the new active aggregator is
selected on CPU B, bond_3ad_get_active_agg_info failed on CPU A,
bond->slave_arr will be set to NULL. The best aggregator in
ad_agg_selection_logic has not changed, no need to update slave arr.

The conflict occurred in that ad_agg_selection_logic clears
agg->is_active under mode_lock, but bond_open -> bond_update_slave_arr
is inspecting agg->is_active outside the lock.

Also, bond_update_slave_arr is normal for potential sleep when
allocating memory, so replace the WARN_ON with a call to might_sleep.

Signed-off-by: jinyiting <jinyiting@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 74cbbb22470b..83ef62db6285 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4391,9 +4391,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 	int agg_id = 0;
 	int ret = 0;
 
-#ifdef CONFIG_LOCKDEP
-	WARN_ON(lockdep_is_held(&bond->mode_lock));
-#endif
+	might_sleep();
 
 	usable_slaves = kzalloc(struct_size(usable_slaves, arr,
 					    bond->slave_cnt), GFP_KERNEL);
@@ -4406,7 +4404,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info ad_info;
 
+		spin_lock_bh(&bond->mode_lock);
 		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
+			spin_unlock_bh(&bond->mode_lock);
 			pr_debug("bond_3ad_get_active_agg_info failed\n");
 			/* No active aggragator means it's not safe to use
 			 * the previous array.
@@ -4414,6 +4414,7 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 			bond_reset_slave_arr(bond);
 			goto out;
 		}
+		spin_unlock_bh(&bond->mode_lock);
 		agg_id = ad_info.aggregator_id;
 	}
 	bond_for_each_slave(bond, slave, iter) {
-- 
cgit v1.2.3


From 5d869070569a23aa909c6e7e9d010fc438a492ef Mon Sep 17 00:00:00 2001
From: Marek Behún <kabel@kernel.org>
Date: Wed, 21 Apr 2021 16:08:03 +0200
Subject: net: phy: marvell: don't use empty switch default case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This causes error reported by kernel test robot.

Signed-off-by: Marek Behún <kabel@kernel.org>
Fixes: 41d26bf4aba0 ("net: phy: marvell: refactor HWMON OOP style")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 1cce86b280af..e2b2b20c0dc5 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2499,8 +2499,6 @@ static int marvell_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
 		if (ops->set_temp_critical)
 			err = ops->set_temp_critical(phydev, temp);
 		break;
-	default:
-		fallthrough;
 	}
 
 	return err;
-- 
cgit v1.2.3


From eefb45eef5c4c425e87667af8f5e904fbdd47abf Mon Sep 17 00:00:00 2001
From: Chinmay Agarwal <chinagar@codeaurora.org>
Date: Thu, 22 Apr 2021 01:12:22 +0530
Subject: neighbour: Prevent Race condition in neighbour subsytem

Following Race Condition was detected:

<CPU A, t0>: Executing: __netif_receive_skb() ->__netif_receive_skb_core()
-> arp_rcv() -> arp_process().arp_process() calls __neigh_lookup() which
takes a reference on neighbour entry 'n'.
Moves further along, arp_process() and calls neigh_update()->
__neigh_update(). Neighbour entry is unlocked just before a call to
neigh_update_gc_list.

This unlocking paves way for another thread that may take a reference on
the same and mark it dead and remove it from gc_list.

<CPU B, t1> - neigh_flush_dev() is under execution and calls
neigh_mark_dead(n) marking the neighbour entry 'n' as dead. Also n will be
removed from gc_list.
Moves further along neigh_flush_dev() and calls
neigh_cleanup_and_release(n), but since reference count increased in t1,
'n' couldn't be destroyed.

<CPU A, t3>- Code hits neigh_update_gc_list, with neighbour entry
set as dead.

<CPU A, t4> - arp_process() finally calls neigh_release(n), destroying
the neighbour entry and we have a destroyed ntry still part of gc_list.

Fixes: eb4e8fac00d1("neighbour: Prevent a dead entry from updating gc_list")
Signed-off-by: Chinmay Agarwal <chinagar@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8379719d1dce..98f20efbfadf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -131,6 +131,9 @@ static void neigh_update_gc_list(struct neighbour *n)
 	write_lock_bh(&n->tbl->lock);
 	write_lock(&n->lock);
 
+	if (n->dead)
+		goto out;
+
 	/* remove from the gc list if new state is permanent or if neighbor
 	 * is externally learned; otherwise entry should be on the gc list
 	 */
@@ -147,6 +150,7 @@ static void neigh_update_gc_list(struct neighbour *n)
 		atomic_inc(&n->tbl->gc_entries);
 	}
 
+out:
 	write_unlock(&n->lock);
 	write_unlock_bh(&n->tbl->lock);
 }
-- 
cgit v1.2.3


From 7dd9a40fd6e0d0f1fd8e1931c007e080801dfdce Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 26 Mar 2021 19:08:19 +0100
Subject: ath9k: Fix error check in ath9k_hw_read_revisions() for PCI devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the error check in ath9k_hw_read_revisions() was added, it checked for
-EIO which is what ath9k_regread() in the ath9k_htc driver uses. However,
for plain ath9k, the register read function uses ioread32(), which just
returns -1 on error. So if such a read fails, it still gets passed through
and ends up as a weird mac revision in the log output.

Fix this by changing ath9k_regread() to return -1 on error like ioread32()
does, and fix the error check to look for that instead of -EIO.

Fixes: 2f90c7e5d094 ("ath9k: Check for errors when reading SREV register")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210326180819.142480-1-toke@redhat.com
---
 drivers/net/wireless/ath/ath9k/htc_drv_init.c | 2 +-
 drivers/net/wireless/ath/ath9k/hw.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index db0c6fa9c9dc..ff61ae34ecdf 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -246,7 +246,7 @@ static unsigned int ath9k_regread(void *hw_priv, u32 reg_offset)
 	if (unlikely(r)) {
 		ath_dbg(common, WMI, "REGISTER READ FAILED: (0x%04x, %d)\n",
 			reg_offset, r);
-		return -EIO;
+		return -1;
 	}
 
 	return be32_to_cpu(val);
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index 5abc2a5526ec..2ca3b86714a9 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -286,7 +286,7 @@ static bool ath9k_hw_read_revisions(struct ath_hw *ah)
 
 	srev = REG_READ(ah, AR_SREV);
 
-	if (srev == -EIO) {
+	if (srev == -1) {
 		ath_err(ath9k_hw_common(ah),
 			"Failed to read SREV register");
 		return false;
-- 
cgit v1.2.3


From 8392df5d7e0b6a7d21440da1fc259f9938f4dec3 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 29 Mar 2021 05:01:54 -0700
Subject: ath10k: Fix a use after free in ath10k_htc_send_bundle

In ath10k_htc_send_bundle, the bundle_skb could be freed by
dev_kfree_skb_any(bundle_skb). But the bundle_skb is used later
by bundle_skb->len.

As skb_len = bundle_skb->len, my patch replaces bundle_skb->len to
skb_len after the bundle_skb was freed.

Fixes: c8334512f3dd1 ("ath10k: add htt TX bundle for sdio")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210329120154.8963-1-lyl2019@mail.ustc.edu.cn
---
 drivers/net/wireless/ath/ath10k/htc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 0a37be6a7d33..fab398046a3f 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -669,7 +669,7 @@ static int ath10k_htc_send_bundle(struct ath10k_htc_ep *ep,
 
 	ath10k_dbg(ar, ATH10K_DBG_HTC,
 		   "bundle tx status %d eid %d req count %d count %d len %d\n",
-		   ret, ep->eid, skb_queue_len(&ep->tx_req_head), cn, bundle_skb->len);
+		   ret, ep->eid, skb_queue_len(&ep->tx_req_head), cn, skb_len);
 	return ret;
 }
 
-- 
cgit v1.2.3


From eaaf52e4b866f265eb791897d622961293fd48c1 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Tue, 6 Apr 2021 17:02:28 -0600
Subject: ath10k: Fix ath10k_wmi_tlv_op_pull_peer_stats_info() unlock without
 lock

ath10k_wmi_tlv_op_pull_peer_stats_info() could try to unlock RCU lock
winthout locking it first when peer reason doesn't match the valid
cases for this function.

Add a default case to return without unlocking.

Fixes: 09078368d516 ("ath10k: hold RCU lock when calling ieee80211_find_sta_by_ifaddr()")
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210406230228.31301-1-skhan@linuxfoundation.org
---
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index d97b33f789e4..7efbe03fbca8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -592,6 +592,9 @@ static void ath10k_wmi_event_tdls_peer(struct ath10k *ar, struct sk_buff *skb)
 					GFP_ATOMIC
 					);
 		break;
+	default:
+		kfree(tb);
+		return;
 	}
 
 exit:
-- 
cgit v1.2.3


From 6dc89f070d2844900891b4efff0bf300ad8c07d2 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 16 Mar 2021 09:19:24 +0000
Subject: ath11k: qmi: Fix spelling mistake "requeqst" -> "request"

There is a spelling mistake in an ath11k_warn message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210316091924.15627-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath11k/qmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index a612e279ea5b..b5e34d670715 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -2514,7 +2514,7 @@ static int ath11k_qmi_event_load_bdf(struct ath11k_qmi *qmi)
 
 	ret = ath11k_qmi_request_target_cap(ab);
 	if (ret < 0) {
-		ath11k_warn(ab, "failed to requeqst qmi target capabilities: %d\n",
+		ath11k_warn(ab, "failed to request qmi target capabilities: %d\n",
 			    ret);
 		return ret;
 	}
-- 
cgit v1.2.3


From ff9f732a87caa5f7bab72bea3aaad58db9b1ac60 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Thu, 8 Apr 2021 10:27:10 +0530
Subject: ath11k: fix warning in ath11k_mhi_config

Initialize static variable ath11k_mhi_config for all hw_rev,
return error for unknown hw_rev.
This patch fixes below Smatch warning:
    drivers/net/wireless/ath/ath11k/mhi.c:357 ath11k_mhi_register()
    error: uninitialized symbol 'ath11k_mhi_config'.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1

Fixes: a233811ef600 ("ath11k: Add qcn9074 mhi controller config")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1617857830-19315-1-git-send-email-akolli@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mhi.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 626764da4d6f..27b394d115e2 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -349,10 +349,19 @@ int ath11k_mhi_register(struct ath11k_pci *ab_pci)
 	mhi_ctrl->read_reg = ath11k_mhi_op_read_reg;
 	mhi_ctrl->write_reg = ath11k_mhi_op_write_reg;
 
-	if (ab->hw_rev == ATH11K_HW_QCA6390_HW20)
-		ath11k_mhi_config = &ath11k_mhi_config_qca6390;
-	else if (ab->hw_rev == ATH11K_HW_QCN9074_HW10)
+	switch (ab->hw_rev) {
+	case ATH11K_HW_QCN9074_HW10:
 		ath11k_mhi_config = &ath11k_mhi_config_qcn9074;
+		break;
+	case ATH11K_HW_QCA6390_HW20:
+		ath11k_mhi_config = &ath11k_mhi_config_qca6390;
+		break;
+	default:
+		ath11k_err(ab, "failed assign mhi_config for unknown hw rev %d\n",
+			   ab->hw_rev);
+		mhi_free_controller(mhi_ctrl);
+		return -EINVAL;
+	}
 
 	ret = mhi_register_controller(mhi_ctrl, ath11k_mhi_config);
 	if (ret) {
-- 
cgit v1.2.3


From 820aa37638a252b57967bdf4038a514b1ab85d45 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 14 Apr 2021 18:43:19 -0500
Subject: wl3501_cs: Fix out-of-bounds warnings in wl3501_send_pkt

Fix the following out-of-bounds warnings by enclosing structure members
daddr and saddr into new struct addr, in structures wl3501_md_req and
wl3501_md_ind:

arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [18, 23] from the object at 'sig' is out of the bounds of referenced subobject 'daddr' with type 'u8[6]' {aka 'unsigned char[6]'} at offset 11 [-Warray-bounds]
arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [18, 23] from the object at 'sig' is out of the bounds of referenced subobject 'daddr' with type 'u8[6]' {aka 'unsigned char[6]'} at offset 11 [-Warray-bounds]

Refactor the code, accordingly:

$ pahole -C wl3501_md_req drivers/net/wireless/wl3501_cs.o
struct wl3501_md_req {
	u16                        next_blk;             /*     0     2 */
	u8                         sig_id;               /*     2     1 */
	u8                         routing;              /*     3     1 */
	u16                        data;                 /*     4     2 */
	u16                        size;                 /*     6     2 */
	u8                         pri;                  /*     8     1 */
	u8                         service_class;        /*     9     1 */
	struct {
		u8                 daddr[6];             /*    10     6 */
		u8                 saddr[6];             /*    16     6 */
	} addr;                                          /*    10    12 */

	/* size: 22, cachelines: 1, members: 8 */
	/* last cacheline: 22 bytes */
};

$ pahole -C wl3501_md_ind drivers/net/wireless/wl3501_cs.o
struct wl3501_md_ind {
	u16                        next_blk;             /*     0     2 */
	u8                         sig_id;               /*     2     1 */
	u8                         routing;              /*     3     1 */
	u16                        data;                 /*     4     2 */
	u16                        size;                 /*     6     2 */
	u8                         reception;            /*     8     1 */
	u8                         pri;                  /*     9     1 */
	u8                         service_class;        /*    10     1 */
	struct {
		u8                 daddr[6];             /*    11     6 */
		u8                 saddr[6];             /*    17     6 */
	} addr;                                          /*    11    12 */

	/* size: 24, cachelines: 1, members: 9 */
	/* padding: 1 */
	/* last cacheline: 24 bytes */
};

The problem is that the original code is trying to copy data into a
couple of arrays adjacent to each other in a single call to memcpy().
Now that a new struct _addr_ enclosing those two adjacent arrays
is introduced, memcpy() doesn't overrun the length of &sig.daddr[0]
and &sig.daddr, because the address of the new struct object _addr_
is used, instead.

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/d260fe56aed7112bff2be5b4d152d03ad7b78e78.1618442265.git.gustavoars@kernel.org
---
 drivers/net/wireless/wl3501.h    | 12 ++++++++----
 drivers/net/wireless/wl3501_cs.c | 10 ++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/wl3501.h b/drivers/net/wireless/wl3501.h
index 5779ffbe5d0f..8c5480bec104 100644
--- a/drivers/net/wireless/wl3501.h
+++ b/drivers/net/wireless/wl3501.h
@@ -471,8 +471,10 @@ struct wl3501_md_req {
 	u16	size;
 	u8	pri;
 	u8	service_class;
-	u8	daddr[ETH_ALEN];
-	u8	saddr[ETH_ALEN];
+	struct {
+		u8	daddr[ETH_ALEN];
+		u8	saddr[ETH_ALEN];
+	} addr;
 };
 
 struct wl3501_md_ind {
@@ -484,8 +486,10 @@ struct wl3501_md_ind {
 	u8	reception;
 	u8	pri;
 	u8	service_class;
-	u8	daddr[ETH_ALEN];
-	u8	saddr[ETH_ALEN];
+	struct {
+		u8	daddr[ETH_ALEN];
+		u8	saddr[ETH_ALEN];
+	} addr;
 };
 
 struct wl3501_md_confirm {
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 8ca5789c7b37..70307308635f 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -469,6 +469,7 @@ static int wl3501_send_pkt(struct wl3501_card *this, u8 *data, u16 len)
 	struct wl3501_md_req sig = {
 		.sig_id = WL3501_SIG_MD_REQ,
 	};
+	size_t sig_addr_len = sizeof(sig.addr);
 	u8 *pdata = (char *)data;
 	int rc = -EIO;
 
@@ -484,9 +485,9 @@ static int wl3501_send_pkt(struct wl3501_card *this, u8 *data, u16 len)
 			goto out;
 		}
 		rc = 0;
-		memcpy(&sig.daddr[0], pdata, 12);
-		pktlen = len - 12;
-		pdata += 12;
+		memcpy(&sig.addr, pdata, sig_addr_len);
+		pktlen = len - sig_addr_len;
+		pdata += sig_addr_len;
 		sig.data = bf;
 		if (((*pdata) * 256 + (*(pdata + 1))) > 1500) {
 			u8 addr4[ETH_ALEN] = {
@@ -980,7 +981,8 @@ static inline void wl3501_md_ind_interrupt(struct net_device *dev,
 	} else {
 		skb->dev = dev;
 		skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */
-		skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12);
+		skb_copy_to_linear_data(skb, (unsigned char *)&sig.addr,
+					sizeof(sig.addr));
 		wl3501_receive(this, skb->data, pkt_len);
 		skb_put(skb, pkt_len);
 		skb->protocol	= eth_type_trans(skb, dev);
-- 
cgit v1.2.3


From bb43e5718d8f1b46e7a77e7b39be3c691f293050 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 14 Apr 2021 18:45:15 -0500
Subject: wl3501_cs: Fix out-of-bounds warnings in wl3501_mgmt_join

Fix the following out-of-bounds warnings by adding a new structure
wl3501_req instead of duplicating the same members in structure
wl3501_join_req and wl3501_scan_confirm:

arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [39, 108] from the object at 'sig' is out of the bounds of referenced subobject 'beacon_period' with type 'short unsigned int' at offset 36 [-Warray-bounds]
arch/x86/include/asm/string_32.h:182:25: warning: '__builtin_memcpy' offset [25, 95] from the object at 'sig' is out of the bounds of referenced subobject 'beacon_period' with type 'short unsigned int' at offset 22 [-Warray-bounds]

Refactor the code, accordingly:

$ pahole -C wl3501_req drivers/net/wireless/wl3501_cs.o
struct wl3501_req {
        u16                        beacon_period;        /*     0     2 */
        u16                        dtim_period;          /*     2     2 */
        u16                        cap_info;             /*     4     2 */
        u8                         bss_type;             /*     6     1 */
        u8                         bssid[6];             /*     7     6 */
        struct iw_mgmt_essid_pset  ssid;                 /*    13    34 */
        struct iw_mgmt_ds_pset     ds_pset;              /*    47     3 */
        struct iw_mgmt_cf_pset     cf_pset;              /*    50     8 */
        struct iw_mgmt_ibss_pset   ibss_pset;            /*    58     4 */
        struct iw_mgmt_data_rset   bss_basic_rset;       /*    62    10 */

        /* size: 72, cachelines: 2, members: 10 */
        /* last cacheline: 8 bytes */
};

$ pahole -C wl3501_join_req drivers/net/wireless/wl3501_cs.o
struct wl3501_join_req {
        u16                        next_blk;             /*     0     2 */
        u8                         sig_id;               /*     2     1 */
        u8                         reserved;             /*     3     1 */
        struct iw_mgmt_data_rset   operational_rset;     /*     4    10 */
        u16                        reserved2;            /*    14     2 */
        u16                        timeout;              /*    16     2 */
        u16                        probe_delay;          /*    18     2 */
        u8                         timestamp[8];         /*    20     8 */
        u8                         local_time[8];        /*    28     8 */
        struct wl3501_req          req;                  /*    36    72 */

        /* size: 108, cachelines: 2, members: 10 */
        /* last cacheline: 44 bytes */
};

$ pahole -C wl3501_scan_confirm drivers/net/wireless/wl3501_cs.o
struct wl3501_scan_confirm {
        u16                        next_blk;             /*     0     2 */
        u8                         sig_id;               /*     2     1 */
        u8                         reserved;             /*     3     1 */
        u16                        status;               /*     4     2 */
        char                       timestamp[8];         /*     6     8 */
        char                       localtime[8];         /*    14     8 */
        struct wl3501_req          req;                  /*    22    72 */
        /* --- cacheline 1 boundary (64 bytes) was 30 bytes ago --- */
        u8                         rssi;                 /*    94     1 */

        /* size: 96, cachelines: 2, members: 8 */
        /* padding: 1 */
        /* last cacheline: 32 bytes */
};

The problem is that the original code is trying to copy data into a
bunch of struct members adjacent to each other in a single call to
memcpy(). Now that a new struct wl3501_req enclosing all those adjacent
members is introduced, memcpy() doesn't overrun the length of
&sig.beacon_period and &this->bss_set[i].beacon_period, because the
address of the new struct object _req_ is used as the destination,
instead.

This helps with the ongoing efforts to globally enable -Warray-bounds
and get us closer to being able to tighten the FORTIFY_SOURCE routines
on memcpy().

Link: https://github.com/KSPP/linux/issues/109
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1fbaf516da763b50edac47d792a9145aa4482e29.1618442265.git.gustavoars@kernel.org
---
 drivers/net/wireless/wl3501.h    | 35 ++++++++++++++------------------
 drivers/net/wireless/wl3501_cs.c | 44 +++++++++++++++++++++-------------------
 2 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/drivers/net/wireless/wl3501.h b/drivers/net/wireless/wl3501.h
index 8c5480bec104..91f276dd22a1 100644
--- a/drivers/net/wireless/wl3501.h
+++ b/drivers/net/wireless/wl3501.h
@@ -379,16 +379,7 @@ struct wl3501_get_confirm {
 	u8	mib_value[100];
 };
 
-struct wl3501_join_req {
-	u16			    next_blk;
-	u8			    sig_id;
-	u8			    reserved;
-	struct iw_mgmt_data_rset    operational_rset;
-	u16			    reserved2;
-	u16			    timeout;
-	u16			    probe_delay;
-	u8			    timestamp[8];
-	u8			    local_time[8];
+struct wl3501_req {
 	u16			    beacon_period;
 	u16			    dtim_period;
 	u16			    cap_info;
@@ -401,6 +392,19 @@ struct wl3501_join_req {
 	struct iw_mgmt_data_rset    bss_basic_rset;
 };
 
+struct wl3501_join_req {
+	u16			    next_blk;
+	u8			    sig_id;
+	u8			    reserved;
+	struct iw_mgmt_data_rset    operational_rset;
+	u16			    reserved2;
+	u16			    timeout;
+	u16			    probe_delay;
+	u8			    timestamp[8];
+	u8			    local_time[8];
+	struct wl3501_req	    req;
+};
+
 struct wl3501_join_confirm {
 	u16	next_blk;
 	u8	sig_id;
@@ -443,16 +447,7 @@ struct wl3501_scan_confirm {
 	u16			    status;
 	char			    timestamp[8];
 	char			    localtime[8];
-	u16			    beacon_period;
-	u16			    dtim_period;
-	u16			    cap_info;
-	u8			    bss_type;
-	u8			    bssid[ETH_ALEN];
-	struct iw_mgmt_essid_pset   ssid;
-	struct iw_mgmt_ds_pset	    ds_pset;
-	struct iw_mgmt_cf_pset	    cf_pset;
-	struct iw_mgmt_ibss_pset    ibss_pset;
-	struct iw_mgmt_data_rset    bss_basic_rset;
+	struct wl3501_req	    req;
 	u8			    rssi;
 };
 
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 70307308635f..672f5d5f3f2c 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -590,7 +590,7 @@ static int wl3501_mgmt_join(struct wl3501_card *this, u16 stas)
 	struct wl3501_join_req sig = {
 		.sig_id		  = WL3501_SIG_JOIN_REQ,
 		.timeout	  = 10,
-		.ds_pset = {
+		.req.ds_pset = {
 			.el = {
 				.id  = IW_MGMT_INFO_ELEMENT_DS_PARAMETER_SET,
 				.len = 1,
@@ -599,7 +599,7 @@ static int wl3501_mgmt_join(struct wl3501_card *this, u16 stas)
 		},
 	};
 
-	memcpy(&sig.beacon_period, &this->bss_set[stas].beacon_period, 72);
+	memcpy(&sig.req, &this->bss_set[stas].req, sizeof(sig.req));
 	return wl3501_esbq_exec(this, &sig, sizeof(sig));
 }
 
@@ -667,35 +667,37 @@ static void wl3501_mgmt_scan_confirm(struct wl3501_card *this, u16 addr)
 	if (sig.status == WL3501_STATUS_SUCCESS) {
 		pr_debug("success");
 		if ((this->net_type == IW_MODE_INFRA &&
-		     (sig.cap_info & WL3501_MGMT_CAPABILITY_ESS)) ||
+		     (sig.req.cap_info & WL3501_MGMT_CAPABILITY_ESS)) ||
 		    (this->net_type == IW_MODE_ADHOC &&
-		     (sig.cap_info & WL3501_MGMT_CAPABILITY_IBSS)) ||
+		     (sig.req.cap_info & WL3501_MGMT_CAPABILITY_IBSS)) ||
 		    this->net_type == IW_MODE_AUTO) {
 			if (!this->essid.el.len)
 				matchflag = 1;
 			else if (this->essid.el.len == 3 &&
 				 !memcmp(this->essid.essid, "ANY", 3))
 				matchflag = 1;
-			else if (this->essid.el.len != sig.ssid.el.len)
+			else if (this->essid.el.len != sig.req.ssid.el.len)
 				matchflag = 0;
-			else if (memcmp(this->essid.essid, sig.ssid.essid,
+			else if (memcmp(this->essid.essid, sig.req.ssid.essid,
 					this->essid.el.len))
 				matchflag = 0;
 			else
 				matchflag = 1;
 			if (matchflag) {
 				for (i = 0; i < this->bss_cnt; i++) {
-					if (ether_addr_equal_unaligned(this->bss_set[i].bssid, sig.bssid)) {
+					if (ether_addr_equal_unaligned(this->bss_set[i].req.bssid,
+								       sig.req.bssid)) {
 						matchflag = 0;
 						break;
 					}
 				}
 			}
 			if (matchflag && (i < 20)) {
-				memcpy(&this->bss_set[i].beacon_period,
-				       &sig.beacon_period, 73);
+				memcpy(&this->bss_set[i].req,
+				       &sig.req, sizeof(sig.req));
 				this->bss_cnt++;
 				this->rssi = sig.rssi;
+				this->bss_set[i].rssi = sig.rssi;
 			}
 		}
 	} else if (sig.status == WL3501_STATUS_TIMEOUT) {
@@ -887,19 +889,19 @@ static void wl3501_mgmt_join_confirm(struct net_device *dev, u16 addr)
 			if (this->join_sta_bss < this->bss_cnt) {
 				const int i = this->join_sta_bss;
 				memcpy(this->bssid,
-				       this->bss_set[i].bssid, ETH_ALEN);
-				this->chan = this->bss_set[i].ds_pset.chan;
+				       this->bss_set[i].req.bssid, ETH_ALEN);
+				this->chan = this->bss_set[i].req.ds_pset.chan;
 				iw_copy_mgmt_info_element(&this->keep_essid.el,
-						     &this->bss_set[i].ssid.el);
+						     &this->bss_set[i].req.ssid.el);
 				wl3501_mgmt_auth(this);
 			}
 		} else {
 			const int i = this->join_sta_bss;
 
-			memcpy(&this->bssid, &this->bss_set[i].bssid, ETH_ALEN);
-			this->chan = this->bss_set[i].ds_pset.chan;
+			memcpy(&this->bssid, &this->bss_set[i].req.bssid, ETH_ALEN);
+			this->chan = this->bss_set[i].req.ds_pset.chan;
 			iw_copy_mgmt_info_element(&this->keep_essid.el,
-						  &this->bss_set[i].ssid.el);
+						  &this->bss_set[i].req.ssid.el);
 			wl3501_online(dev);
 		}
 	} else {
@@ -1573,30 +1575,30 @@ static int wl3501_get_scan(struct net_device *dev, struct iw_request_info *info,
 	for (i = 0; i < this->bss_cnt; ++i) {
 		iwe.cmd			= SIOCGIWAP;
 		iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
-		memcpy(iwe.u.ap_addr.sa_data, this->bss_set[i].bssid, ETH_ALEN);
+		memcpy(iwe.u.ap_addr.sa_data, this->bss_set[i].req.bssid, ETH_ALEN);
 		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_ADDR_LEN);
 		iwe.cmd		  = SIOCGIWESSID;
 		iwe.u.data.flags  = 1;
-		iwe.u.data.length = this->bss_set[i].ssid.el.len;
+		iwe.u.data.length = this->bss_set[i].req.ssid.el.len;
 		current_ev = iwe_stream_add_point(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe,
-						  this->bss_set[i].ssid.essid);
+						  this->bss_set[i].req.ssid.essid);
 		iwe.cmd	   = SIOCGIWMODE;
-		iwe.u.mode = this->bss_set[i].bss_type;
+		iwe.u.mode = this->bss_set[i].req.bss_type;
 		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_UINT_LEN);
 		iwe.cmd = SIOCGIWFREQ;
-		iwe.u.freq.m = this->bss_set[i].ds_pset.chan;
+		iwe.u.freq.m = this->bss_set[i].req.ds_pset.chan;
 		iwe.u.freq.e = 0;
 		current_ev = iwe_stream_add_event(info, current_ev,
 						  extra + IW_SCAN_MAX_DATA,
 						  &iwe, IW_EV_FREQ_LEN);
 		iwe.cmd = SIOCGIWENCODE;
-		if (this->bss_set[i].cap_info & WL3501_MGMT_CAPABILITY_PRIVACY)
+		if (this->bss_set[i].req.cap_info & WL3501_MGMT_CAPABILITY_PRIVACY)
 			iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
 		else
 			iwe.u.data.flags = IW_ENCODE_DISABLED;
-- 
cgit v1.2.3


From a9a4c080deb33f44e08afe35f4ca4bb9ece89f4e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 19 Apr 2021 15:14:05 +0100
Subject: wlcore: Fix buffer overrun by snprintf due to incorrect buffer size

The size of the buffer than can be written to is currently incorrect, it is
always the size of the entire buffer even though the snprintf is writing
as position pos into the buffer. Fix this by setting the buffer size to be
the number of bytes left in the buffer, namely sizeof(buf) - pos.

Addresses-Coverity: ("Out-of-bounds access")
Fixes: 7b0e2c4f6be3 ("wlcore: fix overlapping snprintf arguments in debugfs")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419141405.180582-1-colin.king@canonical.com
---
 drivers/net/wireless/ti/wlcore/debugfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ti/wlcore/debugfs.h b/drivers/net/wireless/ti/wlcore/debugfs.h
index 715edfa5f89f..a9e13e6d65c5 100644
--- a/drivers/net/wireless/ti/wlcore/debugfs.h
+++ b/drivers/net/wireless/ti/wlcore/debugfs.h
@@ -84,7 +84,7 @@ static ssize_t sub## _ ##name## _read(struct file *file,		\
 	wl1271_debugfs_update_stats(wl);				\
 									\
 	for (i = 0; i < len && pos < sizeof(buf); i++)			\
-		pos += snprintf(buf + pos, sizeof(buf),			\
+		pos += snprintf(buf + pos, sizeof(buf) - pos,		\
 			 "[%d] = %d\n", i, stats->sub.name[i]);		\
 									\
 	return wl1271_format_buffer(userbuf, count, ppos, "%s", buf);	\
-- 
cgit v1.2.3


From 130f634da1af649205f4a3dd86cbe5c126b57914 Mon Sep 17 00:00:00 2001
From: Lee Gibson <leegib@gmail.com>
Date: Mon, 19 Apr 2021 15:58:42 +0100
Subject: qtnfmac: Fix possible buffer overflow in
 qtnf_event_handle_external_auth

Function qtnf_event_handle_external_auth calls memcpy without
checking the length.
A user could control that length and trigger a buffer overflow.
Fix by checking the length is within the maximum allowed size.

Signed-off-by: Lee Gibson <leegib@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210419145842.345787-1-leegib@gmail.com
---
 drivers/net/wireless/quantenna/qtnfmac/event.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c
index c775c177933b..8dc80574d08d 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/event.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/event.c
@@ -570,8 +570,10 @@ qtnf_event_handle_external_auth(struct qtnf_vif *vif,
 		return 0;
 
 	if (ev->ssid_len) {
-		memcpy(auth.ssid.ssid, ev->ssid, ev->ssid_len);
-		auth.ssid.ssid_len = ev->ssid_len;
+		int len = clamp_val(ev->ssid_len, 0, IEEE80211_MAX_SSID_LEN);
+
+		memcpy(auth.ssid.ssid, ev->ssid, len);
+		auth.ssid.ssid_len = len;
 	}
 
 	auth.key_mgmt_suite = le32_to_cpu(ev->akm_suite);
-- 
cgit v1.2.3


From 7a4fc7154e3275c5ce166d0ebd385b3def7a7ab3 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 19 Apr 2021 21:35:17 +0200
Subject: brcmfmac: Avoid GFP_ATOMIC when GFP_KERNEL is enough

A workqueue is not atomic, so constraints can be relaxed here.
GFP_KERNEL can be used instead of GFP_ATOMIC.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/b6e619415db4ee5de95389280d7195bb56e45f77.1618860716.git.christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index ea78fe527c5d..838b09b23abf 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -151,7 +151,7 @@ static void _brcmf_set_multicast_list(struct work_struct *work)
 	/* Send down the multicast list first. */
 	cnt = netdev_mc_count(ndev);
 	buflen = sizeof(cnt) + (cnt * ETH_ALEN);
-	buf = kmalloc(buflen, GFP_ATOMIC);
+	buf = kmalloc(buflen, GFP_KERNEL);
 	if (!buf)
 		return;
 	bufp = buf;
-- 
cgit v1.2.3


From 0891c89674e8d39eb47310e7c0646c2b07228fe7 Mon Sep 17 00:00:00 2001
From: Vignesh Sridhar <vignesh.sridhar@intel.com>
Date: Tue, 2 Mar 2021 10:12:00 -0800
Subject: ice: warn about potentially malicious VFs

Attempt to detect malicious VFs and, if suspected, log the information but
keep going to allow the user to take any desired actions.

Potentially malicious VFs are identified by checking if the VFs are
transmitting too many messages via the PF-VF mailbox which could cause an
overflow of this channel resulting in denial of service. This is done by
creating a snapshot or static capture of the mailbox buffer which can be
traversed and in which the messages sent by VFs are tracked.

Co-developed-by: Yashaswini Raghuram Prathivadi Bhayankaram <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Signed-off-by: Yashaswini Raghuram Prathivadi Bhayankaram <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Co-developed-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Co-developed-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Vignesh Sridhar <vignesh.sridhar@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |   1 +
 drivers/net/ethernet/intel/ice/ice_main.c        |   7 +-
 drivers/net/ethernet/intel/ice/ice_sriov.c       | 400 ++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_sriov.h       |  20 +-
 drivers/net/ethernet/intel/ice/ice_type.h        |  75 +++++
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c |  94 +++++-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h |  12 +
 7 files changed, 605 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 7ae10fd87265..e35db3ff583b 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -426,6 +426,7 @@ struct ice_pf {
 	u16 num_msix_per_vf;
 	/* used to ratelimit the MDD event logging */
 	unsigned long last_printed_mdd_jiffies;
+	DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT);
 	DECLARE_BITMAP(state, ICE_STATE_NBITS);
 	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
 	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6dbaa9099fdf..4ee85a217c6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1193,6 +1193,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 	case ICE_CTL_Q_MAILBOX:
 		cq = &hw->mailboxq;
 		qtype = "Mailbox";
+		/* we are going to try to detect a malicious VF, so set the
+		 * state to begin detection
+		 */
+		hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
 		break;
 	default:
 		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
@@ -1274,7 +1278,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 			ice_vf_lan_overflow_event(pf, &event);
 			break;
 		case ice_mbx_opc_send_msg_to_pf:
-			ice_vc_process_vf_msg(pf, &event);
+			if (!ice_is_malicious_vf(pf, &event, i, pending))
+				ice_vc_process_vf_msg(pf, &event);
 			break;
 		case ice_aqc_opc_fw_logging:
 			ice_output_fw_log(hw, &event.desc, event.msg_buf);
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 554f567476f3..aa11d07793d4 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -2,7 +2,6 @@
 /* Copyright (c) 2018, Intel Corporation. */
 
 #include "ice_common.h"
-#include "ice_adminq_cmd.h"
 #include "ice_sriov.h"
 
 /**
@@ -132,3 +131,402 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
 
 	return speed;
 }
+
+/* The mailbox overflow detection algorithm helps to check if there
+ * is a possibility of a malicious VF transmitting too many MBX messages to the
+ * PF.
+ * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
+ * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
+ * The struct ice_mbx_snapshot helps to track and traverse a static window of
+ * messages within the mailbox queue while looking for a malicious VF.
+ *
+ * 2. When the caller starts processing its mailbox queue in response to an
+ * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
+ * the algorithm can be run for the first time for that interrupt. This can be
+ * done via ice_mbx_reset_snapshot().
+ *
+ * 3. For every message read by the caller from the MBX Queue, the caller must
+ * call the detection algorithm's entry function ice_mbx_vf_state_handler().
+ * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
+ * filled as it is required to be passed to the algorithm.
+ *
+ * 4. Every time a message is read from the MBX queue, a VFId is received which
+ * is passed to the state handler. The boolean output is_malvf of the state
+ * handler ice_mbx_vf_state_handler() serves as an indicator to the caller
+ * whether this VF is malicious or not.
+ *
+ * 5. When a VF is identified to be malicious, the caller can send a message
+ * to the system administrator. The caller can invoke ice_mbx_report_malvf()
+ * to help determine if a malicious VF is to be reported or not. This function
+ * requires the caller to maintain a global bitmap to track all malicious VFs
+ * and pass that to ice_mbx_report_malvf() along with the VFID which was identified
+ * to be malicious by ice_mbx_vf_state_handler().
+ *
+ * 6. The global bitmap maintained by PF can be cleared completely if PF is in
+ * reset or the bit corresponding to a VF can be cleared if that VF is in reset.
+ * When a VF is shut down and brought back up, we assume that the new VF
+ * brought up is not malicious and hence report it if found malicious.
+ *
+ * 7. The function ice_mbx_reset_snapshot() is called to reset the information
+ * in ice_mbx_snapshot for every new mailbox interrupt handled.
+ *
+ * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated
+ * when driver is unloaded.
+ */
+#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
+/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
+ * the max messages check must be ignored in the algorithm
+ */
+#define ICE_IGNORE_MAX_MSG_CNT	0xFFFF
+
+/**
+ * ice_mbx_traverse - Pass through mailbox snapshot
+ * @hw: pointer to the HW struct
+ * @new_state: new algorithm state
+ *
+ * Traversing the mailbox static snapshot without checking
+ * for malicious VFs.
+ */
+static void
+ice_mbx_traverse(struct ice_hw *hw,
+		 enum ice_mbx_snapshot_state *new_state)
+{
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	u32 num_iterations;
+
+	snap_buf = &hw->mbx_snapshot.mbx_buf;
+
+	/* As mailbox buffer is circular, applying a mask
+	 * on the incremented iteration count.
+	 */
+	num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
+
+	/* Checking either of the below conditions to exit snapshot traversal:
+	 * Condition-1: If the number of iterations in the mailbox is equal to
+	 * the mailbox head which would indicate that we have reached the end
+	 * of the static snapshot.
+	 * Condition-2: If the maximum messages serviced in the mailbox for a
+	 * given interrupt is the highest possible value then there is no need
+	 * to check if the number of messages processed is equal to it. If not
+	 * check if the number of messages processed is greater than or equal
+	 * to the maximum number of mailbox entries serviced in current work item.
+	 */
+	if (num_iterations == snap_buf->head ||
+	    (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
+	     ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
+		*new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_detect_malvf - Detect malicious VF in snapshot
+ * @hw: pointer to the HW struct
+ * @vf_id: relative virtual function ID
+ * @new_state: new algorithm state
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * This function tracks the number of asynchronous messages
+ * sent per VF and marks the VF as malicious if it exceeds
+ * the permissible number of messages to send.
+ */
+static enum ice_status
+ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id,
+		     enum ice_mbx_snapshot_state *new_state,
+		     bool *is_malvf)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+	if (vf_id >= snap->mbx_vf.vfcntr_len)
+		return ICE_ERR_OUT_OF_RANGE;
+
+	/* increment the message count in the VF array */
+	snap->mbx_vf.vf_cntr[vf_id]++;
+
+	if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD)
+		*is_malvf = true;
+
+	/* continue to iterate through the mailbox snapshot */
+	ice_mbx_traverse(hw, new_state);
+
+	return 0;
+}
+
+/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
+ * @snap: pointer to mailbox snapshot structure in the ice_hw struct
+ *
+ * Reset the mailbox snapshot structure and clear VF counter array.
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
+{
+	u32 vfcntr_len;
+
+	if (!snap || !snap->mbx_vf.vf_cntr)
+		return;
+
+	/* Clear VF counters. */
+	vfcntr_len = snap->mbx_vf.vfcntr_len;
+	if (vfcntr_len)
+		memset(snap->mbx_vf.vf_cntr, 0,
+		       (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr)));
+
+	/* Reset mailbox snapshot for a new capture. */
+	memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+	snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_vf_state_handler - Handle states of the overflow algorithm
+ * @hw: pointer to the HW struct
+ * @mbx_data: pointer to structure containing mailbox data
+ * @vf_id: relative virtual function (VF) ID
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * The function serves as an entry point for the malicious VF
+ * detection algorithm by handling the different states and state
+ * transitions of the algorithm:
+ * New snapshot: This state is entered when creating a new static
+ * snapshot. The data from any previous mailbox snapshot is
+ * cleared and a new capture of the mailbox head and tail is
+ * logged. This will be the new static snapshot to detect
+ * asynchronous messages sent by VFs. On capturing the snapshot
+ * and depending on whether the number of pending messages in that
+ * snapshot exceed the watermark value, the state machine enters
+ * traverse or detect states.
+ * Traverse: If pending message count is below watermark then iterate
+ * through the snapshot without any action on VF.
+ * Detect: If pending message count exceeds watermark traverse
+ * the static snapshot and look for a malicious VF.
+ */
+enum ice_status
+ice_mbx_vf_state_handler(struct ice_hw *hw,
+			 struct ice_mbx_data *mbx_data, u16 vf_id,
+			 bool *is_malvf)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+	enum ice_mbx_snapshot_state new_state;
+	enum ice_status status = 0;
+
+	if (!is_malvf || !mbx_data)
+		return ICE_ERR_BAD_PTR;
+
+	/* When entering the mailbox state machine assume that the VF
+	 * is not malicious until detected.
+	 */
+	*is_malvf = false;
+
+	 /* Checking if max messages allowed to be processed while servicing current
+	  * interrupt is not less than the defined AVF message threshold.
+	  */
+	if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD)
+		return ICE_ERR_INVAL_SIZE;
+
+	/* The watermark value should not be lesser than the threshold limit
+	 * set for the number of asynchronous messages a VF can send to mailbox
+	 * nor should it be greater than the maximum number of messages in the
+	 * mailbox serviced in current interrupt.
+	 */
+	if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD ||
+	    mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx)
+		return ICE_ERR_PARAM;
+
+	new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+	snap_buf = &snap->mbx_buf;
+
+	switch (snap_buf->state) {
+	case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT:
+		/* Clear any previously held data in mailbox snapshot structure. */
+		ice_mbx_reset_snapshot(snap);
+
+		/* Collect the pending ARQ count, number of messages processed and
+		 * the maximum number of messages allowed to be processed from the
+		 * Mailbox for current interrupt.
+		 */
+		snap_buf->num_pending_arq = mbx_data->num_pending_arq;
+		snap_buf->num_msg_proc = mbx_data->num_msg_proc;
+		snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx;
+
+		/* Capture a new static snapshot of the mailbox by logging the
+		 * head and tail of snapshot and set num_iterations to the tail
+		 * value to mark the start of the iteration through the snapshot.
+		 */
+		snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean +
+						  mbx_data->num_pending_arq);
+		snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1);
+		snap_buf->num_iterations = snap_buf->tail;
+
+		/* Pending ARQ messages returned by ice_clean_rq_elem
+		 * is the difference between the head and tail of the
+		 * mailbox queue. Comparing this value against the watermark
+		 * helps to check if we potentially have malicious VFs.
+		 */
+		if (snap_buf->num_pending_arq >=
+		    mbx_data->async_watermark_val) {
+			new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+			status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+		} else {
+			new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+			ice_mbx_traverse(hw, &new_state);
+		}
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_TRAVERSE:
+		new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+		ice_mbx_traverse(hw, &new_state);
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_DETECT:
+		new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+		status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf);
+		break;
+
+	default:
+		new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+		status = ICE_ERR_CFG;
+	}
+
+	snap_buf->state = new_state;
+
+	return status;
+}
+
+/**
+ * ice_mbx_report_malvf - Track and note malicious VF
+ * @hw: pointer to the HW struct
+ * @all_malvfs: all malicious VFs tracked by PF
+ * @bitmap_len: length of bitmap in bits
+ * @vf_id: relative virtual function ID of the malicious VF
+ * @report_malvf: boolean to indicate if malicious VF must be reported
+ *
+ * This function will update a bitmap that keeps track of the malicious
+ * VFs attached to the PF. A malicious VF must be reported only once if
+ * discovered between VF resets or loading so the function checks
+ * the input vf_id against the bitmap to verify if the VF has been
+ * detected in any previous mailbox iterations.
+ */
+enum ice_status
+ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
+		     u16 bitmap_len, u16 vf_id, bool *report_malvf)
+{
+	if (!all_malvfs || !report_malvf)
+		return ICE_ERR_PARAM;
+
+	*report_malvf = false;
+
+	if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len)
+		return ICE_ERR_INVAL_SIZE;
+
+	if (vf_id >= bitmap_len)
+		return ICE_ERR_OUT_OF_RANGE;
+
+	/* If the vf_id is found in the bitmap set bit and boolean to true */
+	if (!test_and_set_bit(vf_id, all_malvfs))
+		*report_malvf = true;
+
+	return 0;
+}
+
+/**
+ * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID
+ * @snap: pointer to the mailbox snapshot structure
+ * @all_malvfs: all malicious VFs tracked by PF
+ * @bitmap_len: length of bitmap in bits
+ * @vf_id: relative virtual function ID of the malicious VF
+ *
+ * In case of a VF reset, this function can be called to clear
+ * the bit corresponding to the VF ID in the bitmap tracking all
+ * malicious VFs attached to the PF. The function also clears the
+ * VF counter array at the index of the VF ID. This is to ensure
+ * that the new VF loaded is not considered malicious before going
+ * through the overflow detection algorithm.
+ */
+enum ice_status
+ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
+		    u16 bitmap_len, u16 vf_id)
+{
+	if (!snap || !all_malvfs)
+		return ICE_ERR_PARAM;
+
+	if (bitmap_len < snap->mbx_vf.vfcntr_len)
+		return ICE_ERR_INVAL_SIZE;
+
+	/* Ensure VF ID value is not larger than bitmap or VF counter length */
+	if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len)
+		return ICE_ERR_OUT_OF_RANGE;
+
+	/* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */
+	clear_bit(vf_id, all_malvfs);
+
+	/* Clear the VF counter in the mailbox snapshot structure for that VF ID.
+	 * This is to ensure that if a VF is unloaded and a new one brought back
+	 * up with the same VF ID for a snapshot currently in traversal or detect
+	 * state the counter for that VF ID does not increment on top of existing
+	 * values in the mailbox overflow detection algorithm.
+	 */
+	snap->mbx_vf.vf_cntr[vf_id] = 0;
+
+	return 0;
+}
+
+/**
+ * ice_mbx_init_snapshot - Initialize mailbox snapshot structure
+ * @hw: pointer to the hardware structure
+ * @vf_count: number of VFs allocated on a PF
+ *
+ * Clear the mailbox snapshot structure and allocate memory
+ * for the VF counter array based on the number of VFs allocated
+ * on that PF.
+ *
+ * Assumption: This function will assume ice_get_caps() has already been
+ * called to ensure that the vf_count can be compared against the number
+ * of VFs supported as defined in the functional capabilities of the device.
+ */
+enum ice_status ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+	/* Ensure that the number of VFs allocated is non-zero and
+	 * is not greater than the number of supported VFs defined in
+	 * the functional capabilities of the PF.
+	 */
+	if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs)
+		return ICE_ERR_INVAL_SIZE;
+
+	snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count,
+					    sizeof(*snap->mbx_vf.vf_cntr),
+					    GFP_KERNEL);
+	if (!snap->mbx_vf.vf_cntr)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Setting the VF counter length to the number of allocated
+	 * VFs for given PF's functional capabilities.
+	 */
+	snap->mbx_vf.vfcntr_len = vf_count;
+
+	/* Clear mbx_buf in the mailbox snaphot structure and setting the
+	 * mailbox snapshot state to a new capture.
+	 */
+	memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+	snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+
+	return 0;
+}
+
+/**
+ * ice_mbx_deinit_snapshot - Free mailbox snapshot structure
+ * @hw: pointer to the hardware structure
+ *
+ * Clear the mailbox snapshot structure and free the VF counter array.
+ */
+void ice_mbx_deinit_snapshot(struct ice_hw *hw)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+	/* Free VF counter array and reset VF counter length */
+	devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr);
+	snap->mbx_vf.vfcntr_len = 0;
+
+	/* Clear mbx_buf in the mailbox snaphot structure */
+	memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
index 3d78a0795138..161dc55d9e9c 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.h
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -4,7 +4,14 @@
 #ifndef _ICE_SRIOV_H_
 #define _ICE_SRIOV_H_
 
-#include "ice_common.h"
+#include "ice_type.h"
+#include "ice_controlq.h"
+
+/* Defining the mailbox message threshold as 63 asynchronous
+ * pending messages. Normal VF functionality does not require
+ * sending more than 63 asynchronous pending message.
+ */
+#define ICE_ASYNC_VF_MSG_THRESHOLD	63
 
 #ifdef CONFIG_PCI_IOV
 enum ice_status
@@ -12,6 +19,17 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
 		      u8 *msg, u16 msglen, struct ice_sq_cd *cd);
 
 u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+enum ice_status
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+			 u16 vf_id, bool *is_mal_vf);
+enum ice_status
+ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs,
+		    u16 bitmap_len, u16 vf_id);
+enum ice_status ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count);
+void ice_mbx_deinit_snapshot(struct ice_hw *hw);
+enum ice_status
+ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs,
+		     u16 bitmap_len, u16 vf_id, bool *report_malvf);
 #else /* CONFIG_PCI_IOV */
 static inline enum ice_status
 ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 9b80962ff92f..4474dd6a7ba1 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -630,6 +630,80 @@ struct ice_fw_log_cfg {
 	struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX];
 };
 
+/* Enum defining the different states of the mailbox snapshot in the
+ * PF-VF mailbox overflow detection algorithm. The snapshot can be in
+ * states:
+ * 1. ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT - generate a new static snapshot
+ * within the mailbox buffer.
+ * 2. ICE_MAL_VF_DETECT_STATE_TRAVERSE - iterate through the mailbox snaphot
+ * 3. ICE_MAL_VF_DETECT_STATE_DETECT - track the messages sent per VF via the
+ * mailbox and mark any VFs sending more messages than the threshold limit set.
+ * 4. ICE_MAL_VF_DETECT_STATE_INVALID - Invalid mailbox state set to 0xFFFFFFFF.
+ */
+enum ice_mbx_snapshot_state {
+	ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT = 0,
+	ICE_MAL_VF_DETECT_STATE_TRAVERSE,
+	ICE_MAL_VF_DETECT_STATE_DETECT,
+	ICE_MAL_VF_DETECT_STATE_INVALID = 0xFFFFFFFF,
+};
+
+/* Structure to hold information of the static snapshot and the mailbox
+ * buffer data used to generate and track the snapshot.
+ * 1. state: the state of the mailbox snapshot in the malicious VF
+ * detection state handler ice_mbx_vf_state_handler()
+ * 2. head: head of the mailbox snapshot in a circular mailbox buffer
+ * 3. tail: tail of the mailbox snapshot in a circular mailbox buffer
+ * 4. num_iterations: number of messages traversed in circular mailbox buffer
+ * 5. num_msg_proc: number of messages processed in mailbox
+ * 6. num_pending_arq: number of pending asynchronous messages
+ * 7. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ */
+struct ice_mbx_snap_buffer_data {
+	enum ice_mbx_snapshot_state state;
+	u32 head;
+	u32 tail;
+	u32 num_iterations;
+	u16 num_msg_proc;
+	u16 num_pending_arq;
+	u16 max_num_msgs_mbx;
+};
+
+/* Structure to track messages sent by VFs on mailbox:
+ * 1. vf_cntr: a counter array of VFs to track the number of
+ * asynchronous messages sent by each VF
+ * 2. vfcntr_len: number of entries in VF counter array
+ */
+struct ice_mbx_vf_counter {
+	u32 *vf_cntr;
+	u32 vfcntr_len;
+};
+
+/* Structure to hold data relevant to the captured static snapshot
+ * of the PF-VF mailbox.
+ */
+struct ice_mbx_snapshot {
+	struct ice_mbx_snap_buffer_data mbx_buf;
+	struct ice_mbx_vf_counter mbx_vf;
+};
+
+/* Structure to hold data to be used for capturing or updating a
+ * static snapshot.
+ * 1. num_msg_proc: number of messages processed in mailbox
+ * 2. num_pending_arq: number of pending asynchronous messages
+ * 3. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ * 4. async_watermark_val: An upper threshold set by caller to determine
+ * if the pending arq count is large enough to assume that there is
+ * the possibility of a mailicious VF.
+ */
+struct ice_mbx_data {
+	u16 num_msg_proc;
+	u16 num_pending_arq;
+	u16 max_num_msgs_mbx;
+	u16 async_watermark_val;
+};
+
 /* Port hardware description */
 struct ice_hw {
 	u8 __iomem *hw_addr;
@@ -761,6 +835,7 @@ struct ice_hw {
 	DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
 	struct mutex rss_locks;	/* protect RSS configuration */
 	struct list_head rss_list_head;
+	struct ice_mbx_snapshot mbx_snapshot;
 };
 
 /* Statistics collected by each port, VSI, VEB, and S-channel */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index e38d4adc5b8d..a3ed4b84bba6 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -424,6 +424,14 @@ void ice_free_vfs(struct ice_pf *pf)
 			wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
 		}
 	}
+
+	/* clear malicious info if the VFs are getting released */
+	for (i = 0; i < tmp; i++)
+		if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs,
+					ICE_MAX_VF_COUNT, i))
+			dev_dbg(dev, "failed to clear malicious VF state for VF %u\n",
+				i);
+
 	clear_bit(ICE_VF_DIS, pf->state);
 	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
 }
@@ -1257,6 +1265,11 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	if (!pf->num_alloc_vfs)
 		return false;
 
+	/* clear all malicious info if the VFs are getting reset */
+	ice_for_each_vf(pf, i)
+		if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, i))
+			dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i);
+
 	/* If VFs have been disabled, there is no need to reset */
 	if (test_and_set_bit(ICE_VF_DIS, pf->state))
 		return false;
@@ -1437,6 +1450,10 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	ice_vf_rebuild_vsi_with_release(vf);
 	ice_vf_post_vsi_rebuild(vf);
 
+	/* if the VF has been reset allow it to come up again */
+	if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id))
+		dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i);
+
 	return true;
 }
 
@@ -1769,6 +1786,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
 	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_status status;
 	int err;
 
 	err = ice_check_sriov_allowed(pf);
@@ -1777,6 +1795,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 
 	if (!num_vfs) {
 		if (!pci_vfs_assigned(pdev)) {
+			ice_mbx_deinit_snapshot(&pf->hw);
 			ice_free_vfs(pf);
 			if (pf->lag)
 				ice_enable_lag(pf->lag);
@@ -1787,9 +1806,15 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 		return -EBUSY;
 	}
 
+	status = ice_mbx_init_snapshot(&pf->hw, num_vfs);
+	if (status)
+		return ice_status_to_errno(status);
+
 	err = ice_pci_sriov_ena(pf, num_vfs);
-	if (err)
+	if (err) {
+		ice_mbx_deinit_snapshot(&pf->hw);
 		return err;
+	}
 
 	if (pf->lag)
 		ice_disable_lag(pf->lag);
@@ -4255,3 +4280,70 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
 		}
 	}
 }
+
+/**
+ * ice_is_malicious_vf - helper function to detect a malicious VF
+ * @pf: ptr to struct ice_pf
+ * @event: pointer to the AQ event
+ * @num_msg_proc: the number of messages processed so far
+ * @num_msg_pending: the number of messages peinding in admin queue
+ */
+bool
+ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
+		    u16 num_msg_proc, u16 num_msg_pending)
+{
+	s16 vf_id = le16_to_cpu(event->desc.retval);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_mbx_data mbxdata;
+	enum ice_status status;
+	bool malvf = false;
+	struct ice_vf *vf;
+
+	if (ice_validate_vf_id(pf, vf_id))
+		return false;
+
+	vf = &pf->vf[vf_id];
+	/* Check if VF is disabled. */
+	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return false;
+
+	mbxdata.num_msg_proc = num_msg_proc;
+	mbxdata.num_pending_arq = num_msg_pending;
+	mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries;
+#define ICE_MBX_OVERFLOW_WATERMARK 64
+	mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
+
+	/* check to see if we have a malicious VF */
+	status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf);
+	if (status)
+		return false;
+
+	if (malvf) {
+		bool report_vf = false;
+
+		/* if the VF is malicious and we haven't let the user
+		 * know about it, then let them know now
+		 */
+		status = ice_mbx_report_malvf(&pf->hw, pf->malvfs,
+					      ICE_MAX_VF_COUNT, vf_id,
+					      &report_vf);
+		if (status)
+			dev_dbg(dev, "Error reporting malicious VF\n");
+
+		if (report_vf) {
+			struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+
+			if (pf_vsi)
+				dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
+					 &vf->dflt_lan_addr.addr[0],
+					 pf_vsi->netdev->dev_addr);
+		}
+
+		return true;
+	}
+
+	/* if there was an error in detection or the VF is not malicious then
+	 * return false
+	 */
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 46abc5388fc7..bcc2890c930a 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -119,6 +119,9 @@ void ice_vc_notify_reset(struct ice_pf *pf);
 bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr);
 bool ice_reset_vf(struct ice_vf *vf, bool is_vflr);
 void ice_restore_all_vfs_msi_state(struct pci_dev *pdev);
+bool
+ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event,
+		    u16 num_msg_proc, u16 num_msg_pending);
 
 int
 ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
@@ -158,6 +161,15 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
 #define ice_print_vf_rx_mdd_event(vf) do {} while (0)
 #define ice_restore_all_vfs_msi_state(pdev) do {} while (0)
 
+static inline bool
+ice_is_malicious_vf(struct ice_pf __always_unused *pf,
+		    struct ice_rq_event_info __always_unused *event,
+		    u16 __always_unused num_msg_proc,
+		    u16 __always_unused num_msg_pending)
+{
+	return false;
+}
+
 static inline bool
 ice_reset_all_vfs(struct ice_pf __always_unused *pf,
 		  bool __always_unused is_vflr)
-- 
cgit v1.2.3


From c0dcaa55f91d925c9ac2c950ff84138534337a6c Mon Sep 17 00:00:00 2001
From: Michal Swiatkowski <michal.swiatkowski@intel.com>
Date: Tue, 2 Mar 2021 10:12:01 -0800
Subject: ice: Allow ignoring opcodes on specific VF

Declare bitmap of allowed commands on VF. Initialize default
opcodes list that should be always supported. Declare array of
supported opcodes for each caps used in virtchnl code.

Change allowed bitmap by setting or clearing corresponding
bit to allowlist (bit set) or denylist (bit clear).

Signed-off-by: Michal Swiatkowski <michal.swiatkowski@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile            |   1 +
 .../ethernet/intel/ice/ice_virtchnl_allowlist.c    | 165 +++++++++++++++++++++
 .../ethernet/intel/ice/ice_virtchnl_allowlist.h    |  13 ++
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c   |  18 +++
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h   |   1 +
 include/linux/avf/virtchnl.h                       |   1 +
 6 files changed, 199 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index f391691e2c7e..07fe857e9e3a 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -26,6 +26,7 @@ ice-y := ice_main.o	\
 	 ice_fw_update.o \
 	 ice_lag.o	\
 	 ice_ethtool.o
+ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
 ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
new file mode 100644
index 000000000000..5a0fbb47346f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice_virtchnl_allowlist.h"
+
+/* Purpose of this file is to share functionality to allowlist or denylist
+ * opcodes used in PF <-> VF communication. Group of opcodes:
+ * - default -> should be always allowed after creating VF,
+ *   default_allowlist_opcodes
+ * - opcodes needed by VF to work correctly, but not associated with caps ->
+ *   should be allowed after successful VF resources allocation,
+ *   working_allowlist_opcodes
+ * - opcodes needed by VF when caps are activated
+ *
+ * Caps that don't use new opcodes (no opcodes should be allowed):
+ * - VIRTCHNL_VF_OFFLOAD_RSS_AQ
+ * - VIRTCHNL_VF_OFFLOAD_RSS_REG
+ * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR
+ * - VIRTCHNL_VF_OFFLOAD_CRC
+ * - VIRTCHNL_VF_OFFLOAD_RX_POLLING
+ * - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_USO
+ */
+
+/* default opcodes to communicate with VF */
+static const u32 default_allowlist_opcodes[] = {
+	VIRTCHNL_OP_GET_VF_RESOURCES, VIRTCHNL_OP_VERSION, VIRTCHNL_OP_RESET_VF,
+};
+
+/* opcodes supported after successful VIRTCHNL_OP_GET_VF_RESOURCES */
+static const u32 working_allowlist_opcodes[] = {
+	VIRTCHNL_OP_CONFIG_TX_QUEUE, VIRTCHNL_OP_CONFIG_RX_QUEUE,
+	VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+	VIRTCHNL_OP_ENABLE_QUEUES, VIRTCHNL_OP_DISABLE_QUEUES,
+	VIRTCHNL_OP_GET_STATS, VIRTCHNL_OP_EVENT,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_L2 */
+static const u32 l2_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_ETH_ADDR, VIRTCHNL_OP_DEL_ETH_ADDR,
+	VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_REQ_QUEUES */
+static const u32 req_queues_allowlist_opcodes[] = {
+	VIRTCHNL_OP_REQUEST_QUEUES,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_VLAN */
+static const u32 vlan_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_VLAN, VIRTCHNL_OP_DEL_VLAN,
+	VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_RSS_PF */
+static const u32 rss_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT,
+	VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_FDIR_PF */
+static const u32 fdir_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER,
+};
+
+struct allowlist_opcode_info {
+	const u32 *opcodes;
+	size_t size;
+};
+
+#define BIT_INDEX(caps) (HWEIGHT((caps) - 1))
+#define ALLOW_ITEM(caps, list) \
+	[BIT_INDEX(caps)] = { \
+		.opcodes = list, \
+		.size = ARRAY_SIZE(list) \
+	}
+static const struct allowlist_opcode_info allowlist_opcodes[] = {
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_L2, l2_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
+};
+
+/**
+ * ice_vc_is_opcode_allowed - check if this opcode is allowed on this VF
+ * @vf: pointer to VF structure
+ * @opcode: virtchnl opcode
+ *
+ * Return true if message is allowed on this VF
+ */
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode)
+{
+	if (opcode >= VIRTCHNL_OP_MAX)
+		return false;
+
+	return test_bit(opcode, vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_allowlist_opcodes - allowlist selected opcodes
+ * @vf: pointer to VF structure
+ * @opcodes: array of opocodes to allowlist
+ * @size: size of opcodes array
+ *
+ * Function should be called to allowlist opcodes on VF.
+ */
+static void
+ice_vc_allowlist_opcodes(struct ice_vf *vf, const u32 *opcodes, size_t size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		set_bit(opcodes[i], vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_clear_allowlist - clear all allowlist opcodes
+ * @vf: pointer to VF structure
+ */
+static void ice_vc_clear_allowlist(struct ice_vf *vf)
+{
+	bitmap_zero(vf->opcodes_allowlist, VIRTCHNL_OP_MAX);
+}
+
+/**
+ * ice_vc_set_default_allowlist - allowlist default opcodes for VF
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_default_allowlist(struct ice_vf *vf)
+{
+	ice_vc_clear_allowlist(vf);
+	ice_vc_allowlist_opcodes(vf, default_allowlist_opcodes,
+				 ARRAY_SIZE(default_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_working_allowlist - allowlist opcodes needed to by VF to work
+ * @vf: pointer to VF structure
+ *
+ * allowlist opcodes that aren't associated with specific caps, but
+ * are needed by VF to work.
+ */
+void ice_vc_set_working_allowlist(struct ice_vf *vf)
+{
+	ice_vc_allowlist_opcodes(vf, working_allowlist_opcodes,
+				 ARRAY_SIZE(working_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_caps_allowlist - allowlist VF opcodes according caps
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_caps_allowlist(struct ice_vf *vf)
+{
+	unsigned long caps = vf->driver_caps;
+	unsigned int i;
+
+	for_each_set_bit(i, &caps, ARRAY_SIZE(allowlist_opcodes))
+		ice_vc_allowlist_opcodes(vf, allowlist_opcodes[i].opcodes,
+					 allowlist_opcodes[i].size);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
new file mode 100644
index 000000000000..d3ae86ded219
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_ALLOWLIST_H_
+#define _ICE_VIRTCHNL_ALLOWLIST_H_
+#include "ice.h"
+
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode);
+
+void ice_vc_set_default_allowlist(struct ice_vf *vf);
+void ice_vc_set_working_allowlist(struct ice_vf *vf);
+void ice_vc_set_caps_allowlist(struct ice_vf *vf);
+#endif /* _ICE_VIRTCHNL_ALLOWLIST_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a3ed4b84bba6..ccd6b3e8a5a9 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -5,6 +5,7 @@
 #include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_fltr.h"
+#include "ice_virtchnl_allowlist.h"
 
 /**
  * ice_validate_vf_id - helper to check if VF ID is valid
@@ -1314,6 +1315,9 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v) {
 		vf = &pf->vf[v];
 
+		vf->driver_caps = 0;
+		ice_vc_set_default_allowlist(vf);
+
 		ice_vf_fdir_exit(vf);
 		/* clean VF control VSI when resetting VFs since it should be
 		 * setup only when VF creates its first FDIR rule.
@@ -1418,6 +1422,9 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 		usleep_range(10, 20);
 	}
 
+	vf->driver_caps = 0;
+	ice_vc_set_default_allowlist(vf);
+
 	/* Display a warning if VF didn't manage to reset in time, but need to
 	 * continue on with the operation.
 	 */
@@ -1625,6 +1632,7 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
 		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
 		vf->spoofchk = true;
 		vf->num_vf_qs = pf->num_qps_per_vf;
+		ice_vc_set_default_allowlist(vf);
 
 		/* ctrl_vsi_idx will be set to a valid value only when VF
 		 * creates its first fdir rule.
@@ -2127,6 +2135,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	/* match guest capabilities */
 	vf->driver_caps = vfres->vf_cap_flags;
 
+	ice_vc_set_caps_allowlist(vf);
+	ice_vc_set_working_allowlist(vf);
+
 	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
 
 err:
@@ -3840,6 +3851,13 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
 			err = -EINVAL;
 	}
 
+	if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
+		ice_vc_send_msg_to_vf(vf, v_opcode,
+				      VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
+				      0);
+		return;
+	}
+
 error_handler:
 	if (err) {
 		ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index bcc2890c930a..d800ed83d6c3 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -103,6 +103,7 @@ struct ice_vf {
 	u16 num_vf_qs;			/* num of queue configured per VF */
 	struct ice_mdd_vf_events mdd_rx_events;
 	struct ice_mdd_vf_events mdd_tx_events;
+	DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
 };
 
 #ifdef CONFIG_PCI_IOV
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 40dd6afbfd81..debdd196773b 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -139,6 +139,7 @@ enum virtchnl_ops {
 	/* opcode 34 - 46 are reserved */
 	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
 	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
+	VIRTCHNL_OP_MAX,
 };
 
 /* These macros are used to generate compilation errors if a structure/union
-- 
cgit v1.2.3


From 142da08c4dc0afd07f9136b4812d5386bd6e1717 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:12:12 -0800
Subject: ice: Advertise virtchnl UDP segmentation offload capability

As the hardware is capable of supporting UDP segmentation offload, add a
capability bit to virtchnl.h to communicate this and have the driver
advertise its support.

Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 +++
 include/linux/avf/virtchnl.h                     | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index ccd6b3e8a5a9..1292a0b06eb5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2118,6 +2118,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO;
+
 	vfres->num_vsis = 1;
 	/* Tx and Rx queue are equal for VF */
 	vfres->num_queue_pairs = vsi->num_txq;
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index debdd196773b..9e0341cf2c36 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -251,6 +251,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
+#define VIRTCHNL_VF_OFFLOAD_USO			0X02000000
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
 
 /* Define below the capability flags that are not offloads */
-- 
cgit v1.2.3


From c91a4f9feb67a199c27c2fe4df98ef9a49ab8ba0 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:12:13 -0800
Subject: iavf: add support for UDP Segmentation Offload

Add code to support UDP segmentation offload (USO) for
hardware that supports it.

Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c     |  2 ++
 drivers/net/ethernet/intel/iavf/iavf_txrx.c     | 15 +++++++++++----
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index a3268c894d85..6cd6b9dfe5d4 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3542,6 +3542,8 @@ int iavf_process_config(struct iavf_adapter *adapter)
 	/* Enable cloud filter if ADQ is supported */
 	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
 		hw_features |= NETIF_F_HW_TC;
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_USO)
+		hw_features |= NETIF_F_GSO_UDP_L4;
 
 	netdev->hw_features |= hw_features;
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index d6cba53a3a21..3525eab8e9f9 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1905,13 +1905,20 @@ static int iavf_tso(struct iavf_tx_buffer *first, u8 *hdr_len,
 
 	/* determine offset of inner transport header */
 	l4_offset = l4.hdr - skb->data;
-
 	/* remove payload length from inner checksum */
 	paylen = skb->len - l4_offset;
-	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
 
-	/* compute length of segmentation header */
-	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of UDP segmentation header */
+		*hdr_len = (u8)sizeof(l4.udp) + l4_offset;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of TCP segmentation header */
+		*hdr_len = (u8)((l4.tcp->doff * 4) + l4_offset);
+	}
 
 	/* pull values out of skb_shinfo */
 	gso_size = skb_shinfo(skb)->gso_size;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 3069092468b2..9aaade0aae4c 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -140,6 +140,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
 	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 	       VIRTCHNL_VF_OFFLOAD_ADQ |
+	       VIRTCHNL_VF_OFFLOAD_USO |
 	       VIRTCHNL_VF_OFFLOAD_FDIR_PF |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
-- 
cgit v1.2.3


From c9b5f681fe418d68f1804512c7fbcd5920d0594e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 22 Feb 2020 17:10:54 +0000
Subject: ice: remove redundant assignment to pointer vsi

Pointer vsi is being re-assigned a value that is never read,
the assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 1292a0b06eb5..c40560a9443b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2792,7 +2792,6 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 		set_bit(vf_q_id, vf->rxq_ena);
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
 	q_map = vqs->tx_queues;
 	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
 		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
-- 
cgit v1.2.3


From c5afbe99b778c15254d4496a74d3252ef6ba0a14 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Tue, 2 Mar 2021 10:15:39 -0800
Subject: ice: Add helper function to get the VF's VSI

Currently, the driver gets the VF's VSI by using a long string of
dereferences (i.e. vf->pf->vsi[vf->lan_vsi_idx]). If the method to get
the VF's VSI were to change the driver would have to change it in every
location. Fix this by adding the helper ice_get_vf_vsi().

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 82 +++++++++++-------------
 1 file changed, 39 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index c40560a9443b..baada80c98ab 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -7,6 +7,15 @@
 #include "ice_fltr.h"
 #include "ice_virtchnl_allowlist.h"
 
+/**
+ * ice_get_vf_vsi - get VF's VSI based on the stored index
+ * @vf: VF used to get VSI
+ */
+static struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+{
+	return vf->pf->vsi[vf->lan_vsi_idx];
+}
+
 /**
  * ice_validate_vf_id - helper to check if VF ID is valid
  * @pf: pointer to the PF structure
@@ -198,7 +207,7 @@ static void ice_vf_invalidate_vsi(struct ice_vf *vf)
  */
 static void ice_vf_vsi_release(struct ice_vf *vf)
 {
-	ice_vsi_release(vf->pf->vsi[vf->lan_vsi_idx]);
+	ice_vsi_release(ice_get_vf_vsi(vf));
 	ice_vf_invalidate_vsi(vf);
 }
 
@@ -274,7 +283,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
 	struct ice_hw *hw;
 
 	hw = &pf->hw;
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 
 	dev = ice_pf_to_dev(pf);
 	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
@@ -349,10 +358,7 @@ void ice_set_vf_state_qs_dis(struct ice_vf *vf)
  */
 static void ice_dis_vf_qs(struct ice_vf *vf)
 {
-	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi;
-
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
 	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
 	ice_vsi_stop_all_rx_rings(vsi);
@@ -639,8 +645,8 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
  */
 static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 	u16 vlan_id = 0;
 	int err;
 
@@ -676,8 +682,8 @@ static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
  */
 static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 	enum ice_status status;
 	u8 broadcast[ETH_ALEN];
 
@@ -778,8 +784,8 @@ static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
  */
 static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 	struct ice_hw *hw = &vf->pf->hw;
 	u32 reg;
 
@@ -826,7 +832,7 @@ static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
  */
 static void ice_ena_vf_mappings(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
 	ice_ena_vf_msix_mappings(vf);
 	ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
@@ -1089,7 +1095,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
 
 static void ice_vf_clear_counters(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
 	vf->num_mac = 0;
 	vsi->num_vlan = 0;
@@ -1149,8 +1155,8 @@ static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi)
  */
 static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
 	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
 	ice_vf_set_host_trust_cfg(vf);
 
@@ -1190,10 +1196,8 @@ static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf)
  */
 static int ice_vf_rebuild_vsi(struct ice_vf *vf)
 {
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi;
-
-	vsi = pf->vsi[vf->lan_vsi_idx];
 
 	if (ice_vsi_rebuild(vsi, true)) {
 		dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
@@ -1392,7 +1396,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
 	ice_trigger_vf_reset(vf, is_vflr, false);
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 
 	if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
 		ice_dis_vf_qs(vf);
@@ -1441,7 +1445,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 		else
 			promisc_m = ICE_UCAST_PROMISC_BITS;
 
-		vsi = pf->vsi[vf->lan_vsi_idx];
+		vsi = ice_get_vf_vsi(vf);
 		if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true))
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
@@ -1887,7 +1891,7 @@ static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
 		struct ice_vsi *vsi;
 		u16 rxq_idx;
 
-		vsi = pf->vsi[vf->lan_vsi_idx];
+		vsi = ice_get_vf_vsi(vf);
 
 		ice_for_each_rxq(vsi, rxq_idx)
 			if (vsi->rxq_map[rxq_idx] == pfq)
@@ -2027,8 +2031,7 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
  */
 static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
-	struct ice_port_info *pi = vsi->port_info;
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
 	u16 max_frame_size;
 
 	max_frame_size = pi->phy.link_info.max_frame_size;
@@ -2076,7 +2079,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 				  VIRTCHNL_VF_OFFLOAD_VLAN;
 
 	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto err;
@@ -2243,7 +2246,6 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct virtchnl_rss_key *vrk =
 		(struct virtchnl_rss_key *)msg;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2266,7 +2268,7 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2290,7 +2292,6 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
 {
 	struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2313,7 +2314,7 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2396,7 +2397,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 	if (ret)
 		return ret;
 
-	vf_vsi = pf->vsi[vf->lan_vsi_idx];
+	vf_vsi = ice_get_vf_vsi(vf);
 	if (!vf_vsi) {
 		netdev_err(netdev, "VSI %d for VF %d is null\n",
 			   vf->lan_vsi_idx, vf->vf_id);
@@ -2501,7 +2502,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2637,7 +2638,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
 	struct virtchnl_queue_select *vqs =
 		(struct virtchnl_queue_select *)msg;
 	struct ice_eth_stats stats = { 0 };
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -2650,7 +2650,7 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2740,7 +2740,6 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 	unsigned long q_map;
 	u16 vf_q_id;
@@ -2760,7 +2759,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -2830,7 +2829,6 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 	unsigned long q_map;
 	u16 vf_q_id;
@@ -2851,7 +2849,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -3016,7 +3014,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -3093,7 +3091,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -3328,7 +3326,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
 		goto handle_mac_exit;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto handle_mac_exit;
@@ -3560,7 +3558,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 	}
 
 	hw = &pf->hw;
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -3727,7 +3725,6 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
 static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
 {
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -3740,7 +3737,7 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (ice_vsi_manage_vlan_stripping(vsi, true))
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 
@@ -3758,7 +3755,6 @@ error_param:
 static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
 {
 	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
-	struct ice_pf *pf = vf->pf;
 	struct ice_vsi *vsi;
 
 	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
@@ -3771,7 +3767,7 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
 		goto error_param;
 	}
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi) {
 		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 		goto error_param;
@@ -3797,7 +3793,7 @@ error_param:
  */
 static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
 {
-	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
 
 	if (!vsi)
 		return -EINVAL;
@@ -4185,7 +4181,7 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id,
 	if (ret)
 		return ret;
 
-	vsi = pf->vsi[vf->lan_vsi_idx];
+	vsi = ice_get_vf_vsi(vf);
 	if (!vsi)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 222a8ab01698148c00c271cda82d96f4e6e7b0a8 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 13 Apr 2021 08:48:39 +0800
Subject: ice: Enable RSS configure for AVF

Currently, RSS hash input is not available to AVF by ethtool, it is set
by the PF directly.

Add the RSS configure support for AVF through new virtchnl message, and
define the capability flag VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF to query this
new RSS offload support.

Signed-off-by: Jia Guo <jia.guo@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Bo Chen <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flow.h          |   3 +
 .../ethernet/intel/ice/ice_virtchnl_allowlist.c    |   6 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c   | 453 +++++++++++++++++++++
 include/linux/avf/virtchnl.h                       |  25 +-
 4 files changed, 486 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index eec9def8ffca..2f68b59ace7e 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -8,6 +8,9 @@
 #define ICE_FLOW_FLD_OFF_INVAL		0xffff
 
 /* Generate flow hash field from flow field type(s) */
+#define ICE_FLOW_HASH_ETH	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA))
 #define ICE_FLOW_HASH_IPV4	\
 	(BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \
 	 BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA))
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
index 5a0fbb47346f..9feebe5f556c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -61,6 +61,11 @@ static const u32 rss_pf_allowlist_opcodes[] = {
 	VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA,
 };
 
+/* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */
+static const u32 adv_rss_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG,
+};
+
 /* VIRTCHNL_VF_OFFLOAD_FDIR_PF */
 static const u32 fdir_pf_allowlist_opcodes[] = {
 	VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER,
@@ -82,6 +87,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = {
 	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes),
 	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes),
 	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes),
 	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index baada80c98ab..ca778a80d363 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -5,8 +5,248 @@
 #include "ice_base.h"
 #include "ice_lib.h"
 #include "ice_fltr.h"
+#include "ice_flow.h"
 #include "ice_virtchnl_allowlist.h"
 
+#define FIELD_SELECTOR(proto_hdr_field) \
+		BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
+
+struct ice_vc_hdr_match_type {
+	u32 vc_hdr;	/* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */
+	u32 ice_hdr;	/* ice headers (ICE_FLOW_SEG_HDR_XXX) */
+};
+
+static const struct ice_vc_hdr_match_type ice_vc_hdr_list_os[] = {
+	{VIRTCHNL_PROTO_HDR_NONE,	ICE_FLOW_SEG_HDR_NONE},
+	{VIRTCHNL_PROTO_HDR_IPV4,	ICE_FLOW_SEG_HDR_IPV4 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_IPV6,	ICE_FLOW_SEG_HDR_IPV6 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_TCP,	ICE_FLOW_SEG_HDR_TCP},
+	{VIRTCHNL_PROTO_HDR_UDP,	ICE_FLOW_SEG_HDR_UDP},
+	{VIRTCHNL_PROTO_HDR_SCTP,	ICE_FLOW_SEG_HDR_SCTP},
+};
+
+static const struct ice_vc_hdr_match_type ice_vc_hdr_list_comms[] = {
+	{VIRTCHNL_PROTO_HDR_NONE,	ICE_FLOW_SEG_HDR_NONE},
+	{VIRTCHNL_PROTO_HDR_ETH,	ICE_FLOW_SEG_HDR_ETH},
+	{VIRTCHNL_PROTO_HDR_S_VLAN,	ICE_FLOW_SEG_HDR_VLAN},
+	{VIRTCHNL_PROTO_HDR_C_VLAN,	ICE_FLOW_SEG_HDR_VLAN},
+	{VIRTCHNL_PROTO_HDR_IPV4,	ICE_FLOW_SEG_HDR_IPV4 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_IPV6,	ICE_FLOW_SEG_HDR_IPV6 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_TCP,	ICE_FLOW_SEG_HDR_TCP},
+	{VIRTCHNL_PROTO_HDR_UDP,	ICE_FLOW_SEG_HDR_UDP},
+	{VIRTCHNL_PROTO_HDR_SCTP,	ICE_FLOW_SEG_HDR_SCTP},
+	{VIRTCHNL_PROTO_HDR_PPPOE,	ICE_FLOW_SEG_HDR_PPPOE},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP,	ICE_FLOW_SEG_HDR_GTPU_IP},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH,	ICE_FLOW_SEG_HDR_GTPU_EH},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+					ICE_FLOW_SEG_HDR_GTPU_DWN},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+					ICE_FLOW_SEG_HDR_GTPU_UP},
+	{VIRTCHNL_PROTO_HDR_L2TPV3,	ICE_FLOW_SEG_HDR_L2TPV3},
+	{VIRTCHNL_PROTO_HDR_ESP,	ICE_FLOW_SEG_HDR_ESP},
+	{VIRTCHNL_PROTO_HDR_AH,		ICE_FLOW_SEG_HDR_AH},
+	{VIRTCHNL_PROTO_HDR_PFCP,	ICE_FLOW_SEG_HDR_PFCP_SESSION},
+};
+
+struct ice_vc_hash_field_match_type {
+	u32 vc_hdr;		/* virtchnl headers
+				 * (VIRTCHNL_PROTO_HDR_XXX)
+				 */
+	u32 vc_hash_field;	/* virtchnl hash fields selector
+				 * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX))
+				 */
+	u64 ice_hash_field;	/* ice hash fields
+				 * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX))
+				 */
+};
+
+static const struct
+ice_vc_hash_field_match_type ice_vc_hash_field_list_os[] = {
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		ICE_FLOW_HASH_IPV4},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		ICE_FLOW_HASH_IPV6},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		ICE_FLOW_HASH_TCP_PORT},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		ICE_FLOW_HASH_UDP_PORT},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		ICE_FLOW_HASH_SCTP_PORT},
+};
+
+static const struct
+ice_vc_hash_field_match_type ice_vc_hash_field_list_comms[] = {
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)},
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)},
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+		ICE_FLOW_HASH_ETH},
+	{VIRTCHNL_PROTO_HDR_ETH,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)},
+	{VIRTCHNL_PROTO_HDR_S_VLAN,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)},
+	{VIRTCHNL_PROTO_HDR_C_VLAN,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		ICE_FLOW_HASH_IPV4},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		ICE_FLOW_HASH_IPV6},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		ICE_FLOW_HASH_TCP_PORT},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		ICE_FLOW_HASH_UDP_PORT},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		ICE_FLOW_HASH_SCTP_PORT},
+	{VIRTCHNL_PROTO_HDR_PPPOE,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)},
+	{VIRTCHNL_PROTO_HDR_L2TPV3,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)},
+	{VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)},
+	{VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)},
+	{VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)},
+};
+
 /**
  * ice_get_vf_vsi - get VF's VSI based on the stored index
  * @vf: VF used to get VSI
@@ -2121,6 +2361,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
 	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF;
+
 	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO;
 
@@ -2234,6 +2477,210 @@ static bool ice_vc_isvalid_ring_len(u16 ring_len)
 		!(ring_len % ICE_REQ_DESC_MULTIPLE));
 }
 
+/**
+ * ice_vc_parse_rss_cfg - parses hash fields and headers from
+ * a specific virtchnl RSS cfg
+ * @hw: pointer to the hardware
+ * @rss_cfg: pointer to the virtchnl RSS cfg
+ * @addl_hdrs: pointer to the protocol header fields (ICE_FLOW_SEG_HDR_*)
+ * to configure
+ * @hash_flds: pointer to the hash bit fields (ICE_FLOW_HASH_*) to configure
+ *
+ * Return true if all the protocol header and hash fields in the RSS cfg could
+ * be parsed, else return false
+ *
+ * This function parses the virtchnl RSS cfg to be the intended
+ * hash fields and the intended header for RSS configuration
+ */
+static bool
+ice_vc_parse_rss_cfg(struct ice_hw *hw, struct virtchnl_rss_cfg *rss_cfg,
+		     u32 *addl_hdrs, u64 *hash_flds)
+{
+	const struct ice_vc_hash_field_match_type *hf_list;
+	const struct ice_vc_hdr_match_type *hdr_list;
+	int i, hf_list_len, hdr_list_len;
+
+	if (!strncmp(hw->active_pkg_name, "ICE COMMS Package",
+		     sizeof(hw->active_pkg_name))) {
+		hf_list = ice_vc_hash_field_list_comms;
+		hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list_comms);
+		hdr_list = ice_vc_hdr_list_comms;
+		hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list_comms);
+	} else {
+		hf_list = ice_vc_hash_field_list_os;
+		hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list_os);
+		hdr_list = ice_vc_hdr_list_os;
+		hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list_os);
+	}
+
+	for (i = 0; i < rss_cfg->proto_hdrs.count; i++) {
+		struct virtchnl_proto_hdr *proto_hdr =
+					&rss_cfg->proto_hdrs.proto_hdr[i];
+		bool hdr_found = false;
+		int j;
+
+		/* Find matched ice headers according to virtchnl headers. */
+		for (j = 0; j < hdr_list_len; j++) {
+			struct ice_vc_hdr_match_type hdr_map = hdr_list[j];
+
+			if (proto_hdr->type == hdr_map.vc_hdr) {
+				*addl_hdrs |= hdr_map.ice_hdr;
+				hdr_found = true;
+			}
+		}
+
+		if (!hdr_found)
+			return false;
+
+		/* Find matched ice hash fields according to
+		 * virtchnl hash fields.
+		 */
+		for (j = 0; j < hf_list_len; j++) {
+			struct ice_vc_hash_field_match_type hf_map = hf_list[j];
+
+			if (proto_hdr->type == hf_map.vc_hdr &&
+			    proto_hdr->field_selector == hf_map.vc_hash_field) {
+				*hash_flds |= hf_map.ice_hash_field;
+				break;
+			}
+		}
+	}
+
+	return true;
+}
+
+/**
+ * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced
+ * RSS offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set,
+ * else return false
+ */
+static bool ice_vf_adv_rss_offload_ena(u32 caps)
+{
+	return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF);
+}
+
+/**
+ * ice_vc_handle_rss_cfg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the message buffer
+ * @add: add a RSS config if true, otherwise delete a RSS config
+ *
+ * This function adds/deletes a RSS config
+ */
+static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
+{
+	u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+	struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_hw *hw = &vf->pf->hw;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+		goto error_param;
+	}
+
+	if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS ||
+	    rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC ||
+	    rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) {
+		struct ice_vsi_ctx *ctx;
+		enum ice_status status;
+		u8 lut_type, hash_type;
+
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+		hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_XOR :
+				ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+			goto error_param;
+		}
+
+		ctx->info.q_opt_rss = ((lut_type <<
+					ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				       ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				       (hash_type &
+					ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+
+		/* Preserve existing queueing option setting */
+		ctx->info.q_opt_rss |= (vsi->info.q_opt_rss &
+					  ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M);
+		ctx->info.q_opt_tc = vsi->info.q_opt_tc;
+		ctx->info.q_opt_flags = vsi->info.q_opt_rss;
+
+		ctx->info.valid_sections =
+				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+		status = ice_update_vsi(hw, vsi->idx, ctx, NULL);
+		if (status) {
+			dev_err(dev, "update VSI for RSS failed, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.sq_last_status));
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		} else {
+			vsi->info.q_opt_rss = ctx->info.q_opt_rss;
+		}
+
+		kfree(ctx);
+	} else {
+		u32 addl_hdrs = ICE_FLOW_SEG_HDR_NONE;
+		u64 hash_flds = ICE_HASH_INVALID;
+
+		if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &addl_hdrs,
+					  &hash_flds)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		if (add) {
+			if (ice_add_rss_cfg(hw, vsi->idx, hash_flds,
+					    addl_hdrs)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n",
+					vsi->vsi_num, v_ret);
+			}
+		} else {
+			v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+			dev_err(dev, "RSS removal not supported\n");
+		}
+	}
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0);
+}
+
 /**
  * ice_vc_config_rss_key
  * @vf: pointer to the VF info
@@ -3931,6 +4378,12 @@ error_handler:
 	case VIRTCHNL_OP_DEL_FDIR_FILTER:
 		err = ice_vc_del_fdir_fltr(vf, msg);
 		break;
+	case VIRTCHNL_OP_ADD_RSS_CFG:
+		err = ice_vc_handle_rss_cfg(vf, msg, true);
+		break;
+	case VIRTCHNL_OP_DEL_RSS_CFG:
+		err = ice_vc_handle_rss_cfg(vf, msg, false);
+		break;
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 9e0341cf2c36..565deea6ffe8 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -136,7 +136,9 @@ enum virtchnl_ops {
 	VIRTCHNL_OP_DISABLE_CHANNELS = 31,
 	VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
 	VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
-	/* opcode 34 - 46 are reserved */
+	/* opcode 34 - 44 are reserved */
+	VIRTCHNL_OP_ADD_RSS_CFG = 45,
+	VIRTCHNL_OP_DEL_RSS_CFG = 46,
 	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
 	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
 	VIRTCHNL_OP_MAX,
@@ -252,6 +254,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
 #define VIRTCHNL_VF_OFFLOAD_USO			0X02000000
+#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		0X08000000
 #define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
 
 /* Define below the capability flags that are not offloads */
@@ -677,6 +680,14 @@ enum virtchnl_vfr_states {
 	VIRTCHNL_VFR_VFACTIVE,
 };
 
+/* Type of RSS algorithm */
+enum virtchnl_rss_algorithm {
+	VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC	= 0,
+	VIRTCHNL_RSS_ALG_R_ASYMMETRIC		= 1,
+	VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC	= 2,
+	VIRTCHNL_RSS_ALG_XOR_SYMMETRIC		= 3,
+};
+
 #define VIRTCHNL_MAX_NUM_PROTO_HDRS	32
 #define PROTO_HDR_SHIFT			5
 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
@@ -832,6 +843,14 @@ struct virtchnl_proto_hdrs {
 
 VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
 
+struct virtchnl_rss_cfg {
+	struct virtchnl_proto_hdrs proto_hdrs;	   /* protocol headers */
+	enum virtchnl_rss_algorithm rss_algorithm; /* RSS algorithm type */
+	u8 reserved[128];			   /* reserve for future */
+};
+
+VIRTCHNL_CHECK_STRUCT_LEN(2444, virtchnl_rss_cfg);
+
 /* action configuration for FDIR */
 struct virtchnl_filter_action {
 	enum virtchnl_action type;
@@ -1100,6 +1119,10 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
 		valid_len = sizeof(struct virtchnl_filter);
 		break;
+	case VIRTCHNL_OP_ADD_RSS_CFG:
+	case VIRTCHNL_OP_DEL_RSS_CFG:
+		valid_len = sizeof(struct virtchnl_rss_cfg);
+		break;
 	case VIRTCHNL_OP_ADD_FDIR_FILTER:
 		valid_len = sizeof(struct virtchnl_fdir_add);
 		break;
-- 
cgit v1.2.3


From ddd1f3cfed3f06906c25f917eb703d683c415e24 Mon Sep 17 00:00:00 2001
From: Qi Zhang <qi.z.zhang@intel.com>
Date: Tue, 13 Apr 2021 08:48:40 +0800
Subject: ice: Support RSS configure removal for AVF

Add the handler for virtchnl message VIRTCHNL_OP_DEL_RSS_CFG to remove
an existing RSS configuration with matching hashed fields.

Signed-off-by: Vignesh Sridhar <vignesh.sridhar@intel.com>
Co-developed-by: Jia Guo <jia.guo@intel.com>
Signed-off-by: Jia Guo <jia.guo@intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Bo Chen <BoX.C.Chen@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flow.c        | 88 ++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_flow.h        |  3 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 16 ++++-
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 4d59eb96383b..f160672448a0 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -2149,6 +2149,94 @@ ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
 	return status;
 }
 
+/**
+ * ice_rem_rss_cfg_sync - remove an existing RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static enum ice_status
+ice_rem_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		     u32 addl_hdrs, u8 segs_cnt)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_seg_info *segs;
+	struct ice_flow_prof *prof;
+	enum ice_status status;
+
+	segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+	if (!segs)
+		return ICE_ERR_NO_MEMORY;
+
+	/* Construct the packet segment info from the hashed fields */
+	status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+					   addl_hdrs);
+	if (status)
+		goto out;
+
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS);
+	if (!prof) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+		goto out;
+	}
+
+	status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+	if (status)
+		goto out;
+
+	/* Remove RSS configuration from VSI context before deleting
+	 * the flow profile.
+	 */
+	ice_rem_rss_list(hw, vsi_handle, prof);
+
+	if (bitmap_empty(prof->vsis, ICE_MAX_VSI))
+		status = ice_flow_rem_prof(hw, blk, prof->id);
+
+out:
+	kfree(segs);
+	return status;
+}
+
+/**
+ * ice_rem_rss_cfg - remove an existing RSS config with matching hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ *
+ * This function will lookup the flow profile based on the input
+ * hash field bitmap, iterate through the profile entry list of
+ * that profile and find entry associated with input VSI to be
+ * removed. Calls are made to underlying flow s which will APIs
+ * turn build or update buffers for RSS XLT1 section.
+ */
+enum ice_status __maybe_unused
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs)
+{
+	enum ice_status status;
+
+	if (hashed_flds == ICE_HASH_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->rss_locks);
+	status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+				      ICE_RSS_OUTER_HEADERS);
+	if (!status)
+		status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+					      addl_hdrs, ICE_RSS_INNER_HEADERS);
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
 /* Mapping of AVF hash bit fields to an L3-L4 hash combination.
  * As the ice_flow_avf_hdr_field represent individual bit shifts in a hash,
  * convert its values to their appropriate flow L3, L4 values.
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 2f68b59ace7e..2a2d8c1536cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -409,5 +409,8 @@ enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status
 ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
 		u32 addl_hdrs);
+enum ice_status
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs);
 u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs);
 #endif /* _ICE_FLOW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index ca778a80d363..a1d22d2aa0bd 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2672,8 +2672,20 @@ static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
 					vsi->vsi_num, v_ret);
 			}
 		} else {
-			v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
-			dev_err(dev, "RSS removal not supported\n");
+			enum ice_status status;
+
+			status = ice_rem_rss_cfg(hw, vsi->idx, hash_flds,
+						 addl_hdrs);
+			/* We just ignore ICE_ERR_DOES_NOT_EXIST, because
+			 * if two configurations share the same profile remove
+			 * one of them actually removes both, since the
+			 * profile is deleted.
+			 */
+			if (status && status != ICE_ERR_DOES_NOT_EXIST) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%s\n",
+					vf->vf_id, ice_stat_str(status));
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 0aaeb4fbc842b9e6ef11ee1415e6e88171056afb Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 13 Apr 2021 08:48:41 +0800
Subject: iavf: Add framework to enable ethtool RSS config

Add the virtchnl message interface to VF, so that VF can request RSS
input set(s) based on PF's capability.

This framework allows ethtool RSS config support on the VF driver.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf.h          |  10 ++
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.h  |  25 ++++
 drivers/net/ethernet/intel/iavf/iavf_main.c     |  27 +++++
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 152 ++++++++++++++++++++++++
 4 files changed, 214 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/iavf/iavf_adv_rss.h

diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index bda2a900df8e..e8bd04100ecd 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -38,6 +38,7 @@
 #include <linux/avf/virtchnl.h>
 #include "iavf_txrx.h"
 #include "iavf_fdir.h"
+#include "iavf_adv_rss.h"
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 #define PFX "iavf: "
@@ -303,6 +304,8 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT(24)
 #define IAVF_FLAG_AQ_ADD_FDIR_FILTER		BIT(25)
 #define IAVF_FLAG_AQ_DEL_FDIR_FILTER		BIT(26)
+#define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG		BIT(27)
+#define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG		BIT(28)
 
 	/* OS defined structs */
 	struct net_device *netdev;
@@ -345,6 +348,8 @@ struct iavf_adapter {
 			      VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
 #define FDIR_FLTR_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
 			       VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+#define ADV_RSS_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			     VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
 	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
 	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
 	struct virtchnl_version_info pf_version;
@@ -372,6 +377,9 @@ struct iavf_adapter {
 	u16 fdir_active_fltr;
 	struct list_head fdir_list_head;
 	spinlock_t fdir_fltr_lock;	/* protect the Flow Director filter list */
+
+	struct list_head adv_rss_list_head;
+	spinlock_t adv_rss_lock;	/* protect the RSS management list */
 };
 
 
@@ -444,6 +452,8 @@ void iavf_add_cloud_filter(struct iavf_adapter *adapter);
 void iavf_del_cloud_filter(struct iavf_adapter *adapter);
 void iavf_add_fdir_filter(struct iavf_adapter *adapter);
 void iavf_del_fdir_filter(struct iavf_adapter *adapter);
+void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
 					const u8 *macaddr);
 #endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
new file mode 100644
index 000000000000..66262090e697
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _IAVF_ADV_RSS_H_
+#define _IAVF_ADV_RSS_H_
+
+struct iavf_adapter;
+
+/* State of advanced RSS configuration */
+enum iavf_adv_rss_state_t {
+	IAVF_ADV_RSS_ADD_REQUEST,	/* User requests to add RSS */
+	IAVF_ADV_RSS_ADD_PENDING,	/* RSS pending add by the PF */
+	IAVF_ADV_RSS_DEL_REQUEST,	/* Driver requests to delete RSS */
+	IAVF_ADV_RSS_DEL_PENDING,	/* RSS pending delete by the PF */
+	IAVF_ADV_RSS_ACTIVE,		/* RSS configuration is active */
+};
+
+/* bookkeeping of advanced RSS configuration */
+struct iavf_adv_rss {
+	enum iavf_adv_rss_state_t state;
+	struct list_head list;
+
+	struct virtchnl_rss_cfg cfg_msg;
+};
+#endif /* _IAVF_ADV_RSS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 6cd6b9dfe5d4..7a81e7ceea65 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -962,6 +962,7 @@ void iavf_down(struct iavf_adapter *adapter)
 	struct iavf_cloud_filter *cf;
 	struct iavf_fdir_fltr *fdir;
 	struct iavf_mac_filter *f;
+	struct iavf_adv_rss *rss;
 
 	if (adapter->state <= __IAVF_DOWN_PENDING)
 		return;
@@ -1004,6 +1005,12 @@ void iavf_down(struct iavf_adapter *adapter)
 	}
 	spin_unlock_bh(&adapter->fdir_fltr_lock);
 
+	/* remove all advance RSS configuration */
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
+		rss->state = IAVF_ADV_RSS_DEL_REQUEST;
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
 	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
 	    adapter->state != __IAVF_RESETTING) {
 		/* cancel any current operation */
@@ -1016,6 +1023,7 @@ void iavf_down(struct iavf_adapter *adapter)
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
 		adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
 		adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
 	}
 
@@ -1646,6 +1654,14 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
 		iavf_del_fdir_filter(adapter);
 		return IAVF_SUCCESS;
 	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_ADV_RSS_CFG) {
+		iavf_add_adv_rss_cfg(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_ADV_RSS_CFG) {
+		iavf_del_adv_rss_cfg(adapter);
+		return 0;
+	}
 	return -EAGAIN;
 }
 
@@ -3758,11 +3774,13 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	spin_lock_init(&adapter->mac_vlan_list_lock);
 	spin_lock_init(&adapter->cloud_filter_list_lock);
 	spin_lock_init(&adapter->fdir_fltr_lock);
+	spin_lock_init(&adapter->adv_rss_lock);
 
 	INIT_LIST_HEAD(&adapter->mac_filter_list);
 	INIT_LIST_HEAD(&adapter->vlan_filter_list);
 	INIT_LIST_HEAD(&adapter->cloud_filter_list);
 	INIT_LIST_HEAD(&adapter->fdir_list_head);
+	INIT_LIST_HEAD(&adapter->adv_rss_list_head);
 
 	INIT_WORK(&adapter->reset_task, iavf_reset_task);
 	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
@@ -3868,6 +3886,7 @@ static void iavf_remove(struct pci_dev *pdev)
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 	struct iavf_fdir_fltr *fdir, *fdirtmp;
 	struct iavf_vlan_filter *vlf, *vlftmp;
+	struct iavf_adv_rss *rss, *rsstmp;
 	struct iavf_mac_filter *f, *ftmp;
 	struct iavf_cloud_filter *cf, *cftmp;
 	struct iavf_hw *hw = &adapter->hw;
@@ -3955,6 +3974,14 @@ static void iavf_remove(struct pci_dev *pdev)
 	}
 	spin_unlock_bh(&adapter->fdir_fltr_lock);
 
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry_safe(rss, rsstmp, &adapter->adv_rss_list_head,
+				 list) {
+		list_del(&rss->list);
+		kfree(rss);
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 9aaade0aae4c..54d2efe1732d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -142,6 +142,7 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
 	       VIRTCHNL_VF_OFFLOAD_ADQ |
 	       VIRTCHNL_VF_OFFLOAD_USO |
 	       VIRTCHNL_VF_OFFLOAD_FDIR_PF |
+	       VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF |
 	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 
 	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
@@ -1294,6 +1295,102 @@ void iavf_del_fdir_filter(struct iavf_adapter *adapter)
 	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_FDIR_FILTER, (u8 *)&f, len);
 }
 
+/**
+ * iavf_add_adv_rss_cfg
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF add RSS configuration as specified
+ * by the user via ethtool.
+ **/
+void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_cfg *rss_cfg;
+	struct iavf_adv_rss *rss;
+	bool process_rss = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add RSS configuration, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_rss_cfg);
+	rss_cfg = kzalloc(len, GFP_KERNEL);
+	if (!rss_cfg)
+		return;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+		if (rss->state == IAVF_ADV_RSS_ADD_REQUEST) {
+			process_rss = true;
+			rss->state = IAVF_ADV_RSS_ADD_PENDING;
+			memcpy(rss_cfg, &rss->cfg_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (process_rss) {
+		adapter->current_op = VIRTCHNL_OP_ADD_RSS_CFG;
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_RSS_CFG,
+				 (u8 *)rss_cfg, len);
+	} else {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+	}
+
+	kfree(rss_cfg);
+}
+
+/**
+ * iavf_del_adv_rss_cfg
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF delete RSS configuration as specified
+ * by the user via ethtool.
+ **/
+void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_cfg *rss_cfg;
+	struct iavf_adv_rss *rss;
+	bool process_rss = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove RSS configuration, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_rss_cfg);
+	rss_cfg = kzalloc(len, GFP_KERNEL);
+	if (!rss_cfg)
+		return;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+		if (rss->state == IAVF_ADV_RSS_DEL_REQUEST) {
+			process_rss = true;
+			rss->state = IAVF_ADV_RSS_DEL_PENDING;
+			memcpy(rss_cfg, &rss->cfg_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (process_rss) {
+		adapter->current_op = VIRTCHNL_OP_DEL_RSS_CFG;
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_RSS_CFG,
+				 (u8 *)rss_cfg, len);
+	} else {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
+	}
+
+	kfree(rss_cfg);
+}
+
 /**
  * iavf_request_reset
  * @adapter: adapter structure
@@ -1494,6 +1591,37 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 			spin_unlock_bh(&adapter->fdir_fltr_lock);
 			}
 			break;
+		case VIRTCHNL_OP_ADD_RSS_CFG: {
+			struct iavf_adv_rss *rss, *rss_tmp;
+
+			spin_lock_bh(&adapter->adv_rss_lock);
+			list_for_each_entry_safe(rss, rss_tmp,
+						 &adapter->adv_rss_list_head,
+						 list) {
+				if (rss->state == IAVF_ADV_RSS_ADD_PENDING) {
+					list_del(&rss->list);
+					kfree(rss);
+				}
+			}
+			spin_unlock_bh(&adapter->adv_rss_lock);
+			}
+			break;
+		case VIRTCHNL_OP_DEL_RSS_CFG: {
+			struct iavf_adv_rss *rss;
+
+			spin_lock_bh(&adapter->adv_rss_lock);
+			list_for_each_entry(rss, &adapter->adv_rss_list_head,
+					    list) {
+				if (rss->state == IAVF_ADV_RSS_DEL_PENDING) {
+					rss->state = IAVF_ADV_RSS_ACTIVE;
+					dev_err(&adapter->pdev->dev, "Failed to delete RSS configuration, error %s\n",
+						iavf_stat_str(&adapter->hw,
+							      v_retval));
+				}
+			}
+			spin_unlock_bh(&adapter->adv_rss_lock);
+			}
+			break;
 		case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
 		case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
 			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
@@ -1683,6 +1811,30 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		spin_unlock_bh(&adapter->fdir_fltr_lock);
 		}
 		break;
+	case VIRTCHNL_OP_ADD_RSS_CFG: {
+		struct iavf_adv_rss *rss;
+
+		spin_lock_bh(&adapter->adv_rss_lock);
+		list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
+			if (rss->state == IAVF_ADV_RSS_ADD_PENDING)
+				rss->state = IAVF_ADV_RSS_ACTIVE;
+		spin_unlock_bh(&adapter->adv_rss_lock);
+		}
+		break;
+	case VIRTCHNL_OP_DEL_RSS_CFG: {
+		struct iavf_adv_rss *rss, *rss_tmp;
+
+		spin_lock_bh(&adapter->adv_rss_lock);
+		list_for_each_entry_safe(rss, rss_tmp,
+					 &adapter->adv_rss_list_head, list) {
+			if (rss->state == IAVF_ADV_RSS_DEL_PENDING) {
+				list_del(&rss->list);
+				kfree(rss);
+			}
+		}
+		spin_unlock_bh(&adapter->adv_rss_lock);
+		}
+		break;
 	default:
 		if (adapter->current_op && (v_opcode != adapter->current_op))
 			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
-- 
cgit v1.2.3


From 5ab91e0593a15652d31d3eb0bd6d28bf0bc9b36c Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 13 Apr 2021 08:48:42 +0800
Subject: iavf: Support for modifying TCP RSS flow hashing

Provides the ability to enable TCP RSS hashing by ethtool.

It gives users option of generating RSS hash based on the TCP source
and destination ports numbers, IPv4 or IPv6 source and destination
addresses.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/Makefile        |   1 +
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.c  | 170 +++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.h  |  54 ++++++
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c  | 214 +++++++++++++++++++++++-
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c |  15 +-
 5 files changed, 450 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/iavf/iavf_adv_rss.c

diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
index 121e194ee734..9c3e45c54d01 100644
--- a/drivers/net/ethernet/intel/iavf/Makefile
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -12,4 +12,5 @@ subdir-ccflags-y += -I$(src)
 obj-$(CONFIG_IAVF) += iavf.o
 
 iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \
+	     iavf_adv_rss.o \
 	     iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
new file mode 100644
index 000000000000..4c5771cdc445
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Intel Corporation. */
+
+/* advanced RSS configuration ethtool support for iavf */
+
+#include "iavf.h"
+
+/**
+ * iavf_fill_adv_rss_ip4_hdr - fill the IPv4 RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_ip4_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_SA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_DA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+}
+
+/**
+ * iavf_fill_adv_rss_ip6_hdr - fill the IPv6 RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_ip6_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_SA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_DA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+}
+
+/**
+ * iavf_fill_adv_rss_tcp_hdr - fill the TCP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_tcp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
+}
+
+/**
+ * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message
+ * @rss_cfg: the virtchnl message to be filled with RSS configuration setting
+ * @packet_hdrs: the RSS configuration protocol header types
+ * @hash_flds: the RSS configuration protocol hash fields
+ *
+ * Returns 0 if the RSS configuration virtchnl message is filled successfully
+ */
+int
+iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
+			  u32 packet_hdrs, u64 hash_flds)
+{
+	struct virtchnl_proto_hdrs *proto_hdrs = &rss_cfg->proto_hdrs;
+	struct virtchnl_proto_hdr *hdr;
+
+	rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC;
+
+	proto_hdrs->tunnel_level = 0;	/* always outer layer */
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L3) {
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4:
+		iavf_fill_adv_rss_ip4_hdr(hdr, hash_flds);
+		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6:
+		iavf_fill_adv_rss_ip6_hdr(hdr, hash_flds);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L4) {
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP:
+		iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_find_adv_rss_cfg_by_hdrs - find RSS configuration with header type
+ * @adapter: pointer to the VF adapter structure
+ * @packet_hdrs: protocol header type to find.
+ *
+ * Returns pointer to advance RSS configuration if found or null
+ */
+struct iavf_adv_rss *
+iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs)
+{
+	struct iavf_adv_rss *rss;
+
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
+		if (rss->packet_hdrs == packet_hdrs)
+			return rss;
+
+	return NULL;
+}
+
+/**
+ * iavf_print_adv_rss_cfg
+ * @adapter: pointer to the VF adapter structure
+ * @rss: pointer to the advance RSS configuration to print
+ * @action: the string description about how to handle the RSS
+ * @result: the string description about the virtchnl result
+ *
+ * Print the advance RSS configuration
+ **/
+void
+iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
+		       const char *action, const char *result)
+{
+	u32 packet_hdrs = rss->packet_hdrs;
+	u64 hash_flds = rss->hash_flds;
+	static char hash_opt[300];
+	const char *proto;
+
+	if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_TCP)
+		proto = "TCP";
+	else
+		return;
+
+	memset(hash_opt, 0, sizeof(hash_opt));
+
+	strcat(hash_opt, proto);
+	if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4)
+		strcat(hash_opt, "v4 ");
+	else
+		strcat(hash_opt, "v6 ");
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_SA))
+		strcat(hash_opt, "IP SA,");
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
+		strcat(hash_opt, "IP DA,");
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+		strcat(hash_opt, "src port,");
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+		strcat(hash_opt, "dst port,");
+
+	if (!action)
+		action = "";
+
+	if (!result)
+		result = "";
+
+	dev_info(&adapter->pdev->dev, "%s %s %s\n", action, hash_opt, result);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
index 66262090e697..339ecb42938b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
@@ -15,11 +15,65 @@ enum iavf_adv_rss_state_t {
 	IAVF_ADV_RSS_ACTIVE,		/* RSS configuration is active */
 };
 
+enum iavf_adv_rss_flow_seg_hdr {
+	IAVF_ADV_RSS_FLOW_SEG_HDR_NONE	= 0x00000000,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4	= 0x00000001,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6	= 0x00000002,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_TCP	= 0x00000004,
+};
+
+#define IAVF_ADV_RSS_FLOW_SEG_HDR_L3		\
+	(IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4	|	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6)
+
+#define IAVF_ADV_RSS_FLOW_SEG_HDR_L4		\
+	(IAVF_ADV_RSS_FLOW_SEG_HDR_TCP)
+
+enum iavf_adv_rss_flow_field {
+	/* L3 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA,
+	/* L4 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT,
+
+	/* The total number of enums must not exceed 64 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX
+};
+
+#define IAVF_ADV_RSS_HASH_INVALID	0
+#define IAVF_ADV_RSS_HASH_FLD_IPV4_SA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV6_SA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV4_DA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV6_DA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA)
+#define IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT)
+
 /* bookkeeping of advanced RSS configuration */
 struct iavf_adv_rss {
 	enum iavf_adv_rss_state_t state;
 	struct list_head list;
 
+	u32 packet_hdrs;
+	u64 hash_flds;
+
 	struct virtchnl_rss_cfg cfg_msg;
 };
+
+int
+iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
+			  u32 packet_hdrs, u64 hash_flds);
+struct iavf_adv_rss *
+iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs);
+void
+iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
+		       const char *action, const char *result);
 #endif /* _IAVF_ADV_RSS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 3ebfef737f5c..d8dca5645c21 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -1418,6 +1418,214 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
 	return err;
 }
 
+/**
+ * iavf_adv_rss_parse_hdrs - parses headers from RSS hash input
+ * @cmd: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * header types for RSS configuration
+ */
+static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd)
+{
+	u32 hdrs = IAVF_ADV_RSS_FLOW_SEG_HDR_NONE;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
+	case TCP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
+	default:
+		break;
+	}
+
+	return hdrs;
+}
+
+/**
+ * iavf_adv_rss_parse_hash_flds - parses hash fields from RSS hash input
+ * @cmd: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended hash fields for
+ * RSS configuration
+ */
+static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
+{
+	u64 hfld = IAVF_ADV_RSS_HASH_INVALID;
+
+	if (cmd->data & RXH_IP_SRC || cmd->data & RXH_IP_DST) {
+		switch (cmd->flow_type) {
+		case TCP_V4_FLOW:
+			if (cmd->data & RXH_IP_SRC)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA;
+			if (cmd->data & RXH_IP_DST)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_DA;
+			break;
+		case TCP_V6_FLOW:
+			if (cmd->data & RXH_IP_SRC)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_SA;
+			if (cmd->data & RXH_IP_DST)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_DA;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (cmd->data & RXH_L4_B_0_1 || cmd->data & RXH_L4_B_2_3) {
+		switch (cmd->flow_type) {
+		case TCP_V4_FLOW:
+		case TCP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return hfld;
+}
+
+/**
+ * iavf_set_adv_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
+			  struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adv_rss *rss_old, *rss_new;
+	bool rss_new_add = false;
+	int count = 50, err = 0;
+	u64 hash_flds;
+	u32 hdrs;
+
+	if (!ADV_RSS_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	hdrs = iavf_adv_rss_parse_hdrs(cmd);
+	if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE)
+		return -EINVAL;
+
+	hash_flds = iavf_adv_rss_parse_hash_flds(cmd);
+	if (hash_flds == IAVF_ADV_RSS_HASH_INVALID)
+		return -EINVAL;
+
+	rss_new = kzalloc(sizeof(*rss_new), GFP_KERNEL);
+	if (!rss_new)
+		return -ENOMEM;
+
+	if (iavf_fill_adv_rss_cfg_msg(&rss_new->cfg_msg, hdrs, hash_flds)) {
+		kfree(rss_new);
+		return -EINVAL;
+	}
+
+	while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK,
+				&adapter->crit_section)) {
+		if (--count == 0) {
+			kfree(rss_new);
+			return -EINVAL;
+		}
+
+		udelay(1);
+	}
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs);
+	if (rss_old) {
+		if (rss_old->state != IAVF_ADV_RSS_ACTIVE) {
+			err = -EBUSY;
+		} else if (rss_old->hash_flds != hash_flds) {
+			rss_old->state = IAVF_ADV_RSS_ADD_REQUEST;
+			rss_old->hash_flds = hash_flds;
+			memcpy(&rss_old->cfg_msg, &rss_new->cfg_msg,
+			       sizeof(rss_new->cfg_msg));
+			adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+		} else {
+			err = -EEXIST;
+		}
+	} else {
+		rss_new_add = true;
+		rss_new->state = IAVF_ADV_RSS_ADD_REQUEST;
+		rss_new->packet_hdrs = hdrs;
+		rss_new->hash_flds = hash_flds;
+		list_add_tail(&rss_new->list, &adapter->adv_rss_list_head);
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (!err)
+		mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+
+	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+
+	if (!rss_new_add)
+		kfree(rss_new);
+
+	return err;
+}
+
+/**
+ * iavf_get_adv_rss_hash_opt - Retrieve hash fields for a given flow-type
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+iavf_get_adv_rss_hash_opt(struct iavf_adapter *adapter,
+			  struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adv_rss *rss;
+	u64 hash_flds;
+	u32 hdrs;
+
+	if (!ADV_RSS_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	cmd->data = 0;
+
+	hdrs = iavf_adv_rss_parse_hdrs(cmd);
+	if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE)
+		return -EINVAL;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	rss = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs);
+	if (rss)
+		hash_flds = rss->hash_flds;
+	else
+		hash_flds = IAVF_ADV_RSS_HASH_INVALID;
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (hash_flds == IAVF_ADV_RSS_HASH_INVALID)
+		return -EINVAL;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_SA))
+		cmd->data |= (u64)RXH_IP_SRC;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
+		cmd->data |= (u64)RXH_IP_DST;
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+		cmd->data |= (u64)RXH_L4_B_0_1;
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+		cmd->data |= (u64)RXH_L4_B_2_3;
+
+	return 0;
+}
+
 /**
  * iavf_set_rxnfc - command to set Rx flow rules.
  * @netdev: network interface device structure
@@ -1437,6 +1645,9 @@ static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 	case ETHTOOL_SRXCLSRLDEL:
 		ret = iavf_del_fdir_ethtool(adapter, cmd);
 		break;
+	case ETHTOOL_SRXFH:
+		ret = iavf_set_adv_rss_hash_opt(adapter, cmd);
+		break;
 	default:
 		break;
 	}
@@ -1477,8 +1688,7 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		ret = iavf_get_fdir_fltr_ids(adapter, cmd, (u32 *)rule_locs);
 		break;
 	case ETHTOOL_GRXFH:
-		netdev_info(netdev,
-			    "RSS hash info is not available to vf, use pf.\n");
+		ret = iavf_get_adv_rss_hash_opt(adapter, cmd);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 54d2efe1732d..0eab3c43bdc5 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1327,6 +1327,9 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter)
 			process_rss = true;
 			rss->state = IAVF_ADV_RSS_ADD_PENDING;
 			memcpy(rss_cfg, &rss->cfg_msg, len);
+			iavf_print_adv_rss_cfg(adapter, rss,
+					       "Input set change for",
+					       "is pending");
 			break;
 		}
 	}
@@ -1599,6 +1602,9 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 						 &adapter->adv_rss_list_head,
 						 list) {
 				if (rss->state == IAVF_ADV_RSS_ADD_PENDING) {
+					iavf_print_adv_rss_cfg(adapter, rss,
+							       "Failed to change the input set for",
+							       NULL);
 					list_del(&rss->list);
 					kfree(rss);
 				}
@@ -1815,9 +1821,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		struct iavf_adv_rss *rss;
 
 		spin_lock_bh(&adapter->adv_rss_lock);
-		list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
-			if (rss->state == IAVF_ADV_RSS_ADD_PENDING)
+		list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+			if (rss->state == IAVF_ADV_RSS_ADD_PENDING) {
+				iavf_print_adv_rss_cfg(adapter, rss,
+						       "Input set change for",
+						       "successful");
 				rss->state = IAVF_ADV_RSS_ACTIVE;
+			}
+		}
 		spin_unlock_bh(&adapter->adv_rss_lock);
 		}
 		break;
-- 
cgit v1.2.3


From 7b8f3f957b22746bc9a410d7cd2e9edd0efcc9f5 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 13 Apr 2021 08:48:43 +0800
Subject: iavf: Support for modifying UDP RSS flow hashing

Provides the ability to enable UDP RSS hashing by ethtool.

It gives users option of generating RSS hash based on the UDP source
and destination ports numbers, IPv4 or IPv6 source and destination
addresses.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.c | 28 ++++++++++++++++++++++++--
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.h | 10 ++++++++-
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 23 +++++++++++++++++++--
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
index 4c5771cdc445..a8e03aaccc6b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
@@ -56,6 +56,23 @@ iavf_fill_adv_rss_tcp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
 		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
 }
 
+/**
+ * iavf_fill_adv_rss_udp_hdr - fill the UDP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_udp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
+}
+
 /**
  * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message
  * @rss_cfg: the virtchnl message to be filled with RSS configuration setting
@@ -92,6 +109,9 @@ iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
 	case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP:
 		iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds);
 		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP:
+		iavf_fill_adv_rss_udp_hdr(hdr, hash_flds);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -138,6 +158,8 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
 
 	if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_TCP)
 		proto = "TCP";
+	else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_UDP)
+		proto = "UDP";
 	else
 		return;
 
@@ -155,9 +177,11 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
 	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA |
 			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
 		strcat(hash_opt, "IP DA,");
-	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT))
 		strcat(hash_opt, "src port,");
-	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT))
 		strcat(hash_opt, "dst port,");
 
 	if (!action)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
index 339ecb42938b..4681f5e8321d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
@@ -20,6 +20,7 @@ enum iavf_adv_rss_flow_seg_hdr {
 	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4	= 0x00000001,
 	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6	= 0x00000002,
 	IAVF_ADV_RSS_FLOW_SEG_HDR_TCP	= 0x00000004,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_UDP	= 0x00000008,
 };
 
 #define IAVF_ADV_RSS_FLOW_SEG_HDR_L3		\
@@ -27,7 +28,8 @@ enum iavf_adv_rss_flow_seg_hdr {
 	 IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6)
 
 #define IAVF_ADV_RSS_FLOW_SEG_HDR_L4		\
-	(IAVF_ADV_RSS_FLOW_SEG_HDR_TCP)
+	(IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_UDP)
 
 enum iavf_adv_rss_flow_field {
 	/* L3 */
@@ -38,6 +40,8 @@ enum iavf_adv_rss_flow_field {
 	/* L4 */
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT,
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT,
 
 	/* The total number of enums must not exceed 64 */
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX
@@ -56,6 +60,10 @@ enum iavf_adv_rss_flow_field {
 	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT)
 #define IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT	\
 	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT)
 
 /* bookkeeping of advanced RSS configuration */
 struct iavf_adv_rss {
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index d8dca5645c21..e6169a336694 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -1434,10 +1434,18 @@ static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd)
 		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
 			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
 		break;
+	case UDP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
 	case TCP_V6_FLOW:
 		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
 			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
 		break;
+	case UDP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
 	default:
 		break;
 	}
@@ -1459,12 +1467,14 @@ static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
 	if (cmd->data & RXH_IP_SRC || cmd->data & RXH_IP_DST) {
 		switch (cmd->flow_type) {
 		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
 			if (cmd->data & RXH_IP_SRC)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA;
 			if (cmd->data & RXH_IP_DST)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_DA;
 			break;
 		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
 			if (cmd->data & RXH_IP_SRC)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_SA;
 			if (cmd->data & RXH_IP_DST)
@@ -1484,6 +1494,13 @@ static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
 			if (cmd->data & RXH_L4_B_2_3)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT;
 			break;
+		case UDP_V4_FLOW:
+		case UDP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT;
+			break;
 		default:
 			break;
 		}
@@ -1617,10 +1634,12 @@ iavf_get_adv_rss_hash_opt(struct iavf_adapter *adapter,
 			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
 		cmd->data |= (u64)RXH_IP_DST;
 
-	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT))
 		cmd->data |= (u64)RXH_L4_B_0_1;
 
-	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT))
 		cmd->data |= (u64)RXH_L4_B_2_3;
 
 	return 0;
-- 
cgit v1.2.3


From e41985f0fe8b68d1ac321bd4eda460fb553e65ad Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Tue, 13 Apr 2021 08:48:44 +0800
Subject: iavf: Support for modifying SCTP RSS flow hashing

Provide the ability to enable SCTP RSS hashing by ethtool.

It gives users option of generating RSS hash based on the SCTP source
and destination ports numbers, IPv4 or IPv6 source and destination
addresses.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.c | 28 ++++++++++++++++++++++++--
 drivers/net/ethernet/intel/iavf/iavf_adv_rss.h | 10 ++++++++-
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 23 +++++++++++++++++++--
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
index a8e03aaccc6b..6edbf134b73f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
@@ -73,6 +73,23 @@ iavf_fill_adv_rss_udp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
 		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
 }
 
+/**
+ * iavf_fill_adv_rss_sctp_hdr - fill the SCTP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_sctp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT);
+}
+
 /**
  * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message
  * @rss_cfg: the virtchnl message to be filled with RSS configuration setting
@@ -112,6 +129,9 @@ iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
 	case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP:
 		iavf_fill_adv_rss_udp_hdr(hdr, hash_flds);
 		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP:
+		iavf_fill_adv_rss_sctp_hdr(hdr, hash_flds);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -160,6 +180,8 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
 		proto = "TCP";
 	else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_UDP)
 		proto = "UDP";
+	else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP)
+		proto = "SCTP";
 	else
 		return;
 
@@ -178,10 +200,12 @@ iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
 			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
 		strcat(hash_opt, "IP DA,");
 	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
-			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT))
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT))
 		strcat(hash_opt, "src port,");
 	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
-			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT))
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT))
 		strcat(hash_opt, "dst port,");
 
 	if (!action)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
index 4681f5e8321d..4d3be11af7aa 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
@@ -21,6 +21,7 @@ enum iavf_adv_rss_flow_seg_hdr {
 	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6	= 0x00000002,
 	IAVF_ADV_RSS_FLOW_SEG_HDR_TCP	= 0x00000004,
 	IAVF_ADV_RSS_FLOW_SEG_HDR_UDP	= 0x00000008,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP	= 0x00000010,
 };
 
 #define IAVF_ADV_RSS_FLOW_SEG_HDR_L3		\
@@ -29,7 +30,8 @@ enum iavf_adv_rss_flow_seg_hdr {
 
 #define IAVF_ADV_RSS_FLOW_SEG_HDR_L4		\
 	(IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |	\
-	 IAVF_ADV_RSS_FLOW_SEG_HDR_UDP)
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP)
 
 enum iavf_adv_rss_flow_field {
 	/* L3 */
@@ -42,6 +44,8 @@ enum iavf_adv_rss_flow_field {
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT,
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT,
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT,
 
 	/* The total number of enums must not exceed 64 */
 	IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX
@@ -64,6 +68,10 @@ enum iavf_adv_rss_flow_field {
 	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT)
 #define IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT	\
 	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT)
 
 /* bookkeeping of advanced RSS configuration */
 struct iavf_adv_rss {
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index e6169a336694..3d904bc6ee76 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -1438,6 +1438,10 @@ static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd)
 		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
 			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
 		break;
+	case SCTP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
 	case TCP_V6_FLOW:
 		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
 			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
@@ -1446,6 +1450,10 @@ static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd)
 		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
 			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
 		break;
+	case SCTP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
 	default:
 		break;
 	}
@@ -1468,6 +1476,7 @@ static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
 		switch (cmd->flow_type) {
 		case TCP_V4_FLOW:
 		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
 			if (cmd->data & RXH_IP_SRC)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA;
 			if (cmd->data & RXH_IP_DST)
@@ -1475,6 +1484,7 @@ static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
 			break;
 		case TCP_V6_FLOW:
 		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
 			if (cmd->data & RXH_IP_SRC)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_SA;
 			if (cmd->data & RXH_IP_DST)
@@ -1501,6 +1511,13 @@ static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
 			if (cmd->data & RXH_L4_B_2_3)
 				hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT;
 			break;
+		case SCTP_V4_FLOW:
+		case SCTP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT;
+			break;
 		default:
 			break;
 		}
@@ -1635,11 +1652,13 @@ iavf_get_adv_rss_hash_opt(struct iavf_adapter *adapter,
 		cmd->data |= (u64)RXH_IP_DST;
 
 	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
-			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT))
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT))
 		cmd->data |= (u64)RXH_L4_B_0_1;
 
 	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
-			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT))
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT))
 		cmd->data |= (u64)RXH_L4_B_2_3;
 
 	return 0;
-- 
cgit v1.2.3


From 990875b299b8612aeb85cb2e2751796f1add65ff Mon Sep 17 00:00:00 2001
From: Maxim Kochetkov <fido_max@inbox.ru>
Date: Thu, 22 Apr 2021 13:46:43 +0300
Subject: net: phy: marvell: fix m88e1011_set_downshift

Changing downshift params without software reset has no effect,
so call genphy_soft_reset() after change downshift params.

As the datasheet says:
Changes to these bits are disruptive to the normal operation therefore,
any changes to these registers must be followed by software reset
to take effect.

Fixes: 911af5e149bb ("net: phy: marvell: fix downshift function naming")
Signed-off-by: Maxim Kochetkov <fido_max@inbox.ru>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8018ddf7f316..723f25f6138d 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1025,22 +1025,28 @@ static int m88e1011_get_downshift(struct phy_device *phydev, u8 *data)
 
 static int m88e1011_set_downshift(struct phy_device *phydev, u8 cnt)
 {
-	int val;
+	int val, err;
 
 	if (cnt > MII_M1011_PHY_SCR_DOWNSHIFT_MAX)
 		return -E2BIG;
 
-	if (!cnt)
-		return phy_clear_bits(phydev, MII_M1011_PHY_SCR,
-				      MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+	if (!cnt) {
+		err = phy_clear_bits(phydev, MII_M1011_PHY_SCR,
+				     MII_M1011_PHY_SCR_DOWNSHIFT_EN);
+	} else {
+		val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
+		val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
 
-	val = MII_M1011_PHY_SCR_DOWNSHIFT_EN;
-	val |= FIELD_PREP(MII_M1011_PHY_SCR_DOWNSHIFT_MASK, cnt - 1);
+		err = phy_modify(phydev, MII_M1011_PHY_SCR,
+				 MII_M1011_PHY_SCR_DOWNSHIFT_EN |
+				 MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
+				 val);
+	}
 
-	return phy_modify(phydev, MII_M1011_PHY_SCR,
-			  MII_M1011_PHY_SCR_DOWNSHIFT_EN |
-			  MII_M1011_PHY_SCR_DOWNSHIFT_MASK,
-			  val);
+	if (err < 0)
+		return err;
+
+	return genphy_soft_reset(phydev);
 }
 
 static int m88e1011_get_tunable(struct phy_device *phydev,
-- 
cgit v1.2.3


From e7679c55a7249f1315256cfc672d53e84072e223 Mon Sep 17 00:00:00 2001
From: Maxim Kochetkov <fido_max@inbox.ru>
Date: Thu, 22 Apr 2021 13:46:44 +0300
Subject: net: phy: marvell: fix m88e1111_set_downshift

Changing downshift params without software reset has no effect,
so call genphy_soft_reset() after change downshift params.

As the datasheet says:
Changes to these bits are disruptive to the normal operation therefore,
any changes to these registers must be followed by software reset
to take effect.

Fixes: 5c6bc5199b5d ("net: phy: marvell: add downshift support for M88E1111")
Signed-off-by: Maxim Kochetkov <fido_max@inbox.ru>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 723f25f6138d..f86c9ddc609e 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -967,22 +967,28 @@ static int m88e1111_get_downshift(struct phy_device *phydev, u8 *data)
 
 static int m88e1111_set_downshift(struct phy_device *phydev, u8 cnt)
 {
-	int val;
+	int val, err;
 
 	if (cnt > MII_M1111_PHY_EXT_CR_DOWNSHIFT_MAX)
 		return -E2BIG;
 
-	if (!cnt)
-		return phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
-				      MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+	if (!cnt) {
+		err = phy_clear_bits(phydev, MII_M1111_PHY_EXT_CR,
+				     MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN);
+	} else {
+		val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
+		val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
 
-	val = MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN;
-	val |= FIELD_PREP(MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK, cnt - 1);
+		err = phy_modify(phydev, MII_M1111_PHY_EXT_CR,
+				 MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
+				 MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
+				 val);
+	}
 
-	return phy_modify(phydev, MII_M1111_PHY_EXT_CR,
-			  MII_M1111_PHY_EXT_CR_DOWNSHIFT_EN |
-			  MII_M1111_PHY_EXT_CR_DOWNSHIFT_MASK,
-			  val);
+	if (err < 0)
+		return err;
+
+	return genphy_soft_reset(phydev);
 }
 
 static int m88e1111_get_tunable(struct phy_device *phydev,
-- 
cgit v1.2.3


From 45b102dd81491e30ac7596b5515856141f99319f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Apr 2021 15:34:34 +0200
Subject: net: mana: fix PCI_HYPERV dependency

The MANA driver causes a build failure in some configurations when
it selects an unavailable symbol:

WARNING: unmet direct dependencies detected for PCI_HYPERV
  Depends on [n]: PCI [=y] && X86_64 [=y] && HYPERV [=n] && PCI_MSI [=y] && PCI_MSI_IRQ_DOMAIN [=y] && SYSFS [=y]
  Selected by [y]:
  - MICROSOFT_MANA [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROSOFT [=y] && PCI_MSI [=y] && X86_64 [=y]
drivers/pci/controller/pci-hyperv.c: In function 'hv_irq_unmask':
drivers/pci/controller/pci-hyperv.c:1217:9: error: implicit declaration of function 'hv_set_msi_entry_from_desc' [-Werror=implicit-function-declaration]
 1217 |         hv_set_msi_entry_from_desc(&params->int_entry.msi_entry, msi_desc);
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~

A PCI driver should never depend on a particular host bridge
implementation in the first place, but if we have this dependency
it's better to express it as a 'depends on' rather than 'select'.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microsoft/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig
index e1ac0a5d808d..fe4e7a7d9c0b 100644
--- a/drivers/net/ethernet/microsoft/Kconfig
+++ b/drivers/net/ethernet/microsoft/Kconfig
@@ -18,7 +18,7 @@ if NET_VENDOR_MICROSOFT
 config MICROSOFT_MANA
 	tristate "Microsoft Azure Network Adapter (MANA) support"
 	depends on PCI_MSI && X86_64
-	select PCI_HYPERV
+	depends on PCI_HYPERV
 	help
 	  This driver supports Microsoft Azure Network Adapter (MANA).
 	  So far, the driver is only supported on X86_64.
-- 
cgit v1.2.3


From 74c97ea3b61e4ce149444f904ee8d4fc7073505b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Apr 2021 15:35:11 +0200
Subject: net: enetc: fix link error again

A link time bug that I had fixed before has come back now that
another sub-module was added to the enetc driver:

ERROR: modpost: "enetc_ierb_register_pf" [drivers/net/ethernet/freescale/enetc/fsl-enetc.ko] undefined!

The problem is that the enetc Makefile is not actually used for
the ierb module if that is the only built-in driver in there
and everything else is a loadable module.

Fix it by always entering the directory this time, regardless
of which symbols are configured. This should reliably fix the
problem and prevent it from coming back another time.

Fixes: 112463ddbe82 ("net: dsa: felix: fix link error")
Fixes: e7d48e5fbf30 ("net: enetc: add a mini driver for the Integrated Endpoint Register Block")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/Makefile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 67c436400352..de7b31842233 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -24,6 +24,4 @@ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/
 
 obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/
 
-obj-$(CONFIG_FSL_ENETC) += enetc/
-obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/
-obj-$(CONFIG_FSL_ENETC_VF) += enetc/
+obj-y += enetc/
-- 
cgit v1.2.3


From a9b5d871abc417cf65a05a9ba50c6b81a6e427eb Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 22 Apr 2021 16:50:50 +0300
Subject: netdevsim: Only use sampling truncation length when valid

When the sampling truncation length is invalid (zero), pass the length
of the packet. Without the fix, no payload is reported to user space
when the truncation length is zero.

Fixes: a8700c3dd0a4 ("netdevsim: Add dummy psample implementation")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/psample.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c
index 5ec3bd7f891b..f0c6477dd0ae 100644
--- a/drivers/net/netdevsim/psample.c
+++ b/drivers/net/netdevsim/psample.c
@@ -79,9 +79,10 @@ static struct sk_buff *nsim_dev_psample_skb_build(void)
 }
 
 static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample,
-					struct psample_metadata *md)
+					struct psample_metadata *md,
+					unsigned int len)
 {
-	md->trunc_size = psample->trunc_size;
+	md->trunc_size = psample->trunc_size ? psample->trunc_size : len;
 	md->in_ifindex = psample->in_ifindex;
 	md->out_ifindex = psample->out_ifindex;
 
@@ -120,7 +121,7 @@ static void nsim_dev_psample_report_work(struct work_struct *work)
 	if (!skb)
 		goto out;
 
-	nsim_dev_psample_md_prepare(psample, &md);
+	nsim_dev_psample_md_prepare(psample, &md, skb->len);
 	psample_sample_packet(psample->group, skb, psample->rate, &md);
 	consume_skb(skb);
 
-- 
cgit v1.2.3


From 57e222475545f457ecf4833db31f156e8b7674c7 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 22 Apr 2021 16:06:01 +0200
Subject: net: wwan: core: Return poll error in case of port removal

Ensure that the poll system call returns proper error flags when port
is removed (nullified port ops), allowing user side to properly fail,
without further read or write.

Fixes: 9a44c1cc6388 ("net: Add a WWAN subsystem")
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/wwan_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
index 5be5e1e3534c..cff04e532c1e 100644
--- a/drivers/net/wwan/wwan_core.c
+++ b/drivers/net/wwan/wwan_core.c
@@ -508,6 +508,8 @@ static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait)
 		mask |= EPOLLOUT | EPOLLWRNORM;
 	if (!is_read_blocked(port))
 		mask |= EPOLLIN | EPOLLRDNORM;
+	if (!port->ops)
+		mask |= EPOLLHUP | EPOLLERR;
 
 	return mask;
 }
-- 
cgit v1.2.3


From 3197a98c7081a1c3db6ef63fece55d7f66c79bdc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Apr 2021 17:35:33 +0200
Subject: vxge: avoid -Wemtpy-body warnings

There are a few warnings about empty debug macros in this driver:

drivers/net/ethernet/neterion/vxge/vxge-main.c: In function 'vxge_probe':
drivers/net/ethernet/neterion/vxge/vxge-main.c:4480:76: error: suggest braces around empty body in an 'if' statement [-Werror=empty-body]
 4480 |                                 "Failed in enabling SRIOV mode: %d\n", ret);

Change them to proper 'do { } while (0)' expressions to make the
code a little more robust and avoid the warnings.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/neterion/vxge/vxge-main.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h
index 9c86f4f9cd42..63f65193dd49 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h
@@ -454,49 +454,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override);
 #define vxge_debug_ll_config(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_ll_config(level, fmt, ...)
+#define vxge_debug_ll_config(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK)
 #define vxge_debug_init(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_init(level, fmt, ...)
+#define vxge_debug_init(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK)
 #define vxge_debug_tx(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_tx(level, fmt, ...)
+#define vxge_debug_tx(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK)
 #define vxge_debug_rx(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_rx(level, fmt, ...)
+#define vxge_debug_rx(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK)
 #define vxge_debug_mem(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_mem(level, fmt, ...)
+#define vxge_debug_mem(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK)
 #define vxge_debug_entryexit(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_entryexit(level, fmt, ...)
+#define vxge_debug_entryexit(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK)
 #define vxge_debug_intr(level, fmt, ...) \
 	vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__)
 #else
-#define vxge_debug_intr(level, fmt, ...)
+#define vxge_debug_intr(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define VXGE_DEVICE_DEBUG_LEVEL_SET(level, mask, vdev) {\
-- 
cgit v1.2.3


From f49c35b89b784c20a8868bb6f57f3e25277268c3 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Thu, 22 Apr 2021 16:48:02 +0800
Subject: r8152: replace return with break for ram code speedup mode timeout

When the timeout occurs, we still have to run the following process
for releasing patch request. Otherwise, the PHY would keep no link.
Therefore, use break to stop the loop of loading firmware and
release the patch request rather than return the function directly.

Fixes: 4a51b0e8a014 ("r8152: support PHY firmware for RTL8156 series")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 9119a860e9bd..5b4ed69df64f 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -4803,7 +4803,7 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy,
 
 		if (i == 1000) {
 			dev_err(&tp->intf->dev, "ram code speedup mode timeout\n");
-			return;
+			break;
 		}
 	}
 
-- 
cgit v1.2.3


From 22b6034323fd736f260e00b9ea85c634abeb3446 Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Mon, 19 Apr 2021 16:15:59 +0200
Subject: net, xdp: Update pkt_type if generic XDP changes unicast MAC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a generic XDP program changes the destination MAC address from/to
multicast/broadcast, the skb->pkt_type is updated to properly handle
the packet when passed up the stack. When changing the MAC from/to
the NICs MAC, PACKET_HOST/OTHERHOST is not updated, though, making
the behavior different from that of native XDP.

Remember the PACKET_HOST/OTHERHOST state before calling the program
in generic XDP, and update pkt_type accordingly if the destination
MAC address has changed. As eth_type_trans() assumes a default
pkt_type of PACKET_HOST, restore that before calling it.

The use case for this is when a XDP program wants to push received
packets up the stack by rewriting the MAC to the NICs MAC, for
example by cluster nodes sharing MAC addresses.

Fixes: 297249569932 ("net: fix generic XDP to handle if eth header was mangled")
Signed-off-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20210419141559.8611-1-martin@strongswan.org
---
 net/core/dev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index b4c67a5be606..6a1ef7a15bed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4723,10 +4723,10 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	void *orig_data, *orig_data_end, *hard_start;
 	struct netdev_rx_queue *rxqueue;
 	u32 metalen, act = XDP_DROP;
+	bool orig_bcast, orig_host;
 	u32 mac_len, frame_sz;
 	__be16 orig_eth_type;
 	struct ethhdr *eth;
-	bool orig_bcast;
 	int off;
 
 	/* Reinjected packets coming from act_mirred or similar should
@@ -4773,6 +4773,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
 	eth = (struct ethhdr *)xdp->data;
+	orig_host = ether_addr_equal_64bits(eth->h_dest, skb->dev->dev_addr);
 	orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
 	orig_eth_type = eth->h_proto;
 
@@ -4800,8 +4801,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	/* check if XDP changed eth hdr such SKB needs update */
 	eth = (struct ethhdr *)xdp->data;
 	if ((orig_eth_type != eth->h_proto) ||
+	    (orig_host != ether_addr_equal_64bits(eth->h_dest,
+						  skb->dev->dev_addr)) ||
 	    (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
 		__skb_push(skb, ETH_HLEN);
+		skb->pkt_type = PACKET_HOST;
 		skb->protocol = eth_type_trans(skb, skb->dev);
 	}
 
-- 
cgit v1.2.3


From 64ef3ddfa95ebf4606eedd3ec09a838e1c1af341 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 22 Apr 2021 11:36:00 +0800
Subject: bpf, doc: Fix some invalid links in bpf_devel_QA.rst

There exist some errors "404 Not Found" when I click the link
of "MAINTAINERS" [1], "samples/bpf/" [2] and "selftests" [3]
in the documentation "HOWTO interact with BPF subsystem" [4].

As Alexei Starovoitov suggested, just remove "MAINTAINERS" and
"samples/bpf/" links and use correct link of "selftests".

  [1] https://www.kernel.org/doc/html/MAINTAINERS
  [2] https://www.kernel.org/doc/html/samples/bpf/
  [3] https://www.kernel.org/doc/html/tools/testing/selftests/bpf/
  [4] https://www.kernel.org/doc/html/latest/bpf/bpf_devel_QA.html

Fixes: 542228384888 ("bpf, doc: convert bpf_devel_QA.rst to use RST formatting")
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/1619062560-30483-1-git-send-email-yangtiezhu@loongson.cn
---
 Documentation/bpf/bpf_devel_QA.rst | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 2ed89abbf9a4..d05e67e72c10 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -29,7 +29,7 @@ list:
 This may also include issues related to XDP, BPF tracing, etc.
 
 Given netdev has a high volume of traffic, please also add the BPF
-maintainers to Cc (from kernel MAINTAINERS_ file):
+maintainers to Cc (from kernel ``MAINTAINERS`` file):
 
 * Alexei Starovoitov <ast@kernel.org>
 * Daniel Borkmann <daniel@iogearbox.net>
@@ -234,11 +234,11 @@ be subject to change.
 
 Q: samples/bpf preference vs selftests?
 ---------------------------------------
-Q: When should I add code to `samples/bpf/`_ and when to BPF kernel
-selftests_ ?
+Q: When should I add code to ``samples/bpf/`` and when to BPF kernel
+selftests_?
 
 A: In general, we prefer additions to BPF kernel selftests_ rather than
-`samples/bpf/`_. The rationale is very simple: kernel selftests are
+``samples/bpf/``. The rationale is very simple: kernel selftests are
 regularly run by various bots to test for kernel regressions.
 
 The more test cases we add to BPF selftests, the better the coverage
@@ -246,9 +246,9 @@ and the less likely it is that those could accidentally break. It is
 not that BPF kernel selftests cannot demo how a specific feature can
 be used.
 
-That said, `samples/bpf/`_ may be a good place for people to get started,
+That said, ``samples/bpf/`` may be a good place for people to get started,
 so it might be advisable that simple demos of features could go into
-`samples/bpf/`_, but advanced functional and corner-case testing rather
+``samples/bpf/``, but advanced functional and corner-case testing rather
 into kernel selftests.
 
 If your sample looks like a test case, then go for BPF kernel selftests
@@ -645,10 +645,9 @@ when:
 
 .. Links
 .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _MAINTAINERS: ../../MAINTAINERS
 .. _netdev-FAQ: ../networking/netdev-FAQ.rst
-.. _samples/bpf/: ../../samples/bpf/
-.. _selftests: ../../tools/testing/selftests/bpf/
+.. _selftests:
+   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
    https://www.kernel.org/doc/html/latest/dev-tools/kselftest.html
 .. _Documentation/bpf/btf.rst: btf.rst
-- 
cgit v1.2.3


From 27537929f30d3136a71ef29db56127a33c92dad7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 22 Apr 2021 12:10:28 +0300
Subject: bnxt_en: fix ternary sign extension bug in bnxt_show_temp()

The problem is that bnxt_show_temp() returns long but "rc" is an int
and "len" is a u32.  With ternary operations the type promotion is quite
tricky.  The negative "rc" is first promoted to u32 and then to long so
it ends up being a high positive value instead of a a negative as we
intended.

Fix this by removing the ternary.

Fixes: d69753fa1ecb ("bnxt_en: return proper error codes in bnxt_show_temp")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b53a0d87371a..aeb8c61c0f87 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9736,7 +9736,9 @@ static ssize_t bnxt_show_temp(struct device *dev,
 	if (!rc)
 		len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */
 	mutex_unlock(&bp->hwrm_cmd_lock);
-	return rc ?: len;
+	if (rc)
+		return rc;
+	return len;
 }
 static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0);
 
-- 
cgit v1.2.3


From 79ebfb11fe0848e916950787bb105f1c0559a577 Mon Sep 17 00:00:00 2001
From: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
Date: Thu, 22 Apr 2021 08:21:40 +0200
Subject: net/mlx4: Treat VFs fair when handling comm_channel_events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Handling comm_channel_event in mlx4_master_comm_channel uses a double
loop to determine which slaves have requested work. The search is
always started at lowest slave. This leads to unfairness; lower VFs
tends to be prioritized over higher VFs.

The patch uses find_next_bit to determine which slaves to handle.
Fairness is implemented by always starting at the next to the last
start.

An MPI program has been used to measure improvements. It runs 500
ibv_reg_mr, synchronizes with all other instances and then runs 500
ibv_dereg_mr.

The results running 500 processes, time reported is for running 500
calls:

ibv_reg_mr:
             Mod.   Org.
mlx4_1    403.356ms 424.674ms
mlx4_2    403.355ms 424.674ms
mlx4_3    403.354ms 424.674ms
mlx4_4    403.355ms 424.674ms
mlx4_5    403.357ms 424.677ms
mlx4_6    403.354ms 424.676ms
mlx4_7    403.357ms 424.675ms
mlx4_8    403.355ms 424.675ms

ibv_dereg_mr:
             Mod.   Org.
mlx4_1    116.408ms 142.818ms
mlx4_2    116.434ms 142.793ms
mlx4_3    116.488ms 143.247ms
mlx4_4    116.679ms 143.230ms
mlx4_5    112.017ms 107.204ms
mlx4_6    112.032ms 107.516ms
mlx4_7    112.083ms 184.195ms
mlx4_8    115.089ms 190.618ms

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  | 69 ++++++++++++++++++-------------
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  1 +
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c678344d22a2..8d751383530b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2241,43 +2241,52 @@ void mlx4_master_comm_channel(struct work_struct *work)
 	struct mlx4_priv *priv =
 		container_of(mfunc, struct mlx4_priv, mfunc);
 	struct mlx4_dev *dev = &priv->dev;
-	__be32 *bit_vec;
+	u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+	u32 nmbr_bits;
 	u32 comm_cmd;
-	u32 vec;
-	int i, j, slave;
+	int i, slave;
 	int toggle;
+	bool first = true;
 	int served = 0;
 	int reported = 0;
 	u32 slt;
 
-	bit_vec = master->comm_arm_bit_vector;
-	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) {
-		vec = be32_to_cpu(bit_vec[i]);
-		for (j = 0; j < 32; j++) {
-			if (!(vec & (1 << j)))
-				continue;
-			++reported;
-			slave = (i * 32) + j;
-			comm_cmd = swab32(readl(
-					  &mfunc->comm[slave].slave_write));
-			slt = swab32(readl(&mfunc->comm[slave].slave_read))
-				     >> 31;
-			toggle = comm_cmd >> 31;
-			if (toggle != slt) {
-				if (master->slave_state[slave].comm_toggle
-				    != slt) {
-					pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
-						slave, slt,
-						master->slave_state[slave].comm_toggle);
-					master->slave_state[slave].comm_toggle =
-						slt;
-				}
-				mlx4_master_do_cmd(dev, slave,
-						   comm_cmd >> 16 & 0xff,
-						   comm_cmd & 0xffff, toggle);
-				++served;
+	for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++)
+		lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]);
+	nmbr_bits = dev->persist->num_vfs + 1;
+	if (++master->next_slave >= nmbr_bits)
+		master->next_slave = 0;
+	slave = master->next_slave;
+	while (true) {
+		slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave);
+		if  (!first && slave >= master->next_slave)
+			break;
+		if (slave == nmbr_bits) {
+			if (!first)
+				break;
+			first = false;
+			slave = 0;
+			continue;
+		}
+		++reported;
+		comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write));
+		slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31;
+		toggle = comm_cmd >> 31;
+		if (toggle != slt) {
+			if (master->slave_state[slave].comm_toggle
+			    != slt) {
+				pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n",
+					slave, slt,
+					master->slave_state[slave].comm_toggle);
+				master->slave_state[slave].comm_toggle =
+					slt;
 			}
+			mlx4_master_do_cmd(dev, slave,
+					   comm_cmd >> 16 & 0xff,
+					   comm_cmd & 0xffff, toggle);
+			++served;
 		}
+		slave++;
 	}
 
 	if (reported && reported != served)
@@ -2389,6 +2398,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 		if (!priv->mfunc.master.vf_oper)
 			goto err_comm_oper;
 
+		priv->mfunc.master.next_slave = 0;
+
 		for (i = 0; i < dev->num_slaves; ++i) {
 			vf_admin = &priv->mfunc.master.vf_admin[i];
 			vf_oper = &priv->mfunc.master.vf_oper[i];
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 64bed7ac3836..6ccf340660d9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -603,6 +603,7 @@ struct mlx4_mfunc_master_ctx {
 	struct mlx4_slave_event_eq slave_eq;
 	struct mutex		gen_eqe_mutex[MLX4_MFUNC_MAX];
 	struct mlx4_qos_manager qos_ctl[MLX4_MAX_PORTS + 1];
+	u32			next_slave; /* mlx4_master_comm_channel */
 };
 
 struct mlx4_mfunc {
-- 
cgit v1.2.3


From 96874c619c200bc704ae2d8e34a3746350922135 Mon Sep 17 00:00:00 2001
From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Date: Thu, 22 Apr 2021 15:55:00 +0800
Subject: net: stmmac: Add HW descriptor prefetch setting for DWMAC Core 5.20
 onwards

DWMAC Core 5.20 onwards supports HW descriptor prefetching.
Additionally, it also depends on platform specific RTL configuration.
This capability could be enabled by setting DMA_Mode bit-19 (DCHE).

So, to enable this cability, platform must set plat->dma_cfg->dche = true
and the DWMAC core version must be 5.20 onwards. Else, this capability
wouldn`t be configured

Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  | 10 ++++++++--
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h  |  1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  5 +++++
 include/linux/stmmac.h                            |  1 +
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index c54a56b732b3..619e3c0760d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -33,6 +33,7 @@
 #define DWMAC_CORE_4_10		0x41
 #define DWMAC_CORE_5_00		0x50
 #define DWMAC_CORE_5_10		0x51
+#define DWMAC_CORE_5_20		0x52
 #define DWXGMAC_CORE_2_10	0x21
 #define DWXLGMAC_CORE_2_00	0x20
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index cb17f6c35e54..a602d16b9e53 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -162,12 +162,18 @@ static void dwmac4_dma_init(void __iomem *ioaddr,
 
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 
+	value = readl(ioaddr + DMA_BUS_MODE);
+
 	if (dma_cfg->multi_msi_en) {
-		value = readl(ioaddr + DMA_BUS_MODE);
 		value &= ~DMA_BUS_MODE_INTM_MASK;
 		value |= (DMA_BUS_MODE_INTM_MODE1 << DMA_BUS_MODE_INTM_SHIFT);
-		writel(value, ioaddr + DMA_BUS_MODE);
 	}
+
+	if (dma_cfg->dche)
+		value |= DMA_BUS_MODE_DCHE;
+
+	writel(value, ioaddr + DMA_BUS_MODE);
+
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 05481eb13ba6..9321879b599c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -25,6 +25,7 @@
 #define DMA_TBS_CTRL			0x00001050
 
 /* DMA Bus Mode bitmap */
+#define DMA_BUS_MODE_DCHE		BIT(19)
 #define DMA_BUS_MODE_INTM_MASK		GENMASK(17, 16)
 #define DMA_BUS_MODE_INTM_SHIFT		16
 #define DMA_BUS_MODE_INTM_MODE1		0x1
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d1ca07c846e6..372090e8ee6f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6849,6 +6849,11 @@ int stmmac_dvr_probe(struct device *device,
 	if (ret)
 		goto error_hw_init;
 
+	/* Only DWMAC core version 5.20 onwards supports HW descriptor prefetch.
+	 */
+	if (priv->synopsys_id < DWMAC_CORE_5_20)
+		priv->plat->dma_cfg->dche = false;
+
 	stmmac_check_ether_addr(priv);
 
 	ndev->netdev_ops = &stmmac_netdev_ops;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 97edb31d6310..0db36360ef21 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -97,6 +97,7 @@ struct stmmac_dma_cfg {
 	bool aal;
 	bool eame;
 	bool multi_msi_en;
+	bool dche;
 };
 
 #define AXI_BLEN	7
-- 
cgit v1.2.3


From 676b7ec67d79ae77c6634e75344d82fc4b885e65 Mon Sep 17 00:00:00 2001
From: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Date: Thu, 22 Apr 2021 15:55:01 +0800
Subject: stmmac: intel: Enable HW descriptor prefetch by default

Enable HW descriptor prefetch by default by setting plat->dma_cfg->dche =
true in intel_mgbe_common_data(). Need to be noted that this capability
only be supported in DWMAC core version 5.20 onwards. In stmmac, there is
a checking to check the core version. If the core version is below 5.20,
this capability wouldn`t be configured.

Below is the iperf result comparison between HW descriptor prefetch
disabled(DCHE=0b) and enabled(DCHE=1b). Tested on Intel Elkhartlake
platform with DWMAC Core 5.20. Observed line rate performance
improvement with HW descriptor prefetch enabled.

DCHE = 0b
[  5] local 169.254.1.162 port 42123 connected to 169.254.244.142 port 5201
[ ID] Interval           Transfer     Bitrate         Total Datagrams
[  5]   0.00-1.00   sec  96.7 MBytes   811 Mbits/sec  70050
[  5]   1.00-2.00   sec  96.5 MBytes   809 Mbits/sec  69850
[  5]   2.00-3.00   sec  96.3 MBytes   808 Mbits/sec  69720
[  5]   3.00-4.00   sec  95.9 MBytes   804 Mbits/sec  69450
[  5]   4.00-5.00   sec  96.0 MBytes   806 Mbits/sec  69530
[  5]   5.00-6.00   sec  96.8 MBytes   812 Mbits/sec  70080
[  5]   6.00-7.00   sec  96.9 MBytes   813 Mbits/sec  70140
[  5]   7.00-8.00   sec  96.8 MBytes   812 Mbits/sec  70080
[  5]   8.00-9.00   sec  97.0 MBytes   814 Mbits/sec  70230
[  5]   9.00-10.00  sec  96.9 MBytes   813 Mbits/sec  70170
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
[  5]   0.00-10.00  sec   966 MBytes   810 Mbits/sec  0.000 ms  0/699300 (0%)  sender
[  5]   0.00-10.00  sec   966 MBytes   810 Mbits/sec  0.011 ms  0/699265 (0%)  receiver

DCHE = 1b
[  5] local 169.254.1.162 port 49740 connected to 169.254.244.142 port 5201
[ ID] Interval           Transfer     Bitrate         Total Datagrams
[  5]   0.00-1.00   sec  97.9 MBytes   821 Mbits/sec  70880
[  5]   1.00-2.00   sec  98.1 MBytes   823 Mbits/sec  71060
[  5]   2.00-3.00   sec  98.2 MBytes   824 Mbits/sec  71140
[  5]   3.00-4.00   sec  98.2 MBytes   824 Mbits/sec  71090
[  5]   4.00-5.00   sec  98.1 MBytes   823 Mbits/sec  71050
[  5]   5.00-6.00   sec  98.1 MBytes   823 Mbits/sec  71040
[  5]   6.00-7.00   sec  98.1 MBytes   823 Mbits/sec  71050
[  5]   7.00-8.00   sec  98.2 MBytes   824 Mbits/sec  71140
[  5]   8.00-9.00   sec  98.2 MBytes   824 Mbits/sec  71120
[  5]   9.00-10.00  sec  98.3 MBytes   824 Mbits/sec  71150
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval           Transfer     Bitrate         Jitter    Lost/Total Datagrams
[  5]   0.00-10.00  sec   981 MBytes   823 Mbits/sec  0.000 ms  0/710720 (0%)  sender
[  5]   0.00-10.00  sec   981 MBytes   823 Mbits/sec  0.041 ms  0/710650 (0%) receiver

Signed-off-by: Mohammad Athari Bin Ismail <mohammad.athari.ismail@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 4bd038acb1b9..80728a4c0e3f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -471,6 +471,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->dma_cfg->fixed_burst = 0;
 	plat->dma_cfg->mixed_burst = 0;
 	plat->dma_cfg->aal = 0;
+	plat->dma_cfg->dche = true;
 
 	plat->axi = devm_kzalloc(&pdev->dev, sizeof(*plat->axi),
 				 GFP_KERNEL);
-- 
cgit v1.2.3


From 9ba585cc5b56ea14a453ba6be9bdb984ed33471a Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Fri, 23 Apr 2021 02:31:48 +0900
Subject: ARM: dts: uniphier: Change phy-mode to RGMII-ID to enable delay pins
 for RTL8211E

UniPhier PXs2 boards have RTL8211E ethernet phy, and the phy have the RX/TX
delays of RGMII interface using pull-ups on the RXDLY and TXDLY pins.

After the commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx
delay config"), the delays are working correctly, however, "rgmii" means
no delay and the phy doesn't work. So need to set the phy-mode to
"rgmii-id" to show that RX/TX delays are enabled.

Fixes: e3cc931921d2 ("ARM: dts: uniphier: add AVE ethernet node")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/uniphier-pxs2.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
index b0b15c97306b..e81e5937a60a 100644
--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
+++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
@@ -583,7 +583,7 @@
 			clocks = <&sys_clk 6>;
 			reset-names = "ether";
 			resets = <&sys_rst 6>;
-			phy-mode = "rgmii";
+			phy-mode = "rgmii-id";
 			local-mac-address = [00 00 00 00 00 00];
 			socionext,syscon-phy-mode = <&soc_glue 0>;
 
-- 
cgit v1.2.3


From dcabb06bf127b3e0d3fbc94a2b65dd56c2725851 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Fri, 23 Apr 2021 02:31:49 +0900
Subject: arm64: dts: uniphier: Change phy-mode to RGMII-ID to enable delay
 pins for RTL8211E

UniPhier LD20 and PXs3 boards have RTL8211E ethernet phy, and the phy have
the RX/TX delays of RGMII interface using pull-ups on the RXDLY and TXDLY
pins.

After the commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx
delay config"), the delays are working correctly, however, "rgmii" means
no delay and the phy doesn't work. So need to set the phy-mode to
"rgmii-id" to show that RX/TX delays are enabled.

Fixes: c73730ee4c9a ("arm64: dts: uniphier: add AVE ethernet node")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 2 +-
 arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
index a87b8a678719..8f2c1c1e2c64 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
@@ -734,7 +734,7 @@
 			clocks = <&sys_clk 6>;
 			reset-names = "ether";
 			resets = <&sys_rst 6>;
-			phy-mode = "rgmii";
+			phy-mode = "rgmii-id";
 			local-mac-address = [00 00 00 00 00 00];
 			socionext,syscon-phy-mode = <&soc_glue 0>;
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
index 0e52dadf54b3..be97da132258 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
@@ -564,7 +564,7 @@
 			clocks = <&sys_clk 6>;
 			reset-names = "ether";
 			resets = <&sys_rst 6>;
-			phy-mode = "rgmii";
+			phy-mode = "rgmii-id";
 			local-mac-address = [00 00 00 00 00 00];
 			socionext,syscon-phy-mode = <&soc_glue 0>;
 
@@ -585,7 +585,7 @@
 			clocks = <&sys_clk 7>;
 			reset-names = "ether";
 			resets = <&sys_rst 7>;
-			phy-mode = "rgmii";
+			phy-mode = "rgmii-id";
 			local-mac-address = [00 00 00 00 00 00];
 			socionext,syscon-phy-mode = <&soc_glue 1>;
 
-- 
cgit v1.2.3


From e7a1c1300891d8f11d05b42665e299cc22a4b383 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 14 Apr 2021 13:39:12 +0800
Subject: xsk: Align XDP socket batch size with DPDK

DPDK default burst size is 32, however, kernel xsk sendto
syscall can not handle all 32 at one time, and return with
error.

So make kernel XDP socket batch size larger to avoid
unnecessary syscall fail and context switch which will help
to increase performance.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/1618378752-4191-1-git-send-email-lirongqing@baidu.com
---
 net/xdp/xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a71ed664da0a..cd62d4ba87a9 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -30,7 +30,7 @@
 #include "xdp_umem.h"
 #include "xsk.h"
 
-#define TX_BATCH_SIZE 16
+#define TX_BATCH_SIZE 32
 
 static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
 
-- 
cgit v1.2.3


From 6d2c322cce04c177d96baafdbd9bad5c49456719 Mon Sep 17 00:00:00 2001
From: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Date: Wed, 27 Jan 2021 10:58:36 +0000
Subject: i40e: refactor repeated link state reporting code

Refactor repeated link state reporting code into a separate helper
functions: i40e_set_vf_link_state() i40e_vc_link_speed2mbps().
Add support of VIRTCHNL_VF_CAP_ADV_LINK_SPEED;

Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 108 +++++++++++++--------
 1 file changed, 69 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 5d301a466f5c..eff0a30790dd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -39,6 +39,66 @@ static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
 	}
 }
 
+/**
+ * i40e_vc_link_speed2mbps
+ * converts i40e_aq_link_speed to integer value of Mbps
+ * @link_speed: the speed to convert
+ *
+ * return the speed as direct value of Mbps.
+ **/
+static u32
+i40e_vc_link_speed2mbps(enum i40e_aq_link_speed link_speed)
+{
+	switch (link_speed) {
+	case I40E_LINK_SPEED_100MB:
+		return SPEED_100;
+	case I40E_LINK_SPEED_1GB:
+		return SPEED_1000;
+	case I40E_LINK_SPEED_2_5GB:
+		return SPEED_2500;
+	case I40E_LINK_SPEED_5GB:
+		return SPEED_5000;
+	case I40E_LINK_SPEED_10GB:
+		return SPEED_10000;
+	case I40E_LINK_SPEED_20GB:
+		return SPEED_20000;
+	case I40E_LINK_SPEED_25GB:
+		return SPEED_25000;
+	case I40E_LINK_SPEED_40GB:
+		return SPEED_40000;
+	case I40E_LINK_SPEED_UNKNOWN:
+		return SPEED_UNKNOWN;
+	}
+	return SPEED_UNKNOWN;
+}
+
+/**
+ * i40e_set_vf_link_state
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to PF event structure
+ * @ls: pointer to link status structure
+ *
+ * set a link state on a single vf
+ **/
+static void i40e_set_vf_link_state(struct i40e_vf *vf,
+				   struct virtchnl_pf_event *pfe, struct i40e_link_status *ls)
+{
+	u8 link_status = ls->link_info & I40E_AQ_LINK_UP;
+
+	if (vf->link_forced)
+		link_status = vf->link_up;
+
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+		pfe->event_data.link_event_adv.link_speed = link_status ?
+			i40e_vc_link_speed2mbps(ls->link_speed) : 0;
+		pfe->event_data.link_event_adv.link_status = link_status;
+	} else {
+		pfe->event_data.link_event.link_speed = link_status ?
+			i40e_virtchnl_link_speed(ls->link_speed) : 0;
+		pfe->event_data.link_event.link_status = link_status;
+	}
+}
+
 /**
  * i40e_vc_notify_vf_link_state
  * @vf: pointer to the VF structure
@@ -55,16 +115,9 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
 
 	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
 	pfe.severity = PF_EVENT_SEVERITY_INFO;
-	if (vf->link_forced) {
-		pfe.event_data.link_event.link_status = vf->link_up;
-		pfe.event_data.link_event.link_speed =
-			(vf->link_up ? i40e_virtchnl_link_speed(ls->link_speed) : 0);
-	} else {
-		pfe.event_data.link_event.link_status =
-			ls->link_info & I40E_AQ_LINK_UP;
-		pfe.event_data.link_event.link_speed =
-			i40e_virtchnl_link_speed(ls->link_speed);
-	}
+
+	i40e_set_vf_link_state(vf, &pfe, ls);
+
 	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
 			       0, (u8 *)&pfe, sizeof(pfe), NULL);
 }
@@ -1949,6 +2002,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 				  VIRTCHNL_VF_OFFLOAD_VLAN;
 
 	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+	vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!vsi->info.pvid)
 		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
@@ -3696,26 +3750,8 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
 	}
 
 	/* get link speed in MB to validate rate limit */
-	switch (ls->link_speed) {
-	case VIRTCHNL_LINK_SPEED_100MB:
-		speed = SPEED_100;
-		break;
-	case VIRTCHNL_LINK_SPEED_1GB:
-		speed = SPEED_1000;
-		break;
-	case VIRTCHNL_LINK_SPEED_10GB:
-		speed = SPEED_10000;
-		break;
-	case VIRTCHNL_LINK_SPEED_20GB:
-		speed = SPEED_20000;
-		break;
-	case VIRTCHNL_LINK_SPEED_25GB:
-		speed = SPEED_25000;
-		break;
-	case VIRTCHNL_LINK_SPEED_40GB:
-		speed = SPEED_40000;
-		break;
-	default:
+	speed = i40e_vc_link_speed2mbps(ls->link_speed);
+	if (speed == SPEED_UNKNOWN) {
 		dev_err(&pf->pdev->dev,
 			"Cannot detect link speed\n");
 		aq_ret = I40E_ERR_PARAM;
@@ -4464,23 +4500,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
 	switch (link) {
 	case IFLA_VF_LINK_STATE_AUTO:
 		vf->link_forced = false;
-		pfe.event_data.link_event.link_status =
-			pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP;
-		pfe.event_data.link_event.link_speed =
-			(enum virtchnl_link_speed)
-			pf->hw.phy.link_info.link_speed;
+		i40e_set_vf_link_state(vf, &pfe, ls);
 		break;
 	case IFLA_VF_LINK_STATE_ENABLE:
 		vf->link_forced = true;
 		vf->link_up = true;
-		pfe.event_data.link_event.link_status = true;
-		pfe.event_data.link_event.link_speed = i40e_virtchnl_link_speed(ls->link_speed);
+		i40e_set_vf_link_state(vf, &pfe, ls);
 		break;
 	case IFLA_VF_LINK_STATE_DISABLE:
 		vf->link_forced = true;
 		vf->link_up = false;
-		pfe.event_data.link_event.link_status = false;
-		pfe.event_data.link_event.link_speed = 0;
+		i40e_set_vf_link_state(vf, &pfe, ls);
 		break;
 	default:
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 065aa694a76e213d5774eeb70a9c11b8cf0dfdb7 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Thu, 4 Mar 2021 10:55:41 +0800
Subject: i40e: use minimal Tx and Rx pairs for kdump

Set the number of the MSI-X vectors to 1. When MSI-X is enabled,
it's not allowed to use more TC queue pairs than MSI-X vectors
(pf->num_lan_msix) exist. Thus the number of Tx and Rx pairs
(vsi->num_queue_pairs) will be equal to the number of MSI-X vectors,
i.e., 1.

Signed-off-by: Coiby Xu <coxu@redhat.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 687ef52a8116..b51b8e25501d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6,6 +6,7 @@
 #include <linux/pci.h>
 #include <linux/bpf.h>
 #include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
 
 /* Local includes */
 #include "i40e.h"
@@ -15506,6 +15507,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		goto err_switch_setup;
 
+	/* Reduce Tx and Rx pairs for kdump
+	 * When MSI-X is enabled, it's not allowed to use more TC queue
+	 * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus
+	 * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1.
+	 */
+	if (is_kdump_kernel())
+		pf->num_lan_msix = 1;
+
 	pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
 	pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
 	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
-- 
cgit v1.2.3


From dcb75338f6e7092324cc6784f1c30c5c6da6340e Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Thu, 4 Mar 2021 10:55:42 +0800
Subject: i40e: use minimal Rx and Tx ring buffers for kdump

Use the minimum of the number of descriptors thus we will allocate the
minimal ring buffers for kdump.

Signed-off-by: Coiby Xu <coxu@redhat.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b51b8e25501d..2d3dbb3c0e14 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11040,6 +11040,11 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
 		return -ENODATA;
 	}
 
+	if (is_kdump_kernel()) {
+		vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS;
+		vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS;
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5c208e9f498ed76ad03a09c5089efd4491631562 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Thu, 4 Mar 2021 10:55:43 +0800
Subject: i40e: use minimal admin queue for kdump

The minimum size of admin send/receive queue is 1 and 2 respectively.
The admin send queue can't be set to 1 because in that case, the
firmware would fail to init.

Signed-off-by: Coiby Xu <coxu@redhat.com>
Tested-by: Dave Switzer <david.switzer@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h      | 2 ++
 drivers/net/ethernet/intel/i40e/i40e_main.c | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 15f93b355099..9067cd3ce243 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -66,6 +66,8 @@
 #define I40E_FDIR_RING_COUNT		32
 #define I40E_MAX_AQ_BUF_SIZE		4096
 #define I40E_AQ_LEN			256
+#define I40E_MIN_ARQ_LEN		1
+#define I40E_MIN_ASQ_LEN		2
 #define I40E_AQ_WORK_LIMIT		66 /* max number of VFs + a little */
 #define I40E_MAX_USER_PRIORITY		8
 #define I40E_DEFAULT_TRAFFIC_CLASS	BIT(0)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2d3dbb3c0e14..c2d145a56b5e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15348,8 +15348,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	i40e_check_recovery_mode(pf);
 
-	hw->aq.num_arq_entries = I40E_AQ_LEN;
-	hw->aq.num_asq_entries = I40E_AQ_LEN;
+	if (is_kdump_kernel()) {
+		hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN;
+		hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN;
+	} else {
+		hw->aq.num_arq_entries = I40E_AQ_LEN;
+		hw->aq.num_asq_entries = I40E_AQ_LEN;
+	}
 	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
 	hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
 	pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;
-- 
cgit v1.2.3


From 1a0e880b028f97478dc689e2900b312741d0d772 Mon Sep 17 00:00:00 2001
From: Stefan Assmann <sassmann@kpanic.de>
Date: Tue, 9 Mar 2021 15:41:42 +0100
Subject: iavf: remove duplicate free resources calls

Both iavf_free_all_tx_resources() and iavf_free_all_rx_resources() have
already been called in the very same function.
Remove the duplicate calls.

Signed-off-by: Stefan Assmann <sassmann@kpanic.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 7a81e7ceea65..e612c24fa384 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3940,8 +3940,6 @@ static void iavf_remove(struct pci_dev *pdev)
 
 	iounmap(hw->hw_addr);
 	pci_release_regions(pdev);
-	iavf_free_all_tx_resources(adapter);
-	iavf_free_all_rx_resources(adapter);
 	iavf_free_queues(adapter);
 	kfree(adapter->vf_res);
 	spin_lock_bh(&adapter->mac_vlan_list_lock);
-- 
cgit v1.2.3


From f995f95af626cb1867cbfc702d011a50c4ff9538 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Wed, 31 Mar 2021 10:08:35 +0800
Subject: iavf: change the flex-byte support number to macro definition

The maximum number (2) of flex-byte support is derived from ethtool
use-def data size (8 byte).

Change the magic number 2 to macro definition, and add the comment to
track the design thinking, so the code is clear and easily maintained.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +-
 drivers/net/ethernet/intel/iavf/iavf_fdir.h    | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 3d904bc6ee76..af43fbd8cb75 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -939,7 +939,7 @@ iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
 	if (!(fsp->flow_type & FLOW_EXT))
 		return 0;
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < IAVF_FLEX_WORD_NUM; i++) {
 #define IAVF_USERDEF_FLEX_WORD_M	GENMASK(15, 0)
 #define IAVF_USERDEF_FLEX_OFFS_S	16
 #define IAVF_USERDEF_FLEX_OFFS_M	GENMASK(31, IAVF_USERDEF_FLEX_OFFS_S)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index 2439c970b657..33c55c366315 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -35,6 +35,11 @@ enum iavf_fdir_flow_type {
 	IAVF_FDIR_FLOW_PTYPE_MAX,
 };
 
+/* Must not exceed the array element number of '__be32 data[2]' in the ethtool
+ * 'struct ethtool_rx_flow_spec.m_ext.data[2]' to express the flex-byte (word).
+ */
+#define IAVF_FLEX_WORD_NUM	2
+
 struct iavf_flex_word {
 	u16 offset;
 	u16 word;
@@ -71,7 +76,7 @@ struct iavf_fdir_ip {
 };
 
 struct iavf_fdir_extra {
-	u32 usr_def[2];
+	u32 usr_def[IAVF_FLEX_WORD_NUM];
 };
 
 /* bookkeeping of Flow Director filters */
@@ -95,7 +100,7 @@ struct iavf_fdir_fltr {
 	/* flex byte filter data */
 	u8 ip_ver; /* used to adjust the flex offset, 4 : IPv4, 6 : IPv6 */
 	u8 flex_cnt;
-	struct iavf_flex_word flex_words[2];
+	struct iavf_flex_word flex_words[IAVF_FLEX_WORD_NUM];
 
 	u32 flow_id;
 
-- 
cgit v1.2.3


From f3b9da31f0e36a3cd3edad51131d63c044cd1ec4 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Wed, 31 Mar 2021 10:08:36 +0800
Subject: iavf: enhance the duplicated FDIR list scan handling

When the FDIR entry is found, just return the result directly to break
the loop.

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_fdir.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index 3e687189d737..af872ea3163f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -713,7 +713,6 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
 {
 	struct iavf_fdir_fltr *tmp;
-	bool ret = false;
 
 	list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
 		if (tmp->flow_type != fltr->flow_type)
@@ -724,13 +723,11 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
 		    !memcmp(&tmp->ip_data, &fltr->ip_data,
 			    sizeof(fltr->ip_data)) &&
 		    !memcmp(&tmp->ext_data, &fltr->ext_data,
-			    sizeof(fltr->ext_data))) {
-			ret = true;
-			break;
-		}
+			    sizeof(fltr->ext_data)))
+			return true;
 	}
 
-	return ret;
+	return false;
 }
 
 /**
-- 
cgit v1.2.3


From 1f70dfc542e88492a3bba3017e5e286dab7d3be6 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Wed, 31 Mar 2021 10:08:37 +0800
Subject: iavf: redefine the magic number for FDIR GTP-U header fields

The flex-byte for GTP-U protocol header fields uses the magic number,
which is hard to maintain and understand, define the interested fields
with meaningful macro name, based on the GTP-U protocol stack:

GTP-U header
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     | 0x1 |1|0|1|0|0|     0xff      |           Length              |
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     |                           TEID = 1654                         |
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     |    Sequence Number = 0        |N-PDU Number=0 |NextExtHdr=0x85|
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

GTP-U Extension Header (PDU Session Container)
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     |  ExtHdrLen=2  |Type=0 | Spare |0|0|   QFI     | PPI |  Spare  |
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     |                    Padding                    |NextExtHdr=0x0 |
     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_fdir.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index af872ea3163f..6146203efd84 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -54,8 +54,13 @@ iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
 #define IAVF_GTPU_HDR_TEID_OFFS0	4
 #define IAVF_GTPU_HDR_TEID_OFFS1	6
 #define IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS	10
+#define IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK		0x00FF /* skip N_PDU */
+/* PDU Session Container Extension Header (PSC) */
+#define IAVF_GTPU_PSC_EXTHDR_TYPE			0x85
 #define IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS		13
-#define IAVF_GTPU_PSC_EXTHDR_TYPE	0x85 /* PDU Session Container Extension Header */
+#define IAVF_GTPU_HDR_PSC_PDU_QFI_MASK			0x3F /* skip Type */
+#define IAVF_GTPU_EH_QFI_IDX				1
+
 		if (fltr->flex_words[i].offset < adj_offs)
 			return -EINVAL;
 
@@ -71,7 +76,9 @@ iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
 			}
 			break;
 		case IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS:
-			if ((fltr->flex_words[i].word & 0xff) != IAVF_GTPU_PSC_EXTHDR_TYPE)
+			if ((fltr->flex_words[i].word &
+			     IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK) !=
+						IAVF_GTPU_PSC_EXTHDR_TYPE)
 				return -EOPNOTSUPP;
 			if (!ehdr)
 				ehdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
@@ -80,7 +87,9 @@ iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
 		case IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS:
 			if (!ehdr)
 				return -EINVAL;
-			ehdr->buffer[1] = fltr->flex_words[i].word & 0x3F;
+			ehdr->buffer[IAVF_GTPU_EH_QFI_IDX] =
+					fltr->flex_words[i].word &
+						IAVF_GTPU_HDR_PSC_PDU_QFI_MASK;
 			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ehdr, GTPU_EH, QFI);
 			break;
 		default:
-- 
cgit v1.2.3


From 8e8ee109b02c0e90021d63cd20dd0157c021f7a4 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 23 Apr 2021 01:55:42 +0200
Subject: bpf: Notify user if we ever hit a bpf_snprintf verifier bug

In check_bpf_snprintf_call(), a map_direct_value_addr() of the fmt map
should never fail because it has already been checked by
ARG_PTR_TO_CONST_STR. But if it ever fails, it's better to error out
with an explicit debug message rather than silently fail.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210422235543.4007694-2-revest@chromium.org
---
 kernel/bpf/verifier.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 58730872f7e5..59799a9b014a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5940,8 +5940,10 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
 	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
 						  fmt_map_off);
-	if (err)
-		return err;
+	if (err) {
+		verbose(env, "verifier bug\n");
+		return -EFAULT;
+	}
 	fmt = (char *)(long)fmt_addr + fmt_map_off;
 
 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
-- 
cgit v1.2.3


From a8fad73e3334151196acb28c4dcde37732c82542 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 23 Apr 2021 01:55:43 +0200
Subject: bpf: Remove unnecessary map checks for ARG_PTR_TO_CONST_STR

reg->type is enforced by check_reg_type() and map should never be NULL
(it would already have been dereferenced anyway) so these checks are
unnecessary.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210422235543.4007694-3-revest@chromium.org
---
 kernel/bpf/verifier.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 59799a9b014a..2579f6fbb5c3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5075,8 +5075,7 @@ skip_type_check:
 		u64 map_addr;
 		char *str_ptr;
 
-		if (reg->type != PTR_TO_MAP_VALUE || !map ||
-		    !bpf_map_is_rdonly(map)) {
+		if (!bpf_map_is_rdonly(map)) {
 			verbose(env, "R%d does not point to a readonly map'\n", regno);
 			return -EACCES;
 		}
-- 
cgit v1.2.3


From 7d742b509dd773f6ae2f32ffe3d2c0f3ea598a6d Mon Sep 17 00:00:00 2001
From: Ilya Maximets <i.maximets@ovn.org>
Date: Wed, 21 Apr 2021 15:57:47 +0200
Subject: openvswitch: meter: remove rate from the bucket size calculation

Implementation of meters supposed to be a classic token bucket with 2
typical parameters: rate and burst size.

Burst size in this schema is the maximum number of bytes/packets that
could pass without being rate limited.

Recent changes to userspace datapath made meter implementation to be
in line with the kernel one, and this uncovered several issues.

The main problem is that maximum bucket size for unknown reason
accounts not only burst size, but also the numerical value of rate.
This creates a lot of confusion around behavior of meters.

For example, if rate is configured as 1000 pps and burst size set to 1,
this should mean that meter will tolerate bursts of 1 packet at most,
i.e. not a single packet above the rate should pass the meter.
However, current implementation calculates maximum bucket size as
(rate + burst size), so the effective bucket size will be 1001.  This
means that first 1000 packets will not be rate limited and average
rate might be twice as high as the configured rate.  This also makes
it practically impossible to configure meter that will have burst size
lower than the rate, which might be a desirable configuration if the
rate is high.

Inability to configure low values of a burst size and overall inability
for a user to predict what will be a maximum and average rate from the
configured parameters of a meter without looking at the OVS and kernel
code might be also classified as a security issue, because drop meters
are frequently used as a way of protection from DoS attacks.

This change removes rate from the calculation of a bucket size, making
it in line with the classic token bucket algorithm and essentially
making the rate and burst tolerance being predictable from a users'
perspective.

Same change proposed for the userspace implementation.

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 15424d26e85d..96b524ceabca 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000ULL;
+		band->bucket = band->burst_size * 1000ULL;
 		band_max_delta_t = div_u64(band->bucket, band->rate);
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
@@ -641,7 +641,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 		long long int max_bucket_size;
 
 		band = &meter->bands[i];
-		max_bucket_size = (band->burst_size + band->rate) * 1000LL;
+		max_bucket_size = band->burst_size * 1000LL;
 
 		band->bucket += delta_ms * band->rate;
 		if (band->bucket > max_bucket_size)
-- 
cgit v1.2.3


From ed744d819379ddeec5744b0bfc7eb6d0a8ac4e46 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Thu, 22 Apr 2021 21:41:51 +0800
Subject: net: sock: remove the unnecessary check in proto_register

tw_prot_cleanup will check the twsk_prot.

Fixes: 0f5907af3913 ("net: Fix potential memory leak in proto_register()")
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 5ec90f99e102..c761c4a0b66b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3531,7 +3531,7 @@ int proto_register(struct proto *prot, int alloc_slab)
 	return ret;
 
 out_free_timewait_sock_slab:
-	if (alloc_slab && prot->twsk_prot)
+	if (alloc_slab)
 		tw_prot_cleanup(prot->twsk_prot);
 out_free_request_sock_slab:
 	if (alloc_slab) {
-- 
cgit v1.2.3


From f80bd740cb7c954791279590b2e810ba6c214e52 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Thu, 22 Apr 2021 23:16:20 +0800
Subject: virtio-net: fix use-after-free in skb_gro_receive

When "headroom" > 0, the actual allocated memory space is the entire
page, so the address of the page should be used when passing it to
build_skb().

BUG: KASAN: use-after-free in skb_gro_receive (net/core/skbuff.c:4260)
Write of size 16 at addr ffff88811619fffc by task kworker/u9:0/534
CPU: 2 PID: 534 Comm: kworker/u9:0 Not tainted 5.12.0-rc7-custom-16372-gb150be05b806 #3382
Hardware name: QEMU MSN2700, BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Workqueue: xprtiod xs_stream_data_receive_workfn [sunrpc]
Call Trace:
 <IRQ>
dump_stack (lib/dump_stack.c:122)
print_address_description.constprop.0 (mm/kasan/report.c:233)
kasan_report.cold (mm/kasan/report.c:400 mm/kasan/report.c:416)
skb_gro_receive (net/core/skbuff.c:4260)
tcp_gro_receive (net/ipv4/tcp_offload.c:266 (discriminator 1))
tcp4_gro_receive (net/ipv4/tcp_offload.c:316)
inet_gro_receive (net/ipv4/af_inet.c:1545 (discriminator 2))
dev_gro_receive (net/core/dev.c:6075)
napi_gro_receive (net/core/dev.c:6168 net/core/dev.c:6198)
receive_buf (drivers/net/virtio_net.c:1151) virtio_net
virtnet_poll (drivers/net/virtio_net.c:1415 drivers/net/virtio_net.c:1519) virtio_net
__napi_poll (net/core/dev.c:6964)
net_rx_action (net/core/dev.c:7033 net/core/dev.c:7118)
__do_softirq (./arch/x86/include/asm/jump_label.h:25 ./include/linux/jump_label.h:200 ./include/trace/events/irq.h:142 kernel/softirq.c:346)
irq_exit_rcu (kernel/softirq.c:221 kernel/softirq.c:422 kernel/softirq.c:434)
common_interrupt (arch/x86/kernel/irq.c:240 (discriminator 14))
</IRQ>

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reported-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 74d2d49264f3..7fda2ae4c40f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -387,7 +387,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	unsigned int copy, hdr_len, hdr_padded_len;
 	struct page *page_to_free = NULL;
 	int tailroom, shinfo_size;
-	char *p, *hdr_p;
+	char *p, *hdr_p, *buf;
 
 	p = page_address(page) + offset;
 	hdr_p = p;
@@ -403,11 +403,15 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	 * space are aligned.
 	 */
 	if (headroom) {
-		/* The actual allocated space size is PAGE_SIZE. */
+		/* Buffers with headroom use PAGE_SIZE as alloc size,
+		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		 */
 		truesize = PAGE_SIZE;
 		tailroom = truesize - len - offset;
+		buf = page_address(page);
 	} else {
 		tailroom = truesize - len;
+		buf = p;
 	}
 
 	len -= hdr_len;
@@ -416,11 +420,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
+	/* copy small packet so we can reuse these pages */
 	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
-		skb = build_skb(p, truesize);
+		skb = build_skb(buf, truesize);
 		if (unlikely(!skb))
 			return NULL;
 
+		skb_reserve(skb, p - buf);
 		skb_put(skb, len);
 		goto ok;
 	}
-- 
cgit v1.2.3


From d90a94680bc0a8069d93282bc5f2966d00b9c4a4 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 22 Apr 2021 13:08:16 -0700
Subject: net: mana: Use int to check the return value of mana_gd_poll_cq()

mana_gd_poll_cq() may return -1 if an overflow error is detected (this
should never happen unless there is a bug in the driver or the hardware).

Fix the type of the variable "comp_read" by using int rather than u32.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +-
 drivers/net/ethernet/microsoft/mana/mana_en.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index 0cf0322702ed..1a923fd99990 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -283,7 +283,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)
 	struct hwc_rx_oob comp_data = {};
 	struct gdma_comp *completions;
 	struct hwc_cq *hwc_cq = ctx;
-	u32 comp_read, i;
+	int comp_read, i;
 
 	WARN_ON_ONCE(hwc_cq->gdma_cq != q_self);
 
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a744ca0b6c19..04d067243457 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1061,7 +1061,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
 static void mana_poll_rx_cq(struct mana_cq *cq)
 {
 	struct gdma_comp *comp = cq->gdma_comp_buf;
-	u32 comp_read, i;
+	int comp_read, i;
 
 	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
 	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
-- 
cgit v1.2.3


From d13f048dd40e8577260cd43faea8ec9b77520197 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Fri, 23 Apr 2021 00:49:45 +0100
Subject: net: geneve: modify IP header check in geneve6_xmit_skb and
 geneve_xmit_skb

Modify the header size check in geneve6_xmit_skb and geneve_xmit_skb
to use pskb_inet_may_pull rather than pskb_network_may_pull. This fixes
two kernel selftest failures introduced by the commit introducing the
checks:
IPv4 over geneve6: PMTU exceptions
IPv4 over geneve6: PMTU exceptions - nexthop objects

It does this by correctly accounting for the fact that IPv4 packets may
transit over geneve IPv6 tunnels (and vice versa), and still fixes the
uninit-value bug fixed by the original commit.

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 6628ddfec758 ("net: geneve: check skb is large enough for IPv4/IPv6 header")
Suggested-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 42f31c681846..61cd3dd4deab 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -891,7 +891,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	__be16 sport;
 	int err;
 
-	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+	if (!pskb_inet_may_pull(skb))
 		return -EINVAL;
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
@@ -988,7 +988,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 	__be16 sport;
 	int err;
 
-	if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+	if (!pskb_inet_may_pull(skb))
 		return -EINVAL;
 
 	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
-- 
cgit v1.2.3


From 3f57d8c40fea9b20543cab4da12f4680d2ef182c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:20:54 -0700
Subject: net: ethernet: mtk_eth_soc: fix RX VLAN offload

The VLAN ID in the rx descriptor is only valid if the RX_DMA_VTAG bit is
set. Fixes frames wrongly marked with VLAN tags.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[Ilya: fix commit message]
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6b00c12c6c43..b2175ec451ab 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1319,7 +1319,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb->protocol = eth_type_trans(skb, netdev);
 
 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
-		    RX_DMA_VID(trxd.rxd3))
+		    (trxd.rxd2 & RX_DMA_VTAG))
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 					       RX_DMA_VID(trxd.rxd3));
 		skb_record_rx_queue(skb, 0);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 1a6750c08bb9..875e67b41561 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -301,6 +301,7 @@
 #define RX_DMA_LSO		BIT(30)
 #define RX_DMA_PLEN0(_x)	(((_x) & 0x3fff) << 16)
 #define RX_DMA_GET_PLEN0(_x)	(((_x) >> 16) & 0x3fff)
+#define RX_DMA_VTAG		BIT(15)
 
 /* QDMA descriptor rxd3 */
 #define RX_DMA_VID(_x)		((_x) & 0xfff)
-- 
cgit v1.2.3


From 5196c417854942e218a59ec87bf7d414b3bd581e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:20:55 -0700
Subject: net: ethernet: mtk_eth_soc: unmap RX data before calling build_skb

Since build_skb accesses the data area (for initializing shinfo), dma unmap
needs to happen before that call

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[Ilya: split build_skb cleanup fix into a separate commit]
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b2175ec451ab..540003f3fcb8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1298,6 +1298,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 			goto release_desc;
 		}
 
+		dma_unmap_single(eth->dev, trxd.rxd1,
+				 ring->buf_size, DMA_FROM_DEVICE);
+
 		/* receive data */
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
@@ -1307,8 +1310,6 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
-		dma_unmap_single(eth->dev, trxd.rxd1,
-				 ring->buf_size, DMA_FROM_DEVICE);
 		pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
 		skb->dev = netdev;
 		skb_put(skb, pktlen);
-- 
cgit v1.2.3


From 787082ab9f7be4711e52f67c388535eda74a1269 Mon Sep 17 00:00:00 2001
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Date: Thu, 22 Apr 2021 22:20:56 -0700
Subject: net: ethernet: mtk_eth_soc: fix build_skb cleanup

In case build_skb fails, call skb_free_frag on the correct pointer. Also
update the DMA structures with the new mapping before exiting, because
the mapping was successful

Suggested-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 540003f3fcb8..07daa5de8bec 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1304,9 +1304,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		/* receive data */
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
-			skb_free_frag(new_data);
+			skb_free_frag(data);
 			netdev->stats.rx_dropped++;
-			goto release_desc;
+			goto skip_rx;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
@@ -1326,6 +1326,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb_record_rx_queue(skb, 0);
 		napi_gro_receive(napi, skb);
 
+skip_rx:
 		ring->data[idx] = new_data;
 		rxd->rxd1 = (unsigned int)dma_addr;
 
-- 
cgit v1.2.3


From c30c4a82739090a2de4a4e3f245355ea4fb3ec14 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:20:57 -0700
Subject: net: ethernet: mtk_eth_soc: use napi_consume_skb

Should improve performance, since it can use bulk free

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 07daa5de8bec..5cf64de3ddf8 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -858,7 +858,8 @@ static int txd_to_idx(struct mtk_tx_ring *ring, struct mtk_tx_dma *dma)
 	return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
 }
 
-static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
+			 bool napi)
 {
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
@@ -890,8 +891,12 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
 
 	tx_buf->flags = 0;
 	if (tx_buf->skb &&
-	    (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
-		dev_kfree_skb_any(tx_buf->skb);
+	    (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) {
+		if (napi)
+			napi_consume_skb(tx_buf->skb, napi);
+		else
+			dev_kfree_skb_any(tx_buf->skb);
+	}
 	tx_buf->skb = NULL;
 }
 
@@ -1069,7 +1074,7 @@ err_dma:
 		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
-		mtk_tx_unmap(eth, tx_buf);
+		mtk_tx_unmap(eth, tx_buf, false);
 
 		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
 		if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
@@ -1388,7 +1393,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
 			done[mac]++;
 			budget--;
 		}
-		mtk_tx_unmap(eth, tx_buf);
+		mtk_tx_unmap(eth, tx_buf, true);
 
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
@@ -1425,7 +1430,7 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
 			budget--;
 		}
 
-		mtk_tx_unmap(eth, tx_buf);
+		mtk_tx_unmap(eth, tx_buf, true);
 
 		desc = &ring->dma[cpu];
 		ring->last_free = desc;
@@ -1627,7 +1632,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
 
 	if (ring->buf) {
 		for (i = 0; i < MTK_DMA_SIZE; i++)
-			mtk_tx_unmap(eth, &ring->buf[i]);
+			mtk_tx_unmap(eth, &ring->buf[i], false);
 		kfree(ring->buf);
 		ring->buf = NULL;
 	}
-- 
cgit v1.2.3


From 3630d519d7c3eab92567658690e44ffe0517d109 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:20:58 -0700
Subject: net: ethernet: mtk_eth_soc: reduce MDIO bus access latency

usleep_range often ends up sleeping much longer than the 10-20us provided
as a range here. This causes significant latency in mdio bus acceses,
which easily adds multiple seconds to the boot time on MT7621 when polling
DSA slave ports.
Use cond_resched instead of usleep_range, since the MDIO access does not
take much time

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 5cf64de3ddf8..d992d4f1f400 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -86,7 +86,7 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth)
 			return 0;
 		if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
 			break;
-		usleep_range(10, 20);
+		cond_resched();
 	}
 
 	dev_err(eth->dev, "mdio: MDIO timeout\n");
-- 
cgit v1.2.3


From 16ef670789b252b221700adc413497ed2f941d8a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:20:59 -0700
Subject: net: ethernet: mtk_eth_soc: remove unnecessary TX queue stops

When running short on descriptors, only stop the queue for the netdev that
tx was attempted for. By the time something tries to send on the other
netdev, the ring might have some more room already.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index d992d4f1f400..e6f832dde9a6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1131,17 +1131,6 @@ static void mtk_wake_queue(struct mtk_eth *eth)
 	}
 }
 
-static void mtk_stop_queue(struct mtk_eth *eth)
-{
-	int i;
-
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->netdev[i])
-			continue;
-		netif_stop_queue(eth->netdev[i]);
-	}
-}
-
 static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -1162,7 +1151,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
-		mtk_stop_queue(eth);
+		netif_stop_queue(dev);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
 		spin_unlock(&eth->page_lock);
@@ -1188,7 +1177,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto drop;
 
 	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
-		mtk_stop_queue(eth);
+		netif_stop_queue(dev);
 
 	spin_unlock(&eth->page_lock);
 
-- 
cgit v1.2.3


From 59555a8d0dd39bf60b7ca1ba5e7393d293f7398d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:00 -0700
Subject: net: ethernet: mtk_eth_soc: use larger burst size for QDMA TX

Improves tx performance

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e6f832dde9a6..645360cfdfe9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2193,7 +2193,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		mtk_w32(eth,
 			MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
-			MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
+			MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
 			MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
 			MTK_RX_BT_32DWORDS,
 			MTK_QDMA_GLO_CFG);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 875e67b41561..83883d86b881 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -203,7 +203,7 @@
 #define MTK_RX_BT_32DWORDS	(3 << 11)
 #define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
-#define MTK_DMA_SIZE_16DWORDS	(2 << 4)
+#define MTK_TX_BT_32DWORDS	(3 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)
 #define MTK_TX_DMA_BUSY		BIT(1)
 #define MTK_RX_DMA_EN		BIT(2)
-- 
cgit v1.2.3


From 6b4423b258b91032c50a5efca15d3d9bb194ea1d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:01 -0700
Subject: net: ethernet: mtk_eth_soc: increase DMA ring sizes

256 descriptors is not enough for multi-gigabit traffic under load on
MT7622. Bump it to 512 to improve performance.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 83883d86b881..540a5771b7bf 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -22,7 +22,7 @@
 #define MTK_MAX_RX_LENGTH	1536
 #define MTK_MAX_RX_LENGTH_2K	2048
 #define MTK_TX_DMA_BUF_LEN	0x3fff
-#define MTK_DMA_SIZE		256
+#define MTK_DMA_SIZE		512
 #define MTK_NAPI_WEIGHT		64
 #define MTK_MAC_COUNT		2
 #define MTK_RX_ETH_HLEN		(ETH_HLEN + ETH_FCS_LEN)
-- 
cgit v1.2.3


From e9229ffd550b2d8c4997c67a501dbc3919fd4e26 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:02 -0700
Subject: net: ethernet: mtk_eth_soc: implement dynamic interrupt moderation

Reduces the number of interrupts under load

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[Ilya: add documentation for new struct fields]
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Kconfig       |  1 +
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 96 ++++++++++++++++++++++++++---
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 41 +++++++++---
 3 files changed, 124 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 08c2e446d3d5..c357c193378e 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -11,6 +11,7 @@ config NET_MEDIATEK_SOC
 	tristate "MediaTek SoC Gigabit Ethernet support"
 	depends on NET_DSA || !NET_DSA
 	select PHYLINK
+	select DIMLIB
 	help
 	  This driver supports the gigabit ethernet MACs in the
 	  MediaTek SoC family.
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 645360cfdfe9..762e985fa294 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1233,12 +1233,13 @@ static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
 static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		       struct mtk_eth *eth)
 {
+	struct dim_sample dim_sample = {};
 	struct mtk_rx_ring *ring;
 	int idx;
 	struct sk_buff *skb;
 	u8 *data, *new_data;
 	struct mtk_rx_dma *rxd, trxd;
-	int done = 0;
+	int done = 0, bytes = 0;
 
 	while (done < budget) {
 		struct net_device *netdev;
@@ -1312,6 +1313,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		else
 			skb_checksum_none_assert(skb);
 		skb->protocol = eth_type_trans(skb, netdev);
+		bytes += pktlen;
 
 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
 		    (trxd.rxd2 & RX_DMA_VTAG))
@@ -1344,6 +1346,12 @@ rx_done:
 		mtk_update_rx_cpu_idx(eth);
 	}
 
+	eth->rx_packets += done;
+	eth->rx_bytes += bytes;
+	dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
+			  &dim_sample);
+	net_dim(&eth->rx_dim, dim_sample);
+
 	return done;
 }
 
@@ -1436,6 +1444,7 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
 static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 {
 	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct dim_sample dim_sample = {};
 	unsigned int done[MTK_MAX_DEVS];
 	unsigned int bytes[MTK_MAX_DEVS];
 	int total = 0, i;
@@ -1453,8 +1462,14 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 			continue;
 		netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
 		total += done[i];
+		eth->tx_packets += done[i];
+		eth->tx_bytes += bytes[i];
 	}
 
+	dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
+			  &dim_sample);
+	net_dim(&eth->tx_dim, dim_sample);
+
 	if (mtk_queue_stopped(eth) &&
 	    (atomic_read(&ring->free_count) > ring->thresh))
 		mtk_wake_queue(eth);
@@ -2129,6 +2144,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth)
 {
 	struct mtk_eth *eth = _eth;
 
+	eth->rx_events++;
 	if (likely(napi_schedule_prep(&eth->rx_napi))) {
 		__napi_schedule(&eth->rx_napi);
 		mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
@@ -2141,6 +2157,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
 {
 	struct mtk_eth *eth = _eth;
 
+	eth->tx_events++;
 	if (likely(napi_schedule_prep(&eth->tx_napi))) {
 		__napi_schedule(&eth->tx_napi);
 		mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
@@ -2325,6 +2342,9 @@ static int mtk_stop(struct net_device *dev)
 	napi_disable(&eth->tx_napi);
 	napi_disable(&eth->rx_napi);
 
+	cancel_work_sync(&eth->rx_dim.work);
+	cancel_work_sync(&eth->tx_dim.work);
+
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
 		mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
 	mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
@@ -2377,6 +2397,64 @@ err_disable_clks:
 	return ret;
 }
 
+static void mtk_dim_rx(struct work_struct *work)
+{
+	struct dim *dim = container_of(work, struct dim, work);
+	struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim);
+	struct dim_cq_moder cur_profile;
+	u32 val, cur;
+
+	cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode,
+						dim->profile_ix);
+	spin_lock_bh(&eth->dim_lock);
+
+	val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
+	val &= MTK_PDMA_DELAY_TX_MASK;
+	val |= MTK_PDMA_DELAY_RX_EN;
+
+	cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
+	val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT;
+
+	cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
+	val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
+
+	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
+	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+
+	spin_unlock_bh(&eth->dim_lock);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+static void mtk_dim_tx(struct work_struct *work)
+{
+	struct dim *dim = container_of(work, struct dim, work);
+	struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim);
+	struct dim_cq_moder cur_profile;
+	u32 val, cur;
+
+	cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode,
+						dim->profile_ix);
+	spin_lock_bh(&eth->dim_lock);
+
+	val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
+	val &= MTK_PDMA_DELAY_RX_MASK;
+	val |= MTK_PDMA_DELAY_TX_EN;
+
+	cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
+	val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT;
+
+	cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
+	val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
+
+	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
+	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+
+	spin_unlock_bh(&eth->dim_lock);
+
+	dim->state = DIM_START_MEASURE;
+}
+
 static int mtk_hw_init(struct mtk_eth *eth)
 {
 	int i, val, ret;
@@ -2398,9 +2476,6 @@ static int mtk_hw_init(struct mtk_eth *eth)
 			goto err_disable_pm;
 		}
 
-		/* enable interrupt delay for RX */
-		mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
-
 		/* disable delay and normal interrupt */
 		mtk_tx_irq_disable(eth, ~0);
 		mtk_rx_irq_disable(eth, ~0);
@@ -2439,11 +2514,11 @@ static int mtk_hw_init(struct mtk_eth *eth)
 	/* Enable RX VLan Offloading */
 	mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
 
-	/* enable interrupt delay for RX */
-	mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+	/* set interrupt delays based on current Net DIM sample */
+	mtk_dim_rx(&eth->rx_dim.work);
+	mtk_dim_tx(&eth->tx_dim.work);
 
 	/* disable delay and normal interrupt */
-	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
 	mtk_tx_irq_disable(eth, ~0);
 	mtk_rx_irq_disable(eth, ~0);
 
@@ -2978,6 +3053,13 @@ static int mtk_probe(struct platform_device *pdev)
 	spin_lock_init(&eth->page_lock);
 	spin_lock_init(&eth->tx_irq_lock);
 	spin_lock_init(&eth->rx_irq_lock);
+	spin_lock_init(&eth->dim_lock);
+
+	eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	INIT_WORK(&eth->rx_dim.work, mtk_dim_rx);
+
+	eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	INIT_WORK(&eth->tx_dim.work, mtk_dim_tx);
 
 	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
 		eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 540a5771b7bf..ceb5a4a661e6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -16,6 +16,7 @@
 #include <linux/refcount.h>
 #include <linux/phylink.h>
 #include <linux/rhashtable.h>
+#include <linux/dim.h>
 #include "mtk_ppe.h"
 
 #define MTK_QDMA_PAGE_SIZE	2048
@@ -137,13 +138,18 @@
 
 /* PDMA Delay Interrupt Register */
 #define MTK_PDMA_DELAY_INT		0xa0c
+#define MTK_PDMA_DELAY_RX_MASK		GENMASK(15, 0)
 #define MTK_PDMA_DELAY_RX_EN		BIT(15)
-#define MTK_PDMA_DELAY_RX_PINT		4
 #define MTK_PDMA_DELAY_RX_PINT_SHIFT	8
-#define MTK_PDMA_DELAY_RX_PTIME		4
-#define MTK_PDMA_DELAY_RX_DELAY		\
-	(MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
-	(MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
+#define MTK_PDMA_DELAY_RX_PTIME_SHIFT	0
+
+#define MTK_PDMA_DELAY_TX_MASK		GENMASK(31, 16)
+#define MTK_PDMA_DELAY_TX_EN		BIT(31)
+#define MTK_PDMA_DELAY_TX_PINT_SHIFT	24
+#define MTK_PDMA_DELAY_TX_PTIME_SHIFT	16
+
+#define MTK_PDMA_DELAY_PINT_MASK	0x7f
+#define MTK_PDMA_DELAY_PTIME_MASK	0xff
 
 /* PDMA Interrupt Status Register */
 #define MTK_PDMA_INT_STATUS	0xa20
@@ -225,6 +231,7 @@
 /* QDMA Interrupt Status Register */
 #define MTK_QDMA_INT_STATUS	0x1A18
 #define MTK_RX_DONE_DLY		BIT(30)
+#define MTK_TX_DONE_DLY		BIT(28)
 #define MTK_RX_DONE_INT3	BIT(19)
 #define MTK_RX_DONE_INT2	BIT(18)
 #define MTK_RX_DONE_INT1	BIT(17)
@@ -234,8 +241,7 @@
 #define MTK_TX_DONE_INT1	BIT(1)
 #define MTK_TX_DONE_INT0	BIT(0)
 #define MTK_RX_DONE_INT		MTK_RX_DONE_DLY
-#define MTK_TX_DONE_INT		(MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
-				 MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
+#define MTK_TX_DONE_INT		MTK_TX_DONE_DLY
 
 /* QDMA Interrupt grouping registers */
 #define MTK_QDMA_INT_GRP1	0x1a20
@@ -849,6 +855,7 @@ struct mtk_sgmii {
  * @page_lock:		Make sure that register operations are atomic
  * @tx_irq__lock:	Make sure that IRQ register operations are atomic
  * @rx_irq__lock:	Make sure that IRQ register operations are atomic
+ * @dim_lock:		Make sure that Net DIM operations are atomic
  * @dummy_dev:		we run 2 netdevs on 1 physical DMA ring and need a
  *			dummy for NAPI to work
  * @netdev:		The netdev instances
@@ -867,6 +874,14 @@ struct mtk_sgmii {
  * @rx_ring_qdma:	Pointer to the memory holding info about the QDMA RX ring
  * @tx_napi:		The TX NAPI struct
  * @rx_napi:		The RX NAPI struct
+ * @rx_events:		Net DIM RX event counter
+ * @rx_packets:		Net DIM RX packet counter
+ * @rx_bytes:		Net DIM RX byte counter
+ * @rx_dim:		Net DIM RX context
+ * @tx_events:		Net DIM TX event counter
+ * @tx_packets:		Net DIM TX packet counter
+ * @tx_bytes:		Net DIM TX byte counter
+ * @tx_dim:		Net DIM TX context
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
  * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
@@ -911,6 +926,18 @@ struct mtk_eth {
 
 	const struct mtk_soc_data	*soc;
 
+	spinlock_t			dim_lock;
+
+	u32				rx_events;
+	u32				rx_packets;
+	u32				rx_bytes;
+	struct dim			rx_dim;
+
+	u32				tx_events;
+	u32				tx_packets;
+	u32				tx_bytes;
+	struct dim			tx_dim;
+
 	u32				tx_int_mask_reg;
 	u32				tx_int_status_reg;
 	u32				rx_dma_l4_valid;
-- 
cgit v1.2.3


From 4e6bf609569c59b6bd6acf4a607c096cbd820d79 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:03 -0700
Subject: net: ethernet: mtk_eth_soc: cache HW pointer of last freed TX
 descriptor

The value is only updated by the CPU, so it is cheaper to access from the
ring data structure than from a hardware register.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++----
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 762e985fa294..6d23118b7a6c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1364,7 +1364,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
 	struct mtk_tx_buf *tx_buf;
 	u32 cpu, dma;
 
-	cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
+	cpu = ring->last_free_ptr;
 	dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
 
 	desc = mtk_qdma_phys_to_virt(ring, cpu);
@@ -1398,6 +1398,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
 		cpu = next_cpu;
 	}
 
+	ring->last_free_ptr = cpu;
 	mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
 
 	return budget;
@@ -1598,6 +1599,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
 	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
+	ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
 	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
@@ -1611,9 +1613,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 		mtk_w32(eth,
 			ring->phys + ((MTK_DMA_SIZE - 1) * sz),
 			MTK_QTX_CRX_PTR);
-		mtk_w32(eth,
-			ring->phys + ((MTK_DMA_SIZE - 1) * sz),
-			MTK_QTX_DRX_PTR);
+		mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR);
 		mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
 			MTK_QTX_CFG(0));
 	} else {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index ceb5a4a661e6..6b92dd6c2cda 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -642,6 +642,7 @@ struct mtk_tx_buf {
  * @phys:		The physical addr of tx_buf
  * @next_free:		Pointer to the next free descriptor
  * @last_free:		Pointer to the last free descriptor
+ * @last_free_ptr:	Hardware pointer value of the last free descriptor
  * @thresh:		The threshold of minimum amount of free descriptors
  * @free_count:		QDMA uses a linked list. Track how many free descriptors
  *			are present
@@ -652,6 +653,7 @@ struct mtk_tx_ring {
 	dma_addr_t phys;
 	struct mtk_tx_dma *next_free;
 	struct mtk_tx_dma *last_free;
+	u32 last_free_ptr;
 	u16 thresh;
 	atomic_t free_count;
 	int dma_size;
-- 
cgit v1.2.3


From 816ac3e6e67bdd78d86226c6eb53619780750e92 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:04 -0700
Subject: net: ethernet: mtk_eth_soc: only read the full RX descriptor if DMA
 is done

Uncached memory access is expensive, and there is no need to access all
descriptor words if we can't process them anyway

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6d23118b7a6c..4735942ed283 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -777,13 +777,18 @@ static inline int mtk_max_buf_size(int frag_size)
 	return buf_size;
 }
 
-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
 				   struct mtk_rx_dma *dma_rxd)
 {
-	rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
 	rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
+	if (!(rxd->rxd2 & RX_DMA_DONE))
+		return false;
+
+	rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
 	rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
 	rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
+
+	return true;
 }
 
 /* the qdma core needs scratch memory to be setup */
@@ -1255,8 +1260,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		rxd = &ring->dma[idx];
 		data = ring->data[idx];
 
-		mtk_rx_get_desc(&trxd, rxd);
-		if (!(trxd.rxd2 & RX_DMA_DONE))
+		if (!mtk_rx_get_desc(&trxd, rxd))
 			break;
 
 		/* find out which mac the packet come from. values start at 1 */
-- 
cgit v1.2.3


From 16769a8923fad5a5377253bcd76b0e0d64976c73 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:05 -0700
Subject: net: ethernet: mtk_eth_soc: reduce unnecessary interrupts

Avoid rearming interrupt if napi_complete returns false

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4735942ed283..e1792ccaedc3 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1519,8 +1519,8 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
 	if (status & MTK_TX_DONE_INT)
 		return budget;
 
-	napi_complete(napi);
-	mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
+	if (napi_complete(napi))
+		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
 
 	return tx_done;
 }
@@ -1553,8 +1553,9 @@ poll_again:
 		remain_budget -= rx_done;
 		goto poll_again;
 	}
-	napi_complete(napi);
-	mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
+
+	if (napi_complete(napi))
+		mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
 
 	return rx_done + budget - remain_budget;
 }
-- 
cgit v1.2.3


From db2c7b353db3b3f71b55f9ff4627d8a786446fbe Mon Sep 17 00:00:00 2001
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Date: Thu, 22 Apr 2021 22:21:06 -0700
Subject: net: ethernet: mtk_eth_soc: rework NAPI callbacks

Use napi_complete_done to communicate total TX and RX work done to NAPI.
Count total RX work up instead of remaining work down for clarity.
Remove unneeded local variables for clarity. Use do {} while instead of
goto for clarity.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 54 +++++++++++++----------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e1792ccaedc3..8faf8fb1c83a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1496,7 +1496,6 @@ static void mtk_handle_status_irq(struct mtk_eth *eth)
 static int mtk_napi_tx(struct napi_struct *napi, int budget)
 {
 	struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi);
-	u32 status, mask;
 	int tx_done = 0;
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
@@ -1505,21 +1504,19 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
 	tx_done = mtk_poll_tx(eth, budget);
 
 	if (unlikely(netif_msg_intr(eth))) {
-		status = mtk_r32(eth, eth->tx_int_status_reg);
-		mask = mtk_r32(eth, eth->tx_int_mask_reg);
 		dev_info(eth->dev,
-			 "done tx %d, intr 0x%08x/0x%x\n",
-			 tx_done, status, mask);
+			 "done tx %d, intr 0x%08x/0x%x\n", tx_done,
+			 mtk_r32(eth, eth->tx_int_status_reg),
+			 mtk_r32(eth, eth->tx_int_mask_reg));
 	}
 
 	if (tx_done == budget)
 		return budget;
 
-	status = mtk_r32(eth, eth->tx_int_status_reg);
-	if (status & MTK_TX_DONE_INT)
+	if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT)
 		return budget;
 
-	if (napi_complete(napi))
+	if (napi_complete_done(napi, tx_done))
 		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
 
 	return tx_done;
@@ -1528,36 +1525,33 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget)
 static int mtk_napi_rx(struct napi_struct *napi, int budget)
 {
 	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
-	u32 status, mask;
-	int rx_done = 0;
-	int remain_budget = budget;
+	int rx_done_total = 0;
 
 	mtk_handle_status_irq(eth);
 
-poll_again:
-	mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
-	rx_done = mtk_poll_rx(napi, remain_budget, eth);
+	do {
+		int rx_done;
 
-	if (unlikely(netif_msg_intr(eth))) {
-		status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
-		mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
-		dev_info(eth->dev,
-			 "done rx %d, intr 0x%08x/0x%x\n",
-			 rx_done, status, mask);
-	}
-	if (rx_done == remain_budget)
-		return budget;
+		mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
+		rx_done = mtk_poll_rx(napi, budget - rx_done_total, eth);
+		rx_done_total += rx_done;
 
-	status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
-	if (status & MTK_RX_DONE_INT) {
-		remain_budget -= rx_done;
-		goto poll_again;
-	}
+		if (unlikely(netif_msg_intr(eth))) {
+			dev_info(eth->dev,
+				 "done rx %d, intr 0x%08x/0x%x\n", rx_done,
+				 mtk_r32(eth, MTK_PDMA_INT_STATUS),
+				 mtk_r32(eth, MTK_PDMA_INT_MASK));
+		}
+
+		if (rx_done_total == budget)
+			return budget;
+
+	} while (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT);
 
-	if (napi_complete(napi))
+	if (napi_complete_done(napi, rx_done_total))
 		mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
 
-	return rx_done + budget - remain_budget;
+	return rx_done_total;
 }
 
 static int mtk_tx_alloc(struct mtk_eth *eth)
-- 
cgit v1.2.3


From fa817272c37ef78e25dc14e4760ac78a7043a18a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 22 Apr 2021 22:21:07 -0700
Subject: net: ethernet: mtk_eth_soc: set PPE flow hash as skb hash if present

This improves GRO performance

Signed-off-by: Felix Fietkau <nbd@nbd.name>
[Ilya: Use MTK_RXD4_FOE_ENTRY instead of GENMASK(13, 0)]
Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8faf8fb1c83a..37e50a2e7d0e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/pinctrl/devinfo.h>
 #include <linux/phylink.h>
+#include <linux/jhash.h>
 #include <net/dsa.h>
 
 #include "mtk_eth_soc.h"
@@ -1250,6 +1251,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		struct net_device *netdev;
 		unsigned int pktlen;
 		dma_addr_t dma_addr;
+		u32 hash;
 		int mac;
 
 		ring = mtk_get_rx_ring(eth);
@@ -1319,6 +1321,12 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb->protocol = eth_type_trans(skb, netdev);
 		bytes += pktlen;
 
+		hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
+		if (hash != MTK_RXD4_FOE_ENTRY) {
+			hash = jhash_1word(hash, 0);
+			skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
+		}
+
 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
 		    (trxd.rxd2 & RX_DMA_VTAG))
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-- 
cgit v1.2.3


From 3bc8e0aff23be0526af0dbc7973a8866a08d73f1 Mon Sep 17 00:00:00 2001
From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Date: Thu, 22 Apr 2021 22:21:08 -0700
Subject: net: ethernet: mtk_eth_soc: use iopoll.h macro for DMA init

Replace a tight busy-wait loop without a pause with a standard
readx_poll_timeout_atomic routine with a 5 us poll period.

Tested by booting a MT7621 device to ensure the driver initializes
properly.

Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 +++++++++++++----------------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |  2 +-
 2 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 37e50a2e7d0e..ed4eacef17ce 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2033,25 +2033,22 @@ static int mtk_set_features(struct net_device *dev, netdev_features_t features)
 /* wait for DMA to finish whatever it is doing before we start using it again */
 static int mtk_dma_busy_wait(struct mtk_eth *eth)
 {
-	unsigned long t_start = jiffies;
+	unsigned int reg;
+	int ret;
+	u32 val;
 
-	while (1) {
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
-			if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
-			      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
-				return 0;
-		} else {
-			if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) &
-			      (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
-				return 0;
-		}
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		reg = MTK_QDMA_GLO_CFG;
+	else
+		reg = MTK_PDMA_GLO_CFG;
 
-		if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
-			break;
-	}
+	ret = readx_poll_timeout_atomic(__raw_readl, eth->base + reg, val,
+					!(val & (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)),
+					5, MTK_DMA_BUSY_TIMEOUT_US);
+	if (ret)
+		dev_err(eth->dev, "DMA init timeout\n");
 
-	dev_err(eth->dev, "DMA init timeout\n");
-	return -1;
+	return ret;
 }
 
 static int mtk_dma_init(struct mtk_eth *eth)
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 6b92dd6c2cda..11331b44ba07 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -214,7 +214,7 @@
 #define MTK_TX_DMA_BUSY		BIT(1)
 #define MTK_RX_DMA_EN		BIT(2)
 #define MTK_TX_DMA_EN		BIT(0)
-#define MTK_DMA_BUSY_TIMEOUT	HZ
+#define MTK_DMA_BUSY_TIMEOUT_US	1000000
 
 /* QDMA Reset Index Register */
 #define MTK_QDMA_RST_IDX	0x1A08
-- 
cgit v1.2.3


From 7ce9c3d363ac1af38fb7add7ef2db7e8509a5962 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Fri, 23 Apr 2021 17:33:55 +0800
Subject: enetc: fix locking for one-step timestamping packet transfer

The previous patch to support PTP Sync packet one-step timestamping
described one-step timestamping packet handling logic as below in
commit message:

- Trasmit packet immediately if no other one in transfer, or queue to
  skb queue if there is already one in transfer.
  The test_and_set_bit_lock() is used here to lock and check state.
- Start a work when complete transfer on hardware, to release the bit
  lock and to send one skb in skb queue if has.

There was not problem of the description, but there was a mistake in
implementation. The locking/test_and_set_bit_lock() should be put in
enetc_start_xmit() which may be called by worker, rather than in
enetc_xmit(). Otherwise, the worker calling enetc_start_xmit() after
bit lock released is not able to lock again for transfer.

Fixes: 7294380c5211 ("enetc: support PTP Sync packet one-step timestamping")
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 4f23829e7317..3ca93adb9662 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -321,6 +321,15 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
 	struct enetc_bdr *tx_ring;
 	int count;
 
+	/* Queue one-step Sync packet if already locked */
+	if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
+		if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS,
+					  &priv->flags)) {
+			skb_queue_tail(&priv->tx_skbs, skb);
+			return NETDEV_TX_OK;
+		}
+	}
+
 	tx_ring = priv->tx_ring[skb->queue_mapping];
 
 	if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS))
@@ -372,15 +381,6 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 			skb->cb[0] = ENETC_F_TX_TSTAMP;
 	}
 
-	/* Queue one-step Sync packet if already locked */
-	if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) {
-		if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS,
-					  &priv->flags)) {
-			skb_queue_tail(&priv->tx_skbs, skb);
-			return NETDEV_TX_OK;
-		}
-	}
-
 	return enetc_start_xmit(skb, ndev);
 }
 
-- 
cgit v1.2.3


From e7865ea51b0ba2b5eb793ea3ca701571b477674a Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 23 Apr 2021 17:44:54 +0800
Subject: r8152: remove NCM mode from REALTEK_USB_DEVICE macro

The RTL8156 support CDC NCM mode. And users could set the configuration
of the USB device between vendor and NCM mode dynamically by themselves.
That is, the driver doesn't need to set vendor mode from NCM mode.

Fixes: 195aae321c82 ("r8152: support new chips")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 5b4ed69df64f..47198c125719 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -9593,15 +9593,6 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 	.idProduct = (prod), \
 	.bInterfaceClass = USB_CLASS_COMM, \
 	.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
-	.bInterfaceProtocol = USB_CDC_PROTO_NONE \
-}, \
-{ \
-	.match_flags = USB_DEVICE_ID_MATCH_INT_INFO | \
-		       USB_DEVICE_ID_MATCH_DEVICE, \
-	.idVendor = (vend), \
-	.idProduct = (prod), \
-	.bInterfaceClass = USB_CLASS_COMM, \
-	.bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, \
 	.bInterfaceProtocol = USB_CDC_PROTO_NONE
 
 /* table of devices that work with this driver */
-- 
cgit v1.2.3


From 55319eeb5bbcd3c73366de92ff224bd62325a68d Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Fri, 23 Apr 2021 17:44:55 +0800
Subject: r8152: redefine REALTEK_USB_DEVICE macro

Redefine REALTEK_USB_DEVICE macro with USB_DEVICE_INTERFACE_CLASS and
USB_DEVICE_AND_INTERFACE_INFO to simply the code.

Although checkpatch.pl shows the following error, it is more readable.

	ERROR: Macros with complex values should be enclosed in parentheses

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 62 +++++++++++++++++++++----------------------------
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 47198c125719..9986f8969d02 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -9579,49 +9579,41 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 	}
 }
 
-#define REALTEK_USB_DEVICE(vend, prod)	\
-	.match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
-		       USB_DEVICE_ID_MATCH_INT_CLASS, \
-	.idVendor = (vend), \
-	.idProduct = (prod), \
-	.bInterfaceClass = USB_CLASS_VENDOR_SPEC \
+#define REALTEK_USB_DEVICE(vend, prod)	{ \
+	USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC), \
 }, \
 { \
-	.match_flags = USB_DEVICE_ID_MATCH_INT_INFO | \
-		       USB_DEVICE_ID_MATCH_DEVICE, \
-	.idVendor = (vend), \
-	.idProduct = (prod), \
-	.bInterfaceClass = USB_CLASS_COMM, \
-	.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
-	.bInterfaceProtocol = USB_CDC_PROTO_NONE
+	USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_COMM, \
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), \
+}
 
 /* table of devices that work with this driver */
 static const struct usb_device_id rtl8152_table[] = {
 	/* Realtek */
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8050)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8053)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8155)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8156)},
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8050),
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8053),
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152),
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153),
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8155),
+	REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8156),
 
 	/* Microsoft */
-	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3062)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3069)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3082)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x720c)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7214)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x721e)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0xa387)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff)},
-	{REALTEK_USB_DEVICE(VENDOR_ID_TPLINK,  0x0601)},
+	REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab),
+	REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6),
+	REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927),
+	REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3062),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3069),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3082),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x720c),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7214),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x721e),
+	REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0xa387),
+	REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041),
+	REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA,  0x09ff),
+	REALTEK_USB_DEVICE(VENDOR_ID_TPLINK,  0x0601),
 	{}
 };
 
-- 
cgit v1.2.3


From c8d0260cdd96fdccdef0509c4160e28a1012a5d7 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:43 +0200
Subject: selftests: net: mirror_gre_vlan_bridge_1q: Make an FDB entry static

The FDB roaming test installs a destination MAC address on the wrong
interface of an FDB database and tests whether the mirroring fails, because
packets are sent to the wrong port. The test by mistake installs the FDB
entry as local. This worked previously, because drivers were notified of
local FDB entries in the same way as of static entries. However that has
been fixed in the commit 6ab4c3117aec ("net: bridge: don't notify switchdev
for local FDB addresses"), and local entries are not notified anymore. As a
result, the HW is not reconfigured for the FDB roam, and mirroring keeps
working, failing the test.

To fix the issue, mark the FDB entry as static.

Fixes: 9c7c8a82442c ("selftests: forwarding: mirror_gre_vlan_bridge_1q: Add more tests")
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c02291e9841e..880e3ab9d088 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -271,7 +271,7 @@ test_span_gre_fdb_roaming()
 
 	while ((RET == 0)); do
 		bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
-		bridge fdb add dev $swp2 $h3mac vlan 555 master
+		bridge fdb add dev $swp2 $h3mac vlan 555 master static
 		sleep 1
 		fail_test_span_gre_dir $tundev ingress
 
-- 
cgit v1.2.3


From b6fc2f212108b3676f54d00a2c38e3bc36753980 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:44 +0200
Subject: selftests: mlxsw: Remove a redundant if statement in port_scale test

Currently, the error return code of the failure condition is lost after
using an if statement, so the test doesn't fail when it should.

Remove the if statement that separates the condition and the error code
check, so the test won't always pass.

Fixes: 5154b1b826d9b ("selftests: mlxsw: Add a scale test for physical ports")
Reported-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/port_scale.sh | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index f813ffefc07e..65f43a7ce9c9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -55,10 +55,6 @@ port_test()
 	      | jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
 
 	[[ $occ -eq $max_ports ]]
-	if [[ $should_fail -eq 0 ]]; then
-		check_err $? "Mismatch ports number: Expected $max_ports, got $occ."
-	else
-		check_err_fail $should_fail $? "Reached more ports than expected"
-	fi
+	check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)"
 
 }
-- 
cgit v1.2.3


From 1f1c92139e36223b89d8140f2b72f75e79baf8bd Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:45 +0200
Subject: selftests: mlxsw: Remove a redundant if statement in tc_flower_scale
 test

Currently, the error return code of the failure condition is lost after
using an if statement, so the test doesn't fail when it should.

Remove the if statement that separates the condition and the error code
check, so the test won't always pass.

Fixes: abfce9e062021 ("selftests: mlxsw: Reduce running time using offload indication")
Reported-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index cc0f07e72cf2..aa74be9f47c8 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -98,11 +98,7 @@ __tc_flower_test()
 			jq -r '[ .[] | select(.kind == "flower") |
 			.options | .in_hw ]' | jq .[] | wc -l)
 	[[ $((offload_count - 1)) -eq $count ]]
-	if [[ $should_fail -eq 0 ]]; then
-		check_err $? "Offload mismatch"
-	else
-		check_err_fail $should_fail $? "Offload more than expacted"
-	fi
+	check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
 }
 
 tc_flower_test()
-- 
cgit v1.2.3


From 059b18e21c631b0eb1668831ae99f4764fb8e7eb Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:46 +0200
Subject: selftests: mlxsw: Return correct error code in resource scale tests

Currently, the resource scale test checks a few cases, when the error code
resets between the cases. So for example, if one case fails and the
consecutive case passes, the error code eventually will fit the last test
and will be 0.

Save a new return code that will hold the 'or' return codes of all the
cases, so the final return code will consider all the cases.

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh  | 4 +++-
 tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh  | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 4a1c9328555f..50654f8a8c37 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -30,6 +30,7 @@ trap cleanup EXIT
 
 ALL_TESTS="router tc_flower mirror_gre tc_police port"
 for current_test in ${TESTS:-$ALL_TESTS}; do
+	RET_FIN=0
 	source ${current_test}_scale.sh
 
 	num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -48,8 +49,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
 		else
 			log_test "'$current_test' overflow $target"
 		fi
+		RET_FIN=$(( RET_FIN || RET ))
 	done
 done
 current_test=""
 
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 087a884f66cd..685dfb3478b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -24,6 +24,7 @@ trap cleanup EXIT
 
 ALL_TESTS="router tc_flower mirror_gre tc_police port"
 for current_test in ${TESTS:-$ALL_TESTS}; do
+	RET_FIN=0
 	source ${current_test}_scale.sh
 
 	num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -50,8 +51,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
 				log_test "'$current_test' [$profile] overflow $target"
 			fi
 		done
+		RET_FIN=$(( RET_FIN || RET ))
 	done
 done
 current_test=""
 
-exit "$RET"
+exit "$RET_FIN"
-- 
cgit v1.2.3


From dda7f4fa55839baeb72ae040aeaf9ccf89d3e416 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:47 +0200
Subject: selftests: mlxsw: Increase the tolerance of backlog buildup

The intention behind this test is to make sure that qdisc limit is
correctly projected to the HW. However, first, due to rounding in the
qdisc, and then in the driver, the number cannot actually be accurate. And
second, the approach to testing this is to oversubscribe the port with
traffic generated on the same switch. The actual backlog size therefore
fluctuates.

In practice, this test proved to be noisier than the rest, and spuriously
fails every now and then. Increase the tolerance to 10 % to avoid these
issues.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Acked-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index b0cb1aaffdda..33ddd01689be 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -507,8 +507,8 @@ do_red_test()
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
 	local diff=$((limit - backlog))
 	pct=$((100 * diff / limit))
-	((0 <= pct && pct <= 5))
-	check_err $? "backlog $backlog / $limit expected <= 5% distance"
+	((0 <= pct && pct <= 10))
+	check_err $? "backlog $backlog / $limit expected <= 10% distance"
 	log_test "TC $((vlan - 10)): RED backlog > limit"
 
 	stop_traffic
-- 
cgit v1.2.3


From 1233898ab758cbcf5f6fea10b8dd16a0b2c24fab Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Fri, 23 Apr 2021 14:19:48 +0200
Subject: selftests: mlxsw: Fix mausezahn invocation in ERSPAN scale test

The mirror_gre_scale test creates as many ERSPAN sessions as the underlying
chip supports, and tests that they all work. In order to determine that it
issues a stream of ICMP packets and checks if they are mirrored as
expected.

However, the mausezahn invocation missed the -6 flag to identify the use of
IPv6 protocol, and was sending ICMP messages over IPv6, as opposed to
ICMP6. It also didn't pass an explicit source IP address, which apparently
worked at some point in the past, but does not anymore.

To fix these issues, extend the function mirror_test() in mirror_lib by
detecting the IPv6 protocol addresses, and using a different ICMP scheme.
Fix __mirror_gre_test() in the selftest itself to pass a source IP address.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/mirror_gre_scale.sh   |  3 ++-
 tools/testing/selftests/net/forwarding/mirror_lib.sh  | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index 6f3a70df63bc..e00435753008 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -120,12 +120,13 @@ __mirror_gre_test()
 	sleep 5
 
 	for ((i = 0; i < count; ++i)); do
+		local sip=$(mirror_gre_ipv6_addr 1 $i)::1
 		local dip=$(mirror_gre_ipv6_addr 1 $i)::2
 		local htun=h3-gt6-$i
 		local message
 
 		icmp6_capture_install $htun
-		mirror_test v$h1 "" $dip $htun 100 10
+		mirror_test v$h1 $sip $dip $htun 100 10
 		icmp6_capture_uninstall $htun
 	done
 }
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 13db1cb50e57..6406cd76a19d 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -20,6 +20,13 @@ mirror_uninstall()
 	tc filter del dev $swp1 $direction pref 1000
 }
 
+is_ipv6()
+{
+	local addr=$1; shift
+
+	[[ -z ${addr//[0-9a-fA-F:]/} ]]
+}
+
 mirror_test()
 {
 	local vrf_name=$1; shift
@@ -29,9 +36,17 @@ mirror_test()
 	local pref=$1; shift
 	local expect=$1; shift
 
+	if is_ipv6 $dip; then
+		local proto=-6
+		local type="icmp6 type=128" # Echo request.
+	else
+		local proto=
+		local type="icmp echoreq"
+	fi
+
 	local t0=$(tc_rule_stats_get $dev $pref)
-	$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
-	    -c 10 -d 100msec -t icmp type=8
+	$MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+	    -c 10 -d 100msec -t $type
 	sleep 0.5
 	local t1=$(tc_rule_stats_get $dev $pref)
 	local delta=$((t1 - t0))
-- 
cgit v1.2.3


From 6477dd39e62c3a67cfa368ddc127410b4ae424c6 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Fri, 23 Apr 2021 09:40:33 -0700
Subject: mptcp: Retransmit DATA_FIN

With this change, the MPTCP-level retransmission timer is used to resend
DATA_FIN. The retranmit timer is not stopped while waiting for a
MPTCP-level ACK of DATA_FIN, and retransmitted DATA_FINs are sent on all
subflows. The retry interval starts at TCP_RTO_MIN and then doubles on
each attempt, up to TCP_RTO_MAX.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/146
Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine")
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4bde960e19dc..61329b8181ea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -399,6 +399,14 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
 	return false;
 }
 
+static void mptcp_set_datafin_timeout(const struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
+				       TCP_RTO_MIN << icsk->icsk_retransmits);
+}
+
 static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
 {
 	long tout = ssk && inet_csk(ssk)->icsk_pending ?
@@ -1052,7 +1060,7 @@ out:
 	}
 
 	if (snd_una == READ_ONCE(msk->snd_nxt)) {
-		if (msk->timer_ival)
+		if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
 			mptcp_stop_timer(sk);
 	} else {
 		mptcp_reset_timer(sk);
@@ -2276,8 +2284,19 @@ static void __mptcp_retrans(struct sock *sk)
 
 	__mptcp_clean_una_wakeup(sk);
 	dfrag = mptcp_rtx_head(sk);
-	if (!dfrag)
+	if (!dfrag) {
+		if (mptcp_data_fin_enabled(msk)) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
+
+			icsk->icsk_retransmits++;
+			mptcp_set_datafin_timeout(sk);
+			mptcp_send_ack(msk);
+
+			goto reset_timer;
+		}
+
 		return;
+	}
 
 	ssk = mptcp_subflow_get_retrans(msk);
 	if (!ssk)
@@ -2460,6 +2479,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			mptcp_set_timeout(sk, ssk);
 			tcp_send_ack(ssk);
+			if (!mptcp_timer_pending(sk))
+				mptcp_reset_timer(sk);
 		}
 		break;
 	}
-- 
cgit v1.2.3


From 0dd7e456bb049ec2b5a9e00250918b346c0d17d5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:31 -0700
Subject: bpftool: Support dumping BTF VAR's "extern" linkage

Add dumping of "extern" linkage for BTF VAR kind. Also shorten
"global-allocated" to "global" to be in line with FUNC's "global".

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-2-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 62953bbf68b4..001749a34899 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -71,7 +71,9 @@ static const char *btf_var_linkage_str(__u32 linkage)
 	case BTF_VAR_STATIC:
 		return "static";
 	case BTF_VAR_GLOBAL_ALLOCATED:
-		return "global-alloc";
+		return "global";
+	case BTF_VAR_GLOBAL_EXTERN:
+		return "extern";
 	default:
 		return "(unknown)";
 	}
-- 
cgit v1.2.3


From 5b438f01d7eb2dc9bec7cd79de881b5f155d9a71 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:32 -0700
Subject: bpftool: Dump more info about DATASEC members

Dump succinct information for each member of DATASEC: its kinds and name. This
is extremely helpful to see at a quick glance what is inside each DATASEC of
a given BTF. Without this, one has to jump around BTF data to just find out
the name of a VAR or FUNC. DATASEC's var_secinfo member is special in that
regard because it doesn't itself contain the name of the member, delegating
that to the referenced VAR and FUNC kinds. Other kinds, like
STRUCT/UNION/FUNC/ENUM, encode member names directly and thus are clearly
identifiable in BTF dump.

The new output looks like this:

[35] DATASEC '.bss' size=0 vlen=6
        type_id=8 offset=0 size=4 (VAR 'input_bss1')
        type_id=13 offset=0 size=4 (VAR 'input_bss_weak')
        type_id=16 offset=0 size=4 (VAR 'output_bss1')
        type_id=17 offset=0 size=4 (VAR 'output_data1')
        type_id=18 offset=0 size=4 (VAR 'output_rodata1')
        type_id=20 offset=0 size=8 (VAR 'output_sink1')
[36] DATASEC '.data' size=0 vlen=2
        type_id=9 offset=0 size=4 (VAR 'input_data1')
        type_id=14 offset=0 size=4 (VAR 'input_data_weak')
[37] DATASEC '.kconfig' size=0 vlen=2
        type_id=25 offset=0 size=4 (VAR 'LINUX_KERNEL_VERSION')
        type_id=28 offset=0 size=1 (VAR 'CONFIG_BPF_SYSCALL')
[38] DATASEC '.ksyms' size=0 vlen=1
        type_id=30 offset=0 size=1 (VAR 'bpf_link_fops')
[39] DATASEC '.rodata' size=0 vlen=2
        type_id=12 offset=0 size=4 (VAR 'input_rodata1')
        type_id=15 offset=0 size=4 (VAR 'input_rodata_weak')
[40] DATASEC 'license' size=0 vlen=1
        type_id=24 offset=0 size=4 (VAR 'LICENSE')

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-3-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 001749a34899..385d5c955cf3 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -100,26 +100,28 @@ static const char *btf_str(const struct btf *btf, __u32 off)
 	return btf__name_by_offset(btf, off) ? : "(invalid)";
 }
 
+static int btf_kind_safe(int kind)
+{
+	return kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+}
+
 static int dump_btf_type(const struct btf *btf, __u32 id,
 			 const struct btf_type *t)
 {
 	json_writer_t *w = json_wtr;
-	int kind, safe_kind;
-
-	kind = BTF_INFO_KIND(t->info);
-	safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+	int kind = btf_kind(t);
 
 	if (json_output) {
 		jsonw_start_object(w);
 		jsonw_uint_field(w, "id", id);
-		jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+		jsonw_string_field(w, "kind", btf_kind_str[btf_kind_safe(kind)]);
 		jsonw_string_field(w, "name", btf_str(btf, t->name_off));
 	} else {
-		printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+		printf("[%u] %s '%s'", id, btf_kind_str[btf_kind_safe(kind)],
 		       btf_str(btf, t->name_off));
 	}
 
-	switch (BTF_INFO_KIND(t->info)) {
+	switch (kind) {
 	case BTF_KIND_INT: {
 		__u32 v = *(__u32 *)(t + 1);
 		const char *enc;
@@ -302,7 +304,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
 		break;
 	}
 	case BTF_KIND_DATASEC: {
-		const struct btf_var_secinfo *v = (const void *)(t+1);
+		const struct btf_var_secinfo *v = (const void *)(t + 1);
+		const struct btf_type *vt;
 		__u16 vlen = BTF_INFO_VLEN(t->info);
 		int i;
 
@@ -324,6 +327,13 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
 			} else {
 				printf("\n\ttype_id=%u offset=%u size=%u",
 				       v->type, v->offset, v->size);
+
+				if (v->type <= btf__get_nr_types(btf)) {
+					vt = btf__type_by_id(btf, v->type);
+					printf(" (%s '%s')",
+					       btf_kind_str[btf_kind_safe(btf_kind(vt))],
+					       btf_str(btf, vt->name_off));
+				}
 			}
 		}
 		if (json_output)
-- 
cgit v1.2.3


From 0fec7a3cee1cf8e4f86ff563d229408ccbdc2d66 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:33 -0700
Subject: libbpf: Suppress compiler warning when using SEC() macro with externs

When used on externs SEC() macro will trigger compilation warning about
inapplicable `__attribute__((used))`. That's expected for extern declarations,
so suppress it with the corresponding _Pragma.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-4-andrii@kernel.org
---
 tools/lib/bpf/bpf_helpers.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index b904128626c2..75c7581b304c 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -25,9 +25,16 @@
 /*
  * Helper macro to place programs, maps, license in
  * different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
+ * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
+ * extern variables, etc).
+ * To allow use of SEC() with externs (e.g., for extern .maps declarations),
+ * make sure __attribute__((unused)) doesn't trigger compilation warning.
  */
-#define SEC(NAME) __attribute__((section(NAME), used))
+#define SEC(name) \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")	    \
+	__attribute__((section(name), used))				    \
+	_Pragma("GCC diagnostic pop")					    \
 
 /* Avoid 'linux/stddef.h' definition of '__always_inline'. */
 #undef __always_inline
-- 
cgit v1.2.3


From aea28a602fa19fb4fe66374030ab7e2c8ddf643e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:34 -0700
Subject: libbpf: Mark BPF subprogs with hidden visibility as static for BPF
 verifier

Define __hidden helper macro in bpf_helpers.h, which is a short-hand for
__attribute__((visibility("hidden"))). Add libbpf support to mark BPF
subprograms marked with __hidden as static in BTF information to enforce BPF
verifier's static function validation algorithm, which takes more information
(caller's context) into account during a subprogram validation.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-5-andrii@kernel.org
---
 tools/lib/bpf/bpf_helpers.h     |  8 ++++++++
 tools/lib/bpf/btf.c             |  5 -----
 tools/lib/bpf/libbpf.c          | 45 ++++++++++++++++++++++++++++++++++++++++-
 tools/lib/bpf/libbpf_internal.h |  6 ++++++
 4 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 75c7581b304c..9720dc0b4605 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -47,6 +47,14 @@
 #define __weak __attribute__((weak))
 #endif
 
+/*
+ * Use __hidden attribute to mark a non-static BPF subprogram effectively
+ * static for BPF verifier's verification algorithm purposes, allowing more
+ * extensive and permissive BPF verification process, taking into account
+ * subprogram's caller context.
+ */
+#define __hidden __attribute__((visibility("hidden")))
+
 /* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
  * any system-level headers (such as stddef.h, linux/version.h, etc), and
  * commonly-used macros like NULL and KERNEL_VERSION aren't available through
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d30e67e7e1e5..d57e13a13798 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1605,11 +1605,6 @@ static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
 			      btf->hdr->type_len, UINT_MAX, add_sz);
 }
 
-static __u32 btf_type_info(int kind, int vlen, int kflag)
-{
-	return (kflag << 31) | (kind << 24) | vlen;
-}
-
 static void btf_type_inc_vlen(struct btf_type *t)
 {
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9cc2d45b0080..8bac9c7627aa 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -71,6 +71,7 @@
 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
 static const struct btf_type *
 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
 
 static int __base_pr(enum libbpf_print_level level, const char *format,
 		     va_list args)
@@ -274,6 +275,7 @@ struct bpf_program {
 	bpf_program_clear_priv_t clear_priv;
 
 	bool load;
+	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
 	int prog_ifindex;
@@ -698,6 +700,15 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 		if (err)
 			return err;
 
+		/* if function is a global/weak symbol, but has hidden
+		 * visibility (STV_HIDDEN), mark its BTF FUNC as static to
+		 * enable more permissive BPF verification mode with more
+		 * outside context available to BPF verifier
+		 */
+		if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
+		    && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+			prog->mark_btf_static = true;
+
 		nr_progs++;
 		obj->nr_programs = nr_progs;
 
@@ -2618,7 +2629,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 {
 	struct btf *kern_btf = obj->btf;
 	bool btf_mandatory, sanitize;
-	int err = 0;
+	int i, err = 0;
 
 	if (!obj->btf)
 		return 0;
@@ -2632,6 +2643,38 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		return 0;
 	}
 
+	/* Even though some subprogs are global/weak, user might prefer more
+	 * permissive BPF verification process that BPF verifier performs for
+	 * static functions, taking into account more context from the caller
+	 * functions. In such case, they need to mark such subprogs with
+	 * __attribute__((visibility("hidden"))) and libbpf will adjust
+	 * corresponding FUNC BTF type to be marked as static and trigger more
+	 * involved BPF verification process.
+	 */
+	for (i = 0; i < obj->nr_programs; i++) {
+		struct bpf_program *prog = &obj->programs[i];
+		struct btf_type *t;
+		const char *name;
+		int j, n;
+
+		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
+			continue;
+
+		n = btf__get_nr_types(obj->btf);
+		for (j = 1; j <= n; j++) {
+			t = btf_type_by_id(obj->btf, j);
+			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
+				continue;
+
+			name = btf__str_by_offset(obj->btf, t->name_off);
+			if (strcmp(name, prog->name) != 0)
+				continue;
+
+			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
+			break;
+		}
+	}
+
 	sanitize = btf_needs_sanitization(obj);
 	if (sanitize) {
 		const void *raw_data;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 6017902c687e..92b7eae10c6d 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -19,6 +19,7 @@
 #pragma GCC poison reallocarray
 
 #include "libbpf.h"
+#include "btf.h"
 
 #ifndef EM_BPF
 #define EM_BPF 247
@@ -132,6 +133,11 @@ struct btf_type;
 
 struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
 
+static inline __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+	return (kflag << 31) | (kind << 24) | vlen;
+}
+
 void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
 		     size_t cur_cnt, size_t max_cnt, size_t add_cnt);
 int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
-- 
cgit v1.2.3


From 6245947c1b3c6783976e3af113bac30250d0a93e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:35 -0700
Subject: libbpf: Allow gaps in BPF program sections to support overriden weak
 functions

Currently libbpf is very strict about parsing BPF program instruction
sections. No gaps are allowed between sequential BPF programs within a given
ELF section. Libbpf enforced that by keeping track of the next section offset
that should start a new BPF (sub)program and cross-checks that by searching
for a corresponding STT_FUNC ELF symbol.

But this is too restrictive once we allow to have weak BPF programs and link
together two or more BPF object files. In such case, some weak BPF programs
might be "overridden" by either non-weak BPF program with the same name and
signature, or even by another weak BPF program that just happened to be linked
first. That, in turn, leaves BPF instructions of the "lost" BPF (sub)program
intact, but there is no corresponding ELF symbol, because no one is going to
be referencing it.

Libbpf already correctly handles such cases in the sense that it won't append
such dead code to actual BPF programs loaded into kernel. So the only change
that needs to be done is to relax the logic of parsing BPF instruction
sections. Instead of assuming next BPF (sub)program section offset, iterate
available STT_FUNC ELF symbols to discover all available BPF subprograms and
programs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-6-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 58 +++++++++++++++++++-------------------------------
 1 file changed, 22 insertions(+), 36 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8bac9c7627aa..198d6e149beb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -502,8 +502,6 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
 static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
-			      size_t off, __u32 sym_type, GElf_Sym *sym);
 
 void bpf_program__unload(struct bpf_program *prog)
 {
@@ -644,25 +642,29 @@ static int
 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 			 const char *sec_name, int sec_idx)
 {
+	Elf_Data *symbols = obj->efile.symbols;
 	struct bpf_program *prog, *progs;
 	void *data = sec_data->d_buf;
-	size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
-	int nr_progs, err;
+	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
+	int nr_progs, err, i;
 	const char *name;
 	GElf_Sym sym;
 
 	progs = obj->programs;
 	nr_progs = obj->nr_programs;
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
 	sec_off = 0;
 
-	while (sec_off < sec_sz) {
-		if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
-			pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
-				sec_name, sec_off);
-			return -LIBBPF_ERRNO__FORMAT;
-		}
+	for (i = 0; i < nr_syms; i++) {
+		if (!gelf_getsym(symbols, i, &sym))
+			continue;
+		if (sym.st_shndx != sec_idx)
+			continue;
+		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+			continue;
 
 		prog_sz = sym.st_size;
+		sec_off = sym.st_value;
 
 		name = elf_sym_str(obj, sym.st_name);
 		if (!name) {
@@ -711,8 +713,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 
 		nr_progs++;
 		obj->nr_programs = nr_progs;
-
-		sec_off += prog_sz;
 	}
 
 	return 0;
@@ -2825,26 +2825,6 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
 	return data;
 }
 
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
-			      size_t off, __u32 sym_type, GElf_Sym *sym)
-{
-	Elf_Data *symbols = obj->efile.symbols;
-	size_t n = symbols->d_size / sizeof(GElf_Sym);
-	int i;
-
-	for (i = 0; i < n; i++) {
-		if (!gelf_getsym(symbols, i, sym))
-			continue;
-		if (sym->st_shndx != sec_idx || sym->st_value != off)
-			continue;
-		if (GELF_ST_TYPE(sym->st_info) != sym_type)
-			continue;
-		return 0;
-	}
-
-	return -ENOENT;
-}
-
 static bool is_sec_name_dwarf(const char *name)
 {
 	/* approximation, but the actual list is too long */
@@ -3723,11 +3703,16 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
 	int err, i, nrels;
 	const char *sym_name;
 	__u32 insn_idx;
+	Elf_Scn *scn;
+	Elf_Data *scn_data;
 	GElf_Sym sym;
 	GElf_Rel rel;
 
+	scn = elf_sec_by_idx(obj, sec_idx);
+	scn_data = elf_sec_data(obj, scn);
+
 	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
-	sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+	sec_name = elf_sec_name(obj, scn);
 	if (!relo_sec_name || !sec_name)
 		return -EINVAL;
 
@@ -3745,7 +3730,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
 				relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
 			return -LIBBPF_ERRNO__FORMAT;
 		}
-		if (rel.r_offset % BPF_INSN_SZ) {
+
+		if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
 			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
 				relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
 			return -LIBBPF_ERRNO__FORMAT;
@@ -3769,9 +3755,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
 
 		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 		if (!prog) {
-			pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+			pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
 				relo_sec_name, i, sec_name, insn_idx);
-			return -LIBBPF_ERRNO__RELOC;
+			continue;
 		}
 
 		relos = libbpf_reallocarray(prog->reloc_desc,
-- 
cgit v1.2.3


From c7ef5ec9573f05535370d8716576263681cabec7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:36 -0700
Subject: libbpf: Refactor BTF map definition parsing

Refactor BTF-defined maps parsing logic to allow it to be nicely reused by BPF
static linker. Further, at least for BPF static linker, it's important to know
which attributes of a BPF map were defined explicitly, so provide a bit set
for each known portion of BTF map definition. This allows BPF static linker to
do a simple check when dealing with extern map declarations.

The same capabilities allow to distinguish attributes explicitly set to zero
(e.g., __uint(max_entries, 0)) vs the case of not specifying it at all (no
max_entries attribute at all). Libbpf is currently not utilizing that, but it
could be useful for backwards compatibility reasons later.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-7-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c          | 257 +++++++++++++++++++++++-----------------
 tools/lib/bpf/libbpf_internal.h |  32 +++++
 2 files changed, 178 insertions(+), 111 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 198d6e149beb..4f8335fc0bc9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2025,255 +2025,262 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
 	return bpf_map__set_pin_path(map, buf);
 }
 
-
-static int parse_btf_map_def(struct bpf_object *obj,
-			     struct bpf_map *map,
-			     const struct btf_type *def,
-			     bool strict, bool is_inner,
-			     const char *pin_root_path)
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+		      const struct btf_type *def_t, bool strict,
+		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
 {
 	const struct btf_type *t;
 	const struct btf_member *m;
+	bool is_inner = inner_def == NULL;
 	int vlen, i;
 
-	vlen = btf_vlen(def);
-	m = btf_members(def);
+	vlen = btf_vlen(def_t);
+	m = btf_members(def_t);
 	for (i = 0; i < vlen; i++, m++) {
-		const char *name = btf__name_by_offset(obj->btf, m->name_off);
+		const char *name = btf__name_by_offset(btf, m->name_off);
 
 		if (!name) {
-			pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
 			return -EINVAL;
 		}
 		if (strcmp(name, "type") == 0) {
-			if (!get_map_field_int(map->name, obj->btf, m,
-					       &map->def.type))
+			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
 				return -EINVAL;
-			pr_debug("map '%s': found type = %u.\n",
-				 map->name, map->def.type);
+			map_def->parts |= MAP_DEF_MAP_TYPE;
 		} else if (strcmp(name, "max_entries") == 0) {
-			if (!get_map_field_int(map->name, obj->btf, m,
-					       &map->def.max_entries))
+			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
 				return -EINVAL;
-			pr_debug("map '%s': found max_entries = %u.\n",
-				 map->name, map->def.max_entries);
+			map_def->parts |= MAP_DEF_MAX_ENTRIES;
 		} else if (strcmp(name, "map_flags") == 0) {
-			if (!get_map_field_int(map->name, obj->btf, m,
-					       &map->def.map_flags))
+			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
 				return -EINVAL;
-			pr_debug("map '%s': found map_flags = %u.\n",
-				 map->name, map->def.map_flags);
+			map_def->parts |= MAP_DEF_MAP_FLAGS;
 		} else if (strcmp(name, "numa_node") == 0) {
-			if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
 				return -EINVAL;
-			pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+			map_def->parts |= MAP_DEF_NUMA_NODE;
 		} else if (strcmp(name, "key_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map->name, obj->btf, m, &sz))
+			if (!get_map_field_int(map_name, btf, m, &sz))
 				return -EINVAL;
-			pr_debug("map '%s': found key_size = %u.\n",
-				 map->name, sz);
-			if (map->def.key_size && map->def.key_size != sz) {
+			if (map_def->key_size && map_def->key_size != sz) {
 				pr_warn("map '%s': conflicting key size %u != %u.\n",
-					map->name, map->def.key_size, sz);
+					map_name, map_def->key_size, sz);
 				return -EINVAL;
 			}
-			map->def.key_size = sz;
+			map_def->key_size = sz;
+			map_def->parts |= MAP_DEF_KEY_SIZE;
 		} else if (strcmp(name, "key") == 0) {
 			__s64 sz;
 
-			t = btf__type_by_id(obj->btf, m->type);
+			t = btf__type_by_id(btf, m->type);
 			if (!t) {
 				pr_warn("map '%s': key type [%d] not found.\n",
-					map->name, m->type);
+					map_name, m->type);
 				return -EINVAL;
 			}
 			if (!btf_is_ptr(t)) {
 				pr_warn("map '%s': key spec is not PTR: %s.\n",
-					map->name, btf_kind_str(t));
+					map_name, btf_kind_str(t));
 				return -EINVAL;
 			}
-			sz = btf__resolve_size(obj->btf, t->type);
+			sz = btf__resolve_size(btf, t->type);
 			if (sz < 0) {
 				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
-					map->name, t->type, (ssize_t)sz);
+					map_name, t->type, (ssize_t)sz);
 				return sz;
 			}
-			pr_debug("map '%s': found key [%u], sz = %zd.\n",
-				 map->name, t->type, (ssize_t)sz);
-			if (map->def.key_size && map->def.key_size != sz) {
+			if (map_def->key_size && map_def->key_size != sz) {
 				pr_warn("map '%s': conflicting key size %u != %zd.\n",
-					map->name, map->def.key_size, (ssize_t)sz);
+					map_name, map_def->key_size, (ssize_t)sz);
 				return -EINVAL;
 			}
-			map->def.key_size = sz;
-			map->btf_key_type_id = t->type;
+			map_def->key_size = sz;
+			map_def->key_type_id = t->type;
+			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
 		} else if (strcmp(name, "value_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map->name, obj->btf, m, &sz))
+			if (!get_map_field_int(map_name, btf, m, &sz))
 				return -EINVAL;
-			pr_debug("map '%s': found value_size = %u.\n",
-				 map->name, sz);
-			if (map->def.value_size && map->def.value_size != sz) {
+			if (map_def->value_size && map_def->value_size != sz) {
 				pr_warn("map '%s': conflicting value size %u != %u.\n",
-					map->name, map->def.value_size, sz);
+					map_name, map_def->value_size, sz);
 				return -EINVAL;
 			}
-			map->def.value_size = sz;
+			map_def->value_size = sz;
+			map_def->parts |= MAP_DEF_VALUE_SIZE;
 		} else if (strcmp(name, "value") == 0) {
 			__s64 sz;
 
-			t = btf__type_by_id(obj->btf, m->type);
+			t = btf__type_by_id(btf, m->type);
 			if (!t) {
 				pr_warn("map '%s': value type [%d] not found.\n",
-					map->name, m->type);
+					map_name, m->type);
 				return -EINVAL;
 			}
 			if (!btf_is_ptr(t)) {
 				pr_warn("map '%s': value spec is not PTR: %s.\n",
-					map->name, btf_kind_str(t));
+					map_name, btf_kind_str(t));
 				return -EINVAL;
 			}
-			sz = btf__resolve_size(obj->btf, t->type);
+			sz = btf__resolve_size(btf, t->type);
 			if (sz < 0) {
 				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
-					map->name, t->type, (ssize_t)sz);
+					map_name, t->type, (ssize_t)sz);
 				return sz;
 			}
-			pr_debug("map '%s': found value [%u], sz = %zd.\n",
-				 map->name, t->type, (ssize_t)sz);
-			if (map->def.value_size && map->def.value_size != sz) {
+			if (map_def->value_size && map_def->value_size != sz) {
 				pr_warn("map '%s': conflicting value size %u != %zd.\n",
-					map->name, map->def.value_size, (ssize_t)sz);
+					map_name, map_def->value_size, (ssize_t)sz);
 				return -EINVAL;
 			}
-			map->def.value_size = sz;
-			map->btf_value_type_id = t->type;
+			map_def->value_size = sz;
+			map_def->value_type_id = t->type;
+			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
 		}
 		else if (strcmp(name, "values") == 0) {
+			char inner_map_name[128];
 			int err;
 
 			if (is_inner) {
 				pr_warn("map '%s': multi-level inner maps not supported.\n",
-					map->name);
+					map_name);
 				return -ENOTSUP;
 			}
 			if (i != vlen - 1) {
 				pr_warn("map '%s': '%s' member should be last.\n",
-					map->name, name);
+					map_name, name);
 				return -EINVAL;
 			}
-			if (!bpf_map_type__is_map_in_map(map->def.type)) {
+			if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
 				pr_warn("map '%s': should be map-in-map.\n",
-					map->name);
+					map_name);
 				return -ENOTSUP;
 			}
-			if (map->def.value_size && map->def.value_size != 4) {
+			if (map_def->value_size && map_def->value_size != 4) {
 				pr_warn("map '%s': conflicting value size %u != 4.\n",
-					map->name, map->def.value_size);
+					map_name, map_def->value_size);
 				return -EINVAL;
 			}
-			map->def.value_size = 4;
-			t = btf__type_by_id(obj->btf, m->type);
+			map_def->value_size = 4;
+			t = btf__type_by_id(btf, m->type);
 			if (!t) {
 				pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
-					map->name, m->type);
+					map_name, m->type);
 				return -EINVAL;
 			}
 			if (!btf_is_array(t) || btf_array(t)->nelems) {
 				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
-					map->name);
+					map_name);
 				return -EINVAL;
 			}
-			t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
-						   NULL);
+			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
 			if (!btf_is_ptr(t)) {
 				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
-					map->name, btf_kind_str(t));
+					map_name, btf_kind_str(t));
 				return -EINVAL;
 			}
-			t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+			t = skip_mods_and_typedefs(btf, t->type, NULL);
 			if (!btf_is_struct(t)) {
 				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
-					map->name, btf_kind_str(t));
+					map_name, btf_kind_str(t));
 				return -EINVAL;
 			}
 
-			map->inner_map = calloc(1, sizeof(*map->inner_map));
-			if (!map->inner_map)
-				return -ENOMEM;
-			map->inner_map->fd = -1;
-			map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
-			map->inner_map->name = malloc(strlen(map->name) +
-						      sizeof(".inner") + 1);
-			if (!map->inner_map->name)
-				return -ENOMEM;
-			sprintf(map->inner_map->name, "%s.inner", map->name);
-
-			err = parse_btf_map_def(obj, map->inner_map, t, strict,
-						true /* is_inner */, NULL);
+			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
+			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
 			if (err)
 				return err;
+
+			map_def->parts |= MAP_DEF_INNER_MAP;
 		} else if (strcmp(name, "pinning") == 0) {
 			__u32 val;
-			int err;
 
 			if (is_inner) {
-				pr_debug("map '%s': inner def can't be pinned.\n",
-					 map->name);
+				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
 				return -EINVAL;
 			}
-			if (!get_map_field_int(map->name, obj->btf, m, &val))
+			if (!get_map_field_int(map_name, btf, m, &val))
 				return -EINVAL;
-			pr_debug("map '%s': found pinning = %u.\n",
-				 map->name, val);
-
-			if (val != LIBBPF_PIN_NONE &&
-			    val != LIBBPF_PIN_BY_NAME) {
+			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
 				pr_warn("map '%s': invalid pinning value %u.\n",
-					map->name, val);
+					map_name, val);
 				return -EINVAL;
 			}
-			if (val == LIBBPF_PIN_BY_NAME) {
-				err = build_map_pin_path(map, pin_root_path);
-				if (err) {
-					pr_warn("map '%s': couldn't build pin path.\n",
-						map->name);
-					return err;
-				}
-			}
+			map_def->pinning = val;
+			map_def->parts |= MAP_DEF_PINNING;
 		} else {
 			if (strict) {
-				pr_warn("map '%s': unknown field '%s'.\n",
-					map->name, name);
+				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
 				return -ENOTSUP;
 			}
-			pr_debug("map '%s': ignoring unknown field '%s'.\n",
-				 map->name, name);
+			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
 		}
 	}
 
-	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
-		pr_warn("map '%s': map type isn't specified.\n", map->name);
+	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
+		pr_warn("map '%s': map type isn't specified.\n", map_name);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
+static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
+{
+	map->def.type = def->map_type;
+	map->def.key_size = def->key_size;
+	map->def.value_size = def->value_size;
+	map->def.max_entries = def->max_entries;
+	map->def.map_flags = def->map_flags;
+
+	map->numa_node = def->numa_node;
+	map->btf_key_type_id = def->key_type_id;
+	map->btf_value_type_id = def->value_type_id;
+
+	if (def->parts & MAP_DEF_MAP_TYPE)
+		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
+
+	if (def->parts & MAP_DEF_KEY_TYPE)
+		pr_debug("map '%s': found key [%u], sz = %u.\n",
+			 map->name, def->key_type_id, def->key_size);
+	else if (def->parts & MAP_DEF_KEY_SIZE)
+		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
+
+	if (def->parts & MAP_DEF_VALUE_TYPE)
+		pr_debug("map '%s': found value [%u], sz = %u.\n",
+			 map->name, def->value_type_id, def->value_size);
+	else if (def->parts & MAP_DEF_VALUE_SIZE)
+		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
+
+	if (def->parts & MAP_DEF_MAX_ENTRIES)
+		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
+	if (def->parts & MAP_DEF_MAP_FLAGS)
+		pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
+	if (def->parts & MAP_DEF_PINNING)
+		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
+	if (def->parts & MAP_DEF_NUMA_NODE)
+		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
+
+	if (def->parts & MAP_DEF_INNER_MAP)
+		pr_debug("map '%s': found inner map definition.\n", map->name);
+}
+
 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 					 const struct btf_type *sec,
 					 int var_idx, int sec_idx,
 					 const Elf_Data *data, bool strict,
 					 const char *pin_root_path)
 {
+	struct btf_map_def map_def = {}, inner_def = {};
 	const struct btf_type *var, *def;
 	const struct btf_var_secinfo *vi;
 	const struct btf_var *var_extra;
 	const char *map_name;
 	struct bpf_map *map;
+	int err;
 
 	vi = btf_var_secinfos(sec) + var_idx;
 	var = btf__type_by_id(obj->btf, vi->type);
@@ -2327,7 +2334,35 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 		 map_name, map->sec_idx, map->sec_offset);
 
-	return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
+	if (err)
+		return err;
+
+	fill_map_from_def(map, &map_def);
+
+	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
+		err = build_map_pin_path(map, pin_root_path);
+		if (err) {
+			pr_warn("map '%s': couldn't build pin path.\n", map->name);
+			return err;
+		}
+	}
+
+	if (map_def.parts & MAP_DEF_INNER_MAP) {
+		map->inner_map = calloc(1, sizeof(*map->inner_map));
+		if (!map->inner_map)
+			return -ENOMEM;
+		map->inner_map->fd = -1;
+		map->inner_map->sec_idx = sec_idx;
+		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
+		if (!map->inner_map->name)
+			return -ENOMEM;
+		sprintf(map->inner_map->name, "%s.inner", map_name);
+
+		fill_map_from_def(map->inner_map, &inner_def);
+	}
+
+	return 0;
 }
 
 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 92b7eae10c6d..17883073710c 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -138,6 +138,38 @@ static inline __u32 btf_type_info(int kind, int vlen, int kflag)
 	return (kflag << 31) | (kind << 24) | vlen;
 }
 
+enum map_def_parts {
+	MAP_DEF_MAP_TYPE	= 0x001,
+	MAP_DEF_KEY_TYPE	= 0x002,
+	MAP_DEF_KEY_SIZE	= 0x004,
+	MAP_DEF_VALUE_TYPE	= 0x008,
+	MAP_DEF_VALUE_SIZE	= 0x010,
+	MAP_DEF_MAX_ENTRIES	= 0x020,
+	MAP_DEF_MAP_FLAGS	= 0x040,
+	MAP_DEF_NUMA_NODE	= 0x080,
+	MAP_DEF_PINNING		= 0x100,
+	MAP_DEF_INNER_MAP	= 0x200,
+
+	MAP_DEF_ALL		= 0x3ff, /* combination of all above */
+};
+
+struct btf_map_def {
+	enum map_def_parts parts;
+	__u32 map_type;
+	__u32 key_type_id;
+	__u32 key_size;
+	__u32 value_type_id;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 map_flags;
+	__u32 numa_node;
+	__u32 pinning;
+};
+
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+		      const struct btf_type *def_t, bool strict,
+		      struct btf_map_def *map_def, struct btf_map_def *inner_def);
+
 void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
 		     size_t cur_cnt, size_t max_cnt, size_t add_cnt);
 int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
-- 
cgit v1.2.3


From beaa3711ada4e4a0c8e03a78fec72330185213bf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:37 -0700
Subject: libbpf: Factor out symtab and relos sanity checks

Factor out logic for sanity checking SHT_SYMTAB and SHT_REL sections into
separate sections. They are already quite extensive and are suffering from too
deep indentation. Subsequent changes will extend SYMTAB sanity checking
further, so it's better to factor each into a separate function.

No functional changes are intended.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-8-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 233 +++++++++++++++++++++++++++----------------------
 1 file changed, 127 insertions(+), 106 deletions(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 4e08bc07e635..0bb927226370 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -131,6 +131,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file);
 
 static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
 static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
 static int linker_sanity_check_btf(struct src_obj *obj);
 static int linker_sanity_check_btf_ext(struct src_obj *obj);
 static int linker_fixup_btf(struct src_obj *obj);
@@ -663,8 +665,8 @@ static bool is_pow_of_2(size_t x)
 
 static int linker_sanity_check_elf(struct src_obj *obj)
 {
-	struct src_sec *sec, *link_sec;
-	int i, j, n;
+	struct src_sec *sec;
+	int i, err;
 
 	if (!obj->symtab_sec_idx) {
 		pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
@@ -692,43 +694,11 @@ static int linker_sanity_check_elf(struct src_obj *obj)
 			return -EINVAL;
 
 		switch (sec->shdr->sh_type) {
-		case SHT_SYMTAB: {
-			Elf64_Sym *sym;
-
-			if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
-				return -EINVAL;
-			if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
-				return -EINVAL;
-
-			if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
-				pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
-					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
-				return -EINVAL;
-			}
-			link_sec = &obj->secs[sec->shdr->sh_link];
-			if (link_sec->shdr->sh_type != SHT_STRTAB) {
-				pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
-					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
-				return -EINVAL;
-			}
-
-			n = sec->shdr->sh_size / sec->shdr->sh_entsize;
-			sym = sec->data->d_buf;
-			for (j = 0; j < n; j++, sym++) {
-				if (sym->st_shndx
-				    && sym->st_shndx < SHN_LORESERVE
-				    && sym->st_shndx >= obj->sec_cnt) {
-					pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
-						j, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
-					return -EINVAL;
-				}
-				if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
-					if (sym->st_value != 0)
-						return -EINVAL;
-				}
-			}
+		case SHT_SYMTAB:
+			err = linker_sanity_check_elf_symtab(obj, sec);
+			if (err)
+				return err;
 			break;
-		}
 		case SHT_STRTAB:
 			break;
 		case SHT_PROGBITS:
@@ -739,87 +709,138 @@ static int linker_sanity_check_elf(struct src_obj *obj)
 			break;
 		case SHT_NOBITS:
 			break;
-		case SHT_REL: {
-			Elf64_Rel *relo;
-			struct src_sec *sym_sec;
+		case SHT_REL:
+			err = linker_sanity_check_elf_relos(obj, sec);
+			if (err)
+				return err;
+			break;
+		case SHT_LLVM_ADDRSIG:
+			break;
+		default:
+			pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+				sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+			return -EINVAL;
+		}
+	}
 
-			if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
-				return -EINVAL;
-			if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
-				return -EINVAL;
+	return 0;
+}
 
-			/* SHT_REL's sh_link should point to SYMTAB */
-			if (sec->shdr->sh_link != obj->symtab_sec_idx) {
-				pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
-					sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
-				return -EINVAL;
-			}
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec)
+{
+	struct src_sec *link_sec;
+	Elf64_Sym *sym;
+	int i, n;
 
-			/* SHT_REL's sh_info points to relocated section */
-			if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
-				pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
-					sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
-				return -EINVAL;
-			}
-			link_sec = &obj->secs[sec->shdr->sh_info];
+	if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+		return -EINVAL;
+	if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+		return -EINVAL;
+
+	if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+		pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+			sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+		return -EINVAL;
+	}
+	link_sec = &obj->secs[sec->shdr->sh_link];
+	if (link_sec->shdr->sh_type != SHT_STRTAB) {
+		pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+			sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+		return -EINVAL;
+	}
 
-			/* .rel<secname> -> <secname> pattern is followed */
-			if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
-			    || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
-				pr_warn("ELF relo section #%zu name has invalid name in %s\n",
-					sec->sec_idx, obj->filename);
+	n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+	sym = sec->data->d_buf;
+	for (i = 0; i < n; i++, sym++) {
+		if (sym->st_shndx
+		    && sym->st_shndx < SHN_LORESERVE
+		    && sym->st_shndx >= obj->sec_cnt) {
+			pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+				i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+			return -EINVAL;
+		}
+		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
+			if (sym->st_value != 0)
 				return -EINVAL;
-			}
+			continue;
+		}
+	}
 
-			/* don't further validate relocations for ignored sections */
-			if (link_sec->skipped)
-				break;
+	return 0;
+}
 
-			/* relocatable section is data or instructions */
-			if (link_sec->shdr->sh_type != SHT_PROGBITS
-			    && link_sec->shdr->sh_type != SHT_NOBITS) {
-				pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
-					sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
-				return -EINVAL;
-			}
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec)
+{
+	struct src_sec *link_sec, *sym_sec;
+	Elf64_Rel *relo;
+	int i, n;
 
-			/* check sanity of each relocation */
-			n = sec->shdr->sh_size / sec->shdr->sh_entsize;
-			relo = sec->data->d_buf;
-			sym_sec = &obj->secs[obj->symtab_sec_idx];
-			for (j = 0; j < n; j++, relo++) {
-				size_t sym_idx = ELF64_R_SYM(relo->r_info);
-				size_t sym_type = ELF64_R_TYPE(relo->r_info);
-
-				if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
-					pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
-						j, sec->sec_idx, sym_type, obj->filename);
-					return -EINVAL;
-				}
+	if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+		return -EINVAL;
+	if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+		return -EINVAL;
 
-				if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
-					pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
-						j, sec->sec_idx, sym_idx, obj->filename);
-					return -EINVAL;
-				}
+	/* SHT_REL's sh_link should point to SYMTAB */
+	if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+		pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+			sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+		return -EINVAL;
+	}
 
-				if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
-					if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
-						pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
-							j, sec->sec_idx, sym_idx, obj->filename);
-						return -EINVAL;
-					}
-				}
-			}
-			break;
+	/* SHT_REL's sh_info points to relocated section */
+	if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+		pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+			sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+		return -EINVAL;
+	}
+	link_sec = &obj->secs[sec->shdr->sh_info];
+
+	/* .rel<secname> -> <secname> pattern is followed */
+	if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+	    || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+		pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+			sec->sec_idx, obj->filename);
+		return -EINVAL;
+	}
+
+	/* don't further validate relocations for ignored sections */
+	if (link_sec->skipped)
+		return 0;
+
+	/* relocatable section is data or instructions */
+	if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) {
+		pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+			sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+		return -EINVAL;
+	}
+
+	/* check sanity of each relocation */
+	n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+	relo = sec->data->d_buf;
+	sym_sec = &obj->secs[obj->symtab_sec_idx];
+	for (i = 0; i < n; i++, relo++) {
+		size_t sym_idx = ELF64_R_SYM(relo->r_info);
+		size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+		if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+			pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+				i, sec->sec_idx, sym_type, obj->filename);
+			return -EINVAL;
 		}
-		case SHT_LLVM_ADDRSIG:
-			break;
-		default:
-			pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
-				sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+
+		if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+			pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+				i, sec->sec_idx, sym_idx, obj->filename);
 			return -EINVAL;
 		}
+
+		if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+			if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+				pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+					i, sec->sec_idx, sym_idx, obj->filename);
+				return -EINVAL;
+			}
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 42869d28527695a75346c988ceeedbba7e3880b7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:38 -0700
Subject: libbpf: Make few internal helpers available outside of libbpf.c

Make skip_mods_and_typedefs(), btf_kind_str(), and btf_func_linkage() helpers
available outside of libbpf.c, to be used by static linker code.

Also do few cleanups (error code fixes, comment clean up, etc) that don't
deserve their own commit.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-9-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c          | 13 +++----------
 tools/lib/bpf/libbpf_internal.h |  7 +++++++
 tools/lib/bpf/linker.c          | 14 ++++++--------
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4f8335fc0bc9..a1cddd17af7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -69,8 +69,6 @@
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 
 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
 
 static int __base_pr(enum libbpf_print_level level, const char *format,
@@ -1906,7 +1904,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 	return 0;
 }
 
-static const struct btf_type *
+const struct btf_type *
 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 {
 	const struct btf_type *t = btf__type_by_id(btf, id);
@@ -1961,16 +1959,11 @@ static const char *__btf_kind_str(__u16 kind)
 	}
 }
 
-static const char *btf_kind_str(const struct btf_type *t)
+const char *btf_kind_str(const struct btf_type *t)
 {
 	return __btf_kind_str(btf_kind(t));
 }
 
-static enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
-{
-	return (enum btf_func_linkage)BTF_INFO_VLEN(t->info);
-}
-
 /*
  * Fetch integer attribute of BTF map definition. Such attributes are
  * represented using a pointer to an array, in which dimensionality of array
@@ -7036,7 +7029,7 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id
 	return false;
 }
 
-static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
 {
 	struct bpf_insn *insn = prog->insns;
 	enum bpf_func_id func_id;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 17883073710c..ee426226928f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -132,6 +132,13 @@ struct btf;
 struct btf_type;
 
 struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+const char *btf_kind_str(const struct btf_type *t);
+const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+	return (enum btf_func_linkage)(int)btf_vlen(t);
+}
 
 static inline __u32 btf_type_info(int kind, int vlen, int kflag)
 {
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 0bb927226370..1263641e8b97 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -123,9 +123,7 @@ struct bpf_linker {
 };
 
 #define pr_warn_elf(fmt, ...)									\
-do {												\
-	libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1));	\
-} while (0)
+	libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
 
 static int init_output_elf(struct bpf_linker *linker, const char *file);
 
@@ -284,7 +282,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 
 	/* ELF header */
 	linker->elf_hdr = elf64_newehdr(linker->elf);
-	if (!linker->elf_hdr){
+	if (!linker->elf_hdr) {
 		pr_warn_elf("failed to create ELF header");
 		return -EINVAL;
 	}
@@ -925,13 +923,13 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
 
 	scn = elf_newscn(linker->elf);
 	if (!scn)
-		return -1;
+		return -ENOMEM;
 	data = elf_newdata(scn);
 	if (!data)
-		return -1;
+		return -ENOMEM;
 	shdr = elf64_getshdr(scn);
 	if (!shdr)
-		return -1;
+		return -ENOMEM;
 
 	dst_sec->scn = scn;
 	dst_sec->shdr = shdr;
@@ -1221,7 +1219,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 				return err;
 			}
 		} else if (!secs_match(dst_sec, src_sec)) {
-			pr_warn("Secs %s are not compatible\n", src_sec->sec_name);
+			pr_warn("sections %s are not compatible\n", src_sec->sec_name);
 			return -1;
 		}
 
-- 
cgit v1.2.3


From 386b1d241e1b975a239d33be836bc183a52ab18c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:39 -0700
Subject: libbpf: Extend sanity checking ELF symbols with externs validation

Add logic to validate extern symbols, plus some other minor extra checks, like
ELF symbol #0 validation, general symbol visibility and binding validations.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-10-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 49 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 1263641e8b97..b0e038480300 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -750,14 +750,45 @@ static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *s
 	n = sec->shdr->sh_size / sec->shdr->sh_entsize;
 	sym = sec->data->d_buf;
 	for (i = 0; i < n; i++, sym++) {
-		if (sym->st_shndx
-		    && sym->st_shndx < SHN_LORESERVE
-		    && sym->st_shndx >= obj->sec_cnt) {
+		int sym_type = ELF64_ST_TYPE(sym->st_info);
+		int sym_bind = ELF64_ST_BIND(sym->st_info);
+		int sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+
+		if (i == 0) {
+			if (sym->st_name != 0 || sym->st_info != 0
+			    || sym->st_other != 0 || sym->st_shndx != 0
+			    || sym->st_value != 0 || sym->st_size != 0) {
+				pr_warn("ELF sym #0 is invalid in %s\n", obj->filename);
+				return -EINVAL;
+			}
+			continue;
+		}
+		if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) {
+			pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n",
+				i, sec->sec_idx, sym_bind);
+			return -EINVAL;
+		}
+		if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) {
+			pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n",
+				i, sec->sec_idx, sym_vis);
+			return -EINVAL;
+		}
+		if (sym->st_shndx == 0) {
+			if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL
+			    || sym->st_value != 0 || sym->st_size != 0) {
+				pr_warn("ELF sym #%d is invalid extern symbol in %s\n",
+					i, obj->filename);
+
+				return -EINVAL;
+			}
+			continue;
+		}
+		if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) {
 			pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
 				i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
 			return -EINVAL;
 		}
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) {
+		if (sym_type == STT_SECTION) {
 			if (sym->st_value != 0)
 				return -EINVAL;
 			continue;
@@ -1135,16 +1166,16 @@ static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj
 		size_t dst_sym_idx;
 		int name_off;
 
-		/* we already have all-zero initial symbol */
-		if (sym->st_name == 0 && sym->st_info == 0 &&
-		    sym->st_other == 0 && sym->st_shndx == SHN_UNDEF &&
-		    sym->st_value == 0 && sym->st_size ==0)
+		/* We already validated all-zero symbol #0 and we already
+		 * appended it preventively to the final SYMTAB, so skip it.
+		 */
+		if (i == 0)
 			continue;
 
 		sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
 		if (!sym_name) {
 			pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
-			return -1;
+			return -EINVAL;
 		}
 
 		if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
-- 
cgit v1.2.3


From 83a157279f2125ce1c4d6d93750440853746dff0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:40 -0700
Subject: libbpf: Tighten BTF type ID rewriting with error checking

It should never fail, but if it does, it's better to know about this rather
than end up with nonsensical type IDs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-11-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b0e038480300..5505c85e8b7b 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1429,6 +1429,13 @@ static int linker_fixup_btf(struct src_obj *obj)
 static int remap_type_id(__u32 *type_id, void *ctx)
 {
 	int *id_map = ctx;
+	int new_id = id_map[*type_id];
+
+	/* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */
+	if (new_id == 0 && *type_id != 0) {
+		pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id);
+		return -EINVAL;
+	}
 
 	*type_id = id_map[*type_id];
 
-- 
cgit v1.2.3


From a46349227cd832b33c12f74b85712ea67de3c6c4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:41 -0700
Subject: libbpf: Add linker extern resolution support for functions and global
 variables

Add BPF static linker logic to resolve extern variables and functions across
multiple linked together BPF object files.

For that, linker maintains a separate list of struct glob_sym structures,
which keeps track of few pieces of metadata (is it extern or resolved global,
is it a weak symbol, which ELF section it belongs to, etc) and ties together
BTF type info and ELF symbol information and keeps them in sync.

With adding support for extern variables/funcs, it's now possible for some
sections to contain both extern and non-extern definitions. This means that
some sections may start out as ephemeral (if only externs are present and thus
there is not corresponding ELF section), but will be "upgraded" to actual ELF
section as symbols are resolved or new non-extern definitions are appended.

Additional care is taken to not duplicate extern entries in sections like
.kconfig and .ksyms.

Given libbpf requires BTF type to always be present for .kconfig/.ksym
externs, linker extends this requirement to all the externs, even those that
are supposed to be resolved during static linking and which won't be visible
to libbpf. With BTF information always present, static linker will check not
just ELF symbol matches, but entire BTF type signature match as well. That
logic is stricter that BPF CO-RE checks. It probably should be re-used by
.ksym resolution logic in libbpf as well, but that's left for follow up
patches.

To make it unnecessary to rewrite ELF symbols and minimize BTF type
rewriting/removal, ELF symbols that correspond to externs initially will be
updated in place once they are resolved. Similarly for BTF type info, VAR/FUNC
and var_secinfo's (sec_vars in struct bpf_linker) are staying stable, but
types they point to might get replaced when extern is resolved. This might
leave some left-over types (even though we try to minimize this for common
cases of having extern funcs with not argument names vs concrete function with
names properly specified). That can be addresses later with a generic BTF
garbage collection. That's left for a follow up as well.

Given BTF type appending phase is separate from ELF symbol
appending/resolution, special struct glob_sym->underlying_btf_id variable is
used to communicate resolution and rewrite decisions. 0 means
underlying_btf_id needs to be appended (it's not yet in final linker->btf), <0
values are used for temporary storage of source BTF type ID (not yet
rewritten), so -glob_sym->underlying_btf_id is BTF type id in obj-btf. But by
the end of linker_append_btf() phase, that underlying_btf_id will be remapped
and will always be > 0. This is the uglies part of the whole process, but
keeps the other parts much simpler due to stability of sec_var and VAR/FUNC
types, as well as ELF symbol, so please keep that in mind while reviewing.

BTF-defined maps require some extra custom logic and is addressed separate in
the next patch, so that to keep this one smaller and easier to review.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-12-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 849 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 790 insertions(+), 59 deletions(-)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 5505c85e8b7b..23f49945dc7a 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -22,6 +22,8 @@
 #include "libbpf_internal.h"
 #include "strset.h"
 
+#define BTF_EXTERN_SEC ".extern"
+
 struct src_sec {
 	const char *sec_name;
 	/* positional (not necessarily ELF) index in an array of sections */
@@ -74,11 +76,36 @@ struct btf_ext_sec_data {
 	void *recs;
 };
 
+struct glob_sym {
+	/* ELF symbol index */
+	int sym_idx;
+	/* associated section id for .ksyms, .kconfig, etc, but not .extern */
+	int sec_id;
+	/* extern name offset in STRTAB */
+	int name_off;
+	/* optional associated BTF type ID */
+	int btf_id;
+	/* BTF type ID to which VAR/FUNC type is pointing to; used for
+	 * rewriting types when extern VAR/FUNC is resolved to a concrete
+	 * definition
+	 */
+	int underlying_btf_id;
+	/* sec_var index in the corresponding dst_sec, if exists */
+	int var_idx;
+
+	/* extern or resolved/global symbol */
+	bool is_extern;
+	/* weak or strong symbol, never goes back from strong to weak */
+	bool is_weak;
+};
+
 struct dst_sec {
 	char *sec_name;
 	/* positional (not necessarily ELF) index in an array of sections */
 	int id;
 
+	bool ephemeral;
+
 	/* ELF info */
 	size_t sec_idx;
 	Elf_Scn *scn;
@@ -120,6 +147,10 @@ struct bpf_linker {
 
 	struct btf *btf;
 	struct btf_ext *btf_ext;
+
+	/* global (including extern) ELF symbols */
+	int glob_sym_cnt;
+	struct glob_sym *glob_syms;
 };
 
 #define pr_warn_elf(fmt, ...)									\
@@ -136,6 +167,8 @@ static int linker_sanity_check_btf_ext(struct src_obj *obj);
 static int linker_fixup_btf(struct src_obj *obj);
 static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
 static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+				 Elf64_Sym *sym, const char *sym_name, int src_sym_idx);
 static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
 static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
 static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
@@ -947,6 +980,7 @@ static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct s
 
 	dst_sec->sec_sz = 0;
 	dst_sec->sec_idx = 0;
+	dst_sec->ephemeral = src_sec->ephemeral;
 
 	/* ephemeral sections are just thin section shells lacking most parts */
 	if (src_sec->ephemeral)
@@ -1010,6 +1044,9 @@ static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const cha
 
 static bool secs_match(struct dst_sec *dst, struct src_sec *src)
 {
+	if (dst->ephemeral || src->ephemeral)
+		return true;
+
 	if (dst->shdr->sh_type != src->shdr->sh_type) {
 		pr_warn("sec %s types mismatch\n", dst->sec_name);
 		return false;
@@ -1035,13 +1072,33 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec
 	return true;
 }
 
-static int extend_sec(struct dst_sec *dst, struct src_sec *src)
+static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
 {
 	void *tmp;
-	size_t dst_align = dst->shdr->sh_addralign;
-	size_t src_align = src->shdr->sh_addralign;
+	size_t dst_align, src_align;
 	size_t dst_align_sz, dst_final_sz;
+	int err;
+
+	/* Ephemeral source section doesn't contribute anything to ELF
+	 * section data.
+	 */
+	if (src->ephemeral)
+		return 0;
+
+	/* Some sections (like .maps) can contain both externs (and thus be
+	 * ephemeral) and non-externs (map definitions). So it's possible that
+	 * it has to be "upgraded" from ephemeral to non-ephemeral when the
+	 * first non-ephemeral entity appears. In such case, we add ELF
+	 * section, data, etc.
+	 */
+	if (dst->ephemeral) {
+		err = init_sec(linker, dst, src);
+		if (err)
+			return err;
+	}
 
+	dst_align = dst->shdr->sh_addralign;
+	src_align = src->shdr->sh_addralign;
 	if (dst_align == 0)
 		dst_align = 1;
 	if (dst_align < src_align)
@@ -1137,10 +1194,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
 		/* record mapped section index */
 		src_sec->dst_id = dst_sec->id;
 
-		if (src_sec->ephemeral)
-			continue;
-
-		err = extend_sec(dst_sec, src_sec);
+		err = extend_sec(linker, dst_sec, src_sec);
 		if (err)
 			return err;
 	}
@@ -1151,21 +1205,16 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
 static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
 {
 	struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
-	Elf64_Sym *sym = symtab->data->d_buf, *dst_sym;
-	int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+	Elf64_Sym *sym = symtab->data->d_buf;
+	int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err;
 	int str_sec_idx = symtab->shdr->sh_link;
+	const char *sym_name;
 
 	obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
 	if (!obj->sym_map)
 		return -ENOMEM;
 
 	for (i = 0; i < n; i++, sym++) {
-		struct src_sec *src_sec = NULL;
-		struct dst_sec *dst_sec = NULL;
-		const char *sym_name;
-		size_t dst_sym_idx;
-		int name_off;
-
 		/* We already validated all-zero symbol #0 and we already
 		 * appended it preventively to the final SYMTAB, so skip it.
 		 */
@@ -1178,41 +1227,624 @@ static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj
 			return -EINVAL;
 		}
 
-		if (sym->st_shndx && sym->st_shndx < SHN_LORESERVE) {
-			src_sec = &obj->secs[sym->st_shndx];
-			if (src_sec->skipped)
+		err = linker_append_elf_sym(linker, obj, sym, sym_name, i);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx)
+{
+	struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+	Elf64_Sym *syms = symtab->raw_data;
+
+	return &syms[sym_idx];
+}
+
+static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name)
+{
+	struct glob_sym *glob_sym;
+	const char *name;
+	int i;
+
+	for (i = 0; i < linker->glob_sym_cnt; i++) {
+		glob_sym = &linker->glob_syms[i];
+		name = strset__data(linker->strtab_strs) + glob_sym->name_off;
+
+		if (strcmp(name, sym_name) == 0)
+			return glob_sym;
+	}
+
+	return NULL;
+}
+
+static struct glob_sym *add_glob_sym(struct bpf_linker *linker)
+{
+	struct glob_sym *syms, *sym;
+
+	syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1,
+				   sizeof(*linker->glob_syms));
+	if (!syms)
+		return NULL;
+
+	sym = &syms[linker->glob_sym_cnt];
+	memset(sym, 0, sizeof(*sym));
+	sym->var_idx = -1;
+
+	linker->glob_syms = syms;
+	linker->glob_sym_cnt++;
+
+	return sym;
+}
+
+static bool glob_sym_btf_matches(const char *sym_name, bool exact,
+				 const struct btf *btf1, __u32 id1,
+				 const struct btf *btf2, __u32 id2)
+{
+	const struct btf_type *t1, *t2;
+	bool is_static1, is_static2;
+	const char *n1, *n2;
+	int i, n;
+
+recur:
+	n1 = n2 = NULL;
+	t1 = skip_mods_and_typedefs(btf1, id1, &id1);
+	t2 = skip_mods_and_typedefs(btf2, id2, &id2);
+
+	/* check if only one side is FWD, otherwise handle with common logic */
+	if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) {
+		n1 = btf__str_by_offset(btf1, t1->name_off);
+		n2 = btf__str_by_offset(btf2, t2->name_off);
+		if (strcmp(n1, n2) != 0) {
+			pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n",
+				sym_name, n1, n2);
+			return false;
+		}
+		/* validate if FWD kind matches concrete kind */
+		if (btf_is_fwd(t1)) {
+			if (btf_kflag(t1) && btf_is_union(t2))
+				return true;
+			if (!btf_kflag(t1) && btf_is_struct(t2))
+				return true;
+			pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+				sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2));
+		} else {
+			if (btf_kflag(t2) && btf_is_union(t1))
+				return true;
+			if (!btf_kflag(t2) && btf_is_struct(t1))
+				return true;
+			pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+				sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1));
+		}
+		return false;
+	}
+
+	if (btf_kind(t1) != btf_kind(t2)) {
+		pr_warn("global '%s': incompatible BTF kinds %s and %s\n",
+			sym_name, btf_kind_str(t1), btf_kind_str(t2));
+		return false;
+	}
+
+	switch (btf_kind(t1)) {
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+		n1 = btf__str_by_offset(btf1, t1->name_off);
+		n2 = btf__str_by_offset(btf2, t2->name_off);
+		if (strcmp(n1, n2) != 0) {
+			pr_warn("global '%s': incompatible %s names '%s' and '%s'\n",
+				sym_name, btf_kind_str(t1), n1, n2);
+			return false;
+		}
+		break;
+	default:
+		break;
+	}
+
+	switch (btf_kind(t1)) {
+	case BTF_KIND_UNKN: /* void */
+	case BTF_KIND_FWD:
+		return true;
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_ENUM:
+		/* ignore encoding for int and enum values for enum */
+		if (t1->size != t2->size) {
+			pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
+				sym_name, btf_kind_str(t1), n1, t1->size, t2->size);
+			return false;
+		}
+		return true;
+	case BTF_KIND_PTR:
+		/* just validate overall shape of the referenced type, so no
+		 * contents comparison for struct/union, and allowd fwd vs
+		 * struct/union
+		 */
+		exact = false;
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		/* ignore index type and array size */
+		id1 = btf_array(t1)->type;
+		id2 = btf_array(t2)->type;
+		goto recur;
+	case BTF_KIND_FUNC:
+		/* extern and global linkages are compatible */
+		is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC;
+		is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC;
+		if (is_static1 != is_static2) {
+			pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1);
+			return false;
+		}
+
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_VAR:
+		/* extern and global linkages are compatible */
+		is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC;
+		is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC;
+		if (is_static1 != is_static2) {
+			pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1);
+			return false;
+		}
+
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m1, *m2;
+
+		if (!exact)
+			return true;
+
+		if (btf_vlen(t1) != btf_vlen(t2)) {
+			pr_warn("global '%s': incompatible number of %s fields %u and %u\n",
+				sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+			return false;
+		}
+
+		n = btf_vlen(t1);
+		m1 = btf_members(t1);
+		m2 = btf_members(t2);
+		for (i = 0; i < n; i++, m1++, m2++) {
+			n1 = btf__str_by_offset(btf1, m1->name_off);
+			n2 = btf__str_by_offset(btf2, m2->name_off);
+			if (strcmp(n1, n2) != 0) {
+				pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n",
+					sym_name, i, n1, n2);
+				return false;
+			}
+			if (m1->offset != m2->offset) {
+				pr_warn("global '%s': incompatible field #%d ('%s') offsets\n",
+					sym_name, i, n1);
+				return false;
+			}
+			if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+				return false;
+		}
+
+		return true;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *m1, *m2;
+
+		if (btf_vlen(t1) != btf_vlen(t2)) {
+			pr_warn("global '%s': incompatible number of %s params %u and %u\n",
+				sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+			return false;
+		}
+
+		n = btf_vlen(t1);
+		m1 = btf_params(t1);
+		m2 = btf_params(t2);
+		for (i = 0; i < n; i++, m1++, m2++) {
+			/* ignore func arg names */
+			if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+				return false;
+		}
+
+		/* now check return type as well */
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	}
+
+	/* skip_mods_and_typedefs() make this impossible */
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+	/* DATASECs are never compared with each other */
+	case BTF_KIND_DATASEC:
+	default:
+		pr_warn("global '%s': unsupported BTF kind %s\n",
+			sym_name, btf_kind_str(t1));
+		return false;
+	}
+}
+
+static bool glob_syms_match(const char *sym_name,
+			    struct bpf_linker *linker, struct glob_sym *glob_sym,
+			    struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
+{
+	const struct btf_type *src_t;
+
+	/* if we are dealing with externs, BTF types describing both global
+	 * and extern VARs/FUNCs should be completely present in all files
+	 */
+	if (!glob_sym->btf_id || !btf_id) {
+		pr_warn("BTF info is missing for global symbol '%s'\n", sym_name);
+		return false;
+	}
+
+	src_t = btf__type_by_id(obj->btf, btf_id);
+	if (!btf_is_var(src_t) && !btf_is_func(src_t)) {
+		pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n",
+			btf_kind_str(src_t), sym_name);
+		return false;
+	}
+
+	if (!glob_sym_btf_matches(sym_name, true /*exact*/,
+				  linker->btf, glob_sym->btf_id, obj->btf, btf_id))
+		return false;
+
+	return true;
+}
+
+static bool btf_is_non_static(const struct btf_type *t)
+{
+	return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC)
+	       || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC);
+}
+
+static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+			     int *out_btf_sec_id, int *out_btf_id)
+{
+	int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
+	const struct btf_type *t;
+	const struct btf_var_secinfo *vi;
+	const char *name;
+
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(obj->btf, i);
+
+		/* some global and extern FUNCs and VARs might not be associated with any
+		 * DATASEC, so try to detect them in the same pass
+		 */
+		if (btf_is_non_static(t)) {
+			name = btf__str_by_offset(obj->btf, t->name_off);
+			if (strcmp(name, sym_name) != 0)
 				continue;
-			dst_sec = &linker->secs[src_sec->dst_id];
 
-			/* allow only one STT_SECTION symbol per section */
-			if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sec->sec_sym_idx) {
-				obj->sym_map[i] = dst_sec->sec_sym_idx;
+			/* remember and still try to find DATASEC */
+			btf_id = i;
+			continue;
+		}
+
+		if (!btf_is_datasec(t))
+			continue;
+
+		vi = btf_var_secinfos(t);
+		for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+			t = btf__type_by_id(obj->btf, vi->type);
+			name = btf__str_by_offset(obj->btf, t->name_off);
+
+			if (strcmp(name, sym_name) != 0)
+				continue;
+			if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC)
+				continue;
+			if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC)
 				continue;
+
+			if (btf_id && btf_id != vi->type) {
+				pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n",
+					sym_name, btf_id, vi->type);
+				return -EINVAL;
 			}
+
+			*out_btf_sec_id = i;
+			*out_btf_id = vi->type;
+
+			return 0;
 		}
+	}
 
-		name_off = strset__add_str(linker->strtab_strs, sym_name);
-		if (name_off < 0)
-			return name_off;
+	/* free-floating extern or global FUNC */
+	if (btf_id) {
+		*out_btf_sec_id = 0;
+		*out_btf_id = btf_id;
+		return 0;
+	}
 
-		dst_sym = add_new_sym(linker, &dst_sym_idx);
-		if (!dst_sym)
-			return -ENOMEM;
+	pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name);
+	return -ENOENT;
+}
 
-		dst_sym->st_name = name_off;
-		dst_sym->st_info = sym->st_info;
-		dst_sym->st_other = sym->st_other;
-		dst_sym->st_shndx = src_sec ? dst_sec->sec_idx : sym->st_shndx;
-		dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
-		dst_sym->st_size = sym->st_size;
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+	struct src_sec *sec;
+	int i;
 
-		obj->sym_map[i] = dst_sym_idx;
+	for (i = 1; i < obj->sec_cnt; i++) {
+		sec = &obj->secs[i];
 
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && dst_sym) {
-			dst_sec->sec_sym_idx = dst_sym_idx;
-			dst_sym->st_value = 0;
+		if (strcmp(sec->sec_name, sec_name) == 0)
+			return sec;
+	}
+
+	return NULL;
+}
+
+static int complete_extern_btf_info(struct btf *dst_btf, int dst_id,
+				    struct btf *src_btf, int src_id)
+{
+	struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id);
+	struct btf_type *src_t = btf_type_by_id(src_btf, src_id);
+	struct btf_param *src_p, *dst_p;
+	const char *s;
+	int i, n, off;
+
+	/* We already made sure that source and destination types (FUNC or
+	 * VAR) match in terms of types and argument names.
+	 */
+	if (btf_is_var(dst_t)) {
+		btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+		return 0;
+	}
+
+	dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0);
+
+	/* now onto FUNC_PROTO types */
+	src_t = btf_type_by_id(src_btf, src_t->type);
+	dst_t = btf_type_by_id(dst_btf, dst_t->type);
+
+	/* Fill in all the argument names, which for extern FUNCs are missing.
+	 * We'll end up with two copies of FUNCs/VARs for externs, but that
+	 * will be taken care of by BTF dedup at the very end.
+	 * It might be that BTF types for extern in one file has less/more BTF
+	 * information (e.g., FWD instead of full STRUCT/UNION information),
+	 * but that should be (in most cases, subject to BTF dedup rules)
+	 * handled and resolved by BTF dedup algorithm as well, so we won't
+	 * worry about it. Our only job is to make sure that argument names
+	 * are populated on both sides, otherwise BTF dedup will pedantically
+	 * consider them different.
+	 */
+	src_p = btf_params(src_t);
+	dst_p = btf_params(dst_t);
+	for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) {
+		if (!src_p->name_off)
+			continue;
+
+		/* src_btf has more complete info, so add name to dst_btf */
+		s = btf__str_by_offset(src_btf, src_p->name_off);
+		off = btf__add_str(dst_btf, s);
+		if (off < 0)
+			return off;
+		dst_p->name_off = off;
+	}
+	return 0;
+}
+
+static void sym_update_bind(Elf64_Sym *sym, int sym_bind)
+{
+	sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info));
+}
+
+static void sym_update_type(Elf64_Sym *sym, int sym_type)
+{
+	sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type);
+}
+
+static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
+{
+	/* libelf doesn't provide setters for ST_VISIBILITY,
+	 * but it is stored in the lower 2 bits of st_other
+	 */
+	sym->st_other &= 0x03;
+	sym->st_other |= sym_vis;
+}
+
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+				 Elf64_Sym *sym, const char *sym_name, int src_sym_idx)
+{
+	struct src_sec *src_sec = NULL;
+	struct dst_sec *dst_sec = NULL;
+	struct glob_sym *glob_sym = NULL;
+	int name_off, sym_type, sym_bind, sym_vis, err;
+	int btf_sec_id = 0, btf_id = 0;
+	size_t dst_sym_idx;
+	Elf64_Sym *dst_sym;
+	bool sym_is_extern;
+
+	sym_type = ELF64_ST_TYPE(sym->st_info);
+	sym_bind = ELF64_ST_BIND(sym->st_info);
+	sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+	sym_is_extern = sym->st_shndx == SHN_UNDEF;
+
+	if (sym_is_extern) {
+		if (!obj->btf) {
+			pr_warn("externs without BTF info are not supported\n");
+			return -ENOTSUP;
+		}
+	} else if (sym->st_shndx < SHN_LORESERVE) {
+		src_sec = &obj->secs[sym->st_shndx];
+		if (src_sec->skipped)
+			return 0;
+		dst_sec = &linker->secs[src_sec->dst_id];
+
+		/* allow only one STT_SECTION symbol per section */
+		if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) {
+			obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
+			return 0;
+		}
+	}
+
+	if (sym_bind == STB_LOCAL)
+		goto add_sym;
+
+	/* find matching BTF info */
+	err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id);
+	if (err)
+		return err;
+
+	if (sym_is_extern && btf_sec_id) {
+		const char *sec_name = NULL;
+		const struct btf_type *t;
+
+		t = btf__type_by_id(obj->btf, btf_sec_id);
+		sec_name = btf__str_by_offset(obj->btf, t->name_off);
+
+		/* Clang puts unannotated extern vars into
+		 * '.extern' BTF DATASEC. Treat them the same
+		 * as unannotated extern funcs (which are
+		 * currently not put into any DATASECs).
+		 * Those don't have associated src_sec/dst_sec.
+		 */
+		if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) {
+			src_sec = find_src_sec_by_name(obj, sec_name);
+			if (!src_sec) {
+				pr_warn("failed to find matching ELF sec '%s'\n", sec_name);
+				return -ENOENT;
+			}
+			dst_sec = &linker->secs[src_sec->dst_id];
+		}
+	}
+
+	glob_sym = find_glob_sym(linker, sym_name);
+	if (glob_sym) {
+		/* Preventively resolve to existing symbol. This is
+		 * needed for further relocation symbol remapping in
+		 * the next step of linking.
+		 */
+		obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+
+		/* If both symbols are non-externs, at least one of
+		 * them has to be STB_WEAK, otherwise they are in
+		 * a conflict with each other.
+		 */
+		if (!sym_is_extern && !glob_sym->is_extern
+		    && !glob_sym->is_weak && sym_bind != STB_WEAK) {
+			pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n",
+				src_sym_idx, sym_name, obj->filename);
+			return -EINVAL;
+		}
+
+		if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id))
+			return -EINVAL;
+
+		dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx);
+
+		/* If new symbol is strong, then force dst_sym to be strong as
+		 * well; this way a mix of weak and non-weak extern
+		 * definitions will end up being strong.
+		 */
+		if (sym_bind == STB_GLOBAL) {
+			/* We still need to preserve type (NOTYPE or
+			 * OBJECT/FUNC, depending on whether the symbol is
+			 * extern or not)
+			 */
+			sym_update_bind(dst_sym, STB_GLOBAL);
+			glob_sym->is_weak = false;
 		}
 
+		/* Non-default visibility is "contaminating", with stricter
+		 * visibility overwriting more permissive ones, even if more
+		 * permissive visibility comes from just an extern definition.
+		 * Currently only STV_DEFAULT and STV_HIDDEN are allowed and
+		 * ensured by ELF symbol sanity checks above.
+		 */
+		if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other))
+			sym_update_visibility(dst_sym, sym_vis);
+
+		/* If the new symbol is extern, then regardless if
+		 * existing symbol is extern or resolved global, just
+		 * keep the existing one untouched.
+		 */
+		if (sym_is_extern)
+			return 0;
+
+		/* If existing symbol is a strong resolved symbol, bail out,
+		 * because we lost resolution battle have nothing to
+		 * contribute. We already checked abover that there is no
+		 * strong-strong conflict. We also already tightened binding
+		 * and visibility, so nothing else to contribute at that point.
+		 */
+		if (!glob_sym->is_extern && sym_bind == STB_WEAK)
+			return 0;
+
+		/* At this point, new symbol is strong non-extern,
+		 * so overwrite glob_sym with new symbol information.
+		 * Preserve binding and visibility.
+		 */
+		sym_update_type(dst_sym, sym_type);
+		dst_sym->st_shndx = dst_sec->sec_idx;
+		dst_sym->st_value = src_sec->dst_off + sym->st_value;
+		dst_sym->st_size = sym->st_size;
+
+		/* see comment below about dst_sec->id vs dst_sec->sec_idx */
+		glob_sym->sec_id = dst_sec->id;
+		glob_sym->is_extern = false;
+
+		if (complete_extern_btf_info(linker->btf, glob_sym->btf_id,
+					     obj->btf, btf_id))
+			return -EINVAL;
+
+		/* request updating VAR's/FUNC's underlying BTF type when appending BTF type */
+		glob_sym->underlying_btf_id = 0;
+
+		obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+		return 0;
+	}
+
+add_sym:
+	name_off = strset__add_str(linker->strtab_strs, sym_name);
+	if (name_off < 0)
+		return name_off;
+
+	dst_sym = add_new_sym(linker, &dst_sym_idx);
+	if (!dst_sym)
+		return -ENOMEM;
+
+	dst_sym->st_name = name_off;
+	dst_sym->st_info = sym->st_info;
+	dst_sym->st_other = sym->st_other;
+	dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx;
+	dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+	dst_sym->st_size = sym->st_size;
+
+	obj->sym_map[src_sym_idx] = dst_sym_idx;
+
+	if (sym_type == STT_SECTION && dst_sym) {
+		dst_sec->sec_sym_idx = dst_sym_idx;
+		dst_sym->st_value = 0;
+	}
+
+	if (sym_bind != STB_LOCAL) {
+		glob_sym = add_glob_sym(linker);
+		if (!glob_sym)
+			return -ENOMEM;
+
+		glob_sym->sym_idx = dst_sym_idx;
+		/* we use dst_sec->id (and not dst_sec->sec_idx), because
+		 * ephemeral sections (.kconfig, .ksyms, etc) don't have
+		 * sec_idx (as they don't have corresponding ELF section), but
+		 * still have id. .extern doesn't have even ephemeral section
+		 * associated with it, so dst_sec->id == dst_sec->sec_idx == 0.
+		 */
+		glob_sym->sec_id = dst_sec ? dst_sec->id : 0;
+		glob_sym->name_off = name_off;
+		/* we will fill btf_id in during BTF merging step */
+		glob_sym->btf_id = 0;
+		glob_sym->is_extern = sym_is_extern;
+		glob_sym->is_weak = sym_bind == STB_WEAK;
 	}
 
 	return 0;
@@ -1262,7 +1894,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 		dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
 
 		src_sec->dst_id = dst_sec->id;
-		err = extend_sec(dst_sec, src_sec);
+		err = extend_sec(linker, dst_sec, src_sec);
 		if (err)
 			return err;
 
@@ -1315,21 +1947,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 	return 0;
 }
 
-static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
-{
-	struct src_sec *sec;
-	int i;
-
-	for (i = 1; i < obj->sec_cnt; i++) {
-		sec = &obj->secs[i];
-
-		if (strcmp(sec->sec_name, sec_name) == 0)
-			return sec;
-	}
-
-	return NULL;
-}
-
 static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
 				   int sym_type, const char *sym_name)
 {
@@ -1384,12 +2001,32 @@ static int linker_fixup_btf(struct src_obj *obj)
 				t->size = sec->shdr->sh_size;
 		} else {
 			/* BTF can have some sections that are not represented
-			 * in ELF, e.g., .kconfig and .ksyms, which are used
-			 * for special extern variables.  Here we'll
-			 * pre-create "section shells" for them to be able to
-			 * keep track of extra per-section metadata later
-			 * (e.g., BTF variables).
+			 * in ELF, e.g., .kconfig, .ksyms, .extern, which are used
+			 * for special extern variables.
+			 *
+			 * For all but one such special (ephemeral)
+			 * sections, we pre-create "section shells" to be able
+			 * to keep track of extra per-section metadata later
+			 * (e.g., those BTF extern variables).
+			 *
+			 * .extern is even more special, though, because it
+			 * contains extern variables that need to be resolved
+			 * by static linker, not libbpf and kernel. When such
+			 * externs are resolved, we are going to remove them
+			 * from .extern BTF section and might end up not
+			 * needing it at all. Each resolved extern should have
+			 * matching non-extern VAR/FUNC in other sections.
+			 *
+			 * We do support leaving some of the externs
+			 * unresolved, though, to support cases of building
+			 * libraries, which will later be linked against final
+			 * BPF applications. So if at finalization we still
+			 * see unresolved externs, we'll create .extern
+			 * section on our own.
 			 */
+			if (strcmp(sec_name, BTF_EXTERN_SEC) == 0)
+				continue;
+
 			sec = add_src_sec(obj, sec_name);
 			if (!sec)
 				return -ENOMEM;
@@ -1446,6 +2083,7 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 {
 	const struct btf_type *t;
 	int i, j, n, start_id, id;
+	const char *name;
 
 	if (!obj->btf)
 		return 0;
@@ -1458,12 +2096,44 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 		return -ENOMEM;
 
 	for (i = 1; i <= n; i++) {
+		struct glob_sym *glob_sym = NULL;
+
 		t = btf__type_by_id(obj->btf, i);
 
 		/* DATASECs are handled specially below */
 		if (btf_kind(t) == BTF_KIND_DATASEC)
 			continue;
 
+		if (btf_is_non_static(t)) {
+			/* there should be glob_sym already */
+			name = btf__str_by_offset(obj->btf, t->name_off);
+			glob_sym = find_glob_sym(linker, name);
+
+			/* VARs without corresponding glob_sym are those that
+			 * belong to skipped/deduplicated sections (i.e.,
+			 * license and version), so just skip them
+			 */
+			if (!glob_sym)
+				continue;
+
+			/* linker_append_elf_sym() might have requested
+			 * updating underlying type ID, if extern was resolved
+			 * to strong symbol or weak got upgraded to non-weak
+			 */
+			if (glob_sym->underlying_btf_id == 0)
+				glob_sym->underlying_btf_id = -t->type;
+
+			/* globals from previous object files that match our
+			 * VAR/FUNC already have a corresponding associated
+			 * BTF type, so just make sure to use it
+			 */
+			if (glob_sym->btf_id) {
+				/* reuse existing BTF type for global var/func */
+				obj->btf_type_map[i] = glob_sym->btf_id;
+				continue;
+			}
+		}
+
 		id = btf__add_type(linker->btf, obj->btf, t);
 		if (id < 0) {
 			pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
@@ -1471,6 +2141,12 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 		}
 
 		obj->btf_type_map[i] = id;
+
+		/* record just appended BTF type for var/func */
+		if (glob_sym) {
+			glob_sym->btf_id = id;
+			glob_sym->underlying_btf_id = -t->type;
+		}
 	}
 
 	/* remap all the types except DATASECs */
@@ -1482,6 +2158,22 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 			return -EINVAL;
 	}
 
+	/* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's
+	 * actual type), if necessary
+	 */
+	for (i = 0; i < linker->glob_sym_cnt; i++) {
+		struct glob_sym *glob_sym = &linker->glob_syms[i];
+		struct btf_type *glob_t;
+
+		if (glob_sym->underlying_btf_id >= 0)
+			continue;
+
+		glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id];
+
+		glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id);
+		glob_t->type = glob_sym->underlying_btf_id;
+	}
+
 	/* append DATASEC info */
 	for (i = 1; i < obj->sec_cnt; i++) {
 		struct src_sec *src_sec;
@@ -1509,6 +2201,42 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 		n = btf_vlen(t);
 		for (j = 0; j < n; j++, src_var++) {
 			void *sec_vars = dst_sec->sec_vars;
+			int new_id = obj->btf_type_map[src_var->type];
+			struct glob_sym *glob_sym = NULL;
+
+			t = btf_type_by_id(linker->btf, new_id);
+			if (btf_is_non_static(t)) {
+				name = btf__str_by_offset(linker->btf, t->name_off);
+				glob_sym = find_glob_sym(linker, name);
+				if (glob_sym->sec_id != dst_sec->id) {
+					pr_warn("global '%s': section mismatch %d vs %d\n",
+						name, glob_sym->sec_id, dst_sec->id);
+					return -EINVAL;
+				}
+			}
+
+			/* If there is already a member (VAR or FUNC) mapped
+			 * to the same type, don't add a duplicate entry.
+			 * This will happen when multiple object files define
+			 * the same extern VARs/FUNCs.
+			 */
+			if (glob_sym && glob_sym->var_idx >= 0) {
+				__s64 sz;
+
+				dst_var = &dst_sec->sec_vars[glob_sym->var_idx];
+				/* Because underlying BTF type might have
+				 * changed, so might its size have changed, so
+				 * re-calculate and update it in sec_var.
+				 */
+				sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id);
+				if (sz < 0) {
+					pr_warn("global '%s': failed to resolve size of underlying type: %d\n",
+						name, (int)sz);
+					return -EINVAL;
+				}
+				dst_var->size = sz;
+				continue;
+			}
 
 			sec_vars = libbpf_reallocarray(sec_vars,
 						       dst_sec->sec_var_cnt + 1,
@@ -1523,6 +2251,9 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
 			dst_var->type = obj->btf_type_map[src_var->type];
 			dst_var->size = src_var->size;
 			dst_var->offset = src_sec->dst_off + src_var->offset;
+
+			if (glob_sym)
+				glob_sym->var_idx = dst_sec->sec_var_cnt - 1;
 		}
 	}
 
-- 
cgit v1.2.3


From 0a342457b3bd36e6f9b558da3ff520dee35c5363 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:42 -0700
Subject: libbpf: Support extern resolution for BTF-defined maps in .maps
 section

Add extra logic to handle map externs (only BTF-defined maps are supported for
linking). Re-use the map parsing logic used during bpf_object__open(). Map
externs are currently restricted to always match complete map definition. So
all the specified attributes will be compared (down to pining, map_flags,
numa_node, etc). In the future this restriction might be relaxed with no
backwards compatibility issues. If any attribute is mismatched between extern
and actual map definition, linker will report an error, pointing out which one
mismatches.

The original intent was to allow for extern to specify attributes that matters
(to user) to enforce. E.g., if you specify just key information and omit
value, then any value fits. Similarly, it should have been possible to enforce
map_flags, pinning, and any other possible map attribute. Unfortunately, that
means that multiple externs can be only partially overlapping with each other,
which means linker would need to combine their type definitions to end up with
the most restrictive and fullest map definition. This requires an extra amount
of BTF manipulation which at this time was deemed unnecessary and would
require further extending generic BTF writer APIs. So that is left for future
follow ups, if there will be demand for that. But the idea seems intresting
and useful, so I want to document it here.

Weak definitions are also supported, but are pretty strict as well, just
like externs: all weak map definitions have to match exactly. In the follow up
patches this most probably will be relaxed, with __weak map definitions being
able to differ between each other (with non-weak definition always winning, of
course).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-13-andrii@kernel.org
---
 tools/lib/bpf/linker.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 23f49945dc7a..9de084b1c699 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1471,6 +1471,134 @@ recur:
 	}
 }
 
+static bool map_defs_match(const char *sym_name,
+			   const struct btf *main_btf,
+			   const struct btf_map_def *main_def,
+			   const struct btf_map_def *main_inner_def,
+			   const struct btf *extra_btf,
+			   const struct btf_map_def *extra_def,
+			   const struct btf_map_def *extra_inner_def)
+{
+	const char *reason;
+
+	if (main_def->map_type != extra_def->map_type) {
+		reason = "type";
+		goto mismatch;
+	}
+
+	/* check key type/size match */
+	if (main_def->key_size != extra_def->key_size) {
+		reason = "key_size";
+		goto mismatch;
+	}
+	if (!!main_def->key_type_id != !!extra_def->key_type_id) {
+		reason = "key type";
+		goto mismatch;
+	}
+	if ((main_def->parts & MAP_DEF_KEY_TYPE)
+	     && !glob_sym_btf_matches(sym_name, true /*exact*/,
+				      main_btf, main_def->key_type_id,
+				      extra_btf, extra_def->key_type_id)) {
+		reason = "key type";
+		goto mismatch;
+	}
+
+	/* validate value type/size match */
+	if (main_def->value_size != extra_def->value_size) {
+		reason = "value_size";
+		goto mismatch;
+	}
+	if (!!main_def->value_type_id != !!extra_def->value_type_id) {
+		reason = "value type";
+		goto mismatch;
+	}
+	if ((main_def->parts & MAP_DEF_VALUE_TYPE)
+	     && !glob_sym_btf_matches(sym_name, true /*exact*/,
+				      main_btf, main_def->value_type_id,
+				      extra_btf, extra_def->value_type_id)) {
+		reason = "key type";
+		goto mismatch;
+	}
+
+	if (main_def->max_entries != extra_def->max_entries) {
+		reason = "max_entries";
+		goto mismatch;
+	}
+	if (main_def->map_flags != extra_def->map_flags) {
+		reason = "map_flags";
+		goto mismatch;
+	}
+	if (main_def->numa_node != extra_def->numa_node) {
+		reason = "numa_node";
+		goto mismatch;
+	}
+	if (main_def->pinning != extra_def->pinning) {
+		reason = "pinning";
+		goto mismatch;
+	}
+
+	if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) {
+		reason = "inner map";
+		goto mismatch;
+	}
+
+	if (main_def->parts & MAP_DEF_INNER_MAP) {
+		char inner_map_name[128];
+
+		snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name);
+
+		return map_defs_match(inner_map_name,
+				      main_btf, main_inner_def, NULL,
+				      extra_btf, extra_inner_def, NULL);
+	}
+
+	return true;
+
+mismatch:
+	pr_warn("global '%s': map %s mismatch\n", sym_name, reason);
+	return false;
+}
+
+static bool glob_map_defs_match(const char *sym_name,
+				struct bpf_linker *linker, struct glob_sym *glob_sym,
+				struct src_obj *obj, Elf64_Sym *sym, int btf_id)
+{
+	struct btf_map_def dst_def = {}, dst_inner_def = {};
+	struct btf_map_def src_def = {}, src_inner_def = {};
+	const struct btf_type *t;
+	int err;
+
+	t = btf__type_by_id(obj->btf, btf_id);
+	if (!btf_is_var(t)) {
+		pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id);
+		return false;
+	}
+	t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+
+	err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def);
+	if (err) {
+		pr_warn("global '%s': invalid map definition\n", sym_name);
+		return false;
+	}
+
+	/* re-parse existing map definition */
+	t = btf__type_by_id(linker->btf, glob_sym->btf_id);
+	t = skip_mods_and_typedefs(linker->btf, t->type, NULL);
+	err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, &dst_inner_def);
+	if (err) {
+		/* this should not happen, because we already validated it */
+		pr_warn("global '%s': invalid dst map definition\n", sym_name);
+		return false;
+	}
+
+	/* Currently extern map definition has to be complete and match
+	 * concrete map definition exactly. This restriction might be lifted
+	 * in the future.
+	 */
+	return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def,
+			      obj->btf, &src_def, &src_inner_def);
+}
+
 static bool glob_syms_match(const char *sym_name,
 			    struct bpf_linker *linker, struct glob_sym *glob_sym,
 			    struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
@@ -1492,6 +1620,10 @@ static bool glob_syms_match(const char *sym_name,
 		return false;
 	}
 
+	/* deal with .maps definitions specially */
+	if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0)
+		return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id);
+
 	if (!glob_sym_btf_matches(sym_name, true /*exact*/,
 				  linker->btf, glob_sym->btf_id, obj->btf, btf_id))
 		return false;
-- 
cgit v1.2.3


From 41c472e85b531a228067bee9be59a508900bcd9f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:43 -0700
Subject: selftests/bpf: Use -O0 instead of -Og in selftests builds

While -Og is designed to work well with debugger, it's still inferior to -O0
in terms of debuggability experience. It will cause some variables to still be
inlined, it will also prevent single-stepping some statements and otherwise
interfere with debugging experience. So switch to -O0 which turns off any
optimization and provides the best debugging experience.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-14-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c5bcdb3d4b12..dd8061069b08 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -21,7 +21,7 @@ endif
 
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
-CFLAGS += -g -Og -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)		\
+CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS)		\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)			\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
@@ -205,7 +205,7 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			       \
 		    CC=$(HOSTCC) LD=$(HOSTLD)				       \
-		    EXTRA_CFLAGS='-g -Og'				       \
+		    EXTRA_CFLAGS='-g -O0'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
 
@@ -225,7 +225,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
 	   | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
-		    EXTRA_CFLAGS='-g -Og'					       \
+		    EXTRA_CFLAGS='-g -O0'				       \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
 
 ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -233,7 +233,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)                \
 	   ../../../include/uapi/linux/bpf.h                                   \
 	   | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR)                             \
-		    EXTRA_CFLAGS='-g -Og'					       \
+		    EXTRA_CFLAGS='-g -O0'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
 		    DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
 endif
-- 
cgit v1.2.3


From b131aed910097a2eeac8180bf3cf214eea475d9a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:44 -0700
Subject: selftests/bpf: Omit skeleton generation for multi-linked BPF object
 files

Skip generating individual BPF skeletons for files that are supposed to be
linked together to form the final BPF object file. Very often such files are
"incomplete" BPF object files, which will fail libbpf bpf_object__open() step,
if used individually, thus failing BPF skeleton generation. This is by design,
so skip individual BPF skeletons and only validate them as part of their
linked final BPF object file and skeleton.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-15-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index dd8061069b08..dd3692ec56d3 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -313,6 +313,8 @@ LINKED_SKELS := test_static_linked.skel.h
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 
+LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
 # Parameters:
@@ -331,7 +333,7 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
 TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
 TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
-				 $$(filter-out $(SKEL_BLACKLIST),	\
+				 $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
 					       $$(TRUNNER_BPF_SRCS)))
 TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
 TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
-- 
cgit v1.2.3


From f2644fb44de9abd54e57b55f584c7c67526f7c02 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:45 -0700
Subject: selftests/bpf: Add function linking selftest

Add selftest validating various aspects of statically linking functions:
  - no conflicts and correct resolution for name-conflicting static funcs;
  - correct resolution of extern functions;
  - correct handling of weak functions, both resolution itself and libbpf's
    handling of unused weak function that "lost" (it leaves gaps in code with
    no ELF symbols);
  - correct handling of hidden visibility to turn global function into
    "static" for the purpose of BPF verification.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-16-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  3 +-
 .../selftests/bpf/prog_tests/linked_funcs.c        | 42 +++++++++++++
 tools/testing/selftests/bpf/progs/linked_funcs1.c  | 73 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/linked_funcs2.c  | 73 ++++++++++++++++++++++
 4 files changed, 190 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/linked_funcs.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_funcs1.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_funcs2.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index dd3692ec56d3..ab7b129a8408 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -309,9 +309,10 @@ endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
-LINKED_SKELS := test_static_linked.skel.h
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
 
 LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..e9916f2817ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+void test_linked_funcs(void)
+{
+	int err;
+	struct linked_funcs *skel;
+
+	skel = linked_funcs__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->rodata->my_tid = syscall(SYS_gettid);
+	skel->bss->syscall_id = SYS_getpgid;
+
+	err = linked_funcs__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = linked_funcs__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+	ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+	ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+	ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+	ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+	/* output_weak2 should never be updated */
+	ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+	linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..b964ec1390c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between two files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+	/* but different formula */
+	return x * 1;
+}
+
+/* Global functions can't be void */
+int set_output_val1(int x)
+{
+	output_val1 = x + subprog(x);
+	return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+	output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+	output_weak1 = x;
+	return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+		return 0;
+
+	set_output_val2(1000);
+	set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+	/* keep input value the same across both files to avoid dependency on
+	 * handler call order; differentiate by output_weak1 vs output_weak2.
+	 */
+	set_output_weak(42);
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..575e958e60b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+	/* but different formula */
+	return x * 2;
+}
+
+/* Global functions can't be void */
+int set_output_val2(int x)
+{
+	output_val2 = 2 * x + 2 * subprog(x);
+	return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+	output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+	output_weak2 = x;
+	return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+		return 0;
+
+	set_output_val1(2000);
+	set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+	/* keep input value the same across both files to avoid dependency on
+	 * handler call order; differentiate by output_weak1 vs output_weak2.
+	 */
+	set_output_weak(42);
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 14f1aae17ee13d08315873d4b68d573e91df892f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:46 -0700
Subject: selftests/bpf: Add global variables linking selftest

Add selftest validating various aspects of statically linking global
variables:
  - correct resolution of extern variables across .bss, .data, and .rodata
    sections;
  - correct handling of weak definitions;
  - correct de-duplication of repeating special externs (.kconfig, .ksyms).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-17-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  3 +-
 .../testing/selftests/bpf/prog_tests/linked_vars.c | 43 +++++++++++++++++
 tools/testing/selftests/bpf/progs/linked_vars1.c   | 54 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/linked_vars2.c   | 55 ++++++++++++++++++++++
 4 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/linked_vars.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_vars1.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_vars2.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ab7b129a8408..411e2cf07ba5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -309,10 +309,11 @@ endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
-LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h linked_vars.skel.h
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
+linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
 
 LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+void test_linked_vars(void)
+{
+	int err;
+	struct linked_vars *skel;
+
+	skel = linked_vars__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->bss->input_bss1 = 1000;
+	skel->bss->input_bss2 = 2000;
+	skel->bss->input_bss_weak = 3000;
+
+	err = linked_vars__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = linked_vars__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+	ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+	/* 10 comes from "winner" input_data_weak in first obj file */
+	ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1");
+	ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2");
+	/* 100 comes from "winner" input_rodata_weak in first obj file */
+	ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1");
+	ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2");
+
+cleanup:
+	linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will be strict due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+	/* just make sure all the relocations work against .text as well */
+	return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+	output_bss1 = get_bss_res();
+	output_data1 = input_data1 + input_data2 + input_data_weak;
+	output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+	/* make sure we actually use above special externs, otherwise compiler
+	 * will optimize them out
+	 */
+	output_sink1 = LINUX_KERNEL_VERSION
+		       + CONFIG_BPF_SYSCALL
+		       + (long)&bpf_link_fops;
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+int output_sink2;
+
+static __noinline int get_data_res(void)
+{
+	/* just make sure all the relocations work against .text as well */
+	return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+	output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+	output_data2 = get_data_res();
+	output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+	/* make sure we actually use above special externs, otherwise compiler
+	 * will optimize them out
+	 */
+	output_sink2 = LINUX_KERNEL_VERSION
+		       + CONFIG_BPF_SYSCALL
+		       + (long)&__start_BTF;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 3b2ad502256b7f0f9415978fd7f158656d11401e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:47 -0700
Subject: selftests/bpf: Add map linking selftest

Add selftest validating various aspects of statically linking BTF-defined map
definitions. Legacy map definitions do not support extern resolution between
object files. Some of the aspects validated:
  - correct resolution of extern maps against concrete map definitions;
  - extern maps can currently only specify map type and key/value size and/or
    type information;
  - weak concrete map definitions are resolved properly.

Static map definitions are not yet supported by libbpf, so they are not
explicitly tested, though manual testing showes that BPF linker handles them
properly.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-18-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |  4 +-
 .../testing/selftests/bpf/prog_tests/linked_maps.c | 30 ++++++++
 tools/testing/selftests/bpf/progs/linked_maps1.c   | 82 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/linked_maps2.c   | 76 ++++++++++++++++++++
 4 files changed, 191 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/linked_maps.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_maps1.c
 create mode 100644 tools/testing/selftests/bpf/progs/linked_maps2.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 411e2cf07ba5..283e5ad8385e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -309,11 +309,13 @@ endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
-LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h linked_vars.skel.h
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
+		linked_vars.skel.h linked_maps.skel.h
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
 linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
+linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
 
 LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+void test_linked_maps(void)
+{
+	int err;
+	struct linked_maps *skel;
+
+	skel = linked_maps__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	err = linked_maps__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger */
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+	ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+	ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+	linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..52291515cc72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct my_key);
+	__type(value, struct my_value);
+	__uint(max_entries, 16);
+} map1 SEC(".maps");
+
+ /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+  * matter.
+  */
+typedef struct {
+	__uint(max_entries, 8);
+	__type(key, int);
+	__type(value, int);
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+	/* update values with key = 1 */
+	int key = 1, val = 1;
+	struct my_key key_struct = { .x = 1 };
+	struct my_value val_struct = { .x = 1000 };
+
+	bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+	bpf_map_update_elem(&map2, &key, &val, 0);
+	bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+	/* lookup values with key = 2, set in another file */
+	int key = 2, *val;
+	struct my_key key_struct = { .x = 2 };
+	struct my_value *value_struct;
+
+	value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+	if (value_struct)
+		output_first1 = value_struct->x;
+
+	val = bpf_map_lookup_elem(&map2, &key);
+	if (val)
+		output_second1 = *val;
+
+	val = bpf_map_lookup_elem(&map_weak, &key);
+	if (val)
+		output_weak1 = *val;
+	
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+	__uint(max_entries, 16);
+	__type(key, key_type);
+	__type(value, value_type);
+	__uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+	/* update values with key = 2 */
+	int key = 2, val = 2;
+	key_type key_struct = { .x = 2 };
+	value_type val_struct = { .x = 2000 };
+
+	bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+	bpf_map_update_elem(&map2, &key, &val, 0);
+	bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+	/* lookup values with key = 1, set in another file */
+	int key = 1, *val;
+	key_type key_struct = { .x = 1 };
+	value_type *value_struct;
+
+	value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+	if (value_struct)
+		output_first2 = value_struct->x;
+
+	val = bpf_map_lookup_elem(&map2, &key);
+	if (val)
+		output_second2 = *val;
+
+	val = bpf_map_lookup_elem(&map_weak, &key);
+	if (val)
+		output_weak2 = *val;
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From a9dab4e4569425e26cd9c2d8bdcc74bd12fcb8bf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Apr 2021 11:13:48 -0700
Subject: selftests/bpf: Document latest Clang fix expectations for linking
 tests

Document which fixes are required to generate correct static linking
selftests.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210423181348.1801389-19-andrii@kernel.org
---
 tools/testing/selftests/bpf/README.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 65fe318d1e71..3353778c30f8 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -193,3 +193,12 @@ Without it, the error from compiling bpf selftests looks like:
   libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
 
 __ https://reviews.llvm.org/D93563
+
+Clang dependencies for static linking tests
+===========================================
+
+linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
+generate valid BTF information for weak variables. Please make sure you use
+Clang that contains the fix.
+
+__ https://reviews.llvm.org/D100362
-- 
cgit v1.2.3


From cb9d80f4940ee5d4b7c7cad7418a6c893c6c4279 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 23 Apr 2021 11:17:05 -0700
Subject: mptcp: implement dummy MSG_ERRQUEUE support

mptcp_recvmsg() currently silently ignores MSG_ERRQUEUE, returning
input data instead of error cmsg.

This change provides a dummy implementation for MSG_ERRQUEUE - always
returns no data. That is consistent with the current lack of a suitable
IP_RECVERR setsockopt() support.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c14ac2975736..1d5f23bd640c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1945,6 +1945,10 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	int target;
 	long timeo;
 
+	/* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
 	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
 		return -EOPNOTSUPP;
 
-- 
cgit v1.2.3


From d976092ce1b04d634d408f475224347cfae81201 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 23 Apr 2021 11:17:06 -0700
Subject: mptcp: implement MSG_TRUNC support

The mentioned flag is currently silenlty ignored. This
change implements the TCP-like behaviour, dropping the
pending data up to the specified length.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Sigend-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1d5f23bd640c..ae08c563c712 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1739,7 +1739,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
 
 static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 				struct msghdr *msg,
-				size_t len)
+				size_t len, int flags)
 {
 	struct sk_buff *skb;
 	int copied = 0;
@@ -1750,11 +1750,13 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 		u32 count = min_t(size_t, len - copied, data_len);
 		int err;
 
-		err = skb_copy_datagram_msg(skb, offset, msg, count);
-		if (unlikely(err < 0)) {
-			if (!copied)
-				return err;
-			break;
+		if (!(flags & MSG_TRUNC)) {
+			err = skb_copy_datagram_msg(skb, offset, msg, count);
+			if (unlikely(err < 0)) {
+				if (!copied)
+					return err;
+				break;
+			}
 		}
 
 		copied += count;
@@ -1966,7 +1968,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	while (copied < len) {
 		int bytes_read;
 
-		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
+		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags);
 		if (unlikely(bytes_read < 0)) {
 			if (!copied)
 				copied = bytes_read;
-- 
cgit v1.2.3


From 987858e5d026d355535b34f17c6ceeb1d71ccf75 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 23 Apr 2021 11:17:07 -0700
Subject: mptcp: ignore unsupported msg flags

Currently mptcp_sendmsg() fails with EOPNOTSUPP if the
user-space provides some unsupported flag. That is unexpected
and may foul existing applications migrated to MPTCP, which
expect a different behavior.

Change the mentioned function to silently ignore the unsupported
flags except MSG_FASTOPEN. This is the only flags currently not
supported by MPTCP with user-space visible side-effects.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/162
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ae08c563c712..a996dd5bb0c2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1614,9 +1614,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int ret = 0;
 	long timeo;
 
-	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
+	/* we don't support FASTOPEN yet */
+	if (msg->msg_flags & MSG_FASTOPEN)
 		return -EOPNOTSUPP;
 
+	/* silently ignore everything else */
+	msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
+
 	mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1951,9 +1955,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
-	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
-		return -EOPNOTSUPP;
-
 	mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk));
 	if (unlikely(sk->sk_state == TCP_LISTEN)) {
 		copied = -ENOTCONN;
-- 
cgit v1.2.3


From ca4fb892579f110d3ab4865eb2aef36be7683a7c Mon Sep 17 00:00:00 2001
From: Yonglong Li <liyonglong@chinatelecom.cn>
Date: Fri, 23 Apr 2021 11:17:08 -0700
Subject: mptcp: add MSG_PEEK support

This patch adds support for MSG_PEEK flag. Packets are not removed
from the receive_queue if MSG_PEEK set in recv() system call.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a996dd5bb0c2..8bf21996734d 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1745,10 +1745,10 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 				struct msghdr *msg,
 				size_t len, int flags)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb, *tmp;
 	int copied = 0;
 
-	while ((skb = skb_peek(&msk->receive_queue)) != NULL) {
+	skb_queue_walk_safe(&msk->receive_queue, skb, tmp) {
 		u32 offset = MPTCP_SKB_CB(skb)->offset;
 		u32 data_len = skb->len - offset;
 		u32 count = min_t(size_t, len - copied, data_len);
@@ -1766,15 +1766,18 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 		copied += count;
 
 		if (count < data_len) {
-			MPTCP_SKB_CB(skb)->offset += count;
+			if (!(flags & MSG_PEEK))
+				MPTCP_SKB_CB(skb)->offset += count;
 			break;
 		}
 
-		/* we will bulk release the skb memory later */
-		skb->destructor = NULL;
-		msk->rmem_released += skb->truesize;
-		__skb_unlink(skb, &msk->receive_queue);
-		__kfree_skb(skb);
+		if (!(flags & MSG_PEEK)) {
+			/* we will bulk release the skb memory later */
+			skb->destructor = NULL;
+			msk->rmem_released += skb->truesize;
+			__skb_unlink(skb, &msk->receive_queue);
+			__kfree_skb(skb);
+		}
 
 		if (copied >= len)
 			break;
@@ -2053,7 +2056,8 @@ out_err:
 	pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
 		 msk, test_bit(MPTCP_DATA_READY, &msk->flags),
 		 skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
-	mptcp_rcv_space_adjust(msk, copied);
+	if (!(flags & MSG_PEEK))
+		mptcp_rcv_space_adjust(msk, copied);
 
 	release_sock(sk);
 	return copied;
-- 
cgit v1.2.3


From df8aee6d6fa520ff77f48d46ebd2034249669033 Mon Sep 17 00:00:00 2001
From: Yonglong Li <liyonglong@chinatelecom.cn>
Date: Fri, 23 Apr 2021 11:17:09 -0700
Subject: selftests: mptcp: add a test case for MSG_PEEK

Extend mptcp_connect tool with MSG_PEEK support and add a test case in
mptcp_connect.sh that checks the data received from/after recv() with
MSG_PEEK.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c  | 48 +++++++++++++++++++++-
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 29 ++++++++++---
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 2f207cf33661..d88e1fdfb147 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -45,7 +45,14 @@ enum cfg_mode {
 	CFG_MODE_SENDFILE,
 };
 
+enum cfg_peek {
+	CFG_NONE_PEEK,
+	CFG_WITH_PEEK,
+	CFG_AFTER_PEEK,
+};
+
 static enum cfg_mode cfg_mode = CFG_MODE_POLL;
+static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
 static const char *cfg_host;
 static const char *cfg_port	= "12000";
 static int cfg_sock_proto	= IPPROTO_MPTCP;
@@ -73,6 +80,8 @@ static void die_usage(void)
 	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
 	fprintf(stderr, "\t-u -- check mptcp ulp\n");
 	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+	fprintf(stderr,
+		"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
 	exit(1);
 }
 
@@ -331,6 +340,8 @@ static size_t do_write(const int fd, char *buf, const size_t len)
 
 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 {
+	int ret = 0;
+	char tmp[16384];
 	size_t cap = rand();
 
 	cap &= 0xffff;
@@ -340,7 +351,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
 	else if (cap > len)
 		cap = len;
 
-	return read(fd, buf, cap);
+	if (cfg_peek == CFG_WITH_PEEK) {
+		ret = recv(fd, buf, cap, MSG_PEEK);
+		ret = (ret < 0) ? ret : read(fd, tmp, ret);
+	} else if (cfg_peek == CFG_AFTER_PEEK) {
+		ret = recv(fd, buf, cap, MSG_PEEK);
+		ret = (ret < 0) ? ret : read(fd, buf, cap);
+	} else {
+		ret = read(fd, buf, cap);
+	}
+
+	return ret;
 }
 
 static void set_nonblock(int fd)
@@ -819,6 +840,26 @@ int parse_mode(const char *mode)
 	return 0;
 }
 
+int parse_peek(const char *mode)
+{
+	if (!strcasecmp(mode, "saveWithPeek"))
+		return CFG_WITH_PEEK;
+	if (!strcasecmp(mode, "saveAfterPeek"))
+		return CFG_AFTER_PEEK;
+
+	fprintf(stderr, "Unknown: %s\n", mode);
+	fprintf(stderr, "Supported MSG_PEEK mode are:\n");
+	fprintf(stderr,
+		"\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
+	fprintf(stderr,
+		"\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");
+
+	die_usage();
+
+	/* silence compiler warning */
+	return 0;
+}
+
 static int parse_int(const char *size)
 {
 	unsigned long s;
@@ -846,7 +887,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:")) != -1) {
+	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
@@ -899,6 +940,9 @@ static void parse_opts(int argc, char **argv)
 		case 'M':
 			cfg_mark = strtol(optarg, NULL, 0);
 			break;
+		case 'P':
+			cfg_peek = parse_peek(optarg);
+			break;
 		}
 	}
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 385cdc98aed8..9236609731b1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -375,7 +375,7 @@ do_transfer()
 	local srv_proto="$4"
 	local connect_addr="$5"
 	local local_addr="$6"
-	local extra_args=""
+	local extra_args="$7"
 
 	local port
 	port=$((10000+$TEST_COUNT))
@@ -394,9 +394,9 @@ do_transfer()
 	fi
 
 	if [ -n "$extra_args" ] && $options_log; then
-		options_log=false
 		echo "INFO: extra options: $extra_args"
 	fi
+	options_log=false
 
 	:> "$cout"
 	:> "$sout"
@@ -589,6 +589,7 @@ run_tests_lo()
 	local connector_ns="$2"
 	local connect_addr="$3"
 	local loopback="$4"
+	local extra_args="$5"
 	local lret=0
 
 	# skip if test programs are running inside same netns for subsequent runs.
@@ -608,7 +609,8 @@ run_tests_lo()
 		local_addr="0.0.0.0"
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -622,14 +624,16 @@ run_tests_lo()
 		fi
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		return 1
 	fi
 
-	do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr}
+	do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \
+		    ${connect_addr} ${local_addr} "${extra_args}"
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -637,7 +641,8 @@ run_tests_lo()
 	fi
 
 	if [ $do_tcp -gt 1 ] ;then
-		do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+		do_transfer ${listener_ns} ${connector_ns} TCP TCP \
+			    ${connect_addr} ${local_addr} "${extra_args}"
 		lret=$?
 		if [ $lret -ne 0 ]; then
 			ret=$lret
@@ -653,6 +658,15 @@ run_tests()
 	run_tests_lo $1 $2 $3 0
 }
 
+run_tests_peekmode()
+{
+	local peekmode="$1"
+
+	echo "INFO: with peek mode: ${peekmode}"
+	run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
+	run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
+}
+
 make_file "$cin" "client"
 make_file "$sin" "server"
 
@@ -732,6 +746,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 	run_tests "$ns4" $sender dead:beef:3::1
 done
 
+run_tests_peekmode "saveWithPeek"
+run_tests_peekmode "saveAfterPeek"
+
 time_end=$(date +%s)
 time_run=$((time_end-time_start))
 
-- 
cgit v1.2.3


From b881d089c7c9c7032da812cda1b4b0818f477780 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Fri, 23 Apr 2021 19:15:38 +0800
Subject: selftests/net: bump timeout to 5 minutes

We found that with the latest mainline kernel (5.12.0-051200rc8) on
some KVM instances / bare-metal systems, the following tests will take
longer than the kselftest framework default timeout (45 seconds) to
run and thus got terminated with TIMEOUT error:
* xfrm_policy.sh - took about 2m20s
* pmtu.sh - took about 3m5s
* udpgso_bench.sh - took about 60s

Bump the timeout setting to 5 minutes to allow them have a chance to
finish.

https://bugs.launchpad.net/bugs/1856010
Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile | 2 ++
 tools/testing/selftests/net/settings | 1 +
 2 files changed, 3 insertions(+)
 create mode 100644 tools/testing/selftests/net/settings

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f4242a961088..3915bb7bfc39 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -39,6 +39,8 @@ TEST_GEN_FILES += ipsec
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
+TEST_FILES := settings
+
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/net/settings
@@ -0,0 +1 @@
+timeout=300
-- 
cgit v1.2.3


From cbbd21a47f83023665dff171a696d2af70c6e51e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 23 Apr 2021 14:28:36 +0100
Subject: net/atm: Fix spelling mistake "requed" -> "requeued"

There is a spelling mistake in a printk message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index eef637fd90b3..933e3ff2ee8d 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -680,7 +680,7 @@ static void ia_tx_poll (IADEV *iadev) {
           skb1 = skb_dequeue(&iavcc->txing_skb);
        }                                                        
        if (!skb1) {
-          IF_EVENT(printk("IA: Vci %d - skb not found requed\n",vcc->vci);)
+          IF_EVENT(printk("IA: Vci %d - skb not found requeued\n",vcc->vci);)
           ia_enque_head_rtn_q (&iadev->tx_return_q, rtne);
           break;
        }
-- 
cgit v1.2.3


From b2f0ca00e6b34bd57c9298a869ea133699e8ec39 Mon Sep 17 00:00:00 2001
From: "Radu Pirea (NXP OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Fri, 23 Apr 2021 18:00:50 +0300
Subject: phy: nxp-c45-tja11xx: add interrupt support

Added .config_intr and .handle_interrupt callbacks.

Link event interrupt will trigger an interrupt every time when the link
goes up or down.

Signed-off-by: Radu Pirea (NXP OSS) <radu-nicolae.pirea@oss.nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 95307097ebff..26b9c0d7cb9d 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -28,6 +28,11 @@
 #define DEVICE_CONTROL_CONFIG_GLOBAL_EN	BIT(14)
 #define DEVICE_CONTROL_CONFIG_ALL_EN	BIT(13)
 
+#define VEND1_PHY_IRQ_ACK		0x80A0
+#define VEND1_PHY_IRQ_EN		0x80A1
+#define VEND1_PHY_IRQ_STATUS		0x80A2
+#define PHY_IRQ_LINK_EVENT		BIT(1)
+
 #define VEND1_PHY_CONTROL		0x8100
 #define PHY_CONFIG_EN			BIT(14)
 #define PHY_START_OP			BIT(0)
@@ -188,6 +193,32 @@ static int nxp_c45_start_op(struct phy_device *phydev)
 				PHY_START_OP);
 }
 
+static int nxp_c45_config_intr(struct phy_device *phydev)
+{
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+		return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1,
+					VEND1_PHY_IRQ_EN, PHY_IRQ_LINK_EVENT);
+	else
+		return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1,
+					  VEND1_PHY_IRQ_EN, PHY_IRQ_LINK_EVENT);
+}
+
+static irqreturn_t nxp_c45_handle_interrupt(struct phy_device *phydev)
+{
+	irqreturn_t ret = IRQ_NONE;
+	int irq;
+
+	irq = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_IRQ_STATUS);
+	if (irq & PHY_IRQ_LINK_EVENT) {
+		phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_PHY_IRQ_ACK,
+			      PHY_IRQ_LINK_EVENT);
+		phy_trigger_machine(phydev);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
 static int nxp_c45_soft_reset(struct phy_device *phydev)
 {
 	int ret;
@@ -560,6 +591,8 @@ static struct phy_driver nxp_c45_driver[] = {
 		.soft_reset		= nxp_c45_soft_reset,
 		.config_aneg		= nxp_c45_config_aneg,
 		.config_init		= nxp_c45_config_init,
+		.config_intr		= nxp_c45_config_intr,
+		.handle_interrupt	= nxp_c45_handle_interrupt,
 		.read_status		= nxp_c45_read_status,
 		.suspend		= genphy_c45_pma_suspend,
 		.resume			= genphy_c45_pma_resume,
-- 
cgit v1.2.3


From 350a62ca065be252ababc43a7c96f8aca390a18f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 23 Apr 2021 09:23:30 +0800
Subject: bpf: Document the pahole release info related to libbpf in
 bpf_devel_QA.rst

pahole starts to use libbpf definitions and APIs since v1.13 after the
commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf").
It works well with the git repository because the libbpf submodule will
use "git submodule update --init --recursive" to update.

Unfortunately, the default github release source code does not contain
libbpf submodule source code and this will cause build issues, the tarball
from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ is same with
github, you can get the source tarball with corresponding libbpf submodule
codes from

https://fedorapeople.org/~acme/dwarves

This change documents the above issues to give more information so that
we can get the tarball from the right place, early discussion is here:

https://lore.kernel.org/bpf/2de4aad5-fa9e-1c39-3c92-9bb9229d0966@loongson.cn/

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/bpf/1619141010-12521-1-git-send-email-yangtiezhu@loongson.cn
---
 Documentation/bpf/bpf_devel_QA.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index d05e67e72c10..253496af8fef 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -449,6 +449,19 @@ from source at
 
 https://github.com/acmel/dwarves
 
+pahole starts to use libbpf definitions and APIs since v1.13 after the
+commit 21507cd3e97b ("pahole: add libbpf as submodule under lib/bpf").
+It works well with the git repository because the libbpf submodule will
+use "git submodule update --init --recursive" to update.
+
+Unfortunately, the default github release source code does not contain
+libbpf submodule source code and this will cause build issues, the tarball
+from https://git.kernel.org/pub/scm/devel/pahole/pahole.git/ is same with
+github, you can get the source tarball with corresponding libbpf submodule
+codes from
+
+https://fedorapeople.org/~acme/dwarves
+
 Some distros have pahole version 1.16 packaged already, e.g.
 Fedora, Gentoo.
 
-- 
cgit v1.2.3


From 06ec5acc7747f225154fcafaf2afe52324694baa Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 2 Mar 2021 13:54:42 +0200
Subject: net/mlx5: E-Switch, Return eswitch max ports when eswitch is
 supported

mlx5_eswitch_get_total_vports() doesn't honor MLX5_ESWICH Kconfig flag.

When MLX5_ESWITCH is disabled, FS layer continues to initialize eswitch
specific ACL namespaces.
Instead, start honoring MLX5_ESWITCH flag and perform vport specific
initialization only when vport count is non zero.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 +++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 14 --------------
 include/linux/mlx5/eswitch.h                      | 11 +++++++++--
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1bb229ecd43b..c3a58224ae12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2205,3 +2205,16 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw)
 {
 	up_write(&esw->mode_lock);
 }
+
+/**
+ * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
+ *
+ * @dev: Pointer to core device
+ *
+ * mlx5_eswitch_get_total_vports returns total number of eswitch vports.
+ */
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
+}
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index e05c5c0f3ae1..457ad42eaa2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1151,20 +1151,6 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
 
-/**
- * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
- *
- * @dev:	Pointer to core device
- *
- * mlx5_eswitch_get_total_vports returns total number of vports for
- * the eswitch.
- */
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
-{
-	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
-}
-EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
-
 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out)
 {
 	u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 9cf1da2883c6..17109b65c1ac 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -65,8 +65,6 @@ struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
 				    struct mlx5_eswitch_rep *rep, u32 sqn);
 
-u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
-
 #ifdef CONFIG_MLX5_ESWITCH
 enum devlink_eswitch_encap_mode
 mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
@@ -126,6 +124,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
 
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 
 static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
@@ -162,10 +162,17 @@ mlx5_eswitch_get_vport_metadata_mask(void)
 {
 	return 0;
 }
+
+static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
+{
+	return 0;
+}
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
 {
 	return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;
 }
+
 #endif
-- 
cgit v1.2.3


From 9f8c7100c8f9879b7e972205cd1f33f0bc1cc8cb Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 2 Mar 2021 14:10:49 +0200
Subject: net/mlx5: E-Switch, Prepare to return total vports from eswitch
 struct

Total vports are already stored during eswitch initialization. Instead
of calculating everytime, read directly from eswitch.

Additionally, host PF's SF vport information is available using
QUERY_HCA_CAP command. It is not available through HCA_CAP of the
eswitch manager PF.
Hence, this patch prepares the return total eswitch vport count from the
existing eswitch struct.

This further helps to keep eswitch port counting macros and logic within
eswitch.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 9 ++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 ++++++++
 include/linux/mlx5/vport.h                        | 8 --------
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c3a58224ae12..f0974aa94574 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1573,8 +1573,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	if (!MLX5_VPORT_MANAGER(dev))
 		return 0;
 
-	total_vports = mlx5_eswitch_get_total_vports(dev);
-
+	total_vports = MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) +
+			mlx5_sf_max_functions(dev);
 	esw_info(dev,
 		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
 		 total_vports,
@@ -2215,6 +2215,9 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw)
  */
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
 {
-	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev);
+	struct mlx5_eswitch *esw;
+
+	esw = dev->priv.eswitch;
+	return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index b289d756a7e4..5ab480a5745d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -545,6 +545,14 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
 		MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
 }
 
+#define MLX5_VPORT_PF_PLACEHOLDER		(1u)
+#define MLX5_VPORT_UPLINK_PLACEHOLDER		(1u)
+#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev)	(mlx5_ecpf_vport_exists(mdev))
+
+#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER +		\
+				   MLX5_VPORT_UPLINK_PLACEHOLDER +	\
+				   MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
+
 static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw)
 {
 	/* PF and VF vports indices start from 0 to max_vfs */
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 4db87bcfce7b..aad53cb72f17 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,14 +36,6 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
-#define MLX5_VPORT_PF_PLACEHOLDER		(1u)
-#define MLX5_VPORT_UPLINK_PLACEHOLDER		(1u)
-#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev)	(mlx5_ecpf_vport_exists(mdev))
-
-#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER +		\
-				   MLX5_VPORT_UPLINK_PLACEHOLDER +	\
-				   MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
-
 #define MLX5_VPORT_MANAGER(mdev)					\
 	(MLX5_CAP_GEN(mdev, vport_group_manager) &&			\
 	 (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&	\
-- 
cgit v1.2.3


From 47dd7e609f6957437b721af4d027737b63b217b8 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 19 Mar 2021 05:21:31 +0200
Subject: net/mlx5: E-Switch, Use xarray for vport number to vport and rep
 mapping

Currently vport number to vport and its representor are mapped using an
array and an index.

Vport numbers of different types of functions are not contiguous. Adding
new such discontiguous range using index and number mapping is increasingly
complex and hard to maintain.

Hence, maintain an xarray of vport and rep whose lookup is done based on
the vport number.
Each VF and SF entry is marked with a xarray mark to identify the function
type. Additionally PF and VF needs special handling for legacy inline
mode. They are additionally marked as host function using additional
HOST_FN mark.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/esw/acl/egress_lgcy.c       |   2 +-
 .../mellanox/mlx5/core/esw/acl/egress_ofld.c       |   4 +-
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.c   |   8 +-
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.h   |   2 +-
 .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c      |   2 +-
 .../mellanox/mlx5/core/esw/acl/ingress_ofld.c      |   4 +-
 .../ethernet/mellanox/mlx5/core/esw/devlink_port.c |   3 +-
 .../net/ethernet/mellanox/mlx5/core/esw/legacy.c   |   3 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 205 +++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 194 +++------------
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 276 ++++++++++++++-------
 11 files changed, 380 insertions(+), 323 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 3e19b1721303..0399a396d166 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -96,7 +96,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 	}
 
 	if (!vport->egress.acl) {
-		vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+		vport->egress.acl = esw_acl_table_create(esw, vport,
 							 MLX5_FLOW_NAMESPACE_ESW_EGRESS,
 							 table_size);
 		if (IS_ERR(vport->egress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 26b37a0f8762..505bf811984a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -148,7 +148,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
 	esw_acl_egress_vlan_grp_destroy(vport);
 }
 
-static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num);
 }
@@ -171,7 +171,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
 		table_size++;
 	if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
 		table_size++;
-	vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+	vport->egress.acl = esw_acl_table_create(esw, vport,
 						 MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
 	if (IS_ERR(vport->egress.acl)) {
 		err = PTR_ERR(vport->egress.acl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
index 4a369669e51e..45b839116212 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -6,14 +6,14 @@
 #include "helper.h"
 
 struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_flow_namespace *root_ns;
 	struct mlx5_flow_table *acl;
 	int acl_supported;
-	int vport_index;
+	u16 vport_num;
 	int err;
 
 	acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
@@ -23,11 +23,11 @@ esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
 	if (!acl_supported)
 		return ERR_PTR(-EOPNOTSUPP);
 
+	vport_num = vport->vport;
 	esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
 		  ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
 
-	vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-	root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index);
+	root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
 			 vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
index 8dc4cab66a71..a47063fab57e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -8,7 +8,7 @@
 
 /* General acl helper functions */
 struct mlx5_flow_table *
-esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size);
+esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size);
 
 /* Egress acl helper functions */
 void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index d64fad2823e7..f75b86abaf1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -177,7 +177,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 	}
 
 	if (!vport->ingress.acl) {
-		vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+		vport->ingress.acl = esw_acl_table_create(esw, vport,
 							  MLX5_FLOW_NAMESPACE_ESW_INGRESS,
 							  table_size);
 		if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 548c005ea633..39e948bc1204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -7,7 +7,7 @@
 #include "ofld.h"
 
 static bool
-esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
+esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw,
 				 const struct mlx5_vport *vport)
 {
 	return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
@@ -255,7 +255,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
 	if (esw_acl_ingress_prio_tag_enabled(esw, vport))
 		num_ftes++;
 
-	vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+	vport->ingress.acl = esw_acl_table_create(esw, vport,
 						  MLX5_FLOW_NAMESPACE_ESW_INGRESS,
 						  num_ftes);
 	if (IS_ERR(vport->ingress.acl)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 7bfc84238b3d..8e825ef35cb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -14,8 +14,7 @@ mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_i
 	memcpy(ppid->id, &parent_id, sizeof(parent_id));
 }
 
-static bool
-mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	return vport_num == MLX5_VPORT_UPLINK ||
 	       (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) ||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 8ab1224653a4..d9041b16611d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -216,7 +216,8 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw)
 int esw_legacy_enable(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport *vport;
-	int ret, i;
+	unsigned long i;
+	int ret;
 
 	ret = esw_create_legacy_table(esw);
 	if (ret)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f0974aa94574..90d8bda87579 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -88,20 +88,17 @@ struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink)
 struct mlx5_vport *__must_check
 mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
 {
-	u16 idx;
+	struct mlx5_vport *vport;
 
 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager))
 		return ERR_PTR(-EPERM);
 
-	idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
-	if (idx > esw->total_vports - 1) {
-		esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n",
-			  vport_num, idx);
+	vport = xa_load(&esw->vports, vport_num);
+	if (!vport) {
+		esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num);
 		return ERR_PTR(-EINVAL);
 	}
-
-	return &esw->vports[idx];
+	return vport;
 }
 
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
@@ -345,9 +342,10 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 {
 	u8 *mac = vaddr->node.addr;
 	struct mlx5_vport *vport;
-	u16 i, vport_num;
+	unsigned long i;
+	u16 vport_num;
 
-	mlx5_esw_for_all_vports(esw, i, vport) {
+	mlx5_esw_for_each_vport(esw, i, vport) {
 		struct hlist_head *vport_hash = vport->mc_list;
 		struct vport_addr *iter_vaddr =
 					l2addr_hash_find(vport_hash,
@@ -1175,7 +1173,7 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw)
 static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport *vport;
-	int i;
+	unsigned long i;
 
 	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
 		memset(&vport->qos, 0, sizeof(vport->qos));
@@ -1213,20 +1211,25 @@ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
 
 void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs)
 {
-	int i;
+	struct mlx5_vport *vport;
+	unsigned long i;
 
-	mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs)
-		mlx5_eswitch_unload_vport(esw, i);
+	mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+		if (!vport->enabled)
+			continue;
+		mlx5_eswitch_unload_vport(esw, vport->vport);
+	}
 }
 
 int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
 				enum mlx5_eswitch_vport_event enabled_events)
 {
+	struct mlx5_vport *vport;
+	unsigned long i;
 	int err;
-	int i;
 
-	mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) {
-		err = mlx5_eswitch_load_vport(esw, i, enabled_events);
+	mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+		err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events);
 		if (err)
 			goto vf_err;
 	}
@@ -1234,7 +1237,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs,
 	return 0;
 
 vf_err:
-	mlx5_eswitch_unload_vf_vports(esw, i - 1);
+	mlx5_eswitch_unload_vf_vports(esw, num_vfs);
 	return err;
 }
 
@@ -1563,24 +1566,106 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 	up_write(&esw->mode_lock);
 }
 
+static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
+				int index, u16 vport_num)
+{
+	struct mlx5_vport *vport;
+	int err;
+
+	vport = kzalloc(sizeof(*vport), GFP_KERNEL);
+	if (!vport)
+		return -ENOMEM;
+
+	vport->dev = esw->dev;
+	vport->vport = vport_num;
+	vport->index = index;
+	vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
+	INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler);
+	err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL);
+	if (err)
+		goto insert_err;
+
+	esw->total_vports++;
+	return 0;
+
+insert_err:
+	kfree(vport);
+	return err;
+}
+
+static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	xa_erase(&esw->vports, vport->vport);
+	kfree(vport);
+}
+
+static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	unsigned long i;
+
+	mlx5_esw_for_each_vport(esw, i, vport)
+		mlx5_esw_vport_free(esw, vport);
+	xa_destroy(&esw->vports);
+}
+
+static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	u16 base_sf_num;
+	int idx = 0;
+	int err;
+	int i;
+
+	xa_init(&esw->vports);
+
+	err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF);
+	if (err)
+		goto err;
+	if (esw->first_host_vport == MLX5_VPORT_PF)
+		xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+	idx++;
+
+	for (i = 0; i < mlx5_core_max_vfs(dev); i++) {
+		err = mlx5_esw_vport_alloc(esw, dev, idx, idx);
+		if (err)
+			goto err;
+		xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF);
+		xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN);
+		idx++;
+	}
+	base_sf_num = mlx5_sf_start_function_id(dev);
+	for (i = 0; i < mlx5_sf_max_functions(dev); i++) {
+		err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+		if (err)
+			goto err;
+		xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+		idx++;
+	}
+	if (mlx5_ecpf_vport_exists(dev)) {
+		err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
+		if (err)
+			goto err;
+		idx++;
+	}
+	err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK);
+	if (err)
+		goto err;
+	return 0;
+
+err:
+	mlx5_esw_vports_cleanup(esw);
+	return err;
+}
+
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eswitch *esw;
-	struct mlx5_vport *vport;
-	int total_vports;
-	int err, i;
+	int err;
 
 	if (!MLX5_VPORT_MANAGER(dev))
 		return 0;
 
-	total_vports = MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) +
-			mlx5_sf_max_functions(dev);
-	esw_info(dev,
-		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
-		 total_vports,
-		 MLX5_MAX_UC_PER_VPORT(dev),
-		 MLX5_MAX_MC_PER_VPORT(dev));
-
 	esw = kzalloc(sizeof(*esw), GFP_KERNEL);
 	if (!esw)
 		return -ENOMEM;
@@ -1595,18 +1680,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		goto abort;
 	}
 
-	esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
-			      GFP_KERNEL);
-	if (!esw->vports) {
-		err = -ENOMEM;
+	err = mlx5_esw_vports_init(esw);
+	if (err)
 		goto abort;
-	}
-
-	esw->total_vports = total_vports;
 
 	err = esw_offloads_init_reps(esw);
 	if (err)
-		goto abort;
+		goto reps_err;
 
 	mutex_init(&esw->offloads.encap_tbl_lock);
 	hash_init(esw->offloads.encap_tbl);
@@ -1619,25 +1699,25 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	mutex_init(&esw->state_lock);
 	init_rwsem(&esw->mode_lock);
 
-	mlx5_esw_for_all_vports(esw, i, vport) {
-		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
-		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
-		vport->dev = dev;
-		INIT_WORK(&vport->vport_change_handler,
-			  esw_vport_change_handler);
-	}
-
 	esw->enabled_vports = 0;
 	esw->mode = MLX5_ESWITCH_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+
+	esw_info(dev,
+		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+		 esw->total_vports,
+		 MLX5_MAX_UC_PER_VPORT(dev),
+		 MLX5_MAX_MC_PER_VPORT(dev));
 	return 0;
+
+reps_err:
+	mlx5_esw_vports_cleanup(esw);
 abort:
 	if (esw->work_queue)
 		destroy_workqueue(esw->work_queue);
-	kfree(esw->vports);
 	kfree(esw);
 	return err;
 }
@@ -1659,7 +1739,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	mutex_destroy(&esw->offloads.encap_tbl_lock);
 	mutex_destroy(&esw->offloads.decap_tbl_lock);
 	esw_offloads_cleanup_reps(esw);
-	kfree(esw->vports);
+	mlx5_esw_vports_cleanup(esw);
 	kfree(esw);
 }
 
@@ -1718,8 +1798,29 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 	return err;
 }
 
+static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark)
+{
+	struct mlx5_vport *vport;
+
+	vport = mlx5_eswitch_get_vport(esw, vport_num);
+	if (IS_ERR(vport))
+		return false;
+
+	return xa_get_mark(&esw->vports, vport_num, mark);
+}
+
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF);
+}
+
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF);
+}
+
 static bool
-is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num)
+is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	return vport_num == MLX5_VPORT_PF ||
 	       mlx5_eswitch_is_vf_vport(esw, vport_num) ||
@@ -1891,9 +1992,9 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
 	struct mlx5_vport *evport;
 	u32 max_guarantee = 0;
-	int i;
+	unsigned long i;
 
-	mlx5_esw_for_all_vports(esw, i, evport) {
+	mlx5_esw_for_each_vport(esw, i, evport) {
 		if (!evport->enabled || evport->qos.min_rate < max_guarantee)
 			continue;
 		max_guarantee = evport->qos.min_rate;
@@ -1911,11 +2012,11 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
 	struct mlx5_vport *evport;
 	u32 vport_max_rate;
 	u32 vport_min_rate;
+	unsigned long i;
 	u32 bw_share;
 	int err;
-	int i;
 
-	mlx5_esw_for_all_vports(esw, i, evport) {
+	mlx5_esw_for_each_vport(esw, i, evport) {
 		if (!evport->enabled)
 			continue;
 		vport_min_rate = evport->qos.min_rate;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5ab480a5745d..7b5f9b8dc7df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -176,6 +176,7 @@ struct mlx5_vport {
 	u16 vport;
 	bool                    enabled;
 	enum mlx5_eswitch_vport_event enabled_events;
+	int index;
 	struct devlink_port *dl_port;
 };
 
@@ -228,7 +229,7 @@ struct mlx5_esw_offload {
 
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
-	struct mlx5_eswitch_rep *vport_reps;
+	struct xarray vport_reps;
 	struct list_head peer_flows;
 	struct mutex peer_mutex;
 	struct mutex encap_tbl_lock; /* protects encap_tbl */
@@ -278,7 +279,7 @@ struct mlx5_eswitch {
 	struct esw_mc_addr mc_promisc;
 	/* end of legacy */
 	struct workqueue_struct *work_queue;
-	struct mlx5_vport       *vports;
+	struct xarray vports;
 	u32 flags;
 	int                     total_vports;
 	int                     enabled_vports;
@@ -545,102 +546,11 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
 		MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF;
 }
 
-#define MLX5_VPORT_PF_PLACEHOLDER		(1u)
-#define MLX5_VPORT_UPLINK_PLACEHOLDER		(1u)
-#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev)	(mlx5_ecpf_vport_exists(mdev))
-
-#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER +		\
-				   MLX5_VPORT_UPLINK_PLACEHOLDER +	\
-				   MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
-
-static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw)
-{
-	/* PF and VF vports indices start from 0 to max_vfs */
-	return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw)
-{
-	return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_functions(esw->dev);
-}
-
-static inline int
-mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num)
-{
-	return vport_num - mlx5_sf_start_function_id(esw->dev) +
-	       MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev);
-}
-
-static inline u16
-mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx)
-{
-	return mlx5_sf_start_function_id(esw->dev) + idx -
-	       (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev));
-}
-
-static inline bool
-mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
-	return mlx5_sf_supported(esw->dev) &&
-	       vport_num >= mlx5_sf_start_function_id(esw->dev) &&
-	       (vport_num < (mlx5_sf_start_function_id(esw->dev) +
-			     mlx5_sf_max_functions(esw->dev)));
-}
-
 static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev)
 {
 	return mlx5_core_is_ecpf_esw_manager(dev);
 }
 
-static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
-{
-	/* Uplink always locate at the last element of the array.*/
-	return esw->total_vports - 1;
-}
-
-static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
-{
-	return esw->total_vports - 2;
-}
-
-static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
-						  u16 vport_num)
-{
-	if (vport_num == MLX5_VPORT_ECPF) {
-		if (!mlx5_ecpf_vport_exists(esw->dev))
-			esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
-		return mlx5_eswitch_ecpf_idx(esw);
-	}
-
-	if (vport_num == MLX5_VPORT_UPLINK)
-		return mlx5_eswitch_uplink_idx(esw);
-
-	if (mlx5_esw_is_sf_vport(esw, vport_num))
-		return mlx5_esw_sf_vport_num_to_index(esw, vport_num);
-
-	/* PF and VF vports start from 0 to max_vfs */
-	return vport_num;
-}
-
-static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
-						  int index)
-{
-	if (index == mlx5_eswitch_ecpf_idx(esw) &&
-	    mlx5_ecpf_vport_exists(esw->dev))
-		return MLX5_VPORT_ECPF;
-
-	if (index == mlx5_eswitch_uplink_idx(esw))
-		return MLX5_VPORT_UPLINK;
-
-	/* SF vports indices are after VFs and before ECPF */
-	if (mlx5_sf_supported(esw->dev) &&
-	    index > mlx5_core_max_vfs(esw->dev))
-		return mlx5_esw_sf_vport_index_to_num(esw, index);
-
-	/* PF and VF vports start from 0 to max_vfs */
-	return index;
-}
-
 static inline unsigned int
 mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 				     u16 vport_num)
@@ -657,82 +567,42 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index)
 /* TODO: This mlx5e_tc function shouldn't be called by eswitch */
 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw);
 
-/* The vport getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+/* Each mark identifies eswitch vport type.
+ * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using
+ * a single mark.
+ * MLX5_ESW_VPT_VF identifies a SRIOV VF vport.
+ * MLX5_ESW_VPT_SF identifies SF vport.
  */
-#define mlx5_esw_for_all_vports(esw, i, vport)		\
-	for ((i) = MLX5_VPORT_PF;			\
-	     (vport) = &(esw)->vports[i],		\
-	     (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_all_vports_reverse(esw, i, vport)	\
-	for ((i) = (esw)->total_vports - 1;		\
-	     (vport) = &(esw)->vports[i],		\
-	     (i) >= MLX5_VPORT_PF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)	\
-	for ((i) = MLX5_VPORT_FIRST_VF;			\
-	     (vport) = &(esw)->vports[(i)],		\
-	     (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs)	\
-	for ((i) = (nvfs);					\
-	     (vport) = &(esw)->vports[(i)],			\
-	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-/* The rep getter/iterator are only valid after esw->total_vports
- * and vport->vport are initialized in mlx5_eswitch_init.
+#define MLX5_ESW_VPT_HOST_FN XA_MARK_0
+#define MLX5_ESW_VPT_VF XA_MARK_1
+#define MLX5_ESW_VPT_SF XA_MARK_2
+
+/* The vport iterator is valid only after vport are initialized in mlx5_eswitch_init.
+ * Borrowed the idea from xa_for_each_marked() but with support for desired last element.
  */
-#define mlx5_esw_for_all_reps(esw, i, rep)			\
-	for ((i) = MLX5_VPORT_PF;				\
-	     (rep) = &(esw)->offloads.vport_reps[i],		\
-	     (i) < (esw)->total_vports; (i)++)
-
-#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs)		\
-	for ((i) = MLX5_VPORT_FIRST_VF;				\
-	     (rep) = &(esw)->offloads.vport_reps[i],		\
-	     (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs)	\
-	for ((i) = (nvfs);					\
-	     (rep) = &(esw)->offloads.vport_reps[i],		\
-	     (i) >= MLX5_VPORT_FIRST_VF; (i)--)
-
-#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs)	\
-	for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs)	\
-	for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--)
-
-/* Includes host PF (vport 0) if it's not esw manager. */
-#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs)	\
-	for ((i) = (esw)->first_host_vport;			\
-	     (rep) = &(esw)->offloads.vport_reps[i],		\
-	     (i) <= (nvfs); (i)++)
-
-#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs)	\
-	for ((i) = (nvfs);						\
-	     (rep) = &(esw)->offloads.vport_reps[i],			\
-	     (i) >= (esw)->first_host_vport; (i)--)
-
-#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs)	\
-	for ((vport) = (esw)->first_host_vport;			\
-	     (vport) <= (nvfs); (vport)++)
-
-#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs)	\
-	for ((vport) = (nvfs);						\
-	     (vport) >= (esw)->first_host_vport; (vport)--)
-
-#define mlx5_esw_for_each_sf_rep(esw, i, rep)		\
-	for ((i) = mlx5_esw_sf_start_idx(esw);		\
-	     (rep) = &(esw)->offloads.vport_reps[(i)],	\
-	     (i) < mlx5_esw_sf_end_idx(esw); (i++))
+
+#define mlx5_esw_for_each_vport(esw, index, vport) \
+	xa_for_each(&((esw)->vports), index, vport)
+
+#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter)	\
+	for (index = 0, entry = xa_find(xa, &index, last, filter); \
+	     entry; entry = xa_find_after(xa, &index, last, filter))
+
+#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter)	\
+	mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter)
+
+#define mlx5_esw_for_each_vf_vport(esw, index, vport, last)	\
+	mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF)
+
+#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last)	\
+	mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN)
 
 struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink);
 struct mlx5_vport *__must_check
 mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num);
 
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num);
+bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num);
 
 int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bbb707117296..a1dd66540ba0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -49,6 +49,16 @@
 #include "en_tc.h"
 #include "en/mapping.h"
 
+#define mlx5_esw_for_each_rep(esw, i, rep) \
+	xa_for_each(&((esw)->offloads.vport_reps), i, rep)
+
+#define mlx5_esw_for_each_sf_rep(esw, i, rep) \
+	xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF)
+
+#define mlx5_esw_for_each_vf_rep(esw, index, rep)	\
+	mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \
+				       rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF)
+
 /* There are two match-all miss flows, one for unicast dst mac and
  * one for multicast.
  */
@@ -67,10 +77,7 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
 						     u16 vport_num)
 {
-	int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
-
-	WARN_ON(idx > esw->total_vports - 1);
-	return &esw->offloads.vport_reps[idx];
+	return xa_load(&esw->offloads.vport_reps, vport_num);
 }
 
 static void
@@ -720,10 +727,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
 static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 {
 	struct mlx5_eswitch_rep *rep;
-	int i, err = 0;
+	unsigned long i;
+	int err = 0;
 
 	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
-	mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) {
+	mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) {
 		if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
 			continue;
 
@@ -972,13 +980,13 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule)
 static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;
-	int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num;
+	int i = 0, num_vfs = esw->esw_funcs.num_vfs;
 
 	if (!num_vfs || !flows)
 		return;
 
-	mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs)
-		mlx5_del_flow_rules(flows[i++]);
+	for (i = 0; i < num_vfs; i++)
+		mlx5_del_flow_rules(flows[i]);
 
 	kvfree(flows);
 }
@@ -992,6 +1000,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_handle **flows;
 	struct mlx5_flow_spec *spec;
+	struct mlx5_vport *vport;
+	unsigned long i;
 	u16 vport_num;
 
 	num_vfs = esw->esw_funcs.num_vfs;
@@ -1016,7 +1026,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-	mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) {
+	mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) {
+		vport_num = vport->vport;
 		MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0,
 			 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
 		dest.vport.num = vport_num;
@@ -1158,12 +1169,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 	struct mlx5_flow_destination dest = {};
 	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_handle **flows;
-	struct mlx5_flow_handle *flow;
-	struct mlx5_flow_spec *spec;
 	/* total vports is the same for both e-switches */
 	int nvports = esw->total_vports;
+	struct mlx5_flow_handle *flow;
+	struct mlx5_flow_spec *spec;
+	struct mlx5_vport *vport;
+	unsigned long i;
 	void *misc;
-	int err, i;
+	int err;
 
 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
 	if (!spec)
@@ -1182,6 +1195,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 			    misc_parameters);
 
 	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
 		esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch,
 						   spec, MLX5_VPORT_PF);
 
@@ -1191,10 +1205,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 			err = PTR_ERR(flow);
 			goto add_pf_flow_err;
 		}
-		flows[MLX5_VPORT_PF] = flow;
+		flows[vport->index] = flow;
 	}
 
 	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
 		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
 		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 					   spec, &flow_act, &dest, 1);
@@ -1202,13 +1217,13 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 			err = PTR_ERR(flow);
 			goto add_ecpf_flow_err;
 		}
-		flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+		flows[vport->index] = flow;
 	}
 
-	mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) {
+	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
 		esw_set_peer_miss_rule_source_port(esw,
 						   peer_dev->priv.eswitch,
-						   spec, i);
+						   spec, vport->vport);
 
 		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 					   spec, &flow_act, &dest, 1);
@@ -1216,7 +1231,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 			err = PTR_ERR(flow);
 			goto add_vf_flow_err;
 		}
-		flows[i] = flow;
+		flows[vport->index] = flow;
 	}
 
 	esw->fdb_table.offloads.peer_miss_rules = flows;
@@ -1225,15 +1240,20 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 	return 0;
 
 add_vf_flow_err:
-	nvports = --i;
-	mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports)
-		mlx5_del_flow_rules(flows[i]);
-
-	if (mlx5_ecpf_vport_exists(esw->dev))
-		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) {
+		if (!flows[vport->index])
+			continue;
+		mlx5_del_flow_rules(flows[vport->index]);
+	}
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+		mlx5_del_flow_rules(flows[vport->index]);
+	}
 add_ecpf_flow_err:
-	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
-		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+		mlx5_del_flow_rules(flows[vport->index]);
+	}
 add_pf_flow_err:
 	esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
 	kvfree(flows);
@@ -1245,20 +1265,23 @@ alloc_flows_err:
 static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
 {
 	struct mlx5_flow_handle **flows;
-	int i;
+	struct mlx5_vport *vport;
+	unsigned long i;
 
 	flows = esw->fdb_table.offloads.peer_miss_rules;
 
-	mlx5_esw_for_each_vf_vport_num_reverse(esw, i,
-					       mlx5_core_max_vfs(esw->dev))
-		mlx5_del_flow_rules(flows[i]);
+	mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev))
+		mlx5_del_flow_rules(flows[vport->index]);
 
-	if (mlx5_ecpf_vport_exists(esw->dev))
-		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
-
-	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
-		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+		mlx5_del_flow_rules(flows[vport->index]);
+	}
 
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
+		mlx5_del_flow_rules(flows[vport->index]);
+	}
 	kvfree(flows);
 }
 
@@ -1402,11 +1425,11 @@ static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport_tbl_attr attr;
 	struct mlx5_vport *vport;
-	int i;
+	unsigned long i;
 
 	attr.chain = 0;
 	attr.prio = 1;
-	mlx5_esw_for_all_vports(esw, i, vport) {
+	mlx5_esw_for_each_vport(esw, i, vport) {
 		attr.vport = vport->vport;
 		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 		mlx5_esw_vporttbl_put(esw, &attr);
@@ -1418,11 +1441,11 @@ static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
 	struct mlx5_vport_tbl_attr attr;
 	struct mlx5_flow_table *fdb;
 	struct mlx5_vport *vport;
-	int i;
+	unsigned long i;
 
 	attr.chain = 0;
 	attr.prio = 1;
-	mlx5_esw_for_all_vports(esw, i, vport) {
+	mlx5_esw_for_each_vport(esw, i, vport) {
 		attr.vport = vport->vport;
 		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 		fdb = mlx5_esw_vporttbl_get(esw, &attr);
@@ -1910,12 +1933,12 @@ out:
 	return flow_rule;
 }
 
-
-static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode)
+static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode)
 {
 	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
 	struct mlx5_core_dev *dev = esw->dev;
-	int vport;
+	struct mlx5_vport *vport;
+	unsigned long i;
 
 	if (!MLX5_CAP_GEN(dev, vport_group_manager))
 		return -EOPNOTSUPP;
@@ -1936,8 +1959,8 @@ static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode
 
 query_vports:
 	mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode);
-	mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
-		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+		mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode);
 		if (prev_mlx5_mode != mlx5_mode)
 			return -EINVAL;
 		prev_mlx5_mode = mlx5_mode;
@@ -2080,34 +2103,82 @@ static int esw_offloads_start(struct mlx5_eswitch *esw,
 	return err;
 }
 
-void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw,
+					   struct mlx5_eswitch_rep *rep,
+					   xa_mark_t mark)
 {
-	kfree(esw->offloads.vport_reps);
+	bool mark_set;
+
+	/* Copy the mark from vport to its rep */
+	mark_set = xa_get_mark(&esw->vports, rep->vport, mark);
+	if (mark_set)
+		xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark);
 }
 
-int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
 {
-	int total_vports = esw->total_vports;
 	struct mlx5_eswitch_rep *rep;
-	int vport_index;
-	u8 rep_type;
+	int rep_type;
+	int err;
 
-	esw->offloads.vport_reps = kcalloc(total_vports,
-					   sizeof(struct mlx5_eswitch_rep),
-					   GFP_KERNEL);
-	if (!esw->offloads.vport_reps)
+	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	if (!rep)
 		return -ENOMEM;
 
-	mlx5_esw_for_all_reps(esw, vport_index, rep) {
-		rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
-		rep->vport_index = vport_index;
+	rep->vport = vport->vport;
+	rep->vport_index = vport->index;
+	for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
 
-		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
-			atomic_set(&rep->rep_data[rep_type].state,
-				   REP_UNREGISTERED);
-	}
+	err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL);
+	if (err)
+		goto insert_err;
+
+	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN);
+	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF);
+	mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF);
+	return 0;
+
+insert_err:
+	kfree(rep);
+	return err;
+}
+
+static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw,
+					  struct mlx5_eswitch_rep *rep)
+{
+	xa_erase(&esw->offloads.vport_reps, rep->vport);
+	kfree(rep);
+}
+
+void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
+{
+	struct mlx5_eswitch_rep *rep;
+	unsigned long i;
 
+	mlx5_esw_for_each_rep(esw, i, rep)
+		mlx5_esw_offloads_rep_cleanup(esw, rep);
+	xa_destroy(&esw->offloads.vport_reps);
+}
+
+int esw_offloads_init_reps(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *vport;
+	unsigned long i;
+	int err;
+
+	xa_init(&esw->offloads.vport_reps);
+
+	mlx5_esw_for_each_vport(esw, i, vport) {
+		err = mlx5_esw_offloads_rep_init(esw, vport);
+		if (err)
+			goto err;
+	}
 	return 0;
+
+err:
+	esw_offloads_cleanup_reps(esw);
+	return err;
 }
 
 static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
@@ -2121,7 +2192,7 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
 static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
-	int i;
+	unsigned long i;
 
 	mlx5_esw_for_each_sf_rep(esw, i, rep)
 		__esw_offloads_unload_rep(esw, rep, rep_type);
@@ -2130,11 +2201,11 @@ static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type)
 static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
-	int i;
+	unsigned long i;
 
 	__unload_reps_sf_vport(esw, rep_type);
 
-	mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs)
+	mlx5_esw_for_each_vf_rep(esw, i, rep)
 		__esw_offloads_unload_rep(esw, rep, rep_type);
 
 	if (mlx5_ecpf_vport_exists(esw->dev)) {
@@ -2421,25 +2492,25 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
 static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport *vport;
-	int i;
+	unsigned long i;
 
 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
 		return;
 
-	mlx5_esw_for_all_vports_reverse(esw, i, vport)
+	mlx5_esw_for_each_vport(esw, i, vport)
 		esw_offloads_vport_metadata_cleanup(esw, vport);
 }
 
 static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
 {
 	struct mlx5_vport *vport;
+	unsigned long i;
 	int err;
-	int i;
 
 	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
 		return 0;
 
-	mlx5_esw_for_all_vports(esw, i, vport) {
+	mlx5_esw_for_each_vport(esw, i, vport) {
 		err = esw_offloads_vport_metadata_setup(esw, vport);
 		if (err)
 			goto metadata_err;
@@ -2680,7 +2751,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 {
 	struct mapping_ctx *reg_c0_obj_pool;
 	struct mlx5_vport *vport;
-	int err, i;
+	unsigned long i;
+	int err;
 
 	if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
 	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
@@ -2926,13 +2998,44 @@ unlock:
 	return err;
 }
 
+static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode,
+				      struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_vport *vport;
+	u16 err_vport_num = 0;
+	unsigned long i;
+	int err = 0;
+
+	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+		err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode);
+		if (err) {
+			err_vport_num = vport->vport;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Failed to set min inline on vport");
+			goto revert_inline_mode;
+		}
+	}
+	return 0;
+
+revert_inline_mode:
+	mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+		if (vport->vport == err_vport_num)
+			break;
+		mlx5_modify_nic_vport_min_inline(dev,
+						 vport->vport,
+						 esw->offloads.inline_mode);
+	}
+	return err;
+}
+
 int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 					 struct netlink_ext_ack *extack)
 {
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
-	int err, vport, num_vport;
 	struct mlx5_eswitch *esw;
 	u8 mlx5_mode;
+	int err;
 
 	esw = mlx5_devlink_eswitch_get(devlink);
 	if (IS_ERR(esw))
@@ -2967,25 +3070,14 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 	if (err)
 		goto out;
 
-	mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) {
-		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
-		if (err) {
-			NL_SET_ERR_MSG_MOD(extack,
-					   "Failed to set min inline on vport");
-			goto revert_inline_mode;
-		}
-	}
+	err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack);
+	if (err)
+		goto out;
 
 	esw->offloads.inline_mode = mlx5_mode;
 	up_write(&esw->mode_lock);
 	return 0;
 
-revert_inline_mode:
-	num_vport = --vport;
-	mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport)
-		mlx5_modify_nic_vport_min_inline(dev,
-						 vport,
-						 esw->offloads.inline_mode);
 out:
 	up_write(&esw->mode_lock);
 	return err;
@@ -3116,11 +3208,11 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
 {
 	struct mlx5_eswitch_rep_data *rep_data;
 	struct mlx5_eswitch_rep *rep;
-	int i;
+	unsigned long i;
 
 	esw->offloads.rep_ops[rep_type] = ops;
-	mlx5_esw_for_all_reps(esw, i, rep) {
-		if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
+	mlx5_esw_for_each_rep(esw, i, rep) {
+		if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) {
 			rep->esw = esw;
 			rep_data = &rep->rep_data[rep_type];
 			atomic_set(&rep_data->state, REP_REGISTERED);
@@ -3132,12 +3224,12 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
 void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
-	int i;
+	unsigned long i;
 
 	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
 		__unload_reps_all_vport(esw, rep_type);
 
-	mlx5_esw_for_all_reps(esw, i, rep)
+	mlx5_esw_for_each_rep(esw, i, rep)
 		atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED);
 }
 EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
@@ -3178,12 +3270,6 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 }
 EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
 
-bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num)
-{
-	return vport_num >= MLX5_VPORT_FIRST_VF &&
-	       vport_num <= esw->dev->priv.sriov.max_vfs;
-}
-
 bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw)
 {
 	return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED);
-- 
cgit v1.2.3


From 87bd418ea7515d904a3dc69de2479396f5cbd7a4 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 2 Mar 2021 14:20:21 +0200
Subject: net/mlx5: E-Switch, Consider SF ports of host PF

Query SF vports count and base id of host PF from the firmware.

Account these ports in the total port calculation whenever it is non
zero.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 55 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  1 +
 2 files changed, 56 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 90d8bda87579..570f2280823c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1566,6 +1566,48 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
 	up_write(&esw->mode_lock);
 }
 
+static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
+{
+	u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01);
+	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {};
+
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
+	MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF);
+	MLX5_SET(query_hca_cap_in, in, other_function, true);
+	return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
+}
+
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id)
+
+{
+	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *query_ctx;
+	void *hca_caps;
+	int err;
+
+	if (!mlx5_core_is_ecpf(dev)) {
+		*max_sfs = 0;
+		return 0;
+	}
+
+	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+	if (!query_ctx)
+		return -ENOMEM;
+
+	err = mlx5_query_hca_cap_host_pf(dev, query_ctx);
+	if (err)
+		goto out_free;
+
+	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+	*max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf);
+	*sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id);
+
+out_free:
+	kfree(query_ctx);
+	return err;
+}
+
 static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev,
 				int index, u16 vport_num)
 {
@@ -1612,6 +1654,7 @@ static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw)
 static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
 {
 	struct mlx5_core_dev *dev = esw->dev;
+	u16 max_host_pf_sfs;
 	u16 base_sf_num;
 	int idx = 0;
 	int err;
@@ -1642,6 +1685,18 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw)
 		xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
 		idx++;
 	}
+
+	err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num);
+	if (err)
+		goto err;
+	for (i = 0; i < max_host_pf_sfs; i++) {
+		err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i);
+		if (err)
+			goto err;
+		xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF);
+		idx++;
+	}
+
 	if (mlx5_ecpf_vport_exists(dev)) {
 		err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 7b5f9b8dc7df..0812cee8f603 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -668,6 +668,7 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
 int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
 				      u16 vport_num, u32 sfnum);
 void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
+int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
 
 int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num);
 void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num);
-- 
cgit v1.2.3


From 1d7979352f9f0d32743528fb72425f4ff29efcb9 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Mon, 8 Mar 2021 11:18:53 +0200
Subject: net/mlx5: SF, Rely on hw table for SF devlink port allocation

Supporting SF allocation is currently checked at two places:
(a) SF devlink port allocator and
(b) SF HW table handler.

Both layers are using HCA CAP to identify it using helper routine
mlx5_sf_supported() and mlx5_sf_max_functions().

Instead, rely on the HW table handler to check if SF is supported
or not.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c  | 9 ++-------
 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 7 ++++++-
 drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h     | 1 +
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 52226d9b9a6d..5fa261334cd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -437,9 +437,6 @@ sf_err:
 
 static void mlx5_sf_table_enable(struct mlx5_sf_table *table)
 {
-	if (!mlx5_sf_max_functions(table->dev))
-		return;
-
 	init_completion(&table->disable_complete);
 	refcount_set(&table->refcount, 1);
 }
@@ -462,9 +459,6 @@ static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table)
 
 static void mlx5_sf_table_disable(struct mlx5_sf_table *table)
 {
-	if (!mlx5_sf_max_functions(table->dev))
-		return;
-
 	if (!refcount_read(&table->refcount))
 		return;
 
@@ -498,7 +492,8 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi
 
 static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev)
 {
-	return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev);
+	return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) &&
+	       mlx5_sf_hw_table_supported(dev);
 }
 
 int mlx5_sf_table_init(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index ec53c11c8344..9140c81aa03a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -41,7 +41,7 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 	int err;
 	int i;
 
-	if (!table->max_local_functions)
+	if (!table || !table->max_local_functions)
 		return -EOPNOTSUPP;
 
 	mutex_lock(&table->table_lock);
@@ -230,3 +230,8 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
 	/* Dealloc SFs whose firmware event has been missed. */
 	mlx5_sf_hw_dealloc_all(table);
 }
+
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
+{
+	return !!dev->priv.sf_hw_table;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
index cb02a51d0986..b36be5ecb496 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -17,5 +17,6 @@ u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id);
 int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum);
 void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id);
 void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id);
+bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
 
 #endif
-- 
cgit v1.2.3


From a1ab3e4554b5342b34845df452601ebd5a310d0a Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 10 Mar 2021 15:35:03 +0200
Subject: devlink: Extend SF port attributes to have external attribute

Extended SF port attributes to have optional external flag similar to
PCI PF and VF port attributes.

External atttibute is required to generate unique phys_port_name when PF number
and SF number are overlapping between two controllers similar to SR-IOV
VFs.

When a SF is for external controller an example view of external SF
port and config sequence.

On eswitch system:
$ devlink dev eswitch set pci/0033:01:00.0 mode switchdev

$ devlink port show
pci/0033:01:00.0/196607: type eth netdev enP51p1s0f0np0 flavour physical port 0 splittable false
pci/0033:01:00.0/131072: type eth netdev eth0 flavour pcipf controller 1 pfnum 0 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00

$ devlink port add pci/0033:01:00.0 flavour pcisf pfnum 0 sfnum 77 controller 1
pci/0033:01:00.0/163840: type eth netdev eth1 flavour pcisf controller 1 pfnum 0 sfnum 77 splittable false
  function:
    hw_addr 00:00:00:00:00:00 state inactive opstate detached

phys_port_name construction:
$ cat /sys/class/net/eth1/phys_port_name
c1pf0sf77

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c |  2 +-
 include/net/devlink.h                                      |  5 ++++-
 net/core/devlink.c                                         | 11 ++++++++++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 8e825ef35cb7..183f782b940f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -141,7 +141,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
 	mlx5_esw_get_port_parent_id(dev, &ppid);
 	memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 	dl_port->attrs.switch_id.id_len = ppid.id_len;
-	devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum);
+	devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum, false);
 	devlink = priv_to_devlink(dev);
 	dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
 	err = devlink_port_register(devlink, dl_port, dl_port_index);
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 853420db5d32..7c984cadfec4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -98,11 +98,13 @@ struct devlink_port_pci_vf_attrs {
  * @controller: Associated controller number
  * @sf: Associated PCI SF for of the PCI PF for this port.
  * @pf: Associated PCI PF number for this port.
+ * @external: when set, indicates if a port is for an external controller
  */
 struct devlink_port_pci_sf_attrs {
 	u32 controller;
 	u32 sf;
 	u16 pf;
+	u8 external:1;
 };
 
 /**
@@ -1508,7 +1510,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
 void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller,
 				   u16 pf, u16 vf, bool external);
 void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
-				   u32 controller, u16 pf, u32 sf);
+				   u32 controller, u16 pf, u32 sf,
+				   bool external);
 int devlink_sb_register(struct devlink *devlink, unsigned int sb_index,
 			u32 size, u16 ingress_pools_count,
 			u16 egress_pools_count, u16 ingress_tc_count,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 737b61c2976e..4eb969518ee0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8599,9 +8599,10 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set);
  *	@controller: associated controller number for the devlink port instance
  *	@pf: associated PF for the devlink port instance
  *	@sf: associated SF of a PF for the devlink port instance
+ *	@external: indicates if the port is for an external controller
  */
 void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller,
-				   u16 pf, u32 sf)
+				   u16 pf, u32 sf, bool external)
 {
 	struct devlink_port_attrs *attrs = &devlink_port->attrs;
 	int ret;
@@ -8615,6 +8616,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
 	attrs->pci_sf.controller = controller;
 	attrs->pci_sf.pf = pf;
 	attrs->pci_sf.sf = sf;
+	attrs->pci_sf.external = external;
 }
 EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
 
@@ -8667,6 +8669,13 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 			     attrs->pci_vf.pf, attrs->pci_vf.vf);
 		break;
 	case DEVLINK_PORT_FLAVOUR_PCI_SF:
+		if (attrs->pci_sf.external) {
+			n = snprintf(name, len, "c%u", attrs->pci_sf.controller);
+			if (n >= len)
+				return -EINVAL;
+			len -= n;
+			name += n;
+		}
 		n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
 			     attrs->pci_sf.sf);
 		break;
-- 
cgit v1.2.3


From 7e6ccbc1878413b2a2dca717a1ae450eb19e1537 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 5 Mar 2021 08:51:10 +0200
Subject: net/mlx5: SF, Store and use start function id

SF ids in the device are in two different contiguous ranges. One for
the local controller and second for the external host controller.

Prepare code to handle multiple start function id by storing it in the
table.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index 9140c81aa03a..c3126031c2bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -19,18 +19,23 @@ struct mlx5_sf_hw_table {
 	struct mlx5_core_dev *dev;
 	struct mlx5_sf_hw *sfs;
 	int max_local_functions;
+	u16 start_fn_id;
 	struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
 	struct notifier_block vhca_nb;
 };
 
 u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id)
 {
-	return sw_id + mlx5_sf_start_function_id(dev);
+	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+	return table->start_fn_id + sw_id;
 }
 
 static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
 {
-	return hw_id - mlx5_sf_start_function_id(dev);
+	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+
+	return hw_id - table->start_fn_id;
 }
 
 int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
@@ -164,6 +169,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 	table->dev = dev;
 	table->sfs = sfs;
 	table->max_local_functions = max_functions;
+	table->start_fn_id = mlx5_sf_start_function_id(dev);
 	dev->priv.sf_hw_table = table;
 	mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions);
 	return 0;
-- 
cgit v1.2.3


From 326c08a02034ada6586b55860e34c0f4695f62ec Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Thu, 11 Mar 2021 13:51:38 +0200
Subject: net/mlx5: SF, Consider own vhca events of SF devices

Vhca events on eswitch manager are received for all the functions on the
NIC, including for SFs of external host PF controllers.

While SF device handler is only interested in SF devices events related
to its own PF.
Hence, validate if the function belongs to self or not.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index 90b524c59f3c..6a0c6f965ad1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -148,9 +148,19 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
 	struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb);
 	const struct mlx5_vhca_state_event *event = data;
 	struct mlx5_sf_dev *sf_dev;
+	u16 max_functions;
 	u16 sf_index;
+	u16 base_id;
+
+	max_functions = mlx5_sf_max_functions(table->dev);
+	if (!max_functions)
+		return 0;
+
+	base_id = MLX5_CAP_GEN(table->dev, sf_base_id);
+	if (event->function_id < base_id || event->function_id >= (base_id + max_functions))
+		return 0;
 
-	sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id);
+	sf_index = event->function_id - base_id;
 	sf_dev = xa_load(&table->devices, sf_index);
 	switch (event->new_vhca_state) {
 	case MLX5_VHCA_STATE_ALLOCATED:
-- 
cgit v1.2.3


From 01ed9550e8b41e28f27a9ebf515e178fb5e3718b Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 5 Mar 2021 09:35:21 +0200
Subject: net/mlx5: SF, Use helpers for allocation and free

Use helper routines for SF id and SF table allocation and free
so that subsequent patch can reuse it for multiple SF function
id range.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/sf/hw_table.c  | 97 +++++++++++++---------
 1 file changed, 60 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index c3126031c2bf..172bfad372ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -38,37 +38,46 @@ static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
 	return hw_id - table->start_fn_id;
 }
 
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 usr_sfnum)
 {
-	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
-	int sw_id = -ENOSPC;
-	u16 hw_fn_id;
-	int err;
 	int i;
 
-	if (!table || !table->max_local_functions)
-		return -EOPNOTSUPP;
-
-	mutex_lock(&table->table_lock);
 	/* Check if sf with same sfnum already exists or not. */
 	for (i = 0; i < table->max_local_functions; i++) {
-		if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) {
-			err = -EEXIST;
-			goto exist_err;
-		}
+		if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum)
+			return -EEXIST;
 	}
-
 	/* Find the free entry and allocate the entry from the array */
 	for (i = 0; i < table->max_local_functions; i++) {
 		if (!table->sfs[i].allocated) {
 			table->sfs[i].usr_sfnum = usr_sfnum;
 			table->sfs[i].allocated = true;
-			sw_id = i;
-			break;
+			return i;
 		}
 	}
-	if (sw_id == -ENOSPC) {
-		err = -ENOSPC;
+	return -ENOSPC;
+}
+
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, int id)
+{
+	table->sfs[id].allocated = false;
+	table->sfs[id].pending_delete = false;
+}
+
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
+{
+	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+	u16 hw_fn_id;
+	int sw_id;
+	int err;
+
+	if (!table)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&table->table_lock);
+	sw_id = mlx5_sf_hw_table_id_alloc(table, usr_sfnum);
+	if (sw_id < 0) {
+		err = sw_id;
 		goto exist_err;
 	}
 
@@ -87,21 +96,20 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 vhca_err:
 	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
 err:
-	table->sfs[i].allocated = false;
+	mlx5_sf_hw_table_id_free(table, sw_id);
 exist_err:
 	mutex_unlock(&table->table_lock);
 	return err;
 }
 
-static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id)
+static void _mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
 {
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u16 hw_fn_id;
 
 	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
 	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
-	table->sfs[id].allocated = false;
-	table->sfs[id].pending_delete = false;
+	mlx5_sf_hw_table_id_free(table, id);
 }
 
 void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
@@ -109,7 +117,7 @@ void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 
 	mutex_lock(&table->table_lock);
-	_mlx5_sf_hw_id_free(dev, id);
+	_mlx5_sf_hw_table_sf_free(dev, id);
 	mutex_unlock(&table->table_lock);
 }
 
@@ -143,40 +151,55 @@ static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table)
 
 	for (i = 0; i < table->max_local_functions; i++) {
 		if (table->sfs[i].allocated)
-			_mlx5_sf_hw_id_free(table->dev, i);
+			_mlx5_sf_hw_table_sf_free(table->dev, i);
 	}
 }
 
+static int mlx5_sf_hw_table_alloc(struct mlx5_sf_hw_table *table, u16 max_fn, u16 base_id)
+{
+	struct mlx5_sf_hw *sfs;
+
+	sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
+	if (!sfs)
+		return -ENOMEM;
+
+	table->sfs = sfs;
+	table->max_local_functions = max_fn;
+	table->start_fn_id = base_id;
+	return 0;
+}
+
 int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_sf_hw_table *table;
-	struct mlx5_sf_hw *sfs;
-	int max_functions;
+	u16 base_id;
+	u16 max_fn;
+	int err;
 
 	if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev))
 		return 0;
 
-	max_functions = mlx5_sf_max_functions(dev);
+	max_fn = mlx5_sf_max_functions(dev);
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
-	sfs = kcalloc(max_functions, sizeof(*sfs), GFP_KERNEL);
-	if (!sfs)
-		goto table_err;
-
 	mutex_init(&table->table_lock);
 	table->dev = dev;
-	table->sfs = sfs;
-	table->max_local_functions = max_functions;
-	table->start_fn_id = mlx5_sf_start_function_id(dev);
 	dev->priv.sf_hw_table = table;
-	mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions);
+
+	base_id = mlx5_sf_start_function_id(dev);
+	err = mlx5_sf_hw_table_alloc(table, max_fn, base_id);
+	if (err)
+		goto table_err;
+
+	mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_fn);
 	return 0;
 
 table_err:
+	mutex_destroy(&table->table_lock);
 	kfree(table);
-	return -ENOMEM;
+	return err;
 }
 
 void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
@@ -209,7 +232,7 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
 	 * Hence recycle the sf hardware id for reuse.
 	 */
 	if (sf_hw->allocated && sf_hw->pending_delete)
-		_mlx5_sf_hw_id_free(table->dev, sw_id);
+		_mlx5_sf_hw_table_sf_free(table->dev, sw_id);
 	mutex_unlock(&table->table_lock);
 	return 0;
 }
-- 
cgit v1.2.3


From a3088f87d984b3dddde3b3a3e453cef8033a0bd1 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 5 Mar 2021 10:06:06 +0200
Subject: net/mlx5: SF, Split mlx5_sf_hw_table into two parts

Device has SF ids in two different contiguous ranges. One for the local
controller and second for the external controller's PF.

Each such range has its own maximum number of functions and base id.
To allocate SF from either of the range, prepare code to split into
range specific fields into its own structure.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/sf/hw_table.c  | 87 ++++++++++++++--------
 1 file changed, 58 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index 172bfad372ec..b5eab48bbe08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -15,43 +15,55 @@ struct mlx5_sf_hw {
 	u8 pending_delete: 1;
 };
 
-struct mlx5_sf_hw_table {
-	struct mlx5_core_dev *dev;
+struct mlx5_sf_hwc_table {
 	struct mlx5_sf_hw *sfs;
-	int max_local_functions;
+	int max_fn;
 	u16 start_fn_id;
+};
+
+enum mlx5_sf_hwc_index {
+	MLX5_SF_HWC_LOCAL,
+	MLX5_SF_HWC_MAX,
+};
+
+struct mlx5_sf_hw_table {
+	struct mlx5_core_dev *dev;
 	struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
 	struct notifier_block vhca_nb;
+	struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
 };
 
 u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id)
 {
-	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+	struct mlx5_sf_hwc_table *hwc = &dev->priv.sf_hw_table->hwc[MLX5_SF_HWC_LOCAL];
 
-	return table->start_fn_id + sw_id;
+	return hwc->start_fn_id + sw_id;
 }
 
 static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
 {
-	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
+	struct mlx5_sf_hwc_table *hwc = &dev->priv.sf_hw_table->hwc[MLX5_SF_HWC_LOCAL];
 
-	return hw_id - table->start_fn_id;
+	return hw_id - hwc->start_fn_id;
 }
 
 static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 usr_sfnum)
 {
+	struct mlx5_sf_hwc_table *hwc;
 	int i;
 
+	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
+
 	/* Check if sf with same sfnum already exists or not. */
-	for (i = 0; i < table->max_local_functions; i++) {
-		if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum)
+	for (i = 0; i < hwc->max_fn; i++) {
+		if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum)
 			return -EEXIST;
 	}
 	/* Find the free entry and allocate the entry from the array */
-	for (i = 0; i < table->max_local_functions; i++) {
-		if (!table->sfs[i].allocated) {
-			table->sfs[i].usr_sfnum = usr_sfnum;
-			table->sfs[i].allocated = true;
+	for (i = 0; i < hwc->max_fn; i++) {
+		if (!hwc->sfs[i].allocated) {
+			hwc->sfs[i].usr_sfnum = usr_sfnum;
+			hwc->sfs[i].allocated = true;
 			return i;
 		}
 	}
@@ -60,8 +72,10 @@ static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 usr_sfn
 
 static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, int id)
 {
-	table->sfs[id].allocated = false;
-	table->sfs[id].pending_delete = false;
+	struct mlx5_sf_hwc_table *hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
+
+	hwc->sfs[id].allocated = false;
+	hwc->sfs[id].pending_delete = false;
 }
 
 int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
@@ -125,11 +139,13 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
 {
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+	struct mlx5_sf_hwc_table *hwc;
 	u16 hw_fn_id;
 	u8 state;
 	int err;
 
 	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
 	mutex_lock(&table->table_lock);
 	err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
 	if (err)
@@ -137,25 +153,31 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
 	state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
 	if (state == MLX5_VHCA_STATE_ALLOCATED) {
 		mlx5_cmd_dealloc_sf(dev, hw_fn_id);
-		table->sfs[id].allocated = false;
+		hwc->sfs[id].allocated = false;
 	} else {
-		table->sfs[id].pending_delete = true;
+		hwc->sfs[id].pending_delete = true;
 	}
 err:
 	mutex_unlock(&table->table_lock);
 }
 
-static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table)
+static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
+					     struct mlx5_sf_hwc_table *hwc)
 {
 	int i;
 
-	for (i = 0; i < table->max_local_functions; i++) {
-		if (table->sfs[i].allocated)
-			_mlx5_sf_hw_table_sf_free(table->dev, i);
+	for (i = 0; i < hwc->max_fn; i++) {
+		if (hwc->sfs[i].allocated)
+			_mlx5_sf_hw_table_sf_free(dev, i);
 	}
 }
 
-static int mlx5_sf_hw_table_alloc(struct mlx5_sf_hw_table *table, u16 max_fn, u16 base_id)
+static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
+{
+	mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
+}
+
+static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id)
 {
 	struct mlx5_sf_hw *sfs;
 
@@ -163,12 +185,17 @@ static int mlx5_sf_hw_table_alloc(struct mlx5_sf_hw_table *table, u16 max_fn, u1
 	if (!sfs)
 		return -ENOMEM;
 
-	table->sfs = sfs;
-	table->max_local_functions = max_fn;
-	table->start_fn_id = base_id;
+	hwc->sfs = sfs;
+	hwc->max_fn = max_fn;
+	hwc->start_fn_id = base_id;
 	return 0;
 }
 
+static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
+{
+	kfree(hwc->sfs);
+}
+
 int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_sf_hw_table *table;
@@ -189,7 +216,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 	dev->priv.sf_hw_table = table;
 
 	base_id = mlx5_sf_start_function_id(dev);
-	err = mlx5_sf_hw_table_alloc(table, max_fn, base_id);
+	err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id);
 	if (err)
 		goto table_err;
 
@@ -210,7 +237,7 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
 		return;
 
 	mutex_destroy(&table->table_lock);
-	kfree(table->sfs);
+	mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
 	kfree(table);
 }
 
@@ -218,14 +245,16 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
 {
 	struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb);
 	const struct mlx5_vhca_state_event *event = data;
+	struct mlx5_sf_hwc_table *hwc;
 	struct mlx5_sf_hw *sf_hw;
 	u16 sw_id;
 
 	if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
 		return 0;
 
+	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
 	sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id);
-	sf_hw = &table->sfs[sw_id];
+	sf_hw = &hwc->sfs[sw_id];
 
 	mutex_lock(&table->table_lock);
 	/* SF driver notified through firmware that SF is finally detached.
@@ -257,7 +286,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev)
 
 	mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb);
 	/* Dealloc SFs whose firmware event has been missed. */
-	mlx5_sf_hw_dealloc_all(table);
+	mlx5_sf_hw_table_dealloc_all(table);
 }
 
 bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev)
-- 
cgit v1.2.3


From f1b9acd3a5e800bb68e7b8abc5b56d01faf68bbc Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Mon, 8 Mar 2021 13:19:53 +0200
Subject: net/mlx5: SF, Extend SF table for additional SF id range

Extended the SF table to cover additioanl SF id range of external
controller.

A user optionallly provides the external controller number when user
wants to create SF on the external controller.

An example on eswitch system:
$ devlink dev eswitch set pci/0033:01:00.0 mode switchdev

$ devlink port show
pci/0033:01:00.0/196607: type eth netdev enP51p1s0f0np0 flavour physical port 0 splittable false
pci/0033:01:00.0/131072: type eth netdev eth0 flavour pcipf controller 1 pfnum 0 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00

$ devlink port add pci/0033:01:00.0 flavour pcisf pfnum 0 sfnum 77 controller 1
pci/0033:01:00.0/163840: type eth netdev eth1 flavour pcisf controller 1 pfnum 0 sfnum 77 external true splittable false
  function:
    hw_addr 00:00:00:00:00:00 state inactive opstate detached

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/esw/devlink_port.c |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   6 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  17 ++-
 .../net/ethernet/mellanox/mlx5/core/sf/devlink.c   |  29 +++--
 .../net/ethernet/mellanox/mlx5/core/sf/hw_table.c  | 129 +++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h  |   8 +-
 6 files changed, 140 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 183f782b940f..1703384eca95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -123,7 +123,7 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1
 }
 
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
-				      u16 vport_num, u32 sfnum)
+				      u16 vport_num, u32 controller, u32 sfnum)
 {
 	struct mlx5_core_dev *dev = esw->dev;
 	struct netdev_phys_item_id ppid = {};
@@ -141,7 +141,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
 	mlx5_esw_get_port_parent_id(dev, &ppid);
 	memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 	dl_port->attrs.switch_id.id_len = ppid.id_len;
-	devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum, false);
+	devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller);
 	devlink = priv_to_devlink(dev);
 	dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num);
 	err = devlink_port_register(devlink, dl_port, dl_port_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 0812cee8f603..64ccb2bc0b58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -662,11 +662,11 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
 struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
 
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
-				      u16 vport_num, u32 sfnum);
+				      u16 vport_num, u32 controller, u32 sfnum);
 void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
 
 int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
-				      u16 vport_num, u32 sfnum);
+				      u16 vport_num, u32 controller, u32 sfnum);
 void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num);
 int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
 
@@ -695,6 +695,8 @@ void mlx5_esw_unlock(struct mlx5_eswitch *esw);
 
 void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a1dd66540ba0..db1e74280e57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2747,6 +2747,19 @@ static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw)
 	return 0;
 }
 
+bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller)
+{
+	/* Local controller is always valid */
+	if (controller == 0)
+		return true;
+
+	if (!mlx5_core_is_ecpf_esw_manager(esw->dev))
+		return false;
+
+	/* External host number starts with zero in device */
+	return (controller == esw->offloads.host_number + 1);
+}
+
 int esw_offloads_enable(struct mlx5_eswitch *esw)
 {
 	struct mapping_ctx *reg_c0_obj_pool;
@@ -3295,7 +3308,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
 EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
 
 int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
-				      u16 vport_num, u32 sfnum)
+				      u16 vport_num, u32 controller, u32 sfnum)
 {
 	int err;
 
@@ -3303,7 +3316,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
 	if (err)
 		return err;
 
-	err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum);
+	err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum);
 	if (err)
 		goto devlink_err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 5fa261334cd0..a8e73c9ed1ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -12,6 +12,7 @@
 struct mlx5_sf {
 	struct devlink_port dl_port;
 	unsigned int port_index;
+	u32 controller;
 	u16 id;
 	u16 hw_fn_id;
 	u16 hw_state;
@@ -58,7 +59,8 @@ static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf)
 }
 
 static struct mlx5_sf *
-mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack)
+mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw,
+	      u32 controller, u32 sfnum, struct netlink_ext_ack *extack)
 {
 	unsigned int dl_port_index;
 	struct mlx5_sf *sf;
@@ -66,7 +68,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
 	int id_err;
 	int err;
 
-	id_err = mlx5_sf_hw_table_sf_alloc(table->dev, sfnum);
+	if (!mlx5_esw_offloads_controller_valid(esw, controller)) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid controller number");
+		return ERR_PTR(-EINVAL);
+	}
+
+	id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum);
 	if (id_err < 0) {
 		err = id_err;
 		goto id_err;
@@ -78,11 +85,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
 		goto alloc_err;
 	}
 	sf->id = id_err;
-	hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id);
+	hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id);
 	dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id);
 	sf->port_index = dl_port_index;
 	sf->hw_fn_id = hw_fn_id;
 	sf->hw_state = MLX5_VHCA_STATE_ALLOCATED;
+	sf->controller = controller;
 
 	err = mlx5_sf_id_insert(table, sf);
 	if (err)
@@ -93,7 +101,7 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex
 insert_err:
 	kfree(sf);
 alloc_err:
-	mlx5_sf_hw_table_sf_free(table->dev, id_err);
+	mlx5_sf_hw_table_sf_free(table->dev, controller, id_err);
 id_err:
 	if (err == -EEXIST)
 		NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum");
@@ -103,7 +111,7 @@ id_err:
 static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf)
 {
 	mlx5_sf_id_erase(table, sf);
-	mlx5_sf_hw_table_sf_free(table->dev, sf->id);
+	mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id);
 	kfree(sf);
 }
 
@@ -272,12 +280,12 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
 	struct mlx5_sf *sf;
 	int err;
 
-	sf = mlx5_sf_alloc(table, new_attr->sfnum, extack);
+	sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack);
 	if (IS_ERR(sf))
 		return PTR_ERR(sf);
 
 	err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id,
-						new_attr->sfnum);
+						new_attr->controller, new_attr->sfnum);
 	if (err)
 		goto esw_err;
 	*new_port_index = sf->port_index;
@@ -306,7 +314,8 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_
 				   "User must provide unique sfnum. Driver does not support auto assignment");
 		return -EOPNOTSUPP;
 	}
-	if (new_attr->controller_valid && new_attr->controller) {
+	if (new_attr->controller_valid && new_attr->controller &&
+	    !mlx5_core_is_ecpf_esw_manager(dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
 		return -EOPNOTSUPP;
 	}
@@ -352,10 +361,10 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf)
 		 * firmware gives confirmation that it is detached by the driver.
 		 */
 		mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id);
-		mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+		mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
 		kfree(sf);
 	} else {
-		mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id);
+		mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id);
 		kfree(sf);
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
index b5eab48bbe08..ef5f892aafad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
@@ -8,6 +8,7 @@
 #include "ecpf.h"
 #include "vhca_event.h"
 #include "mlx5_core.h"
+#include "eswitch.h"
 
 struct mlx5_sf_hw {
 	u32 usr_sfnum;
@@ -23,6 +24,7 @@ struct mlx5_sf_hwc_table {
 
 enum mlx5_sf_hwc_index {
 	MLX5_SF_HWC_LOCAL,
+	MLX5_SF_HWC_EXTERNAL,
 	MLX5_SF_HWC_MAX,
 };
 
@@ -33,26 +35,50 @@ struct mlx5_sf_hw_table {
 	struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX];
 };
 
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id)
+static struct mlx5_sf_hwc_table *
+mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller)
 {
-	struct mlx5_sf_hwc_table *hwc = &dev->priv.sf_hw_table->hwc[MLX5_SF_HWC_LOCAL];
+	int idx = !!controller;
 
-	return hwc->start_fn_id + sw_id;
+	return &dev->priv.sf_hw_table->hwc[idx];
 }
 
-static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id)
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id)
 {
-	struct mlx5_sf_hwc_table *hwc = &dev->priv.sf_hw_table->hwc[MLX5_SF_HWC_LOCAL];
+	struct mlx5_sf_hwc_table *hwc;
+
+	hwc = mlx5_sf_controller_to_hwc(dev, controller);
+	return hwc->start_fn_id + sw_id;
+}
 
+static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id)
+{
 	return hw_id - hwc->start_fn_id;
 }
 
-static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 usr_sfnum)
+static struct mlx5_sf_hwc_table *
+mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(table->hwc); i++) {
+		if (table->hwc[i].max_fn &&
+		    fn_id >= table->hwc[i].start_fn_id &&
+		    fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn))
+			return &table->hwc[i];
+	}
+	return NULL;
+}
+
+static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller,
+				     u32 usr_sfnum)
 {
 	struct mlx5_sf_hwc_table *hwc;
 	int i;
 
-	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
+	hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
+	if (!hwc->sfs)
+		return -ENOSPC;
 
 	/* Check if sf with same sfnum already exists or not. */
 	for (i = 0; i < hwc->max_fn; i++) {
@@ -70,15 +96,16 @@ static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 usr_sfn
 	return -ENOSPC;
 }
 
-static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, int id)
+static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id)
 {
-	struct mlx5_sf_hwc_table *hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
+	struct mlx5_sf_hwc_table *hwc;
 
+	hwc = mlx5_sf_controller_to_hwc(table->dev, controller);
 	hwc->sfs[id].allocated = false;
 	hwc->sfs[id].pending_delete = false;
 }
 
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum)
 {
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u16 hw_fn_id;
@@ -89,13 +116,13 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 		return -EOPNOTSUPP;
 
 	mutex_lock(&table->table_lock);
-	sw_id = mlx5_sf_hw_table_id_alloc(table, usr_sfnum);
+	sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum);
 	if (sw_id < 0) {
 		err = sw_id;
 		goto exist_err;
 	}
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sw_id);
+	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id);
 	err = mlx5_cmd_alloc_sf(dev, hw_fn_id);
 	if (err)
 		goto err;
@@ -104,38 +131,48 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
 	if (err)
 		goto vhca_err;
 
+	if (controller) {
+		/* If this SF is for external controller, SF manager
+		 * needs to arm firmware to receive the events.
+		 */
+		err = mlx5_vhca_event_arm(dev, hw_fn_id);
+		if (err)
+			goto vhca_err;
+	}
+
 	mutex_unlock(&table->table_lock);
 	return sw_id;
 
 vhca_err:
 	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
 err:
-	mlx5_sf_hw_table_id_free(table, sw_id);
+	mlx5_sf_hw_table_id_free(table, controller, sw_id);
 exist_err:
 	mutex_unlock(&table->table_lock);
 	return err;
 }
 
-static void _mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
 {
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u16 hw_fn_id;
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
+	mutex_lock(&table->table_lock);
+	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
 	mlx5_cmd_dealloc_sf(dev, hw_fn_id);
-	mlx5_sf_hw_table_id_free(table, id);
+	mlx5_sf_hw_table_id_free(table, controller, id);
+	mutex_unlock(&table->table_lock);
 }
 
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id)
+static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev,
+					 struct mlx5_sf_hwc_table *hwc, int idx)
 {
-	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
-
-	mutex_lock(&table->table_lock);
-	_mlx5_sf_hw_table_sf_free(dev, id);
-	mutex_unlock(&table->table_lock);
+	mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx);
+	hwc->sfs[idx].allocated = false;
+	hwc->sfs[idx].pending_delete = false;
 }
 
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id)
 {
 	struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table;
 	u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
@@ -144,8 +181,8 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
 	u8 state;
 	int err;
 
-	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
-	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
+	hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id);
+	hwc = mlx5_sf_controller_to_hwc(dev, controller);
 	mutex_lock(&table->table_lock);
 	err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
 	if (err)
@@ -168,12 +205,13 @@ static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev,
 
 	for (i = 0; i < hwc->max_fn; i++) {
 		if (hwc->sfs[i].allocated)
-			_mlx5_sf_hw_table_sf_free(dev, i);
+			mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i);
 	}
 }
 
 static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table)
 {
+	mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]);
 	mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]);
 }
 
@@ -181,6 +219,9 @@ static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn,
 {
 	struct mlx5_sf_hw *sfs;
 
+	if (!max_fn)
+		return 0;
+
 	sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL);
 	if (!sfs)
 		return -ENOMEM;
@@ -199,14 +240,25 @@ static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc)
 int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_sf_hw_table *table;
+	u16 max_ext_fn = 0;
+	u16 ext_base_id;
+	u16 max_fn = 0;
 	u16 base_id;
-	u16 max_fn;
 	int err;
 
-	if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev))
+	if (!mlx5_vhca_event_supported(dev))
+		return 0;
+
+	if (mlx5_sf_supported(dev))
+		max_fn = mlx5_sf_max_functions(dev);
+
+	err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id);
+	if (err)
+		return err;
+
+	if (!max_fn && !max_ext_fn)
 		return 0;
 
-	max_fn = mlx5_sf_max_functions(dev);
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
@@ -220,9 +272,16 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto table_err;
 
-	mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_fn);
+	err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL],
+					max_ext_fn, ext_base_id);
+	if (err)
+		goto ext_err;
+
+	mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn);
 	return 0;
 
+ext_err:
+	mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
 table_err:
 	mutex_destroy(&table->table_lock);
 	kfree(table);
@@ -237,6 +296,7 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev)
 		return;
 
 	mutex_destroy(&table->table_lock);
+	mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]);
 	mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]);
 	kfree(table);
 }
@@ -252,8 +312,11 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
 	if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED)
 		return 0;
 
-	hwc = &table->hwc[MLX5_SF_HWC_LOCAL];
-	sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id);
+	hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id);
+	if (!hwc)
+		return 0;
+
+	sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id);
 	sf_hw = &hwc->sfs[sw_id];
 
 	mutex_lock(&table->table_lock);
@@ -261,7 +324,7 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode
 	 * Hence recycle the sf hardware id for reuse.
 	 */
 	if (sf_hw->allocated && sf_hw->pending_delete)
-		_mlx5_sf_hw_table_sf_free(table->dev, sw_id);
+		mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id);
 	mutex_unlock(&table->table_lock);
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
index b36be5ecb496..7114f3fc335f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h
@@ -12,11 +12,11 @@ int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id);
 int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 
-u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id);
+u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id);
 
-int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum);
-void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id);
-void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id);
+int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum);
+void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
+void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id);
 bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev);
 
 #endif
-- 
cgit v1.2.3


From 2ce4fd5a0039b805a6716779e8669dd69a20ad60 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Apr 2021 09:47:23 +0100
Subject: can: etas_es58x: Fix missing null check on netdev pointer

There is an assignment to *netdev that is that can potentially be null
but the null check is checking netdev and not *netdev as intended. Fix
this by adding in the missing * operator.

Fixes: 8537257874e9 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces")
Link: https://lore.kernel.org/r/20210415084723.1807935-1-colin.king@canonical.com
Addresses-Coverity: ("Dereference before null check")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/etas_es58x/es58x_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.h b/drivers/net/can/usb/etas_es58x/es58x_core.h
index 5f4e7dc5be35..fcf219e727bf 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.h
@@ -625,7 +625,7 @@ static inline int es58x_get_netdev(struct es58x_device *es58x_dev,
 		return -ECHRNG;
 
 	*netdev = es58x_dev->netdev[channel_idx];
-	if (!netdev || !netif_device_present(*netdev))
+	if (!*netdev || !netif_device_present(*netdev))
 		return -ENODEV;
 
 	return 0;
-- 
cgit v1.2.3


From 1c9690dd308efd05e7f390c15bc4f26842822bf5 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 15 Apr 2021 12:30:50 +0100
Subject: can: etas_es58x: Fix a couple of spelling mistakes

There are spelling mistakes in netdev_dbg and netdev_dbg messages,
fix these.

Link: https://lore.kernel.org/r/20210415113050.1942333-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/etas_es58x/es58x_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c
index 57e5f94468e9..8e9102482c52 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_core.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_core.c
@@ -688,7 +688,7 @@ int es58x_rx_err_msg(struct net_device *netdev, enum es58x_err error,
 
 	case ES58X_ERR_PROT_STUFF:
 		if (net_ratelimit())
-			netdev_dbg(netdev, "Error BITSUFF\n");
+			netdev_dbg(netdev, "Error BITSTUFF\n");
 		if (cf)
 			cf->data[2] |= CAN_ERR_PROT_STUFF;
 		break;
@@ -1015,7 +1015,7 @@ int es58x_rx_cmd_ret_u32(struct net_device *netdev,
 			int ret;
 
 			netdev_warn(netdev,
-				    "%s: channel is already opened, closing and re-openning it to reflect new configuration\n",
+				    "%s: channel is already opened, closing and re-opening it to reflect new configuration\n",
 				    ret_desc);
 			ret = ops->disable_channel(es58x_priv(netdev));
 			if (ret)
-- 
cgit v1.2.3


From 924e464f4a8a0bb9e011ed37342e7c23c1670dc2 Mon Sep 17 00:00:00 2001
From: Erik Flodin <erik@flodin.me>
Date: Tue, 20 Apr 2021 21:12:00 +0200
Subject: can: add a note that RECV_OWN_MSGS frames are subject to filtering

Some parts of the documentation may lead the reader to think that the
socket's own frames are always received when CAN_RAW_RECV_OWN_MSGS is
enabled, but all frames are subject to filtering.

As explained by Marc Kleine-Budde:

On TX complete of a CAN frame it's pushed into the RX path of the
networking stack, along with the information of the originating socket.

Then the CAN frame is delivered into AF_CAN, where it is passed on to
all registered receivers depending on filters. One receiver is the
sending socket in CAN_RAW. Then in CAN_RAW the it is checked if the
sending socket has RECV_OWN_MSGS enabled.

Link: https://lore.kernel.org/r/20210420191212.42753-1-erik@flodin.me
Signed-off-by: Erik Flodin <erik@flodin.me>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/can.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index f8dae662e454..f34cb0e4460e 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -608,6 +608,8 @@ demand:
     setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
                &recv_own_msgs, sizeof(recv_own_msgs));
 
+Note that reception of a socket's own CAN frames are subject to the same
+filtering as other CAN frames (see :ref:`socketcan-rawfilter`).
 
 .. _socketcan-rawfd:
 
-- 
cgit v1.2.3


From e6b031d3c37f79d135c642834bdda7233a29db8d Mon Sep 17 00:00:00 2001
From: Erik Flodin <erik@flodin.me>
Date: Sun, 25 Apr 2021 16:14:35 +0200
Subject: can: proc: fix rcvlist_* header alignment on 64-bit system

Before this fix, the function and userdata columns weren't aligned:
  device   can_id   can_mask  function  userdata   matches  ident
   vcan0  92345678  9fffffff  0000000000000000  0000000000000000         0  raw
   vcan0     123    00000123  0000000000000000  0000000000000000         0  raw

After the fix they are:
  device   can_id   can_mask      function          userdata       matches  ident
   vcan0  92345678  9fffffff  0000000000000000  0000000000000000         0  raw
   vcan0     123    00000123  0000000000000000  0000000000000000         0  raw

Link: Link: https://lore.kernel.org/r/20210425141440.229653-1-erik@flodin.me
Signed-off-by: Erik Flodin <erik@flodin.me>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/proc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/can/proc.c b/net/can/proc.c
index b15760b5c1cc..d1fe49e6f16d 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -205,8 +205,10 @@ static void can_print_recv_banner(struct seq_file *m)
 	 *                  can1.  00000000  00000000  00000000
 	 *                 .......          0  tp20
 	 */
-	seq_puts(m, "  device   can_id   can_mask  function"
-			"  userdata   matches  ident\n");
+	if (IS_ENABLED(CONFIG_64BIT))
+		seq_puts(m, "  device   can_id   can_mask      function          userdata       matches  ident\n");
+	else
+		seq_puts(m, "  device   can_id   can_mask  function  userdata   matches  ident\n");
 }
 
 static int can_stats_proc_show(struct seq_file *m, void *v)
-- 
cgit v1.2.3


From bf7d20cd51d7b6aa969e263b33805af6e147a70e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 23 Apr 2021 17:52:23 +0800
Subject: ch_ktls: Remove redundant variable result

Variable result is being assigned a value from a calculation
however the variable is never read, so this redundant variable
can be removed.

Cleans up the following clang-analyzer warning:

drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c:1488:2:
warning: Value stored to 'pos' is never read
[clang-analyzer-deadcode.DeadStores].

drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c:876:3:
warning: Value stored to 'pos' is never read
[clang-analyzer-deadcode.DeadStores].

drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c:36:3:
warning: Value stored to 'start' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index a3f5b80888e5..ef3f1e92632f 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -33,7 +33,6 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
 
 	if (unlikely(start < skb_linear_data_len)) {
 		frag_size = min(len, skb_linear_data_len - start);
-		start = 0;
 	} else {
 		start -= skb_linear_data_len;
 
@@ -873,10 +872,10 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info,
 	}
 	/* update receive window */
 	if (first_wr || tx_info->prev_win != tcp_win) {
-		pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
-						 TCB_RCV_WND_W,
-						 TCB_RCV_WND_V(TCB_RCV_WND_M),
-						 TCB_RCV_WND_V(tcp_win), 0);
+		chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos,
+					   TCB_RCV_WND_W,
+					   TCB_RCV_WND_V(TCB_RCV_WND_M),
+					   TCB_RCV_WND_V(tcp_win), 0);
 		tx_info->prev_win = tcp_win;
 		cpl++;
 	}
@@ -1485,7 +1484,6 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info,
 	wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
 	wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16));
 	wr->cookie = 0;
-	pos += sizeof(*wr);
 	/* ULP_TXPKT */
 	ulptx = (struct ulp_txpkt *)(wr + 1);
 	ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) |
-- 
cgit v1.2.3


From bbd6f0a948139970f4a615dff189d9a503681a39 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 23 Apr 2021 18:13:19 -0400
Subject: bnxt_en: Fix RX consumer index logic in the error path.

In bnxt_rx_pkt(), the RX buffers are expected to complete in order.
If the RX consumer index indicates an out of order buffer completion,
it means we are hitting a hardware bug and the driver will abort all
remaining RX packets and reset the RX ring.  The RX consumer index
that we pass to bnxt_discard_rx() is not correct.  We should be
passing the current index (tmp_raw_cons) instead of the old index
(raw_cons).  This bug can cause us to be at the wrong index when
trying to abort the next RX packet.  It can crash like this:

 #0 [ffff9bbcdf5c39a8] machine_kexec at ffffffff9b05e007
 #1 [ffff9bbcdf5c3a00] __crash_kexec at ffffffff9b111232
 #2 [ffff9bbcdf5c3ad0] panic at ffffffff9b07d61e
 #3 [ffff9bbcdf5c3b50] oops_end at ffffffff9b030978
 #4 [ffff9bbcdf5c3b78] no_context at ffffffff9b06aaf0
 #5 [ffff9bbcdf5c3bd8] __bad_area_nosemaphore at ffffffff9b06ae2e
 #6 [ffff9bbcdf5c3c28] bad_area_nosemaphore at ffffffff9b06af24
 #7 [ffff9bbcdf5c3c38] __do_page_fault at ffffffff9b06b67e
 #8 [ffff9bbcdf5c3cb0] do_page_fault at ffffffff9b06bb12
 #9 [ffff9bbcdf5c3ce0] page_fault at ffffffff9bc015c5
    [exception RIP: bnxt_rx_pkt+237]
    RIP: ffffffffc0259cdd  RSP: ffff9bbcdf5c3d98  RFLAGS: 00010213
    RAX: 000000005dd8097f  RBX: ffff9ba4cb11b7e0  RCX: ffffa923cf6e9000
    RDX: 0000000000000fff  RSI: 0000000000000627  RDI: 0000000000001000
    RBP: ffff9bbcdf5c3e60   R8: 0000000000420003   R9: 000000000000020d
    R10: ffffa923cf6ec138  R11: ffff9bbcdf5c3e83  R12: ffff9ba4d6f928c0
    R13: ffff9ba4cac28080  R14: ffff9ba4cb11b7f0  R15: ffff9ba4d5a30000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018

Fixes: a1b0e4e684e9 ("bnxt_en: Improve RX consumer index validity check.")
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index aeb8c61c0f87..73239d3eaca1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1732,14 +1732,16 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
 	cons = rxcmp->rx_cmp_opaque;
 	if (unlikely(cons != rxr->rx_next_cons)) {
-		int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp);
+		int rc1 = bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp);
 
 		/* 0xffff is forced error, don't print it */
 		if (rxr->rx_next_cons != 0xffff)
 			netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
 				    cons, rxr->rx_next_cons);
 		bnxt_sched_reset(bp, rxr);
-		return rc1;
+		if (rc1)
+			return rc1;
+		goto next_rx_no_prod_no_len;
 	}
 	rx_buf = &rxr->rx_buf_ring[cons];
 	data = rx_buf->data;
-- 
cgit v1.2.3


From 64ff412ad41fe3a5bf759ff4844dc1382176485c Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Fri, 23 Apr 2021 18:12:35 -0700
Subject: hv_netvsc: Make netvsc/VF binding check both MAC and serial number

Currently the netvsc/VF binding logic only checks the PCI serial number.

The Microsoft Azure Network Adapter (MANA) supports multiple net_device
interfaces (each such interface is called a "vPort", and has its unique
MAC address) which are backed by the same VF PCI device, so the binding
logic should check both the MAC address and the PCI serial number.

The change should not break any other existing VF drivers, because
Hyper-V NIC SR-IOV implementation requires the netvsc network
interface and the VF network interface have the same MAC address.

Co-developed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Co-developed-by: Shachar Raindel <shacharr@microsoft.com>
Signed-off-by: Shachar Raindel <shacharr@microsoft.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 7349a70af083..f682a5572d84 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2297,6 +2297,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 {
 	struct device *parent = vf_netdev->dev.parent;
 	struct net_device_context *ndev_ctx;
+	struct net_device *ndev;
 	struct pci_dev *pdev;
 	u32 serial;
 
@@ -2319,8 +2320,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 		if (!ndev_ctx->vf_alloc)
 			continue;
 
-		if (ndev_ctx->vf_serial == serial)
-			return hv_get_drvdata(ndev_ctx->device_ctx);
+		if (ndev_ctx->vf_serial != serial)
+			continue;
+
+		ndev = hv_get_drvdata(ndev_ctx->device_ctx);
+		if (ndev->addr_len != vf_netdev->addr_len ||
+		    memcmp(ndev->perm_addr, vf_netdev->perm_addr,
+			   ndev->addr_len) != 0)
+			continue;
+
+		return ndev;
+
 	}
 
 	netdev_notice(vf_netdev,
-- 
cgit v1.2.3


From 885e8c68247cc2a9f1761a3d66fd274247a0faaf Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 19 Apr 2021 18:16:49 +0200
Subject: netfilter: nat: move nf_xfrm_me_harder to where it is used

remove the export and make it static.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h |  2 --
 net/netfilter/nf_nat_core.c    | 37 -------------------------------------
 net/netfilter/nf_nat_proto.c   | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0d412dd63707..987111ae5240 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -104,8 +104,6 @@ unsigned int
 nf_nat_inet_fn(void *priv, struct sk_buff *skb,
 	       const struct nf_hook_state *state);
 
-int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
-
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
 {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index b7c3c902290f..7de595ead06a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -146,43 +146,6 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 		return;
 	}
 }
-
-int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
-{
-	struct flowi fl;
-	unsigned int hh_len;
-	struct dst_entry *dst;
-	struct sock *sk = skb->sk;
-	int err;
-
-	err = xfrm_decode_session(skb, &fl, family);
-	if (err < 0)
-		return err;
-
-	dst = skb_dst(skb);
-	if (dst->xfrm)
-		dst = ((struct xfrm_dst *)dst)->route;
-	if (!dst_hold_safe(dst))
-		return -EHOSTUNREACH;
-
-	if (sk && !net_eq(net, sock_net(sk)))
-		sk = NULL;
-
-	dst = xfrm_lookup(net, dst, &fl, sk, 0);
-	if (IS_ERR(dst))
-		return PTR_ERR(dst);
-
-	skb_dst_drop(skb);
-	skb_dst_set(skb, dst);
-
-	/* Change in oif may mean change in hh_len. */
-	hh_len = skb_dst(skb)->dev->hard_header_len;
-	if (skb_headroom(skb) < hh_len &&
-	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
-		return -ENOMEM;
-	return 0;
-}
-EXPORT_SYMBOL(nf_xfrm_me_harder);
 #endif /* CONFIG_XFRM */
 
 /* We keep an extra hash for each conntrack, for fast searching. */
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 4731d21fc3ad..48cc60084d28 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -659,6 +659,44 @@ nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
 	return ret;
 }
 
+#ifdef CONFIG_XFRM
+static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
+{
+	struct sock *sk = skb->sk;
+	struct dst_entry *dst;
+	unsigned int hh_len;
+	struct flowi fl;
+	int err;
+
+	err = xfrm_decode_session(skb, &fl, family);
+	if (err < 0)
+		return err;
+
+	dst = skb_dst(skb);
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	if (!dst_hold_safe(dst))
+		return -EHOSTUNREACH;
+
+	if (sk && !net_eq(net, sock_net(sk)))
+		sk = NULL;
+
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = skb_dst(skb)->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -ENOMEM;
+	return 0;
+}
+#endif
+
 static unsigned int
 nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
-- 
cgit v1.2.3


From e0bb96db96f8ca94349344a2ea7bebc6f8cefdae Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 21 Apr 2021 01:12:44 +0200
Subject: netfilter: nft_socket: add support for cgroupsv2

Allow to match on the cgroupsv2 id from ancestor level.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 +++
 net/netfilter/nft_socket.c               | 48 +++++++++++++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 79bab7a36b30..467365ed59a7 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1014,11 +1014,13 @@ enum nft_rt_attributes {
  *
  * @NFTA_SOCKET_KEY: socket key to match
  * @NFTA_SOCKET_DREG: destination register
+ * @NFTA_SOCKET_LEVEL: cgroups2 ancestor level (only for cgroupsv2)
  */
 enum nft_socket_attributes {
 	NFTA_SOCKET_UNSPEC,
 	NFTA_SOCKET_KEY,
 	NFTA_SOCKET_DREG,
+	NFTA_SOCKET_LEVEL,
 	__NFTA_SOCKET_MAX
 };
 #define NFTA_SOCKET_MAX		(__NFTA_SOCKET_MAX - 1)
@@ -1029,11 +1031,13 @@ enum nft_socket_attributes {
  * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
  * @NFT_SOCKET_MARK: Value of the socket mark
  * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0)
+ * @NFT_SOCKET_CGROUPV2: Match on cgroups version 2
  */
 enum nft_socket_keys {
 	NFT_SOCKET_TRANSPARENT,
 	NFT_SOCKET_MARK,
 	NFT_SOCKET_WILDCARD,
+	NFT_SOCKET_CGROUPV2,
 	__NFT_SOCKET_MAX
 };
 #define NFT_SOCKET_MAX	(__NFT_SOCKET_MAX - 1)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index c9b8a2b03b71..9c169d100651 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,6 +9,7 @@
 
 struct nft_socket {
 	enum nft_socket_keys		key:8;
+	u8				level;
 	union {
 		u8			dreg;
 	};
@@ -33,6 +34,26 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
 	}
 }
 
+#ifdef CONFIG_CGROUPS
+static noinline bool
+nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
+{
+	struct sock *sk = skb_to_full_sk(pkt->skb);
+	struct cgroup *cgrp;
+
+	if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+		return false;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	if (level > cgrp->level)
+		return false;
+
+	memcpy(dest, &cgrp->ancestor_ids[level], sizeof(u64));
+
+	return true;
+}
+#endif
+
 static void nft_socket_eval(const struct nft_expr *expr,
 			    struct nft_regs *regs,
 			    const struct nft_pktinfo *pkt)
@@ -85,6 +106,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
 		}
 		nft_socket_wildcard(pkt, regs, sk, dest);
 		break;
+#ifdef CONFIG_CGROUPS
+	case NFT_SOCKET_CGROUPV2:
+		if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
+		break;
+#endif
 	default:
 		WARN_ON(1);
 		regs->verdict.code = NFT_BREAK;
@@ -97,6 +126,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
 	[NFTA_SOCKET_KEY]		= { .type = NLA_U32 },
 	[NFTA_SOCKET_DREG]		= { .type = NLA_U32 },
+	[NFTA_SOCKET_LEVEL]		= { .type = NLA_U32 },
 };
 
 static int nft_socket_init(const struct nft_ctx *ctx,
@@ -104,7 +134,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_socket *priv = nft_expr_priv(expr);
-	unsigned int len;
+	unsigned int len, level;
 
 	if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
 		return -EINVAL;
@@ -129,6 +159,19 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 	case NFT_SOCKET_MARK:
 		len = sizeof(u32);
 		break;
+#ifdef CONFIG_CGROUPS
+	case NFT_SOCKET_CGROUPV2:
+		if (!tb[NFTA_SOCKET_LEVEL])
+			return -EINVAL;
+
+		level = ntohl(nla_get_u32(tb[NFTA_SOCKET_LEVEL]));
+		if (level > 255)
+			return -EOPNOTSUPP;
+
+		priv->level = level;
+		len = sizeof(u64);
+		break;
+#endif
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -146,6 +189,9 @@ static int nft_socket_dump(struct sk_buff *skb,
 		return -1;
 	if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
 		return -1;
+	if (priv->key == NFT_SOCKET_CGROUPV2 &&
+	    nla_put_u32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+		return -1;
 	return 0;
 }
 
-- 
cgit v1.2.3


From de8c12110a130337c8e7e7b8250de0580e644dee Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:45:40 +0200
Subject: netfilter: disable defrag once its no longer needed

When I changed defrag hooks to no longer get registered by default I
intentionally made it so that registration can only be un-done by unloading
the nf_defrag_ipv4/6 module.

In hindsight this was too conservative; there is no reason to keep defrag
on while there is no feature dependency anymore.

Moreover, this won't work if user isn't allowed to remove nf_defrag module.

This adds the disable() functions for both ipv4 and ipv6 and calls them
from conntrack, TPROXY and the xtables socket module.

ipvs isn't converted here, it will behave as before this patch and
will need module removal.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_defrag_ipv4.h |  3 ++-
 include/net/netfilter/ipv6/nf_defrag_ipv6.h |  3 ++-
 net/ipv4/netfilter/nf_defrag_ipv4.c         | 30 +++++++++++++++++++++++------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c   | 29 ++++++++++++++++++++++------
 net/netfilter/nf_conntrack_proto.c          |  8 ++++++--
 net/netfilter/nft_tproxy.c                  | 24 +++++++++++++++++++++++
 net/netfilter/xt_TPROXY.c                   | 13 +++++++++++++
 net/netfilter/xt_socket.c                   | 14 ++++++++++++++
 8 files changed, 108 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
index bcbd724cc048..7fda9ce9f694 100644
--- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -3,6 +3,7 @@
 #define _NF_DEFRAG_IPV4_H
 
 struct net;
-int nf_defrag_ipv4_enable(struct net *);
+int nf_defrag_ipv4_enable(struct net *net);
+void nf_defrag_ipv4_disable(struct net *net);
 
 #endif /* _NF_DEFRAG_IPV4_H */
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index ece923e2035b..0fd8a4159662 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -5,7 +5,8 @@
 #include <linux/skbuff.h>
 #include <linux/types.h>
 
-int nf_defrag_ipv6_enable(struct net *);
+int nf_defrag_ipv6_enable(struct net *net);
+void nf_defrag_ipv6_disable(struct net *net);
 
 int nf_ct_frag6_init(void);
 void nf_ct_frag6_cleanup(void);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index ffdcc2b9360f..613432a36f0a 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -141,14 +141,16 @@ int nf_defrag_ipv4_enable(struct net *net)
 	struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
 	int err = 0;
 
-	might_sleep();
-
-	if (nf_defrag->users)
-		return 0;
-
 	mutex_lock(&defrag4_mutex);
-	if (nf_defrag->users)
+	if (nf_defrag->users == UINT_MAX) {
+		err = -EOVERFLOW;
 		goto out_unlock;
+	}
+
+	if (nf_defrag->users) {
+		nf_defrag->users++;
+		goto out_unlock;
+	}
 
 	err = nf_register_net_hooks(net, ipv4_defrag_ops,
 				    ARRAY_SIZE(ipv4_defrag_ops));
@@ -161,6 +163,22 @@ int nf_defrag_ipv4_enable(struct net *net)
 }
 EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
 
+void nf_defrag_ipv4_disable(struct net *net)
+{
+	struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
+
+	mutex_lock(&defrag4_mutex);
+	if (nf_defrag->users) {
+		nf_defrag->users--;
+		if (nf_defrag->users == 0)
+			nf_unregister_net_hooks(net, ipv4_defrag_ops,
+						ARRAY_SIZE(ipv4_defrag_ops));
+	}
+
+	mutex_unlock(&defrag4_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_disable);
+
 module_init(nf_defrag_init);
 module_exit(nf_defrag_fini);
 
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 402dc4ca9504..e8a59d8bf2ad 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -137,14 +137,16 @@ int nf_defrag_ipv6_enable(struct net *net)
 	struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
 	int err = 0;
 
-	might_sleep();
-
-	if (nf_frag->users)
-		return 0;
-
 	mutex_lock(&defrag6_mutex);
-	if (nf_frag->users)
+	if (nf_frag->users == UINT_MAX) {
+		err = -EOVERFLOW;
+		goto out_unlock;
+	}
+
+	if (nf_frag->users) {
+		nf_frag->users++;
 		goto out_unlock;
+	}
 
 	err = nf_register_net_hooks(net, ipv6_defrag_ops,
 				    ARRAY_SIZE(ipv6_defrag_ops));
@@ -157,6 +159,21 @@ int nf_defrag_ipv6_enable(struct net *net)
 }
 EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
 
+void nf_defrag_ipv6_disable(struct net *net)
+{
+	struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
+
+	mutex_lock(&defrag6_mutex);
+	if (nf_frag->users) {
+		nf_frag->users--;
+		if (nf_frag->users == 0)
+			nf_unregister_net_hooks(net, ipv6_defrag_ops,
+						ARRAY_SIZE(ipv6_defrag_ops));
+	}
+	mutex_unlock(&defrag6_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv6_disable);
+
 module_init(nf_defrag_init);
 module_exit(nf_defrag_fini);
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 47e9319d2cf3..89e5bac384d7 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -536,15 +536,19 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
 	mutex_lock(&nf_ct_proto_mutex);
 	switch (nfproto) {
 	case NFPROTO_IPV4:
-		if (cnet->users4 && (--cnet->users4 == 0))
+		if (cnet->users4 && (--cnet->users4 == 0)) {
 			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
 						ARRAY_SIZE(ipv4_conntrack_ops));
+			nf_defrag_ipv4_disable(net);
+		}
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case NFPROTO_IPV6:
-		if (cnet->users6 && (--cnet->users6 == 0))
+		if (cnet->users6 && (--cnet->users6 == 0)) {
 			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
 						ARRAY_SIZE(ipv6_conntrack_ops));
+			nf_defrag_ipv6_disable(net);
+		}
 		break;
 #endif
 	case NFPROTO_BRIDGE:
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 43a5a780a6d3..accef672088c 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -263,6 +263,29 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
 	return 0;
 }
 
+static void nft_tproxy_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+	switch (priv->family) {
+	case NFPROTO_IPV4:
+		nf_defrag_ipv4_disable(ctx->net);
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		nf_defrag_ipv6_disable(ctx->net);
+		break;
+#endif
+	case NFPROTO_UNSPEC:
+		nf_defrag_ipv4_disable(ctx->net);
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+		nf_defrag_ipv6_disable(ctx->net);
+#endif
+		break;
+	}
+}
+
 static int nft_tproxy_dump(struct sk_buff *skb,
 			   const struct nft_expr *expr)
 {
@@ -288,6 +311,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_tproxy)),
 	.eval		= nft_tproxy_eval,
 	.init		= nft_tproxy_init,
+	.destroy	= nft_tproxy_destroy,
 	.dump		= nft_tproxy_dump,
 };
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 194dc03341f3..459d0696c91a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -200,6 +200,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
 	pr_info_ratelimited("Can be used only with -p tcp or -p udp\n");
 	return -EINVAL;
 }
+
+static void tproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_defrag_ipv6_disable(par->net);
+}
 #endif
 
 static int tproxy_tg4_check(const struct xt_tgchk_param *par)
@@ -219,6 +224,11 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par)
 	return -EINVAL;
 }
 
+static void tproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_defrag_ipv4_disable(par->net);
+}
+
 static struct xt_target tproxy_tg_reg[] __read_mostly = {
 	{
 		.name		= "TPROXY",
@@ -228,6 +238,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
 		.revision	= 0,
 		.targetsize	= sizeof(struct xt_tproxy_target_info),
 		.checkentry	= tproxy_tg4_check,
+		.destroy	= tproxy_tg4_destroy,
 		.hooks		= 1 << NF_INET_PRE_ROUTING,
 		.me		= THIS_MODULE,
 	},
@@ -239,6 +250,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
 		.revision	= 1,
 		.targetsize	= sizeof(struct xt_tproxy_target_info_v1),
 		.checkentry	= tproxy_tg4_check,
+		.destroy	= tproxy_tg4_destroy,
 		.hooks		= 1 << NF_INET_PRE_ROUTING,
 		.me		= THIS_MODULE,
 	},
@@ -251,6 +263,7 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = {
 		.revision	= 1,
 		.targetsize	= sizeof(struct xt_tproxy_target_info_v1),
 		.checkentry	= tproxy_tg6_check,
+		.destroy	= tproxy_tg6_destroy,
 		.hooks		= 1 << NF_INET_PRE_ROUTING,
 		.me		= THIS_MODULE,
 	},
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5f973987265d..5e6459e11605 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -216,6 +216,14 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static void socket_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	if (par->family == NFPROTO_IPV4)
+		nf_defrag_ipv4_disable(par->net);
+	else if (par->family == NFPROTO_IPV6)
+		nf_defrag_ipv4_disable(par->net);
+}
+
 static struct xt_match socket_mt_reg[] __read_mostly = {
 	{
 		.name		= "socket",
@@ -231,6 +239,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.revision	= 1,
 		.family		= NFPROTO_IPV4,
 		.match		= socket_mt4_v1_v2_v3,
+		.destroy	= socket_mt_destroy,
 		.checkentry	= socket_mt_v1_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
@@ -245,6 +254,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.match		= socket_mt6_v1_v2_v3,
 		.checkentry	= socket_mt_v1_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.destroy	= socket_mt_destroy,
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
 		.me		= THIS_MODULE,
@@ -256,6 +266,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.family		= NFPROTO_IPV4,
 		.match		= socket_mt4_v1_v2_v3,
 		.checkentry	= socket_mt_v2_check,
+		.destroy	= socket_mt_destroy,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
@@ -268,6 +279,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.family		= NFPROTO_IPV6,
 		.match		= socket_mt6_v1_v2_v3,
 		.checkentry	= socket_mt_v2_check,
+		.destroy	= socket_mt_destroy,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
@@ -280,6 +292,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.family		= NFPROTO_IPV4,
 		.match		= socket_mt4_v1_v2_v3,
 		.checkentry	= socket_mt_v3_check,
+		.destroy	= socket_mt_destroy,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
@@ -292,6 +305,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.family		= NFPROTO_IPV6,
 		.match		= socket_mt6_v1_v2_v3,
 		.checkentry	= socket_mt_v3_check,
+		.destroy	= socket_mt_destroy,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
-- 
cgit v1.2.3


From 4c95e0728eee33df6b029a5fca82a67daeca201e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:50:59 +0200
Subject: netfilter: ebtables: remove the 3 ebtables pointers from struct net

ebtables stores the table internal data (what gets passed to the
ebt_do_table() interpreter) in struct net.

nftables keeps the internal interpreter format in pernet lists
and passes it via the netfilter core infrastructure (priv pointer).

Do the same for ebtables: the nf_hook_ops are duplicated via kmemdup,
then the ops->priv pointer is set to the table that is being registered.

After that, the netfilter core passes this table info to the hookfn.

This allows to remove the pointers from struct net.

Same pattern can be applied to ip/ip6/arptables.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h |  9 +++----
 include/net/netns/x_tables.h              |  8 ------
 net/bridge/netfilter/ebtable_broute.c     | 10 +++-----
 net/bridge/netfilter/ebtable_filter.c     | 26 +++++++------------
 net/bridge/netfilter/ebtable_nat.c        | 27 +++++++-------------
 net/bridge/netfilter/ebtables.c           | 42 +++++++++++++++++++++++--------
 6 files changed, 58 insertions(+), 64 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 3a956145a25c..a8178253ce53 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -100,6 +100,7 @@ struct ebt_table {
 	   unsigned int valid_hooks);
 	/* the data used by the kernel */
 	struct ebt_table_info *private;
+	struct nf_hook_ops *ops;
 	struct module *me;
 };
 
@@ -108,11 +109,9 @@ struct ebt_table {
 
 extern int ebt_register_table(struct net *net,
 			      const struct ebt_table *table,
-			      const struct nf_hook_ops *ops,
-			      struct ebt_table **res);
-extern void ebt_unregister_table(struct net *net, struct ebt_table *table);
-void ebt_unregister_table_pre_exit(struct net *net, const char *tablename,
-				   const struct nf_hook_ops *ops);
+			      const struct nf_hook_ops *ops);
+extern void ebt_unregister_table(struct net *net, const char *tablename);
+void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
 extern unsigned int ebt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct ebt_table *table);
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 83c8ea2e87a6..d02316ec2906 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -5,16 +5,8 @@
 #include <linux/list.h>
 #include <linux/netfilter_defs.h>
 
-struct ebt_table;
-
 struct netns_xt {
 	bool notrack_deprecated_warning;
 	bool clusterip_deprecated_warning;
-#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
-    defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
-	struct ebt_table *broute_table;
-	struct ebt_table *frame_filter;
-	struct ebt_table *frame_nat;
-#endif
 };
 #endif
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 32bc2821027f..020b1487ee0c 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -66,8 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
 			   NFPROTO_BRIDGE, s->in, NULL, NULL,
 			   s->net, NULL);
 
-	ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
-
+	ret = ebt_do_table(skb, &state, priv);
 	if (ret != NF_DROP)
 		return ret;
 
@@ -101,18 +100,17 @@ static const struct nf_hook_ops ebt_ops_broute = {
 
 static int __net_init broute_net_init(struct net *net)
 {
-	return ebt_register_table(net, &broute_table, &ebt_ops_broute,
-				  &net->xt.broute_table);
+	return ebt_register_table(net, &broute_table, &ebt_ops_broute);
 }
 
 static void __net_exit broute_net_pre_exit(struct net *net)
 {
-	ebt_unregister_table_pre_exit(net, "broute", &ebt_ops_broute);
+	ebt_unregister_table_pre_exit(net, "broute");
 }
 
 static void __net_exit broute_net_exit(struct net *net)
 {
-	ebt_unregister_table(net, net->xt.broute_table);
+	ebt_unregister_table(net, "broute");
 }
 
 static struct pernet_operations broute_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index bcf982e12f16..8ec0b3736803 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -59,34 +59,27 @@ static const struct ebt_table frame_filter = {
 };
 
 static unsigned int
-ebt_in_hook(void *priv, struct sk_buff *skb,
-	    const struct nf_hook_state *state)
+ebt_filter_hook(void *priv, struct sk_buff *skb,
+		const struct nf_hook_state *state)
 {
-	return ebt_do_table(skb, state, state->net->xt.frame_filter);
-}
-
-static unsigned int
-ebt_out_hook(void *priv, struct sk_buff *skb,
-	     const struct nf_hook_state *state)
-{
-	return ebt_do_table(skb, state, state->net->xt.frame_filter);
+	return ebt_do_table(skb, state, priv);
 }
 
 static const struct nf_hook_ops ebt_ops_filter[] = {
 	{
-		.hook		= ebt_in_hook,
+		.hook		= ebt_filter_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_IN,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_in_hook,
+		.hook		= ebt_filter_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_FORWARD,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_out_hook,
+		.hook		= ebt_filter_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
 		.priority	= NF_BR_PRI_FILTER_OTHER,
@@ -95,18 +88,17 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
 
 static int __net_init frame_filter_net_init(struct net *net)
 {
-	return ebt_register_table(net, &frame_filter, ebt_ops_filter,
-				  &net->xt.frame_filter);
+	return ebt_register_table(net, &frame_filter, ebt_ops_filter);
 }
 
 static void __net_exit frame_filter_net_pre_exit(struct net *net)
 {
-	ebt_unregister_table_pre_exit(net, "filter", ebt_ops_filter);
+	ebt_unregister_table_pre_exit(net, "filter");
 }
 
 static void __net_exit frame_filter_net_exit(struct net *net)
 {
-	ebt_unregister_table(net, net->xt.frame_filter);
+	ebt_unregister_table(net, "filter");
 }
 
 static struct pernet_operations frame_filter_net_ops = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 0d092773f816..7c8a1064a531 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -58,35 +58,27 @@ static const struct ebt_table frame_nat = {
 	.me		= THIS_MODULE,
 };
 
-static unsigned int
-ebt_nat_in(void *priv, struct sk_buff *skb,
-	   const struct nf_hook_state *state)
+static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
+				 const struct nf_hook_state *state)
 {
-	return ebt_do_table(skb, state, state->net->xt.frame_nat);
-}
-
-static unsigned int
-ebt_nat_out(void *priv, struct sk_buff *skb,
-	    const struct nf_hook_state *state)
-{
-	return ebt_do_table(skb, state, state->net->xt.frame_nat);
+	return ebt_do_table(skb, state, priv);
 }
 
 static const struct nf_hook_ops ebt_ops_nat[] = {
 	{
-		.hook		= ebt_nat_out,
+		.hook		= ebt_nat_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
 		.priority	= NF_BR_PRI_NAT_DST_OTHER,
 	},
 	{
-		.hook		= ebt_nat_out,
+		.hook		= ebt_nat_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_POST_ROUTING,
 		.priority	= NF_BR_PRI_NAT_SRC,
 	},
 	{
-		.hook		= ebt_nat_in,
+		.hook		= ebt_nat_hook,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_PRE_ROUTING,
 		.priority	= NF_BR_PRI_NAT_DST_BRIDGED,
@@ -95,18 +87,17 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
 
 static int __net_init frame_nat_net_init(struct net *net)
 {
-	return ebt_register_table(net, &frame_nat, ebt_ops_nat,
-				  &net->xt.frame_nat);
+	return ebt_register_table(net, &frame_nat, ebt_ops_nat);
 }
 
 static void __net_exit frame_nat_net_pre_exit(struct net *net)
 {
-	ebt_unregister_table_pre_exit(net, "nat", ebt_ops_nat);
+	ebt_unregister_table_pre_exit(net, "nat");
 }
 
 static void __net_exit frame_nat_net_exit(struct net *net)
 {
-	ebt_unregister_table(net, net->xt.frame_nat);
+	ebt_unregister_table(net, "nat");
 }
 
 static struct pernet_operations frame_nat_net_ops = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 96d789c8d1c7..a04596bb2a6e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1136,15 +1136,18 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
 	vfree(table->private->entries);
 	ebt_free_table_info(table->private);
 	vfree(table->private);
+	kfree(table->ops);
 	kfree(table);
 }
 
 int ebt_register_table(struct net *net, const struct ebt_table *input_table,
-		       const struct nf_hook_ops *ops, struct ebt_table **res)
+		       const struct nf_hook_ops *template_ops)
 {
 	struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
 	struct ebt_table_info *newinfo;
 	struct ebt_table *t, *table;
+	struct nf_hook_ops *ops;
+	unsigned int num_ops;
 	struct ebt_replace_kernel *repl;
 	int ret, i, countersize;
 	void *p;
@@ -1213,15 +1216,31 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
 		ret = -ENOENT;
 		goto free_unlock;
 	}
+
+	num_ops = hweight32(table->valid_hooks);
+	if (num_ops == 0) {
+		ret = -EINVAL;
+		goto free_unlock;
+	}
+
+	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+	if (!ops) {
+		ret = -ENOMEM;
+		if (newinfo->nentries)
+			module_put(table->me);
+		goto free_unlock;
+	}
+
+	for (i = 0; i < num_ops; i++)
+		ops[i].priv = table;
+
 	list_add(&table->list, &ebt_net->tables);
 	mutex_unlock(&ebt_mutex);
 
-	WRITE_ONCE(*res, table);
-	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
-	if (ret) {
+	table->ops = ops;
+	ret = nf_register_net_hooks(net, ops, num_ops);
+	if (ret)
 		__ebt_unregister_table(net, table);
-		*res = NULL;
-	}
 
 	audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
 			AUDIT_XT_OP_REGISTER, GFP_KERNEL);
@@ -1257,18 +1276,21 @@ static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
 	return NULL;
 }
 
-void ebt_unregister_table_pre_exit(struct net *net, const char *name, const struct nf_hook_ops *ops)
+void ebt_unregister_table_pre_exit(struct net *net, const char *name)
 {
 	struct ebt_table *table = __ebt_find_table(net, name);
 
 	if (table)
-		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+		nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
 }
 EXPORT_SYMBOL(ebt_unregister_table_pre_exit);
 
-void ebt_unregister_table(struct net *net, struct ebt_table *table)
+void ebt_unregister_table(struct net *net, const char *name)
 {
-	__ebt_unregister_table(net, table);
+	struct ebt_table *table = __ebt_find_table(net, name);
+
+	if (table)
+		__ebt_unregister_table(net, table);
 }
 
 /* userspace just supplied us with counters */
-- 
cgit v1.2.3


From 7716bf090e97aec45e97907ec6a382e4610bdd8f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:00 +0200
Subject: netfilter: x_tables: remove ipt_unregister_table

Its the same function as ipt_unregister_table_exit.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4/ip_tables.h  | 3 ---
 include/linux/netfilter_ipv6/ip6_tables.h | 2 --
 net/ipv4/netfilter/ip_tables.c            | 9 ---------
 net/ipv4/netfilter/iptable_nat.c          | 2 +-
 net/ipv6/netfilter/ip6_tables.c           | 9 ---------
 net/ipv6/netfilter/ip6table_nat.c         | 2 +-
 6 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index c4676d6feeff..9f440eb6cf6c 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -31,9 +31,6 @@ void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
 
 void ipt_unregister_table_exit(struct net *net, struct xt_table *table);
 
-void ipt_unregister_table(struct net *net, struct xt_table *table,
-			  const struct nf_hook_ops *ops);
-
 /* Standard entry. */
 struct ipt_standard {
 	struct ipt_entry entry;
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 1547d5f9ae06..b88a27ce61b0 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -27,8 +27,6 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *);
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
-void ip6t_unregister_table(struct net *net, struct xt_table *table,
-			   const struct nf_hook_ops *ops);
 void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
 				    const struct nf_hook_ops *ops);
 void ip6t_unregister_table_exit(struct net *net, struct xt_table *table);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f77ea0dbe656..2fa7f28b88e3 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1770,14 +1770,6 @@ void ipt_unregister_table_exit(struct net *net, struct xt_table *table)
 	__ipt_unregister_table(net, table);
 }
 
-void ipt_unregister_table(struct net *net, struct xt_table *table,
-			  const struct nf_hook_ops *ops)
-{
-	if (ops)
-		ipt_unregister_table_pre_exit(net, table, ops);
-	__ipt_unregister_table(net, table);
-}
-
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
 static inline bool
 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
@@ -1924,7 +1916,6 @@ static void __exit ip_tables_fini(void)
 }
 
 EXPORT_SYMBOL(ipt_register_table);
-EXPORT_SYMBOL(ipt_unregister_table);
 EXPORT_SYMBOL(ipt_unregister_table_pre_exit);
 EXPORT_SYMBOL(ipt_unregister_table_exit);
 EXPORT_SYMBOL(ipt_do_table);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index b0143b109f25..a89c1b9f94c2 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -105,7 +105,7 @@ static int __net_init iptable_nat_table_init(struct net *net)
 
 	ret = ipt_nat_register_lookups(net);
 	if (ret < 0) {
-		ipt_unregister_table(net, net->ipv4.nat_table, NULL);
+		ipt_unregister_table_exit(net, net->ipv4.nat_table);
 		net->ipv4.nat_table = NULL;
 	}
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index eb2b5404806c..e605c28cfed5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1780,14 +1780,6 @@ void ip6t_unregister_table_exit(struct net *net, struct xt_table *table)
 	__ip6t_unregister_table(net, table);
 }
 
-void ip6t_unregister_table(struct net *net, struct xt_table *table,
-			   const struct nf_hook_ops *ops)
-{
-	if (ops)
-		ip6t_unregister_table_pre_exit(net, table, ops);
-	__ip6t_unregister_table(net, table);
-}
-
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
 static inline bool
 icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
@@ -1935,7 +1927,6 @@ static void __exit ip6_tables_fini(void)
 }
 
 EXPORT_SYMBOL(ip6t_register_table);
-EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_unregister_table_pre_exit);
 EXPORT_SYMBOL(ip6t_unregister_table_exit);
 EXPORT_SYMBOL(ip6t_do_table);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 0a23265e3caa..4cef1b405074 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -107,7 +107,7 @@ static int __net_init ip6table_nat_table_init(struct net *net)
 
 	ret = ip6t_nat_register_lookups(net);
 	if (ret < 0) {
-		ip6t_unregister_table(net, net->ipv6.ip6table_nat, NULL);
+		ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
 		net->ipv6.ip6table_nat = NULL;
 	}
 	kfree(repl);
-- 
cgit v1.2.3


From 1ef4d6d1af2d0c0c7c9b391365a3894bea291e34 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:01 +0200
Subject: netfilter: x_tables: add xt_find_table

This will be used to obtain the xt_table struct given address family and
table name.

Followup patches will reduce the number of direct accesses to the xt_table
structures via net->ipv{4,6}.ip(6)table_{nat,mangle,...} pointers, then
remove them.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  1 +
 net/netfilter/x_tables.c           | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 8ec48466410a..b2eec7de5280 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -322,6 +322,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
 int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 		     int *err);
 
+struct xt_table *xt_find_table(struct net *net, u8 af, const char *name);
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name);
 struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b7f8d2ed3cc2..1caba9507228 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1199,6 +1199,23 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
+struct xt_table *xt_find_table(struct net *net, u8 af, const char *name)
+{
+	struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
+	struct xt_table *t;
+
+	mutex_lock(&xt[af].mutex);
+	list_for_each_entry(t, &xt_net->tables[af], list) {
+		if (strcmp(t->name, name) == 0) {
+			mutex_unlock(&xt[af].mutex);
+			return t;
+		}
+	}
+	mutex_unlock(&xt[af].mutex);
+	return NULL;
+}
+EXPORT_SYMBOL(xt_find_table);
+
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
-- 
cgit v1.2.3


From 20a9df33594fe643f9cf46375a9243e3ab8ed3a6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:02 +0200
Subject: netfilter: iptables: unregister the tables by name

xtables stores the xt_table structs in the struct net.  This isn't
needed anymore, the structures could be passed via the netfilter hook
'private' pointer to the hook functions, which would allow us to remove
those pointers from struct net.

As a first step, reduce the number of accesses to the
net->ipv4.ip6table_{raw,filter,...} pointers.
This allows the tables to get unregistered by name instead of having to
pass the raw address.

The xt_table structure cane looked up by name+address family instead.

This patch is useless as-is (the backends still have the raw pointer
address), but it lowers the bar to remove those.

It also allows to put the 'was table registered in the first place' check
into ip_tables.c rather than have it in each table sub module.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4/ip_tables.h |  6 +++---
 net/ipv4/netfilter/ip_tables.c           | 14 ++++++++++----
 net/ipv4/netfilter/iptable_filter.c      |  8 ++------
 net/ipv4/netfilter/iptable_mangle.c      |  8 ++------
 net/ipv4/netfilter/iptable_nat.c         |  6 ++----
 net/ipv4/netfilter/iptable_raw.c         |  8 ++------
 net/ipv4/netfilter/iptable_security.c    |  8 ++------
 7 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 9f440eb6cf6c..73bcf7f261d2 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -26,10 +26,10 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
 		       const struct nf_hook_ops *ops, struct xt_table **res);
 
-void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
-		       const struct nf_hook_ops *ops);
+void ipt_unregister_table_pre_exit(struct net *net, const char *name,
+				   const struct nf_hook_ops *ops);
 
-void ipt_unregister_table_exit(struct net *net, struct xt_table *table);
+void ipt_unregister_table_exit(struct net *net, const char *name);
 
 /* Standard entry. */
 struct ipt_standard {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2fa7f28b88e3..0b859ec2d3f8 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1759,15 +1759,21 @@ out_free:
 	return ret;
 }
 
-void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void ipt_unregister_table_pre_exit(struct net *net, const char *name,
 				   const struct nf_hook_ops *ops)
 {
-	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+	struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
+
+	if (table)
+		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
 }
 
-void ipt_unregister_table_exit(struct net *net, struct xt_table *table)
+void ipt_unregister_table_exit(struct net *net, const char *name)
 {
-	__ipt_unregister_table(net, table);
+	struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
+
+	if (table)
+		__ipt_unregister_table(net, table);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 8f7bc1ee7453..a39998c7977f 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -74,16 +74,12 @@ static int __net_init iptable_filter_net_init(struct net *net)
 
 static void __net_exit iptable_filter_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.iptable_filter)
-		ipt_unregister_table_pre_exit(net, net->ipv4.iptable_filter,
-					      filter_ops);
+	ipt_unregister_table_pre_exit(net, "filter", filter_ops);
 }
 
 static void __net_exit iptable_filter_net_exit(struct net *net)
 {
-	if (!net->ipv4.iptable_filter)
-		return;
-	ipt_unregister_table_exit(net, net->ipv4.iptable_filter);
+	ipt_unregister_table_exit(net, "filter");
 	net->ipv4.iptable_filter = NULL;
 }
 
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 833079589273..7d1713e22553 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -102,16 +102,12 @@ static int __net_init iptable_mangle_table_init(struct net *net)
 
 static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.iptable_mangle)
-		ipt_unregister_table_pre_exit(net, net->ipv4.iptable_mangle,
-					      mangle_ops);
+	ipt_unregister_table_pre_exit(net, "mangle", mangle_ops);
 }
 
 static void __net_exit iptable_mangle_net_exit(struct net *net)
 {
-	if (!net->ipv4.iptable_mangle)
-		return;
-	ipt_unregister_table_exit(net, net->ipv4.iptable_mangle);
+	ipt_unregister_table_exit(net, "mangle");
 	net->ipv4.iptable_mangle = NULL;
 }
 
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a89c1b9f94c2..16bf3009642e 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -105,7 +105,7 @@ static int __net_init iptable_nat_table_init(struct net *net)
 
 	ret = ipt_nat_register_lookups(net);
 	if (ret < 0) {
-		ipt_unregister_table_exit(net, net->ipv4.nat_table);
+		ipt_unregister_table_exit(net, "nat");
 		net->ipv4.nat_table = NULL;
 	}
 
@@ -121,9 +121,7 @@ static void __net_exit iptable_nat_net_pre_exit(struct net *net)
 
 static void __net_exit iptable_nat_net_exit(struct net *net)
 {
-	if (!net->ipv4.nat_table)
-		return;
-	ipt_unregister_table_exit(net, net->ipv4.nat_table);
+	ipt_unregister_table_exit(net, "nat");
 	net->ipv4.nat_table = NULL;
 }
 
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 9abfe6bf2cb9..a1f556464b93 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -69,16 +69,12 @@ static int __net_init iptable_raw_table_init(struct net *net)
 
 static void __net_exit iptable_raw_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.iptable_raw)
-		ipt_unregister_table_pre_exit(net, net->ipv4.iptable_raw,
-					      rawtable_ops);
+	ipt_unregister_table_pre_exit(net, "raw", rawtable_ops);
 }
 
 static void __net_exit iptable_raw_net_exit(struct net *net)
 {
-	if (!net->ipv4.iptable_raw)
-		return;
-	ipt_unregister_table_exit(net, net->ipv4.iptable_raw);
+	ipt_unregister_table_exit(net, "raw");
 	net->ipv4.iptable_raw = NULL;
 }
 
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 415c1975d770..33eded4f9080 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -64,16 +64,12 @@ static int __net_init iptable_security_table_init(struct net *net)
 
 static void __net_exit iptable_security_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.iptable_security)
-		ipt_unregister_table_pre_exit(net, net->ipv4.iptable_security,
-					      sectbl_ops);
+	ipt_unregister_table_pre_exit(net, "security", sectbl_ops);
 }
 
 static void __net_exit iptable_security_net_exit(struct net *net)
 {
-	if (!net->ipv4.iptable_security)
-		return;
-	ipt_unregister_table_exit(net, net->ipv4.iptable_security);
+	ipt_unregister_table_exit(net, "security");
 	net->ipv4.iptable_security = NULL;
 }
 
-- 
cgit v1.2.3


From 6c0717545f2ca61c95f5f739da845e77cc8bd498 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:03 +0200
Subject: netfilter: ip6tables: unregister the tables by name

Same as the previous patch, but for ip6tables.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6/ip6_tables.h |  4 ++--
 net/ipv6/netfilter/ip6_tables.c           | 14 ++++++++++----
 net/ipv6/netfilter/ip6table_filter.c      |  9 +++------
 net/ipv6/netfilter/ip6table_mangle.c      |  9 ++-------
 net/ipv6/netfilter/ip6table_nat.c         |  6 ++----
 net/ipv6/netfilter/ip6table_raw.c         |  9 +++------
 net/ipv6/netfilter/ip6table_security.c    |  8 ++------
 7 files changed, 24 insertions(+), 35 deletions(-)

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index b88a27ce61b0..8c07426e18a8 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -27,9 +27,9 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *);
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
-void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name,
 				    const struct nf_hook_ops *ops);
-void ip6t_unregister_table_exit(struct net *net, struct xt_table *table);
+void ip6t_unregister_table_exit(struct net *net, const char *name);
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e605c28cfed5..11c80da12ee3 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1769,15 +1769,21 @@ out_free:
 	return ret;
 }
 
-void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name,
 				    const struct nf_hook_ops *ops)
 {
-	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+	struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
+
+	if (table)
+		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
 }
 
-void ip6t_unregister_table_exit(struct net *net, struct xt_table *table)
+void ip6t_unregister_table_exit(struct net *net, const char *name)
 {
-	__ip6t_unregister_table(net, table);
+	struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
+
+	if (table)
+		__ip6t_unregister_table(net, table);
 }
 
 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 88337b51ffbf..0c9f75e23ca0 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -75,16 +75,13 @@ static int __net_init ip6table_filter_net_init(struct net *net)
 
 static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
 {
-	if (net->ipv6.ip6table_filter)
-		ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_filter,
-					       filter_ops);
+	ip6t_unregister_table_pre_exit(net, "filter",
+				       filter_ops);
 }
 
 static void __net_exit ip6table_filter_net_exit(struct net *net)
 {
-	if (!net->ipv6.ip6table_filter)
-		return;
-	ip6t_unregister_table_exit(net, net->ipv6.ip6table_filter);
+	ip6t_unregister_table_exit(net, "filter");
 	net->ipv6.ip6table_filter = NULL;
 }
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index cee74803d7a1..9a2266662508 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -95,17 +95,12 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
 
 static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
 {
-	if (net->ipv6.ip6table_mangle)
-		ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_mangle,
-					       mangle_ops);
+	ip6t_unregister_table_pre_exit(net, "mangle", mangle_ops);
 }
 
 static void __net_exit ip6table_mangle_net_exit(struct net *net)
 {
-	if (!net->ipv6.ip6table_mangle)
-		return;
-
-	ip6t_unregister_table_exit(net, net->ipv6.ip6table_mangle);
+	ip6t_unregister_table_exit(net, "mangle");
 	net->ipv6.ip6table_mangle = NULL;
 }
 
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 4cef1b405074..7eb61e6b1e52 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -107,7 +107,7 @@ static int __net_init ip6table_nat_table_init(struct net *net)
 
 	ret = ip6t_nat_register_lookups(net);
 	if (ret < 0) {
-		ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
+		ip6t_unregister_table_exit(net, "nat");
 		net->ipv6.ip6table_nat = NULL;
 	}
 	kfree(repl);
@@ -122,9 +122,7 @@ static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
 
 static void __net_exit ip6table_nat_net_exit(struct net *net)
 {
-	if (!net->ipv6.ip6table_nat)
-		return;
-	ip6t_unregister_table_exit(net, net->ipv6.ip6table_nat);
+	ip6t_unregister_table_exit(net, "nat");
 	net->ipv6.ip6table_nat = NULL;
 }
 
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 8f9e742226f7..c9a4aada40ba 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -68,16 +68,13 @@ static int __net_init ip6table_raw_table_init(struct net *net)
 
 static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
 {
-	if (net->ipv6.ip6table_raw)
-		ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_raw,
-					       rawtable_ops);
+	ip6t_unregister_table_pre_exit(net, "raw",
+				       rawtable_ops);
 }
 
 static void __net_exit ip6table_raw_net_exit(struct net *net)
 {
-	if (!net->ipv6.ip6table_raw)
-		return;
-	ip6t_unregister_table_exit(net, net->ipv6.ip6table_raw);
+	ip6t_unregister_table_exit(net, "raw");
 	net->ipv6.ip6table_raw = NULL;
 }
 
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 5e8c48fed032..73067e08662f 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -63,16 +63,12 @@ static int __net_init ip6table_security_table_init(struct net *net)
 
 static void __net_exit ip6table_security_net_pre_exit(struct net *net)
 {
-	if (net->ipv6.ip6table_security)
-		ip6t_unregister_table_pre_exit(net, net->ipv6.ip6table_security,
-					       sectbl_ops);
+	ip6t_unregister_table_pre_exit(net, "security", sectbl_ops);
 }
 
 static void __net_exit ip6table_security_net_exit(struct net *net)
 {
-	if (!net->ipv6.ip6table_security)
-		return;
-	ip6t_unregister_table_exit(net, net->ipv6.ip6table_security);
+	ip6t_unregister_table_exit(net, "security");
 	net->ipv6.ip6table_security = NULL;
 }
 
-- 
cgit v1.2.3


From 4d705399191c3cfe1264588b3a4a8115e6c3b161 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:04 +0200
Subject: netfilter: arptables: unregister the tables by name

and again, this time for arptables.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_arp/arp_tables.h |  4 ++--
 net/ipv4/netfilter/arp_tables.c          | 14 ++++++++++----
 net/ipv4/netfilter/arptable_filter.c     |  8 ++------
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 26a13294318c..9ec73dcc8fd6 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -52,8 +52,8 @@ extern void *arpt_alloc_initial_table(const struct xt_table *);
 int arpt_register_table(struct net *net, const struct xt_table *table,
 			const struct arpt_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
-void arpt_unregister_table(struct net *net, struct xt_table *table);
-void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void arpt_unregister_table(struct net *net, const char *name);
+void arpt_unregister_table_pre_exit(struct net *net, const char *name,
 				    const struct nf_hook_ops *ops);
 extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d6d45d820d79..8a16b0dc5271 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1541,16 +1541,22 @@ out_free:
 	return ret;
 }
 
-void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+void arpt_unregister_table_pre_exit(struct net *net, const char *name,
 				    const struct nf_hook_ops *ops)
 {
-	nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+	struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+
+	if (table)
+		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
 }
 EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
 
-void arpt_unregister_table(struct net *net, struct xt_table *table)
+void arpt_unregister_table(struct net *net, const char *name)
 {
-	__arpt_unregister_table(net, table);
+	struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
+
+	if (table)
+		__arpt_unregister_table(net, table);
 }
 
 /* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6c300ba5634e..c121e13dc78c 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -58,16 +58,12 @@ static int __net_init arptable_filter_table_init(struct net *net)
 
 static void __net_exit arptable_filter_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.arptable_filter)
-		arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter,
-					       arpfilter_ops);
+	arpt_unregister_table_pre_exit(net, "filter", arpfilter_ops);
 }
 
 static void __net_exit arptable_filter_net_exit(struct net *net)
 {
-	if (!net->ipv4.arptable_filter)
-		return;
-	arpt_unregister_table(net, net->ipv4.arptable_filter);
+	arpt_unregister_table(net, "filter");
 	net->ipv4.arptable_filter = NULL;
 }
 
-- 
cgit v1.2.3


From f68772ed678376f52dbb2e20c9f982e6d8b3407b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:05 +0200
Subject: netfilter: x_tables: remove paranoia tests

No need for these.
There is only one caller, the xtables core, when the table is registered
for the first time with a particular network namespace.

After ->table_init() call, the table is linked into the tables[af] list,
so next call to that function will skip the ->table_init().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arptable_filter.c   | 3 ---
 net/ipv4/netfilter/iptable_filter.c    | 3 ---
 net/ipv4/netfilter/iptable_mangle.c    | 3 ---
 net/ipv4/netfilter/iptable_nat.c       | 3 ---
 net/ipv4/netfilter/iptable_raw.c       | 3 ---
 net/ipv4/netfilter/iptable_security.c  | 3 ---
 net/ipv6/netfilter/ip6table_filter.c   | 3 ---
 net/ipv6/netfilter/ip6table_mangle.c   | 3 ---
 net/ipv6/netfilter/ip6table_nat.c      | 3 ---
 net/ipv6/netfilter/ip6table_raw.c      | 3 ---
 net/ipv6/netfilter/ip6table_security.c | 3 ---
 11 files changed, 33 deletions(-)

diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index c121e13dc78c..924f096a6d89 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -44,9 +44,6 @@ static int __net_init arptable_filter_table_init(struct net *net)
 	struct arpt_replace *repl;
 	int err;
 
-	if (net->ipv4.arptable_filter)
-		return 0;
-
 	repl = arpt_alloc_initial_table(&packet_filter);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index a39998c7977f..84573fa78d1e 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -48,9 +48,6 @@ static int __net_init iptable_filter_table_init(struct net *net)
 	struct ipt_replace *repl;
 	int err;
 
-	if (net->ipv4.iptable_filter)
-		return 0;
-
 	repl = ipt_alloc_initial_table(&packet_filter);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 7d1713e22553..98e9e9053d85 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -88,9 +88,6 @@ static int __net_init iptable_mangle_table_init(struct net *net)
 	struct ipt_replace *repl;
 	int ret;
 
-	if (net->ipv4.iptable_mangle)
-		return 0;
-
 	repl = ipt_alloc_initial_table(&packet_mangler);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 16bf3009642e..f4afd28ccc06 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -90,9 +90,6 @@ static int __net_init iptable_nat_table_init(struct net *net)
 	struct ipt_replace *repl;
 	int ret;
 
-	if (net->ipv4.nat_table)
-		return 0;
-
 	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index a1f556464b93..18776f5a4055 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -55,9 +55,6 @@ static int __net_init iptable_raw_table_init(struct net *net)
 	if (raw_before_defrag)
 		table = &packet_raw_before_defrag;
 
-	if (net->ipv4.iptable_raw)
-		return 0;
-
 	repl = ipt_alloc_initial_table(table);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 33eded4f9080..3df92fb394c5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -50,9 +50,6 @@ static int __net_init iptable_security_table_init(struct net *net)
 	struct ipt_replace *repl;
 	int ret;
 
-	if (net->ipv4.iptable_security)
-		return 0;
-
 	repl = ipt_alloc_initial_table(&security_table);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 0c9f75e23ca0..2bcafa3e2d35 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -49,9 +49,6 @@ static int __net_init ip6table_filter_table_init(struct net *net)
 	struct ip6t_replace *repl;
 	int err;
 
-	if (net->ipv6.ip6table_filter)
-		return 0;
-
 	repl = ip6t_alloc_initial_table(&packet_filter);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 9a2266662508..14e22022bf41 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -81,9 +81,6 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
 	struct ip6t_replace *repl;
 	int ret;
 
-	if (net->ipv6.ip6table_mangle)
-		return 0;
-
 	repl = ip6t_alloc_initial_table(&packet_mangler);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 7eb61e6b1e52..c7f98755191b 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -92,9 +92,6 @@ static int __net_init ip6table_nat_table_init(struct net *net)
 	struct ip6t_replace *repl;
 	int ret;
 
-	if (net->ipv6.ip6table_nat)
-		return 0;
-
 	repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index c9a4aada40ba..ae3df59f0350 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -54,9 +54,6 @@ static int __net_init ip6table_raw_table_init(struct net *net)
 	if (raw_before_defrag)
 		table = &packet_raw_before_defrag;
 
-	if (net->ipv6.ip6table_raw)
-		return 0;
-
 	repl = ip6t_alloc_initial_table(table);
 	if (repl == NULL)
 		return -ENOMEM;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 73067e08662f..83ca632cbf88 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -49,9 +49,6 @@ static int __net_init ip6table_security_table_init(struct net *net)
 	struct ip6t_replace *repl;
 	int ret;
 
-	if (net->ipv6.ip6table_security)
-		return 0;
-
 	repl = ip6t_alloc_initial_table(&security_table);
 	if (repl == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3


From a4aeafa28cf706f65f763026c26d83e7e8c96592 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:06 +0200
Subject: netfilter: xt_nat: pass table to hookfn

This changes how ip(6)table nat passes the ruleset/table to the
evaluation loop.

At the moment, it will fetch the table from struct net.

This change stores the table in the hook_ops 'priv' argument
instead.

This requires to duplicate the hook_ops for each netns, so
they can store the (per-net) xt_table structure.

The dupliated nat hook_ops get stored in net_generic data area.
They are free'd in the namespace exit path.

This is a pre-requisite to remove the xt_table/ruleset pointers
from struct net.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/iptable_nat.c  | 44 +++++++++++++++++++++++++++++---------
 net/ipv6/netfilter/ip6table_nat.c | 45 ++++++++++++++++++++++++++++++---------
 2 files changed, 69 insertions(+), 20 deletions(-)

diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index f4afd28ccc06..dfa9dc63a7b5 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -13,8 +13,14 @@
 
 #include <net/netfilter/nf_nat.h>
 
+struct iptable_nat_pernet {
+	struct nf_hook_ops *nf_nat_ops;
+};
+
 static int __net_init iptable_nat_table_init(struct net *net);
 
+static unsigned int iptable_nat_net_id __read_mostly;
+
 static const struct xt_table nf_nat_ipv4_table = {
 	.name		= "nat",
 	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
@@ -30,7 +36,7 @@ static unsigned int iptable_nat_do_chain(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
 {
-	return ipt_do_table(skb, state, state->net->ipv4.nat_table);
+	return ipt_do_table(skb, state, priv);
 }
 
 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
@@ -60,50 +66,67 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	},
 };
 
-static int ipt_nat_register_lookups(struct net *net)
+static int ipt_nat_register_lookups(struct net *net, struct xt_table *table)
 {
+	struct nf_hook_ops *ops = kmemdup(nf_nat_ipv4_ops, sizeof(nf_nat_ipv4_ops), GFP_KERNEL);
+	struct iptable_nat_pernet *xt_nat_net = net_generic(net, iptable_nat_net_id);
 	int i, ret;
 
+	if (!ops)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) {
-		ret = nf_nat_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
+		ops[i].priv = table;
+		ret = nf_nat_ipv4_register_fn(net, &ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
+				nf_nat_ipv4_unregister_fn(net, &ops[--i]);
 
+			kfree(ops);
 			return ret;
 		}
 	}
 
+	xt_nat_net->nf_nat_ops = ops;
 	return 0;
 }
 
 static void ipt_nat_unregister_lookups(struct net *net)
 {
+	struct iptable_nat_pernet *xt_nat_net = net_generic(net, iptable_nat_net_id);
+	struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops;
 	int i;
 
+	if (!ops)
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++)
-		nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
+		nf_nat_ipv4_unregister_fn(net, &ops[i]);
+
+	kfree(ops);
 }
 
 static int __net_init iptable_nat_table_init(struct net *net)
 {
 	struct ipt_replace *repl;
+	struct xt_table *table;
 	int ret;
 
 	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
 	if (repl == NULL)
 		return -ENOMEM;
 	ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
-				 NULL, &net->ipv4.nat_table);
+				 NULL, &table);
 	if (ret < 0) {
 		kfree(repl);
 		return ret;
 	}
 
-	ret = ipt_nat_register_lookups(net);
+	ret = ipt_nat_register_lookups(net, table);
 	if (ret < 0) {
 		ipt_unregister_table_exit(net, "nat");
-		net->ipv4.nat_table = NULL;
+	} else {
+		net->ipv4.nat_table = table;
 	}
 
 	kfree(repl);
@@ -112,8 +135,7 @@ static int __net_init iptable_nat_table_init(struct net *net)
 
 static void __net_exit iptable_nat_net_pre_exit(struct net *net)
 {
-	if (net->ipv4.nat_table)
-		ipt_nat_unregister_lookups(net);
+	ipt_nat_unregister_lookups(net);
 }
 
 static void __net_exit iptable_nat_net_exit(struct net *net)
@@ -125,6 +147,8 @@ static void __net_exit iptable_nat_net_exit(struct net *net)
 static struct pernet_operations iptable_nat_net_ops = {
 	.pre_exit = iptable_nat_net_pre_exit,
 	.exit	= iptable_nat_net_exit,
+	.id	= &iptable_nat_net_id,
+	.size	= sizeof(struct iptable_nat_pernet),
 };
 
 static int __init iptable_nat_init(void)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index c7f98755191b..69b7f9601d03 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -15,8 +15,14 @@
 
 #include <net/netfilter/nf_nat.h>
 
+struct ip6table_nat_pernet {
+	struct nf_hook_ops *nf_nat_ops;
+};
+
 static int __net_init ip6table_nat_table_init(struct net *net);
 
+static unsigned int ip6table_nat_net_id __read_mostly;
+
 static const struct xt_table nf_nat_ipv6_table = {
 	.name		= "nat",
 	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
@@ -32,7 +38,7 @@ static unsigned int ip6table_nat_do_chain(void *priv,
 					  struct sk_buff *skb,
 					  const struct nf_hook_state *state)
 {
-	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
+	return ip6t_do_table(skb, state, priv);
 }
 
 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
@@ -62,59 +68,76 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	},
 };
 
-static int ip6t_nat_register_lookups(struct net *net)
+static int ip6t_nat_register_lookups(struct net *net, struct xt_table *table)
 {
+	struct nf_hook_ops *ops = kmemdup(nf_nat_ipv6_ops, sizeof(nf_nat_ipv6_ops), GFP_KERNEL);
+	struct ip6table_nat_pernet *xt_nat_net = net_generic(net, ip6table_nat_net_id);
 	int i, ret;
 
+	if (!ops)
+		return -ENOMEM;
+
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) {
-		ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
+		ops[i].priv = table;
+		ret = nf_nat_ipv6_register_fn(net, &ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
+				nf_nat_ipv6_unregister_fn(net, &ops[--i]);
 
+			kfree(ops);
 			return ret;
 		}
 	}
 
+	xt_nat_net->nf_nat_ops = ops;
 	return 0;
 }
 
 static void ip6t_nat_unregister_lookups(struct net *net)
 {
+	struct ip6table_nat_pernet *xt_nat_net = net_generic(net, ip6table_nat_net_id);
+	struct nf_hook_ops *ops = xt_nat_net->nf_nat_ops;
 	int i;
 
+	if (!ops)
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++)
-		nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
+		nf_nat_ipv6_unregister_fn(net, &ops[i]);
+
+	kfree(ops);
 }
 
 static int __net_init ip6table_nat_table_init(struct net *net)
 {
 	struct ip6t_replace *repl;
+	struct xt_table *table;
 	int ret;
 
 	repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
 	if (repl == NULL)
 		return -ENOMEM;
 	ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
-				  NULL, &net->ipv6.ip6table_nat);
+				  NULL, &table);
 	if (ret < 0) {
 		kfree(repl);
 		return ret;
 	}
 
-	ret = ip6t_nat_register_lookups(net);
+	ret = ip6t_nat_register_lookups(net, table);
 	if (ret < 0) {
 		ip6t_unregister_table_exit(net, "nat");
-		net->ipv6.ip6table_nat = NULL;
+	} else {
+		net->ipv6.ip6table_nat = table;
 	}
+
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
 {
-	if (net->ipv6.ip6table_nat)
-		ip6t_nat_unregister_lookups(net);
+	ip6t_nat_unregister_lookups(net);
 }
 
 static void __net_exit ip6table_nat_net_exit(struct net *net)
@@ -126,6 +149,8 @@ static void __net_exit ip6table_nat_net_exit(struct net *net)
 static struct pernet_operations ip6table_nat_net_ops = {
 	.pre_exit = ip6table_nat_net_pre_exit,
 	.exit	= ip6table_nat_net_exit,
+	.id	= &ip6table_nat_net_id,
+	.size	= sizeof(struct ip6table_nat_pernet),
 };
 
 static int __init ip6table_nat_init(void)
-- 
cgit v1.2.3


From ae689334225ff0e4ef112459ecd24aea932c2b00 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:07 +0200
Subject: netfilter: ip_tables: pass table pointer via nf_hook_ops

iptable_x modules rely on 'struct net' to contain a pointer to the
table that should be evaluated.

In order to remove these pointers from struct net, pass them via
the 'priv' pointer in a similar fashion as nf_tables passes the
rule data.

To do that, duplicate the nf_hook_info array passed in from the
iptable_x modules, update the ops->priv pointers of the copy to
refer to the table and then change the hookfn implementations to
just pass the 'priv' argument to the traverser.

After this patch, the xt_table pointers can already be removed
from struct net.

However, changes to struct net result in re-compile of the entire
network stack, so do the removal after arptables and ip6tables
have been converted as well.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h       |  3 ++
 include/linux/netfilter_ipv4/ip_tables.h |  6 ++--
 net/ipv4/netfilter/ip_tables.c           | 53 ++++++++++++++++++++++----------
 net/ipv4/netfilter/iptable_filter.c      |  8 ++---
 net/ipv4/netfilter/iptable_mangle.c      | 14 ++++-----
 net/ipv4/netfilter/iptable_nat.c         | 26 ++++++++--------
 net/ipv4/netfilter/iptable_raw.c         |  8 ++---
 net/ipv4/netfilter/iptable_security.c    |  8 ++---
 net/netfilter/x_tables.c                 |  1 +
 9 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b2eec7de5280..a52cc22f806a 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -229,6 +229,9 @@ struct xt_table {
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
 
+	/* hook ops that register the table with the netfilter core */
+	struct nf_hook_ops *ops;
+
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 73bcf7f261d2..0fdab3246ef5 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -24,11 +24,9 @@
 
 int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
-		       const struct nf_hook_ops *ops, struct xt_table **res);
-
-void ipt_unregister_table_pre_exit(struct net *net, const char *name,
-				   const struct nf_hook_ops *ops);
+		       const struct nf_hook_ops *ops);
 
+void ipt_unregister_table_pre_exit(struct net *net, const char *name);
 void ipt_unregister_table_exit(struct net *net, const char *name);
 
 /* Standard entry. */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 0b859ec2d3f8..d6caaed5dd45 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1716,9 +1716,11 @@ static void __ipt_unregister_table(struct net *net, struct xt_table *table)
 
 int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
-		       const struct nf_hook_ops *ops, struct xt_table **res)
+		       const struct nf_hook_ops *template_ops)
 {
-	int ret;
+	struct nf_hook_ops *ops;
+	unsigned int num_ops;
+	int ret, i;
 	struct xt_table_info *newinfo;
 	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
@@ -1732,40 +1734,57 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
-	if (ret != 0)
-		goto out_free;
+	if (ret != 0) {
+		xt_free_table_info(newinfo);
+		return ret;
+	}
 
 	new_table = xt_register_table(net, table, &bootstrap, newinfo);
 	if (IS_ERR(new_table)) {
-		ret = PTR_ERR(new_table);
-		goto out_free;
+		xt_free_table_info(newinfo);
+		return PTR_ERR(new_table);
 	}
 
-	/* set res now, will see skbs right after nf_register_net_hooks */
-	WRITE_ONCE(*res, new_table);
-	if (!ops)
+	/* No template? No need to do anything. This is used by 'nat' table, it registers
+	 * with the nat core instead of the netfilter core.
+	 */
+	if (!template_ops)
 		return 0;
 
-	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
-	if (ret != 0) {
-		__ipt_unregister_table(net, new_table);
-		*res = NULL;
+	num_ops = hweight32(table->valid_hooks);
+	if (num_ops == 0) {
+		ret = -EINVAL;
+		goto out_free;
 	}
 
+	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+	if (!ops) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	for (i = 0; i < num_ops; i++)
+		ops[i].priv = new_table;
+
+	new_table->ops = ops;
+
+	ret = nf_register_net_hooks(net, ops, num_ops);
+	if (ret != 0)
+		goto out_free;
+
 	return ret;
 
 out_free:
-	xt_free_table_info(newinfo);
+	__ipt_unregister_table(net, new_table);
 	return ret;
 }
 
-void ipt_unregister_table_pre_exit(struct net *net, const char *name,
-				   const struct nf_hook_ops *ops)
+void ipt_unregister_table_pre_exit(struct net *net, const char *name)
 {
 	struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name);
 
 	if (table)
-		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+		nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
 }
 
 void ipt_unregister_table_exit(struct net *net, const char *name)
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 84573fa78d1e..8272df7c6ad5 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -34,7 +34,7 @@ static unsigned int
 iptable_filter_hook(void *priv, struct sk_buff *skb,
 		    const struct nf_hook_state *state)
 {
-	return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
+	return ipt_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
@@ -55,8 +55,7 @@ static int __net_init iptable_filter_table_init(struct net *net)
 	((struct ipt_standard *)repl->entries)[1].target.verdict =
 		forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
 
-	err = ipt_register_table(net, &packet_filter, repl, filter_ops,
-				 &net->ipv4.iptable_filter);
+	err = ipt_register_table(net, &packet_filter, repl, filter_ops);
 	kfree(repl);
 	return err;
 }
@@ -71,13 +70,12 @@ static int __net_init iptable_filter_net_init(struct net *net)
 
 static void __net_exit iptable_filter_net_pre_exit(struct net *net)
 {
-	ipt_unregister_table_pre_exit(net, "filter", filter_ops);
+	ipt_unregister_table_pre_exit(net, "filter");
 }
 
 static void __net_exit iptable_filter_net_exit(struct net *net)
 {
 	ipt_unregister_table_exit(net, "filter");
-	net->ipv4.iptable_filter = NULL;
 }
 
 static struct pernet_operations iptable_filter_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98e9e9053d85..2abc3836f391 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -37,7 +37,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
+ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
 {
 	unsigned int ret;
 	const struct iphdr *iph;
@@ -53,7 +53,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 	daddr = iph->daddr;
 	tos = iph->tos;
 
-	ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
+	ret = ipt_do_table(skb, state, priv);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
@@ -78,8 +78,8 @@ iptable_mangle_hook(void *priv,
 		     const struct nf_hook_state *state)
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
-		return ipt_mangle_out(skb, state);
-	return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
+		return ipt_mangle_out(skb, state, priv);
+	return ipt_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
@@ -91,21 +91,19 @@ static int __net_init iptable_mangle_table_init(struct net *net)
 	repl = ipt_alloc_initial_table(&packet_mangler);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops,
-				 &net->ipv4.iptable_mangle);
+	ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit iptable_mangle_net_pre_exit(struct net *net)
 {
-	ipt_unregister_table_pre_exit(net, "mangle", mangle_ops);
+	ipt_unregister_table_pre_exit(net, "mangle");
 }
 
 static void __net_exit iptable_mangle_net_exit(struct net *net)
 {
 	ipt_unregister_table_exit(net, "mangle");
-	net->ipv4.iptable_mangle = NULL;
 }
 
 static struct pernet_operations iptable_mangle_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index dfa9dc63a7b5..a9913842ef18 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -66,12 +66,19 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	},
 };
 
-static int ipt_nat_register_lookups(struct net *net, struct xt_table *table)
+static int ipt_nat_register_lookups(struct net *net)
 {
-	struct nf_hook_ops *ops = kmemdup(nf_nat_ipv4_ops, sizeof(nf_nat_ipv4_ops), GFP_KERNEL);
-	struct iptable_nat_pernet *xt_nat_net = net_generic(net, iptable_nat_net_id);
+	struct iptable_nat_pernet *xt_nat_net;
+	struct nf_hook_ops *ops;
+	struct xt_table *table;
 	int i, ret;
 
+	xt_nat_net = net_generic(net, iptable_nat_net_id);
+	table = xt_find_table(net, NFPROTO_IPV4, "nat");
+	if (WARN_ON_ONCE(!table))
+		return -ENOENT;
+
+	ops = kmemdup(nf_nat_ipv4_ops, sizeof(nf_nat_ipv4_ops), GFP_KERNEL);
 	if (!ops)
 		return -ENOMEM;
 
@@ -109,25 +116,21 @@ static void ipt_nat_unregister_lookups(struct net *net)
 static int __net_init iptable_nat_table_init(struct net *net)
 {
 	struct ipt_replace *repl;
-	struct xt_table *table;
 	int ret;
 
 	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
-				 NULL, &table);
+
+	ret = ipt_register_table(net, &nf_nat_ipv4_table, repl, NULL);
 	if (ret < 0) {
 		kfree(repl);
 		return ret;
 	}
 
-	ret = ipt_nat_register_lookups(net, table);
-	if (ret < 0) {
+	ret = ipt_nat_register_lookups(net);
+	if (ret < 0)
 		ipt_unregister_table_exit(net, "nat");
-	} else {
-		net->ipv4.nat_table = table;
-	}
 
 	kfree(repl);
 	return ret;
@@ -141,7 +144,6 @@ static void __net_exit iptable_nat_net_pre_exit(struct net *net)
 static void __net_exit iptable_nat_net_exit(struct net *net)
 {
 	ipt_unregister_table_exit(net, "nat");
-	net->ipv4.nat_table = NULL;
 }
 
 static struct pernet_operations iptable_nat_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 18776f5a4055..ceef397c1f5f 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -41,7 +41,7 @@ static unsigned int
 iptable_raw_hook(void *priv, struct sk_buff *skb,
 		 const struct nf_hook_state *state)
 {
-	return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
+	return ipt_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -58,21 +58,19 @@ static int __net_init iptable_raw_table_init(struct net *net)
 	repl = ipt_alloc_initial_table(table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ipt_register_table(net, table, repl, rawtable_ops,
-				 &net->ipv4.iptable_raw);
+	ret = ipt_register_table(net, table, repl, rawtable_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit iptable_raw_net_pre_exit(struct net *net)
 {
-	ipt_unregister_table_pre_exit(net, "raw", rawtable_ops);
+	ipt_unregister_table_pre_exit(net, "raw");
 }
 
 static void __net_exit iptable_raw_net_exit(struct net *net)
 {
 	ipt_unregister_table_exit(net, "raw");
-	net->ipv4.iptable_raw = NULL;
 }
 
 static struct pernet_operations iptable_raw_net_ops = {
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 3df92fb394c5..77973f5fd8f6 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -40,7 +40,7 @@ static unsigned int
 iptable_security_hook(void *priv, struct sk_buff *skb,
 		      const struct nf_hook_state *state)
 {
-	return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
+	return ipt_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -53,21 +53,19 @@ static int __net_init iptable_security_table_init(struct net *net)
 	repl = ipt_alloc_initial_table(&security_table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ipt_register_table(net, &security_table, repl, sectbl_ops,
-				 &net->ipv4.iptable_security);
+	ret = ipt_register_table(net, &security_table, repl, sectbl_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit iptable_security_net_pre_exit(struct net *net)
 {
-	ipt_unregister_table_pre_exit(net, "security", sectbl_ops);
+	ipt_unregister_table_pre_exit(net, "security");
 }
 
 static void __net_exit iptable_security_net_exit(struct net *net)
 {
 	ipt_unregister_table_exit(net, "security");
-	net->ipv4.iptable_security = NULL;
 }
 
 static struct pernet_operations iptable_security_net_ops = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 1caba9507228..ef37deff8405 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1498,6 +1498,7 @@ void *xt_unregister_table(struct xt_table *table)
 	mutex_unlock(&xt[table->af].mutex);
 	audit_log_nfcfg(table->name, table->af, private->number,
 			AUDIT_XT_OP_UNREGISTER, GFP_KERNEL);
+	kfree(table->ops);
 	kfree(table);
 
 	return private;
-- 
cgit v1.2.3


From f9006acc8dfe59e25aa75729728ac57a8d84fc32 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:08 +0200
Subject: netfilter: arp_tables: pass table pointer via nf_hook_ops

Same change as previous patch.  Only difference:
no need to handle NULL template_ops parameter, the only caller
(arptable_filter) always passes non-NULL argument.

This removes all remaining accesses to net->ipv4.arptable_filter.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_arp/arp_tables.h |  2 +-
 net/ipv4/netfilter/arp_tables.c          | 43 +++++++++++++++++++++-----------
 net/ipv4/netfilter/arptable_filter.c     |  6 ++---
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 9ec73dcc8fd6..a0474b4e7782 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -51,7 +51,7 @@ struct arpt_error {
 extern void *arpt_alloc_initial_table(const struct xt_table *);
 int arpt_register_table(struct net *net, const struct xt_table *table,
 			const struct arpt_replace *repl,
-			const struct nf_hook_ops *ops, struct xt_table **res);
+			const struct nf_hook_ops *ops);
 void arpt_unregister_table(struct net *net, const char *name);
 void arpt_unregister_table_pre_exit(struct net *net, const char *name,
 				    const struct nf_hook_ops *ops);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8a16b0dc5271..b1bb6a7e2dd7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1499,10 +1499,11 @@ static void __arpt_unregister_table(struct net *net, struct xt_table *table)
 int arpt_register_table(struct net *net,
 			const struct xt_table *table,
 			const struct arpt_replace *repl,
-			const struct nf_hook_ops *ops,
-			struct xt_table **res)
+			const struct nf_hook_ops *template_ops)
 {
-	int ret;
+	struct nf_hook_ops *ops;
+	unsigned int num_ops;
+	int ret, i;
 	struct xt_table_info *newinfo;
 	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
@@ -1516,28 +1517,42 @@ int arpt_register_table(struct net *net,
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
-	if (ret != 0)
-		goto out_free;
+	if (ret != 0) {
+		xt_free_table_info(newinfo);
+		return ret;
+	}
 
 	new_table = xt_register_table(net, table, &bootstrap, newinfo);
 	if (IS_ERR(new_table)) {
-		ret = PTR_ERR(new_table);
-		goto out_free;
+		xt_free_table_info(newinfo);
+		return PTR_ERR(new_table);
 	}
 
-	/* set res now, will see skbs right after nf_register_net_hooks */
-	WRITE_ONCE(*res, new_table);
+	num_ops = hweight32(table->valid_hooks);
+	if (num_ops == 0) {
+		ret = -EINVAL;
+		goto out_free;
+	}
 
-	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
-	if (ret != 0) {
-		__arpt_unregister_table(net, new_table);
-		*res = NULL;
+	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+	if (!ops) {
+		ret = -ENOMEM;
+		goto out_free;
 	}
 
+	for (i = 0; i < num_ops; i++)
+		ops[i].priv = new_table;
+
+	new_table->ops = ops;
+
+	ret = nf_register_net_hooks(net, ops, num_ops);
+	if (ret != 0)
+		goto out_free;
+
 	return ret;
 
 out_free:
-	xt_free_table_info(newinfo);
+	__arpt_unregister_table(net, new_table);
 	return ret;
 }
 
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 924f096a6d89..b8f45e9bbec8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -34,7 +34,7 @@ static unsigned int
 arptable_filter_hook(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
-	return arpt_do_table(skb, state, state->net->ipv4.arptable_filter);
+	return arpt_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
@@ -47,8 +47,7 @@ static int __net_init arptable_filter_table_init(struct net *net)
 	repl = arpt_alloc_initial_table(&packet_filter);
 	if (repl == NULL)
 		return -ENOMEM;
-	err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops,
-				  &net->ipv4.arptable_filter);
+	err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops);
 	kfree(repl);
 	return err;
 }
@@ -61,7 +60,6 @@ static void __net_exit arptable_filter_net_pre_exit(struct net *net)
 static void __net_exit arptable_filter_net_exit(struct net *net)
 {
 	arpt_unregister_table(net, "filter");
-	net->ipv4.arptable_filter = NULL;
 }
 
 static struct pernet_operations arptable_filter_net_ops = {
-- 
cgit v1.2.3


From ee177a54413a33fe474d55fabb5f8ff390bb27d7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:09 +0200
Subject: netfilter: ip6_tables: pass table pointer via nf_hook_ops

Same patch as the ip_tables one: removal of all accesses to ip6_tables
xt_table pointers.  After this patch the struct net xt_table anchors
can be removed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6/ip6_tables.h |  5 ++-
 net/ipv6/netfilter/ip6_tables.c           | 51 ++++++++++++++++++++-----------
 net/ipv6/netfilter/ip6table_filter.c      |  9 ++----
 net/ipv6/netfilter/ip6table_mangle.c      | 14 ++++-----
 net/ipv6/netfilter/ip6table_nat.c         | 24 ++++++++-------
 net/ipv6/netfilter/ip6table_raw.c         |  9 ++----
 net/ipv6/netfilter/ip6table_security.c    |  8 ++---
 7 files changed, 63 insertions(+), 57 deletions(-)

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 8c07426e18a8..11d0e725fe79 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -26,9 +26,8 @@ extern void *ip6t_alloc_initial_table(const struct xt_table *);
 
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
-			const struct nf_hook_ops *ops, struct xt_table **res);
-void ip6t_unregister_table_pre_exit(struct net *net, const char *name,
-				    const struct nf_hook_ops *ops);
+			const struct nf_hook_ops *ops);
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
 void ip6t_unregister_table_exit(struct net *net, const char *name);
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 11c80da12ee3..e763716ffa25 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1725,10 +1725,11 @@ static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
 
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
-			const struct nf_hook_ops *ops,
-			struct xt_table **res)
+			const struct nf_hook_ops *template_ops)
 {
-	int ret;
+	struct nf_hook_ops *ops;
+	unsigned int num_ops;
+	int ret, i;
 	struct xt_table_info *newinfo;
 	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
@@ -1742,40 +1743,54 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(net, newinfo, loc_cpu_entry, repl);
-	if (ret != 0)
-		goto out_free;
+	if (ret != 0) {
+		xt_free_table_info(newinfo);
+		return ret;
+	}
 
 	new_table = xt_register_table(net, table, &bootstrap, newinfo);
 	if (IS_ERR(new_table)) {
-		ret = PTR_ERR(new_table);
-		goto out_free;
+		xt_free_table_info(newinfo);
+		return PTR_ERR(new_table);
 	}
 
-	/* set res now, will see skbs right after nf_register_net_hooks */
-	WRITE_ONCE(*res, new_table);
-	if (!ops)
+	if (!template_ops)
 		return 0;
 
-	ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
-	if (ret != 0) {
-		__ip6t_unregister_table(net, new_table);
-		*res = NULL;
+	num_ops = hweight32(table->valid_hooks);
+	if (num_ops == 0) {
+		ret = -EINVAL;
+		goto out_free;
 	}
 
+	ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+	if (!ops) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	for (i = 0; i < num_ops; i++)
+		ops[i].priv = new_table;
+
+	new_table->ops = ops;
+
+	ret = nf_register_net_hooks(net, ops, num_ops);
+	if (ret != 0)
+		goto out_free;
+
 	return ret;
 
 out_free:
-	xt_free_table_info(newinfo);
+	__ip6t_unregister_table(net, new_table);
 	return ret;
 }
 
-void ip6t_unregister_table_pre_exit(struct net *net, const char *name,
-				    const struct nf_hook_ops *ops)
+void ip6t_unregister_table_pre_exit(struct net *net, const char *name)
 {
 	struct xt_table *table = xt_find_table(net, NFPROTO_IPV6, name);
 
 	if (table)
-		nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+		nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
 }
 
 void ip6t_unregister_table_exit(struct net *net, const char *name)
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 2bcafa3e2d35..bb784ea7bbd3 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -35,7 +35,7 @@ static unsigned int
 ip6table_filter_hook(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
-	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
+	return ip6t_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *filter_ops __read_mostly;
@@ -56,8 +56,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
 	((struct ip6t_standard *)repl->entries)[1].target.verdict =
 		forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
 
-	err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
-				  &net->ipv6.ip6table_filter);
+	err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
 	kfree(repl);
 	return err;
 }
@@ -72,14 +71,12 @@ static int __net_init ip6table_filter_net_init(struct net *net)
 
 static void __net_exit ip6table_filter_net_pre_exit(struct net *net)
 {
-	ip6t_unregister_table_pre_exit(net, "filter",
-				       filter_ops);
+	ip6t_unregister_table_pre_exit(net, "filter");
 }
 
 static void __net_exit ip6table_filter_net_exit(struct net *net)
 {
 	ip6t_unregister_table_exit(net, "filter");
-	net->ipv6.ip6table_filter = NULL;
 }
 
 static struct pernet_operations ip6table_filter_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 14e22022bf41..c76cffd63041 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
+ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
 {
 	unsigned int ret;
 	struct in6_addr saddr, daddr;
@@ -49,7 +49,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
+	ret = ip6t_do_table(skb, state, priv);
 
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -71,8 +71,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
-		return ip6t_mangle_out(skb, state);
-	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
+		return ip6t_mangle_out(skb, state, priv);
+	return ip6t_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
@@ -84,21 +84,19 @@ static int __net_init ip6table_mangle_table_init(struct net *net)
 	repl = ip6t_alloc_initial_table(&packet_mangler);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops,
-				  &net->ipv6.ip6table_mangle);
+	ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit ip6table_mangle_net_pre_exit(struct net *net)
 {
-	ip6t_unregister_table_pre_exit(net, "mangle", mangle_ops);
+	ip6t_unregister_table_pre_exit(net, "mangle");
 }
 
 static void __net_exit ip6table_mangle_net_exit(struct net *net)
 {
 	ip6t_unregister_table_exit(net, "mangle");
-	net->ipv6.ip6table_mangle = NULL;
 }
 
 static struct pernet_operations ip6table_mangle_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 69b7f9601d03..b0292251e655 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -68,12 +68,19 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	},
 };
 
-static int ip6t_nat_register_lookups(struct net *net, struct xt_table *table)
+static int ip6t_nat_register_lookups(struct net *net)
 {
-	struct nf_hook_ops *ops = kmemdup(nf_nat_ipv6_ops, sizeof(nf_nat_ipv6_ops), GFP_KERNEL);
-	struct ip6table_nat_pernet *xt_nat_net = net_generic(net, ip6table_nat_net_id);
+	struct ip6table_nat_pernet *xt_nat_net;
+	struct nf_hook_ops *ops;
+	struct xt_table *table;
 	int i, ret;
 
+	table = xt_find_table(net, NFPROTO_IPV6, "nat");
+	if (WARN_ON_ONCE(!table))
+		return -ENOENT;
+
+	xt_nat_net = net_generic(net, ip6table_nat_net_id);
+	ops = kmemdup(nf_nat_ipv6_ops, sizeof(nf_nat_ipv6_ops), GFP_KERNEL);
 	if (!ops)
 		return -ENOMEM;
 
@@ -111,25 +118,21 @@ static void ip6t_nat_unregister_lookups(struct net *net)
 static int __net_init ip6table_nat_table_init(struct net *net)
 {
 	struct ip6t_replace *repl;
-	struct xt_table *table;
 	int ret;
 
 	repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
 	if (repl == NULL)
 		return -ENOMEM;
 	ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
-				  NULL, &table);
+				  NULL);
 	if (ret < 0) {
 		kfree(repl);
 		return ret;
 	}
 
-	ret = ip6t_nat_register_lookups(net, table);
-	if (ret < 0) {
+	ret = ip6t_nat_register_lookups(net);
+	if (ret < 0)
 		ip6t_unregister_table_exit(net, "nat");
-	} else {
-		net->ipv6.ip6table_nat = table;
-	}
 
 	kfree(repl);
 	return ret;
@@ -143,7 +146,6 @@ static void __net_exit ip6table_nat_net_pre_exit(struct net *net)
 static void __net_exit ip6table_nat_net_exit(struct net *net)
 {
 	ip6t_unregister_table_exit(net, "nat");
-	net->ipv6.ip6table_nat = NULL;
 }
 
 static struct pernet_operations ip6table_nat_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ae3df59f0350..f63c106c521e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static unsigned int
 ip6table_raw_hook(void *priv, struct sk_buff *skb,
 		  const struct nf_hook_state *state)
 {
-	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw);
+	return ip6t_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -57,22 +57,19 @@ static int __net_init ip6table_raw_table_init(struct net *net)
 	repl = ip6t_alloc_initial_table(table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ip6t_register_table(net, table, repl, rawtable_ops,
-				  &net->ipv6.ip6table_raw);
+	ret = ip6t_register_table(net, table, repl, rawtable_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit ip6table_raw_net_pre_exit(struct net *net)
 {
-	ip6t_unregister_table_pre_exit(net, "raw",
-				       rawtable_ops);
+	ip6t_unregister_table_pre_exit(net, "raw");
 }
 
 static void __net_exit ip6table_raw_net_exit(struct net *net)
 {
 	ip6t_unregister_table_exit(net, "raw");
-	net->ipv6.ip6table_raw = NULL;
 }
 
 static struct pernet_operations ip6table_raw_net_ops = {
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 83ca632cbf88..8dc335cf450b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -39,7 +39,7 @@ static unsigned int
 ip6table_security_hook(void *priv, struct sk_buff *skb,
 		       const struct nf_hook_state *state)
 {
-	return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security);
+	return ip6t_do_table(skb, state, priv);
 }
 
 static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -52,21 +52,19 @@ static int __net_init ip6table_security_table_init(struct net *net)
 	repl = ip6t_alloc_initial_table(&security_table);
 	if (repl == NULL)
 		return -ENOMEM;
-	ret = ip6t_register_table(net, &security_table, repl, sectbl_ops,
-				  &net->ipv6.ip6table_security);
+	ret = ip6t_register_table(net, &security_table, repl, sectbl_ops);
 	kfree(repl);
 	return ret;
 }
 
 static void __net_exit ip6table_security_net_pre_exit(struct net *net)
 {
-	ip6t_unregister_table_pre_exit(net, "security", sectbl_ops);
+	ip6t_unregister_table_pre_exit(net, "security");
 }
 
 static void __net_exit ip6table_security_net_exit(struct net *net)
 {
 	ip6t_unregister_table_exit(net, "security");
-	net->ipv6.ip6table_security = NULL;
 }
 
 static struct pernet_operations ip6table_security_net_ops = {
-- 
cgit v1.2.3


From f7163c4882e883fabdafb894176994fd2ade33e2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 21 Apr 2021 09:51:10 +0200
Subject: netfilter: remove all xt_table anchors from struct net

No longer needed, table pointer arg is now passed via netfilter core.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/ipv4.h | 10 ----------
 include/net/netns/ipv6.h |  9 ---------
 2 files changed, 19 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 87e1612497ea..f6af8d96d3c6 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -76,16 +76,6 @@ struct netns_ipv4 {
 	struct inet_peer_base	*peers;
 	struct sock  * __percpu	*tcp_sk;
 	struct fqdir		*fqdir;
-#ifdef CONFIG_NETFILTER
-	struct xt_table		*iptable_filter;
-	struct xt_table		*iptable_mangle;
-	struct xt_table		*iptable_raw;
-	struct xt_table		*arptable_filter;
-#ifdef CONFIG_SECURITY
-	struct xt_table		*iptable_security;
-#endif
-	struct xt_table		*nat_table;
-#endif
 
 	u8 sysctl_icmp_echo_ignore_all;
 	u8 sysctl_icmp_echo_enable_probe;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 808f0f79ea9c..6153c8067009 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -63,15 +63,6 @@ struct netns_ipv6 {
 	struct ipv6_devconf	*devconf_dflt;
 	struct inet_peer_base	*peers;
 	struct fqdir		*fqdir;
-#ifdef CONFIG_NETFILTER
-	struct xt_table		*ip6table_filter;
-	struct xt_table		*ip6table_mangle;
-	struct xt_table		*ip6table_raw;
-#ifdef CONFIG_SECURITY
-	struct xt_table		*ip6table_security;
-#endif
-	struct xt_table		*ip6table_nat;
-#endif
 	struct fib6_info	*fib6_null_entry;
 	struct rt6_info		*ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
-- 
cgit v1.2.3


From 593268ddf3887362ba8b8998cb85433596a3e8f5 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 21 Apr 2021 12:34:21 +0200
Subject: netfilter: nf_log_syslog: Unset bridge logger in pernet exit

Without this, a stale pointer remains in pernet loggers after module
unload causing a kernel oops during dereference. Easily reproduced by:

| # modprobe nf_log_syslog
| # rmmod nf_log_syslog
| # cat /proc/net/netfilter/nf_log

Fixes: 77ccee96a6742 ("netfilter: nf_log_bridge: merge with nf_log_syslog")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_log_syslog.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index 2518818ed479..13234641cdb3 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -1011,6 +1011,7 @@ static void __net_exit nf_log_syslog_net_exit(struct net *net)
 	nf_log_unset(net, &nf_arp_logger);
 	nf_log_unset(net, &nf_ip6_logger);
 	nf_log_unset(net, &nf_netdev_logger);
+	nf_log_unset(net, &nf_bridge_logger);
 }
 
 static struct pernet_operations nf_log_syslog_net_ops = {
-- 
cgit v1.2.3


From 9c68011bd7e477ee8d03824c8cb40eab9c64027d Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Sat, 24 Apr 2021 14:09:03 +0800
Subject: r8152: remove some bit operations

Remove DELL_TB_RX_AGG_BUG and LENOVO_MACPASSTHRU flags of rtl8152_flags.
They are only set when initializing and wouldn't be change. It is enough
to record them with variables.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 9986f8969d02..136ea06540ff 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -767,8 +767,6 @@ enum rtl8152_flags {
 	PHY_RESET,
 	SCHEDULE_TASKLET,
 	GREEN_ETHERNET,
-	DELL_TB_RX_AGG_BUG,
-	LENOVO_MACPASSTHRU,
 };
 
 #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2	0x3082
@@ -934,6 +932,8 @@ struct r8152 {
 	u32 fc_pause_on, fc_pause_off;
 
 	u32 support_2500full:1;
+	u32 lenovo_macpassthru:1;
+	u32 dell_tb_rx_agg_bug:1;
 	u16 ocp_base;
 	u16 speed;
 	u16 eee_adv;
@@ -1594,7 +1594,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
 	acpi_object_type mac_obj_type;
 	int mac_strlen;
 
-	if (test_bit(LENOVO_MACPASSTHRU, &tp->flags)) {
+	if (tp->lenovo_macpassthru) {
 		mac_obj_name = "\\MACA";
 		mac_obj_type = ACPI_TYPE_STRING;
 		mac_strlen = 0x16;
@@ -2283,7 +2283,7 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 
 		remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
 
-		if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+		if (tp->dell_tb_rx_agg_bug)
 			break;
 	}
 
@@ -6941,7 +6941,7 @@ static void r8153_init(struct r8152 *tp)
 	/* rx aggregation */
 	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
 	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
-	if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+	if (tp->dell_tb_rx_agg_bug)
 		ocp_data |= RX_AGG_DISABLE;
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
@@ -9447,7 +9447,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 		switch (le16_to_cpu(udev->descriptor.idProduct)) {
 		case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
 		case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
-			set_bit(LENOVO_MACPASSTHRU, &tp->flags);
+			tp->lenovo_macpassthru = 1;
 		}
 	}
 
@@ -9455,7 +9455,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 	    (!strcmp(udev->serial, "000001000000") ||
 	     !strcmp(udev->serial, "000002000000"))) {
 		dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
-		set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+		tp->dell_tb_rx_agg_bug = 1;
 	}
 
 	netdev->ethtool_ops = &ops;
-- 
cgit v1.2.3


From 48ac0b5805dd9b10546d5a89a2702fd78a8ca69f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Apr 2021 02:30:36 +0200
Subject: net: ethernet: ixp4xx: Add DT bindings

This adds device tree bindings for the IXP4xx ethernet
controller with optional MDIO bridge.

Cc: Zoltan HERPAI <wigyori@uid0.hu>
Cc: Raylynn Knight <rayknight@me.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/intel,ixp4xx-ethernet.yaml        | 102 +++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml

diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
new file mode 100644
index 000000000000..f2e91d1bf7d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/intel,ixp4xx-ethernet.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx ethernet
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  The Intel IXP4xx ethernet makes use of the IXP4xx NPE (Network
+  Processing Engine) and the IXP4xx Queue Manager to process
+  the ethernet frames. It can optionally contain an MDIO bus to
+  talk to PHYs.
+
+properties:
+  compatible:
+    const: intel,ixp4xx-ethernet
+
+  reg:
+    maxItems: 1
+    description: Ethernet MMIO address range
+
+  queue-rx:
+    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    maxItems: 1
+    description: phandle to the RX queue on the NPE
+
+  queue-txready:
+    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    maxItems: 1
+    description: phandle to the TX READY queue on the NPE
+
+  phy-mode: true
+
+  phy-handle: true
+
+  intel,npe-handle:
+    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    maxItems: 1
+    description: phandle to the NPE this ethernet instance is using
+      and the instance to use in the second cell
+
+  mdio:
+    type: object
+    $ref: "mdio.yaml#"
+    description: optional node for embedded MDIO controller
+
+required:
+  - compatible
+  - reg
+  - queue-rx
+  - queue-txready
+  - intel,npe-handle
+
+additionalProperties: false
+
+examples:
+  - |
+    npe: npe@c8006000 {
+      compatible = "intel,ixp4xx-network-processing-engine";
+      reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+    };
+
+    ethernet@c8009000 {
+      compatible = "intel,ixp4xx-ethernet";
+      reg = <0xc8009000 0x1000>;
+      status = "disabled";
+      queue-rx = <&qmgr 4>;
+      queue-txready = <&qmgr 21>;
+      intel,npe-handle = <&npe 1>;
+      phy-mode = "rgmii";
+      phy-handle = <&phy1>;
+    };
+
+    ethernet@c800c000 {
+      compatible = "intel,ixp4xx-ethernet";
+      reg = <0xc800c000 0x1000>;
+      status = "disabled";
+      queue-rx = <&qmgr 3>;
+      queue-txready = <&qmgr 20>;
+      intel,npe-handle = <&npe 2>;
+      phy-mode = "rgmii";
+      phy-handle = <&phy2>;
+
+      mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        phy1: ethernet-phy@1 {
+          reg = <1>;
+        };
+        phy2: ethernet-phy@2 {
+          reg = <2>;
+        };
+      };
+    };
-- 
cgit v1.2.3


From 3e8047a98553e234a751f4f7f42d687ba98c0822 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Apr 2021 02:30:37 +0200
Subject: net: ethernet: ixp4xx: Retire ancient phy retrieveal

This driver was using a really dated way of obtaining the
phy by printing a string and using it with phy_connect().
Switch to using more reasonable modern interfaces.

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xscale/ixp4xx_eth.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 0152f1e70783..9d323e8595e2 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1360,7 +1360,6 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
 
 static int ixp4xx_eth_probe(struct platform_device *pdev)
 {
-	char phy_id[MII_BUS_ID_SIZE + 3];
 	struct phy_device *phydev = NULL;
 	struct device *dev = &pdev->dev;
 	struct eth_plat_info *plat;
@@ -1459,14 +1458,15 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	__raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control);
 	udelay(50);
 
-	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
-		mdio_bus->id, plat->phy);
-	phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link,
-			     PHY_INTERFACE_MODE_MII);
+	phydev = mdiobus_get_phy(mdio_bus, plat->phy);
 	if (IS_ERR(phydev)) {
 		err = PTR_ERR(phydev);
 		goto err_free_mem;
 	}
+	err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link,
+				 PHY_INTERFACE_MODE_MII);
+	if (err)
+		goto err_free_mem;
 
 	phydev->irq = PHY_POLL;
 
-- 
cgit v1.2.3


From 95aafe911db602d19b00d2a88c3d54a84119f5dc Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Apr 2021 02:30:38 +0200
Subject: net: ethernet: ixp4xx: Support device tree probing

This adds device tree probing to the IXP4xx ethernet
driver.

Add a platform data bool to tell us whether to
register an MDIO bus for the device or not, as well
as the corresponding NPE.

We need to drop the memory region request as part of
this since the OF core will request the memory for the
device.

Cc: Zoltan HERPAI <wigyori@uid0.hu>
Cc: Raylynn Knight <rayknight@me.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xscale/Kconfig      |   1 +
 drivers/net/ethernet/xscale/ixp4xx_eth.c | 210 +++++++++++++++++++++----------
 include/linux/platform_data/eth_ixp4xx.h |   2 +
 3 files changed, 150 insertions(+), 63 deletions(-)

diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index 7b83a6e5d894..468ffe3d1707 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -22,6 +22,7 @@ config IXP4XX_ETH
 	tristate "Intel IXP4xx Ethernet support"
 	depends on ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR
 	select PHYLIB
+	select OF_MDIO if OF
 	select NET_PTP_CLASSIFY
 	help
 	  Say Y here if you want to use built-in Ethernet ports
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 9d323e8595e2..1149e88e6454 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -28,6 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/net_tstamp.h>
 #include <linux/of.h>
+#include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_data/eth_ixp4xx.h>
 #include <linux/platform_device.h>
@@ -165,7 +166,6 @@ struct eth_regs {
 };
 
 struct port {
-	struct resource *mem_res;
 	struct eth_regs __iomem *regs;
 	struct npe *npe;
 	struct net_device *netdev;
@@ -250,6 +250,7 @@ static inline void memcpy_swab32(u32 *dest, u32 *src, int cnt)
 static DEFINE_SPINLOCK(mdio_lock);
 static struct eth_regs __iomem *mdio_regs; /* mdio command and status only */
 static struct mii_bus *mdio_bus;
+static struct device_node *mdio_bus_np;
 static int ports_open;
 static struct port *npe_port_tab[MAX_NPES];
 static struct dma_pool *dma_pool;
@@ -533,7 +534,8 @@ static int ixp4xx_mdio_register(struct eth_regs __iomem *regs)
 	mdio_bus->write = &ixp4xx_mdio_write;
 	snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "ixp4xx-eth-0");
 
-	if ((err = mdiobus_register(mdio_bus)))
+	err = of_mdiobus_register(mdio_bus, mdio_bus_np);
+	if (err)
 		mdiobus_free(mdio_bus);
 	return err;
 }
@@ -1358,18 +1360,118 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 };
 
+#ifdef CONFIG_OF
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct of_phandle_args queue_spec;
+	struct of_phandle_args npe_spec;
+	struct device_node *mdio_np;
+	struct eth_plat_info *plat;
+	int ret;
+
+	plat = devm_kzalloc(dev, sizeof(*plat), GFP_KERNEL);
+	if (!plat)
+		return NULL;
+
+	ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle", 1, 0,
+					       &npe_spec);
+	if (ret) {
+		dev_err(dev, "no NPE engine specified\n");
+		return NULL;
+	}
+	/* NPE ID 0x00, 0x10, 0x20... */
+	plat->npe = (npe_spec.args[0] << 4);
+
+	/* Check if this device has an MDIO bus */
+	mdio_np = of_get_child_by_name(np, "mdio");
+	if (mdio_np) {
+		plat->has_mdio = true;
+		mdio_bus_np = mdio_np;
+		/* DO NOT put the mdio_np, it will be used */
+	}
+
+	/* Get the rx queue as a resource from queue manager */
+	ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
+					       &queue_spec);
+	if (ret) {
+		dev_err(dev, "no rx queue phandle\n");
+		return NULL;
+	}
+	plat->rxq = queue_spec.args[0];
+
+	/* Get the txready queue as resource from queue manager */
+	ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0,
+					       &queue_spec);
+	if (ret) {
+		dev_err(dev, "no txready queue phandle\n");
+		return NULL;
+	}
+	plat->txreadyq = queue_spec.args[0];
+
+	return plat;
+}
+#else
+static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev)
+{
+	return NULL;
+}
+#endif
+
 static int ixp4xx_eth_probe(struct platform_device *pdev)
 {
 	struct phy_device *phydev = NULL;
 	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 	struct eth_plat_info *plat;
-	resource_size_t regs_phys;
 	struct net_device *ndev;
 	struct resource *res;
 	struct port *port;
 	int err;
 
-	plat = dev_get_platdata(dev);
+	if (np) {
+		plat = ixp4xx_of_get_platdata(dev);
+		if (!plat)
+			return -ENODEV;
+	} else {
+		plat = dev_get_platdata(dev);
+		if (!plat)
+			return -ENODEV;
+		plat->npe = pdev->id;
+		switch (plat->npe) {
+		case IXP4XX_ETH_NPEA:
+			/* If the MDIO bus is not up yet, defer probe */
+			break;
+		case IXP4XX_ETH_NPEB:
+			/* On all except IXP43x, NPE-B is used for the MDIO bus.
+			 * If there is no NPE-B in the feature set, bail out,
+			 * else we have the MDIO bus here.
+			 */
+			if (!cpu_is_ixp43x()) {
+				if (!(ixp4xx_read_feature_bits() &
+				      IXP4XX_FEATURE_NPEB_ETH0))
+					return -ENODEV;
+				/* Else register the MDIO bus on NPE-B */
+				plat->has_mdio = true;
+			}
+			break;
+		case IXP4XX_ETH_NPEC:
+			/* IXP43x lacks NPE-B and uses NPE-C for the MDIO bus
+			 * access, if there is no NPE-C, no bus, nothing works,
+			 * so bail out.
+			 */
+			if (cpu_is_ixp43x()) {
+				if (!(ixp4xx_read_feature_bits() &
+				      IXP4XX_FEATURE_NPEC_ETH))
+					return -ENODEV;
+				/* Else register the MDIO bus on NPE-B */
+				plat->has_mdio = true;
+			}
+			break;
+		default:
+			return -ENODEV;
+		}
+	}
 
 	if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port))))
 		return -ENOMEM;
@@ -1377,59 +1479,29 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	SET_NETDEV_DEV(ndev, dev);
 	port = netdev_priv(ndev);
 	port->netdev = ndev;
-	port->id = pdev->id;
+	port->id = plat->npe;
 
 	/* Get the port resource and remap */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
 		return -ENODEV;
-	regs_phys = res->start;
 	port->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(port->regs))
 		return PTR_ERR(port->regs);
 
-	switch (port->id) {
-	case IXP4XX_ETH_NPEA:
-		/* If the MDIO bus is not up yet, defer probe */
-		if (!mdio_bus)
-			return -EPROBE_DEFER;
-		break;
-	case IXP4XX_ETH_NPEB:
-		/*
-		 * On all except IXP43x, NPE-B is used for the MDIO bus.
-		 * If there is no NPE-B in the feature set, bail out, else
-		 * register the MDIO bus.
-		 */
-		if (!cpu_is_ixp43x()) {
-			if (!(ixp4xx_read_feature_bits() &
-			      IXP4XX_FEATURE_NPEB_ETH0))
-				return -ENODEV;
-			/* Else register the MDIO bus on NPE-B */
-			if ((err = ixp4xx_mdio_register(port->regs)))
-				return err;
-		}
-		if (!mdio_bus)
-			return -EPROBE_DEFER;
-		break;
-	case IXP4XX_ETH_NPEC:
-		/*
-		 * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access,
-		 * of there is no NPE-C, no bus, nothing works, so bail out.
-		 */
-		if (cpu_is_ixp43x()) {
-			if (!(ixp4xx_read_feature_bits() &
-			      IXP4XX_FEATURE_NPEC_ETH))
-				return -ENODEV;
-			/* Else register the MDIO bus on NPE-C */
-			if ((err = ixp4xx_mdio_register(port->regs)))
-				return err;
+	/* Register the MDIO bus if we have it */
+	if (plat->has_mdio) {
+		err = ixp4xx_mdio_register(port->regs);
+		if (err) {
+			dev_err(dev, "failed to register MDIO bus\n");
+			return err;
 		}
-		if (!mdio_bus)
-			return -EPROBE_DEFER;
-		break;
-	default:
-		return -ENODEV;
 	}
+	/* If the instance with the MDIO bus has not yet appeared,
+	 * defer probing until it gets probed.
+	 */
+	if (!mdio_bus)
+		return -EPROBE_DEFER;
 
 	ndev->netdev_ops = &ixp4xx_netdev_ops;
 	ndev->ethtool_ops = &ixp4xx_ethtool_ops;
@@ -1440,12 +1512,6 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	if (!(port->npe = npe_request(NPE_ID(port->id))))
 		return -EIO;
 
-	port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name);
-	if (!port->mem_res) {
-		err = -EBUSY;
-		goto err_npe_rel;
-	}
-
 	port->plat = plat;
 	npe_port_tab[NPE_ID(port->id)] = port;
 	memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN);
@@ -1458,15 +1524,26 @@ static int ixp4xx_eth_probe(struct platform_device *pdev)
 	__raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control);
 	udelay(50);
 
-	phydev = mdiobus_get_phy(mdio_bus, plat->phy);
-	if (IS_ERR(phydev)) {
-		err = PTR_ERR(phydev);
-		goto err_free_mem;
+	if (np) {
+		phydev = of_phy_get_and_connect(ndev, np, ixp4xx_adjust_link);
+	} else {
+		phydev = mdiobus_get_phy(mdio_bus, plat->phy);
+		if (IS_ERR(phydev)) {
+			err = PTR_ERR(phydev);
+			dev_err(dev, "could not connect phydev (%d)\n", err);
+			goto err_free_mem;
+		}
+		err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link,
+					 PHY_INTERFACE_MODE_MII);
+		if (err)
+			goto err_free_mem;
+
 	}
-	err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link,
-				 PHY_INTERFACE_MODE_MII);
-	if (err)
+	if (!phydev) {
+		err = -ENODEV;
+		dev_err(dev, "no phydev\n");
 		goto err_free_mem;
+	}
 
 	phydev->irq = PHY_POLL;
 
@@ -1482,8 +1559,6 @@ err_phy_dis:
 	phy_disconnect(phydev);
 err_free_mem:
 	npe_port_tab[NPE_ID(port->id)] = NULL;
-	release_resource(port->mem_res);
-err_npe_rel:
 	npe_release(port->npe);
 	return err;
 }
@@ -1499,12 +1574,21 @@ static int ixp4xx_eth_remove(struct platform_device *pdev)
 	ixp4xx_mdio_remove();
 	npe_port_tab[NPE_ID(port->id)] = NULL;
 	npe_release(port->npe);
-	release_resource(port->mem_res);
 	return 0;
 }
 
+static const struct of_device_id ixp4xx_eth_of_match[] = {
+	{
+		.compatible = "intel,ixp4xx-ethernet",
+	},
+	{ },
+};
+
 static struct platform_driver ixp4xx_eth_driver = {
-	.driver.name	= DRV_NAME,
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = of_match_ptr(ixp4xx_eth_of_match),
+	},
 	.probe		= ixp4xx_eth_probe,
 	.remove		= ixp4xx_eth_remove,
 };
diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h
index 6f652ea0c6ae..114b0940729f 100644
--- a/include/linux/platform_data/eth_ixp4xx.h
+++ b/include/linux/platform_data/eth_ixp4xx.h
@@ -14,6 +14,8 @@ struct eth_plat_info {
 	u8 rxq;		/* configurable, currently 0 - 31 only */
 	u8 txreadyq;
 	u8 hwaddr[6];
+	u8 npe;		/* NPE instance used by this interface */
+	bool has_mdio;	/* If this instance has an MDIO bus */
 };
 
 #endif
-- 
cgit v1.2.3


From 427f0c8c194b22edcafef1b0a42995ddc5c2227d Mon Sep 17 00:00:00 2001
From: Jethro Beekman <kernel@jbeekman.nl>
Date: Sun, 25 Apr 2021 11:22:03 +0200
Subject: macvlan: Add nodst option to macvlan type source

The default behavior for source MACVLAN is to duplicate packets to
appropriate type source devices, and then do the normal destination MACVLAN
flow. This patch adds an option to skip destination MACVLAN processing if
any matching source MACVLAN device has the option set.

This allows setting up a "catch all" device for source MACVLAN: create one
or more devices with type source nodst, and one device with e.g. type vepa,
and incoming traffic will be received on exactly one device.

v2: netdev wants non-standard line length

Signed-off-by: Jethro Beekman <kernel@jbeekman.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c        | 19 ++++++++++++++-----
 include/uapi/linux/if_link.h |  1 +
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a9a5cf36a4b..7427b989607e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -423,18 +423,24 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
 	macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false);
 }
 
-static void macvlan_forward_source(struct sk_buff *skb,
+static bool macvlan_forward_source(struct sk_buff *skb,
 				   struct macvlan_port *port,
 				   const unsigned char *addr)
 {
 	struct macvlan_source_entry *entry;
 	u32 idx = macvlan_eth_hash(addr);
 	struct hlist_head *h = &port->vlan_source_hash[idx];
+	bool consume = false;
 
 	hlist_for_each_entry_rcu(entry, h, hlist) {
-		if (ether_addr_equal_64bits(entry->addr, addr))
+		if (ether_addr_equal_64bits(entry->addr, addr)) {
+			if (entry->vlan->flags & MACVLAN_FLAG_NODST)
+				consume = true;
 			macvlan_forward_source_one(skb, entry->vlan);
+		}
 	}
+
+	return consume;
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
@@ -463,7 +469,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 			return RX_HANDLER_CONSUMED;
 		*pskb = skb;
 		eth = eth_hdr(skb);
-		macvlan_forward_source(skb, port, eth->h_source);
+		if (macvlan_forward_source(skb, port, eth->h_source))
+			return RX_HANDLER_CONSUMED;
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (src && src->mode != MACVLAN_MODE_VEPA &&
 		    src->mode != MACVLAN_MODE_BRIDGE) {
@@ -482,7 +489,8 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 		return RX_HANDLER_PASS;
 	}
 
-	macvlan_forward_source(skb, port, eth->h_source);
+	if (macvlan_forward_source(skb, port, eth->h_source))
+		return RX_HANDLER_CONSUMED;
 	if (macvlan_passthru(port))
 		vlan = list_first_or_null_rcu(&port->vlans,
 					      struct macvlan_dev, list);
@@ -1286,7 +1294,8 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
 		return 0;
 
 	if (data[IFLA_MACVLAN_FLAGS] &&
-	    nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC)
+	    nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~(MACVLAN_FLAG_NOPROMISC |
+						      MACVLAN_FLAG_NODST))
 		return -EINVAL;
 
 	if (data[IFLA_MACVLAN_MODE]) {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 91c8dda6d95d..cd5b382a4138 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -614,6 +614,7 @@ enum macvlan_macaddr_mode {
 };
 
 #define MACVLAN_FLAG_NOPROMISC	1
+#define MACVLAN_FLAG_NODST	2 /* skip dst macvlan if matching src macvlan */
 
 /* VRF section */
 enum {
-- 
cgit v1.2.3


From 1d2deb61f095a7df231cc394c06d07a2893ac9eb Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:18 -0400
Subject: bnxt_en: report signal mode in link up messages

Firmware reports link signalling mode for certain speeds. In these
cases, print the signalling modes in kernel log link up messages.

Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e15d454e33f0..573c039e6046 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9075,8 +9075,9 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info)
 static void bnxt_report_link(struct bnxt *bp)
 {
 	if (bp->link_info.link_up) {
-		const char *duplex;
+		const char *signal = "";
 		const char *flow_ctrl;
+		const char *duplex;
 		u32 speed;
 		u16 fec;
 
@@ -9098,8 +9099,23 @@ static void bnxt_report_link(struct bnxt *bp)
 			flow_ctrl = "ON - receive";
 		else
 			flow_ctrl = "none";
-		netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n",
-			    speed, duplex, flow_ctrl);
+		if (bp->link_info.phy_qcfg_resp.option_flags &
+		    PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN) {
+			u8 sig_mode = bp->link_info.active_fec_sig_mode &
+				      PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK;
+			switch (sig_mode) {
+			case PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ:
+				signal = "(NRZ) ";
+				break;
+			case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4:
+				signal = "(PAM4) ";
+				break;
+			default:
+				break;
+			}
+		}
+		netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n",
+			    speed, signal, duplex, flow_ctrl);
 		if (bp->flags & BNXT_FLAG_EEE_CAP)
 			netdev_info(bp->dev, "EEE is %s\n",
 				    bp->eee.eee_active ? "active" :
-- 
cgit v1.2.3


From b0d28207ced88b3909547d8299f679353a87fd35 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:19 -0400
Subject: bnxt_en: Add a new phy_flags field to the main driver structure.

Copy the phy related feature flags from the firmware call
HWRM_PORT_PHY_QCAPS to this new field.  We can also remove the flags
field in the bnxt_test_info structure.  It's cleaner to have all PHY
related flags in one location, directly copied from the firmware.

To keep the BNXT_PHY_CFG_ABLE() macro logic the same, we need to make
a slight adjustment to check that it is a PF.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 29 +++++------------------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 19 +++++++++------
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c |  8 +++----
 3 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 573c039e6046..f08427b7dbe7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4145,7 +4145,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
 	bnxt_free_ntp_fltrs(bp, irq_re_init);
 	if (irq_re_init) {
 		bnxt_free_ring_stats(bp);
-		if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) ||
+		if (!(bp->phy_flags & BNXT_PHY_FL_PORT_STATS_NO_RESET) ||
 		    test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 			bnxt_free_port_stats(bp);
 		bnxt_free_ring_grps(bp);
@@ -9116,7 +9116,7 @@ static void bnxt_report_link(struct bnxt *bp)
 		}
 		netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n",
 			    speed, signal, duplex, flow_ctrl);
-		if (bp->flags & BNXT_FLAG_EEE_CAP)
+		if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP)
 			netdev_info(bp->dev, "EEE is %s\n",
 				    bp->eee.eee_active ? "active" :
 							 "not active");
@@ -9148,10 +9148,6 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
 	struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
 	struct bnxt_link_info *link_info = &bp->link_info;
 
-	bp->flags &= ~BNXT_FLAG_EEE_CAP;
-	if (bp->test_info)
-		bp->test_info->flags &= ~(BNXT_TEST_FL_EXT_LPBK |
-					  BNXT_TEST_FL_AN_PHY_LPBK);
 	if (bp->hwrm_spec_code < 0x10201)
 		return 0;
 
@@ -9162,31 +9158,17 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp)
 	if (rc)
 		goto hwrm_phy_qcaps_exit;
 
+	bp->phy_flags = resp->flags;
 	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) {
 		struct ethtool_eee *eee = &bp->eee;
 		u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode);
 
-		bp->flags |= BNXT_FLAG_EEE_CAP;
 		eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
 		bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) &
 				 PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK;
 		bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) &
 				 PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK;
 	}
-	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED) {
-		if (bp->test_info)
-			bp->test_info->flags |= BNXT_TEST_FL_EXT_LPBK;
-	}
-	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED) {
-		if (bp->test_info)
-			bp->test_info->flags |= BNXT_TEST_FL_AN_PHY_LPBK;
-	}
-	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED) {
-		if (BNXT_PF(bp))
-			bp->fw_cap |= BNXT_FW_CAP_SHARED_PORT_CFG;
-	}
-	if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET)
-		bp->fw_cap |= BNXT_FW_CAP_PORT_STATS_NO_RESET;
 
 	if (bp->hwrm_spec_code >= 0x10a01) {
 		if (bnxt_phy_qcaps_no_speed(resp)) {
@@ -9277,7 +9259,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 			      PORT_PHY_QCFG_RESP_PHY_ADDR_MASK;
 	link_info->module_status = resp->module_status;
 
-	if (bp->flags & BNXT_FLAG_EEE_CAP) {
+	if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) {
 		struct ethtool_eee *eee = &bp->eee;
 		u16 fw_speeds;
 
@@ -9855,7 +9837,7 @@ static bool bnxt_eee_config_ok(struct bnxt *bp)
 	struct ethtool_eee *eee = &bp->eee;
 	struct bnxt_link_info *link_info = &bp->link_info;
 
-	if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+	if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
 		return true;
 
 	if (eee->eee_enabled) {
@@ -12450,6 +12432,7 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
 	int rc = 0;
 	struct bnxt_link_info *link_info = &bp->link_info;
 
+	bp->phy_flags = 0;
 	rc = bnxt_hwrm_phy_qcaps(bp);
 	if (rc) {
 		netdev_err(bp->dev, "Probe phy can't get phy capabilities (rc: %x)\n",
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 29061c577baa..6c4fb78c59fe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1341,9 +1341,6 @@ struct bnxt_led_info {
 
 struct bnxt_test_info {
 	u8 offline_mask;
-	u8 flags;
-#define BNXT_TEST_FL_EXT_LPBK		0x1
-#define BNXT_TEST_FL_AN_PHY_LPBK	0x2
 	u16 timeout;
 	char string[BNXT_MAX_TEST][ETH_GSTRING_LEN];
 };
@@ -1693,7 +1690,6 @@ struct bnxt {
 	#define BNXT_FLAG_SHARED_RINGS	0x200
 	#define BNXT_FLAG_PORT_STATS	0x400
 	#define BNXT_FLAG_UDP_RSS_CAP	0x800
-	#define BNXT_FLAG_EEE_CAP	0x1000
 	#define BNXT_FLAG_NEW_RSS_CAP	0x2000
 	#define BNXT_FLAG_WOL_CAP	0x4000
 	#define BNXT_FLAG_ROCEV1_CAP	0x8000
@@ -1720,8 +1716,10 @@ struct bnxt {
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
 #define BNXT_MH(bp)		((bp)->flags & BNXT_FLAG_MULTI_HOST)
 #define BNXT_SINGLE_PF(bp)	(BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp))
+#define BNXT_SH_PORT_CFG_OK(bp)	(BNXT_PF(bp) &&				\
+				 ((bp)->phy_flags & BNXT_PHY_FL_SHARED_PORT_CFG))
 #define BNXT_PHY_CFG_ABLE(bp)	((BNXT_SINGLE_PF(bp) ||			\
-				  ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) && \
+				  BNXT_SH_PORT_CFG_OK(bp)) &&		\
 				 (bp)->link_info.phy_state == BNXT_PHY_STATE_ENABLED)
 #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
 #define BNXT_RX_PAGE_MODE(bp)	((bp)->flags & BNXT_FLAG_RX_PAGE_MODE)
@@ -1871,11 +1869,9 @@ struct bnxt {
 	#define BNXT_FW_CAP_EXT_STATS_SUPPORTED		0x00040000
 	#define BNXT_FW_CAP_ERR_RECOVER_RELOAD		0x00100000
 	#define BNXT_FW_CAP_HOT_RESET			0x00200000
-	#define BNXT_FW_CAP_SHARED_PORT_CFG		0x00400000
 	#define BNXT_FW_CAP_VLAN_RX_STRIP		0x01000000
 	#define BNXT_FW_CAP_VLAN_TX_INSERT		0x02000000
 	#define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED	0x04000000
-	#define BNXT_FW_CAP_PORT_STATS_NO_RESET		0x10000000
 	#define BNXT_FW_CAP_RING_MONITOR		0x40000000
 
 #define BNXT_NEW_RM(bp)		((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
@@ -2010,6 +2006,15 @@ struct bnxt {
 	u32			lpi_tmr_lo;
 	u32			lpi_tmr_hi;
 
+	/* copied from flags in hwrm_port_phy_qcaps_output */
+	u8			phy_flags;
+#define BNXT_PHY_FL_EEE_CAP		PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED
+#define BNXT_PHY_FL_EXT_LPBK		PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED
+#define BNXT_PHY_FL_AN_PHY_LPBK		PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED
+#define BNXT_PHY_FL_SHARED_PORT_CFG	PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED
+#define BNXT_PHY_FL_PORT_STATS_NO_RESET	PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET
+#define BNXT_PHY_FL_NO_PHY_LPBK		PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
+
 	u8			num_tests;
 	struct bnxt_test_info	*test_info;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 3b66e300c962..c664ec52ebcf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2912,7 +2912,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata)
 	if (!BNXT_PHY_CFG_ABLE(bp))
 		return -EOPNOTSUPP;
 
-	if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+	if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&bp->link_lock);
@@ -2963,7 +2963,7 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (!(bp->flags & BNXT_FLAG_EEE_CAP))
+	if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP))
 		return -EOPNOTSUPP;
 
 	*edata = bp->eee;
@@ -3215,7 +3215,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp,
 	int rc;
 
 	if (!link_info->autoneg ||
-	    (bp->test_info->flags & BNXT_TEST_FL_AN_PHY_LPBK))
+	    (bp->phy_flags & BNXT_PHY_FL_AN_PHY_LPBK))
 		return 0;
 
 	rc = bnxt_query_force_speeds(bp, &fw_advertising);
@@ -3416,7 +3416,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
 	}
 
 	if ((etest->flags & ETH_TEST_FL_EXTERNAL_LB) &&
-	    (bp->test_info->flags & BNXT_TEST_FL_EXT_LPBK))
+	    (bp->phy_flags & BNXT_PHY_FL_EXT_LPBK))
 		do_ext_lpbk = true;
 
 	if (etest->flags & ETH_TEST_FL_OFFLINE) {
-- 
cgit v1.2.3


From d5ca99054f8e25384390d41c0123d930eed510b6 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:20 -0400
Subject: bnxt_en: Add support for fw managed link down feature.

In the current code, the driver will not shutdown the link during
IFDOWN if there are still VFs sharing the port.  Newer firmware will
manage the link down decision when the port is shared by VFs, so
we can just call firmware to shutdown the port unconditionally and
let firmware make the final decision.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f08427b7dbe7..dcf1598afac2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9495,7 +9495,8 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
 	if (!BNXT_SINGLE_PF(bp))
 		return 0;
 
-	if (pci_num_vf(bp->pdev))
+	if (pci_num_vf(bp->pdev) &&
+	    !(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN))
 		return 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 6c4fb78c59fe..5835d8ca8c22 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2014,6 +2014,7 @@ struct bnxt {
 #define BNXT_PHY_FL_SHARED_PORT_CFG	PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED
 #define BNXT_PHY_FL_PORT_STATS_NO_RESET	PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET
 #define BNXT_PHY_FL_NO_PHY_LPBK		PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
+#define BNXT_PHY_FL_FW_MANAGED_LKDN	PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN
 
 	u8			num_tests;
 	struct bnxt_test_info	*test_info;
-- 
cgit v1.2.3


From dd85fc0ab5b4daa496bd3e2832b51963022182d0 Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:21 -0400
Subject: bnxt_en: allow promiscuous mode for trusted VFs

Firmware previously only allowed promiscuous mode for VFs associated with
a default VLAN. It is now possible to enable promiscuous mode for a VF
having no VLAN configured provided that it is trusted. In such cases the
VF will see all packets received by the PF, irrespective of destination
MAC or VLAN.

Note, it is necessary to query firmware at the time of bnxt_promisc_ok()
instead of in bnxt_hwrm_func_qcfg() because the trusted status might be
altered by the PF after the VF has been configured. This check must now
also be deferred because the firmware call sleeps.

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       | 11 +++++++----
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c |  6 +++---
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h |  1 +
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dcf1598afac2..9862f517960d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8340,11 +8340,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp)
 #endif
 }
 
-/* Allow PF and VF with default VLAN to be in promiscuous mode */
+/* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */
 static bool bnxt_promisc_ok(struct bnxt *bp)
 {
 #ifdef CONFIG_BNXT_SRIOV
-	if (BNXT_VF(bp) && !bp->vf.vlan)
+	if (BNXT_VF(bp) && !bp->vf.vlan && !bnxt_is_trusted_vf(bp, &bp->vf))
 		return false;
 #endif
 	return true;
@@ -8441,7 +8441,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 	if (bp->dev->flags & IFF_BROADCAST)
 		vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST;
 
-	if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+	if (bp->dev->flags & IFF_PROMISC)
 		vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
 	if (bp->dev->flags & IFF_ALLMULTI) {
@@ -10485,7 +10485,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
 		  CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST |
 		  CFA_L2_SET_RX_MASK_REQ_MASK_BCAST);
 
-	if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp))
+	if (dev->flags & IFF_PROMISC)
 		mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
 	uc_update = bnxt_uc_list_updated(bp);
@@ -10561,6 +10561,9 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp)
 	}
 
 skip_uc:
+	if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) &&
+	    !bnxt_promisc_ok(bp))
+		vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 	rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
 	if (rc && vnic->mc_list_count) {
 		netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index a217316228f4..4da52f812585 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -113,7 +113,7 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
 	int rc;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
-	req.fid = cpu_to_le16(vf->fw_fid);
+	req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff);
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 	if (rc) {
@@ -125,9 +125,9 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf)
 	return 0;
 }
 
-static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf)
 {
-	if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
+	if (BNXT_PF(bp) && !(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF))
 		return !!(vf->flags & BNXT_VF_TRUST);
 
 	bnxt_hwrm_func_qcfg_flags(bp, vf);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 629641bf6fc5..995535e4c11b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -34,6 +34,7 @@ int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int bnxt_set_vf_bw(struct net_device *, int, int, int);
 int bnxt_set_vf_link_state(struct net_device *, int, int);
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
+bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf);
 int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust);
 int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs);
 int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset);
-- 
cgit v1.2.3


From 6b7027689890c590373fc58f362fae43d0517e21 Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:22 -0400
Subject: bnxt_en: allow VF config ops when PF is closed

It is perfectly legal for the stack to query and configure VFs via PF
NDOs while the NIC is administratively down.  Remove the unnecessary
check for the PF to be in open state.

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 4da52f812585..67856dbf9ce9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -49,10 +49,6 @@ static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
 
 static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
 {
-	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
-		netdev_err(bp->dev, "vf ndo called though PF is down\n");
-		return -EINVAL;
-	}
 	if (!bp->pf.active_vfs) {
 		netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
 		return -EINVAL;
-- 
cgit v1.2.3


From 7b3c8e27d67e2b04c1ce099261469c12d09c13d4 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:23 -0400
Subject: bnxt_en: Move bnxt_approve_mac().

Move it before bnxt_update_vf_mac().  In the next patch, we need to call
bnxt_approve_mac() from bnxt_update_mac() under some conditions.  This
will avoid forward declaration.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 53 +++++++++++++------------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 67856dbf9ce9..e65093f4aa7a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -1116,6 +1116,33 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
 	}
 }
 
+int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
+{
+	struct hwrm_func_vf_cfg_input req = {0};
+	int rc = 0;
+
+	if (!BNXT_VF(bp))
+		return 0;
+
+	if (bp->hwrm_spec_code < 0x10202) {
+		if (is_valid_ether_addr(bp->vf.mac_addr))
+			rc = -EADDRNOTAVAIL;
+		goto mac_done;
+	}
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
+	req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
+	memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+mac_done:
+	if (rc && strict) {
+		rc = -EADDRNOTAVAIL;
+		netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
+			    mac);
+		return rc;
+	}
+	return 0;
+}
+
 void bnxt_update_vf_mac(struct bnxt *bp)
 {
 	struct hwrm_func_qcaps_input req = {0};
@@ -1145,32 +1172,6 @@ update_vf_mac_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 }
 
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
-{
-	struct hwrm_func_vf_cfg_input req = {0};
-	int rc = 0;
-
-	if (!BNXT_VF(bp))
-		return 0;
-
-	if (bp->hwrm_spec_code < 0x10202) {
-		if (is_valid_ether_addr(bp->vf.mac_addr))
-			rc = -EADDRNOTAVAIL;
-		goto mac_done;
-	}
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1);
-	req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
-	memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
-	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-mac_done:
-	if (rc && strict) {
-		rc = -EADDRNOTAVAIL;
-		netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n",
-			    mac);
-		return rc;
-	}
-	return 0;
-}
 #else
 
 int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset)
-- 
cgit v1.2.3


From 92923cc71012535cc5d760b1319675ad4c404c08 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:24 -0400
Subject: bnxt_en: Call bnxt_approve_mac() after the PF gives up control of the
 VF MAC.

When the PF is no longer enforcing an assigned MAC address on a VF, the
VF needs to call bnxt_approve_mac() to tell the PF what MAC address it is
now using.  Otherwise it gets out of sync and the PF won't know what
MAC address the VF wants to use.  Ultimately the VF will fail when it
tries to setup the L2 MAC filter for the vnic.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index e65093f4aa7a..eb00a219aa51 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -1147,6 +1147,7 @@ void bnxt_update_vf_mac(struct bnxt *bp)
 {
 	struct hwrm_func_qcaps_input req = {0};
 	struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+	bool inform_pf = false;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1);
 	req.fid = cpu_to_le16(0xffff);
@@ -1162,14 +1163,22 @@ void bnxt_update_vf_mac(struct bnxt *bp)
 	 *    default but the stored zero MAC will allow the VF user to change
 	 *    the random MAC address using ndo_set_mac_address() if he wants.
 	 */
-	if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr))
+	if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr)) {
 		memcpy(bp->vf.mac_addr, resp->mac_address, ETH_ALEN);
+		/* This means we are now using our own MAC address, let
+		 * the PF know about this MAC address.
+		 */
+		if (!is_valid_ether_addr(bp->vf.mac_addr))
+			inform_pf = true;
+	}
 
 	/* overwrite netdev dev_addr with admin VF MAC */
 	if (is_valid_ether_addr(bp->vf.mac_addr))
 		memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
 update_vf_mac_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
+	if (inform_pf)
+		bnxt_approve_mac(bp, bp->dev->dev_addr, false);
 }
 
 #else
-- 
cgit v1.2.3


From 7fbf359bb2c19c824cbb1954020680824f6ee5a5 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:25 -0400
Subject: bnxt_en: Add PCI IDs for Hyper-V VF devices.

Support VF device IDs used by the Hyper-V hypervisor.

Reviewed-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9862f517960d..77ebafbd2dce 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -122,7 +122,10 @@ enum board_idx {
 	NETXTREME_E_VF,
 	NETXTREME_C_VF,
 	NETXTREME_S_VF,
+	NETXTREME_C_VF_HV,
+	NETXTREME_E_VF_HV,
 	NETXTREME_E_P5_VF,
+	NETXTREME_E_P5_VF_HV,
 };
 
 /* indexed by enum above */
@@ -170,7 +173,10 @@ static const struct {
 	[NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" },
 	[NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" },
 	[NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" },
+	[NETXTREME_C_VF_HV] = { "Broadcom NetXtreme-C Virtual Function for Hyper-V" },
+	[NETXTREME_E_VF_HV] = { "Broadcom NetXtreme-E Virtual Function for Hyper-V" },
 	[NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" },
+	[NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" },
 };
 
 static const struct pci_device_id bnxt_pci_tbl[] = {
@@ -222,15 +228,25 @@ static const struct pci_device_id bnxt_pci_tbl[] = {
 	{ PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 },
 #ifdef CONFIG_BNXT_SRIOV
 	{ PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x1607), .driver_data = NETXTREME_E_VF_HV },
+	{ PCI_VDEVICE(BROADCOM, 0x1608), .driver_data = NETXTREME_E_VF_HV },
 	{ PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16bd), .driver_data = NETXTREME_E_VF_HV },
 	{ PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16c2), .driver_data = NETXTREME_C_VF_HV },
+	{ PCI_VDEVICE(BROADCOM, 0x16c3), .driver_data = NETXTREME_C_VF_HV },
+	{ PCI_VDEVICE(BROADCOM, 0x16c4), .driver_data = NETXTREME_E_VF_HV },
+	{ PCI_VDEVICE(BROADCOM, 0x16c5), .driver_data = NETXTREME_E_VF_HV },
 	{ PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16e6), .driver_data = NETXTREME_C_VF_HV },
 	{ PCI_VDEVICE(BROADCOM, 0x1806), .driver_data = NETXTREME_E_P5_VF },
 	{ PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV },
+	{ PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV },
 	{ PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF },
 #endif
 	{ 0 }
@@ -265,7 +281,8 @@ static struct workqueue_struct *bnxt_pf_wq;
 static bool bnxt_vf_pciid(enum board_idx idx)
 {
 	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
-		idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF);
+		idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV ||
+		idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
-- 
cgit v1.2.3


From dade5e15fade59a789c30bc47abfe926ddd856d6 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:26 -0400
Subject: bnxt_en: Support IFF_SUPP_NOFCS feature to transmit without ethernet
 FCS.

If firmware is capable, set the IFF_SUPP_NOFCS flag to support the
sockets option to transmit packets without FCS.  This is mainly used
for testing.

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16 +++++++++++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 77ebafbd2dce..53db073b457c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -375,6 +375,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct pci_dev *pdev = bp->pdev;
 	struct bnxt_tx_ring_info *txr;
 	struct bnxt_sw_tx_bd *tx_buf;
+	__le32 lflags = 0;
 
 	i = skb_get_queue_mapping(skb);
 	if (unlikely(i >= bp->tx_nr_rings)) {
@@ -416,6 +417,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT;
 	}
 
+	if (unlikely(skb->no_fcs)) {
+		lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC);
+		goto normal_tx;
+	}
+
 	if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) {
 		struct tx_push_buffer *tx_push_buf = txr->tx_push;
 		struct tx_push_bd *tx_push = &tx_push_buf->push_bd;
@@ -517,7 +523,7 @@ normal_tx:
 	txbd1 = (struct tx_bd_ext *)
 		&txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
 
-	txbd1->tx_bd_hsize_lflags = 0;
+	txbd1->tx_bd_hsize_lflags = lflags;
 	if (skb_is_gso(skb)) {
 		u32 hdr_len;
 
@@ -529,14 +535,14 @@ normal_tx:
 			hdr_len = skb_transport_offset(skb) +
 				tcp_hdrlen(skb);
 
-		txbd1->tx_bd_hsize_lflags = cpu_to_le32(TX_BD_FLAGS_LSO |
+		txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_LSO |
 					TX_BD_FLAGS_T_IPID |
 					(hdr_len << (TX_BD_HSIZE_SHIFT - 1)));
 		length = skb_shinfo(skb)->gso_size;
 		txbd1->tx_bd_mss = cpu_to_le32(length);
 		length += hdr_len;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		txbd1->tx_bd_hsize_lflags =
+		txbd1->tx_bd_hsize_lflags |=
 			cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM);
 		txbd1->tx_bd_mss = 0;
 	}
@@ -12460,6 +12466,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt)
 			   rc);
 		return rc;
 	}
+	if (bp->phy_flags & BNXT_PHY_FL_NO_FCS)
+		bp->dev->priv_flags |= IFF_SUPP_NOFCS;
+	else
+		bp->dev->priv_flags &= ~IFF_SUPP_NOFCS;
 	if (!fw_dflt)
 		return 0;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5835d8ca8c22..a3744247740b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2015,6 +2015,7 @@ struct bnxt {
 #define BNXT_PHY_FL_PORT_STATS_NO_RESET	PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET
 #define BNXT_PHY_FL_NO_PHY_LPBK		PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED
 #define BNXT_PHY_FL_FW_MANAGED_LKDN	PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN
+#define BNXT_PHY_FL_NO_FCS		PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS
 
 	u8			num_tests;
 	struct bnxt_test_info	*test_info;
-- 
cgit v1.2.3


From 1698d600b361915fbe5eda63a613da55c435bd34 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 25 Apr 2021 13:45:27 -0400
Subject: bnxt_en: Implement .ndo_features_check().

For UDP encapsultions, we only support the offloaded Vxlan port and
Geneve port.  All other ports included FOU and GUE are not supported so
we need to turn off TSO and checksum features.

v2: Reverse the check for supported UDP ports to be more straight forward.

Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 42 +++++++++++++++++++++++++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 ++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 53db073b457c..5d0ab5629aa4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10781,6 +10781,40 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+static netdev_features_t bnxt_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	struct bnxt *bp;
+	__be16 udp_port;
+	u8 l4_proto = 0;
+
+	features = vlan_features_check(skb, features);
+	if (!skb->encapsulation)
+		return features;
+
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_proto = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_proto = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return features;
+	}
+
+	if (l4_proto != IPPROTO_UDP)
+		return features;
+
+	bp = netdev_priv(dev);
+	/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+	udp_port = udp_hdr(skb)->dest;
+	if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
+		return features;
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
 int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words,
 			 u32 *reg_buf)
 {
@@ -12288,10 +12322,13 @@ static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table)
 	unsigned int cmd;
 
 	udp_tunnel_nic_get_port(netdev, table, 0, &ti);
-	if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
+	if (ti.type == UDP_TUNNEL_TYPE_VXLAN) {
+		bp->vxlan_port = ti.port;
 		cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN;
-	else
+	} else {
+		bp->nge_port = ti.port;
 		cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE;
+	}
 
 	if (ti.port)
 		return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd);
@@ -12391,6 +12428,7 @@ static const struct net_device_ops bnxt_netdev_ops = {
 	.ndo_change_mtu		= bnxt_change_mtu,
 	.ndo_fix_features	= bnxt_fix_features,
 	.ndo_set_features	= bnxt_set_features,
+	.ndo_features_check	= bnxt_features_check,
 	.ndo_tx_timeout		= bnxt_tx_timeout,
 #ifdef CONFIG_BNXT_SRIOV
 	.ndo_get_vf_config	= bnxt_get_vf_config,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a3744247740b..24d2ad6a8740 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1914,6 +1914,8 @@ struct bnxt {
 
 	u16			vxlan_fw_dst_port_id;
 	u16			nge_fw_dst_port_id;
+	__be16			vxlan_port;
+	__be16			nge_port;
 	u8			port_partition_type;
 	u8			port_count;
 	u16			br_mode;
-- 
cgit v1.2.3


From d59d2f82f984df44b31c5d7837fc2f62268b7571 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Apr 2021 00:17:08 +0200
Subject: netfilter: nftables: add nft_pernet() helper function

Consolidate call to net_generic(net, nf_tables_net_id) in this
wrapper function.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |   8 +++
 net/netfilter/nf_tables_api.c     | 112 +++++++++++++++++++-------------------
 net/netfilter/nf_tables_offload.c |  10 ++--
 net/netfilter/nft_chain_filter.c  |   5 +-
 net/netfilter/nft_dynset.c        |   5 +-
 5 files changed, 69 insertions(+), 71 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4a75da2a2e1d..eb708b77c4a5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -13,6 +13,7 @@
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netlink.h>
 #include <net/flow_offload.h>
+#include <net/netns/generic.h>
 
 #define NFT_MAX_HOOKS	(NF_INET_INGRESS + 1)
 
@@ -1580,4 +1581,11 @@ struct nftables_pernet {
 	u8			validate_state;
 };
 
+extern unsigned int nf_tables_net_id;
+
+static inline struct nftables_pernet *nft_pernet(const struct net *net)
+{
+	return net_generic(net, nf_tables_net_id);
+}
+
 #endif /* _NET_NF_TABLES_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 357443b3c0e4..155b85553fcc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -21,7 +21,6 @@
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_offload.h>
 #include <net/net_namespace.h>
-#include <net/netns/generic.h>
 #include <net/sock.h>
 
 #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
@@ -106,7 +105,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
 
 static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
 	switch (nft_net->validate_state) {
 	case NFT_VALIDATE_SKIP:
@@ -181,7 +180,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
 	if (!nft_set_is_anonymous(set))
 		return;
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWSET:
@@ -278,9 +277,8 @@ static void nf_tables_unregister_hook(struct net *net,
 
 static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
 {
-	struct nftables_pernet *nft_net;
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
-	nft_net = net_generic(net, nf_tables_net_id);
 	list_add_tail(&trans->list, &nft_net->commit_list);
 }
 
@@ -566,7 +564,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
 	if (nla == NULL)
 		return ERR_PTR(-EINVAL);
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	list_for_each_entry_rcu(table, &nft_net->tables, list,
 				lockdep_is_held(&nft_net->commit_mutex)) {
 		if (!nla_strcmp(nla, table->name) &&
@@ -590,7 +588,7 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
 	struct nftables_pernet *nft_net;
 	struct nft_table *table;
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
 		    nft_active_genmask(table, genmask))
@@ -655,7 +653,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt,
 	if (ret >= MODULE_NAME_LEN)
 		return 0;
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	list_for_each_entry(req, &nft_net->module_list, list) {
 		if (!strcmp(req->module, module_name)) {
 			if (req->done)
@@ -711,7 +709,7 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
 
 static __be16 nft_base_seq(const struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
 	return htons(nft_net->base_seq & 0xffff);
 }
@@ -793,7 +791,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nft_net = net_generic(ctx->net, nf_tables_net_id);
+	nft_net = nft_pernet(ctx->net);
 	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
@@ -811,7 +809,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
 	int family = nfmsg->nfgen_family;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -1062,7 +1060,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[],
 			      struct netlink_ext_ack *extack)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -1221,9 +1219,9 @@ out:
 
 static int nft_flush(struct nft_ctx *ctx, int family)
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
-	struct nft_table *table, *nt;
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	const struct nlattr * const *nla = ctx->nla;
+	struct nft_table *table, *nt;
 	int err = 0;
 
 	list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
@@ -1345,7 +1343,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 static bool lockdep_commit_lock_is_held(const struct net *net)
 {
 #ifdef CONFIG_PROVE_LOCKING
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
 	return lockdep_is_held(&nft_net->commit_mutex);
 #else
@@ -1570,7 +1568,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	nft_net = net_generic(ctx->net, nf_tables_net_id);
+	nft_net = nft_pernet(ctx->net);
 	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
@@ -1581,15 +1579,15 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
 				 struct netlink_callback *cb)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-	const struct nft_table *table;
-	const struct nft_chain *chain;
 	unsigned int idx = 0, s_idx = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
 	struct nftables_pernet *nft_net;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -1908,7 +1906,7 @@ static int nft_chain_parse_hook(struct net *net,
 				struct nft_chain_hook *hook, u8 family,
 				bool autoload)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
 	const struct nft_chain_type *type;
 	int err;
@@ -2302,7 +2300,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 
 	if (nla[NFTA_CHAIN_HANDLE] &&
 	    nla[NFTA_CHAIN_NAME]) {
-		struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+		struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 		struct nft_trans *tmp;
 		char *name;
 
@@ -2338,7 +2336,7 @@ err:
 static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
 					       const struct nlattr *nla)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	u32 id = ntohl(nla_get_be32(nla));
 	struct nft_trans *trans;
 
@@ -2357,7 +2355,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      const struct nlattr * const nla[],
 			      struct netlink_ext_ack *extack)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -2908,7 +2906,7 @@ nla_put_failure:
 static void nf_tables_rule_notify(const struct nft_ctx *ctx,
 				  const struct nft_rule *rule, int event)
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	struct sk_buff *skb;
 	int err;
 
@@ -2989,7 +2987,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
 	struct nftables_pernet *nft_net;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -3223,7 +3221,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			     const struct nlattr * const nla[],
 			     struct netlink_ext_ack *extack)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u8 genmask = nft_genmask_next(net);
 	struct nft_expr_info *info = NULL;
@@ -3442,7 +3440,7 @@ err1:
 static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 					     const struct nlattr *nla)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	u32 id = ntohl(nla_get_be32(nla));
 	struct nft_trans *trans;
 
@@ -3559,7 +3557,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 		   const struct nft_set_desc *desc,
 		   enum nft_set_policies policy)
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	const struct nft_set_ops *ops, *bops;
 	struct nft_set_estimate est, best;
 	const struct nft_set_type *type;
@@ -3704,9 +3702,9 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
 static struct nft_set *nft_set_lookup_byid(const struct net *net,
 					   const struct nlattr *nla, u8 genmask)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
-	struct nft_trans *trans;
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	u32 id = ntohl(nla_get_be32(nla));
+	struct nft_trans *trans;
 
 	list_for_each_entry(trans, &nft_net->commit_list, list) {
 		if (trans->msg_type == NFT_MSG_NEWSET) {
@@ -3942,7 +3940,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
 				 const struct nft_set *set, int event,
 			         gfp_t gfp_flags)
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	struct sk_buff *skb;
 	u32 portid = ctx->portid;
 	int err;
@@ -3980,7 +3978,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 		return skb->len;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -4833,7 +4831,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	int event;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
 		if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
 		    dump_ctx->ctx.family != table->family)
@@ -5138,7 +5136,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list);
 	return;
 err:
@@ -5660,7 +5658,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 				const struct nlattr * const nla[],
 				struct netlink_ext_ack *extack)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	u8 genmask = nft_genmask_next(net);
 	const struct nlattr *attr;
 	struct nft_set *set;
@@ -6323,7 +6321,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 		reset = true;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -6473,7 +6471,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 		const struct nftables_pernet *nft_net;
 		char *buf;
 
-		nft_net = net_generic(net, nf_tables_net_id);
+		nft_net = nft_pernet(net);
 		buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
 
 		audit_log_nfcfg(buf,
@@ -6560,7 +6558,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq, int event,
 		    int family, int report, gfp_t gfp)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct sk_buff *skb;
 	int err;
 	char *buf = kasprintf(gfp, "%s:%u",
@@ -7246,7 +7244,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
 	const struct nft_table *table;
 
 	rcu_read_lock();
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	cb->seq = nft_net->base_seq;
 
 	list_for_each_entry_rcu(table, &nft_net->tables, list) {
@@ -7384,7 +7382,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 				       struct list_head *hook_list,
 				       int event)
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	struct sk_buff *skb;
 	int err;
 
@@ -7429,7 +7427,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
 				   u32 portid, u32 seq)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nlmsghdr *nlh;
 	char buf[TASK_COMM_LEN];
 	int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
@@ -7482,7 +7480,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
 		return 0;
 
 	net = dev_net(dev);
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	mutex_lock(&nft_net->commit_mutex);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		list_for_each_entry(flowtable, &table->flowtables, list) {
@@ -7670,7 +7668,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 
 static int nf_tables_validate(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_table *table;
 
 	switch (nft_net->validate_state) {
@@ -7855,7 +7853,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
 
 static void nf_tables_commit_chain_prepare_cancel(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_trans *trans, *next;
 
 	list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
@@ -7967,7 +7965,7 @@ static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
 
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_module_request *req, *next;
 
 	WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
@@ -7980,7 +7978,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net)
 
 static void nf_tables_commit_release(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_trans *trans;
 
 	/* all side effects have to be made visible.
@@ -8014,7 +8012,7 @@ static void nf_tables_commit_release(struct net *net)
 
 static void nft_commit_notify(struct net *net, u32 portid)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct sk_buff *batch_skb = NULL, *nskb, *skb;
 	unsigned char *data;
 	int len;
@@ -8101,7 +8099,7 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
 
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_chain *chain;
@@ -8322,7 +8320,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 static void nf_tables_module_autoload(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_module_request *req, *next;
 	LIST_HEAD(module_list);
 
@@ -8370,7 +8368,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 
 static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
 	struct nft_hook *hook;
@@ -8524,7 +8522,7 @@ static void nf_tables_cleanup(struct net *net)
 static int nf_tables_abort(struct net *net, struct sk_buff *skb,
 			   enum nfnl_abort_action action)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	int ret = __nf_tables_abort(net, action);
 
 	mutex_unlock(&nft_net->commit_mutex);
@@ -8534,7 +8532,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
 
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	bool genid_ok;
 
 	mutex_lock(&nft_net->commit_mutex);
@@ -9096,7 +9094,7 @@ static void __nft_release_hook(struct net *net, struct nft_table *table)
 
 static void __nft_release_hooks(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_table *table;
 
 	list_for_each_entry(table, &nft_net->tables, list) {
@@ -9156,7 +9154,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
 
 static void __nft_release_tables(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_table *table, *nt;
 
 	list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
@@ -9179,7 +9177,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
 	if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
 		return NOTIFY_DONE;
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	mutex_lock(&nft_net->commit_mutex);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		if (nft_table_has_owner(table) &&
@@ -9207,7 +9205,7 @@ static struct notifier_block nft_nl_notifier = {
 
 static int __net_init nf_tables_init_net(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
 	INIT_LIST_HEAD(&nft_net->tables);
 	INIT_LIST_HEAD(&nft_net->commit_list);
@@ -9227,7 +9225,7 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 
 	mutex_lock(&nft_net->commit_mutex);
 	if (!list_empty(&nft_net->commit_list))
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 19215e81dd66..a48c5fd53a80 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -7,8 +7,6 @@
 #include <net/netfilter/nf_tables_offload.h>
 #include <net/pkt_cls.h>
 
-extern unsigned int nf_tables_net_id;
-
 static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
 {
 	struct nft_flow_rule *flow;
@@ -389,7 +387,7 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
 
 	nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
 				    basechain, &extack);
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	mutex_lock(&nft_net->commit_mutex);
 	list_del(&block_cb->driver_list);
 	list_move(&block_cb->list, &bo.cb_list);
@@ -490,7 +488,7 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
 static void nft_flow_rule_offload_abort(struct net *net,
 					struct nft_trans *trans)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	int err = 0;
 
 	list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) {
@@ -539,7 +537,7 @@ static void nft_flow_rule_offload_abort(struct net *net,
 
 int nft_flow_rule_offload_commit(struct net *net)
 {
-	struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nft_trans *trans;
 	int err = 0;
 	u8 policy;
@@ -663,7 +661,7 @@ static int nft_offload_netdev_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	nft_net = net_generic(net, nf_tables_net_id);
+	nft_net = nft_pernet(net);
 	mutex_lock(&nft_net->commit_mutex);
 	chain = __nft_offload_get_chain(nft_net, dev);
 	if (chain)
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 7a9aa57b195b..363bdd7044ec 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -2,7 +2,6 @@
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
-#include <net/netns/generic.h>
 #include <net/netfilter/nf_tables.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
@@ -11,8 +10,6 @@
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
-extern unsigned int nf_tables_net_id;
-
 #ifdef CONFIG_NF_TABLES_IPV4
 static unsigned int nft_do_chain_ipv4(void *priv,
 				      struct sk_buff *skb,
@@ -369,7 +366,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	    event != NETDEV_CHANGENAME)
 		return NOTIFY_DONE;
 
-	nft_net = net_generic(ctx.net, nf_tables_net_id);
+	nft_net = nft_pernet(ctx.net);
 	mutex_lock(&nft_net->commit_mutex);
 	list_for_each_entry(table, &nft_net->tables, list) {
 		if (table->family != NFPROTO_NETDEV)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f9437a0dcfef..6ba3256fa844 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -11,9 +11,6 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
-#include <net/netns/generic.h>
-
-extern unsigned int nf_tables_net_id;
 
 struct nft_dynset {
 	struct nft_set			*set;
@@ -164,7 +161,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 			   const struct nft_expr *expr,
 			   const struct nlattr * const tb[])
 {
-	struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	struct nft_dynset *priv = nft_expr_priv(expr);
 	u8 genmask = nft_genmask_next(ctx->net);
 	struct nft_set *set;
-- 
cgit v1.2.3


From a655536571747575fcaac3c93252b0032d878545 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Apr 2021 00:17:09 +0200
Subject: netfilter: nfnetlink: add struct nfnl_info and pass it to callbacks

Add a new structure to reduce callback footprint and to facilite
extensions of the nfnetlink callback interface in the future.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  |  13 ++-
 net/netfilter/ipset/ip_set_core.c    | 149 ++++++++++--------------
 net/netfilter/nf_conntrack_netlink.c | 214 +++++++++++++++++------------------
 net/netfilter/nfnetlink.c            |  18 ++-
 net/netfilter/nfnetlink_acct.c       |  44 ++++---
 net/netfilter/nfnetlink_cthelper.c   |  30 ++---
 net/netfilter/nfnetlink_cttimeout.c  | 101 ++++++++---------
 net/netfilter/nfnetlink_log.c        |  26 ++---
 net/netfilter/nfnetlink_osf.c        |  19 ++--
 net/netfilter/nfnetlink_queue.c      |  12 +-
 10 files changed, 286 insertions(+), 340 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index d4c14257db5d..1baa3205b199 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -7,11 +7,16 @@
 #include <net/netlink.h>
 #include <uapi/linux/netfilter/nfnetlink.h>
 
+struct nfnl_info {
+	struct net		*net;
+	struct sock		*sk;
+	const struct nlmsghdr	*nlh;
+	struct netlink_ext_ack	*extack;
+};
+
 struct nfnl_callback {
-	int (*call)(struct net *net, struct sock *nl, struct sk_buff *skb,
-		    const struct nlmsghdr *nlh,
-		    const struct nlattr * const cda[],
-		    struct netlink_ext_ack *extack);
+	int (*call)(struct sk_buff *skb, const struct nfnl_info *info,
+		    const struct nlattr * const cda[]);
 	int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
 			const struct nlattr * const cda[],
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 359ff8ec236a..bf9902c1daa8 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1031,26 +1031,22 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 	return 0;
 }
 
-static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_none(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
 	return -EOPNOTSUPP;
 }
 
-static int ip_set_create(struct net *net, struct sock *ctnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[],
-			 struct netlink_ext_ack *extack)
+static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
 	const char *name, *typename;
 	u8 family, revision;
-	u32 flags = flag_exist(nlh);
+	u32 flags = flag_exist(info->nlh);
 	int ret = 0;
 
 	if (unlikely(protocol_min_failed(attr) ||
@@ -1101,7 +1097,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
 	/* Set create flags depending on the type revision */
 	set->flags |= set->type->create_flags[revision];
 
-	ret = set->type->create(net, set, tb, flags);
+	ret = set->type->create(info->net, set, tb, flags);
 	if (ret != 0)
 		goto put_out;
 
@@ -1183,12 +1179,10 @@ ip_set_destroy_set(struct ip_set *set)
 	kfree(set);
 }
 
-static int ip_set_destroy(struct net *net, struct sock *ctnl,
-			  struct sk_buff *skb, const struct nlmsghdr *nlh,
-			  const struct nlattr * const attr[],
-			  struct netlink_ext_ack *extack)
+static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
+			  const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *s;
 	ip_set_id_t i;
 	int ret = 0;
@@ -1230,7 +1224,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
 		/* Modified by ip_set_destroy() only, which is serialized */
 		inst->is_destroyed = false;
 	} else {
-		u32 flags = flag_exist(nlh);
+		u32 flags = flag_exist(info->nlh);
 		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
 				    &i);
 		if (!s) {
@@ -1264,12 +1258,10 @@ ip_set_flush_set(struct ip_set *set)
 	ip_set_unlock(set);
 }
 
-static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[],
-			struct netlink_ext_ack *extack)
+static int ip_set_flush(struct sk_buff *skb, const struct nfnl_info *info,
+			const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *s;
 	ip_set_id_t i;
 
@@ -1304,12 +1296,10 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
 				    .len = IPSET_MAXNAMELEN - 1 },
 };
 
-static int ip_set_rename(struct net *net, struct sock *ctnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[],
-			 struct netlink_ext_ack *extack)
+static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *set, *s;
 	const char *name2;
 	ip_set_id_t i;
@@ -1354,12 +1344,10 @@ out:
  * so the ip_set_list always contains valid pointers to the sets.
  */
 
-static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *from, *to;
 	ip_set_id_t from_id, to_id;
 	char from_name[IPSET_MAXNAMELEN];
@@ -1669,10 +1657,8 @@ out:
 	return ret < 0 ? ret : skb->len;
 }
 
-static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_dump(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
 	if (unlikely(protocol_min_failed(attr)))
 		return -IPSET_ERR_PROTOCOL;
@@ -1683,7 +1669,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 			.dump = ip_set_dump_do,
 			.done = ip_set_dump_done,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 }
 
@@ -1817,30 +1803,24 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
 	return ret;
 }
 
-static int ip_set_uadd(struct net *net, struct sock *ctnl,
-		       struct sk_buff *skb, const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_uadd(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
-	return ip_set_ad(net, ctnl, skb,
-			 IPSET_ADD, nlh, attr, extack);
+	return ip_set_ad(info->net, info->sk, skb,
+			 IPSET_ADD, info->nlh, attr, info->extack);
 }
 
-static int ip_set_udel(struct net *net, struct sock *ctnl,
-		       struct sk_buff *skb, const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_udel(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
-	return ip_set_ad(net, ctnl, skb,
-			 IPSET_DEL, nlh, attr, extack);
+	return ip_set_ad(info->net, info->sk, skb,
+			 IPSET_DEL, info->nlh, attr, info->extack);
 }
 
-static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-			const struct nlmsghdr *nlh,
-			const struct nlattr * const attr[],
-			struct netlink_ext_ack *extack)
+static int ip_set_utest(struct sk_buff *skb, const struct nfnl_info *info,
+			const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct ip_set *set;
 	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	int ret = 0;
@@ -1872,12 +1852,10 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 /* Get headed data of a set */
 
-static int ip_set_header(struct net *net, struct sock *ctnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[],
-			 struct netlink_ext_ack *extack)
+static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	const struct ip_set *set;
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1895,7 +1873,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
 	if (!skb2)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
 			 IPSET_CMD_HEADER);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -1907,7 +1885,8 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -1929,10 +1908,8 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
 	[IPSET_ATTR_FAMILY]	= { .type = NLA_U8 },
 };
 
-static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
-		       const struct nlmsghdr *nlh,
-		       const struct nlattr * const attr[],
-		       struct netlink_ext_ack *extack)
+static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info,
+		       const struct nlattr * const attr[])
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -1955,7 +1932,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	if (!skb2)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
 			 IPSET_CMD_TYPE);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -1968,7 +1945,8 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 	nlmsg_end(skb2, nlh2);
 
 	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -1988,10 +1966,8 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
 	[IPSET_ATTR_PROTOCOL]	= { .type = NLA_U8 },
 };
 
-static int ip_set_protocol(struct net *net, struct sock *ctnl,
-			   struct sk_buff *skb, const struct nlmsghdr *nlh,
-			   const struct nlattr * const attr[],
-			   struct netlink_ext_ack *extack)
+static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
+			   const struct nlattr * const attr[])
 {
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
@@ -2004,7 +1980,7 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
 	if (!skb2)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
 			 IPSET_CMD_PROTOCOL);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -2014,7 +1990,8 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -2029,12 +2006,10 @@ nlmsg_failure:
 
 /* Get set by name or index, from userspace */
 
-static int ip_set_byname(struct net *net, struct sock *ctnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const attr[],
-			 struct netlink_ext_ack *extack)
+static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
 	ip_set_id_t id = IPSET_INVALID_ID;
@@ -2053,7 +2028,7 @@ static int ip_set_byname(struct net *net, struct sock *ctnl,
 	if (!skb2)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
 			 IPSET_CMD_GET_BYNAME);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -2063,7 +2038,8 @@ static int ip_set_byname(struct net *net, struct sock *ctnl,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -2081,12 +2057,10 @@ static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
 	[IPSET_ATTR_INDEX]	= { .type = NLA_U16 },
 };
 
-static int ip_set_byindex(struct net *net, struct sock *ctnl,
-			  struct sk_buff *skb, const struct nlmsghdr *nlh,
-			  const struct nlattr * const attr[],
-			  struct netlink_ext_ack *extack)
+static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
+			  const struct nlattr * const attr[])
 {
-	struct ip_set_net *inst = ip_set_pernet(net);
+	struct ip_set_net *inst = ip_set_pernet(info->net);
 	struct sk_buff *skb2;
 	struct nlmsghdr *nlh2;
 	ip_set_id_t id = IPSET_INVALID_ID;
@@ -2108,7 +2082,7 @@ static int ip_set_byindex(struct net *net, struct sock *ctnl,
 	if (!skb2)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0,
 			 IPSET_CMD_GET_BYINDEX);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -2117,7 +2091,8 @@ static int ip_set_byindex(struct net *net, struct sock *ctnl,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 44e3cb80e2e0..5147a63b3d1b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1524,17 +1524,15 @@ static int ctnetlink_flush_conntrack(struct net *net,
 	return 0;
 }
 
-static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
-				   struct sk_buff *skb,
-				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[],
-				   struct netlink_ext_ack *extack)
+static int ctnetlink_del_conntrack(struct sk_buff *skb,
+				   const struct nfnl_info *info,
+				   const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
-	struct nf_conn *ct;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nf_conntrack_zone zone;
+	struct nf_conn *ct;
 	int err;
 
 	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1550,15 +1548,15 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 	else {
 		u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
 
-		return ctnetlink_flush_conntrack(net, cda,
+		return ctnetlink_flush_conntrack(info->net, cda,
 						 NETLINK_CB(skb).portid,
-						 nlmsg_report(nlh), u3);
+						 nlmsg_report(info->nlh), u3);
 	}
 
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &zone, &tuple);
+	h = nf_conntrack_find_get(info->net, &zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1578,28 +1576,26 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
 		}
 	}
 
-	nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+	nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(info->nlh));
 	nf_ct_put(ct);
 
 	return 0;
 }
 
-static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
-				   struct sk_buff *skb,
-				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[],
-				   struct netlink_ext_ack *extack)
+static int ctnetlink_get_conntrack(struct sk_buff *skb,
+				   const struct nfnl_info *info,
+				   const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
-	struct nf_conn *ct;
-	struct sk_buff *skb2 = NULL;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
+	struct sk_buff *skb2;
+	struct nf_conn *ct;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = ctnetlink_start,
 			.dump = ctnetlink_dump_table,
@@ -1607,7 +1603,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 			.data = (void *)cda,
 		};
 
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1626,7 +1622,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &zone, &tuple);
+	h = nf_conntrack_find_get(info->net, &zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1639,13 +1635,16 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 		return -ENOMEM;
 	}
 
-	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
-				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true, 0);
+	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid,
+				  info->nlh->nlmsg_seq,
+				  NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct,
+				  true, 0);
 	nf_ct_put(ct);
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -1743,18 +1742,16 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
 	return ctnetlink_dump_list(skb, cb, true);
 }
 
-static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[],
-				  struct netlink_ext_ack *extack)
+static int ctnetlink_get_ct_dying(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const cda[])
 {
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ctnetlink_dump_dying,
 			.done = ctnetlink_done_list,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	return -EOPNOTSUPP;
@@ -1766,18 +1763,16 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
 	return ctnetlink_dump_list(skb, cb, false);
 }
 
-static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
-					struct sk_buff *skb,
-					const struct nlmsghdr *nlh,
-					const struct nlattr * const cda[],
-					struct netlink_ext_ack *extack)
+static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
+					const struct nfnl_info *info,
+					const struct nlattr * const cda[])
 {
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ctnetlink_dump_unconfirmed,
 			.done = ctnetlink_done_list,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	return -EOPNOTSUPP;
@@ -2374,18 +2369,16 @@ err1:
 	return ERR_PTR(err);
 }
 
-static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
-				   struct sk_buff *skb,
-				   const struct nlmsghdr *nlh,
-				   const struct nlattr * const cda[],
-				   struct netlink_ext_ack *extack)
+static int ctnetlink_new_conntrack(struct sk_buff *skb,
+				   const struct nfnl_info *info,
+				   const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	struct nf_conn *ct;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
+	struct nf_conn *ct;
 	int err;
 
 	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -2407,13 +2400,13 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 	}
 
 	if (cda[CTA_TUPLE_ORIG])
-		h = nf_conntrack_find_get(net, &zone, &otuple);
+		h = nf_conntrack_find_get(info->net, &zone, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = nf_conntrack_find_get(net, &zone, &rtuple);
+		h = nf_conntrack_find_get(info->net, &zone, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE) {
+		if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
 			enum ip_conntrack_events events;
 
 			if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
@@ -2421,8 +2414,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 			if (otuple.dst.protonum != rtuple.dst.protonum)
 				return -EINVAL;
 
-			ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
-							&rtuple, u3);
+			ct = ctnetlink_create_conntrack(info->net, &zone, cda,
+							&otuple, &rtuple, u3);
 			if (IS_ERR(ct))
 				return PTR_ERR(ct);
 
@@ -2445,7 +2438,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 						      (1 << IPCT_SYNPROXY) |
 						      events,
 						      ct, NETLINK_CB(skb).portid,
-						      nlmsg_report(nlh));
+						      nlmsg_report(info->nlh));
 			nf_ct_put(ct);
 		}
 
@@ -2455,7 +2448,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 
 	err = -EEXIST;
 	ct = nf_ct_tuplehash_to_ctrack(h);
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL)) {
 		err = ctnetlink_change_conntrack(ct, cda);
 		if (err == 0) {
 			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
@@ -2467,7 +2460,7 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 						      (1 << IPCT_MARK) |
 						      (1 << IPCT_SYNPROXY),
 						      ct, NETLINK_CB(skb).portid,
-						      nlmsg_report(nlh));
+						      nlmsg_report(info->nlh));
 		}
 	}
 
@@ -2539,17 +2532,15 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int ctnetlink_stat_ct_cpu(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ctnetlink_ct_stat_cpu_dump,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	return 0;
@@ -2585,10 +2576,8 @@ nlmsg_failure:
 	return -1;
 }
 
-static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const cda[],
-			     struct netlink_ext_ack *extack)
+static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const cda[])
 {
 	struct sk_buff *skb2;
 	int err;
@@ -2598,13 +2587,14 @@ static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
 		return -ENOMEM;
 
 	err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid,
-					  nlh->nlmsg_seq,
-					  NFNL_MSG_TYPE(nlh->nlmsg_type),
+					  info->nlh->nlmsg_seq,
+					  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 					  sock_net(skb->sk));
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -3284,29 +3274,29 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 	return err;
 }
 
-static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[],
-				struct netlink_ext_ack *extack)
+static int ctnetlink_get_expect(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
-	struct sk_buff *skb2;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
+	struct sk_buff *skb2;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (cda[CTA_EXPECT_MASTER])
-			return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda,
-						     extack);
+			return ctnetlink_dump_exp_ct(info->net, info->sk, skb,
+						     info->nlh, cda,
+						     info->extack);
 		else {
 			struct netlink_dump_control c = {
 				.dump = ctnetlink_exp_dump_table,
 				.done = ctnetlink_exp_done,
 			};
-			return netlink_dump_start(ctnl, skb, nlh, &c);
+			return netlink_dump_start(info->sk, skb, info->nlh, &c);
 		}
 	}
 
@@ -3326,7 +3316,7 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(net, &zone, &tuple);
+	exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -3348,13 +3338,15 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
 
 	rcu_read_lock();
 	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
-				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
+				      info->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+				      exp);
 	rcu_read_unlock();
 	nf_ct_expect_put(exp);
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -3382,15 +3374,14 @@ static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data)
 	return true;
 }
 
-static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[],
-				struct netlink_ext_ack *extack)
+static int ctnetlink_del_expect(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
 	int err;
 
@@ -3406,7 +3397,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(net, &zone, &tuple);
+		exp = nf_ct_expect_find_get(info->net, &zone, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -3422,7 +3413,7 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 		spin_lock_bh(&nf_conntrack_expect_lock);
 		if (del_timer(&exp->timeout)) {
 			nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
-						   nlmsg_report(nlh));
+						   nlmsg_report(info->nlh));
 			nf_ct_expect_put(exp);
 		}
 		spin_unlock_bh(&nf_conntrack_expect_lock);
@@ -3432,14 +3423,14 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 	} else if (cda[CTA_EXPECT_HELP_NAME]) {
 		char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]);
 
-		nf_ct_expect_iterate_net(net, expect_iter_name, name,
+		nf_ct_expect_iterate_net(info->net, expect_iter_name, name,
 					 NETLINK_CB(skb).portid,
-					 nlmsg_report(nlh));
+					 nlmsg_report(info->nlh));
 	} else {
 		/* This basically means we have to flush everything*/
-		nf_ct_expect_iterate_net(net, expect_iter_all, NULL,
+		nf_ct_expect_iterate_net(info->net, expect_iter_all, NULL,
 					 NETLINK_CB(skb).portid,
-					 nlmsg_report(nlh));
+					 nlmsg_report(info->nlh));
 	}
 
 	return 0;
@@ -3635,15 +3626,14 @@ err_ct:
 	return err;
 }
 
-static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const cda[],
-				struct netlink_ext_ack *extack)
+static int ctnetlink_new_expect(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const cda[])
 {
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u_int8_t u3 = nfmsg->nfgen_family;
 	struct nf_conntrack_zone zone;
 	int err;
 
@@ -3662,20 +3652,20 @@ static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_expect_lock);
-	exp = __nf_ct_expect_find(net, &zone, &tuple);
+	exp = __nf_ct_expect_find(info->net, &zone, &tuple);
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_expect_lock);
 		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			err = ctnetlink_create_expect(net, &zone, cda, u3,
+		if (info->nlh->nlmsg_flags & NLM_F_CREATE) {
+			err = ctnetlink_create_expect(info->net, &zone, cda, u3,
 						      NETLINK_CB(skb).portid,
-						      nlmsg_report(nlh));
+						      nlmsg_report(info->nlh));
 		}
 		return err;
 	}
 
 	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+	if (!(info->nlh->nlmsg_flags & NLM_F_EXCL))
 		err = ctnetlink_change_expect(exp, cda);
 	spin_unlock_bh(&nf_conntrack_expect_lock);
 
@@ -3736,17 +3726,15 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const cda[],
-				  struct netlink_ext_ack *extack)
+static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const cda[])
 {
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ctnetlink_exp_stat_cpu_dump,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	return 0;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 06f5886f652e..5f04b67bf47e 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -252,6 +252,12 @@ replay:
 		struct nlattr *attr = (void *)nlh + min_len;
 		int attrlen = nlh->nlmsg_len - min_len;
 		__u8 subsys_id = NFNL_SUBSYS_ID(type);
+		struct nfnl_info info = {
+			.net	= net,
+			.sk	= nfnlnet->nfnl,
+			.nlh	= nlh,
+			.extack	= extack,
+		};
 
 		/* Sanity-check NFNL_MAX_ATTR_COUNT */
 		if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
@@ -276,14 +282,14 @@ replay:
 			rcu_read_unlock();
 			nfnl_lock(subsys_id);
 			if (nfnl_dereference_protected(subsys_id) != ss ||
-			    nfnetlink_find_client(type, ss) != nc)
+			    nfnetlink_find_client(type, ss) != nc) {
 				err = -EAGAIN;
-			else if (nc->call)
-				err = nc->call(net, nfnlnet->nfnl, skb, nlh,
-					       (const struct nlattr **)cda,
-					       extack);
-			else
+			} else if (nc->call) {
+				err = nc->call(skb, &info,
+					       (const struct nlattr **)cda);
+			} else {
 				err = -EINVAL;
+			}
 			nfnl_unlock(subsys_id);
 		}
 		if (err == -EAGAIN)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 6895f31c5fbb..9cb4b21b8e95 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -56,15 +56,13 @@ static inline struct nfnl_acct_net *nfnl_acct_pernet(struct net *net)
 #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
 #define NFACCT_OVERQUOTA_BIT	2	/* NFACCT_F_OVERQUOTA */
 
-static int nfnl_acct_new(struct net *net, struct sock *nfnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[],
-			 struct netlink_ext_ack *extack)
+static int nfnl_acct_new(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const tb[])
 {
-	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
 	struct nf_acct *nfacct, *matching = NULL;
-	char *acct_name;
 	unsigned int size = 0;
+	char *acct_name;
 	u32 flags = 0;
 
 	if (!tb[NFACCT_NAME])
@@ -78,7 +76,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
 		if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
 			continue;
 
-                if (nlh->nlmsg_flags & NLM_F_EXCL)
+                if (info->nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 
 		matching = nfacct;
@@ -86,7 +84,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
         }
 
 	if (matching) {
-		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
 			/* reset counters if you request a replacement. */
 			atomic64_set(&matching->pkts, 0);
 			atomic64_set(&matching->bytes, 0);
@@ -273,17 +271,15 @@ static int nfnl_acct_start(struct netlink_callback *cb)
 	return 0;
 }
 
-static int nfnl_acct_get(struct net *net, struct sock *nfnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[],
-			 struct netlink_ext_ack *extack)
+static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const tb[])
 {
-	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
 	int ret = -ENOENT;
 	struct nf_acct *cur;
 	char *acct_name;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nfnl_acct_dump,
 			.start = nfnl_acct_start,
@@ -291,7 +287,7 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
 			.data = (void *)tb[NFACCT_FILTER],
 		};
 
-		return netlink_dump_start(nfnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	if (!tb[NFACCT_NAME])
@@ -311,15 +307,15 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl,
 		}
 
 		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
-					 nlh->nlmsg_seq,
-					 NFNL_MSG_TYPE(nlh->nlmsg_type),
-					 NFNL_MSG_ACCT_NEW, cur);
+					  info->nlh->nlmsg_seq,
+					  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
+					  NFNL_MSG_ACCT_NEW, cur);
 		if (ret <= 0) {
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
-					MSG_DONTWAIT);
+		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+				      MSG_DONTWAIT);
 		if (ret > 0)
 			ret = 0;
 
@@ -347,12 +343,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
 	return ret;
 }
 
-static int nfnl_acct_del(struct net *net, struct sock *nfnl,
-			 struct sk_buff *skb, const struct nlmsghdr *nlh,
-			 const struct nlattr * const tb[],
-			 struct netlink_ext_ack *extack)
+static int nfnl_acct_del(struct sk_buff *skb, const struct nfnl_info *info,
+			 const struct nlattr * const tb[])
 {
-	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(net);
+	struct nfnl_acct_net *nfnl_acct_net = nfnl_acct_pernet(info->net);
 	struct nf_acct *cur, *tmp;
 	int ret = -ENOENT;
 	char *acct_name;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 22f6f7fcc724..3d1a5215177b 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -408,10 +408,8 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
 	return 0;
 }
 
-static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[],
-			     struct netlink_ext_ack *extack)
+static int nfnl_cthelper_new(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const tb[])
 {
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
@@ -441,7 +439,7 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 		     tuple.dst.protonum != cur->tuple.dst.protonum))
 			continue;
 
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 
 		helper = cur;
@@ -607,10 +605,8 @@ out:
 	return skb->len;
 }
 
-static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[],
-			     struct netlink_ext_ack *extack)
+static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const tb[])
 {
 	int ret = -ENOENT;
 	struct nf_conntrack_helper *cur;
@@ -623,11 +619,11 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nfnl_cthelper_dump_table,
 		};
-		return netlink_dump_start(nfnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	if (tb[NFCTH_NAME])
@@ -659,15 +655,15 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 		}
 
 		ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
-					      nlh->nlmsg_seq,
-					      NFNL_MSG_TYPE(nlh->nlmsg_type),
+					      info->nlh->nlmsg_seq,
+					      NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 					      NFNL_MSG_CTHELPER_NEW, cur);
 		if (ret <= 0) {
 			kfree_skb(skb2);
 			break;
 		}
 
-		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
 				      MSG_DONTWAIT);
 		if (ret > 0)
 			ret = 0;
@@ -678,10 +674,8 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 	return ret;
 }
 
-static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const tb[],
-			     struct netlink_ext_ack *extack)
+static int nfnl_cthelper_del(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const tb[])
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 46da5548d0b3..994f3172bf42 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -83,13 +83,11 @@ err:
 	return ret;
 }
 
-static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int cttimeout_new_timeout(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
-	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
 	__u16 l3num;
 	__u8 l4num;
 	const struct nf_conntrack_l4proto *l4proto;
@@ -111,7 +109,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 		if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
 
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 
 		matching = timeout;
@@ -119,7 +117,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 	}
 
 	if (matching) {
-		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
 			/* You cannot replace one timeout policy by another of
 			 * different kind, sorry.
 			 */
@@ -129,7 +127,8 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 
 			return ctnl_timeout_parse_policy(&matching->timeout.data,
 							 matching->timeout.l4proto,
-							 net, cda[CTA_TIMEOUT_DATA]);
+							 info->net,
+							 cda[CTA_TIMEOUT_DATA]);
 		}
 
 		return -EBUSY;
@@ -150,8 +149,8 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 		goto err_proto_put;
 	}
 
-	ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net,
-					cda[CTA_TIMEOUT_DATA]);
+	ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto,
+					info->net, cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
 
@@ -248,22 +247,20 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int cttimeout_get_timeout(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
-	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
 	int ret = -ENOENT;
 	char *name;
 	struct ctnl_timeout *cur;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ctnl_timeout_dump,
 		};
-		return netlink_dump_start(ctnl, skb, nlh, &c);
+		return netlink_dump_start(info->sk, skb, info->nlh, &c);
 	}
 
 	if (!cda[CTA_TIMEOUT_NAME])
@@ -283,15 +280,15 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 		}
 
 		ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
-					     nlh->nlmsg_seq,
-					     NFNL_MSG_TYPE(nlh->nlmsg_type),
+					     info->nlh->nlmsg_seq,
+					     NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 					     IPCTNL_MSG_TIMEOUT_NEW, cur);
 		if (ret <= 0) {
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
-					MSG_DONTWAIT);
+		ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+				      MSG_DONTWAIT);
 		if (ret > 0)
 			ret = 0;
 
@@ -320,13 +317,11 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 	return ret;
 }
 
-static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int cttimeout_del_timeout(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
-	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net);
+	struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net);
 	struct ctnl_timeout *cur, *tmp;
 	int ret = -ENOENT;
 	char *name;
@@ -334,7 +329,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 	if (!cda[CTA_TIMEOUT_NAME]) {
 		list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list,
 					 head)
-			ctnl_timeout_try_del(net, cur);
+			ctnl_timeout_try_del(info->net, cur);
 
 		return 0;
 	}
@@ -344,7 +339,7 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 		if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
 			continue;
 
-		ret = ctnl_timeout_try_del(net, cur);
+		ret = ctnl_timeout_try_del(info->net, cur);
 		if (ret < 0)
 			return ret;
 
@@ -353,11 +348,9 @@ static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
 	return ret;
 }
 
-static int cttimeout_default_set(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int cttimeout_default_set(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
 	const struct nf_conntrack_l4proto *l4proto;
 	__u8 l4num;
@@ -377,7 +370,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 		goto err;
 	}
 
-	ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
+	ret = ctnl_timeout_parse_policy(NULL, l4proto, info->net,
 					cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
@@ -427,11 +420,9 @@ nla_put_failure:
 	return -1;
 }
 
-static int cttimeout_default_get(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb,
-				 const struct nlmsghdr *nlh,
-				 const struct nlattr * const cda[],
-				 struct netlink_ext_ack *extack)
+static int cttimeout_default_get(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const cda[])
 {
 	const struct nf_conntrack_l4proto *l4proto;
 	unsigned int *timeouts = NULL;
@@ -453,35 +444,35 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 
 	switch (l4proto->l4proto) {
 	case IPPROTO_ICMP:
-		timeouts = &nf_icmp_pernet(net)->timeout;
+		timeouts = &nf_icmp_pernet(info->net)->timeout;
 		break;
 	case IPPROTO_TCP:
-		timeouts = nf_tcp_pernet(net)->timeouts;
+		timeouts = nf_tcp_pernet(info->net)->timeouts;
 		break;
 	case IPPROTO_UDP:
 	case IPPROTO_UDPLITE:
-		timeouts = nf_udp_pernet(net)->timeouts;
+		timeouts = nf_udp_pernet(info->net)->timeouts;
 		break;
 	case IPPROTO_DCCP:
 #ifdef CONFIG_NF_CT_PROTO_DCCP
-		timeouts = nf_dccp_pernet(net)->dccp_timeout;
+		timeouts = nf_dccp_pernet(info->net)->dccp_timeout;
 #endif
 		break;
 	case IPPROTO_ICMPV6:
-		timeouts = &nf_icmpv6_pernet(net)->timeout;
+		timeouts = &nf_icmpv6_pernet(info->net)->timeout;
 		break;
 	case IPPROTO_SCTP:
 #ifdef CONFIG_NF_CT_PROTO_SCTP
-		timeouts = nf_sctp_pernet(net)->timeouts;
+		timeouts = nf_sctp_pernet(info->net)->timeouts;
 #endif
 		break;
 	case IPPROTO_GRE:
 #ifdef CONFIG_NF_CT_PROTO_GRE
-		timeouts = nf_gre_pernet(net)->timeouts;
+		timeouts = nf_gre_pernet(info->net)->timeouts;
 #endif
 		break;
 	case 255:
-		timeouts = &nf_generic_pernet(net)->timeout;
+		timeouts = &nf_generic_pernet(info->net)->timeout;
 		break;
 	default:
 		WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
@@ -497,9 +488,10 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 		goto err;
 	}
 
-	ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid,
-					  nlh->nlmsg_seq,
-					  NFNL_MSG_TYPE(nlh->nlmsg_type),
+	ret = cttimeout_default_fill_info(info->net, skb2,
+					  NETLINK_CB(skb).portid,
+					  info->nlh->nlmsg_seq,
+					  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 					  IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
 					  l3num, l4proto, timeouts);
 	if (ret <= 0) {
@@ -507,7 +499,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 		err = -ENOMEM;
 		goto err;
 	}
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret > 0)
 		ret = 0;
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d5f458d0ff3d..81630600b4ef 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -845,10 +845,8 @@ static struct notifier_block nfulnl_rtnl_notifier = {
 	.notifier_call	= nfulnl_rcv_nl_event,
 };
 
-static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[],
-			      struct netlink_ext_ack *extack)
+static int nfulnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nfula[])
 {
 	return -ENOTSUPP;
 }
@@ -869,18 +867,16 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
 	[NFULA_CFG_FLAGS]	= { .type = NLA_U16 },
 };
 
-static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfula[],
-			      struct netlink_ext_ack *extack)
+static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nfula[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nfnl_log_net *log = nfnl_log_pernet(info->net);
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
-	struct nfulnl_instance *inst;
 	struct nfulnl_msg_config_cmd *cmd = NULL;
-	struct nfnl_log_net *log = nfnl_log_pernet(net);
-	int ret = 0;
+	struct nfulnl_instance *inst;
 	u16 flags = 0;
+	int ret = 0;
 
 	if (nfula[NFULA_CFG_CMD]) {
 		u_int8_t pf = nfmsg->nfgen_family;
@@ -889,9 +885,9 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
 		/* Commands without queue context */
 		switch (cmd->command) {
 		case NFULNL_CFG_CMD_PF_BIND:
-			return nf_log_bind_pf(net, pf, &nfulnl_logger);
+			return nf_log_bind_pf(info->net, pf, &nfulnl_logger);
 		case NFULNL_CFG_CMD_PF_UNBIND:
-			nf_log_unbind_pf(net, pf);
+			nf_log_unbind_pf(info->net, pf);
 			return 0;
 		}
 	}
@@ -932,7 +928,7 @@ static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
 				goto out_put;
 			}
 
-			inst = instance_create(net, group_num,
+			inst = instance_create(info->net, group_num,
 					       NETLINK_CB(skb).portid,
 					       sk_user_ns(NETLINK_CB(skb).sk));
 			if (IS_ERR(inst)) {
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 916a3c7f9eaf..1fd537ef4496 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -292,10 +292,9 @@ static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = {
 	[OSF_ATTR_FINGER]	= { .len = sizeof(struct nf_osf_user_finger) },
 };
 
-static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
-				 struct sk_buff *skb, const struct nlmsghdr *nlh,
-				 const struct nlattr * const osf_attrs[],
-				 struct netlink_ext_ack *extack)
+static int nfnl_osf_add_callback(struct sk_buff *skb,
+				 const struct nfnl_info *info,
+				 const struct nlattr * const osf_attrs[])
 {
 	struct nf_osf_user_finger *f;
 	struct nf_osf_finger *kf = NULL, *sf;
@@ -307,7 +306,7 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
 	if (!osf_attrs[OSF_ATTR_FINGER])
 		return -EINVAL;
 
-	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+	if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 		return -EINVAL;
 
 	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
@@ -325,7 +324,7 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
 		kfree(kf);
 		kf = NULL;
 
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL)
 			err = -EEXIST;
 		break;
 	}
@@ -339,11 +338,9 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
 	return err;
 }
 
-static int nfnl_osf_remove_callback(struct net *net, struct sock *ctnl,
-				    struct sk_buff *skb,
-				    const struct nlmsghdr *nlh,
-				    const struct nlattr * const osf_attrs[],
-				    struct netlink_ext_ack *extack)
+static int nfnl_osf_remove_callback(struct sk_buff *skb,
+				    const struct nfnl_info *info,
+				    const struct nlattr * const osf_attrs[])
 {
 	struct nf_osf_user_finger *f;
 	struct nf_osf_finger *sf;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 37e81d895e61..9d7e06d85199 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1245,16 +1245,14 @@ static const struct nf_queue_handler nfqh = {
 	.nf_hook_drop	= nfqnl_nf_hook_drop,
 };
 
-static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[],
-			     struct netlink_ext_ack *extack)
+static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nfqa[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
-	struct nfqnl_instance *queue;
 	struct nfqnl_msg_config_cmd *cmd = NULL;
-	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+	struct nfqnl_instance *queue;
 	__u32 flags = 0, mask = 0;
 	int ret = 0;
 
-- 
cgit v1.2.3


From f3a95075549e0e5c36db922caf86847db7a35403 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:41 +0200
Subject: bpf: Allow trampoline re-attach for tracing and lsm programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we don't allow re-attaching of trampolines. Once
it's detached, it can't be re-attach even when the program
is still loaded.

Adding the possibility to re-attach the loaded tracing and
lsm programs.

Fixing missing unlock with proper cleanup goto jump reported
by Julia.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-2-jolsa@kernel.org
---
 kernel/bpf/syscall.c    | 23 +++++++++++++++++------
 kernel/bpf/trampoline.c |  4 ++--
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd495190115e..941ca06d9dfa 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2648,14 +2648,25 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 	 *   target_btf_id using the link_create API.
 	 *
 	 * - if tgt_prog == NULL when this function was called using the old
-         *   raw_tracepoint_open API, and we need a target from prog->aux
-         *
-         * The combination of no saved target in prog->aux, and no target
-         * specified on load is illegal, and we reject that here.
+	 *   raw_tracepoint_open API, and we need a target from prog->aux
+	 *
+	 * - if prog->aux->dst_trampoline and tgt_prog is NULL, the program
+	 *   was detached and is going for re-attachment.
 	 */
 	if (!prog->aux->dst_trampoline && !tgt_prog) {
-		err = -ENOENT;
-		goto out_unlock;
+		/*
+		 * Allow re-attach for TRACING and LSM programs. If it's
+		 * currently linked, bpf_trampoline_link_prog will fail.
+		 * EXT programs need to specify tgt_prog_fd, so they
+		 * re-attach in separate code path.
+		 */
+		if (prog->type != BPF_PROG_TYPE_TRACING &&
+		    prog->type != BPF_PROG_TYPE_LSM) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		btf_id = prog->aux->attach_btf_id;
+		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
 	}
 
 	if (!prog->aux->dst_trampoline ||
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 4aa8b52adf25..2d44b5aa0057 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -444,7 +444,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 	tr->progs_cnt[kind]++;
 	err = bpf_trampoline_update(tr);
 	if (err) {
-		hlist_del(&prog->aux->tramp_hlist);
+		hlist_del_init(&prog->aux->tramp_hlist);
 		tr->progs_cnt[kind]--;
 	}
 out:
@@ -467,7 +467,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 		tr->extension_prog = NULL;
 		goto out;
 	}
-	hlist_del(&prog->aux->tramp_hlist);
+	hlist_del_init(&prog->aux->tramp_hlist);
 	tr->progs_cnt[kind]--;
 	err = bpf_trampoline_update(tr);
 out:
-- 
cgit v1.2.3


From 56dda5a48f4f610ac9a0487c6fb64d31950e4a3e Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:43 +0200
Subject: selftests/bpf: Add re-attach test to fentry_test

Adding the test to re-attach (detach/attach again) tracing
fentry programs, plus check that already linked program can't
be attached again.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-4-jolsa@kernel.org
---
 .../testing/selftests/bpf/prog_tests/fentry_test.c | 52 +++++++++++++++-------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 04ebbf1cb390..7cb111b11995 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -3,35 +3,57 @@
 #include <test_progs.h>
 #include "fentry_test.skel.h"
 
-void test_fentry_test(void)
+static int fentry_test(struct fentry_test *fentry_skel)
 {
-	struct fentry_test *fentry_skel = NULL;
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
+	struct bpf_link *link;
 	__u64 *result;
 
-	fentry_skel = fentry_test__open_and_load();
-	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
-		goto cleanup;
-
 	err = fentry_test__attach(fentry_skel);
-	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
-		goto cleanup;
+	if (!ASSERT_OK(err, "fentry_attach"))
+		return err;
+
+	/* Check that already linked program can't be attached again. */
+	link = bpf_program__attach(fentry_skel->progs.test1);
+	if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+		return -1;
 
 	prog_fd = bpf_program__fd(fentry_skel->progs.test1);
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "test_run",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(retval, 0, "test_run");
 
 	result = (__u64 *)fentry_skel->bss;
-	for (i = 0; i < 6; i++) {
-		if (CHECK(result[i] != 1, "result",
-			  "fentry_test%d failed err %lld\n", i + 1, result[i]))
-			goto cleanup;
+	for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
+		if (!ASSERT_EQ(result[i], 1, "fentry_result"))
+			return -1;
 	}
 
+	fentry_test__detach(fentry_skel);
+
+	/* zero results for re-attach test */
+	memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss));
+	return 0;
+}
+
+void test_fentry_test(void)
+{
+	struct fentry_test *fentry_skel = NULL;
+	int err;
+
+	fentry_skel = fentry_test__open_and_load();
+	if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+		goto cleanup;
+
+	err = fentry_test(fentry_skel);
+	if (!ASSERT_OK(err, "fentry_first_attach"))
+		goto cleanup;
+
+	err = fentry_test(fentry_skel);
+	ASSERT_OK(err, "fentry_second_attach");
+
 cleanup:
 	fentry_test__destroy(fentry_skel);
 }
-- 
cgit v1.2.3


From 8caadc43f2019caebbf314f7a6ae2faed791e783 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:44 +0200
Subject: selftests/bpf: Add re-attach test to fexit_test

Adding the test to re-attach (detach/attach again) tracing
fexit programs, plus check that already linked program can't
be attached again.

Also switching to ASSERT* macros.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-5-jolsa@kernel.org
---
 .../testing/selftests/bpf/prog_tests/fexit_test.c  | 52 +++++++++++++++-------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 78d7a2765c27..6792e41f7f69 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -3,35 +3,57 @@
 #include <test_progs.h>
 #include "fexit_test.skel.h"
 
-void test_fexit_test(void)
+static int fexit_test(struct fexit_test *fexit_skel)
 {
-	struct fexit_test *fexit_skel = NULL;
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
+	struct bpf_link *link;
 	__u64 *result;
 
-	fexit_skel = fexit_test__open_and_load();
-	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
-		goto cleanup;
-
 	err = fexit_test__attach(fexit_skel);
-	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
-		goto cleanup;
+	if (!ASSERT_OK(err, "fexit_attach"))
+		return err;
+
+	/* Check that already linked program can't be attached again. */
+	link = bpf_program__attach(fexit_skel->progs.test1);
+	if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+		return -1;
 
 	prog_fd = bpf_program__fd(fexit_skel->progs.test1);
 	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
 				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "test_run",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(retval, 0, "test_run");
 
 	result = (__u64 *)fexit_skel->bss;
-	for (i = 0; i < 6; i++) {
-		if (CHECK(result[i] != 1, "result",
-			  "fexit_test%d failed err %lld\n", i + 1, result[i]))
-			goto cleanup;
+	for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
+		if (!ASSERT_EQ(result[i], 1, "fexit_result"))
+			return -1;
 	}
 
+	fexit_test__detach(fexit_skel);
+
+	/* zero results for re-attach test */
+	memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss));
+	return 0;
+}
+
+void test_fexit_test(void)
+{
+	struct fexit_test *fexit_skel = NULL;
+	int err;
+
+	fexit_skel = fexit_test__open_and_load();
+	if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+		goto cleanup;
+
+	err = fexit_test(fexit_skel);
+	if (!ASSERT_OK(err, "fexit_first_attach"))
+		goto cleanup;
+
+	err = fexit_test(fexit_skel);
+	ASSERT_OK(err, "fexit_second_attach");
+
 cleanup:
 	fexit_test__destroy(fexit_skel);
 }
-- 
cgit v1.2.3


From cede72ad367a105852e814ef91717aac4383b853 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:45 +0200
Subject: selftests/bpf: Add re-attach test to lsm test

Adding the test to re-attach (detach/attach again) lsm programs,
plus check that already linked program can't be attached again.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-6-jolsa@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/test_lsm.c | 48 ++++++++++++++++++-----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2755e4f81499..d492e76e01cf 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -18,6 +18,8 @@ char *CMD_ARGS[] = {"true", NULL};
 #define GET_PAGE_ADDR(ADDR, PAGE_SIZE)					\
 	(char *)(((unsigned long) (ADDR + PAGE_SIZE)) & ~(PAGE_SIZE-1))
 
+static int duration = 0;
+
 int stack_mprotect(void)
 {
 	void *buf;
@@ -51,23 +53,25 @@ int exec_cmd(int *monitored_pid)
 	return -EINVAL;
 }
 
-void test_test_lsm(void)
+static int test_lsm(struct lsm *skel)
 {
-	struct lsm *skel = NULL;
-	int err, duration = 0;
+	struct bpf_link *link;
 	int buf = 1234;
-
-	skel = lsm__open_and_load();
-	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
-		goto close_prog;
+	int err;
 
 	err = lsm__attach(skel);
 	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
-		goto close_prog;
+		return err;
+
+	/* Check that already linked program can't be attached again. */
+	link = bpf_program__attach(skel->progs.test_int_hook);
+	if (CHECK(!IS_ERR(link), "attach_link",
+		  "re-attach without detach should not succeed"))
+		return -1;
 
 	err = exec_cmd(&skel->bss->monitored_pid);
 	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
-		goto close_prog;
+		return err;
 
 	CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
 	      skel->bss->bprm_count);
@@ -77,7 +81,7 @@ void test_test_lsm(void)
 	err = stack_mprotect();
 	if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
 		  errno))
-		goto close_prog;
+		return err;
 
 	CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
 	      "mprotect_count = %d\n", skel->bss->mprotect_count);
@@ -89,6 +93,30 @@ void test_test_lsm(void)
 	CHECK(skel->bss->copy_test != 3, "copy_test",
 	      "copy_test = %d\n", skel->bss->copy_test);
 
+	lsm__detach(skel);
+
+	skel->bss->copy_test = 0;
+	skel->bss->bprm_count = 0;
+	skel->bss->mprotect_count = 0;
+	return 0;
+}
+
+void test_test_lsm(void)
+{
+	struct lsm *skel = NULL;
+	int err;
+
+	skel = lsm__open_and_load();
+	if (CHECK(!skel, "lsm_skel_load", "lsm skeleton failed\n"))
+		goto close_prog;
+
+	err = test_lsm(skel);
+	if (CHECK(err, "test_lsm", "first attach failed\n"))
+		goto close_prog;
+
+	err = test_lsm(skel);
+	CHECK(err, "test_lsm", "second attach failed\n");
+
 close_prog:
 	lsm__destroy(skel);
 }
-- 
cgit v1.2.3


From a1c05c3b09e0a92b26b94650837bf06c664beb1b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:46 +0200
Subject: selftests/bpf: Test that module can't be unloaded with attached
 trampoline

Adding test to verify that once we attach module's trampoline,
the module can't be unloaded.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-7-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/module_attach.c       | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 5bc53d53d86e..d85a69b7ce44 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -45,12 +45,18 @@ static int trigger_module_test_write(int write_sz)
 	return 0;
 }
 
+static int delete_module(const char *name, int flags)
+{
+	return syscall(__NR_delete_module, name, flags);
+}
+
 void test_module_attach(void)
 {
 	const int READ_SZ = 456;
 	const int WRITE_SZ = 457;
 	struct test_module_attach* skel;
 	struct test_module_attach__bss *bss;
+	struct bpf_link *link;
 	int err;
 
 	skel = test_module_attach__open();
@@ -84,6 +90,23 @@ void test_module_attach(void)
 	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
 	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
 
+	test_module_attach__detach(skel);
+
+	/* attach fentry/fexit and make sure it get's module reference */
+	link = bpf_program__attach(skel->progs.handle_fentry);
+	if (!ASSERT_OK_PTR(link, "attach_fentry"))
+		goto cleanup;
+
+	ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+	bpf_link__destroy(link);
+
+	link = bpf_program__attach(skel->progs.handle_fexit);
+	if (!ASSERT_OK_PTR(link, "attach_fexit"))
+		goto cleanup;
+
+	ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+	bpf_link__destroy(link);
+
 cleanup:
 	test_module_attach__destroy(skel);
 }
-- 
cgit v1.2.3


From 7bb2cc19aee8f7150851bb8668c9ff655a5e7678 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Apr 2021 21:51:47 +0200
Subject: selftests/bpf: Use ASSERT macros in lsm test

Replacing CHECK with ASSERT macros.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210414195147.1624932-8-jolsa@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/test_lsm.c | 27 +++++++++--------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index d492e76e01cf..244c01125126 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -18,8 +18,6 @@ char *CMD_ARGS[] = {"true", NULL};
 #define GET_PAGE_ADDR(ADDR, PAGE_SIZE)					\
 	(char *)(((unsigned long) (ADDR + PAGE_SIZE)) & ~(PAGE_SIZE-1))
 
-static int duration = 0;
-
 int stack_mprotect(void)
 {
 	void *buf;
@@ -60,38 +58,33 @@ static int test_lsm(struct lsm *skel)
 	int err;
 
 	err = lsm__attach(skel);
-	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "attach"))
 		return err;
 
 	/* Check that already linked program can't be attached again. */
 	link = bpf_program__attach(skel->progs.test_int_hook);
-	if (CHECK(!IS_ERR(link), "attach_link",
-		  "re-attach without detach should not succeed"))
+	if (!ASSERT_ERR_PTR(link, "attach_link"))
 		return -1;
 
 	err = exec_cmd(&skel->bss->monitored_pid);
-	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+	if (!ASSERT_OK(err, "exec_cmd"))
 		return err;
 
-	CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
-	      skel->bss->bprm_count);
+	ASSERT_EQ(skel->bss->bprm_count, 1, "bprm_count");
 
 	skel->bss->monitored_pid = getpid();
 
 	err = stack_mprotect();
-	if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
-		  errno))
+	if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
 		return err;
 
-	CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
-	      "mprotect_count = %d\n", skel->bss->mprotect_count);
+	ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count");
 
 	syscall(__NR_setdomainname, &buf, -2L);
 	syscall(__NR_setdomainname, 0, -3L);
 	syscall(__NR_setdomainname, ~0L, -4L);
 
-	CHECK(skel->bss->copy_test != 3, "copy_test",
-	      "copy_test = %d\n", skel->bss->copy_test);
+	ASSERT_EQ(skel->bss->copy_test, 3, "copy_test");
 
 	lsm__detach(skel);
 
@@ -107,15 +100,15 @@ void test_test_lsm(void)
 	int err;
 
 	skel = lsm__open_and_load();
-	if (CHECK(!skel, "lsm_skel_load", "lsm skeleton failed\n"))
+	if (!ASSERT_OK_PTR(skel, "lsm_skel_load"))
 		goto close_prog;
 
 	err = test_lsm(skel);
-	if (CHECK(err, "test_lsm", "first attach failed\n"))
+	if (!ASSERT_OK(err, "test_lsm_first_attach"))
 		goto close_prog;
 
 	err = test_lsm(skel);
-	CHECK(err, "test_lsm", "second attach failed\n");
+	ASSERT_OK(err, "test_lsm_second_attach");
 
 close_prog:
 	lsm__destroy(skel);
-- 
cgit v1.2.3


From 797d49805ddc6595b2fafe3e9ceff7f562be1f2c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Apr 2021 00:17:10 +0200
Subject: netfilter: nfnetlink: pass struct nfnl_info to rcu callbacks

Update rcu callbacks to use the nfnl_info structure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h |   6 +-
 net/netfilter/nf_tables_api.c       | 152 ++++++++++++++++++------------------
 net/netfilter/nfnetlink.c           |   5 +-
 net/netfilter/nfnetlink_queue.c     |  40 ++++------
 net/netfilter/nft_compat.c          |  24 +++---
 5 files changed, 107 insertions(+), 120 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1baa3205b199..c11f2f99eac4 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -17,10 +17,8 @@ struct nfnl_info {
 struct nfnl_callback {
 	int (*call)(struct sk_buff *skb, const struct nfnl_info *info,
 		    const struct nlattr * const cda[]);
-	int (*call_rcu)(struct net *net, struct sock *nl, struct sk_buff *skb,
-			const struct nlmsghdr *nlh,
-			const struct nlattr * const cda[],
-			struct netlink_ext_ack *extack);
+	int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info,
+			const struct nlattr * const cda[]);
 	int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			  const struct nlmsghdr *nlh,
 			  const struct nlattr * const cda[],
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 155b85553fcc..f7c4e6f14130 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -858,25 +858,25 @@ static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_gettable(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	int family = nfmsg->nfgen_family;
 	const struct nft_table *table;
+	struct net *net = info->net;
 	struct sk_buff *skb2;
-	int family = nfmsg->nfgen_family;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_tables,
 			.module = THIS_MODULE,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask, 0);
@@ -890,8 +890,8 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
 		return -ENOMEM;
 
 	err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
-					nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
-					family, table);
+					info->nlh->nlmsg_seq, NFT_MSG_NEWTABLE,
+					0, family, table);
 	if (err < 0)
 		goto err_fill_table_info;
 
@@ -1623,26 +1623,26 @@ done:
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getchain(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	int family = nfmsg->nfgen_family;
 	const struct nft_chain *chain;
+	struct net *net = info->net;
 	struct nft_table *table;
 	struct sk_buff *skb2;
-	int family = nfmsg->nfgen_family;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = nf_tables_dump_chains,
 			.module = THIS_MODULE,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask, 0);
@@ -1662,8 +1662,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 		return -ENOMEM;
 
 	err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
-					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
-					family, table, chain);
+					info->nlh->nlmsg_seq, NFT_MSG_NEWCHAIN,
+					0, family, table, chain);
 	if (err < 0)
 		goto err_fill_chain_info;
 
@@ -3076,21 +3076,21 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getrule(struct net *net, struct sock *nlsk,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[],
-			     struct netlink_ext_ack *extack)
+static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	int family = nfmsg->nfgen_family;
 	const struct nft_chain *chain;
 	const struct nft_rule *rule;
+	struct net *net = info->net;
 	struct nft_table *table;
 	struct sk_buff *skb2;
-	int family = nfmsg->nfgen_family;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start= nf_tables_dump_rules_start,
 			.dump = nf_tables_dump_rules,
@@ -3099,7 +3099,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 			.data = (void *)nla,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0);
@@ -3125,7 +3125,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 		return -ENOMEM;
 
 	err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
-				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+				       info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
 				       family, table, chain, rule, NULL);
 	if (err < 0)
 		goto err_fill_rule_info;
@@ -4045,25 +4045,25 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getset(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	struct net *net = info->net;
 	const struct nft_set *set;
-	struct nft_ctx ctx;
 	struct sk_buff *skb2;
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_ctx ctx;
 	int err;
 
 	/* Verify existence before starting dump */
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
 					genmask, 0);
 	if (err < 0)
 		return err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_sets_start,
 			.dump = nf_tables_dump_sets,
@@ -4072,7 +4072,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
 			.module = THIS_MODULE,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	/* Only accept unspec with dump */
@@ -5063,18 +5063,19 @@ err_fill_setelem:
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[],
-				struct netlink_ext_ack *extack)
+static int nf_tables_getsetelem(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const nla[])
 {
-	u8 genmask = nft_genmask_cur(net);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
+	struct net *net = info->net;
 	struct nft_set *set;
 	struct nlattr *attr;
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
 					 genmask, NETLINK_CB(skb).portid);
 	if (err < 0)
 		return err;
@@ -5083,7 +5084,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_set_start,
 			.dump = nf_tables_dump_set,
@@ -5096,7 +5097,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
 		};
 
 		c.data = &dump_ctx;
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
@@ -6416,22 +6417,22 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getobj(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_cur(info->net);
 	int family = nfmsg->nfgen_family;
 	const struct nft_table *table;
+	struct net *net = info->net;
 	struct nft_object *obj;
 	struct sk_buff *skb2;
 	bool reset = false;
 	u32 objtype;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_obj_start,
 			.dump = nf_tables_dump_obj,
@@ -6440,7 +6441,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 			.data = (void *)nla,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	if (!nla[NFTA_OBJ_NAME] ||
@@ -6464,7 +6465,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	if (!skb2)
 		return -ENOMEM;
 
-	if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
+	if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
 		reset = true;
 
 	if (reset) {
@@ -6483,7 +6484,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	}
 
 	err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
-				      nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+				      info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
 				      family, table, obj, reset);
 	if (err < 0)
 		goto err_fill_obj_info;
@@ -7320,21 +7321,20 @@ static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
 }
 
 /* called with rcu_read_lock held */
-static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const nla[],
-				  struct netlink_ext_ack *extack)
+static int nf_tables_getflowtable(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_cur(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u8 genmask = nft_genmask_cur(info->net);
 	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
 	const struct nft_table *table;
+	struct net *net = info->net;
 	struct sk_buff *skb2;
 	int err;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.start = nf_tables_dump_flowtable_start,
 			.dump = nf_tables_dump_flowtable,
@@ -7343,7 +7343,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
 			.data = (void *)nla,
 		};
 
-		return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
+		return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
 	}
 
 	if (!nla[NFTA_FLOWTABLE_NAME])
@@ -7364,7 +7364,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
 		return -ENOMEM;
 
 	err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
-					    nlh->nlmsg_seq,
+					    info->nlh->nlmsg_seq,
 					    NFT_MSG_NEWFLOWTABLE, 0, family,
 					    flowtable, &flowtable->hook_list);
 	if (err < 0)
@@ -7526,10 +7526,8 @@ err:
 			  -ENOBUFS);
 }
 
-static int nf_tables_getgen(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_getgen(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
 	struct sk_buff *skb2;
 	int err;
@@ -7538,12 +7536,12 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk,
 	if (skb2 == NULL)
 		return -ENOMEM;
 
-	err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
-				      nlh->nlmsg_seq);
+	err = nf_tables_fill_gen_info(skb2, info->net, NETLINK_CB(skb).portid,
+				      info->nlh->nlmsg_seq);
 	if (err < 0)
 		goto err_fill_gen_info;
 
-	return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+	return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
 
 err_fill_gen_info:
 	kfree_skb(skb2);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 5f04b67bf47e..7920f6c4ff69 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -274,9 +274,8 @@ replay:
 		}
 
 		if (nc->call_rcu) {
-			err = nc->call_rcu(net, nfnlnet->nfnl, skb, nlh,
-					   (const struct nlattr **)cda,
-					   extack);
+			err = nc->call_rcu(skb, &info,
+					   (const struct nlattr **)cda);
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 9d7e06d85199..ede9252c8de1 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1046,20 +1046,18 @@ static int nfq_id_after(unsigned int id, unsigned int max)
 	return (int)(id - max) > 0;
 }
 
-static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
-				    struct sk_buff *skb,
-				    const struct nlmsghdr *nlh,
-			            const struct nlattr * const nfqa[],
-				    struct netlink_ext_ack *extack)
+static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
+				    const struct nfnl_info *info,
+				    const struct nlattr * const nfqa[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u16 queue_num = ntohs(nfmsg->res_id);
 	struct nf_queue_entry *entry, *tmp;
-	unsigned int verdict, maxid;
 	struct nfqnl_msg_verdict_hdr *vhdr;
 	struct nfqnl_instance *queue;
+	unsigned int verdict, maxid;
 	LIST_HEAD(batch_list);
-	u16 queue_num = ntohs(nfmsg->res_id);
-	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 
 	queue = verdict_instance_lookup(q, queue_num,
 					NETLINK_CB(skb).portid);
@@ -1158,22 +1156,19 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
 	return 0;
 }
 
-static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
-			      struct sk_buff *skb,
-			      const struct nlmsghdr *nlh,
-			      const struct nlattr * const nfqa[],
-			      struct netlink_ext_ack *extack)
+static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nfqa[])
 {
-	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
+	struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
 	struct nfqnl_msg_verdict_hdr *vhdr;
+	enum ip_conntrack_info ctinfo;
 	struct nfqnl_instance *queue;
-	unsigned int verdict;
 	struct nf_queue_entry *entry;
-	enum ip_conntrack_info ctinfo;
 	struct nfnl_ct_hook *nfnl_ct;
 	struct nf_conn *ct = NULL;
-	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+	unsigned int verdict;
 	int err;
 
 	queue = verdict_instance_lookup(q, queue_num,
@@ -1196,7 +1191,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 
 	if (nfqa[NFQA_CT]) {
 		if (nfnl_ct != NULL)
-			ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
+			ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
+					    &ctinfo);
 	}
 
 	if (entry->state.pf == PF_BRIDGE) {
@@ -1224,10 +1220,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
 	return 0;
 }
 
-static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nfqa[],
-			     struct netlink_ext_ack *extack)
+static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const cda[])
 {
 	return -ENOTSUPP;
 }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b8dbd20a6a4c..4c0657245d5a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -613,17 +613,15 @@ nla_put_failure:
 	return -1;
 }
 
-static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
-			       struct sk_buff *skb, const struct nlmsghdr *nlh,
-			       const struct nlattr * const tb[],
-			       struct netlink_ext_ack *extack)
+static int nfnl_compat_get_rcu(struct sk_buff *skb,
+			       const struct nfnl_info *info,
+			       const struct nlattr * const tb[])
 {
-	int ret = 0, target;
 	struct nfgenmsg *nfmsg;
-	const char *fmt;
-	const char *name;
-	u32 rev;
+	const char *name, *fmt;
 	struct sk_buff *skb2;
+	int ret = 0, target;
+	u32 rev;
 
 	if (tb[NFTA_COMPAT_NAME] == NULL ||
 	    tb[NFTA_COMPAT_REV] == NULL ||
@@ -634,7 +632,7 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
 	rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
 	target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
 
-	nfmsg = nlmsg_data(nlh);
+	nfmsg = nlmsg_data(info->nlh);
 
 	switch(nfmsg->nfgen_family) {
 	case AF_INET:
@@ -673,8 +671,8 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
 
 	/* include the best revision for this extension in the message */
 	if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
-				  nlh->nlmsg_seq,
-				  NFNL_MSG_TYPE(nlh->nlmsg_type),
+				  info->nlh->nlmsg_seq,
+				  NFNL_MSG_TYPE(info->nlh->nlmsg_type),
 				  NFNL_MSG_COMPAT_GET,
 				  nfmsg->nfgen_family,
 				  name, ret, target) <= 0) {
@@ -682,8 +680,8 @@ static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
 		goto out_put;
 	}
 
-	ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
-				MSG_DONTWAIT);
+	ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
+			      MSG_DONTWAIT);
 	if (ret > 0)
 		ret = 0;
 out_put:
-- 
cgit v1.2.3


From 7dab8ee3b6e7ec856a616d07ebb9ebd736c92520 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Apr 2021 00:17:11 +0200
Subject: netfilter: nfnetlink: pass struct nfnl_info to batch callbacks

Update batch callbacks to use the nfnl_info structure. Rename one
clashing info variable to expr_info.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h |   6 +-
 net/netfilter/nf_tables_api.c       | 338 ++++++++++++++++++------------------
 net/netfilter/nfnetlink.c           |  14 +-
 3 files changed, 182 insertions(+), 176 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index c11f2f99eac4..df0e3254c57b 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -19,10 +19,8 @@ struct nfnl_callback {
 		    const struct nlattr * const cda[]);
 	int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info,
 			const struct nlattr * const cda[]);
-	int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
-			  const struct nlmsghdr *nlh,
-			  const struct nlattr * const cda[],
-			  struct netlink_ext_ack *extack);
+	int (*call_batch)(struct sk_buff *skb, const struct nfnl_info *info,
+			  const struct nlattr * const cda[]);
 	const struct nla_policy *policy;	/* netlink attribute policy */
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f7c4e6f14130..280ca136df56 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1055,15 +1055,15 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
 	return strcmp(obj->key.name, k->name);
 }
 
-static int nf_tables_newtable(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	struct nftables_pernet *nft_net = nft_pernet(net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_ctx ctx;
@@ -1078,14 +1078,15 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 		if (PTR_ERR(table) != -ENOENT)
 			return PTR_ERR(table);
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, attr);
 			return -EEXIST;
 		}
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
-		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+		nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
 		return nf_tables_updtable(&ctx);
 	}
 
@@ -1126,7 +1127,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	if (table->flags & NFT_TABLE_F_OWNER)
 		table->nlpid = NETLINK_CB(skb).portid;
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
 	if (err < 0)
 		goto err_trans;
@@ -1250,19 +1251,19 @@ out:
 	return err;
 }
 
-static int nf_tables_deltable(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_ctx ctx;
 
-	nft_ctx_init(&ctx, net, skb, nlh, 0, NULL, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, 0, NULL, NULL, nla);
 	if (family == AF_UNSPEC ||
 	    (!nla[NFTA_TABLE_NAME] && !nla[NFTA_TABLE_HANDLE]))
 		return nft_flush(&ctx, family);
@@ -1281,7 +1282,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	if (nlh->nlmsg_flags & NLM_F_NONREC &&
+	if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
 	    table->use > 0)
 		return -EBUSY;
 
@@ -2350,16 +2351,16 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
 	return ERR_PTR(-ENOENT);
 }
 
-static int nf_tables_newchain(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	struct nftables_pernet *nft_net = nft_pernet(net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
 	struct nft_chain *chain = NULL;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	u8 policy = NF_ACCEPT;
@@ -2431,14 +2432,14 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	if (flags & ~NFT_CHAIN_FLAGS)
 		return -EOPNOTSUPP;
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
 
 	if (chain != NULL) {
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, attr);
 			return -EEXIST;
 		}
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
 		flags |= chain->flags & NFT_CHAIN_BASE;
@@ -2449,14 +2450,14 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 	return nf_tables_addchain(&ctx, family, genmask, policy, flags);
 }
 
-static int nf_tables_delchain(struct net *net, struct sock *nlsk,
-			      struct sk_buff *skb, const struct nlmsghdr *nlh,
-			      const struct nlattr * const nla[],
-			      struct netlink_ext_ack *extack)
+static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
+			      const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -2486,11 +2487,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 		return PTR_ERR(chain);
 	}
 
-	if (nlh->nlmsg_flags & NLM_F_NONREC &&
+	if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
 	    chain->use > 0)
 		return -EBUSY;
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
 
 	use = chain->use;
 	list_for_each_entry(rule, &chain->rules, list) {
@@ -2713,15 +2714,15 @@ err1:
 }
 
 static int nf_tables_newexpr(const struct nft_ctx *ctx,
-			     const struct nft_expr_info *info,
+			     const struct nft_expr_info *expr_info,
 			     struct nft_expr *expr)
 {
-	const struct nft_expr_ops *ops = info->ops;
+	const struct nft_expr_ops *ops = expr_info->ops;
 	int err;
 
 	expr->ops = ops;
 	if (ops->init) {
-		err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
+		err = ops->init(ctx, expr, (const struct nlattr **)expr_info->tb);
 		if (err < 0)
 			goto err1;
 	}
@@ -2745,21 +2746,21 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
 static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 				      const struct nlattr *nla)
 {
-	struct nft_expr_info info;
+	struct nft_expr_info expr_info;
 	struct nft_expr *expr;
 	struct module *owner;
 	int err;
 
-	err = nf_tables_expr_parse(ctx, nla, &info);
+	err = nf_tables_expr_parse(ctx, nla, &expr_info);
 	if (err < 0)
 		goto err1;
 
 	err = -ENOMEM;
-	expr = kzalloc(info.ops->size, GFP_KERNEL);
+	expr = kzalloc(expr_info.ops->size, GFP_KERNEL);
 	if (expr == NULL)
 		goto err2;
 
-	err = nf_tables_newexpr(ctx, &info, expr);
+	err = nf_tables_newexpr(ctx, &expr_info, expr);
 	if (err < 0)
 		goto err3;
 
@@ -2767,9 +2768,9 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 err3:
 	kfree(expr);
 err2:
-	owner = info.ops->type->owner;
-	if (info.ops->type->release_ops)
-		info.ops->type->release_ops(info.ops);
+	owner = expr_info.ops->type->owner;
+	if (expr_info.ops->type->release_ops)
+		expr_info.ops->type->release_ops(expr_info.ops);
 
 	module_put(owner);
 err1:
@@ -3216,28 +3217,28 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 
 #define NFT_RULE_MAXEXPRS	128
 
-static int nf_tables_newrule(struct net *net, struct sock *nlsk,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[],
-			     struct netlink_ext_ack *extack)
+static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nla[])
 {
-	struct nftables_pernet *nft_net = nft_pernet(net);
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
-	struct nft_expr_info *info = NULL;
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	unsigned int size, i, n, ulen = 0, usize = 0;
+	u8 genmask = nft_genmask_next(info->net);
+	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_expr_info *expr_info = NULL;
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	struct nft_flow_rule *flow;
+	struct nft_userdata *udata;
 	struct nft_table *table;
 	struct nft_chain *chain;
-	struct nft_rule *rule, *old_rule = NULL;
-	struct nft_userdata *udata;
-	struct nft_trans *trans = NULL;
+	struct nft_trans *trans;
+	u64 handle, pos_handle;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
-	unsigned int size, i, n, ulen = 0, usize = 0;
 	int err, rem;
-	u64 handle, pos_handle;
 
 	lockdep_assert_held(&nft_net->commit_mutex);
 
@@ -3276,17 +3277,17 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			return PTR_ERR(rule);
 		}
 
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
 			return -EEXIST;
 		}
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			old_rule = rule;
 		else
 			return -EOPNOTSUPP;
 	} else {
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE) ||
-		    nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE) ||
+		    info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EINVAL;
 		handle = nf_tables_alloc_handle(table);
 
@@ -3309,15 +3310,15 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 		}
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
 
 	n = 0;
 	size = 0;
 	if (nla[NFTA_RULE_EXPRESSIONS]) {
-		info = kvmalloc_array(NFT_RULE_MAXEXPRS,
-				      sizeof(struct nft_expr_info),
-				      GFP_KERNEL);
-		if (!info)
+		expr_info = kvmalloc_array(NFT_RULE_MAXEXPRS,
+					   sizeof(struct nft_expr_info),
+					   GFP_KERNEL);
+		if (!expr_info)
 			return -ENOMEM;
 
 		nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
@@ -3326,10 +3327,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 				goto err1;
 			if (n == NFT_RULE_MAXEXPRS)
 				goto err1;
-			err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
+			err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
 			if (err < 0)
 				goto err1;
-			size += info[n].ops->size;
+			size += expr_info[n].ops->size;
 			n++;
 		}
 	}
@@ -3363,20 +3364,20 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
 	expr = nft_expr_first(rule);
 	for (i = 0; i < n; i++) {
-		err = nf_tables_newexpr(&ctx, &info[i], expr);
+		err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
 		if (err < 0) {
-			NL_SET_BAD_ATTR(extack, info[i].attr);
+			NL_SET_BAD_ATTR(extack, expr_info[i].attr);
 			goto err2;
 		}
 
-		if (info[i].ops->validate)
+		if (expr_info[i].ops->validate)
 			nft_validate_state_update(net, NFT_VALIDATE_NEED);
 
-		info[i].ops = NULL;
+		expr_info[i].ops = NULL;
 		expr = nft_expr_next(expr);
 	}
 
-	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+	if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
 		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
 		if (trans == NULL) {
 			err = -ENOMEM;
@@ -3396,7 +3397,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 			goto err2;
 		}
 
-		if (nlh->nlmsg_flags & NLM_F_APPEND) {
+		if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
 			if (old_rule)
 				list_add_rcu(&rule->list, &old_rule->list);
 			else
@@ -3408,7 +3409,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 				list_add_rcu(&rule->list, &chain->rules);
 		}
 	}
-	kvfree(info);
+	kvfree(expr_info);
 	chain->use++;
 
 	if (nft_net->validate_state == NFT_VALIDATE_DO)
@@ -3427,13 +3428,14 @@ err2:
 	nf_tables_rule_release(&ctx, rule);
 err1:
 	for (i = 0; i < n; i++) {
-		if (info[i].ops) {
-			module_put(info[i].ops->type->owner);
-			if (info[i].ops->type->release_ops)
-				info[i].ops->type->release_ops(info[i].ops);
+		if (expr_info[i].ops) {
+			module_put(expr_info[i].ops->type->owner);
+			if (expr_info[i].ops->type->release_ops)
+				expr_info[i].ops->type->release_ops(expr_info[i].ops);
 		}
 	}
-	kvfree(info);
+	kvfree(expr_info);
+
 	return err;
 }
 
@@ -3454,17 +3456,17 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
 	return ERR_PTR(-ENOENT);
 }
 
-static int nf_tables_delrule(struct net *net, struct sock *nlsk,
-			     struct sk_buff *skb, const struct nlmsghdr *nlh,
-			     const struct nlattr * const nla[],
-			     struct netlink_ext_ack *extack)
+static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
-	struct nft_table *table;
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	int family = nfmsg->nfgen_family, err = 0;
+	u8 genmask = nft_genmask_next(info->net);
 	struct nft_chain *chain = NULL;
+	struct net *net = info->net;
+	struct nft_table *table;
 	struct nft_rule *rule;
-	int family = nfmsg->nfgen_family, err = 0;
 	struct nft_ctx ctx;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
@@ -3485,7 +3487,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 			return -EOPNOTSUPP;
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
 
 	if (chain) {
 		if (nla[NFTA_RULE_HANDLE]) {
@@ -4166,28 +4168,27 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
 	return err;
 }
 
-static int nf_tables_newset(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	u32 ktype, dtype, flags, policy, gc_int, objtype;
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
 	const struct nft_set_ops *ops;
 	struct nft_expr *expr = NULL;
+	struct net *net = info->net;
+	struct nft_set_desc desc;
 	struct nft_table *table;
+	unsigned char *udata;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	char *name;
-	u64 size;
 	u64 timeout;
-	u32 ktype, dtype, flags, policy, gc_int, objtype;
-	struct nft_set_desc desc;
-	unsigned char *udata;
+	char *name;
+	int err, i;
 	u16 udlen;
-	int err;
-	int i;
+	u64 size;
 
 	if (nla[NFTA_SET_TABLE] == NULL ||
 	    nla[NFTA_SET_NAME] == NULL ||
@@ -4295,7 +4296,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
 	if (IS_ERR(set)) {
@@ -4304,17 +4305,17 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 			return PTR_ERR(set);
 		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
 			return -EEXIST;
 		}
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
 		return 0;
 	}
 
-	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+	if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 
 	ops = nft_select_set_ops(&ctx, nla, &desc, policy);
@@ -4448,13 +4449,13 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 	kvfree(set);
 }
 
-static int nf_tables_delset(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -4465,7 +4466,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_TABLE] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, extack,
+	err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
 					genmask, NETLINK_CB(skb).portid);
 	if (err < 0)
 		return err;
@@ -4483,7 +4484,8 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 		return PTR_ERR(set);
 	}
 	if (set->use ||
-	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
+	    (info->nlh->nlmsg_flags & NLM_F_NONREC &&
+	     atomic_read(&set->nelems) > 0)) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
 	}
@@ -5654,13 +5656,14 @@ err_set_elem_expr_clone:
 	return err;
 }
 
-static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[],
-				struct netlink_ext_ack *extack)
+static int nf_tables_newsetelem(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const nla[])
 {
-	struct nftables_pernet *nft_net = nft_pernet(net);
-	u8 genmask = nft_genmask_next(net);
+	struct nftables_pernet *nft_net = nft_pernet(info->net);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -5669,7 +5672,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
 		return -EINVAL;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
 					 genmask, NETLINK_CB(skb).portid);
 	if (err < 0)
 		return err;
@@ -5683,7 +5686,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
 		return -EBUSY;
 
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
-		err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
+		err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
 		if (err < 0)
 			return err;
 	}
@@ -5866,18 +5869,19 @@ err1:
 	return err;
 }
 
-static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
-				struct sk_buff *skb, const struct nlmsghdr *nlh,
-				const struct nlattr * const nla[],
-				struct netlink_ext_ack *extack)
+static int nf_tables_delsetelem(struct sk_buff *skb,
+				const struct nfnl_info *info,
+				const struct nlattr * const nla[])
 {
-	u8 genmask = nft_genmask_next(net);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	int rem, err = 0;
 
-	err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, extack,
+	err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
 					 genmask, NETLINK_CB(skb).portid);
 	if (err < 0)
 		return err;
@@ -6161,15 +6165,15 @@ err_free_trans:
 	return err;
 }
 
-static int nf_tables_newobj(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	const struct nft_object_type *type;
-	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	struct nft_table *table;
 	struct nft_object *obj;
 	struct nft_ctx ctx;
@@ -6197,20 +6201,20 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			return err;
 		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 			return -EEXIST;
 		}
-		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
 		type = __nft_obj_type_get(objtype);
-		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+		nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 		return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	type = nft_obj_type_get(net, objtype);
 	if (IS_ERR(type))
@@ -6507,14 +6511,14 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
 	kfree(obj);
 }
 
-static int nf_tables_delobj(struct net *net, struct sock *nlsk,
-			    struct sk_buff *skb, const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[],
-			    struct netlink_ext_ack *extack)
+static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
+			    const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_object *obj;
@@ -6550,7 +6554,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 		return -EBUSY;
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	return nft_delobj(&ctx, obj);
 }
@@ -6937,19 +6941,19 @@ err_flowtable_update_hook:
 
 }
 
-static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const nla[],
-				  struct netlink_ext_ack *extack)
+static int nf_tables_newflowtable(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
 	struct nft_flowtable_hook flowtable_hook;
+	u8 genmask = nft_genmask_next(info->net);
 	const struct nf_flowtable_type *type;
-	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
 	struct nft_hook *hook, *next;
+	struct net *net = info->net;
 	struct nft_table *table;
 	struct nft_ctx ctx;
 	int err;
@@ -6975,17 +6979,17 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 			return err;
 		}
 	} else {
-		if (nlh->nlmsg_flags & NLM_F_EXCL) {
+		if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
 			return -EEXIST;
 		}
 
-		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+		nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
-		return nft_flowtable_update(&ctx, nlh, flowtable);
+		return nft_flowtable_update(&ctx, info->nlh, flowtable);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
 	if (!flowtable)
@@ -7126,16 +7130,16 @@ err_flowtable_del_hook:
 	return err;
 }
 
-static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const nla[],
-				  struct netlink_ext_ack *extack)
+static int nf_tables_delflowtable(struct sk_buff *skb,
+				  const struct nfnl_info *info,
+				  const struct nlattr * const nla[])
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	u8 genmask = nft_genmask_next(net);
+	const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+	struct netlink_ext_ack *extack = info->extack;
+	u8 genmask = nft_genmask_next(info->net);
 	int family = nfmsg->nfgen_family;
 	struct nft_flowtable *flowtable;
+	struct net *net = info->net;
 	const struct nlattr *attr;
 	struct nft_table *table;
 	struct nft_ctx ctx;
@@ -7165,7 +7169,7 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 		return PTR_ERR(flowtable);
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
 	if (nla[NFTA_FLOWTABLE_HOOK])
 		return nft_delflowtable_hook(&ctx, flowtable);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7920f6c4ff69..e62c5af4b631 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -469,10 +469,17 @@ replay_abort:
 
 		{
 			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+			struct nfnl_net *nfnlnet = nfnl_pernet(net);
 			u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 			struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
 			struct nlattr *attr = (void *)nlh + min_len;
 			int attrlen = nlh->nlmsg_len - min_len;
+			struct nfnl_info info = {
+				.net	= net,
+				.sk	= nfnlnet->nfnl,
+				.nlh	= nlh,
+				.extack	= &extack,
+			};
 
 			/* Sanity-check NFTA_MAX_ATTR */
 			if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
@@ -488,11 +495,8 @@ replay_abort:
 				goto ack;
 
 			if (nc->call_batch) {
-				struct nfnl_net *nfnlnet = nfnl_pernet(net);
-
-				err = nc->call_batch(net, nfnlnet->nfnl, skb, nlh,
-						     (const struct nlattr **)cda,
-						     &extack);
+				err = nc->call_batch(skb, &info,
+						     (const struct nlattr **)cda);
 			}
 
 			/* The lock was released to autoload some module, we
-- 
cgit v1.2.3


From 50f2db9e368f73ecbbaa92da365183fa953aaba7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Apr 2021 00:17:12 +0200
Subject: netfilter: nfnetlink: consolidate callback types

Add enum nfnl_callback_type to identify the callback type to provide one
single callback.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h  | 16 ++++---
 net/netfilter/ipset/ip_set_core.c    | 16 +++++++
 net/netfilter/nf_conntrack_netlink.c | 88 +++++++++++++++++++++++++-----------
 net/netfilter/nf_tables_api.c        | 69 ++++++++++++++++++----------
 net/netfilter/nfnetlink.c            | 37 +++++++++------
 net/netfilter/nfnetlink_acct.c       | 36 ++++++++++-----
 net/netfilter/nfnetlink_cthelper.c   | 27 +++++++----
 net/netfilter/nfnetlink_cttimeout.c  | 45 ++++++++++++------
 net/netfilter/nfnetlink_log.c        | 16 +++++--
 net/netfilter/nfnetlink_osf.c        |  2 +
 net/netfilter/nfnetlink_queue.c      | 34 +++++++++-----
 net/netfilter/nft_compat.c           |  9 ++--
 12 files changed, 271 insertions(+), 124 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index df0e3254c57b..515ce53aa20d 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,15 +14,19 @@ struct nfnl_info {
 	struct netlink_ext_ack	*extack;
 };
 
+enum nfnl_callback_type {
+	NFNL_CB_UNSPEC	= 0,
+	NFNL_CB_MUTEX,
+	NFNL_CB_RCU,
+	NFNL_CB_BATCH,
+};
+
 struct nfnl_callback {
 	int (*call)(struct sk_buff *skb, const struct nfnl_info *info,
 		    const struct nlattr * const cda[]);
-	int (*call_rcu)(struct sk_buff *skb, const struct nfnl_info *info,
-			const struct nlattr * const cda[]);
-	int (*call_batch)(struct sk_buff *skb, const struct nfnl_info *info,
-			  const struct nlattr * const cda[]);
-	const struct nla_policy *policy;	/* netlink attribute policy */
-	const u_int16_t attr_count;		/* number of nlattr's */
+	const struct nla_policy	*policy;
+	enum nfnl_callback_type	type;
+	__u16			attr_count;
 };
 
 enum nfnl_abort_action {
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bf9902c1daa8..de2d20c37cda 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -2108,80 +2108,96 @@ nlmsg_failure:
 static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
 	[IPSET_CMD_NONE]	= {
 		.call		= ip_set_none,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 	},
 	[IPSET_CMD_CREATE]	= {
 		.call		= ip_set_create,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_create_policy,
 	},
 	[IPSET_CMD_DESTROY]	= {
 		.call		= ip_set_destroy,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname_policy,
 	},
 	[IPSET_CMD_FLUSH]	= {
 		.call		= ip_set_flush,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname_policy,
 	},
 	[IPSET_CMD_RENAME]	= {
 		.call		= ip_set_rename,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname2_policy,
 	},
 	[IPSET_CMD_SWAP]	= {
 		.call		= ip_set_swap,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname2_policy,
 	},
 	[IPSET_CMD_LIST]	= {
 		.call		= ip_set_dump,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_dump_policy,
 	},
 	[IPSET_CMD_SAVE]	= {
 		.call		= ip_set_dump,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname_policy,
 	},
 	[IPSET_CMD_ADD]	= {
 		.call		= ip_set_uadd,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_adt_policy,
 	},
 	[IPSET_CMD_DEL]	= {
 		.call		= ip_set_udel,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_adt_policy,
 	},
 	[IPSET_CMD_TEST]	= {
 		.call		= ip_set_utest,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_adt_policy,
 	},
 	[IPSET_CMD_HEADER]	= {
 		.call		= ip_set_header,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname_policy,
 	},
 	[IPSET_CMD_TYPE]	= {
 		.call		= ip_set_type,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_type_policy,
 	},
 	[IPSET_CMD_PROTOCOL]	= {
 		.call		= ip_set_protocol,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_protocol_policy,
 	},
 	[IPSET_CMD_GET_BYNAME]	= {
 		.call		= ip_set_byname,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_setname_policy,
 	},
 	[IPSET_CMD_GET_BYINDEX]	= {
 		.call		= ip_set_byindex,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= IPSET_ATTR_CMD_MAX,
 		.policy		= ip_set_index_policy,
 	},
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5147a63b3d1b..8690fc07030f 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3751,35 +3751,71 @@ static struct nf_exp_event_notifier ctnl_notifier_exp = {
 #endif
 
 static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
-	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
-					    .attr_count = CTA_MAX,
-					    .policy = ct_nla_policy },
-	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .policy = ct_nla_policy },
-	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
-					    .attr_count = CTA_MAX,
-					    .policy = ct_nla_policy },
-	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX,
-					    .policy = ct_nla_policy },
-	[IPCTNL_MSG_CT_GET_STATS_CPU]	= { .call = ctnetlink_stat_ct_cpu },
-	[IPCTNL_MSG_CT_GET_STATS]	= { .call = ctnetlink_stat_ct },
-	[IPCTNL_MSG_CT_GET_DYING]	= { .call = ctnetlink_get_ct_dying },
-	[IPCTNL_MSG_CT_GET_UNCONFIRMED]	= { .call = ctnetlink_get_ct_unconfirmed },
+	[IPCTNL_MSG_CT_NEW]	= {
+		.call		= ctnetlink_new_conntrack,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_MAX,
+		.policy		= ct_nla_policy
+	},
+	[IPCTNL_MSG_CT_GET]	= {
+		.call		= ctnetlink_get_conntrack,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_MAX,
+		.policy		= ct_nla_policy
+	},
+	[IPCTNL_MSG_CT_DELETE]	= {
+		.call		= ctnetlink_del_conntrack,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_MAX,
+		.policy		= ct_nla_policy
+	},
+	[IPCTNL_MSG_CT_GET_CTRZERO] = {
+		.call		= ctnetlink_get_conntrack,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_MAX,
+		.policy		= ct_nla_policy
+	},
+	[IPCTNL_MSG_CT_GET_STATS_CPU] = {
+		.call		= ctnetlink_stat_ct_cpu,
+		.type		= NFNL_CB_MUTEX,
+	},
+	[IPCTNL_MSG_CT_GET_STATS] = {
+		.call		= ctnetlink_stat_ct,
+		.type		= NFNL_CB_MUTEX,
+	},
+	[IPCTNL_MSG_CT_GET_DYING] = {
+		.call		= ctnetlink_get_ct_dying,
+		.type		= NFNL_CB_MUTEX,
+	},
+	[IPCTNL_MSG_CT_GET_UNCONFIRMED]	= {
+		.call		= ctnetlink_get_ct_unconfirmed,
+		.type		= NFNL_CB_MUTEX,
+	},
 };
 
 static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
-	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .policy = exp_nla_policy },
-	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .policy = exp_nla_policy },
-	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
-					    .attr_count = CTA_EXPECT_MAX,
-					    .policy = exp_nla_policy },
-	[IPCTNL_MSG_EXP_GET_STATS_CPU]	= { .call = ctnetlink_stat_exp_cpu },
+	[IPCTNL_MSG_EXP_GET] = {
+		.call		= ctnetlink_get_expect,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_EXPECT_MAX,
+		.policy		= exp_nla_policy
+	},
+	[IPCTNL_MSG_EXP_NEW] = {
+		.call		= ctnetlink_new_expect,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_EXPECT_MAX,
+		.policy		= exp_nla_policy
+	},
+	[IPCTNL_MSG_EXP_DELETE] = {
+		.call		= ctnetlink_del_expect,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_EXPECT_MAX,
+		.policy		= exp_nla_policy
+	},
+	[IPCTNL_MSG_EXP_GET_STATS_CPU] = {
+		.call		= ctnetlink_stat_exp_cpu,
+		.type		= NFNL_CB_MUTEX,
+	},
 };
 
 static const struct nfnetlink_subsystem ctnl_subsys = {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 280ca136df56..1050f23c0d29 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7554,115 +7554,138 @@ err_fill_gen_info:
 
 static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 	[NFT_MSG_NEWTABLE] = {
-		.call_batch	= nf_tables_newtable,
+		.call		= nf_tables_newtable,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_GETTABLE] = {
-		.call_rcu	= nf_tables_gettable,
+		.call		= nf_tables_gettable,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_DELTABLE] = {
-		.call_batch	= nf_tables_deltable,
+		.call		= nf_tables_deltable,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_NEWCHAIN] = {
-		.call_batch	= nf_tables_newchain,
+		.call		= nf_tables_newchain,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_GETCHAIN] = {
-		.call_rcu	= nf_tables_getchain,
+		.call		= nf_tables_getchain,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_DELCHAIN] = {
-		.call_batch	= nf_tables_delchain,
+		.call		= nf_tables_delchain,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_NEWRULE] = {
-		.call_batch	= nf_tables_newrule,
+		.call		= nf_tables_newrule,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_GETRULE] = {
-		.call_rcu	= nf_tables_getrule,
+		.call		= nf_tables_getrule,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_DELRULE] = {
-		.call_batch	= nf_tables_delrule,
+		.call		= nf_tables_delrule,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_RULE_MAX,
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_NEWSET] = {
-		.call_batch	= nf_tables_newset,
+		.call		= nf_tables_newset,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_GETSET] = {
-		.call_rcu	= nf_tables_getset,
+		.call		= nf_tables_getset,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_DELSET] = {
-		.call_batch	= nf_tables_delset,
+		.call		= nf_tables_delset,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_NEWSETELEM] = {
-		.call_batch	= nf_tables_newsetelem,
+		.call		= nf_tables_newsetelem,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_GETSETELEM] = {
-		.call_rcu	= nf_tables_getsetelem,
+		.call		= nf_tables_getsetelem,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_DELSETELEM] = {
-		.call_batch	= nf_tables_delsetelem,
+		.call		= nf_tables_delsetelem,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_GETGEN] = {
-		.call_rcu	= nf_tables_getgen,
+		.call		= nf_tables_getgen,
+		.type		= NFNL_CB_RCU,
 	},
 	[NFT_MSG_NEWOBJ] = {
-		.call_batch	= nf_tables_newobj,
+		.call		= nf_tables_newobj,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_GETOBJ] = {
-		.call_rcu	= nf_tables_getobj,
+		.call		= nf_tables_getobj,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_DELOBJ] = {
-		.call_batch	= nf_tables_delobj,
+		.call		= nf_tables_delobj,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_GETOBJ_RESET] = {
-		.call_rcu	= nf_tables_getobj,
+		.call		= nf_tables_getobj,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_OBJ_MAX,
 		.policy		= nft_obj_policy,
 	},
 	[NFT_MSG_NEWFLOWTABLE] = {
-		.call_batch	= nf_tables_newflowtable,
+		.call		= nf_tables_newflowtable,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_FLOWTABLE_MAX,
 		.policy		= nft_flowtable_policy,
 	},
 	[NFT_MSG_GETFLOWTABLE] = {
-		.call_rcu	= nf_tables_getflowtable,
+		.call		= nf_tables_getflowtable,
+		.type		= NFNL_CB_RCU,
 		.attr_count	= NFTA_FLOWTABLE_MAX,
 		.policy		= nft_flowtable_policy,
 	},
 	[NFT_MSG_DELFLOWTABLE] = {
-		.call_batch	= nf_tables_delflowtable,
+		.call		= nf_tables_delflowtable,
+		.type		= NFNL_CB_BATCH,
 		.attr_count	= NFTA_FLOWTABLE_MAX,
 		.policy		= nft_flowtable_policy,
 	},
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e62c5af4b631..d7a9628b6cee 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -273,23 +273,30 @@ replay:
 			return err;
 		}
 
-		if (nc->call_rcu) {
-			err = nc->call_rcu(skb, &info,
-					   (const struct nlattr **)cda);
+		if (!nc->call) {
 			rcu_read_unlock();
-		} else {
+			return -EINVAL;
+		}
+
+		switch (nc->type) {
+		case NFNL_CB_RCU:
+			err = nc->call(skb, &info, (const struct nlattr **)cda);
+			rcu_read_unlock();
+			break;
+		case NFNL_CB_MUTEX:
 			rcu_read_unlock();
 			nfnl_lock(subsys_id);
 			if (nfnl_dereference_protected(subsys_id) != ss ||
 			    nfnetlink_find_client(type, ss) != nc) {
 				err = -EAGAIN;
-			} else if (nc->call) {
-				err = nc->call(skb, &info,
-					       (const struct nlattr **)cda);
-			} else {
-				err = -EINVAL;
+				break;
 			}
+			err = nc->call(skb, &info, (const struct nlattr **)cda);
 			nfnl_unlock(subsys_id);
+			break;
+		default:
+			err = -EINVAL;
+			break;
 		}
 		if (err == -EAGAIN)
 			goto replay;
@@ -467,12 +474,17 @@ replay_abort:
 			goto ack;
 		}
 
+		if (nc->type != NFNL_CB_BATCH) {
+			err = -EINVAL;
+			goto ack;
+		}
+
 		{
 			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
 			struct nfnl_net *nfnlnet = nfnl_pernet(net);
-			u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 			struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
 			struct nlattr *attr = (void *)nlh + min_len;
+			u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 			int attrlen = nlh->nlmsg_len - min_len;
 			struct nfnl_info info = {
 				.net	= net,
@@ -494,10 +506,7 @@ replay_abort:
 			if (err < 0)
 				goto ack;
 
-			if (nc->call_batch) {
-				err = nc->call_batch(skb, &info,
-						     (const struct nlattr **)cda);
-			}
+			err = nc->call(skb, &info, (const struct nlattr **)cda);
 
 			/* The lock was released to autoload some module, we
 			 * have to abort and start from scratch using the
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 9cb4b21b8e95..3c8cf8748cfb 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -382,18 +382,30 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
-	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new,
-					    .attr_count = NFACCT_MAX,
-					    .policy = nfnl_acct_policy },
-	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get,
-					    .attr_count = NFACCT_MAX,
-					    .policy = nfnl_acct_policy },
-	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get,
-					    .attr_count = NFACCT_MAX,
-					    .policy = nfnl_acct_policy },
-	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
-					    .attr_count = NFACCT_MAX,
-					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_NEW] = {
+		.call		= nfnl_acct_new,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFACCT_MAX,
+		.policy		= nfnl_acct_policy
+	},
+	[NFNL_MSG_ACCT_GET] = {
+		.call		= nfnl_acct_get,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFACCT_MAX,
+		.policy		= nfnl_acct_policy
+	},
+	[NFNL_MSG_ACCT_GET_CTRZERO] = {
+		.call		= nfnl_acct_get,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFACCT_MAX,
+		.policy		= nfnl_acct_policy
+	},
+	[NFNL_MSG_ACCT_DEL] = {
+		.call		= nfnl_acct_del,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFACCT_MAX,
+		.policy		= nfnl_acct_policy
+	},
 };
 
 static const struct nfnetlink_subsystem nfnl_acct_subsys = {
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 3d1a5215177b..322ac5dd5402 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -737,15 +737,24 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = {
 };
 
 static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = {
-	[NFNL_MSG_CTHELPER_NEW]		= { .call = nfnl_cthelper_new,
-					    .attr_count = NFCTH_MAX,
-					    .policy = nfnl_cthelper_policy },
-	[NFNL_MSG_CTHELPER_GET]		= { .call = nfnl_cthelper_get,
-					    .attr_count = NFCTH_MAX,
-					    .policy = nfnl_cthelper_policy },
-	[NFNL_MSG_CTHELPER_DEL]		= { .call = nfnl_cthelper_del,
-					    .attr_count = NFCTH_MAX,
-					    .policy = nfnl_cthelper_policy },
+	[NFNL_MSG_CTHELPER_NEW]	= {
+		.call		= nfnl_cthelper_new,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFCTH_MAX,
+		.policy		= nfnl_cthelper_policy
+	},
+	[NFNL_MSG_CTHELPER_GET] = {
+		.call		= nfnl_cthelper_get,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFCTH_MAX,
+		.policy		= nfnl_cthelper_policy
+	},
+	[NFNL_MSG_CTHELPER_DEL]	= {
+		.call		= nfnl_cthelper_del,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFCTH_MAX,
+		.policy		= nfnl_cthelper_policy
+	},
 };
 
 static const struct nfnetlink_subsystem nfnl_cthelper_subsys = {
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 994f3172bf42..38848ad68899 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -546,21 +546,36 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
 }
 
 static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
-	[IPCTNL_MSG_TIMEOUT_NEW]	= { .call = cttimeout_new_timeout,
-					    .attr_count = CTA_TIMEOUT_MAX,
-					    .policy = cttimeout_nla_policy },
-	[IPCTNL_MSG_TIMEOUT_GET]	= { .call = cttimeout_get_timeout,
-					    .attr_count = CTA_TIMEOUT_MAX,
-					    .policy = cttimeout_nla_policy },
-	[IPCTNL_MSG_TIMEOUT_DELETE]	= { .call = cttimeout_del_timeout,
-					    .attr_count = CTA_TIMEOUT_MAX,
-					    .policy = cttimeout_nla_policy },
-	[IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set,
-					    .attr_count = CTA_TIMEOUT_MAX,
-					    .policy = cttimeout_nla_policy },
-	[IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get,
-					    .attr_count = CTA_TIMEOUT_MAX,
-					    .policy = cttimeout_nla_policy },
+	[IPCTNL_MSG_TIMEOUT_NEW] = {
+		.call		= cttimeout_new_timeout,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy
+	},
+	[IPCTNL_MSG_TIMEOUT_GET] = {
+		.call		= cttimeout_get_timeout,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy
+	},
+	[IPCTNL_MSG_TIMEOUT_DELETE] = {
+		.call		= cttimeout_del_timeout,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy
+	},
+	[IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = {
+		.call		= cttimeout_default_set,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy
+	},
+	[IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = {
+		.call		= cttimeout_default_get,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= CTA_TIMEOUT_MAX,
+		.policy		= cttimeout_nla_policy
+	},
 };
 
 static const struct nfnetlink_subsystem cttimeout_subsys = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 81630600b4ef..587086b18c36 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -989,11 +989,17 @@ out:
 }
 
 static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
-	[NFULNL_MSG_PACKET]	= { .call = nfulnl_recv_unsupp,
-				    .attr_count = NFULA_MAX, },
-	[NFULNL_MSG_CONFIG]	= { .call = nfulnl_recv_config,
-				    .attr_count = NFULA_CFG_MAX,
-				    .policy = nfula_cfg_policy },
+	[NFULNL_MSG_PACKET]	= {
+		.call		= nfulnl_recv_unsupp,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFULA_MAX,
+	},
+	[NFULNL_MSG_CONFIG]	= {
+		.call		= nfulnl_recv_config,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFULA_CFG_MAX,
+		.policy		= nfula_cfg_policy
+	},
 };
 
 static const struct nfnetlink_subsystem nfulnl_subsys = {
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 1fd537ef4496..e8f8875c6884 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -374,11 +374,13 @@ static int nfnl_osf_remove_callback(struct sk_buff *skb,
 static const struct nfnl_callback nfnl_osf_callbacks[OSF_MSG_MAX] = {
 	[OSF_MSG_ADD]	= {
 		.call		= nfnl_osf_add_callback,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= OSF_ATTR_MAX,
 		.policy		= nfnl_osf_policy,
 	},
 	[OSF_MSG_REMOVE]	= {
 		.call		= nfnl_osf_remove_callback,
+		.type		= NFNL_CB_MUTEX,
 		.attr_count	= OSF_ATTR_MAX,
 		.policy		= nfnl_osf_policy,
 	},
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ede9252c8de1..f37a575ebd7f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1365,17 +1365,29 @@ err_out_unlock:
 }
 
 static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
-	[NFQNL_MSG_PACKET]	= { .call_rcu = nfqnl_recv_unsupp,
-				    .attr_count = NFQA_MAX, },
-	[NFQNL_MSG_VERDICT]	= { .call_rcu = nfqnl_recv_verdict,
-				    .attr_count = NFQA_MAX,
-				    .policy = nfqa_verdict_policy },
-	[NFQNL_MSG_CONFIG]	= { .call = nfqnl_recv_config,
-				    .attr_count = NFQA_CFG_MAX,
-				    .policy = nfqa_cfg_policy },
-	[NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
-				    .attr_count = NFQA_MAX,
-				    .policy = nfqa_verdict_batch_policy },
+	[NFQNL_MSG_PACKET]	= {
+		.call		= nfqnl_recv_unsupp,
+		.type		= NFNL_CB_RCU,
+		.attr_count	= NFQA_MAX,
+	},
+	[NFQNL_MSG_VERDICT]	= {
+		.call		= nfqnl_recv_verdict,
+		.type		= NFNL_CB_RCU,
+		.attr_count	= NFQA_MAX,
+		.policy		= nfqa_verdict_policy
+	},
+	[NFQNL_MSG_CONFIG]	= {
+		.call		= nfqnl_recv_config,
+		.type		= NFNL_CB_MUTEX,
+		.attr_count	= NFQA_CFG_MAX,
+		.policy		= nfqa_cfg_policy
+	},
+	[NFQNL_MSG_VERDICT_BATCH] = {
+		.call		= nfqnl_recv_verdict_batch,
+		.type		= NFNL_CB_RCU,
+		.attr_count	= NFQA_MAX,
+		.policy		= nfqa_verdict_batch_policy
+	},
 };
 
 static const struct nfnetlink_subsystem nfqnl_subsys = {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 4c0657245d5a..5415ab14400d 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -698,9 +698,12 @@ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
 };
 
 static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
-	[NFNL_MSG_COMPAT_GET]		= { .call_rcu = nfnl_compat_get_rcu,
-					    .attr_count = NFTA_COMPAT_MAX,
-					    .policy = nfnl_compat_policy_get },
+	[NFNL_MSG_COMPAT_GET]	= {
+		.call		= nfnl_compat_get_rcu,
+		.type		= NFNL_CB_RCU,
+		.attr_count	= NFTA_COMPAT_MAX,
+		.policy		= nfnl_compat_policy_get
+	},
 };
 
 static const struct nfnetlink_subsystem nfnl_compat_subsys = {
-- 
cgit v1.2.3


From 47a6959fa331fe892a4fc3b48ca08e92045c6bda Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 26 Apr 2021 12:14:40 +0200
Subject: netfilter: allow to turn off xtables compat layer

The compat layer needs to parse untrusted input (the ruleset)
to translate it to a 64bit compatible format.

We had a number of bugs in this department in the past, so allow users
to turn this feature off.

Add CONFIG_NETFILTER_XTABLES_COMPAT kconfig knob and make it default to y
to keep existing behaviour.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h        | 12 ++++++------
 include/linux/netfilter_arp/arp_tables.h  |  2 +-
 include/linux/netfilter_ipv4/ip_tables.h  |  2 +-
 include/linux/netfilter_ipv6/ip6_tables.h |  2 +-
 net/bridge/netfilter/ebt_limit.c          |  4 ++--
 net/bridge/netfilter/ebt_mark.c           |  4 ++--
 net/bridge/netfilter/ebt_mark_m.c         |  4 ++--
 net/bridge/netfilter/ebtables.c           | 12 ++++++------
 net/ipv4/netfilter/arp_tables.c           | 16 ++++++++--------
 net/ipv4/netfilter/ip_tables.c            | 16 ++++++++--------
 net/ipv4/netfilter/ipt_CLUSTERIP.c        |  8 ++++----
 net/ipv6/netfilter/ip6_tables.c           | 16 ++++++++--------
 net/netfilter/Kconfig                     | 10 ++++++++++
 net/netfilter/x_tables.c                  | 16 ++++++++--------
 net/netfilter/xt_limit.c                  |  6 +++---
 15 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index a52cc22f806a..07c6ad8f2a02 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -158,7 +158,7 @@ struct xt_match {
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_mtdtor_param *);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, const void *src);
 	int (*compat_to_user)(void __user *dst, const void *src);
@@ -169,7 +169,7 @@ struct xt_match {
 	const char *table;
 	unsigned int matchsize;
 	unsigned int usersize;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	unsigned int compatsize;
 #endif
 	unsigned int hooks;
@@ -199,7 +199,7 @@ struct xt_target {
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_tgdtor_param *);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	/* Called when userspace align differs from kernel space one */
 	void (*compat_from_user)(void *dst, const void *src);
 	int (*compat_to_user)(void __user *dst, const void *src);
@@ -210,7 +210,7 @@ struct xt_target {
 	const char *table;
 	unsigned int targetsize;
 	unsigned int usersize;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	unsigned int compatsize;
 #endif
 	unsigned int hooks;
@@ -452,7 +452,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
 
 struct compat_xt_entry_match {
@@ -533,5 +533,5 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
 				  unsigned int target_offset,
 				  unsigned int next_offset);
 
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
 #endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index a0474b4e7782..2aab9612f6ab 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -59,7 +59,7 @@ extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
 
 struct compat_arpt_entry {
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 0fdab3246ef5..8d09bfe850dc 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -67,7 +67,7 @@ extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct xt_table *table);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
 
 struct compat_ipt_entry {
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 11d0e725fe79..79e73fd7d965 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -33,7 +33,7 @@ extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
 
 struct compat_ip6t_entry {
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index fa199556e122..e16183bd1bb8 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -87,7 +87,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 }
 
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 /*
  * no conversion function needed --
  * only avg/burst have meaningful values in userspace.
@@ -107,7 +107,7 @@ static struct xt_match ebt_limit_mt_reg __read_mostly = {
 	.checkentry	= ebt_limit_mt_check,
 	.matchsize	= sizeof(struct ebt_limit_info),
 	.usersize	= offsetof(struct ebt_limit_info, prev),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize	= sizeof(struct ebt_compat_limit_info),
 #endif
 	.me		= THIS_MODULE,
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 21fd3d3d77f6..8cf653c72fd8 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -53,7 +53,7 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 	return 0;
 }
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_ebt_mark_t_info {
 	compat_ulong_t mark;
 	compat_uint_t target;
@@ -87,7 +87,7 @@ static struct xt_target ebt_mark_tg_reg __read_mostly = {
 	.target		= ebt_mark_tg,
 	.checkentry	= ebt_mark_tg_check,
 	.targetsize	= sizeof(struct ebt_mark_t_info),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize	= sizeof(struct compat_ebt_mark_t_info),
 	.compat_from_user = mark_tg_compat_from_user,
 	.compat_to_user	= mark_tg_compat_to_user,
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 81fb59dec499..5872e73c741e 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -37,7 +37,7 @@ static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
 }
 
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_ebt_mark_m_info {
 	compat_ulong_t mark, mask;
 	uint8_t invert, bitmask;
@@ -75,7 +75,7 @@ static struct xt_match ebt_mark_mt_reg __read_mostly = {
 	.match		= ebt_mark_mt,
 	.checkentry	= ebt_mark_mt_check,
 	.matchsize	= sizeof(struct ebt_mark_m_info),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize	= sizeof(struct compat_ebt_mark_m_info),
 	.compat_from_user = mark_mt_compat_from_user,
 	.compat_to_user	= mark_mt_compat_to_user,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index a04596bb2a6e..f022deb3721e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -47,7 +47,7 @@ struct ebt_pernet {
 static unsigned int ebt_pernet_id __read_mostly;
 static DEFINE_MUTEX(ebt_mutex);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 static void ebt_standard_compat_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
@@ -73,7 +73,7 @@ static struct xt_target ebt_standard_target = {
 	.revision   = 0,
 	.family     = NFPROTO_BRIDGE,
 	.targetsize = sizeof(int),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize = sizeof(compat_int_t),
 	.compat_from_user = ebt_standard_compat_from_user,
 	.compat_to_user =  ebt_standard_compat_to_user,
@@ -1502,7 +1502,7 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user,
 	   ebt_entry_to_user, entries, tmp.entries);
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 /* 32 bit-userspace compatibility definitions. */
 struct compat_ebt_replace {
 	char name[EBT_TABLE_MAXNAMELEN];
@@ -2367,7 +2367,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	/* try real handler in case userland supplied needed padding */
 	if (in_compat_syscall() &&
 	    ((cmd != EBT_SO_GET_INFO && cmd != EBT_SO_GET_INIT_INFO) ||
@@ -2434,7 +2434,7 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
 
 	switch (cmd) {
 	case EBT_SO_SET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_do_replace(net, arg, len);
 		else
@@ -2442,7 +2442,7 @@ static int do_ebt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
 			ret = do_replace(net, arg, len);
 		break;
 	case EBT_SO_SET_COUNTERS:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_update_counters(net, arg, len);
 		else
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b1bb6a7e2dd7..cf20316094d0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -713,7 +713,7 @@ static int copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
@@ -800,7 +800,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		return -EFAULT;
 
 	name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_lock(NFPROTO_ARP);
 #endif
@@ -808,7 +808,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 	if (!IS_ERR(t)) {
 		struct arpt_getinfo info;
 		const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		struct xt_table_info tmp;
 
 		if (in_compat_syscall()) {
@@ -835,7 +835,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		module_put(t->me);
 	} else
 		ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_unlock(NFPROTO_ARP);
 #endif
@@ -1044,7 +1044,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_arpt_replace {
 	char				name[XT_TABLE_MAXNAMELEN];
 	u32				valid_hooks;
@@ -1412,7 +1412,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, sockptr_t arg,
 
 	switch (cmd) {
 	case ARPT_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_do_replace(sock_net(sk), arg, len);
 		else
@@ -1444,7 +1444,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 		break;
 
 	case ARPT_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_get_entries(sock_net(sk), user, len);
 		else
@@ -1580,7 +1580,7 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
 		.name             = XT_STANDARD_TARGET,
 		.targetsize       = sizeof(int),
 		.family           = NFPROTO_ARP,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		.compatsize       = sizeof(compat_int_t),
 		.compat_from_user = compat_standard_from_user,
 		.compat_to_user   = compat_standard_to_user,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d6caaed5dd45..13acb687c19a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -868,7 +868,7 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
@@ -957,7 +957,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		return -EFAULT;
 
 	name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_lock(AF_INET);
 #endif
@@ -965,7 +965,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 	if (!IS_ERR(t)) {
 		struct ipt_getinfo info;
 		const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		struct xt_table_info tmp;
 
 		if (in_compat_syscall()) {
@@ -993,7 +993,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		module_put(t->me);
 	} else
 		ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_unlock(AF_INET);
 #endif
@@ -1199,7 +1199,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_ipt_replace {
 	char			name[XT_TABLE_MAXNAMELEN];
 	u32			valid_hooks;
@@ -1621,7 +1621,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
 
 	switch (cmd) {
 	case IPT_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_do_replace(sock_net(sk), arg, len);
 		else
@@ -1654,7 +1654,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		break;
 
 	case IPT_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_get_entries(sock_net(sk), user, len);
 		else
@@ -1846,7 +1846,7 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
 		.name             = XT_STANDARD_TARGET,
 		.targetsize       = sizeof(int),
 		.family           = NFPROTO_IPV4,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		.compatsize       = sizeof(compat_int_t),
 		.compat_from_user = compat_standard_from_user,
 		.compat_to_user   = compat_standard_to_user,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a8b980ad11d4..8f7ca67475b7 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -541,7 +541,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 	nf_ct_netns_put(par->net, par->family);
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_ipt_clusterip_tgt_info
 {
 	u_int32_t	flags;
@@ -553,7 +553,7 @@ struct compat_ipt_clusterip_tgt_info
 	u_int32_t	hash_initval;
 	compat_uptr_t	config;
 };
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
 
 static struct xt_target clusterip_tg_reg __read_mostly = {
 	.name		= "CLUSTERIP",
@@ -563,9 +563,9 @@ static struct xt_target clusterip_tg_reg __read_mostly = {
 	.destroy	= clusterip_tg_destroy,
 	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
 	.usersize	= offsetof(struct ipt_clusterip_tgt_info, config),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
 	.me		= THIS_MODULE
 };
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e763716ffa25..e810a23baf99 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -884,7 +884,7 @@ copy_entries_to_user(unsigned int total_size,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 static void compat_standard_from_user(void *dst, const void *src)
 {
 	int v = *(compat_int_t *)src;
@@ -973,7 +973,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		return -EFAULT;
 
 	name[XT_TABLE_MAXNAMELEN-1] = '\0';
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_lock(AF_INET6);
 #endif
@@ -981,7 +981,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 	if (!IS_ERR(t)) {
 		struct ip6t_getinfo info;
 		const struct xt_table_info *private = t->private;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		struct xt_table_info tmp;
 
 		if (in_compat_syscall()) {
@@ -1009,7 +1009,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
 		module_put(t->me);
 	} else
 		ret = PTR_ERR(t);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall())
 		xt_compat_unlock(AF_INET6);
 #endif
@@ -1215,7 +1215,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_ip6t_replace {
 	char			name[XT_TABLE_MAXNAMELEN];
 	u32			valid_hooks;
@@ -1630,7 +1630,7 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len)
 
 	switch (cmd) {
 	case IP6T_SO_SET_REPLACE:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_do_replace(sock_net(sk), arg, len);
 		else
@@ -1663,7 +1663,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		break;
 
 	case IP6T_SO_GET_ENTRIES:
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		if (in_compat_syscall())
 			ret = compat_get_entries(sock_net(sk), user, len);
 		else
@@ -1853,7 +1853,7 @@ static struct xt_target ip6t_builtin_tg[] __read_mostly = {
 		.name             = XT_STANDARD_TARGET,
 		.targetsize       = sizeof(int),
 		.family           = NFPROTO_IPV6,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		.compatsize       = sizeof(compat_int_t),
 		.compat_from_user = compat_standard_from_user,
 		.compat_to_user   = compat_standard_to_user,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fcd8682704c4..56a2531a3402 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -728,6 +728,16 @@ config NETFILTER_XTABLES
 
 if NETFILTER_XTABLES
 
+config NETFILTER_XTABLES_COMPAT
+	bool "Netfilter Xtables 32bit support"
+	depends on COMPAT
+	default y
+	help
+	   This option provides a translation layer to run 32bit arp,ip(6),ebtables
+	   binaries on 64bit kernels.
+
+	   If unsure, say N.
+
 comment "Xtables combined modules"
 
 config NETFILTER_XT_MARK
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ef37deff8405..84e58ee501a4 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -52,7 +52,7 @@ struct xt_af {
 	struct mutex mutex;
 	struct list_head match;
 	struct list_head target;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	struct mutex compat_mutex;
 	struct compat_delta *compat_tab;
 	unsigned int number; /* number of slots in compat_tab[] */
@@ -647,7 +647,7 @@ static bool error_tg_ok(unsigned int usersize, unsigned int kernsize,
 	return usersize == kernsize && strnlen(msg, msglen) < msglen;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
 	struct xt_af *xp = &xt[af];
@@ -850,7 +850,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems,
 				    __alignof__(struct compat_xt_entry_match));
 }
 EXPORT_SYMBOL(xt_compat_check_entry_offsets);
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
 
 /**
  * xt_check_entry_offsets - validate arp/ip/ip6t_entry
@@ -868,7 +868,7 @@ EXPORT_SYMBOL(xt_compat_check_entry_offsets);
  * match structures are aligned, and that the last structure ends where
  * the target structure begins.
  *
- * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ * Also see xt_compat_check_entry_offsets for CONFIG_NETFILTER_XTABLES_COMPAT version.
  *
  * The arp/ip/ip6t_entry structure @base must have passed following tests:
  * - it must point to a valid memory location
@@ -1059,7 +1059,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len,
 	void *mem;
 	u64 size;
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	if (in_compat_syscall()) {
 		/* structures only differ in size due to alignment */
 		struct compat_xt_counters_info compat_tmp;
@@ -1106,7 +1106,7 @@ void *xt_copy_counters(sockptr_t arg, unsigned int len,
 }
 EXPORT_SYMBOL_GPL(xt_copy_counters);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 int xt_compat_target_offset(const struct xt_target *target)
 {
 	u_int16_t csize = target->compatsize ? : target->targetsize;
@@ -1293,7 +1293,7 @@ void xt_table_unlock(struct xt_table *table)
 }
 EXPORT_SYMBOL_GPL(xt_table_unlock);
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 void xt_compat_lock(u_int8_t af)
 {
 	mutex_lock(&xt[af].compat_mutex);
@@ -1931,7 +1931,7 @@ static int __init xt_init(void)
 
 	for (i = 0; i < NFPROTO_NUMPROTO; i++) {
 		mutex_init(&xt[i].mutex);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 		mutex_init(&xt[i].compat_mutex);
 		xt[i].compat_tab = NULL;
 #endif
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index bd1dea9c7b88..24d4afb9988d 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -134,7 +134,7 @@ static void limit_mt_destroy(const struct xt_mtdtor_param *par)
 	kfree(info->master);
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 struct compat_xt_rateinfo {
 	u_int32_t avg;
 	u_int32_t burst;
@@ -176,7 +176,7 @@ static int limit_mt_compat_to_user(void __user *dst, const void *src)
 	};
 	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
 }
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
 
 static struct xt_match limit_mt_reg __read_mostly = {
 	.name             = "limit",
@@ -186,7 +186,7 @@ static struct xt_match limit_mt_reg __read_mostly = {
 	.checkentry       = limit_mt_check,
 	.destroy          = limit_mt_destroy,
 	.matchsize        = sizeof(struct xt_rateinfo),
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 	.compatsize       = sizeof(struct compat_xt_rateinfo),
 	.compat_from_user = limit_mt_compat_from_user,
 	.compat_to_user   = limit_mt_compat_to_user,
-- 
cgit v1.2.3


From 930d2d619d0a341693af4a7db9b37b96434ac65e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sun, 25 Apr 2021 18:35:18 +0800
Subject: pcnet32: Remove redundant variable prev_link and curr_link

Variable prev_link and curr_link is being assigned a value from a
calculation however the variable is never read, so this redundant
variable can be removed.

Cleans up the following clang-analyzer warning:

drivers/net/ethernet/amd/pcnet32.c:2857:4: warning: Value stored to
'prev_link' is never read [clang-analyzer-deadcode.DeadStores].

drivers/net/ethernet/amd/pcnet32.c:2856:4: warning: Value stored to
'curr_link' is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/pcnet32.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index f78daba60b35..aa412506832d 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -2853,8 +2853,7 @@ static void pcnet32_check_media(struct net_device *dev, int verbose)
 			netif_info(lp, link, dev, "link down\n");
 		}
 		if (lp->phycount > 1) {
-			curr_link = pcnet32_check_otherphy(dev);
-			prev_link = 0;
+			pcnet32_check_otherphy(dev);
 		}
 	} else if (verbose || !prev_link) {
 		netif_carrier_on(dev);
-- 
cgit v1.2.3


From 9176e38027195346f50ab885498678ca7ae55a21 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sun, 25 Apr 2021 18:42:56 +0800
Subject: net: davicom: Remove redundant assignment to ret

Variable ret is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

drivers/net/ethernet/davicom/dm9000.c:1527:5: warning: Value stored to
'ret' is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/davicom/dm9000.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index e7f7121821be..2a8bf53c2f75 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1522,7 +1522,6 @@ dm9000_probe(struct platform_device *pdev)
 			if (ret) {
 				dev_err(db->dev, "irq %d cannot set wakeup (%d)\n",
 					db->irq_wake, ret);
-				ret = 0;
 			} else {
 				irq_set_irq_wake(db->irq_wake, 0);
 				db->wake_supported = 1;
-- 
cgit v1.2.3


From 2ad5692db72874f02b9ad551d26345437ea4f7f3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 26 Apr 2021 10:11:49 +0200
Subject: net: hso: fix NULL-deref on disconnect regression

Commit 8a12f8836145 ("net: hso: fix null-ptr-deref during tty device
unregistration") fixed the racy minor allocation reported by syzbot, but
introduced an unconditional NULL-pointer dereference on every disconnect
instead.

Specifically, the serial device table must no longer be accessed after
the minor has been released by hso_serial_tty_unregister().

Fixes: 8a12f8836145 ("net: hso: fix null-ptr-deref during tty device unregistration")
Cc: stable@vger.kernel.org
Cc: Anirudh Rayabharam <mail@anirudhrb.com>
Reported-by: Leonardo Antoniazzi <leoanto@aruba.it>
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Anirudh Rayabharam <mail@anirudhrb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 9bc58e64b5b7..3ef4b2841402 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -3104,7 +3104,7 @@ static void hso_free_interface(struct usb_interface *interface)
 			cancel_work_sync(&serial_table[i]->async_put_intf);
 			cancel_work_sync(&serial_table[i]->async_get_intf);
 			hso_serial_tty_unregister(serial);
-			kref_put(&serial_table[i]->ref, hso_serial_ref_free);
+			kref_put(&serial->parent->ref, hso_serial_ref_free);
 		}
 	}
 
-- 
cgit v1.2.3


From b9460dd84aa6f160995459c7f766b05c74b219db Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Sun, 25 Apr 2021 23:41:16 -0300
Subject: arm64: dts: rockchip: Remove unnecessary reset in rk3328.dtsi

Rockchip DWMAC glue driver uses the phy node (phy-handle)
reset specifier, and not a "mac-phy" reset specifier.

Remove it.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: David Wu <david.wu@rock-chips.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/rockchip/rk3328.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
index 063ed0adbec4..9709d659a5fc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi
@@ -919,8 +919,8 @@
 			      "mac_clk_tx", "clk_mac_ref",
 			      "aclk_mac", "pclk_mac",
 			      "clk_macphy";
-		resets = <&cru SRST_GMAC2PHY_A>, <&cru SRST_MACPHY>;
-		reset-names = "stmmaceth", "mac-phy";
+		resets = <&cru SRST_GMAC2PHY_A>;
+		reset-names = "stmmaceth";
 		phy-mode = "rmii";
 		phy-handle = <&phy>;
 		snps,txpbl = <0x4>;
-- 
cgit v1.2.3


From 517a882aa2b586b5c1b3cf9b1dec1593d191776d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Sun, 25 Apr 2021 23:41:17 -0300
Subject: dt-bindings: net: dwmac: Add Rockchip DWMAC support

Add Rockchip DWMAC controllers, which are based on snps,dwmac.
Some of the SoCs require up to eight clocks, so maxItems
for clocks and clock-names need to be increased.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: David Wu <david.wu@rock-chips.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 0642b0f59491..2edd8bea993e 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -56,6 +56,15 @@ properties:
         - amlogic,meson8m2-dwmac
         - amlogic,meson-gxbb-dwmac
         - amlogic,meson-axg-dwmac
+        - rockchip,px30-gmac
+        - rockchip,rk3128-gmac
+        - rockchip,rk3228-gmac
+        - rockchip,rk3288-gmac
+        - rockchip,rk3328-gmac
+        - rockchip,rk3366-gmac
+        - rockchip,rk3368-gmac
+        - rockchip,rk3399-gmac
+        - rockchip,rv1108-gmac
         - snps,dwmac
         - snps,dwmac-3.50a
         - snps,dwmac-3.610
@@ -89,7 +98,7 @@ properties:
 
   clocks:
     minItems: 1
-    maxItems: 5
+    maxItems: 8
     additionalItems: true
     items:
       - description: GMAC main clock
@@ -101,7 +110,7 @@ properties:
 
   clock-names:
     minItems: 1
-    maxItems: 5
+    maxItems: 8
     additionalItems: true
     contains:
       enum:
-- 
cgit v1.2.3


From b331b8ef86f07276a9acb78f10bd5538a29d5546 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Sun, 25 Apr 2021 23:41:18 -0300
Subject: dt-bindings: net: convert rockchip-dwmac to json-schema

Convert Rockchip dwmac controller dt-bindings to YAML.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: David Wu <david.wu@rock-chips.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/rockchip-dwmac.txt     |  76 -------------
 .../devicetree/bindings/net/rockchip-dwmac.yaml    | 120 +++++++++++++++++++++
 2 files changed, 120 insertions(+), 76 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/rockchip-dwmac.txt
 create mode 100644 Documentation/devicetree/bindings/net/rockchip-dwmac.yaml

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
deleted file mode 100644
index 3b71da7e8742..000000000000
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ /dev/null
@@ -1,76 +0,0 @@
-Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC)
-
-The device node has following properties.
-
-Required properties:
- - compatible: should be "rockchip,<name>-gamc"
-   "rockchip,px30-gmac":   found on PX30 SoCs
-   "rockchip,rk3128-gmac": found on RK312x SoCs
-   "rockchip,rk3228-gmac": found on RK322x SoCs
-   "rockchip,rk3288-gmac": found on RK3288 SoCs
-   "rockchip,rk3328-gmac": found on RK3328 SoCs
-   "rockchip,rk3366-gmac": found on RK3366 SoCs
-   "rockchip,rk3368-gmac": found on RK3368 SoCs
-   "rockchip,rk3399-gmac": found on RK3399 SoCs
-   "rockchip,rv1108-gmac": found on RV1108 SoCs
- - reg: addresses and length of the register sets for the device.
- - interrupts: Should contain the GMAC interrupts.
- - interrupt-names: Should contain the interrupt names "macirq".
- - rockchip,grf: phandle to the syscon grf used to control speed and mode.
- - clocks: <&cru SCLK_MAC>: clock selector for main clock, from PLL or PHY.
-	   <&cru SCLK_MAC_PLL>: PLL clock for SCLK_MAC
-	   <&cru SCLK_MAC_RX>: clock gate for RX
-	   <&cru SCLK_MAC_TX>: clock gate for TX
-	   <&cru SCLK_MACREF>: clock gate for RMII referce clock
-	   <&cru SCLK_MACREF_OUT> clock gate for RMII reference clock output
-	   <&cru ACLK_GMAC>: AXI clock gate for GMAC
-	   <&cru PCLK_GMAC>: APB clock gate for GMAC
- - clock-names: One name for each entry in the clocks property.
- - phy-mode: See ethernet.txt file in the same directory.
- - pinctrl-names: Names corresponding to the numbered pinctrl states.
- - pinctrl-0: pin-control mode. can be <&rgmii_pins> or <&rmii_pins>.
- - clock_in_out: For RGMII, it must be "input", means main clock(125MHz)
-   is not sourced from SoC's PLL, but input from PHY; For RMII, "input" means
-   PHY provides the reference clock(50MHz), "output" means GMAC provides the
-   reference clock.
- - snps,reset-gpio       gpio number for phy reset.
- - snps,reset-active-low boolean flag to indicate if phy reset is active low.
- - assigned-clocks: main clock, should be <&cru SCLK_MAC>;
- - assigned-clock-parents = parent of main clock.
-   can be <&ext_gmac> or <&cru SCLK_MAC_PLL>.
-
-Optional properties:
- - tx_delay: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default.
- - rx_delay: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default.
- - phy-supply: phandle to a regulator if the PHY needs one
-
-Example:
-
-gmac: ethernet@ff290000 {
-	compatible = "rockchip,rk3288-gmac";
-	reg = <0xff290000 0x10000>;
-	interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
-	interrupt-names = "macirq";
-	rockchip,grf = <&grf>;
-	clocks = <&cru SCLK_MAC>,
-		<&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>,
-		<&cru SCLK_MACREF>, <&cru SCLK_MACREF_OUT>,
-		<&cru ACLK_GMAC>, <&cru PCLK_GMAC>;
-	clock-names = "stmmaceth",
-		"mac_clk_rx", "mac_clk_tx",
-		"clk_mac_ref", "clk_mac_refout",
-		"aclk_mac", "pclk_mac";
-	phy-mode = "rgmii";
-	pinctrl-names = "default";
-	pinctrl-0 = <&rgmii_pins /*&rmii_pins*/>;
-
-	clock_in_out = "input";
-	snps,reset-gpio = <&gpio4 7 0>;
-	snps,reset-active-low;
-
-	assigned-clocks = <&cru SCLK_MAC>;
-	assigned-clock-parents = <&ext_gmac>;
-	tx_delay = <0x30>;
-	rx_delay = <0x10>;
-
-};
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
new file mode 100644
index 000000000000..5acddb6171bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/rockchip-dwmac.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Rockchip 10/100/1000 Ethernet driver(GMAC)
+
+maintainers:
+  - David Wu <david.wu@rock-chips.com>
+
+# We need a select here so we don't match all nodes with 'snps,dwmac'
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - rockchip,px30-gmac
+          - rockchip,rk3128-gmac
+          - rockchip,rk3228-gmac
+          - rockchip,rk3288-gmac
+          - rockchip,rk3328-gmac
+          - rockchip,rk3366-gmac
+          - rockchip,rk3368-gmac
+          - rockchip,rk3399-gmac
+          - rockchip,rv1108-gmac
+  required:
+    - compatible
+
+allOf:
+  - $ref: "snps,dwmac.yaml#"
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - rockchip,px30-gmac
+          - rockchip,rk3128-gmac
+          - rockchip,rk3228-gmac
+          - rockchip,rk3288-gmac
+          - rockchip,rk3328-gmac
+          - rockchip,rk3366-gmac
+          - rockchip,rk3368-gmac
+          - rockchip,rk3399-gmac
+          - rockchip,rv1108-gmac
+
+  clocks:
+    minItems: 5
+    maxItems: 8
+
+  clock-names:
+    contains:
+      enum:
+        - stmmaceth
+        - mac_clk_tx
+        - mac_clk_rx
+        - aclk_mac
+        - pclk_mac
+        - clk_mac_ref
+        - clk_mac_refout
+        - clk_mac_speed
+
+  clock_in_out:
+    description:
+      For RGMII, it must be "input", means main clock(125MHz)
+      is not sourced from SoC's PLL, but input from PHY.
+      For RMII, "input" means PHY provides the reference clock(50MHz),
+      "output" means GMAC provides the reference clock.
+    $ref: /schemas/types.yaml#/definitions/string
+    enum: [input, output]
+
+  rockchip,grf:
+    description: The phandle of the syscon node for the general register file.
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  tx_delay:
+    description: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  rx_delay:
+    description: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  phy-supply:
+    description: PHY regulator
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/rk3288-cru.h>
+
+    gmac: ethernet@ff290000 {
+        compatible = "rockchip,rk3288-gmac";
+        reg = <0xff290000 0x10000>;
+        interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names = "macirq";
+        clocks = <&cru SCLK_MAC>,
+                 <&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>,
+                 <&cru SCLK_MACREF>, <&cru SCLK_MACREF_OUT>,
+                 <&cru ACLK_GMAC>, <&cru PCLK_GMAC>;
+        clock-names = "stmmaceth",
+                      "mac_clk_rx", "mac_clk_tx",
+                      "clk_mac_ref", "clk_mac_refout",
+                      "aclk_mac", "pclk_mac";
+        assigned-clocks = <&cru SCLK_MAC>;
+        assigned-clock-parents = <&ext_gmac>;
+
+        rockchip,grf = <&grf>;
+        phy-mode = "rgmii";
+        clock_in_out = "input";
+        tx_delay = <0x30>;
+        rx_delay = <0x10>;
+    };
-- 
cgit v1.2.3


From 63fa73e2151848ed5930dfe0040c823ffe1f2cc4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@kernel.org>
Date: Mon, 26 Apr 2021 09:44:47 -0300
Subject: net: Fix typo in comment about ancillary data

Ingo sent typo fixes for tools/ and this resulted in a warning when
building the perf/core branch that will be sent upstream in the next
merge window:

  Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h'
  diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h

Fix the typo on the kernel file to address this.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 385894b4a8bb..b8fc5c53ba6f 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -85,7 +85,7 @@ struct mmsghdr {
 
 /*
  *	POSIX 1003.1g - ancillary data object information
- *	Ancillary data consits of a sequence of pairs of
+ *	Ancillary data consists of a sequence of pairs of
  *	(cmsghdr, cmsg_data[])
  */
 
-- 
cgit v1.2.3


From f77bd544a6bbe69aa50d9ed09f13494cf36ff806 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 26 Apr 2021 17:45:51 +0200
Subject: net/sched: act_ct: fix wild memory access when clearing fragments

while testing re-assembly/re-fragmentation using act_ct, it's possible to
observe a crash like the following one:

 KASAN: maybe wild-memory-access in range [0x0001000000000448-0x000100000000044f]
 CPU: 50 PID: 0 Comm: swapper/50 Tainted: G S                5.12.0-rc7+ #424
 Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017
 RIP: 0010:inet_frag_rbtree_purge+0x50/0xc0
 Code: 00 fc ff df 48 89 c3 31 ed 48 89 df e8 a9 7a 38 ff 4c 89 fe 48 89 df 49 89 c6 e8 5b 3a 38 ff 48 8d 7b 40 48 89 f8 48 c1 e8 03 <42> 80 3c 20 00 75 59 48 8d bb d0 00 00 00 4c 8b 6b 40 48 89 f8 48
 RSP: 0018:ffff888c31449db8 EFLAGS: 00010203
 RAX: 0000200000000089 RBX: 000100000000040e RCX: ffffffff989eb960
 RDX: 0000000000000140 RSI: ffffffff97cfb977 RDI: 000100000000044e
 RBP: 0000000000000900 R08: 0000000000000000 R09: ffffed1186289350
 R10: 0000000000000003 R11: ffffed1186289350 R12: dffffc0000000000
 R13: 000100000000040e R14: 0000000000000000 R15: ffff888155e02160
 FS:  0000000000000000(0000) GS:ffff888c31440000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00005600cb70a5b8 CR3: 0000000a2c014005 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  <IRQ>
  inet_frag_destroy+0xa9/0x150
  call_timer_fn+0x2d/0x180
  run_timer_softirq+0x4fe/0xe70
  __do_softirq+0x197/0x5a0
  irq_exit_rcu+0x1de/0x200
  sysvec_apic_timer_interrupt+0x6b/0x80
  </IRQ>

when act_ct temporarily stores an IP fragment, restoring the skb qdisc cb
results in putting random data in FRAG_CB(), and this causes those "wild"
memory accesses later, when the rbtree is purged. Never overwrite the skb
cb in case tcf_ct_handle_fragments() returns -EINPROGRESS.

Fixes: ae372cb1750f ("net/sched: act_ct: fix restore the qdisc_skb_cb after defrag")
Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ct.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 16e888a9601d..48fdf7293dea 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -732,7 +732,8 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
 #endif
 	}
 
-	*qdisc_skb_cb(skb) = cb;
+	if (err != -EINPROGRESS)
+		*qdisc_skb_cb(skb) = cb;
 	skb_clear_hash(skb);
 	skb->ignore_df = 1;
 	return err;
@@ -967,7 +968,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
 	if (err == -EINPROGRESS) {
 		retval = TC_ACT_STOLEN;
-		goto out;
+		goto out_clear;
 	}
 	if (err)
 		goto drop;
@@ -1030,7 +1031,6 @@ do_nat:
 out_push:
 	skb_push_rcsum(skb, nh_ofs);
 
-out:
 	qdisc_skb_cb(skb)->post_ct = true;
 out_clear:
 	tcf_action_update_bstats(&c->common, skb);
-- 
cgit v1.2.3


From 6d72e7c767acbbdd44ebc7d89c6690b405b32b57 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 26 Apr 2021 09:06:25 -0700
Subject: net:emac/emac-mac: Fix a use after free in emac_mac_tx_buf_send

In emac_mac_tx_buf_send, it calls emac_tx_fill_tpd(..,skb,..).
If some error happens in emac_tx_fill_tpd(), the skb will be freed via
dev_kfree_skb(skb) in error branch of emac_tx_fill_tpd().
But the freed skb is still used via skb->len by netdev_sent_queue(,skb->len).

As i observed that emac_tx_fill_tpd() haven't modified the value of skb->len,
thus my patch assigns skb->len to 'len' before the possible free and
use 'len' instead of skb->len later.

Fixes: b9b17debc69d2 ("net: emac: emac gigabit ethernet controller driver")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/emac/emac-mac.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 117188e3c7de..87b8c032195d 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1437,6 +1437,7 @@ netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt,
 {
 	struct emac_tpd tpd;
 	u32 prod_idx;
+	int len;
 
 	memset(&tpd, 0, sizeof(tpd));
 
@@ -1456,9 +1457,10 @@ netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt,
 	if (skb_network_offset(skb) != ETH_HLEN)
 		TPD_TYP_SET(&tpd, 1);
 
+	len = skb->len;
 	emac_tx_fill_tpd(adpt, tx_q, skb, &tpd);
 
-	netdev_sent_queue(adpt->netdev, skb->len);
+	netdev_sent_queue(adpt->netdev, len);
 
 	/* Make sure the are enough free descriptors to hold one
 	 * maximum-sized SKB.  We need one desc for each fragment,
-- 
cgit v1.2.3


From 7a2fa70aaffc2f8823feca22709a00f5c069a8a9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Apr 2021 12:29:45 -0700
Subject: selftests/bpf: Add remaining ASSERT_xxx() variants

Add ASSERT_TRUE/ASSERT_FALSE for conditions calculated with custom logic to
true/false. Also add remaining arithmetical assertions:
  - ASSERT_LE -- less than or equal;
  - ASSERT_GT -- greater than;
  - ASSERT_GE -- greater than or equal.
This should cover most scenarios where people fall back to error-prone
CHECK()s.

Also extend ASSERT_ERR() to print out errno, in addition to direct error.

Also convert few CHECK() instances to ensure new ASSERT_xxx() variants work as
expected. Subsequent patch will also use ASSERT_TRUE/ASSERT_FALSE more
extensively.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210426192949.416837-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf_dump.c  |  2 +-
 .../testing/selftests/bpf/prog_tests/btf_endian.c  |  4 +-
 .../testing/selftests/bpf/prog_tests/cgroup_link.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c |  2 +-
 .../selftests/bpf/prog_tests/resolve_btfids.c      |  7 +--
 .../selftests/bpf/prog_tests/snprintf_btf.c        |  4 +-
 tools/testing/selftests/bpf/test_progs.h           | 50 +++++++++++++++++++++-
 7 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c60091ee8a21..5e129dc2073c 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -77,7 +77,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
 
 	snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
 	fd = mkstemp(out_file);
-	if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+	if (!ASSERT_GE(fd, 0, "create_tmp")) {
 		err = fd;
 		goto done;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
index 8c52d72c876e..8ab5d3e358dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -6,8 +6,6 @@
 #include <test_progs.h>
 #include <bpf/btf.h>
 
-static int duration = 0;
-
 void test_btf_endian() {
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 	enum btf_endianness endian = BTF_LITTLE_ENDIAN;
@@ -71,7 +69,7 @@ void test_btf_endian() {
 
 	/* now modify original BTF */
 	var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
-	CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+	ASSERT_GT(var_id, 0, "var_id");
 
 	btf__free(swap_btf);
 	swap_btf = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 4d9b514b3fd9..736796e56ed1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -54,7 +54,7 @@ void test_cgroup_link(void)
 
 	for (i = 0; i < cg_nr; i++) {
 		cgs[i].fd = create_and_get_cgroup(cgs[i].path);
-		if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+		if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
 			goto cleanup;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 42c3a3103c26..d65107919998 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -134,7 +134,7 @@ void test_kfree_skb(void)
 	/* make sure kfree_skb program was triggered
 	 * and it sent expected skb into ring buffer
 	 */
-	CHECK_FAIL(!passed);
+	ASSERT_TRUE(passed, "passed");
 
 	err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
 	if (CHECK(err, "get_result",
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 6ace5e9efec1..d3c2de2c24d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -160,11 +160,8 @@ int test_resolve_btfids(void)
 			break;
 
 		if (i > 0) {
-			ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
-				    "sort_check",
-				    "test_set is not sorted\n");
-			if (ret)
-				break;
+			if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
+				return -1;
 		}
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
index 686b40f11a45..76e1f5fe18fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -42,9 +42,7 @@ void test_snprintf_btf(void)
 	 * and it set expected return values from bpf_trace_printk()s
 	 * and all tests ran.
 	 */
-	if (CHECK(bss->ret <= 0,
-		  "bpf_snprintf_btf: got return value",
-		  "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+	if (!ASSERT_GT(bss->ret, 0, "bpf_snprintf_ret"))
 		goto cleanup;
 
 	if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index ee7e3b45182a..dda52cb649dc 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -130,6 +130,20 @@ extern int test__join_cgroup(const char *path);
 #define CHECK_ATTR(condition, tag, format...) \
 	_CHECK(condition, tag, tattr.duration, format)
 
+#define ASSERT_TRUE(actual, name) ({					\
+	static int duration = 0;					\
+	bool ___ok = (actual);						\
+	CHECK(!___ok, (name), "unexpected %s: got FALSE\n", (name));	\
+	___ok;								\
+})
+
+#define ASSERT_FALSE(actual, name) ({					\
+	static int duration = 0;					\
+	bool ___ok = !(actual);						\
+	CHECK(!___ok, (name), "unexpected %s: got TRUE\n", (name));	\
+	___ok;								\
+})
+
 #define ASSERT_EQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	typeof(actual) ___act = (actual);				\
@@ -163,6 +177,39 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_LE(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act <= ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld > expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
+#define ASSERT_GT(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act > ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld <= expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
+#define ASSERT_GE(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act >= ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld < expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	const char *___act = actual;					\
@@ -178,7 +225,8 @@ extern int test__join_cgroup(const char *path);
 	static int duration = 0;					\
 	long long ___res = (res);					\
 	bool ___ok = ___res == 0;					\
-	CHECK(!___ok, (name), "unexpected error: %lld\n", ___res);	\
+	CHECK(!___ok, (name), "unexpected error: %lld (errno %d)\n",	\
+	      ___res, errno);						\
 	___ok;								\
 })
 
-- 
cgit v1.2.3


From 6709a914c8498f42b1498b3d31f4b078d092fd35 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Apr 2021 12:29:46 -0700
Subject: libbpf: Support BTF_KIND_FLOAT during type compatibility checks in
 CO-RE

Add BTF_KIND_FLOAT support when doing CO-RE field type compatibility check.
Without this, relocations against float/double fields will fail.

Also adjust one error message to emit instruction index instead of less
convenient instruction byte offset.

Fixes: 22541a9eeb0d ("libbpf: Add BTF_KIND_FLOAT support")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210426192949.416837-3-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a1cddd17af7d..e2a3cf437814 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5115,6 +5115,7 @@ err_out:
  *     least one of enums should be anonymous;
  *   - for ENUMs, check sizes, names are ignored;
  *   - for INT, size and signedness are ignored;
+ *   - any two FLOATs are always compatible;
  *   - for ARRAY, dimensionality is ignored, element types are checked for
  *     compatibility recursively;
  *   - everything else shouldn't be ever a target of relocation.
@@ -5141,6 +5142,7 @@ recur:
 
 	switch (btf_kind(local_type)) {
 	case BTF_KIND_PTR:
+	case BTF_KIND_FLOAT:
 		return 1;
 	case BTF_KIND_FWD:
 	case BTF_KIND_ENUM: {
@@ -6245,8 +6247,8 @@ patch_insn:
 	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
 	err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
 	if (err) {
-		pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
-			prog->name, relo_idx, relo->insn_off, err);
+		pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
+			prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 0f20615d64ee2ad5e2a133a812382d0c4071589b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Apr 2021 12:29:47 -0700
Subject: selftests/bpf: Fix BPF_CORE_READ_BITFIELD() macro

Fix BPF_CORE_READ_BITFIELD() macro used for reading CO-RE-relocatable
bitfields. Missing breaks in a switch caused 8-byte reads always. This can
confuse libbpf because it does strict checks that memory load size corresponds
to the original size of the field, which in this case quite often would be
wrong.

After fixing that, we run into another problem, which quite subtle, so worth
documenting here. The issue is in Clang optimization and CO-RE relocation
interactions. Without that asm volatile construct (also known as
barrier_var()), Clang will re-order BYTE_OFFSET and BYTE_SIZE relocations and
will apply BYTE_OFFSET 4 times for each switch case arm. This will result in
the same error from libbpf about mismatch of memory load size and original
field size. I.e., if we were reading u32, we'd still have *(u8 *), *(u16 *),
*(u32 *), and *(u64 *) memory loads, three of which will fail. Using
barrier_var() forces Clang to apply BYTE_OFFSET relocation first (and once) to
calculate p, after which value of p is used without relocation in each of
switch case arms, doing appropiately-sized memory load.

Here's the list of relevant relocations and pieces of generated BPF code
before and after this patch for test_core_reloc_bitfields_direct selftests.

BEFORE
=====
 #45: core_reloc: insn #160 --> [5] + 0:5: byte_sz --> struct core_reloc_bitfields.u32
 #46: core_reloc: insn #167 --> [5] + 0:5: byte_off --> struct core_reloc_bitfields.u32
 #47: core_reloc: insn #174 --> [5] + 0:5: byte_off --> struct core_reloc_bitfields.u32
 #48: core_reloc: insn #178 --> [5] + 0:5: byte_off --> struct core_reloc_bitfields.u32
 #49: core_reloc: insn #182 --> [5] + 0:5: byte_off --> struct core_reloc_bitfields.u32

     157:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
     159:       7b 12 20 01 00 00 00 00 *(u64 *)(r2 + 288) = r1
     160:       b7 02 00 00 04 00 00 00 r2 = 4
; BYTE_SIZE relocation here                 ^^^
     161:       66 02 07 00 03 00 00 00 if w2 s> 3 goto +7 <LBB0_63>
     162:       16 02 0d 00 01 00 00 00 if w2 == 1 goto +13 <LBB0_65>
     163:       16 02 01 00 02 00 00 00 if w2 == 2 goto +1 <LBB0_66>
     164:       05 00 12 00 00 00 00 00 goto +18 <LBB0_69>

0000000000000528 <LBB0_66>:
     165:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
     167:       69 11 08 00 00 00 00 00 r1 = *(u16 *)(r1 + 8)
; BYTE_OFFSET relo here w/ WRONG size        ^^^^^^^^^^^^^^^^
     168:       05 00 0e 00 00 00 00 00 goto +14 <LBB0_69>

0000000000000548 <LBB0_63>:
     169:       16 02 0a 00 04 00 00 00 if w2 == 4 goto +10 <LBB0_67>
     170:       16 02 01 00 08 00 00 00 if w2 == 8 goto +1 <LBB0_68>
     171:       05 00 0b 00 00 00 00 00 goto +11 <LBB0_69>

0000000000000560 <LBB0_68>:
     172:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
     174:       79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 8)
; BYTE_OFFSET relo here w/ WRONG size        ^^^^^^^^^^^^^^^^
     175:       05 00 07 00 00 00 00 00 goto +7 <LBB0_69>

0000000000000580 <LBB0_65>:
     176:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
     178:       71 11 08 00 00 00 00 00 r1 = *(u8 *)(r1 + 8)
; BYTE_OFFSET relo here w/ WRONG size        ^^^^^^^^^^^^^^^^
     179:       05 00 03 00 00 00 00 00 goto +3 <LBB0_69>

00000000000005a0 <LBB0_67>:
     180:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
     182:       61 11 08 00 00 00 00 00 r1 = *(u32 *)(r1 + 8)
; BYTE_OFFSET relo here w/ RIGHT size        ^^^^^^^^^^^^^^^^

00000000000005b8 <LBB0_69>:
     183:       67 01 00 00 20 00 00 00 r1 <<= 32
     184:       b7 02 00 00 00 00 00 00 r2 = 0
     185:       16 02 02 00 00 00 00 00 if w2 == 0 goto +2 <LBB0_71>
     186:       c7 01 00 00 20 00 00 00 r1 s>>= 32
     187:       05 00 01 00 00 00 00 00 goto +1 <LBB0_72>

00000000000005e0 <LBB0_71>:
     188:       77 01 00 00 20 00 00 00 r1 >>= 32

AFTER
=====

 #30: core_reloc: insn #132 --> [5] + 0:5: byte_off --> struct core_reloc_bitfields.u32
 #31: core_reloc: insn #134 --> [5] + 0:5: byte_sz --> struct core_reloc_bitfields.u32

     129:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
     131:       7b 12 20 01 00 00 00 00 *(u64 *)(r2 + 288) = r1
     132:       b7 01 00 00 08 00 00 00 r1 = 8
; BYTE_OFFSET relo here                     ^^^
; no size check for non-memory dereferencing instructions
     133:       0f 12 00 00 00 00 00 00 r2 += r1
     134:       b7 03 00 00 04 00 00 00 r3 = 4
; BYTE_SIZE relocation here                 ^^^
     135:       66 03 05 00 03 00 00 00 if w3 s> 3 goto +5 <LBB0_63>
     136:       16 03 09 00 01 00 00 00 if w3 == 1 goto +9 <LBB0_65>
     137:       16 03 01 00 02 00 00 00 if w3 == 2 goto +1 <LBB0_66>
     138:       05 00 0a 00 00 00 00 00 goto +10 <LBB0_69>

0000000000000458 <LBB0_66>:
     139:       69 21 00 00 00 00 00 00 r1 = *(u16 *)(r2 + 0)
; NO CO-RE relocation here                   ^^^^^^^^^^^^^^^^
     140:       05 00 08 00 00 00 00 00 goto +8 <LBB0_69>

0000000000000468 <LBB0_63>:
     141:       16 03 06 00 04 00 00 00 if w3 == 4 goto +6 <LBB0_67>
     142:       16 03 01 00 08 00 00 00 if w3 == 8 goto +1 <LBB0_68>
     143:       05 00 05 00 00 00 00 00 goto +5 <LBB0_69>

0000000000000480 <LBB0_68>:
     144:       79 21 00 00 00 00 00 00 r1 = *(u64 *)(r2 + 0)
; NO CO-RE relocation here                   ^^^^^^^^^^^^^^^^
     145:       05 00 03 00 00 00 00 00 goto +3 <LBB0_69>

0000000000000490 <LBB0_65>:
     146:       71 21 00 00 00 00 00 00 r1 = *(u8 *)(r2 + 0)
; NO CO-RE relocation here                   ^^^^^^^^^^^^^^^^
     147:       05 00 01 00 00 00 00 00 goto +1 <LBB0_69>

00000000000004a0 <LBB0_67>:
     148:       61 21 00 00 00 00 00 00 r1 = *(u32 *)(r2 + 0)
; NO CO-RE relocation here                   ^^^^^^^^^^^^^^^^

00000000000004a8 <LBB0_69>:
     149:       67 01 00 00 20 00 00 00 r1 <<= 32
     150:       b7 02 00 00 00 00 00 00 r2 = 0
     151:       16 02 02 00 00 00 00 00 if w2 == 0 goto +2 <LBB0_71>
     152:       c7 01 00 00 20 00 00 00 r1 s>>= 32
     153:       05 00 01 00 00 00 00 00 goto +1 <LBB0_72>

00000000000004d0 <LBB0_71>:
     154:       77 01 00 00 20 00 00 00 r1 >>= 323

Fixes: ee26dade0e3b ("libbpf: Add support for relocatable bitfields")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210426192949.416837-4-andrii@kernel.org
---
 tools/lib/bpf/bpf_core_read.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 53b3e199fb25..09ebe3db5f2f 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -88,11 +88,19 @@ enum bpf_enum_value_kind {
 	const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
 	unsigned long long val;						      \
 									      \
+	/* This is a so-called barrier_var() operation that makes specified   \
+	 * variable "a black box" for optimizing compiler.		      \
+	 * It forces compiler to perform BYTE_OFFSET relocation on p and use  \
+	 * its calculated value in the switch below, instead of applying      \
+	 * the same relocation 4 times for each individual memory load.       \
+	 */								      \
+	asm volatile("" : "=r"(p) : "0"(p));				      \
+									      \
 	switch (__CORE_RELO(s, field, BYTE_SIZE)) {			      \
-	case 1: val = *(const unsigned char *)p;			      \
-	case 2: val = *(const unsigned short *)p;			      \
-	case 4: val = *(const unsigned int *)p;				      \
-	case 8: val = *(const unsigned long long *)p;			      \
+	case 1: val = *(const unsigned char *)p; break;			      \
+	case 2: val = *(const unsigned short *)p; break;		      \
+	case 4: val = *(const unsigned int *)p; break;			      \
+	case 8: val = *(const unsigned long long *)p; break;		      \
 	}								      \
 	val <<= __CORE_RELO(s, field, LSHIFT_U64);			      \
 	if (__CORE_RELO(s, field, SIGNED))				      \
-- 
cgit v1.2.3


From 5a30eb23922b52f33222c6729b6b3ff1c37a6c66 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Apr 2021 12:29:48 -0700
Subject: selftests/bpf: Fix field existence CO-RE reloc tests

Negative field existence cases for have a broken assumption that FIELD_EXISTS
CO-RE relo will fail for fields that match the name but have incompatible type
signature. That's not how CO-RE relocations generally behave. Types and fields
that match by name but not by expected type are treated as non-matching
candidates and are skipped. Error later is reported if no matching candidate
was found. That's what happens for most relocations, but existence relocations
(FIELD_EXISTS and TYPE_EXISTS) are more permissive and they are designed to
return 0 or 1, depending if a match is found. This allows to handle
name-conflicting but incompatible types in BPF code easily. Combined with
___flavor suffixes, it's possible to handle pretty much any structural type
changes in kernel within the compiled once BPF source code.

So, long story short, negative field existence test cases are invalid in their
assumptions, so this patch reworks them into a single consolidated positive
case that doesn't match any of the fields.

Fixes: c7566a69695c ("selftests/bpf: Add field existence CO-RE relocs tests")
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210426192949.416837-5-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 31 +++++++++++++---------
 ...tf__core_reloc_existence___err_wrong_arr_kind.c |  3 ---
 ...re_reloc_existence___err_wrong_arr_value_type.c |  3 ---
 ...tf__core_reloc_existence___err_wrong_int_kind.c |  3 ---
 .../btf__core_reloc_existence___err_wrong_int_sz.c |  3 ---
 ...tf__core_reloc_existence___err_wrong_int_type.c |  3 ---
 ..._core_reloc_existence___err_wrong_struct_type.c |  3 ---
 .../btf__core_reloc_existence___wrong_field_defs.c |  3 +++
 .../testing/selftests/bpf/progs/core_reloc_types.h | 20 ++------------
 9 files changed, 24 insertions(+), 48 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
 delete mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index d94dcead72e6..385fd7696a2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -210,11 +210,6 @@ static int duration = 0;
 	.bpf_obj_file = "test_core_reloc_existence.o",			\
 	.btf_src_file = "btf__core_reloc_" #name ".o"			\
 
-#define FIELD_EXISTS_ERR_CASE(name) {					\
-	FIELD_EXISTS_CASE_COMMON(name),					\
-	.fails = true,							\
-}
-
 #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix,  name)		\
 	.case_name = test_name_prefix#name,				\
 	.bpf_obj_file = objfile,					\
@@ -643,13 +638,25 @@ static struct core_reloc_test_case test_cases[] = {
 		},
 		.output_len = sizeof(struct core_reloc_existence_output),
 	},
-
-	FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
-	FIELD_EXISTS_ERR_CASE(existence__err_int_type),
-	FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
-	FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
-	FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
-	FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
+	{
+		FIELD_EXISTS_CASE_COMMON(existence___wrong_field_defs),
+		.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___wrong_field_defs) {
+		},
+		.input_len = sizeof(struct core_reloc_existence___wrong_field_defs),
+		.output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+			.a_exists = 0,
+			.b_exists = 0,
+			.c_exists = 0,
+			.arr_exists = 0,
+			.s_exists = 0,
+			.a_value = 0xff000001u,
+			.b_value = 0xff000002u,
+			.c_value = 0xff000003u,
+			.arr_value = 0xff000004u,
+			.s_value = 0xff000005u,
+		},
+		.output_len = sizeof(struct core_reloc_existence_output),
+	},
 
 	/* bitfield relocation checks */
 	BITFIELDS_CASE(bitfields, {
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
deleted file mode 100644
index dd0ffa518f36..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
deleted file mode 100644
index bc83372088ad..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
deleted file mode 100644
index 917bec41be08..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
deleted file mode 100644
index 6ec7e6ec1c91..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
deleted file mode 100644
index 7bbcacf2b0d1..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
deleted file mode 100644
index f384dd38ec70..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
new file mode 100644
index 000000000000..d14b496190c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___wrong_field_defs x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9982eb969048..c95c0cabe951 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -700,27 +700,11 @@ struct core_reloc_existence___minimal {
 	int a;
 };
 
-struct core_reloc_existence___err_wrong_int_sz {
-	short a;
-};
-
-struct core_reloc_existence___err_wrong_int_type {
+struct core_reloc_existence___wrong_field_defs {
+	void *a;
 	int b[1];
-};
-
-struct core_reloc_existence___err_wrong_int_kind {
 	struct{ int x; } c;
-};
-
-struct core_reloc_existence___err_wrong_arr_kind {
 	int arr;
-};
-
-struct core_reloc_existence___err_wrong_arr_value_type {
-	short arr[1];
-};
-
-struct core_reloc_existence___err_wrong_struct_type {
 	int s;
 };
 
-- 
cgit v1.2.3


From bede0ebf0be87e9678103486a77f39e0334c6791 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Apr 2021 12:29:49 -0700
Subject: selftests/bpf: Fix core_reloc test runner

Fix failed tests checks in core_reloc test runner, which allowed failing tests
to pass quietly. Also add extra check to make sure that expected to fail test cases with
invalid names are caught as test failure anyway, as this is not an expected
failure mode. Also fix mislabeled probed vs direct bitfield test cases.

Fixes: 124a892d1c41 ("selftests/bpf: Test TYPE_EXISTS and TYPE_SIZE CO-RE relocations")
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/20210426192949.416837-6-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/core_reloc.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 385fd7696a2e..607710826dca 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -217,7 +217,7 @@ static int duration = 0;
 
 #define BITFIELDS_CASE(name, ...) {					\
 	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o",	\
-			      "direct:", name),				\
+			      "probed:", name),				\
 	.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__,	\
 	.input_len = sizeof(struct core_reloc_##name),			\
 	.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output)	\
@@ -225,7 +225,7 @@ static int duration = 0;
 	.output_len = sizeof(struct core_reloc_bitfields_output),	\
 }, {									\
 	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o",	\
-			      "probed:", name),				\
+			      "direct:", name),				\
 	.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__,	\
 	.input_len = sizeof(struct core_reloc_##name),			\
 	.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output)	\
@@ -546,8 +546,7 @@ static struct core_reloc_test_case test_cases[] = {
 	ARRAYS_ERR_CASE(arrays___err_too_small),
 	ARRAYS_ERR_CASE(arrays___err_too_shallow),
 	ARRAYS_ERR_CASE(arrays___err_non_array),
-	ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
-	ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+	ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
 	ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
 
 	/* enum/ptr/int handling scenarios */
@@ -865,13 +864,20 @@ void test_core_reloc(void)
 			  "prog '%s' not found\n", probe_name))
 			goto cleanup;
 
+
+		if (test_case->btf_src_file) {
+			err = access(test_case->btf_src_file, R_OK);
+			if (!ASSERT_OK(err, "btf_src_file"))
+				goto cleanup;
+		}
+
 		load_attr.obj = obj;
 		load_attr.log_level = 0;
 		load_attr.target_btf_path = test_case->btf_src_file;
 		err = bpf_object__load_xattr(&load_attr);
 		if (err) {
 			if (!test_case->fails)
-				CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
+				ASSERT_OK(err, "obj_load");
 			goto cleanup;
 		}
 
@@ -910,10 +916,8 @@ void test_core_reloc(void)
 			goto cleanup;
 		}
 
-		if (test_case->fails) {
-			CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+		if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail"))
 			goto cleanup;
-		}
 
 		equal = memcmp(data->out, test_case->output,
 			       test_case->output_len) == 0;
-- 
cgit v1.2.3


From 38d26d89b31d0766d431471572cc9b007ca19c98 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Tue, 27 Apr 2021 13:29:58 +0200
Subject: bpf: Lock bpf_trace_printk's tmp buf before it is written to

bpf_trace_printk uses a shared static buffer to hold strings before they
are printed. A recent refactoring moved the locking of that buffer after
it gets filled by mistake.

Fixes: d9c9e4db186a ("bpf: Factorize bpf_trace_printk and bpf_seq_printf")
Reported-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210427112958.773132-1-revest@chromium.org
---
 kernel/trace/bpf_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2a8bcdc927c7..0e67d12a8f40 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -391,13 +391,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	if (ret < 0)
 		return ret;
 
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
 	ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
 		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
 	/* snprintf() will not append null for zero-length strings */
 	if (ret == 0)
 		buf[0] = '\0';
 
-	raw_spin_lock_irqsave(&trace_printk_lock, flags);
 	trace_bpf_trace_printk(buf);
 	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
 
-- 
cgit v1.2.3


From 10bf4e83167cc68595b85fd73bb91e8f2c086e36 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 23 Apr 2021 13:59:55 +0000
Subject: bpf: Fix propagation of 32 bit unsigned bounds from 64 bit bounds

Similarly as b02709587ea3 ("bpf: Fix propagation of 32-bit signed bounds
from 64-bit bounds."), we also need to fix the propagation of 32 bit
unsigned bounds from 64 bit counterparts. That is, really only set the
u32_{min,max}_value when /both/ {umin,umax}_value safely fit in 32 bit
space. For example, the register with a umin_value == 1 does /not/ imply
that u32_min_value is also equal to 1, since umax_value could be much
larger than 32 bit subregister can hold, and thus u32_min_value is in
the interval [0,1] instead.

Before fix, invalid tracking result of R2_w=inv1:

  [...]
  5: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R10=fp0
  5: (35) if r2 >= 0x1 goto pc+1
  [...] // goto path
  7: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,umin_value=1) R10=fp0
  7: (b6) if w2 <= 0x1 goto pc+1
  [...] // goto path
  9: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,smin_value=-9223372036854775807,smax_value=9223372032559808513,umin_value=1,umax_value=18446744069414584321,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_max_value=1) R10=fp0
  9: (bc) w2 = w2
  10: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv1 R10=fp0
  [...]

After fix, correct tracking result of R2_w=inv(id=0,umax_value=1,var_off=(0x0; 0x1)):

  [...]
  5: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0) R10=fp0
  5: (35) if r2 >= 0x1 goto pc+1
  [...] // goto path
  7: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,umin_value=1) R10=fp0
  7: (b6) if w2 <= 0x1 goto pc+1
  [...] // goto path
  9: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2=inv(id=0,smax_value=9223372032559808513,umax_value=18446744069414584321,var_off=(0x0; 0xffffffff00000001),s32_min_value=0,s32_max_value=1,u32_max_value=1) R10=fp0
  9: (bc) w2 = w2
  10: R0=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R10=fp0
  [...]

Thus, same issue as in b02709587ea3 holds for unsigned subregister tracking.
Also, align __reg64_bound_u32() similarly to __reg64_bound_s32() as done in
b02709587ea3 to make them uniform again.

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Reported-by: Manfred Paul (@_manfp)
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                               | 8 +++-----
 tools/testing/selftests/bpf/verifier/array_access.c | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 637462e9b6ee..9145f88b2a0a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1398,9 +1398,7 @@ static bool __reg64_bound_s32(s64 a)
 
 static bool __reg64_bound_u32(u64 a)
 {
-	if (a > U32_MIN && a < U32_MAX)
-		return true;
-	return false;
+	return a > U32_MIN && a < U32_MAX;
 }
 
 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
@@ -1411,10 +1409,10 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
 		reg->s32_min_value = (s32)reg->smin_value;
 		reg->s32_max_value = (s32)reg->smax_value;
 	}
-	if (__reg64_bound_u32(reg->umin_value))
+	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
 		reg->u32_min_value = (u32)reg->umin_value;
-	if (__reg64_bound_u32(reg->umax_value))
 		reg->u32_max_value = (u32)reg->umax_value;
+	}
 
 	/* Intersecting with the old var_off might have improved our bounds
 	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 1b138cd2b187..1b1c798e9248 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -186,7 +186,7 @@
 	},
 	.fixup_map_hash_48b = { 3 },
 	.errstr_unpriv = "R0 leaks addr",
-	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
+	.errstr = "R0 unbounded memory access",
 	.result_unpriv = REJECT,
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-- 
cgit v1.2.3


From bb0247807744dc93407771e13ba20af0b270ca6a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 23 Apr 2021 11:27:27 +0200
Subject: bpf, cpumap: Bulk skb using netif_receive_skb_list

Rely on netif_receive_skb_list routine to send skbs converted from
xdp_frames in cpu_map_kthread_run in order to improve i-cache usage.
The proposed patch has been tested running xdp_redirect_cpu bpf sample
available in the kernel tree that is used to redirect UDP frames from
ixgbe driver to a cpumap entry and then to the networking stack. UDP
frames are generated using pktgen. Packets are discarded by the UDP
layer.

$ xdp_redirect_cpu  --cpu <cpu> --progname xdp_cpu_map0 --dev <eth>

bpf-next: ~2.35Mpps
bpf-next + cpumap skb-list: ~2.72Mpps

Rename drops counter in kmem_alloc_drops since now it reports just
kmem_cache_alloc_bulk failures

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/c729f83e5d7482d9329e0f165bdbe5adcefd1510.1619169700.git.lorenzo@kernel.org
---
 kernel/bpf/cpumap.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 0cf2791d5099..5dd3e866599a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -27,7 +27,7 @@
 #include <linux/capability.h>
 #include <trace/events/xdp.h>
 
-#include <linux/netdevice.h>   /* netif_receive_skb_core */
+#include <linux/netdevice.h>   /* netif_receive_skb_list */
 #include <linux/etherdevice.h> /* eth_type_trans */
 
 /* General idea: XDP packets getting XDP redirected to another CPU,
@@ -252,11 +252,12 @@ static int cpu_map_kthread_run(void *data)
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		struct xdp_cpumap_stats stats = {}; /* zero stats */
+		unsigned int kmem_alloc_drops = 0, sched = 0;
 		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
-		unsigned int drops = 0, sched = 0;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
 		int i, n, m, nframes;
+		LIST_HEAD(list);
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -297,7 +298,7 @@ static int cpu_map_kthread_run(void *data)
 			if (unlikely(m == 0)) {
 				for (i = 0; i < nframes; i++)
 					skbs[i] = NULL; /* effect: xdp_return_frame */
-				drops += nframes;
+				kmem_alloc_drops += nframes;
 			}
 		}
 
@@ -305,7 +306,6 @@ static int cpu_map_kthread_run(void *data)
 		for (i = 0; i < nframes; i++) {
 			struct xdp_frame *xdpf = frames[i];
 			struct sk_buff *skb = skbs[i];
-			int ret;
 
 			skb = __xdp_build_skb_from_frame(xdpf, skb,
 							 xdpf->dev_rx);
@@ -314,13 +314,13 @@ static int cpu_map_kthread_run(void *data)
 				continue;
 			}
 
-			/* Inject into network stack */
-			ret = netif_receive_skb_core(skb);
-			if (ret == NET_RX_DROP)
-				drops++;
+			list_add_tail(&skb->list, &list);
 		}
+		netif_receive_skb_list(&list);
+
 		/* Feedback loop via tracepoint */
-		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched, &stats);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
+					 sched, &stats);
 
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
-- 
cgit v1.2.3


From 2551c2d19c04cd1c7b6c99ec04a8ff08193b0ccc Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Sat, 24 Apr 2021 10:12:08 +0800
Subject: bpf, docs: Fix literal block for example code

Add a missing colon so that the code block followed can be rendered
properly.

Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210424021208.832116-1-hengqi.chen@gmail.com
---
 Documentation/networking/filter.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index 251c6bd73d15..3e2221f4abe4 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -327,7 +327,7 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
-**icmp random packet sampling, 1 in 4**:
+**icmp random packet sampling, 1 in 4**::
 
   ldh [12]
   jne #0x800, drop
-- 
cgit v1.2.3


From f8bb7889af58d8e74d2d61c76b1418230f1610fa Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Apr 2021 18:05:36 +0200
Subject: netfilter: nftables: rename set element data activation/deactivation
 functions

Rename:

- nft_set_elem_activate() to nft_set_elem_data_activate().
- nft_set_elem_deactivate() to nft_set_elem_data_deactivate().

To prepare for updates in the set element infrastructure to add support
for the special catch-all element.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1050f23c0d29..d66be7d8f3e5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5263,8 +5263,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
-/* Only called from commit path, nft_set_elem_deactivate() already deals with
- * the refcounting from the preparation phase.
+/* Only called from commit path, nft_setelem_data_deactivate() already deals
+ * with the refcounting from the preparation phase.
  */
 static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
 				       const struct nft_set *set, void *elem)
@@ -5733,9 +5733,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
 	}
 }
 
-static void nft_set_elem_activate(const struct net *net,
-				  const struct nft_set *set,
-				  struct nft_set_elem *elem)
+static void nft_setelem_data_activate(const struct net *net,
+				      const struct nft_set *set,
+				      struct nft_set_elem *elem)
 {
 	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 
@@ -5745,9 +5745,9 @@ static void nft_set_elem_activate(const struct net *net,
 		(*nft_set_ext_obj(ext))->use++;
 }
 
-static void nft_set_elem_deactivate(const struct net *net,
-				    const struct nft_set *set,
-				    struct nft_set_elem *elem)
+static void nft_setelem_data_deactivate(const struct net *net,
+					const struct nft_set *set,
+					struct nft_set_elem *elem)
 {
 	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 
@@ -5824,7 +5824,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	kfree(elem.priv);
 	elem.priv = priv;
 
-	nft_set_elem_deactivate(ctx->net, set, &elem);
+	nft_setelem_data_deactivate(ctx->net, set, &elem);
 
 	nft_trans_elem(trans) = elem;
 	nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -5858,7 +5858,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
 	}
 	set->ndeact++;
 
-	nft_set_elem_deactivate(ctx->net, set, elem);
+	nft_setelem_data_deactivate(ctx->net, set, elem);
 	nft_trans_elem_set(trans) = set;
 	nft_trans_elem(trans) = *elem;
 	nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -8479,7 +8479,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
-			nft_set_elem_activate(net, te->set, &te->elem);
+			nft_setelem_data_activate(net, te->set, &te->elem);
 			te->set->ops->activate(net, te->set, &te->elem);
 			te->set->ndeact--;
 
-- 
cgit v1.2.3


From 6387aa6e59be8d1158c5703f34553c93d7743d8c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Apr 2021 18:05:41 +0200
Subject: netfilter: nftables: add loop check helper function

This patch adds nft_check_loops() to reuse it in the new catch-all
element codebase.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d66be7d8f3e5..502240fbb087 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8626,26 +8626,38 @@ EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
 static int nf_tables_check_loops(const struct nft_ctx *ctx,
 				 const struct nft_chain *chain);
 
+static int nft_check_loops(const struct nft_ctx *ctx,
+			   const struct nft_set_ext *ext)
+{
+	const struct nft_data *data;
+	int ret;
+
+	data = nft_set_ext_data(ext);
+	switch (data->verdict.code) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		ret = nf_tables_check_loops(ctx, data->verdict.chain);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
 static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 					struct nft_set *set,
 					const struct nft_set_iter *iter,
 					struct nft_set_elem *elem)
 {
 	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
-	const struct nft_data *data;
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
 	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
 		return 0;
 
-	data = nft_set_ext_data(ext);
-	switch (data->verdict.code) {
-	case NFT_JUMP:
-	case NFT_GOTO:
-		return nf_tables_check_loops(ctx, data->verdict.chain);
-	default:
-		return 0;
-	}
+	return nft_check_loops(ctx, ext);
 }
 
 static int nf_tables_check_loops(const struct nft_ctx *ctx,
-- 
cgit v1.2.3


From e6ba7cb63b8ae0e13e6c2acc4067097c1181f6bf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Apr 2021 18:05:45 +0200
Subject: netfilter: nftables: add helper function to flush set elements

This patch adds nft_set_flush() which prepares for the catch-all
element support.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 502240fbb087..3342f260d534 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5839,10 +5839,10 @@ fail_elem:
 	return err;
 }
 
-static int nft_flush_set(const struct nft_ctx *ctx,
-			 struct nft_set *set,
-			 const struct nft_set_iter *iter,
-			 struct nft_set_elem *elem)
+static int nft_setelem_flush(const struct nft_ctx *ctx,
+			     struct nft_set *set,
+			     const struct nft_set_iter *iter,
+			     struct nft_set_elem *elem)
 {
 	struct nft_trans *trans;
 	int err;
@@ -5869,6 +5869,18 @@ err1:
 	return err;
 }
 
+static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
+{
+	struct nft_set_iter iter = {
+		.genmask	= genmask,
+		.fn		= nft_setelem_flush,
+	};
+
+	set->ops->walk(ctx, set, &iter);
+
+	return iter.err;
+}
+
 static int nf_tables_delsetelem(struct sk_buff *skb,
 				const struct nfnl_info *info,
 				const struct nlattr * const nla[])
@@ -5892,15 +5904,8 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;
 
-	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
-		struct nft_set_iter iter = {
-			.genmask	= genmask,
-			.fn		= nft_flush_set,
-		};
-		set->ops->walk(&ctx, set, &iter);
-
-		return iter.err;
-	}
+	if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+		return nft_set_flush(&ctx, set, genmask);
 
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_del_setelem(&ctx, set, attr);
-- 
cgit v1.2.3


From 97c976d662fb9080a6a5d1e1a108c7a1f5c9484d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Apr 2021 18:05:48 +0200
Subject: netfilter: nftables: add helper function to validate set element data

When binding sets to rule, validate set element data according to
set definition. This patch adds a helper function to be reused by
the catch-all set element support.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3342f260d534..faf0424375e8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4499,10 +4499,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
 				       enum nft_data_types type,
 				       unsigned int len);
 
-static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
-					struct nft_set *set,
-					const struct nft_set_iter *iter,
-					struct nft_set_elem *elem)
+static int nft_setelem_data_validate(const struct nft_ctx *ctx,
+				     struct nft_set *set,
+				     struct nft_set_elem *elem)
 {
 	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 	enum nft_registers dreg;
@@ -4514,6 +4513,14 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
 					   set->dlen);
 }
 
+static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
+					struct nft_set *set,
+					const struct nft_set_iter *iter,
+					struct nft_set_elem *elem)
+{
+	return nft_setelem_data_validate(ctx, set, elem);
+}
+
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		       struct nft_set_binding *binding)
 {
-- 
cgit v1.2.3


From aaa31047a6d25da0fa101da1ed544e1247949b40 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 27 Apr 2021 18:05:55 +0200
Subject: netfilter: nftables: add catch-all set element support

This patch extends the set infrastructure to add a special catch-all set
element. If the lookup fails to find an element (or range) in the set,
then the catch-all element is selected. Users can specify a mapping,
expression(s) and timeout to be attached to the catch-all element.

This patch adds a catchall list to the set, this list might contain more
than one single catch-all element (e.g. in case that the catch-all
element is removed and a new one is added in the same transaction).
However, most of the time, there will be either one element or no
elements at all in this list.

The catch-all element is identified via NFT_SET_ELEM_CATCHALL flag and
such special element has no NFTA_SET_ELEM_KEY attribute. There is a new
nft_set_elem_catchall object that stores a reference to the dummy
catch-all element (catchall->elem) whose layout is the same of the set
element type to reuse the existing set element codebase.

The set size does not apply to the catch-all element, users can define a
catch-all element even if the set is full.

The check for valid set element flags hava been updates to report
EOPNOTSUPP in case userspace requests flags that are not supported when
using new userspace nftables and old kernel.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |   5 +
 include/uapi/linux/netfilter/nf_tables.h |   2 +
 net/netfilter/nf_tables_api.c            | 480 +++++++++++++++++++++++++++----
 net/netfilter/nft_lookup.c               |  12 +-
 net/netfilter/nft_objref.c               |  11 +-
 net/netfilter/nft_set_hash.c             |   6 +
 net/netfilter/nft_set_pipapo.c           |   6 +-
 net/netfilter/nft_set_rbtree.c           |   6 +
 8 files changed, 465 insertions(+), 63 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index eb708b77c4a5..27eeb613bb4e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -497,6 +497,7 @@ struct nft_set {
 	u8				dlen;
 	u8				num_exprs;
 	struct nft_expr			*exprs[NFT_SET_EXPR_MAX];
+	struct list_head		catchall_list;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(u64))));
 };
@@ -522,6 +523,10 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 				      const struct nlattr *nla_set_id,
 				      u8 genmask);
 
+struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+					    const struct nft_set *set);
+void *nft_set_catchall_gc(const struct nft_set *set);
+
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
 	return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 467365ed59a7..1fb4ca18ffbb 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -398,9 +398,11 @@ enum nft_set_attributes {
  * enum nft_set_elem_flags - nf_tables set element flags
  *
  * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval
+ * @NFT_SET_ELEM_CATCHALL: special catch-all element
  */
 enum nft_set_elem_flags {
 	NFT_SET_ELEM_INTERVAL_END	= 0x1,
+	NFT_SET_ELEM_CATCHALL		= 0x2,
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index faf0424375e8..0b7fe0a902ff 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4389,6 +4389,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	}
 
 	INIT_LIST_HEAD(&set->bindings);
+	INIT_LIST_HEAD(&set->catchall_list);
 	set->table = table;
 	write_pnet(&set->net, net);
 	set->ops   = ops;
@@ -4434,6 +4435,24 @@ err_set_name:
 	return err;
 }
 
+struct nft_set_elem_catchall {
+	struct list_head	list;
+	struct rcu_head		rcu;
+	void			*elem;
+};
+
+static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
+				     struct nft_set *set)
+{
+	struct nft_set_elem_catchall *catchall;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		list_del_rcu(&catchall->list);
+		nft_set_elem_destroy(set, catchall->elem, true);
+		kfree_rcu(catchall);
+	}
+}
+
 static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	int i;
@@ -4445,6 +4464,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 		nft_expr_destroy(ctx, set->exprs[i]);
 
 	set->ops->destroy(set);
+	nft_set_catchall_destroy(ctx, set);
 	kfree(set->name);
 	kvfree(set);
 }
@@ -4521,6 +4541,29 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
 	return nft_setelem_data_validate(ctx, set, elem);
 }
 
+static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
+				       struct nft_set *set)
+{
+	u8 genmask = nft_genmask_next(ctx->net);
+	struct nft_set_elem_catchall *catchall;
+	struct nft_set_elem elem;
+	struct nft_set_ext *ext;
+	int ret = 0;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_set_elem_active(ext, genmask))
+			continue;
+
+		elem.priv = catchall->elem;
+		ret = nft_setelem_data_validate(ctx, set, &elem);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		       struct nft_set_binding *binding)
 {
@@ -4550,6 +4593,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		iter.fn		= nf_tables_bind_check_setelem;
 
 		set->ops->walk(ctx, set, &iter);
+		if (!iter.err)
+			iter.err = nft_set_catchall_bind_check(ctx, set);
+
 		if (iter.err < 0)
 			return iter.err;
 	}
@@ -4736,7 +4782,8 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY) &&
+	    nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
 			  NFT_DATA_VALUE, set->klen) < 0)
 		goto nla_put_failure;
 
@@ -4825,6 +4872,29 @@ struct nft_set_dump_ctx {
 	struct nft_ctx		ctx;
 };
 
+static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
+				 const struct nft_set *set)
+{
+	struct nft_set_elem_catchall *catchall;
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_set_elem elem;
+	struct nft_set_ext *ext;
+	int ret = 0;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_set_elem_active(ext, genmask) ||
+		    nft_set_elem_expired(ext))
+			continue;
+
+		elem.priv = catchall->elem;
+		ret = nf_tables_fill_setelem(skb, set, &elem);
+		break;
+	}
+
+	return ret;
+}
+
 static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nft_set_dump_ctx *dump_ctx = cb->data;
@@ -4889,6 +4959,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	args.iter.err		= 0;
 	args.iter.fn		= nf_tables_dump_setelem;
 	set->ops->walk(&dump_ctx->ctx, set, &args.iter);
+
+	if (!args.iter.err && args.iter.count == cb->args[0])
+		args.iter.err = nft_set_catchall_dump(net, skb, set);
 	rcu_read_unlock();
 
 	nla_nest_end(skb, nest);
@@ -4968,8 +5041,8 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
 		return 0;
 
 	*flags = ntohl(nla_get_be32(attr));
-	if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
-		return -EINVAL;
+	if (*flags & ~(NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL))
+		return -EOPNOTSUPP;
 	if (!(set->flags & NFT_SET_INTERVAL) &&
 	    *flags & NFT_SET_ELEM_INTERVAL_END)
 		return -EINVAL;
@@ -5014,6 +5087,46 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
 	return 0;
 }
 
+static void *nft_setelem_catchall_get(const struct net *net,
+				      const struct nft_set *set)
+{
+	struct nft_set_elem_catchall *catchall;
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_set_ext *ext;
+	void *priv = NULL;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_set_elem_active(ext, genmask) ||
+		    nft_set_elem_expired(ext))
+			continue;
+
+		priv = catchall->elem;
+		break;
+	}
+
+	return priv;
+}
+
+static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
+			   struct nft_set_elem *elem, u32 flags)
+{
+	void *priv;
+
+	if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+		priv = set->ops->get(ctx->net, set, elem, flags);
+		if (IS_ERR(priv))
+			return PTR_ERR(priv);
+	} else {
+		priv = nft_setelem_catchall_get(ctx->net, set);
+		if (!priv)
+			return -ENOENT;
+	}
+	elem->priv = priv;
+
+	return 0;
+}
+
 static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
@@ -5021,7 +5134,6 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	struct sk_buff *skb;
 	uint32_t flags = 0;
-	void *priv;
 	int err;
 
 	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
@@ -5029,17 +5141,19 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (err < 0)
 		return err;
 
-	if (!nla[NFTA_SET_ELEM_KEY])
-		return -EINVAL;
-
 	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
 	if (err < 0)
 		return err;
 
-	err = nft_setelem_parse_key(ctx, set, &elem.key.val,
-				    nla[NFTA_SET_ELEM_KEY]);
-	if (err < 0)
-		return err;
+	if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+		return -EINVAL;
+
+	if (nla[NFTA_SET_ELEM_KEY]) {
+		err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+					    nla[NFTA_SET_ELEM_KEY]);
+		if (err < 0)
+			return err;
+	}
 
 	if (nla[NFTA_SET_ELEM_KEY_END]) {
 		err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
@@ -5048,11 +5162,9 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			return err;
 	}
 
-	priv = set->ops->get(ctx->net, set, &elem, flags);
-	if (IS_ERR(priv))
-		return PTR_ERR(priv);
-
-	elem.priv = priv;
+	err = nft_setelem_get(ctx, set, &elem, flags);
+	if (err < 0)
+		return err;
 
 	err = -ENOMEM;
 	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -5212,7 +5324,8 @@ void *nft_set_elem_init(const struct nft_set *set,
 	ext = nft_set_elem_ext(set, elem);
 	nft_set_ext_init(ext, tmpl);
 
-	memcpy(nft_set_ext_key(ext), key, set->klen);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY))
+		memcpy(nft_set_ext_key(ext), key, set->klen);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
 		memcpy(nft_set_ext_key_end(ext), key_end, set->klen);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
@@ -5343,6 +5456,192 @@ err_elem_expr_setup:
 	return -ENOMEM;
 }
 
+struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
+					    const struct nft_set *set)
+{
+	struct nft_set_elem_catchall *catchall;
+	u8 genmask = nft_genmask_cur(net);
+	struct nft_set_ext *ext;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (nft_set_elem_active(ext, genmask) &&
+		    !nft_set_elem_expired(ext))
+			return ext;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
+
+void *nft_set_catchall_gc(const struct nft_set *set)
+{
+	struct nft_set_elem_catchall *catchall, *next;
+	struct nft_set_ext *ext;
+	void *elem = NULL;
+
+	list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+
+		if (!nft_set_elem_expired(ext) ||
+		    nft_set_elem_mark_busy(ext))
+			continue;
+
+		elem = catchall->elem;
+		list_del_rcu(&catchall->list);
+		kfree_rcu(catchall, rcu);
+		break;
+	}
+
+	return elem;
+}
+EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
+
+static int nft_setelem_catchall_insert(const struct net *net,
+				       struct nft_set *set,
+				       const struct nft_set_elem *elem,
+				       struct nft_set_ext **pext)
+{
+	struct nft_set_elem_catchall *catchall;
+	u8 genmask = nft_genmask_next(net);
+	struct nft_set_ext *ext;
+
+	list_for_each_entry(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (nft_set_elem_active(ext, genmask)) {
+			*pext = ext;
+			return -EEXIST;
+		}
+	}
+
+	catchall = kmalloc(sizeof(*catchall), GFP_KERNEL);
+	if (!catchall)
+		return -ENOMEM;
+
+	catchall->elem = elem->priv;
+	list_add_tail_rcu(&catchall->list, &set->catchall_list);
+
+	return 0;
+}
+
+static int nft_setelem_insert(const struct net *net,
+			      struct nft_set *set,
+			      const struct nft_set_elem *elem,
+			      struct nft_set_ext **ext, unsigned int flags)
+{
+	int ret;
+
+	if (flags & NFT_SET_ELEM_CATCHALL)
+		ret = nft_setelem_catchall_insert(net, set, elem, ext);
+	else
+		ret = set->ops->insert(net, set, elem, ext);
+
+	return ret;
+}
+
+static bool nft_setelem_is_catchall(const struct nft_set *set,
+				    const struct nft_set_elem *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL)
+		return true;
+
+	return false;
+}
+
+static void nft_setelem_activate(struct net *net, struct nft_set *set,
+				 struct nft_set_elem *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+
+	if (nft_setelem_is_catchall(set, elem)) {
+		nft_set_elem_change_active(net, set, ext);
+		nft_set_elem_clear_busy(ext);
+	} else {
+		set->ops->activate(net, set, elem);
+	}
+}
+
+static int nft_setelem_catchall_deactivate(const struct net *net,
+					   struct nft_set *set,
+					   struct nft_set_elem *elem)
+{
+	struct nft_set_elem_catchall *catchall;
+	struct nft_set_ext *ext;
+
+	list_for_each_entry(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_is_active(net, ext) ||
+		    nft_set_elem_mark_busy(ext))
+			continue;
+
+		kfree(elem->priv);
+		elem->priv = catchall->elem;
+		nft_set_elem_change_active(net, set, ext);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int __nft_setelem_deactivate(const struct net *net,
+				    struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	void *priv;
+
+	priv = set->ops->deactivate(net, set, elem);
+	if (!priv)
+		return -ENOENT;
+
+	kfree(elem->priv);
+	elem->priv = priv;
+	set->ndeact++;
+
+	return 0;
+}
+
+static int nft_setelem_deactivate(const struct net *net,
+				  struct nft_set *set,
+				  struct nft_set_elem *elem, u32 flags)
+{
+	int ret;
+
+	if (flags & NFT_SET_ELEM_CATCHALL)
+		ret = nft_setelem_catchall_deactivate(net, set, elem);
+	else
+		ret = __nft_setelem_deactivate(net, set, elem);
+
+	return ret;
+}
+
+static void nft_setelem_catchall_remove(const struct net *net,
+					const struct nft_set *set,
+					const struct nft_set_elem *elem)
+{
+	struct nft_set_elem_catchall *catchall, *next;
+
+	list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+		if (catchall->elem == elem->priv) {
+			list_del_rcu(&catchall->list);
+			kfree_rcu(catchall);
+			break;
+		}
+	}
+}
+
+static void nft_setelem_remove(const struct net *net,
+			       const struct nft_set *set,
+			       const struct nft_set_elem *elem)
+{
+	if (nft_setelem_is_catchall(set, elem))
+		nft_setelem_catchall_remove(net, set, elem);
+	else
+		set->ops->remove(net, set, elem);
+}
+
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr, u32 nlmsg_flags)
 {
@@ -5369,14 +5668,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (err < 0)
 		return err;
 
-	if (nla[NFTA_SET_ELEM_KEY] == NULL)
-		return -EINVAL;
-
 	nft_set_ext_prepare(&tmpl);
 
 	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
 	if (err < 0)
 		return err;
+
+	if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
+		return -EINVAL;
+
 	if (flags != 0)
 		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
 
@@ -5481,12 +5781,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		num_exprs = set->num_exprs;
 	}
 
-	err = nft_setelem_parse_key(ctx, set, &elem.key.val,
-				    nla[NFTA_SET_ELEM_KEY]);
-	if (err < 0)
-		goto err_set_elem_expr;
+	if (nla[NFTA_SET_ELEM_KEY]) {
+		err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+					    nla[NFTA_SET_ELEM_KEY]);
+		if (err < 0)
+			goto err_set_elem_expr;
 
-	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+		nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+	}
 
 	if (nla[NFTA_SET_ELEM_KEY_END]) {
 		err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
@@ -5603,7 +5905,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
-	err = set->ops->insert(ctx->net, set, &elem, &ext2);
+
+	err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
 	if (err) {
 		if (err == -EEXIST) {
 			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
@@ -5630,7 +5933,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		goto err_element_clash;
 	}
 
-	if (set->size &&
+	if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
 	    !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
 		err = -ENFILE;
 		goto err_set_full;
@@ -5641,7 +5944,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	return 0;
 
 err_set_full:
-	set->ops->remove(ctx->net, set, &elem);
+	nft_setelem_remove(ctx->net, set, &elem);
 err_element_clash:
 	kfree(trans);
 err_elem_expr:
@@ -5773,7 +6076,6 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_ext *ext;
 	struct nft_trans *trans;
 	u32 flags = 0;
-	void *priv;
 	int err;
 
 	err = nla_parse_nested_deprecated(nla, NFTA_SET_ELEM_MAX, attr,
@@ -5781,23 +6083,26 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (err < 0)
 		return err;
 
-	if (nla[NFTA_SET_ELEM_KEY] == NULL)
+	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+	if (err < 0)
+		return err;
+
+	if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
 		return -EINVAL;
 
 	nft_set_ext_prepare(&tmpl);
 
-	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
-	if (err < 0)
-		return err;
 	if (flags != 0)
 		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
 
-	err = nft_setelem_parse_key(ctx, set, &elem.key.val,
-				    nla[NFTA_SET_ELEM_KEY]);
-	if (err < 0)
-		return err;
+	if (nla[NFTA_SET_ELEM_KEY]) {
+		err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+					    nla[NFTA_SET_ELEM_KEY]);
+		if (err < 0)
+			return err;
 
-	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+		nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+	}
 
 	if (nla[NFTA_SET_ELEM_KEY_END]) {
 		err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
@@ -5823,13 +6128,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (trans == NULL)
 		goto fail_trans;
 
-	priv = set->ops->deactivate(ctx->net, set, &elem);
-	if (priv == NULL) {
-		err = -ENOENT;
+	err = nft_setelem_deactivate(ctx->net, set, &elem, flags);
+	if (err < 0)
 		goto fail_ops;
-	}
-	kfree(elem.priv);
-	elem.priv = priv;
 
 	nft_setelem_data_deactivate(ctx->net, set, &elem);
 
@@ -5876,6 +6177,49 @@ err1:
 	return err;
 }
 
+static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
+				    struct nft_set *set,
+				    struct nft_set_elem *elem)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
+				    sizeof(struct nft_trans_elem), GFP_KERNEL);
+	if (!trans)
+		return -ENOMEM;
+
+	nft_setelem_data_deactivate(ctx->net, set, elem);
+	nft_trans_elem_set(trans) = set;
+	nft_trans_elem(trans) = *elem;
+	nft_trans_commit_list_add_tail(ctx->net, trans);
+
+	return 0;
+}
+
+static int nft_set_catchall_flush(const struct nft_ctx *ctx,
+				  struct nft_set *set)
+{
+	u8 genmask = nft_genmask_next(ctx->net);
+	struct nft_set_elem_catchall *catchall;
+	struct nft_set_elem elem;
+	struct nft_set_ext *ext;
+	int ret = 0;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_set_elem_active(ext, genmask) ||
+		    nft_set_elem_mark_busy(ext))
+			continue;
+
+		elem.priv = catchall->elem;
+		ret = __nft_set_catchall_flush(ctx, set, &elem);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
 static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 {
 	struct nft_set_iter iter = {
@@ -5884,6 +6228,8 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 	};
 
 	set->ops->walk(ctx, set, &iter);
+	if (!iter.err)
+		iter.err = nft_set_catchall_flush(ctx, set);
 
 	return iter.err;
 }
@@ -5918,8 +6264,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 		err = nft_del_setelem(&ctx, set, attr);
 		if (err < 0)
 			break;
-
-		set->ndeact++;
 	}
 	return err;
 }
@@ -8270,7 +8614,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_NEWSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
-			te->set->ops->activate(net, te->set, &te->elem);
+			nft_setelem_activate(net, te->set, &te->elem);
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_NEWSETELEM, 0);
@@ -8282,9 +8626,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nf_tables_setelem_notify(&trans->ctx, te->set,
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
-			te->set->ops->remove(net, te->set, &te->elem);
-			atomic_dec(&te->set->nelems);
-			te->set->ndeact--;
+			nft_setelem_remove(net, te->set, &te->elem);
+			if (!nft_setelem_is_catchall(te->set, &te->elem)) {
+				atomic_dec(&te->set->nelems);
+				te->set->ndeact--;
+			}
 			break;
 		case NFT_MSG_NEWOBJ:
 			if (nft_trans_obj_update(trans)) {
@@ -8485,15 +8831,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 				break;
 			}
 			te = (struct nft_trans_elem *)trans->data;
-			te->set->ops->remove(net, te->set, &te->elem);
-			atomic_dec(&te->set->nelems);
+			nft_setelem_remove(net, te->set, &te->elem);
+			if (!nft_setelem_is_catchall(te->set, &te->elem))
+				atomic_dec(&te->set->nelems);
 			break;
 		case NFT_MSG_DELSETELEM:
 			te = (struct nft_trans_elem *)trans->data;
 
 			nft_setelem_data_activate(net, te->set, &te->elem);
-			te->set->ops->activate(net, te->set, &te->elem);
-			te->set->ndeact--;
+			nft_setelem_activate(net, te->set, &te->elem);
+			if (!nft_setelem_is_catchall(te->set, &te->elem))
+				te->set->ndeact--;
 
 			nft_trans_destroy(trans);
 			break;
@@ -8672,6 +9020,27 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 	return nft_check_loops(ctx, ext);
 }
 
+static int nft_set_catchall_loops(const struct nft_ctx *ctx,
+				  struct nft_set *set)
+{
+	u8 genmask = nft_genmask_next(ctx->net);
+	struct nft_set_elem_catchall *catchall;
+	struct nft_set_ext *ext;
+	int ret = 0;
+
+	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+		ext = nft_set_elem_ext(set, catchall->elem);
+		if (!nft_set_elem_active(ext, genmask))
+			continue;
+
+		ret = nft_check_loops(ctx, ext);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
 static int nf_tables_check_loops(const struct nft_ctx *ctx,
 				 const struct nft_chain *chain)
 {
@@ -8731,6 +9100,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 			iter.fn		= nf_tables_loop_check_setelem;
 
 			set->ops->walk(ctx, set, &iter);
+			if (!iter.err)
+				iter.err = nft_set_catchall_loops(ctx, set);
+
 			if (iter.err < 0)
 				return iter.err;
 		}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b0f558b4fea5..a479f8a1270c 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -30,13 +30,17 @@ void nft_lookup_eval(const struct nft_expr *expr,
 	const struct nft_lookup *priv = nft_expr_priv(expr);
 	const struct nft_set *set = priv->set;
 	const struct nft_set_ext *ext = NULL;
+	const struct net *net = nft_net(pkt);
 	bool found;
 
-	found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
-				 &ext) ^ priv->invert;
+	found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext) ^
+				 priv->invert;
 	if (!found) {
-		regs->verdict.code = NFT_BREAK;
-		return;
+		ext = nft_set_catchall_lookup(net, set);
+		if (!ext) {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
 	}
 
 	if (ext) {
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index bc104d36d3bb..7e47edee88ee 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -105,15 +105,18 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
 {
 	struct nft_objref_map *priv = nft_expr_priv(expr);
 	const struct nft_set *set = priv->set;
+	struct net *net = nft_net(pkt);
 	const struct nft_set_ext *ext;
 	struct nft_object *obj;
 	bool found;
 
-	found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
-				 &ext);
+	found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext);
 	if (!found) {
-		regs->verdict.code = NFT_BREAK;
-		return;
+		ext = nft_set_catchall_lookup(net, set);
+		if (!ext) {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
 	}
 	obj = *nft_set_ext_obj(ext);
 	obj->ops->eval(obj, regs, pkt);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index bf618b7ec1ae..58f576abcd4a 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -350,6 +350,12 @@ needs_gc_run:
 	rhashtable_walk_stop(&hti);
 	rhashtable_walk_exit(&hti);
 
+	he = nft_set_catchall_gc(set);
+	if (he) {
+		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+		if (gcb)
+			nft_set_gc_batch_add(gcb, he);
+	}
 	nft_set_gc_batch_complete(gcb);
 	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
 			   nft_set_gc_interval(set));
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 9944523f5c2c..528a2d7ca991 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1529,11 +1529,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	int rules_f0, first_rule = 0;
+	struct nft_pipapo_elem *e;
 
 	while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
 		union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
 		struct nft_pipapo_field *f;
-		struct nft_pipapo_elem *e;
 		int i, start, rules_fx;
 
 		start = first_rule;
@@ -1569,6 +1569,10 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
 		}
 	}
 
+	e = nft_set_catchall_gc(set);
+	if (e)
+		nft_set_elem_destroy(set, e, true);
+
 	priv->last_gc = jiffies;
 }
 
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 217ab3644c25..9e36eb4a7429 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -541,6 +541,12 @@ static void nft_rbtree_gc(struct work_struct *work)
 	write_seqcount_end(&priv->count);
 	write_unlock_bh(&priv->lock);
 
+	rbe = nft_set_catchall_gc(set);
+	if (rbe) {
+		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+		if (gcb)
+			nft_set_gc_batch_add(gcb, rbe);
+	}
 	nft_set_gc_batch_complete(gcb);
 
 	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-- 
cgit v1.2.3


From 8a7363f8497900e33d4ac391315e8a8d53a03d89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Apr 2021 21:45:18 +0200
Subject: netfilter: nft_socket: fix an unused variable warning

The variable is only used in an #ifdef, causing a harmless warning:

net/netfilter/nft_socket.c: In function 'nft_socket_init':
net/netfilter/nft_socket.c:137:27: error: unused variable 'level' [-Werror=unused-variable]
  137 |         unsigned int len, level;
      |                           ^~~~~

Move it into the same #ifdef block.

Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_socket.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9c169d100651..f9c5ff6024e0 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -134,7 +134,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 			   const struct nlattr * const tb[])
 {
 	struct nft_socket *priv = nft_expr_priv(expr);
-	unsigned int len, level;
+	unsigned int len;
 
 	if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
 		return -EINVAL;
@@ -160,7 +160,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 		len = sizeof(u32);
 		break;
 #ifdef CONFIG_CGROUPS
-	case NFT_SOCKET_CGROUPV2:
+	case NFT_SOCKET_CGROUPV2: {
+		unsigned int level;
+
 		if (!tb[NFTA_SOCKET_LEVEL])
 			return -EINVAL;
 
@@ -171,6 +173,7 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 		priv->level = level;
 		len = sizeof(u64);
 		break;
+	}
 #endif
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 7acc0bb490c85012bcbda142b6755fd1fdf1fba1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Apr 2021 21:45:19 +0200
Subject: netfilter: nft_socket: fix build with CONFIG_SOCK_CGROUP_DATA=n

In some configurations, the sock_cgroup_ptr() function is not available:

net/netfilter/nft_socket.c: In function 'nft_sock_get_eval_cgroupv2':
net/netfilter/nft_socket.c:47:16: error: implicit declaration of function 'sock_cgroup_ptr'; did you mean 'obj_cgroup_put'? [-Werror=implicit-function-declaration]
   47 |         cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
      |                ^~~~~~~~~~~~~~~
      |                obj_cgroup_put
net/netfilter/nft_socket.c:47:14: error: assignment to 'struct cgroup *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion]
   47 |         cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
      |              ^

Change the caller to match the same #ifdef check, only calling it
when the function is defined.

Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_socket.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f9c5ff6024e0..d601974c9d2e 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -34,7 +34,7 @@ static void nft_socket_wildcard(const struct nft_pktinfo *pkt,
 	}
 }
 
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
 static noinline bool
 nft_sock_get_eval_cgroupv2(u32 *dest, const struct nft_pktinfo *pkt, u32 level)
 {
@@ -106,7 +106,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 		}
 		nft_socket_wildcard(pkt, regs, sk, dest);
 		break;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
 	case NFT_SOCKET_CGROUPV2:
 		if (!nft_sock_get_eval_cgroupv2(dest, pkt, priv->level)) {
 			regs->verdict.code = NFT_BREAK;
-- 
cgit v1.2.3


From 99014088156cd78867d19514a0bc771c4b86b93b Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Sun, 25 Apr 2021 17:27:35 +0200
Subject: net: bridge: mcast: fix broken length + header check for MRDv6 Adv.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The IPv6 Multicast Router Advertisements parsing has the following two
issues:

For one thing, ICMPv6 MRD Advertisements are smaller than ICMPv6 MLD
messages (ICMPv6 MRD Adv.: 8 bytes vs. ICMPv6 MLDv1/2: >= 24 bytes,
assuming MLDv2 Reports with at least one multicast address entry).
When ipv6_mc_check_mld_msg() tries to parse an Multicast Router
Advertisement its MLD length check will fail - and it will wrongly
return -EINVAL, even if we have a valid MRD Advertisement. With the
returned -EINVAL the bridge code will assume a broken packet and will
wrongly discard it, potentially leading to multicast packet loss towards
multicast routers.

The second issue is the MRD header parsing in
br_ip6_multicast_mrd_rcv(): It wrongly checks for an ICMPv6 header
immediately after the IPv6 header (IPv6 next header type). However
according to RFC4286, section 2 all MRD messages contain a Router Alert
option (just like MLD). So instead there is an IPv6 Hop-by-Hop option
for the Router Alert between the IPv6 and ICMPv6 header, again leading
to the bridge wrongly discarding Multicast Router Advertisements.

To fix these two issues, introduce a new return value -ENODATA to
ipv6_mc_check_mld() to indicate a valid ICMPv6 packet with a hop-by-hop
option which is not an MLD but potentially an MRD packet. This also
simplifies further parsing in the bridge code, as ipv6_mc_check_mld()
already fully checks the ICMPv6 header and hop-by-hop option.

These issues were found and fixed with the help of the mrdisc tool
(https://github.com/troglobit/mrdisc).

Fixes: 4b3087c7e37f ("bridge: Snoop Multicast Router Advertisements")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h    |  1 -
 net/bridge/br_multicast.c | 33 ++++++++-------------------------
 net/ipv6/mcast_snoop.c    | 12 +++++++-----
 3 files changed, 15 insertions(+), 31 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 18f783dcd55f..78ea3e332688 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -233,7 +233,6 @@ void ipv6_mc_unmap(struct inet6_dev *idev);
 void ipv6_mc_remap(struct inet6_dev *idev);
 void ipv6_mc_init_dev(struct inet6_dev *idev);
 void ipv6_mc_destroy_dev(struct inet6_dev *idev);
-int ipv6_mc_check_icmpv6(struct sk_buff *skb);
 int ipv6_mc_check_mld(struct sk_buff *skb);
 void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2883601d5c8b..226bb05c3b42 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -3159,25 +3159,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_mrd_rcv(struct net_bridge *br,
-				    struct net_bridge_port *port,
-				    struct sk_buff *skb)
+static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
+				     struct net_bridge_port *port,
+				     struct sk_buff *skb)
 {
-	int ret;
-
-	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
-		return -ENOMSG;
-
-	ret = ipv6_mc_check_icmpv6(skb);
-	if (ret < 0)
-		return ret;
-
 	if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
-		return -ENOMSG;
+		return;
 
 	br_multicast_mark_router(br, port);
-
-	return 0;
 }
 
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
@@ -3191,18 +3180,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 
 	err = ipv6_mc_check_mld(skb);
 
-	if (err == -ENOMSG) {
+	if (err == -ENOMSG || err == -ENODATA) {
 		if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
 			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
-
-		if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) {
-			err = br_ip6_multicast_mrd_rcv(br, port, skb);
-
-			if (err < 0 && err != -ENOMSG) {
-				br_multicast_err_count(br, port, skb->protocol);
-				return err;
-			}
-		}
+		if (err == -ENODATA &&
+		    ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr))
+			br_ip6_multicast_mrd_rcv(br, port, skb);
 
 		return 0;
 	} else if (err < 0) {
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index d3d6b6a66e5f..04d5fcdfa6e0 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -109,7 +109,7 @@ static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
 	struct mld_msg *mld;
 
 	if (!ipv6_mc_may_pull(skb, len))
-		return -EINVAL;
+		return -ENODATA;
 
 	mld = (struct mld_msg *)skb_transport_header(skb);
 
@@ -122,7 +122,7 @@ static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
 	case ICMPV6_MGM_QUERY:
 		return ipv6_mc_check_mld_query(skb);
 	default:
-		return -ENOMSG;
+		return -ENODATA;
 	}
 }
 
@@ -131,7 +131,7 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
 	return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
 }
 
-int ipv6_mc_check_icmpv6(struct sk_buff *skb)
+static int ipv6_mc_check_icmpv6(struct sk_buff *skb)
 {
 	unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr);
 	unsigned int transport_len = ipv6_transport_len(skb);
@@ -150,7 +150,6 @@ int ipv6_mc_check_icmpv6(struct sk_buff *skb)
 
 	return 0;
 }
-EXPORT_SYMBOL(ipv6_mc_check_icmpv6);
 
 /**
  * ipv6_mc_check_mld - checks whether this is a sane MLD packet
@@ -161,7 +160,10 @@ EXPORT_SYMBOL(ipv6_mc_check_icmpv6);
  *
  * -EINVAL: A broken packet was detected, i.e. it violates some internet
  *  standard
- * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
+ * -ENOMSG: IP header validation succeeded but it is not an ICMPv6 packet
+ *  with a hop-by-hop option.
+ * -ENODATA: IP+ICMPv6 header with hop-by-hop option validation succeeded
+ *  but it is not an MLD packet.
  * -ENOMEM: A memory allocation failure happened.
  *
  * Caller needs to set the skb network header and free any returned skb if it
-- 
cgit v1.2.3


From bb23ffa1015cb57e0c9ec3c6135275b38d66a780 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 25 Apr 2021 18:14:10 +0200
Subject: macvlan: Use 'hash' iterators to simplify code

Use 'hash_for_each_rcu' and 'hash_for_each_safe' instead of hand writing
them. This saves some lines of code, reduce indentation and improve
readability.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 45 ++++++++++++++++++---------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 7427b989607e..1b998aa481f8 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -272,25 +272,22 @@ static void macvlan_broadcast(struct sk_buff *skb,
 	if (skb->protocol == htons(ETH_P_PAUSE))
 		return;
 
-	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
-		hlist_for_each_entry_rcu(vlan, &port->vlan_hash[i], hlist) {
-			if (vlan->dev == src || !(vlan->mode & mode))
-				continue;
+	hash_for_each_rcu(port->vlan_hash, i, vlan, hlist) {
+		if (vlan->dev == src || !(vlan->mode & mode))
+			continue;
 
-			hash = mc_hash(vlan, eth->h_dest);
-			if (!test_bit(hash, vlan->mc_filter))
-				continue;
+		hash = mc_hash(vlan, eth->h_dest);
+		if (!test_bit(hash, vlan->mc_filter))
+			continue;
 
-			err = NET_RX_DROP;
-			nskb = skb_clone(skb, GFP_ATOMIC);
-			if (likely(nskb))
-				err = macvlan_broadcast_one(
-					nskb, vlan, eth,
+		err = NET_RX_DROP;
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (likely(nskb))
+			err = macvlan_broadcast_one(nskb, vlan, eth,
 					mode == MACVLAN_MODE_BRIDGE) ?:
-				      netif_rx_ni(nskb);
-			macvlan_count_rx(vlan, skb->len + ETH_HLEN,
-					 err == NET_RX_SUCCESS, true);
-		}
+			      netif_rx_ni(nskb);
+		macvlan_count_rx(vlan, skb->len + ETH_HLEN,
+				 err == NET_RX_SUCCESS, true);
 	}
 }
 
@@ -380,20 +377,14 @@ err:
 static void macvlan_flush_sources(struct macvlan_port *port,
 				  struct macvlan_dev *vlan)
 {
+	struct macvlan_source_entry *entry;
+	struct hlist_node *next;
 	int i;
 
-	for (i = 0; i < MACVLAN_HASH_SIZE; i++) {
-		struct hlist_node *h, *n;
-
-		hlist_for_each_safe(h, n, &port->vlan_source_hash[i]) {
-			struct macvlan_source_entry *entry;
+	hash_for_each_safe(port->vlan_source_hash, i, next, entry, hlist)
+		if (entry->vlan == vlan)
+			macvlan_hash_del_source(entry);
 
-			entry = hlist_entry(h, struct macvlan_source_entry,
-					    hlist);
-			if (entry->vlan == vlan)
-				macvlan_hash_del_source(entry);
-		}
-	}
 	vlan->macaddr_count = 0;
 }
 
-- 
cgit v1.2.3


From 6c375d793be601976966bc3c1a14af8ce645a5ff Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 26 Apr 2021 18:13:03 +0800
Subject: rxrpc: rxkad: Remove redundant variable offset

Variable offset is being assigned a value from a calculation
however the variable is never read, so this redundant variable
can be removed.

Cleans up the following clang-analyzer warning:

net/rxrpc/rxkad.c:579:2: warning: Value stored to 'offset' is never read
[clang-analyzer-deadcode.DeadStores].

net/rxrpc/rxkad.c:485:2: warning: Value stored to 'offset' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/rxkad.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index e2e9e9b0a6d7..08aab5c01437 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -482,7 +482,6 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 					     RXKADDATALEN);
 		goto protocol_error;
 	}
-	offset += sizeof(sechdr);
 	len -= sizeof(sechdr);
 
 	buf = ntohl(sechdr.data_size);
@@ -576,7 +575,6 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 					     RXKADDATALEN);
 		goto protocol_error;
 	}
-	offset += sizeof(sechdr);
 	len -= sizeof(sechdr);
 
 	buf = ntohl(sechdr.data_size);
-- 
cgit v1.2.3


From 152fa81109a8766c45bfd4aad9e8f4005566648d Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Mon, 26 Apr 2021 19:08:23 +0300
Subject: net: phy: marvell-88x2222: enable autoneg by default

There is no real need for disabling autonigotiation in config_init().
Leave it enabled by default.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell-88x2222.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c
index 9b9ac3ef735d..d8b31d4d2a73 100644
--- a/drivers/net/phy/marvell-88x2222.c
+++ b/drivers/net/phy/marvell-88x2222.c
@@ -473,8 +473,6 @@ static int mv2222_config_init(struct phy_device *phydev)
 	if (phydev->interface != PHY_INTERFACE_MODE_XAUI)
 		return -EINVAL;
 
-	phydev->autoneg = AUTONEG_DISABLE;
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6066234aa33850e9e35e7be82d92b9e9091e774b Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Mon, 26 Apr 2021 18:17:34 +0200
Subject: net: dsa: mv88e6xxx: Fix 6095/6097/6185 ports in non-SERDES CMODE

The .serdes_get_lane op used the magic value 0xff to indicate a valid
SERDES lane and 0 signaled that a non-SERDES mode was set on the port.

Unfortunately, "0" is also a valid lane ID, so even when these ports
where configured to e.g. RGMII the driver would set them up as SERDES
ports.

- Replace 0xff with 0 to indicate a valid lane ID. The number is on
  the one hand just as arbitrary, but it is at least the first valid one
  and therefore less of a surprise.

- Follow the other .serdes_get_lane implementations and return -ENODEV
  in the case where no SERDES is assigned to the port.

Fixes: f5be107c3338 ("net: dsa: mv88e6xxx: Support serdes ports on MV88E6097/6095/6185")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 470856bcd2f3..e4fbef81bc52 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -443,15 +443,15 @@ int mv88e6185_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
 int mv88e6185_serdes_get_lane(struct mv88e6xxx_chip *chip, int port)
 {
 	/* There are no configurable serdes lanes on this switch chip but we
-	 * need to return non-zero so that callers of
+	 * need to return a non-negative lane number so that callers of
 	 * mv88e6xxx_serdes_get_lane() know this is a serdes port.
 	 */
 	switch (chip->ports[port].cmode) {
 	case MV88E6185_PORT_STS_CMODE_SERDES:
 	case MV88E6185_PORT_STS_CMODE_1000BASE_X:
-		return 0xff;
-	default:
 		return 0;
+	default:
+		return -ENODEV;
 	}
 }
 
-- 
cgit v1.2.3


From 23c9c2b314bab7f7f807a2f0cfe06cc4451b6eb7 Mon Sep 17 00:00:00 2001
From: qhjindev <qhjin_dev@163.com>
Date: Tue, 27 Apr 2021 07:57:52 +0800
Subject: fddi/skfp: fix typo

change 'privae' to 'private'

Signed-off-by: qhjindev <qhjin_dev@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/skfp/h/smc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fddi/skfp/h/smc.h b/drivers/net/fddi/skfp/h/smc.h
index 706fa619b703..3814a2ff64ae 100644
--- a/drivers/net/fddi/skfp/h/smc.h
+++ b/drivers/net/fddi/skfp/h/smc.h
@@ -228,7 +228,7 @@ struct s_phy {
 	u_char timer1_exp ;
 	u_char timer2_exp ;
 	u_char pcm_pad1[1] ;
-	int	cem_pst ;	/* CEM privae state; used for dual homing */
+	int	cem_pst ;	/* CEM private state; used for dual homing */
 	struct lem_counter lem ;
 #ifdef	AMDPLC
 	struct s_plc	plc ;
-- 
cgit v1.2.3


From cfd12c06cdceac094aab3f097cce24c279bfd43b Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:21:57 +0800
Subject: net: dsa: check tx timestamp request in core driver

Check tx timestamp request in core driver at very beginning of
dsa_skb_tx_timestamp(), so that most skbs not requiring tx
timestamp just return. And drop such checking in device drivers.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Tested-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c | 4 ----
 drivers/net/dsa/mv88e6xxx/hwtstamp.c            | 3 ---
 drivers/net/dsa/ocelot/felix.c                  | 3 +--
 net/dsa/slave.c                                 | 3 +++
 4 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
index 69dd9a2e8bb6..6ba5e2333066 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
@@ -382,10 +382,6 @@ bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
 
 	ps = &hellcreek->ports[port].port_hwtstamp;
 
-	/* Check if the driver is expected to do HW timestamping */
-	if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
-		return false;
-
 	/* Make sure the message is a PTP message that needs to be timestamped
 	 * and the interaction with the HW timestamping is enabled. If not, stop
 	 * here
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index 094d17a1d037..05ca1d3c6498 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -475,9 +475,6 @@ bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
 	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
 	struct ptp_header *hdr;
 
-	if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
-		return false;
-
 	hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
 	if (!hdr)
 		return false;
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 6b5442be0230..1379f86d71ec 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1401,8 +1401,7 @@ static bool felix_txtstamp(struct dsa_switch *ds, int port,
 	struct ocelot *ocelot = ds->priv;
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 
-	if (ocelot->ptp && (skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP) &&
-	    ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+	if (ocelot->ptp && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
 		ocelot_port_add_txtstamp_skb(ocelot, port, clone);
 		return true;
 	}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 77b33bd161b8..b2a802e9330e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -559,6 +559,9 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 	struct sk_buff *clone;
 	unsigned int type;
 
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		return;
+
 	type = ptp_classify_raw(skb);
 	if (type == PTP_CLASS_NONE)
 		return;
-- 
cgit v1.2.3


From cf536ea3c7eefb26082836eb7f930b293dd38345 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:21:58 +0800
Subject: net: dsa: no longer identify PTP packet in core driver

Move ptp_classify_raw out of dsa core driver for handling tx
timestamp request. Let device drivers do this if they want.
Not all drivers want to limit tx timestamping for only PTP
packet.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Tested-by: Kurt Kanzenbach <kurt@linutronix.de>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c |  7 ++++++-
 drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h |  2 +-
 drivers/net/dsa/mv88e6xxx/hwtstamp.c            |  7 ++++++-
 drivers/net/dsa/mv88e6xxx/hwtstamp.h            |  5 ++---
 drivers/net/dsa/ocelot/felix.c                  |  2 +-
 drivers/net/dsa/sja1105/sja1105_ptp.c           |  3 +--
 drivers/net/dsa/sja1105/sja1105_ptp.h           |  2 +-
 include/net/dsa.h                               |  2 +-
 net/dsa/slave.c                                 | 12 ++----------
 9 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
index 6ba5e2333066..5b2e023468fe 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
@@ -374,14 +374,19 @@ long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp)
 }
 
 bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone, unsigned int type)
+			     struct sk_buff *clone)
 {
 	struct hellcreek *hellcreek = ds->priv;
 	struct hellcreek_port_hwtstamp *ps;
 	struct ptp_header *hdr;
+	unsigned int type;
 
 	ps = &hellcreek->ports[port].port_hwtstamp;
 
+	type = ptp_classify_raw(clone);
+	if (type == PTP_CLASS_NONE)
+		return false;
+
 	/* Make sure the message is a PTP message that needs to be timestamped
 	 * and the interaction with the HW timestamping is enabled. If not, stop
 	 * here
diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
index c0745ffa1ebb..728cd5dc650f 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
@@ -45,7 +45,7 @@ int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port,
 bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port,
 			     struct sk_buff *clone, unsigned int type);
 bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone, unsigned int type);
+			     struct sk_buff *clone);
 
 int hellcreek_get_ts_info(struct dsa_switch *ds, int port,
 			  struct ethtool_ts_info *info);
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index 05ca1d3c6498..79514a54d903 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -469,11 +469,16 @@ long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
 }
 
 bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone, unsigned int type)
+			     struct sk_buff *clone)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
 	struct ptp_header *hdr;
+	unsigned int type;
+
+	type = ptp_classify_raw(clone);
+	if (type == PTP_CLASS_NONE)
+		return false;
 
 	hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
 	if (!hdr)
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
index 9da9f197ba02..91fbc7838fc8 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -118,7 +118,7 @@ int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
 bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
 			     struct sk_buff *clone, unsigned int type);
 bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone, unsigned int type);
+			     struct sk_buff *clone);
 
 int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
 			  struct ethtool_ts_info *info);
@@ -152,8 +152,7 @@ static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
 }
 
 static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-					   struct sk_buff *clone,
-					   unsigned int type)
+					   struct sk_buff *clone)
 {
 	return false;
 }
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 1379f86d71ec..d679f023dc00 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1396,7 +1396,7 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 }
 
 static bool felix_txtstamp(struct dsa_switch *ds, int port,
-			   struct sk_buff *clone, unsigned int type)
+			   struct sk_buff *clone)
 {
 	struct ocelot *ocelot = ds->priv;
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 1b90570b257b..72d052de82d8 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -435,8 +435,7 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
  * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
  */
-bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
-			   struct sk_buff *skb, unsigned int type)
+bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 {
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_port *sp = &priv->ports[port];
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 3daa33e98e77..c70c4729a06d 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -105,7 +105,7 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb, unsigned int type);
 
 bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
-			   struct sk_buff *skb, unsigned int type);
+			   struct sk_buff *skb);
 
 int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr);
 
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 507082959aa4..905066055b08 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -741,7 +741,7 @@ struct dsa_switch_ops {
 	int	(*port_hwtstamp_set)(struct dsa_switch *ds, int port,
 				     struct ifreq *ifr);
 	bool	(*port_txtstamp)(struct dsa_switch *ds, int port,
-				 struct sk_buff *clone, unsigned int type);
+				 struct sk_buff *clone);
 	bool	(*port_rxtstamp)(struct dsa_switch *ds, int port,
 				 struct sk_buff *skb, unsigned int type);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b2a802e9330e..acaa52e60d7f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -20,7 +20,6 @@
 #include <linux/if_bridge.h>
 #include <linux/if_hsr.h>
 #include <linux/netpoll.h>
-#include <linux/ptp_classify.h>
 
 #include "dsa_priv.h"
 
@@ -557,15 +556,10 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 {
 	struct dsa_switch *ds = p->dp->ds;
 	struct sk_buff *clone;
-	unsigned int type;
 
 	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
 		return;
 
-	type = ptp_classify_raw(skb);
-	if (type == PTP_CLASS_NONE)
-		return;
-
 	if (!ds->ops->port_txtstamp)
 		return;
 
@@ -573,7 +567,7 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 	if (!clone)
 		return;
 
-	if (ds->ops->port_txtstamp(ds, p->dp->index, clone, type)) {
+	if (ds->ops->port_txtstamp(ds, p->dp->index, clone)) {
 		DSA_SKB_CB(skb)->clone = clone;
 		return;
 	}
@@ -632,9 +626,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	DSA_SKB_CB(skb)->clone = NULL;
 
-	/* Identify PTP protocol packets, clone them, and pass them to the
-	 * switch driver
-	 */
+	/* Handle tx timestamp if any */
 	dsa_skb_tx_timestamp(p, skb);
 
 	if (dsa_realloc_skb(skb, dev)) {
-- 
cgit v1.2.3


From 5c5416f5d4c75fe6aba56f6c2c45a070b5e7cc78 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:21:59 +0800
Subject: net: dsa: no longer clone skb in core driver

It was a waste to clone skb directly in dsa_skb_tx_timestamp().
For one-step timestamping, a clone was not needed. For any failure of
port_txtstamp (this may usually happen), the skb clone had to be freed.

So this patch moves skb cloning for tx timestamp out of dsa core, and
let drivers clone skb in port_txtstamp if they really need.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Tested-by: Kurt Kanzenbach <kurt@linutronix.de>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c | 25 +++++++++++++++----------
 drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h |  4 ++--
 drivers/net/dsa/mv88e6xxx/hwtstamp.c            | 24 +++++++++++++++---------
 drivers/net/dsa/mv88e6xxx/hwtstamp.h            |  9 ++++-----
 drivers/net/dsa/ocelot/felix.c                  | 13 ++++++++-----
 drivers/net/dsa/sja1105/sja1105_ptp.c           | 13 +++++++++----
 drivers/net/dsa/sja1105/sja1105_ptp.h           |  2 +-
 include/net/dsa.h                               |  4 ++--
 net/dsa/slave.c                                 | 12 +-----------
 9 files changed, 57 insertions(+), 49 deletions(-)

diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
index 5b2e023468fe..40b41c794dfa 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c
@@ -373,31 +373,38 @@ long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp)
 	return restart ? 1 : -1;
 }
 
-bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone)
+void hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb)
 {
 	struct hellcreek *hellcreek = ds->priv;
 	struct hellcreek_port_hwtstamp *ps;
 	struct ptp_header *hdr;
+	struct sk_buff *clone;
 	unsigned int type;
 
 	ps = &hellcreek->ports[port].port_hwtstamp;
 
-	type = ptp_classify_raw(clone);
+	type = ptp_classify_raw(skb);
 	if (type == PTP_CLASS_NONE)
-		return false;
+		return;
 
 	/* Make sure the message is a PTP message that needs to be timestamped
 	 * and the interaction with the HW timestamping is enabled. If not, stop
 	 * here
 	 */
-	hdr = hellcreek_should_tstamp(hellcreek, port, clone, type);
+	hdr = hellcreek_should_tstamp(hellcreek, port, skb, type);
 	if (!hdr)
-		return false;
+		return;
+
+	clone = skb_clone_sk(skb);
+	if (!clone)
+		return;
 
 	if (test_and_set_bit_lock(HELLCREEK_HWTSTAMP_TX_IN_PROGRESS,
-				  &ps->state))
-		return false;
+				  &ps->state)) {
+		kfree_skb(clone);
+		return;
+	}
 
 	ps->tx_skb = clone;
 
@@ -407,8 +414,6 @@ bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
 	ps->tx_tstamp_start = jiffies;
 
 	ptp_schedule_worker(hellcreek->ptp_clock, 0);
-
-	return true;
 }
 
 bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
index 728cd5dc650f..71af77efb28b 100644
--- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
+++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h
@@ -44,8 +44,8 @@ int hellcreek_port_hwtstamp_get(struct dsa_switch *ds, int port,
 
 bool hellcreek_port_rxtstamp(struct dsa_switch *ds, int port,
 			     struct sk_buff *clone, unsigned int type);
-bool hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone);
+void hellcreek_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb);
 
 int hellcreek_get_ts_info(struct dsa_switch *ds, int port,
 			  struct ethtool_ts_info *info);
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
index 79514a54d903..8f74ffc7a279 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c
@@ -468,32 +468,38 @@ long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
 	return restart ? 1 : -1;
 }
 
-bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone)
+void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
 	struct ptp_header *hdr;
+	struct sk_buff *clone;
 	unsigned int type;
 
-	type = ptp_classify_raw(clone);
+	type = ptp_classify_raw(skb);
 	if (type == PTP_CLASS_NONE)
-		return false;
+		return;
 
-	hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+	hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
 	if (!hdr)
-		return false;
+		return;
+
+	clone = skb_clone_sk(skb);
+	if (!clone)
+		return;
 
 	if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
-				  &ps->state))
-		return false;
+				  &ps->state)) {
+		kfree_skb(clone);
+		return;
+	}
 
 	ps->tx_skb = clone;
 	ps->tx_tstamp_start = jiffies;
 	ps->tx_seq_id = be16_to_cpu(hdr->sequence_id);
 
 	ptp_schedule_worker(chip->ptp_clock, 0);
-	return true;
 }
 
 int mv88e6165_global_disable(struct mv88e6xxx_chip *chip)
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
index 91fbc7838fc8..cf7fb6d660b1 100644
--- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -117,8 +117,8 @@ int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
 
 bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
 			     struct sk_buff *clone, unsigned int type);
-bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-			     struct sk_buff *clone);
+void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb);
 
 int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
 			  struct ethtool_ts_info *info);
@@ -151,10 +151,9 @@ static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
 	return false;
 }
 
-static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
-					   struct sk_buff *clone)
+static inline void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+					   struct sk_buff *skb)
 {
-	return false;
 }
 
 static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index d679f023dc00..fe7e8bad90df 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1395,18 +1395,21 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 	return false;
 }
 
-static bool felix_txtstamp(struct dsa_switch *ds, int port,
-			   struct sk_buff *clone)
+static void felix_txtstamp(struct dsa_switch *ds, int port,
+			   struct sk_buff *skb)
 {
 	struct ocelot *ocelot = ds->priv;
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
+	struct sk_buff *clone;
 
 	if (ocelot->ptp && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+		clone = skb_clone_sk(skb);
+		if (!clone)
+			return;
+
 		ocelot_port_add_txtstamp_skb(ocelot, port, clone);
-		return true;
+		DSA_SKB_CB(skb)->clone = clone;
 	}
-
-	return false;
 }
 
 static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 72d052de82d8..a5140084000d 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -431,19 +431,24 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 	return true;
 }
 
-/* Called from dsa_skb_tx_timestamp. This callback is just to make DSA clone
+/* Called from dsa_skb_tx_timestamp. This callback is just to clone
  * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
  */
-bool sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
+void sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 {
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_port *sp = &priv->ports[port];
+	struct sk_buff *clone;
 
 	if (!sp->hwts_tx_en)
-		return false;
+		return;
 
-	return true;
+	clone = skb_clone_sk(skb);
+	if (!clone)
+		return;
+
+	DSA_SKB_CB(skb)->clone = clone;
 }
 
 static int sja1105_ptp_reset(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index c70c4729a06d..34f97f58a355 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -104,7 +104,7 @@ void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
 bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb, unsigned int type);
 
-bool sja1105_port_txtstamp(struct dsa_switch *ds, int port,
+void sja1105_port_txtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb);
 
 int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 905066055b08..73ce6ce38aa1 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -740,8 +740,8 @@ struct dsa_switch_ops {
 				     struct ifreq *ifr);
 	int	(*port_hwtstamp_set)(struct dsa_switch *ds, int port,
 				     struct ifreq *ifr);
-	bool	(*port_txtstamp)(struct dsa_switch *ds, int port,
-				 struct sk_buff *clone);
+	void	(*port_txtstamp)(struct dsa_switch *ds, int port,
+				 struct sk_buff *skb);
 	bool	(*port_rxtstamp)(struct dsa_switch *ds, int port,
 				 struct sk_buff *skb, unsigned int type);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index acaa52e60d7f..85e51f46a9d5 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -555,7 +555,6 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 				 struct sk_buff *skb)
 {
 	struct dsa_switch *ds = p->dp->ds;
-	struct sk_buff *clone;
 
 	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
 		return;
@@ -563,16 +562,7 @@ static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
 	if (!ds->ops->port_txtstamp)
 		return;
 
-	clone = skb_clone_sk(skb);
-	if (!clone)
-		return;
-
-	if (ds->ops->port_txtstamp(ds, p->dp->index, clone)) {
-		DSA_SKB_CB(skb)->clone = clone;
-		return;
-	}
-
-	kfree_skb(clone);
+	ds->ops->port_txtstamp(ds, p->dp->index, skb);
 }
 
 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
-- 
cgit v1.2.3


From c4b364ce1270d689ee5010001344b8eae3685f32 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:22:00 +0800
Subject: net: dsa: free skb->cb usage in core driver

Free skb->cb usage in core driver and let device drivers decide to
use or not. The reason having a DSA_SKB_CB(skb)->clone was because
dsa_skb_tx_timestamp() which may set the clone pointer was called
before p->xmit() which would use the clone if any, and the device
driver has no way to initialize the clone pointer.

This patch just put memset(skb->cb, 0, sizeof(skb->cb)) at beginning
of dsa_slave_xmit(). Some new features in the future, like one-step
timestamp may need more bytes of skb->cb to use in
dsa_skb_tx_timestamp(), and p->xmit().

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c         |  2 +-
 drivers/net/dsa/sja1105/sja1105_main.c |  2 +-
 drivers/net/dsa/sja1105/sja1105_ptp.c  |  4 ++--
 drivers/net/ethernet/mscc/ocelot.c     |  6 +++---
 drivers/net/ethernet/mscc/ocelot_net.c |  2 +-
 include/linux/dsa/sja1105.h            |  3 ++-
 include/net/dsa.h                      | 14 --------------
 include/soc/mscc/ocelot.h              |  8 ++++++++
 net/dsa/slave.c                        |  2 +-
 net/dsa/tag_ocelot.c                   |  8 ++++----
 net/dsa/tag_ocelot_8021q.c             |  8 ++++----
 11 files changed, 27 insertions(+), 32 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index fe7e8bad90df..b28280b6e91a 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1408,7 +1408,7 @@ static void felix_txtstamp(struct dsa_switch *ds, int port,
 			return;
 
 		ocelot_port_add_txtstamp_skb(ocelot, port, clone);
-		DSA_SKB_CB(skb)->clone = clone;
+		OCELOT_SKB_CB(skb)->clone = clone;
 	}
 }
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index d9c198ca0197..405024b637d6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3137,7 +3137,7 @@ static void sja1105_port_deferred_xmit(struct kthread_work *work)
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&sp->xmit_queue)) != NULL) {
-		struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+		struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 
 		mutex_lock(&priv->mgmt_lock);
 
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index a5140084000d..0bc566b9e958 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -432,7 +432,7 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 }
 
 /* Called from dsa_skb_tx_timestamp. This callback is just to clone
- * the skb and have it available in DSA_SKB_CB in the .port_deferred_xmit
+ * the skb and have it available in SJA1105_SKB_CB in the .port_deferred_xmit
  * callback, where we will timestamp it synchronously.
  */
 void sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
@@ -448,7 +448,7 @@ void sja1105_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 	if (!clone)
 		return;
 
-	DSA_SKB_CB(skb)->clone = clone;
+	SJA1105_SKB_CB(skb)->clone = clone;
 }
 
 static int sja1105_ptp_reset(struct dsa_switch *ds)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 8d06ffaf318a..7da2dd1632b1 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -538,8 +538,8 @@ void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
 	spin_lock(&ocelot_port->ts_id_lock);
 
 	skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS;
-	/* Store timestamp ID in cb[0] of sk_buff */
-	clone->cb[0] = ocelot_port->ts_id;
+	/* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */
+	OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id;
 	ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4;
 	skb_queue_tail(&ocelot_port->tx_skbs, clone);
 
@@ -604,7 +604,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot)
 		spin_lock_irqsave(&port->tx_skbs.lock, flags);
 
 		skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) {
-			if (skb->cb[0] != id)
+			if (OCELOT_SKB_CB(skb)->ts_id != id)
 				continue;
 			__skb_unlink(skb, &port->tx_skbs);
 			skb_match = skb;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 36f32a4d9b0f..789a5fba146c 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -520,7 +520,7 @@ static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 
 			ocelot_port_add_txtstamp_skb(ocelot, port, clone);
 
-			rew_op |= clone->cb[0] << 3;
+			rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 		}
 	}
 
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index dd93735ae228..1eb84562b311 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -47,11 +47,12 @@ struct sja1105_tagger_data {
 };
 
 struct sja1105_skb_cb {
+	struct sk_buff *clone;
 	u32 meta_tstamp;
 };
 
 #define SJA1105_SKB_CB(skb) \
-	((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))
+	((struct sja1105_skb_cb *)((skb)->cb))
 
 struct sja1105_port {
 	u16 subvlan_map[DSA_8021Q_N_SUBVLAN];
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 73ce6ce38aa1..e1a2610a0e06 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -117,20 +117,6 @@ struct dsa_netdevice_ops {
 #define MODULE_ALIAS_DSA_TAG_DRIVER(__proto)				\
 	MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))
 
-struct dsa_skb_cb {
-	struct sk_buff *clone;
-};
-
-struct __dsa_skb_cb {
-	struct dsa_skb_cb cb;
-	u8 priv[48 - sizeof(struct dsa_skb_cb)];
-};
-
-#define DSA_SKB_CB(skb) ((struct dsa_skb_cb *)((skb)->cb))
-
-#define DSA_SKB_CB_PRIV(skb)			\
-	((void *)(skb)->cb + offsetof(struct __dsa_skb_cb, priv))
-
 struct dsa_switch_tree {
 	struct list_head	list;
 
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 68cdc7ceaf4d..f075aaf70eee 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -689,6 +689,14 @@ struct ocelot_policer {
 	u32 burst; /* bytes */
 };
 
+struct ocelot_skb_cb {
+	struct sk_buff *clone;
+	u8 ts_id;
+};
+
+#define OCELOT_SKB_CB(skb) \
+	((struct ocelot_skb_cb *)((skb)->cb))
+
 #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
 #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
 #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 85e51f46a9d5..8c0f3c6ab365 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -614,7 +614,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_sw_netstats_tx_add(dev, 1, skb->len);
 
-	DSA_SKB_CB(skb)->clone = NULL;
+	memset(skb->cb, 0, sizeof(skb->cb));
 
 	/* Handle tx timestamp if any */
 	dsa_skb_tx_timestamp(p, skb);
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index f9df9cac81c5..1100a16f1032 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -15,11 +15,11 @@ static void ocelot_xmit_ptp(struct dsa_port *dp, void *injection,
 	ocelot_port = ocelot->ports[dp->index];
 	rew_op = ocelot_port->ptp_cmd;
 
-	/* Retrieve timestamp ID populated inside skb->cb[0] of the
-	 * clone by ocelot_port_add_txtstamp_skb
+	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
+	 * by ocelot_port_add_txtstamp_skb
 	 */
 	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= clone->cb[0] << 3;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 
 	ocelot_ifh_set_rew_op(injection, rew_op);
 }
@@ -28,7 +28,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
 			       __be32 ifh_prefix, void **ifh)
 {
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
 	struct dsa_switch *ds = dp->ds;
 	void *injection;
 	__be32 *prefix;
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 5f3e8e124a82..a001a7e3f575 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -28,11 +28,11 @@ static struct sk_buff *ocelot_xmit_ptp(struct dsa_port *dp,
 	ocelot_port = ocelot->ports[port];
 	rew_op = ocelot_port->ptp_cmd;
 
-	/* Retrieve timestamp ID populated inside skb->cb[0] of the
-	 * clone by ocelot_port_add_txtstamp_skb
+	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
+	 * by ocelot_port_add_txtstamp_skb
 	 */
 	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= clone->cb[0] << 3;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 
 	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
 
@@ -46,7 +46,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
-	struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
 
 	/* TX timestamping was requested, so inject through MMIO */
 	if (clone)
-- 
cgit v1.2.3


From d150946ed878d566ac55003b4722621bb55d9ac2 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:22:01 +0800
Subject: docs: networking: timestamping: update for DSA switches

Update timestamping doc for DSA switches to describe current
implementation accurately. On TX, the skb cloning is no longer
in DSA generic code.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.rst | 63 +++++++++++++++++++------------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
index f682e88fa87e..7db3985359bc 100644
--- a/Documentation/networking/timestamping.rst
+++ b/Documentation/networking/timestamping.rst
@@ -630,30 +630,45 @@ hardware timestamping on it. This is because the SO_TIMESTAMPING API does not
 allow the delivery of multiple hardware timestamps for the same packet, so
 anybody else except for the DSA switch port must be prevented from doing so.
 
-In code, DSA provides for most of the infrastructure for timestamping already,
-in generic code: a BPF classifier (``ptp_classify_raw``) is used to identify
-PTP event messages (any other packets, including PTP general messages, are not
-timestamped), and provides two hooks to drivers:
-
-- ``.port_txtstamp()``: The driver is passed a clone of the timestampable skb
-  to be transmitted, before actually transmitting it. Typically, a switch will
-  have a PTP TX timestamp register (or sometimes a FIFO) where the timestamp
-  becomes available. There may be an IRQ that is raised upon this timestamp's
-  availability, or the driver might have to poll after invoking
-  ``dev_queue_xmit()`` towards the host interface. Either way, in the
-  ``.port_txtstamp()`` method, the driver only needs to save the clone for
-  later use (when the timestamp becomes available). Each skb is annotated with
-  a pointer to its clone, in ``DSA_SKB_CB(skb)->clone``, to ease the driver's
-  job of keeping track of which clone belongs to which skb.
-
-- ``.port_rxtstamp()``: The original (and only) timestampable skb is provided
-  to the driver, for it to annotate it with a timestamp, if that is immediately
-  available, or defer to later. On reception, timestamps might either be
-  available in-band (through metadata in the DSA header, or attached in other
-  ways to the packet), or out-of-band (through another RX timestamping FIFO).
-  Deferral on RX is typically necessary when retrieving the timestamp needs a
-  sleepable context. In that case, it is the responsibility of the DSA driver
-  to call ``netif_rx_ni()`` on the freshly timestamped skb.
+In the generic layer, DSA provides the following infrastructure for PTP
+timestamping:
+
+- ``.port_txtstamp()``: a hook called prior to the transmission of
+  packets with a hardware TX timestamping request from user space.
+  This is required for two-step timestamping, since the hardware
+  timestamp becomes available after the actual MAC transmission, so the
+  driver must be prepared to correlate the timestamp with the original
+  packet so that it can re-enqueue the packet back into the socket's
+  error queue. To save the packet for when the timestamp becomes
+  available, the driver can call ``skb_clone_sk`` , save the clone pointer
+  in skb->cb and enqueue a tx skb queue. Typically, a switch will have a
+  PTP TX timestamp register (or sometimes a FIFO) where the timestamp
+  becomes available. In case of a FIFO, the hardware might store
+  key-value pairs of PTP sequence ID/message type/domain number and the
+  actual timestamp. To perform the correlation correctly between the
+  packets in a queue waiting for timestamping and the actual timestamps,
+  drivers can use a BPF classifier (``ptp_classify_raw``) to identify
+  the PTP transport type, and ``ptp_parse_header`` to interpret the PTP
+  header fields. There may be an IRQ that is raised upon this
+  timestamp's availability, or the driver might have to poll after
+  invoking ``dev_queue_xmit()`` towards the host interface.
+  One-step TX timestamping do not require packet cloning, since there is
+  no follow-up message required by the PTP protocol (because the
+  TX timestamp is embedded into the packet by the MAC), and therefore
+  user space does not expect the packet annotated with the TX timestamp
+  to be re-enqueued into its socket's error queue.
+
+- ``.port_rxtstamp()``: On RX, the BPF classifier is run by DSA to
+  identify PTP event messages (any other packets, including PTP general
+  messages, are not timestamped). The original (and only) timestampable
+  skb is provided to the driver, for it to annotate it with a timestamp,
+  if that is immediately available, or defer to later. On reception,
+  timestamps might either be available in-band (through metadata in the
+  DSA header, or attached in other ways to the packet), or out-of-band
+  (through another RX timestamping FIFO). Deferral on RX is typically
+  necessary when retrieving the timestamp needs a sleepable context. In
+  that case, it is the responsibility of the DSA driver to call
+  ``netif_rx_ni()`` on the freshly timestamped skb.
 
 3.2.2 Ethernet PHYs
 ^^^^^^^^^^^^^^^^^^^
-- 
cgit v1.2.3


From 682eaad93e8cfaaa439af39861ab8610eae5ff33 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:22:02 +0800
Subject: net: mscc: ocelot: convert to ocelot_port_txtstamp_request()

Convert to a common ocelot_port_txtstamp_request() for TX timestamp
request handling.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c         | 15 +++++++--------
 drivers/net/ethernet/mscc/ocelot.c     | 24 +++++++++++++++++++++---
 drivers/net/ethernet/mscc/ocelot_net.c | 18 +++++++-----------
 include/soc/mscc/ocelot.h              |  5 +++--
 4 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index b28280b6e91a..ce607fbaaa3a 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1399,17 +1399,16 @@ static void felix_txtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb)
 {
 	struct ocelot *ocelot = ds->priv;
-	struct ocelot_port *ocelot_port = ocelot->ports[port];
-	struct sk_buff *clone;
+	struct sk_buff *clone = NULL;
 
-	if (ocelot->ptp && ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
-		clone = skb_clone_sk(skb);
-		if (!clone)
-			return;
+	if (!ocelot->ptp)
+		return;
 
-		ocelot_port_add_txtstamp_skb(ocelot, port, clone);
+	if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone))
+		return;
+
+	if (clone)
 		OCELOT_SKB_CB(skb)->clone = clone;
-	}
 }
 
 static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 7da2dd1632b1..3ff4cce1ce7d 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -530,8 +530,8 @@ void ocelot_port_disable(struct ocelot *ocelot, int port)
 }
 EXPORT_SYMBOL(ocelot_port_disable);
 
-void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
-				  struct sk_buff *clone)
+static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
+					 struct sk_buff *clone)
 {
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 
@@ -545,7 +545,25 @@ void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
 
 	spin_unlock(&ocelot_port->ts_id_lock);
 }
-EXPORT_SYMBOL(ocelot_port_add_txtstamp_skb);
+
+int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
+				 struct sk_buff *skb,
+				 struct sk_buff **clone)
+{
+	struct ocelot_port *ocelot_port = ocelot->ports[port];
+	u8 ptp_cmd = ocelot_port->ptp_cmd;
+
+	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+		*clone = skb_clone_sk(skb);
+		if (!(*clone))
+			return -ENOMEM;
+
+		ocelot_port_add_txtstamp_skb(ocelot, port, *clone);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_port_txtstamp_request);
 
 static void ocelot_get_hwtimestamp(struct ocelot *ocelot,
 				   struct timespec64 *ts)
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 789a5fba146c..e99c8fb3cb15 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -507,19 +507,15 @@ static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Check if timestamping is needed */
 	if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
-		rew_op = ocelot_port->ptp_cmd;
+		struct sk_buff *clone = NULL;
 
-		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
-			struct sk_buff *clone;
-
-			clone = skb_clone_sk(skb);
-			if (!clone) {
-				kfree_skb(skb);
-				return NETDEV_TX_OK;
-			}
-
-			ocelot_port_add_txtstamp_skb(ocelot, port, clone);
+		if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) {
+			kfree_skb(skb);
+			return NETDEV_TX_OK;
+		}
 
+		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+			rew_op = ocelot_port->ptp_cmd;
 			rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
 		}
 	}
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index f075aaf70eee..f7632519cb9c 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -828,8 +828,9 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
 int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
 int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr);
 int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr);
-void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
-				  struct sk_buff *clone);
+int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
+				 struct sk_buff *skb,
+				 struct sk_buff **clone);
 void ocelot_get_txtstamp(struct ocelot *ocelot);
 void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu);
 int ocelot_get_max_mtu(struct ocelot *ocelot, int port);
-- 
cgit v1.2.3


From 39e5308b3250666cc92c5ca33a667698ac645bd2 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 27 Apr 2021 12:22:03 +0800
Subject: net: mscc: ocelot: support PTP Sync one-step timestamping

Although HWTSTAMP_TX_ONESTEP_SYNC existed in ioctl for hardware timestamp
configuration, the PTP Sync one-step timestamping had never been supported.

This patch is to truely support it.

- ocelot_port_txtstamp_request()
  This function handles tx timestamp request by storing
  ptp_cmd(tx timestamp type) in OCELOT_SKB_CB(skb)->ptp_cmd,
  and additionally for two-step timestamp storing ts_id in
  OCELOT_SKB_CB(clone)->ptp_cmd.

- ocelot_ptp_rew_op()
  During xmit, this function is called to get rew_op (rewriter option) by
  checking skb->cb for tx timestamp request, and configure to transmitting.

Non-onestep-Sync packet with one-step timestamp request falls back to use
two-step timestamp.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c     | 53 ++++++++++++++++++++++++++++++++++
 drivers/net/ethernet/mscc/ocelot_net.c |  8 ++---
 include/soc/mscc/ocelot.h              |  8 ++++-
 net/dsa/Kconfig                        |  2 ++
 net/dsa/tag_ocelot.c                   | 27 +++--------------
 net/dsa/tag_ocelot_8021q.c             | 41 +++++++-------------------
 6 files changed, 81 insertions(+), 58 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 3ff4cce1ce7d..0c4283319d7f 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -6,6 +6,7 @@
  */
 #include <linux/dsa/ocelot.h>
 #include <linux/if_bridge.h>
+#include <linux/ptp_classify.h>
 #include <soc/mscc/ocelot_vcap.h>
 #include "ocelot.h"
 #include "ocelot_vcap.h"
@@ -546,6 +547,46 @@ static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
 	spin_unlock(&ocelot_port->ts_id_lock);
 }
 
+u32 ocelot_ptp_rew_op(struct sk_buff *skb)
+{
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
+	u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd;
+	u32 rew_op = 0;
+
+	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) {
+		rew_op = ptp_cmd;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
+	} else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
+		rew_op = ptp_cmd;
+	}
+
+	return rew_op;
+}
+EXPORT_SYMBOL(ocelot_ptp_rew_op);
+
+static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb)
+{
+	struct ptp_header *hdr;
+	unsigned int ptp_class;
+	u8 msgtype, twostep;
+
+	ptp_class = ptp_classify_raw(skb);
+	if (ptp_class == PTP_CLASS_NONE)
+		return false;
+
+	hdr = ptp_parse_header(skb, ptp_class);
+	if (!hdr)
+		return false;
+
+	msgtype = ptp_get_msgtype(hdr, ptp_class);
+	twostep = hdr->flag_field[0] & 0x2;
+
+	if (msgtype == PTP_MSGTYPE_SYNC && twostep == 0)
+		return true;
+
+	return false;
+}
+
 int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
 				 struct sk_buff *skb,
 				 struct sk_buff **clone)
@@ -553,12 +594,24 @@ int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port,
 	struct ocelot_port *ocelot_port = ocelot->ports[port];
 	u8 ptp_cmd = ocelot_port->ptp_cmd;
 
+	/* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */
+	if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
+		if (ocelot_ptp_is_onestep_sync(skb)) {
+			OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd;
+			return 0;
+		}
+
+		/* Fall back to two-step timestamping */
+		ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+	}
+
 	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
 		*clone = skb_clone_sk(skb);
 		if (!(*clone))
 			return -ENOMEM;
 
 		ocelot_port_add_txtstamp_skb(ocelot, port, *clone);
+		OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index e99c8fb3cb15..aad33d22c33f 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -514,10 +514,10 @@ static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_OK;
 		}
 
-		if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
-			rew_op = ocelot_port->ptp_cmd;
-			rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
-		}
+		if (clone)
+			OCELOT_SKB_CB(skb)->clone = clone;
+
+		rew_op = ocelot_ptp_rew_op(skb);
 	}
 
 	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index f7632519cb9c..2f5ce4d4fdbf 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -691,6 +691,7 @@ struct ocelot_policer {
 
 struct ocelot_skb_cb {
 	struct sk_buff *clone;
+	u8 ptp_cmd;
 	u8 ts_id;
 };
 
@@ -748,15 +749,16 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
 void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
 			      u32 val, u32 reg, u32 offset);
 
-/* Packet I/O */
 #if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB)
 
+/* Packet I/O */
 bool ocelot_can_inject(struct ocelot *ocelot, int grp);
 void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 			      u32 rew_op, struct sk_buff *skb);
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
 void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
 
+u32 ocelot_ptp_rew_op(struct sk_buff *skb);
 #else
 
 static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp)
@@ -780,6 +782,10 @@ static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
 {
 }
 
+static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
+{
+	return 0;
+}
 #endif
 
 /* Hardware initialization */
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index cbc2bd643ab2..5baba7021427 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -111,6 +111,8 @@ config NET_DSA_TAG_RTL4_A
 
 config NET_DSA_TAG_OCELOT
 	tristate "Tag driver for Ocelot family of switches, using NPI port"
+	depends on MSCC_OCELOT_SWITCH_LIB || \
+		   (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
 	select PACKING
 	help
 	  Say Y or M if you want to enable NPI tagging for the Ocelot switches
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 1100a16f1032..91f0fd1242cd 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -5,33 +5,14 @@
 #include <soc/mscc/ocelot.h>
 #include "dsa_priv.h"
 
-static void ocelot_xmit_ptp(struct dsa_port *dp, void *injection,
-			    struct sk_buff *clone)
-{
-	struct ocelot *ocelot = dp->ds->priv;
-	struct ocelot_port *ocelot_port;
-	u64 rew_op;
-
-	ocelot_port = ocelot->ports[dp->index];
-	rew_op = ocelot_port->ptp_cmd;
-
-	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
-	 * by ocelot_port_add_txtstamp_skb
-	 */
-	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
-
-	ocelot_ifh_set_rew_op(injection, rew_op);
-}
-
 static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
 			       __be32 ifh_prefix, void **ifh)
 {
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
 	struct dsa_switch *ds = dp->ds;
 	void *injection;
 	__be32 *prefix;
+	u32 rew_op = 0;
 
 	injection = skb_push(skb, OCELOT_TAG_LEN);
 	prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN);
@@ -42,9 +23,9 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
 	ocelot_ifh_set_src(injection, ds->num_ports);
 	ocelot_ifh_set_qos_class(injection, skb->priority);
 
-	/* TX timestamping was requested */
-	if (clone)
-		ocelot_xmit_ptp(dp, injection, clone);
+	rew_op = ocelot_ptp_rew_op(skb);
+	if (rew_op)
+		ocelot_ifh_set_rew_op(injection, rew_op);
 
 	*ifh = injection;
 }
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index a001a7e3f575..62a93303bd63 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -13,32 +13,6 @@
 #include <soc/mscc/ocelot_ptp.h>
 #include "dsa_priv.h"
 
-static struct sk_buff *ocelot_xmit_ptp(struct dsa_port *dp,
-				       struct sk_buff *skb,
-				       struct sk_buff *clone)
-{
-	struct ocelot *ocelot = dp->ds->priv;
-	struct ocelot_port *ocelot_port;
-	int port = dp->index;
-	u32 rew_op;
-
-	if (!ocelot_can_inject(ocelot, 0))
-		return NULL;
-
-	ocelot_port = ocelot->ports[port];
-	rew_op = ocelot_port->ptp_cmd;
-
-	/* Retrieve timestamp ID populated inside OCELOT_SKB_CB(clone)->ts_id
-	 * by ocelot_port_add_txtstamp_skb
-	 */
-	if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
-		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
-
-	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
-
-	return NULL;
-}
-
 static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
@@ -46,11 +20,18 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
-	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
+	struct ocelot *ocelot = dp->ds->priv;
+	int port = dp->index;
+	u32 rew_op = 0;
+
+	rew_op = ocelot_ptp_rew_op(skb);
+	if (rew_op) {
+		if (!ocelot_can_inject(ocelot, 0))
+			return NULL;
 
-	/* TX timestamping was requested, so inject through MMIO */
-	if (clone)
-		return ocelot_xmit_ptp(dp, skb, clone);
+		ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
+		return NULL;
+	}
 
 	return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
 			      ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
-- 
cgit v1.2.3


From 4b5baca0403e2b6308e68938dc4d94912f5b8e28 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:01 +0200
Subject: net: dsa: microchip: ksz8795: change drivers prefix to be generic

The driver can be used on other chips of this type. To reflect
this we rename the drivers prefix from ksz8795 to ksz8.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c     | 224 ++++++++++++++++----------------
 drivers/net/dsa/microchip/ksz8795_spi.c |   2 +-
 drivers/net/dsa/microchip/ksz_common.h  |   2 +-
 3 files changed, 111 insertions(+), 117 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index b4b7de63ca79..91297df4371c 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -74,7 +74,7 @@ static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
 			   bits, set ? bits : 0);
 }
 
-static int ksz8795_reset_switch(struct ksz_device *dev)
+static int ksz8_reset_switch(struct ksz_device *dev)
 {
 	/* reset switch */
 	ksz_write8(dev, REG_POWER_MANAGEMENT_1,
@@ -117,8 +117,7 @@ static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue)
 			true);
 }
 
-static void ksz8795_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
-			      u64 *cnt)
+static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
 {
 	u16 ctrl_addr;
 	u32 data;
@@ -148,8 +147,8 @@ static void ksz8795_r_mib_cnt(struct ksz_device *dev, int port, u16 addr,
 	mutex_unlock(&dev->alu_mutex);
 }
 
-static void ksz8795_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
-			      u64 *dropped, u64 *cnt)
+static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+			   u64 *dropped, u64 *cnt)
 {
 	u16 ctrl_addr;
 	u32 data;
@@ -195,7 +194,7 @@ static void ksz8795_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
 	mutex_unlock(&dev->alu_mutex);
 }
 
-static void ksz8795_freeze_mib(struct ksz_device *dev, int port, bool freeze)
+static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
 {
 	/* enable the port for flush/freeze function */
 	if (freeze)
@@ -207,7 +206,7 @@ static void ksz8795_freeze_mib(struct ksz_device *dev, int port, bool freeze)
 		ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), false);
 }
 
-static void ksz8795_port_init_cnt(struct ksz_device *dev, int port)
+static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
 {
 	struct ksz_port_mib *mib = &dev->ports[port].mib;
 
@@ -235,8 +234,7 @@ static void ksz8795_port_init_cnt(struct ksz_device *dev, int port)
 	memset(mib->counters, 0, dev->mib_cnt * sizeof(u64));
 }
 
-static void ksz8795_r_table(struct ksz_device *dev, int table, u16 addr,
-			    u64 *data)
+static void ksz8_r_table(struct ksz_device *dev, int table, u16 addr, u64 *data)
 {
 	u16 ctrl_addr;
 
@@ -248,8 +246,7 @@ static void ksz8795_r_table(struct ksz_device *dev, int table, u16 addr,
 	mutex_unlock(&dev->alu_mutex);
 }
 
-static void ksz8795_w_table(struct ksz_device *dev, int table, u16 addr,
-			    u64 data)
+static void ksz8_w_table(struct ksz_device *dev, int table, u16 addr, u64 data)
 {
 	u16 ctrl_addr;
 
@@ -261,7 +258,7 @@ static void ksz8795_w_table(struct ksz_device *dev, int table, u16 addr,
 	mutex_unlock(&dev->alu_mutex);
 }
 
-static int ksz8795_valid_dyn_entry(struct ksz_device *dev, u8 *data)
+static int ksz8_valid_dyn_entry(struct ksz_device *dev, u8 *data)
 {
 	int timeout = 100;
 
@@ -284,9 +281,9 @@ static int ksz8795_valid_dyn_entry(struct ksz_device *dev, u8 *data)
 	return 0;
 }
 
-static int ksz8795_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
-				   u8 *mac_addr, u8 *fid, u8 *src_port,
-				   u8 *timestamp, u16 *entries)
+static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
+				u8 *mac_addr, u8 *fid, u8 *src_port,
+				u8 *timestamp, u16 *entries)
 {
 	u32 data_hi, data_lo;
 	u16 ctrl_addr;
@@ -298,7 +295,7 @@ static int ksz8795_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
 	mutex_lock(&dev->alu_mutex);
 	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
 
-	rc = ksz8795_valid_dyn_entry(dev, &data);
+	rc = ksz8_valid_dyn_entry(dev, &data);
 	if (rc == -EAGAIN) {
 		if (addr == 0)
 			*entries = 0;
@@ -341,13 +338,13 @@ static int ksz8795_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
 	return rc;
 }
 
-static int ksz8795_r_sta_mac_table(struct ksz_device *dev, u16 addr,
-				   struct alu_struct *alu)
+static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+				struct alu_struct *alu)
 {
 	u32 data_hi, data_lo;
 	u64 data;
 
-	ksz8795_r_table(dev, TABLE_STATIC_MAC, addr, &data);
+	ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
 	data_hi = data >> 32;
 	data_lo = (u32)data;
 	if (data_hi & (STATIC_MAC_TABLE_VALID | STATIC_MAC_TABLE_OVERRIDE)) {
@@ -370,8 +367,8 @@ static int ksz8795_r_sta_mac_table(struct ksz_device *dev, u16 addr,
 	return -ENXIO;
 }
 
-static void ksz8795_w_sta_mac_table(struct ksz_device *dev, u16 addr,
-				    struct alu_struct *alu)
+static void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+				 struct alu_struct *alu)
 {
 	u32 data_hi, data_lo;
 	u64 data;
@@ -394,17 +391,17 @@ static void ksz8795_w_sta_mac_table(struct ksz_device *dev, u16 addr,
 		data_hi &= ~STATIC_MAC_TABLE_OVERRIDE;
 
 	data = (u64)data_hi << 32 | data_lo;
-	ksz8795_w_table(dev, TABLE_STATIC_MAC, addr, data);
+	ksz8_w_table(dev, TABLE_STATIC_MAC, addr, data);
 }
 
-static void ksz8795_from_vlan(u16 vlan, u8 *fid, u8 *member, u8 *valid)
+static void ksz8_from_vlan(u16 vlan, u8 *fid, u8 *member, u8 *valid)
 {
 	*fid = vlan & VLAN_TABLE_FID;
 	*member = (vlan & VLAN_TABLE_MEMBERSHIP) >> VLAN_TABLE_MEMBERSHIP_S;
 	*valid = !!(vlan & VLAN_TABLE_VALID);
 }
 
-static void ksz8795_to_vlan(u8 fid, u8 member, u8 valid, u16 *vlan)
+static void ksz8_to_vlan(u8 fid, u8 member, u8 valid, u16 *vlan)
 {
 	*vlan = fid;
 	*vlan |= (u16)member << VLAN_TABLE_MEMBERSHIP_S;
@@ -412,12 +409,12 @@ static void ksz8795_to_vlan(u8 fid, u8 member, u8 valid, u16 *vlan)
 		*vlan |= VLAN_TABLE_VALID;
 }
 
-static void ksz8795_r_vlan_entries(struct ksz_device *dev, u16 addr)
+static void ksz8_r_vlan_entries(struct ksz_device *dev, u16 addr)
 {
 	u64 data;
 	int i;
 
-	ksz8795_r_table(dev, TABLE_VLAN, addr, &data);
+	ksz8_r_table(dev, TABLE_VLAN, addr, &data);
 	addr *= dev->phy_port_cnt;
 	for (i = 0; i < dev->phy_port_cnt; i++) {
 		dev->vlan_cache[addr + i].table[0] = (u16)data;
@@ -425,7 +422,7 @@ static void ksz8795_r_vlan_entries(struct ksz_device *dev, u16 addr)
 	}
 }
 
-static void ksz8795_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan)
+static void ksz8_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan)
 {
 	int index;
 	u16 *data;
@@ -435,11 +432,11 @@ static void ksz8795_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan)
 	data = (u16 *)&buf;
 	addr = vid / dev->phy_port_cnt;
 	index = vid & 3;
-	ksz8795_r_table(dev, TABLE_VLAN, addr, &buf);
+	ksz8_r_table(dev, TABLE_VLAN, addr, &buf);
 	*vlan = data[index];
 }
 
-static void ksz8795_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
+static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
 {
 	int index;
 	u16 *data;
@@ -449,13 +446,13 @@ static void ksz8795_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
 	data = (u16 *)&buf;
 	addr = vid / dev->phy_port_cnt;
 	index = vid & 3;
-	ksz8795_r_table(dev, TABLE_VLAN, addr, &buf);
+	ksz8_r_table(dev, TABLE_VLAN, addr, &buf);
 	data[index] = vlan;
 	dev->vlan_cache[vid].table[0] = vlan;
-	ksz8795_w_table(dev, TABLE_VLAN, addr, buf);
+	ksz8_w_table(dev, TABLE_VLAN, addr, buf);
 }
 
-static void ksz8795_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
+static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 {
 	u8 restart, speed, ctrl, link;
 	int processed = true;
@@ -546,7 +543,7 @@ static void ksz8795_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 		*val = data;
 }
 
-static void ksz8795_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
+static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 {
 	u8 p = phy;
 	u8 restart, speed, ctrl, data;
@@ -644,15 +641,15 @@ static void ksz8795_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 	}
 }
 
-static enum dsa_tag_protocol ksz8795_get_tag_protocol(struct dsa_switch *ds,
-						      int port,
-						      enum dsa_tag_protocol mp)
+static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds,
+						   int port,
+						   enum dsa_tag_protocol mp)
 {
 	return DSA_TAG_PROTO_KSZ8795;
 }
 
-static void ksz8795_get_strings(struct dsa_switch *ds, int port,
-				u32 stringset, uint8_t *buf)
+static void ksz8_get_strings(struct dsa_switch *ds, int port,
+			     u32 stringset, uint8_t *buf)
 {
 	struct ksz_device *dev = ds->priv;
 	int i;
@@ -663,8 +660,7 @@ static void ksz8795_get_strings(struct dsa_switch *ds, int port,
 	}
 }
 
-static void ksz8795_cfg_port_member(struct ksz_device *dev, int port,
-				    u8 member)
+static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
 {
 	u8 data;
 
@@ -675,8 +671,7 @@ static void ksz8795_cfg_port_member(struct ksz_device *dev, int port,
 	dev->ports[port].member = member;
 }
 
-static void ksz8795_port_stp_state_set(struct dsa_switch *ds, int port,
-				       u8 state)
+static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
 	struct ksz_device *dev = ds->priv;
 	int forward = dev->member;
@@ -734,7 +729,7 @@ static void ksz8795_port_stp_state_set(struct dsa_switch *ds, int port,
 	p->stp_state = state;
 	/* Port membership may share register with STP state. */
 	if (member >= 0 && member != p->member)
-		ksz8795_cfg_port_member(dev, port, (u8)member);
+		ksz8_cfg_port_member(dev, port, (u8)member);
 
 	/* Check if forwarding needs to be updated. */
 	if (state != BR_STATE_FORWARDING) {
@@ -749,7 +744,7 @@ static void ksz8795_port_stp_state_set(struct dsa_switch *ds, int port,
 		ksz_update_port_member(dev, port);
 }
 
-static void ksz8795_flush_dyn_mac_table(struct ksz_device *dev, int port)
+static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
 {
 	u8 learn[DSA_MAX_PORTS];
 	int first, index, cnt;
@@ -782,9 +777,8 @@ static void ksz8795_flush_dyn_mac_table(struct ksz_device *dev, int port)
 	}
 }
 
-static int ksz8795_port_vlan_filtering(struct dsa_switch *ds, int port,
-				       bool flag,
-				       struct netlink_ext_ack *extack)
+static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag,
+				    struct netlink_ext_ack *extack)
 {
 	struct ksz_device *dev = ds->priv;
 
@@ -793,9 +787,9 @@ static int ksz8795_port_vlan_filtering(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static int ksz8795_port_vlan_add(struct dsa_switch *ds, int port,
-				 const struct switchdev_obj_port_vlan *vlan,
-				 struct netlink_ext_ack *extack)
+static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
+			      const struct switchdev_obj_port_vlan *vlan,
+			      struct netlink_ext_ack *extack)
 {
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	struct ksz_device *dev = ds->priv;
@@ -804,8 +798,8 @@ static int ksz8795_port_vlan_add(struct dsa_switch *ds, int port,
 
 	ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
 
-	ksz8795_r_vlan_table(dev, vlan->vid, &data);
-	ksz8795_from_vlan(data, &fid, &member, &valid);
+	ksz8_r_vlan_table(dev, vlan->vid, &data);
+	ksz8_from_vlan(data, &fid, &member, &valid);
 
 	/* First time to setup the VLAN entry. */
 	if (!valid) {
@@ -815,8 +809,8 @@ static int ksz8795_port_vlan_add(struct dsa_switch *ds, int port,
 	}
 	member |= BIT(port);
 
-	ksz8795_to_vlan(fid, member, valid, &data);
-	ksz8795_w_vlan_table(dev, vlan->vid, data);
+	ksz8_to_vlan(fid, member, valid, &data);
+	ksz8_w_vlan_table(dev, vlan->vid, data);
 
 	/* change PVID */
 	if (vlan->flags & BRIDGE_VLAN_INFO_PVID)
@@ -834,8 +828,8 @@ static int ksz8795_port_vlan_add(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port,
-				 const struct switchdev_obj_port_vlan *vlan)
+static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
+			      const struct switchdev_obj_port_vlan *vlan)
 {
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	struct ksz_device *dev = ds->priv;
@@ -847,8 +841,8 @@ static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port,
 
 	ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
 
-	ksz8795_r_vlan_table(dev, vlan->vid, &data);
-	ksz8795_from_vlan(data, &fid, &member, &valid);
+	ksz8_r_vlan_table(dev, vlan->vid, &data);
+	ksz8_from_vlan(data, &fid, &member, &valid);
 
 	member &= ~BIT(port);
 
@@ -861,8 +855,8 @@ static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port,
 	if (pvid == vlan->vid)
 		new_pvid = 1;
 
-	ksz8795_to_vlan(fid, member, valid, &data);
-	ksz8795_w_vlan_table(dev, vlan->vid, data);
+	ksz8_to_vlan(fid, member, valid, &data);
+	ksz8_w_vlan_table(dev, vlan->vid, data);
 
 	if (new_pvid != pvid)
 		ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, pvid);
@@ -870,9 +864,9 @@ static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static int ksz8795_port_mirror_add(struct dsa_switch *ds, int port,
-				   struct dsa_mall_mirror_tc_entry *mirror,
-				   bool ingress)
+static int ksz8_port_mirror_add(struct dsa_switch *ds, int port,
+				struct dsa_mall_mirror_tc_entry *mirror,
+				bool ingress)
 {
 	struct ksz_device *dev = ds->priv;
 
@@ -894,8 +888,8 @@ static int ksz8795_port_mirror_add(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static void ksz8795_port_mirror_del(struct dsa_switch *ds, int port,
-				    struct dsa_mall_mirror_tc_entry *mirror)
+static void ksz8_port_mirror_del(struct dsa_switch *ds, int port,
+				 struct dsa_mall_mirror_tc_entry *mirror)
 {
 	struct ksz_device *dev = ds->priv;
 	u8 data;
@@ -915,7 +909,7 @@ static void ksz8795_port_mirror_del(struct dsa_switch *ds, int port,
 			     PORT_MIRROR_SNIFFER, false);
 }
 
-static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port)
+static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
 	struct ksz_port *p = &dev->ports[port];
 	u8 data8, member;
@@ -981,10 +975,10 @@ static void ksz8795_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	} else {
 		member = dev->host_mask | p->vid_member;
 	}
-	ksz8795_cfg_port_member(dev, port, member);
+	ksz8_cfg_port_member(dev, port, member);
 }
 
-static void ksz8795_config_cpu_port(struct dsa_switch *ds)
+static void ksz8_config_cpu_port(struct dsa_switch *ds)
 {
 	struct ksz_device *dev = ds->priv;
 	struct ksz_port *p;
@@ -999,7 +993,7 @@ static void ksz8795_config_cpu_port(struct dsa_switch *ds)
 	p->vid_member = dev->port_mask;
 	p->on = 1;
 
-	ksz8795_port_setup(dev, dev->cpu_port, true);
+	ksz8_port_setup(dev, dev->cpu_port, true);
 	dev->member = dev->host_mask;
 
 	for (i = 0; i < dev->phy_port_cnt; i++) {
@@ -1010,7 +1004,7 @@ static void ksz8795_config_cpu_port(struct dsa_switch *ds)
 		 */
 		p->vid_member = BIT(i);
 		p->member = dev->port_mask;
-		ksz8795_port_stp_state_set(ds, i, BR_STATE_DISABLED);
+		ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED);
 
 		/* Last port may be disabled. */
 		if (i == dev->phy_port_cnt)
@@ -1034,7 +1028,7 @@ static void ksz8795_config_cpu_port(struct dsa_switch *ds)
 	}
 }
 
-static int ksz8795_setup(struct dsa_switch *ds)
+static int ksz8_setup(struct dsa_switch *ds)
 {
 	struct ksz_device *dev = ds->priv;
 	struct alu_struct alu;
@@ -1045,7 +1039,7 @@ static int ksz8795_setup(struct dsa_switch *ds)
 	if (!dev->vlan_cache)
 		return -ENOMEM;
 
-	ret = ksz8795_reset_switch(dev);
+	ret = ksz8_reset_switch(dev);
 	if (ret) {
 		dev_err(ds->dev, "failed to reset switch\n");
 		return ret;
@@ -1068,7 +1062,7 @@ static int ksz8795_setup(struct dsa_switch *ds)
 			   UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP,
 			   UNICAST_VLAN_BOUNDARY | NO_EXC_COLLISION_DROP);
 
-	ksz8795_config_cpu_port(ds);
+	ksz8_config_cpu_port(ds);
 
 	ksz_cfg(dev, REG_SW_CTRL_2, MULTICAST_STORM_DISABLE, true);
 
@@ -1083,7 +1077,7 @@ static int ksz8795_setup(struct dsa_switch *ds)
 			   BROADCAST_STORM_PROT_RATE) / 100);
 
 	for (i = 0; i < (dev->num_vlans / 4); i++)
-		ksz8795_r_vlan_entries(dev, i);
+		ksz8_r_vlan_entries(dev, i);
 
 	/* Setup STP address for STP operation. */
 	memset(&alu, 0, sizeof(alu));
@@ -1092,7 +1086,7 @@ static int ksz8795_setup(struct dsa_switch *ds)
 	alu.is_override = true;
 	alu.port_forward = dev->host_mask;
 
-	ksz8795_w_sta_mac_table(dev, 0, &alu);
+	ksz8_w_sta_mac_table(dev, 0, &alu);
 
 	ksz_init_mib_timer(dev);
 
@@ -1101,36 +1095,36 @@ static int ksz8795_setup(struct dsa_switch *ds)
 	return 0;
 }
 
-static const struct dsa_switch_ops ksz8795_switch_ops = {
-	.get_tag_protocol	= ksz8795_get_tag_protocol,
-	.setup			= ksz8795_setup,
+static const struct dsa_switch_ops ksz8_switch_ops = {
+	.get_tag_protocol	= ksz8_get_tag_protocol,
+	.setup			= ksz8_setup,
 	.phy_read		= ksz_phy_read16,
 	.phy_write		= ksz_phy_write16,
 	.phylink_mac_link_down	= ksz_mac_link_down,
 	.port_enable		= ksz_enable_port,
-	.get_strings		= ksz8795_get_strings,
+	.get_strings		= ksz8_get_strings,
 	.get_ethtool_stats	= ksz_get_ethtool_stats,
 	.get_sset_count		= ksz_sset_count,
 	.port_bridge_join	= ksz_port_bridge_join,
 	.port_bridge_leave	= ksz_port_bridge_leave,
-	.port_stp_state_set	= ksz8795_port_stp_state_set,
+	.port_stp_state_set	= ksz8_port_stp_state_set,
 	.port_fast_age		= ksz_port_fast_age,
-	.port_vlan_filtering	= ksz8795_port_vlan_filtering,
-	.port_vlan_add		= ksz8795_port_vlan_add,
-	.port_vlan_del		= ksz8795_port_vlan_del,
+	.port_vlan_filtering	= ksz8_port_vlan_filtering,
+	.port_vlan_add		= ksz8_port_vlan_add,
+	.port_vlan_del		= ksz8_port_vlan_del,
 	.port_fdb_dump		= ksz_port_fdb_dump,
 	.port_mdb_add           = ksz_port_mdb_add,
 	.port_mdb_del           = ksz_port_mdb_del,
-	.port_mirror_add	= ksz8795_port_mirror_add,
-	.port_mirror_del	= ksz8795_port_mirror_del,
+	.port_mirror_add	= ksz8_port_mirror_add,
+	.port_mirror_del	= ksz8_port_mirror_del,
 };
 
-static u32 ksz8795_get_port_addr(int port, int offset)
+static u32 ksz8_get_port_addr(int port, int offset)
 {
 	return PORT_CTRL_ADDR(port, offset);
 }
 
-static int ksz8795_switch_detect(struct ksz_device *dev)
+static int ksz8_switch_detect(struct ksz_device *dev)
 {
 	u8 id1, id2;
 	u16 id16;
@@ -1174,7 +1168,7 @@ struct ksz_chip_data {
 	int port_cnt;
 };
 
-static const struct ksz_chip_data ksz8795_switch_chips[] = {
+static const struct ksz_chip_data ksz8_switch_chips[] = {
 	{
 		.chip_id = 0x8795,
 		.dev_name = "KSZ8795",
@@ -1218,14 +1212,14 @@ static const struct ksz_chip_data ksz8795_switch_chips[] = {
 	},
 };
 
-static int ksz8795_switch_init(struct ksz_device *dev)
+static int ksz8_switch_init(struct ksz_device *dev)
 {
 	int i;
 
-	dev->ds->ops = &ksz8795_switch_ops;
+	dev->ds->ops = &ksz8_switch_ops;
 
-	for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) {
-		const struct ksz_chip_data *chip = &ksz8795_switch_chips[i];
+	for (i = 0; i < ARRAY_SIZE(ksz8_switch_chips); i++) {
+		const struct ksz_chip_data *chip = &ksz8_switch_chips[i];
 
 		if (dev->chip_id == chip->chip_id) {
 			dev->name = chip->dev_name;
@@ -1272,36 +1266,36 @@ static int ksz8795_switch_init(struct ksz_device *dev)
 	return 0;
 }
 
-static void ksz8795_switch_exit(struct ksz_device *dev)
+static void ksz8_switch_exit(struct ksz_device *dev)
 {
-	ksz8795_reset_switch(dev);
+	ksz8_reset_switch(dev);
 }
 
-static const struct ksz_dev_ops ksz8795_dev_ops = {
-	.get_port_addr = ksz8795_get_port_addr,
-	.cfg_port_member = ksz8795_cfg_port_member,
-	.flush_dyn_mac_table = ksz8795_flush_dyn_mac_table,
-	.port_setup = ksz8795_port_setup,
-	.r_phy = ksz8795_r_phy,
-	.w_phy = ksz8795_w_phy,
-	.r_dyn_mac_table = ksz8795_r_dyn_mac_table,
-	.r_sta_mac_table = ksz8795_r_sta_mac_table,
-	.w_sta_mac_table = ksz8795_w_sta_mac_table,
-	.r_mib_cnt = ksz8795_r_mib_cnt,
-	.r_mib_pkt = ksz8795_r_mib_pkt,
-	.freeze_mib = ksz8795_freeze_mib,
-	.port_init_cnt = ksz8795_port_init_cnt,
-	.shutdown = ksz8795_reset_switch,
-	.detect = ksz8795_switch_detect,
-	.init = ksz8795_switch_init,
-	.exit = ksz8795_switch_exit,
+static const struct ksz_dev_ops ksz8_dev_ops = {
+	.get_port_addr = ksz8_get_port_addr,
+	.cfg_port_member = ksz8_cfg_port_member,
+	.flush_dyn_mac_table = ksz8_flush_dyn_mac_table,
+	.port_setup = ksz8_port_setup,
+	.r_phy = ksz8_r_phy,
+	.w_phy = ksz8_w_phy,
+	.r_dyn_mac_table = ksz8_r_dyn_mac_table,
+	.r_sta_mac_table = ksz8_r_sta_mac_table,
+	.w_sta_mac_table = ksz8_w_sta_mac_table,
+	.r_mib_cnt = ksz8_r_mib_cnt,
+	.r_mib_pkt = ksz8_r_mib_pkt,
+	.freeze_mib = ksz8_freeze_mib,
+	.port_init_cnt = ksz8_port_init_cnt,
+	.shutdown = ksz8_reset_switch,
+	.detect = ksz8_switch_detect,
+	.init = ksz8_switch_init,
+	.exit = ksz8_switch_exit,
 };
 
-int ksz8795_switch_register(struct ksz_device *dev)
+int ksz8_switch_register(struct ksz_device *dev)
 {
-	return ksz_switch_register(dev, &ksz8795_dev_ops);
+	return ksz_switch_register(dev, &ksz8_dev_ops);
 }
-EXPORT_SYMBOL(ksz8795_switch_register);
+EXPORT_SYMBOL(ksz8_switch_register);
 
 MODULE_AUTHOR("Tristram Ha <Tristram.Ha@microchip.com>");
 MODULE_DESCRIPTION("Microchip KSZ8795 Series Switch DSA Driver");
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index f98432a3e2b5..45420c07c99f 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -55,7 +55,7 @@ static int ksz8795_spi_probe(struct spi_device *spi)
 	if (ret)
 		return ret;
 
-	ret = ksz8795_switch_register(dev);
+	ret = ksz8_switch_register(dev);
 
 	/* Main DSA driver may not be started yet. */
 	if (ret)
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index f212775372ce..f0681a891cca 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -142,7 +142,7 @@ int ksz_switch_register(struct ksz_device *dev,
 			const struct ksz_dev_ops *ops);
 void ksz_switch_remove(struct ksz_device *dev);
 
-int ksz8795_switch_register(struct ksz_device *dev);
+int ksz8_switch_register(struct ksz_device *dev);
 int ksz9477_switch_register(struct ksz_device *dev);
 
 void ksz_update_port_member(struct ksz_device *dev, int port);
-- 
cgit v1.2.3


From c2ac4d2ac5347a0d2aaabf3eca5ba2478d0617a9 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:02 +0200
Subject: net: dsa: microchip: ksz8795: move cpu_select_interface to extra
 function

This patch moves the cpu interface selection code to a individual
function specific for ksz8795. It will make it simpler to customize the
code path for different switches supported by this driver.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c | 92 ++++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 42 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 91297df4371c..85fb727c13eb 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -909,10 +909,58 @@ static void ksz8_port_mirror_del(struct dsa_switch *ds, int port,
 			     PORT_MIRROR_SNIFFER, false);
 }
 
+static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port)
+{
+	struct ksz_port *p = &dev->ports[port];
+	u8 data8;
+
+	if (!p->interface && dev->compat_interface) {
+		dev_warn(dev->dev,
+			 "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. "
+			 "Please update your device tree.\n",
+			 port);
+		p->interface = dev->compat_interface;
+	}
+
+	/* Configure MII interface for proper network communication. */
+	ksz_read8(dev, REG_PORT_5_CTRL_6, &data8);
+	data8 &= ~PORT_INTERFACE_TYPE;
+	data8 &= ~PORT_GMII_1GPS_MODE;
+	switch (p->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		p->phydev.speed = SPEED_100;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		data8 |= PORT_INTERFACE_RMII;
+		p->phydev.speed = SPEED_100;
+		break;
+	case PHY_INTERFACE_MODE_GMII:
+		data8 |= PORT_GMII_1GPS_MODE;
+		data8 |= PORT_INTERFACE_GMII;
+		p->phydev.speed = SPEED_1000;
+		break;
+	default:
+		data8 &= ~PORT_RGMII_ID_IN_ENABLE;
+		data8 &= ~PORT_RGMII_ID_OUT_ENABLE;
+		if (p->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+		    p->interface == PHY_INTERFACE_MODE_RGMII_RXID)
+			data8 |= PORT_RGMII_ID_IN_ENABLE;
+		if (p->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+		    p->interface == PHY_INTERFACE_MODE_RGMII_TXID)
+			data8 |= PORT_RGMII_ID_OUT_ENABLE;
+		data8 |= PORT_GMII_1GPS_MODE;
+		data8 |= PORT_INTERFACE_RGMII;
+		p->phydev.speed = SPEED_1000;
+		break;
+	}
+	ksz_write8(dev, REG_PORT_5_CTRL_6, data8);
+	p->phydev.duplex = 1;
+}
+
 static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
 	struct ksz_port *p = &dev->ports[port];
-	u8 data8, member;
+	u8 member;
 
 	/* enable broadcast storm limit */
 	ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
@@ -929,47 +977,7 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_ENABLE, true);
 
 	if (cpu_port) {
-		if (!p->interface && dev->compat_interface) {
-			dev_warn(dev->dev,
-				 "Using legacy switch \"phy-mode\" property, because it is missing on port %d node. "
-				 "Please update your device tree.\n",
-				 port);
-			p->interface = dev->compat_interface;
-		}
-
-		/* Configure MII interface for proper network communication. */
-		ksz_read8(dev, REG_PORT_5_CTRL_6, &data8);
-		data8 &= ~PORT_INTERFACE_TYPE;
-		data8 &= ~PORT_GMII_1GPS_MODE;
-		switch (p->interface) {
-		case PHY_INTERFACE_MODE_MII:
-			p->phydev.speed = SPEED_100;
-			break;
-		case PHY_INTERFACE_MODE_RMII:
-			data8 |= PORT_INTERFACE_RMII;
-			p->phydev.speed = SPEED_100;
-			break;
-		case PHY_INTERFACE_MODE_GMII:
-			data8 |= PORT_GMII_1GPS_MODE;
-			data8 |= PORT_INTERFACE_GMII;
-			p->phydev.speed = SPEED_1000;
-			break;
-		default:
-			data8 &= ~PORT_RGMII_ID_IN_ENABLE;
-			data8 &= ~PORT_RGMII_ID_OUT_ENABLE;
-			if (p->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-			    p->interface == PHY_INTERFACE_MODE_RGMII_RXID)
-				data8 |= PORT_RGMII_ID_IN_ENABLE;
-			if (p->interface == PHY_INTERFACE_MODE_RGMII_ID ||
-			    p->interface == PHY_INTERFACE_MODE_RGMII_TXID)
-				data8 |= PORT_RGMII_ID_OUT_ENABLE;
-			data8 |= PORT_GMII_1GPS_MODE;
-			data8 |= PORT_INTERFACE_RGMII;
-			p->phydev.speed = SPEED_1000;
-			break;
-		}
-		ksz_write8(dev, REG_PORT_5_CTRL_6, data8);
-		p->phydev.duplex = 1;
+		ksz8795_cpu_interface_select(dev, port);
 
 		member = dev->port_mask;
 	} else {
-- 
cgit v1.2.3


From 9f73e11250fb3948a8599d72318951d5e93b1eaf Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:03 +0200
Subject: net: dsa: microchip: ksz8795: move register offsets and shifts to
 separate struct

In order to get this driver used with other switches the functions need
to use different offsets and register shifts. This patch changes the
direct use of the register defines to register description structures,
which can be set depending on the chips register layout.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8.h        |  69 ++++++++
 drivers/net/dsa/microchip/ksz8795.c     | 287 +++++++++++++++++++++++---------
 drivers/net/dsa/microchip/ksz8795_reg.h |  85 ----------
 3 files changed, 281 insertions(+), 160 deletions(-)
 create mode 100644 drivers/net/dsa/microchip/ksz8.h

diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
new file mode 100644
index 000000000000..9d611895d3cf
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz8.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Microchip KSZ8XXX series register access
+ *
+ * Copyright (C) 2020 Pengutronix, Michael Grzeschik <kernel@pengutronix.de>
+ */
+
+#ifndef __KSZ8XXX_H
+#define __KSZ8XXX_H
+#include <linux/kernel.h>
+
+enum ksz_regs {
+	REG_IND_CTRL_0,
+	REG_IND_DATA_8,
+	REG_IND_DATA_CHECK,
+	REG_IND_DATA_HI,
+	REG_IND_DATA_LO,
+	REG_IND_MIB_CHECK,
+	P_FORCE_CTRL,
+	P_LINK_STATUS,
+	P_LOCAL_CTRL,
+	P_NEG_RESTART_CTRL,
+	P_REMOTE_STATUS,
+	P_SPEED_STATUS,
+	S_TAIL_TAG_CTRL,
+};
+
+enum ksz_masks {
+	PORT_802_1P_REMAPPING,
+	SW_TAIL_TAG_ENABLE,
+	MIB_COUNTER_OVERFLOW,
+	MIB_COUNTER_VALID,
+	VLAN_TABLE_FID,
+	VLAN_TABLE_MEMBERSHIP,
+	VLAN_TABLE_VALID,
+	STATIC_MAC_TABLE_VALID,
+	STATIC_MAC_TABLE_USE_FID,
+	STATIC_MAC_TABLE_FID,
+	STATIC_MAC_TABLE_OVERRIDE,
+	STATIC_MAC_TABLE_FWD_PORTS,
+	DYNAMIC_MAC_TABLE_ENTRIES_H,
+	DYNAMIC_MAC_TABLE_MAC_EMPTY,
+	DYNAMIC_MAC_TABLE_NOT_READY,
+	DYNAMIC_MAC_TABLE_ENTRIES,
+	DYNAMIC_MAC_TABLE_FID,
+	DYNAMIC_MAC_TABLE_SRC_PORT,
+	DYNAMIC_MAC_TABLE_TIMESTAMP,
+};
+
+enum ksz_shifts {
+	VLAN_TABLE_MEMBERSHIP_S,
+	VLAN_TABLE,
+	STATIC_MAC_FWD_PORTS,
+	STATIC_MAC_FID,
+	DYNAMIC_MAC_ENTRIES_H,
+	DYNAMIC_MAC_ENTRIES,
+	DYNAMIC_MAC_FID,
+	DYNAMIC_MAC_TIMESTAMP,
+	DYNAMIC_MAC_SRC_PORT,
+};
+
+struct ksz8 {
+	const u8 *regs;
+	const u32 *masks;
+	const u8 *shifts;
+	void *priv;
+};
+
+#endif
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 85fb727c13eb..75518b4a253d 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -20,6 +20,57 @@
 
 #include "ksz_common.h"
 #include "ksz8795_reg.h"
+#include "ksz8.h"
+
+static const u8 ksz8795_regs[] = {
+	[REG_IND_CTRL_0]		= 0x6E,
+	[REG_IND_DATA_8]		= 0x70,
+	[REG_IND_DATA_CHECK]		= 0x72,
+	[REG_IND_DATA_HI]		= 0x71,
+	[REG_IND_DATA_LO]		= 0x75,
+	[REG_IND_MIB_CHECK]		= 0x74,
+	[P_FORCE_CTRL]			= 0x0C,
+	[P_LINK_STATUS]			= 0x0E,
+	[P_LOCAL_CTRL]			= 0x07,
+	[P_NEG_RESTART_CTRL]		= 0x0D,
+	[P_REMOTE_STATUS]		= 0x08,
+	[P_SPEED_STATUS]		= 0x09,
+	[S_TAIL_TAG_CTRL]		= 0x0C,
+};
+
+static const u32 ksz8795_masks[] = {
+	[PORT_802_1P_REMAPPING]		= BIT(7),
+	[SW_TAIL_TAG_ENABLE]		= BIT(1),
+	[MIB_COUNTER_OVERFLOW]		= BIT(6),
+	[MIB_COUNTER_VALID]		= BIT(5),
+	[VLAN_TABLE_FID]		= GENMASK(6, 0),
+	[VLAN_TABLE_MEMBERSHIP]		= GENMASK(11, 7),
+	[VLAN_TABLE_VALID]		= BIT(12),
+	[STATIC_MAC_TABLE_VALID]	= BIT(21),
+	[STATIC_MAC_TABLE_USE_FID]	= BIT(23),
+	[STATIC_MAC_TABLE_FID]		= GENMASK(30, 24),
+	[STATIC_MAC_TABLE_OVERRIDE]	= BIT(26),
+	[STATIC_MAC_TABLE_FWD_PORTS]	= GENMASK(24, 20),
+	[DYNAMIC_MAC_TABLE_ENTRIES_H]	= GENMASK(6, 0),
+	[DYNAMIC_MAC_TABLE_MAC_EMPTY]	= BIT(8),
+	[DYNAMIC_MAC_TABLE_NOT_READY]	= BIT(7),
+	[DYNAMIC_MAC_TABLE_ENTRIES]	= GENMASK(31, 29),
+	[DYNAMIC_MAC_TABLE_FID]		= GENMASK(26, 20),
+	[DYNAMIC_MAC_TABLE_SRC_PORT]	= GENMASK(26, 24),
+	[DYNAMIC_MAC_TABLE_TIMESTAMP]	= GENMASK(28, 27),
+};
+
+static const u8 ksz8795_shifts[] = {
+	[VLAN_TABLE_MEMBERSHIP_S]	= 7,
+	[VLAN_TABLE]			= 16,
+	[STATIC_MAC_FWD_PORTS]		= 16,
+	[STATIC_MAC_FID]		= 24,
+	[DYNAMIC_MAC_ENTRIES_H]		= 3,
+	[DYNAMIC_MAC_ENTRIES]		= 29,
+	[DYNAMIC_MAC_FID]		= 16,
+	[DYNAMIC_MAC_TIMESTAMP]		= 27,
+	[DYNAMIC_MAC_SRC_PORT]		= 24,
+};
 
 static const struct {
 	char string[ETH_GSTRING_LEN];
@@ -119,26 +170,32 @@ static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue)
 
 static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u32 *masks;
+	const u8 *regs;
 	u16 ctrl_addr;
 	u32 data;
 	u8 check;
 	int loop;
 
+	masks = ksz8->masks;
+	regs = ksz8->regs;
+
 	ctrl_addr = addr + dev->reg_mib_cnt * port;
 	ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ);
 
 	mutex_lock(&dev->alu_mutex);
-	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
 
 	/* It is almost guaranteed to always read the valid bit because of
 	 * slow SPI speed.
 	 */
 	for (loop = 2; loop > 0; loop--) {
-		ksz_read8(dev, REG_IND_MIB_CHECK, &check);
+		ksz_read8(dev, regs[REG_IND_MIB_CHECK], &check);
 
-		if (check & MIB_COUNTER_VALID) {
-			ksz_read32(dev, REG_IND_DATA_LO, &data);
-			if (check & MIB_COUNTER_OVERFLOW)
+		if (check & masks[MIB_COUNTER_VALID]) {
+			ksz_read32(dev, regs[REG_IND_DATA_LO], &data);
+			if (check & masks[MIB_COUNTER_OVERFLOW])
 				*cnt += MIB_COUNTER_VALUE + 1;
 			*cnt += data & MIB_COUNTER_VALUE;
 			break;
@@ -150,27 +207,33 @@ static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
 static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
 			   u64 *dropped, u64 *cnt)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u32 *masks;
+	const u8 *regs;
 	u16 ctrl_addr;
 	u32 data;
 	u8 check;
 	int loop;
 
+	masks = ksz8->masks;
+	regs = ksz8->regs;
+
 	addr -= dev->reg_mib_cnt;
 	ctrl_addr = (KS_MIB_TOTAL_RX_1 - KS_MIB_TOTAL_RX_0) * port;
 	ctrl_addr += addr + KS_MIB_TOTAL_RX_0;
 	ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ);
 
 	mutex_lock(&dev->alu_mutex);
-	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
 
 	/* It is almost guaranteed to always read the valid bit because of
 	 * slow SPI speed.
 	 */
 	for (loop = 2; loop > 0; loop--) {
-		ksz_read8(dev, REG_IND_MIB_CHECK, &check);
+		ksz_read8(dev, regs[REG_IND_MIB_CHECK], &check);
 
-		if (check & MIB_COUNTER_VALID) {
-			ksz_read32(dev, REG_IND_DATA_LO, &data);
+		if (check & masks[MIB_COUNTER_VALID]) {
+			ksz_read32(dev, regs[REG_IND_DATA_LO], &data);
 			if (addr < 2) {
 				u64 total;
 
@@ -178,13 +241,13 @@ static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
 				total <<= 32;
 				*cnt += total;
 				*cnt += data;
-				if (check & MIB_COUNTER_OVERFLOW) {
+				if (check & masks[MIB_COUNTER_OVERFLOW]) {
 					total = MIB_TOTAL_BYTES_H + 1;
 					total <<= 32;
 					*cnt += total;
 				}
 			} else {
-				if (check & MIB_COUNTER_OVERFLOW)
+				if (check & masks[MIB_COUNTER_OVERFLOW])
 					*cnt += MIB_PACKET_DROPPED + 1;
 				*cnt += data & MIB_PACKET_DROPPED;
 			}
@@ -236,46 +299,56 @@ static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
 
 static void ksz8_r_table(struct ksz_device *dev, int table, u16 addr, u64 *data)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *regs = ksz8->regs;
 	u16 ctrl_addr;
 
 	ctrl_addr = IND_ACC_TABLE(table | TABLE_READ) | addr;
 
 	mutex_lock(&dev->alu_mutex);
-	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
-	ksz_read64(dev, REG_IND_DATA_HI, data);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+	ksz_read64(dev, regs[REG_IND_DATA_HI], data);
 	mutex_unlock(&dev->alu_mutex);
 }
 
 static void ksz8_w_table(struct ksz_device *dev, int table, u16 addr, u64 data)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *regs = ksz8->regs;
 	u16 ctrl_addr;
 
 	ctrl_addr = IND_ACC_TABLE(table) | addr;
 
 	mutex_lock(&dev->alu_mutex);
-	ksz_write64(dev, REG_IND_DATA_HI, data);
-	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
+	ksz_write64(dev, regs[REG_IND_DATA_HI], data);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
 	mutex_unlock(&dev->alu_mutex);
 }
 
 static int ksz8_valid_dyn_entry(struct ksz_device *dev, u8 *data)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	int timeout = 100;
+	const u32 *masks;
+	const u8 *regs;
+
+	masks = ksz8->masks;
+	regs = ksz8->regs;
 
 	do {
-		ksz_read8(dev, REG_IND_DATA_CHECK, data);
+		ksz_read8(dev, regs[REG_IND_DATA_CHECK], data);
 		timeout--;
-	} while ((*data & DYNAMIC_MAC_TABLE_NOT_READY) && timeout);
+	} while ((*data & masks[DYNAMIC_MAC_TABLE_NOT_READY]) && timeout);
 
 	/* Entry is not ready for accessing. */
-	if (*data & DYNAMIC_MAC_TABLE_NOT_READY) {
+	if (*data & masks[DYNAMIC_MAC_TABLE_NOT_READY]) {
 		return -EAGAIN;
 	/* Entry is ready for accessing. */
 	} else {
-		ksz_read8(dev, REG_IND_DATA_8, data);
+		ksz_read8(dev, regs[REG_IND_DATA_8], data);
 
 		/* There is no valid entry in the table. */
-		if (*data & DYNAMIC_MAC_TABLE_MAC_EMPTY)
+		if (*data & masks[DYNAMIC_MAC_TABLE_MAC_EMPTY])
 			return -ENXIO;
 	}
 	return 0;
@@ -285,15 +358,23 @@ static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
 				u8 *mac_addr, u8 *fid, u8 *src_port,
 				u8 *timestamp, u16 *entries)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	u32 data_hi, data_lo;
+	const u8 *shifts;
+	const u32 *masks;
+	const u8 *regs;
 	u16 ctrl_addr;
 	u8 data;
 	int rc;
 
+	shifts = ksz8->shifts;
+	masks = ksz8->masks;
+	regs = ksz8->regs;
+
 	ctrl_addr = IND_ACC_TABLE(TABLE_DYNAMIC_MAC | TABLE_READ) | addr;
 
 	mutex_lock(&dev->alu_mutex);
-	ksz_write16(dev, REG_IND_CTRL_0, ctrl_addr);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
 
 	rc = ksz8_valid_dyn_entry(dev, &data);
 	if (rc == -EAGAIN) {
@@ -306,23 +387,23 @@ static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
 		u64 buf = 0;
 		int cnt;
 
-		ksz_read64(dev, REG_IND_DATA_HI, &buf);
+		ksz_read64(dev, regs[REG_IND_DATA_HI], &buf);
 		data_hi = (u32)(buf >> 32);
 		data_lo = (u32)buf;
 
 		/* Check out how many valid entry in the table. */
-		cnt = data & DYNAMIC_MAC_TABLE_ENTRIES_H;
-		cnt <<= DYNAMIC_MAC_ENTRIES_H_S;
-		cnt |= (data_hi & DYNAMIC_MAC_TABLE_ENTRIES) >>
-			DYNAMIC_MAC_ENTRIES_S;
+		cnt = data & masks[DYNAMIC_MAC_TABLE_ENTRIES_H];
+		cnt <<= shifts[DYNAMIC_MAC_ENTRIES_H];
+		cnt |= (data_hi & masks[DYNAMIC_MAC_TABLE_ENTRIES]) >>
+			shifts[DYNAMIC_MAC_ENTRIES];
 		*entries = cnt + 1;
 
-		*fid = (data_hi & DYNAMIC_MAC_TABLE_FID) >>
-			DYNAMIC_MAC_FID_S;
-		*src_port = (data_hi & DYNAMIC_MAC_TABLE_SRC_PORT) >>
-			DYNAMIC_MAC_SRC_PORT_S;
-		*timestamp = (data_hi & DYNAMIC_MAC_TABLE_TIMESTAMP) >>
-			DYNAMIC_MAC_TIMESTAMP_S;
+		*fid = (data_hi & masks[DYNAMIC_MAC_TABLE_FID]) >>
+			shifts[DYNAMIC_MAC_FID];
+		*src_port = (data_hi & masks[DYNAMIC_MAC_TABLE_SRC_PORT]) >>
+			shifts[DYNAMIC_MAC_SRC_PORT];
+		*timestamp = (data_hi & masks[DYNAMIC_MAC_TABLE_TIMESTAMP]) >>
+			shifts[DYNAMIC_MAC_TIMESTAMP];
 
 		mac_addr[5] = (u8)data_lo;
 		mac_addr[4] = (u8)(data_lo >> 8);
@@ -341,27 +422,37 @@ static int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr,
 static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
 				struct alu_struct *alu)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	u32 data_hi, data_lo;
+	const u8 *shifts;
+	const u32 *masks;
 	u64 data;
 
+	shifts = ksz8->shifts;
+	masks = ksz8->masks;
+
 	ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
 	data_hi = data >> 32;
 	data_lo = (u32)data;
-	if (data_hi & (STATIC_MAC_TABLE_VALID | STATIC_MAC_TABLE_OVERRIDE)) {
+	if (data_hi & (masks[STATIC_MAC_TABLE_VALID] |
+			masks[STATIC_MAC_TABLE_OVERRIDE])) {
 		alu->mac[5] = (u8)data_lo;
 		alu->mac[4] = (u8)(data_lo >> 8);
 		alu->mac[3] = (u8)(data_lo >> 16);
 		alu->mac[2] = (u8)(data_lo >> 24);
 		alu->mac[1] = (u8)data_hi;
 		alu->mac[0] = (u8)(data_hi >> 8);
-		alu->port_forward = (data_hi & STATIC_MAC_TABLE_FWD_PORTS) >>
-			STATIC_MAC_FWD_PORTS_S;
+		alu->port_forward =
+			(data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+				shifts[STATIC_MAC_FWD_PORTS];
 		alu->is_override =
-			(data_hi & STATIC_MAC_TABLE_OVERRIDE) ? 1 : 0;
+			(data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
 		data_hi >>= 1;
-		alu->is_use_fid = (data_hi & STATIC_MAC_TABLE_USE_FID) ? 1 : 0;
-		alu->fid = (data_hi & STATIC_MAC_TABLE_FID) >>
-			STATIC_MAC_FID_S;
+		alu->is_static = true;
+		alu->is_use_fid =
+			(data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+		alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+				shifts[STATIC_MAC_FID];
 		return 0;
 	}
 	return -ENXIO;
@@ -370,55 +461,82 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
 static void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
 				 struct alu_struct *alu)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	u32 data_hi, data_lo;
+	const u8 *shifts;
+	const u32 *masks;
 	u64 data;
 
+	shifts = ksz8->shifts;
+	masks = ksz8->masks;
+
 	data_lo = ((u32)alu->mac[2] << 24) |
 		((u32)alu->mac[3] << 16) |
 		((u32)alu->mac[4] << 8) | alu->mac[5];
 	data_hi = ((u32)alu->mac[0] << 8) | alu->mac[1];
-	data_hi |= (u32)alu->port_forward << STATIC_MAC_FWD_PORTS_S;
+	data_hi |= (u32)alu->port_forward << shifts[STATIC_MAC_FWD_PORTS];
 
 	if (alu->is_override)
-		data_hi |= STATIC_MAC_TABLE_OVERRIDE;
+		data_hi |= masks[STATIC_MAC_TABLE_OVERRIDE];
 	if (alu->is_use_fid) {
-		data_hi |= STATIC_MAC_TABLE_USE_FID;
-		data_hi |= (u32)alu->fid << STATIC_MAC_FID_S;
+		data_hi |= masks[STATIC_MAC_TABLE_USE_FID];
+		data_hi |= (u32)alu->fid << shifts[STATIC_MAC_FID];
 	}
 	if (alu->is_static)
-		data_hi |= STATIC_MAC_TABLE_VALID;
+		data_hi |= masks[STATIC_MAC_TABLE_VALID];
 	else
-		data_hi &= ~STATIC_MAC_TABLE_OVERRIDE;
+		data_hi &= ~masks[STATIC_MAC_TABLE_OVERRIDE];
 
 	data = (u64)data_hi << 32 | data_lo;
 	ksz8_w_table(dev, TABLE_STATIC_MAC, addr, data);
 }
 
-static void ksz8_from_vlan(u16 vlan, u8 *fid, u8 *member, u8 *valid)
+static void ksz8_from_vlan(struct ksz_device *dev, u32 vlan, u8 *fid,
+			   u8 *member, u8 *valid)
 {
-	*fid = vlan & VLAN_TABLE_FID;
-	*member = (vlan & VLAN_TABLE_MEMBERSHIP) >> VLAN_TABLE_MEMBERSHIP_S;
-	*valid = !!(vlan & VLAN_TABLE_VALID);
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *shifts;
+	const u32 *masks;
+
+	shifts = ksz8->shifts;
+	masks = ksz8->masks;
+
+	*fid = vlan & masks[VLAN_TABLE_FID];
+	*member = (vlan & masks[VLAN_TABLE_MEMBERSHIP]) >>
+			shifts[VLAN_TABLE_MEMBERSHIP_S];
+	*valid = !!(vlan & masks[VLAN_TABLE_VALID]);
 }
 
-static void ksz8_to_vlan(u8 fid, u8 member, u8 valid, u16 *vlan)
+static void ksz8_to_vlan(struct ksz_device *dev, u8 fid, u8 member, u8 valid,
+			 u16 *vlan)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *shifts;
+	const u32 *masks;
+
+	shifts = ksz8->shifts;
+	masks = ksz8->masks;
+
 	*vlan = fid;
-	*vlan |= (u16)member << VLAN_TABLE_MEMBERSHIP_S;
+	*vlan |= (u16)member << shifts[VLAN_TABLE_MEMBERSHIP_S];
 	if (valid)
-		*vlan |= VLAN_TABLE_VALID;
+		*vlan |= masks[VLAN_TABLE_VALID];
 }
 
 static void ksz8_r_vlan_entries(struct ksz_device *dev, u16 addr)
 {
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *shifts;
 	u64 data;
 	int i;
 
+	shifts = ksz8->shifts;
+
 	ksz8_r_table(dev, TABLE_VLAN, addr, &data);
 	addr *= dev->phy_port_cnt;
 	for (i = 0; i < dev->phy_port_cnt; i++) {
 		dev->vlan_cache[addr + i].table[0] = (u16)data;
-		data >>= VLAN_TABLE_S;
+		data >>= shifts[VLAN_TABLE];
 	}
 }
 
@@ -454,16 +572,18 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan)
 
 static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	u8 restart, speed, ctrl, link;
+	const u8 *regs = ksz8->regs;
 	int processed = true;
 	u16 data = 0;
 	u8 p = phy;
 
 	switch (reg) {
 	case PHY_REG_CTRL:
-		ksz_pread8(dev, p, P_NEG_RESTART_CTRL, &restart);
-		ksz_pread8(dev, p, P_SPEED_STATUS, &speed);
-		ksz_pread8(dev, p, P_FORCE_CTRL, &ctrl);
+		ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
+		ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
+		ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
 		if (restart & PORT_PHY_LOOPBACK)
 			data |= PHY_LOOPBACK;
 		if (ctrl & PORT_FORCE_100_MBIT)
@@ -488,7 +608,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 			data |= PHY_LED_DISABLE;
 		break;
 	case PHY_REG_STATUS:
-		ksz_pread8(dev, p, P_LINK_STATUS, &link);
+		ksz_pread8(dev, p, regs[P_LINK_STATUS], &link);
 		data = PHY_100BTX_FD_CAPABLE |
 		       PHY_100BTX_CAPABLE |
 		       PHY_10BT_FD_CAPABLE |
@@ -506,7 +626,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 		data = KSZ8795_ID_LO;
 		break;
 	case PHY_REG_AUTO_NEGOTIATION:
-		ksz_pread8(dev, p, P_LOCAL_CTRL, &ctrl);
+		ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
 		data = PHY_AUTO_NEG_802_3;
 		if (ctrl & PORT_AUTO_NEG_SYM_PAUSE)
 			data |= PHY_AUTO_NEG_SYM_PAUSE;
@@ -520,7 +640,7 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 			data |= PHY_AUTO_NEG_10BT;
 		break;
 	case PHY_REG_REMOTE_CAPABILITY:
-		ksz_pread8(dev, p, P_REMOTE_STATUS, &link);
+		ksz_pread8(dev, p, regs[P_REMOTE_STATUS], &link);
 		data = PHY_AUTO_NEG_802_3;
 		if (link & PORT_REMOTE_SYM_PAUSE)
 			data |= PHY_AUTO_NEG_SYM_PAUSE;
@@ -545,8 +665,10 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 
 static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 {
-	u8 p = phy;
+	struct ksz8 *ksz8 = dev->priv;
 	u8 restart, speed, ctrl, data;
+	const u8 *regs = ksz8->regs;
+	u8 p = phy;
 
 	switch (reg) {
 	case PHY_REG_CTRL:
@@ -554,15 +676,15 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		/* Do not support PHY reset function. */
 		if (val & PHY_RESET)
 			break;
-		ksz_pread8(dev, p, P_SPEED_STATUS, &speed);
+		ksz_pread8(dev, p, regs[P_SPEED_STATUS], &speed);
 		data = speed;
 		if (val & PHY_HP_MDIX)
 			data |= PORT_HP_MDIX;
 		else
 			data &= ~PORT_HP_MDIX;
 		if (data != speed)
-			ksz_pwrite8(dev, p, P_SPEED_STATUS, data);
-		ksz_pread8(dev, p, P_FORCE_CTRL, &ctrl);
+			ksz_pwrite8(dev, p, regs[P_SPEED_STATUS], data);
+		ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
 		data = ctrl;
 		if (!(val & PHY_AUTO_NEG_ENABLE))
 			data |= PORT_AUTO_NEG_DISABLE;
@@ -581,8 +703,8 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		else
 			data &= ~PORT_FORCE_FULL_DUPLEX;
 		if (data != ctrl)
-			ksz_pwrite8(dev, p, P_FORCE_CTRL, data);
-		ksz_pread8(dev, p, P_NEG_RESTART_CTRL, &restart);
+			ksz_pwrite8(dev, p, regs[P_FORCE_CTRL], data);
+		ksz_pread8(dev, p, regs[P_NEG_RESTART_CTRL], &restart);
 		data = restart;
 		if (val & PHY_LED_DISABLE)
 			data |= PORT_LED_OFF;
@@ -613,10 +735,10 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		else
 			data &= ~PORT_PHY_LOOPBACK;
 		if (data != restart)
-			ksz_pwrite8(dev, p, P_NEG_RESTART_CTRL, data);
+			ksz_pwrite8(dev, p, regs[P_NEG_RESTART_CTRL], data);
 		break;
 	case PHY_REG_AUTO_NEGOTIATION:
-		ksz_pread8(dev, p, P_LOCAL_CTRL, &ctrl);
+		ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
 		data = ctrl;
 		data &= ~(PORT_AUTO_NEG_SYM_PAUSE |
 			  PORT_AUTO_NEG_100BTX_FD |
@@ -634,7 +756,7 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 		if (val & PHY_AUTO_NEG_10BT)
 			data |= PORT_AUTO_NEG_10BT;
 		if (data != ctrl)
-			ksz_pwrite8(dev, p, P_LOCAL_CTRL, data);
+			ksz_pwrite8(dev, p, regs[P_LOCAL_CTRL], data);
 		break;
 	default:
 		break;
@@ -799,7 +921,7 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
 	ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
 
 	ksz8_r_vlan_table(dev, vlan->vid, &data);
-	ksz8_from_vlan(data, &fid, &member, &valid);
+	ksz8_from_vlan(dev, data, &fid, &member, &valid);
 
 	/* First time to setup the VLAN entry. */
 	if (!valid) {
@@ -809,7 +931,7 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
 	}
 	member |= BIT(port);
 
-	ksz8_to_vlan(fid, member, valid, &data);
+	ksz8_to_vlan(dev, fid, member, valid, &data);
 	ksz8_w_vlan_table(dev, vlan->vid, data);
 
 	/* change PVID */
@@ -842,7 +964,7 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
 	ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
 
 	ksz8_r_vlan_table(dev, vlan->vid, &data);
-	ksz8_from_vlan(data, &fid, &member, &valid);
+	ksz8_from_vlan(dev, data, &fid, &member, &valid);
 
 	member &= ~BIT(port);
 
@@ -855,7 +977,7 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
 	if (pvid == vlan->vid)
 		new_pvid = 1;
 
-	ksz8_to_vlan(fid, member, valid, &data);
+	ksz8_to_vlan(dev, fid, member, valid, &data);
 	ksz8_w_vlan_table(dev, vlan->vid, data);
 
 	if (new_pvid != pvid)
@@ -960,8 +1082,12 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port)
 static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
 	struct ksz_port *p = &dev->ports[port];
+	struct ksz8 *ksz8 = dev->priv;
+	const u32 *masks;
 	u8 member;
 
+	masks = ksz8->masks;
+
 	/* enable broadcast storm limit */
 	ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
 
@@ -971,7 +1097,8 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_DIFFSERV_ENABLE, false);
 
 	/* replace priority */
-	ksz_port_cfg(dev, port, P_802_1P_CTRL, PORT_802_1P_REMAPPING, false);
+	ksz_port_cfg(dev, port, P_802_1P_CTRL,
+		     masks[PORT_802_1P_REMAPPING], false);
 
 	/* enable 802.1p priority */
 	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_ENABLE, true);
@@ -989,13 +1116,18 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 static void ksz8_config_cpu_port(struct dsa_switch *ds)
 {
 	struct ksz_device *dev = ds->priv;
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *regs = ksz8->regs;
 	struct ksz_port *p;
+	const u32 *masks;
 	u8 remote;
 	int i;
 
+	masks = ksz8->masks;
+
 	/* Switch marks the maximum frame with extra byte as oversize. */
 	ksz_cfg(dev, REG_SW_CTRL_2, SW_LEGAL_PACKET_DISABLE, true);
-	ksz_cfg(dev, S_TAIL_TAG_CTRL, SW_TAIL_TAG_ENABLE, true);
+	ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true);
 
 	p = &dev->ports[dev->cpu_port];
 	p->vid_member = dev->port_mask;
@@ -1024,7 +1156,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
 		p = &dev->ports[i];
 		if (!p->on)
 			continue;
-		ksz_pread8(dev, i, P_REMOTE_STATUS, &remote);
+		ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
 		if (remote & PORT_FIBER_MODE)
 			p->fiber = 1;
 		if (p->fiber)
@@ -1222,6 +1354,7 @@ static const struct ksz_chip_data ksz8_switch_chips[] = {
 
 static int ksz8_switch_init(struct ksz_device *dev)
 {
+	struct ksz8 *ksz8 = dev->priv;
 	int i;
 
 	dev->ds->ops = &ksz8_switch_ops;
@@ -1249,6 +1382,10 @@ static int ksz8_switch_init(struct ksz_device *dev)
 	if (!dev->cpu_ports)
 		return -ENODEV;
 
+	ksz8->regs = ksz8795_regs;
+	ksz8->masks = ksz8795_masks;
+	ksz8->shifts = ksz8795_shifts;
+
 	dev->reg_mib_cnt = KSZ8795_COUNTER_NUM;
 	dev->mib_cnt = ARRAY_SIZE(mib_names);
 
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 40372047d40d..eea78c5636a0 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -98,7 +98,6 @@
 
 #define REG_SW_CTRL_10			0x0C
 
-#define SW_TAIL_TAG_ENABLE		BIT(1)
 #define SW_PASS_PAUSE			BIT(0)
 
 #define REG_SW_CTRL_11			0x0D
@@ -150,7 +149,6 @@
 #define REG_PORT_4_CTRL_2		0x42
 #define REG_PORT_5_CTRL_2		0x52
 
-#define PORT_802_1P_REMAPPING		BIT(7)
 #define PORT_INGRESS_FILTER		BIT(6)
 #define PORT_DISCARD_NON_VID		BIT(5)
 #define PORT_FORCE_FLOW_CTRL		BIT(4)
@@ -319,14 +317,12 @@
 
 #define REG_PORT_CTRL_5			0x05
 
-#define REG_PORT_CTRL_7			0x07
 #define REG_PORT_STATUS_0		0x08
 #define REG_PORT_STATUS_1		0x09
 #define REG_PORT_LINK_MD_CTRL		0x0A
 #define REG_PORT_LINK_MD_RESULT		0x0B
 #define REG_PORT_CTRL_9			0x0C
 #define REG_PORT_CTRL_10		0x0D
-#define REG_PORT_STATUS_2		0x0E
 #define REG_PORT_STATUS_3		0x0F
 
 #define REG_PORT_CTRL_12		0xA0
@@ -356,8 +352,6 @@
 #define REG_SW_MAC_ADDR_4		0x6C
 #define REG_SW_MAC_ADDR_5		0x6D
 
-#define REG_IND_CTRL_0			0x6E
-
 #define TABLE_EXT_SELECT_S		5
 #define TABLE_EEE_V			1
 #define TABLE_ACL_V			2
@@ -383,23 +377,13 @@
 #define TABLE_ENTRY_MASK		0x03FF
 #define TABLE_EXT_ENTRY_MASK		0x0FFF
 
-#define REG_IND_DATA_8			0x70
-#define REG_IND_DATA_7			0x71
-#define REG_IND_DATA_6			0x72
 #define REG_IND_DATA_5			0x73
-#define REG_IND_DATA_4			0x74
-#define REG_IND_DATA_3			0x75
 #define REG_IND_DATA_2			0x76
 #define REG_IND_DATA_1			0x77
 #define REG_IND_DATA_0			0x78
 
 #define REG_IND_DATA_PME_EEE_ACL	0xA0
 
-#define REG_IND_DATA_CHECK		REG_IND_DATA_6
-#define REG_IND_MIB_CHECK		REG_IND_DATA_4
-#define REG_IND_DATA_HI			REG_IND_DATA_7
-#define REG_IND_DATA_LO			REG_IND_DATA_3
-
 #define REG_INT_STATUS			0x7C
 #define REG_INT_ENABLE			0x7D
 
@@ -856,12 +840,6 @@
 #define P_MIRROR_CTRL			REG_PORT_CTRL_1
 #define P_802_1P_CTRL			REG_PORT_CTRL_2
 #define P_STP_CTRL			REG_PORT_CTRL_2
-#define P_LOCAL_CTRL			REG_PORT_CTRL_7
-#define P_REMOTE_STATUS			REG_PORT_STATUS_0
-#define P_FORCE_CTRL			REG_PORT_CTRL_9
-#define P_NEG_RESTART_CTRL		REG_PORT_CTRL_10
-#define P_SPEED_STATUS			REG_PORT_STATUS_1
-#define P_LINK_STATUS			REG_PORT_STATUS_2
 #define P_PASS_ALL_CTRL			REG_PORT_CTRL_12
 #define P_INS_SRC_PVID_CTRL		REG_PORT_CTRL_12
 #define P_DROP_TAG_CTRL			REG_PORT_CTRL_13
@@ -876,7 +854,6 @@
 #define S_MIRROR_CTRL			REG_SW_CTRL_3
 #define S_REPLACE_VID_CTRL		REG_SW_CTRL_4
 #define S_PASS_PAUSE_CTRL		REG_SW_CTRL_10
-#define S_TAIL_TAG_CTRL			REG_SW_CTRL_10
 #define S_802_1P_PRIO_CTRL		REG_SW_CTRL_12
 #define S_TOS_PRIO_CTRL			REG_TOS_PRIO_CTRL_0
 #define S_IPV6_MLD_CTRL			REG_SW_CTRL_21
@@ -889,65 +866,6 @@
 /* 148,800 frames * 67 ms / 100 */
 #define BROADCAST_STORM_VALUE		9969
 
-/**
- * STATIC_MAC_TABLE_ADDR		00-0000FFFF-FFFFFFFF
- * STATIC_MAC_TABLE_FWD_PORTS		00-001F0000-00000000
- * STATIC_MAC_TABLE_VALID		00-00200000-00000000
- * STATIC_MAC_TABLE_OVERRIDE		00-00400000-00000000
- * STATIC_MAC_TABLE_USE_FID		00-00800000-00000000
- * STATIC_MAC_TABLE_FID			00-7F000000-00000000
- */
-
-#define STATIC_MAC_TABLE_ADDR		0x0000FFFF
-#define STATIC_MAC_TABLE_FWD_PORTS	0x001F0000
-#define STATIC_MAC_TABLE_VALID		0x00200000
-#define STATIC_MAC_TABLE_OVERRIDE	0x00400000
-#define STATIC_MAC_TABLE_USE_FID	0x00800000
-#define STATIC_MAC_TABLE_FID		0x7F000000
-
-#define STATIC_MAC_FWD_PORTS_S		16
-#define STATIC_MAC_FID_S		24
-
-/**
- * VLAN_TABLE_FID			00-007F007F-007F007F
- * VLAN_TABLE_MEMBERSHIP		00-0F800F80-0F800F80
- * VLAN_TABLE_VALID			00-10001000-10001000
- */
-
-#define VLAN_TABLE_FID			0x007F
-#define VLAN_TABLE_MEMBERSHIP		0x0F80
-#define VLAN_TABLE_VALID		0x1000
-
-#define VLAN_TABLE_MEMBERSHIP_S		7
-#define VLAN_TABLE_S			16
-
-/**
- * DYNAMIC_MAC_TABLE_ADDR		00-0000FFFF-FFFFFFFF
- * DYNAMIC_MAC_TABLE_FID		00-007F0000-00000000
- * DYNAMIC_MAC_TABLE_NOT_READY		00-00800000-00000000
- * DYNAMIC_MAC_TABLE_SRC_PORT		00-07000000-00000000
- * DYNAMIC_MAC_TABLE_TIMESTAMP		00-18000000-00000000
- * DYNAMIC_MAC_TABLE_ENTRIES		7F-E0000000-00000000
- * DYNAMIC_MAC_TABLE_MAC_EMPTY		80-00000000-00000000
- */
-
-#define DYNAMIC_MAC_TABLE_ADDR		0x0000FFFF
-#define DYNAMIC_MAC_TABLE_FID		0x007F0000
-#define DYNAMIC_MAC_TABLE_SRC_PORT	0x07000000
-#define DYNAMIC_MAC_TABLE_TIMESTAMP	0x18000000
-#define DYNAMIC_MAC_TABLE_ENTRIES	0xE0000000
-
-#define DYNAMIC_MAC_TABLE_NOT_READY	0x80
-
-#define DYNAMIC_MAC_TABLE_ENTRIES_H	0x7F
-#define DYNAMIC_MAC_TABLE_MAC_EMPTY	0x80
-
-#define DYNAMIC_MAC_FID_S		16
-#define DYNAMIC_MAC_SRC_PORT_S		24
-#define DYNAMIC_MAC_TIMESTAMP_S		27
-#define DYNAMIC_MAC_ENTRIES_S		29
-#define DYNAMIC_MAC_ENTRIES_H_S		3
-
 /**
  * MIB_COUNTER_VALUE			00-00000000-3FFFFFFF
  * MIB_TOTAL_BYTES			00-0000000F-FFFFFFFF
@@ -956,9 +874,6 @@
  * MIB_COUNTER_OVERFLOW			00-00000040-00000000
  */
 
-#define MIB_COUNTER_OVERFLOW		BIT(6)
-#define MIB_COUNTER_VALID		BIT(5)
-
 #define MIB_COUNTER_VALUE		0x3FFFFFFF
 
 #define KS_MIB_TOTAL_RX_0		0x100
-- 
cgit v1.2.3


From 4b20a07e103f0b38b376b4b45c7c082202a876ff Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:04 +0200
Subject: net: dsa: microchip: ksz8795: add support for ksz88xx chips

We add support for the ksz8863 and ksz8873 chips which are
using the same register patterns but other offsets as the
ksz8795.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795.c     | 311 +++++++++++++++++++++++++++-----
 drivers/net/dsa/microchip/ksz8795_reg.h |  40 ++--
 drivers/net/dsa/microchip/ksz_common.h  |   1 +
 3 files changed, 281 insertions(+), 71 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 75518b4a253d..ad509a57a945 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -72,9 +72,60 @@ static const u8 ksz8795_shifts[] = {
 	[DYNAMIC_MAC_SRC_PORT]		= 24,
 };
 
-static const struct {
+static const u8 ksz8863_regs[] = {
+	[REG_IND_CTRL_0]		= 0x79,
+	[REG_IND_DATA_8]		= 0x7B,
+	[REG_IND_DATA_CHECK]		= 0x7B,
+	[REG_IND_DATA_HI]		= 0x7C,
+	[REG_IND_DATA_LO]		= 0x80,
+	[REG_IND_MIB_CHECK]		= 0x80,
+	[P_FORCE_CTRL]			= 0x0C,
+	[P_LINK_STATUS]			= 0x0E,
+	[P_LOCAL_CTRL]			= 0x0C,
+	[P_NEG_RESTART_CTRL]		= 0x0D,
+	[P_REMOTE_STATUS]		= 0x0E,
+	[P_SPEED_STATUS]		= 0x0F,
+	[S_TAIL_TAG_CTRL]		= 0x03,
+};
+
+static const u32 ksz8863_masks[] = {
+	[PORT_802_1P_REMAPPING]		= BIT(3),
+	[SW_TAIL_TAG_ENABLE]		= BIT(6),
+	[MIB_COUNTER_OVERFLOW]		= BIT(7),
+	[MIB_COUNTER_VALID]		= BIT(6),
+	[VLAN_TABLE_FID]		= GENMASK(15, 12),
+	[VLAN_TABLE_MEMBERSHIP]		= GENMASK(18, 16),
+	[VLAN_TABLE_VALID]		= BIT(19),
+	[STATIC_MAC_TABLE_VALID]	= BIT(19),
+	[STATIC_MAC_TABLE_USE_FID]	= BIT(21),
+	[STATIC_MAC_TABLE_FID]		= GENMASK(29, 26),
+	[STATIC_MAC_TABLE_OVERRIDE]	= BIT(20),
+	[STATIC_MAC_TABLE_FWD_PORTS]	= GENMASK(18, 16),
+	[DYNAMIC_MAC_TABLE_ENTRIES_H]	= GENMASK(5, 0),
+	[DYNAMIC_MAC_TABLE_MAC_EMPTY]	= BIT(7),
+	[DYNAMIC_MAC_TABLE_NOT_READY]	= BIT(7),
+	[DYNAMIC_MAC_TABLE_ENTRIES]	= GENMASK(31, 28),
+	[DYNAMIC_MAC_TABLE_FID]		= GENMASK(19, 16),
+	[DYNAMIC_MAC_TABLE_SRC_PORT]	= GENMASK(21, 20),
+	[DYNAMIC_MAC_TABLE_TIMESTAMP]	= GENMASK(23, 22),
+};
+
+static u8 ksz8863_shifts[] = {
+	[VLAN_TABLE_MEMBERSHIP_S]	= 16,
+	[STATIC_MAC_FWD_PORTS]		= 16,
+	[STATIC_MAC_FID]		= 22,
+	[DYNAMIC_MAC_ENTRIES_H]		= 3,
+	[DYNAMIC_MAC_ENTRIES]		= 24,
+	[DYNAMIC_MAC_FID]		= 16,
+	[DYNAMIC_MAC_TIMESTAMP]		= 24,
+	[DYNAMIC_MAC_SRC_PORT]		= 20,
+};
+
+struct mib_names {
 	char string[ETH_GSTRING_LEN];
-} mib_names[] = {
+};
+
+static const struct mib_names ksz87xx_mib_names[] = {
 	{ "rx_hi" },
 	{ "rx_undersize" },
 	{ "rx_fragments" },
@@ -113,6 +164,48 @@ static const struct {
 	{ "tx_discards" },
 };
 
+static const struct mib_names ksz88xx_mib_names[] = {
+	{ "rx" },
+	{ "rx_hi" },
+	{ "rx_undersize" },
+	{ "rx_fragments" },
+	{ "rx_oversize" },
+	{ "rx_jabbers" },
+	{ "rx_symbol_err" },
+	{ "rx_crc_err" },
+	{ "rx_align_err" },
+	{ "rx_mac_ctrl" },
+	{ "rx_pause" },
+	{ "rx_bcast" },
+	{ "rx_mcast" },
+	{ "rx_ucast" },
+	{ "rx_64_or_less" },
+	{ "rx_65_127" },
+	{ "rx_128_255" },
+	{ "rx_256_511" },
+	{ "rx_512_1023" },
+	{ "rx_1024_1522" },
+	{ "tx" },
+	{ "tx_hi" },
+	{ "tx_late_col" },
+	{ "tx_pause" },
+	{ "tx_bcast" },
+	{ "tx_mcast" },
+	{ "tx_ucast" },
+	{ "tx_deferred" },
+	{ "tx_total_col" },
+	{ "tx_exc_col" },
+	{ "tx_single_col" },
+	{ "tx_mult_col" },
+	{ "rx_discards" },
+	{ "tx_discards" },
+};
+
+static bool ksz_is_ksz88x3(struct ksz_device *dev)
+{
+	return dev->chip_id == 0x8830;
+}
+
 static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
 {
 	regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
@@ -127,10 +220,18 @@ static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
 
 static int ksz8_reset_switch(struct ksz_device *dev)
 {
-	/* reset switch */
-	ksz_write8(dev, REG_POWER_MANAGEMENT_1,
-		   SW_SOFTWARE_POWER_DOWN << SW_POWER_MANAGEMENT_MODE_S);
-	ksz_write8(dev, REG_POWER_MANAGEMENT_1, 0);
+	if (ksz_is_ksz88x3(dev)) {
+		/* reset switch */
+		ksz_cfg(dev, KSZ8863_REG_SW_RESET,
+			KSZ8863_GLOBAL_SOFTWARE_RESET | KSZ8863_PCS_RESET, true);
+		ksz_cfg(dev, KSZ8863_REG_SW_RESET,
+			KSZ8863_GLOBAL_SOFTWARE_RESET | KSZ8863_PCS_RESET, false);
+	} else {
+		/* reset switch */
+		ksz_write8(dev, REG_POWER_MANAGEMENT_1,
+			   SW_SOFTWARE_POWER_DOWN << SW_POWER_MANAGEMENT_MODE_S);
+		ksz_write8(dev, REG_POWER_MANAGEMENT_1, 0);
+	}
 
 	return 0;
 }
@@ -204,8 +305,8 @@ static void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt)
 	mutex_unlock(&dev->alu_mutex);
 }
 
-static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
-			   u64 *dropped, u64 *cnt)
+static void ksz8795_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+			      u64 *dropped, u64 *cnt)
 {
 	struct ksz8 *ksz8 = dev->priv;
 	const u32 *masks;
@@ -219,8 +320,8 @@ static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
 	regs = ksz8->regs;
 
 	addr -= dev->reg_mib_cnt;
-	ctrl_addr = (KS_MIB_TOTAL_RX_1 - KS_MIB_TOTAL_RX_0) * port;
-	ctrl_addr += addr + KS_MIB_TOTAL_RX_0;
+	ctrl_addr = (KSZ8795_MIB_TOTAL_RX_1 - KSZ8795_MIB_TOTAL_RX_0) * port;
+	ctrl_addr += addr + KSZ8795_MIB_TOTAL_RX_0;
 	ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ);
 
 	mutex_lock(&dev->alu_mutex);
@@ -257,8 +358,52 @@ static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
 	mutex_unlock(&dev->alu_mutex);
 }
 
+static void ksz8863_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+			      u64 *dropped, u64 *cnt)
+{
+	struct ksz8 *ksz8 = dev->priv;
+	const u8 *regs = ksz8->regs;
+	u32 *last = (u32 *)dropped;
+	u16 ctrl_addr;
+	u32 data;
+	u32 cur;
+
+	addr -= dev->reg_mib_cnt;
+	ctrl_addr = addr ? KSZ8863_MIB_PACKET_DROPPED_TX_0 :
+			   KSZ8863_MIB_PACKET_DROPPED_RX_0;
+	ctrl_addr += port;
+	ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ);
+
+	mutex_lock(&dev->alu_mutex);
+	ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr);
+	ksz_read32(dev, regs[REG_IND_DATA_LO], &data);
+	mutex_unlock(&dev->alu_mutex);
+
+	data &= MIB_PACKET_DROPPED;
+	cur = last[addr];
+	if (data != cur) {
+		last[addr] = data;
+		if (data < cur)
+			data += MIB_PACKET_DROPPED + 1;
+		data -= cur;
+		*cnt += data;
+	}
+}
+
+static void ksz8_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
+			   u64 *dropped, u64 *cnt)
+{
+	if (ksz_is_ksz88x3(dev))
+		ksz8863_r_mib_pkt(dev, port, addr, dropped, cnt);
+	else
+		ksz8795_r_mib_pkt(dev, port, addr, dropped, cnt);
+}
+
 static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
 {
+	if (ksz_is_ksz88x3(dev))
+		return;
+
 	/* enable the port for flush/freeze function */
 	if (freeze)
 		ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), true);
@@ -272,11 +417,14 @@ static void ksz8_freeze_mib(struct ksz_device *dev, int port, bool freeze)
 static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
 {
 	struct ksz_port_mib *mib = &dev->ports[port].mib;
+	u64 *dropped;
 
-	/* flush all enabled port MIB counters */
-	ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), true);
-	ksz_cfg(dev, REG_SW_CTRL_6, SW_MIB_COUNTER_FLUSH, true);
-	ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), false);
+	if (!ksz_is_ksz88x3(dev)) {
+		/* flush all enabled port MIB counters */
+		ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), true);
+		ksz_cfg(dev, REG_SW_CTRL_6, SW_MIB_COUNTER_FLUSH, true);
+		ksz_cfg(dev, REG_SW_CTRL_6, BIT(port), false);
+	}
 
 	mib->cnt_ptr = 0;
 
@@ -287,10 +435,13 @@ static void ksz8_port_init_cnt(struct ksz_device *dev, int port)
 		++mib->cnt_ptr;
 	}
 
+	/* last one in storage */
+	dropped = &mib->counters[dev->mib_cnt];
+
 	/* Some ports may not have MIB counters after SWITCH_COUNTER_NUM. */
 	while (mib->cnt_ptr < dev->mib_cnt) {
 		dev->dev_ops->r_mib_pkt(dev, port, mib->cnt_ptr,
-					NULL, &mib->counters[mib->cnt_ptr]);
+					dropped, &mib->counters[mib->cnt_ptr]);
 		++mib->cnt_ptr;
 	}
 	mib->cnt_ptr = 0;
@@ -588,8 +739,13 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 			data |= PHY_LOOPBACK;
 		if (ctrl & PORT_FORCE_100_MBIT)
 			data |= PHY_SPEED_100MBIT;
-		if (!(ctrl & PORT_AUTO_NEG_DISABLE))
-			data |= PHY_AUTO_NEG_ENABLE;
+		if (ksz_is_ksz88x3(dev)) {
+			if ((ctrl & PORT_AUTO_NEG_ENABLE))
+				data |= PHY_AUTO_NEG_ENABLE;
+		} else {
+			if (!(ctrl & PORT_AUTO_NEG_DISABLE))
+				data |= PHY_AUTO_NEG_ENABLE;
+		}
 		if (restart & PORT_POWER_DOWN)
 			data |= PHY_POWER_DOWN;
 		if (restart & PORT_AUTO_NEG_RESTART)
@@ -623,7 +779,10 @@ static void ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val)
 		data = KSZ8795_ID_HI;
 		break;
 	case PHY_REG_ID_2:
-		data = KSZ8795_ID_LO;
+		if (ksz_is_ksz88x3(dev))
+			data = KSZ8863_ID_LO;
+		else
+			data = KSZ8795_ID_LO;
 		break;
 	case PHY_REG_AUTO_NEGOTIATION:
 		ksz_pread8(dev, p, regs[P_LOCAL_CTRL], &ctrl);
@@ -686,14 +845,22 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
 			ksz_pwrite8(dev, p, regs[P_SPEED_STATUS], data);
 		ksz_pread8(dev, p, regs[P_FORCE_CTRL], &ctrl);
 		data = ctrl;
-		if (!(val & PHY_AUTO_NEG_ENABLE))
-			data |= PORT_AUTO_NEG_DISABLE;
-		else
-			data &= ~PORT_AUTO_NEG_DISABLE;
+		if (ksz_is_ksz88x3(dev)) {
+			if ((val & PHY_AUTO_NEG_ENABLE))
+				data |= PORT_AUTO_NEG_ENABLE;
+			else
+				data &= ~PORT_AUTO_NEG_ENABLE;
+		} else {
+			if (!(val & PHY_AUTO_NEG_ENABLE))
+				data |= PORT_AUTO_NEG_DISABLE;
+			else
+				data &= ~PORT_AUTO_NEG_DISABLE;
+
+			/* Fiber port does not support auto-negotiation. */
+			if (dev->ports[p].fiber)
+				data |= PORT_AUTO_NEG_DISABLE;
+		}
 
-		/* Fiber port does not support auto-negotiation. */
-		if (dev->ports[p].fiber)
-			data |= PORT_AUTO_NEG_DISABLE;
 		if (val & PHY_SPEED_100MBIT)
 			data |= PORT_FORCE_100_MBIT;
 		else
@@ -767,7 +934,11 @@ static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds,
 						   int port,
 						   enum dsa_tag_protocol mp)
 {
-	return DSA_TAG_PROTO_KSZ8795;
+	struct ksz_device *dev = ds->priv;
+
+	/* ksz88x3 uses the same tag schema as KSZ9893 */
+	return ksz_is_ksz88x3(dev) ?
+		DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795;
 }
 
 static void ksz8_get_strings(struct dsa_switch *ds, int port,
@@ -777,8 +948,8 @@ static void ksz8_get_strings(struct dsa_switch *ds, int port,
 	int i;
 
 	for (i = 0; i < dev->mib_cnt; i++) {
-		memcpy(buf + i * ETH_GSTRING_LEN, mib_names[i].string,
-		       ETH_GSTRING_LEN);
+		memcpy(buf + i * ETH_GSTRING_LEN,
+		       dev->mib_names[i].string, ETH_GSTRING_LEN);
 	}
 }
 
@@ -904,6 +1075,9 @@ static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag,
 {
 	struct ksz_device *dev = ds->priv;
 
+	if (ksz_is_ksz88x3(dev))
+		return -ENOTSUPP;
+
 	ksz_cfg(dev, S_MIRROR_CTRL, SW_VLAN_ENABLE, flag);
 
 	return 0;
@@ -918,6 +1092,9 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
 	u16 data, new_pvid = 0;
 	u8 fid, member, valid;
 
+	if (ksz_is_ksz88x3(dev))
+		return -ENOTSUPP;
+
 	ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged);
 
 	ksz8_r_vlan_table(dev, vlan->vid, &data);
@@ -958,6 +1135,9 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
 	u16 data, pvid, new_pvid = 0;
 	u8 fid, member, valid;
 
+	if (ksz_is_ksz88x3(dev))
+		return -ENOTSUPP;
+
 	ksz_pread16(dev, port, REG_PORT_CTRL_VID, &pvid);
 	pvid = pvid & 0xFFF;
 
@@ -1091,7 +1271,8 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	/* enable broadcast storm limit */
 	ksz_port_cfg(dev, port, P_BCAST_STORM_CTRL, PORT_BROADCAST_STORM, true);
 
-	ksz8795_set_prio_queue(dev, port, 4);
+	if (!ksz_is_ksz88x3(dev))
+		ksz8795_set_prio_queue(dev, port, 4);
 
 	/* disable DiffServ priority */
 	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_DIFFSERV_ENABLE, false);
@@ -1104,7 +1285,8 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 	ksz_port_cfg(dev, port, P_PRIO_CTRL, PORT_802_1P_ENABLE, true);
 
 	if (cpu_port) {
-		ksz8795_cpu_interface_select(dev, port);
+		if (!ksz_is_ksz88x3(dev))
+			ksz8795_cpu_interface_select(dev, port);
 
 		member = dev->port_mask;
 	} else {
@@ -1156,9 +1338,11 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
 		p = &dev->ports[i];
 		if (!p->on)
 			continue;
-		ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
-		if (remote & PORT_FIBER_MODE)
-			p->fiber = 1;
+		if (!ksz_is_ksz88x3(dev)) {
+			ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
+			if (remote & PORT_FIBER_MODE)
+				p->fiber = 1;
+		}
 		if (p->fiber)
 			ksz_port_cfg(dev, i, P_STP_CTRL, PORT_FORCE_FLOW_CTRL,
 				     true);
@@ -1277,19 +1461,30 @@ static int ksz8_switch_detect(struct ksz_device *dev)
 
 	id1 = id16 >> 8;
 	id2 = id16 & SW_CHIP_ID_M;
-	if (id1 != FAMILY_ID ||
-	    (id2 != CHIP_ID_94 && id2 != CHIP_ID_95))
-		return -ENODEV;
 
-	if (id2 == CHIP_ID_95) {
-		u8 val;
+	switch (id1) {
+	case KSZ87_FAMILY_ID:
+		if ((id2 != CHIP_ID_94 && id2 != CHIP_ID_95))
+			return -ENODEV;
+
+		if (id2 == CHIP_ID_95) {
+			u8 val;
 
-		id2 = 0x95;
-		ksz_read8(dev, REG_PORT_1_STATUS_0, &val);
-		if (val & PORT_FIBER_MODE)
-			id2 = 0x65;
-	} else if (id2 == CHIP_ID_94) {
-		id2 = 0x94;
+			id2 = 0x95;
+			ksz_read8(dev, REG_PORT_STATUS_0, &val);
+			if (val & PORT_FIBER_MODE)
+				id2 = 0x65;
+		} else if (id2 == CHIP_ID_94) {
+			id2 = 0x94;
+		}
+		break;
+	case KSZ88_FAMILY_ID:
+		if (id2 != CHIP_ID_63)
+			return -ENODEV;
+		break;
+	default:
+		dev_err(dev->dev, "invalid family id: %d\n", id1);
+		return -ENODEV;
 	}
 	id16 &= ~0xff;
 	id16 |= id2;
@@ -1350,6 +1545,15 @@ static const struct ksz_chip_data ksz8_switch_chips[] = {
 		.cpu_ports = 0x10,	/* can be configured as cpu port */
 		.port_cnt = 5,		/* total cpu and user ports */
 	},
+	{
+		.chip_id = 0x8830,
+		.dev_name = "KSZ8863/KSZ8873",
+		.num_vlans = 16,
+		.num_alus = 0,
+		.num_statics = 8,
+		.cpu_ports = 0x4,	/* can be configured as cpu port */
+		.port_cnt = 3,
+	},
 };
 
 static int ksz8_switch_init(struct ksz_device *dev)
@@ -1382,12 +1586,21 @@ static int ksz8_switch_init(struct ksz_device *dev)
 	if (!dev->cpu_ports)
 		return -ENODEV;
 
-	ksz8->regs = ksz8795_regs;
-	ksz8->masks = ksz8795_masks;
-	ksz8->shifts = ksz8795_shifts;
+	if (ksz_is_ksz88x3(dev)) {
+		ksz8->regs = ksz8863_regs;
+		ksz8->masks = ksz8863_masks;
+		ksz8->shifts = ksz8863_shifts;
+		dev->mib_cnt = ARRAY_SIZE(ksz88xx_mib_names);
+		dev->mib_names = ksz88xx_mib_names;
+	} else {
+		ksz8->regs = ksz8795_regs;
+		ksz8->masks = ksz8795_masks;
+		ksz8->shifts = ksz8795_shifts;
+		dev->mib_cnt = ARRAY_SIZE(ksz87xx_mib_names);
+		dev->mib_names = ksz87xx_mib_names;
+	}
 
-	dev->reg_mib_cnt = KSZ8795_COUNTER_NUM;
-	dev->mib_cnt = ARRAY_SIZE(mib_names);
+	dev->reg_mib_cnt = MIB_COUNTER_NUM;
 
 	dev->ports = devm_kzalloc(dev->dev,
 				  dev->port_cnt * sizeof(struct ksz_port),
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index eea78c5636a0..c2e52c40a54c 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -16,7 +16,8 @@
 
 #define REG_CHIP_ID0			0x00
 
-#define FAMILY_ID			0x87
+#define KSZ87_FAMILY_ID			0x87
+#define KSZ88_FAMILY_ID			0x88
 
 #define REG_CHIP_ID1			0x01
 
@@ -28,6 +29,12 @@
 
 #define CHIP_ID_94			0x60
 #define CHIP_ID_95			0x90
+#define CHIP_ID_63			0x30
+
+#define KSZ8863_REG_SW_RESET		0x43
+
+#define KSZ8863_GLOBAL_SOFTWARE_RESET	BIT(4)
+#define KSZ8863_PCS_RESET		BIT(0)
 
 #define REG_SW_CTRL_0			0x02
 
@@ -267,6 +274,7 @@
 #define REG_PORT_3_CTRL_9		0x3C
 #define REG_PORT_4_CTRL_9		0x4C
 
+#define PORT_AUTO_NEG_ENABLE		BIT(7)
 #define PORT_AUTO_NEG_DISABLE		BIT(7)
 #define PORT_FORCE_100_MBIT		BIT(6)
 #define PORT_FORCE_FULL_DUPLEX		BIT(5)
@@ -800,6 +808,7 @@
 
 #define KSZ8795_ID_HI			0x0022
 #define KSZ8795_ID_LO			0x1550
+#define KSZ8863_ID_LO			0x1430
 
 #define KSZ8795_SW_ID			0x8795
 
@@ -830,7 +839,7 @@
 
 #define KS_PRIO_IN_REG			4
 
-#define KSZ8795_COUNTER_NUM		0x20
+#define MIB_COUNTER_NUM		0x20
 
 /* Common names used by other drivers */
 
@@ -876,26 +885,13 @@
 
 #define MIB_COUNTER_VALUE		0x3FFFFFFF
 
-#define KS_MIB_TOTAL_RX_0		0x100
-#define KS_MIB_TOTAL_TX_0		0x101
-#define KS_MIB_PACKET_DROPPED_RX_0	0x102
-#define KS_MIB_PACKET_DROPPED_TX_0	0x103
-#define KS_MIB_TOTAL_RX_1		0x104
-#define KS_MIB_TOTAL_TX_1		0x105
-#define KS_MIB_PACKET_DROPPED_TX_1	0x106
-#define KS_MIB_PACKET_DROPPED_RX_1	0x107
-#define KS_MIB_TOTAL_RX_2		0x108
-#define KS_MIB_TOTAL_TX_2		0x109
-#define KS_MIB_PACKET_DROPPED_TX_2	0x10A
-#define KS_MIB_PACKET_DROPPED_RX_2	0x10B
-#define KS_MIB_TOTAL_RX_3		0x10C
-#define KS_MIB_TOTAL_TX_3		0x10D
-#define KS_MIB_PACKET_DROPPED_TX_3	0x10E
-#define KS_MIB_PACKET_DROPPED_RX_3	0x10F
-#define KS_MIB_TOTAL_RX_4		0x110
-#define KS_MIB_TOTAL_TX_4		0x111
-#define KS_MIB_PACKET_DROPPED_TX_4	0x112
-#define KS_MIB_PACKET_DROPPED_RX_4	0x113
+#define KSZ8795_MIB_TOTAL_RX_0		0x100
+#define KSZ8795_MIB_TOTAL_TX_0		0x101
+#define KSZ8795_MIB_TOTAL_RX_1		0x104
+#define KSZ8795_MIB_TOTAL_TX_1		0x105
+
+#define KSZ8863_MIB_PACKET_DROPPED_TX_0 0x100
+#define KSZ8863_MIB_PACKET_DROPPED_RX_0 0x105
 
 #define MIB_PACKET_DROPPED		0x0000FFFF
 
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index f0681a891cca..e0bbdca64375 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -71,6 +71,7 @@ struct ksz_device {
 	int port_cnt;
 	int reg_mib_cnt;
 	int mib_cnt;
+	const struct mib_names *mib_names;
 	phy_interface_t compat_interface;
 	u32 regs_size;
 	bool phy_errata_9477;
-- 
cgit v1.2.3


From cc13e52c3a894e407f5b95052b0012b07101ebec Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:05 +0200
Subject: net: dsa: microchip: Add Microchip KSZ8863 SPI based driver support

Add KSZ88X3 driver support. We add support for the KXZ88X3 three port
switches using the SPI Interface.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz8795_spi.c | 44 ++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index 45420c07c99f..85ba12aa82d8 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -14,34 +14,52 @@
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
 
+#include "ksz8.h"
 #include "ksz_common.h"
 
-#define SPI_ADDR_SHIFT			12
-#define SPI_ADDR_ALIGN			3
-#define SPI_TURNAROUND_SHIFT		1
+#define KSZ8795_SPI_ADDR_SHIFT			12
+#define KSZ8795_SPI_ADDR_ALIGN			3
+#define KSZ8795_SPI_TURNAROUND_SHIFT		1
 
-KSZ_REGMAP_TABLE(ksz8795, 16, SPI_ADDR_SHIFT,
-		 SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN);
+#define KSZ8863_SPI_ADDR_SHIFT			8
+#define KSZ8863_SPI_ADDR_ALIGN			8
+#define KSZ8863_SPI_TURNAROUND_SHIFT		0
+
+KSZ_REGMAP_TABLE(ksz8795, 16, KSZ8795_SPI_ADDR_SHIFT,
+		 KSZ8795_SPI_TURNAROUND_SHIFT, KSZ8795_SPI_ADDR_ALIGN);
+
+KSZ_REGMAP_TABLE(ksz8863, 16, KSZ8863_SPI_ADDR_SHIFT,
+		 KSZ8863_SPI_TURNAROUND_SHIFT, KSZ8863_SPI_ADDR_ALIGN);
 
 static int ksz8795_spi_probe(struct spi_device *spi)
 {
+	const struct regmap_config *regmap_config;
+	struct device *ddev = &spi->dev;
 	struct regmap_config rc;
 	struct ksz_device *dev;
-	int i, ret;
+	struct ksz8 *ksz8;
+	int i, ret = 0;
 
-	dev = ksz_switch_alloc(&spi->dev, spi);
+	ksz8 = devm_kzalloc(&spi->dev, sizeof(struct ksz8), GFP_KERNEL);
+	ksz8->priv = spi;
+
+	dev = ksz_switch_alloc(&spi->dev, ksz8);
 	if (!dev)
 		return -ENOMEM;
 
+	regmap_config = device_get_match_data(ddev);
+	if (!regmap_config)
+		return -EINVAL;
+
 	for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
-		rc = ksz8795_regmap_config[i];
+		rc = regmap_config[i];
 		rc.lock_arg = &dev->regmap_mutex;
 		dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
 		if (IS_ERR(dev->regmap[i])) {
 			ret = PTR_ERR(dev->regmap[i]);
 			dev_err(&spi->dev,
 				"Failed to initialize regmap%i: %d\n",
-				ksz8795_regmap_config[i].val_bits, ret);
+				regmap_config[i].val_bits, ret);
 			return ret;
 		}
 	}
@@ -85,9 +103,11 @@ static void ksz8795_spi_shutdown(struct spi_device *spi)
 }
 
 static const struct of_device_id ksz8795_dt_ids[] = {
-	{ .compatible = "microchip,ksz8765" },
-	{ .compatible = "microchip,ksz8794" },
-	{ .compatible = "microchip,ksz8795" },
+	{ .compatible = "microchip,ksz8765", .data = &ksz8795_regmap_config },
+	{ .compatible = "microchip,ksz8794", .data = &ksz8795_regmap_config },
+	{ .compatible = "microchip,ksz8795", .data = &ksz8795_regmap_config },
+	{ .compatible = "microchip,ksz8863", .data = &ksz8863_regmap_config },
+	{ .compatible = "microchip,ksz8873", .data = &ksz8863_regmap_config },
 	{},
 };
 MODULE_DEVICE_TABLE(of, ksz8795_dt_ids);
-- 
cgit v1.2.3


From 61df0e7bbb90fac8c77203e0fa570804617f137d Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:06 +0200
Subject: dt-bindings: net: dsa: document additional Microchip KSZ8863/8873
 switch

It is a 3-Port 10/100 Ethernet Switch. One CPU-Port and two
Switch-Ports.

Cc: devicetree@vger.kernel.org
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
index 9f7d131bbcef..84985f53bffd 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml
@@ -21,6 +21,8 @@ properties:
       - microchip,ksz8765
       - microchip,ksz8794
       - microchip,ksz8795
+      - microchip,ksz8863
+      - microchip,ksz8873
       - microchip,ksz9477
       - microchip,ksz9897
       - microchip,ksz9896
-- 
cgit v1.2.3


From 800fcab8230f622544a12403977b5b7259a076f8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 27 Apr 2021 09:09:07 +0200
Subject: net: phy: Add support for microchip SMI0 MDIO bus

SMI0 is a mangled version of MDIO. The main low level difference is
the MDIO C22 OP code is always 0, not 0x2 or 0x1 for Read/Write. The
read/write information is instead encoded in the PHY address.

Extend the bit-bang code to allow the op code to be overridden, but
default to normal C22 values. Add an extra compatible to the mdio-gpio
driver, and when this compatible is present, set the op codes to 0.

A higher level driver, sitting on top of the basic MDIO bus driver can
then implement the rest of the microchip SMI0 odderties.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-bitbang.c | 8 ++++++--
 drivers/net/mdio/mdio-gpio.c    | 8 ++++++++
 include/linux/mdio-bitbang.h    | 3 +++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index 0f457c436335..07609114a26b 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -158,7 +158,7 @@ int mdiobb_read(struct mii_bus *bus, int phy, int reg)
 		reg = mdiobb_cmd_addr(ctrl, phy, reg);
 		mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
 	} else
-		mdiobb_cmd(ctrl, MDIO_READ, phy, reg);
+		mdiobb_cmd(ctrl, ctrl->op_c22_read, phy, reg);
 
 	ctrl->ops->set_mdio_dir(ctrl, 0);
 
@@ -190,7 +190,7 @@ int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 		reg = mdiobb_cmd_addr(ctrl, phy, reg);
 		mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg);
 	} else
-		mdiobb_cmd(ctrl, MDIO_WRITE, phy, reg);
+		mdiobb_cmd(ctrl, ctrl->op_c22_write, phy, reg);
 
 	/* send the turnaround (10) */
 	mdiobb_send_bit(ctrl, 1);
@@ -217,6 +217,10 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 	bus->read = mdiobb_read;
 	bus->write = mdiobb_write;
 	bus->priv = ctrl;
+	if (!ctrl->override_op_c22) {
+		ctrl->op_c22_read = MDIO_READ;
+		ctrl->op_c22_write = MDIO_WRITE;
+	}
 
 	return bus;
 }
diff --git a/drivers/net/mdio/mdio-gpio.c b/drivers/net/mdio/mdio-gpio.c
index 56c8f914f893..0fb3c2de0845 100644
--- a/drivers/net/mdio/mdio-gpio.c
+++ b/drivers/net/mdio/mdio-gpio.c
@@ -132,6 +132,13 @@ static struct mii_bus *mdio_gpio_bus_init(struct device *dev,
 		new_bus->phy_ignore_ta_mask = pdata->phy_ignore_ta_mask;
 	}
 
+	if (dev->of_node &&
+	    of_device_is_compatible(dev->of_node, "microchip,mdio-smi0")) {
+		bitbang->ctrl.op_c22_read = 0;
+		bitbang->ctrl.op_c22_write = 0;
+		bitbang->ctrl.override_op_c22 = 1;
+	}
+
 	dev_set_drvdata(dev, new_bus);
 
 	return new_bus;
@@ -196,6 +203,7 @@ static int mdio_gpio_remove(struct platform_device *pdev)
 
 static const struct of_device_id mdio_gpio_of_match[] = {
 	{ .compatible = "virtual,mdio-gpio", },
+	{ .compatible = "microchip,mdio-smi0" },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mdio_gpio_of_match);
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index aca4dc037b70..373630fe5c28 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -33,6 +33,9 @@ struct mdiobb_ops {
 
 struct mdiobb_ctrl {
 	const struct mdiobb_ops *ops;
+	unsigned int override_op_c22;
+	u8 op_c22_read;
+	u8 op_c22_write;
 };
 
 int mdiobb_read(struct mii_bus *bus, int phy, int reg);
-- 
cgit v1.2.3


From 60a3647600027cbd54eb21997af3e175fbfa5592 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:08 +0200
Subject: net: dsa: microchip: Add Microchip KSZ8863 SMI based driver support

Add KSZ88X3 driver support. We add support for the KXZ88X3 three port
switches using the Microchip SMI Interface. They are supported using the
MDIO-Bitbang Interface.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/Kconfig       |  10 +-
 drivers/net/dsa/microchip/Makefile      |   1 +
 drivers/net/dsa/microchip/ksz8863_smi.c | 213 ++++++++++++++++++++++++++++++++
 3 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/dsa/microchip/ksz8863_smi.c

diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig
index 4ec6a47b7f72..c9e2a8989556 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -29,7 +29,7 @@ menuconfig NET_DSA_MICROCHIP_KSZ8795
 	depends on NET_DSA
 	select NET_DSA_MICROCHIP_KSZ_COMMON
 	help
-	  This driver adds support for Microchip KSZ8795 switch chips.
+	  This driver adds support for Microchip KSZ8795/KSZ88X3 switch chips.
 
 config NET_DSA_MICROCHIP_KSZ8795_SPI
 	tristate "KSZ8795 series SPI connected switch driver"
@@ -40,3 +40,11 @@ config NET_DSA_MICROCHIP_KSZ8795_SPI
 
 	  It is required to use the KSZ8795 switch driver as the only access
 	  is through SPI.
+
+config NET_DSA_MICROCHIP_KSZ8863_SMI
+	tristate "KSZ series SMI connected switch driver"
+	depends on NET_DSA_MICROCHIP_KSZ8795
+	select MDIO_BITBANG
+	help
+	  Select to enable support for registering switches configured through
+	  Microchip SMI. It supports the KSZ8863 and KSZ8873 switch.
diff --git a/drivers/net/dsa/microchip/Makefile b/drivers/net/dsa/microchip/Makefile
index 929caa81e782..2a03b21a3386 100644
--- a/drivers/net/dsa/microchip/Makefile
+++ b/drivers/net/dsa/microchip/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_I2C)	+= ksz9477_i2c.o
 obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ9477_SPI)	+= ksz9477_spi.o
 obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795)		+= ksz8795.o
 obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8795_SPI)	+= ksz8795_spi.o
+obj-$(CONFIG_NET_DSA_MICROCHIP_KSZ8863_SMI)	+= ksz8863_smi.o
diff --git a/drivers/net/dsa/microchip/ksz8863_smi.c b/drivers/net/dsa/microchip/ksz8863_smi.c
new file mode 100644
index 000000000000..30d97ea7a949
--- /dev/null
+++ b/drivers/net/dsa/microchip/ksz8863_smi.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip KSZ8863 series register access through SMI
+ *
+ * Copyright (C) 2019 Pengutronix, Michael Grzeschik <kernel@pengutronix.de>
+ */
+
+#include "ksz8.h"
+#include "ksz_common.h"
+
+/* Serial Management Interface (SMI) uses the following frame format:
+ *
+ *       preamble|start|Read/Write|  PHY   |  REG  |TA|   Data bits      | Idle
+ *               |frame| OP code  |address |address|  |                  |
+ * read | 32x1´s | 01  |    00    | 1xRRR  | RRRRR |Z0| 00000000DDDDDDDD |  Z
+ * write| 32x1´s | 01  |    00    | 0xRRR  | RRRRR |10| xxxxxxxxDDDDDDDD |  Z
+ *
+ */
+
+#define SMI_KSZ88XX_READ_PHY	BIT(4)
+
+static int ksz8863_mdio_read(void *ctx, const void *reg_buf, size_t reg_len,
+			     void *val_buf, size_t val_len)
+{
+	struct ksz_device *dev = ctx;
+	struct mdio_device *mdev;
+	u8 reg = *(u8 *)reg_buf;
+	u8 *val = val_buf;
+	struct ksz8 *ksz8;
+	int i, ret = 0;
+
+	ksz8 = dev->priv;
+	mdev = ksz8->priv;
+
+	mutex_lock_nested(&mdev->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	for (i = 0; i < val_len; i++) {
+		int tmp = reg + i;
+
+		ret = __mdiobus_read(mdev->bus, ((tmp & 0xE0) >> 5) |
+				     SMI_KSZ88XX_READ_PHY, tmp);
+		if (ret < 0)
+			goto out;
+
+		val[i] = ret;
+	}
+	ret = 0;
+
+ out:
+	mutex_unlock(&mdev->bus->mdio_lock);
+
+	return ret;
+}
+
+static int ksz8863_mdio_write(void *ctx, const void *data, size_t count)
+{
+	struct ksz_device *dev = ctx;
+	struct mdio_device *mdev;
+	struct ksz8 *ksz8;
+	int i, ret = 0;
+	u32 reg;
+	u8 *val;
+
+	ksz8 = dev->priv;
+	mdev = ksz8->priv;
+
+	val = (u8 *)(data + 4);
+	reg = *(u32 *)data;
+
+	mutex_lock_nested(&mdev->bus->mdio_lock, MDIO_MUTEX_NESTED);
+	for (i = 0; i < (count - 4); i++) {
+		int tmp = reg + i;
+
+		ret = __mdiobus_write(mdev->bus, ((tmp & 0xE0) >> 5),
+				      tmp, val[i]);
+		if (ret < 0)
+			goto out;
+	}
+
+ out:
+	mutex_unlock(&mdev->bus->mdio_lock);
+
+	return ret;
+}
+
+static const struct regmap_bus regmap_smi[] = {
+	{
+		.read = ksz8863_mdio_read,
+		.write = ksz8863_mdio_write,
+		.max_raw_read = 1,
+		.max_raw_write = 1,
+	},
+	{
+		.read = ksz8863_mdio_read,
+		.write = ksz8863_mdio_write,
+		.val_format_endian_default = REGMAP_ENDIAN_BIG,
+		.max_raw_read = 2,
+		.max_raw_write = 2,
+	},
+	{
+		.read = ksz8863_mdio_read,
+		.write = ksz8863_mdio_write,
+		.val_format_endian_default = REGMAP_ENDIAN_BIG,
+		.max_raw_read = 4,
+		.max_raw_write = 4,
+	}
+};
+
+static const struct regmap_config ksz8863_regmap_config[] = {
+	{
+		.name = "#8",
+		.reg_bits = 8,
+		.pad_bits = 24,
+		.val_bits = 8,
+		.cache_type = REGCACHE_NONE,
+		.use_single_read = 1,
+		.lock = ksz_regmap_lock,
+		.unlock = ksz_regmap_unlock,
+	},
+	{
+		.name = "#16",
+		.reg_bits = 8,
+		.pad_bits = 24,
+		.val_bits = 16,
+		.cache_type = REGCACHE_NONE,
+		.use_single_read = 1,
+		.lock = ksz_regmap_lock,
+		.unlock = ksz_regmap_unlock,
+	},
+	{
+		.name = "#32",
+		.reg_bits = 8,
+		.pad_bits = 24,
+		.val_bits = 32,
+		.cache_type = REGCACHE_NONE,
+		.use_single_read = 1,
+		.lock = ksz_regmap_lock,
+		.unlock = ksz_regmap_unlock,
+	}
+};
+
+static int ksz8863_smi_probe(struct mdio_device *mdiodev)
+{
+	struct regmap_config rc;
+	struct ksz_device *dev;
+	struct ksz8 *ksz8;
+	int ret;
+	int i;
+
+	ksz8 = devm_kzalloc(&mdiodev->dev, sizeof(struct ksz8), GFP_KERNEL);
+	ksz8->priv = mdiodev;
+
+	dev = ksz_switch_alloc(&mdiodev->dev, ksz8);
+	if (!dev)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(ksz8863_regmap_config); i++) {
+		rc = ksz8863_regmap_config[i];
+		rc.lock_arg = &dev->regmap_mutex;
+		dev->regmap[i] = devm_regmap_init(&mdiodev->dev,
+						  &regmap_smi[i], dev,
+						  &rc);
+		if (IS_ERR(dev->regmap[i])) {
+			ret = PTR_ERR(dev->regmap[i]);
+			dev_err(&mdiodev->dev,
+				"Failed to initialize regmap%i: %d\n",
+				ksz8863_regmap_config[i].val_bits, ret);
+			return ret;
+		}
+	}
+
+	if (mdiodev->dev.platform_data)
+		dev->pdata = mdiodev->dev.platform_data;
+
+	ret = ksz8_switch_register(dev);
+
+	/* Main DSA driver may not be started yet. */
+	if (ret)
+		return ret;
+
+	dev_set_drvdata(&mdiodev->dev, dev);
+
+	return 0;
+}
+
+static void ksz8863_smi_remove(struct mdio_device *mdiodev)
+{
+	struct ksz_device *dev = dev_get_drvdata(&mdiodev->dev);
+
+	if (dev)
+		ksz_switch_remove(dev);
+}
+
+static const struct of_device_id ksz8863_dt_ids[] = {
+	{ .compatible = "microchip,ksz8863" },
+	{ .compatible = "microchip,ksz8873" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, ksz8863_dt_ids);
+
+static struct mdio_driver ksz8863_driver = {
+	.probe	= ksz8863_smi_probe,
+	.remove	= ksz8863_smi_remove,
+	.mdiodrv.driver = {
+		.name	= "ksz8863-switch",
+		.of_match_table = ksz8863_dt_ids,
+	},
+};
+
+mdio_module_driver(ksz8863_driver);
+
+MODULE_AUTHOR("Michael Grzeschik <m.grzeschik@pengutronix.de>");
+MODULE_DESCRIPTION("Microchip KSZ8863 SMI Switch driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 61b405985a6b6ffc24c98cd2b8b986262626eeba Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 27 Apr 2021 09:09:09 +0200
Subject: dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0

Microchip SMI0 Mode is a special mode, where the MDIO Read/Write
commands are part of the PHY Address and the OP Code is always 0. We add
the compatible for this special mode of the bitbanged mdio driver.

Cc: devicetree@vger.kernel.org
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/mdio-gpio.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt
index 8dbcf8295c6c..4d91a36c5cf5 100644
--- a/Documentation/devicetree/bindings/net/mdio-gpio.txt
+++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt
@@ -2,6 +2,7 @@ MDIO on GPIOs
 
 Currently defined compatibles:
 - virtual,gpio-mdio
+- microchip,mdio-smi0
 
 MDC and MDIO lines connected to GPIO controllers are listed in the
 gpios property as described in section VIII.1 in the following order:
-- 
cgit v1.2.3


From 4db6187d721ed6a30df658da137a12246fe6a1b7 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:24:47 +0800
Subject: rds: Remove redundant assignment to nr_sig

Variable nr_sig is being assigned a value however the assignment is
never read, so this redundant assignment can be removed.

Cleans up the following clang-analyzer warning:

net/rds/ib_send.c:297:2: warning: Value stored to 'nr_sig' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_send.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 92b4a8689aae..4190b90ff3b1 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -294,7 +294,6 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 
 	rds_ib_ring_free(&ic->i_send_ring, completed);
 	rds_ib_sub_signaled(ic, nr_sig);
-	nr_sig = 0;
 
 	if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
 	    test_bit(0, &conn->c_map_queued))
-- 
cgit v1.2.3


From 3afef8c7aa2de3574021c848b5f7c62687e6b166 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:28:22 +0800
Subject: net/tls: Remove redundant initialization of record

record is being initialized to ctx->open_record but this is never
read as record is overwritten later on.  Remove the redundant
initialization.

Cleans up the following clang-analyzer warning:

net/tls/tls_device.c:421:26: warning: Value stored to 'record' during
its initialization is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 790c6b7ecb26..76a6f8c2eec4 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -418,7 +418,7 @@ static int tls_push_data(struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-	struct tls_record_info *record = ctx->open_record;
+	struct tls_record_info *record;
 	int tls_push_record_flags;
 	struct page_frag *pfrag;
 	size_t orig_size = size;
-- 
cgit v1.2.3


From 2342eb1afe00586a018536c4bf9e04d7aa4bf63e Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:29:48 +0800
Subject: llc2: Remove redundant assignment to rc

Variable rc is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

net/llc/llc_station.c:86:2: warning: Value stored to 'rc' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_station.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index c29170e767a8..05c6ae092053 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -54,7 +54,6 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb)
 
 	if (!nskb)
 		goto out;
-	rc = 0;
 	llc_pdu_decode_sa(skb, mac_da);
 	llc_pdu_decode_ssap(skb, &dsap);
 	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP);
@@ -83,7 +82,6 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
 
 	if (!nskb)
 		goto out;
-	rc = 0;
 	llc_pdu_decode_sa(skb, mac_da);
 	llc_pdu_decode_ssap(skb, &dsap);
 	llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, dsap, LLC_PDU_RSP);
-- 
cgit v1.2.3


From ad542fb7f2e2bb30c06381e77d4b29e895576ddc Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:30:56 +0800
Subject: mpls: Remove redundant assignment to err

Variable err is set to -ENOMEM but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

net/mpls/af_mpls.c:1022:2: warning: Value stored to 'err' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 47bab701555f..05a21dd072df 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1019,7 +1019,6 @@ static int mpls_route_add(struct mpls_route_config *cfg,
 		goto errout;
 	}
 
-	err = -ENOMEM;
 	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
 	if (IS_ERR(rt)) {
 		err = PTR_ERR(rt);
-- 
cgit v1.2.3


From 6fd6c483e7abf0f67d02d9a49b217efcd01314f4 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 27 Apr 2021 18:32:22 +0800
Subject: net/smc: Remove redundant assignment to rc

Variable rc is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

net/smc/af_smc.c:1079:3: warning: Value stored to 'rc' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 47340b3b514f..be3e80b3e27f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1076,7 +1076,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 		rc = -EISCONN;
 		goto out;
 	case SMC_INIT:
-		rc = 0;
 		break;
 	}
 
-- 
cgit v1.2.3


From 69e16d01d1de4f1249869de342915f608feb55d5 Mon Sep 17 00:00:00 2001
From: "Gong, Sishuai" <sishuai@purdue.edu>
Date: Tue, 27 Apr 2021 15:04:24 +0000
Subject: net: fix a concurrency bug in l2tp_tunnel_register()

l2tp_tunnel_register() registers a tunnel without fully
initializing its attribute. This can allow another kernel thread
running l2tp_xmit_core() to access the uninitialized data and
then cause a kernel NULL pointer dereference error, as shown below.

Thread 1    Thread 2
//l2tp_tunnel_register()
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
           //pppol2tp_connect()
           tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id);
           // Fetch the new tunnel
           ...
           //l2tp_xmit_core()
           struct sock *sk = tunnel->sock;
           ...
           bh_lock_sock(sk);
           //Null pointer error happens
tunnel->sock = sk;

Fix this bug by initializing tunnel->sock before adding the
tunnel into l2tp_tunnel_list.

Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Sishuai Gong <sishuai@purdue.edu>
Reported-by: Sishuai Gong <sishuai@purdue.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2ee20743cb41..53486b162f01 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1478,11 +1478,15 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
+	sk = sock->sk;
+	sock_hold(sk);
+	tunnel->sock = sk;
+
 	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
 	list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
 		if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
 			spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+			sock_put(sk);
 			ret = -EEXIST;
 			goto err_sock;
 		}
@@ -1490,10 +1494,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
 	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
 
-	sk = sock->sk;
-	sock_hold(sk);
-	tunnel->sock = sk;
-
 	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
 		struct udp_tunnel_sock_cfg udp_cfg = {
 			.sk_user_data = tunnel,
-- 
cgit v1.2.3


From 75258586793efc521e5dd52a5bf6c7a4cf7002be Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Tue, 27 Apr 2021 09:22:58 -0700
Subject: net:nfc:digital: Fix a double free in digital_tg_recv_dep_req

In digital_tg_recv_dep_req, it calls nfc_tm_data_received(..,resp).
If nfc_tm_data_received() failed, the callee will free the resp via
kfree_skb() and return error. But in the exit branch, the resp
will be freed again.

My patch sets resp to NULL if nfc_tm_data_received() failed, to
avoid the double free.

Fixes: 1c7a4c24fbfd9 ("NFC Digital: Add target NFC-DEP support")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/digital_dep.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 1150731126e2..3982fa084737 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1273,6 +1273,8 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
 	}
 
 	rc = nfc_tm_data_received(ddev->nfc_dev, resp);
+	if (rc)
+		resp = NULL;
 
 exit:
 	kfree_skb(ddev->chaining_skb);
-- 
cgit v1.2.3


From 99ba0ea616aabdc8e26259fd722503e012199a76 Mon Sep 17 00:00:00 2001
From: Ignat Korchagin <ignat@cloudflare.com>
Date: Tue, 27 Apr 2021 22:09:38 +0100
Subject: sfc: adjust efx->xdp_tx_queue_count with the real number of
 initialized queues

efx->xdp_tx_queue_count is initially initialized to num_possible_cpus() and is
later used to allocate and traverse efx->xdp_tx_queues lookup array. However,
we may end up not initializing all the array slots with real queues during
probing. This results, for example, in a NULL pointer dereference, when running
"# ethtool -S <iface>", similar to below

[2570283.664955][T4126959] BUG: kernel NULL pointer dereference, address: 00000000000000f8
[2570283.681283][T4126959] #PF: supervisor read access in kernel mode
[2570283.695678][T4126959] #PF: error_code(0x0000) - not-present page
[2570283.710013][T4126959] PGD 0 P4D 0
[2570283.721649][T4126959] Oops: 0000 [#1] SMP PTI
[2570283.734108][T4126959] CPU: 23 PID: 4126959 Comm: ethtool Tainted: G           O      5.10.20-cloudflare-2021.3.1 #1
[2570283.752641][T4126959] Hardware name: <redacted>
[2570283.781408][T4126959] RIP: 0010:efx_ethtool_get_stats+0x2ca/0x330 [sfc]
[2570283.796073][T4126959] Code: 00 85 c0 74 39 48 8b 95 a8 0f 00 00 48 85 d2 74 2d 31 c0 eb 07 48 8b 95 a8 0f 00 00 48 63 c8 49 83 c4 08 83 c0 01 48 8b 14 ca <48> 8b 92 f8 00 00 00 49 89 54 24 f8 39 85 a0 0f 00 00 77 d7 48 8b
[2570283.831259][T4126959] RSP: 0018:ffffb79a77657ce8 EFLAGS: 00010202
[2570283.845121][T4126959] RAX: 0000000000000019 RBX: ffffb799cd0c9280 RCX: 0000000000000018
[2570283.860872][T4126959] RDX: 0000000000000000 RSI: ffff96dd970ce000 RDI: 0000000000000005
[2570283.876525][T4126959] RBP: ffff96dd86f0a000 R08: ffff96dd970ce480 R09: 000000000000005f
[2570283.892014][T4126959] R10: ffffb799cd0c9fff R11: ffffb799cd0c9000 R12: ffffb799cd0c94f8
[2570283.907406][T4126959] R13: ffffffffc11b1090 R14: ffff96dd970ce000 R15: ffffffffc11cd66c
[2570283.922705][T4126959] FS:  00007fa7723f8740(0000) GS:ffff96f51fac0000(0000) knlGS:0000000000000000
[2570283.938848][T4126959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[2570283.952524][T4126959] CR2: 00000000000000f8 CR3: 0000001a73e6e006 CR4: 00000000007706e0
[2570283.967529][T4126959] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[2570283.982400][T4126959] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[2570283.997308][T4126959] PKRU: 55555554
[2570284.007649][T4126959] Call Trace:
[2570284.017598][T4126959]  dev_ethtool+0x1832/0x2830

Fix this by adjusting efx->xdp_tx_queue_count after probing to reflect the true
value of initialized slots in efx->xdp_tx_queues.

Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
Fixes: e26ca4b53582 ("sfc: reduce the number of requested xdp ev queues")
Cc: <stable@vger.kernel.org> # 5.12.x
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx_channels.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index 1bfeee283ea9..a3ca406a3561 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -914,6 +914,8 @@ int efx_set_channels(struct efx_nic *efx)
 			}
 		}
 	}
+	if (xdp_queue_number)
+		efx->xdp_tx_queue_count = xdp_queue_number;
 
 	rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
 	if (rc)
-- 
cgit v1.2.3


From 76d6a13383b8e3ff20a9cf52aa9c3de39e485632 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Tue, 27 Apr 2021 19:43:12 +0200
Subject: seq_file: Add a seq_bprintf function

Similarly to seq_buf_bprintf in lib/seq_buf.c, this function writes a
printf formatted string with arguments provided in a "binary
representation" built by functions such as vbin_printf.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210427174313.860948-2-revest@chromium.org
---
 fs/seq_file.c            | 18 ++++++++++++++++++
 include/linux/seq_file.h |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index cb11a34fb871..5059248f2d64 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -412,6 +412,24 @@ void seq_printf(struct seq_file *m, const char *f, ...)
 }
 EXPORT_SYMBOL(seq_printf);
 
+#ifdef CONFIG_BINARY_PRINTF
+void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary)
+{
+	int len;
+
+	if (m->count < m->size) {
+		len = bstr_printf(m->buf + m->count, m->size - m->count, f,
+				  binary);
+		if (m->count + len < m->size) {
+			m->count += len;
+			return;
+		}
+	}
+	seq_set_overflow(m);
+}
+EXPORT_SYMBOL(seq_bprintf);
+#endif /* CONFIG_BINARY_PRINTF */
+
 /**
  *	mangle_path -	mangle and copy path to buffer beginning
  *	@s: buffer start
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index b83b3ae3c877..723b1fa1177e 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -146,6 +146,10 @@ void *__seq_open_private(struct file *, const struct seq_operations *, int);
 int seq_open_private(struct file *, const struct seq_operations *, int);
 int seq_release_private(struct inode *, struct file *);
 
+#ifdef CONFIG_BINARY_PRINTF
+void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary);
+#endif
+
 #define DEFINE_SEQ_ATTRIBUTE(__name)					\
 static int __name ## _open(struct inode *inode, struct file *file)	\
 {									\
-- 
cgit v1.2.3


From 48cac3f4a96ddf08df8e53809ed066de0dc93915 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Tue, 27 Apr 2021 19:43:13 +0200
Subject: bpf: Implement formatted output helpers with bstr_printf

BPF has three formatted output helpers: bpf_trace_printk, bpf_seq_printf
and bpf_snprintf. Their signatures specify that all arguments are
provided from the BPF world as u64s (in an array or as registers). All
of these helpers are currently implemented by calling functions such as
snprintf() whose signatures take a variable number of arguments, then
placed in a va_list by the compiler to call vsnprintf().

"d9c9e4db bpf: Factorize bpf_trace_printk and bpf_seq_printf" introduced
a bpf_printf_prepare function that fills an array of u64 sanitized
arguments with an array of "modifiers" which indicate what the "real"
size of each argument should be (given by the format specifier). The
BPF_CAST_FMT_ARG macro consumes these arrays and casts each argument to
its real size. However, the C promotion rules implicitely cast them all
back to u64s. Therefore, the arguments given to snprintf are u64s and
the va_list constructed by the compiler will use 64 bits for each
argument. On 64 bit machines, this happens to work well because 32 bit
arguments in va_lists need to occupy 64 bits anyway, but on 32 bit
architectures this breaks the layout of the va_list expected by the
called function and mangles values.

In "88a5c690b6 bpf: fix bpf_trace_printk on 32 bit archs", this problem
had been solved for bpf_trace_printk only with a "horrid workaround"
that emitted multiple calls to trace_printk where each call had
different argument types and generated different va_list layouts. One of
the call would be dynamically chosen at runtime. This was ok with the 3
arguments that bpf_trace_printk takes but bpf_seq_printf and
bpf_snprintf accept up to 12 arguments. Because this approach scales
code exponentially, it is not a viable option anymore.

Because the promotion rules are part of the language and because the
construction of a va_list is an arch-specific ABI, it's best to just
avoid variadic arguments and va_lists altogether. Thankfully the
kernel's snprintf() has an alternative in the form of bstr_printf() that
accepts arguments in a "binary buffer representation". These binary
buffers are currently created by vbin_printf and used in the tracing
subsystem to split the cost of printing into two parts: a fast one that
only dereferences and remembers values, and a slower one, called later,
that does the pretty-printing.

This patch refactors bpf_printf_prepare to construct binary buffers of
arguments consumable by bstr_printf() instead of arrays of arguments and
modifiers. This gets rid of BPF_CAST_FMT_ARG and greatly simplifies the
bpf_printf_prepare usage but there are a few gotchas that change how
bpf_printf_prepare needs to do things.

Currently, bpf_printf_prepare uses a per cpu temporary buffer as a
generic storage for strings and IP addresses. With this refactoring, the
temporary buffers now holds all the arguments in a structured binary
format.

To comply with the format expected by bstr_printf, certain format
specifiers also need to be pre-formatted: %pB and %pi6/%pi4/%pI4/%pI6.
Because vsnprintf subroutines for these specifiers are hard to expose,
we pre-format these arguments with calls to snprintf().

Reported-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210427174313.860948-3-revest@chromium.org
---
 include/linux/bpf.h      |  22 +-----
 init/Kconfig             |   1 +
 kernel/bpf/helpers.c     | 188 +++++++++++++++++++++++++----------------------
 kernel/bpf/verifier.c    |   2 +-
 kernel/trace/bpf_trace.c |  34 +++------
 5 files changed, 115 insertions(+), 132 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ad4bcf1cadbb..b33f199c4cc2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2081,24 +2081,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
-enum bpf_printf_mod_type {
-	BPF_PRINTF_INT,
-	BPF_PRINTF_LONG,
-	BPF_PRINTF_LONG_LONG,
-};
-
-/* Workaround for getting va_list handling working with different argument type
- * combinations generically for 32 and 64 bit archs.
- */
-#define BPF_CAST_FMT_ARG(arg_nb, args, mod)				\
-	(mod[arg_nb] == BPF_PRINTF_LONG_LONG ||				\
-	 (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)	\
-	  ? (u64)args[arg_nb]						\
-	  : (u32)args[arg_nb])
-
-int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
-		       u64 *final_args, enum bpf_printf_mod_type *mod,
-		       u32 num_args);
-void bpf_printf_cleanup(void);
+int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u32 **bin_buf, u32 num_args);
+void bpf_bprintf_cleanup(void);
 
 #endif /* _LINUX_BPF_H */
diff --git a/init/Kconfig b/init/Kconfig
index 5deae45b8d81..0d82a1f838cc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1708,6 +1708,7 @@ config BPF_SYSCALL
 	select BPF
 	select IRQ_WORK
 	select TASKS_TRACE_RCU
+	select BINARY_PRINTF
 	select NET_SOCK_MSG if INET
 	default n
 	help
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 85b26ca5aacd..544773970dbc 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -707,9 +707,6 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
 	struct bpf_printf_buf *bufs;
 	int used;
 
-	if (*tmp_buf)
-		return 0;
-
 	preempt_disable();
 	used = this_cpu_inc_return(bpf_printf_buf_used);
 	if (WARN_ON_ONCE(used > 1)) {
@@ -723,7 +720,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
 	return 0;
 }
 
-void bpf_printf_cleanup(void)
+void bpf_bprintf_cleanup(void)
 {
 	if (this_cpu_read(bpf_printf_buf_used)) {
 		this_cpu_dec(bpf_printf_buf_used);
@@ -732,43 +729,45 @@ void bpf_printf_cleanup(void)
 }
 
 /*
- * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
+ * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers
  *
  * Returns a negative value if fmt is an invalid format string or 0 otherwise.
  *
  * This can be used in two ways:
- * - Format string verification only: when final_args and mod are NULL
+ * - Format string verification only: when bin_args is NULL
  * - Arguments preparation: in addition to the above verification, it writes in
- *   final_args a copy of raw_args where pointers from BPF have been sanitized
- *   into pointers safe to use by snprintf. This also writes in the mod array
- *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *   bin_args a binary representation of arguments usable by bstr_printf where
+ *   pointers from BPF have been sanitized.
  *
  * In argument preparation mode, if 0 is returned, safe temporary buffers are
- * allocated and bpf_printf_cleanup should be called to free them after use.
+ * allocated and bpf_bprintf_cleanup should be called to free them after use.
  */
-int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
-			u64 *final_args, enum bpf_printf_mod_type *mod,
-			u32 num_args)
+int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u32 **bin_args, u32 num_args)
 {
-	char *unsafe_ptr = NULL, *tmp_buf = NULL, *fmt_end;
-	size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
-	int err, i, num_spec = 0, copy_size;
-	enum bpf_printf_mod_type cur_mod;
+	char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+	size_t sizeof_cur_arg, sizeof_cur_ip;
+	int err, i, num_spec = 0;
 	u64 cur_arg;
-	char fmt_ptype;
-
-	if (!!final_args != !!mod)
-		return -EINVAL;
+	char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
 
 	fmt_end = strnchr(fmt, fmt_size, 0);
 	if (!fmt_end)
 		return -EINVAL;
 	fmt_size = fmt_end - fmt;
 
+	if (bin_args) {
+		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
+			return -EBUSY;
+
+		tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
+		*bin_args = (u32 *)tmp_buf;
+	}
+
 	for (i = 0; i < fmt_size; i++) {
 		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
 			err = -EINVAL;
-			goto cleanup;
+			goto out;
 		}
 
 		if (fmt[i] != '%')
@@ -781,7 +780,7 @@ int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 
 		if (num_spec >= num_args) {
 			err = -EINVAL;
-			goto cleanup;
+			goto out;
 		}
 
 		/* The string is zero-terminated so if fmt[i] != 0, we can
@@ -800,7 +799,7 @@ int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 		}
 
 		if (fmt[i] == 'p') {
-			cur_mod = BPF_PRINTF_LONG;
+			sizeof_cur_arg = sizeof(long);
 
 			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
 			    fmt[i + 2] == 's') {
@@ -811,117 +810,140 @@ int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 
 			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
 			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
-			    fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
-			    fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+			    fmt[i + 1] == 'x' || fmt[i + 1] == 's' ||
+			    fmt[i + 1] == 'S') {
 				/* just kernel pointers */
-				if (final_args)
+				if (tmp_buf)
 					cur_arg = raw_args[num_spec];
-				goto fmt_next;
+				i++;
+				goto nocopy_fmt;
+			}
+
+			if (fmt[i + 1] == 'B') {
+				if (tmp_buf)  {
+					err = snprintf(tmp_buf,
+						       (tmp_buf_end - tmp_buf),
+						       "%pB",
+						       (void *)(long)raw_args[num_spec]);
+					tmp_buf += (err + 1);
+				}
+
+				i++;
+				num_spec++;
+				continue;
 			}
 
 			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
 			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
 			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
 				err = -EINVAL;
-				goto cleanup;
+				goto out;
 			}
 
 			i += 2;
-			if (!final_args)
-				goto fmt_next;
+			if (!tmp_buf)
+				goto nocopy_fmt;
 
-			if (try_get_fmt_tmp_buf(&tmp_buf)) {
-				err = -EBUSY;
-				goto out;
-			}
-
-			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-			if (tmp_buf_len < copy_size) {
+			sizeof_cur_ip = (fmt[i] == '4') ? 4 : 16;
+			if (tmp_buf_end - tmp_buf < sizeof_cur_ip) {
 				err = -ENOSPC;
-				goto cleanup;
+				goto out;
 			}
 
 			unsafe_ptr = (char *)(long)raw_args[num_spec];
-			err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
-						       copy_size);
+			err = copy_from_kernel_nofault(cur_ip, unsafe_ptr,
+						       sizeof_cur_ip);
 			if (err < 0)
-				memset(tmp_buf, 0, copy_size);
-			cur_arg = (u64)(long)tmp_buf;
-			tmp_buf += copy_size;
-			tmp_buf_len -= copy_size;
+				memset(cur_ip, 0, sizeof_cur_ip);
+
+			/* hack: bstr_printf expects IP addresses to be
+			 * pre-formatted as strings, ironically, the easiest way
+			 * to do that is to call snprintf.
+			 */
+			ip_spec[2] = fmt[i - 1];
+			ip_spec[3] = fmt[i];
+			err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
+				       ip_spec, &cur_ip);
 
-			goto fmt_next;
+			tmp_buf += err + 1;
+			num_spec++;
+
+			continue;
 		} else if (fmt[i] == 's') {
-			cur_mod = BPF_PRINTF_LONG;
 			fmt_ptype = fmt[i];
 fmt_str:
 			if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
 			    !ispunct(fmt[i + 1])) {
 				err = -EINVAL;
-				goto cleanup;
-			}
-
-			if (!final_args)
-				goto fmt_next;
-
-			if (try_get_fmt_tmp_buf(&tmp_buf)) {
-				err = -EBUSY;
 				goto out;
 			}
 
-			if (!tmp_buf_len) {
+			if (!tmp_buf)
+				goto nocopy_fmt;
+
+			if (tmp_buf_end == tmp_buf) {
 				err = -ENOSPC;
-				goto cleanup;
+				goto out;
 			}
 
 			unsafe_ptr = (char *)(long)raw_args[num_spec];
 			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
-						    fmt_ptype, tmp_buf_len);
+						    fmt_ptype,
+						    tmp_buf_end - tmp_buf);
 			if (err < 0) {
 				tmp_buf[0] = '\0';
 				err = 1;
 			}
 
-			cur_arg = (u64)(long)tmp_buf;
 			tmp_buf += err;
-			tmp_buf_len -= err;
+			num_spec++;
 
-			goto fmt_next;
+			continue;
 		}
 
-		cur_mod = BPF_PRINTF_INT;
+		sizeof_cur_arg = sizeof(int);
 
 		if (fmt[i] == 'l') {
-			cur_mod = BPF_PRINTF_LONG;
+			sizeof_cur_arg = sizeof(long);
 			i++;
 		}
 		if (fmt[i] == 'l') {
-			cur_mod = BPF_PRINTF_LONG_LONG;
+			sizeof_cur_arg = sizeof(long long);
 			i++;
 		}
 
 		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
 		    fmt[i] != 'x' && fmt[i] != 'X') {
 			err = -EINVAL;
-			goto cleanup;
+			goto out;
 		}
 
-		if (final_args)
+		if (tmp_buf)
 			cur_arg = raw_args[num_spec];
-fmt_next:
-		if (final_args) {
-			mod[num_spec] = cur_mod;
-			final_args[num_spec] = cur_arg;
+nocopy_fmt:
+		if (tmp_buf) {
+			tmp_buf = PTR_ALIGN(tmp_buf, sizeof(u32));
+			if (tmp_buf_end - tmp_buf < sizeof_cur_arg) {
+				err = -ENOSPC;
+				goto out;
+			}
+
+			if (sizeof_cur_arg == 8) {
+				*(u32 *)tmp_buf = *(u32 *)&cur_arg;
+				*(u32 *)(tmp_buf + 4) = *((u32 *)&cur_arg + 1);
+			} else {
+				*(u32 *)tmp_buf = (u32)(long)cur_arg;
+			}
+			tmp_buf += sizeof_cur_arg;
 		}
 		num_spec++;
 	}
 
 	err = 0;
-cleanup:
-	if (err)
-		bpf_printf_cleanup();
 out:
+	if (err)
+		bpf_bprintf_cleanup();
 	return err;
 }
 
@@ -930,9 +952,8 @@ out:
 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
 	   const void *, data, u32, data_len)
 {
-	enum bpf_printf_mod_type mod[MAX_SNPRINTF_VARARGS];
-	u64 args[MAX_SNPRINTF_VARARGS];
 	int err, num_args;
+	u32 *bin_args;
 
 	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
 	    (data_len && !data))
@@ -942,22 +963,13 @@ BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
 	/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
 	 * can safely give an unbounded size.
 	 */
-	err = bpf_printf_prepare(fmt, UINT_MAX, data, args, mod, num_args);
+	err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
 	if (err < 0)
 		return err;
 
-	/* Maximumly we can have MAX_SNPRINTF_VARARGS parameters, just give
-	 * all of them to snprintf().
-	 */
-	err = snprintf(str, str_size, fmt, BPF_CAST_FMT_ARG(0, args, mod),
-		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
-		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
-		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
-		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
-		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
-		BPF_CAST_FMT_ARG(11, args, mod));
-
-	bpf_printf_cleanup();
+	err = bstr_printf(str, str_size, fmt, bin_args);
+
+	bpf_bprintf_cleanup();
 
 	return err + 1;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9145f88b2a0a..8fd552c16763 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5946,7 +5946,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
 	 * can focus on validating the format specifiers.
 	 */
-	err = bpf_printf_prepare(fmt, UINT_MAX, NULL, NULL, NULL, num_args);
+	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
 	if (err < 0)
 		verbose(env, "Invalid format string\n");
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0e67d12a8f40..d2d7cf6cfe83 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -381,27 +381,23 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	   u64, arg2, u64, arg3)
 {
 	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
-	enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
+	u32 *bin_args;
 	static char buf[BPF_TRACE_PRINTK_SIZE];
 	unsigned long flags;
 	int ret;
 
-	ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
-				 MAX_TRACE_PRINTK_VARARGS);
+	ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
+				  MAX_TRACE_PRINTK_VARARGS);
 	if (ret < 0)
 		return ret;
 
 	raw_spin_lock_irqsave(&trace_printk_lock, flags);
-	ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
-		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
-	/* snprintf() will not append null for zero-length strings */
-	if (ret == 0)
-		buf[0] = '\0';
+	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
 
 	trace_bpf_trace_printk(buf);
 	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
 
-	bpf_printf_cleanup();
+	bpf_bprintf_cleanup();
 
 	return ret;
 }
@@ -435,31 +431,21 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
-	enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
-	u64 args[MAX_SEQ_PRINTF_VARARGS];
 	int err, num_args;
+	u32 *bin_args;
 
 	if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
 	    (data_len && !data))
 		return -EINVAL;
 	num_args = data_len / 8;
 
-	err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+	err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
 	if (err < 0)
 		return err;
 
-	/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
-	 * all of them to seq_printf().
-	 */
-	seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
-		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
-		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
-		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
-		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
-		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
-		BPF_CAST_FMT_ARG(11, args, mod));
-
-	bpf_printf_cleanup();
+	seq_bprintf(m, fmt, bin_args);
+
+	bpf_bprintf_cleanup();
 
 	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
 }
-- 
cgit v1.2.3


From f008d732ab181fd00d95c2e8a6e479d2f7c634b3 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Sat, 24 Apr 2021 18:45:09 -0300
Subject: bpf: Add batched ops support for percpu array

Uses the already in-place infrastructure provided by the
'generic_map_*_batch' functions.

No tweak was needed as it transparently handles the percpu variant.

As arrays don't have delete operations, let it return a error to
user space (default behaviour).

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210424214510.806627-2-pctammela@mojatatu.com
---
 kernel/bpf/arraymap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 463d25e1e67e..3c4105603f9d 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -698,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_delete_elem = array_map_delete_elem,
 	.map_seq_show_elem = percpu_array_map_seq_show_elem,
 	.map_check_btf = array_map_check_btf,
+	.map_lookup_batch = generic_map_lookup_batch,
+	.map_update_batch = generic_map_update_batch,
 	.map_set_for_each_callback_args = map_set_for_each_callback_args,
 	.map_for_each_callback = bpf_for_each_array_elem,
 	.map_btf_name = "bpf_array",
-- 
cgit v1.2.3


From 3733bfbbdd28f7a65340d0058d15d15190a4944a Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Sat, 24 Apr 2021 18:45:10 -0300
Subject: bpf, selftests: Update array map tests for per-cpu batched ops

Follows the same logic as the hashtable tests.

Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210424214510.806627-3-pctammela@mojatatu.com
---
 .../selftests/bpf/map_tests/array_map_batch_ops.c  | 104 +++++++++++++++------
 1 file changed, 75 insertions(+), 29 deletions(-)

diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index e42ea1195d18..f4d870da7684 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -9,10 +9,13 @@
 
 #include <test_maps.h>
 
+static int nr_cpus;
+
 static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
-			     int *values)
+			     __s64 *values, bool is_pcpu)
 {
-	int i, err;
+	int i, j, err;
+	int cpu_offset = 0;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
 		.elem_flags = 0,
 		.flags = 0,
@@ -20,22 +23,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
 
 	for (i = 0; i < max_entries; i++) {
 		keys[i] = i;
-		values[i] = i + 1;
+		if (is_pcpu) {
+			cpu_offset = i * nr_cpus;
+			for (j = 0; j < nr_cpus; j++)
+				(values + cpu_offset)[j] = i + 1 + j;
+		} else {
+			values[i] = i + 1;
+		}
 	}
 
 	err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
 	CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
 }
 
-static void map_batch_verify(int *visited, __u32 max_entries,
-			     int *keys, int *values)
+static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
+			     __s64 *values, bool is_pcpu)
 {
-	int i;
+	int i, j;
+	int cpu_offset = 0;
 
 	memset(visited, 0, max_entries * sizeof(*visited));
 	for (i = 0; i < max_entries; i++) {
-		CHECK(keys[i] + 1 != values[i], "key/value checking",
-		      "error: i %d key %d value %d\n", i, keys[i], values[i]);
+		if (is_pcpu) {
+			cpu_offset = i * nr_cpus;
+			for (j = 0; j < nr_cpus; j++) {
+				__s64 value = (values + cpu_offset)[j];
+				CHECK(keys[i] + j + 1 != value,
+				      "key/value checking",
+				      "error: i %d j %d key %d value %lld\n", i,
+				      j, keys[i], value);
+			}
+		} else {
+			CHECK(keys[i] + 1 != values[i], "key/value checking",
+			      "error: i %d key %d value %lld\n", i, keys[i],
+			      values[i]);
+		}
 		visited[i] = 1;
 	}
 	for (i = 0; i < max_entries; i++) {
@@ -44,19 +66,21 @@ static void map_batch_verify(int *visited, __u32 max_entries,
 	}
 }
 
-void test_array_map_batch_ops(void)
+static void __test_map_lookup_and_update_batch(bool is_pcpu)
 {
 	struct bpf_create_map_attr xattr = {
 		.name = "array_map",
-		.map_type = BPF_MAP_TYPE_ARRAY,
+		.map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY :
+				      BPF_MAP_TYPE_ARRAY,
 		.key_size = sizeof(int),
-		.value_size = sizeof(int),
+		.value_size = sizeof(__s64),
 	};
-	int map_fd, *keys, *values, *visited;
+	int map_fd, *keys, *visited;
 	__u32 count, total, total_success;
 	const __u32 max_entries = 10;
 	__u64 batch = 0;
-	int err, step;
+	int err, step, value_size;
+	void *values;
 	DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
 		.elem_flags = 0,
 		.flags = 0,
@@ -67,22 +91,23 @@ void test_array_map_batch_ops(void)
 	CHECK(map_fd == -1,
 	      "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
 
-	keys = malloc(max_entries * sizeof(int));
-	values = malloc(max_entries * sizeof(int));
-	visited = malloc(max_entries * sizeof(int));
+	value_size = sizeof(__s64);
+	if (is_pcpu)
+		value_size *= nr_cpus;
+
+	keys = calloc(max_entries, sizeof(*keys));
+	values = calloc(max_entries, value_size);
+	visited = calloc(max_entries, sizeof(*visited));
 	CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
 	      strerror(errno));
 
-	/* populate elements to the map */
-	map_batch_update(map_fd, max_entries, keys, values);
-
 	/* test 1: lookup in a loop with various steps. */
 	total_success = 0;
 	for (step = 1; step < max_entries; step++) {
-		map_batch_update(map_fd, max_entries, keys, values);
-		map_batch_verify(visited, max_entries, keys, values);
+		map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+		map_batch_verify(visited, max_entries, keys, values, is_pcpu);
 		memset(keys, 0, max_entries * sizeof(*keys));
-		memset(values, 0, max_entries * sizeof(*values));
+		memset(values, 0, max_entries * value_size);
 		batch = 0;
 		total = 0;
 		/* iteratively lookup/delete elements with 'step'
@@ -91,10 +116,10 @@ void test_array_map_batch_ops(void)
 		count = step;
 		while (true) {
 			err = bpf_map_lookup_batch(map_fd,
-						total ? &batch : NULL, &batch,
-						keys + total,
-						values + total,
-						&count, &opts);
+						   total ? &batch : NULL,
+						   &batch, keys + total,
+						   values + total * value_size,
+						   &count, &opts);
 
 			CHECK((err && errno != ENOENT), "lookup with steps",
 			      "error: %s\n", strerror(errno));
@@ -108,7 +133,7 @@ void test_array_map_batch_ops(void)
 		CHECK(total != max_entries, "lookup with steps",
 		      "total = %u, max_entries = %u\n", total, max_entries);
 
-		map_batch_verify(visited, max_entries, keys, values);
+		map_batch_verify(visited, max_entries, keys, values, is_pcpu);
 
 		total_success++;
 	}
@@ -116,9 +141,30 @@ void test_array_map_batch_ops(void)
 	CHECK(total_success == 0, "check total_success",
 	      "unexpected failure\n");
 
-	printf("%s:PASS\n", __func__);
-
 	free(keys);
 	free(values);
 	free(visited);
 }
+
+static void array_map_batch_ops(void)
+{
+	__test_map_lookup_and_update_batch(false);
+	printf("test_%s:PASS\n", __func__);
+}
+
+static void array_percpu_map_batch_ops(void)
+{
+	__test_map_lookup_and_update_batch(true);
+	printf("test_%s:PASS\n", __func__);
+}
+
+void test_array_map_batch_ops(void)
+{
+	nr_cpus = libbpf_num_possible_cpus();
+
+	CHECK(nr_cpus < 0, "nr_cpus checking",
+	      "error: get possible cpus failed");
+
+	array_map_batch_ops();
+	array_percpu_map_batch_ops();
+}
-- 
cgit v1.2.3


From e542d29ca81d005651680a0a697b72ca13ddc4cc Mon Sep 17 00:00:00 2001
From: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Date: Tue, 27 Apr 2021 10:36:35 -0500
Subject: icmp: standardize naming of RFC 8335 PROBE constants

The current definitions of constants for PROBE, currently defined only
in the net-next kernel branch, are inconsistent, with
some beginning with ICMP and others with simply EXT. This patch
attempts to standardize the naming conventions of the constants for
PROBE before their release into a stable Kernel, and to update the
relevant definitions in net/ipv4/icmp.c.

Similarly, the definitions for the code field (previously
ICMP_EXT_MAL_QUERY, etc) use the same prefixes as the type field. This
patch adds _CODE_ to the prefix to clarify the distinction of these
constants.

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
Acked-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20210427153635.2591-1-andreas.a.roeseler@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/icmp.h | 28 ++++++++++++++--------------
 net/ipv4/icmp.c           | 16 ++++++++--------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/uapi/linux/icmp.h b/include/uapi/linux/icmp.h
index 222325d1d80e..c1da8244c5e1 100644
--- a/include/uapi/linux/icmp.h
+++ b/include/uapi/linux/icmp.h
@@ -70,22 +70,22 @@
 #define ICMP_EXC_FRAGTIME	1	/* Fragment Reass time exceeded	*/
 
 /* Codes for EXT_ECHO (PROBE) */
-#define ICMP_EXT_ECHO		42
-#define ICMP_EXT_ECHOREPLY	43
-#define ICMP_EXT_MAL_QUERY	1	/* Malformed Query */
-#define ICMP_EXT_NO_IF		2	/* No such Interface */
-#define ICMP_EXT_NO_TABLE_ENT	3	/* No such Table Entry */
-#define ICMP_EXT_MULT_IFS	4	/* Multiple Interfaces Satisfy Query */
+#define ICMP_EXT_ECHO			42
+#define ICMP_EXT_ECHOREPLY		43
+#define ICMP_EXT_CODE_MAL_QUERY		1	/* Malformed Query */
+#define ICMP_EXT_CODE_NO_IF		2	/* No such Interface */
+#define ICMP_EXT_CODE_NO_TABLE_ENT	3	/* No such Table Entry */
+#define ICMP_EXT_CODE_MULT_IFS		4	/* Multiple Interfaces Satisfy Query */
 
 /* Constants for EXT_ECHO (PROBE) */
-#define EXT_ECHOREPLY_ACTIVE	(1 << 2)/* active bit in reply message */
-#define EXT_ECHOREPLY_IPV4	(1 << 1)/* ipv4 bit in reply message */
-#define EXT_ECHOREPLY_IPV6	1	/* ipv6 bit in reply message */
-#define EXT_ECHO_CTYPE_NAME	1
-#define EXT_ECHO_CTYPE_INDEX	2
-#define EXT_ECHO_CTYPE_ADDR	3
-#define ICMP_AFI_IP		1	/* Address Family Identifier for ipv4 */
-#define ICMP_AFI_IP6		2	/* Address Family Identifier for ipv6 */
+#define ICMP_EXT_ECHOREPLY_ACTIVE	(1 << 2)/* active bit in reply message */
+#define ICMP_EXT_ECHOREPLY_IPV4		(1 << 1)/* ipv4 bit in reply message */
+#define ICMP_EXT_ECHOREPLY_IPV6		1	/* ipv6 bit in reply message */
+#define ICMP_EXT_ECHO_CTYPE_NAME	1
+#define ICMP_EXT_ECHO_CTYPE_INDEX	2
+#define ICMP_EXT_ECHO_CTYPE_ADDR	3
+#define ICMP_AFI_IP			1	/* Address Family Identifier for ipv4 */
+#define ICMP_AFI_IP6			2	/* Address Family Identifier for ipv6 */
 
 struct icmphdr {
   __u8		type;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8bd988fbcb31..7b6931a4d775 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1033,7 +1033,7 @@ static bool icmp_echo(struct sk_buff *skb)
 	status = 0;
 	dev = NULL;
 	switch (iio->extobj_hdr.class_type) {
-	case EXT_ECHO_CTYPE_NAME:
+	case ICMP_EXT_ECHO_CTYPE_NAME:
 		iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
 		if (ident_len >= IFNAMSIZ)
 			goto send_mal_query;
@@ -1041,14 +1041,14 @@ static bool icmp_echo(struct sk_buff *skb)
 		memcpy(buff, &iio->ident.name, ident_len);
 		dev = dev_get_by_name(net, buff);
 		break;
-	case EXT_ECHO_CTYPE_INDEX:
+	case ICMP_EXT_ECHO_CTYPE_INDEX:
 		iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
 					 sizeof(iio->ident.ifindex), &_iio);
 		if (ident_len != sizeof(iio->ident.ifindex))
 			goto send_mal_query;
 		dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
 		break;
-	case EXT_ECHO_CTYPE_ADDR:
+	case ICMP_EXT_ECHO_CTYPE_ADDR:
 		if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
 				 iio->ident.addr.ctype3_hdr.addrlen)
 			goto send_mal_query;
@@ -1080,23 +1080,23 @@ static bool icmp_echo(struct sk_buff *skb)
 		goto send_mal_query;
 	}
 	if (!dev) {
-		icmp_param.data.icmph.code = ICMP_EXT_NO_IF;
+		icmp_param.data.icmph.code = ICMP_EXT_CODE_NO_IF;
 		goto send_reply;
 	}
 	/* Fill bits in reply message */
 	if (dev->flags & IFF_UP)
-		status |= EXT_ECHOREPLY_ACTIVE;
+		status |= ICMP_EXT_ECHOREPLY_ACTIVE;
 	if (__in_dev_get_rcu(dev) && __in_dev_get_rcu(dev)->ifa_list)
-		status |= EXT_ECHOREPLY_IPV4;
+		status |= ICMP_EXT_ECHOREPLY_IPV4;
 	if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
-		status |= EXT_ECHOREPLY_IPV6;
+		status |= ICMP_EXT_ECHOREPLY_IPV6;
 	dev_put(dev);
 	icmp_param.data.icmph.un.echo.sequence |= htons(status);
 send_reply:
 	icmp_reply(&icmp_param, skb);
 		return true;
 send_mal_query:
-	icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY;
+	icmp_param.data.icmph.code = ICMP_EXT_CODE_MAL_QUERY;
 	goto send_reply;
 }
 
-- 
cgit v1.2.3


From 9be02dd3858137f7bf83320568279eeda59faf01 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 28 Apr 2021 09:05:32 +0300
Subject: net/sched: act_ct: Remove redundant ct get and check

The assignment is not being used and redundant.
The check for null is redundant as nf_conntrack_put() also
checks this.

Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Link: https://lore.kernel.org/r/20210428060532.3330974-1-roid@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_ct.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 48fdf7293dea..ec7a1c438df9 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -991,9 +991,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 
 		/* Associate skb with specified zone. */
 		if (tmpl) {
-			ct = nf_ct_get(skb, &ctinfo);
-			if (skb_nfct(skb))
-				nf_conntrack_put(skb_nfct(skb));
+			nf_conntrack_put(skb_nfct(skb));
 			nf_conntrack_get(&tmpl->ct_general);
 			nf_ct_set(skb, tmpl, IP_CT_NEW);
 		}
-- 
cgit v1.2.3


From 12c2bb96c3f1916624d671904430b7714b48dd71 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 28 Apr 2021 13:00:10 +0100
Subject: net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255

Currently the for-loop in ksz8_port_init_cnt is causing a static
analysis infinite loop warning with the comparison of
mib->cnt_ptr < dev->reg_mib_cnt. This occurs because mib->cnt_ptr
is a u8 and dev->reg_mib_cnt is an int and the analyzer determines
that mib->cnt_ptr potentially can wrap around to zero if the value
in dev->reg_mib_cnt is > 255. However, this value is never this
large, it is always less than 256 so make reg_mib_cnt a u8.

Addresses-Coverity: ("Infinite loop")
Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210428120010.337959-1-colin.king@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/microchip/ksz_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index e0bbdca64375..2e6bfd333f50 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -69,7 +69,7 @@ struct ksz_device {
 	int cpu_ports;			/* port bitmap can be cpu port */
 	int phy_port_cnt;
 	int port_cnt;
-	int reg_mib_cnt;
+	u8 reg_mib_cnt;
 	int mib_cnt;
 	const struct mib_names *mib_names;
 	phy_interface_t compat_interface;
-- 
cgit v1.2.3


From 65ad85f63b15af6995473724ab8562772db22753 Mon Sep 17 00:00:00 2001
From: Maxim Kochetkov <fido_max@inbox.ru>
Date: Wed, 28 Apr 2021 12:53:56 +0300
Subject: net: phy: marvell: add downshift support for M88E1240

Add downshift support for 88E1240, it uses the same downshift
configuration registers as 88E1011.

Signed-off-by: Maxim Kochetkov <fido_max@inbox.ru>
Link: https://lore.kernel.org/r/20210428095356.621536-1-fido_max@inbox.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/marvell.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index a61fde7013bd..0b2cccb0d865 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2870,6 +2870,8 @@ static struct phy_driver marvell_drivers[] = {
 		.get_sset_count = marvell_get_sset_count,
 		.get_strings = marvell_get_strings,
 		.get_stats = marvell_get_stats,
+		.get_tunable = m88e1011_get_tunable,
+		.set_tunable = m88e1011_set_tunable,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1116R,
-- 
cgit v1.2.3


From 808337bec7366f948663952d8e739eb6c235a90f Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 28 Apr 2021 17:57:32 +0800
Subject: net: tun: Remove redundant assignment to ret

Variable 'ret' is set to zero but this value is never read as it is
overwritten with a new value later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

drivers/net/tun.c:3008:2: warning: Value stored to 'ret' is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619603852-114996-1-git-send-email-yang.lee@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/tun.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 36443d506b67..84f832806313 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3008,7 +3008,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		return open_related_ns(&net->ns, get_net_ns);
 	}
 
-	ret = 0;
 	rtnl_lock();
 
 	tun = tun_get(tfile);
-- 
cgit v1.2.3


From 15c0a64bfcbcc7a8dca805746f46ea6e746736ed Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 28 Apr 2021 17:58:05 +0800
Subject: net: netrom: nr_in: Remove redundant assignment to ns

Variable ns is set to 'skb->data[17]' but this value is never read as
it is overwritten or not used later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

net/netrom/nr_in.c:156:2: warning: Value stored to 'ns' is never read
[clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619603885-115604-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/netrom/nr_in.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 69e58906c32b..2f084b6f69d7 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -153,7 +153,6 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
 	int queued = 0;
 
 	nr = skb->data[18];
-	ns = skb->data[17];
 
 	switch (frametype) {
 	case NR_CONNREQ:
-- 
cgit v1.2.3


From 4a52dd8fefb45626dace70a63c0738dbd83b7edb Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 28 Apr 2021 15:09:46 +0200
Subject: net: selftest: fix build issue if INET is disabled

In case ethernet driver is enabled and INET is disabled, selftest will
fail to build.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Fixes: 3e1e58d64c3d ("net: add generic selftest support")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210428130947.29649-1-o.rempel@pengutronix.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/Kconfig   |  2 +-
 drivers/net/ethernet/freescale/Kconfig |  2 +-
 include/net/selftests.h                | 19 +++++++++++++++++++
 net/Kconfig                            |  2 +-
 net/core/Makefile                      |  2 +-
 net/dsa/Kconfig                        |  2 +-
 6 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig
index 6842b74b0696..482c58c4c584 100644
--- a/drivers/net/ethernet/atheros/Kconfig
+++ b/drivers/net/ethernet/atheros/Kconfig
@@ -20,8 +20,8 @@ if NET_VENDOR_ATHEROS
 config AG71XX
 	tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support"
 	depends on ATH79
-	select NET_SELFTESTS
 	select PHYLINK
+	imply NET_SELFTESTS
 	help
 	  If you wish to compile a kernel for AR7XXX/91XXX and enable
 	  ethernet support, then you should always answer Y to this.
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index 3d937b4650b2..2d1abdd58fab 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -26,8 +26,8 @@ config FEC
 		   ARCH_MXC || SOC_IMX28 || COMPILE_TEST)
 	default ARCH_MXC || SOC_IMX28 if ARM
 	select CRC32
-	select NET_SELFTESTS
 	select PHYLIB
+	imply NET_SELFTESTS
 	imply PTP_1588_CLOCK
 	help
 	  Say Y here if you want to use the built-in 10/100 Fast ethernet
diff --git a/include/net/selftests.h b/include/net/selftests.h
index 9993b9498cf3..e65e8d230d33 100644
--- a/include/net/selftests.h
+++ b/include/net/selftests.h
@@ -4,9 +4,28 @@
 
 #include <linux/ethtool.h>
 
+#if IS_ENABLED(CONFIG_NET_SELFTESTS)
+
 void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
 		  u64 *buf);
 int net_selftest_get_count(void);
 void net_selftest_get_strings(u8 *data);
 
+#else
+
+static inline void net_selftest(struct net_device *ndev, struct ethtool_test *etest,
+				u64 *buf)
+{
+}
+
+static inline int net_selftest_get_count(void)
+{
+	return 0;
+}
+
+static inline void net_selftest_get_strings(u8 *data)
+{
+}
+
+#endif
 #endif /* _NET_SELFTESTS */
diff --git a/net/Kconfig b/net/Kconfig
index 8d955195c069..f5ee7c65e6b4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -431,7 +431,7 @@ config SOCK_VALIDATE_XMIT
 
 config NET_SELFTESTS
 	def_tristate PHYLIB
-	depends on PHYLIB
+	depends on PHYLIB && INET
 
 config NET_SOCK_MSG
 	bool
diff --git a/net/core/Makefile b/net/core/Makefile
index 1a6168d8f23b..f7f16650fe9e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+obj-$(CONFIG_NET_SELFTESTS) += selftests.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
 obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
@@ -33,7 +34,6 @@ obj-$(CONFIG_NET_DEVLINK) += devlink.o
 obj-$(CONFIG_GRO_CELLS) += gro_cells.o
 obj-$(CONFIG_FAILOVER) += failover.o
 ifeq ($(CONFIG_INET),y)
-obj-$(CONFIG_NET_SELFTESTS) += selftests.o
 obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
 endif
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 5baba7021427..00bb89b2d86f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -9,7 +9,7 @@ menuconfig NET_DSA
 	select NET_SWITCHDEV
 	select PHYLINK
 	select NET_DEVLINK
-	select NET_SELFTESTS
+	imply NET_SELFTESTS
 	help
 	  Say Y if you want to enable support for the hardware switches supported
 	  by the Distributed Switch Architecture.
-- 
cgit v1.2.3